This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix various minor pod issues
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
199e78b7
DM
69#define PL_preambled (PL_parser->preambled)
70#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 71#define PL_linestr (PL_parser->linestr)
c2598295
DM
72#define PL_expect (PL_parser->expect)
73#define PL_copline (PL_parser->copline)
f06b5848
DM
74#define PL_bufptr (PL_parser->bufptr)
75#define PL_oldbufptr (PL_parser->oldbufptr)
76#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77#define PL_linestart (PL_parser->linestart)
78#define PL_bufend (PL_parser->bufend)
79#define PL_last_uni (PL_parser->last_uni)
80#define PL_last_lop (PL_parser->last_lop)
81#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 82#define PL_lex_state (PL_parser->lex_state)
2f9285f8 83#define PL_rsfp (PL_parser->rsfp)
5486870f 84#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
85#define PL_in_my (PL_parser->in_my)
86#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 87#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 88#define PL_multi_end (PL_parser->multi_end)
13765c85 89#define PL_error_count (PL_parser->error_count)
199e78b7
DM
90
91#ifdef PERL_MAD
92# define PL_endwhite (PL_parser->endwhite)
93# define PL_faketokens (PL_parser->faketokens)
94# define PL_lasttoke (PL_parser->lasttoke)
95# define PL_nextwhite (PL_parser->nextwhite)
96# define PL_realtokenstart (PL_parser->realtokenstart)
97# define PL_skipwhite (PL_parser->skipwhite)
98# define PL_thisclose (PL_parser->thisclose)
99# define PL_thismad (PL_parser->thismad)
100# define PL_thisopen (PL_parser->thisopen)
101# define PL_thisstuff (PL_parser->thisstuff)
102# define PL_thistoken (PL_parser->thistoken)
103# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
104# define PL_thiswhite (PL_parser->thiswhite)
105# define PL_nexttoke (PL_parser->nexttoke)
106# define PL_curforce (PL_parser->curforce)
107#else
108# define PL_nexttoke (PL_parser->nexttoke)
109# define PL_nexttype (PL_parser->nexttype)
110# define PL_nextval (PL_parser->nextval)
199e78b7
DM
111#endif
112
a1894d81 113static const char* const ident_too_long = "Identifier too long";
8903cb82 114
29595ff2 115#ifdef PERL_MAD
29595ff2 116# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 117# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 118#else
5db06880 119# define CURMAD(slot,sv)
9ded7720 120# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
121#endif
122
a7aaec61
Z
123#define XENUMMASK 0x3f
124#define XFAKEEOF 0x40
125#define XFAKEBRACK 0x80
9059aa12 126
39e02b42
JH
127#ifdef USE_UTF8_SCRIPTS
128# define UTF (!IN_BYTES)
2b9d42f0 129#else
802a15e9 130# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
2b9d42f0 131#endif
a0ed51b3 132
b1fc3636
CJ
133/* The maximum number of characters preceding the unrecognized one to display */
134#define UNRECOGNIZED_PRECEDE_COUNT 10
135
61f0cdd9 136/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
137 * 1999-02-27 mjd-perl-patch@plover.com */
138#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
139
bf4acbe4 140#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 141
ffb4593c
NT
142/* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
79072805 144 * can get by with a single comparison (if the compiler is smart enough).
9da1dd8f
DM
145 *
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
79072805
LW
148 */
149
fb73857a 150/* #define LEX_NOTPARSING 11 is done in perl.h. */
151
b6007c36
DM
152#define LEX_NORMAL 10 /* normal code (ie not within "...") */
153#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156#define LEX_INTERPSTART 6 /* expecting the start of a $var */
157
158 /* at end of code, eg "$x" followed by: */
159#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
161
162#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164#define LEX_INTERPCONST 2 /* NOT USED */
165#define LEX_FORMLINE 1 /* expecting a format line */
166#define LEX_KNOWNEXT 0 /* next token known; just return it */
167
79072805 168
bbf60fe6 169#ifdef DEBUGGING
27da23d5 170static const char* const lex_state_names[] = {
bbf60fe6
DM
171 "KNOWNEXT",
172 "FORMLINE",
173 "INTERPCONST",
174 "INTERPCONCAT",
175 "INTERPENDMAYBE",
176 "INTERPEND",
177 "INTERPSTART",
178 "INTERPPUSH",
179 "INTERPCASEMOD",
180 "INTERPNORMAL",
181 "NORMAL"
182};
183#endif
184
79072805
LW
185#ifdef ff_next
186#undef ff_next
d48672a2
LW
187#endif
188
79072805 189#include "keywords.h"
fe14fcc3 190
ffb4593c
NT
191/* CLINE is a macro that ensures PL_copline has a sane value */
192
ae986130
LW
193#ifdef CLINE
194#undef CLINE
195#endif
57843af0 196#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 197
5db06880 198#ifdef PERL_MAD
29595ff2
NC
199# define SKIPSPACE0(s) skipspace0(s)
200# define SKIPSPACE1(s) skipspace1(s)
201# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202# define PEEKSPACE(s) skipspace2(s,0)
203#else
204# define SKIPSPACE0(s) skipspace(s)
205# define SKIPSPACE1(s) skipspace(s)
206# define SKIPSPACE2(s,tsv) skipspace(s)
207# define PEEKSPACE(s) skipspace(s)
208#endif
209
ffb4593c
NT
210/*
211 * Convenience functions to return different tokens and prime the
9cbb5ea2 212 * lexer for the next token. They all take an argument.
ffb4593c
NT
213 *
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
7eb971ee 224 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 225 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
226 * BOop : bitwise or or xor
227 * BAop : bitwise and
228 * SHop : shift operator
229 * PWop : power operator
9cbb5ea2 230 * PMop : pattern-matching operator
ffb4593c
NT
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
e5edeb50 234 * Rop : relational operator <= != gt
ffb4593c
NT
235 *
236 * Also see LOP and lop() below.
237 */
238
998054bd 239#ifdef DEBUGGING /* Serve -DT. */
704d4215 240# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 241#else
bbf60fe6 242# define REPORT(retval) (retval)
998054bd
SC
243#endif
244
bbf60fe6
DM
245#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
252#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 255#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
d68ce4ac 272#define UNI3(f,x,have_x) { \
6154021b 273 pl_yylval.ival = f; \
d68ce4ac 274 if (have_x) PL_expect = x; \
376fcdbf
AL
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
d68ce4ac
FC
283#define UNI(f) UNI3(f,XTERM,1)
284#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
b5fb7ce3
FC
285#define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
22393538
MH
287 OPERATOR(f); \
288 }
a687059c 289
d68ce4ac 290#define UNIBRACK(f) UNI3(f,0,0)
79072805 291
9f68db38 292/* grandfather return to old style */
78cdf107
Z
293#define OLDLOP(f) \
294 do { \
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
298 PL_expect = XTERM; \
299 PL_bufptr = s; \
300 return (int)LSTOP; \
301 } while(0)
79072805 302
83944c01
FC
303#define COPLINE_INC_WITH_HERELINES \
304 STMT_START { \
305 CopLINE_inc(PL_curcop); \
d794b522
FC
306 if (PL_parser->lex_shared->herelines) \
307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308 PL_parser->lex_shared->herelines = 0; \
83944c01
FC
309 } STMT_END
310
311
8fa7f367
JH
312#ifdef DEBUGGING
313
6154021b 314/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
315enum token_type {
316 TOKENTYPE_NONE,
317 TOKENTYPE_IVAL,
6154021b 318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6 319 TOKENTYPE_PVAL,
aeaef349 320 TOKENTYPE_OPVAL
bbf60fe6
DM
321};
322
6d4a66ac
NC
323static struct debug_tokens {
324 const int token;
325 enum token_type type;
326 const char *name;
327} const debug_tokens[] =
9041c2e3 328{
bbf60fe6
DM
329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
330 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
331 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
333 { ARROW, TOKENTYPE_NONE, "ARROW" },
334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
340 { DO, TOKENTYPE_NONE, "DO" },
341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
342 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
343 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
345 { ELSE, TOKENTYPE_NONE, "ELSE" },
346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
347 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
348 { FOR, TOKENTYPE_IVAL, "FOR" },
349 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
705fe0e5
FC
350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
bbf60fe6
DM
352 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
360 { IF, TOKENTYPE_IVAL, "IF" },
5504e6cf 361 { LABEL, TOKENTYPE_PVAL, "LABEL" },
bbf60fe6
DM
362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
367 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
368 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
369 { MY, TOKENTYPE_IVAL, "MY" },
bbf60fe6
DM
370 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
371 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
372 { OROP, TOKENTYPE_IVAL, "OROP" },
373 { OROR, TOKENTYPE_NONE, "OROR" },
374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
f3f204dc 375 { PEG, TOKENTYPE_NONE, "PEG" },
88e1f1a2
JV
376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
380 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
381 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
382 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
383 { PREINC, TOKENTYPE_NONE, "PREINC" },
384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
f3f204dc 385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
bbf60fe6
DM
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
f3f204dc 388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
bbf60fe6
DM
389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
390 { SUB, TOKENTYPE_NONE, "SUB" },
391 { THING, TOKENTYPE_OPVAL, "THING" },
392 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
397 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 398 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
399 { WHILE, TOKENTYPE_IVAL, "WHILE" },
400 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 402 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
403};
404
6154021b 405/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 406
bbf60fe6 407STATIC int
704d4215 408S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 409{
97aff369 410 dVAR;
7918f24d
NC
411
412 PERL_ARGS_ASSERT_TOKEREPORT;
413
bbf60fe6 414 if (DEBUG_T_TEST) {
bd61b366 415 const char *name = NULL;
bbf60fe6 416 enum token_type type = TOKENTYPE_NONE;
f54cb97a 417 const struct debug_tokens *p;
396482e1 418 SV* const report = newSVpvs("<== ");
bbf60fe6 419
f54cb97a 420 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
421 if (p->token == (int)rv) {
422 name = p->name;
423 type = p->type;
424 break;
425 }
426 }
427 if (name)
54667de8 428 Perl_sv_catpv(aTHX_ report, name);
74736ae6 429 else if ((char)rv > ' ' && (char)rv <= '~')
bbf60fe6
DM
430 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
431 else if (!rv)
396482e1 432 sv_catpvs(report, "EOF");
bbf60fe6
DM
433 else
434 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
435 switch (type) {
436 case TOKENTYPE_NONE:
bbf60fe6
DM
437 break;
438 case TOKENTYPE_IVAL:
704d4215 439 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_OPNUM:
442 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 443 PL_op_name[lvalp->ival]);
bbf60fe6
DM
444 break;
445 case TOKENTYPE_PVAL:
704d4215 446 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
447 break;
448 case TOKENTYPE_OPVAL:
704d4215 449 if (lvalp->opval) {
401441c0 450 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
451 PL_op_name[lvalp->opval->op_type]);
452 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 453 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 454 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
455 }
456
457 }
401441c0 458 else
396482e1 459 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
460 break;
461 }
b6007c36 462 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
463 };
464 return (int)rv;
998054bd
SC
465}
466
b6007c36
DM
467
468/* print the buffer with suitable escapes */
469
470STATIC void
15f169a1 471S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 472{
396482e1 473 SV* const tmp = newSVpvs("");
7918f24d
NC
474
475 PERL_ARGS_ASSERT_PRINTBUF;
476
b6007c36
DM
477 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
478 SvREFCNT_dec(tmp);
479}
480
8fa7f367
JH
481#endif
482
8290c323
NC
483static int
484S_deprecate_commaless_var_list(pTHX) {
485 PL_expect = XTERM;
486 deprecate("comma-less variable list");
487 return REPORT(','); /* grandfather non-comma-format format */
488}
489
ffb4593c
NT
490/*
491 * S_ao
492 *
c963b151
BD
493 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
494 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
495 */
496
76e3520e 497STATIC int
cea2e8a9 498S_ao(pTHX_ int toketype)
a0d0e21e 499{
97aff369 500 dVAR;
3280af22
NIS
501 if (*PL_bufptr == '=') {
502 PL_bufptr++;
a0d0e21e 503 if (toketype == ANDAND)
6154021b 504 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 505 else if (toketype == OROR)
6154021b 506 pl_yylval.ival = OP_ORASSIGN;
c963b151 507 else if (toketype == DORDOR)
6154021b 508 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
509 toketype = ASSIGNOP;
510 }
511 return toketype;
512}
513
ffb4593c
NT
514/*
515 * S_no_op
516 * When Perl expects an operator and finds something else, no_op
517 * prints the warning. It always prints "<something> found where
518 * operator expected. It prints "Missing semicolon on previous line?"
519 * if the surprise occurs at the start of the line. "do you need to
520 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
521 * where the compiler doesn't know if foo is a method call or a function.
522 * It prints "Missing operator before end of line" if there's nothing
523 * after the missing operator, or "... before <...>" if there is something
524 * after the missing operator.
525 */
526
76e3520e 527STATIC void
15f169a1 528S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 529{
97aff369 530 dVAR;
9d4ba2ae
AL
531 char * const oldbp = PL_bufptr;
532 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 533
7918f24d
NC
534 PERL_ARGS_ASSERT_NO_OP;
535
1189a94a
GS
536 if (!s)
537 s = oldbp;
07c798fb 538 else
1189a94a 539 PL_bufptr = s;
734ab321 540 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
56da5a46
RGS
541 if (ckWARN_d(WARN_SYNTAX)) {
542 if (is_first)
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Missing semicolon on previous line?)\n");
545 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 546 const char *t;
8a2bca12 547 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
734ab321 548 t += UTF ? UTF8SKIP(t) : 1)
c35e046a 549 NOOP;
56da5a46
RGS
550 if (t < PL_bufptr && isSPACE(*t))
551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
552 "\t(Do you need to predeclare %"SVf"?)\n",
553 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
554 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46
RGS
555 }
556 else {
557 assert(s >= oldbp);
558 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
559 "\t(Missing operator before %"SVf"?)\n",
560 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
561 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46 562 }
07c798fb 563 }
3280af22 564 PL_bufptr = oldbp;
8990e307
LW
565}
566
ffb4593c
NT
567/*
568 * S_missingterm
569 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 570 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
571 * If we're in a delimited string and the delimiter is a control
572 * character, it's reformatted into a two-char sequence like ^C.
573 * This is fatal.
574 */
575
76e3520e 576STATIC void
cea2e8a9 577S_missingterm(pTHX_ char *s)
8990e307 578{
97aff369 579 dVAR;
8990e307
LW
580 char tmpbuf[3];
581 char q;
582 if (s) {
9d4ba2ae 583 char * const nl = strrchr(s,'\n');
d2719217 584 if (nl)
8990e307
LW
585 *nl = '\0';
586 }
463559e7 587 else if (isCNTRL(PL_multi_close)) {
8990e307 588 *tmpbuf = '^';
585ec06d 589 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
590 tmpbuf[2] = '\0';
591 s = tmpbuf;
592 }
593 else {
eb160463 594 *tmpbuf = (char)PL_multi_close;
8990e307
LW
595 tmpbuf[1] = '\0';
596 s = tmpbuf;
597 }
598 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 599 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 600}
79072805 601
dd0ac2b9
FC
602#include "feature.h"
603
0d863452 604/*
0d863452
RH
605 * Check whether the named feature is enabled.
606 */
26ea9e12 607bool
3fff3427 608Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 609{
97aff369 610 dVAR;
4a731d7b 611 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
612
613 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
ca4d40c4
FC
614
615 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
7918f24d 616
26ea9e12
NC
617 if (namelen > MAX_FEATURE_LEN)
618 return FALSE;
3fff3427 619 memcpy(&he_name[8], name, namelen);
7d69d4a6 620
c8ca97b0
NC
621 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
622 REFCOUNTED_HE_EXISTS));
0d863452
RH
623}
624
ffb4593c 625/*
9cbb5ea2
GS
626 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
627 * utf16-to-utf8-reversed.
ffb4593c
NT
628 */
629
c39cd008
GS
630#ifdef PERL_CR_FILTER
631static void
632strip_return(SV *sv)
633{
eb578fdb
KW
634 const char *s = SvPVX_const(sv);
635 const char * const e = s + SvCUR(sv);
7918f24d
NC
636
637 PERL_ARGS_ASSERT_STRIP_RETURN;
638
c39cd008
GS
639 /* outer loop optimized to do nothing if there are no CR-LFs */
640 while (s < e) {
641 if (*s++ == '\r' && *s == '\n') {
642 /* hit a CR-LF, need to copy the rest */
eb578fdb 643 char *d = s - 1;
c39cd008
GS
644 *d++ = *s++;
645 while (s < e) {
646 if (*s == '\r' && s[1] == '\n')
647 s++;
648 *d++ = *s++;
649 }
650 SvCUR(sv) -= s - d;
651 return;
652 }
653 }
654}
a868473f 655
76e3520e 656STATIC I32
c39cd008 657S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 658{
f54cb97a 659 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
660 if (count > 0 && !maxlen)
661 strip_return(sv);
662 return count;
a868473f
NIS
663}
664#endif
665
ffb4593c 666/*
8eaa0acf
Z
667=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
668
669Creates and initialises a new lexer/parser state object, supplying
670a context in which to lex and parse from a new source of Perl code.
671A pointer to the new state object is placed in L</PL_parser>. An entry
672is made on the save stack so that upon unwinding the new state object
673will be destroyed and the former value of L</PL_parser> will be restored.
674Nothing else need be done to clean up the parsing context.
675
676The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
677non-null, provides a string (in SV form) containing code to be parsed.
678A copy of the string is made, so subsequent modification of I<line>
679does not affect parsing. I<rsfp>, if non-null, provides an input stream
680from which code will be read to be parsed. If both are non-null, the
681code in I<line> comes first and must consist of complete lines of input,
682and I<rsfp> supplies the remainder of the source.
683
e368b3bd
FC
684The I<flags> parameter is reserved for future use. Currently it is only
685used by perl internally, so extensions should always pass zero.
8eaa0acf
Z
686
687=cut
688*/
ffb4593c 689
27fcb6ee 690/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
87606032
NC
691 can share filters with the current parser.
692 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
693 caller, hence isn't owned by the parser, so shouldn't be closed on parser
694 destruction. This is used to handle the case of defaulting to reading the
695 script from the standard input because no filename was given on the command
696 line (without getting confused by situation where STDIN has been closed, so
697 the script handle is opened on fd 0) */
27fcb6ee 698
a0d0e21e 699void
8eaa0acf 700Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 701{
97aff369 702 dVAR;
6ef55633 703 const char *s = NULL;
5486870f 704 yy_parser *parser, *oparser;
60d63348 705 if (flags && flags & ~LEX_START_FLAGS)
8eaa0acf 706 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
707
708 /* create and initialise a parser */
709
199e78b7 710 Newxz(parser, 1, yy_parser);
5486870f 711 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
712 PL_parser = parser;
713
28ac2b49
Z
714 parser->stack = NULL;
715 parser->ps = NULL;
716 parser->stack_size = 0;
acdf0a21 717
e3abe207
DM
718 /* on scope exit, free this parser and restore any outer one */
719 SAVEPARSER(parser);
7c4baf47 720 parser->saved_curcop = PL_curcop;
e3abe207 721
acdf0a21 722 /* initialise lexer state */
8990e307 723
fb205e7a
DM
724#ifdef PERL_MAD
725 parser->curforce = -1;
726#else
727 parser->nexttoke = 0;
728#endif
ca4cfd28 729 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 730 parser->copline = NOLINE;
5afb0a62 731 parser->lex_state = LEX_NORMAL;
c2598295 732 parser->expect = XSTATE;
2f9285f8 733 parser->rsfp = rsfp;
27fcb6ee
FC
734 parser->rsfp_filters =
735 !(flags & LEX_START_SAME_FILTER) || !oparser
d3cd8e11
FC
736 ? NULL
737 : MUTABLE_AV(SvREFCNT_inc(
738 oparser->rsfp_filters
739 ? oparser->rsfp_filters
740 : (oparser->rsfp_filters = newAV())
741 ));
2f9285f8 742
199e78b7
DM
743 Newx(parser->lex_brackstack, 120, char);
744 Newx(parser->lex_casestack, 12, char);
745 *parser->lex_casestack = '\0';
d794b522 746 Newxz(parser->lex_shared, 1, LEXSHARED);
02b34bbe 747
10efb74f 748 if (line) {
0528fd32 749 STRLEN len;
10efb74f 750 s = SvPV_const(line, len);
0abcdfa4
FC
751 parser->linestr = flags & LEX_START_COPIED
752 ? SvREFCNT_inc_simple_NN(line)
753 : newSVpvn_flags(s, len, SvUTF8(line));
11076590 754 sv_catpvs(parser->linestr, "\n;");
0abcdfa4
FC
755 } else {
756 parser->linestr = newSVpvs("\n;");
8990e307 757 }
f06b5848
DM
758 parser->oldoldbufptr =
759 parser->oldbufptr =
760 parser->bufptr =
761 parser->linestart = SvPVX(parser->linestr);
762 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
763 parser->last_lop = parser->last_uni = NULL;
87606032
NC
764 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
765 |LEX_DONT_CLOSE_RSFP);
737c24fc 766
60d63348 767 parser->in_pod = parser->filtered = 0;
79072805 768}
a687059c 769
e3abe207
DM
770
771/* delete a parser object */
772
773void
774Perl_parser_free(pTHX_ const yy_parser *parser)
775{
7918f24d
NC
776 PERL_ARGS_ASSERT_PARSER_FREE;
777
7c4baf47 778 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
779 SvREFCNT_dec(parser->linestr);
780
87606032 781 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
2f9285f8 782 PerlIO_clearerr(parser->rsfp);
799361c3
SH
783 else if (parser->rsfp && (!parser->old_parser ||
784 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 785 PerlIO_close(parser->rsfp);
5486870f 786 SvREFCNT_dec(parser->rsfp_filters);
10002bc1
FC
787 SvREFCNT_dec(parser->lex_stuff);
788 SvREFCNT_dec(parser->sublex_info.repl);
3ac7ff8f
FC
789
790 Safefree(parser->lex_brackstack);
791 Safefree(parser->lex_casestack);
792 Safefree(parser->lex_shared);
793 PL_parser = parser->old_parser;
794 Safefree(parser);
795}
796
797void
798Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
799{
800#ifdef PERL_MAD
801 I32 nexttoke = parser->lasttoke;
802#else
803 I32 nexttoke = parser->nexttoke;
804#endif
805 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
3ce3dcd9
FC
806 while (nexttoke--) {
807#ifdef PERL_MAD
808 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
3ac7ff8f
FC
809 & 0xffff)
810 && parser->nexttoke[nexttoke].next_val.opval
811 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
812 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
813 op_free(parser->nexttoke[nexttoke].next_val.opval);
814 parser->nexttoke[nexttoke].next_val.opval = NULL;
815 }
3ce3dcd9 816#else
3ac7ff8f
FC
817 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
818 && parser->nextval[nexttoke].opval
819 && parser->nextval[nexttoke].opval->op_slabbed
820 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
3ce3dcd9 821 op_free(parser->nextval[nexttoke].opval);
3ac7ff8f
FC
822 parser->nextval[nexttoke].opval = NULL;
823 }
3ce3dcd9
FC
824#endif
825 }
e3abe207
DM
826}
827
828
ffb4593c 829/*
f0e67a1d
Z
830=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
831
832Buffer scalar containing the chunk currently under consideration of the
833text currently being lexed. This is always a plain string scalar (for
834which C<SvPOK> is true). It is not intended to be used as a scalar by
835normal scalar means; instead refer to the buffer directly by the pointer
836variables described below.
837
838The lexer maintains various C<char*> pointers to things in the
839C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
840reallocated, all of these pointers must be updated. Don't attempt to
841do this manually, but rather use L</lex_grow_linestr> if you need to
842reallocate the buffer.
843
844The content of the text chunk in the buffer is commonly exactly one
845complete line of input, up to and including a newline terminator,
846but there are situations where it is otherwise. The octets of the
847buffer may be intended to be interpreted as either UTF-8 or Latin-1.
848The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
849flag on this scalar, which may disagree with it.
850
851For direct examination of the buffer, the variable
852L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
853lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
854of these pointers is usually preferable to examination of the scalar
855through normal scalar means.
856
857=for apidoc AmxU|char *|PL_parser-E<gt>bufend
858
859Direct pointer to the end of the chunk of text currently being lexed, the
860end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
861+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
862always located at the end of the buffer, and does not count as part of
863the buffer's contents.
864
865=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
866
867Points to the current position of lexing inside the lexer buffer.
868Characters around this point may be freely examined, within
869the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
870L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
871interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
872
873Lexing code (whether in the Perl core or not) moves this pointer past
874the characters that it consumes. It is also expected to perform some
875bookkeeping whenever a newline character is consumed. This movement
876can be more conveniently performed by the function L</lex_read_to>,
877which handles newlines appropriately.
878
879Interpretation of the buffer's octets can be abstracted out by
880using the slightly higher-level functions L</lex_peek_unichar> and
881L</lex_read_unichar>.
882
883=for apidoc AmxU|char *|PL_parser-E<gt>linestart
884
885Points to the start of the current line inside the lexer buffer.
886This is useful for indicating at which column an error occurred, and
887not much else. This must be updated by any lexing code that consumes
888a newline; the function L</lex_read_to> handles this detail.
889
890=cut
891*/
892
893/*
894=for apidoc Amx|bool|lex_bufutf8
895
896Indicates whether the octets in the lexer buffer
897(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
898of Unicode characters. If not, they should be interpreted as Latin-1
899characters. This is analogous to the C<SvUTF8> flag for scalars.
900
901In UTF-8 mode, it is not guaranteed that the lexer buffer actually
902contains valid UTF-8. Lexing code must be robust in the face of invalid
903encoding.
904
905The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
906is significant, but not the whole story regarding the input character
907encoding. Normally, when a file is being read, the scalar contains octets
908and its C<SvUTF8> flag is off, but the octets should be interpreted as
909UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
910however, the scalar may have the C<SvUTF8> flag on, and in this case its
911octets should be interpreted as UTF-8 unless the C<use bytes> pragma
912is in effect. This logic may change in the future; use this function
913instead of implementing the logic yourself.
914
915=cut
916*/
917
918bool
919Perl_lex_bufutf8(pTHX)
920{
921 return UTF;
922}
923
924/*
925=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
926
927Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
928at least I<len> octets (including terminating NUL). Returns a
929pointer to the reallocated buffer. This is necessary before making
930any direct modification of the buffer that would increase its length.
931L</lex_stuff_pvn> provides a more convenient way to insert text into
932the buffer.
933
934Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
935this function updates all of the lexer's variables that point directly
936into the buffer.
937
938=cut
939*/
940
941char *
942Perl_lex_grow_linestr(pTHX_ STRLEN len)
943{
944 SV *linestr;
945 char *buf;
946 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
c7641931 947 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
f0e67a1d
Z
948 linestr = PL_parser->linestr;
949 buf = SvPVX(linestr);
950 if (len <= SvLEN(linestr))
951 return buf;
952 bufend_pos = PL_parser->bufend - buf;
953 bufptr_pos = PL_parser->bufptr - buf;
954 oldbufptr_pos = PL_parser->oldbufptr - buf;
955 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
956 linestart_pos = PL_parser->linestart - buf;
957 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
958 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3328ab5a
FC
959 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
960 PL_parser->lex_shared->re_eval_start - buf : 0;
c7641931 961
f0e67a1d 962 buf = sv_grow(linestr, len);
c7641931 963
f0e67a1d
Z
964 PL_parser->bufend = buf + bufend_pos;
965 PL_parser->bufptr = buf + bufptr_pos;
966 PL_parser->oldbufptr = buf + oldbufptr_pos;
967 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
968 PL_parser->linestart = buf + linestart_pos;
969 if (PL_parser->last_uni)
970 PL_parser->last_uni = buf + last_uni_pos;
971 if (PL_parser->last_lop)
972 PL_parser->last_lop = buf + last_lop_pos;
3328ab5a
FC
973 if (PL_parser->lex_shared->re_eval_start)
974 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
f0e67a1d
Z
975 return buf;
976}
977
978/*
83aa740e 979=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
980
981Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
982immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
983reallocating the buffer if necessary. This means that lexing code that
984runs later will see the characters as if they had appeared in the input.
985It is not recommended to do this as part of normal parsing, and most
986uses of this facility run the risk of the inserted characters being
987interpreted in an unintended manner.
988
989The string to be inserted is represented by I<len> octets starting
990at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
991according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
992The characters are recoded for the lexer buffer, according to how the
993buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 994to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
995function is more convenient.
996
997=cut
998*/
999
1000void
83aa740e 1001Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 1002{
749123ff 1003 dVAR;
f0e67a1d
Z
1004 char *bufptr;
1005 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1006 if (flags & ~(LEX_STUFF_UTF8))
1007 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1008 if (UTF) {
1009 if (flags & LEX_STUFF_UTF8) {
1010 goto plain_copy;
1011 } else {
54d004e8 1012 STRLEN highhalf = 0; /* Count of variants */
83aa740e 1013 const char *p, *e = pv+len;
54d004e8
KW
1014 for (p = pv; p != e; p++) {
1015 if (! UTF8_IS_INVARIANT(*p)) {
1016 highhalf++;
1017 }
1018 }
f0e67a1d
Z
1019 if (!highhalf)
1020 goto plain_copy;
1021 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1022 bufptr = PL_parser->bufptr;
1023 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1024 SvCUR_set(PL_parser->linestr,
1025 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
1026 PL_parser->bufend += len+highhalf;
1027 for (p = pv; p != e; p++) {
1028 U8 c = (U8)*p;
54d004e8
KW
1029 if (! UTF8_IS_INVARIANT(c)) {
1030 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1031 *bufptr++ = UTF8_TWO_BYTE_LO(c);
f0e67a1d
Z
1032 } else {
1033 *bufptr++ = (char)c;
1034 }
1035 }
1036 }
1037 } else {
1038 if (flags & LEX_STUFF_UTF8) {
1039 STRLEN highhalf = 0;
83aa740e 1040 const char *p, *e = pv+len;
f0e67a1d
Z
1041 for (p = pv; p != e; p++) {
1042 U8 c = (U8)*p;
54d004e8 1043 if (UTF8_IS_ABOVE_LATIN1(c)) {
f0e67a1d
Z
1044 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1045 "non-Latin-1 character into Latin-1 input");
54d004e8 1046 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
f0e67a1d
Z
1047 p++;
1048 highhalf++;
54d004e8 1049 } else if (! UTF8_IS_INVARIANT(c)) {
f0e67a1d
Z
1050 /* malformed UTF-8 */
1051 ENTER;
1052 SAVESPTR(PL_warnhook);
1053 PL_warnhook = PERL_WARNHOOK_FATAL;
1054 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1055 LEAVE;
1056 }
1057 }
1058 if (!highhalf)
1059 goto plain_copy;
1060 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1061 bufptr = PL_parser->bufptr;
1062 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1063 SvCUR_set(PL_parser->linestr,
1064 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d 1065 PL_parser->bufend += len-highhalf;
54d004e8
KW
1066 p = pv;
1067 while (p < e) {
1068 if (UTF8_IS_INVARIANT(*p)) {
1069 *bufptr++ = *p;
1070 p++;
f0e67a1d 1071 }
54d004e8
KW
1072 else {
1073 assert(p < e -1 );
1074 *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1075 p += 2;
1076 }
f0e67a1d
Z
1077 }
1078 } else {
54d004e8 1079 plain_copy:
f0e67a1d
Z
1080 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1081 bufptr = PL_parser->bufptr;
1082 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1083 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1084 PL_parser->bufend += len;
1085 Copy(pv, bufptr, len, char);
1086 }
1087 }
1088}
1089
1090/*
9dcc53ea
Z
1091=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1092
1093Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1094immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1095reallocating the buffer if necessary. This means that lexing code that
1096runs later will see the characters as if they had appeared in the input.
1097It is not recommended to do this as part of normal parsing, and most
1098uses of this facility run the risk of the inserted characters being
1099interpreted in an unintended manner.
1100
1101The string to be inserted is represented by octets starting at I<pv>
1102and continuing to the first nul. These octets are interpreted as either
1103UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1104in I<flags>. The characters are recoded for the lexer buffer, according
1105to how the buffer is currently being interpreted (L</lex_bufutf8>).
1106If it is not convenient to nul-terminate a string to be inserted, the
1107L</lex_stuff_pvn> function is more appropriate.
1108
1109=cut
1110*/
1111
1112void
1113Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1114{
1115 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1116 lex_stuff_pvn(pv, strlen(pv), flags);
1117}
1118
1119/*
f0e67a1d
Z
1120=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1121
1122Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1123immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1124reallocating the buffer if necessary. This means that lexing code that
1125runs later will see the characters as if they had appeared in the input.
1126It is not recommended to do this as part of normal parsing, and most
1127uses of this facility run the risk of the inserted characters being
1128interpreted in an unintended manner.
1129
1130The string to be inserted is the string value of I<sv>. The characters
1131are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1132being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1133not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1134need to construct a scalar.
1135
1136=cut
1137*/
1138
1139void
1140Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1141{
1142 char *pv;
1143 STRLEN len;
1144 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1145 if (flags)
1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1147 pv = SvPV(sv, len);
1148 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1149}
1150
1151/*
1152=for apidoc Amx|void|lex_unstuff|char *ptr
1153
1154Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1155I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1156This hides the discarded text from any lexing code that runs later,
1157as if the text had never appeared.
1158
1159This is not the normal way to consume lexed text. For that, use
1160L</lex_read_to>.
1161
1162=cut
1163*/
1164
1165void
1166Perl_lex_unstuff(pTHX_ char *ptr)
1167{
1168 char *buf, *bufend;
1169 STRLEN unstuff_len;
1170 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1171 buf = PL_parser->bufptr;
1172 if (ptr < buf)
1173 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1174 if (ptr == buf)
1175 return;
1176 bufend = PL_parser->bufend;
1177 if (ptr > bufend)
1178 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1179 unstuff_len = ptr - buf;
1180 Move(ptr, buf, bufend+1-ptr, char);
1181 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1182 PL_parser->bufend = bufend - unstuff_len;
1183}
1184
1185/*
1186=for apidoc Amx|void|lex_read_to|char *ptr
1187
1188Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1189to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1190performing the correct bookkeeping whenever a newline character is passed.
1191This is the normal way to consume lexed text.
1192
1193Interpretation of the buffer's octets can be abstracted out by
1194using the slightly higher-level functions L</lex_peek_unichar> and
1195L</lex_read_unichar>.
1196
1197=cut
1198*/
1199
1200void
1201Perl_lex_read_to(pTHX_ char *ptr)
1202{
1203 char *s;
1204 PERL_ARGS_ASSERT_LEX_READ_TO;
1205 s = PL_parser->bufptr;
1206 if (ptr < s || ptr > PL_parser->bufend)
1207 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1208 for (; s != ptr; s++)
1209 if (*s == '\n') {
83944c01 1210 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
1211 PL_parser->linestart = s+1;
1212 }
1213 PL_parser->bufptr = ptr;
1214}
1215
1216/*
1217=for apidoc Amx|void|lex_discard_to|char *ptr
1218
1219Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1220up to I<ptr>. The remaining content of the buffer will be moved, and
1221all pointers into the buffer updated appropriately. I<ptr> must not
1222be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1223it is not permitted to discard text that has yet to be lexed.
1224
1225Normally it is not necessarily to do this directly, because it suffices to
1226use the implicit discarding behaviour of L</lex_next_chunk> and things
1227based on it. However, if a token stretches across multiple lines,
1f317c95 1228and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1229that purpose, then after completion of the token it would be wise to
1230explicitly discard the now-unneeded earlier lines, to avoid future
1231multi-line tokens growing the buffer without bound.
1232
1233=cut
1234*/
1235
1236void
1237Perl_lex_discard_to(pTHX_ char *ptr)
1238{
1239 char *buf;
1240 STRLEN discard_len;
1241 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1242 buf = SvPVX(PL_parser->linestr);
1243 if (ptr < buf)
1244 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1245 if (ptr == buf)
1246 return;
1247 if (ptr > PL_parser->bufptr)
1248 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1249 discard_len = ptr - buf;
1250 if (PL_parser->oldbufptr < ptr)
1251 PL_parser->oldbufptr = ptr;
1252 if (PL_parser->oldoldbufptr < ptr)
1253 PL_parser->oldoldbufptr = ptr;
1254 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1255 PL_parser->last_uni = NULL;
1256 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1257 PL_parser->last_lop = NULL;
1258 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1259 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1260 PL_parser->bufend -= discard_len;
1261 PL_parser->bufptr -= discard_len;
1262 PL_parser->oldbufptr -= discard_len;
1263 PL_parser->oldoldbufptr -= discard_len;
1264 if (PL_parser->last_uni)
1265 PL_parser->last_uni -= discard_len;
1266 if (PL_parser->last_lop)
1267 PL_parser->last_lop -= discard_len;
1268}
1269
1270/*
1271=for apidoc Amx|bool|lex_next_chunk|U32 flags
1272
1273Reads in the next chunk of text to be lexed, appending it to
1274L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1275looked to the end of the current chunk and wants to know more. It is
1276usual, but not necessary, for lexing to have consumed the entirety of
1277the current chunk at this time.
1278
1279If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1280chunk (i.e., the current chunk has been entirely consumed), normally the
1281current chunk will be discarded at the same time that the new chunk is
1282read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1283will not be discarded. If the current chunk has not been entirely
1284consumed, then it will not be discarded regardless of the flag.
1285
1286Returns true if some new text was added to the buffer, or false if the
1287buffer has reached the end of the input text.
1288
1289=cut
1290*/
1291
1292#define LEX_FAKE_EOF 0x80000000
112d1284 1293#define LEX_NO_TERM 0x40000000
f0e67a1d
Z
1294
1295bool
1296Perl_lex_next_chunk(pTHX_ U32 flags)
1297{
1298 SV *linestr;
1299 char *buf;
1300 STRLEN old_bufend_pos, new_bufend_pos;
1301 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1302 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1303 bool got_some_for_debugger = 0;
f0e67a1d 1304 bool got_some;
112d1284 1305 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
f0e67a1d 1306 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1307 linestr = PL_parser->linestr;
1308 buf = SvPVX(linestr);
1309 if (!(flags & LEX_KEEP_PREVIOUS) &&
1310 PL_parser->bufptr == PL_parser->bufend) {
1311 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1312 linestart_pos = 0;
1313 if (PL_parser->last_uni != PL_parser->bufend)
1314 PL_parser->last_uni = NULL;
1315 if (PL_parser->last_lop != PL_parser->bufend)
1316 PL_parser->last_lop = NULL;
1317 last_uni_pos = last_lop_pos = 0;
1318 *buf = 0;
1319 SvCUR(linestr) = 0;
1320 } else {
1321 old_bufend_pos = PL_parser->bufend - buf;
1322 bufptr_pos = PL_parser->bufptr - buf;
1323 oldbufptr_pos = PL_parser->oldbufptr - buf;
1324 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1325 linestart_pos = PL_parser->linestart - buf;
1326 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1327 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1328 }
1329 if (flags & LEX_FAKE_EOF) {
1330 goto eof;
60d63348 1331 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
f0e67a1d
Z
1332 got_some = 0;
1333 } else if (filter_gets(linestr, old_bufend_pos)) {
1334 got_some = 1;
17cc9359 1335 got_some_for_debugger = 1;
112d1284
FC
1336 } else if (flags & LEX_NO_TERM) {
1337 got_some = 0;
f0e67a1d 1338 } else {
580561a3
Z
1339 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1340 sv_setpvs(linestr, "");
f0e67a1d
Z
1341 eof:
1342 /* End of real input. Close filehandle (unless it was STDIN),
1343 * then add implicit termination.
1344 */
87606032 1345 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
f0e67a1d
Z
1346 PerlIO_clearerr(PL_parser->rsfp);
1347 else if (PL_parser->rsfp)
1348 (void)PerlIO_close(PL_parser->rsfp);
1349 PL_parser->rsfp = NULL;
60d63348 1350 PL_parser->in_pod = PL_parser->filtered = 0;
f0e67a1d
Z
1351#ifdef PERL_MAD
1352 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1353 PL_faketokens = 1;
1354#endif
1355 if (!PL_in_eval && PL_minus_p) {
1356 sv_catpvs(linestr,
1357 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1358 PL_minus_n = PL_minus_p = 0;
1359 } else if (!PL_in_eval && PL_minus_n) {
1360 sv_catpvs(linestr, /*{*/";}");
1361 PL_minus_n = 0;
1362 } else
1363 sv_catpvs(linestr, ";");
1364 got_some = 1;
1365 }
1366 buf = SvPVX(linestr);
1367 new_bufend_pos = SvCUR(linestr);
1368 PL_parser->bufend = buf + new_bufend_pos;
1369 PL_parser->bufptr = buf + bufptr_pos;
1370 PL_parser->oldbufptr = buf + oldbufptr_pos;
1371 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1372 PL_parser->linestart = buf + linestart_pos;
1373 if (PL_parser->last_uni)
1374 PL_parser->last_uni = buf + last_uni_pos;
1375 if (PL_parser->last_lop)
1376 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1377 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1378 PL_curstash != PL_debstash) {
1379 /* debugger active and we're not compiling the debugger code,
1380 * so store the line into the debugger's array of lines
1381 */
1382 update_debugger_info(NULL, buf+old_bufend_pos,
1383 new_bufend_pos-old_bufend_pos);
1384 }
1385 return got_some;
1386}
1387
1388/*
1389=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1390
1391Looks ahead one (Unicode) character in the text currently being lexed.
1392Returns the codepoint (unsigned integer value) of the next character,
1393or -1 if lexing has reached the end of the input text. To consume the
1394peeked character, use L</lex_read_unichar>.
1395
1396If the next character is in (or extends into) the next chunk of input
1397text, the next chunk will be read in. Normally the current chunk will be
1398discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1399then the current chunk will not be discarded.
1400
1401If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1402is encountered, an exception is generated.
1403
1404=cut
1405*/
1406
1407I32
1408Perl_lex_peek_unichar(pTHX_ U32 flags)
1409{
749123ff 1410 dVAR;
f0e67a1d
Z
1411 char *s, *bufend;
1412 if (flags & ~(LEX_KEEP_PREVIOUS))
1413 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1414 s = PL_parser->bufptr;
1415 bufend = PL_parser->bufend;
1416 if (UTF) {
1417 U8 head;
1418 I32 unichar;
1419 STRLEN len, retlen;
1420 if (s == bufend) {
1421 if (!lex_next_chunk(flags))
1422 return -1;
1423 s = PL_parser->bufptr;
1424 bufend = PL_parser->bufend;
1425 }
1426 head = (U8)*s;
54d004e8 1427 if (UTF8_IS_INVARIANT(head))
f0e67a1d 1428 return head;
54d004e8
KW
1429 if (UTF8_IS_START(head)) {
1430 len = UTF8SKIP(&head);
f0e67a1d
Z
1431 while ((STRLEN)(bufend-s) < len) {
1432 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1433 break;
1434 s = PL_parser->bufptr;
1435 bufend = PL_parser->bufend;
1436 }
1437 }
1438 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1439 if (retlen == (STRLEN)-1) {
1440 /* malformed UTF-8 */
1441 ENTER;
1442 SAVESPTR(PL_warnhook);
1443 PL_warnhook = PERL_WARNHOOK_FATAL;
1444 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1445 LEAVE;
1446 }
1447 return unichar;
1448 } else {
1449 if (s == bufend) {
1450 if (!lex_next_chunk(flags))
1451 return -1;
1452 s = PL_parser->bufptr;
1453 }
1454 return (U8)*s;
1455 }
1456}
1457
1458/*
1459=for apidoc Amx|I32|lex_read_unichar|U32 flags
1460
1461Reads the next (Unicode) character in the text currently being lexed.
1462Returns the codepoint (unsigned integer value) of the character read,
1463and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1464if lexing has reached the end of the input text. To non-destructively
1465examine the next character, use L</lex_peek_unichar> instead.
1466
1467If the next character is in (or extends into) the next chunk of input
1468text, the next chunk will be read in. Normally the current chunk will be
1469discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1470then the current chunk will not be discarded.
1471
1472If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1473is encountered, an exception is generated.
1474
1475=cut
1476*/
1477
1478I32
1479Perl_lex_read_unichar(pTHX_ U32 flags)
1480{
1481 I32 c;
1482 if (flags & ~(LEX_KEEP_PREVIOUS))
1483 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1484 c = lex_peek_unichar(flags);
1485 if (c != -1) {
1486 if (c == '\n')
83944c01 1487 COPLINE_INC_WITH_HERELINES;
d9018cbe
EB
1488 if (UTF)
1489 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1490 else
1491 ++(PL_parser->bufptr);
f0e67a1d
Z
1492 }
1493 return c;
1494}
1495
1496/*
1497=for apidoc Amx|void|lex_read_space|U32 flags
1498
1499Reads optional spaces, in Perl style, in the text currently being
1500lexed. The spaces may include ordinary whitespace characters and
1501Perl-style comments. C<#line> directives are processed if encountered.
1502L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1503at a non-space character (or the end of the input text).
1504
1505If spaces extend into the next chunk of input text, the next chunk will
1506be read in. Normally the current chunk will be discarded at the same
1507time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1508chunk will not be discarded.
1509
1510=cut
1511*/
1512
f0998909
Z
1513#define LEX_NO_NEXT_CHUNK 0x80000000
1514
f0e67a1d
Z
1515void
1516Perl_lex_read_space(pTHX_ U32 flags)
1517{
1518 char *s, *bufend;
1519 bool need_incline = 0;
f0998909 1520 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1521 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1522#ifdef PERL_MAD
1523 if (PL_skipwhite) {
1524 sv_free(PL_skipwhite);
1525 PL_skipwhite = NULL;
1526 }
1527 if (PL_madskills)
1528 PL_skipwhite = newSVpvs("");
1529#endif /* PERL_MAD */
1530 s = PL_parser->bufptr;
1531 bufend = PL_parser->bufend;
1532 while (1) {
1533 char c = *s;
1534 if (c == '#') {
1535 do {
1536 c = *++s;
1537 } while (!(c == '\n' || (c == 0 && s == bufend)));
1538 } else if (c == '\n') {
1539 s++;
1540 PL_parser->linestart = s;
1541 if (s == bufend)
1542 need_incline = 1;
1543 else
1544 incline(s);
1545 } else if (isSPACE(c)) {
1546 s++;
1547 } else if (c == 0 && s == bufend) {
1548 bool got_more;
1549#ifdef PERL_MAD
1550 if (PL_madskills)
1551 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1552#endif /* PERL_MAD */
f0998909
Z
1553 if (flags & LEX_NO_NEXT_CHUNK)
1554 break;
f0e67a1d 1555 PL_parser->bufptr = s;
83944c01 1556 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
1557 got_more = lex_next_chunk(flags);
1558 CopLINE_dec(PL_curcop);
1559 s = PL_parser->bufptr;
1560 bufend = PL_parser->bufend;
1561 if (!got_more)
1562 break;
1563 if (need_incline && PL_parser->rsfp) {
1564 incline(s);
1565 need_incline = 0;
1566 }
1567 } else {
1568 break;
1569 }
1570 }
1571#ifdef PERL_MAD
1572 if (PL_madskills)
1573 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1574#endif /* PERL_MAD */
1575 PL_parser->bufptr = s;
1576}
1577
1578/*
ffb4593c
NT
1579 * S_incline
1580 * This subroutine has nothing to do with tilting, whether at windmills
1581 * or pinball tables. Its name is short for "increment line". It
57843af0 1582 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1583 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1584 * # line 500 "foo.pm"
1585 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1586 */
1587
76e3520e 1588STATIC void
d9095cec 1589S_incline(pTHX_ const char *s)
463ee0b2 1590{
97aff369 1591 dVAR;
d9095cec
NC
1592 const char *t;
1593 const char *n;
1594 const char *e;
8818d409 1595 line_t line_num;
463ee0b2 1596
7918f24d
NC
1597 PERL_ARGS_ASSERT_INCLINE;
1598
83944c01 1599 COPLINE_INC_WITH_HERELINES;
451f421f
FC
1600 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1601 && s+1 == PL_bufend && *s == ';') {
1602 /* fake newline in string eval */
1603 CopLINE_dec(PL_curcop);
1604 return;
1605 }
463ee0b2
LW
1606 if (*s++ != '#')
1607 return;
d4c19fe8
AL
1608 while (SPACE_OR_TAB(*s))
1609 s++;
73659bf1
GS
1610 if (strnEQ(s, "line", 4))
1611 s += 4;
1612 else
1613 return;
084592ab 1614 if (SPACE_OR_TAB(*s))
73659bf1 1615 s++;
4e553d73 1616 else
73659bf1 1617 return;
d4c19fe8
AL
1618 while (SPACE_OR_TAB(*s))
1619 s++;
463ee0b2
LW
1620 if (!isDIGIT(*s))
1621 return;
d4c19fe8 1622
463ee0b2
LW
1623 n = s;
1624 while (isDIGIT(*s))
1625 s++;
07714eb4 1626 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1627 return;
bf4acbe4 1628 while (SPACE_OR_TAB(*s))
463ee0b2 1629 s++;
73659bf1 1630 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1631 s++;
73659bf1
GS
1632 e = t + 1;
1633 }
463ee0b2 1634 else {
c35e046a
AL
1635 t = s;
1636 while (!isSPACE(*t))
1637 t++;
73659bf1 1638 e = t;
463ee0b2 1639 }
bf4acbe4 1640 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1641 e++;
1642 if (*e != '\n' && *e != '\0')
1643 return; /* false alarm */
1644
8818d409
FC
1645 line_num = atoi(n)-1;
1646
f4dd75d9 1647 if (t - s > 0) {
d9095cec 1648 const STRLEN len = t - s;
19bad673
NC
1649 SV *const temp_sv = CopFILESV(PL_curcop);
1650 const char *cf;
1651 STRLEN tmplen;
1652
1653 if (temp_sv) {
1654 cf = SvPVX(temp_sv);
1655 tmplen = SvCUR(temp_sv);
1656 } else {
1657 cf = NULL;
1658 tmplen = 0;
1659 }
1660
d1299d44 1661 if (!PL_rsfp && !PL_parser->filtered) {
e66cf94c
RGS
1662 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1663 * to *{"::_<newfilename"} */
44867030
NC
1664 /* However, the long form of evals is only turned on by the
1665 debugger - usually they're "(eval %lu)" */
1666 char smallbuf[128];
1667 char *tmpbuf;
1668 GV **gvp;
d9095cec 1669 STRLEN tmplen2 = len;
798b63bc 1670 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1671 tmpbuf = smallbuf;
1672 else
2ae0db35 1673 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1674 tmpbuf[0] = '_';
1675 tmpbuf[1] = '<';
2ae0db35 1676 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1677 tmplen += 2;
8a5ee598
RGS
1678 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1679 if (gvp) {
44867030
NC
1680 char *tmpbuf2;
1681 GV *gv2;
1682
1683 if (tmplen2 + 2 <= sizeof smallbuf)
1684 tmpbuf2 = smallbuf;
1685 else
1686 Newx(tmpbuf2, tmplen2 + 2, char);
1687
1688 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1689 /* Either they malloc'd it, or we malloc'd it,
1690 so no prefix is present in ours. */
1691 tmpbuf2[0] = '_';
1692 tmpbuf2[1] = '<';
1693 }
1694
1695 memcpy(tmpbuf2 + 2, s, tmplen2);
1696 tmplen2 += 2;
1697
8a5ee598 1698 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1699 if (!isGV(gv2)) {
8a5ee598 1700 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1701 /* adjust ${"::_<newfilename"} to store the new file name */
1702 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1703 /* The line number may differ. If that is the case,
1704 alias the saved lines that are in the array.
1705 Otherwise alias the whole array. */
1706 if (CopLINE(PL_curcop) == line_num) {
1707 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1708 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1709 }
1710 else if (GvAV(*gvp)) {
1711 AV * const av = GvAV(*gvp);
1712 const I32 start = CopLINE(PL_curcop)+1;
1713 I32 items = AvFILLp(av) - start;
1714 if (items > 0) {
1715 AV * const av2 = GvAVn(gv2);
1716 SV **svp = AvARRAY(av) + start;
1717 I32 l = (I32)line_num+1;
1718 while (items--)
1719 av_store(av2, l++, SvREFCNT_inc(*svp++));
1720 }
1721 }
e5527e4b 1722 }
44867030
NC
1723
1724 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1725 }
e66cf94c 1726 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1727 }
05ec9bb3 1728 CopFILE_free(PL_curcop);
d9095cec 1729 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1730 }
8818d409 1731 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1732}
1733
29595ff2 1734#ifdef PERL_MAD
cd81e915 1735/* skip space before PL_thistoken */
29595ff2
NC
1736
1737STATIC char *
5aaab254 1738S_skipspace0(pTHX_ char *s)
29595ff2 1739{
7918f24d
NC
1740 PERL_ARGS_ASSERT_SKIPSPACE0;
1741
29595ff2
NC
1742 s = skipspace(s);
1743 if (!PL_madskills)
1744 return s;
cd81e915
NC
1745 if (PL_skipwhite) {
1746 if (!PL_thiswhite)
6b29d1f5 1747 PL_thiswhite = newSVpvs("");
cd81e915
NC
1748 sv_catsv(PL_thiswhite, PL_skipwhite);
1749 sv_free(PL_skipwhite);
1750 PL_skipwhite = 0;
1751 }
1752 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1753 return s;
1754}
1755
cd81e915 1756/* skip space after PL_thistoken */
29595ff2
NC
1757
1758STATIC char *
5aaab254 1759S_skipspace1(pTHX_ char *s)
29595ff2 1760{
d4c19fe8 1761 const char *start = s;
29595ff2
NC
1762 I32 startoff = start - SvPVX(PL_linestr);
1763
7918f24d
NC
1764 PERL_ARGS_ASSERT_SKIPSPACE1;
1765
29595ff2
NC
1766 s = skipspace(s);
1767 if (!PL_madskills)
1768 return s;
1769 start = SvPVX(PL_linestr) + startoff;
cd81e915 1770 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1771 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1772 PL_thistoken = newSVpvn(tstart, start - tstart);
1773 }
1774 PL_realtokenstart = -1;
1775 if (PL_skipwhite) {
1776 if (!PL_nextwhite)
6b29d1f5 1777 PL_nextwhite = newSVpvs("");
cd81e915
NC
1778 sv_catsv(PL_nextwhite, PL_skipwhite);
1779 sv_free(PL_skipwhite);
1780 PL_skipwhite = 0;
29595ff2
NC
1781 }
1782 return s;
1783}
1784
1785STATIC char *
5aaab254 1786S_skipspace2(pTHX_ char *s, SV **svp)
29595ff2 1787{
c35e046a
AL
1788 char *start;
1789 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1790 const I32 startoff = s - SvPVX(PL_linestr);
1791
7918f24d
NC
1792 PERL_ARGS_ASSERT_SKIPSPACE2;
1793
29595ff2
NC
1794 s = skipspace(s);
1795 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1796 if (!PL_madskills || !svp)
1797 return s;
1798 start = SvPVX(PL_linestr) + startoff;
cd81e915 1799 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1800 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1801 PL_thistoken = newSVpvn(tstart, start - tstart);
1802 PL_realtokenstart = -1;
29595ff2 1803 }
cd81e915 1804 if (PL_skipwhite) {
29595ff2 1805 if (!*svp)
6b29d1f5 1806 *svp = newSVpvs("");
cd81e915
NC
1807 sv_setsv(*svp, PL_skipwhite);
1808 sv_free(PL_skipwhite);
1809 PL_skipwhite = 0;
29595ff2
NC
1810 }
1811
1812 return s;
1813}
1814#endif
1815
80a702cd 1816STATIC void
15f169a1 1817S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1818{
1819 AV *av = CopFILEAVx(PL_curcop);
1820 if (av) {
b9f83d2f 1821 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1822 if (orig_sv)
1823 sv_setsv(sv, orig_sv);
1824 else
1825 sv_setpvn(sv, buf, len);
80a702cd
RGS
1826 (void)SvIOK_on(sv);
1827 SvIV_set(sv, 0);
1828 av_store(av, (I32)CopLINE(PL_curcop), sv);
1829 }
1830}
1831
ffb4593c
NT
1832/*
1833 * S_skipspace
1834 * Called to gobble the appropriate amount and type of whitespace.
1835 * Skips comments as well.
1836 */
1837
76e3520e 1838STATIC char *
5aaab254 1839S_skipspace(pTHX_ char *s)
a687059c 1840{
5db06880 1841#ifdef PERL_MAD
f0e67a1d
Z
1842 char *start = s;
1843#endif /* PERL_MAD */
7918f24d 1844 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1845#ifdef PERL_MAD
cd81e915
NC
1846 if (PL_skipwhite) {
1847 sv_free(PL_skipwhite);
f0e67a1d 1848 PL_skipwhite = NULL;
5db06880 1849 }
f0e67a1d 1850#endif /* PERL_MAD */
3280af22 1851 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1852 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1853 s++;
f0e67a1d
Z
1854 } else {
1855 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1856 PL_bufptr = s;
f0998909
Z
1857 lex_read_space(LEX_KEEP_PREVIOUS |
1858 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1859 LEX_NO_NEXT_CHUNK : 0));
3280af22 1860 s = PL_bufptr;
f0e67a1d
Z
1861 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1862 if (PL_linestart > PL_bufptr)
1863 PL_bufptr = PL_linestart;
1864 return s;
463ee0b2 1865 }
5db06880 1866#ifdef PERL_MAD
f0e67a1d
Z
1867 if (PL_madskills)
1868 PL_skipwhite = newSVpvn(start, s-start);
1869#endif /* PERL_MAD */
5db06880 1870 return s;
a687059c 1871}
378cc40b 1872
ffb4593c
NT
1873/*
1874 * S_check_uni
1875 * Check the unary operators to ensure there's no ambiguity in how they're
1876 * used. An ambiguous piece of code would be:
1877 * rand + 5
1878 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1879 * the +5 is its argument.
1880 */
1881
76e3520e 1882STATIC void
cea2e8a9 1883S_check_uni(pTHX)
ba106d47 1884{
97aff369 1885 dVAR;
d4c19fe8
AL
1886 const char *s;
1887 const char *t;
2f3197b3 1888
3280af22 1889 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1890 return;
3280af22
NIS
1891 while (isSPACE(*PL_last_uni))
1892 PL_last_uni++;
c35e046a 1893 s = PL_last_uni;
8a2bca12 1894 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
c35e046a 1895 s++;
3280af22 1896 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1897 return;
6136c704 1898
9b387841
NC
1899 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1900 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1901 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1902}
1903
ffb4593c
NT
1904/*
1905 * LOP : macro to build a list operator. Its behaviour has been replaced
1906 * with a subroutine, S_lop() for which LOP is just another name.
1907 */
1908
a0d0e21e
LW
1909#define LOP(f,x) return lop(f,x,s)
1910
ffb4593c
NT
1911/*
1912 * S_lop
1913 * Build a list operator (or something that might be one). The rules:
1914 * - if we have a next token, then it's a list operator [why?]
1915 * - if the next thing is an opening paren, then it's a function
1916 * - else it's a list operator
1917 */
1918
76e3520e 1919STATIC I32
a0be28da 1920S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1921{
97aff369 1922 dVAR;
7918f24d
NC
1923
1924 PERL_ARGS_ASSERT_LOP;
1925
6154021b 1926 pl_yylval.ival = f;
35c8bce7 1927 CLINE;
3280af22
NIS
1928 PL_expect = x;
1929 PL_bufptr = s;
1930 PL_last_lop = PL_oldbufptr;
eb160463 1931 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1932#ifdef PERL_MAD
1933 if (PL_lasttoke)
78cdf107 1934 goto lstop;
5db06880 1935#else
3280af22 1936 if (PL_nexttoke)
78cdf107 1937 goto lstop;
5db06880 1938#endif
79072805 1939 if (*s == '(')
bbf60fe6 1940 return REPORT(FUNC);
29595ff2 1941 s = PEEKSPACE(s);
79072805 1942 if (*s == '(')
bbf60fe6 1943 return REPORT(FUNC);
78cdf107
Z
1944 else {
1945 lstop:
1946 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1947 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1948 return REPORT(LSTOP);
78cdf107 1949 }
79072805
LW
1950}
1951
5db06880
NC
1952#ifdef PERL_MAD
1953 /*
1954 * S_start_force
1955 * Sets up for an eventual force_next(). start_force(0) basically does
1956 * an unshift, while start_force(-1) does a push. yylex removes items
1957 * on the "pop" end.
1958 */
1959
1960STATIC void
1961S_start_force(pTHX_ int where)
1962{
1963 int i;
1964
cd81e915 1965 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1966 where = PL_lasttoke;
cd81e915
NC
1967 assert(PL_curforce < 0 || PL_curforce == where);
1968 if (PL_curforce != where) {
5db06880
NC
1969 for (i = PL_lasttoke; i > where; --i) {
1970 PL_nexttoke[i] = PL_nexttoke[i-1];
1971 }
1972 PL_lasttoke++;
1973 }
cd81e915 1974 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1975 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1976 PL_curforce = where;
1977 if (PL_nextwhite) {
5db06880 1978 if (PL_madskills)
6b29d1f5 1979 curmad('^', newSVpvs(""));
cd81e915 1980 CURMAD('_', PL_nextwhite);
5db06880
NC
1981 }
1982}
1983
1984STATIC void
1985S_curmad(pTHX_ char slot, SV *sv)
1986{
1987 MADPROP **where;
1988
1989 if (!sv)
1990 return;
cd81e915
NC
1991 if (PL_curforce < 0)
1992 where = &PL_thismad;
5db06880 1993 else
cd81e915 1994 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1995
cd81e915 1996 if (PL_faketokens)
76f68e9b 1997 sv_setpvs(sv, "");
5db06880
NC
1998 else {
1999 if (!IN_BYTES) {
2000 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2001 SvUTF8_on(sv);
2002 else if (PL_encoding) {
2003 sv_recode_to_utf8(sv, PL_encoding);
2004 }
2005 }
2006 }
2007
2008 /* keep a slot open for the head of the list? */
2009 if (slot != '_' && *where && (*where)->mad_key == '^') {
2010 (*where)->mad_key = slot;
daba3364 2011 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
2012 (*where)->mad_val = (void*)sv;
2013 }
2014 else
2015 addmad(newMADsv(slot, sv), where, 0);
2016}
2017#else
b3f24c00
MHM
2018# define start_force(where) NOOP
2019# define curmad(slot, sv) NOOP
5db06880
NC
2020#endif
2021
ffb4593c
NT
2022/*
2023 * S_force_next
9cbb5ea2 2024 * When the lexer realizes it knows the next token (for instance,
ffb4593c 2025 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 2026 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
2027 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2028 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
2029 */
2030
4e553d73 2031STATIC void
cea2e8a9 2032S_force_next(pTHX_ I32 type)
79072805 2033{
97aff369 2034 dVAR;
704d4215
GG
2035#ifdef DEBUGGING
2036 if (DEBUG_T_TEST) {
2037 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 2038 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
2039 }
2040#endif
5db06880 2041#ifdef PERL_MAD
cd81e915 2042 if (PL_curforce < 0)
5db06880 2043 start_force(PL_lasttoke);
cd81e915 2044 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
2045 if (PL_lex_state != LEX_KNOWNEXT)
2046 PL_lex_defer = PL_lex_state;
2047 PL_lex_state = LEX_KNOWNEXT;
2048 PL_lex_expect = PL_expect;
cd81e915 2049 PL_curforce = -1;
5db06880 2050#else
3280af22
NIS
2051 PL_nexttype[PL_nexttoke] = type;
2052 PL_nexttoke++;
2053 if (PL_lex_state != LEX_KNOWNEXT) {
2054 PL_lex_defer = PL_lex_state;
2055 PL_lex_expect = PL_expect;
2056 PL_lex_state = LEX_KNOWNEXT;
79072805 2057 }
5db06880 2058#endif
79072805
LW
2059}
2060
28ac2b49
Z
2061void
2062Perl_yyunlex(pTHX)
2063{
a7aaec61
Z
2064 int yyc = PL_parser->yychar;
2065 if (yyc != YYEMPTY) {
2066 if (yyc) {
2067 start_force(-1);
2068 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2069 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 2070 PL_lex_allbrackets--;
a7aaec61 2071 PL_lex_brackets--;
78cdf107
Z
2072 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2073 } else if (yyc == '('/*)*/) {
2074 PL_lex_allbrackets--;
2075 yyc |= (2<<24);
a7aaec61
Z
2076 }
2077 force_next(yyc);
2078 }
28ac2b49
Z
2079 PL_parser->yychar = YYEMPTY;
2080 }
2081}
2082
d0a148a6 2083STATIC SV *
15f169a1 2084S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2085{
97aff369 2086 dVAR;
740cce10 2087 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2088 !IN_BYTES
2089 && UTF
2090 && !is_ascii_string((const U8*)start, len)
740cce10 2091 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2092 return sv;
2093}
2094
ffb4593c
NT
2095/*
2096 * S_force_word
2097 * When the lexer knows the next thing is a word (for instance, it has
2098 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2099 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2100 * lookahead.
ffb4593c
NT
2101 *
2102 * Arguments:
b1b65b59 2103 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2104 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2105 * int check_keyword : if true, Perl checks to make sure the word isn't
2106 * a keyword (do this if the word is a label, e.g. goto FOO)
2107 * int allow_pack : if true, : characters will also be allowed (require,
2108 * use, etc. do this)
9cbb5ea2 2109 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2110 */
2111
76e3520e 2112STATIC char *
5aaab254 2113S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2114{
97aff369 2115 dVAR;
eb578fdb 2116 char *s;
463ee0b2 2117 STRLEN len;
4e553d73 2118
7918f24d
NC
2119 PERL_ARGS_ASSERT_FORCE_WORD;
2120
29595ff2 2121 start = SKIPSPACE1(start);
463ee0b2 2122 s = start;
7e2040f0 2123 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2124 (allow_pack && *s == ':') ||
15f0808c 2125 (allow_initial_tick && *s == '\'') )
a0d0e21e 2126 {
3280af22 2127 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2128 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2129 return start;
cd81e915 2130 start_force(PL_curforce);
5db06880
NC
2131 if (PL_madskills)
2132 curmad('X', newSVpvn(start,s-start));
463ee0b2 2133 if (token == METHOD) {
29595ff2 2134 s = SKIPSPACE1(s);
463ee0b2 2135 if (*s == '(')
3280af22 2136 PL_expect = XTERM;
463ee0b2 2137 else {
3280af22 2138 PL_expect = XOPERATOR;
463ee0b2 2139 }
79072805 2140 }
e74e6b3d 2141 if (PL_madskills)
63575281 2142 curmad('g', newSVpvs( "forced" ));
9ded7720 2143 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2144 = (OP*)newSVOP(OP_CONST,0,
2145 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2146 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2147 force_next(token);
2148 }
2149 return s;
2150}
2151
ffb4593c
NT
2152/*
2153 * S_force_ident
9cbb5ea2 2154 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2155 * text only contains the "foo" portion. The first argument is a pointer
2156 * to the "foo", and the second argument is the type symbol to prefix.
2157 * Forces the next token to be a "WORD".
9cbb5ea2 2158 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2159 */
2160
76e3520e 2161STATIC void
5aaab254 2162S_force_ident(pTHX_ const char *s, int kind)
79072805 2163{
97aff369 2164 dVAR;
7918f24d
NC
2165
2166 PERL_ARGS_ASSERT_FORCE_IDENT;
2167
c9b48522
DD
2168 if (s[0]) {
2169 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
728847b1
BF
2170 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2171 UTF ? SVf_UTF8 : 0));
cd81e915 2172 start_force(PL_curforce);
9ded7720 2173 NEXTVAL_NEXTTOKE.opval = o;
79072805 2174 force_next(WORD);
748a9306 2175 if (kind) {
11343788 2176 o->op_private = OPpCONST_ENTERED;
55497cff 2177 /* XXX see note in pp_entereval() for why we forgo typo
2178 warnings if the symbol must be introduced in an eval.
2179 GSAR 96-10-12 */
90e5519e 2180 gv_fetchpvn_flags(s, len,
728847b1
BF
2181 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2182 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
90e5519e
NC
2183 kind == '$' ? SVt_PV :
2184 kind == '@' ? SVt_PVAV :
2185 kind == '%' ? SVt_PVHV :
a0d0e21e 2186 SVt_PVGV
90e5519e 2187 );
748a9306 2188 }
79072805
LW
2189 }
2190}
2191
3f33d153
FC
2192static void
2193S_force_ident_maybe_lex(pTHX_ char pit)
2194{
2195 start_force(PL_curforce);
2196 NEXTVAL_NEXTTOKE.ival = pit;
2197 force_next('p');
2198}
2199
1571675a
GS
2200NV
2201Perl_str_to_version(pTHX_ SV *sv)
2202{
2203 NV retval = 0.0;
2204 NV nshift = 1.0;
2205 STRLEN len;
cfd0369c 2206 const char *start = SvPV_const(sv,len);
9d4ba2ae 2207 const char * const end = start + len;
504618e9 2208 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2209
2210 PERL_ARGS_ASSERT_STR_TO_VERSION;
2211
1571675a 2212 while (start < end) {
ba210ebe 2213 STRLEN skip;
1571675a
GS
2214 UV n;
2215 if (utf)
9041c2e3 2216 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2217 else {
2218 n = *(U8*)start;
2219 skip = 1;
2220 }
2221 retval += ((NV)n)/nshift;
2222 start += skip;
2223 nshift *= 1000;
2224 }
2225 return retval;
2226}
2227
4e553d73 2228/*
ffb4593c
NT
2229 * S_force_version
2230 * Forces the next token to be a version number.
e759cc13
RGS
2231 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2232 * and if "guessing" is TRUE, then no new token is created (and the caller
2233 * must use an alternative parsing method).
ffb4593c
NT
2234 */
2235
76e3520e 2236STATIC char *
e759cc13 2237S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2238{
97aff369 2239 dVAR;
5f66b61c 2240 OP *version = NULL;
44dcb63b 2241 char *d;
5db06880
NC
2242#ifdef PERL_MAD
2243 I32 startoff = s - SvPVX(PL_linestr);
2244#endif
89bfa8cd 2245
7918f24d
NC
2246 PERL_ARGS_ASSERT_FORCE_VERSION;
2247
29595ff2 2248 s = SKIPSPACE1(s);
89bfa8cd 2249
44dcb63b 2250 d = s;
dd629d5b 2251 if (*d == 'v')
44dcb63b 2252 d++;
44dcb63b 2253 if (isDIGIT(*d)) {
e759cc13
RGS
2254 while (isDIGIT(*d) || *d == '_' || *d == '.')
2255 d++;
5db06880
NC
2256#ifdef PERL_MAD
2257 if (PL_madskills) {
cd81e915 2258 start_force(PL_curforce);
5db06880
NC
2259 curmad('X', newSVpvn(s,d-s));
2260 }
2261#endif
4e4da3ac 2262 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2263 SV *ver;
8d08d9ba 2264#ifdef USE_LOCALE_NUMERIC
909d3787
KW
2265 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2266 setlocale(LC_NUMERIC, "C");
8d08d9ba 2267#endif
6154021b 2268 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2269#ifdef USE_LOCALE_NUMERIC
2270 setlocale(LC_NUMERIC, loc);
909d3787 2271 Safefree(loc);
8d08d9ba 2272#endif
6154021b 2273 version = pl_yylval.opval;
dd629d5b
GS
2274 ver = cSVOPx(version)->op_sv;
2275 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2276 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2277 SvNV_set(ver, str_to_version(ver));
1571675a 2278 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2279 }
89bfa8cd 2280 }
5db06880
NC
2281 else if (guessing) {
2282#ifdef PERL_MAD
2283 if (PL_madskills) {
cd81e915
NC
2284 sv_free(PL_nextwhite); /* let next token collect whitespace */
2285 PL_nextwhite = 0;
5db06880
NC
2286 s = SvPVX(PL_linestr) + startoff;
2287 }
2288#endif
e759cc13 2289 return s;
5db06880 2290 }
89bfa8cd 2291 }
2292
5db06880
NC
2293#ifdef PERL_MAD
2294 if (PL_madskills && !version) {
cd81e915
NC
2295 sv_free(PL_nextwhite); /* let next token collect whitespace */
2296 PL_nextwhite = 0;
5db06880
NC
2297 s = SvPVX(PL_linestr) + startoff;
2298 }
2299#endif
89bfa8cd 2300 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2301 start_force(PL_curforce);
9ded7720 2302 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2303 force_next(WORD);
89bfa8cd 2304
e759cc13 2305 return s;
89bfa8cd 2306}
2307
ffb4593c 2308/*
91152fc1
DG
2309 * S_force_strict_version
2310 * Forces the next token to be a version number using strict syntax rules.
2311 */
2312
2313STATIC char *
2314S_force_strict_version(pTHX_ char *s)
2315{
2316 dVAR;
2317 OP *version = NULL;
2318#ifdef PERL_MAD
2319 I32 startoff = s - SvPVX(PL_linestr);
2320#endif
2321 const char *errstr = NULL;
2322
2323 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2324
2325 while (isSPACE(*s)) /* leading whitespace */
2326 s++;
2327
2328 if (is_STRICT_VERSION(s,&errstr)) {
2329 SV *ver = newSV(0);
2330 s = (char *)scan_version(s, ver, 0);
2331 version = newSVOP(OP_CONST, 0, ver);
2332 }
4e4da3ac
Z
2333 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2334 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2335 {
91152fc1
DG
2336 PL_bufptr = s;
2337 if (errstr)
2338 yyerror(errstr); /* version required */
2339 return s;
2340 }
2341
2342#ifdef PERL_MAD
2343 if (PL_madskills && !version) {
2344 sv_free(PL_nextwhite); /* let next token collect whitespace */
2345 PL_nextwhite = 0;
2346 s = SvPVX(PL_linestr) + startoff;
2347 }
2348#endif
2349 /* NOTE: The parser sees the package name and the VERSION swapped */
2350 start_force(PL_curforce);
2351 NEXTVAL_NEXTTOKE.opval = version;
2352 force_next(WORD);
2353
2354 return s;
2355}
2356
2357/*
ffb4593c
NT
2358 * S_tokeq
2359 * Tokenize a quoted string passed in as an SV. It finds the next
2360 * chunk, up to end of string or a backslash. It may make a new
2361 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2362 * turns \\ into \.
2363 */
2364
76e3520e 2365STATIC SV *
cea2e8a9 2366S_tokeq(pTHX_ SV *sv)
79072805 2367{
97aff369 2368 dVAR;
eb578fdb
KW
2369 char *s;
2370 char *send;
2371 char *d;
b3ac6de7
IZ
2372 STRLEN len = 0;
2373 SV *pv = sv;
79072805 2374
7918f24d
NC
2375 PERL_ARGS_ASSERT_TOKEQ;
2376
79072805 2377 if (!SvLEN(sv))
b3ac6de7 2378 goto finish;
79072805 2379
a0d0e21e 2380 s = SvPV_force(sv, len);
21a311ee 2381 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2382 goto finish;
463ee0b2 2383 send = s + len;
dcb21ed6
NC
2384 /* This is relying on the SV being "well formed" with a trailing '\0' */
2385 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2386 s++;
2387 if (s == send)
b3ac6de7 2388 goto finish;
79072805 2389 d = s;
be4731d2 2390 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2391 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2392 }
79072805
LW
2393 while (s < send) {
2394 if (*s == '\\') {
a0d0e21e 2395 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2396 s++; /* all that, just for this */
2397 }
2398 *d++ = *s++;
2399 }
2400 *d = '\0';
95a20fc0 2401 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2402 finish:
3280af22 2403 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2404 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2405 return sv;
2406}
2407
ffb4593c
NT
2408/*
2409 * Now come three functions related to double-quote context,
2410 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2411 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2412 * interact with PL_lex_state, and create fake ( ... ) argument lists
2413 * to handle functions and concatenation.
ecd24171
DM
2414 * For example,
2415 * "foo\lbar"
2416 * is tokenised as
2417 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
ffb4593c
NT
2418 */
2419
2420/*
2421 * S_sublex_start
6154021b 2422 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2423 *
2424 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2425 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2426 *
2427 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2428 *
2429 * Everything else becomes a FUNC.
2430 *
2431 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2432 * had an OP_CONST or OP_READLINE). This just sets us up for a
2433 * call to S_sublex_push().
2434 */
2435
76e3520e 2436STATIC I32
cea2e8a9 2437S_sublex_start(pTHX)
79072805 2438{
97aff369 2439 dVAR;
eb578fdb 2440 const I32 op_type = pl_yylval.ival;
79072805
LW
2441
2442 if (op_type == OP_NULL) {
6154021b 2443 pl_yylval.opval = PL_lex_op;
5f66b61c 2444 PL_lex_op = NULL;
79072805
LW
2445 return THING;
2446 }
2447 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2448 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2449
2450 if (SvTYPE(sv) == SVt_PVIV) {
2451 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2452 STRLEN len;
96a5add6 2453 const char * const p = SvPV_const(sv, len);
740cce10 2454 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2455 SvREFCNT_dec(sv);
2456 sv = nsv;
4e553d73 2457 }
6154021b 2458 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2459 PL_lex_stuff = NULL;
6f33ba73
RGS
2460 /* Allow <FH> // "foo" */
2461 if (op_type == OP_READLINE)
2462 PL_expect = XTERMORDORDOR;
79072805
LW
2463 return THING;
2464 }
e3f73d4e
RGS
2465 else if (op_type == OP_BACKTICK && PL_lex_op) {
2466 /* readpipe() vas overriden */
2467 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2468 pl_yylval.opval = PL_lex_op;
9b201d7d 2469 PL_lex_op = NULL;
e3f73d4e
RGS
2470 PL_lex_stuff = NULL;
2471 return THING;
2472 }
79072805 2473
3280af22 2474 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2475 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2476 PL_sublex_info.sub_op = PL_lex_op;
2477 PL_lex_state = LEX_INTERPPUSH;
55497cff 2478
3280af22
NIS
2479 PL_expect = XTERM;
2480 if (PL_lex_op) {
6154021b 2481 pl_yylval.opval = PL_lex_op;
5f66b61c 2482 PL_lex_op = NULL;
55497cff 2483 return PMFUNC;
2484 }
2485 else
2486 return FUNC;
2487}
2488
ffb4593c
NT
2489/*
2490 * S_sublex_push
2491 * Create a new scope to save the lexing state. The scope will be
2492 * ended in S_sublex_done. Returns a '(', starting the function arguments
2493 * to the uc, lc, etc. found before.
2494 * Sets PL_lex_state to LEX_INTERPCONCAT.
2495 */
2496
76e3520e 2497STATIC I32
cea2e8a9 2498S_sublex_push(pTHX)
55497cff 2499{
27da23d5 2500 dVAR;
78a635de 2501 LEXSHARED *shared;
f46d017c 2502 ENTER;
55497cff 2503
3280af22 2504 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2505 SAVEBOOL(PL_lex_dojoin);
3280af22 2506 SAVEI32(PL_lex_brackets);
78cdf107 2507 SAVEI32(PL_lex_allbrackets);
b27dce25 2508 SAVEI32(PL_lex_formbrack);
78cdf107 2509 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2510 SAVEI32(PL_lex_casemods);
2511 SAVEI32(PL_lex_starts);
651b5b28 2512 SAVEI8(PL_lex_state);
7cc34111 2513 SAVESPTR(PL_lex_repl);
7766f137 2514 SAVEVPTR(PL_lex_inpat);
98246f1e 2515 SAVEI16(PL_lex_inwhat);
57843af0 2516 SAVECOPLINE(PL_curcop);
3280af22 2517 SAVEPPTR(PL_bufptr);
8452ff4b 2518 SAVEPPTR(PL_bufend);
3280af22
NIS
2519 SAVEPPTR(PL_oldbufptr);
2520 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2521 SAVEPPTR(PL_last_lop);
2522 SAVEPPTR(PL_last_uni);
3280af22
NIS
2523 SAVEPPTR(PL_linestart);
2524 SAVESPTR(PL_linestr);
8edd5f42
RGS
2525 SAVEGENERICPV(PL_lex_brackstack);
2526 SAVEGENERICPV(PL_lex_casestack);
78a635de 2527 SAVEGENERICPV(PL_parser->lex_shared);
3280af22 2528
99bd9d90 2529 /* The here-doc parser needs to be able to peek into outer lexing
60f40a38
FC
2530 scopes to find the body of the here-doc. So we put PL_linestr and
2531 PL_bufptr into lex_shared, to ‘share’ those values.
99bd9d90 2532 */
60f40a38
FC
2533 PL_parser->lex_shared->ls_linestr = PL_linestr;
2534 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
99bd9d90 2535
3280af22 2536 PL_linestr = PL_lex_stuff;
7cc34111 2537 PL_lex_repl = PL_sublex_info.repl;
a0714e2c 2538 PL_lex_stuff = NULL;
7cc34111 2539 PL_sublex_info.repl = NULL;
3280af22 2540
9cbb5ea2
GS
2541 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2542 = SvPVX(PL_linestr);
3280af22 2543 PL_bufend += SvCUR(PL_linestr);
bd61b366 2544 PL_last_lop = PL_last_uni = NULL;
3280af22 2545 SAVEFREESV(PL_linestr);
4dc843bc 2546 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
3280af22
NIS
2547
2548 PL_lex_dojoin = FALSE;
b27dce25 2549 PL_lex_brackets = PL_lex_formbrack = 0;
78cdf107
Z
2550 PL_lex_allbrackets = 0;
2551 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2552 Newx(PL_lex_brackstack, 120, char);
2553 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2554 PL_lex_casemods = 0;
2555 *PL_lex_casestack = '\0';
2556 PL_lex_starts = 0;
2557 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2558 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
78a635de
FC
2559
2560 Newxz(shared, 1, LEXSHARED);
2561 shared->ls_prev = PL_parser->lex_shared;
2562 PL_parser->lex_shared = shared;
3280af22
NIS
2563
2564 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2565 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2566 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2567 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2568 else
5f66b61c 2569 PL_lex_inpat = NULL;
79072805 2570
55497cff 2571 return '(';
79072805
LW
2572}
2573
ffb4593c
NT
2574/*
2575 * S_sublex_done
2576 * Restores lexer state after a S_sublex_push.
2577 */
2578
76e3520e 2579STATIC I32
cea2e8a9 2580S_sublex_done(pTHX)
79072805 2581{
27da23d5 2582 dVAR;
3280af22 2583 if (!PL_lex_starts++) {
396482e1 2584 SV * const sv = newSVpvs("");
9aa983d2
JH
2585 if (SvUTF8(PL_linestr))
2586 SvUTF8_on(sv);
3280af22 2587 PL_expect = XOPERATOR;
6154021b 2588 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2589 return THING;
2590 }
2591
3280af22
NIS
2592 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2593 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2594 return yylex();
79072805
LW
2595 }
2596
ffb4593c 2597 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2598 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2599 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2600 PL_linestr = PL_lex_repl;
2601 PL_lex_inpat = 0;
2602 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2603 PL_bufend += SvCUR(PL_linestr);
bd61b366 2604 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2605 PL_lex_dojoin = FALSE;
2606 PL_lex_brackets = 0;
78cdf107
Z
2607 PL_lex_allbrackets = 0;
2608 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2609 PL_lex_casemods = 0;
2610 *PL_lex_casestack = '\0';
2611 PL_lex_starts = 0;
25da4f38 2612 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2613 PL_lex_state = LEX_INTERPNORMAL;
2614 PL_lex_starts++;
e9fa98b2
HS
2615 /* we don't clear PL_lex_repl here, so that we can check later
2616 whether this is an evalled subst; that means we rely on the
2617 logic to ensure sublex_done() is called again only via the
2618 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2619 }
e9fa98b2 2620 else {
3280af22 2621 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2622 PL_lex_repl = NULL;
e9fa98b2 2623 }
79072805 2624 return ',';
ffed7fef
LW
2625 }
2626 else {
5db06880
NC
2627#ifdef PERL_MAD
2628 if (PL_madskills) {
cd81e915
NC
2629 if (PL_thiswhite) {
2630 if (!PL_endwhite)
6b29d1f5 2631 PL_endwhite = newSVpvs("");
cd81e915
NC
2632 sv_catsv(PL_endwhite, PL_thiswhite);
2633 PL_thiswhite = 0;
2634 }
2635 if (PL_thistoken)
76f68e9b 2636 sv_setpvs(PL_thistoken,"");
5db06880 2637 else
cd81e915 2638 PL_realtokenstart = -1;
5db06880
NC
2639 }
2640#endif
f46d017c 2641 LEAVE;
3280af22
NIS
2642 PL_bufend = SvPVX(PL_linestr);
2643 PL_bufend += SvCUR(PL_linestr);
2644 PL_expect = XOPERATOR;
09bef843 2645 PL_sublex_info.sub_inwhat = 0;
79072805 2646 return ')';
ffed7fef
LW
2647 }
2648}
2649
6f613c73
KW
2650PERL_STATIC_INLINE SV*
2651S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2652{
140b12ad
KW
2653 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2654 * interior, hence to the "}". Finds what the name resolves to, returning
2655 * an SV* containing it; NULL if no valid one found */
2656
dd2b1b72 2657 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
6f613c73 2658
0c415a79
KW
2659 HV * table;
2660 SV **cvp;
2661 SV *cv;
2662 SV *rv;
2663 HV *stash;
2664 const U8* first_bad_char_loc;
2665 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2666
6f613c73
KW
2667 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2668
107160e2
KW
2669 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2670 e - backslash_ptr,
2671 &first_bad_char_loc))
2672 {
2673 /* If warnings are on, this will print a more detailed analysis of what
2674 * is wrong than the error message below */
2675 utf8n_to_uvuni(first_bad_char_loc,
2676 e - ((char *) first_bad_char_loc),
2677 NULL, 0);
2678
2679 /* We deliberately don't try to print the malformed character, which
2680 * might not print very well; it also may be just the first of many
2681 * malformations, so don't print what comes after it */
2682 yyerror(Perl_form(aTHX_
2683 "Malformed UTF-8 character immediately after '%.*s'",
2684 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2685 return NULL;
2686 }
2687
2688 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2689 /* include the <}> */
2690 e - backslash_ptr + 1);
6f613c73 2691 if (! SvPOK(res)) {
b6407c49 2692 SvREFCNT_dec_NN(res);
6f613c73
KW
2693 return NULL;
2694 }
2695
0c415a79
KW
2696 /* See if the charnames handler is the Perl core's, and if so, we can skip
2697 * the validation needed for a user-supplied one, as Perl's does its own
2698 * validation. */
2699 table = GvHV(PL_hintgv); /* ^H */
2700 cvp = hv_fetchs(table, "charnames", FALSE);
67a057d6
FC
2701 if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2702 && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
0c415a79
KW
2703 {
2704 const char * const name = HvNAME(stash);
2705 if strEQ(name, "_charnames") {
2706 return res;
2707 }
2708 }
2709
bde9e88d
KW
2710 /* Here, it isn't Perl's charname handler. We can't rely on a
2711 * user-supplied handler to validate the input name. For non-ut8 input,
2712 * look to see that the first character is legal. Then loop through the
2713 * rest checking that each is a continuation */
6f613c73 2714
b6ba1137
KW
2715 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2716 * the same thing */
2717
b6ba1137 2718 if (! UTF) {
bde9e88d 2719 if (! isALPHAU(*s)) {
b6ba1137
KW
2720 goto bad_charname;
2721 }
bde9e88d
KW
2722 s++;
2723 while (s < e) {
2724 if (! isCHARNAME_CONT(*s)) {
b6ba1137
KW
2725 goto bad_charname;
2726 }
bd299e29
KW
2727 if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
2728 Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated");
2729 }
bde9e88d 2730 s++;
b6ba1137 2731 }
bd299e29
KW
2732 if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
2733 Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated");
2734 }
b6ba1137
KW
2735 }
2736 else {
bde9e88d
KW
2737 /* Similarly for utf8. For invariants can check directly; for other
2738 * Latin1, can calculate their code point and check; otherwise use a
2739 * swash */
2740 if (UTF8_IS_INVARIANT(*s)) {
2741 if (! isALPHAU(*s)) {
140b12ad
KW
2742 goto bad_charname;
2743 }
bde9e88d
KW
2744 s++;
2745 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2746 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
b6ba1137 2747 goto bad_charname;
6f613c73 2748 }
bde9e88d 2749 s += 2;
6f613c73 2750 }
bde9e88d
KW
2751 else {
2752 if (! PL_utf8_charname_begin) {
2753 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2754 PL_utf8_charname_begin = _core_swash_init("utf8",
2755 "_Perl_Charname_Begin",
2756 &PL_sv_undef,
2757 1, 0, NULL, &flags);
2758 }
2759 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2760 goto bad_charname;
2761 }
2762 s += UTF8SKIP(s);
2763 }
2764
2765 while (s < e) {
2766 if (UTF8_IS_INVARIANT(*s)) {
2767 if (! isCHARNAME_CONT(*s)) {
2768 goto bad_charname;
2769 }
bd299e29
KW
2770 if (*s == ' ' && *(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
2771 Perl_warn(aTHX_ "A sequence of multiple spaces in a charnames alias definition is deprecated");
2772 }
bde9e88d
KW
2773 s++;
2774 }
2775 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2776 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2777 *(s+1)))))
2778 {
2779 goto bad_charname;
2780 }
2781 s += 2;
2782 }
2783 else {
2784 if (! PL_utf8_charname_continue) {
2785 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2786 PL_utf8_charname_continue = _core_swash_init("utf8",
2787 "_Perl_Charname_Continue",
2788 &PL_sv_undef,
2789 1, 0, NULL, &flags);
2790 }
2791 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2792 goto bad_charname;
2793 }
2794 s += UTF8SKIP(s);
6f613c73
KW
2795 }
2796 }
bd299e29
KW
2797 if (*(s-1) == ' ' && ckWARN(WARN_DEPRECATED)) {
2798 Perl_warn(aTHX_ "Trailing white-space in a charnames alias definition is deprecated");
2799 }
6f613c73
KW
2800 }
2801
94ca1619 2802 if (SvUTF8(res)) { /* Don't accept malformed input */
bde9e88d
KW
2803 const U8* first_bad_char_loc;
2804 STRLEN len;
2805 const char* const str = SvPV_const(res, len);
2806 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2807 /* If warnings are on, this will print a more detailed analysis of
2808 * what is wrong than the error message below */
2809 utf8n_to_uvuni(first_bad_char_loc,
2810 (char *) first_bad_char_loc - str,
2811 NULL, 0);
2812
2813 /* We deliberately don't try to print the malformed character,
2814 * which might not print very well; it also may be just the first
2815 * of many malformations, so don't print what comes after it */
2816 yyerror_pv(
2817 Perl_form(aTHX_
2818 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2819 (int) (e - backslash_ptr + 1), backslash_ptr,
2820 (int) ((char *) first_bad_char_loc - str), str
2821 ),
2822 SVf_UTF8);
2823 return NULL;
2824 }
2825 }
140b12ad 2826
bde9e88d 2827 return res;
140b12ad 2828
bde9e88d
KW
2829 bad_charname: {
2830 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2831
2832 /* The final %.*s makes sure that should the trailing NUL be missing
2833 * that this print won't run off the end of the string */
2834 yyerror_pv(
2835 Perl_form(aTHX_
2836 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2837 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2838 (int)(e - s + bad_char_size), s + bad_char_size
2839 ),
2840 UTF ? SVf_UTF8 : 0);
2841 return NULL;
2842 }
6f613c73
KW
2843}
2844
02aa26ce
NT
2845/*
2846 scan_const
2847
9da1dd8f
DM
2848 Extracts the next constant part of a pattern, double-quoted string,
2849 or transliteration. This is terrifying code.
2850
2851 For example, in parsing the double-quoted string "ab\x63$d", it would
2852 stop at the '$' and return an OP_CONST containing 'abc'.
02aa26ce 2853
94def140 2854 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2855 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2856 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2857
94def140
TS
2858 Returns a pointer to the character scanned up to. If this is
2859 advanced from the start pointer supplied (i.e. if anything was
9da1dd8f 2860 successfully parsed), will leave an OP_CONST for the substring scanned
6154021b 2861 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2862 by looking at the next characters herself.
2863
02aa26ce 2864 In patterns:
9da1dd8f
DM
2865 expand:
2866 \N{ABC} => \N{U+41.42.43}
2867
2868 pass through:
2869 all other \-char, including \N and \N{ apart from \N{ABC}
2870
2871 stops on:
2872 @ and $ where it appears to be a var, but not for $ as tail anchor
2873 \l \L \u \U \Q \E
2874 (?{ or (??{
2875
02aa26ce
NT
2876
2877 In transliterations:
2878 characters are VERY literal, except for - not at the start or end
94def140
TS
2879 of the string, which indicates a range. If the range is in bytes,
2880 scan_const expands the range to the full set of intermediate
2881 characters. If the range is in utf8, the hyphen is replaced with
2882 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2883
2884 In double-quoted strings:
2885 backslashes:
2886 double-quoted style: \r and \n
ff3f963a 2887 constants: \x31, etc.
94def140 2888 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2889 case and quoting: \U \Q \E
2890 stops on @ and $
2891
2892 scan_const does *not* construct ops to handle interpolated strings.
2893 It stops processing as soon as it finds an embedded $ or @ variable
2894 and leaves it to the caller to work out what's going on.
2895
94def140
TS
2896 embedded arrays (whether in pattern or not) could be:
2897 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2898
2899 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2900
2901 $ in pattern could be $foo or could be tail anchor. Assumption:
2902 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2903 followed by one of "()| \r\n\t"
02aa26ce 2904
9da1dd8f 2905 \1 (backreferences) are turned into $1 in substitutions
02aa26ce
NT
2906
2907 The structure of the code is
2908 while (there's a character to process) {
94def140
TS
2909 handle transliteration ranges
2910 skip regexp comments /(?#comment)/ and codes /(?{code})/
2911 skip #-initiated comments in //x patterns
2912 check for embedded arrays
02aa26ce
NT
2913 check for embedded scalars
2914 if (backslash) {
94def140 2915 deprecate \1 in substitution replacements
02aa26ce
NT
2916 handle string-changing backslashes \l \U \Q \E, etc.
2917 switch (what was escaped) {
94def140 2918 handle \- in a transliteration (becomes a literal -)
ff3f963a 2919 if a pattern and not \N{, go treat as regular character
94def140
TS
2920 handle \132 (octal characters)
2921 handle \x15 and \x{1234} (hex characters)
ff3f963a 2922 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2923 handle \cV (control characters)
2924 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2925 } (end switch)
77a135fe 2926 continue
02aa26ce 2927 } (end if backslash)
77a135fe 2928 handle regular character
02aa26ce 2929 } (end while character to read)
4e553d73 2930
02aa26ce
NT
2931*/
2932
76e3520e 2933STATIC char *
cea2e8a9 2934S_scan_const(pTHX_ char *start)
79072805 2935{
97aff369 2936 dVAR;
eb578fdb 2937 char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2938 SV *sv = newSV(send - start); /* sv for the constant. See
2939 note below on sizing. */
eb578fdb
KW
2940 char *s = start; /* start of the constant */
2941 char *d = SvPVX(sv); /* destination for copies */
02aa26ce 2942 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2943 bool didrange = FALSE; /* did we just finish a range? */
2866decb 2944 bool in_charclass = FALSE; /* within /[...]/ */
b953e60c
KW
2945 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2946 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2947 to be UTF8? But, this can
2948 show as true when the source
2949 isn't utf8, as for example
2950 when it is entirely composed
2951 of hex constants */
6f613c73 2952 SV *res; /* result from charnames */
77a135fe
KW
2953
2954 /* Note on sizing: The scanned constant is placed into sv, which is
2955 * initialized by newSV() assuming one byte of output for every byte of
2956 * input. This routine expects newSV() to allocate an extra byte for a
2957 * trailing NUL, which this routine will append if it gets to the end of
2958 * the input. There may be more bytes of input than output (eg., \N{LATIN
2959 * CAPITAL LETTER A}), or more output than input if the constant ends up
2960 * recoded to utf8, but each time a construct is found that might increase
2961 * the needed size, SvGROW() is called. Its size parameter each time is
2962 * based on the best guess estimate at the time, namely the length used so
2963 * far, plus the length the current construct will occupy, plus room for
2964 * the trailing NUL, plus one byte for every input byte still unscanned */
2965
c3320c2a
KW
2966 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2967 before set */
4c3a8340
TS
2968#ifdef EBCDIC
2969 UV literal_endpoint = 0;
e294cc5d 2970 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2971#endif
012bcf8d 2972
7918f24d
NC
2973 PERL_ARGS_ASSERT_SCAN_CONST;
2974
bb16bae8 2975 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2976 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2977 /* If we are doing a trans and we know we want UTF8 set expectation */
2978 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2979 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2980 }
2981
b899e89d
FC
2982 /* Protect sv from errors and fatal warnings. */
2983 ENTER_with_name("scan_const");
2984 SAVEFREESV(sv);
2b9d42f0 2985
79072805 2986 while (s < send || dorange) {
ff3f963a 2987
02aa26ce 2988 /* get transliterations out of the way (they're most literal) */
3280af22 2989 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2990 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2991 if (dorange) {
1ba5c669
JH
2992 I32 i; /* current expanded character */
2993 I32 min; /* first character in range */
2994 I32 max; /* last character in range */
02aa26ce 2995
e294cc5d
JH
2996#ifdef EBCDIC
2997 UV uvmax = 0;
2998#endif
2999
3000 if (has_utf8
3001#ifdef EBCDIC
3002 && !native_range
3003#endif
1953db30 3004 ) {
9d4ba2ae 3005 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
3006 char *e = d++;
3007 while (e-- > c)
3008 *(e + 1) = *e;
25716404 3009 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
3010 /* mark the range as done, and continue */
3011 dorange = FALSE;
3012 didrange = TRUE;
3013 continue;
3014 }
2b9d42f0 3015
95a20fc0 3016 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
3017#ifdef EBCDIC
3018 SvGROW(sv,
3019 SvLEN(sv) + (has_utf8 ?
3020 (512 - UTF_CONTINUATION_MARK +
3021 UNISKIP(0x100))
3022 : 256));
3023 /* How many two-byte within 0..255: 128 in UTF-8,
3024 * 96 in UTF-8-mod. */
3025#else
9cbb5ea2 3026 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 3027#endif
9cbb5ea2 3028 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
3029#ifdef EBCDIC
3030 if (has_utf8) {
3031 int j;
3032 for (j = 0; j <= 1; j++) {
3033 char * const c = (char*)utf8_hop((U8*)d, -1);
3034 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3035 if (j)
3036 min = (U8)uv;
3037 else if (uv < 256)
3038 max = (U8)uv;
3039 else {
3040 max = (U8)0xff; /* only to \xff */
3041 uvmax = uv; /* \x{100} to uvmax */
3042 }
3043 d = c; /* eat endpoint chars */
3044 }
3045 }
3046 else {
3047#endif
3048 d -= 2; /* eat the first char and the - */
3049 min = (U8)*d; /* first char in range */
3050 max = (U8)d[1]; /* last char in range */
3051#ifdef EBCDIC
3052 }
3053#endif
8ada0baa 3054
c2e66d9e 3055 if (min > max) {
01ec43d0 3056 Perl_croak(aTHX_
d1573ac7 3057 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 3058 (char)min, (char)max);
c2e66d9e
GS
3059 }
3060
c7f1f016 3061#ifdef EBCDIC
4c3a8340
TS
3062 if (literal_endpoint == 2 &&
3063 ((isLOWER(min) && isLOWER(max)) ||
3064 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
3065 if (isLOWER(min)) {
3066 for (i = min; i <= max; i++)
3067 if (isLOWER(i))
db42d148 3068 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
3069 } else {
3070 for (i = min; i <= max; i++)
3071 if (isUPPER(i))
db42d148 3072 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
3073 }
3074 }
3075 else
3076#endif
3077 for (i = min; i <= max; i++)
e294cc5d
JH
3078#ifdef EBCDIC
3079 if (has_utf8) {
3080 const U8 ch = (U8)NATIVE_TO_UTF(i);
3081 if (UNI_IS_INVARIANT(ch))
3082 *d++ = (U8)i;
3083 else {
3084 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3085 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3086 }
3087 }
3088 else
3089#endif
3090 *d++ = (char)i;
3091
3092#ifdef EBCDIC
3093 if (uvmax) {
3094 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3095 if (uvmax > 0x101)
3096 *d++ = (char)UTF_TO_NATIVE(0xff);
3097 if (uvmax > 0x100)
3098 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3099 }
3100#endif
02aa26ce
NT
3101
3102 /* mark the range as done, and continue */
79072805 3103 dorange = FALSE;
01ec43d0 3104 didrange = TRUE;
4c3a8340
TS
3105#ifdef EBCDIC
3106 literal_endpoint = 0;
3107#endif
79072805 3108 continue;
4e553d73 3109 }
02aa26ce
NT
3110
3111 /* range begins (ignore - as first or last char) */
79072805 3112 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 3113 if (didrange) {
1fafa243 3114 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 3115 }
e294cc5d
JH
3116 if (has_utf8
3117#ifdef EBCDIC
3118 && !native_range
3119#endif
3120 ) {
25716404 3121 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
3122 s++;
3123 continue;
3124 }
79072805
LW
3125 dorange = TRUE;
3126 s++;
01ec43d0
GS
3127 }
3128 else {
3129 didrange = FALSE;
4c3a8340
TS
3130#ifdef EBCDIC
3131 literal_endpoint = 0;
e294cc5d 3132 native_range = TRUE;
4c3a8340 3133#endif
01ec43d0 3134 }
79072805 3135 }
02aa26ce
NT
3136
3137 /* if we get here, we're not doing a transliteration */
3138
e4a2df84
DM
3139 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3140 char *s1 = s-1;
3141 int esc = 0;
3142 while (s1 >= start && *s1-- == '\\')
3143 esc = !esc;
3144 if (!esc)
3145 in_charclass = TRUE;
3146 }
2866decb 3147
e4a2df84
DM
3148 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3149 char *s1 = s-1;
3150 int esc = 0;
3151 while (s1 >= start && *s1-- == '\\')
3152 esc = !esc;
3153 if (!esc)
3154 in_charclass = FALSE;
3155 }
2866decb 3156
9da1dd8f
DM
3157 /* skip for regexp comments /(?#comment)/, except for the last
3158 * char, which will be done separately.
3159 * Stop on (?{..}) and friends */
3160
3280af22 3161 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 3162 if (s[2] == '#') {
e994fd66 3163 while (s+1 < send && *s != ')')
db42d148 3164 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94 3165 }
2866decb 3166 else if (!PL_lex_casemods && !in_charclass &&
d3cec5e5
DM
3167 ( s[2] == '{' /* This should match regcomp.c */
3168 || (s[2] == '?' && s[3] == '{')))
155aba94 3169 {
9da1dd8f 3170 break;
cc6b7395 3171 }
748a9306 3172 }
02aa26ce
NT
3173
3174 /* likewise skip #-initiated comments in //x patterns */
3280af22 3175 else if (*s == '#' && PL_lex_inpat &&
73134a2e 3176 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 3177 while (s+1 < send && *s != '\n')
db42d148 3178 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 3179 }
02aa26ce 3180
9da1dd8f
DM
3181 /* no further processing of single-quoted regex */
3182 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3183 goto default_action;
3184
5d1d4326 3185 /* check for embedded arrays
da6eedaa 3186 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 3187 */
1749ea0d 3188 else if (*s == '@' && s[1]) {
8a2bca12 3189 if (isWORDCHAR_lazy_if(s+1,UTF))
1749ea0d
TS
3190 break;
3191 if (strchr(":'{$", s[1]))
3192 break;
3193 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3194 break; /* in regexp, neither @+ nor @- are interpolated */
3195 }
02aa26ce
NT
3196
3197 /* check for embedded scalars. only stop if we're sure it's a
3198 variable.
3199 */
79072805 3200 else if (*s == '$') {
3280af22 3201 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 3202 break;
77772344 3203 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
3204 if (s[1] == '\\') {
3205 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3206 "Possible unintended interpolation of $\\ in regex");
77772344 3207 }
79072805 3208 break; /* in regexp, $ might be tail anchor */
77772344 3209 }
79072805 3210 }
02aa26ce 3211
2b9d42f0
NIS
3212 /* End of else if chain - OP_TRANS rejoin rest */
3213
02aa26ce 3214 /* backslashes */
79072805 3215 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
3216 char* e; /* Can be used for ending '}', etc. */
3217
79072805 3218 s++;
02aa26ce 3219
7d0fc23c
KW
3220 /* warn on \1 - \9 in substitution replacements, but note that \11
3221 * is an octal; and \19 is \1 followed by '9' */
3280af22 3222 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 3223 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 3224 {
a2a5de95 3225 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
3226 *--s = '$';
3227 break;
3228 }
02aa26ce
NT
3229
3230 /* string-change backslash escapes */
838f2281 3231 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
79072805
LW
3232 --s;
3233 break;
3234 }
ff3f963a
KW
3235 /* In a pattern, process \N, but skip any other backslash escapes.
3236 * This is because we don't want to translate an escape sequence
3237 * into a meta symbol and have the regex compiler use the meta
3238 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3239 * in spite of this, we do have to process \N here while the proper
3240 * charnames handler is in scope. See bugs #56444 and #62056.
3241 * There is a complication because \N in a pattern may also stand
3242 * for 'match a non-nl', and not mean a charname, in which case its
3243 * processing should be deferred to the regex compiler. To be a
3244 * charname it must be followed immediately by a '{', and not look
3245 * like \N followed by a curly quantifier, i.e., not something like
3246 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3247 * quantifier */
3248 else if (PL_lex_inpat
3249 && (*s != 'N'
3250 || s[1] != '{'
4d68ffa0 3251 || regcurly(s + 1, FALSE)))
ff3f963a 3252 {
cc74c5bd
TS
3253 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3254 goto default_action;
3255 }
02aa26ce 3256
79072805 3257 switch (*s) {
02aa26ce
NT
3258
3259 /* quoted - in transliterations */
79072805 3260 case '-':
3280af22 3261 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
3262 *d++ = *s++;
3263 continue;
3264 }
3265 /* FALL THROUGH */
3266 default:
11b8faa4 3267 {
15861f94 3268 if ((isALPHANUMERIC(*s)))
a2a5de95
NC
3269 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3270 "Unrecognized escape \\%c passed through",
3271 *s);
11b8faa4 3272 /* default action is to copy the quoted character */
f9a63242 3273 goto default_action;
11b8faa4 3274 }
02aa26ce 3275
632403cc 3276 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
3277 case '0': case '1': case '2': case '3':
3278 case '4': case '5': case '6': case '7':
ba210ebe 3279 {
5e0a247b 3280 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
53305cf1 3281 STRLEN len = 3;
77a135fe 3282 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe 3283 s += len;
5e0a247b
KW
3284 if (len < 3 && s < send && isDIGIT(*s)
3285 && ckWARN(WARN_MISC))
3286 {
3287 Perl_warner(aTHX_ packWARN(WARN_MISC),
3288 "%s", form_short_octal_warning(s, len));
3289 }
ba210ebe 3290 }
012bcf8d 3291 goto NUM_ESCAPE_INSERT;
02aa26ce 3292
f0a2b745
KW
3293 /* eg. \o{24} indicates the octal constant \024 */
3294 case 'o':
3295 {
454155d9 3296 const char* error;
f0a2b745 3297
00ce5563 3298 bool valid = grok_bslash_o(&s, &uv, &error,
80f4111b
KW
3299 TRUE, /* Output warning */
3300 FALSE, /* Not strict */
17896a85
KW
3301 TRUE, /* Output warnings for
3302 non-portables */
80f4111b 3303 UTF);
454155d9 3304 if (! valid) {
f0a2b745
KW
3305 yyerror(error);
3306 continue;
3307 }
3308 goto NUM_ESCAPE_INSERT;
3309 }
3310
77a135fe 3311 /* eg. \x24 indicates the hex constant 0x24 */
79072805 3312 case 'x':
a0481293 3313 {
a0481293 3314 const char* error;
355860ce 3315
00ce5563 3316 bool valid = grok_bslash_x(&s, &uv, &error,
80f4111b
KW
3317 TRUE, /* Output warning */
3318 FALSE, /* Not strict */
17896a85
KW
3319 TRUE, /* Output warnings for
3320 non-portables */
80f4111b 3321 UTF);
a0481293
KW
3322 if (! valid) {
3323 yyerror(error);
355860ce 3324 continue;
ba210ebe 3325 }
012bcf8d
GS
3326 }
3327
3328 NUM_ESCAPE_INSERT:
ff3f963a
KW
3329 /* Insert oct or hex escaped character. There will always be
3330 * enough room in sv since such escapes will be longer than any
3331 * UTF-8 sequence they can end up as, except if they force us
3332 * to recode the rest of the string into utf8 */
ba7cea30 3333
77a135fe 3334 /* Here uv is the ordinal of the next character being added in
ff3f963a 3335 * unicode (converted from native). */
77a135fe 3336 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3337 if (!has_utf8 && uv > 255) {
77a135fe
KW
3338 /* Might need to recode whatever we have accumulated so
3339 * far if it contains any chars variant in utf8 or
3340 * utf-ebcdic. */
3341
3342 SvCUR_set(sv, d - SvPVX_const(sv));
3343 SvPOK_on(sv);
3344 *d = '\0';
77a135fe 3345 /* See Note on sizing above. */
7bf79863
KW
3346 sv_utf8_upgrade_flags_grow(sv,
3347 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3348 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3349 d = SvPVX(sv) + SvCUR(sv);
3350 has_utf8 = TRUE;
012bcf8d
GS
3351 }
3352
77a135fe
KW
3353 if (has_utf8) {
3354 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3355 if (PL_lex_inwhat == OP_TRANS &&
3356 PL_sublex_info.sub_op) {
3357 PL_sublex_info.sub_op->op_private |=
3358 (PL_lex_repl ? OPpTRANS_FROM_UTF
3359 : OPpTRANS_TO_UTF);
f9a63242 3360 }
e294cc5d
JH
3361#ifdef EBCDIC
3362 if (uv > 255 && !dorange)
3363 native_range = FALSE;
3364#endif
012bcf8d 3365 }
a0ed51b3 3366 else {
012bcf8d 3367 *d++ = (char)uv;
a0ed51b3 3368 }
012bcf8d
GS
3369 }
3370 else {
c4d5f83a 3371 *d++ = (char) uv;
a0ed51b3 3372 }
79072805 3373 continue;
02aa26ce 3374
4a2d328f 3375 case 'N':
ff3f963a
KW
3376 /* In a non-pattern \N must be a named character, like \N{LATIN
3377 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3378 * mean to match a non-newline. For non-patterns, named
3379 * characters are converted to their string equivalents. In
3380 * patterns, named characters are not converted to their
3381 * ultimate forms for the same reasons that other escapes
3382 * aren't. Instead, they are converted to the \N{U+...} form
3383 * to get the value from the charnames that is in effect right
3384 * now, while preserving the fact that it was a named character
3385 * so that the regex compiler knows this */
3386
3387 /* This section of code doesn't generally use the
3388 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3389 * a close examination of this macro and determined it is a
3390 * no-op except on utfebcdic variant characters. Every
3391 * character generated by this that would normally need to be
3392 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3393 * needed, and would complicate use of copy(). XXX There are
3394 * other parts of this file where the macro is used
3395 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3396
3397 /* The structure of this section of code (besides checking for
3398 * errors and upgrading to utf8) is:
3399 * Further disambiguate between the two meanings of \N, and if
3400 * not a charname, go process it elsewhere
0a96133f
KW
3401 * If of form \N{U+...}, pass it through if a pattern;
3402 * otherwise convert to utf8
3403 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3404 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3405
3406 /* Here, s points to the 'N'; the test below is guaranteed to
3407 * succeed if we are being called on a pattern as we already
3408 * know from a test above that the next character is a '{'.
3409 * On a non-pattern \N must mean 'named sequence, which
3410 * requires braces */
3411 s++;
3412 if (*s != '{') {
3413 yyerror("Missing braces on \\N{}");
3414 continue;
3415 }
3416 s++;
3417
0a96133f 3418 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3419 if (! (e = strchr(s, '}'))) {
3420 if (! PL_lex_inpat) {
5777a3f7 3421 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3422 } else {
3423 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3424 }
0a96133f 3425 continue;
ff3f963a 3426 }
cddc7ef4 3427
ff3f963a 3428 /* Here it looks like a named character */
cddc7ef4 3429
ff3f963a
KW
3430 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3431 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3432 | PERL_SCAN_DISALLOW_PREFIX;
3433 STRLEN len;
3434
3435 /* For \N{U+...}, the '...' is a unicode value even on
3436 * EBCDIC machines */
3437 s += 2; /* Skip to next char after the 'U+' */
3438 len = e - s;
3439 uv = grok_hex(s, &len, &flags, NULL);
3440 if (len == 0 || len != (STRLEN)(e - s)) {
3441 yyerror("Invalid hexadecimal number in \\N{U+...}");
3442 s = e + 1;
3443 continue;
3444 }
3445
3446 if (PL_lex_inpat) {
3447
e2a7e165
KW
3448 /* On non-EBCDIC platforms, pass through to the regex
3449 * compiler unchanged. The reason we evaluated the
3450 * number above is to make sure there wasn't a syntax
3451 * error. But on EBCDIC we convert to native so
3452 * downstream code can continue to assume it's native
3453 */
ff3f963a 3454 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3455#ifdef EBCDIC
3456 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3457 and the \0 */
3458 "\\N{U+%X}",
3459 (unsigned int) UNI_TO_NATIVE(uv));
3460#else
ff3f963a
KW
3461 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3462 d += e - s + 1;
e2a7e165 3463#endif
ff3f963a
KW
3464 }
3465 else { /* Not a pattern: convert the hex to string */
3466
3467 /* If destination is not in utf8, unconditionally
3468 * recode it to be so. This is because \N{} implies
3469 * Unicode semantics, and scalars have to be in utf8
3470 * to guarantee those semantics */
3471 if (! has_utf8) {
3472 SvCUR_set(sv, d - SvPVX_const(sv));
3473 SvPOK_on(sv);
3474 *d = '\0';
3475 /* See Note on sizing above. */
3476 sv_utf8_upgrade_flags_grow(
3477 sv,
3478 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3479 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3480 d = SvPVX(sv) + SvCUR(sv);
3481 has_utf8 = TRUE;
3482 }
3483
3484 /* Add the string to the output */
3485 if (UNI_IS_INVARIANT(uv)) {
3486 *d++ = (char) uv;
3487 }
3488 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3489 }
3490 }
6f613c73
KW
3491 else /* Here is \N{NAME} but not \N{U+...}. */
3492 if ((res = get_and_check_backslash_N_name(s, e)))
3493 {
3494 STRLEN len;
3495 const char *str = SvPV_const(res, len);
3496 if (PL_lex_inpat) {
ff3f963a
KW
3497
3498 if (! len) { /* The name resolved to an empty string */
3499 Copy("\\N{}", d, 4, char);
3500 d += 4;
3501 }
3502 else {
3503 /* In order to not lose information for the regex
3504 * compiler, pass the result in the specially made
3505 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3506 * the code points in hex of each character
3507 * returned by charnames */
3508
3509 const char *str_end = str + len;
3b721c4f 3510 const STRLEN off = d - SvPVX_const(sv);
94ca1619
KW
3511
3512 if (! SvUTF8(res)) {
3513 /* For the non-UTF-8 case, we can determine the
3514 * exact length needed without having to parse
3515 * through the string. Each character takes up
3516 * 2 hex digits plus either a trailing dot or
3517 * the "}" */
3518 d = off + SvGROW(sv, off
3519 + 3 * len
3520 + 6 /* For the "\N{U+", and
3521 trailing NUL */
3522 + (STRLEN)(send - e));
3523 Copy("\\N{U+", d, 5, char);
3524 d += 5;
3525 while (str < str_end) {
3526 char hex_string[4];
3527 my_snprintf(hex_string, sizeof(hex_string),
3528 "%02X.", (U8) *str);
3529 Copy(hex_string, d, 3, char);
3530 d += 3;
3531 str++;
3532 }
3533 d--; /* We will overwrite below the final
3534 dot with a right brace */
3535 }
3536 else {
1953db30
KW
3537 STRLEN char_length; /* cur char's byte length */
3538
3539 /* and the number of bytes after this is
3540 * translated into hex digits */
3541 STRLEN output_length;
3542
3543 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3544 * for max('U+', '.'); and 1 for NUL */
3545 char hex_string[2 * UTF8_MAXBYTES + 5];
3546
3547 /* Get the first character of the result. */
3548 U32 uv = utf8n_to_uvuni((U8 *) str,
3549 len,
3550 &char_length,
3551 UTF8_ALLOW_ANYUV);
3552 /* Convert first code point to hex, including
3553 * the boiler plate before it. For all these,
3554 * we convert to native format so that
3555 * downstream code can continue to assume the
3556 * input is native */
3557 output_length =
3558 my_snprintf(hex_string, sizeof(hex_string),
3559 "\\N{U+%X",
3560 (unsigned int) UNI_TO_NATIVE(uv));
3561
3562 /* Make sure there is enough space to hold it */
3563 d = off + SvGROW(sv, off
3564 + output_length
3565 + (STRLEN)(send - e)
3566 + 2); /* '}' + NUL */
3567 /* And output it */
3568 Copy(hex_string, d, output_length, char);
3569 d += output_length;
3570
3571 /* For each subsequent character, append dot and
3572 * its ordinal in hex */
3573 while ((str += char_length) < str_end) {
3574 const STRLEN off = d - SvPVX_const(sv);
3575 U32 uv = utf8n_to_uvuni((U8 *) str,
3576 str_end - str,
3577 &char_length,
3578 UTF8_ALLOW_ANYUV);
3579 output_length =
3580 my_snprintf(hex_string,
3581 sizeof(hex_string),
3582 ".%X",
3583 (unsigned int) UNI_TO_NATIVE(uv));
3584
3585 d = off + SvGROW(sv, off
3586 + output_length
3587 + (STRLEN)(send - e)
3588 + 2); /* '}' + NUL */
3589 Copy(hex_string, d, output_length, char);
3590 d += output_length;
3591 }
94ca1619 3592 }
ff3f963a
KW
3593
3594 *d++ = '}'; /* Done. Add the trailing brace */
3595 }
3596 }
3597 else { /* Here, not in a pattern. Convert the name to a
3598 * string. */
3599
3600 /* If destination is not in utf8, unconditionally
3601 * recode it to be so. This is because \N{} implies
3602 * Unicode semantics, and scalars have to be in utf8
3603 * to guarantee those semantics */
3604 if (! has_utf8) {
3605 SvCUR_set(sv, d - SvPVX_const(sv));
3606 SvPOK_on(sv);
3607 *d = '\0';
3608 /* See Note on sizing above. */
3609 sv_utf8_upgrade_flags_grow(sv,
3610 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3611 len + (STRLEN)(send - s) + 1);
3612 d = SvPVX(sv) + SvCUR(sv);
3613 has_utf8 = TRUE;
3614 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3615
3616 /* See Note on sizing above. (NOTE: SvCUR() is not
3617 * set correctly here). */
3618 const STRLEN off = d - SvPVX_const(sv);
3619 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3620 }
3621 Copy(str, d, len, char);
3622 d += len;
423cee85 3623 }
6f613c73 3624
423cee85 3625 SvREFCNT_dec(res);
cb233ae3 3626
cb233ae3 3627 } /* End \N{NAME} */
ff3f963a
KW
3628#ifdef EBCDIC
3629 if (!dorange)
3630 native_range = FALSE; /* \N{} is defined to be Unicode */
3631#endif
3632 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3633 continue;
3634
02aa26ce 3635 /* \c is a control character */
79072805
LW
3636 case 'c':
3637 s++;
961ce445 3638 if (s < send) {
17a3df4c 3639 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3640 }
961ce445
RGS
3641 else {
3642 yyerror("Missing control char name in \\c");
3643 }
79072805 3644 continue;
02aa26ce
NT
3645
3646 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3647 case 'b':
db42d148 3648 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3649 break;
3650 case 'n':
db42d148 3651 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3652 break;
3653 case 'r':
db42d148 3654 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3655 break;
3656 case 'f':
db42d148 3657 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3658 break;
3659 case 't':
db42d148 3660 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3661 break;
34a3fe2a 3662 case 'e':
db42d148 3663 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3664 break;
3665 case 'a':
db42d148 3666 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3667 break;
02aa26ce
NT
3668 } /* end switch */
3669
79072805
LW
3670 s++;
3671 continue;
02aa26ce 3672 } /* end if (backslash) */
4c3a8340
TS
3673#ifdef EBCDIC
3674 else
3675 literal_endpoint++;
3676#endif
02aa26ce 3677
f9a63242 3678 default_action:
77a135fe
KW
3679 /* If we started with encoded form, or already know we want it,
3680 then encode the next character */
3681 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3682 STRLEN len = 1;
77a135fe
KW
3683
3684
3685 /* One might think that it is wasted effort in the case of the
3686 * source being utf8 (this_utf8 == TRUE) to take the next character
3687 * in the source, convert it to an unsigned value, and then convert
3688 * it back again. But the source has not been validated here. The
3689 * routine that does the conversion checks for errors like
3690 * malformed utf8 */
3691
5f66b61c
AL
3692 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3693 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3694 if (!has_utf8) {
3695 SvCUR_set(sv, d - SvPVX_const(sv));
3696 SvPOK_on(sv);
3697 *d = '\0';
77a135fe 3698 /* See Note on sizing above. */
7bf79863
KW
3699 sv_utf8_upgrade_flags_grow(sv,
3700 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3701 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3702 d = SvPVX(sv) + SvCUR(sv);
3703 has_utf8 = TRUE;
3704 } else if (need > len) {
3705 /* encoded value larger than old, may need extra space (NOTE:
3706 * SvCUR() is not set correctly here). See Note on sizing
3707 * above. */
9d4ba2ae 3708 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3709 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3710 }
77a135fe
KW
3711 s += len;
3712
5f66b61c 3713 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3714#ifdef EBCDIC
3715 if (uv > 255 && !dorange)
3716 native_range = FALSE;
3717#endif
2b9d42f0
NIS
3718 }
3719 else {
3720 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3721 }
02aa26ce
NT
3722 } /* while loop to process each character */
3723
3724 /* terminate the string and set up the sv */
79072805 3725 *d = '\0';
95a20fc0 3726 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3727 if (SvCUR(sv) >= SvLEN(sv))
5637ef5b
NC
3728 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3729 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
2b9d42f0 3730
79072805 3731 SvPOK_on(sv);
9f4817db 3732 if (PL_encoding && !has_utf8) {
d0063567
DK
3733 sv_recode_to_utf8(sv, PL_encoding);
3734 if (SvUTF8(sv))
3735 has_utf8 = TRUE;
9f4817db 3736 }
2b9d42f0 3737 if (has_utf8) {
7e2040f0 3738 SvUTF8_on(sv);
2b9d42f0 3739 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3740 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3741 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3742 }
3743 }
79072805 3744
02aa26ce 3745 /* shrink the sv if we allocated more than we used */
79072805 3746 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3747 SvPV_shrink_to_cur(sv);
79072805 3748 }
02aa26ce 3749
6154021b 3750 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3751 if (s > PL_bufptr) {
b899e89d 3752 SvREFCNT_inc_simple_void_NN(sv);
eb0d8d16
NC
3753 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3754 const char *const key = PL_lex_inpat ? "qr" : "q";
3755 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3756 const char *type;
3757 STRLEN typelen;
3758
3759 if (PL_lex_inwhat == OP_TRANS) {
3760 type = "tr";
3761 typelen = 2;
3762 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3763 type = "s";
3764 typelen = 1;
9da1dd8f
DM
3765 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3766 type = "q";
3767 typelen = 1;
eb0d8d16
NC
3768 } else {
3769 type = "qq";
3770 typelen = 2;
3771 }
3772
3773 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3774 type, typelen);
3775 }
6154021b 3776 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b899e89d
FC
3777 }
3778 LEAVE_with_name("scan_const");
79072805
LW
3779 return s;
3780}
3781
ffb4593c
NT
3782/* S_intuit_more
3783 * Returns TRUE if there's more to the expression (e.g., a subscript),
3784 * FALSE otherwise.
ffb4593c
NT
3785 *
3786 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3787 *
3788 * ->[ and ->{ return TRUE
3789 * { and [ outside a pattern are always subscripts, so return TRUE
3790 * if we're outside a pattern and it's not { or [, then return FALSE
3791 * if we're in a pattern and the first char is a {
3792 * {4,5} (any digits around the comma) returns FALSE
3793 * if we're in a pattern and the first char is a [
3794 * [] returns FALSE
3795 * [SOMETHING] has a funky algorithm to decide whether it's a
3796 * character class or not. It has to deal with things like
3797 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3798 * anything else returns TRUE
3799 */
3800
9cbb5ea2
GS
3801/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3802
76e3520e 3803STATIC int
5aaab254 3804S_intuit_more(pTHX_ char *s)
79072805 3805{
97aff369 3806 dVAR;
7918f24d
NC
3807
3808 PERL_ARGS_ASSERT_INTUIT_MORE;
3809
3280af22 3810 if (PL_lex_brackets)
79072805
LW
3811 return TRUE;
3812 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3813 return TRUE;
3814 if (*s != '{' && *s != '[')
3815 return FALSE;
3280af22 3816 if (!PL_lex_inpat)
79072805
LW
3817 return TRUE;
3818
3819 /* In a pattern, so maybe we have {n,m}. */
3820 if (*s == '{') {
4d68ffa0 3821 if (regcurly(s, FALSE)) {
79072805 3822 return FALSE;
b3155d95 3823 }
79072805 3824 return TRUE;
79072805
LW
3825 }
3826
3827 /* On the other hand, maybe we have a character class */
3828
3829 s++;
3830 if (*s == ']' || *s == '^')
3831 return FALSE;
3832 else {
ffb4593c 3833 /* this is terrifying, and it works */
99f2bdb7 3834 int weight;
79072805 3835 char seen[256];
9d4ba2ae 3836 const char * const send = strchr(s,']');
99f2bdb7 3837 unsigned char un_char, last_un_char;
3280af22 3838 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3839
3840 if (!send) /* has to be an expression */
3841 return TRUE;
99f2bdb7 3842 weight = 2; /* let's weigh the evidence */
79072805 3843
79072805
LW
3844 if (*s == '$')
3845 weight -= 3;
3846 else if (isDIGIT(*s)) {
3847 if (s[1] != ']') {
3848 if (isDIGIT(s[1]) && s[2] == ']')
3849 weight -= 10;
3850 }
3851 else
3852 weight -= 100;
3853 }
99f2bdb7
DD
3854 Zero(seen,256,char);
3855 un_char = 255;
79072805
LW
3856 for (; s < send; s++) {
3857 last_un_char = un_char;
3858 un_char = (unsigned char)*s;
3859 switch (*s) {
3860 case '@':
3861 case '&':
3862 case '$':
3863 weight -= seen[un_char] * 10;
8a2bca12 3864 if (isWORDCHAR_lazy_if(s+1,UTF)) {
90e5519e 3865 int len;
8903cb82 3866 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e 3867 len = (int)strlen(tmpbuf);
6fbd0d97
BF
3868 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3869 UTF ? SVf_UTF8 : 0, SVt_PV))
79072805
LW
3870 weight -= 100;
3871 else
3872 weight -= 10;
3873 }
3874 else if (*s == '$' && s[1] &&
93a17b20
LW
3875 strchr("[#!%*<>()-=",s[1])) {
3876 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3877 weight -= 10;
3878 else
3879 weight -= 1;
3880 }
3881 break;
3882 case '\\':
3883 un_char = 254;
3884 if (s[1]) {
93a17b20 3885 if (strchr("wds]",s[1]))
79072805 3886 weight += 100;
10edeb5d 3887 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3888 weight += 1;
93a17b20 3889 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3890 weight += 40;
3891 else if (isDIGIT(s[1])) {
3892 weight += 40;
3893 while (s[1] && isDIGIT(s[1]))
3894 s++;
3895 }
3896 }
3897 else
3898 weight += 100;
3899 break;
3900 case '-':
3901 if (s[1] == '\\')
3902 weight += 50;
93a17b20 3903 if (strchr("aA01! ",last_un_char))
79072805 3904 weight += 30;
93a17b20 3905 if (strchr("zZ79~",s[1]))
79072805 3906 weight += 30;
f27ffc4a
GS
3907 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3908 weight -= 5; /* cope with negative subscript */
79072805
LW
3909 break;
3910 default:
0eb30aeb 3911 if (!isWORDCHAR(last_un_char)
3792a11b
NC
3912 && !(last_un_char == '$' || last_un_char == '@'
3913 || last_un_char == '&')
3914 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3915 char *d = tmpbuf;
3916 while (isALPHA(*s))
3917 *d++ = *s++;
3918 *d = '\0';
5458a98a 3919 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3920 weight -= 150;
3921 }
3922 if (un_char == last_un_char + 1)
3923 weight += 5;
3924 weight -= seen[un_char];
3925 break;
3926 }
3927 seen[un_char]++;
3928 }
3929 if (weight >= 0) /* probably a character class */
3930 return FALSE;
3931 }
3932
3933 return TRUE;
3934}
ffed7fef 3935
ffb4593c
NT
3936/*
3937 * S_intuit_method
3938 *
3939 * Does all the checking to disambiguate
3940 * foo bar
3941 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3942 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3943 *
3944 * First argument is the stuff after the first token, e.g. "bar".
3945 *
a4fd4a89 3946 * Not a method if foo is a filehandle.
ffb4593c
NT
3947 * Not a method if foo is a subroutine prototyped to take a filehandle.
3948 * Not a method if it's really "Foo $bar"
3949 * Method if it's "foo $bar"
3950 * Not a method if it's really "print foo $bar"
3951 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3952 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3953 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3954 * =>
3955 */
3956
76e3520e 3957STATIC int
62d55b22 3958S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3959{
97aff369 3960 dVAR;
a0d0e21e 3961 char *s = start + (*start == '$');
3280af22 3962 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3963 STRLEN len;
3964 GV* indirgv;
5db06880
NC
3965#ifdef PERL_MAD
3966 int soff;
3967#endif
a0d0e21e 3968
7918f24d
NC
3969 PERL_ARGS_ASSERT_INTUIT_METHOD;
3970
aca88b25 3971 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3972 return 0;
aca88b25 3973 if (cv && SvPOK(cv)) {
8fa6a409 3974 const char *proto = CvPROTO(cv);
62d55b22
NC
3975 if (proto) {
3976 if (*proto == ';')
3977 proto++;
3978 if (*proto == '*')
3979 return 0;
3980 }
a0d0e21e 3981 }
8903cb82 3982 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3983 /* start is the beginning of the possible filehandle/object,
3984 * and s is the end of it
3985 * tmpbuf is a copy of it
3986 */
3987
a0d0e21e 3988 if (*start == '$') {
39c012bc 3989 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3ef1310e 3990 isUPPER(*PL_tokenbuf))
a0d0e21e 3991 return 0;
5db06880
NC
3992#ifdef PERL_MAD
3993 len = start - SvPVX(PL_linestr);
3994#endif
29595ff2 3995 s = PEEKSPACE(s);
f0092767 3996#ifdef PERL_MAD
5db06880
NC
3997 start = SvPVX(PL_linestr) + len;
3998#endif
3280af22
NIS
3999 PL_bufptr = start;
4000 PL_expect = XREF;
a0d0e21e
LW
4001 return *s == '(' ? FUNCMETH : METHOD;
4002 }
5458a98a 4003 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
4004 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4005 len -= 2;
4006 tmpbuf[len] = '\0';
5db06880
NC
4007#ifdef PERL_MAD
4008 soff = s - SvPVX(PL_linestr);
4009#endif
c3e0f903
GS
4010 goto bare_package;
4011 }
38d2cf30 4012 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8ebc5c01 4013 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
4014 return 0;
4015 /* filehandle or package name makes it a method */
39c012bc 4016 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5db06880
NC
4017#ifdef PERL_MAD
4018 soff = s - SvPVX(PL_linestr);
4019#endif
29595ff2 4020 s = PEEKSPACE(s);
3280af22 4021 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 4022 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 4023 bare_package:
cd81e915 4024 start_force(PL_curforce);
9ded7720 4025 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 4026 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 4027 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880 4028 if (PL_madskills)
38d2cf30
BF
4029 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4030 ( UTF ? SVf_UTF8 : 0 )));
3280af22 4031 PL_expect = XTERM;
a0d0e21e 4032 force_next(WORD);
3280af22 4033 PL_bufptr = s;
5db06880
NC
4034#ifdef PERL_MAD
4035 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4036#endif
a0d0e21e
LW
4037 return *s == '(' ? FUNCMETH : METHOD;
4038 }
4039 }
4040 return 0;
4041}
4042
16d20bd9 4043/* Encoded script support. filter_add() effectively inserts a
4e553d73 4044 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
4045 * Note that the filter function only applies to the current source file
4046 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4047 *
4048 * The datasv parameter (which may be NULL) can be used to pass
4049 * private data to this instance of the filter. The filter function
4050 * can recover the SV using the FILTER_DATA macro and use it to
4051 * store private buffers and state information.
4052 *
4053 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 4054 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 4055 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
4056 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4057 * private use must be set using malloc'd pointers.
4058 */
16d20bd9
AD
4059
4060SV *
864dbfa3 4061Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 4062{
97aff369 4063 dVAR;
f4c556ac 4064 if (!funcp)
a0714e2c 4065 return NULL;
f4c556ac 4066
5486870f
DM
4067 if (!PL_parser)
4068 return NULL;
4069
f1c31c52
FC
4070 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4071 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4072
3280af22
NIS
4073 if (!PL_rsfp_filters)
4074 PL_rsfp_filters = newAV();
16d20bd9 4075 if (!datasv)
561b68a9 4076 datasv = newSV(0);
862a34c6 4077 SvUPGRADE(datasv, SVt_PVIO);
8141890a 4078 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 4079 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 4080 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
4081 FPTR2DPTR(void *, IoANY(datasv)),
4082 SvPV_nolen(datasv)));
3280af22
NIS
4083 av_unshift(PL_rsfp_filters, 1);
4084 av_store(PL_rsfp_filters, 0, datasv) ;
60d63348
FC
4085 if (
4086 !PL_parser->filtered
4087 && PL_parser->lex_flags & LEX_EVALBYTES
4088 && PL_bufptr < PL_bufend
4089 ) {
4090 const char *s = PL_bufptr;
4091 while (s < PL_bufend) {
4092 if (*s == '\n') {
4093 SV *linestr = PL_parser->linestr;
4094 char *buf = SvPVX(linestr);
4095 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4096 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4097 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4098 STRLEN const linestart_pos = PL_parser->linestart - buf;
4099 STRLEN const last_uni_pos =
4100 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4101 STRLEN const last_lop_pos =
4102 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4103 av_push(PL_rsfp_filters, linestr);
4104 PL_parser->linestr =
4105 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4106 buf = SvPVX(PL_parser->linestr);
4107 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4108 PL_parser->bufptr = buf + bufptr_pos;
4109 PL_parser->oldbufptr = buf + oldbufptr_pos;
4110 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4111 PL_parser->linestart = buf + linestart_pos;
4112 if (PL_parser->last_uni)
4113 PL_parser->last_uni = buf + last_uni_pos;
4114 if (PL_parser->last_lop)
4115 PL_parser->last_lop = buf + last_lop_pos;
4116 SvLEN(linestr) = SvCUR(linestr);
4117 SvCUR(linestr) = s-SvPVX(linestr);
4118 PL_parser->filtered = 1;
4119 break;
4120 }
4121 s++;
4122 }
4123 }
16d20bd9
AD
4124 return(datasv);
4125}
4e553d73 4126
16d20bd9
AD
4127
4128/* Delete most recently added instance of this filter function. */
a0d0e21e 4129void
864dbfa3 4130Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 4131{
97aff369 4132 dVAR;
e0c19803 4133 SV *datasv;
24801a4b 4134
7918f24d
NC
4135 PERL_ARGS_ASSERT_FILTER_DEL;
4136
33073adb 4137#ifdef DEBUGGING
55662e27
JH
4138 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4139 FPTR2DPTR(void*, funcp)));
33073adb 4140#endif
5486870f 4141 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
4142 return;
4143 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 4144 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 4145 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 4146 sv_free(av_pop(PL_rsfp_filters));
e50aee73 4147
16d20bd9
AD
4148 return;
4149 }
4150 /* we need to search for the correct entry and clear it */
cea2e8a9 4151 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
4152}
4153
4154
1de9afcd
RGS
4155/* Invoke the idxth filter function for the current rsfp. */
4156/* maxlen 0 = read one text line */
16d20bd9 4157I32
864dbfa3 4158Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 4159{
97aff369 4160 dVAR;
16d20bd9
AD
4161 filter_t funcp;
4162 SV *datasv = NULL;
f482118e
NC
4163 /* This API is bad. It should have been using unsigned int for maxlen.
4164 Not sure if we want to change the API, but if not we should sanity
4165 check the value here. */
60d63348 4166 unsigned int correct_length
39cd7a59
NC
4167 = maxlen < 0 ?
4168#ifdef PERL_MICRO
4169 0x7FFFFFFF
4170#else
4171 INT_MAX
4172#endif
4173 : maxlen;
e50aee73 4174
7918f24d
NC
4175 PERL_ARGS_ASSERT_FILTER_READ;
4176
5486870f 4177 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 4178 return -1;
1de9afcd 4179 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
4180 /* Provide a default input filter to make life easy. */
4181 /* Note that we append to the line. This is handy. */
f4c556ac
GS
4182 DEBUG_P(PerlIO_printf(Perl_debug_log,
4183 "filter_read %d: from rsfp\n", idx));
f482118e 4184 if (correct_length) {
16d20bd9
AD
4185 /* Want a block */
4186 int len ;
f54cb97a 4187 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
4188
4189 /* ensure buf_sv is large enough */
881d8f0a 4190 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
4191 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4192 correct_length)) <= 0) {
3280af22 4193 if (PerlIO_error(PL_rsfp))
37120919
AD
4194 return -1; /* error */
4195 else
4196 return 0 ; /* end of file */
4197 }
16d20bd9 4198 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 4199 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
4200 } else {
4201 /* Want a line */
3280af22
NIS
4202 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4203 if (PerlIO_error(PL_rsfp))
37120919
AD
4204 return -1; /* error */
4205 else
4206 return 0 ; /* end of file */
4207 }
16d20bd9
AD
4208 }
4209 return SvCUR(buf_sv);
4210 }
4211 /* Skip this filter slot if filter has been deleted */
1de9afcd 4212 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
4213 DEBUG_P(PerlIO_printf(Perl_debug_log,
4214 "filter_read %d: skipped (filter deleted)\n",
4215 idx));
f482118e 4216 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9 4217 }
60d63348
FC
4218 if (SvTYPE(datasv) != SVt_PVIO) {
4219 if (correct_length) {
4220 /* Want a block */
4221 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4222 if (!remainder) return 0; /* eof */
4223 if (correct_length > remainder) correct_length = remainder;
4224 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4225 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4226 } else {
4227 /* Want a line */
4228 const char *s = SvEND(datasv);
4229 const char *send = SvPVX(datasv) + SvLEN(datasv);
4230 while (s < send) {
4231 if (*s == '\n') {
4232 s++;
4233 break;
4234 }
4235 s++;
4236 }
4237 if (s == send) return 0; /* eof */
4238 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4239 SvCUR_set(datasv, s-SvPVX(datasv));
4240 }
4241 return SvCUR(buf_sv);
4242 }
16d20bd9 4243 /* Get function pointer hidden within datasv */
8141890a 4244 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
4245 DEBUG_P(PerlIO_printf(Perl_debug_log,
4246 "filter_read %d: via function %p (%s)\n",
ca0270c4 4247 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
4248 /* Call function. The function is expected to */
4249 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 4250 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 4251 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
4252}
4253
76e3520e 4254STATIC char *
5aaab254 4255S_filter_gets(pTHX_ SV *sv, STRLEN append)
16d20bd9 4256{
97aff369 4257 dVAR;
7918f24d
NC
4258
4259 PERL_ARGS_ASSERT_FILTER_GETS;
4260
c39cd008 4261#ifdef PERL_CR_FILTER
3280af22 4262 if (!PL_rsfp_filters) {
c39cd008 4263 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
4264 }
4265#endif
3280af22 4266 if (PL_rsfp_filters) {
55497cff 4267 if (!append)
4268 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
4269 if (FILTER_READ(0, sv, 0) > 0)
4270 return ( SvPVX(sv) ) ;
4271 else
bd61b366 4272 return NULL ;
16d20bd9 4273 }
9d116dd7 4274 else
5cc814fd 4275 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
4276}
4277
01ec43d0 4278STATIC HV *
9bde8eb0 4279S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 4280{
97aff369 4281 dVAR;
def3634b
GS
4282 GV *gv;
4283
7918f24d
NC
4284 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4285
01ec43d0 4286 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
4287 return PL_curstash;
4288
4289 if (len > 2 &&
4290 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
acc6da14 4291 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
01ec43d0
GS
4292 {
4293 return GvHV(gv); /* Foo:: */
def3634b
GS
4294 }
4295
4296 /* use constant CLASS => 'MyClass' */
acc6da14 4297 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
c35e046a
AL
4298 if (gv && GvCV(gv)) {
4299 SV * const sv = cv_const_sv(GvCV(gv));
4300 if (sv)
9bde8eb0 4301 pkgname = SvPV_const(sv, len);
def3634b
GS
4302 }
4303
acc6da14 4304 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
def3634b 4305}
a0d0e21e 4306
e3f73d4e
RGS
4307/*
4308 * S_readpipe_override
486ec47a 4309 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4310 * optree, provided sublex_start() is called afterwards.
4311 */
4312STATIC void
1d51329b 4313S_readpipe_override(pTHX)
e3f73d4e
RGS
4314{
4315 GV **gvp;
4316 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4317 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4318 if ((gv_readpipe
4319 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4320 ||
4321 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4322 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4323 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4324 {
4325 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4326 op_append_elem(OP_LIST,
e3f73d4e
RGS
4327 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4328 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4329 }
e3f73d4e
RGS
4330}
4331
5db06880
NC
4332#ifdef PERL_MAD
4333 /*
4334 * Perl_madlex
4335 * The intent of this yylex wrapper is to minimize the changes to the
4336 * tokener when we aren't interested in collecting madprops. It remains
4337 * to be seen how successful this strategy will be...
4338 */
4339
4340int
4341Perl_madlex(pTHX)
4342{
4343 int optype;
4344 char *s = PL_bufptr;
4345
cd81e915
NC
4346 /* make sure PL_thiswhite is initialized */
4347 PL_thiswhite = 0;
4348 PL_thismad = 0;
5db06880 4349
5db06880 4350 /* previous token ate up our whitespace? */
cd81e915
NC
4351 if (!PL_lasttoke && PL_nextwhite) {
4352 PL_thiswhite = PL_nextwhite;
4353 PL_nextwhite = 0;
5db06880
NC
4354 }
4355
4356 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4357 PL_realtokenstart = -1;
4358 PL_thistoken = 0;
5db06880
NC
4359 optype = yylex();
4360 s = PL_bufptr;
cd81e915 4361 assert(PL_curforce < 0);
5db06880 4362
cd81e915
NC
4363 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4364 if (!PL_thistoken) {
4365 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4366 PL_thistoken = newSVpvs("");
5db06880 4367 else {
c35e046a 4368 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4369 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4370 }
4371 }
cd81e915
NC
4372 if (PL_thismad) /* install head */
4373 CURMAD('X', PL_thistoken);
5db06880
NC
4374 }
4375
4376 /* last whitespace of a sublex? */
cd81e915
NC
4377 if (optype == ')' && PL_endwhite) {
4378 CURMAD('X', PL_endwhite);
5db06880
NC
4379 }
4380
cd81e915 4381 if (!PL_thismad) {
5db06880
NC
4382
4383 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4384 if (!PL_thiswhite && !PL_endwhite && !optype) {
4385 sv_free(PL_thistoken);
4386 PL_thistoken = 0;
5db06880
NC
4387 return 0;
4388 }
4389
4390 /* put off final whitespace till peg */
60d63348 4391 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
cd81e915
NC
4392 PL_nextwhite = PL_thiswhite;
4393 PL_thiswhite = 0;
5db06880 4394 }
cd81e915
NC
4395 else if (PL_thisopen) {
4396 CURMAD('q', PL_thisopen);
4397 if (PL_thistoken)
4398 sv_free(PL_thistoken);
4399 PL_thistoken = 0;
5db06880
NC
4400 }
4401 else {
4402 /* Store actual token text as madprop X */
cd81e915 4403 CURMAD('X', PL_thistoken);
5db06880
NC
4404 }
4405
cd81e915 4406 if (PL_thiswhite) {
5db06880 4407 /* add preceding whitespace as madprop _ */
cd81e915 4408 CURMAD('_', PL_thiswhite);
5db06880
NC
4409 }
4410
cd81e915 4411 if (PL_thisstuff) {
5db06880 4412 /* add quoted material as madprop = */
cd81e915 4413 CURMAD('=', PL_thisstuff);
5db06880
NC
4414 }
4415
cd81e915 4416 if (PL_thisclose) {
5db06880 4417 /* add terminating quote as madprop Q */
cd81e915 4418 CURMAD('Q', PL_thisclose);
5db06880
NC
4419 }
4420 }
4421
4422 /* special processing based on optype */
4423
4424 switch (optype) {
4425
4426 /* opval doesn't need a TOKEN since it can already store mp */
4427 case WORD:
4428 case METHOD:
4429 case FUNCMETH:
4430 case THING:
4431 case PMFUNC:
4432 case PRIVATEREF:
4433 case FUNC0SUB:
4434 case UNIOPSUB:
4435 case LSTOPSUB:
6154021b
RGS
4436 if (pl_yylval.opval)
4437 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4438 PL_thismad = 0;
5db06880
NC
4439 return optype;
4440
4441 /* fake EOF */
4442 case 0:
4443 optype = PEG;
cd81e915
NC
4444 if (PL_endwhite) {
4445 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4446 PL_endwhite = 0;
5db06880
NC
4447 }
4448 break;
4449
5504e6cf
FC
4450 /* pval */
4451 case LABEL:
4452 break;
4453
5db06880
NC
4454 case ']':
4455 case '}':
cd81e915 4456 if (PL_faketokens)
5db06880
NC
4457 break;
4458 /* remember any fake bracket that lexer is about to discard */
4459 if (PL_lex_brackets == 1 &&
4460 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4461 {
4462 s = PL_bufptr;
4463 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4464 s++;
4465 if (*s == '}') {
cd81e915
NC
4466 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4467 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4468 PL_thiswhite = 0;
5db06880
NC
4469 PL_bufptr = s - 1;
4470 break; /* don't bother looking for trailing comment */
4471 }
4472 else
4473 s = PL_bufptr;
4474 }
4475 if (optype == ']')
4476 break;
4477 /* FALLTHROUGH */
4478
4479 /* attach a trailing comment to its statement instead of next token */
4480 case ';':
cd81e915 4481 if (PL_faketokens)
5db06880
NC
4482 break;
4483 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4484 s = PL_bufptr;
4485 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4486 s++;
4487 if (*s == '\n' || *s == '#') {
4488 while (s < PL_bufend && *s != '\n')
4489 s++;
4490 if (s < PL_bufend)
4491 s++;
cd81e915
NC
4492 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4493 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4494 PL_thiswhite = 0;
5db06880
NC
4495 PL_bufptr = s;
4496 }
4497 }
4498 break;
4499
5db06880
NC
4500 /* ival */
4501 default:
4502 break;
4503
4504 }
4505
4506 /* Create new token struct. Note: opvals return early above. */
6154021b 4507 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4508 PL_thismad = 0;
5db06880
NC
4509 return optype;
4510}
4511#endif
4512
468aa647 4513STATIC char *
cc6ed77d 4514S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4515 dVAR;
7918f24d
NC
4516
4517 PERL_ARGS_ASSERT_TOKENIZE_USE;
4518
468aa647
RGS
4519 if (PL_expect != XSTATE)
4520 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4521 is_use ? "use" : "no"));
52d0e95b 4522 PL_expect = XTERM;
29595ff2 4523 s = SKIPSPACE1(s);
468aa647
RGS
4524 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4525 s = force_version(s, TRUE);
17c59fdf
VP
4526 if (*s == ';' || *s == '}'
4527 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4528 start_force(PL_curforce);
9ded7720 4529 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4530 force_next(WORD);
4531 }
4532 else if (*s == 'v') {
4533 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4534 s = force_version(s, FALSE);
4535 }
4536 }
4537 else {
4538 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4539 s = force_version(s, FALSE);
4540 }
6154021b 4541 pl_yylval.ival = is_use;
468aa647
RGS
4542 return s;
4543}
748a9306 4544#ifdef DEBUGGING
27da23d5 4545 static const char* const exp_name[] =
09bef843 4546 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4547 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4548 };
748a9306 4549#endif
463ee0b2 4550
361d9b55
Z
4551#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4552STATIC bool
4553S_word_takes_any_delimeter(char *p, STRLEN len)
4554{
4555 return (len == 1 && strchr("msyq", p[0])) ||
4556 (len == 2 && (
4557 (p[0] == 't' && p[1] == 'r') ||
4558 (p[0] == 'q' && strchr("qwxr", p[1]))));
4559}
4560
02aa26ce
NT
4561/*
4562 yylex
4563
4564 Works out what to call the token just pulled out of the input
4565 stream. The yacc parser takes care of taking the ops we return and
4566 stitching them into a tree.
4567
4568 Returns:
3875fc11 4569 The type of the next token
02aa26ce
NT
4570
4571 Structure:
3875fc11
FC
4572 Switch based on the current state:
4573 - if we already built the token before, use it
4574 - if we have a case modifier in a string, deal with that
4575 - handle other cases of interpolation inside a string
4576 - scan the next line if we are inside a format
4577 In the normal state switch on the next character:
4578 - default:
4579 if alphabetic, go to key lookup
4580 unrecoginized character - croak
4581 - 0/4/26: handle end-of-line or EOF
4582 - cases for whitespace
4583 - \n and #: handle comments and line numbers
4584 - various operators, brackets and sigils
4585 - numbers
4586 - quotes
4587 - 'v': vstrings (or go to key lookup)
4588 - 'x' repetition operator (or go to key lookup)
4589 - other ASCII alphanumerics (key lookup begins here):
4590 word before => ?
4591 keyword plugin
4592 scan built-in keyword (but do nothing with it yet)
4593 check for statement label
4594 check for lexical subs
4595 goto just_a_word if there is one
4596 see whether built-in keyword is overridden
4597 switch on keyword number:
4598 - default: just_a_word:
4599 not a built-in keyword; handle bareword lookup
4600 disambiguate between method and sub call
4601 fall back to bareword
4602 - cases for built-in keywords
02aa26ce
NT
4603*/
4604
20141f0e 4605
dba4d153
JH
4606#ifdef __SC__
4607#pragma segment Perl_yylex
4608#endif
dba4d153 4609int
dba4d153 4610Perl_yylex(pTHX)
20141f0e 4611{
97aff369 4612 dVAR;
eb578fdb
KW
4613 char *s = PL_bufptr;
4614 char *d;
463ee0b2 4615 STRLEN len;
705fe0e5
FC
4616 bool bof = FALSE;
4617 U8 formbrack = 0;
580561a3 4618 U32 fake_eof = 0;
a687059c 4619
10edeb5d
JH
4620 /* orig_keyword, gvp, and gv are initialized here because
4621 * jump to the label just_a_word_zero can bypass their
4622 * initialization later. */
4623 I32 orig_keyword = 0;
4624 GV *gv = NULL;
4625 GV **gvp = NULL;
4626
bbf60fe6 4627 DEBUG_T( {
396482e1 4628 SV* tmp = newSVpvs("");
b6007c36
DM
4629 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4630 (IV)CopLINE(PL_curcop),
4631 lex_state_names[PL_lex_state],
4632 exp_name[PL_expect],
4633 pv_display(tmp, s, strlen(s), 0, 60));
4634 SvREFCNT_dec(tmp);
bbf60fe6 4635 } );
02aa26ce 4636
3280af22 4637 switch (PL_lex_state) {
79072805
LW
4638#ifdef COMMENTARY
4639 case LEX_NORMAL: /* Some compilers will produce faster */
4640 case LEX_INTERPNORMAL: /* code if we comment these out. */
4641 break;
4642#endif
4643
09bef843 4644 /* when we've already built the next token, just pull it out of the queue */
79072805 4645 case LEX_KNOWNEXT:
5db06880
NC
4646#ifdef PERL_MAD
4647 PL_lasttoke--;
6154021b 4648 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4649 if (PL_madskills) {
cd81e915 4650 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4651 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4652 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4653 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4654 PL_thismad->mad_val = 0;
4655 mad_free(PL_thismad);
4656 PL_thismad = 0;
5db06880
NC
4657 }
4658 }
4659 if (!PL_lasttoke) {
4660 PL_lex_state = PL_lex_defer;
4661 PL_expect = PL_lex_expect;
4662 PL_lex_defer = LEX_NORMAL;
4663 if (!PL_nexttoke[PL_lasttoke].next_type)
4664 return yylex();
4665 }
4666#else
3280af22 4667 PL_nexttoke--;
6154021b 4668 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4669 if (!PL_nexttoke) {
4670 PL_lex_state = PL_lex_defer;
4671 PL_expect = PL_lex_expect;
4672 PL_lex_defer = LEX_NORMAL;
463ee0b2 4673 }
5db06880 4674#endif
a7aaec61
Z
4675 {
4676 I32 next_type;
5db06880 4677#ifdef PERL_MAD
a7aaec61 4678 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4679#else
a7aaec61 4680 next_type = PL_nexttype[PL_nexttoke];
5db06880 4681#endif
78cdf107
Z
4682 if (next_type & (7<<24)) {
4683 if (next_type & (1<<24)) {
4684 if (PL_lex_brackets > 100)
4685 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4686 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4687 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4688 }
4689 if (next_type & (2<<24))
4690 PL_lex_allbrackets++;
4691 if (next_type & (4<<24))
4692 PL_lex_allbrackets--;
a7aaec61
Z
4693 next_type &= 0xffff;
4694 }
3f33d153 4695 return REPORT(next_type == 'p' ? pending_ident() : next_type);
a7aaec61 4696 }
79072805 4697
02aa26ce 4698 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4699 when we get here, PL_bufptr is at the \
02aa26ce 4700 */
79072805
LW
4701 case LEX_INTERPCASEMOD:
4702#ifdef DEBUGGING
3280af22 4703 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5637ef5b
NC
4704 Perl_croak(aTHX_
4705 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4706 PL_bufptr, PL_bufend, *PL_bufptr);
79072805 4707#endif
02aa26ce 4708 /* handle \E or end of string */
3280af22 4709 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4710 /* if at a \E */
3280af22 4711 if (PL_lex_casemods) {
f54cb97a 4712 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4713 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4714
3792a11b 4715 if (PL_bufptr != PL_bufend
838f2281
BF
4716 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4717 || oldmod == 'F')) {
3280af22
NIS
4718 PL_bufptr += 2;
4719 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4720#ifdef PERL_MAD
4721 if (PL_madskills)
6b29d1f5 4722 PL_thistoken = newSVpvs("\\E");
5db06880 4723#endif
a0d0e21e 4724 }
78cdf107 4725 PL_lex_allbrackets--;
bbf60fe6 4726 return REPORT(')');
79072805 4727 }
52ed07f6
BF
4728 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4729 /* Got an unpaired \E */
4730 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
820438b1 4731 "Useless use of \\E");
52ed07f6 4732 }
5db06880
NC
4733#ifdef PERL_MAD
4734 while (PL_bufptr != PL_bufend &&
4735 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
1cac5c33
FC
4736 if (PL_madskills) {
4737 if (!PL_thiswhite)
6b29d1f5 4738 PL_thiswhite = newSVpvs("");
1cac5c33
FC
4739 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4740 }
5db06880
NC
4741 PL_bufptr += 2;
4742 }
4743#else
3280af22
NIS
4744 if (PL_bufptr != PL_bufend)
4745 PL_bufptr += 2;
5db06880 4746#endif
3280af22 4747 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4748 return yylex();
79072805
LW
4749 }
4750 else {
607df283 4751 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4752 "### Saw case modifier\n"); });
3280af22 4753 s = PL_bufptr + 1;
6e909404 4754 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4755#ifdef PERL_MAD
1cac5c33
FC
4756 if (PL_madskills) {
4757 if (!PL_thiswhite)
6b29d1f5 4758 PL_thiswhite = newSVpvs("");
1cac5c33
FC
4759 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4760 }
5db06880 4761#endif
89122651 4762 PL_bufptr = s + 3;
6e909404
JH
4763 PL_lex_state = LEX_INTERPCONCAT;
4764 return yylex();
a0d0e21e 4765 }
6e909404 4766 else {
90771dc0 4767 I32 tmp;
5db06880
NC
4768 if (!PL_madskills) /* when just compiling don't need correct */
4769 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4770 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
838f2281
BF
4771 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4772 (strchr(PL_lex_casestack, 'L')
4773 || strchr(PL_lex_casestack, 'U')
4774 || strchr(PL_lex_casestack, 'F'))) {
6e909404 4775 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4776 PL_lex_allbrackets--;
bbf60fe6 4777 return REPORT(')');
6e909404
JH
4778 }
4779 if (PL_lex_casemods > 10)
4780 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4781 PL_lex_casestack[PL_lex_casemods++] = *s;
4782 PL_lex_casestack[PL_lex_casemods] = '\0';
4783 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4784 start_force(PL_curforce);
9ded7720 4785 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4786 force_next((2<<24)|'(');
cd81e915 4787 start_force(PL_curforce);
6e909404 4788 if (*s == 'l')
9ded7720 4789 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4790 else if (*s == 'u')
9ded7720 4791 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4792 else if (*s == 'L')
9ded7720 4793 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4794 else if (*s == 'U')
9ded7720 4795 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4796 else if (*s == 'Q')
9ded7720 4797 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
838f2281
BF
4798 else if (*s == 'F')
4799 NEXTVAL_NEXTTOKE.ival = OP_FC;
6e909404 4800 else
5637ef5b 4801 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5db06880 4802 if (PL_madskills) {
a5849ce5
NC
4803 SV* const tmpsv = newSVpvs("\\ ");
4804 /* replace the space with the character we want to escape
4805 */
4806 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4807 curmad('_', tmpsv);
4808 }
6e909404 4809 PL_bufptr = s + 1;
a0d0e21e 4810 }
79072805 4811 force_next(FUNC);
3280af22
NIS
4812 if (PL_lex_starts) {
4813 s = PL_bufptr;
4814 PL_lex_starts = 0;
5db06880
NC
4815#ifdef PERL_MAD
4816 if (PL_madskills) {
cd81e915
NC
4817 if (PL_thistoken)
4818 sv_free(PL_thistoken);
6b29d1f5 4819 PL_thistoken = newSVpvs("");
5db06880
NC
4820 }
4821#endif
131b3ad0
DM
4822 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4823 if (PL_lex_casemods == 1 && PL_lex_inpat)
4824 OPERATOR(',');
4825 else
4826 Aop(OP_CONCAT);
79072805
LW
4827 }
4828 else
cea2e8a9 4829 return yylex();
79072805
LW
4830 }
4831
55497cff 4832 case LEX_INTERPPUSH:
bbf60fe6 4833 return REPORT(sublex_push());
55497cff 4834
79072805 4835 case LEX_INTERPSTART:
3280af22 4836 if (PL_bufptr == PL_bufend)
bbf60fe6 4837 return REPORT(sublex_done());
9da1dd8f 4838 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
b6007c36 4839 "### Interpolated variable\n"); });
3280af22
NIS
4840 PL_expect = XTERM;
4841 PL_lex_dojoin = (*PL_bufptr == '@');
4842 PL_lex_state = LEX_INTERPNORMAL;
4843 if (PL_lex_dojoin) {
cd81e915 4844 start_force(PL_curforce);
9ded7720 4845 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4846 force_next(',');
cd81e915 4847 start_force(PL_curforce);
a0d0e21e 4848 force_ident("\"", '$');
cd81e915 4849 start_force(PL_curforce);
9ded7720 4850 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4851 force_next('$');
cd81e915 4852 start_force(PL_curforce);
9ded7720 4853 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4854 force_next((2<<24)|'(');
cd81e915 4855 start_force(PL_curforce);
9ded7720 4856 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4857 force_next(FUNC);
4858 }
9da1dd8f
DM
4859 /* Convert (?{...}) and friends to 'do {...}' */
4860 if (PL_lex_inpat && *PL_bufptr == '(') {
3328ab5a 4861 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9da1dd8f
DM
4862 PL_bufptr += 2;
4863 if (*PL_bufptr != '{')
4864 PL_bufptr++;
6165f85b
DM
4865 start_force(PL_curforce);
4866 /* XXX probably need a CURMAD(something) here */
9da1dd8f
DM
4867 PL_expect = XTERMBLOCK;
4868 force_next(DO);
4869 }
4870
3280af22
NIS
4871 if (PL_lex_starts++) {
4872 s = PL_bufptr;
5db06880
NC
4873#ifdef PERL_MAD
4874 if (PL_madskills) {
cd81e915
NC
4875 if (PL_thistoken)
4876 sv_free(PL_thistoken);
6b29d1f5 4877 PL_thistoken = newSVpvs("");
5db06880
NC
4878 }
4879#endif
131b3ad0
DM
4880 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4881 if (!PL_lex_casemods && PL_lex_inpat)
4882 OPERATOR(',');
4883 else
4884 Aop(OP_CONCAT);
79072805 4885 }
cea2e8a9 4886 return yylex();
79072805
LW
4887
4888 case LEX_INTERPENDMAYBE:
3280af22
NIS
4889 if (intuit_more(PL_bufptr)) {
4890 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4891 break;
4892 }
4893 /* FALL THROUGH */
4894
4895 case LEX_INTERPEND:
3280af22
NIS
4896 if (PL_lex_dojoin) {
4897 PL_lex_dojoin = FALSE;
4898 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4899#ifdef PERL_MAD
4900 if (PL_madskills) {
cd81e915
NC
4901 if (PL_thistoken)
4902 sv_free(PL_thistoken);
6b29d1f5 4903 PL_thistoken = newSVpvs("");
5db06880
NC
4904 }
4905#endif
78cdf107 4906 PL_lex_allbrackets--;
bbf60fe6 4907 return REPORT(')');
79072805 4908 }
43a16006 4909 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4910 && SvEVALED(PL_lex_repl))
43a16006 4911 {
e9fa98b2 4912 if (PL_bufptr != PL_bufend)
cea2e8a9 4913 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4914 PL_lex_repl = NULL;
e9fa98b2 4915 }
db444266
FC
4916 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4917 re_eval_str. If the here-doc body’s length equals the previous
4918 value of re_eval_start, re_eval_start will now be null. So
4919 check re_eval_str as well. */
3328ab5a
FC
4920 if (PL_parser->lex_shared->re_eval_start
4921 || PL_parser->lex_shared->re_eval_str) {
db444266 4922 SV *sv;
9da1dd8f
DM
4923 if (*PL_bufptr != ')')
4924 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4925 PL_bufptr++;
4926 /* having compiled a (?{..}) expression, return the original
4927 * text too, as a const */
3328ab5a
FC
4928 if (PL_parser->lex_shared->re_eval_str) {
4929 sv = PL_parser->lex_shared->re_eval_str;
4930 PL_parser->lex_shared->re_eval_str = NULL;
4931 SvCUR_set(sv,
4932 PL_bufptr - PL_parser->lex_shared->re_eval_start);
db444266
FC
4933 SvPV_shrink_to_cur(sv);
4934 }
3328ab5a
FC
4935 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4936 PL_bufptr - PL_parser->lex_shared->re_eval_start);
6165f85b
DM
4937 start_force(PL_curforce);
4938 /* XXX probably need a CURMAD(something) here */
4939 NEXTVAL_NEXTTOKE.opval =
9da1dd8f 4940 (OP*)newSVOP(OP_CONST, 0,
db444266 4941 sv);
9da1dd8f 4942 force_next(THING);
3328ab5a 4943 PL_parser->lex_shared->re_eval_start = NULL;
9da1dd8f
DM
4944 PL_expect = XTERM;
4945 return REPORT(',');
4946 }
4947
79072805
LW
4948 /* FALLTHROUGH */
4949 case LEX_INTERPCONCAT:
4950#ifdef DEBUGGING
3280af22 4951 if (PL_lex_brackets)
5637ef5b
NC
4952 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4953 (long) PL_lex_brackets);
79072805 4954#endif
3280af22 4955 if (PL_bufptr == PL_bufend)
bbf60fe6 4956 return REPORT(sublex_done());
79072805 4957
9da1dd8f
DM
4958 /* m'foo' still needs to be parsed for possible (?{...}) */
4959 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
3280af22 4960 SV *sv = newSVsv(PL_linestr);
9da1dd8f 4961 sv = tokeq(sv);
6154021b 4962 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4963 s = PL_bufend;
79072805
LW
4964 }
4965 else {
3280af22 4966 s = scan_const(PL_bufptr);
79072805 4967 if (*s == '\\')
3280af22 4968 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4969 else
3280af22 4970 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4971 }
4972
3280af22 4973 if (s != PL_bufptr) {
cd81e915 4974 start_force(PL_curforce);
5db06880
NC
4975 if (PL_madskills) {
4976 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4977 }
6154021b 4978 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4979 PL_expect = XTERM;
79072805 4980 force_next(THING);
131b3ad0 4981 if (PL_lex_starts++) {
5db06880
NC
4982#ifdef PERL_MAD
4983 if (PL_madskills) {
cd81e915
NC
4984 if (PL_thistoken)
4985 sv_free(PL_thistoken);
6b29d1f5 4986 PL_thistoken = newSVpvs("");
5db06880
NC
4987 }
4988#endif
131b3ad0
DM
4989 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4990 if (!PL_lex_casemods && PL_lex_inpat)
4991 OPERATOR(',');
4992 else
4993 Aop(OP_CONCAT);
4994 }
79072805 4995 else {
3280af22 4996 PL_bufptr = s;
cea2e8a9 4997 return yylex();
79072805
LW
4998 }
4999 }
5000
cea2e8a9 5001 return yylex();
a0d0e21e 5002 case LEX_FORMLINE:
3280af22
NIS
5003 s = scan_formline(PL_bufptr);
5004 if (!PL_lex_formbrack)
7c70caa5 5005 {
705fe0e5 5006 formbrack = 1;
a0d0e21e 5007 goto rightbracket;
7c70caa5 5008 }
705fe0e5
FC
5009 PL_bufptr = s;
5010 return yylex();
79072805
LW
5011 }
5012
3280af22
NIS
5013 s = PL_bufptr;
5014 PL_oldoldbufptr = PL_oldbufptr;
5015 PL_oldbufptr = s;
463ee0b2
LW
5016
5017 retry:
5db06880 5018#ifdef PERL_MAD
cd81e915
NC
5019 if (PL_thistoken) {
5020 sv_free(PL_thistoken);
5021 PL_thistoken = 0;
5db06880 5022 }
cd81e915 5023 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 5024#endif
378cc40b
LW
5025 switch (*s) {
5026 default:
7e2040f0 5027 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 5028 goto keylookup;
b1fc3636 5029 {
e2f06df0
BF
5030 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5031 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
5032 UTF8SKIP(s),
5033 SVs_TEMP | SVf_UTF8),
5034 10, UNI_DISPLAY_ISPRINT))
5035 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
b1fc3636
CJ
5036 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5037 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5038 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5039 } else {
5040 d = PL_linestart;
5041 }
5042 *s = '\0';
e2f06df0
BF
5043 sv_setpv(dsv, d);
5044 if (UTF)
5045 SvUTF8_on(dsv);
5046 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
b1fc3636 5047 }
e929a76b
LW
5048 case 4:
5049 case 26:
5050 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 5051 case 0:
5db06880
NC
5052#ifdef PERL_MAD
5053 if (PL_madskills)
cd81e915 5054 PL_faketokens = 0;
5db06880 5055#endif
60d63348 5056 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
3280af22
NIS
5057 PL_last_uni = 0;
5058 PL_last_lop = 0;
a7aaec61
Z
5059 if (PL_lex_brackets &&
5060 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
5061 yyerror((const char *)
5062 (PL_lex_formbrack
5063 ? "Format not terminated"
5064 : "Missing right curly or square bracket"));
c5ee2135 5065 }
4e553d73 5066 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 5067 "### Tokener got EOF\n");
5f80b19c 5068 } );
79072805 5069 TOKEN(0);
463ee0b2 5070 }
3280af22 5071 if (s++ < PL_bufend)
a687059c 5072 goto retry; /* ignore stray nulls */
3280af22
NIS
5073 PL_last_uni = 0;
5074 PL_last_lop = 0;
5075 if (!PL_in_eval && !PL_preambled) {
5076 PL_preambled = TRUE;
5db06880
NC
5077#ifdef PERL_MAD
5078 if (PL_madskills)
cd81e915 5079 PL_faketokens = 1;
5db06880 5080#endif
5ab7ff98
NC
5081 if (PL_perldb) {
5082 /* Generate a string of Perl code to load the debugger.
5083 * If PERL5DB is set, it will return the contents of that,
5084 * otherwise a compile-time require of perl5db.pl. */
5085
5086 const char * const pdb = PerlEnv_getenv("PERL5DB");
5087
5088 if (pdb) {
5089 sv_setpv(PL_linestr, pdb);
5090 sv_catpvs(PL_linestr,";");
5091 } else {
5092 SETERRNO(0,SS_NORMAL);
5093 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5094 }
5095 } else
5096 sv_setpvs(PL_linestr,"");
c62eb204
NC
5097 if (PL_preambleav) {
5098 SV **svp = AvARRAY(PL_preambleav);
5099 SV **const end = svp + AvFILLp(PL_preambleav);
5100 while(svp <= end) {
5101 sv_catsv(PL_linestr, *svp);
5102 ++svp;
396482e1 5103 sv_catpvs(PL_linestr, ";");
91b7def8 5104 }
daba3364 5105 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 5106 PL_preambleav = NULL;
91b7def8 5107 }
9f639728
FR
5108 if (PL_minus_E)
5109 sv_catpvs(PL_linestr,
5110 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 5111 if (PL_minus_n || PL_minus_p) {
f0e67a1d 5112 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 5113 if (PL_minus_l)
396482e1 5114 sv_catpvs(PL_linestr,"chomp;");
3280af22 5115 if (PL_minus_a) {
3280af22 5116 if (PL_minus_F) {
3792a11b
NC
5117 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5118 || *PL_splitstr == '"')
3280af22 5119 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 5120 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 5121 else {
c8ef6a4b
NC
5122 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5123 bytes can be used as quoting characters. :-) */
dd374669 5124 const char *splits = PL_splitstr;
91d456ae 5125 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
5126 do {
5127 /* Need to \ \s */
dd374669
AL
5128 if (*splits == '\\')
5129 sv_catpvn(PL_linestr, splits, 1);
5130 sv_catpvn(PL_linestr, splits, 1);
5131 } while (*splits++);
48c4c863
NC
5132 /* This loop will embed the trailing NUL of
5133 PL_linestr as the last thing it does before
5134 terminating. */
396482e1 5135 sv_catpvs(PL_linestr, ");");
54310121 5136 }
2304df62
AD
5137 }
5138 else
396482e1 5139 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 5140 }
79072805 5141 }
396482e1 5142 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
5143 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5144 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5145 PL_last_lop = PL_last_uni = NULL;
65269a95 5146 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 5147 update_debugger_info(PL_linestr, NULL, 0);
79072805 5148 goto retry;
a687059c 5149 }
e929a76b 5150 do {
580561a3
Z
5151 fake_eof = 0;
5152 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 5153 if (0) {
7e28d3af 5154 fake_eof:
f0e67a1d
Z
5155 fake_eof = LEX_FAKE_EOF;
5156 }
5157 PL_bufptr = PL_bufend;
83944c01 5158 COPLINE_INC_WITH_HERELINES;
f0e67a1d 5159 if (!lex_next_chunk(fake_eof)) {
17cc9359 5160 CopLINE_dec(PL_curcop);
f0e67a1d
Z
5161 s = PL_bufptr;
5162 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5163 }
17cc9359 5164 CopLINE_dec(PL_curcop);
5db06880 5165#ifdef PERL_MAD
f0e67a1d 5166 if (!PL_rsfp)
cd81e915 5167 PL_realtokenstart = -1;
5db06880 5168#endif
f0e67a1d 5169 s = PL_bufptr;
7aa207d6
JH
5170 /* If it looks like the start of a BOM or raw UTF-16,
5171 * check if it in fact is. */
580561a3 5172 if (bof && PL_rsfp &&
7aa207d6
JH
5173 (*s == 0 ||
5174 *(U8*)s == 0xEF ||
5175 *(U8*)s >= 0xFE ||
5176 s[1] == 0)) {
879bc93b
DM
5177 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5178 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
5179#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5180 /* offset may include swallowed CR */
5181 if (!bof)
879bc93b 5182 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 5183#endif
7e28d3af 5184 if (bof) {
3280af22 5185 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 5186 s = swallow_bom((U8*)s);
e929a76b 5187 }
378cc40b 5188 }
737c24fc 5189 if (PL_parser->in_pod) {
a0d0e21e 5190 /* Incest with pod. */
5db06880
NC
5191#ifdef PERL_MAD
5192 if (PL_madskills)
cd81e915 5193 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 5194#endif
01a57ef7 5195 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 5196 sv_setpvs(PL_linestr, "");
3280af22
NIS
5197 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5198 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5199 PL_last_lop = PL_last_uni = NULL;
737c24fc 5200 PL_parser->in_pod = 0;
a0d0e21e 5201 }
4e553d73 5202 }
60d63348 5203 if (PL_rsfp || PL_parser->filtered)
85613cab 5204 incline(s);
737c24fc 5205 } while (PL_parser->in_pod);
3280af22 5206 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 5207 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5208 PL_last_lop = PL_last_uni = NULL;
57843af0 5209 if (CopLINE(PL_curcop) == 1) {
3280af22 5210 while (s < PL_bufend && isSPACE(*s))
79072805 5211 s++;
a0d0e21e 5212 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 5213 s++;
5db06880
NC
5214#ifdef PERL_MAD
5215 if (PL_madskills)
cd81e915 5216 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 5217#endif
bd61b366 5218 d = NULL;
3280af22 5219 if (!PL_in_eval) {
44a8e56a 5220 if (*s == '#' && *(s+1) == '!')
5221 d = s + 2;
5222#ifdef ALTERNATE_SHEBANG
5223 else {
bfed75c6 5224 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 5225 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5226 d = s + (sizeof(as) - 1);
5227 }
5228#endif /* ALTERNATE_SHEBANG */
5229 }
5230 if (d) {
b8378b72 5231 char *ipath;
774d564b 5232 char *ipathend;
b8378b72 5233
774d564b 5234 while (isSPACE(*d))
b8378b72
CS
5235 d++;
5236 ipath = d;
774d564b 5237 while (*d && !isSPACE(*d))
5238 d++;
5239 ipathend = d;
5240
5241#ifdef ARG_ZERO_IS_SCRIPT
5242 if (ipathend > ipath) {
5243 /*
5244 * HP-UX (at least) sets argv[0] to the script name,
5245 * which makes $^X incorrect. And Digital UNIX and Linux,
5246 * at least, set argv[0] to the basename of the Perl
5247 * interpreter. So, having found "#!", we'll set it right.
5248 */
fafc274c
NC
5249 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5250 SVt_PV)); /* $^X */
774d564b 5251 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 5252 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 5253 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 5254 SvSETMAGIC(x);
5255 }
556c1dec
JH
5256 else {
5257 STRLEN blen;
5258 STRLEN llen;
cfd0369c 5259 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 5260 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
5261 if (llen < blen) {
5262 bstart += blen - llen;
5263 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5264 sv_setpvn(x, ipath, ipathend - ipath);
5265 SvSETMAGIC(x);
5266 }
5267 }
5268 }
774d564b 5269 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 5270 }
774d564b 5271#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
5272
5273 /*
5274 * Look for options.
5275 */
748a9306 5276 d = instr(s,"perl -");
84e30d1a 5277 if (!d) {
748a9306 5278 d = instr(s,"perl");
84e30d1a
GS
5279#if defined(DOSISH)
5280 /* avoid getting into infinite loops when shebang
5281 * line contains "Perl" rather than "perl" */
5282 if (!d) {
5283 for (d = ipathend-4; d >= ipath; --d) {
5284 if ((*d == 'p' || *d == 'P')
5285 && !ibcmp(d, "perl", 4))
5286 {
5287 break;
5288 }
5289 }
5290 if (d < ipath)
bd61b366 5291 d = NULL;
84e30d1a
GS
5292 }
5293#endif
5294 }
44a8e56a 5295#ifdef ALTERNATE_SHEBANG
5296 /*
5297 * If the ALTERNATE_SHEBANG on this system starts with a
5298 * character that can be part of a Perl expression, then if
5299 * we see it but not "perl", we're probably looking at the
5300 * start of Perl code, not a request to hand off to some
5301 * other interpreter. Similarly, if "perl" is there, but
5302 * not in the first 'word' of the line, we assume the line
5303 * contains the start of the Perl program.
44a8e56a 5304 */
5305 if (d && *s != '#') {
f54cb97a 5306 const char *c = ipath;
44a8e56a 5307 while (*c && !strchr("; \t\r\n\f\v#", *c))
5308 c++;
5309 if (c < d)
bd61b366 5310 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 5311 else
5312 *s = '#'; /* Don't try to parse shebang line */
5313 }
774d564b 5314#endif /* ALTERNATE_SHEBANG */
748a9306 5315 if (!d &&
44a8e56a 5316 *s == '#' &&
774d564b 5317 ipathend > ipath &&
3280af22 5318 !PL_minus_c &&
748a9306 5319 !instr(s,"indir") &&
3280af22 5320 instr(PL_origargv[0],"perl"))
748a9306 5321 {
27da23d5 5322 dVAR;
9f68db38 5323 char **newargv;
9f68db38 5324
774d564b 5325 *ipathend = '\0';
5326 s = ipathend + 1;
3280af22 5327 while (s < PL_bufend && isSPACE(*s))
9f68db38 5328 s++;
3280af22 5329 if (s < PL_bufend) {
d85f917e 5330 Newx(newargv,PL_origargc+3,char*);
9f68db38 5331 newargv[1] = s;
3280af22 5332 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
5333 s++;
5334 *s = '\0';
3280af22 5335 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
5336 }
5337 else
3280af22 5338 newargv = PL_origargv;
774d564b 5339 newargv[0] = ipath;
b35112e7 5340 PERL_FPU_PRE_EXEC
b4748376 5341 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 5342 PERL_FPU_POST_EXEC
cea2e8a9 5343 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 5344 }
748a9306 5345 if (d) {
c35e046a
AL
5346 while (*d && !isSPACE(*d))
5347 d++;
5348 while (SPACE_OR_TAB(*d))
5349 d++;
748a9306
LW
5350
5351 if (*d++ == '-') {
f54cb97a 5352 const bool switches_done = PL_doswitches;
fb993905
GA
5353 const U32 oldpdb = PL_perldb;
5354 const bool oldn = PL_minus_n;
5355 const bool oldp = PL_minus_p;
c7030b81 5356 const char *d1 = d;
fb993905 5357
8cc95fdb 5358 do {
4ba71d51
FC
5359 bool baduni = FALSE;
5360 if (*d1 == 'C') {
bd0ab00d
NC
5361 const char *d2 = d1 + 1;
5362 if (parse_unicode_opts((const char **)&d2)
5363 != PL_unicode)
5364 baduni = TRUE;
4ba71d51
FC
5365 }
5366 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
5367 const char * const m = d1;
5368 while (*d1 && !isSPACE(*d1))
5369 d1++;
cea2e8a9 5370 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 5371 (int)(d1 - m), m);
8cc95fdb 5372 }
c7030b81
NC
5373 d1 = moreswitches(d1);
5374 } while (d1);
f0b2cf55
YST
5375 if (PL_doswitches && !switches_done) {
5376 int argc = PL_origargc;
5377 char **argv = PL_origargv;
5378 do {
5379 argc--,argv++;
5380 } while (argc && argv[0][0] == '-' && argv[0][1]);
5381 init_argv_symbols(argc,argv);
5382 }
65269a95 5383 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5384 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5385 /* if we have already added "LINE: while (<>) {",
5386 we must not do it again */
748a9306 5387 {
76f68e9b 5388 sv_setpvs(PL_linestr, "");
3280af22
NIS
5389 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5390 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5391 PL_last_lop = PL_last_uni = NULL;
3280af22 5392 PL_preambled = FALSE;
65269a95 5393 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5394 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5395 goto retry;
5396 }
a0d0e21e 5397 }
79072805 5398 }
9f68db38 5399 }
79072805 5400 }
3280af22 5401 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5402 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5403 start_force(PL_curforce);
5404 NEXTVAL_NEXTTOKE.ival = 0;
5405 force_next(FORMRBRACK);
5406 TOKEN(';');
ae986130 5407 }
378cc40b 5408 goto retry;
4fdae800 5409 case '\r':
6a27c188 5410#ifdef PERL_STRICT_CR
cea2e8a9 5411 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5412 Perl_croak(aTHX_
cc507455 5413 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5414#endif
4fdae800 5415 case ' ': case '\t': case '\f': case 013:
5db06880 5416#ifdef PERL_MAD
cd81e915 5417 PL_realtokenstart = -1;
1cac5c33
FC
5418 if (PL_madskills) {
5419 if (!PL_thiswhite)
ac372eb8 5420 PL_thiswhite = newSVpvs("");
1cac5c33
FC
5421 sv_catpvn(PL_thiswhite, s, 1);
5422 }
5db06880 5423#endif
ac372eb8 5424 s++;
378cc40b 5425 goto retry;
378cc40b 5426 case '#':
e929a76b 5427 case '\n':
5db06880 5428#ifdef PERL_MAD
cd81e915 5429 PL_realtokenstart = -1;
5db06880 5430 if (PL_madskills)
cd81e915 5431 PL_faketokens = 0;
5db06880 5432#endif
60d63348 5433 if (PL_lex_state != LEX_NORMAL ||
62e4c90a
FC
5434 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5435 if (*s == '#' && s == PL_linestart && PL_in_eval
60d63348 5436 && !PL_rsfp && !PL_parser->filtered) {
df0deb90
GS
5437 /* handle eval qq[#line 1 "foo"\n ...] */
5438 CopLINE_dec(PL_curcop);
5439 incline(s);
5440 }
5db06880
NC
5441 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5442 s = SKIPSPACE0(s);
62e4c90a 5443 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5db06880
NC
5444 incline(s);
5445 }
5446 else {
9c74ccc9 5447 const bool in_comment = *s == '#';
5db06880
NC
5448 d = s;
5449 while (d < PL_bufend && *d != '\n')
5450 d++;
5451 if (d < PL_bufend)
5452 d++;
5453 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5637ef5b
NC
5454 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5455 d, PL_bufend);
5db06880
NC
5456#ifdef PERL_MAD
5457 if (PL_madskills)
cd81e915 5458 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5459#endif
5460 s = d;
9c74ccc9
FC
5461 if (in_comment && d == PL_bufend
5462 && PL_lex_state == LEX_INTERPNORMAL
90a536e1 5463 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
9c74ccc9
FC
5464 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5465 else incline(s);
5db06880 5466 }
3280af22 5467 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5468 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5469 start_force(PL_curforce);
5470 NEXTVAL_NEXTTOKE.ival = 0;
5471 force_next(FORMRBRACK);
5472 TOKEN(';');
a687059c 5473 }
378cc40b 5474 }
a687059c 5475 else {
5db06880
NC
5476#ifdef PERL_MAD
5477 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5478 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5479 PL_faketokens = 0;
5db06880
NC
5480 s = SKIPSPACE0(s);
5481 TOKEN(PEG); /* make sure any #! line is accessible */
5482 }
5483 s = SKIPSPACE0(s);
5484 }
5485 else {
5486/* if (PL_madskills && PL_lex_formbrack) { */
5487 d = s;
5488 while (d < PL_bufend && *d != '\n')
5489 d++;
5490 if (d < PL_bufend)
5491 d++;
5492 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5493 Perl_croak(aTHX_ "panic: input overflow");
5494 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5495 if (!PL_thiswhite)
6b29d1f5 5496 PL_thiswhite = newSVpvs("");
5db06880 5497 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5498 sv_setpvs(PL_thiswhite, "");
cd81e915 5499 PL_faketokens = 0;
5db06880 5500 }
cd81e915 5501 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5502 }
5503 s = d;
5504/* }
5505 *s = '\0';
5506 PL_bufend = s; */
5507 }
5508#else
378cc40b 5509 *s = '\0';
3280af22 5510 PL_bufend = s;
5db06880 5511#endif
a687059c 5512 }
378cc40b
LW
5513 goto retry;
5514 case '-':
0eb30aeb 5515 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
e5edeb50 5516 I32 ftst = 0;
90771dc0 5517 char tmp;
e5edeb50 5518
378cc40b 5519 s++;
3280af22 5520 PL_bufptr = s;
748a9306
LW
5521 tmp = *s++;
5522
bf4acbe4 5523 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5524 s++;
5525
5526 if (strnEQ(s,"=>",2)) {
3280af22 5527 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5528 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5529 OPERATOR('-'); /* unary minus */
5530 }
3280af22 5531 PL_last_uni = PL_oldbufptr;
748a9306 5532 switch (tmp) {
e5edeb50
JH
5533 case 'r': ftst = OP_FTEREAD; break;
5534 case 'w': ftst = OP_FTEWRITE; break;
5535 case 'x': ftst = OP_FTEEXEC; break;
5536 case 'o': ftst = OP_FTEOWNED; break;
5537 case 'R': ftst = OP_FTRREAD; break;
5538 case 'W': ftst = OP_FTRWRITE; break;
5539 case 'X': ftst = OP_FTREXEC; break;
5540 case 'O': ftst = OP_FTROWNED; break;
5541 case 'e': ftst = OP_FTIS; break;
5542 case 'z': ftst = OP_FTZERO; break;
5543 case 's': ftst = OP_FTSIZE; break;
5544 case 'f': ftst = OP_FTFILE; break;
5545 case 'd': ftst = OP_FTDIR; break;
5546 case 'l': ftst = OP_FTLINK; break;
5547 case 'p': ftst = OP_FTPIPE; break;
5548 case 'S': ftst = OP_FTSOCK; break;
5549 case 'u': ftst = OP_FTSUID; break;
5550 case 'g': ftst = OP_FTSGID; break;
5551 case 'k': ftst = OP_FTSVTX; break;
5552 case 'b': ftst = OP_FTBLK; break;
5553 case 'c': ftst = OP_FTCHR; break;
5554 case 't': ftst = OP_FTTTY; break;
5555 case 'T': ftst = OP_FTTEXT; break;
5556 case 'B': ftst = OP_FTBINARY; break;
5557 case 'M': case 'A': case 'C':
fafc274c 5558 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5559 switch (tmp) {
5560 case 'M': ftst = OP_FTMTIME; break;
5561 case 'A': ftst = OP_FTATIME; break;
5562 case 'C': ftst = OP_FTCTIME; break;
5563 default: break;
5564 }
5565 break;
378cc40b 5566 default:
378cc40b
LW
5567 break;
5568 }
e5edeb50 5569 if (ftst) {
eb160463 5570 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5571 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5572 "### Saw file test %c\n", (int)tmp);
5f80b19c 5573 } );
e5edeb50
JH
5574 FTST(ftst);
5575 }
5576 else {
5577 /* Assume it was a minus followed by a one-letter named
5578 * subroutine call (or a -bareword), then. */
95c31fe3 5579 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5580 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5581 (int) tmp);
5f80b19c 5582 } );
3cf7b4c4 5583 s = --PL_bufptr;
e5edeb50 5584 }
378cc40b 5585 }
90771dc0
NC
5586 {
5587 const char tmp = *s++;
5588 if (*s == tmp) {
5589 s++;
5590 if (PL_expect == XOPERATOR)
5591 TERM(POSTDEC);
5592 else
5593 OPERATOR(PREDEC);
5594 }
5595 else if (*s == '>') {
5596 s++;
29595ff2 5597 s = SKIPSPACE1(s);
90771dc0
NC
5598 if (isIDFIRST_lazy_if(s,UTF)) {
5599 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5600 TOKEN(ARROW);
5601 }
5602 else if (*s == '$')
5603 OPERATOR(ARROW);
5604 else
5605 TERM(ARROW);
5606 }
78cdf107
Z
5607 if (PL_expect == XOPERATOR) {
5608 if (*s == '=' && !PL_lex_allbrackets &&
5609 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5610 s--;
5611 TOKEN(0);
5612 }
90771dc0 5613 Aop(OP_SUBTRACT);
78cdf107 5614 }
90771dc0
NC
5615 else {
5616 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5617 check_uni();
5618 OPERATOR('-'); /* unary minus */
79072805 5619 }
2f3197b3 5620 }
79072805 5621
378cc40b 5622 case '+':
90771dc0
NC
5623 {
5624 const char tmp = *s++;
5625 if (*s == tmp) {
5626 s++;
5627 if (PL_expect == XOPERATOR)
5628 TERM(POSTINC);
5629 else
5630 OPERATOR(PREINC);
5631 }
78cdf107
Z
5632 if (PL_expect == XOPERATOR) {
5633 if (*s == '=' && !PL_lex_allbrackets &&
5634 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5635 s--;
5636 TOKEN(0);
5637 }
90771dc0 5638 Aop(OP_ADD);
78cdf107 5639 }
90771dc0
NC
5640 else {
5641 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5642 check_uni();
5643 OPERATOR('+');
5644 }
2f3197b3 5645 }
a687059c 5646
378cc40b 5647 case '*':
3280af22
NIS
5648 if (PL_expect != XOPERATOR) {
5649 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5650 PL_expect = XOPERATOR;
5651 force_ident(PL_tokenbuf, '*');
5652 if (!*PL_tokenbuf)
a0d0e21e 5653 PREREF('*');
79072805 5654 TERM('*');
a687059c 5655 }
79072805
LW
5656 s++;
5657 if (*s == '*') {
a687059c 5658 s++;
78cdf107
Z
5659 if (*s == '=' && !PL_lex_allbrackets &&
5660 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5661 s -= 2;
5662 TOKEN(0);
5663 }
79072805 5664 PWop(OP_POW);
a687059c 5665 }
78cdf107
Z
5666 if (*s == '=' && !PL_lex_allbrackets &&
5667 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5668 s--;
5669 TOKEN(0);
5670 }
79072805
LW
5671 Mop(OP_MULTIPLY);
5672
378cc40b 5673 case '%':
3280af22 5674 if (PL_expect == XOPERATOR) {
78cdf107
Z
5675 if (s[1] == '=' && !PL_lex_allbrackets &&
5676 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5677 TOKEN(0);
bbce6d69 5678 ++s;
5679 Mop(OP_MODULO);
a687059c 5680 }
3280af22 5681 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5682 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5683 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5684 if (!PL_tokenbuf[1]) {
bbce6d69 5685 PREREF('%');
a687059c 5686 }
60ac52eb
FC
5687 PL_expect = XOPERATOR;
5688 force_ident_maybe_lex('%');
bbce6d69 5689 TERM('%');
a687059c 5690
378cc40b 5691 case '^':
78cdf107
Z
5692 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5693 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5694 TOKEN(0);
79072805 5695 s++;
a0d0e21e 5696 BOop(OP_BIT_XOR);
79072805 5697 case '[':
a7aaec61
Z
5698 if (PL_lex_brackets > 100)
5699 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5700 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5701 PL_lex_allbrackets++;
df3467db
IG
5702 {
5703 const char tmp = *s++;
5704 OPERATOR(tmp);
5705 }
378cc40b 5706 case '~':
0d863452 5707 if (s[1] == '~'
3e7dd34d 5708 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5709 {
78cdf107
Z
5710 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5711 TOKEN(0);
0d863452
RH
5712 s += 2;
5713 Eop(OP_SMARTMATCH);
5714 }
78cdf107
Z
5715 s++;
5716 OPERATOR('~');
378cc40b 5717 case ',':
78cdf107
Z
5718 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5719 TOKEN(0);
5720 s++;
5721 OPERATOR(',');
a0d0e21e
LW
5722 case ':':
5723 if (s[1] == ':') {
5724 len = 0;
0bfa2a8a 5725 goto just_a_word_zero_gv;
a0d0e21e
LW
5726 }
5727 s++;
09bef843
SB
5728 switch (PL_expect) {
5729 OP *attrs;
5db06880
NC
5730#ifdef PERL_MAD
5731 I32 stuffstart;
5732#endif
09bef843
SB
5733 case XOPERATOR:
5734 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5735 break;
5736 PL_bufptr = s; /* update in case we back off */
d83f38d8 5737 if (*s == '=') {
2dc78664
NC
5738 Perl_croak(aTHX_
5739 "Use of := for an empty attribute list is not allowed");
d83f38d8 5740 }
09bef843
SB
5741 goto grabattrs;
5742 case XATTRBLOCK:
5743 PL_expect = XBLOCK;
5744 goto grabattrs;
5745 case XATTRTERM:
5746 PL_expect = XTERMBLOCK;
5747 grabattrs:
5db06880
NC
5748#ifdef PERL_MAD
5749 stuffstart = s - SvPVX(PL_linestr) - 1;
5750#endif
29595ff2 5751 s = PEEKSPACE(s);
5f66b61c 5752 attrs = NULL;
7e2040f0 5753 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5754 I32 tmp;
5cc237b8 5755 SV *sv;
09bef843 5756 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5757 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5758 if (tmp < 0) tmp = -tmp;
5759 switch (tmp) {
5760 case KEY_or:
5761 case KEY_and:
5762 case KEY_for:
11baf631 5763 case KEY_foreach:
f9829d6b
GS
5764 case KEY_unless:
5765 case KEY_if:
5766 case KEY_while:
5767 case KEY_until:
5768 goto got_attrs;
5769 default:
5770 break;
5771 }
5772 }
89a5757c 5773 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
09bef843 5774 if (*d == '(') {
4d68ffa0 5775 d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
09bef843 5776 if (!d) {
09bef843
SB
5777 /* MUST advance bufptr here to avoid bogus
5778 "at end of line" context messages from yyerror().
5779 */
5780 PL_bufptr = s + len;
5781 yyerror("Unterminated attribute parameter in attribute list");
5782 if (attrs)
5783 op_free(attrs);
5cc237b8 5784 sv_free(sv);
bbf60fe6 5785 return REPORT(0); /* EOF indicator */
09bef843
SB
5786 }
5787 }
5788 if (PL_lex_stuff) {
09bef843 5789 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5790 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5791 newSVOP(OP_CONST, 0, sv));
5792 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5793 PL_lex_stuff = NULL;
09bef843
SB
5794 }
5795 else {
5cc237b8
BS
5796 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5797 sv_free(sv);
1108974d 5798 if (PL_in_my == KEY_our) {
df9a6019 5799 deprecate(":unique");
1108974d 5800 }
bfed75c6 5801 else
371fce9b
DM
5802 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5803 }
5804
d3cea301
SB
5805 /* NOTE: any CV attrs applied here need to be part of
5806 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5807 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5808 sv_free(sv);
78f9721b 5809 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5810 }
5811 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5812 sv_free(sv);
8e5dadda 5813 deprecate(":locked");
5cc237b8
BS
5814 }
5815 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5816 sv_free(sv);
78f9721b 5817 CvMETHOD_on(PL_compcv);
5cc237b8 5818 }
78f9721b
SM
5819 /* After we've set the flags, it could be argued that
5820 we don't need to do the attributes.pm-based setting
5821 process, and shouldn't bother appending recognized
d3cea301
SB
5822 flags. To experiment with that, uncomment the
5823 following "else". (Note that's already been
5824 uncommented. That keeps the above-applied built-in
5825 attributes from being intercepted (and possibly
5826 rejected) by a package's attribute routines, but is
5827 justified by the performance win for the common case
5828 of applying only built-in attributes.) */
0256094b 5829 else
2fcb4757 5830 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5831 newSVOP(OP_CONST, 0,
5cc237b8 5832 sv));
09bef843 5833 }
29595ff2 5834 s = PEEKSPACE(d);
0120eecf 5835 if (*s == ':' && s[1] != ':')
29595ff2 5836 s = PEEKSPACE(s+1);
0120eecf
GS
5837 else if (s == d)
5838 break; /* require real whitespace or :'s */
29595ff2 5839 /* XXX losing whitespace on sequential attributes here */
09bef843 5840 }
90771dc0
NC
5841 {
5842 const char tmp
5843 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5844 if (*s != ';' && *s != '}' && *s != tmp
5845 && (tmp != '=' || *s != ')')) {
5846 const char q = ((*s == '\'') ? '"' : '\'');
5847 /* If here for an expression, and parsed no attrs, back
5848 off. */
5849 if (tmp == '=' && !attrs) {
5850 s = PL_bufptr;
5851 break;
5852 }
5853 /* MUST advance bufptr here to avoid bogus "at end of line"
5854 context messages from yyerror().
5855 */
5856 PL_bufptr = s;
10edeb5d
JH
5857 yyerror( (const char *)
5858 (*s
5859 ? Perl_form(aTHX_ "Invalid separator character "
5860 "%c%c%c in attribute list", q, *s, q)
5861 : "Unterminated attribute list" ) );
90771dc0
NC
5862 if (attrs)
5863 op_free(attrs);
5864 OPERATOR(':');
09bef843 5865 }
09bef843 5866 }
f9829d6b 5867 got_attrs:
09bef843 5868 if (attrs) {
cd81e915 5869 start_force(PL_curforce);
9ded7720 5870 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5871 CURMAD('_', PL_nextwhite);
89122651 5872 force_next(THING);
5db06880
NC
5873 }
5874#ifdef PERL_MAD
5875 if (PL_madskills) {
cd81e915 5876 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5877 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5878 }
5db06880 5879#endif
09bef843
SB
5880 TOKEN(COLONATTR);
5881 }
78cdf107
Z
5882 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5883 s--;
5884 TOKEN(0);
5885 }
5886 PL_lex_allbrackets--;
a0d0e21e 5887 OPERATOR(':');
8990e307
LW
5888 case '(':
5889 s++;
3280af22
NIS
5890 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5891 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5892 else
3280af22 5893 PL_expect = XTERM;
29595ff2 5894 s = SKIPSPACE1(s);
78cdf107 5895 PL_lex_allbrackets++;
a0d0e21e 5896 TOKEN('(');
378cc40b 5897 case ';':
78cdf107
Z
5898 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5899 TOKEN(0);
f4dd75d9 5900 CLINE;
78cdf107
Z
5901 s++;
5902 OPERATOR(';');
378cc40b 5903 case ')':
78cdf107
Z
5904 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5905 TOKEN(0);
5906 s++;
5907 PL_lex_allbrackets--;
5908 s = SKIPSPACE1(s);
5909 if (*s == '{')
5910 PREBLOCK(')');
5911 TERM(')');
79072805 5912 case ']':
a7aaec61
Z
5913 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5914 TOKEN(0);
79072805 5915 s++;
3280af22 5916 if (PL_lex_brackets <= 0)
d98d5fff 5917 yyerror("Unmatched right square bracket");
463ee0b2 5918 else
3280af22 5919 --PL_lex_brackets;
78cdf107 5920 PL_lex_allbrackets--;
3280af22
NIS
5921 if (PL_lex_state == LEX_INTERPNORMAL) {
5922 if (PL_lex_brackets == 0) {
02255c60
FC
5923 if (*s == '-' && s[1] == '>')
5924 PL_lex_state = LEX_INTERPENDMAYBE;
5925 else if (*s != '[' && *s != '{')
3280af22 5926 PL_lex_state = LEX_INTERPEND;
79072805
LW
5927 }
5928 }
4633a7c4 5929 TERM(']');
79072805 5930 case '{':
79072805 5931 s++;
eaf6a13d 5932 leftbracket:
3280af22 5933 if (PL_lex_brackets > 100) {
8edd5f42 5934 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5935 }
3280af22 5936 switch (PL_expect) {
a0d0e21e 5937 case XTERM:
819b004e 5938 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5939 PL_lex_allbrackets++;
79072805 5940 OPERATOR(HASHBRACK);
a0d0e21e 5941 case XOPERATOR:
bf4acbe4 5942 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5943 s++;
44a8e56a 5944 d = s;
3280af22
NIS
5945 PL_tokenbuf[0] = '\0';
5946 if (d < PL_bufend && *d == '-') {
5947 PL_tokenbuf[0] = '-';
44a8e56a 5948 d++;
bf4acbe4 5949 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5950 d++;
5951 }
7e2040f0 5952 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5953 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5954 FALSE, &len);
bf4acbe4 5955 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5956 d++;
5957 if (*d == '}') {
f54cb97a 5958 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5959 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5960 if (minus)
5961 force_next('-');
748a9306
LW
5962 }
5963 }
5964 /* FALL THROUGH */
09bef843 5965 case XATTRBLOCK:
748a9306 5966 case XBLOCK:
3280af22 5967 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5968 PL_lex_allbrackets++;
3280af22 5969 PL_expect = XSTATE;
a0d0e21e 5970 break;
09bef843 5971 case XATTRTERM:
a0d0e21e 5972 case XTERMBLOCK:
3280af22 5973 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5974 PL_lex_allbrackets++;
3280af22 5975 PL_expect = XSTATE;
a0d0e21e
LW
5976 break;
5977 default: {
f54cb97a 5978 const char *t;
3280af22
NIS
5979 if (PL_oldoldbufptr == PL_last_lop)
5980 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5981 else
3280af22 5982 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5983 PL_lex_allbrackets++;
29595ff2 5984 s = SKIPSPACE1(s);
8452ff4b
SB
5985 if (*s == '}') {
5986 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5987 PL_expect = XTERM;
5988 /* This hack is to get the ${} in the message. */
5989 PL_bufptr = s+1;
5990 yyerror("syntax error");
5991 break;
5992 }
a0d0e21e 5993 OPERATOR(HASHBRACK);
8452ff4b 5994 }
b8a4b1be
GS
5995 /* This hack serves to disambiguate a pair of curlies
5996 * as being a block or an anon hash. Normally, expectation
5997 * determines that, but in cases where we're not in a
5998 * position to expect anything in particular (like inside
5999 * eval"") we have to resolve the ambiguity. This code
6000 * covers the case where the first term in the curlies is a
6001 * quoted string. Most other cases need to be explicitly
a0288114 6002 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
6003 * curly in order to force resolution as an anon hash.
6004 *
6005 * XXX should probably propagate the outer expectation
6006 * into eval"" to rely less on this hack, but that could
6007 * potentially break current behavior of eval"".
6008 * GSAR 97-07-21
6009 */
6010 t = s;
6011 if (*s == '\'' || *s == '"' || *s == '`') {
6012 /* common case: get past first string, handling escapes */
3280af22 6013 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
6014 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6015 t++;
6016 t++;
a0d0e21e 6017 }
b8a4b1be 6018 else if (*s == 'q') {
3280af22 6019 if (++t < PL_bufend
0eb30aeb 6020 && (!isWORDCHAR(*t)
3280af22 6021 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0eb30aeb 6022 && !isWORDCHAR(*t))))
0505442f 6023 {
abc667d1 6024 /* skip q//-like construct */
f54cb97a 6025 const char *tmps;
b8a4b1be
GS
6026 char open, close, term;
6027 I32 brackets = 1;
6028
3280af22 6029 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 6030 t++;
abc667d1
DM
6031 /* check for q => */
6032 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6033 OPERATOR(HASHBRACK);
6034 }
b8a4b1be
GS
6035 term = *t;
6036 open = term;
6037 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6038 term = tmps[5];
6039 close = term;
6040 if (open == close)
3280af22
NIS
6041 for (t++; t < PL_bufend; t++) {
6042 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 6043 t++;
6d07e5e9 6044 else if (*t == open)
b8a4b1be
GS
6045 break;
6046 }
abc667d1 6047 else {
3280af22
NIS
6048 for (t++; t < PL_bufend; t++) {
6049 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 6050 t++;
6d07e5e9 6051 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
6052 break;
6053 else if (*t == open)
6054 brackets++;
6055 }
abc667d1
DM
6056 }
6057 t++;
b8a4b1be 6058 }
abc667d1
DM
6059 else
6060 /* skip plain q word */
8a2bca12 6061 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
abc667d1 6062 t += UTF8SKIP(t);
a0d0e21e 6063 }
8a2bca12 6064 else if (isWORDCHAR_lazy_if(t,UTF)) {
0505442f 6065 t += UTF8SKIP(t);
8a2bca12 6066 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
0505442f 6067 t += UTF8SKIP(t);
a0d0e21e 6068 }
3280af22 6069 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 6070 t++;
b8a4b1be
GS
6071 /* if comma follows first term, call it an anon hash */
6072 /* XXX it could be a comma expression with loop modifiers */
3280af22 6073 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 6074 || (*t == '=' && t[1] == '>')))
a0d0e21e 6075 OPERATOR(HASHBRACK);
3280af22 6076 if (PL_expect == XREF)
4e4e412b 6077 PL_expect = XTERM;
a0d0e21e 6078 else {
3280af22
NIS
6079 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6080 PL_expect = XSTATE;
a0d0e21e 6081 }
8990e307 6082 }
a0d0e21e 6083 break;
463ee0b2 6084 }
6154021b 6085 pl_yylval.ival = CopLINE(PL_curcop);
79072805 6086 if (isSPACE(*s) || *s == '#')
3280af22 6087 PL_copline = NOLINE; /* invalidate current command line number */
7c70caa5 6088 TOKEN(formbrack ? '=' : '{');
378cc40b 6089 case '}':
a7aaec61
Z
6090 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6091 TOKEN(0);
79072805
LW
6092 rightbracket:
6093 s++;
3280af22 6094 if (PL_lex_brackets <= 0)
d98d5fff 6095 yyerror("Unmatched right curly bracket");
463ee0b2 6096 else
3280af22 6097 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 6098 PL_lex_allbrackets--;
3280af22
NIS
6099 if (PL_lex_state == LEX_INTERPNORMAL) {
6100 if (PL_lex_brackets == 0) {
9059aa12
LW
6101 if (PL_expect & XFAKEBRACK) {
6102 PL_expect &= XENUMMASK;
3280af22
NIS
6103 PL_lex_state = LEX_INTERPEND;
6104 PL_bufptr = s;
5db06880
NC
6105#if 0
6106 if (PL_madskills) {
cd81e915 6107 if (!PL_thiswhite)
6b29d1f5 6108 PL_thiswhite = newSVpvs("");
76f68e9b 6109 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
6110 }
6111#endif
cea2e8a9 6112 return yylex(); /* ignore fake brackets */
79072805 6113 }
f777953f 6114 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6b00f562
FC
6115 && SvEVALED(PL_lex_repl))
6116 PL_lex_state = LEX_INTERPEND;
6117 else if (*s == '-' && s[1] == '>')
3280af22 6118 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 6119 else if (*s != '[' && *s != '{')
3280af22 6120 PL_lex_state = LEX_INTERPEND;
79072805
LW
6121 }
6122 }
9059aa12
LW
6123 if (PL_expect & XFAKEBRACK) {
6124 PL_expect &= XENUMMASK;
3280af22 6125 PL_bufptr = s;
cea2e8a9 6126 return yylex(); /* ignore fake brackets */
748a9306 6127 }
cd81e915 6128 start_force(PL_curforce);
5db06880
NC
6129 if (PL_madskills) {
6130 curmad('X', newSVpvn(s-1,1));
cd81e915 6131 CURMAD('_', PL_thiswhite);
5db06880 6132 }
7c70caa5 6133 force_next(formbrack ? '.' : '}');
583c9d5c 6134 if (formbrack) LEAVE;
5db06880 6135#ifdef PERL_MAD
1cac5c33 6136 if (PL_madskills && !PL_thistoken)
6b29d1f5 6137 PL_thistoken = newSVpvs("");
5db06880 6138#endif
705fe0e5
FC
6139 if (formbrack == 2) { /* means . where arguments were expected */
6140 start_force(PL_curforce);
6141 force_next(';');
96f9b782 6142 TOKEN(FORMRBRACK);
705fe0e5 6143 }
79072805 6144 TOKEN(';');
378cc40b
LW
6145 case '&':
6146 s++;
78cdf107
Z
6147 if (*s++ == '&') {
6148 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6149 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6150 s -= 2;
6151 TOKEN(0);
6152 }
a0d0e21e 6153 AOPERATOR(ANDAND);
78cdf107 6154 }
378cc40b 6155 s--;
3280af22 6156 if (PL_expect == XOPERATOR) {
041457d9
DM
6157 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6158 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 6159 {
57843af0 6160 CopLINE_dec(PL_curcop);
f1f66076 6161 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6162 CopLINE_inc(PL_curcop);
463ee0b2 6163 }
78cdf107
Z
6164 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6165 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6166 s--;
6167 TOKEN(0);
6168 }
79072805 6169 BAop(OP_BIT_AND);
463ee0b2 6170 }
79072805 6171
c07656ed
FC
6172 PL_tokenbuf[0] = '&';
6173 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6174 sizeof PL_tokenbuf - 1, TRUE);
6175 if (PL_tokenbuf[1]) {
3280af22 6176 PL_expect = XOPERATOR;
60ac52eb 6177 force_ident_maybe_lex('&');
463ee0b2 6178 }
79072805
LW
6179 else
6180 PREREF('&');
6154021b 6181 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
6182 TERM('&');
6183
378cc40b
LW
6184 case '|':
6185 s++;
78cdf107
Z
6186 if (*s++ == '|') {
6187 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6188 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6189 s -= 2;
6190 TOKEN(0);
6191 }
a0d0e21e 6192 AOPERATOR(OROR);
78cdf107 6193 }
378cc40b 6194 s--;
78cdf107
Z
6195 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6196 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6197 s--;
6198 TOKEN(0);
6199 }
79072805 6200 BOop(OP_BIT_OR);
378cc40b
LW
6201 case '=':
6202 s++;
748a9306 6203 {
90771dc0 6204 const char tmp = *s++;
78cdf107
Z
6205 if (tmp == '=') {
6206 if (!PL_lex_allbrackets &&
6207 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6208 s -= 2;
6209 TOKEN(0);
6210 }
90771dc0 6211 Eop(OP_EQ);
78cdf107
Z
6212 }
6213 if (tmp == '>') {
6214 if (!PL_lex_allbrackets &&
6215 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6216 s -= 2;
6217 TOKEN(0);
6218 }
90771dc0 6219 OPERATOR(',');
78cdf107 6220 }
90771dc0
NC
6221 if (tmp == '~')
6222 PMop(OP_MATCH);
6223 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6224 && strchr("+-*/%.^&|<",tmp))
6225 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6226 "Reversed %c= operator",(int)tmp);
6227 s--;
6228 if (PL_expect == XSTATE && isALPHA(tmp) &&
6229 (s == PL_linestart+1 || s[-2] == '\n') )
6230 {
62e4c90a 6231 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
4a7239ff 6232 || PL_lex_state != LEX_NORMAL) {
90771dc0
NC
6233 d = PL_bufend;
6234 while (s < d) {
6235 if (*s++ == '\n') {
6236 incline(s);
6237 if (strnEQ(s,"=cut",4)) {
6238 s = strchr(s,'\n');
6239 if (s)
6240 s++;
6241 else
6242 s = d;
6243 incline(s);
6244 goto retry;
6245 }
6246 }
a5f75d66 6247 }
90771dc0 6248 goto retry;
a5f75d66 6249 }
5db06880
NC
6250#ifdef PERL_MAD
6251 if (PL_madskills) {
cd81e915 6252 if (!PL_thiswhite)
6b29d1f5 6253 PL_thiswhite = newSVpvs("");
cd81e915 6254 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
6255 PL_bufend - PL_linestart);
6256 }
6257#endif
90771dc0 6258 s = PL_bufend;
737c24fc 6259 PL_parser->in_pod = 1;
90771dc0 6260 goto retry;
a5f75d66 6261 }
a0d0e21e 6262 }
64a40898 6263 if (PL_expect == XBLOCK) {
c35e046a 6264 const char *t = s;
51882d45 6265#ifdef PERL_STRICT_CR
c35e046a 6266 while (SPACE_OR_TAB(*t))
51882d45 6267#else
c35e046a 6268 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 6269#endif
c35e046a 6270 t++;
a0d0e21e 6271 if (*t == '\n' || *t == '#') {
705fe0e5 6272 formbrack = 1;
583c9d5c
FC
6273 ENTER;
6274 SAVEI8(PL_parser->form_lex_state);
64a40898 6275 SAVEI32(PL_lex_formbrack);
583c9d5c 6276 PL_parser->form_lex_state = PL_lex_state;
64a40898 6277 PL_lex_formbrack = PL_lex_brackets + 1;
a0d0e21e
LW
6278 goto leftbracket;
6279 }
79072805 6280 }
78cdf107
Z
6281 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6282 s--;
6283 TOKEN(0);
6284 }
6154021b 6285 pl_yylval.ival = 0;
a0d0e21e 6286 OPERATOR(ASSIGNOP);
378cc40b
LW
6287 case '!':
6288 s++;
90771dc0
NC
6289 {
6290 const char tmp = *s++;
6291 if (tmp == '=') {
6292 /* was this !=~ where !~ was meant?
6293 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6294
6295 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6296 const char *t = s+1;
6297
6298 while (t < PL_bufend && isSPACE(*t))
6299 ++t;
6300
6301 if (*t == '/' || *t == '?' ||
6302 ((*t == 'm' || *t == 's' || *t == 'y')
0eb30aeb
KW
6303 && !isWORDCHAR(t[1])) ||
6304 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
90771dc0
NC
6305 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6306 "!=~ should be !~");
6307 }
78cdf107
Z
6308 if (!PL_lex_allbrackets &&
6309 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6310 s -= 2;
6311 TOKEN(0);
6312 }
90771dc0
NC
6313 Eop(OP_NE);
6314 }
6315 if (tmp == '~')
6316 PMop(OP_NOT);
6317 }
378cc40b
LW
6318 s--;
6319 OPERATOR('!');
6320 case '<':
3280af22 6321 if (PL_expect != XOPERATOR) {
93a17b20 6322 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 6323 check_uni();
79072805
LW
6324 if (s[1] == '<')
6325 s = scan_heredoc(s);
6326 else
6327 s = scan_inputsymbol(s);
78a635de
FC
6328 PL_expect = XOPERATOR;
6329 TOKEN(sublex_start());
378cc40b
LW
6330 }
6331 s++;
90771dc0
NC
6332 {
6333 char tmp = *s++;
78cdf107
Z
6334 if (tmp == '<') {
6335 if (*s == '=' && !PL_lex_allbrackets &&
6336 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6337 s -= 2;
6338 TOKEN(0);
6339 }
90771dc0 6340 SHop(OP_LEFT_SHIFT);
78cdf107 6341 }
90771dc0
NC
6342 if (tmp == '=') {
6343 tmp = *s++;
78cdf107
Z
6344 if (tmp == '>') {
6345 if (!PL_lex_allbrackets &&
6346 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6347 s -= 3;
6348 TOKEN(0);
6349 }
90771dc0 6350 Eop(OP_NCMP);
78cdf107 6351 }
90771dc0 6352 s--;
78cdf107
Z
6353 if (!PL_lex_allbrackets &&
6354 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6355 s -= 2;
6356 TOKEN(0);
6357 }
90771dc0
NC
6358 Rop(OP_LE);
6359 }
395c3793 6360 }
378cc40b 6361 s--;
78cdf107
Z
6362 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6363 s--;
6364 TOKEN(0);
6365 }
79072805 6366 Rop(OP_LT);
378cc40b
LW
6367 case '>':
6368 s++;
90771dc0
NC
6369 {
6370 const char tmp = *s++;
78cdf107
Z
6371 if (tmp == '>') {
6372 if (*s == '=' && !PL_lex_allbrackets &&
6373 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6374 s -= 2;
6375 TOKEN(0);
6376 }
90771dc0 6377 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
6378 }
6379 else if (tmp == '=') {
6380 if (!PL_lex_allbrackets &&
6381 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6382 s -= 2;
6383 TOKEN(0);
6384 }
90771dc0 6385 Rop(OP_GE);
78cdf107 6386 }
90771dc0 6387 }
378cc40b 6388 s--;
78cdf107
Z
6389 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6390 s--;
6391 TOKEN(0);
6392 }
79072805 6393 Rop(OP_GT);
378cc40b
LW
6394
6395 case '$':
bbce6d69 6396 CLINE;
6397
3280af22
NIS
6398 if (PL_expect == XOPERATOR) {
6399 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6400 return deprecate_commaless_var_list();
a0d0e21e 6401 }
8990e307 6402 }
a0d0e21e 6403
c0b977fd 6404 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6405 PL_tokenbuf[0] = '@';
376b8730
SM
6406 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6407 sizeof PL_tokenbuf - 1, FALSE);
6408 if (PL_expect == XOPERATOR)
6409 no_op("Array length", s);
3280af22 6410 if (!PL_tokenbuf[1])
a0d0e21e 6411 PREREF(DOLSHARP);
3280af22 6412 PL_expect = XOPERATOR;
60ac52eb 6413 force_ident_maybe_lex('#');
463ee0b2 6414 TOKEN(DOLSHARP);
79072805 6415 }
bbce6d69 6416
3280af22 6417 PL_tokenbuf[0] = '$';
376b8730
SM
6418 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6419 sizeof PL_tokenbuf - 1, FALSE);
6420 if (PL_expect == XOPERATOR)
6421 no_op("Scalar", s);
3280af22
NIS
6422 if (!PL_tokenbuf[1]) {
6423 if (s == PL_bufend)
bbce6d69 6424 yyerror("Final $ should be \\$ or $name");
6425 PREREF('$');
8990e307 6426 }
a0d0e21e 6427
ff68c719 6428 d = s;
90771dc0
NC
6429 {
6430 const char tmp = *s;
ae28bb2a 6431 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6432 s = SKIPSPACE1(s);
ff68c719 6433
90771dc0
NC
6434 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6435 && intuit_more(s)) {
6436 if (*s == '[') {
6437 PL_tokenbuf[0] = '@';
6438 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6439 char *t = s+1;
6440
8a2bca12 6441 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
c35e046a 6442 t++;
90771dc0 6443 if (*t++ == ',') {
29595ff2 6444 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6445 while (t < PL_bufend && *t != ']')
6446 t++;
9014280d 6447 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6448 "Multidimensional syntax %.*s not supported",
36c7798d 6449 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6450 }
748a9306 6451 }
93a17b20 6452 }
90771dc0
NC
6453 else if (*s == '{') {
6454 char *t;
6455 PL_tokenbuf[0] = '%';
6456 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6457 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6458 {
6459 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6460 do {
6461 t++;
6462 } while (isSPACE(*t));
90771dc0 6463 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6464 STRLEN len;
90771dc0 6465 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6466 &len);
c35e046a
AL
6467 while (isSPACE(*t))
6468 t++;
4c01a014
BF
6469 if (*t == ';'
6470 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
90771dc0 6471 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4c01a014
BF
6472 "You need to quote \"%"SVf"\"",
6473 SVfARG(newSVpvn_flags(tmpbuf, len,
6474 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
90771dc0
NC
6475 }
6476 }
6477 }
93a17b20 6478 }
bbce6d69 6479
90771dc0
NC
6480 PL_expect = XOPERATOR;
6481 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6482 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6483 if (!islop || PL_last_lop_op == OP_GREPSTART)
6484 PL_expect = XOPERATOR;
6485 else if (strchr("$@\"'`q", *s))
6486 PL_expect = XTERM; /* e.g. print $fh "foo" */
6487 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6488 PL_expect = XTERM; /* e.g. print $fh &sub */
6489 else if (isIDFIRST_lazy_if(s,UTF)) {
6490 char tmpbuf[sizeof PL_tokenbuf];
6491 int t2;
6492 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6493 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6494 /* binary operators exclude handle interpretations */
6495 switch (t2) {
6496 case -KEY_x:
6497 case -KEY_eq:
6498 case -KEY_ne:
6499 case -KEY_gt:
6500 case -KEY_lt:
6501 case -KEY_ge:
6502 case -KEY_le:
6503 case -KEY_cmp:
6504 break;
6505 default:
6506 PL_expect = XTERM; /* e.g. print $fh length() */
6507 break;
6508 }
6509 }
6510 else {
6511 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6512 }
6513 }
90771dc0
NC
6514 else if (isDIGIT(*s))
6515 PL_expect = XTERM; /* e.g. print $fh 3 */
6516 else if (*s == '.' && isDIGIT(s[1]))
6517 PL_expect = XTERM; /* e.g. print $fh .3 */
6518 else if ((*s == '?' || *s == '-' || *s == '+')
6519 && !isSPACE(s[1]) && s[1] != '=')
6520 PL_expect = XTERM; /* e.g. print $fh -1 */
6521 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6522 && s[1] != '/')
6523 PL_expect = XTERM; /* e.g. print $fh /.../
6524 XXX except DORDOR operator
6525 */
6526 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6527 && s[2] != '=')
6528 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6529 }
bbce6d69 6530 }
60ac52eb 6531 force_ident_maybe_lex('$');
79072805 6532 TOKEN('$');
378cc40b
LW
6533
6534 case '@':
3280af22 6535 if (PL_expect == XOPERATOR)
bbce6d69 6536 no_op("Array", s);
3280af22
NIS
6537 PL_tokenbuf[0] = '@';
6538 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6539 if (!PL_tokenbuf[1]) {
bbce6d69 6540 PREREF('@');
6541 }
3280af22 6542 if (PL_lex_state == LEX_NORMAL)
29595ff2 6543 s = SKIPSPACE1(s);
3280af22 6544 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6545 if (*s == '{')
3280af22 6546 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6547
6548 /* Warn about @ where they meant $. */
041457d9
DM
6549 if (*s == '[' || *s == '{') {
6550 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6551 const char *t = s + 1;
8a2bca12 6552 while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
b9e186cd 6553 t += UTF ? UTF8SKIP(t) : 1;
a0d0e21e
LW
6554 if (*t == '}' || *t == ']') {
6555 t++;
29595ff2 6556 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
dcbac5bb 6557 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
9014280d 6558 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
b9e186cd
BF
6559 "Scalar value %"SVf" better written as $%"SVf,
6560 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6561 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6562 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6563 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
a0d0e21e 6564 }
93a17b20
LW
6565 }
6566 }
463ee0b2 6567 }
60ac52eb
FC
6568 PL_expect = XOPERATOR;
6569 force_ident_maybe_lex('@');
79072805 6570 TERM('@');
378cc40b 6571
c963b151 6572 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6573 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6574 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6575 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6576 TOKEN(0);
6f33ba73
RGS
6577 s += 2;
6578 AOPERATOR(DORDOR);
6579 }
c963b151 6580 case '?': /* may either be conditional or pattern */
be25f609 6581 if (PL_expect == XOPERATOR) {
90771dc0 6582 char tmp = *s++;
c963b151 6583 if(tmp == '?') {
78cdf107
Z
6584 if (!PL_lex_allbrackets &&
6585 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6586 s--;
6587 TOKEN(0);
6588 }
6589 PL_lex_allbrackets++;
be25f609 6590 OPERATOR('?');
c963b151
BD
6591 }
6592 else {
6593 tmp = *s++;
6594 if(tmp == '/') {
6595 /* A // operator. */
78cdf107
Z
6596 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6597 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6598 LEX_FAKEEOF_LOGIC)) {
6599 s -= 2;
6600 TOKEN(0);
6601 }
c963b151
BD
6602 AOPERATOR(DORDOR);
6603 }
6604 else {
6605 s--;
78cdf107
Z
6606 if (*s == '=' && !PL_lex_allbrackets &&
6607 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6608 s--;
6609 TOKEN(0);
6610 }
c963b151
BD
6611 Mop(OP_DIVIDE);
6612 }
6613 }
6614 }
6615 else {
6616 /* Disable warning on "study /blah/" */
6617 if (PL_oldoldbufptr == PL_last_uni
6618 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6619 || memNE(PL_last_uni, "study", 5)
8a2bca12 6620 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
c963b151
BD
6621 ))
6622 check_uni();
725a61d7
Z
6623 if (*s == '?')
6624 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6625 s = scan_pat(s,OP_MATCH);
6626 TERM(sublex_start());
6627 }
378cc40b
LW
6628
6629 case '.':
51882d45
GS
6630 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6631#ifdef PERL_STRICT_CR
6632 && s[1] == '\n'
6633#else
6634 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6635#endif
6636 && (s == PL_linestart || s[-1] == '\n') )
6637 {
3280af22 6638 PL_expect = XSTATE;
705fe0e5 6639 formbrack = 2; /* dot seen where arguments expected */
79072805
LW
6640 goto rightbracket;
6641 }
be25f609 6642 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6643 s += 3;
6644 OPERATOR(YADAYADA);
6645 }
3280af22 6646 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6647 char tmp = *s++;
a687059c 6648 if (*s == tmp) {
78cdf107
Z
6649 if (!PL_lex_allbrackets &&
6650 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6651 s--;
6652 TOKEN(0);
6653 }
a687059c 6654 s++;
2f3197b3
LW
6655 if (*s == tmp) {
6656 s++;
6154021b 6657 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6658 }
6659 else
6154021b 6660 pl_yylval.ival = 0;
378cc40b 6661 OPERATOR(DOTDOT);
a687059c 6662 }
78cdf107
Z
6663 if (*s == '=' && !PL_lex_allbrackets &&
6664 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6665 s--;
6666 TOKEN(0);
6667 }
79072805 6668 Aop(OP_CONCAT);
378cc40b
LW
6669 }
6670 /* FALL THROUGH */
6671 case '0': case '1': case '2': case '3': case '4':
6672 case '5': case '6': case '7': case '8': case '9':
6154021b 6673 s = scan_num(s, &pl_yylval);
931e0695 6674 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6675 if (PL_expect == XOPERATOR)
8990e307 6676 no_op("Number",s);
79072805
LW
6677 TERM(THING);
6678
6679 case '\'':
4d68ffa0 6680 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
931e0695 6681 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6682 if (PL_expect == XOPERATOR) {
6683 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6684 return deprecate_commaless_var_list();
a0d0e21e 6685 }
463ee0b2 6686 else
8990e307 6687 no_op("String",s);
463ee0b2 6688 }
79072805 6689 if (!s)
d4c19fe8 6690 missingterm(NULL);
6154021b 6691 pl_yylval.ival = OP_CONST;
79072805
LW
6692 TERM(sublex_start());
6693
6694 case '"':
4d68ffa0 6695 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
931e0695 6696 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6697 if (PL_expect == XOPERATOR) {
6698 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6699 return deprecate_commaless_var_list();
a0d0e21e 6700 }
463ee0b2 6701 else
8990e307 6702 no_op("String",s);
463ee0b2 6703 }
79072805 6704 if (!s)
d4c19fe8 6705 missingterm(NULL);
6154021b 6706 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6707 /* FIXME. I think that this can be const if char *d is replaced by
6708 more localised variables. */
3280af22 6709 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6710 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6711 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6712 break;
6713 }
6714 }
79072805
LW
6715 TERM(sublex_start());
6716
6717 case '`':
4d68ffa0 6718 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
931e0695 6719 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6720 if (PL_expect == XOPERATOR)
8990e307 6721 no_op("Backticks",s);
79072805 6722 if (!s)
d4c19fe8 6723 missingterm(NULL);
9b201d7d 6724 readpipe_override();
79072805
LW
6725 TERM(sublex_start());
6726
6727 case '\\':
6728 s++;
a2a5de95
NC
6729 if (PL_lex_inwhat && isDIGIT(*s))
6730 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6731 *s, *s);
3280af22 6732 if (PL_expect == XOPERATOR)
8990e307 6733 no_op("Backslash",s);
79072805
LW
6734 OPERATOR(REFGEN);
6735
a7cb1f99 6736 case 'v':
e526c9e6 6737 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6738 char *start = s + 2;
dd629d5b 6739 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6740 start++;
6741 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6742 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6743 TERM(THING);
6744 }
e9d2327d
FC
6745 else if ((*start == ':' && start[1] == ':')
6746 || (PL_expect == XSTATE && *start == ':'))
6747 goto keylookup;
6748 else if (PL_expect == XSTATE) {
6749 d = start;
6750 while (d < PL_bufend && isSPACE(*d)) d++;
6751 if (*d == ':') goto keylookup;
6752 }
e526c9e6 6753 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
e9d2327d 6754 if (!isALPHA(*start) && (PL_expect == XTERM
6f33ba73
RGS
6755 || PL_expect == XREF || PL_expect == XSTATE
6756 || PL_expect == XTERMORDORDOR)) {
af9f5953
BF
6757 GV *const gv = gv_fetchpvn_flags(s, start - s,
6758 UTF ? SVf_UTF8 : 0, SVt_PVCV);
e526c9e6 6759 if (!gv) {
6154021b 6760 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6761 TERM(THING);
6762 }
6763 }
a7cb1f99
GS
6764 }
6765 goto keylookup;
79072805 6766 case 'x':
3280af22 6767 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6768 s++;
6769 Mop(OP_REPEAT);
2f3197b3 6770 }
79072805
LW
6771 goto keylookup;
6772
378cc40b 6773 case '_':
79072805
LW
6774 case 'a': case 'A':
6775 case 'b': case 'B':
6776 case 'c': case 'C':
6777 case 'd': case 'D':
6778 case 'e': case 'E':
6779 case 'f': case 'F':
6780 case 'g': case 'G':
6781 case 'h': case 'H':
6782 case 'i': case 'I':
6783 case 'j': case 'J':
6784 case 'k': case 'K':
6785 case 'l': case 'L':
6786 case 'm': case 'M':
6787 case 'n': case 'N':
6788 case 'o': case 'O':
6789 case 'p': case 'P':
6790 case 'q': case 'Q':
6791 case 'r': case 'R':
6792 case 's': case 'S':
6793 case 't': case 'T':
6794 case 'u': case 'U':
a7cb1f99 6795 case 'V':
79072805
LW
6796 case 'w': case 'W':
6797 case 'X':
6798 case 'y': case 'Y':
6799 case 'z': case 'Z':
6800
49dc05e3 6801 keylookup: {
88e1f1a2 6802 bool anydelim;
18f70389 6803 bool lex;
90771dc0 6804 I32 tmp;
18f70389 6805 SV *sv;
73f3e228
FC
6806 CV *cv;
6807 PADOFFSET off;
6808 OP *rv2cv_op;
10edeb5d 6809
18f70389 6810 lex = FALSE;
10edeb5d 6811 orig_keyword = 0;
73f3e228 6812 off = 0;
18f70389 6813 sv = NULL;
73f3e228 6814 cv = NULL;
10edeb5d
JH
6815 gv = NULL;
6816 gvp = NULL;
73f3e228 6817 rv2cv_op = NULL;
49dc05e3 6818
3280af22
NIS
6819 PL_bufptr = s;
6820 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6821
6822 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6823 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6824
6825 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6826 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6827 goto just_a_word;
6828
3643fb5f 6829 d = s;
3280af22 6830 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6831 d++; /* no comments skipped here, or s### is misparsed */
6832
748a9306 6833 /* Is this a word before a => operator? */
1c3923b3 6834 if (*d == '=' && d[1] == '>') {
748a9306 6835 CLINE;
6154021b 6836 pl_yylval.opval
d0a148a6
NC
6837 = (OP*)newSVOP(OP_CONST, 0,
6838 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6839 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6840 TERM(WORD);
6841 }
6842
88e1f1a2
JV
6843 /* Check for plugged-in keyword */
6844 {
6845 OP *o;
6846 int result;
6847 char *saved_bufptr = PL_bufptr;
6848 PL_bufptr = s;
16c91539 6849 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6850 s = PL_bufptr;
6851 if (result == KEYWORD_PLUGIN_DECLINE) {
6852 /* not a plugged-in keyword */
6853 PL_bufptr = saved_bufptr;
6854 } else if (result == KEYWORD_PLUGIN_STMT) {
6855 pl_yylval.opval = o;
6856 CLINE;
6857 PL_expect = XSTATE;
6858 return REPORT(PLUGSTMT);
6859 } else if (result == KEYWORD_PLUGIN_EXPR) {
6860 pl_yylval.opval = o;
6861 CLINE;
6862 PL_expect = XOPERATOR;
6863 return REPORT(PLUGEXPR);
6864 } else {
6865 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6866 PL_tokenbuf);
6867 }
6868 }
6869
6870 /* Check for built-in keyword */
6871 tmp = keyword(PL_tokenbuf, len, 0);
6872
6873 /* Is this a label? */
6874 if (!anydelim && PL_expect == XSTATE
6875 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2 6876 s = d + 1;
5504e6cf
FC
6877 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6878 pl_yylval.pval[len] = '\0';
6879 pl_yylval.pval[len+1] = UTF ? 1 : 0;
88e1f1a2
JV
6880 CLINE;
6881 TOKEN(LABEL);
6882 }
6883
18f70389
FC
6884 /* Check for lexical sub */
6885 if (PL_expect != XOPERATOR) {
6886 char tmpbuf[sizeof PL_tokenbuf + 1];
18f70389
FC
6887 *tmpbuf = '&';
6888 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6889 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6890 if (off != NOT_IN_PAD) {
73f3e228 6891 assert(off); /* we assume this is boolean-true below */
18f70389
FC
6892 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6893 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6894 HEK * const stashname = HvNAME_HEK(stash);
6895 sv = newSVhek(stashname);
6896 sv_catpvs(sv, "::");
6897 sv_catpvn_flags(sv, PL_tokenbuf, len,
6898 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6899 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6900 SVt_PVCV);
73f3e228 6901 off = 0;
18f70389 6902 }
73f3e228
FC
6903 else {
6904 rv2cv_op = newOP(OP_PADANY, 0);
6905 rv2cv_op->op_targ = off;
6906 rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
6907 cv = (CV *)PAD_SV(off);
6908 }
6909 lex = TRUE;
6910 goto just_a_word;
18f70389 6911 }
73f3e228 6912 off = 0;
18f70389
FC
6913 }
6914
a0d0e21e 6915 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6916 GV *ogv = NULL; /* override (winner) */
6917 GV *hgv = NULL; /* hidden (loser) */
3280af22 6918 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6919 CV *cv;
af9f5953
BF
6920 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6921 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
56f7f34b
CS
6922 (cv = GvCVu(gv)))
6923 {
6924 if (GvIMPORTED_CV(gv))
6925 ogv = gv;
6926 else if (! CvMETHOD(cv))
6927 hgv = gv;
6928 }
6929 if (!ogv &&
af9f5953 6930 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
c60dbbc3 6931 UTF ? -(I32)len : (I32)len, FALSE)) &&
9e0d86f8 6932 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6933 GvCVu(gv) && GvIMPORTED_CV(gv))
6934 {
6935 ogv = gv;
6936 }
6937 }
6938 if (ogv) {
30fe34ed 6939 orig_keyword = tmp;
56f7f34b 6940 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6941 }
6942 else if (gv && !gvp
6943 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6944 && GvCVu(gv))
6e7b2336
GS
6945 {
6946 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6947 }
56f7f34b
CS
6948 else { /* no override */
6949 tmp = -tmp;
a2a5de95
NC
6950 if (tmp == KEY_dump) {
6951 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6952 "dump() better written as CORE::dump()");
ac206dc8 6953 }
a0714e2c 6954 gv = NULL;
56f7f34b 6955 gvp = 0;
a2a5de95
NC
6956 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6957 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6958 "Ambiguous call resolved as CORE::%s(), "
6959 "qualify as such or use &",
6960 GvENAME(hgv));
49dc05e3 6961 }
a0d0e21e
LW
6962 }
6963
6964 reserved_word:
6965 switch (tmp) {
79072805
LW
6966
6967 default: /* not a keyword */
0bfa2a8a
NC
6968 /* Trade off - by using this evil construction we can pull the
6969 variable gv into the block labelled keylookup. If not, then
6970 we have to give it function scope so that the goto from the
6971 earlier ':' case doesn't bypass the initialisation. */
6972 if (0) {
6973 just_a_word_zero_gv:
73f3e228
FC
6974 sv = NULL;
6975 cv = NULL;
0bfa2a8a
NC
6976 gv = NULL;
6977 gvp = NULL;
73f3e228 6978 rv2cv_op = NULL;
8bee0991 6979 orig_keyword = 0;
18f70389
FC
6980 lex = 0;
6981 off = 0;
0bfa2a8a 6982 }
93a17b20 6983 just_a_word: {
ce29ac45 6984 int pkgname = 0;
f54cb97a 6985 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
898c3bca
FC
6986 const char penultchar =
6987 lastchar && PL_bufptr - 2 >= PL_linestart
6988 ? PL_bufptr[-2]
6989 : 0;
5db06880 6990#ifdef PERL_MAD
cd81e915 6991 SV *nextPL_nextwhite = 0;
5db06880
NC
6992#endif
6993
8990e307
LW
6994
6995 /* Get the rest if it looks like a package qualifier */
6996
155aba94 6997 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6998 STRLEN morelen;
3280af22 6999 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
7000 TRUE, &morelen);
7001 if (!morelen)
86fe3f36
BF
7002 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
7003 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7004 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
ec2ab091 7005 *s == '\'' ? "'" : "::");
c3e0f903 7006 len += morelen;
ce29ac45 7007 pkgname = 1;
a0d0e21e 7008 }
8990e307 7009
3280af22
NIS
7010 if (PL_expect == XOPERATOR) {
7011 if (PL_bufptr == PL_linestart) {
57843af0 7012 CopLINE_dec(PL_curcop);
f1f66076 7013 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 7014 CopLINE_inc(PL_curcop);
463ee0b2
LW
7015 }
7016 else
54310121 7017 no_op("Bareword",s);
463ee0b2 7018 }
8990e307 7019
c3e0f903 7020 /* Look for a subroutine with this name in current package,
73f3e228
FC
7021 unless this is a lexical sub, or name is "Foo::",
7022 in which case Foo is a bareword
c3e0f903
GS
7023 (and a package name). */
7024
5db06880 7025 if (len > 2 && !PL_madskills &&
3280af22 7026 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 7027 {
f776e3cd 7028 if (ckWARN(WARN_BAREWORD)
af9f5953 7029 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
9014280d 7030 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
979a1401
BF
7031 "Bareword \"%"SVf"\" refers to nonexistent package",
7032 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7033 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
c3e0f903 7034 len -= 2;
3280af22 7035 PL_tokenbuf[len] = '\0';
a0714e2c 7036 gv = NULL;
c3e0f903
GS
7037 gvp = 0;
7038 }
7039 else {
73f3e228 7040 if (!lex && !gv) {
62d55b22
NC
7041 /* Mustn't actually add anything to a symbol table.
7042 But also don't want to "initialise" any placeholder
7043 constants that might already be there into full
7044 blown PVGVs with attached PVCV. */
90e5519e 7045 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
af9f5953
BF
7046 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7047 SVt_PVCV);
62d55b22 7048 }
b3d904f3 7049 len = 0;
c3e0f903
GS
7050 }
7051
7052 /* if we saw a global override before, get the right name */
8990e307 7053
73f3e228 7054 if (!sv)
18f70389 7055 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
37bb7629 7056 len ? len : strlen(PL_tokenbuf));
49dc05e3 7057 if (gvp) {
37bb7629 7058 SV * const tmp_sv = sv;
396482e1 7059 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
7060 sv_catsv(sv, tmp_sv);
7061 SvREFCNT_dec(tmp_sv);
8a7a129d 7062 }
37bb7629 7063
5db06880 7064#ifdef PERL_MAD
cd81e915
NC
7065 if (PL_madskills && !PL_thistoken) {
7066 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 7067 PL_thistoken = newSVpvn(start,s - start);
cd81e915 7068 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
7069 }
7070#endif
8990e307 7071
a0d0e21e 7072 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 7073 CLINE;
6154021b
RGS
7074 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7075 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 7076
c3e0f903 7077 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
7078 if (len)
7079 goto safe_bareword;
7080
73f3e228 7081 if (!off)
f7461760 7082 {
d8ebba9f 7083 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
7084 const_op->op_private = OPpCONST_BARE;
7085 rv2cv_op = newCVREF(0, const_op);
73f3e228 7086 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
f7461760 7087 }
5069cc75 7088
8990e307
LW
7089 /* See if it's the indirect object for a list operator. */
7090
3280af22
NIS
7091 if (PL_oldoldbufptr &&
7092 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
7093 (PL_oldoldbufptr == PL_last_lop
7094 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 7095 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
7096 (PL_expect == XREF ||
7097 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 7098 {
748a9306
LW
7099 bool immediate_paren = *s == '(';
7100
a0d0e21e 7101 /* (Now we can afford to cross potential line boundary.) */
cd81e915 7102 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 7103#ifdef PERL_MAD
cd81e915 7104 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 7105#endif
a0d0e21e
LW
7106
7107 /* Two barewords in a row may indicate method call. */
7108
62d55b22 7109 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
7110 (tmp = intuit_method(s, gv, cv))) {
7111 op_free(rv2cv_op);
78cdf107
Z
7112 if (tmp == METHOD && !PL_lex_allbrackets &&
7113 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7114 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 7115 return REPORT(tmp);
f7461760 7116 }
a0d0e21e
LW
7117
7118 /* If not a declared subroutine, it's an indirect object. */
7119 /* (But it's an indir obj regardless for sort.) */
7294df96 7120 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 7121
7294df96
RGS
7122 if (
7123 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 7124 (!cv &&
a9ef352a 7125 (PL_last_lop_op != OP_MAPSTART &&
f0670693 7126 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
7127 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7128 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7129 )
a9ef352a 7130 {
3280af22 7131 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 7132 goto bareword;
93a17b20
LW
7133 }
7134 }
8990e307 7135
3280af22 7136 PL_expect = XOPERATOR;
5db06880
NC
7137#ifdef PERL_MAD
7138 if (isSPACE(*s))
cd81e915
NC
7139 s = SKIPSPACE2(s,nextPL_nextwhite);
7140 PL_nextwhite = nextPL_nextwhite;
5db06880 7141#else
8990e307 7142 s = skipspace(s);
5db06880 7143#endif
1c3923b3
GS
7144
7145 /* Is this a word before a => operator? */
ce29ac45 7146 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 7147 op_free(rv2cv_op);
1c3923b3 7148 CLINE;
6154021b 7149 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 7150 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 7151 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
7152 TERM(WORD);
7153 }
7154
7155 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 7156 if (*s == '(') {
79072805 7157 CLINE;
5069cc75 7158 if (cv) {
c35e046a
AL
7159 d = s + 1;
7160 while (SPACE_OR_TAB(*d))
7161 d++;
f7461760 7162 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 7163 s = d + 1;
c631f32b 7164 goto its_constant;
96e4d5b1 7165 }
7166 }
5db06880
NC
7167#ifdef PERL_MAD
7168 if (PL_madskills) {
cd81e915
NC
7169 PL_nextwhite = PL_thiswhite;
7170 PL_thiswhite = 0;
5db06880 7171 }
cd81e915 7172 start_force(PL_curforce);
5db06880 7173#endif
73f3e228
FC
7174 NEXTVAL_NEXTTOKE.opval =
7175 off ? rv2cv_op : pl_yylval.opval;
3280af22 7176 PL_expect = XOPERATOR;
5db06880
NC
7177#ifdef PERL_MAD
7178 if (PL_madskills) {
cd81e915
NC
7179 PL_nextwhite = nextPL_nextwhite;
7180 curmad('X', PL_thistoken);
6b29d1f5 7181 PL_thistoken = newSVpvs("");
5db06880
NC
7182 }
7183#endif
73f3e228
FC
7184 if (off)
7185 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7186 else op_free(rv2cv_op), force_next(WORD);
6154021b 7187 pl_yylval.ival = 0;
463ee0b2 7188 TOKEN('&');
79072805 7189 }
93a17b20 7190
a0d0e21e 7191 /* If followed by var or block, call it a method (unless sub) */
8990e307 7192
f7461760
Z
7193 if ((*s == '$' || *s == '{') && !cv) {
7194 op_free(rv2cv_op);
3280af22
NIS
7195 PL_last_lop = PL_oldbufptr;
7196 PL_last_lop_op = OP_METHOD;
78cdf107
Z
7197 if (!PL_lex_allbrackets &&
7198 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7199 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 7200 PREBLOCK(METHOD);
463ee0b2
LW
7201 }
7202
8990e307
LW
7203 /* If followed by a bareword, see if it looks like indir obj. */
7204
30fe34ed
RGS
7205 if (!orig_keyword
7206 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
7207 && (tmp = intuit_method(s, gv, cv))) {
7208 op_free(rv2cv_op);
78cdf107
Z
7209 if (tmp == METHOD && !PL_lex_allbrackets &&
7210 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7211 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 7212 return REPORT(tmp);
f7461760 7213 }
93a17b20 7214
8990e307
LW
7215 /* Not a method, so call it a subroutine (if defined) */
7216
5069cc75 7217 if (cv) {
898c3bca 7218 if (lastchar == '-' && penultchar != '-') {
43b5ab4c
BF
7219 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7220 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7221 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7222 SVfARG(tmpsv), SVfARG(tmpsv));
7223 }
89bfa8cd 7224 /* Check for a constant sub */
f7461760 7225 if ((sv = cv_const_sv(cv))) {
96e4d5b1 7226 its_constant:
f7461760 7227 op_free(rv2cv_op);
6154021b
RGS
7228 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7229 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
cc2ebcd7 7230 pl_yylval.opval->op_private = OPpCONST_FOLDED;
6b7c6d95 7231 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 7232 TOKEN(WORD);
89bfa8cd 7233 }
7234
6154021b 7235 op_free(pl_yylval.opval);
f7461760 7236 pl_yylval.opval = rv2cv_op;
6154021b 7237 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 7238 PL_last_lop = PL_oldbufptr;
bf848113 7239 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 7240 /* Is there a prototype? */
5db06880
NC
7241 if (
7242#ifdef PERL_MAD
7243 cv &&
7244#endif
d9f2850e
RGS
7245 SvPOK(cv))
7246 {
8fa6a409
FC
7247 STRLEN protolen = CvPROTOLEN(cv);
7248 const char *proto = CvPROTO(cv);
b5fb7ce3 7249 bool optional;
5f66b61c 7250 if (!protolen)
4633a7c4 7251 TERM(FUNC0SUB);
b5fb7ce3
FC
7252 if ((optional = *proto == ';'))
7253 do
0f5d0394 7254 proto++;
b5fb7ce3 7255 while (*proto == ';');
649d02de
FC
7256 if (
7257 (
7258 (
7259 *proto == '$' || *proto == '_'
c035a075 7260 || *proto == '*' || *proto == '+'
649d02de
FC
7261 )
7262 && proto[1] == '\0'
7263 )
7264 || (
7265 *proto == '\\' && proto[1] && proto[2] == '\0'
7266 )
7267 )
b5fb7ce3 7268 UNIPROTO(UNIOPSUB,optional);
649d02de
FC
7269 if (*proto == '\\' && proto[1] == '[') {
7270 const char *p = proto + 2;
7271 while(*p && *p != ']')
7272 ++p;
b5fb7ce3
FC
7273 if(*p == ']' && !p[1])
7274 UNIPROTO(UNIOPSUB,optional);
649d02de 7275 }
7a52d87a 7276 if (*proto == '&' && *s == '{') {
49a54bbe
NC
7277 if (PL_curstash)
7278 sv_setpvs(PL_subname, "__ANON__");
7279 else
7280 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
7281 if (!PL_lex_allbrackets &&
7282 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7283 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
7284 PREBLOCK(LSTOPSUB);
7285 }
a9ef352a 7286 }
5db06880
NC
7287#ifdef PERL_MAD
7288 {
7289 if (PL_madskills) {
cd81e915
NC
7290 PL_nextwhite = PL_thiswhite;
7291 PL_thiswhite = 0;
5db06880 7292 }
cd81e915 7293 start_force(PL_curforce);
6154021b 7294 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
7295 PL_expect = XTERM;
7296 if (PL_madskills) {
cd81e915
NC
7297 PL_nextwhite = nextPL_nextwhite;
7298 curmad('X', PL_thistoken);
6b29d1f5 7299 PL_thistoken = newSVpvs("");
5db06880 7300 }
73f3e228 7301 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7302 if (!PL_lex_allbrackets &&
7303 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7304 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7305 TOKEN(NOAMP);
7306 }
7307 }
7308
7309 /* Guess harder when madskills require "best effort". */
7310 if (PL_madskills && (!gv || !GvCVu(gv))) {
7311 int probable_sub = 0;
7312 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7313 probable_sub = 1;
7314 else if (isALPHA(*s)) {
7315 char tmpbuf[1024];
7316 STRLEN tmplen;
7317 d = s;
7318 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 7319 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
7320 probable_sub = 1;
7321 else {
7322 while (d < PL_bufend && isSPACE(*d))
7323 d++;
7324 if (*d == '=' && d[1] == '>')
7325 probable_sub = 1;
7326 }
7327 }
7328 if (probable_sub) {
af9f5953
BF
7329 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7330 SVt_PVCV);
6154021b 7331 op_free(pl_yylval.opval);
f7461760 7332 pl_yylval.opval = rv2cv_op;
6154021b 7333 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
7334 PL_last_lop = PL_oldbufptr;
7335 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
7336 PL_nextwhite = PL_thiswhite;
7337 PL_thiswhite = 0;
7338 start_force(PL_curforce);
6154021b 7339 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 7340 PL_expect = XTERM;
cd81e915
NC
7341 PL_nextwhite = nextPL_nextwhite;
7342 curmad('X', PL_thistoken);
6b29d1f5 7343 PL_thistoken = newSVpvs("");
73f3e228 7344 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7345 if (!PL_lex_allbrackets &&
7346 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7347 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7348 TOKEN(NOAMP);
7349 }
7350#else
6154021b 7351 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 7352 PL_expect = XTERM;
73f3e228 7353 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7354 if (!PL_lex_allbrackets &&
7355 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7356 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 7357 TOKEN(NOAMP);
5db06880 7358#endif
8990e307 7359 }
748a9306 7360
8990e307
LW
7361 /* Call it a bare word */
7362
5603f27d 7363 if (PL_hints & HINT_STRICT_SUBS)
6154021b 7364 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 7365 else {
9a073a1d
RGS
7366 bareword:
7367 /* after "print" and similar functions (corresponding to
7368 * "F? L" in opcode.pl), whatever wasn't already parsed as
7369 * a filehandle should be subject to "strict subs".
7370 * Likewise for the optional indirect-object argument to system
7371 * or exec, which can't be a bareword */
7372 if ((PL_last_lop_op == OP_PRINT
7373 || PL_last_lop_op == OP_PRTF
7374 || PL_last_lop_op == OP_SAY
7375 || PL_last_lop_op == OP_SYSTEM
7376 || PL_last_lop_op == OP_EXEC)
7377 && (PL_hints & HINT_STRICT_SUBS))
7378 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
7379 if (lastchar != '-') {
7380 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
7381 d = PL_tokenbuf;
7382 while (isLOWER(*d))
7383 d++;
af9f5953 7384 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
9014280d 7385 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
7386 PL_tokenbuf);
7387 }
748a9306
LW
7388 }
7389 }
f7461760 7390 op_free(rv2cv_op);
c3e0f903
GS
7391
7392 safe_bareword:
9b387841
NC
7393 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7394 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
02571fe8
BF
7395 "Operator or semicolon missing before %c%"SVf,
7396 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7397 strlen(PL_tokenbuf),
7398 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
9b387841
NC
7399 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7400 "Ambiguous use of %c resolved as operator %c",
7401 lastchar, lastchar);
748a9306 7402 }
93a17b20 7403 TOKEN(WORD);
79072805 7404 }
79072805 7405
68dc0745 7406 case KEY___FILE__:
7eb971ee 7407 FUN0OP(
14f0f125 7408 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7eb971ee 7409 );
46fc3d4c 7410
79072805 7411 case KEY___LINE__:
7eb971ee
FC
7412 FUN0OP(
7413 (OP*)newSVOP(OP_CONST, 0,
7414 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7415 );
68dc0745 7416
7417 case KEY___PACKAGE__:
7eb971ee
FC
7418 FUN0OP(
7419 (OP*)newSVOP(OP_CONST, 0,
3280af22 7420 (PL_curstash
5aaec2b4 7421 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
7422 : &PL_sv_undef))
7423 );
79072805 7424
e50aee73 7425 case KEY___DATA__:
79072805
LW
7426 case KEY___END__: {
7427 GV *gv;
3280af22 7428 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 7429 const char *pname = "main";
affc13fc
FC
7430 STRLEN plen = 4;
7431 U32 putf8 = 0;
3280af22 7432 if (PL_tokenbuf[2] == 'D')
affc13fc
FC
7433 {
7434 HV * const stash =
7435 PL_curstash ? PL_curstash : PL_defstash;
7436 pname = HvNAME_get(stash);
7437 plen = HvNAMELEN (stash);
7438 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7439 }
7440 gv = gv_fetchpvn_flags(
7441 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7442 plen+6, GV_ADD|putf8, SVt_PVIO
7443 );
a5f75d66 7444 GvMULTI_on(gv);
79072805 7445 if (!GvIO(gv))
a0d0e21e 7446 GvIOp(gv) = newIO();
3280af22 7447 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
7448#if defined(HAS_FCNTL) && defined(F_SETFD)
7449 {
f54cb97a 7450 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
7451 fcntl(fd,F_SETFD,fd >= 3);
7452 }
79072805 7453#endif
fd049845 7454 /* Mark this internal pseudo-handle as clean */
7455 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 7456 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 7457 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 7458 else
50952442 7459 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
7460#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7461 /* if the script was opened in binmode, we need to revert
53129d29 7462 * it to text mode for compatibility; but only iff it has CRs
c39cd008 7463 * XXX this is a questionable hack at best. */
53129d29
GS
7464 if (PL_bufend-PL_bufptr > 2
7465 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
7466 {
7467 Off_t loc = 0;
50952442 7468 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
7469 loc = PerlIO_tell(PL_rsfp);
7470 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7471 }
2986a63f
JH
7472#ifdef NETWARE
7473 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7474#else
c39cd008 7475 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 7476#endif /* NETWARE */
c39cd008
GS
7477 if (loc > 0)
7478 PerlIO_seek(PL_rsfp, loc, 0);
7479 }
7480 }
7481#endif
7948272d 7482#ifdef PERLIO_LAYERS
52d2e0f4
JH
7483 if (!IN_BYTES) {
7484 if (UTF)
7485 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7486 else if (PL_encoding) {
7487 SV *name;
7488 dSP;
7489 ENTER;
7490 SAVETMPS;
7491 PUSHMARK(sp);
7492 EXTEND(SP, 1);
7493 XPUSHs(PL_encoding);
7494 PUTBACK;
7495 call_method("name", G_SCALAR);
7496 SPAGAIN;
7497 name = POPs;
7498 PUTBACK;
bfed75c6 7499 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7500 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7501 SVfARG(name)));
52d2e0f4
JH
7502 FREETMPS;
7503 LEAVE;
7504 }
7505 }
7948272d 7506#endif
5db06880
NC
7507#ifdef PERL_MAD
7508 if (PL_madskills) {
cd81e915
NC
7509 if (PL_realtokenstart >= 0) {
7510 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7511 if (!PL_endwhite)
6b29d1f5 7512 PL_endwhite = newSVpvs("");
cd81e915
NC
7513 sv_catsv(PL_endwhite, PL_thiswhite);
7514 PL_thiswhite = 0;
7515 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7516 PL_realtokenstart = -1;
5db06880 7517 }
5cc814fd
NC
7518 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7519 != NULL) ;
5db06880
NC
7520 }
7521#endif
4608196e 7522 PL_rsfp = NULL;
79072805
LW
7523 }
7524 goto fake_eof;
e929a76b 7525 }
de3bb511 7526
84ed0108 7527 case KEY___SUB__:
1a35f9ff 7528 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
84ed0108 7529
8990e307 7530 case KEY_AUTOLOAD:
ed6116ce 7531 case KEY_DESTROY:
79072805 7532 case KEY_BEGIN:
3c10abe3 7533 case KEY_UNITCHECK:
7d30b5c4 7534 case KEY_CHECK:
7d07dbc2 7535 case KEY_INIT:
7d30b5c4 7536 case KEY_END:
3280af22
NIS
7537 if (PL_expect == XSTATE) {
7538 s = PL_bufptr;
93a17b20 7539 goto really_sub;
79072805
LW
7540 }
7541 goto just_a_word;
7542
a0d0e21e
LW
7543 case KEY_CORE:
7544 if (*s == ':' && s[1] == ':') {
ee36fb64 7545 STRLEN olen = len;
748a9306 7546 d = s;
ee36fb64 7547 s += 2;
3280af22 7548 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
ee36fb64
FC
7549 if ((*s == ':' && s[1] == ':')
7550 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7551 {
7552 s = d;
7553 len = olen;
7554 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7555 goto just_a_word;
7556 }
7557 if (!tmp)
3773592b
BF
7558 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7559 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7560 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
a0d0e21e
LW
7561 if (tmp < 0)
7562 tmp = -tmp;
d67594ff
FC
7563 else if (tmp == KEY_require || tmp == KEY_do
7564 || tmp == KEY_glob)
a72a1c8b 7565 /* that's a way to remember we saw "CORE::" */
850e8516 7566 orig_keyword = tmp;
a0d0e21e
LW
7567 goto reserved_word;
7568 }
7569 goto just_a_word;
7570
463ee0b2
LW
7571 case KEY_abs:
7572 UNI(OP_ABS);
7573
79072805
LW
7574 case KEY_alarm:
7575 UNI(OP_ALARM);
7576
7577 case KEY_accept:
a0d0e21e 7578 LOP(OP_ACCEPT,XTERM);
79072805 7579
463ee0b2 7580 case KEY_and:
78cdf107
Z
7581 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7582 return REPORT(0);
463ee0b2
LW
7583 OPERATOR(ANDOP);
7584
79072805 7585 case KEY_atan2:
a0d0e21e 7586 LOP(OP_ATAN2,XTERM);
85e6fe83 7587
79072805 7588 case KEY_bind:
a0d0e21e 7589 LOP(OP_BIND,XTERM);
79072805
LW
7590
7591 case KEY_binmode:
1c1fc3ea 7592 LOP(OP_BINMODE,XTERM);
79072805
LW
7593
7594 case KEY_bless:
a0d0e21e 7595 LOP(OP_BLESS,XTERM);
79072805 7596
0d863452
RH
7597 case KEY_break:
7598 FUN0(OP_BREAK);
7599
79072805
LW
7600 case KEY_chop:
7601 UNI(OP_CHOP);
7602
7603 case KEY_continue:
0d863452
RH
7604 /* We have to disambiguate the two senses of
7605 "continue". If the next token is a '{' then
7606 treat it as the start of a continue block;
7607 otherwise treat it as a control operator.
7608 */
7609 s = skipspace(s);
7610 if (*s == '{')
79072805 7611 PREBLOCK(CONTINUE);
0d863452
RH
7612 else
7613 FUN0(OP_CONTINUE);
79072805
LW
7614
7615 case KEY_chdir:
fafc274c
NC
7616 /* may use HOME */
7617 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7618 UNI(OP_CHDIR);
7619
7620 case KEY_close:
7621 UNI(OP_CLOSE);
7622
7623 case KEY_closedir:
7624 UNI(OP_CLOSEDIR);
7625
7626 case KEY_cmp:
78cdf107
Z
7627 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7628 return REPORT(0);
79072805
LW
7629 Eop(OP_SCMP);
7630
7631 case KEY_caller:
7632 UNI(OP_CALLER);
7633
7634 case KEY_crypt:
7635#ifdef FCRYPT
f4c556ac
GS
7636 if (!PL_cryptseen) {
7637 PL_cryptseen = TRUE;
de3bb511 7638 init_des();
f4c556ac 7639 }
a687059c 7640#endif
a0d0e21e 7641 LOP(OP_CRYPT,XTERM);
79072805
LW
7642
7643 case KEY_chmod:
a0d0e21e 7644 LOP(OP_CHMOD,XTERM);
79072805
LW
7645
7646 case KEY_chown:
a0d0e21e 7647 LOP(OP_CHOWN,XTERM);
79072805
LW
7648
7649 case KEY_connect:
a0d0e21e 7650 LOP(OP_CONNECT,XTERM);
79072805 7651
463ee0b2
LW
7652 case KEY_chr:
7653 UNI(OP_CHR);
7654
79072805
LW
7655 case KEY_cos:
7656 UNI(OP_COS);
7657
7658 case KEY_chroot:
7659 UNI(OP_CHROOT);
7660
0d863452
RH
7661 case KEY_default:
7662 PREBLOCK(DEFAULT);
7663
79072805 7664 case KEY_do:
29595ff2 7665 s = SKIPSPACE1(s);
79072805 7666 if (*s == '{')
a0d0e21e 7667 PRETERMBLOCK(DO);
c2900bb8 7668 if (*s != '\'') {
4b473a5a
FC
7669 *PL_tokenbuf = '&';
7670 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7671 1, &len);
7672 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
c2900bb8 7673 d = SKIPSPACE1(d);
4b473a5a 7674 if (*d == '(') {
60ac52eb 7675 force_ident_maybe_lex('&');
4b473a5a
FC
7676 s = d;
7677 }
c2900bb8
FC
7678 }
7679 }
850e8516
RGS
7680 if (orig_keyword == KEY_do) {
7681 orig_keyword = 0;
6154021b 7682 pl_yylval.ival = 1;
850e8516
RGS
7683 }
7684 else
6154021b 7685 pl_yylval.ival = 0;
378cc40b 7686 OPERATOR(DO);
79072805
LW
7687
7688 case KEY_die:
3280af22 7689 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7690 LOP(OP_DIE,XTERM);
79072805
LW
7691
7692 case KEY_defined:
7693 UNI(OP_DEFINED);
7694
7695 case KEY_delete:
a0d0e21e 7696 UNI(OP_DELETE);
79072805
LW
7697
7698 case KEY_dbmopen:
74e8ce34
NC
7699 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7700 STR_WITH_LEN("NDBM_File::"),
7701 STR_WITH_LEN("DB_File::"),
7702 STR_WITH_LEN("GDBM_File::"),
7703 STR_WITH_LEN("SDBM_File::"),
7704 STR_WITH_LEN("ODBM_File::"),
7705 NULL);
a0d0e21e 7706 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7707
7708 case KEY_dbmclose:
7709 UNI(OP_DBMCLOSE);
7710
7711 case KEY_dump:
c31f6d3b 7712 PL_expect = XOPERATOR;
a0d0e21e 7713 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7714 LOOPX(OP_DUMP);
7715
7716 case KEY_else:
7717 PREBLOCK(ELSE);
7718
7719 case KEY_elsif:
6154021b 7720 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7721 OPERATOR(ELSIF);
7722
7723 case KEY_eq:
78cdf107
Z
7724 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7725 return REPORT(0);
79072805
LW
7726 Eop(OP_SEQ);
7727
a0d0e21e
LW
7728 case KEY_exists:
7729 UNI(OP_EXISTS);
4e553d73 7730
79072805 7731 case KEY_exit:
5db06880
NC
7732 if (PL_madskills)
7733 UNI(OP_INT);
79072805
LW
7734 UNI(OP_EXIT);
7735
7736 case KEY_eval:
29595ff2 7737 s = SKIPSPACE1(s);
32e2a35d
RGS
7738 if (*s == '{') { /* block eval */
7739 PL_expect = XTERMBLOCK;
7740 UNIBRACK(OP_ENTERTRY);
7741 }
7742 else { /* string eval */
7743 PL_expect = XTERM;
7744 UNIBRACK(OP_ENTEREVAL);
7745 }
79072805 7746
7d789282
FC
7747 case KEY_evalbytes:
7748 PL_expect = XTERM;
7749 UNIBRACK(-OP_ENTEREVAL);
7750
79072805
LW
7751 case KEY_eof:
7752 UNI(OP_EOF);
7753
7754 case KEY_exp:
7755 UNI(OP_EXP);
7756
7757 case KEY_each:
7758 UNI(OP_EACH);
7759
7760 case KEY_exec:
a0d0e21e 7761 LOP(OP_EXEC,XREF);
79072805
LW
7762
7763 case KEY_endhostent:
7764 FUN0(OP_EHOSTENT);
7765
7766 case KEY_endnetent:
7767 FUN0(OP_ENETENT);
7768
7769 case KEY_endservent:
7770 FUN0(OP_ESERVENT);
7771
7772 case KEY_endprotoent:
7773 FUN0(OP_EPROTOENT);
7774
7775 case KEY_endpwent:
7776 FUN0(OP_EPWENT);
7777
7778 case KEY_endgrent:
7779 FUN0(OP_EGRENT);
7780
7781 case KEY_for:
7782 case KEY_foreach:
78cdf107
Z
7783 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7784 return REPORT(0);
6154021b 7785 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7786 s = SKIPSPACE1(s);
7e2040f0 7787 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7788 char *p = s;
5db06880
NC
7789#ifdef PERL_MAD
7790 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7791#endif
7792
3280af22 7793 if ((PL_bufend - p) >= 3 &&
55497cff 7794 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7795 p += 2;
77ca0c92
LW
7796 else if ((PL_bufend - p) >= 4 &&
7797 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7798 p += 3;
29595ff2 7799 p = PEEKSPACE(p);
7e2040f0 7800 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7801 p = scan_ident(p, PL_bufend,
7802 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7803 p = PEEKSPACE(p);
77ca0c92
LW
7804 }
7805 if (*p != '$')
cea2e8a9 7806 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7807#ifdef PERL_MAD
7808 s = SvPVX(PL_linestr) + soff;
7809#endif
55497cff 7810 }
79072805
LW
7811 OPERATOR(FOR);
7812
7813 case KEY_formline:
a0d0e21e 7814 LOP(OP_FORMLINE,XTERM);
79072805
LW
7815
7816 case KEY_fork:
7817 FUN0(OP_FORK);
7818
838f2281
BF
7819 case KEY_fc:
7820 UNI(OP_FC);
7821
79072805 7822 case KEY_fcntl:
a0d0e21e 7823 LOP(OP_FCNTL,XTERM);
79072805
LW
7824
7825 case KEY_fileno:
7826 UNI(OP_FILENO);
7827
7828 case KEY_flock:
a0d0e21e 7829 LOP(OP_FLOCK,XTERM);
79072805
LW
7830
7831 case KEY_gt:
78cdf107
Z
7832 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7833 return REPORT(0);
79072805
LW
7834 Rop(OP_SGT);
7835
7836 case KEY_ge:
78cdf107
Z
7837 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7838 return REPORT(0);
79072805
LW
7839 Rop(OP_SGE);
7840
7841 case KEY_grep:
2c38e13d 7842 LOP(OP_GREPSTART, XREF);
79072805
LW
7843
7844 case KEY_goto:
c31f6d3b 7845 PL_expect = XOPERATOR;
a0d0e21e 7846 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7847 LOOPX(OP_GOTO);
7848
7849 case KEY_gmtime:
7850 UNI(OP_GMTIME);
7851
7852 case KEY_getc:
6f33ba73 7853 UNIDOR(OP_GETC);
79072805
LW
7854
7855 case KEY_getppid:
7856 FUN0(OP_GETPPID);
7857
7858 case KEY_getpgrp:
7859 UNI(OP_GETPGRP);
7860
7861 case KEY_getpriority:
a0d0e21e 7862 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7863
7864 case KEY_getprotobyname:
7865 UNI(OP_GPBYNAME);
7866
7867 case KEY_getprotobynumber:
a0d0e21e 7868 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7869
7870 case KEY_getprotoent:
7871 FUN0(OP_GPROTOENT);
7872
7873 case KEY_getpwent:
7874 FUN0(OP_GPWENT);
7875
7876 case KEY_getpwnam:
ff68c719 7877 UNI(OP_GPWNAM);
79072805
LW
7878
7879 case KEY_getpwuid:
ff68c719 7880 UNI(OP_GPWUID);
79072805
LW
7881
7882 case KEY_getpeername:
7883 UNI(OP_GETPEERNAME);
7884
7885 case KEY_gethostbyname:
7886 UNI(OP_GHBYNAME);
7887
7888 case KEY_gethostbyaddr:
a0d0e21e 7889 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7890
7891 case KEY_gethostent:
7892 FUN0(OP_GHOSTENT);
7893
7894 case KEY_getnetbyname:
7895 UNI(OP_GNBYNAME);
7896
7897 case KEY_getnetbyaddr:
a0d0e21e 7898 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7899
7900 case KEY_getnetent:
7901 FUN0(OP_GNETENT);
7902
7903 case KEY_getservbyname:
a0d0e21e 7904 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7905
7906 case KEY_getservbyport:
a0d0e21e 7907 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7908
7909 case KEY_getservent:
7910 FUN0(OP_GSERVENT);
7911
7912 case KEY_getsockname:
7913 UNI(OP_GETSOCKNAME);
7914
7915 case KEY_getsockopt:
a0d0e21e 7916 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7917
7918 case KEY_getgrent:
7919 FUN0(OP_GGRENT);
7920
7921 case KEY_getgrnam:
ff68c719 7922 UNI(OP_GGRNAM);
79072805
LW
7923
7924 case KEY_getgrgid:
ff68c719 7925 UNI(OP_GGRGID);
79072805
LW
7926
7927 case KEY_getlogin:
7928 FUN0(OP_GETLOGIN);
7929
0d863452 7930 case KEY_given:
6154021b 7931 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7932 OPERATOR(GIVEN);
7933
93a17b20 7934 case KEY_glob:
d67594ff
FC
7935 LOP(
7936 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7937 XTERM
7938 );
93a17b20 7939
79072805
LW
7940 case KEY_hex:
7941 UNI(OP_HEX);
7942
7943 case KEY_if:
78cdf107
Z
7944 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7945 return REPORT(0);
6154021b 7946 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7947 OPERATOR(IF);
7948
7949 case KEY_index:
a0d0e21e 7950 LOP(OP_INDEX,XTERM);
79072805
LW
7951
7952 case KEY_int:
7953 UNI(OP_INT);
7954
7955 case KEY_ioctl:
a0d0e21e 7956 LOP(OP_IOCTL,XTERM);
79072805
LW
7957
7958 case KEY_join:
a0d0e21e 7959 LOP(OP_JOIN,XTERM);
79072805
LW
7960
7961 case KEY_keys:
7962 UNI(OP_KEYS);
7963
7964 case KEY_kill:
a0d0e21e 7965 LOP(OP_KILL,XTERM);
79072805
LW
7966
7967 case KEY_last:
c31f6d3b 7968 PL_expect = XOPERATOR;
a0d0e21e 7969 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7970 LOOPX(OP_LAST);
4e553d73 7971
79072805
LW
7972 case KEY_lc:
7973 UNI(OP_LC);
7974
7975 case KEY_lcfirst:
7976 UNI(OP_LCFIRST);
7977
7978 case KEY_local:
6154021b 7979 pl_yylval.ival = 0;
79072805
LW
7980 OPERATOR(LOCAL);
7981
7982 case KEY_length:
7983 UNI(OP_LENGTH);
7984
7985 case KEY_lt:
78cdf107
Z
7986 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7987 return REPORT(0);
79072805
LW
7988 Rop(OP_SLT);
7989
7990 case KEY_le:
78cdf107
Z
7991 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7992 return REPORT(0);
79072805
LW
7993 Rop(OP_SLE);
7994
7995 case KEY_localtime:
7996 UNI(OP_LOCALTIME);
7997
7998 case KEY_log:
7999 UNI(OP_LOG);
8000
8001 case KEY_link:
a0d0e21e 8002 LOP(OP_LINK,XTERM);
79072805
LW
8003
8004 case KEY_listen:
a0d0e21e 8005 LOP(OP_LISTEN,XTERM);
79072805 8006
c0329465
MB
8007 case KEY_lock:
8008 UNI(OP_LOCK);
8009
79072805
LW
8010 case KEY_lstat:
8011 UNI(OP_LSTAT);
8012
8013 case KEY_m:
8782bef2 8014 s = scan_pat(s,OP_MATCH);
79072805
LW
8015 TERM(sublex_start());
8016
a0d0e21e 8017 case KEY_map:
2c38e13d 8018 LOP(OP_MAPSTART, XREF);
4e4e412b 8019
79072805 8020 case KEY_mkdir:
a0d0e21e 8021 LOP(OP_MKDIR,XTERM);
79072805
LW
8022
8023 case KEY_msgctl:
a0d0e21e 8024 LOP(OP_MSGCTL,XTERM);
79072805
LW
8025
8026 case KEY_msgget:
a0d0e21e 8027 LOP(OP_MSGGET,XTERM);
79072805
LW
8028
8029 case KEY_msgrcv:
a0d0e21e 8030 LOP(OP_MSGRCV,XTERM);
79072805
LW
8031
8032 case KEY_msgsnd:
a0d0e21e 8033 LOP(OP_MSGSND,XTERM);
79072805 8034
77ca0c92 8035 case KEY_our:
93a17b20 8036 case KEY_my:
952306ac 8037 case KEY_state:
eac04b2e 8038 PL_in_my = (U16)tmp;
29595ff2 8039 s = SKIPSPACE1(s);
7e2040f0 8040 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
8041#ifdef PERL_MAD
8042 char* start = s;
8043#endif
3280af22 8044 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843 8045 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
e7d0b801
FC
8046 {
8047 if (!FEATURE_LEXSUBS_IS_ENABLED)
8048 Perl_croak(aTHX_
8049 "Experimental \"%s\" subs not enabled",
8050 tmp == KEY_my ? "my" :
8051 tmp == KEY_state ? "state" : "our");
64fbf0dd
FC
8052 Perl_ck_warner_d(aTHX_
8053 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8054 "The lexical_subs feature is experimental");
09bef843 8055 goto really_sub;
e7d0b801 8056 }
def3634b 8057 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 8058 if (!PL_in_my_stash) {
c750a3ec 8059 char tmpbuf[1024];
3280af22 8060 PL_bufptr = s;
d9fad198 8061 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
3c54b17a 8062 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
c750a3ec 8063 }
5db06880
NC
8064#ifdef PERL_MAD
8065 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
8066 sv_catsv(PL_thistoken, PL_nextwhite);
8067 PL_nextwhite = 0;
8068 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
8069 }
8070#endif
c750a3ec 8071 }
6154021b 8072 pl_yylval.ival = 1;
55497cff 8073 OPERATOR(MY);
93a17b20 8074
79072805 8075 case KEY_next:
c31f6d3b 8076 PL_expect = XOPERATOR;
a0d0e21e 8077 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
8078 LOOPX(OP_NEXT);
8079
8080 case KEY_ne:
78cdf107
Z
8081 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8082 return REPORT(0);
79072805
LW
8083 Eop(OP_SNE);
8084
a0d0e21e 8085 case KEY_no:
468aa647 8086 s = tokenize_use(0, s);
52d0e95b 8087 TERM(USE);
a0d0e21e
LW
8088
8089 case KEY_not:
29595ff2 8090 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 8091 FUN1(OP_NOT);
78cdf107
Z
8092 else {
8093 if (!PL_lex_allbrackets &&
8094 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8095 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 8096 OPERATOR(NOTOP);
78cdf107 8097 }
a0d0e21e 8098
79072805 8099 case KEY_open:
29595ff2 8100 s = SKIPSPACE1(s);
7e2040f0 8101 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 8102 const char *t;
8a2bca12 8103 for (d = s; isWORDCHAR_lazy_if(d,UTF);) {
71aa9713
BF
8104 d += UTF ? UTF8SKIP(d) : 1;
8105 if (UTF) {
7dbf68d2 8106 while (UTF8_IS_CONTINUED(*d) && _is_utf8_mark((U8*)d)) {
71aa9713
BF
8107 d += UTF ? UTF8SKIP(d) : 1;
8108 }
8109 }
8110 }
c35e046a
AL
8111 for (t=d; isSPACE(*t);)
8112 t++;
e2ab214b 8113 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
8114 /* [perl #16184] */
8115 && !(t[0] == '=' && t[1] == '>')
db3abe52 8116 && !(t[0] == ':' && t[1] == ':')
240d1b6f 8117 && !keyword(s, d-s, 0)
66fbe8fb 8118 ) {
71aa9713
BF
8119 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
8120 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
9014280d 8121 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
71aa9713
BF
8122 "Precedence problem: open %"SVf" should be open(%"SVf")",
8123 SVfARG(tmpsv), SVfARG(tmpsv));
66fbe8fb 8124 }
93a17b20 8125 }
a0d0e21e 8126 LOP(OP_OPEN,XTERM);
79072805 8127
463ee0b2 8128 case KEY_or:
78cdf107
Z
8129 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8130 return REPORT(0);
6154021b 8131 pl_yylval.ival = OP_OR;
463ee0b2
LW
8132 OPERATOR(OROP);
8133
79072805
LW
8134 case KEY_ord:
8135 UNI(OP_ORD);
8136
8137 case KEY_oct:
8138 UNI(OP_OCT);
8139
8140 case KEY_opendir:
a0d0e21e 8141 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
8142
8143 case KEY_print:
3280af22 8144 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 8145 LOP(OP_PRINT,XREF);
79072805
LW
8146
8147 case KEY_printf:
3280af22 8148 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 8149 LOP(OP_PRTF,XREF);
79072805 8150
c07a80fd 8151 case KEY_prototype:
8152 UNI(OP_PROTOTYPE);
8153
79072805 8154 case KEY_push:
a0d0e21e 8155 LOP(OP_PUSH,XTERM);
79072805
LW
8156
8157 case KEY_pop:
6f33ba73 8158 UNIDOR(OP_POP);
79072805 8159
a0d0e21e 8160 case KEY_pos:
6f33ba73 8161 UNIDOR(OP_POS);
4e553d73 8162
79072805 8163 case KEY_pack:
a0d0e21e 8164 LOP(OP_PACK,XTERM);
79072805
LW
8165
8166 case KEY_package:
a0d0e21e 8167 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 8168 s = SKIPSPACE1(s);
91152fc1 8169 s = force_strict_version(s);
4e4da3ac 8170 PL_lex_expect = XBLOCK;
79072805
LW
8171 OPERATOR(PACKAGE);
8172
8173 case KEY_pipe:
a0d0e21e 8174 LOP(OP_PIPE_OP,XTERM);
79072805
LW
8175
8176 case KEY_q:
4d68ffa0 8177 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 8178 if (!s)
d4c19fe8 8179 missingterm(NULL);
6154021b 8180 pl_yylval.ival = OP_CONST;
79072805
LW
8181 TERM(sublex_start());
8182
a0d0e21e
LW
8183 case KEY_quotemeta:
8184 UNI(OP_QUOTEMETA);
8185
ea25a9b2
Z
8186 case KEY_qw: {
8187 OP *words = NULL;
4d68ffa0 8188 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8990e307 8189 if (!s)
d4c19fe8 8190 missingterm(NULL);
3480a8d2 8191 PL_expect = XOPERATOR;
8127e0e3 8192 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
8193 int warned_comma = !ckWARN(WARN_QW);
8194 int warned_comment = warned_comma;
3280af22 8195 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 8196 while (len) {
d4c19fe8
AL
8197 for (; isSPACE(*d) && len; --len, ++d)
8198 /**/;
8127e0e3 8199 if (len) {
d4c19fe8 8200 SV *sv;
f54cb97a 8201 const char *b = d;
7e03b518 8202 if (!warned_comma || !warned_comment) {
8127e0e3 8203 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 8204 if (!warned_comma && *d == ',') {
9014280d 8205 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 8206 "Possible attempt to separate words with commas");
7e03b518 8207 ++warned_comma;
8127e0e3 8208 }
7e03b518 8209 else if (!warned_comment && *d == '#') {
9014280d 8210 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 8211 "Possible attempt to put comments in qw() list");
7e03b518 8212 ++warned_comment;
8127e0e3
GS
8213 }
8214 }
8215 }
8216 else {
d4c19fe8
AL
8217 for (; !isSPACE(*d) && len; --len, ++d)
8218 /**/;
8127e0e3 8219 }
740cce10 8220 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 8221 words = op_append_elem(OP_LIST, words,
7948272d 8222 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 8223 }
8224 }
8225 }
ea25a9b2
Z
8226 if (!words)
8227 words = newNULLLIST();
37fd879b 8228 if (PL_lex_stuff) {
8127e0e3 8229 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 8230 PL_lex_stuff = NULL;
37fd879b 8231 }
ea25a9b2
Z
8232 PL_expect = XOPERATOR;
8233 pl_yylval.opval = sawparens(words);
8234 TOKEN(QWLIST);
8235 }
8990e307 8236
79072805 8237 case KEY_qq:
4d68ffa0 8238 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 8239 if (!s)
d4c19fe8 8240 missingterm(NULL);
6154021b 8241 pl_yylval.ival = OP_STRINGIFY;
3280af22 8242 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 8243 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
8244 TERM(sublex_start());
8245
8782bef2
GB
8246 case KEY_qr:
8247 s = scan_pat(s,OP_QR);
8248 TERM(sublex_start());
8249
79072805 8250 case KEY_qx:
4d68ffa0 8251 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 8252 if (!s)
d4c19fe8 8253 missingterm(NULL);
9b201d7d 8254 readpipe_override();
79072805
LW
8255 TERM(sublex_start());
8256
8257 case KEY_return:
8258 OLDLOP(OP_RETURN);
8259
8260 case KEY_require:
29595ff2 8261 s = SKIPSPACE1(s);
c31f6d3b 8262 PL_expect = XOPERATOR;
e759cc13
RGS
8263 if (isDIGIT(*s)) {
8264 s = force_version(s, FALSE);
a7cb1f99 8265 }
e759cc13
RGS
8266 else if (*s != 'v' || !isDIGIT(s[1])
8267 || (s = force_version(s, TRUE), *s == 'v'))
8268 {
a7cb1f99
GS
8269 *PL_tokenbuf = '\0';
8270 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 8271 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
af9f5953
BF
8272 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8273 GV_ADD | (UTF ? SVf_UTF8 : 0));
a7cb1f99
GS
8274 else if (*s == '<')
8275 yyerror("<> should be quotes");
8276 }
a72a1c8b
RGS
8277 if (orig_keyword == KEY_require) {
8278 orig_keyword = 0;
6154021b 8279 pl_yylval.ival = 1;
a72a1c8b
RGS
8280 }
8281 else
6154021b 8282 pl_yylval.ival = 0;
a72a1c8b
RGS
8283 PL_expect = XTERM;
8284 PL_bufptr = s;
8285 PL_last_uni = PL_oldbufptr;
8286 PL_last_lop_op = OP_REQUIRE;
8287 s = skipspace(s);
8288 return REPORT( (int)REQUIRE );
79072805
LW
8289
8290 case KEY_reset:
8291 UNI(OP_RESET);
8292
8293 case KEY_redo:
c31f6d3b 8294 PL_expect = XOPERATOR;
a0d0e21e 8295 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
8296 LOOPX(OP_REDO);
8297
8298 case KEY_rename:
a0d0e21e 8299 LOP(OP_RENAME,XTERM);
79072805
LW
8300
8301 case KEY_rand:
8302 UNI(OP_RAND);
8303
8304 case KEY_rmdir:
8305 UNI(OP_RMDIR);
8306
8307 case KEY_rindex:
a0d0e21e 8308 LOP(OP_RINDEX,XTERM);
79072805
LW
8309
8310 case KEY_read:
a0d0e21e 8311 LOP(OP_READ,XTERM);
79072805
LW
8312
8313 case KEY_readdir:
8314 UNI(OP_READDIR);
8315
93a17b20 8316 case KEY_readline:
6f33ba73 8317 UNIDOR(OP_READLINE);
93a17b20
LW
8318
8319 case KEY_readpipe:
0858480c 8320 UNIDOR(OP_BACKTICK);
93a17b20 8321
79072805
LW
8322 case KEY_rewinddir:
8323 UNI(OP_REWINDDIR);
8324
8325 case KEY_recv:
a0d0e21e 8326 LOP(OP_RECV,XTERM);
79072805
LW
8327
8328 case KEY_reverse:
a0d0e21e 8329 LOP(OP_REVERSE,XTERM);
79072805
LW
8330
8331 case KEY_readlink:
6f33ba73 8332 UNIDOR(OP_READLINK);
79072805
LW
8333
8334 case KEY_ref:
8335 UNI(OP_REF);
8336
8337 case KEY_s:
8338 s = scan_subst(s);
6154021b 8339 if (pl_yylval.opval)
79072805
LW
8340 TERM(sublex_start());
8341 else
8342 TOKEN(1); /* force error */
8343
0d863452
RH
8344 case KEY_say:
8345 checkcomma(s,PL_tokenbuf,"filehandle");
8346 LOP(OP_SAY,XREF);
8347
a0d0e21e
LW
8348 case KEY_chomp:
8349 UNI(OP_CHOMP);
4e553d73 8350
79072805
LW
8351 case KEY_scalar:
8352 UNI(OP_SCALAR);
8353
8354 case KEY_select:
a0d0e21e 8355 LOP(OP_SELECT,XTERM);
79072805
LW
8356
8357 case KEY_seek:
a0d0e21e 8358 LOP(OP_SEEK,XTERM);
79072805
LW
8359
8360 case KEY_semctl:
a0d0e21e 8361 LOP(OP_SEMCTL,XTERM);
79072805
LW
8362
8363 case KEY_semget:
a0d0e21e 8364 LOP(OP_SEMGET,XTERM);
79072805
LW
8365
8366 case KEY_semop:
a0d0e21e 8367 LOP(OP_SEMOP,XTERM);
79072805
LW
8368
8369 case KEY_send:
a0d0e21e 8370 LOP(OP_SEND,XTERM);
79072805
LW
8371
8372 case KEY_setpgrp:
a0d0e21e 8373 LOP(OP_SETPGRP,XTERM);
79072805
LW
8374
8375 case KEY_setpriority:
a0d0e21e 8376 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
8377
8378 case KEY_sethostent:
ff68c719 8379 UNI(OP_SHOSTENT);
79072805
LW
8380
8381 case KEY_setnetent:
ff68c719 8382 UNI(OP_SNETENT);
79072805
LW
8383
8384 case KEY_setservent:
ff68c719 8385 UNI(OP_SSERVENT);
79072805
LW
8386
8387 case KEY_setprotoent:
ff68c719 8388 UNI(OP_SPROTOENT);
79072805
LW
8389
8390 case KEY_setpwent:
8391 FUN0(OP_SPWENT);
8392
8393 case KEY_setgrent:
8394 FUN0(OP_SGRENT);
8395
8396 case KEY_seekdir:
a0d0e21e 8397 LOP(OP_SEEKDIR,XTERM);
79072805
LW
8398
8399 case KEY_setsockopt:
a0d0e21e 8400 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
8401
8402 case KEY_shift:
6f33ba73 8403 UNIDOR(OP_SHIFT);
79072805
LW
8404
8405 case KEY_shmctl:
a0d0e21e 8406 LOP(OP_SHMCTL,XTERM);
79072805
LW
8407
8408 case KEY_shmget:
a0d0e21e 8409 LOP(OP_SHMGET,XTERM);
79072805
LW
8410
8411 case KEY_shmread:
a0d0e21e 8412 LOP(OP_SHMREAD,XTERM);
79072805
LW
8413
8414 case KEY_shmwrite:
a0d0e21e 8415 LOP(OP_SHMWRITE,XTERM);
79072805
LW
8416
8417 case KEY_shutdown:
a0d0e21e 8418 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
8419
8420 case KEY_sin:
8421 UNI(OP_SIN);
8422
8423 case KEY_sleep:
8424 UNI(OP_SLEEP);
8425
8426 case KEY_socket:
a0d0e21e 8427 LOP(OP_SOCKET,XTERM);
79072805
LW
8428
8429 case KEY_socketpair:
a0d0e21e 8430 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
8431
8432 case KEY_sort:
3280af22 8433 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 8434 s = SKIPSPACE1(s);
3280af22 8435 PL_expect = XTERM;
15f0808c 8436 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 8437 LOP(OP_SORT,XREF);
79072805
LW
8438
8439 case KEY_split:
a0d0e21e 8440 LOP(OP_SPLIT,XTERM);
79072805
LW
8441
8442 case KEY_sprintf:
a0d0e21e 8443 LOP(OP_SPRINTF,XTERM);
79072805
LW
8444
8445 case KEY_splice:
a0d0e21e 8446 LOP(OP_SPLICE,XTERM);
79072805
LW
8447
8448 case KEY_sqrt:
8449 UNI(OP_SQRT);
8450
8451 case KEY_srand:
8452 UNI(OP_SRAND);
8453
8454 case KEY_stat:
8455 UNI(OP_STAT);
8456
8457 case KEY_study:
79072805
LW
8458 UNI(OP_STUDY);
8459
8460 case KEY_substr:
a0d0e21e 8461 LOP(OP_SUBSTR,XTERM);
79072805
LW
8462
8463 case KEY_format:
8464 case KEY_sub:
93a17b20 8465 really_sub:
09bef843 8466 {
24b6ef70 8467 char * const tmpbuf = PL_tokenbuf + 1;
9c5ffd7c 8468 SSize_t tboffset = 0;
09bef843 8469 expectation attrful;
28cc6278 8470 bool have_name, have_proto;
f54cb97a 8471 const int key = tmp;
09bef843 8472
5db06880
NC
8473#ifdef PERL_MAD
8474 SV *tmpwhite = 0;
8475
cd81e915 8476 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1cac5c33
FC
8477 SV *subtoken = PL_madskills
8478 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8479 : NULL;
cd81e915 8480 PL_thistoken = 0;
5db06880
NC
8481
8482 d = s;
8483 s = SKIPSPACE2(s,tmpwhite);
8484#else
8767b1ab 8485 d = s;
09bef843 8486 s = skipspace(s);
5db06880 8487#endif
09bef843 8488
7e2040f0 8489 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
8490 (*s == ':' && s[1] == ':'))
8491 {
5db06880 8492#ifdef PERL_MAD
4f61fd4b 8493 SV *nametoke = NULL;
5db06880
NC
8494#endif
8495
09bef843
SB
8496 PL_expect = XBLOCK;
8497 attrful = XATTRBLOCK;
b1b65b59
JH
8498 /* remember buffer pos'n for later force_word */
8499 tboffset = s - PL_oldbufptr;
24b6ef70
FC
8500 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8501 &len);
5db06880
NC
8502#ifdef PERL_MAD
8503 if (PL_madskills)
af9f5953 8504 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
5db06880 8505#endif
689aac7b
FC
8506 *PL_tokenbuf = '&';
8507 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8508 || pad_findmy_pvn(
8509 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8510 ) != NOT_IN_PAD)
6502358f 8511 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
8512 else {
8513 sv_setsv(PL_subname,PL_curstname);
396482e1 8514 sv_catpvs(PL_subname,"::");
09bef843
SB
8515 sv_catpvn(PL_subname,tmpbuf,len);
8516 }
af9f5953
BF
8517 if (SvUTF8(PL_linestr))
8518 SvUTF8_on(PL_subname);
09bef843 8519 have_name = TRUE;
5db06880 8520
60ac52eb 8521
5db06880 8522#ifdef PERL_MAD
60ac52eb
FC
8523 start_force(0);
8524 CURMAD('X', nametoke);
8525 CURMAD('_', tmpwhite);
4210d3f1 8526 force_ident_maybe_lex('&');
5db06880
NC
8527
8528 s = SKIPSPACE2(d,tmpwhite);
8529#else
8530 s = skipspace(d);
8531#endif
09bef843 8532 }
463ee0b2 8533 else {
8767b1ab
FC
8534 if (key == KEY_my || key == KEY_our || key==KEY_state)
8535 {
8536 *d = '\0';
8537 /* diag_listed_as: Missing name in "%s sub" */
8538 Perl_croak(aTHX_
8539 "Missing name in \"%s\"", PL_bufptr);
8540 }
09bef843
SB
8541 PL_expect = XTERMBLOCK;
8542 attrful = XATTRTERM;
76f68e9b 8543 sv_setpvs(PL_subname,"?");
09bef843 8544 have_name = FALSE;
463ee0b2 8545 }
4633a7c4 8546
09bef843 8547 if (key == KEY_format) {
5db06880 8548#ifdef PERL_MAD
cd81e915 8549 PL_thistoken = subtoken;
5db06880
NC
8550 s = d;
8551#else
09bef843 8552 if (have_name)
b1b65b59
JH
8553 (void) force_word(PL_oldbufptr + tboffset, WORD,
8554 FALSE, TRUE, TRUE);
5db06880 8555#endif
64a40898 8556 PREBLOCK(FORMAT);
09bef843 8557 }
79072805 8558
09bef843
SB
8559 /* Look for a prototype */
8560 if (*s == '(') {
d9f2850e
RGS
8561 char *p;
8562 bool bad_proto = FALSE;
9e8d7757
RB
8563 bool in_brackets = FALSE;
8564 char greedy_proto = ' ';
8565 bool proto_after_greedy_proto = FALSE;
8566 bool must_be_last = FALSE;
8567 bool underscore = FALSE;
aef2a98a 8568 bool seen_underscore = FALSE;
197afce1 8569 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
dab1c735 8570 STRLEN tmplen;
09bef843 8571
4d68ffa0 8572 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
37fd879b 8573 if (!s)
09bef843 8574 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8575 /* strip spaces and check for bad characters */
dab1c735 8576 d = SvPV(PL_lex_stuff, tmplen);
09bef843 8577 tmp = 0;
dab1c735 8578 for (p = d; tmplen; tmplen--, ++p) {
d9f2850e 8579 if (!isSPACE(*p)) {
dab1c735 8580 d[tmp++] = *p;
9e8d7757 8581
197afce1 8582 if (warnillegalproto) {
9e8d7757
RB
8583 if (must_be_last)
8584 proto_after_greedy_proto = TRUE;
dab1c735 8585 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
9e8d7757
RB
8586 bad_proto = TRUE;
8587 }
8588 else {
8589 if ( underscore ) {
34daab0f 8590 if ( !strchr(";@%", *p) )
9e8d7757
RB
8591 bad_proto = TRUE;
8592 underscore = FALSE;
8593 }
8594 if ( *p == '[' ) {
8595 in_brackets = TRUE;
8596 }
8597 else if ( *p == ']' ) {
8598 in_brackets = FALSE;
8599 }
8600 else if ( (*p == '@' || *p == '%') &&
8601 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8602 !in_brackets ) {
8603 must_be_last = TRUE;
8604 greedy_proto = *p;
8605 }
8606 else if ( *p == '_' ) {
aef2a98a 8607 underscore = seen_underscore = TRUE;
9e8d7757
RB
8608 }
8609 }
8610 }
d37a9538 8611 }
09bef843 8612 }
dab1c735 8613 d[tmp] = '\0';
9e8d7757 8614 if (proto_after_greedy_proto)
197afce1 8615 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8616 "Prototype after '%c' for %"SVf" : %s",
8617 greedy_proto, SVfARG(PL_subname), d);
dab1c735
BF
8618 if (bad_proto) {
8619 SV *dsv = newSVpvs_flags("", SVs_TEMP);
197afce1 8620 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8621 "Illegal character %sin prototype for %"SVf" : %s",
8622 seen_underscore ? "after '_' " : "",
dab1c735 8623 SVfARG(PL_subname),
97eb901d
BF
8624 SvUTF8(PL_lex_stuff)
8625 ? sv_uni_display(dsv,
8626 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8627 tmp,
8628 UNI_DISPLAY_ISPRINT)
8629 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8630 PERL_PV_ESCAPE_NONASCII));
dab1c735
BF
8631 }
8632 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8633 have_proto = TRUE;
68dc0745 8634
5db06880
NC
8635#ifdef PERL_MAD
8636 start_force(0);
cd81e915 8637 CURMAD('q', PL_thisopen);
5db06880 8638 CURMAD('_', tmpwhite);
cd81e915
NC
8639 CURMAD('=', PL_thisstuff);
8640 CURMAD('Q', PL_thisclose);
5db06880
NC
8641 NEXTVAL_NEXTTOKE.opval =
8642 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8643 PL_lex_stuff = NULL;
5db06880
NC
8644 force_next(THING);
8645
8646 s = SKIPSPACE2(s,tmpwhite);
8647#else
09bef843 8648 s = skipspace(s);
5db06880 8649#endif
4633a7c4 8650 }
09bef843
SB
8651 else
8652 have_proto = FALSE;
8653
8654 if (*s == ':' && s[1] != ':')
8655 PL_expect = attrful;
8e742a20
MHM
8656 else if (*s != '{' && key == KEY_sub) {
8657 if (!have_name)
8658 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8659 else if (*s != ';' && *s != '}')
be2597df 8660 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8661 }
09bef843 8662
5db06880
NC
8663#ifdef PERL_MAD
8664 start_force(0);
8665 if (tmpwhite) {
8666 if (PL_madskills)
6b29d1f5 8667 curmad('^', newSVpvs(""));
5db06880
NC
8668 CURMAD('_', tmpwhite);
8669 }
8670 force_next(0);
8671
cd81e915 8672 PL_thistoken = subtoken;
5db06880 8673#else
09bef843 8674 if (have_proto) {
9ded7720 8675 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8676 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8677 PL_lex_stuff = NULL;
09bef843 8678 force_next(THING);
68dc0745 8679 }
5db06880 8680#endif
09bef843 8681 if (!have_name) {
49a54bbe
NC
8682 if (PL_curstash)
8683 sv_setpvs(PL_subname, "__ANON__");
8684 else
8685 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8686 TOKEN(ANONSUB);
4633a7c4 8687 }
5db06880 8688#ifndef PERL_MAD
4210d3f1 8689 force_ident_maybe_lex('&');
5db06880 8690#endif
09bef843 8691 TOKEN(SUB);
4633a7c4 8692 }
79072805
LW
8693
8694 case KEY_system:
a0d0e21e 8695 LOP(OP_SYSTEM,XREF);
79072805
LW
8696
8697 case KEY_symlink:
a0d0e21e 8698 LOP(OP_SYMLINK,XTERM);
79072805
LW
8699
8700 case KEY_syscall:
a0d0e21e 8701 LOP(OP_SYSCALL,XTERM);
79072805 8702
c07a80fd 8703 case KEY_sysopen:
8704 LOP(OP_SYSOPEN,XTERM);
8705
137443ea 8706 case KEY_sysseek:
8707 LOP(OP_SYSSEEK,XTERM);
8708
79072805 8709 case KEY_sysread:
a0d0e21e 8710 LOP(OP_SYSREAD,XTERM);
79072805
LW
8711
8712 case KEY_syswrite:
a0d0e21e 8713 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8714
8715 case KEY_tr:
8ce4b50f 8716 case KEY_y:
79072805
LW
8717 s = scan_trans(s);
8718 TERM(sublex_start());
8719
8720 case KEY_tell:
8721 UNI(OP_TELL);
8722
8723 case KEY_telldir:
8724 UNI(OP_TELLDIR);
8725
463ee0b2 8726 case KEY_tie:
a0d0e21e 8727 LOP(OP_TIE,XTERM);
463ee0b2 8728
c07a80fd 8729 case KEY_tied:
8730 UNI(OP_TIED);
8731
79072805
LW
8732 case KEY_time:
8733 FUN0(OP_TIME);
8734
8735 case KEY_times:
8736 FUN0(OP_TMS);
8737
8738 case KEY_truncate:
a0d0e21e 8739 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8740
8741 case KEY_uc:
8742 UNI(OP_UC);
8743
8744 case KEY_ucfirst:
8745 UNI(OP_UCFIRST);
8746
463ee0b2
LW
8747 case KEY_untie:
8748 UNI(OP_UNTIE);
8749
79072805 8750 case KEY_until:
78cdf107
Z
8751 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8752 return REPORT(0);
6154021b 8753 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8754 OPERATOR(UNTIL);
8755
8756 case KEY_unless:
78cdf107
Z
8757 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8758 return REPORT(0);
6154021b 8759 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8760 OPERATOR(UNLESS);
8761
8762 case KEY_unlink:
a0d0e21e 8763 LOP(OP_UNLINK,XTERM);
79072805
LW
8764
8765 case KEY_undef:
6f33ba73 8766 UNIDOR(OP_UNDEF);
79072805
LW
8767
8768 case KEY_unpack:
a0d0e21e 8769 LOP(OP_UNPACK,XTERM);
79072805
LW
8770
8771 case KEY_utime:
a0d0e21e 8772 LOP(OP_UTIME,XTERM);
79072805
LW
8773
8774 case KEY_umask:
6f33ba73 8775 UNIDOR(OP_UMASK);
79072805
LW
8776
8777 case KEY_unshift:
a0d0e21e
LW
8778 LOP(OP_UNSHIFT,XTERM);
8779
8780 case KEY_use:
468aa647 8781 s = tokenize_use(1, s);
a0d0e21e 8782 OPERATOR(USE);
79072805
LW
8783
8784 case KEY_values:
8785 UNI(OP_VALUES);
8786
8787 case KEY_vec:
a0d0e21e 8788 LOP(OP_VEC,XTERM);
79072805 8789
0d863452 8790 case KEY_when:
78cdf107
Z
8791 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8792 return REPORT(0);
6154021b 8793 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8794 OPERATOR(WHEN);
8795
79072805 8796 case KEY_while:
78cdf107
Z
8797 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8798 return REPORT(0);
6154021b 8799 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8800 OPERATOR(WHILE);
8801
8802 case KEY_warn:
3280af22 8803 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8804 LOP(OP_WARN,XTERM);
79072805
LW
8805
8806 case KEY_wait:
8807 FUN0(OP_WAIT);
8808
8809 case KEY_waitpid:
a0d0e21e 8810 LOP(OP_WAITPID,XTERM);
79072805
LW
8811
8812 case KEY_wantarray:
8813 FUN0(OP_WANTARRAY);
8814
8815 case KEY_write:
9d116dd7
JH
8816#ifdef EBCDIC
8817 {
df3728a2
JH
8818 char ctl_l[2];
8819 ctl_l[0] = toCTRL('L');
8820 ctl_l[1] = '\0';
fafc274c 8821 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8822 }
8823#else
fafc274c
NC
8824 /* Make sure $^L is defined */
8825 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8826#endif
79072805
LW
8827 UNI(OP_ENTERWRITE);
8828
8829 case KEY_x:
78cdf107
Z
8830 if (PL_expect == XOPERATOR) {
8831 if (*s == '=' && !PL_lex_allbrackets &&
8832 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8833 return REPORT(0);
79072805 8834 Mop(OP_REPEAT);
78cdf107 8835 }
79072805
LW
8836 check_uni();
8837 goto just_a_word;
8838
a0d0e21e 8839 case KEY_xor:
78cdf107
Z
8840 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8841 return REPORT(0);
6154021b 8842 pl_yylval.ival = OP_XOR;
a0d0e21e 8843 OPERATOR(OROP);
79072805 8844 }
49dc05e3 8845 }}
79072805 8846}
bf4acbe4
GS
8847#ifdef __SC__
8848#pragma segment Main
8849#endif
79072805 8850
3875fc11
FC
8851/*
8852 S_pending_ident
8853
8854 Looks up an identifier in the pad or in a package
8855
8856 Returns:
8857 PRIVATEREF if this is a lexical name.
8858 WORD if this belongs to a package.
8859
8860 Structure:
8861 if we're in a my declaration
8862 croak if they tried to say my($foo::bar)
8863 build the ops for a my() declaration
8864 if it's an access to a my() variable
8865 build ops for access to a my() variable
8866 if in a dq string, and they've said @foo and we can't find @foo
8867 warn
8868 build ops for a bareword
8869*/
8870
3f33d153
FC
8871static int
8872S_pending_ident(pTHX)
8eceec63 8873{
97aff369 8874 dVAR;
bbd11bfc 8875 PADOFFSET tmp = 0;
3f33d153 8876 const char pit = (char)pl_yylval.ival;
9bde8eb0
NC
8877 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8878 /* All routes through this function want to know if there is a colon. */
c099d646 8879 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63 8880
3f33d153
FC
8881 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8882 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8883
8884 /* if we're in a my(), we can't allow dynamics here.
8885 $foo'bar has already been turned into $foo::bar, so
8886 just check for colons.
8887
8888 if it's a legal name, the OP is a PADANY.
8889 */
8890 if (PL_in_my) {
8891 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8892 if (has_colon)
4bca4ee0 8893 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8eceec63 8894 "variable %s in \"our\"",
4bca4ee0 8895 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
bc9b26ca 8896 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8897 }
8898 else {
9bde8eb0 8899 if (has_colon)
58576270
BF
8900 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8901 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8902 UTF ? SVf_UTF8 : 0);
8eceec63 8903
3f33d153
FC
8904 pl_yylval.opval = newOP(OP_PADANY, 0);
8905 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
bc9b26ca 8906 UTF ? SVf_UTF8 : 0);
3f33d153 8907 return PRIVATEREF;
8eceec63
SC
8908 }
8909 }
8910
8911 /*
8912 build the ops for accesses to a my() variable.
8eceec63
SC
8913 */
8914
9bde8eb0 8915 if (!has_colon) {
8716503d 8916 if (!PL_in_my)
bc9b26ca
BF
8917 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8918 UTF ? SVf_UTF8 : 0);
8716503d 8919 if (tmp != NOT_IN_PAD) {
8eceec63 8920 /* might be an "our" variable" */
00b1698f 8921 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8922 /* build ops for a bareword */
b64e5050
AL
8923 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8924 HEK * const stashname = HvNAME_HEK(stash);
8925 SV * const sym = newSVhek(stashname);
396482e1 8926 sv_catpvs(sym, "::");
2a33114a 8927 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
3f33d153
FC
8928 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8929 pl_yylval.opval->op_private = OPpCONST_ENTERED;
4210d3f1
FC
8930 if (pit != '&')
8931 gv_fetchsv(sym,
8eceec63
SC
8932 (PL_in_eval
8933 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8934 : GV_ADDMULTI
8eceec63
SC
8935 ),
8936 ((PL_tokenbuf[0] == '$') ? SVt_PV
8937 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8938 : SVt_PVHV));
3f33d153 8939 return WORD;
8eceec63
SC
8940 }
8941
3f33d153
FC
8942 pl_yylval.opval = newOP(OP_PADANY, 0);
8943 pl_yylval.opval->op_targ = tmp;
8944 return PRIVATEREF;
8eceec63
SC
8945 }
8946 }
8947
8948 /*
8949 Whine if they've said @foo in a doublequoted string,
8950 and @foo isn't a variable we can find in the symbol
8951 table.
8952 */
d824713b
NC
8953 if (ckWARN(WARN_AMBIGUOUS) &&
8954 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
0be4d16f
BF
8955 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8956 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8eceec63 8957 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8958 /* DO NOT warn for @- and @+ */
8959 && !( PL_tokenbuf[2] == '\0' &&
8960 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8961 )
8eceec63
SC
8962 {
8963 /* Downgraded from fatal to warning 20000522 mjd */
d824713b 8964 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
29fb1d0e
BF
8965 "Possible unintended interpolation of %"SVf" in string",
8966 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8967 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8eceec63
SC
8968 }
8969 }
8970
8971 /* build ops for a bareword */
3f33d153
FC
8972 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8973 newSVpvn_flags(PL_tokenbuf + 1,
0be4d16f
BF
8974 tokenbuf_len - 1,
8975 UTF ? SVf_UTF8 : 0 ));
3f33d153 8976 pl_yylval.opval->op_private = OPpCONST_ENTERED;
4210d3f1
FC
8977 if (pit != '&')
8978 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
0be4d16f
BF
8979 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8980 | ( UTF ? SVf_UTF8 : 0 ),
223f0fb7
NC
8981 ((PL_tokenbuf[0] == '$') ? SVt_PV
8982 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8983 : SVt_PVHV));
3f33d153 8984 return WORD;
8eceec63
SC
8985}
8986
76e3520e 8987STATIC void
c94115d8 8988S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8989{
97aff369 8990 dVAR;
2f3197b3 8991
7918f24d
NC
8992 PERL_ARGS_ASSERT_CHECKCOMMA;
8993
d008e5eb 8994 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8995 if (ckWARN(WARN_SYNTAX)) {
8996 int level = 1;
26ff0806 8997 const char *w;
d008e5eb
GS
8998 for (w = s+2; *w && level; w++) {
8999 if (*w == '(')
9000 ++level;
9001 else if (*w == ')')
9002 --level;
9003 }
888fea98
NC
9004 while (isSPACE(*w))
9005 ++w;
b1439985
RGS
9006 /* the list of chars below is for end of statements or
9007 * block / parens, boolean operators (&&, ||, //) and branch
9008 * constructs (or, and, if, until, unless, while, err, for).
9009 * Not a very solid hack... */
9010 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 9011 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 9012 "%s (...) interpreted as function",name);
d008e5eb 9013 }
2f3197b3 9014 }
3280af22 9015 while (s < PL_bufend && isSPACE(*s))
2f3197b3 9016 s++;
a687059c
LW
9017 if (*s == '(')
9018 s++;
3280af22 9019 while (s < PL_bufend && isSPACE(*s))
a687059c 9020 s++;
7e2040f0 9021 if (isIDFIRST_lazy_if(s,UTF)) {
d0fb66e4
BF
9022 const char * const w = s;
9023 s += UTF ? UTF8SKIP(s) : 1;
8a2bca12 9024 while (isWORDCHAR_lazy_if(s,UTF))
d0fb66e4 9025 s += UTF ? UTF8SKIP(s) : 1;
3280af22 9026 while (s < PL_bufend && isSPACE(*s))
a687059c 9027 s++;
e929a76b 9028 if (*s == ',') {
c94115d8 9029 GV* gv;
5458a98a 9030 if (keyword(w, s - w, 0))
e929a76b 9031 return;
c94115d8 9032
2e38bce1 9033 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
c94115d8 9034 if (gv && GvCVu(gv))
abbb3198 9035 return;
cea2e8a9 9036 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
9037 }
9038 }
9039}
9040
14ca8ff4 9041/* Either returns sv, or mortalizes/frees sv and returns a new SV*.
423cee85
JH
9042 Best used as sv=new_constant(..., sv, ...).
9043 If s, pv are NULL, calls subroutine with one argument,
107160e2
KW
9044 and <type> is used with error messages only.
9045 <type> is assumed to be well formed UTF-8 */
423cee85 9046
b3ac6de7 9047STATIC SV *
eb0d8d16
NC
9048S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9049 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 9050{
27da23d5 9051 dVAR; dSP;
fbb93542 9052 HV * table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 9053 SV *res;
eed484f9 9054 SV *errsv = NULL;
b3ac6de7
IZ
9055 SV **cvp;
9056 SV *cv, *typesv;
89e33a05 9057 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 9058
7918f24d 9059 PERL_ARGS_ASSERT_NEW_CONSTANT;
f374c70f
FC
9060 /* We assume that this is true: */
9061 if (*key == 'c') { assert (strEQ(key, "charnames")); }
bb4784f0 9062 assert(type || s);
7918f24d 9063
f8988b41 9064 /* charnames doesn't work well if there have been errors found */
f374c70f 9065 if (PL_error_count > 0 && *key == 'c')
14ca8ff4
FC
9066 {
9067 SvREFCNT_dec_NN(sv);
f8988b41 9068 return &PL_sv_undef;
14ca8ff4 9069 }
f8988b41 9070
5f7f7af5 9071 sv_2mortal(sv); /* Parent created it permanently */
fbb93542
KW
9072 if (!table
9073 || ! (PL_hints & HINT_LOCALIZE_HH)
9074 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9075 || ! SvOK(*cvp))
9076 {
5f7f7af5 9077 char *msg;
423cee85 9078
fbb93542
KW
9079 /* Here haven't found what we're looking for. If it is charnames,
9080 * perhaps it needs to be loaded. Try doing that before giving up */
f374c70f 9081 if (*key == 'c') {
fbb93542
KW
9082 Perl_load_module(aTHX_
9083 0,
9084 newSVpvs("_charnames"),
9085 /* version parameter; no need to specify it, as if
9086 * we get too early a version, will fail anyway,
9087 * not being able to find '_charnames' */
9088 NULL,
9089 newSVpvs(":full"),
9090 newSVpvs(":short"),
9091 NULL);
9092 SPAGAIN;
9093 table = GvHV(PL_hintgv);
9094 if (table
9095 && (PL_hints & HINT_LOCALIZE_HH)
9096 && (cvp = hv_fetch(table, key, keylen, FALSE))
9097 && SvOK(*cvp))
9098 {
9099 goto now_ok;
9100 }
9101 }
9102 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
5f7f7af5 9103 msg = Perl_form(aTHX_
bb4784f0
FC
9104 "Constant(%.*s) unknown",
9105 (int)(type ? typelen : len),
9106 (type ? type: s));
fbb93542
KW
9107 }
9108 else {
3918902d
KW
9109 why1 = "$^H{";
9110 why2 = key;
9111 why3 = "} is not defined";
9112 report:
f374c70f 9113 if (*key == 'c') {
5f7f7af5 9114 msg = Perl_form(aTHX_
bde9e88d
KW
9115 /* The +3 is for '\N{'; -4 for that, plus '}' */
9116 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
5f7f7af5 9117 );
90249f0a
KW
9118 }
9119 else {
5f7f7af5 9120 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
bb4784f0
FC
9121 (int)(type ? typelen : len),
9122 (type ? type: s), why1, why2, why3);
90249f0a 9123 }
3918902d 9124 }
5f7f7af5
FC
9125 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9126 return SvREFCNT_inc_simple_NN(sv);
423cee85 9127 }
fbb93542 9128now_ok:
b3ac6de7 9129 cv = *cvp;
423cee85 9130 if (!pv && s)
59cd0e26 9131 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 9132 if (type && pv)
59cd0e26 9133 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 9134 else
423cee85 9135 typesv = &PL_sv_undef;
4e553d73 9136
e788e7d3 9137 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
9138 ENTER ;
9139 SAVETMPS;
4e553d73 9140
423cee85 9141 PUSHMARK(SP) ;
a5845cb7 9142 EXTEND(sp, 3);
423cee85
JH
9143 if (pv)
9144 PUSHs(pv);
b3ac6de7 9145 PUSHs(sv);
423cee85
JH
9146 if (pv)
9147 PUSHs(typesv);
b3ac6de7 9148 PUTBACK;
423cee85 9149 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 9150
423cee85 9151 SPAGAIN ;
4e553d73 9152
423cee85 9153 /* Check the eval first */
eed484f9 9154 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
c06ecf4f
DD
9155 STRLEN errlen;
9156 const char * errstr;
eed484f9
DD
9157 sv_catpvs(errsv, "Propagated");
9158 errstr = SvPV_const(errsv, errlen);
c06ecf4f 9159 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
e1f15930 9160 (void)POPs;
ae5c22c1 9161 res = SvREFCNT_inc_simple_NN(sv);
423cee85
JH
9162 }
9163 else {
9164 res = POPs;
ae5c22c1 9165 SvREFCNT_inc_simple_void_NN(res);
423cee85 9166 }
4e553d73 9167
423cee85
JH
9168 PUTBACK ;
9169 FREETMPS ;
9170 LEAVE ;
b3ac6de7 9171 POPSTACK;
4e553d73 9172
b3ac6de7 9173 if (!SvOK(res)) {
423cee85
JH
9174 why1 = "Call to &{$^H{";
9175 why2 = key;
f0af216f 9176 why3 = "}} did not return a defined value";
423cee85 9177 sv = res;
5f7f7af5 9178 (void)sv_2mortal(sv);
423cee85 9179 goto report;
9b0e499b 9180 }
423cee85 9181
9b0e499b 9182 return res;
b3ac6de7 9183}
4e553d73 9184
d0a148a6
NC
9185/* Returns a NUL terminated string, with the length of the string written to
9186 *slp
9187 */
76e3520e 9188STATIC char *
5aaab254 9189S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 9190{
97aff369 9191 dVAR;
eb578fdb
KW
9192 char *d = dest;
9193 char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
9194
9195 PERL_ARGS_ASSERT_SCAN_WORD;
9196
463ee0b2 9197 for (;;) {
8903cb82 9198 if (d >= e)
cea2e8a9 9199 Perl_croak(aTHX_ ident_too_long);
0eb30aeb 9200 if (isWORDCHAR(*s)
15861f94
KW
9201 || (!UTF && isALPHANUMERIC_L1(*s))) /* UTF handled below */
9202 {
463ee0b2 9203 *d++ = *s++;
15861f94 9204 }
c35e046a 9205 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
9206 *d++ = ':';
9207 *d++ = ':';
9208 s++;
9209 }
c35e046a 9210 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
9211 *d++ = *s++;
9212 *d++ = *s++;
9213 }
0eb30aeb 9214 else if (UTF && UTF8_IS_START(*s) && isWORDCHAR_utf8((U8*)s)) {
a0ed51b3 9215 char *t = s + UTF8SKIP(s);
c35e046a 9216 size_t len;
7dbf68d2 9217 while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t))
a0ed51b3 9218 t += UTF8SKIP(t);
c35e046a
AL
9219 len = t - s;
9220 if (d + len > e)
cea2e8a9 9221 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
9222 Copy(s, d, len, char);
9223 d += len;
a0ed51b3
LW
9224 s = t;
9225 }
463ee0b2
LW
9226 else {
9227 *d = '\0';
9228 *slp = d - dest;
9229 return s;
e929a76b 9230 }
378cc40b
LW
9231 }
9232}
9233
76e3520e 9234STATIC char *
5aaab254 9235S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 9236{
97aff369 9237 dVAR;
6136c704 9238 char *bracket = NULL;
748a9306 9239 char funny = *s++;
eb578fdb
KW
9240 char *d = dest;
9241 char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 9242
7918f24d
NC
9243 PERL_ARGS_ASSERT_SCAN_IDENT;
9244
a0d0e21e 9245 if (isSPACE(*s))
29595ff2 9246 s = PEEKSPACE(s);
de3bb511 9247 if (isDIGIT(*s)) {
8903cb82 9248 while (isDIGIT(*s)) {
9249 if (d >= e)
cea2e8a9 9250 Perl_croak(aTHX_ ident_too_long);
378cc40b 9251 *d++ = *s++;
8903cb82 9252 }
378cc40b
LW
9253 }
9254 else {
463ee0b2 9255 for (;;) {
8903cb82 9256 if (d >= e)
cea2e8a9 9257 Perl_croak(aTHX_ ident_too_long);
0eb30aeb 9258 if (isWORDCHAR(*s)) /* UTF handled below */
463ee0b2 9259 *d++ = *s++;
7e2040f0 9260 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
9261 *d++ = ':';
9262 *d++ = ':';
9263 s++;
9264 }
a0d0e21e 9265 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
9266 *d++ = *s++;
9267 *d++ = *s++;
9268 }
0eb30aeb 9269 else if (UTF && UTF8_IS_START(*s) && isWORDCHAR_utf8((U8*)s)) {
a0ed51b3 9270 char *t = s + UTF8SKIP(s);
7dbf68d2 9271 while (UTF8_IS_CONTINUED(*t) && _is_utf8_mark((U8*)t))
a0ed51b3
LW
9272 t += UTF8SKIP(t);
9273 if (d + (t - s) > e)
cea2e8a9 9274 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
9275 Copy(s, d, t - s, char);
9276 d += t - s;
9277 s = t;
9278 }
463ee0b2
LW
9279 else
9280 break;
9281 }
378cc40b
LW
9282 }
9283 *d = '\0';
9284 d = dest;
79072805 9285 if (*d) {
3280af22
NIS
9286 if (PL_lex_state != LEX_NORMAL)
9287 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 9288 return s;
378cc40b 9289 }
748a9306 9290 if (*s == '$' && s[1] &&
8a2bca12 9291 (isWORDCHAR_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 9292 {
4810e5ec 9293 return s;
5cd24f17 9294 }
79072805
LW
9295 if (*s == '{') {
9296 bracket = s;
9297 s++;
9298 }
204e6232
BF
9299 if (s < send) {
9300 if (UTF) {
9301 const STRLEN skip = UTF8SKIP(s);
9302 STRLEN i;
9303 d[skip] = '\0';
9304 for ( i = 0; i < skip; i++ )
9305 d[i] = *s++;
9306 }
9307 else {
9308 *d = *s++;
9309 d[1] = '\0';
9310 }
9311 }
2b92dfce 9312 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 9313 *d = toCTRL(*s);
9314 s++;
de3bb511 9315 }
fbdd83da
DIM
9316 else if (ck_uni && !bracket)
9317 check_uni();
79072805 9318 if (bracket) {
748a9306 9319 if (isSPACE(s[-1])) {
fa83b5b6 9320 while (s < send) {
f54cb97a 9321 const char ch = *s++;
bf4acbe4 9322 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 9323 *d = ch;
9324 break;
9325 }
9326 }
748a9306 9327 }
7e2040f0 9328 if (isIDFIRST_lazy_if(d,UTF)) {
204e6232 9329 d += UTF8SKIP(d);
a0ed51b3 9330 if (UTF) {
6136c704 9331 char *end = s;
8a2bca12 9332 while ((end < send && isWORDCHAR_lazy_if(end,UTF)) || *end == ':') {
6136c704 9333 end += UTF8SKIP(end);
7dbf68d2 9334 while (end < send && UTF8_IS_CONTINUED(*end) && _is_utf8_mark((U8*)end))
6136c704 9335 end += UTF8SKIP(end);
a0ed51b3 9336 }
6136c704
AL
9337 Copy(s, d, end - s, char);
9338 d += end - s;
9339 s = end;
a0ed51b3
LW
9340 }
9341 else {
0eb30aeb 9342 while ((isWORDCHAR(*s) || *s == ':') && d < e)
a0ed51b3 9343 *d++ = *s++;
2b92dfce 9344 if (d >= e)
cea2e8a9 9345 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 9346 }
79072805 9347 *d = '\0';
c35e046a
AL
9348 while (s < send && SPACE_OR_TAB(*s))
9349 s++;
ff68c719 9350 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 9351 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
9352 const char * const brack =
9353 (const char *)
9354 ((*s == '[') ? "[...]" : "{...}");
e850844c 9355 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 9356 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 9357 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
9358 funny, dest, brack, funny, dest, brack);
9359 }
79072805 9360 bracket++;
a0be28da 9361 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 9362 PL_lex_allbrackets++;
79072805
LW
9363 return s;
9364 }
4e553d73
NIS
9365 }
9366 /* Handle extended ${^Foo} variables
2b92dfce 9367 * 1999-02-27 mjd-perl-patch@plover.com */
0eb30aeb
KW
9368 else if (!isWORDCHAR(*d) && !isPRINT(*d) /* isCTRL(d) */
9369 && isWORDCHAR(*s))
2b92dfce
GS
9370 {
9371 d++;
0eb30aeb 9372 while (isWORDCHAR(*s) && d < e) {
2b92dfce
GS
9373 *d++ = *s++;
9374 }
9375 if (d >= e)
cea2e8a9 9376 Perl_croak(aTHX_ ident_too_long);
2b92dfce 9377 *d = '\0';
79072805
LW
9378 }
9379 if (*s == '}') {
9380 s++;
7df0d042 9381 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 9382 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
9383 PL_expect = XREF;
9384 }
d008e5eb 9385 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 9386 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241 9387 (keyword(dest, d - dest, 0)
5c66c3dd 9388 || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
d008e5eb 9389 {
5c66c3dd
BF
9390 SV *tmp = newSVpvn_flags( dest, d - dest,
9391 SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
c35e046a
AL
9392 if (funny == '#')
9393 funny = '@';
9014280d 9394 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5c66c3dd
BF
9395 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9396 funny, tmp, funny, tmp);
d008e5eb
GS
9397 }
9398 }
79072805
LW
9399 }
9400 else {
9401 s = bracket; /* let the parser handle it */
93a17b20 9402 *dest = '\0';
79072805
LW
9403 }
9404 }
3280af22
NIS
9405 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9406 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
9407 return s;
9408}
9409
858a358b 9410static bool
3955e1a9 9411S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
9412
9413 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9414 * the parse starting at 's', based on the subset that are valid in this
9415 * context input to this routine in 'valid_flags'. Advances s. Returns
96f3bfda
KW
9416 * TRUE if the input should be treated as a valid flag, so the next char
9417 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9418 * first call on the current regex. This routine will set it to any
9419 * charset modifier found. The caller shouldn't change it. This way,
9420 * another charset modifier encountered in the parse can be detected as an
9421 * error, as we have decided to allow only one */
858a358b
KW
9422
9423 const char c = **s;
84159251 9424 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
94b03d7d 9425
84159251 9426 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
8a2bca12 9427 if (isWORDCHAR_lazy_if(*s, UTF)) {
4f8dbb2d 9428 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
84159251
BF
9429 UTF ? SVf_UTF8 : 0);
9430 (*s) += charlen;
96f3bfda
KW
9431 /* Pretend that it worked, so will continue processing before
9432 * dieing */
0da72d5e 9433 return TRUE;
858a358b
KW
9434 }
9435 return FALSE;
9436 }
9437
9438 switch (c) {
94b03d7d 9439
858a358b
KW
9440 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9441 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9442 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9443 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9444 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9445 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d 9446 case LOCALE_PAT_MOD:
3955e1a9
KW
9447 if (*charset) {
9448 goto multiple_charsets;
9449 }
94b03d7d 9450 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 9451 *charset = c;
94b03d7d
KW
9452 break;
9453 case UNICODE_PAT_MOD:
3955e1a9
KW
9454 if (*charset) {
9455 goto multiple_charsets;
9456 }
94b03d7d 9457 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 9458 *charset = c;
94b03d7d
KW
9459 break;
9460 case ASCII_RESTRICT_PAT_MOD:
ff3f26d2 9461 if (! *charset) {
94b03d7d
KW
9462 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9463 }
ff3f26d2
KW
9464 else {
9465
9466 /* Error if previous modifier wasn't an 'a', but if it was, see
9467 * if, and accept, a second occurrence (only) */
9468 if (*charset != 'a'
9469 || get_regex_charset(*pmfl)
9470 != REGEX_ASCII_RESTRICTED_CHARSET)
9471 {
9472 goto multiple_charsets;
9473 }
9474 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
9475 }
9476 *charset = c;
94b03d7d
KW
9477 break;
9478 case DEPENDS_PAT_MOD:
3955e1a9
KW
9479 if (*charset) {
9480 goto multiple_charsets;
9481 }
94b03d7d 9482 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 9483 *charset = c;
94b03d7d 9484 break;
879d0c72 9485 }
94b03d7d 9486
858a358b
KW
9487 (*s)++;
9488 return TRUE;
94b03d7d 9489
3955e1a9
KW
9490 multiple_charsets:
9491 if (*charset != c) {
9492 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9493 }
ff3f26d2
KW
9494 else if (c == 'a') {
9495 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9496 }
3955e1a9
KW
9497 else {
9498 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9499 }
9500
9501 /* Pretend that it worked, so will continue processing before dieing */
9502 (*s)++;
9503 return TRUE;
879d0c72
NC
9504}
9505
76e3520e 9506STATIC char *
cea2e8a9 9507S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 9508{
97aff369 9509 dVAR;
79072805 9510 PMOP *pm;
4d68ffa0
KW
9511 char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing,
9512 TRUE /* look for escaped bracketed metas */ );
10edeb5d 9513 const char * const valid_flags =
a20207d7 9514 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 9515 char charset = '\0'; /* character set modifier */
5db06880
NC
9516#ifdef PERL_MAD
9517 char *modstart;
9518#endif
9519
7918f24d 9520 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 9521
d24ca0c5
DM
9522 /* this was only needed for the initial scan_str; set it to false
9523 * so that any (?{}) code blocks etc are parsed normally */
9524 PL_reg_state.re_reparsing = FALSE;
25c09cbf 9525 if (!s) {
6136c704 9526 const char * const delimiter = skipspace(start);
10edeb5d
JH
9527 Perl_croak(aTHX_
9528 (const char *)
9529 (*delimiter == '?'
9530 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9531 : "Search pattern not terminated" ));
25c09cbf 9532 }
bbce6d69 9533
8782bef2 9534 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
9535 if (PL_multi_open == '?') {
9536 /* This is the only point in the code that sets PMf_ONCE: */
79072805 9537 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
9538
9539 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9540 allows us to restrict the list needed by reset to just the ??
9541 matches. */
9542 assert(type != OP_TRANS);
9543 if (PL_curstash) {
daba3364 9544 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
9545 U32 elements;
9546 if (!mg) {
daba3364 9547 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
9548 0);
9549 }
9550 elements = mg->mg_len / sizeof(PMOP**);
9551 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9552 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9553 mg->mg_len = elements * sizeof(PMOP**);
9554 PmopSTASH_set(pm,PL_curstash);
9555 }
9556 }
5db06880
NC
9557#ifdef PERL_MAD
9558 modstart = s;
9559#endif
d63c20f2
DM
9560
9561 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9562 * anon CV. False positives like qr/[(?{]/ are harmless */
9563
9564 if (type == OP_QR) {
6f635923
DM
9565 STRLEN len;
9566 char *e, *p = SvPV(PL_lex_stuff, len);
9567 e = p + len;
9568 for (; p < e; p++) {
d63c20f2
DM
9569 if (p[0] == '(' && p[1] == '?'
9570 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9571 {
9572 pm->op_pmflags |= PMf_HAS_CV;
9573 break;
9574 }
9575 }
6f635923 9576 pm->op_pmflags |= PMf_IS_QR;
d63c20f2
DM
9577 }
9578
3955e1a9 9579 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
9580#ifdef PERL_MAD
9581 if (PL_madskills && modstart != s) {
9582 SV* tmptoken = newSVpvn(modstart, s - modstart);
9583 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9584 }
9585#endif
4ac733c9 9586 /* issue a warning if /c is specified,but /g is not */
a2a5de95 9587 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 9588 {
a2a5de95
NC
9589 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9590 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
9591 }
9592
3280af22 9593 PL_lex_op = (OP*)pm;
6154021b 9594 pl_yylval.ival = OP_MATCH;
378cc40b
LW
9595 return s;
9596}
9597
76e3520e 9598STATIC char *
cea2e8a9 9599S_scan_subst(pTHX_ char *start)
79072805 9600{
27da23d5 9601 dVAR;
22594288 9602 char *s;
eb578fdb 9603 PMOP *pm;
4fdae800 9604 I32 first_start;
79072805 9605 I32 es = 0;
3955e1a9 9606 char charset = '\0'; /* character set modifier */
5db06880
NC
9607#ifdef PERL_MAD
9608 char *modstart;
9609#endif
79072805 9610
7918f24d
NC
9611 PERL_ARGS_ASSERT_SCAN_SUBST;
9612
6154021b 9613 pl_yylval.ival = OP_NULL;
79072805 9614
4d68ffa0
KW
9615 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9616 TRUE /* look for escaped bracketed metas */ );
79072805 9617
37fd879b 9618 if (!s)
cea2e8a9 9619 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9620
3280af22 9621 if (s[-1] == PL_multi_open)
79072805 9622 s--;
5db06880
NC
9623#ifdef PERL_MAD
9624 if (PL_madskills) {
cd81e915
NC
9625 CURMAD('q', PL_thisopen);
9626 CURMAD('_', PL_thiswhite);
9627 CURMAD('E', PL_thisstuff);
9628 CURMAD('Q', PL_thisclose);
9629 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9630 }
9631#endif
79072805 9632
3280af22 9633 first_start = PL_multi_start;
4d68ffa0 9634 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 9635 if (!s) {
37fd879b 9636 if (PL_lex_stuff) {
3280af22 9637 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9638 PL_lex_stuff = NULL;
37fd879b 9639 }
cea2e8a9 9640 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9641 }
3280af22 9642 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9643
79072805 9644 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9645
9646#ifdef PERL_MAD
9647 if (PL_madskills) {
cd81e915
NC
9648 CURMAD('z', PL_thisopen);
9649 CURMAD('R', PL_thisstuff);
9650 CURMAD('Z', PL_thisclose);
5db06880
NC
9651 }
9652 modstart = s;
9653#endif
9654
48c036b1 9655 while (*s) {
a20207d7 9656 if (*s == EXEC_PAT_MOD) {
a687059c 9657 s++;
2f3197b3 9658 es++;
a687059c 9659 }
3955e1a9
KW
9660 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9661 {
48c036b1 9662 break;
aa78b661 9663 }
378cc40b 9664 }
79072805 9665
5db06880
NC
9666#ifdef PERL_MAD
9667 if (PL_madskills) {
9668 if (modstart != s)
9669 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9670 append_madprops(PL_thismad, (OP*)pm, 0);
9671 PL_thismad = 0;
5db06880
NC
9672 }
9673#endif
a2a5de95
NC
9674 if ((pm->op_pmflags & PMf_CONTINUE)) {
9675 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9676 }
9677
79072805 9678 if (es) {
6136c704
AL
9679 SV * const repl = newSVpvs("");
9680
0244c3a4 9681 PL_multi_end = 0;
79072805 9682 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9683 while (es-- > 0) {
9684 if (es)
9685 sv_catpvs(repl, "eval ");
9686 else
9687 sv_catpvs(repl, "do ");
9688 }
6f43d98f 9689 sv_catpvs(repl, "{");
7cc34111 9690 sv_catsv(repl, PL_sublex_info.repl);
9badc361 9691 sv_catpvs(repl, "}");
25da4f38 9692 SvEVALED_on(repl);
7cc34111
FC
9693 SvREFCNT_dec(PL_sublex_info.repl);
9694 PL_sublex_info.repl = repl;
378cc40b 9695 }
79072805 9696
3280af22 9697 PL_lex_op = (OP*)pm;
6154021b 9698 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9699 return s;
9700}
9701
76e3520e 9702STATIC char *
cea2e8a9 9703S_scan_trans(pTHX_ char *start)
378cc40b 9704{
97aff369 9705 dVAR;
eb578fdb 9706 char* s;
11343788 9707 OP *o;
b84c11c8
NC
9708 U8 squash;
9709 U8 del;
9710 U8 complement;
bb16bae8 9711 bool nondestruct = 0;
5db06880
NC
9712#ifdef PERL_MAD
9713 char *modstart;
9714#endif
79072805 9715
7918f24d
NC
9716 PERL_ARGS_ASSERT_SCAN_TRANS;
9717
6154021b 9718 pl_yylval.ival = OP_NULL;
79072805 9719
4d68ffa0 9720 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
37fd879b 9721 if (!s)
cea2e8a9 9722 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9723
3280af22 9724 if (s[-1] == PL_multi_open)
2f3197b3 9725 s--;
5db06880
NC
9726#ifdef PERL_MAD
9727 if (PL_madskills) {
cd81e915
NC
9728 CURMAD('q', PL_thisopen);
9729 CURMAD('_', PL_thiswhite);
9730 CURMAD('E', PL_thisstuff);
9731 CURMAD('Q', PL_thisclose);
9732 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9733 }
9734#endif
2f3197b3 9735
4d68ffa0 9736 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 9737 if (!s) {
37fd879b 9738 if (PL_lex_stuff) {
3280af22 9739 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9740 PL_lex_stuff = NULL;
37fd879b 9741 }
cea2e8a9 9742 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9743 }
5db06880 9744 if (PL_madskills) {
cd81e915
NC
9745 CURMAD('z', PL_thisopen);
9746 CURMAD('R', PL_thisstuff);
9747 CURMAD('Z', PL_thisclose);
5db06880 9748 }
79072805 9749
a0ed51b3 9750 complement = del = squash = 0;
5db06880
NC
9751#ifdef PERL_MAD
9752 modstart = s;
9753#endif
7a1e2023
NC
9754 while (1) {
9755 switch (*s) {
9756 case 'c':
79072805 9757 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9758 break;
9759 case 'd':
a0ed51b3 9760 del = OPpTRANS_DELETE;
7a1e2023
NC
9761 break;
9762 case 's':
79072805 9763 squash = OPpTRANS_SQUASH;
7a1e2023 9764 break;
bb16bae8
FC
9765 case 'r':
9766 nondestruct = 1;
9767 break;
7a1e2023
NC
9768 default:
9769 goto no_more;
9770 }
395c3793
LW
9771 s++;
9772 }
7a1e2023 9773 no_more:
8973db79 9774
9100eeb1 9775 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
59f00321
RGS
9776 o->op_private &= ~OPpTRANS_ALL;
9777 o->op_private |= del|squash|complement|
7948272d 9778 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
7cc34111 9779 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
79072805 9780
3280af22 9781 PL_lex_op = o;
bb16bae8 9782 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9783
9784#ifdef PERL_MAD
9785 if (PL_madskills) {
9786 if (modstart != s)
9787 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9788 append_madprops(PL_thismad, o, 0);
9789 PL_thismad = 0;
5db06880
NC
9790 }
9791#endif
9792
79072805
LW
9793 return s;
9794}
9795
5097bf9b
FC
9796/* scan_heredoc
9797 Takes a pointer to the first < in <<FOO.
9798 Returns a pointer to the byte following <<FOO.
9799
9800 This function scans a heredoc, which involves different methods
9801 depending on whether we are in a string eval, quoted construct, etc.
9802 This is because PL_linestr could containing a single line of input, or
9803 a whole string being evalled, or the contents of the current quote-
9804 like operator.
9805
19bbc0d7
FC
9806 The two basic methods are:
9807 - Steal lines from the input stream
9808 - Scan the heredoc in PL_linestr and remove it therefrom
9809
9810 In a file scope or filtered eval, the first method is used; in a
9811 string eval, the second.
9812
9813 In a quote-like operator, we have to choose between the two,
9814 depending on where we can find a newline. We peek into outer lex-
9815 ing scopes until we find one with a newline in it. If we reach the
9816 outermost lexing scope and it is a file, we use the stream method.
9817 Otherwise it is treated as an eval.
5097bf9b
FC
9818*/
9819
76e3520e 9820STATIC char *
5aaab254 9821S_scan_heredoc(pTHX_ char *s)
79072805 9822{
97aff369 9823 dVAR;
79072805
LW
9824 I32 op_type = OP_SCALAR;
9825 I32 len;
9826 SV *tmpstr;
9827 char term;
eb578fdb
KW
9828 char *d;
9829 char *e;
4633a7c4 9830 char *peek;
5097bf9b 9831 const bool infile = PL_rsfp || PL_parser->filtered;
78a635de 9832 LEXSHARED *shared = PL_parser->lex_shared;
5db06880
NC
9833#ifdef PERL_MAD
9834 I32 stuffstart = s - SvPVX(PL_linestr);
9835 char *tstart;
9836
cd81e915 9837 PL_realtokenstart = -1;
5db06880 9838#endif
79072805 9839
7918f24d
NC
9840 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9841
79072805 9842 s += 2;
5097bf9b 9843 d = PL_tokenbuf + 1;
3280af22 9844 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
5097bf9b 9845 *PL_tokenbuf = '\n';
c35e046a
AL
9846 peek = s;
9847 while (SPACE_OR_TAB(*peek))
9848 peek++;
3792a11b 9849 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9850 s = peek;
79072805 9851 term = *s++;
3280af22 9852 s = delimcpy(d, e, s, PL_bufend, term, &len);
6f2d7fc9
FC
9853 if (s == PL_bufend)
9854 Perl_croak(aTHX_ "Unterminated delimiter for here document");
fc36a67e 9855 d += len;
6f2d7fc9 9856 s++;
79072805
LW
9857 }
9858 else {
9859 if (*s == '\\')
458391bd 9860 /* <<\FOO is equivalent to <<'FOO' */
79072805
LW
9861 s++, term = '\'';
9862 else
9863 term = '"';
8a2bca12 9864 if (!isWORDCHAR_lazy_if(s,UTF))
8ab8f082 9865 deprecate("bare << to mean <<\"\"");
8a2bca12 9866 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
fc36a67e 9867 if (d < e)
9868 *d++ = *s;
9869 }
9870 }
3280af22 9871 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9872 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9873 *d++ = '\n';
9874 *d = '\0';
3280af22 9875 len = d - PL_tokenbuf;
5db06880
NC
9876
9877#ifdef PERL_MAD
9878 if (PL_madskills) {
5097bf9b
FC
9879 tstart = PL_tokenbuf + 1;
9880 PL_thisclose = newSVpvn(tstart, len - 1);
5db06880 9881 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9882 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9883 stuffstart = s - SvPVX(PL_linestr);
9884 }
9885#endif
6a27c188 9886#ifndef PERL_STRICT_CR
f63a84b2
LW
9887 d = strchr(s, '\r');
9888 if (d) {
b464bac0 9889 char * const olds = s;
f63a84b2 9890 s = d;
3280af22 9891 while (s < PL_bufend) {
f63a84b2
LW
9892 if (*s == '\r') {
9893 *d++ = '\n';
9894 if (*++s == '\n')
9895 s++;
9896 }
9897 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9898 *d++ = *s++;
9899 s++;
9900 }
9901 else
9902 *d++ = *s++;
9903 }
9904 *d = '\0';
3280af22 9905 PL_bufend = d;
95a20fc0 9906 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9907 s = olds;
9908 }
9909#endif
5db06880
NC
9910#ifdef PERL_MAD
9911 if (PL_madskills) {
9912 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9913 if (PL_thisstuff)
9914 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9915 else
cd81e915 9916 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880 9917 }
748a9306 9918
5db06880 9919 stuffstart = s - SvPVX(PL_linestr);
5db06880
NC
9920#endif
9921
7d0a29fe
NC
9922 tmpstr = newSV_type(SVt_PVIV);
9923 SvGROW(tmpstr, 80);
748a9306 9924 if (term == '\'') {
79072805 9925 op_type = OP_CONST;
45977657 9926 SvIV_set(tmpstr, -1);
748a9306
LW
9927 }
9928 else if (term == '`') {
79072805 9929 op_type = OP_BACKTICK;
45977657 9930 SvIV_set(tmpstr, '\\');
748a9306 9931 }
79072805 9932
78a635de 9933 PL_multi_start = CopLINE(PL_curcop) + 1;
3280af22 9934 PL_multi_open = PL_multi_close = '<';
19bbc0d7 9935 /* inside a string eval or quote-like operator */
4efe39d2 9936 if (!infile || PL_lex_inwhat) {
60f40a38 9937 SV *linestr;
3526bd3e 9938 char *bufend;
074b1c59 9939 char * const olds = s;
d37427bc 9940 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
19bbc0d7
FC
9941 /* These two fields are not set until an inner lexing scope is
9942 entered. But we need them set here. */
4efe39d2
FC
9943 shared->ls_bufptr = s;
9944 shared->ls_linestr = PL_linestr;
9945 if (PL_lex_inwhat)
9946 /* Look for a newline. If the current buffer does not have one,
9947 peek into the line buffer of the parent lexing scope, going
9948 up as many levels as necessary to find one with a newline
9949 after bufptr.
9950 */
9951 while (!(s = (char *)memchr(
9952 (void *)shared->ls_bufptr, '\n',
9953 SvEND(shared->ls_linestr)-shared->ls_bufptr
9954 ))) {
60f40a38 9955 shared = shared->ls_prev;
f68f7dc1
FC
9956 /* shared is only null if we have gone beyond the outermost
9957 lexing scope. In a file, we will have broken out of the
9958 loop in the previous iteration. In an eval, the string buf-
9959 fer ends with "\n;", so the while condition below will have
9960 evaluated to false. So shared can never be null. */
9961 assert(shared);
60f40a38
FC
9962 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9963 most lexing scope. In a file, shared->ls_linestr at that
9964 level is just one line, so there is no body to steal. */
9965 if (infile && !shared->ls_prev) {
074b1c59 9966 s = olds;
99bd9d90
FC
9967 goto streaming;
9968 }
4efe39d2
FC
9969 }
9970 else { /* eval */
9971 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9972 assert(s);
9973 }
60f40a38
FC
9974 linestr = shared->ls_linestr;
9975 bufend = SvEND(linestr);
0244c3a4
GS
9976 d = s;
9977 while (s < bufend &&
5bd13da3 9978 (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
0244c3a4 9979 if (*s++ == '\n')
78a635de 9980 ++shared->herelines;
0244c3a4
GS
9981 }
9982 if (s >= bufend) {
932d0cf1 9983 goto interminable;
0244c3a4 9984 }
3328ab5a 9985 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9986#ifdef PERL_MAD
9987 if (PL_madskills) {
cd81e915
NC
9988 if (PL_thisstuff)
9989 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9990 else
cd81e915 9991 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9992 stuffstart = s - SvPVX(PL_linestr);
9993 }
9994#endif
79072805 9995 s += len - 1;
d794b522 9996 /* the preceding stmt passes a newline */
78a635de 9997 shared->herelines++;
49d8d3a1 9998
db444266
FC
9999 /* s now points to the newline after the heredoc terminator.
10000 d points to the newline before the body of the heredoc.
10001 */
19bbc0d7
FC
10002
10003 /* We are going to modify linestr in place here, so set
10004 aside copies of the string if necessary for re-evals or
10005 (caller $n)[6]. */
a91428a4 10006 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
3328ab5a
FC
10007 check shared->re_eval_str. */
10008 if (shared->re_eval_start || shared->re_eval_str) {
db444266 10009 /* Set aside the rest of the regexp */
3328ab5a
FC
10010 if (!shared->re_eval_str)
10011 shared->re_eval_str =
10012 newSVpvn(shared->re_eval_start,
4efe39d2 10013 bufend - shared->re_eval_start);
3328ab5a 10014 shared->re_eval_start -= s-d;
db444266 10015 }
d4fe4ada
RU
10016 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10017 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10018 cx->blk_eval.cur_text == linestr)
10019 {
4efe39d2 10020 cx->blk_eval.cur_text = newSVsv(linestr);
d37427bc
FC
10021 SvSCREAM_on(cx->blk_eval.cur_text);
10022 }
db444266 10023 /* Copy everything from s onwards back to d. */
4efe39d2
FC
10024 Move(s,d,bufend-s + 1,char);
10025 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
19bbc0d7
FC
10026 /* Setting PL_bufend only applies when we have not dug deeper
10027 into other scopes, because sublex_done sets PL_bufend to
10028 SvEND(PL_linestr). */
4efe39d2 10029 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
db444266 10030 s = olds;
79072805
LW
10031 }
10032 else
a7922135 10033 {
3328ab5a 10034 SV *linestr_save;
a7922135
FC
10035 streaming:
10036 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10037 term = PL_tokenbuf[1];
10038 len--;
3328ab5a 10039 linestr_save = PL_linestr; /* must restore this afterwards */
074b1c59 10040 d = s; /* and this */
3328ab5a 10041 PL_linestr = newSVpvs("");
074b1c59
FC
10042 PL_bufend = SvPVX(PL_linestr);
10043 while (1) {
5db06880
NC
10044#ifdef PERL_MAD
10045 if (PL_madskills) {
10046 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10047 if (PL_thisstuff)
10048 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10049 else
cd81e915 10050 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10051 }
10052#endif
074b1c59 10053 PL_bufptr = PL_bufend;
d794b522 10054 CopLINE_set(PL_curcop,
78a635de 10055 PL_multi_start + shared->herelines);
112d1284
FC
10056 if (!lex_next_chunk(LEX_NO_TERM)
10057 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
3328ab5a 10058 SvREFCNT_dec(linestr_save);
932d0cf1 10059 goto interminable;
79072805 10060 }
78a635de 10061 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
112d1284
FC
10062 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10063 lex_grow_linestr(SvCUR(PL_linestr) + 2);
10064 sv_catpvs(PL_linestr, "\n\0");
10065 }
f0e67a1d 10066 s = PL_bufptr;
5db06880
NC
10067#ifdef PERL_MAD
10068 stuffstart = s - SvPVX(PL_linestr);
10069#endif
78a635de 10070 shared->herelines++;
bd61b366 10071 PL_last_lop = PL_last_uni = NULL;
6a27c188 10072#ifndef PERL_STRICT_CR
3280af22 10073 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
10074 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10075 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 10076 {
3280af22
NIS
10077 PL_bufend[-2] = '\n';
10078 PL_bufend--;
95a20fc0 10079 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 10080 }
3280af22
NIS
10081 else if (PL_bufend[-1] == '\r')
10082 PL_bufend[-1] = '\n';
f63a84b2 10083 }
3280af22
NIS
10084 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10085 PL_bufend[-1] = '\n';
f63a84b2 10086#endif
5097bf9b 10087 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
3328ab5a
FC
10088 SvREFCNT_dec(PL_linestr);
10089 PL_linestr = linestr_save;
10090 PL_linestart = SvPVX(linestr_save);
3280af22 10091 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3328ab5a 10092 s = d;
074b1c59 10093 break;
79072805
LW
10094 }
10095 else {
3280af22 10096 sv_catsv(tmpstr,PL_linestr);
395c3793 10097 }
a7922135 10098 }
395c3793 10099 }
57843af0 10100 PL_multi_end = CopLINE(PL_curcop);
79072805 10101 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 10102 SvPV_shrink_to_cur(tmpstr);
79072805 10103 }
2f31ce75 10104 if (!IN_BYTES) {
95a20fc0 10105 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
10106 SvUTF8_on(tmpstr);
10107 else if (PL_encoding)
10108 sv_recode_to_utf8(tmpstr, PL_encoding);
10109 }
3280af22 10110 PL_lex_stuff = tmpstr;
6154021b 10111 pl_yylval.ival = op_type;
79072805 10112 return s;
932d0cf1
FC
10113
10114 interminable:
932d0cf1
FC
10115 SvREFCNT_dec(tmpstr);
10116 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10117 missingterm(PL_tokenbuf + 1);
79072805
LW
10118}
10119
02aa26ce
NT
10120/* scan_inputsymbol
10121 takes: current position in input buffer
10122 returns: new position in input buffer
6154021b 10123 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
10124
10125 This code handles:
10126
10127 <> read from ARGV
10128 <FH> read from filehandle
10129 <pkg::FH> read from package qualified filehandle
10130 <pkg'FH> read from package qualified filehandle
10131 <$fh> read from filehandle in $fh
10132 <*.h> filename glob
10133
10134*/
10135
76e3520e 10136STATIC char *
cea2e8a9 10137S_scan_inputsymbol(pTHX_ char *start)
79072805 10138{
97aff369 10139 dVAR;
eb578fdb 10140 char *s = start; /* current position in buffer */
1b420867 10141 char *end;
79072805 10142 I32 len;
6136c704
AL
10143 char *d = PL_tokenbuf; /* start of temp holding space */
10144 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10145
7918f24d
NC
10146 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10147
1b420867
GS
10148 end = strchr(s, '\n');
10149 if (!end)
10150 end = PL_bufend;
10151 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
10152
10153 /* die if we didn't have space for the contents of the <>,
1b420867 10154 or if it didn't end, or if we see a newline
02aa26ce
NT
10155 */
10156
bb7a0f54 10157 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 10158 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 10159 if (s >= end)
cea2e8a9 10160 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 10161
fc36a67e 10162 s++;
02aa26ce
NT
10163
10164 /* check for <$fh>
10165 Remember, only scalar variables are interpreted as filehandles by
10166 this code. Anything more complex (e.g., <$fh{$num}>) will be
10167 treated as a glob() call.
10168 This code makes use of the fact that except for the $ at the front,
10169 a scalar variable and a filehandle look the same.
10170 */
4633a7c4 10171 if (*d == '$' && d[1]) d++;
02aa26ce
NT
10172
10173 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
8a2bca12 10174 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
2a507800 10175 d += UTF ? UTF8SKIP(d) : 1;
02aa26ce
NT
10176
10177 /* If we've tried to read what we allow filehandles to look like, and
10178 there's still text left, then it must be a glob() and not a getline.
10179 Use scan_str to pull out the stuff between the <> and treat it
10180 as nothing more than a string.
10181 */
10182
3280af22 10183 if (d - PL_tokenbuf != len) {
6154021b 10184 pl_yylval.ival = OP_GLOB;
4d68ffa0 10185 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 10186 if (!s)
cea2e8a9 10187 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
10188 return s;
10189 }
395c3793 10190 else {
9b3023bc 10191 bool readline_overriden = FALSE;
6136c704 10192 GV *gv_readline;
9b3023bc 10193 GV **gvp;
02aa26ce 10194 /* we're in a filehandle read situation */
3280af22 10195 d = PL_tokenbuf;
02aa26ce
NT
10196
10197 /* turn <> into <ARGV> */
79072805 10198 if (!len)
689badd5 10199 Copy("ARGV",d,5,char);
02aa26ce 10200
9b3023bc 10201 /* Check whether readline() is overriden */
fafc274c 10202 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 10203 if ((gv_readline
ba979b31 10204 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 10205 ||
017a3ce5 10206 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 10207 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 10208 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
10209 readline_overriden = TRUE;
10210
02aa26ce
NT
10211 /* if <$fh>, create the ops to turn the variable into a
10212 filehandle
10213 */
79072805 10214 if (*d == '$') {
02aa26ce
NT
10215 /* try to find it in the pad for this block, otherwise find
10216 add symbol table ops
10217 */
bc9b26ca 10218 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 10219 if (tmp != NOT_IN_PAD) {
00b1698f 10220 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
10221 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10222 HEK * const stashname = HvNAME_HEK(stash);
10223 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 10224 sv_catpvs(sym, "::");
f558d5af
JH
10225 sv_catpv(sym, d+1);
10226 d = SvPVX(sym);
10227 goto intro_sym;
10228 }
10229 else {
6136c704 10230 OP * const o = newOP(OP_PADSV, 0);
f558d5af 10231 o->op_targ = tmp;
9b3023bc
RGS
10232 PL_lex_op = readline_overriden
10233 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10234 op_append_elem(OP_LIST, o,
9b3023bc
RGS
10235 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10236 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 10237 }
a0d0e21e
LW
10238 }
10239 else {
f558d5af
JH
10240 GV *gv;
10241 ++d;
10242intro_sym:
10243 gv = gv_fetchpv(d,
10244 (PL_in_eval
10245 ? (GV_ADDMULTI | GV_ADDINEVAL)
25db2ea6 10246 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
f558d5af 10247 SVt_PV);
9b3023bc
RGS
10248 PL_lex_op = readline_overriden
10249 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10250 op_append_elem(OP_LIST,
9b3023bc
RGS
10251 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10252 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10253 : (OP*)newUNOP(OP_READLINE, 0,
10254 newUNOP(OP_RV2SV, 0,
10255 newGVOP(OP_GV, 0, gv)));
a0d0e21e 10256 }
7c6fadd6
RGS
10257 if (!readline_overriden)
10258 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
10259 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10260 pl_yylval.ival = OP_NULL;
79072805 10261 }
02aa26ce
NT
10262
10263 /* If it's none of the above, it must be a literal filehandle
10264 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 10265 else {
25db2ea6 10266 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9b3023bc
RGS
10267 PL_lex_op = readline_overriden
10268 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10269 op_append_elem(OP_LIST,
9b3023bc
RGS
10270 newGVOP(OP_GV, 0, gv),
10271 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10272 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 10273 pl_yylval.ival = OP_NULL;
79072805
LW
10274 }
10275 }
02aa26ce 10276
79072805
LW
10277 return s;
10278}
10279
02aa26ce
NT
10280
10281/* scan_str
10282 takes: start position in buffer
09bef843
SB
10283 keep_quoted preserve \ on the embedded delimiter(s)
10284 keep_delims preserve the delimiters around the string
d24ca0c5
DM
10285 re_reparse compiling a run-time /(?{})/:
10286 collapse // to /, and skip encoding src
02aa26ce
NT
10287 returns: position to continue reading from buffer
10288 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10289 updates the read buffer.
10290
10291 This subroutine pulls a string out of the input. It is called for:
10292 q single quotes q(literal text)
10293 ' single quotes 'literal text'
10294 qq double quotes qq(interpolate $here please)
10295 " double quotes "interpolate $here please"
10296 qx backticks qx(/bin/ls -l)
10297 ` backticks `/bin/ls -l`
10298 qw quote words @EXPORT_OK = qw( func() $spam )
10299 m// regexp match m/this/
10300 s/// regexp substitute s/this/that/
10301 tr/// string transliterate tr/this/that/
10302 y/// string transliterate y/this/that/
10303 ($*@) sub prototypes sub foo ($)
09bef843 10304 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
10305 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10306
10307 In most of these cases (all but <>, patterns and transliterate)
10308 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10309 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10310 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10311 calls scan_str().
4e553d73 10312
02aa26ce
NT
10313 It skips whitespace before the string starts, and treats the first
10314 character as the delimiter. If the delimiter is one of ([{< then
10315 the corresponding "close" character )]}> is used as the closing
10316 delimiter. It allows quoting of delimiters, and if the string has
10317 balanced delimiters ([{<>}]) it allows nesting.
10318
37fd879b
HS
10319 On success, the SV with the resulting string is put into lex_stuff or,
10320 if that is already non-NULL, into lex_repl. The second case occurs only
10321 when parsing the RHS of the special constructs s/// and tr/// (y///).
10322 For convenience, the terminating delimiter character is stuffed into
10323 SvIVX of the SV.
02aa26ce
NT
10324*/
10325
76e3520e 10326STATIC char *
4d68ffa0
KW
10327S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10328 bool deprecate_escaped_meta /* Should we issue a deprecation warning
10329 for certain paired metacharacters that
10330 appear escaped within it */
10331 )
79072805 10332{
97aff369 10333 dVAR;
a8d9c7ae
KW
10334 SV *sv; /* scalar value: string */
10335 const char *tmps; /* temp string, used for delimiter matching */
eb578fdb
KW
10336 char *s = start; /* current position in the buffer */
10337 char term; /* terminating character */
10338 char *to; /* current position in the sv's data */
a8d9c7ae
KW
10339 I32 brackets = 1; /* bracket nesting level */
10340 bool has_utf8 = FALSE; /* is there any utf8 content? */
10341 I32 termcode; /* terminating char. code */
10342 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10343 STRLEN termlen; /* length of terminating string */
10344 int last_off = 0; /* last position for nesting bracket */
4d68ffa0 10345 char *escaped_open = NULL;
5db06880
NC
10346#ifdef PERL_MAD
10347 int stuffstart;
10348 char *tstart;
10349#endif
02aa26ce 10350
7918f24d
NC
10351 PERL_ARGS_ASSERT_SCAN_STR;
10352
02aa26ce 10353 /* skip space before the delimiter */
29595ff2
NC
10354 if (isSPACE(*s)) {
10355 s = PEEKSPACE(s);
10356 }
02aa26ce 10357
5db06880 10358#ifdef PERL_MAD
cd81e915
NC
10359 if (PL_realtokenstart >= 0) {
10360 stuffstart = PL_realtokenstart;
10361 PL_realtokenstart = -1;
5db06880
NC
10362 }
10363 else
10364 stuffstart = start - SvPVX(PL_linestr);
10365#endif
02aa26ce 10366 /* mark where we are, in case we need to report errors */
79072805 10367 CLINE;
02aa26ce
NT
10368
10369 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 10370 term = *s;
220e2d4e
IH
10371 if (!UTF) {
10372 termcode = termstr[0] = term;
10373 termlen = 1;
10374 }
10375 else {
4b88fb76 10376 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
220e2d4e
IH
10377 Copy(s, termstr, termlen, U8);
10378 if (!UTF8_IS_INVARIANT(term))
10379 has_utf8 = TRUE;
10380 }
b1c7b182 10381
02aa26ce 10382 /* mark where we are */
57843af0 10383 PL_multi_start = CopLINE(PL_curcop);
3280af22 10384 PL_multi_open = term;
02aa26ce
NT
10385
10386 /* find corresponding closing delimiter */
93a17b20 10387 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
10388 termcode = termstr[0] = term = tmps[5];
10389
3280af22 10390 PL_multi_close = term;
79072805 10391
4d68ffa0
KW
10392 /* A warning is raised if the input parameter requires it for escaped (by a
10393 * backslash) paired metacharacters {} [] and () when the delimiters are
10394 * those same characters, and the backslash is ineffective. This doesn't
10395 * happen for <>, as they aren't metas. */
10396 if (deprecate_escaped_meta
10397 && (PL_multi_open == PL_multi_close
10398 || ! ckWARN_d(WARN_DEPRECATED)
10399 || PL_multi_open == '<'))
10400 {
10401 deprecate_escaped_meta = FALSE;
10402 }
10403
561b68a9
SH
10404 /* create a new SV to hold the contents. 79 is the SV's initial length.
10405 What a random number. */
7d0a29fe
NC
10406 sv = newSV_type(SVt_PVIV);
10407 SvGROW(sv, 80);
45977657 10408 SvIV_set(sv, termcode);
a0d0e21e 10409 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
10410
10411 /* move past delimiter and try to read a complete string */
09bef843 10412 if (keep_delims)
220e2d4e
IH
10413 sv_catpvn(sv, s, termlen);
10414 s += termlen;
5db06880
NC
10415#ifdef PERL_MAD
10416 tstart = SvPVX(PL_linestr) + stuffstart;
1cac5c33 10417 if (PL_madskills && !PL_thisopen && !keep_delims) {
cd81e915 10418 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
10419 stuffstart = s - SvPVX(PL_linestr);
10420 }
10421#endif
93a17b20 10422 for (;;) {
d24ca0c5 10423 if (PL_encoding && !UTF && !re_reparse) {
220e2d4e
IH
10424 bool cont = TRUE;
10425
10426 while (cont) {
95a20fc0 10427 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 10428 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 10429 &offset, (char*)termstr, termlen);
6136c704
AL
10430 const char * const ns = SvPVX_const(PL_linestr) + offset;
10431 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
10432
10433 for (; s < ns; s++) {
60d63348 10434 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10435 COPLINE_INC_WITH_HERELINES;
220e2d4e
IH
10436 }
10437 if (!found)
10438 goto read_more_line;
10439 else {
10440 /* handle quoted delimiters */
52327caf 10441 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 10442 const char *t;
95a20fc0 10443 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
10444 t--;
10445 if ((svlast-1 - t) % 2) {
10446 if (!keep_quoted) {
10447 *(svlast-1) = term;
10448 *svlast = '\0';
10449 SvCUR_set(sv, SvCUR(sv) - 1);
10450 }
10451 continue;
10452 }
10453 }
10454 if (PL_multi_open == PL_multi_close) {
10455 cont = FALSE;
10456 }
10457 else {
f54cb97a
AL
10458 const char *t;
10459 char *w;
0331ef07 10460 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
10461 /* At here, all closes are "was quoted" one,
10462 so we don't check PL_multi_close. */
10463 if (*t == '\\') {
10464 if (!keep_quoted && *(t+1) == PL_multi_open)
10465 t++;
10466 else
10467 *w++ = *t++;
10468 }
10469 else if (*t == PL_multi_open)
10470 brackets++;
10471
10472 *w = *t;
10473 }
10474 if (w < t) {
10475 *w++ = term;
10476 *w = '\0';
95a20fc0 10477 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 10478 }
0331ef07 10479 last_off = w - SvPVX(sv);
220e2d4e
IH
10480 if (--brackets <= 0)
10481 cont = FALSE;
10482 }
10483 }
10484 }
10485 if (!keep_delims) {
10486 SvCUR_set(sv, SvCUR(sv) - 1);
10487 *SvEND(sv) = '\0';
10488 }
10489 break;
10490 }
10491
02aa26ce 10492 /* extend sv if need be */
3280af22 10493 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 10494 /* set 'to' to the next character in the sv's string */
463ee0b2 10495 to = SvPVX(sv)+SvCUR(sv);
09bef843 10496
02aa26ce 10497 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
10498 if (PL_multi_open == PL_multi_close) {
10499 for (; s < PL_bufend; s++,to++) {
02aa26ce 10500 /* embedded newlines increment the current line number */
60d63348 10501 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10502 COPLINE_INC_WITH_HERELINES;
02aa26ce 10503 /* handle quoted delimiters */
3280af22 10504 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
d24ca0c5
DM
10505 if (!keep_quoted
10506 && (s[1] == term
10507 || (re_reparse && s[1] == '\\'))
10508 )
a0d0e21e 10509 s++;
d24ca0c5 10510 /* any other quotes are simply copied straight through */
a0d0e21e
LW
10511 else
10512 *to++ = *s++;
10513 }
02aa26ce
NT
10514 /* terminate when run out of buffer (the for() condition), or
10515 have found the terminator */
220e2d4e
IH
10516 else if (*s == term) {
10517 if (termlen == 1)
10518 break;
f3b9ce0f 10519 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
10520 break;
10521 }
63cd0674 10522 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10523 has_utf8 = TRUE;
93a17b20
LW
10524 *to = *s;
10525 }
10526 }
02aa26ce
NT
10527
10528 /* if the terminator isn't the same as the start character (e.g.,
10529 matched brackets), we have to allow more in the quoting, and
10530 be prepared for nested brackets.
10531 */
93a17b20 10532 else {
02aa26ce 10533 /* read until we run out of string, or we find the terminator */
3280af22 10534 for (; s < PL_bufend; s++,to++) {
02aa26ce 10535 /* embedded newlines increment the line count */
60d63348 10536 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10537 COPLINE_INC_WITH_HERELINES;
02aa26ce 10538 /* backslashes can escape the open or closing characters */
3280af22 10539 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
10540 if (!keep_quoted &&
10541 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
4d68ffa0 10542 {
a0d0e21e 10543 s++;
4d68ffa0
KW
10544
10545 /* Here, 'deprecate_escaped_meta' is true iff the
10546 * delimiters are paired metacharacters, and 's' points
10547 * to an occurrence of one of them within the string,
10548 * which was preceded by a backslash. If this is a
10549 * context where the delimiter is also a metacharacter,
10550 * the backslash is useless, and deprecated. () and []
10551 * are meta in any context. {} are meta only when
10552 * appearing in a quantifier or in things like '\p{'.
10553 * They also aren't meta unless there is a matching
10554 * closed, escaped char later on within the string.
10555 * If 's' points to an open, set a flag; if to a close,
10556 * test that flag, and raise a warning if it was set */
10557
10558 if (deprecate_escaped_meta) {
10559 if (*s == PL_multi_open) {
10560 if (*s != '{') {
10561 escaped_open = s;
10562 }
10563 else if (regcurly(s,
10564 TRUE /* Look for a closing
10565 '\}' */)
10566 || (s - start > 2 /* Look for e.g.
10567 '\x{' */
10568 && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
10569 {
10570 escaped_open = s;
10571 }
10572 }
10573 else if (escaped_open) {
10574 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10575 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10576 escaped_open = NULL;
10577 }
10578 }
10579 }
a0d0e21e
LW
10580 else
10581 *to++ = *s++;
10582 }
02aa26ce 10583 /* allow nested opens and closes */
3280af22 10584 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 10585 break;
3280af22 10586 else if (*s == PL_multi_open)
93a17b20 10587 brackets++;
63cd0674 10588 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10589 has_utf8 = TRUE;
93a17b20
LW
10590 *to = *s;
10591 }
10592 }
02aa26ce 10593 /* terminate the copied string and update the sv's end-of-string */
93a17b20 10594 *to = '\0';
95a20fc0 10595 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 10596
02aa26ce
NT
10597 /*
10598 * this next chunk reads more into the buffer if we're not done yet
10599 */
10600
b1c7b182
GS
10601 if (s < PL_bufend)
10602 break; /* handle case where we are done yet :-) */
79072805 10603
6a27c188 10604#ifndef PERL_STRICT_CR
95a20fc0 10605 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
10606 if ((to[-2] == '\r' && to[-1] == '\n') ||
10607 (to[-2] == '\n' && to[-1] == '\r'))
10608 {
f63a84b2
LW
10609 to[-2] = '\n';
10610 to--;
95a20fc0 10611 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
10612 }
10613 else if (to[-1] == '\r')
10614 to[-1] = '\n';
10615 }
95a20fc0 10616 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
10617 to[-1] = '\n';
10618#endif
10619
220e2d4e 10620 read_more_line:
02aa26ce
NT
10621 /* if we're out of file, or a read fails, bail and reset the current
10622 line marker so we can report where the unterminated string began
10623 */
5db06880
NC
10624#ifdef PERL_MAD
10625 if (PL_madskills) {
c35e046a 10626 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10627 if (PL_thisstuff)
10628 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10629 else
cd81e915 10630 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10631 }
10632#endif
83944c01 10633 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
10634 PL_bufptr = PL_bufend;
10635 if (!lex_next_chunk(0)) {
c07a80fd 10636 sv_free(sv);
eb160463 10637 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 10638 return NULL;
79072805 10639 }
f0e67a1d 10640 s = PL_bufptr;
5db06880
NC
10641#ifdef PERL_MAD
10642 stuffstart = 0;
10643#endif
378cc40b 10644 }
4e553d73 10645
02aa26ce
NT
10646 /* at this point, we have successfully read the delimited string */
10647
d24ca0c5 10648 if (!PL_encoding || UTF || re_reparse) {
5db06880
NC
10649#ifdef PERL_MAD
10650 if (PL_madskills) {
c35e046a 10651 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 10652 const int len = s - tstart;
cd81e915 10653 if (PL_thisstuff)
c35e046a 10654 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10655 else
c35e046a 10656 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10657 if (!PL_thisclose && !keep_delims)
10658 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
10659 }
10660#endif
10661
220e2d4e
IH
10662 if (keep_delims)
10663 sv_catpvn(sv, s, termlen);
10664 s += termlen;
10665 }
5db06880
NC
10666#ifdef PERL_MAD
10667 else {
10668 if (PL_madskills) {
c35e046a
AL
10669 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10670 const int len = s - tstart - termlen;
cd81e915 10671 if (PL_thisstuff)
c35e046a 10672 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10673 else
c35e046a 10674 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10675 if (!PL_thisclose && !keep_delims)
10676 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
10677 }
10678 }
10679#endif
d24ca0c5 10680 if (has_utf8 || (PL_encoding && !re_reparse))
b1c7b182 10681 SvUTF8_on(sv);
d0063567 10682
57843af0 10683 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
10684
10685 /* if we allocated too much space, give some back */
93a17b20
LW
10686 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10687 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 10688 SvPV_renew(sv, SvLEN(sv));
79072805 10689 }
02aa26ce
NT
10690
10691 /* decide whether this is the first or second quoted string we've read
10692 for this op
10693 */
4e553d73 10694
3280af22 10695 if (PL_lex_stuff)
7cc34111 10696 PL_sublex_info.repl = sv;
79072805 10697 else
3280af22 10698 PL_lex_stuff = sv;
378cc40b
LW
10699 return s;
10700}
10701
02aa26ce
NT
10702/*
10703 scan_num
10704 takes: pointer to position in buffer
10705 returns: pointer to new position in buffer
6154021b 10706 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
10707
10708 Read a number in any of the formats that Perl accepts:
10709
7fd134d9
JH
10710 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10711 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
10712 0b[01](_?[01])*
10713 0[0-7](_?[0-7])*
10714 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10715
3280af22 10716 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10717 thing it reads.
10718
10719 If it reads a number without a decimal point or an exponent, it will
10720 try converting the number to an integer and see if it can do so
10721 without loss of precision.
10722*/
4e553d73 10723
378cc40b 10724char *
bfed75c6 10725Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10726{
97aff369 10727 dVAR;
eb578fdb
KW
10728 const char *s = start; /* current position in buffer */
10729 char *d; /* destination in temp buffer */
10730 char *e; /* end of temp buffer */
86554af2 10731 NV nv; /* number read, as a double */
a0714e2c 10732 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10733 bool floatit; /* boolean: int or float? */
cbbf8932 10734 const char *lastub = NULL; /* position of last underbar */
a1894d81 10735 static const char* const number_too_long = "Number too long";
378cc40b 10736
7918f24d
NC
10737 PERL_ARGS_ASSERT_SCAN_NUM;
10738
02aa26ce
NT
10739 /* We use the first character to decide what type of number this is */
10740
378cc40b 10741 switch (*s) {
79072805 10742 default:
5637ef5b 10743 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
4e553d73 10744
02aa26ce 10745 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10746 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10747 case '0':
10748 {
02aa26ce
NT
10749 /* variables:
10750 u holds the "number so far"
4f19785b
WSI
10751 shift the power of 2 of the base
10752 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10753 overflowed was the number more than we can hold?
10754
10755 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10756 we in octal/hex/binary?" indicator to disallow hex characters
10757 when in octal mode.
02aa26ce 10758 */
9e24b6e2
JH
10759 NV n = 0.0;
10760 UV u = 0;
79072805 10761 I32 shift;
9e24b6e2 10762 bool overflowed = FALSE;
61f33854 10763 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10764 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10765 static const char* const bases[5] =
10766 { "", "binary", "", "octal", "hexadecimal" };
10767 static const char* const Bases[5] =
10768 { "", "Binary", "", "Octal", "Hexadecimal" };
10769 static const char* const maxima[5] =
10770 { "",
10771 "0b11111111111111111111111111111111",
10772 "",
10773 "037777777777",
10774 "0xffffffff" };
bfed75c6 10775 const char *base, *Base, *max;
378cc40b 10776
02aa26ce 10777 /* check for hex */
a674e8db 10778 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10779 shift = 4;
10780 s += 2;
61f33854 10781 just_zero = FALSE;
a674e8db 10782 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10783 shift = 1;
10784 s += 2;
61f33854 10785 just_zero = FALSE;
378cc40b 10786 }
02aa26ce 10787 /* check for a decimal in disguise */
b78218b7 10788 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10789 goto decimal;
02aa26ce 10790 /* so it must be octal */
928753ea 10791 else {
378cc40b 10792 shift = 3;
928753ea
JH
10793 s++;
10794 }
10795
10796 if (*s == '_') {
a2a5de95 10797 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10798 "Misplaced _ in number");
10799 lastub = s++;
10800 }
9e24b6e2
JH
10801
10802 base = bases[shift];
10803 Base = Bases[shift];
10804 max = maxima[shift];
02aa26ce 10805
4f19785b 10806 /* read the rest of the number */
378cc40b 10807 for (;;) {
9e24b6e2 10808 /* x is used in the overflow test,
893fe2c2 10809 b is the digit we're adding on. */
9e24b6e2 10810 UV x, b;
55497cff 10811
378cc40b 10812 switch (*s) {
02aa26ce
NT
10813
10814 /* if we don't mention it, we're done */
378cc40b
LW
10815 default:
10816 goto out;
02aa26ce 10817
928753ea 10818 /* _ are ignored -- but warned about if consecutive */
de3bb511 10819 case '_':
a2a5de95
NC
10820 if (lastub && s == lastub + 1)
10821 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10822 "Misplaced _ in number");
928753ea 10823 lastub = s++;
de3bb511 10824 break;
02aa26ce
NT
10825
10826 /* 8 and 9 are not octal */
378cc40b 10827 case '8': case '9':
4f19785b 10828 if (shift == 3)
cea2e8a9 10829 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10830 /* FALL THROUGH */
02aa26ce
NT
10831
10832 /* octal digits */
4f19785b 10833 case '2': case '3': case '4':
378cc40b 10834 case '5': case '6': case '7':
4f19785b 10835 if (shift == 1)
cea2e8a9 10836 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10837 /* FALL THROUGH */
10838
10839 case '0': case '1':
02aa26ce 10840 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10841 goto digit;
02aa26ce
NT
10842
10843 /* hex digits */
378cc40b
LW
10844 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10845 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10846 /* make sure they said 0x */
378cc40b
LW
10847 if (shift != 4)
10848 goto out;
55497cff 10849 b = (*s++ & 7) + 9;
02aa26ce
NT
10850
10851 /* Prepare to put the digit we have onto the end
10852 of the number so far. We check for overflows.
10853 */
10854
55497cff 10855 digit:
61f33854 10856 just_zero = FALSE;
9e24b6e2
JH
10857 if (!overflowed) {
10858 x = u << shift; /* make room for the digit */
10859
10860 if ((x >> shift) != u
10861 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10862 overflowed = TRUE;
10863 n = (NV) u;
9b387841
NC
10864 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10865 "Integer overflow in %s number",
10866 base);
9e24b6e2
JH
10867 } else
10868 u = x | b; /* add the digit to the end */
10869 }
10870 if (overflowed) {
10871 n *= nvshift[shift];
10872 /* If an NV has not enough bits in its
10873 * mantissa to represent an UV this summing of
10874 * small low-order numbers is a waste of time
10875 * (because the NV cannot preserve the
10876 * low-order bits anyway): we could just
10877 * remember when did we overflow and in the
10878 * end just multiply n by the right
10879 * amount. */
10880 n += (NV) b;
55497cff 10881 }
378cc40b
LW
10882 break;
10883 }
10884 }
02aa26ce
NT
10885
10886 /* if we get here, we had success: make a scalar value from
10887 the number.
10888 */
378cc40b 10889 out:
928753ea
JH
10890
10891 /* final misplaced underbar check */
10892 if (s[-1] == '_') {
a2a5de95 10893 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10894 }
10895
9e24b6e2 10896 if (overflowed) {
a2a5de95
NC
10897 if (n > 4294967295.0)
10898 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10899 "%s number > %s non-portable",
10900 Base, max);
b081dd7e 10901 sv = newSVnv(n);
9e24b6e2
JH
10902 }
10903 else {
15041a67 10904#if UVSIZE > 4
a2a5de95
NC
10905 if (u > 0xffffffff)
10906 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10907 "%s number > %s non-portable",
10908 Base, max);
2cc4c2dc 10909#endif
b081dd7e 10910 sv = newSVuv(u);
9e24b6e2 10911 }
61f33854 10912 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10913 sv = new_constant(start, s - start, "integer",
eb0d8d16 10914 sv, NULL, NULL, 0);
61f33854 10915 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10916 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10917 }
10918 break;
02aa26ce
NT
10919
10920 /*
10921 handle decimal numbers.
10922 we're also sent here when we read a 0 as the first digit
10923 */
378cc40b
LW
10924 case '1': case '2': case '3': case '4': case '5':
10925 case '6': case '7': case '8': case '9': case '.':
10926 decimal:
3280af22
NIS
10927 d = PL_tokenbuf;
10928 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10929 floatit = FALSE;
02aa26ce
NT
10930
10931 /* read next group of digits and _ and copy into d */
de3bb511 10932 while (isDIGIT(*s) || *s == '_') {
4e553d73 10933 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10934 if -w is on
10935 */
93a17b20 10936 if (*s == '_') {
a2a5de95
NC
10937 if (lastub && s == lastub + 1)
10938 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10939 "Misplaced _ in number");
928753ea 10940 lastub = s++;
93a17b20 10941 }
fc36a67e 10942 else {
02aa26ce 10943 /* check for end of fixed-length buffer */
fc36a67e 10944 if (d >= e)
cea2e8a9 10945 Perl_croak(aTHX_ number_too_long);
02aa26ce 10946 /* if we're ok, copy the character */
378cc40b 10947 *d++ = *s++;
fc36a67e 10948 }
378cc40b 10949 }
02aa26ce
NT
10950
10951 /* final misplaced underbar check */
928753ea 10952 if (lastub && s == lastub + 1) {
a2a5de95 10953 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10954 }
02aa26ce
NT
10955
10956 /* read a decimal portion if there is one. avoid
10957 3..5 being interpreted as the number 3. followed
10958 by .5
10959 */
2f3197b3 10960 if (*s == '.' && s[1] != '.') {
79072805 10961 floatit = TRUE;
378cc40b 10962 *d++ = *s++;
02aa26ce 10963
928753ea 10964 if (*s == '_') {
a2a5de95
NC
10965 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10966 "Misplaced _ in number");
928753ea
JH
10967 lastub = s;
10968 }
10969
10970 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10971 */
fc36a67e 10972 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10973 /* fixed length buffer check */
fc36a67e 10974 if (d >= e)
cea2e8a9 10975 Perl_croak(aTHX_ number_too_long);
928753ea 10976 if (*s == '_') {
a2a5de95
NC
10977 if (lastub && s == lastub + 1)
10978 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10979 "Misplaced _ in number");
928753ea
JH
10980 lastub = s;
10981 }
10982 else
fc36a67e 10983 *d++ = *s;
378cc40b 10984 }
928753ea
JH
10985 /* fractional part ending in underbar? */
10986 if (s[-1] == '_') {
a2a5de95
NC
10987 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10988 "Misplaced _ in number");
928753ea 10989 }
dd629d5b
GS
10990 if (*s == '.' && isDIGIT(s[1])) {
10991 /* oops, it's really a v-string, but without the "v" */
f4758303 10992 s = start;
dd629d5b
GS
10993 goto vstring;
10994 }
378cc40b 10995 }
02aa26ce
NT
10996
10997 /* read exponent part, if present */
3792a11b 10998 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10999 floatit = TRUE;
11000 s++;
02aa26ce
NT
11001
11002 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 11003 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 11004
7fd134d9
JH
11005 /* stray preinitial _ */
11006 if (*s == '_') {
a2a5de95
NC
11007 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11008 "Misplaced _ in number");
7fd134d9
JH
11009 lastub = s++;
11010 }
11011
02aa26ce 11012 /* allow positive or negative exponent */
378cc40b
LW
11013 if (*s == '+' || *s == '-')
11014 *d++ = *s++;
02aa26ce 11015
7fd134d9
JH
11016 /* stray initial _ */
11017 if (*s == '_') {
a2a5de95
NC
11018 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11019 "Misplaced _ in number");
7fd134d9
JH
11020 lastub = s++;
11021 }
11022
7fd134d9
JH
11023 /* read digits of exponent */
11024 while (isDIGIT(*s) || *s == '_') {
11025 if (isDIGIT(*s)) {
11026 if (d >= e)
11027 Perl_croak(aTHX_ number_too_long);
b3b48e3e 11028 *d++ = *s++;
7fd134d9
JH
11029 }
11030 else {
041457d9 11031 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
11032 (!isDIGIT(s[1]) && s[1] != '_')))
11033 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11034 "Misplaced _ in number");
b3b48e3e 11035 lastub = s++;
7fd134d9 11036 }
7fd134d9 11037 }
378cc40b 11038 }
02aa26ce 11039
02aa26ce 11040
0b7fceb9 11041 /*
58bb9ec3
NC
11042 We try to do an integer conversion first if no characters
11043 indicating "float" have been found.
0b7fceb9
MU
11044 */
11045
11046 if (!floatit) {
58bb9ec3 11047 UV uv;
6136c704 11048 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
11049
11050 if (flags == IS_NUMBER_IN_UV) {
11051 if (uv <= IV_MAX)
b081dd7e 11052 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 11053 else
b081dd7e 11054 sv = newSVuv(uv);
58bb9ec3
NC
11055 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11056 if (uv <= (UV) IV_MIN)
b081dd7e 11057 sv = newSViv(-(IV)uv);
58bb9ec3
NC
11058 else
11059 floatit = TRUE;
11060 } else
11061 floatit = TRUE;
11062 }
0b7fceb9 11063 if (floatit) {
58bb9ec3
NC
11064 /* terminate the string */
11065 *d = '\0';
86554af2 11066 nv = Atof(PL_tokenbuf);
b081dd7e 11067 sv = newSVnv(nv);
86554af2 11068 }
86554af2 11069
eb0d8d16
NC
11070 if ( floatit
11071 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11072 const char *const key = floatit ? "float" : "integer";
11073 const STRLEN keylen = floatit ? 5 : 7;
11074 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11075 key, keylen, sv, NULL, NULL, 0);
11076 }
378cc40b 11077 break;
0b7fceb9 11078
e312add1 11079 /* if it starts with a v, it could be a v-string */
a7cb1f99 11080 case 'v':
dd629d5b 11081vstring:
561b68a9 11082 sv = newSV(5); /* preallocate storage space */
ecabb004
FC
11083 ENTER_with_name("scan_vstring");
11084 SAVEFREESV(sv);
65b06e02 11085 s = scan_vstring(s, PL_bufend, sv);
ecabb004
FC
11086 SvREFCNT_inc_simple_void_NN(sv);
11087 LEAVE_with_name("scan_vstring");
a7cb1f99 11088 break;
79072805 11089 }
a687059c 11090
02aa26ce
NT
11091 /* make the op for the constant and return */
11092
a86a20aa 11093 if (sv)
b73d6f50 11094 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 11095 else
5f66b61c 11096 lvalp->opval = NULL;
a687059c 11097
73d840c0 11098 return (char *)s;
378cc40b
LW
11099}
11100
76e3520e 11101STATIC char *
5aaab254 11102S_scan_formline(pTHX_ char *s)
378cc40b 11103{
97aff369 11104 dVAR;
eb578fdb
KW
11105 char *eol;
11106 char *t;
6136c704 11107 SV * const stuff = newSVpvs("");
79072805 11108 bool needargs = FALSE;
c5ee2135 11109 bool eofmt = FALSE;
5db06880
NC
11110#ifdef PERL_MAD
11111 char *tokenstart = s;
4f61fd4b
JC
11112 SV* savewhite = NULL;
11113
5db06880 11114 if (PL_madskills) {
cd81e915
NC
11115 savewhite = PL_thiswhite;
11116 PL_thiswhite = 0;
5db06880
NC
11117 }
11118#endif
378cc40b 11119
7918f24d
NC
11120 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11121
79072805 11122 while (!needargs) {
a1b95068 11123 if (*s == '.') {
c35e046a 11124 t = s+1;
51882d45 11125#ifdef PERL_STRICT_CR
c35e046a
AL
11126 while (SPACE_OR_TAB(*t))
11127 t++;
51882d45 11128#else
c35e046a
AL
11129 while (SPACE_OR_TAB(*t) || *t == '\r')
11130 t++;
51882d45 11131#endif
c5ee2135
WL
11132 if (*t == '\n' || t == PL_bufend) {
11133 eofmt = TRUE;
79072805 11134 break;
c5ee2135 11135 }
79072805 11136 }
583c9d5c
FC
11137 eol = (char *) memchr(s,'\n',PL_bufend-s);
11138 if (!eol++)
3280af22 11139 eol = PL_bufend;
79072805 11140 if (*s != '#') {
a0d0e21e
LW
11141 for (t = s; t < eol; t++) {
11142 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11143 needargs = FALSE;
11144 goto enough; /* ~~ must be first line in formline */
378cc40b 11145 }
a0d0e21e
LW
11146 if (*t == '@' || *t == '^')
11147 needargs = TRUE;
378cc40b 11148 }
7121b347
MG
11149 if (eol > s) {
11150 sv_catpvn(stuff, s, eol-s);
2dc4c65b 11151#ifndef PERL_STRICT_CR
7121b347
MG
11152 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11153 char *end = SvPVX(stuff) + SvCUR(stuff);
11154 end[-2] = '\n';
11155 end[-1] = '\0';
b162af07 11156 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 11157 }
2dc4c65b 11158#endif
7121b347
MG
11159 }
11160 else
11161 break;
79072805 11162 }
95a20fc0 11163 s = (char*)eol;
583c9d5c
FC
11164 if ((PL_rsfp || PL_parser->filtered)
11165 && PL_parser->form_lex_state == LEX_NORMAL) {
f0e67a1d 11166 bool got_some;
5db06880
NC
11167#ifdef PERL_MAD
11168 if (PL_madskills) {
cd81e915
NC
11169 if (PL_thistoken)
11170 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 11171 else
cd81e915 11172 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
11173 }
11174#endif
f0e67a1d 11175 PL_bufptr = PL_bufend;
83944c01 11176 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
11177 got_some = lex_next_chunk(0);
11178 CopLINE_dec(PL_curcop);
11179 s = PL_bufptr;
5db06880 11180#ifdef PERL_MAD
f0e67a1d 11181 tokenstart = PL_bufptr;
5db06880 11182#endif
f0e67a1d 11183 if (!got_some)
378cc40b 11184 break;
378cc40b 11185 }
463ee0b2 11186 incline(s);
79072805 11187 }
a0d0e21e 11188 enough:
5c9ae74d
FC
11189 if (!SvCUR(stuff) || needargs)
11190 PL_lex_state = PL_parser->form_lex_state;
a0d0e21e 11191 if (SvCUR(stuff)) {
705fe0e5 11192 PL_expect = XSTATE;
79072805 11193 if (needargs) {
cd81e915 11194 start_force(PL_curforce);
9ded7720 11195 NEXTVAL_NEXTTOKE.ival = 0;
705fe0e5 11196 force_next(FORMLBRACK);
79072805 11197 }
1bd51a4c 11198 if (!IN_BYTES) {
95a20fc0 11199 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
11200 SvUTF8_on(stuff);
11201 else if (PL_encoding)
11202 sv_recode_to_utf8(stuff, PL_encoding);
11203 }
cd81e915 11204 start_force(PL_curforce);
9ded7720 11205 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 11206 force_next(THING);
378cc40b 11207 }
79072805 11208 else {
8990e307 11209 SvREFCNT_dec(stuff);
c5ee2135
WL
11210 if (eofmt)
11211 PL_lex_formbrack = 0;
79072805 11212 }
5db06880
NC
11213#ifdef PERL_MAD
11214 if (PL_madskills) {
cd81e915
NC
11215 if (PL_thistoken)
11216 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 11217 else
cd81e915
NC
11218 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11219 PL_thiswhite = savewhite;
5db06880
NC
11220 }
11221#endif
79072805 11222 return s;
378cc40b 11223}
a687059c 11224
ba6d6ac9 11225I32
864dbfa3 11226Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 11227{
97aff369 11228 dVAR;
a3b680e6 11229 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 11230 CV* const outsidecv = PL_compcv;
8990e307 11231
7766f137 11232 SAVEI32(PL_subline);
3280af22 11233 save_item(PL_subname);
3280af22 11234 SAVESPTR(PL_compcv);
3280af22 11235
ea726b52 11236 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
11237 CvFLAGS(PL_compcv) |= flags;
11238
57843af0 11239 PL_subline = CopLINE(PL_curcop);
dd2155a4 11240 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 11241 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 11242 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
db4cf31d 11243 if (outsidecv && CvPADLIST(outsidecv))
8771da69
FC
11244 CvPADLIST(PL_compcv)->xpadl_outid =
11245 PadlistNAMES(CvPADLIST(outsidecv));
748a9306 11246
8990e307
LW
11247 return oldsavestack_ix;
11248}
11249
084592ab
CN
11250#ifdef __SC__
11251#pragma segment Perl_yylex
11252#endif
af41e527 11253static int
19c62481 11254S_yywarn(pTHX_ const char *const s, U32 flags)
8990e307 11255{
97aff369 11256 dVAR;
7918f24d
NC
11257
11258 PERL_ARGS_ASSERT_YYWARN;
11259
faef0170 11260 PL_in_eval |= EVAL_WARNONLY;
19c62481 11261 yyerror_pv(s, flags);
faef0170 11262 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 11263 return 0;
8990e307
LW
11264}
11265
11266int
15f169a1 11267Perl_yyerror(pTHX_ const char *const s)
463ee0b2 11268{
19c62481
BF
11269 PERL_ARGS_ASSERT_YYERROR;
11270 return yyerror_pvn(s, strlen(s), 0);
11271}
11272
11273int
11274Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11275{
11276 PERL_ARGS_ASSERT_YYERROR_PV;
11277 return yyerror_pvn(s, strlen(s), flags);
11278}
11279
11280int
19c62481
BF
11281Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11282{
97aff369 11283 dVAR;
bfed75c6 11284 const char *context = NULL;
68dc0745 11285 int contlen = -1;
46fc3d4c 11286 SV *msg;
19c62481 11287 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
5912531f 11288 int yychar = PL_parser->yychar;
463ee0b2 11289
19c62481 11290 PERL_ARGS_ASSERT_YYERROR_PVN;
7918f24d 11291
3280af22 11292 if (!yychar || (yychar == ';' && !PL_rsfp))
19c62481 11293 sv_catpvs(where_sv, "at EOF");
8bcfe651
TM
11294 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11295 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11296 PL_oldbufptr != PL_bufptr) {
f355267c
JH
11297 /*
11298 Only for NetWare:
11299 The code below is removed for NetWare because it abends/crashes on NetWare
11300 when the script has error such as not having the closing quotes like:
11301 if ($var eq "value)
11302 Checking of white spaces is anyway done in NetWare code.
11303 */
11304#ifndef NETWARE
3280af22
NIS
11305 while (isSPACE(*PL_oldoldbufptr))
11306 PL_oldoldbufptr++;
f355267c 11307#endif
3280af22
NIS
11308 context = PL_oldoldbufptr;
11309 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 11310 }
8bcfe651
TM
11311 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11312 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
11313 /*
11314 Only for NetWare:
11315 The code below is removed for NetWare because it abends/crashes on NetWare
11316 when the script has error such as not having the closing quotes like:
11317 if ($var eq "value)
11318 Checking of white spaces is anyway done in NetWare code.
11319 */
11320#ifndef NETWARE
3280af22
NIS
11321 while (isSPACE(*PL_oldbufptr))
11322 PL_oldbufptr++;
f355267c 11323#endif
3280af22
NIS
11324 context = PL_oldbufptr;
11325 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
11326 }
11327 else if (yychar > 255)
19c62481 11328 sv_catpvs(where_sv, "next token ???");
12fbd33b 11329 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
11330 if (PL_lex_state == LEX_NORMAL ||
11331 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
19c62481 11332 sv_catpvs(where_sv, "at end of line");
3280af22 11333 else if (PL_lex_inpat)
19c62481 11334 sv_catpvs(where_sv, "within pattern");
463ee0b2 11335 else
19c62481 11336 sv_catpvs(where_sv, "within string");
463ee0b2 11337 }
46fc3d4c 11338 else {
19c62481 11339 sv_catpvs(where_sv, "next char ");
46fc3d4c 11340 if (yychar < 32)
cea2e8a9 11341 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 11342 else if (isPRINT_LC(yychar)) {
88c9ea1e 11343 const char string = yychar;
5e7aa789
NC
11344 sv_catpvn(where_sv, &string, 1);
11345 }
463ee0b2 11346 else
cea2e8a9 11347 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
463ee0b2 11348 }
b604e366 11349 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
ed094faf 11350 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 11351 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 11352 if (context)
19c62481
BF
11353 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11354 SVfARG(newSVpvn_flags(context, contlen,
11355 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
463ee0b2 11356 else
19c62481 11357 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
57843af0 11358 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 11359 Perl_sv_catpvf(aTHX_ msg,
57def98f 11360 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 11361 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 11362 PL_multi_end = 0;
a0d0e21e 11363 }
500960a6 11364 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 11365 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 11366 }
463ee0b2 11367 else
5a844595 11368 qerror(msg);
c7d6bfb2 11369 if (PL_error_count >= 10) {
eed484f9
DD
11370 SV * errsv;
11371 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
d2560b70 11372 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
eed484f9 11373 SVfARG(errsv), OutCopFILE(PL_curcop));
c7d6bfb2
GS
11374 else
11375 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 11376 OutCopFILE(PL_curcop));
c7d6bfb2 11377 }
3280af22 11378 PL_in_my = 0;
5c284bb0 11379 PL_in_my_stash = NULL;
463ee0b2
LW
11380 return 0;
11381}
084592ab
CN
11382#ifdef __SC__
11383#pragma segment Main
11384#endif
4e35701f 11385
b250498f 11386STATIC char*
3ae08724 11387S_swallow_bom(pTHX_ U8 *s)
01ec43d0 11388{
97aff369 11389 dVAR;
f54cb97a 11390 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
11391
11392 PERL_ARGS_ASSERT_SWALLOW_BOM;
11393
7aa207d6 11394 switch (s[0]) {
4e553d73
NIS
11395 case 0xFF:
11396 if (s[1] == 0xFE) {
ee6ba15d 11397 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 11398 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
dcbac5bb 11399 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11400 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 11401#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 11402 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 11403 s += 2;
dea0fc0b 11404 if (PL_bufend > (char*)s) {
81a923f4 11405 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 11406 }
b250498f 11407#else
dcbac5bb 11408 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11409 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 11410#endif
01ec43d0
GS
11411 }
11412 break;
78ae23f5 11413 case 0xFE:
7aa207d6 11414 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 11415#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11416 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
11417 s += 2;
11418 if (PL_bufend > (char *)s) {
81a923f4 11419 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 11420 }
b250498f 11421#else
dcbac5bb 11422 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11423 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 11424#endif
01ec43d0
GS
11425 }
11426 break;
3ae08724
GS
11427 case 0xEF:
11428 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 11429 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
11430 s += 3; /* UTF-8 */
11431 }
11432 break;
11433 case 0:
7aa207d6
JH
11434 if (slen > 3) {
11435 if (s[1] == 0) {
11436 if (s[2] == 0xFE && s[3] == 0xFF) {
11437 /* UTF-32 big-endian */
dcbac5bb 11438 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11439 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
11440 }
11441 }
11442 else if (s[2] == 0 && s[3] != 0) {
11443 /* Leading bytes
11444 * 00 xx 00 xx
11445 * are a good indicator of UTF-16BE. */
ee6ba15d 11446#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11447 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
11448 s = add_utf16_textfilter(s, FALSE);
11449#else
dcbac5bb 11450 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11451 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11452#endif
7aa207d6 11453 }
01ec43d0 11454 }
e294cc5d
JH
11455#ifdef EBCDIC
11456 case 0xDD:
11457 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11458 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11459 s += 4; /* UTF-8 */
11460 }
11461 break;
11462#endif
11463
7aa207d6
JH
11464 default:
11465 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11466 /* Leading bytes
11467 * xx 00 xx 00
11468 * are a good indicator of UTF-16LE. */
ee6ba15d 11469#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11470 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 11471 s = add_utf16_textfilter(s, TRUE);
ee6ba15d 11472#else
dcbac5bb 11473 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11474 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11475#endif
7aa207d6 11476 }
01ec43d0 11477 }
b8f84bb2 11478 return (char*)s;
b250498f 11479}
4755096e 11480
6e3aabd6
GS
11481
11482#ifndef PERL_NO_UTF16_FILTER
11483static I32
a28af015 11484S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 11485{
97aff369 11486 dVAR;
f3040f2c 11487 SV *const filter = FILTER_DATA(idx);
2a773401
NC
11488 /* We re-use this each time round, throwing the contents away before we
11489 return. */
2a773401 11490 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 11491 SV *const utf8_buffer = filter;
c28d6105 11492 IV status = IoPAGE(filter);
f2338a2e 11493 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 11494 I32 retval;
c8b0cbae 11495
c85ae797
NC
11496 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11497
c8b0cbae
NC
11498 /* As we're automatically added, at the lowest level, and hence only called
11499 from this file, we can be sure that we're not called in block mode. Hence
11500 don't bother writing code to deal with block mode. */
11501 if (maxlen) {
11502 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11503 }
c28d6105
NC
11504 if (status < 0) {
11505 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11506 }
1de9afcd 11507 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 11508 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 11509 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
11510 reverse ? 'l' : 'b', idx, maxlen, status,
11511 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11512
11513 while (1) {
11514 STRLEN chars;
11515 STRLEN have;
dea0fc0b 11516 I32 newlen;
2a773401 11517 U8 *end;
c28d6105
NC
11518 /* First, look in our buffer of existing UTF-8 data: */
11519 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11520
11521 if (nl) {
11522 ++nl;
11523 } else if (status == 0) {
11524 /* EOF */
11525 IoPAGE(filter) = 0;
11526 nl = SvEND(utf8_buffer);
11527 }
11528 if (nl) {
d2d1d4de
NC
11529 STRLEN got = nl - SvPVX(utf8_buffer);
11530 /* Did we have anything to append? */
11531 retval = got != 0;
11532 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
11533 /* Everything else in this code works just fine if SVp_POK isn't
11534 set. This, however, needs it, and we need it to work, else
11535 we loop infinitely because the buffer is never consumed. */
11536 sv_chop(utf8_buffer, nl);
11537 break;
11538 }
ba77e4cc 11539
c28d6105
NC
11540 /* OK, not a complete line there, so need to read some more UTF-16.
11541 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
11542 while (1) {
11543 if (status <= 0)
11544 break;
11545 if (SvCUR(utf16_buffer) >= 2) {
11546 /* Location of the high octet of the last complete code point.
11547 Gosh, UTF-16 is a pain. All the benefits of variable length,
11548 *coupled* with all the benefits of partial reads and
11549 endianness. */
11550 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11551 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11552
11553 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11554 break;
11555 }
11556
11557 /* We have the first half of a surrogate. Read more. */
11558 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11559 }
c28d6105 11560
c28d6105
NC
11561 status = FILTER_READ(idx + 1, utf16_buffer,
11562 160 + (SvCUR(utf16_buffer) & 1));
11563 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 11564 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
11565 if (status < 0) {
11566 /* Error */
11567 IoPAGE(filter) = status;
11568 return status;
11569 }
11570 }
11571
11572 chars = SvCUR(utf16_buffer) >> 1;
11573 have = SvCUR(utf8_buffer);
11574 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 11575
aa6dbd60 11576 if (reverse) {
c28d6105
NC
11577 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11578 (U8*)SvPVX_const(utf8_buffer) + have,
11579 chars * 2, &newlen);
aa6dbd60 11580 } else {
2a773401 11581 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
11582 (U8*)SvPVX_const(utf8_buffer) + have,
11583 chars * 2, &newlen);
2a773401 11584 }
c28d6105 11585 SvCUR_set(utf8_buffer, have + newlen);
2a773401 11586 *end = '\0';
c28d6105 11587
e07286ed
NC
11588 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11589 it's private to us, and utf16_to_utf8{,reversed} take a
11590 (pointer,length) pair, rather than a NUL-terminated string. */
11591 if(SvCUR(utf16_buffer) & 1) {
11592 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11593 SvCUR_set(utf16_buffer, 1);
11594 } else {
11595 SvCUR_set(utf16_buffer, 0);
11596 }
2a773401 11597 }
c28d6105
NC
11598 DEBUG_P(PerlIO_printf(Perl_debug_log,
11599 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11600 status,
11601 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11602 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 11603 return retval;
6e3aabd6 11604}
81a923f4
NC
11605
11606static U8 *
11607S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11608{
2a773401 11609 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 11610
c85ae797
NC
11611 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11612
c28d6105 11613 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 11614 sv_setpvs(filter, "");
2a773401 11615 IoLINES(filter) = reversed;
c28d6105
NC
11616 IoPAGE(filter) = 1; /* Not EOF */
11617
11618 /* Sadly, we have to return a valid pointer, come what may, so we have to
11619 ignore any error return from this. */
11620 SvCUR_set(PL_linestr, 0);
11621 if (FILTER_READ(0, PL_linestr, 0)) {
11622 SvUTF8_on(PL_linestr);
81a923f4 11623 } else {
c28d6105 11624 SvUTF8_on(PL_linestr);
81a923f4 11625 }
c28d6105 11626 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
11627 return (U8*)SvPVX(PL_linestr);
11628}
6e3aabd6 11629#endif
9f4817db 11630
f333445c
JP
11631/*
11632Returns a pointer to the next character after the parsed
11633vstring, as well as updating the passed in sv.
11634
11635Function must be called like
11636
615e0a48 11637 sv = sv_2mortal(newSV(5));
65b06e02 11638 s = scan_vstring(s,e,sv);
f333445c 11639
65b06e02 11640where s and e are the start and end of the string.
f333445c
JP
11641The sv should already be large enough to store the vstring
11642passed in, for performance reasons.
11643
615e0a48
FC
11644This function may croak if fatal warnings are enabled in the
11645calling scope, hence the sv_2mortal in the example (to prevent
11646a leak). Make sure to do SvREFCNT_inc afterwards if you use
11647sv_2mortal.
11648
f333445c
JP
11649*/
11650
11651char *
15f169a1 11652Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 11653{
97aff369 11654 dVAR;
bfed75c6
AL
11655 const char *pos = s;
11656 const char *start = s;
7918f24d
NC
11657
11658 PERL_ARGS_ASSERT_SCAN_VSTRING;
11659
f333445c 11660 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 11661 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 11662 pos++;
f333445c
JP
11663 if ( *pos != '.') {
11664 /* this may not be a v-string if followed by => */
bfed75c6 11665 const char *next = pos;
65b06e02 11666 while (next < e && isSPACE(*next))
8fc7bb1c 11667 ++next;
65b06e02 11668 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
11669 /* return string not v-string */
11670 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 11671 return (char *)pos;
f333445c
JP
11672 }
11673 }
11674
11675 if (!isALPHA(*pos)) {
89ebb4a3 11676 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 11677
d4c19fe8
AL
11678 if (*s == 'v')
11679 s++; /* get past 'v' */
f333445c 11680
76f68e9b 11681 sv_setpvs(sv, "");
f333445c
JP
11682
11683 for (;;) {
d4c19fe8 11684 /* this is atoi() that tolerates underscores */
0bd48802
AL
11685 U8 *tmpend;
11686 UV rev = 0;
d4c19fe8
AL
11687 const char *end = pos;
11688 UV mult = 1;
11689 while (--end >= s) {
11690 if (*end != '_') {
11691 const UV orev = rev;
f333445c
JP
11692 rev += (*end - '0') * mult;
11693 mult *= 10;
9b387841 11694 if (orev > rev)
dcbac5bb 11695 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
11696 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11697 "Integer overflow in decimal number");
f333445c
JP
11698 }
11699 }
11700#ifdef EBCDIC
11701 if (rev > 0x7FFFFFFF)
11702 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11703#endif
11704 /* Append native character for the rev point */
11705 tmpend = uvchr_to_utf8(tmpbuf, rev);
11706 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11707 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11708 SvUTF8_on(sv);
65b06e02 11709 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
11710 s = ++pos;
11711 else {
11712 s = pos;
11713 break;
11714 }
65b06e02 11715 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
11716 pos++;
11717 }
11718 SvPOK_on(sv);
11719 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11720 SvRMAGICAL_on(sv);
11721 }
73d840c0 11722 return (char *)s;
f333445c
JP
11723}
11724
88e1f1a2
JV
11725int
11726Perl_keyword_plugin_standard(pTHX_
11727 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11728{
11729 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11730 PERL_UNUSED_CONTEXT;
11731 PERL_UNUSED_ARG(keyword_ptr);
11732 PERL_UNUSED_ARG(keyword_len);
11733 PERL_UNUSED_ARG(op_ptr);
11734 return KEYWORD_PLUGIN_DECLINE;
11735}
11736
78cdf107 11737#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11738static void
78cdf107 11739S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11740{
11741 SAVEI32(PL_lex_brackets);
11742 if (PL_lex_brackets > 100)
11743 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11744 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11745 SAVEI32(PL_lex_allbrackets);
11746 PL_lex_allbrackets = 0;
11747 SAVEI8(PL_lex_fakeeof);
2dcac756 11748 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11749 if(yyparse(gramtype) && !PL_parser->error_count)
11750 qerror(Perl_mess(aTHX_ "Parse error"));
11751}
11752
78cdf107 11753#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11754static OP *
78cdf107 11755S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11756{
11757 OP *o;
11758 ENTER;
11759 SAVEVPTR(PL_eval_root);
11760 PL_eval_root = NULL;
78cdf107 11761 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11762 o = PL_eval_root;
11763 LEAVE;
11764 return o;
11765}
11766
78cdf107
Z
11767#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11768static OP *
11769S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11770{
11771 OP *exprop;
11772 if (flags & ~PARSE_OPTIONAL)
11773 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11774 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11775 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11776 if (!PL_parser->error_count)
11777 qerror(Perl_mess(aTHX_ "Parse error"));
11778 exprop = newOP(OP_NULL, 0);
11779 }
11780 return exprop;
11781}
11782
11783/*
11784=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11785
11786Parse a Perl arithmetic expression. This may contain operators of precedence
11787down to the bit shift operators. The expression must be followed (and thus
11788terminated) either by a comparison or lower-precedence operator or by
11789something that would normally terminate an expression such as semicolon.
11790If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11791otherwise it is mandatory. It is up to the caller to ensure that the
11792dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11793the source of the code to be parsed and the lexical context for the
11794expression.
11795
11796The op tree representing the expression is returned. If an optional
11797expression is absent, a null pointer is returned, otherwise the pointer
11798will be non-null.
11799
11800If an error occurs in parsing or compilation, in most cases a valid op
11801tree is returned anyway. The error is reflected in the parser state,
11802normally resulting in a single exception at the top level of parsing
11803which covers all the compilation errors that occurred. Some compilation
11804errors, however, will throw an exception immediately.
11805
11806=cut
11807*/
11808
11809OP *
11810Perl_parse_arithexpr(pTHX_ U32 flags)
11811{
11812 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11813}
11814
11815/*
11816=for apidoc Amx|OP *|parse_termexpr|U32 flags
11817
11818Parse a Perl term expression. This may contain operators of precedence
11819down to the assignment operators. The expression must be followed (and thus
11820terminated) either by a comma or lower-precedence operator or by
11821something that would normally terminate an expression such as semicolon.
11822If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11823otherwise it is mandatory. It is up to the caller to ensure that the
11824dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11825the source of the code to be parsed and the lexical context for the
11826expression.
11827
11828The op tree representing the expression is returned. If an optional
11829expression is absent, a null pointer is returned, otherwise the pointer
11830will be non-null.
11831
11832If an error occurs in parsing or compilation, in most cases a valid op
11833tree is returned anyway. The error is reflected in the parser state,
11834normally resulting in a single exception at the top level of parsing
11835which covers all the compilation errors that occurred. Some compilation
11836errors, however, will throw an exception immediately.
11837
11838=cut
11839*/
11840
11841OP *
11842Perl_parse_termexpr(pTHX_ U32 flags)
11843{
11844 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11845}
11846
11847/*
11848=for apidoc Amx|OP *|parse_listexpr|U32 flags
11849
11850Parse a Perl list expression. This may contain operators of precedence
11851down to the comma operator. The expression must be followed (and thus
11852terminated) either by a low-precedence logic operator such as C<or> or by
11853something that would normally terminate an expression such as semicolon.
11854If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11855otherwise it is mandatory. It is up to the caller to ensure that the
11856dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11857the source of the code to be parsed and the lexical context for the
11858expression.
11859
11860The op tree representing the expression is returned. If an optional
11861expression is absent, a null pointer is returned, otherwise the pointer
11862will be non-null.
11863
11864If an error occurs in parsing or compilation, in most cases a valid op
11865tree is returned anyway. The error is reflected in the parser state,
11866normally resulting in a single exception at the top level of parsing
11867which covers all the compilation errors that occurred. Some compilation
11868errors, however, will throw an exception immediately.
11869
11870=cut
11871*/
11872
11873OP *
11874Perl_parse_listexpr(pTHX_ U32 flags)
11875{
11876 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11877}
11878
11879/*
11880=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11881
11882Parse a single complete Perl expression. This allows the full
11883expression grammar, including the lowest-precedence operators such
11884as C<or>. The expression must be followed (and thus terminated) by a
11885token that an expression would normally be terminated by: end-of-file,
11886closing bracketing punctuation, semicolon, or one of the keywords that
11887signals a postfix expression-statement modifier. If I<flags> includes
11888C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11889mandatory. It is up to the caller to ensure that the dynamic parser
11890state (L</PL_parser> et al) is correctly set to reflect the source of
11891the code to be parsed and the lexical context for the expression.
11892
11893The op tree representing the expression is returned. If an optional
11894expression is absent, a null pointer is returned, otherwise the pointer
11895will be non-null.
11896
11897If an error occurs in parsing or compilation, in most cases a valid op
11898tree is returned anyway. The error is reflected in the parser state,
11899normally resulting in a single exception at the top level of parsing
11900which covers all the compilation errors that occurred. Some compilation
11901errors, however, will throw an exception immediately.
11902
11903=cut
11904*/
11905
11906OP *
11907Perl_parse_fullexpr(pTHX_ U32 flags)
11908{
11909 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11910}
11911
e53d8f76
Z
11912/*
11913=for apidoc Amx|OP *|parse_block|U32 flags
11914
11915Parse a single complete Perl code block. This consists of an opening
11916brace, a sequence of statements, and a closing brace. The block
11917constitutes a lexical scope, so C<my> variables and various compile-time
11918effects can be contained within it. It is up to the caller to ensure
11919that the dynamic parser state (L</PL_parser> et al) is correctly set to
11920reflect the source of the code to be parsed and the lexical context for
11921the statement.
11922
11923The op tree representing the code block is returned. This is always a
11924real op, never a null pointer. It will normally be a C<lineseq> list,
11925including C<nextstate> or equivalent ops. No ops to construct any kind
11926of runtime scope are included by virtue of it being a block.
11927
11928If an error occurs in parsing or compilation, in most cases a valid op
11929tree (most likely null) is returned anyway. The error is reflected in
11930the parser state, normally resulting in a single exception at the top
11931level of parsing which covers all the compilation errors that occurred.
11932Some compilation errors, however, will throw an exception immediately.
11933
11934The I<flags> parameter is reserved for future use, and must always
11935be zero.
11936
11937=cut
11938*/
11939
11940OP *
11941Perl_parse_block(pTHX_ U32 flags)
11942{
11943 if (flags)
11944 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11945 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11946}
11947
1da4ca5f 11948/*
8359b381
Z
11949=for apidoc Amx|OP *|parse_barestmt|U32 flags
11950
11951Parse a single unadorned Perl statement. This may be a normal imperative
11952statement or a declaration that has compile-time effect. It does not
11953include any label or other affixture. It is up to the caller to ensure
11954that the dynamic parser state (L</PL_parser> et al) is correctly set to
11955reflect the source of the code to be parsed and the lexical context for
11956the statement.
11957
11958The op tree representing the statement is returned. This may be a
11959null pointer if the statement is null, for example if it was actually
11960a subroutine definition (which has compile-time side effects). If not
11961null, it will be ops directly implementing the statement, suitable to
11962pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11963equivalent op (except for those embedded in a scope contained entirely
11964within the statement).
11965
11966If an error occurs in parsing or compilation, in most cases a valid op
11967tree (most likely null) is returned anyway. The error is reflected in
11968the parser state, normally resulting in a single exception at the top
11969level of parsing which covers all the compilation errors that occurred.
11970Some compilation errors, however, will throw an exception immediately.
11971
11972The I<flags> parameter is reserved for future use, and must always
11973be zero.
11974
11975=cut
11976*/
11977
11978OP *
11979Perl_parse_barestmt(pTHX_ U32 flags)
11980{
11981 if (flags)
11982 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11983 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11984}
11985
11986/*
361d9b55
Z
11987=for apidoc Amx|SV *|parse_label|U32 flags
11988
11989Parse a single label, possibly optional, of the type that may prefix a
11990Perl statement. It is up to the caller to ensure that the dynamic parser
11991state (L</PL_parser> et al) is correctly set to reflect the source of
11992the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11993label is optional, otherwise it is mandatory.
11994
11995The name of the label is returned in the form of a fresh scalar. If an
11996optional label is absent, a null pointer is returned.
11997
11998If an error occurs in parsing, which can only occur if the label is
11999mandatory, a valid label is returned anyway. The error is reflected in
12000the parser state, normally resulting in a single exception at the top
12001level of parsing which covers all the compilation errors that occurred.
12002
12003=cut
12004*/
12005
12006SV *
12007Perl_parse_label(pTHX_ U32 flags)
12008{
12009 if (flags & ~PARSE_OPTIONAL)
12010 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12011 if (PL_lex_state == LEX_KNOWNEXT) {
12012 PL_parser->yychar = yylex();
12013 if (PL_parser->yychar == LABEL) {
5504e6cf
FC
12014 char * const lpv = pl_yylval.pval;
12015 STRLEN llen = strlen(lpv);
361d9b55 12016 PL_parser->yychar = YYEMPTY;
5504e6cf 12017 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
361d9b55
Z
12018 } else {
12019 yyunlex();
12020 goto no_label;
12021 }
12022 } else {
12023 char *s, *t;
361d9b55
Z
12024 STRLEN wlen, bufptr_pos;
12025 lex_read_space(0);
12026 t = s = PL_bufptr;
5db1eb8d 12027 if (!isIDFIRST_lazy_if(s, UTF))
361d9b55 12028 goto no_label;
5db1eb8d 12029 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
361d9b55
Z
12030 if (word_takes_any_delimeter(s, wlen))
12031 goto no_label;
12032 bufptr_pos = s - SvPVX(PL_linestr);
12033 PL_bufptr = t;
12034 lex_read_space(LEX_KEEP_PREVIOUS);
12035 t = PL_bufptr;
12036 s = SvPVX(PL_linestr) + bufptr_pos;
12037 if (t[0] == ':' && t[1] != ':') {
12038 PL_oldoldbufptr = PL_oldbufptr;
12039 PL_oldbufptr = s;
12040 PL_bufptr = t+1;
5db1eb8d 12041 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
361d9b55
Z
12042 } else {
12043 PL_bufptr = s;
12044 no_label:
12045 if (flags & PARSE_OPTIONAL) {
12046 return NULL;
12047 } else {
12048 qerror(Perl_mess(aTHX_ "Parse error"));
12049 return newSVpvs("x");
12050 }
12051 }
12052 }
12053}
12054
12055/*
28ac2b49
Z
12056=for apidoc Amx|OP *|parse_fullstmt|U32 flags
12057
12058Parse a single complete Perl statement. This may be a normal imperative
8359b381 12059statement or a declaration that has compile-time effect, and may include
8e720305 12060optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
12061parser state (L</PL_parser> et al) is correctly set to reflect the source
12062of the code to be parsed and the lexical context for the statement.
12063
12064The op tree representing the statement is returned. This may be a
12065null pointer if the statement is null, for example if it was actually
12066a subroutine definition (which has compile-time side effects). If not
12067null, it will be the result of a L</newSTATEOP> call, normally including
12068a C<nextstate> or equivalent op.
12069
12070If an error occurs in parsing or compilation, in most cases a valid op
12071tree (most likely null) is returned anyway. The error is reflected in
12072the parser state, normally resulting in a single exception at the top
12073level of parsing which covers all the compilation errors that occurred.
12074Some compilation errors, however, will throw an exception immediately.
12075
12076The I<flags> parameter is reserved for future use, and must always
12077be zero.
12078
12079=cut
12080*/
12081
12082OP *
12083Perl_parse_fullstmt(pTHX_ U32 flags)
12084{
28ac2b49
Z
12085 if (flags)
12086 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 12087 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
12088}
12089
07ffcb73
Z
12090/*
12091=for apidoc Amx|OP *|parse_stmtseq|U32 flags
12092
12093Parse a sequence of zero or more Perl statements. These may be normal
12094imperative statements, including optional labels, or declarations
12095that have compile-time effect, or any mixture thereof. The statement
12096sequence ends when a closing brace or end-of-file is encountered in a
12097place where a new statement could have validly started. It is up to
12098the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12099is correctly set to reflect the source of the code to be parsed and the
12100lexical context for the statements.
12101
12102The op tree representing the statement sequence is returned. This may
12103be a null pointer if the statements were all null, for example if there
12104were no statements or if there were only subroutine definitions (which
12105have compile-time side effects). If not null, it will be a C<lineseq>
12106list, normally including C<nextstate> or equivalent ops.
12107
12108If an error occurs in parsing or compilation, in most cases a valid op
12109tree is returned anyway. The error is reflected in the parser state,
12110normally resulting in a single exception at the top level of parsing
12111which covers all the compilation errors that occurred. Some compilation
12112errors, however, will throw an exception immediately.
12113
12114The I<flags> parameter is reserved for future use, and must always
12115be zero.
12116
12117=cut
12118*/
12119
12120OP *
12121Perl_parse_stmtseq(pTHX_ U32 flags)
12122{
12123 OP *stmtseqop;
e53d8f76 12124 I32 c;
07ffcb73 12125 if (flags)
78cdf107
Z
12126 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12127 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
12128 c = lex_peek_unichar(0);
12129 if (c != -1 && c != /*{*/'}')
07ffcb73 12130 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
12131 return stmtseqop;
12132}
12133
28ac2b49 12134/*
1da4ca5f
NC
12135 * Local variables:
12136 * c-indentation-style: bsd
12137 * c-basic-offset: 4
14d04a33 12138 * indent-tabs-mode: nil
1da4ca5f
NC
12139 * End:
12140 *
14d04a33 12141 * ex: set ts=8 sts=4 sw=4 et:
37442d52 12142 */