This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
bisect-runner.pl should match patches with directory 'b' before 'a'.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
199e78b7
DM
69#define PL_preambled (PL_parser->preambled)
70#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 71#define PL_linestr (PL_parser->linestr)
c2598295
DM
72#define PL_expect (PL_parser->expect)
73#define PL_copline (PL_parser->copline)
f06b5848
DM
74#define PL_bufptr (PL_parser->bufptr)
75#define PL_oldbufptr (PL_parser->oldbufptr)
76#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77#define PL_linestart (PL_parser->linestart)
78#define PL_bufend (PL_parser->bufend)
79#define PL_last_uni (PL_parser->last_uni)
80#define PL_last_lop (PL_parser->last_lop)
81#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 82#define PL_lex_state (PL_parser->lex_state)
2f9285f8 83#define PL_rsfp (PL_parser->rsfp)
5486870f 84#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
85#define PL_in_my (PL_parser->in_my)
86#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 87#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 88#define PL_multi_end (PL_parser->multi_end)
13765c85 89#define PL_error_count (PL_parser->error_count)
199e78b7
DM
90
91#ifdef PERL_MAD
92# define PL_endwhite (PL_parser->endwhite)
93# define PL_faketokens (PL_parser->faketokens)
94# define PL_lasttoke (PL_parser->lasttoke)
95# define PL_nextwhite (PL_parser->nextwhite)
96# define PL_realtokenstart (PL_parser->realtokenstart)
97# define PL_skipwhite (PL_parser->skipwhite)
98# define PL_thisclose (PL_parser->thisclose)
99# define PL_thismad (PL_parser->thismad)
100# define PL_thisopen (PL_parser->thisopen)
101# define PL_thisstuff (PL_parser->thisstuff)
102# define PL_thistoken (PL_parser->thistoken)
103# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
104# define PL_thiswhite (PL_parser->thiswhite)
105# define PL_nexttoke (PL_parser->nexttoke)
106# define PL_curforce (PL_parser->curforce)
107#else
108# define PL_nexttoke (PL_parser->nexttoke)
109# define PL_nexttype (PL_parser->nexttype)
110# define PL_nextval (PL_parser->nextval)
199e78b7
DM
111#endif
112
a1894d81 113static const char* const ident_too_long = "Identifier too long";
8903cb82 114
29595ff2 115#ifdef PERL_MAD
29595ff2 116# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 117# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 118#else
5db06880 119# define CURMAD(slot,sv)
9ded7720 120# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
121#endif
122
a7aaec61
Z
123#define XENUMMASK 0x3f
124#define XFAKEEOF 0x40
125#define XFAKEBRACK 0x80
9059aa12 126
39e02b42
JH
127#ifdef USE_UTF8_SCRIPTS
128# define UTF (!IN_BYTES)
2b9d42f0 129#else
802a15e9 130# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
2b9d42f0 131#endif
a0ed51b3 132
b1fc3636
CJ
133/* The maximum number of characters preceding the unrecognized one to display */
134#define UNRECOGNIZED_PRECEDE_COUNT 10
135
61f0cdd9 136/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
137 * 1999-02-27 mjd-perl-patch@plover.com */
138#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
139
14bd96d0 140#define SPACE_OR_TAB(c) isBLANK_A(c)
bf4acbe4 141
ffb4593c
NT
142/* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
79072805 144 * can get by with a single comparison (if the compiler is smart enough).
9da1dd8f
DM
145 *
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
79072805
LW
148 */
149
fb73857a 150/* #define LEX_NOTPARSING 11 is done in perl.h. */
151
b6007c36
DM
152#define LEX_NORMAL 10 /* normal code (ie not within "...") */
153#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156#define LEX_INTERPSTART 6 /* expecting the start of a $var */
157
158 /* at end of code, eg "$x" followed by: */
159#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
161
162#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164#define LEX_INTERPCONST 2 /* NOT USED */
165#define LEX_FORMLINE 1 /* expecting a format line */
166#define LEX_KNOWNEXT 0 /* next token known; just return it */
167
79072805 168
bbf60fe6 169#ifdef DEBUGGING
27da23d5 170static const char* const lex_state_names[] = {
bbf60fe6
DM
171 "KNOWNEXT",
172 "FORMLINE",
173 "INTERPCONST",
174 "INTERPCONCAT",
175 "INTERPENDMAYBE",
176 "INTERPEND",
177 "INTERPSTART",
178 "INTERPPUSH",
179 "INTERPCASEMOD",
180 "INTERPNORMAL",
181 "NORMAL"
182};
183#endif
184
79072805
LW
185#ifdef ff_next
186#undef ff_next
d48672a2
LW
187#endif
188
79072805 189#include "keywords.h"
fe14fcc3 190
ffb4593c
NT
191/* CLINE is a macro that ensures PL_copline has a sane value */
192
ae986130
LW
193#ifdef CLINE
194#undef CLINE
195#endif
57843af0 196#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 197
5db06880 198#ifdef PERL_MAD
29595ff2
NC
199# define SKIPSPACE0(s) skipspace0(s)
200# define SKIPSPACE1(s) skipspace1(s)
201# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202# define PEEKSPACE(s) skipspace2(s,0)
203#else
204# define SKIPSPACE0(s) skipspace(s)
205# define SKIPSPACE1(s) skipspace(s)
206# define SKIPSPACE2(s,tsv) skipspace(s)
207# define PEEKSPACE(s) skipspace(s)
208#endif
209
ffb4593c
NT
210/*
211 * Convenience functions to return different tokens and prime the
9cbb5ea2 212 * lexer for the next token. They all take an argument.
ffb4593c
NT
213 *
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
7eb971ee 224 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 225 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
226 * BOop : bitwise or or xor
227 * BAop : bitwise and
228 * SHop : shift operator
229 * PWop : power operator
9cbb5ea2 230 * PMop : pattern-matching operator
ffb4593c
NT
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
e5edeb50 234 * Rop : relational operator <= != gt
ffb4593c
NT
235 *
236 * Also see LOP and lop() below.
237 */
238
998054bd 239#ifdef DEBUGGING /* Serve -DT. */
704d4215 240# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 241#else
bbf60fe6 242# define REPORT(retval) (retval)
998054bd
SC
243#endif
244
bbf60fe6
DM
245#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
252#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 255#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
d68ce4ac 272#define UNI3(f,x,have_x) { \
6154021b 273 pl_yylval.ival = f; \
d68ce4ac 274 if (have_x) PL_expect = x; \
376fcdbf
AL
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
d68ce4ac
FC
283#define UNI(f) UNI3(f,XTERM,1)
284#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
b5fb7ce3
FC
285#define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
22393538
MH
287 OPERATOR(f); \
288 }
a687059c 289
d68ce4ac 290#define UNIBRACK(f) UNI3(f,0,0)
79072805 291
9f68db38 292/* grandfather return to old style */
78cdf107
Z
293#define OLDLOP(f) \
294 do { \
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
298 PL_expect = XTERM; \
299 PL_bufptr = s; \
300 return (int)LSTOP; \
301 } while(0)
79072805 302
83944c01
FC
303#define COPLINE_INC_WITH_HERELINES \
304 STMT_START { \
305 CopLINE_inc(PL_curcop); \
d794b522
FC
306 if (PL_parser->lex_shared->herelines) \
307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308 PL_parser->lex_shared->herelines = 0; \
83944c01
FC
309 } STMT_END
310
311
8fa7f367
JH
312#ifdef DEBUGGING
313
6154021b 314/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
315enum token_type {
316 TOKENTYPE_NONE,
317 TOKENTYPE_IVAL,
6154021b 318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6 319 TOKENTYPE_PVAL,
aeaef349 320 TOKENTYPE_OPVAL
bbf60fe6
DM
321};
322
6d4a66ac
NC
323static struct debug_tokens {
324 const int token;
325 enum token_type type;
326 const char *name;
327} const debug_tokens[] =
9041c2e3 328{
bbf60fe6
DM
329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
330 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
331 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
333 { ARROW, TOKENTYPE_NONE, "ARROW" },
334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
340 { DO, TOKENTYPE_NONE, "DO" },
341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
342 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
343 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
345 { ELSE, TOKENTYPE_NONE, "ELSE" },
346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
347 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
348 { FOR, TOKENTYPE_IVAL, "FOR" },
349 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
705fe0e5
FC
350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
bbf60fe6
DM
352 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
360 { IF, TOKENTYPE_IVAL, "IF" },
5504e6cf 361 { LABEL, TOKENTYPE_PVAL, "LABEL" },
bbf60fe6
DM
362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
367 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
368 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
369 { MY, TOKENTYPE_IVAL, "MY" },
bbf60fe6
DM
370 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
371 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
372 { OROP, TOKENTYPE_IVAL, "OROP" },
373 { OROR, TOKENTYPE_NONE, "OROR" },
374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
f3f204dc 375 { PEG, TOKENTYPE_NONE, "PEG" },
88e1f1a2
JV
376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
380 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
381 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
382 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
383 { PREINC, TOKENTYPE_NONE, "PREINC" },
384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
f3f204dc 385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
bbf60fe6
DM
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
f3f204dc 388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
bbf60fe6
DM
389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
390 { SUB, TOKENTYPE_NONE, "SUB" },
391 { THING, TOKENTYPE_OPVAL, "THING" },
392 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
397 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 398 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
399 { WHILE, TOKENTYPE_IVAL, "WHILE" },
400 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 402 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
403};
404
6154021b 405/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 406
bbf60fe6 407STATIC int
704d4215 408S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 409{
97aff369 410 dVAR;
7918f24d
NC
411
412 PERL_ARGS_ASSERT_TOKEREPORT;
413
bbf60fe6 414 if (DEBUG_T_TEST) {
bd61b366 415 const char *name = NULL;
bbf60fe6 416 enum token_type type = TOKENTYPE_NONE;
f54cb97a 417 const struct debug_tokens *p;
396482e1 418 SV* const report = newSVpvs("<== ");
bbf60fe6 419
f54cb97a 420 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
421 if (p->token == (int)rv) {
422 name = p->name;
423 type = p->type;
424 break;
425 }
426 }
427 if (name)
54667de8 428 Perl_sv_catpv(aTHX_ report, name);
74736ae6 429 else if ((char)rv > ' ' && (char)rv <= '~')
bbf60fe6
DM
430 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
431 else if (!rv)
396482e1 432 sv_catpvs(report, "EOF");
bbf60fe6
DM
433 else
434 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
435 switch (type) {
436 case TOKENTYPE_NONE:
bbf60fe6
DM
437 break;
438 case TOKENTYPE_IVAL:
704d4215 439 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_OPNUM:
442 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 443 PL_op_name[lvalp->ival]);
bbf60fe6
DM
444 break;
445 case TOKENTYPE_PVAL:
704d4215 446 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
447 break;
448 case TOKENTYPE_OPVAL:
704d4215 449 if (lvalp->opval) {
401441c0 450 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
451 PL_op_name[lvalp->opval->op_type]);
452 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 453 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 454 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
455 }
456
457 }
401441c0 458 else
396482e1 459 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
460 break;
461 }
b6007c36 462 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
463 };
464 return (int)rv;
998054bd
SC
465}
466
b6007c36
DM
467
468/* print the buffer with suitable escapes */
469
470STATIC void
15f169a1 471S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 472{
396482e1 473 SV* const tmp = newSVpvs("");
7918f24d
NC
474
475 PERL_ARGS_ASSERT_PRINTBUF;
476
b6007c36
DM
477 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
478 SvREFCNT_dec(tmp);
479}
480
8fa7f367
JH
481#endif
482
8290c323
NC
483static int
484S_deprecate_commaless_var_list(pTHX) {
485 PL_expect = XTERM;
486 deprecate("comma-less variable list");
487 return REPORT(','); /* grandfather non-comma-format format */
488}
489
ffb4593c
NT
490/*
491 * S_ao
492 *
c963b151
BD
493 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
494 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
495 */
496
76e3520e 497STATIC int
cea2e8a9 498S_ao(pTHX_ int toketype)
a0d0e21e 499{
97aff369 500 dVAR;
3280af22
NIS
501 if (*PL_bufptr == '=') {
502 PL_bufptr++;
a0d0e21e 503 if (toketype == ANDAND)
6154021b 504 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 505 else if (toketype == OROR)
6154021b 506 pl_yylval.ival = OP_ORASSIGN;
c963b151 507 else if (toketype == DORDOR)
6154021b 508 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
509 toketype = ASSIGNOP;
510 }
511 return toketype;
512}
513
ffb4593c
NT
514/*
515 * S_no_op
516 * When Perl expects an operator and finds something else, no_op
517 * prints the warning. It always prints "<something> found where
518 * operator expected. It prints "Missing semicolon on previous line?"
519 * if the surprise occurs at the start of the line. "do you need to
520 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
521 * where the compiler doesn't know if foo is a method call or a function.
522 * It prints "Missing operator before end of line" if there's nothing
523 * after the missing operator, or "... before <...>" if there is something
524 * after the missing operator.
525 */
526
76e3520e 527STATIC void
15f169a1 528S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 529{
97aff369 530 dVAR;
9d4ba2ae
AL
531 char * const oldbp = PL_bufptr;
532 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 533
7918f24d
NC
534 PERL_ARGS_ASSERT_NO_OP;
535
1189a94a
GS
536 if (!s)
537 s = oldbp;
07c798fb 538 else
1189a94a 539 PL_bufptr = s;
734ab321 540 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
56da5a46
RGS
541 if (ckWARN_d(WARN_SYNTAX)) {
542 if (is_first)
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Missing semicolon on previous line?)\n");
545 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 546 const char *t;
8a2bca12 547 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
734ab321 548 t += UTF ? UTF8SKIP(t) : 1)
c35e046a 549 NOOP;
56da5a46
RGS
550 if (t < PL_bufptr && isSPACE(*t))
551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
552 "\t(Do you need to predeclare %"SVf"?)\n",
553 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
554 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46
RGS
555 }
556 else {
557 assert(s >= oldbp);
558 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
559 "\t(Missing operator before %"SVf"?)\n",
560 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
561 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46 562 }
07c798fb 563 }
3280af22 564 PL_bufptr = oldbp;
8990e307
LW
565}
566
ffb4593c
NT
567/*
568 * S_missingterm
569 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 570 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
571 * If we're in a delimited string and the delimiter is a control
572 * character, it's reformatted into a two-char sequence like ^C.
573 * This is fatal.
574 */
575
76e3520e 576STATIC void
cea2e8a9 577S_missingterm(pTHX_ char *s)
8990e307 578{
97aff369 579 dVAR;
8990e307
LW
580 char tmpbuf[3];
581 char q;
582 if (s) {
9d4ba2ae 583 char * const nl = strrchr(s,'\n');
d2719217 584 if (nl)
8990e307
LW
585 *nl = '\0';
586 }
463559e7 587 else if (isCNTRL(PL_multi_close)) {
8990e307 588 *tmpbuf = '^';
585ec06d 589 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
590 tmpbuf[2] = '\0';
591 s = tmpbuf;
592 }
593 else {
eb160463 594 *tmpbuf = (char)PL_multi_close;
8990e307
LW
595 tmpbuf[1] = '\0';
596 s = tmpbuf;
597 }
598 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 599 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 600}
79072805 601
dd0ac2b9
FC
602#include "feature.h"
603
0d863452 604/*
0d863452
RH
605 * Check whether the named feature is enabled.
606 */
26ea9e12 607bool
3fff3427 608Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 609{
97aff369 610 dVAR;
4a731d7b 611 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
612
613 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
ca4d40c4
FC
614
615 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
7918f24d 616
26ea9e12
NC
617 if (namelen > MAX_FEATURE_LEN)
618 return FALSE;
3fff3427 619 memcpy(&he_name[8], name, namelen);
7d69d4a6 620
c8ca97b0
NC
621 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
622 REFCOUNTED_HE_EXISTS));
0d863452
RH
623}
624
ffb4593c 625/*
9cbb5ea2
GS
626 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
627 * utf16-to-utf8-reversed.
ffb4593c
NT
628 */
629
c39cd008
GS
630#ifdef PERL_CR_FILTER
631static void
632strip_return(SV *sv)
633{
eb578fdb
KW
634 const char *s = SvPVX_const(sv);
635 const char * const e = s + SvCUR(sv);
7918f24d
NC
636
637 PERL_ARGS_ASSERT_STRIP_RETURN;
638
c39cd008
GS
639 /* outer loop optimized to do nothing if there are no CR-LFs */
640 while (s < e) {
641 if (*s++ == '\r' && *s == '\n') {
642 /* hit a CR-LF, need to copy the rest */
eb578fdb 643 char *d = s - 1;
c39cd008
GS
644 *d++ = *s++;
645 while (s < e) {
646 if (*s == '\r' && s[1] == '\n')
647 s++;
648 *d++ = *s++;
649 }
650 SvCUR(sv) -= s - d;
651 return;
652 }
653 }
654}
a868473f 655
76e3520e 656STATIC I32
c39cd008 657S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 658{
f54cb97a 659 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
660 if (count > 0 && !maxlen)
661 strip_return(sv);
662 return count;
a868473f
NIS
663}
664#endif
665
ffb4593c 666/*
8eaa0acf
Z
667=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
668
669Creates and initialises a new lexer/parser state object, supplying
670a context in which to lex and parse from a new source of Perl code.
671A pointer to the new state object is placed in L</PL_parser>. An entry
672is made on the save stack so that upon unwinding the new state object
673will be destroyed and the former value of L</PL_parser> will be restored.
674Nothing else need be done to clean up the parsing context.
675
676The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
677non-null, provides a string (in SV form) containing code to be parsed.
678A copy of the string is made, so subsequent modification of I<line>
679does not affect parsing. I<rsfp>, if non-null, provides an input stream
680from which code will be read to be parsed. If both are non-null, the
681code in I<line> comes first and must consist of complete lines of input,
682and I<rsfp> supplies the remainder of the source.
683
e368b3bd
FC
684The I<flags> parameter is reserved for future use. Currently it is only
685used by perl internally, so extensions should always pass zero.
8eaa0acf
Z
686
687=cut
688*/
ffb4593c 689
27fcb6ee 690/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
87606032
NC
691 can share filters with the current parser.
692 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
693 caller, hence isn't owned by the parser, so shouldn't be closed on parser
694 destruction. This is used to handle the case of defaulting to reading the
695 script from the standard input because no filename was given on the command
696 line (without getting confused by situation where STDIN has been closed, so
697 the script handle is opened on fd 0) */
27fcb6ee 698
a0d0e21e 699void
8eaa0acf 700Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 701{
97aff369 702 dVAR;
6ef55633 703 const char *s = NULL;
5486870f 704 yy_parser *parser, *oparser;
60d63348 705 if (flags && flags & ~LEX_START_FLAGS)
8eaa0acf 706 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
707
708 /* create and initialise a parser */
709
199e78b7 710 Newxz(parser, 1, yy_parser);
5486870f 711 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
712 PL_parser = parser;
713
28ac2b49
Z
714 parser->stack = NULL;
715 parser->ps = NULL;
716 parser->stack_size = 0;
acdf0a21 717
e3abe207
DM
718 /* on scope exit, free this parser and restore any outer one */
719 SAVEPARSER(parser);
7c4baf47 720 parser->saved_curcop = PL_curcop;
e3abe207 721
acdf0a21 722 /* initialise lexer state */
8990e307 723
fb205e7a
DM
724#ifdef PERL_MAD
725 parser->curforce = -1;
726#else
727 parser->nexttoke = 0;
728#endif
ca4cfd28 729 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 730 parser->copline = NOLINE;
5afb0a62 731 parser->lex_state = LEX_NORMAL;
c2598295 732 parser->expect = XSTATE;
2f9285f8 733 parser->rsfp = rsfp;
27fcb6ee
FC
734 parser->rsfp_filters =
735 !(flags & LEX_START_SAME_FILTER) || !oparser
d3cd8e11
FC
736 ? NULL
737 : MUTABLE_AV(SvREFCNT_inc(
738 oparser->rsfp_filters
739 ? oparser->rsfp_filters
740 : (oparser->rsfp_filters = newAV())
741 ));
2f9285f8 742
199e78b7
DM
743 Newx(parser->lex_brackstack, 120, char);
744 Newx(parser->lex_casestack, 12, char);
745 *parser->lex_casestack = '\0';
d794b522 746 Newxz(parser->lex_shared, 1, LEXSHARED);
02b34bbe 747
10efb74f 748 if (line) {
0528fd32 749 STRLEN len;
10efb74f 750 s = SvPV_const(line, len);
0abcdfa4
FC
751 parser->linestr = flags & LEX_START_COPIED
752 ? SvREFCNT_inc_simple_NN(line)
753 : newSVpvn_flags(s, len, SvUTF8(line));
11076590 754 sv_catpvs(parser->linestr, "\n;");
0abcdfa4
FC
755 } else {
756 parser->linestr = newSVpvs("\n;");
8990e307 757 }
f06b5848
DM
758 parser->oldoldbufptr =
759 parser->oldbufptr =
760 parser->bufptr =
761 parser->linestart = SvPVX(parser->linestr);
762 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
763 parser->last_lop = parser->last_uni = NULL;
87606032
NC
764 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
765 |LEX_DONT_CLOSE_RSFP);
737c24fc 766
60d63348 767 parser->in_pod = parser->filtered = 0;
79072805 768}
a687059c 769
e3abe207
DM
770
771/* delete a parser object */
772
773void
774Perl_parser_free(pTHX_ const yy_parser *parser)
775{
7918f24d
NC
776 PERL_ARGS_ASSERT_PARSER_FREE;
777
7c4baf47 778 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
779 SvREFCNT_dec(parser->linestr);
780
87606032 781 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
2f9285f8 782 PerlIO_clearerr(parser->rsfp);
799361c3
SH
783 else if (parser->rsfp && (!parser->old_parser ||
784 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 785 PerlIO_close(parser->rsfp);
5486870f 786 SvREFCNT_dec(parser->rsfp_filters);
10002bc1
FC
787 SvREFCNT_dec(parser->lex_stuff);
788 SvREFCNT_dec(parser->sublex_info.repl);
3ac7ff8f
FC
789
790 Safefree(parser->lex_brackstack);
791 Safefree(parser->lex_casestack);
792 Safefree(parser->lex_shared);
793 PL_parser = parser->old_parser;
794 Safefree(parser);
795}
796
797void
798Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
799{
800#ifdef PERL_MAD
801 I32 nexttoke = parser->lasttoke;
802#else
803 I32 nexttoke = parser->nexttoke;
804#endif
805 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
3ce3dcd9
FC
806 while (nexttoke--) {
807#ifdef PERL_MAD
808 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
3ac7ff8f
FC
809 & 0xffff)
810 && parser->nexttoke[nexttoke].next_val.opval
811 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
812 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
813 op_free(parser->nexttoke[nexttoke].next_val.opval);
814 parser->nexttoke[nexttoke].next_val.opval = NULL;
815 }
3ce3dcd9 816#else
3ac7ff8f
FC
817 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
818 && parser->nextval[nexttoke].opval
819 && parser->nextval[nexttoke].opval->op_slabbed
820 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
3ce3dcd9 821 op_free(parser->nextval[nexttoke].opval);
3ac7ff8f
FC
822 parser->nextval[nexttoke].opval = NULL;
823 }
3ce3dcd9
FC
824#endif
825 }
e3abe207
DM
826}
827
828
ffb4593c 829/*
f0e67a1d
Z
830=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
831
832Buffer scalar containing the chunk currently under consideration of the
833text currently being lexed. This is always a plain string scalar (for
834which C<SvPOK> is true). It is not intended to be used as a scalar by
835normal scalar means; instead refer to the buffer directly by the pointer
836variables described below.
837
838The lexer maintains various C<char*> pointers to things in the
839C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
840reallocated, all of these pointers must be updated. Don't attempt to
841do this manually, but rather use L</lex_grow_linestr> if you need to
842reallocate the buffer.
843
844The content of the text chunk in the buffer is commonly exactly one
845complete line of input, up to and including a newline terminator,
846but there are situations where it is otherwise. The octets of the
847buffer may be intended to be interpreted as either UTF-8 or Latin-1.
848The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
849flag on this scalar, which may disagree with it.
850
851For direct examination of the buffer, the variable
852L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
853lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
854of these pointers is usually preferable to examination of the scalar
855through normal scalar means.
856
857=for apidoc AmxU|char *|PL_parser-E<gt>bufend
858
859Direct pointer to the end of the chunk of text currently being lexed, the
860end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
861+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
862always located at the end of the buffer, and does not count as part of
863the buffer's contents.
864
865=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
866
867Points to the current position of lexing inside the lexer buffer.
868Characters around this point may be freely examined, within
869the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
870L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
871interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
872
873Lexing code (whether in the Perl core or not) moves this pointer past
874the characters that it consumes. It is also expected to perform some
875bookkeeping whenever a newline character is consumed. This movement
876can be more conveniently performed by the function L</lex_read_to>,
877which handles newlines appropriately.
878
879Interpretation of the buffer's octets can be abstracted out by
880using the slightly higher-level functions L</lex_peek_unichar> and
881L</lex_read_unichar>.
882
883=for apidoc AmxU|char *|PL_parser-E<gt>linestart
884
885Points to the start of the current line inside the lexer buffer.
886This is useful for indicating at which column an error occurred, and
887not much else. This must be updated by any lexing code that consumes
888a newline; the function L</lex_read_to> handles this detail.
889
890=cut
891*/
892
893/*
894=for apidoc Amx|bool|lex_bufutf8
895
896Indicates whether the octets in the lexer buffer
897(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
898of Unicode characters. If not, they should be interpreted as Latin-1
899characters. This is analogous to the C<SvUTF8> flag for scalars.
900
901In UTF-8 mode, it is not guaranteed that the lexer buffer actually
902contains valid UTF-8. Lexing code must be robust in the face of invalid
903encoding.
904
905The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
906is significant, but not the whole story regarding the input character
907encoding. Normally, when a file is being read, the scalar contains octets
908and its C<SvUTF8> flag is off, but the octets should be interpreted as
909UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
910however, the scalar may have the C<SvUTF8> flag on, and in this case its
911octets should be interpreted as UTF-8 unless the C<use bytes> pragma
912is in effect. This logic may change in the future; use this function
913instead of implementing the logic yourself.
914
915=cut
916*/
917
918bool
919Perl_lex_bufutf8(pTHX)
920{
921 return UTF;
922}
923
924/*
925=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
926
927Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
928at least I<len> octets (including terminating NUL). Returns a
929pointer to the reallocated buffer. This is necessary before making
930any direct modification of the buffer that would increase its length.
931L</lex_stuff_pvn> provides a more convenient way to insert text into
932the buffer.
933
934Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
935this function updates all of the lexer's variables that point directly
936into the buffer.
937
938=cut
939*/
940
941char *
942Perl_lex_grow_linestr(pTHX_ STRLEN len)
943{
944 SV *linestr;
945 char *buf;
946 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
c7641931 947 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
f0e67a1d
Z
948 linestr = PL_parser->linestr;
949 buf = SvPVX(linestr);
950 if (len <= SvLEN(linestr))
951 return buf;
952 bufend_pos = PL_parser->bufend - buf;
953 bufptr_pos = PL_parser->bufptr - buf;
954 oldbufptr_pos = PL_parser->oldbufptr - buf;
955 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
956 linestart_pos = PL_parser->linestart - buf;
957 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
958 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3328ab5a
FC
959 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
960 PL_parser->lex_shared->re_eval_start - buf : 0;
c7641931 961
f0e67a1d 962 buf = sv_grow(linestr, len);
c7641931 963
f0e67a1d
Z
964 PL_parser->bufend = buf + bufend_pos;
965 PL_parser->bufptr = buf + bufptr_pos;
966 PL_parser->oldbufptr = buf + oldbufptr_pos;
967 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
968 PL_parser->linestart = buf + linestart_pos;
969 if (PL_parser->last_uni)
970 PL_parser->last_uni = buf + last_uni_pos;
971 if (PL_parser->last_lop)
972 PL_parser->last_lop = buf + last_lop_pos;
3328ab5a
FC
973 if (PL_parser->lex_shared->re_eval_start)
974 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
f0e67a1d
Z
975 return buf;
976}
977
978/*
83aa740e 979=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
980
981Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
982immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
983reallocating the buffer if necessary. This means that lexing code that
984runs later will see the characters as if they had appeared in the input.
985It is not recommended to do this as part of normal parsing, and most
986uses of this facility run the risk of the inserted characters being
987interpreted in an unintended manner.
988
989The string to be inserted is represented by I<len> octets starting
990at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
991according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
992The characters are recoded for the lexer buffer, according to how the
993buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 994to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
995function is more convenient.
996
997=cut
998*/
999
1000void
83aa740e 1001Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 1002{
749123ff 1003 dVAR;
f0e67a1d
Z
1004 char *bufptr;
1005 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1006 if (flags & ~(LEX_STUFF_UTF8))
1007 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1008 if (UTF) {
1009 if (flags & LEX_STUFF_UTF8) {
1010 goto plain_copy;
1011 } else {
54d004e8 1012 STRLEN highhalf = 0; /* Count of variants */
83aa740e 1013 const char *p, *e = pv+len;
54d004e8
KW
1014 for (p = pv; p != e; p++) {
1015 if (! UTF8_IS_INVARIANT(*p)) {
1016 highhalf++;
1017 }
1018 }
f0e67a1d
Z
1019 if (!highhalf)
1020 goto plain_copy;
1021 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1022 bufptr = PL_parser->bufptr;
1023 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1024 SvCUR_set(PL_parser->linestr,
1025 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
1026 PL_parser->bufend += len+highhalf;
1027 for (p = pv; p != e; p++) {
1028 U8 c = (U8)*p;
54d004e8
KW
1029 if (! UTF8_IS_INVARIANT(c)) {
1030 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1031 *bufptr++ = UTF8_TWO_BYTE_LO(c);
f0e67a1d
Z
1032 } else {
1033 *bufptr++ = (char)c;
1034 }
1035 }
1036 }
1037 } else {
1038 if (flags & LEX_STUFF_UTF8) {
1039 STRLEN highhalf = 0;
83aa740e 1040 const char *p, *e = pv+len;
f0e67a1d
Z
1041 for (p = pv; p != e; p++) {
1042 U8 c = (U8)*p;
54d004e8 1043 if (UTF8_IS_ABOVE_LATIN1(c)) {
f0e67a1d
Z
1044 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1045 "non-Latin-1 character into Latin-1 input");
54d004e8 1046 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
f0e67a1d
Z
1047 p++;
1048 highhalf++;
54d004e8 1049 } else if (! UTF8_IS_INVARIANT(c)) {
f0e67a1d
Z
1050 /* malformed UTF-8 */
1051 ENTER;
1052 SAVESPTR(PL_warnhook);
1053 PL_warnhook = PERL_WARNHOOK_FATAL;
1054 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1055 LEAVE;
1056 }
1057 }
1058 if (!highhalf)
1059 goto plain_copy;
1060 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1061 bufptr = PL_parser->bufptr;
1062 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1063 SvCUR_set(PL_parser->linestr,
1064 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d 1065 PL_parser->bufend += len-highhalf;
54d004e8
KW
1066 p = pv;
1067 while (p < e) {
1068 if (UTF8_IS_INVARIANT(*p)) {
1069 *bufptr++ = *p;
1070 p++;
f0e67a1d 1071 }
54d004e8
KW
1072 else {
1073 assert(p < e -1 );
1074 *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1075 p += 2;
1076 }
f0e67a1d
Z
1077 }
1078 } else {
54d004e8 1079 plain_copy:
f0e67a1d
Z
1080 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1081 bufptr = PL_parser->bufptr;
1082 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1083 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1084 PL_parser->bufend += len;
1085 Copy(pv, bufptr, len, char);
1086 }
1087 }
1088}
1089
1090/*
9dcc53ea
Z
1091=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1092
1093Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1094immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1095reallocating the buffer if necessary. This means that lexing code that
1096runs later will see the characters as if they had appeared in the input.
1097It is not recommended to do this as part of normal parsing, and most
1098uses of this facility run the risk of the inserted characters being
1099interpreted in an unintended manner.
1100
1101The string to be inserted is represented by octets starting at I<pv>
1102and continuing to the first nul. These octets are interpreted as either
1103UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1104in I<flags>. The characters are recoded for the lexer buffer, according
1105to how the buffer is currently being interpreted (L</lex_bufutf8>).
1106If it is not convenient to nul-terminate a string to be inserted, the
1107L</lex_stuff_pvn> function is more appropriate.
1108
1109=cut
1110*/
1111
1112void
1113Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1114{
1115 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1116 lex_stuff_pvn(pv, strlen(pv), flags);
1117}
1118
1119/*
f0e67a1d
Z
1120=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1121
1122Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1123immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1124reallocating the buffer if necessary. This means that lexing code that
1125runs later will see the characters as if they had appeared in the input.
1126It is not recommended to do this as part of normal parsing, and most
1127uses of this facility run the risk of the inserted characters being
1128interpreted in an unintended manner.
1129
1130The string to be inserted is the string value of I<sv>. The characters
1131are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1132being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1133not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1134need to construct a scalar.
1135
1136=cut
1137*/
1138
1139void
1140Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1141{
1142 char *pv;
1143 STRLEN len;
1144 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1145 if (flags)
1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1147 pv = SvPV(sv, len);
1148 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1149}
1150
1151/*
1152=for apidoc Amx|void|lex_unstuff|char *ptr
1153
1154Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1155I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1156This hides the discarded text from any lexing code that runs later,
1157as if the text had never appeared.
1158
1159This is not the normal way to consume lexed text. For that, use
1160L</lex_read_to>.
1161
1162=cut
1163*/
1164
1165void
1166Perl_lex_unstuff(pTHX_ char *ptr)
1167{
1168 char *buf, *bufend;
1169 STRLEN unstuff_len;
1170 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1171 buf = PL_parser->bufptr;
1172 if (ptr < buf)
1173 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1174 if (ptr == buf)
1175 return;
1176 bufend = PL_parser->bufend;
1177 if (ptr > bufend)
1178 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1179 unstuff_len = ptr - buf;
1180 Move(ptr, buf, bufend+1-ptr, char);
1181 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1182 PL_parser->bufend = bufend - unstuff_len;
1183}
1184
1185/*
1186=for apidoc Amx|void|lex_read_to|char *ptr
1187
1188Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1189to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1190performing the correct bookkeeping whenever a newline character is passed.
1191This is the normal way to consume lexed text.
1192
1193Interpretation of the buffer's octets can be abstracted out by
1194using the slightly higher-level functions L</lex_peek_unichar> and
1195L</lex_read_unichar>.
1196
1197=cut
1198*/
1199
1200void
1201Perl_lex_read_to(pTHX_ char *ptr)
1202{
1203 char *s;
1204 PERL_ARGS_ASSERT_LEX_READ_TO;
1205 s = PL_parser->bufptr;
1206 if (ptr < s || ptr > PL_parser->bufend)
1207 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1208 for (; s != ptr; s++)
1209 if (*s == '\n') {
83944c01 1210 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
1211 PL_parser->linestart = s+1;
1212 }
1213 PL_parser->bufptr = ptr;
1214}
1215
1216/*
1217=for apidoc Amx|void|lex_discard_to|char *ptr
1218
1219Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1220up to I<ptr>. The remaining content of the buffer will be moved, and
1221all pointers into the buffer updated appropriately. I<ptr> must not
1222be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1223it is not permitted to discard text that has yet to be lexed.
1224
1225Normally it is not necessarily to do this directly, because it suffices to
1226use the implicit discarding behaviour of L</lex_next_chunk> and things
1227based on it. However, if a token stretches across multiple lines,
1f317c95 1228and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1229that purpose, then after completion of the token it would be wise to
1230explicitly discard the now-unneeded earlier lines, to avoid future
1231multi-line tokens growing the buffer without bound.
1232
1233=cut
1234*/
1235
1236void
1237Perl_lex_discard_to(pTHX_ char *ptr)
1238{
1239 char *buf;
1240 STRLEN discard_len;
1241 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1242 buf = SvPVX(PL_parser->linestr);
1243 if (ptr < buf)
1244 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1245 if (ptr == buf)
1246 return;
1247 if (ptr > PL_parser->bufptr)
1248 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1249 discard_len = ptr - buf;
1250 if (PL_parser->oldbufptr < ptr)
1251 PL_parser->oldbufptr = ptr;
1252 if (PL_parser->oldoldbufptr < ptr)
1253 PL_parser->oldoldbufptr = ptr;
1254 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1255 PL_parser->last_uni = NULL;
1256 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1257 PL_parser->last_lop = NULL;
1258 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1259 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1260 PL_parser->bufend -= discard_len;
1261 PL_parser->bufptr -= discard_len;
1262 PL_parser->oldbufptr -= discard_len;
1263 PL_parser->oldoldbufptr -= discard_len;
1264 if (PL_parser->last_uni)
1265 PL_parser->last_uni -= discard_len;
1266 if (PL_parser->last_lop)
1267 PL_parser->last_lop -= discard_len;
1268}
1269
1270/*
1271=for apidoc Amx|bool|lex_next_chunk|U32 flags
1272
1273Reads in the next chunk of text to be lexed, appending it to
1274L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1275looked to the end of the current chunk and wants to know more. It is
1276usual, but not necessary, for lexing to have consumed the entirety of
1277the current chunk at this time.
1278
1279If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1280chunk (i.e., the current chunk has been entirely consumed), normally the
1281current chunk will be discarded at the same time that the new chunk is
1282read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1283will not be discarded. If the current chunk has not been entirely
1284consumed, then it will not be discarded regardless of the flag.
1285
1286Returns true if some new text was added to the buffer, or false if the
1287buffer has reached the end of the input text.
1288
1289=cut
1290*/
1291
1292#define LEX_FAKE_EOF 0x80000000
112d1284 1293#define LEX_NO_TERM 0x40000000
f0e67a1d
Z
1294
1295bool
1296Perl_lex_next_chunk(pTHX_ U32 flags)
1297{
1298 SV *linestr;
1299 char *buf;
1300 STRLEN old_bufend_pos, new_bufend_pos;
1301 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1302 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1303 bool got_some_for_debugger = 0;
f0e67a1d 1304 bool got_some;
112d1284 1305 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
f0e67a1d 1306 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1307 linestr = PL_parser->linestr;
1308 buf = SvPVX(linestr);
1309 if (!(flags & LEX_KEEP_PREVIOUS) &&
1310 PL_parser->bufptr == PL_parser->bufend) {
1311 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1312 linestart_pos = 0;
1313 if (PL_parser->last_uni != PL_parser->bufend)
1314 PL_parser->last_uni = NULL;
1315 if (PL_parser->last_lop != PL_parser->bufend)
1316 PL_parser->last_lop = NULL;
1317 last_uni_pos = last_lop_pos = 0;
1318 *buf = 0;
1319 SvCUR(linestr) = 0;
1320 } else {
1321 old_bufend_pos = PL_parser->bufend - buf;
1322 bufptr_pos = PL_parser->bufptr - buf;
1323 oldbufptr_pos = PL_parser->oldbufptr - buf;
1324 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1325 linestart_pos = PL_parser->linestart - buf;
1326 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1327 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1328 }
1329 if (flags & LEX_FAKE_EOF) {
1330 goto eof;
60d63348 1331 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
f0e67a1d
Z
1332 got_some = 0;
1333 } else if (filter_gets(linestr, old_bufend_pos)) {
1334 got_some = 1;
17cc9359 1335 got_some_for_debugger = 1;
112d1284
FC
1336 } else if (flags & LEX_NO_TERM) {
1337 got_some = 0;
f0e67a1d 1338 } else {
580561a3
Z
1339 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1340 sv_setpvs(linestr, "");
f0e67a1d
Z
1341 eof:
1342 /* End of real input. Close filehandle (unless it was STDIN),
1343 * then add implicit termination.
1344 */
87606032 1345 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
f0e67a1d
Z
1346 PerlIO_clearerr(PL_parser->rsfp);
1347 else if (PL_parser->rsfp)
1348 (void)PerlIO_close(PL_parser->rsfp);
1349 PL_parser->rsfp = NULL;
60d63348 1350 PL_parser->in_pod = PL_parser->filtered = 0;
f0e67a1d
Z
1351#ifdef PERL_MAD
1352 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1353 PL_faketokens = 1;
1354#endif
1355 if (!PL_in_eval && PL_minus_p) {
1356 sv_catpvs(linestr,
1357 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1358 PL_minus_n = PL_minus_p = 0;
1359 } else if (!PL_in_eval && PL_minus_n) {
1360 sv_catpvs(linestr, /*{*/";}");
1361 PL_minus_n = 0;
1362 } else
1363 sv_catpvs(linestr, ";");
1364 got_some = 1;
1365 }
1366 buf = SvPVX(linestr);
1367 new_bufend_pos = SvCUR(linestr);
1368 PL_parser->bufend = buf + new_bufend_pos;
1369 PL_parser->bufptr = buf + bufptr_pos;
1370 PL_parser->oldbufptr = buf + oldbufptr_pos;
1371 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1372 PL_parser->linestart = buf + linestart_pos;
1373 if (PL_parser->last_uni)
1374 PL_parser->last_uni = buf + last_uni_pos;
1375 if (PL_parser->last_lop)
1376 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1377 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1378 PL_curstash != PL_debstash) {
1379 /* debugger active and we're not compiling the debugger code,
1380 * so store the line into the debugger's array of lines
1381 */
1382 update_debugger_info(NULL, buf+old_bufend_pos,
1383 new_bufend_pos-old_bufend_pos);
1384 }
1385 return got_some;
1386}
1387
1388/*
1389=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1390
1391Looks ahead one (Unicode) character in the text currently being lexed.
1392Returns the codepoint (unsigned integer value) of the next character,
1393or -1 if lexing has reached the end of the input text. To consume the
1394peeked character, use L</lex_read_unichar>.
1395
1396If the next character is in (or extends into) the next chunk of input
1397text, the next chunk will be read in. Normally the current chunk will be
1398discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1399then the current chunk will not be discarded.
1400
1401If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1402is encountered, an exception is generated.
1403
1404=cut
1405*/
1406
1407I32
1408Perl_lex_peek_unichar(pTHX_ U32 flags)
1409{
749123ff 1410 dVAR;
f0e67a1d
Z
1411 char *s, *bufend;
1412 if (flags & ~(LEX_KEEP_PREVIOUS))
1413 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1414 s = PL_parser->bufptr;
1415 bufend = PL_parser->bufend;
1416 if (UTF) {
1417 U8 head;
1418 I32 unichar;
1419 STRLEN len, retlen;
1420 if (s == bufend) {
1421 if (!lex_next_chunk(flags))
1422 return -1;
1423 s = PL_parser->bufptr;
1424 bufend = PL_parser->bufend;
1425 }
1426 head = (U8)*s;
54d004e8 1427 if (UTF8_IS_INVARIANT(head))
f0e67a1d 1428 return head;
54d004e8
KW
1429 if (UTF8_IS_START(head)) {
1430 len = UTF8SKIP(&head);
f0e67a1d
Z
1431 while ((STRLEN)(bufend-s) < len) {
1432 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1433 break;
1434 s = PL_parser->bufptr;
1435 bufend = PL_parser->bufend;
1436 }
1437 }
1438 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1439 if (retlen == (STRLEN)-1) {
1440 /* malformed UTF-8 */
1441 ENTER;
1442 SAVESPTR(PL_warnhook);
1443 PL_warnhook = PERL_WARNHOOK_FATAL;
1444 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1445 LEAVE;
1446 }
1447 return unichar;
1448 } else {
1449 if (s == bufend) {
1450 if (!lex_next_chunk(flags))
1451 return -1;
1452 s = PL_parser->bufptr;
1453 }
1454 return (U8)*s;
1455 }
1456}
1457
1458/*
1459=for apidoc Amx|I32|lex_read_unichar|U32 flags
1460
1461Reads the next (Unicode) character in the text currently being lexed.
1462Returns the codepoint (unsigned integer value) of the character read,
1463and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1464if lexing has reached the end of the input text. To non-destructively
1465examine the next character, use L</lex_peek_unichar> instead.
1466
1467If the next character is in (or extends into) the next chunk of input
1468text, the next chunk will be read in. Normally the current chunk will be
1469discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1470then the current chunk will not be discarded.
1471
1472If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1473is encountered, an exception is generated.
1474
1475=cut
1476*/
1477
1478I32
1479Perl_lex_read_unichar(pTHX_ U32 flags)
1480{
1481 I32 c;
1482 if (flags & ~(LEX_KEEP_PREVIOUS))
1483 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1484 c = lex_peek_unichar(flags);
1485 if (c != -1) {
1486 if (c == '\n')
83944c01 1487 COPLINE_INC_WITH_HERELINES;
d9018cbe
EB
1488 if (UTF)
1489 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1490 else
1491 ++(PL_parser->bufptr);
f0e67a1d
Z
1492 }
1493 return c;
1494}
1495
1496/*
1497=for apidoc Amx|void|lex_read_space|U32 flags
1498
1499Reads optional spaces, in Perl style, in the text currently being
1500lexed. The spaces may include ordinary whitespace characters and
1501Perl-style comments. C<#line> directives are processed if encountered.
1502L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1503at a non-space character (or the end of the input text).
1504
1505If spaces extend into the next chunk of input text, the next chunk will
1506be read in. Normally the current chunk will be discarded at the same
1507time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1508chunk will not be discarded.
1509
1510=cut
1511*/
1512
f0998909
Z
1513#define LEX_NO_NEXT_CHUNK 0x80000000
1514
f0e67a1d
Z
1515void
1516Perl_lex_read_space(pTHX_ U32 flags)
1517{
1518 char *s, *bufend;
1519 bool need_incline = 0;
f0998909 1520 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1521 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1522#ifdef PERL_MAD
1523 if (PL_skipwhite) {
1524 sv_free(PL_skipwhite);
1525 PL_skipwhite = NULL;
1526 }
1527 if (PL_madskills)
1528 PL_skipwhite = newSVpvs("");
1529#endif /* PERL_MAD */
1530 s = PL_parser->bufptr;
1531 bufend = PL_parser->bufend;
1532 while (1) {
1533 char c = *s;
1534 if (c == '#') {
1535 do {
1536 c = *++s;
1537 } while (!(c == '\n' || (c == 0 && s == bufend)));
1538 } else if (c == '\n') {
1539 s++;
1540 PL_parser->linestart = s;
1541 if (s == bufend)
1542 need_incline = 1;
1543 else
1544 incline(s);
1545 } else if (isSPACE(c)) {
1546 s++;
1547 } else if (c == 0 && s == bufend) {
1548 bool got_more;
1549#ifdef PERL_MAD
1550 if (PL_madskills)
1551 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1552#endif /* PERL_MAD */
f0998909
Z
1553 if (flags & LEX_NO_NEXT_CHUNK)
1554 break;
f0e67a1d 1555 PL_parser->bufptr = s;
83944c01 1556 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
1557 got_more = lex_next_chunk(flags);
1558 CopLINE_dec(PL_curcop);
1559 s = PL_parser->bufptr;
1560 bufend = PL_parser->bufend;
1561 if (!got_more)
1562 break;
1563 if (need_incline && PL_parser->rsfp) {
1564 incline(s);
1565 need_incline = 0;
1566 }
1567 } else {
1568 break;
1569 }
1570 }
1571#ifdef PERL_MAD
1572 if (PL_madskills)
1573 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1574#endif /* PERL_MAD */
1575 PL_parser->bufptr = s;
1576}
1577
1578/*
ffb4593c
NT
1579 * S_incline
1580 * This subroutine has nothing to do with tilting, whether at windmills
1581 * or pinball tables. Its name is short for "increment line". It
57843af0 1582 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1583 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1584 * # line 500 "foo.pm"
1585 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1586 */
1587
76e3520e 1588STATIC void
d9095cec 1589S_incline(pTHX_ const char *s)
463ee0b2 1590{
97aff369 1591 dVAR;
d9095cec
NC
1592 const char *t;
1593 const char *n;
1594 const char *e;
8818d409 1595 line_t line_num;
463ee0b2 1596
7918f24d
NC
1597 PERL_ARGS_ASSERT_INCLINE;
1598
83944c01 1599 COPLINE_INC_WITH_HERELINES;
451f421f
FC
1600 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1601 && s+1 == PL_bufend && *s == ';') {
1602 /* fake newline in string eval */
1603 CopLINE_dec(PL_curcop);
1604 return;
1605 }
463ee0b2
LW
1606 if (*s++ != '#')
1607 return;
d4c19fe8
AL
1608 while (SPACE_OR_TAB(*s))
1609 s++;
73659bf1
GS
1610 if (strnEQ(s, "line", 4))
1611 s += 4;
1612 else
1613 return;
084592ab 1614 if (SPACE_OR_TAB(*s))
73659bf1 1615 s++;
4e553d73 1616 else
73659bf1 1617 return;
d4c19fe8
AL
1618 while (SPACE_OR_TAB(*s))
1619 s++;
463ee0b2
LW
1620 if (!isDIGIT(*s))
1621 return;
d4c19fe8 1622
463ee0b2
LW
1623 n = s;
1624 while (isDIGIT(*s))
1625 s++;
07714eb4 1626 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1627 return;
bf4acbe4 1628 while (SPACE_OR_TAB(*s))
463ee0b2 1629 s++;
73659bf1 1630 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1631 s++;
73659bf1
GS
1632 e = t + 1;
1633 }
463ee0b2 1634 else {
c35e046a
AL
1635 t = s;
1636 while (!isSPACE(*t))
1637 t++;
73659bf1 1638 e = t;
463ee0b2 1639 }
bf4acbe4 1640 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1641 e++;
1642 if (*e != '\n' && *e != '\0')
1643 return; /* false alarm */
1644
8818d409
FC
1645 line_num = atoi(n)-1;
1646
f4dd75d9 1647 if (t - s > 0) {
d9095cec 1648 const STRLEN len = t - s;
19bad673
NC
1649 SV *const temp_sv = CopFILESV(PL_curcop);
1650 const char *cf;
1651 STRLEN tmplen;
1652
1653 if (temp_sv) {
1654 cf = SvPVX(temp_sv);
1655 tmplen = SvCUR(temp_sv);
1656 } else {
1657 cf = NULL;
1658 tmplen = 0;
1659 }
1660
d1299d44 1661 if (!PL_rsfp && !PL_parser->filtered) {
e66cf94c
RGS
1662 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1663 * to *{"::_<newfilename"} */
44867030
NC
1664 /* However, the long form of evals is only turned on by the
1665 debugger - usually they're "(eval %lu)" */
1666 char smallbuf[128];
1667 char *tmpbuf;
1668 GV **gvp;
d9095cec 1669 STRLEN tmplen2 = len;
798b63bc 1670 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1671 tmpbuf = smallbuf;
1672 else
2ae0db35 1673 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1674 tmpbuf[0] = '_';
1675 tmpbuf[1] = '<';
2ae0db35 1676 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1677 tmplen += 2;
8a5ee598
RGS
1678 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1679 if (gvp) {
44867030
NC
1680 char *tmpbuf2;
1681 GV *gv2;
1682
1683 if (tmplen2 + 2 <= sizeof smallbuf)
1684 tmpbuf2 = smallbuf;
1685 else
1686 Newx(tmpbuf2, tmplen2 + 2, char);
1687
1688 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1689 /* Either they malloc'd it, or we malloc'd it,
1690 so no prefix is present in ours. */
1691 tmpbuf2[0] = '_';
1692 tmpbuf2[1] = '<';
1693 }
1694
1695 memcpy(tmpbuf2 + 2, s, tmplen2);
1696 tmplen2 += 2;
1697
8a5ee598 1698 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1699 if (!isGV(gv2)) {
8a5ee598 1700 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1701 /* adjust ${"::_<newfilename"} to store the new file name */
1702 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1703 /* The line number may differ. If that is the case,
1704 alias the saved lines that are in the array.
1705 Otherwise alias the whole array. */
1706 if (CopLINE(PL_curcop) == line_num) {
1707 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1708 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1709 }
1710 else if (GvAV(*gvp)) {
1711 AV * const av = GvAV(*gvp);
1712 const I32 start = CopLINE(PL_curcop)+1;
1713 I32 items = AvFILLp(av) - start;
1714 if (items > 0) {
1715 AV * const av2 = GvAVn(gv2);
1716 SV **svp = AvARRAY(av) + start;
1717 I32 l = (I32)line_num+1;
1718 while (items--)
1719 av_store(av2, l++, SvREFCNT_inc(*svp++));
1720 }
1721 }
e5527e4b 1722 }
44867030
NC
1723
1724 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1725 }
e66cf94c 1726 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1727 }
05ec9bb3 1728 CopFILE_free(PL_curcop);
d9095cec 1729 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1730 }
8818d409 1731 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1732}
1733
29595ff2 1734#ifdef PERL_MAD
cd81e915 1735/* skip space before PL_thistoken */
29595ff2
NC
1736
1737STATIC char *
5aaab254 1738S_skipspace0(pTHX_ char *s)
29595ff2 1739{
7918f24d
NC
1740 PERL_ARGS_ASSERT_SKIPSPACE0;
1741
29595ff2
NC
1742 s = skipspace(s);
1743 if (!PL_madskills)
1744 return s;
cd81e915
NC
1745 if (PL_skipwhite) {
1746 if (!PL_thiswhite)
6b29d1f5 1747 PL_thiswhite = newSVpvs("");
cd81e915
NC
1748 sv_catsv(PL_thiswhite, PL_skipwhite);
1749 sv_free(PL_skipwhite);
1750 PL_skipwhite = 0;
1751 }
1752 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1753 return s;
1754}
1755
cd81e915 1756/* skip space after PL_thistoken */
29595ff2
NC
1757
1758STATIC char *
5aaab254 1759S_skipspace1(pTHX_ char *s)
29595ff2 1760{
d4c19fe8 1761 const char *start = s;
29595ff2
NC
1762 I32 startoff = start - SvPVX(PL_linestr);
1763
7918f24d
NC
1764 PERL_ARGS_ASSERT_SKIPSPACE1;
1765
29595ff2
NC
1766 s = skipspace(s);
1767 if (!PL_madskills)
1768 return s;
1769 start = SvPVX(PL_linestr) + startoff;
cd81e915 1770 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1771 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1772 PL_thistoken = newSVpvn(tstart, start - tstart);
1773 }
1774 PL_realtokenstart = -1;
1775 if (PL_skipwhite) {
1776 if (!PL_nextwhite)
6b29d1f5 1777 PL_nextwhite = newSVpvs("");
cd81e915
NC
1778 sv_catsv(PL_nextwhite, PL_skipwhite);
1779 sv_free(PL_skipwhite);
1780 PL_skipwhite = 0;
29595ff2
NC
1781 }
1782 return s;
1783}
1784
1785STATIC char *
5aaab254 1786S_skipspace2(pTHX_ char *s, SV **svp)
29595ff2 1787{
c35e046a
AL
1788 char *start;
1789 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1790 const I32 startoff = s - SvPVX(PL_linestr);
1791
7918f24d
NC
1792 PERL_ARGS_ASSERT_SKIPSPACE2;
1793
29595ff2
NC
1794 s = skipspace(s);
1795 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1796 if (!PL_madskills || !svp)
1797 return s;
1798 start = SvPVX(PL_linestr) + startoff;
cd81e915 1799 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1800 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1801 PL_thistoken = newSVpvn(tstart, start - tstart);
1802 PL_realtokenstart = -1;
29595ff2 1803 }
cd81e915 1804 if (PL_skipwhite) {
29595ff2 1805 if (!*svp)
6b29d1f5 1806 *svp = newSVpvs("");
cd81e915
NC
1807 sv_setsv(*svp, PL_skipwhite);
1808 sv_free(PL_skipwhite);
1809 PL_skipwhite = 0;
29595ff2
NC
1810 }
1811
1812 return s;
1813}
1814#endif
1815
80a702cd 1816STATIC void
15f169a1 1817S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1818{
1819 AV *av = CopFILEAVx(PL_curcop);
1820 if (av) {
b9f83d2f 1821 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1822 if (orig_sv)
1823 sv_setsv(sv, orig_sv);
1824 else
1825 sv_setpvn(sv, buf, len);
80a702cd
RGS
1826 (void)SvIOK_on(sv);
1827 SvIV_set(sv, 0);
1828 av_store(av, (I32)CopLINE(PL_curcop), sv);
1829 }
1830}
1831
ffb4593c
NT
1832/*
1833 * S_skipspace
1834 * Called to gobble the appropriate amount and type of whitespace.
1835 * Skips comments as well.
1836 */
1837
76e3520e 1838STATIC char *
5aaab254 1839S_skipspace(pTHX_ char *s)
a687059c 1840{
5db06880 1841#ifdef PERL_MAD
f0e67a1d
Z
1842 char *start = s;
1843#endif /* PERL_MAD */
7918f24d 1844 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1845#ifdef PERL_MAD
cd81e915
NC
1846 if (PL_skipwhite) {
1847 sv_free(PL_skipwhite);
f0e67a1d 1848 PL_skipwhite = NULL;
5db06880 1849 }
f0e67a1d 1850#endif /* PERL_MAD */
3280af22 1851 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1852 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1853 s++;
f0e67a1d
Z
1854 } else {
1855 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1856 PL_bufptr = s;
f0998909
Z
1857 lex_read_space(LEX_KEEP_PREVIOUS |
1858 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1859 LEX_NO_NEXT_CHUNK : 0));
3280af22 1860 s = PL_bufptr;
f0e67a1d
Z
1861 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1862 if (PL_linestart > PL_bufptr)
1863 PL_bufptr = PL_linestart;
1864 return s;
463ee0b2 1865 }
5db06880 1866#ifdef PERL_MAD
f0e67a1d
Z
1867 if (PL_madskills)
1868 PL_skipwhite = newSVpvn(start, s-start);
1869#endif /* PERL_MAD */
5db06880 1870 return s;
a687059c 1871}
378cc40b 1872
ffb4593c
NT
1873/*
1874 * S_check_uni
1875 * Check the unary operators to ensure there's no ambiguity in how they're
1876 * used. An ambiguous piece of code would be:
1877 * rand + 5
1878 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1879 * the +5 is its argument.
1880 */
1881
76e3520e 1882STATIC void
cea2e8a9 1883S_check_uni(pTHX)
ba106d47 1884{
97aff369 1885 dVAR;
d4c19fe8
AL
1886 const char *s;
1887 const char *t;
2f3197b3 1888
3280af22 1889 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1890 return;
3280af22
NIS
1891 while (isSPACE(*PL_last_uni))
1892 PL_last_uni++;
c35e046a 1893 s = PL_last_uni;
8a2bca12 1894 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
c35e046a 1895 s++;
3280af22 1896 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1897 return;
6136c704 1898
9b387841
NC
1899 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1900 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1901 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1902}
1903
ffb4593c
NT
1904/*
1905 * LOP : macro to build a list operator. Its behaviour has been replaced
1906 * with a subroutine, S_lop() for which LOP is just another name.
1907 */
1908
a0d0e21e
LW
1909#define LOP(f,x) return lop(f,x,s)
1910
ffb4593c
NT
1911/*
1912 * S_lop
1913 * Build a list operator (or something that might be one). The rules:
1914 * - if we have a next token, then it's a list operator [why?]
1915 * - if the next thing is an opening paren, then it's a function
1916 * - else it's a list operator
1917 */
1918
76e3520e 1919STATIC I32
a0be28da 1920S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1921{
97aff369 1922 dVAR;
7918f24d
NC
1923
1924 PERL_ARGS_ASSERT_LOP;
1925
6154021b 1926 pl_yylval.ival = f;
35c8bce7 1927 CLINE;
3280af22
NIS
1928 PL_expect = x;
1929 PL_bufptr = s;
1930 PL_last_lop = PL_oldbufptr;
eb160463 1931 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1932#ifdef PERL_MAD
1933 if (PL_lasttoke)
78cdf107 1934 goto lstop;
5db06880 1935#else
3280af22 1936 if (PL_nexttoke)
78cdf107 1937 goto lstop;
5db06880 1938#endif
79072805 1939 if (*s == '(')
bbf60fe6 1940 return REPORT(FUNC);
29595ff2 1941 s = PEEKSPACE(s);
79072805 1942 if (*s == '(')
bbf60fe6 1943 return REPORT(FUNC);
78cdf107
Z
1944 else {
1945 lstop:
1946 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1947 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1948 return REPORT(LSTOP);
78cdf107 1949 }
79072805
LW
1950}
1951
5db06880
NC
1952#ifdef PERL_MAD
1953 /*
1954 * S_start_force
1955 * Sets up for an eventual force_next(). start_force(0) basically does
1956 * an unshift, while start_force(-1) does a push. yylex removes items
1957 * on the "pop" end.
1958 */
1959
1960STATIC void
1961S_start_force(pTHX_ int where)
1962{
1963 int i;
1964
cd81e915 1965 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1966 where = PL_lasttoke;
cd81e915
NC
1967 assert(PL_curforce < 0 || PL_curforce == where);
1968 if (PL_curforce != where) {
5db06880
NC
1969 for (i = PL_lasttoke; i > where; --i) {
1970 PL_nexttoke[i] = PL_nexttoke[i-1];
1971 }
1972 PL_lasttoke++;
1973 }
cd81e915 1974 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1975 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1976 PL_curforce = where;
1977 if (PL_nextwhite) {
5db06880 1978 if (PL_madskills)
6b29d1f5 1979 curmad('^', newSVpvs(""));
cd81e915 1980 CURMAD('_', PL_nextwhite);
5db06880
NC
1981 }
1982}
1983
1984STATIC void
1985S_curmad(pTHX_ char slot, SV *sv)
1986{
1987 MADPROP **where;
1988
1989 if (!sv)
1990 return;
cd81e915
NC
1991 if (PL_curforce < 0)
1992 where = &PL_thismad;
5db06880 1993 else
cd81e915 1994 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1995
cd81e915 1996 if (PL_faketokens)
76f68e9b 1997 sv_setpvs(sv, "");
5db06880
NC
1998 else {
1999 if (!IN_BYTES) {
2000 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2001 SvUTF8_on(sv);
2002 else if (PL_encoding) {
2003 sv_recode_to_utf8(sv, PL_encoding);
2004 }
2005 }
2006 }
2007
2008 /* keep a slot open for the head of the list? */
2009 if (slot != '_' && *where && (*where)->mad_key == '^') {
2010 (*where)->mad_key = slot;
daba3364 2011 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
2012 (*where)->mad_val = (void*)sv;
2013 }
2014 else
2015 addmad(newMADsv(slot, sv), where, 0);
2016}
2017#else
b3f24c00
MHM
2018# define start_force(where) NOOP
2019# define curmad(slot, sv) NOOP
5db06880
NC
2020#endif
2021
ffb4593c
NT
2022/*
2023 * S_force_next
9cbb5ea2 2024 * When the lexer realizes it knows the next token (for instance,
ffb4593c 2025 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 2026 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
2027 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2028 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
2029 */
2030
4e553d73 2031STATIC void
cea2e8a9 2032S_force_next(pTHX_ I32 type)
79072805 2033{
97aff369 2034 dVAR;
704d4215
GG
2035#ifdef DEBUGGING
2036 if (DEBUG_T_TEST) {
2037 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 2038 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
2039 }
2040#endif
5db06880 2041#ifdef PERL_MAD
cd81e915 2042 if (PL_curforce < 0)
5db06880 2043 start_force(PL_lasttoke);
cd81e915 2044 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
2045 if (PL_lex_state != LEX_KNOWNEXT)
2046 PL_lex_defer = PL_lex_state;
2047 PL_lex_state = LEX_KNOWNEXT;
2048 PL_lex_expect = PL_expect;
cd81e915 2049 PL_curforce = -1;
5db06880 2050#else
3280af22
NIS
2051 PL_nexttype[PL_nexttoke] = type;
2052 PL_nexttoke++;
2053 if (PL_lex_state != LEX_KNOWNEXT) {
2054 PL_lex_defer = PL_lex_state;
2055 PL_lex_expect = PL_expect;
2056 PL_lex_state = LEX_KNOWNEXT;
79072805 2057 }
5db06880 2058#endif
79072805
LW
2059}
2060
28ac2b49
Z
2061void
2062Perl_yyunlex(pTHX)
2063{
a7aaec61
Z
2064 int yyc = PL_parser->yychar;
2065 if (yyc != YYEMPTY) {
2066 if (yyc) {
2067 start_force(-1);
2068 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2069 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 2070 PL_lex_allbrackets--;
a7aaec61 2071 PL_lex_brackets--;
78cdf107
Z
2072 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2073 } else if (yyc == '('/*)*/) {
2074 PL_lex_allbrackets--;
2075 yyc |= (2<<24);
a7aaec61
Z
2076 }
2077 force_next(yyc);
2078 }
28ac2b49
Z
2079 PL_parser->yychar = YYEMPTY;
2080 }
2081}
2082
d0a148a6 2083STATIC SV *
15f169a1 2084S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2085{
97aff369 2086 dVAR;
740cce10 2087 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2088 !IN_BYTES
2089 && UTF
2090 && !is_ascii_string((const U8*)start, len)
740cce10 2091 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2092 return sv;
2093}
2094
ffb4593c
NT
2095/*
2096 * S_force_word
2097 * When the lexer knows the next thing is a word (for instance, it has
2098 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2099 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2100 * lookahead.
ffb4593c
NT
2101 *
2102 * Arguments:
b1b65b59 2103 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2104 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2105 * int check_keyword : if true, Perl checks to make sure the word isn't
2106 * a keyword (do this if the word is a label, e.g. goto FOO)
2107 * int allow_pack : if true, : characters will also be allowed (require,
2108 * use, etc. do this)
9cbb5ea2 2109 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2110 */
2111
76e3520e 2112STATIC char *
5aaab254 2113S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2114{
97aff369 2115 dVAR;
eb578fdb 2116 char *s;
463ee0b2 2117 STRLEN len;
4e553d73 2118
7918f24d
NC
2119 PERL_ARGS_ASSERT_FORCE_WORD;
2120
29595ff2 2121 start = SKIPSPACE1(start);
463ee0b2 2122 s = start;
7e2040f0 2123 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2124 (allow_pack && *s == ':') ||
15f0808c 2125 (allow_initial_tick && *s == '\'') )
a0d0e21e 2126 {
3280af22 2127 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2128 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2129 return start;
cd81e915 2130 start_force(PL_curforce);
5db06880
NC
2131 if (PL_madskills)
2132 curmad('X', newSVpvn(start,s-start));
463ee0b2 2133 if (token == METHOD) {
29595ff2 2134 s = SKIPSPACE1(s);
463ee0b2 2135 if (*s == '(')
3280af22 2136 PL_expect = XTERM;
463ee0b2 2137 else {
3280af22 2138 PL_expect = XOPERATOR;
463ee0b2 2139 }
79072805 2140 }
e74e6b3d 2141 if (PL_madskills)
63575281 2142 curmad('g', newSVpvs( "forced" ));
9ded7720 2143 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2144 = (OP*)newSVOP(OP_CONST,0,
2145 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2146 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2147 force_next(token);
2148 }
2149 return s;
2150}
2151
ffb4593c
NT
2152/*
2153 * S_force_ident
9cbb5ea2 2154 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2155 * text only contains the "foo" portion. The first argument is a pointer
2156 * to the "foo", and the second argument is the type symbol to prefix.
2157 * Forces the next token to be a "WORD".
9cbb5ea2 2158 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2159 */
2160
76e3520e 2161STATIC void
5aaab254 2162S_force_ident(pTHX_ const char *s, int kind)
79072805 2163{
97aff369 2164 dVAR;
7918f24d
NC
2165
2166 PERL_ARGS_ASSERT_FORCE_IDENT;
2167
c9b48522
DD
2168 if (s[0]) {
2169 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
728847b1
BF
2170 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2171 UTF ? SVf_UTF8 : 0));
cd81e915 2172 start_force(PL_curforce);
9ded7720 2173 NEXTVAL_NEXTTOKE.opval = o;
79072805 2174 force_next(WORD);
748a9306 2175 if (kind) {
11343788 2176 o->op_private = OPpCONST_ENTERED;
55497cff 2177 /* XXX see note in pp_entereval() for why we forgo typo
2178 warnings if the symbol must be introduced in an eval.
2179 GSAR 96-10-12 */
90e5519e 2180 gv_fetchpvn_flags(s, len,
728847b1
BF
2181 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2182 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
90e5519e
NC
2183 kind == '$' ? SVt_PV :
2184 kind == '@' ? SVt_PVAV :
2185 kind == '%' ? SVt_PVHV :
a0d0e21e 2186 SVt_PVGV
90e5519e 2187 );
748a9306 2188 }
79072805
LW
2189 }
2190}
2191
3f33d153
FC
2192static void
2193S_force_ident_maybe_lex(pTHX_ char pit)
2194{
2195 start_force(PL_curforce);
2196 NEXTVAL_NEXTTOKE.ival = pit;
2197 force_next('p');
2198}
2199
1571675a
GS
2200NV
2201Perl_str_to_version(pTHX_ SV *sv)
2202{
2203 NV retval = 0.0;
2204 NV nshift = 1.0;
2205 STRLEN len;
cfd0369c 2206 const char *start = SvPV_const(sv,len);
9d4ba2ae 2207 const char * const end = start + len;
504618e9 2208 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2209
2210 PERL_ARGS_ASSERT_STR_TO_VERSION;
2211
1571675a 2212 while (start < end) {
ba210ebe 2213 STRLEN skip;
1571675a
GS
2214 UV n;
2215 if (utf)
9041c2e3 2216 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2217 else {
2218 n = *(U8*)start;
2219 skip = 1;
2220 }
2221 retval += ((NV)n)/nshift;
2222 start += skip;
2223 nshift *= 1000;
2224 }
2225 return retval;
2226}
2227
4e553d73 2228/*
ffb4593c
NT
2229 * S_force_version
2230 * Forces the next token to be a version number.
e759cc13
RGS
2231 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2232 * and if "guessing" is TRUE, then no new token is created (and the caller
2233 * must use an alternative parsing method).
ffb4593c
NT
2234 */
2235
76e3520e 2236STATIC char *
e759cc13 2237S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2238{
97aff369 2239 dVAR;
5f66b61c 2240 OP *version = NULL;
44dcb63b 2241 char *d;
5db06880
NC
2242#ifdef PERL_MAD
2243 I32 startoff = s - SvPVX(PL_linestr);
2244#endif
89bfa8cd 2245
7918f24d
NC
2246 PERL_ARGS_ASSERT_FORCE_VERSION;
2247
29595ff2 2248 s = SKIPSPACE1(s);
89bfa8cd 2249
44dcb63b 2250 d = s;
dd629d5b 2251 if (*d == 'v')
44dcb63b 2252 d++;
44dcb63b 2253 if (isDIGIT(*d)) {
e759cc13
RGS
2254 while (isDIGIT(*d) || *d == '_' || *d == '.')
2255 d++;
5db06880
NC
2256#ifdef PERL_MAD
2257 if (PL_madskills) {
cd81e915 2258 start_force(PL_curforce);
5db06880
NC
2259 curmad('X', newSVpvn(s,d-s));
2260 }
2261#endif
4e4da3ac 2262 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2263 SV *ver;
8d08d9ba 2264#ifdef USE_LOCALE_NUMERIC
909d3787
KW
2265 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2266 setlocale(LC_NUMERIC, "C");
8d08d9ba 2267#endif
6154021b 2268 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2269#ifdef USE_LOCALE_NUMERIC
2270 setlocale(LC_NUMERIC, loc);
909d3787 2271 Safefree(loc);
8d08d9ba 2272#endif
6154021b 2273 version = pl_yylval.opval;
dd629d5b
GS
2274 ver = cSVOPx(version)->op_sv;
2275 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2276 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2277 SvNV_set(ver, str_to_version(ver));
1571675a 2278 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2279 }
89bfa8cd 2280 }
5db06880
NC
2281 else if (guessing) {
2282#ifdef PERL_MAD
2283 if (PL_madskills) {
cd81e915
NC
2284 sv_free(PL_nextwhite); /* let next token collect whitespace */
2285 PL_nextwhite = 0;
5db06880
NC
2286 s = SvPVX(PL_linestr) + startoff;
2287 }
2288#endif
e759cc13 2289 return s;
5db06880 2290 }
89bfa8cd 2291 }
2292
5db06880
NC
2293#ifdef PERL_MAD
2294 if (PL_madskills && !version) {
cd81e915
NC
2295 sv_free(PL_nextwhite); /* let next token collect whitespace */
2296 PL_nextwhite = 0;
5db06880
NC
2297 s = SvPVX(PL_linestr) + startoff;
2298 }
2299#endif
89bfa8cd 2300 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2301 start_force(PL_curforce);
9ded7720 2302 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2303 force_next(WORD);
89bfa8cd 2304
e759cc13 2305 return s;
89bfa8cd 2306}
2307
ffb4593c 2308/*
91152fc1
DG
2309 * S_force_strict_version
2310 * Forces the next token to be a version number using strict syntax rules.
2311 */
2312
2313STATIC char *
2314S_force_strict_version(pTHX_ char *s)
2315{
2316 dVAR;
2317 OP *version = NULL;
2318#ifdef PERL_MAD
2319 I32 startoff = s - SvPVX(PL_linestr);
2320#endif
2321 const char *errstr = NULL;
2322
2323 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2324
2325 while (isSPACE(*s)) /* leading whitespace */
2326 s++;
2327
2328 if (is_STRICT_VERSION(s,&errstr)) {
2329 SV *ver = newSV(0);
2330 s = (char *)scan_version(s, ver, 0);
2331 version = newSVOP(OP_CONST, 0, ver);
2332 }
4e4da3ac
Z
2333 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2334 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2335 {
91152fc1
DG
2336 PL_bufptr = s;
2337 if (errstr)
2338 yyerror(errstr); /* version required */
2339 return s;
2340 }
2341
2342#ifdef PERL_MAD
2343 if (PL_madskills && !version) {
2344 sv_free(PL_nextwhite); /* let next token collect whitespace */
2345 PL_nextwhite = 0;
2346 s = SvPVX(PL_linestr) + startoff;
2347 }
2348#endif
2349 /* NOTE: The parser sees the package name and the VERSION swapped */
2350 start_force(PL_curforce);
2351 NEXTVAL_NEXTTOKE.opval = version;
2352 force_next(WORD);
2353
2354 return s;
2355}
2356
2357/*
ffb4593c
NT
2358 * S_tokeq
2359 * Tokenize a quoted string passed in as an SV. It finds the next
2360 * chunk, up to end of string or a backslash. It may make a new
2361 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2362 * turns \\ into \.
2363 */
2364
76e3520e 2365STATIC SV *
cea2e8a9 2366S_tokeq(pTHX_ SV *sv)
79072805 2367{
97aff369 2368 dVAR;
eb578fdb
KW
2369 char *s;
2370 char *send;
2371 char *d;
b3ac6de7
IZ
2372 STRLEN len = 0;
2373 SV *pv = sv;
79072805 2374
7918f24d
NC
2375 PERL_ARGS_ASSERT_TOKEQ;
2376
79072805 2377 if (!SvLEN(sv))
b3ac6de7 2378 goto finish;
79072805 2379
a0d0e21e 2380 s = SvPV_force(sv, len);
21a311ee 2381 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2382 goto finish;
463ee0b2 2383 send = s + len;
dcb21ed6
NC
2384 /* This is relying on the SV being "well formed" with a trailing '\0' */
2385 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2386 s++;
2387 if (s == send)
b3ac6de7 2388 goto finish;
79072805 2389 d = s;
be4731d2 2390 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2391 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2392 }
79072805
LW
2393 while (s < send) {
2394 if (*s == '\\') {
a0d0e21e 2395 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2396 s++; /* all that, just for this */
2397 }
2398 *d++ = *s++;
2399 }
2400 *d = '\0';
95a20fc0 2401 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2402 finish:
3280af22 2403 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2404 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2405 return sv;
2406}
2407
ffb4593c
NT
2408/*
2409 * Now come three functions related to double-quote context,
2410 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2411 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2412 * interact with PL_lex_state, and create fake ( ... ) argument lists
2413 * to handle functions and concatenation.
ecd24171
DM
2414 * For example,
2415 * "foo\lbar"
2416 * is tokenised as
2417 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
ffb4593c
NT
2418 */
2419
2420/*
2421 * S_sublex_start
6154021b 2422 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2423 *
2424 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2425 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2426 *
2427 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2428 *
2429 * Everything else becomes a FUNC.
2430 *
2431 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2432 * had an OP_CONST or OP_READLINE). This just sets us up for a
2433 * call to S_sublex_push().
2434 */
2435
76e3520e 2436STATIC I32
cea2e8a9 2437S_sublex_start(pTHX)
79072805 2438{
97aff369 2439 dVAR;
eb578fdb 2440 const I32 op_type = pl_yylval.ival;
79072805
LW
2441
2442 if (op_type == OP_NULL) {
6154021b 2443 pl_yylval.opval = PL_lex_op;
5f66b61c 2444 PL_lex_op = NULL;
79072805
LW
2445 return THING;
2446 }
2447 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2448 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2449
2450 if (SvTYPE(sv) == SVt_PVIV) {
2451 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2452 STRLEN len;
96a5add6 2453 const char * const p = SvPV_const(sv, len);
740cce10 2454 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2455 SvREFCNT_dec(sv);
2456 sv = nsv;
4e553d73 2457 }
6154021b 2458 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2459 PL_lex_stuff = NULL;
6f33ba73
RGS
2460 /* Allow <FH> // "foo" */
2461 if (op_type == OP_READLINE)
2462 PL_expect = XTERMORDORDOR;
79072805
LW
2463 return THING;
2464 }
e3f73d4e
RGS
2465 else if (op_type == OP_BACKTICK && PL_lex_op) {
2466 /* readpipe() vas overriden */
2467 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2468 pl_yylval.opval = PL_lex_op;
9b201d7d 2469 PL_lex_op = NULL;
e3f73d4e
RGS
2470 PL_lex_stuff = NULL;
2471 return THING;
2472 }
79072805 2473
3280af22 2474 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2475 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2476 PL_sublex_info.sub_op = PL_lex_op;
2477 PL_lex_state = LEX_INTERPPUSH;
55497cff 2478
3280af22
NIS
2479 PL_expect = XTERM;
2480 if (PL_lex_op) {
6154021b 2481 pl_yylval.opval = PL_lex_op;
5f66b61c 2482 PL_lex_op = NULL;
55497cff 2483 return PMFUNC;
2484 }
2485 else
2486 return FUNC;
2487}
2488
ffb4593c
NT
2489/*
2490 * S_sublex_push
2491 * Create a new scope to save the lexing state. The scope will be
2492 * ended in S_sublex_done. Returns a '(', starting the function arguments
2493 * to the uc, lc, etc. found before.
2494 * Sets PL_lex_state to LEX_INTERPCONCAT.
2495 */
2496
76e3520e 2497STATIC I32
cea2e8a9 2498S_sublex_push(pTHX)
55497cff 2499{
27da23d5 2500 dVAR;
78a635de 2501 LEXSHARED *shared;
f46d017c 2502 ENTER;
55497cff 2503
3280af22 2504 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2505 SAVEBOOL(PL_lex_dojoin);
3280af22 2506 SAVEI32(PL_lex_brackets);
78cdf107 2507 SAVEI32(PL_lex_allbrackets);
b27dce25 2508 SAVEI32(PL_lex_formbrack);
78cdf107 2509 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2510 SAVEI32(PL_lex_casemods);
2511 SAVEI32(PL_lex_starts);
651b5b28 2512 SAVEI8(PL_lex_state);
7cc34111 2513 SAVESPTR(PL_lex_repl);
7766f137 2514 SAVEVPTR(PL_lex_inpat);
98246f1e 2515 SAVEI16(PL_lex_inwhat);
57843af0 2516 SAVECOPLINE(PL_curcop);
3280af22 2517 SAVEPPTR(PL_bufptr);
8452ff4b 2518 SAVEPPTR(PL_bufend);
3280af22
NIS
2519 SAVEPPTR(PL_oldbufptr);
2520 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2521 SAVEPPTR(PL_last_lop);
2522 SAVEPPTR(PL_last_uni);
3280af22
NIS
2523 SAVEPPTR(PL_linestart);
2524 SAVESPTR(PL_linestr);
8edd5f42
RGS
2525 SAVEGENERICPV(PL_lex_brackstack);
2526 SAVEGENERICPV(PL_lex_casestack);
78a635de 2527 SAVEGENERICPV(PL_parser->lex_shared);
3a54fd60 2528 SAVEBOOL(PL_parser->lex_re_reparsing);
3280af22 2529
99bd9d90 2530 /* The here-doc parser needs to be able to peek into outer lexing
60f40a38
FC
2531 scopes to find the body of the here-doc. So we put PL_linestr and
2532 PL_bufptr into lex_shared, to ‘share’ those values.
99bd9d90 2533 */
60f40a38
FC
2534 PL_parser->lex_shared->ls_linestr = PL_linestr;
2535 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
99bd9d90 2536
3280af22 2537 PL_linestr = PL_lex_stuff;
7cc34111 2538 PL_lex_repl = PL_sublex_info.repl;
a0714e2c 2539 PL_lex_stuff = NULL;
7cc34111 2540 PL_sublex_info.repl = NULL;
3280af22 2541
9cbb5ea2
GS
2542 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2543 = SvPVX(PL_linestr);
3280af22 2544 PL_bufend += SvCUR(PL_linestr);
bd61b366 2545 PL_last_lop = PL_last_uni = NULL;
3280af22 2546 SAVEFREESV(PL_linestr);
4dc843bc 2547 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
3280af22
NIS
2548
2549 PL_lex_dojoin = FALSE;
b27dce25 2550 PL_lex_brackets = PL_lex_formbrack = 0;
78cdf107
Z
2551 PL_lex_allbrackets = 0;
2552 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2553 Newx(PL_lex_brackstack, 120, char);
2554 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2555 PL_lex_casemods = 0;
2556 *PL_lex_casestack = '\0';
2557 PL_lex_starts = 0;
2558 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2559 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
78a635de
FC
2560
2561 Newxz(shared, 1, LEXSHARED);
2562 shared->ls_prev = PL_parser->lex_shared;
2563 PL_parser->lex_shared = shared;
3280af22
NIS
2564
2565 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2566 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2567 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2568 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2569 else
5f66b61c 2570 PL_lex_inpat = NULL;
79072805 2571
3a54fd60
DM
2572 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2573 PL_in_eval &= ~EVAL_RE_REPARSING;
2574
55497cff 2575 return '(';
79072805
LW
2576}
2577
ffb4593c
NT
2578/*
2579 * S_sublex_done
2580 * Restores lexer state after a S_sublex_push.
2581 */
2582
76e3520e 2583STATIC I32
cea2e8a9 2584S_sublex_done(pTHX)
79072805 2585{
27da23d5 2586 dVAR;
3280af22 2587 if (!PL_lex_starts++) {
396482e1 2588 SV * const sv = newSVpvs("");
9aa983d2
JH
2589 if (SvUTF8(PL_linestr))
2590 SvUTF8_on(sv);
3280af22 2591 PL_expect = XOPERATOR;
6154021b 2592 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2593 return THING;
2594 }
2595
3280af22
NIS
2596 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2597 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2598 return yylex();
79072805
LW
2599 }
2600
ffb4593c 2601 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2602 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2603 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2604 PL_linestr = PL_lex_repl;
2605 PL_lex_inpat = 0;
2606 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2607 PL_bufend += SvCUR(PL_linestr);
bd61b366 2608 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2609 PL_lex_dojoin = FALSE;
2610 PL_lex_brackets = 0;
78cdf107
Z
2611 PL_lex_allbrackets = 0;
2612 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2613 PL_lex_casemods = 0;
2614 *PL_lex_casestack = '\0';
2615 PL_lex_starts = 0;
25da4f38 2616 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2617 PL_lex_state = LEX_INTERPNORMAL;
2618 PL_lex_starts++;
e9fa98b2
HS
2619 /* we don't clear PL_lex_repl here, so that we can check later
2620 whether this is an evalled subst; that means we rely on the
2621 logic to ensure sublex_done() is called again only via the
2622 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2623 }
e9fa98b2 2624 else {
3280af22 2625 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2626 PL_lex_repl = NULL;
e9fa98b2 2627 }
79072805 2628 return ',';
ffed7fef
LW
2629 }
2630 else {
5db06880
NC
2631#ifdef PERL_MAD
2632 if (PL_madskills) {
cd81e915
NC
2633 if (PL_thiswhite) {
2634 if (!PL_endwhite)
6b29d1f5 2635 PL_endwhite = newSVpvs("");
cd81e915
NC
2636 sv_catsv(PL_endwhite, PL_thiswhite);
2637 PL_thiswhite = 0;
2638 }
2639 if (PL_thistoken)
76f68e9b 2640 sv_setpvs(PL_thistoken,"");
5db06880 2641 else
cd81e915 2642 PL_realtokenstart = -1;
5db06880
NC
2643 }
2644#endif
f46d017c 2645 LEAVE;
3280af22
NIS
2646 PL_bufend = SvPVX(PL_linestr);
2647 PL_bufend += SvCUR(PL_linestr);
2648 PL_expect = XOPERATOR;
09bef843 2649 PL_sublex_info.sub_inwhat = 0;
79072805 2650 return ')';
ffed7fef
LW
2651 }
2652}
2653
6f613c73
KW
2654PERL_STATIC_INLINE SV*
2655S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2656{
140b12ad
KW
2657 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2658 * interior, hence to the "}". Finds what the name resolves to, returning
2659 * an SV* containing it; NULL if no valid one found */
2660
dd2b1b72 2661 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
6f613c73 2662
0c415a79
KW
2663 HV * table;
2664 SV **cvp;
2665 SV *cv;
2666 SV *rv;
2667 HV *stash;
2668 const U8* first_bad_char_loc;
2669 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2670
6f613c73
KW
2671 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2672
107160e2
KW
2673 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2674 e - backslash_ptr,
2675 &first_bad_char_loc))
2676 {
2677 /* If warnings are on, this will print a more detailed analysis of what
2678 * is wrong than the error message below */
2679 utf8n_to_uvuni(first_bad_char_loc,
2680 e - ((char *) first_bad_char_loc),
2681 NULL, 0);
2682
2683 /* We deliberately don't try to print the malformed character, which
2684 * might not print very well; it also may be just the first of many
2685 * malformations, so don't print what comes after it */
2686 yyerror(Perl_form(aTHX_
2687 "Malformed UTF-8 character immediately after '%.*s'",
2688 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2689 return NULL;
2690 }
2691
2692 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2693 /* include the <}> */
2694 e - backslash_ptr + 1);
6f613c73 2695 if (! SvPOK(res)) {
b6407c49 2696 SvREFCNT_dec_NN(res);
6f613c73
KW
2697 return NULL;
2698 }
2699
0c415a79
KW
2700 /* See if the charnames handler is the Perl core's, and if so, we can skip
2701 * the validation needed for a user-supplied one, as Perl's does its own
2702 * validation. */
2703 table = GvHV(PL_hintgv); /* ^H */
2704 cvp = hv_fetchs(table, "charnames", FALSE);
67a057d6
FC
2705 if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2706 && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
0c415a79
KW
2707 {
2708 const char * const name = HvNAME(stash);
2709 if strEQ(name, "_charnames") {
2710 return res;
2711 }
2712 }
2713
bde9e88d
KW
2714 /* Here, it isn't Perl's charname handler. We can't rely on a
2715 * user-supplied handler to validate the input name. For non-ut8 input,
2716 * look to see that the first character is legal. Then loop through the
2717 * rest checking that each is a continuation */
6f613c73 2718
b6ba1137
KW
2719 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2720 * the same thing */
2721
b6ba1137 2722 if (! UTF) {
bde9e88d 2723 if (! isALPHAU(*s)) {
b6ba1137
KW
2724 goto bad_charname;
2725 }
bde9e88d
KW
2726 s++;
2727 while (s < e) {
2728 if (! isCHARNAME_CONT(*s)) {
b6ba1137
KW
2729 goto bad_charname;
2730 }
3036c853 2731 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
94ec3a20
FC
2732 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2733 "A sequence of multiple spaces in a charnames "
2734 "alias definition is deprecated");
bd299e29 2735 }
bde9e88d 2736 s++;
b6ba1137 2737 }
3036c853 2738 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
94ec3a20
FC
2739 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2740 "Trailing white-space in a charnames alias "
2741 "definition is deprecated");
bd299e29 2742 }
b6ba1137
KW
2743 }
2744 else {
bde9e88d
KW
2745 /* Similarly for utf8. For invariants can check directly; for other
2746 * Latin1, can calculate their code point and check; otherwise use a
2747 * swash */
2748 if (UTF8_IS_INVARIANT(*s)) {
2749 if (! isALPHAU(*s)) {
140b12ad
KW
2750 goto bad_charname;
2751 }
bde9e88d
KW
2752 s++;
2753 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2754 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
b6ba1137 2755 goto bad_charname;
6f613c73 2756 }
bde9e88d 2757 s += 2;
6f613c73 2758 }
bde9e88d
KW
2759 else {
2760 if (! PL_utf8_charname_begin) {
2761 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2762 PL_utf8_charname_begin = _core_swash_init("utf8",
2763 "_Perl_Charname_Begin",
2764 &PL_sv_undef,
2765 1, 0, NULL, &flags);
2766 }
2767 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2768 goto bad_charname;
2769 }
2770 s += UTF8SKIP(s);
2771 }
2772
2773 while (s < e) {
2774 if (UTF8_IS_INVARIANT(*s)) {
2775 if (! isCHARNAME_CONT(*s)) {
2776 goto bad_charname;
2777 }
3036c853
FC
2778 if (*s == ' ' && *(s-1) == ' '
2779 && ckWARN_d(WARN_DEPRECATED)) {
94ec3a20
FC
2780 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2781 "A sequence of multiple spaces in a charnam"
2782 "es alias definition is deprecated");
bd299e29 2783 }
bde9e88d
KW
2784 s++;
2785 }
2786 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2787 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2788 *(s+1)))))
2789 {
2790 goto bad_charname;
2791 }
2792 s += 2;
2793 }
2794 else {
2795 if (! PL_utf8_charname_continue) {
2796 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2797 PL_utf8_charname_continue = _core_swash_init("utf8",
2798 "_Perl_Charname_Continue",
2799 &PL_sv_undef,
2800 1, 0, NULL, &flags);
2801 }
2802 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2803 goto bad_charname;
2804 }
2805 s += UTF8SKIP(s);
6f613c73
KW
2806 }
2807 }
3036c853 2808 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
94ec3a20
FC
2809 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2810 "Trailing white-space in a charnames alias "
2811 "definition is deprecated");
bd299e29 2812 }
6f613c73
KW
2813 }
2814
94ca1619 2815 if (SvUTF8(res)) { /* Don't accept malformed input */
bde9e88d
KW
2816 const U8* first_bad_char_loc;
2817 STRLEN len;
2818 const char* const str = SvPV_const(res, len);
2819 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2820 /* If warnings are on, this will print a more detailed analysis of
2821 * what is wrong than the error message below */
2822 utf8n_to_uvuni(first_bad_char_loc,
2823 (char *) first_bad_char_loc - str,
2824 NULL, 0);
2825
2826 /* We deliberately don't try to print the malformed character,
2827 * which might not print very well; it also may be just the first
2828 * of many malformations, so don't print what comes after it */
2829 yyerror_pv(
2830 Perl_form(aTHX_
2831 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2832 (int) (e - backslash_ptr + 1), backslash_ptr,
2833 (int) ((char *) first_bad_char_loc - str), str
2834 ),
2835 SVf_UTF8);
2836 return NULL;
2837 }
2838 }
140b12ad 2839
bde9e88d 2840 return res;
140b12ad 2841
bde9e88d
KW
2842 bad_charname: {
2843 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2844
2845 /* The final %.*s makes sure that should the trailing NUL be missing
2846 * that this print won't run off the end of the string */
2847 yyerror_pv(
2848 Perl_form(aTHX_
2849 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2850 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2851 (int)(e - s + bad_char_size), s + bad_char_size
2852 ),
2853 UTF ? SVf_UTF8 : 0);
2854 return NULL;
2855 }
6f613c73
KW
2856}
2857
02aa26ce
NT
2858/*
2859 scan_const
2860
9da1dd8f
DM
2861 Extracts the next constant part of a pattern, double-quoted string,
2862 or transliteration. This is terrifying code.
2863
2864 For example, in parsing the double-quoted string "ab\x63$d", it would
2865 stop at the '$' and return an OP_CONST containing 'abc'.
02aa26ce 2866
94def140 2867 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2868 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2869 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2870
94def140
TS
2871 Returns a pointer to the character scanned up to. If this is
2872 advanced from the start pointer supplied (i.e. if anything was
9da1dd8f 2873 successfully parsed), will leave an OP_CONST for the substring scanned
6154021b 2874 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2875 by looking at the next characters herself.
2876
02aa26ce 2877 In patterns:
9da1dd8f 2878 expand:
537124e4
KW
2879 \N{FOO} => \N{U+hex_for_character_FOO}
2880 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
9da1dd8f
DM
2881
2882 pass through:
2883 all other \-char, including \N and \N{ apart from \N{ABC}
2884
2885 stops on:
2886 @ and $ where it appears to be a var, but not for $ as tail anchor
2887 \l \L \u \U \Q \E
2888 (?{ or (??{
2889
02aa26ce
NT
2890
2891 In transliterations:
2892 characters are VERY literal, except for - not at the start or end
94def140
TS
2893 of the string, which indicates a range. If the range is in bytes,
2894 scan_const expands the range to the full set of intermediate
2895 characters. If the range is in utf8, the hyphen is replaced with
2896 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2897
2898 In double-quoted strings:
2899 backslashes:
2900 double-quoted style: \r and \n
ff3f963a 2901 constants: \x31, etc.
94def140 2902 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2903 case and quoting: \U \Q \E
2904 stops on @ and $
2905
2906 scan_const does *not* construct ops to handle interpolated strings.
2907 It stops processing as soon as it finds an embedded $ or @ variable
2908 and leaves it to the caller to work out what's going on.
2909
94def140
TS
2910 embedded arrays (whether in pattern or not) could be:
2911 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2912
2913 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2914
2915 $ in pattern could be $foo or could be tail anchor. Assumption:
2916 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2917 followed by one of "()| \r\n\t"
02aa26ce 2918
9da1dd8f 2919 \1 (backreferences) are turned into $1 in substitutions
02aa26ce
NT
2920
2921 The structure of the code is
2922 while (there's a character to process) {
94def140
TS
2923 handle transliteration ranges
2924 skip regexp comments /(?#comment)/ and codes /(?{code})/
2925 skip #-initiated comments in //x patterns
2926 check for embedded arrays
02aa26ce
NT
2927 check for embedded scalars
2928 if (backslash) {
94def140 2929 deprecate \1 in substitution replacements
02aa26ce
NT
2930 handle string-changing backslashes \l \U \Q \E, etc.
2931 switch (what was escaped) {
94def140 2932 handle \- in a transliteration (becomes a literal -)
ff3f963a 2933 if a pattern and not \N{, go treat as regular character
94def140
TS
2934 handle \132 (octal characters)
2935 handle \x15 and \x{1234} (hex characters)
ff3f963a 2936 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2937 handle \cV (control characters)
2938 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2939 } (end switch)
77a135fe 2940 continue
02aa26ce 2941 } (end if backslash)
77a135fe 2942 handle regular character
02aa26ce 2943 } (end while character to read)
4e553d73 2944
02aa26ce
NT
2945*/
2946
76e3520e 2947STATIC char *
cea2e8a9 2948S_scan_const(pTHX_ char *start)
79072805 2949{
97aff369 2950 dVAR;
eb578fdb 2951 char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2952 SV *sv = newSV(send - start); /* sv for the constant. See
2953 note below on sizing. */
eb578fdb
KW
2954 char *s = start; /* start of the constant */
2955 char *d = SvPVX(sv); /* destination for copies */
02aa26ce 2956 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2957 bool didrange = FALSE; /* did we just finish a range? */
2866decb 2958 bool in_charclass = FALSE; /* within /[...]/ */
b953e60c
KW
2959 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2960 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2961 to be UTF8? But, this can
2962 show as true when the source
2963 isn't utf8, as for example
2964 when it is entirely composed
2965 of hex constants */
6f613c73 2966 SV *res; /* result from charnames */
77a135fe
KW
2967
2968 /* Note on sizing: The scanned constant is placed into sv, which is
2969 * initialized by newSV() assuming one byte of output for every byte of
2970 * input. This routine expects newSV() to allocate an extra byte for a
2971 * trailing NUL, which this routine will append if it gets to the end of
2972 * the input. There may be more bytes of input than output (eg., \N{LATIN
2973 * CAPITAL LETTER A}), or more output than input if the constant ends up
2974 * recoded to utf8, but each time a construct is found that might increase
2975 * the needed size, SvGROW() is called. Its size parameter each time is
2976 * based on the best guess estimate at the time, namely the length used so
2977 * far, plus the length the current construct will occupy, plus room for
2978 * the trailing NUL, plus one byte for every input byte still unscanned */
2979
c3320c2a
KW
2980 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
2981 before set */
4c3a8340
TS
2982#ifdef EBCDIC
2983 UV literal_endpoint = 0;
e294cc5d 2984 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2985#endif
012bcf8d 2986
7918f24d
NC
2987 PERL_ARGS_ASSERT_SCAN_CONST;
2988
bb16bae8 2989 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2990 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2991 /* If we are doing a trans and we know we want UTF8 set expectation */
2992 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2993 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2994 }
2995
b899e89d
FC
2996 /* Protect sv from errors and fatal warnings. */
2997 ENTER_with_name("scan_const");
2998 SAVEFREESV(sv);
2b9d42f0 2999
79072805 3000 while (s < send || dorange) {
ff3f963a 3001
02aa26ce 3002 /* get transliterations out of the way (they're most literal) */
3280af22 3003 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 3004 /* expand a range A-Z to the full set of characters. AIE! */
79072805 3005 if (dorange) {
1ba5c669
JH
3006 I32 i; /* current expanded character */
3007 I32 min; /* first character in range */
3008 I32 max; /* last character in range */
02aa26ce 3009
e294cc5d
JH
3010#ifdef EBCDIC
3011 UV uvmax = 0;
3012#endif
3013
3014 if (has_utf8
3015#ifdef EBCDIC
3016 && !native_range
3017#endif
1953db30 3018 ) {
9d4ba2ae 3019 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
3020 char *e = d++;
3021 while (e-- > c)
3022 *(e + 1) = *e;
25716404 3023 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
3024 /* mark the range as done, and continue */
3025 dorange = FALSE;
3026 didrange = TRUE;
3027 continue;
3028 }
2b9d42f0 3029
95a20fc0 3030 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
3031#ifdef EBCDIC
3032 SvGROW(sv,
3033 SvLEN(sv) + (has_utf8 ?
3034 (512 - UTF_CONTINUATION_MARK +
3035 UNISKIP(0x100))
3036 : 256));
3037 /* How many two-byte within 0..255: 128 in UTF-8,
3038 * 96 in UTF-8-mod. */
3039#else
9cbb5ea2 3040 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 3041#endif
9cbb5ea2 3042 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
3043#ifdef EBCDIC
3044 if (has_utf8) {
3045 int j;
3046 for (j = 0; j <= 1; j++) {
3047 char * const c = (char*)utf8_hop((U8*)d, -1);
3048 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3049 if (j)
3050 min = (U8)uv;
3051 else if (uv < 256)
3052 max = (U8)uv;
3053 else {
3054 max = (U8)0xff; /* only to \xff */
3055 uvmax = uv; /* \x{100} to uvmax */
3056 }
3057 d = c; /* eat endpoint chars */
3058 }
3059 }
3060 else {
3061#endif
3062 d -= 2; /* eat the first char and the - */
3063 min = (U8)*d; /* first char in range */
3064 max = (U8)d[1]; /* last char in range */
3065#ifdef EBCDIC
3066 }
3067#endif
8ada0baa 3068
c2e66d9e 3069 if (min > max) {
01ec43d0 3070 Perl_croak(aTHX_
d1573ac7 3071 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 3072 (char)min, (char)max);
c2e66d9e
GS
3073 }
3074
c7f1f016 3075#ifdef EBCDIC
4c3a8340
TS
3076 if (literal_endpoint == 2 &&
3077 ((isLOWER(min) && isLOWER(max)) ||
3078 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
3079 if (isLOWER(min)) {
3080 for (i = min; i <= max; i++)
3081 if (isLOWER(i))
db42d148 3082 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
3083 } else {
3084 for (i = min; i <= max; i++)
3085 if (isUPPER(i))
db42d148 3086 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
3087 }
3088 }
3089 else
3090#endif
3091 for (i = min; i <= max; i++)
e294cc5d
JH
3092#ifdef EBCDIC
3093 if (has_utf8) {
3094 const U8 ch = (U8)NATIVE_TO_UTF(i);
3095 if (UNI_IS_INVARIANT(ch))
3096 *d++ = (U8)i;
3097 else {
3098 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3099 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3100 }
3101 }
3102 else
3103#endif
3104 *d++ = (char)i;
3105
3106#ifdef EBCDIC
3107 if (uvmax) {
3108 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3109 if (uvmax > 0x101)
3110 *d++ = (char)UTF_TO_NATIVE(0xff);
3111 if (uvmax > 0x100)
3112 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3113 }
3114#endif
02aa26ce
NT
3115
3116 /* mark the range as done, and continue */
79072805 3117 dorange = FALSE;
01ec43d0 3118 didrange = TRUE;
4c3a8340
TS
3119#ifdef EBCDIC
3120 literal_endpoint = 0;
3121#endif
79072805 3122 continue;
4e553d73 3123 }
02aa26ce
NT
3124
3125 /* range begins (ignore - as first or last char) */
79072805 3126 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 3127 if (didrange) {
1fafa243 3128 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 3129 }
e294cc5d
JH
3130 if (has_utf8
3131#ifdef EBCDIC
3132 && !native_range
3133#endif
3134 ) {
25716404 3135 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
3136 s++;
3137 continue;
3138 }
79072805
LW
3139 dorange = TRUE;
3140 s++;
01ec43d0
GS
3141 }
3142 else {
3143 didrange = FALSE;
4c3a8340
TS
3144#ifdef EBCDIC
3145 literal_endpoint = 0;
e294cc5d 3146 native_range = TRUE;
4c3a8340 3147#endif
01ec43d0 3148 }
79072805 3149 }
02aa26ce
NT
3150
3151 /* if we get here, we're not doing a transliteration */
3152
e4a2df84
DM
3153 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3154 char *s1 = s-1;
3155 int esc = 0;
3156 while (s1 >= start && *s1-- == '\\')
3157 esc = !esc;
3158 if (!esc)
3159 in_charclass = TRUE;
3160 }
2866decb 3161
e4a2df84
DM
3162 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3163 char *s1 = s-1;
3164 int esc = 0;
3165 while (s1 >= start && *s1-- == '\\')
3166 esc = !esc;
3167 if (!esc)
3168 in_charclass = FALSE;
3169 }
2866decb 3170
9da1dd8f
DM
3171 /* skip for regexp comments /(?#comment)/, except for the last
3172 * char, which will be done separately.
3173 * Stop on (?{..}) and friends */
3174
3280af22 3175 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 3176 if (s[2] == '#') {
e994fd66 3177 while (s+1 < send && *s != ')')
db42d148 3178 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94 3179 }
2866decb 3180 else if (!PL_lex_casemods && !in_charclass &&
d3cec5e5
DM
3181 ( s[2] == '{' /* This should match regcomp.c */
3182 || (s[2] == '?' && s[3] == '{')))
155aba94 3183 {
9da1dd8f 3184 break;
cc6b7395 3185 }
748a9306 3186 }
02aa26ce
NT
3187
3188 /* likewise skip #-initiated comments in //x patterns */
3280af22 3189 else if (*s == '#' && PL_lex_inpat &&
73134a2e 3190 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 3191 while (s+1 < send && *s != '\n')
db42d148 3192 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 3193 }
02aa26ce 3194
9da1dd8f
DM
3195 /* no further processing of single-quoted regex */
3196 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3197 goto default_action;
3198
5d1d4326 3199 /* check for embedded arrays
da6eedaa 3200 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 3201 */
1749ea0d 3202 else if (*s == '@' && s[1]) {
8a2bca12 3203 if (isWORDCHAR_lazy_if(s+1,UTF))
1749ea0d
TS
3204 break;
3205 if (strchr(":'{$", s[1]))
3206 break;
3207 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3208 break; /* in regexp, neither @+ nor @- are interpolated */
3209 }
02aa26ce
NT
3210
3211 /* check for embedded scalars. only stop if we're sure it's a
3212 variable.
3213 */
79072805 3214 else if (*s == '$') {
3280af22 3215 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 3216 break;
77772344 3217 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
3218 if (s[1] == '\\') {
3219 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3220 "Possible unintended interpolation of $\\ in regex");
77772344 3221 }
79072805 3222 break; /* in regexp, $ might be tail anchor */
77772344 3223 }
79072805 3224 }
02aa26ce 3225
2b9d42f0
NIS
3226 /* End of else if chain - OP_TRANS rejoin rest */
3227
02aa26ce 3228 /* backslashes */
79072805 3229 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
3230 char* e; /* Can be used for ending '}', etc. */
3231
79072805 3232 s++;
02aa26ce 3233
7d0fc23c
KW
3234 /* warn on \1 - \9 in substitution replacements, but note that \11
3235 * is an octal; and \19 is \1 followed by '9' */
3280af22 3236 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 3237 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 3238 {
a2a5de95 3239 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
3240 *--s = '$';
3241 break;
3242 }
02aa26ce
NT
3243
3244 /* string-change backslash escapes */
838f2281 3245 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
79072805
LW
3246 --s;
3247 break;
3248 }
ff3f963a
KW
3249 /* In a pattern, process \N, but skip any other backslash escapes.
3250 * This is because we don't want to translate an escape sequence
3251 * into a meta symbol and have the regex compiler use the meta
3252 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3253 * in spite of this, we do have to process \N here while the proper
3254 * charnames handler is in scope. See bugs #56444 and #62056.
3255 * There is a complication because \N in a pattern may also stand
3256 * for 'match a non-nl', and not mean a charname, in which case its
3257 * processing should be deferred to the regex compiler. To be a
3258 * charname it must be followed immediately by a '{', and not look
3259 * like \N followed by a curly quantifier, i.e., not something like
3260 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3261 * quantifier */
3262 else if (PL_lex_inpat
3263 && (*s != 'N'
3264 || s[1] != '{'
4d68ffa0 3265 || regcurly(s + 1, FALSE)))
ff3f963a 3266 {
cc74c5bd
TS
3267 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3268 goto default_action;
3269 }
02aa26ce 3270
79072805 3271 switch (*s) {
02aa26ce
NT
3272
3273 /* quoted - in transliterations */
79072805 3274 case '-':
3280af22 3275 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
3276 *d++ = *s++;
3277 continue;
3278 }
3279 /* FALL THROUGH */
3280 default:
11b8faa4 3281 {
15861f94 3282 if ((isALPHANUMERIC(*s)))
a2a5de95
NC
3283 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3284 "Unrecognized escape \\%c passed through",
3285 *s);
11b8faa4 3286 /* default action is to copy the quoted character */
f9a63242 3287 goto default_action;
11b8faa4 3288 }
02aa26ce 3289
632403cc 3290 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
3291 case '0': case '1': case '2': case '3':
3292 case '4': case '5': case '6': case '7':
ba210ebe 3293 {
5e0a247b 3294 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
53305cf1 3295 STRLEN len = 3;
77a135fe 3296 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe 3297 s += len;
5e0a247b
KW
3298 if (len < 3 && s < send && isDIGIT(*s)
3299 && ckWARN(WARN_MISC))
3300 {
3301 Perl_warner(aTHX_ packWARN(WARN_MISC),
3302 "%s", form_short_octal_warning(s, len));
3303 }
ba210ebe 3304 }
012bcf8d 3305 goto NUM_ESCAPE_INSERT;
02aa26ce 3306
f0a2b745
KW
3307 /* eg. \o{24} indicates the octal constant \024 */
3308 case 'o':
3309 {
454155d9 3310 const char* error;
f0a2b745 3311
00ce5563 3312 bool valid = grok_bslash_o(&s, &uv, &error,
80f4111b
KW
3313 TRUE, /* Output warning */
3314 FALSE, /* Not strict */
17896a85
KW
3315 TRUE, /* Output warnings for
3316 non-portables */
80f4111b 3317 UTF);
454155d9 3318 if (! valid) {
f0a2b745
KW
3319 yyerror(error);
3320 continue;
3321 }
3322 goto NUM_ESCAPE_INSERT;
3323 }
3324
77a135fe 3325 /* eg. \x24 indicates the hex constant 0x24 */
79072805 3326 case 'x':
a0481293 3327 {
a0481293 3328 const char* error;
355860ce 3329
00ce5563 3330 bool valid = grok_bslash_x(&s, &uv, &error,
80f4111b
KW
3331 TRUE, /* Output warning */
3332 FALSE, /* Not strict */
17896a85
KW
3333 TRUE, /* Output warnings for
3334 non-portables */
80f4111b 3335 UTF);
a0481293
KW
3336 if (! valid) {
3337 yyerror(error);
355860ce 3338 continue;
ba210ebe 3339 }
012bcf8d
GS
3340 }
3341
3342 NUM_ESCAPE_INSERT:
ff3f963a
KW
3343 /* Insert oct or hex escaped character. There will always be
3344 * enough room in sv since such escapes will be longer than any
3345 * UTF-8 sequence they can end up as, except if they force us
3346 * to recode the rest of the string into utf8 */
ba7cea30 3347
77a135fe 3348 /* Here uv is the ordinal of the next character being added in
ff3f963a 3349 * unicode (converted from native). */
77a135fe 3350 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3351 if (!has_utf8 && uv > 255) {
77a135fe
KW
3352 /* Might need to recode whatever we have accumulated so
3353 * far if it contains any chars variant in utf8 or
3354 * utf-ebcdic. */
3355
3356 SvCUR_set(sv, d - SvPVX_const(sv));
3357 SvPOK_on(sv);
3358 *d = '\0';
77a135fe 3359 /* See Note on sizing above. */
7bf79863
KW
3360 sv_utf8_upgrade_flags_grow(sv,
3361 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3362 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3363 d = SvPVX(sv) + SvCUR(sv);
3364 has_utf8 = TRUE;
012bcf8d
GS
3365 }
3366
77a135fe
KW
3367 if (has_utf8) {
3368 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3369 if (PL_lex_inwhat == OP_TRANS &&
3370 PL_sublex_info.sub_op) {
3371 PL_sublex_info.sub_op->op_private |=
3372 (PL_lex_repl ? OPpTRANS_FROM_UTF
3373 : OPpTRANS_TO_UTF);
f9a63242 3374 }
e294cc5d
JH
3375#ifdef EBCDIC
3376 if (uv > 255 && !dorange)
3377 native_range = FALSE;
3378#endif
012bcf8d 3379 }
a0ed51b3 3380 else {
012bcf8d 3381 *d++ = (char)uv;
a0ed51b3 3382 }
012bcf8d
GS
3383 }
3384 else {
c4d5f83a 3385 *d++ = (char) uv;
a0ed51b3 3386 }
79072805 3387 continue;
02aa26ce 3388
4a2d328f 3389 case 'N':
ff3f963a
KW
3390 /* In a non-pattern \N must be a named character, like \N{LATIN
3391 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3392 * mean to match a non-newline. For non-patterns, named
3393 * characters are converted to their string equivalents. In
3394 * patterns, named characters are not converted to their
3395 * ultimate forms for the same reasons that other escapes
3396 * aren't. Instead, they are converted to the \N{U+...} form
3397 * to get the value from the charnames that is in effect right
3398 * now, while preserving the fact that it was a named character
3399 * so that the regex compiler knows this */
3400
3401 /* This section of code doesn't generally use the
3402 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3403 * a close examination of this macro and determined it is a
3404 * no-op except on utfebcdic variant characters. Every
3405 * character generated by this that would normally need to be
3406 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3407 * needed, and would complicate use of copy(). XXX There are
3408 * other parts of this file where the macro is used
3409 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3410
3411 /* The structure of this section of code (besides checking for
3412 * errors and upgrading to utf8) is:
3413 * Further disambiguate between the two meanings of \N, and if
3414 * not a charname, go process it elsewhere
0a96133f
KW
3415 * If of form \N{U+...}, pass it through if a pattern;
3416 * otherwise convert to utf8
3417 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3418 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3419
3420 /* Here, s points to the 'N'; the test below is guaranteed to
3421 * succeed if we are being called on a pattern as we already
3422 * know from a test above that the next character is a '{'.
3423 * On a non-pattern \N must mean 'named sequence, which
3424 * requires braces */
3425 s++;
3426 if (*s != '{') {
3427 yyerror("Missing braces on \\N{}");
3428 continue;
3429 }
3430 s++;
3431
0a96133f 3432 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3433 if (! (e = strchr(s, '}'))) {
3434 if (! PL_lex_inpat) {
5777a3f7 3435 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3436 } else {
3437 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3438 }
0a96133f 3439 continue;
ff3f963a 3440 }
cddc7ef4 3441
ff3f963a 3442 /* Here it looks like a named character */
cddc7ef4 3443
ff3f963a
KW
3444 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3445 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3446 | PERL_SCAN_DISALLOW_PREFIX;
3447 STRLEN len;
3448
3449 /* For \N{U+...}, the '...' is a unicode value even on
3450 * EBCDIC machines */
3451 s += 2; /* Skip to next char after the 'U+' */
3452 len = e - s;
3453 uv = grok_hex(s, &len, &flags, NULL);
3454 if (len == 0 || len != (STRLEN)(e - s)) {
3455 yyerror("Invalid hexadecimal number in \\N{U+...}");
3456 s = e + 1;
3457 continue;
3458 }
3459
3460 if (PL_lex_inpat) {
3461
e2a7e165
KW
3462 /* On non-EBCDIC platforms, pass through to the regex
3463 * compiler unchanged. The reason we evaluated the
3464 * number above is to make sure there wasn't a syntax
3465 * error. But on EBCDIC we convert to native so
3466 * downstream code can continue to assume it's native
3467 */
ff3f963a 3468 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3469#ifdef EBCDIC
3470 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3471 and the \0 */
3472 "\\N{U+%X}",
3473 (unsigned int) UNI_TO_NATIVE(uv));
3474#else
ff3f963a
KW
3475 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3476 d += e - s + 1;
e2a7e165 3477#endif
ff3f963a
KW
3478 }
3479 else { /* Not a pattern: convert the hex to string */
3480
3481 /* If destination is not in utf8, unconditionally
3482 * recode it to be so. This is because \N{} implies
3483 * Unicode semantics, and scalars have to be in utf8
3484 * to guarantee those semantics */
3485 if (! has_utf8) {
3486 SvCUR_set(sv, d - SvPVX_const(sv));
3487 SvPOK_on(sv);
3488 *d = '\0';
3489 /* See Note on sizing above. */
3490 sv_utf8_upgrade_flags_grow(
3491 sv,
3492 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3493 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3494 d = SvPVX(sv) + SvCUR(sv);
3495 has_utf8 = TRUE;
3496 }
3497
3498 /* Add the string to the output */
3499 if (UNI_IS_INVARIANT(uv)) {
3500 *d++ = (char) uv;
3501 }
3502 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3503 }
3504 }
6f613c73
KW
3505 else /* Here is \N{NAME} but not \N{U+...}. */
3506 if ((res = get_and_check_backslash_N_name(s, e)))
3507 {
3508 STRLEN len;
3509 const char *str = SvPV_const(res, len);
3510 if (PL_lex_inpat) {
ff3f963a
KW
3511
3512 if (! len) { /* The name resolved to an empty string */
3513 Copy("\\N{}", d, 4, char);
3514 d += 4;
3515 }
3516 else {
3517 /* In order to not lose information for the regex
3518 * compiler, pass the result in the specially made
3519 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3520 * the code points in hex of each character
3521 * returned by charnames */
3522
3523 const char *str_end = str + len;
3b721c4f 3524 const STRLEN off = d - SvPVX_const(sv);
94ca1619
KW
3525
3526 if (! SvUTF8(res)) {
3527 /* For the non-UTF-8 case, we can determine the
3528 * exact length needed without having to parse
3529 * through the string. Each character takes up
3530 * 2 hex digits plus either a trailing dot or
3531 * the "}" */
3532 d = off + SvGROW(sv, off
3533 + 3 * len
3534 + 6 /* For the "\N{U+", and
3535 trailing NUL */
3536 + (STRLEN)(send - e));
3537 Copy("\\N{U+", d, 5, char);
3538 d += 5;
3539 while (str < str_end) {
3540 char hex_string[4];
3541 my_snprintf(hex_string, sizeof(hex_string),
3542 "%02X.", (U8) *str);
3543 Copy(hex_string, d, 3, char);
3544 d += 3;
3545 str++;
3546 }
3547 d--; /* We will overwrite below the final
3548 dot with a right brace */
3549 }
3550 else {
1953db30
KW
3551 STRLEN char_length; /* cur char's byte length */
3552
3553 /* and the number of bytes after this is
3554 * translated into hex digits */
3555 STRLEN output_length;
3556
3557 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3558 * for max('U+', '.'); and 1 for NUL */
3559 char hex_string[2 * UTF8_MAXBYTES + 5];
3560
3561 /* Get the first character of the result. */
3562 U32 uv = utf8n_to_uvuni((U8 *) str,
3563 len,
3564 &char_length,
3565 UTF8_ALLOW_ANYUV);
3566 /* Convert first code point to hex, including
3567 * the boiler plate before it. For all these,
3568 * we convert to native format so that
3569 * downstream code can continue to assume the
3570 * input is native */
3571 output_length =
3572 my_snprintf(hex_string, sizeof(hex_string),
3573 "\\N{U+%X",
3574 (unsigned int) UNI_TO_NATIVE(uv));
3575
3576 /* Make sure there is enough space to hold it */
3577 d = off + SvGROW(sv, off
3578 + output_length
3579 + (STRLEN)(send - e)
3580 + 2); /* '}' + NUL */
3581 /* And output it */
3582 Copy(hex_string, d, output_length, char);
3583 d += output_length;
3584
3585 /* For each subsequent character, append dot and
3586 * its ordinal in hex */
3587 while ((str += char_length) < str_end) {
3588 const STRLEN off = d - SvPVX_const(sv);
3589 U32 uv = utf8n_to_uvuni((U8 *) str,
3590 str_end - str,
3591 &char_length,
3592 UTF8_ALLOW_ANYUV);
3593 output_length =
3594 my_snprintf(hex_string,
3595 sizeof(hex_string),
3596 ".%X",
3597 (unsigned int) UNI_TO_NATIVE(uv));
3598
3599 d = off + SvGROW(sv, off
3600 + output_length
3601 + (STRLEN)(send - e)
3602 + 2); /* '}' + NUL */
3603 Copy(hex_string, d, output_length, char);
3604 d += output_length;
3605 }
94ca1619 3606 }
ff3f963a
KW
3607
3608 *d++ = '}'; /* Done. Add the trailing brace */
3609 }
3610 }
3611 else { /* Here, not in a pattern. Convert the name to a
3612 * string. */
3613
3614 /* If destination is not in utf8, unconditionally
3615 * recode it to be so. This is because \N{} implies
3616 * Unicode semantics, and scalars have to be in utf8
3617 * to guarantee those semantics */
3618 if (! has_utf8) {
3619 SvCUR_set(sv, d - SvPVX_const(sv));
3620 SvPOK_on(sv);
3621 *d = '\0';
3622 /* See Note on sizing above. */
3623 sv_utf8_upgrade_flags_grow(sv,
3624 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3625 len + (STRLEN)(send - s) + 1);
3626 d = SvPVX(sv) + SvCUR(sv);
3627 has_utf8 = TRUE;
3628 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3629
3630 /* See Note on sizing above. (NOTE: SvCUR() is not
3631 * set correctly here). */
3632 const STRLEN off = d - SvPVX_const(sv);
3633 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3634 }
3635 Copy(str, d, len, char);
3636 d += len;
423cee85 3637 }
6f613c73 3638
423cee85 3639 SvREFCNT_dec(res);
cb233ae3 3640
cb233ae3 3641 } /* End \N{NAME} */
ff3f963a
KW
3642#ifdef EBCDIC
3643 if (!dorange)
3644 native_range = FALSE; /* \N{} is defined to be Unicode */
3645#endif
3646 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3647 continue;
3648
02aa26ce 3649 /* \c is a control character */
79072805
LW
3650 case 'c':
3651 s++;
961ce445 3652 if (s < send) {
17a3df4c 3653 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3654 }
961ce445
RGS
3655 else {
3656 yyerror("Missing control char name in \\c");
3657 }
79072805 3658 continue;
02aa26ce
NT
3659
3660 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3661 case 'b':
db42d148 3662 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3663 break;
3664 case 'n':
db42d148 3665 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3666 break;
3667 case 'r':
db42d148 3668 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3669 break;
3670 case 'f':
db42d148 3671 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3672 break;
3673 case 't':
db42d148 3674 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3675 break;
34a3fe2a 3676 case 'e':
db42d148 3677 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3678 break;
3679 case 'a':
db42d148 3680 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3681 break;
02aa26ce
NT
3682 } /* end switch */
3683
79072805
LW
3684 s++;
3685 continue;
02aa26ce 3686 } /* end if (backslash) */
4c3a8340
TS
3687#ifdef EBCDIC
3688 else
3689 literal_endpoint++;
3690#endif
02aa26ce 3691
f9a63242 3692 default_action:
77a135fe
KW
3693 /* If we started with encoded form, or already know we want it,
3694 then encode the next character */
3695 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3696 STRLEN len = 1;
77a135fe
KW
3697
3698
3699 /* One might think that it is wasted effort in the case of the
3700 * source being utf8 (this_utf8 == TRUE) to take the next character
3701 * in the source, convert it to an unsigned value, and then convert
3702 * it back again. But the source has not been validated here. The
3703 * routine that does the conversion checks for errors like
3704 * malformed utf8 */
3705
5f66b61c
AL
3706 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3707 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3708 if (!has_utf8) {
3709 SvCUR_set(sv, d - SvPVX_const(sv));
3710 SvPOK_on(sv);
3711 *d = '\0';
77a135fe 3712 /* See Note on sizing above. */
7bf79863
KW
3713 sv_utf8_upgrade_flags_grow(sv,
3714 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3715 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3716 d = SvPVX(sv) + SvCUR(sv);
3717 has_utf8 = TRUE;
3718 } else if (need > len) {
3719 /* encoded value larger than old, may need extra space (NOTE:
3720 * SvCUR() is not set correctly here). See Note on sizing
3721 * above. */
9d4ba2ae 3722 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3723 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3724 }
77a135fe
KW
3725 s += len;
3726
5f66b61c 3727 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3728#ifdef EBCDIC
3729 if (uv > 255 && !dorange)
3730 native_range = FALSE;
3731#endif
2b9d42f0
NIS
3732 }
3733 else {
3734 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3735 }
02aa26ce
NT
3736 } /* while loop to process each character */
3737
3738 /* terminate the string and set up the sv */
79072805 3739 *d = '\0';
95a20fc0 3740 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3741 if (SvCUR(sv) >= SvLEN(sv))
5637ef5b
NC
3742 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3743 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
2b9d42f0 3744
79072805 3745 SvPOK_on(sv);
9f4817db 3746 if (PL_encoding && !has_utf8) {
d0063567
DK
3747 sv_recode_to_utf8(sv, PL_encoding);
3748 if (SvUTF8(sv))
3749 has_utf8 = TRUE;
9f4817db 3750 }
2b9d42f0 3751 if (has_utf8) {
7e2040f0 3752 SvUTF8_on(sv);
2b9d42f0 3753 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3754 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3755 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3756 }
3757 }
79072805 3758
02aa26ce 3759 /* shrink the sv if we allocated more than we used */
79072805 3760 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3761 SvPV_shrink_to_cur(sv);
79072805 3762 }
02aa26ce 3763
6154021b 3764 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3765 if (s > PL_bufptr) {
b899e89d 3766 SvREFCNT_inc_simple_void_NN(sv);
4f3e2518
DM
3767 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3768 && ! PL_parser->lex_re_reparsing)
3769 {
eb0d8d16
NC
3770 const char *const key = PL_lex_inpat ? "qr" : "q";
3771 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3772 const char *type;
3773 STRLEN typelen;
3774
3775 if (PL_lex_inwhat == OP_TRANS) {
3776 type = "tr";
3777 typelen = 2;
3778 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3779 type = "s";
3780 typelen = 1;
9da1dd8f
DM
3781 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3782 type = "q";
3783 typelen = 1;
eb0d8d16
NC
3784 } else {
3785 type = "qq";
3786 typelen = 2;
3787 }
3788
3789 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3790 type, typelen);
3791 }
6154021b 3792 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b899e89d
FC
3793 }
3794 LEAVE_with_name("scan_const");
79072805
LW
3795 return s;
3796}
3797
ffb4593c
NT
3798/* S_intuit_more
3799 * Returns TRUE if there's more to the expression (e.g., a subscript),
3800 * FALSE otherwise.
ffb4593c
NT
3801 *
3802 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3803 *
3804 * ->[ and ->{ return TRUE
3805 * { and [ outside a pattern are always subscripts, so return TRUE
3806 * if we're outside a pattern and it's not { or [, then return FALSE
3807 * if we're in a pattern and the first char is a {
3808 * {4,5} (any digits around the comma) returns FALSE
3809 * if we're in a pattern and the first char is a [
3810 * [] returns FALSE
3811 * [SOMETHING] has a funky algorithm to decide whether it's a
3812 * character class or not. It has to deal with things like
3813 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3814 * anything else returns TRUE
3815 */
3816
9cbb5ea2
GS
3817/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3818
76e3520e 3819STATIC int
5aaab254 3820S_intuit_more(pTHX_ char *s)
79072805 3821{
97aff369 3822 dVAR;
7918f24d
NC
3823
3824 PERL_ARGS_ASSERT_INTUIT_MORE;
3825
3280af22 3826 if (PL_lex_brackets)
79072805
LW
3827 return TRUE;
3828 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3829 return TRUE;
3830 if (*s != '{' && *s != '[')
3831 return FALSE;
3280af22 3832 if (!PL_lex_inpat)
79072805
LW
3833 return TRUE;
3834
3835 /* In a pattern, so maybe we have {n,m}. */
3836 if (*s == '{') {
4d68ffa0 3837 if (regcurly(s, FALSE)) {
79072805 3838 return FALSE;
b3155d95 3839 }
79072805 3840 return TRUE;
79072805
LW
3841 }
3842
3843 /* On the other hand, maybe we have a character class */
3844
3845 s++;
3846 if (*s == ']' || *s == '^')
3847 return FALSE;
3848 else {
ffb4593c 3849 /* this is terrifying, and it works */
99f2bdb7 3850 int weight;
79072805 3851 char seen[256];
9d4ba2ae 3852 const char * const send = strchr(s,']');
99f2bdb7 3853 unsigned char un_char, last_un_char;
3280af22 3854 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3855
3856 if (!send) /* has to be an expression */
3857 return TRUE;
99f2bdb7 3858 weight = 2; /* let's weigh the evidence */
79072805 3859
79072805
LW
3860 if (*s == '$')
3861 weight -= 3;
3862 else if (isDIGIT(*s)) {
3863 if (s[1] != ']') {
3864 if (isDIGIT(s[1]) && s[2] == ']')
3865 weight -= 10;
3866 }
3867 else
3868 weight -= 100;
3869 }
99f2bdb7
DD
3870 Zero(seen,256,char);
3871 un_char = 255;
79072805
LW
3872 for (; s < send; s++) {
3873 last_un_char = un_char;
3874 un_char = (unsigned char)*s;
3875 switch (*s) {
3876 case '@':
3877 case '&':
3878 case '$':
3879 weight -= seen[un_char] * 10;
8a2bca12 3880 if (isWORDCHAR_lazy_if(s+1,UTF)) {
90e5519e 3881 int len;
8903cb82 3882 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e 3883 len = (int)strlen(tmpbuf);
6fbd0d97
BF
3884 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3885 UTF ? SVf_UTF8 : 0, SVt_PV))
79072805
LW
3886 weight -= 100;
3887 else
3888 weight -= 10;
3889 }
3890 else if (*s == '$' && s[1] &&
93a17b20
LW
3891 strchr("[#!%*<>()-=",s[1])) {
3892 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3893 weight -= 10;
3894 else
3895 weight -= 1;
3896 }
3897 break;
3898 case '\\':
3899 un_char = 254;
3900 if (s[1]) {
93a17b20 3901 if (strchr("wds]",s[1]))
79072805 3902 weight += 100;
10edeb5d 3903 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3904 weight += 1;
93a17b20 3905 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3906 weight += 40;
3907 else if (isDIGIT(s[1])) {
3908 weight += 40;
3909 while (s[1] && isDIGIT(s[1]))
3910 s++;
3911 }
3912 }
3913 else
3914 weight += 100;
3915 break;
3916 case '-':
3917 if (s[1] == '\\')
3918 weight += 50;
93a17b20 3919 if (strchr("aA01! ",last_un_char))
79072805 3920 weight += 30;
93a17b20 3921 if (strchr("zZ79~",s[1]))
79072805 3922 weight += 30;
f27ffc4a
GS
3923 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3924 weight -= 5; /* cope with negative subscript */
79072805
LW
3925 break;
3926 default:
0eb30aeb 3927 if (!isWORDCHAR(last_un_char)
3792a11b
NC
3928 && !(last_un_char == '$' || last_un_char == '@'
3929 || last_un_char == '&')
3930 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3931 char *d = tmpbuf;
3932 while (isALPHA(*s))
3933 *d++ = *s++;
3934 *d = '\0';
5458a98a 3935 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3936 weight -= 150;
3937 }
3938 if (un_char == last_un_char + 1)
3939 weight += 5;
3940 weight -= seen[un_char];
3941 break;
3942 }
3943 seen[un_char]++;
3944 }
3945 if (weight >= 0) /* probably a character class */
3946 return FALSE;
3947 }
3948
3949 return TRUE;
3950}
ffed7fef 3951
ffb4593c
NT
3952/*
3953 * S_intuit_method
3954 *
3955 * Does all the checking to disambiguate
3956 * foo bar
3957 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3958 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3959 *
3960 * First argument is the stuff after the first token, e.g. "bar".
3961 *
a4fd4a89 3962 * Not a method if foo is a filehandle.
ffb4593c
NT
3963 * Not a method if foo is a subroutine prototyped to take a filehandle.
3964 * Not a method if it's really "Foo $bar"
3965 * Method if it's "foo $bar"
3966 * Not a method if it's really "print foo $bar"
3967 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3968 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3969 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3970 * =>
3971 */
3972
76e3520e 3973STATIC int
62d55b22 3974S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3975{
97aff369 3976 dVAR;
a0d0e21e 3977 char *s = start + (*start == '$');
3280af22 3978 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3979 STRLEN len;
3980 GV* indirgv;
5db06880
NC
3981#ifdef PERL_MAD
3982 int soff;
3983#endif
a0d0e21e 3984
7918f24d
NC
3985 PERL_ARGS_ASSERT_INTUIT_METHOD;
3986
aca88b25 3987 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3988 return 0;
aca88b25 3989 if (cv && SvPOK(cv)) {
8fa6a409 3990 const char *proto = CvPROTO(cv);
62d55b22
NC
3991 if (proto) {
3992 if (*proto == ';')
3993 proto++;
3994 if (*proto == '*')
3995 return 0;
3996 }
a0d0e21e 3997 }
ffb4593c 3998
a0d0e21e 3999 if (*start == '$') {
39c012bc 4000 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3ef1310e 4001 isUPPER(*PL_tokenbuf))
a0d0e21e 4002 return 0;
5db06880
NC
4003#ifdef PERL_MAD
4004 len = start - SvPVX(PL_linestr);
4005#endif
29595ff2 4006 s = PEEKSPACE(s);
f0092767 4007#ifdef PERL_MAD
5db06880
NC
4008 start = SvPVX(PL_linestr) + len;
4009#endif
3280af22
NIS
4010 PL_bufptr = start;
4011 PL_expect = XREF;
a0d0e21e
LW
4012 return *s == '(' ? FUNCMETH : METHOD;
4013 }
04e07f8b
BF
4014
4015 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4016 /* start is the beginning of the possible filehandle/object,
4017 * and s is the end of it
4018 * tmpbuf is a copy of it (but with single quotes as double colons)
4019 */
4020
5458a98a 4021 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
4022 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4023 len -= 2;
4024 tmpbuf[len] = '\0';
5db06880
NC
4025#ifdef PERL_MAD
4026 soff = s - SvPVX(PL_linestr);
4027#endif
c3e0f903
GS
4028 goto bare_package;
4029 }
38d2cf30 4030 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8ebc5c01 4031 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
4032 return 0;
4033 /* filehandle or package name makes it a method */
39c012bc 4034 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5db06880
NC
4035#ifdef PERL_MAD
4036 soff = s - SvPVX(PL_linestr);
4037#endif
29595ff2 4038 s = PEEKSPACE(s);
3280af22 4039 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 4040 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 4041 bare_package:
cd81e915 4042 start_force(PL_curforce);
9ded7720 4043 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 4044 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 4045 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880 4046 if (PL_madskills)
38d2cf30
BF
4047 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4048 ( UTF ? SVf_UTF8 : 0 )));
3280af22 4049 PL_expect = XTERM;
a0d0e21e 4050 force_next(WORD);
3280af22 4051 PL_bufptr = s;
5db06880
NC
4052#ifdef PERL_MAD
4053 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4054#endif
a0d0e21e
LW
4055 return *s == '(' ? FUNCMETH : METHOD;
4056 }
4057 }
4058 return 0;
4059}
4060
16d20bd9 4061/* Encoded script support. filter_add() effectively inserts a
4e553d73 4062 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
4063 * Note that the filter function only applies to the current source file
4064 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4065 *
4066 * The datasv parameter (which may be NULL) can be used to pass
4067 * private data to this instance of the filter. The filter function
4068 * can recover the SV using the FILTER_DATA macro and use it to
4069 * store private buffers and state information.
4070 *
4071 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 4072 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 4073 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
4074 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4075 * private use must be set using malloc'd pointers.
4076 */
16d20bd9
AD
4077
4078SV *
864dbfa3 4079Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 4080{
97aff369 4081 dVAR;
f4c556ac 4082 if (!funcp)
a0714e2c 4083 return NULL;
f4c556ac 4084
5486870f
DM
4085 if (!PL_parser)
4086 return NULL;
4087
f1c31c52
FC
4088 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4089 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4090
3280af22
NIS
4091 if (!PL_rsfp_filters)
4092 PL_rsfp_filters = newAV();
16d20bd9 4093 if (!datasv)
561b68a9 4094 datasv = newSV(0);
862a34c6 4095 SvUPGRADE(datasv, SVt_PVIO);
8141890a 4096 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 4097 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 4098 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
4099 FPTR2DPTR(void *, IoANY(datasv)),
4100 SvPV_nolen(datasv)));
3280af22
NIS
4101 av_unshift(PL_rsfp_filters, 1);
4102 av_store(PL_rsfp_filters, 0, datasv) ;
60d63348
FC
4103 if (
4104 !PL_parser->filtered
4105 && PL_parser->lex_flags & LEX_EVALBYTES
4106 && PL_bufptr < PL_bufend
4107 ) {
4108 const char *s = PL_bufptr;
4109 while (s < PL_bufend) {
4110 if (*s == '\n') {
4111 SV *linestr = PL_parser->linestr;
4112 char *buf = SvPVX(linestr);
4113 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4114 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4115 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4116 STRLEN const linestart_pos = PL_parser->linestart - buf;
4117 STRLEN const last_uni_pos =
4118 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4119 STRLEN const last_lop_pos =
4120 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4121 av_push(PL_rsfp_filters, linestr);
4122 PL_parser->linestr =
4123 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4124 buf = SvPVX(PL_parser->linestr);
4125 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4126 PL_parser->bufptr = buf + bufptr_pos;
4127 PL_parser->oldbufptr = buf + oldbufptr_pos;
4128 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4129 PL_parser->linestart = buf + linestart_pos;
4130 if (PL_parser->last_uni)
4131 PL_parser->last_uni = buf + last_uni_pos;
4132 if (PL_parser->last_lop)
4133 PL_parser->last_lop = buf + last_lop_pos;
4134 SvLEN(linestr) = SvCUR(linestr);
4135 SvCUR(linestr) = s-SvPVX(linestr);
4136 PL_parser->filtered = 1;
4137 break;
4138 }
4139 s++;
4140 }
4141 }
16d20bd9
AD
4142 return(datasv);
4143}
4e553d73 4144
16d20bd9
AD
4145
4146/* Delete most recently added instance of this filter function. */
a0d0e21e 4147void
864dbfa3 4148Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 4149{
97aff369 4150 dVAR;
e0c19803 4151 SV *datasv;
24801a4b 4152
7918f24d
NC
4153 PERL_ARGS_ASSERT_FILTER_DEL;
4154
33073adb 4155#ifdef DEBUGGING
55662e27
JH
4156 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4157 FPTR2DPTR(void*, funcp)));
33073adb 4158#endif
5486870f 4159 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
4160 return;
4161 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 4162 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 4163 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 4164 sv_free(av_pop(PL_rsfp_filters));
e50aee73 4165
16d20bd9
AD
4166 return;
4167 }
4168 /* we need to search for the correct entry and clear it */
cea2e8a9 4169 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
4170}
4171
4172
1de9afcd
RGS
4173/* Invoke the idxth filter function for the current rsfp. */
4174/* maxlen 0 = read one text line */
16d20bd9 4175I32
864dbfa3 4176Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 4177{
97aff369 4178 dVAR;
16d20bd9
AD
4179 filter_t funcp;
4180 SV *datasv = NULL;
f482118e
NC
4181 /* This API is bad. It should have been using unsigned int for maxlen.
4182 Not sure if we want to change the API, but if not we should sanity
4183 check the value here. */
60d63348 4184 unsigned int correct_length
39cd7a59
NC
4185 = maxlen < 0 ?
4186#ifdef PERL_MICRO
4187 0x7FFFFFFF
4188#else
4189 INT_MAX
4190#endif
4191 : maxlen;
e50aee73 4192
7918f24d
NC
4193 PERL_ARGS_ASSERT_FILTER_READ;
4194
5486870f 4195 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 4196 return -1;
1de9afcd 4197 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
4198 /* Provide a default input filter to make life easy. */
4199 /* Note that we append to the line. This is handy. */
f4c556ac
GS
4200 DEBUG_P(PerlIO_printf(Perl_debug_log,
4201 "filter_read %d: from rsfp\n", idx));
f482118e 4202 if (correct_length) {
16d20bd9
AD
4203 /* Want a block */
4204 int len ;
f54cb97a 4205 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
4206
4207 /* ensure buf_sv is large enough */
881d8f0a 4208 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
4209 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4210 correct_length)) <= 0) {
3280af22 4211 if (PerlIO_error(PL_rsfp))
37120919
AD
4212 return -1; /* error */
4213 else
4214 return 0 ; /* end of file */
4215 }
16d20bd9 4216 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 4217 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
4218 } else {
4219 /* Want a line */
3280af22
NIS
4220 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4221 if (PerlIO_error(PL_rsfp))
37120919
AD
4222 return -1; /* error */
4223 else
4224 return 0 ; /* end of file */
4225 }
16d20bd9
AD
4226 }
4227 return SvCUR(buf_sv);
4228 }
4229 /* Skip this filter slot if filter has been deleted */
1de9afcd 4230 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
4231 DEBUG_P(PerlIO_printf(Perl_debug_log,
4232 "filter_read %d: skipped (filter deleted)\n",
4233 idx));
f482118e 4234 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9 4235 }
60d63348
FC
4236 if (SvTYPE(datasv) != SVt_PVIO) {
4237 if (correct_length) {
4238 /* Want a block */
4239 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4240 if (!remainder) return 0; /* eof */
4241 if (correct_length > remainder) correct_length = remainder;
4242 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4243 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4244 } else {
4245 /* Want a line */
4246 const char *s = SvEND(datasv);
4247 const char *send = SvPVX(datasv) + SvLEN(datasv);
4248 while (s < send) {
4249 if (*s == '\n') {
4250 s++;
4251 break;
4252 }
4253 s++;
4254 }
4255 if (s == send) return 0; /* eof */
4256 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4257 SvCUR_set(datasv, s-SvPVX(datasv));
4258 }
4259 return SvCUR(buf_sv);
4260 }
16d20bd9 4261 /* Get function pointer hidden within datasv */
8141890a 4262 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
4263 DEBUG_P(PerlIO_printf(Perl_debug_log,
4264 "filter_read %d: via function %p (%s)\n",
ca0270c4 4265 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
4266 /* Call function. The function is expected to */
4267 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 4268 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 4269 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
4270}
4271
76e3520e 4272STATIC char *
5aaab254 4273S_filter_gets(pTHX_ SV *sv, STRLEN append)
16d20bd9 4274{
97aff369 4275 dVAR;
7918f24d
NC
4276
4277 PERL_ARGS_ASSERT_FILTER_GETS;
4278
c39cd008 4279#ifdef PERL_CR_FILTER
3280af22 4280 if (!PL_rsfp_filters) {
c39cd008 4281 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
4282 }
4283#endif
3280af22 4284 if (PL_rsfp_filters) {
55497cff 4285 if (!append)
4286 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
4287 if (FILTER_READ(0, sv, 0) > 0)
4288 return ( SvPVX(sv) ) ;
4289 else
bd61b366 4290 return NULL ;
16d20bd9 4291 }
9d116dd7 4292 else
5cc814fd 4293 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
4294}
4295
01ec43d0 4296STATIC HV *
9bde8eb0 4297S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 4298{
97aff369 4299 dVAR;
def3634b
GS
4300 GV *gv;
4301
7918f24d
NC
4302 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4303
01ec43d0 4304 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
4305 return PL_curstash;
4306
4307 if (len > 2 &&
4308 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
acc6da14 4309 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
01ec43d0
GS
4310 {
4311 return GvHV(gv); /* Foo:: */
def3634b
GS
4312 }
4313
4314 /* use constant CLASS => 'MyClass' */
acc6da14 4315 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
c35e046a
AL
4316 if (gv && GvCV(gv)) {
4317 SV * const sv = cv_const_sv(GvCV(gv));
4318 if (sv)
9bde8eb0 4319 pkgname = SvPV_const(sv, len);
def3634b
GS
4320 }
4321
acc6da14 4322 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
def3634b 4323}
a0d0e21e 4324
e3f73d4e
RGS
4325/*
4326 * S_readpipe_override
486ec47a 4327 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4328 * optree, provided sublex_start() is called afterwards.
4329 */
4330STATIC void
1d51329b 4331S_readpipe_override(pTHX)
e3f73d4e
RGS
4332{
4333 GV **gvp;
4334 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4335 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4336 if ((gv_readpipe
4337 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4338 ||
4339 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4340 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4341 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4342 {
4343 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4344 op_append_elem(OP_LIST,
e3f73d4e
RGS
4345 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4346 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4347 }
e3f73d4e
RGS
4348}
4349
5db06880
NC
4350#ifdef PERL_MAD
4351 /*
4352 * Perl_madlex
4353 * The intent of this yylex wrapper is to minimize the changes to the
4354 * tokener when we aren't interested in collecting madprops. It remains
4355 * to be seen how successful this strategy will be...
4356 */
4357
4358int
4359Perl_madlex(pTHX)
4360{
4361 int optype;
4362 char *s = PL_bufptr;
4363
cd81e915
NC
4364 /* make sure PL_thiswhite is initialized */
4365 PL_thiswhite = 0;
4366 PL_thismad = 0;
5db06880 4367
5db06880 4368 /* previous token ate up our whitespace? */
cd81e915
NC
4369 if (!PL_lasttoke && PL_nextwhite) {
4370 PL_thiswhite = PL_nextwhite;
4371 PL_nextwhite = 0;
5db06880
NC
4372 }
4373
4374 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4375 PL_realtokenstart = -1;
4376 PL_thistoken = 0;
5db06880
NC
4377 optype = yylex();
4378 s = PL_bufptr;
cd81e915 4379 assert(PL_curforce < 0);
5db06880 4380
cd81e915
NC
4381 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4382 if (!PL_thistoken) {
4383 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4384 PL_thistoken = newSVpvs("");
5db06880 4385 else {
c35e046a 4386 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4387 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4388 }
4389 }
cd81e915
NC
4390 if (PL_thismad) /* install head */
4391 CURMAD('X', PL_thistoken);
5db06880
NC
4392 }
4393
4394 /* last whitespace of a sublex? */
cd81e915
NC
4395 if (optype == ')' && PL_endwhite) {
4396 CURMAD('X', PL_endwhite);
5db06880
NC
4397 }
4398
cd81e915 4399 if (!PL_thismad) {
5db06880
NC
4400
4401 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4402 if (!PL_thiswhite && !PL_endwhite && !optype) {
4403 sv_free(PL_thistoken);
4404 PL_thistoken = 0;
5db06880
NC
4405 return 0;
4406 }
4407
4408 /* put off final whitespace till peg */
60d63348 4409 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
cd81e915
NC
4410 PL_nextwhite = PL_thiswhite;
4411 PL_thiswhite = 0;
5db06880 4412 }
cd81e915
NC
4413 else if (PL_thisopen) {
4414 CURMAD('q', PL_thisopen);
4415 if (PL_thistoken)
4416 sv_free(PL_thistoken);
4417 PL_thistoken = 0;
5db06880
NC
4418 }
4419 else {
4420 /* Store actual token text as madprop X */
cd81e915 4421 CURMAD('X', PL_thistoken);
5db06880
NC
4422 }
4423
cd81e915 4424 if (PL_thiswhite) {
5db06880 4425 /* add preceding whitespace as madprop _ */
cd81e915 4426 CURMAD('_', PL_thiswhite);
5db06880
NC
4427 }
4428
cd81e915 4429 if (PL_thisstuff) {
5db06880 4430 /* add quoted material as madprop = */
cd81e915 4431 CURMAD('=', PL_thisstuff);
5db06880
NC
4432 }
4433
cd81e915 4434 if (PL_thisclose) {
5db06880 4435 /* add terminating quote as madprop Q */
cd81e915 4436 CURMAD('Q', PL_thisclose);
5db06880
NC
4437 }
4438 }
4439
4440 /* special processing based on optype */
4441
4442 switch (optype) {
4443
4444 /* opval doesn't need a TOKEN since it can already store mp */
4445 case WORD:
4446 case METHOD:
4447 case FUNCMETH:
4448 case THING:
4449 case PMFUNC:
4450 case PRIVATEREF:
4451 case FUNC0SUB:
4452 case UNIOPSUB:
4453 case LSTOPSUB:
6154021b
RGS
4454 if (pl_yylval.opval)
4455 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4456 PL_thismad = 0;
5db06880
NC
4457 return optype;
4458
4459 /* fake EOF */
4460 case 0:
4461 optype = PEG;
cd81e915
NC
4462 if (PL_endwhite) {
4463 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4464 PL_endwhite = 0;
5db06880
NC
4465 }
4466 break;
4467
5504e6cf
FC
4468 /* pval */
4469 case LABEL:
4470 break;
4471
5db06880
NC
4472 case ']':
4473 case '}':
cd81e915 4474 if (PL_faketokens)
5db06880
NC
4475 break;
4476 /* remember any fake bracket that lexer is about to discard */
4477 if (PL_lex_brackets == 1 &&
4478 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4479 {
4480 s = PL_bufptr;
4481 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4482 s++;
4483 if (*s == '}') {
cd81e915
NC
4484 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4485 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4486 PL_thiswhite = 0;
5db06880
NC
4487 PL_bufptr = s - 1;
4488 break; /* don't bother looking for trailing comment */
4489 }
4490 else
4491 s = PL_bufptr;
4492 }
4493 if (optype == ']')
4494 break;
4495 /* FALLTHROUGH */
4496
4497 /* attach a trailing comment to its statement instead of next token */
4498 case ';':
cd81e915 4499 if (PL_faketokens)
5db06880
NC
4500 break;
4501 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4502 s = PL_bufptr;
4503 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4504 s++;
4505 if (*s == '\n' || *s == '#') {
4506 while (s < PL_bufend && *s != '\n')
4507 s++;
4508 if (s < PL_bufend)
4509 s++;
cd81e915
NC
4510 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4511 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4512 PL_thiswhite = 0;
5db06880
NC
4513 PL_bufptr = s;
4514 }
4515 }
4516 break;
4517
5db06880
NC
4518 /* ival */
4519 default:
4520 break;
4521
4522 }
4523
4524 /* Create new token struct. Note: opvals return early above. */
6154021b 4525 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4526 PL_thismad = 0;
5db06880
NC
4527 return optype;
4528}
4529#endif
4530
468aa647 4531STATIC char *
cc6ed77d 4532S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4533 dVAR;
7918f24d
NC
4534
4535 PERL_ARGS_ASSERT_TOKENIZE_USE;
4536
468aa647
RGS
4537 if (PL_expect != XSTATE)
4538 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4539 is_use ? "use" : "no"));
52d0e95b 4540 PL_expect = XTERM;
29595ff2 4541 s = SKIPSPACE1(s);
468aa647
RGS
4542 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4543 s = force_version(s, TRUE);
17c59fdf
VP
4544 if (*s == ';' || *s == '}'
4545 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4546 start_force(PL_curforce);
9ded7720 4547 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4548 force_next(WORD);
4549 }
4550 else if (*s == 'v') {
4551 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4552 s = force_version(s, FALSE);
4553 }
4554 }
4555 else {
4556 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4557 s = force_version(s, FALSE);
4558 }
6154021b 4559 pl_yylval.ival = is_use;
468aa647
RGS
4560 return s;
4561}
748a9306 4562#ifdef DEBUGGING
27da23d5 4563 static const char* const exp_name[] =
09bef843 4564 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4565 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4566 };
748a9306 4567#endif
463ee0b2 4568
361d9b55
Z
4569#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4570STATIC bool
4571S_word_takes_any_delimeter(char *p, STRLEN len)
4572{
4573 return (len == 1 && strchr("msyq", p[0])) ||
4574 (len == 2 && (
4575 (p[0] == 't' && p[1] == 'r') ||
4576 (p[0] == 'q' && strchr("qwxr", p[1]))));
4577}
4578
02aa26ce
NT
4579/*
4580 yylex
4581
4582 Works out what to call the token just pulled out of the input
4583 stream. The yacc parser takes care of taking the ops we return and
4584 stitching them into a tree.
4585
4586 Returns:
3875fc11 4587 The type of the next token
02aa26ce
NT
4588
4589 Structure:
3875fc11
FC
4590 Switch based on the current state:
4591 - if we already built the token before, use it
4592 - if we have a case modifier in a string, deal with that
4593 - handle other cases of interpolation inside a string
4594 - scan the next line if we are inside a format
4595 In the normal state switch on the next character:
4596 - default:
4597 if alphabetic, go to key lookup
4598 unrecoginized character - croak
4599 - 0/4/26: handle end-of-line or EOF
4600 - cases for whitespace
4601 - \n and #: handle comments and line numbers
4602 - various operators, brackets and sigils
4603 - numbers
4604 - quotes
4605 - 'v': vstrings (or go to key lookup)
4606 - 'x' repetition operator (or go to key lookup)
4607 - other ASCII alphanumerics (key lookup begins here):
4608 word before => ?
4609 keyword plugin
4610 scan built-in keyword (but do nothing with it yet)
4611 check for statement label
4612 check for lexical subs
4613 goto just_a_word if there is one
4614 see whether built-in keyword is overridden
4615 switch on keyword number:
4616 - default: just_a_word:
4617 not a built-in keyword; handle bareword lookup
4618 disambiguate between method and sub call
4619 fall back to bareword
4620 - cases for built-in keywords
02aa26ce
NT
4621*/
4622
20141f0e 4623
dba4d153
JH
4624#ifdef __SC__
4625#pragma segment Perl_yylex
4626#endif
dba4d153 4627int
dba4d153 4628Perl_yylex(pTHX)
20141f0e 4629{
97aff369 4630 dVAR;
eb578fdb
KW
4631 char *s = PL_bufptr;
4632 char *d;
463ee0b2 4633 STRLEN len;
705fe0e5
FC
4634 bool bof = FALSE;
4635 U8 formbrack = 0;
580561a3 4636 U32 fake_eof = 0;
a687059c 4637
10edeb5d
JH
4638 /* orig_keyword, gvp, and gv are initialized here because
4639 * jump to the label just_a_word_zero can bypass their
4640 * initialization later. */
4641 I32 orig_keyword = 0;
4642 GV *gv = NULL;
4643 GV **gvp = NULL;
4644
bbf60fe6 4645 DEBUG_T( {
396482e1 4646 SV* tmp = newSVpvs("");
b6007c36
DM
4647 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4648 (IV)CopLINE(PL_curcop),
4649 lex_state_names[PL_lex_state],
4650 exp_name[PL_expect],
4651 pv_display(tmp, s, strlen(s), 0, 60));
4652 SvREFCNT_dec(tmp);
bbf60fe6 4653 } );
02aa26ce 4654
3280af22 4655 switch (PL_lex_state) {
79072805
LW
4656#ifdef COMMENTARY
4657 case LEX_NORMAL: /* Some compilers will produce faster */
4658 case LEX_INTERPNORMAL: /* code if we comment these out. */
4659 break;
4660#endif
4661
09bef843 4662 /* when we've already built the next token, just pull it out of the queue */
79072805 4663 case LEX_KNOWNEXT:
5db06880
NC
4664#ifdef PERL_MAD
4665 PL_lasttoke--;
6154021b 4666 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4667 if (PL_madskills) {
cd81e915 4668 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4669 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4670 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4671 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4672 PL_thismad->mad_val = 0;
4673 mad_free(PL_thismad);
4674 PL_thismad = 0;
5db06880
NC
4675 }
4676 }
4677 if (!PL_lasttoke) {
4678 PL_lex_state = PL_lex_defer;
4679 PL_expect = PL_lex_expect;
4680 PL_lex_defer = LEX_NORMAL;
4681 if (!PL_nexttoke[PL_lasttoke].next_type)
4682 return yylex();
4683 }
4684#else
3280af22 4685 PL_nexttoke--;
6154021b 4686 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4687 if (!PL_nexttoke) {
4688 PL_lex_state = PL_lex_defer;
4689 PL_expect = PL_lex_expect;
4690 PL_lex_defer = LEX_NORMAL;
463ee0b2 4691 }
5db06880 4692#endif
a7aaec61
Z
4693 {
4694 I32 next_type;
5db06880 4695#ifdef PERL_MAD
a7aaec61 4696 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4697#else
a7aaec61 4698 next_type = PL_nexttype[PL_nexttoke];
5db06880 4699#endif
78cdf107
Z
4700 if (next_type & (7<<24)) {
4701 if (next_type & (1<<24)) {
4702 if (PL_lex_brackets > 100)
4703 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4704 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4705 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4706 }
4707 if (next_type & (2<<24))
4708 PL_lex_allbrackets++;
4709 if (next_type & (4<<24))
4710 PL_lex_allbrackets--;
a7aaec61
Z
4711 next_type &= 0xffff;
4712 }
3f33d153 4713 return REPORT(next_type == 'p' ? pending_ident() : next_type);
a7aaec61 4714 }
79072805 4715
02aa26ce 4716 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4717 when we get here, PL_bufptr is at the \
02aa26ce 4718 */
79072805
LW
4719 case LEX_INTERPCASEMOD:
4720#ifdef DEBUGGING
3280af22 4721 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5637ef5b
NC
4722 Perl_croak(aTHX_
4723 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4724 PL_bufptr, PL_bufend, *PL_bufptr);
79072805 4725#endif
02aa26ce 4726 /* handle \E or end of string */
3280af22 4727 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4728 /* if at a \E */
3280af22 4729 if (PL_lex_casemods) {
f54cb97a 4730 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4731 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4732
3792a11b 4733 if (PL_bufptr != PL_bufend
838f2281
BF
4734 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4735 || oldmod == 'F')) {
3280af22
NIS
4736 PL_bufptr += 2;
4737 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4738#ifdef PERL_MAD
4739 if (PL_madskills)
6b29d1f5 4740 PL_thistoken = newSVpvs("\\E");
5db06880 4741#endif
a0d0e21e 4742 }
78cdf107 4743 PL_lex_allbrackets--;
bbf60fe6 4744 return REPORT(')');
79072805 4745 }
52ed07f6
BF
4746 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4747 /* Got an unpaired \E */
4748 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
820438b1 4749 "Useless use of \\E");
52ed07f6 4750 }
5db06880
NC
4751#ifdef PERL_MAD
4752 while (PL_bufptr != PL_bufend &&
4753 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
1cac5c33
FC
4754 if (PL_madskills) {
4755 if (!PL_thiswhite)
6b29d1f5 4756 PL_thiswhite = newSVpvs("");
1cac5c33
FC
4757 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4758 }
5db06880
NC
4759 PL_bufptr += 2;
4760 }
4761#else
3280af22
NIS
4762 if (PL_bufptr != PL_bufend)
4763 PL_bufptr += 2;
5db06880 4764#endif
3280af22 4765 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4766 return yylex();
79072805
LW
4767 }
4768 else {
607df283 4769 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4770 "### Saw case modifier\n"); });
3280af22 4771 s = PL_bufptr + 1;
6e909404 4772 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4773#ifdef PERL_MAD
1cac5c33
FC
4774 if (PL_madskills) {
4775 if (!PL_thiswhite)
6b29d1f5 4776 PL_thiswhite = newSVpvs("");
1cac5c33
FC
4777 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4778 }
5db06880 4779#endif
89122651 4780 PL_bufptr = s + 3;
6e909404
JH
4781 PL_lex_state = LEX_INTERPCONCAT;
4782 return yylex();
a0d0e21e 4783 }
6e909404 4784 else {
90771dc0 4785 I32 tmp;
5db06880
NC
4786 if (!PL_madskills) /* when just compiling don't need correct */
4787 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4788 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
838f2281
BF
4789 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4790 (strchr(PL_lex_casestack, 'L')
4791 || strchr(PL_lex_casestack, 'U')
4792 || strchr(PL_lex_casestack, 'F'))) {
6e909404 4793 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4794 PL_lex_allbrackets--;
bbf60fe6 4795 return REPORT(')');
6e909404
JH
4796 }
4797 if (PL_lex_casemods > 10)
4798 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4799 PL_lex_casestack[PL_lex_casemods++] = *s;
4800 PL_lex_casestack[PL_lex_casemods] = '\0';
4801 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4802 start_force(PL_curforce);
9ded7720 4803 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4804 force_next((2<<24)|'(');
cd81e915 4805 start_force(PL_curforce);
6e909404 4806 if (*s == 'l')
9ded7720 4807 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4808 else if (*s == 'u')
9ded7720 4809 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4810 else if (*s == 'L')
9ded7720 4811 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4812 else if (*s == 'U')
9ded7720 4813 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4814 else if (*s == 'Q')
9ded7720 4815 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
838f2281
BF
4816 else if (*s == 'F')
4817 NEXTVAL_NEXTTOKE.ival = OP_FC;
6e909404 4818 else
5637ef5b 4819 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5db06880 4820 if (PL_madskills) {
a5849ce5
NC
4821 SV* const tmpsv = newSVpvs("\\ ");
4822 /* replace the space with the character we want to escape
4823 */
4824 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4825 curmad('_', tmpsv);
4826 }
6e909404 4827 PL_bufptr = s + 1;
a0d0e21e 4828 }
79072805 4829 force_next(FUNC);
3280af22
NIS
4830 if (PL_lex_starts) {
4831 s = PL_bufptr;
4832 PL_lex_starts = 0;
5db06880
NC
4833#ifdef PERL_MAD
4834 if (PL_madskills) {
cd81e915
NC
4835 if (PL_thistoken)
4836 sv_free(PL_thistoken);
6b29d1f5 4837 PL_thistoken = newSVpvs("");
5db06880
NC
4838 }
4839#endif
131b3ad0
DM
4840 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4841 if (PL_lex_casemods == 1 && PL_lex_inpat)
4842 OPERATOR(',');
4843 else
4844 Aop(OP_CONCAT);
79072805
LW
4845 }
4846 else
cea2e8a9 4847 return yylex();
79072805
LW
4848 }
4849
55497cff 4850 case LEX_INTERPPUSH:
bbf60fe6 4851 return REPORT(sublex_push());
55497cff 4852
79072805 4853 case LEX_INTERPSTART:
3280af22 4854 if (PL_bufptr == PL_bufend)
bbf60fe6 4855 return REPORT(sublex_done());
9da1dd8f 4856 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
b6007c36 4857 "### Interpolated variable\n"); });
3280af22 4858 PL_expect = XTERM;
491453ba
DM
4859 /* for /@a/, we leave the joining for the regex engine to do
4860 * (unless we're within \Q etc) */
4861 PL_lex_dojoin = (*PL_bufptr == '@'
4862 && (!PL_lex_inpat || PL_lex_casemods));
3280af22
NIS
4863 PL_lex_state = LEX_INTERPNORMAL;
4864 if (PL_lex_dojoin) {
cd81e915 4865 start_force(PL_curforce);
9ded7720 4866 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4867 force_next(',');
cd81e915 4868 start_force(PL_curforce);
a0d0e21e 4869 force_ident("\"", '$');
cd81e915 4870 start_force(PL_curforce);
9ded7720 4871 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4872 force_next('$');
cd81e915 4873 start_force(PL_curforce);
9ded7720 4874 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4875 force_next((2<<24)|'(');
cd81e915 4876 start_force(PL_curforce);
9ded7720 4877 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4878 force_next(FUNC);
4879 }
9da1dd8f
DM
4880 /* Convert (?{...}) and friends to 'do {...}' */
4881 if (PL_lex_inpat && *PL_bufptr == '(') {
3328ab5a 4882 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9da1dd8f
DM
4883 PL_bufptr += 2;
4884 if (*PL_bufptr != '{')
4885 PL_bufptr++;
6165f85b
DM
4886 start_force(PL_curforce);
4887 /* XXX probably need a CURMAD(something) here */
9da1dd8f
DM
4888 PL_expect = XTERMBLOCK;
4889 force_next(DO);
4890 }
4891
3280af22
NIS
4892 if (PL_lex_starts++) {
4893 s = PL_bufptr;
5db06880
NC
4894#ifdef PERL_MAD
4895 if (PL_madskills) {
cd81e915
NC
4896 if (PL_thistoken)
4897 sv_free(PL_thistoken);
6b29d1f5 4898 PL_thistoken = newSVpvs("");
5db06880
NC
4899 }
4900#endif
131b3ad0
DM
4901 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4902 if (!PL_lex_casemods && PL_lex_inpat)
4903 OPERATOR(',');
4904 else
4905 Aop(OP_CONCAT);
79072805 4906 }
cea2e8a9 4907 return yylex();
79072805
LW
4908
4909 case LEX_INTERPENDMAYBE:
3280af22
NIS
4910 if (intuit_more(PL_bufptr)) {
4911 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4912 break;
4913 }
4914 /* FALL THROUGH */
4915
4916 case LEX_INTERPEND:
3280af22
NIS
4917 if (PL_lex_dojoin) {
4918 PL_lex_dojoin = FALSE;
4919 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4920#ifdef PERL_MAD
4921 if (PL_madskills) {
cd81e915
NC
4922 if (PL_thistoken)
4923 sv_free(PL_thistoken);
6b29d1f5 4924 PL_thistoken = newSVpvs("");
5db06880
NC
4925 }
4926#endif
78cdf107 4927 PL_lex_allbrackets--;
bbf60fe6 4928 return REPORT(')');
79072805 4929 }
43a16006 4930 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4931 && SvEVALED(PL_lex_repl))
43a16006 4932 {
e9fa98b2 4933 if (PL_bufptr != PL_bufend)
cea2e8a9 4934 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4935 PL_lex_repl = NULL;
e9fa98b2 4936 }
db444266
FC
4937 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4938 re_eval_str. If the here-doc body’s length equals the previous
4939 value of re_eval_start, re_eval_start will now be null. So
4940 check re_eval_str as well. */
3328ab5a
FC
4941 if (PL_parser->lex_shared->re_eval_start
4942 || PL_parser->lex_shared->re_eval_str) {
db444266 4943 SV *sv;
9da1dd8f
DM
4944 if (*PL_bufptr != ')')
4945 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4946 PL_bufptr++;
4947 /* having compiled a (?{..}) expression, return the original
4948 * text too, as a const */
3328ab5a
FC
4949 if (PL_parser->lex_shared->re_eval_str) {
4950 sv = PL_parser->lex_shared->re_eval_str;
4951 PL_parser->lex_shared->re_eval_str = NULL;
4952 SvCUR_set(sv,
4953 PL_bufptr - PL_parser->lex_shared->re_eval_start);
db444266
FC
4954 SvPV_shrink_to_cur(sv);
4955 }
3328ab5a
FC
4956 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4957 PL_bufptr - PL_parser->lex_shared->re_eval_start);
6165f85b
DM
4958 start_force(PL_curforce);
4959 /* XXX probably need a CURMAD(something) here */
4960 NEXTVAL_NEXTTOKE.opval =
9da1dd8f 4961 (OP*)newSVOP(OP_CONST, 0,
db444266 4962 sv);
9da1dd8f 4963 force_next(THING);
3328ab5a 4964 PL_parser->lex_shared->re_eval_start = NULL;
9da1dd8f
DM
4965 PL_expect = XTERM;
4966 return REPORT(',');
4967 }
4968
79072805
LW
4969 /* FALLTHROUGH */
4970 case LEX_INTERPCONCAT:
4971#ifdef DEBUGGING
3280af22 4972 if (PL_lex_brackets)
5637ef5b
NC
4973 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4974 (long) PL_lex_brackets);
79072805 4975#endif
3280af22 4976 if (PL_bufptr == PL_bufend)
bbf60fe6 4977 return REPORT(sublex_done());
79072805 4978
9da1dd8f
DM
4979 /* m'foo' still needs to be parsed for possible (?{...}) */
4980 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
3280af22 4981 SV *sv = newSVsv(PL_linestr);
9da1dd8f 4982 sv = tokeq(sv);
6154021b 4983 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4984 s = PL_bufend;
79072805
LW
4985 }
4986 else {
3280af22 4987 s = scan_const(PL_bufptr);
79072805 4988 if (*s == '\\')
3280af22 4989 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4990 else
3280af22 4991 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4992 }
4993
3280af22 4994 if (s != PL_bufptr) {
cd81e915 4995 start_force(PL_curforce);
5db06880
NC
4996 if (PL_madskills) {
4997 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4998 }
6154021b 4999 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 5000 PL_expect = XTERM;
79072805 5001 force_next(THING);
131b3ad0 5002 if (PL_lex_starts++) {
5db06880
NC
5003#ifdef PERL_MAD
5004 if (PL_madskills) {
cd81e915
NC
5005 if (PL_thistoken)
5006 sv_free(PL_thistoken);
6b29d1f5 5007 PL_thistoken = newSVpvs("");
5db06880
NC
5008 }
5009#endif
131b3ad0
DM
5010 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5011 if (!PL_lex_casemods && PL_lex_inpat)
5012 OPERATOR(',');
5013 else
5014 Aop(OP_CONCAT);
5015 }
79072805 5016 else {
3280af22 5017 PL_bufptr = s;
cea2e8a9 5018 return yylex();
79072805
LW
5019 }
5020 }
5021
cea2e8a9 5022 return yylex();
a0d0e21e 5023 case LEX_FORMLINE:
3280af22
NIS
5024 s = scan_formline(PL_bufptr);
5025 if (!PL_lex_formbrack)
7c70caa5 5026 {
705fe0e5 5027 formbrack = 1;
a0d0e21e 5028 goto rightbracket;
7c70caa5 5029 }
705fe0e5
FC
5030 PL_bufptr = s;
5031 return yylex();
79072805
LW
5032 }
5033
3280af22
NIS
5034 s = PL_bufptr;
5035 PL_oldoldbufptr = PL_oldbufptr;
5036 PL_oldbufptr = s;
463ee0b2
LW
5037
5038 retry:
5db06880 5039#ifdef PERL_MAD
cd81e915
NC
5040 if (PL_thistoken) {
5041 sv_free(PL_thistoken);
5042 PL_thistoken = 0;
5db06880 5043 }
cd81e915 5044 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 5045#endif
378cc40b
LW
5046 switch (*s) {
5047 default:
32833930 5048 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
834a4ddd 5049 goto keylookup;
b1fc3636 5050 {
e2f06df0
BF
5051 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5052 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
5053 UTF8SKIP(s),
5054 SVs_TEMP | SVf_UTF8),
5055 10, UNI_DISPLAY_ISPRINT))
5056 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
b1fc3636
CJ
5057 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5058 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5059 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5060 } else {
5061 d = PL_linestart;
5062 }
5063 *s = '\0';
e2f06df0
BF
5064 sv_setpv(dsv, d);
5065 if (UTF)
5066 SvUTF8_on(dsv);
5067 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
b1fc3636 5068 }
e929a76b
LW
5069 case 4:
5070 case 26:
5071 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 5072 case 0:
5db06880
NC
5073#ifdef PERL_MAD
5074 if (PL_madskills)
cd81e915 5075 PL_faketokens = 0;
5db06880 5076#endif
60d63348 5077 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
3280af22
NIS
5078 PL_last_uni = 0;
5079 PL_last_lop = 0;
a7aaec61
Z
5080 if (PL_lex_brackets &&
5081 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
5082 yyerror((const char *)
5083 (PL_lex_formbrack
5084 ? "Format not terminated"
5085 : "Missing right curly or square bracket"));
c5ee2135 5086 }
4e553d73 5087 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 5088 "### Tokener got EOF\n");
5f80b19c 5089 } );
79072805 5090 TOKEN(0);
463ee0b2 5091 }
3280af22 5092 if (s++ < PL_bufend)
a687059c 5093 goto retry; /* ignore stray nulls */
3280af22
NIS
5094 PL_last_uni = 0;
5095 PL_last_lop = 0;
5096 if (!PL_in_eval && !PL_preambled) {
5097 PL_preambled = TRUE;
5db06880
NC
5098#ifdef PERL_MAD
5099 if (PL_madskills)
cd81e915 5100 PL_faketokens = 1;
5db06880 5101#endif
5ab7ff98
NC
5102 if (PL_perldb) {
5103 /* Generate a string of Perl code to load the debugger.
5104 * If PERL5DB is set, it will return the contents of that,
5105 * otherwise a compile-time require of perl5db.pl. */
5106
5107 const char * const pdb = PerlEnv_getenv("PERL5DB");
5108
5109 if (pdb) {
5110 sv_setpv(PL_linestr, pdb);
5111 sv_catpvs(PL_linestr,";");
5112 } else {
5113 SETERRNO(0,SS_NORMAL);
5114 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5115 }
5116 } else
5117 sv_setpvs(PL_linestr,"");
c62eb204
NC
5118 if (PL_preambleav) {
5119 SV **svp = AvARRAY(PL_preambleav);
5120 SV **const end = svp + AvFILLp(PL_preambleav);
5121 while(svp <= end) {
5122 sv_catsv(PL_linestr, *svp);
5123 ++svp;
396482e1 5124 sv_catpvs(PL_linestr, ";");
91b7def8 5125 }
daba3364 5126 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 5127 PL_preambleav = NULL;
91b7def8 5128 }
9f639728
FR
5129 if (PL_minus_E)
5130 sv_catpvs(PL_linestr,
5131 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 5132 if (PL_minus_n || PL_minus_p) {
f0e67a1d 5133 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 5134 if (PL_minus_l)
396482e1 5135 sv_catpvs(PL_linestr,"chomp;");
3280af22 5136 if (PL_minus_a) {
3280af22 5137 if (PL_minus_F) {
3792a11b
NC
5138 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5139 || *PL_splitstr == '"')
3280af22 5140 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 5141 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 5142 else {
c8ef6a4b
NC
5143 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5144 bytes can be used as quoting characters. :-) */
dd374669 5145 const char *splits = PL_splitstr;
91d456ae 5146 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
5147 do {
5148 /* Need to \ \s */
dd374669
AL
5149 if (*splits == '\\')
5150 sv_catpvn(PL_linestr, splits, 1);
5151 sv_catpvn(PL_linestr, splits, 1);
5152 } while (*splits++);
48c4c863
NC
5153 /* This loop will embed the trailing NUL of
5154 PL_linestr as the last thing it does before
5155 terminating. */
396482e1 5156 sv_catpvs(PL_linestr, ");");
54310121 5157 }
2304df62
AD
5158 }
5159 else
396482e1 5160 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 5161 }
79072805 5162 }
396482e1 5163 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
5164 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5165 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5166 PL_last_lop = PL_last_uni = NULL;
65269a95 5167 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 5168 update_debugger_info(PL_linestr, NULL, 0);
79072805 5169 goto retry;
a687059c 5170 }
e929a76b 5171 do {
580561a3
Z
5172 fake_eof = 0;
5173 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 5174 if (0) {
7e28d3af 5175 fake_eof:
f0e67a1d
Z
5176 fake_eof = LEX_FAKE_EOF;
5177 }
5178 PL_bufptr = PL_bufend;
83944c01 5179 COPLINE_INC_WITH_HERELINES;
f0e67a1d 5180 if (!lex_next_chunk(fake_eof)) {
17cc9359 5181 CopLINE_dec(PL_curcop);
f0e67a1d
Z
5182 s = PL_bufptr;
5183 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5184 }
17cc9359 5185 CopLINE_dec(PL_curcop);
5db06880 5186#ifdef PERL_MAD
f0e67a1d 5187 if (!PL_rsfp)
cd81e915 5188 PL_realtokenstart = -1;
5db06880 5189#endif
f0e67a1d 5190 s = PL_bufptr;
7aa207d6
JH
5191 /* If it looks like the start of a BOM or raw UTF-16,
5192 * check if it in fact is. */
580561a3 5193 if (bof && PL_rsfp &&
7aa207d6
JH
5194 (*s == 0 ||
5195 *(U8*)s == 0xEF ||
5196 *(U8*)s >= 0xFE ||
5197 s[1] == 0)) {
879bc93b
DM
5198 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5199 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
5200#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5201 /* offset may include swallowed CR */
5202 if (!bof)
879bc93b 5203 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 5204#endif
7e28d3af 5205 if (bof) {
3280af22 5206 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 5207 s = swallow_bom((U8*)s);
e929a76b 5208 }
378cc40b 5209 }
737c24fc 5210 if (PL_parser->in_pod) {
a0d0e21e 5211 /* Incest with pod. */
5db06880
NC
5212#ifdef PERL_MAD
5213 if (PL_madskills)
cd81e915 5214 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 5215#endif
01a57ef7 5216 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 5217 sv_setpvs(PL_linestr, "");
3280af22
NIS
5218 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5219 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5220 PL_last_lop = PL_last_uni = NULL;
737c24fc 5221 PL_parser->in_pod = 0;
a0d0e21e 5222 }
4e553d73 5223 }
60d63348 5224 if (PL_rsfp || PL_parser->filtered)
85613cab 5225 incline(s);
737c24fc 5226 } while (PL_parser->in_pod);
3280af22 5227 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 5228 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5229 PL_last_lop = PL_last_uni = NULL;
57843af0 5230 if (CopLINE(PL_curcop) == 1) {
3280af22 5231 while (s < PL_bufend && isSPACE(*s))
79072805 5232 s++;
a0d0e21e 5233 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 5234 s++;
5db06880
NC
5235#ifdef PERL_MAD
5236 if (PL_madskills)
cd81e915 5237 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 5238#endif
bd61b366 5239 d = NULL;
3280af22 5240 if (!PL_in_eval) {
44a8e56a 5241 if (*s == '#' && *(s+1) == '!')
5242 d = s + 2;
5243#ifdef ALTERNATE_SHEBANG
5244 else {
bfed75c6 5245 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 5246 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5247 d = s + (sizeof(as) - 1);
5248 }
5249#endif /* ALTERNATE_SHEBANG */
5250 }
5251 if (d) {
b8378b72 5252 char *ipath;
774d564b 5253 char *ipathend;
b8378b72 5254
774d564b 5255 while (isSPACE(*d))
b8378b72
CS
5256 d++;
5257 ipath = d;
774d564b 5258 while (*d && !isSPACE(*d))
5259 d++;
5260 ipathend = d;
5261
5262#ifdef ARG_ZERO_IS_SCRIPT
5263 if (ipathend > ipath) {
5264 /*
5265 * HP-UX (at least) sets argv[0] to the script name,
5266 * which makes $^X incorrect. And Digital UNIX and Linux,
5267 * at least, set argv[0] to the basename of the Perl
5268 * interpreter. So, having found "#!", we'll set it right.
5269 */
fafc274c
NC
5270 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5271 SVt_PV)); /* $^X */
774d564b 5272 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 5273 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 5274 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 5275 SvSETMAGIC(x);
5276 }
556c1dec
JH
5277 else {
5278 STRLEN blen;
5279 STRLEN llen;
cfd0369c 5280 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 5281 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
5282 if (llen < blen) {
5283 bstart += blen - llen;
5284 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5285 sv_setpvn(x, ipath, ipathend - ipath);
5286 SvSETMAGIC(x);
5287 }
5288 }
5289 }
774d564b 5290 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 5291 }
774d564b 5292#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
5293
5294 /*
5295 * Look for options.
5296 */
748a9306 5297 d = instr(s,"perl -");
84e30d1a 5298 if (!d) {
748a9306 5299 d = instr(s,"perl");
84e30d1a
GS
5300#if defined(DOSISH)
5301 /* avoid getting into infinite loops when shebang
5302 * line contains "Perl" rather than "perl" */
5303 if (!d) {
5304 for (d = ipathend-4; d >= ipath; --d) {
5305 if ((*d == 'p' || *d == 'P')
5306 && !ibcmp(d, "perl", 4))
5307 {
5308 break;
5309 }
5310 }
5311 if (d < ipath)
bd61b366 5312 d = NULL;
84e30d1a
GS
5313 }
5314#endif
5315 }
44a8e56a 5316#ifdef ALTERNATE_SHEBANG
5317 /*
5318 * If the ALTERNATE_SHEBANG on this system starts with a
5319 * character that can be part of a Perl expression, then if
5320 * we see it but not "perl", we're probably looking at the
5321 * start of Perl code, not a request to hand off to some
5322 * other interpreter. Similarly, if "perl" is there, but
5323 * not in the first 'word' of the line, we assume the line
5324 * contains the start of the Perl program.
44a8e56a 5325 */
5326 if (d && *s != '#') {
f54cb97a 5327 const char *c = ipath;
44a8e56a 5328 while (*c && !strchr("; \t\r\n\f\v#", *c))
5329 c++;
5330 if (c < d)
bd61b366 5331 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 5332 else
5333 *s = '#'; /* Don't try to parse shebang line */
5334 }
774d564b 5335#endif /* ALTERNATE_SHEBANG */
748a9306 5336 if (!d &&
44a8e56a 5337 *s == '#' &&
774d564b 5338 ipathend > ipath &&
3280af22 5339 !PL_minus_c &&
748a9306 5340 !instr(s,"indir") &&
3280af22 5341 instr(PL_origargv[0],"perl"))
748a9306 5342 {
27da23d5 5343 dVAR;
9f68db38 5344 char **newargv;
9f68db38 5345
774d564b 5346 *ipathend = '\0';
5347 s = ipathend + 1;
3280af22 5348 while (s < PL_bufend && isSPACE(*s))
9f68db38 5349 s++;
3280af22 5350 if (s < PL_bufend) {
d85f917e 5351 Newx(newargv,PL_origargc+3,char*);
9f68db38 5352 newargv[1] = s;
3280af22 5353 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
5354 s++;
5355 *s = '\0';
3280af22 5356 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
5357 }
5358 else
3280af22 5359 newargv = PL_origargv;
774d564b 5360 newargv[0] = ipath;
b35112e7 5361 PERL_FPU_PRE_EXEC
b4748376 5362 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 5363 PERL_FPU_POST_EXEC
cea2e8a9 5364 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 5365 }
748a9306 5366 if (d) {
c35e046a
AL
5367 while (*d && !isSPACE(*d))
5368 d++;
5369 while (SPACE_OR_TAB(*d))
5370 d++;
748a9306
LW
5371
5372 if (*d++ == '-') {
f54cb97a 5373 const bool switches_done = PL_doswitches;
fb993905
GA
5374 const U32 oldpdb = PL_perldb;
5375 const bool oldn = PL_minus_n;
5376 const bool oldp = PL_minus_p;
c7030b81 5377 const char *d1 = d;
fb993905 5378
8cc95fdb 5379 do {
4ba71d51
FC
5380 bool baduni = FALSE;
5381 if (*d1 == 'C') {
bd0ab00d
NC
5382 const char *d2 = d1 + 1;
5383 if (parse_unicode_opts((const char **)&d2)
5384 != PL_unicode)
5385 baduni = TRUE;
4ba71d51
FC
5386 }
5387 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
5388 const char * const m = d1;
5389 while (*d1 && !isSPACE(*d1))
5390 d1++;
cea2e8a9 5391 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 5392 (int)(d1 - m), m);
8cc95fdb 5393 }
c7030b81
NC
5394 d1 = moreswitches(d1);
5395 } while (d1);
f0b2cf55
YST
5396 if (PL_doswitches && !switches_done) {
5397 int argc = PL_origargc;
5398 char **argv = PL_origargv;
5399 do {
5400 argc--,argv++;
5401 } while (argc && argv[0][0] == '-' && argv[0][1]);
5402 init_argv_symbols(argc,argv);
5403 }
65269a95 5404 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5405 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5406 /* if we have already added "LINE: while (<>) {",
5407 we must not do it again */
748a9306 5408 {
76f68e9b 5409 sv_setpvs(PL_linestr, "");
3280af22
NIS
5410 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5411 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5412 PL_last_lop = PL_last_uni = NULL;
3280af22 5413 PL_preambled = FALSE;
65269a95 5414 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5415 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5416 goto retry;
5417 }
a0d0e21e 5418 }
79072805 5419 }
9f68db38 5420 }
79072805 5421 }
3280af22 5422 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5423 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5424 start_force(PL_curforce);
5425 NEXTVAL_NEXTTOKE.ival = 0;
5426 force_next(FORMRBRACK);
5427 TOKEN(';');
ae986130 5428 }
378cc40b 5429 goto retry;
4fdae800 5430 case '\r':
6a27c188 5431#ifdef PERL_STRICT_CR
cea2e8a9 5432 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5433 Perl_croak(aTHX_
cc507455 5434 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5435#endif
4fdae800 5436 case ' ': case '\t': case '\f': case 013:
5db06880 5437#ifdef PERL_MAD
cd81e915 5438 PL_realtokenstart = -1;
1cac5c33
FC
5439 if (PL_madskills) {
5440 if (!PL_thiswhite)
ac372eb8 5441 PL_thiswhite = newSVpvs("");
1cac5c33
FC
5442 sv_catpvn(PL_thiswhite, s, 1);
5443 }
5db06880 5444#endif
ac372eb8 5445 s++;
378cc40b 5446 goto retry;
378cc40b 5447 case '#':
e929a76b 5448 case '\n':
5db06880 5449#ifdef PERL_MAD
cd81e915 5450 PL_realtokenstart = -1;
5db06880 5451 if (PL_madskills)
cd81e915 5452 PL_faketokens = 0;
5db06880 5453#endif
60d63348 5454 if (PL_lex_state != LEX_NORMAL ||
62e4c90a
FC
5455 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5456 if (*s == '#' && s == PL_linestart && PL_in_eval
60d63348 5457 && !PL_rsfp && !PL_parser->filtered) {
df0deb90
GS
5458 /* handle eval qq[#line 1 "foo"\n ...] */
5459 CopLINE_dec(PL_curcop);
5460 incline(s);
5461 }
5db06880
NC
5462 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5463 s = SKIPSPACE0(s);
62e4c90a 5464 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5db06880
NC
5465 incline(s);
5466 }
5467 else {
9c74ccc9 5468 const bool in_comment = *s == '#';
5db06880
NC
5469 d = s;
5470 while (d < PL_bufend && *d != '\n')
5471 d++;
5472 if (d < PL_bufend)
5473 d++;
5474 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5637ef5b
NC
5475 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5476 d, PL_bufend);
5db06880
NC
5477#ifdef PERL_MAD
5478 if (PL_madskills)
cd81e915 5479 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5480#endif
5481 s = d;
9c74ccc9
FC
5482 if (in_comment && d == PL_bufend
5483 && PL_lex_state == LEX_INTERPNORMAL
90a536e1 5484 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
9c74ccc9
FC
5485 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5486 else incline(s);
5db06880 5487 }
3280af22 5488 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5489 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5490 start_force(PL_curforce);
5491 NEXTVAL_NEXTTOKE.ival = 0;
5492 force_next(FORMRBRACK);
5493 TOKEN(';');
a687059c 5494 }
378cc40b 5495 }
a687059c 5496 else {
5db06880
NC
5497#ifdef PERL_MAD
5498 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5499 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5500 PL_faketokens = 0;
5db06880
NC
5501 s = SKIPSPACE0(s);
5502 TOKEN(PEG); /* make sure any #! line is accessible */
5503 }
5504 s = SKIPSPACE0(s);
5505 }
5506 else {
5507/* if (PL_madskills && PL_lex_formbrack) { */
5508 d = s;
5509 while (d < PL_bufend && *d != '\n')
5510 d++;
5511 if (d < PL_bufend)
5512 d++;
5513 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5514 Perl_croak(aTHX_ "panic: input overflow");
5515 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5516 if (!PL_thiswhite)
6b29d1f5 5517 PL_thiswhite = newSVpvs("");
5db06880 5518 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5519 sv_setpvs(PL_thiswhite, "");
cd81e915 5520 PL_faketokens = 0;
5db06880 5521 }
cd81e915 5522 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5523 }
5524 s = d;
5525/* }
5526 *s = '\0';
5527 PL_bufend = s; */
5528 }
5529#else
378cc40b 5530 *s = '\0';
3280af22 5531 PL_bufend = s;
5db06880 5532#endif
a687059c 5533 }
378cc40b
LW
5534 goto retry;
5535 case '-':
0eb30aeb 5536 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
e5edeb50 5537 I32 ftst = 0;
90771dc0 5538 char tmp;
e5edeb50 5539
378cc40b 5540 s++;
3280af22 5541 PL_bufptr = s;
748a9306
LW
5542 tmp = *s++;
5543
bf4acbe4 5544 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5545 s++;
5546
5547 if (strnEQ(s,"=>",2)) {
3280af22 5548 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5549 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5550 OPERATOR('-'); /* unary minus */
5551 }
3280af22 5552 PL_last_uni = PL_oldbufptr;
748a9306 5553 switch (tmp) {
e5edeb50
JH
5554 case 'r': ftst = OP_FTEREAD; break;
5555 case 'w': ftst = OP_FTEWRITE; break;
5556 case 'x': ftst = OP_FTEEXEC; break;
5557 case 'o': ftst = OP_FTEOWNED; break;
5558 case 'R': ftst = OP_FTRREAD; break;
5559 case 'W': ftst = OP_FTRWRITE; break;
5560 case 'X': ftst = OP_FTREXEC; break;
5561 case 'O': ftst = OP_FTROWNED; break;
5562 case 'e': ftst = OP_FTIS; break;
5563 case 'z': ftst = OP_FTZERO; break;
5564 case 's': ftst = OP_FTSIZE; break;
5565 case 'f': ftst = OP_FTFILE; break;
5566 case 'd': ftst = OP_FTDIR; break;
5567 case 'l': ftst = OP_FTLINK; break;
5568 case 'p': ftst = OP_FTPIPE; break;
5569 case 'S': ftst = OP_FTSOCK; break;
5570 case 'u': ftst = OP_FTSUID; break;
5571 case 'g': ftst = OP_FTSGID; break;
5572 case 'k': ftst = OP_FTSVTX; break;
5573 case 'b': ftst = OP_FTBLK; break;
5574 case 'c': ftst = OP_FTCHR; break;
5575 case 't': ftst = OP_FTTTY; break;
5576 case 'T': ftst = OP_FTTEXT; break;
5577 case 'B': ftst = OP_FTBINARY; break;
5578 case 'M': case 'A': case 'C':
fafc274c 5579 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5580 switch (tmp) {
5581 case 'M': ftst = OP_FTMTIME; break;
5582 case 'A': ftst = OP_FTATIME; break;
5583 case 'C': ftst = OP_FTCTIME; break;
5584 default: break;
5585 }
5586 break;
378cc40b 5587 default:
378cc40b
LW
5588 break;
5589 }
e5edeb50 5590 if (ftst) {
eb160463 5591 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5592 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5593 "### Saw file test %c\n", (int)tmp);
5f80b19c 5594 } );
e5edeb50
JH
5595 FTST(ftst);
5596 }
5597 else {
5598 /* Assume it was a minus followed by a one-letter named
5599 * subroutine call (or a -bareword), then. */
95c31fe3 5600 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5601 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5602 (int) tmp);
5f80b19c 5603 } );
3cf7b4c4 5604 s = --PL_bufptr;
e5edeb50 5605 }
378cc40b 5606 }
90771dc0
NC
5607 {
5608 const char tmp = *s++;
5609 if (*s == tmp) {
5610 s++;
5611 if (PL_expect == XOPERATOR)
5612 TERM(POSTDEC);
5613 else
5614 OPERATOR(PREDEC);
5615 }
5616 else if (*s == '>') {
5617 s++;
29595ff2 5618 s = SKIPSPACE1(s);
90771dc0
NC
5619 if (isIDFIRST_lazy_if(s,UTF)) {
5620 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5621 TOKEN(ARROW);
5622 }
5623 else if (*s == '$')
5624 OPERATOR(ARROW);
5625 else
5626 TERM(ARROW);
5627 }
78cdf107
Z
5628 if (PL_expect == XOPERATOR) {
5629 if (*s == '=' && !PL_lex_allbrackets &&
5630 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5631 s--;
5632 TOKEN(0);
5633 }
90771dc0 5634 Aop(OP_SUBTRACT);
78cdf107 5635 }
90771dc0
NC
5636 else {
5637 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5638 check_uni();
5639 OPERATOR('-'); /* unary minus */
79072805 5640 }
2f3197b3 5641 }
79072805 5642
378cc40b 5643 case '+':
90771dc0
NC
5644 {
5645 const char tmp = *s++;
5646 if (*s == tmp) {
5647 s++;
5648 if (PL_expect == XOPERATOR)
5649 TERM(POSTINC);
5650 else
5651 OPERATOR(PREINC);
5652 }
78cdf107
Z
5653 if (PL_expect == XOPERATOR) {
5654 if (*s == '=' && !PL_lex_allbrackets &&
5655 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5656 s--;
5657 TOKEN(0);
5658 }
90771dc0 5659 Aop(OP_ADD);
78cdf107 5660 }
90771dc0
NC
5661 else {
5662 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5663 check_uni();
5664 OPERATOR('+');
5665 }
2f3197b3 5666 }
a687059c 5667
378cc40b 5668 case '*':
3280af22
NIS
5669 if (PL_expect != XOPERATOR) {
5670 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5671 PL_expect = XOPERATOR;
5672 force_ident(PL_tokenbuf, '*');
5673 if (!*PL_tokenbuf)
a0d0e21e 5674 PREREF('*');
79072805 5675 TERM('*');
a687059c 5676 }
79072805
LW
5677 s++;
5678 if (*s == '*') {
a687059c 5679 s++;
78cdf107
Z
5680 if (*s == '=' && !PL_lex_allbrackets &&
5681 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5682 s -= 2;
5683 TOKEN(0);
5684 }
79072805 5685 PWop(OP_POW);
a687059c 5686 }
78cdf107
Z
5687 if (*s == '=' && !PL_lex_allbrackets &&
5688 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5689 s--;
5690 TOKEN(0);
5691 }
79072805
LW
5692 Mop(OP_MULTIPLY);
5693
378cc40b 5694 case '%':
3280af22 5695 if (PL_expect == XOPERATOR) {
78cdf107
Z
5696 if (s[1] == '=' && !PL_lex_allbrackets &&
5697 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5698 TOKEN(0);
bbce6d69 5699 ++s;
5700 Mop(OP_MODULO);
a687059c 5701 }
3280af22 5702 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5703 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5704 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5705 if (!PL_tokenbuf[1]) {
bbce6d69 5706 PREREF('%');
a687059c 5707 }
60ac52eb
FC
5708 PL_expect = XOPERATOR;
5709 force_ident_maybe_lex('%');
bbce6d69 5710 TERM('%');
a687059c 5711
378cc40b 5712 case '^':
78cdf107
Z
5713 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5714 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5715 TOKEN(0);
79072805 5716 s++;
a0d0e21e 5717 BOop(OP_BIT_XOR);
79072805 5718 case '[':
a7aaec61
Z
5719 if (PL_lex_brackets > 100)
5720 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5721 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5722 PL_lex_allbrackets++;
df3467db
IG
5723 {
5724 const char tmp = *s++;
5725 OPERATOR(tmp);
5726 }
378cc40b 5727 case '~':
0d863452 5728 if (s[1] == '~'
3e7dd34d 5729 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5730 {
78cdf107
Z
5731 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5732 TOKEN(0);
0d863452 5733 s += 2;
0f539b13
BF
5734 Perl_ck_warner_d(aTHX_
5735 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5736 "Smartmatch is experimental");
0d863452
RH
5737 Eop(OP_SMARTMATCH);
5738 }
78cdf107
Z
5739 s++;
5740 OPERATOR('~');
378cc40b 5741 case ',':
78cdf107
Z
5742 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5743 TOKEN(0);
5744 s++;
5745 OPERATOR(',');
a0d0e21e
LW
5746 case ':':
5747 if (s[1] == ':') {
5748 len = 0;
0bfa2a8a 5749 goto just_a_word_zero_gv;
a0d0e21e
LW
5750 }
5751 s++;
09bef843
SB
5752 switch (PL_expect) {
5753 OP *attrs;
5db06880
NC
5754#ifdef PERL_MAD
5755 I32 stuffstart;
5756#endif
09bef843
SB
5757 case XOPERATOR:
5758 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5759 break;
5760 PL_bufptr = s; /* update in case we back off */
d83f38d8 5761 if (*s == '=') {
2dc78664
NC
5762 Perl_croak(aTHX_
5763 "Use of := for an empty attribute list is not allowed");
d83f38d8 5764 }
09bef843
SB
5765 goto grabattrs;
5766 case XATTRBLOCK:
5767 PL_expect = XBLOCK;
5768 goto grabattrs;
5769 case XATTRTERM:
5770 PL_expect = XTERMBLOCK;
5771 grabattrs:
5db06880
NC
5772#ifdef PERL_MAD
5773 stuffstart = s - SvPVX(PL_linestr) - 1;
5774#endif
29595ff2 5775 s = PEEKSPACE(s);
5f66b61c 5776 attrs = NULL;
7e2040f0 5777 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5778 I32 tmp;
5cc237b8 5779 SV *sv;
09bef843 5780 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5781 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5782 if (tmp < 0) tmp = -tmp;
5783 switch (tmp) {
5784 case KEY_or:
5785 case KEY_and:
5786 case KEY_for:
11baf631 5787 case KEY_foreach:
f9829d6b
GS
5788 case KEY_unless:
5789 case KEY_if:
5790 case KEY_while:
5791 case KEY_until:
5792 goto got_attrs;
5793 default:
5794 break;
5795 }
5796 }
89a5757c 5797 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
09bef843 5798 if (*d == '(') {
4d68ffa0 5799 d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
09bef843 5800 if (!d) {
09bef843
SB
5801 /* MUST advance bufptr here to avoid bogus
5802 "at end of line" context messages from yyerror().
5803 */
5804 PL_bufptr = s + len;
5805 yyerror("Unterminated attribute parameter in attribute list");
5806 if (attrs)
5807 op_free(attrs);
5cc237b8 5808 sv_free(sv);
bbf60fe6 5809 return REPORT(0); /* EOF indicator */
09bef843
SB
5810 }
5811 }
5812 if (PL_lex_stuff) {
09bef843 5813 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5814 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5815 newSVOP(OP_CONST, 0, sv));
5816 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5817 PL_lex_stuff = NULL;
09bef843
SB
5818 }
5819 else {
5cc237b8
BS
5820 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5821 sv_free(sv);
1108974d 5822 if (PL_in_my == KEY_our) {
df9a6019 5823 deprecate(":unique");
1108974d 5824 }
bfed75c6 5825 else
371fce9b
DM
5826 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5827 }
5828
d3cea301
SB
5829 /* NOTE: any CV attrs applied here need to be part of
5830 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5831 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5832 sv_free(sv);
78f9721b 5833 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5834 }
5835 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5836 sv_free(sv);
8e5dadda 5837 deprecate(":locked");
5cc237b8
BS
5838 }
5839 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5840 sv_free(sv);
78f9721b 5841 CvMETHOD_on(PL_compcv);
5cc237b8 5842 }
78f9721b
SM
5843 /* After we've set the flags, it could be argued that
5844 we don't need to do the attributes.pm-based setting
5845 process, and shouldn't bother appending recognized
d3cea301
SB
5846 flags. To experiment with that, uncomment the
5847 following "else". (Note that's already been
5848 uncommented. That keeps the above-applied built-in
5849 attributes from being intercepted (and possibly
5850 rejected) by a package's attribute routines, but is
5851 justified by the performance win for the common case
5852 of applying only built-in attributes.) */
0256094b 5853 else
2fcb4757 5854 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5855 newSVOP(OP_CONST, 0,
5cc237b8 5856 sv));
09bef843 5857 }
29595ff2 5858 s = PEEKSPACE(d);
0120eecf 5859 if (*s == ':' && s[1] != ':')
29595ff2 5860 s = PEEKSPACE(s+1);
0120eecf
GS
5861 else if (s == d)
5862 break; /* require real whitespace or :'s */
29595ff2 5863 /* XXX losing whitespace on sequential attributes here */
09bef843 5864 }
90771dc0
NC
5865 {
5866 const char tmp
5867 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5868 if (*s != ';' && *s != '}' && *s != tmp
5869 && (tmp != '=' || *s != ')')) {
5870 const char q = ((*s == '\'') ? '"' : '\'');
5871 /* If here for an expression, and parsed no attrs, back
5872 off. */
5873 if (tmp == '=' && !attrs) {
5874 s = PL_bufptr;
5875 break;
5876 }
5877 /* MUST advance bufptr here to avoid bogus "at end of line"
5878 context messages from yyerror().
5879 */
5880 PL_bufptr = s;
10edeb5d
JH
5881 yyerror( (const char *)
5882 (*s
5883 ? Perl_form(aTHX_ "Invalid separator character "
5884 "%c%c%c in attribute list", q, *s, q)
5885 : "Unterminated attribute list" ) );
90771dc0
NC
5886 if (attrs)
5887 op_free(attrs);
5888 OPERATOR(':');
09bef843 5889 }
09bef843 5890 }
f9829d6b 5891 got_attrs:
09bef843 5892 if (attrs) {
cd81e915 5893 start_force(PL_curforce);
9ded7720 5894 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5895 CURMAD('_', PL_nextwhite);
89122651 5896 force_next(THING);
5db06880
NC
5897 }
5898#ifdef PERL_MAD
5899 if (PL_madskills) {
cd81e915 5900 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5901 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5902 }
5db06880 5903#endif
09bef843
SB
5904 TOKEN(COLONATTR);
5905 }
78cdf107
Z
5906 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5907 s--;
5908 TOKEN(0);
5909 }
5910 PL_lex_allbrackets--;
a0d0e21e 5911 OPERATOR(':');
8990e307
LW
5912 case '(':
5913 s++;
3280af22
NIS
5914 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5915 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5916 else
3280af22 5917 PL_expect = XTERM;
29595ff2 5918 s = SKIPSPACE1(s);
78cdf107 5919 PL_lex_allbrackets++;
a0d0e21e 5920 TOKEN('(');
378cc40b 5921 case ';':
78cdf107
Z
5922 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5923 TOKEN(0);
f4dd75d9 5924 CLINE;
78cdf107
Z
5925 s++;
5926 OPERATOR(';');
378cc40b 5927 case ')':
78cdf107
Z
5928 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5929 TOKEN(0);
5930 s++;
5931 PL_lex_allbrackets--;
5932 s = SKIPSPACE1(s);
5933 if (*s == '{')
5934 PREBLOCK(')');
5935 TERM(')');
79072805 5936 case ']':
a7aaec61
Z
5937 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5938 TOKEN(0);
79072805 5939 s++;
3280af22 5940 if (PL_lex_brackets <= 0)
d98d5fff 5941 yyerror("Unmatched right square bracket");
463ee0b2 5942 else
3280af22 5943 --PL_lex_brackets;
78cdf107 5944 PL_lex_allbrackets--;
3280af22
NIS
5945 if (PL_lex_state == LEX_INTERPNORMAL) {
5946 if (PL_lex_brackets == 0) {
02255c60
FC
5947 if (*s == '-' && s[1] == '>')
5948 PL_lex_state = LEX_INTERPENDMAYBE;
5949 else if (*s != '[' && *s != '{')
3280af22 5950 PL_lex_state = LEX_INTERPEND;
79072805
LW
5951 }
5952 }
4633a7c4 5953 TERM(']');
79072805 5954 case '{':
79072805 5955 s++;
eaf6a13d 5956 leftbracket:
3280af22 5957 if (PL_lex_brackets > 100) {
8edd5f42 5958 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5959 }
3280af22 5960 switch (PL_expect) {
a0d0e21e 5961 case XTERM:
819b004e 5962 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5963 PL_lex_allbrackets++;
79072805 5964 OPERATOR(HASHBRACK);
a0d0e21e 5965 case XOPERATOR:
bf4acbe4 5966 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5967 s++;
44a8e56a 5968 d = s;
3280af22
NIS
5969 PL_tokenbuf[0] = '\0';
5970 if (d < PL_bufend && *d == '-') {
5971 PL_tokenbuf[0] = '-';
44a8e56a 5972 d++;
bf4acbe4 5973 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5974 d++;
5975 }
7e2040f0 5976 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5977 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5978 FALSE, &len);
bf4acbe4 5979 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5980 d++;
5981 if (*d == '}') {
f54cb97a 5982 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5983 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5984 if (minus)
5985 force_next('-');
748a9306
LW
5986 }
5987 }
5988 /* FALL THROUGH */
09bef843 5989 case XATTRBLOCK:
748a9306 5990 case XBLOCK:
3280af22 5991 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5992 PL_lex_allbrackets++;
3280af22 5993 PL_expect = XSTATE;
a0d0e21e 5994 break;
09bef843 5995 case XATTRTERM:
a0d0e21e 5996 case XTERMBLOCK:
3280af22 5997 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5998 PL_lex_allbrackets++;
3280af22 5999 PL_expect = XSTATE;
a0d0e21e
LW
6000 break;
6001 default: {
f54cb97a 6002 const char *t;
3280af22
NIS
6003 if (PL_oldoldbufptr == PL_last_lop)
6004 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 6005 else
3280af22 6006 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 6007 PL_lex_allbrackets++;
29595ff2 6008 s = SKIPSPACE1(s);
8452ff4b
SB
6009 if (*s == '}') {
6010 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6011 PL_expect = XTERM;
6012 /* This hack is to get the ${} in the message. */
6013 PL_bufptr = s+1;
6014 yyerror("syntax error");
6015 break;
6016 }
a0d0e21e 6017 OPERATOR(HASHBRACK);
8452ff4b 6018 }
b8a4b1be
GS
6019 /* This hack serves to disambiguate a pair of curlies
6020 * as being a block or an anon hash. Normally, expectation
6021 * determines that, but in cases where we're not in a
6022 * position to expect anything in particular (like inside
6023 * eval"") we have to resolve the ambiguity. This code
6024 * covers the case where the first term in the curlies is a
6025 * quoted string. Most other cases need to be explicitly
a0288114 6026 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
6027 * curly in order to force resolution as an anon hash.
6028 *
6029 * XXX should probably propagate the outer expectation
6030 * into eval"" to rely less on this hack, but that could
6031 * potentially break current behavior of eval"".
6032 * GSAR 97-07-21
6033 */
6034 t = s;
6035 if (*s == '\'' || *s == '"' || *s == '`') {
6036 /* common case: get past first string, handling escapes */
3280af22 6037 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
6038 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6039 t++;
6040 t++;
a0d0e21e 6041 }
b8a4b1be 6042 else if (*s == 'q') {
3280af22 6043 if (++t < PL_bufend
0eb30aeb 6044 && (!isWORDCHAR(*t)
3280af22 6045 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0eb30aeb 6046 && !isWORDCHAR(*t))))
0505442f 6047 {
abc667d1 6048 /* skip q//-like construct */
f54cb97a 6049 const char *tmps;
b8a4b1be
GS
6050 char open, close, term;
6051 I32 brackets = 1;
6052
3280af22 6053 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 6054 t++;
abc667d1
DM
6055 /* check for q => */
6056 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6057 OPERATOR(HASHBRACK);
6058 }
b8a4b1be
GS
6059 term = *t;
6060 open = term;
6061 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6062 term = tmps[5];
6063 close = term;
6064 if (open == close)
3280af22
NIS
6065 for (t++; t < PL_bufend; t++) {
6066 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 6067 t++;
6d07e5e9 6068 else if (*t == open)
b8a4b1be
GS
6069 break;
6070 }
abc667d1 6071 else {
3280af22
NIS
6072 for (t++; t < PL_bufend; t++) {
6073 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 6074 t++;
6d07e5e9 6075 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
6076 break;
6077 else if (*t == open)
6078 brackets++;
6079 }
abc667d1
DM
6080 }
6081 t++;
b8a4b1be 6082 }
abc667d1
DM
6083 else
6084 /* skip plain q word */
8a2bca12 6085 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
abc667d1 6086 t += UTF8SKIP(t);
a0d0e21e 6087 }
8a2bca12 6088 else if (isWORDCHAR_lazy_if(t,UTF)) {
0505442f 6089 t += UTF8SKIP(t);
8a2bca12 6090 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
0505442f 6091 t += UTF8SKIP(t);
a0d0e21e 6092 }
3280af22 6093 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 6094 t++;
b8a4b1be
GS
6095 /* if comma follows first term, call it an anon hash */
6096 /* XXX it could be a comma expression with loop modifiers */
3280af22 6097 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 6098 || (*t == '=' && t[1] == '>')))
a0d0e21e 6099 OPERATOR(HASHBRACK);
3280af22 6100 if (PL_expect == XREF)
4e4e412b 6101 PL_expect = XTERM;
a0d0e21e 6102 else {
3280af22
NIS
6103 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6104 PL_expect = XSTATE;
a0d0e21e 6105 }
8990e307 6106 }
a0d0e21e 6107 break;
463ee0b2 6108 }
6154021b 6109 pl_yylval.ival = CopLINE(PL_curcop);
79072805 6110 if (isSPACE(*s) || *s == '#')
3280af22 6111 PL_copline = NOLINE; /* invalidate current command line number */
7c70caa5 6112 TOKEN(formbrack ? '=' : '{');
378cc40b 6113 case '}':
a7aaec61
Z
6114 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6115 TOKEN(0);
79072805
LW
6116 rightbracket:
6117 s++;
3280af22 6118 if (PL_lex_brackets <= 0)
d98d5fff 6119 yyerror("Unmatched right curly bracket");
463ee0b2 6120 else
3280af22 6121 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 6122 PL_lex_allbrackets--;
3280af22
NIS
6123 if (PL_lex_state == LEX_INTERPNORMAL) {
6124 if (PL_lex_brackets == 0) {
9059aa12
LW
6125 if (PL_expect & XFAKEBRACK) {
6126 PL_expect &= XENUMMASK;
3280af22
NIS
6127 PL_lex_state = LEX_INTERPEND;
6128 PL_bufptr = s;
5db06880
NC
6129#if 0
6130 if (PL_madskills) {
cd81e915 6131 if (!PL_thiswhite)
6b29d1f5 6132 PL_thiswhite = newSVpvs("");
76f68e9b 6133 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
6134 }
6135#endif
cea2e8a9 6136 return yylex(); /* ignore fake brackets */
79072805 6137 }
f777953f 6138 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6b00f562
FC
6139 && SvEVALED(PL_lex_repl))
6140 PL_lex_state = LEX_INTERPEND;
6141 else if (*s == '-' && s[1] == '>')
3280af22 6142 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 6143 else if (*s != '[' && *s != '{')
3280af22 6144 PL_lex_state = LEX_INTERPEND;
79072805
LW
6145 }
6146 }
9059aa12
LW
6147 if (PL_expect & XFAKEBRACK) {
6148 PL_expect &= XENUMMASK;
3280af22 6149 PL_bufptr = s;
cea2e8a9 6150 return yylex(); /* ignore fake brackets */
748a9306 6151 }
cd81e915 6152 start_force(PL_curforce);
5db06880
NC
6153 if (PL_madskills) {
6154 curmad('X', newSVpvn(s-1,1));
cd81e915 6155 CURMAD('_', PL_thiswhite);
5db06880 6156 }
7c70caa5 6157 force_next(formbrack ? '.' : '}');
583c9d5c 6158 if (formbrack) LEAVE;
5db06880 6159#ifdef PERL_MAD
1cac5c33 6160 if (PL_madskills && !PL_thistoken)
6b29d1f5 6161 PL_thistoken = newSVpvs("");
5db06880 6162#endif
705fe0e5
FC
6163 if (formbrack == 2) { /* means . where arguments were expected */
6164 start_force(PL_curforce);
6165 force_next(';');
96f9b782 6166 TOKEN(FORMRBRACK);
705fe0e5 6167 }
79072805 6168 TOKEN(';');
378cc40b
LW
6169 case '&':
6170 s++;
78cdf107
Z
6171 if (*s++ == '&') {
6172 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6173 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6174 s -= 2;
6175 TOKEN(0);
6176 }
a0d0e21e 6177 AOPERATOR(ANDAND);
78cdf107 6178 }
378cc40b 6179 s--;
3280af22 6180 if (PL_expect == XOPERATOR) {
041457d9
DM
6181 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6182 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 6183 {
57843af0 6184 CopLINE_dec(PL_curcop);
f1f66076 6185 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6186 CopLINE_inc(PL_curcop);
463ee0b2 6187 }
78cdf107
Z
6188 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6189 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6190 s--;
6191 TOKEN(0);
6192 }
79072805 6193 BAop(OP_BIT_AND);
463ee0b2 6194 }
79072805 6195
c07656ed
FC
6196 PL_tokenbuf[0] = '&';
6197 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6198 sizeof PL_tokenbuf - 1, TRUE);
6199 if (PL_tokenbuf[1]) {
3280af22 6200 PL_expect = XOPERATOR;
60ac52eb 6201 force_ident_maybe_lex('&');
463ee0b2 6202 }
79072805
LW
6203 else
6204 PREREF('&');
6154021b 6205 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
6206 TERM('&');
6207
378cc40b
LW
6208 case '|':
6209 s++;
78cdf107
Z
6210 if (*s++ == '|') {
6211 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6212 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6213 s -= 2;
6214 TOKEN(0);
6215 }
a0d0e21e 6216 AOPERATOR(OROR);
78cdf107 6217 }
378cc40b 6218 s--;
78cdf107
Z
6219 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6220 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6221 s--;
6222 TOKEN(0);
6223 }
79072805 6224 BOop(OP_BIT_OR);
378cc40b
LW
6225 case '=':
6226 s++;
748a9306 6227 {
90771dc0 6228 const char tmp = *s++;
78cdf107
Z
6229 if (tmp == '=') {
6230 if (!PL_lex_allbrackets &&
6231 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6232 s -= 2;
6233 TOKEN(0);
6234 }
90771dc0 6235 Eop(OP_EQ);
78cdf107
Z
6236 }
6237 if (tmp == '>') {
6238 if (!PL_lex_allbrackets &&
6239 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6240 s -= 2;
6241 TOKEN(0);
6242 }
90771dc0 6243 OPERATOR(',');
78cdf107 6244 }
90771dc0
NC
6245 if (tmp == '~')
6246 PMop(OP_MATCH);
6247 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6248 && strchr("+-*/%.^&|<",tmp))
6249 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6250 "Reversed %c= operator",(int)tmp);
6251 s--;
6252 if (PL_expect == XSTATE && isALPHA(tmp) &&
6253 (s == PL_linestart+1 || s[-2] == '\n') )
6254 {
62e4c90a 6255 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
4a7239ff 6256 || PL_lex_state != LEX_NORMAL) {
90771dc0
NC
6257 d = PL_bufend;
6258 while (s < d) {
6259 if (*s++ == '\n') {
6260 incline(s);
6261 if (strnEQ(s,"=cut",4)) {
6262 s = strchr(s,'\n');
6263 if (s)
6264 s++;
6265 else
6266 s = d;
6267 incline(s);
6268 goto retry;
6269 }
6270 }
a5f75d66 6271 }
90771dc0 6272 goto retry;
a5f75d66 6273 }
5db06880
NC
6274#ifdef PERL_MAD
6275 if (PL_madskills) {
cd81e915 6276 if (!PL_thiswhite)
6b29d1f5 6277 PL_thiswhite = newSVpvs("");
cd81e915 6278 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
6279 PL_bufend - PL_linestart);
6280 }
6281#endif
90771dc0 6282 s = PL_bufend;
737c24fc 6283 PL_parser->in_pod = 1;
90771dc0 6284 goto retry;
a5f75d66 6285 }
a0d0e21e 6286 }
64a40898 6287 if (PL_expect == XBLOCK) {
c35e046a 6288 const char *t = s;
51882d45 6289#ifdef PERL_STRICT_CR
c35e046a 6290 while (SPACE_OR_TAB(*t))
51882d45 6291#else
c35e046a 6292 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 6293#endif
c35e046a 6294 t++;
a0d0e21e 6295 if (*t == '\n' || *t == '#') {
705fe0e5 6296 formbrack = 1;
583c9d5c
FC
6297 ENTER;
6298 SAVEI8(PL_parser->form_lex_state);
64a40898 6299 SAVEI32(PL_lex_formbrack);
583c9d5c 6300 PL_parser->form_lex_state = PL_lex_state;
64a40898 6301 PL_lex_formbrack = PL_lex_brackets + 1;
a0d0e21e
LW
6302 goto leftbracket;
6303 }
79072805 6304 }
78cdf107
Z
6305 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6306 s--;
6307 TOKEN(0);
6308 }
6154021b 6309 pl_yylval.ival = 0;
a0d0e21e 6310 OPERATOR(ASSIGNOP);
378cc40b
LW
6311 case '!':
6312 s++;
90771dc0
NC
6313 {
6314 const char tmp = *s++;
6315 if (tmp == '=') {
6316 /* was this !=~ where !~ was meant?
6317 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6318
6319 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6320 const char *t = s+1;
6321
6322 while (t < PL_bufend && isSPACE(*t))
6323 ++t;
6324
6325 if (*t == '/' || *t == '?' ||
6326 ((*t == 'm' || *t == 's' || *t == 'y')
0eb30aeb
KW
6327 && !isWORDCHAR(t[1])) ||
6328 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
90771dc0
NC
6329 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6330 "!=~ should be !~");
6331 }
78cdf107
Z
6332 if (!PL_lex_allbrackets &&
6333 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6334 s -= 2;
6335 TOKEN(0);
6336 }
90771dc0
NC
6337 Eop(OP_NE);
6338 }
6339 if (tmp == '~')
6340 PMop(OP_NOT);
6341 }
378cc40b
LW
6342 s--;
6343 OPERATOR('!');
6344 case '<':
3280af22 6345 if (PL_expect != XOPERATOR) {
93a17b20 6346 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 6347 check_uni();
79072805
LW
6348 if (s[1] == '<')
6349 s = scan_heredoc(s);
6350 else
6351 s = scan_inputsymbol(s);
78a635de
FC
6352 PL_expect = XOPERATOR;
6353 TOKEN(sublex_start());
378cc40b
LW
6354 }
6355 s++;
90771dc0
NC
6356 {
6357 char tmp = *s++;
78cdf107
Z
6358 if (tmp == '<') {
6359 if (*s == '=' && !PL_lex_allbrackets &&
6360 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6361 s -= 2;
6362 TOKEN(0);
6363 }
90771dc0 6364 SHop(OP_LEFT_SHIFT);
78cdf107 6365 }
90771dc0
NC
6366 if (tmp == '=') {
6367 tmp = *s++;
78cdf107
Z
6368 if (tmp == '>') {
6369 if (!PL_lex_allbrackets &&
6370 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6371 s -= 3;
6372 TOKEN(0);
6373 }
90771dc0 6374 Eop(OP_NCMP);
78cdf107 6375 }
90771dc0 6376 s--;
78cdf107
Z
6377 if (!PL_lex_allbrackets &&
6378 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6379 s -= 2;
6380 TOKEN(0);
6381 }
90771dc0
NC
6382 Rop(OP_LE);
6383 }
395c3793 6384 }
378cc40b 6385 s--;
78cdf107
Z
6386 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6387 s--;
6388 TOKEN(0);
6389 }
79072805 6390 Rop(OP_LT);
378cc40b
LW
6391 case '>':
6392 s++;
90771dc0
NC
6393 {
6394 const char tmp = *s++;
78cdf107
Z
6395 if (tmp == '>') {
6396 if (*s == '=' && !PL_lex_allbrackets &&
6397 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6398 s -= 2;
6399 TOKEN(0);
6400 }
90771dc0 6401 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
6402 }
6403 else if (tmp == '=') {
6404 if (!PL_lex_allbrackets &&
6405 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6406 s -= 2;
6407 TOKEN(0);
6408 }
90771dc0 6409 Rop(OP_GE);
78cdf107 6410 }
90771dc0 6411 }
378cc40b 6412 s--;
78cdf107
Z
6413 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6414 s--;
6415 TOKEN(0);
6416 }
79072805 6417 Rop(OP_GT);
378cc40b
LW
6418
6419 case '$':
bbce6d69 6420 CLINE;
6421
3280af22
NIS
6422 if (PL_expect == XOPERATOR) {
6423 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6424 return deprecate_commaless_var_list();
a0d0e21e 6425 }
8990e307 6426 }
a0d0e21e 6427
c0b977fd 6428 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6429 PL_tokenbuf[0] = '@';
376b8730
SM
6430 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6431 sizeof PL_tokenbuf - 1, FALSE);
6432 if (PL_expect == XOPERATOR)
6433 no_op("Array length", s);
3280af22 6434 if (!PL_tokenbuf[1])
a0d0e21e 6435 PREREF(DOLSHARP);
3280af22 6436 PL_expect = XOPERATOR;
60ac52eb 6437 force_ident_maybe_lex('#');
463ee0b2 6438 TOKEN(DOLSHARP);
79072805 6439 }
bbce6d69 6440
3280af22 6441 PL_tokenbuf[0] = '$';
376b8730
SM
6442 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6443 sizeof PL_tokenbuf - 1, FALSE);
6444 if (PL_expect == XOPERATOR)
6445 no_op("Scalar", s);
3280af22
NIS
6446 if (!PL_tokenbuf[1]) {
6447 if (s == PL_bufend)
bbce6d69 6448 yyerror("Final $ should be \\$ or $name");
6449 PREREF('$');
8990e307 6450 }
a0d0e21e 6451
ff68c719 6452 d = s;
90771dc0
NC
6453 {
6454 const char tmp = *s;
ae28bb2a 6455 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6456 s = SKIPSPACE1(s);
ff68c719 6457
90771dc0
NC
6458 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6459 && intuit_more(s)) {
6460 if (*s == '[') {
6461 PL_tokenbuf[0] = '@';
6462 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6463 char *t = s+1;
6464
8a2bca12 6465 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
c35e046a 6466 t++;
90771dc0 6467 if (*t++ == ',') {
29595ff2 6468 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6469 while (t < PL_bufend && *t != ']')
6470 t++;
9014280d 6471 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6472 "Multidimensional syntax %.*s not supported",
36c7798d 6473 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6474 }
748a9306 6475 }
93a17b20 6476 }
90771dc0
NC
6477 else if (*s == '{') {
6478 char *t;
6479 PL_tokenbuf[0] = '%';
6480 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6481 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6482 {
6483 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6484 do {
6485 t++;
6486 } while (isSPACE(*t));
90771dc0 6487 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6488 STRLEN len;
90771dc0 6489 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6490 &len);
c35e046a
AL
6491 while (isSPACE(*t))
6492 t++;
4c01a014
BF
6493 if (*t == ';'
6494 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
90771dc0 6495 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4c01a014
BF
6496 "You need to quote \"%"SVf"\"",
6497 SVfARG(newSVpvn_flags(tmpbuf, len,
6498 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
90771dc0
NC
6499 }
6500 }
6501 }
93a17b20 6502 }
bbce6d69 6503
90771dc0
NC
6504 PL_expect = XOPERATOR;
6505 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6506 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6507 if (!islop || PL_last_lop_op == OP_GREPSTART)
6508 PL_expect = XOPERATOR;
6509 else if (strchr("$@\"'`q", *s))
6510 PL_expect = XTERM; /* e.g. print $fh "foo" */
6511 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6512 PL_expect = XTERM; /* e.g. print $fh &sub */
6513 else if (isIDFIRST_lazy_if(s,UTF)) {
6514 char tmpbuf[sizeof PL_tokenbuf];
6515 int t2;
6516 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6517 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6518 /* binary operators exclude handle interpretations */
6519 switch (t2) {
6520 case -KEY_x:
6521 case -KEY_eq:
6522 case -KEY_ne:
6523 case -KEY_gt:
6524 case -KEY_lt:
6525 case -KEY_ge:
6526 case -KEY_le:
6527 case -KEY_cmp:
6528 break;
6529 default:
6530 PL_expect = XTERM; /* e.g. print $fh length() */
6531 break;
6532 }
6533 }
6534 else {
6535 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6536 }
6537 }
90771dc0
NC
6538 else if (isDIGIT(*s))
6539 PL_expect = XTERM; /* e.g. print $fh 3 */
6540 else if (*s == '.' && isDIGIT(s[1]))
6541 PL_expect = XTERM; /* e.g. print $fh .3 */
6542 else if ((*s == '?' || *s == '-' || *s == '+')
6543 && !isSPACE(s[1]) && s[1] != '=')
6544 PL_expect = XTERM; /* e.g. print $fh -1 */
6545 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6546 && s[1] != '/')
6547 PL_expect = XTERM; /* e.g. print $fh /.../
6548 XXX except DORDOR operator
6549 */
6550 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6551 && s[2] != '=')
6552 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6553 }
bbce6d69 6554 }
60ac52eb 6555 force_ident_maybe_lex('$');
79072805 6556 TOKEN('$');
378cc40b
LW
6557
6558 case '@':
3280af22 6559 if (PL_expect == XOPERATOR)
bbce6d69 6560 no_op("Array", s);
3280af22
NIS
6561 PL_tokenbuf[0] = '@';
6562 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6563 if (!PL_tokenbuf[1]) {
bbce6d69 6564 PREREF('@');
6565 }
3280af22 6566 if (PL_lex_state == LEX_NORMAL)
29595ff2 6567 s = SKIPSPACE1(s);
3280af22 6568 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6569 if (*s == '{')
3280af22 6570 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6571
6572 /* Warn about @ where they meant $. */
041457d9
DM
6573 if (*s == '[' || *s == '{') {
6574 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6575 const char *t = s + 1;
8a2bca12 6576 while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
b9e186cd 6577 t += UTF ? UTF8SKIP(t) : 1;
a0d0e21e
LW
6578 if (*t == '}' || *t == ']') {
6579 t++;
29595ff2 6580 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
dcbac5bb 6581 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
9014280d 6582 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
b9e186cd
BF
6583 "Scalar value %"SVf" better written as $%"SVf,
6584 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6585 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6586 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6587 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
a0d0e21e 6588 }
93a17b20
LW
6589 }
6590 }
463ee0b2 6591 }
60ac52eb
FC
6592 PL_expect = XOPERATOR;
6593 force_ident_maybe_lex('@');
79072805 6594 TERM('@');
378cc40b 6595
c963b151 6596 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6597 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6598 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6599 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6600 TOKEN(0);
6f33ba73
RGS
6601 s += 2;
6602 AOPERATOR(DORDOR);
6603 }
c963b151 6604 case '?': /* may either be conditional or pattern */
be25f609 6605 if (PL_expect == XOPERATOR) {
90771dc0 6606 char tmp = *s++;
c963b151 6607 if(tmp == '?') {
78cdf107
Z
6608 if (!PL_lex_allbrackets &&
6609 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6610 s--;
6611 TOKEN(0);
6612 }
6613 PL_lex_allbrackets++;
be25f609 6614 OPERATOR('?');
c963b151
BD
6615 }
6616 else {
6617 tmp = *s++;
6618 if(tmp == '/') {
6619 /* A // operator. */
78cdf107
Z
6620 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6621 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6622 LEX_FAKEEOF_LOGIC)) {
6623 s -= 2;
6624 TOKEN(0);
6625 }
c963b151
BD
6626 AOPERATOR(DORDOR);
6627 }
6628 else {
6629 s--;
78cdf107
Z
6630 if (*s == '=' && !PL_lex_allbrackets &&
6631 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6632 s--;
6633 TOKEN(0);
6634 }
c963b151
BD
6635 Mop(OP_DIVIDE);
6636 }
6637 }
6638 }
6639 else {
6640 /* Disable warning on "study /blah/" */
6641 if (PL_oldoldbufptr == PL_last_uni
6642 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6643 || memNE(PL_last_uni, "study", 5)
8a2bca12 6644 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
c963b151
BD
6645 ))
6646 check_uni();
725a61d7
Z
6647 if (*s == '?')
6648 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6649 s = scan_pat(s,OP_MATCH);
6650 TERM(sublex_start());
6651 }
378cc40b
LW
6652
6653 case '.':
51882d45
GS
6654 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6655#ifdef PERL_STRICT_CR
6656 && s[1] == '\n'
6657#else
6658 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6659#endif
6660 && (s == PL_linestart || s[-1] == '\n') )
6661 {
3280af22 6662 PL_expect = XSTATE;
705fe0e5 6663 formbrack = 2; /* dot seen where arguments expected */
79072805
LW
6664 goto rightbracket;
6665 }
be25f609 6666 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6667 s += 3;
6668 OPERATOR(YADAYADA);
6669 }
3280af22 6670 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6671 char tmp = *s++;
a687059c 6672 if (*s == tmp) {
78cdf107
Z
6673 if (!PL_lex_allbrackets &&
6674 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6675 s--;
6676 TOKEN(0);
6677 }
a687059c 6678 s++;
2f3197b3
LW
6679 if (*s == tmp) {
6680 s++;
6154021b 6681 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6682 }
6683 else
6154021b 6684 pl_yylval.ival = 0;
378cc40b 6685 OPERATOR(DOTDOT);
a687059c 6686 }
78cdf107
Z
6687 if (*s == '=' && !PL_lex_allbrackets &&
6688 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6689 s--;
6690 TOKEN(0);
6691 }
79072805 6692 Aop(OP_CONCAT);
378cc40b
LW
6693 }
6694 /* FALL THROUGH */
6695 case '0': case '1': case '2': case '3': case '4':
6696 case '5': case '6': case '7': case '8': case '9':
6154021b 6697 s = scan_num(s, &pl_yylval);
931e0695 6698 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6699 if (PL_expect == XOPERATOR)
8990e307 6700 no_op("Number",s);
79072805
LW
6701 TERM(THING);
6702
6703 case '\'':
4d68ffa0 6704 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
931e0695 6705 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6706 if (PL_expect == XOPERATOR) {
6707 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6708 return deprecate_commaless_var_list();
a0d0e21e 6709 }
463ee0b2 6710 else
8990e307 6711 no_op("String",s);
463ee0b2 6712 }
79072805 6713 if (!s)
d4c19fe8 6714 missingterm(NULL);
6154021b 6715 pl_yylval.ival = OP_CONST;
79072805
LW
6716 TERM(sublex_start());
6717
6718 case '"':
4d68ffa0 6719 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
931e0695 6720 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6721 if (PL_expect == XOPERATOR) {
6722 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6723 return deprecate_commaless_var_list();
a0d0e21e 6724 }
463ee0b2 6725 else
8990e307 6726 no_op("String",s);
463ee0b2 6727 }
79072805 6728 if (!s)
d4c19fe8 6729 missingterm(NULL);
6154021b 6730 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6731 /* FIXME. I think that this can be const if char *d is replaced by
6732 more localised variables. */
3280af22 6733 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6734 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6735 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6736 break;
6737 }
6738 }
79072805
LW
6739 TERM(sublex_start());
6740
6741 case '`':
4d68ffa0 6742 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
931e0695 6743 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6744 if (PL_expect == XOPERATOR)
8990e307 6745 no_op("Backticks",s);
79072805 6746 if (!s)
d4c19fe8 6747 missingterm(NULL);
9b201d7d 6748 readpipe_override();
79072805
LW
6749 TERM(sublex_start());
6750
6751 case '\\':
6752 s++;
a2a5de95
NC
6753 if (PL_lex_inwhat && isDIGIT(*s))
6754 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6755 *s, *s);
3280af22 6756 if (PL_expect == XOPERATOR)
8990e307 6757 no_op("Backslash",s);
79072805
LW
6758 OPERATOR(REFGEN);
6759
a7cb1f99 6760 case 'v':
e526c9e6 6761 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6762 char *start = s + 2;
dd629d5b 6763 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6764 start++;
6765 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6766 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6767 TERM(THING);
6768 }
e9d2327d
FC
6769 else if ((*start == ':' && start[1] == ':')
6770 || (PL_expect == XSTATE && *start == ':'))
6771 goto keylookup;
6772 else if (PL_expect == XSTATE) {
6773 d = start;
6774 while (d < PL_bufend && isSPACE(*d)) d++;
6775 if (*d == ':') goto keylookup;
6776 }
e526c9e6 6777 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
e9d2327d 6778 if (!isALPHA(*start) && (PL_expect == XTERM
6f33ba73
RGS
6779 || PL_expect == XREF || PL_expect == XSTATE
6780 || PL_expect == XTERMORDORDOR)) {
af9f5953
BF
6781 GV *const gv = gv_fetchpvn_flags(s, start - s,
6782 UTF ? SVf_UTF8 : 0, SVt_PVCV);
e526c9e6 6783 if (!gv) {
6154021b 6784 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6785 TERM(THING);
6786 }
6787 }
a7cb1f99
GS
6788 }
6789 goto keylookup;
79072805 6790 case 'x':
3280af22 6791 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6792 s++;
6793 Mop(OP_REPEAT);
2f3197b3 6794 }
79072805
LW
6795 goto keylookup;
6796
378cc40b 6797 case '_':
79072805
LW
6798 case 'a': case 'A':
6799 case 'b': case 'B':
6800 case 'c': case 'C':
6801 case 'd': case 'D':
6802 case 'e': case 'E':
6803 case 'f': case 'F':
6804 case 'g': case 'G':
6805 case 'h': case 'H':
6806 case 'i': case 'I':
6807 case 'j': case 'J':
6808 case 'k': case 'K':
6809 case 'l': case 'L':
6810 case 'm': case 'M':
6811 case 'n': case 'N':
6812 case 'o': case 'O':
6813 case 'p': case 'P':
6814 case 'q': case 'Q':
6815 case 'r': case 'R':
6816 case 's': case 'S':
6817 case 't': case 'T':
6818 case 'u': case 'U':
a7cb1f99 6819 case 'V':
79072805
LW
6820 case 'w': case 'W':
6821 case 'X':
6822 case 'y': case 'Y':
6823 case 'z': case 'Z':
6824
49dc05e3 6825 keylookup: {
88e1f1a2 6826 bool anydelim;
18f70389 6827 bool lex;
90771dc0 6828 I32 tmp;
18f70389 6829 SV *sv;
73f3e228
FC
6830 CV *cv;
6831 PADOFFSET off;
6832 OP *rv2cv_op;
10edeb5d 6833
18f70389 6834 lex = FALSE;
10edeb5d 6835 orig_keyword = 0;
73f3e228 6836 off = 0;
18f70389 6837 sv = NULL;
73f3e228 6838 cv = NULL;
10edeb5d
JH
6839 gv = NULL;
6840 gvp = NULL;
73f3e228 6841 rv2cv_op = NULL;
49dc05e3 6842
3280af22
NIS
6843 PL_bufptr = s;
6844 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6845
6846 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6847 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6848
6849 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6850 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6851 goto just_a_word;
6852
3643fb5f 6853 d = s;
3280af22 6854 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6855 d++; /* no comments skipped here, or s### is misparsed */
6856
748a9306 6857 /* Is this a word before a => operator? */
1c3923b3 6858 if (*d == '=' && d[1] == '>') {
748a9306 6859 CLINE;
6154021b 6860 pl_yylval.opval
d0a148a6
NC
6861 = (OP*)newSVOP(OP_CONST, 0,
6862 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6863 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6864 TERM(WORD);
6865 }
6866
88e1f1a2
JV
6867 /* Check for plugged-in keyword */
6868 {
6869 OP *o;
6870 int result;
6871 char *saved_bufptr = PL_bufptr;
6872 PL_bufptr = s;
16c91539 6873 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6874 s = PL_bufptr;
6875 if (result == KEYWORD_PLUGIN_DECLINE) {
6876 /* not a plugged-in keyword */
6877 PL_bufptr = saved_bufptr;
6878 } else if (result == KEYWORD_PLUGIN_STMT) {
6879 pl_yylval.opval = o;
6880 CLINE;
6881 PL_expect = XSTATE;
6882 return REPORT(PLUGSTMT);
6883 } else if (result == KEYWORD_PLUGIN_EXPR) {
6884 pl_yylval.opval = o;
6885 CLINE;
6886 PL_expect = XOPERATOR;
6887 return REPORT(PLUGEXPR);
6888 } else {
6889 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6890 PL_tokenbuf);
6891 }
6892 }
6893
6894 /* Check for built-in keyword */
6895 tmp = keyword(PL_tokenbuf, len, 0);
6896
6897 /* Is this a label? */
6898 if (!anydelim && PL_expect == XSTATE
6899 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2 6900 s = d + 1;
5504e6cf
FC
6901 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6902 pl_yylval.pval[len] = '\0';
6903 pl_yylval.pval[len+1] = UTF ? 1 : 0;
88e1f1a2
JV
6904 CLINE;
6905 TOKEN(LABEL);
6906 }
6907
18f70389
FC
6908 /* Check for lexical sub */
6909 if (PL_expect != XOPERATOR) {
6910 char tmpbuf[sizeof PL_tokenbuf + 1];
18f70389
FC
6911 *tmpbuf = '&';
6912 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6913 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6914 if (off != NOT_IN_PAD) {
73f3e228 6915 assert(off); /* we assume this is boolean-true below */
18f70389
FC
6916 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6917 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6918 HEK * const stashname = HvNAME_HEK(stash);
6919 sv = newSVhek(stashname);
6920 sv_catpvs(sv, "::");
6921 sv_catpvn_flags(sv, PL_tokenbuf, len,
6922 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6923 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6924 SVt_PVCV);
73f3e228 6925 off = 0;
89e006ae
PM
6926 if (!gv) {
6927 sv_free(sv);
6928 sv = NULL;
6929 goto just_a_word;
6930 }
18f70389 6931 }
73f3e228
FC
6932 else {
6933 rv2cv_op = newOP(OP_PADANY, 0);
6934 rv2cv_op->op_targ = off;
6935 rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
6936 cv = (CV *)PAD_SV(off);
6937 }
6938 lex = TRUE;
6939 goto just_a_word;
18f70389 6940 }
73f3e228 6941 off = 0;
18f70389
FC
6942 }
6943
a0d0e21e 6944 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6945 GV *ogv = NULL; /* override (winner) */
6946 GV *hgv = NULL; /* hidden (loser) */
3280af22 6947 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6948 CV *cv;
af9f5953
BF
6949 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6950 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
56f7f34b
CS
6951 (cv = GvCVu(gv)))
6952 {
6953 if (GvIMPORTED_CV(gv))
6954 ogv = gv;
6955 else if (! CvMETHOD(cv))
6956 hgv = gv;
6957 }
6958 if (!ogv &&
af9f5953 6959 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
c60dbbc3 6960 UTF ? -(I32)len : (I32)len, FALSE)) &&
9e0d86f8 6961 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6962 GvCVu(gv) && GvIMPORTED_CV(gv))
6963 {
6964 ogv = gv;
6965 }
6966 }
6967 if (ogv) {
30fe34ed 6968 orig_keyword = tmp;
56f7f34b 6969 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6970 }
6971 else if (gv && !gvp
6972 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6973 && GvCVu(gv))
6e7b2336
GS
6974 {
6975 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6976 }
56f7f34b
CS
6977 else { /* no override */
6978 tmp = -tmp;
a2a5de95
NC
6979 if (tmp == KEY_dump) {
6980 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6981 "dump() better written as CORE::dump()");
ac206dc8 6982 }
a0714e2c 6983 gv = NULL;
56f7f34b 6984 gvp = 0;
a2a5de95
NC
6985 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6986 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6987 "Ambiguous call resolved as CORE::%s(), "
6988 "qualify as such or use &",
6989 GvENAME(hgv));
49dc05e3 6990 }
a0d0e21e
LW
6991 }
6992
6993 reserved_word:
6994 switch (tmp) {
79072805
LW
6995
6996 default: /* not a keyword */
0bfa2a8a
NC
6997 /* Trade off - by using this evil construction we can pull the
6998 variable gv into the block labelled keylookup. If not, then
6999 we have to give it function scope so that the goto from the
7000 earlier ':' case doesn't bypass the initialisation. */
7001 if (0) {
7002 just_a_word_zero_gv:
73f3e228
FC
7003 sv = NULL;
7004 cv = NULL;
0bfa2a8a
NC
7005 gv = NULL;
7006 gvp = NULL;
73f3e228 7007 rv2cv_op = NULL;
8bee0991 7008 orig_keyword = 0;
18f70389
FC
7009 lex = 0;
7010 off = 0;
0bfa2a8a 7011 }
93a17b20 7012 just_a_word: {
ce29ac45 7013 int pkgname = 0;
f54cb97a 7014 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
898c3bca
FC
7015 const char penultchar =
7016 lastchar && PL_bufptr - 2 >= PL_linestart
7017 ? PL_bufptr[-2]
7018 : 0;
5db06880 7019#ifdef PERL_MAD
cd81e915 7020 SV *nextPL_nextwhite = 0;
5db06880
NC
7021#endif
7022
8990e307
LW
7023
7024 /* Get the rest if it looks like a package qualifier */
7025
155aba94 7026 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 7027 STRLEN morelen;
3280af22 7028 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
7029 TRUE, &morelen);
7030 if (!morelen)
86fe3f36
BF
7031 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
7032 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7033 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
ec2ab091 7034 *s == '\'' ? "'" : "::");
c3e0f903 7035 len += morelen;
ce29ac45 7036 pkgname = 1;
a0d0e21e 7037 }
8990e307 7038
3280af22
NIS
7039 if (PL_expect == XOPERATOR) {
7040 if (PL_bufptr == PL_linestart) {
57843af0 7041 CopLINE_dec(PL_curcop);
f1f66076 7042 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 7043 CopLINE_inc(PL_curcop);
463ee0b2
LW
7044 }
7045 else
54310121 7046 no_op("Bareword",s);
463ee0b2 7047 }
8990e307 7048
c3e0f903 7049 /* Look for a subroutine with this name in current package,
73f3e228
FC
7050 unless this is a lexical sub, or name is "Foo::",
7051 in which case Foo is a bareword
c3e0f903
GS
7052 (and a package name). */
7053
5db06880 7054 if (len > 2 && !PL_madskills &&
3280af22 7055 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 7056 {
f776e3cd 7057 if (ckWARN(WARN_BAREWORD)
af9f5953 7058 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
9014280d 7059 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
979a1401
BF
7060 "Bareword \"%"SVf"\" refers to nonexistent package",
7061 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7062 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
c3e0f903 7063 len -= 2;
3280af22 7064 PL_tokenbuf[len] = '\0';
a0714e2c 7065 gv = NULL;
c3e0f903
GS
7066 gvp = 0;
7067 }
7068 else {
73f3e228 7069 if (!lex && !gv) {
62d55b22
NC
7070 /* Mustn't actually add anything to a symbol table.
7071 But also don't want to "initialise" any placeholder
7072 constants that might already be there into full
7073 blown PVGVs with attached PVCV. */
90e5519e 7074 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
af9f5953
BF
7075 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7076 SVt_PVCV);
62d55b22 7077 }
b3d904f3 7078 len = 0;
c3e0f903
GS
7079 }
7080
7081 /* if we saw a global override before, get the right name */
8990e307 7082
73f3e228 7083 if (!sv)
18f70389 7084 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
37bb7629 7085 len ? len : strlen(PL_tokenbuf));
49dc05e3 7086 if (gvp) {
37bb7629 7087 SV * const tmp_sv = sv;
396482e1 7088 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
7089 sv_catsv(sv, tmp_sv);
7090 SvREFCNT_dec(tmp_sv);
8a7a129d 7091 }
37bb7629 7092
5db06880 7093#ifdef PERL_MAD
cd81e915
NC
7094 if (PL_madskills && !PL_thistoken) {
7095 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 7096 PL_thistoken = newSVpvn(start,s - start);
cd81e915 7097 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
7098 }
7099#endif
8990e307 7100
a0d0e21e 7101 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 7102 CLINE;
6154021b
RGS
7103 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7104 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 7105
c3e0f903 7106 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
7107 if (len)
7108 goto safe_bareword;
7109
73f3e228 7110 if (!off)
f7461760 7111 {
d8ebba9f 7112 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
7113 const_op->op_private = OPpCONST_BARE;
7114 rv2cv_op = newCVREF(0, const_op);
73f3e228 7115 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
f7461760 7116 }
5069cc75 7117
8990e307
LW
7118 /* See if it's the indirect object for a list operator. */
7119
3280af22
NIS
7120 if (PL_oldoldbufptr &&
7121 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
7122 (PL_oldoldbufptr == PL_last_lop
7123 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 7124 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
7125 (PL_expect == XREF ||
7126 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 7127 {
748a9306
LW
7128 bool immediate_paren = *s == '(';
7129
a0d0e21e 7130 /* (Now we can afford to cross potential line boundary.) */
cd81e915 7131 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 7132#ifdef PERL_MAD
cd81e915 7133 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 7134#endif
a0d0e21e
LW
7135
7136 /* Two barewords in a row may indicate method call. */
7137
62d55b22 7138 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
7139 (tmp = intuit_method(s, gv, cv))) {
7140 op_free(rv2cv_op);
78cdf107
Z
7141 if (tmp == METHOD && !PL_lex_allbrackets &&
7142 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7143 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 7144 return REPORT(tmp);
f7461760 7145 }
a0d0e21e
LW
7146
7147 /* If not a declared subroutine, it's an indirect object. */
7148 /* (But it's an indir obj regardless for sort.) */
7294df96 7149 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 7150
7294df96
RGS
7151 if (
7152 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 7153 (!cv &&
a9ef352a 7154 (PL_last_lop_op != OP_MAPSTART &&
f0670693 7155 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
7156 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7157 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7158 )
a9ef352a 7159 {
3280af22 7160 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 7161 goto bareword;
93a17b20
LW
7162 }
7163 }
8990e307 7164
3280af22 7165 PL_expect = XOPERATOR;
5db06880
NC
7166#ifdef PERL_MAD
7167 if (isSPACE(*s))
cd81e915
NC
7168 s = SKIPSPACE2(s,nextPL_nextwhite);
7169 PL_nextwhite = nextPL_nextwhite;
5db06880 7170#else
8990e307 7171 s = skipspace(s);
5db06880 7172#endif
1c3923b3
GS
7173
7174 /* Is this a word before a => operator? */
ce29ac45 7175 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 7176 op_free(rv2cv_op);
1c3923b3 7177 CLINE;
6154021b 7178 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 7179 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 7180 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
7181 TERM(WORD);
7182 }
7183
7184 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 7185 if (*s == '(') {
79072805 7186 CLINE;
5069cc75 7187 if (cv) {
c35e046a
AL
7188 d = s + 1;
7189 while (SPACE_OR_TAB(*d))
7190 d++;
f7461760 7191 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 7192 s = d + 1;
c631f32b 7193 goto its_constant;
96e4d5b1 7194 }
7195 }
5db06880
NC
7196#ifdef PERL_MAD
7197 if (PL_madskills) {
cd81e915
NC
7198 PL_nextwhite = PL_thiswhite;
7199 PL_thiswhite = 0;
5db06880 7200 }
cd81e915 7201 start_force(PL_curforce);
5db06880 7202#endif
73f3e228
FC
7203 NEXTVAL_NEXTTOKE.opval =
7204 off ? rv2cv_op : pl_yylval.opval;
3280af22 7205 PL_expect = XOPERATOR;
5db06880
NC
7206#ifdef PERL_MAD
7207 if (PL_madskills) {
cd81e915
NC
7208 PL_nextwhite = nextPL_nextwhite;
7209 curmad('X', PL_thistoken);
6b29d1f5 7210 PL_thistoken = newSVpvs("");
5db06880
NC
7211 }
7212#endif
73f3e228
FC
7213 if (off)
7214 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7215 else op_free(rv2cv_op), force_next(WORD);
6154021b 7216 pl_yylval.ival = 0;
463ee0b2 7217 TOKEN('&');
79072805 7218 }
93a17b20 7219
a0d0e21e 7220 /* If followed by var or block, call it a method (unless sub) */
8990e307 7221
f7461760
Z
7222 if ((*s == '$' || *s == '{') && !cv) {
7223 op_free(rv2cv_op);
3280af22
NIS
7224 PL_last_lop = PL_oldbufptr;
7225 PL_last_lop_op = OP_METHOD;
78cdf107
Z
7226 if (!PL_lex_allbrackets &&
7227 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7228 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 7229 PREBLOCK(METHOD);
463ee0b2
LW
7230 }
7231
8990e307
LW
7232 /* If followed by a bareword, see if it looks like indir obj. */
7233
30fe34ed
RGS
7234 if (!orig_keyword
7235 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
7236 && (tmp = intuit_method(s, gv, cv))) {
7237 op_free(rv2cv_op);
78cdf107
Z
7238 if (tmp == METHOD && !PL_lex_allbrackets &&
7239 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7240 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 7241 return REPORT(tmp);
f7461760 7242 }
93a17b20 7243
8990e307
LW
7244 /* Not a method, so call it a subroutine (if defined) */
7245
5069cc75 7246 if (cv) {
898c3bca 7247 if (lastchar == '-' && penultchar != '-') {
43b5ab4c
BF
7248 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7249 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7250 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7251 SVfARG(tmpsv), SVfARG(tmpsv));
7252 }
89bfa8cd 7253 /* Check for a constant sub */
f7461760 7254 if ((sv = cv_const_sv(cv))) {
96e4d5b1 7255 its_constant:
f7461760 7256 op_free(rv2cv_op);
6154021b
RGS
7257 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7258 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
cc2ebcd7 7259 pl_yylval.opval->op_private = OPpCONST_FOLDED;
6b7c6d95 7260 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 7261 TOKEN(WORD);
89bfa8cd 7262 }
7263
6154021b 7264 op_free(pl_yylval.opval);
f7461760 7265 pl_yylval.opval = rv2cv_op;
6154021b 7266 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 7267 PL_last_lop = PL_oldbufptr;
bf848113 7268 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 7269 /* Is there a prototype? */
5db06880
NC
7270 if (
7271#ifdef PERL_MAD
7272 cv &&
7273#endif
d9f2850e
RGS
7274 SvPOK(cv))
7275 {
8fa6a409
FC
7276 STRLEN protolen = CvPROTOLEN(cv);
7277 const char *proto = CvPROTO(cv);
b5fb7ce3 7278 bool optional;
5f66b61c 7279 if (!protolen)
4633a7c4 7280 TERM(FUNC0SUB);
b5fb7ce3
FC
7281 if ((optional = *proto == ';'))
7282 do
0f5d0394 7283 proto++;
b5fb7ce3 7284 while (*proto == ';');
649d02de
FC
7285 if (
7286 (
7287 (
7288 *proto == '$' || *proto == '_'
c035a075 7289 || *proto == '*' || *proto == '+'
649d02de
FC
7290 )
7291 && proto[1] == '\0'
7292 )
7293 || (
7294 *proto == '\\' && proto[1] && proto[2] == '\0'
7295 )
7296 )
b5fb7ce3 7297 UNIPROTO(UNIOPSUB,optional);
649d02de
FC
7298 if (*proto == '\\' && proto[1] == '[') {
7299 const char *p = proto + 2;
7300 while(*p && *p != ']')
7301 ++p;
b5fb7ce3
FC
7302 if(*p == ']' && !p[1])
7303 UNIPROTO(UNIOPSUB,optional);
649d02de 7304 }
7a52d87a 7305 if (*proto == '&' && *s == '{') {
49a54bbe
NC
7306 if (PL_curstash)
7307 sv_setpvs(PL_subname, "__ANON__");
7308 else
7309 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
7310 if (!PL_lex_allbrackets &&
7311 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7312 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
7313 PREBLOCK(LSTOPSUB);
7314 }
a9ef352a 7315 }
5db06880
NC
7316#ifdef PERL_MAD
7317 {
7318 if (PL_madskills) {
cd81e915
NC
7319 PL_nextwhite = PL_thiswhite;
7320 PL_thiswhite = 0;
5db06880 7321 }
cd81e915 7322 start_force(PL_curforce);
6154021b 7323 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
7324 PL_expect = XTERM;
7325 if (PL_madskills) {
cd81e915
NC
7326 PL_nextwhite = nextPL_nextwhite;
7327 curmad('X', PL_thistoken);
6b29d1f5 7328 PL_thistoken = newSVpvs("");
5db06880 7329 }
73f3e228 7330 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7331 if (!PL_lex_allbrackets &&
7332 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7333 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7334 TOKEN(NOAMP);
7335 }
7336 }
7337
7338 /* Guess harder when madskills require "best effort". */
7339 if (PL_madskills && (!gv || !GvCVu(gv))) {
7340 int probable_sub = 0;
7341 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7342 probable_sub = 1;
7343 else if (isALPHA(*s)) {
7344 char tmpbuf[1024];
7345 STRLEN tmplen;
7346 d = s;
7347 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 7348 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
7349 probable_sub = 1;
7350 else {
7351 while (d < PL_bufend && isSPACE(*d))
7352 d++;
7353 if (*d == '=' && d[1] == '>')
7354 probable_sub = 1;
7355 }
7356 }
7357 if (probable_sub) {
af9f5953
BF
7358 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7359 SVt_PVCV);
6154021b 7360 op_free(pl_yylval.opval);
f7461760 7361 pl_yylval.opval = rv2cv_op;
6154021b 7362 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
7363 PL_last_lop = PL_oldbufptr;
7364 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
7365 PL_nextwhite = PL_thiswhite;
7366 PL_thiswhite = 0;
7367 start_force(PL_curforce);
6154021b 7368 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 7369 PL_expect = XTERM;
cd81e915
NC
7370 PL_nextwhite = nextPL_nextwhite;
7371 curmad('X', PL_thistoken);
6b29d1f5 7372 PL_thistoken = newSVpvs("");
73f3e228 7373 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7374 if (!PL_lex_allbrackets &&
7375 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7376 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7377 TOKEN(NOAMP);
7378 }
7379#else
6154021b 7380 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 7381 PL_expect = XTERM;
73f3e228 7382 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7383 if (!PL_lex_allbrackets &&
7384 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7385 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 7386 TOKEN(NOAMP);
5db06880 7387#endif
8990e307 7388 }
748a9306 7389
8990e307
LW
7390 /* Call it a bare word */
7391
5603f27d 7392 if (PL_hints & HINT_STRICT_SUBS)
6154021b 7393 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 7394 else {
9a073a1d
RGS
7395 bareword:
7396 /* after "print" and similar functions (corresponding to
7397 * "F? L" in opcode.pl), whatever wasn't already parsed as
7398 * a filehandle should be subject to "strict subs".
7399 * Likewise for the optional indirect-object argument to system
7400 * or exec, which can't be a bareword */
7401 if ((PL_last_lop_op == OP_PRINT
7402 || PL_last_lop_op == OP_PRTF
7403 || PL_last_lop_op == OP_SAY
7404 || PL_last_lop_op == OP_SYSTEM
7405 || PL_last_lop_op == OP_EXEC)
7406 && (PL_hints & HINT_STRICT_SUBS))
7407 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
7408 if (lastchar != '-') {
7409 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
7410 d = PL_tokenbuf;
7411 while (isLOWER(*d))
7412 d++;
af9f5953 7413 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
9014280d 7414 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
7415 PL_tokenbuf);
7416 }
748a9306
LW
7417 }
7418 }
f7461760 7419 op_free(rv2cv_op);
c3e0f903
GS
7420
7421 safe_bareword:
9b387841
NC
7422 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7423 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
02571fe8
BF
7424 "Operator or semicolon missing before %c%"SVf,
7425 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7426 strlen(PL_tokenbuf),
7427 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
9b387841
NC
7428 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7429 "Ambiguous use of %c resolved as operator %c",
7430 lastchar, lastchar);
748a9306 7431 }
93a17b20 7432 TOKEN(WORD);
79072805 7433 }
79072805 7434
68dc0745 7435 case KEY___FILE__:
7eb971ee 7436 FUN0OP(
14f0f125 7437 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7eb971ee 7438 );
46fc3d4c 7439
79072805 7440 case KEY___LINE__:
7eb971ee
FC
7441 FUN0OP(
7442 (OP*)newSVOP(OP_CONST, 0,
7443 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7444 );
68dc0745 7445
7446 case KEY___PACKAGE__:
7eb971ee
FC
7447 FUN0OP(
7448 (OP*)newSVOP(OP_CONST, 0,
3280af22 7449 (PL_curstash
5aaec2b4 7450 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
7451 : &PL_sv_undef))
7452 );
79072805 7453
e50aee73 7454 case KEY___DATA__:
79072805
LW
7455 case KEY___END__: {
7456 GV *gv;
3280af22 7457 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 7458 const char *pname = "main";
affc13fc
FC
7459 STRLEN plen = 4;
7460 U32 putf8 = 0;
3280af22 7461 if (PL_tokenbuf[2] == 'D')
affc13fc
FC
7462 {
7463 HV * const stash =
7464 PL_curstash ? PL_curstash : PL_defstash;
7465 pname = HvNAME_get(stash);
7466 plen = HvNAMELEN (stash);
7467 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7468 }
7469 gv = gv_fetchpvn_flags(
7470 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7471 plen+6, GV_ADD|putf8, SVt_PVIO
7472 );
a5f75d66 7473 GvMULTI_on(gv);
79072805 7474 if (!GvIO(gv))
a0d0e21e 7475 GvIOp(gv) = newIO();
3280af22 7476 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
7477#if defined(HAS_FCNTL) && defined(F_SETFD)
7478 {
f54cb97a 7479 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
7480 fcntl(fd,F_SETFD,fd >= 3);
7481 }
79072805 7482#endif
fd049845 7483 /* Mark this internal pseudo-handle as clean */
7484 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 7485 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 7486 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 7487 else
50952442 7488 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
7489#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7490 /* if the script was opened in binmode, we need to revert
53129d29 7491 * it to text mode for compatibility; but only iff it has CRs
c39cd008 7492 * XXX this is a questionable hack at best. */
53129d29
GS
7493 if (PL_bufend-PL_bufptr > 2
7494 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
7495 {
7496 Off_t loc = 0;
50952442 7497 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
7498 loc = PerlIO_tell(PL_rsfp);
7499 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7500 }
2986a63f
JH
7501#ifdef NETWARE
7502 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7503#else
c39cd008 7504 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 7505#endif /* NETWARE */
c39cd008
GS
7506 if (loc > 0)
7507 PerlIO_seek(PL_rsfp, loc, 0);
7508 }
7509 }
7510#endif
7948272d 7511#ifdef PERLIO_LAYERS
52d2e0f4
JH
7512 if (!IN_BYTES) {
7513 if (UTF)
7514 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7515 else if (PL_encoding) {
7516 SV *name;
7517 dSP;
7518 ENTER;
7519 SAVETMPS;
7520 PUSHMARK(sp);
7521 EXTEND(SP, 1);
7522 XPUSHs(PL_encoding);
7523 PUTBACK;
7524 call_method("name", G_SCALAR);
7525 SPAGAIN;
7526 name = POPs;
7527 PUTBACK;
bfed75c6 7528 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7529 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7530 SVfARG(name)));
52d2e0f4
JH
7531 FREETMPS;
7532 LEAVE;
7533 }
7534 }
7948272d 7535#endif
5db06880
NC
7536#ifdef PERL_MAD
7537 if (PL_madskills) {
cd81e915
NC
7538 if (PL_realtokenstart >= 0) {
7539 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7540 if (!PL_endwhite)
6b29d1f5 7541 PL_endwhite = newSVpvs("");
cd81e915
NC
7542 sv_catsv(PL_endwhite, PL_thiswhite);
7543 PL_thiswhite = 0;
7544 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7545 PL_realtokenstart = -1;
5db06880 7546 }
5cc814fd
NC
7547 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7548 != NULL) ;
5db06880
NC
7549 }
7550#endif
4608196e 7551 PL_rsfp = NULL;
79072805
LW
7552 }
7553 goto fake_eof;
e929a76b 7554 }
de3bb511 7555
84ed0108 7556 case KEY___SUB__:
1a35f9ff 7557 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
84ed0108 7558
8990e307 7559 case KEY_AUTOLOAD:
ed6116ce 7560 case KEY_DESTROY:
79072805 7561 case KEY_BEGIN:
3c10abe3 7562 case KEY_UNITCHECK:
7d30b5c4 7563 case KEY_CHECK:
7d07dbc2 7564 case KEY_INIT:
7d30b5c4 7565 case KEY_END:
3280af22
NIS
7566 if (PL_expect == XSTATE) {
7567 s = PL_bufptr;
93a17b20 7568 goto really_sub;
79072805
LW
7569 }
7570 goto just_a_word;
7571
a0d0e21e
LW
7572 case KEY_CORE:
7573 if (*s == ':' && s[1] == ':') {
ee36fb64 7574 STRLEN olen = len;
748a9306 7575 d = s;
ee36fb64 7576 s += 2;
3280af22 7577 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
ee36fb64
FC
7578 if ((*s == ':' && s[1] == ':')
7579 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7580 {
7581 s = d;
7582 len = olen;
7583 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7584 goto just_a_word;
7585 }
7586 if (!tmp)
3773592b
BF
7587 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7588 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7589 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
a0d0e21e
LW
7590 if (tmp < 0)
7591 tmp = -tmp;
d67594ff
FC
7592 else if (tmp == KEY_require || tmp == KEY_do
7593 || tmp == KEY_glob)
a72a1c8b 7594 /* that's a way to remember we saw "CORE::" */
850e8516 7595 orig_keyword = tmp;
a0d0e21e
LW
7596 goto reserved_word;
7597 }
7598 goto just_a_word;
7599
463ee0b2
LW
7600 case KEY_abs:
7601 UNI(OP_ABS);
7602
79072805
LW
7603 case KEY_alarm:
7604 UNI(OP_ALARM);
7605
7606 case KEY_accept:
a0d0e21e 7607 LOP(OP_ACCEPT,XTERM);
79072805 7608
463ee0b2 7609 case KEY_and:
78cdf107
Z
7610 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7611 return REPORT(0);
463ee0b2
LW
7612 OPERATOR(ANDOP);
7613
79072805 7614 case KEY_atan2:
a0d0e21e 7615 LOP(OP_ATAN2,XTERM);
85e6fe83 7616
79072805 7617 case KEY_bind:
a0d0e21e 7618 LOP(OP_BIND,XTERM);
79072805
LW
7619
7620 case KEY_binmode:
1c1fc3ea 7621 LOP(OP_BINMODE,XTERM);
79072805
LW
7622
7623 case KEY_bless:
a0d0e21e 7624 LOP(OP_BLESS,XTERM);
79072805 7625
0d863452
RH
7626 case KEY_break:
7627 FUN0(OP_BREAK);
7628
79072805
LW
7629 case KEY_chop:
7630 UNI(OP_CHOP);
7631
7632 case KEY_continue:
0d863452
RH
7633 /* We have to disambiguate the two senses of
7634 "continue". If the next token is a '{' then
7635 treat it as the start of a continue block;
7636 otherwise treat it as a control operator.
7637 */
7638 s = skipspace(s);
7639 if (*s == '{')
79072805 7640 PREBLOCK(CONTINUE);
0d863452
RH
7641 else
7642 FUN0(OP_CONTINUE);
79072805
LW
7643
7644 case KEY_chdir:
fafc274c
NC
7645 /* may use HOME */
7646 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7647 UNI(OP_CHDIR);
7648
7649 case KEY_close:
7650 UNI(OP_CLOSE);
7651
7652 case KEY_closedir:
7653 UNI(OP_CLOSEDIR);
7654
7655 case KEY_cmp:
78cdf107
Z
7656 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7657 return REPORT(0);
79072805
LW
7658 Eop(OP_SCMP);
7659
7660 case KEY_caller:
7661 UNI(OP_CALLER);
7662
7663 case KEY_crypt:
7664#ifdef FCRYPT
f4c556ac
GS
7665 if (!PL_cryptseen) {
7666 PL_cryptseen = TRUE;
de3bb511 7667 init_des();
f4c556ac 7668 }
a687059c 7669#endif
a0d0e21e 7670 LOP(OP_CRYPT,XTERM);
79072805
LW
7671
7672 case KEY_chmod:
a0d0e21e 7673 LOP(OP_CHMOD,XTERM);
79072805
LW
7674
7675 case KEY_chown:
a0d0e21e 7676 LOP(OP_CHOWN,XTERM);
79072805
LW
7677
7678 case KEY_connect:
a0d0e21e 7679 LOP(OP_CONNECT,XTERM);
79072805 7680
463ee0b2
LW
7681 case KEY_chr:
7682 UNI(OP_CHR);
7683
79072805
LW
7684 case KEY_cos:
7685 UNI(OP_COS);
7686
7687 case KEY_chroot:
7688 UNI(OP_CHROOT);
7689
0d863452
RH
7690 case KEY_default:
7691 PREBLOCK(DEFAULT);
7692
79072805 7693 case KEY_do:
29595ff2 7694 s = SKIPSPACE1(s);
79072805 7695 if (*s == '{')
a0d0e21e 7696 PRETERMBLOCK(DO);
c2900bb8 7697 if (*s != '\'') {
4b473a5a
FC
7698 *PL_tokenbuf = '&';
7699 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7700 1, &len);
7701 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
c2900bb8 7702 d = SKIPSPACE1(d);
4b473a5a 7703 if (*d == '(') {
60ac52eb 7704 force_ident_maybe_lex('&');
4b473a5a
FC
7705 s = d;
7706 }
c2900bb8
FC
7707 }
7708 }
850e8516
RGS
7709 if (orig_keyword == KEY_do) {
7710 orig_keyword = 0;
6154021b 7711 pl_yylval.ival = 1;
850e8516
RGS
7712 }
7713 else
6154021b 7714 pl_yylval.ival = 0;
378cc40b 7715 OPERATOR(DO);
79072805
LW
7716
7717 case KEY_die:
3280af22 7718 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7719 LOP(OP_DIE,XTERM);
79072805
LW
7720
7721 case KEY_defined:
7722 UNI(OP_DEFINED);
7723
7724 case KEY_delete:
a0d0e21e 7725 UNI(OP_DELETE);
79072805
LW
7726
7727 case KEY_dbmopen:
74e8ce34
NC
7728 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7729 STR_WITH_LEN("NDBM_File::"),
7730 STR_WITH_LEN("DB_File::"),
7731 STR_WITH_LEN("GDBM_File::"),
7732 STR_WITH_LEN("SDBM_File::"),
7733 STR_WITH_LEN("ODBM_File::"),
7734 NULL);
a0d0e21e 7735 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7736
7737 case KEY_dbmclose:
7738 UNI(OP_DBMCLOSE);
7739
7740 case KEY_dump:
c31f6d3b 7741 PL_expect = XOPERATOR;
a0d0e21e 7742 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7743 LOOPX(OP_DUMP);
7744
7745 case KEY_else:
7746 PREBLOCK(ELSE);
7747
7748 case KEY_elsif:
6154021b 7749 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7750 OPERATOR(ELSIF);
7751
7752 case KEY_eq:
78cdf107
Z
7753 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7754 return REPORT(0);
79072805
LW
7755 Eop(OP_SEQ);
7756
a0d0e21e
LW
7757 case KEY_exists:
7758 UNI(OP_EXISTS);
4e553d73 7759
79072805 7760 case KEY_exit:
5db06880
NC
7761 if (PL_madskills)
7762 UNI(OP_INT);
79072805
LW
7763 UNI(OP_EXIT);
7764
7765 case KEY_eval:
29595ff2 7766 s = SKIPSPACE1(s);
32e2a35d
RGS
7767 if (*s == '{') { /* block eval */
7768 PL_expect = XTERMBLOCK;
7769 UNIBRACK(OP_ENTERTRY);
7770 }
7771 else { /* string eval */
7772 PL_expect = XTERM;
7773 UNIBRACK(OP_ENTEREVAL);
7774 }
79072805 7775
7d789282
FC
7776 case KEY_evalbytes:
7777 PL_expect = XTERM;
7778 UNIBRACK(-OP_ENTEREVAL);
7779
79072805
LW
7780 case KEY_eof:
7781 UNI(OP_EOF);
7782
7783 case KEY_exp:
7784 UNI(OP_EXP);
7785
7786 case KEY_each:
7787 UNI(OP_EACH);
7788
7789 case KEY_exec:
a0d0e21e 7790 LOP(OP_EXEC,XREF);
79072805
LW
7791
7792 case KEY_endhostent:
7793 FUN0(OP_EHOSTENT);
7794
7795 case KEY_endnetent:
7796 FUN0(OP_ENETENT);
7797
7798 case KEY_endservent:
7799 FUN0(OP_ESERVENT);
7800
7801 case KEY_endprotoent:
7802 FUN0(OP_EPROTOENT);
7803
7804 case KEY_endpwent:
7805 FUN0(OP_EPWENT);
7806
7807 case KEY_endgrent:
7808 FUN0(OP_EGRENT);
7809
7810 case KEY_for:
7811 case KEY_foreach:
78cdf107
Z
7812 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7813 return REPORT(0);
6154021b 7814 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7815 s = SKIPSPACE1(s);
7e2040f0 7816 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7817 char *p = s;
5db06880
NC
7818#ifdef PERL_MAD
7819 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7820#endif
7821
3280af22 7822 if ((PL_bufend - p) >= 3 &&
55497cff 7823 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7824 p += 2;
77ca0c92
LW
7825 else if ((PL_bufend - p) >= 4 &&
7826 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7827 p += 3;
29595ff2 7828 p = PEEKSPACE(p);
7e2040f0 7829 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7830 p = scan_ident(p, PL_bufend,
7831 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7832 p = PEEKSPACE(p);
77ca0c92
LW
7833 }
7834 if (*p != '$')
cea2e8a9 7835 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7836#ifdef PERL_MAD
7837 s = SvPVX(PL_linestr) + soff;
7838#endif
55497cff 7839 }
79072805
LW
7840 OPERATOR(FOR);
7841
7842 case KEY_formline:
a0d0e21e 7843 LOP(OP_FORMLINE,XTERM);
79072805
LW
7844
7845 case KEY_fork:
7846 FUN0(OP_FORK);
7847
838f2281
BF
7848 case KEY_fc:
7849 UNI(OP_FC);
7850
79072805 7851 case KEY_fcntl:
a0d0e21e 7852 LOP(OP_FCNTL,XTERM);
79072805
LW
7853
7854 case KEY_fileno:
7855 UNI(OP_FILENO);
7856
7857 case KEY_flock:
a0d0e21e 7858 LOP(OP_FLOCK,XTERM);
79072805
LW
7859
7860 case KEY_gt:
78cdf107
Z
7861 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7862 return REPORT(0);
79072805
LW
7863 Rop(OP_SGT);
7864
7865 case KEY_ge:
78cdf107
Z
7866 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7867 return REPORT(0);
79072805
LW
7868 Rop(OP_SGE);
7869
7870 case KEY_grep:
2c38e13d 7871 LOP(OP_GREPSTART, XREF);
79072805
LW
7872
7873 case KEY_goto:
c31f6d3b 7874 PL_expect = XOPERATOR;
a0d0e21e 7875 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7876 LOOPX(OP_GOTO);
7877
7878 case KEY_gmtime:
7879 UNI(OP_GMTIME);
7880
7881 case KEY_getc:
6f33ba73 7882 UNIDOR(OP_GETC);
79072805
LW
7883
7884 case KEY_getppid:
7885 FUN0(OP_GETPPID);
7886
7887 case KEY_getpgrp:
7888 UNI(OP_GETPGRP);
7889
7890 case KEY_getpriority:
a0d0e21e 7891 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7892
7893 case KEY_getprotobyname:
7894 UNI(OP_GPBYNAME);
7895
7896 case KEY_getprotobynumber:
a0d0e21e 7897 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7898
7899 case KEY_getprotoent:
7900 FUN0(OP_GPROTOENT);
7901
7902 case KEY_getpwent:
7903 FUN0(OP_GPWENT);
7904
7905 case KEY_getpwnam:
ff68c719 7906 UNI(OP_GPWNAM);
79072805
LW
7907
7908 case KEY_getpwuid:
ff68c719 7909 UNI(OP_GPWUID);
79072805
LW
7910
7911 case KEY_getpeername:
7912 UNI(OP_GETPEERNAME);
7913
7914 case KEY_gethostbyname:
7915 UNI(OP_GHBYNAME);
7916
7917 case KEY_gethostbyaddr:
a0d0e21e 7918 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7919
7920 case KEY_gethostent:
7921 FUN0(OP_GHOSTENT);
7922
7923 case KEY_getnetbyname:
7924 UNI(OP_GNBYNAME);
7925
7926 case KEY_getnetbyaddr:
a0d0e21e 7927 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7928
7929 case KEY_getnetent:
7930 FUN0(OP_GNETENT);
7931
7932 case KEY_getservbyname:
a0d0e21e 7933 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7934
7935 case KEY_getservbyport:
a0d0e21e 7936 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7937
7938 case KEY_getservent:
7939 FUN0(OP_GSERVENT);
7940
7941 case KEY_getsockname:
7942 UNI(OP_GETSOCKNAME);
7943
7944 case KEY_getsockopt:
a0d0e21e 7945 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7946
7947 case KEY_getgrent:
7948 FUN0(OP_GGRENT);
7949
7950 case KEY_getgrnam:
ff68c719 7951 UNI(OP_GGRNAM);
79072805
LW
7952
7953 case KEY_getgrgid:
ff68c719 7954 UNI(OP_GGRGID);
79072805
LW
7955
7956 case KEY_getlogin:
7957 FUN0(OP_GETLOGIN);
7958
0d863452 7959 case KEY_given:
6154021b 7960 pl_yylval.ival = CopLINE(PL_curcop);
0f539b13
BF
7961 Perl_ck_warner_d(aTHX_
7962 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
7963 "given is experimental");
0d863452
RH
7964 OPERATOR(GIVEN);
7965
93a17b20 7966 case KEY_glob:
d67594ff
FC
7967 LOP(
7968 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7969 XTERM
7970 );
93a17b20 7971
79072805
LW
7972 case KEY_hex:
7973 UNI(OP_HEX);
7974
7975 case KEY_if:
78cdf107
Z
7976 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7977 return REPORT(0);
6154021b 7978 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7979 OPERATOR(IF);
7980
7981 case KEY_index:
a0d0e21e 7982 LOP(OP_INDEX,XTERM);
79072805
LW
7983
7984 case KEY_int:
7985 UNI(OP_INT);
7986
7987 case KEY_ioctl:
a0d0e21e 7988 LOP(OP_IOCTL,XTERM);
79072805
LW
7989
7990 case KEY_join:
a0d0e21e 7991 LOP(OP_JOIN,XTERM);
79072805
LW
7992
7993 case KEY_keys:
7994 UNI(OP_KEYS);
7995
7996 case KEY_kill:
a0d0e21e 7997 LOP(OP_KILL,XTERM);
79072805
LW
7998
7999 case KEY_last:
c31f6d3b 8000 PL_expect = XOPERATOR;
a0d0e21e 8001 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 8002 LOOPX(OP_LAST);
4e553d73 8003
79072805
LW
8004 case KEY_lc:
8005 UNI(OP_LC);
8006
8007 case KEY_lcfirst:
8008 UNI(OP_LCFIRST);
8009
8010 case KEY_local:
6154021b 8011 pl_yylval.ival = 0;
79072805
LW
8012 OPERATOR(LOCAL);
8013
8014 case KEY_length:
8015 UNI(OP_LENGTH);
8016
8017 case KEY_lt:
78cdf107
Z
8018 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8019 return REPORT(0);
79072805
LW
8020 Rop(OP_SLT);
8021
8022 case KEY_le:
78cdf107
Z
8023 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8024 return REPORT(0);
79072805
LW
8025 Rop(OP_SLE);
8026
8027 case KEY_localtime:
8028 UNI(OP_LOCALTIME);
8029
8030 case KEY_log:
8031 UNI(OP_LOG);
8032
8033 case KEY_link:
a0d0e21e 8034 LOP(OP_LINK,XTERM);
79072805
LW
8035
8036 case KEY_listen:
a0d0e21e 8037 LOP(OP_LISTEN,XTERM);
79072805 8038
c0329465
MB
8039 case KEY_lock:
8040 UNI(OP_LOCK);
8041
79072805
LW
8042 case KEY_lstat:
8043 UNI(OP_LSTAT);
8044
8045 case KEY_m:
8782bef2 8046 s = scan_pat(s,OP_MATCH);
79072805
LW
8047 TERM(sublex_start());
8048
a0d0e21e 8049 case KEY_map:
2c38e13d 8050 LOP(OP_MAPSTART, XREF);
4e4e412b 8051
79072805 8052 case KEY_mkdir:
a0d0e21e 8053 LOP(OP_MKDIR,XTERM);
79072805
LW
8054
8055 case KEY_msgctl:
a0d0e21e 8056 LOP(OP_MSGCTL,XTERM);
79072805
LW
8057
8058 case KEY_msgget:
a0d0e21e 8059 LOP(OP_MSGGET,XTERM);
79072805
LW
8060
8061 case KEY_msgrcv:
a0d0e21e 8062 LOP(OP_MSGRCV,XTERM);
79072805
LW
8063
8064 case KEY_msgsnd:
a0d0e21e 8065 LOP(OP_MSGSND,XTERM);
79072805 8066
77ca0c92 8067 case KEY_our:
93a17b20 8068 case KEY_my:
952306ac 8069 case KEY_state:
eac04b2e 8070 PL_in_my = (U16)tmp;
29595ff2 8071 s = SKIPSPACE1(s);
7e2040f0 8072 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
8073#ifdef PERL_MAD
8074 char* start = s;
8075#endif
3280af22 8076 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843 8077 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
e7d0b801
FC
8078 {
8079 if (!FEATURE_LEXSUBS_IS_ENABLED)
8080 Perl_croak(aTHX_
8081 "Experimental \"%s\" subs not enabled",
8082 tmp == KEY_my ? "my" :
8083 tmp == KEY_state ? "state" : "our");
64fbf0dd
FC
8084 Perl_ck_warner_d(aTHX_
8085 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8086 "The lexical_subs feature is experimental");
09bef843 8087 goto really_sub;
e7d0b801 8088 }
def3634b 8089 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 8090 if (!PL_in_my_stash) {
c750a3ec 8091 char tmpbuf[1024];
3280af22 8092 PL_bufptr = s;
d9fad198 8093 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
3c54b17a 8094 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
c750a3ec 8095 }
5db06880
NC
8096#ifdef PERL_MAD
8097 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
8098 sv_catsv(PL_thistoken, PL_nextwhite);
8099 PL_nextwhite = 0;
8100 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
8101 }
8102#endif
c750a3ec 8103 }
6154021b 8104 pl_yylval.ival = 1;
55497cff 8105 OPERATOR(MY);
93a17b20 8106
79072805 8107 case KEY_next:
c31f6d3b 8108 PL_expect = XOPERATOR;
a0d0e21e 8109 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
8110 LOOPX(OP_NEXT);
8111
8112 case KEY_ne:
78cdf107
Z
8113 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8114 return REPORT(0);
79072805
LW
8115 Eop(OP_SNE);
8116
a0d0e21e 8117 case KEY_no:
468aa647 8118 s = tokenize_use(0, s);
52d0e95b 8119 TERM(USE);
a0d0e21e
LW
8120
8121 case KEY_not:
29595ff2 8122 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 8123 FUN1(OP_NOT);
78cdf107
Z
8124 else {
8125 if (!PL_lex_allbrackets &&
8126 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8127 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 8128 OPERATOR(NOTOP);
78cdf107 8129 }
a0d0e21e 8130
79072805 8131 case KEY_open:
29595ff2 8132 s = SKIPSPACE1(s);
7e2040f0 8133 if (isIDFIRST_lazy_if(s,UTF)) {
8ea4c679
BF
8134 const char *t;
8135 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8136 &len);
c35e046a
AL
8137 for (t=d; isSPACE(*t);)
8138 t++;
e2ab214b 8139 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
8140 /* [perl #16184] */
8141 && !(t[0] == '=' && t[1] == '>')
db3abe52 8142 && !(t[0] == ':' && t[1] == ':')
240d1b6f 8143 && !keyword(s, d-s, 0)
66fbe8fb 8144 ) {
71aa9713
BF
8145 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
8146 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
9014280d 8147 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
71aa9713
BF
8148 "Precedence problem: open %"SVf" should be open(%"SVf")",
8149 SVfARG(tmpsv), SVfARG(tmpsv));
66fbe8fb 8150 }
93a17b20 8151 }
a0d0e21e 8152 LOP(OP_OPEN,XTERM);
79072805 8153
463ee0b2 8154 case KEY_or:
78cdf107
Z
8155 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8156 return REPORT(0);
6154021b 8157 pl_yylval.ival = OP_OR;
463ee0b2
LW
8158 OPERATOR(OROP);
8159
79072805
LW
8160 case KEY_ord:
8161 UNI(OP_ORD);
8162
8163 case KEY_oct:
8164 UNI(OP_OCT);
8165
8166 case KEY_opendir:
a0d0e21e 8167 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
8168
8169 case KEY_print:
3280af22 8170 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 8171 LOP(OP_PRINT,XREF);
79072805
LW
8172
8173 case KEY_printf:
3280af22 8174 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 8175 LOP(OP_PRTF,XREF);
79072805 8176
c07a80fd 8177 case KEY_prototype:
8178 UNI(OP_PROTOTYPE);
8179
79072805 8180 case KEY_push:
a0d0e21e 8181 LOP(OP_PUSH,XTERM);
79072805
LW
8182
8183 case KEY_pop:
6f33ba73 8184 UNIDOR(OP_POP);
79072805 8185
a0d0e21e 8186 case KEY_pos:
6f33ba73 8187 UNIDOR(OP_POS);
4e553d73 8188
79072805 8189 case KEY_pack:
a0d0e21e 8190 LOP(OP_PACK,XTERM);
79072805
LW
8191
8192 case KEY_package:
a0d0e21e 8193 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 8194 s = SKIPSPACE1(s);
91152fc1 8195 s = force_strict_version(s);
4e4da3ac 8196 PL_lex_expect = XBLOCK;
79072805
LW
8197 OPERATOR(PACKAGE);
8198
8199 case KEY_pipe:
a0d0e21e 8200 LOP(OP_PIPE_OP,XTERM);
79072805
LW
8201
8202 case KEY_q:
4d68ffa0 8203 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 8204 if (!s)
d4c19fe8 8205 missingterm(NULL);
6154021b 8206 pl_yylval.ival = OP_CONST;
79072805
LW
8207 TERM(sublex_start());
8208
a0d0e21e
LW
8209 case KEY_quotemeta:
8210 UNI(OP_QUOTEMETA);
8211
ea25a9b2
Z
8212 case KEY_qw: {
8213 OP *words = NULL;
4d68ffa0 8214 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8990e307 8215 if (!s)
d4c19fe8 8216 missingterm(NULL);
3480a8d2 8217 PL_expect = XOPERATOR;
8127e0e3 8218 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
8219 int warned_comma = !ckWARN(WARN_QW);
8220 int warned_comment = warned_comma;
3280af22 8221 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 8222 while (len) {
d4c19fe8
AL
8223 for (; isSPACE(*d) && len; --len, ++d)
8224 /**/;
8127e0e3 8225 if (len) {
d4c19fe8 8226 SV *sv;
f54cb97a 8227 const char *b = d;
7e03b518 8228 if (!warned_comma || !warned_comment) {
8127e0e3 8229 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 8230 if (!warned_comma && *d == ',') {
9014280d 8231 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 8232 "Possible attempt to separate words with commas");
7e03b518 8233 ++warned_comma;
8127e0e3 8234 }
7e03b518 8235 else if (!warned_comment && *d == '#') {
9014280d 8236 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 8237 "Possible attempt to put comments in qw() list");
7e03b518 8238 ++warned_comment;
8127e0e3
GS
8239 }
8240 }
8241 }
8242 else {
d4c19fe8
AL
8243 for (; !isSPACE(*d) && len; --len, ++d)
8244 /**/;
8127e0e3 8245 }
740cce10 8246 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 8247 words = op_append_elem(OP_LIST, words,
7948272d 8248 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 8249 }
8250 }
8251 }
ea25a9b2
Z
8252 if (!words)
8253 words = newNULLLIST();
37fd879b 8254 if (PL_lex_stuff) {
8127e0e3 8255 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 8256 PL_lex_stuff = NULL;
37fd879b 8257 }
ea25a9b2
Z
8258 PL_expect = XOPERATOR;
8259 pl_yylval.opval = sawparens(words);
8260 TOKEN(QWLIST);
8261 }
8990e307 8262
79072805 8263 case KEY_qq:
4d68ffa0 8264 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 8265 if (!s)
d4c19fe8 8266 missingterm(NULL);
6154021b 8267 pl_yylval.ival = OP_STRINGIFY;
3280af22 8268 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 8269 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
8270 TERM(sublex_start());
8271
8782bef2
GB
8272 case KEY_qr:
8273 s = scan_pat(s,OP_QR);
8274 TERM(sublex_start());
8275
79072805 8276 case KEY_qx:
4d68ffa0 8277 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 8278 if (!s)
d4c19fe8 8279 missingterm(NULL);
9b201d7d 8280 readpipe_override();
79072805
LW
8281 TERM(sublex_start());
8282
8283 case KEY_return:
8284 OLDLOP(OP_RETURN);
8285
8286 case KEY_require:
29595ff2 8287 s = SKIPSPACE1(s);
c31f6d3b 8288 PL_expect = XOPERATOR;
e759cc13
RGS
8289 if (isDIGIT(*s)) {
8290 s = force_version(s, FALSE);
a7cb1f99 8291 }
e759cc13
RGS
8292 else if (*s != 'v' || !isDIGIT(s[1])
8293 || (s = force_version(s, TRUE), *s == 'v'))
8294 {
a7cb1f99
GS
8295 *PL_tokenbuf = '\0';
8296 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 8297 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
af9f5953
BF
8298 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8299 GV_ADD | (UTF ? SVf_UTF8 : 0));
a7cb1f99
GS
8300 else if (*s == '<')
8301 yyerror("<> should be quotes");
8302 }
a72a1c8b
RGS
8303 if (orig_keyword == KEY_require) {
8304 orig_keyword = 0;
6154021b 8305 pl_yylval.ival = 1;
a72a1c8b
RGS
8306 }
8307 else
6154021b 8308 pl_yylval.ival = 0;
a72a1c8b
RGS
8309 PL_expect = XTERM;
8310 PL_bufptr = s;
8311 PL_last_uni = PL_oldbufptr;
8312 PL_last_lop_op = OP_REQUIRE;
8313 s = skipspace(s);
8314 return REPORT( (int)REQUIRE );
79072805
LW
8315
8316 case KEY_reset:
8317 UNI(OP_RESET);
8318
8319 case KEY_redo:
c31f6d3b 8320 PL_expect = XOPERATOR;
a0d0e21e 8321 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
8322 LOOPX(OP_REDO);
8323
8324 case KEY_rename:
a0d0e21e 8325 LOP(OP_RENAME,XTERM);
79072805
LW
8326
8327 case KEY_rand:
8328 UNI(OP_RAND);
8329
8330 case KEY_rmdir:
8331 UNI(OP_RMDIR);
8332
8333 case KEY_rindex:
a0d0e21e 8334 LOP(OP_RINDEX,XTERM);
79072805
LW
8335
8336 case KEY_read:
a0d0e21e 8337 LOP(OP_READ,XTERM);
79072805
LW
8338
8339 case KEY_readdir:
8340 UNI(OP_READDIR);
8341
93a17b20 8342 case KEY_readline:
6f33ba73 8343 UNIDOR(OP_READLINE);
93a17b20
LW
8344
8345 case KEY_readpipe:
0858480c 8346 UNIDOR(OP_BACKTICK);
93a17b20 8347
79072805
LW
8348 case KEY_rewinddir:
8349 UNI(OP_REWINDDIR);
8350
8351 case KEY_recv:
a0d0e21e 8352 LOP(OP_RECV,XTERM);
79072805
LW
8353
8354 case KEY_reverse:
a0d0e21e 8355 LOP(OP_REVERSE,XTERM);
79072805
LW
8356
8357 case KEY_readlink:
6f33ba73 8358 UNIDOR(OP_READLINK);
79072805
LW
8359
8360 case KEY_ref:
8361 UNI(OP_REF);
8362
8363 case KEY_s:
8364 s = scan_subst(s);
6154021b 8365 if (pl_yylval.opval)
79072805
LW
8366 TERM(sublex_start());
8367 else
8368 TOKEN(1); /* force error */
8369
0d863452
RH
8370 case KEY_say:
8371 checkcomma(s,PL_tokenbuf,"filehandle");
8372 LOP(OP_SAY,XREF);
8373
a0d0e21e
LW
8374 case KEY_chomp:
8375 UNI(OP_CHOMP);
4e553d73 8376
79072805
LW
8377 case KEY_scalar:
8378 UNI(OP_SCALAR);
8379
8380 case KEY_select:
a0d0e21e 8381 LOP(OP_SELECT,XTERM);
79072805
LW
8382
8383 case KEY_seek:
a0d0e21e 8384 LOP(OP_SEEK,XTERM);
79072805
LW
8385
8386 case KEY_semctl:
a0d0e21e 8387 LOP(OP_SEMCTL,XTERM);
79072805
LW
8388
8389 case KEY_semget:
a0d0e21e 8390 LOP(OP_SEMGET,XTERM);
79072805
LW
8391
8392 case KEY_semop:
a0d0e21e 8393 LOP(OP_SEMOP,XTERM);
79072805
LW
8394
8395 case KEY_send:
a0d0e21e 8396 LOP(OP_SEND,XTERM);
79072805
LW
8397
8398 case KEY_setpgrp:
a0d0e21e 8399 LOP(OP_SETPGRP,XTERM);
79072805
LW
8400
8401 case KEY_setpriority:
a0d0e21e 8402 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
8403
8404 case KEY_sethostent:
ff68c719 8405 UNI(OP_SHOSTENT);
79072805
LW
8406
8407 case KEY_setnetent:
ff68c719 8408 UNI(OP_SNETENT);
79072805
LW
8409
8410 case KEY_setservent:
ff68c719 8411 UNI(OP_SSERVENT);
79072805
LW
8412
8413 case KEY_setprotoent:
ff68c719 8414 UNI(OP_SPROTOENT);
79072805
LW
8415
8416 case KEY_setpwent:
8417 FUN0(OP_SPWENT);
8418
8419 case KEY_setgrent:
8420 FUN0(OP_SGRENT);
8421
8422 case KEY_seekdir:
a0d0e21e 8423 LOP(OP_SEEKDIR,XTERM);
79072805
LW
8424
8425 case KEY_setsockopt:
a0d0e21e 8426 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
8427
8428 case KEY_shift:
6f33ba73 8429 UNIDOR(OP_SHIFT);
79072805
LW
8430
8431 case KEY_shmctl:
a0d0e21e 8432 LOP(OP_SHMCTL,XTERM);
79072805
LW
8433
8434 case KEY_shmget:
a0d0e21e 8435 LOP(OP_SHMGET,XTERM);
79072805
LW
8436
8437 case KEY_shmread:
a0d0e21e 8438 LOP(OP_SHMREAD,XTERM);
79072805
LW
8439
8440 case KEY_shmwrite:
a0d0e21e 8441 LOP(OP_SHMWRITE,XTERM);
79072805
LW
8442
8443 case KEY_shutdown:
a0d0e21e 8444 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
8445
8446 case KEY_sin:
8447 UNI(OP_SIN);
8448
8449 case KEY_sleep:
8450 UNI(OP_SLEEP);
8451
8452 case KEY_socket:
a0d0e21e 8453 LOP(OP_SOCKET,XTERM);
79072805
LW
8454
8455 case KEY_socketpair:
a0d0e21e 8456 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
8457
8458 case KEY_sort:
3280af22 8459 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 8460 s = SKIPSPACE1(s);
3280af22 8461 PL_expect = XTERM;
15f0808c 8462 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 8463 LOP(OP_SORT,XREF);
79072805
LW
8464
8465 case KEY_split:
a0d0e21e 8466 LOP(OP_SPLIT,XTERM);
79072805
LW
8467
8468 case KEY_sprintf:
a0d0e21e 8469 LOP(OP_SPRINTF,XTERM);
79072805
LW
8470
8471 case KEY_splice:
a0d0e21e 8472 LOP(OP_SPLICE,XTERM);
79072805
LW
8473
8474 case KEY_sqrt:
8475 UNI(OP_SQRT);
8476
8477 case KEY_srand:
8478 UNI(OP_SRAND);
8479
8480 case KEY_stat:
8481 UNI(OP_STAT);
8482
8483 case KEY_study:
79072805
LW
8484 UNI(OP_STUDY);
8485
8486 case KEY_substr:
a0d0e21e 8487 LOP(OP_SUBSTR,XTERM);
79072805
LW
8488
8489 case KEY_format:
8490 case KEY_sub:
93a17b20 8491 really_sub:
09bef843 8492 {
24b6ef70 8493 char * const tmpbuf = PL_tokenbuf + 1;
9c5ffd7c 8494 SSize_t tboffset = 0;
09bef843 8495 expectation attrful;
28cc6278 8496 bool have_name, have_proto;
f54cb97a 8497 const int key = tmp;
09bef843 8498
5db06880
NC
8499#ifdef PERL_MAD
8500 SV *tmpwhite = 0;
8501
cd81e915 8502 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1cac5c33
FC
8503 SV *subtoken = PL_madskills
8504 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8505 : NULL;
cd81e915 8506 PL_thistoken = 0;
5db06880
NC
8507
8508 d = s;
8509 s = SKIPSPACE2(s,tmpwhite);
8510#else
8767b1ab 8511 d = s;
09bef843 8512 s = skipspace(s);
5db06880 8513#endif
09bef843 8514
7e2040f0 8515 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
8516 (*s == ':' && s[1] == ':'))
8517 {
5db06880 8518#ifdef PERL_MAD
4f61fd4b 8519 SV *nametoke = NULL;
5db06880
NC
8520#endif
8521
09bef843
SB
8522 PL_expect = XBLOCK;
8523 attrful = XATTRBLOCK;
b1b65b59
JH
8524 /* remember buffer pos'n for later force_word */
8525 tboffset = s - PL_oldbufptr;
24b6ef70
FC
8526 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8527 &len);
5db06880
NC
8528#ifdef PERL_MAD
8529 if (PL_madskills)
af9f5953 8530 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
5db06880 8531#endif
689aac7b
FC
8532 *PL_tokenbuf = '&';
8533 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8534 || pad_findmy_pvn(
8535 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8536 ) != NOT_IN_PAD)
6502358f 8537 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
8538 else {
8539 sv_setsv(PL_subname,PL_curstname);
396482e1 8540 sv_catpvs(PL_subname,"::");
09bef843
SB
8541 sv_catpvn(PL_subname,tmpbuf,len);
8542 }
af9f5953
BF
8543 if (SvUTF8(PL_linestr))
8544 SvUTF8_on(PL_subname);
09bef843 8545 have_name = TRUE;
5db06880 8546
60ac52eb 8547
5db06880 8548#ifdef PERL_MAD
60ac52eb
FC
8549 start_force(0);
8550 CURMAD('X', nametoke);
8551 CURMAD('_', tmpwhite);
4210d3f1 8552 force_ident_maybe_lex('&');
5db06880
NC
8553
8554 s = SKIPSPACE2(d,tmpwhite);
8555#else
8556 s = skipspace(d);
8557#endif
09bef843 8558 }
463ee0b2 8559 else {
8767b1ab
FC
8560 if (key == KEY_my || key == KEY_our || key==KEY_state)
8561 {
8562 *d = '\0';
8563 /* diag_listed_as: Missing name in "%s sub" */
8564 Perl_croak(aTHX_
8565 "Missing name in \"%s\"", PL_bufptr);
8566 }
09bef843
SB
8567 PL_expect = XTERMBLOCK;
8568 attrful = XATTRTERM;
76f68e9b 8569 sv_setpvs(PL_subname,"?");
09bef843 8570 have_name = FALSE;
463ee0b2 8571 }
4633a7c4 8572
09bef843 8573 if (key == KEY_format) {
5db06880 8574#ifdef PERL_MAD
cd81e915 8575 PL_thistoken = subtoken;
5db06880 8576 s = d;
9c3c07f8 8577 PERL_UNUSED_VAR(tboffset);
5db06880 8578#else
09bef843 8579 if (have_name)
b1b65b59
JH
8580 (void) force_word(PL_oldbufptr + tboffset, WORD,
8581 FALSE, TRUE, TRUE);
5db06880 8582#endif
64a40898 8583 PREBLOCK(FORMAT);
09bef843 8584 }
79072805 8585
09bef843
SB
8586 /* Look for a prototype */
8587 if (*s == '(') {
d9f2850e
RGS
8588 char *p;
8589 bool bad_proto = FALSE;
9e8d7757
RB
8590 bool in_brackets = FALSE;
8591 char greedy_proto = ' ';
8592 bool proto_after_greedy_proto = FALSE;
8593 bool must_be_last = FALSE;
8594 bool underscore = FALSE;
aef2a98a 8595 bool seen_underscore = FALSE;
197afce1 8596 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
dab1c735 8597 STRLEN tmplen;
09bef843 8598
4d68ffa0 8599 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
37fd879b 8600 if (!s)
09bef843 8601 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8602 /* strip spaces and check for bad characters */
dab1c735 8603 d = SvPV(PL_lex_stuff, tmplen);
09bef843 8604 tmp = 0;
dab1c735 8605 for (p = d; tmplen; tmplen--, ++p) {
d9f2850e 8606 if (!isSPACE(*p)) {
dab1c735 8607 d[tmp++] = *p;
9e8d7757 8608
197afce1 8609 if (warnillegalproto) {
9e8d7757
RB
8610 if (must_be_last)
8611 proto_after_greedy_proto = TRUE;
dab1c735 8612 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
9e8d7757
RB
8613 bad_proto = TRUE;
8614 }
8615 else {
8616 if ( underscore ) {
34daab0f 8617 if ( !strchr(";@%", *p) )
9e8d7757
RB
8618 bad_proto = TRUE;
8619 underscore = FALSE;
8620 }
8621 if ( *p == '[' ) {
8622 in_brackets = TRUE;
8623 }
8624 else if ( *p == ']' ) {
8625 in_brackets = FALSE;
8626 }
8627 else if ( (*p == '@' || *p == '%') &&
8628 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8629 !in_brackets ) {
8630 must_be_last = TRUE;
8631 greedy_proto = *p;
8632 }
8633 else if ( *p == '_' ) {
aef2a98a 8634 underscore = seen_underscore = TRUE;
9e8d7757
RB
8635 }
8636 }
8637 }
d37a9538 8638 }
09bef843 8639 }
dab1c735 8640 d[tmp] = '\0';
9e8d7757 8641 if (proto_after_greedy_proto)
197afce1 8642 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8643 "Prototype after '%c' for %"SVf" : %s",
8644 greedy_proto, SVfARG(PL_subname), d);
dab1c735
BF
8645 if (bad_proto) {
8646 SV *dsv = newSVpvs_flags("", SVs_TEMP);
197afce1 8647 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8648 "Illegal character %sin prototype for %"SVf" : %s",
8649 seen_underscore ? "after '_' " : "",
dab1c735 8650 SVfARG(PL_subname),
97eb901d
BF
8651 SvUTF8(PL_lex_stuff)
8652 ? sv_uni_display(dsv,
8653 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8654 tmp,
8655 UNI_DISPLAY_ISPRINT)
8656 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8657 PERL_PV_ESCAPE_NONASCII));
dab1c735
BF
8658 }
8659 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8660 have_proto = TRUE;
68dc0745 8661
5db06880
NC
8662#ifdef PERL_MAD
8663 start_force(0);
cd81e915 8664 CURMAD('q', PL_thisopen);
5db06880 8665 CURMAD('_', tmpwhite);
cd81e915
NC
8666 CURMAD('=', PL_thisstuff);
8667 CURMAD('Q', PL_thisclose);
5db06880
NC
8668 NEXTVAL_NEXTTOKE.opval =
8669 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8670 PL_lex_stuff = NULL;
5db06880
NC
8671 force_next(THING);
8672
8673 s = SKIPSPACE2(s,tmpwhite);
8674#else
09bef843 8675 s = skipspace(s);
5db06880 8676#endif
4633a7c4 8677 }
09bef843
SB
8678 else
8679 have_proto = FALSE;
8680
8681 if (*s == ':' && s[1] != ':')
8682 PL_expect = attrful;
8e742a20
MHM
8683 else if (*s != '{' && key == KEY_sub) {
8684 if (!have_name)
8685 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8686 else if (*s != ';' && *s != '}')
be2597df 8687 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8688 }
09bef843 8689
5db06880
NC
8690#ifdef PERL_MAD
8691 start_force(0);
8692 if (tmpwhite) {
8693 if (PL_madskills)
6b29d1f5 8694 curmad('^', newSVpvs(""));
5db06880
NC
8695 CURMAD('_', tmpwhite);
8696 }
8697 force_next(0);
8698
cd81e915 8699 PL_thistoken = subtoken;
9c3c07f8 8700 PERL_UNUSED_VAR(have_proto);
5db06880 8701#else
09bef843 8702 if (have_proto) {
9ded7720 8703 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8704 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8705 PL_lex_stuff = NULL;
09bef843 8706 force_next(THING);
68dc0745 8707 }
5db06880 8708#endif
09bef843 8709 if (!have_name) {
49a54bbe
NC
8710 if (PL_curstash)
8711 sv_setpvs(PL_subname, "__ANON__");
8712 else
8713 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8714 TOKEN(ANONSUB);
4633a7c4 8715 }
5db06880 8716#ifndef PERL_MAD
4210d3f1 8717 force_ident_maybe_lex('&');
5db06880 8718#endif
09bef843 8719 TOKEN(SUB);
4633a7c4 8720 }
79072805
LW
8721
8722 case KEY_system:
a0d0e21e 8723 LOP(OP_SYSTEM,XREF);
79072805
LW
8724
8725 case KEY_symlink:
a0d0e21e 8726 LOP(OP_SYMLINK,XTERM);
79072805
LW
8727
8728 case KEY_syscall:
a0d0e21e 8729 LOP(OP_SYSCALL,XTERM);
79072805 8730
c07a80fd 8731 case KEY_sysopen:
8732 LOP(OP_SYSOPEN,XTERM);
8733
137443ea 8734 case KEY_sysseek:
8735 LOP(OP_SYSSEEK,XTERM);
8736
79072805 8737 case KEY_sysread:
a0d0e21e 8738 LOP(OP_SYSREAD,XTERM);
79072805
LW
8739
8740 case KEY_syswrite:
a0d0e21e 8741 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8742
8743 case KEY_tr:
8ce4b50f 8744 case KEY_y:
79072805
LW
8745 s = scan_trans(s);
8746 TERM(sublex_start());
8747
8748 case KEY_tell:
8749 UNI(OP_TELL);
8750
8751 case KEY_telldir:
8752 UNI(OP_TELLDIR);
8753
463ee0b2 8754 case KEY_tie:
a0d0e21e 8755 LOP(OP_TIE,XTERM);
463ee0b2 8756
c07a80fd 8757 case KEY_tied:
8758 UNI(OP_TIED);
8759
79072805
LW
8760 case KEY_time:
8761 FUN0(OP_TIME);
8762
8763 case KEY_times:
8764 FUN0(OP_TMS);
8765
8766 case KEY_truncate:
a0d0e21e 8767 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8768
8769 case KEY_uc:
8770 UNI(OP_UC);
8771
8772 case KEY_ucfirst:
8773 UNI(OP_UCFIRST);
8774
463ee0b2
LW
8775 case KEY_untie:
8776 UNI(OP_UNTIE);
8777
79072805 8778 case KEY_until:
78cdf107
Z
8779 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8780 return REPORT(0);
6154021b 8781 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8782 OPERATOR(UNTIL);
8783
8784 case KEY_unless:
78cdf107
Z
8785 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8786 return REPORT(0);
6154021b 8787 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8788 OPERATOR(UNLESS);
8789
8790 case KEY_unlink:
a0d0e21e 8791 LOP(OP_UNLINK,XTERM);
79072805
LW
8792
8793 case KEY_undef:
6f33ba73 8794 UNIDOR(OP_UNDEF);
79072805
LW
8795
8796 case KEY_unpack:
a0d0e21e 8797 LOP(OP_UNPACK,XTERM);
79072805
LW
8798
8799 case KEY_utime:
a0d0e21e 8800 LOP(OP_UTIME,XTERM);
79072805
LW
8801
8802 case KEY_umask:
6f33ba73 8803 UNIDOR(OP_UMASK);
79072805
LW
8804
8805 case KEY_unshift:
a0d0e21e
LW
8806 LOP(OP_UNSHIFT,XTERM);
8807
8808 case KEY_use:
468aa647 8809 s = tokenize_use(1, s);
a0d0e21e 8810 OPERATOR(USE);
79072805
LW
8811
8812 case KEY_values:
8813 UNI(OP_VALUES);
8814
8815 case KEY_vec:
a0d0e21e 8816 LOP(OP_VEC,XTERM);
79072805 8817
0d863452 8818 case KEY_when:
78cdf107
Z
8819 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8820 return REPORT(0);
6154021b 8821 pl_yylval.ival = CopLINE(PL_curcop);
0f539b13
BF
8822 Perl_ck_warner_d(aTHX_
8823 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8824 "when is experimental");
0d863452
RH
8825 OPERATOR(WHEN);
8826
79072805 8827 case KEY_while:
78cdf107
Z
8828 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8829 return REPORT(0);
6154021b 8830 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8831 OPERATOR(WHILE);
8832
8833 case KEY_warn:
3280af22 8834 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8835 LOP(OP_WARN,XTERM);
79072805
LW
8836
8837 case KEY_wait:
8838 FUN0(OP_WAIT);
8839
8840 case KEY_waitpid:
a0d0e21e 8841 LOP(OP_WAITPID,XTERM);
79072805
LW
8842
8843 case KEY_wantarray:
8844 FUN0(OP_WANTARRAY);
8845
8846 case KEY_write:
9d116dd7
JH
8847#ifdef EBCDIC
8848 {
df3728a2
JH
8849 char ctl_l[2];
8850 ctl_l[0] = toCTRL('L');
8851 ctl_l[1] = '\0';
fafc274c 8852 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8853 }
8854#else
fafc274c
NC
8855 /* Make sure $^L is defined */
8856 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8857#endif
79072805
LW
8858 UNI(OP_ENTERWRITE);
8859
8860 case KEY_x:
78cdf107
Z
8861 if (PL_expect == XOPERATOR) {
8862 if (*s == '=' && !PL_lex_allbrackets &&
8863 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8864 return REPORT(0);
79072805 8865 Mop(OP_REPEAT);
78cdf107 8866 }
79072805
LW
8867 check_uni();
8868 goto just_a_word;
8869
a0d0e21e 8870 case KEY_xor:
78cdf107
Z
8871 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8872 return REPORT(0);
6154021b 8873 pl_yylval.ival = OP_XOR;
a0d0e21e 8874 OPERATOR(OROP);
79072805 8875 }
49dc05e3 8876 }}
79072805 8877}
bf4acbe4
GS
8878#ifdef __SC__
8879#pragma segment Main
8880#endif
79072805 8881
3875fc11
FC
8882/*
8883 S_pending_ident
8884
8885 Looks up an identifier in the pad or in a package
8886
8887 Returns:
8888 PRIVATEREF if this is a lexical name.
8889 WORD if this belongs to a package.
8890
8891 Structure:
8892 if we're in a my declaration
8893 croak if they tried to say my($foo::bar)
8894 build the ops for a my() declaration
8895 if it's an access to a my() variable
8896 build ops for access to a my() variable
8897 if in a dq string, and they've said @foo and we can't find @foo
8898 warn
8899 build ops for a bareword
8900*/
8901
3f33d153
FC
8902static int
8903S_pending_ident(pTHX)
8eceec63 8904{
97aff369 8905 dVAR;
bbd11bfc 8906 PADOFFSET tmp = 0;
3f33d153 8907 const char pit = (char)pl_yylval.ival;
9bde8eb0
NC
8908 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8909 /* All routes through this function want to know if there is a colon. */
c099d646 8910 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63 8911
3f33d153
FC
8912 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8913 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8914
8915 /* if we're in a my(), we can't allow dynamics here.
8916 $foo'bar has already been turned into $foo::bar, so
8917 just check for colons.
8918
8919 if it's a legal name, the OP is a PADANY.
8920 */
8921 if (PL_in_my) {
8922 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8923 if (has_colon)
4bca4ee0 8924 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8eceec63 8925 "variable %s in \"our\"",
4bca4ee0 8926 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
bc9b26ca 8927 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8928 }
8929 else {
9bde8eb0 8930 if (has_colon)
58576270
BF
8931 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8932 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8933 UTF ? SVf_UTF8 : 0);
8eceec63 8934
3f33d153
FC
8935 pl_yylval.opval = newOP(OP_PADANY, 0);
8936 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
bc9b26ca 8937 UTF ? SVf_UTF8 : 0);
3f33d153 8938 return PRIVATEREF;
8eceec63
SC
8939 }
8940 }
8941
8942 /*
8943 build the ops for accesses to a my() variable.
8eceec63
SC
8944 */
8945
9bde8eb0 8946 if (!has_colon) {
8716503d 8947 if (!PL_in_my)
bc9b26ca
BF
8948 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8949 UTF ? SVf_UTF8 : 0);
8716503d 8950 if (tmp != NOT_IN_PAD) {
8eceec63 8951 /* might be an "our" variable" */
00b1698f 8952 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8953 /* build ops for a bareword */
b64e5050
AL
8954 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8955 HEK * const stashname = HvNAME_HEK(stash);
8956 SV * const sym = newSVhek(stashname);
396482e1 8957 sv_catpvs(sym, "::");
2a33114a 8958 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
3f33d153
FC
8959 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8960 pl_yylval.opval->op_private = OPpCONST_ENTERED;
4210d3f1
FC
8961 if (pit != '&')
8962 gv_fetchsv(sym,
8eceec63
SC
8963 (PL_in_eval
8964 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8965 : GV_ADDMULTI
8eceec63
SC
8966 ),
8967 ((PL_tokenbuf[0] == '$') ? SVt_PV
8968 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8969 : SVt_PVHV));
3f33d153 8970 return WORD;
8eceec63
SC
8971 }
8972
3f33d153
FC
8973 pl_yylval.opval = newOP(OP_PADANY, 0);
8974 pl_yylval.opval->op_targ = tmp;
8975 return PRIVATEREF;
8eceec63
SC
8976 }
8977 }
8978
8979 /*
8980 Whine if they've said @foo in a doublequoted string,
8981 and @foo isn't a variable we can find in the symbol
8982 table.
8983 */
d824713b
NC
8984 if (ckWARN(WARN_AMBIGUOUS) &&
8985 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
0be4d16f
BF
8986 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8987 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8eceec63 8988 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8989 /* DO NOT warn for @- and @+ */
8990 && !( PL_tokenbuf[2] == '\0' &&
8991 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8992 )
8eceec63
SC
8993 {
8994 /* Downgraded from fatal to warning 20000522 mjd */
d824713b 8995 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
29fb1d0e
BF
8996 "Possible unintended interpolation of %"SVf" in string",
8997 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8998 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8eceec63
SC
8999 }
9000 }
9001
9002 /* build ops for a bareword */
3f33d153
FC
9003 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9004 newSVpvn_flags(PL_tokenbuf + 1,
0be4d16f
BF
9005 tokenbuf_len - 1,
9006 UTF ? SVf_UTF8 : 0 ));
3f33d153 9007 pl_yylval.opval->op_private = OPpCONST_ENTERED;
4210d3f1
FC
9008 if (pit != '&')
9009 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
0be4d16f
BF
9010 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9011 | ( UTF ? SVf_UTF8 : 0 ),
223f0fb7
NC
9012 ((PL_tokenbuf[0] == '$') ? SVt_PV
9013 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9014 : SVt_PVHV));
3f33d153 9015 return WORD;
8eceec63
SC
9016}
9017
76e3520e 9018STATIC void
c94115d8 9019S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 9020{
97aff369 9021 dVAR;
2f3197b3 9022
7918f24d
NC
9023 PERL_ARGS_ASSERT_CHECKCOMMA;
9024
d008e5eb 9025 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
9026 if (ckWARN(WARN_SYNTAX)) {
9027 int level = 1;
26ff0806 9028 const char *w;
d008e5eb
GS
9029 for (w = s+2; *w && level; w++) {
9030 if (*w == '(')
9031 ++level;
9032 else if (*w == ')')
9033 --level;
9034 }
888fea98
NC
9035 while (isSPACE(*w))
9036 ++w;
b1439985
RGS
9037 /* the list of chars below is for end of statements or
9038 * block / parens, boolean operators (&&, ||, //) and branch
9039 * constructs (or, and, if, until, unless, while, err, for).
9040 * Not a very solid hack... */
9041 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 9042 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 9043 "%s (...) interpreted as function",name);
d008e5eb 9044 }
2f3197b3 9045 }
3280af22 9046 while (s < PL_bufend && isSPACE(*s))
2f3197b3 9047 s++;
a687059c
LW
9048 if (*s == '(')
9049 s++;
3280af22 9050 while (s < PL_bufend && isSPACE(*s))
a687059c 9051 s++;
7e2040f0 9052 if (isIDFIRST_lazy_if(s,UTF)) {
d0fb66e4
BF
9053 const char * const w = s;
9054 s += UTF ? UTF8SKIP(s) : 1;
8a2bca12 9055 while (isWORDCHAR_lazy_if(s,UTF))
d0fb66e4 9056 s += UTF ? UTF8SKIP(s) : 1;
3280af22 9057 while (s < PL_bufend && isSPACE(*s))
a687059c 9058 s++;
e929a76b 9059 if (*s == ',') {
c94115d8 9060 GV* gv;
5458a98a 9061 if (keyword(w, s - w, 0))
e929a76b 9062 return;
c94115d8 9063
2e38bce1 9064 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
c94115d8 9065 if (gv && GvCVu(gv))
abbb3198 9066 return;
cea2e8a9 9067 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
9068 }
9069 }
9070}
9071
a1941760
DM
9072/* S_new_constant(): do any overload::constant lookup.
9073
9074 Either returns sv, or mortalizes/frees sv and returns a new SV*.
423cee85
JH
9075 Best used as sv=new_constant(..., sv, ...).
9076 If s, pv are NULL, calls subroutine with one argument,
107160e2
KW
9077 and <type> is used with error messages only.
9078 <type> is assumed to be well formed UTF-8 */
423cee85 9079
b3ac6de7 9080STATIC SV *
eb0d8d16
NC
9081S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9082 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 9083{
27da23d5 9084 dVAR; dSP;
fbb93542 9085 HV * table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 9086 SV *res;
eed484f9 9087 SV *errsv = NULL;
b3ac6de7
IZ
9088 SV **cvp;
9089 SV *cv, *typesv;
89e33a05 9090 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 9091
7918f24d 9092 PERL_ARGS_ASSERT_NEW_CONSTANT;
f374c70f
FC
9093 /* We assume that this is true: */
9094 if (*key == 'c') { assert (strEQ(key, "charnames")); }
bb4784f0 9095 assert(type || s);
7918f24d 9096
f8988b41 9097 /* charnames doesn't work well if there have been errors found */
f374c70f 9098 if (PL_error_count > 0 && *key == 'c')
14ca8ff4
FC
9099 {
9100 SvREFCNT_dec_NN(sv);
f8988b41 9101 return &PL_sv_undef;
14ca8ff4 9102 }
f8988b41 9103
5f7f7af5 9104 sv_2mortal(sv); /* Parent created it permanently */
fbb93542
KW
9105 if (!table
9106 || ! (PL_hints & HINT_LOCALIZE_HH)
9107 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9108 || ! SvOK(*cvp))
9109 {
5f7f7af5 9110 char *msg;
423cee85 9111
fbb93542
KW
9112 /* Here haven't found what we're looking for. If it is charnames,
9113 * perhaps it needs to be loaded. Try doing that before giving up */
f374c70f 9114 if (*key == 'c') {
fbb93542
KW
9115 Perl_load_module(aTHX_
9116 0,
9117 newSVpvs("_charnames"),
9118 /* version parameter; no need to specify it, as if
9119 * we get too early a version, will fail anyway,
9120 * not being able to find '_charnames' */
9121 NULL,
9122 newSVpvs(":full"),
9123 newSVpvs(":short"),
9124 NULL);
9125 SPAGAIN;
9126 table = GvHV(PL_hintgv);
9127 if (table
9128 && (PL_hints & HINT_LOCALIZE_HH)
9129 && (cvp = hv_fetch(table, key, keylen, FALSE))
9130 && SvOK(*cvp))
9131 {
9132 goto now_ok;
9133 }
9134 }
9135 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
5f7f7af5 9136 msg = Perl_form(aTHX_
bb4784f0
FC
9137 "Constant(%.*s) unknown",
9138 (int)(type ? typelen : len),
9139 (type ? type: s));
fbb93542
KW
9140 }
9141 else {
3918902d
KW
9142 why1 = "$^H{";
9143 why2 = key;
9144 why3 = "} is not defined";
9145 report:
f374c70f 9146 if (*key == 'c') {
5f7f7af5 9147 msg = Perl_form(aTHX_
bde9e88d
KW
9148 /* The +3 is for '\N{'; -4 for that, plus '}' */
9149 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
5f7f7af5 9150 );
90249f0a
KW
9151 }
9152 else {
5f7f7af5 9153 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
bb4784f0
FC
9154 (int)(type ? typelen : len),
9155 (type ? type: s), why1, why2, why3);
90249f0a 9156 }
3918902d 9157 }
5f7f7af5
FC
9158 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9159 return SvREFCNT_inc_simple_NN(sv);
423cee85 9160 }
fbb93542 9161now_ok:
b3ac6de7 9162 cv = *cvp;
423cee85 9163 if (!pv && s)
59cd0e26 9164 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 9165 if (type && pv)
59cd0e26 9166 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 9167 else
423cee85 9168 typesv = &PL_sv_undef;
4e553d73 9169
e788e7d3 9170 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
9171 ENTER ;
9172 SAVETMPS;
4e553d73 9173
423cee85 9174 PUSHMARK(SP) ;
a5845cb7 9175 EXTEND(sp, 3);
423cee85
JH
9176 if (pv)
9177 PUSHs(pv);
b3ac6de7 9178 PUSHs(sv);
423cee85
JH
9179 if (pv)
9180 PUSHs(typesv);
b3ac6de7 9181 PUTBACK;
423cee85 9182 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 9183
423cee85 9184 SPAGAIN ;
4e553d73 9185
423cee85 9186 /* Check the eval first */
eed484f9 9187 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
c06ecf4f
DD
9188 STRLEN errlen;
9189 const char * errstr;
eed484f9
DD
9190 sv_catpvs(errsv, "Propagated");
9191 errstr = SvPV_const(errsv, errlen);
c06ecf4f 9192 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
e1f15930 9193 (void)POPs;
ae5c22c1 9194 res = SvREFCNT_inc_simple_NN(sv);
423cee85
JH
9195 }
9196 else {
9197 res = POPs;
ae5c22c1 9198 SvREFCNT_inc_simple_void_NN(res);
423cee85 9199 }
4e553d73 9200
423cee85
JH
9201 PUTBACK ;
9202 FREETMPS ;
9203 LEAVE ;
b3ac6de7 9204 POPSTACK;
4e553d73 9205
b3ac6de7 9206 if (!SvOK(res)) {
423cee85
JH
9207 why1 = "Call to &{$^H{";
9208 why2 = key;
f0af216f 9209 why3 = "}} did not return a defined value";
423cee85 9210 sv = res;
5f7f7af5 9211 (void)sv_2mortal(sv);
423cee85 9212 goto report;
9b0e499b 9213 }
423cee85 9214
9b0e499b 9215 return res;
b3ac6de7 9216}
4e553d73 9217
07f72646
BF
9218PERL_STATIC_INLINE void
9219S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9220 dVAR;
9221 PERL_ARGS_ASSERT_PARSE_IDENT;
9222
9223 for (;;) {
9224 if (*d >= e)
9225 Perl_croak(aTHX_ "%s", ident_too_long);
32833930
BF
9226 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9227 /* The UTF-8 case must come first, otherwise things
9228 * like c\N{COMBINING TILDE} would start failing, as the
9229 * isWORDCHAR_A case below would gobble the 'c' up.
9230 */
9231
07f72646 9232 char *t = *s + UTF8SKIP(*s);
32833930 9233 while (isIDCONT_utf8((U8*)t))
07f72646
BF
9234 t += UTF8SKIP(t);
9235 if (*d + (t - *s) > e)
9236 Perl_croak(aTHX_ "%s", ident_too_long);
9237 Copy(*s, *d, t - *s, char);
9238 *d += t - *s;
9239 *s = t;
9240 }
32833930
BF
9241 else if ( isWORDCHAR_A(**s) ) {
9242 do {
9243 *(*d)++ = *(*s)++;
9244 } while isWORDCHAR_A(**s);
9245 }
07f72646
BF
9246 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9247 *(*d)++ = ':';
9248 *(*d)++ = ':';
9249 (*s)++;
9250 }
9251 else if (allow_package && **s == ':' && (*s)[1] == ':'
9252 /* Disallow things like Foo::$bar. For the curious, this is
9253 * the code path that triggers the "Bad name after" warning
9254 * when looking for barewords.
9255 */
9256 && (*s)[2] != '$') {
9257 *(*d)++ = *(*s)++;
9258 *(*d)++ = *(*s)++;
9259 }
9260 else
9261 break;
9262 }
9263 return;
9264}
9265
d0a148a6
NC
9266/* Returns a NUL terminated string, with the length of the string written to
9267 *slp
9268 */
76e3520e 9269STATIC char *
5aaab254 9270S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 9271{
97aff369 9272 dVAR;
eb578fdb
KW
9273 char *d = dest;
9274 char * const e = d + destlen - 3; /* two-character token, ending NUL */
07f72646 9275 bool is_utf8 = cBOOL(UTF);
7918f24d
NC
9276
9277 PERL_ARGS_ASSERT_SCAN_WORD;
9278
07f72646
BF
9279 parse_ident(&s, &d, e, allow_package, is_utf8);
9280 *d = '\0';
9281 *slp = d - dest;
9282 return s;
378cc40b
LW
9283}
9284
76e3520e 9285STATIC char *
5aaab254 9286S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 9287{
97aff369 9288 dVAR;
6136c704 9289 char *bracket = NULL;
748a9306 9290 char funny = *s++;
eb578fdb
KW
9291 char *d = dest;
9292 char * const e = d + destlen - 3; /* two-character token, ending NUL */
07f72646 9293 bool is_utf8 = cBOOL(UTF);
378cc40b 9294
7918f24d
NC
9295 PERL_ARGS_ASSERT_SCAN_IDENT;
9296
a0d0e21e 9297 if (isSPACE(*s))
29595ff2 9298 s = PEEKSPACE(s);
de3bb511 9299 if (isDIGIT(*s)) {
8903cb82 9300 while (isDIGIT(*s)) {
9301 if (d >= e)
e5cc0c0f 9302 Perl_croak(aTHX_ "%s", ident_too_long);
378cc40b 9303 *d++ = *s++;
8903cb82 9304 }
378cc40b
LW
9305 }
9306 else {
07f72646 9307 parse_ident(&s, &d, e, 1, is_utf8);
378cc40b
LW
9308 }
9309 *d = '\0';
9310 d = dest;
79072805 9311 if (*d) {
c5b6cc8c
NC
9312 /* Either a digit variable, or parse_ident() found an identifier
9313 (anything valid as a bareword), so job done and return. */
3280af22
NIS
9314 if (PL_lex_state != LEX_NORMAL)
9315 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 9316 return s;
378cc40b 9317 }
748a9306 9318 if (*s == '$' && s[1] &&
0a520fce
BF
9319 (isIDFIRST_lazy_if(s+1,is_utf8)
9320 || isDIGIT_A((U8)s[1])
9321 || s[1] == '$'
9322 || s[1] == '{'
9323 || strnEQ(s+1,"::",2)) )
5cd24f17 9324 {
c5b6cc8c
NC
9325 /* Dereferencing a value in a scalar variable.
9326 The alternatives are different syntaxes for a scalar variable.
9327 Using ' as a leading package separator isn't allowed. :: is. */
4810e5ec 9328 return s;
5cd24f17 9329 }
c5b6cc8c 9330 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
79072805
LW
9331 if (*s == '{') {
9332 bracket = s;
9333 s++;
a21046ad
BF
9334 while (s < send && SPACE_OR_TAB(*s))
9335 s++;
79072805 9336 }
32833930 9337
850a35c9
BF
9338#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
9339 || isCNTRL_A((U8)(d)) \
9340 || isDIGIT_A((U8)(d)) \
9341 || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
32833930 9342 if (s < send
850a35c9 9343 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
32833930 9344 {
07f72646 9345 if (is_utf8) {
204e6232
BF
9346 const STRLEN skip = UTF8SKIP(s);
9347 STRLEN i;
9348 d[skip] = '\0';
9349 for ( i = 0; i < skip; i++ )
9350 d[i] = *s++;
9351 }
9352 else {
9353 *d = *s++;
9354 d[1] = '\0';
9355 }
9356 }
c5b6cc8c 9357 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
2b92dfce 9358 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 9359 *d = toCTRL(*s);
9360 s++;
de3bb511 9361 }
c5b6cc8c
NC
9362 /* Warn about ambiguous code after unary operators if {...} notation isn't
9363 used. There's no difference in ambiguity; it's merely a heuristic
9364 about when not to warn. */
fbdd83da
DIM
9365 else if (ck_uni && !bracket)
9366 check_uni();
79072805 9367 if (bracket) {
c5b6cc8c 9368 /* If we were processing {...} notation then... */
07f72646 9369 if (isIDFIRST_lazy_if(d,is_utf8)) {
c5b6cc8c
NC
9370 /* if it starts as a valid identifier, assume that it is one.
9371 (the later check for } being at the expected point will trap
9372 cases where this doesn't pan out.) */
07f72646
BF
9373 d += is_utf8 ? UTF8SKIP(d) : 1;
9374 parse_ident(&s, &d, e, 1, is_utf8);
79072805 9375 *d = '\0';
c35e046a
AL
9376 while (s < send && SPACE_OR_TAB(*s))
9377 s++;
ff68c719 9378 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
c5b6cc8c 9379 /* ${foo[0]} and ${foo{bar}} notation. */
5458a98a 9380 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
9381 const char * const brack =
9382 (const char *)
9383 ((*s == '[') ? "[...]" : "{...}");
e850844c 9384 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 9385 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 9386 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
9387 funny, dest, brack, funny, dest, brack);
9388 }
79072805 9389 bracket++;
a0be28da 9390 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 9391 PL_lex_allbrackets++;
79072805
LW
9392 return s;
9393 }
4e553d73
NIS
9394 }
9395 /* Handle extended ${^Foo} variables
2b92dfce 9396 * 1999-02-27 mjd-perl-patch@plover.com */
aecf5a4b 9397 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
0eb30aeb 9398 && isWORDCHAR(*s))
2b92dfce
GS
9399 {
9400 d++;
0eb30aeb 9401 while (isWORDCHAR(*s) && d < e) {
2b92dfce
GS
9402 *d++ = *s++;
9403 }
9404 if (d >= e)
e5cc0c0f 9405 Perl_croak(aTHX_ "%s", ident_too_long);
2b92dfce 9406 *d = '\0';
79072805 9407 }
a21046ad
BF
9408
9409 while (s < send && SPACE_OR_TAB(*s))
9410 s++;
9411
c5b6cc8c
NC
9412 /* Expect to find a closing } after consuming any trailing whitespace.
9413 */
79072805
LW
9414 if (*s == '}') {
9415 s++;
7df0d042 9416 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 9417 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
9418 PL_expect = XREF;
9419 }
d008e5eb 9420 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 9421 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241 9422 (keyword(dest, d - dest, 0)
07f72646 9423 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
d008e5eb 9424 {
5c66c3dd 9425 SV *tmp = newSVpvn_flags( dest, d - dest,
07f72646 9426 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
c35e046a
AL
9427 if (funny == '#')
9428 funny = '@';
9014280d 9429 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5c66c3dd
BF
9430 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9431 funny, tmp, funny, tmp);
d008e5eb
GS
9432 }
9433 }
79072805
LW
9434 }
9435 else {
c5b6cc8c
NC
9436 /* Didn't find the closing } at the point we expected, so restore
9437 state such that the next thing to process is the opening { and */
79072805 9438 s = bracket; /* let the parser handle it */
93a17b20 9439 *dest = '\0';
79072805
LW
9440 }
9441 }
3280af22
NIS
9442 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9443 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
9444 return s;
9445}
9446
858a358b 9447static bool
3955e1a9 9448S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
9449
9450 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9451 * the parse starting at 's', based on the subset that are valid in this
9452 * context input to this routine in 'valid_flags'. Advances s. Returns
96f3bfda
KW
9453 * TRUE if the input should be treated as a valid flag, so the next char
9454 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9455 * first call on the current regex. This routine will set it to any
9456 * charset modifier found. The caller shouldn't change it. This way,
9457 * another charset modifier encountered in the parse can be detected as an
9458 * error, as we have decided to allow only one */
858a358b
KW
9459
9460 const char c = **s;
84159251 9461 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
94b03d7d 9462
84159251 9463 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
8a2bca12 9464 if (isWORDCHAR_lazy_if(*s, UTF)) {
4f8dbb2d 9465 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
84159251
BF
9466 UTF ? SVf_UTF8 : 0);
9467 (*s) += charlen;
96f3bfda
KW
9468 /* Pretend that it worked, so will continue processing before
9469 * dieing */
0da72d5e 9470 return TRUE;
858a358b
KW
9471 }
9472 return FALSE;
9473 }
9474
9475 switch (c) {
94b03d7d 9476
858a358b
KW
9477 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9478 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9479 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9480 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9481 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9482 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d 9483 case LOCALE_PAT_MOD:
3955e1a9
KW
9484 if (*charset) {
9485 goto multiple_charsets;
9486 }
94b03d7d 9487 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 9488 *charset = c;
94b03d7d
KW
9489 break;
9490 case UNICODE_PAT_MOD:
3955e1a9
KW
9491 if (*charset) {
9492 goto multiple_charsets;
9493 }
94b03d7d 9494 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 9495 *charset = c;
94b03d7d
KW
9496 break;
9497 case ASCII_RESTRICT_PAT_MOD:
ff3f26d2 9498 if (! *charset) {
94b03d7d
KW
9499 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9500 }
ff3f26d2
KW
9501 else {
9502
9503 /* Error if previous modifier wasn't an 'a', but if it was, see
9504 * if, and accept, a second occurrence (only) */
9505 if (*charset != 'a'
9506 || get_regex_charset(*pmfl)
9507 != REGEX_ASCII_RESTRICTED_CHARSET)
9508 {
9509 goto multiple_charsets;
9510 }
9511 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
9512 }
9513 *charset = c;
94b03d7d
KW
9514 break;
9515 case DEPENDS_PAT_MOD:
3955e1a9
KW
9516 if (*charset) {
9517 goto multiple_charsets;
9518 }
94b03d7d 9519 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 9520 *charset = c;
94b03d7d 9521 break;
879d0c72 9522 }
94b03d7d 9523
858a358b
KW
9524 (*s)++;
9525 return TRUE;
94b03d7d 9526
3955e1a9
KW
9527 multiple_charsets:
9528 if (*charset != c) {
9529 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9530 }
ff3f26d2
KW
9531 else if (c == 'a') {
9532 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9533 }
3955e1a9
KW
9534 else {
9535 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9536 }
9537
9538 /* Pretend that it worked, so will continue processing before dieing */
9539 (*s)++;
9540 return TRUE;
879d0c72
NC
9541}
9542
76e3520e 9543STATIC char *
cea2e8a9 9544S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 9545{
97aff369 9546 dVAR;
79072805 9547 PMOP *pm;
a1941760 9548 char *s;
10edeb5d 9549 const char * const valid_flags =
a20207d7 9550 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 9551 char charset = '\0'; /* character set modifier */
5db06880
NC
9552#ifdef PERL_MAD
9553 char *modstart;
9554#endif
9555
7918f24d 9556 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 9557
a1941760
DM
9558 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9559 TRUE /* look for escaped bracketed metas */ );
9560
25c09cbf 9561 if (!s) {
6136c704 9562 const char * const delimiter = skipspace(start);
10edeb5d
JH
9563 Perl_croak(aTHX_
9564 (const char *)
9565 (*delimiter == '?'
9566 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9567 : "Search pattern not terminated" ));
25c09cbf 9568 }
bbce6d69 9569
8782bef2 9570 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
9571 if (PL_multi_open == '?') {
9572 /* This is the only point in the code that sets PMf_ONCE: */
79072805 9573 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
9574
9575 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9576 allows us to restrict the list needed by reset to just the ??
9577 matches. */
9578 assert(type != OP_TRANS);
9579 if (PL_curstash) {
daba3364 9580 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
9581 U32 elements;
9582 if (!mg) {
daba3364 9583 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
9584 0);
9585 }
9586 elements = mg->mg_len / sizeof(PMOP**);
9587 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9588 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9589 mg->mg_len = elements * sizeof(PMOP**);
9590 PmopSTASH_set(pm,PL_curstash);
9591 }
9592 }
5db06880
NC
9593#ifdef PERL_MAD
9594 modstart = s;
9595#endif
d63c20f2
DM
9596
9597 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9598 * anon CV. False positives like qr/[(?{]/ are harmless */
9599
9600 if (type == OP_QR) {
6f635923
DM
9601 STRLEN len;
9602 char *e, *p = SvPV(PL_lex_stuff, len);
9603 e = p + len;
9604 for (; p < e; p++) {
d63c20f2
DM
9605 if (p[0] == '(' && p[1] == '?'
9606 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9607 {
9608 pm->op_pmflags |= PMf_HAS_CV;
9609 break;
9610 }
9611 }
6f635923 9612 pm->op_pmflags |= PMf_IS_QR;
d63c20f2
DM
9613 }
9614
3955e1a9 9615 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
9616#ifdef PERL_MAD
9617 if (PL_madskills && modstart != s) {
9618 SV* tmptoken = newSVpvn(modstart, s - modstart);
9619 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9620 }
9621#endif
4ac733c9 9622 /* issue a warning if /c is specified,but /g is not */
a2a5de95 9623 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 9624 {
a2a5de95
NC
9625 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9626 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
9627 }
9628
3280af22 9629 PL_lex_op = (OP*)pm;
6154021b 9630 pl_yylval.ival = OP_MATCH;
378cc40b
LW
9631 return s;
9632}
9633
76e3520e 9634STATIC char *
cea2e8a9 9635S_scan_subst(pTHX_ char *start)
79072805 9636{
27da23d5 9637 dVAR;
22594288 9638 char *s;
eb578fdb 9639 PMOP *pm;
4fdae800 9640 I32 first_start;
79072805 9641 I32 es = 0;
3955e1a9 9642 char charset = '\0'; /* character set modifier */
5db06880
NC
9643#ifdef PERL_MAD
9644 char *modstart;
9645#endif
79072805 9646
7918f24d
NC
9647 PERL_ARGS_ASSERT_SCAN_SUBST;
9648
6154021b 9649 pl_yylval.ival = OP_NULL;
79072805 9650
4d68ffa0
KW
9651 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9652 TRUE /* look for escaped bracketed metas */ );
79072805 9653
37fd879b 9654 if (!s)
cea2e8a9 9655 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9656
3280af22 9657 if (s[-1] == PL_multi_open)
79072805 9658 s--;
5db06880
NC
9659#ifdef PERL_MAD
9660 if (PL_madskills) {
cd81e915
NC
9661 CURMAD('q', PL_thisopen);
9662 CURMAD('_', PL_thiswhite);
9663 CURMAD('E', PL_thisstuff);
9664 CURMAD('Q', PL_thisclose);
9665 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9666 }
9667#endif
79072805 9668
3280af22 9669 first_start = PL_multi_start;
4d68ffa0 9670 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 9671 if (!s) {
37fd879b 9672 if (PL_lex_stuff) {
3280af22 9673 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9674 PL_lex_stuff = NULL;
37fd879b 9675 }
cea2e8a9 9676 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9677 }
3280af22 9678 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9679
79072805 9680 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9681
9682#ifdef PERL_MAD
9683 if (PL_madskills) {
cd81e915
NC
9684 CURMAD('z', PL_thisopen);
9685 CURMAD('R', PL_thisstuff);
9686 CURMAD('Z', PL_thisclose);
5db06880
NC
9687 }
9688 modstart = s;
9689#endif
9690
48c036b1 9691 while (*s) {
a20207d7 9692 if (*s == EXEC_PAT_MOD) {
a687059c 9693 s++;
2f3197b3 9694 es++;
a687059c 9695 }
3955e1a9
KW
9696 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9697 {
48c036b1 9698 break;
aa78b661 9699 }
378cc40b 9700 }
79072805 9701
5db06880
NC
9702#ifdef PERL_MAD
9703 if (PL_madskills) {
9704 if (modstart != s)
9705 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9706 append_madprops(PL_thismad, (OP*)pm, 0);
9707 PL_thismad = 0;
5db06880
NC
9708 }
9709#endif
a2a5de95
NC
9710 if ((pm->op_pmflags & PMf_CONTINUE)) {
9711 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9712 }
9713
79072805 9714 if (es) {
6136c704
AL
9715 SV * const repl = newSVpvs("");
9716
0244c3a4 9717 PL_multi_end = 0;
79072805 9718 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9719 while (es-- > 0) {
9720 if (es)
9721 sv_catpvs(repl, "eval ");
9722 else
9723 sv_catpvs(repl, "do ");
9724 }
6f43d98f 9725 sv_catpvs(repl, "{");
7cc34111 9726 sv_catsv(repl, PL_sublex_info.repl);
9badc361 9727 sv_catpvs(repl, "}");
25da4f38 9728 SvEVALED_on(repl);
7cc34111
FC
9729 SvREFCNT_dec(PL_sublex_info.repl);
9730 PL_sublex_info.repl = repl;
378cc40b 9731 }
79072805 9732
3280af22 9733 PL_lex_op = (OP*)pm;
6154021b 9734 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9735 return s;
9736}
9737
76e3520e 9738STATIC char *
cea2e8a9 9739S_scan_trans(pTHX_ char *start)
378cc40b 9740{
97aff369 9741 dVAR;
eb578fdb 9742 char* s;
11343788 9743 OP *o;
b84c11c8
NC
9744 U8 squash;
9745 U8 del;
9746 U8 complement;
bb16bae8 9747 bool nondestruct = 0;
5db06880
NC
9748#ifdef PERL_MAD
9749 char *modstart;
9750#endif
79072805 9751
7918f24d
NC
9752 PERL_ARGS_ASSERT_SCAN_TRANS;
9753
6154021b 9754 pl_yylval.ival = OP_NULL;
79072805 9755
4d68ffa0 9756 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
37fd879b 9757 if (!s)
cea2e8a9 9758 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9759
3280af22 9760 if (s[-1] == PL_multi_open)
2f3197b3 9761 s--;
5db06880
NC
9762#ifdef PERL_MAD
9763 if (PL_madskills) {
cd81e915
NC
9764 CURMAD('q', PL_thisopen);
9765 CURMAD('_', PL_thiswhite);
9766 CURMAD('E', PL_thisstuff);
9767 CURMAD('Q', PL_thisclose);
9768 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9769 }
9770#endif
2f3197b3 9771
4d68ffa0 9772 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 9773 if (!s) {
37fd879b 9774 if (PL_lex_stuff) {
3280af22 9775 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9776 PL_lex_stuff = NULL;
37fd879b 9777 }
cea2e8a9 9778 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9779 }
5db06880 9780 if (PL_madskills) {
cd81e915
NC
9781 CURMAD('z', PL_thisopen);
9782 CURMAD('R', PL_thisstuff);
9783 CURMAD('Z', PL_thisclose);
5db06880 9784 }
79072805 9785
a0ed51b3 9786 complement = del = squash = 0;
5db06880
NC
9787#ifdef PERL_MAD
9788 modstart = s;
9789#endif
7a1e2023
NC
9790 while (1) {
9791 switch (*s) {
9792 case 'c':
79072805 9793 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9794 break;
9795 case 'd':
a0ed51b3 9796 del = OPpTRANS_DELETE;
7a1e2023
NC
9797 break;
9798 case 's':
79072805 9799 squash = OPpTRANS_SQUASH;
7a1e2023 9800 break;
bb16bae8
FC
9801 case 'r':
9802 nondestruct = 1;
9803 break;
7a1e2023
NC
9804 default:
9805 goto no_more;
9806 }
395c3793
LW
9807 s++;
9808 }
7a1e2023 9809 no_more:
8973db79 9810
9100eeb1 9811 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
59f00321
RGS
9812 o->op_private &= ~OPpTRANS_ALL;
9813 o->op_private |= del|squash|complement|
7948272d 9814 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
7cc34111 9815 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
79072805 9816
3280af22 9817 PL_lex_op = o;
bb16bae8 9818 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9819
9820#ifdef PERL_MAD
9821 if (PL_madskills) {
9822 if (modstart != s)
9823 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9824 append_madprops(PL_thismad, o, 0);
9825 PL_thismad = 0;
5db06880
NC
9826 }
9827#endif
9828
79072805
LW
9829 return s;
9830}
9831
5097bf9b
FC
9832/* scan_heredoc
9833 Takes a pointer to the first < in <<FOO.
9834 Returns a pointer to the byte following <<FOO.
9835
9836 This function scans a heredoc, which involves different methods
9837 depending on whether we are in a string eval, quoted construct, etc.
9838 This is because PL_linestr could containing a single line of input, or
9839 a whole string being evalled, or the contents of the current quote-
9840 like operator.
9841
19bbc0d7
FC
9842 The two basic methods are:
9843 - Steal lines from the input stream
9844 - Scan the heredoc in PL_linestr and remove it therefrom
9845
9846 In a file scope or filtered eval, the first method is used; in a
9847 string eval, the second.
9848
9849 In a quote-like operator, we have to choose between the two,
9850 depending on where we can find a newline. We peek into outer lex-
9851 ing scopes until we find one with a newline in it. If we reach the
9852 outermost lexing scope and it is a file, we use the stream method.
9853 Otherwise it is treated as an eval.
5097bf9b
FC
9854*/
9855
76e3520e 9856STATIC char *
5aaab254 9857S_scan_heredoc(pTHX_ char *s)
79072805 9858{
97aff369 9859 dVAR;
79072805
LW
9860 I32 op_type = OP_SCALAR;
9861 I32 len;
9862 SV *tmpstr;
9863 char term;
eb578fdb
KW
9864 char *d;
9865 char *e;
4633a7c4 9866 char *peek;
5097bf9b 9867 const bool infile = PL_rsfp || PL_parser->filtered;
78a635de 9868 LEXSHARED *shared = PL_parser->lex_shared;
5db06880
NC
9869#ifdef PERL_MAD
9870 I32 stuffstart = s - SvPVX(PL_linestr);
9871 char *tstart;
9872
cd81e915 9873 PL_realtokenstart = -1;
5db06880 9874#endif
79072805 9875
7918f24d
NC
9876 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9877
79072805 9878 s += 2;
5097bf9b 9879 d = PL_tokenbuf + 1;
3280af22 9880 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
5097bf9b 9881 *PL_tokenbuf = '\n';
c35e046a
AL
9882 peek = s;
9883 while (SPACE_OR_TAB(*peek))
9884 peek++;
3792a11b 9885 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9886 s = peek;
79072805 9887 term = *s++;
3280af22 9888 s = delimcpy(d, e, s, PL_bufend, term, &len);
6f2d7fc9
FC
9889 if (s == PL_bufend)
9890 Perl_croak(aTHX_ "Unterminated delimiter for here document");
fc36a67e 9891 d += len;
6f2d7fc9 9892 s++;
79072805
LW
9893 }
9894 else {
9895 if (*s == '\\')
458391bd 9896 /* <<\FOO is equivalent to <<'FOO' */
79072805
LW
9897 s++, term = '\'';
9898 else
9899 term = '"';
8a2bca12 9900 if (!isWORDCHAR_lazy_if(s,UTF))
8ab8f082 9901 deprecate("bare << to mean <<\"\"");
8a2bca12 9902 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
fc36a67e 9903 if (d < e)
9904 *d++ = *s;
9905 }
9906 }
3280af22 9907 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9908 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9909 *d++ = '\n';
9910 *d = '\0';
3280af22 9911 len = d - PL_tokenbuf;
5db06880
NC
9912
9913#ifdef PERL_MAD
9914 if (PL_madskills) {
5097bf9b
FC
9915 tstart = PL_tokenbuf + 1;
9916 PL_thisclose = newSVpvn(tstart, len - 1);
5db06880 9917 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9918 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9919 stuffstart = s - SvPVX(PL_linestr);
9920 }
9921#endif
6a27c188 9922#ifndef PERL_STRICT_CR
f63a84b2
LW
9923 d = strchr(s, '\r');
9924 if (d) {
b464bac0 9925 char * const olds = s;
f63a84b2 9926 s = d;
3280af22 9927 while (s < PL_bufend) {
f63a84b2
LW
9928 if (*s == '\r') {
9929 *d++ = '\n';
9930 if (*++s == '\n')
9931 s++;
9932 }
9933 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9934 *d++ = *s++;
9935 s++;
9936 }
9937 else
9938 *d++ = *s++;
9939 }
9940 *d = '\0';
3280af22 9941 PL_bufend = d;
95a20fc0 9942 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9943 s = olds;
9944 }
9945#endif
5db06880
NC
9946#ifdef PERL_MAD
9947 if (PL_madskills) {
9948 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9949 if (PL_thisstuff)
9950 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9951 else
cd81e915 9952 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880 9953 }
748a9306 9954
5db06880 9955 stuffstart = s - SvPVX(PL_linestr);
5db06880
NC
9956#endif
9957
7d0a29fe
NC
9958 tmpstr = newSV_type(SVt_PVIV);
9959 SvGROW(tmpstr, 80);
748a9306 9960 if (term == '\'') {
79072805 9961 op_type = OP_CONST;
45977657 9962 SvIV_set(tmpstr, -1);
748a9306
LW
9963 }
9964 else if (term == '`') {
79072805 9965 op_type = OP_BACKTICK;
45977657 9966 SvIV_set(tmpstr, '\\');
748a9306 9967 }
79072805 9968
78a635de 9969 PL_multi_start = CopLINE(PL_curcop) + 1;
3280af22 9970 PL_multi_open = PL_multi_close = '<';
19bbc0d7 9971 /* inside a string eval or quote-like operator */
4efe39d2 9972 if (!infile || PL_lex_inwhat) {
60f40a38 9973 SV *linestr;
3526bd3e 9974 char *bufend;
074b1c59 9975 char * const olds = s;
d37427bc 9976 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
19bbc0d7
FC
9977 /* These two fields are not set until an inner lexing scope is
9978 entered. But we need them set here. */
4efe39d2
FC
9979 shared->ls_bufptr = s;
9980 shared->ls_linestr = PL_linestr;
9981 if (PL_lex_inwhat)
9982 /* Look for a newline. If the current buffer does not have one,
9983 peek into the line buffer of the parent lexing scope, going
9984 up as many levels as necessary to find one with a newline
9985 after bufptr.
9986 */
9987 while (!(s = (char *)memchr(
9988 (void *)shared->ls_bufptr, '\n',
9989 SvEND(shared->ls_linestr)-shared->ls_bufptr
9990 ))) {
60f40a38 9991 shared = shared->ls_prev;
f68f7dc1
FC
9992 /* shared is only null if we have gone beyond the outermost
9993 lexing scope. In a file, we will have broken out of the
9994 loop in the previous iteration. In an eval, the string buf-
9995 fer ends with "\n;", so the while condition below will have
9996 evaluated to false. So shared can never be null. */
9997 assert(shared);
60f40a38
FC
9998 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9999 most lexing scope. In a file, shared->ls_linestr at that
10000 level is just one line, so there is no body to steal. */
10001 if (infile && !shared->ls_prev) {
074b1c59 10002 s = olds;
99bd9d90
FC
10003 goto streaming;
10004 }
4efe39d2
FC
10005 }
10006 else { /* eval */
10007 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10008 assert(s);
10009 }
60f40a38
FC
10010 linestr = shared->ls_linestr;
10011 bufend = SvEND(linestr);
0244c3a4 10012 d = s;
6bf48f47 10013 while (s < bufend - len + 1 &&
621baac6 10014 memNE(s,PL_tokenbuf,len) ) {
0244c3a4 10015 if (*s++ == '\n')
78a635de 10016 ++shared->herelines;
0244c3a4 10017 }
6bf48f47 10018 if (s >= bufend - len + 1) {
932d0cf1 10019 goto interminable;
0244c3a4 10020 }
3328ab5a 10021 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
10022#ifdef PERL_MAD
10023 if (PL_madskills) {
cd81e915
NC
10024 if (PL_thisstuff)
10025 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 10026 else
cd81e915 10027 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
10028 stuffstart = s - SvPVX(PL_linestr);
10029 }
10030#endif
79072805 10031 s += len - 1;
d794b522 10032 /* the preceding stmt passes a newline */
78a635de 10033 shared->herelines++;
49d8d3a1 10034
db444266
FC
10035 /* s now points to the newline after the heredoc terminator.
10036 d points to the newline before the body of the heredoc.
10037 */
19bbc0d7
FC
10038
10039 /* We are going to modify linestr in place here, so set
10040 aside copies of the string if necessary for re-evals or
10041 (caller $n)[6]. */
a91428a4 10042 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
3328ab5a
FC
10043 check shared->re_eval_str. */
10044 if (shared->re_eval_start || shared->re_eval_str) {
db444266 10045 /* Set aside the rest of the regexp */
3328ab5a
FC
10046 if (!shared->re_eval_str)
10047 shared->re_eval_str =
10048 newSVpvn(shared->re_eval_start,
4efe39d2 10049 bufend - shared->re_eval_start);
3328ab5a 10050 shared->re_eval_start -= s-d;
db444266 10051 }
d4fe4ada
RU
10052 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10053 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10054 cx->blk_eval.cur_text == linestr)
10055 {
4efe39d2 10056 cx->blk_eval.cur_text = newSVsv(linestr);
d37427bc
FC
10057 SvSCREAM_on(cx->blk_eval.cur_text);
10058 }
db444266 10059 /* Copy everything from s onwards back to d. */
4efe39d2
FC
10060 Move(s,d,bufend-s + 1,char);
10061 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
19bbc0d7
FC
10062 /* Setting PL_bufend only applies when we have not dug deeper
10063 into other scopes, because sublex_done sets PL_bufend to
10064 SvEND(PL_linestr). */
4efe39d2 10065 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
db444266 10066 s = olds;
79072805
LW
10067 }
10068 else
a7922135 10069 {
3328ab5a 10070 SV *linestr_save;
a7922135
FC
10071 streaming:
10072 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10073 term = PL_tokenbuf[1];
10074 len--;
3328ab5a 10075 linestr_save = PL_linestr; /* must restore this afterwards */
074b1c59 10076 d = s; /* and this */
3328ab5a 10077 PL_linestr = newSVpvs("");
074b1c59
FC
10078 PL_bufend = SvPVX(PL_linestr);
10079 while (1) {
5db06880
NC
10080#ifdef PERL_MAD
10081 if (PL_madskills) {
10082 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10083 if (PL_thisstuff)
10084 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10085 else
cd81e915 10086 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10087 }
10088#endif
074b1c59 10089 PL_bufptr = PL_bufend;
d794b522 10090 CopLINE_set(PL_curcop,
78a635de 10091 PL_multi_start + shared->herelines);
112d1284
FC
10092 if (!lex_next_chunk(LEX_NO_TERM)
10093 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
3328ab5a 10094 SvREFCNT_dec(linestr_save);
932d0cf1 10095 goto interminable;
79072805 10096 }
78a635de 10097 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
112d1284
FC
10098 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10099 lex_grow_linestr(SvCUR(PL_linestr) + 2);
10100 sv_catpvs(PL_linestr, "\n\0");
10101 }
f0e67a1d 10102 s = PL_bufptr;
5db06880
NC
10103#ifdef PERL_MAD
10104 stuffstart = s - SvPVX(PL_linestr);
10105#endif
78a635de 10106 shared->herelines++;
bd61b366 10107 PL_last_lop = PL_last_uni = NULL;
6a27c188 10108#ifndef PERL_STRICT_CR
3280af22 10109 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
10110 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10111 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 10112 {
3280af22
NIS
10113 PL_bufend[-2] = '\n';
10114 PL_bufend--;
95a20fc0 10115 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 10116 }
3280af22
NIS
10117 else if (PL_bufend[-1] == '\r')
10118 PL_bufend[-1] = '\n';
f63a84b2 10119 }
3280af22
NIS
10120 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10121 PL_bufend[-1] = '\n';
f63a84b2 10122#endif
5097bf9b 10123 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
3328ab5a
FC
10124 SvREFCNT_dec(PL_linestr);
10125 PL_linestr = linestr_save;
10126 PL_linestart = SvPVX(linestr_save);
3280af22 10127 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3328ab5a 10128 s = d;
074b1c59 10129 break;
79072805
LW
10130 }
10131 else {
3280af22 10132 sv_catsv(tmpstr,PL_linestr);
395c3793 10133 }
a7922135 10134 }
395c3793 10135 }
57843af0 10136 PL_multi_end = CopLINE(PL_curcop);
79072805 10137 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 10138 SvPV_shrink_to_cur(tmpstr);
79072805 10139 }
2f31ce75 10140 if (!IN_BYTES) {
95a20fc0 10141 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
10142 SvUTF8_on(tmpstr);
10143 else if (PL_encoding)
10144 sv_recode_to_utf8(tmpstr, PL_encoding);
10145 }
3280af22 10146 PL_lex_stuff = tmpstr;
6154021b 10147 pl_yylval.ival = op_type;
79072805 10148 return s;
932d0cf1
FC
10149
10150 interminable:
932d0cf1
FC
10151 SvREFCNT_dec(tmpstr);
10152 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10153 missingterm(PL_tokenbuf + 1);
79072805
LW
10154}
10155
02aa26ce
NT
10156/* scan_inputsymbol
10157 takes: current position in input buffer
10158 returns: new position in input buffer
6154021b 10159 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
10160
10161 This code handles:
10162
10163 <> read from ARGV
10164 <FH> read from filehandle
10165 <pkg::FH> read from package qualified filehandle
10166 <pkg'FH> read from package qualified filehandle
10167 <$fh> read from filehandle in $fh
10168 <*.h> filename glob
10169
10170*/
10171
76e3520e 10172STATIC char *
cea2e8a9 10173S_scan_inputsymbol(pTHX_ char *start)
79072805 10174{
97aff369 10175 dVAR;
eb578fdb 10176 char *s = start; /* current position in buffer */
1b420867 10177 char *end;
79072805 10178 I32 len;
6136c704
AL
10179 char *d = PL_tokenbuf; /* start of temp holding space */
10180 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10181
7918f24d
NC
10182 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10183
1b420867
GS
10184 end = strchr(s, '\n');
10185 if (!end)
10186 end = PL_bufend;
10187 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
10188
10189 /* die if we didn't have space for the contents of the <>,
1b420867 10190 or if it didn't end, or if we see a newline
02aa26ce
NT
10191 */
10192
bb7a0f54 10193 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 10194 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 10195 if (s >= end)
cea2e8a9 10196 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 10197
fc36a67e 10198 s++;
02aa26ce
NT
10199
10200 /* check for <$fh>
10201 Remember, only scalar variables are interpreted as filehandles by
10202 this code. Anything more complex (e.g., <$fh{$num}>) will be
10203 treated as a glob() call.
10204 This code makes use of the fact that except for the $ at the front,
10205 a scalar variable and a filehandle look the same.
10206 */
4633a7c4 10207 if (*d == '$' && d[1]) d++;
02aa26ce
NT
10208
10209 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
8a2bca12 10210 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
2a507800 10211 d += UTF ? UTF8SKIP(d) : 1;
02aa26ce
NT
10212
10213 /* If we've tried to read what we allow filehandles to look like, and
10214 there's still text left, then it must be a glob() and not a getline.
10215 Use scan_str to pull out the stuff between the <> and treat it
10216 as nothing more than a string.
10217 */
10218
3280af22 10219 if (d - PL_tokenbuf != len) {
6154021b 10220 pl_yylval.ival = OP_GLOB;
4d68ffa0 10221 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 10222 if (!s)
cea2e8a9 10223 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
10224 return s;
10225 }
395c3793 10226 else {
9b3023bc 10227 bool readline_overriden = FALSE;
6136c704 10228 GV *gv_readline;
9b3023bc 10229 GV **gvp;
02aa26ce 10230 /* we're in a filehandle read situation */
3280af22 10231 d = PL_tokenbuf;
02aa26ce
NT
10232
10233 /* turn <> into <ARGV> */
79072805 10234 if (!len)
689badd5 10235 Copy("ARGV",d,5,char);
02aa26ce 10236
9b3023bc 10237 /* Check whether readline() is overriden */
fafc274c 10238 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 10239 if ((gv_readline
ba979b31 10240 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 10241 ||
017a3ce5 10242 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 10243 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 10244 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
10245 readline_overriden = TRUE;
10246
02aa26ce
NT
10247 /* if <$fh>, create the ops to turn the variable into a
10248 filehandle
10249 */
79072805 10250 if (*d == '$') {
02aa26ce
NT
10251 /* try to find it in the pad for this block, otherwise find
10252 add symbol table ops
10253 */
bc9b26ca 10254 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 10255 if (tmp != NOT_IN_PAD) {
00b1698f 10256 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
10257 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10258 HEK * const stashname = HvNAME_HEK(stash);
10259 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 10260 sv_catpvs(sym, "::");
f558d5af
JH
10261 sv_catpv(sym, d+1);
10262 d = SvPVX(sym);
10263 goto intro_sym;
10264 }
10265 else {
6136c704 10266 OP * const o = newOP(OP_PADSV, 0);
f558d5af 10267 o->op_targ = tmp;
9b3023bc
RGS
10268 PL_lex_op = readline_overriden
10269 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10270 op_append_elem(OP_LIST, o,
9b3023bc
RGS
10271 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10272 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 10273 }
a0d0e21e
LW
10274 }
10275 else {
f558d5af
JH
10276 GV *gv;
10277 ++d;
10278intro_sym:
10279 gv = gv_fetchpv(d,
10280 (PL_in_eval
10281 ? (GV_ADDMULTI | GV_ADDINEVAL)
25db2ea6 10282 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
f558d5af 10283 SVt_PV);
9b3023bc
RGS
10284 PL_lex_op = readline_overriden
10285 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10286 op_append_elem(OP_LIST,
9b3023bc
RGS
10287 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10288 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10289 : (OP*)newUNOP(OP_READLINE, 0,
10290 newUNOP(OP_RV2SV, 0,
10291 newGVOP(OP_GV, 0, gv)));
a0d0e21e 10292 }
7c6fadd6
RGS
10293 if (!readline_overriden)
10294 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
10295 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10296 pl_yylval.ival = OP_NULL;
79072805 10297 }
02aa26ce
NT
10298
10299 /* If it's none of the above, it must be a literal filehandle
10300 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 10301 else {
25db2ea6 10302 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9b3023bc
RGS
10303 PL_lex_op = readline_overriden
10304 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10305 op_append_elem(OP_LIST,
9b3023bc
RGS
10306 newGVOP(OP_GV, 0, gv),
10307 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10308 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 10309 pl_yylval.ival = OP_NULL;
79072805
LW
10310 }
10311 }
02aa26ce 10312
79072805
LW
10313 return s;
10314}
10315
02aa26ce
NT
10316
10317/* scan_str
10318 takes: start position in buffer
09bef843
SB
10319 keep_quoted preserve \ on the embedded delimiter(s)
10320 keep_delims preserve the delimiters around the string
d24ca0c5
DM
10321 re_reparse compiling a run-time /(?{})/:
10322 collapse // to /, and skip encoding src
02aa26ce
NT
10323 returns: position to continue reading from buffer
10324 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10325 updates the read buffer.
10326
10327 This subroutine pulls a string out of the input. It is called for:
10328 q single quotes q(literal text)
10329 ' single quotes 'literal text'
10330 qq double quotes qq(interpolate $here please)
10331 " double quotes "interpolate $here please"
10332 qx backticks qx(/bin/ls -l)
10333 ` backticks `/bin/ls -l`
10334 qw quote words @EXPORT_OK = qw( func() $spam )
10335 m// regexp match m/this/
10336 s/// regexp substitute s/this/that/
10337 tr/// string transliterate tr/this/that/
10338 y/// string transliterate y/this/that/
10339 ($*@) sub prototypes sub foo ($)
09bef843 10340 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
10341 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10342
10343 In most of these cases (all but <>, patterns and transliterate)
10344 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10345 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10346 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10347 calls scan_str().
4e553d73 10348
02aa26ce
NT
10349 It skips whitespace before the string starts, and treats the first
10350 character as the delimiter. If the delimiter is one of ([{< then
10351 the corresponding "close" character )]}> is used as the closing
10352 delimiter. It allows quoting of delimiters, and if the string has
10353 balanced delimiters ([{<>}]) it allows nesting.
10354
37fd879b
HS
10355 On success, the SV with the resulting string is put into lex_stuff or,
10356 if that is already non-NULL, into lex_repl. The second case occurs only
10357 when parsing the RHS of the special constructs s/// and tr/// (y///).
10358 For convenience, the terminating delimiter character is stuffed into
10359 SvIVX of the SV.
02aa26ce
NT
10360*/
10361
76e3520e 10362STATIC char *
4d68ffa0
KW
10363S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10364 bool deprecate_escaped_meta /* Should we issue a deprecation warning
10365 for certain paired metacharacters that
10366 appear escaped within it */
10367 )
79072805 10368{
97aff369 10369 dVAR;
a8d9c7ae
KW
10370 SV *sv; /* scalar value: string */
10371 const char *tmps; /* temp string, used for delimiter matching */
eb578fdb
KW
10372 char *s = start; /* current position in the buffer */
10373 char term; /* terminating character */
10374 char *to; /* current position in the sv's data */
a8d9c7ae
KW
10375 I32 brackets = 1; /* bracket nesting level */
10376 bool has_utf8 = FALSE; /* is there any utf8 content? */
10377 I32 termcode; /* terminating char. code */
10378 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10379 STRLEN termlen; /* length of terminating string */
10380 int last_off = 0; /* last position for nesting bracket */
4d68ffa0 10381 char *escaped_open = NULL;
5db06880
NC
10382#ifdef PERL_MAD
10383 int stuffstart;
10384 char *tstart;
10385#endif
02aa26ce 10386
7918f24d
NC
10387 PERL_ARGS_ASSERT_SCAN_STR;
10388
02aa26ce 10389 /* skip space before the delimiter */
29595ff2
NC
10390 if (isSPACE(*s)) {
10391 s = PEEKSPACE(s);
10392 }
02aa26ce 10393
5db06880 10394#ifdef PERL_MAD
cd81e915
NC
10395 if (PL_realtokenstart >= 0) {
10396 stuffstart = PL_realtokenstart;
10397 PL_realtokenstart = -1;
5db06880
NC
10398 }
10399 else
10400 stuffstart = start - SvPVX(PL_linestr);
10401#endif
02aa26ce 10402 /* mark where we are, in case we need to report errors */
79072805 10403 CLINE;
02aa26ce
NT
10404
10405 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 10406 term = *s;
220e2d4e
IH
10407 if (!UTF) {
10408 termcode = termstr[0] = term;
10409 termlen = 1;
10410 }
10411 else {
4b88fb76 10412 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
220e2d4e
IH
10413 Copy(s, termstr, termlen, U8);
10414 if (!UTF8_IS_INVARIANT(term))
10415 has_utf8 = TRUE;
10416 }
b1c7b182 10417
02aa26ce 10418 /* mark where we are */
57843af0 10419 PL_multi_start = CopLINE(PL_curcop);
3280af22 10420 PL_multi_open = term;
02aa26ce
NT
10421
10422 /* find corresponding closing delimiter */
93a17b20 10423 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
10424 termcode = termstr[0] = term = tmps[5];
10425
3280af22 10426 PL_multi_close = term;
79072805 10427
4d68ffa0
KW
10428 /* A warning is raised if the input parameter requires it for escaped (by a
10429 * backslash) paired metacharacters {} [] and () when the delimiters are
10430 * those same characters, and the backslash is ineffective. This doesn't
10431 * happen for <>, as they aren't metas. */
10432 if (deprecate_escaped_meta
10433 && (PL_multi_open == PL_multi_close
10434 || ! ckWARN_d(WARN_DEPRECATED)
10435 || PL_multi_open == '<'))
10436 {
10437 deprecate_escaped_meta = FALSE;
10438 }
10439
561b68a9
SH
10440 /* create a new SV to hold the contents. 79 is the SV's initial length.
10441 What a random number. */
7d0a29fe
NC
10442 sv = newSV_type(SVt_PVIV);
10443 SvGROW(sv, 80);
45977657 10444 SvIV_set(sv, termcode);
a0d0e21e 10445 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
10446
10447 /* move past delimiter and try to read a complete string */
09bef843 10448 if (keep_delims)
220e2d4e
IH
10449 sv_catpvn(sv, s, termlen);
10450 s += termlen;
5db06880
NC
10451#ifdef PERL_MAD
10452 tstart = SvPVX(PL_linestr) + stuffstart;
1cac5c33 10453 if (PL_madskills && !PL_thisopen && !keep_delims) {
cd81e915 10454 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
10455 stuffstart = s - SvPVX(PL_linestr);
10456 }
10457#endif
93a17b20 10458 for (;;) {
d24ca0c5 10459 if (PL_encoding && !UTF && !re_reparse) {
220e2d4e
IH
10460 bool cont = TRUE;
10461
10462 while (cont) {
95a20fc0 10463 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 10464 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 10465 &offset, (char*)termstr, termlen);
6136c704
AL
10466 const char * const ns = SvPVX_const(PL_linestr) + offset;
10467 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
10468
10469 for (; s < ns; s++) {
60d63348 10470 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10471 COPLINE_INC_WITH_HERELINES;
220e2d4e
IH
10472 }
10473 if (!found)
10474 goto read_more_line;
10475 else {
10476 /* handle quoted delimiters */
52327caf 10477 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 10478 const char *t;
95a20fc0 10479 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
10480 t--;
10481 if ((svlast-1 - t) % 2) {
10482 if (!keep_quoted) {
10483 *(svlast-1) = term;
10484 *svlast = '\0';
10485 SvCUR_set(sv, SvCUR(sv) - 1);
10486 }
10487 continue;
10488 }
10489 }
10490 if (PL_multi_open == PL_multi_close) {
10491 cont = FALSE;
10492 }
10493 else {
f54cb97a
AL
10494 const char *t;
10495 char *w;
0331ef07 10496 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
10497 /* At here, all closes are "was quoted" one,
10498 so we don't check PL_multi_close. */
10499 if (*t == '\\') {
10500 if (!keep_quoted && *(t+1) == PL_multi_open)
10501 t++;
10502 else
10503 *w++ = *t++;
10504 }
10505 else if (*t == PL_multi_open)
10506 brackets++;
10507
10508 *w = *t;
10509 }
10510 if (w < t) {
10511 *w++ = term;
10512 *w = '\0';
95a20fc0 10513 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 10514 }
0331ef07 10515 last_off = w - SvPVX(sv);
220e2d4e
IH
10516 if (--brackets <= 0)
10517 cont = FALSE;
10518 }
10519 }
10520 }
10521 if (!keep_delims) {
10522 SvCUR_set(sv, SvCUR(sv) - 1);
10523 *SvEND(sv) = '\0';
10524 }
10525 break;
10526 }
10527
02aa26ce 10528 /* extend sv if need be */
3280af22 10529 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 10530 /* set 'to' to the next character in the sv's string */
463ee0b2 10531 to = SvPVX(sv)+SvCUR(sv);
09bef843 10532
02aa26ce 10533 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
10534 if (PL_multi_open == PL_multi_close) {
10535 for (; s < PL_bufend; s++,to++) {
02aa26ce 10536 /* embedded newlines increment the current line number */
60d63348 10537 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10538 COPLINE_INC_WITH_HERELINES;
02aa26ce 10539 /* handle quoted delimiters */
3280af22 10540 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
d24ca0c5
DM
10541 if (!keep_quoted
10542 && (s[1] == term
10543 || (re_reparse && s[1] == '\\'))
10544 )
a0d0e21e 10545 s++;
d24ca0c5 10546 /* any other quotes are simply copied straight through */
a0d0e21e
LW
10547 else
10548 *to++ = *s++;
10549 }
02aa26ce
NT
10550 /* terminate when run out of buffer (the for() condition), or
10551 have found the terminator */
220e2d4e
IH
10552 else if (*s == term) {
10553 if (termlen == 1)
10554 break;
f3b9ce0f 10555 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
10556 break;
10557 }
63cd0674 10558 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10559 has_utf8 = TRUE;
93a17b20
LW
10560 *to = *s;
10561 }
10562 }
02aa26ce
NT
10563
10564 /* if the terminator isn't the same as the start character (e.g.,
10565 matched brackets), we have to allow more in the quoting, and
10566 be prepared for nested brackets.
10567 */
93a17b20 10568 else {
02aa26ce 10569 /* read until we run out of string, or we find the terminator */
3280af22 10570 for (; s < PL_bufend; s++,to++) {
02aa26ce 10571 /* embedded newlines increment the line count */
60d63348 10572 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10573 COPLINE_INC_WITH_HERELINES;
02aa26ce 10574 /* backslashes can escape the open or closing characters */
3280af22 10575 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
10576 if (!keep_quoted &&
10577 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
4d68ffa0 10578 {
a0d0e21e 10579 s++;
4d68ffa0
KW
10580
10581 /* Here, 'deprecate_escaped_meta' is true iff the
10582 * delimiters are paired metacharacters, and 's' points
10583 * to an occurrence of one of them within the string,
10584 * which was preceded by a backslash. If this is a
10585 * context where the delimiter is also a metacharacter,
10586 * the backslash is useless, and deprecated. () and []
10587 * are meta in any context. {} are meta only when
10588 * appearing in a quantifier or in things like '\p{'.
10589 * They also aren't meta unless there is a matching
10590 * closed, escaped char later on within the string.
10591 * If 's' points to an open, set a flag; if to a close,
10592 * test that flag, and raise a warning if it was set */
10593
10594 if (deprecate_escaped_meta) {
10595 if (*s == PL_multi_open) {
10596 if (*s != '{') {
10597 escaped_open = s;
10598 }
10599 else if (regcurly(s,
10600 TRUE /* Look for a closing
10601 '\}' */)
10602 || (s - start > 2 /* Look for e.g.
10603 '\x{' */
10604 && _generic_isCC(*(s-2), _CC_BACKSLASH_FOO_LBRACE_IS_META)))
10605 {
10606 escaped_open = s;
10607 }
10608 }
10609 else if (escaped_open) {
10610 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10611 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10612 escaped_open = NULL;
10613 }
10614 }
10615 }
a0d0e21e
LW
10616 else
10617 *to++ = *s++;
10618 }
02aa26ce 10619 /* allow nested opens and closes */
3280af22 10620 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 10621 break;
3280af22 10622 else if (*s == PL_multi_open)
93a17b20 10623 brackets++;
63cd0674 10624 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10625 has_utf8 = TRUE;
93a17b20
LW
10626 *to = *s;
10627 }
10628 }
02aa26ce 10629 /* terminate the copied string and update the sv's end-of-string */
93a17b20 10630 *to = '\0';
95a20fc0 10631 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 10632
02aa26ce
NT
10633 /*
10634 * this next chunk reads more into the buffer if we're not done yet
10635 */
10636
b1c7b182
GS
10637 if (s < PL_bufend)
10638 break; /* handle case where we are done yet :-) */
79072805 10639
6a27c188 10640#ifndef PERL_STRICT_CR
95a20fc0 10641 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
10642 if ((to[-2] == '\r' && to[-1] == '\n') ||
10643 (to[-2] == '\n' && to[-1] == '\r'))
10644 {
f63a84b2
LW
10645 to[-2] = '\n';
10646 to--;
95a20fc0 10647 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
10648 }
10649 else if (to[-1] == '\r')
10650 to[-1] = '\n';
10651 }
95a20fc0 10652 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
10653 to[-1] = '\n';
10654#endif
10655
220e2d4e 10656 read_more_line:
02aa26ce
NT
10657 /* if we're out of file, or a read fails, bail and reset the current
10658 line marker so we can report where the unterminated string began
10659 */
5db06880
NC
10660#ifdef PERL_MAD
10661 if (PL_madskills) {
c35e046a 10662 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10663 if (PL_thisstuff)
10664 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10665 else
cd81e915 10666 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10667 }
10668#endif
83944c01 10669 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
10670 PL_bufptr = PL_bufend;
10671 if (!lex_next_chunk(0)) {
c07a80fd 10672 sv_free(sv);
eb160463 10673 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 10674 return NULL;
79072805 10675 }
f0e67a1d 10676 s = PL_bufptr;
5db06880
NC
10677#ifdef PERL_MAD
10678 stuffstart = 0;
10679#endif
378cc40b 10680 }
4e553d73 10681
02aa26ce
NT
10682 /* at this point, we have successfully read the delimited string */
10683
d24ca0c5 10684 if (!PL_encoding || UTF || re_reparse) {
5db06880
NC
10685#ifdef PERL_MAD
10686 if (PL_madskills) {
c35e046a 10687 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 10688 const int len = s - tstart;
cd81e915 10689 if (PL_thisstuff)
c35e046a 10690 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10691 else
c35e046a 10692 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10693 if (!PL_thisclose && !keep_delims)
10694 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
10695 }
10696#endif
10697
220e2d4e
IH
10698 if (keep_delims)
10699 sv_catpvn(sv, s, termlen);
10700 s += termlen;
10701 }
5db06880
NC
10702#ifdef PERL_MAD
10703 else {
10704 if (PL_madskills) {
c35e046a
AL
10705 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10706 const int len = s - tstart - termlen;
cd81e915 10707 if (PL_thisstuff)
c35e046a 10708 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10709 else
c35e046a 10710 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10711 if (!PL_thisclose && !keep_delims)
10712 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
10713 }
10714 }
10715#endif
d24ca0c5 10716 if (has_utf8 || (PL_encoding && !re_reparse))
b1c7b182 10717 SvUTF8_on(sv);
d0063567 10718
57843af0 10719 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
10720
10721 /* if we allocated too much space, give some back */
93a17b20
LW
10722 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10723 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 10724 SvPV_renew(sv, SvLEN(sv));
79072805 10725 }
02aa26ce
NT
10726
10727 /* decide whether this is the first or second quoted string we've read
10728 for this op
10729 */
4e553d73 10730
3280af22 10731 if (PL_lex_stuff)
7cc34111 10732 PL_sublex_info.repl = sv;
79072805 10733 else
3280af22 10734 PL_lex_stuff = sv;
378cc40b
LW
10735 return s;
10736}
10737
02aa26ce
NT
10738/*
10739 scan_num
10740 takes: pointer to position in buffer
10741 returns: pointer to new position in buffer
6154021b 10742 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
10743
10744 Read a number in any of the formats that Perl accepts:
10745
7fd134d9
JH
10746 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10747 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
10748 0b[01](_?[01])*
10749 0[0-7](_?[0-7])*
10750 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10751
3280af22 10752 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10753 thing it reads.
10754
10755 If it reads a number without a decimal point or an exponent, it will
10756 try converting the number to an integer and see if it can do so
10757 without loss of precision.
10758*/
4e553d73 10759
378cc40b 10760char *
bfed75c6 10761Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10762{
97aff369 10763 dVAR;
eb578fdb
KW
10764 const char *s = start; /* current position in buffer */
10765 char *d; /* destination in temp buffer */
10766 char *e; /* end of temp buffer */
86554af2 10767 NV nv; /* number read, as a double */
a0714e2c 10768 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10769 bool floatit; /* boolean: int or float? */
cbbf8932 10770 const char *lastub = NULL; /* position of last underbar */
a1894d81 10771 static const char* const number_too_long = "Number too long";
378cc40b 10772
7918f24d
NC
10773 PERL_ARGS_ASSERT_SCAN_NUM;
10774
02aa26ce
NT
10775 /* We use the first character to decide what type of number this is */
10776
378cc40b 10777 switch (*s) {
79072805 10778 default:
5637ef5b 10779 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
4e553d73 10780
02aa26ce 10781 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10782 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10783 case '0':
10784 {
02aa26ce
NT
10785 /* variables:
10786 u holds the "number so far"
4f19785b
WSI
10787 shift the power of 2 of the base
10788 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10789 overflowed was the number more than we can hold?
10790
10791 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10792 we in octal/hex/binary?" indicator to disallow hex characters
10793 when in octal mode.
02aa26ce 10794 */
9e24b6e2
JH
10795 NV n = 0.0;
10796 UV u = 0;
79072805 10797 I32 shift;
9e24b6e2 10798 bool overflowed = FALSE;
61f33854 10799 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10800 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10801 static const char* const bases[5] =
10802 { "", "binary", "", "octal", "hexadecimal" };
10803 static const char* const Bases[5] =
10804 { "", "Binary", "", "Octal", "Hexadecimal" };
10805 static const char* const maxima[5] =
10806 { "",
10807 "0b11111111111111111111111111111111",
10808 "",
10809 "037777777777",
10810 "0xffffffff" };
bfed75c6 10811 const char *base, *Base, *max;
378cc40b 10812
02aa26ce 10813 /* check for hex */
a674e8db 10814 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10815 shift = 4;
10816 s += 2;
61f33854 10817 just_zero = FALSE;
a674e8db 10818 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10819 shift = 1;
10820 s += 2;
61f33854 10821 just_zero = FALSE;
378cc40b 10822 }
02aa26ce 10823 /* check for a decimal in disguise */
b78218b7 10824 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10825 goto decimal;
02aa26ce 10826 /* so it must be octal */
928753ea 10827 else {
378cc40b 10828 shift = 3;
928753ea
JH
10829 s++;
10830 }
10831
10832 if (*s == '_') {
a2a5de95 10833 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10834 "Misplaced _ in number");
10835 lastub = s++;
10836 }
9e24b6e2
JH
10837
10838 base = bases[shift];
10839 Base = Bases[shift];
10840 max = maxima[shift];
02aa26ce 10841
4f19785b 10842 /* read the rest of the number */
378cc40b 10843 for (;;) {
9e24b6e2 10844 /* x is used in the overflow test,
893fe2c2 10845 b is the digit we're adding on. */
9e24b6e2 10846 UV x, b;
55497cff 10847
378cc40b 10848 switch (*s) {
02aa26ce
NT
10849
10850 /* if we don't mention it, we're done */
378cc40b
LW
10851 default:
10852 goto out;
02aa26ce 10853
928753ea 10854 /* _ are ignored -- but warned about if consecutive */
de3bb511 10855 case '_':
a2a5de95
NC
10856 if (lastub && s == lastub + 1)
10857 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10858 "Misplaced _ in number");
928753ea 10859 lastub = s++;
de3bb511 10860 break;
02aa26ce
NT
10861
10862 /* 8 and 9 are not octal */
378cc40b 10863 case '8': case '9':
4f19785b 10864 if (shift == 3)
cea2e8a9 10865 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10866 /* FALL THROUGH */
02aa26ce
NT
10867
10868 /* octal digits */
4f19785b 10869 case '2': case '3': case '4':
378cc40b 10870 case '5': case '6': case '7':
4f19785b 10871 if (shift == 1)
cea2e8a9 10872 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10873 /* FALL THROUGH */
10874
10875 case '0': case '1':
02aa26ce 10876 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10877 goto digit;
02aa26ce
NT
10878
10879 /* hex digits */
378cc40b
LW
10880 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10881 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10882 /* make sure they said 0x */
378cc40b
LW
10883 if (shift != 4)
10884 goto out;
55497cff 10885 b = (*s++ & 7) + 9;
02aa26ce
NT
10886
10887 /* Prepare to put the digit we have onto the end
10888 of the number so far. We check for overflows.
10889 */
10890
55497cff 10891 digit:
61f33854 10892 just_zero = FALSE;
9e24b6e2
JH
10893 if (!overflowed) {
10894 x = u << shift; /* make room for the digit */
10895
10896 if ((x >> shift) != u
10897 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10898 overflowed = TRUE;
10899 n = (NV) u;
9b387841
NC
10900 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10901 "Integer overflow in %s number",
10902 base);
9e24b6e2
JH
10903 } else
10904 u = x | b; /* add the digit to the end */
10905 }
10906 if (overflowed) {
10907 n *= nvshift[shift];
10908 /* If an NV has not enough bits in its
10909 * mantissa to represent an UV this summing of
10910 * small low-order numbers is a waste of time
10911 * (because the NV cannot preserve the
10912 * low-order bits anyway): we could just
10913 * remember when did we overflow and in the
10914 * end just multiply n by the right
10915 * amount. */
10916 n += (NV) b;
55497cff 10917 }
378cc40b
LW
10918 break;
10919 }
10920 }
02aa26ce
NT
10921
10922 /* if we get here, we had success: make a scalar value from
10923 the number.
10924 */
378cc40b 10925 out:
928753ea
JH
10926
10927 /* final misplaced underbar check */
10928 if (s[-1] == '_') {
a2a5de95 10929 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10930 }
10931
9e24b6e2 10932 if (overflowed) {
a2a5de95
NC
10933 if (n > 4294967295.0)
10934 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10935 "%s number > %s non-portable",
10936 Base, max);
b081dd7e 10937 sv = newSVnv(n);
9e24b6e2
JH
10938 }
10939 else {
15041a67 10940#if UVSIZE > 4
a2a5de95
NC
10941 if (u > 0xffffffff)
10942 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10943 "%s number > %s non-portable",
10944 Base, max);
2cc4c2dc 10945#endif
b081dd7e 10946 sv = newSVuv(u);
9e24b6e2 10947 }
61f33854 10948 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10949 sv = new_constant(start, s - start, "integer",
eb0d8d16 10950 sv, NULL, NULL, 0);
61f33854 10951 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10952 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10953 }
10954 break;
02aa26ce
NT
10955
10956 /*
10957 handle decimal numbers.
10958 we're also sent here when we read a 0 as the first digit
10959 */
378cc40b
LW
10960 case '1': case '2': case '3': case '4': case '5':
10961 case '6': case '7': case '8': case '9': case '.':
10962 decimal:
3280af22
NIS
10963 d = PL_tokenbuf;
10964 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10965 floatit = FALSE;
02aa26ce
NT
10966
10967 /* read next group of digits and _ and copy into d */
de3bb511 10968 while (isDIGIT(*s) || *s == '_') {
4e553d73 10969 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10970 if -w is on
10971 */
93a17b20 10972 if (*s == '_') {
a2a5de95
NC
10973 if (lastub && s == lastub + 1)
10974 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10975 "Misplaced _ in number");
928753ea 10976 lastub = s++;
93a17b20 10977 }
fc36a67e 10978 else {
02aa26ce 10979 /* check for end of fixed-length buffer */
fc36a67e 10980 if (d >= e)
e5cc0c0f 10981 Perl_croak(aTHX_ "%s", number_too_long);
02aa26ce 10982 /* if we're ok, copy the character */
378cc40b 10983 *d++ = *s++;
fc36a67e 10984 }
378cc40b 10985 }
02aa26ce
NT
10986
10987 /* final misplaced underbar check */
928753ea 10988 if (lastub && s == lastub + 1) {
a2a5de95 10989 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10990 }
02aa26ce
NT
10991
10992 /* read a decimal portion if there is one. avoid
10993 3..5 being interpreted as the number 3. followed
10994 by .5
10995 */
2f3197b3 10996 if (*s == '.' && s[1] != '.') {
79072805 10997 floatit = TRUE;
378cc40b 10998 *d++ = *s++;
02aa26ce 10999
928753ea 11000 if (*s == '_') {
a2a5de95
NC
11001 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11002 "Misplaced _ in number");
928753ea
JH
11003 lastub = s;
11004 }
11005
11006 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 11007 */
fc36a67e 11008 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 11009 /* fixed length buffer check */
fc36a67e 11010 if (d >= e)
e5cc0c0f 11011 Perl_croak(aTHX_ "%s", number_too_long);
928753ea 11012 if (*s == '_') {
a2a5de95
NC
11013 if (lastub && s == lastub + 1)
11014 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11015 "Misplaced _ in number");
928753ea
JH
11016 lastub = s;
11017 }
11018 else
fc36a67e 11019 *d++ = *s;
378cc40b 11020 }
928753ea
JH
11021 /* fractional part ending in underbar? */
11022 if (s[-1] == '_') {
a2a5de95
NC
11023 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11024 "Misplaced _ in number");
928753ea 11025 }
dd629d5b
GS
11026 if (*s == '.' && isDIGIT(s[1])) {
11027 /* oops, it's really a v-string, but without the "v" */
f4758303 11028 s = start;
dd629d5b
GS
11029 goto vstring;
11030 }
378cc40b 11031 }
02aa26ce
NT
11032
11033 /* read exponent part, if present */
3792a11b 11034 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
11035 floatit = TRUE;
11036 s++;
02aa26ce
NT
11037
11038 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 11039 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 11040
7fd134d9
JH
11041 /* stray preinitial _ */
11042 if (*s == '_') {
a2a5de95
NC
11043 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11044 "Misplaced _ in number");
7fd134d9
JH
11045 lastub = s++;
11046 }
11047
02aa26ce 11048 /* allow positive or negative exponent */
378cc40b
LW
11049 if (*s == '+' || *s == '-')
11050 *d++ = *s++;
02aa26ce 11051
7fd134d9
JH
11052 /* stray initial _ */
11053 if (*s == '_') {
a2a5de95
NC
11054 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11055 "Misplaced _ in number");
7fd134d9
JH
11056 lastub = s++;
11057 }
11058
7fd134d9
JH
11059 /* read digits of exponent */
11060 while (isDIGIT(*s) || *s == '_') {
11061 if (isDIGIT(*s)) {
11062 if (d >= e)
e5cc0c0f 11063 Perl_croak(aTHX_ "%s", number_too_long);
b3b48e3e 11064 *d++ = *s++;
7fd134d9
JH
11065 }
11066 else {
041457d9 11067 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
11068 (!isDIGIT(s[1]) && s[1] != '_')))
11069 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11070 "Misplaced _ in number");
b3b48e3e 11071 lastub = s++;
7fd134d9 11072 }
7fd134d9 11073 }
378cc40b 11074 }
02aa26ce 11075
02aa26ce 11076
0b7fceb9 11077 /*
58bb9ec3
NC
11078 We try to do an integer conversion first if no characters
11079 indicating "float" have been found.
0b7fceb9
MU
11080 */
11081
11082 if (!floatit) {
58bb9ec3 11083 UV uv;
6136c704 11084 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
11085
11086 if (flags == IS_NUMBER_IN_UV) {
11087 if (uv <= IV_MAX)
b081dd7e 11088 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 11089 else
b081dd7e 11090 sv = newSVuv(uv);
58bb9ec3
NC
11091 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11092 if (uv <= (UV) IV_MIN)
b081dd7e 11093 sv = newSViv(-(IV)uv);
58bb9ec3
NC
11094 else
11095 floatit = TRUE;
11096 } else
11097 floatit = TRUE;
11098 }
0b7fceb9 11099 if (floatit) {
58bb9ec3
NC
11100 /* terminate the string */
11101 *d = '\0';
86554af2 11102 nv = Atof(PL_tokenbuf);
b081dd7e 11103 sv = newSVnv(nv);
86554af2 11104 }
86554af2 11105
eb0d8d16
NC
11106 if ( floatit
11107 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11108 const char *const key = floatit ? "float" : "integer";
11109 const STRLEN keylen = floatit ? 5 : 7;
11110 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11111 key, keylen, sv, NULL, NULL, 0);
11112 }
378cc40b 11113 break;
0b7fceb9 11114
e312add1 11115 /* if it starts with a v, it could be a v-string */
a7cb1f99 11116 case 'v':
dd629d5b 11117vstring:
561b68a9 11118 sv = newSV(5); /* preallocate storage space */
ecabb004
FC
11119 ENTER_with_name("scan_vstring");
11120 SAVEFREESV(sv);
65b06e02 11121 s = scan_vstring(s, PL_bufend, sv);
ecabb004
FC
11122 SvREFCNT_inc_simple_void_NN(sv);
11123 LEAVE_with_name("scan_vstring");
a7cb1f99 11124 break;
79072805 11125 }
a687059c 11126
02aa26ce
NT
11127 /* make the op for the constant and return */
11128
a86a20aa 11129 if (sv)
b73d6f50 11130 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 11131 else
5f66b61c 11132 lvalp->opval = NULL;
a687059c 11133
73d840c0 11134 return (char *)s;
378cc40b
LW
11135}
11136
76e3520e 11137STATIC char *
5aaab254 11138S_scan_formline(pTHX_ char *s)
378cc40b 11139{
97aff369 11140 dVAR;
eb578fdb
KW
11141 char *eol;
11142 char *t;
6136c704 11143 SV * const stuff = newSVpvs("");
79072805 11144 bool needargs = FALSE;
c5ee2135 11145 bool eofmt = FALSE;
5db06880
NC
11146#ifdef PERL_MAD
11147 char *tokenstart = s;
4f61fd4b
JC
11148 SV* savewhite = NULL;
11149
5db06880 11150 if (PL_madskills) {
cd81e915
NC
11151 savewhite = PL_thiswhite;
11152 PL_thiswhite = 0;
5db06880
NC
11153 }
11154#endif
378cc40b 11155
7918f24d
NC
11156 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11157
79072805 11158 while (!needargs) {
a1b95068 11159 if (*s == '.') {
c35e046a 11160 t = s+1;
51882d45 11161#ifdef PERL_STRICT_CR
c35e046a
AL
11162 while (SPACE_OR_TAB(*t))
11163 t++;
51882d45 11164#else
c35e046a
AL
11165 while (SPACE_OR_TAB(*t) || *t == '\r')
11166 t++;
51882d45 11167#endif
c5ee2135
WL
11168 if (*t == '\n' || t == PL_bufend) {
11169 eofmt = TRUE;
79072805 11170 break;
c5ee2135 11171 }
79072805 11172 }
583c9d5c
FC
11173 eol = (char *) memchr(s,'\n',PL_bufend-s);
11174 if (!eol++)
3280af22 11175 eol = PL_bufend;
79072805 11176 if (*s != '#') {
a0d0e21e
LW
11177 for (t = s; t < eol; t++) {
11178 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11179 needargs = FALSE;
11180 goto enough; /* ~~ must be first line in formline */
378cc40b 11181 }
a0d0e21e
LW
11182 if (*t == '@' || *t == '^')
11183 needargs = TRUE;
378cc40b 11184 }
7121b347
MG
11185 if (eol > s) {
11186 sv_catpvn(stuff, s, eol-s);
2dc4c65b 11187#ifndef PERL_STRICT_CR
7121b347
MG
11188 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11189 char *end = SvPVX(stuff) + SvCUR(stuff);
11190 end[-2] = '\n';
11191 end[-1] = '\0';
b162af07 11192 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 11193 }
2dc4c65b 11194#endif
7121b347
MG
11195 }
11196 else
11197 break;
79072805 11198 }
95a20fc0 11199 s = (char*)eol;
583c9d5c
FC
11200 if ((PL_rsfp || PL_parser->filtered)
11201 && PL_parser->form_lex_state == LEX_NORMAL) {
f0e67a1d 11202 bool got_some;
5db06880
NC
11203#ifdef PERL_MAD
11204 if (PL_madskills) {
cd81e915
NC
11205 if (PL_thistoken)
11206 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 11207 else
cd81e915 11208 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
11209 }
11210#endif
f0e67a1d 11211 PL_bufptr = PL_bufend;
83944c01 11212 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
11213 got_some = lex_next_chunk(0);
11214 CopLINE_dec(PL_curcop);
11215 s = PL_bufptr;
5db06880 11216#ifdef PERL_MAD
f0e67a1d 11217 tokenstart = PL_bufptr;
5db06880 11218#endif
f0e67a1d 11219 if (!got_some)
378cc40b 11220 break;
378cc40b 11221 }
463ee0b2 11222 incline(s);
79072805 11223 }
a0d0e21e 11224 enough:
5c9ae74d
FC
11225 if (!SvCUR(stuff) || needargs)
11226 PL_lex_state = PL_parser->form_lex_state;
a0d0e21e 11227 if (SvCUR(stuff)) {
705fe0e5 11228 PL_expect = XSTATE;
79072805 11229 if (needargs) {
cd81e915 11230 start_force(PL_curforce);
9ded7720 11231 NEXTVAL_NEXTTOKE.ival = 0;
705fe0e5 11232 force_next(FORMLBRACK);
79072805 11233 }
1bd51a4c 11234 if (!IN_BYTES) {
95a20fc0 11235 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
11236 SvUTF8_on(stuff);
11237 else if (PL_encoding)
11238 sv_recode_to_utf8(stuff, PL_encoding);
11239 }
cd81e915 11240 start_force(PL_curforce);
9ded7720 11241 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 11242 force_next(THING);
378cc40b 11243 }
79072805 11244 else {
8990e307 11245 SvREFCNT_dec(stuff);
c5ee2135
WL
11246 if (eofmt)
11247 PL_lex_formbrack = 0;
79072805 11248 }
5db06880
NC
11249#ifdef PERL_MAD
11250 if (PL_madskills) {
cd81e915
NC
11251 if (PL_thistoken)
11252 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 11253 else
cd81e915
NC
11254 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11255 PL_thiswhite = savewhite;
5db06880
NC
11256 }
11257#endif
79072805 11258 return s;
378cc40b 11259}
a687059c 11260
ba6d6ac9 11261I32
864dbfa3 11262Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 11263{
97aff369 11264 dVAR;
a3b680e6 11265 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 11266 CV* const outsidecv = PL_compcv;
8990e307 11267
7766f137 11268 SAVEI32(PL_subline);
3280af22 11269 save_item(PL_subname);
3280af22 11270 SAVESPTR(PL_compcv);
3280af22 11271
ea726b52 11272 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
11273 CvFLAGS(PL_compcv) |= flags;
11274
57843af0 11275 PL_subline = CopLINE(PL_curcop);
dd2155a4 11276 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 11277 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 11278 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
db4cf31d 11279 if (outsidecv && CvPADLIST(outsidecv))
8771da69
FC
11280 CvPADLIST(PL_compcv)->xpadl_outid =
11281 PadlistNAMES(CvPADLIST(outsidecv));
748a9306 11282
8990e307
LW
11283 return oldsavestack_ix;
11284}
11285
084592ab
CN
11286#ifdef __SC__
11287#pragma segment Perl_yylex
11288#endif
af41e527 11289static int
19c62481 11290S_yywarn(pTHX_ const char *const s, U32 flags)
8990e307 11291{
97aff369 11292 dVAR;
7918f24d
NC
11293
11294 PERL_ARGS_ASSERT_YYWARN;
11295
faef0170 11296 PL_in_eval |= EVAL_WARNONLY;
19c62481 11297 yyerror_pv(s, flags);
faef0170 11298 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 11299 return 0;
8990e307
LW
11300}
11301
11302int
15f169a1 11303Perl_yyerror(pTHX_ const char *const s)
463ee0b2 11304{
19c62481
BF
11305 PERL_ARGS_ASSERT_YYERROR;
11306 return yyerror_pvn(s, strlen(s), 0);
11307}
11308
11309int
11310Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11311{
11312 PERL_ARGS_ASSERT_YYERROR_PV;
11313 return yyerror_pvn(s, strlen(s), flags);
11314}
11315
11316int
19c62481
BF
11317Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11318{
97aff369 11319 dVAR;
bfed75c6 11320 const char *context = NULL;
68dc0745 11321 int contlen = -1;
46fc3d4c 11322 SV *msg;
19c62481 11323 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
5912531f 11324 int yychar = PL_parser->yychar;
463ee0b2 11325
19c62481 11326 PERL_ARGS_ASSERT_YYERROR_PVN;
7918f24d 11327
3280af22 11328 if (!yychar || (yychar == ';' && !PL_rsfp))
19c62481 11329 sv_catpvs(where_sv, "at EOF");
8bcfe651
TM
11330 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11331 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11332 PL_oldbufptr != PL_bufptr) {
f355267c
JH
11333 /*
11334 Only for NetWare:
11335 The code below is removed for NetWare because it abends/crashes on NetWare
11336 when the script has error such as not having the closing quotes like:
11337 if ($var eq "value)
11338 Checking of white spaces is anyway done in NetWare code.
11339 */
11340#ifndef NETWARE
3280af22
NIS
11341 while (isSPACE(*PL_oldoldbufptr))
11342 PL_oldoldbufptr++;
f355267c 11343#endif
3280af22
NIS
11344 context = PL_oldoldbufptr;
11345 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 11346 }
8bcfe651
TM
11347 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11348 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
11349 /*
11350 Only for NetWare:
11351 The code below is removed for NetWare because it abends/crashes on NetWare
11352 when the script has error such as not having the closing quotes like:
11353 if ($var eq "value)
11354 Checking of white spaces is anyway done in NetWare code.
11355 */
11356#ifndef NETWARE
3280af22
NIS
11357 while (isSPACE(*PL_oldbufptr))
11358 PL_oldbufptr++;
f355267c 11359#endif
3280af22
NIS
11360 context = PL_oldbufptr;
11361 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
11362 }
11363 else if (yychar > 255)
19c62481 11364 sv_catpvs(where_sv, "next token ???");
12fbd33b 11365 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
11366 if (PL_lex_state == LEX_NORMAL ||
11367 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
19c62481 11368 sv_catpvs(where_sv, "at end of line");
3280af22 11369 else if (PL_lex_inpat)
19c62481 11370 sv_catpvs(where_sv, "within pattern");
463ee0b2 11371 else
19c62481 11372 sv_catpvs(where_sv, "within string");
463ee0b2 11373 }
46fc3d4c 11374 else {
19c62481 11375 sv_catpvs(where_sv, "next char ");
46fc3d4c 11376 if (yychar < 32)
cea2e8a9 11377 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 11378 else if (isPRINT_LC(yychar)) {
88c9ea1e 11379 const char string = yychar;
5e7aa789
NC
11380 sv_catpvn(where_sv, &string, 1);
11381 }
463ee0b2 11382 else
cea2e8a9 11383 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
463ee0b2 11384 }
b604e366 11385 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
ed094faf 11386 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 11387 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 11388 if (context)
19c62481
BF
11389 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11390 SVfARG(newSVpvn_flags(context, contlen,
11391 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
463ee0b2 11392 else
19c62481 11393 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
57843af0 11394 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 11395 Perl_sv_catpvf(aTHX_ msg,
57def98f 11396 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 11397 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 11398 PL_multi_end = 0;
a0d0e21e 11399 }
500960a6 11400 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 11401 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 11402 }
463ee0b2 11403 else
5a844595 11404 qerror(msg);
c7d6bfb2 11405 if (PL_error_count >= 10) {
eed484f9
DD
11406 SV * errsv;
11407 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
d2560b70 11408 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
eed484f9 11409 SVfARG(errsv), OutCopFILE(PL_curcop));
c7d6bfb2
GS
11410 else
11411 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 11412 OutCopFILE(PL_curcop));
c7d6bfb2 11413 }
3280af22 11414 PL_in_my = 0;
5c284bb0 11415 PL_in_my_stash = NULL;
463ee0b2
LW
11416 return 0;
11417}
084592ab
CN
11418#ifdef __SC__
11419#pragma segment Main
11420#endif
4e35701f 11421
b250498f 11422STATIC char*
3ae08724 11423S_swallow_bom(pTHX_ U8 *s)
01ec43d0 11424{
97aff369 11425 dVAR;
f54cb97a 11426 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
11427
11428 PERL_ARGS_ASSERT_SWALLOW_BOM;
11429
7aa207d6 11430 switch (s[0]) {
4e553d73
NIS
11431 case 0xFF:
11432 if (s[1] == 0xFE) {
ee6ba15d 11433 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 11434 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
dcbac5bb 11435 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11436 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 11437#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 11438 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 11439 s += 2;
dea0fc0b 11440 if (PL_bufend > (char*)s) {
81a923f4 11441 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 11442 }
b250498f 11443#else
dcbac5bb 11444 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11445 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 11446#endif
01ec43d0
GS
11447 }
11448 break;
78ae23f5 11449 case 0xFE:
7aa207d6 11450 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 11451#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11452 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
11453 s += 2;
11454 if (PL_bufend > (char *)s) {
81a923f4 11455 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 11456 }
b250498f 11457#else
dcbac5bb 11458 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11459 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 11460#endif
01ec43d0
GS
11461 }
11462 break;
3ae08724
GS
11463 case 0xEF:
11464 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 11465 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
11466 s += 3; /* UTF-8 */
11467 }
11468 break;
11469 case 0:
7aa207d6
JH
11470 if (slen > 3) {
11471 if (s[1] == 0) {
11472 if (s[2] == 0xFE && s[3] == 0xFF) {
11473 /* UTF-32 big-endian */
dcbac5bb 11474 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11475 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
11476 }
11477 }
11478 else if (s[2] == 0 && s[3] != 0) {
11479 /* Leading bytes
11480 * 00 xx 00 xx
11481 * are a good indicator of UTF-16BE. */
ee6ba15d 11482#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11483 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
11484 s = add_utf16_textfilter(s, FALSE);
11485#else
dcbac5bb 11486 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11487 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11488#endif
7aa207d6 11489 }
01ec43d0 11490 }
e294cc5d
JH
11491#ifdef EBCDIC
11492 case 0xDD:
11493 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11494 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11495 s += 4; /* UTF-8 */
11496 }
11497 break;
11498#endif
11499
7aa207d6
JH
11500 default:
11501 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11502 /* Leading bytes
11503 * xx 00 xx 00
11504 * are a good indicator of UTF-16LE. */
ee6ba15d 11505#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11506 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 11507 s = add_utf16_textfilter(s, TRUE);
ee6ba15d 11508#else
dcbac5bb 11509 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11510 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11511#endif
7aa207d6 11512 }
01ec43d0 11513 }
b8f84bb2 11514 return (char*)s;
b250498f 11515}
4755096e 11516
6e3aabd6
GS
11517
11518#ifndef PERL_NO_UTF16_FILTER
11519static I32
a28af015 11520S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 11521{
97aff369 11522 dVAR;
f3040f2c 11523 SV *const filter = FILTER_DATA(idx);
2a773401
NC
11524 /* We re-use this each time round, throwing the contents away before we
11525 return. */
2a773401 11526 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 11527 SV *const utf8_buffer = filter;
c28d6105 11528 IV status = IoPAGE(filter);
f2338a2e 11529 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 11530 I32 retval;
c8b0cbae 11531
c85ae797
NC
11532 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11533
c8b0cbae
NC
11534 /* As we're automatically added, at the lowest level, and hence only called
11535 from this file, we can be sure that we're not called in block mode. Hence
11536 don't bother writing code to deal with block mode. */
11537 if (maxlen) {
11538 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11539 }
c28d6105
NC
11540 if (status < 0) {
11541 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11542 }
1de9afcd 11543 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 11544 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 11545 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
11546 reverse ? 'l' : 'b', idx, maxlen, status,
11547 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11548
11549 while (1) {
11550 STRLEN chars;
11551 STRLEN have;
dea0fc0b 11552 I32 newlen;
2a773401 11553 U8 *end;
c28d6105
NC
11554 /* First, look in our buffer of existing UTF-8 data: */
11555 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11556
11557 if (nl) {
11558 ++nl;
11559 } else if (status == 0) {
11560 /* EOF */
11561 IoPAGE(filter) = 0;
11562 nl = SvEND(utf8_buffer);
11563 }
11564 if (nl) {
d2d1d4de
NC
11565 STRLEN got = nl - SvPVX(utf8_buffer);
11566 /* Did we have anything to append? */
11567 retval = got != 0;
11568 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
11569 /* Everything else in this code works just fine if SVp_POK isn't
11570 set. This, however, needs it, and we need it to work, else
11571 we loop infinitely because the buffer is never consumed. */
11572 sv_chop(utf8_buffer, nl);
11573 break;
11574 }
ba77e4cc 11575
c28d6105
NC
11576 /* OK, not a complete line there, so need to read some more UTF-16.
11577 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
11578 while (1) {
11579 if (status <= 0)
11580 break;
11581 if (SvCUR(utf16_buffer) >= 2) {
11582 /* Location of the high octet of the last complete code point.
11583 Gosh, UTF-16 is a pain. All the benefits of variable length,
11584 *coupled* with all the benefits of partial reads and
11585 endianness. */
11586 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11587 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11588
11589 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11590 break;
11591 }
11592
11593 /* We have the first half of a surrogate. Read more. */
11594 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11595 }
c28d6105 11596
c28d6105
NC
11597 status = FILTER_READ(idx + 1, utf16_buffer,
11598 160 + (SvCUR(utf16_buffer) & 1));
11599 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 11600 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
11601 if (status < 0) {
11602 /* Error */
11603 IoPAGE(filter) = status;
11604 return status;
11605 }
11606 }
11607
11608 chars = SvCUR(utf16_buffer) >> 1;
11609 have = SvCUR(utf8_buffer);
11610 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 11611
aa6dbd60 11612 if (reverse) {
c28d6105
NC
11613 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11614 (U8*)SvPVX_const(utf8_buffer) + have,
11615 chars * 2, &newlen);
aa6dbd60 11616 } else {
2a773401 11617 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
11618 (U8*)SvPVX_const(utf8_buffer) + have,
11619 chars * 2, &newlen);
2a773401 11620 }
c28d6105 11621 SvCUR_set(utf8_buffer, have + newlen);
2a773401 11622 *end = '\0';
c28d6105 11623
e07286ed
NC
11624 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11625 it's private to us, and utf16_to_utf8{,reversed} take a
11626 (pointer,length) pair, rather than a NUL-terminated string. */
11627 if(SvCUR(utf16_buffer) & 1) {
11628 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11629 SvCUR_set(utf16_buffer, 1);
11630 } else {
11631 SvCUR_set(utf16_buffer, 0);
11632 }
2a773401 11633 }
c28d6105
NC
11634 DEBUG_P(PerlIO_printf(Perl_debug_log,
11635 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11636 status,
11637 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11638 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 11639 return retval;
6e3aabd6 11640}
81a923f4
NC
11641
11642static U8 *
11643S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11644{
2a773401 11645 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 11646
c85ae797
NC
11647 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11648
c28d6105 11649 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 11650 sv_setpvs(filter, "");
2a773401 11651 IoLINES(filter) = reversed;
c28d6105
NC
11652 IoPAGE(filter) = 1; /* Not EOF */
11653
11654 /* Sadly, we have to return a valid pointer, come what may, so we have to
11655 ignore any error return from this. */
11656 SvCUR_set(PL_linestr, 0);
11657 if (FILTER_READ(0, PL_linestr, 0)) {
11658 SvUTF8_on(PL_linestr);
81a923f4 11659 } else {
c28d6105 11660 SvUTF8_on(PL_linestr);
81a923f4 11661 }
c28d6105 11662 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
11663 return (U8*)SvPVX(PL_linestr);
11664}
6e3aabd6 11665#endif
9f4817db 11666
f333445c
JP
11667/*
11668Returns a pointer to the next character after the parsed
11669vstring, as well as updating the passed in sv.
11670
11671Function must be called like
11672
615e0a48 11673 sv = sv_2mortal(newSV(5));
65b06e02 11674 s = scan_vstring(s,e,sv);
f333445c 11675
65b06e02 11676where s and e are the start and end of the string.
f333445c
JP
11677The sv should already be large enough to store the vstring
11678passed in, for performance reasons.
11679
615e0a48
FC
11680This function may croak if fatal warnings are enabled in the
11681calling scope, hence the sv_2mortal in the example (to prevent
11682a leak). Make sure to do SvREFCNT_inc afterwards if you use
11683sv_2mortal.
11684
f333445c
JP
11685*/
11686
11687char *
15f169a1 11688Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 11689{
97aff369 11690 dVAR;
bfed75c6
AL
11691 const char *pos = s;
11692 const char *start = s;
7918f24d
NC
11693
11694 PERL_ARGS_ASSERT_SCAN_VSTRING;
11695
f333445c 11696 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 11697 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 11698 pos++;
f333445c
JP
11699 if ( *pos != '.') {
11700 /* this may not be a v-string if followed by => */
bfed75c6 11701 const char *next = pos;
65b06e02 11702 while (next < e && isSPACE(*next))
8fc7bb1c 11703 ++next;
65b06e02 11704 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
11705 /* return string not v-string */
11706 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 11707 return (char *)pos;
f333445c
JP
11708 }
11709 }
11710
11711 if (!isALPHA(*pos)) {
89ebb4a3 11712 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 11713
d4c19fe8
AL
11714 if (*s == 'v')
11715 s++; /* get past 'v' */
f333445c 11716
76f68e9b 11717 sv_setpvs(sv, "");
f333445c
JP
11718
11719 for (;;) {
d4c19fe8 11720 /* this is atoi() that tolerates underscores */
0bd48802
AL
11721 U8 *tmpend;
11722 UV rev = 0;
d4c19fe8
AL
11723 const char *end = pos;
11724 UV mult = 1;
11725 while (--end >= s) {
11726 if (*end != '_') {
11727 const UV orev = rev;
f333445c
JP
11728 rev += (*end - '0') * mult;
11729 mult *= 10;
9b387841 11730 if (orev > rev)
dcbac5bb 11731 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
11732 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11733 "Integer overflow in decimal number");
f333445c
JP
11734 }
11735 }
11736#ifdef EBCDIC
11737 if (rev > 0x7FFFFFFF)
11738 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11739#endif
11740 /* Append native character for the rev point */
11741 tmpend = uvchr_to_utf8(tmpbuf, rev);
11742 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11743 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11744 SvUTF8_on(sv);
65b06e02 11745 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
11746 s = ++pos;
11747 else {
11748 s = pos;
11749 break;
11750 }
65b06e02 11751 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
11752 pos++;
11753 }
11754 SvPOK_on(sv);
11755 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11756 SvRMAGICAL_on(sv);
11757 }
73d840c0 11758 return (char *)s;
f333445c
JP
11759}
11760
88e1f1a2
JV
11761int
11762Perl_keyword_plugin_standard(pTHX_
11763 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11764{
11765 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11766 PERL_UNUSED_CONTEXT;
11767 PERL_UNUSED_ARG(keyword_ptr);
11768 PERL_UNUSED_ARG(keyword_len);
11769 PERL_UNUSED_ARG(op_ptr);
11770 return KEYWORD_PLUGIN_DECLINE;
11771}
11772
78cdf107 11773#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11774static void
78cdf107 11775S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11776{
11777 SAVEI32(PL_lex_brackets);
11778 if (PL_lex_brackets > 100)
11779 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11780 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11781 SAVEI32(PL_lex_allbrackets);
11782 PL_lex_allbrackets = 0;
11783 SAVEI8(PL_lex_fakeeof);
2dcac756 11784 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11785 if(yyparse(gramtype) && !PL_parser->error_count)
11786 qerror(Perl_mess(aTHX_ "Parse error"));
11787}
11788
78cdf107 11789#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11790static OP *
78cdf107 11791S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11792{
11793 OP *o;
11794 ENTER;
11795 SAVEVPTR(PL_eval_root);
11796 PL_eval_root = NULL;
78cdf107 11797 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11798 o = PL_eval_root;
11799 LEAVE;
11800 return o;
11801}
11802
78cdf107
Z
11803#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11804static OP *
11805S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11806{
11807 OP *exprop;
11808 if (flags & ~PARSE_OPTIONAL)
11809 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11810 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11811 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11812 if (!PL_parser->error_count)
11813 qerror(Perl_mess(aTHX_ "Parse error"));
11814 exprop = newOP(OP_NULL, 0);
11815 }
11816 return exprop;
11817}
11818
11819/*
11820=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11821
11822Parse a Perl arithmetic expression. This may contain operators of precedence
11823down to the bit shift operators. The expression must be followed (and thus
11824terminated) either by a comparison or lower-precedence operator or by
11825something that would normally terminate an expression such as semicolon.
11826If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11827otherwise it is mandatory. It is up to the caller to ensure that the
11828dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11829the source of the code to be parsed and the lexical context for the
11830expression.
11831
11832The op tree representing the expression is returned. If an optional
11833expression is absent, a null pointer is returned, otherwise the pointer
11834will be non-null.
11835
11836If an error occurs in parsing or compilation, in most cases a valid op
11837tree is returned anyway. The error is reflected in the parser state,
11838normally resulting in a single exception at the top level of parsing
11839which covers all the compilation errors that occurred. Some compilation
11840errors, however, will throw an exception immediately.
11841
11842=cut
11843*/
11844
11845OP *
11846Perl_parse_arithexpr(pTHX_ U32 flags)
11847{
11848 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11849}
11850
11851/*
11852=for apidoc Amx|OP *|parse_termexpr|U32 flags
11853
11854Parse a Perl term expression. This may contain operators of precedence
11855down to the assignment operators. The expression must be followed (and thus
11856terminated) either by a comma or lower-precedence operator or by
11857something that would normally terminate an expression such as semicolon.
11858If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11859otherwise it is mandatory. It is up to the caller to ensure that the
11860dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11861the source of the code to be parsed and the lexical context for the
11862expression.
11863
11864The op tree representing the expression is returned. If an optional
11865expression is absent, a null pointer is returned, otherwise the pointer
11866will be non-null.
11867
11868If an error occurs in parsing or compilation, in most cases a valid op
11869tree is returned anyway. The error is reflected in the parser state,
11870normally resulting in a single exception at the top level of parsing
11871which covers all the compilation errors that occurred. Some compilation
11872errors, however, will throw an exception immediately.
11873
11874=cut
11875*/
11876
11877OP *
11878Perl_parse_termexpr(pTHX_ U32 flags)
11879{
11880 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11881}
11882
11883/*
11884=for apidoc Amx|OP *|parse_listexpr|U32 flags
11885
11886Parse a Perl list expression. This may contain operators of precedence
11887down to the comma operator. The expression must be followed (and thus
11888terminated) either by a low-precedence logic operator such as C<or> or by
11889something that would normally terminate an expression such as semicolon.
11890If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11891otherwise it is mandatory. It is up to the caller to ensure that the
11892dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11893the source of the code to be parsed and the lexical context for the
11894expression.
11895
11896The op tree representing the expression is returned. If an optional
11897expression is absent, a null pointer is returned, otherwise the pointer
11898will be non-null.
11899
11900If an error occurs in parsing or compilation, in most cases a valid op
11901tree is returned anyway. The error is reflected in the parser state,
11902normally resulting in a single exception at the top level of parsing
11903which covers all the compilation errors that occurred. Some compilation
11904errors, however, will throw an exception immediately.
11905
11906=cut
11907*/
11908
11909OP *
11910Perl_parse_listexpr(pTHX_ U32 flags)
11911{
11912 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11913}
11914
11915/*
11916=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11917
11918Parse a single complete Perl expression. This allows the full
11919expression grammar, including the lowest-precedence operators such
11920as C<or>. The expression must be followed (and thus terminated) by a
11921token that an expression would normally be terminated by: end-of-file,
11922closing bracketing punctuation, semicolon, or one of the keywords that
11923signals a postfix expression-statement modifier. If I<flags> includes
11924C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11925mandatory. It is up to the caller to ensure that the dynamic parser
11926state (L</PL_parser> et al) is correctly set to reflect the source of
11927the code to be parsed and the lexical context for the expression.
11928
11929The op tree representing the expression is returned. If an optional
11930expression is absent, a null pointer is returned, otherwise the pointer
11931will be non-null.
11932
11933If an error occurs in parsing or compilation, in most cases a valid op
11934tree is returned anyway. The error is reflected in the parser state,
11935normally resulting in a single exception at the top level of parsing
11936which covers all the compilation errors that occurred. Some compilation
11937errors, however, will throw an exception immediately.
11938
11939=cut
11940*/
11941
11942OP *
11943Perl_parse_fullexpr(pTHX_ U32 flags)
11944{
11945 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11946}
11947
e53d8f76
Z
11948/*
11949=for apidoc Amx|OP *|parse_block|U32 flags
11950
11951Parse a single complete Perl code block. This consists of an opening
11952brace, a sequence of statements, and a closing brace. The block
11953constitutes a lexical scope, so C<my> variables and various compile-time
11954effects can be contained within it. It is up to the caller to ensure
11955that the dynamic parser state (L</PL_parser> et al) is correctly set to
11956reflect the source of the code to be parsed and the lexical context for
11957the statement.
11958
11959The op tree representing the code block is returned. This is always a
11960real op, never a null pointer. It will normally be a C<lineseq> list,
11961including C<nextstate> or equivalent ops. No ops to construct any kind
11962of runtime scope are included by virtue of it being a block.
11963
11964If an error occurs in parsing or compilation, in most cases a valid op
11965tree (most likely null) is returned anyway. The error is reflected in
11966the parser state, normally resulting in a single exception at the top
11967level of parsing which covers all the compilation errors that occurred.
11968Some compilation errors, however, will throw an exception immediately.
11969
11970The I<flags> parameter is reserved for future use, and must always
11971be zero.
11972
11973=cut
11974*/
11975
11976OP *
11977Perl_parse_block(pTHX_ U32 flags)
11978{
11979 if (flags)
11980 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11981 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11982}
11983
1da4ca5f 11984/*
8359b381
Z
11985=for apidoc Amx|OP *|parse_barestmt|U32 flags
11986
11987Parse a single unadorned Perl statement. This may be a normal imperative
11988statement or a declaration that has compile-time effect. It does not
11989include any label or other affixture. It is up to the caller to ensure
11990that the dynamic parser state (L</PL_parser> et al) is correctly set to
11991reflect the source of the code to be parsed and the lexical context for
11992the statement.
11993
11994The op tree representing the statement is returned. This may be a
11995null pointer if the statement is null, for example if it was actually
11996a subroutine definition (which has compile-time side effects). If not
11997null, it will be ops directly implementing the statement, suitable to
11998pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11999equivalent op (except for those embedded in a scope contained entirely
12000within the statement).
12001
12002If an error occurs in parsing or compilation, in most cases a valid op
12003tree (most likely null) is returned anyway. The error is reflected in
12004the parser state, normally resulting in a single exception at the top
12005level of parsing which covers all the compilation errors that occurred.
12006Some compilation errors, however, will throw an exception immediately.
12007
12008The I<flags> parameter is reserved for future use, and must always
12009be zero.
12010
12011=cut
12012*/
12013
12014OP *
12015Perl_parse_barestmt(pTHX_ U32 flags)
12016{
12017 if (flags)
12018 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 12019 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
12020}
12021
12022/*
361d9b55
Z
12023=for apidoc Amx|SV *|parse_label|U32 flags
12024
12025Parse a single label, possibly optional, of the type that may prefix a
12026Perl statement. It is up to the caller to ensure that the dynamic parser
12027state (L</PL_parser> et al) is correctly set to reflect the source of
12028the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
12029label is optional, otherwise it is mandatory.
12030
12031The name of the label is returned in the form of a fresh scalar. If an
12032optional label is absent, a null pointer is returned.
12033
12034If an error occurs in parsing, which can only occur if the label is
12035mandatory, a valid label is returned anyway. The error is reflected in
12036the parser state, normally resulting in a single exception at the top
12037level of parsing which covers all the compilation errors that occurred.
12038
12039=cut
12040*/
12041
12042SV *
12043Perl_parse_label(pTHX_ U32 flags)
12044{
12045 if (flags & ~PARSE_OPTIONAL)
12046 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12047 if (PL_lex_state == LEX_KNOWNEXT) {
12048 PL_parser->yychar = yylex();
12049 if (PL_parser->yychar == LABEL) {
5504e6cf
FC
12050 char * const lpv = pl_yylval.pval;
12051 STRLEN llen = strlen(lpv);
361d9b55 12052 PL_parser->yychar = YYEMPTY;
5504e6cf 12053 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
361d9b55
Z
12054 } else {
12055 yyunlex();
12056 goto no_label;
12057 }
12058 } else {
12059 char *s, *t;
361d9b55
Z
12060 STRLEN wlen, bufptr_pos;
12061 lex_read_space(0);
12062 t = s = PL_bufptr;
5db1eb8d 12063 if (!isIDFIRST_lazy_if(s, UTF))
361d9b55 12064 goto no_label;
5db1eb8d 12065 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
361d9b55
Z
12066 if (word_takes_any_delimeter(s, wlen))
12067 goto no_label;
12068 bufptr_pos = s - SvPVX(PL_linestr);
12069 PL_bufptr = t;
12070 lex_read_space(LEX_KEEP_PREVIOUS);
12071 t = PL_bufptr;
12072 s = SvPVX(PL_linestr) + bufptr_pos;
12073 if (t[0] == ':' && t[1] != ':') {
12074 PL_oldoldbufptr = PL_oldbufptr;
12075 PL_oldbufptr = s;
12076 PL_bufptr = t+1;
5db1eb8d 12077 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
361d9b55
Z
12078 } else {
12079 PL_bufptr = s;
12080 no_label:
12081 if (flags & PARSE_OPTIONAL) {
12082 return NULL;
12083 } else {
12084 qerror(Perl_mess(aTHX_ "Parse error"));
12085 return newSVpvs("x");
12086 }
12087 }
12088 }
12089}
12090
12091/*
28ac2b49
Z
12092=for apidoc Amx|OP *|parse_fullstmt|U32 flags
12093
12094Parse a single complete Perl statement. This may be a normal imperative
8359b381 12095statement or a declaration that has compile-time effect, and may include
8e720305 12096optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
12097parser state (L</PL_parser> et al) is correctly set to reflect the source
12098of the code to be parsed and the lexical context for the statement.
12099
12100The op tree representing the statement is returned. This may be a
12101null pointer if the statement is null, for example if it was actually
12102a subroutine definition (which has compile-time side effects). If not
12103null, it will be the result of a L</newSTATEOP> call, normally including
12104a C<nextstate> or equivalent op.
12105
12106If an error occurs in parsing or compilation, in most cases a valid op
12107tree (most likely null) is returned anyway. The error is reflected in
12108the parser state, normally resulting in a single exception at the top
12109level of parsing which covers all the compilation errors that occurred.
12110Some compilation errors, however, will throw an exception immediately.
12111
12112The I<flags> parameter is reserved for future use, and must always
12113be zero.
12114
12115=cut
12116*/
12117
12118OP *
12119Perl_parse_fullstmt(pTHX_ U32 flags)
12120{
28ac2b49
Z
12121 if (flags)
12122 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 12123 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
12124}
12125
07ffcb73
Z
12126/*
12127=for apidoc Amx|OP *|parse_stmtseq|U32 flags
12128
12129Parse a sequence of zero or more Perl statements. These may be normal
12130imperative statements, including optional labels, or declarations
12131that have compile-time effect, or any mixture thereof. The statement
12132sequence ends when a closing brace or end-of-file is encountered in a
12133place where a new statement could have validly started. It is up to
12134the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12135is correctly set to reflect the source of the code to be parsed and the
12136lexical context for the statements.
12137
12138The op tree representing the statement sequence is returned. This may
12139be a null pointer if the statements were all null, for example if there
12140were no statements or if there were only subroutine definitions (which
12141have compile-time side effects). If not null, it will be a C<lineseq>
12142list, normally including C<nextstate> or equivalent ops.
12143
12144If an error occurs in parsing or compilation, in most cases a valid op
12145tree is returned anyway. The error is reflected in the parser state,
12146normally resulting in a single exception at the top level of parsing
12147which covers all the compilation errors that occurred. Some compilation
12148errors, however, will throw an exception immediately.
12149
12150The I<flags> parameter is reserved for future use, and must always
12151be zero.
12152
12153=cut
12154*/
12155
12156OP *
12157Perl_parse_stmtseq(pTHX_ U32 flags)
12158{
12159 OP *stmtseqop;
e53d8f76 12160 I32 c;
07ffcb73 12161 if (flags)
78cdf107
Z
12162 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12163 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
12164 c = lex_peek_unichar(0);
12165 if (c != -1 && c != /*{*/'}')
07ffcb73 12166 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
12167 return stmtseqop;
12168}
12169
28ac2b49 12170/*
1da4ca5f
NC
12171 * Local variables:
12172 * c-indentation-style: bsd
12173 * c-basic-offset: 4
14d04a33 12174 * indent-tabs-mode: nil
1da4ca5f
NC
12175 * End:
12176 *
14d04a33 12177 * ex: set ts=8 sts=4 sw=4 et:
37442d52 12178 */