This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add dist/constant/lib to the paths in the generated lib/buildcustomize.pl
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
199e78b7
DM
69#define PL_preambled (PL_parser->preambled)
70#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 71#define PL_linestr (PL_parser->linestr)
c2598295
DM
72#define PL_expect (PL_parser->expect)
73#define PL_copline (PL_parser->copline)
f06b5848
DM
74#define PL_bufptr (PL_parser->bufptr)
75#define PL_oldbufptr (PL_parser->oldbufptr)
76#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77#define PL_linestart (PL_parser->linestart)
78#define PL_bufend (PL_parser->bufend)
79#define PL_last_uni (PL_parser->last_uni)
80#define PL_last_lop (PL_parser->last_lop)
81#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 82#define PL_lex_state (PL_parser->lex_state)
2f9285f8 83#define PL_rsfp (PL_parser->rsfp)
5486870f 84#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
85#define PL_in_my (PL_parser->in_my)
86#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 87#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 88#define PL_multi_end (PL_parser->multi_end)
13765c85 89#define PL_error_count (PL_parser->error_count)
199e78b7
DM
90
91#ifdef PERL_MAD
92# define PL_endwhite (PL_parser->endwhite)
93# define PL_faketokens (PL_parser->faketokens)
94# define PL_lasttoke (PL_parser->lasttoke)
95# define PL_nextwhite (PL_parser->nextwhite)
96# define PL_realtokenstart (PL_parser->realtokenstart)
97# define PL_skipwhite (PL_parser->skipwhite)
98# define PL_thisclose (PL_parser->thisclose)
99# define PL_thismad (PL_parser->thismad)
100# define PL_thisopen (PL_parser->thisopen)
101# define PL_thisstuff (PL_parser->thisstuff)
102# define PL_thistoken (PL_parser->thistoken)
103# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
104# define PL_thiswhite (PL_parser->thiswhite)
105# define PL_nexttoke (PL_parser->nexttoke)
106# define PL_curforce (PL_parser->curforce)
107#else
108# define PL_nexttoke (PL_parser->nexttoke)
109# define PL_nexttype (PL_parser->nexttype)
110# define PL_nextval (PL_parser->nextval)
199e78b7
DM
111#endif
112
a1894d81 113static const char* const ident_too_long = "Identifier too long";
8903cb82 114
29595ff2 115#ifdef PERL_MAD
29595ff2 116# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 117# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 118#else
5db06880 119# define CURMAD(slot,sv)
9ded7720 120# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
121#endif
122
a7aaec61
Z
123#define XENUMMASK 0x3f
124#define XFAKEEOF 0x40
125#define XFAKEBRACK 0x80
9059aa12 126
39e02b42
JH
127#ifdef USE_UTF8_SCRIPTS
128# define UTF (!IN_BYTES)
2b9d42f0 129#else
802a15e9 130# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
2b9d42f0 131#endif
a0ed51b3 132
b1fc3636
CJ
133/* The maximum number of characters preceding the unrecognized one to display */
134#define UNRECOGNIZED_PRECEDE_COUNT 10
135
61f0cdd9 136/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
137 * 1999-02-27 mjd-perl-patch@plover.com */
138#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
139
14bd96d0 140#define SPACE_OR_TAB(c) isBLANK_A(c)
bf4acbe4 141
ffb4593c
NT
142/* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
79072805 144 * can get by with a single comparison (if the compiler is smart enough).
9da1dd8f
DM
145 *
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
79072805
LW
148 */
149
fb73857a 150/* #define LEX_NOTPARSING 11 is done in perl.h. */
151
b6007c36
DM
152#define LEX_NORMAL 10 /* normal code (ie not within "...") */
153#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156#define LEX_INTERPSTART 6 /* expecting the start of a $var */
157
158 /* at end of code, eg "$x" followed by: */
159#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
161
162#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164#define LEX_INTERPCONST 2 /* NOT USED */
165#define LEX_FORMLINE 1 /* expecting a format line */
166#define LEX_KNOWNEXT 0 /* next token known; just return it */
167
79072805 168
bbf60fe6 169#ifdef DEBUGGING
27da23d5 170static const char* const lex_state_names[] = {
bbf60fe6
DM
171 "KNOWNEXT",
172 "FORMLINE",
173 "INTERPCONST",
174 "INTERPCONCAT",
175 "INTERPENDMAYBE",
176 "INTERPEND",
177 "INTERPSTART",
178 "INTERPPUSH",
179 "INTERPCASEMOD",
180 "INTERPNORMAL",
181 "NORMAL"
182};
183#endif
184
79072805
LW
185#ifdef ff_next
186#undef ff_next
d48672a2
LW
187#endif
188
79072805 189#include "keywords.h"
fe14fcc3 190
ffb4593c
NT
191/* CLINE is a macro that ensures PL_copline has a sane value */
192
ae986130
LW
193#ifdef CLINE
194#undef CLINE
195#endif
57843af0 196#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 197
5db06880 198#ifdef PERL_MAD
29595ff2
NC
199# define SKIPSPACE0(s) skipspace0(s)
200# define SKIPSPACE1(s) skipspace1(s)
201# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202# define PEEKSPACE(s) skipspace2(s,0)
203#else
204# define SKIPSPACE0(s) skipspace(s)
205# define SKIPSPACE1(s) skipspace(s)
206# define SKIPSPACE2(s,tsv) skipspace(s)
207# define PEEKSPACE(s) skipspace(s)
208#endif
209
ffb4593c
NT
210/*
211 * Convenience functions to return different tokens and prime the
9cbb5ea2 212 * lexer for the next token. They all take an argument.
ffb4593c
NT
213 *
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
7eb971ee 224 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 225 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
226 * BOop : bitwise or or xor
227 * BAop : bitwise and
228 * SHop : shift operator
229 * PWop : power operator
9cbb5ea2 230 * PMop : pattern-matching operator
ffb4593c
NT
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
e5edeb50 234 * Rop : relational operator <= != gt
ffb4593c
NT
235 *
236 * Also see LOP and lop() below.
237 */
238
998054bd 239#ifdef DEBUGGING /* Serve -DT. */
704d4215 240# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 241#else
bbf60fe6 242# define REPORT(retval) (retval)
998054bd
SC
243#endif
244
bbf60fe6
DM
245#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
252#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 255#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
d68ce4ac 272#define UNI3(f,x,have_x) { \
6154021b 273 pl_yylval.ival = f; \
d68ce4ac 274 if (have_x) PL_expect = x; \
376fcdbf
AL
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
d68ce4ac
FC
283#define UNI(f) UNI3(f,XTERM,1)
284#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
b5fb7ce3
FC
285#define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
22393538
MH
287 OPERATOR(f); \
288 }
a687059c 289
d68ce4ac 290#define UNIBRACK(f) UNI3(f,0,0)
79072805 291
9f68db38 292/* grandfather return to old style */
78cdf107
Z
293#define OLDLOP(f) \
294 do { \
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
298 PL_expect = XTERM; \
299 PL_bufptr = s; \
300 return (int)LSTOP; \
301 } while(0)
79072805 302
83944c01
FC
303#define COPLINE_INC_WITH_HERELINES \
304 STMT_START { \
305 CopLINE_inc(PL_curcop); \
d794b522
FC
306 if (PL_parser->lex_shared->herelines) \
307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308 PL_parser->lex_shared->herelines = 0; \
83944c01
FC
309 } STMT_END
310
311
8fa7f367
JH
312#ifdef DEBUGGING
313
6154021b 314/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
315enum token_type {
316 TOKENTYPE_NONE,
317 TOKENTYPE_IVAL,
6154021b 318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6 319 TOKENTYPE_PVAL,
aeaef349 320 TOKENTYPE_OPVAL
bbf60fe6
DM
321};
322
6d4a66ac
NC
323static struct debug_tokens {
324 const int token;
325 enum token_type type;
326 const char *name;
327} const debug_tokens[] =
9041c2e3 328{
bbf60fe6
DM
329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
330 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
331 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
333 { ARROW, TOKENTYPE_NONE, "ARROW" },
334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
340 { DO, TOKENTYPE_NONE, "DO" },
341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
342 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
343 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
345 { ELSE, TOKENTYPE_NONE, "ELSE" },
346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
347 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
348 { FOR, TOKENTYPE_IVAL, "FOR" },
349 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
705fe0e5
FC
350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
bbf60fe6
DM
352 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
360 { IF, TOKENTYPE_IVAL, "IF" },
5504e6cf 361 { LABEL, TOKENTYPE_PVAL, "LABEL" },
bbf60fe6
DM
362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
367 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
368 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
369 { MY, TOKENTYPE_IVAL, "MY" },
bbf60fe6
DM
370 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
371 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
372 { OROP, TOKENTYPE_IVAL, "OROP" },
373 { OROR, TOKENTYPE_NONE, "OROR" },
374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
f3f204dc 375 { PEG, TOKENTYPE_NONE, "PEG" },
88e1f1a2
JV
376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
380 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
381 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
382 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
383 { PREINC, TOKENTYPE_NONE, "PREINC" },
384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
f3f204dc 385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
bbf60fe6
DM
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
f3f204dc 388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
bbf60fe6
DM
389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
390 { SUB, TOKENTYPE_NONE, "SUB" },
391 { THING, TOKENTYPE_OPVAL, "THING" },
392 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
397 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 398 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
399 { WHILE, TOKENTYPE_IVAL, "WHILE" },
400 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 402 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
403};
404
6154021b 405/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 406
bbf60fe6 407STATIC int
704d4215 408S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 409{
97aff369 410 dVAR;
7918f24d
NC
411
412 PERL_ARGS_ASSERT_TOKEREPORT;
413
bbf60fe6 414 if (DEBUG_T_TEST) {
bd61b366 415 const char *name = NULL;
bbf60fe6 416 enum token_type type = TOKENTYPE_NONE;
f54cb97a 417 const struct debug_tokens *p;
396482e1 418 SV* const report = newSVpvs("<== ");
bbf60fe6 419
f54cb97a 420 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
421 if (p->token == (int)rv) {
422 name = p->name;
423 type = p->type;
424 break;
425 }
426 }
427 if (name)
54667de8 428 Perl_sv_catpv(aTHX_ report, name);
74736ae6 429 else if ((char)rv > ' ' && (char)rv <= '~')
4ebc7986 430 {
bbf60fe6 431 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
4ebc7986
FC
432 if ((char)rv == 'p')
433 sv_catpvs(report, " (pending identifier)");
434 }
bbf60fe6 435 else if (!rv)
396482e1 436 sv_catpvs(report, "EOF");
bbf60fe6
DM
437 else
438 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
439 switch (type) {
440 case TOKENTYPE_NONE:
bbf60fe6
DM
441 break;
442 case TOKENTYPE_IVAL:
704d4215 443 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
444 break;
445 case TOKENTYPE_OPNUM:
446 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 447 PL_op_name[lvalp->ival]);
bbf60fe6
DM
448 break;
449 case TOKENTYPE_PVAL:
704d4215 450 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
451 break;
452 case TOKENTYPE_OPVAL:
704d4215 453 if (lvalp->opval) {
401441c0 454 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
455 PL_op_name[lvalp->opval->op_type]);
456 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 457 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 458 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
459 }
460
461 }
401441c0 462 else
396482e1 463 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
464 break;
465 }
b6007c36 466 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
467 };
468 return (int)rv;
998054bd
SC
469}
470
b6007c36
DM
471
472/* print the buffer with suitable escapes */
473
474STATIC void
15f169a1 475S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 476{
396482e1 477 SV* const tmp = newSVpvs("");
7918f24d
NC
478
479 PERL_ARGS_ASSERT_PRINTBUF;
480
b6007c36
DM
481 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
482 SvREFCNT_dec(tmp);
483}
484
8fa7f367
JH
485#endif
486
8290c323
NC
487static int
488S_deprecate_commaless_var_list(pTHX) {
489 PL_expect = XTERM;
490 deprecate("comma-less variable list");
491 return REPORT(','); /* grandfather non-comma-format format */
492}
493
ffb4593c
NT
494/*
495 * S_ao
496 *
c963b151
BD
497 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
498 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
499 */
500
76e3520e 501STATIC int
cea2e8a9 502S_ao(pTHX_ int toketype)
a0d0e21e 503{
97aff369 504 dVAR;
3280af22
NIS
505 if (*PL_bufptr == '=') {
506 PL_bufptr++;
a0d0e21e 507 if (toketype == ANDAND)
6154021b 508 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 509 else if (toketype == OROR)
6154021b 510 pl_yylval.ival = OP_ORASSIGN;
c963b151 511 else if (toketype == DORDOR)
6154021b 512 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
513 toketype = ASSIGNOP;
514 }
515 return toketype;
516}
517
ffb4593c
NT
518/*
519 * S_no_op
520 * When Perl expects an operator and finds something else, no_op
521 * prints the warning. It always prints "<something> found where
522 * operator expected. It prints "Missing semicolon on previous line?"
523 * if the surprise occurs at the start of the line. "do you need to
524 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
525 * where the compiler doesn't know if foo is a method call or a function.
526 * It prints "Missing operator before end of line" if there's nothing
527 * after the missing operator, or "... before <...>" if there is something
528 * after the missing operator.
529 */
530
76e3520e 531STATIC void
15f169a1 532S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 533{
97aff369 534 dVAR;
9d4ba2ae
AL
535 char * const oldbp = PL_bufptr;
536 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 537
7918f24d
NC
538 PERL_ARGS_ASSERT_NO_OP;
539
1189a94a
GS
540 if (!s)
541 s = oldbp;
07c798fb 542 else
1189a94a 543 PL_bufptr = s;
734ab321 544 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
56da5a46
RGS
545 if (ckWARN_d(WARN_SYNTAX)) {
546 if (is_first)
547 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
548 "\t(Missing semicolon on previous line?)\n");
549 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 550 const char *t;
8a2bca12 551 for (t = PL_oldoldbufptr; (isWORDCHAR_lazy_if(t,UTF) || *t == ':');
734ab321 552 t += UTF ? UTF8SKIP(t) : 1)
c35e046a 553 NOOP;
56da5a46
RGS
554 if (t < PL_bufptr && isSPACE(*t))
555 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
b17a0679
FC
556 "\t(Do you need to predeclare %"UTF8f"?)\n",
557 UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
56da5a46
RGS
558 }
559 else {
560 assert(s >= oldbp);
561 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
b17a0679
FC
562 "\t(Missing operator before %"UTF8f"?)\n",
563 UTF8fARG(UTF, s - oldbp, oldbp));
56da5a46 564 }
07c798fb 565 }
3280af22 566 PL_bufptr = oldbp;
8990e307
LW
567}
568
ffb4593c
NT
569/*
570 * S_missingterm
571 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 572 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
573 * If we're in a delimited string and the delimiter is a control
574 * character, it's reformatted into a two-char sequence like ^C.
575 * This is fatal.
576 */
577
76e3520e 578STATIC void
cea2e8a9 579S_missingterm(pTHX_ char *s)
8990e307 580{
97aff369 581 dVAR;
8990e307
LW
582 char tmpbuf[3];
583 char q;
584 if (s) {
9d4ba2ae 585 char * const nl = strrchr(s,'\n');
d2719217 586 if (nl)
8990e307
LW
587 *nl = '\0';
588 }
463559e7 589 else if (isCNTRL(PL_multi_close)) {
8990e307 590 *tmpbuf = '^';
585ec06d 591 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
592 tmpbuf[2] = '\0';
593 s = tmpbuf;
594 }
595 else {
eb160463 596 *tmpbuf = (char)PL_multi_close;
8990e307
LW
597 tmpbuf[1] = '\0';
598 s = tmpbuf;
599 }
600 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 601 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 602}
79072805 603
dd0ac2b9
FC
604#include "feature.h"
605
0d863452 606/*
0d863452
RH
607 * Check whether the named feature is enabled.
608 */
26ea9e12 609bool
3fff3427 610Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 611{
97aff369 612 dVAR;
4a731d7b 613 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
614
615 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
ca4d40c4
FC
616
617 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
7918f24d 618
26ea9e12
NC
619 if (namelen > MAX_FEATURE_LEN)
620 return FALSE;
3fff3427 621 memcpy(&he_name[8], name, namelen);
7d69d4a6 622
c8ca97b0
NC
623 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
624 REFCOUNTED_HE_EXISTS));
0d863452
RH
625}
626
ffb4593c 627/*
9cbb5ea2
GS
628 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
629 * utf16-to-utf8-reversed.
ffb4593c
NT
630 */
631
c39cd008
GS
632#ifdef PERL_CR_FILTER
633static void
634strip_return(SV *sv)
635{
eb578fdb
KW
636 const char *s = SvPVX_const(sv);
637 const char * const e = s + SvCUR(sv);
7918f24d
NC
638
639 PERL_ARGS_ASSERT_STRIP_RETURN;
640
c39cd008
GS
641 /* outer loop optimized to do nothing if there are no CR-LFs */
642 while (s < e) {
643 if (*s++ == '\r' && *s == '\n') {
644 /* hit a CR-LF, need to copy the rest */
eb578fdb 645 char *d = s - 1;
c39cd008
GS
646 *d++ = *s++;
647 while (s < e) {
648 if (*s == '\r' && s[1] == '\n')
649 s++;
650 *d++ = *s++;
651 }
652 SvCUR(sv) -= s - d;
653 return;
654 }
655 }
656}
a868473f 657
76e3520e 658STATIC I32
c39cd008 659S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 660{
f54cb97a 661 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
662 if (count > 0 && !maxlen)
663 strip_return(sv);
664 return count;
a868473f
NIS
665}
666#endif
667
ffb4593c 668/*
8eaa0acf
Z
669=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
670
671Creates and initialises a new lexer/parser state object, supplying
672a context in which to lex and parse from a new source of Perl code.
673A pointer to the new state object is placed in L</PL_parser>. An entry
674is made on the save stack so that upon unwinding the new state object
675will be destroyed and the former value of L</PL_parser> will be restored.
676Nothing else need be done to clean up the parsing context.
677
678The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
679non-null, provides a string (in SV form) containing code to be parsed.
680A copy of the string is made, so subsequent modification of I<line>
681does not affect parsing. I<rsfp>, if non-null, provides an input stream
682from which code will be read to be parsed. If both are non-null, the
683code in I<line> comes first and must consist of complete lines of input,
684and I<rsfp> supplies the remainder of the source.
685
e368b3bd
FC
686The I<flags> parameter is reserved for future use. Currently it is only
687used by perl internally, so extensions should always pass zero.
8eaa0acf
Z
688
689=cut
690*/
ffb4593c 691
27fcb6ee 692/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
87606032
NC
693 can share filters with the current parser.
694 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
695 caller, hence isn't owned by the parser, so shouldn't be closed on parser
696 destruction. This is used to handle the case of defaulting to reading the
697 script from the standard input because no filename was given on the command
698 line (without getting confused by situation where STDIN has been closed, so
699 the script handle is opened on fd 0) */
27fcb6ee 700
a0d0e21e 701void
8eaa0acf 702Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 703{
97aff369 704 dVAR;
6ef55633 705 const char *s = NULL;
5486870f 706 yy_parser *parser, *oparser;
60d63348 707 if (flags && flags & ~LEX_START_FLAGS)
8eaa0acf 708 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
709
710 /* create and initialise a parser */
711
199e78b7 712 Newxz(parser, 1, yy_parser);
5486870f 713 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
714 PL_parser = parser;
715
28ac2b49
Z
716 parser->stack = NULL;
717 parser->ps = NULL;
718 parser->stack_size = 0;
acdf0a21 719
e3abe207
DM
720 /* on scope exit, free this parser and restore any outer one */
721 SAVEPARSER(parser);
7c4baf47 722 parser->saved_curcop = PL_curcop;
e3abe207 723
acdf0a21 724 /* initialise lexer state */
8990e307 725
fb205e7a
DM
726#ifdef PERL_MAD
727 parser->curforce = -1;
728#else
729 parser->nexttoke = 0;
730#endif
ca4cfd28 731 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 732 parser->copline = NOLINE;
5afb0a62 733 parser->lex_state = LEX_NORMAL;
c2598295 734 parser->expect = XSTATE;
2f9285f8 735 parser->rsfp = rsfp;
27fcb6ee
FC
736 parser->rsfp_filters =
737 !(flags & LEX_START_SAME_FILTER) || !oparser
d3cd8e11
FC
738 ? NULL
739 : MUTABLE_AV(SvREFCNT_inc(
740 oparser->rsfp_filters
741 ? oparser->rsfp_filters
742 : (oparser->rsfp_filters = newAV())
743 ));
2f9285f8 744
199e78b7
DM
745 Newx(parser->lex_brackstack, 120, char);
746 Newx(parser->lex_casestack, 12, char);
747 *parser->lex_casestack = '\0';
d794b522 748 Newxz(parser->lex_shared, 1, LEXSHARED);
02b34bbe 749
10efb74f 750 if (line) {
0528fd32 751 STRLEN len;
10efb74f 752 s = SvPV_const(line, len);
0abcdfa4
FC
753 parser->linestr = flags & LEX_START_COPIED
754 ? SvREFCNT_inc_simple_NN(line)
755 : newSVpvn_flags(s, len, SvUTF8(line));
11076590 756 sv_catpvs(parser->linestr, "\n;");
0abcdfa4
FC
757 } else {
758 parser->linestr = newSVpvs("\n;");
8990e307 759 }
f06b5848
DM
760 parser->oldoldbufptr =
761 parser->oldbufptr =
762 parser->bufptr =
763 parser->linestart = SvPVX(parser->linestr);
764 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
765 parser->last_lop = parser->last_uni = NULL;
87606032
NC
766 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
767 |LEX_DONT_CLOSE_RSFP);
737c24fc 768
60d63348 769 parser->in_pod = parser->filtered = 0;
79072805 770}
a687059c 771
e3abe207
DM
772
773/* delete a parser object */
774
775void
776Perl_parser_free(pTHX_ const yy_parser *parser)
777{
7918f24d
NC
778 PERL_ARGS_ASSERT_PARSER_FREE;
779
7c4baf47 780 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
781 SvREFCNT_dec(parser->linestr);
782
87606032 783 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
2f9285f8 784 PerlIO_clearerr(parser->rsfp);
799361c3
SH
785 else if (parser->rsfp && (!parser->old_parser ||
786 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 787 PerlIO_close(parser->rsfp);
5486870f 788 SvREFCNT_dec(parser->rsfp_filters);
10002bc1
FC
789 SvREFCNT_dec(parser->lex_stuff);
790 SvREFCNT_dec(parser->sublex_info.repl);
3ac7ff8f
FC
791
792 Safefree(parser->lex_brackstack);
793 Safefree(parser->lex_casestack);
794 Safefree(parser->lex_shared);
795 PL_parser = parser->old_parser;
796 Safefree(parser);
797}
798
799void
800Perl_parser_free_nexttoke_ops(pTHX_ yy_parser *parser, OPSLAB *slab)
801{
802#ifdef PERL_MAD
803 I32 nexttoke = parser->lasttoke;
804#else
805 I32 nexttoke = parser->nexttoke;
806#endif
807 PERL_ARGS_ASSERT_PARSER_FREE_NEXTTOKE_OPS;
3ce3dcd9
FC
808 while (nexttoke--) {
809#ifdef PERL_MAD
810 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
3ac7ff8f
FC
811 & 0xffff)
812 && parser->nexttoke[nexttoke].next_val.opval
813 && parser->nexttoke[nexttoke].next_val.opval->op_slabbed
814 && OpSLAB(parser->nexttoke[nexttoke].next_val.opval) == slab) {
815 op_free(parser->nexttoke[nexttoke].next_val.opval);
816 parser->nexttoke[nexttoke].next_val.opval = NULL;
817 }
3ce3dcd9 818#else
3ac7ff8f
FC
819 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff)
820 && parser->nextval[nexttoke].opval
821 && parser->nextval[nexttoke].opval->op_slabbed
822 && OpSLAB(parser->nextval[nexttoke].opval) == slab) {
3ce3dcd9 823 op_free(parser->nextval[nexttoke].opval);
3ac7ff8f
FC
824 parser->nextval[nexttoke].opval = NULL;
825 }
3ce3dcd9
FC
826#endif
827 }
e3abe207
DM
828}
829
830
ffb4593c 831/*
f0e67a1d
Z
832=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
833
834Buffer scalar containing the chunk currently under consideration of the
835text currently being lexed. This is always a plain string scalar (for
836which C<SvPOK> is true). It is not intended to be used as a scalar by
837normal scalar means; instead refer to the buffer directly by the pointer
838variables described below.
839
840The lexer maintains various C<char*> pointers to things in the
841C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
842reallocated, all of these pointers must be updated. Don't attempt to
843do this manually, but rather use L</lex_grow_linestr> if you need to
844reallocate the buffer.
845
846The content of the text chunk in the buffer is commonly exactly one
847complete line of input, up to and including a newline terminator,
848but there are situations where it is otherwise. The octets of the
849buffer may be intended to be interpreted as either UTF-8 or Latin-1.
850The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
851flag on this scalar, which may disagree with it.
852
853For direct examination of the buffer, the variable
854L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
855lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
856of these pointers is usually preferable to examination of the scalar
857through normal scalar means.
858
859=for apidoc AmxU|char *|PL_parser-E<gt>bufend
860
861Direct pointer to the end of the chunk of text currently being lexed, the
862end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
863+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
864always located at the end of the buffer, and does not count as part of
865the buffer's contents.
866
867=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
868
869Points to the current position of lexing inside the lexer buffer.
870Characters around this point may be freely examined, within
871the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
872L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
873interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
874
875Lexing code (whether in the Perl core or not) moves this pointer past
876the characters that it consumes. It is also expected to perform some
877bookkeeping whenever a newline character is consumed. This movement
878can be more conveniently performed by the function L</lex_read_to>,
879which handles newlines appropriately.
880
881Interpretation of the buffer's octets can be abstracted out by
882using the slightly higher-level functions L</lex_peek_unichar> and
883L</lex_read_unichar>.
884
885=for apidoc AmxU|char *|PL_parser-E<gt>linestart
886
887Points to the start of the current line inside the lexer buffer.
888This is useful for indicating at which column an error occurred, and
889not much else. This must be updated by any lexing code that consumes
890a newline; the function L</lex_read_to> handles this detail.
891
892=cut
893*/
894
895/*
896=for apidoc Amx|bool|lex_bufutf8
897
898Indicates whether the octets in the lexer buffer
899(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
900of Unicode characters. If not, they should be interpreted as Latin-1
901characters. This is analogous to the C<SvUTF8> flag for scalars.
902
903In UTF-8 mode, it is not guaranteed that the lexer buffer actually
904contains valid UTF-8. Lexing code must be robust in the face of invalid
905encoding.
906
907The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
908is significant, but not the whole story regarding the input character
909encoding. Normally, when a file is being read, the scalar contains octets
910and its C<SvUTF8> flag is off, but the octets should be interpreted as
911UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
912however, the scalar may have the C<SvUTF8> flag on, and in this case its
913octets should be interpreted as UTF-8 unless the C<use bytes> pragma
914is in effect. This logic may change in the future; use this function
915instead of implementing the logic yourself.
916
917=cut
918*/
919
920bool
921Perl_lex_bufutf8(pTHX)
922{
923 return UTF;
924}
925
926/*
927=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
928
929Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
930at least I<len> octets (including terminating NUL). Returns a
931pointer to the reallocated buffer. This is necessary before making
932any direct modification of the buffer that would increase its length.
933L</lex_stuff_pvn> provides a more convenient way to insert text into
934the buffer.
935
936Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
937this function updates all of the lexer's variables that point directly
938into the buffer.
939
940=cut
941*/
942
943char *
944Perl_lex_grow_linestr(pTHX_ STRLEN len)
945{
946 SV *linestr;
947 char *buf;
948 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
c7641931 949 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
f0e67a1d
Z
950 linestr = PL_parser->linestr;
951 buf = SvPVX(linestr);
952 if (len <= SvLEN(linestr))
953 return buf;
954 bufend_pos = PL_parser->bufend - buf;
955 bufptr_pos = PL_parser->bufptr - buf;
956 oldbufptr_pos = PL_parser->oldbufptr - buf;
957 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
958 linestart_pos = PL_parser->linestart - buf;
959 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
960 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3328ab5a
FC
961 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
962 PL_parser->lex_shared->re_eval_start - buf : 0;
c7641931 963
f0e67a1d 964 buf = sv_grow(linestr, len);
c7641931 965
f0e67a1d
Z
966 PL_parser->bufend = buf + bufend_pos;
967 PL_parser->bufptr = buf + bufptr_pos;
968 PL_parser->oldbufptr = buf + oldbufptr_pos;
969 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
970 PL_parser->linestart = buf + linestart_pos;
971 if (PL_parser->last_uni)
972 PL_parser->last_uni = buf + last_uni_pos;
973 if (PL_parser->last_lop)
974 PL_parser->last_lop = buf + last_lop_pos;
3328ab5a
FC
975 if (PL_parser->lex_shared->re_eval_start)
976 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
f0e67a1d
Z
977 return buf;
978}
979
980/*
83aa740e 981=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
982
983Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
984immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
985reallocating the buffer if necessary. This means that lexing code that
986runs later will see the characters as if they had appeared in the input.
987It is not recommended to do this as part of normal parsing, and most
988uses of this facility run the risk of the inserted characters being
989interpreted in an unintended manner.
990
991The string to be inserted is represented by I<len> octets starting
992at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
993according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
994The characters are recoded for the lexer buffer, according to how the
995buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 996to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
997function is more convenient.
998
999=cut
1000*/
1001
1002void
83aa740e 1003Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 1004{
749123ff 1005 dVAR;
f0e67a1d
Z
1006 char *bufptr;
1007 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
1008 if (flags & ~(LEX_STUFF_UTF8))
1009 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
1010 if (UTF) {
1011 if (flags & LEX_STUFF_UTF8) {
1012 goto plain_copy;
1013 } else {
54d004e8 1014 STRLEN highhalf = 0; /* Count of variants */
83aa740e 1015 const char *p, *e = pv+len;
54d004e8
KW
1016 for (p = pv; p != e; p++) {
1017 if (! UTF8_IS_INVARIANT(*p)) {
1018 highhalf++;
1019 }
1020 }
f0e67a1d
Z
1021 if (!highhalf)
1022 goto plain_copy;
1023 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1024 bufptr = PL_parser->bufptr;
1025 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1026 SvCUR_set(PL_parser->linestr,
1027 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
1028 PL_parser->bufend += len+highhalf;
1029 for (p = pv; p != e; p++) {
1030 U8 c = (U8)*p;
54d004e8
KW
1031 if (! UTF8_IS_INVARIANT(c)) {
1032 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1033 *bufptr++ = UTF8_TWO_BYTE_LO(c);
f0e67a1d
Z
1034 } else {
1035 *bufptr++ = (char)c;
1036 }
1037 }
1038 }
1039 } else {
1040 if (flags & LEX_STUFF_UTF8) {
1041 STRLEN highhalf = 0;
83aa740e 1042 const char *p, *e = pv+len;
f0e67a1d
Z
1043 for (p = pv; p != e; p++) {
1044 U8 c = (U8)*p;
54d004e8 1045 if (UTF8_IS_ABOVE_LATIN1(c)) {
f0e67a1d
Z
1046 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1047 "non-Latin-1 character into Latin-1 input");
54d004e8 1048 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
f0e67a1d
Z
1049 p++;
1050 highhalf++;
54d004e8 1051 } else if (! UTF8_IS_INVARIANT(c)) {
f0e67a1d
Z
1052 /* malformed UTF-8 */
1053 ENTER;
1054 SAVESPTR(PL_warnhook);
1055 PL_warnhook = PERL_WARNHOOK_FATAL;
1056 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1057 LEAVE;
1058 }
1059 }
1060 if (!highhalf)
1061 goto plain_copy;
1062 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1063 bufptr = PL_parser->bufptr;
1064 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1065 SvCUR_set(PL_parser->linestr,
1066 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d 1067 PL_parser->bufend += len-highhalf;
54d004e8
KW
1068 p = pv;
1069 while (p < e) {
1070 if (UTF8_IS_INVARIANT(*p)) {
1071 *bufptr++ = *p;
1072 p++;
f0e67a1d 1073 }
54d004e8
KW
1074 else {
1075 assert(p < e -1 );
1076 *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1077 p += 2;
1078 }
f0e67a1d
Z
1079 }
1080 } else {
54d004e8 1081 plain_copy:
f0e67a1d
Z
1082 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1083 bufptr = PL_parser->bufptr;
1084 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1085 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1086 PL_parser->bufend += len;
1087 Copy(pv, bufptr, len, char);
1088 }
1089 }
1090}
1091
1092/*
9dcc53ea
Z
1093=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1094
1095Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1096immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1097reallocating the buffer if necessary. This means that lexing code that
1098runs later will see the characters as if they had appeared in the input.
1099It is not recommended to do this as part of normal parsing, and most
1100uses of this facility run the risk of the inserted characters being
1101interpreted in an unintended manner.
1102
1103The string to be inserted is represented by octets starting at I<pv>
1104and continuing to the first nul. These octets are interpreted as either
1105UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1106in I<flags>. The characters are recoded for the lexer buffer, according
1107to how the buffer is currently being interpreted (L</lex_bufutf8>).
1108If it is not convenient to nul-terminate a string to be inserted, the
1109L</lex_stuff_pvn> function is more appropriate.
1110
1111=cut
1112*/
1113
1114void
1115Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1116{
1117 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1118 lex_stuff_pvn(pv, strlen(pv), flags);
1119}
1120
1121/*
f0e67a1d
Z
1122=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1123
1124Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1125immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1126reallocating the buffer if necessary. This means that lexing code that
1127runs later will see the characters as if they had appeared in the input.
1128It is not recommended to do this as part of normal parsing, and most
1129uses of this facility run the risk of the inserted characters being
1130interpreted in an unintended manner.
1131
1132The string to be inserted is the string value of I<sv>. The characters
1133are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1134being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1135not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1136need to construct a scalar.
1137
1138=cut
1139*/
1140
1141void
1142Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1143{
1144 char *pv;
1145 STRLEN len;
1146 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1147 if (flags)
1148 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1149 pv = SvPV(sv, len);
1150 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1151}
1152
1153/*
1154=for apidoc Amx|void|lex_unstuff|char *ptr
1155
1156Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1157I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1158This hides the discarded text from any lexing code that runs later,
1159as if the text had never appeared.
1160
1161This is not the normal way to consume lexed text. For that, use
1162L</lex_read_to>.
1163
1164=cut
1165*/
1166
1167void
1168Perl_lex_unstuff(pTHX_ char *ptr)
1169{
1170 char *buf, *bufend;
1171 STRLEN unstuff_len;
1172 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1173 buf = PL_parser->bufptr;
1174 if (ptr < buf)
1175 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1176 if (ptr == buf)
1177 return;
1178 bufend = PL_parser->bufend;
1179 if (ptr > bufend)
1180 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1181 unstuff_len = ptr - buf;
1182 Move(ptr, buf, bufend+1-ptr, char);
1183 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1184 PL_parser->bufend = bufend - unstuff_len;
1185}
1186
1187/*
1188=for apidoc Amx|void|lex_read_to|char *ptr
1189
1190Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1191to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1192performing the correct bookkeeping whenever a newline character is passed.
1193This is the normal way to consume lexed text.
1194
1195Interpretation of the buffer's octets can be abstracted out by
1196using the slightly higher-level functions L</lex_peek_unichar> and
1197L</lex_read_unichar>.
1198
1199=cut
1200*/
1201
1202void
1203Perl_lex_read_to(pTHX_ char *ptr)
1204{
1205 char *s;
1206 PERL_ARGS_ASSERT_LEX_READ_TO;
1207 s = PL_parser->bufptr;
1208 if (ptr < s || ptr > PL_parser->bufend)
1209 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1210 for (; s != ptr; s++)
1211 if (*s == '\n') {
83944c01 1212 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
1213 PL_parser->linestart = s+1;
1214 }
1215 PL_parser->bufptr = ptr;
1216}
1217
1218/*
1219=for apidoc Amx|void|lex_discard_to|char *ptr
1220
1221Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1222up to I<ptr>. The remaining content of the buffer will be moved, and
1223all pointers into the buffer updated appropriately. I<ptr> must not
1224be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1225it is not permitted to discard text that has yet to be lexed.
1226
1227Normally it is not necessarily to do this directly, because it suffices to
1228use the implicit discarding behaviour of L</lex_next_chunk> and things
1229based on it. However, if a token stretches across multiple lines,
1f317c95 1230and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1231that purpose, then after completion of the token it would be wise to
1232explicitly discard the now-unneeded earlier lines, to avoid future
1233multi-line tokens growing the buffer without bound.
1234
1235=cut
1236*/
1237
1238void
1239Perl_lex_discard_to(pTHX_ char *ptr)
1240{
1241 char *buf;
1242 STRLEN discard_len;
1243 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1244 buf = SvPVX(PL_parser->linestr);
1245 if (ptr < buf)
1246 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1247 if (ptr == buf)
1248 return;
1249 if (ptr > PL_parser->bufptr)
1250 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1251 discard_len = ptr - buf;
1252 if (PL_parser->oldbufptr < ptr)
1253 PL_parser->oldbufptr = ptr;
1254 if (PL_parser->oldoldbufptr < ptr)
1255 PL_parser->oldoldbufptr = ptr;
1256 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1257 PL_parser->last_uni = NULL;
1258 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1259 PL_parser->last_lop = NULL;
1260 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1261 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1262 PL_parser->bufend -= discard_len;
1263 PL_parser->bufptr -= discard_len;
1264 PL_parser->oldbufptr -= discard_len;
1265 PL_parser->oldoldbufptr -= discard_len;
1266 if (PL_parser->last_uni)
1267 PL_parser->last_uni -= discard_len;
1268 if (PL_parser->last_lop)
1269 PL_parser->last_lop -= discard_len;
1270}
1271
1272/*
1273=for apidoc Amx|bool|lex_next_chunk|U32 flags
1274
1275Reads in the next chunk of text to be lexed, appending it to
1276L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1277looked to the end of the current chunk and wants to know more. It is
1278usual, but not necessary, for lexing to have consumed the entirety of
1279the current chunk at this time.
1280
1281If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1282chunk (i.e., the current chunk has been entirely consumed), normally the
1283current chunk will be discarded at the same time that the new chunk is
1284read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1285will not be discarded. If the current chunk has not been entirely
1286consumed, then it will not be discarded regardless of the flag.
1287
1288Returns true if some new text was added to the buffer, or false if the
1289buffer has reached the end of the input text.
1290
1291=cut
1292*/
1293
1294#define LEX_FAKE_EOF 0x80000000
112d1284 1295#define LEX_NO_TERM 0x40000000
f0e67a1d
Z
1296
1297bool
1298Perl_lex_next_chunk(pTHX_ U32 flags)
1299{
1300 SV *linestr;
1301 char *buf;
1302 STRLEN old_bufend_pos, new_bufend_pos;
1303 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1304 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1305 bool got_some_for_debugger = 0;
f0e67a1d 1306 bool got_some;
112d1284 1307 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
f0e67a1d 1308 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1309 linestr = PL_parser->linestr;
1310 buf = SvPVX(linestr);
1311 if (!(flags & LEX_KEEP_PREVIOUS) &&
1312 PL_parser->bufptr == PL_parser->bufend) {
1313 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1314 linestart_pos = 0;
1315 if (PL_parser->last_uni != PL_parser->bufend)
1316 PL_parser->last_uni = NULL;
1317 if (PL_parser->last_lop != PL_parser->bufend)
1318 PL_parser->last_lop = NULL;
1319 last_uni_pos = last_lop_pos = 0;
1320 *buf = 0;
1321 SvCUR(linestr) = 0;
1322 } else {
1323 old_bufend_pos = PL_parser->bufend - buf;
1324 bufptr_pos = PL_parser->bufptr - buf;
1325 oldbufptr_pos = PL_parser->oldbufptr - buf;
1326 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1327 linestart_pos = PL_parser->linestart - buf;
1328 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1329 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1330 }
1331 if (flags & LEX_FAKE_EOF) {
1332 goto eof;
60d63348 1333 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
f0e67a1d
Z
1334 got_some = 0;
1335 } else if (filter_gets(linestr, old_bufend_pos)) {
1336 got_some = 1;
17cc9359 1337 got_some_for_debugger = 1;
112d1284
FC
1338 } else if (flags & LEX_NO_TERM) {
1339 got_some = 0;
f0e67a1d 1340 } else {
580561a3
Z
1341 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1342 sv_setpvs(linestr, "");
f0e67a1d
Z
1343 eof:
1344 /* End of real input. Close filehandle (unless it was STDIN),
1345 * then add implicit termination.
1346 */
87606032 1347 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
f0e67a1d
Z
1348 PerlIO_clearerr(PL_parser->rsfp);
1349 else if (PL_parser->rsfp)
1350 (void)PerlIO_close(PL_parser->rsfp);
1351 PL_parser->rsfp = NULL;
60d63348 1352 PL_parser->in_pod = PL_parser->filtered = 0;
f0e67a1d
Z
1353#ifdef PERL_MAD
1354 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1355 PL_faketokens = 1;
1356#endif
1357 if (!PL_in_eval && PL_minus_p) {
1358 sv_catpvs(linestr,
1359 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1360 PL_minus_n = PL_minus_p = 0;
1361 } else if (!PL_in_eval && PL_minus_n) {
1362 sv_catpvs(linestr, /*{*/";}");
1363 PL_minus_n = 0;
1364 } else
1365 sv_catpvs(linestr, ";");
1366 got_some = 1;
1367 }
1368 buf = SvPVX(linestr);
1369 new_bufend_pos = SvCUR(linestr);
1370 PL_parser->bufend = buf + new_bufend_pos;
1371 PL_parser->bufptr = buf + bufptr_pos;
1372 PL_parser->oldbufptr = buf + oldbufptr_pos;
1373 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1374 PL_parser->linestart = buf + linestart_pos;
1375 if (PL_parser->last_uni)
1376 PL_parser->last_uni = buf + last_uni_pos;
1377 if (PL_parser->last_lop)
1378 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1379 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1380 PL_curstash != PL_debstash) {
1381 /* debugger active and we're not compiling the debugger code,
1382 * so store the line into the debugger's array of lines
1383 */
1384 update_debugger_info(NULL, buf+old_bufend_pos,
1385 new_bufend_pos-old_bufend_pos);
1386 }
1387 return got_some;
1388}
1389
1390/*
1391=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1392
1393Looks ahead one (Unicode) character in the text currently being lexed.
1394Returns the codepoint (unsigned integer value) of the next character,
1395or -1 if lexing has reached the end of the input text. To consume the
1396peeked character, use L</lex_read_unichar>.
1397
1398If the next character is in (or extends into) the next chunk of input
1399text, the next chunk will be read in. Normally the current chunk will be
1400discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1401then the current chunk will not be discarded.
1402
1403If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1404is encountered, an exception is generated.
1405
1406=cut
1407*/
1408
1409I32
1410Perl_lex_peek_unichar(pTHX_ U32 flags)
1411{
749123ff 1412 dVAR;
f0e67a1d
Z
1413 char *s, *bufend;
1414 if (flags & ~(LEX_KEEP_PREVIOUS))
1415 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1416 s = PL_parser->bufptr;
1417 bufend = PL_parser->bufend;
1418 if (UTF) {
1419 U8 head;
1420 I32 unichar;
1421 STRLEN len, retlen;
1422 if (s == bufend) {
1423 if (!lex_next_chunk(flags))
1424 return -1;
1425 s = PL_parser->bufptr;
1426 bufend = PL_parser->bufend;
1427 }
1428 head = (U8)*s;
54d004e8 1429 if (UTF8_IS_INVARIANT(head))
f0e67a1d 1430 return head;
54d004e8
KW
1431 if (UTF8_IS_START(head)) {
1432 len = UTF8SKIP(&head);
f0e67a1d
Z
1433 while ((STRLEN)(bufend-s) < len) {
1434 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1435 break;
1436 s = PL_parser->bufptr;
1437 bufend = PL_parser->bufend;
1438 }
1439 }
1440 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1441 if (retlen == (STRLEN)-1) {
1442 /* malformed UTF-8 */
1443 ENTER;
1444 SAVESPTR(PL_warnhook);
1445 PL_warnhook = PERL_WARNHOOK_FATAL;
1446 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1447 LEAVE;
1448 }
1449 return unichar;
1450 } else {
1451 if (s == bufend) {
1452 if (!lex_next_chunk(flags))
1453 return -1;
1454 s = PL_parser->bufptr;
1455 }
1456 return (U8)*s;
1457 }
1458}
1459
1460/*
1461=for apidoc Amx|I32|lex_read_unichar|U32 flags
1462
1463Reads the next (Unicode) character in the text currently being lexed.
1464Returns the codepoint (unsigned integer value) of the character read,
1465and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1466if lexing has reached the end of the input text. To non-destructively
1467examine the next character, use L</lex_peek_unichar> instead.
1468
1469If the next character is in (or extends into) the next chunk of input
1470text, the next chunk will be read in. Normally the current chunk will be
1471discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1472then the current chunk will not be discarded.
1473
1474If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1475is encountered, an exception is generated.
1476
1477=cut
1478*/
1479
1480I32
1481Perl_lex_read_unichar(pTHX_ U32 flags)
1482{
1483 I32 c;
1484 if (flags & ~(LEX_KEEP_PREVIOUS))
1485 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1486 c = lex_peek_unichar(flags);
1487 if (c != -1) {
1488 if (c == '\n')
83944c01 1489 COPLINE_INC_WITH_HERELINES;
d9018cbe
EB
1490 if (UTF)
1491 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1492 else
1493 ++(PL_parser->bufptr);
f0e67a1d
Z
1494 }
1495 return c;
1496}
1497
1498/*
1499=for apidoc Amx|void|lex_read_space|U32 flags
1500
1501Reads optional spaces, in Perl style, in the text currently being
1502lexed. The spaces may include ordinary whitespace characters and
1503Perl-style comments. C<#line> directives are processed if encountered.
1504L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1505at a non-space character (or the end of the input text).
1506
1507If spaces extend into the next chunk of input text, the next chunk will
1508be read in. Normally the current chunk will be discarded at the same
1509time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1510chunk will not be discarded.
1511
1512=cut
1513*/
1514
21791330 1515#define LEX_NO_INCLINE 0x40000000
f0998909
Z
1516#define LEX_NO_NEXT_CHUNK 0x80000000
1517
f0e67a1d
Z
1518void
1519Perl_lex_read_space(pTHX_ U32 flags)
1520{
1521 char *s, *bufend;
21791330 1522 const bool can_incline = !(flags & LEX_NO_INCLINE);
f0e67a1d 1523 bool need_incline = 0;
21791330 1524 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK|LEX_NO_INCLINE))
f0e67a1d
Z
1525 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1526#ifdef PERL_MAD
1527 if (PL_skipwhite) {
1528 sv_free(PL_skipwhite);
1529 PL_skipwhite = NULL;
1530 }
1531 if (PL_madskills)
1532 PL_skipwhite = newSVpvs("");
1533#endif /* PERL_MAD */
1534 s = PL_parser->bufptr;
1535 bufend = PL_parser->bufend;
1536 while (1) {
1537 char c = *s;
1538 if (c == '#') {
1539 do {
1540 c = *++s;
1541 } while (!(c == '\n' || (c == 0 && s == bufend)));
1542 } else if (c == '\n') {
1543 s++;
21791330
FC
1544 if (can_incline) {
1545 PL_parser->linestart = s;
1546 if (s == bufend)
1547 need_incline = 1;
1548 else
1549 incline(s);
1550 }
f0e67a1d
Z
1551 } else if (isSPACE(c)) {
1552 s++;
1553 } else if (c == 0 && s == bufend) {
1554 bool got_more;
1555#ifdef PERL_MAD
1556 if (PL_madskills)
1557 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1558#endif /* PERL_MAD */
f0998909
Z
1559 if (flags & LEX_NO_NEXT_CHUNK)
1560 break;
f0e67a1d 1561 PL_parser->bufptr = s;
21791330 1562 if (can_incline) COPLINE_INC_WITH_HERELINES;
f0e67a1d 1563 got_more = lex_next_chunk(flags);
21791330 1564 if (can_incline) CopLINE_dec(PL_curcop);
f0e67a1d
Z
1565 s = PL_parser->bufptr;
1566 bufend = PL_parser->bufend;
1567 if (!got_more)
1568 break;
21791330 1569 if (can_incline && need_incline && PL_parser->rsfp) {
f0e67a1d
Z
1570 incline(s);
1571 need_incline = 0;
1572 }
1573 } else {
1574 break;
1575 }
1576 }
1577#ifdef PERL_MAD
1578 if (PL_madskills)
1579 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1580#endif /* PERL_MAD */
1581 PL_parser->bufptr = s;
1582}
1583
1584/*
fe788d6b
PM
1585
1586=for apidoc EXMp|bool|validate_proto|SV *name|SV *proto|bool warn
1587
1588This function performs syntax checking on a prototype, C<proto>.
1589If C<warn> is true, any illegal characters or mismatched brackets
1590will trigger illegalproto warnings, declaring that they were
1591detected in the prototype for C<name>.
1592
1593The return value is C<true> if this is a valid prototype, and
1594C<false> if it is not, regardless of whether C<warn> was C<true> or
1595C<false>.
1596
1597Note that C<NULL> is a valid C<proto> and will always return C<true>.
1598
1599=cut
1600
1601 */
1602
1603bool
1604Perl_validate_proto(pTHX_ SV *name, SV *proto, bool warn)
1605{
1606 STRLEN len, origlen;
1607 char *p = proto ? SvPV(proto, len) : NULL;
1608 bool bad_proto = FALSE;
1609 bool in_brackets = FALSE;
1610 bool after_slash = FALSE;
1611 char greedy_proto = ' ';
1612 bool proto_after_greedy_proto = FALSE;
1613 bool must_be_last = FALSE;
1614 bool underscore = FALSE;
f791a21a 1615 bool bad_proto_after_underscore = FALSE;
fe788d6b
PM
1616
1617 PERL_ARGS_ASSERT_VALIDATE_PROTO;
1618
1619 if (!proto)
1620 return TRUE;
1621
1622 origlen = len;
1623 for (; len--; p++) {
1624 if (!isSPACE(*p)) {
1625 if (must_be_last)
1626 proto_after_greedy_proto = TRUE;
f791a21a
PM
1627 if (underscore) {
1628 if (!strchr(";@%", *p))
1629 bad_proto_after_underscore = TRUE;
1630 underscore = FALSE;
1631 }
fe788d6b
PM
1632 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
1633 bad_proto = TRUE;
1634 }
1635 else {
fe788d6b
PM
1636 if (*p == '[')
1637 in_brackets = TRUE;
1638 else if (*p == ']')
1639 in_brackets = FALSE;
1640 else if ((*p == '@' || *p == '%') &&
1641 !after_slash &&
1642 !in_brackets ) {
1643 must_be_last = TRUE;
1644 greedy_proto = *p;
1645 }
1646 else if (*p == '_')
f791a21a 1647 underscore = TRUE;
fe788d6b
PM
1648 }
1649 if (*p == '\\')
1650 after_slash = TRUE;
1651 else
1652 after_slash = FALSE;
1653 }
1654 }
1655
1656 if (warn) {
b54d603d 1657 SV *tmpsv = newSVpvs_flags("", SVs_TEMP);
fe788d6b 1658 p -= origlen;
b54d603d
PM
1659 p = SvUTF8(proto)
1660 ? sv_uni_display(tmpsv, newSVpvn_flags(p, origlen, SVs_TEMP | SVf_UTF8),
1661 origlen, UNI_DISPLAY_ISPRINT)
1662 : pv_pretty(tmpsv, p, origlen, 60, NULL, NULL, PERL_PV_ESCAPE_NONASCII);
1663
fe788d6b
PM
1664 if (proto_after_greedy_proto)
1665 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1666 "Prototype after '%c' for %"SVf" : %s",
1667 greedy_proto, SVfARG(name), p);
50278ed0
PM
1668 if (in_brackets)
1669 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1670 "Missing ']' in prototype for %"SVf" : %s",
1671 SVfARG(name), p);
b54d603d 1672 if (bad_proto)
fe788d6b 1673 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
f791a21a
PM
1674 "Illegal character in prototype for %"SVf" : %s",
1675 SVfARG(name), p);
1676 if (bad_proto_after_underscore)
1677 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
1678 "Illegal character after '_' in prototype for %"SVf" : %s",
1679 SVfARG(name), p);
fe788d6b
PM
1680 }
1681
1682 return (! (proto_after_greedy_proto || bad_proto) );
1683}
1684
1685/*
ffb4593c
NT
1686 * S_incline
1687 * This subroutine has nothing to do with tilting, whether at windmills
1688 * or pinball tables. Its name is short for "increment line". It
57843af0 1689 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1690 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1691 * # line 500 "foo.pm"
1692 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1693 */
1694
76e3520e 1695STATIC void
d9095cec 1696S_incline(pTHX_ const char *s)
463ee0b2 1697{
97aff369 1698 dVAR;
d9095cec
NC
1699 const char *t;
1700 const char *n;
1701 const char *e;
8818d409 1702 line_t line_num;
463ee0b2 1703
7918f24d
NC
1704 PERL_ARGS_ASSERT_INCLINE;
1705
83944c01 1706 COPLINE_INC_WITH_HERELINES;
451f421f
FC
1707 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1708 && s+1 == PL_bufend && *s == ';') {
1709 /* fake newline in string eval */
1710 CopLINE_dec(PL_curcop);
1711 return;
1712 }
463ee0b2
LW
1713 if (*s++ != '#')
1714 return;
d4c19fe8
AL
1715 while (SPACE_OR_TAB(*s))
1716 s++;
73659bf1
GS
1717 if (strnEQ(s, "line", 4))
1718 s += 4;
1719 else
1720 return;
084592ab 1721 if (SPACE_OR_TAB(*s))
73659bf1 1722 s++;
4e553d73 1723 else
73659bf1 1724 return;
d4c19fe8
AL
1725 while (SPACE_OR_TAB(*s))
1726 s++;
463ee0b2
LW
1727 if (!isDIGIT(*s))
1728 return;
d4c19fe8 1729
463ee0b2
LW
1730 n = s;
1731 while (isDIGIT(*s))
1732 s++;
07714eb4 1733 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1734 return;
bf4acbe4 1735 while (SPACE_OR_TAB(*s))
463ee0b2 1736 s++;
73659bf1 1737 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1738 s++;
73659bf1
GS
1739 e = t + 1;
1740 }
463ee0b2 1741 else {
c35e046a
AL
1742 t = s;
1743 while (!isSPACE(*t))
1744 t++;
73659bf1 1745 e = t;
463ee0b2 1746 }
bf4acbe4 1747 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1748 e++;
1749 if (*e != '\n' && *e != '\0')
1750 return; /* false alarm */
1751
8818d409
FC
1752 line_num = atoi(n)-1;
1753
f4dd75d9 1754 if (t - s > 0) {
d9095cec 1755 const STRLEN len = t - s;
3df32bda 1756
d36ee5be 1757 if (!PL_rsfp && !PL_parser->filtered) {
e66cf94c
RGS
1758 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1759 * to *{"::_<newfilename"} */
44867030
NC
1760 /* However, the long form of evals is only turned on by the
1761 debugger - usually they're "(eval %lu)" */
d36ee5be
FC
1762 GV * const cfgv = CopFILEGV(PL_curcop);
1763 if (cfgv) {
38bd7ad8
FC
1764 char smallbuf[128];
1765 STRLEN tmplen2 = len;
44867030 1766 char *tmpbuf2;
449dd039 1767 GV *gv2;
44867030
NC
1768
1769 if (tmplen2 + 2 <= sizeof smallbuf)
1770 tmpbuf2 = smallbuf;
1771 else
1772 Newx(tmpbuf2, tmplen2 + 2, char);
1773
38bd7ad8
FC
1774 tmpbuf2[0] = '_';
1775 tmpbuf2[1] = '<';
44867030
NC
1776
1777 memcpy(tmpbuf2 + 2, s, tmplen2);
1778 tmplen2 += 2;
1779
8a5ee598 1780 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1781 if (!isGV(gv2)) {
8a5ee598 1782 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1783 /* adjust ${"::_<newfilename"} to store the new file name */
1784 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1785 /* The line number may differ. If that is the case,
1786 alias the saved lines that are in the array.
1787 Otherwise alias the whole array. */
1788 if (CopLINE(PL_curcop) == line_num) {
38bd7ad8
FC
1789 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(cfgv)));
1790 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(cfgv)));
8818d409 1791 }
38bd7ad8
FC
1792 else if (GvAV(cfgv)) {
1793 AV * const av = GvAV(cfgv);
8818d409
FC
1794 const I32 start = CopLINE(PL_curcop)+1;
1795 I32 items = AvFILLp(av) - start;
1796 if (items > 0) {
1797 AV * const av2 = GvAVn(gv2);
1798 SV **svp = AvARRAY(av) + start;
1799 I32 l = (I32)line_num+1;
1800 while (items--)
1801 av_store(av2, l++, SvREFCNT_inc(*svp++));
1802 }
1803 }
e5527e4b 1804 }
44867030
NC
1805
1806 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
d36ee5be 1807 }
e66cf94c 1808 }
05ec9bb3 1809 CopFILE_free(PL_curcop);
449dd039 1810 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1811 }
8818d409 1812 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1813}
1814
21791330
FC
1815#define skipspace(s) skipspace_flags(s, 0)
1816
29595ff2 1817#ifdef PERL_MAD
cd81e915 1818/* skip space before PL_thistoken */
29595ff2
NC
1819
1820STATIC char *
5aaab254 1821S_skipspace0(pTHX_ char *s)
29595ff2 1822{
7918f24d
NC
1823 PERL_ARGS_ASSERT_SKIPSPACE0;
1824
29595ff2
NC
1825 s = skipspace(s);
1826 if (!PL_madskills)
1827 return s;
cd81e915
NC
1828 if (PL_skipwhite) {
1829 if (!PL_thiswhite)
6b29d1f5 1830 PL_thiswhite = newSVpvs("");
cd81e915
NC
1831 sv_catsv(PL_thiswhite, PL_skipwhite);
1832 sv_free(PL_skipwhite);
1833 PL_skipwhite = 0;
1834 }
1835 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1836 return s;
1837}
1838
cd81e915 1839/* skip space after PL_thistoken */
29595ff2
NC
1840
1841STATIC char *
5aaab254 1842S_skipspace1(pTHX_ char *s)
29595ff2 1843{
d4c19fe8 1844 const char *start = s;
29595ff2
NC
1845 I32 startoff = start - SvPVX(PL_linestr);
1846
7918f24d
NC
1847 PERL_ARGS_ASSERT_SKIPSPACE1;
1848
29595ff2
NC
1849 s = skipspace(s);
1850 if (!PL_madskills)
1851 return s;
1852 start = SvPVX(PL_linestr) + startoff;
cd81e915 1853 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1854 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1855 PL_thistoken = newSVpvn(tstart, start - tstart);
1856 }
1857 PL_realtokenstart = -1;
1858 if (PL_skipwhite) {
1859 if (!PL_nextwhite)
6b29d1f5 1860 PL_nextwhite = newSVpvs("");
cd81e915
NC
1861 sv_catsv(PL_nextwhite, PL_skipwhite);
1862 sv_free(PL_skipwhite);
1863 PL_skipwhite = 0;
29595ff2
NC
1864 }
1865 return s;
1866}
1867
1868STATIC char *
5aaab254 1869S_skipspace2(pTHX_ char *s, SV **svp)
29595ff2 1870{
c35e046a
AL
1871 char *start;
1872 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1873 const I32 startoff = s - SvPVX(PL_linestr);
1874
7918f24d
NC
1875 PERL_ARGS_ASSERT_SKIPSPACE2;
1876
29595ff2
NC
1877 s = skipspace(s);
1878 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1879 if (!PL_madskills || !svp)
1880 return s;
1881 start = SvPVX(PL_linestr) + startoff;
cd81e915 1882 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1883 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1884 PL_thistoken = newSVpvn(tstart, start - tstart);
1885 PL_realtokenstart = -1;
29595ff2 1886 }
cd81e915 1887 if (PL_skipwhite) {
29595ff2 1888 if (!*svp)
6b29d1f5 1889 *svp = newSVpvs("");
cd81e915
NC
1890 sv_setsv(*svp, PL_skipwhite);
1891 sv_free(PL_skipwhite);
1892 PL_skipwhite = 0;
29595ff2
NC
1893 }
1894
1895 return s;
1896}
1897#endif
1898
80a702cd 1899STATIC void
15f169a1 1900S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1901{
1902 AV *av = CopFILEAVx(PL_curcop);
1903 if (av) {
b9f83d2f 1904 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb 1905 if (orig_sv)
4e917a04 1906 sv_setsv_flags(sv, orig_sv, 0); /* no cow */
5fa550fb
NC
1907 else
1908 sv_setpvn(sv, buf, len);
80a702cd
RGS
1909 (void)SvIOK_on(sv);
1910 SvIV_set(sv, 0);
1911 av_store(av, (I32)CopLINE(PL_curcop), sv);
1912 }
1913}
1914
ffb4593c
NT
1915/*
1916 * S_skipspace
1917 * Called to gobble the appropriate amount and type of whitespace.
1918 * Skips comments as well.
1919 */
1920
76e3520e 1921STATIC char *
21791330 1922S_skipspace_flags(pTHX_ char *s, U32 flags)
a687059c 1923{
5db06880 1924#ifdef PERL_MAD
f0e67a1d
Z
1925 char *start = s;
1926#endif /* PERL_MAD */
21791330 1927 PERL_ARGS_ASSERT_SKIPSPACE_FLAGS;
f0e67a1d 1928#ifdef PERL_MAD
cd81e915
NC
1929 if (PL_skipwhite) {
1930 sv_free(PL_skipwhite);
f0e67a1d 1931 PL_skipwhite = NULL;
5db06880 1932 }
f0e67a1d 1933#endif /* PERL_MAD */
3280af22 1934 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1935 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1936 s++;
f0e67a1d
Z
1937 } else {
1938 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1939 PL_bufptr = s;
21791330 1940 lex_read_space(flags | LEX_KEEP_PREVIOUS |
f0998909
Z
1941 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1942 LEX_NO_NEXT_CHUNK : 0));
3280af22 1943 s = PL_bufptr;
f0e67a1d
Z
1944 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1945 if (PL_linestart > PL_bufptr)
1946 PL_bufptr = PL_linestart;
1947 return s;
463ee0b2 1948 }
5db06880 1949#ifdef PERL_MAD
f0e67a1d
Z
1950 if (PL_madskills)
1951 PL_skipwhite = newSVpvn(start, s-start);
1952#endif /* PERL_MAD */
5db06880 1953 return s;
a687059c 1954}
378cc40b 1955
ffb4593c
NT
1956/*
1957 * S_check_uni
1958 * Check the unary operators to ensure there's no ambiguity in how they're
1959 * used. An ambiguous piece of code would be:
1960 * rand + 5
1961 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1962 * the +5 is its argument.
1963 */
1964
76e3520e 1965STATIC void
cea2e8a9 1966S_check_uni(pTHX)
ba106d47 1967{
97aff369 1968 dVAR;
d4c19fe8
AL
1969 const char *s;
1970 const char *t;
2f3197b3 1971
3280af22 1972 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1973 return;
3280af22
NIS
1974 while (isSPACE(*PL_last_uni))
1975 PL_last_uni++;
c35e046a 1976 s = PL_last_uni;
8a2bca12 1977 while (isWORDCHAR_lazy_if(s,UTF) || *s == '-')
c35e046a 1978 s++;
3280af22 1979 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1980 return;
6136c704 1981
9b387841
NC
1982 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1983 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1984 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1985}
1986
ffb4593c
NT
1987/*
1988 * LOP : macro to build a list operator. Its behaviour has been replaced
1989 * with a subroutine, S_lop() for which LOP is just another name.
1990 */
1991
a0d0e21e
LW
1992#define LOP(f,x) return lop(f,x,s)
1993
ffb4593c
NT
1994/*
1995 * S_lop
1996 * Build a list operator (or something that might be one). The rules:
1997 * - if we have a next token, then it's a list operator [why?]
1998 * - if the next thing is an opening paren, then it's a function
1999 * - else it's a list operator
2000 */
2001
76e3520e 2002STATIC I32
a0be28da 2003S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 2004{
97aff369 2005 dVAR;
7918f24d
NC
2006
2007 PERL_ARGS_ASSERT_LOP;
2008
6154021b 2009 pl_yylval.ival = f;
35c8bce7 2010 CLINE;
3280af22
NIS
2011 PL_expect = x;
2012 PL_bufptr = s;
2013 PL_last_lop = PL_oldbufptr;
eb160463 2014 PL_last_lop_op = (OPCODE)f;
5db06880
NC
2015#ifdef PERL_MAD
2016 if (PL_lasttoke)
78cdf107 2017 goto lstop;
5db06880 2018#else
3280af22 2019 if (PL_nexttoke)
78cdf107 2020 goto lstop;
5db06880 2021#endif
79072805 2022 if (*s == '(')
bbf60fe6 2023 return REPORT(FUNC);
29595ff2 2024 s = PEEKSPACE(s);
79072805 2025 if (*s == '(')
bbf60fe6 2026 return REPORT(FUNC);
78cdf107
Z
2027 else {
2028 lstop:
2029 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
2030 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 2031 return REPORT(LSTOP);
78cdf107 2032 }
79072805
LW
2033}
2034
5db06880
NC
2035#ifdef PERL_MAD
2036 /*
2037 * S_start_force
2038 * Sets up for an eventual force_next(). start_force(0) basically does
2039 * an unshift, while start_force(-1) does a push. yylex removes items
2040 * on the "pop" end.
2041 */
2042
2043STATIC void
2044S_start_force(pTHX_ int where)
2045{
2046 int i;
2047
cd81e915 2048 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 2049 where = PL_lasttoke;
cd81e915
NC
2050 assert(PL_curforce < 0 || PL_curforce == where);
2051 if (PL_curforce != where) {
5db06880
NC
2052 for (i = PL_lasttoke; i > where; --i) {
2053 PL_nexttoke[i] = PL_nexttoke[i-1];
2054 }
2055 PL_lasttoke++;
2056 }
cd81e915 2057 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 2058 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
2059 PL_curforce = where;
2060 if (PL_nextwhite) {
5db06880 2061 if (PL_madskills)
6b29d1f5 2062 curmad('^', newSVpvs(""));
cd81e915 2063 CURMAD('_', PL_nextwhite);
5db06880
NC
2064 }
2065}
2066
2067STATIC void
2068S_curmad(pTHX_ char slot, SV *sv)
2069{
2070 MADPROP **where;
2071
2072 if (!sv)
2073 return;
cd81e915
NC
2074 if (PL_curforce < 0)
2075 where = &PL_thismad;
5db06880 2076 else
cd81e915 2077 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 2078
cd81e915 2079 if (PL_faketokens)
76f68e9b 2080 sv_setpvs(sv, "");
5db06880
NC
2081 else {
2082 if (!IN_BYTES) {
2083 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
2084 SvUTF8_on(sv);
2085 else if (PL_encoding) {
2086 sv_recode_to_utf8(sv, PL_encoding);
2087 }
2088 }
2089 }
2090
2091 /* keep a slot open for the head of the list? */
2092 if (slot != '_' && *where && (*where)->mad_key == '^') {
2093 (*where)->mad_key = slot;
daba3364 2094 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
2095 (*where)->mad_val = (void*)sv;
2096 }
2097 else
2098 addmad(newMADsv(slot, sv), where, 0);
2099}
2100#else
b3f24c00
MHM
2101# define start_force(where) NOOP
2102# define curmad(slot, sv) NOOP
5db06880
NC
2103#endif
2104
ffb4593c
NT
2105/*
2106 * S_force_next
9cbb5ea2 2107 * When the lexer realizes it knows the next token (for instance,
ffb4593c 2108 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 2109 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
2110 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2111 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
2112 */
2113
4e553d73 2114STATIC void
cea2e8a9 2115S_force_next(pTHX_ I32 type)
79072805 2116{
97aff369 2117 dVAR;
704d4215
GG
2118#ifdef DEBUGGING
2119 if (DEBUG_T_TEST) {
2120 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 2121 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
2122 }
2123#endif
5db06880 2124#ifdef PERL_MAD
cd81e915 2125 if (PL_curforce < 0)
5db06880 2126 start_force(PL_lasttoke);
cd81e915 2127 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
2128 if (PL_lex_state != LEX_KNOWNEXT)
2129 PL_lex_defer = PL_lex_state;
2130 PL_lex_state = LEX_KNOWNEXT;
2131 PL_lex_expect = PL_expect;
cd81e915 2132 PL_curforce = -1;
5db06880 2133#else
3280af22
NIS
2134 PL_nexttype[PL_nexttoke] = type;
2135 PL_nexttoke++;
2136 if (PL_lex_state != LEX_KNOWNEXT) {
2137 PL_lex_defer = PL_lex_state;
2138 PL_lex_expect = PL_expect;
2139 PL_lex_state = LEX_KNOWNEXT;
79072805 2140 }
5db06880 2141#endif
79072805
LW
2142}
2143
28ac2b49
Z
2144void
2145Perl_yyunlex(pTHX)
2146{
a7aaec61
Z
2147 int yyc = PL_parser->yychar;
2148 if (yyc != YYEMPTY) {
2149 if (yyc) {
2150 start_force(-1);
2151 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2152 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 2153 PL_lex_allbrackets--;
a7aaec61 2154 PL_lex_brackets--;
78cdf107
Z
2155 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2156 } else if (yyc == '('/*)*/) {
2157 PL_lex_allbrackets--;
2158 yyc |= (2<<24);
a7aaec61
Z
2159 }
2160 force_next(yyc);
2161 }
28ac2b49
Z
2162 PL_parser->yychar = YYEMPTY;
2163 }
2164}
2165
d0a148a6 2166STATIC SV *
15f169a1 2167S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2168{
97aff369 2169 dVAR;
740cce10 2170 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2171 !IN_BYTES
2172 && UTF
2173 && !is_ascii_string((const U8*)start, len)
740cce10 2174 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2175 return sv;
2176}
2177
ffb4593c
NT
2178/*
2179 * S_force_word
2180 * When the lexer knows the next thing is a word (for instance, it has
2181 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2182 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2183 * lookahead.
ffb4593c
NT
2184 *
2185 * Arguments:
b1b65b59 2186 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2187 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2188 * int check_keyword : if true, Perl checks to make sure the word isn't
2189 * a keyword (do this if the word is a label, e.g. goto FOO)
2190 * int allow_pack : if true, : characters will also be allowed (require,
2191 * use, etc. do this)
9cbb5ea2 2192 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2193 */
2194
76e3520e 2195STATIC char *
345b3785 2196S_force_word(pTHX_ char *start, int token, int check_keyword, int allow_pack)
79072805 2197{
97aff369 2198 dVAR;
eb578fdb 2199 char *s;
463ee0b2 2200 STRLEN len;
4e553d73 2201
7918f24d
NC
2202 PERL_ARGS_ASSERT_FORCE_WORD;
2203
29595ff2 2204 start = SKIPSPACE1(start);
463ee0b2 2205 s = start;
7e2040f0 2206 if (isIDFIRST_lazy_if(s,UTF) ||
345b3785 2207 (allow_pack && *s == ':') )
a0d0e21e 2208 {
3280af22 2209 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
01b5ef50
FC
2210 if (check_keyword) {
2211 char *s2 = PL_tokenbuf;
2212 if (allow_pack && len > 6 && strnEQ(s2, "CORE::", 6))
2213 s2 += 6, len -= 6;
2214 if (keyword(s2, len, 0))
463ee0b2 2215 return start;
01b5ef50 2216 }
cd81e915 2217 start_force(PL_curforce);
5db06880
NC
2218 if (PL_madskills)
2219 curmad('X', newSVpvn(start,s-start));
463ee0b2 2220 if (token == METHOD) {
29595ff2 2221 s = SKIPSPACE1(s);
463ee0b2 2222 if (*s == '(')
3280af22 2223 PL_expect = XTERM;
463ee0b2 2224 else {
3280af22 2225 PL_expect = XOPERATOR;
463ee0b2 2226 }
79072805 2227 }
e74e6b3d 2228 if (PL_madskills)
63575281 2229 curmad('g', newSVpvs( "forced" ));
9ded7720 2230 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2231 = (OP*)newSVOP(OP_CONST,0,
2232 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2233 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2234 force_next(token);
2235 }
2236 return s;
2237}
2238
ffb4593c
NT
2239/*
2240 * S_force_ident
9cbb5ea2 2241 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2242 * text only contains the "foo" portion. The first argument is a pointer
2243 * to the "foo", and the second argument is the type symbol to prefix.
2244 * Forces the next token to be a "WORD".
9cbb5ea2 2245 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2246 */
2247
76e3520e 2248STATIC void
5aaab254 2249S_force_ident(pTHX_ const char *s, int kind)
79072805 2250{
97aff369 2251 dVAR;
7918f24d
NC
2252
2253 PERL_ARGS_ASSERT_FORCE_IDENT;
2254
c9b48522
DD
2255 if (s[0]) {
2256 const STRLEN len = s[1] ? strlen(s) : 1; /* s = "\"" see yylex */
728847b1
BF
2257 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2258 UTF ? SVf_UTF8 : 0));
cd81e915 2259 start_force(PL_curforce);
9ded7720 2260 NEXTVAL_NEXTTOKE.opval = o;
79072805 2261 force_next(WORD);
748a9306 2262 if (kind) {
11343788 2263 o->op_private = OPpCONST_ENTERED;
55497cff 2264 /* XXX see note in pp_entereval() for why we forgo typo
2265 warnings if the symbol must be introduced in an eval.
2266 GSAR 96-10-12 */
90e5519e 2267 gv_fetchpvn_flags(s, len,
728847b1
BF
2268 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2269 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
90e5519e
NC
2270 kind == '$' ? SVt_PV :
2271 kind == '@' ? SVt_PVAV :
2272 kind == '%' ? SVt_PVHV :
a0d0e21e 2273 SVt_PVGV
90e5519e 2274 );
748a9306 2275 }
79072805
LW
2276 }
2277}
2278
3f33d153
FC
2279static void
2280S_force_ident_maybe_lex(pTHX_ char pit)
2281{
2282 start_force(PL_curforce);
2283 NEXTVAL_NEXTTOKE.ival = pit;
2284 force_next('p');
2285}
2286
1571675a
GS
2287NV
2288Perl_str_to_version(pTHX_ SV *sv)
2289{
2290 NV retval = 0.0;
2291 NV nshift = 1.0;
2292 STRLEN len;
cfd0369c 2293 const char *start = SvPV_const(sv,len);
9d4ba2ae 2294 const char * const end = start + len;
504618e9 2295 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2296
2297 PERL_ARGS_ASSERT_STR_TO_VERSION;
2298
1571675a 2299 while (start < end) {
ba210ebe 2300 STRLEN skip;
1571675a
GS
2301 UV n;
2302 if (utf)
9041c2e3 2303 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2304 else {
2305 n = *(U8*)start;
2306 skip = 1;
2307 }
2308 retval += ((NV)n)/nshift;
2309 start += skip;
2310 nshift *= 1000;
2311 }
2312 return retval;
2313}
2314
4e553d73 2315/*
ffb4593c
NT
2316 * S_force_version
2317 * Forces the next token to be a version number.
e759cc13
RGS
2318 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2319 * and if "guessing" is TRUE, then no new token is created (and the caller
2320 * must use an alternative parsing method).
ffb4593c
NT
2321 */
2322
76e3520e 2323STATIC char *
e759cc13 2324S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2325{
97aff369 2326 dVAR;
5f66b61c 2327 OP *version = NULL;
44dcb63b 2328 char *d;
5db06880
NC
2329#ifdef PERL_MAD
2330 I32 startoff = s - SvPVX(PL_linestr);
2331#endif
89bfa8cd 2332
7918f24d
NC
2333 PERL_ARGS_ASSERT_FORCE_VERSION;
2334
29595ff2 2335 s = SKIPSPACE1(s);
89bfa8cd 2336
44dcb63b 2337 d = s;
dd629d5b 2338 if (*d == 'v')
44dcb63b 2339 d++;
44dcb63b 2340 if (isDIGIT(*d)) {
e759cc13
RGS
2341 while (isDIGIT(*d) || *d == '_' || *d == '.')
2342 d++;
5db06880
NC
2343#ifdef PERL_MAD
2344 if (PL_madskills) {
cd81e915 2345 start_force(PL_curforce);
5db06880
NC
2346 curmad('X', newSVpvn(s,d-s));
2347 }
2348#endif
4e4da3ac 2349 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2350 SV *ver;
8d08d9ba 2351#ifdef USE_LOCALE_NUMERIC
909d3787
KW
2352 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2353 setlocale(LC_NUMERIC, "C");
8d08d9ba 2354#endif
6154021b 2355 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2356#ifdef USE_LOCALE_NUMERIC
2357 setlocale(LC_NUMERIC, loc);
909d3787 2358 Safefree(loc);
8d08d9ba 2359#endif
6154021b 2360 version = pl_yylval.opval;
dd629d5b
GS
2361 ver = cSVOPx(version)->op_sv;
2362 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2363 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2364 SvNV_set(ver, str_to_version(ver));
1571675a 2365 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2366 }
89bfa8cd 2367 }
5db06880
NC
2368 else if (guessing) {
2369#ifdef PERL_MAD
2370 if (PL_madskills) {
cd81e915
NC
2371 sv_free(PL_nextwhite); /* let next token collect whitespace */
2372 PL_nextwhite = 0;
5db06880
NC
2373 s = SvPVX(PL_linestr) + startoff;
2374 }
2375#endif
e759cc13 2376 return s;
5db06880 2377 }
89bfa8cd 2378 }
2379
5db06880
NC
2380#ifdef PERL_MAD
2381 if (PL_madskills && !version) {
cd81e915
NC
2382 sv_free(PL_nextwhite); /* let next token collect whitespace */
2383 PL_nextwhite = 0;
5db06880
NC
2384 s = SvPVX(PL_linestr) + startoff;
2385 }
2386#endif
89bfa8cd 2387 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2388 start_force(PL_curforce);
9ded7720 2389 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2390 force_next(WORD);
89bfa8cd 2391
e759cc13 2392 return s;
89bfa8cd 2393}
2394
ffb4593c 2395/*
91152fc1
DG
2396 * S_force_strict_version
2397 * Forces the next token to be a version number using strict syntax rules.
2398 */
2399
2400STATIC char *
2401S_force_strict_version(pTHX_ char *s)
2402{
2403 dVAR;
2404 OP *version = NULL;
2405#ifdef PERL_MAD
2406 I32 startoff = s - SvPVX(PL_linestr);
2407#endif
2408 const char *errstr = NULL;
2409
2410 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2411
2412 while (isSPACE(*s)) /* leading whitespace */
2413 s++;
2414
2415 if (is_STRICT_VERSION(s,&errstr)) {
2416 SV *ver = newSV(0);
2417 s = (char *)scan_version(s, ver, 0);
2418 version = newSVOP(OP_CONST, 0, ver);
2419 }
4e4da3ac
Z
2420 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2421 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2422 {
91152fc1
DG
2423 PL_bufptr = s;
2424 if (errstr)
2425 yyerror(errstr); /* version required */
2426 return s;
2427 }
2428
2429#ifdef PERL_MAD
2430 if (PL_madskills && !version) {
2431 sv_free(PL_nextwhite); /* let next token collect whitespace */
2432 PL_nextwhite = 0;
2433 s = SvPVX(PL_linestr) + startoff;
2434 }
2435#endif
2436 /* NOTE: The parser sees the package name and the VERSION swapped */
2437 start_force(PL_curforce);
2438 NEXTVAL_NEXTTOKE.opval = version;
2439 force_next(WORD);
2440
2441 return s;
2442}
2443
2444/*
ffb4593c
NT
2445 * S_tokeq
2446 * Tokenize a quoted string passed in as an SV. It finds the next
2447 * chunk, up to end of string or a backslash. It may make a new
2448 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2449 * turns \\ into \.
2450 */
2451
76e3520e 2452STATIC SV *
cea2e8a9 2453S_tokeq(pTHX_ SV *sv)
79072805 2454{
97aff369 2455 dVAR;
eb578fdb
KW
2456 char *s;
2457 char *send;
2458 char *d;
b3ac6de7
IZ
2459 STRLEN len = 0;
2460 SV *pv = sv;
79072805 2461
7918f24d
NC
2462 PERL_ARGS_ASSERT_TOKEQ;
2463
79072805 2464 if (!SvLEN(sv))
b3ac6de7 2465 goto finish;
79072805 2466
a0d0e21e 2467 s = SvPV_force(sv, len);
21a311ee 2468 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2469 goto finish;
463ee0b2 2470 send = s + len;
dcb21ed6
NC
2471 /* This is relying on the SV being "well formed" with a trailing '\0' */
2472 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2473 s++;
2474 if (s == send)
b3ac6de7 2475 goto finish;
79072805 2476 d = s;
be4731d2 2477 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2478 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2479 }
79072805
LW
2480 while (s < send) {
2481 if (*s == '\\') {
a0d0e21e 2482 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2483 s++; /* all that, just for this */
2484 }
2485 *d++ = *s++;
2486 }
2487 *d = '\0';
95a20fc0 2488 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2489 finish:
3280af22 2490 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2491 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2492 return sv;
2493}
2494
ffb4593c
NT
2495/*
2496 * Now come three functions related to double-quote context,
2497 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2498 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2499 * interact with PL_lex_state, and create fake ( ... ) argument lists
2500 * to handle functions and concatenation.
ecd24171
DM
2501 * For example,
2502 * "foo\lbar"
2503 * is tokenised as
2504 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
ffb4593c
NT
2505 */
2506
2507/*
2508 * S_sublex_start
6154021b 2509 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2510 *
2511 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2512 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2513 *
2514 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2515 *
2516 * Everything else becomes a FUNC.
2517 *
2518 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2519 * had an OP_CONST or OP_READLINE). This just sets us up for a
2520 * call to S_sublex_push().
2521 */
2522
76e3520e 2523STATIC I32
cea2e8a9 2524S_sublex_start(pTHX)
79072805 2525{
97aff369 2526 dVAR;
eb578fdb 2527 const I32 op_type = pl_yylval.ival;
79072805
LW
2528
2529 if (op_type == OP_NULL) {
6154021b 2530 pl_yylval.opval = PL_lex_op;
5f66b61c 2531 PL_lex_op = NULL;
79072805
LW
2532 return THING;
2533 }
2534 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2535 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2536
2537 if (SvTYPE(sv) == SVt_PVIV) {
2538 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2539 STRLEN len;
96a5add6 2540 const char * const p = SvPV_const(sv, len);
740cce10 2541 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2542 SvREFCNT_dec(sv);
2543 sv = nsv;
4e553d73 2544 }
6154021b 2545 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2546 PL_lex_stuff = NULL;
6f33ba73
RGS
2547 /* Allow <FH> // "foo" */
2548 if (op_type == OP_READLINE)
2549 PL_expect = XTERMORDORDOR;
79072805
LW
2550 return THING;
2551 }
e3f73d4e 2552 else if (op_type == OP_BACKTICK && PL_lex_op) {
06df4f41 2553 /* readpipe() was overridden */
e3f73d4e 2554 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2555 pl_yylval.opval = PL_lex_op;
9b201d7d 2556 PL_lex_op = NULL;
e3f73d4e
RGS
2557 PL_lex_stuff = NULL;
2558 return THING;
2559 }
79072805 2560
3280af22 2561 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2562 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2563 PL_sublex_info.sub_op = PL_lex_op;
2564 PL_lex_state = LEX_INTERPPUSH;
55497cff 2565
3280af22
NIS
2566 PL_expect = XTERM;
2567 if (PL_lex_op) {
6154021b 2568 pl_yylval.opval = PL_lex_op;
5f66b61c 2569 PL_lex_op = NULL;
55497cff 2570 return PMFUNC;
2571 }
2572 else
2573 return FUNC;
2574}
2575
ffb4593c
NT
2576/*
2577 * S_sublex_push
2578 * Create a new scope to save the lexing state. The scope will be
2579 * ended in S_sublex_done. Returns a '(', starting the function arguments
2580 * to the uc, lc, etc. found before.
2581 * Sets PL_lex_state to LEX_INTERPCONCAT.
2582 */
2583
76e3520e 2584STATIC I32
cea2e8a9 2585S_sublex_push(pTHX)
55497cff 2586{
27da23d5 2587 dVAR;
78a635de 2588 LEXSHARED *shared;
f46d017c 2589 ENTER;
55497cff 2590
3280af22 2591 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2592 SAVEBOOL(PL_lex_dojoin);
3280af22 2593 SAVEI32(PL_lex_brackets);
78cdf107 2594 SAVEI32(PL_lex_allbrackets);
b27dce25 2595 SAVEI32(PL_lex_formbrack);
78cdf107 2596 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2597 SAVEI32(PL_lex_casemods);
2598 SAVEI32(PL_lex_starts);
651b5b28 2599 SAVEI8(PL_lex_state);
7cc34111 2600 SAVESPTR(PL_lex_repl);
7766f137 2601 SAVEVPTR(PL_lex_inpat);
98246f1e 2602 SAVEI16(PL_lex_inwhat);
57843af0 2603 SAVECOPLINE(PL_curcop);
3280af22 2604 SAVEPPTR(PL_bufptr);
8452ff4b 2605 SAVEPPTR(PL_bufend);
3280af22
NIS
2606 SAVEPPTR(PL_oldbufptr);
2607 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2608 SAVEPPTR(PL_last_lop);
2609 SAVEPPTR(PL_last_uni);
3280af22
NIS
2610 SAVEPPTR(PL_linestart);
2611 SAVESPTR(PL_linestr);
8edd5f42
RGS
2612 SAVEGENERICPV(PL_lex_brackstack);
2613 SAVEGENERICPV(PL_lex_casestack);
78a635de 2614 SAVEGENERICPV(PL_parser->lex_shared);
3a54fd60 2615 SAVEBOOL(PL_parser->lex_re_reparsing);
3280af22 2616
99bd9d90 2617 /* The here-doc parser needs to be able to peek into outer lexing
60f40a38
FC
2618 scopes to find the body of the here-doc. So we put PL_linestr and
2619 PL_bufptr into lex_shared, to ‘share’ those values.
99bd9d90 2620 */
60f40a38
FC
2621 PL_parser->lex_shared->ls_linestr = PL_linestr;
2622 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
99bd9d90 2623
3280af22 2624 PL_linestr = PL_lex_stuff;
7cc34111 2625 PL_lex_repl = PL_sublex_info.repl;
a0714e2c 2626 PL_lex_stuff = NULL;
7cc34111 2627 PL_sublex_info.repl = NULL;
3280af22 2628
9cbb5ea2
GS
2629 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2630 = SvPVX(PL_linestr);
3280af22 2631 PL_bufend += SvCUR(PL_linestr);
bd61b366 2632 PL_last_lop = PL_last_uni = NULL;
3280af22 2633 SAVEFREESV(PL_linestr);
4dc843bc 2634 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
3280af22
NIS
2635
2636 PL_lex_dojoin = FALSE;
b27dce25 2637 PL_lex_brackets = PL_lex_formbrack = 0;
78cdf107
Z
2638 PL_lex_allbrackets = 0;
2639 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2640 Newx(PL_lex_brackstack, 120, char);
2641 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2642 PL_lex_casemods = 0;
2643 *PL_lex_casestack = '\0';
2644 PL_lex_starts = 0;
2645 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2646 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
78a635de
FC
2647
2648 Newxz(shared, 1, LEXSHARED);
2649 shared->ls_prev = PL_parser->lex_shared;
2650 PL_parser->lex_shared = shared;
3280af22
NIS
2651
2652 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2653 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2654 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2655 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2656 else
5f66b61c 2657 PL_lex_inpat = NULL;
79072805 2658
3a54fd60
DM
2659 PL_parser->lex_re_reparsing = cBOOL(PL_in_eval & EVAL_RE_REPARSING);
2660 PL_in_eval &= ~EVAL_RE_REPARSING;
2661
55497cff 2662 return '(';
79072805
LW
2663}
2664
ffb4593c
NT
2665/*
2666 * S_sublex_done
2667 * Restores lexer state after a S_sublex_push.
2668 */
2669
76e3520e 2670STATIC I32
cea2e8a9 2671S_sublex_done(pTHX)
79072805 2672{
27da23d5 2673 dVAR;
3280af22 2674 if (!PL_lex_starts++) {
396482e1 2675 SV * const sv = newSVpvs("");
9aa983d2
JH
2676 if (SvUTF8(PL_linestr))
2677 SvUTF8_on(sv);
3280af22 2678 PL_expect = XOPERATOR;
6154021b 2679 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2680 return THING;
2681 }
2682
3280af22
NIS
2683 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2684 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2685 return yylex();
79072805
LW
2686 }
2687
ffb4593c 2688 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2689 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2690 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2691 PL_linestr = PL_lex_repl;
2692 PL_lex_inpat = 0;
2693 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2694 PL_bufend += SvCUR(PL_linestr);
bd61b366 2695 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2696 PL_lex_dojoin = FALSE;
2697 PL_lex_brackets = 0;
78cdf107
Z
2698 PL_lex_allbrackets = 0;
2699 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2700 PL_lex_casemods = 0;
2701 *PL_lex_casestack = '\0';
2702 PL_lex_starts = 0;
25da4f38 2703 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2704 PL_lex_state = LEX_INTERPNORMAL;
2705 PL_lex_starts++;
e9fa98b2
HS
2706 /* we don't clear PL_lex_repl here, so that we can check later
2707 whether this is an evalled subst; that means we rely on the
2708 logic to ensure sublex_done() is called again only via the
2709 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2710 }
e9fa98b2 2711 else {
3280af22 2712 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2713 PL_lex_repl = NULL;
e9fa98b2 2714 }
79072805 2715 return ',';
ffed7fef
LW
2716 }
2717 else {
5db06880
NC
2718#ifdef PERL_MAD
2719 if (PL_madskills) {
cd81e915
NC
2720 if (PL_thiswhite) {
2721 if (!PL_endwhite)
6b29d1f5 2722 PL_endwhite = newSVpvs("");
cd81e915
NC
2723 sv_catsv(PL_endwhite, PL_thiswhite);
2724 PL_thiswhite = 0;
2725 }
2726 if (PL_thistoken)
76f68e9b 2727 sv_setpvs(PL_thistoken,"");
5db06880 2728 else
cd81e915 2729 PL_realtokenstart = -1;
5db06880
NC
2730 }
2731#endif
f46d017c 2732 LEAVE;
3280af22
NIS
2733 PL_bufend = SvPVX(PL_linestr);
2734 PL_bufend += SvCUR(PL_linestr);
2735 PL_expect = XOPERATOR;
09bef843 2736 PL_sublex_info.sub_inwhat = 0;
79072805 2737 return ')';
ffed7fef
LW
2738 }
2739}
2740
6f613c73
KW
2741PERL_STATIC_INLINE SV*
2742S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2743{
140b12ad
KW
2744 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2745 * interior, hence to the "}". Finds what the name resolves to, returning
2746 * an SV* containing it; NULL if no valid one found */
2747
dd2b1b72 2748 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
6f613c73 2749
0c415a79
KW
2750 HV * table;
2751 SV **cvp;
2752 SV *cv;
2753 SV *rv;
2754 HV *stash;
2755 const U8* first_bad_char_loc;
2756 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2757
6f613c73
KW
2758 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2759
107160e2
KW
2760 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2761 e - backslash_ptr,
2762 &first_bad_char_loc))
2763 {
2764 /* If warnings are on, this will print a more detailed analysis of what
2765 * is wrong than the error message below */
2766 utf8n_to_uvuni(first_bad_char_loc,
2767 e - ((char *) first_bad_char_loc),
2768 NULL, 0);
2769
2770 /* We deliberately don't try to print the malformed character, which
2771 * might not print very well; it also may be just the first of many
2772 * malformations, so don't print what comes after it */
2773 yyerror(Perl_form(aTHX_
2774 "Malformed UTF-8 character immediately after '%.*s'",
2775 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2776 return NULL;
2777 }
2778
2779 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2780 /* include the <}> */
2781 e - backslash_ptr + 1);
6f613c73 2782 if (! SvPOK(res)) {
b6407c49 2783 SvREFCNT_dec_NN(res);
6f613c73
KW
2784 return NULL;
2785 }
2786
0c415a79
KW
2787 /* See if the charnames handler is the Perl core's, and if so, we can skip
2788 * the validation needed for a user-supplied one, as Perl's does its own
2789 * validation. */
2790 table = GvHV(PL_hintgv); /* ^H */
2791 cvp = hv_fetchs(table, "charnames", FALSE);
67a057d6
FC
2792 if (cvp && (cv = *cvp) && SvROK(cv) && ((rv = SvRV(cv)) != NULL)
2793 && SvTYPE(rv) == SVt_PVCV && ((stash = CvSTASH(rv)) != NULL))
0c415a79
KW
2794 {
2795 const char * const name = HvNAME(stash);
2796 if strEQ(name, "_charnames") {
2797 return res;
2798 }
2799 }
2800
bde9e88d
KW
2801 /* Here, it isn't Perl's charname handler. We can't rely on a
2802 * user-supplied handler to validate the input name. For non-ut8 input,
2803 * look to see that the first character is legal. Then loop through the
2804 * rest checking that each is a continuation */
6f613c73 2805
b6ba1137
KW
2806 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2807 * the same thing */
2808
b6ba1137 2809 if (! UTF) {
bde9e88d 2810 if (! isALPHAU(*s)) {
b6ba1137
KW
2811 goto bad_charname;
2812 }
bde9e88d
KW
2813 s++;
2814 while (s < e) {
2815 if (! isCHARNAME_CONT(*s)) {
b6ba1137
KW
2816 goto bad_charname;
2817 }
3036c853 2818 if (*s == ' ' && *(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
94ec3a20
FC
2819 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2820 "A sequence of multiple spaces in a charnames "
2821 "alias definition is deprecated");
bd299e29 2822 }
bde9e88d 2823 s++;
b6ba1137 2824 }
3036c853 2825 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
94ec3a20
FC
2826 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2827 "Trailing white-space in a charnames alias "
2828 "definition is deprecated");
bd299e29 2829 }
b6ba1137
KW
2830 }
2831 else {
bde9e88d
KW
2832 /* Similarly for utf8. For invariants can check directly; for other
2833 * Latin1, can calculate their code point and check; otherwise use a
2834 * swash */
2835 if (UTF8_IS_INVARIANT(*s)) {
2836 if (! isALPHAU(*s)) {
140b12ad
KW
2837 goto bad_charname;
2838 }
bde9e88d
KW
2839 s++;
2840 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2841 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
b6ba1137 2842 goto bad_charname;
6f613c73 2843 }
bde9e88d 2844 s += 2;
6f613c73 2845 }
bde9e88d
KW
2846 else {
2847 if (! PL_utf8_charname_begin) {
2848 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2849 PL_utf8_charname_begin = _core_swash_init("utf8",
2850 "_Perl_Charname_Begin",
2851 &PL_sv_undef,
2852 1, 0, NULL, &flags);
2853 }
2854 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2855 goto bad_charname;
2856 }
2857 s += UTF8SKIP(s);
2858 }
2859
2860 while (s < e) {
2861 if (UTF8_IS_INVARIANT(*s)) {
2862 if (! isCHARNAME_CONT(*s)) {
2863 goto bad_charname;
2864 }
3036c853
FC
2865 if (*s == ' ' && *(s-1) == ' '
2866 && ckWARN_d(WARN_DEPRECATED)) {
94ec3a20
FC
2867 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2868 "A sequence of multiple spaces in a charnam"
2869 "es alias definition is deprecated");
bd299e29 2870 }
bde9e88d
KW
2871 s++;
2872 }
2873 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2874 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2875 *(s+1)))))
2876 {
2877 goto bad_charname;
2878 }
2879 s += 2;
2880 }
2881 else {
2882 if (! PL_utf8_charname_continue) {
2883 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2884 PL_utf8_charname_continue = _core_swash_init("utf8",
2885 "_Perl_Charname_Continue",
2886 &PL_sv_undef,
2887 1, 0, NULL, &flags);
2888 }
2889 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2890 goto bad_charname;
2891 }
2892 s += UTF8SKIP(s);
6f613c73
KW
2893 }
2894 }
3036c853 2895 if (*(s-1) == ' ' && ckWARN_d(WARN_DEPRECATED)) {
94ec3a20
FC
2896 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
2897 "Trailing white-space in a charnames alias "
2898 "definition is deprecated");
bd299e29 2899 }
6f613c73
KW
2900 }
2901
94ca1619 2902 if (SvUTF8(res)) { /* Don't accept malformed input */
bde9e88d
KW
2903 const U8* first_bad_char_loc;
2904 STRLEN len;
2905 const char* const str = SvPV_const(res, len);
2906 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2907 /* If warnings are on, this will print a more detailed analysis of
2908 * what is wrong than the error message below */
2909 utf8n_to_uvuni(first_bad_char_loc,
2910 (char *) first_bad_char_loc - str,
2911 NULL, 0);
2912
2913 /* We deliberately don't try to print the malformed character,
2914 * which might not print very well; it also may be just the first
2915 * of many malformations, so don't print what comes after it */
2916 yyerror_pv(
2917 Perl_form(aTHX_
2918 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2919 (int) (e - backslash_ptr + 1), backslash_ptr,
2920 (int) ((char *) first_bad_char_loc - str), str
2921 ),
2922 SVf_UTF8);
2923 return NULL;
2924 }
2925 }
140b12ad 2926
bde9e88d 2927 return res;
140b12ad 2928
bde9e88d
KW
2929 bad_charname: {
2930 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2931
2932 /* The final %.*s makes sure that should the trailing NUL be missing
2933 * that this print won't run off the end of the string */
2934 yyerror_pv(
2935 Perl_form(aTHX_
2936 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2937 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2938 (int)(e - s + bad_char_size), s + bad_char_size
2939 ),
2940 UTF ? SVf_UTF8 : 0);
2941 return NULL;
2942 }
6f613c73
KW
2943}
2944
02aa26ce
NT
2945/*
2946 scan_const
2947
9da1dd8f
DM
2948 Extracts the next constant part of a pattern, double-quoted string,
2949 or transliteration. This is terrifying code.
2950
2951 For example, in parsing the double-quoted string "ab\x63$d", it would
2952 stop at the '$' and return an OP_CONST containing 'abc'.
02aa26ce 2953
94def140 2954 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2955 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2956 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2957
94def140
TS
2958 Returns a pointer to the character scanned up to. If this is
2959 advanced from the start pointer supplied (i.e. if anything was
9da1dd8f 2960 successfully parsed), will leave an OP_CONST for the substring scanned
6154021b 2961 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2962 by looking at the next characters herself.
2963
02aa26ce 2964 In patterns:
9da1dd8f 2965 expand:
537124e4
KW
2966 \N{FOO} => \N{U+hex_for_character_FOO}
2967 (if FOO expands to multiple characters, expands to \N{U+xx.XX.yy ...})
9da1dd8f
DM
2968
2969 pass through:
2970 all other \-char, including \N and \N{ apart from \N{ABC}
2971
2972 stops on:
2973 @ and $ where it appears to be a var, but not for $ as tail anchor
2974 \l \L \u \U \Q \E
2975 (?{ or (??{
2976
02aa26ce
NT
2977
2978 In transliterations:
2979 characters are VERY literal, except for - not at the start or end
94def140
TS
2980 of the string, which indicates a range. If the range is in bytes,
2981 scan_const expands the range to the full set of intermediate
2982 characters. If the range is in utf8, the hyphen is replaced with
2983 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2984
2985 In double-quoted strings:
2986 backslashes:
2987 double-quoted style: \r and \n
ff3f963a 2988 constants: \x31, etc.
94def140 2989 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2990 case and quoting: \U \Q \E
2991 stops on @ and $
2992
2993 scan_const does *not* construct ops to handle interpolated strings.
2994 It stops processing as soon as it finds an embedded $ or @ variable
2995 and leaves it to the caller to work out what's going on.
2996
94def140
TS
2997 embedded arrays (whether in pattern or not) could be:
2998 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2999
3000 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
3001
3002 $ in pattern could be $foo or could be tail anchor. Assumption:
3003 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 3004 followed by one of "()| \r\n\t"
02aa26ce 3005
9da1dd8f 3006 \1 (backreferences) are turned into $1 in substitutions
02aa26ce
NT
3007
3008 The structure of the code is
3009 while (there's a character to process) {
94def140
TS
3010 handle transliteration ranges
3011 skip regexp comments /(?#comment)/ and codes /(?{code})/
3012 skip #-initiated comments in //x patterns
3013 check for embedded arrays
02aa26ce
NT
3014 check for embedded scalars
3015 if (backslash) {
94def140 3016 deprecate \1 in substitution replacements
02aa26ce
NT
3017 handle string-changing backslashes \l \U \Q \E, etc.
3018 switch (what was escaped) {
94def140 3019 handle \- in a transliteration (becomes a literal -)
ff3f963a 3020 if a pattern and not \N{, go treat as regular character
94def140
TS
3021 handle \132 (octal characters)
3022 handle \x15 and \x{1234} (hex characters)
ff3f963a 3023 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
3024 handle \cV (control characters)
3025 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 3026 } (end switch)
77a135fe 3027 continue
02aa26ce 3028 } (end if backslash)
77a135fe 3029 handle regular character
02aa26ce 3030 } (end while character to read)
4e553d73 3031
02aa26ce
NT
3032*/
3033
76e3520e 3034STATIC char *
cea2e8a9 3035S_scan_const(pTHX_ char *start)
79072805 3036{
97aff369 3037 dVAR;
eb578fdb 3038 char *send = PL_bufend; /* end of the constant */
77a135fe
KW
3039 SV *sv = newSV(send - start); /* sv for the constant. See
3040 note below on sizing. */
eb578fdb
KW
3041 char *s = start; /* start of the constant */
3042 char *d = SvPVX(sv); /* destination for copies */
02aa26ce 3043 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 3044 bool didrange = FALSE; /* did we just finish a range? */
2866decb 3045 bool in_charclass = FALSE; /* within /[...]/ */
b953e60c
KW
3046 bool has_utf8 = FALSE; /* Output constant is UTF8 */
3047 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
3048 to be UTF8? But, this can
3049 show as true when the source
3050 isn't utf8, as for example
3051 when it is entirely composed
3052 of hex constants */
6f613c73 3053 SV *res; /* result from charnames */
77a135fe
KW
3054
3055 /* Note on sizing: The scanned constant is placed into sv, which is
3056 * initialized by newSV() assuming one byte of output for every byte of
3057 * input. This routine expects newSV() to allocate an extra byte for a
3058 * trailing NUL, which this routine will append if it gets to the end of
3059 * the input. There may be more bytes of input than output (eg., \N{LATIN
3060 * CAPITAL LETTER A}), or more output than input if the constant ends up
3061 * recoded to utf8, but each time a construct is found that might increase
3062 * the needed size, SvGROW() is called. Its size parameter each time is
3063 * based on the best guess estimate at the time, namely the length used so
3064 * far, plus the length the current construct will occupy, plus room for
3065 * the trailing NUL, plus one byte for every input byte still unscanned */
3066
c3320c2a
KW
3067 UV uv = UV_MAX; /* Initialize to weird value to try to catch any uses
3068 before set */
4c3a8340
TS
3069#ifdef EBCDIC
3070 UV literal_endpoint = 0;
e294cc5d 3071 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 3072#endif
012bcf8d 3073
7918f24d
NC
3074 PERL_ARGS_ASSERT_SCAN_CONST;
3075
bb16bae8 3076 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
3077 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
3078 /* If we are doing a trans and we know we want UTF8 set expectation */
3079 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
3080 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3081 }
3082
b899e89d
FC
3083 /* Protect sv from errors and fatal warnings. */
3084 ENTER_with_name("scan_const");
3085 SAVEFREESV(sv);
2b9d42f0 3086
79072805 3087 while (s < send || dorange) {
ff3f963a 3088
02aa26ce 3089 /* get transliterations out of the way (they're most literal) */
3280af22 3090 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 3091 /* expand a range A-Z to the full set of characters. AIE! */
79072805 3092 if (dorange) {
1ba5c669
JH
3093 I32 i; /* current expanded character */
3094 I32 min; /* first character in range */
3095 I32 max; /* last character in range */
02aa26ce 3096
e294cc5d
JH
3097#ifdef EBCDIC
3098 UV uvmax = 0;
3099#endif
3100
3101 if (has_utf8
3102#ifdef EBCDIC
3103 && !native_range
3104#endif
1953db30 3105 ) {
9d4ba2ae 3106 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
3107 char *e = d++;
3108 while (e-- > c)
3109 *(e + 1) = *e;
25716404 3110 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
3111 /* mark the range as done, and continue */
3112 dorange = FALSE;
3113 didrange = TRUE;
3114 continue;
3115 }
2b9d42f0 3116
95a20fc0 3117 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
3118#ifdef EBCDIC
3119 SvGROW(sv,
3120 SvLEN(sv) + (has_utf8 ?
3121 (512 - UTF_CONTINUATION_MARK +
3122 UNISKIP(0x100))
3123 : 256));
3124 /* How many two-byte within 0..255: 128 in UTF-8,
3125 * 96 in UTF-8-mod. */
3126#else
9cbb5ea2 3127 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 3128#endif
9cbb5ea2 3129 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
3130#ifdef EBCDIC
3131 if (has_utf8) {
3132 int j;
3133 for (j = 0; j <= 1; j++) {
3134 char * const c = (char*)utf8_hop((U8*)d, -1);
3135 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3136 if (j)
3137 min = (U8)uv;
3138 else if (uv < 256)
3139 max = (U8)uv;
3140 else {
3141 max = (U8)0xff; /* only to \xff */
3142 uvmax = uv; /* \x{100} to uvmax */
3143 }
3144 d = c; /* eat endpoint chars */
3145 }
3146 }
3147 else {
3148#endif
3149 d -= 2; /* eat the first char and the - */
3150 min = (U8)*d; /* first char in range */
3151 max = (U8)d[1]; /* last char in range */
3152#ifdef EBCDIC
3153 }
3154#endif
8ada0baa 3155
c2e66d9e 3156 if (min > max) {
01ec43d0 3157 Perl_croak(aTHX_
d1573ac7 3158 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 3159 (char)min, (char)max);
c2e66d9e
GS
3160 }
3161
c7f1f016 3162#ifdef EBCDIC
4c3a8340
TS
3163 if (literal_endpoint == 2 &&
3164 ((isLOWER(min) && isLOWER(max)) ||
3165 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
3166 if (isLOWER(min)) {
3167 for (i = min; i <= max; i++)
3168 if (isLOWER(i))
db42d148 3169 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
3170 } else {
3171 for (i = min; i <= max; i++)
3172 if (isUPPER(i))
db42d148 3173 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
3174 }
3175 }
3176 else
3177#endif
3178 for (i = min; i <= max; i++)
e294cc5d
JH
3179#ifdef EBCDIC
3180 if (has_utf8) {
3181 const U8 ch = (U8)NATIVE_TO_UTF(i);
3182 if (UNI_IS_INVARIANT(ch))
3183 *d++ = (U8)i;
3184 else {
3185 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3186 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3187 }
3188 }
3189 else
3190#endif
3191 *d++ = (char)i;
3192
3193#ifdef EBCDIC
3194 if (uvmax) {
3195 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3196 if (uvmax > 0x101)
3197 *d++ = (char)UTF_TO_NATIVE(0xff);
3198 if (uvmax > 0x100)
3199 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3200 }
3201#endif
02aa26ce
NT
3202
3203 /* mark the range as done, and continue */
79072805 3204 dorange = FALSE;
01ec43d0 3205 didrange = TRUE;
4c3a8340
TS
3206#ifdef EBCDIC
3207 literal_endpoint = 0;
3208#endif
79072805 3209 continue;
4e553d73 3210 }
02aa26ce
NT
3211
3212 /* range begins (ignore - as first or last char) */
79072805 3213 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 3214 if (didrange) {
1fafa243 3215 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 3216 }
e294cc5d
JH
3217 if (has_utf8
3218#ifdef EBCDIC
3219 && !native_range
3220#endif
3221 ) {
25716404 3222 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
3223 s++;
3224 continue;
3225 }
79072805
LW
3226 dorange = TRUE;
3227 s++;
01ec43d0
GS
3228 }
3229 else {
3230 didrange = FALSE;
4c3a8340
TS
3231#ifdef EBCDIC
3232 literal_endpoint = 0;
e294cc5d 3233 native_range = TRUE;
4c3a8340 3234#endif
01ec43d0 3235 }
79072805 3236 }
02aa26ce
NT
3237
3238 /* if we get here, we're not doing a transliteration */
3239
e4a2df84
DM
3240 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3241 char *s1 = s-1;
3242 int esc = 0;
3243 while (s1 >= start && *s1-- == '\\')
3244 esc = !esc;
3245 if (!esc)
3246 in_charclass = TRUE;
3247 }
2866decb 3248
e4a2df84
DM
3249 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3250 char *s1 = s-1;
3251 int esc = 0;
3252 while (s1 >= start && *s1-- == '\\')
3253 esc = !esc;
3254 if (!esc)
3255 in_charclass = FALSE;
3256 }
2866decb 3257
9da1dd8f
DM
3258 /* skip for regexp comments /(?#comment)/, except for the last
3259 * char, which will be done separately.
3260 * Stop on (?{..}) and friends */
3261
c30fc27b 3262 else if (*s == '(' && PL_lex_inpat && s[1] == '?' && !in_charclass) {
cc6b7395 3263 if (s[2] == '#') {
e994fd66 3264 while (s+1 < send && *s != ')')
db42d148 3265 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94 3266 }
c30fc27b 3267 else if (!PL_lex_casemods &&
d3cec5e5
DM
3268 ( s[2] == '{' /* This should match regcomp.c */
3269 || (s[2] == '?' && s[3] == '{')))
155aba94 3270 {
9da1dd8f 3271 break;
cc6b7395 3272 }
748a9306 3273 }
02aa26ce
NT
3274
3275 /* likewise skip #-initiated comments in //x patterns */
c30fc27b 3276 else if (*s == '#' && PL_lex_inpat && !in_charclass &&
73134a2e 3277 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 3278 while (s+1 < send && *s != '\n')
db42d148 3279 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 3280 }
02aa26ce 3281
9da1dd8f
DM
3282 /* no further processing of single-quoted regex */
3283 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3284 goto default_action;
3285
5d1d4326 3286 /* check for embedded arrays
da6eedaa 3287 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 3288 */
1749ea0d 3289 else if (*s == '@' && s[1]) {
8a2bca12 3290 if (isWORDCHAR_lazy_if(s+1,UTF))
1749ea0d
TS
3291 break;
3292 if (strchr(":'{$", s[1]))
3293 break;
3294 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3295 break; /* in regexp, neither @+ nor @- are interpolated */
3296 }
02aa26ce
NT
3297
3298 /* check for embedded scalars. only stop if we're sure it's a
3299 variable.
3300 */
79072805 3301 else if (*s == '$') {
3280af22 3302 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 3303 break;
77772344 3304 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
3305 if (s[1] == '\\') {
3306 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3307 "Possible unintended interpolation of $\\ in regex");
77772344 3308 }
79072805 3309 break; /* in regexp, $ might be tail anchor */
77772344 3310 }
79072805 3311 }
02aa26ce 3312
2b9d42f0
NIS
3313 /* End of else if chain - OP_TRANS rejoin rest */
3314
02aa26ce 3315 /* backslashes */
79072805 3316 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
3317 char* e; /* Can be used for ending '}', etc. */
3318
79072805 3319 s++;
02aa26ce 3320
7d0fc23c
KW
3321 /* warn on \1 - \9 in substitution replacements, but note that \11
3322 * is an octal; and \19 is \1 followed by '9' */
3280af22 3323 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 3324 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 3325 {
a2a5de95 3326 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
3327 *--s = '$';
3328 break;
3329 }
02aa26ce
NT
3330
3331 /* string-change backslash escapes */
838f2281 3332 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
79072805
LW
3333 --s;
3334 break;
3335 }
ff3f963a
KW
3336 /* In a pattern, process \N, but skip any other backslash escapes.
3337 * This is because we don't want to translate an escape sequence
3338 * into a meta symbol and have the regex compiler use the meta
3339 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3340 * in spite of this, we do have to process \N here while the proper
3341 * charnames handler is in scope. See bugs #56444 and #62056.
3342 * There is a complication because \N in a pattern may also stand
3343 * for 'match a non-nl', and not mean a charname, in which case its
3344 * processing should be deferred to the regex compiler. To be a
3345 * charname it must be followed immediately by a '{', and not look
3346 * like \N followed by a curly quantifier, i.e., not something like
3347 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3348 * quantifier */
3349 else if (PL_lex_inpat
3350 && (*s != 'N'
3351 || s[1] != '{'
4d68ffa0 3352 || regcurly(s + 1, FALSE)))
ff3f963a 3353 {
cc74c5bd
TS
3354 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3355 goto default_action;
3356 }
02aa26ce 3357
79072805 3358 switch (*s) {
02aa26ce
NT
3359
3360 /* quoted - in transliterations */
79072805 3361 case '-':
3280af22 3362 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
3363 *d++ = *s++;
3364 continue;
3365 }
3366 /* FALL THROUGH */
3367 default:
11b8faa4 3368 {
15861f94 3369 if ((isALPHANUMERIC(*s)))
a2a5de95
NC
3370 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3371 "Unrecognized escape \\%c passed through",
3372 *s);
11b8faa4 3373 /* default action is to copy the quoted character */
f9a63242 3374 goto default_action;
11b8faa4 3375 }
02aa26ce 3376
632403cc 3377 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
3378 case '0': case '1': case '2': case '3':
3379 case '4': case '5': case '6': case '7':
ba210ebe 3380 {
5e0a247b 3381 I32 flags = PERL_SCAN_SILENT_ILLDIGIT;
53305cf1 3382 STRLEN len = 3;
77a135fe 3383 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe 3384 s += len;
5e0a247b
KW
3385 if (len < 3 && s < send && isDIGIT(*s)
3386 && ckWARN(WARN_MISC))
3387 {
3388 Perl_warner(aTHX_ packWARN(WARN_MISC),
3389 "%s", form_short_octal_warning(s, len));
3390 }
ba210ebe 3391 }
012bcf8d 3392 goto NUM_ESCAPE_INSERT;
02aa26ce 3393
f0a2b745
KW
3394 /* eg. \o{24} indicates the octal constant \024 */
3395 case 'o':
3396 {
454155d9 3397 const char* error;
f0a2b745 3398
00ce5563 3399 bool valid = grok_bslash_o(&s, &uv, &error,
80f4111b
KW
3400 TRUE, /* Output warning */
3401 FALSE, /* Not strict */
17896a85
KW
3402 TRUE, /* Output warnings for
3403 non-portables */
80f4111b 3404 UTF);
454155d9 3405 if (! valid) {
f0a2b745
KW
3406 yyerror(error);
3407 continue;
3408 }
3409 goto NUM_ESCAPE_INSERT;
3410 }
3411
77a135fe 3412 /* eg. \x24 indicates the hex constant 0x24 */
79072805 3413 case 'x':
a0481293 3414 {
a0481293 3415 const char* error;
355860ce 3416
00ce5563 3417 bool valid = grok_bslash_x(&s, &uv, &error,
80f4111b
KW
3418 TRUE, /* Output warning */
3419 FALSE, /* Not strict */
17896a85
KW
3420 TRUE, /* Output warnings for
3421 non-portables */
80f4111b 3422 UTF);
a0481293
KW
3423 if (! valid) {
3424 yyerror(error);
355860ce 3425 continue;
ba210ebe 3426 }
012bcf8d
GS
3427 }
3428
3429 NUM_ESCAPE_INSERT:
ff3f963a
KW
3430 /* Insert oct or hex escaped character. There will always be
3431 * enough room in sv since such escapes will be longer than any
3432 * UTF-8 sequence they can end up as, except if they force us
3433 * to recode the rest of the string into utf8 */
ba7cea30 3434
77a135fe 3435 /* Here uv is the ordinal of the next character being added in
ff3f963a 3436 * unicode (converted from native). */
77a135fe 3437 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3438 if (!has_utf8 && uv > 255) {
77a135fe
KW
3439 /* Might need to recode whatever we have accumulated so
3440 * far if it contains any chars variant in utf8 or
3441 * utf-ebcdic. */
3442
3443 SvCUR_set(sv, d - SvPVX_const(sv));
3444 SvPOK_on(sv);
3445 *d = '\0';
77a135fe 3446 /* See Note on sizing above. */
7bf79863
KW
3447 sv_utf8_upgrade_flags_grow(sv,
3448 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3449 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3450 d = SvPVX(sv) + SvCUR(sv);
3451 has_utf8 = TRUE;
012bcf8d
GS
3452 }
3453
77a135fe
KW
3454 if (has_utf8) {
3455 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3456 if (PL_lex_inwhat == OP_TRANS &&
3457 PL_sublex_info.sub_op) {
3458 PL_sublex_info.sub_op->op_private |=
3459 (PL_lex_repl ? OPpTRANS_FROM_UTF
3460 : OPpTRANS_TO_UTF);
f9a63242 3461 }
e294cc5d
JH
3462#ifdef EBCDIC
3463 if (uv > 255 && !dorange)
3464 native_range = FALSE;
3465#endif
012bcf8d 3466 }
a0ed51b3 3467 else {
012bcf8d 3468 *d++ = (char)uv;
a0ed51b3 3469 }
012bcf8d
GS
3470 }
3471 else {
c4d5f83a 3472 *d++ = (char) uv;
a0ed51b3 3473 }
79072805 3474 continue;
02aa26ce 3475
4a2d328f 3476 case 'N':
ff3f963a
KW
3477 /* In a non-pattern \N must be a named character, like \N{LATIN
3478 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3479 * mean to match a non-newline. For non-patterns, named
3480 * characters are converted to their string equivalents. In
3481 * patterns, named characters are not converted to their
3482 * ultimate forms for the same reasons that other escapes
3483 * aren't. Instead, they are converted to the \N{U+...} form
3484 * to get the value from the charnames that is in effect right
3485 * now, while preserving the fact that it was a named character
3486 * so that the regex compiler knows this */
3487
3488 /* This section of code doesn't generally use the
3489 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3490 * a close examination of this macro and determined it is a
3491 * no-op except on utfebcdic variant characters. Every
3492 * character generated by this that would normally need to be
3493 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3494 * needed, and would complicate use of copy(). XXX There are
3495 * other parts of this file where the macro is used
3496 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3497
3498 /* The structure of this section of code (besides checking for
3499 * errors and upgrading to utf8) is:
3500 * Further disambiguate between the two meanings of \N, and if
3501 * not a charname, go process it elsewhere
0a96133f
KW
3502 * If of form \N{U+...}, pass it through if a pattern;
3503 * otherwise convert to utf8
3504 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3505 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3506
3507 /* Here, s points to the 'N'; the test below is guaranteed to
3508 * succeed if we are being called on a pattern as we already
3509 * know from a test above that the next character is a '{'.
3510 * On a non-pattern \N must mean 'named sequence, which
3511 * requires braces */
3512 s++;
3513 if (*s != '{') {
3514 yyerror("Missing braces on \\N{}");
3515 continue;
3516 }
3517 s++;
3518
0a96133f 3519 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3520 if (! (e = strchr(s, '}'))) {
3521 if (! PL_lex_inpat) {
5777a3f7 3522 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3523 } else {
3524 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3525 }
0a96133f 3526 continue;
ff3f963a 3527 }
cddc7ef4 3528
ff3f963a 3529 /* Here it looks like a named character */
cddc7ef4 3530
ff3f963a
KW
3531 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3532 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3533 | PERL_SCAN_DISALLOW_PREFIX;
3534 STRLEN len;
3535
3536 /* For \N{U+...}, the '...' is a unicode value even on
3537 * EBCDIC machines */
3538 s += 2; /* Skip to next char after the 'U+' */
3539 len = e - s;
3540 uv = grok_hex(s, &len, &flags, NULL);
3541 if (len == 0 || len != (STRLEN)(e - s)) {
3542 yyerror("Invalid hexadecimal number in \\N{U+...}");
3543 s = e + 1;
3544 continue;
3545 }
3546
3547 if (PL_lex_inpat) {
3548
e2a7e165
KW
3549 /* On non-EBCDIC platforms, pass through to the regex
3550 * compiler unchanged. The reason we evaluated the
3551 * number above is to make sure there wasn't a syntax
3552 * error. But on EBCDIC we convert to native so
3553 * downstream code can continue to assume it's native
3554 */
ff3f963a 3555 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3556#ifdef EBCDIC
3557 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3558 and the \0 */
3559 "\\N{U+%X}",
3560 (unsigned int) UNI_TO_NATIVE(uv));
3561#else
ff3f963a
KW
3562 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3563 d += e - s + 1;
e2a7e165 3564#endif
ff3f963a
KW
3565 }
3566 else { /* Not a pattern: convert the hex to string */
3567
3568 /* If destination is not in utf8, unconditionally
3569 * recode it to be so. This is because \N{} implies
3570 * Unicode semantics, and scalars have to be in utf8
3571 * to guarantee those semantics */
3572 if (! has_utf8) {
3573 SvCUR_set(sv, d - SvPVX_const(sv));
3574 SvPOK_on(sv);
3575 *d = '\0';
3576 /* See Note on sizing above. */
3577 sv_utf8_upgrade_flags_grow(
3578 sv,
3579 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3580 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3581 d = SvPVX(sv) + SvCUR(sv);
3582 has_utf8 = TRUE;
3583 }
3584
3585 /* Add the string to the output */
3586 if (UNI_IS_INVARIANT(uv)) {
3587 *d++ = (char) uv;
3588 }
3589 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3590 }
3591 }
6f613c73
KW
3592 else /* Here is \N{NAME} but not \N{U+...}. */
3593 if ((res = get_and_check_backslash_N_name(s, e)))
3594 {
3595 STRLEN len;
3596 const char *str = SvPV_const(res, len);
3597 if (PL_lex_inpat) {
ff3f963a
KW
3598
3599 if (! len) { /* The name resolved to an empty string */
3600 Copy("\\N{}", d, 4, char);
3601 d += 4;
3602 }
3603 else {
3604 /* In order to not lose information for the regex
3605 * compiler, pass the result in the specially made
3606 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3607 * the code points in hex of each character
3608 * returned by charnames */
3609
3610 const char *str_end = str + len;
3b721c4f 3611 const STRLEN off = d - SvPVX_const(sv);
94ca1619
KW
3612
3613 if (! SvUTF8(res)) {
3614 /* For the non-UTF-8 case, we can determine the
3615 * exact length needed without having to parse
3616 * through the string. Each character takes up
3617 * 2 hex digits plus either a trailing dot or
3618 * the "}" */
3619 d = off + SvGROW(sv, off
3620 + 3 * len
3621 + 6 /* For the "\N{U+", and
3622 trailing NUL */
3623 + (STRLEN)(send - e));
3624 Copy("\\N{U+", d, 5, char);
3625 d += 5;
3626 while (str < str_end) {
3627 char hex_string[4];
3628 my_snprintf(hex_string, sizeof(hex_string),
3629 "%02X.", (U8) *str);
3630 Copy(hex_string, d, 3, char);
3631 d += 3;
3632 str++;
3633 }
3634 d--; /* We will overwrite below the final
3635 dot with a right brace */
3636 }
3637 else {
1953db30
KW
3638 STRLEN char_length; /* cur char's byte length */
3639
3640 /* and the number of bytes after this is
3641 * translated into hex digits */
3642 STRLEN output_length;
3643
3644 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3645 * for max('U+', '.'); and 1 for NUL */
3646 char hex_string[2 * UTF8_MAXBYTES + 5];
3647
3648 /* Get the first character of the result. */
3649 U32 uv = utf8n_to_uvuni((U8 *) str,
3650 len,
3651 &char_length,
3652 UTF8_ALLOW_ANYUV);
3653 /* Convert first code point to hex, including
3654 * the boiler plate before it. For all these,
3655 * we convert to native format so that
3656 * downstream code can continue to assume the
3657 * input is native */
3658 output_length =
3659 my_snprintf(hex_string, sizeof(hex_string),
3660 "\\N{U+%X",
3661 (unsigned int) UNI_TO_NATIVE(uv));
3662
3663 /* Make sure there is enough space to hold it */
3664 d = off + SvGROW(sv, off
3665 + output_length
3666 + (STRLEN)(send - e)
3667 + 2); /* '}' + NUL */
3668 /* And output it */
3669 Copy(hex_string, d, output_length, char);
3670 d += output_length;
3671
3672 /* For each subsequent character, append dot and
3673 * its ordinal in hex */
3674 while ((str += char_length) < str_end) {
3675 const STRLEN off = d - SvPVX_const(sv);
3676 U32 uv = utf8n_to_uvuni((U8 *) str,
3677 str_end - str,
3678 &char_length,
3679 UTF8_ALLOW_ANYUV);
3680 output_length =
3681 my_snprintf(hex_string,
3682 sizeof(hex_string),
3683 ".%X",
3684 (unsigned int) UNI_TO_NATIVE(uv));
3685
3686 d = off + SvGROW(sv, off
3687 + output_length
3688 + (STRLEN)(send - e)
3689 + 2); /* '}' + NUL */
3690 Copy(hex_string, d, output_length, char);
3691 d += output_length;
3692 }
94ca1619 3693 }
ff3f963a
KW
3694
3695 *d++ = '}'; /* Done. Add the trailing brace */
3696 }
3697 }
3698 else { /* Here, not in a pattern. Convert the name to a
3699 * string. */
3700
3701 /* If destination is not in utf8, unconditionally
3702 * recode it to be so. This is because \N{} implies
3703 * Unicode semantics, and scalars have to be in utf8
3704 * to guarantee those semantics */
3705 if (! has_utf8) {
3706 SvCUR_set(sv, d - SvPVX_const(sv));
3707 SvPOK_on(sv);
3708 *d = '\0';
3709 /* See Note on sizing above. */
3710 sv_utf8_upgrade_flags_grow(sv,
3711 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3712 len + (STRLEN)(send - s) + 1);
3713 d = SvPVX(sv) + SvCUR(sv);
3714 has_utf8 = TRUE;
3715 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3716
3717 /* See Note on sizing above. (NOTE: SvCUR() is not
3718 * set correctly here). */
3719 const STRLEN off = d - SvPVX_const(sv);
3720 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3721 }
3722 Copy(str, d, len, char);
3723 d += len;
423cee85 3724 }
6f613c73 3725
423cee85 3726 SvREFCNT_dec(res);
cb233ae3 3727
cb233ae3 3728 } /* End \N{NAME} */
ff3f963a
KW
3729#ifdef EBCDIC
3730 if (!dorange)
3731 native_range = FALSE; /* \N{} is defined to be Unicode */
3732#endif
3733 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3734 continue;
3735
02aa26ce 3736 /* \c is a control character */
79072805
LW
3737 case 'c':
3738 s++;
961ce445 3739 if (s < send) {
17a3df4c 3740 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3741 }
961ce445
RGS
3742 else {
3743 yyerror("Missing control char name in \\c");
3744 }
79072805 3745 continue;
02aa26ce
NT
3746
3747 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3748 case 'b':
db42d148 3749 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3750 break;
3751 case 'n':
db42d148 3752 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3753 break;
3754 case 'r':
db42d148 3755 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3756 break;
3757 case 'f':
db42d148 3758 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3759 break;
3760 case 't':
db42d148 3761 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3762 break;
34a3fe2a 3763 case 'e':
db42d148 3764 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3765 break;
3766 case 'a':
db42d148 3767 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3768 break;
02aa26ce
NT
3769 } /* end switch */
3770
79072805
LW
3771 s++;
3772 continue;
02aa26ce 3773 } /* end if (backslash) */
4c3a8340
TS
3774#ifdef EBCDIC
3775 else
3776 literal_endpoint++;
3777#endif
02aa26ce 3778
f9a63242 3779 default_action:
77a135fe
KW
3780 /* If we started with encoded form, or already know we want it,
3781 then encode the next character */
3782 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3783 STRLEN len = 1;
77a135fe
KW
3784
3785
3786 /* One might think that it is wasted effort in the case of the
3787 * source being utf8 (this_utf8 == TRUE) to take the next character
3788 * in the source, convert it to an unsigned value, and then convert
3789 * it back again. But the source has not been validated here. The
3790 * routine that does the conversion checks for errors like
3791 * malformed utf8 */
3792
5f66b61c
AL
3793 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3794 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3795 if (!has_utf8) {
3796 SvCUR_set(sv, d - SvPVX_const(sv));
3797 SvPOK_on(sv);
3798 *d = '\0';
77a135fe 3799 /* See Note on sizing above. */
7bf79863
KW
3800 sv_utf8_upgrade_flags_grow(sv,
3801 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3802 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3803 d = SvPVX(sv) + SvCUR(sv);
3804 has_utf8 = TRUE;
3805 } else if (need > len) {
3806 /* encoded value larger than old, may need extra space (NOTE:
3807 * SvCUR() is not set correctly here). See Note on sizing
3808 * above. */
9d4ba2ae 3809 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3810 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3811 }
77a135fe
KW
3812 s += len;
3813
5f66b61c 3814 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3815#ifdef EBCDIC
3816 if (uv > 255 && !dorange)
3817 native_range = FALSE;
3818#endif
2b9d42f0
NIS
3819 }
3820 else {
3821 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3822 }
02aa26ce
NT
3823 } /* while loop to process each character */
3824
3825 /* terminate the string and set up the sv */
79072805 3826 *d = '\0';
95a20fc0 3827 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3828 if (SvCUR(sv) >= SvLEN(sv))
5637ef5b
NC
3829 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3830 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
2b9d42f0 3831
79072805 3832 SvPOK_on(sv);
9f4817db 3833 if (PL_encoding && !has_utf8) {
d0063567
DK
3834 sv_recode_to_utf8(sv, PL_encoding);
3835 if (SvUTF8(sv))
3836 has_utf8 = TRUE;
9f4817db 3837 }
2b9d42f0 3838 if (has_utf8) {
7e2040f0 3839 SvUTF8_on(sv);
2b9d42f0 3840 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3841 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3842 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3843 }
3844 }
79072805 3845
02aa26ce 3846 /* shrink the sv if we allocated more than we used */
79072805 3847 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3848 SvPV_shrink_to_cur(sv);
79072805 3849 }
02aa26ce 3850
6154021b 3851 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3852 if (s > PL_bufptr) {
b899e89d 3853 SvREFCNT_inc_simple_void_NN(sv);
4f3e2518
DM
3854 if ( (PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ))
3855 && ! PL_parser->lex_re_reparsing)
3856 {
eb0d8d16
NC
3857 const char *const key = PL_lex_inpat ? "qr" : "q";
3858 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3859 const char *type;
3860 STRLEN typelen;
3861
3862 if (PL_lex_inwhat == OP_TRANS) {
3863 type = "tr";
3864 typelen = 2;
3865 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3866 type = "s";
3867 typelen = 1;
9da1dd8f
DM
3868 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3869 type = "q";
3870 typelen = 1;
eb0d8d16
NC
3871 } else {
3872 type = "qq";
3873 typelen = 2;
3874 }
3875
3876 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3877 type, typelen);
3878 }
6154021b 3879 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b899e89d
FC
3880 }
3881 LEAVE_with_name("scan_const");
79072805
LW
3882 return s;
3883}
3884
ffb4593c
NT
3885/* S_intuit_more
3886 * Returns TRUE if there's more to the expression (e.g., a subscript),
3887 * FALSE otherwise.
ffb4593c
NT
3888 *
3889 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3890 *
3891 * ->[ and ->{ return TRUE
3892 * { and [ outside a pattern are always subscripts, so return TRUE
3893 * if we're outside a pattern and it's not { or [, then return FALSE
3894 * if we're in a pattern and the first char is a {
3895 * {4,5} (any digits around the comma) returns FALSE
3896 * if we're in a pattern and the first char is a [
3897 * [] returns FALSE
3898 * [SOMETHING] has a funky algorithm to decide whether it's a
3899 * character class or not. It has to deal with things like
3900 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3901 * anything else returns TRUE
3902 */
3903
9cbb5ea2
GS
3904/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3905
76e3520e 3906STATIC int
5aaab254 3907S_intuit_more(pTHX_ char *s)
79072805 3908{
97aff369 3909 dVAR;
7918f24d
NC
3910
3911 PERL_ARGS_ASSERT_INTUIT_MORE;
3912
3280af22 3913 if (PL_lex_brackets)
79072805
LW
3914 return TRUE;
3915 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3916 return TRUE;
3917 if (*s != '{' && *s != '[')
3918 return FALSE;
3280af22 3919 if (!PL_lex_inpat)
79072805
LW
3920 return TRUE;
3921
3922 /* In a pattern, so maybe we have {n,m}. */
3923 if (*s == '{') {
4d68ffa0 3924 if (regcurly(s, FALSE)) {
79072805 3925 return FALSE;
b3155d95 3926 }
79072805 3927 return TRUE;
79072805
LW
3928 }
3929
3930 /* On the other hand, maybe we have a character class */
3931
3932 s++;
3933 if (*s == ']' || *s == '^')
3934 return FALSE;
3935 else {
ffb4593c 3936 /* this is terrifying, and it works */
99f2bdb7 3937 int weight;
79072805 3938 char seen[256];
9d4ba2ae 3939 const char * const send = strchr(s,']');
99f2bdb7 3940 unsigned char un_char, last_un_char;
3280af22 3941 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3942
3943 if (!send) /* has to be an expression */
3944 return TRUE;
99f2bdb7 3945 weight = 2; /* let's weigh the evidence */
79072805 3946
79072805
LW
3947 if (*s == '$')
3948 weight -= 3;
3949 else if (isDIGIT(*s)) {
3950 if (s[1] != ']') {
3951 if (isDIGIT(s[1]) && s[2] == ']')
3952 weight -= 10;
3953 }
3954 else
3955 weight -= 100;
3956 }
99f2bdb7
DD
3957 Zero(seen,256,char);
3958 un_char = 255;
79072805
LW
3959 for (; s < send; s++) {
3960 last_un_char = un_char;
3961 un_char = (unsigned char)*s;
3962 switch (*s) {
3963 case '@':
3964 case '&':
3965 case '$':
3966 weight -= seen[un_char] * 10;
8a2bca12 3967 if (isWORDCHAR_lazy_if(s+1,UTF)) {
90e5519e 3968 int len;
8903cb82 3969 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e 3970 len = (int)strlen(tmpbuf);
6fbd0d97
BF
3971 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3972 UTF ? SVf_UTF8 : 0, SVt_PV))
79072805
LW
3973 weight -= 100;
3974 else
3975 weight -= 10;
3976 }
3977 else if (*s == '$' && s[1] &&
93a17b20
LW
3978 strchr("[#!%*<>()-=",s[1])) {
3979 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3980 weight -= 10;
3981 else
3982 weight -= 1;
3983 }
3984 break;
3985 case '\\':
3986 un_char = 254;
3987 if (s[1]) {
93a17b20 3988 if (strchr("wds]",s[1]))
79072805 3989 weight += 100;
10edeb5d 3990 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3991 weight += 1;
93a17b20 3992 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3993 weight += 40;
3994 else if (isDIGIT(s[1])) {
3995 weight += 40;
3996 while (s[1] && isDIGIT(s[1]))
3997 s++;
3998 }
3999 }
4000 else
4001 weight += 100;
4002 break;
4003 case '-':
4004 if (s[1] == '\\')
4005 weight += 50;
93a17b20 4006 if (strchr("aA01! ",last_un_char))
79072805 4007 weight += 30;
93a17b20 4008 if (strchr("zZ79~",s[1]))
79072805 4009 weight += 30;
f27ffc4a
GS
4010 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
4011 weight -= 5; /* cope with negative subscript */
79072805
LW
4012 break;
4013 default:
0eb30aeb 4014 if (!isWORDCHAR(last_un_char)
3792a11b
NC
4015 && !(last_un_char == '$' || last_un_char == '@'
4016 || last_un_char == '&')
4017 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
4018 char *d = tmpbuf;
4019 while (isALPHA(*s))
4020 *d++ = *s++;
4021 *d = '\0';
5458a98a 4022 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
4023 weight -= 150;
4024 }
4025 if (un_char == last_un_char + 1)
4026 weight += 5;
4027 weight -= seen[un_char];
4028 break;
4029 }
4030 seen[un_char]++;
4031 }
4032 if (weight >= 0) /* probably a character class */
4033 return FALSE;
4034 }
4035
4036 return TRUE;
4037}
ffed7fef 4038
ffb4593c
NT
4039/*
4040 * S_intuit_method
4041 *
4042 * Does all the checking to disambiguate
4043 * foo bar
4044 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
4045 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
4046 *
4047 * First argument is the stuff after the first token, e.g. "bar".
4048 *
a4fd4a89 4049 * Not a method if foo is a filehandle.
ffb4593c
NT
4050 * Not a method if foo is a subroutine prototyped to take a filehandle.
4051 * Not a method if it's really "Foo $bar"
4052 * Method if it's "foo $bar"
4053 * Not a method if it's really "print foo $bar"
4054 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 4055 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 4056 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
4057 * =>
4058 */
4059
76e3520e 4060STATIC int
62d55b22 4061S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 4062{
97aff369 4063 dVAR;
a0d0e21e 4064 char *s = start + (*start == '$');
3280af22 4065 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
4066 STRLEN len;
4067 GV* indirgv;
5db06880
NC
4068#ifdef PERL_MAD
4069 int soff;
4070#endif
a0d0e21e 4071
7918f24d
NC
4072 PERL_ARGS_ASSERT_INTUIT_METHOD;
4073
aca88b25 4074 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 4075 return 0;
aca88b25 4076 if (cv && SvPOK(cv)) {
80e09529
PM
4077 const char *proto = CvPROTO(cv);
4078 if (proto) {
4079 while (*proto && (isSPACE(*proto) || *proto == ';'))
4080 proto++;
4081 if (*proto == '*')
4082 return 0;
4083 }
a0d0e21e 4084 }
ffb4593c 4085
a0d0e21e 4086 if (*start == '$') {
39c012bc 4087 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3ef1310e 4088 isUPPER(*PL_tokenbuf))
a0d0e21e 4089 return 0;
5db06880
NC
4090#ifdef PERL_MAD
4091 len = start - SvPVX(PL_linestr);
4092#endif
29595ff2 4093 s = PEEKSPACE(s);
f0092767 4094#ifdef PERL_MAD
5db06880
NC
4095 start = SvPVX(PL_linestr) + len;
4096#endif
3280af22
NIS
4097 PL_bufptr = start;
4098 PL_expect = XREF;
a0d0e21e
LW
4099 return *s == '(' ? FUNCMETH : METHOD;
4100 }
04e07f8b
BF
4101
4102 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
4103 /* start is the beginning of the possible filehandle/object,
4104 * and s is the end of it
4105 * tmpbuf is a copy of it (but with single quotes as double colons)
4106 */
4107
5458a98a 4108 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
4109 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
4110 len -= 2;
4111 tmpbuf[len] = '\0';
5db06880
NC
4112#ifdef PERL_MAD
4113 soff = s - SvPVX(PL_linestr);
4114#endif
c3e0f903
GS
4115 goto bare_package;
4116 }
38d2cf30 4117 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8ebc5c01 4118 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
4119 return 0;
4120 /* filehandle or package name makes it a method */
39c012bc 4121 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5db06880
NC
4122#ifdef PERL_MAD
4123 soff = s - SvPVX(PL_linestr);
4124#endif
29595ff2 4125 s = PEEKSPACE(s);
3280af22 4126 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 4127 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 4128 bare_package:
cd81e915 4129 start_force(PL_curforce);
9ded7720 4130 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 4131 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 4132 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880 4133 if (PL_madskills)
38d2cf30
BF
4134 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
4135 ( UTF ? SVf_UTF8 : 0 )));
3280af22 4136 PL_expect = XTERM;
a0d0e21e 4137 force_next(WORD);
3280af22 4138 PL_bufptr = s;
5db06880
NC
4139#ifdef PERL_MAD
4140 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
4141#endif
a0d0e21e
LW
4142 return *s == '(' ? FUNCMETH : METHOD;
4143 }
4144 }
4145 return 0;
4146}
4147
16d20bd9 4148/* Encoded script support. filter_add() effectively inserts a
4e553d73 4149 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
4150 * Note that the filter function only applies to the current source file
4151 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4152 *
4153 * The datasv parameter (which may be NULL) can be used to pass
4154 * private data to this instance of the filter. The filter function
4155 * can recover the SV using the FILTER_DATA macro and use it to
4156 * store private buffers and state information.
4157 *
4158 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 4159 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 4160 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
4161 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4162 * private use must be set using malloc'd pointers.
4163 */
16d20bd9
AD
4164
4165SV *
864dbfa3 4166Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 4167{
97aff369 4168 dVAR;
f4c556ac 4169 if (!funcp)
a0714e2c 4170 return NULL;
f4c556ac 4171
5486870f
DM
4172 if (!PL_parser)
4173 return NULL;
4174
f1c31c52
FC
4175 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4176 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4177
3280af22
NIS
4178 if (!PL_rsfp_filters)
4179 PL_rsfp_filters = newAV();
16d20bd9 4180 if (!datasv)
561b68a9 4181 datasv = newSV(0);
862a34c6 4182 SvUPGRADE(datasv, SVt_PVIO);
8141890a 4183 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 4184 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 4185 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
4186 FPTR2DPTR(void *, IoANY(datasv)),
4187 SvPV_nolen(datasv)));
3280af22
NIS
4188 av_unshift(PL_rsfp_filters, 1);
4189 av_store(PL_rsfp_filters, 0, datasv) ;
60d63348
FC
4190 if (
4191 !PL_parser->filtered
4192 && PL_parser->lex_flags & LEX_EVALBYTES
4193 && PL_bufptr < PL_bufend
4194 ) {
4195 const char *s = PL_bufptr;
4196 while (s < PL_bufend) {
4197 if (*s == '\n') {
4198 SV *linestr = PL_parser->linestr;
4199 char *buf = SvPVX(linestr);
4200 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4201 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4202 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4203 STRLEN const linestart_pos = PL_parser->linestart - buf;
4204 STRLEN const last_uni_pos =
4205 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4206 STRLEN const last_lop_pos =
4207 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4208 av_push(PL_rsfp_filters, linestr);
4209 PL_parser->linestr =
4210 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4211 buf = SvPVX(PL_parser->linestr);
4212 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4213 PL_parser->bufptr = buf + bufptr_pos;
4214 PL_parser->oldbufptr = buf + oldbufptr_pos;
4215 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4216 PL_parser->linestart = buf + linestart_pos;
4217 if (PL_parser->last_uni)
4218 PL_parser->last_uni = buf + last_uni_pos;
4219 if (PL_parser->last_lop)
4220 PL_parser->last_lop = buf + last_lop_pos;
4221 SvLEN(linestr) = SvCUR(linestr);
4222 SvCUR(linestr) = s-SvPVX(linestr);
4223 PL_parser->filtered = 1;
4224 break;
4225 }
4226 s++;
4227 }
4228 }
16d20bd9
AD
4229 return(datasv);
4230}
4e553d73 4231
16d20bd9
AD
4232
4233/* Delete most recently added instance of this filter function. */
a0d0e21e 4234void
864dbfa3 4235Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 4236{
97aff369 4237 dVAR;
e0c19803 4238 SV *datasv;
24801a4b 4239
7918f24d
NC
4240 PERL_ARGS_ASSERT_FILTER_DEL;
4241
33073adb 4242#ifdef DEBUGGING
55662e27
JH
4243 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4244 FPTR2DPTR(void*, funcp)));
33073adb 4245#endif
5486870f 4246 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
4247 return;
4248 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 4249 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 4250 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 4251 sv_free(av_pop(PL_rsfp_filters));
e50aee73 4252
16d20bd9
AD
4253 return;
4254 }
4255 /* we need to search for the correct entry and clear it */
cea2e8a9 4256 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
4257}
4258
4259
1de9afcd
RGS
4260/* Invoke the idxth filter function for the current rsfp. */
4261/* maxlen 0 = read one text line */
16d20bd9 4262I32
864dbfa3 4263Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 4264{
97aff369 4265 dVAR;
16d20bd9
AD
4266 filter_t funcp;
4267 SV *datasv = NULL;
f482118e
NC
4268 /* This API is bad. It should have been using unsigned int for maxlen.
4269 Not sure if we want to change the API, but if not we should sanity
4270 check the value here. */
60d63348 4271 unsigned int correct_length
39cd7a59
NC
4272 = maxlen < 0 ?
4273#ifdef PERL_MICRO
4274 0x7FFFFFFF
4275#else
4276 INT_MAX
4277#endif
4278 : maxlen;
e50aee73 4279
7918f24d
NC
4280 PERL_ARGS_ASSERT_FILTER_READ;
4281
5486870f 4282 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 4283 return -1;
1de9afcd 4284 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
4285 /* Provide a default input filter to make life easy. */
4286 /* Note that we append to the line. This is handy. */
f4c556ac
GS
4287 DEBUG_P(PerlIO_printf(Perl_debug_log,
4288 "filter_read %d: from rsfp\n", idx));
f482118e 4289 if (correct_length) {
16d20bd9
AD
4290 /* Want a block */
4291 int len ;
f54cb97a 4292 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
4293
4294 /* ensure buf_sv is large enough */
881d8f0a 4295 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
4296 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4297 correct_length)) <= 0) {
3280af22 4298 if (PerlIO_error(PL_rsfp))
37120919
AD
4299 return -1; /* error */
4300 else
4301 return 0 ; /* end of file */
4302 }
16d20bd9 4303 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 4304 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
4305 } else {
4306 /* Want a line */
3280af22
NIS
4307 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4308 if (PerlIO_error(PL_rsfp))
37120919
AD
4309 return -1; /* error */
4310 else
4311 return 0 ; /* end of file */
4312 }
16d20bd9
AD
4313 }
4314 return SvCUR(buf_sv);
4315 }
4316 /* Skip this filter slot if filter has been deleted */
1de9afcd 4317 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
4318 DEBUG_P(PerlIO_printf(Perl_debug_log,
4319 "filter_read %d: skipped (filter deleted)\n",
4320 idx));
f482118e 4321 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9 4322 }
60d63348
FC
4323 if (SvTYPE(datasv) != SVt_PVIO) {
4324 if (correct_length) {
4325 /* Want a block */
4326 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4327 if (!remainder) return 0; /* eof */
4328 if (correct_length > remainder) correct_length = remainder;
4329 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4330 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4331 } else {
4332 /* Want a line */
4333 const char *s = SvEND(datasv);
4334 const char *send = SvPVX(datasv) + SvLEN(datasv);
4335 while (s < send) {
4336 if (*s == '\n') {
4337 s++;
4338 break;
4339 }
4340 s++;
4341 }
4342 if (s == send) return 0; /* eof */
4343 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4344 SvCUR_set(datasv, s-SvPVX(datasv));
4345 }
4346 return SvCUR(buf_sv);
4347 }
16d20bd9 4348 /* Get function pointer hidden within datasv */
8141890a 4349 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
4350 DEBUG_P(PerlIO_printf(Perl_debug_log,
4351 "filter_read %d: via function %p (%s)\n",
ca0270c4 4352 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
4353 /* Call function. The function is expected to */
4354 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 4355 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 4356 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
4357}
4358
76e3520e 4359STATIC char *
5aaab254 4360S_filter_gets(pTHX_ SV *sv, STRLEN append)
16d20bd9 4361{
97aff369 4362 dVAR;
7918f24d
NC
4363
4364 PERL_ARGS_ASSERT_FILTER_GETS;
4365
c39cd008 4366#ifdef PERL_CR_FILTER
3280af22 4367 if (!PL_rsfp_filters) {
c39cd008 4368 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
4369 }
4370#endif
3280af22 4371 if (PL_rsfp_filters) {
55497cff 4372 if (!append)
4373 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
4374 if (FILTER_READ(0, sv, 0) > 0)
4375 return ( SvPVX(sv) ) ;
4376 else
bd61b366 4377 return NULL ;
16d20bd9 4378 }
9d116dd7 4379 else
5cc814fd 4380 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
4381}
4382
01ec43d0 4383STATIC HV *
9bde8eb0 4384S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 4385{
97aff369 4386 dVAR;
def3634b
GS
4387 GV *gv;
4388
7918f24d
NC
4389 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4390
01ec43d0 4391 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
4392 return PL_curstash;
4393
4394 if (len > 2 &&
4395 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
acc6da14 4396 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
01ec43d0
GS
4397 {
4398 return GvHV(gv); /* Foo:: */
def3634b
GS
4399 }
4400
4401 /* use constant CLASS => 'MyClass' */
acc6da14 4402 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
c35e046a
AL
4403 if (gv && GvCV(gv)) {
4404 SV * const sv = cv_const_sv(GvCV(gv));
4405 if (sv)
9bde8eb0 4406 pkgname = SvPV_const(sv, len);
def3634b
GS
4407 }
4408
acc6da14 4409 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
def3634b 4410}
a0d0e21e 4411
e3f73d4e
RGS
4412/*
4413 * S_readpipe_override
486ec47a 4414 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4415 * optree, provided sublex_start() is called afterwards.
4416 */
4417STATIC void
1d51329b 4418S_readpipe_override(pTHX)
e3f73d4e
RGS
4419{
4420 GV **gvp;
4421 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4422 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4423 if ((gv_readpipe
4424 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4425 ||
4426 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4427 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4428 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4429 {
4430 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4431 op_append_elem(OP_LIST,
e3f73d4e
RGS
4432 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4433 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4434 }
e3f73d4e
RGS
4435}
4436
5db06880
NC
4437#ifdef PERL_MAD
4438 /*
4439 * Perl_madlex
4440 * The intent of this yylex wrapper is to minimize the changes to the
4441 * tokener when we aren't interested in collecting madprops. It remains
4442 * to be seen how successful this strategy will be...
4443 */
4444
4445int
4446Perl_madlex(pTHX)
4447{
4448 int optype;
4449 char *s = PL_bufptr;
4450
cd81e915
NC
4451 /* make sure PL_thiswhite is initialized */
4452 PL_thiswhite = 0;
4453 PL_thismad = 0;
5db06880 4454
5db06880 4455 /* previous token ate up our whitespace? */
cd81e915
NC
4456 if (!PL_lasttoke && PL_nextwhite) {
4457 PL_thiswhite = PL_nextwhite;
4458 PL_nextwhite = 0;
5db06880
NC
4459 }
4460
4461 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4462 PL_realtokenstart = -1;
4463 PL_thistoken = 0;
5db06880
NC
4464 optype = yylex();
4465 s = PL_bufptr;
cd81e915 4466 assert(PL_curforce < 0);
5db06880 4467
cd81e915
NC
4468 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4469 if (!PL_thistoken) {
4470 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4471 PL_thistoken = newSVpvs("");
5db06880 4472 else {
c35e046a 4473 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4474 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4475 }
4476 }
cd81e915
NC
4477 if (PL_thismad) /* install head */
4478 CURMAD('X', PL_thistoken);
5db06880
NC
4479 }
4480
4481 /* last whitespace of a sublex? */
cd81e915
NC
4482 if (optype == ')' && PL_endwhite) {
4483 CURMAD('X', PL_endwhite);
5db06880
NC
4484 }
4485
cd81e915 4486 if (!PL_thismad) {
5db06880
NC
4487
4488 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4489 if (!PL_thiswhite && !PL_endwhite && !optype) {
4490 sv_free(PL_thistoken);
4491 PL_thistoken = 0;
5db06880
NC
4492 return 0;
4493 }
4494
4495 /* put off final whitespace till peg */
60d63348 4496 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
cd81e915
NC
4497 PL_nextwhite = PL_thiswhite;
4498 PL_thiswhite = 0;
5db06880 4499 }
cd81e915
NC
4500 else if (PL_thisopen) {
4501 CURMAD('q', PL_thisopen);
4502 if (PL_thistoken)
4503 sv_free(PL_thistoken);
4504 PL_thistoken = 0;
5db06880
NC
4505 }
4506 else {
4507 /* Store actual token text as madprop X */
cd81e915 4508 CURMAD('X', PL_thistoken);
5db06880
NC
4509 }
4510
cd81e915 4511 if (PL_thiswhite) {
5db06880 4512 /* add preceding whitespace as madprop _ */
cd81e915 4513 CURMAD('_', PL_thiswhite);
5db06880
NC
4514 }
4515
cd81e915 4516 if (PL_thisstuff) {
5db06880 4517 /* add quoted material as madprop = */
cd81e915 4518 CURMAD('=', PL_thisstuff);
5db06880
NC
4519 }
4520
cd81e915 4521 if (PL_thisclose) {
5db06880 4522 /* add terminating quote as madprop Q */
cd81e915 4523 CURMAD('Q', PL_thisclose);
5db06880
NC
4524 }
4525 }
4526
4527 /* special processing based on optype */
4528
4529 switch (optype) {
4530
4531 /* opval doesn't need a TOKEN since it can already store mp */
4532 case WORD:
4533 case METHOD:
4534 case FUNCMETH:
4535 case THING:
4536 case PMFUNC:
4537 case PRIVATEREF:
4538 case FUNC0SUB:
4539 case UNIOPSUB:
4540 case LSTOPSUB:
6154021b
RGS
4541 if (pl_yylval.opval)
4542 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4543 PL_thismad = 0;
5db06880
NC
4544 return optype;
4545
4546 /* fake EOF */
4547 case 0:
4548 optype = PEG;
cd81e915
NC
4549 if (PL_endwhite) {
4550 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4551 PL_endwhite = 0;
5db06880
NC
4552 }
4553 break;
4554
5504e6cf
FC
4555 /* pval */
4556 case LABEL:
4557 break;
4558
5db06880
NC
4559 case ']':
4560 case '}':
cd81e915 4561 if (PL_faketokens)
5db06880
NC
4562 break;
4563 /* remember any fake bracket that lexer is about to discard */
4564 if (PL_lex_brackets == 1 &&
4565 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4566 {
4567 s = PL_bufptr;
4568 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4569 s++;
4570 if (*s == '}') {
cd81e915
NC
4571 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4572 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4573 PL_thiswhite = 0;
5db06880
NC
4574 PL_bufptr = s - 1;
4575 break; /* don't bother looking for trailing comment */
4576 }
4577 else
4578 s = PL_bufptr;
4579 }
4580 if (optype == ']')
4581 break;
4582 /* FALLTHROUGH */
4583
4584 /* attach a trailing comment to its statement instead of next token */
4585 case ';':
cd81e915 4586 if (PL_faketokens)
5db06880
NC
4587 break;
4588 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4589 s = PL_bufptr;
4590 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4591 s++;
4592 if (*s == '\n' || *s == '#') {
4593 while (s < PL_bufend && *s != '\n')
4594 s++;
4595 if (s < PL_bufend)
4596 s++;
cd81e915
NC
4597 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4598 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4599 PL_thiswhite = 0;
5db06880
NC
4600 PL_bufptr = s;
4601 }
4602 }
4603 break;
4604
5db06880
NC
4605 /* ival */
4606 default:
4607 break;
4608
4609 }
4610
4611 /* Create new token struct. Note: opvals return early above. */
6154021b 4612 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4613 PL_thismad = 0;
5db06880
NC
4614 return optype;
4615}
4616#endif
4617
468aa647 4618STATIC char *
cc6ed77d 4619S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4620 dVAR;
7918f24d
NC
4621
4622 PERL_ARGS_ASSERT_TOKENIZE_USE;
4623
468aa647
RGS
4624 if (PL_expect != XSTATE)
4625 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4626 is_use ? "use" : "no"));
52d0e95b 4627 PL_expect = XTERM;
29595ff2 4628 s = SKIPSPACE1(s);
468aa647
RGS
4629 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4630 s = force_version(s, TRUE);
17c59fdf
VP
4631 if (*s == ';' || *s == '}'
4632 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4633 start_force(PL_curforce);
9ded7720 4634 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4635 force_next(WORD);
4636 }
4637 else if (*s == 'v') {
345b3785 4638 s = force_word(s,WORD,FALSE,TRUE);
468aa647
RGS
4639 s = force_version(s, FALSE);
4640 }
4641 }
4642 else {
345b3785 4643 s = force_word(s,WORD,FALSE,TRUE);
468aa647
RGS
4644 s = force_version(s, FALSE);
4645 }
6154021b 4646 pl_yylval.ival = is_use;
468aa647
RGS
4647 return s;
4648}
748a9306 4649#ifdef DEBUGGING
27da23d5 4650 static const char* const exp_name[] =
09bef843 4651 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4652 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4653 };
748a9306 4654#endif
463ee0b2 4655
361d9b55
Z
4656#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4657STATIC bool
4658S_word_takes_any_delimeter(char *p, STRLEN len)
4659{
4660 return (len == 1 && strchr("msyq", p[0])) ||
4661 (len == 2 && (
4662 (p[0] == 't' && p[1] == 'r') ||
4663 (p[0] == 'q' && strchr("qwxr", p[1]))));
4664}
4665
02aa26ce
NT
4666/*
4667 yylex
4668
4669 Works out what to call the token just pulled out of the input
4670 stream. The yacc parser takes care of taking the ops we return and
4671 stitching them into a tree.
4672
4673 Returns:
3875fc11 4674 The type of the next token
02aa26ce
NT
4675
4676 Structure:
3875fc11
FC
4677 Switch based on the current state:
4678 - if we already built the token before, use it
4679 - if we have a case modifier in a string, deal with that
4680 - handle other cases of interpolation inside a string
4681 - scan the next line if we are inside a format
4682 In the normal state switch on the next character:
4683 - default:
4684 if alphabetic, go to key lookup
4685 unrecoginized character - croak
4686 - 0/4/26: handle end-of-line or EOF
4687 - cases for whitespace
4688 - \n and #: handle comments and line numbers
4689 - various operators, brackets and sigils
4690 - numbers
4691 - quotes
4692 - 'v': vstrings (or go to key lookup)
4693 - 'x' repetition operator (or go to key lookup)
4694 - other ASCII alphanumerics (key lookup begins here):
4695 word before => ?
4696 keyword plugin
4697 scan built-in keyword (but do nothing with it yet)
4698 check for statement label
4699 check for lexical subs
4700 goto just_a_word if there is one
4701 see whether built-in keyword is overridden
4702 switch on keyword number:
4703 - default: just_a_word:
4704 not a built-in keyword; handle bareword lookup
4705 disambiguate between method and sub call
4706 fall back to bareword
4707 - cases for built-in keywords
02aa26ce
NT
4708*/
4709
20141f0e 4710
dba4d153
JH
4711#ifdef __SC__
4712#pragma segment Perl_yylex
4713#endif
dba4d153 4714int
dba4d153 4715Perl_yylex(pTHX)
20141f0e 4716{
97aff369 4717 dVAR;
eb578fdb
KW
4718 char *s = PL_bufptr;
4719 char *d;
463ee0b2 4720 STRLEN len;
705fe0e5 4721 bool bof = FALSE;
9700e2d3 4722 const bool saw_infix_sigil = PL_parser->saw_infix_sigil;
705fe0e5 4723 U8 formbrack = 0;
580561a3 4724 U32 fake_eof = 0;
a687059c 4725
10edeb5d
JH
4726 /* orig_keyword, gvp, and gv are initialized here because
4727 * jump to the label just_a_word_zero can bypass their
4728 * initialization later. */
4729 I32 orig_keyword = 0;
4730 GV *gv = NULL;
4731 GV **gvp = NULL;
4732
bbf60fe6 4733 DEBUG_T( {
396482e1 4734 SV* tmp = newSVpvs("");
b6007c36
DM
4735 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4736 (IV)CopLINE(PL_curcop),
4737 lex_state_names[PL_lex_state],
4738 exp_name[PL_expect],
4739 pv_display(tmp, s, strlen(s), 0, 60));
4740 SvREFCNT_dec(tmp);
bbf60fe6 4741 } );
02aa26ce 4742
3280af22 4743 switch (PL_lex_state) {
79072805
LW
4744#ifdef COMMENTARY
4745 case LEX_NORMAL: /* Some compilers will produce faster */
4746 case LEX_INTERPNORMAL: /* code if we comment these out. */
4747 break;
4748#endif
4749
09bef843 4750 /* when we've already built the next token, just pull it out of the queue */
79072805 4751 case LEX_KNOWNEXT:
5db06880
NC
4752#ifdef PERL_MAD
4753 PL_lasttoke--;
6154021b 4754 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4755 if (PL_madskills) {
cd81e915 4756 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4757 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4758 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4759 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4760 PL_thismad->mad_val = 0;
4761 mad_free(PL_thismad);
4762 PL_thismad = 0;
5db06880
NC
4763 }
4764 }
4765 if (!PL_lasttoke) {
4766 PL_lex_state = PL_lex_defer;
4767 PL_expect = PL_lex_expect;
4768 PL_lex_defer = LEX_NORMAL;
4769 if (!PL_nexttoke[PL_lasttoke].next_type)
4770 return yylex();
4771 }
4772#else
3280af22 4773 PL_nexttoke--;
6154021b 4774 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4775 if (!PL_nexttoke) {
4776 PL_lex_state = PL_lex_defer;
4777 PL_expect = PL_lex_expect;
4778 PL_lex_defer = LEX_NORMAL;
463ee0b2 4779 }
5db06880 4780#endif
a7aaec61
Z
4781 {
4782 I32 next_type;
5db06880 4783#ifdef PERL_MAD
a7aaec61 4784 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4785#else
a7aaec61 4786 next_type = PL_nexttype[PL_nexttoke];
5db06880 4787#endif
78cdf107
Z
4788 if (next_type & (7<<24)) {
4789 if (next_type & (1<<24)) {
4790 if (PL_lex_brackets > 100)
4791 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4792 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4793 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4794 }
4795 if (next_type & (2<<24))
4796 PL_lex_allbrackets++;
4797 if (next_type & (4<<24))
4798 PL_lex_allbrackets--;
a7aaec61
Z
4799 next_type &= 0xffff;
4800 }
3f33d153 4801 return REPORT(next_type == 'p' ? pending_ident() : next_type);
a7aaec61 4802 }
79072805 4803
02aa26ce 4804 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4805 when we get here, PL_bufptr is at the \
02aa26ce 4806 */
79072805
LW
4807 case LEX_INTERPCASEMOD:
4808#ifdef DEBUGGING
3280af22 4809 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5637ef5b
NC
4810 Perl_croak(aTHX_
4811 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4812 PL_bufptr, PL_bufend, *PL_bufptr);
79072805 4813#endif
02aa26ce 4814 /* handle \E or end of string */
3280af22 4815 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4816 /* if at a \E */
3280af22 4817 if (PL_lex_casemods) {
f54cb97a 4818 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4819 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4820
3792a11b 4821 if (PL_bufptr != PL_bufend
838f2281
BF
4822 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4823 || oldmod == 'F')) {
3280af22
NIS
4824 PL_bufptr += 2;
4825 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4826#ifdef PERL_MAD
4827 if (PL_madskills)
6b29d1f5 4828 PL_thistoken = newSVpvs("\\E");
5db06880 4829#endif
a0d0e21e 4830 }
78cdf107 4831 PL_lex_allbrackets--;
bbf60fe6 4832 return REPORT(')');
79072805 4833 }
52ed07f6
BF
4834 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4835 /* Got an unpaired \E */
4836 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
820438b1 4837 "Useless use of \\E");
52ed07f6 4838 }
5db06880
NC
4839#ifdef PERL_MAD
4840 while (PL_bufptr != PL_bufend &&
4841 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
1cac5c33
FC
4842 if (PL_madskills) {
4843 if (!PL_thiswhite)
6b29d1f5 4844 PL_thiswhite = newSVpvs("");
1cac5c33
FC
4845 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
4846 }
5db06880
NC
4847 PL_bufptr += 2;
4848 }
4849#else
3280af22
NIS
4850 if (PL_bufptr != PL_bufend)
4851 PL_bufptr += 2;
5db06880 4852#endif
3280af22 4853 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4854 return yylex();
79072805
LW
4855 }
4856 else {
607df283 4857 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4858 "### Saw case modifier\n"); });
3280af22 4859 s = PL_bufptr + 1;
6e909404 4860 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4861#ifdef PERL_MAD
1cac5c33
FC
4862 if (PL_madskills) {
4863 if (!PL_thiswhite)
6b29d1f5 4864 PL_thiswhite = newSVpvs("");
1cac5c33
FC
4865 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
4866 }
5db06880 4867#endif
89122651 4868 PL_bufptr = s + 3;
6e909404
JH
4869 PL_lex_state = LEX_INTERPCONCAT;
4870 return yylex();
a0d0e21e 4871 }
6e909404 4872 else {
90771dc0 4873 I32 tmp;
5db06880
NC
4874 if (!PL_madskills) /* when just compiling don't need correct */
4875 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4876 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
838f2281
BF
4877 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4878 (strchr(PL_lex_casestack, 'L')
4879 || strchr(PL_lex_casestack, 'U')
4880 || strchr(PL_lex_casestack, 'F'))) {
6e909404 4881 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4882 PL_lex_allbrackets--;
bbf60fe6 4883 return REPORT(')');
6e909404
JH
4884 }
4885 if (PL_lex_casemods > 10)
4886 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4887 PL_lex_casestack[PL_lex_casemods++] = *s;
4888 PL_lex_casestack[PL_lex_casemods] = '\0';
4889 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4890 start_force(PL_curforce);
9ded7720 4891 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4892 force_next((2<<24)|'(');
cd81e915 4893 start_force(PL_curforce);
6e909404 4894 if (*s == 'l')
9ded7720 4895 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4896 else if (*s == 'u')
9ded7720 4897 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4898 else if (*s == 'L')
9ded7720 4899 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4900 else if (*s == 'U')
9ded7720 4901 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4902 else if (*s == 'Q')
9ded7720 4903 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
838f2281
BF
4904 else if (*s == 'F')
4905 NEXTVAL_NEXTTOKE.ival = OP_FC;
6e909404 4906 else
5637ef5b 4907 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5db06880 4908 if (PL_madskills) {
a5849ce5
NC
4909 SV* const tmpsv = newSVpvs("\\ ");
4910 /* replace the space with the character we want to escape
4911 */
4912 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4913 curmad('_', tmpsv);
4914 }
6e909404 4915 PL_bufptr = s + 1;
a0d0e21e 4916 }
79072805 4917 force_next(FUNC);
3280af22
NIS
4918 if (PL_lex_starts) {
4919 s = PL_bufptr;
4920 PL_lex_starts = 0;
5db06880
NC
4921#ifdef PERL_MAD
4922 if (PL_madskills) {
cd81e915
NC
4923 if (PL_thistoken)
4924 sv_free(PL_thistoken);
6b29d1f5 4925 PL_thistoken = newSVpvs("");
5db06880
NC
4926 }
4927#endif
131b3ad0
DM
4928 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4929 if (PL_lex_casemods == 1 && PL_lex_inpat)
4930 OPERATOR(',');
4931 else
4932 Aop(OP_CONCAT);
79072805
LW
4933 }
4934 else
cea2e8a9 4935 return yylex();
79072805
LW
4936 }
4937
55497cff 4938 case LEX_INTERPPUSH:
bbf60fe6 4939 return REPORT(sublex_push());
55497cff 4940
79072805 4941 case LEX_INTERPSTART:
3280af22 4942 if (PL_bufptr == PL_bufend)
bbf60fe6 4943 return REPORT(sublex_done());
9da1dd8f 4944 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
b6007c36 4945 "### Interpolated variable\n"); });
3280af22 4946 PL_expect = XTERM;
491453ba
DM
4947 /* for /@a/, we leave the joining for the regex engine to do
4948 * (unless we're within \Q etc) */
4949 PL_lex_dojoin = (*PL_bufptr == '@'
4950 && (!PL_lex_inpat || PL_lex_casemods));
3280af22
NIS
4951 PL_lex_state = LEX_INTERPNORMAL;
4952 if (PL_lex_dojoin) {
cd81e915 4953 start_force(PL_curforce);
9ded7720 4954 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4955 force_next(',');
cd81e915 4956 start_force(PL_curforce);
a0d0e21e 4957 force_ident("\"", '$');
cd81e915 4958 start_force(PL_curforce);
9ded7720 4959 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4960 force_next('$');
cd81e915 4961 start_force(PL_curforce);
9ded7720 4962 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4963 force_next((2<<24)|'(');
cd81e915 4964 start_force(PL_curforce);
9ded7720 4965 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4966 force_next(FUNC);
4967 }
9da1dd8f
DM
4968 /* Convert (?{...}) and friends to 'do {...}' */
4969 if (PL_lex_inpat && *PL_bufptr == '(') {
3328ab5a 4970 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9da1dd8f
DM
4971 PL_bufptr += 2;
4972 if (*PL_bufptr != '{')
4973 PL_bufptr++;
6165f85b
DM
4974 start_force(PL_curforce);
4975 /* XXX probably need a CURMAD(something) here */
9da1dd8f
DM
4976 PL_expect = XTERMBLOCK;
4977 force_next(DO);
4978 }
4979
3280af22
NIS
4980 if (PL_lex_starts++) {
4981 s = PL_bufptr;
5db06880
NC
4982#ifdef PERL_MAD
4983 if (PL_madskills) {
cd81e915
NC
4984 if (PL_thistoken)
4985 sv_free(PL_thistoken);
6b29d1f5 4986 PL_thistoken = newSVpvs("");
5db06880
NC
4987 }
4988#endif
131b3ad0
DM
4989 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4990 if (!PL_lex_casemods && PL_lex_inpat)
4991 OPERATOR(',');
4992 else
4993 Aop(OP_CONCAT);
79072805 4994 }
cea2e8a9 4995 return yylex();
79072805
LW
4996
4997 case LEX_INTERPENDMAYBE:
3280af22
NIS
4998 if (intuit_more(PL_bufptr)) {
4999 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
5000 break;
5001 }
5002 /* FALL THROUGH */
5003
5004 case LEX_INTERPEND:
3280af22
NIS
5005 if (PL_lex_dojoin) {
5006 PL_lex_dojoin = FALSE;
5007 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
5008#ifdef PERL_MAD
5009 if (PL_madskills) {
cd81e915
NC
5010 if (PL_thistoken)
5011 sv_free(PL_thistoken);
6b29d1f5 5012 PL_thistoken = newSVpvs("");
5db06880
NC
5013 }
5014#endif
78cdf107 5015 PL_lex_allbrackets--;
bbf60fe6 5016 return REPORT(')');
79072805 5017 }
43a16006 5018 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 5019 && SvEVALED(PL_lex_repl))
43a16006 5020 {
e9fa98b2 5021 if (PL_bufptr != PL_bufend)
cea2e8a9 5022 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 5023 PL_lex_repl = NULL;
e9fa98b2 5024 }
db444266
FC
5025 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
5026 re_eval_str. If the here-doc body’s length equals the previous
5027 value of re_eval_start, re_eval_start will now be null. So
5028 check re_eval_str as well. */
3328ab5a
FC
5029 if (PL_parser->lex_shared->re_eval_start
5030 || PL_parser->lex_shared->re_eval_str) {
db444266 5031 SV *sv;
9da1dd8f
DM
5032 if (*PL_bufptr != ')')
5033 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
5034 PL_bufptr++;
5035 /* having compiled a (?{..}) expression, return the original
5036 * text too, as a const */
3328ab5a
FC
5037 if (PL_parser->lex_shared->re_eval_str) {
5038 sv = PL_parser->lex_shared->re_eval_str;
5039 PL_parser->lex_shared->re_eval_str = NULL;
5040 SvCUR_set(sv,
5041 PL_bufptr - PL_parser->lex_shared->re_eval_start);
db444266
FC
5042 SvPV_shrink_to_cur(sv);
5043 }
3328ab5a
FC
5044 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
5045 PL_bufptr - PL_parser->lex_shared->re_eval_start);
6165f85b
DM
5046 start_force(PL_curforce);
5047 /* XXX probably need a CURMAD(something) here */
5048 NEXTVAL_NEXTTOKE.opval =
9da1dd8f 5049 (OP*)newSVOP(OP_CONST, 0,
db444266 5050 sv);
9da1dd8f 5051 force_next(THING);
3328ab5a 5052 PL_parser->lex_shared->re_eval_start = NULL;
9da1dd8f
DM
5053 PL_expect = XTERM;
5054 return REPORT(',');
5055 }
5056
79072805
LW
5057 /* FALLTHROUGH */
5058 case LEX_INTERPCONCAT:
5059#ifdef DEBUGGING
3280af22 5060 if (PL_lex_brackets)
5637ef5b
NC
5061 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
5062 (long) PL_lex_brackets);
79072805 5063#endif
3280af22 5064 if (PL_bufptr == PL_bufend)
bbf60fe6 5065 return REPORT(sublex_done());
79072805 5066
9da1dd8f
DM
5067 /* m'foo' still needs to be parsed for possible (?{...}) */
5068 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
3280af22 5069 SV *sv = newSVsv(PL_linestr);
9da1dd8f 5070 sv = tokeq(sv);
6154021b 5071 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 5072 s = PL_bufend;
79072805
LW
5073 }
5074 else {
3280af22 5075 s = scan_const(PL_bufptr);
79072805 5076 if (*s == '\\')
3280af22 5077 PL_lex_state = LEX_INTERPCASEMOD;
79072805 5078 else
3280af22 5079 PL_lex_state = LEX_INTERPSTART;
79072805
LW
5080 }
5081
3280af22 5082 if (s != PL_bufptr) {
cd81e915 5083 start_force(PL_curforce);
5db06880
NC
5084 if (PL_madskills) {
5085 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
5086 }
6154021b 5087 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 5088 PL_expect = XTERM;
79072805 5089 force_next(THING);
131b3ad0 5090 if (PL_lex_starts++) {
5db06880
NC
5091#ifdef PERL_MAD
5092 if (PL_madskills) {
cd81e915
NC
5093 if (PL_thistoken)
5094 sv_free(PL_thistoken);
6b29d1f5 5095 PL_thistoken = newSVpvs("");
5db06880
NC
5096 }
5097#endif
131b3ad0
DM
5098 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
5099 if (!PL_lex_casemods && PL_lex_inpat)
5100 OPERATOR(',');
5101 else
5102 Aop(OP_CONCAT);
5103 }
79072805 5104 else {
3280af22 5105 PL_bufptr = s;
cea2e8a9 5106 return yylex();
79072805
LW
5107 }
5108 }
5109
cea2e8a9 5110 return yylex();
a0d0e21e 5111 case LEX_FORMLINE:
3280af22
NIS
5112 s = scan_formline(PL_bufptr);
5113 if (!PL_lex_formbrack)
7c70caa5 5114 {
705fe0e5 5115 formbrack = 1;
a0d0e21e 5116 goto rightbracket;
7c70caa5 5117 }
705fe0e5
FC
5118 PL_bufptr = s;
5119 return yylex();
79072805
LW
5120 }
5121
2cc6fe62
FC
5122 /* We really do *not* want PL_linestr ever becoming a COW. */
5123 assert (!SvIsCOW(PL_linestr));
3280af22
NIS
5124 s = PL_bufptr;
5125 PL_oldoldbufptr = PL_oldbufptr;
5126 PL_oldbufptr = s;
9700e2d3 5127 PL_parser->saw_infix_sigil = 0;
463ee0b2
LW
5128
5129 retry:
5db06880 5130#ifdef PERL_MAD
cd81e915
NC
5131 if (PL_thistoken) {
5132 sv_free(PL_thistoken);
5133 PL_thistoken = 0;
5db06880 5134 }
cd81e915 5135 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 5136#endif
378cc40b
LW
5137 switch (*s) {
5138 default:
32833930 5139 if (UTF ? isIDFIRST_utf8((U8*)s) : isALNUMC(*s))
834a4ddd 5140 goto keylookup;
b1fc3636 5141 {
e2f06df0
BF
5142 SV *dsv = newSVpvs_flags("", SVs_TEMP);
5143 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
5144 UTF8SKIP(s),
5145 SVs_TEMP | SVf_UTF8),
5146 10, UNI_DISPLAY_ISPRINT))
5147 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
b1fc3636
CJ
5148 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
5149 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
5150 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
5151 } else {
5152 d = PL_linestart;
5153 }
5154 *s = '\0';
e2f06df0
BF
5155 sv_setpv(dsv, d);
5156 if (UTF)
5157 SvUTF8_on(dsv);
5158 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
b1fc3636 5159 }
e929a76b
LW
5160 case 4:
5161 case 26:
5162 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 5163 case 0:
5db06880
NC
5164#ifdef PERL_MAD
5165 if (PL_madskills)
cd81e915 5166 PL_faketokens = 0;
5db06880 5167#endif
60d63348 5168 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
3280af22
NIS
5169 PL_last_uni = 0;
5170 PL_last_lop = 0;
a7aaec61
Z
5171 if (PL_lex_brackets &&
5172 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
5173 yyerror((const char *)
5174 (PL_lex_formbrack
5175 ? "Format not terminated"
5176 : "Missing right curly or square bracket"));
c5ee2135 5177 }
4e553d73 5178 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 5179 "### Tokener got EOF\n");
5f80b19c 5180 } );
79072805 5181 TOKEN(0);
463ee0b2 5182 }
3280af22 5183 if (s++ < PL_bufend)
a687059c 5184 goto retry; /* ignore stray nulls */
3280af22
NIS
5185 PL_last_uni = 0;
5186 PL_last_lop = 0;
5187 if (!PL_in_eval && !PL_preambled) {
5188 PL_preambled = TRUE;
5db06880
NC
5189#ifdef PERL_MAD
5190 if (PL_madskills)
cd81e915 5191 PL_faketokens = 1;
5db06880 5192#endif
5ab7ff98
NC
5193 if (PL_perldb) {
5194 /* Generate a string of Perl code to load the debugger.
5195 * If PERL5DB is set, it will return the contents of that,
5196 * otherwise a compile-time require of perl5db.pl. */
5197
5198 const char * const pdb = PerlEnv_getenv("PERL5DB");
5199
5200 if (pdb) {
5201 sv_setpv(PL_linestr, pdb);
5202 sv_catpvs(PL_linestr,";");
5203 } else {
5204 SETERRNO(0,SS_NORMAL);
5205 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5206 }
5207 } else
5208 sv_setpvs(PL_linestr,"");
c62eb204
NC
5209 if (PL_preambleav) {
5210 SV **svp = AvARRAY(PL_preambleav);
5211 SV **const end = svp + AvFILLp(PL_preambleav);
5212 while(svp <= end) {
5213 sv_catsv(PL_linestr, *svp);
5214 ++svp;
396482e1 5215 sv_catpvs(PL_linestr, ";");
91b7def8 5216 }
daba3364 5217 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 5218 PL_preambleav = NULL;
91b7def8 5219 }
9f639728
FR
5220 if (PL_minus_E)
5221 sv_catpvs(PL_linestr,
5222 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 5223 if (PL_minus_n || PL_minus_p) {
f0e67a1d 5224 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 5225 if (PL_minus_l)
396482e1 5226 sv_catpvs(PL_linestr,"chomp;");
3280af22 5227 if (PL_minus_a) {
3280af22 5228 if (PL_minus_F) {
3792a11b
NC
5229 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5230 || *PL_splitstr == '"')
3280af22 5231 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 5232 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 5233 else {
c8ef6a4b
NC
5234 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5235 bytes can be used as quoting characters. :-) */
dd374669 5236 const char *splits = PL_splitstr;
91d456ae 5237 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
5238 do {
5239 /* Need to \ \s */
dd374669
AL
5240 if (*splits == '\\')
5241 sv_catpvn(PL_linestr, splits, 1);
5242 sv_catpvn(PL_linestr, splits, 1);
5243 } while (*splits++);
48c4c863
NC
5244 /* This loop will embed the trailing NUL of
5245 PL_linestr as the last thing it does before
5246 terminating. */
396482e1 5247 sv_catpvs(PL_linestr, ");");
54310121 5248 }
2304df62
AD
5249 }
5250 else
396482e1 5251 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 5252 }
79072805 5253 }
396482e1 5254 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
5255 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5256 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5257 PL_last_lop = PL_last_uni = NULL;
65269a95 5258 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 5259 update_debugger_info(PL_linestr, NULL, 0);
79072805 5260 goto retry;
a687059c 5261 }
e929a76b 5262 do {
580561a3
Z
5263 fake_eof = 0;
5264 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 5265 if (0) {
7e28d3af 5266 fake_eof:
f0e67a1d
Z
5267 fake_eof = LEX_FAKE_EOF;
5268 }
5269 PL_bufptr = PL_bufend;
83944c01 5270 COPLINE_INC_WITH_HERELINES;
f0e67a1d 5271 if (!lex_next_chunk(fake_eof)) {
17cc9359 5272 CopLINE_dec(PL_curcop);
f0e67a1d
Z
5273 s = PL_bufptr;
5274 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5275 }
17cc9359 5276 CopLINE_dec(PL_curcop);
5db06880 5277#ifdef PERL_MAD
f0e67a1d 5278 if (!PL_rsfp)
cd81e915 5279 PL_realtokenstart = -1;
5db06880 5280#endif
f0e67a1d 5281 s = PL_bufptr;
7aa207d6
JH
5282 /* If it looks like the start of a BOM or raw UTF-16,
5283 * check if it in fact is. */
580561a3 5284 if (bof && PL_rsfp &&
7aa207d6
JH
5285 (*s == 0 ||
5286 *(U8*)s == 0xEF ||
5287 *(U8*)s >= 0xFE ||
5288 s[1] == 0)) {
879bc93b
DM
5289 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5290 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
5291#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5292 /* offset may include swallowed CR */
5293 if (!bof)
879bc93b 5294 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 5295#endif
7e28d3af 5296 if (bof) {
3280af22 5297 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 5298 s = swallow_bom((U8*)s);
e929a76b 5299 }
378cc40b 5300 }
737c24fc 5301 if (PL_parser->in_pod) {
a0d0e21e 5302 /* Incest with pod. */
5db06880
NC
5303#ifdef PERL_MAD
5304 if (PL_madskills)
cd81e915 5305 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 5306#endif
01a57ef7 5307 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 5308 sv_setpvs(PL_linestr, "");
3280af22
NIS
5309 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5310 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5311 PL_last_lop = PL_last_uni = NULL;
737c24fc 5312 PL_parser->in_pod = 0;
a0d0e21e 5313 }
4e553d73 5314 }
60d63348 5315 if (PL_rsfp || PL_parser->filtered)
85613cab 5316 incline(s);
737c24fc 5317 } while (PL_parser->in_pod);
3280af22 5318 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 5319 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5320 PL_last_lop = PL_last_uni = NULL;
57843af0 5321 if (CopLINE(PL_curcop) == 1) {
3280af22 5322 while (s < PL_bufend && isSPACE(*s))
79072805 5323 s++;
a0d0e21e 5324 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 5325 s++;
5db06880
NC
5326#ifdef PERL_MAD
5327 if (PL_madskills)
cd81e915 5328 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 5329#endif
bd61b366 5330 d = NULL;
3280af22 5331 if (!PL_in_eval) {
44a8e56a 5332 if (*s == '#' && *(s+1) == '!')
5333 d = s + 2;
5334#ifdef ALTERNATE_SHEBANG
5335 else {
bfed75c6 5336 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 5337 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5338 d = s + (sizeof(as) - 1);
5339 }
5340#endif /* ALTERNATE_SHEBANG */
5341 }
5342 if (d) {
b8378b72 5343 char *ipath;
774d564b 5344 char *ipathend;
b8378b72 5345
774d564b 5346 while (isSPACE(*d))
b8378b72
CS
5347 d++;
5348 ipath = d;
774d564b 5349 while (*d && !isSPACE(*d))
5350 d++;
5351 ipathend = d;
5352
5353#ifdef ARG_ZERO_IS_SCRIPT
5354 if (ipathend > ipath) {
5355 /*
5356 * HP-UX (at least) sets argv[0] to the script name,
5357 * which makes $^X incorrect. And Digital UNIX and Linux,
5358 * at least, set argv[0] to the basename of the Perl
5359 * interpreter. So, having found "#!", we'll set it right.
5360 */
fafc274c
NC
5361 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5362 SVt_PV)); /* $^X */
774d564b 5363 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 5364 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 5365 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 5366 SvSETMAGIC(x);
5367 }
556c1dec
JH
5368 else {
5369 STRLEN blen;
5370 STRLEN llen;
cfd0369c 5371 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 5372 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
5373 if (llen < blen) {
5374 bstart += blen - llen;
5375 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5376 sv_setpvn(x, ipath, ipathend - ipath);
5377 SvSETMAGIC(x);
5378 }
5379 }
5380 }
774d564b 5381 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 5382 }
774d564b 5383#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
5384
5385 /*
5386 * Look for options.
5387 */
748a9306 5388 d = instr(s,"perl -");
84e30d1a 5389 if (!d) {
748a9306 5390 d = instr(s,"perl");
84e30d1a
GS
5391#if defined(DOSISH)
5392 /* avoid getting into infinite loops when shebang
5393 * line contains "Perl" rather than "perl" */
5394 if (!d) {
5395 for (d = ipathend-4; d >= ipath; --d) {
5396 if ((*d == 'p' || *d == 'P')
5397 && !ibcmp(d, "perl", 4))
5398 {
5399 break;
5400 }
5401 }
5402 if (d < ipath)
bd61b366 5403 d = NULL;
84e30d1a
GS
5404 }
5405#endif
5406 }
44a8e56a 5407#ifdef ALTERNATE_SHEBANG
5408 /*
5409 * If the ALTERNATE_SHEBANG on this system starts with a
5410 * character that can be part of a Perl expression, then if
5411 * we see it but not "perl", we're probably looking at the
5412 * start of Perl code, not a request to hand off to some
5413 * other interpreter. Similarly, if "perl" is there, but
5414 * not in the first 'word' of the line, we assume the line
5415 * contains the start of the Perl program.
44a8e56a 5416 */
5417 if (d && *s != '#') {
f54cb97a 5418 const char *c = ipath;
44a8e56a 5419 while (*c && !strchr("; \t\r\n\f\v#", *c))
5420 c++;
5421 if (c < d)
bd61b366 5422 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 5423 else
5424 *s = '#'; /* Don't try to parse shebang line */
5425 }
774d564b 5426#endif /* ALTERNATE_SHEBANG */
748a9306 5427 if (!d &&
44a8e56a 5428 *s == '#' &&
774d564b 5429 ipathend > ipath &&
3280af22 5430 !PL_minus_c &&
748a9306 5431 !instr(s,"indir") &&
3280af22 5432 instr(PL_origargv[0],"perl"))
748a9306 5433 {
27da23d5 5434 dVAR;
9f68db38 5435 char **newargv;
9f68db38 5436
774d564b 5437 *ipathend = '\0';
5438 s = ipathend + 1;
3280af22 5439 while (s < PL_bufend && isSPACE(*s))
9f68db38 5440 s++;
3280af22 5441 if (s < PL_bufend) {
d85f917e 5442 Newx(newargv,PL_origargc+3,char*);
9f68db38 5443 newargv[1] = s;
3280af22 5444 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
5445 s++;
5446 *s = '\0';
3280af22 5447 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
5448 }
5449 else
3280af22 5450 newargv = PL_origargv;
774d564b 5451 newargv[0] = ipath;
b35112e7 5452 PERL_FPU_PRE_EXEC
b4748376 5453 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 5454 PERL_FPU_POST_EXEC
cea2e8a9 5455 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 5456 }
748a9306 5457 if (d) {
c35e046a
AL
5458 while (*d && !isSPACE(*d))
5459 d++;
5460 while (SPACE_OR_TAB(*d))
5461 d++;
748a9306
LW
5462
5463 if (*d++ == '-') {
f54cb97a 5464 const bool switches_done = PL_doswitches;
fb993905
GA
5465 const U32 oldpdb = PL_perldb;
5466 const bool oldn = PL_minus_n;
5467 const bool oldp = PL_minus_p;
c7030b81 5468 const char *d1 = d;
fb993905 5469
8cc95fdb 5470 do {
4ba71d51
FC
5471 bool baduni = FALSE;
5472 if (*d1 == 'C') {
bd0ab00d
NC
5473 const char *d2 = d1 + 1;
5474 if (parse_unicode_opts((const char **)&d2)
5475 != PL_unicode)
5476 baduni = TRUE;
4ba71d51
FC
5477 }
5478 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
5479 const char * const m = d1;
5480 while (*d1 && !isSPACE(*d1))
5481 d1++;
cea2e8a9 5482 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 5483 (int)(d1 - m), m);
8cc95fdb 5484 }
c7030b81
NC
5485 d1 = moreswitches(d1);
5486 } while (d1);
f0b2cf55
YST
5487 if (PL_doswitches && !switches_done) {
5488 int argc = PL_origargc;
5489 char **argv = PL_origargv;
5490 do {
5491 argc--,argv++;
5492 } while (argc && argv[0][0] == '-' && argv[0][1]);
5493 init_argv_symbols(argc,argv);
5494 }
65269a95 5495 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5496 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5497 /* if we have already added "LINE: while (<>) {",
5498 we must not do it again */
748a9306 5499 {
76f68e9b 5500 sv_setpvs(PL_linestr, "");
3280af22
NIS
5501 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5502 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5503 PL_last_lop = PL_last_uni = NULL;
3280af22 5504 PL_preambled = FALSE;
65269a95 5505 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5506 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5507 goto retry;
5508 }
a0d0e21e 5509 }
79072805 5510 }
9f68db38 5511 }
79072805 5512 }
3280af22 5513 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5514 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5515 start_force(PL_curforce);
5516 NEXTVAL_NEXTTOKE.ival = 0;
5517 force_next(FORMRBRACK);
5518 TOKEN(';');
ae986130 5519 }
378cc40b 5520 goto retry;
4fdae800 5521 case '\r':
6a27c188 5522#ifdef PERL_STRICT_CR
cea2e8a9 5523 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5524 Perl_croak(aTHX_
cc507455 5525 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5526#endif
4fdae800 5527 case ' ': case '\t': case '\f': case 013:
5db06880 5528#ifdef PERL_MAD
cd81e915 5529 PL_realtokenstart = -1;
1cac5c33
FC
5530 if (PL_madskills) {
5531 if (!PL_thiswhite)
ac372eb8 5532 PL_thiswhite = newSVpvs("");
1cac5c33
FC
5533 sv_catpvn(PL_thiswhite, s, 1);
5534 }
5db06880 5535#endif
ac372eb8 5536 s++;
378cc40b 5537 goto retry;
378cc40b 5538 case '#':
e929a76b 5539 case '\n':
5db06880 5540#ifdef PERL_MAD
cd81e915 5541 PL_realtokenstart = -1;
5db06880 5542 if (PL_madskills)
cd81e915 5543 PL_faketokens = 0;
5db06880 5544#endif
60d63348 5545 if (PL_lex_state != LEX_NORMAL ||
62e4c90a
FC
5546 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5547 if (*s == '#' && s == PL_linestart && PL_in_eval
60d63348 5548 && !PL_rsfp && !PL_parser->filtered) {
df0deb90
GS
5549 /* handle eval qq[#line 1 "foo"\n ...] */
5550 CopLINE_dec(PL_curcop);
5551 incline(s);
5552 }
5db06880
NC
5553 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5554 s = SKIPSPACE0(s);
62e4c90a 5555 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5db06880
NC
5556 incline(s);
5557 }
5558 else {
9c74ccc9 5559 const bool in_comment = *s == '#';
5db06880
NC
5560 d = s;
5561 while (d < PL_bufend && *d != '\n')
5562 d++;
5563 if (d < PL_bufend)
5564 d++;
5565 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5637ef5b
NC
5566 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5567 d, PL_bufend);
5db06880
NC
5568#ifdef PERL_MAD
5569 if (PL_madskills)
cd81e915 5570 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5571#endif
5572 s = d;
9c74ccc9
FC
5573 if (in_comment && d == PL_bufend
5574 && PL_lex_state == LEX_INTERPNORMAL
90a536e1 5575 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
9c74ccc9
FC
5576 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5577 else incline(s);
5db06880 5578 }
3280af22 5579 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5580 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5581 start_force(PL_curforce);
5582 NEXTVAL_NEXTTOKE.ival = 0;
5583 force_next(FORMRBRACK);
5584 TOKEN(';');
a687059c 5585 }
378cc40b 5586 }
a687059c 5587 else {
5db06880
NC
5588#ifdef PERL_MAD
5589 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5590 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5591 PL_faketokens = 0;
5db06880
NC
5592 s = SKIPSPACE0(s);
5593 TOKEN(PEG); /* make sure any #! line is accessible */
5594 }
5595 s = SKIPSPACE0(s);
5596 }
5597 else {
5598/* if (PL_madskills && PL_lex_formbrack) { */
5599 d = s;
5600 while (d < PL_bufend && *d != '\n')
5601 d++;
5602 if (d < PL_bufend)
5603 d++;
5604 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5605 Perl_croak(aTHX_ "panic: input overflow");
5606 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5607 if (!PL_thiswhite)
6b29d1f5 5608 PL_thiswhite = newSVpvs("");
5db06880 5609 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5610 sv_setpvs(PL_thiswhite, "");
cd81e915 5611 PL_faketokens = 0;
5db06880 5612 }
cd81e915 5613 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5614 }
5615 s = d;
5616/* }
5617 *s = '\0';
5618 PL_bufend = s; */
5619 }
5620#else
21791330
FC
5621 while (s < PL_bufend && *s != '\n')
5622 s++;
5623 if (s < PL_bufend)
5624 s++;
5625 else if (s > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5626 Perl_croak(aTHX_ "panic: input overflow");
5db06880 5627#endif
a687059c 5628 }
378cc40b
LW
5629 goto retry;
5630 case '-':
0eb30aeb 5631 if (s[1] && isALPHA(s[1]) && !isWORDCHAR(s[2])) {
e5edeb50 5632 I32 ftst = 0;
90771dc0 5633 char tmp;
e5edeb50 5634
378cc40b 5635 s++;
3280af22 5636 PL_bufptr = s;
748a9306
LW
5637 tmp = *s++;
5638
bf4acbe4 5639 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5640 s++;
5641
5642 if (strnEQ(s,"=>",2)) {
345b3785 5643 s = force_word(PL_bufptr,WORD,FALSE,FALSE);
931e0695 5644 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5645 OPERATOR('-'); /* unary minus */
5646 }
3280af22 5647 PL_last_uni = PL_oldbufptr;
748a9306 5648 switch (tmp) {
e5edeb50
JH
5649 case 'r': ftst = OP_FTEREAD; break;
5650 case 'w': ftst = OP_FTEWRITE; break;
5651 case 'x': ftst = OP_FTEEXEC; break;
5652 case 'o': ftst = OP_FTEOWNED; break;
5653 case 'R': ftst = OP_FTRREAD; break;
5654 case 'W': ftst = OP_FTRWRITE; break;
5655 case 'X': ftst = OP_FTREXEC; break;
5656 case 'O': ftst = OP_FTROWNED; break;
5657 case 'e': ftst = OP_FTIS; break;
5658 case 'z': ftst = OP_FTZERO; break;
5659 case 's': ftst = OP_FTSIZE; break;
5660 case 'f': ftst = OP_FTFILE; break;
5661 case 'd': ftst = OP_FTDIR; break;
5662 case 'l': ftst = OP_FTLINK; break;
5663 case 'p': ftst = OP_FTPIPE; break;
5664 case 'S': ftst = OP_FTSOCK; break;
5665 case 'u': ftst = OP_FTSUID; break;
5666 case 'g': ftst = OP_FTSGID; break;
5667 case 'k': ftst = OP_FTSVTX; break;
5668 case 'b': ftst = OP_FTBLK; break;
5669 case 'c': ftst = OP_FTCHR; break;
5670 case 't': ftst = OP_FTTTY; break;
5671 case 'T': ftst = OP_FTTEXT; break;
5672 case 'B': ftst = OP_FTBINARY; break;
5673 case 'M': case 'A': case 'C':
fafc274c 5674 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5675 switch (tmp) {
5676 case 'M': ftst = OP_FTMTIME; break;
5677 case 'A': ftst = OP_FTATIME; break;
5678 case 'C': ftst = OP_FTCTIME; break;
5679 default: break;
5680 }
5681 break;
378cc40b 5682 default:
378cc40b
LW
5683 break;
5684 }
e5edeb50 5685 if (ftst) {
eb160463 5686 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5687 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5688 "### Saw file test %c\n", (int)tmp);
5f80b19c 5689 } );
e5edeb50
JH
5690 FTST(ftst);
5691 }
5692 else {
5693 /* Assume it was a minus followed by a one-letter named
5694 * subroutine call (or a -bareword), then. */
95c31fe3 5695 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5696 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5697 (int) tmp);
5f80b19c 5698 } );
3cf7b4c4 5699 s = --PL_bufptr;
e5edeb50 5700 }
378cc40b 5701 }
90771dc0
NC
5702 {
5703 const char tmp = *s++;
5704 if (*s == tmp) {
5705 s++;
5706 if (PL_expect == XOPERATOR)
5707 TERM(POSTDEC);
5708 else
5709 OPERATOR(PREDEC);
5710 }
5711 else if (*s == '>') {
5712 s++;
29595ff2 5713 s = SKIPSPACE1(s);
90771dc0 5714 if (isIDFIRST_lazy_if(s,UTF)) {
345b3785 5715 s = force_word(s,METHOD,FALSE,TRUE);
90771dc0
NC
5716 TOKEN(ARROW);
5717 }
5718 else if (*s == '$')
5719 OPERATOR(ARROW);
5720 else
5721 TERM(ARROW);
5722 }
78cdf107
Z
5723 if (PL_expect == XOPERATOR) {
5724 if (*s == '=' && !PL_lex_allbrackets &&
5725 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5726 s--;
5727 TOKEN(0);
5728 }
90771dc0 5729 Aop(OP_SUBTRACT);
78cdf107 5730 }
90771dc0
NC
5731 else {
5732 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5733 check_uni();
5734 OPERATOR('-'); /* unary minus */
79072805 5735 }
2f3197b3 5736 }
79072805 5737
378cc40b 5738 case '+':
90771dc0
NC
5739 {
5740 const char tmp = *s++;
5741 if (*s == tmp) {
5742 s++;
5743 if (PL_expect == XOPERATOR)
5744 TERM(POSTINC);
5745 else
5746 OPERATOR(PREINC);
5747 }
78cdf107
Z
5748 if (PL_expect == XOPERATOR) {
5749 if (*s == '=' && !PL_lex_allbrackets &&
5750 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5751 s--;
5752 TOKEN(0);
5753 }
90771dc0 5754 Aop(OP_ADD);
78cdf107 5755 }
90771dc0
NC
5756 else {
5757 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5758 check_uni();
5759 OPERATOR('+');
5760 }
2f3197b3 5761 }
a687059c 5762
378cc40b 5763 case '*':
3280af22
NIS
5764 if (PL_expect != XOPERATOR) {
5765 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5766 PL_expect = XOPERATOR;
5767 force_ident(PL_tokenbuf, '*');
5768 if (!*PL_tokenbuf)
a0d0e21e 5769 PREREF('*');
79072805 5770 TERM('*');
a687059c 5771 }
79072805
LW
5772 s++;
5773 if (*s == '*') {
a687059c 5774 s++;
78cdf107
Z
5775 if (*s == '=' && !PL_lex_allbrackets &&
5776 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5777 s -= 2;
5778 TOKEN(0);
5779 }
79072805 5780 PWop(OP_POW);
a687059c 5781 }
78cdf107
Z
5782 if (*s == '=' && !PL_lex_allbrackets &&
5783 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5784 s--;
5785 TOKEN(0);
5786 }
9700e2d3 5787 PL_parser->saw_infix_sigil = 1;
79072805
LW
5788 Mop(OP_MULTIPLY);
5789
378cc40b 5790 case '%':
3280af22 5791 if (PL_expect == XOPERATOR) {
78cdf107
Z
5792 if (s[1] == '=' && !PL_lex_allbrackets &&
5793 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5794 TOKEN(0);
bbce6d69 5795 ++s;
9700e2d3 5796 PL_parser->saw_infix_sigil = 1;
bbce6d69 5797 Mop(OP_MODULO);
a687059c 5798 }
3280af22 5799 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5800 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5801 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5802 if (!PL_tokenbuf[1]) {
bbce6d69 5803 PREREF('%');
a687059c 5804 }
60ac52eb
FC
5805 PL_expect = XOPERATOR;
5806 force_ident_maybe_lex('%');
bbce6d69 5807 TERM('%');
a687059c 5808
378cc40b 5809 case '^':
78cdf107
Z
5810 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5811 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5812 TOKEN(0);
79072805 5813 s++;
a0d0e21e 5814 BOop(OP_BIT_XOR);
79072805 5815 case '[':
a7aaec61
Z
5816 if (PL_lex_brackets > 100)
5817 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5818 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5819 PL_lex_allbrackets++;
df3467db
IG
5820 {
5821 const char tmp = *s++;
5822 OPERATOR(tmp);
5823 }
378cc40b 5824 case '~':
0d863452 5825 if (s[1] == '~'
3e7dd34d 5826 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5827 {
78cdf107
Z
5828 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5829 TOKEN(0);
0d863452 5830 s += 2;
0f539b13
BF
5831 Perl_ck_warner_d(aTHX_
5832 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
5833 "Smartmatch is experimental");
0d863452
RH
5834 Eop(OP_SMARTMATCH);
5835 }
78cdf107
Z
5836 s++;
5837 OPERATOR('~');
378cc40b 5838 case ',':
78cdf107
Z
5839 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5840 TOKEN(0);
5841 s++;
5842 OPERATOR(',');
a0d0e21e
LW
5843 case ':':
5844 if (s[1] == ':') {
5845 len = 0;
0bfa2a8a 5846 goto just_a_word_zero_gv;
a0d0e21e
LW
5847 }
5848 s++;
09bef843
SB
5849 switch (PL_expect) {
5850 OP *attrs;
5db06880
NC
5851#ifdef PERL_MAD
5852 I32 stuffstart;
5853#endif
09bef843
SB
5854 case XOPERATOR:
5855 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5856 break;
5857 PL_bufptr = s; /* update in case we back off */
d83f38d8 5858 if (*s == '=') {
2dc78664
NC
5859 Perl_croak(aTHX_
5860 "Use of := for an empty attribute list is not allowed");
d83f38d8 5861 }
09bef843
SB
5862 goto grabattrs;
5863 case XATTRBLOCK:
5864 PL_expect = XBLOCK;
5865 goto grabattrs;
5866 case XATTRTERM:
5867 PL_expect = XTERMBLOCK;
5868 grabattrs:
5db06880
NC
5869#ifdef PERL_MAD
5870 stuffstart = s - SvPVX(PL_linestr) - 1;
5871#endif
29595ff2 5872 s = PEEKSPACE(s);
5f66b61c 5873 attrs = NULL;
7e2040f0 5874 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5875 I32 tmp;
5cc237b8 5876 SV *sv;
09bef843 5877 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5878 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5879 if (tmp < 0) tmp = -tmp;
5880 switch (tmp) {
5881 case KEY_or:
5882 case KEY_and:
5883 case KEY_for:
11baf631 5884 case KEY_foreach:
f9829d6b
GS
5885 case KEY_unless:
5886 case KEY_if:
5887 case KEY_while:
5888 case KEY_until:
5889 goto got_attrs;
5890 default:
5891 break;
5892 }
5893 }
89a5757c 5894 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
09bef843 5895 if (*d == '(') {
4d68ffa0 5896 d = scan_str(d,TRUE,TRUE,FALSE, FALSE);
09bef843 5897 if (!d) {
09bef843
SB
5898 /* MUST advance bufptr here to avoid bogus
5899 "at end of line" context messages from yyerror().
5900 */
5901 PL_bufptr = s + len;
5902 yyerror("Unterminated attribute parameter in attribute list");
5903 if (attrs)
5904 op_free(attrs);
5cc237b8 5905 sv_free(sv);
bbf60fe6 5906 return REPORT(0); /* EOF indicator */
09bef843
SB
5907 }
5908 }
5909 if (PL_lex_stuff) {
09bef843 5910 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5911 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5912 newSVOP(OP_CONST, 0, sv));
5913 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5914 PL_lex_stuff = NULL;
09bef843
SB
5915 }
5916 else {
5cc237b8
BS
5917 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5918 sv_free(sv);
1108974d 5919 if (PL_in_my == KEY_our) {
df9a6019 5920 deprecate(":unique");
1108974d 5921 }
bfed75c6 5922 else
371fce9b
DM
5923 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5924 }
5925
d3cea301
SB
5926 /* NOTE: any CV attrs applied here need to be part of
5927 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5928 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5929 sv_free(sv);
78f9721b 5930 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5931 }
5932 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5933 sv_free(sv);
8e5dadda 5934 deprecate(":locked");
5cc237b8
BS
5935 }
5936 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5937 sv_free(sv);
78f9721b 5938 CvMETHOD_on(PL_compcv);
5cc237b8 5939 }
78f9721b
SM
5940 /* After we've set the flags, it could be argued that
5941 we don't need to do the attributes.pm-based setting
5942 process, and shouldn't bother appending recognized
d3cea301
SB
5943 flags. To experiment with that, uncomment the
5944 following "else". (Note that's already been
5945 uncommented. That keeps the above-applied built-in
5946 attributes from being intercepted (and possibly
5947 rejected) by a package's attribute routines, but is
5948 justified by the performance win for the common case
5949 of applying only built-in attributes.) */
0256094b 5950 else
2fcb4757 5951 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5952 newSVOP(OP_CONST, 0,
5cc237b8 5953 sv));
09bef843 5954 }
29595ff2 5955 s = PEEKSPACE(d);
0120eecf 5956 if (*s == ':' && s[1] != ':')
29595ff2 5957 s = PEEKSPACE(s+1);
0120eecf
GS
5958 else if (s == d)
5959 break; /* require real whitespace or :'s */
29595ff2 5960 /* XXX losing whitespace on sequential attributes here */
09bef843 5961 }
90771dc0
NC
5962 {
5963 const char tmp
5964 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5965 if (*s != ';' && *s != '}' && *s != tmp
5966 && (tmp != '=' || *s != ')')) {
5967 const char q = ((*s == '\'') ? '"' : '\'');
5968 /* If here for an expression, and parsed no attrs, back
5969 off. */
5970 if (tmp == '=' && !attrs) {
5971 s = PL_bufptr;
5972 break;
5973 }
5974 /* MUST advance bufptr here to avoid bogus "at end of line"
5975 context messages from yyerror().
5976 */
5977 PL_bufptr = s;
10edeb5d
JH
5978 yyerror( (const char *)
5979 (*s
5980 ? Perl_form(aTHX_ "Invalid separator character "
5981 "%c%c%c in attribute list", q, *s, q)
5982 : "Unterminated attribute list" ) );
90771dc0
NC
5983 if (attrs)
5984 op_free(attrs);
5985 OPERATOR(':');
09bef843 5986 }
09bef843 5987 }
f9829d6b 5988 got_attrs:
09bef843 5989 if (attrs) {
cd81e915 5990 start_force(PL_curforce);
9ded7720 5991 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5992 CURMAD('_', PL_nextwhite);
89122651 5993 force_next(THING);
5db06880
NC
5994 }
5995#ifdef PERL_MAD
5996 if (PL_madskills) {
cd81e915 5997 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5998 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5999 }
5db06880 6000#endif
09bef843
SB
6001 TOKEN(COLONATTR);
6002 }
78cdf107
Z
6003 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
6004 s--;
6005 TOKEN(0);
6006 }
6007 PL_lex_allbrackets--;
a0d0e21e 6008 OPERATOR(':');
8990e307
LW
6009 case '(':
6010 s++;
3280af22
NIS
6011 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
6012 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 6013 else
3280af22 6014 PL_expect = XTERM;
29595ff2 6015 s = SKIPSPACE1(s);
78cdf107 6016 PL_lex_allbrackets++;
a0d0e21e 6017 TOKEN('(');
378cc40b 6018 case ';':
78cdf107
Z
6019 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
6020 TOKEN(0);
f4dd75d9 6021 CLINE;
78cdf107
Z
6022 s++;
6023 OPERATOR(';');
378cc40b 6024 case ')':
78cdf107
Z
6025 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
6026 TOKEN(0);
6027 s++;
6028 PL_lex_allbrackets--;
6029 s = SKIPSPACE1(s);
6030 if (*s == '{')
6031 PREBLOCK(')');
6032 TERM(')');
79072805 6033 case ']':
a7aaec61
Z
6034 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6035 TOKEN(0);
79072805 6036 s++;
3280af22 6037 if (PL_lex_brackets <= 0)
d98d5fff 6038 yyerror("Unmatched right square bracket");
463ee0b2 6039 else
3280af22 6040 --PL_lex_brackets;
78cdf107 6041 PL_lex_allbrackets--;
3280af22
NIS
6042 if (PL_lex_state == LEX_INTERPNORMAL) {
6043 if (PL_lex_brackets == 0) {
02255c60
FC
6044 if (*s == '-' && s[1] == '>')
6045 PL_lex_state = LEX_INTERPENDMAYBE;
6046 else if (*s != '[' && *s != '{')
3280af22 6047 PL_lex_state = LEX_INTERPEND;
79072805
LW
6048 }
6049 }
4633a7c4 6050 TERM(']');
79072805 6051 case '{':
79072805 6052 s++;
eaf6a13d 6053 leftbracket:
3280af22 6054 if (PL_lex_brackets > 100) {
8edd5f42 6055 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 6056 }
3280af22 6057 switch (PL_expect) {
a0d0e21e 6058 case XTERM:
819b004e 6059 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 6060 PL_lex_allbrackets++;
79072805 6061 OPERATOR(HASHBRACK);
a0d0e21e 6062 case XOPERATOR:
bf4acbe4 6063 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 6064 s++;
44a8e56a 6065 d = s;
3280af22
NIS
6066 PL_tokenbuf[0] = '\0';
6067 if (d < PL_bufend && *d == '-') {
6068 PL_tokenbuf[0] = '-';
44a8e56a 6069 d++;
bf4acbe4 6070 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 6071 d++;
6072 }
7e2040f0 6073 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 6074 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 6075 FALSE, &len);
bf4acbe4 6076 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
6077 d++;
6078 if (*d == '}') {
f54cb97a 6079 const char minus = (PL_tokenbuf[0] == '-');
345b3785 6080 s = force_word(s + minus, WORD, FALSE, TRUE);
44a8e56a 6081 if (minus)
6082 force_next('-');
748a9306
LW
6083 }
6084 }
6085 /* FALL THROUGH */
09bef843 6086 case XATTRBLOCK:
748a9306 6087 case XBLOCK:
3280af22 6088 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 6089 PL_lex_allbrackets++;
3280af22 6090 PL_expect = XSTATE;
a0d0e21e 6091 break;
09bef843 6092 case XATTRTERM:
a0d0e21e 6093 case XTERMBLOCK:
3280af22 6094 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 6095 PL_lex_allbrackets++;
3280af22 6096 PL_expect = XSTATE;
a0d0e21e
LW
6097 break;
6098 default: {
f54cb97a 6099 const char *t;
3280af22
NIS
6100 if (PL_oldoldbufptr == PL_last_lop)
6101 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 6102 else
3280af22 6103 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 6104 PL_lex_allbrackets++;
29595ff2 6105 s = SKIPSPACE1(s);
8452ff4b
SB
6106 if (*s == '}') {
6107 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
6108 PL_expect = XTERM;
6109 /* This hack is to get the ${} in the message. */
6110 PL_bufptr = s+1;
6111 yyerror("syntax error");
6112 break;
6113 }
a0d0e21e 6114 OPERATOR(HASHBRACK);
8452ff4b 6115 }
b8a4b1be
GS
6116 /* This hack serves to disambiguate a pair of curlies
6117 * as being a block or an anon hash. Normally, expectation
6118 * determines that, but in cases where we're not in a
6119 * position to expect anything in particular (like inside
6120 * eval"") we have to resolve the ambiguity. This code
6121 * covers the case where the first term in the curlies is a
6122 * quoted string. Most other cases need to be explicitly
a0288114 6123 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
6124 * curly in order to force resolution as an anon hash.
6125 *
6126 * XXX should probably propagate the outer expectation
6127 * into eval"" to rely less on this hack, but that could
6128 * potentially break current behavior of eval"".
6129 * GSAR 97-07-21
6130 */
6131 t = s;
6132 if (*s == '\'' || *s == '"' || *s == '`') {
6133 /* common case: get past first string, handling escapes */
3280af22 6134 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
6135 if (*t++ == '\\' && (*t == '\\' || *t == *s))
6136 t++;
6137 t++;
a0d0e21e 6138 }
b8a4b1be 6139 else if (*s == 'q') {
3280af22 6140 if (++t < PL_bufend
0eb30aeb 6141 && (!isWORDCHAR(*t)
3280af22 6142 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0eb30aeb 6143 && !isWORDCHAR(*t))))
0505442f 6144 {
abc667d1 6145 /* skip q//-like construct */
f54cb97a 6146 const char *tmps;
b8a4b1be
GS
6147 char open, close, term;
6148 I32 brackets = 1;
6149
3280af22 6150 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 6151 t++;
abc667d1
DM
6152 /* check for q => */
6153 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
6154 OPERATOR(HASHBRACK);
6155 }
b8a4b1be
GS
6156 term = *t;
6157 open = term;
6158 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
6159 term = tmps[5];
6160 close = term;
6161 if (open == close)
3280af22
NIS
6162 for (t++; t < PL_bufend; t++) {
6163 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 6164 t++;
6d07e5e9 6165 else if (*t == open)
b8a4b1be
GS
6166 break;
6167 }
abc667d1 6168 else {
3280af22
NIS
6169 for (t++; t < PL_bufend; t++) {
6170 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 6171 t++;
6d07e5e9 6172 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
6173 break;
6174 else if (*t == open)
6175 brackets++;
6176 }
abc667d1
DM
6177 }
6178 t++;
b8a4b1be 6179 }
abc667d1
DM
6180 else
6181 /* skip plain q word */
8a2bca12 6182 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
abc667d1 6183 t += UTF8SKIP(t);
a0d0e21e 6184 }
8a2bca12 6185 else if (isWORDCHAR_lazy_if(t,UTF)) {
0505442f 6186 t += UTF8SKIP(t);
8a2bca12 6187 while (t < PL_bufend && isWORDCHAR_lazy_if(t,UTF))
0505442f 6188 t += UTF8SKIP(t);
a0d0e21e 6189 }
3280af22 6190 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 6191 t++;
b8a4b1be
GS
6192 /* if comma follows first term, call it an anon hash */
6193 /* XXX it could be a comma expression with loop modifiers */
3280af22 6194 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 6195 || (*t == '=' && t[1] == '>')))
a0d0e21e 6196 OPERATOR(HASHBRACK);
3280af22 6197 if (PL_expect == XREF)
4e4e412b 6198 PL_expect = XTERM;
a0d0e21e 6199 else {
3280af22
NIS
6200 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6201 PL_expect = XSTATE;
a0d0e21e 6202 }
8990e307 6203 }
a0d0e21e 6204 break;
463ee0b2 6205 }
6154021b 6206 pl_yylval.ival = CopLINE(PL_curcop);
79072805 6207 if (isSPACE(*s) || *s == '#')
3280af22 6208 PL_copline = NOLINE; /* invalidate current command line number */
7c70caa5 6209 TOKEN(formbrack ? '=' : '{');
378cc40b 6210 case '}':
a7aaec61
Z
6211 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6212 TOKEN(0);
79072805
LW
6213 rightbracket:
6214 s++;
3280af22 6215 if (PL_lex_brackets <= 0)
d98d5fff 6216 yyerror("Unmatched right curly bracket");
463ee0b2 6217 else
3280af22 6218 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 6219 PL_lex_allbrackets--;
3280af22
NIS
6220 if (PL_lex_state == LEX_INTERPNORMAL) {
6221 if (PL_lex_brackets == 0) {
9059aa12
LW
6222 if (PL_expect & XFAKEBRACK) {
6223 PL_expect &= XENUMMASK;
3280af22
NIS
6224 PL_lex_state = LEX_INTERPEND;
6225 PL_bufptr = s;
5db06880
NC
6226#if 0
6227 if (PL_madskills) {
cd81e915 6228 if (!PL_thiswhite)
6b29d1f5 6229 PL_thiswhite = newSVpvs("");
76f68e9b 6230 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
6231 }
6232#endif
cea2e8a9 6233 return yylex(); /* ignore fake brackets */
79072805 6234 }
f777953f 6235 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6b00f562
FC
6236 && SvEVALED(PL_lex_repl))
6237 PL_lex_state = LEX_INTERPEND;
6238 else if (*s == '-' && s[1] == '>')
3280af22 6239 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 6240 else if (*s != '[' && *s != '{')
3280af22 6241 PL_lex_state = LEX_INTERPEND;
79072805
LW
6242 }
6243 }
9059aa12
LW
6244 if (PL_expect & XFAKEBRACK) {
6245 PL_expect &= XENUMMASK;
3280af22 6246 PL_bufptr = s;
cea2e8a9 6247 return yylex(); /* ignore fake brackets */
748a9306 6248 }
cd81e915 6249 start_force(PL_curforce);
5db06880
NC
6250 if (PL_madskills) {
6251 curmad('X', newSVpvn(s-1,1));
cd81e915 6252 CURMAD('_', PL_thiswhite);
5db06880 6253 }
7c70caa5 6254 force_next(formbrack ? '.' : '}');
583c9d5c 6255 if (formbrack) LEAVE;
5db06880 6256#ifdef PERL_MAD
1cac5c33 6257 if (PL_madskills && !PL_thistoken)
6b29d1f5 6258 PL_thistoken = newSVpvs("");
5db06880 6259#endif
705fe0e5
FC
6260 if (formbrack == 2) { /* means . where arguments were expected */
6261 start_force(PL_curforce);
6262 force_next(';');
96f9b782 6263 TOKEN(FORMRBRACK);
705fe0e5 6264 }
79072805 6265 TOKEN(';');
378cc40b
LW
6266 case '&':
6267 s++;
78cdf107
Z
6268 if (*s++ == '&') {
6269 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6270 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6271 s -= 2;
6272 TOKEN(0);
6273 }
a0d0e21e 6274 AOPERATOR(ANDAND);
78cdf107 6275 }
378cc40b 6276 s--;
3280af22 6277 if (PL_expect == XOPERATOR) {
041457d9
DM
6278 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6279 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 6280 {
57843af0 6281 CopLINE_dec(PL_curcop);
f1f66076 6282 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6283 CopLINE_inc(PL_curcop);
463ee0b2 6284 }
78cdf107
Z
6285 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6286 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6287 s--;
6288 TOKEN(0);
6289 }
9700e2d3 6290 PL_parser->saw_infix_sigil = 1;
79072805 6291 BAop(OP_BIT_AND);
463ee0b2 6292 }
79072805 6293
c07656ed
FC
6294 PL_tokenbuf[0] = '&';
6295 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6296 sizeof PL_tokenbuf - 1, TRUE);
6297 if (PL_tokenbuf[1]) {
3280af22 6298 PL_expect = XOPERATOR;
60ac52eb 6299 force_ident_maybe_lex('&');
463ee0b2 6300 }
79072805
LW
6301 else
6302 PREREF('&');
6154021b 6303 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
6304 TERM('&');
6305
378cc40b
LW
6306 case '|':
6307 s++;
78cdf107
Z
6308 if (*s++ == '|') {
6309 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6310 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6311 s -= 2;
6312 TOKEN(0);
6313 }
a0d0e21e 6314 AOPERATOR(OROR);
78cdf107 6315 }
378cc40b 6316 s--;
78cdf107
Z
6317 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6318 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6319 s--;
6320 TOKEN(0);
6321 }
79072805 6322 BOop(OP_BIT_OR);
378cc40b
LW
6323 case '=':
6324 s++;
748a9306 6325 {
90771dc0 6326 const char tmp = *s++;
78cdf107
Z
6327 if (tmp == '=') {
6328 if (!PL_lex_allbrackets &&
6329 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6330 s -= 2;
6331 TOKEN(0);
6332 }
90771dc0 6333 Eop(OP_EQ);
78cdf107
Z
6334 }
6335 if (tmp == '>') {
6336 if (!PL_lex_allbrackets &&
6337 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6338 s -= 2;
6339 TOKEN(0);
6340 }
90771dc0 6341 OPERATOR(',');
78cdf107 6342 }
90771dc0
NC
6343 if (tmp == '~')
6344 PMop(OP_MATCH);
6345 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6346 && strchr("+-*/%.^&|<",tmp))
6347 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6348 "Reversed %c= operator",(int)tmp);
6349 s--;
6350 if (PL_expect == XSTATE && isALPHA(tmp) &&
6351 (s == PL_linestart+1 || s[-2] == '\n') )
6352 {
62e4c90a 6353 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
4a7239ff 6354 || PL_lex_state != LEX_NORMAL) {
90771dc0
NC
6355 d = PL_bufend;
6356 while (s < d) {
6357 if (*s++ == '\n') {
6358 incline(s);
6359 if (strnEQ(s,"=cut",4)) {
6360 s = strchr(s,'\n');
6361 if (s)
6362 s++;
6363 else
6364 s = d;
6365 incline(s);
6366 goto retry;
6367 }
6368 }
a5f75d66 6369 }
90771dc0 6370 goto retry;
a5f75d66 6371 }
5db06880
NC
6372#ifdef PERL_MAD
6373 if (PL_madskills) {
cd81e915 6374 if (!PL_thiswhite)
6b29d1f5 6375 PL_thiswhite = newSVpvs("");
cd81e915 6376 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
6377 PL_bufend - PL_linestart);
6378 }
6379#endif
90771dc0 6380 s = PL_bufend;
737c24fc 6381 PL_parser->in_pod = 1;
90771dc0 6382 goto retry;
a5f75d66 6383 }
a0d0e21e 6384 }
64a40898 6385 if (PL_expect == XBLOCK) {
c35e046a 6386 const char *t = s;
51882d45 6387#ifdef PERL_STRICT_CR
c35e046a 6388 while (SPACE_OR_TAB(*t))
51882d45 6389#else
c35e046a 6390 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 6391#endif
c35e046a 6392 t++;
a0d0e21e 6393 if (*t == '\n' || *t == '#') {
705fe0e5 6394 formbrack = 1;
583c9d5c
FC
6395 ENTER;
6396 SAVEI8(PL_parser->form_lex_state);
64a40898 6397 SAVEI32(PL_lex_formbrack);
583c9d5c 6398 PL_parser->form_lex_state = PL_lex_state;
64a40898 6399 PL_lex_formbrack = PL_lex_brackets + 1;
a0d0e21e
LW
6400 goto leftbracket;
6401 }
79072805 6402 }
78cdf107
Z
6403 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6404 s--;
6405 TOKEN(0);
6406 }
6154021b 6407 pl_yylval.ival = 0;
a0d0e21e 6408 OPERATOR(ASSIGNOP);
378cc40b
LW
6409 case '!':
6410 s++;
90771dc0
NC
6411 {
6412 const char tmp = *s++;
6413 if (tmp == '=') {
6414 /* was this !=~ where !~ was meant?
6415 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6416
6417 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6418 const char *t = s+1;
6419
6420 while (t < PL_bufend && isSPACE(*t))
6421 ++t;
6422
6423 if (*t == '/' || *t == '?' ||
6424 ((*t == 'm' || *t == 's' || *t == 'y')
0eb30aeb
KW
6425 && !isWORDCHAR(t[1])) ||
6426 (*t == 't' && t[1] == 'r' && !isWORDCHAR(t[2])))
90771dc0
NC
6427 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6428 "!=~ should be !~");
6429 }
78cdf107
Z
6430 if (!PL_lex_allbrackets &&
6431 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6432 s -= 2;
6433 TOKEN(0);
6434 }
90771dc0
NC
6435 Eop(OP_NE);
6436 }
6437 if (tmp == '~')
6438 PMop(OP_NOT);
6439 }
378cc40b
LW
6440 s--;
6441 OPERATOR('!');
6442 case '<':
3280af22 6443 if (PL_expect != XOPERATOR) {
93a17b20 6444 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 6445 check_uni();
79072805
LW
6446 if (s[1] == '<')
6447 s = scan_heredoc(s);
6448 else
6449 s = scan_inputsymbol(s);
78a635de
FC
6450 PL_expect = XOPERATOR;
6451 TOKEN(sublex_start());
378cc40b
LW
6452 }
6453 s++;
90771dc0
NC
6454 {
6455 char tmp = *s++;
78cdf107
Z
6456 if (tmp == '<') {
6457 if (*s == '=' && !PL_lex_allbrackets &&
6458 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6459 s -= 2;
6460 TOKEN(0);
6461 }
90771dc0 6462 SHop(OP_LEFT_SHIFT);
78cdf107 6463 }
90771dc0
NC
6464 if (tmp == '=') {
6465 tmp = *s++;
78cdf107
Z
6466 if (tmp == '>') {
6467 if (!PL_lex_allbrackets &&
6468 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6469 s -= 3;
6470 TOKEN(0);
6471 }
90771dc0 6472 Eop(OP_NCMP);
78cdf107 6473 }
90771dc0 6474 s--;
78cdf107
Z
6475 if (!PL_lex_allbrackets &&
6476 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6477 s -= 2;
6478 TOKEN(0);
6479 }
90771dc0
NC
6480 Rop(OP_LE);
6481 }
395c3793 6482 }
378cc40b 6483 s--;
78cdf107
Z
6484 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6485 s--;
6486 TOKEN(0);
6487 }
79072805 6488 Rop(OP_LT);
378cc40b
LW
6489 case '>':
6490 s++;
90771dc0
NC
6491 {
6492 const char tmp = *s++;
78cdf107
Z
6493 if (tmp == '>') {
6494 if (*s == '=' && !PL_lex_allbrackets &&
6495 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6496 s -= 2;
6497 TOKEN(0);
6498 }
90771dc0 6499 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
6500 }
6501 else if (tmp == '=') {
6502 if (!PL_lex_allbrackets &&
6503 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6504 s -= 2;
6505 TOKEN(0);
6506 }
90771dc0 6507 Rop(OP_GE);
78cdf107 6508 }
90771dc0 6509 }
378cc40b 6510 s--;
78cdf107
Z
6511 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6512 s--;
6513 TOKEN(0);
6514 }
79072805 6515 Rop(OP_GT);
378cc40b
LW
6516
6517 case '$':
bbce6d69 6518 CLINE;
6519
3280af22
NIS
6520 if (PL_expect == XOPERATOR) {
6521 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6522 return deprecate_commaless_var_list();
a0d0e21e 6523 }
8990e307 6524 }
a0d0e21e 6525
c0b977fd 6526 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6527 PL_tokenbuf[0] = '@';
376b8730
SM
6528 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6529 sizeof PL_tokenbuf - 1, FALSE);
6530 if (PL_expect == XOPERATOR)
6531 no_op("Array length", s);
3280af22 6532 if (!PL_tokenbuf[1])
a0d0e21e 6533 PREREF(DOLSHARP);
3280af22 6534 PL_expect = XOPERATOR;
60ac52eb 6535 force_ident_maybe_lex('#');
463ee0b2 6536 TOKEN(DOLSHARP);
79072805 6537 }
bbce6d69 6538
3280af22 6539 PL_tokenbuf[0] = '$';
376b8730
SM
6540 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6541 sizeof PL_tokenbuf - 1, FALSE);
6542 if (PL_expect == XOPERATOR)
6543 no_op("Scalar", s);
3280af22
NIS
6544 if (!PL_tokenbuf[1]) {
6545 if (s == PL_bufend)
bbce6d69 6546 yyerror("Final $ should be \\$ or $name");
6547 PREREF('$');
8990e307 6548 }
a0d0e21e 6549
ff68c719 6550 d = s;
90771dc0
NC
6551 {
6552 const char tmp = *s;
ae28bb2a 6553 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6554 s = SKIPSPACE1(s);
ff68c719 6555
90771dc0
NC
6556 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6557 && intuit_more(s)) {
6558 if (*s == '[') {
6559 PL_tokenbuf[0] = '@';
6560 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6561 char *t = s+1;
6562
8a2bca12 6563 while (isSPACE(*t) || isWORDCHAR_lazy_if(t,UTF) || *t == '$')
c35e046a 6564 t++;
90771dc0 6565 if (*t++ == ',') {
29595ff2 6566 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6567 while (t < PL_bufend && *t != ']')
6568 t++;
9014280d 6569 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6570 "Multidimensional syntax %.*s not supported",
36c7798d 6571 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6572 }
748a9306 6573 }
93a17b20 6574 }
90771dc0
NC
6575 else if (*s == '{') {
6576 char *t;
6577 PL_tokenbuf[0] = '%';
6578 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6579 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6580 {
6581 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6582 do {
6583 t++;
6584 } while (isSPACE(*t));
90771dc0 6585 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6586 STRLEN len;
90771dc0 6587 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6588 &len);
c35e046a
AL
6589 while (isSPACE(*t))
6590 t++;
4c01a014
BF
6591 if (*t == ';'
6592 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
90771dc0 6593 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
b17a0679
FC
6594 "You need to quote \"%"UTF8f"\"",
6595 UTF8fARG(UTF, len, tmpbuf));
90771dc0
NC
6596 }
6597 }
6598 }
93a17b20 6599 }
bbce6d69 6600
90771dc0
NC
6601 PL_expect = XOPERATOR;
6602 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6603 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6604 if (!islop || PL_last_lop_op == OP_GREPSTART)
6605 PL_expect = XOPERATOR;
6606 else if (strchr("$@\"'`q", *s))
6607 PL_expect = XTERM; /* e.g. print $fh "foo" */
6608 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6609 PL_expect = XTERM; /* e.g. print $fh &sub */
6610 else if (isIDFIRST_lazy_if(s,UTF)) {
6611 char tmpbuf[sizeof PL_tokenbuf];
6612 int t2;
6613 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6614 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6615 /* binary operators exclude handle interpretations */
6616 switch (t2) {
6617 case -KEY_x:
6618 case -KEY_eq:
6619 case -KEY_ne:
6620 case -KEY_gt:
6621 case -KEY_lt:
6622 case -KEY_ge:
6623 case -KEY_le:
6624 case -KEY_cmp:
6625 break;
6626 default:
6627 PL_expect = XTERM; /* e.g. print $fh length() */
6628 break;
6629 }
6630 }
6631 else {
6632 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6633 }
6634 }
90771dc0
NC
6635 else if (isDIGIT(*s))
6636 PL_expect = XTERM; /* e.g. print $fh 3 */
6637 else if (*s == '.' && isDIGIT(s[1]))
6638 PL_expect = XTERM; /* e.g. print $fh .3 */
6639 else if ((*s == '?' || *s == '-' || *s == '+')
6640 && !isSPACE(s[1]) && s[1] != '=')
6641 PL_expect = XTERM; /* e.g. print $fh -1 */
6642 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6643 && s[1] != '/')
6644 PL_expect = XTERM; /* e.g. print $fh /.../
6645 XXX except DORDOR operator
6646 */
6647 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6648 && s[2] != '=')
6649 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6650 }
bbce6d69 6651 }
60ac52eb 6652 force_ident_maybe_lex('$');
79072805 6653 TOKEN('$');
378cc40b
LW
6654
6655 case '@':
3280af22 6656 if (PL_expect == XOPERATOR)
bbce6d69 6657 no_op("Array", s);
3280af22
NIS
6658 PL_tokenbuf[0] = '@';
6659 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6660 if (!PL_tokenbuf[1]) {
bbce6d69 6661 PREREF('@');
6662 }
3280af22 6663 if (PL_lex_state == LEX_NORMAL)
29595ff2 6664 s = SKIPSPACE1(s);
3280af22 6665 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6666 if (*s == '{')
3280af22 6667 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6668
6669 /* Warn about @ where they meant $. */
041457d9
DM
6670 if (*s == '[' || *s == '{') {
6671 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6672 const char *t = s + 1;
8a2bca12 6673 while (*t && (isWORDCHAR_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
b9e186cd 6674 t += UTF ? UTF8SKIP(t) : 1;
a0d0e21e
LW
6675 if (*t == '}' || *t == ']') {
6676 t++;
29595ff2 6677 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
dcbac5bb 6678 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
9014280d 6679 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
b17a0679
FC
6680 "Scalar value %"UTF8f" better written as $%"UTF8f,
6681 UTF8fARG(UTF, t-PL_bufptr, PL_bufptr),
6682 UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1));
a0d0e21e 6683 }
93a17b20
LW
6684 }
6685 }
463ee0b2 6686 }
60ac52eb
FC
6687 PL_expect = XOPERATOR;
6688 force_ident_maybe_lex('@');
79072805 6689 TERM('@');
378cc40b 6690
c963b151 6691 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6692 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6693 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6694 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6695 TOKEN(0);
6f33ba73
RGS
6696 s += 2;
6697 AOPERATOR(DORDOR);
6698 }
c963b151 6699 case '?': /* may either be conditional or pattern */
be25f609 6700 if (PL_expect == XOPERATOR) {
90771dc0 6701 char tmp = *s++;
c963b151 6702 if(tmp == '?') {
78cdf107
Z
6703 if (!PL_lex_allbrackets &&
6704 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6705 s--;
6706 TOKEN(0);
6707 }
6708 PL_lex_allbrackets++;
be25f609 6709 OPERATOR('?');
c963b151
BD
6710 }
6711 else {
6712 tmp = *s++;
6713 if(tmp == '/') {
6714 /* A // operator. */
78cdf107
Z
6715 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6716 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6717 LEX_FAKEEOF_LOGIC)) {
6718 s -= 2;
6719 TOKEN(0);
6720 }
c963b151
BD
6721 AOPERATOR(DORDOR);
6722 }
6723 else {
6724 s--;
78cdf107
Z
6725 if (*s == '=' && !PL_lex_allbrackets &&
6726 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6727 s--;
6728 TOKEN(0);
6729 }
c963b151
BD
6730 Mop(OP_DIVIDE);
6731 }
6732 }
6733 }
6734 else {
6735 /* Disable warning on "study /blah/" */
6736 if (PL_oldoldbufptr == PL_last_uni
6737 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6738 || memNE(PL_last_uni, "study", 5)
8a2bca12 6739 || isWORDCHAR_lazy_if(PL_last_uni+5,UTF)
c963b151
BD
6740 ))
6741 check_uni();
725a61d7
Z
6742 if (*s == '?')
6743 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6744 s = scan_pat(s,OP_MATCH);
6745 TERM(sublex_start());
6746 }
378cc40b
LW
6747
6748 case '.':
51882d45
GS
6749 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6750#ifdef PERL_STRICT_CR
6751 && s[1] == '\n'
6752#else
6753 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6754#endif
6755 && (s == PL_linestart || s[-1] == '\n') )
6756 {
3280af22 6757 PL_expect = XSTATE;
705fe0e5 6758 formbrack = 2; /* dot seen where arguments expected */
79072805
LW
6759 goto rightbracket;
6760 }
be25f609 6761 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6762 s += 3;
6763 OPERATOR(YADAYADA);
6764 }
3280af22 6765 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6766 char tmp = *s++;
a687059c 6767 if (*s == tmp) {
78cdf107
Z
6768 if (!PL_lex_allbrackets &&
6769 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6770 s--;
6771 TOKEN(0);
6772 }
a687059c 6773 s++;
2f3197b3
LW
6774 if (*s == tmp) {
6775 s++;
6154021b 6776 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6777 }
6778 else
6154021b 6779 pl_yylval.ival = 0;
378cc40b 6780 OPERATOR(DOTDOT);
a687059c 6781 }
78cdf107
Z
6782 if (*s == '=' && !PL_lex_allbrackets &&
6783 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6784 s--;
6785 TOKEN(0);
6786 }
79072805 6787 Aop(OP_CONCAT);
378cc40b
LW
6788 }
6789 /* FALL THROUGH */
6790 case '0': case '1': case '2': case '3': case '4':
6791 case '5': case '6': case '7': case '8': case '9':
6154021b 6792 s = scan_num(s, &pl_yylval);
931e0695 6793 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6794 if (PL_expect == XOPERATOR)
8990e307 6795 no_op("Number",s);
79072805
LW
6796 TERM(THING);
6797
6798 case '\'':
4d68ffa0 6799 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
931e0695 6800 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6801 if (PL_expect == XOPERATOR) {
6802 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6803 return deprecate_commaless_var_list();
a0d0e21e 6804 }
463ee0b2 6805 else
8990e307 6806 no_op("String",s);
463ee0b2 6807 }
79072805 6808 if (!s)
d4c19fe8 6809 missingterm(NULL);
6154021b 6810 pl_yylval.ival = OP_CONST;
79072805
LW
6811 TERM(sublex_start());
6812
6813 case '"':
4d68ffa0 6814 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
931e0695 6815 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6816 if (PL_expect == XOPERATOR) {
6817 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6818 return deprecate_commaless_var_list();
a0d0e21e 6819 }
463ee0b2 6820 else
8990e307 6821 no_op("String",s);
463ee0b2 6822 }
79072805 6823 if (!s)
d4c19fe8 6824 missingterm(NULL);
6154021b 6825 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6826 /* FIXME. I think that this can be const if char *d is replaced by
6827 more localised variables. */
3280af22 6828 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6829 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6830 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6831 break;
6832 }
6833 }
79072805
LW
6834 TERM(sublex_start());
6835
6836 case '`':
4d68ffa0 6837 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
931e0695 6838 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6839 if (PL_expect == XOPERATOR)
8990e307 6840 no_op("Backticks",s);
79072805 6841 if (!s)
d4c19fe8 6842 missingterm(NULL);
9b201d7d 6843 readpipe_override();
79072805
LW
6844 TERM(sublex_start());
6845
6846 case '\\':
6847 s++;
a2a5de95
NC
6848 if (PL_lex_inwhat && isDIGIT(*s))
6849 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6850 *s, *s);
3280af22 6851 if (PL_expect == XOPERATOR)
8990e307 6852 no_op("Backslash",s);
79072805
LW
6853 OPERATOR(REFGEN);
6854
a7cb1f99 6855 case 'v':
e526c9e6 6856 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6857 char *start = s + 2;
dd629d5b 6858 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6859 start++;
6860 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6861 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6862 TERM(THING);
6863 }
e9d2327d
FC
6864 else if ((*start == ':' && start[1] == ':')
6865 || (PL_expect == XSTATE && *start == ':'))
6866 goto keylookup;
6867 else if (PL_expect == XSTATE) {
6868 d = start;
6869 while (d < PL_bufend && isSPACE(*d)) d++;
6870 if (*d == ':') goto keylookup;
6871 }
e526c9e6 6872 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
e9d2327d 6873 if (!isALPHA(*start) && (PL_expect == XTERM
6f33ba73
RGS
6874 || PL_expect == XREF || PL_expect == XSTATE
6875 || PL_expect == XTERMORDORDOR)) {
af9f5953
BF
6876 GV *const gv = gv_fetchpvn_flags(s, start - s,
6877 UTF ? SVf_UTF8 : 0, SVt_PVCV);
e526c9e6 6878 if (!gv) {
6154021b 6879 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6880 TERM(THING);
6881 }
6882 }
a7cb1f99
GS
6883 }
6884 goto keylookup;
79072805 6885 case 'x':
3280af22 6886 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6887 s++;
6888 Mop(OP_REPEAT);
2f3197b3 6889 }
79072805
LW
6890 goto keylookup;
6891
378cc40b 6892 case '_':
79072805
LW
6893 case 'a': case 'A':
6894 case 'b': case 'B':
6895 case 'c': case 'C':
6896 case 'd': case 'D':
6897 case 'e': case 'E':
6898 case 'f': case 'F':
6899 case 'g': case 'G':
6900 case 'h': case 'H':
6901 case 'i': case 'I':
6902 case 'j': case 'J':
6903 case 'k': case 'K':
6904 case 'l': case 'L':
6905 case 'm': case 'M':
6906 case 'n': case 'N':
6907 case 'o': case 'O':
6908 case 'p': case 'P':
6909 case 'q': case 'Q':
6910 case 'r': case 'R':
6911 case 's': case 'S':
6912 case 't': case 'T':
6913 case 'u': case 'U':
a7cb1f99 6914 case 'V':
79072805
LW
6915 case 'w': case 'W':
6916 case 'X':
6917 case 'y': case 'Y':
6918 case 'z': case 'Z':
6919
49dc05e3 6920 keylookup: {
88e1f1a2 6921 bool anydelim;
18f70389 6922 bool lex;
90771dc0 6923 I32 tmp;
18f70389 6924 SV *sv;
73f3e228
FC
6925 CV *cv;
6926 PADOFFSET off;
6927 OP *rv2cv_op;
10edeb5d 6928
18f70389 6929 lex = FALSE;
10edeb5d 6930 orig_keyword = 0;
73f3e228 6931 off = 0;
18f70389 6932 sv = NULL;
73f3e228 6933 cv = NULL;
10edeb5d
JH
6934 gv = NULL;
6935 gvp = NULL;
73f3e228 6936 rv2cv_op = NULL;
49dc05e3 6937
3280af22
NIS
6938 PL_bufptr = s;
6939 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6940
6941 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6942 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6943
6944 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6945 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6946 goto just_a_word;
6947
3643fb5f 6948 d = s;
3280af22 6949 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6950 d++; /* no comments skipped here, or s### is misparsed */
6951
748a9306 6952 /* Is this a word before a => operator? */
1c3923b3 6953 if (*d == '=' && d[1] == '>') {
21791330 6954 fat_arrow:
748a9306 6955 CLINE;
6154021b 6956 pl_yylval.opval
d0a148a6
NC
6957 = (OP*)newSVOP(OP_CONST, 0,
6958 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6959 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6960 TERM(WORD);
6961 }
6962
88e1f1a2
JV
6963 /* Check for plugged-in keyword */
6964 {
6965 OP *o;
6966 int result;
6967 char *saved_bufptr = PL_bufptr;
6968 PL_bufptr = s;
16c91539 6969 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6970 s = PL_bufptr;
6971 if (result == KEYWORD_PLUGIN_DECLINE) {
6972 /* not a plugged-in keyword */
6973 PL_bufptr = saved_bufptr;
6974 } else if (result == KEYWORD_PLUGIN_STMT) {
6975 pl_yylval.opval = o;
6976 CLINE;
6977 PL_expect = XSTATE;
6978 return REPORT(PLUGSTMT);
6979 } else if (result == KEYWORD_PLUGIN_EXPR) {
6980 pl_yylval.opval = o;
6981 CLINE;
6982 PL_expect = XOPERATOR;
6983 return REPORT(PLUGEXPR);
6984 } else {
6985 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6986 PL_tokenbuf);
6987 }
6988 }
6989
6990 /* Check for built-in keyword */
6991 tmp = keyword(PL_tokenbuf, len, 0);
6992
6993 /* Is this a label? */
6994 if (!anydelim && PL_expect == XSTATE
6995 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2 6996 s = d + 1;
5504e6cf
FC
6997 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6998 pl_yylval.pval[len] = '\0';
6999 pl_yylval.pval[len+1] = UTF ? 1 : 0;
88e1f1a2
JV
7000 CLINE;
7001 TOKEN(LABEL);
7002 }
7003
18f70389
FC
7004 /* Check for lexical sub */
7005 if (PL_expect != XOPERATOR) {
7006 char tmpbuf[sizeof PL_tokenbuf + 1];
18f70389
FC
7007 *tmpbuf = '&';
7008 Copy(PL_tokenbuf, tmpbuf+1, len, char);
7009 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
7010 if (off != NOT_IN_PAD) {
73f3e228 7011 assert(off); /* we assume this is boolean-true below */
18f70389
FC
7012 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
7013 HV * const stash = PAD_COMPNAME_OURSTASH(off);
7014 HEK * const stashname = HvNAME_HEK(stash);
7015 sv = newSVhek(stashname);
7016 sv_catpvs(sv, "::");
7017 sv_catpvn_flags(sv, PL_tokenbuf, len,
7018 (UTF ? SV_CATUTF8 : SV_CATBYTES));
7019 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
7020 SVt_PVCV);
73f3e228 7021 off = 0;
89e006ae
PM
7022 if (!gv) {
7023 sv_free(sv);
7024 sv = NULL;
7025 goto just_a_word;
7026 }
18f70389 7027 }
73f3e228
FC
7028 else {
7029 rv2cv_op = newOP(OP_PADANY, 0);
7030 rv2cv_op->op_targ = off;
9a5e6f3c 7031 cv = find_lexical_cv(off);
73f3e228
FC
7032 }
7033 lex = TRUE;
7034 goto just_a_word;
18f70389 7035 }
73f3e228 7036 off = 0;
18f70389
FC
7037 }
7038
a0d0e21e 7039 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
7040 GV *ogv = NULL; /* override (winner) */
7041 GV *hgv = NULL; /* hidden (loser) */
3280af22 7042 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 7043 CV *cv;
af9f5953
BF
7044 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
7045 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
56f7f34b
CS
7046 (cv = GvCVu(gv)))
7047 {
7048 if (GvIMPORTED_CV(gv))
7049 ogv = gv;
7050 else if (! CvMETHOD(cv))
7051 hgv = gv;
7052 }
7053 if (!ogv &&
af9f5953 7054 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
c60dbbc3 7055 UTF ? -(I32)len : (I32)len, FALSE)) &&
9e0d86f8 7056 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
7057 GvCVu(gv) && GvIMPORTED_CV(gv))
7058 {
7059 ogv = gv;
7060 }
7061 }
7062 if (ogv) {
30fe34ed 7063 orig_keyword = tmp;
56f7f34b 7064 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
7065 }
7066 else if (gv && !gvp
7067 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 7068 && GvCVu(gv))
6e7b2336
GS
7069 {
7070 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 7071 }
56f7f34b
CS
7072 else { /* no override */
7073 tmp = -tmp;
a2a5de95
NC
7074 if (tmp == KEY_dump) {
7075 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
7076 "dump() better written as CORE::dump()");
ac206dc8 7077 }
a0714e2c 7078 gv = NULL;
56f7f34b 7079 gvp = 0;
a2a5de95
NC
7080 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
7081 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
7082 "Ambiguous call resolved as CORE::%s(), "
7083 "qualify as such or use &",
7084 GvENAME(hgv));
49dc05e3 7085 }
a0d0e21e
LW
7086 }
7087
21791330
FC
7088 if (tmp && tmp != KEY___DATA__ && tmp != KEY___END__
7089 && (!anydelim || *s != '#')) {
7090 /* no override, and not s### either; skipspace is safe here
7091 * check for => on following line */
7092 STRLEN bufoff = PL_bufptr - SvPVX(PL_linestr);
7093 STRLEN soff = s - SvPVX(PL_linestr);
7094 s = skipspace_flags(s, LEX_NO_INCLINE);
7095 if (*s == '=' && s[1] == '>') goto fat_arrow;
7096 PL_bufptr = SvPVX(PL_linestr) + bufoff;
7097 s = SvPVX(PL_linestr) + soff;
7098 }
7099
a0d0e21e
LW
7100 reserved_word:
7101 switch (tmp) {
79072805
LW
7102
7103 default: /* not a keyword */
0bfa2a8a
NC
7104 /* Trade off - by using this evil construction we can pull the
7105 variable gv into the block labelled keylookup. If not, then
7106 we have to give it function scope so that the goto from the
7107 earlier ':' case doesn't bypass the initialisation. */
7108 if (0) {
7109 just_a_word_zero_gv:
73f3e228
FC
7110 sv = NULL;
7111 cv = NULL;
0bfa2a8a
NC
7112 gv = NULL;
7113 gvp = NULL;
73f3e228 7114 rv2cv_op = NULL;
8bee0991 7115 orig_keyword = 0;
18f70389
FC
7116 lex = 0;
7117 off = 0;
0bfa2a8a 7118 }
93a17b20 7119 just_a_word: {
ce29ac45 7120 int pkgname = 0;
f54cb97a 7121 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
898c3bca
FC
7122 const char penultchar =
7123 lastchar && PL_bufptr - 2 >= PL_linestart
7124 ? PL_bufptr[-2]
7125 : 0;
5db06880 7126#ifdef PERL_MAD
cd81e915 7127 SV *nextPL_nextwhite = 0;
5db06880
NC
7128#endif
7129
8990e307
LW
7130
7131 /* Get the rest if it looks like a package qualifier */
7132
155aba94 7133 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 7134 STRLEN morelen;
3280af22 7135 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
7136 TRUE, &morelen);
7137 if (!morelen)
b17a0679
FC
7138 Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
7139 UTF8fARG(UTF, len, PL_tokenbuf),
ec2ab091 7140 *s == '\'' ? "'" : "::");
c3e0f903 7141 len += morelen;
ce29ac45 7142 pkgname = 1;
a0d0e21e 7143 }
8990e307 7144
3280af22
NIS
7145 if (PL_expect == XOPERATOR) {
7146 if (PL_bufptr == PL_linestart) {
57843af0 7147 CopLINE_dec(PL_curcop);
f1f66076 7148 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 7149 CopLINE_inc(PL_curcop);
463ee0b2
LW
7150 }
7151 else
54310121 7152 no_op("Bareword",s);
463ee0b2 7153 }
8990e307 7154
c3e0f903 7155 /* Look for a subroutine with this name in current package,
73f3e228
FC
7156 unless this is a lexical sub, or name is "Foo::",
7157 in which case Foo is a bareword
c3e0f903
GS
7158 (and a package name). */
7159
5db06880 7160 if (len > 2 && !PL_madskills &&
3280af22 7161 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 7162 {
f776e3cd 7163 if (ckWARN(WARN_BAREWORD)
af9f5953 7164 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
9014280d 7165 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
b17a0679
FC
7166 "Bareword \"%"UTF8f"\" refers to nonexistent package",
7167 UTF8fARG(UTF, len, PL_tokenbuf));
c3e0f903 7168 len -= 2;
3280af22 7169 PL_tokenbuf[len] = '\0';
a0714e2c 7170 gv = NULL;
c3e0f903
GS
7171 gvp = 0;
7172 }
7173 else {
73f3e228 7174 if (!lex && !gv) {
62d55b22
NC
7175 /* Mustn't actually add anything to a symbol table.
7176 But also don't want to "initialise" any placeholder
7177 constants that might already be there into full
7178 blown PVGVs with attached PVCV. */
90e5519e 7179 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
af9f5953
BF
7180 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7181 SVt_PVCV);
62d55b22 7182 }
b3d904f3 7183 len = 0;
c3e0f903
GS
7184 }
7185
7186 /* if we saw a global override before, get the right name */
8990e307 7187
73f3e228 7188 if (!sv)
18f70389 7189 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
37bb7629 7190 len ? len : strlen(PL_tokenbuf));
49dc05e3 7191 if (gvp) {
37bb7629 7192 SV * const tmp_sv = sv;
396482e1 7193 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
7194 sv_catsv(sv, tmp_sv);
7195 SvREFCNT_dec(tmp_sv);
8a7a129d 7196 }
37bb7629 7197
5db06880 7198#ifdef PERL_MAD
cd81e915
NC
7199 if (PL_madskills && !PL_thistoken) {
7200 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 7201 PL_thistoken = newSVpvn(start,s - start);
cd81e915 7202 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
7203 }
7204#endif
8990e307 7205
a0d0e21e 7206 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 7207 CLINE;
6154021b
RGS
7208 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7209 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 7210
c3e0f903 7211 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
7212 if (len)
7213 goto safe_bareword;
7214
73f3e228 7215 if (!off)
f7461760 7216 {
d8ebba9f 7217 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
7218 const_op->op_private = OPpCONST_BARE;
7219 rv2cv_op = newCVREF(0, const_op);
73f3e228 7220 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
f7461760 7221 }
5069cc75 7222
8990e307
LW
7223 /* See if it's the indirect object for a list operator. */
7224
3280af22
NIS
7225 if (PL_oldoldbufptr &&
7226 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
7227 (PL_oldoldbufptr == PL_last_lop
7228 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 7229 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
7230 (PL_expect == XREF ||
7231 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 7232 {
748a9306
LW
7233 bool immediate_paren = *s == '(';
7234
a0d0e21e 7235 /* (Now we can afford to cross potential line boundary.) */
cd81e915 7236 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 7237#ifdef PERL_MAD
cd81e915 7238 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 7239#endif
a0d0e21e
LW
7240
7241 /* Two barewords in a row may indicate method call. */
7242
62d55b22 7243 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
7244 (tmp = intuit_method(s, gv, cv))) {
7245 op_free(rv2cv_op);
78cdf107
Z
7246 if (tmp == METHOD && !PL_lex_allbrackets &&
7247 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7248 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 7249 return REPORT(tmp);
f7461760 7250 }
a0d0e21e
LW
7251
7252 /* If not a declared subroutine, it's an indirect object. */
7253 /* (But it's an indir obj regardless for sort.) */
7294df96 7254 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 7255
7294df96
RGS
7256 if (
7257 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 7258 (!cv &&
a9ef352a 7259 (PL_last_lop_op != OP_MAPSTART &&
f0670693 7260 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
7261 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7262 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7263 )
a9ef352a 7264 {
3280af22 7265 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 7266 goto bareword;
93a17b20
LW
7267 }
7268 }
8990e307 7269
3280af22 7270 PL_expect = XOPERATOR;
5db06880
NC
7271#ifdef PERL_MAD
7272 if (isSPACE(*s))
cd81e915
NC
7273 s = SKIPSPACE2(s,nextPL_nextwhite);
7274 PL_nextwhite = nextPL_nextwhite;
5db06880 7275#else
8990e307 7276 s = skipspace(s);
5db06880 7277#endif
1c3923b3
GS
7278
7279 /* Is this a word before a => operator? */
ce29ac45 7280 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 7281 op_free(rv2cv_op);
1c3923b3 7282 CLINE;
9657ccb4
FC
7283 /* This is our own scalar, created a few lines above,
7284 so this is safe. */
7285 SvREADONLY_off(cSVOPx(pl_yylval.opval)->op_sv);
6154021b 7286 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 7287 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 7288 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
9657ccb4 7289 SvREADONLY_on(cSVOPx(pl_yylval.opval)->op_sv);
1c3923b3
GS
7290 TERM(WORD);
7291 }
7292
7293 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 7294 if (*s == '(') {
79072805 7295 CLINE;
5069cc75 7296 if (cv) {
c35e046a
AL
7297 d = s + 1;
7298 while (SPACE_OR_TAB(*d))
7299 d++;
f815dc14 7300 if (*d == ')' && (sv = cv_const_sv_or_av(cv))) {
96e4d5b1 7301 s = d + 1;
c631f32b 7302 goto its_constant;
96e4d5b1 7303 }
7304 }
5db06880
NC
7305#ifdef PERL_MAD
7306 if (PL_madskills) {
cd81e915
NC
7307 PL_nextwhite = PL_thiswhite;
7308 PL_thiswhite = 0;
5db06880 7309 }
cd81e915 7310 start_force(PL_curforce);
5db06880 7311#endif
73f3e228
FC
7312 NEXTVAL_NEXTTOKE.opval =
7313 off ? rv2cv_op : pl_yylval.opval;
3280af22 7314 PL_expect = XOPERATOR;
5db06880
NC
7315#ifdef PERL_MAD
7316 if (PL_madskills) {
cd81e915
NC
7317 PL_nextwhite = nextPL_nextwhite;
7318 curmad('X', PL_thistoken);
6b29d1f5 7319 PL_thistoken = newSVpvs("");
5db06880
NC
7320 }
7321#endif
73f3e228
FC
7322 if (off)
7323 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7324 else op_free(rv2cv_op), force_next(WORD);
6154021b 7325 pl_yylval.ival = 0;
463ee0b2 7326 TOKEN('&');
79072805 7327 }
93a17b20 7328
a0d0e21e 7329 /* If followed by var or block, call it a method (unless sub) */
8990e307 7330
f7461760
Z
7331 if ((*s == '$' || *s == '{') && !cv) {
7332 op_free(rv2cv_op);
3280af22
NIS
7333 PL_last_lop = PL_oldbufptr;
7334 PL_last_lop_op = OP_METHOD;
78cdf107
Z
7335 if (!PL_lex_allbrackets &&
7336 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7337 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 7338 PREBLOCK(METHOD);
463ee0b2
LW
7339 }
7340
8990e307
LW
7341 /* If followed by a bareword, see if it looks like indir obj. */
7342
30fe34ed
RGS
7343 if (!orig_keyword
7344 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
7345 && (tmp = intuit_method(s, gv, cv))) {
7346 op_free(rv2cv_op);
78cdf107
Z
7347 if (tmp == METHOD && !PL_lex_allbrackets &&
7348 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7349 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 7350 return REPORT(tmp);
f7461760 7351 }
93a17b20 7352
8990e307
LW
7353 /* Not a method, so call it a subroutine (if defined) */
7354
5069cc75 7355 if (cv) {
898c3bca 7356 if (lastchar == '-' && penultchar != '-') {
b17a0679 7357 const STRLEN l = len ? len : strlen(PL_tokenbuf);
43b5ab4c 7358 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
b17a0679
FC
7359 "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
7360 UTF8fARG(UTF, l, PL_tokenbuf),
7361 UTF8fARG(UTF, l, PL_tokenbuf));
43b5ab4c 7362 }
89bfa8cd 7363 /* Check for a constant sub */
f815dc14 7364 if ((sv = cv_const_sv_or_av(cv))) {
96e4d5b1 7365 its_constant:
f7461760 7366 op_free(rv2cv_op);
6154021b
RGS
7367 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7368 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
f815dc14
FC
7369 if (SvTYPE(sv) == SVt_PVAV)
7370 pl_yylval.opval = newUNOP(OP_RV2AV, OPf_PARENS,
7371 pl_yylval.opval);
7372 else {
7373 pl_yylval.opval->op_private = OPpCONST_FOLDED;
7374 pl_yylval.opval->op_folded = 1;
7375 pl_yylval.opval->op_flags |= OPf_SPECIAL;
7376 }
96e4d5b1 7377 TOKEN(WORD);
89bfa8cd 7378 }
7379
6154021b 7380 op_free(pl_yylval.opval);
9a5e6f3c
FC
7381 pl_yylval.opval =
7382 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
6154021b 7383 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 7384 PL_last_lop = PL_oldbufptr;
bf848113 7385 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 7386 /* Is there a prototype? */
5db06880
NC
7387 if (
7388#ifdef PERL_MAD
7389 cv &&
7390#endif
d9f2850e
RGS
7391 SvPOK(cv))
7392 {
8fa6a409
FC
7393 STRLEN protolen = CvPROTOLEN(cv);
7394 const char *proto = CvPROTO(cv);
b5fb7ce3 7395 bool optional;
d16269d8 7396 proto = S_strip_spaces(aTHX_ proto, &protolen);
5f66b61c 7397 if (!protolen)
4633a7c4 7398 TERM(FUNC0SUB);
b5fb7ce3
FC
7399 if ((optional = *proto == ';'))
7400 do
0f5d0394 7401 proto++;
b5fb7ce3 7402 while (*proto == ';');
649d02de
FC
7403 if (
7404 (
7405 (
7406 *proto == '$' || *proto == '_'
c035a075 7407 || *proto == '*' || *proto == '+'
649d02de
FC
7408 )
7409 && proto[1] == '\0'
7410 )
7411 || (
7412 *proto == '\\' && proto[1] && proto[2] == '\0'
7413 )
7414 )
b5fb7ce3 7415 UNIPROTO(UNIOPSUB,optional);
649d02de
FC
7416 if (*proto == '\\' && proto[1] == '[') {
7417 const char *p = proto + 2;
7418 while(*p && *p != ']')
7419 ++p;
b5fb7ce3
FC
7420 if(*p == ']' && !p[1])
7421 UNIPROTO(UNIOPSUB,optional);
649d02de 7422 }
7a52d87a 7423 if (*proto == '&' && *s == '{') {
49a54bbe
NC
7424 if (PL_curstash)
7425 sv_setpvs(PL_subname, "__ANON__");
7426 else
7427 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
7428 if (!PL_lex_allbrackets &&
7429 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7430 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
7431 PREBLOCK(LSTOPSUB);
7432 }
a9ef352a 7433 }
5db06880
NC
7434#ifdef PERL_MAD
7435 {
7436 if (PL_madskills) {
cd81e915
NC
7437 PL_nextwhite = PL_thiswhite;
7438 PL_thiswhite = 0;
5db06880 7439 }
cd81e915 7440 start_force(PL_curforce);
6154021b 7441 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
7442 PL_expect = XTERM;
7443 if (PL_madskills) {
cd81e915
NC
7444 PL_nextwhite = nextPL_nextwhite;
7445 curmad('X', PL_thistoken);
6b29d1f5 7446 PL_thistoken = newSVpvs("");
5db06880 7447 }
73f3e228 7448 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7449 if (!PL_lex_allbrackets &&
7450 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7451 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7452 TOKEN(NOAMP);
7453 }
7454 }
7455
7456 /* Guess harder when madskills require "best effort". */
7457 if (PL_madskills && (!gv || !GvCVu(gv))) {
7458 int probable_sub = 0;
7459 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7460 probable_sub = 1;
7461 else if (isALPHA(*s)) {
7462 char tmpbuf[1024];
7463 STRLEN tmplen;
7464 d = s;
7465 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 7466 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
7467 probable_sub = 1;
7468 else {
7469 while (d < PL_bufend && isSPACE(*d))
7470 d++;
7471 if (*d == '=' && d[1] == '>')
7472 probable_sub = 1;
7473 }
7474 }
7475 if (probable_sub) {
af9f5953
BF
7476 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7477 SVt_PVCV);
6154021b 7478 op_free(pl_yylval.opval);
9a5e6f3c
FC
7479 pl_yylval.opval =
7480 off ? (OP *)newCVREF(0, rv2cv_op) : rv2cv_op;
6154021b 7481 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
7482 PL_last_lop = PL_oldbufptr;
7483 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
7484 PL_nextwhite = PL_thiswhite;
7485 PL_thiswhite = 0;
7486 start_force(PL_curforce);
6154021b 7487 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 7488 PL_expect = XTERM;
cd81e915
NC
7489 PL_nextwhite = nextPL_nextwhite;
7490 curmad('X', PL_thistoken);
6b29d1f5 7491 PL_thistoken = newSVpvs("");
73f3e228 7492 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7493 if (!PL_lex_allbrackets &&
7494 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7495 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7496 TOKEN(NOAMP);
7497 }
7498#else
6154021b 7499 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 7500 PL_expect = XTERM;
73f3e228 7501 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7502 if (!PL_lex_allbrackets &&
7503 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7504 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 7505 TOKEN(NOAMP);
5db06880 7506#endif
8990e307 7507 }
748a9306 7508
8990e307
LW
7509 /* Call it a bare word */
7510
5603f27d 7511 if (PL_hints & HINT_STRICT_SUBS)
6154021b 7512 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 7513 else {
9a073a1d
RGS
7514 bareword:
7515 /* after "print" and similar functions (corresponding to
7516 * "F? L" in opcode.pl), whatever wasn't already parsed as
7517 * a filehandle should be subject to "strict subs".
7518 * Likewise for the optional indirect-object argument to system
7519 * or exec, which can't be a bareword */
7520 if ((PL_last_lop_op == OP_PRINT
7521 || PL_last_lop_op == OP_PRTF
7522 || PL_last_lop_op == OP_SAY
7523 || PL_last_lop_op == OP_SYSTEM
7524 || PL_last_lop_op == OP_EXEC)
7525 && (PL_hints & HINT_STRICT_SUBS))
7526 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
7527 if (lastchar != '-') {
7528 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
7529 d = PL_tokenbuf;
7530 while (isLOWER(*d))
7531 d++;
af9f5953 7532 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
9014280d 7533 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
7534 PL_tokenbuf);
7535 }
748a9306
LW
7536 }
7537 }
f7461760 7538 op_free(rv2cv_op);
c3e0f903
GS
7539
7540 safe_bareword:
9700e2d3
FC
7541 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
7542 && saw_infix_sigil) {
9b387841 7543 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
b17a0679
FC
7544 "Operator or semicolon missing before %c%"UTF8f,
7545 lastchar,
7546 UTF8fARG(UTF, strlen(PL_tokenbuf),
7547 PL_tokenbuf));
9b387841
NC
7548 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7549 "Ambiguous use of %c resolved as operator %c",
7550 lastchar, lastchar);
748a9306 7551 }
93a17b20 7552 TOKEN(WORD);
79072805 7553 }
79072805 7554
68dc0745 7555 case KEY___FILE__:
7eb971ee 7556 FUN0OP(
14f0f125 7557 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7eb971ee 7558 );
46fc3d4c 7559
79072805 7560 case KEY___LINE__:
7eb971ee
FC
7561 FUN0OP(
7562 (OP*)newSVOP(OP_CONST, 0,
7563 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7564 );
68dc0745 7565
7566 case KEY___PACKAGE__:
7eb971ee
FC
7567 FUN0OP(
7568 (OP*)newSVOP(OP_CONST, 0,
3280af22 7569 (PL_curstash
5aaec2b4 7570 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
7571 : &PL_sv_undef))
7572 );
79072805 7573
e50aee73 7574 case KEY___DATA__:
79072805
LW
7575 case KEY___END__: {
7576 GV *gv;
3280af22 7577 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 7578 const char *pname = "main";
affc13fc
FC
7579 STRLEN plen = 4;
7580 U32 putf8 = 0;
3280af22 7581 if (PL_tokenbuf[2] == 'D')
affc13fc
FC
7582 {
7583 HV * const stash =
7584 PL_curstash ? PL_curstash : PL_defstash;
7585 pname = HvNAME_get(stash);
7586 plen = HvNAMELEN (stash);
7587 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7588 }
7589 gv = gv_fetchpvn_flags(
7590 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7591 plen+6, GV_ADD|putf8, SVt_PVIO
7592 );
a5f75d66 7593 GvMULTI_on(gv);
79072805 7594 if (!GvIO(gv))
a0d0e21e 7595 GvIOp(gv) = newIO();
3280af22 7596 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
7597#if defined(HAS_FCNTL) && defined(F_SETFD)
7598 {
f54cb97a 7599 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
7600 fcntl(fd,F_SETFD,fd >= 3);
7601 }
79072805 7602#endif
fd049845 7603 /* Mark this internal pseudo-handle as clean */
7604 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 7605 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 7606 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 7607 else
50952442 7608 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
7609#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7610 /* if the script was opened in binmode, we need to revert
53129d29 7611 * it to text mode for compatibility; but only iff it has CRs
c39cd008 7612 * XXX this is a questionable hack at best. */
53129d29
GS
7613 if (PL_bufend-PL_bufptr > 2
7614 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
7615 {
7616 Off_t loc = 0;
50952442 7617 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
7618 loc = PerlIO_tell(PL_rsfp);
7619 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7620 }
2986a63f
JH
7621#ifdef NETWARE
7622 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7623#else
c39cd008 7624 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 7625#endif /* NETWARE */
c39cd008
GS
7626 if (loc > 0)
7627 PerlIO_seek(PL_rsfp, loc, 0);
7628 }
7629 }
7630#endif
7948272d 7631#ifdef PERLIO_LAYERS
52d2e0f4
JH
7632 if (!IN_BYTES) {
7633 if (UTF)
7634 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7635 else if (PL_encoding) {
7636 SV *name;
7637 dSP;
7638 ENTER;
7639 SAVETMPS;
7640 PUSHMARK(sp);
7641 EXTEND(SP, 1);
7642 XPUSHs(PL_encoding);
7643 PUTBACK;
7644 call_method("name", G_SCALAR);
7645 SPAGAIN;
7646 name = POPs;
7647 PUTBACK;
bfed75c6 7648 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7649 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7650 SVfARG(name)));
52d2e0f4
JH
7651 FREETMPS;
7652 LEAVE;
7653 }
7654 }
7948272d 7655#endif
5db06880
NC
7656#ifdef PERL_MAD
7657 if (PL_madskills) {
cd81e915
NC
7658 if (PL_realtokenstart >= 0) {
7659 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7660 if (!PL_endwhite)
6b29d1f5 7661 PL_endwhite = newSVpvs("");
cd81e915
NC
7662 sv_catsv(PL_endwhite, PL_thiswhite);
7663 PL_thiswhite = 0;
7664 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7665 PL_realtokenstart = -1;
5db06880 7666 }
5cc814fd
NC
7667 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7668 != NULL) ;
5db06880
NC
7669 }
7670#endif
4608196e 7671 PL_rsfp = NULL;
79072805
LW
7672 }
7673 goto fake_eof;
e929a76b 7674 }
de3bb511 7675
84ed0108 7676 case KEY___SUB__:
1a35f9ff 7677 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
84ed0108 7678
8990e307 7679 case KEY_AUTOLOAD:
ed6116ce 7680 case KEY_DESTROY:
79072805 7681 case KEY_BEGIN:
3c10abe3 7682 case KEY_UNITCHECK:
7d30b5c4 7683 case KEY_CHECK:
7d07dbc2 7684 case KEY_INIT:
7d30b5c4 7685 case KEY_END:
3280af22
NIS
7686 if (PL_expect == XSTATE) {
7687 s = PL_bufptr;
93a17b20 7688 goto really_sub;
79072805
LW
7689 }
7690 goto just_a_word;
7691
a0d0e21e
LW
7692 case KEY_CORE:
7693 if (*s == ':' && s[1] == ':') {
ee36fb64 7694 STRLEN olen = len;
748a9306 7695 d = s;
ee36fb64 7696 s += 2;
3280af22 7697 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
ee36fb64
FC
7698 if ((*s == ':' && s[1] == ':')
7699 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7700 {
7701 s = d;
7702 len = olen;
7703 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7704 goto just_a_word;
7705 }
7706 if (!tmp)
b17a0679
FC
7707 Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
7708 UTF8fARG(UTF, len, PL_tokenbuf));
a0d0e21e
LW
7709 if (tmp < 0)
7710 tmp = -tmp;
d67594ff
FC
7711 else if (tmp == KEY_require || tmp == KEY_do
7712 || tmp == KEY_glob)
a72a1c8b 7713 /* that's a way to remember we saw "CORE::" */
850e8516 7714 orig_keyword = tmp;
a0d0e21e
LW
7715 goto reserved_word;
7716 }
7717 goto just_a_word;
7718
463ee0b2
LW
7719 case KEY_abs:
7720 UNI(OP_ABS);
7721
79072805
LW
7722 case KEY_alarm:
7723 UNI(OP_ALARM);
7724
7725 case KEY_accept:
a0d0e21e 7726 LOP(OP_ACCEPT,XTERM);
79072805 7727
463ee0b2 7728 case KEY_and:
78cdf107
Z
7729 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7730 return REPORT(0);
463ee0b2
LW
7731 OPERATOR(ANDOP);
7732
79072805 7733 case KEY_atan2:
a0d0e21e 7734 LOP(OP_ATAN2,XTERM);
85e6fe83 7735
79072805 7736 case KEY_bind:
a0d0e21e 7737 LOP(OP_BIND,XTERM);
79072805
LW
7738
7739 case KEY_binmode:
1c1fc3ea 7740 LOP(OP_BINMODE,XTERM);
79072805
LW
7741
7742 case KEY_bless:
a0d0e21e 7743 LOP(OP_BLESS,XTERM);
79072805 7744
0d863452
RH
7745 case KEY_break:
7746 FUN0(OP_BREAK);
7747
79072805
LW
7748 case KEY_chop:
7749 UNI(OP_CHOP);
7750
7751 case KEY_continue:
0d863452
RH
7752 /* We have to disambiguate the two senses of
7753 "continue". If the next token is a '{' then
7754 treat it as the start of a continue block;
7755 otherwise treat it as a control operator.
7756 */
7757 s = skipspace(s);
7758 if (*s == '{')
79072805 7759 PREBLOCK(CONTINUE);
0d863452
RH
7760 else
7761 FUN0(OP_CONTINUE);
79072805
LW
7762
7763 case KEY_chdir:
fafc274c
NC
7764 /* may use HOME */
7765 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7766 UNI(OP_CHDIR);
7767
7768 case KEY_close:
7769 UNI(OP_CLOSE);
7770
7771 case KEY_closedir:
7772 UNI(OP_CLOSEDIR);
7773
7774 case KEY_cmp:
78cdf107
Z
7775 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7776 return REPORT(0);
79072805
LW
7777 Eop(OP_SCMP);
7778
7779 case KEY_caller:
7780 UNI(OP_CALLER);
7781
7782 case KEY_crypt:
7783#ifdef FCRYPT
f4c556ac
GS
7784 if (!PL_cryptseen) {
7785 PL_cryptseen = TRUE;
de3bb511 7786 init_des();
f4c556ac 7787 }
a687059c 7788#endif
a0d0e21e 7789 LOP(OP_CRYPT,XTERM);
79072805
LW
7790
7791 case KEY_chmod:
a0d0e21e 7792 LOP(OP_CHMOD,XTERM);
79072805
LW
7793
7794 case KEY_chown:
a0d0e21e 7795 LOP(OP_CHOWN,XTERM);
79072805
LW
7796
7797 case KEY_connect:
a0d0e21e 7798 LOP(OP_CONNECT,XTERM);
79072805 7799
463ee0b2
LW
7800 case KEY_chr:
7801 UNI(OP_CHR);
7802
79072805
LW
7803 case KEY_cos:
7804 UNI(OP_COS);
7805
7806 case KEY_chroot:
7807 UNI(OP_CHROOT);
7808
0d863452
RH
7809 case KEY_default:
7810 PREBLOCK(DEFAULT);
7811
79072805 7812 case KEY_do:
29595ff2 7813 s = SKIPSPACE1(s);
79072805 7814 if (*s == '{')
a0d0e21e 7815 PRETERMBLOCK(DO);
c2900bb8 7816 if (*s != '\'') {
4b473a5a
FC
7817 *PL_tokenbuf = '&';
7818 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7819 1, &len);
7820 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
c2900bb8 7821 d = SKIPSPACE1(d);
4b473a5a 7822 if (*d == '(') {
60ac52eb 7823 force_ident_maybe_lex('&');
4b473a5a
FC
7824 s = d;
7825 }
c2900bb8
FC
7826 }
7827 }
850e8516
RGS
7828 if (orig_keyword == KEY_do) {
7829 orig_keyword = 0;
6154021b 7830 pl_yylval.ival = 1;
850e8516
RGS
7831 }
7832 else
6154021b 7833 pl_yylval.ival = 0;
378cc40b 7834 OPERATOR(DO);
79072805
LW
7835
7836 case KEY_die:
3280af22 7837 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7838 LOP(OP_DIE,XTERM);
79072805
LW
7839
7840 case KEY_defined:
7841 UNI(OP_DEFINED);
7842
7843 case KEY_delete:
a0d0e21e 7844 UNI(OP_DELETE);
79072805
LW
7845
7846 case KEY_dbmopen:
74e8ce34
NC
7847 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7848 STR_WITH_LEN("NDBM_File::"),
7849 STR_WITH_LEN("DB_File::"),
7850 STR_WITH_LEN("GDBM_File::"),
7851 STR_WITH_LEN("SDBM_File::"),
7852 STR_WITH_LEN("ODBM_File::"),
7853 NULL);
a0d0e21e 7854 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7855
7856 case KEY_dbmclose:
7857 UNI(OP_DBMCLOSE);
7858
7859 case KEY_dump:
c31f6d3b 7860 PL_expect = XOPERATOR;
345b3785 7861 s = force_word(s,WORD,TRUE,FALSE);
79072805
LW
7862 LOOPX(OP_DUMP);
7863
7864 case KEY_else:
7865 PREBLOCK(ELSE);
7866
7867 case KEY_elsif:
6154021b 7868 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7869 OPERATOR(ELSIF);
7870
7871 case KEY_eq:
78cdf107
Z
7872 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7873 return REPORT(0);
79072805
LW
7874 Eop(OP_SEQ);
7875
a0d0e21e
LW
7876 case KEY_exists:
7877 UNI(OP_EXISTS);
4e553d73 7878
79072805 7879 case KEY_exit:
5db06880
NC
7880 if (PL_madskills)
7881 UNI(OP_INT);
79072805
LW
7882 UNI(OP_EXIT);
7883
7884 case KEY_eval:
29595ff2 7885 s = SKIPSPACE1(s);
32e2a35d
RGS
7886 if (*s == '{') { /* block eval */
7887 PL_expect = XTERMBLOCK;
7888 UNIBRACK(OP_ENTERTRY);
7889 }
7890 else { /* string eval */
7891 PL_expect = XTERM;
7892 UNIBRACK(OP_ENTEREVAL);
7893 }
79072805 7894
7d789282
FC
7895 case KEY_evalbytes:
7896 PL_expect = XTERM;
7897 UNIBRACK(-OP_ENTEREVAL);
7898
79072805
LW
7899 case KEY_eof:
7900 UNI(OP_EOF);
7901
7902 case KEY_exp:
7903 UNI(OP_EXP);
7904
7905 case KEY_each:
7906 UNI(OP_EACH);
7907
7908 case KEY_exec:
a0d0e21e 7909 LOP(OP_EXEC,XREF);
79072805
LW
7910
7911 case KEY_endhostent:
7912 FUN0(OP_EHOSTENT);
7913
7914 case KEY_endnetent:
7915 FUN0(OP_ENETENT);
7916
7917 case KEY_endservent:
7918 FUN0(OP_ESERVENT);
7919
7920 case KEY_endprotoent:
7921 FUN0(OP_EPROTOENT);
7922
7923 case KEY_endpwent:
7924 FUN0(OP_EPWENT);
7925
7926 case KEY_endgrent:
7927 FUN0(OP_EGRENT);
7928
7929 case KEY_for:
7930 case KEY_foreach:
78cdf107
Z
7931 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7932 return REPORT(0);
6154021b 7933 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7934 s = SKIPSPACE1(s);
7e2040f0 7935 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7936 char *p = s;
5db06880
NC
7937#ifdef PERL_MAD
7938 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7939#endif
7940
3280af22 7941 if ((PL_bufend - p) >= 3 &&
55497cff 7942 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7943 p += 2;
77ca0c92
LW
7944 else if ((PL_bufend - p) >= 4 &&
7945 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7946 p += 3;
29595ff2 7947 p = PEEKSPACE(p);
7e2040f0 7948 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7949 p = scan_ident(p, PL_bufend,
7950 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7951 p = PEEKSPACE(p);
77ca0c92
LW
7952 }
7953 if (*p != '$')
cea2e8a9 7954 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7955#ifdef PERL_MAD
7956 s = SvPVX(PL_linestr) + soff;
7957#endif
55497cff 7958 }
79072805
LW
7959 OPERATOR(FOR);
7960
7961 case KEY_formline:
a0d0e21e 7962 LOP(OP_FORMLINE,XTERM);
79072805
LW
7963
7964 case KEY_fork:
7965 FUN0(OP_FORK);
7966
838f2281
BF
7967 case KEY_fc:
7968 UNI(OP_FC);
7969
79072805 7970 case KEY_fcntl:
a0d0e21e 7971 LOP(OP_FCNTL,XTERM);
79072805
LW
7972
7973 case KEY_fileno:
7974 UNI(OP_FILENO);
7975
7976 case KEY_flock:
a0d0e21e 7977 LOP(OP_FLOCK,XTERM);
79072805
LW
7978
7979 case KEY_gt:
78cdf107
Z
7980 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7981 return REPORT(0);
79072805
LW
7982 Rop(OP_SGT);
7983
7984 case KEY_ge:
78cdf107
Z
7985 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7986 return REPORT(0);
79072805
LW
7987 Rop(OP_SGE);
7988
7989 case KEY_grep:
2c38e13d 7990 LOP(OP_GREPSTART, XREF);
79072805
LW
7991
7992 case KEY_goto:
c31f6d3b 7993 PL_expect = XOPERATOR;
345b3785 7994 s = force_word(s,WORD,TRUE,FALSE);
79072805
LW
7995 LOOPX(OP_GOTO);
7996
7997 case KEY_gmtime:
7998 UNI(OP_GMTIME);
7999
8000 case KEY_getc:
6f33ba73 8001 UNIDOR(OP_GETC);
79072805
LW
8002
8003 case KEY_getppid:
8004 FUN0(OP_GETPPID);
8005
8006 case KEY_getpgrp:
8007 UNI(OP_GETPGRP);
8008
8009 case KEY_getpriority:
a0d0e21e 8010 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
8011
8012 case KEY_getprotobyname:
8013 UNI(OP_GPBYNAME);
8014
8015 case KEY_getprotobynumber:
a0d0e21e 8016 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
8017
8018 case KEY_getprotoent:
8019 FUN0(OP_GPROTOENT);
8020
8021 case KEY_getpwent:
8022 FUN0(OP_GPWENT);
8023
8024 case KEY_getpwnam:
ff68c719 8025 UNI(OP_GPWNAM);
79072805
LW
8026
8027 case KEY_getpwuid:
ff68c719 8028 UNI(OP_GPWUID);
79072805
LW
8029
8030 case KEY_getpeername:
8031 UNI(OP_GETPEERNAME);
8032
8033 case KEY_gethostbyname:
8034 UNI(OP_GHBYNAME);
8035
8036 case KEY_gethostbyaddr:
a0d0e21e 8037 LOP(OP_GHBYADDR,XTERM);
79072805
LW
8038
8039 case KEY_gethostent:
8040 FUN0(OP_GHOSTENT);
8041
8042 case KEY_getnetbyname:
8043 UNI(OP_GNBYNAME);
8044
8045 case KEY_getnetbyaddr:
a0d0e21e 8046 LOP(OP_GNBYADDR,XTERM);
79072805
LW
8047
8048 case KEY_getnetent:
8049 FUN0(OP_GNETENT);
8050
8051 case KEY_getservbyname:
a0d0e21e 8052 LOP(OP_GSBYNAME,XTERM);
79072805
LW
8053
8054 case KEY_getservbyport:
a0d0e21e 8055 LOP(OP_GSBYPORT,XTERM);
79072805
LW
8056
8057 case KEY_getservent:
8058 FUN0(OP_GSERVENT);
8059
8060 case KEY_getsockname:
8061 UNI(OP_GETSOCKNAME);
8062
8063 case KEY_getsockopt:
a0d0e21e 8064 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
8065
8066 case KEY_getgrent:
8067 FUN0(OP_GGRENT);
8068
8069 case KEY_getgrnam:
ff68c719 8070 UNI(OP_GGRNAM);
79072805
LW
8071
8072 case KEY_getgrgid:
ff68c719 8073 UNI(OP_GGRGID);
79072805
LW
8074
8075 case KEY_getlogin:
8076 FUN0(OP_GETLOGIN);
8077
0d863452 8078 case KEY_given:
6154021b 8079 pl_yylval.ival = CopLINE(PL_curcop);
0f539b13
BF
8080 Perl_ck_warner_d(aTHX_
8081 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8082 "given is experimental");
0d863452
RH
8083 OPERATOR(GIVEN);
8084
93a17b20 8085 case KEY_glob:
d67594ff
FC
8086 LOP(
8087 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
8088 XTERM
8089 );
93a17b20 8090
79072805
LW
8091 case KEY_hex:
8092 UNI(OP_HEX);
8093
8094 case KEY_if:
78cdf107
Z
8095 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8096 return REPORT(0);
6154021b 8097 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8098 OPERATOR(IF);
8099
8100 case KEY_index:
a0d0e21e 8101 LOP(OP_INDEX,XTERM);
79072805
LW
8102
8103 case KEY_int:
8104 UNI(OP_INT);
8105
8106 case KEY_ioctl:
a0d0e21e 8107 LOP(OP_IOCTL,XTERM);
79072805
LW
8108
8109 case KEY_join:
a0d0e21e 8110 LOP(OP_JOIN,XTERM);
79072805
LW
8111
8112 case KEY_keys:
8113 UNI(OP_KEYS);
8114
8115 case KEY_kill:
a0d0e21e 8116 LOP(OP_KILL,XTERM);
79072805
LW
8117
8118 case KEY_last:
c31f6d3b 8119 PL_expect = XOPERATOR;
345b3785 8120 s = force_word(s,WORD,TRUE,FALSE);
79072805 8121 LOOPX(OP_LAST);
4e553d73 8122
79072805
LW
8123 case KEY_lc:
8124 UNI(OP_LC);
8125
8126 case KEY_lcfirst:
8127 UNI(OP_LCFIRST);
8128
8129 case KEY_local:
6154021b 8130 pl_yylval.ival = 0;
79072805
LW
8131 OPERATOR(LOCAL);
8132
8133 case KEY_length:
8134 UNI(OP_LENGTH);
8135
8136 case KEY_lt:
78cdf107
Z
8137 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8138 return REPORT(0);
79072805
LW
8139 Rop(OP_SLT);
8140
8141 case KEY_le:
78cdf107
Z
8142 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8143 return REPORT(0);
79072805
LW
8144 Rop(OP_SLE);
8145
8146 case KEY_localtime:
8147 UNI(OP_LOCALTIME);
8148
8149 case KEY_log:
8150 UNI(OP_LOG);
8151
8152 case KEY_link:
a0d0e21e 8153 LOP(OP_LINK,XTERM);
79072805
LW
8154
8155 case KEY_listen:
a0d0e21e 8156 LOP(OP_LISTEN,XTERM);
79072805 8157
c0329465
MB
8158 case KEY_lock:
8159 UNI(OP_LOCK);
8160
79072805
LW
8161 case KEY_lstat:
8162 UNI(OP_LSTAT);
8163
8164 case KEY_m:
8782bef2 8165 s = scan_pat(s,OP_MATCH);
79072805
LW
8166 TERM(sublex_start());
8167
a0d0e21e 8168 case KEY_map:
2c38e13d 8169 LOP(OP_MAPSTART, XREF);
4e4e412b 8170
79072805 8171 case KEY_mkdir:
a0d0e21e 8172 LOP(OP_MKDIR,XTERM);
79072805
LW
8173
8174 case KEY_msgctl:
a0d0e21e 8175 LOP(OP_MSGCTL,XTERM);
79072805
LW
8176
8177 case KEY_msgget:
a0d0e21e 8178 LOP(OP_MSGGET,XTERM);
79072805
LW
8179
8180 case KEY_msgrcv:
a0d0e21e 8181 LOP(OP_MSGRCV,XTERM);
79072805
LW
8182
8183 case KEY_msgsnd:
a0d0e21e 8184 LOP(OP_MSGSND,XTERM);
79072805 8185
77ca0c92 8186 case KEY_our:
93a17b20 8187 case KEY_my:
952306ac 8188 case KEY_state:
eac04b2e 8189 PL_in_my = (U16)tmp;
29595ff2 8190 s = SKIPSPACE1(s);
7e2040f0 8191 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
8192#ifdef PERL_MAD
8193 char* start = s;
8194#endif
3280af22 8195 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843 8196 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
e7d0b801
FC
8197 {
8198 if (!FEATURE_LEXSUBS_IS_ENABLED)
8199 Perl_croak(aTHX_
8200 "Experimental \"%s\" subs not enabled",
8201 tmp == KEY_my ? "my" :
8202 tmp == KEY_state ? "state" : "our");
64fbf0dd
FC
8203 Perl_ck_warner_d(aTHX_
8204 packWARN(WARN_EXPERIMENTAL__LEXICAL_SUBS),
8205 "The lexical_subs feature is experimental");
09bef843 8206 goto really_sub;
e7d0b801 8207 }
def3634b 8208 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 8209 if (!PL_in_my_stash) {
c750a3ec 8210 char tmpbuf[1024];
3280af22 8211 PL_bufptr = s;
d9fad198 8212 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
3c54b17a 8213 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
c750a3ec 8214 }
5db06880
NC
8215#ifdef PERL_MAD
8216 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
8217 sv_catsv(PL_thistoken, PL_nextwhite);
8218 PL_nextwhite = 0;
8219 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
8220 }
8221#endif
c750a3ec 8222 }
6154021b 8223 pl_yylval.ival = 1;
55497cff 8224 OPERATOR(MY);
93a17b20 8225
79072805 8226 case KEY_next:
c31f6d3b 8227 PL_expect = XOPERATOR;
345b3785 8228 s = force_word(s,WORD,TRUE,FALSE);
79072805
LW
8229 LOOPX(OP_NEXT);
8230
8231 case KEY_ne:
78cdf107
Z
8232 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8233 return REPORT(0);
79072805
LW
8234 Eop(OP_SNE);
8235
a0d0e21e 8236 case KEY_no:
468aa647 8237 s = tokenize_use(0, s);
52d0e95b 8238 TERM(USE);
a0d0e21e
LW
8239
8240 case KEY_not:
29595ff2 8241 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 8242 FUN1(OP_NOT);
78cdf107
Z
8243 else {
8244 if (!PL_lex_allbrackets &&
8245 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8246 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 8247 OPERATOR(NOTOP);
78cdf107 8248 }
a0d0e21e 8249
79072805 8250 case KEY_open:
29595ff2 8251 s = SKIPSPACE1(s);
7e2040f0 8252 if (isIDFIRST_lazy_if(s,UTF)) {
8ea4c679
BF
8253 const char *t;
8254 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE,
8255 &len);
c35e046a
AL
8256 for (t=d; isSPACE(*t);)
8257 t++;
e2ab214b 8258 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
8259 /* [perl #16184] */
8260 && !(t[0] == '=' && t[1] == '>')
db3abe52 8261 && !(t[0] == ':' && t[1] == ':')
240d1b6f 8262 && !keyword(s, d-s, 0)
66fbe8fb 8263 ) {
9014280d 8264 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
b17a0679
FC
8265 "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
8266 UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
66fbe8fb 8267 }
93a17b20 8268 }
a0d0e21e 8269 LOP(OP_OPEN,XTERM);
79072805 8270
463ee0b2 8271 case KEY_or:
78cdf107
Z
8272 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8273 return REPORT(0);
6154021b 8274 pl_yylval.ival = OP_OR;
463ee0b2
LW
8275 OPERATOR(OROP);
8276
79072805
LW
8277 case KEY_ord:
8278 UNI(OP_ORD);
8279
8280 case KEY_oct:
8281 UNI(OP_OCT);
8282
8283 case KEY_opendir:
a0d0e21e 8284 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
8285
8286 case KEY_print:
3280af22 8287 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 8288 LOP(OP_PRINT,XREF);
79072805
LW
8289
8290 case KEY_printf:
3280af22 8291 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 8292 LOP(OP_PRTF,XREF);
79072805 8293
c07a80fd 8294 case KEY_prototype:
8295 UNI(OP_PROTOTYPE);
8296
79072805 8297 case KEY_push:
a0d0e21e 8298 LOP(OP_PUSH,XTERM);
79072805
LW
8299
8300 case KEY_pop:
6f33ba73 8301 UNIDOR(OP_POP);
79072805 8302
a0d0e21e 8303 case KEY_pos:
6f33ba73 8304 UNIDOR(OP_POS);
4e553d73 8305
79072805 8306 case KEY_pack:
a0d0e21e 8307 LOP(OP_PACK,XTERM);
79072805
LW
8308
8309 case KEY_package:
345b3785 8310 s = force_word(s,WORD,FALSE,TRUE);
14a86d0c 8311 s = SKIPSPACE1(s);
91152fc1 8312 s = force_strict_version(s);
4e4da3ac 8313 PL_lex_expect = XBLOCK;
79072805
LW
8314 OPERATOR(PACKAGE);
8315
8316 case KEY_pipe:
a0d0e21e 8317 LOP(OP_PIPE_OP,XTERM);
79072805
LW
8318
8319 case KEY_q:
4d68ffa0 8320 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 8321 if (!s)
d4c19fe8 8322 missingterm(NULL);
6154021b 8323 pl_yylval.ival = OP_CONST;
79072805
LW
8324 TERM(sublex_start());
8325
a0d0e21e
LW
8326 case KEY_quotemeta:
8327 UNI(OP_QUOTEMETA);
8328
ea25a9b2
Z
8329 case KEY_qw: {
8330 OP *words = NULL;
4d68ffa0 8331 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
8990e307 8332 if (!s)
d4c19fe8 8333 missingterm(NULL);
3480a8d2 8334 PL_expect = XOPERATOR;
8127e0e3 8335 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
8336 int warned_comma = !ckWARN(WARN_QW);
8337 int warned_comment = warned_comma;
3280af22 8338 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 8339 while (len) {
d4c19fe8
AL
8340 for (; isSPACE(*d) && len; --len, ++d)
8341 /**/;
8127e0e3 8342 if (len) {
d4c19fe8 8343 SV *sv;
f54cb97a 8344 const char *b = d;
7e03b518 8345 if (!warned_comma || !warned_comment) {
8127e0e3 8346 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 8347 if (!warned_comma && *d == ',') {
9014280d 8348 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 8349 "Possible attempt to separate words with commas");
7e03b518 8350 ++warned_comma;
8127e0e3 8351 }
7e03b518 8352 else if (!warned_comment && *d == '#') {
9014280d 8353 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 8354 "Possible attempt to put comments in qw() list");
7e03b518 8355 ++warned_comment;
8127e0e3
GS
8356 }
8357 }
8358 }
8359 else {
d4c19fe8
AL
8360 for (; !isSPACE(*d) && len; --len, ++d)
8361 /**/;
8127e0e3 8362 }
740cce10 8363 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 8364 words = op_append_elem(OP_LIST, words,
7948272d 8365 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 8366 }
8367 }
8368 }
ea25a9b2
Z
8369 if (!words)
8370 words = newNULLLIST();
37fd879b 8371 if (PL_lex_stuff) {
8127e0e3 8372 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 8373 PL_lex_stuff = NULL;
37fd879b 8374 }
ea25a9b2
Z
8375 PL_expect = XOPERATOR;
8376 pl_yylval.opval = sawparens(words);
8377 TOKEN(QWLIST);
8378 }
8990e307 8379
79072805 8380 case KEY_qq:
4d68ffa0 8381 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 8382 if (!s)
d4c19fe8 8383 missingterm(NULL);
6154021b 8384 pl_yylval.ival = OP_STRINGIFY;
3280af22 8385 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 8386 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
8387 TERM(sublex_start());
8388
8782bef2
GB
8389 case KEY_qr:
8390 s = scan_pat(s,OP_QR);
8391 TERM(sublex_start());
8392
79072805 8393 case KEY_qx:
4d68ffa0 8394 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 8395 if (!s)
d4c19fe8 8396 missingterm(NULL);
9b201d7d 8397 readpipe_override();
79072805
LW
8398 TERM(sublex_start());
8399
8400 case KEY_return:
8401 OLDLOP(OP_RETURN);
8402
8403 case KEY_require:
29595ff2 8404 s = SKIPSPACE1(s);
c31f6d3b 8405 PL_expect = XOPERATOR;
e759cc13
RGS
8406 if (isDIGIT(*s)) {
8407 s = force_version(s, FALSE);
a7cb1f99 8408 }
e759cc13
RGS
8409 else if (*s != 'v' || !isDIGIT(s[1])
8410 || (s = force_version(s, TRUE), *s == 'v'))
8411 {
a7cb1f99 8412 *PL_tokenbuf = '\0';
345b3785 8413 s = force_word(s,WORD,TRUE,TRUE);
7e2040f0 8414 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
af9f5953
BF
8415 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8416 GV_ADD | (UTF ? SVf_UTF8 : 0));
a7cb1f99
GS
8417 else if (*s == '<')
8418 yyerror("<> should be quotes");
8419 }
a72a1c8b
RGS
8420 if (orig_keyword == KEY_require) {
8421 orig_keyword = 0;
6154021b 8422 pl_yylval.ival = 1;
a72a1c8b
RGS
8423 }
8424 else
6154021b 8425 pl_yylval.ival = 0;
a72a1c8b
RGS
8426 PL_expect = XTERM;
8427 PL_bufptr = s;
8428 PL_last_uni = PL_oldbufptr;
8429 PL_last_lop_op = OP_REQUIRE;
8430 s = skipspace(s);
8431 return REPORT( (int)REQUIRE );
79072805
LW
8432
8433 case KEY_reset:
8434 UNI(OP_RESET);
8435
8436 case KEY_redo:
c31f6d3b 8437 PL_expect = XOPERATOR;
345b3785 8438 s = force_word(s,WORD,TRUE,FALSE);
79072805
LW
8439 LOOPX(OP_REDO);
8440
8441 case KEY_rename:
a0d0e21e 8442 LOP(OP_RENAME,XTERM);
79072805
LW
8443
8444 case KEY_rand:
8445 UNI(OP_RAND);
8446
8447 case KEY_rmdir:
8448 UNI(OP_RMDIR);
8449
8450 case KEY_rindex:
a0d0e21e 8451 LOP(OP_RINDEX,XTERM);
79072805
LW
8452
8453 case KEY_read:
a0d0e21e 8454 LOP(OP_READ,XTERM);
79072805
LW
8455
8456 case KEY_readdir:
8457 UNI(OP_READDIR);
8458
93a17b20 8459 case KEY_readline:
6f33ba73 8460 UNIDOR(OP_READLINE);
93a17b20
LW
8461
8462 case KEY_readpipe:
0858480c 8463 UNIDOR(OP_BACKTICK);
93a17b20 8464
79072805
LW
8465 case KEY_rewinddir:
8466 UNI(OP_REWINDDIR);
8467
8468 case KEY_recv:
a0d0e21e 8469 LOP(OP_RECV,XTERM);
79072805
LW
8470
8471 case KEY_reverse:
a0d0e21e 8472 LOP(OP_REVERSE,XTERM);
79072805
LW
8473
8474 case KEY_readlink:
6f33ba73 8475 UNIDOR(OP_READLINK);
79072805
LW
8476
8477 case KEY_ref:
8478 UNI(OP_REF);
8479
8480 case KEY_s:
8481 s = scan_subst(s);
6154021b 8482 if (pl_yylval.opval)
79072805
LW
8483 TERM(sublex_start());
8484 else
8485 TOKEN(1); /* force error */
8486
0d863452
RH
8487 case KEY_say:
8488 checkcomma(s,PL_tokenbuf,"filehandle");
8489 LOP(OP_SAY,XREF);
8490
a0d0e21e
LW
8491 case KEY_chomp:
8492 UNI(OP_CHOMP);
4e553d73 8493
79072805
LW
8494 case KEY_scalar:
8495 UNI(OP_SCALAR);
8496
8497 case KEY_select:
a0d0e21e 8498 LOP(OP_SELECT,XTERM);
79072805
LW
8499
8500 case KEY_seek:
a0d0e21e 8501 LOP(OP_SEEK,XTERM);
79072805
LW
8502
8503 case KEY_semctl:
a0d0e21e 8504 LOP(OP_SEMCTL,XTERM);
79072805
LW
8505
8506 case KEY_semget:
a0d0e21e 8507 LOP(OP_SEMGET,XTERM);
79072805
LW
8508
8509 case KEY_semop:
a0d0e21e 8510 LOP(OP_SEMOP,XTERM);
79072805
LW
8511
8512 case KEY_send:
a0d0e21e 8513 LOP(OP_SEND,XTERM);
79072805
LW
8514
8515 case KEY_setpgrp:
a0d0e21e 8516 LOP(OP_SETPGRP,XTERM);
79072805
LW
8517
8518 case KEY_setpriority:
a0d0e21e 8519 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
8520
8521 case KEY_sethostent:
ff68c719 8522 UNI(OP_SHOSTENT);
79072805
LW
8523
8524 case KEY_setnetent:
ff68c719 8525 UNI(OP_SNETENT);
79072805
LW
8526
8527 case KEY_setservent:
ff68c719 8528 UNI(OP_SSERVENT);
79072805
LW
8529
8530 case KEY_setprotoent:
ff68c719 8531 UNI(OP_SPROTOENT);
79072805
LW
8532
8533 case KEY_setpwent:
8534 FUN0(OP_SPWENT);
8535
8536 case KEY_setgrent:
8537 FUN0(OP_SGRENT);
8538
8539 case KEY_seekdir:
a0d0e21e 8540 LOP(OP_SEEKDIR,XTERM);
79072805
LW
8541
8542 case KEY_setsockopt:
a0d0e21e 8543 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
8544
8545 case KEY_shift:
6f33ba73 8546 UNIDOR(OP_SHIFT);
79072805
LW
8547
8548 case KEY_shmctl:
a0d0e21e 8549 LOP(OP_SHMCTL,XTERM);
79072805
LW
8550
8551 case KEY_shmget:
a0d0e21e 8552 LOP(OP_SHMGET,XTERM);
79072805
LW
8553
8554 case KEY_shmread:
a0d0e21e 8555 LOP(OP_SHMREAD,XTERM);
79072805
LW
8556
8557 case KEY_shmwrite:
a0d0e21e 8558 LOP(OP_SHMWRITE,XTERM);
79072805
LW
8559
8560 case KEY_shutdown:
a0d0e21e 8561 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
8562
8563 case KEY_sin:
8564 UNI(OP_SIN);
8565
8566 case KEY_sleep:
8567 UNI(OP_SLEEP);
8568
8569 case KEY_socket:
a0d0e21e 8570 LOP(OP_SOCKET,XTERM);
79072805
LW
8571
8572 case KEY_socketpair:
a0d0e21e 8573 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
8574
8575 case KEY_sort:
3280af22 8576 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 8577 s = SKIPSPACE1(s);
3280af22 8578 PL_expect = XTERM;
345b3785 8579 s = force_word(s,WORD,TRUE,TRUE);
a0d0e21e 8580 LOP(OP_SORT,XREF);
79072805
LW
8581
8582 case KEY_split:
a0d0e21e 8583 LOP(OP_SPLIT,XTERM);
79072805
LW
8584
8585 case KEY_sprintf:
a0d0e21e 8586 LOP(OP_SPRINTF,XTERM);
79072805
LW
8587
8588 case KEY_splice:
a0d0e21e 8589 LOP(OP_SPLICE,XTERM);
79072805
LW
8590
8591 case KEY_sqrt:
8592 UNI(OP_SQRT);
8593
8594 case KEY_srand:
8595 UNI(OP_SRAND);
8596
8597 case KEY_stat:
8598 UNI(OP_STAT);
8599
8600 case KEY_study:
79072805
LW
8601 UNI(OP_STUDY);
8602
8603 case KEY_substr:
a0d0e21e 8604 LOP(OP_SUBSTR,XTERM);
79072805
LW
8605
8606 case KEY_format:
8607 case KEY_sub:
93a17b20 8608 really_sub:
09bef843 8609 {
24b6ef70 8610 char * const tmpbuf = PL_tokenbuf + 1;
09bef843 8611 expectation attrful;
28cc6278 8612 bool have_name, have_proto;
f54cb97a 8613 const int key = tmp;
b4fd0ac8 8614#ifndef PERL_MAD
d6a4f4b5 8615 SV *format_name = NULL;
b4fd0ac8 8616#endif
09bef843 8617
5db06880
NC
8618#ifdef PERL_MAD
8619 SV *tmpwhite = 0;
8620
cd81e915 8621 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
1cac5c33
FC
8622 SV *subtoken = PL_madskills
8623 ? newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr))
8624 : NULL;
cd81e915 8625 PL_thistoken = 0;
5db06880
NC
8626
8627 d = s;
8628 s = SKIPSPACE2(s,tmpwhite);
8629#else
8767b1ab 8630 d = s;
09bef843 8631 s = skipspace(s);
5db06880 8632#endif
09bef843 8633
7e2040f0 8634 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
8635 (*s == ':' && s[1] == ':'))
8636 {
5db06880 8637#ifdef PERL_MAD
4f61fd4b 8638 SV *nametoke = NULL;
5db06880
NC
8639#endif
8640
09bef843
SB
8641 PL_expect = XBLOCK;
8642 attrful = XATTRBLOCK;
24b6ef70
FC
8643 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8644 &len);
5db06880
NC
8645#ifdef PERL_MAD
8646 if (PL_madskills)
af9f5953 8647 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
b4fd0ac8 8648#else
d6a4f4b5
NC
8649 if (key == KEY_format)
8650 format_name = S_newSV_maybe_utf8(aTHX_ s, d - s);
b4fd0ac8 8651#endif
689aac7b
FC
8652 *PL_tokenbuf = '&';
8653 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8654 || pad_findmy_pvn(
8655 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8656 ) != NOT_IN_PAD)
6502358f 8657 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
8658 else {
8659 sv_setsv(PL_subname,PL_curstname);
396482e1 8660 sv_catpvs(PL_subname,"::");
09bef843
SB
8661 sv_catpvn(PL_subname,tmpbuf,len);
8662 }
af9f5953
BF
8663 if (SvUTF8(PL_linestr))
8664 SvUTF8_on(PL_subname);
09bef843 8665 have_name = TRUE;
5db06880 8666
60ac52eb 8667
5db06880 8668#ifdef PERL_MAD
60ac52eb
FC
8669 start_force(0);
8670 CURMAD('X', nametoke);
8671 CURMAD('_', tmpwhite);
4210d3f1 8672 force_ident_maybe_lex('&');
5db06880
NC
8673
8674 s = SKIPSPACE2(d,tmpwhite);
8675#else
8676 s = skipspace(d);
8677#endif
09bef843 8678 }
463ee0b2 8679 else {
8767b1ab
FC
8680 if (key == KEY_my || key == KEY_our || key==KEY_state)
8681 {
8682 *d = '\0';
8683 /* diag_listed_as: Missing name in "%s sub" */
8684 Perl_croak(aTHX_
8685 "Missing name in \"%s\"", PL_bufptr);
8686 }
09bef843
SB
8687 PL_expect = XTERMBLOCK;
8688 attrful = XATTRTERM;
76f68e9b 8689 sv_setpvs(PL_subname,"?");
09bef843 8690 have_name = FALSE;
463ee0b2 8691 }
4633a7c4 8692
09bef843 8693 if (key == KEY_format) {
5db06880 8694#ifdef PERL_MAD
cd81e915 8695 PL_thistoken = subtoken;
5db06880
NC
8696 s = d;
8697#else
d6a4f4b5
NC
8698 if (format_name) {
8699 start_force(PL_curforce);
d6a4f4b5
NC
8700 NEXTVAL_NEXTTOKE.opval
8701 = (OP*)newSVOP(OP_CONST,0, format_name);
8702 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
8703 force_next(WORD);
8704 }
5db06880 8705#endif
64a40898 8706 PREBLOCK(FORMAT);
09bef843 8707 }
79072805 8708
09bef843
SB
8709 /* Look for a prototype */
8710 if (*s == '(') {
4d68ffa0 8711 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
37fd879b 8712 if (!s)
09bef843 8713 Perl_croak(aTHX_ "Prototype not terminated");
fe788d6b 8714 (void)validate_proto(PL_subname, PL_lex_stuff, ckWARN(WARN_ILLEGALPROTO));
09bef843 8715 have_proto = TRUE;
68dc0745 8716
5db06880
NC
8717#ifdef PERL_MAD
8718 start_force(0);
cd81e915 8719 CURMAD('q', PL_thisopen);
5db06880 8720 CURMAD('_', tmpwhite);
cd81e915
NC
8721 CURMAD('=', PL_thisstuff);
8722 CURMAD('Q', PL_thisclose);
5db06880
NC
8723 NEXTVAL_NEXTTOKE.opval =
8724 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8725 PL_lex_stuff = NULL;
5db06880
NC
8726 force_next(THING);
8727
8728 s = SKIPSPACE2(s,tmpwhite);
8729#else
09bef843 8730 s = skipspace(s);
5db06880 8731#endif
4633a7c4 8732 }
09bef843
SB
8733 else
8734 have_proto = FALSE;
8735
8736 if (*s == ':' && s[1] != ':')
8737 PL_expect = attrful;
8e742a20
MHM
8738 else if (*s != '{' && key == KEY_sub) {
8739 if (!have_name)
8740 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8741 else if (*s != ';' && *s != '}')
be2597df 8742 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8743 }
09bef843 8744
5db06880
NC
8745#ifdef PERL_MAD
8746 start_force(0);
8747 if (tmpwhite) {
8748 if (PL_madskills)
6b29d1f5 8749 curmad('^', newSVpvs(""));
5db06880
NC
8750 CURMAD('_', tmpwhite);
8751 }
8752 force_next(0);
8753
cd81e915 8754 PL_thistoken = subtoken;
9c3c07f8 8755 PERL_UNUSED_VAR(have_proto);
5db06880 8756#else
09bef843 8757 if (have_proto) {
9ded7720 8758 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8759 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8760 PL_lex_stuff = NULL;
09bef843 8761 force_next(THING);
68dc0745 8762 }
5db06880 8763#endif
09bef843 8764 if (!have_name) {
49a54bbe
NC
8765 if (PL_curstash)
8766 sv_setpvs(PL_subname, "__ANON__");
8767 else
8768 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8769 TOKEN(ANONSUB);
4633a7c4 8770 }
5db06880 8771#ifndef PERL_MAD
4210d3f1 8772 force_ident_maybe_lex('&');
5db06880 8773#endif
09bef843 8774 TOKEN(SUB);
4633a7c4 8775 }
79072805
LW
8776
8777 case KEY_system:
a0d0e21e 8778 LOP(OP_SYSTEM,XREF);
79072805
LW
8779
8780 case KEY_symlink:
a0d0e21e 8781 LOP(OP_SYMLINK,XTERM);
79072805
LW
8782
8783 case KEY_syscall:
a0d0e21e 8784 LOP(OP_SYSCALL,XTERM);
79072805 8785
c07a80fd 8786 case KEY_sysopen:
8787 LOP(OP_SYSOPEN,XTERM);
8788
137443ea 8789 case KEY_sysseek:
8790 LOP(OP_SYSSEEK,XTERM);
8791
79072805 8792 case KEY_sysread:
a0d0e21e 8793 LOP(OP_SYSREAD,XTERM);
79072805
LW
8794
8795 case KEY_syswrite:
a0d0e21e 8796 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8797
8798 case KEY_tr:
8ce4b50f 8799 case KEY_y:
79072805
LW
8800 s = scan_trans(s);
8801 TERM(sublex_start());
8802
8803 case KEY_tell:
8804 UNI(OP_TELL);
8805
8806 case KEY_telldir:
8807 UNI(OP_TELLDIR);
8808
463ee0b2 8809 case KEY_tie:
a0d0e21e 8810 LOP(OP_TIE,XTERM);
463ee0b2 8811
c07a80fd 8812 case KEY_tied:
8813 UNI(OP_TIED);
8814
79072805
LW
8815 case KEY_time:
8816 FUN0(OP_TIME);
8817
8818 case KEY_times:
8819 FUN0(OP_TMS);
8820
8821 case KEY_truncate:
a0d0e21e 8822 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8823
8824 case KEY_uc:
8825 UNI(OP_UC);
8826
8827 case KEY_ucfirst:
8828 UNI(OP_UCFIRST);
8829
463ee0b2
LW
8830 case KEY_untie:
8831 UNI(OP_UNTIE);
8832
79072805 8833 case KEY_until:
78cdf107
Z
8834 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8835 return REPORT(0);
6154021b 8836 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8837 OPERATOR(UNTIL);
8838
8839 case KEY_unless:
78cdf107
Z
8840 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8841 return REPORT(0);
6154021b 8842 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8843 OPERATOR(UNLESS);
8844
8845 case KEY_unlink:
a0d0e21e 8846 LOP(OP_UNLINK,XTERM);
79072805
LW
8847
8848 case KEY_undef:
6f33ba73 8849 UNIDOR(OP_UNDEF);
79072805
LW
8850
8851 case KEY_unpack:
a0d0e21e 8852 LOP(OP_UNPACK,XTERM);
79072805
LW
8853
8854 case KEY_utime:
a0d0e21e 8855 LOP(OP_UTIME,XTERM);
79072805
LW
8856
8857 case KEY_umask:
6f33ba73 8858 UNIDOR(OP_UMASK);
79072805
LW
8859
8860 case KEY_unshift:
a0d0e21e
LW
8861 LOP(OP_UNSHIFT,XTERM);
8862
8863 case KEY_use:
468aa647 8864 s = tokenize_use(1, s);
a0d0e21e 8865 OPERATOR(USE);
79072805
LW
8866
8867 case KEY_values:
8868 UNI(OP_VALUES);
8869
8870 case KEY_vec:
a0d0e21e 8871 LOP(OP_VEC,XTERM);
79072805 8872
0d863452 8873 case KEY_when:
78cdf107
Z
8874 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8875 return REPORT(0);
6154021b 8876 pl_yylval.ival = CopLINE(PL_curcop);
0f539b13
BF
8877 Perl_ck_warner_d(aTHX_
8878 packWARN(WARN_EXPERIMENTAL__SMARTMATCH),
8879 "when is experimental");
0d863452
RH
8880 OPERATOR(WHEN);
8881
79072805 8882 case KEY_while:
78cdf107
Z
8883 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8884 return REPORT(0);
6154021b 8885 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8886 OPERATOR(WHILE);
8887
8888 case KEY_warn:
3280af22 8889 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8890 LOP(OP_WARN,XTERM);
79072805
LW
8891
8892 case KEY_wait:
8893 FUN0(OP_WAIT);
8894
8895 case KEY_waitpid:
a0d0e21e 8896 LOP(OP_WAITPID,XTERM);
79072805
LW
8897
8898 case KEY_wantarray:
8899 FUN0(OP_WANTARRAY);
8900
8901 case KEY_write:
9d116dd7
JH
8902#ifdef EBCDIC
8903 {
df3728a2
JH
8904 char ctl_l[2];
8905 ctl_l[0] = toCTRL('L');
8906 ctl_l[1] = '\0';
fafc274c 8907 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8908 }
8909#else
fafc274c
NC
8910 /* Make sure $^L is defined */
8911 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8912#endif
79072805
LW
8913 UNI(OP_ENTERWRITE);
8914
8915 case KEY_x:
78cdf107
Z
8916 if (PL_expect == XOPERATOR) {
8917 if (*s == '=' && !PL_lex_allbrackets &&
8918 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8919 return REPORT(0);
79072805 8920 Mop(OP_REPEAT);
78cdf107 8921 }
79072805
LW
8922 check_uni();
8923 goto just_a_word;
8924
a0d0e21e 8925 case KEY_xor:
78cdf107
Z
8926 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8927 return REPORT(0);
6154021b 8928 pl_yylval.ival = OP_XOR;
a0d0e21e 8929 OPERATOR(OROP);
79072805 8930 }
49dc05e3 8931 }}
79072805 8932}
bf4acbe4
GS
8933#ifdef __SC__
8934#pragma segment Main
8935#endif
79072805 8936
3875fc11
FC
8937/*
8938 S_pending_ident
8939
8940 Looks up an identifier in the pad or in a package
8941
8942 Returns:
8943 PRIVATEREF if this is a lexical name.
8944 WORD if this belongs to a package.
8945
8946 Structure:
8947 if we're in a my declaration
8948 croak if they tried to say my($foo::bar)
8949 build the ops for a my() declaration
8950 if it's an access to a my() variable
8951 build ops for access to a my() variable
8952 if in a dq string, and they've said @foo and we can't find @foo
8953 warn
8954 build ops for a bareword
8955*/
8956
3f33d153
FC
8957static int
8958S_pending_ident(pTHX)
8eceec63 8959{
97aff369 8960 dVAR;
bbd11bfc 8961 PADOFFSET tmp = 0;
3f33d153 8962 const char pit = (char)pl_yylval.ival;
9bde8eb0
NC
8963 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8964 /* All routes through this function want to know if there is a colon. */
c099d646 8965 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63 8966
3f33d153
FC
8967 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8968 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8969
8970 /* if we're in a my(), we can't allow dynamics here.
8971 $foo'bar has already been turned into $foo::bar, so
8972 just check for colons.
8973
8974 if it's a legal name, the OP is a PADANY.
8975 */
8976 if (PL_in_my) {
8977 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8978 if (has_colon)
4bca4ee0 8979 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8eceec63 8980 "variable %s in \"our\"",
4bca4ee0 8981 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
bc9b26ca 8982 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8983 }
8984 else {
9bde8eb0 8985 if (has_colon)
58576270
BF
8986 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8987 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8988 UTF ? SVf_UTF8 : 0);
8eceec63 8989
3f33d153
FC
8990 pl_yylval.opval = newOP(OP_PADANY, 0);
8991 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
bc9b26ca 8992 UTF ? SVf_UTF8 : 0);
3f33d153 8993 return PRIVATEREF;
8eceec63
SC
8994 }
8995 }
8996
8997 /*
8998 build the ops for accesses to a my() variable.
8eceec63
SC
8999 */
9000
9bde8eb0 9001 if (!has_colon) {
8716503d 9002 if (!PL_in_my)
bc9b26ca
BF
9003 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
9004 UTF ? SVf_UTF8 : 0);
8716503d 9005 if (tmp != NOT_IN_PAD) {
8eceec63 9006 /* might be an "our" variable" */
00b1698f 9007 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 9008 /* build ops for a bareword */
b64e5050
AL
9009 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9010 HEK * const stashname = HvNAME_HEK(stash);
9011 SV * const sym = newSVhek(stashname);
396482e1 9012 sv_catpvs(sym, "::");
2a33114a 9013 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
3f33d153
FC
9014 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
9015 pl_yylval.opval->op_private = OPpCONST_ENTERED;
4210d3f1
FC
9016 if (pit != '&')
9017 gv_fetchsv(sym,
8eceec63
SC
9018 (PL_in_eval
9019 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 9020 : GV_ADDMULTI
8eceec63
SC
9021 ),
9022 ((PL_tokenbuf[0] == '$') ? SVt_PV
9023 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9024 : SVt_PVHV));
3f33d153 9025 return WORD;
8eceec63
SC
9026 }
9027
3f33d153
FC
9028 pl_yylval.opval = newOP(OP_PADANY, 0);
9029 pl_yylval.opval->op_targ = tmp;
9030 return PRIVATEREF;
8eceec63
SC
9031 }
9032 }
9033
9034 /*
9035 Whine if they've said @foo in a doublequoted string,
9036 and @foo isn't a variable we can find in the symbol
9037 table.
9038 */
d824713b
NC
9039 if (ckWARN(WARN_AMBIGUOUS) &&
9040 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
0be4d16f
BF
9041 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
9042 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8eceec63 9043 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
9044 /* DO NOT warn for @- and @+ */
9045 && !( PL_tokenbuf[2] == '\0' &&
9046 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
9047 )
8eceec63
SC
9048 {
9049 /* Downgraded from fatal to warning 20000522 mjd */
d824713b 9050 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
b17a0679
FC
9051 "Possible unintended interpolation of %"UTF8f
9052 " in string",
9053 UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
8eceec63
SC
9054 }
9055 }
9056
9057 /* build ops for a bareword */
3f33d153
FC
9058 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
9059 newSVpvn_flags(PL_tokenbuf + 1,
0be4d16f
BF
9060 tokenbuf_len - 1,
9061 UTF ? SVf_UTF8 : 0 ));
3f33d153 9062 pl_yylval.opval->op_private = OPpCONST_ENTERED;
4210d3f1
FC
9063 if (pit != '&')
9064 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
0be4d16f
BF
9065 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
9066 | ( UTF ? SVf_UTF8 : 0 ),
223f0fb7
NC
9067 ((PL_tokenbuf[0] == '$') ? SVt_PV
9068 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
9069 : SVt_PVHV));
3f33d153 9070 return WORD;
8eceec63
SC
9071}
9072
76e3520e 9073STATIC void
c94115d8 9074S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 9075{
97aff369 9076 dVAR;
2f3197b3 9077
7918f24d
NC
9078 PERL_ARGS_ASSERT_CHECKCOMMA;
9079
d008e5eb 9080 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
9081 if (ckWARN(WARN_SYNTAX)) {
9082 int level = 1;
26ff0806 9083 const char *w;
d008e5eb
GS
9084 for (w = s+2; *w && level; w++) {
9085 if (*w == '(')
9086 ++level;
9087 else if (*w == ')')
9088 --level;
9089 }
888fea98
NC
9090 while (isSPACE(*w))
9091 ++w;
b1439985
RGS
9092 /* the list of chars below is for end of statements or
9093 * block / parens, boolean operators (&&, ||, //) and branch
9094 * constructs (or, and, if, until, unless, while, err, for).
9095 * Not a very solid hack... */
9096 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 9097 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 9098 "%s (...) interpreted as function",name);
d008e5eb 9099 }
2f3197b3 9100 }
3280af22 9101 while (s < PL_bufend && isSPACE(*s))
2f3197b3 9102 s++;
a687059c
LW
9103 if (*s == '(')
9104 s++;
3280af22 9105 while (s < PL_bufend && isSPACE(*s))
a687059c 9106 s++;
7e2040f0 9107 if (isIDFIRST_lazy_if(s,UTF)) {
d0fb66e4
BF
9108 const char * const w = s;
9109 s += UTF ? UTF8SKIP(s) : 1;
8a2bca12 9110 while (isWORDCHAR_lazy_if(s,UTF))
d0fb66e4 9111 s += UTF ? UTF8SKIP(s) : 1;
3280af22 9112 while (s < PL_bufend && isSPACE(*s))
a687059c 9113 s++;
e929a76b 9114 if (*s == ',') {
c94115d8 9115 GV* gv;
5458a98a 9116 if (keyword(w, s - w, 0))
e929a76b 9117 return;
c94115d8 9118
2e38bce1 9119 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
c94115d8 9120 if (gv && GvCVu(gv))
abbb3198 9121 return;
cea2e8a9 9122 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
9123 }
9124 }
9125}
9126
a1941760
DM
9127/* S_new_constant(): do any overload::constant lookup.
9128
9129 Either returns sv, or mortalizes/frees sv and returns a new SV*.
423cee85
JH
9130 Best used as sv=new_constant(..., sv, ...).
9131 If s, pv are NULL, calls subroutine with one argument,
107160e2
KW
9132 and <type> is used with error messages only.
9133 <type> is assumed to be well formed UTF-8 */
423cee85 9134
b3ac6de7 9135STATIC SV *
eb0d8d16
NC
9136S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9137 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 9138{
27da23d5 9139 dVAR; dSP;
fbb93542 9140 HV * table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 9141 SV *res;
eed484f9 9142 SV *errsv = NULL;
b3ac6de7
IZ
9143 SV **cvp;
9144 SV *cv, *typesv;
89e33a05 9145 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 9146
7918f24d 9147 PERL_ARGS_ASSERT_NEW_CONSTANT;
f374c70f
FC
9148 /* We assume that this is true: */
9149 if (*key == 'c') { assert (strEQ(key, "charnames")); }
bb4784f0 9150 assert(type || s);
7918f24d 9151
f8988b41 9152 /* charnames doesn't work well if there have been errors found */
f374c70f 9153 if (PL_error_count > 0 && *key == 'c')
14ca8ff4
FC
9154 {
9155 SvREFCNT_dec_NN(sv);
f8988b41 9156 return &PL_sv_undef;
14ca8ff4 9157 }
f8988b41 9158
5f7f7af5 9159 sv_2mortal(sv); /* Parent created it permanently */
fbb93542
KW
9160 if (!table
9161 || ! (PL_hints & HINT_LOCALIZE_HH)
9162 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9163 || ! SvOK(*cvp))
9164 {
5f7f7af5 9165 char *msg;
423cee85 9166
fbb93542
KW
9167 /* Here haven't found what we're looking for. If it is charnames,
9168 * perhaps it needs to be loaded. Try doing that before giving up */
f374c70f 9169 if (*key == 'c') {
fbb93542
KW
9170 Perl_load_module(aTHX_
9171 0,
9172 newSVpvs("_charnames"),
9173 /* version parameter; no need to specify it, as if
9174 * we get too early a version, will fail anyway,
9175 * not being able to find '_charnames' */
9176 NULL,
9177 newSVpvs(":full"),
9178 newSVpvs(":short"),
9179 NULL);
9180 SPAGAIN;
9181 table = GvHV(PL_hintgv);
9182 if (table
9183 && (PL_hints & HINT_LOCALIZE_HH)
9184 && (cvp = hv_fetch(table, key, keylen, FALSE))
9185 && SvOK(*cvp))
9186 {
9187 goto now_ok;
9188 }
9189 }
9190 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
5f7f7af5 9191 msg = Perl_form(aTHX_
bb4784f0
FC
9192 "Constant(%.*s) unknown",
9193 (int)(type ? typelen : len),
9194 (type ? type: s));
fbb93542
KW
9195 }
9196 else {
3918902d
KW
9197 why1 = "$^H{";
9198 why2 = key;
9199 why3 = "} is not defined";
9200 report:
f374c70f 9201 if (*key == 'c') {
5f7f7af5 9202 msg = Perl_form(aTHX_
bde9e88d
KW
9203 /* The +3 is for '\N{'; -4 for that, plus '}' */
9204 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
5f7f7af5 9205 );
90249f0a
KW
9206 }
9207 else {
5f7f7af5 9208 msg = Perl_form(aTHX_ "Constant(%.*s): %s%s%s",
bb4784f0
FC
9209 (int)(type ? typelen : len),
9210 (type ? type: s), why1, why2, why3);
90249f0a 9211 }
3918902d 9212 }
5f7f7af5
FC
9213 yyerror_pv(msg, UTF ? SVf_UTF8 : 0);
9214 return SvREFCNT_inc_simple_NN(sv);
423cee85 9215 }
fbb93542 9216now_ok:
b3ac6de7 9217 cv = *cvp;
423cee85 9218 if (!pv && s)
59cd0e26 9219 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 9220 if (type && pv)
59cd0e26 9221 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 9222 else
423cee85 9223 typesv = &PL_sv_undef;
4e553d73 9224
e788e7d3 9225 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
9226 ENTER ;
9227 SAVETMPS;
4e553d73 9228
423cee85 9229 PUSHMARK(SP) ;
a5845cb7 9230 EXTEND(sp, 3);
423cee85
JH
9231 if (pv)
9232 PUSHs(pv);
b3ac6de7 9233 PUSHs(sv);
423cee85
JH
9234 if (pv)
9235 PUSHs(typesv);
b3ac6de7 9236 PUTBACK;
423cee85 9237 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 9238
423cee85 9239 SPAGAIN ;
4e553d73 9240
423cee85 9241 /* Check the eval first */
eed484f9 9242 if (!PL_in_eval && ((errsv = ERRSV), SvTRUE_NN(errsv))) {
c06ecf4f
DD
9243 STRLEN errlen;
9244 const char * errstr;
eed484f9
DD
9245 sv_catpvs(errsv, "Propagated");
9246 errstr = SvPV_const(errsv, errlen);
c06ecf4f 9247 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
e1f15930 9248 (void)POPs;
ae5c22c1 9249 res = SvREFCNT_inc_simple_NN(sv);
423cee85
JH
9250 }
9251 else {
9252 res = POPs;
ae5c22c1 9253 SvREFCNT_inc_simple_void_NN(res);
423cee85 9254 }
4e553d73 9255
423cee85
JH
9256 PUTBACK ;
9257 FREETMPS ;
9258 LEAVE ;
b3ac6de7 9259 POPSTACK;
4e553d73 9260
b3ac6de7 9261 if (!SvOK(res)) {
423cee85
JH
9262 why1 = "Call to &{$^H{";
9263 why2 = key;
f0af216f 9264 why3 = "}} did not return a defined value";
423cee85 9265 sv = res;
5f7f7af5 9266 (void)sv_2mortal(sv);
423cee85 9267 goto report;
9b0e499b 9268 }
423cee85 9269
9b0e499b 9270 return res;
b3ac6de7 9271}
4e553d73 9272
07f72646
BF
9273PERL_STATIC_INLINE void
9274S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package, bool is_utf8) {
9275 dVAR;
9276 PERL_ARGS_ASSERT_PARSE_IDENT;
9277
9278 for (;;) {
9279 if (*d >= e)
9280 Perl_croak(aTHX_ "%s", ident_too_long);
32833930
BF
9281 if (is_utf8 && isIDFIRST_utf8((U8*)*s)) {
9282 /* The UTF-8 case must come first, otherwise things
9283 * like c\N{COMBINING TILDE} would start failing, as the
9284 * isWORDCHAR_A case below would gobble the 'c' up.
9285 */
9286
07f72646 9287 char *t = *s + UTF8SKIP(*s);
32833930 9288 while (isIDCONT_utf8((U8*)t))
07f72646
BF
9289 t += UTF8SKIP(t);
9290 if (*d + (t - *s) > e)
9291 Perl_croak(aTHX_ "%s", ident_too_long);
9292 Copy(*s, *d, t - *s, char);
9293 *d += t - *s;
9294 *s = t;
9295 }
32833930
BF
9296 else if ( isWORDCHAR_A(**s) ) {
9297 do {
9298 *(*d)++ = *(*s)++;
9299 } while isWORDCHAR_A(**s);
9300 }
07f72646
BF
9301 else if (allow_package && **s == '\'' && isIDFIRST_lazy_if(*s+1,is_utf8)) {
9302 *(*d)++ = ':';
9303 *(*d)++ = ':';
9304 (*s)++;
9305 }
9306 else if (allow_package && **s == ':' && (*s)[1] == ':'
9307 /* Disallow things like Foo::$bar. For the curious, this is
9308 * the code path that triggers the "Bad name after" warning
9309 * when looking for barewords.
9310 */
9311 && (*s)[2] != '$') {
9312 *(*d)++ = *(*s)++;
9313 *(*d)++ = *(*s)++;
9314 }
9315 else
9316 break;
9317 }
9318 return;
9319}
9320
d0a148a6
NC
9321/* Returns a NUL terminated string, with the length of the string written to
9322 *slp
9323 */
76e3520e 9324STATIC char *
5aaab254 9325S_scan_word(pTHX_ char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 9326{
97aff369 9327 dVAR;
eb578fdb
KW
9328 char *d = dest;
9329 char * const e = d + destlen - 3; /* two-character token, ending NUL */
07f72646 9330 bool is_utf8 = cBOOL(UTF);
7918f24d
NC
9331
9332 PERL_ARGS_ASSERT_SCAN_WORD;
9333
07f72646
BF
9334 parse_ident(&s, &d, e, allow_package, is_utf8);
9335 *d = '\0';
9336 *slp = d - dest;
9337 return s;
378cc40b
LW
9338}
9339
76e3520e 9340STATIC char *
5aaab254 9341S_scan_ident(pTHX_ char *s, const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 9342{
97aff369 9343 dVAR;
6136c704 9344 char *bracket = NULL;
748a9306 9345 char funny = *s++;
eb578fdb
KW
9346 char *d = dest;
9347 char * const e = d + destlen - 3; /* two-character token, ending NUL */
07f72646 9348 bool is_utf8 = cBOOL(UTF);
378cc40b 9349
7918f24d
NC
9350 PERL_ARGS_ASSERT_SCAN_IDENT;
9351
a0d0e21e 9352 if (isSPACE(*s))
29595ff2 9353 s = PEEKSPACE(s);
de3bb511 9354 if (isDIGIT(*s)) {
8903cb82 9355 while (isDIGIT(*s)) {
9356 if (d >= e)
e5cc0c0f 9357 Perl_croak(aTHX_ "%s", ident_too_long);
378cc40b 9358 *d++ = *s++;
8903cb82 9359 }
378cc40b
LW
9360 }
9361 else {
07f72646 9362 parse_ident(&s, &d, e, 1, is_utf8);
378cc40b
LW
9363 }
9364 *d = '\0';
9365 d = dest;
79072805 9366 if (*d) {
c5b6cc8c
NC
9367 /* Either a digit variable, or parse_ident() found an identifier
9368 (anything valid as a bareword), so job done and return. */
3280af22
NIS
9369 if (PL_lex_state != LEX_NORMAL)
9370 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 9371 return s;
378cc40b 9372 }
748a9306 9373 if (*s == '$' && s[1] &&
0a520fce
BF
9374 (isIDFIRST_lazy_if(s+1,is_utf8)
9375 || isDIGIT_A((U8)s[1])
9376 || s[1] == '$'
9377 || s[1] == '{'
9378 || strnEQ(s+1,"::",2)) )
5cd24f17 9379 {
c5b6cc8c
NC
9380 /* Dereferencing a value in a scalar variable.
9381 The alternatives are different syntaxes for a scalar variable.
9382 Using ' as a leading package separator isn't allowed. :: is. */
4810e5ec 9383 return s;
5cd24f17 9384 }
c5b6cc8c 9385 /* Handle the opening { of @{...}, &{...}, *{...}, %{...}, ${...} */
79072805
LW
9386 if (*s == '{') {
9387 bracket = s;
9388 s++;
a21046ad
BF
9389 while (s < send && SPACE_OR_TAB(*s))
9390 s++;
79072805 9391 }
32833930 9392
850a35c9
BF
9393#define VALID_LEN_ONE_IDENT(d, u) (isPUNCT_A((U8)(d)) \
9394 || isCNTRL_A((U8)(d)) \
9395 || isDIGIT_A((U8)(d)) \
9396 || (!(u) && !UTF8_IS_INVARIANT((U8)(d))))
32833930 9397 if (s < send
850a35c9 9398 && (isIDFIRST_lazy_if(s, is_utf8) || VALID_LEN_ONE_IDENT(*s, is_utf8)))
32833930 9399 {
07f72646 9400 if (is_utf8) {
204e6232
BF
9401 const STRLEN skip = UTF8SKIP(s);
9402 STRLEN i;
9403 d[skip] = '\0';
9404 for ( i = 0; i < skip; i++ )
9405 d[i] = *s++;
9406 }
9407 else {
9408 *d = *s++;
9409 d[1] = '\0';
9410 }
9411 }
c5b6cc8c 9412 /* Convert $^F, ${^F} and the ^F of ${^FOO} to control characters */
2b92dfce 9413 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 9414 *d = toCTRL(*s);
9415 s++;
de3bb511 9416 }
c5b6cc8c
NC
9417 /* Warn about ambiguous code after unary operators if {...} notation isn't
9418 used. There's no difference in ambiguity; it's merely a heuristic
9419 about when not to warn. */
fbdd83da
DIM
9420 else if (ck_uni && !bracket)
9421 check_uni();
79072805 9422 if (bracket) {
c5b6cc8c 9423 /* If we were processing {...} notation then... */
07f72646 9424 if (isIDFIRST_lazy_if(d,is_utf8)) {
c5b6cc8c
NC
9425 /* if it starts as a valid identifier, assume that it is one.
9426 (the later check for } being at the expected point will trap
9427 cases where this doesn't pan out.) */
07f72646
BF
9428 d += is_utf8 ? UTF8SKIP(d) : 1;
9429 parse_ident(&s, &d, e, 1, is_utf8);
79072805 9430 *d = '\0';
c35e046a
AL
9431 while (s < send && SPACE_OR_TAB(*s))
9432 s++;
ff68c719 9433 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
c5b6cc8c 9434 /* ${foo[0]} and ${foo{bar}} notation. */
5458a98a 9435 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
9436 const char * const brack =
9437 (const char *)
9438 ((*s == '[') ? "[...]" : "{...}");
e850844c 9439 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 9440 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 9441 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
9442 funny, dest, brack, funny, dest, brack);
9443 }
79072805 9444 bracket++;
a0be28da 9445 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 9446 PL_lex_allbrackets++;
79072805
LW
9447 return s;
9448 }
4e553d73
NIS
9449 }
9450 /* Handle extended ${^Foo} variables
2b92dfce 9451 * 1999-02-27 mjd-perl-patch@plover.com */
aecf5a4b 9452 else if (! isPRINT(*d) /* isCNTRL(d), plus all non-ASCII */
0eb30aeb 9453 && isWORDCHAR(*s))
2b92dfce
GS
9454 {
9455 d++;
0eb30aeb 9456 while (isWORDCHAR(*s) && d < e) {
2b92dfce
GS
9457 *d++ = *s++;
9458 }
9459 if (d >= e)
e5cc0c0f 9460 Perl_croak(aTHX_ "%s", ident_too_long);
2b92dfce 9461 *d = '\0';
79072805 9462 }
a21046ad
BF
9463
9464 while (s < send && SPACE_OR_TAB(*s))
9465 s++;
9466
c5b6cc8c
NC
9467 /* Expect to find a closing } after consuming any trailing whitespace.
9468 */
79072805
LW
9469 if (*s == '}') {
9470 s++;
7df0d042 9471 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 9472 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
9473 PL_expect = XREF;
9474 }
d008e5eb 9475 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 9476 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241 9477 (keyword(dest, d - dest, 0)
07f72646 9478 || get_cvn_flags(dest, d - dest, is_utf8 ? SVf_UTF8 : 0)))
d008e5eb 9479 {
5c66c3dd 9480 SV *tmp = newSVpvn_flags( dest, d - dest,
07f72646 9481 SVs_TEMP | (is_utf8 ? SVf_UTF8 : 0) );
c35e046a
AL
9482 if (funny == '#')
9483 funny = '@';
9014280d 9484 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5c66c3dd
BF
9485 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9486 funny, tmp, funny, tmp);
d008e5eb
GS
9487 }
9488 }
79072805
LW
9489 }
9490 else {
c5b6cc8c
NC
9491 /* Didn't find the closing } at the point we expected, so restore
9492 state such that the next thing to process is the opening { and */
79072805 9493 s = bracket; /* let the parser handle it */
93a17b20 9494 *dest = '\0';
79072805
LW
9495 }
9496 }
3280af22
NIS
9497 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9498 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
9499 return s;
9500}
9501
858a358b 9502static bool
3955e1a9 9503S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
9504
9505 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9506 * the parse starting at 's', based on the subset that are valid in this
9507 * context input to this routine in 'valid_flags'. Advances s. Returns
96f3bfda
KW
9508 * TRUE if the input should be treated as a valid flag, so the next char
9509 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9510 * first call on the current regex. This routine will set it to any
9511 * charset modifier found. The caller shouldn't change it. This way,
9512 * another charset modifier encountered in the parse can be detected as an
9513 * error, as we have decided to allow only one */
858a358b
KW
9514
9515 const char c = **s;
84159251 9516 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
94b03d7d 9517
84159251 9518 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
8a2bca12 9519 if (isWORDCHAR_lazy_if(*s, UTF)) {
4f8dbb2d 9520 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
84159251
BF
9521 UTF ? SVf_UTF8 : 0);
9522 (*s) += charlen;
96f3bfda
KW
9523 /* Pretend that it worked, so will continue processing before
9524 * dieing */
0da72d5e 9525 return TRUE;
858a358b
KW
9526 }
9527 return FALSE;
9528 }
9529
9530 switch (c) {
94b03d7d 9531
858a358b
KW
9532 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9533 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9534 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9535 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9536 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9537 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d 9538 case LOCALE_PAT_MOD:
3955e1a9
KW
9539 if (*charset) {
9540 goto multiple_charsets;
9541 }
94b03d7d 9542 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 9543 *charset = c;
94b03d7d
KW
9544 break;
9545 case UNICODE_PAT_MOD:
3955e1a9
KW
9546 if (*charset) {
9547 goto multiple_charsets;
9548 }
94b03d7d 9549 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 9550 *charset = c;
94b03d7d
KW
9551 break;
9552 case ASCII_RESTRICT_PAT_MOD:
ff3f26d2 9553 if (! *charset) {
94b03d7d
KW
9554 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9555 }
ff3f26d2
KW
9556 else {
9557
9558 /* Error if previous modifier wasn't an 'a', but if it was, see
9559 * if, and accept, a second occurrence (only) */
9560 if (*charset != 'a'
9561 || get_regex_charset(*pmfl)
9562 != REGEX_ASCII_RESTRICTED_CHARSET)
9563 {
9564 goto multiple_charsets;
9565 }
9566 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
9567 }
9568 *charset = c;
94b03d7d
KW
9569 break;
9570 case DEPENDS_PAT_MOD:
3955e1a9
KW
9571 if (*charset) {
9572 goto multiple_charsets;
9573 }
94b03d7d 9574 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 9575 *charset = c;
94b03d7d 9576 break;
879d0c72 9577 }
94b03d7d 9578
858a358b
KW
9579 (*s)++;
9580 return TRUE;
94b03d7d 9581
3955e1a9
KW
9582 multiple_charsets:
9583 if (*charset != c) {
9584 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9585 }
ff3f26d2
KW
9586 else if (c == 'a') {
9587 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9588 }
3955e1a9
KW
9589 else {
9590 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9591 }
9592
9593 /* Pretend that it worked, so will continue processing before dieing */
9594 (*s)++;
9595 return TRUE;
879d0c72
NC
9596}
9597
76e3520e 9598STATIC char *
cea2e8a9 9599S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 9600{
97aff369 9601 dVAR;
79072805 9602 PMOP *pm;
a1941760 9603 char *s;
10edeb5d 9604 const char * const valid_flags =
a20207d7 9605 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 9606 char charset = '\0'; /* character set modifier */
5db06880
NC
9607#ifdef PERL_MAD
9608 char *modstart;
9609#endif
9610
7918f24d 9611 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 9612
a1941760
DM
9613 s = scan_str(start,!!PL_madskills,FALSE, (PL_in_eval & EVAL_RE_REPARSING),
9614 TRUE /* look for escaped bracketed metas */ );
9615
25c09cbf 9616 if (!s) {
6136c704 9617 const char * const delimiter = skipspace(start);
10edeb5d
JH
9618 Perl_croak(aTHX_
9619 (const char *)
9620 (*delimiter == '?'
9621 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9622 : "Search pattern not terminated" ));
25c09cbf 9623 }
bbce6d69 9624
8782bef2 9625 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
9626 if (PL_multi_open == '?') {
9627 /* This is the only point in the code that sets PMf_ONCE: */
79072805 9628 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
9629
9630 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9631 allows us to restrict the list needed by reset to just the ??
9632 matches. */
9633 assert(type != OP_TRANS);
9634 if (PL_curstash) {
daba3364 9635 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
9636 U32 elements;
9637 if (!mg) {
daba3364 9638 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
9639 0);
9640 }
9641 elements = mg->mg_len / sizeof(PMOP**);
9642 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9643 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9644 mg->mg_len = elements * sizeof(PMOP**);
9645 PmopSTASH_set(pm,PL_curstash);
9646 }
9647 }
5db06880
NC
9648#ifdef PERL_MAD
9649 modstart = s;
9650#endif
d63c20f2
DM
9651
9652 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9653 * anon CV. False positives like qr/[(?{]/ are harmless */
9654
9655 if (type == OP_QR) {
6f635923
DM
9656 STRLEN len;
9657 char *e, *p = SvPV(PL_lex_stuff, len);
9658 e = p + len;
9659 for (; p < e; p++) {
d63c20f2
DM
9660 if (p[0] == '(' && p[1] == '?'
9661 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9662 {
9663 pm->op_pmflags |= PMf_HAS_CV;
9664 break;
9665 }
9666 }
6f635923 9667 pm->op_pmflags |= PMf_IS_QR;
d63c20f2
DM
9668 }
9669
3955e1a9 9670 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
9671#ifdef PERL_MAD
9672 if (PL_madskills && modstart != s) {
9673 SV* tmptoken = newSVpvn(modstart, s - modstart);
9674 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9675 }
9676#endif
4ac733c9 9677 /* issue a warning if /c is specified,but /g is not */
a2a5de95 9678 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 9679 {
a2a5de95
NC
9680 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9681 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
9682 }
9683
3280af22 9684 PL_lex_op = (OP*)pm;
6154021b 9685 pl_yylval.ival = OP_MATCH;
378cc40b
LW
9686 return s;
9687}
9688
76e3520e 9689STATIC char *
cea2e8a9 9690S_scan_subst(pTHX_ char *start)
79072805 9691{
27da23d5 9692 dVAR;
22594288 9693 char *s;
eb578fdb 9694 PMOP *pm;
4fdae800 9695 I32 first_start;
79072805 9696 I32 es = 0;
3955e1a9 9697 char charset = '\0'; /* character set modifier */
5db06880
NC
9698#ifdef PERL_MAD
9699 char *modstart;
9700#endif
79072805 9701
7918f24d
NC
9702 PERL_ARGS_ASSERT_SCAN_SUBST;
9703
6154021b 9704 pl_yylval.ival = OP_NULL;
79072805 9705
4d68ffa0
KW
9706 s = scan_str(start,!!PL_madskills,FALSE,FALSE,
9707 TRUE /* look for escaped bracketed metas */ );
79072805 9708
37fd879b 9709 if (!s)
cea2e8a9 9710 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9711
3280af22 9712 if (s[-1] == PL_multi_open)
79072805 9713 s--;
5db06880
NC
9714#ifdef PERL_MAD
9715 if (PL_madskills) {
cd81e915
NC
9716 CURMAD('q', PL_thisopen);
9717 CURMAD('_', PL_thiswhite);
9718 CURMAD('E', PL_thisstuff);
9719 CURMAD('Q', PL_thisclose);
9720 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9721 }
9722#endif
79072805 9723
3280af22 9724 first_start = PL_multi_start;
4d68ffa0 9725 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 9726 if (!s) {
37fd879b 9727 if (PL_lex_stuff) {
3280af22 9728 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9729 PL_lex_stuff = NULL;
37fd879b 9730 }
cea2e8a9 9731 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9732 }
3280af22 9733 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9734
79072805 9735 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9736
9737#ifdef PERL_MAD
9738 if (PL_madskills) {
cd81e915
NC
9739 CURMAD('z', PL_thisopen);
9740 CURMAD('R', PL_thisstuff);
9741 CURMAD('Z', PL_thisclose);
5db06880
NC
9742 }
9743 modstart = s;
9744#endif
9745
48c036b1 9746 while (*s) {
a20207d7 9747 if (*s == EXEC_PAT_MOD) {
a687059c 9748 s++;
2f3197b3 9749 es++;
a687059c 9750 }
3955e1a9
KW
9751 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9752 {
48c036b1 9753 break;
aa78b661 9754 }
378cc40b 9755 }
79072805 9756
5db06880
NC
9757#ifdef PERL_MAD
9758 if (PL_madskills) {
9759 if (modstart != s)
9760 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9761 append_madprops(PL_thismad, (OP*)pm, 0);
9762 PL_thismad = 0;
5db06880
NC
9763 }
9764#endif
a2a5de95
NC
9765 if ((pm->op_pmflags & PMf_CONTINUE)) {
9766 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9767 }
9768
79072805 9769 if (es) {
6136c704
AL
9770 SV * const repl = newSVpvs("");
9771
0244c3a4 9772 PL_multi_end = 0;
79072805 9773 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9774 while (es-- > 0) {
9775 if (es)
9776 sv_catpvs(repl, "eval ");
9777 else
9778 sv_catpvs(repl, "do ");
9779 }
6f43d98f 9780 sv_catpvs(repl, "{");
7cc34111 9781 sv_catsv(repl, PL_sublex_info.repl);
9badc361 9782 sv_catpvs(repl, "}");
25da4f38 9783 SvEVALED_on(repl);
7cc34111
FC
9784 SvREFCNT_dec(PL_sublex_info.repl);
9785 PL_sublex_info.repl = repl;
378cc40b 9786 }
79072805 9787
3280af22 9788 PL_lex_op = (OP*)pm;
6154021b 9789 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9790 return s;
9791}
9792
76e3520e 9793STATIC char *
cea2e8a9 9794S_scan_trans(pTHX_ char *start)
378cc40b 9795{
97aff369 9796 dVAR;
eb578fdb 9797 char* s;
11343788 9798 OP *o;
b84c11c8
NC
9799 U8 squash;
9800 U8 del;
9801 U8 complement;
bb16bae8 9802 bool nondestruct = 0;
5db06880
NC
9803#ifdef PERL_MAD
9804 char *modstart;
9805#endif
79072805 9806
7918f24d
NC
9807 PERL_ARGS_ASSERT_SCAN_TRANS;
9808
6154021b 9809 pl_yylval.ival = OP_NULL;
79072805 9810
4d68ffa0 9811 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
37fd879b 9812 if (!s)
cea2e8a9 9813 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9814
3280af22 9815 if (s[-1] == PL_multi_open)
2f3197b3 9816 s--;
5db06880
NC
9817#ifdef PERL_MAD
9818 if (PL_madskills) {
cd81e915
NC
9819 CURMAD('q', PL_thisopen);
9820 CURMAD('_', PL_thiswhite);
9821 CURMAD('E', PL_thisstuff);
9822 CURMAD('Q', PL_thisclose);
9823 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9824 }
9825#endif
2f3197b3 9826
4d68ffa0 9827 s = scan_str(s,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 9828 if (!s) {
37fd879b 9829 if (PL_lex_stuff) {
3280af22 9830 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9831 PL_lex_stuff = NULL;
37fd879b 9832 }
cea2e8a9 9833 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9834 }
5db06880 9835 if (PL_madskills) {
cd81e915
NC
9836 CURMAD('z', PL_thisopen);
9837 CURMAD('R', PL_thisstuff);
9838 CURMAD('Z', PL_thisclose);
5db06880 9839 }
79072805 9840
a0ed51b3 9841 complement = del = squash = 0;
5db06880
NC
9842#ifdef PERL_MAD
9843 modstart = s;
9844#endif
7a1e2023
NC
9845 while (1) {
9846 switch (*s) {
9847 case 'c':
79072805 9848 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9849 break;
9850 case 'd':
a0ed51b3 9851 del = OPpTRANS_DELETE;
7a1e2023
NC
9852 break;
9853 case 's':
79072805 9854 squash = OPpTRANS_SQUASH;
7a1e2023 9855 break;
bb16bae8
FC
9856 case 'r':
9857 nondestruct = 1;
9858 break;
7a1e2023
NC
9859 default:
9860 goto no_more;
9861 }
395c3793
LW
9862 s++;
9863 }
7a1e2023 9864 no_more:
8973db79 9865
9100eeb1 9866 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
59f00321
RGS
9867 o->op_private &= ~OPpTRANS_ALL;
9868 o->op_private |= del|squash|complement|
7948272d 9869 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
7cc34111 9870 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
79072805 9871
3280af22 9872 PL_lex_op = o;
bb16bae8 9873 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9874
9875#ifdef PERL_MAD
9876 if (PL_madskills) {
9877 if (modstart != s)
9878 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9879 append_madprops(PL_thismad, o, 0);
9880 PL_thismad = 0;
5db06880
NC
9881 }
9882#endif
9883
79072805
LW
9884 return s;
9885}
9886
5097bf9b
FC
9887/* scan_heredoc
9888 Takes a pointer to the first < in <<FOO.
9889 Returns a pointer to the byte following <<FOO.
9890
9891 This function scans a heredoc, which involves different methods
9892 depending on whether we are in a string eval, quoted construct, etc.
9893 This is because PL_linestr could containing a single line of input, or
9894 a whole string being evalled, or the contents of the current quote-
9895 like operator.
9896
19bbc0d7
FC
9897 The two basic methods are:
9898 - Steal lines from the input stream
9899 - Scan the heredoc in PL_linestr and remove it therefrom
9900
9901 In a file scope or filtered eval, the first method is used; in a
9902 string eval, the second.
9903
9904 In a quote-like operator, we have to choose between the two,
9905 depending on where we can find a newline. We peek into outer lex-
9906 ing scopes until we find one with a newline in it. If we reach the
9907 outermost lexing scope and it is a file, we use the stream method.
9908 Otherwise it is treated as an eval.
5097bf9b
FC
9909*/
9910
76e3520e 9911STATIC char *
5aaab254 9912S_scan_heredoc(pTHX_ char *s)
79072805 9913{
97aff369 9914 dVAR;
79072805
LW
9915 I32 op_type = OP_SCALAR;
9916 I32 len;
9917 SV *tmpstr;
9918 char term;
eb578fdb
KW
9919 char *d;
9920 char *e;
4633a7c4 9921 char *peek;
5097bf9b 9922 const bool infile = PL_rsfp || PL_parser->filtered;
78a635de 9923 LEXSHARED *shared = PL_parser->lex_shared;
5db06880
NC
9924#ifdef PERL_MAD
9925 I32 stuffstart = s - SvPVX(PL_linestr);
9926 char *tstart;
9927
cd81e915 9928 PL_realtokenstart = -1;
5db06880 9929#endif
79072805 9930
7918f24d
NC
9931 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9932
79072805 9933 s += 2;
5097bf9b 9934 d = PL_tokenbuf + 1;
3280af22 9935 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
5097bf9b 9936 *PL_tokenbuf = '\n';
c35e046a
AL
9937 peek = s;
9938 while (SPACE_OR_TAB(*peek))
9939 peek++;
3792a11b 9940 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9941 s = peek;
79072805 9942 term = *s++;
3280af22 9943 s = delimcpy(d, e, s, PL_bufend, term, &len);
6f2d7fc9
FC
9944 if (s == PL_bufend)
9945 Perl_croak(aTHX_ "Unterminated delimiter for here document");
fc36a67e 9946 d += len;
6f2d7fc9 9947 s++;
79072805
LW
9948 }
9949 else {
9950 if (*s == '\\')
458391bd 9951 /* <<\FOO is equivalent to <<'FOO' */
79072805
LW
9952 s++, term = '\'';
9953 else
9954 term = '"';
8a2bca12 9955 if (!isWORDCHAR_lazy_if(s,UTF))
8ab8f082 9956 deprecate("bare << to mean <<\"\"");
8a2bca12 9957 for (; isWORDCHAR_lazy_if(s,UTF); s++) {
fc36a67e 9958 if (d < e)
9959 *d++ = *s;
9960 }
9961 }
3280af22 9962 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9963 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9964 *d++ = '\n';
9965 *d = '\0';
3280af22 9966 len = d - PL_tokenbuf;
5db06880
NC
9967
9968#ifdef PERL_MAD
9969 if (PL_madskills) {
5097bf9b
FC
9970 tstart = PL_tokenbuf + 1;
9971 PL_thisclose = newSVpvn(tstart, len - 1);
5db06880 9972 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9973 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9974 stuffstart = s - SvPVX(PL_linestr);
9975 }
9976#endif
6a27c188 9977#ifndef PERL_STRICT_CR
f63a84b2
LW
9978 d = strchr(s, '\r');
9979 if (d) {
b464bac0 9980 char * const olds = s;
f63a84b2 9981 s = d;
3280af22 9982 while (s < PL_bufend) {
f63a84b2
LW
9983 if (*s == '\r') {
9984 *d++ = '\n';
9985 if (*++s == '\n')
9986 s++;
9987 }
9988 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9989 *d++ = *s++;
9990 s++;
9991 }
9992 else
9993 *d++ = *s++;
9994 }
9995 *d = '\0';
3280af22 9996 PL_bufend = d;
95a20fc0 9997 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9998 s = olds;
9999 }
10000#endif
5db06880
NC
10001#ifdef PERL_MAD
10002 if (PL_madskills) {
10003 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10004 if (PL_thisstuff)
10005 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 10006 else
cd81e915 10007 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880 10008 }
748a9306 10009
5db06880 10010 stuffstart = s - SvPVX(PL_linestr);
5db06880
NC
10011#endif
10012
7d0a29fe
NC
10013 tmpstr = newSV_type(SVt_PVIV);
10014 SvGROW(tmpstr, 80);
748a9306 10015 if (term == '\'') {
79072805 10016 op_type = OP_CONST;
45977657 10017 SvIV_set(tmpstr, -1);
748a9306
LW
10018 }
10019 else if (term == '`') {
79072805 10020 op_type = OP_BACKTICK;
45977657 10021 SvIV_set(tmpstr, '\\');
748a9306 10022 }
79072805 10023
78a635de 10024 PL_multi_start = CopLINE(PL_curcop) + 1;
3280af22 10025 PL_multi_open = PL_multi_close = '<';
19bbc0d7 10026 /* inside a string eval or quote-like operator */
4efe39d2 10027 if (!infile || PL_lex_inwhat) {
60f40a38 10028 SV *linestr;
3526bd3e 10029 char *bufend;
074b1c59 10030 char * const olds = s;
d37427bc 10031 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
19bbc0d7
FC
10032 /* These two fields are not set until an inner lexing scope is
10033 entered. But we need them set here. */
4efe39d2
FC
10034 shared->ls_bufptr = s;
10035 shared->ls_linestr = PL_linestr;
10036 if (PL_lex_inwhat)
10037 /* Look for a newline. If the current buffer does not have one,
10038 peek into the line buffer of the parent lexing scope, going
10039 up as many levels as necessary to find one with a newline
10040 after bufptr.
10041 */
10042 while (!(s = (char *)memchr(
10043 (void *)shared->ls_bufptr, '\n',
10044 SvEND(shared->ls_linestr)-shared->ls_bufptr
10045 ))) {
60f40a38 10046 shared = shared->ls_prev;
f68f7dc1
FC
10047 /* shared is only null if we have gone beyond the outermost
10048 lexing scope. In a file, we will have broken out of the
10049 loop in the previous iteration. In an eval, the string buf-
63ce3107 10050 fer ends with "\n;", so the while condition above will have
f68f7dc1
FC
10051 evaluated to false. So shared can never be null. */
10052 assert(shared);
60f40a38
FC
10053 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
10054 most lexing scope. In a file, shared->ls_linestr at that
10055 level is just one line, so there is no body to steal. */
10056 if (infile && !shared->ls_prev) {
074b1c59 10057 s = olds;
99bd9d90
FC
10058 goto streaming;
10059 }
4efe39d2
FC
10060 }
10061 else { /* eval */
10062 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
10063 assert(s);
10064 }
60f40a38
FC
10065 linestr = shared->ls_linestr;
10066 bufend = SvEND(linestr);
0244c3a4 10067 d = s;
6bf48f47 10068 while (s < bufend - len + 1 &&
621baac6 10069 memNE(s,PL_tokenbuf,len) ) {
0244c3a4 10070 if (*s++ == '\n')
78a635de 10071 ++shared->herelines;
0244c3a4 10072 }
6bf48f47 10073 if (s >= bufend - len + 1) {
932d0cf1 10074 goto interminable;
0244c3a4 10075 }
3328ab5a 10076 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
10077#ifdef PERL_MAD
10078 if (PL_madskills) {
cd81e915
NC
10079 if (PL_thisstuff)
10080 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 10081 else
cd81e915 10082 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
10083 stuffstart = s - SvPVX(PL_linestr);
10084 }
10085#endif
79072805 10086 s += len - 1;
d794b522 10087 /* the preceding stmt passes a newline */
78a635de 10088 shared->herelines++;
49d8d3a1 10089
db444266
FC
10090 /* s now points to the newline after the heredoc terminator.
10091 d points to the newline before the body of the heredoc.
10092 */
19bbc0d7
FC
10093
10094 /* We are going to modify linestr in place here, so set
10095 aside copies of the string if necessary for re-evals or
10096 (caller $n)[6]. */
a91428a4 10097 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
3328ab5a
FC
10098 check shared->re_eval_str. */
10099 if (shared->re_eval_start || shared->re_eval_str) {
db444266 10100 /* Set aside the rest of the regexp */
3328ab5a
FC
10101 if (!shared->re_eval_str)
10102 shared->re_eval_str =
10103 newSVpvn(shared->re_eval_start,
4efe39d2 10104 bufend - shared->re_eval_start);
3328ab5a 10105 shared->re_eval_start -= s-d;
db444266 10106 }
d4fe4ada
RU
10107 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
10108 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
10109 cx->blk_eval.cur_text == linestr)
10110 {
4efe39d2 10111 cx->blk_eval.cur_text = newSVsv(linestr);
d37427bc
FC
10112 SvSCREAM_on(cx->blk_eval.cur_text);
10113 }
db444266 10114 /* Copy everything from s onwards back to d. */
4efe39d2
FC
10115 Move(s,d,bufend-s + 1,char);
10116 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
19bbc0d7
FC
10117 /* Setting PL_bufend only applies when we have not dug deeper
10118 into other scopes, because sublex_done sets PL_bufend to
10119 SvEND(PL_linestr). */
4efe39d2 10120 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
db444266 10121 s = olds;
79072805
LW
10122 }
10123 else
a7922135 10124 {
3328ab5a 10125 SV *linestr_save;
a7922135
FC
10126 streaming:
10127 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
10128 term = PL_tokenbuf[1];
10129 len--;
3328ab5a 10130 linestr_save = PL_linestr; /* must restore this afterwards */
074b1c59 10131 d = s; /* and this */
3328ab5a 10132 PL_linestr = newSVpvs("");
074b1c59
FC
10133 PL_bufend = SvPVX(PL_linestr);
10134 while (1) {
5db06880
NC
10135#ifdef PERL_MAD
10136 if (PL_madskills) {
10137 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10138 if (PL_thisstuff)
10139 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10140 else
cd81e915 10141 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10142 }
10143#endif
074b1c59 10144 PL_bufptr = PL_bufend;
d794b522 10145 CopLINE_set(PL_curcop,
78a635de 10146 PL_multi_start + shared->herelines);
112d1284
FC
10147 if (!lex_next_chunk(LEX_NO_TERM)
10148 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
3328ab5a 10149 SvREFCNT_dec(linestr_save);
932d0cf1 10150 goto interminable;
79072805 10151 }
78a635de 10152 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
112d1284 10153 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
d8fe30ad
NC
10154 s = lex_grow_linestr(SvLEN(PL_linestr) + 3);
10155 /* ^That should be enough to avoid this needing to grow: */
112d1284 10156 sv_catpvs(PL_linestr, "\n\0");
d8fe30ad
NC
10157 assert(s == SvPVX(PL_linestr));
10158 PL_bufend = SvEND(PL_linestr);
112d1284 10159 }
f0e67a1d 10160 s = PL_bufptr;
5db06880
NC
10161#ifdef PERL_MAD
10162 stuffstart = s - SvPVX(PL_linestr);
10163#endif
78a635de 10164 shared->herelines++;
bd61b366 10165 PL_last_lop = PL_last_uni = NULL;
6a27c188 10166#ifndef PERL_STRICT_CR
3280af22 10167 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
10168 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10169 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 10170 {
3280af22
NIS
10171 PL_bufend[-2] = '\n';
10172 PL_bufend--;
95a20fc0 10173 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 10174 }
3280af22
NIS
10175 else if (PL_bufend[-1] == '\r')
10176 PL_bufend[-1] = '\n';
f63a84b2 10177 }
3280af22
NIS
10178 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10179 PL_bufend[-1] = '\n';
f63a84b2 10180#endif
5097bf9b 10181 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
3328ab5a
FC
10182 SvREFCNT_dec(PL_linestr);
10183 PL_linestr = linestr_save;
10184 PL_linestart = SvPVX(linestr_save);
3280af22 10185 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3328ab5a 10186 s = d;
074b1c59 10187 break;
79072805
LW
10188 }
10189 else {
3280af22 10190 sv_catsv(tmpstr,PL_linestr);
395c3793 10191 }
a7922135 10192 }
395c3793 10193 }
57843af0 10194 PL_multi_end = CopLINE(PL_curcop);
79072805 10195 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 10196 SvPV_shrink_to_cur(tmpstr);
79072805 10197 }
2f31ce75 10198 if (!IN_BYTES) {
95a20fc0 10199 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
10200 SvUTF8_on(tmpstr);
10201 else if (PL_encoding)
10202 sv_recode_to_utf8(tmpstr, PL_encoding);
10203 }
3280af22 10204 PL_lex_stuff = tmpstr;
6154021b 10205 pl_yylval.ival = op_type;
79072805 10206 return s;
932d0cf1
FC
10207
10208 interminable:
932d0cf1
FC
10209 SvREFCNT_dec(tmpstr);
10210 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10211 missingterm(PL_tokenbuf + 1);
79072805
LW
10212}
10213
02aa26ce
NT
10214/* scan_inputsymbol
10215 takes: current position in input buffer
10216 returns: new position in input buffer
6154021b 10217 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
10218
10219 This code handles:
10220
10221 <> read from ARGV
10222 <FH> read from filehandle
10223 <pkg::FH> read from package qualified filehandle
10224 <pkg'FH> read from package qualified filehandle
10225 <$fh> read from filehandle in $fh
10226 <*.h> filename glob
10227
10228*/
10229
76e3520e 10230STATIC char *
cea2e8a9 10231S_scan_inputsymbol(pTHX_ char *start)
79072805 10232{
97aff369 10233 dVAR;
eb578fdb 10234 char *s = start; /* current position in buffer */
1b420867 10235 char *end;
79072805 10236 I32 len;
6136c704
AL
10237 char *d = PL_tokenbuf; /* start of temp holding space */
10238 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10239
7918f24d
NC
10240 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10241
1b420867
GS
10242 end = strchr(s, '\n');
10243 if (!end)
10244 end = PL_bufend;
10245 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
10246
10247 /* die if we didn't have space for the contents of the <>,
1b420867 10248 or if it didn't end, or if we see a newline
02aa26ce
NT
10249 */
10250
bb7a0f54 10251 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 10252 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 10253 if (s >= end)
cea2e8a9 10254 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 10255
fc36a67e 10256 s++;
02aa26ce
NT
10257
10258 /* check for <$fh>
10259 Remember, only scalar variables are interpreted as filehandles by
10260 this code. Anything more complex (e.g., <$fh{$num}>) will be
10261 treated as a glob() call.
10262 This code makes use of the fact that except for the $ at the front,
10263 a scalar variable and a filehandle look the same.
10264 */
4633a7c4 10265 if (*d == '$' && d[1]) d++;
02aa26ce
NT
10266
10267 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
8a2bca12 10268 while (*d && (isWORDCHAR_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
2a507800 10269 d += UTF ? UTF8SKIP(d) : 1;
02aa26ce
NT
10270
10271 /* If we've tried to read what we allow filehandles to look like, and
10272 there's still text left, then it must be a glob() and not a getline.
10273 Use scan_str to pull out the stuff between the <> and treat it
10274 as nothing more than a string.
10275 */
10276
3280af22 10277 if (d - PL_tokenbuf != len) {
6154021b 10278 pl_yylval.ival = OP_GLOB;
4d68ffa0 10279 s = scan_str(start,!!PL_madskills,FALSE,FALSE, FALSE);
79072805 10280 if (!s)
cea2e8a9 10281 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
10282 return s;
10283 }
395c3793 10284 else {
9b3023bc 10285 bool readline_overriden = FALSE;
6136c704 10286 GV *gv_readline;
9b3023bc 10287 GV **gvp;
02aa26ce 10288 /* we're in a filehandle read situation */
3280af22 10289 d = PL_tokenbuf;
02aa26ce
NT
10290
10291 /* turn <> into <ARGV> */
79072805 10292 if (!len)
689badd5 10293 Copy("ARGV",d,5,char);
02aa26ce 10294
9b3023bc 10295 /* Check whether readline() is overriden */
fafc274c 10296 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 10297 if ((gv_readline
ba979b31 10298 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 10299 ||
017a3ce5 10300 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 10301 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 10302 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
10303 readline_overriden = TRUE;
10304
02aa26ce
NT
10305 /* if <$fh>, create the ops to turn the variable into a
10306 filehandle
10307 */
79072805 10308 if (*d == '$') {
02aa26ce
NT
10309 /* try to find it in the pad for this block, otherwise find
10310 add symbol table ops
10311 */
bc9b26ca 10312 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 10313 if (tmp != NOT_IN_PAD) {
00b1698f 10314 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
10315 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10316 HEK * const stashname = HvNAME_HEK(stash);
10317 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 10318 sv_catpvs(sym, "::");
f558d5af
JH
10319 sv_catpv(sym, d+1);
10320 d = SvPVX(sym);
10321 goto intro_sym;
10322 }
10323 else {
6136c704 10324 OP * const o = newOP(OP_PADSV, 0);
f558d5af 10325 o->op_targ = tmp;
9b3023bc
RGS
10326 PL_lex_op = readline_overriden
10327 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10328 op_append_elem(OP_LIST, o,
9b3023bc
RGS
10329 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10330 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 10331 }
a0d0e21e
LW
10332 }
10333 else {
f558d5af
JH
10334 GV *gv;
10335 ++d;
10336intro_sym:
10337 gv = gv_fetchpv(d,
10338 (PL_in_eval
10339 ? (GV_ADDMULTI | GV_ADDINEVAL)
25db2ea6 10340 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
f558d5af 10341 SVt_PV);
9b3023bc
RGS
10342 PL_lex_op = readline_overriden
10343 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10344 op_append_elem(OP_LIST,
9b3023bc
RGS
10345 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10346 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10347 : (OP*)newUNOP(OP_READLINE, 0,
10348 newUNOP(OP_RV2SV, 0,
10349 newGVOP(OP_GV, 0, gv)));
a0d0e21e 10350 }
7c6fadd6
RGS
10351 if (!readline_overriden)
10352 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
10353 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10354 pl_yylval.ival = OP_NULL;
79072805 10355 }
02aa26ce
NT
10356
10357 /* If it's none of the above, it must be a literal filehandle
10358 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 10359 else {
25db2ea6 10360 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9b3023bc
RGS
10361 PL_lex_op = readline_overriden
10362 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10363 op_append_elem(OP_LIST,
9b3023bc
RGS
10364 newGVOP(OP_GV, 0, gv),
10365 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10366 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 10367 pl_yylval.ival = OP_NULL;
79072805
LW
10368 }
10369 }
02aa26ce 10370
79072805
LW
10371 return s;
10372}
10373
02aa26ce
NT
10374
10375/* scan_str
10376 takes: start position in buffer
09bef843
SB
10377 keep_quoted preserve \ on the embedded delimiter(s)
10378 keep_delims preserve the delimiters around the string
d24ca0c5
DM
10379 re_reparse compiling a run-time /(?{})/:
10380 collapse // to /, and skip encoding src
02aa26ce
NT
10381 returns: position to continue reading from buffer
10382 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10383 updates the read buffer.
10384
10385 This subroutine pulls a string out of the input. It is called for:
10386 q single quotes q(literal text)
10387 ' single quotes 'literal text'
10388 qq double quotes qq(interpolate $here please)
10389 " double quotes "interpolate $here please"
10390 qx backticks qx(/bin/ls -l)
10391 ` backticks `/bin/ls -l`
10392 qw quote words @EXPORT_OK = qw( func() $spam )
10393 m// regexp match m/this/
10394 s/// regexp substitute s/this/that/
10395 tr/// string transliterate tr/this/that/
10396 y/// string transliterate y/this/that/
10397 ($*@) sub prototypes sub foo ($)
09bef843 10398 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
10399 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10400
10401 In most of these cases (all but <>, patterns and transliterate)
10402 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10403 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10404 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10405 calls scan_str().
4e553d73 10406
02aa26ce
NT
10407 It skips whitespace before the string starts, and treats the first
10408 character as the delimiter. If the delimiter is one of ([{< then
10409 the corresponding "close" character )]}> is used as the closing
10410 delimiter. It allows quoting of delimiters, and if the string has
10411 balanced delimiters ([{<>}]) it allows nesting.
10412
37fd879b
HS
10413 On success, the SV with the resulting string is put into lex_stuff or,
10414 if that is already non-NULL, into lex_repl. The second case occurs only
10415 when parsing the RHS of the special constructs s/// and tr/// (y///).
10416 For convenience, the terminating delimiter character is stuffed into
10417 SvIVX of the SV.
02aa26ce
NT
10418*/
10419
76e3520e 10420STATIC char *
4d68ffa0
KW
10421S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse,
10422 bool deprecate_escaped_meta /* Should we issue a deprecation warning
10423 for certain paired metacharacters that
10424 appear escaped within it */
10425 )
79072805 10426{
97aff369 10427 dVAR;
a8d9c7ae
KW
10428 SV *sv; /* scalar value: string */
10429 const char *tmps; /* temp string, used for delimiter matching */
eb578fdb
KW
10430 char *s = start; /* current position in the buffer */
10431 char term; /* terminating character */
10432 char *to; /* current position in the sv's data */
a8d9c7ae
KW
10433 I32 brackets = 1; /* bracket nesting level */
10434 bool has_utf8 = FALSE; /* is there any utf8 content? */
10435 I32 termcode; /* terminating char. code */
10436 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
10437 STRLEN termlen; /* length of terminating string */
10438 int last_off = 0; /* last position for nesting bracket */
4d68ffa0 10439 char *escaped_open = NULL;
5db06880
NC
10440#ifdef PERL_MAD
10441 int stuffstart;
10442 char *tstart;
10443#endif
02aa26ce 10444
7918f24d
NC
10445 PERL_ARGS_ASSERT_SCAN_STR;
10446
02aa26ce 10447 /* skip space before the delimiter */
29595ff2
NC
10448 if (isSPACE(*s)) {
10449 s = PEEKSPACE(s);
10450 }
02aa26ce 10451
5db06880 10452#ifdef PERL_MAD
cd81e915
NC
10453 if (PL_realtokenstart >= 0) {
10454 stuffstart = PL_realtokenstart;
10455 PL_realtokenstart = -1;
5db06880
NC
10456 }
10457 else
10458 stuffstart = start - SvPVX(PL_linestr);
10459#endif
02aa26ce 10460 /* mark where we are, in case we need to report errors */
79072805 10461 CLINE;
02aa26ce
NT
10462
10463 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 10464 term = *s;
220e2d4e
IH
10465 if (!UTF) {
10466 termcode = termstr[0] = term;
10467 termlen = 1;
10468 }
10469 else {
4b88fb76 10470 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
220e2d4e
IH
10471 Copy(s, termstr, termlen, U8);
10472 if (!UTF8_IS_INVARIANT(term))
10473 has_utf8 = TRUE;
10474 }
b1c7b182 10475
02aa26ce 10476 /* mark where we are */
57843af0 10477 PL_multi_start = CopLINE(PL_curcop);
3280af22 10478 PL_multi_open = term;
02aa26ce
NT
10479
10480 /* find corresponding closing delimiter */
93a17b20 10481 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
10482 termcode = termstr[0] = term = tmps[5];
10483
3280af22 10484 PL_multi_close = term;
79072805 10485
4d68ffa0
KW
10486 /* A warning is raised if the input parameter requires it for escaped (by a
10487 * backslash) paired metacharacters {} [] and () when the delimiters are
10488 * those same characters, and the backslash is ineffective. This doesn't
10489 * happen for <>, as they aren't metas. */
10490 if (deprecate_escaped_meta
10491 && (PL_multi_open == PL_multi_close
10492 || ! ckWARN_d(WARN_DEPRECATED)
10493 || PL_multi_open == '<'))
10494 {
10495 deprecate_escaped_meta = FALSE;
10496 }
10497
561b68a9
SH
10498 /* create a new SV to hold the contents. 79 is the SV's initial length.
10499 What a random number. */
7d0a29fe
NC
10500 sv = newSV_type(SVt_PVIV);
10501 SvGROW(sv, 80);
45977657 10502 SvIV_set(sv, termcode);
a0d0e21e 10503 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
10504
10505 /* move past delimiter and try to read a complete string */
09bef843 10506 if (keep_delims)
220e2d4e
IH
10507 sv_catpvn(sv, s, termlen);
10508 s += termlen;
5db06880
NC
10509#ifdef PERL_MAD
10510 tstart = SvPVX(PL_linestr) + stuffstart;
1cac5c33 10511 if (PL_madskills && !PL_thisopen && !keep_delims) {
cd81e915 10512 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
10513 stuffstart = s - SvPVX(PL_linestr);
10514 }
10515#endif
93a17b20 10516 for (;;) {
d24ca0c5 10517 if (PL_encoding && !UTF && !re_reparse) {
220e2d4e
IH
10518 bool cont = TRUE;
10519
10520 while (cont) {
95a20fc0 10521 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 10522 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 10523 &offset, (char*)termstr, termlen);
2cc6fe62
FC
10524 const char *ns;
10525 char *svlast;
10526
10527 if (SvIsCOW(PL_linestr)) {
10528 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos;
10529 STRLEN oldoldbufptr_pos, linestart_pos, last_uni_pos;
10530 STRLEN last_lop_pos, re_eval_start_pos, s_pos;
10531 char *buf = SvPVX(PL_linestr);
10532 bufend_pos = PL_parser->bufend - buf;
10533 bufptr_pos = PL_parser->bufptr - buf;
10534 oldbufptr_pos = PL_parser->oldbufptr - buf;
10535 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
10536 linestart_pos = PL_parser->linestart - buf;
10537 last_uni_pos = PL_parser->last_uni
10538 ? PL_parser->last_uni - buf
10539 : 0;
10540 last_lop_pos = PL_parser->last_lop
10541 ? PL_parser->last_lop - buf
10542 : 0;
10543 re_eval_start_pos =
10544 PL_parser->lex_shared->re_eval_start ?
10545 PL_parser->lex_shared->re_eval_start - buf : 0;
10546 s_pos = s - buf;
10547
10548 sv_force_normal(PL_linestr);
10549
10550 buf = SvPVX(PL_linestr);
10551 PL_parser->bufend = buf + bufend_pos;
10552 PL_parser->bufptr = buf + bufptr_pos;
10553 PL_parser->oldbufptr = buf + oldbufptr_pos;
10554 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
10555 PL_parser->linestart = buf + linestart_pos;
10556 if (PL_parser->last_uni)
10557 PL_parser->last_uni = buf + last_uni_pos;
10558 if (PL_parser->last_lop)
10559 PL_parser->last_lop = buf + last_lop_pos;
10560 if (PL_parser->lex_shared->re_eval_start)
10561 PL_parser->lex_shared->re_eval_start =
10562 buf + re_eval_start_pos;
10563 s = buf + s_pos;
10564 }
10565 ns = SvPVX_const(PL_linestr) + offset;
10566 svlast = SvEND(sv) - 1;
220e2d4e
IH
10567
10568 for (; s < ns; s++) {
60d63348 10569 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10570 COPLINE_INC_WITH_HERELINES;
220e2d4e
IH
10571 }
10572 if (!found)
10573 goto read_more_line;
10574 else {
10575 /* handle quoted delimiters */
52327caf 10576 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 10577 const char *t;
95a20fc0 10578 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
10579 t--;
10580 if ((svlast-1 - t) % 2) {
10581 if (!keep_quoted) {
10582 *(svlast-1) = term;
10583 *svlast = '\0';
10584 SvCUR_set(sv, SvCUR(sv) - 1);
10585 }
10586 continue;
10587 }
10588 }
10589 if (PL_multi_open == PL_multi_close) {
10590 cont = FALSE;
10591 }
10592 else {
f54cb97a
AL
10593 const char *t;
10594 char *w;
0331ef07 10595 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
10596 /* At here, all closes are "was quoted" one,
10597 so we don't check PL_multi_close. */
10598 if (*t == '\\') {
10599 if (!keep_quoted && *(t+1) == PL_multi_open)
10600 t++;
10601 else
10602 *w++ = *t++;
10603 }
10604 else if (*t == PL_multi_open)
10605 brackets++;
10606
10607 *w = *t;
10608 }
10609 if (w < t) {
10610 *w++ = term;
10611 *w = '\0';
95a20fc0 10612 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 10613 }
0331ef07 10614 last_off = w - SvPVX(sv);
220e2d4e
IH
10615 if (--brackets <= 0)
10616 cont = FALSE;
10617 }
10618 }
10619 }
10620 if (!keep_delims) {
10621 SvCUR_set(sv, SvCUR(sv) - 1);
10622 *SvEND(sv) = '\0';
10623 }
10624 break;
10625 }
10626
02aa26ce 10627 /* extend sv if need be */
3280af22 10628 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 10629 /* set 'to' to the next character in the sv's string */
463ee0b2 10630 to = SvPVX(sv)+SvCUR(sv);
09bef843 10631
02aa26ce 10632 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
10633 if (PL_multi_open == PL_multi_close) {
10634 for (; s < PL_bufend; s++,to++) {
02aa26ce 10635 /* embedded newlines increment the current line number */
60d63348 10636 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10637 COPLINE_INC_WITH_HERELINES;
02aa26ce 10638 /* handle quoted delimiters */
3280af22 10639 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
d24ca0c5
DM
10640 if (!keep_quoted
10641 && (s[1] == term
10642 || (re_reparse && s[1] == '\\'))
10643 )
a0d0e21e 10644 s++;
d24ca0c5 10645 /* any other quotes are simply copied straight through */
a0d0e21e
LW
10646 else
10647 *to++ = *s++;
10648 }
02aa26ce
NT
10649 /* terminate when run out of buffer (the for() condition), or
10650 have found the terminator */
220e2d4e
IH
10651 else if (*s == term) {
10652 if (termlen == 1)
10653 break;
f3b9ce0f 10654 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
10655 break;
10656 }
63cd0674 10657 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10658 has_utf8 = TRUE;
93a17b20
LW
10659 *to = *s;
10660 }
10661 }
02aa26ce
NT
10662
10663 /* if the terminator isn't the same as the start character (e.g.,
10664 matched brackets), we have to allow more in the quoting, and
10665 be prepared for nested brackets.
10666 */
93a17b20 10667 else {
02aa26ce 10668 /* read until we run out of string, or we find the terminator */
3280af22 10669 for (; s < PL_bufend; s++,to++) {
02aa26ce 10670 /* embedded newlines increment the line count */
60d63348 10671 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10672 COPLINE_INC_WITH_HERELINES;
02aa26ce 10673 /* backslashes can escape the open or closing characters */
3280af22 10674 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
10675 if (!keep_quoted &&
10676 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
4d68ffa0 10677 {
a0d0e21e 10678 s++;
4d68ffa0
KW
10679
10680 /* Here, 'deprecate_escaped_meta' is true iff the
10681 * delimiters are paired metacharacters, and 's' points
10682 * to an occurrence of one of them within the string,
10683 * which was preceded by a backslash. If this is a
10684 * context where the delimiter is also a metacharacter,
10685 * the backslash is useless, and deprecated. () and []
10686 * are meta in any context. {} are meta only when
70502ce0
KW
10687 * appearing in a quantifier or in things like '\p{'
10688 * (but '\\p{' isn't meta). They also aren't meta
10689 * unless there is a matching closed, escaped char
10690 * later on within the string. If 's' points to an
10691 * open, set a flag; if to a close, test that flag, and
10692 * raise a warning if it was set */
4d68ffa0
KW
10693
10694 if (deprecate_escaped_meta) {
10695 if (*s == PL_multi_open) {
10696 if (*s != '{') {
10697 escaped_open = s;
10698 }
70502ce0
KW
10699 /* Look for a closing '\}' */
10700 else if (regcurly(s, TRUE)) {
4d68ffa0
KW
10701 escaped_open = s;
10702 }
70502ce0
KW
10703 /* Look for e.g. '\x{' */
10704 else if (s - start > 2
10705 && _generic_isCC(*(s-2),
10706 _CC_BACKSLASH_FOO_LBRACE_IS_META))
10707 { /* Exclude '\\x', '\\\\x', etc. */
10708 char *lookbehind = s - 4;
10709 bool is_meta = TRUE;
10710 while (lookbehind >= start
10711 && *lookbehind == '\\')
10712 {
10713 is_meta = ! is_meta;
10714 lookbehind--;
10715 }
10716 if (is_meta) {
10717 escaped_open = s;
10718 }
10719 }
4d68ffa0
KW
10720 }
10721 else if (escaped_open) {
10722 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
10723 "Useless use of '\\'; doesn't escape metacharacter '%c'", PL_multi_open);
10724 escaped_open = NULL;
10725 }
10726 }
10727 }
a0d0e21e
LW
10728 else
10729 *to++ = *s++;
10730 }
02aa26ce 10731 /* allow nested opens and closes */
3280af22 10732 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 10733 break;
3280af22 10734 else if (*s == PL_multi_open)
93a17b20 10735 brackets++;
63cd0674 10736 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10737 has_utf8 = TRUE;
93a17b20
LW
10738 *to = *s;
10739 }
10740 }
02aa26ce 10741 /* terminate the copied string and update the sv's end-of-string */
93a17b20 10742 *to = '\0';
95a20fc0 10743 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 10744
02aa26ce
NT
10745 /*
10746 * this next chunk reads more into the buffer if we're not done yet
10747 */
10748
b1c7b182
GS
10749 if (s < PL_bufend)
10750 break; /* handle case where we are done yet :-) */
79072805 10751
6a27c188 10752#ifndef PERL_STRICT_CR
95a20fc0 10753 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
10754 if ((to[-2] == '\r' && to[-1] == '\n') ||
10755 (to[-2] == '\n' && to[-1] == '\r'))
10756 {
f63a84b2
LW
10757 to[-2] = '\n';
10758 to--;
95a20fc0 10759 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
10760 }
10761 else if (to[-1] == '\r')
10762 to[-1] = '\n';
10763 }
95a20fc0 10764 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
10765 to[-1] = '\n';
10766#endif
10767
220e2d4e 10768 read_more_line:
02aa26ce
NT
10769 /* if we're out of file, or a read fails, bail and reset the current
10770 line marker so we can report where the unterminated string began
10771 */
5db06880
NC
10772#ifdef PERL_MAD
10773 if (PL_madskills) {
c35e046a 10774 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10775 if (PL_thisstuff)
10776 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10777 else
cd81e915 10778 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10779 }
10780#endif
83944c01 10781 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
10782 PL_bufptr = PL_bufend;
10783 if (!lex_next_chunk(0)) {
c07a80fd 10784 sv_free(sv);
eb160463 10785 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 10786 return NULL;
79072805 10787 }
f0e67a1d 10788 s = PL_bufptr;
5db06880
NC
10789#ifdef PERL_MAD
10790 stuffstart = 0;
10791#endif
378cc40b 10792 }
4e553d73 10793
02aa26ce
NT
10794 /* at this point, we have successfully read the delimited string */
10795
d24ca0c5 10796 if (!PL_encoding || UTF || re_reparse) {
5db06880
NC
10797#ifdef PERL_MAD
10798 if (PL_madskills) {
c35e046a 10799 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 10800 const int len = s - tstart;
cd81e915 10801 if (PL_thisstuff)
c35e046a 10802 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10803 else
c35e046a 10804 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10805 if (!PL_thisclose && !keep_delims)
10806 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
10807 }
10808#endif
10809
220e2d4e
IH
10810 if (keep_delims)
10811 sv_catpvn(sv, s, termlen);
10812 s += termlen;
10813 }
5db06880
NC
10814#ifdef PERL_MAD
10815 else {
10816 if (PL_madskills) {
c35e046a
AL
10817 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10818 const int len = s - tstart - termlen;
cd81e915 10819 if (PL_thisstuff)
c35e046a 10820 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10821 else
c35e046a 10822 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10823 if (!PL_thisclose && !keep_delims)
10824 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
10825 }
10826 }
10827#endif
d24ca0c5 10828 if (has_utf8 || (PL_encoding && !re_reparse))
b1c7b182 10829 SvUTF8_on(sv);
d0063567 10830
57843af0 10831 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
10832
10833 /* if we allocated too much space, give some back */
93a17b20
LW
10834 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10835 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 10836 SvPV_renew(sv, SvLEN(sv));
79072805 10837 }
02aa26ce
NT
10838
10839 /* decide whether this is the first or second quoted string we've read
10840 for this op
10841 */
4e553d73 10842
3280af22 10843 if (PL_lex_stuff)
7cc34111 10844 PL_sublex_info.repl = sv;
79072805 10845 else
3280af22 10846 PL_lex_stuff = sv;
378cc40b
LW
10847 return s;
10848}
10849
02aa26ce
NT
10850/*
10851 scan_num
10852 takes: pointer to position in buffer
10853 returns: pointer to new position in buffer
6154021b 10854 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
10855
10856 Read a number in any of the formats that Perl accepts:
10857
7fd134d9
JH
10858 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10859 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
10860 0b[01](_?[01])*
10861 0[0-7](_?[0-7])*
10862 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10863
3280af22 10864 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10865 thing it reads.
10866
10867 If it reads a number without a decimal point or an exponent, it will
10868 try converting the number to an integer and see if it can do so
10869 without loss of precision.
10870*/
4e553d73 10871
378cc40b 10872char *
bfed75c6 10873Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10874{
97aff369 10875 dVAR;
eb578fdb
KW
10876 const char *s = start; /* current position in buffer */
10877 char *d; /* destination in temp buffer */
10878 char *e; /* end of temp buffer */
86554af2 10879 NV nv; /* number read, as a double */
a0714e2c 10880 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10881 bool floatit; /* boolean: int or float? */
cbbf8932 10882 const char *lastub = NULL; /* position of last underbar */
a1894d81 10883 static const char* const number_too_long = "Number too long";
378cc40b 10884
7918f24d
NC
10885 PERL_ARGS_ASSERT_SCAN_NUM;
10886
02aa26ce
NT
10887 /* We use the first character to decide what type of number this is */
10888
378cc40b 10889 switch (*s) {
79072805 10890 default:
5637ef5b 10891 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
4e553d73 10892
02aa26ce 10893 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10894 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10895 case '0':
10896 {
02aa26ce
NT
10897 /* variables:
10898 u holds the "number so far"
4f19785b
WSI
10899 shift the power of 2 of the base
10900 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10901 overflowed was the number more than we can hold?
10902
10903 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10904 we in octal/hex/binary?" indicator to disallow hex characters
10905 when in octal mode.
02aa26ce 10906 */
9e24b6e2
JH
10907 NV n = 0.0;
10908 UV u = 0;
79072805 10909 I32 shift;
9e24b6e2 10910 bool overflowed = FALSE;
61f33854 10911 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10912 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10913 static const char* const bases[5] =
10914 { "", "binary", "", "octal", "hexadecimal" };
10915 static const char* const Bases[5] =
10916 { "", "Binary", "", "Octal", "Hexadecimal" };
10917 static const char* const maxima[5] =
10918 { "",
10919 "0b11111111111111111111111111111111",
10920 "",
10921 "037777777777",
10922 "0xffffffff" };
bfed75c6 10923 const char *base, *Base, *max;
378cc40b 10924
02aa26ce 10925 /* check for hex */
a674e8db 10926 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10927 shift = 4;
10928 s += 2;
61f33854 10929 just_zero = FALSE;
a674e8db 10930 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10931 shift = 1;
10932 s += 2;
61f33854 10933 just_zero = FALSE;
378cc40b 10934 }
02aa26ce 10935 /* check for a decimal in disguise */
b78218b7 10936 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10937 goto decimal;
02aa26ce 10938 /* so it must be octal */
928753ea 10939 else {
378cc40b 10940 shift = 3;
928753ea
JH
10941 s++;
10942 }
10943
10944 if (*s == '_') {
a2a5de95 10945 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10946 "Misplaced _ in number");
10947 lastub = s++;
10948 }
9e24b6e2
JH
10949
10950 base = bases[shift];
10951 Base = Bases[shift];
10952 max = maxima[shift];
02aa26ce 10953
4f19785b 10954 /* read the rest of the number */
378cc40b 10955 for (;;) {
9e24b6e2 10956 /* x is used in the overflow test,
893fe2c2 10957 b is the digit we're adding on. */
9e24b6e2 10958 UV x, b;
55497cff 10959
378cc40b 10960 switch (*s) {
02aa26ce
NT
10961
10962 /* if we don't mention it, we're done */
378cc40b
LW
10963 default:
10964 goto out;
02aa26ce 10965
928753ea 10966 /* _ are ignored -- but warned about if consecutive */
de3bb511 10967 case '_':
a2a5de95
NC
10968 if (lastub && s == lastub + 1)
10969 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10970 "Misplaced _ in number");
928753ea 10971 lastub = s++;
de3bb511 10972 break;
02aa26ce
NT
10973
10974 /* 8 and 9 are not octal */
378cc40b 10975 case '8': case '9':
4f19785b 10976 if (shift == 3)
cea2e8a9 10977 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10978 /* FALL THROUGH */
02aa26ce
NT
10979
10980 /* octal digits */
4f19785b 10981 case '2': case '3': case '4':
378cc40b 10982 case '5': case '6': case '7':
4f19785b 10983 if (shift == 1)
cea2e8a9 10984 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10985 /* FALL THROUGH */
10986
10987 case '0': case '1':
02aa26ce 10988 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10989 goto digit;
02aa26ce
NT
10990
10991 /* hex digits */
378cc40b
LW
10992 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10993 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10994 /* make sure they said 0x */
378cc40b
LW
10995 if (shift != 4)
10996 goto out;
55497cff 10997 b = (*s++ & 7) + 9;
02aa26ce
NT
10998
10999 /* Prepare to put the digit we have onto the end
11000 of the number so far. We check for overflows.
11001 */
11002
55497cff 11003 digit:
61f33854 11004 just_zero = FALSE;
9e24b6e2
JH
11005 if (!overflowed) {
11006 x = u << shift; /* make room for the digit */
11007
11008 if ((x >> shift) != u
11009 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
11010 overflowed = TRUE;
11011 n = (NV) u;
9b387841
NC
11012 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11013 "Integer overflow in %s number",
11014 base);
9e24b6e2
JH
11015 } else
11016 u = x | b; /* add the digit to the end */
11017 }
11018 if (overflowed) {
11019 n *= nvshift[shift];
11020 /* If an NV has not enough bits in its
11021 * mantissa to represent an UV this summing of
11022 * small low-order numbers is a waste of time
11023 * (because the NV cannot preserve the
11024 * low-order bits anyway): we could just
11025 * remember when did we overflow and in the
11026 * end just multiply n by the right
11027 * amount. */
11028 n += (NV) b;
55497cff 11029 }
378cc40b
LW
11030 break;
11031 }
11032 }
02aa26ce
NT
11033
11034 /* if we get here, we had success: make a scalar value from
11035 the number.
11036 */
378cc40b 11037 out:
928753ea
JH
11038
11039 /* final misplaced underbar check */
11040 if (s[-1] == '_') {
a2a5de95 11041 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
11042 }
11043
9e24b6e2 11044 if (overflowed) {
a2a5de95
NC
11045 if (n > 4294967295.0)
11046 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11047 "%s number > %s non-portable",
11048 Base, max);
b081dd7e 11049 sv = newSVnv(n);
9e24b6e2
JH
11050 }
11051 else {
15041a67 11052#if UVSIZE > 4
a2a5de95
NC
11053 if (u > 0xffffffff)
11054 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
11055 "%s number > %s non-portable",
11056 Base, max);
2cc4c2dc 11057#endif
b081dd7e 11058 sv = newSVuv(u);
9e24b6e2 11059 }
61f33854 11060 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 11061 sv = new_constant(start, s - start, "integer",
eb0d8d16 11062 sv, NULL, NULL, 0);
61f33854 11063 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 11064 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
11065 }
11066 break;
02aa26ce
NT
11067
11068 /*
11069 handle decimal numbers.
11070 we're also sent here when we read a 0 as the first digit
11071 */
378cc40b
LW
11072 case '1': case '2': case '3': case '4': case '5':
11073 case '6': case '7': case '8': case '9': case '.':
11074 decimal:
3280af22
NIS
11075 d = PL_tokenbuf;
11076 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 11077 floatit = FALSE;
02aa26ce
NT
11078
11079 /* read next group of digits and _ and copy into d */
de3bb511 11080 while (isDIGIT(*s) || *s == '_') {
4e553d73 11081 /* skip underscores, checking for misplaced ones
02aa26ce
NT
11082 if -w is on
11083 */
93a17b20 11084 if (*s == '_') {
a2a5de95
NC
11085 if (lastub && s == lastub + 1)
11086 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11087 "Misplaced _ in number");
928753ea 11088 lastub = s++;
93a17b20 11089 }
fc36a67e 11090 else {
02aa26ce 11091 /* check for end of fixed-length buffer */
fc36a67e 11092 if (d >= e)
e5cc0c0f 11093 Perl_croak(aTHX_ "%s", number_too_long);
02aa26ce 11094 /* if we're ok, copy the character */
378cc40b 11095 *d++ = *s++;
fc36a67e 11096 }
378cc40b 11097 }
02aa26ce
NT
11098
11099 /* final misplaced underbar check */
928753ea 11100 if (lastub && s == lastub + 1) {
a2a5de95 11101 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 11102 }
02aa26ce
NT
11103
11104 /* read a decimal portion if there is one. avoid
11105 3..5 being interpreted as the number 3. followed
11106 by .5
11107 */
2f3197b3 11108 if (*s == '.' && s[1] != '.') {
79072805 11109 floatit = TRUE;
378cc40b 11110 *d++ = *s++;
02aa26ce 11111
928753ea 11112 if (*s == '_') {
a2a5de95
NC
11113 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11114 "Misplaced _ in number");
928753ea
JH
11115 lastub = s;
11116 }
11117
11118 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 11119 */
fc36a67e 11120 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 11121 /* fixed length buffer check */
fc36a67e 11122 if (d >= e)
e5cc0c0f 11123 Perl_croak(aTHX_ "%s", number_too_long);
928753ea 11124 if (*s == '_') {
a2a5de95
NC
11125 if (lastub && s == lastub + 1)
11126 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11127 "Misplaced _ in number");
928753ea
JH
11128 lastub = s;
11129 }
11130 else
fc36a67e 11131 *d++ = *s;
378cc40b 11132 }
928753ea
JH
11133 /* fractional part ending in underbar? */
11134 if (s[-1] == '_') {
a2a5de95
NC
11135 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11136 "Misplaced _ in number");
928753ea 11137 }
dd629d5b
GS
11138 if (*s == '.' && isDIGIT(s[1])) {
11139 /* oops, it's really a v-string, but without the "v" */
f4758303 11140 s = start;
dd629d5b
GS
11141 goto vstring;
11142 }
378cc40b 11143 }
02aa26ce
NT
11144
11145 /* read exponent part, if present */
3792a11b 11146 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
11147 floatit = TRUE;
11148 s++;
02aa26ce
NT
11149
11150 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 11151 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 11152
7fd134d9
JH
11153 /* stray preinitial _ */
11154 if (*s == '_') {
a2a5de95
NC
11155 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11156 "Misplaced _ in number");
7fd134d9
JH
11157 lastub = s++;
11158 }
11159
02aa26ce 11160 /* allow positive or negative exponent */
378cc40b
LW
11161 if (*s == '+' || *s == '-')
11162 *d++ = *s++;
02aa26ce 11163
7fd134d9
JH
11164 /* stray initial _ */
11165 if (*s == '_') {
a2a5de95
NC
11166 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11167 "Misplaced _ in number");
7fd134d9
JH
11168 lastub = s++;
11169 }
11170
7fd134d9
JH
11171 /* read digits of exponent */
11172 while (isDIGIT(*s) || *s == '_') {
11173 if (isDIGIT(*s)) {
11174 if (d >= e)
e5cc0c0f 11175 Perl_croak(aTHX_ "%s", number_too_long);
b3b48e3e 11176 *d++ = *s++;
7fd134d9
JH
11177 }
11178 else {
041457d9 11179 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
11180 (!isDIGIT(s[1]) && s[1] != '_')))
11181 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
11182 "Misplaced _ in number");
b3b48e3e 11183 lastub = s++;
7fd134d9 11184 }
7fd134d9 11185 }
378cc40b 11186 }
02aa26ce 11187
02aa26ce 11188
0b7fceb9 11189 /*
58bb9ec3
NC
11190 We try to do an integer conversion first if no characters
11191 indicating "float" have been found.
0b7fceb9
MU
11192 */
11193
11194 if (!floatit) {
58bb9ec3 11195 UV uv;
6136c704 11196 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
11197
11198 if (flags == IS_NUMBER_IN_UV) {
11199 if (uv <= IV_MAX)
b081dd7e 11200 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 11201 else
b081dd7e 11202 sv = newSVuv(uv);
58bb9ec3
NC
11203 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
11204 if (uv <= (UV) IV_MIN)
b081dd7e 11205 sv = newSViv(-(IV)uv);
58bb9ec3
NC
11206 else
11207 floatit = TRUE;
11208 } else
11209 floatit = TRUE;
11210 }
0b7fceb9 11211 if (floatit) {
58bb9ec3
NC
11212 /* terminate the string */
11213 *d = '\0';
86554af2 11214 nv = Atof(PL_tokenbuf);
b081dd7e 11215 sv = newSVnv(nv);
86554af2 11216 }
86554af2 11217
eb0d8d16
NC
11218 if ( floatit
11219 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
11220 const char *const key = floatit ? "float" : "integer";
11221 const STRLEN keylen = floatit ? 5 : 7;
11222 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
11223 key, keylen, sv, NULL, NULL, 0);
11224 }
378cc40b 11225 break;
0b7fceb9 11226
e312add1 11227 /* if it starts with a v, it could be a v-string */
a7cb1f99 11228 case 'v':
dd629d5b 11229vstring:
561b68a9 11230 sv = newSV(5); /* preallocate storage space */
ecabb004
FC
11231 ENTER_with_name("scan_vstring");
11232 SAVEFREESV(sv);
65b06e02 11233 s = scan_vstring(s, PL_bufend, sv);
ecabb004
FC
11234 SvREFCNT_inc_simple_void_NN(sv);
11235 LEAVE_with_name("scan_vstring");
a7cb1f99 11236 break;
79072805 11237 }
a687059c 11238
02aa26ce
NT
11239 /* make the op for the constant and return */
11240
a86a20aa 11241 if (sv)
b73d6f50 11242 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 11243 else
5f66b61c 11244 lvalp->opval = NULL;
a687059c 11245
73d840c0 11246 return (char *)s;
378cc40b
LW
11247}
11248
76e3520e 11249STATIC char *
5aaab254 11250S_scan_formline(pTHX_ char *s)
378cc40b 11251{
97aff369 11252 dVAR;
eb578fdb
KW
11253 char *eol;
11254 char *t;
6136c704 11255 SV * const stuff = newSVpvs("");
79072805 11256 bool needargs = FALSE;
c5ee2135 11257 bool eofmt = FALSE;
5db06880
NC
11258#ifdef PERL_MAD
11259 char *tokenstart = s;
4f61fd4b
JC
11260 SV* savewhite = NULL;
11261
5db06880 11262 if (PL_madskills) {
cd81e915
NC
11263 savewhite = PL_thiswhite;
11264 PL_thiswhite = 0;
5db06880
NC
11265 }
11266#endif
378cc40b 11267
7918f24d
NC
11268 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11269
79072805 11270 while (!needargs) {
a1b95068 11271 if (*s == '.') {
c35e046a 11272 t = s+1;
51882d45 11273#ifdef PERL_STRICT_CR
c35e046a
AL
11274 while (SPACE_OR_TAB(*t))
11275 t++;
51882d45 11276#else
c35e046a
AL
11277 while (SPACE_OR_TAB(*t) || *t == '\r')
11278 t++;
51882d45 11279#endif
c5ee2135
WL
11280 if (*t == '\n' || t == PL_bufend) {
11281 eofmt = TRUE;
79072805 11282 break;
c5ee2135 11283 }
79072805 11284 }
583c9d5c
FC
11285 eol = (char *) memchr(s,'\n',PL_bufend-s);
11286 if (!eol++)
3280af22 11287 eol = PL_bufend;
79072805 11288 if (*s != '#') {
a0d0e21e
LW
11289 for (t = s; t < eol; t++) {
11290 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11291 needargs = FALSE;
11292 goto enough; /* ~~ must be first line in formline */
378cc40b 11293 }
a0d0e21e
LW
11294 if (*t == '@' || *t == '^')
11295 needargs = TRUE;
378cc40b 11296 }
7121b347
MG
11297 if (eol > s) {
11298 sv_catpvn(stuff, s, eol-s);
2dc4c65b 11299#ifndef PERL_STRICT_CR
7121b347
MG
11300 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11301 char *end = SvPVX(stuff) + SvCUR(stuff);
11302 end[-2] = '\n';
11303 end[-1] = '\0';
b162af07 11304 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 11305 }
2dc4c65b 11306#endif
7121b347
MG
11307 }
11308 else
11309 break;
79072805 11310 }
95a20fc0 11311 s = (char*)eol;
583c9d5c
FC
11312 if ((PL_rsfp || PL_parser->filtered)
11313 && PL_parser->form_lex_state == LEX_NORMAL) {
f0e67a1d 11314 bool got_some;
5db06880
NC
11315#ifdef PERL_MAD
11316 if (PL_madskills) {
cd81e915
NC
11317 if (PL_thistoken)
11318 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 11319 else
cd81e915 11320 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
11321 }
11322#endif
f0e67a1d 11323 PL_bufptr = PL_bufend;
83944c01 11324 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
11325 got_some = lex_next_chunk(0);
11326 CopLINE_dec(PL_curcop);
11327 s = PL_bufptr;
5db06880 11328#ifdef PERL_MAD
f0e67a1d 11329 tokenstart = PL_bufptr;
5db06880 11330#endif
f0e67a1d 11331 if (!got_some)
378cc40b 11332 break;
378cc40b 11333 }
463ee0b2 11334 incline(s);
79072805 11335 }
a0d0e21e 11336 enough:
5c9ae74d
FC
11337 if (!SvCUR(stuff) || needargs)
11338 PL_lex_state = PL_parser->form_lex_state;
a0d0e21e 11339 if (SvCUR(stuff)) {
705fe0e5 11340 PL_expect = XSTATE;
79072805 11341 if (needargs) {
cd81e915 11342 start_force(PL_curforce);
9ded7720 11343 NEXTVAL_NEXTTOKE.ival = 0;
705fe0e5 11344 force_next(FORMLBRACK);
79072805 11345 }
1bd51a4c 11346 if (!IN_BYTES) {
95a20fc0 11347 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
11348 SvUTF8_on(stuff);
11349 else if (PL_encoding)
11350 sv_recode_to_utf8(stuff, PL_encoding);
11351 }
cd81e915 11352 start_force(PL_curforce);
9ded7720 11353 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 11354 force_next(THING);
378cc40b 11355 }
79072805 11356 else {
8990e307 11357 SvREFCNT_dec(stuff);
c5ee2135
WL
11358 if (eofmt)
11359 PL_lex_formbrack = 0;
79072805 11360 }
5db06880
NC
11361#ifdef PERL_MAD
11362 if (PL_madskills) {
cd81e915
NC
11363 if (PL_thistoken)
11364 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 11365 else
cd81e915
NC
11366 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11367 PL_thiswhite = savewhite;
5db06880
NC
11368 }
11369#endif
79072805 11370 return s;
378cc40b 11371}
a687059c 11372
ba6d6ac9 11373I32
864dbfa3 11374Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 11375{
97aff369 11376 dVAR;
a3b680e6 11377 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 11378 CV* const outsidecv = PL_compcv;
8990e307 11379
7766f137 11380 SAVEI32(PL_subline);
3280af22 11381 save_item(PL_subname);
3280af22 11382 SAVESPTR(PL_compcv);
3280af22 11383
ea726b52 11384 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
11385 CvFLAGS(PL_compcv) |= flags;
11386
57843af0 11387 PL_subline = CopLINE(PL_curcop);
dd2155a4 11388 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 11389 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 11390 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
db4cf31d 11391 if (outsidecv && CvPADLIST(outsidecv))
8771da69
FC
11392 CvPADLIST(PL_compcv)->xpadl_outid =
11393 PadlistNAMES(CvPADLIST(outsidecv));
748a9306 11394
8990e307
LW
11395 return oldsavestack_ix;
11396}
11397
084592ab
CN
11398#ifdef __SC__
11399#pragma segment Perl_yylex
11400#endif
af41e527 11401static int
19c62481 11402S_yywarn(pTHX_ const char *const s, U32 flags)
8990e307 11403{
97aff369 11404 dVAR;
7918f24d
NC
11405
11406 PERL_ARGS_ASSERT_YYWARN;
11407
faef0170 11408 PL_in_eval |= EVAL_WARNONLY;
19c62481 11409 yyerror_pv(s, flags);
faef0170 11410 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 11411 return 0;
8990e307
LW
11412}
11413
11414int
15f169a1 11415Perl_yyerror(pTHX_ const char *const s)
463ee0b2 11416{
19c62481
BF
11417 PERL_ARGS_ASSERT_YYERROR;
11418 return yyerror_pvn(s, strlen(s), 0);
11419}
11420
11421int
11422Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11423{
11424 PERL_ARGS_ASSERT_YYERROR_PV;
11425 return yyerror_pvn(s, strlen(s), flags);
11426}
11427
11428int
19c62481
BF
11429Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11430{
97aff369 11431 dVAR;
bfed75c6 11432 const char *context = NULL;
68dc0745 11433 int contlen = -1;
46fc3d4c 11434 SV *msg;
19c62481 11435 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
5912531f 11436 int yychar = PL_parser->yychar;
463ee0b2 11437
19c62481 11438 PERL_ARGS_ASSERT_YYERROR_PVN;
7918f24d 11439
3280af22 11440 if (!yychar || (yychar == ';' && !PL_rsfp))
19c62481 11441 sv_catpvs(where_sv, "at EOF");
8bcfe651
TM
11442 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11443 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11444 PL_oldbufptr != PL_bufptr) {
f355267c
JH
11445 /*
11446 Only for NetWare:
11447 The code below is removed for NetWare because it abends/crashes on NetWare
11448 when the script has error such as not having the closing quotes like:
11449 if ($var eq "value)
11450 Checking of white spaces is anyway done in NetWare code.
11451 */
11452#ifndef NETWARE
3280af22
NIS
11453 while (isSPACE(*PL_oldoldbufptr))
11454 PL_oldoldbufptr++;
f355267c 11455#endif
3280af22
NIS
11456 context = PL_oldoldbufptr;
11457 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 11458 }
8bcfe651
TM
11459 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11460 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
11461 /*
11462 Only for NetWare:
11463 The code below is removed for NetWare because it abends/crashes on NetWare
11464 when the script has error such as not having the closing quotes like:
11465 if ($var eq "value)
11466 Checking of white spaces is anyway done in NetWare code.
11467 */
11468#ifndef NETWARE
3280af22
NIS
11469 while (isSPACE(*PL_oldbufptr))
11470 PL_oldbufptr++;
f355267c 11471#endif
3280af22
NIS
11472 context = PL_oldbufptr;
11473 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
11474 }
11475 else if (yychar > 255)
19c62481 11476 sv_catpvs(where_sv, "next token ???");
12fbd33b 11477 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
11478 if (PL_lex_state == LEX_NORMAL ||
11479 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
19c62481 11480 sv_catpvs(where_sv, "at end of line");
3280af22 11481 else if (PL_lex_inpat)
19c62481 11482 sv_catpvs(where_sv, "within pattern");
463ee0b2 11483 else
19c62481 11484 sv_catpvs(where_sv, "within string");
463ee0b2 11485 }
46fc3d4c 11486 else {
19c62481 11487 sv_catpvs(where_sv, "next char ");
46fc3d4c 11488 if (yychar < 32)
cea2e8a9 11489 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 11490 else if (isPRINT_LC(yychar)) {
88c9ea1e 11491 const char string = yychar;
5e7aa789
NC
11492 sv_catpvn(where_sv, &string, 1);
11493 }
463ee0b2 11494 else
cea2e8a9 11495 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
463ee0b2 11496 }
b604e366 11497 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
ed094faf 11498 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 11499 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 11500 if (context)
b17a0679
FC
11501 Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
11502 UTF8fARG(UTF, contlen, context));
463ee0b2 11503 else
19c62481 11504 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
57843af0 11505 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 11506 Perl_sv_catpvf(aTHX_ msg,
57def98f 11507 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 11508 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 11509 PL_multi_end = 0;
a0d0e21e 11510 }
500960a6 11511 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 11512 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 11513 }
463ee0b2 11514 else
5a844595 11515 qerror(msg);
c7d6bfb2 11516 if (PL_error_count >= 10) {
eed484f9
DD
11517 SV * errsv;
11518 if (PL_in_eval && ((errsv = ERRSV), SvCUR(errsv)))
d2560b70 11519 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
eed484f9 11520 SVfARG(errsv), OutCopFILE(PL_curcop));
c7d6bfb2
GS
11521 else
11522 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 11523 OutCopFILE(PL_curcop));
c7d6bfb2 11524 }
3280af22 11525 PL_in_my = 0;
5c284bb0 11526 PL_in_my_stash = NULL;
463ee0b2
LW
11527 return 0;
11528}
084592ab
CN
11529#ifdef __SC__
11530#pragma segment Main
11531#endif
4e35701f 11532
b250498f 11533STATIC char*
3ae08724 11534S_swallow_bom(pTHX_ U8 *s)
01ec43d0 11535{
97aff369 11536 dVAR;
f54cb97a 11537 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
11538
11539 PERL_ARGS_ASSERT_SWALLOW_BOM;
11540
7aa207d6 11541 switch (s[0]) {
4e553d73
NIS
11542 case 0xFF:
11543 if (s[1] == 0xFE) {
ee6ba15d 11544 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 11545 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
dcbac5bb 11546 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11547 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 11548#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 11549 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 11550 s += 2;
dea0fc0b 11551 if (PL_bufend > (char*)s) {
81a923f4 11552 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 11553 }
b250498f 11554#else
dcbac5bb 11555 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11556 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 11557#endif
01ec43d0
GS
11558 }
11559 break;
78ae23f5 11560 case 0xFE:
7aa207d6 11561 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 11562#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11563 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
11564 s += 2;
11565 if (PL_bufend > (char *)s) {
81a923f4 11566 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 11567 }
b250498f 11568#else
dcbac5bb 11569 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11570 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 11571#endif
01ec43d0
GS
11572 }
11573 break;
3ae08724
GS
11574 case 0xEF:
11575 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 11576 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
11577 s += 3; /* UTF-8 */
11578 }
11579 break;
11580 case 0:
7aa207d6
JH
11581 if (slen > 3) {
11582 if (s[1] == 0) {
11583 if (s[2] == 0xFE && s[3] == 0xFF) {
11584 /* UTF-32 big-endian */
dcbac5bb 11585 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11586 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
11587 }
11588 }
11589 else if (s[2] == 0 && s[3] != 0) {
11590 /* Leading bytes
11591 * 00 xx 00 xx
11592 * are a good indicator of UTF-16BE. */
ee6ba15d 11593#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11594 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
11595 s = add_utf16_textfilter(s, FALSE);
11596#else
dcbac5bb 11597 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11598 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11599#endif
7aa207d6 11600 }
01ec43d0 11601 }
e294cc5d
JH
11602#ifdef EBCDIC
11603 case 0xDD:
11604 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11605 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11606 s += 4; /* UTF-8 */
11607 }
11608 break;
11609#endif
11610
7aa207d6
JH
11611 default:
11612 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11613 /* Leading bytes
11614 * xx 00 xx 00
11615 * are a good indicator of UTF-16LE. */
ee6ba15d 11616#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11617 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 11618 s = add_utf16_textfilter(s, TRUE);
ee6ba15d 11619#else
dcbac5bb 11620 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11621 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11622#endif
7aa207d6 11623 }
01ec43d0 11624 }
b8f84bb2 11625 return (char*)s;
b250498f 11626}
4755096e 11627
6e3aabd6
GS
11628
11629#ifndef PERL_NO_UTF16_FILTER
11630static I32
a28af015 11631S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 11632{
97aff369 11633 dVAR;
f3040f2c 11634 SV *const filter = FILTER_DATA(idx);
2a773401
NC
11635 /* We re-use this each time round, throwing the contents away before we
11636 return. */
2a773401 11637 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 11638 SV *const utf8_buffer = filter;
c28d6105 11639 IV status = IoPAGE(filter);
f2338a2e 11640 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 11641 I32 retval;
c8b0cbae 11642
c85ae797
NC
11643 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11644
c8b0cbae
NC
11645 /* As we're automatically added, at the lowest level, and hence only called
11646 from this file, we can be sure that we're not called in block mode. Hence
11647 don't bother writing code to deal with block mode. */
11648 if (maxlen) {
11649 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11650 }
c28d6105
NC
11651 if (status < 0) {
11652 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11653 }
1de9afcd 11654 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 11655 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 11656 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
11657 reverse ? 'l' : 'b', idx, maxlen, status,
11658 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11659
11660 while (1) {
11661 STRLEN chars;
11662 STRLEN have;
dea0fc0b 11663 I32 newlen;
2a773401 11664 U8 *end;
c28d6105
NC
11665 /* First, look in our buffer of existing UTF-8 data: */
11666 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11667
11668 if (nl) {
11669 ++nl;
11670 } else if (status == 0) {
11671 /* EOF */
11672 IoPAGE(filter) = 0;
11673 nl = SvEND(utf8_buffer);
11674 }
11675 if (nl) {
d2d1d4de
NC
11676 STRLEN got = nl - SvPVX(utf8_buffer);
11677 /* Did we have anything to append? */
11678 retval = got != 0;
11679 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
11680 /* Everything else in this code works just fine if SVp_POK isn't
11681 set. This, however, needs it, and we need it to work, else
11682 we loop infinitely because the buffer is never consumed. */
11683 sv_chop(utf8_buffer, nl);
11684 break;
11685 }
ba77e4cc 11686
c28d6105
NC
11687 /* OK, not a complete line there, so need to read some more UTF-16.
11688 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
11689 while (1) {
11690 if (status <= 0)
11691 break;
11692 if (SvCUR(utf16_buffer) >= 2) {
11693 /* Location of the high octet of the last complete code point.
11694 Gosh, UTF-16 is a pain. All the benefits of variable length,
11695 *coupled* with all the benefits of partial reads and
11696 endianness. */
11697 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11698 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11699
11700 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11701 break;
11702 }
11703
11704 /* We have the first half of a surrogate. Read more. */
11705 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11706 }
c28d6105 11707
c28d6105
NC
11708 status = FILTER_READ(idx + 1, utf16_buffer,
11709 160 + (SvCUR(utf16_buffer) & 1));
11710 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 11711 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
11712 if (status < 0) {
11713 /* Error */
11714 IoPAGE(filter) = status;
11715 return status;
11716 }
11717 }
11718
11719 chars = SvCUR(utf16_buffer) >> 1;
11720 have = SvCUR(utf8_buffer);
11721 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 11722
aa6dbd60 11723 if (reverse) {
c28d6105
NC
11724 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11725 (U8*)SvPVX_const(utf8_buffer) + have,
11726 chars * 2, &newlen);
aa6dbd60 11727 } else {
2a773401 11728 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
11729 (U8*)SvPVX_const(utf8_buffer) + have,
11730 chars * 2, &newlen);
2a773401 11731 }
c28d6105 11732 SvCUR_set(utf8_buffer, have + newlen);
2a773401 11733 *end = '\0';
c28d6105 11734
e07286ed
NC
11735 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11736 it's private to us, and utf16_to_utf8{,reversed} take a
11737 (pointer,length) pair, rather than a NUL-terminated string. */
11738 if(SvCUR(utf16_buffer) & 1) {
11739 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11740 SvCUR_set(utf16_buffer, 1);
11741 } else {
11742 SvCUR_set(utf16_buffer, 0);
11743 }
2a773401 11744 }
c28d6105
NC
11745 DEBUG_P(PerlIO_printf(Perl_debug_log,
11746 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11747 status,
11748 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11749 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 11750 return retval;
6e3aabd6 11751}
81a923f4
NC
11752
11753static U8 *
11754S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11755{
2a773401 11756 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 11757
c85ae797
NC
11758 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11759
c28d6105 11760 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 11761 sv_setpvs(filter, "");
2a773401 11762 IoLINES(filter) = reversed;
c28d6105
NC
11763 IoPAGE(filter) = 1; /* Not EOF */
11764
11765 /* Sadly, we have to return a valid pointer, come what may, so we have to
11766 ignore any error return from this. */
11767 SvCUR_set(PL_linestr, 0);
11768 if (FILTER_READ(0, PL_linestr, 0)) {
11769 SvUTF8_on(PL_linestr);
81a923f4 11770 } else {
c28d6105 11771 SvUTF8_on(PL_linestr);
81a923f4 11772 }
c28d6105 11773 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
11774 return (U8*)SvPVX(PL_linestr);
11775}
6e3aabd6 11776#endif
9f4817db 11777
f333445c
JP
11778/*
11779Returns a pointer to the next character after the parsed
11780vstring, as well as updating the passed in sv.
11781
11782Function must be called like
11783
615e0a48 11784 sv = sv_2mortal(newSV(5));
65b06e02 11785 s = scan_vstring(s,e,sv);
f333445c 11786
65b06e02 11787where s and e are the start and end of the string.
f333445c
JP
11788The sv should already be large enough to store the vstring
11789passed in, for performance reasons.
11790
615e0a48
FC
11791This function may croak if fatal warnings are enabled in the
11792calling scope, hence the sv_2mortal in the example (to prevent
11793a leak). Make sure to do SvREFCNT_inc afterwards if you use
11794sv_2mortal.
11795
f333445c
JP
11796*/
11797
11798char *
15f169a1 11799Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 11800{
97aff369 11801 dVAR;
bfed75c6
AL
11802 const char *pos = s;
11803 const char *start = s;
7918f24d
NC
11804
11805 PERL_ARGS_ASSERT_SCAN_VSTRING;
11806
f333445c 11807 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 11808 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 11809 pos++;
f333445c
JP
11810 if ( *pos != '.') {
11811 /* this may not be a v-string if followed by => */
bfed75c6 11812 const char *next = pos;
65b06e02 11813 while (next < e && isSPACE(*next))
8fc7bb1c 11814 ++next;
65b06e02 11815 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
11816 /* return string not v-string */
11817 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 11818 return (char *)pos;
f333445c
JP
11819 }
11820 }
11821
11822 if (!isALPHA(*pos)) {
89ebb4a3 11823 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 11824
d4c19fe8
AL
11825 if (*s == 'v')
11826 s++; /* get past 'v' */
f333445c 11827
76f68e9b 11828 sv_setpvs(sv, "");
f333445c
JP
11829
11830 for (;;) {
d4c19fe8 11831 /* this is atoi() that tolerates underscores */
0bd48802
AL
11832 U8 *tmpend;
11833 UV rev = 0;
d4c19fe8
AL
11834 const char *end = pos;
11835 UV mult = 1;
11836 while (--end >= s) {
11837 if (*end != '_') {
11838 const UV orev = rev;
f333445c
JP
11839 rev += (*end - '0') * mult;
11840 mult *= 10;
9b387841 11841 if (orev > rev)
dcbac5bb 11842 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
11843 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11844 "Integer overflow in decimal number");
f333445c
JP
11845 }
11846 }
11847#ifdef EBCDIC
11848 if (rev > 0x7FFFFFFF)
11849 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11850#endif
11851 /* Append native character for the rev point */
11852 tmpend = uvchr_to_utf8(tmpbuf, rev);
11853 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11854 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11855 SvUTF8_on(sv);
65b06e02 11856 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
11857 s = ++pos;
11858 else {
11859 s = pos;
11860 break;
11861 }
65b06e02 11862 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
11863 pos++;
11864 }
11865 SvPOK_on(sv);
11866 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11867 SvRMAGICAL_on(sv);
11868 }
73d840c0 11869 return (char *)s;
f333445c
JP
11870}
11871
88e1f1a2
JV
11872int
11873Perl_keyword_plugin_standard(pTHX_
11874 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11875{
11876 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11877 PERL_UNUSED_CONTEXT;
11878 PERL_UNUSED_ARG(keyword_ptr);
11879 PERL_UNUSED_ARG(keyword_len);
11880 PERL_UNUSED_ARG(op_ptr);
11881 return KEYWORD_PLUGIN_DECLINE;
11882}
11883
78cdf107 11884#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11885static void
78cdf107 11886S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11887{
11888 SAVEI32(PL_lex_brackets);
11889 if (PL_lex_brackets > 100)
11890 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11891 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11892 SAVEI32(PL_lex_allbrackets);
11893 PL_lex_allbrackets = 0;
11894 SAVEI8(PL_lex_fakeeof);
2dcac756 11895 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11896 if(yyparse(gramtype) && !PL_parser->error_count)
11897 qerror(Perl_mess(aTHX_ "Parse error"));
11898}
11899
78cdf107 11900#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11901static OP *
78cdf107 11902S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11903{
11904 OP *o;
11905 ENTER;
11906 SAVEVPTR(PL_eval_root);
11907 PL_eval_root = NULL;
78cdf107 11908 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11909 o = PL_eval_root;
11910 LEAVE;
11911 return o;
11912}
11913
78cdf107
Z
11914#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11915static OP *
11916S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11917{
11918 OP *exprop;
11919 if (flags & ~PARSE_OPTIONAL)
11920 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11921 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11922 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11923 if (!PL_parser->error_count)
11924 qerror(Perl_mess(aTHX_ "Parse error"));
11925 exprop = newOP(OP_NULL, 0);
11926 }
11927 return exprop;
11928}
11929
11930/*
11931=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11932
11933Parse a Perl arithmetic expression. This may contain operators of precedence
11934down to the bit shift operators. The expression must be followed (and thus
11935terminated) either by a comparison or lower-precedence operator or by
11936something that would normally terminate an expression such as semicolon.
11937If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11938otherwise it is mandatory. It is up to the caller to ensure that the
11939dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11940the source of the code to be parsed and the lexical context for the
11941expression.
11942
11943The op tree representing the expression is returned. If an optional
11944expression is absent, a null pointer is returned, otherwise the pointer
11945will be non-null.
11946
11947If an error occurs in parsing or compilation, in most cases a valid op
11948tree is returned anyway. The error is reflected in the parser state,
11949normally resulting in a single exception at the top level of parsing
11950which covers all the compilation errors that occurred. Some compilation
11951errors, however, will throw an exception immediately.
11952
11953=cut
11954*/
11955
11956OP *
11957Perl_parse_arithexpr(pTHX_ U32 flags)
11958{
11959 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11960}
11961
11962/*
11963=for apidoc Amx|OP *|parse_termexpr|U32 flags
11964
11965Parse a Perl term expression. This may contain operators of precedence
11966down to the assignment operators. The expression must be followed (and thus
11967terminated) either by a comma or lower-precedence operator or by
11968something that would normally terminate an expression such as semicolon.
11969If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11970otherwise it is mandatory. It is up to the caller to ensure that the
11971dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11972the source of the code to be parsed and the lexical context for the
11973expression.
11974
11975The op tree representing the expression is returned. If an optional
11976expression is absent, a null pointer is returned, otherwise the pointer
11977will be non-null.
11978
11979If an error occurs in parsing or compilation, in most cases a valid op
11980tree is returned anyway. The error is reflected in the parser state,
11981normally resulting in a single exception at the top level of parsing
11982which covers all the compilation errors that occurred. Some compilation
11983errors, however, will throw an exception immediately.
11984
11985=cut
11986*/
11987
11988OP *
11989Perl_parse_termexpr(pTHX_ U32 flags)
11990{
11991 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11992}
11993
11994/*
11995=for apidoc Amx|OP *|parse_listexpr|U32 flags
11996
11997Parse a Perl list expression. This may contain operators of precedence
11998down to the comma operator. The expression must be followed (and thus
11999terminated) either by a low-precedence logic operator such as C<or> or by
12000something that would normally terminate an expression such as semicolon.
12001If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
12002otherwise it is mandatory. It is up to the caller to ensure that the
12003dynamic parser state (L</PL_parser> et al) is correctly set to reflect
12004the source of the code to be parsed and the lexical context for the
12005expression.
12006
12007The op tree representing the expression is returned. If an optional
12008expression is absent, a null pointer is returned, otherwise the pointer
12009will be non-null.
12010
12011If an error occurs in parsing or compilation, in most cases a valid op
12012tree is returned anyway. The error is reflected in the parser state,
12013normally resulting in a single exception at the top level of parsing
12014which covers all the compilation errors that occurred. Some compilation
12015errors, however, will throw an exception immediately.
12016
12017=cut
12018*/
12019
12020OP *
12021Perl_parse_listexpr(pTHX_ U32 flags)
12022{
12023 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
12024}
12025
12026/*
12027=for apidoc Amx|OP *|parse_fullexpr|U32 flags
12028
12029Parse a single complete Perl expression. This allows the full
12030expression grammar, including the lowest-precedence operators such
12031as C<or>. The expression must be followed (and thus terminated) by a
12032token that an expression would normally be terminated by: end-of-file,
12033closing bracketing punctuation, semicolon, or one of the keywords that
12034signals a postfix expression-statement modifier. If I<flags> includes
12035C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
12036mandatory. It is up to the caller to ensure that the dynamic parser
12037state (L</PL_parser> et al) is correctly set to reflect the source of
12038the code to be parsed and the lexical context for the expression.
12039
12040The op tree representing the expression is returned. If an optional
12041expression is absent, a null pointer is returned, otherwise the pointer
12042will be non-null.
12043
12044If an error occurs in parsing or compilation, in most cases a valid op
12045tree is returned anyway. The error is reflected in the parser state,
12046normally resulting in a single exception at the top level of parsing
12047which covers all the compilation errors that occurred. Some compilation
12048errors, however, will throw an exception immediately.
12049
12050=cut
12051*/
12052
12053OP *
12054Perl_parse_fullexpr(pTHX_ U32 flags)
12055{
12056 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
12057}
12058
e53d8f76
Z
12059/*
12060=for apidoc Amx|OP *|parse_block|U32 flags
12061
12062Parse a single complete Perl code block. This consists of an opening
12063brace, a sequence of statements, and a closing brace. The block
12064constitutes a lexical scope, so C<my> variables and various compile-time
12065effects can be contained within it. It is up to the caller to ensure
12066that the dynamic parser state (L</PL_parser> et al) is correctly set to
12067reflect the source of the code to be parsed and the lexical context for
12068the statement.
12069
12070The op tree representing the code block is returned. This is always a
12071real op, never a null pointer. It will normally be a C<lineseq> list,
12072including C<nextstate> or equivalent ops. No ops to construct any kind
12073of runtime scope are included by virtue of it being a block.
12074
12075If an error occurs in parsing or compilation, in most cases a valid op
12076tree (most likely null) is returned anyway. The error is reflected in
12077the parser state, normally resulting in a single exception at the top
12078level of parsing which covers all the compilation errors that occurred.
12079Some compilation errors, however, will throw an exception immediately.
12080
12081The I<flags> parameter is reserved for future use, and must always
12082be zero.
12083
12084=cut
12085*/
12086
12087OP *
12088Perl_parse_block(pTHX_ U32 flags)
12089{
12090 if (flags)
12091 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 12092 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
12093}
12094
1da4ca5f 12095/*
8359b381
Z
12096=for apidoc Amx|OP *|parse_barestmt|U32 flags
12097
12098Parse a single unadorned Perl statement. This may be a normal imperative
12099statement or a declaration that has compile-time effect. It does not
12100include any label or other affixture. It is up to the caller to ensure
12101that the dynamic parser state (L</PL_parser> et al) is correctly set to
12102reflect the source of the code to be parsed and the lexical context for
12103the statement.
12104
12105The op tree representing the statement is returned. This may be a
12106null pointer if the statement is null, for example if it was actually
12107a subroutine definition (which has compile-time side effects). If not
12108null, it will be ops directly implementing the statement, suitable to
12109pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
12110equivalent op (except for those embedded in a scope contained entirely
12111within the statement).
12112
12113If an error occurs in parsing or compilation, in most cases a valid op
12114tree (most likely null) is returned anyway. The error is reflected in
12115the parser state, normally resulting in a single exception at the top
12116level of parsing which covers all the compilation errors that occurred.
12117Some compilation errors, however, will throw an exception immediately.
12118
12119The I<flags> parameter is reserved for future use, and must always
12120be zero.
12121
12122=cut
12123*/
12124
12125OP *
12126Perl_parse_barestmt(pTHX_ U32 flags)
12127{
12128 if (flags)
12129 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 12130 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
12131}
12132
12133/*
361d9b55
Z
12134=for apidoc Amx|SV *|parse_label|U32 flags
12135
12136Parse a single label, possibly optional, of the type that may prefix a
12137Perl statement. It is up to the caller to ensure that the dynamic parser
12138state (L</PL_parser> et al) is correctly set to reflect the source of
12139the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
12140label is optional, otherwise it is mandatory.
12141
12142The name of the label is returned in the form of a fresh scalar. If an
12143optional label is absent, a null pointer is returned.
12144
12145If an error occurs in parsing, which can only occur if the label is
12146mandatory, a valid label is returned anyway. The error is reflected in
12147the parser state, normally resulting in a single exception at the top
12148level of parsing which covers all the compilation errors that occurred.
12149
12150=cut
12151*/
12152
12153SV *
12154Perl_parse_label(pTHX_ U32 flags)
12155{
12156 if (flags & ~PARSE_OPTIONAL)
12157 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
12158 if (PL_lex_state == LEX_KNOWNEXT) {
12159 PL_parser->yychar = yylex();
12160 if (PL_parser->yychar == LABEL) {
5504e6cf
FC
12161 char * const lpv = pl_yylval.pval;
12162 STRLEN llen = strlen(lpv);
361d9b55 12163 PL_parser->yychar = YYEMPTY;
5504e6cf 12164 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
361d9b55
Z
12165 } else {
12166 yyunlex();
12167 goto no_label;
12168 }
12169 } else {
12170 char *s, *t;
361d9b55
Z
12171 STRLEN wlen, bufptr_pos;
12172 lex_read_space(0);
12173 t = s = PL_bufptr;
5db1eb8d 12174 if (!isIDFIRST_lazy_if(s, UTF))
361d9b55 12175 goto no_label;
5db1eb8d 12176 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
361d9b55
Z
12177 if (word_takes_any_delimeter(s, wlen))
12178 goto no_label;
12179 bufptr_pos = s - SvPVX(PL_linestr);
12180 PL_bufptr = t;
12181 lex_read_space(LEX_KEEP_PREVIOUS);
12182 t = PL_bufptr;
12183 s = SvPVX(PL_linestr) + bufptr_pos;
12184 if (t[0] == ':' && t[1] != ':') {
12185 PL_oldoldbufptr = PL_oldbufptr;
12186 PL_oldbufptr = s;
12187 PL_bufptr = t+1;
5db1eb8d 12188 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
361d9b55
Z
12189 } else {
12190 PL_bufptr = s;
12191 no_label:
12192 if (flags & PARSE_OPTIONAL) {
12193 return NULL;
12194 } else {
12195 qerror(Perl_mess(aTHX_ "Parse error"));
12196 return newSVpvs("x");
12197 }
12198 }
12199 }
12200}
12201
12202/*
28ac2b49
Z
12203=for apidoc Amx|OP *|parse_fullstmt|U32 flags
12204
12205Parse a single complete Perl statement. This may be a normal imperative
8359b381 12206statement or a declaration that has compile-time effect, and may include
8e720305 12207optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
12208parser state (L</PL_parser> et al) is correctly set to reflect the source
12209of the code to be parsed and the lexical context for the statement.
12210
12211The op tree representing the statement is returned. This may be a
12212null pointer if the statement is null, for example if it was actually
12213a subroutine definition (which has compile-time side effects). If not
12214null, it will be the result of a L</newSTATEOP> call, normally including
12215a C<nextstate> or equivalent op.
12216
12217If an error occurs in parsing or compilation, in most cases a valid op
12218tree (most likely null) is returned anyway. The error is reflected in
12219the parser state, normally resulting in a single exception at the top
12220level of parsing which covers all the compilation errors that occurred.
12221Some compilation errors, however, will throw an exception immediately.
12222
12223The I<flags> parameter is reserved for future use, and must always
12224be zero.
12225
12226=cut
12227*/
12228
12229OP *
12230Perl_parse_fullstmt(pTHX_ U32 flags)
12231{
28ac2b49
Z
12232 if (flags)
12233 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 12234 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
12235}
12236
07ffcb73
Z
12237/*
12238=for apidoc Amx|OP *|parse_stmtseq|U32 flags
12239
12240Parse a sequence of zero or more Perl statements. These may be normal
12241imperative statements, including optional labels, or declarations
12242that have compile-time effect, or any mixture thereof. The statement
12243sequence ends when a closing brace or end-of-file is encountered in a
12244place where a new statement could have validly started. It is up to
12245the caller to ensure that the dynamic parser state (L</PL_parser> et al)
12246is correctly set to reflect the source of the code to be parsed and the
12247lexical context for the statements.
12248
12249The op tree representing the statement sequence is returned. This may
12250be a null pointer if the statements were all null, for example if there
12251were no statements or if there were only subroutine definitions (which
12252have compile-time side effects). If not null, it will be a C<lineseq>
12253list, normally including C<nextstate> or equivalent ops.
12254
12255If an error occurs in parsing or compilation, in most cases a valid op
12256tree is returned anyway. The error is reflected in the parser state,
12257normally resulting in a single exception at the top level of parsing
12258which covers all the compilation errors that occurred. Some compilation
12259errors, however, will throw an exception immediately.
12260
12261The I<flags> parameter is reserved for future use, and must always
12262be zero.
12263
12264=cut
12265*/
12266
12267OP *
12268Perl_parse_stmtseq(pTHX_ U32 flags)
12269{
12270 OP *stmtseqop;
e53d8f76 12271 I32 c;
07ffcb73 12272 if (flags)
78cdf107
Z
12273 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12274 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
12275 c = lex_peek_unichar(0);
12276 if (c != -1 && c != /*{*/'}')
07ffcb73 12277 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
12278 return stmtseqop;
12279}
12280
28ac2b49 12281/*
1da4ca5f
NC
12282 * Local variables:
12283 * c-indentation-style: bsd
12284 * c-basic-offset: 4
14d04a33 12285 * indent-tabs-mode: nil
1da4ca5f
NC
12286 * End:
12287 *
14d04a33 12288 * ex: set ts=8 sts=4 sw=4 et:
37442d52 12289 */