This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl5db] Refactor the LineInfo function.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
199e78b7
DM
69#define PL_preambled (PL_parser->preambled)
70#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 71#define PL_linestr (PL_parser->linestr)
c2598295
DM
72#define PL_expect (PL_parser->expect)
73#define PL_copline (PL_parser->copline)
f06b5848
DM
74#define PL_bufptr (PL_parser->bufptr)
75#define PL_oldbufptr (PL_parser->oldbufptr)
76#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77#define PL_linestart (PL_parser->linestart)
78#define PL_bufend (PL_parser->bufend)
79#define PL_last_uni (PL_parser->last_uni)
80#define PL_last_lop (PL_parser->last_lop)
81#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 82#define PL_lex_state (PL_parser->lex_state)
2f9285f8 83#define PL_rsfp (PL_parser->rsfp)
5486870f 84#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
85#define PL_in_my (PL_parser->in_my)
86#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 87#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 88#define PL_multi_end (PL_parser->multi_end)
13765c85 89#define PL_error_count (PL_parser->error_count)
199e78b7
DM
90
91#ifdef PERL_MAD
92# define PL_endwhite (PL_parser->endwhite)
93# define PL_faketokens (PL_parser->faketokens)
94# define PL_lasttoke (PL_parser->lasttoke)
95# define PL_nextwhite (PL_parser->nextwhite)
96# define PL_realtokenstart (PL_parser->realtokenstart)
97# define PL_skipwhite (PL_parser->skipwhite)
98# define PL_thisclose (PL_parser->thisclose)
99# define PL_thismad (PL_parser->thismad)
100# define PL_thisopen (PL_parser->thisopen)
101# define PL_thisstuff (PL_parser->thisstuff)
102# define PL_thistoken (PL_parser->thistoken)
103# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
104# define PL_thiswhite (PL_parser->thiswhite)
105# define PL_nexttoke (PL_parser->nexttoke)
106# define PL_curforce (PL_parser->curforce)
107#else
108# define PL_nexttoke (PL_parser->nexttoke)
109# define PL_nexttype (PL_parser->nexttype)
110# define PL_nextval (PL_parser->nextval)
199e78b7
DM
111#endif
112
0bd48802 113static const char ident_too_long[] = "Identifier too long";
8903cb82 114
29595ff2 115#ifdef PERL_MAD
29595ff2 116# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 117# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 118#else
5db06880 119# define CURMAD(slot,sv)
9ded7720 120# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
121#endif
122
a7aaec61
Z
123#define XENUMMASK 0x3f
124#define XFAKEEOF 0x40
125#define XFAKEBRACK 0x80
9059aa12 126
39e02b42
JH
127#ifdef USE_UTF8_SCRIPTS
128# define UTF (!IN_BYTES)
2b9d42f0 129#else
802a15e9 130# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
2b9d42f0 131#endif
a0ed51b3 132
b1fc3636
CJ
133/* The maximum number of characters preceding the unrecognized one to display */
134#define UNRECOGNIZED_PRECEDE_COUNT 10
135
61f0cdd9 136/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
137 * 1999-02-27 mjd-perl-patch@plover.com */
138#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
139
bf4acbe4 140#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 141
ffb4593c
NT
142/* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
79072805 144 * can get by with a single comparison (if the compiler is smart enough).
9da1dd8f
DM
145 *
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
79072805
LW
148 */
149
fb73857a 150/* #define LEX_NOTPARSING 11 is done in perl.h. */
151
b6007c36
DM
152#define LEX_NORMAL 10 /* normal code (ie not within "...") */
153#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156#define LEX_INTERPSTART 6 /* expecting the start of a $var */
157
158 /* at end of code, eg "$x" followed by: */
159#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
161
162#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164#define LEX_INTERPCONST 2 /* NOT USED */
165#define LEX_FORMLINE 1 /* expecting a format line */
166#define LEX_KNOWNEXT 0 /* next token known; just return it */
167
79072805 168
bbf60fe6 169#ifdef DEBUGGING
27da23d5 170static const char* const lex_state_names[] = {
bbf60fe6
DM
171 "KNOWNEXT",
172 "FORMLINE",
173 "INTERPCONST",
174 "INTERPCONCAT",
175 "INTERPENDMAYBE",
176 "INTERPEND",
177 "INTERPSTART",
178 "INTERPPUSH",
179 "INTERPCASEMOD",
180 "INTERPNORMAL",
181 "NORMAL"
182};
183#endif
184
79072805
LW
185#ifdef ff_next
186#undef ff_next
d48672a2
LW
187#endif
188
79072805 189#include "keywords.h"
fe14fcc3 190
ffb4593c
NT
191/* CLINE is a macro that ensures PL_copline has a sane value */
192
ae986130
LW
193#ifdef CLINE
194#undef CLINE
195#endif
57843af0 196#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 197
5db06880 198#ifdef PERL_MAD
29595ff2
NC
199# define SKIPSPACE0(s) skipspace0(s)
200# define SKIPSPACE1(s) skipspace1(s)
201# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202# define PEEKSPACE(s) skipspace2(s,0)
203#else
204# define SKIPSPACE0(s) skipspace(s)
205# define SKIPSPACE1(s) skipspace(s)
206# define SKIPSPACE2(s,tsv) skipspace(s)
207# define PEEKSPACE(s) skipspace(s)
208#endif
209
ffb4593c
NT
210/*
211 * Convenience functions to return different tokens and prime the
9cbb5ea2 212 * lexer for the next token. They all take an argument.
ffb4593c
NT
213 *
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
7eb971ee 224 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 225 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
226 * BOop : bitwise or or xor
227 * BAop : bitwise and
228 * SHop : shift operator
229 * PWop : power operator
9cbb5ea2 230 * PMop : pattern-matching operator
ffb4593c
NT
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
e5edeb50 234 * Rop : relational operator <= != gt
ffb4593c
NT
235 *
236 * Also see LOP and lop() below.
237 */
238
998054bd 239#ifdef DEBUGGING /* Serve -DT. */
704d4215 240# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 241#else
bbf60fe6 242# define REPORT(retval) (retval)
998054bd
SC
243#endif
244
bbf60fe6
DM
245#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
252#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 255#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
d68ce4ac 272#define UNI3(f,x,have_x) { \
6154021b 273 pl_yylval.ival = f; \
d68ce4ac 274 if (have_x) PL_expect = x; \
376fcdbf
AL
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
d68ce4ac
FC
283#define UNI(f) UNI3(f,XTERM,1)
284#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
b5fb7ce3
FC
285#define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
22393538
MH
287 OPERATOR(f); \
288 }
a687059c 289
d68ce4ac 290#define UNIBRACK(f) UNI3(f,0,0)
79072805 291
9f68db38 292/* grandfather return to old style */
78cdf107
Z
293#define OLDLOP(f) \
294 do { \
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
298 PL_expect = XTERM; \
299 PL_bufptr = s; \
300 return (int)LSTOP; \
301 } while(0)
79072805 302
83944c01
FC
303#define COPLINE_INC_WITH_HERELINES \
304 STMT_START { \
305 CopLINE_inc(PL_curcop); \
d794b522
FC
306 if (PL_parser->lex_shared->herelines) \
307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308 PL_parser->lex_shared->herelines = 0; \
83944c01
FC
309 } STMT_END
310
311
8fa7f367
JH
312#ifdef DEBUGGING
313
6154021b 314/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
315enum token_type {
316 TOKENTYPE_NONE,
317 TOKENTYPE_IVAL,
6154021b 318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6 319 TOKENTYPE_PVAL,
aeaef349 320 TOKENTYPE_OPVAL
bbf60fe6
DM
321};
322
6d4a66ac
NC
323static struct debug_tokens {
324 const int token;
325 enum token_type type;
326 const char *name;
327} const debug_tokens[] =
9041c2e3 328{
bbf60fe6
DM
329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
330 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
331 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
333 { ARROW, TOKENTYPE_NONE, "ARROW" },
334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
340 { DO, TOKENTYPE_NONE, "DO" },
341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
342 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
343 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
345 { ELSE, TOKENTYPE_NONE, "ELSE" },
346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
347 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
348 { FOR, TOKENTYPE_IVAL, "FOR" },
349 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
705fe0e5
FC
350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
bbf60fe6
DM
352 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
360 { IF, TOKENTYPE_IVAL, "IF" },
5504e6cf 361 { LABEL, TOKENTYPE_PVAL, "LABEL" },
bbf60fe6
DM
362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
367 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
368 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
369 { MY, TOKENTYPE_IVAL, "MY" },
bbf60fe6
DM
370 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
371 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
372 { OROP, TOKENTYPE_IVAL, "OROP" },
373 { OROR, TOKENTYPE_NONE, "OROR" },
374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
f3f204dc 375 { PEG, TOKENTYPE_NONE, "PEG" },
88e1f1a2
JV
376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
380 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
381 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
382 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
383 { PREINC, TOKENTYPE_NONE, "PREINC" },
384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
f3f204dc 385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
bbf60fe6
DM
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
f3f204dc 388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
bbf60fe6
DM
389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
390 { SUB, TOKENTYPE_NONE, "SUB" },
391 { THING, TOKENTYPE_OPVAL, "THING" },
392 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
397 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 398 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
399 { WHILE, TOKENTYPE_IVAL, "WHILE" },
400 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 402 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
403};
404
6154021b 405/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 406
bbf60fe6 407STATIC int
704d4215 408S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 409{
97aff369 410 dVAR;
7918f24d
NC
411
412 PERL_ARGS_ASSERT_TOKEREPORT;
413
bbf60fe6 414 if (DEBUG_T_TEST) {
bd61b366 415 const char *name = NULL;
bbf60fe6 416 enum token_type type = TOKENTYPE_NONE;
f54cb97a 417 const struct debug_tokens *p;
396482e1 418 SV* const report = newSVpvs("<== ");
bbf60fe6 419
f54cb97a 420 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
421 if (p->token == (int)rv) {
422 name = p->name;
423 type = p->type;
424 break;
425 }
426 }
427 if (name)
54667de8 428 Perl_sv_catpv(aTHX_ report, name);
74736ae6 429 else if ((char)rv > ' ' && (char)rv <= '~')
bbf60fe6
DM
430 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
431 else if (!rv)
396482e1 432 sv_catpvs(report, "EOF");
bbf60fe6
DM
433 else
434 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
435 switch (type) {
436 case TOKENTYPE_NONE:
bbf60fe6
DM
437 break;
438 case TOKENTYPE_IVAL:
704d4215 439 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_OPNUM:
442 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 443 PL_op_name[lvalp->ival]);
bbf60fe6
DM
444 break;
445 case TOKENTYPE_PVAL:
704d4215 446 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
447 break;
448 case TOKENTYPE_OPVAL:
704d4215 449 if (lvalp->opval) {
401441c0 450 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
451 PL_op_name[lvalp->opval->op_type]);
452 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 453 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 454 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
455 }
456
457 }
401441c0 458 else
396482e1 459 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
460 break;
461 }
b6007c36 462 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
463 };
464 return (int)rv;
998054bd
SC
465}
466
b6007c36
DM
467
468/* print the buffer with suitable escapes */
469
470STATIC void
15f169a1 471S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 472{
396482e1 473 SV* const tmp = newSVpvs("");
7918f24d
NC
474
475 PERL_ARGS_ASSERT_PRINTBUF;
476
b6007c36
DM
477 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
478 SvREFCNT_dec(tmp);
479}
480
8fa7f367
JH
481#endif
482
8290c323
NC
483static int
484S_deprecate_commaless_var_list(pTHX) {
485 PL_expect = XTERM;
486 deprecate("comma-less variable list");
487 return REPORT(','); /* grandfather non-comma-format format */
488}
489
ffb4593c
NT
490/*
491 * S_ao
492 *
c963b151
BD
493 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
494 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
495 */
496
76e3520e 497STATIC int
cea2e8a9 498S_ao(pTHX_ int toketype)
a0d0e21e 499{
97aff369 500 dVAR;
3280af22
NIS
501 if (*PL_bufptr == '=') {
502 PL_bufptr++;
a0d0e21e 503 if (toketype == ANDAND)
6154021b 504 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 505 else if (toketype == OROR)
6154021b 506 pl_yylval.ival = OP_ORASSIGN;
c963b151 507 else if (toketype == DORDOR)
6154021b 508 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
509 toketype = ASSIGNOP;
510 }
511 return toketype;
512}
513
ffb4593c
NT
514/*
515 * S_no_op
516 * When Perl expects an operator and finds something else, no_op
517 * prints the warning. It always prints "<something> found where
518 * operator expected. It prints "Missing semicolon on previous line?"
519 * if the surprise occurs at the start of the line. "do you need to
520 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
521 * where the compiler doesn't know if foo is a method call or a function.
522 * It prints "Missing operator before end of line" if there's nothing
523 * after the missing operator, or "... before <...>" if there is something
524 * after the missing operator.
525 */
526
76e3520e 527STATIC void
15f169a1 528S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 529{
97aff369 530 dVAR;
9d4ba2ae
AL
531 char * const oldbp = PL_bufptr;
532 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 533
7918f24d
NC
534 PERL_ARGS_ASSERT_NO_OP;
535
1189a94a
GS
536 if (!s)
537 s = oldbp;
07c798fb 538 else
1189a94a 539 PL_bufptr = s;
734ab321 540 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
56da5a46
RGS
541 if (ckWARN_d(WARN_SYNTAX)) {
542 if (is_first)
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Missing semicolon on previous line?)\n");
545 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 546 const char *t;
734ab321
BF
547 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
548 t += UTF ? UTF8SKIP(t) : 1)
c35e046a 549 NOOP;
56da5a46
RGS
550 if (t < PL_bufptr && isSPACE(*t))
551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
552 "\t(Do you need to predeclare %"SVf"?)\n",
553 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
554 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46
RGS
555 }
556 else {
557 assert(s >= oldbp);
558 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
559 "\t(Missing operator before %"SVf"?)\n",
560 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
561 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46 562 }
07c798fb 563 }
3280af22 564 PL_bufptr = oldbp;
8990e307
LW
565}
566
ffb4593c
NT
567/*
568 * S_missingterm
569 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 570 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
571 * If we're in a delimited string and the delimiter is a control
572 * character, it's reformatted into a two-char sequence like ^C.
573 * This is fatal.
574 */
575
76e3520e 576STATIC void
cea2e8a9 577S_missingterm(pTHX_ char *s)
8990e307 578{
97aff369 579 dVAR;
8990e307
LW
580 char tmpbuf[3];
581 char q;
582 if (s) {
9d4ba2ae 583 char * const nl = strrchr(s,'\n');
d2719217 584 if (nl)
8990e307
LW
585 *nl = '\0';
586 }
463559e7 587 else if (isCNTRL(PL_multi_close)) {
8990e307 588 *tmpbuf = '^';
585ec06d 589 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
590 tmpbuf[2] = '\0';
591 s = tmpbuf;
592 }
593 else {
eb160463 594 *tmpbuf = (char)PL_multi_close;
8990e307
LW
595 tmpbuf[1] = '\0';
596 s = tmpbuf;
597 }
598 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 599 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 600}
79072805 601
dd0ac2b9
FC
602#include "feature.h"
603
0d863452 604/*
0d863452
RH
605 * Check whether the named feature is enabled.
606 */
26ea9e12 607bool
3fff3427 608Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 609{
97aff369 610 dVAR;
4a731d7b 611 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
612
613 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
ca4d40c4
FC
614
615 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
7918f24d 616
26ea9e12
NC
617 if (namelen > MAX_FEATURE_LEN)
618 return FALSE;
3fff3427 619 memcpy(&he_name[8], name, namelen);
7d69d4a6 620
c8ca97b0
NC
621 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
622 REFCOUNTED_HE_EXISTS));
0d863452
RH
623}
624
ffb4593c 625/*
9cbb5ea2
GS
626 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
627 * utf16-to-utf8-reversed.
ffb4593c
NT
628 */
629
c39cd008
GS
630#ifdef PERL_CR_FILTER
631static void
632strip_return(SV *sv)
633{
eb578fdb
KW
634 const char *s = SvPVX_const(sv);
635 const char * const e = s + SvCUR(sv);
7918f24d
NC
636
637 PERL_ARGS_ASSERT_STRIP_RETURN;
638
c39cd008
GS
639 /* outer loop optimized to do nothing if there are no CR-LFs */
640 while (s < e) {
641 if (*s++ == '\r' && *s == '\n') {
642 /* hit a CR-LF, need to copy the rest */
eb578fdb 643 char *d = s - 1;
c39cd008
GS
644 *d++ = *s++;
645 while (s < e) {
646 if (*s == '\r' && s[1] == '\n')
647 s++;
648 *d++ = *s++;
649 }
650 SvCUR(sv) -= s - d;
651 return;
652 }
653 }
654}
a868473f 655
76e3520e 656STATIC I32
c39cd008 657S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 658{
f54cb97a 659 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
660 if (count > 0 && !maxlen)
661 strip_return(sv);
662 return count;
a868473f
NIS
663}
664#endif
665
ffb4593c 666/*
8eaa0acf
Z
667=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
668
669Creates and initialises a new lexer/parser state object, supplying
670a context in which to lex and parse from a new source of Perl code.
671A pointer to the new state object is placed in L</PL_parser>. An entry
672is made on the save stack so that upon unwinding the new state object
673will be destroyed and the former value of L</PL_parser> will be restored.
674Nothing else need be done to clean up the parsing context.
675
676The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
677non-null, provides a string (in SV form) containing code to be parsed.
678A copy of the string is made, so subsequent modification of I<line>
679does not affect parsing. I<rsfp>, if non-null, provides an input stream
680from which code will be read to be parsed. If both are non-null, the
681code in I<line> comes first and must consist of complete lines of input,
682and I<rsfp> supplies the remainder of the source.
683
e368b3bd
FC
684The I<flags> parameter is reserved for future use. Currently it is only
685used by perl internally, so extensions should always pass zero.
8eaa0acf
Z
686
687=cut
688*/
ffb4593c 689
27fcb6ee 690/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
87606032
NC
691 can share filters with the current parser.
692 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
693 caller, hence isn't owned by the parser, so shouldn't be closed on parser
694 destruction. This is used to handle the case of defaulting to reading the
695 script from the standard input because no filename was given on the command
696 line (without getting confused by situation where STDIN has been closed, so
697 the script handle is opened on fd 0) */
27fcb6ee 698
a0d0e21e 699void
8eaa0acf 700Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 701{
97aff369 702 dVAR;
6ef55633 703 const char *s = NULL;
5486870f 704 yy_parser *parser, *oparser;
60d63348 705 if (flags && flags & ~LEX_START_FLAGS)
8eaa0acf 706 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
707
708 /* create and initialise a parser */
709
199e78b7 710 Newxz(parser, 1, yy_parser);
5486870f 711 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
712 PL_parser = parser;
713
28ac2b49
Z
714 parser->stack = NULL;
715 parser->ps = NULL;
716 parser->stack_size = 0;
acdf0a21 717
e3abe207
DM
718 /* on scope exit, free this parser and restore any outer one */
719 SAVEPARSER(parser);
7c4baf47 720 parser->saved_curcop = PL_curcop;
e3abe207 721
acdf0a21 722 /* initialise lexer state */
8990e307 723
fb205e7a
DM
724#ifdef PERL_MAD
725 parser->curforce = -1;
726#else
727 parser->nexttoke = 0;
728#endif
ca4cfd28 729 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 730 parser->copline = NOLINE;
5afb0a62 731 parser->lex_state = LEX_NORMAL;
c2598295 732 parser->expect = XSTATE;
2f9285f8 733 parser->rsfp = rsfp;
27fcb6ee
FC
734 parser->rsfp_filters =
735 !(flags & LEX_START_SAME_FILTER) || !oparser
d3cd8e11
FC
736 ? NULL
737 : MUTABLE_AV(SvREFCNT_inc(
738 oparser->rsfp_filters
739 ? oparser->rsfp_filters
740 : (oparser->rsfp_filters = newAV())
741 ));
2f9285f8 742
199e78b7
DM
743 Newx(parser->lex_brackstack, 120, char);
744 Newx(parser->lex_casestack, 12, char);
745 *parser->lex_casestack = '\0';
d794b522 746 Newxz(parser->lex_shared, 1, LEXSHARED);
02b34bbe 747
10efb74f 748 if (line) {
0528fd32 749 STRLEN len;
10efb74f 750 s = SvPV_const(line, len);
0abcdfa4
FC
751 parser->linestr = flags & LEX_START_COPIED
752 ? SvREFCNT_inc_simple_NN(line)
753 : newSVpvn_flags(s, len, SvUTF8(line));
11076590 754 sv_catpvs(parser->linestr, "\n;");
0abcdfa4
FC
755 } else {
756 parser->linestr = newSVpvs("\n;");
8990e307 757 }
f06b5848
DM
758 parser->oldoldbufptr =
759 parser->oldbufptr =
760 parser->bufptr =
761 parser->linestart = SvPVX(parser->linestr);
762 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
763 parser->last_lop = parser->last_uni = NULL;
87606032
NC
764 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
765 |LEX_DONT_CLOSE_RSFP);
737c24fc 766
60d63348 767 parser->in_pod = parser->filtered = 0;
79072805 768}
a687059c 769
e3abe207
DM
770
771/* delete a parser object */
772
773void
774Perl_parser_free(pTHX_ const yy_parser *parser)
775{
3ce3dcd9
FC
776#ifdef PERL_MAD
777 I32 nexttoke = parser->lasttoke;
778#else
779 I32 nexttoke = parser->nexttoke;
780#endif
781
7918f24d
NC
782 PERL_ARGS_ASSERT_PARSER_FREE;
783
7c4baf47 784 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
785 SvREFCNT_dec(parser->linestr);
786
87606032 787 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
2f9285f8 788 PerlIO_clearerr(parser->rsfp);
799361c3
SH
789 else if (parser->rsfp && (!parser->old_parser ||
790 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 791 PerlIO_close(parser->rsfp);
5486870f 792 SvREFCNT_dec(parser->rsfp_filters);
10002bc1
FC
793 SvREFCNT_dec(parser->lex_stuff);
794 SvREFCNT_dec(parser->sublex_info.repl);
3ce3dcd9
FC
795 while (nexttoke--) {
796#ifdef PERL_MAD
797 if (S_is_opval_token(parser->nexttoke[nexttoke].next_type
798 & 0xffff))
799 op_free(parser->nexttoke[nexttoke].next_val.opval);
800#else
801 if (S_is_opval_token(parser->nexttype[nexttoke] & 0xffff))
802 op_free(parser->nextval[nexttoke].opval);
803#endif
804 }
2f9285f8 805
e3abe207
DM
806 Safefree(parser->lex_brackstack);
807 Safefree(parser->lex_casestack);
d794b522 808 Safefree(parser->lex_shared);
e3abe207
DM
809 PL_parser = parser->old_parser;
810 Safefree(parser);
811}
812
813
ffb4593c 814/*
f0e67a1d
Z
815=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
816
817Buffer scalar containing the chunk currently under consideration of the
818text currently being lexed. This is always a plain string scalar (for
819which C<SvPOK> is true). It is not intended to be used as a scalar by
820normal scalar means; instead refer to the buffer directly by the pointer
821variables described below.
822
823The lexer maintains various C<char*> pointers to things in the
824C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
825reallocated, all of these pointers must be updated. Don't attempt to
826do this manually, but rather use L</lex_grow_linestr> if you need to
827reallocate the buffer.
828
829The content of the text chunk in the buffer is commonly exactly one
830complete line of input, up to and including a newline terminator,
831but there are situations where it is otherwise. The octets of the
832buffer may be intended to be interpreted as either UTF-8 or Latin-1.
833The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
834flag on this scalar, which may disagree with it.
835
836For direct examination of the buffer, the variable
837L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
838lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
839of these pointers is usually preferable to examination of the scalar
840through normal scalar means.
841
842=for apidoc AmxU|char *|PL_parser-E<gt>bufend
843
844Direct pointer to the end of the chunk of text currently being lexed, the
845end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
846+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
847always located at the end of the buffer, and does not count as part of
848the buffer's contents.
849
850=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
851
852Points to the current position of lexing inside the lexer buffer.
853Characters around this point may be freely examined, within
854the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
855L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
856interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
857
858Lexing code (whether in the Perl core or not) moves this pointer past
859the characters that it consumes. It is also expected to perform some
860bookkeeping whenever a newline character is consumed. This movement
861can be more conveniently performed by the function L</lex_read_to>,
862which handles newlines appropriately.
863
864Interpretation of the buffer's octets can be abstracted out by
865using the slightly higher-level functions L</lex_peek_unichar> and
866L</lex_read_unichar>.
867
868=for apidoc AmxU|char *|PL_parser-E<gt>linestart
869
870Points to the start of the current line inside the lexer buffer.
871This is useful for indicating at which column an error occurred, and
872not much else. This must be updated by any lexing code that consumes
873a newline; the function L</lex_read_to> handles this detail.
874
875=cut
876*/
877
878/*
879=for apidoc Amx|bool|lex_bufutf8
880
881Indicates whether the octets in the lexer buffer
882(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
883of Unicode characters. If not, they should be interpreted as Latin-1
884characters. This is analogous to the C<SvUTF8> flag for scalars.
885
886In UTF-8 mode, it is not guaranteed that the lexer buffer actually
887contains valid UTF-8. Lexing code must be robust in the face of invalid
888encoding.
889
890The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
891is significant, but not the whole story regarding the input character
892encoding. Normally, when a file is being read, the scalar contains octets
893and its C<SvUTF8> flag is off, but the octets should be interpreted as
894UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
895however, the scalar may have the C<SvUTF8> flag on, and in this case its
896octets should be interpreted as UTF-8 unless the C<use bytes> pragma
897is in effect. This logic may change in the future; use this function
898instead of implementing the logic yourself.
899
900=cut
901*/
902
903bool
904Perl_lex_bufutf8(pTHX)
905{
906 return UTF;
907}
908
909/*
910=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
911
912Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
913at least I<len> octets (including terminating NUL). Returns a
914pointer to the reallocated buffer. This is necessary before making
915any direct modification of the buffer that would increase its length.
916L</lex_stuff_pvn> provides a more convenient way to insert text into
917the buffer.
918
919Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
920this function updates all of the lexer's variables that point directly
921into the buffer.
922
923=cut
924*/
925
926char *
927Perl_lex_grow_linestr(pTHX_ STRLEN len)
928{
929 SV *linestr;
930 char *buf;
931 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
c7641931 932 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
f0e67a1d
Z
933 linestr = PL_parser->linestr;
934 buf = SvPVX(linestr);
935 if (len <= SvLEN(linestr))
936 return buf;
937 bufend_pos = PL_parser->bufend - buf;
938 bufptr_pos = PL_parser->bufptr - buf;
939 oldbufptr_pos = PL_parser->oldbufptr - buf;
940 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
941 linestart_pos = PL_parser->linestart - buf;
942 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
943 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3328ab5a
FC
944 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
945 PL_parser->lex_shared->re_eval_start - buf : 0;
c7641931 946
f0e67a1d 947 buf = sv_grow(linestr, len);
c7641931 948
f0e67a1d
Z
949 PL_parser->bufend = buf + bufend_pos;
950 PL_parser->bufptr = buf + bufptr_pos;
951 PL_parser->oldbufptr = buf + oldbufptr_pos;
952 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
953 PL_parser->linestart = buf + linestart_pos;
954 if (PL_parser->last_uni)
955 PL_parser->last_uni = buf + last_uni_pos;
956 if (PL_parser->last_lop)
957 PL_parser->last_lop = buf + last_lop_pos;
3328ab5a
FC
958 if (PL_parser->lex_shared->re_eval_start)
959 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
f0e67a1d
Z
960 return buf;
961}
962
963/*
83aa740e 964=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
965
966Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
967immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
968reallocating the buffer if necessary. This means that lexing code that
969runs later will see the characters as if they had appeared in the input.
970It is not recommended to do this as part of normal parsing, and most
971uses of this facility run the risk of the inserted characters being
972interpreted in an unintended manner.
973
974The string to be inserted is represented by I<len> octets starting
975at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
976according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
977The characters are recoded for the lexer buffer, according to how the
978buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 979to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
980function is more convenient.
981
982=cut
983*/
984
985void
83aa740e 986Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 987{
749123ff 988 dVAR;
f0e67a1d
Z
989 char *bufptr;
990 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
991 if (flags & ~(LEX_STUFF_UTF8))
992 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
993 if (UTF) {
994 if (flags & LEX_STUFF_UTF8) {
995 goto plain_copy;
996 } else {
54d004e8 997 STRLEN highhalf = 0; /* Count of variants */
83aa740e 998 const char *p, *e = pv+len;
54d004e8
KW
999 for (p = pv; p != e; p++) {
1000 if (! UTF8_IS_INVARIANT(*p)) {
1001 highhalf++;
1002 }
1003 }
f0e67a1d
Z
1004 if (!highhalf)
1005 goto plain_copy;
1006 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
1007 bufptr = PL_parser->bufptr;
1008 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1009 SvCUR_set(PL_parser->linestr,
1010 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
1011 PL_parser->bufend += len+highhalf;
1012 for (p = pv; p != e; p++) {
1013 U8 c = (U8)*p;
54d004e8
KW
1014 if (! UTF8_IS_INVARIANT(c)) {
1015 *bufptr++ = UTF8_TWO_BYTE_HI(c);
1016 *bufptr++ = UTF8_TWO_BYTE_LO(c);
f0e67a1d
Z
1017 } else {
1018 *bufptr++ = (char)c;
1019 }
1020 }
1021 }
1022 } else {
1023 if (flags & LEX_STUFF_UTF8) {
1024 STRLEN highhalf = 0;
83aa740e 1025 const char *p, *e = pv+len;
f0e67a1d
Z
1026 for (p = pv; p != e; p++) {
1027 U8 c = (U8)*p;
54d004e8 1028 if (UTF8_IS_ABOVE_LATIN1(c)) {
f0e67a1d
Z
1029 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1030 "non-Latin-1 character into Latin-1 input");
54d004e8 1031 } else if (UTF8_IS_NEXT_CHAR_DOWNGRADEABLE(p, e)) {
f0e67a1d
Z
1032 p++;
1033 highhalf++;
54d004e8 1034 } else if (! UTF8_IS_INVARIANT(c)) {
f0e67a1d
Z
1035 /* malformed UTF-8 */
1036 ENTER;
1037 SAVESPTR(PL_warnhook);
1038 PL_warnhook = PERL_WARNHOOK_FATAL;
1039 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1040 LEAVE;
1041 }
1042 }
1043 if (!highhalf)
1044 goto plain_copy;
1045 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1046 bufptr = PL_parser->bufptr;
1047 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1048 SvCUR_set(PL_parser->linestr,
1049 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d 1050 PL_parser->bufend += len-highhalf;
54d004e8
KW
1051 p = pv;
1052 while (p < e) {
1053 if (UTF8_IS_INVARIANT(*p)) {
1054 *bufptr++ = *p;
1055 p++;
f0e67a1d 1056 }
54d004e8
KW
1057 else {
1058 assert(p < e -1 );
1059 *bufptr++ = TWO_BYTE_UTF8_TO_UNI(*p, *(p+1));
1060 p += 2;
1061 }
f0e67a1d
Z
1062 }
1063 } else {
54d004e8 1064 plain_copy:
f0e67a1d
Z
1065 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1066 bufptr = PL_parser->bufptr;
1067 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1068 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1069 PL_parser->bufend += len;
1070 Copy(pv, bufptr, len, char);
1071 }
1072 }
1073}
1074
1075/*
9dcc53ea
Z
1076=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1077
1078Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1079immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1080reallocating the buffer if necessary. This means that lexing code that
1081runs later will see the characters as if they had appeared in the input.
1082It is not recommended to do this as part of normal parsing, and most
1083uses of this facility run the risk of the inserted characters being
1084interpreted in an unintended manner.
1085
1086The string to be inserted is represented by octets starting at I<pv>
1087and continuing to the first nul. These octets are interpreted as either
1088UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1089in I<flags>. The characters are recoded for the lexer buffer, according
1090to how the buffer is currently being interpreted (L</lex_bufutf8>).
1091If it is not convenient to nul-terminate a string to be inserted, the
1092L</lex_stuff_pvn> function is more appropriate.
1093
1094=cut
1095*/
1096
1097void
1098Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1099{
1100 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1101 lex_stuff_pvn(pv, strlen(pv), flags);
1102}
1103
1104/*
f0e67a1d
Z
1105=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1106
1107Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1108immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1109reallocating the buffer if necessary. This means that lexing code that
1110runs later will see the characters as if they had appeared in the input.
1111It is not recommended to do this as part of normal parsing, and most
1112uses of this facility run the risk of the inserted characters being
1113interpreted in an unintended manner.
1114
1115The string to be inserted is the string value of I<sv>. The characters
1116are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1117being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1118not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1119need to construct a scalar.
1120
1121=cut
1122*/
1123
1124void
1125Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1126{
1127 char *pv;
1128 STRLEN len;
1129 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1130 if (flags)
1131 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1132 pv = SvPV(sv, len);
1133 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1134}
1135
1136/*
1137=for apidoc Amx|void|lex_unstuff|char *ptr
1138
1139Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1140I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1141This hides the discarded text from any lexing code that runs later,
1142as if the text had never appeared.
1143
1144This is not the normal way to consume lexed text. For that, use
1145L</lex_read_to>.
1146
1147=cut
1148*/
1149
1150void
1151Perl_lex_unstuff(pTHX_ char *ptr)
1152{
1153 char *buf, *bufend;
1154 STRLEN unstuff_len;
1155 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1156 buf = PL_parser->bufptr;
1157 if (ptr < buf)
1158 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1159 if (ptr == buf)
1160 return;
1161 bufend = PL_parser->bufend;
1162 if (ptr > bufend)
1163 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1164 unstuff_len = ptr - buf;
1165 Move(ptr, buf, bufend+1-ptr, char);
1166 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1167 PL_parser->bufend = bufend - unstuff_len;
1168}
1169
1170/*
1171=for apidoc Amx|void|lex_read_to|char *ptr
1172
1173Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1174to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1175performing the correct bookkeeping whenever a newline character is passed.
1176This is the normal way to consume lexed text.
1177
1178Interpretation of the buffer's octets can be abstracted out by
1179using the slightly higher-level functions L</lex_peek_unichar> and
1180L</lex_read_unichar>.
1181
1182=cut
1183*/
1184
1185void
1186Perl_lex_read_to(pTHX_ char *ptr)
1187{
1188 char *s;
1189 PERL_ARGS_ASSERT_LEX_READ_TO;
1190 s = PL_parser->bufptr;
1191 if (ptr < s || ptr > PL_parser->bufend)
1192 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1193 for (; s != ptr; s++)
1194 if (*s == '\n') {
83944c01 1195 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
1196 PL_parser->linestart = s+1;
1197 }
1198 PL_parser->bufptr = ptr;
1199}
1200
1201/*
1202=for apidoc Amx|void|lex_discard_to|char *ptr
1203
1204Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1205up to I<ptr>. The remaining content of the buffer will be moved, and
1206all pointers into the buffer updated appropriately. I<ptr> must not
1207be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1208it is not permitted to discard text that has yet to be lexed.
1209
1210Normally it is not necessarily to do this directly, because it suffices to
1211use the implicit discarding behaviour of L</lex_next_chunk> and things
1212based on it. However, if a token stretches across multiple lines,
1f317c95 1213and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1214that purpose, then after completion of the token it would be wise to
1215explicitly discard the now-unneeded earlier lines, to avoid future
1216multi-line tokens growing the buffer without bound.
1217
1218=cut
1219*/
1220
1221void
1222Perl_lex_discard_to(pTHX_ char *ptr)
1223{
1224 char *buf;
1225 STRLEN discard_len;
1226 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1227 buf = SvPVX(PL_parser->linestr);
1228 if (ptr < buf)
1229 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1230 if (ptr == buf)
1231 return;
1232 if (ptr > PL_parser->bufptr)
1233 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1234 discard_len = ptr - buf;
1235 if (PL_parser->oldbufptr < ptr)
1236 PL_parser->oldbufptr = ptr;
1237 if (PL_parser->oldoldbufptr < ptr)
1238 PL_parser->oldoldbufptr = ptr;
1239 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1240 PL_parser->last_uni = NULL;
1241 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1242 PL_parser->last_lop = NULL;
1243 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1244 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1245 PL_parser->bufend -= discard_len;
1246 PL_parser->bufptr -= discard_len;
1247 PL_parser->oldbufptr -= discard_len;
1248 PL_parser->oldoldbufptr -= discard_len;
1249 if (PL_parser->last_uni)
1250 PL_parser->last_uni -= discard_len;
1251 if (PL_parser->last_lop)
1252 PL_parser->last_lop -= discard_len;
1253}
1254
1255/*
1256=for apidoc Amx|bool|lex_next_chunk|U32 flags
1257
1258Reads in the next chunk of text to be lexed, appending it to
1259L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1260looked to the end of the current chunk and wants to know more. It is
1261usual, but not necessary, for lexing to have consumed the entirety of
1262the current chunk at this time.
1263
1264If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1265chunk (i.e., the current chunk has been entirely consumed), normally the
1266current chunk will be discarded at the same time that the new chunk is
1267read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1268will not be discarded. If the current chunk has not been entirely
1269consumed, then it will not be discarded regardless of the flag.
1270
1271Returns true if some new text was added to the buffer, or false if the
1272buffer has reached the end of the input text.
1273
1274=cut
1275*/
1276
1277#define LEX_FAKE_EOF 0x80000000
112d1284 1278#define LEX_NO_TERM 0x40000000
f0e67a1d
Z
1279
1280bool
1281Perl_lex_next_chunk(pTHX_ U32 flags)
1282{
1283 SV *linestr;
1284 char *buf;
1285 STRLEN old_bufend_pos, new_bufend_pos;
1286 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1287 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1288 bool got_some_for_debugger = 0;
f0e67a1d 1289 bool got_some;
112d1284 1290 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
f0e67a1d 1291 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1292 linestr = PL_parser->linestr;
1293 buf = SvPVX(linestr);
1294 if (!(flags & LEX_KEEP_PREVIOUS) &&
1295 PL_parser->bufptr == PL_parser->bufend) {
1296 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1297 linestart_pos = 0;
1298 if (PL_parser->last_uni != PL_parser->bufend)
1299 PL_parser->last_uni = NULL;
1300 if (PL_parser->last_lop != PL_parser->bufend)
1301 PL_parser->last_lop = NULL;
1302 last_uni_pos = last_lop_pos = 0;
1303 *buf = 0;
1304 SvCUR(linestr) = 0;
1305 } else {
1306 old_bufend_pos = PL_parser->bufend - buf;
1307 bufptr_pos = PL_parser->bufptr - buf;
1308 oldbufptr_pos = PL_parser->oldbufptr - buf;
1309 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1310 linestart_pos = PL_parser->linestart - buf;
1311 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1312 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1313 }
1314 if (flags & LEX_FAKE_EOF) {
1315 goto eof;
60d63348 1316 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
f0e67a1d
Z
1317 got_some = 0;
1318 } else if (filter_gets(linestr, old_bufend_pos)) {
1319 got_some = 1;
17cc9359 1320 got_some_for_debugger = 1;
112d1284
FC
1321 } else if (flags & LEX_NO_TERM) {
1322 got_some = 0;
f0e67a1d 1323 } else {
580561a3
Z
1324 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1325 sv_setpvs(linestr, "");
f0e67a1d
Z
1326 eof:
1327 /* End of real input. Close filehandle (unless it was STDIN),
1328 * then add implicit termination.
1329 */
87606032 1330 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
f0e67a1d
Z
1331 PerlIO_clearerr(PL_parser->rsfp);
1332 else if (PL_parser->rsfp)
1333 (void)PerlIO_close(PL_parser->rsfp);
1334 PL_parser->rsfp = NULL;
60d63348 1335 PL_parser->in_pod = PL_parser->filtered = 0;
f0e67a1d
Z
1336#ifdef PERL_MAD
1337 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1338 PL_faketokens = 1;
1339#endif
1340 if (!PL_in_eval && PL_minus_p) {
1341 sv_catpvs(linestr,
1342 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1343 PL_minus_n = PL_minus_p = 0;
1344 } else if (!PL_in_eval && PL_minus_n) {
1345 sv_catpvs(linestr, /*{*/";}");
1346 PL_minus_n = 0;
1347 } else
1348 sv_catpvs(linestr, ";");
1349 got_some = 1;
1350 }
1351 buf = SvPVX(linestr);
1352 new_bufend_pos = SvCUR(linestr);
1353 PL_parser->bufend = buf + new_bufend_pos;
1354 PL_parser->bufptr = buf + bufptr_pos;
1355 PL_parser->oldbufptr = buf + oldbufptr_pos;
1356 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1357 PL_parser->linestart = buf + linestart_pos;
1358 if (PL_parser->last_uni)
1359 PL_parser->last_uni = buf + last_uni_pos;
1360 if (PL_parser->last_lop)
1361 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1362 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1363 PL_curstash != PL_debstash) {
1364 /* debugger active and we're not compiling the debugger code,
1365 * so store the line into the debugger's array of lines
1366 */
1367 update_debugger_info(NULL, buf+old_bufend_pos,
1368 new_bufend_pos-old_bufend_pos);
1369 }
1370 return got_some;
1371}
1372
1373/*
1374=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1375
1376Looks ahead one (Unicode) character in the text currently being lexed.
1377Returns the codepoint (unsigned integer value) of the next character,
1378or -1 if lexing has reached the end of the input text. To consume the
1379peeked character, use L</lex_read_unichar>.
1380
1381If the next character is in (or extends into) the next chunk of input
1382text, the next chunk will be read in. Normally the current chunk will be
1383discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1384then the current chunk will not be discarded.
1385
1386If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1387is encountered, an exception is generated.
1388
1389=cut
1390*/
1391
1392I32
1393Perl_lex_peek_unichar(pTHX_ U32 flags)
1394{
749123ff 1395 dVAR;
f0e67a1d
Z
1396 char *s, *bufend;
1397 if (flags & ~(LEX_KEEP_PREVIOUS))
1398 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1399 s = PL_parser->bufptr;
1400 bufend = PL_parser->bufend;
1401 if (UTF) {
1402 U8 head;
1403 I32 unichar;
1404 STRLEN len, retlen;
1405 if (s == bufend) {
1406 if (!lex_next_chunk(flags))
1407 return -1;
1408 s = PL_parser->bufptr;
1409 bufend = PL_parser->bufend;
1410 }
1411 head = (U8)*s;
54d004e8 1412 if (UTF8_IS_INVARIANT(head))
f0e67a1d 1413 return head;
54d004e8
KW
1414 if (UTF8_IS_START(head)) {
1415 len = UTF8SKIP(&head);
f0e67a1d
Z
1416 while ((STRLEN)(bufend-s) < len) {
1417 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1418 break;
1419 s = PL_parser->bufptr;
1420 bufend = PL_parser->bufend;
1421 }
1422 }
1423 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1424 if (retlen == (STRLEN)-1) {
1425 /* malformed UTF-8 */
1426 ENTER;
1427 SAVESPTR(PL_warnhook);
1428 PL_warnhook = PERL_WARNHOOK_FATAL;
1429 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1430 LEAVE;
1431 }
1432 return unichar;
1433 } else {
1434 if (s == bufend) {
1435 if (!lex_next_chunk(flags))
1436 return -1;
1437 s = PL_parser->bufptr;
1438 }
1439 return (U8)*s;
1440 }
1441}
1442
1443/*
1444=for apidoc Amx|I32|lex_read_unichar|U32 flags
1445
1446Reads the next (Unicode) character in the text currently being lexed.
1447Returns the codepoint (unsigned integer value) of the character read,
1448and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1449if lexing has reached the end of the input text. To non-destructively
1450examine the next character, use L</lex_peek_unichar> instead.
1451
1452If the next character is in (or extends into) the next chunk of input
1453text, the next chunk will be read in. Normally the current chunk will be
1454discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1455then the current chunk will not be discarded.
1456
1457If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1458is encountered, an exception is generated.
1459
1460=cut
1461*/
1462
1463I32
1464Perl_lex_read_unichar(pTHX_ U32 flags)
1465{
1466 I32 c;
1467 if (flags & ~(LEX_KEEP_PREVIOUS))
1468 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1469 c = lex_peek_unichar(flags);
1470 if (c != -1) {
1471 if (c == '\n')
83944c01 1472 COPLINE_INC_WITH_HERELINES;
d9018cbe
EB
1473 if (UTF)
1474 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1475 else
1476 ++(PL_parser->bufptr);
f0e67a1d
Z
1477 }
1478 return c;
1479}
1480
1481/*
1482=for apidoc Amx|void|lex_read_space|U32 flags
1483
1484Reads optional spaces, in Perl style, in the text currently being
1485lexed. The spaces may include ordinary whitespace characters and
1486Perl-style comments. C<#line> directives are processed if encountered.
1487L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1488at a non-space character (or the end of the input text).
1489
1490If spaces extend into the next chunk of input text, the next chunk will
1491be read in. Normally the current chunk will be discarded at the same
1492time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1493chunk will not be discarded.
1494
1495=cut
1496*/
1497
f0998909
Z
1498#define LEX_NO_NEXT_CHUNK 0x80000000
1499
f0e67a1d
Z
1500void
1501Perl_lex_read_space(pTHX_ U32 flags)
1502{
1503 char *s, *bufend;
1504 bool need_incline = 0;
f0998909 1505 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1506 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1507#ifdef PERL_MAD
1508 if (PL_skipwhite) {
1509 sv_free(PL_skipwhite);
1510 PL_skipwhite = NULL;
1511 }
1512 if (PL_madskills)
1513 PL_skipwhite = newSVpvs("");
1514#endif /* PERL_MAD */
1515 s = PL_parser->bufptr;
1516 bufend = PL_parser->bufend;
1517 while (1) {
1518 char c = *s;
1519 if (c == '#') {
1520 do {
1521 c = *++s;
1522 } while (!(c == '\n' || (c == 0 && s == bufend)));
1523 } else if (c == '\n') {
1524 s++;
1525 PL_parser->linestart = s;
1526 if (s == bufend)
1527 need_incline = 1;
1528 else
1529 incline(s);
1530 } else if (isSPACE(c)) {
1531 s++;
1532 } else if (c == 0 && s == bufend) {
1533 bool got_more;
1534#ifdef PERL_MAD
1535 if (PL_madskills)
1536 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1537#endif /* PERL_MAD */
f0998909
Z
1538 if (flags & LEX_NO_NEXT_CHUNK)
1539 break;
f0e67a1d 1540 PL_parser->bufptr = s;
83944c01 1541 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
1542 got_more = lex_next_chunk(flags);
1543 CopLINE_dec(PL_curcop);
1544 s = PL_parser->bufptr;
1545 bufend = PL_parser->bufend;
1546 if (!got_more)
1547 break;
1548 if (need_incline && PL_parser->rsfp) {
1549 incline(s);
1550 need_incline = 0;
1551 }
1552 } else {
1553 break;
1554 }
1555 }
1556#ifdef PERL_MAD
1557 if (PL_madskills)
1558 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1559#endif /* PERL_MAD */
1560 PL_parser->bufptr = s;
1561}
1562
1563/*
ffb4593c
NT
1564 * S_incline
1565 * This subroutine has nothing to do with tilting, whether at windmills
1566 * or pinball tables. Its name is short for "increment line". It
57843af0 1567 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1568 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1569 * # line 500 "foo.pm"
1570 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1571 */
1572
76e3520e 1573STATIC void
d9095cec 1574S_incline(pTHX_ const char *s)
463ee0b2 1575{
97aff369 1576 dVAR;
d9095cec
NC
1577 const char *t;
1578 const char *n;
1579 const char *e;
8818d409 1580 line_t line_num;
463ee0b2 1581
7918f24d
NC
1582 PERL_ARGS_ASSERT_INCLINE;
1583
83944c01 1584 COPLINE_INC_WITH_HERELINES;
451f421f
FC
1585 if (!PL_rsfp && !PL_parser->filtered && PL_lex_state == LEX_NORMAL
1586 && s+1 == PL_bufend && *s == ';') {
1587 /* fake newline in string eval */
1588 CopLINE_dec(PL_curcop);
1589 return;
1590 }
463ee0b2
LW
1591 if (*s++ != '#')
1592 return;
d4c19fe8
AL
1593 while (SPACE_OR_TAB(*s))
1594 s++;
73659bf1
GS
1595 if (strnEQ(s, "line", 4))
1596 s += 4;
1597 else
1598 return;
084592ab 1599 if (SPACE_OR_TAB(*s))
73659bf1 1600 s++;
4e553d73 1601 else
73659bf1 1602 return;
d4c19fe8
AL
1603 while (SPACE_OR_TAB(*s))
1604 s++;
463ee0b2
LW
1605 if (!isDIGIT(*s))
1606 return;
d4c19fe8 1607
463ee0b2
LW
1608 n = s;
1609 while (isDIGIT(*s))
1610 s++;
07714eb4 1611 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1612 return;
bf4acbe4 1613 while (SPACE_OR_TAB(*s))
463ee0b2 1614 s++;
73659bf1 1615 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1616 s++;
73659bf1
GS
1617 e = t + 1;
1618 }
463ee0b2 1619 else {
c35e046a
AL
1620 t = s;
1621 while (!isSPACE(*t))
1622 t++;
73659bf1 1623 e = t;
463ee0b2 1624 }
bf4acbe4 1625 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1626 e++;
1627 if (*e != '\n' && *e != '\0')
1628 return; /* false alarm */
1629
8818d409
FC
1630 line_num = atoi(n)-1;
1631
f4dd75d9 1632 if (t - s > 0) {
d9095cec 1633 const STRLEN len = t - s;
19bad673
NC
1634 SV *const temp_sv = CopFILESV(PL_curcop);
1635 const char *cf;
1636 STRLEN tmplen;
1637
1638 if (temp_sv) {
1639 cf = SvPVX(temp_sv);
1640 tmplen = SvCUR(temp_sv);
1641 } else {
1642 cf = NULL;
1643 tmplen = 0;
1644 }
1645
d1299d44 1646 if (!PL_rsfp && !PL_parser->filtered) {
e66cf94c
RGS
1647 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1648 * to *{"::_<newfilename"} */
44867030
NC
1649 /* However, the long form of evals is only turned on by the
1650 debugger - usually they're "(eval %lu)" */
1651 char smallbuf[128];
1652 char *tmpbuf;
1653 GV **gvp;
d9095cec 1654 STRLEN tmplen2 = len;
798b63bc 1655 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1656 tmpbuf = smallbuf;
1657 else
2ae0db35 1658 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1659 tmpbuf[0] = '_';
1660 tmpbuf[1] = '<';
2ae0db35 1661 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1662 tmplen += 2;
8a5ee598
RGS
1663 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1664 if (gvp) {
44867030
NC
1665 char *tmpbuf2;
1666 GV *gv2;
1667
1668 if (tmplen2 + 2 <= sizeof smallbuf)
1669 tmpbuf2 = smallbuf;
1670 else
1671 Newx(tmpbuf2, tmplen2 + 2, char);
1672
1673 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1674 /* Either they malloc'd it, or we malloc'd it,
1675 so no prefix is present in ours. */
1676 tmpbuf2[0] = '_';
1677 tmpbuf2[1] = '<';
1678 }
1679
1680 memcpy(tmpbuf2 + 2, s, tmplen2);
1681 tmplen2 += 2;
1682
8a5ee598 1683 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1684 if (!isGV(gv2)) {
8a5ee598 1685 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1686 /* adjust ${"::_<newfilename"} to store the new file name */
1687 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1688 /* The line number may differ. If that is the case,
1689 alias the saved lines that are in the array.
1690 Otherwise alias the whole array. */
1691 if (CopLINE(PL_curcop) == line_num) {
1692 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1693 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1694 }
1695 else if (GvAV(*gvp)) {
1696 AV * const av = GvAV(*gvp);
1697 const I32 start = CopLINE(PL_curcop)+1;
1698 I32 items = AvFILLp(av) - start;
1699 if (items > 0) {
1700 AV * const av2 = GvAVn(gv2);
1701 SV **svp = AvARRAY(av) + start;
1702 I32 l = (I32)line_num+1;
1703 while (items--)
1704 av_store(av2, l++, SvREFCNT_inc(*svp++));
1705 }
1706 }
e5527e4b 1707 }
44867030
NC
1708
1709 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1710 }
e66cf94c 1711 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1712 }
05ec9bb3 1713 CopFILE_free(PL_curcop);
d9095cec 1714 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1715 }
8818d409 1716 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1717}
1718
29595ff2 1719#ifdef PERL_MAD
cd81e915 1720/* skip space before PL_thistoken */
29595ff2
NC
1721
1722STATIC char *
1723S_skipspace0(pTHX_ register char *s)
1724{
7918f24d
NC
1725 PERL_ARGS_ASSERT_SKIPSPACE0;
1726
29595ff2
NC
1727 s = skipspace(s);
1728 if (!PL_madskills)
1729 return s;
cd81e915
NC
1730 if (PL_skipwhite) {
1731 if (!PL_thiswhite)
6b29d1f5 1732 PL_thiswhite = newSVpvs("");
cd81e915
NC
1733 sv_catsv(PL_thiswhite, PL_skipwhite);
1734 sv_free(PL_skipwhite);
1735 PL_skipwhite = 0;
1736 }
1737 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1738 return s;
1739}
1740
cd81e915 1741/* skip space after PL_thistoken */
29595ff2
NC
1742
1743STATIC char *
1744S_skipspace1(pTHX_ register char *s)
1745{
d4c19fe8 1746 const char *start = s;
29595ff2
NC
1747 I32 startoff = start - SvPVX(PL_linestr);
1748
7918f24d
NC
1749 PERL_ARGS_ASSERT_SKIPSPACE1;
1750
29595ff2
NC
1751 s = skipspace(s);
1752 if (!PL_madskills)
1753 return s;
1754 start = SvPVX(PL_linestr) + startoff;
cd81e915 1755 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1756 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1757 PL_thistoken = newSVpvn(tstart, start - tstart);
1758 }
1759 PL_realtokenstart = -1;
1760 if (PL_skipwhite) {
1761 if (!PL_nextwhite)
6b29d1f5 1762 PL_nextwhite = newSVpvs("");
cd81e915
NC
1763 sv_catsv(PL_nextwhite, PL_skipwhite);
1764 sv_free(PL_skipwhite);
1765 PL_skipwhite = 0;
29595ff2
NC
1766 }
1767 return s;
1768}
1769
1770STATIC char *
1771S_skipspace2(pTHX_ register char *s, SV **svp)
1772{
c35e046a
AL
1773 char *start;
1774 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1775 const I32 startoff = s - SvPVX(PL_linestr);
1776
7918f24d
NC
1777 PERL_ARGS_ASSERT_SKIPSPACE2;
1778
29595ff2
NC
1779 s = skipspace(s);
1780 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1781 if (!PL_madskills || !svp)
1782 return s;
1783 start = SvPVX(PL_linestr) + startoff;
cd81e915 1784 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1785 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1786 PL_thistoken = newSVpvn(tstart, start - tstart);
1787 PL_realtokenstart = -1;
29595ff2 1788 }
cd81e915 1789 if (PL_skipwhite) {
29595ff2 1790 if (!*svp)
6b29d1f5 1791 *svp = newSVpvs("");
cd81e915
NC
1792 sv_setsv(*svp, PL_skipwhite);
1793 sv_free(PL_skipwhite);
1794 PL_skipwhite = 0;
29595ff2
NC
1795 }
1796
1797 return s;
1798}
1799#endif
1800
80a702cd 1801STATIC void
15f169a1 1802S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1803{
1804 AV *av = CopFILEAVx(PL_curcop);
1805 if (av) {
b9f83d2f 1806 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1807 if (orig_sv)
1808 sv_setsv(sv, orig_sv);
1809 else
1810 sv_setpvn(sv, buf, len);
80a702cd
RGS
1811 (void)SvIOK_on(sv);
1812 SvIV_set(sv, 0);
1813 av_store(av, (I32)CopLINE(PL_curcop), sv);
1814 }
1815}
1816
ffb4593c
NT
1817/*
1818 * S_skipspace
1819 * Called to gobble the appropriate amount and type of whitespace.
1820 * Skips comments as well.
1821 */
1822
76e3520e 1823STATIC char *
cea2e8a9 1824S_skipspace(pTHX_ register char *s)
a687059c 1825{
5db06880 1826#ifdef PERL_MAD
f0e67a1d
Z
1827 char *start = s;
1828#endif /* PERL_MAD */
7918f24d 1829 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1830#ifdef PERL_MAD
cd81e915
NC
1831 if (PL_skipwhite) {
1832 sv_free(PL_skipwhite);
f0e67a1d 1833 PL_skipwhite = NULL;
5db06880 1834 }
f0e67a1d 1835#endif /* PERL_MAD */
3280af22 1836 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1837 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1838 s++;
f0e67a1d
Z
1839 } else {
1840 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1841 PL_bufptr = s;
f0998909
Z
1842 lex_read_space(LEX_KEEP_PREVIOUS |
1843 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1844 LEX_NO_NEXT_CHUNK : 0));
3280af22 1845 s = PL_bufptr;
f0e67a1d
Z
1846 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1847 if (PL_linestart > PL_bufptr)
1848 PL_bufptr = PL_linestart;
1849 return s;
463ee0b2 1850 }
5db06880 1851#ifdef PERL_MAD
f0e67a1d
Z
1852 if (PL_madskills)
1853 PL_skipwhite = newSVpvn(start, s-start);
1854#endif /* PERL_MAD */
5db06880 1855 return s;
a687059c 1856}
378cc40b 1857
ffb4593c
NT
1858/*
1859 * S_check_uni
1860 * Check the unary operators to ensure there's no ambiguity in how they're
1861 * used. An ambiguous piece of code would be:
1862 * rand + 5
1863 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1864 * the +5 is its argument.
1865 */
1866
76e3520e 1867STATIC void
cea2e8a9 1868S_check_uni(pTHX)
ba106d47 1869{
97aff369 1870 dVAR;
d4c19fe8
AL
1871 const char *s;
1872 const char *t;
2f3197b3 1873
3280af22 1874 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1875 return;
3280af22
NIS
1876 while (isSPACE(*PL_last_uni))
1877 PL_last_uni++;
c35e046a
AL
1878 s = PL_last_uni;
1879 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1880 s++;
3280af22 1881 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1882 return;
6136c704 1883
9b387841
NC
1884 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1885 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1886 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1887}
1888
ffb4593c
NT
1889/*
1890 * LOP : macro to build a list operator. Its behaviour has been replaced
1891 * with a subroutine, S_lop() for which LOP is just another name.
1892 */
1893
a0d0e21e
LW
1894#define LOP(f,x) return lop(f,x,s)
1895
ffb4593c
NT
1896/*
1897 * S_lop
1898 * Build a list operator (or something that might be one). The rules:
1899 * - if we have a next token, then it's a list operator [why?]
1900 * - if the next thing is an opening paren, then it's a function
1901 * - else it's a list operator
1902 */
1903
76e3520e 1904STATIC I32
a0be28da 1905S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1906{
97aff369 1907 dVAR;
7918f24d
NC
1908
1909 PERL_ARGS_ASSERT_LOP;
1910
6154021b 1911 pl_yylval.ival = f;
35c8bce7 1912 CLINE;
3280af22
NIS
1913 PL_expect = x;
1914 PL_bufptr = s;
1915 PL_last_lop = PL_oldbufptr;
eb160463 1916 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1917#ifdef PERL_MAD
1918 if (PL_lasttoke)
78cdf107 1919 goto lstop;
5db06880 1920#else
3280af22 1921 if (PL_nexttoke)
78cdf107 1922 goto lstop;
5db06880 1923#endif
79072805 1924 if (*s == '(')
bbf60fe6 1925 return REPORT(FUNC);
29595ff2 1926 s = PEEKSPACE(s);
79072805 1927 if (*s == '(')
bbf60fe6 1928 return REPORT(FUNC);
78cdf107
Z
1929 else {
1930 lstop:
1931 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1932 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1933 return REPORT(LSTOP);
78cdf107 1934 }
79072805
LW
1935}
1936
5db06880
NC
1937#ifdef PERL_MAD
1938 /*
1939 * S_start_force
1940 * Sets up for an eventual force_next(). start_force(0) basically does
1941 * an unshift, while start_force(-1) does a push. yylex removes items
1942 * on the "pop" end.
1943 */
1944
1945STATIC void
1946S_start_force(pTHX_ int where)
1947{
1948 int i;
1949
cd81e915 1950 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1951 where = PL_lasttoke;
cd81e915
NC
1952 assert(PL_curforce < 0 || PL_curforce == where);
1953 if (PL_curforce != where) {
5db06880
NC
1954 for (i = PL_lasttoke; i > where; --i) {
1955 PL_nexttoke[i] = PL_nexttoke[i-1];
1956 }
1957 PL_lasttoke++;
1958 }
cd81e915 1959 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1960 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1961 PL_curforce = where;
1962 if (PL_nextwhite) {
5db06880 1963 if (PL_madskills)
6b29d1f5 1964 curmad('^', newSVpvs(""));
cd81e915 1965 CURMAD('_', PL_nextwhite);
5db06880
NC
1966 }
1967}
1968
1969STATIC void
1970S_curmad(pTHX_ char slot, SV *sv)
1971{
1972 MADPROP **where;
1973
1974 if (!sv)
1975 return;
cd81e915
NC
1976 if (PL_curforce < 0)
1977 where = &PL_thismad;
5db06880 1978 else
cd81e915 1979 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1980
cd81e915 1981 if (PL_faketokens)
76f68e9b 1982 sv_setpvs(sv, "");
5db06880
NC
1983 else {
1984 if (!IN_BYTES) {
1985 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1986 SvUTF8_on(sv);
1987 else if (PL_encoding) {
1988 sv_recode_to_utf8(sv, PL_encoding);
1989 }
1990 }
1991 }
1992
1993 /* keep a slot open for the head of the list? */
1994 if (slot != '_' && *where && (*where)->mad_key == '^') {
1995 (*where)->mad_key = slot;
daba3364 1996 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1997 (*where)->mad_val = (void*)sv;
1998 }
1999 else
2000 addmad(newMADsv(slot, sv), where, 0);
2001}
2002#else
b3f24c00
MHM
2003# define start_force(where) NOOP
2004# define curmad(slot, sv) NOOP
5db06880
NC
2005#endif
2006
ffb4593c
NT
2007/*
2008 * S_force_next
9cbb5ea2 2009 * When the lexer realizes it knows the next token (for instance,
ffb4593c 2010 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 2011 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
2012 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
2013 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
2014 */
2015
4e553d73 2016STATIC void
cea2e8a9 2017S_force_next(pTHX_ I32 type)
79072805 2018{
97aff369 2019 dVAR;
704d4215
GG
2020#ifdef DEBUGGING
2021 if (DEBUG_T_TEST) {
2022 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 2023 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
2024 }
2025#endif
6c7ae946
FC
2026 /* Don’t let opslab_force_free snatch it */
2027 if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
2028 assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
2029 NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
2030 }
5db06880 2031#ifdef PERL_MAD
cd81e915 2032 if (PL_curforce < 0)
5db06880 2033 start_force(PL_lasttoke);
cd81e915 2034 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
2035 if (PL_lex_state != LEX_KNOWNEXT)
2036 PL_lex_defer = PL_lex_state;
2037 PL_lex_state = LEX_KNOWNEXT;
2038 PL_lex_expect = PL_expect;
cd81e915 2039 PL_curforce = -1;
5db06880 2040#else
3280af22
NIS
2041 PL_nexttype[PL_nexttoke] = type;
2042 PL_nexttoke++;
2043 if (PL_lex_state != LEX_KNOWNEXT) {
2044 PL_lex_defer = PL_lex_state;
2045 PL_lex_expect = PL_expect;
2046 PL_lex_state = LEX_KNOWNEXT;
79072805 2047 }
5db06880 2048#endif
79072805
LW
2049}
2050
28ac2b49
Z
2051void
2052Perl_yyunlex(pTHX)
2053{
a7aaec61
Z
2054 int yyc = PL_parser->yychar;
2055 if (yyc != YYEMPTY) {
2056 if (yyc) {
2057 start_force(-1);
2058 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2059 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 2060 PL_lex_allbrackets--;
a7aaec61 2061 PL_lex_brackets--;
78cdf107
Z
2062 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2063 } else if (yyc == '('/*)*/) {
2064 PL_lex_allbrackets--;
2065 yyc |= (2<<24);
a7aaec61
Z
2066 }
2067 force_next(yyc);
2068 }
28ac2b49
Z
2069 PL_parser->yychar = YYEMPTY;
2070 }
2071}
2072
d0a148a6 2073STATIC SV *
15f169a1 2074S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2075{
97aff369 2076 dVAR;
740cce10 2077 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2078 !IN_BYTES
2079 && UTF
2080 && !is_ascii_string((const U8*)start, len)
740cce10 2081 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2082 return sv;
2083}
2084
ffb4593c
NT
2085/*
2086 * S_force_word
2087 * When the lexer knows the next thing is a word (for instance, it has
2088 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2089 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2090 * lookahead.
ffb4593c
NT
2091 *
2092 * Arguments:
b1b65b59 2093 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2094 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2095 * int check_keyword : if true, Perl checks to make sure the word isn't
2096 * a keyword (do this if the word is a label, e.g. goto FOO)
2097 * int allow_pack : if true, : characters will also be allowed (require,
2098 * use, etc. do this)
9cbb5ea2 2099 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2100 */
2101
76e3520e 2102STATIC char *
cea2e8a9 2103S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2104{
97aff369 2105 dVAR;
eb578fdb 2106 char *s;
463ee0b2 2107 STRLEN len;
4e553d73 2108
7918f24d
NC
2109 PERL_ARGS_ASSERT_FORCE_WORD;
2110
29595ff2 2111 start = SKIPSPACE1(start);
463ee0b2 2112 s = start;
7e2040f0 2113 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2114 (allow_pack && *s == ':') ||
15f0808c 2115 (allow_initial_tick && *s == '\'') )
a0d0e21e 2116 {
3280af22 2117 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2118 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2119 return start;
cd81e915 2120 start_force(PL_curforce);
5db06880
NC
2121 if (PL_madskills)
2122 curmad('X', newSVpvn(start,s-start));
463ee0b2 2123 if (token == METHOD) {
29595ff2 2124 s = SKIPSPACE1(s);
463ee0b2 2125 if (*s == '(')
3280af22 2126 PL_expect = XTERM;
463ee0b2 2127 else {
3280af22 2128 PL_expect = XOPERATOR;
463ee0b2 2129 }
79072805 2130 }
e74e6b3d 2131 if (PL_madskills)
63575281 2132 curmad('g', newSVpvs( "forced" ));
9ded7720 2133 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2134 = (OP*)newSVOP(OP_CONST,0,
2135 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2136 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2137 force_next(token);
2138 }
2139 return s;
2140}
2141
ffb4593c
NT
2142/*
2143 * S_force_ident
9cbb5ea2 2144 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2145 * text only contains the "foo" portion. The first argument is a pointer
2146 * to the "foo", and the second argument is the type symbol to prefix.
2147 * Forces the next token to be a "WORD".
9cbb5ea2 2148 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2149 */
2150
76e3520e 2151STATIC void
bfed75c6 2152S_force_ident(pTHX_ register const char *s, int kind)
79072805 2153{
97aff369 2154 dVAR;
7918f24d
NC
2155
2156 PERL_ARGS_ASSERT_FORCE_IDENT;
2157
c35e046a 2158 if (*s) {
90e5519e 2159 const STRLEN len = strlen(s);
728847b1
BF
2160 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2161 UTF ? SVf_UTF8 : 0));
cd81e915 2162 start_force(PL_curforce);
9ded7720 2163 NEXTVAL_NEXTTOKE.opval = o;
79072805 2164 force_next(WORD);
748a9306 2165 if (kind) {
11343788 2166 o->op_private = OPpCONST_ENTERED;
55497cff 2167 /* XXX see note in pp_entereval() for why we forgo typo
2168 warnings if the symbol must be introduced in an eval.
2169 GSAR 96-10-12 */
90e5519e 2170 gv_fetchpvn_flags(s, len,
728847b1
BF
2171 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2172 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
90e5519e
NC
2173 kind == '$' ? SVt_PV :
2174 kind == '@' ? SVt_PVAV :
2175 kind == '%' ? SVt_PVHV :
a0d0e21e 2176 SVt_PVGV
90e5519e 2177 );
748a9306 2178 }
79072805
LW
2179 }
2180}
2181
3f33d153
FC
2182static void
2183S_force_ident_maybe_lex(pTHX_ char pit)
2184{
2185 start_force(PL_curforce);
2186 NEXTVAL_NEXTTOKE.ival = pit;
2187 force_next('p');
2188}
2189
1571675a
GS
2190NV
2191Perl_str_to_version(pTHX_ SV *sv)
2192{
2193 NV retval = 0.0;
2194 NV nshift = 1.0;
2195 STRLEN len;
cfd0369c 2196 const char *start = SvPV_const(sv,len);
9d4ba2ae 2197 const char * const end = start + len;
504618e9 2198 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2199
2200 PERL_ARGS_ASSERT_STR_TO_VERSION;
2201
1571675a 2202 while (start < end) {
ba210ebe 2203 STRLEN skip;
1571675a
GS
2204 UV n;
2205 if (utf)
9041c2e3 2206 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2207 else {
2208 n = *(U8*)start;
2209 skip = 1;
2210 }
2211 retval += ((NV)n)/nshift;
2212 start += skip;
2213 nshift *= 1000;
2214 }
2215 return retval;
2216}
2217
4e553d73 2218/*
ffb4593c
NT
2219 * S_force_version
2220 * Forces the next token to be a version number.
e759cc13
RGS
2221 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2222 * and if "guessing" is TRUE, then no new token is created (and the caller
2223 * must use an alternative parsing method).
ffb4593c
NT
2224 */
2225
76e3520e 2226STATIC char *
e759cc13 2227S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2228{
97aff369 2229 dVAR;
5f66b61c 2230 OP *version = NULL;
44dcb63b 2231 char *d;
5db06880
NC
2232#ifdef PERL_MAD
2233 I32 startoff = s - SvPVX(PL_linestr);
2234#endif
89bfa8cd 2235
7918f24d
NC
2236 PERL_ARGS_ASSERT_FORCE_VERSION;
2237
29595ff2 2238 s = SKIPSPACE1(s);
89bfa8cd 2239
44dcb63b 2240 d = s;
dd629d5b 2241 if (*d == 'v')
44dcb63b 2242 d++;
44dcb63b 2243 if (isDIGIT(*d)) {
e759cc13
RGS
2244 while (isDIGIT(*d) || *d == '_' || *d == '.')
2245 d++;
5db06880
NC
2246#ifdef PERL_MAD
2247 if (PL_madskills) {
cd81e915 2248 start_force(PL_curforce);
5db06880
NC
2249 curmad('X', newSVpvn(s,d-s));
2250 }
2251#endif
4e4da3ac 2252 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2253 SV *ver;
8d08d9ba 2254#ifdef USE_LOCALE_NUMERIC
909d3787
KW
2255 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2256 setlocale(LC_NUMERIC, "C");
8d08d9ba 2257#endif
6154021b 2258 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2259#ifdef USE_LOCALE_NUMERIC
2260 setlocale(LC_NUMERIC, loc);
909d3787 2261 Safefree(loc);
8d08d9ba 2262#endif
6154021b 2263 version = pl_yylval.opval;
dd629d5b
GS
2264 ver = cSVOPx(version)->op_sv;
2265 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2266 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2267 SvNV_set(ver, str_to_version(ver));
1571675a 2268 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2269 }
89bfa8cd 2270 }
5db06880
NC
2271 else if (guessing) {
2272#ifdef PERL_MAD
2273 if (PL_madskills) {
cd81e915
NC
2274 sv_free(PL_nextwhite); /* let next token collect whitespace */
2275 PL_nextwhite = 0;
5db06880
NC
2276 s = SvPVX(PL_linestr) + startoff;
2277 }
2278#endif
e759cc13 2279 return s;
5db06880 2280 }
89bfa8cd 2281 }
2282
5db06880
NC
2283#ifdef PERL_MAD
2284 if (PL_madskills && !version) {
cd81e915
NC
2285 sv_free(PL_nextwhite); /* let next token collect whitespace */
2286 PL_nextwhite = 0;
5db06880
NC
2287 s = SvPVX(PL_linestr) + startoff;
2288 }
2289#endif
89bfa8cd 2290 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2291 start_force(PL_curforce);
9ded7720 2292 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2293 force_next(WORD);
89bfa8cd 2294
e759cc13 2295 return s;
89bfa8cd 2296}
2297
ffb4593c 2298/*
91152fc1
DG
2299 * S_force_strict_version
2300 * Forces the next token to be a version number using strict syntax rules.
2301 */
2302
2303STATIC char *
2304S_force_strict_version(pTHX_ char *s)
2305{
2306 dVAR;
2307 OP *version = NULL;
2308#ifdef PERL_MAD
2309 I32 startoff = s - SvPVX(PL_linestr);
2310#endif
2311 const char *errstr = NULL;
2312
2313 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2314
2315 while (isSPACE(*s)) /* leading whitespace */
2316 s++;
2317
2318 if (is_STRICT_VERSION(s,&errstr)) {
2319 SV *ver = newSV(0);
2320 s = (char *)scan_version(s, ver, 0);
2321 version = newSVOP(OP_CONST, 0, ver);
2322 }
4e4da3ac
Z
2323 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2324 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2325 {
91152fc1
DG
2326 PL_bufptr = s;
2327 if (errstr)
2328 yyerror(errstr); /* version required */
2329 return s;
2330 }
2331
2332#ifdef PERL_MAD
2333 if (PL_madskills && !version) {
2334 sv_free(PL_nextwhite); /* let next token collect whitespace */
2335 PL_nextwhite = 0;
2336 s = SvPVX(PL_linestr) + startoff;
2337 }
2338#endif
2339 /* NOTE: The parser sees the package name and the VERSION swapped */
2340 start_force(PL_curforce);
2341 NEXTVAL_NEXTTOKE.opval = version;
2342 force_next(WORD);
2343
2344 return s;
2345}
2346
2347/*
ffb4593c
NT
2348 * S_tokeq
2349 * Tokenize a quoted string passed in as an SV. It finds the next
2350 * chunk, up to end of string or a backslash. It may make a new
2351 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2352 * turns \\ into \.
2353 */
2354
76e3520e 2355STATIC SV *
cea2e8a9 2356S_tokeq(pTHX_ SV *sv)
79072805 2357{
97aff369 2358 dVAR;
eb578fdb
KW
2359 char *s;
2360 char *send;
2361 char *d;
b3ac6de7
IZ
2362 STRLEN len = 0;
2363 SV *pv = sv;
79072805 2364
7918f24d
NC
2365 PERL_ARGS_ASSERT_TOKEQ;
2366
79072805 2367 if (!SvLEN(sv))
b3ac6de7 2368 goto finish;
79072805 2369
a0d0e21e 2370 s = SvPV_force(sv, len);
21a311ee 2371 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2372 goto finish;
463ee0b2 2373 send = s + len;
dcb21ed6
NC
2374 /* This is relying on the SV being "well formed" with a trailing '\0' */
2375 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2376 s++;
2377 if (s == send)
b3ac6de7 2378 goto finish;
79072805 2379 d = s;
be4731d2 2380 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2381 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2382 }
79072805
LW
2383 while (s < send) {
2384 if (*s == '\\') {
a0d0e21e 2385 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2386 s++; /* all that, just for this */
2387 }
2388 *d++ = *s++;
2389 }
2390 *d = '\0';
95a20fc0 2391 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2392 finish:
3280af22 2393 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2394 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2395 return sv;
2396}
2397
ffb4593c
NT
2398/*
2399 * Now come three functions related to double-quote context,
2400 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2401 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2402 * interact with PL_lex_state, and create fake ( ... ) argument lists
2403 * to handle functions and concatenation.
ecd24171
DM
2404 * For example,
2405 * "foo\lbar"
2406 * is tokenised as
2407 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
ffb4593c
NT
2408 */
2409
2410/*
2411 * S_sublex_start
6154021b 2412 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2413 *
2414 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2415 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2416 *
2417 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2418 *
2419 * Everything else becomes a FUNC.
2420 *
2421 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2422 * had an OP_CONST or OP_READLINE). This just sets us up for a
2423 * call to S_sublex_push().
2424 */
2425
76e3520e 2426STATIC I32
cea2e8a9 2427S_sublex_start(pTHX)
79072805 2428{
97aff369 2429 dVAR;
eb578fdb 2430 const I32 op_type = pl_yylval.ival;
79072805
LW
2431
2432 if (op_type == OP_NULL) {
6154021b 2433 pl_yylval.opval = PL_lex_op;
5f66b61c 2434 PL_lex_op = NULL;
79072805
LW
2435 return THING;
2436 }
2437 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2438 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2439
2440 if (SvTYPE(sv) == SVt_PVIV) {
2441 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2442 STRLEN len;
96a5add6 2443 const char * const p = SvPV_const(sv, len);
740cce10 2444 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2445 SvREFCNT_dec(sv);
2446 sv = nsv;
4e553d73 2447 }
6154021b 2448 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2449 PL_lex_stuff = NULL;
6f33ba73
RGS
2450 /* Allow <FH> // "foo" */
2451 if (op_type == OP_READLINE)
2452 PL_expect = XTERMORDORDOR;
79072805
LW
2453 return THING;
2454 }
e3f73d4e
RGS
2455 else if (op_type == OP_BACKTICK && PL_lex_op) {
2456 /* readpipe() vas overriden */
2457 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2458 pl_yylval.opval = PL_lex_op;
9b201d7d 2459 PL_lex_op = NULL;
e3f73d4e
RGS
2460 PL_lex_stuff = NULL;
2461 return THING;
2462 }
79072805 2463
3280af22 2464 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2465 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2466 PL_sublex_info.sub_op = PL_lex_op;
2467 PL_lex_state = LEX_INTERPPUSH;
55497cff 2468
3280af22
NIS
2469 PL_expect = XTERM;
2470 if (PL_lex_op) {
6154021b 2471 pl_yylval.opval = PL_lex_op;
5f66b61c 2472 PL_lex_op = NULL;
55497cff 2473 return PMFUNC;
2474 }
2475 else
2476 return FUNC;
2477}
2478
ffb4593c
NT
2479/*
2480 * S_sublex_push
2481 * Create a new scope to save the lexing state. The scope will be
2482 * ended in S_sublex_done. Returns a '(', starting the function arguments
2483 * to the uc, lc, etc. found before.
2484 * Sets PL_lex_state to LEX_INTERPCONCAT.
2485 */
2486
76e3520e 2487STATIC I32
cea2e8a9 2488S_sublex_push(pTHX)
55497cff 2489{
27da23d5 2490 dVAR;
78a635de 2491 LEXSHARED *shared;
f46d017c 2492 ENTER;
55497cff 2493
3280af22 2494 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2495 SAVEBOOL(PL_lex_dojoin);
3280af22 2496 SAVEI32(PL_lex_brackets);
78cdf107 2497 SAVEI32(PL_lex_allbrackets);
b27dce25 2498 SAVEI32(PL_lex_formbrack);
78cdf107 2499 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2500 SAVEI32(PL_lex_casemods);
2501 SAVEI32(PL_lex_starts);
651b5b28 2502 SAVEI8(PL_lex_state);
7cc34111 2503 SAVESPTR(PL_lex_repl);
7766f137 2504 SAVEVPTR(PL_lex_inpat);
98246f1e 2505 SAVEI16(PL_lex_inwhat);
57843af0 2506 SAVECOPLINE(PL_curcop);
3280af22 2507 SAVEPPTR(PL_bufptr);
8452ff4b 2508 SAVEPPTR(PL_bufend);
3280af22
NIS
2509 SAVEPPTR(PL_oldbufptr);
2510 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2511 SAVEPPTR(PL_last_lop);
2512 SAVEPPTR(PL_last_uni);
3280af22
NIS
2513 SAVEPPTR(PL_linestart);
2514 SAVESPTR(PL_linestr);
8edd5f42
RGS
2515 SAVEGENERICPV(PL_lex_brackstack);
2516 SAVEGENERICPV(PL_lex_casestack);
78a635de 2517 SAVEGENERICPV(PL_parser->lex_shared);
3280af22 2518
99bd9d90 2519 /* The here-doc parser needs to be able to peek into outer lexing
60f40a38
FC
2520 scopes to find the body of the here-doc. So we put PL_linestr and
2521 PL_bufptr into lex_shared, to ‘share’ those values.
99bd9d90 2522 */
60f40a38
FC
2523 PL_parser->lex_shared->ls_linestr = PL_linestr;
2524 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
99bd9d90 2525
3280af22 2526 PL_linestr = PL_lex_stuff;
7cc34111 2527 PL_lex_repl = PL_sublex_info.repl;
a0714e2c 2528 PL_lex_stuff = NULL;
7cc34111 2529 PL_sublex_info.repl = NULL;
3280af22 2530
9cbb5ea2
GS
2531 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2532 = SvPVX(PL_linestr);
3280af22 2533 PL_bufend += SvCUR(PL_linestr);
bd61b366 2534 PL_last_lop = PL_last_uni = NULL;
3280af22 2535 SAVEFREESV(PL_linestr);
4dc843bc 2536 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
3280af22
NIS
2537
2538 PL_lex_dojoin = FALSE;
b27dce25 2539 PL_lex_brackets = PL_lex_formbrack = 0;
78cdf107
Z
2540 PL_lex_allbrackets = 0;
2541 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2542 Newx(PL_lex_brackstack, 120, char);
2543 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2544 PL_lex_casemods = 0;
2545 *PL_lex_casestack = '\0';
2546 PL_lex_starts = 0;
2547 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2548 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
78a635de
FC
2549
2550 Newxz(shared, 1, LEXSHARED);
2551 shared->ls_prev = PL_parser->lex_shared;
2552 PL_parser->lex_shared = shared;
3280af22
NIS
2553
2554 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2555 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2556 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2557 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2558 else
5f66b61c 2559 PL_lex_inpat = NULL;
79072805 2560
55497cff 2561 return '(';
79072805
LW
2562}
2563
ffb4593c
NT
2564/*
2565 * S_sublex_done
2566 * Restores lexer state after a S_sublex_push.
2567 */
2568
76e3520e 2569STATIC I32
cea2e8a9 2570S_sublex_done(pTHX)
79072805 2571{
27da23d5 2572 dVAR;
3280af22 2573 if (!PL_lex_starts++) {
396482e1 2574 SV * const sv = newSVpvs("");
9aa983d2
JH
2575 if (SvUTF8(PL_linestr))
2576 SvUTF8_on(sv);
3280af22 2577 PL_expect = XOPERATOR;
6154021b 2578 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2579 return THING;
2580 }
2581
3280af22
NIS
2582 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2583 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2584 return yylex();
79072805
LW
2585 }
2586
ffb4593c 2587 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2588 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2589 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2590 PL_linestr = PL_lex_repl;
2591 PL_lex_inpat = 0;
2592 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2593 PL_bufend += SvCUR(PL_linestr);
bd61b366 2594 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2595 PL_lex_dojoin = FALSE;
2596 PL_lex_brackets = 0;
78cdf107
Z
2597 PL_lex_allbrackets = 0;
2598 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2599 PL_lex_casemods = 0;
2600 *PL_lex_casestack = '\0';
2601 PL_lex_starts = 0;
25da4f38 2602 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2603 PL_lex_state = LEX_INTERPNORMAL;
2604 PL_lex_starts++;
e9fa98b2
HS
2605 /* we don't clear PL_lex_repl here, so that we can check later
2606 whether this is an evalled subst; that means we rely on the
2607 logic to ensure sublex_done() is called again only via the
2608 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2609 }
e9fa98b2 2610 else {
3280af22 2611 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2612 PL_lex_repl = NULL;
e9fa98b2 2613 }
79072805 2614 return ',';
ffed7fef
LW
2615 }
2616 else {
5db06880
NC
2617#ifdef PERL_MAD
2618 if (PL_madskills) {
cd81e915
NC
2619 if (PL_thiswhite) {
2620 if (!PL_endwhite)
6b29d1f5 2621 PL_endwhite = newSVpvs("");
cd81e915
NC
2622 sv_catsv(PL_endwhite, PL_thiswhite);
2623 PL_thiswhite = 0;
2624 }
2625 if (PL_thistoken)
76f68e9b 2626 sv_setpvs(PL_thistoken,"");
5db06880 2627 else
cd81e915 2628 PL_realtokenstart = -1;
5db06880
NC
2629 }
2630#endif
f46d017c 2631 LEAVE;
3280af22
NIS
2632 PL_bufend = SvPVX(PL_linestr);
2633 PL_bufend += SvCUR(PL_linestr);
2634 PL_expect = XOPERATOR;
09bef843 2635 PL_sublex_info.sub_inwhat = 0;
79072805 2636 return ')';
ffed7fef
LW
2637 }
2638}
2639
6f613c73
KW
2640PERL_STATIC_INLINE SV*
2641S_get_and_check_backslash_N_name(pTHX_ const char* s, const char* const e)
2642{
140b12ad
KW
2643 /* <s> points to first character of interior of \N{}, <e> to one beyond the
2644 * interior, hence to the "}". Finds what the name resolves to, returning
2645 * an SV* containing it; NULL if no valid one found */
2646
dd2b1b72 2647 SV* res = newSVpvn_flags(s, e - s, UTF ? SVf_UTF8 : 0);
6f613c73 2648
0c415a79
KW
2649 HV * table;
2650 SV **cvp;
2651 SV *cv;
2652 SV *rv;
2653 HV *stash;
2654 const U8* first_bad_char_loc;
2655 const char* backslash_ptr = s - 3; /* Points to the <\> of \N{... */
2656
6f613c73
KW
2657 PERL_ARGS_ASSERT_GET_AND_CHECK_BACKSLASH_N_NAME;
2658
107160e2
KW
2659 if (UTF && ! is_utf8_string_loc((U8 *) backslash_ptr,
2660 e - backslash_ptr,
2661 &first_bad_char_loc))
2662 {
2663 /* If warnings are on, this will print a more detailed analysis of what
2664 * is wrong than the error message below */
2665 utf8n_to_uvuni(first_bad_char_loc,
2666 e - ((char *) first_bad_char_loc),
2667 NULL, 0);
2668
2669 /* We deliberately don't try to print the malformed character, which
2670 * might not print very well; it also may be just the first of many
2671 * malformations, so don't print what comes after it */
2672 yyerror(Perl_form(aTHX_
2673 "Malformed UTF-8 character immediately after '%.*s'",
2674 (int) (first_bad_char_loc - (U8 *) backslash_ptr), backslash_ptr));
2675 return NULL;
2676 }
2677
2678 res = new_constant( NULL, 0, "charnames", res, NULL, backslash_ptr,
2679 /* include the <}> */
2680 e - backslash_ptr + 1);
6f613c73
KW
2681 if (! SvPOK(res)) {
2682 return NULL;
2683 }
2684
0c415a79
KW
2685 /* See if the charnames handler is the Perl core's, and if so, we can skip
2686 * the validation needed for a user-supplied one, as Perl's does its own
2687 * validation. */
2688 table = GvHV(PL_hintgv); /* ^H */
2689 cvp = hv_fetchs(table, "charnames", FALSE);
2690 cv = *cvp;
2691 if (((rv = SvRV(cv)) != NULL)
2692 && ((stash = CvSTASH(rv)) != NULL))
2693 {
2694 const char * const name = HvNAME(stash);
2695 if strEQ(name, "_charnames") {
2696 return res;
2697 }
2698 }
2699
bde9e88d
KW
2700 /* Here, it isn't Perl's charname handler. We can't rely on a
2701 * user-supplied handler to validate the input name. For non-ut8 input,
2702 * look to see that the first character is legal. Then loop through the
2703 * rest checking that each is a continuation */
6f613c73 2704
b6ba1137
KW
2705 /* This code needs to be sync'ed with a regex in _charnames.pm which does
2706 * the same thing */
2707
b6ba1137 2708 if (! UTF) {
bde9e88d 2709 if (! isALPHAU(*s)) {
b6ba1137
KW
2710 goto bad_charname;
2711 }
bde9e88d
KW
2712 s++;
2713 while (s < e) {
2714 if (! isCHARNAME_CONT(*s)) {
b6ba1137
KW
2715 goto bad_charname;
2716 }
bde9e88d 2717 s++;
b6ba1137
KW
2718 }
2719 }
2720 else {
bde9e88d
KW
2721 /* Similarly for utf8. For invariants can check directly; for other
2722 * Latin1, can calculate their code point and check; otherwise use a
2723 * swash */
2724 if (UTF8_IS_INVARIANT(*s)) {
2725 if (! isALPHAU(*s)) {
140b12ad
KW
2726 goto bad_charname;
2727 }
bde9e88d
KW
2728 s++;
2729 } else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2730 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s, *(s+1))))) {
b6ba1137 2731 goto bad_charname;
6f613c73 2732 }
bde9e88d 2733 s += 2;
6f613c73 2734 }
bde9e88d
KW
2735 else {
2736 if (! PL_utf8_charname_begin) {
2737 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2738 PL_utf8_charname_begin = _core_swash_init("utf8",
2739 "_Perl_Charname_Begin",
2740 &PL_sv_undef,
2741 1, 0, NULL, &flags);
2742 }
2743 if (! swash_fetch(PL_utf8_charname_begin, (U8 *) s, TRUE)) {
2744 goto bad_charname;
2745 }
2746 s += UTF8SKIP(s);
2747 }
2748
2749 while (s < e) {
2750 if (UTF8_IS_INVARIANT(*s)) {
2751 if (! isCHARNAME_CONT(*s)) {
2752 goto bad_charname;
2753 }
2754 s++;
2755 }
2756 else if (UTF8_IS_DOWNGRADEABLE_START(*s)) {
2757 if (! isCHARNAME_CONT(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*s,
2758 *(s+1)))))
2759 {
2760 goto bad_charname;
2761 }
2762 s += 2;
2763 }
2764 else {
2765 if (! PL_utf8_charname_continue) {
2766 U8 flags = _CORE_SWASH_INIT_ACCEPT_INVLIST;
2767 PL_utf8_charname_continue = _core_swash_init("utf8",
2768 "_Perl_Charname_Continue",
2769 &PL_sv_undef,
2770 1, 0, NULL, &flags);
2771 }
2772 if (! swash_fetch(PL_utf8_charname_continue, (U8 *) s, TRUE)) {
2773 goto bad_charname;
2774 }
2775 s += UTF8SKIP(s);
6f613c73
KW
2776 }
2777 }
6f613c73
KW
2778 }
2779
94ca1619 2780 if (SvUTF8(res)) { /* Don't accept malformed input */
bde9e88d
KW
2781 const U8* first_bad_char_loc;
2782 STRLEN len;
2783 const char* const str = SvPV_const(res, len);
2784 if (! is_utf8_string_loc((U8 *) str, len, &first_bad_char_loc)) {
2785 /* If warnings are on, this will print a more detailed analysis of
2786 * what is wrong than the error message below */
2787 utf8n_to_uvuni(first_bad_char_loc,
2788 (char *) first_bad_char_loc - str,
2789 NULL, 0);
2790
2791 /* We deliberately don't try to print the malformed character,
2792 * which might not print very well; it also may be just the first
2793 * of many malformations, so don't print what comes after it */
2794 yyerror_pv(
2795 Perl_form(aTHX_
2796 "Malformed UTF-8 returned by %.*s immediately after '%.*s'",
2797 (int) (e - backslash_ptr + 1), backslash_ptr,
2798 (int) ((char *) first_bad_char_loc - str), str
2799 ),
2800 SVf_UTF8);
2801 return NULL;
2802 }
2803 }
140b12ad 2804
bde9e88d 2805 return res;
140b12ad 2806
bde9e88d
KW
2807 bad_charname: {
2808 int bad_char_size = ((UTF) ? UTF8SKIP(s) : 1);
2809
2810 /* The final %.*s makes sure that should the trailing NUL be missing
2811 * that this print won't run off the end of the string */
2812 yyerror_pv(
2813 Perl_form(aTHX_
2814 "Invalid character in \\N{...}; marked by <-- HERE in %.*s<-- HERE %.*s",
2815 (int)(s - backslash_ptr + bad_char_size), backslash_ptr,
2816 (int)(e - s + bad_char_size), s + bad_char_size
2817 ),
2818 UTF ? SVf_UTF8 : 0);
2819 return NULL;
2820 }
6f613c73
KW
2821}
2822
02aa26ce
NT
2823/*
2824 scan_const
2825
9da1dd8f
DM
2826 Extracts the next constant part of a pattern, double-quoted string,
2827 or transliteration. This is terrifying code.
2828
2829 For example, in parsing the double-quoted string "ab\x63$d", it would
2830 stop at the '$' and return an OP_CONST containing 'abc'.
02aa26ce 2831
94def140 2832 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2833 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2834 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2835
94def140
TS
2836 Returns a pointer to the character scanned up to. If this is
2837 advanced from the start pointer supplied (i.e. if anything was
9da1dd8f 2838 successfully parsed), will leave an OP_CONST for the substring scanned
6154021b 2839 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2840 by looking at the next characters herself.
2841
02aa26ce 2842 In patterns:
9da1dd8f
DM
2843 expand:
2844 \N{ABC} => \N{U+41.42.43}
2845
2846 pass through:
2847 all other \-char, including \N and \N{ apart from \N{ABC}
2848
2849 stops on:
2850 @ and $ where it appears to be a var, but not for $ as tail anchor
2851 \l \L \u \U \Q \E
2852 (?{ or (??{
2853
02aa26ce
NT
2854
2855 In transliterations:
2856 characters are VERY literal, except for - not at the start or end
94def140
TS
2857 of the string, which indicates a range. If the range is in bytes,
2858 scan_const expands the range to the full set of intermediate
2859 characters. If the range is in utf8, the hyphen is replaced with
2860 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2861
2862 In double-quoted strings:
2863 backslashes:
2864 double-quoted style: \r and \n
ff3f963a 2865 constants: \x31, etc.
94def140 2866 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2867 case and quoting: \U \Q \E
2868 stops on @ and $
2869
2870 scan_const does *not* construct ops to handle interpolated strings.
2871 It stops processing as soon as it finds an embedded $ or @ variable
2872 and leaves it to the caller to work out what's going on.
2873
94def140
TS
2874 embedded arrays (whether in pattern or not) could be:
2875 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2876
2877 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2878
2879 $ in pattern could be $foo or could be tail anchor. Assumption:
2880 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2881 followed by one of "()| \r\n\t"
02aa26ce 2882
9da1dd8f 2883 \1 (backreferences) are turned into $1 in substitutions
02aa26ce
NT
2884
2885 The structure of the code is
2886 while (there's a character to process) {
94def140
TS
2887 handle transliteration ranges
2888 skip regexp comments /(?#comment)/ and codes /(?{code})/
2889 skip #-initiated comments in //x patterns
2890 check for embedded arrays
02aa26ce
NT
2891 check for embedded scalars
2892 if (backslash) {
94def140 2893 deprecate \1 in substitution replacements
02aa26ce
NT
2894 handle string-changing backslashes \l \U \Q \E, etc.
2895 switch (what was escaped) {
94def140 2896 handle \- in a transliteration (becomes a literal -)
ff3f963a 2897 if a pattern and not \N{, go treat as regular character
94def140
TS
2898 handle \132 (octal characters)
2899 handle \x15 and \x{1234} (hex characters)
ff3f963a 2900 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2901 handle \cV (control characters)
2902 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2903 } (end switch)
77a135fe 2904 continue
02aa26ce 2905 } (end if backslash)
77a135fe 2906 handle regular character
02aa26ce 2907 } (end while character to read)
4e553d73 2908
02aa26ce
NT
2909*/
2910
76e3520e 2911STATIC char *
cea2e8a9 2912S_scan_const(pTHX_ char *start)
79072805 2913{
97aff369 2914 dVAR;
eb578fdb 2915 char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2916 SV *sv = newSV(send - start); /* sv for the constant. See
2917 note below on sizing. */
eb578fdb
KW
2918 char *s = start; /* start of the constant */
2919 char *d = SvPVX(sv); /* destination for copies */
02aa26ce 2920 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2921 bool didrange = FALSE; /* did we just finish a range? */
2866decb 2922 bool in_charclass = FALSE; /* within /[...]/ */
b953e60c
KW
2923 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2924 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2925 to be UTF8? But, this can
2926 show as true when the source
2927 isn't utf8, as for example
2928 when it is entirely composed
2929 of hex constants */
6f613c73 2930 SV *res; /* result from charnames */
77a135fe
KW
2931
2932 /* Note on sizing: The scanned constant is placed into sv, which is
2933 * initialized by newSV() assuming one byte of output for every byte of
2934 * input. This routine expects newSV() to allocate an extra byte for a
2935 * trailing NUL, which this routine will append if it gets to the end of
2936 * the input. There may be more bytes of input than output (eg., \N{LATIN
2937 * CAPITAL LETTER A}), or more output than input if the constant ends up
2938 * recoded to utf8, but each time a construct is found that might increase
2939 * the needed size, SvGROW() is called. Its size parameter each time is
2940 * based on the best guess estimate at the time, namely the length used so
2941 * far, plus the length the current construct will occupy, plus room for
2942 * the trailing NUL, plus one byte for every input byte still unscanned */
2943
012bcf8d 2944 UV uv;
4c3a8340
TS
2945#ifdef EBCDIC
2946 UV literal_endpoint = 0;
e294cc5d 2947 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2948#endif
012bcf8d 2949
7918f24d
NC
2950 PERL_ARGS_ASSERT_SCAN_CONST;
2951
bb16bae8 2952 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2953 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2954 /* If we are doing a trans and we know we want UTF8 set expectation */
2955 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2956 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2957 }
2958
2959
79072805 2960 while (s < send || dorange) {
ff3f963a 2961
02aa26ce 2962 /* get transliterations out of the way (they're most literal) */
3280af22 2963 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2964 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2965 if (dorange) {
1ba5c669
JH
2966 I32 i; /* current expanded character */
2967 I32 min; /* first character in range */
2968 I32 max; /* last character in range */
02aa26ce 2969
e294cc5d
JH
2970#ifdef EBCDIC
2971 UV uvmax = 0;
2972#endif
2973
2974 if (has_utf8
2975#ifdef EBCDIC
2976 && !native_range
2977#endif
1953db30 2978 ) {
9d4ba2ae 2979 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2980 char *e = d++;
2981 while (e-- > c)
2982 *(e + 1) = *e;
25716404 2983 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2984 /* mark the range as done, and continue */
2985 dorange = FALSE;
2986 didrange = TRUE;
2987 continue;
2988 }
2b9d42f0 2989
95a20fc0 2990 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2991#ifdef EBCDIC
2992 SvGROW(sv,
2993 SvLEN(sv) + (has_utf8 ?
2994 (512 - UTF_CONTINUATION_MARK +
2995 UNISKIP(0x100))
2996 : 256));
2997 /* How many two-byte within 0..255: 128 in UTF-8,
2998 * 96 in UTF-8-mod. */
2999#else
9cbb5ea2 3000 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 3001#endif
9cbb5ea2 3002 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
3003#ifdef EBCDIC
3004 if (has_utf8) {
3005 int j;
3006 for (j = 0; j <= 1; j++) {
3007 char * const c = (char*)utf8_hop((U8*)d, -1);
3008 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
3009 if (j)
3010 min = (U8)uv;
3011 else if (uv < 256)
3012 max = (U8)uv;
3013 else {
3014 max = (U8)0xff; /* only to \xff */
3015 uvmax = uv; /* \x{100} to uvmax */
3016 }
3017 d = c; /* eat endpoint chars */
3018 }
3019 }
3020 else {
3021#endif
3022 d -= 2; /* eat the first char and the - */
3023 min = (U8)*d; /* first char in range */
3024 max = (U8)d[1]; /* last char in range */
3025#ifdef EBCDIC
3026 }
3027#endif
8ada0baa 3028
c2e66d9e 3029 if (min > max) {
4dc843bc 3030 SvREFCNT_dec(sv);
01ec43d0 3031 Perl_croak(aTHX_
d1573ac7 3032 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 3033 (char)min, (char)max);
c2e66d9e
GS
3034 }
3035
c7f1f016 3036#ifdef EBCDIC
4c3a8340
TS
3037 if (literal_endpoint == 2 &&
3038 ((isLOWER(min) && isLOWER(max)) ||
3039 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
3040 if (isLOWER(min)) {
3041 for (i = min; i <= max; i++)
3042 if (isLOWER(i))
db42d148 3043 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
3044 } else {
3045 for (i = min; i <= max; i++)
3046 if (isUPPER(i))
db42d148 3047 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
3048 }
3049 }
3050 else
3051#endif
3052 for (i = min; i <= max; i++)
e294cc5d
JH
3053#ifdef EBCDIC
3054 if (has_utf8) {
3055 const U8 ch = (U8)NATIVE_TO_UTF(i);
3056 if (UNI_IS_INVARIANT(ch))
3057 *d++ = (U8)i;
3058 else {
3059 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
3060 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
3061 }
3062 }
3063 else
3064#endif
3065 *d++ = (char)i;
3066
3067#ifdef EBCDIC
3068 if (uvmax) {
3069 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
3070 if (uvmax > 0x101)
3071 *d++ = (char)UTF_TO_NATIVE(0xff);
3072 if (uvmax > 0x100)
3073 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
3074 }
3075#endif
02aa26ce
NT
3076
3077 /* mark the range as done, and continue */
79072805 3078 dorange = FALSE;
01ec43d0 3079 didrange = TRUE;
4c3a8340
TS
3080#ifdef EBCDIC
3081 literal_endpoint = 0;
3082#endif
79072805 3083 continue;
4e553d73 3084 }
02aa26ce
NT
3085
3086 /* range begins (ignore - as first or last char) */
79072805 3087 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 3088 if (didrange) {
4dc843bc 3089 SvREFCNT_dec(sv);
1fafa243 3090 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 3091 }
e294cc5d
JH
3092 if (has_utf8
3093#ifdef EBCDIC
3094 && !native_range
3095#endif
3096 ) {
25716404 3097 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
3098 s++;
3099 continue;
3100 }
79072805
LW
3101 dorange = TRUE;
3102 s++;
01ec43d0
GS
3103 }
3104 else {
3105 didrange = FALSE;
4c3a8340
TS
3106#ifdef EBCDIC
3107 literal_endpoint = 0;
e294cc5d 3108 native_range = TRUE;
4c3a8340 3109#endif
01ec43d0 3110 }
79072805 3111 }
02aa26ce
NT
3112
3113 /* if we get here, we're not doing a transliteration */
3114
e4a2df84
DM
3115 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
3116 char *s1 = s-1;
3117 int esc = 0;
3118 while (s1 >= start && *s1-- == '\\')
3119 esc = !esc;
3120 if (!esc)
3121 in_charclass = TRUE;
3122 }
2866decb 3123
e4a2df84
DM
3124 else if (*s == ']' && PL_lex_inpat && in_charclass) {
3125 char *s1 = s-1;
3126 int esc = 0;
3127 while (s1 >= start && *s1-- == '\\')
3128 esc = !esc;
3129 if (!esc)
3130 in_charclass = FALSE;
3131 }
2866decb 3132
9da1dd8f
DM
3133 /* skip for regexp comments /(?#comment)/, except for the last
3134 * char, which will be done separately.
3135 * Stop on (?{..}) and friends */
3136
3280af22 3137 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 3138 if (s[2] == '#') {
e994fd66 3139 while (s+1 < send && *s != ')')
db42d148 3140 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94 3141 }
2866decb 3142 else if (!PL_lex_casemods && !in_charclass &&
d3cec5e5
DM
3143 ( s[2] == '{' /* This should match regcomp.c */
3144 || (s[2] == '?' && s[3] == '{')))
155aba94 3145 {
9da1dd8f 3146 break;
cc6b7395 3147 }
748a9306 3148 }
02aa26ce
NT
3149
3150 /* likewise skip #-initiated comments in //x patterns */
3280af22 3151 else if (*s == '#' && PL_lex_inpat &&
73134a2e 3152 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 3153 while (s+1 < send && *s != '\n')
db42d148 3154 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 3155 }
02aa26ce 3156
9da1dd8f
DM
3157 /* no further processing of single-quoted regex */
3158 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
3159 goto default_action;
3160
5d1d4326 3161 /* check for embedded arrays
da6eedaa 3162 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 3163 */
1749ea0d
TS
3164 else if (*s == '@' && s[1]) {
3165 if (isALNUM_lazy_if(s+1,UTF))
3166 break;
3167 if (strchr(":'{$", s[1]))
3168 break;
3169 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
3170 break; /* in regexp, neither @+ nor @- are interpolated */
3171 }
02aa26ce
NT
3172
3173 /* check for embedded scalars. only stop if we're sure it's a
3174 variable.
3175 */
79072805 3176 else if (*s == '$') {
3280af22 3177 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 3178 break;
77772344 3179 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
3180 if (s[1] == '\\') {
3181 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
3182 "Possible unintended interpolation of $\\ in regex");
77772344 3183 }
79072805 3184 break; /* in regexp, $ might be tail anchor */
77772344 3185 }
79072805 3186 }
02aa26ce 3187
2b9d42f0
NIS
3188 /* End of else if chain - OP_TRANS rejoin rest */
3189
02aa26ce 3190 /* backslashes */
79072805 3191 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
3192 char* e; /* Can be used for ending '}', etc. */
3193
79072805 3194 s++;
02aa26ce 3195
7d0fc23c
KW
3196 /* warn on \1 - \9 in substitution replacements, but note that \11
3197 * is an octal; and \19 is \1 followed by '9' */
3280af22 3198 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 3199 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 3200 {
a2a5de95 3201 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
3202 *--s = '$';
3203 break;
3204 }
02aa26ce
NT
3205
3206 /* string-change backslash escapes */
838f2281 3207 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
79072805
LW
3208 --s;
3209 break;
3210 }
ff3f963a
KW
3211 /* In a pattern, process \N, but skip any other backslash escapes.
3212 * This is because we don't want to translate an escape sequence
3213 * into a meta symbol and have the regex compiler use the meta
3214 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3215 * in spite of this, we do have to process \N here while the proper
3216 * charnames handler is in scope. See bugs #56444 and #62056.
3217 * There is a complication because \N in a pattern may also stand
3218 * for 'match a non-nl', and not mean a charname, in which case its
3219 * processing should be deferred to the regex compiler. To be a
3220 * charname it must be followed immediately by a '{', and not look
3221 * like \N followed by a curly quantifier, i.e., not something like
3222 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3223 * quantifier */
3224 else if (PL_lex_inpat
3225 && (*s != 'N'
3226 || s[1] != '{'
3227 || regcurly(s + 1)))
3228 {
cc74c5bd
TS
3229 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3230 goto default_action;
3231 }
02aa26ce 3232
79072805 3233 switch (*s) {
02aa26ce
NT
3234
3235 /* quoted - in transliterations */
79072805 3236 case '-':
3280af22 3237 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
3238 *d++ = *s++;
3239 continue;
3240 }
3241 /* FALL THROUGH */
3242 default:
11b8faa4 3243 {
e4ca4584 3244 if ((isALNUMC(*s)))
a2a5de95
NC
3245 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3246 "Unrecognized escape \\%c passed through",
3247 *s);
11b8faa4 3248 /* default action is to copy the quoted character */
f9a63242 3249 goto default_action;
11b8faa4 3250 }
02aa26ce 3251
632403cc 3252 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
3253 case '0': case '1': case '2': case '3':
3254 case '4': case '5': case '6': case '7':
ba210ebe 3255 {
53305cf1
NC
3256 I32 flags = 0;
3257 STRLEN len = 3;
77a135fe 3258 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
3259 s += len;
3260 }
012bcf8d 3261 goto NUM_ESCAPE_INSERT;
02aa26ce 3262
f0a2b745
KW
3263 /* eg. \o{24} indicates the octal constant \024 */
3264 case 'o':
3265 {
3266 STRLEN len;
454155d9 3267 const char* error;
f0a2b745 3268
454155d9 3269 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 3270 s += len;
454155d9 3271 if (! valid) {
f0a2b745
KW
3272 yyerror(error);
3273 continue;
3274 }
3275 goto NUM_ESCAPE_INSERT;
3276 }
3277
77a135fe 3278 /* eg. \x24 indicates the hex constant 0x24 */
79072805 3279 case 'x':
a0481293 3280 {
53305cf1 3281 STRLEN len;
a0481293 3282 const char* error;
355860ce 3283
a0481293
KW
3284 bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3285 s += len;
3286 if (! valid) {
3287 yyerror(error);
355860ce 3288 continue;
ba210ebe 3289 }
012bcf8d
GS
3290 }
3291
3292 NUM_ESCAPE_INSERT:
ff3f963a
KW
3293 /* Insert oct or hex escaped character. There will always be
3294 * enough room in sv since such escapes will be longer than any
3295 * UTF-8 sequence they can end up as, except if they force us
3296 * to recode the rest of the string into utf8 */
ba7cea30 3297
77a135fe 3298 /* Here uv is the ordinal of the next character being added in
ff3f963a 3299 * unicode (converted from native). */
77a135fe 3300 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3301 if (!has_utf8 && uv > 255) {
77a135fe
KW
3302 /* Might need to recode whatever we have accumulated so
3303 * far if it contains any chars variant in utf8 or
3304 * utf-ebcdic. */
3305
3306 SvCUR_set(sv, d - SvPVX_const(sv));
3307 SvPOK_on(sv);
3308 *d = '\0';
77a135fe 3309 /* See Note on sizing above. */
7bf79863
KW
3310 sv_utf8_upgrade_flags_grow(sv,
3311 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3312 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3313 d = SvPVX(sv) + SvCUR(sv);
3314 has_utf8 = TRUE;
012bcf8d
GS
3315 }
3316
77a135fe
KW
3317 if (has_utf8) {
3318 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3319 if (PL_lex_inwhat == OP_TRANS &&
3320 PL_sublex_info.sub_op) {
3321 PL_sublex_info.sub_op->op_private |=
3322 (PL_lex_repl ? OPpTRANS_FROM_UTF
3323 : OPpTRANS_TO_UTF);
f9a63242 3324 }
e294cc5d
JH
3325#ifdef EBCDIC
3326 if (uv > 255 && !dorange)
3327 native_range = FALSE;
3328#endif
012bcf8d 3329 }
a0ed51b3 3330 else {
012bcf8d 3331 *d++ = (char)uv;
a0ed51b3 3332 }
012bcf8d
GS
3333 }
3334 else {
c4d5f83a 3335 *d++ = (char) uv;
a0ed51b3 3336 }
79072805 3337 continue;
02aa26ce 3338
4a2d328f 3339 case 'N':
ff3f963a
KW
3340 /* In a non-pattern \N must be a named character, like \N{LATIN
3341 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3342 * mean to match a non-newline. For non-patterns, named
3343 * characters are converted to their string equivalents. In
3344 * patterns, named characters are not converted to their
3345 * ultimate forms for the same reasons that other escapes
3346 * aren't. Instead, they are converted to the \N{U+...} form
3347 * to get the value from the charnames that is in effect right
3348 * now, while preserving the fact that it was a named character
3349 * so that the regex compiler knows this */
3350
3351 /* This section of code doesn't generally use the
3352 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3353 * a close examination of this macro and determined it is a
3354 * no-op except on utfebcdic variant characters. Every
3355 * character generated by this that would normally need to be
3356 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3357 * needed, and would complicate use of copy(). XXX There are
3358 * other parts of this file where the macro is used
3359 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3360
3361 /* The structure of this section of code (besides checking for
3362 * errors and upgrading to utf8) is:
3363 * Further disambiguate between the two meanings of \N, and if
3364 * not a charname, go process it elsewhere
0a96133f
KW
3365 * If of form \N{U+...}, pass it through if a pattern;
3366 * otherwise convert to utf8
3367 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3368 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3369
3370 /* Here, s points to the 'N'; the test below is guaranteed to
3371 * succeed if we are being called on a pattern as we already
3372 * know from a test above that the next character is a '{'.
3373 * On a non-pattern \N must mean 'named sequence, which
3374 * requires braces */
3375 s++;
3376 if (*s != '{') {
3377 yyerror("Missing braces on \\N{}");
3378 continue;
3379 }
3380 s++;
3381
0a96133f 3382 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3383 if (! (e = strchr(s, '}'))) {
3384 if (! PL_lex_inpat) {
5777a3f7 3385 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3386 } else {
3387 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3388 }
0a96133f 3389 continue;
ff3f963a 3390 }
cddc7ef4 3391
ff3f963a 3392 /* Here it looks like a named character */
cddc7ef4 3393
ff3f963a
KW
3394 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3395 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3396 | PERL_SCAN_DISALLOW_PREFIX;
3397 STRLEN len;
3398
3399 /* For \N{U+...}, the '...' is a unicode value even on
3400 * EBCDIC machines */
3401 s += 2; /* Skip to next char after the 'U+' */
3402 len = e - s;
3403 uv = grok_hex(s, &len, &flags, NULL);
3404 if (len == 0 || len != (STRLEN)(e - s)) {
3405 yyerror("Invalid hexadecimal number in \\N{U+...}");
3406 s = e + 1;
3407 continue;
3408 }
3409
3410 if (PL_lex_inpat) {
3411
e2a7e165
KW
3412 /* On non-EBCDIC platforms, pass through to the regex
3413 * compiler unchanged. The reason we evaluated the
3414 * number above is to make sure there wasn't a syntax
3415 * error. But on EBCDIC we convert to native so
3416 * downstream code can continue to assume it's native
3417 */
ff3f963a 3418 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3419#ifdef EBCDIC
3420 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3421 and the \0 */
3422 "\\N{U+%X}",
3423 (unsigned int) UNI_TO_NATIVE(uv));
3424#else
ff3f963a
KW
3425 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3426 d += e - s + 1;
e2a7e165 3427#endif
ff3f963a
KW
3428 }
3429 else { /* Not a pattern: convert the hex to string */
3430
3431 /* If destination is not in utf8, unconditionally
3432 * recode it to be so. This is because \N{} implies
3433 * Unicode semantics, and scalars have to be in utf8
3434 * to guarantee those semantics */
3435 if (! has_utf8) {
3436 SvCUR_set(sv, d - SvPVX_const(sv));
3437 SvPOK_on(sv);
3438 *d = '\0';
3439 /* See Note on sizing above. */
3440 sv_utf8_upgrade_flags_grow(
3441 sv,
3442 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3443 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3444 d = SvPVX(sv) + SvCUR(sv);
3445 has_utf8 = TRUE;
3446 }
3447
3448 /* Add the string to the output */
3449 if (UNI_IS_INVARIANT(uv)) {
3450 *d++ = (char) uv;
3451 }
3452 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3453 }
3454 }
6f613c73
KW
3455 else /* Here is \N{NAME} but not \N{U+...}. */
3456 if ((res = get_and_check_backslash_N_name(s, e)))
3457 {
3458 STRLEN len;
3459 const char *str = SvPV_const(res, len);
3460 if (PL_lex_inpat) {
ff3f963a
KW
3461
3462 if (! len) { /* The name resolved to an empty string */
3463 Copy("\\N{}", d, 4, char);
3464 d += 4;
3465 }
3466 else {
3467 /* In order to not lose information for the regex
3468 * compiler, pass the result in the specially made
3469 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3470 * the code points in hex of each character
3471 * returned by charnames */
3472
3473 const char *str_end = str + len;
3b721c4f 3474 const STRLEN off = d - SvPVX_const(sv);
94ca1619
KW
3475
3476 if (! SvUTF8(res)) {
3477 /* For the non-UTF-8 case, we can determine the
3478 * exact length needed without having to parse
3479 * through the string. Each character takes up
3480 * 2 hex digits plus either a trailing dot or
3481 * the "}" */
3482 d = off + SvGROW(sv, off
3483 + 3 * len
3484 + 6 /* For the "\N{U+", and
3485 trailing NUL */
3486 + (STRLEN)(send - e));
3487 Copy("\\N{U+", d, 5, char);
3488 d += 5;
3489 while (str < str_end) {
3490 char hex_string[4];
3491 my_snprintf(hex_string, sizeof(hex_string),
3492 "%02X.", (U8) *str);
3493 Copy(hex_string, d, 3, char);
3494 d += 3;
3495 str++;
3496 }
3497 d--; /* We will overwrite below the final
3498 dot with a right brace */
3499 }
3500 else {
1953db30
KW
3501 STRLEN char_length; /* cur char's byte length */
3502
3503 /* and the number of bytes after this is
3504 * translated into hex digits */
3505 STRLEN output_length;
3506
3507 /* 2 hex per byte; 2 chars for '\N'; 2 chars
3508 * for max('U+', '.'); and 1 for NUL */
3509 char hex_string[2 * UTF8_MAXBYTES + 5];
3510
3511 /* Get the first character of the result. */
3512 U32 uv = utf8n_to_uvuni((U8 *) str,
3513 len,
3514 &char_length,
3515 UTF8_ALLOW_ANYUV);
3516 /* Convert first code point to hex, including
3517 * the boiler plate before it. For all these,
3518 * we convert to native format so that
3519 * downstream code can continue to assume the
3520 * input is native */
3521 output_length =
3522 my_snprintf(hex_string, sizeof(hex_string),
3523 "\\N{U+%X",
3524 (unsigned int) UNI_TO_NATIVE(uv));
3525
3526 /* Make sure there is enough space to hold it */
3527 d = off + SvGROW(sv, off
3528 + output_length
3529 + (STRLEN)(send - e)
3530 + 2); /* '}' + NUL */
3531 /* And output it */
3532 Copy(hex_string, d, output_length, char);
3533 d += output_length;
3534
3535 /* For each subsequent character, append dot and
3536 * its ordinal in hex */
3537 while ((str += char_length) < str_end) {
3538 const STRLEN off = d - SvPVX_const(sv);
3539 U32 uv = utf8n_to_uvuni((U8 *) str,
3540 str_end - str,
3541 &char_length,
3542 UTF8_ALLOW_ANYUV);
3543 output_length =
3544 my_snprintf(hex_string,
3545 sizeof(hex_string),
3546 ".%X",
3547 (unsigned int) UNI_TO_NATIVE(uv));
3548
3549 d = off + SvGROW(sv, off
3550 + output_length
3551 + (STRLEN)(send - e)
3552 + 2); /* '}' + NUL */
3553 Copy(hex_string, d, output_length, char);
3554 d += output_length;
3555 }
94ca1619 3556 }
ff3f963a
KW
3557
3558 *d++ = '}'; /* Done. Add the trailing brace */
3559 }
3560 }
3561 else { /* Here, not in a pattern. Convert the name to a
3562 * string. */
3563
3564 /* If destination is not in utf8, unconditionally
3565 * recode it to be so. This is because \N{} implies
3566 * Unicode semantics, and scalars have to be in utf8
3567 * to guarantee those semantics */
3568 if (! has_utf8) {
3569 SvCUR_set(sv, d - SvPVX_const(sv));
3570 SvPOK_on(sv);
3571 *d = '\0';
3572 /* See Note on sizing above. */
3573 sv_utf8_upgrade_flags_grow(sv,
3574 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3575 len + (STRLEN)(send - s) + 1);
3576 d = SvPVX(sv) + SvCUR(sv);
3577 has_utf8 = TRUE;
3578 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3579
3580 /* See Note on sizing above. (NOTE: SvCUR() is not
3581 * set correctly here). */
3582 const STRLEN off = d - SvPVX_const(sv);
3583 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3584 }
3585 Copy(str, d, len, char);
3586 d += len;
423cee85 3587 }
6f613c73 3588
423cee85 3589 SvREFCNT_dec(res);
cb233ae3 3590
cb233ae3 3591 } /* End \N{NAME} */
ff3f963a
KW
3592#ifdef EBCDIC
3593 if (!dorange)
3594 native_range = FALSE; /* \N{} is defined to be Unicode */
3595#endif
3596 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3597 continue;
3598
02aa26ce 3599 /* \c is a control character */
79072805
LW
3600 case 'c':
3601 s++;
961ce445 3602 if (s < send) {
17a3df4c 3603 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3604 }
961ce445
RGS
3605 else {
3606 yyerror("Missing control char name in \\c");
3607 }
79072805 3608 continue;
02aa26ce
NT
3609
3610 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3611 case 'b':
db42d148 3612 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3613 break;
3614 case 'n':
db42d148 3615 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3616 break;
3617 case 'r':
db42d148 3618 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3619 break;
3620 case 'f':
db42d148 3621 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3622 break;
3623 case 't':
db42d148 3624 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3625 break;
34a3fe2a 3626 case 'e':
db42d148 3627 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3628 break;
3629 case 'a':
db42d148 3630 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3631 break;
02aa26ce
NT
3632 } /* end switch */
3633
79072805
LW
3634 s++;
3635 continue;
02aa26ce 3636 } /* end if (backslash) */
4c3a8340
TS
3637#ifdef EBCDIC
3638 else
3639 literal_endpoint++;
3640#endif
02aa26ce 3641
f9a63242 3642 default_action:
77a135fe
KW
3643 /* If we started with encoded form, or already know we want it,
3644 then encode the next character */
3645 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3646 STRLEN len = 1;
77a135fe
KW
3647
3648
3649 /* One might think that it is wasted effort in the case of the
3650 * source being utf8 (this_utf8 == TRUE) to take the next character
3651 * in the source, convert it to an unsigned value, and then convert
3652 * it back again. But the source has not been validated here. The
3653 * routine that does the conversion checks for errors like
3654 * malformed utf8 */
3655
5f66b61c
AL
3656 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3657 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3658 if (!has_utf8) {
3659 SvCUR_set(sv, d - SvPVX_const(sv));
3660 SvPOK_on(sv);
3661 *d = '\0';
77a135fe 3662 /* See Note on sizing above. */
7bf79863
KW
3663 sv_utf8_upgrade_flags_grow(sv,
3664 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3665 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3666 d = SvPVX(sv) + SvCUR(sv);
3667 has_utf8 = TRUE;
3668 } else if (need > len) {
3669 /* encoded value larger than old, may need extra space (NOTE:
3670 * SvCUR() is not set correctly here). See Note on sizing
3671 * above. */
9d4ba2ae 3672 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3673 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3674 }
77a135fe
KW
3675 s += len;
3676
5f66b61c 3677 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3678#ifdef EBCDIC
3679 if (uv > 255 && !dorange)
3680 native_range = FALSE;
3681#endif
2b9d42f0
NIS
3682 }
3683 else {
3684 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3685 }
02aa26ce
NT
3686 } /* while loop to process each character */
3687
3688 /* terminate the string and set up the sv */
79072805 3689 *d = '\0';
95a20fc0 3690 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3691 if (SvCUR(sv) >= SvLEN(sv))
5637ef5b
NC
3692 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3693 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
2b9d42f0 3694
79072805 3695 SvPOK_on(sv);
9f4817db 3696 if (PL_encoding && !has_utf8) {
d0063567
DK
3697 sv_recode_to_utf8(sv, PL_encoding);
3698 if (SvUTF8(sv))
3699 has_utf8 = TRUE;
9f4817db 3700 }
2b9d42f0 3701 if (has_utf8) {
7e2040f0 3702 SvUTF8_on(sv);
2b9d42f0 3703 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3704 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3705 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3706 }
3707 }
79072805 3708
02aa26ce 3709 /* shrink the sv if we allocated more than we used */
79072805 3710 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3711 SvPV_shrink_to_cur(sv);
79072805 3712 }
02aa26ce 3713
6154021b 3714 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3715 if (s > PL_bufptr) {
eb0d8d16
NC
3716 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3717 const char *const key = PL_lex_inpat ? "qr" : "q";
3718 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3719 const char *type;
3720 STRLEN typelen;
3721
3722 if (PL_lex_inwhat == OP_TRANS) {
3723 type = "tr";
3724 typelen = 2;
3725 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3726 type = "s";
3727 typelen = 1;
9da1dd8f
DM
3728 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3729 type = "q";
3730 typelen = 1;
eb0d8d16
NC
3731 } else {
3732 type = "qq";
3733 typelen = 2;
3734 }
3735
3736 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3737 type, typelen);
3738 }
6154021b 3739 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3740 } else
8990e307 3741 SvREFCNT_dec(sv);
79072805
LW
3742 return s;
3743}
3744
ffb4593c
NT
3745/* S_intuit_more
3746 * Returns TRUE if there's more to the expression (e.g., a subscript),
3747 * FALSE otherwise.
ffb4593c
NT
3748 *
3749 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3750 *
3751 * ->[ and ->{ return TRUE
3752 * { and [ outside a pattern are always subscripts, so return TRUE
3753 * if we're outside a pattern and it's not { or [, then return FALSE
3754 * if we're in a pattern and the first char is a {
3755 * {4,5} (any digits around the comma) returns FALSE
3756 * if we're in a pattern and the first char is a [
3757 * [] returns FALSE
3758 * [SOMETHING] has a funky algorithm to decide whether it's a
3759 * character class or not. It has to deal with things like
3760 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3761 * anything else returns TRUE
3762 */
3763
9cbb5ea2
GS
3764/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3765
76e3520e 3766STATIC int
cea2e8a9 3767S_intuit_more(pTHX_ register char *s)
79072805 3768{
97aff369 3769 dVAR;
7918f24d
NC
3770
3771 PERL_ARGS_ASSERT_INTUIT_MORE;
3772
3280af22 3773 if (PL_lex_brackets)
79072805
LW
3774 return TRUE;
3775 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3776 return TRUE;
3777 if (*s != '{' && *s != '[')
3778 return FALSE;
3280af22 3779 if (!PL_lex_inpat)
79072805
LW
3780 return TRUE;
3781
3782 /* In a pattern, so maybe we have {n,m}. */
3783 if (*s == '{') {
b3155d95 3784 if (regcurly(s)) {
79072805 3785 return FALSE;
b3155d95 3786 }
79072805 3787 return TRUE;
79072805
LW
3788 }
3789
3790 /* On the other hand, maybe we have a character class */
3791
3792 s++;
3793 if (*s == ']' || *s == '^')
3794 return FALSE;
3795 else {
ffb4593c 3796 /* this is terrifying, and it works */
79072805
LW
3797 int weight = 2; /* let's weigh the evidence */
3798 char seen[256];
f27ffc4a 3799 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3800 const char * const send = strchr(s,']');
3280af22 3801 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3802
3803 if (!send) /* has to be an expression */
3804 return TRUE;
3805
3806 Zero(seen,256,char);
3807 if (*s == '$')
3808 weight -= 3;
3809 else if (isDIGIT(*s)) {
3810 if (s[1] != ']') {
3811 if (isDIGIT(s[1]) && s[2] == ']')
3812 weight -= 10;
3813 }
3814 else
3815 weight -= 100;
3816 }
3817 for (; s < send; s++) {
3818 last_un_char = un_char;
3819 un_char = (unsigned char)*s;
3820 switch (*s) {
3821 case '@':
3822 case '&':
3823 case '$':
3824 weight -= seen[un_char] * 10;
7e2040f0 3825 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3826 int len;
8903cb82 3827 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e 3828 len = (int)strlen(tmpbuf);
6fbd0d97
BF
3829 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3830 UTF ? SVf_UTF8 : 0, SVt_PV))
79072805
LW
3831 weight -= 100;
3832 else
3833 weight -= 10;
3834 }
3835 else if (*s == '$' && s[1] &&
93a17b20
LW
3836 strchr("[#!%*<>()-=",s[1])) {
3837 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3838 weight -= 10;
3839 else
3840 weight -= 1;
3841 }
3842 break;
3843 case '\\':
3844 un_char = 254;
3845 if (s[1]) {
93a17b20 3846 if (strchr("wds]",s[1]))
79072805 3847 weight += 100;
10edeb5d 3848 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3849 weight += 1;
93a17b20 3850 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3851 weight += 40;
3852 else if (isDIGIT(s[1])) {
3853 weight += 40;
3854 while (s[1] && isDIGIT(s[1]))
3855 s++;
3856 }
3857 }
3858 else
3859 weight += 100;
3860 break;
3861 case '-':
3862 if (s[1] == '\\')
3863 weight += 50;
93a17b20 3864 if (strchr("aA01! ",last_un_char))
79072805 3865 weight += 30;
93a17b20 3866 if (strchr("zZ79~",s[1]))
79072805 3867 weight += 30;
f27ffc4a
GS
3868 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3869 weight -= 5; /* cope with negative subscript */
79072805
LW
3870 break;
3871 default:
3792a11b
NC
3872 if (!isALNUM(last_un_char)
3873 && !(last_un_char == '$' || last_un_char == '@'
3874 || last_un_char == '&')
3875 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3876 char *d = tmpbuf;
3877 while (isALPHA(*s))
3878 *d++ = *s++;
3879 *d = '\0';
5458a98a 3880 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3881 weight -= 150;
3882 }
3883 if (un_char == last_un_char + 1)
3884 weight += 5;
3885 weight -= seen[un_char];
3886 break;
3887 }
3888 seen[un_char]++;
3889 }
3890 if (weight >= 0) /* probably a character class */
3891 return FALSE;
3892 }
3893
3894 return TRUE;
3895}
ffed7fef 3896
ffb4593c
NT
3897/*
3898 * S_intuit_method
3899 *
3900 * Does all the checking to disambiguate
3901 * foo bar
3902 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3903 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3904 *
3905 * First argument is the stuff after the first token, e.g. "bar".
3906 *
a4fd4a89 3907 * Not a method if foo is a filehandle.
ffb4593c
NT
3908 * Not a method if foo is a subroutine prototyped to take a filehandle.
3909 * Not a method if it's really "Foo $bar"
3910 * Method if it's "foo $bar"
3911 * Not a method if it's really "print foo $bar"
3912 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3913 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3914 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3915 * =>
3916 */
3917
76e3520e 3918STATIC int
62d55b22 3919S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3920{
97aff369 3921 dVAR;
a0d0e21e 3922 char *s = start + (*start == '$');
3280af22 3923 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3924 STRLEN len;
3925 GV* indirgv;
5db06880
NC
3926#ifdef PERL_MAD
3927 int soff;
3928#endif
a0d0e21e 3929
7918f24d
NC
3930 PERL_ARGS_ASSERT_INTUIT_METHOD;
3931
aca88b25 3932 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3933 return 0;
aca88b25 3934 if (cv && SvPOK(cv)) {
8fa6a409 3935 const char *proto = CvPROTO(cv);
62d55b22
NC
3936 if (proto) {
3937 if (*proto == ';')
3938 proto++;
3939 if (*proto == '*')
3940 return 0;
3941 }
a0d0e21e 3942 }
8903cb82 3943 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3944 /* start is the beginning of the possible filehandle/object,
3945 * and s is the end of it
3946 * tmpbuf is a copy of it
3947 */
3948
a0d0e21e 3949 if (*start == '$') {
39c012bc 3950 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3ef1310e 3951 isUPPER(*PL_tokenbuf))
a0d0e21e 3952 return 0;
5db06880
NC
3953#ifdef PERL_MAD
3954 len = start - SvPVX(PL_linestr);
3955#endif
29595ff2 3956 s = PEEKSPACE(s);
f0092767 3957#ifdef PERL_MAD
5db06880
NC
3958 start = SvPVX(PL_linestr) + len;
3959#endif
3280af22
NIS
3960 PL_bufptr = start;
3961 PL_expect = XREF;
a0d0e21e
LW
3962 return *s == '(' ? FUNCMETH : METHOD;
3963 }
5458a98a 3964 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3965 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3966 len -= 2;
3967 tmpbuf[len] = '\0';
5db06880
NC
3968#ifdef PERL_MAD
3969 soff = s - SvPVX(PL_linestr);
3970#endif
c3e0f903
GS
3971 goto bare_package;
3972 }
38d2cf30 3973 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8ebc5c01 3974 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3975 return 0;
3976 /* filehandle or package name makes it a method */
39c012bc 3977 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5db06880
NC
3978#ifdef PERL_MAD
3979 soff = s - SvPVX(PL_linestr);
3980#endif
29595ff2 3981 s = PEEKSPACE(s);
3280af22 3982 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3983 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3984 bare_package:
cd81e915 3985 start_force(PL_curforce);
9ded7720 3986 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3987 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3988 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880 3989 if (PL_madskills)
38d2cf30
BF
3990 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3991 ( UTF ? SVf_UTF8 : 0 )));
3280af22 3992 PL_expect = XTERM;
a0d0e21e 3993 force_next(WORD);
3280af22 3994 PL_bufptr = s;
5db06880
NC
3995#ifdef PERL_MAD
3996 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3997#endif
a0d0e21e
LW
3998 return *s == '(' ? FUNCMETH : METHOD;
3999 }
4000 }
4001 return 0;
4002}
4003
16d20bd9 4004/* Encoded script support. filter_add() effectively inserts a
4e553d73 4005 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
4006 * Note that the filter function only applies to the current source file
4007 * (e.g., it will not affect files 'require'd or 'use'd by this one).
4008 *
4009 * The datasv parameter (which may be NULL) can be used to pass
4010 * private data to this instance of the filter. The filter function
4011 * can recover the SV using the FILTER_DATA macro and use it to
4012 * store private buffers and state information.
4013 *
4014 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 4015 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 4016 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
4017 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
4018 * private use must be set using malloc'd pointers.
4019 */
16d20bd9
AD
4020
4021SV *
864dbfa3 4022Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 4023{
97aff369 4024 dVAR;
f4c556ac 4025 if (!funcp)
a0714e2c 4026 return NULL;
f4c556ac 4027
5486870f
DM
4028 if (!PL_parser)
4029 return NULL;
4030
f1c31c52
FC
4031 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
4032 Perl_croak(aTHX_ "Source filters apply only to byte streams");
4033
3280af22
NIS
4034 if (!PL_rsfp_filters)
4035 PL_rsfp_filters = newAV();
16d20bd9 4036 if (!datasv)
561b68a9 4037 datasv = newSV(0);
862a34c6 4038 SvUPGRADE(datasv, SVt_PVIO);
8141890a 4039 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 4040 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 4041 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
4042 FPTR2DPTR(void *, IoANY(datasv)),
4043 SvPV_nolen(datasv)));
3280af22
NIS
4044 av_unshift(PL_rsfp_filters, 1);
4045 av_store(PL_rsfp_filters, 0, datasv) ;
60d63348
FC
4046 if (
4047 !PL_parser->filtered
4048 && PL_parser->lex_flags & LEX_EVALBYTES
4049 && PL_bufptr < PL_bufend
4050 ) {
4051 const char *s = PL_bufptr;
4052 while (s < PL_bufend) {
4053 if (*s == '\n') {
4054 SV *linestr = PL_parser->linestr;
4055 char *buf = SvPVX(linestr);
4056 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
4057 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
4058 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
4059 STRLEN const linestart_pos = PL_parser->linestart - buf;
4060 STRLEN const last_uni_pos =
4061 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
4062 STRLEN const last_lop_pos =
4063 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
4064 av_push(PL_rsfp_filters, linestr);
4065 PL_parser->linestr =
4066 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
4067 buf = SvPVX(PL_parser->linestr);
4068 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
4069 PL_parser->bufptr = buf + bufptr_pos;
4070 PL_parser->oldbufptr = buf + oldbufptr_pos;
4071 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
4072 PL_parser->linestart = buf + linestart_pos;
4073 if (PL_parser->last_uni)
4074 PL_parser->last_uni = buf + last_uni_pos;
4075 if (PL_parser->last_lop)
4076 PL_parser->last_lop = buf + last_lop_pos;
4077 SvLEN(linestr) = SvCUR(linestr);
4078 SvCUR(linestr) = s-SvPVX(linestr);
4079 PL_parser->filtered = 1;
4080 break;
4081 }
4082 s++;
4083 }
4084 }
16d20bd9
AD
4085 return(datasv);
4086}
4e553d73 4087
16d20bd9
AD
4088
4089/* Delete most recently added instance of this filter function. */
a0d0e21e 4090void
864dbfa3 4091Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 4092{
97aff369 4093 dVAR;
e0c19803 4094 SV *datasv;
24801a4b 4095
7918f24d
NC
4096 PERL_ARGS_ASSERT_FILTER_DEL;
4097
33073adb 4098#ifdef DEBUGGING
55662e27
JH
4099 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
4100 FPTR2DPTR(void*, funcp)));
33073adb 4101#endif
5486870f 4102 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
4103 return;
4104 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 4105 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 4106 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 4107 sv_free(av_pop(PL_rsfp_filters));
e50aee73 4108
16d20bd9
AD
4109 return;
4110 }
4111 /* we need to search for the correct entry and clear it */
cea2e8a9 4112 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
4113}
4114
4115
1de9afcd
RGS
4116/* Invoke the idxth filter function for the current rsfp. */
4117/* maxlen 0 = read one text line */
16d20bd9 4118I32
864dbfa3 4119Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 4120{
97aff369 4121 dVAR;
16d20bd9
AD
4122 filter_t funcp;
4123 SV *datasv = NULL;
f482118e
NC
4124 /* This API is bad. It should have been using unsigned int for maxlen.
4125 Not sure if we want to change the API, but if not we should sanity
4126 check the value here. */
60d63348 4127 unsigned int correct_length
39cd7a59
NC
4128 = maxlen < 0 ?
4129#ifdef PERL_MICRO
4130 0x7FFFFFFF
4131#else
4132 INT_MAX
4133#endif
4134 : maxlen;
e50aee73 4135
7918f24d
NC
4136 PERL_ARGS_ASSERT_FILTER_READ;
4137
5486870f 4138 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 4139 return -1;
1de9afcd 4140 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
4141 /* Provide a default input filter to make life easy. */
4142 /* Note that we append to the line. This is handy. */
f4c556ac
GS
4143 DEBUG_P(PerlIO_printf(Perl_debug_log,
4144 "filter_read %d: from rsfp\n", idx));
f482118e 4145 if (correct_length) {
16d20bd9
AD
4146 /* Want a block */
4147 int len ;
f54cb97a 4148 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
4149
4150 /* ensure buf_sv is large enough */
881d8f0a 4151 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
4152 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4153 correct_length)) <= 0) {
3280af22 4154 if (PerlIO_error(PL_rsfp))
37120919
AD
4155 return -1; /* error */
4156 else
4157 return 0 ; /* end of file */
4158 }
16d20bd9 4159 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 4160 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
4161 } else {
4162 /* Want a line */
3280af22
NIS
4163 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4164 if (PerlIO_error(PL_rsfp))
37120919
AD
4165 return -1; /* error */
4166 else
4167 return 0 ; /* end of file */
4168 }
16d20bd9
AD
4169 }
4170 return SvCUR(buf_sv);
4171 }
4172 /* Skip this filter slot if filter has been deleted */
1de9afcd 4173 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
4174 DEBUG_P(PerlIO_printf(Perl_debug_log,
4175 "filter_read %d: skipped (filter deleted)\n",
4176 idx));
f482118e 4177 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9 4178 }
60d63348
FC
4179 if (SvTYPE(datasv) != SVt_PVIO) {
4180 if (correct_length) {
4181 /* Want a block */
4182 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4183 if (!remainder) return 0; /* eof */
4184 if (correct_length > remainder) correct_length = remainder;
4185 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4186 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4187 } else {
4188 /* Want a line */
4189 const char *s = SvEND(datasv);
4190 const char *send = SvPVX(datasv) + SvLEN(datasv);
4191 while (s < send) {
4192 if (*s == '\n') {
4193 s++;
4194 break;
4195 }
4196 s++;
4197 }
4198 if (s == send) return 0; /* eof */
4199 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4200 SvCUR_set(datasv, s-SvPVX(datasv));
4201 }
4202 return SvCUR(buf_sv);
4203 }
16d20bd9 4204 /* Get function pointer hidden within datasv */
8141890a 4205 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
4206 DEBUG_P(PerlIO_printf(Perl_debug_log,
4207 "filter_read %d: via function %p (%s)\n",
ca0270c4 4208 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
4209 /* Call function. The function is expected to */
4210 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 4211 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 4212 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
4213}
4214
76e3520e 4215STATIC char *
5cc814fd 4216S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 4217{
97aff369 4218 dVAR;
7918f24d
NC
4219
4220 PERL_ARGS_ASSERT_FILTER_GETS;
4221
c39cd008 4222#ifdef PERL_CR_FILTER
3280af22 4223 if (!PL_rsfp_filters) {
c39cd008 4224 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
4225 }
4226#endif
3280af22 4227 if (PL_rsfp_filters) {
55497cff 4228 if (!append)
4229 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
4230 if (FILTER_READ(0, sv, 0) > 0)
4231 return ( SvPVX(sv) ) ;
4232 else
bd61b366 4233 return NULL ;
16d20bd9 4234 }
9d116dd7 4235 else
5cc814fd 4236 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
4237}
4238
01ec43d0 4239STATIC HV *
9bde8eb0 4240S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 4241{
97aff369 4242 dVAR;
def3634b
GS
4243 GV *gv;
4244
7918f24d
NC
4245 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4246
01ec43d0 4247 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
4248 return PL_curstash;
4249
4250 if (len > 2 &&
4251 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
acc6da14 4252 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
01ec43d0
GS
4253 {
4254 return GvHV(gv); /* Foo:: */
def3634b
GS
4255 }
4256
4257 /* use constant CLASS => 'MyClass' */
acc6da14 4258 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
c35e046a
AL
4259 if (gv && GvCV(gv)) {
4260 SV * const sv = cv_const_sv(GvCV(gv));
4261 if (sv)
9bde8eb0 4262 pkgname = SvPV_const(sv, len);
def3634b
GS
4263 }
4264
acc6da14 4265 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
def3634b 4266}
a0d0e21e 4267
e3f73d4e
RGS
4268/*
4269 * S_readpipe_override
486ec47a 4270 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4271 * optree, provided sublex_start() is called afterwards.
4272 */
4273STATIC void
1d51329b 4274S_readpipe_override(pTHX)
e3f73d4e
RGS
4275{
4276 GV **gvp;
4277 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4278 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4279 if ((gv_readpipe
4280 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4281 ||
4282 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4283 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4284 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4285 {
4286 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4287 op_append_elem(OP_LIST,
e3f73d4e
RGS
4288 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4289 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4290 }
e3f73d4e
RGS
4291}
4292
5db06880
NC
4293#ifdef PERL_MAD
4294 /*
4295 * Perl_madlex
4296 * The intent of this yylex wrapper is to minimize the changes to the
4297 * tokener when we aren't interested in collecting madprops. It remains
4298 * to be seen how successful this strategy will be...
4299 */
4300
4301int
4302Perl_madlex(pTHX)
4303{
4304 int optype;
4305 char *s = PL_bufptr;
4306
cd81e915
NC
4307 /* make sure PL_thiswhite is initialized */
4308 PL_thiswhite = 0;
4309 PL_thismad = 0;
5db06880 4310
5db06880 4311 /* previous token ate up our whitespace? */
cd81e915
NC
4312 if (!PL_lasttoke && PL_nextwhite) {
4313 PL_thiswhite = PL_nextwhite;
4314 PL_nextwhite = 0;
5db06880
NC
4315 }
4316
4317 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4318 PL_realtokenstart = -1;
4319 PL_thistoken = 0;
5db06880
NC
4320 optype = yylex();
4321 s = PL_bufptr;
cd81e915 4322 assert(PL_curforce < 0);
5db06880 4323
cd81e915
NC
4324 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4325 if (!PL_thistoken) {
4326 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4327 PL_thistoken = newSVpvs("");
5db06880 4328 else {
c35e046a 4329 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4330 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4331 }
4332 }
cd81e915
NC
4333 if (PL_thismad) /* install head */
4334 CURMAD('X', PL_thistoken);
5db06880
NC
4335 }
4336
4337 /* last whitespace of a sublex? */
cd81e915
NC
4338 if (optype == ')' && PL_endwhite) {
4339 CURMAD('X', PL_endwhite);
5db06880
NC
4340 }
4341
cd81e915 4342 if (!PL_thismad) {
5db06880
NC
4343
4344 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4345 if (!PL_thiswhite && !PL_endwhite && !optype) {
4346 sv_free(PL_thistoken);
4347 PL_thistoken = 0;
5db06880
NC
4348 return 0;
4349 }
4350
4351 /* put off final whitespace till peg */
60d63348 4352 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
cd81e915
NC
4353 PL_nextwhite = PL_thiswhite;
4354 PL_thiswhite = 0;
5db06880 4355 }
cd81e915
NC
4356 else if (PL_thisopen) {
4357 CURMAD('q', PL_thisopen);
4358 if (PL_thistoken)
4359 sv_free(PL_thistoken);
4360 PL_thistoken = 0;
5db06880
NC
4361 }
4362 else {
4363 /* Store actual token text as madprop X */
cd81e915 4364 CURMAD('X', PL_thistoken);
5db06880
NC
4365 }
4366
cd81e915 4367 if (PL_thiswhite) {
5db06880 4368 /* add preceding whitespace as madprop _ */
cd81e915 4369 CURMAD('_', PL_thiswhite);
5db06880
NC
4370 }
4371
cd81e915 4372 if (PL_thisstuff) {
5db06880 4373 /* add quoted material as madprop = */
cd81e915 4374 CURMAD('=', PL_thisstuff);
5db06880
NC
4375 }
4376
cd81e915 4377 if (PL_thisclose) {
5db06880 4378 /* add terminating quote as madprop Q */
cd81e915 4379 CURMAD('Q', PL_thisclose);
5db06880
NC
4380 }
4381 }
4382
4383 /* special processing based on optype */
4384
4385 switch (optype) {
4386
4387 /* opval doesn't need a TOKEN since it can already store mp */
4388 case WORD:
4389 case METHOD:
4390 case FUNCMETH:
4391 case THING:
4392 case PMFUNC:
4393 case PRIVATEREF:
4394 case FUNC0SUB:
4395 case UNIOPSUB:
4396 case LSTOPSUB:
6154021b
RGS
4397 if (pl_yylval.opval)
4398 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4399 PL_thismad = 0;
5db06880
NC
4400 return optype;
4401
4402 /* fake EOF */
4403 case 0:
4404 optype = PEG;
cd81e915
NC
4405 if (PL_endwhite) {
4406 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4407 PL_endwhite = 0;
5db06880
NC
4408 }
4409 break;
4410
5504e6cf
FC
4411 /* pval */
4412 case LABEL:
4413 break;
4414
5db06880
NC
4415 case ']':
4416 case '}':
cd81e915 4417 if (PL_faketokens)
5db06880
NC
4418 break;
4419 /* remember any fake bracket that lexer is about to discard */
4420 if (PL_lex_brackets == 1 &&
4421 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4422 {
4423 s = PL_bufptr;
4424 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4425 s++;
4426 if (*s == '}') {
cd81e915
NC
4427 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4428 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4429 PL_thiswhite = 0;
5db06880
NC
4430 PL_bufptr = s - 1;
4431 break; /* don't bother looking for trailing comment */
4432 }
4433 else
4434 s = PL_bufptr;
4435 }
4436 if (optype == ']')
4437 break;
4438 /* FALLTHROUGH */
4439
4440 /* attach a trailing comment to its statement instead of next token */
4441 case ';':
cd81e915 4442 if (PL_faketokens)
5db06880
NC
4443 break;
4444 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4445 s = PL_bufptr;
4446 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4447 s++;
4448 if (*s == '\n' || *s == '#') {
4449 while (s < PL_bufend && *s != '\n')
4450 s++;
4451 if (s < PL_bufend)
4452 s++;
cd81e915
NC
4453 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4454 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4455 PL_thiswhite = 0;
5db06880
NC
4456 PL_bufptr = s;
4457 }
4458 }
4459 break;
4460
5db06880
NC
4461 /* ival */
4462 default:
4463 break;
4464
4465 }
4466
4467 /* Create new token struct. Note: opvals return early above. */
6154021b 4468 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4469 PL_thismad = 0;
5db06880
NC
4470 return optype;
4471}
4472#endif
4473
468aa647 4474STATIC char *
cc6ed77d 4475S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4476 dVAR;
7918f24d
NC
4477
4478 PERL_ARGS_ASSERT_TOKENIZE_USE;
4479
468aa647
RGS
4480 if (PL_expect != XSTATE)
4481 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4482 is_use ? "use" : "no"));
52d0e95b 4483 PL_expect = XTERM;
29595ff2 4484 s = SKIPSPACE1(s);
468aa647
RGS
4485 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4486 s = force_version(s, TRUE);
17c59fdf
VP
4487 if (*s == ';' || *s == '}'
4488 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4489 start_force(PL_curforce);
9ded7720 4490 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4491 force_next(WORD);
4492 }
4493 else if (*s == 'v') {
4494 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4495 s = force_version(s, FALSE);
4496 }
4497 }
4498 else {
4499 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4500 s = force_version(s, FALSE);
4501 }
6154021b 4502 pl_yylval.ival = is_use;
468aa647
RGS
4503 return s;
4504}
748a9306 4505#ifdef DEBUGGING
27da23d5 4506 static const char* const exp_name[] =
09bef843 4507 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4508 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4509 };
748a9306 4510#endif
463ee0b2 4511
361d9b55
Z
4512#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4513STATIC bool
4514S_word_takes_any_delimeter(char *p, STRLEN len)
4515{
4516 return (len == 1 && strchr("msyq", p[0])) ||
4517 (len == 2 && (
4518 (p[0] == 't' && p[1] == 'r') ||
4519 (p[0] == 'q' && strchr("qwxr", p[1]))));
4520}
4521
02aa26ce
NT
4522/*
4523 yylex
4524
4525 Works out what to call the token just pulled out of the input
4526 stream. The yacc parser takes care of taking the ops we return and
4527 stitching them into a tree.
4528
4529 Returns:
3875fc11 4530 The type of the next token
02aa26ce
NT
4531
4532 Structure:
3875fc11
FC
4533 Switch based on the current state:
4534 - if we already built the token before, use it
4535 - if we have a case modifier in a string, deal with that
4536 - handle other cases of interpolation inside a string
4537 - scan the next line if we are inside a format
4538 In the normal state switch on the next character:
4539 - default:
4540 if alphabetic, go to key lookup
4541 unrecoginized character - croak
4542 - 0/4/26: handle end-of-line or EOF
4543 - cases for whitespace
4544 - \n and #: handle comments and line numbers
4545 - various operators, brackets and sigils
4546 - numbers
4547 - quotes
4548 - 'v': vstrings (or go to key lookup)
4549 - 'x' repetition operator (or go to key lookup)
4550 - other ASCII alphanumerics (key lookup begins here):
4551 word before => ?
4552 keyword plugin
4553 scan built-in keyword (but do nothing with it yet)
4554 check for statement label
4555 check for lexical subs
4556 goto just_a_word if there is one
4557 see whether built-in keyword is overridden
4558 switch on keyword number:
4559 - default: just_a_word:
4560 not a built-in keyword; handle bareword lookup
4561 disambiguate between method and sub call
4562 fall back to bareword
4563 - cases for built-in keywords
02aa26ce
NT
4564*/
4565
20141f0e 4566
dba4d153
JH
4567#ifdef __SC__
4568#pragma segment Perl_yylex
4569#endif
dba4d153 4570int
dba4d153 4571Perl_yylex(pTHX)
20141f0e 4572{
97aff369 4573 dVAR;
eb578fdb
KW
4574 char *s = PL_bufptr;
4575 char *d;
463ee0b2 4576 STRLEN len;
705fe0e5
FC
4577 bool bof = FALSE;
4578 U8 formbrack = 0;
580561a3 4579 U32 fake_eof = 0;
a687059c 4580
10edeb5d
JH
4581 /* orig_keyword, gvp, and gv are initialized here because
4582 * jump to the label just_a_word_zero can bypass their
4583 * initialization later. */
4584 I32 orig_keyword = 0;
4585 GV *gv = NULL;
4586 GV **gvp = NULL;
4587
bbf60fe6 4588 DEBUG_T( {
396482e1 4589 SV* tmp = newSVpvs("");
b6007c36
DM
4590 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4591 (IV)CopLINE(PL_curcop),
4592 lex_state_names[PL_lex_state],
4593 exp_name[PL_expect],
4594 pv_display(tmp, s, strlen(s), 0, 60));
4595 SvREFCNT_dec(tmp);
bbf60fe6 4596 } );
02aa26ce 4597
3280af22 4598 switch (PL_lex_state) {
79072805
LW
4599#ifdef COMMENTARY
4600 case LEX_NORMAL: /* Some compilers will produce faster */
4601 case LEX_INTERPNORMAL: /* code if we comment these out. */
4602 break;
4603#endif
4604
09bef843 4605 /* when we've already built the next token, just pull it out of the queue */
79072805 4606 case LEX_KNOWNEXT:
5db06880
NC
4607#ifdef PERL_MAD
4608 PL_lasttoke--;
6154021b 4609 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4610 if (PL_madskills) {
cd81e915 4611 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4612 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4613 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4614 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4615 PL_thismad->mad_val = 0;
4616 mad_free(PL_thismad);
4617 PL_thismad = 0;
5db06880
NC
4618 }
4619 }
4620 if (!PL_lasttoke) {
4621 PL_lex_state = PL_lex_defer;
4622 PL_expect = PL_lex_expect;
4623 PL_lex_defer = LEX_NORMAL;
4624 if (!PL_nexttoke[PL_lasttoke].next_type)
4625 return yylex();
4626 }
4627#else
3280af22 4628 PL_nexttoke--;
6154021b 4629 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4630 if (!PL_nexttoke) {
4631 PL_lex_state = PL_lex_defer;
4632 PL_expect = PL_lex_expect;
4633 PL_lex_defer = LEX_NORMAL;
463ee0b2 4634 }
5db06880 4635#endif
a7aaec61
Z
4636 {
4637 I32 next_type;
5db06880 4638#ifdef PERL_MAD
a7aaec61 4639 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4640#else
a7aaec61 4641 next_type = PL_nexttype[PL_nexttoke];
5db06880 4642#endif
78cdf107
Z
4643 if (next_type & (7<<24)) {
4644 if (next_type & (1<<24)) {
4645 if (PL_lex_brackets > 100)
4646 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4647 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4648 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4649 }
4650 if (next_type & (2<<24))
4651 PL_lex_allbrackets++;
4652 if (next_type & (4<<24))
4653 PL_lex_allbrackets--;
a7aaec61
Z
4654 next_type &= 0xffff;
4655 }
6c7ae946
FC
4656 if (S_is_opval_token(next_type) && pl_yylval.opval)
4657 pl_yylval.opval->op_savefree = 0; /* release */
3f33d153 4658 return REPORT(next_type == 'p' ? pending_ident() : next_type);
a7aaec61 4659 }
79072805 4660
02aa26ce 4661 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4662 when we get here, PL_bufptr is at the \
02aa26ce 4663 */
79072805
LW
4664 case LEX_INTERPCASEMOD:
4665#ifdef DEBUGGING
3280af22 4666 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5637ef5b
NC
4667 Perl_croak(aTHX_
4668 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4669 PL_bufptr, PL_bufend, *PL_bufptr);
79072805 4670#endif
02aa26ce 4671 /* handle \E or end of string */
3280af22 4672 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4673 /* if at a \E */
3280af22 4674 if (PL_lex_casemods) {
f54cb97a 4675 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4676 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4677
3792a11b 4678 if (PL_bufptr != PL_bufend
838f2281
BF
4679 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4680 || oldmod == 'F')) {
3280af22
NIS
4681 PL_bufptr += 2;
4682 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4683#ifdef PERL_MAD
4684 if (PL_madskills)
6b29d1f5 4685 PL_thistoken = newSVpvs("\\E");
5db06880 4686#endif
a0d0e21e 4687 }
78cdf107 4688 PL_lex_allbrackets--;
bbf60fe6 4689 return REPORT(')');
79072805 4690 }
52ed07f6
BF
4691 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4692 /* Got an unpaired \E */
4693 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
820438b1 4694 "Useless use of \\E");
52ed07f6 4695 }
5db06880
NC
4696#ifdef PERL_MAD
4697 while (PL_bufptr != PL_bufend &&
4698 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4699 if (!PL_thiswhite)
6b29d1f5 4700 PL_thiswhite = newSVpvs("");
cd81e915 4701 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4702 PL_bufptr += 2;
4703 }
4704#else
3280af22
NIS
4705 if (PL_bufptr != PL_bufend)
4706 PL_bufptr += 2;
5db06880 4707#endif
3280af22 4708 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4709 return yylex();
79072805
LW
4710 }
4711 else {
607df283 4712 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4713 "### Saw case modifier\n"); });
3280af22 4714 s = PL_bufptr + 1;
6e909404 4715 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4716#ifdef PERL_MAD
cd81e915 4717 if (!PL_thiswhite)
6b29d1f5 4718 PL_thiswhite = newSVpvs("");
cd81e915 4719 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4720#endif
89122651 4721 PL_bufptr = s + 3;
6e909404
JH
4722 PL_lex_state = LEX_INTERPCONCAT;
4723 return yylex();
a0d0e21e 4724 }
6e909404 4725 else {
90771dc0 4726 I32 tmp;
5db06880
NC
4727 if (!PL_madskills) /* when just compiling don't need correct */
4728 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4729 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
838f2281
BF
4730 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4731 (strchr(PL_lex_casestack, 'L')
4732 || strchr(PL_lex_casestack, 'U')
4733 || strchr(PL_lex_casestack, 'F'))) {
6e909404 4734 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4735 PL_lex_allbrackets--;
bbf60fe6 4736 return REPORT(')');
6e909404
JH
4737 }
4738 if (PL_lex_casemods > 10)
4739 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4740 PL_lex_casestack[PL_lex_casemods++] = *s;
4741 PL_lex_casestack[PL_lex_casemods] = '\0';
4742 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4743 start_force(PL_curforce);
9ded7720 4744 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4745 force_next((2<<24)|'(');
cd81e915 4746 start_force(PL_curforce);
6e909404 4747 if (*s == 'l')
9ded7720 4748 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4749 else if (*s == 'u')
9ded7720 4750 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4751 else if (*s == 'L')
9ded7720 4752 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4753 else if (*s == 'U')
9ded7720 4754 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4755 else if (*s == 'Q')
9ded7720 4756 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
838f2281
BF
4757 else if (*s == 'F')
4758 NEXTVAL_NEXTTOKE.ival = OP_FC;
6e909404 4759 else
5637ef5b 4760 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5db06880 4761 if (PL_madskills) {
a5849ce5
NC
4762 SV* const tmpsv = newSVpvs("\\ ");
4763 /* replace the space with the character we want to escape
4764 */
4765 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4766 curmad('_', tmpsv);
4767 }
6e909404 4768 PL_bufptr = s + 1;
a0d0e21e 4769 }
79072805 4770 force_next(FUNC);
3280af22
NIS
4771 if (PL_lex_starts) {
4772 s = PL_bufptr;
4773 PL_lex_starts = 0;
5db06880
NC
4774#ifdef PERL_MAD
4775 if (PL_madskills) {
cd81e915
NC
4776 if (PL_thistoken)
4777 sv_free(PL_thistoken);
6b29d1f5 4778 PL_thistoken = newSVpvs("");
5db06880
NC
4779 }
4780#endif
131b3ad0
DM
4781 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4782 if (PL_lex_casemods == 1 && PL_lex_inpat)
4783 OPERATOR(',');
4784 else
4785 Aop(OP_CONCAT);
79072805
LW
4786 }
4787 else
cea2e8a9 4788 return yylex();
79072805
LW
4789 }
4790
55497cff 4791 case LEX_INTERPPUSH:
bbf60fe6 4792 return REPORT(sublex_push());
55497cff 4793
79072805 4794 case LEX_INTERPSTART:
3280af22 4795 if (PL_bufptr == PL_bufend)
bbf60fe6 4796 return REPORT(sublex_done());
9da1dd8f 4797 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
b6007c36 4798 "### Interpolated variable\n"); });
3280af22
NIS
4799 PL_expect = XTERM;
4800 PL_lex_dojoin = (*PL_bufptr == '@');
4801 PL_lex_state = LEX_INTERPNORMAL;
4802 if (PL_lex_dojoin) {
cd81e915 4803 start_force(PL_curforce);
9ded7720 4804 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4805 force_next(',');
cd81e915 4806 start_force(PL_curforce);
a0d0e21e 4807 force_ident("\"", '$');
cd81e915 4808 start_force(PL_curforce);
9ded7720 4809 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4810 force_next('$');
cd81e915 4811 start_force(PL_curforce);
9ded7720 4812 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4813 force_next((2<<24)|'(');
cd81e915 4814 start_force(PL_curforce);
9ded7720 4815 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4816 force_next(FUNC);
4817 }
9da1dd8f
DM
4818 /* Convert (?{...}) and friends to 'do {...}' */
4819 if (PL_lex_inpat && *PL_bufptr == '(') {
3328ab5a 4820 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9da1dd8f
DM
4821 PL_bufptr += 2;
4822 if (*PL_bufptr != '{')
4823 PL_bufptr++;
6165f85b
DM
4824 start_force(PL_curforce);
4825 /* XXX probably need a CURMAD(something) here */
9da1dd8f
DM
4826 PL_expect = XTERMBLOCK;
4827 force_next(DO);
4828 }
4829
3280af22
NIS
4830 if (PL_lex_starts++) {
4831 s = PL_bufptr;
5db06880
NC
4832#ifdef PERL_MAD
4833 if (PL_madskills) {
cd81e915
NC
4834 if (PL_thistoken)
4835 sv_free(PL_thistoken);
6b29d1f5 4836 PL_thistoken = newSVpvs("");
5db06880
NC
4837 }
4838#endif
131b3ad0
DM
4839 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4840 if (!PL_lex_casemods && PL_lex_inpat)
4841 OPERATOR(',');
4842 else
4843 Aop(OP_CONCAT);
79072805 4844 }
cea2e8a9 4845 return yylex();
79072805
LW
4846
4847 case LEX_INTERPENDMAYBE:
3280af22
NIS
4848 if (intuit_more(PL_bufptr)) {
4849 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4850 break;
4851 }
4852 /* FALL THROUGH */
4853
4854 case LEX_INTERPEND:
3280af22
NIS
4855 if (PL_lex_dojoin) {
4856 PL_lex_dojoin = FALSE;
4857 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4858#ifdef PERL_MAD
4859 if (PL_madskills) {
cd81e915
NC
4860 if (PL_thistoken)
4861 sv_free(PL_thistoken);
6b29d1f5 4862 PL_thistoken = newSVpvs("");
5db06880
NC
4863 }
4864#endif
78cdf107 4865 PL_lex_allbrackets--;
bbf60fe6 4866 return REPORT(')');
79072805 4867 }
43a16006 4868 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4869 && SvEVALED(PL_lex_repl))
43a16006 4870 {
e9fa98b2 4871 if (PL_bufptr != PL_bufend)
cea2e8a9 4872 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4873 PL_lex_repl = NULL;
e9fa98b2 4874 }
db444266
FC
4875 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4876 re_eval_str. If the here-doc body’s length equals the previous
4877 value of re_eval_start, re_eval_start will now be null. So
4878 check re_eval_str as well. */
3328ab5a
FC
4879 if (PL_parser->lex_shared->re_eval_start
4880 || PL_parser->lex_shared->re_eval_str) {
db444266 4881 SV *sv;
9da1dd8f
DM
4882 if (*PL_bufptr != ')')
4883 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4884 PL_bufptr++;
4885 /* having compiled a (?{..}) expression, return the original
4886 * text too, as a const */
3328ab5a
FC
4887 if (PL_parser->lex_shared->re_eval_str) {
4888 sv = PL_parser->lex_shared->re_eval_str;
4889 PL_parser->lex_shared->re_eval_str = NULL;
4890 SvCUR_set(sv,
4891 PL_bufptr - PL_parser->lex_shared->re_eval_start);
db444266
FC
4892 SvPV_shrink_to_cur(sv);
4893 }
3328ab5a
FC
4894 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4895 PL_bufptr - PL_parser->lex_shared->re_eval_start);
6165f85b
DM
4896 start_force(PL_curforce);
4897 /* XXX probably need a CURMAD(something) here */
4898 NEXTVAL_NEXTTOKE.opval =
9da1dd8f 4899 (OP*)newSVOP(OP_CONST, 0,
db444266 4900 sv);
9da1dd8f 4901 force_next(THING);
3328ab5a 4902 PL_parser->lex_shared->re_eval_start = NULL;
9da1dd8f
DM
4903 PL_expect = XTERM;
4904 return REPORT(',');
4905 }
4906
79072805
LW
4907 /* FALLTHROUGH */
4908 case LEX_INTERPCONCAT:
4909#ifdef DEBUGGING
3280af22 4910 if (PL_lex_brackets)
5637ef5b
NC
4911 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4912 (long) PL_lex_brackets);
79072805 4913#endif
3280af22 4914 if (PL_bufptr == PL_bufend)
bbf60fe6 4915 return REPORT(sublex_done());
79072805 4916
9da1dd8f
DM
4917 /* m'foo' still needs to be parsed for possible (?{...}) */
4918 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
3280af22 4919 SV *sv = newSVsv(PL_linestr);
9da1dd8f 4920 sv = tokeq(sv);
6154021b 4921 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4922 s = PL_bufend;
79072805
LW
4923 }
4924 else {
3280af22 4925 s = scan_const(PL_bufptr);
79072805 4926 if (*s == '\\')
3280af22 4927 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4928 else
3280af22 4929 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4930 }
4931
3280af22 4932 if (s != PL_bufptr) {
cd81e915 4933 start_force(PL_curforce);
5db06880
NC
4934 if (PL_madskills) {
4935 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4936 }
6154021b 4937 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4938 PL_expect = XTERM;
79072805 4939 force_next(THING);
131b3ad0 4940 if (PL_lex_starts++) {
5db06880
NC
4941#ifdef PERL_MAD
4942 if (PL_madskills) {
cd81e915
NC
4943 if (PL_thistoken)
4944 sv_free(PL_thistoken);
6b29d1f5 4945 PL_thistoken = newSVpvs("");
5db06880
NC
4946 }
4947#endif
131b3ad0
DM
4948 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4949 if (!PL_lex_casemods && PL_lex_inpat)
4950 OPERATOR(',');
4951 else
4952 Aop(OP_CONCAT);
4953 }
79072805 4954 else {
3280af22 4955 PL_bufptr = s;
cea2e8a9 4956 return yylex();
79072805
LW
4957 }
4958 }
4959
cea2e8a9 4960 return yylex();
a0d0e21e 4961 case LEX_FORMLINE:
3280af22
NIS
4962 s = scan_formline(PL_bufptr);
4963 if (!PL_lex_formbrack)
7c70caa5 4964 {
705fe0e5 4965 formbrack = 1;
a0d0e21e 4966 goto rightbracket;
7c70caa5 4967 }
705fe0e5
FC
4968 PL_bufptr = s;
4969 return yylex();
79072805
LW
4970 }
4971
3280af22
NIS
4972 s = PL_bufptr;
4973 PL_oldoldbufptr = PL_oldbufptr;
4974 PL_oldbufptr = s;
463ee0b2
LW
4975
4976 retry:
5db06880 4977#ifdef PERL_MAD
cd81e915
NC
4978 if (PL_thistoken) {
4979 sv_free(PL_thistoken);
4980 PL_thistoken = 0;
5db06880 4981 }
cd81e915 4982 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4983#endif
378cc40b
LW
4984 switch (*s) {
4985 default:
7e2040f0 4986 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4987 goto keylookup;
b1fc3636 4988 {
e2f06df0
BF
4989 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4990 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4991 UTF8SKIP(s),
4992 SVs_TEMP | SVf_UTF8),
4993 10, UNI_DISPLAY_ISPRINT))
4994 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
b1fc3636
CJ
4995 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4996 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4997 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4998 } else {
4999 d = PL_linestart;
5000 }
5001 *s = '\0';
e2f06df0
BF
5002 sv_setpv(dsv, d);
5003 if (UTF)
5004 SvUTF8_on(dsv);
5005 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
b1fc3636 5006 }
e929a76b
LW
5007 case 4:
5008 case 26:
5009 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 5010 case 0:
5db06880
NC
5011#ifdef PERL_MAD
5012 if (PL_madskills)
cd81e915 5013 PL_faketokens = 0;
5db06880 5014#endif
60d63348 5015 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
3280af22
NIS
5016 PL_last_uni = 0;
5017 PL_last_lop = 0;
a7aaec61
Z
5018 if (PL_lex_brackets &&
5019 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
5020 yyerror((const char *)
5021 (PL_lex_formbrack
5022 ? "Format not terminated"
5023 : "Missing right curly or square bracket"));
c5ee2135 5024 }
4e553d73 5025 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 5026 "### Tokener got EOF\n");
5f80b19c 5027 } );
79072805 5028 TOKEN(0);
463ee0b2 5029 }
3280af22 5030 if (s++ < PL_bufend)
a687059c 5031 goto retry; /* ignore stray nulls */
3280af22
NIS
5032 PL_last_uni = 0;
5033 PL_last_lop = 0;
5034 if (!PL_in_eval && !PL_preambled) {
5035 PL_preambled = TRUE;
5db06880
NC
5036#ifdef PERL_MAD
5037 if (PL_madskills)
cd81e915 5038 PL_faketokens = 1;
5db06880 5039#endif
5ab7ff98
NC
5040 if (PL_perldb) {
5041 /* Generate a string of Perl code to load the debugger.
5042 * If PERL5DB is set, it will return the contents of that,
5043 * otherwise a compile-time require of perl5db.pl. */
5044
5045 const char * const pdb = PerlEnv_getenv("PERL5DB");
5046
5047 if (pdb) {
5048 sv_setpv(PL_linestr, pdb);
5049 sv_catpvs(PL_linestr,";");
5050 } else {
5051 SETERRNO(0,SS_NORMAL);
5052 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
5053 }
5054 } else
5055 sv_setpvs(PL_linestr,"");
c62eb204
NC
5056 if (PL_preambleav) {
5057 SV **svp = AvARRAY(PL_preambleav);
5058 SV **const end = svp + AvFILLp(PL_preambleav);
5059 while(svp <= end) {
5060 sv_catsv(PL_linestr, *svp);
5061 ++svp;
396482e1 5062 sv_catpvs(PL_linestr, ";");
91b7def8 5063 }
daba3364 5064 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 5065 PL_preambleav = NULL;
91b7def8 5066 }
9f639728
FR
5067 if (PL_minus_E)
5068 sv_catpvs(PL_linestr,
5069 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 5070 if (PL_minus_n || PL_minus_p) {
f0e67a1d 5071 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 5072 if (PL_minus_l)
396482e1 5073 sv_catpvs(PL_linestr,"chomp;");
3280af22 5074 if (PL_minus_a) {
3280af22 5075 if (PL_minus_F) {
3792a11b
NC
5076 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
5077 || *PL_splitstr == '"')
3280af22 5078 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 5079 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 5080 else {
c8ef6a4b
NC
5081 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
5082 bytes can be used as quoting characters. :-) */
dd374669 5083 const char *splits = PL_splitstr;
91d456ae 5084 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
5085 do {
5086 /* Need to \ \s */
dd374669
AL
5087 if (*splits == '\\')
5088 sv_catpvn(PL_linestr, splits, 1);
5089 sv_catpvn(PL_linestr, splits, 1);
5090 } while (*splits++);
48c4c863
NC
5091 /* This loop will embed the trailing NUL of
5092 PL_linestr as the last thing it does before
5093 terminating. */
396482e1 5094 sv_catpvs(PL_linestr, ");");
54310121 5095 }
2304df62
AD
5096 }
5097 else
396482e1 5098 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 5099 }
79072805 5100 }
396482e1 5101 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
5102 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5103 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5104 PL_last_lop = PL_last_uni = NULL;
65269a95 5105 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 5106 update_debugger_info(PL_linestr, NULL, 0);
79072805 5107 goto retry;
a687059c 5108 }
e929a76b 5109 do {
580561a3
Z
5110 fake_eof = 0;
5111 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 5112 if (0) {
7e28d3af 5113 fake_eof:
f0e67a1d
Z
5114 fake_eof = LEX_FAKE_EOF;
5115 }
5116 PL_bufptr = PL_bufend;
83944c01 5117 COPLINE_INC_WITH_HERELINES;
f0e67a1d 5118 if (!lex_next_chunk(fake_eof)) {
17cc9359 5119 CopLINE_dec(PL_curcop);
f0e67a1d
Z
5120 s = PL_bufptr;
5121 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
5122 }
17cc9359 5123 CopLINE_dec(PL_curcop);
5db06880 5124#ifdef PERL_MAD
f0e67a1d 5125 if (!PL_rsfp)
cd81e915 5126 PL_realtokenstart = -1;
5db06880 5127#endif
f0e67a1d 5128 s = PL_bufptr;
7aa207d6
JH
5129 /* If it looks like the start of a BOM or raw UTF-16,
5130 * check if it in fact is. */
580561a3 5131 if (bof && PL_rsfp &&
7aa207d6
JH
5132 (*s == 0 ||
5133 *(U8*)s == 0xEF ||
5134 *(U8*)s >= 0xFE ||
5135 s[1] == 0)) {
879bc93b
DM
5136 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
5137 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
5138#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5139 /* offset may include swallowed CR */
5140 if (!bof)
879bc93b 5141 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 5142#endif
7e28d3af 5143 if (bof) {
3280af22 5144 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 5145 s = swallow_bom((U8*)s);
e929a76b 5146 }
378cc40b 5147 }
737c24fc 5148 if (PL_parser->in_pod) {
a0d0e21e 5149 /* Incest with pod. */
5db06880
NC
5150#ifdef PERL_MAD
5151 if (PL_madskills)
cd81e915 5152 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 5153#endif
01a57ef7 5154 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 5155 sv_setpvs(PL_linestr, "");
3280af22
NIS
5156 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5157 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5158 PL_last_lop = PL_last_uni = NULL;
737c24fc 5159 PL_parser->in_pod = 0;
a0d0e21e 5160 }
4e553d73 5161 }
60d63348 5162 if (PL_rsfp || PL_parser->filtered)
85613cab 5163 incline(s);
737c24fc 5164 } while (PL_parser->in_pod);
3280af22 5165 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 5166 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5167 PL_last_lop = PL_last_uni = NULL;
57843af0 5168 if (CopLINE(PL_curcop) == 1) {
3280af22 5169 while (s < PL_bufend && isSPACE(*s))
79072805 5170 s++;
a0d0e21e 5171 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 5172 s++;
5db06880
NC
5173#ifdef PERL_MAD
5174 if (PL_madskills)
cd81e915 5175 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 5176#endif
bd61b366 5177 d = NULL;
3280af22 5178 if (!PL_in_eval) {
44a8e56a 5179 if (*s == '#' && *(s+1) == '!')
5180 d = s + 2;
5181#ifdef ALTERNATE_SHEBANG
5182 else {
bfed75c6 5183 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 5184 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5185 d = s + (sizeof(as) - 1);
5186 }
5187#endif /* ALTERNATE_SHEBANG */
5188 }
5189 if (d) {
b8378b72 5190 char *ipath;
774d564b 5191 char *ipathend;
b8378b72 5192
774d564b 5193 while (isSPACE(*d))
b8378b72
CS
5194 d++;
5195 ipath = d;
774d564b 5196 while (*d && !isSPACE(*d))
5197 d++;
5198 ipathend = d;
5199
5200#ifdef ARG_ZERO_IS_SCRIPT
5201 if (ipathend > ipath) {
5202 /*
5203 * HP-UX (at least) sets argv[0] to the script name,
5204 * which makes $^X incorrect. And Digital UNIX and Linux,
5205 * at least, set argv[0] to the basename of the Perl
5206 * interpreter. So, having found "#!", we'll set it right.
5207 */
fafc274c
NC
5208 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5209 SVt_PV)); /* $^X */
774d564b 5210 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 5211 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 5212 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 5213 SvSETMAGIC(x);
5214 }
556c1dec
JH
5215 else {
5216 STRLEN blen;
5217 STRLEN llen;
cfd0369c 5218 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 5219 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
5220 if (llen < blen) {
5221 bstart += blen - llen;
5222 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5223 sv_setpvn(x, ipath, ipathend - ipath);
5224 SvSETMAGIC(x);
5225 }
5226 }
5227 }
774d564b 5228 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 5229 }
774d564b 5230#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
5231
5232 /*
5233 * Look for options.
5234 */
748a9306 5235 d = instr(s,"perl -");
84e30d1a 5236 if (!d) {
748a9306 5237 d = instr(s,"perl");
84e30d1a
GS
5238#if defined(DOSISH)
5239 /* avoid getting into infinite loops when shebang
5240 * line contains "Perl" rather than "perl" */
5241 if (!d) {
5242 for (d = ipathend-4; d >= ipath; --d) {
5243 if ((*d == 'p' || *d == 'P')
5244 && !ibcmp(d, "perl", 4))
5245 {
5246 break;
5247 }
5248 }
5249 if (d < ipath)
bd61b366 5250 d = NULL;
84e30d1a
GS
5251 }
5252#endif
5253 }
44a8e56a 5254#ifdef ALTERNATE_SHEBANG
5255 /*
5256 * If the ALTERNATE_SHEBANG on this system starts with a
5257 * character that can be part of a Perl expression, then if
5258 * we see it but not "perl", we're probably looking at the
5259 * start of Perl code, not a request to hand off to some
5260 * other interpreter. Similarly, if "perl" is there, but
5261 * not in the first 'word' of the line, we assume the line
5262 * contains the start of the Perl program.
44a8e56a 5263 */
5264 if (d && *s != '#') {
f54cb97a 5265 const char *c = ipath;
44a8e56a 5266 while (*c && !strchr("; \t\r\n\f\v#", *c))
5267 c++;
5268 if (c < d)
bd61b366 5269 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 5270 else
5271 *s = '#'; /* Don't try to parse shebang line */
5272 }
774d564b 5273#endif /* ALTERNATE_SHEBANG */
748a9306 5274 if (!d &&
44a8e56a 5275 *s == '#' &&
774d564b 5276 ipathend > ipath &&
3280af22 5277 !PL_minus_c &&
748a9306 5278 !instr(s,"indir") &&
3280af22 5279 instr(PL_origargv[0],"perl"))
748a9306 5280 {
27da23d5 5281 dVAR;
9f68db38 5282 char **newargv;
9f68db38 5283
774d564b 5284 *ipathend = '\0';
5285 s = ipathend + 1;
3280af22 5286 while (s < PL_bufend && isSPACE(*s))
9f68db38 5287 s++;
3280af22 5288 if (s < PL_bufend) {
d85f917e 5289 Newx(newargv,PL_origargc+3,char*);
9f68db38 5290 newargv[1] = s;
3280af22 5291 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
5292 s++;
5293 *s = '\0';
3280af22 5294 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
5295 }
5296 else
3280af22 5297 newargv = PL_origargv;
774d564b 5298 newargv[0] = ipath;
b35112e7 5299 PERL_FPU_PRE_EXEC
b4748376 5300 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 5301 PERL_FPU_POST_EXEC
cea2e8a9 5302 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 5303 }
748a9306 5304 if (d) {
c35e046a
AL
5305 while (*d && !isSPACE(*d))
5306 d++;
5307 while (SPACE_OR_TAB(*d))
5308 d++;
748a9306
LW
5309
5310 if (*d++ == '-') {
f54cb97a 5311 const bool switches_done = PL_doswitches;
fb993905
GA
5312 const U32 oldpdb = PL_perldb;
5313 const bool oldn = PL_minus_n;
5314 const bool oldp = PL_minus_p;
c7030b81 5315 const char *d1 = d;
fb993905 5316
8cc95fdb 5317 do {
4ba71d51
FC
5318 bool baduni = FALSE;
5319 if (*d1 == 'C') {
bd0ab00d
NC
5320 const char *d2 = d1 + 1;
5321 if (parse_unicode_opts((const char **)&d2)
5322 != PL_unicode)
5323 baduni = TRUE;
4ba71d51
FC
5324 }
5325 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
5326 const char * const m = d1;
5327 while (*d1 && !isSPACE(*d1))
5328 d1++;
cea2e8a9 5329 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 5330 (int)(d1 - m), m);
8cc95fdb 5331 }
c7030b81
NC
5332 d1 = moreswitches(d1);
5333 } while (d1);
f0b2cf55
YST
5334 if (PL_doswitches && !switches_done) {
5335 int argc = PL_origargc;
5336 char **argv = PL_origargv;
5337 do {
5338 argc--,argv++;
5339 } while (argc && argv[0][0] == '-' && argv[0][1]);
5340 init_argv_symbols(argc,argv);
5341 }
65269a95 5342 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5343 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5344 /* if we have already added "LINE: while (<>) {",
5345 we must not do it again */
748a9306 5346 {
76f68e9b 5347 sv_setpvs(PL_linestr, "");
3280af22
NIS
5348 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5349 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5350 PL_last_lop = PL_last_uni = NULL;
3280af22 5351 PL_preambled = FALSE;
65269a95 5352 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5353 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5354 goto retry;
5355 }
a0d0e21e 5356 }
79072805 5357 }
9f68db38 5358 }
79072805 5359 }
3280af22 5360 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5361 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5362 start_force(PL_curforce);
5363 NEXTVAL_NEXTTOKE.ival = 0;
5364 force_next(FORMRBRACK);
5365 TOKEN(';');
ae986130 5366 }
378cc40b 5367 goto retry;
4fdae800 5368 case '\r':
6a27c188 5369#ifdef PERL_STRICT_CR
cea2e8a9 5370 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5371 Perl_croak(aTHX_
cc507455 5372 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5373#endif
4fdae800 5374 case ' ': case '\t': case '\f': case 013:
5db06880 5375#ifdef PERL_MAD
cd81e915 5376 PL_realtokenstart = -1;
ac372eb8
RD
5377 if (!PL_thiswhite)
5378 PL_thiswhite = newSVpvs("");
5379 sv_catpvn(PL_thiswhite, s, 1);
5db06880 5380#endif
ac372eb8 5381 s++;
378cc40b 5382 goto retry;
378cc40b 5383 case '#':
e929a76b 5384 case '\n':
5db06880 5385#ifdef PERL_MAD
cd81e915 5386 PL_realtokenstart = -1;
5db06880 5387 if (PL_madskills)
cd81e915 5388 PL_faketokens = 0;
5db06880 5389#endif
60d63348 5390 if (PL_lex_state != LEX_NORMAL ||
62e4c90a
FC
5391 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5392 if (*s == '#' && s == PL_linestart && PL_in_eval
60d63348 5393 && !PL_rsfp && !PL_parser->filtered) {
df0deb90
GS
5394 /* handle eval qq[#line 1 "foo"\n ...] */
5395 CopLINE_dec(PL_curcop);
5396 incline(s);
5397 }
5db06880
NC
5398 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5399 s = SKIPSPACE0(s);
62e4c90a 5400 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5db06880
NC
5401 incline(s);
5402 }
5403 else {
9c74ccc9 5404 const bool in_comment = *s == '#';
5db06880
NC
5405 d = s;
5406 while (d < PL_bufend && *d != '\n')
5407 d++;
5408 if (d < PL_bufend)
5409 d++;
5410 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5637ef5b
NC
5411 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5412 d, PL_bufend);
5db06880
NC
5413#ifdef PERL_MAD
5414 if (PL_madskills)
cd81e915 5415 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5416#endif
5417 s = d;
9c74ccc9
FC
5418 if (in_comment && d == PL_bufend
5419 && PL_lex_state == LEX_INTERPNORMAL
90a536e1 5420 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
9c74ccc9
FC
5421 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5422 else incline(s);
5db06880 5423 }
3280af22 5424 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5425 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5426 start_force(PL_curforce);
5427 NEXTVAL_NEXTTOKE.ival = 0;
5428 force_next(FORMRBRACK);
5429 TOKEN(';');
a687059c 5430 }
378cc40b 5431 }
a687059c 5432 else {
5db06880
NC
5433#ifdef PERL_MAD
5434 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5435 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5436 PL_faketokens = 0;
5db06880
NC
5437 s = SKIPSPACE0(s);
5438 TOKEN(PEG); /* make sure any #! line is accessible */
5439 }
5440 s = SKIPSPACE0(s);
5441 }
5442 else {
5443/* if (PL_madskills && PL_lex_formbrack) { */
5444 d = s;
5445 while (d < PL_bufend && *d != '\n')
5446 d++;
5447 if (d < PL_bufend)
5448 d++;
5449 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5450 Perl_croak(aTHX_ "panic: input overflow");
5451 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5452 if (!PL_thiswhite)
6b29d1f5 5453 PL_thiswhite = newSVpvs("");
5db06880 5454 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5455 sv_setpvs(PL_thiswhite, "");
cd81e915 5456 PL_faketokens = 0;
5db06880 5457 }
cd81e915 5458 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5459 }
5460 s = d;
5461/* }
5462 *s = '\0';
5463 PL_bufend = s; */
5464 }
5465#else
378cc40b 5466 *s = '\0';
3280af22 5467 PL_bufend = s;
5db06880 5468#endif
a687059c 5469 }
378cc40b
LW
5470 goto retry;
5471 case '-':
79072805 5472 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5473 I32 ftst = 0;
90771dc0 5474 char tmp;
e5edeb50 5475
378cc40b 5476 s++;
3280af22 5477 PL_bufptr = s;
748a9306
LW
5478 tmp = *s++;
5479
bf4acbe4 5480 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5481 s++;
5482
5483 if (strnEQ(s,"=>",2)) {
3280af22 5484 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5485 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5486 OPERATOR('-'); /* unary minus */
5487 }
3280af22 5488 PL_last_uni = PL_oldbufptr;
748a9306 5489 switch (tmp) {
e5edeb50
JH
5490 case 'r': ftst = OP_FTEREAD; break;
5491 case 'w': ftst = OP_FTEWRITE; break;
5492 case 'x': ftst = OP_FTEEXEC; break;
5493 case 'o': ftst = OP_FTEOWNED; break;
5494 case 'R': ftst = OP_FTRREAD; break;
5495 case 'W': ftst = OP_FTRWRITE; break;
5496 case 'X': ftst = OP_FTREXEC; break;
5497 case 'O': ftst = OP_FTROWNED; break;
5498 case 'e': ftst = OP_FTIS; break;
5499 case 'z': ftst = OP_FTZERO; break;
5500 case 's': ftst = OP_FTSIZE; break;
5501 case 'f': ftst = OP_FTFILE; break;
5502 case 'd': ftst = OP_FTDIR; break;
5503 case 'l': ftst = OP_FTLINK; break;
5504 case 'p': ftst = OP_FTPIPE; break;
5505 case 'S': ftst = OP_FTSOCK; break;
5506 case 'u': ftst = OP_FTSUID; break;
5507 case 'g': ftst = OP_FTSGID; break;
5508 case 'k': ftst = OP_FTSVTX; break;
5509 case 'b': ftst = OP_FTBLK; break;
5510 case 'c': ftst = OP_FTCHR; break;
5511 case 't': ftst = OP_FTTTY; break;
5512 case 'T': ftst = OP_FTTEXT; break;
5513 case 'B': ftst = OP_FTBINARY; break;
5514 case 'M': case 'A': case 'C':
fafc274c 5515 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5516 switch (tmp) {
5517 case 'M': ftst = OP_FTMTIME; break;
5518 case 'A': ftst = OP_FTATIME; break;
5519 case 'C': ftst = OP_FTCTIME; break;
5520 default: break;
5521 }
5522 break;
378cc40b 5523 default:
378cc40b
LW
5524 break;
5525 }
e5edeb50 5526 if (ftst) {
eb160463 5527 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5528 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5529 "### Saw file test %c\n", (int)tmp);
5f80b19c 5530 } );
e5edeb50
JH
5531 FTST(ftst);
5532 }
5533 else {
5534 /* Assume it was a minus followed by a one-letter named
5535 * subroutine call (or a -bareword), then. */
95c31fe3 5536 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5537 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5538 (int) tmp);
5f80b19c 5539 } );
3cf7b4c4 5540 s = --PL_bufptr;
e5edeb50 5541 }
378cc40b 5542 }
90771dc0
NC
5543 {
5544 const char tmp = *s++;
5545 if (*s == tmp) {
5546 s++;
5547 if (PL_expect == XOPERATOR)
5548 TERM(POSTDEC);
5549 else
5550 OPERATOR(PREDEC);
5551 }
5552 else if (*s == '>') {
5553 s++;
29595ff2 5554 s = SKIPSPACE1(s);
90771dc0
NC
5555 if (isIDFIRST_lazy_if(s,UTF)) {
5556 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5557 TOKEN(ARROW);
5558 }
5559 else if (*s == '$')
5560 OPERATOR(ARROW);
5561 else
5562 TERM(ARROW);
5563 }
78cdf107
Z
5564 if (PL_expect == XOPERATOR) {
5565 if (*s == '=' && !PL_lex_allbrackets &&
5566 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5567 s--;
5568 TOKEN(0);
5569 }
90771dc0 5570 Aop(OP_SUBTRACT);
78cdf107 5571 }
90771dc0
NC
5572 else {
5573 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5574 check_uni();
5575 OPERATOR('-'); /* unary minus */
79072805 5576 }
2f3197b3 5577 }
79072805 5578
378cc40b 5579 case '+':
90771dc0
NC
5580 {
5581 const char tmp = *s++;
5582 if (*s == tmp) {
5583 s++;
5584 if (PL_expect == XOPERATOR)
5585 TERM(POSTINC);
5586 else
5587 OPERATOR(PREINC);
5588 }
78cdf107
Z
5589 if (PL_expect == XOPERATOR) {
5590 if (*s == '=' && !PL_lex_allbrackets &&
5591 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5592 s--;
5593 TOKEN(0);
5594 }
90771dc0 5595 Aop(OP_ADD);
78cdf107 5596 }
90771dc0
NC
5597 else {
5598 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5599 check_uni();
5600 OPERATOR('+');
5601 }
2f3197b3 5602 }
a687059c 5603
378cc40b 5604 case '*':
3280af22
NIS
5605 if (PL_expect != XOPERATOR) {
5606 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5607 PL_expect = XOPERATOR;
5608 force_ident(PL_tokenbuf, '*');
5609 if (!*PL_tokenbuf)
a0d0e21e 5610 PREREF('*');
79072805 5611 TERM('*');
a687059c 5612 }
79072805
LW
5613 s++;
5614 if (*s == '*') {
a687059c 5615 s++;
78cdf107
Z
5616 if (*s == '=' && !PL_lex_allbrackets &&
5617 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5618 s -= 2;
5619 TOKEN(0);
5620 }
79072805 5621 PWop(OP_POW);
a687059c 5622 }
78cdf107
Z
5623 if (*s == '=' && !PL_lex_allbrackets &&
5624 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5625 s--;
5626 TOKEN(0);
5627 }
79072805
LW
5628 Mop(OP_MULTIPLY);
5629
378cc40b 5630 case '%':
3280af22 5631 if (PL_expect == XOPERATOR) {
78cdf107
Z
5632 if (s[1] == '=' && !PL_lex_allbrackets &&
5633 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5634 TOKEN(0);
bbce6d69 5635 ++s;
5636 Mop(OP_MODULO);
a687059c 5637 }
3280af22 5638 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5639 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5640 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5641 if (!PL_tokenbuf[1]) {
bbce6d69 5642 PREREF('%');
a687059c 5643 }
60ac52eb
FC
5644 PL_expect = XOPERATOR;
5645 force_ident_maybe_lex('%');
bbce6d69 5646 TERM('%');
a687059c 5647
378cc40b 5648 case '^':
78cdf107
Z
5649 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5650 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5651 TOKEN(0);
79072805 5652 s++;
a0d0e21e 5653 BOop(OP_BIT_XOR);
79072805 5654 case '[':
a7aaec61
Z
5655 if (PL_lex_brackets > 100)
5656 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5657 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5658 PL_lex_allbrackets++;
df3467db
IG
5659 {
5660 const char tmp = *s++;
5661 OPERATOR(tmp);
5662 }
378cc40b 5663 case '~':
0d863452 5664 if (s[1] == '~'
3e7dd34d 5665 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5666 {
78cdf107
Z
5667 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5668 TOKEN(0);
0d863452
RH
5669 s += 2;
5670 Eop(OP_SMARTMATCH);
5671 }
78cdf107
Z
5672 s++;
5673 OPERATOR('~');
378cc40b 5674 case ',':
78cdf107
Z
5675 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5676 TOKEN(0);
5677 s++;
5678 OPERATOR(',');
a0d0e21e
LW
5679 case ':':
5680 if (s[1] == ':') {
5681 len = 0;
0bfa2a8a 5682 goto just_a_word_zero_gv;
a0d0e21e
LW
5683 }
5684 s++;
09bef843
SB
5685 switch (PL_expect) {
5686 OP *attrs;
5db06880
NC
5687#ifdef PERL_MAD
5688 I32 stuffstart;
5689#endif
09bef843
SB
5690 case XOPERATOR:
5691 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5692 break;
5693 PL_bufptr = s; /* update in case we back off */
d83f38d8 5694 if (*s == '=') {
2dc78664
NC
5695 Perl_croak(aTHX_
5696 "Use of := for an empty attribute list is not allowed");
d83f38d8 5697 }
09bef843
SB
5698 goto grabattrs;
5699 case XATTRBLOCK:
5700 PL_expect = XBLOCK;
5701 goto grabattrs;
5702 case XATTRTERM:
5703 PL_expect = XTERMBLOCK;
5704 grabattrs:
5db06880
NC
5705#ifdef PERL_MAD
5706 stuffstart = s - SvPVX(PL_linestr) - 1;
5707#endif
29595ff2 5708 s = PEEKSPACE(s);
5f66b61c 5709 attrs = NULL;
7e2040f0 5710 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5711 I32 tmp;
5cc237b8 5712 SV *sv;
09bef843 5713 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5714 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5715 if (tmp < 0) tmp = -tmp;
5716 switch (tmp) {
5717 case KEY_or:
5718 case KEY_and:
5719 case KEY_for:
11baf631 5720 case KEY_foreach:
f9829d6b
GS
5721 case KEY_unless:
5722 case KEY_if:
5723 case KEY_while:
5724 case KEY_until:
5725 goto got_attrs;
5726 default:
5727 break;
5728 }
5729 }
89a5757c 5730 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
09bef843 5731 if (*d == '(') {
d24ca0c5 5732 d = scan_str(d,TRUE,TRUE,FALSE);
09bef843 5733 if (!d) {
09bef843
SB
5734 /* MUST advance bufptr here to avoid bogus
5735 "at end of line" context messages from yyerror().
5736 */
5737 PL_bufptr = s + len;
5738 yyerror("Unterminated attribute parameter in attribute list");
5739 if (attrs)
5740 op_free(attrs);
5cc237b8 5741 sv_free(sv);
bbf60fe6 5742 return REPORT(0); /* EOF indicator */
09bef843
SB
5743 }
5744 }
5745 if (PL_lex_stuff) {
09bef843 5746 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5747 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5748 newSVOP(OP_CONST, 0, sv));
5749 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5750 PL_lex_stuff = NULL;
09bef843
SB
5751 }
5752 else {
5cc237b8
BS
5753 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5754 sv_free(sv);
1108974d 5755 if (PL_in_my == KEY_our) {
df9a6019 5756 deprecate(":unique");
1108974d 5757 }
bfed75c6 5758 else
371fce9b
DM
5759 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5760 }
5761
d3cea301
SB
5762 /* NOTE: any CV attrs applied here need to be part of
5763 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5764 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5765 sv_free(sv);
78f9721b 5766 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5767 }
5768 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5769 sv_free(sv);
8e5dadda 5770 deprecate(":locked");
5cc237b8
BS
5771 }
5772 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5773 sv_free(sv);
78f9721b 5774 CvMETHOD_on(PL_compcv);
5cc237b8 5775 }
78f9721b
SM
5776 /* After we've set the flags, it could be argued that
5777 we don't need to do the attributes.pm-based setting
5778 process, and shouldn't bother appending recognized
d3cea301
SB
5779 flags. To experiment with that, uncomment the
5780 following "else". (Note that's already been
5781 uncommented. That keeps the above-applied built-in
5782 attributes from being intercepted (and possibly
5783 rejected) by a package's attribute routines, but is
5784 justified by the performance win for the common case
5785 of applying only built-in attributes.) */
0256094b 5786 else
2fcb4757 5787 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5788 newSVOP(OP_CONST, 0,
5cc237b8 5789 sv));
09bef843 5790 }
29595ff2 5791 s = PEEKSPACE(d);
0120eecf 5792 if (*s == ':' && s[1] != ':')
29595ff2 5793 s = PEEKSPACE(s+1);
0120eecf
GS
5794 else if (s == d)
5795 break; /* require real whitespace or :'s */
29595ff2 5796 /* XXX losing whitespace on sequential attributes here */
09bef843 5797 }
90771dc0
NC
5798 {
5799 const char tmp
5800 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5801 if (*s != ';' && *s != '}' && *s != tmp
5802 && (tmp != '=' || *s != ')')) {
5803 const char q = ((*s == '\'') ? '"' : '\'');
5804 /* If here for an expression, and parsed no attrs, back
5805 off. */
5806 if (tmp == '=' && !attrs) {
5807 s = PL_bufptr;
5808 break;
5809 }
5810 /* MUST advance bufptr here to avoid bogus "at end of line"
5811 context messages from yyerror().
5812 */
5813 PL_bufptr = s;
10edeb5d
JH
5814 yyerror( (const char *)
5815 (*s
5816 ? Perl_form(aTHX_ "Invalid separator character "
5817 "%c%c%c in attribute list", q, *s, q)
5818 : "Unterminated attribute list" ) );
90771dc0
NC
5819 if (attrs)
5820 op_free(attrs);
5821 OPERATOR(':');
09bef843 5822 }
09bef843 5823 }
f9829d6b 5824 got_attrs:
09bef843 5825 if (attrs) {
cd81e915 5826 start_force(PL_curforce);
9ded7720 5827 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5828 CURMAD('_', PL_nextwhite);
89122651 5829 force_next(THING);
5db06880
NC
5830 }
5831#ifdef PERL_MAD
5832 if (PL_madskills) {
cd81e915 5833 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5834 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5835 }
5db06880 5836#endif
09bef843
SB
5837 TOKEN(COLONATTR);
5838 }
78cdf107
Z
5839 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5840 s--;
5841 TOKEN(0);
5842 }
5843 PL_lex_allbrackets--;
a0d0e21e 5844 OPERATOR(':');
8990e307
LW
5845 case '(':
5846 s++;
3280af22
NIS
5847 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5848 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5849 else
3280af22 5850 PL_expect = XTERM;
29595ff2 5851 s = SKIPSPACE1(s);
78cdf107 5852 PL_lex_allbrackets++;
a0d0e21e 5853 TOKEN('(');
378cc40b 5854 case ';':
78cdf107
Z
5855 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5856 TOKEN(0);
f4dd75d9 5857 CLINE;
78cdf107
Z
5858 s++;
5859 OPERATOR(';');
378cc40b 5860 case ')':
78cdf107
Z
5861 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5862 TOKEN(0);
5863 s++;
5864 PL_lex_allbrackets--;
5865 s = SKIPSPACE1(s);
5866 if (*s == '{')
5867 PREBLOCK(')');
5868 TERM(')');
79072805 5869 case ']':
a7aaec61
Z
5870 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5871 TOKEN(0);
79072805 5872 s++;
3280af22 5873 if (PL_lex_brackets <= 0)
d98d5fff 5874 yyerror("Unmatched right square bracket");
463ee0b2 5875 else
3280af22 5876 --PL_lex_brackets;
78cdf107 5877 PL_lex_allbrackets--;
3280af22
NIS
5878 if (PL_lex_state == LEX_INTERPNORMAL) {
5879 if (PL_lex_brackets == 0) {
02255c60
FC
5880 if (*s == '-' && s[1] == '>')
5881 PL_lex_state = LEX_INTERPENDMAYBE;
5882 else if (*s != '[' && *s != '{')
3280af22 5883 PL_lex_state = LEX_INTERPEND;
79072805
LW
5884 }
5885 }
4633a7c4 5886 TERM(']');
79072805 5887 case '{':
79072805 5888 s++;
eaf6a13d 5889 leftbracket:
3280af22 5890 if (PL_lex_brackets > 100) {
8edd5f42 5891 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5892 }
3280af22 5893 switch (PL_expect) {
a0d0e21e 5894 case XTERM:
819b004e 5895 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5896 PL_lex_allbrackets++;
79072805 5897 OPERATOR(HASHBRACK);
a0d0e21e 5898 case XOPERATOR:
bf4acbe4 5899 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5900 s++;
44a8e56a 5901 d = s;
3280af22
NIS
5902 PL_tokenbuf[0] = '\0';
5903 if (d < PL_bufend && *d == '-') {
5904 PL_tokenbuf[0] = '-';
44a8e56a 5905 d++;
bf4acbe4 5906 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5907 d++;
5908 }
7e2040f0 5909 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5910 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5911 FALSE, &len);
bf4acbe4 5912 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5913 d++;
5914 if (*d == '}') {
f54cb97a 5915 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5916 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5917 if (minus)
5918 force_next('-');
748a9306
LW
5919 }
5920 }
5921 /* FALL THROUGH */
09bef843 5922 case XATTRBLOCK:
748a9306 5923 case XBLOCK:
3280af22 5924 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5925 PL_lex_allbrackets++;
3280af22 5926 PL_expect = XSTATE;
a0d0e21e 5927 break;
09bef843 5928 case XATTRTERM:
a0d0e21e 5929 case XTERMBLOCK:
3280af22 5930 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5931 PL_lex_allbrackets++;
3280af22 5932 PL_expect = XSTATE;
a0d0e21e
LW
5933 break;
5934 default: {
f54cb97a 5935 const char *t;
3280af22
NIS
5936 if (PL_oldoldbufptr == PL_last_lop)
5937 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5938 else
3280af22 5939 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5940 PL_lex_allbrackets++;
29595ff2 5941 s = SKIPSPACE1(s);
8452ff4b
SB
5942 if (*s == '}') {
5943 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5944 PL_expect = XTERM;
5945 /* This hack is to get the ${} in the message. */
5946 PL_bufptr = s+1;
5947 yyerror("syntax error");
5948 break;
5949 }
a0d0e21e 5950 OPERATOR(HASHBRACK);
8452ff4b 5951 }
b8a4b1be
GS
5952 /* This hack serves to disambiguate a pair of curlies
5953 * as being a block or an anon hash. Normally, expectation
5954 * determines that, but in cases where we're not in a
5955 * position to expect anything in particular (like inside
5956 * eval"") we have to resolve the ambiguity. This code
5957 * covers the case where the first term in the curlies is a
5958 * quoted string. Most other cases need to be explicitly
a0288114 5959 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5960 * curly in order to force resolution as an anon hash.
5961 *
5962 * XXX should probably propagate the outer expectation
5963 * into eval"" to rely less on this hack, but that could
5964 * potentially break current behavior of eval"".
5965 * GSAR 97-07-21
5966 */
5967 t = s;
5968 if (*s == '\'' || *s == '"' || *s == '`') {
5969 /* common case: get past first string, handling escapes */
3280af22 5970 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5971 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5972 t++;
5973 t++;
a0d0e21e 5974 }
b8a4b1be 5975 else if (*s == 'q') {
3280af22 5976 if (++t < PL_bufend
b8a4b1be 5977 && (!isALNUM(*t)
3280af22 5978 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5979 && !isALNUM(*t))))
5980 {
abc667d1 5981 /* skip q//-like construct */
f54cb97a 5982 const char *tmps;
b8a4b1be
GS
5983 char open, close, term;
5984 I32 brackets = 1;
5985
3280af22 5986 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5987 t++;
abc667d1
DM
5988 /* check for q => */
5989 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5990 OPERATOR(HASHBRACK);
5991 }
b8a4b1be
GS
5992 term = *t;
5993 open = term;
5994 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5995 term = tmps[5];
5996 close = term;
5997 if (open == close)
3280af22
NIS
5998 for (t++; t < PL_bufend; t++) {
5999 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 6000 t++;
6d07e5e9 6001 else if (*t == open)
b8a4b1be
GS
6002 break;
6003 }
abc667d1 6004 else {
3280af22
NIS
6005 for (t++; t < PL_bufend; t++) {
6006 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 6007 t++;
6d07e5e9 6008 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
6009 break;
6010 else if (*t == open)
6011 brackets++;
6012 }
abc667d1
DM
6013 }
6014 t++;
b8a4b1be 6015 }
abc667d1
DM
6016 else
6017 /* skip plain q word */
6018 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
6019 t += UTF8SKIP(t);
a0d0e21e 6020 }
7e2040f0 6021 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 6022 t += UTF8SKIP(t);
7e2040f0 6023 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 6024 t += UTF8SKIP(t);
a0d0e21e 6025 }
3280af22 6026 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 6027 t++;
b8a4b1be
GS
6028 /* if comma follows first term, call it an anon hash */
6029 /* XXX it could be a comma expression with loop modifiers */
3280af22 6030 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 6031 || (*t == '=' && t[1] == '>')))
a0d0e21e 6032 OPERATOR(HASHBRACK);
3280af22 6033 if (PL_expect == XREF)
4e4e412b 6034 PL_expect = XTERM;
a0d0e21e 6035 else {
3280af22
NIS
6036 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
6037 PL_expect = XSTATE;
a0d0e21e 6038 }
8990e307 6039 }
a0d0e21e 6040 break;
463ee0b2 6041 }
6154021b 6042 pl_yylval.ival = CopLINE(PL_curcop);
79072805 6043 if (isSPACE(*s) || *s == '#')
3280af22 6044 PL_copline = NOLINE; /* invalidate current command line number */
7c70caa5 6045 TOKEN(formbrack ? '=' : '{');
378cc40b 6046 case '}':
a7aaec61
Z
6047 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
6048 TOKEN(0);
79072805
LW
6049 rightbracket:
6050 s++;
3280af22 6051 if (PL_lex_brackets <= 0)
d98d5fff 6052 yyerror("Unmatched right curly bracket");
463ee0b2 6053 else
3280af22 6054 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 6055 PL_lex_allbrackets--;
3280af22
NIS
6056 if (PL_lex_state == LEX_INTERPNORMAL) {
6057 if (PL_lex_brackets == 0) {
9059aa12
LW
6058 if (PL_expect & XFAKEBRACK) {
6059 PL_expect &= XENUMMASK;
3280af22
NIS
6060 PL_lex_state = LEX_INTERPEND;
6061 PL_bufptr = s;
5db06880
NC
6062#if 0
6063 if (PL_madskills) {
cd81e915 6064 if (!PL_thiswhite)
6b29d1f5 6065 PL_thiswhite = newSVpvs("");
76f68e9b 6066 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
6067 }
6068#endif
cea2e8a9 6069 return yylex(); /* ignore fake brackets */
79072805 6070 }
f777953f 6071 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6b00f562
FC
6072 && SvEVALED(PL_lex_repl))
6073 PL_lex_state = LEX_INTERPEND;
6074 else if (*s == '-' && s[1] == '>')
3280af22 6075 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 6076 else if (*s != '[' && *s != '{')
3280af22 6077 PL_lex_state = LEX_INTERPEND;
79072805
LW
6078 }
6079 }
9059aa12
LW
6080 if (PL_expect & XFAKEBRACK) {
6081 PL_expect &= XENUMMASK;
3280af22 6082 PL_bufptr = s;
cea2e8a9 6083 return yylex(); /* ignore fake brackets */
748a9306 6084 }
cd81e915 6085 start_force(PL_curforce);
5db06880
NC
6086 if (PL_madskills) {
6087 curmad('X', newSVpvn(s-1,1));
cd81e915 6088 CURMAD('_', PL_thiswhite);
5db06880 6089 }
7c70caa5 6090 force_next(formbrack ? '.' : '}');
583c9d5c 6091 if (formbrack) LEAVE;
5db06880 6092#ifdef PERL_MAD
cd81e915 6093 if (!PL_thistoken)
6b29d1f5 6094 PL_thistoken = newSVpvs("");
5db06880 6095#endif
705fe0e5
FC
6096 if (formbrack == 2) { /* means . where arguments were expected */
6097 start_force(PL_curforce);
6098 force_next(';');
96f9b782 6099 TOKEN(FORMRBRACK);
705fe0e5 6100 }
79072805 6101 TOKEN(';');
378cc40b
LW
6102 case '&':
6103 s++;
78cdf107
Z
6104 if (*s++ == '&') {
6105 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6106 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6107 s -= 2;
6108 TOKEN(0);
6109 }
a0d0e21e 6110 AOPERATOR(ANDAND);
78cdf107 6111 }
378cc40b 6112 s--;
3280af22 6113 if (PL_expect == XOPERATOR) {
041457d9
DM
6114 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
6115 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 6116 {
57843af0 6117 CopLINE_dec(PL_curcop);
f1f66076 6118 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6119 CopLINE_inc(PL_curcop);
463ee0b2 6120 }
78cdf107
Z
6121 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6122 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6123 s--;
6124 TOKEN(0);
6125 }
79072805 6126 BAop(OP_BIT_AND);
463ee0b2 6127 }
79072805 6128
c07656ed
FC
6129 PL_tokenbuf[0] = '&';
6130 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
6131 sizeof PL_tokenbuf - 1, TRUE);
6132 if (PL_tokenbuf[1]) {
3280af22 6133 PL_expect = XOPERATOR;
60ac52eb 6134 force_ident_maybe_lex('&');
463ee0b2 6135 }
79072805
LW
6136 else
6137 PREREF('&');
6154021b 6138 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
6139 TERM('&');
6140
378cc40b
LW
6141 case '|':
6142 s++;
78cdf107
Z
6143 if (*s++ == '|') {
6144 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6145 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6146 s -= 2;
6147 TOKEN(0);
6148 }
a0d0e21e 6149 AOPERATOR(OROR);
78cdf107 6150 }
378cc40b 6151 s--;
78cdf107
Z
6152 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6153 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6154 s--;
6155 TOKEN(0);
6156 }
79072805 6157 BOop(OP_BIT_OR);
378cc40b
LW
6158 case '=':
6159 s++;
748a9306 6160 {
90771dc0 6161 const char tmp = *s++;
78cdf107
Z
6162 if (tmp == '=') {
6163 if (!PL_lex_allbrackets &&
6164 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6165 s -= 2;
6166 TOKEN(0);
6167 }
90771dc0 6168 Eop(OP_EQ);
78cdf107
Z
6169 }
6170 if (tmp == '>') {
6171 if (!PL_lex_allbrackets &&
6172 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6173 s -= 2;
6174 TOKEN(0);
6175 }
90771dc0 6176 OPERATOR(',');
78cdf107 6177 }
90771dc0
NC
6178 if (tmp == '~')
6179 PMop(OP_MATCH);
6180 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6181 && strchr("+-*/%.^&|<",tmp))
6182 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6183 "Reversed %c= operator",(int)tmp);
6184 s--;
6185 if (PL_expect == XSTATE && isALPHA(tmp) &&
6186 (s == PL_linestart+1 || s[-2] == '\n') )
6187 {
62e4c90a 6188 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
4a7239ff 6189 || PL_lex_state != LEX_NORMAL) {
90771dc0
NC
6190 d = PL_bufend;
6191 while (s < d) {
6192 if (*s++ == '\n') {
6193 incline(s);
6194 if (strnEQ(s,"=cut",4)) {
6195 s = strchr(s,'\n');
6196 if (s)
6197 s++;
6198 else
6199 s = d;
6200 incline(s);
6201 goto retry;
6202 }
6203 }
a5f75d66 6204 }
90771dc0 6205 goto retry;
a5f75d66 6206 }
5db06880
NC
6207#ifdef PERL_MAD
6208 if (PL_madskills) {
cd81e915 6209 if (!PL_thiswhite)
6b29d1f5 6210 PL_thiswhite = newSVpvs("");
cd81e915 6211 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
6212 PL_bufend - PL_linestart);
6213 }
6214#endif
90771dc0 6215 s = PL_bufend;
737c24fc 6216 PL_parser->in_pod = 1;
90771dc0 6217 goto retry;
a5f75d66 6218 }
a0d0e21e 6219 }
64a40898 6220 if (PL_expect == XBLOCK) {
c35e046a 6221 const char *t = s;
51882d45 6222#ifdef PERL_STRICT_CR
c35e046a 6223 while (SPACE_OR_TAB(*t))
51882d45 6224#else
c35e046a 6225 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 6226#endif
c35e046a 6227 t++;
a0d0e21e 6228 if (*t == '\n' || *t == '#') {
705fe0e5 6229 formbrack = 1;
583c9d5c
FC
6230 ENTER;
6231 SAVEI8(PL_parser->form_lex_state);
64a40898 6232 SAVEI32(PL_lex_formbrack);
583c9d5c 6233 PL_parser->form_lex_state = PL_lex_state;
64a40898 6234 PL_lex_formbrack = PL_lex_brackets + 1;
a0d0e21e
LW
6235 goto leftbracket;
6236 }
79072805 6237 }
78cdf107
Z
6238 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6239 s--;
6240 TOKEN(0);
6241 }
6154021b 6242 pl_yylval.ival = 0;
a0d0e21e 6243 OPERATOR(ASSIGNOP);
378cc40b
LW
6244 case '!':
6245 s++;
90771dc0
NC
6246 {
6247 const char tmp = *s++;
6248 if (tmp == '=') {
6249 /* was this !=~ where !~ was meant?
6250 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6251
6252 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6253 const char *t = s+1;
6254
6255 while (t < PL_bufend && isSPACE(*t))
6256 ++t;
6257
6258 if (*t == '/' || *t == '?' ||
6259 ((*t == 'm' || *t == 's' || *t == 'y')
6260 && !isALNUM(t[1])) ||
6261 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6262 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6263 "!=~ should be !~");
6264 }
78cdf107
Z
6265 if (!PL_lex_allbrackets &&
6266 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6267 s -= 2;
6268 TOKEN(0);
6269 }
90771dc0
NC
6270 Eop(OP_NE);
6271 }
6272 if (tmp == '~')
6273 PMop(OP_NOT);
6274 }
378cc40b
LW
6275 s--;
6276 OPERATOR('!');
6277 case '<':
3280af22 6278 if (PL_expect != XOPERATOR) {
93a17b20 6279 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 6280 check_uni();
79072805
LW
6281 if (s[1] == '<')
6282 s = scan_heredoc(s);
6283 else
6284 s = scan_inputsymbol(s);
78a635de
FC
6285 PL_expect = XOPERATOR;
6286 TOKEN(sublex_start());
378cc40b
LW
6287 }
6288 s++;
90771dc0
NC
6289 {
6290 char tmp = *s++;
78cdf107
Z
6291 if (tmp == '<') {
6292 if (*s == '=' && !PL_lex_allbrackets &&
6293 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6294 s -= 2;
6295 TOKEN(0);
6296 }
90771dc0 6297 SHop(OP_LEFT_SHIFT);
78cdf107 6298 }
90771dc0
NC
6299 if (tmp == '=') {
6300 tmp = *s++;
78cdf107
Z
6301 if (tmp == '>') {
6302 if (!PL_lex_allbrackets &&
6303 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6304 s -= 3;
6305 TOKEN(0);
6306 }
90771dc0 6307 Eop(OP_NCMP);
78cdf107 6308 }
90771dc0 6309 s--;
78cdf107
Z
6310 if (!PL_lex_allbrackets &&
6311 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6312 s -= 2;
6313 TOKEN(0);
6314 }
90771dc0
NC
6315 Rop(OP_LE);
6316 }
395c3793 6317 }
378cc40b 6318 s--;
78cdf107
Z
6319 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6320 s--;
6321 TOKEN(0);
6322 }
79072805 6323 Rop(OP_LT);
378cc40b
LW
6324 case '>':
6325 s++;
90771dc0
NC
6326 {
6327 const char tmp = *s++;
78cdf107
Z
6328 if (tmp == '>') {
6329 if (*s == '=' && !PL_lex_allbrackets &&
6330 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6331 s -= 2;
6332 TOKEN(0);
6333 }
90771dc0 6334 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
6335 }
6336 else if (tmp == '=') {
6337 if (!PL_lex_allbrackets &&
6338 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6339 s -= 2;
6340 TOKEN(0);
6341 }
90771dc0 6342 Rop(OP_GE);
78cdf107 6343 }
90771dc0 6344 }
378cc40b 6345 s--;
78cdf107
Z
6346 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6347 s--;
6348 TOKEN(0);
6349 }
79072805 6350 Rop(OP_GT);
378cc40b
LW
6351
6352 case '$':
bbce6d69 6353 CLINE;
6354
3280af22
NIS
6355 if (PL_expect == XOPERATOR) {
6356 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6357 return deprecate_commaless_var_list();
a0d0e21e 6358 }
8990e307 6359 }
a0d0e21e 6360
c0b977fd 6361 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6362 PL_tokenbuf[0] = '@';
376b8730
SM
6363 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6364 sizeof PL_tokenbuf - 1, FALSE);
6365 if (PL_expect == XOPERATOR)
6366 no_op("Array length", s);
3280af22 6367 if (!PL_tokenbuf[1])
a0d0e21e 6368 PREREF(DOLSHARP);
3280af22 6369 PL_expect = XOPERATOR;
60ac52eb 6370 force_ident_maybe_lex('#');
463ee0b2 6371 TOKEN(DOLSHARP);
79072805 6372 }
bbce6d69 6373
3280af22 6374 PL_tokenbuf[0] = '$';
376b8730
SM
6375 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6376 sizeof PL_tokenbuf - 1, FALSE);
6377 if (PL_expect == XOPERATOR)
6378 no_op("Scalar", s);
3280af22
NIS
6379 if (!PL_tokenbuf[1]) {
6380 if (s == PL_bufend)
bbce6d69 6381 yyerror("Final $ should be \\$ or $name");
6382 PREREF('$');
8990e307 6383 }
a0d0e21e 6384
ff68c719 6385 d = s;
90771dc0
NC
6386 {
6387 const char tmp = *s;
ae28bb2a 6388 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6389 s = SKIPSPACE1(s);
ff68c719 6390
90771dc0
NC
6391 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6392 && intuit_more(s)) {
6393 if (*s == '[') {
6394 PL_tokenbuf[0] = '@';
6395 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6396 char *t = s+1;
6397
6398 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6399 t++;
90771dc0 6400 if (*t++ == ',') {
29595ff2 6401 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6402 while (t < PL_bufend && *t != ']')
6403 t++;
9014280d 6404 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6405 "Multidimensional syntax %.*s not supported",
36c7798d 6406 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6407 }
748a9306 6408 }
93a17b20 6409 }
90771dc0
NC
6410 else if (*s == '{') {
6411 char *t;
6412 PL_tokenbuf[0] = '%';
6413 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6414 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6415 {
6416 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6417 do {
6418 t++;
6419 } while (isSPACE(*t));
90771dc0 6420 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6421 STRLEN len;
90771dc0 6422 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6423 &len);
c35e046a
AL
6424 while (isSPACE(*t))
6425 t++;
4c01a014
BF
6426 if (*t == ';'
6427 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
90771dc0 6428 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4c01a014
BF
6429 "You need to quote \"%"SVf"\"",
6430 SVfARG(newSVpvn_flags(tmpbuf, len,
6431 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
90771dc0
NC
6432 }
6433 }
6434 }
93a17b20 6435 }
bbce6d69 6436
90771dc0
NC
6437 PL_expect = XOPERATOR;
6438 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6439 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6440 if (!islop || PL_last_lop_op == OP_GREPSTART)
6441 PL_expect = XOPERATOR;
6442 else if (strchr("$@\"'`q", *s))
6443 PL_expect = XTERM; /* e.g. print $fh "foo" */
6444 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6445 PL_expect = XTERM; /* e.g. print $fh &sub */
6446 else if (isIDFIRST_lazy_if(s,UTF)) {
6447 char tmpbuf[sizeof PL_tokenbuf];
6448 int t2;
6449 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6450 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6451 /* binary operators exclude handle interpretations */
6452 switch (t2) {
6453 case -KEY_x:
6454 case -KEY_eq:
6455 case -KEY_ne:
6456 case -KEY_gt:
6457 case -KEY_lt:
6458 case -KEY_ge:
6459 case -KEY_le:
6460 case -KEY_cmp:
6461 break;
6462 default:
6463 PL_expect = XTERM; /* e.g. print $fh length() */
6464 break;
6465 }
6466 }
6467 else {
6468 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6469 }
6470 }
90771dc0
NC
6471 else if (isDIGIT(*s))
6472 PL_expect = XTERM; /* e.g. print $fh 3 */
6473 else if (*s == '.' && isDIGIT(s[1]))
6474 PL_expect = XTERM; /* e.g. print $fh .3 */
6475 else if ((*s == '?' || *s == '-' || *s == '+')
6476 && !isSPACE(s[1]) && s[1] != '=')
6477 PL_expect = XTERM; /* e.g. print $fh -1 */
6478 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6479 && s[1] != '/')
6480 PL_expect = XTERM; /* e.g. print $fh /.../
6481 XXX except DORDOR operator
6482 */
6483 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6484 && s[2] != '=')
6485 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6486 }
bbce6d69 6487 }
60ac52eb 6488 force_ident_maybe_lex('$');
79072805 6489 TOKEN('$');
378cc40b
LW
6490
6491 case '@':
3280af22 6492 if (PL_expect == XOPERATOR)
bbce6d69 6493 no_op("Array", s);
3280af22
NIS
6494 PL_tokenbuf[0] = '@';
6495 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6496 if (!PL_tokenbuf[1]) {
bbce6d69 6497 PREREF('@');
6498 }
3280af22 6499 if (PL_lex_state == LEX_NORMAL)
29595ff2 6500 s = SKIPSPACE1(s);
3280af22 6501 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6502 if (*s == '{')
3280af22 6503 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6504
6505 /* Warn about @ where they meant $. */
041457d9
DM
6506 if (*s == '[' || *s == '{') {
6507 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6508 const char *t = s + 1;
7e2040f0 6509 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
b9e186cd 6510 t += UTF ? UTF8SKIP(t) : 1;
a0d0e21e
LW
6511 if (*t == '}' || *t == ']') {
6512 t++;
29595ff2 6513 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
dcbac5bb 6514 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
9014280d 6515 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
b9e186cd
BF
6516 "Scalar value %"SVf" better written as $%"SVf,
6517 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6518 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6519 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6520 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
a0d0e21e 6521 }
93a17b20
LW
6522 }
6523 }
463ee0b2 6524 }
60ac52eb
FC
6525 PL_expect = XOPERATOR;
6526 force_ident_maybe_lex('@');
79072805 6527 TERM('@');
378cc40b 6528
c963b151 6529 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6530 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6531 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6532 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6533 TOKEN(0);
6f33ba73
RGS
6534 s += 2;
6535 AOPERATOR(DORDOR);
6536 }
c963b151 6537 case '?': /* may either be conditional or pattern */
be25f609 6538 if (PL_expect == XOPERATOR) {
90771dc0 6539 char tmp = *s++;
c963b151 6540 if(tmp == '?') {
78cdf107
Z
6541 if (!PL_lex_allbrackets &&
6542 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6543 s--;
6544 TOKEN(0);
6545 }
6546 PL_lex_allbrackets++;
be25f609 6547 OPERATOR('?');
c963b151
BD
6548 }
6549 else {
6550 tmp = *s++;
6551 if(tmp == '/') {
6552 /* A // operator. */
78cdf107
Z
6553 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6554 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6555 LEX_FAKEEOF_LOGIC)) {
6556 s -= 2;
6557 TOKEN(0);
6558 }
c963b151
BD
6559 AOPERATOR(DORDOR);
6560 }
6561 else {
6562 s--;
78cdf107
Z
6563 if (*s == '=' && !PL_lex_allbrackets &&
6564 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6565 s--;
6566 TOKEN(0);
6567 }
c963b151
BD
6568 Mop(OP_DIVIDE);
6569 }
6570 }
6571 }
6572 else {
6573 /* Disable warning on "study /blah/" */
6574 if (PL_oldoldbufptr == PL_last_uni
6575 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6576 || memNE(PL_last_uni, "study", 5)
6577 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6578 ))
6579 check_uni();
725a61d7
Z
6580 if (*s == '?')
6581 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6582 s = scan_pat(s,OP_MATCH);
6583 TERM(sublex_start());
6584 }
378cc40b
LW
6585
6586 case '.':
51882d45
GS
6587 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6588#ifdef PERL_STRICT_CR
6589 && s[1] == '\n'
6590#else
6591 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6592#endif
6593 && (s == PL_linestart || s[-1] == '\n') )
6594 {
3280af22 6595 PL_expect = XSTATE;
705fe0e5 6596 formbrack = 2; /* dot seen where arguments expected */
79072805
LW
6597 goto rightbracket;
6598 }
be25f609 6599 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6600 s += 3;
6601 OPERATOR(YADAYADA);
6602 }
3280af22 6603 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6604 char tmp = *s++;
a687059c 6605 if (*s == tmp) {
78cdf107
Z
6606 if (!PL_lex_allbrackets &&
6607 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6608 s--;
6609 TOKEN(0);
6610 }
a687059c 6611 s++;
2f3197b3
LW
6612 if (*s == tmp) {
6613 s++;
6154021b 6614 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6615 }
6616 else
6154021b 6617 pl_yylval.ival = 0;
378cc40b 6618 OPERATOR(DOTDOT);
a687059c 6619 }
78cdf107
Z
6620 if (*s == '=' && !PL_lex_allbrackets &&
6621 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6622 s--;
6623 TOKEN(0);
6624 }
79072805 6625 Aop(OP_CONCAT);
378cc40b
LW
6626 }
6627 /* FALL THROUGH */
6628 case '0': case '1': case '2': case '3': case '4':
6629 case '5': case '6': case '7': case '8': case '9':
6154021b 6630 s = scan_num(s, &pl_yylval);
931e0695 6631 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6632 if (PL_expect == XOPERATOR)
8990e307 6633 no_op("Number",s);
79072805
LW
6634 TERM(THING);
6635
6636 case '\'':
d24ca0c5 6637 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6638 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6639 if (PL_expect == XOPERATOR) {
6640 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6641 return deprecate_commaless_var_list();
a0d0e21e 6642 }
463ee0b2 6643 else
8990e307 6644 no_op("String",s);
463ee0b2 6645 }
79072805 6646 if (!s)
d4c19fe8 6647 missingterm(NULL);
6154021b 6648 pl_yylval.ival = OP_CONST;
79072805
LW
6649 TERM(sublex_start());
6650
6651 case '"':
d24ca0c5 6652 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6653 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6654 if (PL_expect == XOPERATOR) {
6655 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6656 return deprecate_commaless_var_list();
a0d0e21e 6657 }
463ee0b2 6658 else
8990e307 6659 no_op("String",s);
463ee0b2 6660 }
79072805 6661 if (!s)
d4c19fe8 6662 missingterm(NULL);
6154021b 6663 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6664 /* FIXME. I think that this can be const if char *d is replaced by
6665 more localised variables. */
3280af22 6666 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6667 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6668 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6669 break;
6670 }
6671 }
79072805
LW
6672 TERM(sublex_start());
6673
6674 case '`':
d24ca0c5 6675 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6676 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6677 if (PL_expect == XOPERATOR)
8990e307 6678 no_op("Backticks",s);
79072805 6679 if (!s)
d4c19fe8 6680 missingterm(NULL);
9b201d7d 6681 readpipe_override();
79072805
LW
6682 TERM(sublex_start());
6683
6684 case '\\':
6685 s++;
a2a5de95
NC
6686 if (PL_lex_inwhat && isDIGIT(*s))
6687 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6688 *s, *s);
3280af22 6689 if (PL_expect == XOPERATOR)
8990e307 6690 no_op("Backslash",s);
79072805
LW
6691 OPERATOR(REFGEN);
6692
a7cb1f99 6693 case 'v':
e526c9e6 6694 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6695 char *start = s + 2;
dd629d5b 6696 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6697 start++;
6698 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6699 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6700 TERM(THING);
6701 }
e9d2327d
FC
6702 else if ((*start == ':' && start[1] == ':')
6703 || (PL_expect == XSTATE && *start == ':'))
6704 goto keylookup;
6705 else if (PL_expect == XSTATE) {
6706 d = start;
6707 while (d < PL_bufend && isSPACE(*d)) d++;
6708 if (*d == ':') goto keylookup;
6709 }
e526c9e6 6710 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
e9d2327d 6711 if (!isALPHA(*start) && (PL_expect == XTERM
6f33ba73
RGS
6712 || PL_expect == XREF || PL_expect == XSTATE
6713 || PL_expect == XTERMORDORDOR)) {
af9f5953
BF
6714 GV *const gv = gv_fetchpvn_flags(s, start - s,
6715 UTF ? SVf_UTF8 : 0, SVt_PVCV);
e526c9e6 6716 if (!gv) {
6154021b 6717 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6718 TERM(THING);
6719 }
6720 }
a7cb1f99
GS
6721 }
6722 goto keylookup;
79072805 6723 case 'x':
3280af22 6724 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6725 s++;
6726 Mop(OP_REPEAT);
2f3197b3 6727 }
79072805
LW
6728 goto keylookup;
6729
378cc40b 6730 case '_':
79072805
LW
6731 case 'a': case 'A':
6732 case 'b': case 'B':
6733 case 'c': case 'C':
6734 case 'd': case 'D':
6735 case 'e': case 'E':
6736 case 'f': case 'F':
6737 case 'g': case 'G':
6738 case 'h': case 'H':
6739 case 'i': case 'I':
6740 case 'j': case 'J':
6741 case 'k': case 'K':
6742 case 'l': case 'L':
6743 case 'm': case 'M':
6744 case 'n': case 'N':
6745 case 'o': case 'O':
6746 case 'p': case 'P':
6747 case 'q': case 'Q':
6748 case 'r': case 'R':
6749 case 's': case 'S':
6750 case 't': case 'T':
6751 case 'u': case 'U':
a7cb1f99 6752 case 'V':
79072805
LW
6753 case 'w': case 'W':
6754 case 'X':
6755 case 'y': case 'Y':
6756 case 'z': case 'Z':
6757
49dc05e3 6758 keylookup: {
88e1f1a2 6759 bool anydelim;
18f70389 6760 bool lex;
90771dc0 6761 I32 tmp;
18f70389 6762 SV *sv;
73f3e228
FC
6763 CV *cv;
6764 PADOFFSET off;
6765 OP *rv2cv_op;
10edeb5d 6766
18f70389 6767 lex = FALSE;
10edeb5d 6768 orig_keyword = 0;
73f3e228 6769 off = 0;
18f70389 6770 sv = NULL;
73f3e228 6771 cv = NULL;
10edeb5d
JH
6772 gv = NULL;
6773 gvp = NULL;
73f3e228 6774 rv2cv_op = NULL;
49dc05e3 6775
3280af22
NIS
6776 PL_bufptr = s;
6777 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6778
6779 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6780 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6781
6782 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6783 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6784 goto just_a_word;
6785
3643fb5f 6786 d = s;
3280af22 6787 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6788 d++; /* no comments skipped here, or s### is misparsed */
6789
748a9306 6790 /* Is this a word before a => operator? */
1c3923b3 6791 if (*d == '=' && d[1] == '>') {
748a9306 6792 CLINE;
6154021b 6793 pl_yylval.opval
d0a148a6
NC
6794 = (OP*)newSVOP(OP_CONST, 0,
6795 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6796 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6797 TERM(WORD);
6798 }
6799
88e1f1a2
JV
6800 /* Check for plugged-in keyword */
6801 {
6802 OP *o;
6803 int result;
6804 char *saved_bufptr = PL_bufptr;
6805 PL_bufptr = s;
16c91539 6806 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6807 s = PL_bufptr;
6808 if (result == KEYWORD_PLUGIN_DECLINE) {
6809 /* not a plugged-in keyword */
6810 PL_bufptr = saved_bufptr;
6811 } else if (result == KEYWORD_PLUGIN_STMT) {
6812 pl_yylval.opval = o;
6813 CLINE;
6814 PL_expect = XSTATE;
6815 return REPORT(PLUGSTMT);
6816 } else if (result == KEYWORD_PLUGIN_EXPR) {
6817 pl_yylval.opval = o;
6818 CLINE;
6819 PL_expect = XOPERATOR;
6820 return REPORT(PLUGEXPR);
6821 } else {
6822 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6823 PL_tokenbuf);
6824 }
6825 }
6826
6827 /* Check for built-in keyword */
6828 tmp = keyword(PL_tokenbuf, len, 0);
6829
6830 /* Is this a label? */
6831 if (!anydelim && PL_expect == XSTATE
6832 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2 6833 s = d + 1;
5504e6cf
FC
6834 pl_yylval.pval = savepvn(PL_tokenbuf, len+1);
6835 pl_yylval.pval[len] = '\0';
6836 pl_yylval.pval[len+1] = UTF ? 1 : 0;
88e1f1a2
JV
6837 CLINE;
6838 TOKEN(LABEL);
6839 }
6840
18f70389
FC
6841 /* Check for lexical sub */
6842 if (PL_expect != XOPERATOR) {
6843 char tmpbuf[sizeof PL_tokenbuf + 1];
18f70389
FC
6844 *tmpbuf = '&';
6845 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6846 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6847 if (off != NOT_IN_PAD) {
73f3e228 6848 assert(off); /* we assume this is boolean-true below */
18f70389
FC
6849 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6850 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6851 HEK * const stashname = HvNAME_HEK(stash);
6852 sv = newSVhek(stashname);
6853 sv_catpvs(sv, "::");
6854 sv_catpvn_flags(sv, PL_tokenbuf, len,
6855 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6856 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6857 SVt_PVCV);
73f3e228 6858 off = 0;
18f70389 6859 }
73f3e228
FC
6860 else {
6861 rv2cv_op = newOP(OP_PADANY, 0);
6862 rv2cv_op->op_targ = off;
6863 rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
6864 cv = (CV *)PAD_SV(off);
6865 }
6866 lex = TRUE;
6867 goto just_a_word;
18f70389 6868 }
73f3e228 6869 off = 0;
18f70389
FC
6870 }
6871
a0d0e21e 6872 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6873 GV *ogv = NULL; /* override (winner) */
6874 GV *hgv = NULL; /* hidden (loser) */
3280af22 6875 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6876 CV *cv;
af9f5953
BF
6877 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6878 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
56f7f34b
CS
6879 (cv = GvCVu(gv)))
6880 {
6881 if (GvIMPORTED_CV(gv))
6882 ogv = gv;
6883 else if (! CvMETHOD(cv))
6884 hgv = gv;
6885 }
6886 if (!ogv &&
af9f5953 6887 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
c60dbbc3 6888 UTF ? -(I32)len : (I32)len, FALSE)) &&
9e0d86f8 6889 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6890 GvCVu(gv) && GvIMPORTED_CV(gv))
6891 {
6892 ogv = gv;
6893 }
6894 }
6895 if (ogv) {
30fe34ed 6896 orig_keyword = tmp;
56f7f34b 6897 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6898 }
6899 else if (gv && !gvp
6900 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6901 && GvCVu(gv))
6e7b2336
GS
6902 {
6903 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6904 }
56f7f34b
CS
6905 else { /* no override */
6906 tmp = -tmp;
a2a5de95
NC
6907 if (tmp == KEY_dump) {
6908 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6909 "dump() better written as CORE::dump()");
ac206dc8 6910 }
a0714e2c 6911 gv = NULL;
56f7f34b 6912 gvp = 0;
a2a5de95
NC
6913 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6914 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6915 "Ambiguous call resolved as CORE::%s(), "
6916 "qualify as such or use &",
6917 GvENAME(hgv));
49dc05e3 6918 }
a0d0e21e
LW
6919 }
6920
6921 reserved_word:
6922 switch (tmp) {
79072805
LW
6923
6924 default: /* not a keyword */
0bfa2a8a
NC
6925 /* Trade off - by using this evil construction we can pull the
6926 variable gv into the block labelled keylookup. If not, then
6927 we have to give it function scope so that the goto from the
6928 earlier ':' case doesn't bypass the initialisation. */
6929 if (0) {
6930 just_a_word_zero_gv:
73f3e228
FC
6931 sv = NULL;
6932 cv = NULL;
0bfa2a8a
NC
6933 gv = NULL;
6934 gvp = NULL;
73f3e228 6935 rv2cv_op = NULL;
8bee0991 6936 orig_keyword = 0;
18f70389
FC
6937 lex = 0;
6938 off = 0;
0bfa2a8a 6939 }
93a17b20 6940 just_a_word: {
ce29ac45 6941 int pkgname = 0;
f54cb97a 6942 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
898c3bca
FC
6943 const char penultchar =
6944 lastchar && PL_bufptr - 2 >= PL_linestart
6945 ? PL_bufptr[-2]
6946 : 0;
5db06880 6947#ifdef PERL_MAD
cd81e915 6948 SV *nextPL_nextwhite = 0;
5db06880
NC
6949#endif
6950
8990e307
LW
6951
6952 /* Get the rest if it looks like a package qualifier */
6953
155aba94 6954 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6955 STRLEN morelen;
3280af22 6956 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6957 TRUE, &morelen);
6958 if (!morelen)
86fe3f36
BF
6959 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6960 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6961 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
ec2ab091 6962 *s == '\'' ? "'" : "::");
c3e0f903 6963 len += morelen;
ce29ac45 6964 pkgname = 1;
a0d0e21e 6965 }
8990e307 6966
3280af22
NIS
6967 if (PL_expect == XOPERATOR) {
6968 if (PL_bufptr == PL_linestart) {
57843af0 6969 CopLINE_dec(PL_curcop);
f1f66076 6970 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6971 CopLINE_inc(PL_curcop);
463ee0b2
LW
6972 }
6973 else
54310121 6974 no_op("Bareword",s);
463ee0b2 6975 }
8990e307 6976
c3e0f903 6977 /* Look for a subroutine with this name in current package,
73f3e228
FC
6978 unless this is a lexical sub, or name is "Foo::",
6979 in which case Foo is a bareword
c3e0f903
GS
6980 (and a package name). */
6981
5db06880 6982 if (len > 2 && !PL_madskills &&
3280af22 6983 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6984 {
f776e3cd 6985 if (ckWARN(WARN_BAREWORD)
af9f5953 6986 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
9014280d 6987 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
979a1401
BF
6988 "Bareword \"%"SVf"\" refers to nonexistent package",
6989 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6990 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
c3e0f903 6991 len -= 2;
3280af22 6992 PL_tokenbuf[len] = '\0';
a0714e2c 6993 gv = NULL;
c3e0f903
GS
6994 gvp = 0;
6995 }
6996 else {
73f3e228 6997 if (!lex && !gv) {
62d55b22
NC
6998 /* Mustn't actually add anything to a symbol table.
6999 But also don't want to "initialise" any placeholder
7000 constants that might already be there into full
7001 blown PVGVs with attached PVCV. */
90e5519e 7002 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
af9f5953
BF
7003 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
7004 SVt_PVCV);
62d55b22 7005 }
b3d904f3 7006 len = 0;
c3e0f903
GS
7007 }
7008
7009 /* if we saw a global override before, get the right name */
8990e307 7010
73f3e228 7011 if (!sv)
18f70389 7012 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
37bb7629 7013 len ? len : strlen(PL_tokenbuf));
49dc05e3 7014 if (gvp) {
37bb7629 7015 SV * const tmp_sv = sv;
396482e1 7016 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
7017 sv_catsv(sv, tmp_sv);
7018 SvREFCNT_dec(tmp_sv);
8a7a129d 7019 }
37bb7629 7020
5db06880 7021#ifdef PERL_MAD
cd81e915
NC
7022 if (PL_madskills && !PL_thistoken) {
7023 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 7024 PL_thistoken = newSVpvn(start,s - start);
cd81e915 7025 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
7026 }
7027#endif
8990e307 7028
a0d0e21e 7029 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 7030 CLINE;
6154021b
RGS
7031 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
7032 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 7033
c3e0f903 7034 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
7035 if (len)
7036 goto safe_bareword;
7037
73f3e228 7038 if (!off)
f7461760 7039 {
d8ebba9f 7040 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
7041 const_op->op_private = OPpCONST_BARE;
7042 rv2cv_op = newCVREF(0, const_op);
73f3e228 7043 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
f7461760 7044 }
5069cc75 7045
8990e307
LW
7046 /* See if it's the indirect object for a list operator. */
7047
3280af22
NIS
7048 if (PL_oldoldbufptr &&
7049 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
7050 (PL_oldoldbufptr == PL_last_lop
7051 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 7052 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
7053 (PL_expect == XREF ||
7054 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 7055 {
748a9306
LW
7056 bool immediate_paren = *s == '(';
7057
a0d0e21e 7058 /* (Now we can afford to cross potential line boundary.) */
cd81e915 7059 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 7060#ifdef PERL_MAD
cd81e915 7061 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 7062#endif
a0d0e21e
LW
7063
7064 /* Two barewords in a row may indicate method call. */
7065
62d55b22 7066 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
7067 (tmp = intuit_method(s, gv, cv))) {
7068 op_free(rv2cv_op);
78cdf107
Z
7069 if (tmp == METHOD && !PL_lex_allbrackets &&
7070 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7071 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 7072 return REPORT(tmp);
f7461760 7073 }
a0d0e21e
LW
7074
7075 /* If not a declared subroutine, it's an indirect object. */
7076 /* (But it's an indir obj regardless for sort.) */
7294df96 7077 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 7078
7294df96
RGS
7079 if (
7080 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 7081 (!cv &&
a9ef352a 7082 (PL_last_lop_op != OP_MAPSTART &&
f0670693 7083 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
7084 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
7085 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
7086 )
a9ef352a 7087 {
3280af22 7088 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 7089 goto bareword;
93a17b20
LW
7090 }
7091 }
8990e307 7092
3280af22 7093 PL_expect = XOPERATOR;
5db06880
NC
7094#ifdef PERL_MAD
7095 if (isSPACE(*s))
cd81e915
NC
7096 s = SKIPSPACE2(s,nextPL_nextwhite);
7097 PL_nextwhite = nextPL_nextwhite;
5db06880 7098#else
8990e307 7099 s = skipspace(s);
5db06880 7100#endif
1c3923b3
GS
7101
7102 /* Is this a word before a => operator? */
ce29ac45 7103 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 7104 op_free(rv2cv_op);
1c3923b3 7105 CLINE;
6154021b 7106 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 7107 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 7108 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
7109 TERM(WORD);
7110 }
7111
7112 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 7113 if (*s == '(') {
79072805 7114 CLINE;
5069cc75 7115 if (cv) {
c35e046a
AL
7116 d = s + 1;
7117 while (SPACE_OR_TAB(*d))
7118 d++;
f7461760 7119 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 7120 s = d + 1;
c631f32b 7121 goto its_constant;
96e4d5b1 7122 }
7123 }
5db06880
NC
7124#ifdef PERL_MAD
7125 if (PL_madskills) {
cd81e915
NC
7126 PL_nextwhite = PL_thiswhite;
7127 PL_thiswhite = 0;
5db06880 7128 }
cd81e915 7129 start_force(PL_curforce);
5db06880 7130#endif
73f3e228
FC
7131 NEXTVAL_NEXTTOKE.opval =
7132 off ? rv2cv_op : pl_yylval.opval;
3280af22 7133 PL_expect = XOPERATOR;
5db06880
NC
7134#ifdef PERL_MAD
7135 if (PL_madskills) {
cd81e915
NC
7136 PL_nextwhite = nextPL_nextwhite;
7137 curmad('X', PL_thistoken);
6b29d1f5 7138 PL_thistoken = newSVpvs("");
5db06880
NC
7139 }
7140#endif
73f3e228
FC
7141 if (off)
7142 op_free(pl_yylval.opval), force_next(PRIVATEREF);
7143 else op_free(rv2cv_op), force_next(WORD);
6154021b 7144 pl_yylval.ival = 0;
463ee0b2 7145 TOKEN('&');
79072805 7146 }
93a17b20 7147
a0d0e21e 7148 /* If followed by var or block, call it a method (unless sub) */
8990e307 7149
f7461760
Z
7150 if ((*s == '$' || *s == '{') && !cv) {
7151 op_free(rv2cv_op);
3280af22
NIS
7152 PL_last_lop = PL_oldbufptr;
7153 PL_last_lop_op = OP_METHOD;
78cdf107
Z
7154 if (!PL_lex_allbrackets &&
7155 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7156 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 7157 PREBLOCK(METHOD);
463ee0b2
LW
7158 }
7159
8990e307
LW
7160 /* If followed by a bareword, see if it looks like indir obj. */
7161
30fe34ed
RGS
7162 if (!orig_keyword
7163 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
7164 && (tmp = intuit_method(s, gv, cv))) {
7165 op_free(rv2cv_op);
78cdf107
Z
7166 if (tmp == METHOD && !PL_lex_allbrackets &&
7167 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7168 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 7169 return REPORT(tmp);
f7461760 7170 }
93a17b20 7171
8990e307
LW
7172 /* Not a method, so call it a subroutine (if defined) */
7173
5069cc75 7174 if (cv) {
898c3bca 7175 if (lastchar == '-' && penultchar != '-') {
43b5ab4c
BF
7176 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7177 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7178 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7179 SVfARG(tmpsv), SVfARG(tmpsv));
7180 }
89bfa8cd 7181 /* Check for a constant sub */
f7461760 7182 if ((sv = cv_const_sv(cv))) {
96e4d5b1 7183 its_constant:
f7461760 7184 op_free(rv2cv_op);
6154021b
RGS
7185 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7186 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
cc2ebcd7 7187 pl_yylval.opval->op_private = OPpCONST_FOLDED;
6b7c6d95 7188 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 7189 TOKEN(WORD);
89bfa8cd 7190 }
7191
6154021b 7192 op_free(pl_yylval.opval);
f7461760 7193 pl_yylval.opval = rv2cv_op;
6154021b 7194 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 7195 PL_last_lop = PL_oldbufptr;
bf848113 7196 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 7197 /* Is there a prototype? */
5db06880
NC
7198 if (
7199#ifdef PERL_MAD
7200 cv &&
7201#endif
d9f2850e
RGS
7202 SvPOK(cv))
7203 {
8fa6a409
FC
7204 STRLEN protolen = CvPROTOLEN(cv);
7205 const char *proto = CvPROTO(cv);
b5fb7ce3 7206 bool optional;
5f66b61c 7207 if (!protolen)
4633a7c4 7208 TERM(FUNC0SUB);
b5fb7ce3
FC
7209 if ((optional = *proto == ';'))
7210 do
0f5d0394 7211 proto++;
b5fb7ce3 7212 while (*proto == ';');
649d02de
FC
7213 if (
7214 (
7215 (
7216 *proto == '$' || *proto == '_'
c035a075 7217 || *proto == '*' || *proto == '+'
649d02de
FC
7218 )
7219 && proto[1] == '\0'
7220 )
7221 || (
7222 *proto == '\\' && proto[1] && proto[2] == '\0'
7223 )
7224 )
b5fb7ce3 7225 UNIPROTO(UNIOPSUB,optional);
649d02de
FC
7226 if (*proto == '\\' && proto[1] == '[') {
7227 const char *p = proto + 2;
7228 while(*p && *p != ']')
7229 ++p;
b5fb7ce3
FC
7230 if(*p == ']' && !p[1])
7231 UNIPROTO(UNIOPSUB,optional);
649d02de 7232 }
7a52d87a 7233 if (*proto == '&' && *s == '{') {
49a54bbe
NC
7234 if (PL_curstash)
7235 sv_setpvs(PL_subname, "__ANON__");
7236 else
7237 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
7238 if (!PL_lex_allbrackets &&
7239 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7240 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
7241 PREBLOCK(LSTOPSUB);
7242 }
a9ef352a 7243 }
5db06880
NC
7244#ifdef PERL_MAD
7245 {
7246 if (PL_madskills) {
cd81e915
NC
7247 PL_nextwhite = PL_thiswhite;
7248 PL_thiswhite = 0;
5db06880 7249 }
cd81e915 7250 start_force(PL_curforce);
6154021b 7251 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
7252 PL_expect = XTERM;
7253 if (PL_madskills) {
cd81e915
NC
7254 PL_nextwhite = nextPL_nextwhite;
7255 curmad('X', PL_thistoken);
6b29d1f5 7256 PL_thistoken = newSVpvs("");
5db06880 7257 }
73f3e228 7258 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7259 if (!PL_lex_allbrackets &&
7260 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7261 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7262 TOKEN(NOAMP);
7263 }
7264 }
7265
7266 /* Guess harder when madskills require "best effort". */
7267 if (PL_madskills && (!gv || !GvCVu(gv))) {
7268 int probable_sub = 0;
7269 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7270 probable_sub = 1;
7271 else if (isALPHA(*s)) {
7272 char tmpbuf[1024];
7273 STRLEN tmplen;
7274 d = s;
7275 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 7276 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
7277 probable_sub = 1;
7278 else {
7279 while (d < PL_bufend && isSPACE(*d))
7280 d++;
7281 if (*d == '=' && d[1] == '>')
7282 probable_sub = 1;
7283 }
7284 }
7285 if (probable_sub) {
af9f5953
BF
7286 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7287 SVt_PVCV);
6154021b 7288 op_free(pl_yylval.opval);
f7461760 7289 pl_yylval.opval = rv2cv_op;
6154021b 7290 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
7291 PL_last_lop = PL_oldbufptr;
7292 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
7293 PL_nextwhite = PL_thiswhite;
7294 PL_thiswhite = 0;
7295 start_force(PL_curforce);
6154021b 7296 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 7297 PL_expect = XTERM;
cd81e915
NC
7298 PL_nextwhite = nextPL_nextwhite;
7299 curmad('X', PL_thistoken);
6b29d1f5 7300 PL_thistoken = newSVpvs("");
73f3e228 7301 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7302 if (!PL_lex_allbrackets &&
7303 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7304 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7305 TOKEN(NOAMP);
7306 }
7307#else
6154021b 7308 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 7309 PL_expect = XTERM;
73f3e228 7310 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7311 if (!PL_lex_allbrackets &&
7312 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7313 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 7314 TOKEN(NOAMP);
5db06880 7315#endif
8990e307 7316 }
748a9306 7317
8990e307
LW
7318 /* Call it a bare word */
7319
5603f27d 7320 if (PL_hints & HINT_STRICT_SUBS)
6154021b 7321 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 7322 else {
9a073a1d
RGS
7323 bareword:
7324 /* after "print" and similar functions (corresponding to
7325 * "F? L" in opcode.pl), whatever wasn't already parsed as
7326 * a filehandle should be subject to "strict subs".
7327 * Likewise for the optional indirect-object argument to system
7328 * or exec, which can't be a bareword */
7329 if ((PL_last_lop_op == OP_PRINT
7330 || PL_last_lop_op == OP_PRTF
7331 || PL_last_lop_op == OP_SAY
7332 || PL_last_lop_op == OP_SYSTEM
7333 || PL_last_lop_op == OP_EXEC)
7334 && (PL_hints & HINT_STRICT_SUBS))
7335 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
7336 if (lastchar != '-') {
7337 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
7338 d = PL_tokenbuf;
7339 while (isLOWER(*d))
7340 d++;
af9f5953 7341 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
9014280d 7342 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
7343 PL_tokenbuf);
7344 }
748a9306
LW
7345 }
7346 }
f7461760 7347 op_free(rv2cv_op);
c3e0f903
GS
7348
7349 safe_bareword:
9b387841
NC
7350 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7351 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
02571fe8
BF
7352 "Operator or semicolon missing before %c%"SVf,
7353 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7354 strlen(PL_tokenbuf),
7355 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
9b387841
NC
7356 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7357 "Ambiguous use of %c resolved as operator %c",
7358 lastchar, lastchar);
748a9306 7359 }
93a17b20 7360 TOKEN(WORD);
79072805 7361 }
79072805 7362
68dc0745 7363 case KEY___FILE__:
7eb971ee 7364 FUN0OP(
14f0f125 7365 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7eb971ee 7366 );
46fc3d4c 7367
79072805 7368 case KEY___LINE__:
7eb971ee
FC
7369 FUN0OP(
7370 (OP*)newSVOP(OP_CONST, 0,
7371 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7372 );
68dc0745 7373
7374 case KEY___PACKAGE__:
7eb971ee
FC
7375 FUN0OP(
7376 (OP*)newSVOP(OP_CONST, 0,
3280af22 7377 (PL_curstash
5aaec2b4 7378 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
7379 : &PL_sv_undef))
7380 );
79072805 7381
e50aee73 7382 case KEY___DATA__:
79072805
LW
7383 case KEY___END__: {
7384 GV *gv;
3280af22 7385 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 7386 const char *pname = "main";
affc13fc
FC
7387 STRLEN plen = 4;
7388 U32 putf8 = 0;
3280af22 7389 if (PL_tokenbuf[2] == 'D')
affc13fc
FC
7390 {
7391 HV * const stash =
7392 PL_curstash ? PL_curstash : PL_defstash;
7393 pname = HvNAME_get(stash);
7394 plen = HvNAMELEN (stash);
7395 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7396 }
7397 gv = gv_fetchpvn_flags(
7398 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7399 plen+6, GV_ADD|putf8, SVt_PVIO
7400 );
a5f75d66 7401 GvMULTI_on(gv);
79072805 7402 if (!GvIO(gv))
a0d0e21e 7403 GvIOp(gv) = newIO();
3280af22 7404 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
7405#if defined(HAS_FCNTL) && defined(F_SETFD)
7406 {
f54cb97a 7407 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
7408 fcntl(fd,F_SETFD,fd >= 3);
7409 }
79072805 7410#endif
fd049845 7411 /* Mark this internal pseudo-handle as clean */
7412 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 7413 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 7414 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 7415 else
50952442 7416 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
7417#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7418 /* if the script was opened in binmode, we need to revert
53129d29 7419 * it to text mode for compatibility; but only iff it has CRs
c39cd008 7420 * XXX this is a questionable hack at best. */
53129d29
GS
7421 if (PL_bufend-PL_bufptr > 2
7422 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
7423 {
7424 Off_t loc = 0;
50952442 7425 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
7426 loc = PerlIO_tell(PL_rsfp);
7427 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7428 }
2986a63f
JH
7429#ifdef NETWARE
7430 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7431#else
c39cd008 7432 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 7433#endif /* NETWARE */
c39cd008
GS
7434 if (loc > 0)
7435 PerlIO_seek(PL_rsfp, loc, 0);
7436 }
7437 }
7438#endif
7948272d 7439#ifdef PERLIO_LAYERS
52d2e0f4
JH
7440 if (!IN_BYTES) {
7441 if (UTF)
7442 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7443 else if (PL_encoding) {
7444 SV *name;
7445 dSP;
7446 ENTER;
7447 SAVETMPS;
7448 PUSHMARK(sp);
7449 EXTEND(SP, 1);
7450 XPUSHs(PL_encoding);
7451 PUTBACK;
7452 call_method("name", G_SCALAR);
7453 SPAGAIN;
7454 name = POPs;
7455 PUTBACK;
bfed75c6 7456 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7457 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7458 SVfARG(name)));
52d2e0f4
JH
7459 FREETMPS;
7460 LEAVE;
7461 }
7462 }
7948272d 7463#endif
5db06880
NC
7464#ifdef PERL_MAD
7465 if (PL_madskills) {
cd81e915
NC
7466 if (PL_realtokenstart >= 0) {
7467 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7468 if (!PL_endwhite)
6b29d1f5 7469 PL_endwhite = newSVpvs("");
cd81e915
NC
7470 sv_catsv(PL_endwhite, PL_thiswhite);
7471 PL_thiswhite = 0;
7472 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7473 PL_realtokenstart = -1;
5db06880 7474 }
5cc814fd
NC
7475 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7476 != NULL) ;
5db06880
NC
7477 }
7478#endif
4608196e 7479 PL_rsfp = NULL;
79072805
LW
7480 }
7481 goto fake_eof;
e929a76b 7482 }
de3bb511 7483
84ed0108 7484 case KEY___SUB__:
1a35f9ff 7485 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
84ed0108 7486
8990e307 7487 case KEY_AUTOLOAD:
ed6116ce 7488 case KEY_DESTROY:
79072805 7489 case KEY_BEGIN:
3c10abe3 7490 case KEY_UNITCHECK:
7d30b5c4 7491 case KEY_CHECK:
7d07dbc2 7492 case KEY_INIT:
7d30b5c4 7493 case KEY_END:
3280af22
NIS
7494 if (PL_expect == XSTATE) {
7495 s = PL_bufptr;
93a17b20 7496 goto really_sub;
79072805
LW
7497 }
7498 goto just_a_word;
7499
a0d0e21e
LW
7500 case KEY_CORE:
7501 if (*s == ':' && s[1] == ':') {
ee36fb64 7502 STRLEN olen = len;
748a9306 7503 d = s;
ee36fb64 7504 s += 2;
3280af22 7505 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
ee36fb64
FC
7506 if ((*s == ':' && s[1] == ':')
7507 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7508 {
7509 s = d;
7510 len = olen;
7511 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7512 goto just_a_word;
7513 }
7514 if (!tmp)
3773592b
BF
7515 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7516 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7517 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
a0d0e21e
LW
7518 if (tmp < 0)
7519 tmp = -tmp;
d67594ff
FC
7520 else if (tmp == KEY_require || tmp == KEY_do
7521 || tmp == KEY_glob)
a72a1c8b 7522 /* that's a way to remember we saw "CORE::" */
850e8516 7523 orig_keyword = tmp;
a0d0e21e
LW
7524 goto reserved_word;
7525 }
7526 goto just_a_word;
7527
463ee0b2
LW
7528 case KEY_abs:
7529 UNI(OP_ABS);
7530
79072805
LW
7531 case KEY_alarm:
7532 UNI(OP_ALARM);
7533
7534 case KEY_accept:
a0d0e21e 7535 LOP(OP_ACCEPT,XTERM);
79072805 7536
463ee0b2 7537 case KEY_and:
78cdf107
Z
7538 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7539 return REPORT(0);
463ee0b2
LW
7540 OPERATOR(ANDOP);
7541
79072805 7542 case KEY_atan2:
a0d0e21e 7543 LOP(OP_ATAN2,XTERM);
85e6fe83 7544
79072805 7545 case KEY_bind:
a0d0e21e 7546 LOP(OP_BIND,XTERM);
79072805
LW
7547
7548 case KEY_binmode:
1c1fc3ea 7549 LOP(OP_BINMODE,XTERM);
79072805
LW
7550
7551 case KEY_bless:
a0d0e21e 7552 LOP(OP_BLESS,XTERM);
79072805 7553
0d863452
RH
7554 case KEY_break:
7555 FUN0(OP_BREAK);
7556
79072805
LW
7557 case KEY_chop:
7558 UNI(OP_CHOP);
7559
7560 case KEY_continue:
0d863452
RH
7561 /* We have to disambiguate the two senses of
7562 "continue". If the next token is a '{' then
7563 treat it as the start of a continue block;
7564 otherwise treat it as a control operator.
7565 */
7566 s = skipspace(s);
7567 if (*s == '{')
79072805 7568 PREBLOCK(CONTINUE);
0d863452
RH
7569 else
7570 FUN0(OP_CONTINUE);
79072805
LW
7571
7572 case KEY_chdir:
fafc274c
NC
7573 /* may use HOME */
7574 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7575 UNI(OP_CHDIR);
7576
7577 case KEY_close:
7578 UNI(OP_CLOSE);
7579
7580 case KEY_closedir:
7581 UNI(OP_CLOSEDIR);
7582
7583 case KEY_cmp:
78cdf107
Z
7584 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7585 return REPORT(0);
79072805
LW
7586 Eop(OP_SCMP);
7587
7588 case KEY_caller:
7589 UNI(OP_CALLER);
7590
7591 case KEY_crypt:
7592#ifdef FCRYPT
f4c556ac
GS
7593 if (!PL_cryptseen) {
7594 PL_cryptseen = TRUE;
de3bb511 7595 init_des();
f4c556ac 7596 }
a687059c 7597#endif
a0d0e21e 7598 LOP(OP_CRYPT,XTERM);
79072805
LW
7599
7600 case KEY_chmod:
a0d0e21e 7601 LOP(OP_CHMOD,XTERM);
79072805
LW
7602
7603 case KEY_chown:
a0d0e21e 7604 LOP(OP_CHOWN,XTERM);
79072805
LW
7605
7606 case KEY_connect:
a0d0e21e 7607 LOP(OP_CONNECT,XTERM);
79072805 7608
463ee0b2
LW
7609 case KEY_chr:
7610 UNI(OP_CHR);
7611
79072805
LW
7612 case KEY_cos:
7613 UNI(OP_COS);
7614
7615 case KEY_chroot:
7616 UNI(OP_CHROOT);
7617
0d863452
RH
7618 case KEY_default:
7619 PREBLOCK(DEFAULT);
7620
79072805 7621 case KEY_do:
29595ff2 7622 s = SKIPSPACE1(s);
79072805 7623 if (*s == '{')
a0d0e21e 7624 PRETERMBLOCK(DO);
c2900bb8 7625 if (*s != '\'') {
4b473a5a
FC
7626 *PL_tokenbuf = '&';
7627 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7628 1, &len);
7629 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
c2900bb8 7630 d = SKIPSPACE1(d);
4b473a5a 7631 if (*d == '(') {
60ac52eb 7632 force_ident_maybe_lex('&');
4b473a5a
FC
7633 s = d;
7634 }
c2900bb8
FC
7635 }
7636 }
850e8516
RGS
7637 if (orig_keyword == KEY_do) {
7638 orig_keyword = 0;
6154021b 7639 pl_yylval.ival = 1;
850e8516
RGS
7640 }
7641 else
6154021b 7642 pl_yylval.ival = 0;
378cc40b 7643 OPERATOR(DO);
79072805
LW
7644
7645 case KEY_die:
3280af22 7646 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7647 LOP(OP_DIE,XTERM);
79072805
LW
7648
7649 case KEY_defined:
7650 UNI(OP_DEFINED);
7651
7652 case KEY_delete:
a0d0e21e 7653 UNI(OP_DELETE);
79072805
LW
7654
7655 case KEY_dbmopen:
74e8ce34
NC
7656 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7657 STR_WITH_LEN("NDBM_File::"),
7658 STR_WITH_LEN("DB_File::"),
7659 STR_WITH_LEN("GDBM_File::"),
7660 STR_WITH_LEN("SDBM_File::"),
7661 STR_WITH_LEN("ODBM_File::"),
7662 NULL);
a0d0e21e 7663 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7664
7665 case KEY_dbmclose:
7666 UNI(OP_DBMCLOSE);
7667
7668 case KEY_dump:
c31f6d3b 7669 PL_expect = XOPERATOR;
a0d0e21e 7670 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7671 LOOPX(OP_DUMP);
7672
7673 case KEY_else:
7674 PREBLOCK(ELSE);
7675
7676 case KEY_elsif:
6154021b 7677 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7678 OPERATOR(ELSIF);
7679
7680 case KEY_eq:
78cdf107
Z
7681 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7682 return REPORT(0);
79072805
LW
7683 Eop(OP_SEQ);
7684
a0d0e21e
LW
7685 case KEY_exists:
7686 UNI(OP_EXISTS);
4e553d73 7687
79072805 7688 case KEY_exit:
5db06880
NC
7689 if (PL_madskills)
7690 UNI(OP_INT);
79072805
LW
7691 UNI(OP_EXIT);
7692
7693 case KEY_eval:
29595ff2 7694 s = SKIPSPACE1(s);
32e2a35d
RGS
7695 if (*s == '{') { /* block eval */
7696 PL_expect = XTERMBLOCK;
7697 UNIBRACK(OP_ENTERTRY);
7698 }
7699 else { /* string eval */
7700 PL_expect = XTERM;
7701 UNIBRACK(OP_ENTEREVAL);
7702 }
79072805 7703
7d789282
FC
7704 case KEY_evalbytes:
7705 PL_expect = XTERM;
7706 UNIBRACK(-OP_ENTEREVAL);
7707
79072805
LW
7708 case KEY_eof:
7709 UNI(OP_EOF);
7710
7711 case KEY_exp:
7712 UNI(OP_EXP);
7713
7714 case KEY_each:
7715 UNI(OP_EACH);
7716
7717 case KEY_exec:
a0d0e21e 7718 LOP(OP_EXEC,XREF);
79072805
LW
7719
7720 case KEY_endhostent:
7721 FUN0(OP_EHOSTENT);
7722
7723 case KEY_endnetent:
7724 FUN0(OP_ENETENT);
7725
7726 case KEY_endservent:
7727 FUN0(OP_ESERVENT);
7728
7729 case KEY_endprotoent:
7730 FUN0(OP_EPROTOENT);
7731
7732 case KEY_endpwent:
7733 FUN0(OP_EPWENT);
7734
7735 case KEY_endgrent:
7736 FUN0(OP_EGRENT);
7737
7738 case KEY_for:
7739 case KEY_foreach:
78cdf107
Z
7740 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7741 return REPORT(0);
6154021b 7742 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7743 s = SKIPSPACE1(s);
7e2040f0 7744 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7745 char *p = s;
5db06880
NC
7746#ifdef PERL_MAD
7747 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7748#endif
7749
3280af22 7750 if ((PL_bufend - p) >= 3 &&
55497cff 7751 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7752 p += 2;
77ca0c92
LW
7753 else if ((PL_bufend - p) >= 4 &&
7754 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7755 p += 3;
29595ff2 7756 p = PEEKSPACE(p);
7e2040f0 7757 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7758 p = scan_ident(p, PL_bufend,
7759 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7760 p = PEEKSPACE(p);
77ca0c92
LW
7761 }
7762 if (*p != '$')
cea2e8a9 7763 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7764#ifdef PERL_MAD
7765 s = SvPVX(PL_linestr) + soff;
7766#endif
55497cff 7767 }
79072805
LW
7768 OPERATOR(FOR);
7769
7770 case KEY_formline:
a0d0e21e 7771 LOP(OP_FORMLINE,XTERM);
79072805
LW
7772
7773 case KEY_fork:
7774 FUN0(OP_FORK);
7775
838f2281
BF
7776 case KEY_fc:
7777 UNI(OP_FC);
7778
79072805 7779 case KEY_fcntl:
a0d0e21e 7780 LOP(OP_FCNTL,XTERM);
79072805
LW
7781
7782 case KEY_fileno:
7783 UNI(OP_FILENO);
7784
7785 case KEY_flock:
a0d0e21e 7786 LOP(OP_FLOCK,XTERM);
79072805
LW
7787
7788 case KEY_gt:
78cdf107
Z
7789 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7790 return REPORT(0);
79072805
LW
7791 Rop(OP_SGT);
7792
7793 case KEY_ge:
78cdf107
Z
7794 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7795 return REPORT(0);
79072805
LW
7796 Rop(OP_SGE);
7797
7798 case KEY_grep:
2c38e13d 7799 LOP(OP_GREPSTART, XREF);
79072805
LW
7800
7801 case KEY_goto:
c31f6d3b 7802 PL_expect = XOPERATOR;
a0d0e21e 7803 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7804 LOOPX(OP_GOTO);
7805
7806 case KEY_gmtime:
7807 UNI(OP_GMTIME);
7808
7809 case KEY_getc:
6f33ba73 7810 UNIDOR(OP_GETC);
79072805
LW
7811
7812 case KEY_getppid:
7813 FUN0(OP_GETPPID);
7814
7815 case KEY_getpgrp:
7816 UNI(OP_GETPGRP);
7817
7818 case KEY_getpriority:
a0d0e21e 7819 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7820
7821 case KEY_getprotobyname:
7822 UNI(OP_GPBYNAME);
7823
7824 case KEY_getprotobynumber:
a0d0e21e 7825 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7826
7827 case KEY_getprotoent:
7828 FUN0(OP_GPROTOENT);
7829
7830 case KEY_getpwent:
7831 FUN0(OP_GPWENT);
7832
7833 case KEY_getpwnam:
ff68c719 7834 UNI(OP_GPWNAM);
79072805
LW
7835
7836 case KEY_getpwuid:
ff68c719 7837 UNI(OP_GPWUID);
79072805
LW
7838
7839 case KEY_getpeername:
7840 UNI(OP_GETPEERNAME);
7841
7842 case KEY_gethostbyname:
7843 UNI(OP_GHBYNAME);
7844
7845 case KEY_gethostbyaddr:
a0d0e21e 7846 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7847
7848 case KEY_gethostent:
7849 FUN0(OP_GHOSTENT);
7850
7851 case KEY_getnetbyname:
7852 UNI(OP_GNBYNAME);
7853
7854 case KEY_getnetbyaddr:
a0d0e21e 7855 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7856
7857 case KEY_getnetent:
7858 FUN0(OP_GNETENT);
7859
7860 case KEY_getservbyname:
a0d0e21e 7861 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7862
7863 case KEY_getservbyport:
a0d0e21e 7864 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7865
7866 case KEY_getservent:
7867 FUN0(OP_GSERVENT);
7868
7869 case KEY_getsockname:
7870 UNI(OP_GETSOCKNAME);
7871
7872 case KEY_getsockopt:
a0d0e21e 7873 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7874
7875 case KEY_getgrent:
7876 FUN0(OP_GGRENT);
7877
7878 case KEY_getgrnam:
ff68c719 7879 UNI(OP_GGRNAM);
79072805
LW
7880
7881 case KEY_getgrgid:
ff68c719 7882 UNI(OP_GGRGID);
79072805
LW
7883
7884 case KEY_getlogin:
7885 FUN0(OP_GETLOGIN);
7886
0d863452 7887 case KEY_given:
6154021b 7888 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7889 OPERATOR(GIVEN);
7890
93a17b20 7891 case KEY_glob:
d67594ff
FC
7892 LOP(
7893 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7894 XTERM
7895 );
93a17b20 7896
79072805
LW
7897 case KEY_hex:
7898 UNI(OP_HEX);
7899
7900 case KEY_if:
78cdf107
Z
7901 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7902 return REPORT(0);
6154021b 7903 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7904 OPERATOR(IF);
7905
7906 case KEY_index:
a0d0e21e 7907 LOP(OP_INDEX,XTERM);
79072805
LW
7908
7909 case KEY_int:
7910 UNI(OP_INT);
7911
7912 case KEY_ioctl:
a0d0e21e 7913 LOP(OP_IOCTL,XTERM);
79072805
LW
7914
7915 case KEY_join:
a0d0e21e 7916 LOP(OP_JOIN,XTERM);
79072805
LW
7917
7918 case KEY_keys:
7919 UNI(OP_KEYS);
7920
7921 case KEY_kill:
a0d0e21e 7922 LOP(OP_KILL,XTERM);
79072805
LW
7923
7924 case KEY_last:
c31f6d3b 7925 PL_expect = XOPERATOR;
a0d0e21e 7926 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7927 LOOPX(OP_LAST);
4e553d73 7928
79072805
LW
7929 case KEY_lc:
7930 UNI(OP_LC);
7931
7932 case KEY_lcfirst:
7933 UNI(OP_LCFIRST);
7934
7935 case KEY_local:
6154021b 7936 pl_yylval.ival = 0;
79072805
LW
7937 OPERATOR(LOCAL);
7938
7939 case KEY_length:
7940 UNI(OP_LENGTH);
7941
7942 case KEY_lt:
78cdf107
Z
7943 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7944 return REPORT(0);
79072805
LW
7945 Rop(OP_SLT);
7946
7947 case KEY_le:
78cdf107
Z
7948 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7949 return REPORT(0);
79072805
LW
7950 Rop(OP_SLE);
7951
7952 case KEY_localtime:
7953 UNI(OP_LOCALTIME);
7954
7955 case KEY_log:
7956 UNI(OP_LOG);
7957
7958 case KEY_link:
a0d0e21e 7959 LOP(OP_LINK,XTERM);
79072805
LW
7960
7961 case KEY_listen:
a0d0e21e 7962 LOP(OP_LISTEN,XTERM);
79072805 7963
c0329465
MB
7964 case KEY_lock:
7965 UNI(OP_LOCK);
7966
79072805
LW
7967 case KEY_lstat:
7968 UNI(OP_LSTAT);
7969
7970 case KEY_m:
8782bef2 7971 s = scan_pat(s,OP_MATCH);
79072805
LW
7972 TERM(sublex_start());
7973
a0d0e21e 7974 case KEY_map:
2c38e13d 7975 LOP(OP_MAPSTART, XREF);
4e4e412b 7976
79072805 7977 case KEY_mkdir:
a0d0e21e 7978 LOP(OP_MKDIR,XTERM);
79072805
LW
7979
7980 case KEY_msgctl:
a0d0e21e 7981 LOP(OP_MSGCTL,XTERM);
79072805
LW
7982
7983 case KEY_msgget:
a0d0e21e 7984 LOP(OP_MSGGET,XTERM);
79072805
LW
7985
7986 case KEY_msgrcv:
a0d0e21e 7987 LOP(OP_MSGRCV,XTERM);
79072805
LW
7988
7989 case KEY_msgsnd:
a0d0e21e 7990 LOP(OP_MSGSND,XTERM);
79072805 7991
77ca0c92 7992 case KEY_our:
93a17b20 7993 case KEY_my:
952306ac 7994 case KEY_state:
eac04b2e 7995 PL_in_my = (U16)tmp;
29595ff2 7996 s = SKIPSPACE1(s);
7e2040f0 7997 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7998#ifdef PERL_MAD
7999 char* start = s;
8000#endif
3280af22 8001 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843 8002 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
e7d0b801
FC
8003 {
8004 if (!FEATURE_LEXSUBS_IS_ENABLED)
8005 Perl_croak(aTHX_
8006 "Experimental \"%s\" subs not enabled",
8007 tmp == KEY_my ? "my" :
8008 tmp == KEY_state ? "state" : "our");
09bef843 8009 goto really_sub;
e7d0b801 8010 }
def3634b 8011 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 8012 if (!PL_in_my_stash) {
c750a3ec 8013 char tmpbuf[1024];
3280af22 8014 PL_bufptr = s;
d9fad198 8015 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
3c54b17a 8016 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
c750a3ec 8017 }
5db06880
NC
8018#ifdef PERL_MAD
8019 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
8020 sv_catsv(PL_thistoken, PL_nextwhite);
8021 PL_nextwhite = 0;
8022 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
8023 }
8024#endif
c750a3ec 8025 }
6154021b 8026 pl_yylval.ival = 1;
55497cff 8027 OPERATOR(MY);
93a17b20 8028
79072805 8029 case KEY_next:
c31f6d3b 8030 PL_expect = XOPERATOR;
a0d0e21e 8031 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
8032 LOOPX(OP_NEXT);
8033
8034 case KEY_ne:
78cdf107
Z
8035 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
8036 return REPORT(0);
79072805
LW
8037 Eop(OP_SNE);
8038
a0d0e21e 8039 case KEY_no:
468aa647 8040 s = tokenize_use(0, s);
52d0e95b 8041 TERM(USE);
a0d0e21e
LW
8042
8043 case KEY_not:
29595ff2 8044 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 8045 FUN1(OP_NOT);
78cdf107
Z
8046 else {
8047 if (!PL_lex_allbrackets &&
8048 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
8049 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 8050 OPERATOR(NOTOP);
78cdf107 8051 }
a0d0e21e 8052
79072805 8053 case KEY_open:
29595ff2 8054 s = SKIPSPACE1(s);
7e2040f0 8055 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 8056 const char *t;
71aa9713
BF
8057 for (d = s; isALNUM_lazy_if(d,UTF);) {
8058 d += UTF ? UTF8SKIP(d) : 1;
8059 if (UTF) {
8060 while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
8061 d += UTF ? UTF8SKIP(d) : 1;
8062 }
8063 }
8064 }
c35e046a
AL
8065 for (t=d; isSPACE(*t);)
8066 t++;
e2ab214b 8067 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
8068 /* [perl #16184] */
8069 && !(t[0] == '=' && t[1] == '>')
db3abe52 8070 && !(t[0] == ':' && t[1] == ':')
240d1b6f 8071 && !keyword(s, d-s, 0)
66fbe8fb 8072 ) {
71aa9713
BF
8073 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
8074 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
9014280d 8075 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
71aa9713
BF
8076 "Precedence problem: open %"SVf" should be open(%"SVf")",
8077 SVfARG(tmpsv), SVfARG(tmpsv));
66fbe8fb 8078 }
93a17b20 8079 }
a0d0e21e 8080 LOP(OP_OPEN,XTERM);
79072805 8081
463ee0b2 8082 case KEY_or:
78cdf107
Z
8083 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8084 return REPORT(0);
6154021b 8085 pl_yylval.ival = OP_OR;
463ee0b2
LW
8086 OPERATOR(OROP);
8087
79072805
LW
8088 case KEY_ord:
8089 UNI(OP_ORD);
8090
8091 case KEY_oct:
8092 UNI(OP_OCT);
8093
8094 case KEY_opendir:
a0d0e21e 8095 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
8096
8097 case KEY_print:
3280af22 8098 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 8099 LOP(OP_PRINT,XREF);
79072805
LW
8100
8101 case KEY_printf:
3280af22 8102 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 8103 LOP(OP_PRTF,XREF);
79072805 8104
c07a80fd 8105 case KEY_prototype:
8106 UNI(OP_PROTOTYPE);
8107
79072805 8108 case KEY_push:
a0d0e21e 8109 LOP(OP_PUSH,XTERM);
79072805
LW
8110
8111 case KEY_pop:
6f33ba73 8112 UNIDOR(OP_POP);
79072805 8113
a0d0e21e 8114 case KEY_pos:
6f33ba73 8115 UNIDOR(OP_POS);
4e553d73 8116
79072805 8117 case KEY_pack:
a0d0e21e 8118 LOP(OP_PACK,XTERM);
79072805
LW
8119
8120 case KEY_package:
a0d0e21e 8121 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 8122 s = SKIPSPACE1(s);
91152fc1 8123 s = force_strict_version(s);
4e4da3ac 8124 PL_lex_expect = XBLOCK;
79072805
LW
8125 OPERATOR(PACKAGE);
8126
8127 case KEY_pipe:
a0d0e21e 8128 LOP(OP_PIPE_OP,XTERM);
79072805
LW
8129
8130 case KEY_q:
d24ca0c5 8131 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 8132 if (!s)
d4c19fe8 8133 missingterm(NULL);
6154021b 8134 pl_yylval.ival = OP_CONST;
79072805
LW
8135 TERM(sublex_start());
8136
a0d0e21e
LW
8137 case KEY_quotemeta:
8138 UNI(OP_QUOTEMETA);
8139
ea25a9b2
Z
8140 case KEY_qw: {
8141 OP *words = NULL;
d24ca0c5 8142 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8990e307 8143 if (!s)
d4c19fe8 8144 missingterm(NULL);
3480a8d2 8145 PL_expect = XOPERATOR;
8127e0e3 8146 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
8147 int warned_comma = !ckWARN(WARN_QW);
8148 int warned_comment = warned_comma;
3280af22 8149 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 8150 while (len) {
d4c19fe8
AL
8151 for (; isSPACE(*d) && len; --len, ++d)
8152 /**/;
8127e0e3 8153 if (len) {
d4c19fe8 8154 SV *sv;
f54cb97a 8155 const char *b = d;
7e03b518 8156 if (!warned_comma || !warned_comment) {
8127e0e3 8157 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 8158 if (!warned_comma && *d == ',') {
9014280d 8159 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 8160 "Possible attempt to separate words with commas");
7e03b518 8161 ++warned_comma;
8127e0e3 8162 }
7e03b518 8163 else if (!warned_comment && *d == '#') {
9014280d 8164 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 8165 "Possible attempt to put comments in qw() list");
7e03b518 8166 ++warned_comment;
8127e0e3
GS
8167 }
8168 }
8169 }
8170 else {
d4c19fe8
AL
8171 for (; !isSPACE(*d) && len; --len, ++d)
8172 /**/;
8127e0e3 8173 }
740cce10 8174 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 8175 words = op_append_elem(OP_LIST, words,
7948272d 8176 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 8177 }
8178 }
8179 }
ea25a9b2
Z
8180 if (!words)
8181 words = newNULLLIST();
37fd879b 8182 if (PL_lex_stuff) {
8127e0e3 8183 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 8184 PL_lex_stuff = NULL;
37fd879b 8185 }
ea25a9b2
Z
8186 PL_expect = XOPERATOR;
8187 pl_yylval.opval = sawparens(words);
8188 TOKEN(QWLIST);
8189 }
8990e307 8190
79072805 8191 case KEY_qq:
d24ca0c5 8192 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 8193 if (!s)
d4c19fe8 8194 missingterm(NULL);
6154021b 8195 pl_yylval.ival = OP_STRINGIFY;
3280af22 8196 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 8197 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
8198 TERM(sublex_start());
8199
8782bef2
GB
8200 case KEY_qr:
8201 s = scan_pat(s,OP_QR);
8202 TERM(sublex_start());
8203
79072805 8204 case KEY_qx:
d24ca0c5 8205 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 8206 if (!s)
d4c19fe8 8207 missingterm(NULL);
9b201d7d 8208 readpipe_override();
79072805
LW
8209 TERM(sublex_start());
8210
8211 case KEY_return:
8212 OLDLOP(OP_RETURN);
8213
8214 case KEY_require:
29595ff2 8215 s = SKIPSPACE1(s);
c31f6d3b 8216 PL_expect = XOPERATOR;
e759cc13
RGS
8217 if (isDIGIT(*s)) {
8218 s = force_version(s, FALSE);
a7cb1f99 8219 }
e759cc13
RGS
8220 else if (*s != 'v' || !isDIGIT(s[1])
8221 || (s = force_version(s, TRUE), *s == 'v'))
8222 {
a7cb1f99
GS
8223 *PL_tokenbuf = '\0';
8224 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 8225 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
af9f5953
BF
8226 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8227 GV_ADD | (UTF ? SVf_UTF8 : 0));
a7cb1f99
GS
8228 else if (*s == '<')
8229 yyerror("<> should be quotes");
8230 }
a72a1c8b
RGS
8231 if (orig_keyword == KEY_require) {
8232 orig_keyword = 0;
6154021b 8233 pl_yylval.ival = 1;
a72a1c8b
RGS
8234 }
8235 else
6154021b 8236 pl_yylval.ival = 0;
a72a1c8b
RGS
8237 PL_expect = XTERM;
8238 PL_bufptr = s;
8239 PL_last_uni = PL_oldbufptr;
8240 PL_last_lop_op = OP_REQUIRE;
8241 s = skipspace(s);
8242 return REPORT( (int)REQUIRE );
79072805
LW
8243
8244 case KEY_reset:
8245 UNI(OP_RESET);
8246
8247 case KEY_redo:
c31f6d3b 8248 PL_expect = XOPERATOR;
a0d0e21e 8249 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
8250 LOOPX(OP_REDO);
8251
8252 case KEY_rename:
a0d0e21e 8253 LOP(OP_RENAME,XTERM);
79072805
LW
8254
8255 case KEY_rand:
8256 UNI(OP_RAND);
8257
8258 case KEY_rmdir:
8259 UNI(OP_RMDIR);
8260
8261 case KEY_rindex:
a0d0e21e 8262 LOP(OP_RINDEX,XTERM);
79072805
LW
8263
8264 case KEY_read:
a0d0e21e 8265 LOP(OP_READ,XTERM);
79072805
LW
8266
8267 case KEY_readdir:
8268 UNI(OP_READDIR);
8269
93a17b20 8270 case KEY_readline:
6f33ba73 8271 UNIDOR(OP_READLINE);
93a17b20
LW
8272
8273 case KEY_readpipe:
0858480c 8274 UNIDOR(OP_BACKTICK);
93a17b20 8275
79072805
LW
8276 case KEY_rewinddir:
8277 UNI(OP_REWINDDIR);
8278
8279 case KEY_recv:
a0d0e21e 8280 LOP(OP_RECV,XTERM);
79072805
LW
8281
8282 case KEY_reverse:
a0d0e21e 8283 LOP(OP_REVERSE,XTERM);
79072805
LW
8284
8285 case KEY_readlink:
6f33ba73 8286 UNIDOR(OP_READLINK);
79072805
LW
8287
8288 case KEY_ref:
8289 UNI(OP_REF);
8290
8291 case KEY_s:
8292 s = scan_subst(s);
6154021b 8293 if (pl_yylval.opval)
79072805
LW
8294 TERM(sublex_start());
8295 else
8296 TOKEN(1); /* force error */
8297
0d863452
RH
8298 case KEY_say:
8299 checkcomma(s,PL_tokenbuf,"filehandle");
8300 LOP(OP_SAY,XREF);
8301
a0d0e21e
LW
8302 case KEY_chomp:
8303 UNI(OP_CHOMP);
4e553d73 8304
79072805
LW
8305 case KEY_scalar:
8306 UNI(OP_SCALAR);
8307
8308 case KEY_select:
a0d0e21e 8309 LOP(OP_SELECT,XTERM);
79072805
LW
8310
8311 case KEY_seek:
a0d0e21e 8312 LOP(OP_SEEK,XTERM);
79072805
LW
8313
8314 case KEY_semctl:
a0d0e21e 8315 LOP(OP_SEMCTL,XTERM);
79072805
LW
8316
8317 case KEY_semget:
a0d0e21e 8318 LOP(OP_SEMGET,XTERM);
79072805
LW
8319
8320 case KEY_semop:
a0d0e21e 8321 LOP(OP_SEMOP,XTERM);
79072805
LW
8322
8323 case KEY_send:
a0d0e21e 8324 LOP(OP_SEND,XTERM);
79072805
LW
8325
8326 case KEY_setpgrp:
a0d0e21e 8327 LOP(OP_SETPGRP,XTERM);
79072805
LW
8328
8329 case KEY_setpriority:
a0d0e21e 8330 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
8331
8332 case KEY_sethostent:
ff68c719 8333 UNI(OP_SHOSTENT);
79072805
LW
8334
8335 case KEY_setnetent:
ff68c719 8336 UNI(OP_SNETENT);
79072805
LW
8337
8338 case KEY_setservent:
ff68c719 8339 UNI(OP_SSERVENT);
79072805
LW
8340
8341 case KEY_setprotoent:
ff68c719 8342 UNI(OP_SPROTOENT);
79072805
LW
8343
8344 case KEY_setpwent:
8345 FUN0(OP_SPWENT);
8346
8347 case KEY_setgrent:
8348 FUN0(OP_SGRENT);
8349
8350 case KEY_seekdir:
a0d0e21e 8351 LOP(OP_SEEKDIR,XTERM);
79072805
LW
8352
8353 case KEY_setsockopt:
a0d0e21e 8354 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
8355
8356 case KEY_shift:
6f33ba73 8357 UNIDOR(OP_SHIFT);
79072805
LW
8358
8359 case KEY_shmctl:
a0d0e21e 8360 LOP(OP_SHMCTL,XTERM);
79072805
LW
8361
8362 case KEY_shmget:
a0d0e21e 8363 LOP(OP_SHMGET,XTERM);
79072805
LW
8364
8365 case KEY_shmread:
a0d0e21e 8366 LOP(OP_SHMREAD,XTERM);
79072805
LW
8367
8368 case KEY_shmwrite:
a0d0e21e 8369 LOP(OP_SHMWRITE,XTERM);
79072805
LW
8370
8371 case KEY_shutdown:
a0d0e21e 8372 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
8373
8374 case KEY_sin:
8375 UNI(OP_SIN);
8376
8377 case KEY_sleep:
8378 UNI(OP_SLEEP);
8379
8380 case KEY_socket:
a0d0e21e 8381 LOP(OP_SOCKET,XTERM);
79072805
LW
8382
8383 case KEY_socketpair:
a0d0e21e 8384 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
8385
8386 case KEY_sort:
3280af22 8387 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 8388 s = SKIPSPACE1(s);
3280af22 8389 PL_expect = XTERM;
15f0808c 8390 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 8391 LOP(OP_SORT,XREF);
79072805
LW
8392
8393 case KEY_split:
a0d0e21e 8394 LOP(OP_SPLIT,XTERM);
79072805
LW
8395
8396 case KEY_sprintf:
a0d0e21e 8397 LOP(OP_SPRINTF,XTERM);
79072805
LW
8398
8399 case KEY_splice:
a0d0e21e 8400 LOP(OP_SPLICE,XTERM);
79072805
LW
8401
8402 case KEY_sqrt:
8403 UNI(OP_SQRT);
8404
8405 case KEY_srand:
8406 UNI(OP_SRAND);
8407
8408 case KEY_stat:
8409 UNI(OP_STAT);
8410
8411 case KEY_study:
79072805
LW
8412 UNI(OP_STUDY);
8413
8414 case KEY_substr:
a0d0e21e 8415 LOP(OP_SUBSTR,XTERM);
79072805
LW
8416
8417 case KEY_format:
8418 case KEY_sub:
93a17b20 8419 really_sub:
09bef843 8420 {
24b6ef70 8421 char * const tmpbuf = PL_tokenbuf + 1;
9c5ffd7c 8422 SSize_t tboffset = 0;
09bef843 8423 expectation attrful;
28cc6278 8424 bool have_name, have_proto;
f54cb97a 8425 const int key = tmp;
09bef843 8426
5db06880
NC
8427#ifdef PERL_MAD
8428 SV *tmpwhite = 0;
8429
cd81e915 8430 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
af9f5953 8431 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
cd81e915 8432 PL_thistoken = 0;
5db06880
NC
8433
8434 d = s;
8435 s = SKIPSPACE2(s,tmpwhite);
8436#else
8767b1ab 8437 d = s;
09bef843 8438 s = skipspace(s);
5db06880 8439#endif
09bef843 8440
7e2040f0 8441 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
8442 (*s == ':' && s[1] == ':'))
8443 {
5db06880 8444#ifdef PERL_MAD
4f61fd4b 8445 SV *nametoke = NULL;
5db06880
NC
8446#endif
8447
09bef843
SB
8448 PL_expect = XBLOCK;
8449 attrful = XATTRBLOCK;
b1b65b59
JH
8450 /* remember buffer pos'n for later force_word */
8451 tboffset = s - PL_oldbufptr;
24b6ef70
FC
8452 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8453 &len);
5db06880
NC
8454#ifdef PERL_MAD
8455 if (PL_madskills)
af9f5953 8456 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
5db06880 8457#endif
689aac7b
FC
8458 *PL_tokenbuf = '&';
8459 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8460 || pad_findmy_pvn(
8461 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8462 ) != NOT_IN_PAD)
6502358f 8463 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
8464 else {
8465 sv_setsv(PL_subname,PL_curstname);
396482e1 8466 sv_catpvs(PL_subname,"::");
09bef843
SB
8467 sv_catpvn(PL_subname,tmpbuf,len);
8468 }
af9f5953
BF
8469 if (SvUTF8(PL_linestr))
8470 SvUTF8_on(PL_subname);
09bef843 8471 have_name = TRUE;
5db06880 8472
60ac52eb 8473
5db06880 8474#ifdef PERL_MAD
60ac52eb
FC
8475 start_force(0);
8476 CURMAD('X', nametoke);
8477 CURMAD('_', tmpwhite);
4210d3f1 8478 force_ident_maybe_lex('&');
5db06880
NC
8479
8480 s = SKIPSPACE2(d,tmpwhite);
8481#else
8482 s = skipspace(d);
8483#endif
09bef843 8484 }
463ee0b2 8485 else {
8767b1ab
FC
8486 if (key == KEY_my || key == KEY_our || key==KEY_state)
8487 {
8488 *d = '\0';
8489 /* diag_listed_as: Missing name in "%s sub" */
8490 Perl_croak(aTHX_
8491 "Missing name in \"%s\"", PL_bufptr);
8492 }
09bef843
SB
8493 PL_expect = XTERMBLOCK;
8494 attrful = XATTRTERM;
76f68e9b 8495 sv_setpvs(PL_subname,"?");
09bef843 8496 have_name = FALSE;
463ee0b2 8497 }
4633a7c4 8498
09bef843 8499 if (key == KEY_format) {
5db06880 8500#ifdef PERL_MAD
cd81e915 8501 PL_thistoken = subtoken;
5db06880
NC
8502 s = d;
8503#else
09bef843 8504 if (have_name)
b1b65b59
JH
8505 (void) force_word(PL_oldbufptr + tboffset, WORD,
8506 FALSE, TRUE, TRUE);
5db06880 8507#endif
64a40898 8508 PREBLOCK(FORMAT);
09bef843 8509 }
79072805 8510
09bef843
SB
8511 /* Look for a prototype */
8512 if (*s == '(') {
d9f2850e
RGS
8513 char *p;
8514 bool bad_proto = FALSE;
9e8d7757
RB
8515 bool in_brackets = FALSE;
8516 char greedy_proto = ' ';
8517 bool proto_after_greedy_proto = FALSE;
8518 bool must_be_last = FALSE;
8519 bool underscore = FALSE;
aef2a98a 8520 bool seen_underscore = FALSE;
197afce1 8521 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
dab1c735 8522 STRLEN tmplen;
09bef843 8523
d24ca0c5 8524 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
37fd879b 8525 if (!s)
09bef843 8526 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8527 /* strip spaces and check for bad characters */
dab1c735 8528 d = SvPV(PL_lex_stuff, tmplen);
09bef843 8529 tmp = 0;
dab1c735 8530 for (p = d; tmplen; tmplen--, ++p) {
d9f2850e 8531 if (!isSPACE(*p)) {
dab1c735 8532 d[tmp++] = *p;
9e8d7757 8533
197afce1 8534 if (warnillegalproto) {
9e8d7757
RB
8535 if (must_be_last)
8536 proto_after_greedy_proto = TRUE;
dab1c735 8537 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
9e8d7757
RB
8538 bad_proto = TRUE;
8539 }
8540 else {
8541 if ( underscore ) {
34daab0f 8542 if ( !strchr(";@%", *p) )
9e8d7757
RB
8543 bad_proto = TRUE;
8544 underscore = FALSE;
8545 }
8546 if ( *p == '[' ) {
8547 in_brackets = TRUE;
8548 }
8549 else if ( *p == ']' ) {
8550 in_brackets = FALSE;
8551 }
8552 else if ( (*p == '@' || *p == '%') &&
8553 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8554 !in_brackets ) {
8555 must_be_last = TRUE;
8556 greedy_proto = *p;
8557 }
8558 else if ( *p == '_' ) {
aef2a98a 8559 underscore = seen_underscore = TRUE;
9e8d7757
RB
8560 }
8561 }
8562 }
d37a9538 8563 }
09bef843 8564 }
dab1c735 8565 d[tmp] = '\0';
9e8d7757 8566 if (proto_after_greedy_proto)
197afce1 8567 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8568 "Prototype after '%c' for %"SVf" : %s",
8569 greedy_proto, SVfARG(PL_subname), d);
dab1c735
BF
8570 if (bad_proto) {
8571 SV *dsv = newSVpvs_flags("", SVs_TEMP);
197afce1 8572 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8573 "Illegal character %sin prototype for %"SVf" : %s",
8574 seen_underscore ? "after '_' " : "",
dab1c735 8575 SVfARG(PL_subname),
97eb901d
BF
8576 SvUTF8(PL_lex_stuff)
8577 ? sv_uni_display(dsv,
8578 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8579 tmp,
8580 UNI_DISPLAY_ISPRINT)
8581 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8582 PERL_PV_ESCAPE_NONASCII));
dab1c735
BF
8583 }
8584 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8585 have_proto = TRUE;
68dc0745 8586
5db06880
NC
8587#ifdef PERL_MAD
8588 start_force(0);
cd81e915 8589 CURMAD('q', PL_thisopen);
5db06880 8590 CURMAD('_', tmpwhite);
cd81e915
NC
8591 CURMAD('=', PL_thisstuff);
8592 CURMAD('Q', PL_thisclose);
5db06880
NC
8593 NEXTVAL_NEXTTOKE.opval =
8594 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8595 PL_lex_stuff = NULL;
5db06880
NC
8596 force_next(THING);
8597
8598 s = SKIPSPACE2(s,tmpwhite);
8599#else
09bef843 8600 s = skipspace(s);
5db06880 8601#endif
4633a7c4 8602 }
09bef843
SB
8603 else
8604 have_proto = FALSE;
8605
8606 if (*s == ':' && s[1] != ':')
8607 PL_expect = attrful;
8e742a20
MHM
8608 else if (*s != '{' && key == KEY_sub) {
8609 if (!have_name)
8610 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8611 else if (*s != ';' && *s != '}')
be2597df 8612 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8613 }
09bef843 8614
5db06880
NC
8615#ifdef PERL_MAD
8616 start_force(0);
8617 if (tmpwhite) {
8618 if (PL_madskills)
6b29d1f5 8619 curmad('^', newSVpvs(""));
5db06880
NC
8620 CURMAD('_', tmpwhite);
8621 }
8622 force_next(0);
8623
cd81e915 8624 PL_thistoken = subtoken;
5db06880 8625#else
09bef843 8626 if (have_proto) {
9ded7720 8627 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8628 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8629 PL_lex_stuff = NULL;
09bef843 8630 force_next(THING);
68dc0745 8631 }
5db06880 8632#endif
09bef843 8633 if (!have_name) {
49a54bbe
NC
8634 if (PL_curstash)
8635 sv_setpvs(PL_subname, "__ANON__");
8636 else
8637 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8638 TOKEN(ANONSUB);
4633a7c4 8639 }
5db06880 8640#ifndef PERL_MAD
4210d3f1 8641 force_ident_maybe_lex('&');
5db06880 8642#endif
09bef843 8643 TOKEN(SUB);
4633a7c4 8644 }
79072805
LW
8645
8646 case KEY_system:
a0d0e21e 8647 LOP(OP_SYSTEM,XREF);
79072805
LW
8648
8649 case KEY_symlink:
a0d0e21e 8650 LOP(OP_SYMLINK,XTERM);
79072805
LW
8651
8652 case KEY_syscall:
a0d0e21e 8653 LOP(OP_SYSCALL,XTERM);
79072805 8654
c07a80fd 8655 case KEY_sysopen:
8656 LOP(OP_SYSOPEN,XTERM);
8657
137443ea 8658 case KEY_sysseek:
8659 LOP(OP_SYSSEEK,XTERM);
8660
79072805 8661 case KEY_sysread:
a0d0e21e 8662 LOP(OP_SYSREAD,XTERM);
79072805
LW
8663
8664 case KEY_syswrite:
a0d0e21e 8665 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8666
8667 case KEY_tr:
8ce4b50f 8668 case KEY_y:
79072805
LW
8669 s = scan_trans(s);
8670 TERM(sublex_start());
8671
8672 case KEY_tell:
8673 UNI(OP_TELL);
8674
8675 case KEY_telldir:
8676 UNI(OP_TELLDIR);
8677
463ee0b2 8678 case KEY_tie:
a0d0e21e 8679 LOP(OP_TIE,XTERM);
463ee0b2 8680
c07a80fd 8681 case KEY_tied:
8682 UNI(OP_TIED);
8683
79072805
LW
8684 case KEY_time:
8685 FUN0(OP_TIME);
8686
8687 case KEY_times:
8688 FUN0(OP_TMS);
8689
8690 case KEY_truncate:
a0d0e21e 8691 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8692
8693 case KEY_uc:
8694 UNI(OP_UC);
8695
8696 case KEY_ucfirst:
8697 UNI(OP_UCFIRST);
8698
463ee0b2
LW
8699 case KEY_untie:
8700 UNI(OP_UNTIE);
8701
79072805 8702 case KEY_until:
78cdf107
Z
8703 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8704 return REPORT(0);
6154021b 8705 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8706 OPERATOR(UNTIL);
8707
8708 case KEY_unless:
78cdf107
Z
8709 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8710 return REPORT(0);
6154021b 8711 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8712 OPERATOR(UNLESS);
8713
8714 case KEY_unlink:
a0d0e21e 8715 LOP(OP_UNLINK,XTERM);
79072805
LW
8716
8717 case KEY_undef:
6f33ba73 8718 UNIDOR(OP_UNDEF);
79072805
LW
8719
8720 case KEY_unpack:
a0d0e21e 8721 LOP(OP_UNPACK,XTERM);
79072805
LW
8722
8723 case KEY_utime:
a0d0e21e 8724 LOP(OP_UTIME,XTERM);
79072805
LW
8725
8726 case KEY_umask:
6f33ba73 8727 UNIDOR(OP_UMASK);
79072805
LW
8728
8729 case KEY_unshift:
a0d0e21e
LW
8730 LOP(OP_UNSHIFT,XTERM);
8731
8732 case KEY_use:
468aa647 8733 s = tokenize_use(1, s);
a0d0e21e 8734 OPERATOR(USE);
79072805
LW
8735
8736 case KEY_values:
8737 UNI(OP_VALUES);
8738
8739 case KEY_vec:
a0d0e21e 8740 LOP(OP_VEC,XTERM);
79072805 8741
0d863452 8742 case KEY_when:
78cdf107
Z
8743 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8744 return REPORT(0);
6154021b 8745 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8746 OPERATOR(WHEN);
8747
79072805 8748 case KEY_while:
78cdf107
Z
8749 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8750 return REPORT(0);
6154021b 8751 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8752 OPERATOR(WHILE);
8753
8754 case KEY_warn:
3280af22 8755 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8756 LOP(OP_WARN,XTERM);
79072805
LW
8757
8758 case KEY_wait:
8759 FUN0(OP_WAIT);
8760
8761 case KEY_waitpid:
a0d0e21e 8762 LOP(OP_WAITPID,XTERM);
79072805
LW
8763
8764 case KEY_wantarray:
8765 FUN0(OP_WANTARRAY);
8766
8767 case KEY_write:
9d116dd7
JH
8768#ifdef EBCDIC
8769 {
df3728a2
JH
8770 char ctl_l[2];
8771 ctl_l[0] = toCTRL('L');
8772 ctl_l[1] = '\0';
fafc274c 8773 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8774 }
8775#else
fafc274c
NC
8776 /* Make sure $^L is defined */
8777 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8778#endif
79072805
LW
8779 UNI(OP_ENTERWRITE);
8780
8781 case KEY_x:
78cdf107
Z
8782 if (PL_expect == XOPERATOR) {
8783 if (*s == '=' && !PL_lex_allbrackets &&
8784 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8785 return REPORT(0);
79072805 8786 Mop(OP_REPEAT);
78cdf107 8787 }
79072805
LW
8788 check_uni();
8789 goto just_a_word;
8790
a0d0e21e 8791 case KEY_xor:
78cdf107
Z
8792 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8793 return REPORT(0);
6154021b 8794 pl_yylval.ival = OP_XOR;
a0d0e21e 8795 OPERATOR(OROP);
79072805 8796 }
49dc05e3 8797 }}
79072805 8798}
bf4acbe4
GS
8799#ifdef __SC__
8800#pragma segment Main
8801#endif
79072805 8802
3875fc11
FC
8803/*
8804 S_pending_ident
8805
8806 Looks up an identifier in the pad or in a package
8807
8808 Returns:
8809 PRIVATEREF if this is a lexical name.
8810 WORD if this belongs to a package.
8811
8812 Structure:
8813 if we're in a my declaration
8814 croak if they tried to say my($foo::bar)
8815 build the ops for a my() declaration
8816 if it's an access to a my() variable
8817 build ops for access to a my() variable
8818 if in a dq string, and they've said @foo and we can't find @foo
8819 warn
8820 build ops for a bareword
8821*/
8822
3f33d153
FC
8823static int
8824S_pending_ident(pTHX)
8eceec63 8825{
97aff369 8826 dVAR;
bbd11bfc 8827 PADOFFSET tmp = 0;
3f33d153 8828 const char pit = (char)pl_yylval.ival;
9bde8eb0
NC
8829 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8830 /* All routes through this function want to know if there is a colon. */
c099d646 8831 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63 8832
3f33d153
FC
8833 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8834 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8835
8836 /* if we're in a my(), we can't allow dynamics here.
8837 $foo'bar has already been turned into $foo::bar, so
8838 just check for colons.
8839
8840 if it's a legal name, the OP is a PADANY.
8841 */
8842 if (PL_in_my) {
8843 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8844 if (has_colon)
4bca4ee0 8845 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8eceec63 8846 "variable %s in \"our\"",
4bca4ee0 8847 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
bc9b26ca 8848 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8849 }
8850 else {
9bde8eb0 8851 if (has_colon)
58576270
BF
8852 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8853 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8854 UTF ? SVf_UTF8 : 0);
8eceec63 8855
3f33d153
FC
8856 pl_yylval.opval = newOP(OP_PADANY, 0);
8857 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
bc9b26ca 8858 UTF ? SVf_UTF8 : 0);
3f33d153 8859 return PRIVATEREF;
8eceec63
SC
8860 }
8861 }
8862
8863 /*
8864 build the ops for accesses to a my() variable.
8eceec63
SC
8865 */
8866
9bde8eb0 8867 if (!has_colon) {
8716503d 8868 if (!PL_in_my)
bc9b26ca
BF
8869 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8870 UTF ? SVf_UTF8 : 0);
8716503d 8871 if (tmp != NOT_IN_PAD) {
8eceec63 8872 /* might be an "our" variable" */
00b1698f 8873 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8874 /* build ops for a bareword */
b64e5050
AL
8875 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8876 HEK * const stashname = HvNAME_HEK(stash);
8877 SV * const sym = newSVhek(stashname);
396482e1 8878 sv_catpvs(sym, "::");
2a33114a 8879 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
3f33d153
FC
8880 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8881 pl_yylval.opval->op_private = OPpCONST_ENTERED;
4210d3f1
FC
8882 if (pit != '&')
8883 gv_fetchsv(sym,
8eceec63
SC
8884 (PL_in_eval
8885 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8886 : GV_ADDMULTI
8eceec63
SC
8887 ),
8888 ((PL_tokenbuf[0] == '$') ? SVt_PV
8889 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8890 : SVt_PVHV));
3f33d153 8891 return WORD;
8eceec63
SC
8892 }
8893
3f33d153
FC
8894 pl_yylval.opval = newOP(OP_PADANY, 0);
8895 pl_yylval.opval->op_targ = tmp;
8896 return PRIVATEREF;
8eceec63
SC
8897 }
8898 }
8899
8900 /*
8901 Whine if they've said @foo in a doublequoted string,
8902 and @foo isn't a variable we can find in the symbol
8903 table.
8904 */
d824713b
NC
8905 if (ckWARN(WARN_AMBIGUOUS) &&
8906 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
0be4d16f
BF
8907 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8908 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8eceec63 8909 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8910 /* DO NOT warn for @- and @+ */
8911 && !( PL_tokenbuf[2] == '\0' &&
8912 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8913 )
8eceec63
SC
8914 {
8915 /* Downgraded from fatal to warning 20000522 mjd */
d824713b 8916 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
29fb1d0e
BF
8917 "Possible unintended interpolation of %"SVf" in string",
8918 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8919 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8eceec63
SC
8920 }
8921 }
8922
8923 /* build ops for a bareword */
3f33d153
FC
8924 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8925 newSVpvn_flags(PL_tokenbuf + 1,
0be4d16f
BF
8926 tokenbuf_len - 1,
8927 UTF ? SVf_UTF8 : 0 ));
3f33d153 8928 pl_yylval.opval->op_private = OPpCONST_ENTERED;
4210d3f1
FC
8929 if (pit != '&')
8930 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
0be4d16f
BF
8931 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8932 | ( UTF ? SVf_UTF8 : 0 ),
223f0fb7
NC
8933 ((PL_tokenbuf[0] == '$') ? SVt_PV
8934 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8935 : SVt_PVHV));
3f33d153 8936 return WORD;
8eceec63
SC
8937}
8938
76e3520e 8939STATIC void
c94115d8 8940S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8941{
97aff369 8942 dVAR;
2f3197b3 8943
7918f24d
NC
8944 PERL_ARGS_ASSERT_CHECKCOMMA;
8945
d008e5eb 8946 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8947 if (ckWARN(WARN_SYNTAX)) {
8948 int level = 1;
26ff0806 8949 const char *w;
d008e5eb
GS
8950 for (w = s+2; *w && level; w++) {
8951 if (*w == '(')
8952 ++level;
8953 else if (*w == ')')
8954 --level;
8955 }
888fea98
NC
8956 while (isSPACE(*w))
8957 ++w;
b1439985
RGS
8958 /* the list of chars below is for end of statements or
8959 * block / parens, boolean operators (&&, ||, //) and branch
8960 * constructs (or, and, if, until, unless, while, err, for).
8961 * Not a very solid hack... */
8962 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8963 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8964 "%s (...) interpreted as function",name);
d008e5eb 8965 }
2f3197b3 8966 }
3280af22 8967 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8968 s++;
a687059c
LW
8969 if (*s == '(')
8970 s++;
3280af22 8971 while (s < PL_bufend && isSPACE(*s))
a687059c 8972 s++;
7e2040f0 8973 if (isIDFIRST_lazy_if(s,UTF)) {
d0fb66e4
BF
8974 const char * const w = s;
8975 s += UTF ? UTF8SKIP(s) : 1;
7e2040f0 8976 while (isALNUM_lazy_if(s,UTF))
d0fb66e4 8977 s += UTF ? UTF8SKIP(s) : 1;
3280af22 8978 while (s < PL_bufend && isSPACE(*s))
a687059c 8979 s++;
e929a76b 8980 if (*s == ',') {
c94115d8 8981 GV* gv;
5458a98a 8982 if (keyword(w, s - w, 0))
e929a76b 8983 return;
c94115d8 8984
2e38bce1 8985 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
c94115d8 8986 if (gv && GvCVu(gv))
abbb3198 8987 return;
cea2e8a9 8988 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8989 }
8990 }
8991}
8992
423cee85
JH
8993/* Either returns sv, or mortalizes sv and returns a new SV*.
8994 Best used as sv=new_constant(..., sv, ...).
8995 If s, pv are NULL, calls subroutine with one argument,
107160e2
KW
8996 and <type> is used with error messages only.
8997 <type> is assumed to be well formed UTF-8 */
423cee85 8998
b3ac6de7 8999STATIC SV *
eb0d8d16
NC
9000S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
9001 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 9002{
27da23d5 9003 dVAR; dSP;
fbb93542 9004 HV * table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 9005 SV *res;
b3ac6de7
IZ
9006 SV **cvp;
9007 SV *cv, *typesv;
89e33a05 9008 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 9009
7918f24d
NC
9010 PERL_ARGS_ASSERT_NEW_CONSTANT;
9011
f8988b41
KW
9012 /* charnames doesn't work well if there have been errors found */
9013 if (PL_error_count > 0 && strEQ(key,"charnames"))
9014 return &PL_sv_undef;
9015
fbb93542
KW
9016 if (!table
9017 || ! (PL_hints & HINT_LOCALIZE_HH)
9018 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
9019 || ! SvOK(*cvp))
9020 {
423cee85
JH
9021 SV *msg;
9022
fbb93542
KW
9023 /* Here haven't found what we're looking for. If it is charnames,
9024 * perhaps it needs to be loaded. Try doing that before giving up */
9025 if (strEQ(key,"charnames")) {
9026 Perl_load_module(aTHX_
9027 0,
9028 newSVpvs("_charnames"),
9029 /* version parameter; no need to specify it, as if
9030 * we get too early a version, will fail anyway,
9031 * not being able to find '_charnames' */
9032 NULL,
9033 newSVpvs(":full"),
9034 newSVpvs(":short"),
9035 NULL);
9036 SPAGAIN;
9037 table = GvHV(PL_hintgv);
9038 if (table
9039 && (PL_hints & HINT_LOCALIZE_HH)
9040 && (cvp = hv_fetch(table, key, keylen, FALSE))
9041 && SvOK(*cvp))
9042 {
9043 goto now_ok;
9044 }
9045 }
9046 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
9047 msg = Perl_newSVpvf(aTHX_
9048 "Constant(%s) unknown", (type ? type: "undef"));
9049 }
9050 else {
3918902d
KW
9051 why1 = "$^H{";
9052 why2 = key;
9053 why3 = "} is not defined";
9054 report:
90249f0a 9055 if (strEQ(key,"charnames")) {
bde9e88d
KW
9056 yyerror_pv(Perl_form(aTHX_
9057 /* The +3 is for '\N{'; -4 for that, plus '}' */
9058 "Unknown charname '%.*s'", (int)typelen - 4, type + 3
9059 ),
9060 UTF ? SVf_UTF8 : 0);
9061 return sv;
90249f0a
KW
9062 }
9063 else {
9064 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
3918902d 9065 (type ? type: "undef"), why1, why2, why3);
90249f0a 9066 }
3918902d 9067 }
95a20fc0 9068 yyerror(SvPVX_const(msg));
423cee85
JH
9069 SvREFCNT_dec(msg);
9070 return sv;
9071 }
fbb93542 9072now_ok:
b3ac6de7
IZ
9073 sv_2mortal(sv); /* Parent created it permanently */
9074 cv = *cvp;
423cee85 9075 if (!pv && s)
59cd0e26 9076 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 9077 if (type && pv)
59cd0e26 9078 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 9079 else
423cee85 9080 typesv = &PL_sv_undef;
4e553d73 9081
e788e7d3 9082 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
9083 ENTER ;
9084 SAVETMPS;
4e553d73 9085
423cee85 9086 PUSHMARK(SP) ;
a5845cb7 9087 EXTEND(sp, 3);
423cee85
JH
9088 if (pv)
9089 PUSHs(pv);
b3ac6de7 9090 PUSHs(sv);
423cee85
JH
9091 if (pv)
9092 PUSHs(typesv);
b3ac6de7 9093 PUTBACK;
423cee85 9094 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 9095
423cee85 9096 SPAGAIN ;
4e553d73 9097
423cee85 9098 /* Check the eval first */
9b0e499b 9099 if (!PL_in_eval && SvTRUE(ERRSV)) {
c06ecf4f
DD
9100 STRLEN errlen;
9101 const char * errstr;
9102 sv_catpvs(ERRSV, "Propagated");
9103 errstr = SvPV_const(ERRSV, errlen);
9104 yyerror_pvn(errstr, errlen, 0); /* Duplicates the message inside eval */
e1f15930 9105 (void)POPs;
b37c2d43 9106 res = SvREFCNT_inc_simple(sv);
423cee85
JH
9107 }
9108 else {
9109 res = POPs;
b37c2d43 9110 SvREFCNT_inc_simple_void(res);
423cee85 9111 }
4e553d73 9112
423cee85
JH
9113 PUTBACK ;
9114 FREETMPS ;
9115 LEAVE ;
b3ac6de7 9116 POPSTACK;
4e553d73 9117
b3ac6de7 9118 if (!SvOK(res)) {
423cee85
JH
9119 why1 = "Call to &{$^H{";
9120 why2 = key;
f0af216f 9121 why3 = "}} did not return a defined value";
423cee85
JH
9122 sv = res;
9123 goto report;
9b0e499b 9124 }
423cee85 9125
9b0e499b 9126 return res;
b3ac6de7 9127}
4e553d73 9128
d0a148a6
NC
9129/* Returns a NUL terminated string, with the length of the string written to
9130 *slp
9131 */
76e3520e 9132STATIC char *
cea2e8a9 9133S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 9134{
97aff369 9135 dVAR;
eb578fdb
KW
9136 char *d = dest;
9137 char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
9138
9139 PERL_ARGS_ASSERT_SCAN_WORD;
9140
463ee0b2 9141 for (;;) {
8903cb82 9142 if (d >= e)
cea2e8a9 9143 Perl_croak(aTHX_ ident_too_long);
5db1eb8d 9144 if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) /* UTF handled below */
463ee0b2 9145 *d++ = *s++;
c35e046a 9146 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
9147 *d++ = ':';
9148 *d++ = ':';
9149 s++;
9150 }
c35e046a 9151 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
9152 *d++ = *s++;
9153 *d++ = *s++;
9154 }
fd400ab9 9155 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 9156 char *t = s + UTF8SKIP(s);
c35e046a 9157 size_t len;
fd400ab9 9158 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 9159 t += UTF8SKIP(t);
c35e046a
AL
9160 len = t - s;
9161 if (d + len > e)
cea2e8a9 9162 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
9163 Copy(s, d, len, char);
9164 d += len;
a0ed51b3
LW
9165 s = t;
9166 }
463ee0b2
LW
9167 else {
9168 *d = '\0';
9169 *slp = d - dest;
9170 return s;
e929a76b 9171 }
378cc40b
LW
9172 }
9173}
9174
76e3520e 9175STATIC char *
f54cb97a 9176S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 9177{
97aff369 9178 dVAR;
6136c704 9179 char *bracket = NULL;
748a9306 9180 char funny = *s++;
eb578fdb
KW
9181 char *d = dest;
9182 char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 9183
7918f24d
NC
9184 PERL_ARGS_ASSERT_SCAN_IDENT;
9185
a0d0e21e 9186 if (isSPACE(*s))
29595ff2 9187 s = PEEKSPACE(s);
de3bb511 9188 if (isDIGIT(*s)) {
8903cb82 9189 while (isDIGIT(*s)) {
9190 if (d >= e)
cea2e8a9 9191 Perl_croak(aTHX_ ident_too_long);
378cc40b 9192 *d++ = *s++;
8903cb82 9193 }
378cc40b
LW
9194 }
9195 else {
463ee0b2 9196 for (;;) {
8903cb82 9197 if (d >= e)
cea2e8a9 9198 Perl_croak(aTHX_ ident_too_long);
834a4ddd 9199 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 9200 *d++ = *s++;
7e2040f0 9201 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
9202 *d++ = ':';
9203 *d++ = ':';
9204 s++;
9205 }
a0d0e21e 9206 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
9207 *d++ = *s++;
9208 *d++ = *s++;
9209 }
fd400ab9 9210 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 9211 char *t = s + UTF8SKIP(s);
fd400ab9 9212 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
9213 t += UTF8SKIP(t);
9214 if (d + (t - s) > e)
cea2e8a9 9215 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
9216 Copy(s, d, t - s, char);
9217 d += t - s;
9218 s = t;
9219 }
463ee0b2
LW
9220 else
9221 break;
9222 }
378cc40b
LW
9223 }
9224 *d = '\0';
9225 d = dest;
79072805 9226 if (*d) {
3280af22
NIS
9227 if (PL_lex_state != LEX_NORMAL)
9228 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 9229 return s;
378cc40b 9230 }
748a9306 9231 if (*s == '$' && s[1] &&
3792a11b 9232 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 9233 {
4810e5ec 9234 return s;
5cd24f17 9235 }
79072805
LW
9236 if (*s == '{') {
9237 bracket = s;
9238 s++;
9239 }
204e6232
BF
9240 if (s < send) {
9241 if (UTF) {
9242 const STRLEN skip = UTF8SKIP(s);
9243 STRLEN i;
9244 d[skip] = '\0';
9245 for ( i = 0; i < skip; i++ )
9246 d[i] = *s++;
9247 }
9248 else {
9249 *d = *s++;
9250 d[1] = '\0';
9251 }
9252 }
2b92dfce 9253 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 9254 *d = toCTRL(*s);
9255 s++;
de3bb511 9256 }
fbdd83da
DIM
9257 else if (ck_uni && !bracket)
9258 check_uni();
79072805 9259 if (bracket) {
748a9306 9260 if (isSPACE(s[-1])) {
fa83b5b6 9261 while (s < send) {
f54cb97a 9262 const char ch = *s++;
bf4acbe4 9263 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 9264 *d = ch;
9265 break;
9266 }
9267 }
748a9306 9268 }
7e2040f0 9269 if (isIDFIRST_lazy_if(d,UTF)) {
204e6232 9270 d += UTF8SKIP(d);
a0ed51b3 9271 if (UTF) {
6136c704
AL
9272 char *end = s;
9273 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9274 end += UTF8SKIP(end);
9275 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9276 end += UTF8SKIP(end);
a0ed51b3 9277 }
6136c704
AL
9278 Copy(s, d, end - s, char);
9279 d += end - s;
9280 s = end;
a0ed51b3
LW
9281 }
9282 else {
2b92dfce 9283 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 9284 *d++ = *s++;
2b92dfce 9285 if (d >= e)
cea2e8a9 9286 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 9287 }
79072805 9288 *d = '\0';
c35e046a
AL
9289 while (s < send && SPACE_OR_TAB(*s))
9290 s++;
ff68c719 9291 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 9292 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
9293 const char * const brack =
9294 (const char *)
9295 ((*s == '[') ? "[...]" : "{...}");
e850844c 9296 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 9297 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 9298 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
9299 funny, dest, brack, funny, dest, brack);
9300 }
79072805 9301 bracket++;
a0be28da 9302 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 9303 PL_lex_allbrackets++;
79072805
LW
9304 return s;
9305 }
4e553d73
NIS
9306 }
9307 /* Handle extended ${^Foo} variables
2b92dfce
GS
9308 * 1999-02-27 mjd-perl-patch@plover.com */
9309 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9310 && isALNUM(*s))
9311 {
9312 d++;
9313 while (isALNUM(*s) && d < e) {
9314 *d++ = *s++;
9315 }
9316 if (d >= e)
cea2e8a9 9317 Perl_croak(aTHX_ ident_too_long);
2b92dfce 9318 *d = '\0';
79072805
LW
9319 }
9320 if (*s == '}') {
9321 s++;
7df0d042 9322 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 9323 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
9324 PL_expect = XREF;
9325 }
d008e5eb 9326 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 9327 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241 9328 (keyword(dest, d - dest, 0)
5c66c3dd 9329 || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
d008e5eb 9330 {
5c66c3dd
BF
9331 SV *tmp = newSVpvn_flags( dest, d - dest,
9332 SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
c35e046a
AL
9333 if (funny == '#')
9334 funny = '@';
9014280d 9335 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5c66c3dd
BF
9336 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9337 funny, tmp, funny, tmp);
d008e5eb
GS
9338 }
9339 }
79072805
LW
9340 }
9341 else {
9342 s = bracket; /* let the parser handle it */
93a17b20 9343 *dest = '\0';
79072805
LW
9344 }
9345 }
3280af22
NIS
9346 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9347 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
9348 return s;
9349}
9350
858a358b 9351static bool
3955e1a9 9352S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
9353
9354 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9355 * the parse starting at 's', based on the subset that are valid in this
9356 * context input to this routine in 'valid_flags'. Advances s. Returns
96f3bfda
KW
9357 * TRUE if the input should be treated as a valid flag, so the next char
9358 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9359 * first call on the current regex. This routine will set it to any
9360 * charset modifier found. The caller shouldn't change it. This way,
9361 * another charset modifier encountered in the parse can be detected as an
9362 * error, as we have decided to allow only one */
858a358b
KW
9363
9364 const char c = **s;
84159251 9365 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
94b03d7d 9366
84159251
BF
9367 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9368 if (isALNUM_lazy_if(*s, UTF)) {
4f8dbb2d 9369 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
84159251
BF
9370 UTF ? SVf_UTF8 : 0);
9371 (*s) += charlen;
96f3bfda
KW
9372 /* Pretend that it worked, so will continue processing before
9373 * dieing */
0da72d5e 9374 return TRUE;
858a358b
KW
9375 }
9376 return FALSE;
9377 }
9378
9379 switch (c) {
94b03d7d 9380
858a358b
KW
9381 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9382 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9383 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9384 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9385 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9386 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d 9387 case LOCALE_PAT_MOD:
3955e1a9
KW
9388 if (*charset) {
9389 goto multiple_charsets;
9390 }
94b03d7d 9391 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 9392 *charset = c;
94b03d7d
KW
9393 break;
9394 case UNICODE_PAT_MOD:
3955e1a9
KW
9395 if (*charset) {
9396 goto multiple_charsets;
9397 }
94b03d7d 9398 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 9399 *charset = c;
94b03d7d
KW
9400 break;
9401 case ASCII_RESTRICT_PAT_MOD:
ff3f26d2 9402 if (! *charset) {
94b03d7d
KW
9403 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9404 }
ff3f26d2
KW
9405 else {
9406
9407 /* Error if previous modifier wasn't an 'a', but if it was, see
9408 * if, and accept, a second occurrence (only) */
9409 if (*charset != 'a'
9410 || get_regex_charset(*pmfl)
9411 != REGEX_ASCII_RESTRICTED_CHARSET)
9412 {
9413 goto multiple_charsets;
9414 }
9415 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
9416 }
9417 *charset = c;
94b03d7d
KW
9418 break;
9419 case DEPENDS_PAT_MOD:
3955e1a9
KW
9420 if (*charset) {
9421 goto multiple_charsets;
9422 }
94b03d7d 9423 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 9424 *charset = c;
94b03d7d 9425 break;
879d0c72 9426 }
94b03d7d 9427
858a358b
KW
9428 (*s)++;
9429 return TRUE;
94b03d7d 9430
3955e1a9
KW
9431 multiple_charsets:
9432 if (*charset != c) {
9433 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9434 }
ff3f26d2
KW
9435 else if (c == 'a') {
9436 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9437 }
3955e1a9
KW
9438 else {
9439 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9440 }
9441
9442 /* Pretend that it worked, so will continue processing before dieing */
9443 (*s)++;
9444 return TRUE;
879d0c72
NC
9445}
9446
76e3520e 9447STATIC char *
cea2e8a9 9448S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 9449{
97aff369 9450 dVAR;
79072805 9451 PMOP *pm;
d24ca0c5 9452 char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
10edeb5d 9453 const char * const valid_flags =
a20207d7 9454 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 9455 char charset = '\0'; /* character set modifier */
5db06880
NC
9456#ifdef PERL_MAD
9457 char *modstart;
9458#endif
9459
7918f24d 9460 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 9461
d24ca0c5
DM
9462 /* this was only needed for the initial scan_str; set it to false
9463 * so that any (?{}) code blocks etc are parsed normally */
9464 PL_reg_state.re_reparsing = FALSE;
25c09cbf 9465 if (!s) {
6136c704 9466 const char * const delimiter = skipspace(start);
10edeb5d
JH
9467 Perl_croak(aTHX_
9468 (const char *)
9469 (*delimiter == '?'
9470 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9471 : "Search pattern not terminated" ));
25c09cbf 9472 }
bbce6d69 9473
8782bef2 9474 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
9475 if (PL_multi_open == '?') {
9476 /* This is the only point in the code that sets PMf_ONCE: */
79072805 9477 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
9478
9479 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9480 allows us to restrict the list needed by reset to just the ??
9481 matches. */
9482 assert(type != OP_TRANS);
9483 if (PL_curstash) {
daba3364 9484 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
9485 U32 elements;
9486 if (!mg) {
daba3364 9487 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
9488 0);
9489 }
9490 elements = mg->mg_len / sizeof(PMOP**);
9491 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9492 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9493 mg->mg_len = elements * sizeof(PMOP**);
9494 PmopSTASH_set(pm,PL_curstash);
9495 }
9496 }
5db06880
NC
9497#ifdef PERL_MAD
9498 modstart = s;
9499#endif
d63c20f2
DM
9500
9501 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9502 * anon CV. False positives like qr/[(?{]/ are harmless */
9503
9504 if (type == OP_QR) {
6f635923
DM
9505 STRLEN len;
9506 char *e, *p = SvPV(PL_lex_stuff, len);
9507 e = p + len;
9508 for (; p < e; p++) {
d63c20f2
DM
9509 if (p[0] == '(' && p[1] == '?'
9510 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9511 {
9512 pm->op_pmflags |= PMf_HAS_CV;
9513 break;
9514 }
9515 }
6f635923 9516 pm->op_pmflags |= PMf_IS_QR;
d63c20f2
DM
9517 }
9518
3955e1a9 9519 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
9520#ifdef PERL_MAD
9521 if (PL_madskills && modstart != s) {
9522 SV* tmptoken = newSVpvn(modstart, s - modstart);
9523 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9524 }
9525#endif
4ac733c9 9526 /* issue a warning if /c is specified,but /g is not */
a2a5de95 9527 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 9528 {
a2a5de95
NC
9529 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9530 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
9531 }
9532
3280af22 9533 PL_lex_op = (OP*)pm;
6154021b 9534 pl_yylval.ival = OP_MATCH;
378cc40b
LW
9535 return s;
9536}
9537
76e3520e 9538STATIC char *
cea2e8a9 9539S_scan_subst(pTHX_ char *start)
79072805 9540{
27da23d5 9541 dVAR;
22594288 9542 char *s;
eb578fdb 9543 PMOP *pm;
4fdae800 9544 I32 first_start;
79072805 9545 I32 es = 0;
3955e1a9 9546 char charset = '\0'; /* character set modifier */
5db06880
NC
9547#ifdef PERL_MAD
9548 char *modstart;
9549#endif
79072805 9550
7918f24d
NC
9551 PERL_ARGS_ASSERT_SCAN_SUBST;
9552
6154021b 9553 pl_yylval.ival = OP_NULL;
79072805 9554
d24ca0c5 9555 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
79072805 9556
37fd879b 9557 if (!s)
cea2e8a9 9558 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9559
3280af22 9560 if (s[-1] == PL_multi_open)
79072805 9561 s--;
5db06880
NC
9562#ifdef PERL_MAD
9563 if (PL_madskills) {
cd81e915
NC
9564 CURMAD('q', PL_thisopen);
9565 CURMAD('_', PL_thiswhite);
9566 CURMAD('E', PL_thisstuff);
9567 CURMAD('Q', PL_thisclose);
9568 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9569 }
9570#endif
79072805 9571
3280af22 9572 first_start = PL_multi_start;
d24ca0c5 9573 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 9574 if (!s) {
37fd879b 9575 if (PL_lex_stuff) {
3280af22 9576 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9577 PL_lex_stuff = NULL;
37fd879b 9578 }
cea2e8a9 9579 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9580 }
3280af22 9581 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9582
79072805 9583 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9584
9585#ifdef PERL_MAD
9586 if (PL_madskills) {
cd81e915
NC
9587 CURMAD('z', PL_thisopen);
9588 CURMAD('R', PL_thisstuff);
9589 CURMAD('Z', PL_thisclose);
5db06880
NC
9590 }
9591 modstart = s;
9592#endif
9593
48c036b1 9594 while (*s) {
a20207d7 9595 if (*s == EXEC_PAT_MOD) {
a687059c 9596 s++;
2f3197b3 9597 es++;
a687059c 9598 }
3955e1a9
KW
9599 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9600 {
48c036b1 9601 break;
aa78b661 9602 }
378cc40b 9603 }
79072805 9604
5db06880
NC
9605#ifdef PERL_MAD
9606 if (PL_madskills) {
9607 if (modstart != s)
9608 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9609 append_madprops(PL_thismad, (OP*)pm, 0);
9610 PL_thismad = 0;
5db06880
NC
9611 }
9612#endif
a2a5de95
NC
9613 if ((pm->op_pmflags & PMf_CONTINUE)) {
9614 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9615 }
9616
79072805 9617 if (es) {
6136c704
AL
9618 SV * const repl = newSVpvs("");
9619
0244c3a4 9620 PL_multi_end = 0;
79072805 9621 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9622 while (es-- > 0) {
9623 if (es)
9624 sv_catpvs(repl, "eval ");
9625 else
9626 sv_catpvs(repl, "do ");
9627 }
6f43d98f 9628 sv_catpvs(repl, "{");
7cc34111 9629 sv_catsv(repl, PL_sublex_info.repl);
9badc361 9630 sv_catpvs(repl, "}");
25da4f38 9631 SvEVALED_on(repl);
7cc34111
FC
9632 SvREFCNT_dec(PL_sublex_info.repl);
9633 PL_sublex_info.repl = repl;
378cc40b 9634 }
79072805 9635
3280af22 9636 PL_lex_op = (OP*)pm;
6154021b 9637 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9638 return s;
9639}
9640
76e3520e 9641STATIC char *
cea2e8a9 9642S_scan_trans(pTHX_ char *start)
378cc40b 9643{
97aff369 9644 dVAR;
eb578fdb 9645 char* s;
11343788 9646 OP *o;
b84c11c8
NC
9647 U8 squash;
9648 U8 del;
9649 U8 complement;
bb16bae8 9650 bool nondestruct = 0;
5db06880
NC
9651#ifdef PERL_MAD
9652 char *modstart;
9653#endif
79072805 9654
7918f24d
NC
9655 PERL_ARGS_ASSERT_SCAN_TRANS;
9656
6154021b 9657 pl_yylval.ival = OP_NULL;
79072805 9658
d24ca0c5 9659 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
37fd879b 9660 if (!s)
cea2e8a9 9661 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9662
3280af22 9663 if (s[-1] == PL_multi_open)
2f3197b3 9664 s--;
5db06880
NC
9665#ifdef PERL_MAD
9666 if (PL_madskills) {
cd81e915
NC
9667 CURMAD('q', PL_thisopen);
9668 CURMAD('_', PL_thiswhite);
9669 CURMAD('E', PL_thisstuff);
9670 CURMAD('Q', PL_thisclose);
9671 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9672 }
9673#endif
2f3197b3 9674
d24ca0c5 9675 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 9676 if (!s) {
37fd879b 9677 if (PL_lex_stuff) {
3280af22 9678 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9679 PL_lex_stuff = NULL;
37fd879b 9680 }
cea2e8a9 9681 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9682 }
5db06880 9683 if (PL_madskills) {
cd81e915
NC
9684 CURMAD('z', PL_thisopen);
9685 CURMAD('R', PL_thisstuff);
9686 CURMAD('Z', PL_thisclose);
5db06880 9687 }
79072805 9688
a0ed51b3 9689 complement = del = squash = 0;
5db06880
NC
9690#ifdef PERL_MAD
9691 modstart = s;
9692#endif
7a1e2023
NC
9693 while (1) {
9694 switch (*s) {
9695 case 'c':
79072805 9696 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9697 break;
9698 case 'd':
a0ed51b3 9699 del = OPpTRANS_DELETE;
7a1e2023
NC
9700 break;
9701 case 's':
79072805 9702 squash = OPpTRANS_SQUASH;
7a1e2023 9703 break;
bb16bae8
FC
9704 case 'r':
9705 nondestruct = 1;
9706 break;
7a1e2023
NC
9707 default:
9708 goto no_more;
9709 }
395c3793
LW
9710 s++;
9711 }
7a1e2023 9712 no_more:
8973db79 9713
9100eeb1 9714 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
59f00321
RGS
9715 o->op_private &= ~OPpTRANS_ALL;
9716 o->op_private |= del|squash|complement|
7948272d 9717 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
7cc34111 9718 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
79072805 9719
3280af22 9720 PL_lex_op = o;
bb16bae8 9721 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9722
9723#ifdef PERL_MAD
9724 if (PL_madskills) {
9725 if (modstart != s)
9726 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9727 append_madprops(PL_thismad, o, 0);
9728 PL_thismad = 0;
5db06880
NC
9729 }
9730#endif
9731
79072805
LW
9732 return s;
9733}
9734
5097bf9b
FC
9735/* scan_heredoc
9736 Takes a pointer to the first < in <<FOO.
9737 Returns a pointer to the byte following <<FOO.
9738
9739 This function scans a heredoc, which involves different methods
9740 depending on whether we are in a string eval, quoted construct, etc.
9741 This is because PL_linestr could containing a single line of input, or
9742 a whole string being evalled, or the contents of the current quote-
9743 like operator.
9744
19bbc0d7
FC
9745 The two basic methods are:
9746 - Steal lines from the input stream
9747 - Scan the heredoc in PL_linestr and remove it therefrom
9748
9749 In a file scope or filtered eval, the first method is used; in a
9750 string eval, the second.
9751
9752 In a quote-like operator, we have to choose between the two,
9753 depending on where we can find a newline. We peek into outer lex-
9754 ing scopes until we find one with a newline in it. If we reach the
9755 outermost lexing scope and it is a file, we use the stream method.
9756 Otherwise it is treated as an eval.
5097bf9b
FC
9757*/
9758
76e3520e 9759STATIC char *
cea2e8a9 9760S_scan_heredoc(pTHX_ register char *s)
79072805 9761{
97aff369 9762 dVAR;
79072805
LW
9763 I32 op_type = OP_SCALAR;
9764 I32 len;
9765 SV *tmpstr;
9766 char term;
eb578fdb
KW
9767 char *d;
9768 char *e;
4633a7c4 9769 char *peek;
5097bf9b 9770 const bool infile = PL_rsfp || PL_parser->filtered;
78a635de 9771 LEXSHARED *shared = PL_parser->lex_shared;
5db06880
NC
9772#ifdef PERL_MAD
9773 I32 stuffstart = s - SvPVX(PL_linestr);
9774 char *tstart;
9775
cd81e915 9776 PL_realtokenstart = -1;
5db06880 9777#endif
79072805 9778
7918f24d
NC
9779 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9780
79072805 9781 s += 2;
5097bf9b 9782 d = PL_tokenbuf + 1;
3280af22 9783 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
5097bf9b 9784 *PL_tokenbuf = '\n';
c35e046a
AL
9785 peek = s;
9786 while (SPACE_OR_TAB(*peek))
9787 peek++;
3792a11b 9788 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9789 s = peek;
79072805 9790 term = *s++;
3280af22 9791 s = delimcpy(d, e, s, PL_bufend, term, &len);
6f2d7fc9
FC
9792 if (s == PL_bufend)
9793 Perl_croak(aTHX_ "Unterminated delimiter for here document");
fc36a67e 9794 d += len;
6f2d7fc9 9795 s++;
79072805
LW
9796 }
9797 else {
9798 if (*s == '\\')
458391bd 9799 /* <<\FOO is equivalent to <<'FOO' */
79072805
LW
9800 s++, term = '\'';
9801 else
9802 term = '"';
7e2040f0 9803 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9804 deprecate("bare << to mean <<\"\"");
7e2040f0 9805 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9806 if (d < e)
9807 *d++ = *s;
9808 }
9809 }
3280af22 9810 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9811 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9812 *d++ = '\n';
9813 *d = '\0';
3280af22 9814 len = d - PL_tokenbuf;
5db06880
NC
9815
9816#ifdef PERL_MAD
9817 if (PL_madskills) {
5097bf9b
FC
9818 tstart = PL_tokenbuf + 1;
9819 PL_thisclose = newSVpvn(tstart, len - 1);
5db06880 9820 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9821 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9822 stuffstart = s - SvPVX(PL_linestr);
9823 }
9824#endif
6a27c188 9825#ifndef PERL_STRICT_CR
f63a84b2
LW
9826 d = strchr(s, '\r');
9827 if (d) {
b464bac0 9828 char * const olds = s;
f63a84b2 9829 s = d;
3280af22 9830 while (s < PL_bufend) {
f63a84b2
LW
9831 if (*s == '\r') {
9832 *d++ = '\n';
9833 if (*++s == '\n')
9834 s++;
9835 }
9836 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9837 *d++ = *s++;
9838 s++;
9839 }
9840 else
9841 *d++ = *s++;
9842 }
9843 *d = '\0';
3280af22 9844 PL_bufend = d;
95a20fc0 9845 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9846 s = olds;
9847 }
9848#endif
5db06880
NC
9849#ifdef PERL_MAD
9850 if (PL_madskills) {
9851 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9852 if (PL_thisstuff)
9853 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9854 else
cd81e915 9855 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880 9856 }
748a9306 9857
5db06880 9858 stuffstart = s - SvPVX(PL_linestr);
5db06880
NC
9859#endif
9860
7d0a29fe
NC
9861 tmpstr = newSV_type(SVt_PVIV);
9862 SvGROW(tmpstr, 80);
748a9306 9863 if (term == '\'') {
79072805 9864 op_type = OP_CONST;
45977657 9865 SvIV_set(tmpstr, -1);
748a9306
LW
9866 }
9867 else if (term == '`') {
79072805 9868 op_type = OP_BACKTICK;
45977657 9869 SvIV_set(tmpstr, '\\');
748a9306 9870 }
79072805 9871
78a635de 9872 PL_multi_start = CopLINE(PL_curcop) + 1;
3280af22 9873 PL_multi_open = PL_multi_close = '<';
19bbc0d7 9874 /* inside a string eval or quote-like operator */
4efe39d2 9875 if (!infile || PL_lex_inwhat) {
60f40a38 9876 SV *linestr;
3526bd3e 9877 char *bufend;
074b1c59 9878 char * const olds = s;
d37427bc 9879 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
19bbc0d7
FC
9880 /* These two fields are not set until an inner lexing scope is
9881 entered. But we need them set here. */
4efe39d2
FC
9882 shared->ls_bufptr = s;
9883 shared->ls_linestr = PL_linestr;
9884 if (PL_lex_inwhat)
9885 /* Look for a newline. If the current buffer does not have one,
9886 peek into the line buffer of the parent lexing scope, going
9887 up as many levels as necessary to find one with a newline
9888 after bufptr.
9889 */
9890 while (!(s = (char *)memchr(
9891 (void *)shared->ls_bufptr, '\n',
9892 SvEND(shared->ls_linestr)-shared->ls_bufptr
9893 ))) {
60f40a38 9894 shared = shared->ls_prev;
f68f7dc1
FC
9895 /* shared is only null if we have gone beyond the outermost
9896 lexing scope. In a file, we will have broken out of the
9897 loop in the previous iteration. In an eval, the string buf-
9898 fer ends with "\n;", so the while condition below will have
9899 evaluated to false. So shared can never be null. */
9900 assert(shared);
60f40a38
FC
9901 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9902 most lexing scope. In a file, shared->ls_linestr at that
9903 level is just one line, so there is no body to steal. */
9904 if (infile && !shared->ls_prev) {
074b1c59 9905 s = olds;
99bd9d90
FC
9906 goto streaming;
9907 }
4efe39d2
FC
9908 }
9909 else { /* eval */
9910 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9911 assert(s);
9912 }
60f40a38
FC
9913 linestr = shared->ls_linestr;
9914 bufend = SvEND(linestr);
0244c3a4
GS
9915 d = s;
9916 while (s < bufend &&
5bd13da3 9917 (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
0244c3a4 9918 if (*s++ == '\n')
78a635de 9919 ++shared->herelines;
0244c3a4
GS
9920 }
9921 if (s >= bufend) {
932d0cf1 9922 goto interminable;
0244c3a4 9923 }
3328ab5a 9924 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9925#ifdef PERL_MAD
9926 if (PL_madskills) {
cd81e915
NC
9927 if (PL_thisstuff)
9928 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9929 else
cd81e915 9930 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9931 stuffstart = s - SvPVX(PL_linestr);
9932 }
9933#endif
79072805 9934 s += len - 1;
d794b522 9935 /* the preceding stmt passes a newline */
78a635de 9936 shared->herelines++;
49d8d3a1 9937
db444266
FC
9938 /* s now points to the newline after the heredoc terminator.
9939 d points to the newline before the body of the heredoc.
9940 */
19bbc0d7
FC
9941
9942 /* We are going to modify linestr in place here, so set
9943 aside copies of the string if necessary for re-evals or
9944 (caller $n)[6]. */
a91428a4 9945 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
3328ab5a
FC
9946 check shared->re_eval_str. */
9947 if (shared->re_eval_start || shared->re_eval_str) {
db444266 9948 /* Set aside the rest of the regexp */
3328ab5a
FC
9949 if (!shared->re_eval_str)
9950 shared->re_eval_str =
9951 newSVpvn(shared->re_eval_start,
4efe39d2 9952 bufend - shared->re_eval_start);
3328ab5a 9953 shared->re_eval_start -= s-d;
db444266 9954 }
d4fe4ada
RU
9955 if (cxstack_ix >= 0 && CxTYPE(cx) == CXt_EVAL &&
9956 CxOLD_OP_TYPE(cx) == OP_ENTEREVAL &&
9957 cx->blk_eval.cur_text == linestr)
9958 {
4efe39d2 9959 cx->blk_eval.cur_text = newSVsv(linestr);
d37427bc
FC
9960 SvSCREAM_on(cx->blk_eval.cur_text);
9961 }
db444266 9962 /* Copy everything from s onwards back to d. */
4efe39d2
FC
9963 Move(s,d,bufend-s + 1,char);
9964 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
19bbc0d7
FC
9965 /* Setting PL_bufend only applies when we have not dug deeper
9966 into other scopes, because sublex_done sets PL_bufend to
9967 SvEND(PL_linestr). */
4efe39d2 9968 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
db444266 9969 s = olds;
79072805
LW
9970 }
9971 else
a7922135 9972 {
3328ab5a 9973 SV *linestr_save;
a7922135
FC
9974 streaming:
9975 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9976 term = PL_tokenbuf[1];
9977 len--;
3328ab5a 9978 linestr_save = PL_linestr; /* must restore this afterwards */
074b1c59 9979 d = s; /* and this */
3328ab5a 9980 PL_linestr = newSVpvs("");
074b1c59
FC
9981 PL_bufend = SvPVX(PL_linestr);
9982 while (1) {
5db06880
NC
9983#ifdef PERL_MAD
9984 if (PL_madskills) {
9985 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9986 if (PL_thisstuff)
9987 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9988 else
cd81e915 9989 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9990 }
9991#endif
074b1c59 9992 PL_bufptr = PL_bufend;
d794b522 9993 CopLINE_set(PL_curcop,
78a635de 9994 PL_multi_start + shared->herelines);
112d1284
FC
9995 if (!lex_next_chunk(LEX_NO_TERM)
9996 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
3328ab5a 9997 SvREFCNT_dec(linestr_save);
932d0cf1 9998 goto interminable;
79072805 9999 }
78a635de 10000 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
112d1284
FC
10001 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
10002 lex_grow_linestr(SvCUR(PL_linestr) + 2);
10003 sv_catpvs(PL_linestr, "\n\0");
10004 }
f0e67a1d 10005 s = PL_bufptr;
5db06880
NC
10006#ifdef PERL_MAD
10007 stuffstart = s - SvPVX(PL_linestr);
10008#endif
78a635de 10009 shared->herelines++;
bd61b366 10010 PL_last_lop = PL_last_uni = NULL;
6a27c188 10011#ifndef PERL_STRICT_CR
3280af22 10012 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
10013 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
10014 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 10015 {
3280af22
NIS
10016 PL_bufend[-2] = '\n';
10017 PL_bufend--;
95a20fc0 10018 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 10019 }
3280af22
NIS
10020 else if (PL_bufend[-1] == '\r')
10021 PL_bufend[-1] = '\n';
f63a84b2 10022 }
3280af22
NIS
10023 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
10024 PL_bufend[-1] = '\n';
f63a84b2 10025#endif
5097bf9b 10026 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
3328ab5a
FC
10027 SvREFCNT_dec(PL_linestr);
10028 PL_linestr = linestr_save;
10029 PL_linestart = SvPVX(linestr_save);
3280af22 10030 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3328ab5a 10031 s = d;
074b1c59 10032 break;
79072805
LW
10033 }
10034 else {
3280af22 10035 sv_catsv(tmpstr,PL_linestr);
395c3793 10036 }
a7922135 10037 }
395c3793 10038 }
57843af0 10039 PL_multi_end = CopLINE(PL_curcop);
79072805 10040 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 10041 SvPV_shrink_to_cur(tmpstr);
79072805 10042 }
2f31ce75 10043 if (!IN_BYTES) {
95a20fc0 10044 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
10045 SvUTF8_on(tmpstr);
10046 else if (PL_encoding)
10047 sv_recode_to_utf8(tmpstr, PL_encoding);
10048 }
3280af22 10049 PL_lex_stuff = tmpstr;
6154021b 10050 pl_yylval.ival = op_type;
79072805 10051 return s;
932d0cf1
FC
10052
10053 interminable:
932d0cf1
FC
10054 SvREFCNT_dec(tmpstr);
10055 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
10056 missingterm(PL_tokenbuf + 1);
79072805
LW
10057}
10058
02aa26ce
NT
10059/* scan_inputsymbol
10060 takes: current position in input buffer
10061 returns: new position in input buffer
6154021b 10062 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
10063
10064 This code handles:
10065
10066 <> read from ARGV
10067 <FH> read from filehandle
10068 <pkg::FH> read from package qualified filehandle
10069 <pkg'FH> read from package qualified filehandle
10070 <$fh> read from filehandle in $fh
10071 <*.h> filename glob
10072
10073*/
10074
76e3520e 10075STATIC char *
cea2e8a9 10076S_scan_inputsymbol(pTHX_ char *start)
79072805 10077{
97aff369 10078 dVAR;
eb578fdb 10079 char *s = start; /* current position in buffer */
1b420867 10080 char *end;
79072805 10081 I32 len;
6136c704
AL
10082 char *d = PL_tokenbuf; /* start of temp holding space */
10083 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
10084
7918f24d
NC
10085 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
10086
1b420867
GS
10087 end = strchr(s, '\n');
10088 if (!end)
10089 end = PL_bufend;
10090 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
10091
10092 /* die if we didn't have space for the contents of the <>,
1b420867 10093 or if it didn't end, or if we see a newline
02aa26ce
NT
10094 */
10095
bb7a0f54 10096 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 10097 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 10098 if (s >= end)
cea2e8a9 10099 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 10100
fc36a67e 10101 s++;
02aa26ce
NT
10102
10103 /* check for <$fh>
10104 Remember, only scalar variables are interpreted as filehandles by
10105 this code. Anything more complex (e.g., <$fh{$num}>) will be
10106 treated as a glob() call.
10107 This code makes use of the fact that except for the $ at the front,
10108 a scalar variable and a filehandle look the same.
10109 */
4633a7c4 10110 if (*d == '$' && d[1]) d++;
02aa26ce
NT
10111
10112 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 10113 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
2a507800 10114 d += UTF ? UTF8SKIP(d) : 1;
02aa26ce
NT
10115
10116 /* If we've tried to read what we allow filehandles to look like, and
10117 there's still text left, then it must be a glob() and not a getline.
10118 Use scan_str to pull out the stuff between the <> and treat it
10119 as nothing more than a string.
10120 */
10121
3280af22 10122 if (d - PL_tokenbuf != len) {
6154021b 10123 pl_yylval.ival = OP_GLOB;
d24ca0c5 10124 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
79072805 10125 if (!s)
cea2e8a9 10126 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
10127 return s;
10128 }
395c3793 10129 else {
9b3023bc 10130 bool readline_overriden = FALSE;
6136c704 10131 GV *gv_readline;
9b3023bc 10132 GV **gvp;
02aa26ce 10133 /* we're in a filehandle read situation */
3280af22 10134 d = PL_tokenbuf;
02aa26ce
NT
10135
10136 /* turn <> into <ARGV> */
79072805 10137 if (!len)
689badd5 10138 Copy("ARGV",d,5,char);
02aa26ce 10139
9b3023bc 10140 /* Check whether readline() is overriden */
fafc274c 10141 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 10142 if ((gv_readline
ba979b31 10143 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 10144 ||
017a3ce5 10145 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 10146 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 10147 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
10148 readline_overriden = TRUE;
10149
02aa26ce
NT
10150 /* if <$fh>, create the ops to turn the variable into a
10151 filehandle
10152 */
79072805 10153 if (*d == '$') {
02aa26ce
NT
10154 /* try to find it in the pad for this block, otherwise find
10155 add symbol table ops
10156 */
bc9b26ca 10157 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 10158 if (tmp != NOT_IN_PAD) {
00b1698f 10159 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
10160 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
10161 HEK * const stashname = HvNAME_HEK(stash);
10162 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 10163 sv_catpvs(sym, "::");
f558d5af
JH
10164 sv_catpv(sym, d+1);
10165 d = SvPVX(sym);
10166 goto intro_sym;
10167 }
10168 else {
6136c704 10169 OP * const o = newOP(OP_PADSV, 0);
f558d5af 10170 o->op_targ = tmp;
9b3023bc
RGS
10171 PL_lex_op = readline_overriden
10172 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10173 op_append_elem(OP_LIST, o,
9b3023bc
RGS
10174 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
10175 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 10176 }
a0d0e21e
LW
10177 }
10178 else {
f558d5af
JH
10179 GV *gv;
10180 ++d;
10181intro_sym:
10182 gv = gv_fetchpv(d,
10183 (PL_in_eval
10184 ? (GV_ADDMULTI | GV_ADDINEVAL)
25db2ea6 10185 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
f558d5af 10186 SVt_PV);
9b3023bc
RGS
10187 PL_lex_op = readline_overriden
10188 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10189 op_append_elem(OP_LIST,
9b3023bc
RGS
10190 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10191 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10192 : (OP*)newUNOP(OP_READLINE, 0,
10193 newUNOP(OP_RV2SV, 0,
10194 newGVOP(OP_GV, 0, gv)));
a0d0e21e 10195 }
7c6fadd6
RGS
10196 if (!readline_overriden)
10197 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
10198 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10199 pl_yylval.ival = OP_NULL;
79072805 10200 }
02aa26ce
NT
10201
10202 /* If it's none of the above, it must be a literal filehandle
10203 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 10204 else {
25db2ea6 10205 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9b3023bc
RGS
10206 PL_lex_op = readline_overriden
10207 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10208 op_append_elem(OP_LIST,
9b3023bc
RGS
10209 newGVOP(OP_GV, 0, gv),
10210 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10211 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 10212 pl_yylval.ival = OP_NULL;
79072805
LW
10213 }
10214 }
02aa26ce 10215
79072805
LW
10216 return s;
10217}
10218
02aa26ce
NT
10219
10220/* scan_str
10221 takes: start position in buffer
09bef843
SB
10222 keep_quoted preserve \ on the embedded delimiter(s)
10223 keep_delims preserve the delimiters around the string
d24ca0c5
DM
10224 re_reparse compiling a run-time /(?{})/:
10225 collapse // to /, and skip encoding src
02aa26ce
NT
10226 returns: position to continue reading from buffer
10227 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10228 updates the read buffer.
10229
10230 This subroutine pulls a string out of the input. It is called for:
10231 q single quotes q(literal text)
10232 ' single quotes 'literal text'
10233 qq double quotes qq(interpolate $here please)
10234 " double quotes "interpolate $here please"
10235 qx backticks qx(/bin/ls -l)
10236 ` backticks `/bin/ls -l`
10237 qw quote words @EXPORT_OK = qw( func() $spam )
10238 m// regexp match m/this/
10239 s/// regexp substitute s/this/that/
10240 tr/// string transliterate tr/this/that/
10241 y/// string transliterate y/this/that/
10242 ($*@) sub prototypes sub foo ($)
09bef843 10243 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
10244 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10245
10246 In most of these cases (all but <>, patterns and transliterate)
10247 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10248 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10249 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10250 calls scan_str().
4e553d73 10251
02aa26ce
NT
10252 It skips whitespace before the string starts, and treats the first
10253 character as the delimiter. If the delimiter is one of ([{< then
10254 the corresponding "close" character )]}> is used as the closing
10255 delimiter. It allows quoting of delimiters, and if the string has
10256 balanced delimiters ([{<>}]) it allows nesting.
10257
37fd879b
HS
10258 On success, the SV with the resulting string is put into lex_stuff or,
10259 if that is already non-NULL, into lex_repl. The second case occurs only
10260 when parsing the RHS of the special constructs s/// and tr/// (y///).
10261 For convenience, the terminating delimiter character is stuffed into
10262 SvIVX of the SV.
02aa26ce
NT
10263*/
10264
76e3520e 10265STATIC char *
d24ca0c5 10266S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
79072805 10267{
97aff369 10268 dVAR;
02aa26ce 10269 SV *sv; /* scalar value: string */
d3fcec1f 10270 const char *tmps; /* temp string, used for delimiter matching */
eb578fdb
KW
10271 char *s = start; /* current position in the buffer */
10272 char term; /* terminating character */
10273 char *to; /* current position in the sv's data */
02aa26ce 10274 I32 brackets = 1; /* bracket nesting level */
89491803 10275 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 10276 I32 termcode; /* terminating char. code */
89ebb4a3 10277 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 10278 STRLEN termlen; /* length of terminating string */
0331ef07 10279 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
10280#ifdef PERL_MAD
10281 int stuffstart;
10282 char *tstart;
10283#endif
02aa26ce 10284
7918f24d
NC
10285 PERL_ARGS_ASSERT_SCAN_STR;
10286
02aa26ce 10287 /* skip space before the delimiter */
29595ff2
NC
10288 if (isSPACE(*s)) {
10289 s = PEEKSPACE(s);
10290 }
02aa26ce 10291
5db06880 10292#ifdef PERL_MAD
cd81e915
NC
10293 if (PL_realtokenstart >= 0) {
10294 stuffstart = PL_realtokenstart;
10295 PL_realtokenstart = -1;
5db06880
NC
10296 }
10297 else
10298 stuffstart = start - SvPVX(PL_linestr);
10299#endif
02aa26ce 10300 /* mark where we are, in case we need to report errors */
79072805 10301 CLINE;
02aa26ce
NT
10302
10303 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 10304 term = *s;
220e2d4e
IH
10305 if (!UTF) {
10306 termcode = termstr[0] = term;
10307 termlen = 1;
10308 }
10309 else {
4b88fb76 10310 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
220e2d4e
IH
10311 Copy(s, termstr, termlen, U8);
10312 if (!UTF8_IS_INVARIANT(term))
10313 has_utf8 = TRUE;
10314 }
b1c7b182 10315
02aa26ce 10316 /* mark where we are */
57843af0 10317 PL_multi_start = CopLINE(PL_curcop);
3280af22 10318 PL_multi_open = term;
02aa26ce
NT
10319
10320 /* find corresponding closing delimiter */
93a17b20 10321 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
10322 termcode = termstr[0] = term = tmps[5];
10323
3280af22 10324 PL_multi_close = term;
79072805 10325
561b68a9
SH
10326 /* create a new SV to hold the contents. 79 is the SV's initial length.
10327 What a random number. */
7d0a29fe
NC
10328 sv = newSV_type(SVt_PVIV);
10329 SvGROW(sv, 80);
45977657 10330 SvIV_set(sv, termcode);
a0d0e21e 10331 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
10332
10333 /* move past delimiter and try to read a complete string */
09bef843 10334 if (keep_delims)
220e2d4e
IH
10335 sv_catpvn(sv, s, termlen);
10336 s += termlen;
5db06880
NC
10337#ifdef PERL_MAD
10338 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10339 if (!PL_thisopen && !keep_delims) {
10340 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
10341 stuffstart = s - SvPVX(PL_linestr);
10342 }
10343#endif
93a17b20 10344 for (;;) {
d24ca0c5 10345 if (PL_encoding && !UTF && !re_reparse) {
220e2d4e
IH
10346 bool cont = TRUE;
10347
10348 while (cont) {
95a20fc0 10349 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 10350 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 10351 &offset, (char*)termstr, termlen);
6136c704
AL
10352 const char * const ns = SvPVX_const(PL_linestr) + offset;
10353 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
10354
10355 for (; s < ns; s++) {
60d63348 10356 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10357 COPLINE_INC_WITH_HERELINES;
220e2d4e
IH
10358 }
10359 if (!found)
10360 goto read_more_line;
10361 else {
10362 /* handle quoted delimiters */
52327caf 10363 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 10364 const char *t;
95a20fc0 10365 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
10366 t--;
10367 if ((svlast-1 - t) % 2) {
10368 if (!keep_quoted) {
10369 *(svlast-1) = term;
10370 *svlast = '\0';
10371 SvCUR_set(sv, SvCUR(sv) - 1);
10372 }
10373 continue;
10374 }
10375 }
10376 if (PL_multi_open == PL_multi_close) {
10377 cont = FALSE;
10378 }
10379 else {
f54cb97a
AL
10380 const char *t;
10381 char *w;
0331ef07 10382 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
10383 /* At here, all closes are "was quoted" one,
10384 so we don't check PL_multi_close. */
10385 if (*t == '\\') {
10386 if (!keep_quoted && *(t+1) == PL_multi_open)
10387 t++;
10388 else
10389 *w++ = *t++;
10390 }
10391 else if (*t == PL_multi_open)
10392 brackets++;
10393
10394 *w = *t;
10395 }
10396 if (w < t) {
10397 *w++ = term;
10398 *w = '\0';
95a20fc0 10399 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 10400 }
0331ef07 10401 last_off = w - SvPVX(sv);
220e2d4e
IH
10402 if (--brackets <= 0)
10403 cont = FALSE;
10404 }
10405 }
10406 }
10407 if (!keep_delims) {
10408 SvCUR_set(sv, SvCUR(sv) - 1);
10409 *SvEND(sv) = '\0';
10410 }
10411 break;
10412 }
10413
02aa26ce 10414 /* extend sv if need be */
3280af22 10415 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 10416 /* set 'to' to the next character in the sv's string */
463ee0b2 10417 to = SvPVX(sv)+SvCUR(sv);
09bef843 10418
02aa26ce 10419 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
10420 if (PL_multi_open == PL_multi_close) {
10421 for (; s < PL_bufend; s++,to++) {
02aa26ce 10422 /* embedded newlines increment the current line number */
60d63348 10423 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10424 COPLINE_INC_WITH_HERELINES;
02aa26ce 10425 /* handle quoted delimiters */
3280af22 10426 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
d24ca0c5
DM
10427 if (!keep_quoted
10428 && (s[1] == term
10429 || (re_reparse && s[1] == '\\'))
10430 )
a0d0e21e 10431 s++;
d24ca0c5 10432 /* any other quotes are simply copied straight through */
a0d0e21e
LW
10433 else
10434 *to++ = *s++;
10435 }
02aa26ce
NT
10436 /* terminate when run out of buffer (the for() condition), or
10437 have found the terminator */
220e2d4e
IH
10438 else if (*s == term) {
10439 if (termlen == 1)
10440 break;
f3b9ce0f 10441 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
10442 break;
10443 }
63cd0674 10444 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10445 has_utf8 = TRUE;
93a17b20
LW
10446 *to = *s;
10447 }
10448 }
02aa26ce
NT
10449
10450 /* if the terminator isn't the same as the start character (e.g.,
10451 matched brackets), we have to allow more in the quoting, and
10452 be prepared for nested brackets.
10453 */
93a17b20 10454 else {
02aa26ce 10455 /* read until we run out of string, or we find the terminator */
3280af22 10456 for (; s < PL_bufend; s++,to++) {
02aa26ce 10457 /* embedded newlines increment the line count */
60d63348 10458 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10459 COPLINE_INC_WITH_HERELINES;
02aa26ce 10460 /* backslashes can escape the open or closing characters */
3280af22 10461 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
10462 if (!keep_quoted &&
10463 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
10464 s++;
10465 else
10466 *to++ = *s++;
10467 }
02aa26ce 10468 /* allow nested opens and closes */
3280af22 10469 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 10470 break;
3280af22 10471 else if (*s == PL_multi_open)
93a17b20 10472 brackets++;
63cd0674 10473 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10474 has_utf8 = TRUE;
93a17b20
LW
10475 *to = *s;
10476 }
10477 }
02aa26ce 10478 /* terminate the copied string and update the sv's end-of-string */
93a17b20 10479 *to = '\0';
95a20fc0 10480 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 10481
02aa26ce
NT
10482 /*
10483 * this next chunk reads more into the buffer if we're not done yet
10484 */
10485
b1c7b182
GS
10486 if (s < PL_bufend)
10487 break; /* handle case where we are done yet :-) */
79072805 10488
6a27c188 10489#ifndef PERL_STRICT_CR
95a20fc0 10490 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
10491 if ((to[-2] == '\r' && to[-1] == '\n') ||
10492 (to[-2] == '\n' && to[-1] == '\r'))
10493 {
f63a84b2
LW
10494 to[-2] = '\n';
10495 to--;
95a20fc0 10496 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
10497 }
10498 else if (to[-1] == '\r')
10499 to[-1] = '\n';
10500 }
95a20fc0 10501 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
10502 to[-1] = '\n';
10503#endif
10504
220e2d4e 10505 read_more_line:
02aa26ce
NT
10506 /* if we're out of file, or a read fails, bail and reset the current
10507 line marker so we can report where the unterminated string began
10508 */
5db06880
NC
10509#ifdef PERL_MAD
10510 if (PL_madskills) {
c35e046a 10511 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10512 if (PL_thisstuff)
10513 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10514 else
cd81e915 10515 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10516 }
10517#endif
83944c01 10518 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
10519 PL_bufptr = PL_bufend;
10520 if (!lex_next_chunk(0)) {
c07a80fd 10521 sv_free(sv);
eb160463 10522 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 10523 return NULL;
79072805 10524 }
f0e67a1d 10525 s = PL_bufptr;
5db06880
NC
10526#ifdef PERL_MAD
10527 stuffstart = 0;
10528#endif
378cc40b 10529 }
4e553d73 10530
02aa26ce
NT
10531 /* at this point, we have successfully read the delimited string */
10532
d24ca0c5 10533 if (!PL_encoding || UTF || re_reparse) {
5db06880
NC
10534#ifdef PERL_MAD
10535 if (PL_madskills) {
c35e046a 10536 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 10537 const int len = s - tstart;
cd81e915 10538 if (PL_thisstuff)
c35e046a 10539 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10540 else
c35e046a 10541 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10542 if (!PL_thisclose && !keep_delims)
10543 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
10544 }
10545#endif
10546
220e2d4e
IH
10547 if (keep_delims)
10548 sv_catpvn(sv, s, termlen);
10549 s += termlen;
10550 }
5db06880
NC
10551#ifdef PERL_MAD
10552 else {
10553 if (PL_madskills) {
c35e046a
AL
10554 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10555 const int len = s - tstart - termlen;
cd81e915 10556 if (PL_thisstuff)
c35e046a 10557 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10558 else
c35e046a 10559 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10560 if (!PL_thisclose && !keep_delims)
10561 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
10562 }
10563 }
10564#endif
d24ca0c5 10565 if (has_utf8 || (PL_encoding && !re_reparse))
b1c7b182 10566 SvUTF8_on(sv);
d0063567 10567
57843af0 10568 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
10569
10570 /* if we allocated too much space, give some back */
93a17b20
LW
10571 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10572 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 10573 SvPV_renew(sv, SvLEN(sv));
79072805 10574 }
02aa26ce
NT
10575
10576 /* decide whether this is the first or second quoted string we've read
10577 for this op
10578 */
4e553d73 10579
3280af22 10580 if (PL_lex_stuff)
7cc34111 10581 PL_sublex_info.repl = sv;
79072805 10582 else
3280af22 10583 PL_lex_stuff = sv;
378cc40b
LW
10584 return s;
10585}
10586
02aa26ce
NT
10587/*
10588 scan_num
10589 takes: pointer to position in buffer
10590 returns: pointer to new position in buffer
6154021b 10591 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
10592
10593 Read a number in any of the formats that Perl accepts:
10594
7fd134d9
JH
10595 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10596 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
10597 0b[01](_?[01])*
10598 0[0-7](_?[0-7])*
10599 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10600
3280af22 10601 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10602 thing it reads.
10603
10604 If it reads a number without a decimal point or an exponent, it will
10605 try converting the number to an integer and see if it can do so
10606 without loss of precision.
10607*/
4e553d73 10608
378cc40b 10609char *
bfed75c6 10610Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10611{
97aff369 10612 dVAR;
eb578fdb
KW
10613 const char *s = start; /* current position in buffer */
10614 char *d; /* destination in temp buffer */
10615 char *e; /* end of temp buffer */
86554af2 10616 NV nv; /* number read, as a double */
a0714e2c 10617 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10618 bool floatit; /* boolean: int or float? */
cbbf8932 10619 const char *lastub = NULL; /* position of last underbar */
bfed75c6 10620 static char const number_too_long[] = "Number too long";
378cc40b 10621
7918f24d
NC
10622 PERL_ARGS_ASSERT_SCAN_NUM;
10623
02aa26ce
NT
10624 /* We use the first character to decide what type of number this is */
10625
378cc40b 10626 switch (*s) {
79072805 10627 default:
5637ef5b 10628 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
4e553d73 10629
02aa26ce 10630 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10631 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10632 case '0':
10633 {
02aa26ce
NT
10634 /* variables:
10635 u holds the "number so far"
4f19785b
WSI
10636 shift the power of 2 of the base
10637 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10638 overflowed was the number more than we can hold?
10639
10640 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10641 we in octal/hex/binary?" indicator to disallow hex characters
10642 when in octal mode.
02aa26ce 10643 */
9e24b6e2
JH
10644 NV n = 0.0;
10645 UV u = 0;
79072805 10646 I32 shift;
9e24b6e2 10647 bool overflowed = FALSE;
61f33854 10648 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10649 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10650 static const char* const bases[5] =
10651 { "", "binary", "", "octal", "hexadecimal" };
10652 static const char* const Bases[5] =
10653 { "", "Binary", "", "Octal", "Hexadecimal" };
10654 static const char* const maxima[5] =
10655 { "",
10656 "0b11111111111111111111111111111111",
10657 "",
10658 "037777777777",
10659 "0xffffffff" };
bfed75c6 10660 const char *base, *Base, *max;
378cc40b 10661
02aa26ce 10662 /* check for hex */
a674e8db 10663 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10664 shift = 4;
10665 s += 2;
61f33854 10666 just_zero = FALSE;
a674e8db 10667 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10668 shift = 1;
10669 s += 2;
61f33854 10670 just_zero = FALSE;
378cc40b 10671 }
02aa26ce 10672 /* check for a decimal in disguise */
b78218b7 10673 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10674 goto decimal;
02aa26ce 10675 /* so it must be octal */
928753ea 10676 else {
378cc40b 10677 shift = 3;
928753ea
JH
10678 s++;
10679 }
10680
10681 if (*s == '_') {
a2a5de95 10682 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10683 "Misplaced _ in number");
10684 lastub = s++;
10685 }
9e24b6e2
JH
10686
10687 base = bases[shift];
10688 Base = Bases[shift];
10689 max = maxima[shift];
02aa26ce 10690
4f19785b 10691 /* read the rest of the number */
378cc40b 10692 for (;;) {
9e24b6e2 10693 /* x is used in the overflow test,
893fe2c2 10694 b is the digit we're adding on. */
9e24b6e2 10695 UV x, b;
55497cff 10696
378cc40b 10697 switch (*s) {
02aa26ce
NT
10698
10699 /* if we don't mention it, we're done */
378cc40b
LW
10700 default:
10701 goto out;
02aa26ce 10702
928753ea 10703 /* _ are ignored -- but warned about if consecutive */
de3bb511 10704 case '_':
a2a5de95
NC
10705 if (lastub && s == lastub + 1)
10706 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10707 "Misplaced _ in number");
928753ea 10708 lastub = s++;
de3bb511 10709 break;
02aa26ce
NT
10710
10711 /* 8 and 9 are not octal */
378cc40b 10712 case '8': case '9':
4f19785b 10713 if (shift == 3)
cea2e8a9 10714 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10715 /* FALL THROUGH */
02aa26ce
NT
10716
10717 /* octal digits */
4f19785b 10718 case '2': case '3': case '4':
378cc40b 10719 case '5': case '6': case '7':
4f19785b 10720 if (shift == 1)
cea2e8a9 10721 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10722 /* FALL THROUGH */
10723
10724 case '0': case '1':
02aa26ce 10725 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10726 goto digit;
02aa26ce
NT
10727
10728 /* hex digits */
378cc40b
LW
10729 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10730 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10731 /* make sure they said 0x */
378cc40b
LW
10732 if (shift != 4)
10733 goto out;
55497cff 10734 b = (*s++ & 7) + 9;
02aa26ce
NT
10735
10736 /* Prepare to put the digit we have onto the end
10737 of the number so far. We check for overflows.
10738 */
10739
55497cff 10740 digit:
61f33854 10741 just_zero = FALSE;
9e24b6e2
JH
10742 if (!overflowed) {
10743 x = u << shift; /* make room for the digit */
10744
10745 if ((x >> shift) != u
10746 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10747 overflowed = TRUE;
10748 n = (NV) u;
9b387841
NC
10749 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10750 "Integer overflow in %s number",
10751 base);
9e24b6e2
JH
10752 } else
10753 u = x | b; /* add the digit to the end */
10754 }
10755 if (overflowed) {
10756 n *= nvshift[shift];
10757 /* If an NV has not enough bits in its
10758 * mantissa to represent an UV this summing of
10759 * small low-order numbers is a waste of time
10760 * (because the NV cannot preserve the
10761 * low-order bits anyway): we could just
10762 * remember when did we overflow and in the
10763 * end just multiply n by the right
10764 * amount. */
10765 n += (NV) b;
55497cff 10766 }
378cc40b
LW
10767 break;
10768 }
10769 }
02aa26ce
NT
10770
10771 /* if we get here, we had success: make a scalar value from
10772 the number.
10773 */
378cc40b 10774 out:
928753ea
JH
10775
10776 /* final misplaced underbar check */
10777 if (s[-1] == '_') {
a2a5de95 10778 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10779 }
10780
9e24b6e2 10781 if (overflowed) {
a2a5de95
NC
10782 if (n > 4294967295.0)
10783 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10784 "%s number > %s non-portable",
10785 Base, max);
b081dd7e 10786 sv = newSVnv(n);
9e24b6e2
JH
10787 }
10788 else {
15041a67 10789#if UVSIZE > 4
a2a5de95
NC
10790 if (u > 0xffffffff)
10791 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10792 "%s number > %s non-portable",
10793 Base, max);
2cc4c2dc 10794#endif
b081dd7e 10795 sv = newSVuv(u);
9e24b6e2 10796 }
61f33854 10797 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10798 sv = new_constant(start, s - start, "integer",
eb0d8d16 10799 sv, NULL, NULL, 0);
61f33854 10800 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10801 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10802 }
10803 break;
02aa26ce
NT
10804
10805 /*
10806 handle decimal numbers.
10807 we're also sent here when we read a 0 as the first digit
10808 */
378cc40b
LW
10809 case '1': case '2': case '3': case '4': case '5':
10810 case '6': case '7': case '8': case '9': case '.':
10811 decimal:
3280af22
NIS
10812 d = PL_tokenbuf;
10813 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10814 floatit = FALSE;
02aa26ce
NT
10815
10816 /* read next group of digits and _ and copy into d */
de3bb511 10817 while (isDIGIT(*s) || *s == '_') {
4e553d73 10818 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10819 if -w is on
10820 */
93a17b20 10821 if (*s == '_') {
a2a5de95
NC
10822 if (lastub && s == lastub + 1)
10823 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10824 "Misplaced _ in number");
928753ea 10825 lastub = s++;
93a17b20 10826 }
fc36a67e 10827 else {
02aa26ce 10828 /* check for end of fixed-length buffer */
fc36a67e 10829 if (d >= e)
cea2e8a9 10830 Perl_croak(aTHX_ number_too_long);
02aa26ce 10831 /* if we're ok, copy the character */
378cc40b 10832 *d++ = *s++;
fc36a67e 10833 }
378cc40b 10834 }
02aa26ce
NT
10835
10836 /* final misplaced underbar check */
928753ea 10837 if (lastub && s == lastub + 1) {
a2a5de95 10838 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10839 }
02aa26ce
NT
10840
10841 /* read a decimal portion if there is one. avoid
10842 3..5 being interpreted as the number 3. followed
10843 by .5
10844 */
2f3197b3 10845 if (*s == '.' && s[1] != '.') {
79072805 10846 floatit = TRUE;
378cc40b 10847 *d++ = *s++;
02aa26ce 10848
928753ea 10849 if (*s == '_') {
a2a5de95
NC
10850 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10851 "Misplaced _ in number");
928753ea
JH
10852 lastub = s;
10853 }
10854
10855 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10856 */
fc36a67e 10857 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10858 /* fixed length buffer check */
fc36a67e 10859 if (d >= e)
cea2e8a9 10860 Perl_croak(aTHX_ number_too_long);
928753ea 10861 if (*s == '_') {
a2a5de95
NC
10862 if (lastub && s == lastub + 1)
10863 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10864 "Misplaced _ in number");
928753ea
JH
10865 lastub = s;
10866 }
10867 else
fc36a67e 10868 *d++ = *s;
378cc40b 10869 }
928753ea
JH
10870 /* fractional part ending in underbar? */
10871 if (s[-1] == '_') {
a2a5de95
NC
10872 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10873 "Misplaced _ in number");
928753ea 10874 }
dd629d5b
GS
10875 if (*s == '.' && isDIGIT(s[1])) {
10876 /* oops, it's really a v-string, but without the "v" */
f4758303 10877 s = start;
dd629d5b
GS
10878 goto vstring;
10879 }
378cc40b 10880 }
02aa26ce
NT
10881
10882 /* read exponent part, if present */
3792a11b 10883 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10884 floatit = TRUE;
10885 s++;
02aa26ce
NT
10886
10887 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10888 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10889
7fd134d9
JH
10890 /* stray preinitial _ */
10891 if (*s == '_') {
a2a5de95
NC
10892 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10893 "Misplaced _ in number");
7fd134d9
JH
10894 lastub = s++;
10895 }
10896
02aa26ce 10897 /* allow positive or negative exponent */
378cc40b
LW
10898 if (*s == '+' || *s == '-')
10899 *d++ = *s++;
02aa26ce 10900
7fd134d9
JH
10901 /* stray initial _ */
10902 if (*s == '_') {
a2a5de95
NC
10903 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10904 "Misplaced _ in number");
7fd134d9
JH
10905 lastub = s++;
10906 }
10907
7fd134d9
JH
10908 /* read digits of exponent */
10909 while (isDIGIT(*s) || *s == '_') {
10910 if (isDIGIT(*s)) {
10911 if (d >= e)
10912 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10913 *d++ = *s++;
7fd134d9
JH
10914 }
10915 else {
041457d9 10916 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10917 (!isDIGIT(s[1]) && s[1] != '_')))
10918 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10919 "Misplaced _ in number");
b3b48e3e 10920 lastub = s++;
7fd134d9 10921 }
7fd134d9 10922 }
378cc40b 10923 }
02aa26ce 10924
02aa26ce 10925
0b7fceb9 10926 /*
58bb9ec3
NC
10927 We try to do an integer conversion first if no characters
10928 indicating "float" have been found.
0b7fceb9
MU
10929 */
10930
10931 if (!floatit) {
58bb9ec3 10932 UV uv;
6136c704 10933 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10934
10935 if (flags == IS_NUMBER_IN_UV) {
10936 if (uv <= IV_MAX)
b081dd7e 10937 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10938 else
b081dd7e 10939 sv = newSVuv(uv);
58bb9ec3
NC
10940 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10941 if (uv <= (UV) IV_MIN)
b081dd7e 10942 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10943 else
10944 floatit = TRUE;
10945 } else
10946 floatit = TRUE;
10947 }
0b7fceb9 10948 if (floatit) {
58bb9ec3
NC
10949 /* terminate the string */
10950 *d = '\0';
86554af2 10951 nv = Atof(PL_tokenbuf);
b081dd7e 10952 sv = newSVnv(nv);
86554af2 10953 }
86554af2 10954
eb0d8d16
NC
10955 if ( floatit
10956 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10957 const char *const key = floatit ? "float" : "integer";
10958 const STRLEN keylen = floatit ? 5 : 7;
10959 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10960 key, keylen, sv, NULL, NULL, 0);
10961 }
378cc40b 10962 break;
0b7fceb9 10963
e312add1 10964 /* if it starts with a v, it could be a v-string */
a7cb1f99 10965 case 'v':
dd629d5b 10966vstring:
561b68a9 10967 sv = newSV(5); /* preallocate storage space */
65b06e02 10968 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10969 break;
79072805 10970 }
a687059c 10971
02aa26ce
NT
10972 /* make the op for the constant and return */
10973
a86a20aa 10974 if (sv)
b73d6f50 10975 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10976 else
5f66b61c 10977 lvalp->opval = NULL;
a687059c 10978
73d840c0 10979 return (char *)s;
378cc40b
LW
10980}
10981
76e3520e 10982STATIC char *
cea2e8a9 10983S_scan_formline(pTHX_ register char *s)
378cc40b 10984{
97aff369 10985 dVAR;
eb578fdb
KW
10986 char *eol;
10987 char *t;
6136c704 10988 SV * const stuff = newSVpvs("");
79072805 10989 bool needargs = FALSE;
c5ee2135 10990 bool eofmt = FALSE;
5db06880
NC
10991#ifdef PERL_MAD
10992 char *tokenstart = s;
4f61fd4b
JC
10993 SV* savewhite = NULL;
10994
5db06880 10995 if (PL_madskills) {
cd81e915
NC
10996 savewhite = PL_thiswhite;
10997 PL_thiswhite = 0;
5db06880
NC
10998 }
10999#endif
378cc40b 11000
7918f24d
NC
11001 PERL_ARGS_ASSERT_SCAN_FORMLINE;
11002
79072805 11003 while (!needargs) {
a1b95068 11004 if (*s == '.') {
c35e046a 11005 t = s+1;
51882d45 11006#ifdef PERL_STRICT_CR
c35e046a
AL
11007 while (SPACE_OR_TAB(*t))
11008 t++;
51882d45 11009#else
c35e046a
AL
11010 while (SPACE_OR_TAB(*t) || *t == '\r')
11011 t++;
51882d45 11012#endif
c5ee2135
WL
11013 if (*t == '\n' || t == PL_bufend) {
11014 eofmt = TRUE;
79072805 11015 break;
c5ee2135 11016 }
79072805 11017 }
583c9d5c
FC
11018 eol = (char *) memchr(s,'\n',PL_bufend-s);
11019 if (!eol++)
3280af22 11020 eol = PL_bufend;
79072805 11021 if (*s != '#') {
a0d0e21e
LW
11022 for (t = s; t < eol; t++) {
11023 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
11024 needargs = FALSE;
11025 goto enough; /* ~~ must be first line in formline */
378cc40b 11026 }
a0d0e21e
LW
11027 if (*t == '@' || *t == '^')
11028 needargs = TRUE;
378cc40b 11029 }
7121b347
MG
11030 if (eol > s) {
11031 sv_catpvn(stuff, s, eol-s);
2dc4c65b 11032#ifndef PERL_STRICT_CR
7121b347
MG
11033 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
11034 char *end = SvPVX(stuff) + SvCUR(stuff);
11035 end[-2] = '\n';
11036 end[-1] = '\0';
b162af07 11037 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 11038 }
2dc4c65b 11039#endif
7121b347
MG
11040 }
11041 else
11042 break;
79072805 11043 }
95a20fc0 11044 s = (char*)eol;
583c9d5c
FC
11045 if ((PL_rsfp || PL_parser->filtered)
11046 && PL_parser->form_lex_state == LEX_NORMAL) {
f0e67a1d 11047 bool got_some;
5db06880
NC
11048#ifdef PERL_MAD
11049 if (PL_madskills) {
cd81e915
NC
11050 if (PL_thistoken)
11051 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 11052 else
cd81e915 11053 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
11054 }
11055#endif
f0e67a1d 11056 PL_bufptr = PL_bufend;
83944c01 11057 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
11058 got_some = lex_next_chunk(0);
11059 CopLINE_dec(PL_curcop);
11060 s = PL_bufptr;
5db06880 11061#ifdef PERL_MAD
f0e67a1d 11062 tokenstart = PL_bufptr;
5db06880 11063#endif
f0e67a1d 11064 if (!got_some)
378cc40b 11065 break;
378cc40b 11066 }
463ee0b2 11067 incline(s);
79072805 11068 }
a0d0e21e 11069 enough:
5c9ae74d
FC
11070 if (!SvCUR(stuff) || needargs)
11071 PL_lex_state = PL_parser->form_lex_state;
a0d0e21e 11072 if (SvCUR(stuff)) {
705fe0e5 11073 PL_expect = XSTATE;
79072805 11074 if (needargs) {
cd81e915 11075 start_force(PL_curforce);
9ded7720 11076 NEXTVAL_NEXTTOKE.ival = 0;
705fe0e5 11077 force_next(FORMLBRACK);
79072805 11078 }
1bd51a4c 11079 if (!IN_BYTES) {
95a20fc0 11080 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
11081 SvUTF8_on(stuff);
11082 else if (PL_encoding)
11083 sv_recode_to_utf8(stuff, PL_encoding);
11084 }
cd81e915 11085 start_force(PL_curforce);
9ded7720 11086 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 11087 force_next(THING);
378cc40b 11088 }
79072805 11089 else {
8990e307 11090 SvREFCNT_dec(stuff);
c5ee2135
WL
11091 if (eofmt)
11092 PL_lex_formbrack = 0;
79072805 11093 }
5db06880
NC
11094#ifdef PERL_MAD
11095 if (PL_madskills) {
cd81e915
NC
11096 if (PL_thistoken)
11097 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 11098 else
cd81e915
NC
11099 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
11100 PL_thiswhite = savewhite;
5db06880
NC
11101 }
11102#endif
79072805 11103 return s;
378cc40b 11104}
a687059c 11105
ba6d6ac9 11106I32
864dbfa3 11107Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 11108{
97aff369 11109 dVAR;
a3b680e6 11110 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 11111 CV* const outsidecv = PL_compcv;
8990e307 11112
7766f137 11113 SAVEI32(PL_subline);
3280af22 11114 save_item(PL_subname);
3280af22 11115 SAVESPTR(PL_compcv);
3280af22 11116
ea726b52 11117 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
11118 CvFLAGS(PL_compcv) |= flags;
11119
57843af0 11120 PL_subline = CopLINE(PL_curcop);
dd2155a4 11121 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 11122 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 11123 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
db4cf31d 11124 if (outsidecv && CvPADLIST(outsidecv))
8771da69
FC
11125 CvPADLIST(PL_compcv)->xpadl_outid =
11126 PadlistNAMES(CvPADLIST(outsidecv));
748a9306 11127
8990e307
LW
11128 return oldsavestack_ix;
11129}
11130
084592ab
CN
11131#ifdef __SC__
11132#pragma segment Perl_yylex
11133#endif
af41e527 11134static int
19c62481 11135S_yywarn(pTHX_ const char *const s, U32 flags)
8990e307 11136{
97aff369 11137 dVAR;
7918f24d
NC
11138
11139 PERL_ARGS_ASSERT_YYWARN;
11140
faef0170 11141 PL_in_eval |= EVAL_WARNONLY;
19c62481 11142 yyerror_pv(s, flags);
faef0170 11143 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 11144 return 0;
8990e307
LW
11145}
11146
11147int
15f169a1 11148Perl_yyerror(pTHX_ const char *const s)
463ee0b2 11149{
19c62481
BF
11150 PERL_ARGS_ASSERT_YYERROR;
11151 return yyerror_pvn(s, strlen(s), 0);
11152}
11153
11154int
11155Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
11156{
11157 PERL_ARGS_ASSERT_YYERROR_PV;
11158 return yyerror_pvn(s, strlen(s), flags);
11159}
11160
11161int
19c62481
BF
11162Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
11163{
97aff369 11164 dVAR;
bfed75c6 11165 const char *context = NULL;
68dc0745 11166 int contlen = -1;
46fc3d4c 11167 SV *msg;
19c62481 11168 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
5912531f 11169 int yychar = PL_parser->yychar;
463ee0b2 11170
19c62481 11171 PERL_ARGS_ASSERT_YYERROR_PVN;
7918f24d 11172
3280af22 11173 if (!yychar || (yychar == ';' && !PL_rsfp))
19c62481 11174 sv_catpvs(where_sv, "at EOF");
8bcfe651
TM
11175 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
11176 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
11177 PL_oldbufptr != PL_bufptr) {
f355267c
JH
11178 /*
11179 Only for NetWare:
11180 The code below is removed for NetWare because it abends/crashes on NetWare
11181 when the script has error such as not having the closing quotes like:
11182 if ($var eq "value)
11183 Checking of white spaces is anyway done in NetWare code.
11184 */
11185#ifndef NETWARE
3280af22
NIS
11186 while (isSPACE(*PL_oldoldbufptr))
11187 PL_oldoldbufptr++;
f355267c 11188#endif
3280af22
NIS
11189 context = PL_oldoldbufptr;
11190 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 11191 }
8bcfe651
TM
11192 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11193 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
11194 /*
11195 Only for NetWare:
11196 The code below is removed for NetWare because it abends/crashes on NetWare
11197 when the script has error such as not having the closing quotes like:
11198 if ($var eq "value)
11199 Checking of white spaces is anyway done in NetWare code.
11200 */
11201#ifndef NETWARE
3280af22
NIS
11202 while (isSPACE(*PL_oldbufptr))
11203 PL_oldbufptr++;
f355267c 11204#endif
3280af22
NIS
11205 context = PL_oldbufptr;
11206 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
11207 }
11208 else if (yychar > 255)
19c62481 11209 sv_catpvs(where_sv, "next token ???");
12fbd33b 11210 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
11211 if (PL_lex_state == LEX_NORMAL ||
11212 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
19c62481 11213 sv_catpvs(where_sv, "at end of line");
3280af22 11214 else if (PL_lex_inpat)
19c62481 11215 sv_catpvs(where_sv, "within pattern");
463ee0b2 11216 else
19c62481 11217 sv_catpvs(where_sv, "within string");
463ee0b2 11218 }
46fc3d4c 11219 else {
19c62481 11220 sv_catpvs(where_sv, "next char ");
46fc3d4c 11221 if (yychar < 32)
cea2e8a9 11222 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 11223 else if (isPRINT_LC(yychar)) {
88c9ea1e 11224 const char string = yychar;
5e7aa789
NC
11225 sv_catpvn(where_sv, &string, 1);
11226 }
463ee0b2 11227 else
cea2e8a9 11228 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
463ee0b2 11229 }
b604e366 11230 msg = newSVpvn_flags(s, len, (flags & SVf_UTF8) | SVs_TEMP);
ed094faf 11231 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 11232 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 11233 if (context)
19c62481
BF
11234 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11235 SVfARG(newSVpvn_flags(context, contlen,
11236 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
463ee0b2 11237 else
19c62481 11238 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
57843af0 11239 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 11240 Perl_sv_catpvf(aTHX_ msg,
57def98f 11241 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 11242 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 11243 PL_multi_end = 0;
a0d0e21e 11244 }
500960a6 11245 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 11246 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 11247 }
463ee0b2 11248 else
5a844595 11249 qerror(msg);
c7d6bfb2
GS
11250 if (PL_error_count >= 10) {
11251 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 11252 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 11253 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
11254 else
11255 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 11256 OutCopFILE(PL_curcop));
c7d6bfb2 11257 }
3280af22 11258 PL_in_my = 0;
5c284bb0 11259 PL_in_my_stash = NULL;
463ee0b2
LW
11260 return 0;
11261}
084592ab
CN
11262#ifdef __SC__
11263#pragma segment Main
11264#endif
4e35701f 11265
b250498f 11266STATIC char*
3ae08724 11267S_swallow_bom(pTHX_ U8 *s)
01ec43d0 11268{
97aff369 11269 dVAR;
f54cb97a 11270 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
11271
11272 PERL_ARGS_ASSERT_SWALLOW_BOM;
11273
7aa207d6 11274 switch (s[0]) {
4e553d73
NIS
11275 case 0xFF:
11276 if (s[1] == 0xFE) {
ee6ba15d 11277 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 11278 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
dcbac5bb 11279 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11280 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 11281#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 11282 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 11283 s += 2;
dea0fc0b 11284 if (PL_bufend > (char*)s) {
81a923f4 11285 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 11286 }
b250498f 11287#else
dcbac5bb 11288 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11289 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 11290#endif
01ec43d0
GS
11291 }
11292 break;
78ae23f5 11293 case 0xFE:
7aa207d6 11294 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 11295#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11296 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
11297 s += 2;
11298 if (PL_bufend > (char *)s) {
81a923f4 11299 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 11300 }
b250498f 11301#else
dcbac5bb 11302 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11303 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 11304#endif
01ec43d0
GS
11305 }
11306 break;
3ae08724
GS
11307 case 0xEF:
11308 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 11309 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
11310 s += 3; /* UTF-8 */
11311 }
11312 break;
11313 case 0:
7aa207d6
JH
11314 if (slen > 3) {
11315 if (s[1] == 0) {
11316 if (s[2] == 0xFE && s[3] == 0xFF) {
11317 /* UTF-32 big-endian */
dcbac5bb 11318 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11319 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
11320 }
11321 }
11322 else if (s[2] == 0 && s[3] != 0) {
11323 /* Leading bytes
11324 * 00 xx 00 xx
11325 * are a good indicator of UTF-16BE. */
ee6ba15d 11326#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11327 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
11328 s = add_utf16_textfilter(s, FALSE);
11329#else
dcbac5bb 11330 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11331 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11332#endif
7aa207d6 11333 }
01ec43d0 11334 }
e294cc5d
JH
11335#ifdef EBCDIC
11336 case 0xDD:
11337 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11338 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11339 s += 4; /* UTF-8 */
11340 }
11341 break;
11342#endif
11343
7aa207d6
JH
11344 default:
11345 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11346 /* Leading bytes
11347 * xx 00 xx 00
11348 * are a good indicator of UTF-16LE. */
ee6ba15d 11349#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11350 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 11351 s = add_utf16_textfilter(s, TRUE);
ee6ba15d 11352#else
dcbac5bb 11353 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11354 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11355#endif
7aa207d6 11356 }
01ec43d0 11357 }
b8f84bb2 11358 return (char*)s;
b250498f 11359}
4755096e 11360
6e3aabd6
GS
11361
11362#ifndef PERL_NO_UTF16_FILTER
11363static I32
a28af015 11364S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 11365{
97aff369 11366 dVAR;
f3040f2c 11367 SV *const filter = FILTER_DATA(idx);
2a773401
NC
11368 /* We re-use this each time round, throwing the contents away before we
11369 return. */
2a773401 11370 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 11371 SV *const utf8_buffer = filter;
c28d6105 11372 IV status = IoPAGE(filter);
f2338a2e 11373 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 11374 I32 retval;
c8b0cbae 11375
c85ae797
NC
11376 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11377
c8b0cbae
NC
11378 /* As we're automatically added, at the lowest level, and hence only called
11379 from this file, we can be sure that we're not called in block mode. Hence
11380 don't bother writing code to deal with block mode. */
11381 if (maxlen) {
11382 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11383 }
c28d6105
NC
11384 if (status < 0) {
11385 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11386 }
1de9afcd 11387 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 11388 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 11389 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
11390 reverse ? 'l' : 'b', idx, maxlen, status,
11391 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11392
11393 while (1) {
11394 STRLEN chars;
11395 STRLEN have;
dea0fc0b 11396 I32 newlen;
2a773401 11397 U8 *end;
c28d6105
NC
11398 /* First, look in our buffer of existing UTF-8 data: */
11399 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11400
11401 if (nl) {
11402 ++nl;
11403 } else if (status == 0) {
11404 /* EOF */
11405 IoPAGE(filter) = 0;
11406 nl = SvEND(utf8_buffer);
11407 }
11408 if (nl) {
d2d1d4de
NC
11409 STRLEN got = nl - SvPVX(utf8_buffer);
11410 /* Did we have anything to append? */
11411 retval = got != 0;
11412 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
11413 /* Everything else in this code works just fine if SVp_POK isn't
11414 set. This, however, needs it, and we need it to work, else
11415 we loop infinitely because the buffer is never consumed. */
11416 sv_chop(utf8_buffer, nl);
11417 break;
11418 }
ba77e4cc 11419
c28d6105
NC
11420 /* OK, not a complete line there, so need to read some more UTF-16.
11421 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
11422 while (1) {
11423 if (status <= 0)
11424 break;
11425 if (SvCUR(utf16_buffer) >= 2) {
11426 /* Location of the high octet of the last complete code point.
11427 Gosh, UTF-16 is a pain. All the benefits of variable length,
11428 *coupled* with all the benefits of partial reads and
11429 endianness. */
11430 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11431 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11432
11433 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11434 break;
11435 }
11436
11437 /* We have the first half of a surrogate. Read more. */
11438 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11439 }
c28d6105 11440
c28d6105
NC
11441 status = FILTER_READ(idx + 1, utf16_buffer,
11442 160 + (SvCUR(utf16_buffer) & 1));
11443 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 11444 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
11445 if (status < 0) {
11446 /* Error */
11447 IoPAGE(filter) = status;
11448 return status;
11449 }
11450 }
11451
11452 chars = SvCUR(utf16_buffer) >> 1;
11453 have = SvCUR(utf8_buffer);
11454 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 11455
aa6dbd60 11456 if (reverse) {
c28d6105
NC
11457 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11458 (U8*)SvPVX_const(utf8_buffer) + have,
11459 chars * 2, &newlen);
aa6dbd60 11460 } else {
2a773401 11461 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
11462 (U8*)SvPVX_const(utf8_buffer) + have,
11463 chars * 2, &newlen);
2a773401 11464 }
c28d6105 11465 SvCUR_set(utf8_buffer, have + newlen);
2a773401 11466 *end = '\0';
c28d6105 11467
e07286ed
NC
11468 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11469 it's private to us, and utf16_to_utf8{,reversed} take a
11470 (pointer,length) pair, rather than a NUL-terminated string. */
11471 if(SvCUR(utf16_buffer) & 1) {
11472 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11473 SvCUR_set(utf16_buffer, 1);
11474 } else {
11475 SvCUR_set(utf16_buffer, 0);
11476 }
2a773401 11477 }
c28d6105
NC
11478 DEBUG_P(PerlIO_printf(Perl_debug_log,
11479 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11480 status,
11481 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11482 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 11483 return retval;
6e3aabd6 11484}
81a923f4
NC
11485
11486static U8 *
11487S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11488{
2a773401 11489 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 11490
c85ae797
NC
11491 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11492
c28d6105 11493 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 11494 sv_setpvs(filter, "");
2a773401 11495 IoLINES(filter) = reversed;
c28d6105
NC
11496 IoPAGE(filter) = 1; /* Not EOF */
11497
11498 /* Sadly, we have to return a valid pointer, come what may, so we have to
11499 ignore any error return from this. */
11500 SvCUR_set(PL_linestr, 0);
11501 if (FILTER_READ(0, PL_linestr, 0)) {
11502 SvUTF8_on(PL_linestr);
81a923f4 11503 } else {
c28d6105 11504 SvUTF8_on(PL_linestr);
81a923f4 11505 }
c28d6105 11506 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
11507 return (U8*)SvPVX(PL_linestr);
11508}
6e3aabd6 11509#endif
9f4817db 11510
f333445c
JP
11511/*
11512Returns a pointer to the next character after the parsed
11513vstring, as well as updating the passed in sv.
11514
11515Function must be called like
11516
561b68a9 11517 sv = newSV(5);
65b06e02 11518 s = scan_vstring(s,e,sv);
f333445c 11519
65b06e02 11520where s and e are the start and end of the string.
f333445c
JP
11521The sv should already be large enough to store the vstring
11522passed in, for performance reasons.
11523
11524*/
11525
11526char *
15f169a1 11527Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 11528{
97aff369 11529 dVAR;
bfed75c6
AL
11530 const char *pos = s;
11531 const char *start = s;
7918f24d
NC
11532
11533 PERL_ARGS_ASSERT_SCAN_VSTRING;
11534
f333445c 11535 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 11536 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 11537 pos++;
f333445c
JP
11538 if ( *pos != '.') {
11539 /* this may not be a v-string if followed by => */
bfed75c6 11540 const char *next = pos;
65b06e02 11541 while (next < e && isSPACE(*next))
8fc7bb1c 11542 ++next;
65b06e02 11543 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
11544 /* return string not v-string */
11545 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 11546 return (char *)pos;
f333445c
JP
11547 }
11548 }
11549
11550 if (!isALPHA(*pos)) {
89ebb4a3 11551 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 11552
d4c19fe8
AL
11553 if (*s == 'v')
11554 s++; /* get past 'v' */
f333445c 11555
76f68e9b 11556 sv_setpvs(sv, "");
f333445c
JP
11557
11558 for (;;) {
d4c19fe8 11559 /* this is atoi() that tolerates underscores */
0bd48802
AL
11560 U8 *tmpend;
11561 UV rev = 0;
d4c19fe8
AL
11562 const char *end = pos;
11563 UV mult = 1;
11564 while (--end >= s) {
11565 if (*end != '_') {
11566 const UV orev = rev;
f333445c
JP
11567 rev += (*end - '0') * mult;
11568 mult *= 10;
9b387841 11569 if (orev > rev)
dcbac5bb 11570 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
11571 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11572 "Integer overflow in decimal number");
f333445c
JP
11573 }
11574 }
11575#ifdef EBCDIC
11576 if (rev > 0x7FFFFFFF)
11577 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11578#endif
11579 /* Append native character for the rev point */
11580 tmpend = uvchr_to_utf8(tmpbuf, rev);
11581 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11582 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11583 SvUTF8_on(sv);
65b06e02 11584 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
11585 s = ++pos;
11586 else {
11587 s = pos;
11588 break;
11589 }
65b06e02 11590 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
11591 pos++;
11592 }
11593 SvPOK_on(sv);
11594 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11595 SvRMAGICAL_on(sv);
11596 }
73d840c0 11597 return (char *)s;
f333445c
JP
11598}
11599
88e1f1a2
JV
11600int
11601Perl_keyword_plugin_standard(pTHX_
11602 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11603{
11604 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11605 PERL_UNUSED_CONTEXT;
11606 PERL_UNUSED_ARG(keyword_ptr);
11607 PERL_UNUSED_ARG(keyword_len);
11608 PERL_UNUSED_ARG(op_ptr);
11609 return KEYWORD_PLUGIN_DECLINE;
11610}
11611
78cdf107 11612#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11613static void
78cdf107 11614S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11615{
11616 SAVEI32(PL_lex_brackets);
11617 if (PL_lex_brackets > 100)
11618 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11619 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11620 SAVEI32(PL_lex_allbrackets);
11621 PL_lex_allbrackets = 0;
11622 SAVEI8(PL_lex_fakeeof);
2dcac756 11623 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11624 if(yyparse(gramtype) && !PL_parser->error_count)
11625 qerror(Perl_mess(aTHX_ "Parse error"));
11626}
11627
78cdf107 11628#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11629static OP *
78cdf107 11630S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11631{
11632 OP *o;
11633 ENTER;
11634 SAVEVPTR(PL_eval_root);
11635 PL_eval_root = NULL;
78cdf107 11636 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11637 o = PL_eval_root;
11638 LEAVE;
11639 return o;
11640}
11641
78cdf107
Z
11642#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11643static OP *
11644S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11645{
11646 OP *exprop;
11647 if (flags & ~PARSE_OPTIONAL)
11648 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11649 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11650 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11651 if (!PL_parser->error_count)
11652 qerror(Perl_mess(aTHX_ "Parse error"));
11653 exprop = newOP(OP_NULL, 0);
11654 }
11655 return exprop;
11656}
11657
11658/*
11659=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11660
11661Parse a Perl arithmetic expression. This may contain operators of precedence
11662down to the bit shift operators. The expression must be followed (and thus
11663terminated) either by a comparison or lower-precedence operator or by
11664something that would normally terminate an expression such as semicolon.
11665If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11666otherwise it is mandatory. It is up to the caller to ensure that the
11667dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11668the source of the code to be parsed and the lexical context for the
11669expression.
11670
11671The op tree representing the expression is returned. If an optional
11672expression is absent, a null pointer is returned, otherwise the pointer
11673will be non-null.
11674
11675If an error occurs in parsing or compilation, in most cases a valid op
11676tree is returned anyway. The error is reflected in the parser state,
11677normally resulting in a single exception at the top level of parsing
11678which covers all the compilation errors that occurred. Some compilation
11679errors, however, will throw an exception immediately.
11680
11681=cut
11682*/
11683
11684OP *
11685Perl_parse_arithexpr(pTHX_ U32 flags)
11686{
11687 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11688}
11689
11690/*
11691=for apidoc Amx|OP *|parse_termexpr|U32 flags
11692
11693Parse a Perl term expression. This may contain operators of precedence
11694down to the assignment operators. The expression must be followed (and thus
11695terminated) either by a comma or lower-precedence operator or by
11696something that would normally terminate an expression such as semicolon.
11697If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11698otherwise it is mandatory. It is up to the caller to ensure that the
11699dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11700the source of the code to be parsed and the lexical context for the
11701expression.
11702
11703The op tree representing the expression is returned. If an optional
11704expression is absent, a null pointer is returned, otherwise the pointer
11705will be non-null.
11706
11707If an error occurs in parsing or compilation, in most cases a valid op
11708tree is returned anyway. The error is reflected in the parser state,
11709normally resulting in a single exception at the top level of parsing
11710which covers all the compilation errors that occurred. Some compilation
11711errors, however, will throw an exception immediately.
11712
11713=cut
11714*/
11715
11716OP *
11717Perl_parse_termexpr(pTHX_ U32 flags)
11718{
11719 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11720}
11721
11722/*
11723=for apidoc Amx|OP *|parse_listexpr|U32 flags
11724
11725Parse a Perl list expression. This may contain operators of precedence
11726down to the comma operator. The expression must be followed (and thus
11727terminated) either by a low-precedence logic operator such as C<or> or by
11728something that would normally terminate an expression such as semicolon.
11729If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11730otherwise it is mandatory. It is up to the caller to ensure that the
11731dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11732the source of the code to be parsed and the lexical context for the
11733expression.
11734
11735The op tree representing the expression is returned. If an optional
11736expression is absent, a null pointer is returned, otherwise the pointer
11737will be non-null.
11738
11739If an error occurs in parsing or compilation, in most cases a valid op
11740tree is returned anyway. The error is reflected in the parser state,
11741normally resulting in a single exception at the top level of parsing
11742which covers all the compilation errors that occurred. Some compilation
11743errors, however, will throw an exception immediately.
11744
11745=cut
11746*/
11747
11748OP *
11749Perl_parse_listexpr(pTHX_ U32 flags)
11750{
11751 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11752}
11753
11754/*
11755=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11756
11757Parse a single complete Perl expression. This allows the full
11758expression grammar, including the lowest-precedence operators such
11759as C<or>. The expression must be followed (and thus terminated) by a
11760token that an expression would normally be terminated by: end-of-file,
11761closing bracketing punctuation, semicolon, or one of the keywords that
11762signals a postfix expression-statement modifier. If I<flags> includes
11763C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11764mandatory. It is up to the caller to ensure that the dynamic parser
11765state (L</PL_parser> et al) is correctly set to reflect the source of
11766the code to be parsed and the lexical context for the expression.
11767
11768The op tree representing the expression is returned. If an optional
11769expression is absent, a null pointer is returned, otherwise the pointer
11770will be non-null.
11771
11772If an error occurs in parsing or compilation, in most cases a valid op
11773tree is returned anyway. The error is reflected in the parser state,
11774normally resulting in a single exception at the top level of parsing
11775which covers all the compilation errors that occurred. Some compilation
11776errors, however, will throw an exception immediately.
11777
11778=cut
11779*/
11780
11781OP *
11782Perl_parse_fullexpr(pTHX_ U32 flags)
11783{
11784 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11785}
11786
e53d8f76
Z
11787/*
11788=for apidoc Amx|OP *|parse_block|U32 flags
11789
11790Parse a single complete Perl code block. This consists of an opening
11791brace, a sequence of statements, and a closing brace. The block
11792constitutes a lexical scope, so C<my> variables and various compile-time
11793effects can be contained within it. It is up to the caller to ensure
11794that the dynamic parser state (L</PL_parser> et al) is correctly set to
11795reflect the source of the code to be parsed and the lexical context for
11796the statement.
11797
11798The op tree representing the code block is returned. This is always a
11799real op, never a null pointer. It will normally be a C<lineseq> list,
11800including C<nextstate> or equivalent ops. No ops to construct any kind
11801of runtime scope are included by virtue of it being a block.
11802
11803If an error occurs in parsing or compilation, in most cases a valid op
11804tree (most likely null) is returned anyway. The error is reflected in
11805the parser state, normally resulting in a single exception at the top
11806level of parsing which covers all the compilation errors that occurred.
11807Some compilation errors, however, will throw an exception immediately.
11808
11809The I<flags> parameter is reserved for future use, and must always
11810be zero.
11811
11812=cut
11813*/
11814
11815OP *
11816Perl_parse_block(pTHX_ U32 flags)
11817{
11818 if (flags)
11819 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11820 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11821}
11822
1da4ca5f 11823/*
8359b381
Z
11824=for apidoc Amx|OP *|parse_barestmt|U32 flags
11825
11826Parse a single unadorned Perl statement. This may be a normal imperative
11827statement or a declaration that has compile-time effect. It does not
11828include any label or other affixture. It is up to the caller to ensure
11829that the dynamic parser state (L</PL_parser> et al) is correctly set to
11830reflect the source of the code to be parsed and the lexical context for
11831the statement.
11832
11833The op tree representing the statement is returned. This may be a
11834null pointer if the statement is null, for example if it was actually
11835a subroutine definition (which has compile-time side effects). If not
11836null, it will be ops directly implementing the statement, suitable to
11837pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11838equivalent op (except for those embedded in a scope contained entirely
11839within the statement).
11840
11841If an error occurs in parsing or compilation, in most cases a valid op
11842tree (most likely null) is returned anyway. The error is reflected in
11843the parser state, normally resulting in a single exception at the top
11844level of parsing which covers all the compilation errors that occurred.
11845Some compilation errors, however, will throw an exception immediately.
11846
11847The I<flags> parameter is reserved for future use, and must always
11848be zero.
11849
11850=cut
11851*/
11852
11853OP *
11854Perl_parse_barestmt(pTHX_ U32 flags)
11855{
11856 if (flags)
11857 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11858 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11859}
11860
11861/*
361d9b55
Z
11862=for apidoc Amx|SV *|parse_label|U32 flags
11863
11864Parse a single label, possibly optional, of the type that may prefix a
11865Perl statement. It is up to the caller to ensure that the dynamic parser
11866state (L</PL_parser> et al) is correctly set to reflect the source of
11867the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11868label is optional, otherwise it is mandatory.
11869
11870The name of the label is returned in the form of a fresh scalar. If an
11871optional label is absent, a null pointer is returned.
11872
11873If an error occurs in parsing, which can only occur if the label is
11874mandatory, a valid label is returned anyway. The error is reflected in
11875the parser state, normally resulting in a single exception at the top
11876level of parsing which covers all the compilation errors that occurred.
11877
11878=cut
11879*/
11880
11881SV *
11882Perl_parse_label(pTHX_ U32 flags)
11883{
11884 if (flags & ~PARSE_OPTIONAL)
11885 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11886 if (PL_lex_state == LEX_KNOWNEXT) {
11887 PL_parser->yychar = yylex();
11888 if (PL_parser->yychar == LABEL) {
5504e6cf
FC
11889 char * const lpv = pl_yylval.pval;
11890 STRLEN llen = strlen(lpv);
361d9b55 11891 PL_parser->yychar = YYEMPTY;
5504e6cf 11892 return newSVpvn_flags(lpv, llen, lpv[llen+1] ? SVf_UTF8 : 0);
361d9b55
Z
11893 } else {
11894 yyunlex();
11895 goto no_label;
11896 }
11897 } else {
11898 char *s, *t;
361d9b55
Z
11899 STRLEN wlen, bufptr_pos;
11900 lex_read_space(0);
11901 t = s = PL_bufptr;
5db1eb8d 11902 if (!isIDFIRST_lazy_if(s, UTF))
361d9b55 11903 goto no_label;
5db1eb8d 11904 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
361d9b55
Z
11905 if (word_takes_any_delimeter(s, wlen))
11906 goto no_label;
11907 bufptr_pos = s - SvPVX(PL_linestr);
11908 PL_bufptr = t;
11909 lex_read_space(LEX_KEEP_PREVIOUS);
11910 t = PL_bufptr;
11911 s = SvPVX(PL_linestr) + bufptr_pos;
11912 if (t[0] == ':' && t[1] != ':') {
11913 PL_oldoldbufptr = PL_oldbufptr;
11914 PL_oldbufptr = s;
11915 PL_bufptr = t+1;
5db1eb8d 11916 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
361d9b55
Z
11917 } else {
11918 PL_bufptr = s;
11919 no_label:
11920 if (flags & PARSE_OPTIONAL) {
11921 return NULL;
11922 } else {
11923 qerror(Perl_mess(aTHX_ "Parse error"));
11924 return newSVpvs("x");
11925 }
11926 }
11927 }
11928}
11929
11930/*
28ac2b49
Z
11931=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11932
11933Parse a single complete Perl statement. This may be a normal imperative
8359b381 11934statement or a declaration that has compile-time effect, and may include
8e720305 11935optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11936parser state (L</PL_parser> et al) is correctly set to reflect the source
11937of the code to be parsed and the lexical context for the statement.
11938
11939The op tree representing the statement is returned. This may be a
11940null pointer if the statement is null, for example if it was actually
11941a subroutine definition (which has compile-time side effects). If not
11942null, it will be the result of a L</newSTATEOP> call, normally including
11943a C<nextstate> or equivalent op.
11944
11945If an error occurs in parsing or compilation, in most cases a valid op
11946tree (most likely null) is returned anyway. The error is reflected in
11947the parser state, normally resulting in a single exception at the top
11948level of parsing which covers all the compilation errors that occurred.
11949Some compilation errors, however, will throw an exception immediately.
11950
11951The I<flags> parameter is reserved for future use, and must always
11952be zero.
11953
11954=cut
11955*/
11956
11957OP *
11958Perl_parse_fullstmt(pTHX_ U32 flags)
11959{
28ac2b49
Z
11960 if (flags)
11961 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11962 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11963}
11964
07ffcb73
Z
11965/*
11966=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11967
11968Parse a sequence of zero or more Perl statements. These may be normal
11969imperative statements, including optional labels, or declarations
11970that have compile-time effect, or any mixture thereof. The statement
11971sequence ends when a closing brace or end-of-file is encountered in a
11972place where a new statement could have validly started. It is up to
11973the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11974is correctly set to reflect the source of the code to be parsed and the
11975lexical context for the statements.
11976
11977The op tree representing the statement sequence is returned. This may
11978be a null pointer if the statements were all null, for example if there
11979were no statements or if there were only subroutine definitions (which
11980have compile-time side effects). If not null, it will be a C<lineseq>
11981list, normally including C<nextstate> or equivalent ops.
11982
11983If an error occurs in parsing or compilation, in most cases a valid op
11984tree is returned anyway. The error is reflected in the parser state,
11985normally resulting in a single exception at the top level of parsing
11986which covers all the compilation errors that occurred. Some compilation
11987errors, however, will throw an exception immediately.
11988
11989The I<flags> parameter is reserved for future use, and must always
11990be zero.
11991
11992=cut
11993*/
11994
11995OP *
11996Perl_parse_stmtseq(pTHX_ U32 flags)
11997{
11998 OP *stmtseqop;
e53d8f76 11999 I32 c;
07ffcb73 12000 if (flags)
78cdf107
Z
12001 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
12002 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
12003 c = lex_peek_unichar(0);
12004 if (c != -1 && c != /*{*/'}')
07ffcb73 12005 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
12006 return stmtseqop;
12007}
12008
28ac2b49 12009/*
1da4ca5f
NC
12010 * Local variables:
12011 * c-indentation-style: bsd
12012 * c-basic-offset: 4
14d04a33 12013 * indent-tabs-mode: nil
1da4ca5f
NC
12014 * End:
12015 *
14d04a33 12016 * ex: set ts=8 sts=4 sw=4 et:
37442d52 12017 */