This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regex: Allow any single char to be SIMPLE
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
199e78b7
DM
69#define PL_preambled (PL_parser->preambled)
70#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 71#define PL_linestr (PL_parser->linestr)
c2598295
DM
72#define PL_expect (PL_parser->expect)
73#define PL_copline (PL_parser->copline)
f06b5848
DM
74#define PL_bufptr (PL_parser->bufptr)
75#define PL_oldbufptr (PL_parser->oldbufptr)
76#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
77#define PL_linestart (PL_parser->linestart)
78#define PL_bufend (PL_parser->bufend)
79#define PL_last_uni (PL_parser->last_uni)
80#define PL_last_lop (PL_parser->last_lop)
81#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 82#define PL_lex_state (PL_parser->lex_state)
2f9285f8 83#define PL_rsfp (PL_parser->rsfp)
5486870f 84#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
85#define PL_in_my (PL_parser->in_my)
86#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 87#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 88#define PL_multi_end (PL_parser->multi_end)
13765c85 89#define PL_error_count (PL_parser->error_count)
199e78b7
DM
90
91#ifdef PERL_MAD
92# define PL_endwhite (PL_parser->endwhite)
93# define PL_faketokens (PL_parser->faketokens)
94# define PL_lasttoke (PL_parser->lasttoke)
95# define PL_nextwhite (PL_parser->nextwhite)
96# define PL_realtokenstart (PL_parser->realtokenstart)
97# define PL_skipwhite (PL_parser->skipwhite)
98# define PL_thisclose (PL_parser->thisclose)
99# define PL_thismad (PL_parser->thismad)
100# define PL_thisopen (PL_parser->thisopen)
101# define PL_thisstuff (PL_parser->thisstuff)
102# define PL_thistoken (PL_parser->thistoken)
103# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
104# define PL_thiswhite (PL_parser->thiswhite)
105# define PL_nexttoke (PL_parser->nexttoke)
106# define PL_curforce (PL_parser->curforce)
107#else
108# define PL_nexttoke (PL_parser->nexttoke)
109# define PL_nexttype (PL_parser->nexttype)
110# define PL_nextval (PL_parser->nextval)
199e78b7
DM
111#endif
112
0bd48802 113static const char ident_too_long[] = "Identifier too long";
8903cb82 114
29595ff2 115#ifdef PERL_MAD
29595ff2 116# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 117# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 118#else
5db06880 119# define CURMAD(slot,sv)
9ded7720 120# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
121#endif
122
a7aaec61
Z
123#define XENUMMASK 0x3f
124#define XFAKEEOF 0x40
125#define XFAKEBRACK 0x80
9059aa12 126
39e02b42
JH
127#ifdef USE_UTF8_SCRIPTS
128# define UTF (!IN_BYTES)
2b9d42f0 129#else
802a15e9 130# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
2b9d42f0 131#endif
a0ed51b3 132
b1fc3636
CJ
133/* The maximum number of characters preceding the unrecognized one to display */
134#define UNRECOGNIZED_PRECEDE_COUNT 10
135
61f0cdd9 136/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
137 * 1999-02-27 mjd-perl-patch@plover.com */
138#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
139
bf4acbe4 140#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 141
ffb4593c
NT
142/* LEX_* are values for PL_lex_state, the state of the lexer.
143 * They are arranged oddly so that the guard on the switch statement
79072805 144 * can get by with a single comparison (if the compiler is smart enough).
9da1dd8f
DM
145 *
146 * These values refer to the various states within a sublex parse,
147 * i.e. within a double quotish string
79072805
LW
148 */
149
fb73857a 150/* #define LEX_NOTPARSING 11 is done in perl.h. */
151
b6007c36
DM
152#define LEX_NORMAL 10 /* normal code (ie not within "...") */
153#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
154#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
155#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
156#define LEX_INTERPSTART 6 /* expecting the start of a $var */
157
158 /* at end of code, eg "$x" followed by: */
159#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
160#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
161
162#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
163 string or after \E, $foo, etc */
164#define LEX_INTERPCONST 2 /* NOT USED */
165#define LEX_FORMLINE 1 /* expecting a format line */
166#define LEX_KNOWNEXT 0 /* next token known; just return it */
167
79072805 168
bbf60fe6 169#ifdef DEBUGGING
27da23d5 170static const char* const lex_state_names[] = {
bbf60fe6
DM
171 "KNOWNEXT",
172 "FORMLINE",
173 "INTERPCONST",
174 "INTERPCONCAT",
175 "INTERPENDMAYBE",
176 "INTERPEND",
177 "INTERPSTART",
178 "INTERPPUSH",
179 "INTERPCASEMOD",
180 "INTERPNORMAL",
181 "NORMAL"
182};
183#endif
184
79072805
LW
185#ifdef ff_next
186#undef ff_next
d48672a2
LW
187#endif
188
79072805 189#include "keywords.h"
fe14fcc3 190
ffb4593c
NT
191/* CLINE is a macro that ensures PL_copline has a sane value */
192
ae986130
LW
193#ifdef CLINE
194#undef CLINE
195#endif
57843af0 196#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 197
5db06880 198#ifdef PERL_MAD
29595ff2
NC
199# define SKIPSPACE0(s) skipspace0(s)
200# define SKIPSPACE1(s) skipspace1(s)
201# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
202# define PEEKSPACE(s) skipspace2(s,0)
203#else
204# define SKIPSPACE0(s) skipspace(s)
205# define SKIPSPACE1(s) skipspace(s)
206# define SKIPSPACE2(s,tsv) skipspace(s)
207# define PEEKSPACE(s) skipspace(s)
208#endif
209
ffb4593c
NT
210/*
211 * Convenience functions to return different tokens and prime the
9cbb5ea2 212 * lexer for the next token. They all take an argument.
ffb4593c
NT
213 *
214 * TOKEN : generic token (used for '(', DOLSHARP, etc)
215 * OPERATOR : generic operator
216 * AOPERATOR : assignment operator
217 * PREBLOCK : beginning the block after an if, while, foreach, ...
218 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
219 * PREREF : *EXPR where EXPR is not a simple identifier
220 * TERM : expression term
221 * LOOPX : loop exiting command (goto, last, dump, etc)
222 * FTST : file test operator
223 * FUN0 : zero-argument function
7eb971ee 224 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 225 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
226 * BOop : bitwise or or xor
227 * BAop : bitwise and
228 * SHop : shift operator
229 * PWop : power operator
9cbb5ea2 230 * PMop : pattern-matching operator
ffb4593c
NT
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
e5edeb50 234 * Rop : relational operator <= != gt
ffb4593c
NT
235 *
236 * Also see LOP and lop() below.
237 */
238
998054bd 239#ifdef DEBUGGING /* Serve -DT. */
704d4215 240# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 241#else
bbf60fe6 242# define REPORT(retval) (retval)
998054bd
SC
243#endif
244
bbf60fe6
DM
245#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
252#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 255#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
d68ce4ac 272#define UNI3(f,x,have_x) { \
6154021b 273 pl_yylval.ival = f; \
d68ce4ac 274 if (have_x) PL_expect = x; \
376fcdbf
AL
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
d68ce4ac
FC
283#define UNI(f) UNI3(f,XTERM,1)
284#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
b5fb7ce3
FC
285#define UNIPROTO(f,optional) { \
286 if (optional) PL_last_uni = PL_oldbufptr; \
22393538
MH
287 OPERATOR(f); \
288 }
a687059c 289
d68ce4ac 290#define UNIBRACK(f) UNI3(f,0,0)
79072805 291
9f68db38 292/* grandfather return to old style */
78cdf107
Z
293#define OLDLOP(f) \
294 do { \
295 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
296 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
297 pl_yylval.ival = (f); \
298 PL_expect = XTERM; \
299 PL_bufptr = s; \
300 return (int)LSTOP; \
301 } while(0)
79072805 302
83944c01
FC
303#define COPLINE_INC_WITH_HERELINES \
304 STMT_START { \
305 CopLINE_inc(PL_curcop); \
d794b522
FC
306 if (PL_parser->lex_shared->herelines) \
307 CopLINE(PL_curcop) += PL_parser->lex_shared->herelines, \
308 PL_parser->lex_shared->herelines = 0; \
83944c01
FC
309 } STMT_END
310
311
8fa7f367
JH
312#ifdef DEBUGGING
313
6154021b 314/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
315enum token_type {
316 TOKENTYPE_NONE,
317 TOKENTYPE_IVAL,
6154021b 318 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6 319 TOKENTYPE_PVAL,
aeaef349 320 TOKENTYPE_OPVAL
bbf60fe6
DM
321};
322
6d4a66ac
NC
323static struct debug_tokens {
324 const int token;
325 enum token_type type;
326 const char *name;
327} const debug_tokens[] =
9041c2e3 328{
bbf60fe6
DM
329 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
330 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
331 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
332 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
333 { ARROW, TOKENTYPE_NONE, "ARROW" },
334 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
335 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
336 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
337 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
338 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 339 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
340 { DO, TOKENTYPE_NONE, "DO" },
341 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
342 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
343 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
344 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
345 { ELSE, TOKENTYPE_NONE, "ELSE" },
346 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
347 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
348 { FOR, TOKENTYPE_IVAL, "FOR" },
349 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
705fe0e5
FC
350 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
351 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
bbf60fe6
DM
352 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
353 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 354 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
355 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
356 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
357 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 358 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
359 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
360 { IF, TOKENTYPE_IVAL, "IF" },
5db1eb8d 361 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
bbf60fe6
DM
362 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
363 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
364 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
365 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
366 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
367 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
368 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
369 { MY, TOKENTYPE_IVAL, "MY" },
bbf60fe6
DM
370 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
371 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
372 { OROP, TOKENTYPE_IVAL, "OROP" },
373 { OROR, TOKENTYPE_NONE, "OROR" },
374 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
f3f204dc 375 { PEG, TOKENTYPE_NONE, "PEG" },
88e1f1a2
JV
376 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
377 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
378 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
379 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
380 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
381 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
382 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
383 { PREINC, TOKENTYPE_NONE, "PREINC" },
384 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
f3f204dc 385 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
bbf60fe6
DM
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
f3f204dc 388 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
bbf60fe6
DM
389 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
390 { SUB, TOKENTYPE_NONE, "SUB" },
391 { THING, TOKENTYPE_OPVAL, "THING" },
392 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
393 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
394 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
395 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
396 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
397 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 398 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
399 { WHILE, TOKENTYPE_IVAL, "WHILE" },
400 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 401 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 402 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
403};
404
6154021b 405/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 406
bbf60fe6 407STATIC int
704d4215 408S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 409{
97aff369 410 dVAR;
7918f24d
NC
411
412 PERL_ARGS_ASSERT_TOKEREPORT;
413
bbf60fe6 414 if (DEBUG_T_TEST) {
bd61b366 415 const char *name = NULL;
bbf60fe6 416 enum token_type type = TOKENTYPE_NONE;
f54cb97a 417 const struct debug_tokens *p;
396482e1 418 SV* const report = newSVpvs("<== ");
bbf60fe6 419
f54cb97a 420 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
421 if (p->token == (int)rv) {
422 name = p->name;
423 type = p->type;
424 break;
425 }
426 }
427 if (name)
54667de8 428 Perl_sv_catpv(aTHX_ report, name);
74736ae6 429 else if ((char)rv > ' ' && (char)rv <= '~')
bbf60fe6
DM
430 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
431 else if (!rv)
396482e1 432 sv_catpvs(report, "EOF");
bbf60fe6
DM
433 else
434 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
435 switch (type) {
436 case TOKENTYPE_NONE:
bbf60fe6
DM
437 break;
438 case TOKENTYPE_IVAL:
704d4215 439 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_OPNUM:
442 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 443 PL_op_name[lvalp->ival]);
bbf60fe6
DM
444 break;
445 case TOKENTYPE_PVAL:
704d4215 446 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
447 break;
448 case TOKENTYPE_OPVAL:
704d4215 449 if (lvalp->opval) {
401441c0 450 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
451 PL_op_name[lvalp->opval->op_type]);
452 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 453 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 454 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
455 }
456
457 }
401441c0 458 else
396482e1 459 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
460 break;
461 }
b6007c36 462 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
463 };
464 return (int)rv;
998054bd
SC
465}
466
b6007c36
DM
467
468/* print the buffer with suitable escapes */
469
470STATIC void
15f169a1 471S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 472{
396482e1 473 SV* const tmp = newSVpvs("");
7918f24d
NC
474
475 PERL_ARGS_ASSERT_PRINTBUF;
476
b6007c36
DM
477 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
478 SvREFCNT_dec(tmp);
479}
480
8fa7f367
JH
481#endif
482
8290c323
NC
483static int
484S_deprecate_commaless_var_list(pTHX) {
485 PL_expect = XTERM;
486 deprecate("comma-less variable list");
487 return REPORT(','); /* grandfather non-comma-format format */
488}
489
ffb4593c
NT
490/*
491 * S_ao
492 *
c963b151
BD
493 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
494 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
495 */
496
76e3520e 497STATIC int
cea2e8a9 498S_ao(pTHX_ int toketype)
a0d0e21e 499{
97aff369 500 dVAR;
3280af22
NIS
501 if (*PL_bufptr == '=') {
502 PL_bufptr++;
a0d0e21e 503 if (toketype == ANDAND)
6154021b 504 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 505 else if (toketype == OROR)
6154021b 506 pl_yylval.ival = OP_ORASSIGN;
c963b151 507 else if (toketype == DORDOR)
6154021b 508 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
509 toketype = ASSIGNOP;
510 }
511 return toketype;
512}
513
ffb4593c
NT
514/*
515 * S_no_op
516 * When Perl expects an operator and finds something else, no_op
517 * prints the warning. It always prints "<something> found where
518 * operator expected. It prints "Missing semicolon on previous line?"
519 * if the surprise occurs at the start of the line. "do you need to
520 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
521 * where the compiler doesn't know if foo is a method call or a function.
522 * It prints "Missing operator before end of line" if there's nothing
523 * after the missing operator, or "... before <...>" if there is something
524 * after the missing operator.
525 */
526
76e3520e 527STATIC void
15f169a1 528S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 529{
97aff369 530 dVAR;
9d4ba2ae
AL
531 char * const oldbp = PL_bufptr;
532 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 533
7918f24d
NC
534 PERL_ARGS_ASSERT_NO_OP;
535
1189a94a
GS
536 if (!s)
537 s = oldbp;
07c798fb 538 else
1189a94a 539 PL_bufptr = s;
734ab321 540 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
56da5a46
RGS
541 if (ckWARN_d(WARN_SYNTAX)) {
542 if (is_first)
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Missing semicolon on previous line?)\n");
545 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 546 const char *t;
734ab321
BF
547 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
548 t += UTF ? UTF8SKIP(t) : 1)
c35e046a 549 NOOP;
56da5a46
RGS
550 if (t < PL_bufptr && isSPACE(*t))
551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
552 "\t(Do you need to predeclare %"SVf"?)\n",
553 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
554 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46
RGS
555 }
556 else {
557 assert(s >= oldbp);
558 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
559 "\t(Missing operator before %"SVf"?)\n",
560 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
561 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46 562 }
07c798fb 563 }
3280af22 564 PL_bufptr = oldbp;
8990e307
LW
565}
566
ffb4593c
NT
567/*
568 * S_missingterm
569 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 570 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
571 * If we're in a delimited string and the delimiter is a control
572 * character, it's reformatted into a two-char sequence like ^C.
573 * This is fatal.
574 */
575
76e3520e 576STATIC void
cea2e8a9 577S_missingterm(pTHX_ char *s)
8990e307 578{
97aff369 579 dVAR;
8990e307
LW
580 char tmpbuf[3];
581 char q;
582 if (s) {
9d4ba2ae 583 char * const nl = strrchr(s,'\n');
d2719217 584 if (nl)
8990e307
LW
585 *nl = '\0';
586 }
463559e7 587 else if (isCNTRL(PL_multi_close)) {
8990e307 588 *tmpbuf = '^';
585ec06d 589 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
590 tmpbuf[2] = '\0';
591 s = tmpbuf;
592 }
593 else {
eb160463 594 *tmpbuf = (char)PL_multi_close;
8990e307
LW
595 tmpbuf[1] = '\0';
596 s = tmpbuf;
597 }
598 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 599 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 600}
79072805 601
dd0ac2b9
FC
602#include "feature.h"
603
0d863452 604/*
0d863452
RH
605 * Check whether the named feature is enabled.
606 */
26ea9e12 607bool
3fff3427 608Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 609{
97aff369 610 dVAR;
4a731d7b 611 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
612
613 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
ca4d40c4
FC
614
615 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
7918f24d 616
26ea9e12
NC
617 if (namelen > MAX_FEATURE_LEN)
618 return FALSE;
3fff3427 619 memcpy(&he_name[8], name, namelen);
7d69d4a6 620
c8ca97b0
NC
621 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
622 REFCOUNTED_HE_EXISTS));
0d863452
RH
623}
624
ffb4593c 625/*
9cbb5ea2
GS
626 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
627 * utf16-to-utf8-reversed.
ffb4593c
NT
628 */
629
c39cd008
GS
630#ifdef PERL_CR_FILTER
631static void
632strip_return(SV *sv)
633{
eb578fdb
KW
634 const char *s = SvPVX_const(sv);
635 const char * const e = s + SvCUR(sv);
7918f24d
NC
636
637 PERL_ARGS_ASSERT_STRIP_RETURN;
638
c39cd008
GS
639 /* outer loop optimized to do nothing if there are no CR-LFs */
640 while (s < e) {
641 if (*s++ == '\r' && *s == '\n') {
642 /* hit a CR-LF, need to copy the rest */
eb578fdb 643 char *d = s - 1;
c39cd008
GS
644 *d++ = *s++;
645 while (s < e) {
646 if (*s == '\r' && s[1] == '\n')
647 s++;
648 *d++ = *s++;
649 }
650 SvCUR(sv) -= s - d;
651 return;
652 }
653 }
654}
a868473f 655
76e3520e 656STATIC I32
c39cd008 657S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 658{
f54cb97a 659 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
660 if (count > 0 && !maxlen)
661 strip_return(sv);
662 return count;
a868473f
NIS
663}
664#endif
665
ffb4593c 666/*
8eaa0acf
Z
667=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
668
669Creates and initialises a new lexer/parser state object, supplying
670a context in which to lex and parse from a new source of Perl code.
671A pointer to the new state object is placed in L</PL_parser>. An entry
672is made on the save stack so that upon unwinding the new state object
673will be destroyed and the former value of L</PL_parser> will be restored.
674Nothing else need be done to clean up the parsing context.
675
676The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
677non-null, provides a string (in SV form) containing code to be parsed.
678A copy of the string is made, so subsequent modification of I<line>
679does not affect parsing. I<rsfp>, if non-null, provides an input stream
680from which code will be read to be parsed. If both are non-null, the
681code in I<line> comes first and must consist of complete lines of input,
682and I<rsfp> supplies the remainder of the source.
683
e368b3bd
FC
684The I<flags> parameter is reserved for future use. Currently it is only
685used by perl internally, so extensions should always pass zero.
8eaa0acf
Z
686
687=cut
688*/
ffb4593c 689
27fcb6ee 690/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
87606032
NC
691 can share filters with the current parser.
692 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
693 caller, hence isn't owned by the parser, so shouldn't be closed on parser
694 destruction. This is used to handle the case of defaulting to reading the
695 script from the standard input because no filename was given on the command
696 line (without getting confused by situation where STDIN has been closed, so
697 the script handle is opened on fd 0) */
27fcb6ee 698
a0d0e21e 699void
8eaa0acf 700Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 701{
97aff369 702 dVAR;
6ef55633 703 const char *s = NULL;
5486870f 704 yy_parser *parser, *oparser;
60d63348 705 if (flags && flags & ~LEX_START_FLAGS)
8eaa0acf 706 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
707
708 /* create and initialise a parser */
709
199e78b7 710 Newxz(parser, 1, yy_parser);
5486870f 711 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
712 PL_parser = parser;
713
28ac2b49
Z
714 parser->stack = NULL;
715 parser->ps = NULL;
716 parser->stack_size = 0;
acdf0a21 717
e3abe207
DM
718 /* on scope exit, free this parser and restore any outer one */
719 SAVEPARSER(parser);
7c4baf47 720 parser->saved_curcop = PL_curcop;
e3abe207 721
acdf0a21 722 /* initialise lexer state */
8990e307 723
fb205e7a
DM
724#ifdef PERL_MAD
725 parser->curforce = -1;
726#else
727 parser->nexttoke = 0;
728#endif
ca4cfd28 729 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 730 parser->copline = NOLINE;
5afb0a62 731 parser->lex_state = LEX_NORMAL;
c2598295 732 parser->expect = XSTATE;
2f9285f8 733 parser->rsfp = rsfp;
27fcb6ee
FC
734 parser->rsfp_filters =
735 !(flags & LEX_START_SAME_FILTER) || !oparser
d3cd8e11
FC
736 ? NULL
737 : MUTABLE_AV(SvREFCNT_inc(
738 oparser->rsfp_filters
739 ? oparser->rsfp_filters
740 : (oparser->rsfp_filters = newAV())
741 ));
2f9285f8 742
199e78b7
DM
743 Newx(parser->lex_brackstack, 120, char);
744 Newx(parser->lex_casestack, 12, char);
745 *parser->lex_casestack = '\0';
d794b522 746 Newxz(parser->lex_shared, 1, LEXSHARED);
02b34bbe 747
10efb74f 748 if (line) {
0528fd32 749 STRLEN len;
10efb74f 750 s = SvPV_const(line, len);
0abcdfa4
FC
751 parser->linestr = flags & LEX_START_COPIED
752 ? SvREFCNT_inc_simple_NN(line)
753 : newSVpvn_flags(s, len, SvUTF8(line));
11076590 754 sv_catpvs(parser->linestr, "\n;");
0abcdfa4
FC
755 } else {
756 parser->linestr = newSVpvs("\n;");
8990e307 757 }
f06b5848
DM
758 parser->oldoldbufptr =
759 parser->oldbufptr =
760 parser->bufptr =
761 parser->linestart = SvPVX(parser->linestr);
762 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
763 parser->last_lop = parser->last_uni = NULL;
87606032
NC
764 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
765 |LEX_DONT_CLOSE_RSFP);
737c24fc 766
60d63348 767 parser->in_pod = parser->filtered = 0;
79072805 768}
a687059c 769
e3abe207
DM
770
771/* delete a parser object */
772
773void
774Perl_parser_free(pTHX_ const yy_parser *parser)
775{
7918f24d
NC
776 PERL_ARGS_ASSERT_PARSER_FREE;
777
7c4baf47 778 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
779 SvREFCNT_dec(parser->linestr);
780
87606032 781 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
2f9285f8 782 PerlIO_clearerr(parser->rsfp);
799361c3
SH
783 else if (parser->rsfp && (!parser->old_parser ||
784 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 785 PerlIO_close(parser->rsfp);
5486870f 786 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 787
e3abe207
DM
788 Safefree(parser->lex_brackstack);
789 Safefree(parser->lex_casestack);
d794b522 790 Safefree(parser->lex_shared);
e3abe207
DM
791 PL_parser = parser->old_parser;
792 Safefree(parser);
793}
794
795
ffb4593c 796/*
f0e67a1d
Z
797=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
798
799Buffer scalar containing the chunk currently under consideration of the
800text currently being lexed. This is always a plain string scalar (for
801which C<SvPOK> is true). It is not intended to be used as a scalar by
802normal scalar means; instead refer to the buffer directly by the pointer
803variables described below.
804
805The lexer maintains various C<char*> pointers to things in the
806C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
807reallocated, all of these pointers must be updated. Don't attempt to
808do this manually, but rather use L</lex_grow_linestr> if you need to
809reallocate the buffer.
810
811The content of the text chunk in the buffer is commonly exactly one
812complete line of input, up to and including a newline terminator,
813but there are situations where it is otherwise. The octets of the
814buffer may be intended to be interpreted as either UTF-8 or Latin-1.
815The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
816flag on this scalar, which may disagree with it.
817
818For direct examination of the buffer, the variable
819L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
820lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
821of these pointers is usually preferable to examination of the scalar
822through normal scalar means.
823
824=for apidoc AmxU|char *|PL_parser-E<gt>bufend
825
826Direct pointer to the end of the chunk of text currently being lexed, the
827end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
828+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
829always located at the end of the buffer, and does not count as part of
830the buffer's contents.
831
832=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
833
834Points to the current position of lexing inside the lexer buffer.
835Characters around this point may be freely examined, within
836the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
837L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
838interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
839
840Lexing code (whether in the Perl core or not) moves this pointer past
841the characters that it consumes. It is also expected to perform some
842bookkeeping whenever a newline character is consumed. This movement
843can be more conveniently performed by the function L</lex_read_to>,
844which handles newlines appropriately.
845
846Interpretation of the buffer's octets can be abstracted out by
847using the slightly higher-level functions L</lex_peek_unichar> and
848L</lex_read_unichar>.
849
850=for apidoc AmxU|char *|PL_parser-E<gt>linestart
851
852Points to the start of the current line inside the lexer buffer.
853This is useful for indicating at which column an error occurred, and
854not much else. This must be updated by any lexing code that consumes
855a newline; the function L</lex_read_to> handles this detail.
856
857=cut
858*/
859
860/*
861=for apidoc Amx|bool|lex_bufutf8
862
863Indicates whether the octets in the lexer buffer
864(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
865of Unicode characters. If not, they should be interpreted as Latin-1
866characters. This is analogous to the C<SvUTF8> flag for scalars.
867
868In UTF-8 mode, it is not guaranteed that the lexer buffer actually
869contains valid UTF-8. Lexing code must be robust in the face of invalid
870encoding.
871
872The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
873is significant, but not the whole story regarding the input character
874encoding. Normally, when a file is being read, the scalar contains octets
875and its C<SvUTF8> flag is off, but the octets should be interpreted as
876UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
877however, the scalar may have the C<SvUTF8> flag on, and in this case its
878octets should be interpreted as UTF-8 unless the C<use bytes> pragma
879is in effect. This logic may change in the future; use this function
880instead of implementing the logic yourself.
881
882=cut
883*/
884
885bool
886Perl_lex_bufutf8(pTHX)
887{
888 return UTF;
889}
890
891/*
892=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
893
894Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
895at least I<len> octets (including terminating NUL). Returns a
896pointer to the reallocated buffer. This is necessary before making
897any direct modification of the buffer that would increase its length.
898L</lex_stuff_pvn> provides a more convenient way to insert text into
899the buffer.
900
901Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
902this function updates all of the lexer's variables that point directly
903into the buffer.
904
905=cut
906*/
907
908char *
909Perl_lex_grow_linestr(pTHX_ STRLEN len)
910{
911 SV *linestr;
912 char *buf;
913 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
c7641931 914 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
f0e67a1d
Z
915 linestr = PL_parser->linestr;
916 buf = SvPVX(linestr);
917 if (len <= SvLEN(linestr))
918 return buf;
919 bufend_pos = PL_parser->bufend - buf;
920 bufptr_pos = PL_parser->bufptr - buf;
921 oldbufptr_pos = PL_parser->oldbufptr - buf;
922 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
923 linestart_pos = PL_parser->linestart - buf;
924 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
925 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3328ab5a
FC
926 re_eval_start_pos = PL_parser->lex_shared->re_eval_start ?
927 PL_parser->lex_shared->re_eval_start - buf : 0;
c7641931 928
f0e67a1d 929 buf = sv_grow(linestr, len);
c7641931 930
f0e67a1d
Z
931 PL_parser->bufend = buf + bufend_pos;
932 PL_parser->bufptr = buf + bufptr_pos;
933 PL_parser->oldbufptr = buf + oldbufptr_pos;
934 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
935 PL_parser->linestart = buf + linestart_pos;
936 if (PL_parser->last_uni)
937 PL_parser->last_uni = buf + last_uni_pos;
938 if (PL_parser->last_lop)
939 PL_parser->last_lop = buf + last_lop_pos;
3328ab5a
FC
940 if (PL_parser->lex_shared->re_eval_start)
941 PL_parser->lex_shared->re_eval_start = buf + re_eval_start_pos;
f0e67a1d
Z
942 return buf;
943}
944
945/*
83aa740e 946=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
947
948Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
949immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
950reallocating the buffer if necessary. This means that lexing code that
951runs later will see the characters as if they had appeared in the input.
952It is not recommended to do this as part of normal parsing, and most
953uses of this facility run the risk of the inserted characters being
954interpreted in an unintended manner.
955
956The string to be inserted is represented by I<len> octets starting
957at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
958according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
959The characters are recoded for the lexer buffer, according to how the
960buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 961to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
962function is more convenient.
963
964=cut
965*/
966
967void
83aa740e 968Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 969{
749123ff 970 dVAR;
f0e67a1d
Z
971 char *bufptr;
972 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
973 if (flags & ~(LEX_STUFF_UTF8))
974 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
975 if (UTF) {
976 if (flags & LEX_STUFF_UTF8) {
977 goto plain_copy;
978 } else {
979 STRLEN highhalf = 0;
83aa740e 980 const char *p, *e = pv+len;
f0e67a1d
Z
981 for (p = pv; p != e; p++)
982 highhalf += !!(((U8)*p) & 0x80);
983 if (!highhalf)
984 goto plain_copy;
985 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
986 bufptr = PL_parser->bufptr;
987 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
988 SvCUR_set(PL_parser->linestr,
989 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
990 PL_parser->bufend += len+highhalf;
991 for (p = pv; p != e; p++) {
992 U8 c = (U8)*p;
993 if (c & 0x80) {
994 *bufptr++ = (char)(0xc0 | (c >> 6));
995 *bufptr++ = (char)(0x80 | (c & 0x3f));
996 } else {
997 *bufptr++ = (char)c;
998 }
999 }
1000 }
1001 } else {
1002 if (flags & LEX_STUFF_UTF8) {
1003 STRLEN highhalf = 0;
83aa740e 1004 const char *p, *e = pv+len;
f0e67a1d
Z
1005 for (p = pv; p != e; p++) {
1006 U8 c = (U8)*p;
1007 if (c >= 0xc4) {
1008 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1009 "non-Latin-1 character into Latin-1 input");
1010 } else if (c >= 0xc2 && p+1 != e &&
1011 (((U8)p[1]) & 0xc0) == 0x80) {
1012 p++;
1013 highhalf++;
1014 } else if (c >= 0x80) {
1015 /* malformed UTF-8 */
1016 ENTER;
1017 SAVESPTR(PL_warnhook);
1018 PL_warnhook = PERL_WARNHOOK_FATAL;
1019 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1020 LEAVE;
1021 }
1022 }
1023 if (!highhalf)
1024 goto plain_copy;
1025 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1026 bufptr = PL_parser->bufptr;
1027 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1028 SvCUR_set(PL_parser->linestr,
1029 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1030 PL_parser->bufend += len-highhalf;
1031 for (p = pv; p != e; p++) {
1032 U8 c = (U8)*p;
1033 if (c & 0x80) {
1034 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1035 p++;
1036 } else {
1037 *bufptr++ = (char)c;
1038 }
1039 }
1040 } else {
1041 plain_copy:
1042 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1043 bufptr = PL_parser->bufptr;
1044 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1045 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1046 PL_parser->bufend += len;
1047 Copy(pv, bufptr, len, char);
1048 }
1049 }
1050}
1051
1052/*
9dcc53ea
Z
1053=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1054
1055Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1056immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1057reallocating the buffer if necessary. This means that lexing code that
1058runs later will see the characters as if they had appeared in the input.
1059It is not recommended to do this as part of normal parsing, and most
1060uses of this facility run the risk of the inserted characters being
1061interpreted in an unintended manner.
1062
1063The string to be inserted is represented by octets starting at I<pv>
1064and continuing to the first nul. These octets are interpreted as either
1065UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1066in I<flags>. The characters are recoded for the lexer buffer, according
1067to how the buffer is currently being interpreted (L</lex_bufutf8>).
1068If it is not convenient to nul-terminate a string to be inserted, the
1069L</lex_stuff_pvn> function is more appropriate.
1070
1071=cut
1072*/
1073
1074void
1075Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1076{
1077 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1078 lex_stuff_pvn(pv, strlen(pv), flags);
1079}
1080
1081/*
f0e67a1d
Z
1082=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1083
1084Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1085immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1086reallocating the buffer if necessary. This means that lexing code that
1087runs later will see the characters as if they had appeared in the input.
1088It is not recommended to do this as part of normal parsing, and most
1089uses of this facility run the risk of the inserted characters being
1090interpreted in an unintended manner.
1091
1092The string to be inserted is the string value of I<sv>. The characters
1093are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1094being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1095not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1096need to construct a scalar.
1097
1098=cut
1099*/
1100
1101void
1102Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1103{
1104 char *pv;
1105 STRLEN len;
1106 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1107 if (flags)
1108 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1109 pv = SvPV(sv, len);
1110 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1111}
1112
1113/*
1114=for apidoc Amx|void|lex_unstuff|char *ptr
1115
1116Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1117I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1118This hides the discarded text from any lexing code that runs later,
1119as if the text had never appeared.
1120
1121This is not the normal way to consume lexed text. For that, use
1122L</lex_read_to>.
1123
1124=cut
1125*/
1126
1127void
1128Perl_lex_unstuff(pTHX_ char *ptr)
1129{
1130 char *buf, *bufend;
1131 STRLEN unstuff_len;
1132 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1133 buf = PL_parser->bufptr;
1134 if (ptr < buf)
1135 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1136 if (ptr == buf)
1137 return;
1138 bufend = PL_parser->bufend;
1139 if (ptr > bufend)
1140 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1141 unstuff_len = ptr - buf;
1142 Move(ptr, buf, bufend+1-ptr, char);
1143 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1144 PL_parser->bufend = bufend - unstuff_len;
1145}
1146
1147/*
1148=for apidoc Amx|void|lex_read_to|char *ptr
1149
1150Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1151to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1152performing the correct bookkeeping whenever a newline character is passed.
1153This is the normal way to consume lexed text.
1154
1155Interpretation of the buffer's octets can be abstracted out by
1156using the slightly higher-level functions L</lex_peek_unichar> and
1157L</lex_read_unichar>.
1158
1159=cut
1160*/
1161
1162void
1163Perl_lex_read_to(pTHX_ char *ptr)
1164{
1165 char *s;
1166 PERL_ARGS_ASSERT_LEX_READ_TO;
1167 s = PL_parser->bufptr;
1168 if (ptr < s || ptr > PL_parser->bufend)
1169 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1170 for (; s != ptr; s++)
1171 if (*s == '\n') {
83944c01 1172 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
1173 PL_parser->linestart = s+1;
1174 }
1175 PL_parser->bufptr = ptr;
1176}
1177
1178/*
1179=for apidoc Amx|void|lex_discard_to|char *ptr
1180
1181Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1182up to I<ptr>. The remaining content of the buffer will be moved, and
1183all pointers into the buffer updated appropriately. I<ptr> must not
1184be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1185it is not permitted to discard text that has yet to be lexed.
1186
1187Normally it is not necessarily to do this directly, because it suffices to
1188use the implicit discarding behaviour of L</lex_next_chunk> and things
1189based on it. However, if a token stretches across multiple lines,
1f317c95 1190and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1191that purpose, then after completion of the token it would be wise to
1192explicitly discard the now-unneeded earlier lines, to avoid future
1193multi-line tokens growing the buffer without bound.
1194
1195=cut
1196*/
1197
1198void
1199Perl_lex_discard_to(pTHX_ char *ptr)
1200{
1201 char *buf;
1202 STRLEN discard_len;
1203 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1204 buf = SvPVX(PL_parser->linestr);
1205 if (ptr < buf)
1206 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1207 if (ptr == buf)
1208 return;
1209 if (ptr > PL_parser->bufptr)
1210 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1211 discard_len = ptr - buf;
1212 if (PL_parser->oldbufptr < ptr)
1213 PL_parser->oldbufptr = ptr;
1214 if (PL_parser->oldoldbufptr < ptr)
1215 PL_parser->oldoldbufptr = ptr;
1216 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1217 PL_parser->last_uni = NULL;
1218 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1219 PL_parser->last_lop = NULL;
1220 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1221 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1222 PL_parser->bufend -= discard_len;
1223 PL_parser->bufptr -= discard_len;
1224 PL_parser->oldbufptr -= discard_len;
1225 PL_parser->oldoldbufptr -= discard_len;
1226 if (PL_parser->last_uni)
1227 PL_parser->last_uni -= discard_len;
1228 if (PL_parser->last_lop)
1229 PL_parser->last_lop -= discard_len;
1230}
1231
1232/*
1233=for apidoc Amx|bool|lex_next_chunk|U32 flags
1234
1235Reads in the next chunk of text to be lexed, appending it to
1236L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1237looked to the end of the current chunk and wants to know more. It is
1238usual, but not necessary, for lexing to have consumed the entirety of
1239the current chunk at this time.
1240
1241If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1242chunk (i.e., the current chunk has been entirely consumed), normally the
1243current chunk will be discarded at the same time that the new chunk is
1244read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1245will not be discarded. If the current chunk has not been entirely
1246consumed, then it will not be discarded regardless of the flag.
1247
1248Returns true if some new text was added to the buffer, or false if the
1249buffer has reached the end of the input text.
1250
1251=cut
1252*/
1253
1254#define LEX_FAKE_EOF 0x80000000
112d1284 1255#define LEX_NO_TERM 0x40000000
f0e67a1d
Z
1256
1257bool
1258Perl_lex_next_chunk(pTHX_ U32 flags)
1259{
1260 SV *linestr;
1261 char *buf;
1262 STRLEN old_bufend_pos, new_bufend_pos;
1263 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1264 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1265 bool got_some_for_debugger = 0;
f0e67a1d 1266 bool got_some;
112d1284 1267 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
f0e67a1d 1268 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1269 linestr = PL_parser->linestr;
1270 buf = SvPVX(linestr);
1271 if (!(flags & LEX_KEEP_PREVIOUS) &&
1272 PL_parser->bufptr == PL_parser->bufend) {
1273 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1274 linestart_pos = 0;
1275 if (PL_parser->last_uni != PL_parser->bufend)
1276 PL_parser->last_uni = NULL;
1277 if (PL_parser->last_lop != PL_parser->bufend)
1278 PL_parser->last_lop = NULL;
1279 last_uni_pos = last_lop_pos = 0;
1280 *buf = 0;
1281 SvCUR(linestr) = 0;
1282 } else {
1283 old_bufend_pos = PL_parser->bufend - buf;
1284 bufptr_pos = PL_parser->bufptr - buf;
1285 oldbufptr_pos = PL_parser->oldbufptr - buf;
1286 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1287 linestart_pos = PL_parser->linestart - buf;
1288 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1289 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1290 }
1291 if (flags & LEX_FAKE_EOF) {
1292 goto eof;
60d63348 1293 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
f0e67a1d
Z
1294 got_some = 0;
1295 } else if (filter_gets(linestr, old_bufend_pos)) {
1296 got_some = 1;
17cc9359 1297 got_some_for_debugger = 1;
112d1284
FC
1298 } else if (flags & LEX_NO_TERM) {
1299 got_some = 0;
f0e67a1d 1300 } else {
580561a3
Z
1301 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1302 sv_setpvs(linestr, "");
f0e67a1d
Z
1303 eof:
1304 /* End of real input. Close filehandle (unless it was STDIN),
1305 * then add implicit termination.
1306 */
87606032 1307 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
f0e67a1d
Z
1308 PerlIO_clearerr(PL_parser->rsfp);
1309 else if (PL_parser->rsfp)
1310 (void)PerlIO_close(PL_parser->rsfp);
1311 PL_parser->rsfp = NULL;
60d63348 1312 PL_parser->in_pod = PL_parser->filtered = 0;
f0e67a1d
Z
1313#ifdef PERL_MAD
1314 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1315 PL_faketokens = 1;
1316#endif
1317 if (!PL_in_eval && PL_minus_p) {
1318 sv_catpvs(linestr,
1319 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1320 PL_minus_n = PL_minus_p = 0;
1321 } else if (!PL_in_eval && PL_minus_n) {
1322 sv_catpvs(linestr, /*{*/";}");
1323 PL_minus_n = 0;
1324 } else
1325 sv_catpvs(linestr, ";");
1326 got_some = 1;
1327 }
1328 buf = SvPVX(linestr);
1329 new_bufend_pos = SvCUR(linestr);
1330 PL_parser->bufend = buf + new_bufend_pos;
1331 PL_parser->bufptr = buf + bufptr_pos;
1332 PL_parser->oldbufptr = buf + oldbufptr_pos;
1333 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1334 PL_parser->linestart = buf + linestart_pos;
1335 if (PL_parser->last_uni)
1336 PL_parser->last_uni = buf + last_uni_pos;
1337 if (PL_parser->last_lop)
1338 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1339 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1340 PL_curstash != PL_debstash) {
1341 /* debugger active and we're not compiling the debugger code,
1342 * so store the line into the debugger's array of lines
1343 */
1344 update_debugger_info(NULL, buf+old_bufend_pos,
1345 new_bufend_pos-old_bufend_pos);
1346 }
1347 return got_some;
1348}
1349
1350/*
1351=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1352
1353Looks ahead one (Unicode) character in the text currently being lexed.
1354Returns the codepoint (unsigned integer value) of the next character,
1355or -1 if lexing has reached the end of the input text. To consume the
1356peeked character, use L</lex_read_unichar>.
1357
1358If the next character is in (or extends into) the next chunk of input
1359text, the next chunk will be read in. Normally the current chunk will be
1360discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1361then the current chunk will not be discarded.
1362
1363If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1364is encountered, an exception is generated.
1365
1366=cut
1367*/
1368
1369I32
1370Perl_lex_peek_unichar(pTHX_ U32 flags)
1371{
749123ff 1372 dVAR;
f0e67a1d
Z
1373 char *s, *bufend;
1374 if (flags & ~(LEX_KEEP_PREVIOUS))
1375 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1376 s = PL_parser->bufptr;
1377 bufend = PL_parser->bufend;
1378 if (UTF) {
1379 U8 head;
1380 I32 unichar;
1381 STRLEN len, retlen;
1382 if (s == bufend) {
1383 if (!lex_next_chunk(flags))
1384 return -1;
1385 s = PL_parser->bufptr;
1386 bufend = PL_parser->bufend;
1387 }
1388 head = (U8)*s;
1389 if (!(head & 0x80))
1390 return head;
1391 if (head & 0x40) {
1392 len = PL_utf8skip[head];
1393 while ((STRLEN)(bufend-s) < len) {
1394 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1395 break;
1396 s = PL_parser->bufptr;
1397 bufend = PL_parser->bufend;
1398 }
1399 }
1400 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1401 if (retlen == (STRLEN)-1) {
1402 /* malformed UTF-8 */
1403 ENTER;
1404 SAVESPTR(PL_warnhook);
1405 PL_warnhook = PERL_WARNHOOK_FATAL;
1406 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1407 LEAVE;
1408 }
1409 return unichar;
1410 } else {
1411 if (s == bufend) {
1412 if (!lex_next_chunk(flags))
1413 return -1;
1414 s = PL_parser->bufptr;
1415 }
1416 return (U8)*s;
1417 }
1418}
1419
1420/*
1421=for apidoc Amx|I32|lex_read_unichar|U32 flags
1422
1423Reads the next (Unicode) character in the text currently being lexed.
1424Returns the codepoint (unsigned integer value) of the character read,
1425and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1426if lexing has reached the end of the input text. To non-destructively
1427examine the next character, use L</lex_peek_unichar> instead.
1428
1429If the next character is in (or extends into) the next chunk of input
1430text, the next chunk will be read in. Normally the current chunk will be
1431discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1432then the current chunk will not be discarded.
1433
1434If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1435is encountered, an exception is generated.
1436
1437=cut
1438*/
1439
1440I32
1441Perl_lex_read_unichar(pTHX_ U32 flags)
1442{
1443 I32 c;
1444 if (flags & ~(LEX_KEEP_PREVIOUS))
1445 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1446 c = lex_peek_unichar(flags);
1447 if (c != -1) {
1448 if (c == '\n')
83944c01 1449 COPLINE_INC_WITH_HERELINES;
d9018cbe
EB
1450 if (UTF)
1451 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1452 else
1453 ++(PL_parser->bufptr);
f0e67a1d
Z
1454 }
1455 return c;
1456}
1457
1458/*
1459=for apidoc Amx|void|lex_read_space|U32 flags
1460
1461Reads optional spaces, in Perl style, in the text currently being
1462lexed. The spaces may include ordinary whitespace characters and
1463Perl-style comments. C<#line> directives are processed if encountered.
1464L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1465at a non-space character (or the end of the input text).
1466
1467If spaces extend into the next chunk of input text, the next chunk will
1468be read in. Normally the current chunk will be discarded at the same
1469time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1470chunk will not be discarded.
1471
1472=cut
1473*/
1474
f0998909
Z
1475#define LEX_NO_NEXT_CHUNK 0x80000000
1476
f0e67a1d
Z
1477void
1478Perl_lex_read_space(pTHX_ U32 flags)
1479{
1480 char *s, *bufend;
1481 bool need_incline = 0;
f0998909 1482 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1483 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1484#ifdef PERL_MAD
1485 if (PL_skipwhite) {
1486 sv_free(PL_skipwhite);
1487 PL_skipwhite = NULL;
1488 }
1489 if (PL_madskills)
1490 PL_skipwhite = newSVpvs("");
1491#endif /* PERL_MAD */
1492 s = PL_parser->bufptr;
1493 bufend = PL_parser->bufend;
1494 while (1) {
1495 char c = *s;
1496 if (c == '#') {
1497 do {
1498 c = *++s;
1499 } while (!(c == '\n' || (c == 0 && s == bufend)));
1500 } else if (c == '\n') {
1501 s++;
1502 PL_parser->linestart = s;
1503 if (s == bufend)
1504 need_incline = 1;
1505 else
1506 incline(s);
1507 } else if (isSPACE(c)) {
1508 s++;
1509 } else if (c == 0 && s == bufend) {
1510 bool got_more;
1511#ifdef PERL_MAD
1512 if (PL_madskills)
1513 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1514#endif /* PERL_MAD */
f0998909
Z
1515 if (flags & LEX_NO_NEXT_CHUNK)
1516 break;
f0e67a1d 1517 PL_parser->bufptr = s;
83944c01 1518 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
1519 got_more = lex_next_chunk(flags);
1520 CopLINE_dec(PL_curcop);
1521 s = PL_parser->bufptr;
1522 bufend = PL_parser->bufend;
1523 if (!got_more)
1524 break;
1525 if (need_incline && PL_parser->rsfp) {
1526 incline(s);
1527 need_incline = 0;
1528 }
1529 } else {
1530 break;
1531 }
1532 }
1533#ifdef PERL_MAD
1534 if (PL_madskills)
1535 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1536#endif /* PERL_MAD */
1537 PL_parser->bufptr = s;
1538}
1539
1540/*
ffb4593c
NT
1541 * S_incline
1542 * This subroutine has nothing to do with tilting, whether at windmills
1543 * or pinball tables. Its name is short for "increment line". It
57843af0 1544 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1545 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1546 * # line 500 "foo.pm"
1547 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1548 */
1549
76e3520e 1550STATIC void
d9095cec 1551S_incline(pTHX_ const char *s)
463ee0b2 1552{
97aff369 1553 dVAR;
d9095cec
NC
1554 const char *t;
1555 const char *n;
1556 const char *e;
8818d409 1557 line_t line_num;
463ee0b2 1558
7918f24d
NC
1559 PERL_ARGS_ASSERT_INCLINE;
1560
83944c01 1561 COPLINE_INC_WITH_HERELINES;
463ee0b2
LW
1562 if (*s++ != '#')
1563 return;
d4c19fe8
AL
1564 while (SPACE_OR_TAB(*s))
1565 s++;
73659bf1
GS
1566 if (strnEQ(s, "line", 4))
1567 s += 4;
1568 else
1569 return;
084592ab 1570 if (SPACE_OR_TAB(*s))
73659bf1 1571 s++;
4e553d73 1572 else
73659bf1 1573 return;
d4c19fe8
AL
1574 while (SPACE_OR_TAB(*s))
1575 s++;
463ee0b2
LW
1576 if (!isDIGIT(*s))
1577 return;
d4c19fe8 1578
463ee0b2
LW
1579 n = s;
1580 while (isDIGIT(*s))
1581 s++;
07714eb4 1582 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1583 return;
bf4acbe4 1584 while (SPACE_OR_TAB(*s))
463ee0b2 1585 s++;
73659bf1 1586 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1587 s++;
73659bf1
GS
1588 e = t + 1;
1589 }
463ee0b2 1590 else {
c35e046a
AL
1591 t = s;
1592 while (!isSPACE(*t))
1593 t++;
73659bf1 1594 e = t;
463ee0b2 1595 }
bf4acbe4 1596 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1597 e++;
1598 if (*e != '\n' && *e != '\0')
1599 return; /* false alarm */
1600
8818d409
FC
1601 line_num = atoi(n)-1;
1602
f4dd75d9 1603 if (t - s > 0) {
d9095cec 1604 const STRLEN len = t - s;
19bad673
NC
1605 SV *const temp_sv = CopFILESV(PL_curcop);
1606 const char *cf;
1607 STRLEN tmplen;
1608
1609 if (temp_sv) {
1610 cf = SvPVX(temp_sv);
1611 tmplen = SvCUR(temp_sv);
1612 } else {
1613 cf = NULL;
1614 tmplen = 0;
1615 }
1616
d1299d44 1617 if (!PL_rsfp && !PL_parser->filtered) {
e66cf94c
RGS
1618 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1619 * to *{"::_<newfilename"} */
44867030
NC
1620 /* However, the long form of evals is only turned on by the
1621 debugger - usually they're "(eval %lu)" */
1622 char smallbuf[128];
1623 char *tmpbuf;
1624 GV **gvp;
d9095cec 1625 STRLEN tmplen2 = len;
798b63bc 1626 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1627 tmpbuf = smallbuf;
1628 else
2ae0db35 1629 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1630 tmpbuf[0] = '_';
1631 tmpbuf[1] = '<';
2ae0db35 1632 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1633 tmplen += 2;
8a5ee598
RGS
1634 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1635 if (gvp) {
44867030
NC
1636 char *tmpbuf2;
1637 GV *gv2;
1638
1639 if (tmplen2 + 2 <= sizeof smallbuf)
1640 tmpbuf2 = smallbuf;
1641 else
1642 Newx(tmpbuf2, tmplen2 + 2, char);
1643
1644 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1645 /* Either they malloc'd it, or we malloc'd it,
1646 so no prefix is present in ours. */
1647 tmpbuf2[0] = '_';
1648 tmpbuf2[1] = '<';
1649 }
1650
1651 memcpy(tmpbuf2 + 2, s, tmplen2);
1652 tmplen2 += 2;
1653
8a5ee598 1654 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1655 if (!isGV(gv2)) {
8a5ee598 1656 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1657 /* adjust ${"::_<newfilename"} to store the new file name */
1658 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1659 /* The line number may differ. If that is the case,
1660 alias the saved lines that are in the array.
1661 Otherwise alias the whole array. */
1662 if (CopLINE(PL_curcop) == line_num) {
1663 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1664 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1665 }
1666 else if (GvAV(*gvp)) {
1667 AV * const av = GvAV(*gvp);
1668 const I32 start = CopLINE(PL_curcop)+1;
1669 I32 items = AvFILLp(av) - start;
1670 if (items > 0) {
1671 AV * const av2 = GvAVn(gv2);
1672 SV **svp = AvARRAY(av) + start;
1673 I32 l = (I32)line_num+1;
1674 while (items--)
1675 av_store(av2, l++, SvREFCNT_inc(*svp++));
1676 }
1677 }
e5527e4b 1678 }
44867030
NC
1679
1680 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1681 }
e66cf94c 1682 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1683 }
05ec9bb3 1684 CopFILE_free(PL_curcop);
d9095cec 1685 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1686 }
8818d409 1687 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1688}
1689
29595ff2 1690#ifdef PERL_MAD
cd81e915 1691/* skip space before PL_thistoken */
29595ff2
NC
1692
1693STATIC char *
1694S_skipspace0(pTHX_ register char *s)
1695{
7918f24d
NC
1696 PERL_ARGS_ASSERT_SKIPSPACE0;
1697
29595ff2
NC
1698 s = skipspace(s);
1699 if (!PL_madskills)
1700 return s;
cd81e915
NC
1701 if (PL_skipwhite) {
1702 if (!PL_thiswhite)
6b29d1f5 1703 PL_thiswhite = newSVpvs("");
cd81e915
NC
1704 sv_catsv(PL_thiswhite, PL_skipwhite);
1705 sv_free(PL_skipwhite);
1706 PL_skipwhite = 0;
1707 }
1708 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1709 return s;
1710}
1711
cd81e915 1712/* skip space after PL_thistoken */
29595ff2
NC
1713
1714STATIC char *
1715S_skipspace1(pTHX_ register char *s)
1716{
d4c19fe8 1717 const char *start = s;
29595ff2
NC
1718 I32 startoff = start - SvPVX(PL_linestr);
1719
7918f24d
NC
1720 PERL_ARGS_ASSERT_SKIPSPACE1;
1721
29595ff2
NC
1722 s = skipspace(s);
1723 if (!PL_madskills)
1724 return s;
1725 start = SvPVX(PL_linestr) + startoff;
cd81e915 1726 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1727 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1728 PL_thistoken = newSVpvn(tstart, start - tstart);
1729 }
1730 PL_realtokenstart = -1;
1731 if (PL_skipwhite) {
1732 if (!PL_nextwhite)
6b29d1f5 1733 PL_nextwhite = newSVpvs("");
cd81e915
NC
1734 sv_catsv(PL_nextwhite, PL_skipwhite);
1735 sv_free(PL_skipwhite);
1736 PL_skipwhite = 0;
29595ff2
NC
1737 }
1738 return s;
1739}
1740
1741STATIC char *
1742S_skipspace2(pTHX_ register char *s, SV **svp)
1743{
c35e046a
AL
1744 char *start;
1745 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1746 const I32 startoff = s - SvPVX(PL_linestr);
1747
7918f24d
NC
1748 PERL_ARGS_ASSERT_SKIPSPACE2;
1749
29595ff2
NC
1750 s = skipspace(s);
1751 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1752 if (!PL_madskills || !svp)
1753 return s;
1754 start = SvPVX(PL_linestr) + startoff;
cd81e915 1755 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1756 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1757 PL_thistoken = newSVpvn(tstart, start - tstart);
1758 PL_realtokenstart = -1;
29595ff2 1759 }
cd81e915 1760 if (PL_skipwhite) {
29595ff2 1761 if (!*svp)
6b29d1f5 1762 *svp = newSVpvs("");
cd81e915
NC
1763 sv_setsv(*svp, PL_skipwhite);
1764 sv_free(PL_skipwhite);
1765 PL_skipwhite = 0;
29595ff2
NC
1766 }
1767
1768 return s;
1769}
1770#endif
1771
80a702cd 1772STATIC void
15f169a1 1773S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1774{
1775 AV *av = CopFILEAVx(PL_curcop);
1776 if (av) {
b9f83d2f 1777 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1778 if (orig_sv)
1779 sv_setsv(sv, orig_sv);
1780 else
1781 sv_setpvn(sv, buf, len);
80a702cd
RGS
1782 (void)SvIOK_on(sv);
1783 SvIV_set(sv, 0);
1784 av_store(av, (I32)CopLINE(PL_curcop), sv);
1785 }
1786}
1787
ffb4593c
NT
1788/*
1789 * S_skipspace
1790 * Called to gobble the appropriate amount and type of whitespace.
1791 * Skips comments as well.
1792 */
1793
76e3520e 1794STATIC char *
cea2e8a9 1795S_skipspace(pTHX_ register char *s)
a687059c 1796{
5db06880 1797#ifdef PERL_MAD
f0e67a1d
Z
1798 char *start = s;
1799#endif /* PERL_MAD */
7918f24d 1800 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1801#ifdef PERL_MAD
cd81e915
NC
1802 if (PL_skipwhite) {
1803 sv_free(PL_skipwhite);
f0e67a1d 1804 PL_skipwhite = NULL;
5db06880 1805 }
f0e67a1d 1806#endif /* PERL_MAD */
3280af22 1807 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1808 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1809 s++;
f0e67a1d
Z
1810 } else {
1811 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1812 PL_bufptr = s;
f0998909
Z
1813 lex_read_space(LEX_KEEP_PREVIOUS |
1814 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1815 LEX_NO_NEXT_CHUNK : 0));
3280af22 1816 s = PL_bufptr;
f0e67a1d
Z
1817 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1818 if (PL_linestart > PL_bufptr)
1819 PL_bufptr = PL_linestart;
1820 return s;
463ee0b2 1821 }
5db06880 1822#ifdef PERL_MAD
f0e67a1d
Z
1823 if (PL_madskills)
1824 PL_skipwhite = newSVpvn(start, s-start);
1825#endif /* PERL_MAD */
5db06880 1826 return s;
a687059c 1827}
378cc40b 1828
ffb4593c
NT
1829/*
1830 * S_check_uni
1831 * Check the unary operators to ensure there's no ambiguity in how they're
1832 * used. An ambiguous piece of code would be:
1833 * rand + 5
1834 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1835 * the +5 is its argument.
1836 */
1837
76e3520e 1838STATIC void
cea2e8a9 1839S_check_uni(pTHX)
ba106d47 1840{
97aff369 1841 dVAR;
d4c19fe8
AL
1842 const char *s;
1843 const char *t;
2f3197b3 1844
3280af22 1845 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1846 return;
3280af22
NIS
1847 while (isSPACE(*PL_last_uni))
1848 PL_last_uni++;
c35e046a
AL
1849 s = PL_last_uni;
1850 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1851 s++;
3280af22 1852 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1853 return;
6136c704 1854
9b387841
NC
1855 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1856 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1857 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1858}
1859
ffb4593c
NT
1860/*
1861 * LOP : macro to build a list operator. Its behaviour has been replaced
1862 * with a subroutine, S_lop() for which LOP is just another name.
1863 */
1864
a0d0e21e
LW
1865#define LOP(f,x) return lop(f,x,s)
1866
ffb4593c
NT
1867/*
1868 * S_lop
1869 * Build a list operator (or something that might be one). The rules:
1870 * - if we have a next token, then it's a list operator [why?]
1871 * - if the next thing is an opening paren, then it's a function
1872 * - else it's a list operator
1873 */
1874
76e3520e 1875STATIC I32
a0be28da 1876S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1877{
97aff369 1878 dVAR;
7918f24d
NC
1879
1880 PERL_ARGS_ASSERT_LOP;
1881
6154021b 1882 pl_yylval.ival = f;
35c8bce7 1883 CLINE;
3280af22
NIS
1884 PL_expect = x;
1885 PL_bufptr = s;
1886 PL_last_lop = PL_oldbufptr;
eb160463 1887 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1888#ifdef PERL_MAD
1889 if (PL_lasttoke)
78cdf107 1890 goto lstop;
5db06880 1891#else
3280af22 1892 if (PL_nexttoke)
78cdf107 1893 goto lstop;
5db06880 1894#endif
79072805 1895 if (*s == '(')
bbf60fe6 1896 return REPORT(FUNC);
29595ff2 1897 s = PEEKSPACE(s);
79072805 1898 if (*s == '(')
bbf60fe6 1899 return REPORT(FUNC);
78cdf107
Z
1900 else {
1901 lstop:
1902 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1903 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1904 return REPORT(LSTOP);
78cdf107 1905 }
79072805
LW
1906}
1907
5db06880
NC
1908#ifdef PERL_MAD
1909 /*
1910 * S_start_force
1911 * Sets up for an eventual force_next(). start_force(0) basically does
1912 * an unshift, while start_force(-1) does a push. yylex removes items
1913 * on the "pop" end.
1914 */
1915
1916STATIC void
1917S_start_force(pTHX_ int where)
1918{
1919 int i;
1920
cd81e915 1921 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1922 where = PL_lasttoke;
cd81e915
NC
1923 assert(PL_curforce < 0 || PL_curforce == where);
1924 if (PL_curforce != where) {
5db06880
NC
1925 for (i = PL_lasttoke; i > where; --i) {
1926 PL_nexttoke[i] = PL_nexttoke[i-1];
1927 }
1928 PL_lasttoke++;
1929 }
cd81e915 1930 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1931 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1932 PL_curforce = where;
1933 if (PL_nextwhite) {
5db06880 1934 if (PL_madskills)
6b29d1f5 1935 curmad('^', newSVpvs(""));
cd81e915 1936 CURMAD('_', PL_nextwhite);
5db06880
NC
1937 }
1938}
1939
1940STATIC void
1941S_curmad(pTHX_ char slot, SV *sv)
1942{
1943 MADPROP **where;
1944
1945 if (!sv)
1946 return;
cd81e915
NC
1947 if (PL_curforce < 0)
1948 where = &PL_thismad;
5db06880 1949 else
cd81e915 1950 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1951
cd81e915 1952 if (PL_faketokens)
76f68e9b 1953 sv_setpvs(sv, "");
5db06880
NC
1954 else {
1955 if (!IN_BYTES) {
1956 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1957 SvUTF8_on(sv);
1958 else if (PL_encoding) {
1959 sv_recode_to_utf8(sv, PL_encoding);
1960 }
1961 }
1962 }
1963
1964 /* keep a slot open for the head of the list? */
1965 if (slot != '_' && *where && (*where)->mad_key == '^') {
1966 (*where)->mad_key = slot;
daba3364 1967 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1968 (*where)->mad_val = (void*)sv;
1969 }
1970 else
1971 addmad(newMADsv(slot, sv), where, 0);
1972}
1973#else
b3f24c00
MHM
1974# define start_force(where) NOOP
1975# define curmad(slot, sv) NOOP
5db06880
NC
1976#endif
1977
ffb4593c
NT
1978/*
1979 * S_force_next
9cbb5ea2 1980 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1981 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1982 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1983 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1984 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1985 */
1986
4e553d73 1987STATIC void
cea2e8a9 1988S_force_next(pTHX_ I32 type)
79072805 1989{
97aff369 1990 dVAR;
704d4215
GG
1991#ifdef DEBUGGING
1992 if (DEBUG_T_TEST) {
1993 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1994 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1995 }
1996#endif
6c7ae946
FC
1997 /* Don’t let opslab_force_free snatch it */
1998 if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
1999 assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
2000 NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
2001 }
5db06880 2002#ifdef PERL_MAD
cd81e915 2003 if (PL_curforce < 0)
5db06880 2004 start_force(PL_lasttoke);
cd81e915 2005 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
2006 if (PL_lex_state != LEX_KNOWNEXT)
2007 PL_lex_defer = PL_lex_state;
2008 PL_lex_state = LEX_KNOWNEXT;
2009 PL_lex_expect = PL_expect;
cd81e915 2010 PL_curforce = -1;
5db06880 2011#else
3280af22
NIS
2012 PL_nexttype[PL_nexttoke] = type;
2013 PL_nexttoke++;
2014 if (PL_lex_state != LEX_KNOWNEXT) {
2015 PL_lex_defer = PL_lex_state;
2016 PL_lex_expect = PL_expect;
2017 PL_lex_state = LEX_KNOWNEXT;
79072805 2018 }
5db06880 2019#endif
79072805
LW
2020}
2021
28ac2b49
Z
2022void
2023Perl_yyunlex(pTHX)
2024{
a7aaec61
Z
2025 int yyc = PL_parser->yychar;
2026 if (yyc != YYEMPTY) {
2027 if (yyc) {
2028 start_force(-1);
2029 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2030 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 2031 PL_lex_allbrackets--;
a7aaec61 2032 PL_lex_brackets--;
78cdf107
Z
2033 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2034 } else if (yyc == '('/*)*/) {
2035 PL_lex_allbrackets--;
2036 yyc |= (2<<24);
a7aaec61
Z
2037 }
2038 force_next(yyc);
2039 }
28ac2b49
Z
2040 PL_parser->yychar = YYEMPTY;
2041 }
2042}
2043
d0a148a6 2044STATIC SV *
15f169a1 2045S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2046{
97aff369 2047 dVAR;
740cce10 2048 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2049 !IN_BYTES
2050 && UTF
2051 && !is_ascii_string((const U8*)start, len)
740cce10 2052 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2053 return sv;
2054}
2055
ffb4593c
NT
2056/*
2057 * S_force_word
2058 * When the lexer knows the next thing is a word (for instance, it has
2059 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2060 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2061 * lookahead.
ffb4593c
NT
2062 *
2063 * Arguments:
b1b65b59 2064 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2065 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2066 * int check_keyword : if true, Perl checks to make sure the word isn't
2067 * a keyword (do this if the word is a label, e.g. goto FOO)
2068 * int allow_pack : if true, : characters will also be allowed (require,
2069 * use, etc. do this)
9cbb5ea2 2070 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2071 */
2072
76e3520e 2073STATIC char *
cea2e8a9 2074S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2075{
97aff369 2076 dVAR;
eb578fdb 2077 char *s;
463ee0b2 2078 STRLEN len;
4e553d73 2079
7918f24d
NC
2080 PERL_ARGS_ASSERT_FORCE_WORD;
2081
29595ff2 2082 start = SKIPSPACE1(start);
463ee0b2 2083 s = start;
7e2040f0 2084 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2085 (allow_pack && *s == ':') ||
15f0808c 2086 (allow_initial_tick && *s == '\'') )
a0d0e21e 2087 {
3280af22 2088 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2089 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2090 return start;
cd81e915 2091 start_force(PL_curforce);
5db06880
NC
2092 if (PL_madskills)
2093 curmad('X', newSVpvn(start,s-start));
463ee0b2 2094 if (token == METHOD) {
29595ff2 2095 s = SKIPSPACE1(s);
463ee0b2 2096 if (*s == '(')
3280af22 2097 PL_expect = XTERM;
463ee0b2 2098 else {
3280af22 2099 PL_expect = XOPERATOR;
463ee0b2 2100 }
79072805 2101 }
e74e6b3d 2102 if (PL_madskills)
63575281 2103 curmad('g', newSVpvs( "forced" ));
9ded7720 2104 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2105 = (OP*)newSVOP(OP_CONST,0,
2106 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2107 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2108 force_next(token);
2109 }
2110 return s;
2111}
2112
ffb4593c
NT
2113/*
2114 * S_force_ident
9cbb5ea2 2115 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2116 * text only contains the "foo" portion. The first argument is a pointer
2117 * to the "foo", and the second argument is the type symbol to prefix.
2118 * Forces the next token to be a "WORD".
9cbb5ea2 2119 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2120 */
2121
76e3520e 2122STATIC void
bfed75c6 2123S_force_ident(pTHX_ register const char *s, int kind)
79072805 2124{
97aff369 2125 dVAR;
7918f24d
NC
2126
2127 PERL_ARGS_ASSERT_FORCE_IDENT;
2128
c35e046a 2129 if (*s) {
90e5519e 2130 const STRLEN len = strlen(s);
728847b1
BF
2131 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2132 UTF ? SVf_UTF8 : 0));
cd81e915 2133 start_force(PL_curforce);
9ded7720 2134 NEXTVAL_NEXTTOKE.opval = o;
79072805 2135 force_next(WORD);
748a9306 2136 if (kind) {
11343788 2137 o->op_private = OPpCONST_ENTERED;
55497cff 2138 /* XXX see note in pp_entereval() for why we forgo typo
2139 warnings if the symbol must be introduced in an eval.
2140 GSAR 96-10-12 */
90e5519e 2141 gv_fetchpvn_flags(s, len,
728847b1
BF
2142 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2143 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
90e5519e
NC
2144 kind == '$' ? SVt_PV :
2145 kind == '@' ? SVt_PVAV :
2146 kind == '%' ? SVt_PVHV :
a0d0e21e 2147 SVt_PVGV
90e5519e 2148 );
748a9306 2149 }
79072805
LW
2150 }
2151}
2152
3f33d153
FC
2153static void
2154S_force_ident_maybe_lex(pTHX_ char pit)
2155{
2156 start_force(PL_curforce);
2157 NEXTVAL_NEXTTOKE.ival = pit;
2158 force_next('p');
2159}
2160
1571675a
GS
2161NV
2162Perl_str_to_version(pTHX_ SV *sv)
2163{
2164 NV retval = 0.0;
2165 NV nshift = 1.0;
2166 STRLEN len;
cfd0369c 2167 const char *start = SvPV_const(sv,len);
9d4ba2ae 2168 const char * const end = start + len;
504618e9 2169 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2170
2171 PERL_ARGS_ASSERT_STR_TO_VERSION;
2172
1571675a 2173 while (start < end) {
ba210ebe 2174 STRLEN skip;
1571675a
GS
2175 UV n;
2176 if (utf)
9041c2e3 2177 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2178 else {
2179 n = *(U8*)start;
2180 skip = 1;
2181 }
2182 retval += ((NV)n)/nshift;
2183 start += skip;
2184 nshift *= 1000;
2185 }
2186 return retval;
2187}
2188
4e553d73 2189/*
ffb4593c
NT
2190 * S_force_version
2191 * Forces the next token to be a version number.
e759cc13
RGS
2192 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2193 * and if "guessing" is TRUE, then no new token is created (and the caller
2194 * must use an alternative parsing method).
ffb4593c
NT
2195 */
2196
76e3520e 2197STATIC char *
e759cc13 2198S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2199{
97aff369 2200 dVAR;
5f66b61c 2201 OP *version = NULL;
44dcb63b 2202 char *d;
5db06880
NC
2203#ifdef PERL_MAD
2204 I32 startoff = s - SvPVX(PL_linestr);
2205#endif
89bfa8cd 2206
7918f24d
NC
2207 PERL_ARGS_ASSERT_FORCE_VERSION;
2208
29595ff2 2209 s = SKIPSPACE1(s);
89bfa8cd 2210
44dcb63b 2211 d = s;
dd629d5b 2212 if (*d == 'v')
44dcb63b 2213 d++;
44dcb63b 2214 if (isDIGIT(*d)) {
e759cc13
RGS
2215 while (isDIGIT(*d) || *d == '_' || *d == '.')
2216 d++;
5db06880
NC
2217#ifdef PERL_MAD
2218 if (PL_madskills) {
cd81e915 2219 start_force(PL_curforce);
5db06880
NC
2220 curmad('X', newSVpvn(s,d-s));
2221 }
2222#endif
4e4da3ac 2223 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2224 SV *ver;
8d08d9ba 2225#ifdef USE_LOCALE_NUMERIC
909d3787
KW
2226 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2227 setlocale(LC_NUMERIC, "C");
8d08d9ba 2228#endif
6154021b 2229 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2230#ifdef USE_LOCALE_NUMERIC
2231 setlocale(LC_NUMERIC, loc);
909d3787 2232 Safefree(loc);
8d08d9ba 2233#endif
6154021b 2234 version = pl_yylval.opval;
dd629d5b
GS
2235 ver = cSVOPx(version)->op_sv;
2236 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2237 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2238 SvNV_set(ver, str_to_version(ver));
1571675a 2239 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2240 }
89bfa8cd 2241 }
5db06880
NC
2242 else if (guessing) {
2243#ifdef PERL_MAD
2244 if (PL_madskills) {
cd81e915
NC
2245 sv_free(PL_nextwhite); /* let next token collect whitespace */
2246 PL_nextwhite = 0;
5db06880
NC
2247 s = SvPVX(PL_linestr) + startoff;
2248 }
2249#endif
e759cc13 2250 return s;
5db06880 2251 }
89bfa8cd 2252 }
2253
5db06880
NC
2254#ifdef PERL_MAD
2255 if (PL_madskills && !version) {
cd81e915
NC
2256 sv_free(PL_nextwhite); /* let next token collect whitespace */
2257 PL_nextwhite = 0;
5db06880
NC
2258 s = SvPVX(PL_linestr) + startoff;
2259 }
2260#endif
89bfa8cd 2261 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2262 start_force(PL_curforce);
9ded7720 2263 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2264 force_next(WORD);
89bfa8cd 2265
e759cc13 2266 return s;
89bfa8cd 2267}
2268
ffb4593c 2269/*
91152fc1
DG
2270 * S_force_strict_version
2271 * Forces the next token to be a version number using strict syntax rules.
2272 */
2273
2274STATIC char *
2275S_force_strict_version(pTHX_ char *s)
2276{
2277 dVAR;
2278 OP *version = NULL;
2279#ifdef PERL_MAD
2280 I32 startoff = s - SvPVX(PL_linestr);
2281#endif
2282 const char *errstr = NULL;
2283
2284 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2285
2286 while (isSPACE(*s)) /* leading whitespace */
2287 s++;
2288
2289 if (is_STRICT_VERSION(s,&errstr)) {
2290 SV *ver = newSV(0);
2291 s = (char *)scan_version(s, ver, 0);
2292 version = newSVOP(OP_CONST, 0, ver);
2293 }
4e4da3ac
Z
2294 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2295 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2296 {
91152fc1
DG
2297 PL_bufptr = s;
2298 if (errstr)
2299 yyerror(errstr); /* version required */
2300 return s;
2301 }
2302
2303#ifdef PERL_MAD
2304 if (PL_madskills && !version) {
2305 sv_free(PL_nextwhite); /* let next token collect whitespace */
2306 PL_nextwhite = 0;
2307 s = SvPVX(PL_linestr) + startoff;
2308 }
2309#endif
2310 /* NOTE: The parser sees the package name and the VERSION swapped */
2311 start_force(PL_curforce);
2312 NEXTVAL_NEXTTOKE.opval = version;
2313 force_next(WORD);
2314
2315 return s;
2316}
2317
2318/*
ffb4593c
NT
2319 * S_tokeq
2320 * Tokenize a quoted string passed in as an SV. It finds the next
2321 * chunk, up to end of string or a backslash. It may make a new
2322 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2323 * turns \\ into \.
2324 */
2325
76e3520e 2326STATIC SV *
cea2e8a9 2327S_tokeq(pTHX_ SV *sv)
79072805 2328{
97aff369 2329 dVAR;
eb578fdb
KW
2330 char *s;
2331 char *send;
2332 char *d;
b3ac6de7
IZ
2333 STRLEN len = 0;
2334 SV *pv = sv;
79072805 2335
7918f24d
NC
2336 PERL_ARGS_ASSERT_TOKEQ;
2337
79072805 2338 if (!SvLEN(sv))
b3ac6de7 2339 goto finish;
79072805 2340
a0d0e21e 2341 s = SvPV_force(sv, len);
21a311ee 2342 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2343 goto finish;
463ee0b2 2344 send = s + len;
dcb21ed6
NC
2345 /* This is relying on the SV being "well formed" with a trailing '\0' */
2346 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2347 s++;
2348 if (s == send)
b3ac6de7 2349 goto finish;
79072805 2350 d = s;
be4731d2 2351 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2352 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2353 }
79072805
LW
2354 while (s < send) {
2355 if (*s == '\\') {
a0d0e21e 2356 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2357 s++; /* all that, just for this */
2358 }
2359 *d++ = *s++;
2360 }
2361 *d = '\0';
95a20fc0 2362 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2363 finish:
3280af22 2364 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2365 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2366 return sv;
2367}
2368
ffb4593c
NT
2369/*
2370 * Now come three functions related to double-quote context,
2371 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2372 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2373 * interact with PL_lex_state, and create fake ( ... ) argument lists
2374 * to handle functions and concatenation.
ecd24171
DM
2375 * For example,
2376 * "foo\lbar"
2377 * is tokenised as
2378 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
ffb4593c
NT
2379 */
2380
2381/*
2382 * S_sublex_start
6154021b 2383 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2384 *
2385 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2386 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2387 *
2388 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2389 *
2390 * Everything else becomes a FUNC.
2391 *
2392 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2393 * had an OP_CONST or OP_READLINE). This just sets us up for a
2394 * call to S_sublex_push().
2395 */
2396
76e3520e 2397STATIC I32
cea2e8a9 2398S_sublex_start(pTHX)
79072805 2399{
97aff369 2400 dVAR;
eb578fdb 2401 const I32 op_type = pl_yylval.ival;
79072805
LW
2402
2403 if (op_type == OP_NULL) {
6154021b 2404 pl_yylval.opval = PL_lex_op;
5f66b61c 2405 PL_lex_op = NULL;
79072805
LW
2406 return THING;
2407 }
2408 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2409 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2410
2411 if (SvTYPE(sv) == SVt_PVIV) {
2412 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2413 STRLEN len;
96a5add6 2414 const char * const p = SvPV_const(sv, len);
740cce10 2415 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2416 SvREFCNT_dec(sv);
2417 sv = nsv;
4e553d73 2418 }
6154021b 2419 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2420 PL_lex_stuff = NULL;
6f33ba73
RGS
2421 /* Allow <FH> // "foo" */
2422 if (op_type == OP_READLINE)
2423 PL_expect = XTERMORDORDOR;
79072805
LW
2424 return THING;
2425 }
e3f73d4e
RGS
2426 else if (op_type == OP_BACKTICK && PL_lex_op) {
2427 /* readpipe() vas overriden */
2428 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2429 pl_yylval.opval = PL_lex_op;
9b201d7d 2430 PL_lex_op = NULL;
e3f73d4e
RGS
2431 PL_lex_stuff = NULL;
2432 return THING;
2433 }
79072805 2434
3280af22 2435 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2436 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2437 PL_sublex_info.sub_op = PL_lex_op;
2438 PL_lex_state = LEX_INTERPPUSH;
55497cff 2439
3280af22
NIS
2440 PL_expect = XTERM;
2441 if (PL_lex_op) {
6154021b 2442 pl_yylval.opval = PL_lex_op;
5f66b61c 2443 PL_lex_op = NULL;
55497cff 2444 return PMFUNC;
2445 }
2446 else
2447 return FUNC;
2448}
2449
ffb4593c
NT
2450/*
2451 * S_sublex_push
2452 * Create a new scope to save the lexing state. The scope will be
2453 * ended in S_sublex_done. Returns a '(', starting the function arguments
2454 * to the uc, lc, etc. found before.
2455 * Sets PL_lex_state to LEX_INTERPCONCAT.
2456 */
2457
76e3520e 2458STATIC I32
cea2e8a9 2459S_sublex_push(pTHX)
55497cff 2460{
27da23d5 2461 dVAR;
78a635de 2462 LEXSHARED *shared;
f46d017c 2463 ENTER;
55497cff 2464
3280af22 2465 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2466 SAVEBOOL(PL_lex_dojoin);
3280af22 2467 SAVEI32(PL_lex_brackets);
78cdf107 2468 SAVEI32(PL_lex_allbrackets);
b27dce25 2469 SAVEI32(PL_lex_formbrack);
78cdf107 2470 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2471 SAVEI32(PL_lex_casemods);
2472 SAVEI32(PL_lex_starts);
651b5b28 2473 SAVEI8(PL_lex_state);
7cc34111 2474 SAVESPTR(PL_lex_repl);
7766f137 2475 SAVEVPTR(PL_lex_inpat);
98246f1e 2476 SAVEI16(PL_lex_inwhat);
57843af0 2477 SAVECOPLINE(PL_curcop);
3280af22 2478 SAVEPPTR(PL_bufptr);
8452ff4b 2479 SAVEPPTR(PL_bufend);
3280af22
NIS
2480 SAVEPPTR(PL_oldbufptr);
2481 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2482 SAVEPPTR(PL_last_lop);
2483 SAVEPPTR(PL_last_uni);
3280af22
NIS
2484 SAVEPPTR(PL_linestart);
2485 SAVESPTR(PL_linestr);
8edd5f42
RGS
2486 SAVEGENERICPV(PL_lex_brackstack);
2487 SAVEGENERICPV(PL_lex_casestack);
78a635de 2488 SAVEGENERICPV(PL_parser->lex_shared);
3280af22 2489
99bd9d90 2490 /* The here-doc parser needs to be able to peek into outer lexing
60f40a38
FC
2491 scopes to find the body of the here-doc. So we put PL_linestr and
2492 PL_bufptr into lex_shared, to ‘share’ those values.
99bd9d90 2493 */
60f40a38
FC
2494 PL_parser->lex_shared->ls_linestr = PL_linestr;
2495 PL_parser->lex_shared->ls_bufptr = PL_bufptr;
99bd9d90 2496
3280af22 2497 PL_linestr = PL_lex_stuff;
7cc34111 2498 PL_lex_repl = PL_sublex_info.repl;
a0714e2c 2499 PL_lex_stuff = NULL;
7cc34111 2500 PL_sublex_info.repl = NULL;
3280af22 2501
9cbb5ea2
GS
2502 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2503 = SvPVX(PL_linestr);
3280af22 2504 PL_bufend += SvCUR(PL_linestr);
bd61b366 2505 PL_last_lop = PL_last_uni = NULL;
3280af22 2506 SAVEFREESV(PL_linestr);
4dc843bc 2507 if (PL_lex_repl) SAVEFREESV(PL_lex_repl);
3280af22
NIS
2508
2509 PL_lex_dojoin = FALSE;
b27dce25 2510 PL_lex_brackets = PL_lex_formbrack = 0;
78cdf107
Z
2511 PL_lex_allbrackets = 0;
2512 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2513 Newx(PL_lex_brackstack, 120, char);
2514 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2515 PL_lex_casemods = 0;
2516 *PL_lex_casestack = '\0';
2517 PL_lex_starts = 0;
2518 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2519 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
78a635de
FC
2520
2521 Newxz(shared, 1, LEXSHARED);
2522 shared->ls_prev = PL_parser->lex_shared;
2523 PL_parser->lex_shared = shared;
3280af22
NIS
2524
2525 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2526 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2527 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2528 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2529 else
5f66b61c 2530 PL_lex_inpat = NULL;
79072805 2531
55497cff 2532 return '(';
79072805
LW
2533}
2534
ffb4593c
NT
2535/*
2536 * S_sublex_done
2537 * Restores lexer state after a S_sublex_push.
2538 */
2539
76e3520e 2540STATIC I32
cea2e8a9 2541S_sublex_done(pTHX)
79072805 2542{
27da23d5 2543 dVAR;
3280af22 2544 if (!PL_lex_starts++) {
396482e1 2545 SV * const sv = newSVpvs("");
9aa983d2
JH
2546 if (SvUTF8(PL_linestr))
2547 SvUTF8_on(sv);
3280af22 2548 PL_expect = XOPERATOR;
6154021b 2549 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2550 return THING;
2551 }
2552
3280af22
NIS
2553 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2554 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2555 return yylex();
79072805
LW
2556 }
2557
ffb4593c 2558 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2559 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2560 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2561 PL_linestr = PL_lex_repl;
2562 PL_lex_inpat = 0;
2563 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2564 PL_bufend += SvCUR(PL_linestr);
bd61b366 2565 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2566 PL_lex_dojoin = FALSE;
2567 PL_lex_brackets = 0;
78cdf107
Z
2568 PL_lex_allbrackets = 0;
2569 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2570 PL_lex_casemods = 0;
2571 *PL_lex_casestack = '\0';
2572 PL_lex_starts = 0;
25da4f38 2573 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2574 PL_lex_state = LEX_INTERPNORMAL;
2575 PL_lex_starts++;
e9fa98b2
HS
2576 /* we don't clear PL_lex_repl here, so that we can check later
2577 whether this is an evalled subst; that means we rely on the
2578 logic to ensure sublex_done() is called again only via the
2579 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2580 }
e9fa98b2 2581 else {
3280af22 2582 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2583 PL_lex_repl = NULL;
e9fa98b2 2584 }
79072805 2585 return ',';
ffed7fef
LW
2586 }
2587 else {
5db06880
NC
2588#ifdef PERL_MAD
2589 if (PL_madskills) {
cd81e915
NC
2590 if (PL_thiswhite) {
2591 if (!PL_endwhite)
6b29d1f5 2592 PL_endwhite = newSVpvs("");
cd81e915
NC
2593 sv_catsv(PL_endwhite, PL_thiswhite);
2594 PL_thiswhite = 0;
2595 }
2596 if (PL_thistoken)
76f68e9b 2597 sv_setpvs(PL_thistoken,"");
5db06880 2598 else
cd81e915 2599 PL_realtokenstart = -1;
5db06880
NC
2600 }
2601#endif
f46d017c 2602 LEAVE;
3280af22
NIS
2603 PL_bufend = SvPVX(PL_linestr);
2604 PL_bufend += SvCUR(PL_linestr);
2605 PL_expect = XOPERATOR;
09bef843 2606 PL_sublex_info.sub_inwhat = 0;
79072805 2607 return ')';
ffed7fef
LW
2608 }
2609}
2610
02aa26ce
NT
2611/*
2612 scan_const
2613
9da1dd8f
DM
2614 Extracts the next constant part of a pattern, double-quoted string,
2615 or transliteration. This is terrifying code.
2616
2617 For example, in parsing the double-quoted string "ab\x63$d", it would
2618 stop at the '$' and return an OP_CONST containing 'abc'.
02aa26ce 2619
94def140 2620 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2621 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2622 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2623
94def140
TS
2624 Returns a pointer to the character scanned up to. If this is
2625 advanced from the start pointer supplied (i.e. if anything was
9da1dd8f 2626 successfully parsed), will leave an OP_CONST for the substring scanned
6154021b 2627 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2628 by looking at the next characters herself.
2629
02aa26ce 2630 In patterns:
9da1dd8f
DM
2631 expand:
2632 \N{ABC} => \N{U+41.42.43}
2633
2634 pass through:
2635 all other \-char, including \N and \N{ apart from \N{ABC}
2636
2637 stops on:
2638 @ and $ where it appears to be a var, but not for $ as tail anchor
2639 \l \L \u \U \Q \E
2640 (?{ or (??{
2641
02aa26ce
NT
2642
2643 In transliterations:
2644 characters are VERY literal, except for - not at the start or end
94def140
TS
2645 of the string, which indicates a range. If the range is in bytes,
2646 scan_const expands the range to the full set of intermediate
2647 characters. If the range is in utf8, the hyphen is replaced with
2648 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2649
2650 In double-quoted strings:
2651 backslashes:
2652 double-quoted style: \r and \n
ff3f963a 2653 constants: \x31, etc.
94def140 2654 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2655 case and quoting: \U \Q \E
2656 stops on @ and $
2657
2658 scan_const does *not* construct ops to handle interpolated strings.
2659 It stops processing as soon as it finds an embedded $ or @ variable
2660 and leaves it to the caller to work out what's going on.
2661
94def140
TS
2662 embedded arrays (whether in pattern or not) could be:
2663 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2664
2665 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2666
2667 $ in pattern could be $foo or could be tail anchor. Assumption:
2668 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2669 followed by one of "()| \r\n\t"
02aa26ce 2670
9da1dd8f 2671 \1 (backreferences) are turned into $1 in substitutions
02aa26ce
NT
2672
2673 The structure of the code is
2674 while (there's a character to process) {
94def140
TS
2675 handle transliteration ranges
2676 skip regexp comments /(?#comment)/ and codes /(?{code})/
2677 skip #-initiated comments in //x patterns
2678 check for embedded arrays
02aa26ce
NT
2679 check for embedded scalars
2680 if (backslash) {
94def140 2681 deprecate \1 in substitution replacements
02aa26ce
NT
2682 handle string-changing backslashes \l \U \Q \E, etc.
2683 switch (what was escaped) {
94def140 2684 handle \- in a transliteration (becomes a literal -)
ff3f963a 2685 if a pattern and not \N{, go treat as regular character
94def140
TS
2686 handle \132 (octal characters)
2687 handle \x15 and \x{1234} (hex characters)
ff3f963a 2688 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2689 handle \cV (control characters)
2690 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2691 } (end switch)
77a135fe 2692 continue
02aa26ce 2693 } (end if backslash)
77a135fe 2694 handle regular character
02aa26ce 2695 } (end while character to read)
4e553d73 2696
02aa26ce
NT
2697*/
2698
76e3520e 2699STATIC char *
cea2e8a9 2700S_scan_const(pTHX_ char *start)
79072805 2701{
97aff369 2702 dVAR;
eb578fdb 2703 char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2704 SV *sv = newSV(send - start); /* sv for the constant. See
2705 note below on sizing. */
eb578fdb
KW
2706 char *s = start; /* start of the constant */
2707 char *d = SvPVX(sv); /* destination for copies */
02aa26ce 2708 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2709 bool didrange = FALSE; /* did we just finish a range? */
2866decb 2710 bool in_charclass = FALSE; /* within /[...]/ */
b953e60c
KW
2711 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2712 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2713 to be UTF8? But, this can
2714 show as true when the source
2715 isn't utf8, as for example
2716 when it is entirely composed
2717 of hex constants */
2718
2719 /* Note on sizing: The scanned constant is placed into sv, which is
2720 * initialized by newSV() assuming one byte of output for every byte of
2721 * input. This routine expects newSV() to allocate an extra byte for a
2722 * trailing NUL, which this routine will append if it gets to the end of
2723 * the input. There may be more bytes of input than output (eg., \N{LATIN
2724 * CAPITAL LETTER A}), or more output than input if the constant ends up
2725 * recoded to utf8, but each time a construct is found that might increase
2726 * the needed size, SvGROW() is called. Its size parameter each time is
2727 * based on the best guess estimate at the time, namely the length used so
2728 * far, plus the length the current construct will occupy, plus room for
2729 * the trailing NUL, plus one byte for every input byte still unscanned */
2730
012bcf8d 2731 UV uv;
4c3a8340
TS
2732#ifdef EBCDIC
2733 UV literal_endpoint = 0;
e294cc5d 2734 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2735#endif
012bcf8d 2736
7918f24d
NC
2737 PERL_ARGS_ASSERT_SCAN_CONST;
2738
bb16bae8 2739 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2740 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2741 /* If we are doing a trans and we know we want UTF8 set expectation */
2742 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2743 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2744 }
2745
2746
79072805 2747 while (s < send || dorange) {
ff3f963a 2748
02aa26ce 2749 /* get transliterations out of the way (they're most literal) */
3280af22 2750 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2751 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2752 if (dorange) {
1ba5c669
JH
2753 I32 i; /* current expanded character */
2754 I32 min; /* first character in range */
2755 I32 max; /* last character in range */
02aa26ce 2756
e294cc5d
JH
2757#ifdef EBCDIC
2758 UV uvmax = 0;
2759#endif
2760
2761 if (has_utf8
2762#ifdef EBCDIC
2763 && !native_range
2764#endif
2765 ) {
9d4ba2ae 2766 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2767 char *e = d++;
2768 while (e-- > c)
2769 *(e + 1) = *e;
25716404 2770 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2771 /* mark the range as done, and continue */
2772 dorange = FALSE;
2773 didrange = TRUE;
2774 continue;
2775 }
2b9d42f0 2776
95a20fc0 2777 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2778#ifdef EBCDIC
2779 SvGROW(sv,
2780 SvLEN(sv) + (has_utf8 ?
2781 (512 - UTF_CONTINUATION_MARK +
2782 UNISKIP(0x100))
2783 : 256));
2784 /* How many two-byte within 0..255: 128 in UTF-8,
2785 * 96 in UTF-8-mod. */
2786#else
9cbb5ea2 2787 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2788#endif
9cbb5ea2 2789 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2790#ifdef EBCDIC
2791 if (has_utf8) {
2792 int j;
2793 for (j = 0; j <= 1; j++) {
2794 char * const c = (char*)utf8_hop((U8*)d, -1);
2795 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2796 if (j)
2797 min = (U8)uv;
2798 else if (uv < 256)
2799 max = (U8)uv;
2800 else {
2801 max = (U8)0xff; /* only to \xff */
2802 uvmax = uv; /* \x{100} to uvmax */
2803 }
2804 d = c; /* eat endpoint chars */
2805 }
2806 }
2807 else {
2808#endif
2809 d -= 2; /* eat the first char and the - */
2810 min = (U8)*d; /* first char in range */
2811 max = (U8)d[1]; /* last char in range */
2812#ifdef EBCDIC
2813 }
2814#endif
8ada0baa 2815
c2e66d9e 2816 if (min > max) {
4dc843bc 2817 SvREFCNT_dec(sv);
01ec43d0 2818 Perl_croak(aTHX_
d1573ac7 2819 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2820 (char)min, (char)max);
c2e66d9e
GS
2821 }
2822
c7f1f016 2823#ifdef EBCDIC
4c3a8340
TS
2824 if (literal_endpoint == 2 &&
2825 ((isLOWER(min) && isLOWER(max)) ||
2826 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2827 if (isLOWER(min)) {
2828 for (i = min; i <= max; i++)
2829 if (isLOWER(i))
db42d148 2830 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2831 } else {
2832 for (i = min; i <= max; i++)
2833 if (isUPPER(i))
db42d148 2834 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2835 }
2836 }
2837 else
2838#endif
2839 for (i = min; i <= max; i++)
e294cc5d
JH
2840#ifdef EBCDIC
2841 if (has_utf8) {
2842 const U8 ch = (U8)NATIVE_TO_UTF(i);
2843 if (UNI_IS_INVARIANT(ch))
2844 *d++ = (U8)i;
2845 else {
2846 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2847 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2848 }
2849 }
2850 else
2851#endif
2852 *d++ = (char)i;
2853
2854#ifdef EBCDIC
2855 if (uvmax) {
2856 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2857 if (uvmax > 0x101)
2858 *d++ = (char)UTF_TO_NATIVE(0xff);
2859 if (uvmax > 0x100)
2860 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2861 }
2862#endif
02aa26ce
NT
2863
2864 /* mark the range as done, and continue */
79072805 2865 dorange = FALSE;
01ec43d0 2866 didrange = TRUE;
4c3a8340
TS
2867#ifdef EBCDIC
2868 literal_endpoint = 0;
2869#endif
79072805 2870 continue;
4e553d73 2871 }
02aa26ce
NT
2872
2873 /* range begins (ignore - as first or last char) */
79072805 2874 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2875 if (didrange) {
4dc843bc 2876 SvREFCNT_dec(sv);
1fafa243 2877 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2878 }
e294cc5d
JH
2879 if (has_utf8
2880#ifdef EBCDIC
2881 && !native_range
2882#endif
2883 ) {
25716404 2884 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2885 s++;
2886 continue;
2887 }
79072805
LW
2888 dorange = TRUE;
2889 s++;
01ec43d0
GS
2890 }
2891 else {
2892 didrange = FALSE;
4c3a8340
TS
2893#ifdef EBCDIC
2894 literal_endpoint = 0;
e294cc5d 2895 native_range = TRUE;
4c3a8340 2896#endif
01ec43d0 2897 }
79072805 2898 }
02aa26ce
NT
2899
2900 /* if we get here, we're not doing a transliteration */
2901
e4a2df84
DM
2902 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2903 char *s1 = s-1;
2904 int esc = 0;
2905 while (s1 >= start && *s1-- == '\\')
2906 esc = !esc;
2907 if (!esc)
2908 in_charclass = TRUE;
2909 }
2866decb 2910
e4a2df84
DM
2911 else if (*s == ']' && PL_lex_inpat && in_charclass) {
2912 char *s1 = s-1;
2913 int esc = 0;
2914 while (s1 >= start && *s1-- == '\\')
2915 esc = !esc;
2916 if (!esc)
2917 in_charclass = FALSE;
2918 }
2866decb 2919
9da1dd8f
DM
2920 /* skip for regexp comments /(?#comment)/, except for the last
2921 * char, which will be done separately.
2922 * Stop on (?{..}) and friends */
2923
3280af22 2924 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2925 if (s[2] == '#') {
e994fd66 2926 while (s+1 < send && *s != ')')
db42d148 2927 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94 2928 }
2866decb 2929 else if (!PL_lex_casemods && !in_charclass &&
d3cec5e5
DM
2930 ( s[2] == '{' /* This should match regcomp.c */
2931 || (s[2] == '?' && s[3] == '{')))
155aba94 2932 {
9da1dd8f 2933 break;
cc6b7395 2934 }
748a9306 2935 }
02aa26ce
NT
2936
2937 /* likewise skip #-initiated comments in //x patterns */
3280af22 2938 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2939 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2940 while (s+1 < send && *s != '\n')
db42d148 2941 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2942 }
02aa26ce 2943
9da1dd8f
DM
2944 /* no further processing of single-quoted regex */
2945 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
2946 goto default_action;
2947
5d1d4326 2948 /* check for embedded arrays
da6eedaa 2949 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2950 */
1749ea0d
TS
2951 else if (*s == '@' && s[1]) {
2952 if (isALNUM_lazy_if(s+1,UTF))
2953 break;
2954 if (strchr(":'{$", s[1]))
2955 break;
2956 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2957 break; /* in regexp, neither @+ nor @- are interpolated */
2958 }
02aa26ce
NT
2959
2960 /* check for embedded scalars. only stop if we're sure it's a
2961 variable.
2962 */
79072805 2963 else if (*s == '$') {
3280af22 2964 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2965 break;
77772344 2966 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2967 if (s[1] == '\\') {
2968 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2969 "Possible unintended interpolation of $\\ in regex");
77772344 2970 }
79072805 2971 break; /* in regexp, $ might be tail anchor */
77772344 2972 }
79072805 2973 }
02aa26ce 2974
2b9d42f0
NIS
2975 /* End of else if chain - OP_TRANS rejoin rest */
2976
02aa26ce 2977 /* backslashes */
79072805 2978 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2979 char* e; /* Can be used for ending '}', etc. */
2980
79072805 2981 s++;
02aa26ce 2982
7d0fc23c
KW
2983 /* warn on \1 - \9 in substitution replacements, but note that \11
2984 * is an octal; and \19 is \1 followed by '9' */
3280af22 2985 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2986 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2987 {
a2a5de95 2988 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2989 *--s = '$';
2990 break;
2991 }
02aa26ce
NT
2992
2993 /* string-change backslash escapes */
838f2281 2994 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
79072805
LW
2995 --s;
2996 break;
2997 }
ff3f963a
KW
2998 /* In a pattern, process \N, but skip any other backslash escapes.
2999 * This is because we don't want to translate an escape sequence
3000 * into a meta symbol and have the regex compiler use the meta
3001 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3002 * in spite of this, we do have to process \N here while the proper
3003 * charnames handler is in scope. See bugs #56444 and #62056.
3004 * There is a complication because \N in a pattern may also stand
3005 * for 'match a non-nl', and not mean a charname, in which case its
3006 * processing should be deferred to the regex compiler. To be a
3007 * charname it must be followed immediately by a '{', and not look
3008 * like \N followed by a curly quantifier, i.e., not something like
3009 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3010 * quantifier */
3011 else if (PL_lex_inpat
3012 && (*s != 'N'
3013 || s[1] != '{'
3014 || regcurly(s + 1)))
3015 {
cc74c5bd
TS
3016 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3017 goto default_action;
3018 }
02aa26ce 3019
79072805 3020 switch (*s) {
02aa26ce
NT
3021
3022 /* quoted - in transliterations */
79072805 3023 case '-':
3280af22 3024 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
3025 *d++ = *s++;
3026 continue;
3027 }
3028 /* FALL THROUGH */
3029 default:
11b8faa4 3030 {
e4ca4584 3031 if ((isALNUMC(*s)))
a2a5de95
NC
3032 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3033 "Unrecognized escape \\%c passed through",
3034 *s);
11b8faa4 3035 /* default action is to copy the quoted character */
f9a63242 3036 goto default_action;
11b8faa4 3037 }
02aa26ce 3038
632403cc 3039 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
3040 case '0': case '1': case '2': case '3':
3041 case '4': case '5': case '6': case '7':
ba210ebe 3042 {
53305cf1
NC
3043 I32 flags = 0;
3044 STRLEN len = 3;
77a135fe 3045 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
3046 s += len;
3047 }
012bcf8d 3048 goto NUM_ESCAPE_INSERT;
02aa26ce 3049
f0a2b745
KW
3050 /* eg. \o{24} indicates the octal constant \024 */
3051 case 'o':
3052 {
3053 STRLEN len;
454155d9 3054 const char* error;
f0a2b745 3055
454155d9 3056 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 3057 s += len;
454155d9 3058 if (! valid) {
f0a2b745
KW
3059 yyerror(error);
3060 continue;
3061 }
3062 goto NUM_ESCAPE_INSERT;
3063 }
3064
77a135fe 3065 /* eg. \x24 indicates the hex constant 0x24 */
79072805 3066 case 'x':
a0481293 3067 {
53305cf1 3068 STRLEN len;
a0481293 3069 const char* error;
355860ce 3070
a0481293
KW
3071 bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3072 s += len;
3073 if (! valid) {
3074 yyerror(error);
355860ce 3075 continue;
ba210ebe 3076 }
012bcf8d
GS
3077 }
3078
3079 NUM_ESCAPE_INSERT:
ff3f963a
KW
3080 /* Insert oct or hex escaped character. There will always be
3081 * enough room in sv since such escapes will be longer than any
3082 * UTF-8 sequence they can end up as, except if they force us
3083 * to recode the rest of the string into utf8 */
ba7cea30 3084
77a135fe 3085 /* Here uv is the ordinal of the next character being added in
ff3f963a 3086 * unicode (converted from native). */
77a135fe 3087 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3088 if (!has_utf8 && uv > 255) {
77a135fe
KW
3089 /* Might need to recode whatever we have accumulated so
3090 * far if it contains any chars variant in utf8 or
3091 * utf-ebcdic. */
3092
3093 SvCUR_set(sv, d - SvPVX_const(sv));
3094 SvPOK_on(sv);
3095 *d = '\0';
77a135fe 3096 /* See Note on sizing above. */
7bf79863
KW
3097 sv_utf8_upgrade_flags_grow(sv,
3098 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3099 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3100 d = SvPVX(sv) + SvCUR(sv);
3101 has_utf8 = TRUE;
012bcf8d
GS
3102 }
3103
77a135fe
KW
3104 if (has_utf8) {
3105 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3106 if (PL_lex_inwhat == OP_TRANS &&
3107 PL_sublex_info.sub_op) {
3108 PL_sublex_info.sub_op->op_private |=
3109 (PL_lex_repl ? OPpTRANS_FROM_UTF
3110 : OPpTRANS_TO_UTF);
f9a63242 3111 }
e294cc5d
JH
3112#ifdef EBCDIC
3113 if (uv > 255 && !dorange)
3114 native_range = FALSE;
3115#endif
012bcf8d 3116 }
a0ed51b3 3117 else {
012bcf8d 3118 *d++ = (char)uv;
a0ed51b3 3119 }
012bcf8d
GS
3120 }
3121 else {
c4d5f83a 3122 *d++ = (char) uv;
a0ed51b3 3123 }
79072805 3124 continue;
02aa26ce 3125
4a2d328f 3126 case 'N':
ff3f963a
KW
3127 /* In a non-pattern \N must be a named character, like \N{LATIN
3128 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3129 * mean to match a non-newline. For non-patterns, named
3130 * characters are converted to their string equivalents. In
3131 * patterns, named characters are not converted to their
3132 * ultimate forms for the same reasons that other escapes
3133 * aren't. Instead, they are converted to the \N{U+...} form
3134 * to get the value from the charnames that is in effect right
3135 * now, while preserving the fact that it was a named character
3136 * so that the regex compiler knows this */
3137
3138 /* This section of code doesn't generally use the
3139 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3140 * a close examination of this macro and determined it is a
3141 * no-op except on utfebcdic variant characters. Every
3142 * character generated by this that would normally need to be
3143 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3144 * needed, and would complicate use of copy(). XXX There are
3145 * other parts of this file where the macro is used
3146 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3147
3148 /* The structure of this section of code (besides checking for
3149 * errors and upgrading to utf8) is:
3150 * Further disambiguate between the two meanings of \N, and if
3151 * not a charname, go process it elsewhere
0a96133f
KW
3152 * If of form \N{U+...}, pass it through if a pattern;
3153 * otherwise convert to utf8
3154 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3155 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3156
3157 /* Here, s points to the 'N'; the test below is guaranteed to
3158 * succeed if we are being called on a pattern as we already
3159 * know from a test above that the next character is a '{'.
3160 * On a non-pattern \N must mean 'named sequence, which
3161 * requires braces */
3162 s++;
3163 if (*s != '{') {
3164 yyerror("Missing braces on \\N{}");
3165 continue;
3166 }
3167 s++;
3168
0a96133f 3169 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3170 if (! (e = strchr(s, '}'))) {
3171 if (! PL_lex_inpat) {
5777a3f7 3172 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3173 } else {
3174 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3175 }
0a96133f 3176 continue;
ff3f963a 3177 }
cddc7ef4 3178
ff3f963a 3179 /* Here it looks like a named character */
cddc7ef4 3180
ff3f963a
KW
3181 if (PL_lex_inpat) {
3182
3183 /* XXX This block is temporary code. \N{} implies that the
3184 * pattern is to have Unicode semantics, and therefore
3185 * currently has to be encoded in utf8. By putting it in
3186 * utf8 now, we save a whole pass in the regular expression
3187 * compiler. Once that code is changed so Unicode
3188 * semantics doesn't necessarily have to be in utf8, this
da3a4baf
KW
3189 * block should be removed. However, the code that parses
3190 * the output of this would have to be changed to not
3191 * necessarily expect utf8 */
ff3f963a 3192 if (!has_utf8) {
77a135fe 3193 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3194 SvPOK_on(sv);
e4f3eed8 3195 *d = '\0';
77a135fe 3196 /* See Note on sizing above. */
7bf79863 3197 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3198 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3199 /* 5 = '\N{' + cur char + NUL */
3200 (STRLEN)(send - s) + 5);
f08d6ad9 3201 d = SvPVX(sv) + SvCUR(sv);
89491803 3202 has_utf8 = TRUE;
ff3f963a
KW
3203 }
3204 }
423cee85 3205
ff3f963a
KW
3206 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3207 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3208 | PERL_SCAN_DISALLOW_PREFIX;
3209 STRLEN len;
3210
3211 /* For \N{U+...}, the '...' is a unicode value even on
3212 * EBCDIC machines */
3213 s += 2; /* Skip to next char after the 'U+' */
3214 len = e - s;
3215 uv = grok_hex(s, &len, &flags, NULL);
3216 if (len == 0 || len != (STRLEN)(e - s)) {
3217 yyerror("Invalid hexadecimal number in \\N{U+...}");
3218 s = e + 1;
3219 continue;
3220 }
3221
3222 if (PL_lex_inpat) {
3223
e2a7e165
KW
3224 /* On non-EBCDIC platforms, pass through to the regex
3225 * compiler unchanged. The reason we evaluated the
3226 * number above is to make sure there wasn't a syntax
3227 * error. But on EBCDIC we convert to native so
3228 * downstream code can continue to assume it's native
3229 */
ff3f963a 3230 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3231#ifdef EBCDIC
3232 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3233 and the \0 */
3234 "\\N{U+%X}",
3235 (unsigned int) UNI_TO_NATIVE(uv));
3236#else
ff3f963a
KW
3237 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3238 d += e - s + 1;
e2a7e165 3239#endif
ff3f963a
KW
3240 }
3241 else { /* Not a pattern: convert the hex to string */
3242
3243 /* If destination is not in utf8, unconditionally
3244 * recode it to be so. This is because \N{} implies
3245 * Unicode semantics, and scalars have to be in utf8
3246 * to guarantee those semantics */
3247 if (! has_utf8) {
3248 SvCUR_set(sv, d - SvPVX_const(sv));
3249 SvPOK_on(sv);
3250 *d = '\0';
3251 /* See Note on sizing above. */
3252 sv_utf8_upgrade_flags_grow(
3253 sv,
3254 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3255 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3256 d = SvPVX(sv) + SvCUR(sv);
3257 has_utf8 = TRUE;
3258 }
3259
3260 /* Add the string to the output */
3261 if (UNI_IS_INVARIANT(uv)) {
3262 *d++ = (char) uv;
3263 }
3264 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3265 }
3266 }
3267 else { /* Here is \N{NAME} but not \N{U+...}. */
3268
3269 SV *res; /* result from charnames */
3270 const char *str; /* the string in 'res' */
3271 STRLEN len; /* its length */
3272
3273 /* Get the value for NAME */
3274 res = newSVpvn(s, e - s);
3275 res = new_constant( NULL, 0, "charnames",
3276 /* includes all of: \N{...} */
3277 res, NULL, s - 3, e - s + 4 );
3278
3279 /* Most likely res will be in utf8 already since the
3280 * standard charnames uses pack U, but a custom translator
3281 * can leave it otherwise, so make sure. XXX This can be
3282 * revisited to not have charnames use utf8 for characters
3283 * that don't need it when regexes don't have to be in utf8
3284 * for Unicode semantics. If doing so, remember EBCDIC */
3285 sv_utf8_upgrade(res);
3286 str = SvPV_const(res, len);
3287
3288 /* Don't accept malformed input */
3289 if (! is_utf8_string((U8 *) str, len)) {
3290 yyerror("Malformed UTF-8 returned by \\N");
3291 }
3292 else if (PL_lex_inpat) {
3293
3294 if (! len) { /* The name resolved to an empty string */
3295 Copy("\\N{}", d, 4, char);
3296 d += 4;
3297 }
3298 else {
3299 /* In order to not lose information for the regex
3300 * compiler, pass the result in the specially made
3301 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3302 * the code points in hex of each character
3303 * returned by charnames */
3304
3305 const char *str_end = str + len;
3306 STRLEN char_length; /* cur char's byte length */
3307 STRLEN output_length; /* and the number of bytes
3308 after this is translated
3309 into hex digits */
3310 const STRLEN off = d - SvPVX_const(sv);
3311
3312 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3313 * max('U+', '.'); and 1 for NUL */
3314 char hex_string[2 * UTF8_MAXBYTES + 5];
3315
3316 /* Get the first character of the result. */
3317 U32 uv = utf8n_to_uvuni((U8 *) str,
3318 len,
3319 &char_length,
3320 UTF8_ALLOW_ANYUV);
3321
3322 /* The call to is_utf8_string() above hopefully
3323 * guarantees that there won't be an error. But
3324 * it's easy here to make sure. The function just
3325 * above warns and returns 0 if invalid utf8, but
3326 * it can also return 0 if the input is validly a
3327 * NUL. Disambiguate */
3328 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3329 uv = UNICODE_REPLACEMENT;
3330 }
3331
3332 /* Convert first code point to hex, including the
e2a7e165
KW
3333 * boiler plate before it. For all these, we
3334 * convert to native format so that downstream code
3335 * can continue to assume the input is native */
78c35590 3336 output_length =
3353de27 3337 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3338 "\\N{U+%X",
3339 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3340
3341 /* Make sure there is enough space to hold it */
3342 d = off + SvGROW(sv, off
3343 + output_length
3344 + (STRLEN)(send - e)
3345 + 2); /* '}' + NUL */
3346 /* And output it */
3347 Copy(hex_string, d, output_length, char);
3348 d += output_length;
3349
3350 /* For each subsequent character, append dot and
3351 * its ordinal in hex */
3352 while ((str += char_length) < str_end) {
3353 const STRLEN off = d - SvPVX_const(sv);
3354 U32 uv = utf8n_to_uvuni((U8 *) str,
3355 str_end - str,
3356 &char_length,
3357 UTF8_ALLOW_ANYUV);
3358 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3359 uv = UNICODE_REPLACEMENT;
3360 }
3361
78c35590 3362 output_length =
3353de27 3363 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3364 ".%X",
3365 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3366
3367 d = off + SvGROW(sv, off
3368 + output_length
3369 + (STRLEN)(send - e)
3370 + 2); /* '}' + NUL */
3371 Copy(hex_string, d, output_length, char);
3372 d += output_length;
3373 }
3374
3375 *d++ = '}'; /* Done. Add the trailing brace */
3376 }
3377 }
3378 else { /* Here, not in a pattern. Convert the name to a
3379 * string. */
3380
3381 /* If destination is not in utf8, unconditionally
3382 * recode it to be so. This is because \N{} implies
3383 * Unicode semantics, and scalars have to be in utf8
3384 * to guarantee those semantics */
3385 if (! has_utf8) {
3386 SvCUR_set(sv, d - SvPVX_const(sv));
3387 SvPOK_on(sv);
3388 *d = '\0';
3389 /* See Note on sizing above. */
3390 sv_utf8_upgrade_flags_grow(sv,
3391 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3392 len + (STRLEN)(send - s) + 1);
3393 d = SvPVX(sv) + SvCUR(sv);
3394 has_utf8 = TRUE;
3395 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3396
3397 /* See Note on sizing above. (NOTE: SvCUR() is not
3398 * set correctly here). */
3399 const STRLEN off = d - SvPVX_const(sv);
3400 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3401 }
3402 Copy(str, d, len, char);
3403 d += len;
423cee85 3404 }
423cee85 3405 SvREFCNT_dec(res);
cb233ae3
KW
3406
3407 /* Deprecate non-approved name syntax */
3408 if (ckWARN_d(WARN_DEPRECATED)) {
3409 bool problematic = FALSE;
3410 char* i = s;
3411
3412 /* For non-ut8 input, look to see that the first
3413 * character is an alpha, then loop through the rest
3414 * checking that each is a continuation */
3415 if (! this_utf8) {
3416 if (! isALPHAU(*i)) problematic = TRUE;
3417 else for (i = s + 1; i < e; i++) {
3418 if (isCHARNAME_CONT(*i)) continue;
3419 problematic = TRUE;
3420 break;
3421 }
3422 }
3423 else {
3424 /* Similarly for utf8. For invariants can check
3425 * directly. We accept anything above the latin1
3426 * range because it is immaterial to Perl if it is
3427 * correct or not, and is expensive to check. But
3428 * it is fairly easy in the latin1 range to convert
3429 * the variants into a single character and check
3430 * those */
3431 if (UTF8_IS_INVARIANT(*i)) {
3432 if (! isALPHAU(*i)) problematic = TRUE;
3433 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3434 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3435 *(i+1)))))
3436 {
3437 problematic = TRUE;
3438 }
3439 }
3440 if (! problematic) for (i = s + UTF8SKIP(s);
3441 i < e;
3442 i+= UTF8SKIP(i))
3443 {
3444 if (UTF8_IS_INVARIANT(*i)) {
3445 if (isCHARNAME_CONT(*i)) continue;
3446 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3447 continue;
3448 } else if (isCHARNAME_CONT(
3449 UNI_TO_NATIVE(
81c14aa2 3450 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3451 {
3452 continue;
3453 }
3454 problematic = TRUE;
3455 break;
3456 }
3457 }
3458 if (problematic) {
6e1bad6c
KW
3459 /* The e-i passed to the final %.*s makes sure that
3460 * should the trailing NUL be missing that this
3461 * print won't run off the end of the string */
cb233ae3 3462 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3463 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3464 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3465 }
3466 }
3467 } /* End \N{NAME} */
ff3f963a
KW
3468#ifdef EBCDIC
3469 if (!dorange)
3470 native_range = FALSE; /* \N{} is defined to be Unicode */
3471#endif
3472 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3473 continue;
3474
02aa26ce 3475 /* \c is a control character */
79072805
LW
3476 case 'c':
3477 s++;
961ce445 3478 if (s < send) {
17a3df4c 3479 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3480 }
961ce445
RGS
3481 else {
3482 yyerror("Missing control char name in \\c");
3483 }
79072805 3484 continue;
02aa26ce
NT
3485
3486 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3487 case 'b':
db42d148 3488 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3489 break;
3490 case 'n':
db42d148 3491 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3492 break;
3493 case 'r':
db42d148 3494 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3495 break;
3496 case 'f':
db42d148 3497 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3498 break;
3499 case 't':
db42d148 3500 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3501 break;
34a3fe2a 3502 case 'e':
db42d148 3503 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3504 break;
3505 case 'a':
db42d148 3506 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3507 break;
02aa26ce
NT
3508 } /* end switch */
3509
79072805
LW
3510 s++;
3511 continue;
02aa26ce 3512 } /* end if (backslash) */
4c3a8340
TS
3513#ifdef EBCDIC
3514 else
3515 literal_endpoint++;
3516#endif
02aa26ce 3517
f9a63242 3518 default_action:
77a135fe
KW
3519 /* If we started with encoded form, or already know we want it,
3520 then encode the next character */
3521 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3522 STRLEN len = 1;
77a135fe
KW
3523
3524
3525 /* One might think that it is wasted effort in the case of the
3526 * source being utf8 (this_utf8 == TRUE) to take the next character
3527 * in the source, convert it to an unsigned value, and then convert
3528 * it back again. But the source has not been validated here. The
3529 * routine that does the conversion checks for errors like
3530 * malformed utf8 */
3531
5f66b61c
AL
3532 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3533 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3534 if (!has_utf8) {
3535 SvCUR_set(sv, d - SvPVX_const(sv));
3536 SvPOK_on(sv);
3537 *d = '\0';
77a135fe 3538 /* See Note on sizing above. */
7bf79863
KW
3539 sv_utf8_upgrade_flags_grow(sv,
3540 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3541 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3542 d = SvPVX(sv) + SvCUR(sv);
3543 has_utf8 = TRUE;
3544 } else if (need > len) {
3545 /* encoded value larger than old, may need extra space (NOTE:
3546 * SvCUR() is not set correctly here). See Note on sizing
3547 * above. */
9d4ba2ae 3548 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3549 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3550 }
77a135fe
KW
3551 s += len;
3552
5f66b61c 3553 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3554#ifdef EBCDIC
3555 if (uv > 255 && !dorange)
3556 native_range = FALSE;
3557#endif
2b9d42f0
NIS
3558 }
3559 else {
3560 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3561 }
02aa26ce
NT
3562 } /* while loop to process each character */
3563
3564 /* terminate the string and set up the sv */
79072805 3565 *d = '\0';
95a20fc0 3566 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3567 if (SvCUR(sv) >= SvLEN(sv))
5637ef5b
NC
3568 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3569 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
2b9d42f0 3570
79072805 3571 SvPOK_on(sv);
9f4817db 3572 if (PL_encoding && !has_utf8) {
d0063567
DK
3573 sv_recode_to_utf8(sv, PL_encoding);
3574 if (SvUTF8(sv))
3575 has_utf8 = TRUE;
9f4817db 3576 }
2b9d42f0 3577 if (has_utf8) {
7e2040f0 3578 SvUTF8_on(sv);
2b9d42f0 3579 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3580 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3581 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3582 }
3583 }
79072805 3584
02aa26ce 3585 /* shrink the sv if we allocated more than we used */
79072805 3586 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3587 SvPV_shrink_to_cur(sv);
79072805 3588 }
02aa26ce 3589
6154021b 3590 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3591 if (s > PL_bufptr) {
eb0d8d16
NC
3592 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3593 const char *const key = PL_lex_inpat ? "qr" : "q";
3594 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3595 const char *type;
3596 STRLEN typelen;
3597
3598 if (PL_lex_inwhat == OP_TRANS) {
3599 type = "tr";
3600 typelen = 2;
3601 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3602 type = "s";
3603 typelen = 1;
9da1dd8f
DM
3604 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3605 type = "q";
3606 typelen = 1;
eb0d8d16
NC
3607 } else {
3608 type = "qq";
3609 typelen = 2;
3610 }
3611
3612 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3613 type, typelen);
3614 }
6154021b 3615 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3616 } else
8990e307 3617 SvREFCNT_dec(sv);
79072805
LW
3618 return s;
3619}
3620
ffb4593c
NT
3621/* S_intuit_more
3622 * Returns TRUE if there's more to the expression (e.g., a subscript),
3623 * FALSE otherwise.
ffb4593c
NT
3624 *
3625 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3626 *
3627 * ->[ and ->{ return TRUE
3628 * { and [ outside a pattern are always subscripts, so return TRUE
3629 * if we're outside a pattern and it's not { or [, then return FALSE
3630 * if we're in a pattern and the first char is a {
3631 * {4,5} (any digits around the comma) returns FALSE
3632 * if we're in a pattern and the first char is a [
3633 * [] returns FALSE
3634 * [SOMETHING] has a funky algorithm to decide whether it's a
3635 * character class or not. It has to deal with things like
3636 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3637 * anything else returns TRUE
3638 */
3639
9cbb5ea2
GS
3640/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3641
76e3520e 3642STATIC int
cea2e8a9 3643S_intuit_more(pTHX_ register char *s)
79072805 3644{
97aff369 3645 dVAR;
7918f24d
NC
3646
3647 PERL_ARGS_ASSERT_INTUIT_MORE;
3648
3280af22 3649 if (PL_lex_brackets)
79072805
LW
3650 return TRUE;
3651 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3652 return TRUE;
3653 if (*s != '{' && *s != '[')
3654 return FALSE;
3280af22 3655 if (!PL_lex_inpat)
79072805
LW
3656 return TRUE;
3657
3658 /* In a pattern, so maybe we have {n,m}. */
3659 if (*s == '{') {
b3155d95 3660 if (regcurly(s)) {
79072805 3661 return FALSE;
b3155d95 3662 }
79072805 3663 return TRUE;
79072805
LW
3664 }
3665
3666 /* On the other hand, maybe we have a character class */
3667
3668 s++;
3669 if (*s == ']' || *s == '^')
3670 return FALSE;
3671 else {
ffb4593c 3672 /* this is terrifying, and it works */
79072805
LW
3673 int weight = 2; /* let's weigh the evidence */
3674 char seen[256];
f27ffc4a 3675 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3676 const char * const send = strchr(s,']');
3280af22 3677 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3678
3679 if (!send) /* has to be an expression */
3680 return TRUE;
3681
3682 Zero(seen,256,char);
3683 if (*s == '$')
3684 weight -= 3;
3685 else if (isDIGIT(*s)) {
3686 if (s[1] != ']') {
3687 if (isDIGIT(s[1]) && s[2] == ']')
3688 weight -= 10;
3689 }
3690 else
3691 weight -= 100;
3692 }
3693 for (; s < send; s++) {
3694 last_un_char = un_char;
3695 un_char = (unsigned char)*s;
3696 switch (*s) {
3697 case '@':
3698 case '&':
3699 case '$':
3700 weight -= seen[un_char] * 10;
7e2040f0 3701 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3702 int len;
8903cb82 3703 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e 3704 len = (int)strlen(tmpbuf);
6fbd0d97
BF
3705 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3706 UTF ? SVf_UTF8 : 0, SVt_PV))
79072805
LW
3707 weight -= 100;
3708 else
3709 weight -= 10;
3710 }
3711 else if (*s == '$' && s[1] &&
93a17b20
LW
3712 strchr("[#!%*<>()-=",s[1])) {
3713 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3714 weight -= 10;
3715 else
3716 weight -= 1;
3717 }
3718 break;
3719 case '\\':
3720 un_char = 254;
3721 if (s[1]) {
93a17b20 3722 if (strchr("wds]",s[1]))
79072805 3723 weight += 100;
10edeb5d 3724 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3725 weight += 1;
93a17b20 3726 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3727 weight += 40;
3728 else if (isDIGIT(s[1])) {
3729 weight += 40;
3730 while (s[1] && isDIGIT(s[1]))
3731 s++;
3732 }
3733 }
3734 else
3735 weight += 100;
3736 break;
3737 case '-':
3738 if (s[1] == '\\')
3739 weight += 50;
93a17b20 3740 if (strchr("aA01! ",last_un_char))
79072805 3741 weight += 30;
93a17b20 3742 if (strchr("zZ79~",s[1]))
79072805 3743 weight += 30;
f27ffc4a
GS
3744 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3745 weight -= 5; /* cope with negative subscript */
79072805
LW
3746 break;
3747 default:
3792a11b
NC
3748 if (!isALNUM(last_un_char)
3749 && !(last_un_char == '$' || last_un_char == '@'
3750 || last_un_char == '&')
3751 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3752 char *d = tmpbuf;
3753 while (isALPHA(*s))
3754 *d++ = *s++;
3755 *d = '\0';
5458a98a 3756 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3757 weight -= 150;
3758 }
3759 if (un_char == last_un_char + 1)
3760 weight += 5;
3761 weight -= seen[un_char];
3762 break;
3763 }
3764 seen[un_char]++;
3765 }
3766 if (weight >= 0) /* probably a character class */
3767 return FALSE;
3768 }
3769
3770 return TRUE;
3771}
ffed7fef 3772
ffb4593c
NT
3773/*
3774 * S_intuit_method
3775 *
3776 * Does all the checking to disambiguate
3777 * foo bar
3778 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3779 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3780 *
3781 * First argument is the stuff after the first token, e.g. "bar".
3782 *
a4fd4a89 3783 * Not a method if foo is a filehandle.
ffb4593c
NT
3784 * Not a method if foo is a subroutine prototyped to take a filehandle.
3785 * Not a method if it's really "Foo $bar"
3786 * Method if it's "foo $bar"
3787 * Not a method if it's really "print foo $bar"
3788 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3789 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3790 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3791 * =>
3792 */
3793
76e3520e 3794STATIC int
62d55b22 3795S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3796{
97aff369 3797 dVAR;
a0d0e21e 3798 char *s = start + (*start == '$');
3280af22 3799 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3800 STRLEN len;
3801 GV* indirgv;
5db06880
NC
3802#ifdef PERL_MAD
3803 int soff;
3804#endif
a0d0e21e 3805
7918f24d
NC
3806 PERL_ARGS_ASSERT_INTUIT_METHOD;
3807
aca88b25 3808 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3809 return 0;
aca88b25 3810 if (cv && SvPOK(cv)) {
8fa6a409 3811 const char *proto = CvPROTO(cv);
62d55b22
NC
3812 if (proto) {
3813 if (*proto == ';')
3814 proto++;
3815 if (*proto == '*')
3816 return 0;
3817 }
a0d0e21e 3818 }
8903cb82 3819 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3820 /* start is the beginning of the possible filehandle/object,
3821 * and s is the end of it
3822 * tmpbuf is a copy of it
3823 */
3824
a0d0e21e 3825 if (*start == '$') {
39c012bc 3826 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3ef1310e 3827 isUPPER(*PL_tokenbuf))
a0d0e21e 3828 return 0;
5db06880
NC
3829#ifdef PERL_MAD
3830 len = start - SvPVX(PL_linestr);
3831#endif
29595ff2 3832 s = PEEKSPACE(s);
f0092767 3833#ifdef PERL_MAD
5db06880
NC
3834 start = SvPVX(PL_linestr) + len;
3835#endif
3280af22
NIS
3836 PL_bufptr = start;
3837 PL_expect = XREF;
a0d0e21e
LW
3838 return *s == '(' ? FUNCMETH : METHOD;
3839 }
5458a98a 3840 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3841 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3842 len -= 2;
3843 tmpbuf[len] = '\0';
5db06880
NC
3844#ifdef PERL_MAD
3845 soff = s - SvPVX(PL_linestr);
3846#endif
c3e0f903
GS
3847 goto bare_package;
3848 }
38d2cf30 3849 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8ebc5c01 3850 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3851 return 0;
3852 /* filehandle or package name makes it a method */
39c012bc 3853 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5db06880
NC
3854#ifdef PERL_MAD
3855 soff = s - SvPVX(PL_linestr);
3856#endif
29595ff2 3857 s = PEEKSPACE(s);
3280af22 3858 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3859 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3860 bare_package:
cd81e915 3861 start_force(PL_curforce);
9ded7720 3862 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3863 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3864 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880 3865 if (PL_madskills)
38d2cf30
BF
3866 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3867 ( UTF ? SVf_UTF8 : 0 )));
3280af22 3868 PL_expect = XTERM;
a0d0e21e 3869 force_next(WORD);
3280af22 3870 PL_bufptr = s;
5db06880
NC
3871#ifdef PERL_MAD
3872 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3873#endif
a0d0e21e
LW
3874 return *s == '(' ? FUNCMETH : METHOD;
3875 }
3876 }
3877 return 0;
3878}
3879
16d20bd9 3880/* Encoded script support. filter_add() effectively inserts a
4e553d73 3881 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3882 * Note that the filter function only applies to the current source file
3883 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3884 *
3885 * The datasv parameter (which may be NULL) can be used to pass
3886 * private data to this instance of the filter. The filter function
3887 * can recover the SV using the FILTER_DATA macro and use it to
3888 * store private buffers and state information.
3889 *
3890 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3891 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3892 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3893 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3894 * private use must be set using malloc'd pointers.
3895 */
16d20bd9
AD
3896
3897SV *
864dbfa3 3898Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3899{
97aff369 3900 dVAR;
f4c556ac 3901 if (!funcp)
a0714e2c 3902 return NULL;
f4c556ac 3903
5486870f
DM
3904 if (!PL_parser)
3905 return NULL;
3906
f1c31c52
FC
3907 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3908 Perl_croak(aTHX_ "Source filters apply only to byte streams");
3909
3280af22
NIS
3910 if (!PL_rsfp_filters)
3911 PL_rsfp_filters = newAV();
16d20bd9 3912 if (!datasv)
561b68a9 3913 datasv = newSV(0);
862a34c6 3914 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3915 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3916 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3917 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3918 FPTR2DPTR(void *, IoANY(datasv)),
3919 SvPV_nolen(datasv)));
3280af22
NIS
3920 av_unshift(PL_rsfp_filters, 1);
3921 av_store(PL_rsfp_filters, 0, datasv) ;
60d63348
FC
3922 if (
3923 !PL_parser->filtered
3924 && PL_parser->lex_flags & LEX_EVALBYTES
3925 && PL_bufptr < PL_bufend
3926 ) {
3927 const char *s = PL_bufptr;
3928 while (s < PL_bufend) {
3929 if (*s == '\n') {
3930 SV *linestr = PL_parser->linestr;
3931 char *buf = SvPVX(linestr);
3932 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3933 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3934 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3935 STRLEN const linestart_pos = PL_parser->linestart - buf;
3936 STRLEN const last_uni_pos =
3937 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3938 STRLEN const last_lop_pos =
3939 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3940 av_push(PL_rsfp_filters, linestr);
3941 PL_parser->linestr =
3942 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3943 buf = SvPVX(PL_parser->linestr);
3944 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3945 PL_parser->bufptr = buf + bufptr_pos;
3946 PL_parser->oldbufptr = buf + oldbufptr_pos;
3947 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3948 PL_parser->linestart = buf + linestart_pos;
3949 if (PL_parser->last_uni)
3950 PL_parser->last_uni = buf + last_uni_pos;
3951 if (PL_parser->last_lop)
3952 PL_parser->last_lop = buf + last_lop_pos;
3953 SvLEN(linestr) = SvCUR(linestr);
3954 SvCUR(linestr) = s-SvPVX(linestr);
3955 PL_parser->filtered = 1;
3956 break;
3957 }
3958 s++;
3959 }
3960 }
16d20bd9
AD
3961 return(datasv);
3962}
4e553d73 3963
16d20bd9
AD
3964
3965/* Delete most recently added instance of this filter function. */
a0d0e21e 3966void
864dbfa3 3967Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3968{
97aff369 3969 dVAR;
e0c19803 3970 SV *datasv;
24801a4b 3971
7918f24d
NC
3972 PERL_ARGS_ASSERT_FILTER_DEL;
3973
33073adb 3974#ifdef DEBUGGING
55662e27
JH
3975 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3976 FPTR2DPTR(void*, funcp)));
33073adb 3977#endif
5486870f 3978 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3979 return;
3980 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3981 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3982 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3983 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3984
16d20bd9
AD
3985 return;
3986 }
3987 /* we need to search for the correct entry and clear it */
cea2e8a9 3988 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3989}
3990
3991
1de9afcd
RGS
3992/* Invoke the idxth filter function for the current rsfp. */
3993/* maxlen 0 = read one text line */
16d20bd9 3994I32
864dbfa3 3995Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3996{
97aff369 3997 dVAR;
16d20bd9
AD
3998 filter_t funcp;
3999 SV *datasv = NULL;
f482118e
NC
4000 /* This API is bad. It should have been using unsigned int for maxlen.
4001 Not sure if we want to change the API, but if not we should sanity
4002 check the value here. */
60d63348 4003 unsigned int correct_length
39cd7a59
NC
4004 = maxlen < 0 ?
4005#ifdef PERL_MICRO
4006 0x7FFFFFFF
4007#else
4008 INT_MAX
4009#endif
4010 : maxlen;
e50aee73 4011
7918f24d
NC
4012 PERL_ARGS_ASSERT_FILTER_READ;
4013
5486870f 4014 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 4015 return -1;
1de9afcd 4016 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
4017 /* Provide a default input filter to make life easy. */
4018 /* Note that we append to the line. This is handy. */
f4c556ac
GS
4019 DEBUG_P(PerlIO_printf(Perl_debug_log,
4020 "filter_read %d: from rsfp\n", idx));
f482118e 4021 if (correct_length) {
16d20bd9
AD
4022 /* Want a block */
4023 int len ;
f54cb97a 4024 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
4025
4026 /* ensure buf_sv is large enough */
881d8f0a 4027 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
4028 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4029 correct_length)) <= 0) {
3280af22 4030 if (PerlIO_error(PL_rsfp))
37120919
AD
4031 return -1; /* error */
4032 else
4033 return 0 ; /* end of file */
4034 }
16d20bd9 4035 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 4036 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
4037 } else {
4038 /* Want a line */
3280af22
NIS
4039 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4040 if (PerlIO_error(PL_rsfp))
37120919
AD
4041 return -1; /* error */
4042 else
4043 return 0 ; /* end of file */
4044 }
16d20bd9
AD
4045 }
4046 return SvCUR(buf_sv);
4047 }
4048 /* Skip this filter slot if filter has been deleted */
1de9afcd 4049 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
4050 DEBUG_P(PerlIO_printf(Perl_debug_log,
4051 "filter_read %d: skipped (filter deleted)\n",
4052 idx));
f482118e 4053 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9 4054 }
60d63348
FC
4055 if (SvTYPE(datasv) != SVt_PVIO) {
4056 if (correct_length) {
4057 /* Want a block */
4058 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4059 if (!remainder) return 0; /* eof */
4060 if (correct_length > remainder) correct_length = remainder;
4061 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4062 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4063 } else {
4064 /* Want a line */
4065 const char *s = SvEND(datasv);
4066 const char *send = SvPVX(datasv) + SvLEN(datasv);
4067 while (s < send) {
4068 if (*s == '\n') {
4069 s++;
4070 break;
4071 }
4072 s++;
4073 }
4074 if (s == send) return 0; /* eof */
4075 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4076 SvCUR_set(datasv, s-SvPVX(datasv));
4077 }
4078 return SvCUR(buf_sv);
4079 }
16d20bd9 4080 /* Get function pointer hidden within datasv */
8141890a 4081 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
4082 DEBUG_P(PerlIO_printf(Perl_debug_log,
4083 "filter_read %d: via function %p (%s)\n",
ca0270c4 4084 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
4085 /* Call function. The function is expected to */
4086 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 4087 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 4088 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
4089}
4090
76e3520e 4091STATIC char *
5cc814fd 4092S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 4093{
97aff369 4094 dVAR;
7918f24d
NC
4095
4096 PERL_ARGS_ASSERT_FILTER_GETS;
4097
c39cd008 4098#ifdef PERL_CR_FILTER
3280af22 4099 if (!PL_rsfp_filters) {
c39cd008 4100 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
4101 }
4102#endif
3280af22 4103 if (PL_rsfp_filters) {
55497cff 4104 if (!append)
4105 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
4106 if (FILTER_READ(0, sv, 0) > 0)
4107 return ( SvPVX(sv) ) ;
4108 else
bd61b366 4109 return NULL ;
16d20bd9 4110 }
9d116dd7 4111 else
5cc814fd 4112 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
4113}
4114
01ec43d0 4115STATIC HV *
9bde8eb0 4116S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 4117{
97aff369 4118 dVAR;
def3634b
GS
4119 GV *gv;
4120
7918f24d
NC
4121 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4122
01ec43d0 4123 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
4124 return PL_curstash;
4125
4126 if (len > 2 &&
4127 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
acc6da14 4128 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
01ec43d0
GS
4129 {
4130 return GvHV(gv); /* Foo:: */
def3634b
GS
4131 }
4132
4133 /* use constant CLASS => 'MyClass' */
acc6da14 4134 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
c35e046a
AL
4135 if (gv && GvCV(gv)) {
4136 SV * const sv = cv_const_sv(GvCV(gv));
4137 if (sv)
9bde8eb0 4138 pkgname = SvPV_const(sv, len);
def3634b
GS
4139 }
4140
acc6da14 4141 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
def3634b 4142}
a0d0e21e 4143
e3f73d4e
RGS
4144/*
4145 * S_readpipe_override
486ec47a 4146 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4147 * optree, provided sublex_start() is called afterwards.
4148 */
4149STATIC void
1d51329b 4150S_readpipe_override(pTHX)
e3f73d4e
RGS
4151{
4152 GV **gvp;
4153 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4154 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4155 if ((gv_readpipe
4156 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4157 ||
4158 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4159 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4160 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4161 {
4162 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4163 op_append_elem(OP_LIST,
e3f73d4e
RGS
4164 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4165 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4166 }
e3f73d4e
RGS
4167}
4168
5db06880
NC
4169#ifdef PERL_MAD
4170 /*
4171 * Perl_madlex
4172 * The intent of this yylex wrapper is to minimize the changes to the
4173 * tokener when we aren't interested in collecting madprops. It remains
4174 * to be seen how successful this strategy will be...
4175 */
4176
4177int
4178Perl_madlex(pTHX)
4179{
4180 int optype;
4181 char *s = PL_bufptr;
4182
cd81e915
NC
4183 /* make sure PL_thiswhite is initialized */
4184 PL_thiswhite = 0;
4185 PL_thismad = 0;
5db06880 4186
5db06880 4187 /* previous token ate up our whitespace? */
cd81e915
NC
4188 if (!PL_lasttoke && PL_nextwhite) {
4189 PL_thiswhite = PL_nextwhite;
4190 PL_nextwhite = 0;
5db06880
NC
4191 }
4192
4193 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4194 PL_realtokenstart = -1;
4195 PL_thistoken = 0;
5db06880
NC
4196 optype = yylex();
4197 s = PL_bufptr;
cd81e915 4198 assert(PL_curforce < 0);
5db06880 4199
cd81e915
NC
4200 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4201 if (!PL_thistoken) {
4202 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4203 PL_thistoken = newSVpvs("");
5db06880 4204 else {
c35e046a 4205 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4206 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4207 }
4208 }
cd81e915
NC
4209 if (PL_thismad) /* install head */
4210 CURMAD('X', PL_thistoken);
5db06880
NC
4211 }
4212
4213 /* last whitespace of a sublex? */
cd81e915
NC
4214 if (optype == ')' && PL_endwhite) {
4215 CURMAD('X', PL_endwhite);
5db06880
NC
4216 }
4217
cd81e915 4218 if (!PL_thismad) {
5db06880
NC
4219
4220 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4221 if (!PL_thiswhite && !PL_endwhite && !optype) {
4222 sv_free(PL_thistoken);
4223 PL_thistoken = 0;
5db06880
NC
4224 return 0;
4225 }
4226
4227 /* put off final whitespace till peg */
60d63348 4228 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
cd81e915
NC
4229 PL_nextwhite = PL_thiswhite;
4230 PL_thiswhite = 0;
5db06880 4231 }
cd81e915
NC
4232 else if (PL_thisopen) {
4233 CURMAD('q', PL_thisopen);
4234 if (PL_thistoken)
4235 sv_free(PL_thistoken);
4236 PL_thistoken = 0;
5db06880
NC
4237 }
4238 else {
4239 /* Store actual token text as madprop X */
cd81e915 4240 CURMAD('X', PL_thistoken);
5db06880
NC
4241 }
4242
cd81e915 4243 if (PL_thiswhite) {
5db06880 4244 /* add preceding whitespace as madprop _ */
cd81e915 4245 CURMAD('_', PL_thiswhite);
5db06880
NC
4246 }
4247
cd81e915 4248 if (PL_thisstuff) {
5db06880 4249 /* add quoted material as madprop = */
cd81e915 4250 CURMAD('=', PL_thisstuff);
5db06880
NC
4251 }
4252
cd81e915 4253 if (PL_thisclose) {
5db06880 4254 /* add terminating quote as madprop Q */
cd81e915 4255 CURMAD('Q', PL_thisclose);
5db06880
NC
4256 }
4257 }
4258
4259 /* special processing based on optype */
4260
4261 switch (optype) {
4262
4263 /* opval doesn't need a TOKEN since it can already store mp */
4264 case WORD:
4265 case METHOD:
4266 case FUNCMETH:
4267 case THING:
4268 case PMFUNC:
4269 case PRIVATEREF:
4270 case FUNC0SUB:
4271 case UNIOPSUB:
4272 case LSTOPSUB:
5db1eb8d 4273 case LABEL:
6154021b
RGS
4274 if (pl_yylval.opval)
4275 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4276 PL_thismad = 0;
5db06880
NC
4277 return optype;
4278
4279 /* fake EOF */
4280 case 0:
4281 optype = PEG;
cd81e915
NC
4282 if (PL_endwhite) {
4283 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4284 PL_endwhite = 0;
5db06880
NC
4285 }
4286 break;
4287
4288 case ']':
4289 case '}':
cd81e915 4290 if (PL_faketokens)
5db06880
NC
4291 break;
4292 /* remember any fake bracket that lexer is about to discard */
4293 if (PL_lex_brackets == 1 &&
4294 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4295 {
4296 s = PL_bufptr;
4297 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4298 s++;
4299 if (*s == '}') {
cd81e915
NC
4300 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4301 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4302 PL_thiswhite = 0;
5db06880
NC
4303 PL_bufptr = s - 1;
4304 break; /* don't bother looking for trailing comment */
4305 }
4306 else
4307 s = PL_bufptr;
4308 }
4309 if (optype == ']')
4310 break;
4311 /* FALLTHROUGH */
4312
4313 /* attach a trailing comment to its statement instead of next token */
4314 case ';':
cd81e915 4315 if (PL_faketokens)
5db06880
NC
4316 break;
4317 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4318 s = PL_bufptr;
4319 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4320 s++;
4321 if (*s == '\n' || *s == '#') {
4322 while (s < PL_bufend && *s != '\n')
4323 s++;
4324 if (s < PL_bufend)
4325 s++;
cd81e915
NC
4326 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4327 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4328 PL_thiswhite = 0;
5db06880
NC
4329 PL_bufptr = s;
4330 }
4331 }
4332 break;
4333
5db06880
NC
4334 /* ival */
4335 default:
4336 break;
4337
4338 }
4339
4340 /* Create new token struct. Note: opvals return early above. */
6154021b 4341 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4342 PL_thismad = 0;
5db06880
NC
4343 return optype;
4344}
4345#endif
4346
468aa647 4347STATIC char *
cc6ed77d 4348S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4349 dVAR;
7918f24d
NC
4350
4351 PERL_ARGS_ASSERT_TOKENIZE_USE;
4352
468aa647
RGS
4353 if (PL_expect != XSTATE)
4354 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4355 is_use ? "use" : "no"));
52d0e95b 4356 PL_expect = XTERM;
29595ff2 4357 s = SKIPSPACE1(s);
468aa647
RGS
4358 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4359 s = force_version(s, TRUE);
17c59fdf
VP
4360 if (*s == ';' || *s == '}'
4361 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4362 start_force(PL_curforce);
9ded7720 4363 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4364 force_next(WORD);
4365 }
4366 else if (*s == 'v') {
4367 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4368 s = force_version(s, FALSE);
4369 }
4370 }
4371 else {
4372 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4373 s = force_version(s, FALSE);
4374 }
6154021b 4375 pl_yylval.ival = is_use;
468aa647
RGS
4376 return s;
4377}
748a9306 4378#ifdef DEBUGGING
27da23d5 4379 static const char* const exp_name[] =
09bef843 4380 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4381 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4382 };
748a9306 4383#endif
463ee0b2 4384
361d9b55
Z
4385#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4386STATIC bool
4387S_word_takes_any_delimeter(char *p, STRLEN len)
4388{
4389 return (len == 1 && strchr("msyq", p[0])) ||
4390 (len == 2 && (
4391 (p[0] == 't' && p[1] == 'r') ||
4392 (p[0] == 'q' && strchr("qwxr", p[1]))));
4393}
4394
02aa26ce
NT
4395/*
4396 yylex
4397
4398 Works out what to call the token just pulled out of the input
4399 stream. The yacc parser takes care of taking the ops we return and
4400 stitching them into a tree.
4401
4402 Returns:
4403 PRIVATEREF
4404
4405 Structure:
4406 if read an identifier
4407 if we're in a my declaration
4408 croak if they tried to say my($foo::bar)
4409 build the ops for a my() declaration
4410 if it's an access to a my() variable
4411 are we in a sort block?
4412 croak if my($a); $a <=> $b
4413 build ops for access to a my() variable
4414 if in a dq string, and they've said @foo and we can't find @foo
4415 croak
4416 build ops for a bareword
4417 if we already built the token before, use it.
4418*/
4419
20141f0e 4420
dba4d153
JH
4421#ifdef __SC__
4422#pragma segment Perl_yylex
4423#endif
dba4d153 4424int
dba4d153 4425Perl_yylex(pTHX)
20141f0e 4426{
97aff369 4427 dVAR;
eb578fdb
KW
4428 char *s = PL_bufptr;
4429 char *d;
463ee0b2 4430 STRLEN len;
705fe0e5
FC
4431 bool bof = FALSE;
4432 U8 formbrack = 0;
580561a3 4433 U32 fake_eof = 0;
a687059c 4434
10edeb5d
JH
4435 /* orig_keyword, gvp, and gv are initialized here because
4436 * jump to the label just_a_word_zero can bypass their
4437 * initialization later. */
4438 I32 orig_keyword = 0;
4439 GV *gv = NULL;
4440 GV **gvp = NULL;
4441
bbf60fe6 4442 DEBUG_T( {
396482e1 4443 SV* tmp = newSVpvs("");
b6007c36
DM
4444 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4445 (IV)CopLINE(PL_curcop),
4446 lex_state_names[PL_lex_state],
4447 exp_name[PL_expect],
4448 pv_display(tmp, s, strlen(s), 0, 60));
4449 SvREFCNT_dec(tmp);
bbf60fe6 4450 } );
02aa26ce 4451
3280af22 4452 switch (PL_lex_state) {
79072805
LW
4453#ifdef COMMENTARY
4454 case LEX_NORMAL: /* Some compilers will produce faster */
4455 case LEX_INTERPNORMAL: /* code if we comment these out. */
4456 break;
4457#endif
4458
09bef843 4459 /* when we've already built the next token, just pull it out of the queue */
79072805 4460 case LEX_KNOWNEXT:
5db06880
NC
4461#ifdef PERL_MAD
4462 PL_lasttoke--;
6154021b 4463 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4464 if (PL_madskills) {
cd81e915 4465 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4466 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4467 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4468 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4469 PL_thismad->mad_val = 0;
4470 mad_free(PL_thismad);
4471 PL_thismad = 0;
5db06880
NC
4472 }
4473 }
4474 if (!PL_lasttoke) {
4475 PL_lex_state = PL_lex_defer;
4476 PL_expect = PL_lex_expect;
4477 PL_lex_defer = LEX_NORMAL;
4478 if (!PL_nexttoke[PL_lasttoke].next_type)
4479 return yylex();
4480 }
4481#else
3280af22 4482 PL_nexttoke--;
6154021b 4483 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4484 if (!PL_nexttoke) {
4485 PL_lex_state = PL_lex_defer;
4486 PL_expect = PL_lex_expect;
4487 PL_lex_defer = LEX_NORMAL;
463ee0b2 4488 }
5db06880 4489#endif
a7aaec61
Z
4490 {
4491 I32 next_type;
5db06880 4492#ifdef PERL_MAD
a7aaec61 4493 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4494#else
a7aaec61 4495 next_type = PL_nexttype[PL_nexttoke];
5db06880 4496#endif
78cdf107
Z
4497 if (next_type & (7<<24)) {
4498 if (next_type & (1<<24)) {
4499 if (PL_lex_brackets > 100)
4500 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4501 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4502 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4503 }
4504 if (next_type & (2<<24))
4505 PL_lex_allbrackets++;
4506 if (next_type & (4<<24))
4507 PL_lex_allbrackets--;
a7aaec61
Z
4508 next_type &= 0xffff;
4509 }
6c7ae946
FC
4510 if (S_is_opval_token(next_type) && pl_yylval.opval)
4511 pl_yylval.opval->op_savefree = 0; /* release */
3f33d153 4512 return REPORT(next_type == 'p' ? pending_ident() : next_type);
a7aaec61 4513 }
79072805 4514
02aa26ce 4515 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4516 when we get here, PL_bufptr is at the \
02aa26ce 4517 */
79072805
LW
4518 case LEX_INTERPCASEMOD:
4519#ifdef DEBUGGING
3280af22 4520 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5637ef5b
NC
4521 Perl_croak(aTHX_
4522 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4523 PL_bufptr, PL_bufend, *PL_bufptr);
79072805 4524#endif
02aa26ce 4525 /* handle \E or end of string */
3280af22 4526 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4527 /* if at a \E */
3280af22 4528 if (PL_lex_casemods) {
f54cb97a 4529 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4530 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4531
3792a11b 4532 if (PL_bufptr != PL_bufend
838f2281
BF
4533 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4534 || oldmod == 'F')) {
3280af22
NIS
4535 PL_bufptr += 2;
4536 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4537#ifdef PERL_MAD
4538 if (PL_madskills)
6b29d1f5 4539 PL_thistoken = newSVpvs("\\E");
5db06880 4540#endif
a0d0e21e 4541 }
78cdf107 4542 PL_lex_allbrackets--;
bbf60fe6 4543 return REPORT(')');
79072805 4544 }
52ed07f6
BF
4545 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4546 /* Got an unpaired \E */
4547 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
820438b1 4548 "Useless use of \\E");
52ed07f6 4549 }
5db06880
NC
4550#ifdef PERL_MAD
4551 while (PL_bufptr != PL_bufend &&
4552 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4553 if (!PL_thiswhite)
6b29d1f5 4554 PL_thiswhite = newSVpvs("");
cd81e915 4555 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4556 PL_bufptr += 2;
4557 }
4558#else
3280af22
NIS
4559 if (PL_bufptr != PL_bufend)
4560 PL_bufptr += 2;
5db06880 4561#endif
3280af22 4562 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4563 return yylex();
79072805
LW
4564 }
4565 else {
607df283 4566 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4567 "### Saw case modifier\n"); });
3280af22 4568 s = PL_bufptr + 1;
6e909404 4569 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4570#ifdef PERL_MAD
cd81e915 4571 if (!PL_thiswhite)
6b29d1f5 4572 PL_thiswhite = newSVpvs("");
cd81e915 4573 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4574#endif
89122651 4575 PL_bufptr = s + 3;
6e909404
JH
4576 PL_lex_state = LEX_INTERPCONCAT;
4577 return yylex();
a0d0e21e 4578 }
6e909404 4579 else {
90771dc0 4580 I32 tmp;
5db06880
NC
4581 if (!PL_madskills) /* when just compiling don't need correct */
4582 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4583 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
838f2281
BF
4584 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4585 (strchr(PL_lex_casestack, 'L')
4586 || strchr(PL_lex_casestack, 'U')
4587 || strchr(PL_lex_casestack, 'F'))) {
6e909404 4588 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4589 PL_lex_allbrackets--;
bbf60fe6 4590 return REPORT(')');
6e909404
JH
4591 }
4592 if (PL_lex_casemods > 10)
4593 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4594 PL_lex_casestack[PL_lex_casemods++] = *s;
4595 PL_lex_casestack[PL_lex_casemods] = '\0';
4596 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4597 start_force(PL_curforce);
9ded7720 4598 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4599 force_next((2<<24)|'(');
cd81e915 4600 start_force(PL_curforce);
6e909404 4601 if (*s == 'l')
9ded7720 4602 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4603 else if (*s == 'u')
9ded7720 4604 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4605 else if (*s == 'L')
9ded7720 4606 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4607 else if (*s == 'U')
9ded7720 4608 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4609 else if (*s == 'Q')
9ded7720 4610 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
838f2281
BF
4611 else if (*s == 'F')
4612 NEXTVAL_NEXTTOKE.ival = OP_FC;
6e909404 4613 else
5637ef5b 4614 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5db06880 4615 if (PL_madskills) {
a5849ce5
NC
4616 SV* const tmpsv = newSVpvs("\\ ");
4617 /* replace the space with the character we want to escape
4618 */
4619 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4620 curmad('_', tmpsv);
4621 }
6e909404 4622 PL_bufptr = s + 1;
a0d0e21e 4623 }
79072805 4624 force_next(FUNC);
3280af22
NIS
4625 if (PL_lex_starts) {
4626 s = PL_bufptr;
4627 PL_lex_starts = 0;
5db06880
NC
4628#ifdef PERL_MAD
4629 if (PL_madskills) {
cd81e915
NC
4630 if (PL_thistoken)
4631 sv_free(PL_thistoken);
6b29d1f5 4632 PL_thistoken = newSVpvs("");
5db06880
NC
4633 }
4634#endif
131b3ad0
DM
4635 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4636 if (PL_lex_casemods == 1 && PL_lex_inpat)
4637 OPERATOR(',');
4638 else
4639 Aop(OP_CONCAT);
79072805
LW
4640 }
4641 else
cea2e8a9 4642 return yylex();
79072805
LW
4643 }
4644
55497cff 4645 case LEX_INTERPPUSH:
bbf60fe6 4646 return REPORT(sublex_push());
55497cff 4647
79072805 4648 case LEX_INTERPSTART:
3280af22 4649 if (PL_bufptr == PL_bufend)
bbf60fe6 4650 return REPORT(sublex_done());
9da1dd8f 4651 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
b6007c36 4652 "### Interpolated variable\n"); });
3280af22
NIS
4653 PL_expect = XTERM;
4654 PL_lex_dojoin = (*PL_bufptr == '@');
4655 PL_lex_state = LEX_INTERPNORMAL;
4656 if (PL_lex_dojoin) {
cd81e915 4657 start_force(PL_curforce);
9ded7720 4658 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4659 force_next(',');
cd81e915 4660 start_force(PL_curforce);
a0d0e21e 4661 force_ident("\"", '$');
cd81e915 4662 start_force(PL_curforce);
9ded7720 4663 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4664 force_next('$');
cd81e915 4665 start_force(PL_curforce);
9ded7720 4666 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4667 force_next((2<<24)|'(');
cd81e915 4668 start_force(PL_curforce);
9ded7720 4669 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4670 force_next(FUNC);
4671 }
9da1dd8f
DM
4672 /* Convert (?{...}) and friends to 'do {...}' */
4673 if (PL_lex_inpat && *PL_bufptr == '(') {
3328ab5a 4674 PL_parser->lex_shared->re_eval_start = PL_bufptr;
9da1dd8f
DM
4675 PL_bufptr += 2;
4676 if (*PL_bufptr != '{')
4677 PL_bufptr++;
6165f85b
DM
4678 start_force(PL_curforce);
4679 /* XXX probably need a CURMAD(something) here */
9da1dd8f
DM
4680 PL_expect = XTERMBLOCK;
4681 force_next(DO);
4682 }
4683
3280af22
NIS
4684 if (PL_lex_starts++) {
4685 s = PL_bufptr;
5db06880
NC
4686#ifdef PERL_MAD
4687 if (PL_madskills) {
cd81e915
NC
4688 if (PL_thistoken)
4689 sv_free(PL_thistoken);
6b29d1f5 4690 PL_thistoken = newSVpvs("");
5db06880
NC
4691 }
4692#endif
131b3ad0
DM
4693 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4694 if (!PL_lex_casemods && PL_lex_inpat)
4695 OPERATOR(',');
4696 else
4697 Aop(OP_CONCAT);
79072805 4698 }
cea2e8a9 4699 return yylex();
79072805
LW
4700
4701 case LEX_INTERPENDMAYBE:
3280af22
NIS
4702 if (intuit_more(PL_bufptr)) {
4703 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4704 break;
4705 }
4706 /* FALL THROUGH */
4707
4708 case LEX_INTERPEND:
3280af22
NIS
4709 if (PL_lex_dojoin) {
4710 PL_lex_dojoin = FALSE;
4711 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4712#ifdef PERL_MAD
4713 if (PL_madskills) {
cd81e915
NC
4714 if (PL_thistoken)
4715 sv_free(PL_thistoken);
6b29d1f5 4716 PL_thistoken = newSVpvs("");
5db06880
NC
4717 }
4718#endif
78cdf107 4719 PL_lex_allbrackets--;
bbf60fe6 4720 return REPORT(')');
79072805 4721 }
43a16006 4722 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4723 && SvEVALED(PL_lex_repl))
43a16006 4724 {
e9fa98b2 4725 if (PL_bufptr != PL_bufend)
cea2e8a9 4726 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4727 PL_lex_repl = NULL;
e9fa98b2 4728 }
db444266
FC
4729 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4730 re_eval_str. If the here-doc body’s length equals the previous
4731 value of re_eval_start, re_eval_start will now be null. So
4732 check re_eval_str as well. */
3328ab5a
FC
4733 if (PL_parser->lex_shared->re_eval_start
4734 || PL_parser->lex_shared->re_eval_str) {
db444266 4735 SV *sv;
9da1dd8f
DM
4736 if (*PL_bufptr != ')')
4737 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4738 PL_bufptr++;
4739 /* having compiled a (?{..}) expression, return the original
4740 * text too, as a const */
3328ab5a
FC
4741 if (PL_parser->lex_shared->re_eval_str) {
4742 sv = PL_parser->lex_shared->re_eval_str;
4743 PL_parser->lex_shared->re_eval_str = NULL;
4744 SvCUR_set(sv,
4745 PL_bufptr - PL_parser->lex_shared->re_eval_start);
db444266
FC
4746 SvPV_shrink_to_cur(sv);
4747 }
3328ab5a
FC
4748 else sv = newSVpvn(PL_parser->lex_shared->re_eval_start,
4749 PL_bufptr - PL_parser->lex_shared->re_eval_start);
6165f85b
DM
4750 start_force(PL_curforce);
4751 /* XXX probably need a CURMAD(something) here */
4752 NEXTVAL_NEXTTOKE.opval =
9da1dd8f 4753 (OP*)newSVOP(OP_CONST, 0,
db444266 4754 sv);
9da1dd8f 4755 force_next(THING);
3328ab5a 4756 PL_parser->lex_shared->re_eval_start = NULL;
9da1dd8f
DM
4757 PL_expect = XTERM;
4758 return REPORT(',');
4759 }
4760
79072805
LW
4761 /* FALLTHROUGH */
4762 case LEX_INTERPCONCAT:
4763#ifdef DEBUGGING
3280af22 4764 if (PL_lex_brackets)
5637ef5b
NC
4765 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4766 (long) PL_lex_brackets);
79072805 4767#endif
3280af22 4768 if (PL_bufptr == PL_bufend)
bbf60fe6 4769 return REPORT(sublex_done());
79072805 4770
9da1dd8f
DM
4771 /* m'foo' still needs to be parsed for possible (?{...}) */
4772 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
3280af22 4773 SV *sv = newSVsv(PL_linestr);
9da1dd8f 4774 sv = tokeq(sv);
6154021b 4775 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4776 s = PL_bufend;
79072805
LW
4777 }
4778 else {
3280af22 4779 s = scan_const(PL_bufptr);
79072805 4780 if (*s == '\\')
3280af22 4781 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4782 else
3280af22 4783 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4784 }
4785
3280af22 4786 if (s != PL_bufptr) {
cd81e915 4787 start_force(PL_curforce);
5db06880
NC
4788 if (PL_madskills) {
4789 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4790 }
6154021b 4791 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4792 PL_expect = XTERM;
79072805 4793 force_next(THING);
131b3ad0 4794 if (PL_lex_starts++) {
5db06880
NC
4795#ifdef PERL_MAD
4796 if (PL_madskills) {
cd81e915
NC
4797 if (PL_thistoken)
4798 sv_free(PL_thistoken);
6b29d1f5 4799 PL_thistoken = newSVpvs("");
5db06880
NC
4800 }
4801#endif
131b3ad0
DM
4802 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4803 if (!PL_lex_casemods && PL_lex_inpat)
4804 OPERATOR(',');
4805 else
4806 Aop(OP_CONCAT);
4807 }
79072805 4808 else {
3280af22 4809 PL_bufptr = s;
cea2e8a9 4810 return yylex();
79072805
LW
4811 }
4812 }
4813
cea2e8a9 4814 return yylex();
a0d0e21e 4815 case LEX_FORMLINE:
3280af22
NIS
4816 s = scan_formline(PL_bufptr);
4817 if (!PL_lex_formbrack)
7c70caa5 4818 {
705fe0e5 4819 formbrack = 1;
a0d0e21e 4820 goto rightbracket;
7c70caa5 4821 }
705fe0e5
FC
4822 PL_bufptr = s;
4823 return yylex();
79072805
LW
4824 }
4825
3280af22
NIS
4826 s = PL_bufptr;
4827 PL_oldoldbufptr = PL_oldbufptr;
4828 PL_oldbufptr = s;
463ee0b2
LW
4829
4830 retry:
5db06880 4831#ifdef PERL_MAD
cd81e915
NC
4832 if (PL_thistoken) {
4833 sv_free(PL_thistoken);
4834 PL_thistoken = 0;
5db06880 4835 }
cd81e915 4836 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4837#endif
378cc40b
LW
4838 switch (*s) {
4839 default:
7e2040f0 4840 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4841 goto keylookup;
b1fc3636 4842 {
e2f06df0
BF
4843 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4844 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4845 UTF8SKIP(s),
4846 SVs_TEMP | SVf_UTF8),
4847 10, UNI_DISPLAY_ISPRINT))
4848 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
b1fc3636
CJ
4849 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4850 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4851 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4852 } else {
4853 d = PL_linestart;
4854 }
4855 *s = '\0';
e2f06df0
BF
4856 sv_setpv(dsv, d);
4857 if (UTF)
4858 SvUTF8_on(dsv);
4859 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
b1fc3636 4860 }
e929a76b
LW
4861 case 4:
4862 case 26:
4863 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4864 case 0:
5db06880
NC
4865#ifdef PERL_MAD
4866 if (PL_madskills)
cd81e915 4867 PL_faketokens = 0;
5db06880 4868#endif
60d63348 4869 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
3280af22
NIS
4870 PL_last_uni = 0;
4871 PL_last_lop = 0;
a7aaec61
Z
4872 if (PL_lex_brackets &&
4873 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4874 yyerror((const char *)
4875 (PL_lex_formbrack
4876 ? "Format not terminated"
4877 : "Missing right curly or square bracket"));
c5ee2135 4878 }
4e553d73 4879 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4880 "### Tokener got EOF\n");
5f80b19c 4881 } );
79072805 4882 TOKEN(0);
463ee0b2 4883 }
3280af22 4884 if (s++ < PL_bufend)
a687059c 4885 goto retry; /* ignore stray nulls */
3280af22
NIS
4886 PL_last_uni = 0;
4887 PL_last_lop = 0;
4888 if (!PL_in_eval && !PL_preambled) {
4889 PL_preambled = TRUE;
5db06880
NC
4890#ifdef PERL_MAD
4891 if (PL_madskills)
cd81e915 4892 PL_faketokens = 1;
5db06880 4893#endif
5ab7ff98
NC
4894 if (PL_perldb) {
4895 /* Generate a string of Perl code to load the debugger.
4896 * If PERL5DB is set, it will return the contents of that,
4897 * otherwise a compile-time require of perl5db.pl. */
4898
4899 const char * const pdb = PerlEnv_getenv("PERL5DB");
4900
4901 if (pdb) {
4902 sv_setpv(PL_linestr, pdb);
4903 sv_catpvs(PL_linestr,";");
4904 } else {
4905 SETERRNO(0,SS_NORMAL);
4906 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4907 }
4908 } else
4909 sv_setpvs(PL_linestr,"");
c62eb204
NC
4910 if (PL_preambleav) {
4911 SV **svp = AvARRAY(PL_preambleav);
4912 SV **const end = svp + AvFILLp(PL_preambleav);
4913 while(svp <= end) {
4914 sv_catsv(PL_linestr, *svp);
4915 ++svp;
396482e1 4916 sv_catpvs(PL_linestr, ";");
91b7def8 4917 }
daba3364 4918 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4919 PL_preambleav = NULL;
91b7def8 4920 }
9f639728
FR
4921 if (PL_minus_E)
4922 sv_catpvs(PL_linestr,
4923 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4924 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4925 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4926 if (PL_minus_l)
396482e1 4927 sv_catpvs(PL_linestr,"chomp;");
3280af22 4928 if (PL_minus_a) {
3280af22 4929 if (PL_minus_F) {
3792a11b
NC
4930 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4931 || *PL_splitstr == '"')
3280af22 4932 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4933 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4934 else {
c8ef6a4b
NC
4935 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4936 bytes can be used as quoting characters. :-) */
dd374669 4937 const char *splits = PL_splitstr;
91d456ae 4938 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4939 do {
4940 /* Need to \ \s */
dd374669
AL
4941 if (*splits == '\\')
4942 sv_catpvn(PL_linestr, splits, 1);
4943 sv_catpvn(PL_linestr, splits, 1);
4944 } while (*splits++);
48c4c863
NC
4945 /* This loop will embed the trailing NUL of
4946 PL_linestr as the last thing it does before
4947 terminating. */
396482e1 4948 sv_catpvs(PL_linestr, ");");
54310121 4949 }
2304df62
AD
4950 }
4951 else
396482e1 4952 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4953 }
79072805 4954 }
396482e1 4955 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4956 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4957 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4958 PL_last_lop = PL_last_uni = NULL;
65269a95 4959 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4960 update_debugger_info(PL_linestr, NULL, 0);
79072805 4961 goto retry;
a687059c 4962 }
e929a76b 4963 do {
580561a3
Z
4964 fake_eof = 0;
4965 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4966 if (0) {
7e28d3af 4967 fake_eof:
f0e67a1d
Z
4968 fake_eof = LEX_FAKE_EOF;
4969 }
4970 PL_bufptr = PL_bufend;
83944c01 4971 COPLINE_INC_WITH_HERELINES;
f0e67a1d 4972 if (!lex_next_chunk(fake_eof)) {
17cc9359 4973 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4974 s = PL_bufptr;
4975 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4976 }
17cc9359 4977 CopLINE_dec(PL_curcop);
5db06880 4978#ifdef PERL_MAD
f0e67a1d 4979 if (!PL_rsfp)
cd81e915 4980 PL_realtokenstart = -1;
5db06880 4981#endif
f0e67a1d 4982 s = PL_bufptr;
7aa207d6
JH
4983 /* If it looks like the start of a BOM or raw UTF-16,
4984 * check if it in fact is. */
580561a3 4985 if (bof && PL_rsfp &&
7aa207d6
JH
4986 (*s == 0 ||
4987 *(U8*)s == 0xEF ||
4988 *(U8*)s >= 0xFE ||
4989 s[1] == 0)) {
879bc93b
DM
4990 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4991 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
4992#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4993 /* offset may include swallowed CR */
4994 if (!bof)
879bc93b 4995 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 4996#endif
7e28d3af 4997 if (bof) {
3280af22 4998 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4999 s = swallow_bom((U8*)s);
e929a76b 5000 }
378cc40b 5001 }
737c24fc 5002 if (PL_parser->in_pod) {
a0d0e21e 5003 /* Incest with pod. */
5db06880
NC
5004#ifdef PERL_MAD
5005 if (PL_madskills)
cd81e915 5006 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 5007#endif
01a57ef7 5008 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 5009 sv_setpvs(PL_linestr, "");
3280af22
NIS
5010 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5011 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5012 PL_last_lop = PL_last_uni = NULL;
737c24fc 5013 PL_parser->in_pod = 0;
a0d0e21e 5014 }
4e553d73 5015 }
60d63348 5016 if (PL_rsfp || PL_parser->filtered)
85613cab 5017 incline(s);
737c24fc 5018 } while (PL_parser->in_pod);
3280af22 5019 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 5020 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5021 PL_last_lop = PL_last_uni = NULL;
57843af0 5022 if (CopLINE(PL_curcop) == 1) {
3280af22 5023 while (s < PL_bufend && isSPACE(*s))
79072805 5024 s++;
a0d0e21e 5025 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 5026 s++;
5db06880
NC
5027#ifdef PERL_MAD
5028 if (PL_madskills)
cd81e915 5029 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 5030#endif
bd61b366 5031 d = NULL;
3280af22 5032 if (!PL_in_eval) {
44a8e56a 5033 if (*s == '#' && *(s+1) == '!')
5034 d = s + 2;
5035#ifdef ALTERNATE_SHEBANG
5036 else {
bfed75c6 5037 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 5038 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5039 d = s + (sizeof(as) - 1);
5040 }
5041#endif /* ALTERNATE_SHEBANG */
5042 }
5043 if (d) {
b8378b72 5044 char *ipath;
774d564b 5045 char *ipathend;
b8378b72 5046
774d564b 5047 while (isSPACE(*d))
b8378b72
CS
5048 d++;
5049 ipath = d;
774d564b 5050 while (*d && !isSPACE(*d))
5051 d++;
5052 ipathend = d;
5053
5054#ifdef ARG_ZERO_IS_SCRIPT
5055 if (ipathend > ipath) {
5056 /*
5057 * HP-UX (at least) sets argv[0] to the script name,
5058 * which makes $^X incorrect. And Digital UNIX and Linux,
5059 * at least, set argv[0] to the basename of the Perl
5060 * interpreter. So, having found "#!", we'll set it right.
5061 */
fafc274c
NC
5062 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5063 SVt_PV)); /* $^X */
774d564b 5064 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 5065 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 5066 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 5067 SvSETMAGIC(x);
5068 }
556c1dec
JH
5069 else {
5070 STRLEN blen;
5071 STRLEN llen;
cfd0369c 5072 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 5073 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
5074 if (llen < blen) {
5075 bstart += blen - llen;
5076 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5077 sv_setpvn(x, ipath, ipathend - ipath);
5078 SvSETMAGIC(x);
5079 }
5080 }
5081 }
774d564b 5082 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 5083 }
774d564b 5084#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
5085
5086 /*
5087 * Look for options.
5088 */
748a9306 5089 d = instr(s,"perl -");
84e30d1a 5090 if (!d) {
748a9306 5091 d = instr(s,"perl");
84e30d1a
GS
5092#if defined(DOSISH)
5093 /* avoid getting into infinite loops when shebang
5094 * line contains "Perl" rather than "perl" */
5095 if (!d) {
5096 for (d = ipathend-4; d >= ipath; --d) {
5097 if ((*d == 'p' || *d == 'P')
5098 && !ibcmp(d, "perl", 4))
5099 {
5100 break;
5101 }
5102 }
5103 if (d < ipath)
bd61b366 5104 d = NULL;
84e30d1a
GS
5105 }
5106#endif
5107 }
44a8e56a 5108#ifdef ALTERNATE_SHEBANG
5109 /*
5110 * If the ALTERNATE_SHEBANG on this system starts with a
5111 * character that can be part of a Perl expression, then if
5112 * we see it but not "perl", we're probably looking at the
5113 * start of Perl code, not a request to hand off to some
5114 * other interpreter. Similarly, if "perl" is there, but
5115 * not in the first 'word' of the line, we assume the line
5116 * contains the start of the Perl program.
44a8e56a 5117 */
5118 if (d && *s != '#') {
f54cb97a 5119 const char *c = ipath;
44a8e56a 5120 while (*c && !strchr("; \t\r\n\f\v#", *c))
5121 c++;
5122 if (c < d)
bd61b366 5123 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 5124 else
5125 *s = '#'; /* Don't try to parse shebang line */
5126 }
774d564b 5127#endif /* ALTERNATE_SHEBANG */
748a9306 5128 if (!d &&
44a8e56a 5129 *s == '#' &&
774d564b 5130 ipathend > ipath &&
3280af22 5131 !PL_minus_c &&
748a9306 5132 !instr(s,"indir") &&
3280af22 5133 instr(PL_origargv[0],"perl"))
748a9306 5134 {
27da23d5 5135 dVAR;
9f68db38 5136 char **newargv;
9f68db38 5137
774d564b 5138 *ipathend = '\0';
5139 s = ipathend + 1;
3280af22 5140 while (s < PL_bufend && isSPACE(*s))
9f68db38 5141 s++;
3280af22 5142 if (s < PL_bufend) {
d85f917e 5143 Newx(newargv,PL_origargc+3,char*);
9f68db38 5144 newargv[1] = s;
3280af22 5145 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
5146 s++;
5147 *s = '\0';
3280af22 5148 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
5149 }
5150 else
3280af22 5151 newargv = PL_origargv;
774d564b 5152 newargv[0] = ipath;
b35112e7 5153 PERL_FPU_PRE_EXEC
b4748376 5154 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 5155 PERL_FPU_POST_EXEC
cea2e8a9 5156 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 5157 }
748a9306 5158 if (d) {
c35e046a
AL
5159 while (*d && !isSPACE(*d))
5160 d++;
5161 while (SPACE_OR_TAB(*d))
5162 d++;
748a9306
LW
5163
5164 if (*d++ == '-') {
f54cb97a 5165 const bool switches_done = PL_doswitches;
fb993905
GA
5166 const U32 oldpdb = PL_perldb;
5167 const bool oldn = PL_minus_n;
5168 const bool oldp = PL_minus_p;
c7030b81 5169 const char *d1 = d;
fb993905 5170
8cc95fdb 5171 do {
4ba71d51
FC
5172 bool baduni = FALSE;
5173 if (*d1 == 'C') {
bd0ab00d
NC
5174 const char *d2 = d1 + 1;
5175 if (parse_unicode_opts((const char **)&d2)
5176 != PL_unicode)
5177 baduni = TRUE;
4ba71d51
FC
5178 }
5179 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
5180 const char * const m = d1;
5181 while (*d1 && !isSPACE(*d1))
5182 d1++;
cea2e8a9 5183 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 5184 (int)(d1 - m), m);
8cc95fdb 5185 }
c7030b81
NC
5186 d1 = moreswitches(d1);
5187 } while (d1);
f0b2cf55
YST
5188 if (PL_doswitches && !switches_done) {
5189 int argc = PL_origargc;
5190 char **argv = PL_origargv;
5191 do {
5192 argc--,argv++;
5193 } while (argc && argv[0][0] == '-' && argv[0][1]);
5194 init_argv_symbols(argc,argv);
5195 }
65269a95 5196 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5197 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5198 /* if we have already added "LINE: while (<>) {",
5199 we must not do it again */
748a9306 5200 {
76f68e9b 5201 sv_setpvs(PL_linestr, "");
3280af22
NIS
5202 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5203 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5204 PL_last_lop = PL_last_uni = NULL;
3280af22 5205 PL_preambled = FALSE;
65269a95 5206 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5207 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5208 goto retry;
5209 }
a0d0e21e 5210 }
79072805 5211 }
9f68db38 5212 }
79072805 5213 }
3280af22 5214 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5215 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5216 start_force(PL_curforce);
5217 NEXTVAL_NEXTTOKE.ival = 0;
5218 force_next(FORMRBRACK);
5219 TOKEN(';');
ae986130 5220 }
378cc40b 5221 goto retry;
4fdae800 5222 case '\r':
6a27c188 5223#ifdef PERL_STRICT_CR
cea2e8a9 5224 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5225 Perl_croak(aTHX_
cc507455 5226 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5227#endif
4fdae800 5228 case ' ': case '\t': case '\f': case 013:
5db06880 5229#ifdef PERL_MAD
cd81e915 5230 PL_realtokenstart = -1;
ac372eb8
RD
5231 if (!PL_thiswhite)
5232 PL_thiswhite = newSVpvs("");
5233 sv_catpvn(PL_thiswhite, s, 1);
5db06880 5234#endif
ac372eb8 5235 s++;
378cc40b 5236 goto retry;
378cc40b 5237 case '#':
e929a76b 5238 case '\n':
5db06880 5239#ifdef PERL_MAD
cd81e915 5240 PL_realtokenstart = -1;
5db06880 5241 if (PL_madskills)
cd81e915 5242 PL_faketokens = 0;
5db06880 5243#endif
60d63348 5244 if (PL_lex_state != LEX_NORMAL ||
62e4c90a
FC
5245 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5246 if (*s == '#' && s == PL_linestart && PL_in_eval
60d63348 5247 && !PL_rsfp && !PL_parser->filtered) {
df0deb90
GS
5248 /* handle eval qq[#line 1 "foo"\n ...] */
5249 CopLINE_dec(PL_curcop);
5250 incline(s);
5251 }
5db06880
NC
5252 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5253 s = SKIPSPACE0(s);
62e4c90a 5254 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5db06880
NC
5255 incline(s);
5256 }
5257 else {
9c74ccc9 5258 const bool in_comment = *s == '#';
5db06880
NC
5259 d = s;
5260 while (d < PL_bufend && *d != '\n')
5261 d++;
5262 if (d < PL_bufend)
5263 d++;
5264 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5637ef5b
NC
5265 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5266 d, PL_bufend);
5db06880
NC
5267#ifdef PERL_MAD
5268 if (PL_madskills)
cd81e915 5269 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5270#endif
5271 s = d;
9c74ccc9
FC
5272 if (in_comment && d == PL_bufend
5273 && PL_lex_state == LEX_INTERPNORMAL
90a536e1 5274 && PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
9c74ccc9
FC
5275 && SvEVALED(PL_lex_repl) && d[-1] == '}') s--;
5276 else incline(s);
5db06880 5277 }
3280af22 5278 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5279 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5280 start_force(PL_curforce);
5281 NEXTVAL_NEXTTOKE.ival = 0;
5282 force_next(FORMRBRACK);
5283 TOKEN(';');
a687059c 5284 }
378cc40b 5285 }
a687059c 5286 else {
5db06880
NC
5287#ifdef PERL_MAD
5288 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5289 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5290 PL_faketokens = 0;
5db06880
NC
5291 s = SKIPSPACE0(s);
5292 TOKEN(PEG); /* make sure any #! line is accessible */
5293 }
5294 s = SKIPSPACE0(s);
5295 }
5296 else {
5297/* if (PL_madskills && PL_lex_formbrack) { */
5298 d = s;
5299 while (d < PL_bufend && *d != '\n')
5300 d++;
5301 if (d < PL_bufend)
5302 d++;
5303 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5304 Perl_croak(aTHX_ "panic: input overflow");
5305 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5306 if (!PL_thiswhite)
6b29d1f5 5307 PL_thiswhite = newSVpvs("");
5db06880 5308 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5309 sv_setpvs(PL_thiswhite, "");
cd81e915 5310 PL_faketokens = 0;
5db06880 5311 }
cd81e915 5312 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5313 }
5314 s = d;
5315/* }
5316 *s = '\0';
5317 PL_bufend = s; */
5318 }
5319#else
378cc40b 5320 *s = '\0';
3280af22 5321 PL_bufend = s;
5db06880 5322#endif
a687059c 5323 }
378cc40b
LW
5324 goto retry;
5325 case '-':
79072805 5326 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5327 I32 ftst = 0;
90771dc0 5328 char tmp;
e5edeb50 5329
378cc40b 5330 s++;
3280af22 5331 PL_bufptr = s;
748a9306
LW
5332 tmp = *s++;
5333
bf4acbe4 5334 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5335 s++;
5336
5337 if (strnEQ(s,"=>",2)) {
3280af22 5338 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5339 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5340 OPERATOR('-'); /* unary minus */
5341 }
3280af22 5342 PL_last_uni = PL_oldbufptr;
748a9306 5343 switch (tmp) {
e5edeb50
JH
5344 case 'r': ftst = OP_FTEREAD; break;
5345 case 'w': ftst = OP_FTEWRITE; break;
5346 case 'x': ftst = OP_FTEEXEC; break;
5347 case 'o': ftst = OP_FTEOWNED; break;
5348 case 'R': ftst = OP_FTRREAD; break;
5349 case 'W': ftst = OP_FTRWRITE; break;
5350 case 'X': ftst = OP_FTREXEC; break;
5351 case 'O': ftst = OP_FTROWNED; break;
5352 case 'e': ftst = OP_FTIS; break;
5353 case 'z': ftst = OP_FTZERO; break;
5354 case 's': ftst = OP_FTSIZE; break;
5355 case 'f': ftst = OP_FTFILE; break;
5356 case 'd': ftst = OP_FTDIR; break;
5357 case 'l': ftst = OP_FTLINK; break;
5358 case 'p': ftst = OP_FTPIPE; break;
5359 case 'S': ftst = OP_FTSOCK; break;
5360 case 'u': ftst = OP_FTSUID; break;
5361 case 'g': ftst = OP_FTSGID; break;
5362 case 'k': ftst = OP_FTSVTX; break;
5363 case 'b': ftst = OP_FTBLK; break;
5364 case 'c': ftst = OP_FTCHR; break;
5365 case 't': ftst = OP_FTTTY; break;
5366 case 'T': ftst = OP_FTTEXT; break;
5367 case 'B': ftst = OP_FTBINARY; break;
5368 case 'M': case 'A': case 'C':
fafc274c 5369 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5370 switch (tmp) {
5371 case 'M': ftst = OP_FTMTIME; break;
5372 case 'A': ftst = OP_FTATIME; break;
5373 case 'C': ftst = OP_FTCTIME; break;
5374 default: break;
5375 }
5376 break;
378cc40b 5377 default:
378cc40b
LW
5378 break;
5379 }
e5edeb50 5380 if (ftst) {
eb160463 5381 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5382 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5383 "### Saw file test %c\n", (int)tmp);
5f80b19c 5384 } );
e5edeb50
JH
5385 FTST(ftst);
5386 }
5387 else {
5388 /* Assume it was a minus followed by a one-letter named
5389 * subroutine call (or a -bareword), then. */
95c31fe3 5390 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5391 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5392 (int) tmp);
5f80b19c 5393 } );
3cf7b4c4 5394 s = --PL_bufptr;
e5edeb50 5395 }
378cc40b 5396 }
90771dc0
NC
5397 {
5398 const char tmp = *s++;
5399 if (*s == tmp) {
5400 s++;
5401 if (PL_expect == XOPERATOR)
5402 TERM(POSTDEC);
5403 else
5404 OPERATOR(PREDEC);
5405 }
5406 else if (*s == '>') {
5407 s++;
29595ff2 5408 s = SKIPSPACE1(s);
90771dc0
NC
5409 if (isIDFIRST_lazy_if(s,UTF)) {
5410 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5411 TOKEN(ARROW);
5412 }
5413 else if (*s == '$')
5414 OPERATOR(ARROW);
5415 else
5416 TERM(ARROW);
5417 }
78cdf107
Z
5418 if (PL_expect == XOPERATOR) {
5419 if (*s == '=' && !PL_lex_allbrackets &&
5420 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5421 s--;
5422 TOKEN(0);
5423 }
90771dc0 5424 Aop(OP_SUBTRACT);
78cdf107 5425 }
90771dc0
NC
5426 else {
5427 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5428 check_uni();
5429 OPERATOR('-'); /* unary minus */
79072805 5430 }
2f3197b3 5431 }
79072805 5432
378cc40b 5433 case '+':
90771dc0
NC
5434 {
5435 const char tmp = *s++;
5436 if (*s == tmp) {
5437 s++;
5438 if (PL_expect == XOPERATOR)
5439 TERM(POSTINC);
5440 else
5441 OPERATOR(PREINC);
5442 }
78cdf107
Z
5443 if (PL_expect == XOPERATOR) {
5444 if (*s == '=' && !PL_lex_allbrackets &&
5445 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5446 s--;
5447 TOKEN(0);
5448 }
90771dc0 5449 Aop(OP_ADD);
78cdf107 5450 }
90771dc0
NC
5451 else {
5452 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5453 check_uni();
5454 OPERATOR('+');
5455 }
2f3197b3 5456 }
a687059c 5457
378cc40b 5458 case '*':
3280af22
NIS
5459 if (PL_expect != XOPERATOR) {
5460 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5461 PL_expect = XOPERATOR;
5462 force_ident(PL_tokenbuf, '*');
5463 if (!*PL_tokenbuf)
a0d0e21e 5464 PREREF('*');
79072805 5465 TERM('*');
a687059c 5466 }
79072805
LW
5467 s++;
5468 if (*s == '*') {
a687059c 5469 s++;
78cdf107
Z
5470 if (*s == '=' && !PL_lex_allbrackets &&
5471 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5472 s -= 2;
5473 TOKEN(0);
5474 }
79072805 5475 PWop(OP_POW);
a687059c 5476 }
78cdf107
Z
5477 if (*s == '=' && !PL_lex_allbrackets &&
5478 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5479 s--;
5480 TOKEN(0);
5481 }
79072805
LW
5482 Mop(OP_MULTIPLY);
5483
378cc40b 5484 case '%':
3280af22 5485 if (PL_expect == XOPERATOR) {
78cdf107
Z
5486 if (s[1] == '=' && !PL_lex_allbrackets &&
5487 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5488 TOKEN(0);
bbce6d69 5489 ++s;
5490 Mop(OP_MODULO);
a687059c 5491 }
3280af22 5492 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5493 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5494 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5495 if (!PL_tokenbuf[1]) {
bbce6d69 5496 PREREF('%');
a687059c 5497 }
60ac52eb
FC
5498 PL_expect = XOPERATOR;
5499 force_ident_maybe_lex('%');
bbce6d69 5500 TERM('%');
a687059c 5501
378cc40b 5502 case '^':
78cdf107
Z
5503 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5504 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5505 TOKEN(0);
79072805 5506 s++;
a0d0e21e 5507 BOop(OP_BIT_XOR);
79072805 5508 case '[':
a7aaec61
Z
5509 if (PL_lex_brackets > 100)
5510 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5511 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5512 PL_lex_allbrackets++;
df3467db
IG
5513 {
5514 const char tmp = *s++;
5515 OPERATOR(tmp);
5516 }
378cc40b 5517 case '~':
0d863452 5518 if (s[1] == '~'
3e7dd34d 5519 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5520 {
78cdf107
Z
5521 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5522 TOKEN(0);
0d863452
RH
5523 s += 2;
5524 Eop(OP_SMARTMATCH);
5525 }
78cdf107
Z
5526 s++;
5527 OPERATOR('~');
378cc40b 5528 case ',':
78cdf107
Z
5529 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5530 TOKEN(0);
5531 s++;
5532 OPERATOR(',');
a0d0e21e
LW
5533 case ':':
5534 if (s[1] == ':') {
5535 len = 0;
0bfa2a8a 5536 goto just_a_word_zero_gv;
a0d0e21e
LW
5537 }
5538 s++;
09bef843
SB
5539 switch (PL_expect) {
5540 OP *attrs;
5db06880
NC
5541#ifdef PERL_MAD
5542 I32 stuffstart;
5543#endif
09bef843
SB
5544 case XOPERATOR:
5545 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5546 break;
5547 PL_bufptr = s; /* update in case we back off */
d83f38d8 5548 if (*s == '=') {
2dc78664
NC
5549 Perl_croak(aTHX_
5550 "Use of := for an empty attribute list is not allowed");
d83f38d8 5551 }
09bef843
SB
5552 goto grabattrs;
5553 case XATTRBLOCK:
5554 PL_expect = XBLOCK;
5555 goto grabattrs;
5556 case XATTRTERM:
5557 PL_expect = XTERMBLOCK;
5558 grabattrs:
5db06880
NC
5559#ifdef PERL_MAD
5560 stuffstart = s - SvPVX(PL_linestr) - 1;
5561#endif
29595ff2 5562 s = PEEKSPACE(s);
5f66b61c 5563 attrs = NULL;
7e2040f0 5564 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5565 I32 tmp;
5cc237b8 5566 SV *sv;
09bef843 5567 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5568 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5569 if (tmp < 0) tmp = -tmp;
5570 switch (tmp) {
5571 case KEY_or:
5572 case KEY_and:
5573 case KEY_for:
11baf631 5574 case KEY_foreach:
f9829d6b
GS
5575 case KEY_unless:
5576 case KEY_if:
5577 case KEY_while:
5578 case KEY_until:
5579 goto got_attrs;
5580 default:
5581 break;
5582 }
5583 }
89a5757c 5584 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
09bef843 5585 if (*d == '(') {
d24ca0c5 5586 d = scan_str(d,TRUE,TRUE,FALSE);
09bef843 5587 if (!d) {
09bef843
SB
5588 /* MUST advance bufptr here to avoid bogus
5589 "at end of line" context messages from yyerror().
5590 */
5591 PL_bufptr = s + len;
5592 yyerror("Unterminated attribute parameter in attribute list");
5593 if (attrs)
5594 op_free(attrs);
5cc237b8 5595 sv_free(sv);
bbf60fe6 5596 return REPORT(0); /* EOF indicator */
09bef843
SB
5597 }
5598 }
5599 if (PL_lex_stuff) {
09bef843 5600 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5601 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5602 newSVOP(OP_CONST, 0, sv));
5603 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5604 PL_lex_stuff = NULL;
09bef843
SB
5605 }
5606 else {
5cc237b8
BS
5607 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5608 sv_free(sv);
1108974d 5609 if (PL_in_my == KEY_our) {
df9a6019 5610 deprecate(":unique");
1108974d 5611 }
bfed75c6 5612 else
371fce9b
DM
5613 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5614 }
5615
d3cea301
SB
5616 /* NOTE: any CV attrs applied here need to be part of
5617 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5618 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5619 sv_free(sv);
78f9721b 5620 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5621 }
5622 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5623 sv_free(sv);
8e5dadda 5624 deprecate(":locked");
5cc237b8
BS
5625 }
5626 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5627 sv_free(sv);
78f9721b 5628 CvMETHOD_on(PL_compcv);
5cc237b8 5629 }
78f9721b
SM
5630 /* After we've set the flags, it could be argued that
5631 we don't need to do the attributes.pm-based setting
5632 process, and shouldn't bother appending recognized
d3cea301
SB
5633 flags. To experiment with that, uncomment the
5634 following "else". (Note that's already been
5635 uncommented. That keeps the above-applied built-in
5636 attributes from being intercepted (and possibly
5637 rejected) by a package's attribute routines, but is
5638 justified by the performance win for the common case
5639 of applying only built-in attributes.) */
0256094b 5640 else
2fcb4757 5641 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5642 newSVOP(OP_CONST, 0,
5cc237b8 5643 sv));
09bef843 5644 }
29595ff2 5645 s = PEEKSPACE(d);
0120eecf 5646 if (*s == ':' && s[1] != ':')
29595ff2 5647 s = PEEKSPACE(s+1);
0120eecf
GS
5648 else if (s == d)
5649 break; /* require real whitespace or :'s */
29595ff2 5650 /* XXX losing whitespace on sequential attributes here */
09bef843 5651 }
90771dc0
NC
5652 {
5653 const char tmp
5654 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5655 if (*s != ';' && *s != '}' && *s != tmp
5656 && (tmp != '=' || *s != ')')) {
5657 const char q = ((*s == '\'') ? '"' : '\'');
5658 /* If here for an expression, and parsed no attrs, back
5659 off. */
5660 if (tmp == '=' && !attrs) {
5661 s = PL_bufptr;
5662 break;
5663 }
5664 /* MUST advance bufptr here to avoid bogus "at end of line"
5665 context messages from yyerror().
5666 */
5667 PL_bufptr = s;
10edeb5d
JH
5668 yyerror( (const char *)
5669 (*s
5670 ? Perl_form(aTHX_ "Invalid separator character "
5671 "%c%c%c in attribute list", q, *s, q)
5672 : "Unterminated attribute list" ) );
90771dc0
NC
5673 if (attrs)
5674 op_free(attrs);
5675 OPERATOR(':');
09bef843 5676 }
09bef843 5677 }
f9829d6b 5678 got_attrs:
09bef843 5679 if (attrs) {
cd81e915 5680 start_force(PL_curforce);
9ded7720 5681 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5682 CURMAD('_', PL_nextwhite);
89122651 5683 force_next(THING);
5db06880
NC
5684 }
5685#ifdef PERL_MAD
5686 if (PL_madskills) {
cd81e915 5687 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5688 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5689 }
5db06880 5690#endif
09bef843
SB
5691 TOKEN(COLONATTR);
5692 }
78cdf107
Z
5693 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5694 s--;
5695 TOKEN(0);
5696 }
5697 PL_lex_allbrackets--;
a0d0e21e 5698 OPERATOR(':');
8990e307
LW
5699 case '(':
5700 s++;
3280af22
NIS
5701 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5702 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5703 else
3280af22 5704 PL_expect = XTERM;
29595ff2 5705 s = SKIPSPACE1(s);
78cdf107 5706 PL_lex_allbrackets++;
a0d0e21e 5707 TOKEN('(');
378cc40b 5708 case ';':
78cdf107
Z
5709 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5710 TOKEN(0);
f4dd75d9 5711 CLINE;
78cdf107
Z
5712 s++;
5713 OPERATOR(';');
378cc40b 5714 case ')':
78cdf107
Z
5715 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5716 TOKEN(0);
5717 s++;
5718 PL_lex_allbrackets--;
5719 s = SKIPSPACE1(s);
5720 if (*s == '{')
5721 PREBLOCK(')');
5722 TERM(')');
79072805 5723 case ']':
a7aaec61
Z
5724 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5725 TOKEN(0);
79072805 5726 s++;
3280af22 5727 if (PL_lex_brackets <= 0)
d98d5fff 5728 yyerror("Unmatched right square bracket");
463ee0b2 5729 else
3280af22 5730 --PL_lex_brackets;
78cdf107 5731 PL_lex_allbrackets--;
3280af22
NIS
5732 if (PL_lex_state == LEX_INTERPNORMAL) {
5733 if (PL_lex_brackets == 0) {
02255c60
FC
5734 if (*s == '-' && s[1] == '>')
5735 PL_lex_state = LEX_INTERPENDMAYBE;
5736 else if (*s != '[' && *s != '{')
3280af22 5737 PL_lex_state = LEX_INTERPEND;
79072805
LW
5738 }
5739 }
4633a7c4 5740 TERM(']');
79072805 5741 case '{':
79072805 5742 s++;
eaf6a13d 5743 leftbracket:
3280af22 5744 if (PL_lex_brackets > 100) {
8edd5f42 5745 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5746 }
3280af22 5747 switch (PL_expect) {
a0d0e21e 5748 case XTERM:
819b004e 5749 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5750 PL_lex_allbrackets++;
79072805 5751 OPERATOR(HASHBRACK);
a0d0e21e 5752 case XOPERATOR:
bf4acbe4 5753 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5754 s++;
44a8e56a 5755 d = s;
3280af22
NIS
5756 PL_tokenbuf[0] = '\0';
5757 if (d < PL_bufend && *d == '-') {
5758 PL_tokenbuf[0] = '-';
44a8e56a 5759 d++;
bf4acbe4 5760 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5761 d++;
5762 }
7e2040f0 5763 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5764 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5765 FALSE, &len);
bf4acbe4 5766 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5767 d++;
5768 if (*d == '}') {
f54cb97a 5769 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5770 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5771 if (minus)
5772 force_next('-');
748a9306
LW
5773 }
5774 }
5775 /* FALL THROUGH */
09bef843 5776 case XATTRBLOCK:
748a9306 5777 case XBLOCK:
3280af22 5778 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5779 PL_lex_allbrackets++;
3280af22 5780 PL_expect = XSTATE;
a0d0e21e 5781 break;
09bef843 5782 case XATTRTERM:
a0d0e21e 5783 case XTERMBLOCK:
3280af22 5784 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5785 PL_lex_allbrackets++;
3280af22 5786 PL_expect = XSTATE;
a0d0e21e
LW
5787 break;
5788 default: {
f54cb97a 5789 const char *t;
3280af22
NIS
5790 if (PL_oldoldbufptr == PL_last_lop)
5791 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5792 else
3280af22 5793 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5794 PL_lex_allbrackets++;
29595ff2 5795 s = SKIPSPACE1(s);
8452ff4b
SB
5796 if (*s == '}') {
5797 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5798 PL_expect = XTERM;
5799 /* This hack is to get the ${} in the message. */
5800 PL_bufptr = s+1;
5801 yyerror("syntax error");
5802 break;
5803 }
a0d0e21e 5804 OPERATOR(HASHBRACK);
8452ff4b 5805 }
b8a4b1be
GS
5806 /* This hack serves to disambiguate a pair of curlies
5807 * as being a block or an anon hash. Normally, expectation
5808 * determines that, but in cases where we're not in a
5809 * position to expect anything in particular (like inside
5810 * eval"") we have to resolve the ambiguity. This code
5811 * covers the case where the first term in the curlies is a
5812 * quoted string. Most other cases need to be explicitly
a0288114 5813 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5814 * curly in order to force resolution as an anon hash.
5815 *
5816 * XXX should probably propagate the outer expectation
5817 * into eval"" to rely less on this hack, but that could
5818 * potentially break current behavior of eval"".
5819 * GSAR 97-07-21
5820 */
5821 t = s;
5822 if (*s == '\'' || *s == '"' || *s == '`') {
5823 /* common case: get past first string, handling escapes */
3280af22 5824 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5825 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5826 t++;
5827 t++;
a0d0e21e 5828 }
b8a4b1be 5829 else if (*s == 'q') {
3280af22 5830 if (++t < PL_bufend
b8a4b1be 5831 && (!isALNUM(*t)
3280af22 5832 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5833 && !isALNUM(*t))))
5834 {
abc667d1 5835 /* skip q//-like construct */
f54cb97a 5836 const char *tmps;
b8a4b1be
GS
5837 char open, close, term;
5838 I32 brackets = 1;
5839
3280af22 5840 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5841 t++;
abc667d1
DM
5842 /* check for q => */
5843 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5844 OPERATOR(HASHBRACK);
5845 }
b8a4b1be
GS
5846 term = *t;
5847 open = term;
5848 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5849 term = tmps[5];
5850 close = term;
5851 if (open == close)
3280af22
NIS
5852 for (t++; t < PL_bufend; t++) {
5853 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5854 t++;
6d07e5e9 5855 else if (*t == open)
b8a4b1be
GS
5856 break;
5857 }
abc667d1 5858 else {
3280af22
NIS
5859 for (t++; t < PL_bufend; t++) {
5860 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5861 t++;
6d07e5e9 5862 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5863 break;
5864 else if (*t == open)
5865 brackets++;
5866 }
abc667d1
DM
5867 }
5868 t++;
b8a4b1be 5869 }
abc667d1
DM
5870 else
5871 /* skip plain q word */
5872 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5873 t += UTF8SKIP(t);
a0d0e21e 5874 }
7e2040f0 5875 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5876 t += UTF8SKIP(t);
7e2040f0 5877 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5878 t += UTF8SKIP(t);
a0d0e21e 5879 }
3280af22 5880 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5881 t++;
b8a4b1be
GS
5882 /* if comma follows first term, call it an anon hash */
5883 /* XXX it could be a comma expression with loop modifiers */
3280af22 5884 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5885 || (*t == '=' && t[1] == '>')))
a0d0e21e 5886 OPERATOR(HASHBRACK);
3280af22 5887 if (PL_expect == XREF)
4e4e412b 5888 PL_expect = XTERM;
a0d0e21e 5889 else {
3280af22
NIS
5890 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5891 PL_expect = XSTATE;
a0d0e21e 5892 }
8990e307 5893 }
a0d0e21e 5894 break;
463ee0b2 5895 }
6154021b 5896 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5897 if (isSPACE(*s) || *s == '#')
3280af22 5898 PL_copline = NOLINE; /* invalidate current command line number */
7c70caa5 5899 TOKEN(formbrack ? '=' : '{');
378cc40b 5900 case '}':
a7aaec61
Z
5901 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5902 TOKEN(0);
79072805
LW
5903 rightbracket:
5904 s++;
3280af22 5905 if (PL_lex_brackets <= 0)
d98d5fff 5906 yyerror("Unmatched right curly bracket");
463ee0b2 5907 else
3280af22 5908 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 5909 PL_lex_allbrackets--;
3280af22
NIS
5910 if (PL_lex_state == LEX_INTERPNORMAL) {
5911 if (PL_lex_brackets == 0) {
9059aa12
LW
5912 if (PL_expect & XFAKEBRACK) {
5913 PL_expect &= XENUMMASK;
3280af22
NIS
5914 PL_lex_state = LEX_INTERPEND;
5915 PL_bufptr = s;
5db06880
NC
5916#if 0
5917 if (PL_madskills) {
cd81e915 5918 if (!PL_thiswhite)
6b29d1f5 5919 PL_thiswhite = newSVpvs("");
76f68e9b 5920 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5921 }
5922#endif
cea2e8a9 5923 return yylex(); /* ignore fake brackets */
79072805 5924 }
f777953f 5925 if (PL_lex_inwhat == OP_SUBST && PL_lex_repl == PL_linestr
6b00f562
FC
5926 && SvEVALED(PL_lex_repl))
5927 PL_lex_state = LEX_INTERPEND;
5928 else if (*s == '-' && s[1] == '>')
3280af22 5929 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5930 else if (*s != '[' && *s != '{')
3280af22 5931 PL_lex_state = LEX_INTERPEND;
79072805
LW
5932 }
5933 }
9059aa12
LW
5934 if (PL_expect & XFAKEBRACK) {
5935 PL_expect &= XENUMMASK;
3280af22 5936 PL_bufptr = s;
cea2e8a9 5937 return yylex(); /* ignore fake brackets */
748a9306 5938 }
cd81e915 5939 start_force(PL_curforce);
5db06880
NC
5940 if (PL_madskills) {
5941 curmad('X', newSVpvn(s-1,1));
cd81e915 5942 CURMAD('_', PL_thiswhite);
5db06880 5943 }
7c70caa5 5944 force_next(formbrack ? '.' : '}');
583c9d5c 5945 if (formbrack) LEAVE;
5db06880 5946#ifdef PERL_MAD
cd81e915 5947 if (!PL_thistoken)
6b29d1f5 5948 PL_thistoken = newSVpvs("");
5db06880 5949#endif
705fe0e5
FC
5950 if (formbrack == 2) { /* means . where arguments were expected */
5951 start_force(PL_curforce);
5952 force_next(';');
96f9b782 5953 TOKEN(FORMRBRACK);
705fe0e5 5954 }
79072805 5955 TOKEN(';');
378cc40b
LW
5956 case '&':
5957 s++;
78cdf107
Z
5958 if (*s++ == '&') {
5959 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5960 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5961 s -= 2;
5962 TOKEN(0);
5963 }
a0d0e21e 5964 AOPERATOR(ANDAND);
78cdf107 5965 }
378cc40b 5966 s--;
3280af22 5967 if (PL_expect == XOPERATOR) {
041457d9
DM
5968 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5969 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5970 {
57843af0 5971 CopLINE_dec(PL_curcop);
f1f66076 5972 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5973 CopLINE_inc(PL_curcop);
463ee0b2 5974 }
78cdf107
Z
5975 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5976 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5977 s--;
5978 TOKEN(0);
5979 }
79072805 5980 BAop(OP_BIT_AND);
463ee0b2 5981 }
79072805 5982
c07656ed
FC
5983 PL_tokenbuf[0] = '&';
5984 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf + 1,
5985 sizeof PL_tokenbuf - 1, TRUE);
5986 if (PL_tokenbuf[1]) {
3280af22 5987 PL_expect = XOPERATOR;
60ac52eb 5988 force_ident_maybe_lex('&');
463ee0b2 5989 }
79072805
LW
5990 else
5991 PREREF('&');
6154021b 5992 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5993 TERM('&');
5994
378cc40b
LW
5995 case '|':
5996 s++;
78cdf107
Z
5997 if (*s++ == '|') {
5998 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5999 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6000 s -= 2;
6001 TOKEN(0);
6002 }
a0d0e21e 6003 AOPERATOR(OROR);
78cdf107 6004 }
378cc40b 6005 s--;
78cdf107
Z
6006 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6007 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6008 s--;
6009 TOKEN(0);
6010 }
79072805 6011 BOop(OP_BIT_OR);
378cc40b
LW
6012 case '=':
6013 s++;
748a9306 6014 {
90771dc0 6015 const char tmp = *s++;
78cdf107
Z
6016 if (tmp == '=') {
6017 if (!PL_lex_allbrackets &&
6018 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6019 s -= 2;
6020 TOKEN(0);
6021 }
90771dc0 6022 Eop(OP_EQ);
78cdf107
Z
6023 }
6024 if (tmp == '>') {
6025 if (!PL_lex_allbrackets &&
6026 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6027 s -= 2;
6028 TOKEN(0);
6029 }
90771dc0 6030 OPERATOR(',');
78cdf107 6031 }
90771dc0
NC
6032 if (tmp == '~')
6033 PMop(OP_MATCH);
6034 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6035 && strchr("+-*/%.^&|<",tmp))
6036 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6037 "Reversed %c= operator",(int)tmp);
6038 s--;
6039 if (PL_expect == XSTATE && isALPHA(tmp) &&
6040 (s == PL_linestart+1 || s[-2] == '\n') )
6041 {
62e4c90a 6042 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
4a7239ff 6043 || PL_lex_state != LEX_NORMAL) {
90771dc0
NC
6044 d = PL_bufend;
6045 while (s < d) {
6046 if (*s++ == '\n') {
6047 incline(s);
6048 if (strnEQ(s,"=cut",4)) {
6049 s = strchr(s,'\n');
6050 if (s)
6051 s++;
6052 else
6053 s = d;
6054 incline(s);
6055 goto retry;
6056 }
6057 }
a5f75d66 6058 }
90771dc0 6059 goto retry;
a5f75d66 6060 }
5db06880
NC
6061#ifdef PERL_MAD
6062 if (PL_madskills) {
cd81e915 6063 if (!PL_thiswhite)
6b29d1f5 6064 PL_thiswhite = newSVpvs("");
cd81e915 6065 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
6066 PL_bufend - PL_linestart);
6067 }
6068#endif
90771dc0 6069 s = PL_bufend;
737c24fc 6070 PL_parser->in_pod = 1;
90771dc0 6071 goto retry;
a5f75d66 6072 }
a0d0e21e 6073 }
64a40898 6074 if (PL_expect == XBLOCK) {
c35e046a 6075 const char *t = s;
51882d45 6076#ifdef PERL_STRICT_CR
c35e046a 6077 while (SPACE_OR_TAB(*t))
51882d45 6078#else
c35e046a 6079 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 6080#endif
c35e046a 6081 t++;
a0d0e21e 6082 if (*t == '\n' || *t == '#') {
705fe0e5 6083 formbrack = 1;
583c9d5c
FC
6084 ENTER;
6085 SAVEI8(PL_parser->form_lex_state);
64a40898 6086 SAVEI32(PL_lex_formbrack);
583c9d5c 6087 PL_parser->form_lex_state = PL_lex_state;
64a40898 6088 PL_lex_formbrack = PL_lex_brackets + 1;
a0d0e21e
LW
6089 goto leftbracket;
6090 }
79072805 6091 }
78cdf107
Z
6092 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6093 s--;
6094 TOKEN(0);
6095 }
6154021b 6096 pl_yylval.ival = 0;
a0d0e21e 6097 OPERATOR(ASSIGNOP);
378cc40b
LW
6098 case '!':
6099 s++;
90771dc0
NC
6100 {
6101 const char tmp = *s++;
6102 if (tmp == '=') {
6103 /* was this !=~ where !~ was meant?
6104 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6105
6106 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6107 const char *t = s+1;
6108
6109 while (t < PL_bufend && isSPACE(*t))
6110 ++t;
6111
6112 if (*t == '/' || *t == '?' ||
6113 ((*t == 'm' || *t == 's' || *t == 'y')
6114 && !isALNUM(t[1])) ||
6115 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6116 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6117 "!=~ should be !~");
6118 }
78cdf107
Z
6119 if (!PL_lex_allbrackets &&
6120 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6121 s -= 2;
6122 TOKEN(0);
6123 }
90771dc0
NC
6124 Eop(OP_NE);
6125 }
6126 if (tmp == '~')
6127 PMop(OP_NOT);
6128 }
378cc40b
LW
6129 s--;
6130 OPERATOR('!');
6131 case '<':
3280af22 6132 if (PL_expect != XOPERATOR) {
93a17b20 6133 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 6134 check_uni();
79072805
LW
6135 if (s[1] == '<')
6136 s = scan_heredoc(s);
6137 else
6138 s = scan_inputsymbol(s);
78a635de
FC
6139 PL_expect = XOPERATOR;
6140 TOKEN(sublex_start());
378cc40b
LW
6141 }
6142 s++;
90771dc0
NC
6143 {
6144 char tmp = *s++;
78cdf107
Z
6145 if (tmp == '<') {
6146 if (*s == '=' && !PL_lex_allbrackets &&
6147 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6148 s -= 2;
6149 TOKEN(0);
6150 }
90771dc0 6151 SHop(OP_LEFT_SHIFT);
78cdf107 6152 }
90771dc0
NC
6153 if (tmp == '=') {
6154 tmp = *s++;
78cdf107
Z
6155 if (tmp == '>') {
6156 if (!PL_lex_allbrackets &&
6157 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6158 s -= 3;
6159 TOKEN(0);
6160 }
90771dc0 6161 Eop(OP_NCMP);
78cdf107 6162 }
90771dc0 6163 s--;
78cdf107
Z
6164 if (!PL_lex_allbrackets &&
6165 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6166 s -= 2;
6167 TOKEN(0);
6168 }
90771dc0
NC
6169 Rop(OP_LE);
6170 }
395c3793 6171 }
378cc40b 6172 s--;
78cdf107
Z
6173 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6174 s--;
6175 TOKEN(0);
6176 }
79072805 6177 Rop(OP_LT);
378cc40b
LW
6178 case '>':
6179 s++;
90771dc0
NC
6180 {
6181 const char tmp = *s++;
78cdf107
Z
6182 if (tmp == '>') {
6183 if (*s == '=' && !PL_lex_allbrackets &&
6184 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6185 s -= 2;
6186 TOKEN(0);
6187 }
90771dc0 6188 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
6189 }
6190 else if (tmp == '=') {
6191 if (!PL_lex_allbrackets &&
6192 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6193 s -= 2;
6194 TOKEN(0);
6195 }
90771dc0 6196 Rop(OP_GE);
78cdf107 6197 }
90771dc0 6198 }
378cc40b 6199 s--;
78cdf107
Z
6200 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6201 s--;
6202 TOKEN(0);
6203 }
79072805 6204 Rop(OP_GT);
378cc40b
LW
6205
6206 case '$':
bbce6d69 6207 CLINE;
6208
3280af22
NIS
6209 if (PL_expect == XOPERATOR) {
6210 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6211 return deprecate_commaless_var_list();
a0d0e21e 6212 }
8990e307 6213 }
a0d0e21e 6214
c0b977fd 6215 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6216 PL_tokenbuf[0] = '@';
376b8730
SM
6217 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6218 sizeof PL_tokenbuf - 1, FALSE);
6219 if (PL_expect == XOPERATOR)
6220 no_op("Array length", s);
3280af22 6221 if (!PL_tokenbuf[1])
a0d0e21e 6222 PREREF(DOLSHARP);
3280af22 6223 PL_expect = XOPERATOR;
60ac52eb 6224 force_ident_maybe_lex('#');
463ee0b2 6225 TOKEN(DOLSHARP);
79072805 6226 }
bbce6d69 6227
3280af22 6228 PL_tokenbuf[0] = '$';
376b8730
SM
6229 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6230 sizeof PL_tokenbuf - 1, FALSE);
6231 if (PL_expect == XOPERATOR)
6232 no_op("Scalar", s);
3280af22
NIS
6233 if (!PL_tokenbuf[1]) {
6234 if (s == PL_bufend)
bbce6d69 6235 yyerror("Final $ should be \\$ or $name");
6236 PREREF('$');
8990e307 6237 }
a0d0e21e 6238
ff68c719 6239 d = s;
90771dc0
NC
6240 {
6241 const char tmp = *s;
ae28bb2a 6242 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6243 s = SKIPSPACE1(s);
ff68c719 6244
90771dc0
NC
6245 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6246 && intuit_more(s)) {
6247 if (*s == '[') {
6248 PL_tokenbuf[0] = '@';
6249 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6250 char *t = s+1;
6251
6252 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6253 t++;
90771dc0 6254 if (*t++ == ',') {
29595ff2 6255 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6256 while (t < PL_bufend && *t != ']')
6257 t++;
9014280d 6258 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6259 "Multidimensional syntax %.*s not supported",
36c7798d 6260 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6261 }
748a9306 6262 }
93a17b20 6263 }
90771dc0
NC
6264 else if (*s == '{') {
6265 char *t;
6266 PL_tokenbuf[0] = '%';
6267 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6268 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6269 {
6270 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6271 do {
6272 t++;
6273 } while (isSPACE(*t));
90771dc0 6274 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6275 STRLEN len;
90771dc0 6276 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6277 &len);
c35e046a
AL
6278 while (isSPACE(*t))
6279 t++;
4c01a014
BF
6280 if (*t == ';'
6281 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
90771dc0 6282 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4c01a014
BF
6283 "You need to quote \"%"SVf"\"",
6284 SVfARG(newSVpvn_flags(tmpbuf, len,
6285 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
90771dc0
NC
6286 }
6287 }
6288 }
93a17b20 6289 }
bbce6d69 6290
90771dc0
NC
6291 PL_expect = XOPERATOR;
6292 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6293 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6294 if (!islop || PL_last_lop_op == OP_GREPSTART)
6295 PL_expect = XOPERATOR;
6296 else if (strchr("$@\"'`q", *s))
6297 PL_expect = XTERM; /* e.g. print $fh "foo" */
6298 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6299 PL_expect = XTERM; /* e.g. print $fh &sub */
6300 else if (isIDFIRST_lazy_if(s,UTF)) {
6301 char tmpbuf[sizeof PL_tokenbuf];
6302 int t2;
6303 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6304 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6305 /* binary operators exclude handle interpretations */
6306 switch (t2) {
6307 case -KEY_x:
6308 case -KEY_eq:
6309 case -KEY_ne:
6310 case -KEY_gt:
6311 case -KEY_lt:
6312 case -KEY_ge:
6313 case -KEY_le:
6314 case -KEY_cmp:
6315 break;
6316 default:
6317 PL_expect = XTERM; /* e.g. print $fh length() */
6318 break;
6319 }
6320 }
6321 else {
6322 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6323 }
6324 }
90771dc0
NC
6325 else if (isDIGIT(*s))
6326 PL_expect = XTERM; /* e.g. print $fh 3 */
6327 else if (*s == '.' && isDIGIT(s[1]))
6328 PL_expect = XTERM; /* e.g. print $fh .3 */
6329 else if ((*s == '?' || *s == '-' || *s == '+')
6330 && !isSPACE(s[1]) && s[1] != '=')
6331 PL_expect = XTERM; /* e.g. print $fh -1 */
6332 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6333 && s[1] != '/')
6334 PL_expect = XTERM; /* e.g. print $fh /.../
6335 XXX except DORDOR operator
6336 */
6337 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6338 && s[2] != '=')
6339 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6340 }
bbce6d69 6341 }
60ac52eb 6342 force_ident_maybe_lex('$');
79072805 6343 TOKEN('$');
378cc40b
LW
6344
6345 case '@':
3280af22 6346 if (PL_expect == XOPERATOR)
bbce6d69 6347 no_op("Array", s);
3280af22
NIS
6348 PL_tokenbuf[0] = '@';
6349 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6350 if (!PL_tokenbuf[1]) {
bbce6d69 6351 PREREF('@');
6352 }
3280af22 6353 if (PL_lex_state == LEX_NORMAL)
29595ff2 6354 s = SKIPSPACE1(s);
3280af22 6355 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6356 if (*s == '{')
3280af22 6357 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6358
6359 /* Warn about @ where they meant $. */
041457d9
DM
6360 if (*s == '[' || *s == '{') {
6361 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6362 const char *t = s + 1;
7e2040f0 6363 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
b9e186cd 6364 t += UTF ? UTF8SKIP(t) : 1;
a0d0e21e
LW
6365 if (*t == '}' || *t == ']') {
6366 t++;
29595ff2 6367 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
dcbac5bb 6368 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
9014280d 6369 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
b9e186cd
BF
6370 "Scalar value %"SVf" better written as $%"SVf,
6371 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6372 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6373 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6374 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
a0d0e21e 6375 }
93a17b20
LW
6376 }
6377 }
463ee0b2 6378 }
60ac52eb
FC
6379 PL_expect = XOPERATOR;
6380 force_ident_maybe_lex('@');
79072805 6381 TERM('@');
378cc40b 6382
c963b151 6383 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6384 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6385 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6386 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6387 TOKEN(0);
6f33ba73
RGS
6388 s += 2;
6389 AOPERATOR(DORDOR);
6390 }
c963b151 6391 case '?': /* may either be conditional or pattern */
be25f609 6392 if (PL_expect == XOPERATOR) {
90771dc0 6393 char tmp = *s++;
c963b151 6394 if(tmp == '?') {
78cdf107
Z
6395 if (!PL_lex_allbrackets &&
6396 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6397 s--;
6398 TOKEN(0);
6399 }
6400 PL_lex_allbrackets++;
be25f609 6401 OPERATOR('?');
c963b151
BD
6402 }
6403 else {
6404 tmp = *s++;
6405 if(tmp == '/') {
6406 /* A // operator. */
78cdf107
Z
6407 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6408 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6409 LEX_FAKEEOF_LOGIC)) {
6410 s -= 2;
6411 TOKEN(0);
6412 }
c963b151
BD
6413 AOPERATOR(DORDOR);
6414 }
6415 else {
6416 s--;
78cdf107
Z
6417 if (*s == '=' && !PL_lex_allbrackets &&
6418 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6419 s--;
6420 TOKEN(0);
6421 }
c963b151
BD
6422 Mop(OP_DIVIDE);
6423 }
6424 }
6425 }
6426 else {
6427 /* Disable warning on "study /blah/" */
6428 if (PL_oldoldbufptr == PL_last_uni
6429 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6430 || memNE(PL_last_uni, "study", 5)
6431 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6432 ))
6433 check_uni();
725a61d7
Z
6434 if (*s == '?')
6435 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6436 s = scan_pat(s,OP_MATCH);
6437 TERM(sublex_start());
6438 }
378cc40b
LW
6439
6440 case '.':
51882d45
GS
6441 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6442#ifdef PERL_STRICT_CR
6443 && s[1] == '\n'
6444#else
6445 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6446#endif
6447 && (s == PL_linestart || s[-1] == '\n') )
6448 {
3280af22 6449 PL_expect = XSTATE;
705fe0e5 6450 formbrack = 2; /* dot seen where arguments expected */
79072805
LW
6451 goto rightbracket;
6452 }
be25f609 6453 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6454 s += 3;
6455 OPERATOR(YADAYADA);
6456 }
3280af22 6457 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6458 char tmp = *s++;
a687059c 6459 if (*s == tmp) {
78cdf107
Z
6460 if (!PL_lex_allbrackets &&
6461 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6462 s--;
6463 TOKEN(0);
6464 }
a687059c 6465 s++;
2f3197b3
LW
6466 if (*s == tmp) {
6467 s++;
6154021b 6468 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6469 }
6470 else
6154021b 6471 pl_yylval.ival = 0;
378cc40b 6472 OPERATOR(DOTDOT);
a687059c 6473 }
78cdf107
Z
6474 if (*s == '=' && !PL_lex_allbrackets &&
6475 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6476 s--;
6477 TOKEN(0);
6478 }
79072805 6479 Aop(OP_CONCAT);
378cc40b
LW
6480 }
6481 /* FALL THROUGH */
6482 case '0': case '1': case '2': case '3': case '4':
6483 case '5': case '6': case '7': case '8': case '9':
6154021b 6484 s = scan_num(s, &pl_yylval);
931e0695 6485 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6486 if (PL_expect == XOPERATOR)
8990e307 6487 no_op("Number",s);
79072805
LW
6488 TERM(THING);
6489
6490 case '\'':
d24ca0c5 6491 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6492 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6493 if (PL_expect == XOPERATOR) {
6494 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6495 return deprecate_commaless_var_list();
a0d0e21e 6496 }
463ee0b2 6497 else
8990e307 6498 no_op("String",s);
463ee0b2 6499 }
79072805 6500 if (!s)
d4c19fe8 6501 missingterm(NULL);
6154021b 6502 pl_yylval.ival = OP_CONST;
79072805
LW
6503 TERM(sublex_start());
6504
6505 case '"':
d24ca0c5 6506 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6507 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6508 if (PL_expect == XOPERATOR) {
6509 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6510 return deprecate_commaless_var_list();
a0d0e21e 6511 }
463ee0b2 6512 else
8990e307 6513 no_op("String",s);
463ee0b2 6514 }
79072805 6515 if (!s)
d4c19fe8 6516 missingterm(NULL);
6154021b 6517 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6518 /* FIXME. I think that this can be const if char *d is replaced by
6519 more localised variables. */
3280af22 6520 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6521 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6522 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6523 break;
6524 }
6525 }
79072805
LW
6526 TERM(sublex_start());
6527
6528 case '`':
d24ca0c5 6529 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6530 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6531 if (PL_expect == XOPERATOR)
8990e307 6532 no_op("Backticks",s);
79072805 6533 if (!s)
d4c19fe8 6534 missingterm(NULL);
9b201d7d 6535 readpipe_override();
79072805
LW
6536 TERM(sublex_start());
6537
6538 case '\\':
6539 s++;
a2a5de95
NC
6540 if (PL_lex_inwhat && isDIGIT(*s))
6541 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6542 *s, *s);
3280af22 6543 if (PL_expect == XOPERATOR)
8990e307 6544 no_op("Backslash",s);
79072805
LW
6545 OPERATOR(REFGEN);
6546
a7cb1f99 6547 case 'v':
e526c9e6 6548 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6549 char *start = s + 2;
dd629d5b 6550 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6551 start++;
6552 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6553 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6554 TERM(THING);
6555 }
e9d2327d
FC
6556 else if ((*start == ':' && start[1] == ':')
6557 || (PL_expect == XSTATE && *start == ':'))
6558 goto keylookup;
6559 else if (PL_expect == XSTATE) {
6560 d = start;
6561 while (d < PL_bufend && isSPACE(*d)) d++;
6562 if (*d == ':') goto keylookup;
6563 }
e526c9e6 6564 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
e9d2327d 6565 if (!isALPHA(*start) && (PL_expect == XTERM
6f33ba73
RGS
6566 || PL_expect == XREF || PL_expect == XSTATE
6567 || PL_expect == XTERMORDORDOR)) {
af9f5953
BF
6568 GV *const gv = gv_fetchpvn_flags(s, start - s,
6569 UTF ? SVf_UTF8 : 0, SVt_PVCV);
e526c9e6 6570 if (!gv) {
6154021b 6571 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6572 TERM(THING);
6573 }
6574 }
a7cb1f99
GS
6575 }
6576 goto keylookup;
79072805 6577 case 'x':
3280af22 6578 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6579 s++;
6580 Mop(OP_REPEAT);
2f3197b3 6581 }
79072805
LW
6582 goto keylookup;
6583
378cc40b 6584 case '_':
79072805
LW
6585 case 'a': case 'A':
6586 case 'b': case 'B':
6587 case 'c': case 'C':
6588 case 'd': case 'D':
6589 case 'e': case 'E':
6590 case 'f': case 'F':
6591 case 'g': case 'G':
6592 case 'h': case 'H':
6593 case 'i': case 'I':
6594 case 'j': case 'J':
6595 case 'k': case 'K':
6596 case 'l': case 'L':
6597 case 'm': case 'M':
6598 case 'n': case 'N':
6599 case 'o': case 'O':
6600 case 'p': case 'P':
6601 case 'q': case 'Q':
6602 case 'r': case 'R':
6603 case 's': case 'S':
6604 case 't': case 'T':
6605 case 'u': case 'U':
a7cb1f99 6606 case 'V':
79072805
LW
6607 case 'w': case 'W':
6608 case 'X':
6609 case 'y': case 'Y':
6610 case 'z': case 'Z':
6611
49dc05e3 6612 keylookup: {
88e1f1a2 6613 bool anydelim;
18f70389 6614 bool lex;
90771dc0 6615 I32 tmp;
18f70389 6616 SV *sv;
73f3e228
FC
6617 CV *cv;
6618 PADOFFSET off;
6619 OP *rv2cv_op;
10edeb5d 6620
18f70389 6621 lex = FALSE;
10edeb5d 6622 orig_keyword = 0;
73f3e228 6623 off = 0;
18f70389 6624 sv = NULL;
73f3e228 6625 cv = NULL;
10edeb5d
JH
6626 gv = NULL;
6627 gvp = NULL;
73f3e228 6628 rv2cv_op = NULL;
49dc05e3 6629
3280af22
NIS
6630 PL_bufptr = s;
6631 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6632
6633 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6634 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6635
6636 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6637 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6638 goto just_a_word;
6639
3643fb5f 6640 d = s;
3280af22 6641 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6642 d++; /* no comments skipped here, or s### is misparsed */
6643
748a9306 6644 /* Is this a word before a => operator? */
1c3923b3 6645 if (*d == '=' && d[1] == '>') {
748a9306 6646 CLINE;
6154021b 6647 pl_yylval.opval
d0a148a6
NC
6648 = (OP*)newSVOP(OP_CONST, 0,
6649 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6650 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6651 TERM(WORD);
6652 }
6653
88e1f1a2
JV
6654 /* Check for plugged-in keyword */
6655 {
6656 OP *o;
6657 int result;
6658 char *saved_bufptr = PL_bufptr;
6659 PL_bufptr = s;
16c91539 6660 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6661 s = PL_bufptr;
6662 if (result == KEYWORD_PLUGIN_DECLINE) {
6663 /* not a plugged-in keyword */
6664 PL_bufptr = saved_bufptr;
6665 } else if (result == KEYWORD_PLUGIN_STMT) {
6666 pl_yylval.opval = o;
6667 CLINE;
6668 PL_expect = XSTATE;
6669 return REPORT(PLUGSTMT);
6670 } else if (result == KEYWORD_PLUGIN_EXPR) {
6671 pl_yylval.opval = o;
6672 CLINE;
6673 PL_expect = XOPERATOR;
6674 return REPORT(PLUGEXPR);
6675 } else {
6676 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6677 PL_tokenbuf);
6678 }
6679 }
6680
6681 /* Check for built-in keyword */
6682 tmp = keyword(PL_tokenbuf, len, 0);
6683
6684 /* Is this a label? */
6685 if (!anydelim && PL_expect == XSTATE
6686 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2 6687 s = d + 1;
5db1eb8d
BF
6688 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6689 newSVpvn_flags(PL_tokenbuf,
6690 len, UTF ? SVf_UTF8 : 0));
88e1f1a2
JV
6691 CLINE;
6692 TOKEN(LABEL);
6693 }
6694
18f70389
FC
6695 /* Check for lexical sub */
6696 if (PL_expect != XOPERATOR) {
6697 char tmpbuf[sizeof PL_tokenbuf + 1];
18f70389
FC
6698 *tmpbuf = '&';
6699 Copy(PL_tokenbuf, tmpbuf+1, len, char);
6700 off = pad_findmy_pvn(tmpbuf, len+1, UTF ? SVf_UTF8 : 0);
6701 if (off != NOT_IN_PAD) {
73f3e228 6702 assert(off); /* we assume this is boolean-true below */
18f70389
FC
6703 if (PAD_COMPNAME_FLAGS_isOUR(off)) {
6704 HV * const stash = PAD_COMPNAME_OURSTASH(off);
6705 HEK * const stashname = HvNAME_HEK(stash);
6706 sv = newSVhek(stashname);
6707 sv_catpvs(sv, "::");
6708 sv_catpvn_flags(sv, PL_tokenbuf, len,
6709 (UTF ? SV_CATUTF8 : SV_CATBYTES));
6710 gv = gv_fetchsv(sv, GV_NOADD_NOINIT | SvUTF8(sv),
6711 SVt_PVCV);
73f3e228 6712 off = 0;
18f70389 6713 }
73f3e228
FC
6714 else {
6715 rv2cv_op = newOP(OP_PADANY, 0);
6716 rv2cv_op->op_targ = off;
6717 rv2cv_op = (OP*)newCVREF(0, rv2cv_op);
6718 cv = (CV *)PAD_SV(off);
6719 }
6720 lex = TRUE;
6721 goto just_a_word;
18f70389 6722 }
73f3e228 6723 off = 0;
18f70389
FC
6724 }
6725
a0d0e21e 6726 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6727 GV *ogv = NULL; /* override (winner) */
6728 GV *hgv = NULL; /* hidden (loser) */
3280af22 6729 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6730 CV *cv;
af9f5953
BF
6731 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6732 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
56f7f34b
CS
6733 (cv = GvCVu(gv)))
6734 {
6735 if (GvIMPORTED_CV(gv))
6736 ogv = gv;
6737 else if (! CvMETHOD(cv))
6738 hgv = gv;
6739 }
6740 if (!ogv &&
af9f5953 6741 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
c60dbbc3 6742 UTF ? -(I32)len : (I32)len, FALSE)) &&
9e0d86f8 6743 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6744 GvCVu(gv) && GvIMPORTED_CV(gv))
6745 {
6746 ogv = gv;
6747 }
6748 }
6749 if (ogv) {
30fe34ed 6750 orig_keyword = tmp;
56f7f34b 6751 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6752 }
6753 else if (gv && !gvp
6754 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6755 && GvCVu(gv))
6e7b2336
GS
6756 {
6757 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6758 }
56f7f34b
CS
6759 else { /* no override */
6760 tmp = -tmp;
a2a5de95
NC
6761 if (tmp == KEY_dump) {
6762 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6763 "dump() better written as CORE::dump()");
ac206dc8 6764 }
a0714e2c 6765 gv = NULL;
56f7f34b 6766 gvp = 0;
a2a5de95
NC
6767 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6768 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6769 "Ambiguous call resolved as CORE::%s(), "
6770 "qualify as such or use &",
6771 GvENAME(hgv));
49dc05e3 6772 }
a0d0e21e
LW
6773 }
6774
6775 reserved_word:
6776 switch (tmp) {
79072805
LW
6777
6778 default: /* not a keyword */
0bfa2a8a
NC
6779 /* Trade off - by using this evil construction we can pull the
6780 variable gv into the block labelled keylookup. If not, then
6781 we have to give it function scope so that the goto from the
6782 earlier ':' case doesn't bypass the initialisation. */
6783 if (0) {
6784 just_a_word_zero_gv:
73f3e228
FC
6785 sv = NULL;
6786 cv = NULL;
0bfa2a8a
NC
6787 gv = NULL;
6788 gvp = NULL;
73f3e228 6789 rv2cv_op = NULL;
8bee0991 6790 orig_keyword = 0;
18f70389
FC
6791 lex = 0;
6792 off = 0;
0bfa2a8a 6793 }
93a17b20 6794 just_a_word: {
ce29ac45 6795 int pkgname = 0;
f54cb97a 6796 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
898c3bca
FC
6797 const char penultchar =
6798 lastchar && PL_bufptr - 2 >= PL_linestart
6799 ? PL_bufptr[-2]
6800 : 0;
5db06880 6801#ifdef PERL_MAD
cd81e915 6802 SV *nextPL_nextwhite = 0;
5db06880
NC
6803#endif
6804
8990e307
LW
6805
6806 /* Get the rest if it looks like a package qualifier */
6807
155aba94 6808 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6809 STRLEN morelen;
3280af22 6810 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6811 TRUE, &morelen);
6812 if (!morelen)
86fe3f36
BF
6813 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6814 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6815 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
ec2ab091 6816 *s == '\'' ? "'" : "::");
c3e0f903 6817 len += morelen;
ce29ac45 6818 pkgname = 1;
a0d0e21e 6819 }
8990e307 6820
3280af22
NIS
6821 if (PL_expect == XOPERATOR) {
6822 if (PL_bufptr == PL_linestart) {
57843af0 6823 CopLINE_dec(PL_curcop);
f1f66076 6824 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6825 CopLINE_inc(PL_curcop);
463ee0b2
LW
6826 }
6827 else
54310121 6828 no_op("Bareword",s);
463ee0b2 6829 }
8990e307 6830
c3e0f903 6831 /* Look for a subroutine with this name in current package,
73f3e228
FC
6832 unless this is a lexical sub, or name is "Foo::",
6833 in which case Foo is a bareword
c3e0f903
GS
6834 (and a package name). */
6835
5db06880 6836 if (len > 2 && !PL_madskills &&
3280af22 6837 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6838 {
f776e3cd 6839 if (ckWARN(WARN_BAREWORD)
af9f5953 6840 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
9014280d 6841 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
979a1401
BF
6842 "Bareword \"%"SVf"\" refers to nonexistent package",
6843 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6844 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
c3e0f903 6845 len -= 2;
3280af22 6846 PL_tokenbuf[len] = '\0';
a0714e2c 6847 gv = NULL;
c3e0f903
GS
6848 gvp = 0;
6849 }
6850 else {
73f3e228 6851 if (!lex && !gv) {
62d55b22
NC
6852 /* Mustn't actually add anything to a symbol table.
6853 But also don't want to "initialise" any placeholder
6854 constants that might already be there into full
6855 blown PVGVs with attached PVCV. */
90e5519e 6856 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
af9f5953
BF
6857 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6858 SVt_PVCV);
62d55b22 6859 }
b3d904f3 6860 len = 0;
c3e0f903
GS
6861 }
6862
6863 /* if we saw a global override before, get the right name */
8990e307 6864
73f3e228 6865 if (!sv)
18f70389 6866 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
37bb7629 6867 len ? len : strlen(PL_tokenbuf));
49dc05e3 6868 if (gvp) {
37bb7629 6869 SV * const tmp_sv = sv;
396482e1 6870 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6871 sv_catsv(sv, tmp_sv);
6872 SvREFCNT_dec(tmp_sv);
8a7a129d 6873 }
37bb7629 6874
5db06880 6875#ifdef PERL_MAD
cd81e915
NC
6876 if (PL_madskills && !PL_thistoken) {
6877 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6878 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6879 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6880 }
6881#endif
8990e307 6882
a0d0e21e 6883 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6884 CLINE;
6154021b
RGS
6885 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6886 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6887
c3e0f903 6888 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6889 if (len)
6890 goto safe_bareword;
6891
73f3e228 6892 if (!off)
f7461760 6893 {
d8ebba9f 6894 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
6895 const_op->op_private = OPpCONST_BARE;
6896 rv2cv_op = newCVREF(0, const_op);
73f3e228 6897 cv = lex ? GvCV(gv) : rv2cv_op_cv(rv2cv_op, 0);
f7461760 6898 }
5069cc75 6899
8990e307
LW
6900 /* See if it's the indirect object for a list operator. */
6901
3280af22
NIS
6902 if (PL_oldoldbufptr &&
6903 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6904 (PL_oldoldbufptr == PL_last_lop
6905 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6906 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6907 (PL_expect == XREF ||
6908 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6909 {
748a9306
LW
6910 bool immediate_paren = *s == '(';
6911
a0d0e21e 6912 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6913 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6914#ifdef PERL_MAD
cd81e915 6915 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6916#endif
a0d0e21e
LW
6917
6918 /* Two barewords in a row may indicate method call. */
6919
62d55b22 6920 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6921 (tmp = intuit_method(s, gv, cv))) {
6922 op_free(rv2cv_op);
78cdf107
Z
6923 if (tmp == METHOD && !PL_lex_allbrackets &&
6924 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6925 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6926 return REPORT(tmp);
f7461760 6927 }
a0d0e21e
LW
6928
6929 /* If not a declared subroutine, it's an indirect object. */
6930 /* (But it's an indir obj regardless for sort.) */
7294df96 6931 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6932
7294df96
RGS
6933 if (
6934 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6935 (!cv &&
a9ef352a 6936 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6937 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6938 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6939 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6940 )
a9ef352a 6941 {
3280af22 6942 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6943 goto bareword;
93a17b20
LW
6944 }
6945 }
8990e307 6946
3280af22 6947 PL_expect = XOPERATOR;
5db06880
NC
6948#ifdef PERL_MAD
6949 if (isSPACE(*s))
cd81e915
NC
6950 s = SKIPSPACE2(s,nextPL_nextwhite);
6951 PL_nextwhite = nextPL_nextwhite;
5db06880 6952#else
8990e307 6953 s = skipspace(s);
5db06880 6954#endif
1c3923b3
GS
6955
6956 /* Is this a word before a => operator? */
ce29ac45 6957 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6958 op_free(rv2cv_op);
1c3923b3 6959 CLINE;
6154021b 6960 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6961 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6962 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6963 TERM(WORD);
6964 }
6965
6966 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6967 if (*s == '(') {
79072805 6968 CLINE;
5069cc75 6969 if (cv) {
c35e046a
AL
6970 d = s + 1;
6971 while (SPACE_OR_TAB(*d))
6972 d++;
f7461760 6973 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6974 s = d + 1;
c631f32b 6975 goto its_constant;
96e4d5b1 6976 }
6977 }
5db06880
NC
6978#ifdef PERL_MAD
6979 if (PL_madskills) {
cd81e915
NC
6980 PL_nextwhite = PL_thiswhite;
6981 PL_thiswhite = 0;
5db06880 6982 }
cd81e915 6983 start_force(PL_curforce);
5db06880 6984#endif
73f3e228
FC
6985 NEXTVAL_NEXTTOKE.opval =
6986 off ? rv2cv_op : pl_yylval.opval;
3280af22 6987 PL_expect = XOPERATOR;
5db06880
NC
6988#ifdef PERL_MAD
6989 if (PL_madskills) {
cd81e915
NC
6990 PL_nextwhite = nextPL_nextwhite;
6991 curmad('X', PL_thistoken);
6b29d1f5 6992 PL_thistoken = newSVpvs("");
5db06880
NC
6993 }
6994#endif
73f3e228
FC
6995 if (off)
6996 op_free(pl_yylval.opval), force_next(PRIVATEREF);
6997 else op_free(rv2cv_op), force_next(WORD);
6154021b 6998 pl_yylval.ival = 0;
463ee0b2 6999 TOKEN('&');
79072805 7000 }
93a17b20 7001
a0d0e21e 7002 /* If followed by var or block, call it a method (unless sub) */
8990e307 7003
f7461760
Z
7004 if ((*s == '$' || *s == '{') && !cv) {
7005 op_free(rv2cv_op);
3280af22
NIS
7006 PL_last_lop = PL_oldbufptr;
7007 PL_last_lop_op = OP_METHOD;
78cdf107
Z
7008 if (!PL_lex_allbrackets &&
7009 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7010 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 7011 PREBLOCK(METHOD);
463ee0b2
LW
7012 }
7013
8990e307
LW
7014 /* If followed by a bareword, see if it looks like indir obj. */
7015
30fe34ed
RGS
7016 if (!orig_keyword
7017 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
7018 && (tmp = intuit_method(s, gv, cv))) {
7019 op_free(rv2cv_op);
78cdf107
Z
7020 if (tmp == METHOD && !PL_lex_allbrackets &&
7021 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7022 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 7023 return REPORT(tmp);
f7461760 7024 }
93a17b20 7025
8990e307
LW
7026 /* Not a method, so call it a subroutine (if defined) */
7027
5069cc75 7028 if (cv) {
898c3bca 7029 if (lastchar == '-' && penultchar != '-') {
43b5ab4c
BF
7030 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
7031 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7032 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
7033 SVfARG(tmpsv), SVfARG(tmpsv));
7034 }
89bfa8cd 7035 /* Check for a constant sub */
f7461760 7036 if ((sv = cv_const_sv(cv))) {
96e4d5b1 7037 its_constant:
f7461760 7038 op_free(rv2cv_op);
6154021b
RGS
7039 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
7040 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
cc2ebcd7 7041 pl_yylval.opval->op_private = OPpCONST_FOLDED;
6b7c6d95 7042 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 7043 TOKEN(WORD);
89bfa8cd 7044 }
7045
6154021b 7046 op_free(pl_yylval.opval);
f7461760 7047 pl_yylval.opval = rv2cv_op;
6154021b 7048 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 7049 PL_last_lop = PL_oldbufptr;
bf848113 7050 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 7051 /* Is there a prototype? */
5db06880
NC
7052 if (
7053#ifdef PERL_MAD
7054 cv &&
7055#endif
d9f2850e
RGS
7056 SvPOK(cv))
7057 {
8fa6a409
FC
7058 STRLEN protolen = CvPROTOLEN(cv);
7059 const char *proto = CvPROTO(cv);
b5fb7ce3 7060 bool optional;
5f66b61c 7061 if (!protolen)
4633a7c4 7062 TERM(FUNC0SUB);
b5fb7ce3
FC
7063 if ((optional = *proto == ';'))
7064 do
0f5d0394 7065 proto++;
b5fb7ce3 7066 while (*proto == ';');
649d02de
FC
7067 if (
7068 (
7069 (
7070 *proto == '$' || *proto == '_'
c035a075 7071 || *proto == '*' || *proto == '+'
649d02de
FC
7072 )
7073 && proto[1] == '\0'
7074 )
7075 || (
7076 *proto == '\\' && proto[1] && proto[2] == '\0'
7077 )
7078 )
b5fb7ce3 7079 UNIPROTO(UNIOPSUB,optional);
649d02de
FC
7080 if (*proto == '\\' && proto[1] == '[') {
7081 const char *p = proto + 2;
7082 while(*p && *p != ']')
7083 ++p;
b5fb7ce3
FC
7084 if(*p == ']' && !p[1])
7085 UNIPROTO(UNIOPSUB,optional);
649d02de 7086 }
7a52d87a 7087 if (*proto == '&' && *s == '{') {
49a54bbe
NC
7088 if (PL_curstash)
7089 sv_setpvs(PL_subname, "__ANON__");
7090 else
7091 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
7092 if (!PL_lex_allbrackets &&
7093 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7094 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
7095 PREBLOCK(LSTOPSUB);
7096 }
a9ef352a 7097 }
5db06880
NC
7098#ifdef PERL_MAD
7099 {
7100 if (PL_madskills) {
cd81e915
NC
7101 PL_nextwhite = PL_thiswhite;
7102 PL_thiswhite = 0;
5db06880 7103 }
cd81e915 7104 start_force(PL_curforce);
6154021b 7105 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
7106 PL_expect = XTERM;
7107 if (PL_madskills) {
cd81e915
NC
7108 PL_nextwhite = nextPL_nextwhite;
7109 curmad('X', PL_thistoken);
6b29d1f5 7110 PL_thistoken = newSVpvs("");
5db06880 7111 }
73f3e228 7112 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7113 if (!PL_lex_allbrackets &&
7114 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7115 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7116 TOKEN(NOAMP);
7117 }
7118 }
7119
7120 /* Guess harder when madskills require "best effort". */
7121 if (PL_madskills && (!gv || !GvCVu(gv))) {
7122 int probable_sub = 0;
7123 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7124 probable_sub = 1;
7125 else if (isALPHA(*s)) {
7126 char tmpbuf[1024];
7127 STRLEN tmplen;
7128 d = s;
7129 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 7130 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
7131 probable_sub = 1;
7132 else {
7133 while (d < PL_bufend && isSPACE(*d))
7134 d++;
7135 if (*d == '=' && d[1] == '>')
7136 probable_sub = 1;
7137 }
7138 }
7139 if (probable_sub) {
af9f5953
BF
7140 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7141 SVt_PVCV);
6154021b 7142 op_free(pl_yylval.opval);
f7461760 7143 pl_yylval.opval = rv2cv_op;
6154021b 7144 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
7145 PL_last_lop = PL_oldbufptr;
7146 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
7147 PL_nextwhite = PL_thiswhite;
7148 PL_thiswhite = 0;
7149 start_force(PL_curforce);
6154021b 7150 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 7151 PL_expect = XTERM;
cd81e915
NC
7152 PL_nextwhite = nextPL_nextwhite;
7153 curmad('X', PL_thistoken);
6b29d1f5 7154 PL_thistoken = newSVpvs("");
73f3e228 7155 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7156 if (!PL_lex_allbrackets &&
7157 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7158 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7159 TOKEN(NOAMP);
7160 }
7161#else
6154021b 7162 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 7163 PL_expect = XTERM;
73f3e228 7164 force_next(off ? PRIVATEREF : WORD);
78cdf107
Z
7165 if (!PL_lex_allbrackets &&
7166 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7167 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 7168 TOKEN(NOAMP);
5db06880 7169#endif
8990e307 7170 }
748a9306 7171
8990e307
LW
7172 /* Call it a bare word */
7173
5603f27d 7174 if (PL_hints & HINT_STRICT_SUBS)
6154021b 7175 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 7176 else {
9a073a1d
RGS
7177 bareword:
7178 /* after "print" and similar functions (corresponding to
7179 * "F? L" in opcode.pl), whatever wasn't already parsed as
7180 * a filehandle should be subject to "strict subs".
7181 * Likewise for the optional indirect-object argument to system
7182 * or exec, which can't be a bareword */
7183 if ((PL_last_lop_op == OP_PRINT
7184 || PL_last_lop_op == OP_PRTF
7185 || PL_last_lop_op == OP_SAY
7186 || PL_last_lop_op == OP_SYSTEM
7187 || PL_last_lop_op == OP_EXEC)
7188 && (PL_hints & HINT_STRICT_SUBS))
7189 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
7190 if (lastchar != '-') {
7191 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
7192 d = PL_tokenbuf;
7193 while (isLOWER(*d))
7194 d++;
af9f5953 7195 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
9014280d 7196 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
7197 PL_tokenbuf);
7198 }
748a9306
LW
7199 }
7200 }
f7461760 7201 op_free(rv2cv_op);
c3e0f903
GS
7202
7203 safe_bareword:
9b387841
NC
7204 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7205 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
02571fe8
BF
7206 "Operator or semicolon missing before %c%"SVf,
7207 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7208 strlen(PL_tokenbuf),
7209 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
9b387841
NC
7210 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7211 "Ambiguous use of %c resolved as operator %c",
7212 lastchar, lastchar);
748a9306 7213 }
93a17b20 7214 TOKEN(WORD);
79072805 7215 }
79072805 7216
68dc0745 7217 case KEY___FILE__:
7eb971ee 7218 FUN0OP(
14f0f125 7219 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7eb971ee 7220 );
46fc3d4c 7221
79072805 7222 case KEY___LINE__:
7eb971ee
FC
7223 FUN0OP(
7224 (OP*)newSVOP(OP_CONST, 0,
7225 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7226 );
68dc0745 7227
7228 case KEY___PACKAGE__:
7eb971ee
FC
7229 FUN0OP(
7230 (OP*)newSVOP(OP_CONST, 0,
3280af22 7231 (PL_curstash
5aaec2b4 7232 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
7233 : &PL_sv_undef))
7234 );
79072805 7235
e50aee73 7236 case KEY___DATA__:
79072805
LW
7237 case KEY___END__: {
7238 GV *gv;
3280af22 7239 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 7240 const char *pname = "main";
affc13fc
FC
7241 STRLEN plen = 4;
7242 U32 putf8 = 0;
3280af22 7243 if (PL_tokenbuf[2] == 'D')
affc13fc
FC
7244 {
7245 HV * const stash =
7246 PL_curstash ? PL_curstash : PL_defstash;
7247 pname = HvNAME_get(stash);
7248 plen = HvNAMELEN (stash);
7249 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7250 }
7251 gv = gv_fetchpvn_flags(
7252 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7253 plen+6, GV_ADD|putf8, SVt_PVIO
7254 );
a5f75d66 7255 GvMULTI_on(gv);
79072805 7256 if (!GvIO(gv))
a0d0e21e 7257 GvIOp(gv) = newIO();
3280af22 7258 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
7259#if defined(HAS_FCNTL) && defined(F_SETFD)
7260 {
f54cb97a 7261 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
7262 fcntl(fd,F_SETFD,fd >= 3);
7263 }
79072805 7264#endif
fd049845 7265 /* Mark this internal pseudo-handle as clean */
7266 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 7267 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 7268 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 7269 else
50952442 7270 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
7271#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7272 /* if the script was opened in binmode, we need to revert
53129d29 7273 * it to text mode for compatibility; but only iff it has CRs
c39cd008 7274 * XXX this is a questionable hack at best. */
53129d29
GS
7275 if (PL_bufend-PL_bufptr > 2
7276 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
7277 {
7278 Off_t loc = 0;
50952442 7279 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
7280 loc = PerlIO_tell(PL_rsfp);
7281 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7282 }
2986a63f
JH
7283#ifdef NETWARE
7284 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7285#else
c39cd008 7286 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 7287#endif /* NETWARE */
c39cd008
GS
7288 if (loc > 0)
7289 PerlIO_seek(PL_rsfp, loc, 0);
7290 }
7291 }
7292#endif
7948272d 7293#ifdef PERLIO_LAYERS
52d2e0f4
JH
7294 if (!IN_BYTES) {
7295 if (UTF)
7296 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7297 else if (PL_encoding) {
7298 SV *name;
7299 dSP;
7300 ENTER;
7301 SAVETMPS;
7302 PUSHMARK(sp);
7303 EXTEND(SP, 1);
7304 XPUSHs(PL_encoding);
7305 PUTBACK;
7306 call_method("name", G_SCALAR);
7307 SPAGAIN;
7308 name = POPs;
7309 PUTBACK;
bfed75c6 7310 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7311 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7312 SVfARG(name)));
52d2e0f4
JH
7313 FREETMPS;
7314 LEAVE;
7315 }
7316 }
7948272d 7317#endif
5db06880
NC
7318#ifdef PERL_MAD
7319 if (PL_madskills) {
cd81e915
NC
7320 if (PL_realtokenstart >= 0) {
7321 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7322 if (!PL_endwhite)
6b29d1f5 7323 PL_endwhite = newSVpvs("");
cd81e915
NC
7324 sv_catsv(PL_endwhite, PL_thiswhite);
7325 PL_thiswhite = 0;
7326 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7327 PL_realtokenstart = -1;
5db06880 7328 }
5cc814fd
NC
7329 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7330 != NULL) ;
5db06880
NC
7331 }
7332#endif
4608196e 7333 PL_rsfp = NULL;
79072805
LW
7334 }
7335 goto fake_eof;
e929a76b 7336 }
de3bb511 7337
84ed0108 7338 case KEY___SUB__:
1a35f9ff 7339 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
84ed0108 7340
8990e307 7341 case KEY_AUTOLOAD:
ed6116ce 7342 case KEY_DESTROY:
79072805 7343 case KEY_BEGIN:
3c10abe3 7344 case KEY_UNITCHECK:
7d30b5c4 7345 case KEY_CHECK:
7d07dbc2 7346 case KEY_INIT:
7d30b5c4 7347 case KEY_END:
3280af22
NIS
7348 if (PL_expect == XSTATE) {
7349 s = PL_bufptr;
93a17b20 7350 goto really_sub;
79072805
LW
7351 }
7352 goto just_a_word;
7353
a0d0e21e
LW
7354 case KEY_CORE:
7355 if (*s == ':' && s[1] == ':') {
ee36fb64 7356 STRLEN olen = len;
748a9306 7357 d = s;
ee36fb64 7358 s += 2;
3280af22 7359 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
ee36fb64
FC
7360 if ((*s == ':' && s[1] == ':')
7361 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7362 {
7363 s = d;
7364 len = olen;
7365 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7366 goto just_a_word;
7367 }
7368 if (!tmp)
3773592b
BF
7369 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7370 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7371 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
a0d0e21e
LW
7372 if (tmp < 0)
7373 tmp = -tmp;
d67594ff
FC
7374 else if (tmp == KEY_require || tmp == KEY_do
7375 || tmp == KEY_glob)
a72a1c8b 7376 /* that's a way to remember we saw "CORE::" */
850e8516 7377 orig_keyword = tmp;
a0d0e21e
LW
7378 goto reserved_word;
7379 }
7380 goto just_a_word;
7381
463ee0b2
LW
7382 case KEY_abs:
7383 UNI(OP_ABS);
7384
79072805
LW
7385 case KEY_alarm:
7386 UNI(OP_ALARM);
7387
7388 case KEY_accept:
a0d0e21e 7389 LOP(OP_ACCEPT,XTERM);
79072805 7390
463ee0b2 7391 case KEY_and:
78cdf107
Z
7392 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7393 return REPORT(0);
463ee0b2
LW
7394 OPERATOR(ANDOP);
7395
79072805 7396 case KEY_atan2:
a0d0e21e 7397 LOP(OP_ATAN2,XTERM);
85e6fe83 7398
79072805 7399 case KEY_bind:
a0d0e21e 7400 LOP(OP_BIND,XTERM);
79072805
LW
7401
7402 case KEY_binmode:
1c1fc3ea 7403 LOP(OP_BINMODE,XTERM);
79072805
LW
7404
7405 case KEY_bless:
a0d0e21e 7406 LOP(OP_BLESS,XTERM);
79072805 7407
0d863452
RH
7408 case KEY_break:
7409 FUN0(OP_BREAK);
7410
79072805
LW
7411 case KEY_chop:
7412 UNI(OP_CHOP);
7413
7414 case KEY_continue:
0d863452
RH
7415 /* We have to disambiguate the two senses of
7416 "continue". If the next token is a '{' then
7417 treat it as the start of a continue block;
7418 otherwise treat it as a control operator.
7419 */
7420 s = skipspace(s);
7421 if (*s == '{')
79072805 7422 PREBLOCK(CONTINUE);
0d863452
RH
7423 else
7424 FUN0(OP_CONTINUE);
79072805
LW
7425
7426 case KEY_chdir:
fafc274c
NC
7427 /* may use HOME */
7428 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7429 UNI(OP_CHDIR);
7430
7431 case KEY_close:
7432 UNI(OP_CLOSE);
7433
7434 case KEY_closedir:
7435 UNI(OP_CLOSEDIR);
7436
7437 case KEY_cmp:
78cdf107
Z
7438 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7439 return REPORT(0);
79072805
LW
7440 Eop(OP_SCMP);
7441
7442 case KEY_caller:
7443 UNI(OP_CALLER);
7444
7445 case KEY_crypt:
7446#ifdef FCRYPT
f4c556ac
GS
7447 if (!PL_cryptseen) {
7448 PL_cryptseen = TRUE;
de3bb511 7449 init_des();
f4c556ac 7450 }
a687059c 7451#endif
a0d0e21e 7452 LOP(OP_CRYPT,XTERM);
79072805
LW
7453
7454 case KEY_chmod:
a0d0e21e 7455 LOP(OP_CHMOD,XTERM);
79072805
LW
7456
7457 case KEY_chown:
a0d0e21e 7458 LOP(OP_CHOWN,XTERM);
79072805
LW
7459
7460 case KEY_connect:
a0d0e21e 7461 LOP(OP_CONNECT,XTERM);
79072805 7462
463ee0b2
LW
7463 case KEY_chr:
7464 UNI(OP_CHR);
7465
79072805
LW
7466 case KEY_cos:
7467 UNI(OP_COS);
7468
7469 case KEY_chroot:
7470 UNI(OP_CHROOT);
7471
0d863452
RH
7472 case KEY_default:
7473 PREBLOCK(DEFAULT);
7474
79072805 7475 case KEY_do:
29595ff2 7476 s = SKIPSPACE1(s);
79072805 7477 if (*s == '{')
a0d0e21e 7478 PRETERMBLOCK(DO);
c2900bb8 7479 if (*s != '\'') {
4b473a5a
FC
7480 *PL_tokenbuf = '&';
7481 d = scan_word(s, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
7482 1, &len);
7483 if (len && !keyword(PL_tokenbuf + 1, len, 0)) {
c2900bb8 7484 d = SKIPSPACE1(d);
4b473a5a 7485 if (*d == '(') {
60ac52eb 7486 force_ident_maybe_lex('&');
4b473a5a
FC
7487 s = d;
7488 }
c2900bb8
FC
7489 }
7490 }
850e8516
RGS
7491 if (orig_keyword == KEY_do) {
7492 orig_keyword = 0;
6154021b 7493 pl_yylval.ival = 1;
850e8516
RGS
7494 }
7495 else
6154021b 7496 pl_yylval.ival = 0;
378cc40b 7497 OPERATOR(DO);
79072805
LW
7498
7499 case KEY_die:
3280af22 7500 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7501 LOP(OP_DIE,XTERM);
79072805
LW
7502
7503 case KEY_defined:
7504 UNI(OP_DEFINED);
7505
7506 case KEY_delete:
a0d0e21e 7507 UNI(OP_DELETE);
79072805
LW
7508
7509 case KEY_dbmopen:
74e8ce34
NC
7510 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7511 STR_WITH_LEN("NDBM_File::"),
7512 STR_WITH_LEN("DB_File::"),
7513 STR_WITH_LEN("GDBM_File::"),
7514 STR_WITH_LEN("SDBM_File::"),
7515 STR_WITH_LEN("ODBM_File::"),
7516 NULL);
a0d0e21e 7517 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7518
7519 case KEY_dbmclose:
7520 UNI(OP_DBMCLOSE);
7521
7522 case KEY_dump:
c31f6d3b 7523 PL_expect = XOPERATOR;
a0d0e21e 7524 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7525 LOOPX(OP_DUMP);
7526
7527 case KEY_else:
7528 PREBLOCK(ELSE);
7529
7530 case KEY_elsif:
6154021b 7531 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7532 OPERATOR(ELSIF);
7533
7534 case KEY_eq:
78cdf107
Z
7535 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7536 return REPORT(0);
79072805
LW
7537 Eop(OP_SEQ);
7538
a0d0e21e
LW
7539 case KEY_exists:
7540 UNI(OP_EXISTS);
4e553d73 7541
79072805 7542 case KEY_exit:
5db06880
NC
7543 if (PL_madskills)
7544 UNI(OP_INT);
79072805
LW
7545 UNI(OP_EXIT);
7546
7547 case KEY_eval:
29595ff2 7548 s = SKIPSPACE1(s);
32e2a35d
RGS
7549 if (*s == '{') { /* block eval */
7550 PL_expect = XTERMBLOCK;
7551 UNIBRACK(OP_ENTERTRY);
7552 }
7553 else { /* string eval */
7554 PL_expect = XTERM;
7555 UNIBRACK(OP_ENTEREVAL);
7556 }
79072805 7557
7d789282
FC
7558 case KEY_evalbytes:
7559 PL_expect = XTERM;
7560 UNIBRACK(-OP_ENTEREVAL);
7561
79072805
LW
7562 case KEY_eof:
7563 UNI(OP_EOF);
7564
7565 case KEY_exp:
7566 UNI(OP_EXP);
7567
7568 case KEY_each:
7569 UNI(OP_EACH);
7570
7571 case KEY_exec:
a0d0e21e 7572 LOP(OP_EXEC,XREF);
79072805
LW
7573
7574 case KEY_endhostent:
7575 FUN0(OP_EHOSTENT);
7576
7577 case KEY_endnetent:
7578 FUN0(OP_ENETENT);
7579
7580 case KEY_endservent:
7581 FUN0(OP_ESERVENT);
7582
7583 case KEY_endprotoent:
7584 FUN0(OP_EPROTOENT);
7585
7586 case KEY_endpwent:
7587 FUN0(OP_EPWENT);
7588
7589 case KEY_endgrent:
7590 FUN0(OP_EGRENT);
7591
7592 case KEY_for:
7593 case KEY_foreach:
78cdf107
Z
7594 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7595 return REPORT(0);
6154021b 7596 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7597 s = SKIPSPACE1(s);
7e2040f0 7598 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7599 char *p = s;
5db06880
NC
7600#ifdef PERL_MAD
7601 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7602#endif
7603
3280af22 7604 if ((PL_bufend - p) >= 3 &&
55497cff 7605 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7606 p += 2;
77ca0c92
LW
7607 else if ((PL_bufend - p) >= 4 &&
7608 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7609 p += 3;
29595ff2 7610 p = PEEKSPACE(p);
7e2040f0 7611 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7612 p = scan_ident(p, PL_bufend,
7613 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7614 p = PEEKSPACE(p);
77ca0c92
LW
7615 }
7616 if (*p != '$')
cea2e8a9 7617 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7618#ifdef PERL_MAD
7619 s = SvPVX(PL_linestr) + soff;
7620#endif
55497cff 7621 }
79072805
LW
7622 OPERATOR(FOR);
7623
7624 case KEY_formline:
a0d0e21e 7625 LOP(OP_FORMLINE,XTERM);
79072805
LW
7626
7627 case KEY_fork:
7628 FUN0(OP_FORK);
7629
838f2281
BF
7630 case KEY_fc:
7631 UNI(OP_FC);
7632
79072805 7633 case KEY_fcntl:
a0d0e21e 7634 LOP(OP_FCNTL,XTERM);
79072805
LW
7635
7636 case KEY_fileno:
7637 UNI(OP_FILENO);
7638
7639 case KEY_flock:
a0d0e21e 7640 LOP(OP_FLOCK,XTERM);
79072805
LW
7641
7642 case KEY_gt:
78cdf107
Z
7643 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7644 return REPORT(0);
79072805
LW
7645 Rop(OP_SGT);
7646
7647 case KEY_ge:
78cdf107
Z
7648 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7649 return REPORT(0);
79072805
LW
7650 Rop(OP_SGE);
7651
7652 case KEY_grep:
2c38e13d 7653 LOP(OP_GREPSTART, XREF);
79072805
LW
7654
7655 case KEY_goto:
c31f6d3b 7656 PL_expect = XOPERATOR;
a0d0e21e 7657 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7658 LOOPX(OP_GOTO);
7659
7660 case KEY_gmtime:
7661 UNI(OP_GMTIME);
7662
7663 case KEY_getc:
6f33ba73 7664 UNIDOR(OP_GETC);
79072805
LW
7665
7666 case KEY_getppid:
7667 FUN0(OP_GETPPID);
7668
7669 case KEY_getpgrp:
7670 UNI(OP_GETPGRP);
7671
7672 case KEY_getpriority:
a0d0e21e 7673 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7674
7675 case KEY_getprotobyname:
7676 UNI(OP_GPBYNAME);
7677
7678 case KEY_getprotobynumber:
a0d0e21e 7679 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7680
7681 case KEY_getprotoent:
7682 FUN0(OP_GPROTOENT);
7683
7684 case KEY_getpwent:
7685 FUN0(OP_GPWENT);
7686
7687 case KEY_getpwnam:
ff68c719 7688 UNI(OP_GPWNAM);
79072805
LW
7689
7690 case KEY_getpwuid:
ff68c719 7691 UNI(OP_GPWUID);
79072805
LW
7692
7693 case KEY_getpeername:
7694 UNI(OP_GETPEERNAME);
7695
7696 case KEY_gethostbyname:
7697 UNI(OP_GHBYNAME);
7698
7699 case KEY_gethostbyaddr:
a0d0e21e 7700 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7701
7702 case KEY_gethostent:
7703 FUN0(OP_GHOSTENT);
7704
7705 case KEY_getnetbyname:
7706 UNI(OP_GNBYNAME);
7707
7708 case KEY_getnetbyaddr:
a0d0e21e 7709 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7710
7711 case KEY_getnetent:
7712 FUN0(OP_GNETENT);
7713
7714 case KEY_getservbyname:
a0d0e21e 7715 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7716
7717 case KEY_getservbyport:
a0d0e21e 7718 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7719
7720 case KEY_getservent:
7721 FUN0(OP_GSERVENT);
7722
7723 case KEY_getsockname:
7724 UNI(OP_GETSOCKNAME);
7725
7726 case KEY_getsockopt:
a0d0e21e 7727 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7728
7729 case KEY_getgrent:
7730 FUN0(OP_GGRENT);
7731
7732 case KEY_getgrnam:
ff68c719 7733 UNI(OP_GGRNAM);
79072805
LW
7734
7735 case KEY_getgrgid:
ff68c719 7736 UNI(OP_GGRGID);
79072805
LW
7737
7738 case KEY_getlogin:
7739 FUN0(OP_GETLOGIN);
7740
0d863452 7741 case KEY_given:
6154021b 7742 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7743 OPERATOR(GIVEN);
7744
93a17b20 7745 case KEY_glob:
d67594ff
FC
7746 LOP(
7747 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7748 XTERM
7749 );
93a17b20 7750
79072805
LW
7751 case KEY_hex:
7752 UNI(OP_HEX);
7753
7754 case KEY_if:
78cdf107
Z
7755 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7756 return REPORT(0);
6154021b 7757 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7758 OPERATOR(IF);
7759
7760 case KEY_index:
a0d0e21e 7761 LOP(OP_INDEX,XTERM);
79072805
LW
7762
7763 case KEY_int:
7764 UNI(OP_INT);
7765
7766 case KEY_ioctl:
a0d0e21e 7767 LOP(OP_IOCTL,XTERM);
79072805
LW
7768
7769 case KEY_join:
a0d0e21e 7770 LOP(OP_JOIN,XTERM);
79072805
LW
7771
7772 case KEY_keys:
7773 UNI(OP_KEYS);
7774
7775 case KEY_kill:
a0d0e21e 7776 LOP(OP_KILL,XTERM);
79072805
LW
7777
7778 case KEY_last:
c31f6d3b 7779 PL_expect = XOPERATOR;
a0d0e21e 7780 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7781 LOOPX(OP_LAST);
4e553d73 7782
79072805
LW
7783 case KEY_lc:
7784 UNI(OP_LC);
7785
7786 case KEY_lcfirst:
7787 UNI(OP_LCFIRST);
7788
7789 case KEY_local:
6154021b 7790 pl_yylval.ival = 0;
79072805
LW
7791 OPERATOR(LOCAL);
7792
7793 case KEY_length:
7794 UNI(OP_LENGTH);
7795
7796 case KEY_lt:
78cdf107
Z
7797 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7798 return REPORT(0);
79072805
LW
7799 Rop(OP_SLT);
7800
7801 case KEY_le:
78cdf107
Z
7802 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7803 return REPORT(0);
79072805
LW
7804 Rop(OP_SLE);
7805
7806 case KEY_localtime:
7807 UNI(OP_LOCALTIME);
7808
7809 case KEY_log:
7810 UNI(OP_LOG);
7811
7812 case KEY_link:
a0d0e21e 7813 LOP(OP_LINK,XTERM);
79072805
LW
7814
7815 case KEY_listen:
a0d0e21e 7816 LOP(OP_LISTEN,XTERM);
79072805 7817
c0329465
MB
7818 case KEY_lock:
7819 UNI(OP_LOCK);
7820
79072805
LW
7821 case KEY_lstat:
7822 UNI(OP_LSTAT);
7823
7824 case KEY_m:
8782bef2 7825 s = scan_pat(s,OP_MATCH);
79072805
LW
7826 TERM(sublex_start());
7827
a0d0e21e 7828 case KEY_map:
2c38e13d 7829 LOP(OP_MAPSTART, XREF);
4e4e412b 7830
79072805 7831 case KEY_mkdir:
a0d0e21e 7832 LOP(OP_MKDIR,XTERM);
79072805
LW
7833
7834 case KEY_msgctl:
a0d0e21e 7835 LOP(OP_MSGCTL,XTERM);
79072805
LW
7836
7837 case KEY_msgget:
a0d0e21e 7838 LOP(OP_MSGGET,XTERM);
79072805
LW
7839
7840 case KEY_msgrcv:
a0d0e21e 7841 LOP(OP_MSGRCV,XTERM);
79072805
LW
7842
7843 case KEY_msgsnd:
a0d0e21e 7844 LOP(OP_MSGSND,XTERM);
79072805 7845
77ca0c92 7846 case KEY_our:
93a17b20 7847 case KEY_my:
952306ac 7848 case KEY_state:
eac04b2e 7849 PL_in_my = (U16)tmp;
29595ff2 7850 s = SKIPSPACE1(s);
7e2040f0 7851 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7852#ifdef PERL_MAD
7853 char* start = s;
7854#endif
3280af22 7855 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843 7856 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
e7d0b801
FC
7857 {
7858 if (!FEATURE_LEXSUBS_IS_ENABLED)
7859 Perl_croak(aTHX_
7860 "Experimental \"%s\" subs not enabled",
7861 tmp == KEY_my ? "my" :
7862 tmp == KEY_state ? "state" : "our");
09bef843 7863 goto really_sub;
e7d0b801 7864 }
def3634b 7865 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7866 if (!PL_in_my_stash) {
c750a3ec 7867 char tmpbuf[1024];
3280af22 7868 PL_bufptr = s;
d9fad198 7869 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
3c54b17a 7870 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
c750a3ec 7871 }
5db06880
NC
7872#ifdef PERL_MAD
7873 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7874 sv_catsv(PL_thistoken, PL_nextwhite);
7875 PL_nextwhite = 0;
7876 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7877 }
7878#endif
c750a3ec 7879 }
6154021b 7880 pl_yylval.ival = 1;
55497cff 7881 OPERATOR(MY);
93a17b20 7882
79072805 7883 case KEY_next:
c31f6d3b 7884 PL_expect = XOPERATOR;
a0d0e21e 7885 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7886 LOOPX(OP_NEXT);
7887
7888 case KEY_ne:
78cdf107
Z
7889 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7890 return REPORT(0);
79072805
LW
7891 Eop(OP_SNE);
7892
a0d0e21e 7893 case KEY_no:
468aa647 7894 s = tokenize_use(0, s);
52d0e95b 7895 TERM(USE);
a0d0e21e
LW
7896
7897 case KEY_not:
29595ff2 7898 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 7899 FUN1(OP_NOT);
78cdf107
Z
7900 else {
7901 if (!PL_lex_allbrackets &&
7902 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7903 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 7904 OPERATOR(NOTOP);
78cdf107 7905 }
a0d0e21e 7906
79072805 7907 case KEY_open:
29595ff2 7908 s = SKIPSPACE1(s);
7e2040f0 7909 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7910 const char *t;
71aa9713
BF
7911 for (d = s; isALNUM_lazy_if(d,UTF);) {
7912 d += UTF ? UTF8SKIP(d) : 1;
7913 if (UTF) {
7914 while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
7915 d += UTF ? UTF8SKIP(d) : 1;
7916 }
7917 }
7918 }
c35e046a
AL
7919 for (t=d; isSPACE(*t);)
7920 t++;
e2ab214b 7921 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7922 /* [perl #16184] */
7923 && !(t[0] == '=' && t[1] == '>')
db3abe52 7924 && !(t[0] == ':' && t[1] == ':')
240d1b6f 7925 && !keyword(s, d-s, 0)
66fbe8fb 7926 ) {
71aa9713
BF
7927 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
7928 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
9014280d 7929 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
71aa9713
BF
7930 "Precedence problem: open %"SVf" should be open(%"SVf")",
7931 SVfARG(tmpsv), SVfARG(tmpsv));
66fbe8fb 7932 }
93a17b20 7933 }
a0d0e21e 7934 LOP(OP_OPEN,XTERM);
79072805 7935
463ee0b2 7936 case KEY_or:
78cdf107
Z
7937 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7938 return REPORT(0);
6154021b 7939 pl_yylval.ival = OP_OR;
463ee0b2
LW
7940 OPERATOR(OROP);
7941
79072805
LW
7942 case KEY_ord:
7943 UNI(OP_ORD);
7944
7945 case KEY_oct:
7946 UNI(OP_OCT);
7947
7948 case KEY_opendir:
a0d0e21e 7949 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7950
7951 case KEY_print:
3280af22 7952 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7953 LOP(OP_PRINT,XREF);
79072805
LW
7954
7955 case KEY_printf:
3280af22 7956 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7957 LOP(OP_PRTF,XREF);
79072805 7958
c07a80fd 7959 case KEY_prototype:
7960 UNI(OP_PROTOTYPE);
7961
79072805 7962 case KEY_push:
a0d0e21e 7963 LOP(OP_PUSH,XTERM);
79072805
LW
7964
7965 case KEY_pop:
6f33ba73 7966 UNIDOR(OP_POP);
79072805 7967
a0d0e21e 7968 case KEY_pos:
6f33ba73 7969 UNIDOR(OP_POS);
4e553d73 7970
79072805 7971 case KEY_pack:
a0d0e21e 7972 LOP(OP_PACK,XTERM);
79072805
LW
7973
7974 case KEY_package:
a0d0e21e 7975 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7976 s = SKIPSPACE1(s);
91152fc1 7977 s = force_strict_version(s);
4e4da3ac 7978 PL_lex_expect = XBLOCK;
79072805
LW
7979 OPERATOR(PACKAGE);
7980
7981 case KEY_pipe:
a0d0e21e 7982 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7983
7984 case KEY_q:
d24ca0c5 7985 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 7986 if (!s)
d4c19fe8 7987 missingterm(NULL);
6154021b 7988 pl_yylval.ival = OP_CONST;
79072805
LW
7989 TERM(sublex_start());
7990
a0d0e21e
LW
7991 case KEY_quotemeta:
7992 UNI(OP_QUOTEMETA);
7993
ea25a9b2
Z
7994 case KEY_qw: {
7995 OP *words = NULL;
d24ca0c5 7996 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8990e307 7997 if (!s)
d4c19fe8 7998 missingterm(NULL);
3480a8d2 7999 PL_expect = XOPERATOR;
8127e0e3 8000 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
8001 int warned_comma = !ckWARN(WARN_QW);
8002 int warned_comment = warned_comma;
3280af22 8003 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 8004 while (len) {
d4c19fe8
AL
8005 for (; isSPACE(*d) && len; --len, ++d)
8006 /**/;
8127e0e3 8007 if (len) {
d4c19fe8 8008 SV *sv;
f54cb97a 8009 const char *b = d;
7e03b518 8010 if (!warned_comma || !warned_comment) {
8127e0e3 8011 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 8012 if (!warned_comma && *d == ',') {
9014280d 8013 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 8014 "Possible attempt to separate words with commas");
7e03b518 8015 ++warned_comma;
8127e0e3 8016 }
7e03b518 8017 else if (!warned_comment && *d == '#') {
9014280d 8018 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 8019 "Possible attempt to put comments in qw() list");
7e03b518 8020 ++warned_comment;
8127e0e3
GS
8021 }
8022 }
8023 }
8024 else {
d4c19fe8
AL
8025 for (; !isSPACE(*d) && len; --len, ++d)
8026 /**/;
8127e0e3 8027 }
740cce10 8028 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 8029 words = op_append_elem(OP_LIST, words,
7948272d 8030 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 8031 }
8032 }
8033 }
ea25a9b2
Z
8034 if (!words)
8035 words = newNULLLIST();
37fd879b 8036 if (PL_lex_stuff) {
8127e0e3 8037 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 8038 PL_lex_stuff = NULL;
37fd879b 8039 }
ea25a9b2
Z
8040 PL_expect = XOPERATOR;
8041 pl_yylval.opval = sawparens(words);
8042 TOKEN(QWLIST);
8043 }
8990e307 8044
79072805 8045 case KEY_qq:
d24ca0c5 8046 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 8047 if (!s)
d4c19fe8 8048 missingterm(NULL);
6154021b 8049 pl_yylval.ival = OP_STRINGIFY;
3280af22 8050 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 8051 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
8052 TERM(sublex_start());
8053
8782bef2
GB
8054 case KEY_qr:
8055 s = scan_pat(s,OP_QR);
8056 TERM(sublex_start());
8057
79072805 8058 case KEY_qx:
d24ca0c5 8059 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 8060 if (!s)
d4c19fe8 8061 missingterm(NULL);
9b201d7d 8062 readpipe_override();
79072805
LW
8063 TERM(sublex_start());
8064
8065 case KEY_return:
8066 OLDLOP(OP_RETURN);
8067
8068 case KEY_require:
29595ff2 8069 s = SKIPSPACE1(s);
c31f6d3b 8070 PL_expect = XOPERATOR;
e759cc13
RGS
8071 if (isDIGIT(*s)) {
8072 s = force_version(s, FALSE);
a7cb1f99 8073 }
e759cc13
RGS
8074 else if (*s != 'v' || !isDIGIT(s[1])
8075 || (s = force_version(s, TRUE), *s == 'v'))
8076 {
a7cb1f99
GS
8077 *PL_tokenbuf = '\0';
8078 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 8079 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
af9f5953
BF
8080 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8081 GV_ADD | (UTF ? SVf_UTF8 : 0));
a7cb1f99
GS
8082 else if (*s == '<')
8083 yyerror("<> should be quotes");
8084 }
a72a1c8b
RGS
8085 if (orig_keyword == KEY_require) {
8086 orig_keyword = 0;
6154021b 8087 pl_yylval.ival = 1;
a72a1c8b
RGS
8088 }
8089 else
6154021b 8090 pl_yylval.ival = 0;
a72a1c8b
RGS
8091 PL_expect = XTERM;
8092 PL_bufptr = s;
8093 PL_last_uni = PL_oldbufptr;
8094 PL_last_lop_op = OP_REQUIRE;
8095 s = skipspace(s);
8096 return REPORT( (int)REQUIRE );
79072805
LW
8097
8098 case KEY_reset:
8099 UNI(OP_RESET);
8100
8101 case KEY_redo:
c31f6d3b 8102 PL_expect = XOPERATOR;
a0d0e21e 8103 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
8104 LOOPX(OP_REDO);
8105
8106 case KEY_rename:
a0d0e21e 8107 LOP(OP_RENAME,XTERM);
79072805
LW
8108
8109 case KEY_rand:
8110 UNI(OP_RAND);
8111
8112 case KEY_rmdir:
8113 UNI(OP_RMDIR);
8114
8115 case KEY_rindex:
a0d0e21e 8116 LOP(OP_RINDEX,XTERM);
79072805
LW
8117
8118 case KEY_read:
a0d0e21e 8119 LOP(OP_READ,XTERM);
79072805
LW
8120
8121 case KEY_readdir:
8122 UNI(OP_READDIR);
8123
93a17b20 8124 case KEY_readline:
6f33ba73 8125 UNIDOR(OP_READLINE);
93a17b20
LW
8126
8127 case KEY_readpipe:
0858480c 8128 UNIDOR(OP_BACKTICK);
93a17b20 8129
79072805
LW
8130 case KEY_rewinddir:
8131 UNI(OP_REWINDDIR);
8132
8133 case KEY_recv:
a0d0e21e 8134 LOP(OP_RECV,XTERM);
79072805
LW
8135
8136 case KEY_reverse:
a0d0e21e 8137 LOP(OP_REVERSE,XTERM);
79072805
LW
8138
8139 case KEY_readlink:
6f33ba73 8140 UNIDOR(OP_READLINK);
79072805
LW
8141
8142 case KEY_ref:
8143 UNI(OP_REF);
8144
8145 case KEY_s:
8146 s = scan_subst(s);
6154021b 8147 if (pl_yylval.opval)
79072805
LW
8148 TERM(sublex_start());
8149 else
8150 TOKEN(1); /* force error */
8151
0d863452
RH
8152 case KEY_say:
8153 checkcomma(s,PL_tokenbuf,"filehandle");
8154 LOP(OP_SAY,XREF);
8155
a0d0e21e
LW
8156 case KEY_chomp:
8157 UNI(OP_CHOMP);
4e553d73 8158
79072805
LW
8159 case KEY_scalar:
8160 UNI(OP_SCALAR);
8161
8162 case KEY_select:
a0d0e21e 8163 LOP(OP_SELECT,XTERM);
79072805
LW
8164
8165 case KEY_seek:
a0d0e21e 8166 LOP(OP_SEEK,XTERM);
79072805
LW
8167
8168 case KEY_semctl:
a0d0e21e 8169 LOP(OP_SEMCTL,XTERM);
79072805
LW
8170
8171 case KEY_semget:
a0d0e21e 8172 LOP(OP_SEMGET,XTERM);
79072805
LW
8173
8174 case KEY_semop:
a0d0e21e 8175 LOP(OP_SEMOP,XTERM);
79072805
LW
8176
8177 case KEY_send:
a0d0e21e 8178 LOP(OP_SEND,XTERM);
79072805
LW
8179
8180 case KEY_setpgrp:
a0d0e21e 8181 LOP(OP_SETPGRP,XTERM);
79072805
LW
8182
8183 case KEY_setpriority:
a0d0e21e 8184 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
8185
8186 case KEY_sethostent:
ff68c719 8187 UNI(OP_SHOSTENT);
79072805
LW
8188
8189 case KEY_setnetent:
ff68c719 8190 UNI(OP_SNETENT);
79072805
LW
8191
8192 case KEY_setservent:
ff68c719 8193 UNI(OP_SSERVENT);
79072805
LW
8194
8195 case KEY_setprotoent:
ff68c719 8196 UNI(OP_SPROTOENT);
79072805
LW
8197
8198 case KEY_setpwent:
8199 FUN0(OP_SPWENT);
8200
8201 case KEY_setgrent:
8202 FUN0(OP_SGRENT);
8203
8204 case KEY_seekdir:
a0d0e21e 8205 LOP(OP_SEEKDIR,XTERM);
79072805
LW
8206
8207 case KEY_setsockopt:
a0d0e21e 8208 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
8209
8210 case KEY_shift:
6f33ba73 8211 UNIDOR(OP_SHIFT);
79072805
LW
8212
8213 case KEY_shmctl:
a0d0e21e 8214 LOP(OP_SHMCTL,XTERM);
79072805
LW
8215
8216 case KEY_shmget:
a0d0e21e 8217 LOP(OP_SHMGET,XTERM);
79072805
LW
8218
8219 case KEY_shmread:
a0d0e21e 8220 LOP(OP_SHMREAD,XTERM);
79072805
LW
8221
8222 case KEY_shmwrite:
a0d0e21e 8223 LOP(OP_SHMWRITE,XTERM);
79072805
LW
8224
8225 case KEY_shutdown:
a0d0e21e 8226 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
8227
8228 case KEY_sin:
8229 UNI(OP_SIN);
8230
8231 case KEY_sleep:
8232 UNI(OP_SLEEP);
8233
8234 case KEY_socket:
a0d0e21e 8235 LOP(OP_SOCKET,XTERM);
79072805
LW
8236
8237 case KEY_socketpair:
a0d0e21e 8238 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
8239
8240 case KEY_sort:
3280af22 8241 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 8242 s = SKIPSPACE1(s);
3280af22 8243 PL_expect = XTERM;
15f0808c 8244 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 8245 LOP(OP_SORT,XREF);
79072805
LW
8246
8247 case KEY_split:
a0d0e21e 8248 LOP(OP_SPLIT,XTERM);
79072805
LW
8249
8250 case KEY_sprintf:
a0d0e21e 8251 LOP(OP_SPRINTF,XTERM);
79072805
LW
8252
8253 case KEY_splice:
a0d0e21e 8254 LOP(OP_SPLICE,XTERM);
79072805
LW
8255
8256 case KEY_sqrt:
8257 UNI(OP_SQRT);
8258
8259 case KEY_srand:
8260 UNI(OP_SRAND);
8261
8262 case KEY_stat:
8263 UNI(OP_STAT);
8264
8265 case KEY_study:
79072805
LW
8266 UNI(OP_STUDY);
8267
8268 case KEY_substr:
a0d0e21e 8269 LOP(OP_SUBSTR,XTERM);
79072805
LW
8270
8271 case KEY_format:
8272 case KEY_sub:
93a17b20 8273 really_sub:
09bef843 8274 {
24b6ef70 8275 char * const tmpbuf = PL_tokenbuf + 1;
9c5ffd7c 8276 SSize_t tboffset = 0;
09bef843 8277 expectation attrful;
28cc6278 8278 bool have_name, have_proto;
f54cb97a 8279 const int key = tmp;
09bef843 8280
5db06880
NC
8281#ifdef PERL_MAD
8282 SV *tmpwhite = 0;
8283
cd81e915 8284 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
af9f5953 8285 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
cd81e915 8286 PL_thistoken = 0;
5db06880
NC
8287
8288 d = s;
8289 s = SKIPSPACE2(s,tmpwhite);
8290#else
8767b1ab 8291 d = s;
09bef843 8292 s = skipspace(s);
5db06880 8293#endif
09bef843 8294
7e2040f0 8295 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
8296 (*s == ':' && s[1] == ':'))
8297 {
5db06880 8298#ifdef PERL_MAD
4f61fd4b 8299 SV *nametoke = NULL;
5db06880
NC
8300#endif
8301
09bef843
SB
8302 PL_expect = XBLOCK;
8303 attrful = XATTRBLOCK;
b1b65b59
JH
8304 /* remember buffer pos'n for later force_word */
8305 tboffset = s - PL_oldbufptr;
24b6ef70
FC
8306 d = scan_word(s, tmpbuf, sizeof PL_tokenbuf - 1, TRUE,
8307 &len);
5db06880
NC
8308#ifdef PERL_MAD
8309 if (PL_madskills)
af9f5953 8310 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
5db06880 8311#endif
689aac7b
FC
8312 *PL_tokenbuf = '&';
8313 if (memchr(tmpbuf, ':', len) || key != KEY_sub
8314 || pad_findmy_pvn(
8315 PL_tokenbuf, len + 1, UTF ? SVf_UTF8 : 0
8316 ) != NOT_IN_PAD)
6502358f 8317 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
8318 else {
8319 sv_setsv(PL_subname,PL_curstname);
396482e1 8320 sv_catpvs(PL_subname,"::");
09bef843
SB
8321 sv_catpvn(PL_subname,tmpbuf,len);
8322 }
af9f5953
BF
8323 if (SvUTF8(PL_linestr))
8324 SvUTF8_on(PL_subname);
09bef843 8325 have_name = TRUE;
5db06880 8326
60ac52eb 8327
5db06880 8328#ifdef PERL_MAD
60ac52eb
FC
8329 start_force(0);
8330 CURMAD('X', nametoke);
8331 CURMAD('_', tmpwhite);
4210d3f1 8332 force_ident_maybe_lex('&');
5db06880
NC
8333
8334 s = SKIPSPACE2(d,tmpwhite);
8335#else
8336 s = skipspace(d);
8337#endif
09bef843 8338 }
463ee0b2 8339 else {
8767b1ab
FC
8340 if (key == KEY_my || key == KEY_our || key==KEY_state)
8341 {
8342 *d = '\0';
8343 /* diag_listed_as: Missing name in "%s sub" */
8344 Perl_croak(aTHX_
8345 "Missing name in \"%s\"", PL_bufptr);
8346 }
09bef843
SB
8347 PL_expect = XTERMBLOCK;
8348 attrful = XATTRTERM;
76f68e9b 8349 sv_setpvs(PL_subname,"?");
09bef843 8350 have_name = FALSE;
463ee0b2 8351 }
4633a7c4 8352
09bef843 8353 if (key == KEY_format) {
5db06880 8354#ifdef PERL_MAD
cd81e915 8355 PL_thistoken = subtoken;
5db06880
NC
8356 s = d;
8357#else
09bef843 8358 if (have_name)
b1b65b59
JH
8359 (void) force_word(PL_oldbufptr + tboffset, WORD,
8360 FALSE, TRUE, TRUE);
5db06880 8361#endif
64a40898 8362 PREBLOCK(FORMAT);
09bef843 8363 }
79072805 8364
09bef843
SB
8365 /* Look for a prototype */
8366 if (*s == '(') {
d9f2850e
RGS
8367 char *p;
8368 bool bad_proto = FALSE;
9e8d7757
RB
8369 bool in_brackets = FALSE;
8370 char greedy_proto = ' ';
8371 bool proto_after_greedy_proto = FALSE;
8372 bool must_be_last = FALSE;
8373 bool underscore = FALSE;
aef2a98a 8374 bool seen_underscore = FALSE;
197afce1 8375 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
dab1c735 8376 STRLEN tmplen;
09bef843 8377
d24ca0c5 8378 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
37fd879b 8379 if (!s)
09bef843 8380 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8381 /* strip spaces and check for bad characters */
dab1c735 8382 d = SvPV(PL_lex_stuff, tmplen);
09bef843 8383 tmp = 0;
dab1c735 8384 for (p = d; tmplen; tmplen--, ++p) {
d9f2850e 8385 if (!isSPACE(*p)) {
dab1c735 8386 d[tmp++] = *p;
9e8d7757 8387
197afce1 8388 if (warnillegalproto) {
9e8d7757
RB
8389 if (must_be_last)
8390 proto_after_greedy_proto = TRUE;
dab1c735 8391 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
9e8d7757
RB
8392 bad_proto = TRUE;
8393 }
8394 else {
8395 if ( underscore ) {
34daab0f 8396 if ( !strchr(";@%", *p) )
9e8d7757
RB
8397 bad_proto = TRUE;
8398 underscore = FALSE;
8399 }
8400 if ( *p == '[' ) {
8401 in_brackets = TRUE;
8402 }
8403 else if ( *p == ']' ) {
8404 in_brackets = FALSE;
8405 }
8406 else if ( (*p == '@' || *p == '%') &&
8407 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8408 !in_brackets ) {
8409 must_be_last = TRUE;
8410 greedy_proto = *p;
8411 }
8412 else if ( *p == '_' ) {
aef2a98a 8413 underscore = seen_underscore = TRUE;
9e8d7757
RB
8414 }
8415 }
8416 }
d37a9538 8417 }
09bef843 8418 }
dab1c735 8419 d[tmp] = '\0';
9e8d7757 8420 if (proto_after_greedy_proto)
197afce1 8421 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8422 "Prototype after '%c' for %"SVf" : %s",
8423 greedy_proto, SVfARG(PL_subname), d);
dab1c735
BF
8424 if (bad_proto) {
8425 SV *dsv = newSVpvs_flags("", SVs_TEMP);
197afce1 8426 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8427 "Illegal character %sin prototype for %"SVf" : %s",
8428 seen_underscore ? "after '_' " : "",
dab1c735 8429 SVfARG(PL_subname),
97eb901d
BF
8430 SvUTF8(PL_lex_stuff)
8431 ? sv_uni_display(dsv,
8432 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8433 tmp,
8434 UNI_DISPLAY_ISPRINT)
8435 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8436 PERL_PV_ESCAPE_NONASCII));
dab1c735
BF
8437 }
8438 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8439 have_proto = TRUE;
68dc0745 8440
5db06880
NC
8441#ifdef PERL_MAD
8442 start_force(0);
cd81e915 8443 CURMAD('q', PL_thisopen);
5db06880 8444 CURMAD('_', tmpwhite);
cd81e915
NC
8445 CURMAD('=', PL_thisstuff);
8446 CURMAD('Q', PL_thisclose);
5db06880
NC
8447 NEXTVAL_NEXTTOKE.opval =
8448 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8449 PL_lex_stuff = NULL;
5db06880
NC
8450 force_next(THING);
8451
8452 s = SKIPSPACE2(s,tmpwhite);
8453#else
09bef843 8454 s = skipspace(s);
5db06880 8455#endif
4633a7c4 8456 }
09bef843
SB
8457 else
8458 have_proto = FALSE;
8459
8460 if (*s == ':' && s[1] != ':')
8461 PL_expect = attrful;
8e742a20
MHM
8462 else if (*s != '{' && key == KEY_sub) {
8463 if (!have_name)
8464 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8465 else if (*s != ';' && *s != '}')
be2597df 8466 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8467 }
09bef843 8468
5db06880
NC
8469#ifdef PERL_MAD
8470 start_force(0);
8471 if (tmpwhite) {
8472 if (PL_madskills)
6b29d1f5 8473 curmad('^', newSVpvs(""));
5db06880
NC
8474 CURMAD('_', tmpwhite);
8475 }
8476 force_next(0);
8477
cd81e915 8478 PL_thistoken = subtoken;
5db06880 8479#else
09bef843 8480 if (have_proto) {
9ded7720 8481 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8482 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8483 PL_lex_stuff = NULL;
09bef843 8484 force_next(THING);
68dc0745 8485 }
5db06880 8486#endif
09bef843 8487 if (!have_name) {
49a54bbe
NC
8488 if (PL_curstash)
8489 sv_setpvs(PL_subname, "__ANON__");
8490 else
8491 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8492 TOKEN(ANONSUB);
4633a7c4 8493 }
5db06880 8494#ifndef PERL_MAD
4210d3f1 8495 force_ident_maybe_lex('&');
5db06880 8496#endif
09bef843 8497 TOKEN(SUB);
4633a7c4 8498 }
79072805
LW
8499
8500 case KEY_system:
a0d0e21e 8501 LOP(OP_SYSTEM,XREF);
79072805
LW
8502
8503 case KEY_symlink:
a0d0e21e 8504 LOP(OP_SYMLINK,XTERM);
79072805
LW
8505
8506 case KEY_syscall:
a0d0e21e 8507 LOP(OP_SYSCALL,XTERM);
79072805 8508
c07a80fd 8509 case KEY_sysopen:
8510 LOP(OP_SYSOPEN,XTERM);
8511
137443ea 8512 case KEY_sysseek:
8513 LOP(OP_SYSSEEK,XTERM);
8514
79072805 8515 case KEY_sysread:
a0d0e21e 8516 LOP(OP_SYSREAD,XTERM);
79072805
LW
8517
8518 case KEY_syswrite:
a0d0e21e 8519 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8520
8521 case KEY_tr:
8ce4b50f 8522 case KEY_y:
79072805
LW
8523 s = scan_trans(s);
8524 TERM(sublex_start());
8525
8526 case KEY_tell:
8527 UNI(OP_TELL);
8528
8529 case KEY_telldir:
8530 UNI(OP_TELLDIR);
8531
463ee0b2 8532 case KEY_tie:
a0d0e21e 8533 LOP(OP_TIE,XTERM);
463ee0b2 8534
c07a80fd 8535 case KEY_tied:
8536 UNI(OP_TIED);
8537
79072805
LW
8538 case KEY_time:
8539 FUN0(OP_TIME);
8540
8541 case KEY_times:
8542 FUN0(OP_TMS);
8543
8544 case KEY_truncate:
a0d0e21e 8545 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8546
8547 case KEY_uc:
8548 UNI(OP_UC);
8549
8550 case KEY_ucfirst:
8551 UNI(OP_UCFIRST);
8552
463ee0b2
LW
8553 case KEY_untie:
8554 UNI(OP_UNTIE);
8555
79072805 8556 case KEY_until:
78cdf107
Z
8557 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8558 return REPORT(0);
6154021b 8559 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8560 OPERATOR(UNTIL);
8561
8562 case KEY_unless:
78cdf107
Z
8563 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8564 return REPORT(0);
6154021b 8565 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8566 OPERATOR(UNLESS);
8567
8568 case KEY_unlink:
a0d0e21e 8569 LOP(OP_UNLINK,XTERM);
79072805
LW
8570
8571 case KEY_undef:
6f33ba73 8572 UNIDOR(OP_UNDEF);
79072805
LW
8573
8574 case KEY_unpack:
a0d0e21e 8575 LOP(OP_UNPACK,XTERM);
79072805
LW
8576
8577 case KEY_utime:
a0d0e21e 8578 LOP(OP_UTIME,XTERM);
79072805
LW
8579
8580 case KEY_umask:
6f33ba73 8581 UNIDOR(OP_UMASK);
79072805
LW
8582
8583 case KEY_unshift:
a0d0e21e
LW
8584 LOP(OP_UNSHIFT,XTERM);
8585
8586 case KEY_use:
468aa647 8587 s = tokenize_use(1, s);
a0d0e21e 8588 OPERATOR(USE);
79072805
LW
8589
8590 case KEY_values:
8591 UNI(OP_VALUES);
8592
8593 case KEY_vec:
a0d0e21e 8594 LOP(OP_VEC,XTERM);
79072805 8595
0d863452 8596 case KEY_when:
78cdf107
Z
8597 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8598 return REPORT(0);
6154021b 8599 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8600 OPERATOR(WHEN);
8601
79072805 8602 case KEY_while:
78cdf107
Z
8603 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8604 return REPORT(0);
6154021b 8605 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8606 OPERATOR(WHILE);
8607
8608 case KEY_warn:
3280af22 8609 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8610 LOP(OP_WARN,XTERM);
79072805
LW
8611
8612 case KEY_wait:
8613 FUN0(OP_WAIT);
8614
8615 case KEY_waitpid:
a0d0e21e 8616 LOP(OP_WAITPID,XTERM);
79072805
LW
8617
8618 case KEY_wantarray:
8619 FUN0(OP_WANTARRAY);
8620
8621 case KEY_write:
9d116dd7
JH
8622#ifdef EBCDIC
8623 {
df3728a2
JH
8624 char ctl_l[2];
8625 ctl_l[0] = toCTRL('L');
8626 ctl_l[1] = '\0';
fafc274c 8627 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8628 }
8629#else
fafc274c
NC
8630 /* Make sure $^L is defined */
8631 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8632#endif
79072805
LW
8633 UNI(OP_ENTERWRITE);
8634
8635 case KEY_x:
78cdf107
Z
8636 if (PL_expect == XOPERATOR) {
8637 if (*s == '=' && !PL_lex_allbrackets &&
8638 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8639 return REPORT(0);
79072805 8640 Mop(OP_REPEAT);
78cdf107 8641 }
79072805
LW
8642 check_uni();
8643 goto just_a_word;
8644
a0d0e21e 8645 case KEY_xor:
78cdf107
Z
8646 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8647 return REPORT(0);
6154021b 8648 pl_yylval.ival = OP_XOR;
a0d0e21e 8649 OPERATOR(OROP);
79072805 8650 }
49dc05e3 8651 }}
79072805 8652}
bf4acbe4
GS
8653#ifdef __SC__
8654#pragma segment Main
8655#endif
79072805 8656
3f33d153
FC
8657static int
8658S_pending_ident(pTHX)
8eceec63 8659{
97aff369 8660 dVAR;
bbd11bfc 8661 PADOFFSET tmp = 0;
3f33d153 8662 const char pit = (char)pl_yylval.ival;
9bde8eb0
NC
8663 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8664 /* All routes through this function want to know if there is a colon. */
c099d646 8665 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63 8666
3f33d153
FC
8667 DEBUG_T({ PerlIO_printf(Perl_debug_log,
8668 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8669
8670 /* if we're in a my(), we can't allow dynamics here.
8671 $foo'bar has already been turned into $foo::bar, so
8672 just check for colons.
8673
8674 if it's a legal name, the OP is a PADANY.
8675 */
8676 if (PL_in_my) {
8677 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8678 if (has_colon)
4bca4ee0 8679 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8eceec63 8680 "variable %s in \"our\"",
4bca4ee0 8681 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
bc9b26ca 8682 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8683 }
8684 else {
9bde8eb0 8685 if (has_colon)
58576270
BF
8686 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8687 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8688 UTF ? SVf_UTF8 : 0);
8eceec63 8689
3f33d153
FC
8690 pl_yylval.opval = newOP(OP_PADANY, 0);
8691 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
bc9b26ca 8692 UTF ? SVf_UTF8 : 0);
3f33d153 8693 return PRIVATEREF;
8eceec63
SC
8694 }
8695 }
8696
8697 /*
8698 build the ops for accesses to a my() variable.
8eceec63
SC
8699 */
8700
9bde8eb0 8701 if (!has_colon) {
8716503d 8702 if (!PL_in_my)
bc9b26ca
BF
8703 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8704 UTF ? SVf_UTF8 : 0);
8716503d 8705 if (tmp != NOT_IN_PAD) {
8eceec63 8706 /* might be an "our" variable" */
00b1698f 8707 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8708 /* build ops for a bareword */
b64e5050
AL
8709 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8710 HEK * const stashname = HvNAME_HEK(stash);
8711 SV * const sym = newSVhek(stashname);
396482e1 8712 sv_catpvs(sym, "::");
2a33114a 8713 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
3f33d153
FC
8714 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8715 pl_yylval.opval->op_private = OPpCONST_ENTERED;
4210d3f1
FC
8716 if (pit != '&')
8717 gv_fetchsv(sym,
8eceec63
SC
8718 (PL_in_eval
8719 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8720 : GV_ADDMULTI
8eceec63
SC
8721 ),
8722 ((PL_tokenbuf[0] == '$') ? SVt_PV
8723 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8724 : SVt_PVHV));
3f33d153 8725 return WORD;
8eceec63
SC
8726 }
8727
3f33d153
FC
8728 pl_yylval.opval = newOP(OP_PADANY, 0);
8729 pl_yylval.opval->op_targ = tmp;
8730 return PRIVATEREF;
8eceec63
SC
8731 }
8732 }
8733
8734 /*
8735 Whine if they've said @foo in a doublequoted string,
8736 and @foo isn't a variable we can find in the symbol
8737 table.
8738 */
d824713b
NC
8739 if (ckWARN(WARN_AMBIGUOUS) &&
8740 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
0be4d16f
BF
8741 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8742 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8eceec63 8743 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8744 /* DO NOT warn for @- and @+ */
8745 && !( PL_tokenbuf[2] == '\0' &&
8746 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8747 )
8eceec63
SC
8748 {
8749 /* Downgraded from fatal to warning 20000522 mjd */
d824713b 8750 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
29fb1d0e
BF
8751 "Possible unintended interpolation of %"SVf" in string",
8752 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8753 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8eceec63
SC
8754 }
8755 }
8756
8757 /* build ops for a bareword */
3f33d153
FC
8758 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
8759 newSVpvn_flags(PL_tokenbuf + 1,
0be4d16f
BF
8760 tokenbuf_len - 1,
8761 UTF ? SVf_UTF8 : 0 ));
3f33d153 8762 pl_yylval.opval->op_private = OPpCONST_ENTERED;
4210d3f1
FC
8763 if (pit != '&')
8764 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
0be4d16f
BF
8765 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8766 | ( UTF ? SVf_UTF8 : 0 ),
223f0fb7
NC
8767 ((PL_tokenbuf[0] == '$') ? SVt_PV
8768 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8769 : SVt_PVHV));
3f33d153 8770 return WORD;
8eceec63
SC
8771}
8772
76e3520e 8773STATIC void
c94115d8 8774S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8775{
97aff369 8776 dVAR;
2f3197b3 8777
7918f24d
NC
8778 PERL_ARGS_ASSERT_CHECKCOMMA;
8779
d008e5eb 8780 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8781 if (ckWARN(WARN_SYNTAX)) {
8782 int level = 1;
26ff0806 8783 const char *w;
d008e5eb
GS
8784 for (w = s+2; *w && level; w++) {
8785 if (*w == '(')
8786 ++level;
8787 else if (*w == ')')
8788 --level;
8789 }
888fea98
NC
8790 while (isSPACE(*w))
8791 ++w;
b1439985
RGS
8792 /* the list of chars below is for end of statements or
8793 * block / parens, boolean operators (&&, ||, //) and branch
8794 * constructs (or, and, if, until, unless, while, err, for).
8795 * Not a very solid hack... */
8796 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8797 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8798 "%s (...) interpreted as function",name);
d008e5eb 8799 }
2f3197b3 8800 }
3280af22 8801 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8802 s++;
a687059c
LW
8803 if (*s == '(')
8804 s++;
3280af22 8805 while (s < PL_bufend && isSPACE(*s))
a687059c 8806 s++;
7e2040f0 8807 if (isIDFIRST_lazy_if(s,UTF)) {
d0fb66e4
BF
8808 const char * const w = s;
8809 s += UTF ? UTF8SKIP(s) : 1;
7e2040f0 8810 while (isALNUM_lazy_if(s,UTF))
d0fb66e4 8811 s += UTF ? UTF8SKIP(s) : 1;
3280af22 8812 while (s < PL_bufend && isSPACE(*s))
a687059c 8813 s++;
e929a76b 8814 if (*s == ',') {
c94115d8 8815 GV* gv;
5458a98a 8816 if (keyword(w, s - w, 0))
e929a76b 8817 return;
c94115d8 8818
2e38bce1 8819 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
c94115d8 8820 if (gv && GvCVu(gv))
abbb3198 8821 return;
cea2e8a9 8822 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8823 }
8824 }
8825}
8826
423cee85
JH
8827/* Either returns sv, or mortalizes sv and returns a new SV*.
8828 Best used as sv=new_constant(..., sv, ...).
8829 If s, pv are NULL, calls subroutine with one argument,
8830 and type is used with error messages only. */
8831
b3ac6de7 8832STATIC SV *
eb0d8d16
NC
8833S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8834 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 8835{
27da23d5 8836 dVAR; dSP;
fbb93542 8837 HV * table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 8838 SV *res;
b3ac6de7
IZ
8839 SV **cvp;
8840 SV *cv, *typesv;
89e33a05 8841 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 8842
7918f24d
NC
8843 PERL_ARGS_ASSERT_NEW_CONSTANT;
8844
f8988b41
KW
8845 /* charnames doesn't work well if there have been errors found */
8846 if (PL_error_count > 0 && strEQ(key,"charnames"))
8847 return &PL_sv_undef;
8848
fbb93542
KW
8849 if (!table
8850 || ! (PL_hints & HINT_LOCALIZE_HH)
8851 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8852 || ! SvOK(*cvp))
8853 {
423cee85
JH
8854 SV *msg;
8855
fbb93542
KW
8856 /* Here haven't found what we're looking for. If it is charnames,
8857 * perhaps it needs to be loaded. Try doing that before giving up */
8858 if (strEQ(key,"charnames")) {
8859 Perl_load_module(aTHX_
8860 0,
8861 newSVpvs("_charnames"),
8862 /* version parameter; no need to specify it, as if
8863 * we get too early a version, will fail anyway,
8864 * not being able to find '_charnames' */
8865 NULL,
8866 newSVpvs(":full"),
8867 newSVpvs(":short"),
8868 NULL);
8869 SPAGAIN;
8870 table = GvHV(PL_hintgv);
8871 if (table
8872 && (PL_hints & HINT_LOCALIZE_HH)
8873 && (cvp = hv_fetch(table, key, keylen, FALSE))
8874 && SvOK(*cvp))
8875 {
8876 goto now_ok;
8877 }
8878 }
8879 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8880 msg = Perl_newSVpvf(aTHX_
8881 "Constant(%s) unknown", (type ? type: "undef"));
8882 }
8883 else {
8884 why1 = "$^H{";
8885 why2 = key;
8886 why3 = "} is not defined";
423cee85 8887 report:
4e553d73 8888 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 8889 (type ? type: "undef"), why1, why2, why3);
fbb93542 8890 }
95a20fc0 8891 yyerror(SvPVX_const(msg));
423cee85
JH
8892 SvREFCNT_dec(msg);
8893 return sv;
8894 }
fbb93542 8895now_ok:
b3ac6de7
IZ
8896 sv_2mortal(sv); /* Parent created it permanently */
8897 cv = *cvp;
423cee85 8898 if (!pv && s)
59cd0e26 8899 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 8900 if (type && pv)
59cd0e26 8901 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 8902 else
423cee85 8903 typesv = &PL_sv_undef;
4e553d73 8904
e788e7d3 8905 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
8906 ENTER ;
8907 SAVETMPS;
4e553d73 8908
423cee85 8909 PUSHMARK(SP) ;
a5845cb7 8910 EXTEND(sp, 3);
423cee85
JH
8911 if (pv)
8912 PUSHs(pv);
b3ac6de7 8913 PUSHs(sv);
423cee85
JH
8914 if (pv)
8915 PUSHs(typesv);
b3ac6de7 8916 PUTBACK;
423cee85 8917 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 8918
423cee85 8919 SPAGAIN ;
4e553d73 8920
423cee85 8921 /* Check the eval first */
9b0e499b 8922 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 8923 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 8924 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 8925 (void)POPs;
b37c2d43 8926 res = SvREFCNT_inc_simple(sv);
423cee85
JH
8927 }
8928 else {
8929 res = POPs;
b37c2d43 8930 SvREFCNT_inc_simple_void(res);
423cee85 8931 }
4e553d73 8932
423cee85
JH
8933 PUTBACK ;
8934 FREETMPS ;
8935 LEAVE ;
b3ac6de7 8936 POPSTACK;
4e553d73 8937
b3ac6de7 8938 if (!SvOK(res)) {
423cee85
JH
8939 why1 = "Call to &{$^H{";
8940 why2 = key;
f0af216f 8941 why3 = "}} did not return a defined value";
423cee85
JH
8942 sv = res;
8943 goto report;
9b0e499b 8944 }
423cee85 8945
9b0e499b 8946 return res;
b3ac6de7 8947}
4e553d73 8948
d0a148a6
NC
8949/* Returns a NUL terminated string, with the length of the string written to
8950 *slp
8951 */
76e3520e 8952STATIC char *
cea2e8a9 8953S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 8954{
97aff369 8955 dVAR;
eb578fdb
KW
8956 char *d = dest;
8957 char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
8958
8959 PERL_ARGS_ASSERT_SCAN_WORD;
8960
463ee0b2 8961 for (;;) {
8903cb82 8962 if (d >= e)
cea2e8a9 8963 Perl_croak(aTHX_ ident_too_long);
5db1eb8d 8964 if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) /* UTF handled below */
463ee0b2 8965 *d++ = *s++;
c35e046a 8966 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8967 *d++ = ':';
8968 *d++ = ':';
8969 s++;
8970 }
c35e046a 8971 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
8972 *d++ = *s++;
8973 *d++ = *s++;
8974 }
fd400ab9 8975 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8976 char *t = s + UTF8SKIP(s);
c35e046a 8977 size_t len;
fd400ab9 8978 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 8979 t += UTF8SKIP(t);
c35e046a
AL
8980 len = t - s;
8981 if (d + len > e)
cea2e8a9 8982 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
8983 Copy(s, d, len, char);
8984 d += len;
a0ed51b3
LW
8985 s = t;
8986 }
463ee0b2
LW
8987 else {
8988 *d = '\0';
8989 *slp = d - dest;
8990 return s;
e929a76b 8991 }
378cc40b
LW
8992 }
8993}
8994
76e3520e 8995STATIC char *
f54cb97a 8996S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 8997{
97aff369 8998 dVAR;
6136c704 8999 char *bracket = NULL;
748a9306 9000 char funny = *s++;
eb578fdb
KW
9001 char *d = dest;
9002 char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 9003
7918f24d
NC
9004 PERL_ARGS_ASSERT_SCAN_IDENT;
9005
a0d0e21e 9006 if (isSPACE(*s))
29595ff2 9007 s = PEEKSPACE(s);
de3bb511 9008 if (isDIGIT(*s)) {
8903cb82 9009 while (isDIGIT(*s)) {
9010 if (d >= e)
cea2e8a9 9011 Perl_croak(aTHX_ ident_too_long);
378cc40b 9012 *d++ = *s++;
8903cb82 9013 }
378cc40b
LW
9014 }
9015 else {
463ee0b2 9016 for (;;) {
8903cb82 9017 if (d >= e)
cea2e8a9 9018 Perl_croak(aTHX_ ident_too_long);
834a4ddd 9019 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 9020 *d++ = *s++;
7e2040f0 9021 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
9022 *d++ = ':';
9023 *d++ = ':';
9024 s++;
9025 }
a0d0e21e 9026 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
9027 *d++ = *s++;
9028 *d++ = *s++;
9029 }
fd400ab9 9030 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 9031 char *t = s + UTF8SKIP(s);
fd400ab9 9032 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
9033 t += UTF8SKIP(t);
9034 if (d + (t - s) > e)
cea2e8a9 9035 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
9036 Copy(s, d, t - s, char);
9037 d += t - s;
9038 s = t;
9039 }
463ee0b2
LW
9040 else
9041 break;
9042 }
378cc40b
LW
9043 }
9044 *d = '\0';
9045 d = dest;
79072805 9046 if (*d) {
3280af22
NIS
9047 if (PL_lex_state != LEX_NORMAL)
9048 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 9049 return s;
378cc40b 9050 }
748a9306 9051 if (*s == '$' && s[1] &&
3792a11b 9052 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 9053 {
4810e5ec 9054 return s;
5cd24f17 9055 }
79072805
LW
9056 if (*s == '{') {
9057 bracket = s;
9058 s++;
9059 }
204e6232
BF
9060 if (s < send) {
9061 if (UTF) {
9062 const STRLEN skip = UTF8SKIP(s);
9063 STRLEN i;
9064 d[skip] = '\0';
9065 for ( i = 0; i < skip; i++ )
9066 d[i] = *s++;
9067 }
9068 else {
9069 *d = *s++;
9070 d[1] = '\0';
9071 }
9072 }
2b92dfce 9073 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 9074 *d = toCTRL(*s);
9075 s++;
de3bb511 9076 }
fbdd83da
DIM
9077 else if (ck_uni && !bracket)
9078 check_uni();
79072805 9079 if (bracket) {
748a9306 9080 if (isSPACE(s[-1])) {
fa83b5b6 9081 while (s < send) {
f54cb97a 9082 const char ch = *s++;
bf4acbe4 9083 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 9084 *d = ch;
9085 break;
9086 }
9087 }
748a9306 9088 }
7e2040f0 9089 if (isIDFIRST_lazy_if(d,UTF)) {
204e6232 9090 d += UTF8SKIP(d);
a0ed51b3 9091 if (UTF) {
6136c704
AL
9092 char *end = s;
9093 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9094 end += UTF8SKIP(end);
9095 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9096 end += UTF8SKIP(end);
a0ed51b3 9097 }
6136c704
AL
9098 Copy(s, d, end - s, char);
9099 d += end - s;
9100 s = end;
a0ed51b3
LW
9101 }
9102 else {
2b92dfce 9103 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 9104 *d++ = *s++;
2b92dfce 9105 if (d >= e)
cea2e8a9 9106 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 9107 }
79072805 9108 *d = '\0';
c35e046a
AL
9109 while (s < send && SPACE_OR_TAB(*s))
9110 s++;
ff68c719 9111 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 9112 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
9113 const char * const brack =
9114 (const char *)
9115 ((*s == '[') ? "[...]" : "{...}");
e850844c 9116 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 9117 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 9118 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
9119 funny, dest, brack, funny, dest, brack);
9120 }
79072805 9121 bracket++;
a0be28da 9122 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 9123 PL_lex_allbrackets++;
79072805
LW
9124 return s;
9125 }
4e553d73
NIS
9126 }
9127 /* Handle extended ${^Foo} variables
2b92dfce
GS
9128 * 1999-02-27 mjd-perl-patch@plover.com */
9129 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9130 && isALNUM(*s))
9131 {
9132 d++;
9133 while (isALNUM(*s) && d < e) {
9134 *d++ = *s++;
9135 }
9136 if (d >= e)
cea2e8a9 9137 Perl_croak(aTHX_ ident_too_long);
2b92dfce 9138 *d = '\0';
79072805
LW
9139 }
9140 if (*s == '}') {
9141 s++;
7df0d042 9142 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 9143 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
9144 PL_expect = XREF;
9145 }
d008e5eb 9146 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 9147 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241 9148 (keyword(dest, d - dest, 0)
5c66c3dd 9149 || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
d008e5eb 9150 {
5c66c3dd
BF
9151 SV *tmp = newSVpvn_flags( dest, d - dest,
9152 SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
c35e046a
AL
9153 if (funny == '#')
9154 funny = '@';
9014280d 9155 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5c66c3dd
BF
9156 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9157 funny, tmp, funny, tmp);
d008e5eb
GS
9158 }
9159 }
79072805
LW
9160 }
9161 else {
9162 s = bracket; /* let the parser handle it */
93a17b20 9163 *dest = '\0';
79072805
LW
9164 }
9165 }
3280af22
NIS
9166 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9167 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
9168 return s;
9169}
9170
858a358b 9171static bool
3955e1a9 9172S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
9173
9174 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9175 * the parse starting at 's', based on the subset that are valid in this
9176 * context input to this routine in 'valid_flags'. Advances s. Returns
96f3bfda
KW
9177 * TRUE if the input should be treated as a valid flag, so the next char
9178 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9179 * first call on the current regex. This routine will set it to any
9180 * charset modifier found. The caller shouldn't change it. This way,
9181 * another charset modifier encountered in the parse can be detected as an
9182 * error, as we have decided to allow only one */
858a358b
KW
9183
9184 const char c = **s;
84159251 9185 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
94b03d7d 9186
84159251
BF
9187 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9188 if (isALNUM_lazy_if(*s, UTF)) {
4f8dbb2d 9189 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
84159251
BF
9190 UTF ? SVf_UTF8 : 0);
9191 (*s) += charlen;
96f3bfda
KW
9192 /* Pretend that it worked, so will continue processing before
9193 * dieing */
0da72d5e 9194 return TRUE;
858a358b
KW
9195 }
9196 return FALSE;
9197 }
9198
9199 switch (c) {
94b03d7d 9200
858a358b
KW
9201 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9202 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9203 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9204 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9205 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9206 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d 9207 case LOCALE_PAT_MOD:
3955e1a9
KW
9208 if (*charset) {
9209 goto multiple_charsets;
9210 }
94b03d7d 9211 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 9212 *charset = c;
94b03d7d
KW
9213 break;
9214 case UNICODE_PAT_MOD:
3955e1a9
KW
9215 if (*charset) {
9216 goto multiple_charsets;
9217 }
94b03d7d 9218 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 9219 *charset = c;
94b03d7d
KW
9220 break;
9221 case ASCII_RESTRICT_PAT_MOD:
ff3f26d2 9222 if (! *charset) {
94b03d7d
KW
9223 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9224 }
ff3f26d2
KW
9225 else {
9226
9227 /* Error if previous modifier wasn't an 'a', but if it was, see
9228 * if, and accept, a second occurrence (only) */
9229 if (*charset != 'a'
9230 || get_regex_charset(*pmfl)
9231 != REGEX_ASCII_RESTRICTED_CHARSET)
9232 {
9233 goto multiple_charsets;
9234 }
9235 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
9236 }
9237 *charset = c;
94b03d7d
KW
9238 break;
9239 case DEPENDS_PAT_MOD:
3955e1a9
KW
9240 if (*charset) {
9241 goto multiple_charsets;
9242 }
94b03d7d 9243 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 9244 *charset = c;
94b03d7d 9245 break;
879d0c72 9246 }
94b03d7d 9247
858a358b
KW
9248 (*s)++;
9249 return TRUE;
94b03d7d 9250
3955e1a9
KW
9251 multiple_charsets:
9252 if (*charset != c) {
9253 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9254 }
ff3f26d2
KW
9255 else if (c == 'a') {
9256 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9257 }
3955e1a9
KW
9258 else {
9259 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9260 }
9261
9262 /* Pretend that it worked, so will continue processing before dieing */
9263 (*s)++;
9264 return TRUE;
879d0c72
NC
9265}
9266
76e3520e 9267STATIC char *
cea2e8a9 9268S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 9269{
97aff369 9270 dVAR;
79072805 9271 PMOP *pm;
d24ca0c5 9272 char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
10edeb5d 9273 const char * const valid_flags =
a20207d7 9274 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 9275 char charset = '\0'; /* character set modifier */
5db06880
NC
9276#ifdef PERL_MAD
9277 char *modstart;
9278#endif
9279
7918f24d 9280 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 9281
d24ca0c5
DM
9282 /* this was only needed for the initial scan_str; set it to false
9283 * so that any (?{}) code blocks etc are parsed normally */
9284 PL_reg_state.re_reparsing = FALSE;
25c09cbf 9285 if (!s) {
6136c704 9286 const char * const delimiter = skipspace(start);
10edeb5d
JH
9287 Perl_croak(aTHX_
9288 (const char *)
9289 (*delimiter == '?'
9290 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9291 : "Search pattern not terminated" ));
25c09cbf 9292 }
bbce6d69 9293
8782bef2 9294 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
9295 if (PL_multi_open == '?') {
9296 /* This is the only point in the code that sets PMf_ONCE: */
79072805 9297 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
9298
9299 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9300 allows us to restrict the list needed by reset to just the ??
9301 matches. */
9302 assert(type != OP_TRANS);
9303 if (PL_curstash) {
daba3364 9304 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
9305 U32 elements;
9306 if (!mg) {
daba3364 9307 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
9308 0);
9309 }
9310 elements = mg->mg_len / sizeof(PMOP**);
9311 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9312 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9313 mg->mg_len = elements * sizeof(PMOP**);
9314 PmopSTASH_set(pm,PL_curstash);
9315 }
9316 }
5db06880
NC
9317#ifdef PERL_MAD
9318 modstart = s;
9319#endif
d63c20f2
DM
9320
9321 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9322 * anon CV. False positives like qr/[(?{]/ are harmless */
9323
9324 if (type == OP_QR) {
6f635923
DM
9325 STRLEN len;
9326 char *e, *p = SvPV(PL_lex_stuff, len);
9327 e = p + len;
9328 for (; p < e; p++) {
d63c20f2
DM
9329 if (p[0] == '(' && p[1] == '?'
9330 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9331 {
9332 pm->op_pmflags |= PMf_HAS_CV;
9333 break;
9334 }
9335 }
6f635923 9336 pm->op_pmflags |= PMf_IS_QR;
d63c20f2
DM
9337 }
9338
3955e1a9 9339 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
9340#ifdef PERL_MAD
9341 if (PL_madskills && modstart != s) {
9342 SV* tmptoken = newSVpvn(modstart, s - modstart);
9343 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9344 }
9345#endif
4ac733c9 9346 /* issue a warning if /c is specified,but /g is not */
a2a5de95 9347 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 9348 {
a2a5de95
NC
9349 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9350 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
9351 }
9352
3280af22 9353 PL_lex_op = (OP*)pm;
6154021b 9354 pl_yylval.ival = OP_MATCH;
378cc40b
LW
9355 return s;
9356}
9357
76e3520e 9358STATIC char *
cea2e8a9 9359S_scan_subst(pTHX_ char *start)
79072805 9360{
27da23d5 9361 dVAR;
22594288 9362 char *s;
eb578fdb 9363 PMOP *pm;
4fdae800 9364 I32 first_start;
79072805 9365 I32 es = 0;
3955e1a9 9366 char charset = '\0'; /* character set modifier */
5db06880
NC
9367#ifdef PERL_MAD
9368 char *modstart;
9369#endif
79072805 9370
7918f24d
NC
9371 PERL_ARGS_ASSERT_SCAN_SUBST;
9372
6154021b 9373 pl_yylval.ival = OP_NULL;
79072805 9374
d24ca0c5 9375 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
79072805 9376
37fd879b 9377 if (!s)
cea2e8a9 9378 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9379
3280af22 9380 if (s[-1] == PL_multi_open)
79072805 9381 s--;
5db06880
NC
9382#ifdef PERL_MAD
9383 if (PL_madskills) {
cd81e915
NC
9384 CURMAD('q', PL_thisopen);
9385 CURMAD('_', PL_thiswhite);
9386 CURMAD('E', PL_thisstuff);
9387 CURMAD('Q', PL_thisclose);
9388 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9389 }
9390#endif
79072805 9391
3280af22 9392 first_start = PL_multi_start;
d24ca0c5 9393 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 9394 if (!s) {
37fd879b 9395 if (PL_lex_stuff) {
3280af22 9396 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9397 PL_lex_stuff = NULL;
37fd879b 9398 }
cea2e8a9 9399 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9400 }
3280af22 9401 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9402
79072805 9403 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9404
9405#ifdef PERL_MAD
9406 if (PL_madskills) {
cd81e915
NC
9407 CURMAD('z', PL_thisopen);
9408 CURMAD('R', PL_thisstuff);
9409 CURMAD('Z', PL_thisclose);
5db06880
NC
9410 }
9411 modstart = s;
9412#endif
9413
48c036b1 9414 while (*s) {
a20207d7 9415 if (*s == EXEC_PAT_MOD) {
a687059c 9416 s++;
2f3197b3 9417 es++;
a687059c 9418 }
3955e1a9
KW
9419 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9420 {
48c036b1 9421 break;
aa78b661 9422 }
378cc40b 9423 }
79072805 9424
5db06880
NC
9425#ifdef PERL_MAD
9426 if (PL_madskills) {
9427 if (modstart != s)
9428 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9429 append_madprops(PL_thismad, (OP*)pm, 0);
9430 PL_thismad = 0;
5db06880
NC
9431 }
9432#endif
a2a5de95
NC
9433 if ((pm->op_pmflags & PMf_CONTINUE)) {
9434 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9435 }
9436
79072805 9437 if (es) {
6136c704
AL
9438 SV * const repl = newSVpvs("");
9439
0244c3a4 9440 PL_multi_end = 0;
79072805 9441 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9442 while (es-- > 0) {
9443 if (es)
9444 sv_catpvs(repl, "eval ");
9445 else
9446 sv_catpvs(repl, "do ");
9447 }
6f43d98f 9448 sv_catpvs(repl, "{");
7cc34111 9449 sv_catsv(repl, PL_sublex_info.repl);
9badc361 9450 sv_catpvs(repl, "}");
25da4f38 9451 SvEVALED_on(repl);
7cc34111
FC
9452 SvREFCNT_dec(PL_sublex_info.repl);
9453 PL_sublex_info.repl = repl;
378cc40b 9454 }
79072805 9455
3280af22 9456 PL_lex_op = (OP*)pm;
6154021b 9457 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9458 return s;
9459}
9460
76e3520e 9461STATIC char *
cea2e8a9 9462S_scan_trans(pTHX_ char *start)
378cc40b 9463{
97aff369 9464 dVAR;
eb578fdb 9465 char* s;
11343788 9466 OP *o;
b84c11c8
NC
9467 U8 squash;
9468 U8 del;
9469 U8 complement;
bb16bae8 9470 bool nondestruct = 0;
5db06880
NC
9471#ifdef PERL_MAD
9472 char *modstart;
9473#endif
79072805 9474
7918f24d
NC
9475 PERL_ARGS_ASSERT_SCAN_TRANS;
9476
6154021b 9477 pl_yylval.ival = OP_NULL;
79072805 9478
d24ca0c5 9479 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
37fd879b 9480 if (!s)
cea2e8a9 9481 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9482
3280af22 9483 if (s[-1] == PL_multi_open)
2f3197b3 9484 s--;
5db06880
NC
9485#ifdef PERL_MAD
9486 if (PL_madskills) {
cd81e915
NC
9487 CURMAD('q', PL_thisopen);
9488 CURMAD('_', PL_thiswhite);
9489 CURMAD('E', PL_thisstuff);
9490 CURMAD('Q', PL_thisclose);
9491 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9492 }
9493#endif
2f3197b3 9494
d24ca0c5 9495 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 9496 if (!s) {
37fd879b 9497 if (PL_lex_stuff) {
3280af22 9498 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9499 PL_lex_stuff = NULL;
37fd879b 9500 }
cea2e8a9 9501 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9502 }
5db06880 9503 if (PL_madskills) {
cd81e915
NC
9504 CURMAD('z', PL_thisopen);
9505 CURMAD('R', PL_thisstuff);
9506 CURMAD('Z', PL_thisclose);
5db06880 9507 }
79072805 9508
a0ed51b3 9509 complement = del = squash = 0;
5db06880
NC
9510#ifdef PERL_MAD
9511 modstart = s;
9512#endif
7a1e2023
NC
9513 while (1) {
9514 switch (*s) {
9515 case 'c':
79072805 9516 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9517 break;
9518 case 'd':
a0ed51b3 9519 del = OPpTRANS_DELETE;
7a1e2023
NC
9520 break;
9521 case 's':
79072805 9522 squash = OPpTRANS_SQUASH;
7a1e2023 9523 break;
bb16bae8
FC
9524 case 'r':
9525 nondestruct = 1;
9526 break;
7a1e2023
NC
9527 default:
9528 goto no_more;
9529 }
395c3793
LW
9530 s++;
9531 }
7a1e2023 9532 no_more:
8973db79 9533
9100eeb1 9534 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
59f00321
RGS
9535 o->op_private &= ~OPpTRANS_ALL;
9536 o->op_private |= del|squash|complement|
7948272d 9537 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
7cc34111 9538 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
79072805 9539
3280af22 9540 PL_lex_op = o;
bb16bae8 9541 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9542
9543#ifdef PERL_MAD
9544 if (PL_madskills) {
9545 if (modstart != s)
9546 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9547 append_madprops(PL_thismad, o, 0);
9548 PL_thismad = 0;
5db06880
NC
9549 }
9550#endif
9551
79072805
LW
9552 return s;
9553}
9554
5097bf9b
FC
9555/* scan_heredoc
9556 Takes a pointer to the first < in <<FOO.
9557 Returns a pointer to the byte following <<FOO.
9558
9559 This function scans a heredoc, which involves different methods
9560 depending on whether we are in a string eval, quoted construct, etc.
9561 This is because PL_linestr could containing a single line of input, or
9562 a whole string being evalled, or the contents of the current quote-
9563 like operator.
9564
19bbc0d7
FC
9565 The two basic methods are:
9566 - Steal lines from the input stream
9567 - Scan the heredoc in PL_linestr and remove it therefrom
9568
9569 In a file scope or filtered eval, the first method is used; in a
9570 string eval, the second.
9571
9572 In a quote-like operator, we have to choose between the two,
9573 depending on where we can find a newline. We peek into outer lex-
9574 ing scopes until we find one with a newline in it. If we reach the
9575 outermost lexing scope and it is a file, we use the stream method.
9576 Otherwise it is treated as an eval.
5097bf9b
FC
9577*/
9578
76e3520e 9579STATIC char *
cea2e8a9 9580S_scan_heredoc(pTHX_ register char *s)
79072805 9581{
97aff369 9582 dVAR;
79072805
LW
9583 I32 op_type = OP_SCALAR;
9584 I32 len;
9585 SV *tmpstr;
9586 char term;
eb578fdb
KW
9587 char *d;
9588 char *e;
4633a7c4 9589 char *peek;
5097bf9b 9590 const bool infile = PL_rsfp || PL_parser->filtered;
78a635de 9591 LEXSHARED *shared = PL_parser->lex_shared;
5db06880
NC
9592#ifdef PERL_MAD
9593 I32 stuffstart = s - SvPVX(PL_linestr);
9594 char *tstart;
9595
cd81e915 9596 PL_realtokenstart = -1;
5db06880 9597#endif
79072805 9598
7918f24d
NC
9599 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9600
79072805 9601 s += 2;
5097bf9b 9602 d = PL_tokenbuf + 1;
3280af22 9603 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
5097bf9b 9604 *PL_tokenbuf = '\n';
c35e046a
AL
9605 peek = s;
9606 while (SPACE_OR_TAB(*peek))
9607 peek++;
3792a11b 9608 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9609 s = peek;
79072805 9610 term = *s++;
3280af22 9611 s = delimcpy(d, e, s, PL_bufend, term, &len);
6f2d7fc9
FC
9612 if (s == PL_bufend)
9613 Perl_croak(aTHX_ "Unterminated delimiter for here document");
fc36a67e 9614 d += len;
6f2d7fc9 9615 s++;
79072805
LW
9616 }
9617 else {
9618 if (*s == '\\')
458391bd 9619 /* <<\FOO is equivalent to <<'FOO' */
79072805
LW
9620 s++, term = '\'';
9621 else
9622 term = '"';
7e2040f0 9623 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9624 deprecate("bare << to mean <<\"\"");
7e2040f0 9625 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9626 if (d < e)
9627 *d++ = *s;
9628 }
9629 }
3280af22 9630 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9631 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9632 *d++ = '\n';
9633 *d = '\0';
3280af22 9634 len = d - PL_tokenbuf;
5db06880
NC
9635
9636#ifdef PERL_MAD
9637 if (PL_madskills) {
5097bf9b
FC
9638 tstart = PL_tokenbuf + 1;
9639 PL_thisclose = newSVpvn(tstart, len - 1);
5db06880 9640 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9641 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9642 stuffstart = s - SvPVX(PL_linestr);
9643 }
9644#endif
6a27c188 9645#ifndef PERL_STRICT_CR
f63a84b2
LW
9646 d = strchr(s, '\r');
9647 if (d) {
b464bac0 9648 char * const olds = s;
f63a84b2 9649 s = d;
3280af22 9650 while (s < PL_bufend) {
f63a84b2
LW
9651 if (*s == '\r') {
9652 *d++ = '\n';
9653 if (*++s == '\n')
9654 s++;
9655 }
9656 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9657 *d++ = *s++;
9658 s++;
9659 }
9660 else
9661 *d++ = *s++;
9662 }
9663 *d = '\0';
3280af22 9664 PL_bufend = d;
95a20fc0 9665 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9666 s = olds;
9667 }
9668#endif
5db06880
NC
9669#ifdef PERL_MAD
9670 if (PL_madskills) {
9671 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9672 if (PL_thisstuff)
9673 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9674 else
cd81e915 9675 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880 9676 }
748a9306 9677
5db06880 9678 stuffstart = s - SvPVX(PL_linestr);
5db06880
NC
9679#endif
9680
7d0a29fe
NC
9681 tmpstr = newSV_type(SVt_PVIV);
9682 SvGROW(tmpstr, 80);
748a9306 9683 if (term == '\'') {
79072805 9684 op_type = OP_CONST;
45977657 9685 SvIV_set(tmpstr, -1);
748a9306
LW
9686 }
9687 else if (term == '`') {
79072805 9688 op_type = OP_BACKTICK;
45977657 9689 SvIV_set(tmpstr, '\\');
748a9306 9690 }
79072805 9691
78a635de 9692 PL_multi_start = CopLINE(PL_curcop) + 1;
3280af22 9693 PL_multi_open = PL_multi_close = '<';
19bbc0d7 9694 /* inside a string eval or quote-like operator */
4efe39d2 9695 if (!infile || PL_lex_inwhat) {
60f40a38 9696 SV *linestr;
3526bd3e 9697 char *bufend;
074b1c59 9698 char * const olds = s;
d37427bc 9699 PERL_CONTEXT * const cx = &cxstack[cxstack_ix];
19bbc0d7
FC
9700 /* These two fields are not set until an inner lexing scope is
9701 entered. But we need them set here. */
4efe39d2
FC
9702 shared->ls_bufptr = s;
9703 shared->ls_linestr = PL_linestr;
9704 if (PL_lex_inwhat)
9705 /* Look for a newline. If the current buffer does not have one,
9706 peek into the line buffer of the parent lexing scope, going
9707 up as many levels as necessary to find one with a newline
9708 after bufptr.
9709 */
9710 while (!(s = (char *)memchr(
9711 (void *)shared->ls_bufptr, '\n',
9712 SvEND(shared->ls_linestr)-shared->ls_bufptr
9713 ))) {
60f40a38 9714 shared = shared->ls_prev;
f68f7dc1
FC
9715 /* shared is only null if we have gone beyond the outermost
9716 lexing scope. In a file, we will have broken out of the
9717 loop in the previous iteration. In an eval, the string buf-
9718 fer ends with "\n;", so the while condition below will have
9719 evaluated to false. So shared can never be null. */
9720 assert(shared);
60f40a38
FC
9721 /* A LEXSHARED struct with a null ls_prev pointer is the outer-
9722 most lexing scope. In a file, shared->ls_linestr at that
9723 level is just one line, so there is no body to steal. */
9724 if (infile && !shared->ls_prev) {
074b1c59 9725 s = olds;
99bd9d90
FC
9726 goto streaming;
9727 }
4efe39d2
FC
9728 }
9729 else { /* eval */
9730 s = (char*)memchr((void*)s, '\n', PL_bufend - s);
9731 assert(s);
9732 }
60f40a38
FC
9733 linestr = shared->ls_linestr;
9734 bufend = SvEND(linestr);
0244c3a4
GS
9735 d = s;
9736 while (s < bufend &&
5bd13da3 9737 (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
0244c3a4 9738 if (*s++ == '\n')
78a635de 9739 ++shared->herelines;
0244c3a4
GS
9740 }
9741 if (s >= bufend) {
932d0cf1 9742 goto interminable;
0244c3a4 9743 }
3328ab5a 9744 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9745#ifdef PERL_MAD
9746 if (PL_madskills) {
cd81e915
NC
9747 if (PL_thisstuff)
9748 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9749 else
cd81e915 9750 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9751 stuffstart = s - SvPVX(PL_linestr);
9752 }
9753#endif
79072805 9754 s += len - 1;
d794b522 9755 /* the preceding stmt passes a newline */
78a635de 9756 shared->herelines++;
49d8d3a1 9757
db444266
FC
9758 /* s now points to the newline after the heredoc terminator.
9759 d points to the newline before the body of the heredoc.
9760 */
19bbc0d7
FC
9761
9762 /* We are going to modify linestr in place here, so set
9763 aside copies of the string if necessary for re-evals or
9764 (caller $n)[6]. */
a91428a4 9765 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
3328ab5a
FC
9766 check shared->re_eval_str. */
9767 if (shared->re_eval_start || shared->re_eval_str) {
db444266 9768 /* Set aside the rest of the regexp */
3328ab5a
FC
9769 if (!shared->re_eval_str)
9770 shared->re_eval_str =
9771 newSVpvn(shared->re_eval_start,
4efe39d2 9772 bufend - shared->re_eval_start);
3328ab5a 9773 shared->re_eval_start -= s-d;
db444266 9774 }
d37427bc 9775 if (CxTYPE(cx) == CXt_EVAL && CxOLD_OP_TYPE(cx) == OP_ENTEREVAL
4efe39d2
FC
9776 && cx->blk_eval.cur_text == linestr) {
9777 cx->blk_eval.cur_text = newSVsv(linestr);
d37427bc
FC
9778 SvSCREAM_on(cx->blk_eval.cur_text);
9779 }
db444266 9780 /* Copy everything from s onwards back to d. */
4efe39d2
FC
9781 Move(s,d,bufend-s + 1,char);
9782 SvCUR_set(linestr, SvCUR(linestr) - (s-d));
19bbc0d7
FC
9783 /* Setting PL_bufend only applies when we have not dug deeper
9784 into other scopes, because sublex_done sets PL_bufend to
9785 SvEND(PL_linestr). */
4efe39d2 9786 if (shared == PL_parser->lex_shared) PL_bufend = SvEND(linestr);
db444266 9787 s = olds;
79072805
LW
9788 }
9789 else
a7922135 9790 {
3328ab5a 9791 SV *linestr_save;
a7922135
FC
9792 streaming:
9793 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
9794 term = PL_tokenbuf[1];
9795 len--;
3328ab5a 9796 linestr_save = PL_linestr; /* must restore this afterwards */
074b1c59 9797 d = s; /* and this */
3328ab5a 9798 PL_linestr = newSVpvs("");
074b1c59
FC
9799 PL_bufend = SvPVX(PL_linestr);
9800 while (1) {
5db06880
NC
9801#ifdef PERL_MAD
9802 if (PL_madskills) {
9803 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9804 if (PL_thisstuff)
9805 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9806 else
cd81e915 9807 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9808 }
9809#endif
074b1c59 9810 PL_bufptr = PL_bufend;
d794b522 9811 CopLINE_set(PL_curcop,
78a635de 9812 PL_multi_start + shared->herelines);
112d1284
FC
9813 if (!lex_next_chunk(LEX_NO_TERM)
9814 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
3328ab5a 9815 SvREFCNT_dec(linestr_save);
932d0cf1 9816 goto interminable;
79072805 9817 }
78a635de 9818 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
112d1284
FC
9819 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9820 lex_grow_linestr(SvCUR(PL_linestr) + 2);
9821 sv_catpvs(PL_linestr, "\n\0");
9822 }
f0e67a1d 9823 s = PL_bufptr;
5db06880
NC
9824#ifdef PERL_MAD
9825 stuffstart = s - SvPVX(PL_linestr);
9826#endif
78a635de 9827 shared->herelines++;
bd61b366 9828 PL_last_lop = PL_last_uni = NULL;
6a27c188 9829#ifndef PERL_STRICT_CR
3280af22 9830 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
9831 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9832 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 9833 {
3280af22
NIS
9834 PL_bufend[-2] = '\n';
9835 PL_bufend--;
95a20fc0 9836 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 9837 }
3280af22
NIS
9838 else if (PL_bufend[-1] == '\r')
9839 PL_bufend[-1] = '\n';
f63a84b2 9840 }
3280af22
NIS
9841 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9842 PL_bufend[-1] = '\n';
f63a84b2 9843#endif
5097bf9b 9844 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
3328ab5a
FC
9845 SvREFCNT_dec(PL_linestr);
9846 PL_linestr = linestr_save;
9847 PL_linestart = SvPVX(linestr_save);
3280af22 9848 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
3328ab5a 9849 s = d;
074b1c59 9850 break;
79072805
LW
9851 }
9852 else {
3280af22 9853 sv_catsv(tmpstr,PL_linestr);
395c3793 9854 }
a7922135 9855 }
395c3793 9856 }
57843af0 9857 PL_multi_end = CopLINE(PL_curcop);
79072805 9858 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 9859 SvPV_shrink_to_cur(tmpstr);
79072805 9860 }
2f31ce75 9861 if (!IN_BYTES) {
95a20fc0 9862 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
9863 SvUTF8_on(tmpstr);
9864 else if (PL_encoding)
9865 sv_recode_to_utf8(tmpstr, PL_encoding);
9866 }
3280af22 9867 PL_lex_stuff = tmpstr;
6154021b 9868 pl_yylval.ival = op_type;
79072805 9869 return s;
932d0cf1
FC
9870
9871 interminable:
932d0cf1
FC
9872 SvREFCNT_dec(tmpstr);
9873 CopLINE_set(PL_curcop, (line_t)PL_multi_start - 1);
9874 missingterm(PL_tokenbuf + 1);
79072805
LW
9875}
9876
02aa26ce
NT
9877/* scan_inputsymbol
9878 takes: current position in input buffer
9879 returns: new position in input buffer
6154021b 9880 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
9881
9882 This code handles:
9883
9884 <> read from ARGV
9885 <FH> read from filehandle
9886 <pkg::FH> read from package qualified filehandle
9887 <pkg'FH> read from package qualified filehandle
9888 <$fh> read from filehandle in $fh
9889 <*.h> filename glob
9890
9891*/
9892
76e3520e 9893STATIC char *
cea2e8a9 9894S_scan_inputsymbol(pTHX_ char *start)
79072805 9895{
97aff369 9896 dVAR;
eb578fdb 9897 char *s = start; /* current position in buffer */
1b420867 9898 char *end;
79072805 9899 I32 len;
6136c704
AL
9900 char *d = PL_tokenbuf; /* start of temp holding space */
9901 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9902
7918f24d
NC
9903 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9904
1b420867
GS
9905 end = strchr(s, '\n');
9906 if (!end)
9907 end = PL_bufend;
9908 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
9909
9910 /* die if we didn't have space for the contents of the <>,
1b420867 9911 or if it didn't end, or if we see a newline
02aa26ce
NT
9912 */
9913
bb7a0f54 9914 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 9915 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 9916 if (s >= end)
cea2e8a9 9917 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 9918
fc36a67e 9919 s++;
02aa26ce
NT
9920
9921 /* check for <$fh>
9922 Remember, only scalar variables are interpreted as filehandles by
9923 this code. Anything more complex (e.g., <$fh{$num}>) will be
9924 treated as a glob() call.
9925 This code makes use of the fact that except for the $ at the front,
9926 a scalar variable and a filehandle look the same.
9927 */
4633a7c4 9928 if (*d == '$' && d[1]) d++;
02aa26ce
NT
9929
9930 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 9931 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
2a507800 9932 d += UTF ? UTF8SKIP(d) : 1;
02aa26ce
NT
9933
9934 /* If we've tried to read what we allow filehandles to look like, and
9935 there's still text left, then it must be a glob() and not a getline.
9936 Use scan_str to pull out the stuff between the <> and treat it
9937 as nothing more than a string.
9938 */
9939
3280af22 9940 if (d - PL_tokenbuf != len) {
6154021b 9941 pl_yylval.ival = OP_GLOB;
d24ca0c5 9942 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
79072805 9943 if (!s)
cea2e8a9 9944 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
9945 return s;
9946 }
395c3793 9947 else {
9b3023bc 9948 bool readline_overriden = FALSE;
6136c704 9949 GV *gv_readline;
9b3023bc 9950 GV **gvp;
02aa26ce 9951 /* we're in a filehandle read situation */
3280af22 9952 d = PL_tokenbuf;
02aa26ce
NT
9953
9954 /* turn <> into <ARGV> */
79072805 9955 if (!len)
689badd5 9956 Copy("ARGV",d,5,char);
02aa26ce 9957
9b3023bc 9958 /* Check whether readline() is overriden */
fafc274c 9959 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 9960 if ((gv_readline
ba979b31 9961 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 9962 ||
017a3ce5 9963 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 9964 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 9965 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
9966 readline_overriden = TRUE;
9967
02aa26ce
NT
9968 /* if <$fh>, create the ops to turn the variable into a
9969 filehandle
9970 */
79072805 9971 if (*d == '$') {
02aa26ce
NT
9972 /* try to find it in the pad for this block, otherwise find
9973 add symbol table ops
9974 */
bc9b26ca 9975 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 9976 if (tmp != NOT_IN_PAD) {
00b1698f 9977 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
9978 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9979 HEK * const stashname = HvNAME_HEK(stash);
9980 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 9981 sv_catpvs(sym, "::");
f558d5af
JH
9982 sv_catpv(sym, d+1);
9983 d = SvPVX(sym);
9984 goto intro_sym;
9985 }
9986 else {
6136c704 9987 OP * const o = newOP(OP_PADSV, 0);
f558d5af 9988 o->op_targ = tmp;
9b3023bc
RGS
9989 PL_lex_op = readline_overriden
9990 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9991 op_append_elem(OP_LIST, o,
9b3023bc
RGS
9992 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9993 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 9994 }
a0d0e21e
LW
9995 }
9996 else {
f558d5af
JH
9997 GV *gv;
9998 ++d;
9999intro_sym:
10000 gv = gv_fetchpv(d,
10001 (PL_in_eval
10002 ? (GV_ADDMULTI | GV_ADDINEVAL)
25db2ea6 10003 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
f558d5af 10004 SVt_PV);
9b3023bc
RGS
10005 PL_lex_op = readline_overriden
10006 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10007 op_append_elem(OP_LIST,
9b3023bc
RGS
10008 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
10009 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10010 : (OP*)newUNOP(OP_READLINE, 0,
10011 newUNOP(OP_RV2SV, 0,
10012 newGVOP(OP_GV, 0, gv)));
a0d0e21e 10013 }
7c6fadd6
RGS
10014 if (!readline_overriden)
10015 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
10016 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
10017 pl_yylval.ival = OP_NULL;
79072805 10018 }
02aa26ce
NT
10019
10020 /* If it's none of the above, it must be a literal filehandle
10021 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 10022 else {
25db2ea6 10023 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9b3023bc
RGS
10024 PL_lex_op = readline_overriden
10025 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 10026 op_append_elem(OP_LIST,
9b3023bc
RGS
10027 newGVOP(OP_GV, 0, gv),
10028 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
10029 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 10030 pl_yylval.ival = OP_NULL;
79072805
LW
10031 }
10032 }
02aa26ce 10033
79072805
LW
10034 return s;
10035}
10036
02aa26ce
NT
10037
10038/* scan_str
10039 takes: start position in buffer
09bef843
SB
10040 keep_quoted preserve \ on the embedded delimiter(s)
10041 keep_delims preserve the delimiters around the string
d24ca0c5
DM
10042 re_reparse compiling a run-time /(?{})/:
10043 collapse // to /, and skip encoding src
02aa26ce
NT
10044 returns: position to continue reading from buffer
10045 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
10046 updates the read buffer.
10047
10048 This subroutine pulls a string out of the input. It is called for:
10049 q single quotes q(literal text)
10050 ' single quotes 'literal text'
10051 qq double quotes qq(interpolate $here please)
10052 " double quotes "interpolate $here please"
10053 qx backticks qx(/bin/ls -l)
10054 ` backticks `/bin/ls -l`
10055 qw quote words @EXPORT_OK = qw( func() $spam )
10056 m// regexp match m/this/
10057 s/// regexp substitute s/this/that/
10058 tr/// string transliterate tr/this/that/
10059 y/// string transliterate y/this/that/
10060 ($*@) sub prototypes sub foo ($)
09bef843 10061 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
10062 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10063
10064 In most of these cases (all but <>, patterns and transliterate)
10065 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10066 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10067 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10068 calls scan_str().
4e553d73 10069
02aa26ce
NT
10070 It skips whitespace before the string starts, and treats the first
10071 character as the delimiter. If the delimiter is one of ([{< then
10072 the corresponding "close" character )]}> is used as the closing
10073 delimiter. It allows quoting of delimiters, and if the string has
10074 balanced delimiters ([{<>}]) it allows nesting.
10075
37fd879b
HS
10076 On success, the SV with the resulting string is put into lex_stuff or,
10077 if that is already non-NULL, into lex_repl. The second case occurs only
10078 when parsing the RHS of the special constructs s/// and tr/// (y///).
10079 For convenience, the terminating delimiter character is stuffed into
10080 SvIVX of the SV.
02aa26ce
NT
10081*/
10082
76e3520e 10083STATIC char *
d24ca0c5 10084S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
79072805 10085{
97aff369 10086 dVAR;
02aa26ce 10087 SV *sv; /* scalar value: string */
d3fcec1f 10088 const char *tmps; /* temp string, used for delimiter matching */
eb578fdb
KW
10089 char *s = start; /* current position in the buffer */
10090 char term; /* terminating character */
10091 char *to; /* current position in the sv's data */
02aa26ce 10092 I32 brackets = 1; /* bracket nesting level */
89491803 10093 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 10094 I32 termcode; /* terminating char. code */
89ebb4a3 10095 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 10096 STRLEN termlen; /* length of terminating string */
0331ef07 10097 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
10098#ifdef PERL_MAD
10099 int stuffstart;
10100 char *tstart;
10101#endif
02aa26ce 10102
7918f24d
NC
10103 PERL_ARGS_ASSERT_SCAN_STR;
10104
02aa26ce 10105 /* skip space before the delimiter */
29595ff2
NC
10106 if (isSPACE(*s)) {
10107 s = PEEKSPACE(s);
10108 }
02aa26ce 10109
5db06880 10110#ifdef PERL_MAD
cd81e915
NC
10111 if (PL_realtokenstart >= 0) {
10112 stuffstart = PL_realtokenstart;
10113 PL_realtokenstart = -1;
5db06880
NC
10114 }
10115 else
10116 stuffstart = start - SvPVX(PL_linestr);
10117#endif
02aa26ce 10118 /* mark where we are, in case we need to report errors */
79072805 10119 CLINE;
02aa26ce
NT
10120
10121 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 10122 term = *s;
220e2d4e
IH
10123 if (!UTF) {
10124 termcode = termstr[0] = term;
10125 termlen = 1;
10126 }
10127 else {
4b88fb76 10128 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
220e2d4e
IH
10129 Copy(s, termstr, termlen, U8);
10130 if (!UTF8_IS_INVARIANT(term))
10131 has_utf8 = TRUE;
10132 }
b1c7b182 10133
02aa26ce 10134 /* mark where we are */
57843af0 10135 PL_multi_start = CopLINE(PL_curcop);
3280af22 10136 PL_multi_open = term;
02aa26ce
NT
10137
10138 /* find corresponding closing delimiter */
93a17b20 10139 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
10140 termcode = termstr[0] = term = tmps[5];
10141
3280af22 10142 PL_multi_close = term;
79072805 10143
561b68a9
SH
10144 /* create a new SV to hold the contents. 79 is the SV's initial length.
10145 What a random number. */
7d0a29fe
NC
10146 sv = newSV_type(SVt_PVIV);
10147 SvGROW(sv, 80);
45977657 10148 SvIV_set(sv, termcode);
a0d0e21e 10149 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
10150
10151 /* move past delimiter and try to read a complete string */
09bef843 10152 if (keep_delims)
220e2d4e
IH
10153 sv_catpvn(sv, s, termlen);
10154 s += termlen;
5db06880
NC
10155#ifdef PERL_MAD
10156 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10157 if (!PL_thisopen && !keep_delims) {
10158 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
10159 stuffstart = s - SvPVX(PL_linestr);
10160 }
10161#endif
93a17b20 10162 for (;;) {
d24ca0c5 10163 if (PL_encoding && !UTF && !re_reparse) {
220e2d4e
IH
10164 bool cont = TRUE;
10165
10166 while (cont) {
95a20fc0 10167 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 10168 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 10169 &offset, (char*)termstr, termlen);
6136c704
AL
10170 const char * const ns = SvPVX_const(PL_linestr) + offset;
10171 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
10172
10173 for (; s < ns; s++) {
60d63348 10174 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10175 COPLINE_INC_WITH_HERELINES;
220e2d4e
IH
10176 }
10177 if (!found)
10178 goto read_more_line;
10179 else {
10180 /* handle quoted delimiters */
52327caf 10181 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 10182 const char *t;
95a20fc0 10183 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
10184 t--;
10185 if ((svlast-1 - t) % 2) {
10186 if (!keep_quoted) {
10187 *(svlast-1) = term;
10188 *svlast = '\0';
10189 SvCUR_set(sv, SvCUR(sv) - 1);
10190 }
10191 continue;
10192 }
10193 }
10194 if (PL_multi_open == PL_multi_close) {
10195 cont = FALSE;
10196 }
10197 else {
f54cb97a
AL
10198 const char *t;
10199 char *w;
0331ef07 10200 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
10201 /* At here, all closes are "was quoted" one,
10202 so we don't check PL_multi_close. */
10203 if (*t == '\\') {
10204 if (!keep_quoted && *(t+1) == PL_multi_open)
10205 t++;
10206 else
10207 *w++ = *t++;
10208 }
10209 else if (*t == PL_multi_open)
10210 brackets++;
10211
10212 *w = *t;
10213 }
10214 if (w < t) {
10215 *w++ = term;
10216 *w = '\0';
95a20fc0 10217 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 10218 }
0331ef07 10219 last_off = w - SvPVX(sv);
220e2d4e
IH
10220 if (--brackets <= 0)
10221 cont = FALSE;
10222 }
10223 }
10224 }
10225 if (!keep_delims) {
10226 SvCUR_set(sv, SvCUR(sv) - 1);
10227 *SvEND(sv) = '\0';
10228 }
10229 break;
10230 }
10231
02aa26ce 10232 /* extend sv if need be */
3280af22 10233 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 10234 /* set 'to' to the next character in the sv's string */
463ee0b2 10235 to = SvPVX(sv)+SvCUR(sv);
09bef843 10236
02aa26ce 10237 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
10238 if (PL_multi_open == PL_multi_close) {
10239 for (; s < PL_bufend; s++,to++) {
02aa26ce 10240 /* embedded newlines increment the current line number */
60d63348 10241 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10242 COPLINE_INC_WITH_HERELINES;
02aa26ce 10243 /* handle quoted delimiters */
3280af22 10244 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
d24ca0c5
DM
10245 if (!keep_quoted
10246 && (s[1] == term
10247 || (re_reparse && s[1] == '\\'))
10248 )
a0d0e21e 10249 s++;
d24ca0c5 10250 /* any other quotes are simply copied straight through */
a0d0e21e
LW
10251 else
10252 *to++ = *s++;
10253 }
02aa26ce
NT
10254 /* terminate when run out of buffer (the for() condition), or
10255 have found the terminator */
220e2d4e
IH
10256 else if (*s == term) {
10257 if (termlen == 1)
10258 break;
f3b9ce0f 10259 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
10260 break;
10261 }
63cd0674 10262 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10263 has_utf8 = TRUE;
93a17b20
LW
10264 *to = *s;
10265 }
10266 }
02aa26ce
NT
10267
10268 /* if the terminator isn't the same as the start character (e.g.,
10269 matched brackets), we have to allow more in the quoting, and
10270 be prepared for nested brackets.
10271 */
93a17b20 10272 else {
02aa26ce 10273 /* read until we run out of string, or we find the terminator */
3280af22 10274 for (; s < PL_bufend; s++,to++) {
02aa26ce 10275 /* embedded newlines increment the line count */
60d63348 10276 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
83944c01 10277 COPLINE_INC_WITH_HERELINES;
02aa26ce 10278 /* backslashes can escape the open or closing characters */
3280af22 10279 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
10280 if (!keep_quoted &&
10281 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
10282 s++;
10283 else
10284 *to++ = *s++;
10285 }
02aa26ce 10286 /* allow nested opens and closes */
3280af22 10287 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 10288 break;
3280af22 10289 else if (*s == PL_multi_open)
93a17b20 10290 brackets++;
63cd0674 10291 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10292 has_utf8 = TRUE;
93a17b20
LW
10293 *to = *s;
10294 }
10295 }
02aa26ce 10296 /* terminate the copied string and update the sv's end-of-string */
93a17b20 10297 *to = '\0';
95a20fc0 10298 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 10299
02aa26ce
NT
10300 /*
10301 * this next chunk reads more into the buffer if we're not done yet
10302 */
10303
b1c7b182
GS
10304 if (s < PL_bufend)
10305 break; /* handle case where we are done yet :-) */
79072805 10306
6a27c188 10307#ifndef PERL_STRICT_CR
95a20fc0 10308 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
10309 if ((to[-2] == '\r' && to[-1] == '\n') ||
10310 (to[-2] == '\n' && to[-1] == '\r'))
10311 {
f63a84b2
LW
10312 to[-2] = '\n';
10313 to--;
95a20fc0 10314 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
10315 }
10316 else if (to[-1] == '\r')
10317 to[-1] = '\n';
10318 }
95a20fc0 10319 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
10320 to[-1] = '\n';
10321#endif
10322
220e2d4e 10323 read_more_line:
02aa26ce
NT
10324 /* if we're out of file, or a read fails, bail and reset the current
10325 line marker so we can report where the unterminated string began
10326 */
5db06880
NC
10327#ifdef PERL_MAD
10328 if (PL_madskills) {
c35e046a 10329 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10330 if (PL_thisstuff)
10331 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10332 else
cd81e915 10333 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10334 }
10335#endif
83944c01 10336 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
10337 PL_bufptr = PL_bufend;
10338 if (!lex_next_chunk(0)) {
c07a80fd 10339 sv_free(sv);
eb160463 10340 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 10341 return NULL;
79072805 10342 }
f0e67a1d 10343 s = PL_bufptr;
5db06880
NC
10344#ifdef PERL_MAD
10345 stuffstart = 0;
10346#endif
378cc40b 10347 }
4e553d73 10348
02aa26ce
NT
10349 /* at this point, we have successfully read the delimited string */
10350
d24ca0c5 10351 if (!PL_encoding || UTF || re_reparse) {
5db06880
NC
10352#ifdef PERL_MAD
10353 if (PL_madskills) {
c35e046a 10354 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 10355 const int len = s - tstart;
cd81e915 10356 if (PL_thisstuff)
c35e046a 10357 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10358 else
c35e046a 10359 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10360 if (!PL_thisclose && !keep_delims)
10361 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
10362 }
10363#endif
10364
220e2d4e
IH
10365 if (keep_delims)
10366 sv_catpvn(sv, s, termlen);
10367 s += termlen;
10368 }
5db06880
NC
10369#ifdef PERL_MAD
10370 else {
10371 if (PL_madskills) {
c35e046a
AL
10372 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10373 const int len = s - tstart - termlen;
cd81e915 10374 if (PL_thisstuff)
c35e046a 10375 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10376 else
c35e046a 10377 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10378 if (!PL_thisclose && !keep_delims)
10379 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
10380 }
10381 }
10382#endif
d24ca0c5 10383 if (has_utf8 || (PL_encoding && !re_reparse))
b1c7b182 10384 SvUTF8_on(sv);
d0063567 10385
57843af0 10386 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
10387
10388 /* if we allocated too much space, give some back */
93a17b20
LW
10389 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10390 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 10391 SvPV_renew(sv, SvLEN(sv));
79072805 10392 }
02aa26ce
NT
10393
10394 /* decide whether this is the first or second quoted string we've read
10395 for this op
10396 */
4e553d73 10397
3280af22 10398 if (PL_lex_stuff)
7cc34111 10399 PL_sublex_info.repl = sv;
79072805 10400 else
3280af22 10401 PL_lex_stuff = sv;
378cc40b
LW
10402 return s;
10403}
10404
02aa26ce
NT
10405/*
10406 scan_num
10407 takes: pointer to position in buffer
10408 returns: pointer to new position in buffer
6154021b 10409 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
10410
10411 Read a number in any of the formats that Perl accepts:
10412
7fd134d9
JH
10413 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10414 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
10415 0b[01](_?[01])*
10416 0[0-7](_?[0-7])*
10417 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10418
3280af22 10419 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10420 thing it reads.
10421
10422 If it reads a number without a decimal point or an exponent, it will
10423 try converting the number to an integer and see if it can do so
10424 without loss of precision.
10425*/
4e553d73 10426
378cc40b 10427char *
bfed75c6 10428Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10429{
97aff369 10430 dVAR;
eb578fdb
KW
10431 const char *s = start; /* current position in buffer */
10432 char *d; /* destination in temp buffer */
10433 char *e; /* end of temp buffer */
86554af2 10434 NV nv; /* number read, as a double */
a0714e2c 10435 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10436 bool floatit; /* boolean: int or float? */
cbbf8932 10437 const char *lastub = NULL; /* position of last underbar */
bfed75c6 10438 static char const number_too_long[] = "Number too long";
378cc40b 10439
7918f24d
NC
10440 PERL_ARGS_ASSERT_SCAN_NUM;
10441
02aa26ce
NT
10442 /* We use the first character to decide what type of number this is */
10443
378cc40b 10444 switch (*s) {
79072805 10445 default:
5637ef5b 10446 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
4e553d73 10447
02aa26ce 10448 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10449 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10450 case '0':
10451 {
02aa26ce
NT
10452 /* variables:
10453 u holds the "number so far"
4f19785b
WSI
10454 shift the power of 2 of the base
10455 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10456 overflowed was the number more than we can hold?
10457
10458 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10459 we in octal/hex/binary?" indicator to disallow hex characters
10460 when in octal mode.
02aa26ce 10461 */
9e24b6e2
JH
10462 NV n = 0.0;
10463 UV u = 0;
79072805 10464 I32 shift;
9e24b6e2 10465 bool overflowed = FALSE;
61f33854 10466 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10467 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10468 static const char* const bases[5] =
10469 { "", "binary", "", "octal", "hexadecimal" };
10470 static const char* const Bases[5] =
10471 { "", "Binary", "", "Octal", "Hexadecimal" };
10472 static const char* const maxima[5] =
10473 { "",
10474 "0b11111111111111111111111111111111",
10475 "",
10476 "037777777777",
10477 "0xffffffff" };
bfed75c6 10478 const char *base, *Base, *max;
378cc40b 10479
02aa26ce 10480 /* check for hex */
a674e8db 10481 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10482 shift = 4;
10483 s += 2;
61f33854 10484 just_zero = FALSE;
a674e8db 10485 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10486 shift = 1;
10487 s += 2;
61f33854 10488 just_zero = FALSE;
378cc40b 10489 }
02aa26ce 10490 /* check for a decimal in disguise */
b78218b7 10491 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10492 goto decimal;
02aa26ce 10493 /* so it must be octal */
928753ea 10494 else {
378cc40b 10495 shift = 3;
928753ea
JH
10496 s++;
10497 }
10498
10499 if (*s == '_') {
a2a5de95 10500 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10501 "Misplaced _ in number");
10502 lastub = s++;
10503 }
9e24b6e2
JH
10504
10505 base = bases[shift];
10506 Base = Bases[shift];
10507 max = maxima[shift];
02aa26ce 10508
4f19785b 10509 /* read the rest of the number */
378cc40b 10510 for (;;) {
9e24b6e2 10511 /* x is used in the overflow test,
893fe2c2 10512 b is the digit we're adding on. */
9e24b6e2 10513 UV x, b;
55497cff 10514
378cc40b 10515 switch (*s) {
02aa26ce
NT
10516
10517 /* if we don't mention it, we're done */
378cc40b
LW
10518 default:
10519 goto out;
02aa26ce 10520
928753ea 10521 /* _ are ignored -- but warned about if consecutive */
de3bb511 10522 case '_':
a2a5de95
NC
10523 if (lastub && s == lastub + 1)
10524 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10525 "Misplaced _ in number");
928753ea 10526 lastub = s++;
de3bb511 10527 break;
02aa26ce
NT
10528
10529 /* 8 and 9 are not octal */
378cc40b 10530 case '8': case '9':
4f19785b 10531 if (shift == 3)
cea2e8a9 10532 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10533 /* FALL THROUGH */
02aa26ce
NT
10534
10535 /* octal digits */
4f19785b 10536 case '2': case '3': case '4':
378cc40b 10537 case '5': case '6': case '7':
4f19785b 10538 if (shift == 1)
cea2e8a9 10539 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10540 /* FALL THROUGH */
10541
10542 case '0': case '1':
02aa26ce 10543 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10544 goto digit;
02aa26ce
NT
10545
10546 /* hex digits */
378cc40b
LW
10547 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10548 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10549 /* make sure they said 0x */
378cc40b
LW
10550 if (shift != 4)
10551 goto out;
55497cff 10552 b = (*s++ & 7) + 9;
02aa26ce
NT
10553
10554 /* Prepare to put the digit we have onto the end
10555 of the number so far. We check for overflows.
10556 */
10557
55497cff 10558 digit:
61f33854 10559 just_zero = FALSE;
9e24b6e2
JH
10560 if (!overflowed) {
10561 x = u << shift; /* make room for the digit */
10562
10563 if ((x >> shift) != u
10564 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10565 overflowed = TRUE;
10566 n = (NV) u;
9b387841
NC
10567 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10568 "Integer overflow in %s number",
10569 base);
9e24b6e2
JH
10570 } else
10571 u = x | b; /* add the digit to the end */
10572 }
10573 if (overflowed) {
10574 n *= nvshift[shift];
10575 /* If an NV has not enough bits in its
10576 * mantissa to represent an UV this summing of
10577 * small low-order numbers is a waste of time
10578 * (because the NV cannot preserve the
10579 * low-order bits anyway): we could just
10580 * remember when did we overflow and in the
10581 * end just multiply n by the right
10582 * amount. */
10583 n += (NV) b;
55497cff 10584 }
378cc40b
LW
10585 break;
10586 }
10587 }
02aa26ce
NT
10588
10589 /* if we get here, we had success: make a scalar value from
10590 the number.
10591 */
378cc40b 10592 out:
928753ea
JH
10593
10594 /* final misplaced underbar check */
10595 if (s[-1] == '_') {
a2a5de95 10596 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10597 }
10598
9e24b6e2 10599 if (overflowed) {
a2a5de95
NC
10600 if (n > 4294967295.0)
10601 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10602 "%s number > %s non-portable",
10603 Base, max);
b081dd7e 10604 sv = newSVnv(n);
9e24b6e2
JH
10605 }
10606 else {
15041a67 10607#if UVSIZE > 4
a2a5de95
NC
10608 if (u > 0xffffffff)
10609 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10610 "%s number > %s non-portable",
10611 Base, max);
2cc4c2dc 10612#endif
b081dd7e 10613 sv = newSVuv(u);
9e24b6e2 10614 }
61f33854 10615 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10616 sv = new_constant(start, s - start, "integer",
eb0d8d16 10617 sv, NULL, NULL, 0);
61f33854 10618 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10619 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10620 }
10621 break;
02aa26ce
NT
10622
10623 /*
10624 handle decimal numbers.
10625 we're also sent here when we read a 0 as the first digit
10626 */
378cc40b
LW
10627 case '1': case '2': case '3': case '4': case '5':
10628 case '6': case '7': case '8': case '9': case '.':
10629 decimal:
3280af22
NIS
10630 d = PL_tokenbuf;
10631 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10632 floatit = FALSE;
02aa26ce
NT
10633
10634 /* read next group of digits and _ and copy into d */
de3bb511 10635 while (isDIGIT(*s) || *s == '_') {
4e553d73 10636 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10637 if -w is on
10638 */
93a17b20 10639 if (*s == '_') {
a2a5de95
NC
10640 if (lastub && s == lastub + 1)
10641 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10642 "Misplaced _ in number");
928753ea 10643 lastub = s++;
93a17b20 10644 }
fc36a67e 10645 else {
02aa26ce 10646 /* check for end of fixed-length buffer */
fc36a67e 10647 if (d >= e)
cea2e8a9 10648 Perl_croak(aTHX_ number_too_long);
02aa26ce 10649 /* if we're ok, copy the character */
378cc40b 10650 *d++ = *s++;
fc36a67e 10651 }
378cc40b 10652 }
02aa26ce
NT
10653
10654 /* final misplaced underbar check */
928753ea 10655 if (lastub && s == lastub + 1) {
a2a5de95 10656 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10657 }
02aa26ce
NT
10658
10659 /* read a decimal portion if there is one. avoid
10660 3..5 being interpreted as the number 3. followed
10661 by .5
10662 */
2f3197b3 10663 if (*s == '.' && s[1] != '.') {
79072805 10664 floatit = TRUE;
378cc40b 10665 *d++ = *s++;
02aa26ce 10666
928753ea 10667 if (*s == '_') {
a2a5de95
NC
10668 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10669 "Misplaced _ in number");
928753ea
JH
10670 lastub = s;
10671 }
10672
10673 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10674 */
fc36a67e 10675 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10676 /* fixed length buffer check */
fc36a67e 10677 if (d >= e)
cea2e8a9 10678 Perl_croak(aTHX_ number_too_long);
928753ea 10679 if (*s == '_') {
a2a5de95
NC
10680 if (lastub && s == lastub + 1)
10681 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10682 "Misplaced _ in number");
928753ea
JH
10683 lastub = s;
10684 }
10685 else
fc36a67e 10686 *d++ = *s;
378cc40b 10687 }
928753ea
JH
10688 /* fractional part ending in underbar? */
10689 if (s[-1] == '_') {
a2a5de95
NC
10690 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10691 "Misplaced _ in number");
928753ea 10692 }
dd629d5b
GS
10693 if (*s == '.' && isDIGIT(s[1])) {
10694 /* oops, it's really a v-string, but without the "v" */
f4758303 10695 s = start;
dd629d5b
GS
10696 goto vstring;
10697 }
378cc40b 10698 }
02aa26ce
NT
10699
10700 /* read exponent part, if present */
3792a11b 10701 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10702 floatit = TRUE;
10703 s++;
02aa26ce
NT
10704
10705 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10706 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10707
7fd134d9
JH
10708 /* stray preinitial _ */
10709 if (*s == '_') {
a2a5de95
NC
10710 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10711 "Misplaced _ in number");
7fd134d9
JH
10712 lastub = s++;
10713 }
10714
02aa26ce 10715 /* allow positive or negative exponent */
378cc40b
LW
10716 if (*s == '+' || *s == '-')
10717 *d++ = *s++;
02aa26ce 10718
7fd134d9
JH
10719 /* stray initial _ */
10720 if (*s == '_') {
a2a5de95
NC
10721 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10722 "Misplaced _ in number");
7fd134d9
JH
10723 lastub = s++;
10724 }
10725
7fd134d9
JH
10726 /* read digits of exponent */
10727 while (isDIGIT(*s) || *s == '_') {
10728 if (isDIGIT(*s)) {
10729 if (d >= e)
10730 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10731 *d++ = *s++;
7fd134d9
JH
10732 }
10733 else {
041457d9 10734 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10735 (!isDIGIT(s[1]) && s[1] != '_')))
10736 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10737 "Misplaced _ in number");
b3b48e3e 10738 lastub = s++;
7fd134d9 10739 }
7fd134d9 10740 }
378cc40b 10741 }
02aa26ce 10742
02aa26ce 10743
0b7fceb9 10744 /*
58bb9ec3
NC
10745 We try to do an integer conversion first if no characters
10746 indicating "float" have been found.
0b7fceb9
MU
10747 */
10748
10749 if (!floatit) {
58bb9ec3 10750 UV uv;
6136c704 10751 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10752
10753 if (flags == IS_NUMBER_IN_UV) {
10754 if (uv <= IV_MAX)
b081dd7e 10755 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10756 else
b081dd7e 10757 sv = newSVuv(uv);
58bb9ec3
NC
10758 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10759 if (uv <= (UV) IV_MIN)
b081dd7e 10760 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10761 else
10762 floatit = TRUE;
10763 } else
10764 floatit = TRUE;
10765 }
0b7fceb9 10766 if (floatit) {
58bb9ec3
NC
10767 /* terminate the string */
10768 *d = '\0';
86554af2 10769 nv = Atof(PL_tokenbuf);
b081dd7e 10770 sv = newSVnv(nv);
86554af2 10771 }
86554af2 10772
eb0d8d16
NC
10773 if ( floatit
10774 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10775 const char *const key = floatit ? "float" : "integer";
10776 const STRLEN keylen = floatit ? 5 : 7;
10777 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10778 key, keylen, sv, NULL, NULL, 0);
10779 }
378cc40b 10780 break;
0b7fceb9 10781
e312add1 10782 /* if it starts with a v, it could be a v-string */
a7cb1f99 10783 case 'v':
dd629d5b 10784vstring:
561b68a9 10785 sv = newSV(5); /* preallocate storage space */
65b06e02 10786 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10787 break;
79072805 10788 }
a687059c 10789
02aa26ce
NT
10790 /* make the op for the constant and return */
10791
a86a20aa 10792 if (sv)
b73d6f50 10793 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10794 else
5f66b61c 10795 lvalp->opval = NULL;
a687059c 10796
73d840c0 10797 return (char *)s;
378cc40b
LW
10798}
10799
76e3520e 10800STATIC char *
cea2e8a9 10801S_scan_formline(pTHX_ register char *s)
378cc40b 10802{
97aff369 10803 dVAR;
eb578fdb
KW
10804 char *eol;
10805 char *t;
6136c704 10806 SV * const stuff = newSVpvs("");
79072805 10807 bool needargs = FALSE;
c5ee2135 10808 bool eofmt = FALSE;
5db06880
NC
10809#ifdef PERL_MAD
10810 char *tokenstart = s;
4f61fd4b
JC
10811 SV* savewhite = NULL;
10812
5db06880 10813 if (PL_madskills) {
cd81e915
NC
10814 savewhite = PL_thiswhite;
10815 PL_thiswhite = 0;
5db06880
NC
10816 }
10817#endif
378cc40b 10818
7918f24d
NC
10819 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10820
79072805 10821 while (!needargs) {
a1b95068 10822 if (*s == '.') {
c35e046a 10823 t = s+1;
51882d45 10824#ifdef PERL_STRICT_CR
c35e046a
AL
10825 while (SPACE_OR_TAB(*t))
10826 t++;
51882d45 10827#else
c35e046a
AL
10828 while (SPACE_OR_TAB(*t) || *t == '\r')
10829 t++;
51882d45 10830#endif
c5ee2135
WL
10831 if (*t == '\n' || t == PL_bufend) {
10832 eofmt = TRUE;
79072805 10833 break;
c5ee2135 10834 }
79072805 10835 }
583c9d5c
FC
10836 eol = (char *) memchr(s,'\n',PL_bufend-s);
10837 if (!eol++)
3280af22 10838 eol = PL_bufend;
79072805 10839 if (*s != '#') {
a0d0e21e
LW
10840 for (t = s; t < eol; t++) {
10841 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10842 needargs = FALSE;
10843 goto enough; /* ~~ must be first line in formline */
378cc40b 10844 }
a0d0e21e
LW
10845 if (*t == '@' || *t == '^')
10846 needargs = TRUE;
378cc40b 10847 }
7121b347
MG
10848 if (eol > s) {
10849 sv_catpvn(stuff, s, eol-s);
2dc4c65b 10850#ifndef PERL_STRICT_CR
7121b347
MG
10851 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10852 char *end = SvPVX(stuff) + SvCUR(stuff);
10853 end[-2] = '\n';
10854 end[-1] = '\0';
b162af07 10855 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 10856 }
2dc4c65b 10857#endif
7121b347
MG
10858 }
10859 else
10860 break;
79072805 10861 }
95a20fc0 10862 s = (char*)eol;
583c9d5c
FC
10863 if ((PL_rsfp || PL_parser->filtered)
10864 && PL_parser->form_lex_state == LEX_NORMAL) {
f0e67a1d 10865 bool got_some;
5db06880
NC
10866#ifdef PERL_MAD
10867 if (PL_madskills) {
cd81e915
NC
10868 if (PL_thistoken)
10869 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 10870 else
cd81e915 10871 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
10872 }
10873#endif
f0e67a1d 10874 PL_bufptr = PL_bufend;
83944c01 10875 COPLINE_INC_WITH_HERELINES;
f0e67a1d
Z
10876 got_some = lex_next_chunk(0);
10877 CopLINE_dec(PL_curcop);
10878 s = PL_bufptr;
5db06880 10879#ifdef PERL_MAD
f0e67a1d 10880 tokenstart = PL_bufptr;
5db06880 10881#endif
f0e67a1d 10882 if (!got_some)
378cc40b 10883 break;
378cc40b 10884 }
463ee0b2 10885 incline(s);
79072805 10886 }
a0d0e21e 10887 enough:
5c9ae74d
FC
10888 if (!SvCUR(stuff) || needargs)
10889 PL_lex_state = PL_parser->form_lex_state;
a0d0e21e 10890 if (SvCUR(stuff)) {
705fe0e5 10891 PL_expect = XSTATE;
79072805 10892 if (needargs) {
cd81e915 10893 start_force(PL_curforce);
9ded7720 10894 NEXTVAL_NEXTTOKE.ival = 0;
705fe0e5 10895 force_next(FORMLBRACK);
79072805 10896 }
1bd51a4c 10897 if (!IN_BYTES) {
95a20fc0 10898 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
10899 SvUTF8_on(stuff);
10900 else if (PL_encoding)
10901 sv_recode_to_utf8(stuff, PL_encoding);
10902 }
cd81e915 10903 start_force(PL_curforce);
9ded7720 10904 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 10905 force_next(THING);
378cc40b 10906 }
79072805 10907 else {
8990e307 10908 SvREFCNT_dec(stuff);
c5ee2135
WL
10909 if (eofmt)
10910 PL_lex_formbrack = 0;
79072805 10911 }
5db06880
NC
10912#ifdef PERL_MAD
10913 if (PL_madskills) {
cd81e915
NC
10914 if (PL_thistoken)
10915 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 10916 else
cd81e915
NC
10917 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10918 PL_thiswhite = savewhite;
5db06880
NC
10919 }
10920#endif
79072805 10921 return s;
378cc40b 10922}
a687059c 10923
ba6d6ac9 10924I32
864dbfa3 10925Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 10926{
97aff369 10927 dVAR;
a3b680e6 10928 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 10929 CV* const outsidecv = PL_compcv;
8990e307 10930
7766f137 10931 SAVEI32(PL_subline);
3280af22 10932 save_item(PL_subname);
3280af22 10933 SAVESPTR(PL_compcv);
3280af22 10934
ea726b52 10935 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
10936 CvFLAGS(PL_compcv) |= flags;
10937
57843af0 10938 PL_subline = CopLINE(PL_curcop);
dd2155a4 10939 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 10940 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 10941 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
db4cf31d
FC
10942 if (outsidecv && CvPADLIST(outsidecv))
10943 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
748a9306 10944
8990e307
LW
10945 return oldsavestack_ix;
10946}
10947
084592ab
CN
10948#ifdef __SC__
10949#pragma segment Perl_yylex
10950#endif
af41e527 10951static int
19c62481 10952S_yywarn(pTHX_ const char *const s, U32 flags)
8990e307 10953{
97aff369 10954 dVAR;
7918f24d
NC
10955
10956 PERL_ARGS_ASSERT_YYWARN;
10957
faef0170 10958 PL_in_eval |= EVAL_WARNONLY;
19c62481 10959 yyerror_pv(s, flags);
faef0170 10960 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 10961 return 0;
8990e307
LW
10962}
10963
10964int
15f169a1 10965Perl_yyerror(pTHX_ const char *const s)
463ee0b2 10966{
19c62481
BF
10967 PERL_ARGS_ASSERT_YYERROR;
10968 return yyerror_pvn(s, strlen(s), 0);
10969}
10970
10971int
10972Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10973{
10974 PERL_ARGS_ASSERT_YYERROR_PV;
10975 return yyerror_pvn(s, strlen(s), flags);
10976}
10977
10978int
19c62481
BF
10979Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10980{
97aff369 10981 dVAR;
bfed75c6 10982 const char *context = NULL;
68dc0745 10983 int contlen = -1;
46fc3d4c 10984 SV *msg;
19c62481 10985 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
5912531f 10986 int yychar = PL_parser->yychar;
19c62481 10987 U32 is_utf8 = flags & SVf_UTF8;
463ee0b2 10988
19c62481 10989 PERL_ARGS_ASSERT_YYERROR_PVN;
7918f24d 10990
3280af22 10991 if (!yychar || (yychar == ';' && !PL_rsfp))
19c62481 10992 sv_catpvs(where_sv, "at EOF");
8bcfe651
TM
10993 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10994 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10995 PL_oldbufptr != PL_bufptr) {
f355267c
JH
10996 /*
10997 Only for NetWare:
10998 The code below is removed for NetWare because it abends/crashes on NetWare
10999 when the script has error such as not having the closing quotes like:
11000 if ($var eq "value)
11001 Checking of white spaces is anyway done in NetWare code.
11002 */
11003#ifndef NETWARE
3280af22
NIS
11004 while (isSPACE(*PL_oldoldbufptr))
11005 PL_oldoldbufptr++;
f355267c 11006#endif
3280af22
NIS
11007 context = PL_oldoldbufptr;
11008 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 11009 }
8bcfe651
TM
11010 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
11011 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
11012 /*
11013 Only for NetWare:
11014 The code below is removed for NetWare because it abends/crashes on NetWare
11015 when the script has error such as not having the closing quotes like:
11016 if ($var eq "value)
11017 Checking of white spaces is anyway done in NetWare code.
11018 */
11019#ifndef NETWARE
3280af22
NIS
11020 while (isSPACE(*PL_oldbufptr))
11021 PL_oldbufptr++;
f355267c 11022#endif
3280af22
NIS
11023 context = PL_oldbufptr;
11024 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
11025 }
11026 else if (yychar > 255)
19c62481 11027 sv_catpvs(where_sv, "next token ???");
12fbd33b 11028 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
11029 if (PL_lex_state == LEX_NORMAL ||
11030 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
19c62481 11031 sv_catpvs(where_sv, "at end of line");
3280af22 11032 else if (PL_lex_inpat)
19c62481 11033 sv_catpvs(where_sv, "within pattern");
463ee0b2 11034 else
19c62481 11035 sv_catpvs(where_sv, "within string");
463ee0b2 11036 }
46fc3d4c 11037 else {
19c62481 11038 sv_catpvs(where_sv, "next char ");
46fc3d4c 11039 if (yychar < 32)
cea2e8a9 11040 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 11041 else if (isPRINT_LC(yychar)) {
88c9ea1e 11042 const char string = yychar;
5e7aa789
NC
11043 sv_catpvn(where_sv, &string, 1);
11044 }
463ee0b2 11045 else
cea2e8a9 11046 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
463ee0b2 11047 }
19c62481 11048 msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
ed094faf 11049 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 11050 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 11051 if (context)
19c62481
BF
11052 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
11053 SVfARG(newSVpvn_flags(context, contlen,
11054 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
463ee0b2 11055 else
19c62481 11056 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
57843af0 11057 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 11058 Perl_sv_catpvf(aTHX_ msg,
57def98f 11059 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 11060 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 11061 PL_multi_end = 0;
a0d0e21e 11062 }
500960a6 11063 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 11064 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 11065 }
463ee0b2 11066 else
5a844595 11067 qerror(msg);
c7d6bfb2
GS
11068 if (PL_error_count >= 10) {
11069 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 11070 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 11071 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
11072 else
11073 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 11074 OutCopFILE(PL_curcop));
c7d6bfb2 11075 }
3280af22 11076 PL_in_my = 0;
5c284bb0 11077 PL_in_my_stash = NULL;
463ee0b2
LW
11078 return 0;
11079}
084592ab
CN
11080#ifdef __SC__
11081#pragma segment Main
11082#endif
4e35701f 11083
b250498f 11084STATIC char*
3ae08724 11085S_swallow_bom(pTHX_ U8 *s)
01ec43d0 11086{
97aff369 11087 dVAR;
f54cb97a 11088 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
11089
11090 PERL_ARGS_ASSERT_SWALLOW_BOM;
11091
7aa207d6 11092 switch (s[0]) {
4e553d73
NIS
11093 case 0xFF:
11094 if (s[1] == 0xFE) {
ee6ba15d 11095 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 11096 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
dcbac5bb 11097 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11098 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 11099#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 11100 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 11101 s += 2;
dea0fc0b 11102 if (PL_bufend > (char*)s) {
81a923f4 11103 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 11104 }
b250498f 11105#else
dcbac5bb 11106 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11107 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 11108#endif
01ec43d0
GS
11109 }
11110 break;
78ae23f5 11111 case 0xFE:
7aa207d6 11112 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 11113#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11114 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
11115 s += 2;
11116 if (PL_bufend > (char *)s) {
81a923f4 11117 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 11118 }
b250498f 11119#else
dcbac5bb 11120 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11121 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 11122#endif
01ec43d0
GS
11123 }
11124 break;
3ae08724
GS
11125 case 0xEF:
11126 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 11127 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
11128 s += 3; /* UTF-8 */
11129 }
11130 break;
11131 case 0:
7aa207d6
JH
11132 if (slen > 3) {
11133 if (s[1] == 0) {
11134 if (s[2] == 0xFE && s[3] == 0xFF) {
11135 /* UTF-32 big-endian */
dcbac5bb 11136 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11137 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
11138 }
11139 }
11140 else if (s[2] == 0 && s[3] != 0) {
11141 /* Leading bytes
11142 * 00 xx 00 xx
11143 * are a good indicator of UTF-16BE. */
ee6ba15d 11144#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11145 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
11146 s = add_utf16_textfilter(s, FALSE);
11147#else
dcbac5bb 11148 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11149 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11150#endif
7aa207d6 11151 }
01ec43d0 11152 }
e294cc5d
JH
11153#ifdef EBCDIC
11154 case 0xDD:
11155 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11156 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11157 s += 4; /* UTF-8 */
11158 }
11159 break;
11160#endif
11161
7aa207d6
JH
11162 default:
11163 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11164 /* Leading bytes
11165 * xx 00 xx 00
11166 * are a good indicator of UTF-16LE. */
ee6ba15d 11167#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11168 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 11169 s = add_utf16_textfilter(s, TRUE);
ee6ba15d 11170#else
dcbac5bb 11171 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11172 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11173#endif
7aa207d6 11174 }
01ec43d0 11175 }
b8f84bb2 11176 return (char*)s;
b250498f 11177}
4755096e 11178
6e3aabd6
GS
11179
11180#ifndef PERL_NO_UTF16_FILTER
11181static I32
a28af015 11182S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 11183{
97aff369 11184 dVAR;
f3040f2c 11185 SV *const filter = FILTER_DATA(idx);
2a773401
NC
11186 /* We re-use this each time round, throwing the contents away before we
11187 return. */
2a773401 11188 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 11189 SV *const utf8_buffer = filter;
c28d6105 11190 IV status = IoPAGE(filter);
f2338a2e 11191 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 11192 I32 retval;
c8b0cbae 11193
c85ae797
NC
11194 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11195
c8b0cbae
NC
11196 /* As we're automatically added, at the lowest level, and hence only called
11197 from this file, we can be sure that we're not called in block mode. Hence
11198 don't bother writing code to deal with block mode. */
11199 if (maxlen) {
11200 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11201 }
c28d6105
NC
11202 if (status < 0) {
11203 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11204 }
1de9afcd 11205 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 11206 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 11207 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
11208 reverse ? 'l' : 'b', idx, maxlen, status,
11209 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11210
11211 while (1) {
11212 STRLEN chars;
11213 STRLEN have;
dea0fc0b 11214 I32 newlen;
2a773401 11215 U8 *end;
c28d6105
NC
11216 /* First, look in our buffer of existing UTF-8 data: */
11217 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11218
11219 if (nl) {
11220 ++nl;
11221 } else if (status == 0) {
11222 /* EOF */
11223 IoPAGE(filter) = 0;
11224 nl = SvEND(utf8_buffer);
11225 }
11226 if (nl) {
d2d1d4de
NC
11227 STRLEN got = nl - SvPVX(utf8_buffer);
11228 /* Did we have anything to append? */
11229 retval = got != 0;
11230 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
11231 /* Everything else in this code works just fine if SVp_POK isn't
11232 set. This, however, needs it, and we need it to work, else
11233 we loop infinitely because the buffer is never consumed. */
11234 sv_chop(utf8_buffer, nl);
11235 break;
11236 }
ba77e4cc 11237
c28d6105
NC
11238 /* OK, not a complete line there, so need to read some more UTF-16.
11239 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
11240 while (1) {
11241 if (status <= 0)
11242 break;
11243 if (SvCUR(utf16_buffer) >= 2) {
11244 /* Location of the high octet of the last complete code point.
11245 Gosh, UTF-16 is a pain. All the benefits of variable length,
11246 *coupled* with all the benefits of partial reads and
11247 endianness. */
11248 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11249 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11250
11251 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11252 break;
11253 }
11254
11255 /* We have the first half of a surrogate. Read more. */
11256 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11257 }
c28d6105 11258
c28d6105
NC
11259 status = FILTER_READ(idx + 1, utf16_buffer,
11260 160 + (SvCUR(utf16_buffer) & 1));
11261 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 11262 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
11263 if (status < 0) {
11264 /* Error */
11265 IoPAGE(filter) = status;
11266 return status;
11267 }
11268 }
11269
11270 chars = SvCUR(utf16_buffer) >> 1;
11271 have = SvCUR(utf8_buffer);
11272 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 11273
aa6dbd60 11274 if (reverse) {
c28d6105
NC
11275 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11276 (U8*)SvPVX_const(utf8_buffer) + have,
11277 chars * 2, &newlen);
aa6dbd60 11278 } else {
2a773401 11279 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
11280 (U8*)SvPVX_const(utf8_buffer) + have,
11281 chars * 2, &newlen);
2a773401 11282 }
c28d6105 11283 SvCUR_set(utf8_buffer, have + newlen);
2a773401 11284 *end = '\0';
c28d6105 11285
e07286ed
NC
11286 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11287 it's private to us, and utf16_to_utf8{,reversed} take a
11288 (pointer,length) pair, rather than a NUL-terminated string. */
11289 if(SvCUR(utf16_buffer) & 1) {
11290 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11291 SvCUR_set(utf16_buffer, 1);
11292 } else {
11293 SvCUR_set(utf16_buffer, 0);
11294 }
2a773401 11295 }
c28d6105
NC
11296 DEBUG_P(PerlIO_printf(Perl_debug_log,
11297 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11298 status,
11299 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11300 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 11301 return retval;
6e3aabd6 11302}
81a923f4
NC
11303
11304static U8 *
11305S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11306{
2a773401 11307 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 11308
c85ae797
NC
11309 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11310
c28d6105 11311 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 11312 sv_setpvs(filter, "");
2a773401 11313 IoLINES(filter) = reversed;
c28d6105
NC
11314 IoPAGE(filter) = 1; /* Not EOF */
11315
11316 /* Sadly, we have to return a valid pointer, come what may, so we have to
11317 ignore any error return from this. */
11318 SvCUR_set(PL_linestr, 0);
11319 if (FILTER_READ(0, PL_linestr, 0)) {
11320 SvUTF8_on(PL_linestr);
81a923f4 11321 } else {
c28d6105 11322 SvUTF8_on(PL_linestr);
81a923f4 11323 }
c28d6105 11324 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
11325 return (U8*)SvPVX(PL_linestr);
11326}
6e3aabd6 11327#endif
9f4817db 11328
f333445c
JP
11329/*
11330Returns a pointer to the next character after the parsed
11331vstring, as well as updating the passed in sv.
11332
11333Function must be called like
11334
561b68a9 11335 sv = newSV(5);
65b06e02 11336 s = scan_vstring(s,e,sv);
f333445c 11337
65b06e02 11338where s and e are the start and end of the string.
f333445c
JP
11339The sv should already be large enough to store the vstring
11340passed in, for performance reasons.
11341
11342*/
11343
11344char *
15f169a1 11345Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 11346{
97aff369 11347 dVAR;
bfed75c6
AL
11348 const char *pos = s;
11349 const char *start = s;
7918f24d
NC
11350
11351 PERL_ARGS_ASSERT_SCAN_VSTRING;
11352
f333445c 11353 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 11354 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 11355 pos++;
f333445c
JP
11356 if ( *pos != '.') {
11357 /* this may not be a v-string if followed by => */
bfed75c6 11358 const char *next = pos;
65b06e02 11359 while (next < e && isSPACE(*next))
8fc7bb1c 11360 ++next;
65b06e02 11361 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
11362 /* return string not v-string */
11363 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 11364 return (char *)pos;
f333445c
JP
11365 }
11366 }
11367
11368 if (!isALPHA(*pos)) {
89ebb4a3 11369 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 11370
d4c19fe8
AL
11371 if (*s == 'v')
11372 s++; /* get past 'v' */
f333445c 11373
76f68e9b 11374 sv_setpvs(sv, "");
f333445c
JP
11375
11376 for (;;) {
d4c19fe8 11377 /* this is atoi() that tolerates underscores */
0bd48802
AL
11378 U8 *tmpend;
11379 UV rev = 0;
d4c19fe8
AL
11380 const char *end = pos;
11381 UV mult = 1;
11382 while (--end >= s) {
11383 if (*end != '_') {
11384 const UV orev = rev;
f333445c
JP
11385 rev += (*end - '0') * mult;
11386 mult *= 10;
9b387841 11387 if (orev > rev)
dcbac5bb 11388 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
11389 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11390 "Integer overflow in decimal number");
f333445c
JP
11391 }
11392 }
11393#ifdef EBCDIC
11394 if (rev > 0x7FFFFFFF)
11395 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11396#endif
11397 /* Append native character for the rev point */
11398 tmpend = uvchr_to_utf8(tmpbuf, rev);
11399 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11400 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11401 SvUTF8_on(sv);
65b06e02 11402 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
11403 s = ++pos;
11404 else {
11405 s = pos;
11406 break;
11407 }
65b06e02 11408 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
11409 pos++;
11410 }
11411 SvPOK_on(sv);
11412 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11413 SvRMAGICAL_on(sv);
11414 }
73d840c0 11415 return (char *)s;
f333445c
JP
11416}
11417
88e1f1a2
JV
11418int
11419Perl_keyword_plugin_standard(pTHX_
11420 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11421{
11422 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11423 PERL_UNUSED_CONTEXT;
11424 PERL_UNUSED_ARG(keyword_ptr);
11425 PERL_UNUSED_ARG(keyword_len);
11426 PERL_UNUSED_ARG(op_ptr);
11427 return KEYWORD_PLUGIN_DECLINE;
11428}
11429
78cdf107 11430#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11431static void
78cdf107 11432S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11433{
11434 SAVEI32(PL_lex_brackets);
11435 if (PL_lex_brackets > 100)
11436 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11437 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11438 SAVEI32(PL_lex_allbrackets);
11439 PL_lex_allbrackets = 0;
11440 SAVEI8(PL_lex_fakeeof);
2dcac756 11441 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11442 if(yyparse(gramtype) && !PL_parser->error_count)
11443 qerror(Perl_mess(aTHX_ "Parse error"));
11444}
11445
78cdf107 11446#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11447static OP *
78cdf107 11448S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11449{
11450 OP *o;
11451 ENTER;
11452 SAVEVPTR(PL_eval_root);
11453 PL_eval_root = NULL;
78cdf107 11454 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11455 o = PL_eval_root;
11456 LEAVE;
11457 return o;
11458}
11459
78cdf107
Z
11460#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11461static OP *
11462S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11463{
11464 OP *exprop;
11465 if (flags & ~PARSE_OPTIONAL)
11466 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11467 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11468 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11469 if (!PL_parser->error_count)
11470 qerror(Perl_mess(aTHX_ "Parse error"));
11471 exprop = newOP(OP_NULL, 0);
11472 }
11473 return exprop;
11474}
11475
11476/*
11477=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11478
11479Parse a Perl arithmetic expression. This may contain operators of precedence
11480down to the bit shift operators. The expression must be followed (and thus
11481terminated) either by a comparison or lower-precedence operator or by
11482something that would normally terminate an expression such as semicolon.
11483If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11484otherwise it is mandatory. It is up to the caller to ensure that the
11485dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11486the source of the code to be parsed and the lexical context for the
11487expression.
11488
11489The op tree representing the expression is returned. If an optional
11490expression is absent, a null pointer is returned, otherwise the pointer
11491will be non-null.
11492
11493If an error occurs in parsing or compilation, in most cases a valid op
11494tree is returned anyway. The error is reflected in the parser state,
11495normally resulting in a single exception at the top level of parsing
11496which covers all the compilation errors that occurred. Some compilation
11497errors, however, will throw an exception immediately.
11498
11499=cut
11500*/
11501
11502OP *
11503Perl_parse_arithexpr(pTHX_ U32 flags)
11504{
11505 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11506}
11507
11508/*
11509=for apidoc Amx|OP *|parse_termexpr|U32 flags
11510
11511Parse a Perl term expression. This may contain operators of precedence
11512down to the assignment operators. The expression must be followed (and thus
11513terminated) either by a comma or lower-precedence operator or by
11514something that would normally terminate an expression such as semicolon.
11515If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11516otherwise it is mandatory. It is up to the caller to ensure that the
11517dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11518the source of the code to be parsed and the lexical context for the
11519expression.
11520
11521The op tree representing the expression is returned. If an optional
11522expression is absent, a null pointer is returned, otherwise the pointer
11523will be non-null.
11524
11525If an error occurs in parsing or compilation, in most cases a valid op
11526tree is returned anyway. The error is reflected in the parser state,
11527normally resulting in a single exception at the top level of parsing
11528which covers all the compilation errors that occurred. Some compilation
11529errors, however, will throw an exception immediately.
11530
11531=cut
11532*/
11533
11534OP *
11535Perl_parse_termexpr(pTHX_ U32 flags)
11536{
11537 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11538}
11539
11540/*
11541=for apidoc Amx|OP *|parse_listexpr|U32 flags
11542
11543Parse a Perl list expression. This may contain operators of precedence
11544down to the comma operator. The expression must be followed (and thus
11545terminated) either by a low-precedence logic operator such as C<or> or by
11546something that would normally terminate an expression such as semicolon.
11547If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11548otherwise it is mandatory. It is up to the caller to ensure that the
11549dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11550the source of the code to be parsed and the lexical context for the
11551expression.
11552
11553The op tree representing the expression is returned. If an optional
11554expression is absent, a null pointer is returned, otherwise the pointer
11555will be non-null.
11556
11557If an error occurs in parsing or compilation, in most cases a valid op
11558tree is returned anyway. The error is reflected in the parser state,
11559normally resulting in a single exception at the top level of parsing
11560which covers all the compilation errors that occurred. Some compilation
11561errors, however, will throw an exception immediately.
11562
11563=cut
11564*/
11565
11566OP *
11567Perl_parse_listexpr(pTHX_ U32 flags)
11568{
11569 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11570}
11571
11572/*
11573=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11574
11575Parse a single complete Perl expression. This allows the full
11576expression grammar, including the lowest-precedence operators such
11577as C<or>. The expression must be followed (and thus terminated) by a
11578token that an expression would normally be terminated by: end-of-file,
11579closing bracketing punctuation, semicolon, or one of the keywords that
11580signals a postfix expression-statement modifier. If I<flags> includes
11581C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11582mandatory. It is up to the caller to ensure that the dynamic parser
11583state (L</PL_parser> et al) is correctly set to reflect the source of
11584the code to be parsed and the lexical context for the expression.
11585
11586The op tree representing the expression is returned. If an optional
11587expression is absent, a null pointer is returned, otherwise the pointer
11588will be non-null.
11589
11590If an error occurs in parsing or compilation, in most cases a valid op
11591tree is returned anyway. The error is reflected in the parser state,
11592normally resulting in a single exception at the top level of parsing
11593which covers all the compilation errors that occurred. Some compilation
11594errors, however, will throw an exception immediately.
11595
11596=cut
11597*/
11598
11599OP *
11600Perl_parse_fullexpr(pTHX_ U32 flags)
11601{
11602 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11603}
11604
e53d8f76
Z
11605/*
11606=for apidoc Amx|OP *|parse_block|U32 flags
11607
11608Parse a single complete Perl code block. This consists of an opening
11609brace, a sequence of statements, and a closing brace. The block
11610constitutes a lexical scope, so C<my> variables and various compile-time
11611effects can be contained within it. It is up to the caller to ensure
11612that the dynamic parser state (L</PL_parser> et al) is correctly set to
11613reflect the source of the code to be parsed and the lexical context for
11614the statement.
11615
11616The op tree representing the code block is returned. This is always a
11617real op, never a null pointer. It will normally be a C<lineseq> list,
11618including C<nextstate> or equivalent ops. No ops to construct any kind
11619of runtime scope are included by virtue of it being a block.
11620
11621If an error occurs in parsing or compilation, in most cases a valid op
11622tree (most likely null) is returned anyway. The error is reflected in
11623the parser state, normally resulting in a single exception at the top
11624level of parsing which covers all the compilation errors that occurred.
11625Some compilation errors, however, will throw an exception immediately.
11626
11627The I<flags> parameter is reserved for future use, and must always
11628be zero.
11629
11630=cut
11631*/
11632
11633OP *
11634Perl_parse_block(pTHX_ U32 flags)
11635{
11636 if (flags)
11637 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11638 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11639}
11640
1da4ca5f 11641/*
8359b381
Z
11642=for apidoc Amx|OP *|parse_barestmt|U32 flags
11643
11644Parse a single unadorned Perl statement. This may be a normal imperative
11645statement or a declaration that has compile-time effect. It does not
11646include any label or other affixture. It is up to the caller to ensure
11647that the dynamic parser state (L</PL_parser> et al) is correctly set to
11648reflect the source of the code to be parsed and the lexical context for
11649the statement.
11650
11651The op tree representing the statement is returned. This may be a
11652null pointer if the statement is null, for example if it was actually
11653a subroutine definition (which has compile-time side effects). If not
11654null, it will be ops directly implementing the statement, suitable to
11655pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11656equivalent op (except for those embedded in a scope contained entirely
11657within the statement).
11658
11659If an error occurs in parsing or compilation, in most cases a valid op
11660tree (most likely null) is returned anyway. The error is reflected in
11661the parser state, normally resulting in a single exception at the top
11662level of parsing which covers all the compilation errors that occurred.
11663Some compilation errors, however, will throw an exception immediately.
11664
11665The I<flags> parameter is reserved for future use, and must always
11666be zero.
11667
11668=cut
11669*/
11670
11671OP *
11672Perl_parse_barestmt(pTHX_ U32 flags)
11673{
11674 if (flags)
11675 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11676 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11677}
11678
11679/*
361d9b55
Z
11680=for apidoc Amx|SV *|parse_label|U32 flags
11681
11682Parse a single label, possibly optional, of the type that may prefix a
11683Perl statement. It is up to the caller to ensure that the dynamic parser
11684state (L</PL_parser> et al) is correctly set to reflect the source of
11685the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11686label is optional, otherwise it is mandatory.
11687
11688The name of the label is returned in the form of a fresh scalar. If an
11689optional label is absent, a null pointer is returned.
11690
11691If an error occurs in parsing, which can only occur if the label is
11692mandatory, a valid label is returned anyway. The error is reflected in
11693the parser state, normally resulting in a single exception at the top
11694level of parsing which covers all the compilation errors that occurred.
11695
11696=cut
11697*/
11698
11699SV *
11700Perl_parse_label(pTHX_ U32 flags)
11701{
11702 if (flags & ~PARSE_OPTIONAL)
11703 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11704 if (PL_lex_state == LEX_KNOWNEXT) {
11705 PL_parser->yychar = yylex();
11706 if (PL_parser->yychar == LABEL) {
361d9b55
Z
11707 SV *lsv;
11708 PL_parser->yychar = YYEMPTY;
11709 lsv = newSV_type(SVt_PV);
fefd015f 11710 sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
361d9b55
Z
11711 return lsv;
11712 } else {
11713 yyunlex();
11714 goto no_label;
11715 }
11716 } else {
11717 char *s, *t;
361d9b55
Z
11718 STRLEN wlen, bufptr_pos;
11719 lex_read_space(0);
11720 t = s = PL_bufptr;
5db1eb8d 11721 if (!isIDFIRST_lazy_if(s, UTF))
361d9b55 11722 goto no_label;
5db1eb8d 11723 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
361d9b55
Z
11724 if (word_takes_any_delimeter(s, wlen))
11725 goto no_label;
11726 bufptr_pos = s - SvPVX(PL_linestr);
11727 PL_bufptr = t;
11728 lex_read_space(LEX_KEEP_PREVIOUS);
11729 t = PL_bufptr;
11730 s = SvPVX(PL_linestr) + bufptr_pos;
11731 if (t[0] == ':' && t[1] != ':') {
11732 PL_oldoldbufptr = PL_oldbufptr;
11733 PL_oldbufptr = s;
11734 PL_bufptr = t+1;
5db1eb8d 11735 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
361d9b55
Z
11736 } else {
11737 PL_bufptr = s;
11738 no_label:
11739 if (flags & PARSE_OPTIONAL) {
11740 return NULL;
11741 } else {
11742 qerror(Perl_mess(aTHX_ "Parse error"));
11743 return newSVpvs("x");
11744 }
11745 }
11746 }
11747}
11748
11749/*
28ac2b49
Z
11750=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11751
11752Parse a single complete Perl statement. This may be a normal imperative
8359b381 11753statement or a declaration that has compile-time effect, and may include
8e720305 11754optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11755parser state (L</PL_parser> et al) is correctly set to reflect the source
11756of the code to be parsed and the lexical context for the statement.
11757
11758The op tree representing the statement is returned. This may be a
11759null pointer if the statement is null, for example if it was actually
11760a subroutine definition (which has compile-time side effects). If not
11761null, it will be the result of a L</newSTATEOP> call, normally including
11762a C<nextstate> or equivalent op.
11763
11764If an error occurs in parsing or compilation, in most cases a valid op
11765tree (most likely null) is returned anyway. The error is reflected in
11766the parser state, normally resulting in a single exception at the top
11767level of parsing which covers all the compilation errors that occurred.
11768Some compilation errors, however, will throw an exception immediately.
11769
11770The I<flags> parameter is reserved for future use, and must always
11771be zero.
11772
11773=cut
11774*/
11775
11776OP *
11777Perl_parse_fullstmt(pTHX_ U32 flags)
11778{
28ac2b49
Z
11779 if (flags)
11780 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11781 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11782}
11783
07ffcb73
Z
11784/*
11785=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11786
11787Parse a sequence of zero or more Perl statements. These may be normal
11788imperative statements, including optional labels, or declarations
11789that have compile-time effect, or any mixture thereof. The statement
11790sequence ends when a closing brace or end-of-file is encountered in a
11791place where a new statement could have validly started. It is up to
11792the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11793is correctly set to reflect the source of the code to be parsed and the
11794lexical context for the statements.
11795
11796The op tree representing the statement sequence is returned. This may
11797be a null pointer if the statements were all null, for example if there
11798were no statements or if there were only subroutine definitions (which
11799have compile-time side effects). If not null, it will be a C<lineseq>
11800list, normally including C<nextstate> or equivalent ops.
11801
11802If an error occurs in parsing or compilation, in most cases a valid op
11803tree is returned anyway. The error is reflected in the parser state,
11804normally resulting in a single exception at the top level of parsing
11805which covers all the compilation errors that occurred. Some compilation
11806errors, however, will throw an exception immediately.
11807
11808The I<flags> parameter is reserved for future use, and must always
11809be zero.
11810
11811=cut
11812*/
11813
11814OP *
11815Perl_parse_stmtseq(pTHX_ U32 flags)
11816{
11817 OP *stmtseqop;
e53d8f76 11818 I32 c;
07ffcb73 11819 if (flags)
78cdf107
Z
11820 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11821 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
11822 c = lex_peek_unichar(0);
11823 if (c != -1 && c != /*{*/'}')
07ffcb73 11824 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
11825 return stmtseqop;
11826}
11827
28ac2b49 11828/*
1da4ca5f
NC
11829 * Local variables:
11830 * c-indentation-style: bsd
11831 * c-basic-offset: 4
14d04a33 11832 * indent-tabs-mode: nil
1da4ca5f
NC
11833 * End:
11834 *
14d04a33 11835 * ex: set ts=8 sts=4 sw=4 et:
37442d52 11836 */