This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add caching to inversion list searches
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
802a15e9 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805 150 * can get by with a single comparison (if the compiler is smart enough).
9da1dd8f
DM
151 *
152 * These values refer to the various states within a sublex parse,
153 * i.e. within a double quotish string
79072805
LW
154 */
155
fb73857a 156/* #define LEX_NOTPARSING 11 is done in perl.h. */
157
b6007c36
DM
158#define LEX_NORMAL 10 /* normal code (ie not within "...") */
159#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
160#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
161#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
162#define LEX_INTERPSTART 6 /* expecting the start of a $var */
163
164 /* at end of code, eg "$x" followed by: */
165#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
166#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
167
168#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
169 string or after \E, $foo, etc */
170#define LEX_INTERPCONST 2 /* NOT USED */
171#define LEX_FORMLINE 1 /* expecting a format line */
172#define LEX_KNOWNEXT 0 /* next token known; just return it */
173
79072805 174
bbf60fe6 175#ifdef DEBUGGING
27da23d5 176static const char* const lex_state_names[] = {
bbf60fe6
DM
177 "KNOWNEXT",
178 "FORMLINE",
179 "INTERPCONST",
180 "INTERPCONCAT",
181 "INTERPENDMAYBE",
182 "INTERPEND",
183 "INTERPSTART",
184 "INTERPPUSH",
185 "INTERPCASEMOD",
186 "INTERPNORMAL",
187 "NORMAL"
188};
189#endif
190
79072805
LW
191#ifdef ff_next
192#undef ff_next
d48672a2
LW
193#endif
194
79072805 195#include "keywords.h"
fe14fcc3 196
ffb4593c
NT
197/* CLINE is a macro that ensures PL_copline has a sane value */
198
ae986130
LW
199#ifdef CLINE
200#undef CLINE
201#endif
57843af0 202#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 203
5db06880 204#ifdef PERL_MAD
29595ff2
NC
205# define SKIPSPACE0(s) skipspace0(s)
206# define SKIPSPACE1(s) skipspace1(s)
207# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
208# define PEEKSPACE(s) skipspace2(s,0)
209#else
210# define SKIPSPACE0(s) skipspace(s)
211# define SKIPSPACE1(s) skipspace(s)
212# define SKIPSPACE2(s,tsv) skipspace(s)
213# define PEEKSPACE(s) skipspace(s)
214#endif
215
ffb4593c
NT
216/*
217 * Convenience functions to return different tokens and prime the
9cbb5ea2 218 * lexer for the next token. They all take an argument.
ffb4593c
NT
219 *
220 * TOKEN : generic token (used for '(', DOLSHARP, etc)
221 * OPERATOR : generic operator
222 * AOPERATOR : assignment operator
223 * PREBLOCK : beginning the block after an if, while, foreach, ...
224 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
225 * PREREF : *EXPR where EXPR is not a simple identifier
226 * TERM : expression term
227 * LOOPX : loop exiting command (goto, last, dump, etc)
228 * FTST : file test operator
229 * FUN0 : zero-argument function
7eb971ee 230 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 231 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
232 * BOop : bitwise or or xor
233 * BAop : bitwise and
234 * SHop : shift operator
235 * PWop : power operator
9cbb5ea2 236 * PMop : pattern-matching operator
ffb4593c
NT
237 * Aop : addition-level operator
238 * Mop : multiplication-level operator
239 * Eop : equality-testing operator
e5edeb50 240 * Rop : relational operator <= != gt
ffb4593c
NT
241 *
242 * Also see LOP and lop() below.
243 */
244
998054bd 245#ifdef DEBUGGING /* Serve -DT. */
704d4215 246# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 247#else
bbf60fe6 248# define REPORT(retval) (retval)
998054bd
SC
249#endif
250
bbf60fe6
DM
251#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
252#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
253#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
254#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
255#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
256#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
257#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
258#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
259#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
260#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 261#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
262#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
263#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
264#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
265#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
266#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
267#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
268#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
269#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
270#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
271#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 272
a687059c
LW
273/* This bit of chicanery makes a unary function followed by
274 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
275 * The UNIDOR macro is for unary functions that can be followed by the //
276 * operator (such as C<shift // 0>).
a687059c 277 */
d68ce4ac 278#define UNI3(f,x,have_x) { \
6154021b 279 pl_yylval.ival = f; \
d68ce4ac 280 if (have_x) PL_expect = x; \
376fcdbf
AL
281 PL_bufptr = s; \
282 PL_last_uni = PL_oldbufptr; \
283 PL_last_lop_op = f; \
284 if (*s == '(') \
285 return REPORT( (int)FUNC1 ); \
29595ff2 286 s = PEEKSPACE(s); \
376fcdbf
AL
287 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
288 }
d68ce4ac
FC
289#define UNI(f) UNI3(f,XTERM,1)
290#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
b5fb7ce3
FC
291#define UNIPROTO(f,optional) { \
292 if (optional) PL_last_uni = PL_oldbufptr; \
22393538
MH
293 OPERATOR(f); \
294 }
a687059c 295
d68ce4ac 296#define UNIBRACK(f) UNI3(f,0,0)
79072805 297
9f68db38 298/* grandfather return to old style */
78cdf107
Z
299#define OLDLOP(f) \
300 do { \
301 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
302 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
303 pl_yylval.ival = (f); \
304 PL_expect = XTERM; \
305 PL_bufptr = s; \
306 return (int)LSTOP; \
307 } while(0)
79072805 308
8fa7f367
JH
309#ifdef DEBUGGING
310
6154021b 311/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
312enum token_type {
313 TOKENTYPE_NONE,
314 TOKENTYPE_IVAL,
6154021b 315 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6 316 TOKENTYPE_PVAL,
aeaef349 317 TOKENTYPE_OPVAL
bbf60fe6
DM
318};
319
6d4a66ac
NC
320static struct debug_tokens {
321 const int token;
322 enum token_type type;
323 const char *name;
324} const debug_tokens[] =
9041c2e3 325{
bbf60fe6
DM
326 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
327 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
328 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
329 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
330 { ARROW, TOKENTYPE_NONE, "ARROW" },
331 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
332 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
333 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
334 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
335 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 336 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
337 { DO, TOKENTYPE_NONE, "DO" },
338 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
339 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
340 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
341 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
342 { ELSE, TOKENTYPE_NONE, "ELSE" },
343 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
344 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
345 { FOR, TOKENTYPE_IVAL, "FOR" },
346 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
705fe0e5
FC
347 { FORMLBRACK, TOKENTYPE_NONE, "FORMLBRACK" },
348 { FORMRBRACK, TOKENTYPE_NONE, "FORMRBRACK" },
bbf60fe6
DM
349 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
350 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 351 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
352 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
353 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
354 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 355 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
356 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
357 { IF, TOKENTYPE_IVAL, "IF" },
5db1eb8d 358 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
bbf60fe6
DM
359 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
360 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
361 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
362 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
363 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
364 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
365 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
366 { MY, TOKENTYPE_IVAL, "MY" },
367 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
368 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
369 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
370 { OROP, TOKENTYPE_IVAL, "OROP" },
371 { OROR, TOKENTYPE_NONE, "OROR" },
372 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
f3f204dc 373 { PEG, TOKENTYPE_NONE, "PEG" },
88e1f1a2
JV
374 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
375 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
376 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
377 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
378 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
379 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
380 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
381 { PREINC, TOKENTYPE_NONE, "PREINC" },
382 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
f3f204dc 383 { QWLIST, TOKENTYPE_OPVAL, "QWLIST" },
bbf60fe6
DM
384 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
385 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
f3f204dc 386 { REQUIRE, TOKENTYPE_NONE, "REQUIRE" },
bbf60fe6
DM
387 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
388 { SUB, TOKENTYPE_NONE, "SUB" },
389 { THING, TOKENTYPE_OPVAL, "THING" },
390 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
391 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
392 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
393 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
394 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
395 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 396 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
397 { WHILE, TOKENTYPE_IVAL, "WHILE" },
398 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 399 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 400 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
401};
402
6154021b 403/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 404
bbf60fe6 405STATIC int
704d4215 406S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 407{
97aff369 408 dVAR;
7918f24d
NC
409
410 PERL_ARGS_ASSERT_TOKEREPORT;
411
bbf60fe6 412 if (DEBUG_T_TEST) {
bd61b366 413 const char *name = NULL;
bbf60fe6 414 enum token_type type = TOKENTYPE_NONE;
f54cb97a 415 const struct debug_tokens *p;
396482e1 416 SV* const report = newSVpvs("<== ");
bbf60fe6 417
f54cb97a 418 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
419 if (p->token == (int)rv) {
420 name = p->name;
421 type = p->type;
422 break;
423 }
424 }
425 if (name)
54667de8 426 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
427 else if ((char)rv > ' ' && (char)rv < '~')
428 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
429 else if (!rv)
396482e1 430 sv_catpvs(report, "EOF");
bbf60fe6
DM
431 else
432 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
433 switch (type) {
434 case TOKENTYPE_NONE:
bbf60fe6
DM
435 break;
436 case TOKENTYPE_IVAL:
704d4215 437 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
438 break;
439 case TOKENTYPE_OPNUM:
440 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 441 PL_op_name[lvalp->ival]);
bbf60fe6
DM
442 break;
443 case TOKENTYPE_PVAL:
704d4215 444 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
445 break;
446 case TOKENTYPE_OPVAL:
704d4215 447 if (lvalp->opval) {
401441c0 448 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
449 PL_op_name[lvalp->opval->op_type]);
450 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 451 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 452 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
453 }
454
455 }
401441c0 456 else
396482e1 457 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
458 break;
459 }
b6007c36 460 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
461 };
462 return (int)rv;
998054bd
SC
463}
464
b6007c36
DM
465
466/* print the buffer with suitable escapes */
467
468STATIC void
15f169a1 469S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 470{
396482e1 471 SV* const tmp = newSVpvs("");
7918f24d
NC
472
473 PERL_ARGS_ASSERT_PRINTBUF;
474
b6007c36
DM
475 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
476 SvREFCNT_dec(tmp);
477}
478
8fa7f367
JH
479#endif
480
8290c323
NC
481static int
482S_deprecate_commaless_var_list(pTHX) {
483 PL_expect = XTERM;
484 deprecate("comma-less variable list");
485 return REPORT(','); /* grandfather non-comma-format format */
486}
487
ffb4593c
NT
488/*
489 * S_ao
490 *
c963b151
BD
491 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
492 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
493 */
494
76e3520e 495STATIC int
cea2e8a9 496S_ao(pTHX_ int toketype)
a0d0e21e 497{
97aff369 498 dVAR;
3280af22
NIS
499 if (*PL_bufptr == '=') {
500 PL_bufptr++;
a0d0e21e 501 if (toketype == ANDAND)
6154021b 502 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 503 else if (toketype == OROR)
6154021b 504 pl_yylval.ival = OP_ORASSIGN;
c963b151 505 else if (toketype == DORDOR)
6154021b 506 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
507 toketype = ASSIGNOP;
508 }
509 return toketype;
510}
511
ffb4593c
NT
512/*
513 * S_no_op
514 * When Perl expects an operator and finds something else, no_op
515 * prints the warning. It always prints "<something> found where
516 * operator expected. It prints "Missing semicolon on previous line?"
517 * if the surprise occurs at the start of the line. "do you need to
518 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
519 * where the compiler doesn't know if foo is a method call or a function.
520 * It prints "Missing operator before end of line" if there's nothing
521 * after the missing operator, or "... before <...>" if there is something
522 * after the missing operator.
523 */
524
76e3520e 525STATIC void
15f169a1 526S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 527{
97aff369 528 dVAR;
9d4ba2ae
AL
529 char * const oldbp = PL_bufptr;
530 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 531
7918f24d
NC
532 PERL_ARGS_ASSERT_NO_OP;
533
1189a94a
GS
534 if (!s)
535 s = oldbp;
07c798fb 536 else
1189a94a 537 PL_bufptr = s;
734ab321 538 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
56da5a46
RGS
539 if (ckWARN_d(WARN_SYNTAX)) {
540 if (is_first)
541 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
542 "\t(Missing semicolon on previous line?)\n");
543 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 544 const char *t;
734ab321
BF
545 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
546 t += UTF ? UTF8SKIP(t) : 1)
c35e046a 547 NOOP;
56da5a46
RGS
548 if (t < PL_bufptr && isSPACE(*t))
549 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
550 "\t(Do you need to predeclare %"SVf"?)\n",
551 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
552 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46
RGS
553 }
554 else {
555 assert(s >= oldbp);
556 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
557 "\t(Missing operator before %"SVf"?)\n",
558 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
559 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46 560 }
07c798fb 561 }
3280af22 562 PL_bufptr = oldbp;
8990e307
LW
563}
564
ffb4593c
NT
565/*
566 * S_missingterm
567 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 568 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
569 * If we're in a delimited string and the delimiter is a control
570 * character, it's reformatted into a two-char sequence like ^C.
571 * This is fatal.
572 */
573
76e3520e 574STATIC void
cea2e8a9 575S_missingterm(pTHX_ char *s)
8990e307 576{
97aff369 577 dVAR;
8990e307
LW
578 char tmpbuf[3];
579 char q;
580 if (s) {
9d4ba2ae 581 char * const nl = strrchr(s,'\n');
d2719217 582 if (nl)
8990e307
LW
583 *nl = '\0';
584 }
463559e7 585 else if (isCNTRL(PL_multi_close)) {
8990e307 586 *tmpbuf = '^';
585ec06d 587 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
588 tmpbuf[2] = '\0';
589 s = tmpbuf;
590 }
591 else {
eb160463 592 *tmpbuf = (char)PL_multi_close;
8990e307
LW
593 tmpbuf[1] = '\0';
594 s = tmpbuf;
595 }
596 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 597 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 598}
79072805 599
dd0ac2b9
FC
600#include "feature.h"
601
0d863452 602/*
0d863452
RH
603 * Check whether the named feature is enabled.
604 */
26ea9e12 605bool
3fff3427 606Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 607{
97aff369 608 dVAR;
4a731d7b 609 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
610
611 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
ca4d40c4
FC
612
613 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
7918f24d 614
26ea9e12
NC
615 if (namelen > MAX_FEATURE_LEN)
616 return FALSE;
3fff3427 617 memcpy(&he_name[8], name, namelen);
7d69d4a6 618
c8ca97b0
NC
619 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
620 REFCOUNTED_HE_EXISTS));
0d863452
RH
621}
622
ffb4593c 623/*
9cbb5ea2
GS
624 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
625 * utf16-to-utf8-reversed.
ffb4593c
NT
626 */
627
c39cd008
GS
628#ifdef PERL_CR_FILTER
629static void
630strip_return(SV *sv)
631{
eb578fdb
KW
632 const char *s = SvPVX_const(sv);
633 const char * const e = s + SvCUR(sv);
7918f24d
NC
634
635 PERL_ARGS_ASSERT_STRIP_RETURN;
636
c39cd008
GS
637 /* outer loop optimized to do nothing if there are no CR-LFs */
638 while (s < e) {
639 if (*s++ == '\r' && *s == '\n') {
640 /* hit a CR-LF, need to copy the rest */
eb578fdb 641 char *d = s - 1;
c39cd008
GS
642 *d++ = *s++;
643 while (s < e) {
644 if (*s == '\r' && s[1] == '\n')
645 s++;
646 *d++ = *s++;
647 }
648 SvCUR(sv) -= s - d;
649 return;
650 }
651 }
652}
a868473f 653
76e3520e 654STATIC I32
c39cd008 655S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 656{
f54cb97a 657 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
658 if (count > 0 && !maxlen)
659 strip_return(sv);
660 return count;
a868473f
NIS
661}
662#endif
663
ffb4593c 664/*
8eaa0acf
Z
665=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
666
667Creates and initialises a new lexer/parser state object, supplying
668a context in which to lex and parse from a new source of Perl code.
669A pointer to the new state object is placed in L</PL_parser>. An entry
670is made on the save stack so that upon unwinding the new state object
671will be destroyed and the former value of L</PL_parser> will be restored.
672Nothing else need be done to clean up the parsing context.
673
674The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
675non-null, provides a string (in SV form) containing code to be parsed.
676A copy of the string is made, so subsequent modification of I<line>
677does not affect parsing. I<rsfp>, if non-null, provides an input stream
678from which code will be read to be parsed. If both are non-null, the
679code in I<line> comes first and must consist of complete lines of input,
680and I<rsfp> supplies the remainder of the source.
681
e368b3bd
FC
682The I<flags> parameter is reserved for future use. Currently it is only
683used by perl internally, so extensions should always pass zero.
8eaa0acf
Z
684
685=cut
686*/
ffb4593c 687
27fcb6ee 688/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
87606032
NC
689 can share filters with the current parser.
690 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
691 caller, hence isn't owned by the parser, so shouldn't be closed on parser
692 destruction. This is used to handle the case of defaulting to reading the
693 script from the standard input because no filename was given on the command
694 line (without getting confused by situation where STDIN has been closed, so
695 the script handle is opened on fd 0) */
27fcb6ee 696
a0d0e21e 697void
8eaa0acf 698Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 699{
97aff369 700 dVAR;
6ef55633 701 const char *s = NULL;
5486870f 702 yy_parser *parser, *oparser;
60d63348 703 if (flags && flags & ~LEX_START_FLAGS)
8eaa0acf 704 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
705
706 /* create and initialise a parser */
707
199e78b7 708 Newxz(parser, 1, yy_parser);
5486870f 709 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
710 PL_parser = parser;
711
28ac2b49
Z
712 parser->stack = NULL;
713 parser->ps = NULL;
714 parser->stack_size = 0;
acdf0a21 715
e3abe207
DM
716 /* on scope exit, free this parser and restore any outer one */
717 SAVEPARSER(parser);
7c4baf47 718 parser->saved_curcop = PL_curcop;
e3abe207 719
acdf0a21 720 /* initialise lexer state */
8990e307 721
fb205e7a
DM
722#ifdef PERL_MAD
723 parser->curforce = -1;
724#else
725 parser->nexttoke = 0;
726#endif
ca4cfd28 727 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 728 parser->copline = NOLINE;
5afb0a62 729 parser->lex_state = LEX_NORMAL;
c2598295 730 parser->expect = XSTATE;
2f9285f8 731 parser->rsfp = rsfp;
27fcb6ee
FC
732 parser->rsfp_filters =
733 !(flags & LEX_START_SAME_FILTER) || !oparser
d3cd8e11
FC
734 ? NULL
735 : MUTABLE_AV(SvREFCNT_inc(
736 oparser->rsfp_filters
737 ? oparser->rsfp_filters
738 : (oparser->rsfp_filters = newAV())
739 ));
2f9285f8 740
199e78b7
DM
741 Newx(parser->lex_brackstack, 120, char);
742 Newx(parser->lex_casestack, 12, char);
743 *parser->lex_casestack = '\0';
02b34bbe 744
10efb74f 745 if (line) {
0528fd32 746 STRLEN len;
10efb74f 747 s = SvPV_const(line, len);
0abcdfa4
FC
748 parser->linestr = flags & LEX_START_COPIED
749 ? SvREFCNT_inc_simple_NN(line)
750 : newSVpvn_flags(s, len, SvUTF8(line));
37555a72 751 if (!len || s[len-1] != ';')
bdc0bf6f 752 sv_catpvs(parser->linestr, "\n;");
0abcdfa4
FC
753 } else {
754 parser->linestr = newSVpvs("\n;");
8990e307 755 }
f06b5848
DM
756 parser->oldoldbufptr =
757 parser->oldbufptr =
758 parser->bufptr =
759 parser->linestart = SvPVX(parser->linestr);
760 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
761 parser->last_lop = parser->last_uni = NULL;
87606032
NC
762 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
763 |LEX_DONT_CLOSE_RSFP);
737c24fc 764
60d63348 765 parser->in_pod = parser->filtered = 0;
79072805 766}
a687059c 767
e3abe207
DM
768
769/* delete a parser object */
770
771void
772Perl_parser_free(pTHX_ const yy_parser *parser)
773{
7918f24d
NC
774 PERL_ARGS_ASSERT_PARSER_FREE;
775
7c4baf47 776 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
777 SvREFCNT_dec(parser->linestr);
778
87606032 779 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
2f9285f8 780 PerlIO_clearerr(parser->rsfp);
799361c3
SH
781 else if (parser->rsfp && (!parser->old_parser ||
782 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 783 PerlIO_close(parser->rsfp);
5486870f 784 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 785
e3abe207
DM
786 Safefree(parser->lex_brackstack);
787 Safefree(parser->lex_casestack);
788 PL_parser = parser->old_parser;
789 Safefree(parser);
790}
791
792
ffb4593c 793/*
f0e67a1d
Z
794=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
795
796Buffer scalar containing the chunk currently under consideration of the
797text currently being lexed. This is always a plain string scalar (for
798which C<SvPOK> is true). It is not intended to be used as a scalar by
799normal scalar means; instead refer to the buffer directly by the pointer
800variables described below.
801
802The lexer maintains various C<char*> pointers to things in the
803C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
804reallocated, all of these pointers must be updated. Don't attempt to
805do this manually, but rather use L</lex_grow_linestr> if you need to
806reallocate the buffer.
807
808The content of the text chunk in the buffer is commonly exactly one
809complete line of input, up to and including a newline terminator,
810but there are situations where it is otherwise. The octets of the
811buffer may be intended to be interpreted as either UTF-8 or Latin-1.
812The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
813flag on this scalar, which may disagree with it.
814
815For direct examination of the buffer, the variable
816L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
817lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
818of these pointers is usually preferable to examination of the scalar
819through normal scalar means.
820
821=for apidoc AmxU|char *|PL_parser-E<gt>bufend
822
823Direct pointer to the end of the chunk of text currently being lexed, the
824end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
825+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
826always located at the end of the buffer, and does not count as part of
827the buffer's contents.
828
829=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
830
831Points to the current position of lexing inside the lexer buffer.
832Characters around this point may be freely examined, within
833the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
834L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
835interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
836
837Lexing code (whether in the Perl core or not) moves this pointer past
838the characters that it consumes. It is also expected to perform some
839bookkeeping whenever a newline character is consumed. This movement
840can be more conveniently performed by the function L</lex_read_to>,
841which handles newlines appropriately.
842
843Interpretation of the buffer's octets can be abstracted out by
844using the slightly higher-level functions L</lex_peek_unichar> and
845L</lex_read_unichar>.
846
847=for apidoc AmxU|char *|PL_parser-E<gt>linestart
848
849Points to the start of the current line inside the lexer buffer.
850This is useful for indicating at which column an error occurred, and
851not much else. This must be updated by any lexing code that consumes
852a newline; the function L</lex_read_to> handles this detail.
853
854=cut
855*/
856
857/*
858=for apidoc Amx|bool|lex_bufutf8
859
860Indicates whether the octets in the lexer buffer
861(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
862of Unicode characters. If not, they should be interpreted as Latin-1
863characters. This is analogous to the C<SvUTF8> flag for scalars.
864
865In UTF-8 mode, it is not guaranteed that the lexer buffer actually
866contains valid UTF-8. Lexing code must be robust in the face of invalid
867encoding.
868
869The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
870is significant, but not the whole story regarding the input character
871encoding. Normally, when a file is being read, the scalar contains octets
872and its C<SvUTF8> flag is off, but the octets should be interpreted as
873UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
874however, the scalar may have the C<SvUTF8> flag on, and in this case its
875octets should be interpreted as UTF-8 unless the C<use bytes> pragma
876is in effect. This logic may change in the future; use this function
877instead of implementing the logic yourself.
878
879=cut
880*/
881
882bool
883Perl_lex_bufutf8(pTHX)
884{
885 return UTF;
886}
887
888/*
889=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
890
891Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
892at least I<len> octets (including terminating NUL). Returns a
893pointer to the reallocated buffer. This is necessary before making
894any direct modification of the buffer that would increase its length.
895L</lex_stuff_pvn> provides a more convenient way to insert text into
896the buffer.
897
898Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
899this function updates all of the lexer's variables that point directly
900into the buffer.
901
902=cut
903*/
904
905char *
906Perl_lex_grow_linestr(pTHX_ STRLEN len)
907{
908 SV *linestr;
909 char *buf;
910 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
c7641931 911 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
f0e67a1d
Z
912 linestr = PL_parser->linestr;
913 buf = SvPVX(linestr);
914 if (len <= SvLEN(linestr))
915 return buf;
916 bufend_pos = PL_parser->bufend - buf;
917 bufptr_pos = PL_parser->bufptr - buf;
918 oldbufptr_pos = PL_parser->oldbufptr - buf;
919 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
920 linestart_pos = PL_parser->linestart - buf;
921 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
922 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
c7641931
DM
923 re_eval_start_pos = PL_sublex_info.re_eval_start ?
924 PL_sublex_info.re_eval_start - buf : 0;
925
f0e67a1d 926 buf = sv_grow(linestr, len);
c7641931 927
f0e67a1d
Z
928 PL_parser->bufend = buf + bufend_pos;
929 PL_parser->bufptr = buf + bufptr_pos;
930 PL_parser->oldbufptr = buf + oldbufptr_pos;
931 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
932 PL_parser->linestart = buf + linestart_pos;
933 if (PL_parser->last_uni)
934 PL_parser->last_uni = buf + last_uni_pos;
935 if (PL_parser->last_lop)
936 PL_parser->last_lop = buf + last_lop_pos;
c7641931
DM
937 if (PL_sublex_info.re_eval_start)
938 PL_sublex_info.re_eval_start = buf + re_eval_start_pos;
f0e67a1d
Z
939 return buf;
940}
941
942/*
83aa740e 943=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
944
945Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
946immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
947reallocating the buffer if necessary. This means that lexing code that
948runs later will see the characters as if they had appeared in the input.
949It is not recommended to do this as part of normal parsing, and most
950uses of this facility run the risk of the inserted characters being
951interpreted in an unintended manner.
952
953The string to be inserted is represented by I<len> octets starting
954at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
955according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
956The characters are recoded for the lexer buffer, according to how the
957buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 958to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
959function is more convenient.
960
961=cut
962*/
963
964void
83aa740e 965Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 966{
749123ff 967 dVAR;
f0e67a1d
Z
968 char *bufptr;
969 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
970 if (flags & ~(LEX_STUFF_UTF8))
971 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
972 if (UTF) {
973 if (flags & LEX_STUFF_UTF8) {
974 goto plain_copy;
975 } else {
976 STRLEN highhalf = 0;
83aa740e 977 const char *p, *e = pv+len;
f0e67a1d
Z
978 for (p = pv; p != e; p++)
979 highhalf += !!(((U8)*p) & 0x80);
980 if (!highhalf)
981 goto plain_copy;
982 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
983 bufptr = PL_parser->bufptr;
984 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
985 SvCUR_set(PL_parser->linestr,
986 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
987 PL_parser->bufend += len+highhalf;
988 for (p = pv; p != e; p++) {
989 U8 c = (U8)*p;
990 if (c & 0x80) {
991 *bufptr++ = (char)(0xc0 | (c >> 6));
992 *bufptr++ = (char)(0x80 | (c & 0x3f));
993 } else {
994 *bufptr++ = (char)c;
995 }
996 }
997 }
998 } else {
999 if (flags & LEX_STUFF_UTF8) {
1000 STRLEN highhalf = 0;
83aa740e 1001 const char *p, *e = pv+len;
f0e67a1d
Z
1002 for (p = pv; p != e; p++) {
1003 U8 c = (U8)*p;
1004 if (c >= 0xc4) {
1005 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1006 "non-Latin-1 character into Latin-1 input");
1007 } else if (c >= 0xc2 && p+1 != e &&
1008 (((U8)p[1]) & 0xc0) == 0x80) {
1009 p++;
1010 highhalf++;
1011 } else if (c >= 0x80) {
1012 /* malformed UTF-8 */
1013 ENTER;
1014 SAVESPTR(PL_warnhook);
1015 PL_warnhook = PERL_WARNHOOK_FATAL;
1016 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1017 LEAVE;
1018 }
1019 }
1020 if (!highhalf)
1021 goto plain_copy;
1022 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1023 bufptr = PL_parser->bufptr;
1024 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1025 SvCUR_set(PL_parser->linestr,
1026 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1027 PL_parser->bufend += len-highhalf;
1028 for (p = pv; p != e; p++) {
1029 U8 c = (U8)*p;
1030 if (c & 0x80) {
1031 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1032 p++;
1033 } else {
1034 *bufptr++ = (char)c;
1035 }
1036 }
1037 } else {
1038 plain_copy:
1039 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1040 bufptr = PL_parser->bufptr;
1041 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1042 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1043 PL_parser->bufend += len;
1044 Copy(pv, bufptr, len, char);
1045 }
1046 }
1047}
1048
1049/*
9dcc53ea
Z
1050=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1051
1052Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1053immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1054reallocating the buffer if necessary. This means that lexing code that
1055runs later will see the characters as if they had appeared in the input.
1056It is not recommended to do this as part of normal parsing, and most
1057uses of this facility run the risk of the inserted characters being
1058interpreted in an unintended manner.
1059
1060The string to be inserted is represented by octets starting at I<pv>
1061and continuing to the first nul. These octets are interpreted as either
1062UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1063in I<flags>. The characters are recoded for the lexer buffer, according
1064to how the buffer is currently being interpreted (L</lex_bufutf8>).
1065If it is not convenient to nul-terminate a string to be inserted, the
1066L</lex_stuff_pvn> function is more appropriate.
1067
1068=cut
1069*/
1070
1071void
1072Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1073{
1074 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1075 lex_stuff_pvn(pv, strlen(pv), flags);
1076}
1077
1078/*
f0e67a1d
Z
1079=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1080
1081Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1082immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1083reallocating the buffer if necessary. This means that lexing code that
1084runs later will see the characters as if they had appeared in the input.
1085It is not recommended to do this as part of normal parsing, and most
1086uses of this facility run the risk of the inserted characters being
1087interpreted in an unintended manner.
1088
1089The string to be inserted is the string value of I<sv>. The characters
1090are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1091being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1092not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1093need to construct a scalar.
1094
1095=cut
1096*/
1097
1098void
1099Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1100{
1101 char *pv;
1102 STRLEN len;
1103 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1104 if (flags)
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1106 pv = SvPV(sv, len);
1107 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1108}
1109
1110/*
1111=for apidoc Amx|void|lex_unstuff|char *ptr
1112
1113Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1114I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1115This hides the discarded text from any lexing code that runs later,
1116as if the text had never appeared.
1117
1118This is not the normal way to consume lexed text. For that, use
1119L</lex_read_to>.
1120
1121=cut
1122*/
1123
1124void
1125Perl_lex_unstuff(pTHX_ char *ptr)
1126{
1127 char *buf, *bufend;
1128 STRLEN unstuff_len;
1129 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1130 buf = PL_parser->bufptr;
1131 if (ptr < buf)
1132 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1133 if (ptr == buf)
1134 return;
1135 bufend = PL_parser->bufend;
1136 if (ptr > bufend)
1137 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1138 unstuff_len = ptr - buf;
1139 Move(ptr, buf, bufend+1-ptr, char);
1140 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1141 PL_parser->bufend = bufend - unstuff_len;
1142}
1143
1144/*
1145=for apidoc Amx|void|lex_read_to|char *ptr
1146
1147Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1148to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1149performing the correct bookkeeping whenever a newline character is passed.
1150This is the normal way to consume lexed text.
1151
1152Interpretation of the buffer's octets can be abstracted out by
1153using the slightly higher-level functions L</lex_peek_unichar> and
1154L</lex_read_unichar>.
1155
1156=cut
1157*/
1158
1159void
1160Perl_lex_read_to(pTHX_ char *ptr)
1161{
1162 char *s;
1163 PERL_ARGS_ASSERT_LEX_READ_TO;
1164 s = PL_parser->bufptr;
1165 if (ptr < s || ptr > PL_parser->bufend)
1166 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1167 for (; s != ptr; s++)
1168 if (*s == '\n') {
1169 CopLINE_inc(PL_curcop);
1170 PL_parser->linestart = s+1;
1171 }
1172 PL_parser->bufptr = ptr;
1173}
1174
1175/*
1176=for apidoc Amx|void|lex_discard_to|char *ptr
1177
1178Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1179up to I<ptr>. The remaining content of the buffer will be moved, and
1180all pointers into the buffer updated appropriately. I<ptr> must not
1181be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1182it is not permitted to discard text that has yet to be lexed.
1183
1184Normally it is not necessarily to do this directly, because it suffices to
1185use the implicit discarding behaviour of L</lex_next_chunk> and things
1186based on it. However, if a token stretches across multiple lines,
1f317c95 1187and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1188that purpose, then after completion of the token it would be wise to
1189explicitly discard the now-unneeded earlier lines, to avoid future
1190multi-line tokens growing the buffer without bound.
1191
1192=cut
1193*/
1194
1195void
1196Perl_lex_discard_to(pTHX_ char *ptr)
1197{
1198 char *buf;
1199 STRLEN discard_len;
1200 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1201 buf = SvPVX(PL_parser->linestr);
1202 if (ptr < buf)
1203 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1204 if (ptr == buf)
1205 return;
1206 if (ptr > PL_parser->bufptr)
1207 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1208 discard_len = ptr - buf;
1209 if (PL_parser->oldbufptr < ptr)
1210 PL_parser->oldbufptr = ptr;
1211 if (PL_parser->oldoldbufptr < ptr)
1212 PL_parser->oldoldbufptr = ptr;
1213 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1214 PL_parser->last_uni = NULL;
1215 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1216 PL_parser->last_lop = NULL;
1217 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1218 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1219 PL_parser->bufend -= discard_len;
1220 PL_parser->bufptr -= discard_len;
1221 PL_parser->oldbufptr -= discard_len;
1222 PL_parser->oldoldbufptr -= discard_len;
1223 if (PL_parser->last_uni)
1224 PL_parser->last_uni -= discard_len;
1225 if (PL_parser->last_lop)
1226 PL_parser->last_lop -= discard_len;
1227}
1228
1229/*
1230=for apidoc Amx|bool|lex_next_chunk|U32 flags
1231
1232Reads in the next chunk of text to be lexed, appending it to
1233L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1234looked to the end of the current chunk and wants to know more. It is
1235usual, but not necessary, for lexing to have consumed the entirety of
1236the current chunk at this time.
1237
1238If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1239chunk (i.e., the current chunk has been entirely consumed), normally the
1240current chunk will be discarded at the same time that the new chunk is
1241read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1242will not be discarded. If the current chunk has not been entirely
1243consumed, then it will not be discarded regardless of the flag.
1244
1245Returns true if some new text was added to the buffer, or false if the
1246buffer has reached the end of the input text.
1247
1248=cut
1249*/
1250
1251#define LEX_FAKE_EOF 0x80000000
112d1284 1252#define LEX_NO_TERM 0x40000000
f0e67a1d
Z
1253
1254bool
1255Perl_lex_next_chunk(pTHX_ U32 flags)
1256{
1257 SV *linestr;
1258 char *buf;
1259 STRLEN old_bufend_pos, new_bufend_pos;
1260 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1261 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1262 bool got_some_for_debugger = 0;
f0e67a1d 1263 bool got_some;
112d1284 1264 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF|LEX_NO_TERM))
f0e67a1d 1265 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1266 linestr = PL_parser->linestr;
1267 buf = SvPVX(linestr);
1268 if (!(flags & LEX_KEEP_PREVIOUS) &&
1269 PL_parser->bufptr == PL_parser->bufend) {
1270 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1271 linestart_pos = 0;
1272 if (PL_parser->last_uni != PL_parser->bufend)
1273 PL_parser->last_uni = NULL;
1274 if (PL_parser->last_lop != PL_parser->bufend)
1275 PL_parser->last_lop = NULL;
1276 last_uni_pos = last_lop_pos = 0;
1277 *buf = 0;
1278 SvCUR(linestr) = 0;
1279 } else {
1280 old_bufend_pos = PL_parser->bufend - buf;
1281 bufptr_pos = PL_parser->bufptr - buf;
1282 oldbufptr_pos = PL_parser->oldbufptr - buf;
1283 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1284 linestart_pos = PL_parser->linestart - buf;
1285 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1286 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1287 }
1288 if (flags & LEX_FAKE_EOF) {
1289 goto eof;
60d63348 1290 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
f0e67a1d
Z
1291 got_some = 0;
1292 } else if (filter_gets(linestr, old_bufend_pos)) {
1293 got_some = 1;
17cc9359 1294 got_some_for_debugger = 1;
112d1284
FC
1295 } else if (flags & LEX_NO_TERM) {
1296 got_some = 0;
f0e67a1d 1297 } else {
580561a3
Z
1298 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1299 sv_setpvs(linestr, "");
f0e67a1d
Z
1300 eof:
1301 /* End of real input. Close filehandle (unless it was STDIN),
1302 * then add implicit termination.
1303 */
87606032 1304 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
f0e67a1d
Z
1305 PerlIO_clearerr(PL_parser->rsfp);
1306 else if (PL_parser->rsfp)
1307 (void)PerlIO_close(PL_parser->rsfp);
1308 PL_parser->rsfp = NULL;
60d63348 1309 PL_parser->in_pod = PL_parser->filtered = 0;
f0e67a1d
Z
1310#ifdef PERL_MAD
1311 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1312 PL_faketokens = 1;
1313#endif
1314 if (!PL_in_eval && PL_minus_p) {
1315 sv_catpvs(linestr,
1316 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1317 PL_minus_n = PL_minus_p = 0;
1318 } else if (!PL_in_eval && PL_minus_n) {
1319 sv_catpvs(linestr, /*{*/";}");
1320 PL_minus_n = 0;
1321 } else
1322 sv_catpvs(linestr, ";");
1323 got_some = 1;
1324 }
1325 buf = SvPVX(linestr);
1326 new_bufend_pos = SvCUR(linestr);
1327 PL_parser->bufend = buf + new_bufend_pos;
1328 PL_parser->bufptr = buf + bufptr_pos;
1329 PL_parser->oldbufptr = buf + oldbufptr_pos;
1330 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1331 PL_parser->linestart = buf + linestart_pos;
1332 if (PL_parser->last_uni)
1333 PL_parser->last_uni = buf + last_uni_pos;
1334 if (PL_parser->last_lop)
1335 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1336 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1337 PL_curstash != PL_debstash) {
1338 /* debugger active and we're not compiling the debugger code,
1339 * so store the line into the debugger's array of lines
1340 */
1341 update_debugger_info(NULL, buf+old_bufend_pos,
1342 new_bufend_pos-old_bufend_pos);
1343 }
1344 return got_some;
1345}
1346
1347/*
1348=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1349
1350Looks ahead one (Unicode) character in the text currently being lexed.
1351Returns the codepoint (unsigned integer value) of the next character,
1352or -1 if lexing has reached the end of the input text. To consume the
1353peeked character, use L</lex_read_unichar>.
1354
1355If the next character is in (or extends into) the next chunk of input
1356text, the next chunk will be read in. Normally the current chunk will be
1357discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1358then the current chunk will not be discarded.
1359
1360If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1361is encountered, an exception is generated.
1362
1363=cut
1364*/
1365
1366I32
1367Perl_lex_peek_unichar(pTHX_ U32 flags)
1368{
749123ff 1369 dVAR;
f0e67a1d
Z
1370 char *s, *bufend;
1371 if (flags & ~(LEX_KEEP_PREVIOUS))
1372 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1373 s = PL_parser->bufptr;
1374 bufend = PL_parser->bufend;
1375 if (UTF) {
1376 U8 head;
1377 I32 unichar;
1378 STRLEN len, retlen;
1379 if (s == bufend) {
1380 if (!lex_next_chunk(flags))
1381 return -1;
1382 s = PL_parser->bufptr;
1383 bufend = PL_parser->bufend;
1384 }
1385 head = (U8)*s;
1386 if (!(head & 0x80))
1387 return head;
1388 if (head & 0x40) {
1389 len = PL_utf8skip[head];
1390 while ((STRLEN)(bufend-s) < len) {
1391 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1392 break;
1393 s = PL_parser->bufptr;
1394 bufend = PL_parser->bufend;
1395 }
1396 }
1397 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1398 if (retlen == (STRLEN)-1) {
1399 /* malformed UTF-8 */
1400 ENTER;
1401 SAVESPTR(PL_warnhook);
1402 PL_warnhook = PERL_WARNHOOK_FATAL;
1403 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1404 LEAVE;
1405 }
1406 return unichar;
1407 } else {
1408 if (s == bufend) {
1409 if (!lex_next_chunk(flags))
1410 return -1;
1411 s = PL_parser->bufptr;
1412 }
1413 return (U8)*s;
1414 }
1415}
1416
1417/*
1418=for apidoc Amx|I32|lex_read_unichar|U32 flags
1419
1420Reads the next (Unicode) character in the text currently being lexed.
1421Returns the codepoint (unsigned integer value) of the character read,
1422and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1423if lexing has reached the end of the input text. To non-destructively
1424examine the next character, use L</lex_peek_unichar> instead.
1425
1426If the next character is in (or extends into) the next chunk of input
1427text, the next chunk will be read in. Normally the current chunk will be
1428discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1429then the current chunk will not be discarded.
1430
1431If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1432is encountered, an exception is generated.
1433
1434=cut
1435*/
1436
1437I32
1438Perl_lex_read_unichar(pTHX_ U32 flags)
1439{
1440 I32 c;
1441 if (flags & ~(LEX_KEEP_PREVIOUS))
1442 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1443 c = lex_peek_unichar(flags);
1444 if (c != -1) {
1445 if (c == '\n')
1446 CopLINE_inc(PL_curcop);
d9018cbe
EB
1447 if (UTF)
1448 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1449 else
1450 ++(PL_parser->bufptr);
f0e67a1d
Z
1451 }
1452 return c;
1453}
1454
1455/*
1456=for apidoc Amx|void|lex_read_space|U32 flags
1457
1458Reads optional spaces, in Perl style, in the text currently being
1459lexed. The spaces may include ordinary whitespace characters and
1460Perl-style comments. C<#line> directives are processed if encountered.
1461L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1462at a non-space character (or the end of the input text).
1463
1464If spaces extend into the next chunk of input text, the next chunk will
1465be read in. Normally the current chunk will be discarded at the same
1466time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1467chunk will not be discarded.
1468
1469=cut
1470*/
1471
f0998909
Z
1472#define LEX_NO_NEXT_CHUNK 0x80000000
1473
f0e67a1d
Z
1474void
1475Perl_lex_read_space(pTHX_ U32 flags)
1476{
1477 char *s, *bufend;
1478 bool need_incline = 0;
f0998909 1479 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1480 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1481#ifdef PERL_MAD
1482 if (PL_skipwhite) {
1483 sv_free(PL_skipwhite);
1484 PL_skipwhite = NULL;
1485 }
1486 if (PL_madskills)
1487 PL_skipwhite = newSVpvs("");
1488#endif /* PERL_MAD */
1489 s = PL_parser->bufptr;
1490 bufend = PL_parser->bufend;
1491 while (1) {
1492 char c = *s;
1493 if (c == '#') {
1494 do {
1495 c = *++s;
1496 } while (!(c == '\n' || (c == 0 && s == bufend)));
1497 } else if (c == '\n') {
1498 s++;
1499 PL_parser->linestart = s;
1500 if (s == bufend)
1501 need_incline = 1;
1502 else
1503 incline(s);
1504 } else if (isSPACE(c)) {
1505 s++;
1506 } else if (c == 0 && s == bufend) {
1507 bool got_more;
1508#ifdef PERL_MAD
1509 if (PL_madskills)
1510 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1511#endif /* PERL_MAD */
f0998909
Z
1512 if (flags & LEX_NO_NEXT_CHUNK)
1513 break;
f0e67a1d
Z
1514 PL_parser->bufptr = s;
1515 CopLINE_inc(PL_curcop);
1516 got_more = lex_next_chunk(flags);
1517 CopLINE_dec(PL_curcop);
1518 s = PL_parser->bufptr;
1519 bufend = PL_parser->bufend;
1520 if (!got_more)
1521 break;
1522 if (need_incline && PL_parser->rsfp) {
1523 incline(s);
1524 need_incline = 0;
1525 }
1526 } else {
1527 break;
1528 }
1529 }
1530#ifdef PERL_MAD
1531 if (PL_madskills)
1532 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1533#endif /* PERL_MAD */
1534 PL_parser->bufptr = s;
1535}
1536
1537/*
ffb4593c
NT
1538 * S_incline
1539 * This subroutine has nothing to do with tilting, whether at windmills
1540 * or pinball tables. Its name is short for "increment line". It
57843af0 1541 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1542 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1543 * # line 500 "foo.pm"
1544 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1545 */
1546
76e3520e 1547STATIC void
d9095cec 1548S_incline(pTHX_ const char *s)
463ee0b2 1549{
97aff369 1550 dVAR;
d9095cec
NC
1551 const char *t;
1552 const char *n;
1553 const char *e;
8818d409 1554 line_t line_num;
463ee0b2 1555
7918f24d
NC
1556 PERL_ARGS_ASSERT_INCLINE;
1557
57843af0 1558 CopLINE_inc(PL_curcop);
463ee0b2
LW
1559 if (*s++ != '#')
1560 return;
d4c19fe8
AL
1561 while (SPACE_OR_TAB(*s))
1562 s++;
73659bf1
GS
1563 if (strnEQ(s, "line", 4))
1564 s += 4;
1565 else
1566 return;
084592ab 1567 if (SPACE_OR_TAB(*s))
73659bf1 1568 s++;
4e553d73 1569 else
73659bf1 1570 return;
d4c19fe8
AL
1571 while (SPACE_OR_TAB(*s))
1572 s++;
463ee0b2
LW
1573 if (!isDIGIT(*s))
1574 return;
d4c19fe8 1575
463ee0b2
LW
1576 n = s;
1577 while (isDIGIT(*s))
1578 s++;
07714eb4 1579 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1580 return;
bf4acbe4 1581 while (SPACE_OR_TAB(*s))
463ee0b2 1582 s++;
73659bf1 1583 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1584 s++;
73659bf1
GS
1585 e = t + 1;
1586 }
463ee0b2 1587 else {
c35e046a
AL
1588 t = s;
1589 while (!isSPACE(*t))
1590 t++;
73659bf1 1591 e = t;
463ee0b2 1592 }
bf4acbe4 1593 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1594 e++;
1595 if (*e != '\n' && *e != '\0')
1596 return; /* false alarm */
1597
8818d409
FC
1598 line_num = atoi(n)-1;
1599
f4dd75d9 1600 if (t - s > 0) {
d9095cec 1601 const STRLEN len = t - s;
19bad673
NC
1602 SV *const temp_sv = CopFILESV(PL_curcop);
1603 const char *cf;
1604 STRLEN tmplen;
1605
1606 if (temp_sv) {
1607 cf = SvPVX(temp_sv);
1608 tmplen = SvCUR(temp_sv);
1609 } else {
1610 cf = NULL;
1611 tmplen = 0;
1612 }
1613
d1299d44 1614 if (!PL_rsfp && !PL_parser->filtered) {
e66cf94c
RGS
1615 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1616 * to *{"::_<newfilename"} */
44867030
NC
1617 /* However, the long form of evals is only turned on by the
1618 debugger - usually they're "(eval %lu)" */
1619 char smallbuf[128];
1620 char *tmpbuf;
1621 GV **gvp;
d9095cec 1622 STRLEN tmplen2 = len;
798b63bc 1623 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1624 tmpbuf = smallbuf;
1625 else
2ae0db35 1626 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1627 tmpbuf[0] = '_';
1628 tmpbuf[1] = '<';
2ae0db35 1629 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1630 tmplen += 2;
8a5ee598
RGS
1631 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1632 if (gvp) {
44867030
NC
1633 char *tmpbuf2;
1634 GV *gv2;
1635
1636 if (tmplen2 + 2 <= sizeof smallbuf)
1637 tmpbuf2 = smallbuf;
1638 else
1639 Newx(tmpbuf2, tmplen2 + 2, char);
1640
1641 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1642 /* Either they malloc'd it, or we malloc'd it,
1643 so no prefix is present in ours. */
1644 tmpbuf2[0] = '_';
1645 tmpbuf2[1] = '<';
1646 }
1647
1648 memcpy(tmpbuf2 + 2, s, tmplen2);
1649 tmplen2 += 2;
1650
8a5ee598 1651 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1652 if (!isGV(gv2)) {
8a5ee598 1653 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1654 /* adjust ${"::_<newfilename"} to store the new file name */
1655 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1656 /* The line number may differ. If that is the case,
1657 alias the saved lines that are in the array.
1658 Otherwise alias the whole array. */
1659 if (CopLINE(PL_curcop) == line_num) {
1660 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1661 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1662 }
1663 else if (GvAV(*gvp)) {
1664 AV * const av = GvAV(*gvp);
1665 const I32 start = CopLINE(PL_curcop)+1;
1666 I32 items = AvFILLp(av) - start;
1667 if (items > 0) {
1668 AV * const av2 = GvAVn(gv2);
1669 SV **svp = AvARRAY(av) + start;
1670 I32 l = (I32)line_num+1;
1671 while (items--)
1672 av_store(av2, l++, SvREFCNT_inc(*svp++));
1673 }
1674 }
e5527e4b 1675 }
44867030
NC
1676
1677 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1678 }
e66cf94c 1679 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1680 }
05ec9bb3 1681 CopFILE_free(PL_curcop);
d9095cec 1682 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1683 }
8818d409 1684 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1685}
1686
29595ff2 1687#ifdef PERL_MAD
cd81e915 1688/* skip space before PL_thistoken */
29595ff2
NC
1689
1690STATIC char *
1691S_skipspace0(pTHX_ register char *s)
1692{
7918f24d
NC
1693 PERL_ARGS_ASSERT_SKIPSPACE0;
1694
29595ff2
NC
1695 s = skipspace(s);
1696 if (!PL_madskills)
1697 return s;
cd81e915
NC
1698 if (PL_skipwhite) {
1699 if (!PL_thiswhite)
6b29d1f5 1700 PL_thiswhite = newSVpvs("");
cd81e915
NC
1701 sv_catsv(PL_thiswhite, PL_skipwhite);
1702 sv_free(PL_skipwhite);
1703 PL_skipwhite = 0;
1704 }
1705 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1706 return s;
1707}
1708
cd81e915 1709/* skip space after PL_thistoken */
29595ff2
NC
1710
1711STATIC char *
1712S_skipspace1(pTHX_ register char *s)
1713{
d4c19fe8 1714 const char *start = s;
29595ff2
NC
1715 I32 startoff = start - SvPVX(PL_linestr);
1716
7918f24d
NC
1717 PERL_ARGS_ASSERT_SKIPSPACE1;
1718
29595ff2
NC
1719 s = skipspace(s);
1720 if (!PL_madskills)
1721 return s;
1722 start = SvPVX(PL_linestr) + startoff;
cd81e915 1723 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1724 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1725 PL_thistoken = newSVpvn(tstart, start - tstart);
1726 }
1727 PL_realtokenstart = -1;
1728 if (PL_skipwhite) {
1729 if (!PL_nextwhite)
6b29d1f5 1730 PL_nextwhite = newSVpvs("");
cd81e915
NC
1731 sv_catsv(PL_nextwhite, PL_skipwhite);
1732 sv_free(PL_skipwhite);
1733 PL_skipwhite = 0;
29595ff2
NC
1734 }
1735 return s;
1736}
1737
1738STATIC char *
1739S_skipspace2(pTHX_ register char *s, SV **svp)
1740{
c35e046a
AL
1741 char *start;
1742 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1743 const I32 startoff = s - SvPVX(PL_linestr);
1744
7918f24d
NC
1745 PERL_ARGS_ASSERT_SKIPSPACE2;
1746
29595ff2
NC
1747 s = skipspace(s);
1748 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1749 if (!PL_madskills || !svp)
1750 return s;
1751 start = SvPVX(PL_linestr) + startoff;
cd81e915 1752 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1753 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1754 PL_thistoken = newSVpvn(tstart, start - tstart);
1755 PL_realtokenstart = -1;
29595ff2 1756 }
cd81e915 1757 if (PL_skipwhite) {
29595ff2 1758 if (!*svp)
6b29d1f5 1759 *svp = newSVpvs("");
cd81e915
NC
1760 sv_setsv(*svp, PL_skipwhite);
1761 sv_free(PL_skipwhite);
1762 PL_skipwhite = 0;
29595ff2
NC
1763 }
1764
1765 return s;
1766}
1767#endif
1768
80a702cd 1769STATIC void
15f169a1 1770S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1771{
1772 AV *av = CopFILEAVx(PL_curcop);
1773 if (av) {
b9f83d2f 1774 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1775 if (orig_sv)
1776 sv_setsv(sv, orig_sv);
1777 else
1778 sv_setpvn(sv, buf, len);
80a702cd
RGS
1779 (void)SvIOK_on(sv);
1780 SvIV_set(sv, 0);
1781 av_store(av, (I32)CopLINE(PL_curcop), sv);
1782 }
1783}
1784
ffb4593c
NT
1785/*
1786 * S_skipspace
1787 * Called to gobble the appropriate amount and type of whitespace.
1788 * Skips comments as well.
1789 */
1790
76e3520e 1791STATIC char *
cea2e8a9 1792S_skipspace(pTHX_ register char *s)
a687059c 1793{
5db06880 1794#ifdef PERL_MAD
f0e67a1d
Z
1795 char *start = s;
1796#endif /* PERL_MAD */
7918f24d 1797 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1798#ifdef PERL_MAD
cd81e915
NC
1799 if (PL_skipwhite) {
1800 sv_free(PL_skipwhite);
f0e67a1d 1801 PL_skipwhite = NULL;
5db06880 1802 }
f0e67a1d 1803#endif /* PERL_MAD */
3280af22 1804 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1805 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1806 s++;
f0e67a1d
Z
1807 } else {
1808 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1809 PL_bufptr = s;
f0998909
Z
1810 lex_read_space(LEX_KEEP_PREVIOUS |
1811 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1812 LEX_NO_NEXT_CHUNK : 0));
3280af22 1813 s = PL_bufptr;
f0e67a1d
Z
1814 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1815 if (PL_linestart > PL_bufptr)
1816 PL_bufptr = PL_linestart;
1817 return s;
463ee0b2 1818 }
5db06880 1819#ifdef PERL_MAD
f0e67a1d
Z
1820 if (PL_madskills)
1821 PL_skipwhite = newSVpvn(start, s-start);
1822#endif /* PERL_MAD */
5db06880 1823 return s;
a687059c 1824}
378cc40b 1825
ffb4593c
NT
1826/*
1827 * S_check_uni
1828 * Check the unary operators to ensure there's no ambiguity in how they're
1829 * used. An ambiguous piece of code would be:
1830 * rand + 5
1831 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1832 * the +5 is its argument.
1833 */
1834
76e3520e 1835STATIC void
cea2e8a9 1836S_check_uni(pTHX)
ba106d47 1837{
97aff369 1838 dVAR;
d4c19fe8
AL
1839 const char *s;
1840 const char *t;
2f3197b3 1841
3280af22 1842 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1843 return;
3280af22
NIS
1844 while (isSPACE(*PL_last_uni))
1845 PL_last_uni++;
c35e046a
AL
1846 s = PL_last_uni;
1847 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1848 s++;
3280af22 1849 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1850 return;
6136c704 1851
9b387841
NC
1852 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1853 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1854 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1855}
1856
ffb4593c
NT
1857/*
1858 * LOP : macro to build a list operator. Its behaviour has been replaced
1859 * with a subroutine, S_lop() for which LOP is just another name.
1860 */
1861
a0d0e21e
LW
1862#define LOP(f,x) return lop(f,x,s)
1863
ffb4593c
NT
1864/*
1865 * S_lop
1866 * Build a list operator (or something that might be one). The rules:
1867 * - if we have a next token, then it's a list operator [why?]
1868 * - if the next thing is an opening paren, then it's a function
1869 * - else it's a list operator
1870 */
1871
76e3520e 1872STATIC I32
a0be28da 1873S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1874{
97aff369 1875 dVAR;
7918f24d
NC
1876
1877 PERL_ARGS_ASSERT_LOP;
1878
6154021b 1879 pl_yylval.ival = f;
35c8bce7 1880 CLINE;
3280af22
NIS
1881 PL_expect = x;
1882 PL_bufptr = s;
1883 PL_last_lop = PL_oldbufptr;
eb160463 1884 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1885#ifdef PERL_MAD
1886 if (PL_lasttoke)
78cdf107 1887 goto lstop;
5db06880 1888#else
3280af22 1889 if (PL_nexttoke)
78cdf107 1890 goto lstop;
5db06880 1891#endif
79072805 1892 if (*s == '(')
bbf60fe6 1893 return REPORT(FUNC);
29595ff2 1894 s = PEEKSPACE(s);
79072805 1895 if (*s == '(')
bbf60fe6 1896 return REPORT(FUNC);
78cdf107
Z
1897 else {
1898 lstop:
1899 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1900 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1901 return REPORT(LSTOP);
78cdf107 1902 }
79072805
LW
1903}
1904
5db06880
NC
1905#ifdef PERL_MAD
1906 /*
1907 * S_start_force
1908 * Sets up for an eventual force_next(). start_force(0) basically does
1909 * an unshift, while start_force(-1) does a push. yylex removes items
1910 * on the "pop" end.
1911 */
1912
1913STATIC void
1914S_start_force(pTHX_ int where)
1915{
1916 int i;
1917
cd81e915 1918 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1919 where = PL_lasttoke;
cd81e915
NC
1920 assert(PL_curforce < 0 || PL_curforce == where);
1921 if (PL_curforce != where) {
5db06880
NC
1922 for (i = PL_lasttoke; i > where; --i) {
1923 PL_nexttoke[i] = PL_nexttoke[i-1];
1924 }
1925 PL_lasttoke++;
1926 }
cd81e915 1927 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1928 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1929 PL_curforce = where;
1930 if (PL_nextwhite) {
5db06880 1931 if (PL_madskills)
6b29d1f5 1932 curmad('^', newSVpvs(""));
cd81e915 1933 CURMAD('_', PL_nextwhite);
5db06880
NC
1934 }
1935}
1936
1937STATIC void
1938S_curmad(pTHX_ char slot, SV *sv)
1939{
1940 MADPROP **where;
1941
1942 if (!sv)
1943 return;
cd81e915
NC
1944 if (PL_curforce < 0)
1945 where = &PL_thismad;
5db06880 1946 else
cd81e915 1947 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1948
cd81e915 1949 if (PL_faketokens)
76f68e9b 1950 sv_setpvs(sv, "");
5db06880
NC
1951 else {
1952 if (!IN_BYTES) {
1953 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1954 SvUTF8_on(sv);
1955 else if (PL_encoding) {
1956 sv_recode_to_utf8(sv, PL_encoding);
1957 }
1958 }
1959 }
1960
1961 /* keep a slot open for the head of the list? */
1962 if (slot != '_' && *where && (*where)->mad_key == '^') {
1963 (*where)->mad_key = slot;
daba3364 1964 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1965 (*where)->mad_val = (void*)sv;
1966 }
1967 else
1968 addmad(newMADsv(slot, sv), where, 0);
1969}
1970#else
b3f24c00
MHM
1971# define start_force(where) NOOP
1972# define curmad(slot, sv) NOOP
5db06880
NC
1973#endif
1974
ffb4593c
NT
1975/*
1976 * S_force_next
9cbb5ea2 1977 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1978 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1979 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1980 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1981 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1982 */
1983
4e553d73 1984STATIC void
cea2e8a9 1985S_force_next(pTHX_ I32 type)
79072805 1986{
97aff369 1987 dVAR;
704d4215
GG
1988#ifdef DEBUGGING
1989 if (DEBUG_T_TEST) {
1990 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1991 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1992 }
1993#endif
6c7ae946
FC
1994 /* Don’t let opslab_force_free snatch it */
1995 if (S_is_opval_token(type & 0xffff) && NEXTVAL_NEXTTOKE.opval) {
1996 assert(!NEXTVAL_NEXTTOKE.opval->op_savefree);
1997 NEXTVAL_NEXTTOKE.opval->op_savefree = 1;
1998 }
5db06880 1999#ifdef PERL_MAD
cd81e915 2000 if (PL_curforce < 0)
5db06880 2001 start_force(PL_lasttoke);
cd81e915 2002 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
2003 if (PL_lex_state != LEX_KNOWNEXT)
2004 PL_lex_defer = PL_lex_state;
2005 PL_lex_state = LEX_KNOWNEXT;
2006 PL_lex_expect = PL_expect;
cd81e915 2007 PL_curforce = -1;
5db06880 2008#else
3280af22
NIS
2009 PL_nexttype[PL_nexttoke] = type;
2010 PL_nexttoke++;
2011 if (PL_lex_state != LEX_KNOWNEXT) {
2012 PL_lex_defer = PL_lex_state;
2013 PL_lex_expect = PL_expect;
2014 PL_lex_state = LEX_KNOWNEXT;
79072805 2015 }
5db06880 2016#endif
79072805
LW
2017}
2018
28ac2b49
Z
2019void
2020Perl_yyunlex(pTHX)
2021{
a7aaec61
Z
2022 int yyc = PL_parser->yychar;
2023 if (yyc != YYEMPTY) {
2024 if (yyc) {
2025 start_force(-1);
2026 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2027 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 2028 PL_lex_allbrackets--;
a7aaec61 2029 PL_lex_brackets--;
78cdf107
Z
2030 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2031 } else if (yyc == '('/*)*/) {
2032 PL_lex_allbrackets--;
2033 yyc |= (2<<24);
a7aaec61
Z
2034 }
2035 force_next(yyc);
2036 }
28ac2b49
Z
2037 PL_parser->yychar = YYEMPTY;
2038 }
2039}
2040
d0a148a6 2041STATIC SV *
15f169a1 2042S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2043{
97aff369 2044 dVAR;
740cce10 2045 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2046 !IN_BYTES
2047 && UTF
2048 && !is_ascii_string((const U8*)start, len)
740cce10 2049 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2050 return sv;
2051}
2052
ffb4593c
NT
2053/*
2054 * S_force_word
2055 * When the lexer knows the next thing is a word (for instance, it has
2056 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2057 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2058 * lookahead.
ffb4593c
NT
2059 *
2060 * Arguments:
b1b65b59 2061 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2062 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2063 * int check_keyword : if true, Perl checks to make sure the word isn't
2064 * a keyword (do this if the word is a label, e.g. goto FOO)
2065 * int allow_pack : if true, : characters will also be allowed (require,
2066 * use, etc. do this)
9cbb5ea2 2067 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2068 */
2069
76e3520e 2070STATIC char *
cea2e8a9 2071S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2072{
97aff369 2073 dVAR;
eb578fdb 2074 char *s;
463ee0b2 2075 STRLEN len;
4e553d73 2076
7918f24d
NC
2077 PERL_ARGS_ASSERT_FORCE_WORD;
2078
29595ff2 2079 start = SKIPSPACE1(start);
463ee0b2 2080 s = start;
7e2040f0 2081 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2082 (allow_pack && *s == ':') ||
15f0808c 2083 (allow_initial_tick && *s == '\'') )
a0d0e21e 2084 {
3280af22 2085 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2086 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2087 return start;
cd81e915 2088 start_force(PL_curforce);
5db06880
NC
2089 if (PL_madskills)
2090 curmad('X', newSVpvn(start,s-start));
463ee0b2 2091 if (token == METHOD) {
29595ff2 2092 s = SKIPSPACE1(s);
463ee0b2 2093 if (*s == '(')
3280af22 2094 PL_expect = XTERM;
463ee0b2 2095 else {
3280af22 2096 PL_expect = XOPERATOR;
463ee0b2 2097 }
79072805 2098 }
e74e6b3d 2099 if (PL_madskills)
63575281 2100 curmad('g', newSVpvs( "forced" ));
9ded7720 2101 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2102 = (OP*)newSVOP(OP_CONST,0,
2103 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2104 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2105 force_next(token);
2106 }
2107 return s;
2108}
2109
ffb4593c
NT
2110/*
2111 * S_force_ident
9cbb5ea2 2112 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2113 * text only contains the "foo" portion. The first argument is a pointer
2114 * to the "foo", and the second argument is the type symbol to prefix.
2115 * Forces the next token to be a "WORD".
9cbb5ea2 2116 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2117 */
2118
76e3520e 2119STATIC void
bfed75c6 2120S_force_ident(pTHX_ register const char *s, int kind)
79072805 2121{
97aff369 2122 dVAR;
7918f24d
NC
2123
2124 PERL_ARGS_ASSERT_FORCE_IDENT;
2125
c35e046a 2126 if (*s) {
90e5519e 2127 const STRLEN len = strlen(s);
728847b1
BF
2128 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2129 UTF ? SVf_UTF8 : 0));
cd81e915 2130 start_force(PL_curforce);
9ded7720 2131 NEXTVAL_NEXTTOKE.opval = o;
79072805 2132 force_next(WORD);
748a9306 2133 if (kind) {
11343788 2134 o->op_private = OPpCONST_ENTERED;
55497cff 2135 /* XXX see note in pp_entereval() for why we forgo typo
2136 warnings if the symbol must be introduced in an eval.
2137 GSAR 96-10-12 */
90e5519e 2138 gv_fetchpvn_flags(s, len,
728847b1
BF
2139 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2140 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
90e5519e
NC
2141 kind == '$' ? SVt_PV :
2142 kind == '@' ? SVt_PVAV :
2143 kind == '%' ? SVt_PVHV :
a0d0e21e 2144 SVt_PVGV
90e5519e 2145 );
748a9306 2146 }
79072805
LW
2147 }
2148}
2149
1571675a
GS
2150NV
2151Perl_str_to_version(pTHX_ SV *sv)
2152{
2153 NV retval = 0.0;
2154 NV nshift = 1.0;
2155 STRLEN len;
cfd0369c 2156 const char *start = SvPV_const(sv,len);
9d4ba2ae 2157 const char * const end = start + len;
504618e9 2158 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2159
2160 PERL_ARGS_ASSERT_STR_TO_VERSION;
2161
1571675a 2162 while (start < end) {
ba210ebe 2163 STRLEN skip;
1571675a
GS
2164 UV n;
2165 if (utf)
9041c2e3 2166 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2167 else {
2168 n = *(U8*)start;
2169 skip = 1;
2170 }
2171 retval += ((NV)n)/nshift;
2172 start += skip;
2173 nshift *= 1000;
2174 }
2175 return retval;
2176}
2177
4e553d73 2178/*
ffb4593c
NT
2179 * S_force_version
2180 * Forces the next token to be a version number.
e759cc13
RGS
2181 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2182 * and if "guessing" is TRUE, then no new token is created (and the caller
2183 * must use an alternative parsing method).
ffb4593c
NT
2184 */
2185
76e3520e 2186STATIC char *
e759cc13 2187S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2188{
97aff369 2189 dVAR;
5f66b61c 2190 OP *version = NULL;
44dcb63b 2191 char *d;
5db06880
NC
2192#ifdef PERL_MAD
2193 I32 startoff = s - SvPVX(PL_linestr);
2194#endif
89bfa8cd 2195
7918f24d
NC
2196 PERL_ARGS_ASSERT_FORCE_VERSION;
2197
29595ff2 2198 s = SKIPSPACE1(s);
89bfa8cd 2199
44dcb63b 2200 d = s;
dd629d5b 2201 if (*d == 'v')
44dcb63b 2202 d++;
44dcb63b 2203 if (isDIGIT(*d)) {
e759cc13
RGS
2204 while (isDIGIT(*d) || *d == '_' || *d == '.')
2205 d++;
5db06880
NC
2206#ifdef PERL_MAD
2207 if (PL_madskills) {
cd81e915 2208 start_force(PL_curforce);
5db06880
NC
2209 curmad('X', newSVpvn(s,d-s));
2210 }
2211#endif
4e4da3ac 2212 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2213 SV *ver;
8d08d9ba 2214#ifdef USE_LOCALE_NUMERIC
909d3787
KW
2215 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2216 setlocale(LC_NUMERIC, "C");
8d08d9ba 2217#endif
6154021b 2218 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2219#ifdef USE_LOCALE_NUMERIC
2220 setlocale(LC_NUMERIC, loc);
909d3787 2221 Safefree(loc);
8d08d9ba 2222#endif
6154021b 2223 version = pl_yylval.opval;
dd629d5b
GS
2224 ver = cSVOPx(version)->op_sv;
2225 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2226 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2227 SvNV_set(ver, str_to_version(ver));
1571675a 2228 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2229 }
89bfa8cd 2230 }
5db06880
NC
2231 else if (guessing) {
2232#ifdef PERL_MAD
2233 if (PL_madskills) {
cd81e915
NC
2234 sv_free(PL_nextwhite); /* let next token collect whitespace */
2235 PL_nextwhite = 0;
5db06880
NC
2236 s = SvPVX(PL_linestr) + startoff;
2237 }
2238#endif
e759cc13 2239 return s;
5db06880 2240 }
89bfa8cd 2241 }
2242
5db06880
NC
2243#ifdef PERL_MAD
2244 if (PL_madskills && !version) {
cd81e915
NC
2245 sv_free(PL_nextwhite); /* let next token collect whitespace */
2246 PL_nextwhite = 0;
5db06880
NC
2247 s = SvPVX(PL_linestr) + startoff;
2248 }
2249#endif
89bfa8cd 2250 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2251 start_force(PL_curforce);
9ded7720 2252 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2253 force_next(WORD);
89bfa8cd 2254
e759cc13 2255 return s;
89bfa8cd 2256}
2257
ffb4593c 2258/*
91152fc1
DG
2259 * S_force_strict_version
2260 * Forces the next token to be a version number using strict syntax rules.
2261 */
2262
2263STATIC char *
2264S_force_strict_version(pTHX_ char *s)
2265{
2266 dVAR;
2267 OP *version = NULL;
2268#ifdef PERL_MAD
2269 I32 startoff = s - SvPVX(PL_linestr);
2270#endif
2271 const char *errstr = NULL;
2272
2273 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2274
2275 while (isSPACE(*s)) /* leading whitespace */
2276 s++;
2277
2278 if (is_STRICT_VERSION(s,&errstr)) {
2279 SV *ver = newSV(0);
2280 s = (char *)scan_version(s, ver, 0);
2281 version = newSVOP(OP_CONST, 0, ver);
2282 }
4e4da3ac
Z
2283 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2284 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2285 {
91152fc1
DG
2286 PL_bufptr = s;
2287 if (errstr)
2288 yyerror(errstr); /* version required */
2289 return s;
2290 }
2291
2292#ifdef PERL_MAD
2293 if (PL_madskills && !version) {
2294 sv_free(PL_nextwhite); /* let next token collect whitespace */
2295 PL_nextwhite = 0;
2296 s = SvPVX(PL_linestr) + startoff;
2297 }
2298#endif
2299 /* NOTE: The parser sees the package name and the VERSION swapped */
2300 start_force(PL_curforce);
2301 NEXTVAL_NEXTTOKE.opval = version;
2302 force_next(WORD);
2303
2304 return s;
2305}
2306
2307/*
ffb4593c
NT
2308 * S_tokeq
2309 * Tokenize a quoted string passed in as an SV. It finds the next
2310 * chunk, up to end of string or a backslash. It may make a new
2311 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2312 * turns \\ into \.
2313 */
2314
76e3520e 2315STATIC SV *
cea2e8a9 2316S_tokeq(pTHX_ SV *sv)
79072805 2317{
97aff369 2318 dVAR;
eb578fdb
KW
2319 char *s;
2320 char *send;
2321 char *d;
b3ac6de7
IZ
2322 STRLEN len = 0;
2323 SV *pv = sv;
79072805 2324
7918f24d
NC
2325 PERL_ARGS_ASSERT_TOKEQ;
2326
79072805 2327 if (!SvLEN(sv))
b3ac6de7 2328 goto finish;
79072805 2329
a0d0e21e 2330 s = SvPV_force(sv, len);
21a311ee 2331 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2332 goto finish;
463ee0b2 2333 send = s + len;
dcb21ed6
NC
2334 /* This is relying on the SV being "well formed" with a trailing '\0' */
2335 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2336 s++;
2337 if (s == send)
b3ac6de7 2338 goto finish;
79072805 2339 d = s;
be4731d2 2340 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2341 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2342 }
79072805
LW
2343 while (s < send) {
2344 if (*s == '\\') {
a0d0e21e 2345 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2346 s++; /* all that, just for this */
2347 }
2348 *d++ = *s++;
2349 }
2350 *d = '\0';
95a20fc0 2351 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2352 finish:
3280af22 2353 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2354 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2355 return sv;
2356}
2357
ffb4593c
NT
2358/*
2359 * Now come three functions related to double-quote context,
2360 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2361 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2362 * interact with PL_lex_state, and create fake ( ... ) argument lists
2363 * to handle functions and concatenation.
ecd24171
DM
2364 * For example,
2365 * "foo\lbar"
2366 * is tokenised as
2367 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
ffb4593c
NT
2368 */
2369
2370/*
2371 * S_sublex_start
6154021b 2372 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2373 *
2374 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2375 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2376 *
2377 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2378 *
2379 * Everything else becomes a FUNC.
2380 *
2381 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2382 * had an OP_CONST or OP_READLINE). This just sets us up for a
2383 * call to S_sublex_push().
2384 */
2385
76e3520e 2386STATIC I32
cea2e8a9 2387S_sublex_start(pTHX)
79072805 2388{
97aff369 2389 dVAR;
eb578fdb 2390 const I32 op_type = pl_yylval.ival;
79072805
LW
2391
2392 if (op_type == OP_NULL) {
6154021b 2393 pl_yylval.opval = PL_lex_op;
5f66b61c 2394 PL_lex_op = NULL;
79072805
LW
2395 return THING;
2396 }
2397 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2398 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2399
2400 if (SvTYPE(sv) == SVt_PVIV) {
2401 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2402 STRLEN len;
96a5add6 2403 const char * const p = SvPV_const(sv, len);
740cce10 2404 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2405 SvREFCNT_dec(sv);
2406 sv = nsv;
4e553d73 2407 }
6154021b 2408 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2409 PL_lex_stuff = NULL;
6f33ba73
RGS
2410 /* Allow <FH> // "foo" */
2411 if (op_type == OP_READLINE)
2412 PL_expect = XTERMORDORDOR;
79072805
LW
2413 return THING;
2414 }
e3f73d4e
RGS
2415 else if (op_type == OP_BACKTICK && PL_lex_op) {
2416 /* readpipe() vas overriden */
2417 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2418 pl_yylval.opval = PL_lex_op;
9b201d7d 2419 PL_lex_op = NULL;
e3f73d4e
RGS
2420 PL_lex_stuff = NULL;
2421 return THING;
2422 }
79072805 2423
3280af22 2424 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2425 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2426 PL_sublex_info.sub_op = PL_lex_op;
2427 PL_lex_state = LEX_INTERPPUSH;
55497cff 2428
3280af22
NIS
2429 PL_expect = XTERM;
2430 if (PL_lex_op) {
6154021b 2431 pl_yylval.opval = PL_lex_op;
5f66b61c 2432 PL_lex_op = NULL;
55497cff 2433 return PMFUNC;
2434 }
2435 else
2436 return FUNC;
2437}
2438
ffb4593c
NT
2439/*
2440 * S_sublex_push
2441 * Create a new scope to save the lexing state. The scope will be
2442 * ended in S_sublex_done. Returns a '(', starting the function arguments
2443 * to the uc, lc, etc. found before.
2444 * Sets PL_lex_state to LEX_INTERPCONCAT.
2445 */
2446
76e3520e 2447STATIC I32
cea2e8a9 2448S_sublex_push(pTHX)
55497cff 2449{
27da23d5 2450 dVAR;
f46d017c 2451 ENTER;
55497cff 2452
3280af22 2453 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2454 SAVEBOOL(PL_lex_dojoin);
3280af22 2455 SAVEI32(PL_lex_brackets);
78cdf107 2456 SAVEI32(PL_lex_allbrackets);
b27dce25 2457 SAVEI32(PL_lex_formbrack);
78cdf107 2458 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2459 SAVEI32(PL_lex_casemods);
2460 SAVEI32(PL_lex_starts);
651b5b28 2461 SAVEI8(PL_lex_state);
7cc34111 2462 SAVESPTR(PL_lex_repl);
9da1dd8f 2463 SAVEPPTR(PL_sublex_info.re_eval_start);
db444266 2464 SAVESPTR(PL_sublex_info.re_eval_str);
565b52df 2465 SAVEPPTR(PL_sublex_info.super_bufptr);
7766f137 2466 SAVEVPTR(PL_lex_inpat);
98246f1e 2467 SAVEI16(PL_lex_inwhat);
57843af0 2468 SAVECOPLINE(PL_curcop);
3280af22 2469 SAVEPPTR(PL_bufptr);
8452ff4b 2470 SAVEPPTR(PL_bufend);
3280af22
NIS
2471 SAVEPPTR(PL_oldbufptr);
2472 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2473 SAVEPPTR(PL_last_lop);
2474 SAVEPPTR(PL_last_uni);
3280af22
NIS
2475 SAVEPPTR(PL_linestart);
2476 SAVESPTR(PL_linestr);
8edd5f42
RGS
2477 SAVEGENERICPV(PL_lex_brackstack);
2478 SAVEGENERICPV(PL_lex_casestack);
3280af22 2479
99bd9d90
FC
2480 /* The here-doc parser needs to be able to peek into outer lexing
2481 scopes to find the body of the here-doc. We use SvIVX(PL_linestr)
2482 to store the outer PL_bufptr and SvNVX to store the outer
2483 PL_linestr. Since SvIVX already means something else, we use
2484 PL_sublex_info.super_bufptr for the innermost scope (the one we are
2485 now entering), and a localised SvIVX for outer scopes.
2486 */
2487 SvUPGRADE(PL_linestr, SVt_PVIV);
2488 /* A null super_bufptr means the outer lexing scope is not peekable,
2489 because it is a single line from an input stream. */
2490 SAVEIV(SvIVX(PL_linestr));
2491 SvIVX(PL_linestr) = PTR2IV(PL_sublex_info.super_bufptr);
2492 PL_sublex_info.super_bufptr =
2493 (SvTYPE(PL_linestr) < SVt_PVNV || !SvNVX(PL_linestr))
2494 && (PL_rsfp || PL_parser->filtered)
2495 ? NULL
2496 : PL_bufptr;
2497 SvUPGRADE(PL_lex_stuff, SVt_PVNV);
2498 SvNVX(PL_lex_stuff) = PTR2NV(PL_linestr);
2499
3280af22 2500 PL_linestr = PL_lex_stuff;
7cc34111 2501 PL_lex_repl = PL_sublex_info.repl;
a0714e2c 2502 PL_lex_stuff = NULL;
7cc34111 2503 PL_sublex_info.repl = NULL;
9da1dd8f 2504 PL_sublex_info.re_eval_start = NULL;
db444266 2505 PL_sublex_info.re_eval_str = NULL;
3280af22 2506
9cbb5ea2
GS
2507 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2508 = SvPVX(PL_linestr);
3280af22 2509 PL_bufend += SvCUR(PL_linestr);
bd61b366 2510 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2511 SAVEFREESV(PL_linestr);
2512
2513 PL_lex_dojoin = FALSE;
b27dce25 2514 PL_lex_brackets = PL_lex_formbrack = 0;
78cdf107
Z
2515 PL_lex_allbrackets = 0;
2516 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2517 Newx(PL_lex_brackstack, 120, char);
2518 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2519 PL_lex_casemods = 0;
2520 *PL_lex_casestack = '\0';
2521 PL_lex_starts = 0;
2522 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2523 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2524
2525 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2526 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2527 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2528 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2529 else
5f66b61c 2530 PL_lex_inpat = NULL;
79072805 2531
55497cff 2532 return '(';
79072805
LW
2533}
2534
ffb4593c
NT
2535/*
2536 * S_sublex_done
2537 * Restores lexer state after a S_sublex_push.
2538 */
2539
76e3520e 2540STATIC I32
cea2e8a9 2541S_sublex_done(pTHX)
79072805 2542{
27da23d5 2543 dVAR;
3280af22 2544 if (!PL_lex_starts++) {
396482e1 2545 SV * const sv = newSVpvs("");
9aa983d2
JH
2546 if (SvUTF8(PL_linestr))
2547 SvUTF8_on(sv);
3280af22 2548 PL_expect = XOPERATOR;
6154021b 2549 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2550 return THING;
2551 }
2552
3280af22
NIS
2553 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2554 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2555 return yylex();
79072805
LW
2556 }
2557
ffb4593c 2558 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2559 assert(PL_lex_inwhat != OP_TRANSR);
3280af22 2560 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
99bd9d90
FC
2561 SvUPGRADE(PL_lex_repl, SVt_PVNV);
2562 SvNVX(PL_lex_repl) = SvNVX(PL_linestr);
3280af22
NIS
2563 PL_linestr = PL_lex_repl;
2564 PL_lex_inpat = 0;
2565 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2566 PL_bufend += SvCUR(PL_linestr);
bd61b366 2567 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2568 SAVEFREESV(PL_linestr);
2569 PL_lex_dojoin = FALSE;
2570 PL_lex_brackets = 0;
78cdf107
Z
2571 PL_lex_allbrackets = 0;
2572 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2573 PL_lex_casemods = 0;
2574 *PL_lex_casestack = '\0';
2575 PL_lex_starts = 0;
25da4f38 2576 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2577 PL_lex_state = LEX_INTERPNORMAL;
2578 PL_lex_starts++;
e9fa98b2
HS
2579 /* we don't clear PL_lex_repl here, so that we can check later
2580 whether this is an evalled subst; that means we rely on the
2581 logic to ensure sublex_done() is called again only via the
2582 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2583 }
e9fa98b2 2584 else {
3280af22 2585 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2586 PL_lex_repl = NULL;
e9fa98b2 2587 }
79072805 2588 return ',';
ffed7fef
LW
2589 }
2590 else {
5db06880
NC
2591#ifdef PERL_MAD
2592 if (PL_madskills) {
cd81e915
NC
2593 if (PL_thiswhite) {
2594 if (!PL_endwhite)
6b29d1f5 2595 PL_endwhite = newSVpvs("");
cd81e915
NC
2596 sv_catsv(PL_endwhite, PL_thiswhite);
2597 PL_thiswhite = 0;
2598 }
2599 if (PL_thistoken)
76f68e9b 2600 sv_setpvs(PL_thistoken,"");
5db06880 2601 else
cd81e915 2602 PL_realtokenstart = -1;
5db06880
NC
2603 }
2604#endif
f46d017c 2605 LEAVE;
3280af22
NIS
2606 PL_bufend = SvPVX(PL_linestr);
2607 PL_bufend += SvCUR(PL_linestr);
2608 PL_expect = XOPERATOR;
09bef843 2609 PL_sublex_info.sub_inwhat = 0;
79072805 2610 return ')';
ffed7fef
LW
2611 }
2612}
2613
02aa26ce
NT
2614/*
2615 scan_const
2616
9da1dd8f
DM
2617 Extracts the next constant part of a pattern, double-quoted string,
2618 or transliteration. This is terrifying code.
2619
2620 For example, in parsing the double-quoted string "ab\x63$d", it would
2621 stop at the '$' and return an OP_CONST containing 'abc'.
02aa26ce 2622
94def140 2623 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2624 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2625 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2626
94def140
TS
2627 Returns a pointer to the character scanned up to. If this is
2628 advanced from the start pointer supplied (i.e. if anything was
9da1dd8f 2629 successfully parsed), will leave an OP_CONST for the substring scanned
6154021b 2630 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2631 by looking at the next characters herself.
2632
02aa26ce 2633 In patterns:
9da1dd8f
DM
2634 expand:
2635 \N{ABC} => \N{U+41.42.43}
2636
2637 pass through:
2638 all other \-char, including \N and \N{ apart from \N{ABC}
2639
2640 stops on:
2641 @ and $ where it appears to be a var, but not for $ as tail anchor
2642 \l \L \u \U \Q \E
2643 (?{ or (??{
2644
02aa26ce
NT
2645
2646 In transliterations:
2647 characters are VERY literal, except for - not at the start or end
94def140
TS
2648 of the string, which indicates a range. If the range is in bytes,
2649 scan_const expands the range to the full set of intermediate
2650 characters. If the range is in utf8, the hyphen is replaced with
2651 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2652
2653 In double-quoted strings:
2654 backslashes:
2655 double-quoted style: \r and \n
ff3f963a 2656 constants: \x31, etc.
94def140 2657 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2658 case and quoting: \U \Q \E
2659 stops on @ and $
2660
2661 scan_const does *not* construct ops to handle interpolated strings.
2662 It stops processing as soon as it finds an embedded $ or @ variable
2663 and leaves it to the caller to work out what's going on.
2664
94def140
TS
2665 embedded arrays (whether in pattern or not) could be:
2666 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2667
2668 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2669
2670 $ in pattern could be $foo or could be tail anchor. Assumption:
2671 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2672 followed by one of "()| \r\n\t"
02aa26ce 2673
9da1dd8f 2674 \1 (backreferences) are turned into $1 in substitutions
02aa26ce
NT
2675
2676 The structure of the code is
2677 while (there's a character to process) {
94def140
TS
2678 handle transliteration ranges
2679 skip regexp comments /(?#comment)/ and codes /(?{code})/
2680 skip #-initiated comments in //x patterns
2681 check for embedded arrays
02aa26ce
NT
2682 check for embedded scalars
2683 if (backslash) {
94def140 2684 deprecate \1 in substitution replacements
02aa26ce
NT
2685 handle string-changing backslashes \l \U \Q \E, etc.
2686 switch (what was escaped) {
94def140 2687 handle \- in a transliteration (becomes a literal -)
ff3f963a 2688 if a pattern and not \N{, go treat as regular character
94def140
TS
2689 handle \132 (octal characters)
2690 handle \x15 and \x{1234} (hex characters)
ff3f963a 2691 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2692 handle \cV (control characters)
2693 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2694 } (end switch)
77a135fe 2695 continue
02aa26ce 2696 } (end if backslash)
77a135fe 2697 handle regular character
02aa26ce 2698 } (end while character to read)
4e553d73 2699
02aa26ce
NT
2700*/
2701
76e3520e 2702STATIC char *
cea2e8a9 2703S_scan_const(pTHX_ char *start)
79072805 2704{
97aff369 2705 dVAR;
eb578fdb 2706 char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2707 SV *sv = newSV(send - start); /* sv for the constant. See
2708 note below on sizing. */
eb578fdb
KW
2709 char *s = start; /* start of the constant */
2710 char *d = SvPVX(sv); /* destination for copies */
02aa26ce 2711 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2712 bool didrange = FALSE; /* did we just finish a range? */
2866decb 2713 bool in_charclass = FALSE; /* within /[...]/ */
b953e60c
KW
2714 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2715 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2716 to be UTF8? But, this can
2717 show as true when the source
2718 isn't utf8, as for example
2719 when it is entirely composed
2720 of hex constants */
2721
2722 /* Note on sizing: The scanned constant is placed into sv, which is
2723 * initialized by newSV() assuming one byte of output for every byte of
2724 * input. This routine expects newSV() to allocate an extra byte for a
2725 * trailing NUL, which this routine will append if it gets to the end of
2726 * the input. There may be more bytes of input than output (eg., \N{LATIN
2727 * CAPITAL LETTER A}), or more output than input if the constant ends up
2728 * recoded to utf8, but each time a construct is found that might increase
2729 * the needed size, SvGROW() is called. Its size parameter each time is
2730 * based on the best guess estimate at the time, namely the length used so
2731 * far, plus the length the current construct will occupy, plus room for
2732 * the trailing NUL, plus one byte for every input byte still unscanned */
2733
012bcf8d 2734 UV uv;
4c3a8340
TS
2735#ifdef EBCDIC
2736 UV literal_endpoint = 0;
e294cc5d 2737 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2738#endif
012bcf8d 2739
7918f24d
NC
2740 PERL_ARGS_ASSERT_SCAN_CONST;
2741
bb16bae8 2742 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2743 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2744 /* If we are doing a trans and we know we want UTF8 set expectation */
2745 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2746 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2747 }
2748
2749
79072805 2750 while (s < send || dorange) {
ff3f963a 2751
02aa26ce 2752 /* get transliterations out of the way (they're most literal) */
3280af22 2753 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2754 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2755 if (dorange) {
1ba5c669
JH
2756 I32 i; /* current expanded character */
2757 I32 min; /* first character in range */
2758 I32 max; /* last character in range */
02aa26ce 2759
e294cc5d
JH
2760#ifdef EBCDIC
2761 UV uvmax = 0;
2762#endif
2763
2764 if (has_utf8
2765#ifdef EBCDIC
2766 && !native_range
2767#endif
2768 ) {
9d4ba2ae 2769 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2770 char *e = d++;
2771 while (e-- > c)
2772 *(e + 1) = *e;
25716404 2773 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2774 /* mark the range as done, and continue */
2775 dorange = FALSE;
2776 didrange = TRUE;
2777 continue;
2778 }
2b9d42f0 2779
95a20fc0 2780 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2781#ifdef EBCDIC
2782 SvGROW(sv,
2783 SvLEN(sv) + (has_utf8 ?
2784 (512 - UTF_CONTINUATION_MARK +
2785 UNISKIP(0x100))
2786 : 256));
2787 /* How many two-byte within 0..255: 128 in UTF-8,
2788 * 96 in UTF-8-mod. */
2789#else
9cbb5ea2 2790 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2791#endif
9cbb5ea2 2792 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2793#ifdef EBCDIC
2794 if (has_utf8) {
2795 int j;
2796 for (j = 0; j <= 1; j++) {
2797 char * const c = (char*)utf8_hop((U8*)d, -1);
2798 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2799 if (j)
2800 min = (U8)uv;
2801 else if (uv < 256)
2802 max = (U8)uv;
2803 else {
2804 max = (U8)0xff; /* only to \xff */
2805 uvmax = uv; /* \x{100} to uvmax */
2806 }
2807 d = c; /* eat endpoint chars */
2808 }
2809 }
2810 else {
2811#endif
2812 d -= 2; /* eat the first char and the - */
2813 min = (U8)*d; /* first char in range */
2814 max = (U8)d[1]; /* last char in range */
2815#ifdef EBCDIC
2816 }
2817#endif
8ada0baa 2818
c2e66d9e 2819 if (min > max) {
01ec43d0 2820 Perl_croak(aTHX_
d1573ac7 2821 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2822 (char)min, (char)max);
c2e66d9e
GS
2823 }
2824
c7f1f016 2825#ifdef EBCDIC
4c3a8340
TS
2826 if (literal_endpoint == 2 &&
2827 ((isLOWER(min) && isLOWER(max)) ||
2828 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2829 if (isLOWER(min)) {
2830 for (i = min; i <= max; i++)
2831 if (isLOWER(i))
db42d148 2832 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2833 } else {
2834 for (i = min; i <= max; i++)
2835 if (isUPPER(i))
db42d148 2836 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2837 }
2838 }
2839 else
2840#endif
2841 for (i = min; i <= max; i++)
e294cc5d
JH
2842#ifdef EBCDIC
2843 if (has_utf8) {
2844 const U8 ch = (U8)NATIVE_TO_UTF(i);
2845 if (UNI_IS_INVARIANT(ch))
2846 *d++ = (U8)i;
2847 else {
2848 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2849 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2850 }
2851 }
2852 else
2853#endif
2854 *d++ = (char)i;
2855
2856#ifdef EBCDIC
2857 if (uvmax) {
2858 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2859 if (uvmax > 0x101)
2860 *d++ = (char)UTF_TO_NATIVE(0xff);
2861 if (uvmax > 0x100)
2862 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2863 }
2864#endif
02aa26ce
NT
2865
2866 /* mark the range as done, and continue */
79072805 2867 dorange = FALSE;
01ec43d0 2868 didrange = TRUE;
4c3a8340
TS
2869#ifdef EBCDIC
2870 literal_endpoint = 0;
2871#endif
79072805 2872 continue;
4e553d73 2873 }
02aa26ce
NT
2874
2875 /* range begins (ignore - as first or last char) */
79072805 2876 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2877 if (didrange) {
1fafa243 2878 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2879 }
e294cc5d
JH
2880 if (has_utf8
2881#ifdef EBCDIC
2882 && !native_range
2883#endif
2884 ) {
25716404 2885 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2886 s++;
2887 continue;
2888 }
79072805
LW
2889 dorange = TRUE;
2890 s++;
01ec43d0
GS
2891 }
2892 else {
2893 didrange = FALSE;
4c3a8340
TS
2894#ifdef EBCDIC
2895 literal_endpoint = 0;
e294cc5d 2896 native_range = TRUE;
4c3a8340 2897#endif
01ec43d0 2898 }
79072805 2899 }
02aa26ce
NT
2900
2901 /* if we get here, we're not doing a transliteration */
2902
e4a2df84
DM
2903 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2904 char *s1 = s-1;
2905 int esc = 0;
2906 while (s1 >= start && *s1-- == '\\')
2907 esc = !esc;
2908 if (!esc)
2909 in_charclass = TRUE;
2910 }
2866decb 2911
e4a2df84
DM
2912 else if (*s == ']' && PL_lex_inpat && in_charclass) {
2913 char *s1 = s-1;
2914 int esc = 0;
2915 while (s1 >= start && *s1-- == '\\')
2916 esc = !esc;
2917 if (!esc)
2918 in_charclass = FALSE;
2919 }
2866decb 2920
9da1dd8f
DM
2921 /* skip for regexp comments /(?#comment)/, except for the last
2922 * char, which will be done separately.
2923 * Stop on (?{..}) and friends */
2924
3280af22 2925 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2926 if (s[2] == '#') {
e994fd66 2927 while (s+1 < send && *s != ')')
db42d148 2928 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94 2929 }
2866decb 2930 else if (!PL_lex_casemods && !in_charclass &&
d3cec5e5
DM
2931 ( s[2] == '{' /* This should match regcomp.c */
2932 || (s[2] == '?' && s[3] == '{')))
155aba94 2933 {
9da1dd8f 2934 break;
cc6b7395 2935 }
748a9306 2936 }
02aa26ce
NT
2937
2938 /* likewise skip #-initiated comments in //x patterns */
3280af22 2939 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2940 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2941 while (s+1 < send && *s != '\n')
db42d148 2942 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2943 }
02aa26ce 2944
9da1dd8f
DM
2945 /* no further processing of single-quoted regex */
2946 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
2947 goto default_action;
2948
5d1d4326 2949 /* check for embedded arrays
da6eedaa 2950 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2951 */
1749ea0d
TS
2952 else if (*s == '@' && s[1]) {
2953 if (isALNUM_lazy_if(s+1,UTF))
2954 break;
2955 if (strchr(":'{$", s[1]))
2956 break;
2957 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2958 break; /* in regexp, neither @+ nor @- are interpolated */
2959 }
02aa26ce
NT
2960
2961 /* check for embedded scalars. only stop if we're sure it's a
2962 variable.
2963 */
79072805 2964 else if (*s == '$') {
3280af22 2965 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2966 break;
77772344 2967 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2968 if (s[1] == '\\') {
2969 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2970 "Possible unintended interpolation of $\\ in regex");
77772344 2971 }
79072805 2972 break; /* in regexp, $ might be tail anchor */
77772344 2973 }
79072805 2974 }
02aa26ce 2975
2b9d42f0
NIS
2976 /* End of else if chain - OP_TRANS rejoin rest */
2977
02aa26ce 2978 /* backslashes */
79072805 2979 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2980 char* e; /* Can be used for ending '}', etc. */
2981
79072805 2982 s++;
02aa26ce 2983
7d0fc23c
KW
2984 /* warn on \1 - \9 in substitution replacements, but note that \11
2985 * is an octal; and \19 is \1 followed by '9' */
3280af22 2986 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2987 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2988 {
a2a5de95 2989 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2990 *--s = '$';
2991 break;
2992 }
02aa26ce
NT
2993
2994 /* string-change backslash escapes */
838f2281 2995 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
79072805
LW
2996 --s;
2997 break;
2998 }
ff3f963a
KW
2999 /* In a pattern, process \N, but skip any other backslash escapes.
3000 * This is because we don't want to translate an escape sequence
3001 * into a meta symbol and have the regex compiler use the meta
3002 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
3003 * in spite of this, we do have to process \N here while the proper
3004 * charnames handler is in scope. See bugs #56444 and #62056.
3005 * There is a complication because \N in a pattern may also stand
3006 * for 'match a non-nl', and not mean a charname, in which case its
3007 * processing should be deferred to the regex compiler. To be a
3008 * charname it must be followed immediately by a '{', and not look
3009 * like \N followed by a curly quantifier, i.e., not something like
3010 * \N{3,}. regcurly returns a boolean indicating if it is a legal
3011 * quantifier */
3012 else if (PL_lex_inpat
3013 && (*s != 'N'
3014 || s[1] != '{'
3015 || regcurly(s + 1)))
3016 {
cc74c5bd
TS
3017 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
3018 goto default_action;
3019 }
02aa26ce 3020
79072805 3021 switch (*s) {
02aa26ce
NT
3022
3023 /* quoted - in transliterations */
79072805 3024 case '-':
3280af22 3025 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
3026 *d++ = *s++;
3027 continue;
3028 }
3029 /* FALL THROUGH */
3030 default:
11b8faa4 3031 {
e4ca4584 3032 if ((isALNUMC(*s)))
a2a5de95
NC
3033 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
3034 "Unrecognized escape \\%c passed through",
3035 *s);
11b8faa4 3036 /* default action is to copy the quoted character */
f9a63242 3037 goto default_action;
11b8faa4 3038 }
02aa26ce 3039
632403cc 3040 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
3041 case '0': case '1': case '2': case '3':
3042 case '4': case '5': case '6': case '7':
ba210ebe 3043 {
53305cf1
NC
3044 I32 flags = 0;
3045 STRLEN len = 3;
77a135fe 3046 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
3047 s += len;
3048 }
012bcf8d 3049 goto NUM_ESCAPE_INSERT;
02aa26ce 3050
f0a2b745
KW
3051 /* eg. \o{24} indicates the octal constant \024 */
3052 case 'o':
3053 {
3054 STRLEN len;
454155d9 3055 const char* error;
f0a2b745 3056
454155d9 3057 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 3058 s += len;
454155d9 3059 if (! valid) {
f0a2b745
KW
3060 yyerror(error);
3061 continue;
3062 }
3063 goto NUM_ESCAPE_INSERT;
3064 }
3065
77a135fe 3066 /* eg. \x24 indicates the hex constant 0x24 */
79072805 3067 case 'x':
a0481293 3068 {
53305cf1 3069 STRLEN len;
a0481293 3070 const char* error;
355860ce 3071
a0481293
KW
3072 bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3073 s += len;
3074 if (! valid) {
3075 yyerror(error);
355860ce 3076 continue;
ba210ebe 3077 }
012bcf8d
GS
3078 }
3079
3080 NUM_ESCAPE_INSERT:
ff3f963a
KW
3081 /* Insert oct or hex escaped character. There will always be
3082 * enough room in sv since such escapes will be longer than any
3083 * UTF-8 sequence they can end up as, except if they force us
3084 * to recode the rest of the string into utf8 */
ba7cea30 3085
77a135fe 3086 /* Here uv is the ordinal of the next character being added in
ff3f963a 3087 * unicode (converted from native). */
77a135fe 3088 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3089 if (!has_utf8 && uv > 255) {
77a135fe
KW
3090 /* Might need to recode whatever we have accumulated so
3091 * far if it contains any chars variant in utf8 or
3092 * utf-ebcdic. */
3093
3094 SvCUR_set(sv, d - SvPVX_const(sv));
3095 SvPOK_on(sv);
3096 *d = '\0';
77a135fe 3097 /* See Note on sizing above. */
7bf79863
KW
3098 sv_utf8_upgrade_flags_grow(sv,
3099 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3100 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3101 d = SvPVX(sv) + SvCUR(sv);
3102 has_utf8 = TRUE;
012bcf8d
GS
3103 }
3104
77a135fe
KW
3105 if (has_utf8) {
3106 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3107 if (PL_lex_inwhat == OP_TRANS &&
3108 PL_sublex_info.sub_op) {
3109 PL_sublex_info.sub_op->op_private |=
3110 (PL_lex_repl ? OPpTRANS_FROM_UTF
3111 : OPpTRANS_TO_UTF);
f9a63242 3112 }
e294cc5d
JH
3113#ifdef EBCDIC
3114 if (uv > 255 && !dorange)
3115 native_range = FALSE;
3116#endif
012bcf8d 3117 }
a0ed51b3 3118 else {
012bcf8d 3119 *d++ = (char)uv;
a0ed51b3 3120 }
012bcf8d
GS
3121 }
3122 else {
c4d5f83a 3123 *d++ = (char) uv;
a0ed51b3 3124 }
79072805 3125 continue;
02aa26ce 3126
4a2d328f 3127 case 'N':
ff3f963a
KW
3128 /* In a non-pattern \N must be a named character, like \N{LATIN
3129 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3130 * mean to match a non-newline. For non-patterns, named
3131 * characters are converted to their string equivalents. In
3132 * patterns, named characters are not converted to their
3133 * ultimate forms for the same reasons that other escapes
3134 * aren't. Instead, they are converted to the \N{U+...} form
3135 * to get the value from the charnames that is in effect right
3136 * now, while preserving the fact that it was a named character
3137 * so that the regex compiler knows this */
3138
3139 /* This section of code doesn't generally use the
3140 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3141 * a close examination of this macro and determined it is a
3142 * no-op except on utfebcdic variant characters. Every
3143 * character generated by this that would normally need to be
3144 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3145 * needed, and would complicate use of copy(). XXX There are
3146 * other parts of this file where the macro is used
3147 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3148
3149 /* The structure of this section of code (besides checking for
3150 * errors and upgrading to utf8) is:
3151 * Further disambiguate between the two meanings of \N, and if
3152 * not a charname, go process it elsewhere
0a96133f
KW
3153 * If of form \N{U+...}, pass it through if a pattern;
3154 * otherwise convert to utf8
3155 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3156 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3157
3158 /* Here, s points to the 'N'; the test below is guaranteed to
3159 * succeed if we are being called on a pattern as we already
3160 * know from a test above that the next character is a '{'.
3161 * On a non-pattern \N must mean 'named sequence, which
3162 * requires braces */
3163 s++;
3164 if (*s != '{') {
3165 yyerror("Missing braces on \\N{}");
3166 continue;
3167 }
3168 s++;
3169
0a96133f 3170 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3171 if (! (e = strchr(s, '}'))) {
3172 if (! PL_lex_inpat) {
5777a3f7 3173 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3174 } else {
3175 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3176 }
0a96133f 3177 continue;
ff3f963a 3178 }
cddc7ef4 3179
ff3f963a 3180 /* Here it looks like a named character */
cddc7ef4 3181
ff3f963a
KW
3182 if (PL_lex_inpat) {
3183
3184 /* XXX This block is temporary code. \N{} implies that the
3185 * pattern is to have Unicode semantics, and therefore
3186 * currently has to be encoded in utf8. By putting it in
3187 * utf8 now, we save a whole pass in the regular expression
3188 * compiler. Once that code is changed so Unicode
3189 * semantics doesn't necessarily have to be in utf8, this
da3a4baf
KW
3190 * block should be removed. However, the code that parses
3191 * the output of this would have to be changed to not
3192 * necessarily expect utf8 */
ff3f963a 3193 if (!has_utf8) {
77a135fe 3194 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3195 SvPOK_on(sv);
e4f3eed8 3196 *d = '\0';
77a135fe 3197 /* See Note on sizing above. */
7bf79863 3198 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3199 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3200 /* 5 = '\N{' + cur char + NUL */
3201 (STRLEN)(send - s) + 5);
f08d6ad9 3202 d = SvPVX(sv) + SvCUR(sv);
89491803 3203 has_utf8 = TRUE;
ff3f963a
KW
3204 }
3205 }
423cee85 3206
ff3f963a
KW
3207 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3208 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3209 | PERL_SCAN_DISALLOW_PREFIX;
3210 STRLEN len;
3211
3212 /* For \N{U+...}, the '...' is a unicode value even on
3213 * EBCDIC machines */
3214 s += 2; /* Skip to next char after the 'U+' */
3215 len = e - s;
3216 uv = grok_hex(s, &len, &flags, NULL);
3217 if (len == 0 || len != (STRLEN)(e - s)) {
3218 yyerror("Invalid hexadecimal number in \\N{U+...}");
3219 s = e + 1;
3220 continue;
3221 }
3222
3223 if (PL_lex_inpat) {
3224
e2a7e165
KW
3225 /* On non-EBCDIC platforms, pass through to the regex
3226 * compiler unchanged. The reason we evaluated the
3227 * number above is to make sure there wasn't a syntax
3228 * error. But on EBCDIC we convert to native so
3229 * downstream code can continue to assume it's native
3230 */
ff3f963a 3231 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3232#ifdef EBCDIC
3233 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3234 and the \0 */
3235 "\\N{U+%X}",
3236 (unsigned int) UNI_TO_NATIVE(uv));
3237#else
ff3f963a
KW
3238 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3239 d += e - s + 1;
e2a7e165 3240#endif
ff3f963a
KW
3241 }
3242 else { /* Not a pattern: convert the hex to string */
3243
3244 /* If destination is not in utf8, unconditionally
3245 * recode it to be so. This is because \N{} implies
3246 * Unicode semantics, and scalars have to be in utf8
3247 * to guarantee those semantics */
3248 if (! has_utf8) {
3249 SvCUR_set(sv, d - SvPVX_const(sv));
3250 SvPOK_on(sv);
3251 *d = '\0';
3252 /* See Note on sizing above. */
3253 sv_utf8_upgrade_flags_grow(
3254 sv,
3255 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3256 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3257 d = SvPVX(sv) + SvCUR(sv);
3258 has_utf8 = TRUE;
3259 }
3260
3261 /* Add the string to the output */
3262 if (UNI_IS_INVARIANT(uv)) {
3263 *d++ = (char) uv;
3264 }
3265 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3266 }
3267 }
3268 else { /* Here is \N{NAME} but not \N{U+...}. */
3269
3270 SV *res; /* result from charnames */
3271 const char *str; /* the string in 'res' */
3272 STRLEN len; /* its length */
3273
3274 /* Get the value for NAME */
3275 res = newSVpvn(s, e - s);
3276 res = new_constant( NULL, 0, "charnames",
3277 /* includes all of: \N{...} */
3278 res, NULL, s - 3, e - s + 4 );
3279
3280 /* Most likely res will be in utf8 already since the
3281 * standard charnames uses pack U, but a custom translator
3282 * can leave it otherwise, so make sure. XXX This can be
3283 * revisited to not have charnames use utf8 for characters
3284 * that don't need it when regexes don't have to be in utf8
3285 * for Unicode semantics. If doing so, remember EBCDIC */
3286 sv_utf8_upgrade(res);
3287 str = SvPV_const(res, len);
3288
3289 /* Don't accept malformed input */
3290 if (! is_utf8_string((U8 *) str, len)) {
3291 yyerror("Malformed UTF-8 returned by \\N");
3292 }
3293 else if (PL_lex_inpat) {
3294
3295 if (! len) { /* The name resolved to an empty string */
3296 Copy("\\N{}", d, 4, char);
3297 d += 4;
3298 }
3299 else {
3300 /* In order to not lose information for the regex
3301 * compiler, pass the result in the specially made
3302 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3303 * the code points in hex of each character
3304 * returned by charnames */
3305
3306 const char *str_end = str + len;
3307 STRLEN char_length; /* cur char's byte length */
3308 STRLEN output_length; /* and the number of bytes
3309 after this is translated
3310 into hex digits */
3311 const STRLEN off = d - SvPVX_const(sv);
3312
3313 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3314 * max('U+', '.'); and 1 for NUL */
3315 char hex_string[2 * UTF8_MAXBYTES + 5];
3316
3317 /* Get the first character of the result. */
3318 U32 uv = utf8n_to_uvuni((U8 *) str,
3319 len,
3320 &char_length,
3321 UTF8_ALLOW_ANYUV);
3322
3323 /* The call to is_utf8_string() above hopefully
3324 * guarantees that there won't be an error. But
3325 * it's easy here to make sure. The function just
3326 * above warns and returns 0 if invalid utf8, but
3327 * it can also return 0 if the input is validly a
3328 * NUL. Disambiguate */
3329 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3330 uv = UNICODE_REPLACEMENT;
3331 }
3332
3333 /* Convert first code point to hex, including the
e2a7e165
KW
3334 * boiler plate before it. For all these, we
3335 * convert to native format so that downstream code
3336 * can continue to assume the input is native */
78c35590 3337 output_length =
3353de27 3338 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3339 "\\N{U+%X",
3340 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3341
3342 /* Make sure there is enough space to hold it */
3343 d = off + SvGROW(sv, off
3344 + output_length
3345 + (STRLEN)(send - e)
3346 + 2); /* '}' + NUL */
3347 /* And output it */
3348 Copy(hex_string, d, output_length, char);
3349 d += output_length;
3350
3351 /* For each subsequent character, append dot and
3352 * its ordinal in hex */
3353 while ((str += char_length) < str_end) {
3354 const STRLEN off = d - SvPVX_const(sv);
3355 U32 uv = utf8n_to_uvuni((U8 *) str,
3356 str_end - str,
3357 &char_length,
3358 UTF8_ALLOW_ANYUV);
3359 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3360 uv = UNICODE_REPLACEMENT;
3361 }
3362
78c35590 3363 output_length =
3353de27 3364 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3365 ".%X",
3366 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3367
3368 d = off + SvGROW(sv, off
3369 + output_length
3370 + (STRLEN)(send - e)
3371 + 2); /* '}' + NUL */
3372 Copy(hex_string, d, output_length, char);
3373 d += output_length;
3374 }
3375
3376 *d++ = '}'; /* Done. Add the trailing brace */
3377 }
3378 }
3379 else { /* Here, not in a pattern. Convert the name to a
3380 * string. */
3381
3382 /* If destination is not in utf8, unconditionally
3383 * recode it to be so. This is because \N{} implies
3384 * Unicode semantics, and scalars have to be in utf8
3385 * to guarantee those semantics */
3386 if (! has_utf8) {
3387 SvCUR_set(sv, d - SvPVX_const(sv));
3388 SvPOK_on(sv);
3389 *d = '\0';
3390 /* See Note on sizing above. */
3391 sv_utf8_upgrade_flags_grow(sv,
3392 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3393 len + (STRLEN)(send - s) + 1);
3394 d = SvPVX(sv) + SvCUR(sv);
3395 has_utf8 = TRUE;
3396 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3397
3398 /* See Note on sizing above. (NOTE: SvCUR() is not
3399 * set correctly here). */
3400 const STRLEN off = d - SvPVX_const(sv);
3401 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3402 }
3403 Copy(str, d, len, char);
3404 d += len;
423cee85 3405 }
423cee85 3406 SvREFCNT_dec(res);
cb233ae3
KW
3407
3408 /* Deprecate non-approved name syntax */
3409 if (ckWARN_d(WARN_DEPRECATED)) {
3410 bool problematic = FALSE;
3411 char* i = s;
3412
3413 /* For non-ut8 input, look to see that the first
3414 * character is an alpha, then loop through the rest
3415 * checking that each is a continuation */
3416 if (! this_utf8) {
3417 if (! isALPHAU(*i)) problematic = TRUE;
3418 else for (i = s + 1; i < e; i++) {
3419 if (isCHARNAME_CONT(*i)) continue;
3420 problematic = TRUE;
3421 break;
3422 }
3423 }
3424 else {
3425 /* Similarly for utf8. For invariants can check
3426 * directly. We accept anything above the latin1
3427 * range because it is immaterial to Perl if it is
3428 * correct or not, and is expensive to check. But
3429 * it is fairly easy in the latin1 range to convert
3430 * the variants into a single character and check
3431 * those */
3432 if (UTF8_IS_INVARIANT(*i)) {
3433 if (! isALPHAU(*i)) problematic = TRUE;
3434 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3435 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3436 *(i+1)))))
3437 {
3438 problematic = TRUE;
3439 }
3440 }
3441 if (! problematic) for (i = s + UTF8SKIP(s);
3442 i < e;
3443 i+= UTF8SKIP(i))
3444 {
3445 if (UTF8_IS_INVARIANT(*i)) {
3446 if (isCHARNAME_CONT(*i)) continue;
3447 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3448 continue;
3449 } else if (isCHARNAME_CONT(
3450 UNI_TO_NATIVE(
81c14aa2 3451 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3452 {
3453 continue;
3454 }
3455 problematic = TRUE;
3456 break;
3457 }
3458 }
3459 if (problematic) {
6e1bad6c
KW
3460 /* The e-i passed to the final %.*s makes sure that
3461 * should the trailing NUL be missing that this
3462 * print won't run off the end of the string */
cb233ae3 3463 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3464 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3465 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3466 }
3467 }
3468 } /* End \N{NAME} */
ff3f963a
KW
3469#ifdef EBCDIC
3470 if (!dorange)
3471 native_range = FALSE; /* \N{} is defined to be Unicode */
3472#endif
3473 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3474 continue;
3475
02aa26ce 3476 /* \c is a control character */
79072805
LW
3477 case 'c':
3478 s++;
961ce445 3479 if (s < send) {
17a3df4c 3480 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3481 }
961ce445
RGS
3482 else {
3483 yyerror("Missing control char name in \\c");
3484 }
79072805 3485 continue;
02aa26ce
NT
3486
3487 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3488 case 'b':
db42d148 3489 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3490 break;
3491 case 'n':
db42d148 3492 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3493 break;
3494 case 'r':
db42d148 3495 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3496 break;
3497 case 'f':
db42d148 3498 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3499 break;
3500 case 't':
db42d148 3501 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3502 break;
34a3fe2a 3503 case 'e':
db42d148 3504 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3505 break;
3506 case 'a':
db42d148 3507 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3508 break;
02aa26ce
NT
3509 } /* end switch */
3510
79072805
LW
3511 s++;
3512 continue;
02aa26ce 3513 } /* end if (backslash) */
4c3a8340
TS
3514#ifdef EBCDIC
3515 else
3516 literal_endpoint++;
3517#endif
02aa26ce 3518
f9a63242 3519 default_action:
77a135fe
KW
3520 /* If we started with encoded form, or already know we want it,
3521 then encode the next character */
3522 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3523 STRLEN len = 1;
77a135fe
KW
3524
3525
3526 /* One might think that it is wasted effort in the case of the
3527 * source being utf8 (this_utf8 == TRUE) to take the next character
3528 * in the source, convert it to an unsigned value, and then convert
3529 * it back again. But the source has not been validated here. The
3530 * routine that does the conversion checks for errors like
3531 * malformed utf8 */
3532
5f66b61c
AL
3533 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3534 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3535 if (!has_utf8) {
3536 SvCUR_set(sv, d - SvPVX_const(sv));
3537 SvPOK_on(sv);
3538 *d = '\0';
77a135fe 3539 /* See Note on sizing above. */
7bf79863
KW
3540 sv_utf8_upgrade_flags_grow(sv,
3541 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3542 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3543 d = SvPVX(sv) + SvCUR(sv);
3544 has_utf8 = TRUE;
3545 } else if (need > len) {
3546 /* encoded value larger than old, may need extra space (NOTE:
3547 * SvCUR() is not set correctly here). See Note on sizing
3548 * above. */
9d4ba2ae 3549 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3550 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3551 }
77a135fe
KW
3552 s += len;
3553
5f66b61c 3554 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3555#ifdef EBCDIC
3556 if (uv > 255 && !dorange)
3557 native_range = FALSE;
3558#endif
2b9d42f0
NIS
3559 }
3560 else {
3561 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3562 }
02aa26ce
NT
3563 } /* while loop to process each character */
3564
3565 /* terminate the string and set up the sv */
79072805 3566 *d = '\0';
95a20fc0 3567 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3568 if (SvCUR(sv) >= SvLEN(sv))
5637ef5b
NC
3569 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3570 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
2b9d42f0 3571
79072805 3572 SvPOK_on(sv);
9f4817db 3573 if (PL_encoding && !has_utf8) {
d0063567
DK
3574 sv_recode_to_utf8(sv, PL_encoding);
3575 if (SvUTF8(sv))
3576 has_utf8 = TRUE;
9f4817db 3577 }
2b9d42f0 3578 if (has_utf8) {
7e2040f0 3579 SvUTF8_on(sv);
2b9d42f0 3580 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3581 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3582 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3583 }
3584 }
79072805 3585
02aa26ce 3586 /* shrink the sv if we allocated more than we used */
79072805 3587 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3588 SvPV_shrink_to_cur(sv);
79072805 3589 }
02aa26ce 3590
6154021b 3591 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3592 if (s > PL_bufptr) {
eb0d8d16
NC
3593 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3594 const char *const key = PL_lex_inpat ? "qr" : "q";
3595 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3596 const char *type;
3597 STRLEN typelen;
3598
3599 if (PL_lex_inwhat == OP_TRANS) {
3600 type = "tr";
3601 typelen = 2;
3602 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3603 type = "s";
3604 typelen = 1;
9da1dd8f
DM
3605 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3606 type = "q";
3607 typelen = 1;
eb0d8d16
NC
3608 } else {
3609 type = "qq";
3610 typelen = 2;
3611 }
3612
3613 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3614 type, typelen);
3615 }
6154021b 3616 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3617 } else
8990e307 3618 SvREFCNT_dec(sv);
79072805
LW
3619 return s;
3620}
3621
ffb4593c
NT
3622/* S_intuit_more
3623 * Returns TRUE if there's more to the expression (e.g., a subscript),
3624 * FALSE otherwise.
ffb4593c
NT
3625 *
3626 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3627 *
3628 * ->[ and ->{ return TRUE
3629 * { and [ outside a pattern are always subscripts, so return TRUE
3630 * if we're outside a pattern and it's not { or [, then return FALSE
3631 * if we're in a pattern and the first char is a {
3632 * {4,5} (any digits around the comma) returns FALSE
3633 * if we're in a pattern and the first char is a [
3634 * [] returns FALSE
3635 * [SOMETHING] has a funky algorithm to decide whether it's a
3636 * character class or not. It has to deal with things like
3637 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3638 * anything else returns TRUE
3639 */
3640
9cbb5ea2
GS
3641/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3642
76e3520e 3643STATIC int
cea2e8a9 3644S_intuit_more(pTHX_ register char *s)
79072805 3645{
97aff369 3646 dVAR;
7918f24d
NC
3647
3648 PERL_ARGS_ASSERT_INTUIT_MORE;
3649
3280af22 3650 if (PL_lex_brackets)
79072805
LW
3651 return TRUE;
3652 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3653 return TRUE;
3654 if (*s != '{' && *s != '[')
3655 return FALSE;
3280af22 3656 if (!PL_lex_inpat)
79072805
LW
3657 return TRUE;
3658
3659 /* In a pattern, so maybe we have {n,m}. */
3660 if (*s == '{') {
b3155d95 3661 if (regcurly(s)) {
79072805 3662 return FALSE;
b3155d95 3663 }
79072805 3664 return TRUE;
79072805
LW
3665 }
3666
3667 /* On the other hand, maybe we have a character class */
3668
3669 s++;
3670 if (*s == ']' || *s == '^')
3671 return FALSE;
3672 else {
ffb4593c 3673 /* this is terrifying, and it works */
79072805
LW
3674 int weight = 2; /* let's weigh the evidence */
3675 char seen[256];
f27ffc4a 3676 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3677 const char * const send = strchr(s,']');
3280af22 3678 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3679
3680 if (!send) /* has to be an expression */
3681 return TRUE;
3682
3683 Zero(seen,256,char);
3684 if (*s == '$')
3685 weight -= 3;
3686 else if (isDIGIT(*s)) {
3687 if (s[1] != ']') {
3688 if (isDIGIT(s[1]) && s[2] == ']')
3689 weight -= 10;
3690 }
3691 else
3692 weight -= 100;
3693 }
3694 for (; s < send; s++) {
3695 last_un_char = un_char;
3696 un_char = (unsigned char)*s;
3697 switch (*s) {
3698 case '@':
3699 case '&':
3700 case '$':
3701 weight -= seen[un_char] * 10;
7e2040f0 3702 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3703 int len;
8903cb82 3704 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e 3705 len = (int)strlen(tmpbuf);
6fbd0d97
BF
3706 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3707 UTF ? SVf_UTF8 : 0, SVt_PV))
79072805
LW
3708 weight -= 100;
3709 else
3710 weight -= 10;
3711 }
3712 else if (*s == '$' && s[1] &&
93a17b20
LW
3713 strchr("[#!%*<>()-=",s[1])) {
3714 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3715 weight -= 10;
3716 else
3717 weight -= 1;
3718 }
3719 break;
3720 case '\\':
3721 un_char = 254;
3722 if (s[1]) {
93a17b20 3723 if (strchr("wds]",s[1]))
79072805 3724 weight += 100;
10edeb5d 3725 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3726 weight += 1;
93a17b20 3727 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3728 weight += 40;
3729 else if (isDIGIT(s[1])) {
3730 weight += 40;
3731 while (s[1] && isDIGIT(s[1]))
3732 s++;
3733 }
3734 }
3735 else
3736 weight += 100;
3737 break;
3738 case '-':
3739 if (s[1] == '\\')
3740 weight += 50;
93a17b20 3741 if (strchr("aA01! ",last_un_char))
79072805 3742 weight += 30;
93a17b20 3743 if (strchr("zZ79~",s[1]))
79072805 3744 weight += 30;
f27ffc4a
GS
3745 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3746 weight -= 5; /* cope with negative subscript */
79072805
LW
3747 break;
3748 default:
3792a11b
NC
3749 if (!isALNUM(last_un_char)
3750 && !(last_un_char == '$' || last_un_char == '@'
3751 || last_un_char == '&')
3752 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3753 char *d = tmpbuf;
3754 while (isALPHA(*s))
3755 *d++ = *s++;
3756 *d = '\0';
5458a98a 3757 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3758 weight -= 150;
3759 }
3760 if (un_char == last_un_char + 1)
3761 weight += 5;
3762 weight -= seen[un_char];
3763 break;
3764 }
3765 seen[un_char]++;
3766 }
3767 if (weight >= 0) /* probably a character class */
3768 return FALSE;
3769 }
3770
3771 return TRUE;
3772}
ffed7fef 3773
ffb4593c
NT
3774/*
3775 * S_intuit_method
3776 *
3777 * Does all the checking to disambiguate
3778 * foo bar
3779 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3780 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3781 *
3782 * First argument is the stuff after the first token, e.g. "bar".
3783 *
a4fd4a89 3784 * Not a method if foo is a filehandle.
ffb4593c
NT
3785 * Not a method if foo is a subroutine prototyped to take a filehandle.
3786 * Not a method if it's really "Foo $bar"
3787 * Method if it's "foo $bar"
3788 * Not a method if it's really "print foo $bar"
3789 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3790 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3791 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3792 * =>
3793 */
3794
76e3520e 3795STATIC int
62d55b22 3796S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3797{
97aff369 3798 dVAR;
a0d0e21e 3799 char *s = start + (*start == '$');
3280af22 3800 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3801 STRLEN len;
3802 GV* indirgv;
5db06880
NC
3803#ifdef PERL_MAD
3804 int soff;
3805#endif
a0d0e21e 3806
7918f24d
NC
3807 PERL_ARGS_ASSERT_INTUIT_METHOD;
3808
aca88b25 3809 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3810 return 0;
aca88b25 3811 if (cv && SvPOK(cv)) {
8fa6a409 3812 const char *proto = CvPROTO(cv);
62d55b22
NC
3813 if (proto) {
3814 if (*proto == ';')
3815 proto++;
3816 if (*proto == '*')
3817 return 0;
3818 }
a0d0e21e 3819 }
8903cb82 3820 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3821 /* start is the beginning of the possible filehandle/object,
3822 * and s is the end of it
3823 * tmpbuf is a copy of it
3824 */
3825
a0d0e21e 3826 if (*start == '$') {
39c012bc 3827 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3ef1310e 3828 isUPPER(*PL_tokenbuf))
a0d0e21e 3829 return 0;
5db06880
NC
3830#ifdef PERL_MAD
3831 len = start - SvPVX(PL_linestr);
3832#endif
29595ff2 3833 s = PEEKSPACE(s);
f0092767 3834#ifdef PERL_MAD
5db06880
NC
3835 start = SvPVX(PL_linestr) + len;
3836#endif
3280af22
NIS
3837 PL_bufptr = start;
3838 PL_expect = XREF;
a0d0e21e
LW
3839 return *s == '(' ? FUNCMETH : METHOD;
3840 }
5458a98a 3841 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3842 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3843 len -= 2;
3844 tmpbuf[len] = '\0';
5db06880
NC
3845#ifdef PERL_MAD
3846 soff = s - SvPVX(PL_linestr);
3847#endif
c3e0f903
GS
3848 goto bare_package;
3849 }
38d2cf30 3850 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8ebc5c01 3851 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3852 return 0;
3853 /* filehandle or package name makes it a method */
39c012bc 3854 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5db06880
NC
3855#ifdef PERL_MAD
3856 soff = s - SvPVX(PL_linestr);
3857#endif
29595ff2 3858 s = PEEKSPACE(s);
3280af22 3859 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3860 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3861 bare_package:
cd81e915 3862 start_force(PL_curforce);
9ded7720 3863 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3864 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3865 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880 3866 if (PL_madskills)
38d2cf30
BF
3867 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3868 ( UTF ? SVf_UTF8 : 0 )));
3280af22 3869 PL_expect = XTERM;
a0d0e21e 3870 force_next(WORD);
3280af22 3871 PL_bufptr = s;
5db06880
NC
3872#ifdef PERL_MAD
3873 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3874#endif
a0d0e21e
LW
3875 return *s == '(' ? FUNCMETH : METHOD;
3876 }
3877 }
3878 return 0;
3879}
3880
16d20bd9 3881/* Encoded script support. filter_add() effectively inserts a
4e553d73 3882 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3883 * Note that the filter function only applies to the current source file
3884 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3885 *
3886 * The datasv parameter (which may be NULL) can be used to pass
3887 * private data to this instance of the filter. The filter function
3888 * can recover the SV using the FILTER_DATA macro and use it to
3889 * store private buffers and state information.
3890 *
3891 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3892 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3893 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3894 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3895 * private use must be set using malloc'd pointers.
3896 */
16d20bd9
AD
3897
3898SV *
864dbfa3 3899Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3900{
97aff369 3901 dVAR;
f4c556ac 3902 if (!funcp)
a0714e2c 3903 return NULL;
f4c556ac 3904
5486870f
DM
3905 if (!PL_parser)
3906 return NULL;
3907
f1c31c52
FC
3908 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3909 Perl_croak(aTHX_ "Source filters apply only to byte streams");
3910
3280af22
NIS
3911 if (!PL_rsfp_filters)
3912 PL_rsfp_filters = newAV();
16d20bd9 3913 if (!datasv)
561b68a9 3914 datasv = newSV(0);
862a34c6 3915 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3916 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3917 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3918 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3919 FPTR2DPTR(void *, IoANY(datasv)),
3920 SvPV_nolen(datasv)));
3280af22
NIS
3921 av_unshift(PL_rsfp_filters, 1);
3922 av_store(PL_rsfp_filters, 0, datasv) ;
60d63348
FC
3923 if (
3924 !PL_parser->filtered
3925 && PL_parser->lex_flags & LEX_EVALBYTES
3926 && PL_bufptr < PL_bufend
3927 ) {
3928 const char *s = PL_bufptr;
3929 while (s < PL_bufend) {
3930 if (*s == '\n') {
3931 SV *linestr = PL_parser->linestr;
3932 char *buf = SvPVX(linestr);
3933 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3934 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3935 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3936 STRLEN const linestart_pos = PL_parser->linestart - buf;
3937 STRLEN const last_uni_pos =
3938 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3939 STRLEN const last_lop_pos =
3940 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3941 av_push(PL_rsfp_filters, linestr);
3942 PL_parser->linestr =
3943 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3944 buf = SvPVX(PL_parser->linestr);
3945 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3946 PL_parser->bufptr = buf + bufptr_pos;
3947 PL_parser->oldbufptr = buf + oldbufptr_pos;
3948 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3949 PL_parser->linestart = buf + linestart_pos;
3950 if (PL_parser->last_uni)
3951 PL_parser->last_uni = buf + last_uni_pos;
3952 if (PL_parser->last_lop)
3953 PL_parser->last_lop = buf + last_lop_pos;
3954 SvLEN(linestr) = SvCUR(linestr);
3955 SvCUR(linestr) = s-SvPVX(linestr);
3956 PL_parser->filtered = 1;
3957 break;
3958 }
3959 s++;
3960 }
3961 }
16d20bd9
AD
3962 return(datasv);
3963}
4e553d73 3964
16d20bd9
AD
3965
3966/* Delete most recently added instance of this filter function. */
a0d0e21e 3967void
864dbfa3 3968Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3969{
97aff369 3970 dVAR;
e0c19803 3971 SV *datasv;
24801a4b 3972
7918f24d
NC
3973 PERL_ARGS_ASSERT_FILTER_DEL;
3974
33073adb 3975#ifdef DEBUGGING
55662e27
JH
3976 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3977 FPTR2DPTR(void*, funcp)));
33073adb 3978#endif
5486870f 3979 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3980 return;
3981 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3982 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3983 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3984 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3985
16d20bd9
AD
3986 return;
3987 }
3988 /* we need to search for the correct entry and clear it */
cea2e8a9 3989 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3990}
3991
3992
1de9afcd
RGS
3993/* Invoke the idxth filter function for the current rsfp. */
3994/* maxlen 0 = read one text line */
16d20bd9 3995I32
864dbfa3 3996Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3997{
97aff369 3998 dVAR;
16d20bd9
AD
3999 filter_t funcp;
4000 SV *datasv = NULL;
f482118e
NC
4001 /* This API is bad. It should have been using unsigned int for maxlen.
4002 Not sure if we want to change the API, but if not we should sanity
4003 check the value here. */
60d63348 4004 unsigned int correct_length
39cd7a59
NC
4005 = maxlen < 0 ?
4006#ifdef PERL_MICRO
4007 0x7FFFFFFF
4008#else
4009 INT_MAX
4010#endif
4011 : maxlen;
e50aee73 4012
7918f24d
NC
4013 PERL_ARGS_ASSERT_FILTER_READ;
4014
5486870f 4015 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 4016 return -1;
1de9afcd 4017 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
4018 /* Provide a default input filter to make life easy. */
4019 /* Note that we append to the line. This is handy. */
f4c556ac
GS
4020 DEBUG_P(PerlIO_printf(Perl_debug_log,
4021 "filter_read %d: from rsfp\n", idx));
f482118e 4022 if (correct_length) {
16d20bd9
AD
4023 /* Want a block */
4024 int len ;
f54cb97a 4025 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
4026
4027 /* ensure buf_sv is large enough */
881d8f0a 4028 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
4029 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
4030 correct_length)) <= 0) {
3280af22 4031 if (PerlIO_error(PL_rsfp))
37120919
AD
4032 return -1; /* error */
4033 else
4034 return 0 ; /* end of file */
4035 }
16d20bd9 4036 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 4037 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
4038 } else {
4039 /* Want a line */
3280af22
NIS
4040 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4041 if (PerlIO_error(PL_rsfp))
37120919
AD
4042 return -1; /* error */
4043 else
4044 return 0 ; /* end of file */
4045 }
16d20bd9
AD
4046 }
4047 return SvCUR(buf_sv);
4048 }
4049 /* Skip this filter slot if filter has been deleted */
1de9afcd 4050 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
4051 DEBUG_P(PerlIO_printf(Perl_debug_log,
4052 "filter_read %d: skipped (filter deleted)\n",
4053 idx));
f482118e 4054 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9 4055 }
60d63348
FC
4056 if (SvTYPE(datasv) != SVt_PVIO) {
4057 if (correct_length) {
4058 /* Want a block */
4059 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4060 if (!remainder) return 0; /* eof */
4061 if (correct_length > remainder) correct_length = remainder;
4062 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4063 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4064 } else {
4065 /* Want a line */
4066 const char *s = SvEND(datasv);
4067 const char *send = SvPVX(datasv) + SvLEN(datasv);
4068 while (s < send) {
4069 if (*s == '\n') {
4070 s++;
4071 break;
4072 }
4073 s++;
4074 }
4075 if (s == send) return 0; /* eof */
4076 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4077 SvCUR_set(datasv, s-SvPVX(datasv));
4078 }
4079 return SvCUR(buf_sv);
4080 }
16d20bd9 4081 /* Get function pointer hidden within datasv */
8141890a 4082 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
4083 DEBUG_P(PerlIO_printf(Perl_debug_log,
4084 "filter_read %d: via function %p (%s)\n",
ca0270c4 4085 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
4086 /* Call function. The function is expected to */
4087 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 4088 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 4089 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
4090}
4091
76e3520e 4092STATIC char *
5cc814fd 4093S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 4094{
97aff369 4095 dVAR;
7918f24d
NC
4096
4097 PERL_ARGS_ASSERT_FILTER_GETS;
4098
c39cd008 4099#ifdef PERL_CR_FILTER
3280af22 4100 if (!PL_rsfp_filters) {
c39cd008 4101 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
4102 }
4103#endif
3280af22 4104 if (PL_rsfp_filters) {
55497cff 4105 if (!append)
4106 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
4107 if (FILTER_READ(0, sv, 0) > 0)
4108 return ( SvPVX(sv) ) ;
4109 else
bd61b366 4110 return NULL ;
16d20bd9 4111 }
9d116dd7 4112 else
5cc814fd 4113 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
4114}
4115
01ec43d0 4116STATIC HV *
9bde8eb0 4117S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 4118{
97aff369 4119 dVAR;
def3634b
GS
4120 GV *gv;
4121
7918f24d
NC
4122 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4123
01ec43d0 4124 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
4125 return PL_curstash;
4126
4127 if (len > 2 &&
4128 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
acc6da14 4129 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
01ec43d0
GS
4130 {
4131 return GvHV(gv); /* Foo:: */
def3634b
GS
4132 }
4133
4134 /* use constant CLASS => 'MyClass' */
acc6da14 4135 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
c35e046a
AL
4136 if (gv && GvCV(gv)) {
4137 SV * const sv = cv_const_sv(GvCV(gv));
4138 if (sv)
9bde8eb0 4139 pkgname = SvPV_const(sv, len);
def3634b
GS
4140 }
4141
acc6da14 4142 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
def3634b 4143}
a0d0e21e 4144
e3f73d4e
RGS
4145/*
4146 * S_readpipe_override
486ec47a 4147 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4148 * optree, provided sublex_start() is called afterwards.
4149 */
4150STATIC void
1d51329b 4151S_readpipe_override(pTHX)
e3f73d4e
RGS
4152{
4153 GV **gvp;
4154 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4155 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4156 if ((gv_readpipe
4157 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4158 ||
4159 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4160 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4161 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4162 {
4163 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4164 op_append_elem(OP_LIST,
e3f73d4e
RGS
4165 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4166 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4167 }
e3f73d4e
RGS
4168}
4169
5db06880
NC
4170#ifdef PERL_MAD
4171 /*
4172 * Perl_madlex
4173 * The intent of this yylex wrapper is to minimize the changes to the
4174 * tokener when we aren't interested in collecting madprops. It remains
4175 * to be seen how successful this strategy will be...
4176 */
4177
4178int
4179Perl_madlex(pTHX)
4180{
4181 int optype;
4182 char *s = PL_bufptr;
4183
cd81e915
NC
4184 /* make sure PL_thiswhite is initialized */
4185 PL_thiswhite = 0;
4186 PL_thismad = 0;
5db06880 4187
cd81e915 4188 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 4189 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
4190 return S_pending_ident(aTHX);
4191
4192 /* previous token ate up our whitespace? */
cd81e915
NC
4193 if (!PL_lasttoke && PL_nextwhite) {
4194 PL_thiswhite = PL_nextwhite;
4195 PL_nextwhite = 0;
5db06880
NC
4196 }
4197
4198 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4199 PL_realtokenstart = -1;
4200 PL_thistoken = 0;
5db06880
NC
4201 optype = yylex();
4202 s = PL_bufptr;
cd81e915 4203 assert(PL_curforce < 0);
5db06880 4204
cd81e915
NC
4205 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4206 if (!PL_thistoken) {
4207 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4208 PL_thistoken = newSVpvs("");
5db06880 4209 else {
c35e046a 4210 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4211 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4212 }
4213 }
cd81e915
NC
4214 if (PL_thismad) /* install head */
4215 CURMAD('X', PL_thistoken);
5db06880
NC
4216 }
4217
4218 /* last whitespace of a sublex? */
cd81e915
NC
4219 if (optype == ')' && PL_endwhite) {
4220 CURMAD('X', PL_endwhite);
5db06880
NC
4221 }
4222
cd81e915 4223 if (!PL_thismad) {
5db06880
NC
4224
4225 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4226 if (!PL_thiswhite && !PL_endwhite && !optype) {
4227 sv_free(PL_thistoken);
4228 PL_thistoken = 0;
5db06880
NC
4229 return 0;
4230 }
4231
4232 /* put off final whitespace till peg */
60d63348 4233 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
cd81e915
NC
4234 PL_nextwhite = PL_thiswhite;
4235 PL_thiswhite = 0;
5db06880 4236 }
cd81e915
NC
4237 else if (PL_thisopen) {
4238 CURMAD('q', PL_thisopen);
4239 if (PL_thistoken)
4240 sv_free(PL_thistoken);
4241 PL_thistoken = 0;
5db06880
NC
4242 }
4243 else {
4244 /* Store actual token text as madprop X */
cd81e915 4245 CURMAD('X', PL_thistoken);
5db06880
NC
4246 }
4247
cd81e915 4248 if (PL_thiswhite) {
5db06880 4249 /* add preceding whitespace as madprop _ */
cd81e915 4250 CURMAD('_', PL_thiswhite);
5db06880
NC
4251 }
4252
cd81e915 4253 if (PL_thisstuff) {
5db06880 4254 /* add quoted material as madprop = */
cd81e915 4255 CURMAD('=', PL_thisstuff);
5db06880
NC
4256 }
4257
cd81e915 4258 if (PL_thisclose) {
5db06880 4259 /* add terminating quote as madprop Q */
cd81e915 4260 CURMAD('Q', PL_thisclose);
5db06880
NC
4261 }
4262 }
4263
4264 /* special processing based on optype */
4265
4266 switch (optype) {
4267
4268 /* opval doesn't need a TOKEN since it can already store mp */
4269 case WORD:
4270 case METHOD:
4271 case FUNCMETH:
4272 case THING:
4273 case PMFUNC:
4274 case PRIVATEREF:
4275 case FUNC0SUB:
4276 case UNIOPSUB:
4277 case LSTOPSUB:
5db1eb8d 4278 case LABEL:
6154021b
RGS
4279 if (pl_yylval.opval)
4280 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4281 PL_thismad = 0;
5db06880
NC
4282 return optype;
4283
4284 /* fake EOF */
4285 case 0:
4286 optype = PEG;
cd81e915
NC
4287 if (PL_endwhite) {
4288 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4289 PL_endwhite = 0;
5db06880
NC
4290 }
4291 break;
4292
4293 case ']':
4294 case '}':
cd81e915 4295 if (PL_faketokens)
5db06880
NC
4296 break;
4297 /* remember any fake bracket that lexer is about to discard */
4298 if (PL_lex_brackets == 1 &&
4299 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4300 {
4301 s = PL_bufptr;
4302 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4303 s++;
4304 if (*s == '}') {
cd81e915
NC
4305 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4306 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4307 PL_thiswhite = 0;
5db06880
NC
4308 PL_bufptr = s - 1;
4309 break; /* don't bother looking for trailing comment */
4310 }
4311 else
4312 s = PL_bufptr;
4313 }
4314 if (optype == ']')
4315 break;
4316 /* FALLTHROUGH */
4317
4318 /* attach a trailing comment to its statement instead of next token */
4319 case ';':
cd81e915 4320 if (PL_faketokens)
5db06880
NC
4321 break;
4322 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4323 s = PL_bufptr;
4324 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4325 s++;
4326 if (*s == '\n' || *s == '#') {
4327 while (s < PL_bufend && *s != '\n')
4328 s++;
4329 if (s < PL_bufend)
4330 s++;
cd81e915
NC
4331 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4332 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4333 PL_thiswhite = 0;
5db06880
NC
4334 PL_bufptr = s;
4335 }
4336 }
4337 break;
4338
5db06880
NC
4339 /* ival */
4340 default:
4341 break;
4342
4343 }
4344
4345 /* Create new token struct. Note: opvals return early above. */
6154021b 4346 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4347 PL_thismad = 0;
5db06880
NC
4348 return optype;
4349}
4350#endif
4351
468aa647 4352STATIC char *
cc6ed77d 4353S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4354 dVAR;
7918f24d
NC
4355
4356 PERL_ARGS_ASSERT_TOKENIZE_USE;
4357
468aa647
RGS
4358 if (PL_expect != XSTATE)
4359 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4360 is_use ? "use" : "no"));
52d0e95b 4361 PL_expect = XTERM;
29595ff2 4362 s = SKIPSPACE1(s);
468aa647
RGS
4363 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4364 s = force_version(s, TRUE);
17c59fdf
VP
4365 if (*s == ';' || *s == '}'
4366 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4367 start_force(PL_curforce);
9ded7720 4368 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4369 force_next(WORD);
4370 }
4371 else if (*s == 'v') {
4372 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4373 s = force_version(s, FALSE);
4374 }
4375 }
4376 else {
4377 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4378 s = force_version(s, FALSE);
4379 }
6154021b 4380 pl_yylval.ival = is_use;
468aa647
RGS
4381 return s;
4382}
748a9306 4383#ifdef DEBUGGING
27da23d5 4384 static const char* const exp_name[] =
09bef843 4385 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4386 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4387 };
748a9306 4388#endif
463ee0b2 4389
361d9b55
Z
4390#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4391STATIC bool
4392S_word_takes_any_delimeter(char *p, STRLEN len)
4393{
4394 return (len == 1 && strchr("msyq", p[0])) ||
4395 (len == 2 && (
4396 (p[0] == 't' && p[1] == 'r') ||
4397 (p[0] == 'q' && strchr("qwxr", p[1]))));
4398}
4399
02aa26ce
NT
4400/*
4401 yylex
4402
4403 Works out what to call the token just pulled out of the input
4404 stream. The yacc parser takes care of taking the ops we return and
4405 stitching them into a tree.
4406
4407 Returns:
4408 PRIVATEREF
4409
4410 Structure:
4411 if read an identifier
4412 if we're in a my declaration
4413 croak if they tried to say my($foo::bar)
4414 build the ops for a my() declaration
4415 if it's an access to a my() variable
4416 are we in a sort block?
4417 croak if my($a); $a <=> $b
4418 build ops for access to a my() variable
4419 if in a dq string, and they've said @foo and we can't find @foo
4420 croak
4421 build ops for a bareword
4422 if we already built the token before, use it.
4423*/
4424
20141f0e 4425
dba4d153
JH
4426#ifdef __SC__
4427#pragma segment Perl_yylex
4428#endif
dba4d153 4429int
dba4d153 4430Perl_yylex(pTHX)
20141f0e 4431{
97aff369 4432 dVAR;
eb578fdb
KW
4433 char *s = PL_bufptr;
4434 char *d;
463ee0b2 4435 STRLEN len;
705fe0e5
FC
4436 bool bof = FALSE;
4437 U8 formbrack = 0;
580561a3 4438 U32 fake_eof = 0;
a687059c 4439
10edeb5d
JH
4440 /* orig_keyword, gvp, and gv are initialized here because
4441 * jump to the label just_a_word_zero can bypass their
4442 * initialization later. */
4443 I32 orig_keyword = 0;
4444 GV *gv = NULL;
4445 GV **gvp = NULL;
4446
bbf60fe6 4447 DEBUG_T( {
396482e1 4448 SV* tmp = newSVpvs("");
b6007c36
DM
4449 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4450 (IV)CopLINE(PL_curcop),
4451 lex_state_names[PL_lex_state],
4452 exp_name[PL_expect],
4453 pv_display(tmp, s, strlen(s), 0, 60));
4454 SvREFCNT_dec(tmp);
bbf60fe6 4455 } );
02aa26ce 4456 /* check if there's an identifier for us to look at */
28ac2b49 4457 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4458 return REPORT(S_pending_ident(aTHX));
bbce6d69 4459
02aa26ce
NT
4460 /* no identifier pending identification */
4461
3280af22 4462 switch (PL_lex_state) {
79072805
LW
4463#ifdef COMMENTARY
4464 case LEX_NORMAL: /* Some compilers will produce faster */
4465 case LEX_INTERPNORMAL: /* code if we comment these out. */
4466 break;
4467#endif
4468
09bef843 4469 /* when we've already built the next token, just pull it out of the queue */
79072805 4470 case LEX_KNOWNEXT:
5db06880
NC
4471#ifdef PERL_MAD
4472 PL_lasttoke--;
6154021b 4473 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4474 if (PL_madskills) {
cd81e915 4475 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4476 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4477 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4478 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4479 PL_thismad->mad_val = 0;
4480 mad_free(PL_thismad);
4481 PL_thismad = 0;
5db06880
NC
4482 }
4483 }
4484 if (!PL_lasttoke) {
4485 PL_lex_state = PL_lex_defer;
4486 PL_expect = PL_lex_expect;
4487 PL_lex_defer = LEX_NORMAL;
4488 if (!PL_nexttoke[PL_lasttoke].next_type)
4489 return yylex();
4490 }
4491#else
3280af22 4492 PL_nexttoke--;
6154021b 4493 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4494 if (!PL_nexttoke) {
4495 PL_lex_state = PL_lex_defer;
4496 PL_expect = PL_lex_expect;
4497 PL_lex_defer = LEX_NORMAL;
463ee0b2 4498 }
5db06880 4499#endif
a7aaec61
Z
4500 {
4501 I32 next_type;
5db06880 4502#ifdef PERL_MAD
a7aaec61 4503 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4504#else
a7aaec61 4505 next_type = PL_nexttype[PL_nexttoke];
5db06880 4506#endif
78cdf107
Z
4507 if (next_type & (7<<24)) {
4508 if (next_type & (1<<24)) {
4509 if (PL_lex_brackets > 100)
4510 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4511 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4512 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4513 }
4514 if (next_type & (2<<24))
4515 PL_lex_allbrackets++;
4516 if (next_type & (4<<24))
4517 PL_lex_allbrackets--;
a7aaec61
Z
4518 next_type &= 0xffff;
4519 }
6c7ae946
FC
4520 if (S_is_opval_token(next_type) && pl_yylval.opval)
4521 pl_yylval.opval->op_savefree = 0; /* release */
a7aaec61 4522 return REPORT(next_type);
a7aaec61 4523 }
79072805 4524
02aa26ce 4525 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4526 when we get here, PL_bufptr is at the \
02aa26ce 4527 */
79072805
LW
4528 case LEX_INTERPCASEMOD:
4529#ifdef DEBUGGING
3280af22 4530 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5637ef5b
NC
4531 Perl_croak(aTHX_
4532 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4533 PL_bufptr, PL_bufend, *PL_bufptr);
79072805 4534#endif
02aa26ce 4535 /* handle \E or end of string */
3280af22 4536 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4537 /* if at a \E */
3280af22 4538 if (PL_lex_casemods) {
f54cb97a 4539 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4540 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4541
3792a11b 4542 if (PL_bufptr != PL_bufend
838f2281
BF
4543 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4544 || oldmod == 'F')) {
3280af22
NIS
4545 PL_bufptr += 2;
4546 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4547#ifdef PERL_MAD
4548 if (PL_madskills)
6b29d1f5 4549 PL_thistoken = newSVpvs("\\E");
5db06880 4550#endif
a0d0e21e 4551 }
78cdf107 4552 PL_lex_allbrackets--;
bbf60fe6 4553 return REPORT(')');
79072805 4554 }
52ed07f6
BF
4555 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4556 /* Got an unpaired \E */
4557 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
820438b1 4558 "Useless use of \\E");
52ed07f6 4559 }
5db06880
NC
4560#ifdef PERL_MAD
4561 while (PL_bufptr != PL_bufend &&
4562 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4563 if (!PL_thiswhite)
6b29d1f5 4564 PL_thiswhite = newSVpvs("");
cd81e915 4565 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4566 PL_bufptr += 2;
4567 }
4568#else
3280af22
NIS
4569 if (PL_bufptr != PL_bufend)
4570 PL_bufptr += 2;
5db06880 4571#endif
3280af22 4572 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4573 return yylex();
79072805
LW
4574 }
4575 else {
607df283 4576 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4577 "### Saw case modifier\n"); });
3280af22 4578 s = PL_bufptr + 1;
6e909404 4579 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4580#ifdef PERL_MAD
cd81e915 4581 if (!PL_thiswhite)
6b29d1f5 4582 PL_thiswhite = newSVpvs("");
cd81e915 4583 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4584#endif
89122651 4585 PL_bufptr = s + 3;
6e909404
JH
4586 PL_lex_state = LEX_INTERPCONCAT;
4587 return yylex();
a0d0e21e 4588 }
6e909404 4589 else {
90771dc0 4590 I32 tmp;
5db06880
NC
4591 if (!PL_madskills) /* when just compiling don't need correct */
4592 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4593 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
838f2281
BF
4594 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4595 (strchr(PL_lex_casestack, 'L')
4596 || strchr(PL_lex_casestack, 'U')
4597 || strchr(PL_lex_casestack, 'F'))) {
6e909404 4598 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4599 PL_lex_allbrackets--;
bbf60fe6 4600 return REPORT(')');
6e909404
JH
4601 }
4602 if (PL_lex_casemods > 10)
4603 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4604 PL_lex_casestack[PL_lex_casemods++] = *s;
4605 PL_lex_casestack[PL_lex_casemods] = '\0';
4606 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4607 start_force(PL_curforce);
9ded7720 4608 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4609 force_next((2<<24)|'(');
cd81e915 4610 start_force(PL_curforce);
6e909404 4611 if (*s == 'l')
9ded7720 4612 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4613 else if (*s == 'u')
9ded7720 4614 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4615 else if (*s == 'L')
9ded7720 4616 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4617 else if (*s == 'U')
9ded7720 4618 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4619 else if (*s == 'Q')
9ded7720 4620 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
838f2281
BF
4621 else if (*s == 'F')
4622 NEXTVAL_NEXTTOKE.ival = OP_FC;
6e909404 4623 else
5637ef5b 4624 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5db06880 4625 if (PL_madskills) {
a5849ce5
NC
4626 SV* const tmpsv = newSVpvs("\\ ");
4627 /* replace the space with the character we want to escape
4628 */
4629 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4630 curmad('_', tmpsv);
4631 }
6e909404 4632 PL_bufptr = s + 1;
a0d0e21e 4633 }
79072805 4634 force_next(FUNC);
3280af22
NIS
4635 if (PL_lex_starts) {
4636 s = PL_bufptr;
4637 PL_lex_starts = 0;
5db06880
NC
4638#ifdef PERL_MAD
4639 if (PL_madskills) {
cd81e915
NC
4640 if (PL_thistoken)
4641 sv_free(PL_thistoken);
6b29d1f5 4642 PL_thistoken = newSVpvs("");
5db06880
NC
4643 }
4644#endif
131b3ad0
DM
4645 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4646 if (PL_lex_casemods == 1 && PL_lex_inpat)
4647 OPERATOR(',');
4648 else
4649 Aop(OP_CONCAT);
79072805
LW
4650 }
4651 else
cea2e8a9 4652 return yylex();
79072805
LW
4653 }
4654
55497cff 4655 case LEX_INTERPPUSH:
bbf60fe6 4656 return REPORT(sublex_push());
55497cff 4657
79072805 4658 case LEX_INTERPSTART:
3280af22 4659 if (PL_bufptr == PL_bufend)
bbf60fe6 4660 return REPORT(sublex_done());
9da1dd8f 4661 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
b6007c36 4662 "### Interpolated variable\n"); });
3280af22
NIS
4663 PL_expect = XTERM;
4664 PL_lex_dojoin = (*PL_bufptr == '@');
4665 PL_lex_state = LEX_INTERPNORMAL;
4666 if (PL_lex_dojoin) {
cd81e915 4667 start_force(PL_curforce);
9ded7720 4668 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4669 force_next(',');
cd81e915 4670 start_force(PL_curforce);
a0d0e21e 4671 force_ident("\"", '$');
cd81e915 4672 start_force(PL_curforce);
9ded7720 4673 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4674 force_next('$');
cd81e915 4675 start_force(PL_curforce);
9ded7720 4676 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4677 force_next((2<<24)|'(');
cd81e915 4678 start_force(PL_curforce);
9ded7720 4679 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4680 force_next(FUNC);
4681 }
9da1dd8f
DM
4682 /* Convert (?{...}) and friends to 'do {...}' */
4683 if (PL_lex_inpat && *PL_bufptr == '(') {
4684 PL_sublex_info.re_eval_start = PL_bufptr;
4685 PL_bufptr += 2;
4686 if (*PL_bufptr != '{')
4687 PL_bufptr++;
6165f85b
DM
4688 start_force(PL_curforce);
4689 /* XXX probably need a CURMAD(something) here */
9da1dd8f
DM
4690 PL_expect = XTERMBLOCK;
4691 force_next(DO);
4692 }
4693
3280af22
NIS
4694 if (PL_lex_starts++) {
4695 s = PL_bufptr;
5db06880
NC
4696#ifdef PERL_MAD
4697 if (PL_madskills) {
cd81e915
NC
4698 if (PL_thistoken)
4699 sv_free(PL_thistoken);
6b29d1f5 4700 PL_thistoken = newSVpvs("");
5db06880
NC
4701 }
4702#endif
131b3ad0
DM
4703 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4704 if (!PL_lex_casemods && PL_lex_inpat)
4705 OPERATOR(',');
4706 else
4707 Aop(OP_CONCAT);
79072805 4708 }
cea2e8a9 4709 return yylex();
79072805
LW
4710
4711 case LEX_INTERPENDMAYBE:
3280af22
NIS
4712 if (intuit_more(PL_bufptr)) {
4713 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4714 break;
4715 }
4716 /* FALL THROUGH */
4717
4718 case LEX_INTERPEND:
3280af22
NIS
4719 if (PL_lex_dojoin) {
4720 PL_lex_dojoin = FALSE;
4721 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4722#ifdef PERL_MAD
4723 if (PL_madskills) {
cd81e915
NC
4724 if (PL_thistoken)
4725 sv_free(PL_thistoken);
6b29d1f5 4726 PL_thistoken = newSVpvs("");
5db06880
NC
4727 }
4728#endif
78cdf107 4729 PL_lex_allbrackets--;
bbf60fe6 4730 return REPORT(')');
79072805 4731 }
43a16006 4732 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4733 && SvEVALED(PL_lex_repl))
43a16006 4734 {
e9fa98b2 4735 if (PL_bufptr != PL_bufend)
cea2e8a9 4736 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4737 PL_lex_repl = NULL;
e9fa98b2 4738 }
db444266
FC
4739 /* Paranoia. re_eval_start is adjusted when S_scan_heredoc sets
4740 re_eval_str. If the here-doc body’s length equals the previous
4741 value of re_eval_start, re_eval_start will now be null. So
4742 check re_eval_str as well. */
4743 if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) {
4744 SV *sv;
9da1dd8f
DM
4745 if (*PL_bufptr != ')')
4746 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4747 PL_bufptr++;
4748 /* having compiled a (?{..}) expression, return the original
4749 * text too, as a const */
db444266
FC
4750 if (PL_sublex_info.re_eval_str) {
4751 sv = PL_sublex_info.re_eval_str;
4752 PL_sublex_info.re_eval_str = NULL;
4753 SvCUR_set(sv, PL_bufptr - PL_sublex_info.re_eval_start);
4754 SvPV_shrink_to_cur(sv);
4755 }
4756 else sv = newSVpvn(PL_sublex_info.re_eval_start,
4757 PL_bufptr - PL_sublex_info.re_eval_start);
6165f85b
DM
4758 start_force(PL_curforce);
4759 /* XXX probably need a CURMAD(something) here */
4760 NEXTVAL_NEXTTOKE.opval =
9da1dd8f 4761 (OP*)newSVOP(OP_CONST, 0,
db444266 4762 sv);
9da1dd8f
DM
4763 force_next(THING);
4764 PL_sublex_info.re_eval_start = NULL;
4765 PL_expect = XTERM;
4766 return REPORT(',');
4767 }
4768
79072805
LW
4769 /* FALLTHROUGH */
4770 case LEX_INTERPCONCAT:
4771#ifdef DEBUGGING
3280af22 4772 if (PL_lex_brackets)
5637ef5b
NC
4773 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4774 (long) PL_lex_brackets);
79072805 4775#endif
3280af22 4776 if (PL_bufptr == PL_bufend)
bbf60fe6 4777 return REPORT(sublex_done());
79072805 4778
9da1dd8f
DM
4779 /* m'foo' still needs to be parsed for possible (?{...}) */
4780 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
3280af22 4781 SV *sv = newSVsv(PL_linestr);
9da1dd8f 4782 sv = tokeq(sv);
6154021b 4783 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4784 s = PL_bufend;
79072805
LW
4785 }
4786 else {
3280af22 4787 s = scan_const(PL_bufptr);
79072805 4788 if (*s == '\\')
3280af22 4789 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4790 else
3280af22 4791 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4792 }
4793
3280af22 4794 if (s != PL_bufptr) {
cd81e915 4795 start_force(PL_curforce);
5db06880
NC
4796 if (PL_madskills) {
4797 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4798 }
6154021b 4799 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4800 PL_expect = XTERM;
79072805 4801 force_next(THING);
131b3ad0 4802 if (PL_lex_starts++) {
5db06880
NC
4803#ifdef PERL_MAD
4804 if (PL_madskills) {
cd81e915
NC
4805 if (PL_thistoken)
4806 sv_free(PL_thistoken);
6b29d1f5 4807 PL_thistoken = newSVpvs("");
5db06880
NC
4808 }
4809#endif
131b3ad0
DM
4810 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4811 if (!PL_lex_casemods && PL_lex_inpat)
4812 OPERATOR(',');
4813 else
4814 Aop(OP_CONCAT);
4815 }
79072805 4816 else {
3280af22 4817 PL_bufptr = s;
cea2e8a9 4818 return yylex();
79072805
LW
4819 }
4820 }
4821
cea2e8a9 4822 return yylex();
a0d0e21e 4823 case LEX_FORMLINE:
3280af22
NIS
4824 s = scan_formline(PL_bufptr);
4825 if (!PL_lex_formbrack)
7c70caa5 4826 {
705fe0e5 4827 formbrack = 1;
a0d0e21e 4828 goto rightbracket;
7c70caa5 4829 }
705fe0e5
FC
4830 PL_bufptr = s;
4831 return yylex();
79072805
LW
4832 }
4833
3280af22
NIS
4834 s = PL_bufptr;
4835 PL_oldoldbufptr = PL_oldbufptr;
4836 PL_oldbufptr = s;
463ee0b2
LW
4837
4838 retry:
5db06880 4839#ifdef PERL_MAD
cd81e915
NC
4840 if (PL_thistoken) {
4841 sv_free(PL_thistoken);
4842 PL_thistoken = 0;
5db06880 4843 }
cd81e915 4844 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4845#endif
378cc40b
LW
4846 switch (*s) {
4847 default:
7e2040f0 4848 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4849 goto keylookup;
b1fc3636 4850 {
e2f06df0
BF
4851 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4852 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4853 UTF8SKIP(s),
4854 SVs_TEMP | SVf_UTF8),
4855 10, UNI_DISPLAY_ISPRINT))
4856 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
b1fc3636
CJ
4857 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4858 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4859 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4860 } else {
4861 d = PL_linestart;
4862 }
4863 *s = '\0';
e2f06df0
BF
4864 sv_setpv(dsv, d);
4865 if (UTF)
4866 SvUTF8_on(dsv);
4867 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
b1fc3636 4868 }
e929a76b
LW
4869 case 4:
4870 case 26:
4871 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4872 case 0:
5db06880
NC
4873#ifdef PERL_MAD
4874 if (PL_madskills)
cd81e915 4875 PL_faketokens = 0;
5db06880 4876#endif
60d63348 4877 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
3280af22
NIS
4878 PL_last_uni = 0;
4879 PL_last_lop = 0;
a7aaec61
Z
4880 if (PL_lex_brackets &&
4881 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4882 yyerror((const char *)
4883 (PL_lex_formbrack
4884 ? "Format not terminated"
4885 : "Missing right curly or square bracket"));
c5ee2135 4886 }
4e553d73 4887 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4888 "### Tokener got EOF\n");
5f80b19c 4889 } );
79072805 4890 TOKEN(0);
463ee0b2 4891 }
3280af22 4892 if (s++ < PL_bufend)
a687059c 4893 goto retry; /* ignore stray nulls */
3280af22
NIS
4894 PL_last_uni = 0;
4895 PL_last_lop = 0;
4896 if (!PL_in_eval && !PL_preambled) {
4897 PL_preambled = TRUE;
5db06880
NC
4898#ifdef PERL_MAD
4899 if (PL_madskills)
cd81e915 4900 PL_faketokens = 1;
5db06880 4901#endif
5ab7ff98
NC
4902 if (PL_perldb) {
4903 /* Generate a string of Perl code to load the debugger.
4904 * If PERL5DB is set, it will return the contents of that,
4905 * otherwise a compile-time require of perl5db.pl. */
4906
4907 const char * const pdb = PerlEnv_getenv("PERL5DB");
4908
4909 if (pdb) {
4910 sv_setpv(PL_linestr, pdb);
4911 sv_catpvs(PL_linestr,";");
4912 } else {
4913 SETERRNO(0,SS_NORMAL);
4914 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4915 }
4916 } else
4917 sv_setpvs(PL_linestr,"");
c62eb204
NC
4918 if (PL_preambleav) {
4919 SV **svp = AvARRAY(PL_preambleav);
4920 SV **const end = svp + AvFILLp(PL_preambleav);
4921 while(svp <= end) {
4922 sv_catsv(PL_linestr, *svp);
4923 ++svp;
396482e1 4924 sv_catpvs(PL_linestr, ";");
91b7def8 4925 }
daba3364 4926 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4927 PL_preambleav = NULL;
91b7def8 4928 }
9f639728
FR
4929 if (PL_minus_E)
4930 sv_catpvs(PL_linestr,
4931 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4932 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4933 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4934 if (PL_minus_l)
396482e1 4935 sv_catpvs(PL_linestr,"chomp;");
3280af22 4936 if (PL_minus_a) {
3280af22 4937 if (PL_minus_F) {
3792a11b
NC
4938 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4939 || *PL_splitstr == '"')
3280af22 4940 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4941 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4942 else {
c8ef6a4b
NC
4943 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4944 bytes can be used as quoting characters. :-) */
dd374669 4945 const char *splits = PL_splitstr;
91d456ae 4946 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4947 do {
4948 /* Need to \ \s */
dd374669
AL
4949 if (*splits == '\\')
4950 sv_catpvn(PL_linestr, splits, 1);
4951 sv_catpvn(PL_linestr, splits, 1);
4952 } while (*splits++);
48c4c863
NC
4953 /* This loop will embed the trailing NUL of
4954 PL_linestr as the last thing it does before
4955 terminating. */
396482e1 4956 sv_catpvs(PL_linestr, ");");
54310121 4957 }
2304df62
AD
4958 }
4959 else
396482e1 4960 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4961 }
79072805 4962 }
396482e1 4963 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4964 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4965 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4966 PL_last_lop = PL_last_uni = NULL;
65269a95 4967 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4968 update_debugger_info(PL_linestr, NULL, 0);
79072805 4969 goto retry;
a687059c 4970 }
e929a76b 4971 do {
580561a3
Z
4972 fake_eof = 0;
4973 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4974 if (0) {
7e28d3af 4975 fake_eof:
f0e67a1d
Z
4976 fake_eof = LEX_FAKE_EOF;
4977 }
4978 PL_bufptr = PL_bufend;
17cc9359 4979 CopLINE_inc(PL_curcop);
f0e67a1d 4980 if (!lex_next_chunk(fake_eof)) {
17cc9359 4981 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4982 s = PL_bufptr;
4983 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4984 }
17cc9359 4985 CopLINE_dec(PL_curcop);
5db06880 4986#ifdef PERL_MAD
f0e67a1d 4987 if (!PL_rsfp)
cd81e915 4988 PL_realtokenstart = -1;
5db06880 4989#endif
f0e67a1d 4990 s = PL_bufptr;
7aa207d6
JH
4991 /* If it looks like the start of a BOM or raw UTF-16,
4992 * check if it in fact is. */
580561a3 4993 if (bof && PL_rsfp &&
7aa207d6
JH
4994 (*s == 0 ||
4995 *(U8*)s == 0xEF ||
4996 *(U8*)s >= 0xFE ||
4997 s[1] == 0)) {
879bc93b
DM
4998 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4999 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
5000#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
5001 /* offset may include swallowed CR */
5002 if (!bof)
879bc93b 5003 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 5004#endif
7e28d3af 5005 if (bof) {
3280af22 5006 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 5007 s = swallow_bom((U8*)s);
e929a76b 5008 }
378cc40b 5009 }
737c24fc 5010 if (PL_parser->in_pod) {
a0d0e21e 5011 /* Incest with pod. */
5db06880
NC
5012#ifdef PERL_MAD
5013 if (PL_madskills)
cd81e915 5014 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 5015#endif
01a57ef7 5016 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 5017 sv_setpvs(PL_linestr, "");
3280af22
NIS
5018 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5019 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5020 PL_last_lop = PL_last_uni = NULL;
737c24fc 5021 PL_parser->in_pod = 0;
a0d0e21e 5022 }
4e553d73 5023 }
60d63348 5024 if (PL_rsfp || PL_parser->filtered)
85613cab 5025 incline(s);
737c24fc 5026 } while (PL_parser->in_pod);
3280af22 5027 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 5028 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5029 PL_last_lop = PL_last_uni = NULL;
57843af0 5030 if (CopLINE(PL_curcop) == 1) {
3280af22 5031 while (s < PL_bufend && isSPACE(*s))
79072805 5032 s++;
a0d0e21e 5033 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 5034 s++;
5db06880
NC
5035#ifdef PERL_MAD
5036 if (PL_madskills)
cd81e915 5037 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 5038#endif
bd61b366 5039 d = NULL;
3280af22 5040 if (!PL_in_eval) {
44a8e56a 5041 if (*s == '#' && *(s+1) == '!')
5042 d = s + 2;
5043#ifdef ALTERNATE_SHEBANG
5044 else {
bfed75c6 5045 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 5046 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
5047 d = s + (sizeof(as) - 1);
5048 }
5049#endif /* ALTERNATE_SHEBANG */
5050 }
5051 if (d) {
b8378b72 5052 char *ipath;
774d564b 5053 char *ipathend;
b8378b72 5054
774d564b 5055 while (isSPACE(*d))
b8378b72
CS
5056 d++;
5057 ipath = d;
774d564b 5058 while (*d && !isSPACE(*d))
5059 d++;
5060 ipathend = d;
5061
5062#ifdef ARG_ZERO_IS_SCRIPT
5063 if (ipathend > ipath) {
5064 /*
5065 * HP-UX (at least) sets argv[0] to the script name,
5066 * which makes $^X incorrect. And Digital UNIX and Linux,
5067 * at least, set argv[0] to the basename of the Perl
5068 * interpreter. So, having found "#!", we'll set it right.
5069 */
fafc274c
NC
5070 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5071 SVt_PV)); /* $^X */
774d564b 5072 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 5073 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 5074 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 5075 SvSETMAGIC(x);
5076 }
556c1dec
JH
5077 else {
5078 STRLEN blen;
5079 STRLEN llen;
cfd0369c 5080 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 5081 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
5082 if (llen < blen) {
5083 bstart += blen - llen;
5084 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5085 sv_setpvn(x, ipath, ipathend - ipath);
5086 SvSETMAGIC(x);
5087 }
5088 }
5089 }
774d564b 5090 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 5091 }
774d564b 5092#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
5093
5094 /*
5095 * Look for options.
5096 */
748a9306 5097 d = instr(s,"perl -");
84e30d1a 5098 if (!d) {
748a9306 5099 d = instr(s,"perl");
84e30d1a
GS
5100#if defined(DOSISH)
5101 /* avoid getting into infinite loops when shebang
5102 * line contains "Perl" rather than "perl" */
5103 if (!d) {
5104 for (d = ipathend-4; d >= ipath; --d) {
5105 if ((*d == 'p' || *d == 'P')
5106 && !ibcmp(d, "perl", 4))
5107 {
5108 break;
5109 }
5110 }
5111 if (d < ipath)
bd61b366 5112 d = NULL;
84e30d1a
GS
5113 }
5114#endif
5115 }
44a8e56a 5116#ifdef ALTERNATE_SHEBANG
5117 /*
5118 * If the ALTERNATE_SHEBANG on this system starts with a
5119 * character that can be part of a Perl expression, then if
5120 * we see it but not "perl", we're probably looking at the
5121 * start of Perl code, not a request to hand off to some
5122 * other interpreter. Similarly, if "perl" is there, but
5123 * not in the first 'word' of the line, we assume the line
5124 * contains the start of the Perl program.
44a8e56a 5125 */
5126 if (d && *s != '#') {
f54cb97a 5127 const char *c = ipath;
44a8e56a 5128 while (*c && !strchr("; \t\r\n\f\v#", *c))
5129 c++;
5130 if (c < d)
bd61b366 5131 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 5132 else
5133 *s = '#'; /* Don't try to parse shebang line */
5134 }
774d564b 5135#endif /* ALTERNATE_SHEBANG */
748a9306 5136 if (!d &&
44a8e56a 5137 *s == '#' &&
774d564b 5138 ipathend > ipath &&
3280af22 5139 !PL_minus_c &&
748a9306 5140 !instr(s,"indir") &&
3280af22 5141 instr(PL_origargv[0],"perl"))
748a9306 5142 {
27da23d5 5143 dVAR;
9f68db38 5144 char **newargv;
9f68db38 5145
774d564b 5146 *ipathend = '\0';
5147 s = ipathend + 1;
3280af22 5148 while (s < PL_bufend && isSPACE(*s))
9f68db38 5149 s++;
3280af22 5150 if (s < PL_bufend) {
d85f917e 5151 Newx(newargv,PL_origargc+3,char*);
9f68db38 5152 newargv[1] = s;
3280af22 5153 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
5154 s++;
5155 *s = '\0';
3280af22 5156 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
5157 }
5158 else
3280af22 5159 newargv = PL_origargv;
774d564b 5160 newargv[0] = ipath;
b35112e7 5161 PERL_FPU_PRE_EXEC
b4748376 5162 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 5163 PERL_FPU_POST_EXEC
cea2e8a9 5164 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 5165 }
748a9306 5166 if (d) {
c35e046a
AL
5167 while (*d && !isSPACE(*d))
5168 d++;
5169 while (SPACE_OR_TAB(*d))
5170 d++;
748a9306
LW
5171
5172 if (*d++ == '-') {
f54cb97a 5173 const bool switches_done = PL_doswitches;
fb993905
GA
5174 const U32 oldpdb = PL_perldb;
5175 const bool oldn = PL_minus_n;
5176 const bool oldp = PL_minus_p;
c7030b81 5177 const char *d1 = d;
fb993905 5178
8cc95fdb 5179 do {
4ba71d51
FC
5180 bool baduni = FALSE;
5181 if (*d1 == 'C') {
bd0ab00d
NC
5182 const char *d2 = d1 + 1;
5183 if (parse_unicode_opts((const char **)&d2)
5184 != PL_unicode)
5185 baduni = TRUE;
4ba71d51
FC
5186 }
5187 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
5188 const char * const m = d1;
5189 while (*d1 && !isSPACE(*d1))
5190 d1++;
cea2e8a9 5191 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 5192 (int)(d1 - m), m);
8cc95fdb 5193 }
c7030b81
NC
5194 d1 = moreswitches(d1);
5195 } while (d1);
f0b2cf55
YST
5196 if (PL_doswitches && !switches_done) {
5197 int argc = PL_origargc;
5198 char **argv = PL_origargv;
5199 do {
5200 argc--,argv++;
5201 } while (argc && argv[0][0] == '-' && argv[0][1]);
5202 init_argv_symbols(argc,argv);
5203 }
65269a95 5204 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5205 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5206 /* if we have already added "LINE: while (<>) {",
5207 we must not do it again */
748a9306 5208 {
76f68e9b 5209 sv_setpvs(PL_linestr, "");
3280af22
NIS
5210 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5211 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5212 PL_last_lop = PL_last_uni = NULL;
3280af22 5213 PL_preambled = FALSE;
65269a95 5214 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5215 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5216 goto retry;
5217 }
a0d0e21e 5218 }
79072805 5219 }
9f68db38 5220 }
79072805 5221 }
3280af22 5222 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5223 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5224 start_force(PL_curforce);
5225 NEXTVAL_NEXTTOKE.ival = 0;
5226 force_next(FORMRBRACK);
5227 TOKEN(';');
ae986130 5228 }
378cc40b 5229 goto retry;
4fdae800 5230 case '\r':
6a27c188 5231#ifdef PERL_STRICT_CR
cea2e8a9 5232 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5233 Perl_croak(aTHX_
cc507455 5234 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5235#endif
4fdae800 5236 case ' ': case '\t': case '\f': case 013:
5db06880 5237#ifdef PERL_MAD
cd81e915 5238 PL_realtokenstart = -1;
ac372eb8
RD
5239 if (!PL_thiswhite)
5240 PL_thiswhite = newSVpvs("");
5241 sv_catpvn(PL_thiswhite, s, 1);
5db06880 5242#endif
ac372eb8 5243 s++;
378cc40b 5244 goto retry;
378cc40b 5245 case '#':
e929a76b 5246 case '\n':
5db06880 5247#ifdef PERL_MAD
cd81e915 5248 PL_realtokenstart = -1;
5db06880 5249 if (PL_madskills)
cd81e915 5250 PL_faketokens = 0;
5db06880 5251#endif
60d63348
FC
5252 if (PL_lex_state != LEX_NORMAL ||
5253 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5254 if (*s == '#' && s == PL_linestart && PL_in_eval
5255 && !PL_rsfp && !PL_parser->filtered) {
df0deb90
GS
5256 /* handle eval qq[#line 1 "foo"\n ...] */
5257 CopLINE_dec(PL_curcop);
5258 incline(s);
5259 }
5db06880
NC
5260 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5261 s = SKIPSPACE0(s);
60d63348 5262 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5db06880
NC
5263 incline(s);
5264 }
5265 else {
5266 d = s;
5267 while (d < PL_bufend && *d != '\n')
5268 d++;
5269 if (d < PL_bufend)
5270 d++;
5271 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5637ef5b
NC
5272 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5273 d, PL_bufend);
5db06880
NC
5274#ifdef PERL_MAD
5275 if (PL_madskills)
cd81e915 5276 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5277#endif
5278 s = d;
5279 incline(s);
5280 }
3280af22 5281 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
3280af22 5282 PL_lex_state = LEX_FORMLINE;
705fe0e5
FC
5283 start_force(PL_curforce);
5284 NEXTVAL_NEXTTOKE.ival = 0;
5285 force_next(FORMRBRACK);
5286 TOKEN(';');
a687059c 5287 }
378cc40b 5288 }
a687059c 5289 else {
5db06880
NC
5290#ifdef PERL_MAD
5291 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5292 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5293 PL_faketokens = 0;
5db06880
NC
5294 s = SKIPSPACE0(s);
5295 TOKEN(PEG); /* make sure any #! line is accessible */
5296 }
5297 s = SKIPSPACE0(s);
5298 }
5299 else {
5300/* if (PL_madskills && PL_lex_formbrack) { */
5301 d = s;
5302 while (d < PL_bufend && *d != '\n')
5303 d++;
5304 if (d < PL_bufend)
5305 d++;
5306 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5307 Perl_croak(aTHX_ "panic: input overflow");
5308 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5309 if (!PL_thiswhite)
6b29d1f5 5310 PL_thiswhite = newSVpvs("");
5db06880 5311 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5312 sv_setpvs(PL_thiswhite, "");
cd81e915 5313 PL_faketokens = 0;
5db06880 5314 }
cd81e915 5315 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5316 }
5317 s = d;
5318/* }
5319 *s = '\0';
5320 PL_bufend = s; */
5321 }
5322#else
378cc40b 5323 *s = '\0';
3280af22 5324 PL_bufend = s;
5db06880 5325#endif
a687059c 5326 }
378cc40b
LW
5327 goto retry;
5328 case '-':
79072805 5329 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5330 I32 ftst = 0;
90771dc0 5331 char tmp;
e5edeb50 5332
378cc40b 5333 s++;
3280af22 5334 PL_bufptr = s;
748a9306
LW
5335 tmp = *s++;
5336
bf4acbe4 5337 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5338 s++;
5339
5340 if (strnEQ(s,"=>",2)) {
3280af22 5341 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5342 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5343 OPERATOR('-'); /* unary minus */
5344 }
3280af22 5345 PL_last_uni = PL_oldbufptr;
748a9306 5346 switch (tmp) {
e5edeb50
JH
5347 case 'r': ftst = OP_FTEREAD; break;
5348 case 'w': ftst = OP_FTEWRITE; break;
5349 case 'x': ftst = OP_FTEEXEC; break;
5350 case 'o': ftst = OP_FTEOWNED; break;
5351 case 'R': ftst = OP_FTRREAD; break;
5352 case 'W': ftst = OP_FTRWRITE; break;
5353 case 'X': ftst = OP_FTREXEC; break;
5354 case 'O': ftst = OP_FTROWNED; break;
5355 case 'e': ftst = OP_FTIS; break;
5356 case 'z': ftst = OP_FTZERO; break;
5357 case 's': ftst = OP_FTSIZE; break;
5358 case 'f': ftst = OP_FTFILE; break;
5359 case 'd': ftst = OP_FTDIR; break;
5360 case 'l': ftst = OP_FTLINK; break;
5361 case 'p': ftst = OP_FTPIPE; break;
5362 case 'S': ftst = OP_FTSOCK; break;
5363 case 'u': ftst = OP_FTSUID; break;
5364 case 'g': ftst = OP_FTSGID; break;
5365 case 'k': ftst = OP_FTSVTX; break;
5366 case 'b': ftst = OP_FTBLK; break;
5367 case 'c': ftst = OP_FTCHR; break;
5368 case 't': ftst = OP_FTTTY; break;
5369 case 'T': ftst = OP_FTTEXT; break;
5370 case 'B': ftst = OP_FTBINARY; break;
5371 case 'M': case 'A': case 'C':
fafc274c 5372 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5373 switch (tmp) {
5374 case 'M': ftst = OP_FTMTIME; break;
5375 case 'A': ftst = OP_FTATIME; break;
5376 case 'C': ftst = OP_FTCTIME; break;
5377 default: break;
5378 }
5379 break;
378cc40b 5380 default:
378cc40b
LW
5381 break;
5382 }
e5edeb50 5383 if (ftst) {
eb160463 5384 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5385 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5386 "### Saw file test %c\n", (int)tmp);
5f80b19c 5387 } );
e5edeb50
JH
5388 FTST(ftst);
5389 }
5390 else {
5391 /* Assume it was a minus followed by a one-letter named
5392 * subroutine call (or a -bareword), then. */
95c31fe3 5393 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5394 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5395 (int) tmp);
5f80b19c 5396 } );
3cf7b4c4 5397 s = --PL_bufptr;
e5edeb50 5398 }
378cc40b 5399 }
90771dc0
NC
5400 {
5401 const char tmp = *s++;
5402 if (*s == tmp) {
5403 s++;
5404 if (PL_expect == XOPERATOR)
5405 TERM(POSTDEC);
5406 else
5407 OPERATOR(PREDEC);
5408 }
5409 else if (*s == '>') {
5410 s++;
29595ff2 5411 s = SKIPSPACE1(s);
90771dc0
NC
5412 if (isIDFIRST_lazy_if(s,UTF)) {
5413 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5414 TOKEN(ARROW);
5415 }
5416 else if (*s == '$')
5417 OPERATOR(ARROW);
5418 else
5419 TERM(ARROW);
5420 }
78cdf107
Z
5421 if (PL_expect == XOPERATOR) {
5422 if (*s == '=' && !PL_lex_allbrackets &&
5423 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5424 s--;
5425 TOKEN(0);
5426 }
90771dc0 5427 Aop(OP_SUBTRACT);
78cdf107 5428 }
90771dc0
NC
5429 else {
5430 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5431 check_uni();
5432 OPERATOR('-'); /* unary minus */
79072805 5433 }
2f3197b3 5434 }
79072805 5435
378cc40b 5436 case '+':
90771dc0
NC
5437 {
5438 const char tmp = *s++;
5439 if (*s == tmp) {
5440 s++;
5441 if (PL_expect == XOPERATOR)
5442 TERM(POSTINC);
5443 else
5444 OPERATOR(PREINC);
5445 }
78cdf107
Z
5446 if (PL_expect == XOPERATOR) {
5447 if (*s == '=' && !PL_lex_allbrackets &&
5448 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5449 s--;
5450 TOKEN(0);
5451 }
90771dc0 5452 Aop(OP_ADD);
78cdf107 5453 }
90771dc0
NC
5454 else {
5455 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5456 check_uni();
5457 OPERATOR('+');
5458 }
2f3197b3 5459 }
a687059c 5460
378cc40b 5461 case '*':
3280af22
NIS
5462 if (PL_expect != XOPERATOR) {
5463 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5464 PL_expect = XOPERATOR;
5465 force_ident(PL_tokenbuf, '*');
5466 if (!*PL_tokenbuf)
a0d0e21e 5467 PREREF('*');
79072805 5468 TERM('*');
a687059c 5469 }
79072805
LW
5470 s++;
5471 if (*s == '*') {
a687059c 5472 s++;
78cdf107
Z
5473 if (*s == '=' && !PL_lex_allbrackets &&
5474 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5475 s -= 2;
5476 TOKEN(0);
5477 }
79072805 5478 PWop(OP_POW);
a687059c 5479 }
78cdf107
Z
5480 if (*s == '=' && !PL_lex_allbrackets &&
5481 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5482 s--;
5483 TOKEN(0);
5484 }
79072805
LW
5485 Mop(OP_MULTIPLY);
5486
378cc40b 5487 case '%':
3280af22 5488 if (PL_expect == XOPERATOR) {
78cdf107
Z
5489 if (s[1] == '=' && !PL_lex_allbrackets &&
5490 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5491 TOKEN(0);
bbce6d69 5492 ++s;
5493 Mop(OP_MODULO);
a687059c 5494 }
3280af22 5495 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5496 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5497 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5498 if (!PL_tokenbuf[1]) {
bbce6d69 5499 PREREF('%');
a687059c 5500 }
3280af22 5501 PL_pending_ident = '%';
bbce6d69 5502 TERM('%');
a687059c 5503
378cc40b 5504 case '^':
78cdf107
Z
5505 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5506 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5507 TOKEN(0);
79072805 5508 s++;
a0d0e21e 5509 BOop(OP_BIT_XOR);
79072805 5510 case '[':
a7aaec61
Z
5511 if (PL_lex_brackets > 100)
5512 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5513 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5514 PL_lex_allbrackets++;
df3467db
IG
5515 {
5516 const char tmp = *s++;
5517 OPERATOR(tmp);
5518 }
378cc40b 5519 case '~':
0d863452 5520 if (s[1] == '~'
3e7dd34d 5521 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5522 {
78cdf107
Z
5523 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5524 TOKEN(0);
0d863452
RH
5525 s += 2;
5526 Eop(OP_SMARTMATCH);
5527 }
78cdf107
Z
5528 s++;
5529 OPERATOR('~');
378cc40b 5530 case ',':
78cdf107
Z
5531 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5532 TOKEN(0);
5533 s++;
5534 OPERATOR(',');
a0d0e21e
LW
5535 case ':':
5536 if (s[1] == ':') {
5537 len = 0;
0bfa2a8a 5538 goto just_a_word_zero_gv;
a0d0e21e
LW
5539 }
5540 s++;
09bef843
SB
5541 switch (PL_expect) {
5542 OP *attrs;
5db06880
NC
5543#ifdef PERL_MAD
5544 I32 stuffstart;
5545#endif
09bef843
SB
5546 case XOPERATOR:
5547 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5548 break;
5549 PL_bufptr = s; /* update in case we back off */
d83f38d8 5550 if (*s == '=') {
2dc78664
NC
5551 Perl_croak(aTHX_
5552 "Use of := for an empty attribute list is not allowed");
d83f38d8 5553 }
09bef843
SB
5554 goto grabattrs;
5555 case XATTRBLOCK:
5556 PL_expect = XBLOCK;
5557 goto grabattrs;
5558 case XATTRTERM:
5559 PL_expect = XTERMBLOCK;
5560 grabattrs:
5db06880
NC
5561#ifdef PERL_MAD
5562 stuffstart = s - SvPVX(PL_linestr) - 1;
5563#endif
29595ff2 5564 s = PEEKSPACE(s);
5f66b61c 5565 attrs = NULL;
7e2040f0 5566 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5567 I32 tmp;
5cc237b8 5568 SV *sv;
09bef843 5569 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5570 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5571 if (tmp < 0) tmp = -tmp;
5572 switch (tmp) {
5573 case KEY_or:
5574 case KEY_and:
5575 case KEY_for:
11baf631 5576 case KEY_foreach:
f9829d6b
GS
5577 case KEY_unless:
5578 case KEY_if:
5579 case KEY_while:
5580 case KEY_until:
5581 goto got_attrs;
5582 default:
5583 break;
5584 }
5585 }
89a5757c 5586 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
09bef843 5587 if (*d == '(') {
d24ca0c5 5588 d = scan_str(d,TRUE,TRUE,FALSE);
09bef843 5589 if (!d) {
09bef843
SB
5590 /* MUST advance bufptr here to avoid bogus
5591 "at end of line" context messages from yyerror().
5592 */
5593 PL_bufptr = s + len;
5594 yyerror("Unterminated attribute parameter in attribute list");
5595 if (attrs)
5596 op_free(attrs);
5cc237b8 5597 sv_free(sv);
bbf60fe6 5598 return REPORT(0); /* EOF indicator */
09bef843
SB
5599 }
5600 }
5601 if (PL_lex_stuff) {
09bef843 5602 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5603 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5604 newSVOP(OP_CONST, 0, sv));
5605 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5606 PL_lex_stuff = NULL;
09bef843
SB
5607 }
5608 else {
5cc237b8
BS
5609 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5610 sv_free(sv);
1108974d 5611 if (PL_in_my == KEY_our) {
df9a6019 5612 deprecate(":unique");
1108974d 5613 }
bfed75c6 5614 else
371fce9b
DM
5615 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5616 }
5617
d3cea301
SB
5618 /* NOTE: any CV attrs applied here need to be part of
5619 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5620 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5621 sv_free(sv);
78f9721b 5622 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5623 }
5624 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5625 sv_free(sv);
8e5dadda 5626 deprecate(":locked");
5cc237b8
BS
5627 }
5628 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5629 sv_free(sv);
78f9721b 5630 CvMETHOD_on(PL_compcv);
5cc237b8 5631 }
78f9721b
SM
5632 /* After we've set the flags, it could be argued that
5633 we don't need to do the attributes.pm-based setting
5634 process, and shouldn't bother appending recognized
d3cea301
SB
5635 flags. To experiment with that, uncomment the
5636 following "else". (Note that's already been
5637 uncommented. That keeps the above-applied built-in
5638 attributes from being intercepted (and possibly
5639 rejected) by a package's attribute routines, but is
5640 justified by the performance win for the common case
5641 of applying only built-in attributes.) */
0256094b 5642 else
2fcb4757 5643 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5644 newSVOP(OP_CONST, 0,
5cc237b8 5645 sv));
09bef843 5646 }
29595ff2 5647 s = PEEKSPACE(d);
0120eecf 5648 if (*s == ':' && s[1] != ':')
29595ff2 5649 s = PEEKSPACE(s+1);
0120eecf
GS
5650 else if (s == d)
5651 break; /* require real whitespace or :'s */
29595ff2 5652 /* XXX losing whitespace on sequential attributes here */
09bef843 5653 }
90771dc0
NC
5654 {
5655 const char tmp
5656 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5657 if (*s != ';' && *s != '}' && *s != tmp
5658 && (tmp != '=' || *s != ')')) {
5659 const char q = ((*s == '\'') ? '"' : '\'');
5660 /* If here for an expression, and parsed no attrs, back
5661 off. */
5662 if (tmp == '=' && !attrs) {
5663 s = PL_bufptr;
5664 break;
5665 }
5666 /* MUST advance bufptr here to avoid bogus "at end of line"
5667 context messages from yyerror().
5668 */
5669 PL_bufptr = s;
10edeb5d
JH
5670 yyerror( (const char *)
5671 (*s
5672 ? Perl_form(aTHX_ "Invalid separator character "
5673 "%c%c%c in attribute list", q, *s, q)
5674 : "Unterminated attribute list" ) );
90771dc0
NC
5675 if (attrs)
5676 op_free(attrs);
5677 OPERATOR(':');
09bef843 5678 }
09bef843 5679 }
f9829d6b 5680 got_attrs:
09bef843 5681 if (attrs) {
cd81e915 5682 start_force(PL_curforce);
9ded7720 5683 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5684 CURMAD('_', PL_nextwhite);
89122651 5685 force_next(THING);
5db06880
NC
5686 }
5687#ifdef PERL_MAD
5688 if (PL_madskills) {
cd81e915 5689 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5690 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5691 }
5db06880 5692#endif
09bef843
SB
5693 TOKEN(COLONATTR);
5694 }
78cdf107
Z
5695 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5696 s--;
5697 TOKEN(0);
5698 }
5699 PL_lex_allbrackets--;
a0d0e21e 5700 OPERATOR(':');
8990e307
LW
5701 case '(':
5702 s++;
3280af22
NIS
5703 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5704 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5705 else
3280af22 5706 PL_expect = XTERM;
29595ff2 5707 s = SKIPSPACE1(s);
78cdf107 5708 PL_lex_allbrackets++;
a0d0e21e 5709 TOKEN('(');
378cc40b 5710 case ';':
78cdf107
Z
5711 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5712 TOKEN(0);
f4dd75d9 5713 CLINE;
78cdf107
Z
5714 s++;
5715 OPERATOR(';');
378cc40b 5716 case ')':
78cdf107
Z
5717 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5718 TOKEN(0);
5719 s++;
5720 PL_lex_allbrackets--;
5721 s = SKIPSPACE1(s);
5722 if (*s == '{')
5723 PREBLOCK(')');
5724 TERM(')');
79072805 5725 case ']':
a7aaec61
Z
5726 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5727 TOKEN(0);
79072805 5728 s++;
3280af22 5729 if (PL_lex_brackets <= 0)
d98d5fff 5730 yyerror("Unmatched right square bracket");
463ee0b2 5731 else
3280af22 5732 --PL_lex_brackets;
78cdf107 5733 PL_lex_allbrackets--;
3280af22
NIS
5734 if (PL_lex_state == LEX_INTERPNORMAL) {
5735 if (PL_lex_brackets == 0) {
02255c60
FC
5736 if (*s == '-' && s[1] == '>')
5737 PL_lex_state = LEX_INTERPENDMAYBE;
5738 else if (*s != '[' && *s != '{')
3280af22 5739 PL_lex_state = LEX_INTERPEND;
79072805
LW
5740 }
5741 }
4633a7c4 5742 TERM(']');
79072805 5743 case '{':
79072805 5744 s++;
eaf6a13d 5745 leftbracket:
3280af22 5746 if (PL_lex_brackets > 100) {
8edd5f42 5747 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5748 }
3280af22 5749 switch (PL_expect) {
a0d0e21e 5750 case XTERM:
3280af22
NIS
5751 if (PL_oldoldbufptr == PL_last_lop)
5752 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5753 else
3280af22 5754 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5755 PL_lex_allbrackets++;
79072805 5756 OPERATOR(HASHBRACK);
a0d0e21e 5757 case XOPERATOR:
bf4acbe4 5758 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5759 s++;
44a8e56a 5760 d = s;
3280af22
NIS
5761 PL_tokenbuf[0] = '\0';
5762 if (d < PL_bufend && *d == '-') {
5763 PL_tokenbuf[0] = '-';
44a8e56a 5764 d++;
bf4acbe4 5765 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5766 d++;
5767 }
7e2040f0 5768 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5769 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5770 FALSE, &len);
bf4acbe4 5771 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5772 d++;
5773 if (*d == '}') {
f54cb97a 5774 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5775 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5776 if (minus)
5777 force_next('-');
748a9306
LW
5778 }
5779 }
5780 /* FALL THROUGH */
09bef843 5781 case XATTRBLOCK:
748a9306 5782 case XBLOCK:
3280af22 5783 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5784 PL_lex_allbrackets++;
3280af22 5785 PL_expect = XSTATE;
a0d0e21e 5786 break;
09bef843 5787 case XATTRTERM:
a0d0e21e 5788 case XTERMBLOCK:
3280af22 5789 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5790 PL_lex_allbrackets++;
3280af22 5791 PL_expect = XSTATE;
a0d0e21e
LW
5792 break;
5793 default: {
f54cb97a 5794 const char *t;
3280af22
NIS
5795 if (PL_oldoldbufptr == PL_last_lop)
5796 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5797 else
3280af22 5798 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5799 PL_lex_allbrackets++;
29595ff2 5800 s = SKIPSPACE1(s);
8452ff4b
SB
5801 if (*s == '}') {
5802 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5803 PL_expect = XTERM;
5804 /* This hack is to get the ${} in the message. */
5805 PL_bufptr = s+1;
5806 yyerror("syntax error");
5807 break;
5808 }
a0d0e21e 5809 OPERATOR(HASHBRACK);
8452ff4b 5810 }
b8a4b1be
GS
5811 /* This hack serves to disambiguate a pair of curlies
5812 * as being a block or an anon hash. Normally, expectation
5813 * determines that, but in cases where we're not in a
5814 * position to expect anything in particular (like inside
5815 * eval"") we have to resolve the ambiguity. This code
5816 * covers the case where the first term in the curlies is a
5817 * quoted string. Most other cases need to be explicitly
a0288114 5818 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5819 * curly in order to force resolution as an anon hash.
5820 *
5821 * XXX should probably propagate the outer expectation
5822 * into eval"" to rely less on this hack, but that could
5823 * potentially break current behavior of eval"".
5824 * GSAR 97-07-21
5825 */
5826 t = s;
5827 if (*s == '\'' || *s == '"' || *s == '`') {
5828 /* common case: get past first string, handling escapes */
3280af22 5829 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5830 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5831 t++;
5832 t++;
a0d0e21e 5833 }
b8a4b1be 5834 else if (*s == 'q') {
3280af22 5835 if (++t < PL_bufend
b8a4b1be 5836 && (!isALNUM(*t)
3280af22 5837 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5838 && !isALNUM(*t))))
5839 {
abc667d1 5840 /* skip q//-like construct */
f54cb97a 5841 const char *tmps;
b8a4b1be
GS
5842 char open, close, term;
5843 I32 brackets = 1;
5844
3280af22 5845 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5846 t++;
abc667d1
DM
5847 /* check for q => */
5848 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5849 OPERATOR(HASHBRACK);
5850 }
b8a4b1be
GS
5851 term = *t;
5852 open = term;
5853 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5854 term = tmps[5];
5855 close = term;
5856 if (open == close)
3280af22
NIS
5857 for (t++; t < PL_bufend; t++) {
5858 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5859 t++;
6d07e5e9 5860 else if (*t == open)
b8a4b1be
GS
5861 break;
5862 }
abc667d1 5863 else {
3280af22
NIS
5864 for (t++; t < PL_bufend; t++) {
5865 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5866 t++;
6d07e5e9 5867 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5868 break;
5869 else if (*t == open)
5870 brackets++;
5871 }
abc667d1
DM
5872 }
5873 t++;
b8a4b1be 5874 }
abc667d1
DM
5875 else
5876 /* skip plain q word */
5877 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5878 t += UTF8SKIP(t);
a0d0e21e 5879 }
7e2040f0 5880 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5881 t += UTF8SKIP(t);
7e2040f0 5882 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5883 t += UTF8SKIP(t);
a0d0e21e 5884 }
3280af22 5885 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5886 t++;
b8a4b1be
GS
5887 /* if comma follows first term, call it an anon hash */
5888 /* XXX it could be a comma expression with loop modifiers */
3280af22 5889 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5890 || (*t == '=' && t[1] == '>')))
a0d0e21e 5891 OPERATOR(HASHBRACK);
3280af22 5892 if (PL_expect == XREF)
4e4e412b 5893 PL_expect = XTERM;
a0d0e21e 5894 else {
3280af22
NIS
5895 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5896 PL_expect = XSTATE;
a0d0e21e 5897 }
8990e307 5898 }
a0d0e21e 5899 break;
463ee0b2 5900 }
6154021b 5901 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5902 if (isSPACE(*s) || *s == '#')
3280af22 5903 PL_copline = NOLINE; /* invalidate current command line number */
7c70caa5 5904 TOKEN(formbrack ? '=' : '{');
378cc40b 5905 case '}':
a7aaec61
Z
5906 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5907 TOKEN(0);
79072805
LW
5908 rightbracket:
5909 s++;
3280af22 5910 if (PL_lex_brackets <= 0)
d98d5fff 5911 yyerror("Unmatched right curly bracket");
463ee0b2 5912 else
3280af22 5913 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 5914 PL_lex_allbrackets--;
3280af22
NIS
5915 if (PL_lex_state == LEX_INTERPNORMAL) {
5916 if (PL_lex_brackets == 0) {
9059aa12
LW
5917 if (PL_expect & XFAKEBRACK) {
5918 PL_expect &= XENUMMASK;
3280af22
NIS
5919 PL_lex_state = LEX_INTERPEND;
5920 PL_bufptr = s;
5db06880
NC
5921#if 0
5922 if (PL_madskills) {
cd81e915 5923 if (!PL_thiswhite)
6b29d1f5 5924 PL_thiswhite = newSVpvs("");
76f68e9b 5925 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5926 }
5927#endif
cea2e8a9 5928 return yylex(); /* ignore fake brackets */
79072805 5929 }
fa83b5b6 5930 if (*s == '-' && s[1] == '>')
3280af22 5931 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5932 else if (*s != '[' && *s != '{')
3280af22 5933 PL_lex_state = LEX_INTERPEND;
79072805
LW
5934 }
5935 }
9059aa12
LW
5936 if (PL_expect & XFAKEBRACK) {
5937 PL_expect &= XENUMMASK;
3280af22 5938 PL_bufptr = s;
cea2e8a9 5939 return yylex(); /* ignore fake brackets */
748a9306 5940 }
cd81e915 5941 start_force(PL_curforce);
5db06880
NC
5942 if (PL_madskills) {
5943 curmad('X', newSVpvn(s-1,1));
cd81e915 5944 CURMAD('_', PL_thiswhite);
5db06880 5945 }
7c70caa5 5946 force_next(formbrack ? '.' : '}');
583c9d5c 5947 if (formbrack) LEAVE;
5db06880 5948#ifdef PERL_MAD
cd81e915 5949 if (!PL_thistoken)
6b29d1f5 5950 PL_thistoken = newSVpvs("");
5db06880 5951#endif
705fe0e5
FC
5952 if (formbrack == 2) { /* means . where arguments were expected */
5953 start_force(PL_curforce);
5954 force_next(';');
96f9b782 5955 TOKEN(FORMRBRACK);
705fe0e5 5956 }
79072805 5957 TOKEN(';');
378cc40b
LW
5958 case '&':
5959 s++;
78cdf107
Z
5960 if (*s++ == '&') {
5961 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5962 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5963 s -= 2;
5964 TOKEN(0);
5965 }
a0d0e21e 5966 AOPERATOR(ANDAND);
78cdf107 5967 }
378cc40b 5968 s--;
3280af22 5969 if (PL_expect == XOPERATOR) {
041457d9
DM
5970 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5971 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5972 {
57843af0 5973 CopLINE_dec(PL_curcop);
f1f66076 5974 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5975 CopLINE_inc(PL_curcop);
463ee0b2 5976 }
78cdf107
Z
5977 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5978 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5979 s--;
5980 TOKEN(0);
5981 }
79072805 5982 BAop(OP_BIT_AND);
463ee0b2 5983 }
79072805 5984
3280af22
NIS
5985 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5986 if (*PL_tokenbuf) {
5987 PL_expect = XOPERATOR;
5988 force_ident(PL_tokenbuf, '&');
463ee0b2 5989 }
79072805
LW
5990 else
5991 PREREF('&');
6154021b 5992 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5993 TERM('&');
5994
378cc40b
LW
5995 case '|':
5996 s++;
78cdf107
Z
5997 if (*s++ == '|') {
5998 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5999 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
6000 s -= 2;
6001 TOKEN(0);
6002 }
a0d0e21e 6003 AOPERATOR(OROR);
78cdf107 6004 }
378cc40b 6005 s--;
78cdf107
Z
6006 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6007 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
6008 s--;
6009 TOKEN(0);
6010 }
79072805 6011 BOop(OP_BIT_OR);
378cc40b
LW
6012 case '=':
6013 s++;
748a9306 6014 {
90771dc0 6015 const char tmp = *s++;
78cdf107
Z
6016 if (tmp == '=') {
6017 if (!PL_lex_allbrackets &&
6018 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6019 s -= 2;
6020 TOKEN(0);
6021 }
90771dc0 6022 Eop(OP_EQ);
78cdf107
Z
6023 }
6024 if (tmp == '>') {
6025 if (!PL_lex_allbrackets &&
6026 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
6027 s -= 2;
6028 TOKEN(0);
6029 }
90771dc0 6030 OPERATOR(',');
78cdf107 6031 }
90771dc0
NC
6032 if (tmp == '~')
6033 PMop(OP_MATCH);
6034 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
6035 && strchr("+-*/%.^&|<",tmp))
6036 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6037 "Reversed %c= operator",(int)tmp);
6038 s--;
6039 if (PL_expect == XSTATE && isALPHA(tmp) &&
6040 (s == PL_linestart+1 || s[-2] == '\n') )
6041 {
4a7239ff
FC
6042 if ((PL_in_eval && !PL_rsfp && !PL_parser->filtered)
6043 || PL_lex_state != LEX_NORMAL) {
90771dc0
NC
6044 d = PL_bufend;
6045 while (s < d) {
6046 if (*s++ == '\n') {
6047 incline(s);
6048 if (strnEQ(s,"=cut",4)) {
6049 s = strchr(s,'\n');
6050 if (s)
6051 s++;
6052 else
6053 s = d;
6054 incline(s);
6055 goto retry;
6056 }
6057 }
a5f75d66 6058 }
90771dc0 6059 goto retry;
a5f75d66 6060 }
5db06880
NC
6061#ifdef PERL_MAD
6062 if (PL_madskills) {
cd81e915 6063 if (!PL_thiswhite)
6b29d1f5 6064 PL_thiswhite = newSVpvs("");
cd81e915 6065 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
6066 PL_bufend - PL_linestart);
6067 }
6068#endif
90771dc0 6069 s = PL_bufend;
737c24fc 6070 PL_parser->in_pod = 1;
90771dc0 6071 goto retry;
a5f75d66 6072 }
a0d0e21e 6073 }
64a40898 6074 if (PL_expect == XBLOCK) {
c35e046a 6075 const char *t = s;
51882d45 6076#ifdef PERL_STRICT_CR
c35e046a 6077 while (SPACE_OR_TAB(*t))
51882d45 6078#else
c35e046a 6079 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 6080#endif
c35e046a 6081 t++;
a0d0e21e 6082 if (*t == '\n' || *t == '#') {
705fe0e5 6083 formbrack = 1;
583c9d5c
FC
6084 ENTER;
6085 SAVEI8(PL_parser->form_lex_state);
64a40898 6086 SAVEI32(PL_lex_formbrack);
583c9d5c 6087 PL_parser->form_lex_state = PL_lex_state;
64a40898 6088 PL_lex_formbrack = PL_lex_brackets + 1;
a0d0e21e
LW
6089 goto leftbracket;
6090 }
79072805 6091 }
78cdf107
Z
6092 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6093 s--;
6094 TOKEN(0);
6095 }
6154021b 6096 pl_yylval.ival = 0;
a0d0e21e 6097 OPERATOR(ASSIGNOP);
378cc40b
LW
6098 case '!':
6099 s++;
90771dc0
NC
6100 {
6101 const char tmp = *s++;
6102 if (tmp == '=') {
6103 /* was this !=~ where !~ was meant?
6104 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6105
6106 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6107 const char *t = s+1;
6108
6109 while (t < PL_bufend && isSPACE(*t))
6110 ++t;
6111
6112 if (*t == '/' || *t == '?' ||
6113 ((*t == 'm' || *t == 's' || *t == 'y')
6114 && !isALNUM(t[1])) ||
6115 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6116 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6117 "!=~ should be !~");
6118 }
78cdf107
Z
6119 if (!PL_lex_allbrackets &&
6120 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6121 s -= 2;
6122 TOKEN(0);
6123 }
90771dc0
NC
6124 Eop(OP_NE);
6125 }
6126 if (tmp == '~')
6127 PMop(OP_NOT);
6128 }
378cc40b
LW
6129 s--;
6130 OPERATOR('!');
6131 case '<':
3280af22 6132 if (PL_expect != XOPERATOR) {
93a17b20 6133 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 6134 check_uni();
79072805
LW
6135 if (s[1] == '<')
6136 s = scan_heredoc(s);
6137 else
6138 s = scan_inputsymbol(s);
6139 TERM(sublex_start());
378cc40b
LW
6140 }
6141 s++;
90771dc0
NC
6142 {
6143 char tmp = *s++;
78cdf107
Z
6144 if (tmp == '<') {
6145 if (*s == '=' && !PL_lex_allbrackets &&
6146 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6147 s -= 2;
6148 TOKEN(0);
6149 }
90771dc0 6150 SHop(OP_LEFT_SHIFT);
78cdf107 6151 }
90771dc0
NC
6152 if (tmp == '=') {
6153 tmp = *s++;
78cdf107
Z
6154 if (tmp == '>') {
6155 if (!PL_lex_allbrackets &&
6156 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6157 s -= 3;
6158 TOKEN(0);
6159 }
90771dc0 6160 Eop(OP_NCMP);
78cdf107 6161 }
90771dc0 6162 s--;
78cdf107
Z
6163 if (!PL_lex_allbrackets &&
6164 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6165 s -= 2;
6166 TOKEN(0);
6167 }
90771dc0
NC
6168 Rop(OP_LE);
6169 }
395c3793 6170 }
378cc40b 6171 s--;
78cdf107
Z
6172 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6173 s--;
6174 TOKEN(0);
6175 }
79072805 6176 Rop(OP_LT);
378cc40b
LW
6177 case '>':
6178 s++;
90771dc0
NC
6179 {
6180 const char tmp = *s++;
78cdf107
Z
6181 if (tmp == '>') {
6182 if (*s == '=' && !PL_lex_allbrackets &&
6183 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6184 s -= 2;
6185 TOKEN(0);
6186 }
90771dc0 6187 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
6188 }
6189 else if (tmp == '=') {
6190 if (!PL_lex_allbrackets &&
6191 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6192 s -= 2;
6193 TOKEN(0);
6194 }
90771dc0 6195 Rop(OP_GE);
78cdf107 6196 }
90771dc0 6197 }
378cc40b 6198 s--;
78cdf107
Z
6199 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6200 s--;
6201 TOKEN(0);
6202 }
79072805 6203 Rop(OP_GT);
378cc40b
LW
6204
6205 case '$':
bbce6d69 6206 CLINE;
6207
3280af22
NIS
6208 if (PL_expect == XOPERATOR) {
6209 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6210 return deprecate_commaless_var_list();
a0d0e21e 6211 }
8990e307 6212 }
a0d0e21e 6213
c0b977fd 6214 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6215 PL_tokenbuf[0] = '@';
376b8730
SM
6216 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6217 sizeof PL_tokenbuf - 1, FALSE);
6218 if (PL_expect == XOPERATOR)
6219 no_op("Array length", s);
3280af22 6220 if (!PL_tokenbuf[1])
a0d0e21e 6221 PREREF(DOLSHARP);
3280af22
NIS
6222 PL_expect = XOPERATOR;
6223 PL_pending_ident = '#';
463ee0b2 6224 TOKEN(DOLSHARP);
79072805 6225 }
bbce6d69 6226
3280af22 6227 PL_tokenbuf[0] = '$';
376b8730
SM
6228 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6229 sizeof PL_tokenbuf - 1, FALSE);
6230 if (PL_expect == XOPERATOR)
6231 no_op("Scalar", s);
3280af22
NIS
6232 if (!PL_tokenbuf[1]) {
6233 if (s == PL_bufend)
bbce6d69 6234 yyerror("Final $ should be \\$ or $name");
6235 PREREF('$');
8990e307 6236 }
a0d0e21e 6237
ff68c719 6238 d = s;
90771dc0
NC
6239 {
6240 const char tmp = *s;
ae28bb2a 6241 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6242 s = SKIPSPACE1(s);
ff68c719 6243
90771dc0
NC
6244 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6245 && intuit_more(s)) {
6246 if (*s == '[') {
6247 PL_tokenbuf[0] = '@';
6248 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6249 char *t = s+1;
6250
6251 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6252 t++;
90771dc0 6253 if (*t++ == ',') {
29595ff2 6254 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6255 while (t < PL_bufend && *t != ']')
6256 t++;
9014280d 6257 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6258 "Multidimensional syntax %.*s not supported",
36c7798d 6259 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6260 }
748a9306 6261 }
93a17b20 6262 }
90771dc0
NC
6263 else if (*s == '{') {
6264 char *t;
6265 PL_tokenbuf[0] = '%';
6266 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6267 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6268 {
6269 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6270 do {
6271 t++;
6272 } while (isSPACE(*t));
90771dc0 6273 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6274 STRLEN len;
90771dc0 6275 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6276 &len);
c35e046a
AL
6277 while (isSPACE(*t))
6278 t++;
4c01a014
BF
6279 if (*t == ';'
6280 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
90771dc0 6281 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4c01a014
BF
6282 "You need to quote \"%"SVf"\"",
6283 SVfARG(newSVpvn_flags(tmpbuf, len,
6284 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
90771dc0
NC
6285 }
6286 }
6287 }
93a17b20 6288 }
bbce6d69 6289
90771dc0
NC
6290 PL_expect = XOPERATOR;
6291 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6292 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6293 if (!islop || PL_last_lop_op == OP_GREPSTART)
6294 PL_expect = XOPERATOR;
6295 else if (strchr("$@\"'`q", *s))
6296 PL_expect = XTERM; /* e.g. print $fh "foo" */
6297 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6298 PL_expect = XTERM; /* e.g. print $fh &sub */
6299 else if (isIDFIRST_lazy_if(s,UTF)) {
6300 char tmpbuf[sizeof PL_tokenbuf];
6301 int t2;
6302 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6303 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6304 /* binary operators exclude handle interpretations */
6305 switch (t2) {
6306 case -KEY_x:
6307 case -KEY_eq:
6308 case -KEY_ne:
6309 case -KEY_gt:
6310 case -KEY_lt:
6311 case -KEY_ge:
6312 case -KEY_le:
6313 case -KEY_cmp:
6314 break;
6315 default:
6316 PL_expect = XTERM; /* e.g. print $fh length() */
6317 break;
6318 }
6319 }
6320 else {
6321 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6322 }
6323 }
90771dc0
NC
6324 else if (isDIGIT(*s))
6325 PL_expect = XTERM; /* e.g. print $fh 3 */
6326 else if (*s == '.' && isDIGIT(s[1]))
6327 PL_expect = XTERM; /* e.g. print $fh .3 */
6328 else if ((*s == '?' || *s == '-' || *s == '+')
6329 && !isSPACE(s[1]) && s[1] != '=')
6330 PL_expect = XTERM; /* e.g. print $fh -1 */
6331 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6332 && s[1] != '/')
6333 PL_expect = XTERM; /* e.g. print $fh /.../
6334 XXX except DORDOR operator
6335 */
6336 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6337 && s[2] != '=')
6338 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6339 }
bbce6d69 6340 }
3280af22 6341 PL_pending_ident = '$';
79072805 6342 TOKEN('$');
378cc40b
LW
6343
6344 case '@':
3280af22 6345 if (PL_expect == XOPERATOR)
bbce6d69 6346 no_op("Array", s);
3280af22
NIS
6347 PL_tokenbuf[0] = '@';
6348 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6349 if (!PL_tokenbuf[1]) {
bbce6d69 6350 PREREF('@');
6351 }
3280af22 6352 if (PL_lex_state == LEX_NORMAL)
29595ff2 6353 s = SKIPSPACE1(s);
3280af22 6354 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6355 if (*s == '{')
3280af22 6356 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6357
6358 /* Warn about @ where they meant $. */
041457d9
DM
6359 if (*s == '[' || *s == '{') {
6360 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6361 const char *t = s + 1;
7e2040f0 6362 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
b9e186cd 6363 t += UTF ? UTF8SKIP(t) : 1;
a0d0e21e
LW
6364 if (*t == '}' || *t == ']') {
6365 t++;
29595ff2 6366 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
dcbac5bb 6367 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
9014280d 6368 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
b9e186cd
BF
6369 "Scalar value %"SVf" better written as $%"SVf,
6370 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6371 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6372 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6373 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
a0d0e21e 6374 }
93a17b20
LW
6375 }
6376 }
463ee0b2 6377 }
3280af22 6378 PL_pending_ident = '@';
79072805 6379 TERM('@');
378cc40b 6380
c963b151 6381 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6382 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6383 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6384 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6385 TOKEN(0);
6f33ba73
RGS
6386 s += 2;
6387 AOPERATOR(DORDOR);
6388 }
c963b151 6389 case '?': /* may either be conditional or pattern */
be25f609 6390 if (PL_expect == XOPERATOR) {
90771dc0 6391 char tmp = *s++;
c963b151 6392 if(tmp == '?') {
78cdf107
Z
6393 if (!PL_lex_allbrackets &&
6394 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6395 s--;
6396 TOKEN(0);
6397 }
6398 PL_lex_allbrackets++;
be25f609 6399 OPERATOR('?');
c963b151
BD
6400 }
6401 else {
6402 tmp = *s++;
6403 if(tmp == '/') {
6404 /* A // operator. */
78cdf107
Z
6405 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6406 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6407 LEX_FAKEEOF_LOGIC)) {
6408 s -= 2;
6409 TOKEN(0);
6410 }
c963b151
BD
6411 AOPERATOR(DORDOR);
6412 }
6413 else {
6414 s--;
78cdf107
Z
6415 if (*s == '=' && !PL_lex_allbrackets &&
6416 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6417 s--;
6418 TOKEN(0);
6419 }
c963b151
BD
6420 Mop(OP_DIVIDE);
6421 }
6422 }
6423 }
6424 else {
6425 /* Disable warning on "study /blah/" */
6426 if (PL_oldoldbufptr == PL_last_uni
6427 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6428 || memNE(PL_last_uni, "study", 5)
6429 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6430 ))
6431 check_uni();
725a61d7
Z
6432 if (*s == '?')
6433 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6434 s = scan_pat(s,OP_MATCH);
6435 TERM(sublex_start());
6436 }
378cc40b
LW
6437
6438 case '.':
51882d45
GS
6439 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6440#ifdef PERL_STRICT_CR
6441 && s[1] == '\n'
6442#else
6443 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6444#endif
6445 && (s == PL_linestart || s[-1] == '\n') )
6446 {
3280af22 6447 PL_expect = XSTATE;
705fe0e5 6448 formbrack = 2; /* dot seen where arguments expected */
79072805
LW
6449 goto rightbracket;
6450 }
be25f609 6451 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6452 s += 3;
6453 OPERATOR(YADAYADA);
6454 }
3280af22 6455 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6456 char tmp = *s++;
a687059c 6457 if (*s == tmp) {
78cdf107
Z
6458 if (!PL_lex_allbrackets &&
6459 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6460 s--;
6461 TOKEN(0);
6462 }
a687059c 6463 s++;
2f3197b3
LW
6464 if (*s == tmp) {
6465 s++;
6154021b 6466 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6467 }
6468 else
6154021b 6469 pl_yylval.ival = 0;
378cc40b 6470 OPERATOR(DOTDOT);
a687059c 6471 }
78cdf107
Z
6472 if (*s == '=' && !PL_lex_allbrackets &&
6473 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6474 s--;
6475 TOKEN(0);
6476 }
79072805 6477 Aop(OP_CONCAT);
378cc40b
LW
6478 }
6479 /* FALL THROUGH */
6480 case '0': case '1': case '2': case '3': case '4':
6481 case '5': case '6': case '7': case '8': case '9':
6154021b 6482 s = scan_num(s, &pl_yylval);
931e0695 6483 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6484 if (PL_expect == XOPERATOR)
8990e307 6485 no_op("Number",s);
79072805
LW
6486 TERM(THING);
6487
6488 case '\'':
d24ca0c5 6489 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6490 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6491 if (PL_expect == XOPERATOR) {
6492 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6493 return deprecate_commaless_var_list();
a0d0e21e 6494 }
463ee0b2 6495 else
8990e307 6496 no_op("String",s);
463ee0b2 6497 }
79072805 6498 if (!s)
d4c19fe8 6499 missingterm(NULL);
6154021b 6500 pl_yylval.ival = OP_CONST;
79072805
LW
6501 TERM(sublex_start());
6502
6503 case '"':
d24ca0c5 6504 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6505 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6506 if (PL_expect == XOPERATOR) {
6507 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6508 return deprecate_commaless_var_list();
a0d0e21e 6509 }
463ee0b2 6510 else
8990e307 6511 no_op("String",s);
463ee0b2 6512 }
79072805 6513 if (!s)
d4c19fe8 6514 missingterm(NULL);
6154021b 6515 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6516 /* FIXME. I think that this can be const if char *d is replaced by
6517 more localised variables. */
3280af22 6518 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6519 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6520 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6521 break;
6522 }
6523 }
79072805
LW
6524 TERM(sublex_start());
6525
6526 case '`':
d24ca0c5 6527 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6528 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6529 if (PL_expect == XOPERATOR)
8990e307 6530 no_op("Backticks",s);
79072805 6531 if (!s)
d4c19fe8 6532 missingterm(NULL);
9b201d7d 6533 readpipe_override();
79072805
LW
6534 TERM(sublex_start());
6535
6536 case '\\':
6537 s++;
a2a5de95
NC
6538 if (PL_lex_inwhat && isDIGIT(*s))
6539 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6540 *s, *s);
3280af22 6541 if (PL_expect == XOPERATOR)
8990e307 6542 no_op("Backslash",s);
79072805
LW
6543 OPERATOR(REFGEN);
6544
a7cb1f99 6545 case 'v':
e526c9e6 6546 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6547 char *start = s + 2;
dd629d5b 6548 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6549 start++;
6550 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6551 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6552 TERM(THING);
6553 }
e526c9e6 6554 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6555 else if (!isALPHA(*start) && (PL_expect == XTERM
6556 || PL_expect == XREF || PL_expect == XSTATE
6557 || PL_expect == XTERMORDORDOR)) {
af9f5953
BF
6558 GV *const gv = gv_fetchpvn_flags(s, start - s,
6559 UTF ? SVf_UTF8 : 0, SVt_PVCV);
e526c9e6 6560 if (!gv) {
6154021b 6561 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6562 TERM(THING);
6563 }
6564 }
a7cb1f99
GS
6565 }
6566 goto keylookup;
79072805 6567 case 'x':
3280af22 6568 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6569 s++;
6570 Mop(OP_REPEAT);
2f3197b3 6571 }
79072805
LW
6572 goto keylookup;
6573
378cc40b 6574 case '_':
79072805
LW
6575 case 'a': case 'A':
6576 case 'b': case 'B':
6577 case 'c': case 'C':
6578 case 'd': case 'D':
6579 case 'e': case 'E':
6580 case 'f': case 'F':
6581 case 'g': case 'G':
6582 case 'h': case 'H':
6583 case 'i': case 'I':
6584 case 'j': case 'J':
6585 case 'k': case 'K':
6586 case 'l': case 'L':
6587 case 'm': case 'M':
6588 case 'n': case 'N':
6589 case 'o': case 'O':
6590 case 'p': case 'P':
6591 case 'q': case 'Q':
6592 case 'r': case 'R':
6593 case 's': case 'S':
6594 case 't': case 'T':
6595 case 'u': case 'U':
a7cb1f99 6596 case 'V':
79072805
LW
6597 case 'w': case 'W':
6598 case 'X':
6599 case 'y': case 'Y':
6600 case 'z': case 'Z':
6601
49dc05e3 6602 keylookup: {
88e1f1a2 6603 bool anydelim;
90771dc0 6604 I32 tmp;
10edeb5d
JH
6605
6606 orig_keyword = 0;
6607 gv = NULL;
6608 gvp = NULL;
49dc05e3 6609
3280af22
NIS
6610 PL_bufptr = s;
6611 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6612
6613 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6614 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6615
6616 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6617 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6618 goto just_a_word;
6619
3643fb5f 6620 d = s;
3280af22 6621 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6622 d++; /* no comments skipped here, or s### is misparsed */
6623
748a9306 6624 /* Is this a word before a => operator? */
1c3923b3 6625 if (*d == '=' && d[1] == '>') {
748a9306 6626 CLINE;
6154021b 6627 pl_yylval.opval
d0a148a6
NC
6628 = (OP*)newSVOP(OP_CONST, 0,
6629 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6630 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6631 TERM(WORD);
6632 }
6633
88e1f1a2
JV
6634 /* Check for plugged-in keyword */
6635 {
6636 OP *o;
6637 int result;
6638 char *saved_bufptr = PL_bufptr;
6639 PL_bufptr = s;
16c91539 6640 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6641 s = PL_bufptr;
6642 if (result == KEYWORD_PLUGIN_DECLINE) {
6643 /* not a plugged-in keyword */
6644 PL_bufptr = saved_bufptr;
6645 } else if (result == KEYWORD_PLUGIN_STMT) {
6646 pl_yylval.opval = o;
6647 CLINE;
6648 PL_expect = XSTATE;
6649 return REPORT(PLUGSTMT);
6650 } else if (result == KEYWORD_PLUGIN_EXPR) {
6651 pl_yylval.opval = o;
6652 CLINE;
6653 PL_expect = XOPERATOR;
6654 return REPORT(PLUGEXPR);
6655 } else {
6656 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6657 PL_tokenbuf);
6658 }
6659 }
6660
6661 /* Check for built-in keyword */
6662 tmp = keyword(PL_tokenbuf, len, 0);
6663
6664 /* Is this a label? */
6665 if (!anydelim && PL_expect == XSTATE
6666 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2 6667 s = d + 1;
5db1eb8d
BF
6668 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6669 newSVpvn_flags(PL_tokenbuf,
6670 len, UTF ? SVf_UTF8 : 0));
88e1f1a2
JV
6671 CLINE;
6672 TOKEN(LABEL);
6673 }
6674
a0d0e21e 6675 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6676 GV *ogv = NULL; /* override (winner) */
6677 GV *hgv = NULL; /* hidden (loser) */
3280af22 6678 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6679 CV *cv;
af9f5953
BF
6680 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6681 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
56f7f34b
CS
6682 (cv = GvCVu(gv)))
6683 {
6684 if (GvIMPORTED_CV(gv))
6685 ogv = gv;
6686 else if (! CvMETHOD(cv))
6687 hgv = gv;
6688 }
6689 if (!ogv &&
af9f5953 6690 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
c60dbbc3 6691 UTF ? -(I32)len : (I32)len, FALSE)) &&
9e0d86f8 6692 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6693 GvCVu(gv) && GvIMPORTED_CV(gv))
6694 {
6695 ogv = gv;
6696 }
6697 }
6698 if (ogv) {
30fe34ed 6699 orig_keyword = tmp;
56f7f34b 6700 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6701 }
6702 else if (gv && !gvp
6703 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6704 && GvCVu(gv))
6e7b2336
GS
6705 {
6706 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6707 }
56f7f34b
CS
6708 else { /* no override */
6709 tmp = -tmp;
a2a5de95
NC
6710 if (tmp == KEY_dump) {
6711 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6712 "dump() better written as CORE::dump()");
ac206dc8 6713 }
a0714e2c 6714 gv = NULL;
56f7f34b 6715 gvp = 0;
a2a5de95
NC
6716 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6717 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6718 "Ambiguous call resolved as CORE::%s(), "
6719 "qualify as such or use &",
6720 GvENAME(hgv));
49dc05e3 6721 }
a0d0e21e
LW
6722 }
6723
6724 reserved_word:
6725 switch (tmp) {
79072805
LW
6726
6727 default: /* not a keyword */
0bfa2a8a
NC
6728 /* Trade off - by using this evil construction we can pull the
6729 variable gv into the block labelled keylookup. If not, then
6730 we have to give it function scope so that the goto from the
6731 earlier ':' case doesn't bypass the initialisation. */
6732 if (0) {
6733 just_a_word_zero_gv:
6734 gv = NULL;
6735 gvp = NULL;
8bee0991 6736 orig_keyword = 0;
0bfa2a8a 6737 }
93a17b20 6738 just_a_word: {
96e4d5b1 6739 SV *sv;
ce29ac45 6740 int pkgname = 0;
f54cb97a 6741 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6742 OP *rv2cv_op;
5069cc75 6743 CV *cv;
5db06880 6744#ifdef PERL_MAD
cd81e915 6745 SV *nextPL_nextwhite = 0;
5db06880
NC
6746#endif
6747
8990e307
LW
6748
6749 /* Get the rest if it looks like a package qualifier */
6750
155aba94 6751 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6752 STRLEN morelen;
3280af22 6753 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6754 TRUE, &morelen);
6755 if (!morelen)
86fe3f36
BF
6756 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6757 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6758 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
ec2ab091 6759 *s == '\'' ? "'" : "::");
c3e0f903 6760 len += morelen;
ce29ac45 6761 pkgname = 1;
a0d0e21e 6762 }
8990e307 6763
3280af22
NIS
6764 if (PL_expect == XOPERATOR) {
6765 if (PL_bufptr == PL_linestart) {
57843af0 6766 CopLINE_dec(PL_curcop);
f1f66076 6767 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6768 CopLINE_inc(PL_curcop);
463ee0b2
LW
6769 }
6770 else
54310121 6771 no_op("Bareword",s);
463ee0b2 6772 }
8990e307 6773
c3e0f903 6774 /* Look for a subroutine with this name in current package,
486ec47a 6775 unless name is "Foo::", in which case Foo is a bareword
c3e0f903
GS
6776 (and a package name). */
6777
5db06880 6778 if (len > 2 && !PL_madskills &&
3280af22 6779 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6780 {
f776e3cd 6781 if (ckWARN(WARN_BAREWORD)
af9f5953 6782 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
9014280d 6783 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
979a1401
BF
6784 "Bareword \"%"SVf"\" refers to nonexistent package",
6785 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6786 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
c3e0f903 6787 len -= 2;
3280af22 6788 PL_tokenbuf[len] = '\0';
a0714e2c 6789 gv = NULL;
c3e0f903
GS
6790 gvp = 0;
6791 }
6792 else {
62d55b22
NC
6793 if (!gv) {
6794 /* Mustn't actually add anything to a symbol table.
6795 But also don't want to "initialise" any placeholder
6796 constants that might already be there into full
6797 blown PVGVs with attached PVCV. */
90e5519e 6798 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
af9f5953
BF
6799 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6800 SVt_PVCV);
62d55b22 6801 }
b3d904f3 6802 len = 0;
c3e0f903
GS
6803 }
6804
6805 /* if we saw a global override before, get the right name */
8990e307 6806
37bb7629
EB
6807 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6808 len ? len : strlen(PL_tokenbuf));
49dc05e3 6809 if (gvp) {
37bb7629 6810 SV * const tmp_sv = sv;
396482e1 6811 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6812 sv_catsv(sv, tmp_sv);
6813 SvREFCNT_dec(tmp_sv);
8a7a129d 6814 }
37bb7629 6815
5db06880 6816#ifdef PERL_MAD
cd81e915
NC
6817 if (PL_madskills && !PL_thistoken) {
6818 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6819 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6820 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6821 }
6822#endif
8990e307 6823
a0d0e21e 6824 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6825 CLINE;
6154021b
RGS
6826 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6827 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6828
c3e0f903 6829 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6830 if (len)
6831 goto safe_bareword;
6832
f7461760 6833 {
d8ebba9f 6834 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
6835 const_op->op_private = OPpCONST_BARE;
6836 rv2cv_op = newCVREF(0, const_op);
6837 }
d9088386 6838 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6839
8990e307
LW
6840 /* See if it's the indirect object for a list operator. */
6841
3280af22
NIS
6842 if (PL_oldoldbufptr &&
6843 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6844 (PL_oldoldbufptr == PL_last_lop
6845 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6846 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6847 (PL_expect == XREF ||
6848 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6849 {
748a9306
LW
6850 bool immediate_paren = *s == '(';
6851
a0d0e21e 6852 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6853 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6854#ifdef PERL_MAD
cd81e915 6855 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6856#endif
a0d0e21e
LW
6857
6858 /* Two barewords in a row may indicate method call. */
6859
62d55b22 6860 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6861 (tmp = intuit_method(s, gv, cv))) {
6862 op_free(rv2cv_op);
78cdf107
Z
6863 if (tmp == METHOD && !PL_lex_allbrackets &&
6864 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6865 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6866 return REPORT(tmp);
f7461760 6867 }
a0d0e21e
LW
6868
6869 /* If not a declared subroutine, it's an indirect object. */
6870 /* (But it's an indir obj regardless for sort.) */
7294df96 6871 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6872
7294df96
RGS
6873 if (
6874 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6875 (!cv &&
a9ef352a 6876 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6877 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6878 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6879 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6880 )
a9ef352a 6881 {
3280af22 6882 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6883 goto bareword;
93a17b20
LW
6884 }
6885 }
8990e307 6886
3280af22 6887 PL_expect = XOPERATOR;
5db06880
NC
6888#ifdef PERL_MAD
6889 if (isSPACE(*s))
cd81e915
NC
6890 s = SKIPSPACE2(s,nextPL_nextwhite);
6891 PL_nextwhite = nextPL_nextwhite;
5db06880 6892#else
8990e307 6893 s = skipspace(s);
5db06880 6894#endif
1c3923b3
GS
6895
6896 /* Is this a word before a => operator? */
ce29ac45 6897 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6898 op_free(rv2cv_op);
1c3923b3 6899 CLINE;
6154021b 6900 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6901 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6902 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6903 TERM(WORD);
6904 }
6905
6906 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6907 if (*s == '(') {
79072805 6908 CLINE;
5069cc75 6909 if (cv) {
c35e046a
AL
6910 d = s + 1;
6911 while (SPACE_OR_TAB(*d))
6912 d++;
f7461760 6913 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6914 s = d + 1;
c631f32b 6915 goto its_constant;
96e4d5b1 6916 }
6917 }
5db06880
NC
6918#ifdef PERL_MAD
6919 if (PL_madskills) {
cd81e915
NC
6920 PL_nextwhite = PL_thiswhite;
6921 PL_thiswhite = 0;
5db06880 6922 }
cd81e915 6923 start_force(PL_curforce);
5db06880 6924#endif
6154021b 6925 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6926 PL_expect = XOPERATOR;
5db06880
NC
6927#ifdef PERL_MAD
6928 if (PL_madskills) {
cd81e915
NC
6929 PL_nextwhite = nextPL_nextwhite;
6930 curmad('X', PL_thistoken);
6b29d1f5 6931 PL_thistoken = newSVpvs("");
5db06880
NC
6932 }
6933#endif
f7461760 6934 op_free(rv2cv_op);
93a17b20 6935 force_next(WORD);
6154021b 6936 pl_yylval.ival = 0;
463ee0b2 6937 TOKEN('&');
79072805 6938 }
93a17b20 6939
a0d0e21e 6940 /* If followed by var or block, call it a method (unless sub) */
8990e307 6941
f7461760
Z
6942 if ((*s == '$' || *s == '{') && !cv) {
6943 op_free(rv2cv_op);
3280af22
NIS
6944 PL_last_lop = PL_oldbufptr;
6945 PL_last_lop_op = OP_METHOD;
78cdf107
Z
6946 if (!PL_lex_allbrackets &&
6947 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6948 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 6949 PREBLOCK(METHOD);
463ee0b2
LW
6950 }
6951
8990e307
LW
6952 /* If followed by a bareword, see if it looks like indir obj. */
6953
30fe34ed
RGS
6954 if (!orig_keyword
6955 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6956 && (tmp = intuit_method(s, gv, cv))) {
6957 op_free(rv2cv_op);
78cdf107
Z
6958 if (tmp == METHOD && !PL_lex_allbrackets &&
6959 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6960 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6961 return REPORT(tmp);
f7461760 6962 }
93a17b20 6963
8990e307
LW
6964 /* Not a method, so call it a subroutine (if defined) */
6965
5069cc75 6966 if (cv) {
43b5ab4c
BF
6967 if (lastchar == '-') {
6968 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
6969 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6970 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
6971 SVfARG(tmpsv), SVfARG(tmpsv));
6972 }
89bfa8cd 6973 /* Check for a constant sub */
f7461760 6974 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6975 its_constant:
f7461760 6976 op_free(rv2cv_op);
6154021b
RGS
6977 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6978 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
cc2ebcd7 6979 pl_yylval.opval->op_private = OPpCONST_FOLDED;
6b7c6d95 6980 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 6981 TOKEN(WORD);
89bfa8cd 6982 }
6983
6154021b 6984 op_free(pl_yylval.opval);
f7461760 6985 pl_yylval.opval = rv2cv_op;
6154021b 6986 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6987 PL_last_lop = PL_oldbufptr;
bf848113 6988 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6989 /* Is there a prototype? */
5db06880
NC
6990 if (
6991#ifdef PERL_MAD
6992 cv &&
6993#endif
d9f2850e
RGS
6994 SvPOK(cv))
6995 {
8fa6a409
FC
6996 STRLEN protolen = CvPROTOLEN(cv);
6997 const char *proto = CvPROTO(cv);
b5fb7ce3 6998 bool optional;
5f66b61c 6999 if (!protolen)
4633a7c4 7000 TERM(FUNC0SUB);
b5fb7ce3
FC
7001 if ((optional = *proto == ';'))
7002 do
0f5d0394 7003 proto++;
b5fb7ce3 7004 while (*proto == ';');
649d02de
FC
7005 if (
7006 (
7007 (
7008 *proto == '$' || *proto == '_'
c035a075 7009 || *proto == '*' || *proto == '+'
649d02de
FC
7010 )
7011 && proto[1] == '\0'
7012 )
7013 || (
7014 *proto == '\\' && proto[1] && proto[2] == '\0'
7015 )
7016 )
b5fb7ce3 7017 UNIPROTO(UNIOPSUB,optional);
649d02de
FC
7018 if (*proto == '\\' && proto[1] == '[') {
7019 const char *p = proto + 2;
7020 while(*p && *p != ']')
7021 ++p;
b5fb7ce3
FC
7022 if(*p == ']' && !p[1])
7023 UNIPROTO(UNIOPSUB,optional);
649d02de 7024 }
7a52d87a 7025 if (*proto == '&' && *s == '{') {
49a54bbe
NC
7026 if (PL_curstash)
7027 sv_setpvs(PL_subname, "__ANON__");
7028 else
7029 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
7030 if (!PL_lex_allbrackets &&
7031 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7032 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
7033 PREBLOCK(LSTOPSUB);
7034 }
a9ef352a 7035 }
5db06880
NC
7036#ifdef PERL_MAD
7037 {
7038 if (PL_madskills) {
cd81e915
NC
7039 PL_nextwhite = PL_thiswhite;
7040 PL_thiswhite = 0;
5db06880 7041 }
cd81e915 7042 start_force(PL_curforce);
6154021b 7043 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
7044 PL_expect = XTERM;
7045 if (PL_madskills) {
cd81e915
NC
7046 PL_nextwhite = nextPL_nextwhite;
7047 curmad('X', PL_thistoken);
6b29d1f5 7048 PL_thistoken = newSVpvs("");
5db06880
NC
7049 }
7050 force_next(WORD);
78cdf107
Z
7051 if (!PL_lex_allbrackets &&
7052 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7053 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7054 TOKEN(NOAMP);
7055 }
7056 }
7057
7058 /* Guess harder when madskills require "best effort". */
7059 if (PL_madskills && (!gv || !GvCVu(gv))) {
7060 int probable_sub = 0;
7061 if (strchr("\"'`$@%0123456789!*+{[<", *s))
7062 probable_sub = 1;
7063 else if (isALPHA(*s)) {
7064 char tmpbuf[1024];
7065 STRLEN tmplen;
7066 d = s;
7067 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 7068 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
7069 probable_sub = 1;
7070 else {
7071 while (d < PL_bufend && isSPACE(*d))
7072 d++;
7073 if (*d == '=' && d[1] == '>')
7074 probable_sub = 1;
7075 }
7076 }
7077 if (probable_sub) {
af9f5953
BF
7078 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7079 SVt_PVCV);
6154021b 7080 op_free(pl_yylval.opval);
f7461760 7081 pl_yylval.opval = rv2cv_op;
6154021b 7082 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
7083 PL_last_lop = PL_oldbufptr;
7084 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
7085 PL_nextwhite = PL_thiswhite;
7086 PL_thiswhite = 0;
7087 start_force(PL_curforce);
6154021b 7088 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 7089 PL_expect = XTERM;
cd81e915
NC
7090 PL_nextwhite = nextPL_nextwhite;
7091 curmad('X', PL_thistoken);
6b29d1f5 7092 PL_thistoken = newSVpvs("");
5db06880 7093 force_next(WORD);
78cdf107
Z
7094 if (!PL_lex_allbrackets &&
7095 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7096 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7097 TOKEN(NOAMP);
7098 }
7099#else
6154021b 7100 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 7101 PL_expect = XTERM;
8990e307 7102 force_next(WORD);
78cdf107
Z
7103 if (!PL_lex_allbrackets &&
7104 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7105 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 7106 TOKEN(NOAMP);
5db06880 7107#endif
8990e307 7108 }
748a9306 7109
8990e307
LW
7110 /* Call it a bare word */
7111
5603f27d 7112 if (PL_hints & HINT_STRICT_SUBS)
6154021b 7113 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 7114 else {
9a073a1d
RGS
7115 bareword:
7116 /* after "print" and similar functions (corresponding to
7117 * "F? L" in opcode.pl), whatever wasn't already parsed as
7118 * a filehandle should be subject to "strict subs".
7119 * Likewise for the optional indirect-object argument to system
7120 * or exec, which can't be a bareword */
7121 if ((PL_last_lop_op == OP_PRINT
7122 || PL_last_lop_op == OP_PRTF
7123 || PL_last_lop_op == OP_SAY
7124 || PL_last_lop_op == OP_SYSTEM
7125 || PL_last_lop_op == OP_EXEC)
7126 && (PL_hints & HINT_STRICT_SUBS))
7127 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
7128 if (lastchar != '-') {
7129 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
7130 d = PL_tokenbuf;
7131 while (isLOWER(*d))
7132 d++;
af9f5953 7133 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
9014280d 7134 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
7135 PL_tokenbuf);
7136 }
748a9306
LW
7137 }
7138 }
f7461760 7139 op_free(rv2cv_op);
c3e0f903
GS
7140
7141 safe_bareword:
9b387841
NC
7142 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7143 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
02571fe8
BF
7144 "Operator or semicolon missing before %c%"SVf,
7145 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7146 strlen(PL_tokenbuf),
7147 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
9b387841
NC
7148 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7149 "Ambiguous use of %c resolved as operator %c",
7150 lastchar, lastchar);
748a9306 7151 }
93a17b20 7152 TOKEN(WORD);
79072805 7153 }
79072805 7154
68dc0745 7155 case KEY___FILE__:
7eb971ee 7156 FUN0OP(
14f0f125 7157 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7eb971ee 7158 );
46fc3d4c 7159
79072805 7160 case KEY___LINE__:
7eb971ee
FC
7161 FUN0OP(
7162 (OP*)newSVOP(OP_CONST, 0,
7163 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7164 );
68dc0745 7165
7166 case KEY___PACKAGE__:
7eb971ee
FC
7167 FUN0OP(
7168 (OP*)newSVOP(OP_CONST, 0,
3280af22 7169 (PL_curstash
5aaec2b4 7170 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
7171 : &PL_sv_undef))
7172 );
79072805 7173
e50aee73 7174 case KEY___DATA__:
79072805
LW
7175 case KEY___END__: {
7176 GV *gv;
3280af22 7177 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 7178 const char *pname = "main";
affc13fc
FC
7179 STRLEN plen = 4;
7180 U32 putf8 = 0;
3280af22 7181 if (PL_tokenbuf[2] == 'D')
affc13fc
FC
7182 {
7183 HV * const stash =
7184 PL_curstash ? PL_curstash : PL_defstash;
7185 pname = HvNAME_get(stash);
7186 plen = HvNAMELEN (stash);
7187 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7188 }
7189 gv = gv_fetchpvn_flags(
7190 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7191 plen+6, GV_ADD|putf8, SVt_PVIO
7192 );
a5f75d66 7193 GvMULTI_on(gv);
79072805 7194 if (!GvIO(gv))
a0d0e21e 7195 GvIOp(gv) = newIO();
3280af22 7196 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
7197#if defined(HAS_FCNTL) && defined(F_SETFD)
7198 {
f54cb97a 7199 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
7200 fcntl(fd,F_SETFD,fd >= 3);
7201 }
79072805 7202#endif
fd049845 7203 /* Mark this internal pseudo-handle as clean */
7204 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 7205 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 7206 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 7207 else
50952442 7208 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
7209#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7210 /* if the script was opened in binmode, we need to revert
53129d29 7211 * it to text mode for compatibility; but only iff it has CRs
c39cd008 7212 * XXX this is a questionable hack at best. */
53129d29
GS
7213 if (PL_bufend-PL_bufptr > 2
7214 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
7215 {
7216 Off_t loc = 0;
50952442 7217 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
7218 loc = PerlIO_tell(PL_rsfp);
7219 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7220 }
2986a63f
JH
7221#ifdef NETWARE
7222 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7223#else
c39cd008 7224 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 7225#endif /* NETWARE */
c39cd008
GS
7226 if (loc > 0)
7227 PerlIO_seek(PL_rsfp, loc, 0);
7228 }
7229 }
7230#endif
7948272d 7231#ifdef PERLIO_LAYERS
52d2e0f4
JH
7232 if (!IN_BYTES) {
7233 if (UTF)
7234 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7235 else if (PL_encoding) {
7236 SV *name;
7237 dSP;
7238 ENTER;
7239 SAVETMPS;
7240 PUSHMARK(sp);
7241 EXTEND(SP, 1);
7242 XPUSHs(PL_encoding);
7243 PUTBACK;
7244 call_method("name", G_SCALAR);
7245 SPAGAIN;
7246 name = POPs;
7247 PUTBACK;
bfed75c6 7248 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7249 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7250 SVfARG(name)));
52d2e0f4
JH
7251 FREETMPS;
7252 LEAVE;
7253 }
7254 }
7948272d 7255#endif
5db06880
NC
7256#ifdef PERL_MAD
7257 if (PL_madskills) {
cd81e915
NC
7258 if (PL_realtokenstart >= 0) {
7259 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7260 if (!PL_endwhite)
6b29d1f5 7261 PL_endwhite = newSVpvs("");
cd81e915
NC
7262 sv_catsv(PL_endwhite, PL_thiswhite);
7263 PL_thiswhite = 0;
7264 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7265 PL_realtokenstart = -1;
5db06880 7266 }
5cc814fd
NC
7267 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7268 != NULL) ;
5db06880
NC
7269 }
7270#endif
4608196e 7271 PL_rsfp = NULL;
79072805
LW
7272 }
7273 goto fake_eof;
e929a76b 7274 }
de3bb511 7275
84ed0108 7276 case KEY___SUB__:
1a35f9ff 7277 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
84ed0108 7278
8990e307 7279 case KEY_AUTOLOAD:
ed6116ce 7280 case KEY_DESTROY:
79072805 7281 case KEY_BEGIN:
3c10abe3 7282 case KEY_UNITCHECK:
7d30b5c4 7283 case KEY_CHECK:
7d07dbc2 7284 case KEY_INIT:
7d30b5c4 7285 case KEY_END:
3280af22
NIS
7286 if (PL_expect == XSTATE) {
7287 s = PL_bufptr;
93a17b20 7288 goto really_sub;
79072805
LW
7289 }
7290 goto just_a_word;
7291
a0d0e21e
LW
7292 case KEY_CORE:
7293 if (*s == ':' && s[1] == ':') {
ee36fb64 7294 STRLEN olen = len;
748a9306 7295 d = s;
ee36fb64 7296 s += 2;
3280af22 7297 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
ee36fb64
FC
7298 if ((*s == ':' && s[1] == ':')
7299 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7300 {
7301 s = d;
7302 len = olen;
7303 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7304 goto just_a_word;
7305 }
7306 if (!tmp)
3773592b
BF
7307 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7308 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7309 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
a0d0e21e
LW
7310 if (tmp < 0)
7311 tmp = -tmp;
d67594ff
FC
7312 else if (tmp == KEY_require || tmp == KEY_do
7313 || tmp == KEY_glob)
a72a1c8b 7314 /* that's a way to remember we saw "CORE::" */
850e8516 7315 orig_keyword = tmp;
a0d0e21e
LW
7316 goto reserved_word;
7317 }
7318 goto just_a_word;
7319
463ee0b2
LW
7320 case KEY_abs:
7321 UNI(OP_ABS);
7322
79072805
LW
7323 case KEY_alarm:
7324 UNI(OP_ALARM);
7325
7326 case KEY_accept:
a0d0e21e 7327 LOP(OP_ACCEPT,XTERM);
79072805 7328
463ee0b2 7329 case KEY_and:
78cdf107
Z
7330 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7331 return REPORT(0);
463ee0b2
LW
7332 OPERATOR(ANDOP);
7333
79072805 7334 case KEY_atan2:
a0d0e21e 7335 LOP(OP_ATAN2,XTERM);
85e6fe83 7336
79072805 7337 case KEY_bind:
a0d0e21e 7338 LOP(OP_BIND,XTERM);
79072805
LW
7339
7340 case KEY_binmode:
1c1fc3ea 7341 LOP(OP_BINMODE,XTERM);
79072805
LW
7342
7343 case KEY_bless:
a0d0e21e 7344 LOP(OP_BLESS,XTERM);
79072805 7345
0d863452
RH
7346 case KEY_break:
7347 FUN0(OP_BREAK);
7348
79072805
LW
7349 case KEY_chop:
7350 UNI(OP_CHOP);
7351
7352 case KEY_continue:
0d863452
RH
7353 /* We have to disambiguate the two senses of
7354 "continue". If the next token is a '{' then
7355 treat it as the start of a continue block;
7356 otherwise treat it as a control operator.
7357 */
7358 s = skipspace(s);
7359 if (*s == '{')
79072805 7360 PREBLOCK(CONTINUE);
0d863452
RH
7361 else
7362 FUN0(OP_CONTINUE);
79072805
LW
7363
7364 case KEY_chdir:
fafc274c
NC
7365 /* may use HOME */
7366 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7367 UNI(OP_CHDIR);
7368
7369 case KEY_close:
7370 UNI(OP_CLOSE);
7371
7372 case KEY_closedir:
7373 UNI(OP_CLOSEDIR);
7374
7375 case KEY_cmp:
78cdf107
Z
7376 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7377 return REPORT(0);
79072805
LW
7378 Eop(OP_SCMP);
7379
7380 case KEY_caller:
7381 UNI(OP_CALLER);
7382
7383 case KEY_crypt:
7384#ifdef FCRYPT
f4c556ac
GS
7385 if (!PL_cryptseen) {
7386 PL_cryptseen = TRUE;
de3bb511 7387 init_des();
f4c556ac 7388 }
a687059c 7389#endif
a0d0e21e 7390 LOP(OP_CRYPT,XTERM);
79072805
LW
7391
7392 case KEY_chmod:
a0d0e21e 7393 LOP(OP_CHMOD,XTERM);
79072805
LW
7394
7395 case KEY_chown:
a0d0e21e 7396 LOP(OP_CHOWN,XTERM);
79072805
LW
7397
7398 case KEY_connect:
a0d0e21e 7399 LOP(OP_CONNECT,XTERM);
79072805 7400
463ee0b2
LW
7401 case KEY_chr:
7402 UNI(OP_CHR);
7403
79072805
LW
7404 case KEY_cos:
7405 UNI(OP_COS);
7406
7407 case KEY_chroot:
7408 UNI(OP_CHROOT);
7409
0d863452
RH
7410 case KEY_default:
7411 PREBLOCK(DEFAULT);
7412
79072805 7413 case KEY_do:
29595ff2 7414 s = SKIPSPACE1(s);
79072805 7415 if (*s == '{')
a0d0e21e 7416 PRETERMBLOCK(DO);
c2900bb8
FC
7417 if (*s != '\'') {
7418 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, 1, &len);
7419 if (len) {
7420 d = SKIPSPACE1(d);
7421 if (*d == '(') s = force_word(s,WORD,TRUE,TRUE,FALSE);
7422 }
7423 }
850e8516
RGS
7424 if (orig_keyword == KEY_do) {
7425 orig_keyword = 0;
6154021b 7426 pl_yylval.ival = 1;
850e8516
RGS
7427 }
7428 else
6154021b 7429 pl_yylval.ival = 0;
378cc40b 7430 OPERATOR(DO);
79072805
LW
7431
7432 case KEY_die:
3280af22 7433 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7434 LOP(OP_DIE,XTERM);
79072805
LW
7435
7436 case KEY_defined:
7437 UNI(OP_DEFINED);
7438
7439 case KEY_delete:
a0d0e21e 7440 UNI(OP_DELETE);
79072805
LW
7441
7442 case KEY_dbmopen:
74e8ce34
NC
7443 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7444 STR_WITH_LEN("NDBM_File::"),
7445 STR_WITH_LEN("DB_File::"),
7446 STR_WITH_LEN("GDBM_File::"),
7447 STR_WITH_LEN("SDBM_File::"),
7448 STR_WITH_LEN("ODBM_File::"),
7449 NULL);
a0d0e21e 7450 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7451
7452 case KEY_dbmclose:
7453 UNI(OP_DBMCLOSE);
7454
7455 case KEY_dump:
a0d0e21e 7456 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7457 LOOPX(OP_DUMP);
7458
7459 case KEY_else:
7460 PREBLOCK(ELSE);
7461
7462 case KEY_elsif:
6154021b 7463 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7464 OPERATOR(ELSIF);
7465
7466 case KEY_eq:
78cdf107
Z
7467 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7468 return REPORT(0);
79072805
LW
7469 Eop(OP_SEQ);
7470
a0d0e21e
LW
7471 case KEY_exists:
7472 UNI(OP_EXISTS);
4e553d73 7473
79072805 7474 case KEY_exit:
5db06880
NC
7475 if (PL_madskills)
7476 UNI(OP_INT);
79072805
LW
7477 UNI(OP_EXIT);
7478
7479 case KEY_eval:
29595ff2 7480 s = SKIPSPACE1(s);
32e2a35d
RGS
7481 if (*s == '{') { /* block eval */
7482 PL_expect = XTERMBLOCK;
7483 UNIBRACK(OP_ENTERTRY);
7484 }
7485 else { /* string eval */
7486 PL_expect = XTERM;
7487 UNIBRACK(OP_ENTEREVAL);
7488 }
79072805 7489
7d789282
FC
7490 case KEY_evalbytes:
7491 PL_expect = XTERM;
7492 UNIBRACK(-OP_ENTEREVAL);
7493
79072805
LW
7494 case KEY_eof:
7495 UNI(OP_EOF);
7496
7497 case KEY_exp:
7498 UNI(OP_EXP);
7499
7500 case KEY_each:
7501 UNI(OP_EACH);
7502
7503 case KEY_exec:
a0d0e21e 7504 LOP(OP_EXEC,XREF);
79072805
LW
7505
7506 case KEY_endhostent:
7507 FUN0(OP_EHOSTENT);
7508
7509 case KEY_endnetent:
7510 FUN0(OP_ENETENT);
7511
7512 case KEY_endservent:
7513 FUN0(OP_ESERVENT);
7514
7515 case KEY_endprotoent:
7516 FUN0(OP_EPROTOENT);
7517
7518 case KEY_endpwent:
7519 FUN0(OP_EPWENT);
7520
7521 case KEY_endgrent:
7522 FUN0(OP_EGRENT);
7523
7524 case KEY_for:
7525 case KEY_foreach:
78cdf107
Z
7526 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7527 return REPORT(0);
6154021b 7528 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7529 s = SKIPSPACE1(s);
7e2040f0 7530 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7531 char *p = s;
5db06880
NC
7532#ifdef PERL_MAD
7533 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7534#endif
7535
3280af22 7536 if ((PL_bufend - p) >= 3 &&
55497cff 7537 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7538 p += 2;
77ca0c92
LW
7539 else if ((PL_bufend - p) >= 4 &&
7540 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7541 p += 3;
29595ff2 7542 p = PEEKSPACE(p);
7e2040f0 7543 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7544 p = scan_ident(p, PL_bufend,
7545 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7546 p = PEEKSPACE(p);
77ca0c92
LW
7547 }
7548 if (*p != '$')
cea2e8a9 7549 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7550#ifdef PERL_MAD
7551 s = SvPVX(PL_linestr) + soff;
7552#endif
55497cff 7553 }
79072805
LW
7554 OPERATOR(FOR);
7555
7556 case KEY_formline:
a0d0e21e 7557 LOP(OP_FORMLINE,XTERM);
79072805
LW
7558
7559 case KEY_fork:
7560 FUN0(OP_FORK);
7561
838f2281
BF
7562 case KEY_fc:
7563 UNI(OP_FC);
7564
79072805 7565 case KEY_fcntl:
a0d0e21e 7566 LOP(OP_FCNTL,XTERM);
79072805
LW
7567
7568 case KEY_fileno:
7569 UNI(OP_FILENO);
7570
7571 case KEY_flock:
a0d0e21e 7572 LOP(OP_FLOCK,XTERM);
79072805
LW
7573
7574 case KEY_gt:
78cdf107
Z
7575 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7576 return REPORT(0);
79072805
LW
7577 Rop(OP_SGT);
7578
7579 case KEY_ge:
78cdf107
Z
7580 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7581 return REPORT(0);
79072805
LW
7582 Rop(OP_SGE);
7583
7584 case KEY_grep:
2c38e13d 7585 LOP(OP_GREPSTART, XREF);
79072805
LW
7586
7587 case KEY_goto:
a0d0e21e 7588 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7589 LOOPX(OP_GOTO);
7590
7591 case KEY_gmtime:
7592 UNI(OP_GMTIME);
7593
7594 case KEY_getc:
6f33ba73 7595 UNIDOR(OP_GETC);
79072805
LW
7596
7597 case KEY_getppid:
7598 FUN0(OP_GETPPID);
7599
7600 case KEY_getpgrp:
7601 UNI(OP_GETPGRP);
7602
7603 case KEY_getpriority:
a0d0e21e 7604 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7605
7606 case KEY_getprotobyname:
7607 UNI(OP_GPBYNAME);
7608
7609 case KEY_getprotobynumber:
a0d0e21e 7610 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7611
7612 case KEY_getprotoent:
7613 FUN0(OP_GPROTOENT);
7614
7615 case KEY_getpwent:
7616 FUN0(OP_GPWENT);
7617
7618 case KEY_getpwnam:
ff68c719 7619 UNI(OP_GPWNAM);
79072805
LW
7620
7621 case KEY_getpwuid:
ff68c719 7622 UNI(OP_GPWUID);
79072805
LW
7623
7624 case KEY_getpeername:
7625 UNI(OP_GETPEERNAME);
7626
7627 case KEY_gethostbyname:
7628 UNI(OP_GHBYNAME);
7629
7630 case KEY_gethostbyaddr:
a0d0e21e 7631 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7632
7633 case KEY_gethostent:
7634 FUN0(OP_GHOSTENT);
7635
7636 case KEY_getnetbyname:
7637 UNI(OP_GNBYNAME);
7638
7639 case KEY_getnetbyaddr:
a0d0e21e 7640 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7641
7642 case KEY_getnetent:
7643 FUN0(OP_GNETENT);
7644
7645 case KEY_getservbyname:
a0d0e21e 7646 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7647
7648 case KEY_getservbyport:
a0d0e21e 7649 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7650
7651 case KEY_getservent:
7652 FUN0(OP_GSERVENT);
7653
7654 case KEY_getsockname:
7655 UNI(OP_GETSOCKNAME);
7656
7657 case KEY_getsockopt:
a0d0e21e 7658 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7659
7660 case KEY_getgrent:
7661 FUN0(OP_GGRENT);
7662
7663 case KEY_getgrnam:
ff68c719 7664 UNI(OP_GGRNAM);
79072805
LW
7665
7666 case KEY_getgrgid:
ff68c719 7667 UNI(OP_GGRGID);
79072805
LW
7668
7669 case KEY_getlogin:
7670 FUN0(OP_GETLOGIN);
7671
0d863452 7672 case KEY_given:
6154021b 7673 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7674 OPERATOR(GIVEN);
7675
93a17b20 7676 case KEY_glob:
d67594ff
FC
7677 LOP(
7678 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7679 XTERM
7680 );
93a17b20 7681
79072805
LW
7682 case KEY_hex:
7683 UNI(OP_HEX);
7684
7685 case KEY_if:
78cdf107
Z
7686 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7687 return REPORT(0);
6154021b 7688 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7689 OPERATOR(IF);
7690
7691 case KEY_index:
a0d0e21e 7692 LOP(OP_INDEX,XTERM);
79072805
LW
7693
7694 case KEY_int:
7695 UNI(OP_INT);
7696
7697 case KEY_ioctl:
a0d0e21e 7698 LOP(OP_IOCTL,XTERM);
79072805
LW
7699
7700 case KEY_join:
a0d0e21e 7701 LOP(OP_JOIN,XTERM);
79072805
LW
7702
7703 case KEY_keys:
7704 UNI(OP_KEYS);
7705
7706 case KEY_kill:
a0d0e21e 7707 LOP(OP_KILL,XTERM);
79072805
LW
7708
7709 case KEY_last:
a0d0e21e 7710 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7711 LOOPX(OP_LAST);
4e553d73 7712
79072805
LW
7713 case KEY_lc:
7714 UNI(OP_LC);
7715
7716 case KEY_lcfirst:
7717 UNI(OP_LCFIRST);
7718
7719 case KEY_local:
6154021b 7720 pl_yylval.ival = 0;
79072805
LW
7721 OPERATOR(LOCAL);
7722
7723 case KEY_length:
7724 UNI(OP_LENGTH);
7725
7726 case KEY_lt:
78cdf107
Z
7727 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7728 return REPORT(0);
79072805
LW
7729 Rop(OP_SLT);
7730
7731 case KEY_le:
78cdf107
Z
7732 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7733 return REPORT(0);
79072805
LW
7734 Rop(OP_SLE);
7735
7736 case KEY_localtime:
7737 UNI(OP_LOCALTIME);
7738
7739 case KEY_log:
7740 UNI(OP_LOG);
7741
7742 case KEY_link:
a0d0e21e 7743 LOP(OP_LINK,XTERM);
79072805
LW
7744
7745 case KEY_listen:
a0d0e21e 7746 LOP(OP_LISTEN,XTERM);
79072805 7747
c0329465
MB
7748 case KEY_lock:
7749 UNI(OP_LOCK);
7750
79072805
LW
7751 case KEY_lstat:
7752 UNI(OP_LSTAT);
7753
7754 case KEY_m:
8782bef2 7755 s = scan_pat(s,OP_MATCH);
79072805
LW
7756 TERM(sublex_start());
7757
a0d0e21e 7758 case KEY_map:
2c38e13d 7759 LOP(OP_MAPSTART, XREF);
4e4e412b 7760
79072805 7761 case KEY_mkdir:
a0d0e21e 7762 LOP(OP_MKDIR,XTERM);
79072805
LW
7763
7764 case KEY_msgctl:
a0d0e21e 7765 LOP(OP_MSGCTL,XTERM);
79072805
LW
7766
7767 case KEY_msgget:
a0d0e21e 7768 LOP(OP_MSGGET,XTERM);
79072805
LW
7769
7770 case KEY_msgrcv:
a0d0e21e 7771 LOP(OP_MSGRCV,XTERM);
79072805
LW
7772
7773 case KEY_msgsnd:
a0d0e21e 7774 LOP(OP_MSGSND,XTERM);
79072805 7775
77ca0c92 7776 case KEY_our:
93a17b20 7777 case KEY_my:
952306ac 7778 case KEY_state:
eac04b2e 7779 PL_in_my = (U16)tmp;
29595ff2 7780 s = SKIPSPACE1(s);
7e2040f0 7781 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7782#ifdef PERL_MAD
7783 char* start = s;
7784#endif
3280af22 7785 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7786 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7787 goto really_sub;
def3634b 7788 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7789 if (!PL_in_my_stash) {
c750a3ec 7790 char tmpbuf[1024];
3280af22 7791 PL_bufptr = s;
d9fad198 7792 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
3c54b17a 7793 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
c750a3ec 7794 }
5db06880
NC
7795#ifdef PERL_MAD
7796 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7797 sv_catsv(PL_thistoken, PL_nextwhite);
7798 PL_nextwhite = 0;
7799 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7800 }
7801#endif
c750a3ec 7802 }
6154021b 7803 pl_yylval.ival = 1;
55497cff 7804 OPERATOR(MY);
93a17b20 7805
79072805 7806 case KEY_next:
a0d0e21e 7807 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7808 LOOPX(OP_NEXT);
7809
7810 case KEY_ne:
78cdf107
Z
7811 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7812 return REPORT(0);
79072805
LW
7813 Eop(OP_SNE);
7814
a0d0e21e 7815 case KEY_no:
468aa647 7816 s = tokenize_use(0, s);
52d0e95b 7817 TERM(USE);
a0d0e21e
LW
7818
7819 case KEY_not:
29595ff2 7820 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 7821 FUN1(OP_NOT);
78cdf107
Z
7822 else {
7823 if (!PL_lex_allbrackets &&
7824 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7825 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 7826 OPERATOR(NOTOP);
78cdf107 7827 }
a0d0e21e 7828
79072805 7829 case KEY_open:
29595ff2 7830 s = SKIPSPACE1(s);
7e2040f0 7831 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7832 const char *t;
71aa9713
BF
7833 for (d = s; isALNUM_lazy_if(d,UTF);) {
7834 d += UTF ? UTF8SKIP(d) : 1;
7835 if (UTF) {
7836 while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
7837 d += UTF ? UTF8SKIP(d) : 1;
7838 }
7839 }
7840 }
c35e046a
AL
7841 for (t=d; isSPACE(*t);)
7842 t++;
e2ab214b 7843 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7844 /* [perl #16184] */
7845 && !(t[0] == '=' && t[1] == '>')
db3abe52 7846 && !(t[0] == ':' && t[1] == ':')
240d1b6f 7847 && !keyword(s, d-s, 0)
66fbe8fb 7848 ) {
71aa9713
BF
7849 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
7850 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
9014280d 7851 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
71aa9713
BF
7852 "Precedence problem: open %"SVf" should be open(%"SVf")",
7853 SVfARG(tmpsv), SVfARG(tmpsv));
66fbe8fb 7854 }
93a17b20 7855 }
a0d0e21e 7856 LOP(OP_OPEN,XTERM);
79072805 7857
463ee0b2 7858 case KEY_or:
78cdf107
Z
7859 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7860 return REPORT(0);
6154021b 7861 pl_yylval.ival = OP_OR;
463ee0b2
LW
7862 OPERATOR(OROP);
7863
79072805
LW
7864 case KEY_ord:
7865 UNI(OP_ORD);
7866
7867 case KEY_oct:
7868 UNI(OP_OCT);
7869
7870 case KEY_opendir:
a0d0e21e 7871 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7872
7873 case KEY_print:
3280af22 7874 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7875 LOP(OP_PRINT,XREF);
79072805
LW
7876
7877 case KEY_printf:
3280af22 7878 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7879 LOP(OP_PRTF,XREF);
79072805 7880
c07a80fd 7881 case KEY_prototype:
7882 UNI(OP_PROTOTYPE);
7883
79072805 7884 case KEY_push:
a0d0e21e 7885 LOP(OP_PUSH,XTERM);
79072805
LW
7886
7887 case KEY_pop:
6f33ba73 7888 UNIDOR(OP_POP);
79072805 7889
a0d0e21e 7890 case KEY_pos:
6f33ba73 7891 UNIDOR(OP_POS);
4e553d73 7892
79072805 7893 case KEY_pack:
a0d0e21e 7894 LOP(OP_PACK,XTERM);
79072805
LW
7895
7896 case KEY_package:
a0d0e21e 7897 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7898 s = SKIPSPACE1(s);
91152fc1 7899 s = force_strict_version(s);
4e4da3ac 7900 PL_lex_expect = XBLOCK;
79072805
LW
7901 OPERATOR(PACKAGE);
7902
7903 case KEY_pipe:
a0d0e21e 7904 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7905
7906 case KEY_q:
d24ca0c5 7907 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 7908 if (!s)
d4c19fe8 7909 missingterm(NULL);
6154021b 7910 pl_yylval.ival = OP_CONST;
79072805
LW
7911 TERM(sublex_start());
7912
a0d0e21e
LW
7913 case KEY_quotemeta:
7914 UNI(OP_QUOTEMETA);
7915
ea25a9b2
Z
7916 case KEY_qw: {
7917 OP *words = NULL;
d24ca0c5 7918 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8990e307 7919 if (!s)
d4c19fe8 7920 missingterm(NULL);
3480a8d2 7921 PL_expect = XOPERATOR;
8127e0e3 7922 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
7923 int warned_comma = !ckWARN(WARN_QW);
7924 int warned_comment = warned_comma;
3280af22 7925 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7926 while (len) {
d4c19fe8
AL
7927 for (; isSPACE(*d) && len; --len, ++d)
7928 /**/;
8127e0e3 7929 if (len) {
d4c19fe8 7930 SV *sv;
f54cb97a 7931 const char *b = d;
7e03b518 7932 if (!warned_comma || !warned_comment) {
8127e0e3 7933 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 7934 if (!warned_comma && *d == ',') {
9014280d 7935 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7936 "Possible attempt to separate words with commas");
7e03b518 7937 ++warned_comma;
8127e0e3 7938 }
7e03b518 7939 else if (!warned_comment && *d == '#') {
9014280d 7940 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7941 "Possible attempt to put comments in qw() list");
7e03b518 7942 ++warned_comment;
8127e0e3
GS
7943 }
7944 }
7945 }
7946 else {
d4c19fe8
AL
7947 for (; !isSPACE(*d) && len; --len, ++d)
7948 /**/;
8127e0e3 7949 }
740cce10 7950 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7951 words = op_append_elem(OP_LIST, words,
7948272d 7952 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7953 }
7954 }
7955 }
ea25a9b2
Z
7956 if (!words)
7957 words = newNULLLIST();
37fd879b 7958 if (PL_lex_stuff) {
8127e0e3 7959 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7960 PL_lex_stuff = NULL;
37fd879b 7961 }
ea25a9b2
Z
7962 PL_expect = XOPERATOR;
7963 pl_yylval.opval = sawparens(words);
7964 TOKEN(QWLIST);
7965 }
8990e307 7966
79072805 7967 case KEY_qq:
d24ca0c5 7968 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 7969 if (!s)
d4c19fe8 7970 missingterm(NULL);
6154021b 7971 pl_yylval.ival = OP_STRINGIFY;
3280af22 7972 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 7973 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
7974 TERM(sublex_start());
7975
8782bef2
GB
7976 case KEY_qr:
7977 s = scan_pat(s,OP_QR);
7978 TERM(sublex_start());
7979
79072805 7980 case KEY_qx:
d24ca0c5 7981 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 7982 if (!s)
d4c19fe8 7983 missingterm(NULL);
9b201d7d 7984 readpipe_override();
79072805
LW
7985 TERM(sublex_start());
7986
7987 case KEY_return:
7988 OLDLOP(OP_RETURN);
7989
7990 case KEY_require:
29595ff2 7991 s = SKIPSPACE1(s);
e759cc13
RGS
7992 if (isDIGIT(*s)) {
7993 s = force_version(s, FALSE);
a7cb1f99 7994 }
e759cc13
RGS
7995 else if (*s != 'v' || !isDIGIT(s[1])
7996 || (s = force_version(s, TRUE), *s == 'v'))
7997 {
a7cb1f99
GS
7998 *PL_tokenbuf = '\0';
7999 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 8000 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
af9f5953
BF
8001 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
8002 GV_ADD | (UTF ? SVf_UTF8 : 0));
a7cb1f99
GS
8003 else if (*s == '<')
8004 yyerror("<> should be quotes");
8005 }
a72a1c8b
RGS
8006 if (orig_keyword == KEY_require) {
8007 orig_keyword = 0;
6154021b 8008 pl_yylval.ival = 1;
a72a1c8b
RGS
8009 }
8010 else
6154021b 8011 pl_yylval.ival = 0;
a72a1c8b
RGS
8012 PL_expect = XTERM;
8013 PL_bufptr = s;
8014 PL_last_uni = PL_oldbufptr;
8015 PL_last_lop_op = OP_REQUIRE;
8016 s = skipspace(s);
8017 return REPORT( (int)REQUIRE );
79072805
LW
8018
8019 case KEY_reset:
8020 UNI(OP_RESET);
8021
8022 case KEY_redo:
a0d0e21e 8023 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
8024 LOOPX(OP_REDO);
8025
8026 case KEY_rename:
a0d0e21e 8027 LOP(OP_RENAME,XTERM);
79072805
LW
8028
8029 case KEY_rand:
8030 UNI(OP_RAND);
8031
8032 case KEY_rmdir:
8033 UNI(OP_RMDIR);
8034
8035 case KEY_rindex:
a0d0e21e 8036 LOP(OP_RINDEX,XTERM);
79072805
LW
8037
8038 case KEY_read:
a0d0e21e 8039 LOP(OP_READ,XTERM);
79072805
LW
8040
8041 case KEY_readdir:
8042 UNI(OP_READDIR);
8043
93a17b20 8044 case KEY_readline:
6f33ba73 8045 UNIDOR(OP_READLINE);
93a17b20
LW
8046
8047 case KEY_readpipe:
0858480c 8048 UNIDOR(OP_BACKTICK);
93a17b20 8049
79072805
LW
8050 case KEY_rewinddir:
8051 UNI(OP_REWINDDIR);
8052
8053 case KEY_recv:
a0d0e21e 8054 LOP(OP_RECV,XTERM);
79072805
LW
8055
8056 case KEY_reverse:
a0d0e21e 8057 LOP(OP_REVERSE,XTERM);
79072805
LW
8058
8059 case KEY_readlink:
6f33ba73 8060 UNIDOR(OP_READLINK);
79072805
LW
8061
8062 case KEY_ref:
8063 UNI(OP_REF);
8064
8065 case KEY_s:
8066 s = scan_subst(s);
6154021b 8067 if (pl_yylval.opval)
79072805
LW
8068 TERM(sublex_start());
8069 else
8070 TOKEN(1); /* force error */
8071
0d863452
RH
8072 case KEY_say:
8073 checkcomma(s,PL_tokenbuf,"filehandle");
8074 LOP(OP_SAY,XREF);
8075
a0d0e21e
LW
8076 case KEY_chomp:
8077 UNI(OP_CHOMP);
4e553d73 8078
79072805
LW
8079 case KEY_scalar:
8080 UNI(OP_SCALAR);
8081
8082 case KEY_select:
a0d0e21e 8083 LOP(OP_SELECT,XTERM);
79072805
LW
8084
8085 case KEY_seek:
a0d0e21e 8086 LOP(OP_SEEK,XTERM);
79072805
LW
8087
8088 case KEY_semctl:
a0d0e21e 8089 LOP(OP_SEMCTL,XTERM);
79072805
LW
8090
8091 case KEY_semget:
a0d0e21e 8092 LOP(OP_SEMGET,XTERM);
79072805
LW
8093
8094 case KEY_semop:
a0d0e21e 8095 LOP(OP_SEMOP,XTERM);
79072805
LW
8096
8097 case KEY_send:
a0d0e21e 8098 LOP(OP_SEND,XTERM);
79072805
LW
8099
8100 case KEY_setpgrp:
a0d0e21e 8101 LOP(OP_SETPGRP,XTERM);
79072805
LW
8102
8103 case KEY_setpriority:
a0d0e21e 8104 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
8105
8106 case KEY_sethostent:
ff68c719 8107 UNI(OP_SHOSTENT);
79072805
LW
8108
8109 case KEY_setnetent:
ff68c719 8110 UNI(OP_SNETENT);
79072805
LW
8111
8112 case KEY_setservent:
ff68c719 8113 UNI(OP_SSERVENT);
79072805
LW
8114
8115 case KEY_setprotoent:
ff68c719 8116 UNI(OP_SPROTOENT);
79072805
LW
8117
8118 case KEY_setpwent:
8119 FUN0(OP_SPWENT);
8120
8121 case KEY_setgrent:
8122 FUN0(OP_SGRENT);
8123
8124 case KEY_seekdir:
a0d0e21e 8125 LOP(OP_SEEKDIR,XTERM);
79072805
LW
8126
8127 case KEY_setsockopt:
a0d0e21e 8128 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
8129
8130 case KEY_shift:
6f33ba73 8131 UNIDOR(OP_SHIFT);
79072805
LW
8132
8133 case KEY_shmctl:
a0d0e21e 8134 LOP(OP_SHMCTL,XTERM);
79072805
LW
8135
8136 case KEY_shmget:
a0d0e21e 8137 LOP(OP_SHMGET,XTERM);
79072805
LW
8138
8139 case KEY_shmread:
a0d0e21e 8140 LOP(OP_SHMREAD,XTERM);
79072805
LW
8141
8142 case KEY_shmwrite:
a0d0e21e 8143 LOP(OP_SHMWRITE,XTERM);
79072805
LW
8144
8145 case KEY_shutdown:
a0d0e21e 8146 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
8147
8148 case KEY_sin:
8149 UNI(OP_SIN);
8150
8151 case KEY_sleep:
8152 UNI(OP_SLEEP);
8153
8154 case KEY_socket:
a0d0e21e 8155 LOP(OP_SOCKET,XTERM);
79072805
LW
8156
8157 case KEY_socketpair:
a0d0e21e 8158 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
8159
8160 case KEY_sort:
3280af22 8161 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 8162 s = SKIPSPACE1(s);
3280af22 8163 PL_expect = XTERM;
15f0808c 8164 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 8165 LOP(OP_SORT,XREF);
79072805
LW
8166
8167 case KEY_split:
a0d0e21e 8168 LOP(OP_SPLIT,XTERM);
79072805
LW
8169
8170 case KEY_sprintf:
a0d0e21e 8171 LOP(OP_SPRINTF,XTERM);
79072805
LW
8172
8173 case KEY_splice:
a0d0e21e 8174 LOP(OP_SPLICE,XTERM);
79072805
LW
8175
8176 case KEY_sqrt:
8177 UNI(OP_SQRT);
8178
8179 case KEY_srand:
8180 UNI(OP_SRAND);
8181
8182 case KEY_stat:
8183 UNI(OP_STAT);
8184
8185 case KEY_study:
79072805
LW
8186 UNI(OP_STUDY);
8187
8188 case KEY_substr:
a0d0e21e 8189 LOP(OP_SUBSTR,XTERM);
79072805
LW
8190
8191 case KEY_format:
8192 case KEY_sub:
93a17b20 8193 really_sub:
09bef843 8194 {
3280af22 8195 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 8196 SSize_t tboffset = 0;
09bef843 8197 expectation attrful;
28cc6278 8198 bool have_name, have_proto;
f54cb97a 8199 const int key = tmp;
09bef843 8200
5db06880
NC
8201#ifdef PERL_MAD
8202 SV *tmpwhite = 0;
8203
cd81e915 8204 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
af9f5953 8205 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
cd81e915 8206 PL_thistoken = 0;
5db06880
NC
8207
8208 d = s;
8209 s = SKIPSPACE2(s,tmpwhite);
8210#else
09bef843 8211 s = skipspace(s);
5db06880 8212#endif
09bef843 8213
7e2040f0 8214 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
8215 (*s == ':' && s[1] == ':'))
8216 {
5db06880 8217#ifdef PERL_MAD
4f61fd4b 8218 SV *nametoke = NULL;
5db06880
NC
8219#endif
8220
09bef843
SB
8221 PL_expect = XBLOCK;
8222 attrful = XATTRBLOCK;
b1b65b59
JH
8223 /* remember buffer pos'n for later force_word */
8224 tboffset = s - PL_oldbufptr;
09bef843 8225 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
8226#ifdef PERL_MAD
8227 if (PL_madskills)
af9f5953 8228 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
5db06880 8229#endif
6502358f
NC
8230 if (memchr(tmpbuf, ':', len))
8231 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
8232 else {
8233 sv_setsv(PL_subname,PL_curstname);
396482e1 8234 sv_catpvs(PL_subname,"::");
09bef843
SB
8235 sv_catpvn(PL_subname,tmpbuf,len);
8236 }
af9f5953
BF
8237 if (SvUTF8(PL_linestr))
8238 SvUTF8_on(PL_subname);
09bef843 8239 have_name = TRUE;
5db06880
NC
8240
8241#ifdef PERL_MAD
8242
8243 start_force(0);
8244 CURMAD('X', nametoke);
8245 CURMAD('_', tmpwhite);
8246 (void) force_word(PL_oldbufptr + tboffset, WORD,
8247 FALSE, TRUE, TRUE);
8248
8249 s = SKIPSPACE2(d,tmpwhite);
8250#else
8251 s = skipspace(d);
8252#endif
09bef843 8253 }
463ee0b2 8254 else {
09bef843
SB
8255 if (key == KEY_my)
8256 Perl_croak(aTHX_ "Missing name in \"my sub\"");
8257 PL_expect = XTERMBLOCK;
8258 attrful = XATTRTERM;
76f68e9b 8259 sv_setpvs(PL_subname,"?");
09bef843 8260 have_name = FALSE;
463ee0b2 8261 }
4633a7c4 8262
09bef843 8263 if (key == KEY_format) {
5db06880 8264#ifdef PERL_MAD
cd81e915 8265 PL_thistoken = subtoken;
5db06880
NC
8266 s = d;
8267#else
09bef843 8268 if (have_name)
b1b65b59
JH
8269 (void) force_word(PL_oldbufptr + tboffset, WORD,
8270 FALSE, TRUE, TRUE);
5db06880 8271#endif
64a40898 8272 PREBLOCK(FORMAT);
09bef843 8273 }
79072805 8274
09bef843
SB
8275 /* Look for a prototype */
8276 if (*s == '(') {
d9f2850e
RGS
8277 char *p;
8278 bool bad_proto = FALSE;
9e8d7757
RB
8279 bool in_brackets = FALSE;
8280 char greedy_proto = ' ';
8281 bool proto_after_greedy_proto = FALSE;
8282 bool must_be_last = FALSE;
8283 bool underscore = FALSE;
aef2a98a 8284 bool seen_underscore = FALSE;
197afce1 8285 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
dab1c735 8286 STRLEN tmplen;
09bef843 8287
d24ca0c5 8288 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
37fd879b 8289 if (!s)
09bef843 8290 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8291 /* strip spaces and check for bad characters */
dab1c735 8292 d = SvPV(PL_lex_stuff, tmplen);
09bef843 8293 tmp = 0;
dab1c735 8294 for (p = d; tmplen; tmplen--, ++p) {
d9f2850e 8295 if (!isSPACE(*p)) {
dab1c735 8296 d[tmp++] = *p;
9e8d7757 8297
197afce1 8298 if (warnillegalproto) {
9e8d7757
RB
8299 if (must_be_last)
8300 proto_after_greedy_proto = TRUE;
dab1c735 8301 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
9e8d7757
RB
8302 bad_proto = TRUE;
8303 }
8304 else {
8305 if ( underscore ) {
34daab0f 8306 if ( !strchr(";@%", *p) )
9e8d7757
RB
8307 bad_proto = TRUE;
8308 underscore = FALSE;
8309 }
8310 if ( *p == '[' ) {
8311 in_brackets = TRUE;
8312 }
8313 else if ( *p == ']' ) {
8314 in_brackets = FALSE;
8315 }
8316 else if ( (*p == '@' || *p == '%') &&
8317 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8318 !in_brackets ) {
8319 must_be_last = TRUE;
8320 greedy_proto = *p;
8321 }
8322 else if ( *p == '_' ) {
aef2a98a 8323 underscore = seen_underscore = TRUE;
9e8d7757
RB
8324 }
8325 }
8326 }
d37a9538 8327 }
09bef843 8328 }
dab1c735 8329 d[tmp] = '\0';
9e8d7757 8330 if (proto_after_greedy_proto)
197afce1 8331 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8332 "Prototype after '%c' for %"SVf" : %s",
8333 greedy_proto, SVfARG(PL_subname), d);
dab1c735
BF
8334 if (bad_proto) {
8335 SV *dsv = newSVpvs_flags("", SVs_TEMP);
197afce1 8336 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8337 "Illegal character %sin prototype for %"SVf" : %s",
8338 seen_underscore ? "after '_' " : "",
dab1c735 8339 SVfARG(PL_subname),
97eb901d
BF
8340 SvUTF8(PL_lex_stuff)
8341 ? sv_uni_display(dsv,
8342 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8343 tmp,
8344 UNI_DISPLAY_ISPRINT)
8345 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8346 PERL_PV_ESCAPE_NONASCII));
dab1c735
BF
8347 }
8348 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8349 have_proto = TRUE;
68dc0745 8350
5db06880
NC
8351#ifdef PERL_MAD
8352 start_force(0);
cd81e915 8353 CURMAD('q', PL_thisopen);
5db06880 8354 CURMAD('_', tmpwhite);
cd81e915
NC
8355 CURMAD('=', PL_thisstuff);
8356 CURMAD('Q', PL_thisclose);
5db06880
NC
8357 NEXTVAL_NEXTTOKE.opval =
8358 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8359 PL_lex_stuff = NULL;
5db06880
NC
8360 force_next(THING);
8361
8362 s = SKIPSPACE2(s,tmpwhite);
8363#else
09bef843 8364 s = skipspace(s);
5db06880 8365#endif
4633a7c4 8366 }
09bef843
SB
8367 else
8368 have_proto = FALSE;
8369
8370 if (*s == ':' && s[1] != ':')
8371 PL_expect = attrful;
8e742a20
MHM
8372 else if (*s != '{' && key == KEY_sub) {
8373 if (!have_name)
8374 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8375 else if (*s != ';' && *s != '}')
be2597df 8376 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8377 }
09bef843 8378
5db06880
NC
8379#ifdef PERL_MAD
8380 start_force(0);
8381 if (tmpwhite) {
8382 if (PL_madskills)
6b29d1f5 8383 curmad('^', newSVpvs(""));
5db06880
NC
8384 CURMAD('_', tmpwhite);
8385 }
8386 force_next(0);
8387
cd81e915 8388 PL_thistoken = subtoken;
5db06880 8389#else
09bef843 8390 if (have_proto) {
9ded7720 8391 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8392 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8393 PL_lex_stuff = NULL;
09bef843 8394 force_next(THING);
68dc0745 8395 }
5db06880 8396#endif
09bef843 8397 if (!have_name) {
49a54bbe
NC
8398 if (PL_curstash)
8399 sv_setpvs(PL_subname, "__ANON__");
8400 else
8401 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8402 TOKEN(ANONSUB);
4633a7c4 8403 }
5db06880 8404#ifndef PERL_MAD
b1b65b59
JH
8405 (void) force_word(PL_oldbufptr + tboffset, WORD,
8406 FALSE, TRUE, TRUE);
5db06880 8407#endif
09bef843
SB
8408 if (key == KEY_my)
8409 TOKEN(MYSUB);
8410 TOKEN(SUB);
4633a7c4 8411 }
79072805
LW
8412
8413 case KEY_system:
a0d0e21e 8414 LOP(OP_SYSTEM,XREF);
79072805
LW
8415
8416 case KEY_symlink:
a0d0e21e 8417 LOP(OP_SYMLINK,XTERM);
79072805
LW
8418
8419 case KEY_syscall:
a0d0e21e 8420 LOP(OP_SYSCALL,XTERM);
79072805 8421
c07a80fd 8422 case KEY_sysopen:
8423 LOP(OP_SYSOPEN,XTERM);
8424
137443ea 8425 case KEY_sysseek:
8426 LOP(OP_SYSSEEK,XTERM);
8427
79072805 8428 case KEY_sysread:
a0d0e21e 8429 LOP(OP_SYSREAD,XTERM);
79072805
LW
8430
8431 case KEY_syswrite:
a0d0e21e 8432 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8433
8434 case KEY_tr:
8435 s = scan_trans(s);
8436 TERM(sublex_start());
8437
8438 case KEY_tell:
8439 UNI(OP_TELL);
8440
8441 case KEY_telldir:
8442 UNI(OP_TELLDIR);
8443
463ee0b2 8444 case KEY_tie:
a0d0e21e 8445 LOP(OP_TIE,XTERM);
463ee0b2 8446
c07a80fd 8447 case KEY_tied:
8448 UNI(OP_TIED);
8449
79072805
LW
8450 case KEY_time:
8451 FUN0(OP_TIME);
8452
8453 case KEY_times:
8454 FUN0(OP_TMS);
8455
8456 case KEY_truncate:
a0d0e21e 8457 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8458
8459 case KEY_uc:
8460 UNI(OP_UC);
8461
8462 case KEY_ucfirst:
8463 UNI(OP_UCFIRST);
8464
463ee0b2
LW
8465 case KEY_untie:
8466 UNI(OP_UNTIE);
8467
79072805 8468 case KEY_until:
78cdf107
Z
8469 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8470 return REPORT(0);
6154021b 8471 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8472 OPERATOR(UNTIL);
8473
8474 case KEY_unless:
78cdf107
Z
8475 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8476 return REPORT(0);
6154021b 8477 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8478 OPERATOR(UNLESS);
8479
8480 case KEY_unlink:
a0d0e21e 8481 LOP(OP_UNLINK,XTERM);
79072805
LW
8482
8483 case KEY_undef:
6f33ba73 8484 UNIDOR(OP_UNDEF);
79072805
LW
8485
8486 case KEY_unpack:
a0d0e21e 8487 LOP(OP_UNPACK,XTERM);
79072805
LW
8488
8489 case KEY_utime:
a0d0e21e 8490 LOP(OP_UTIME,XTERM);
79072805
LW
8491
8492 case KEY_umask:
6f33ba73 8493 UNIDOR(OP_UMASK);
79072805
LW
8494
8495 case KEY_unshift:
a0d0e21e
LW
8496 LOP(OP_UNSHIFT,XTERM);
8497
8498 case KEY_use:
468aa647 8499 s = tokenize_use(1, s);
a0d0e21e 8500 OPERATOR(USE);
79072805
LW
8501
8502 case KEY_values:
8503 UNI(OP_VALUES);
8504
8505 case KEY_vec:
a0d0e21e 8506 LOP(OP_VEC,XTERM);
79072805 8507
0d863452 8508 case KEY_when:
78cdf107
Z
8509 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8510 return REPORT(0);
6154021b 8511 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8512 OPERATOR(WHEN);
8513
79072805 8514 case KEY_while:
78cdf107
Z
8515 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8516 return REPORT(0);
6154021b 8517 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8518 OPERATOR(WHILE);
8519
8520 case KEY_warn:
3280af22 8521 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8522 LOP(OP_WARN,XTERM);
79072805
LW
8523
8524 case KEY_wait:
8525 FUN0(OP_WAIT);
8526
8527 case KEY_waitpid:
a0d0e21e 8528 LOP(OP_WAITPID,XTERM);
79072805
LW
8529
8530 case KEY_wantarray:
8531 FUN0(OP_WANTARRAY);
8532
8533 case KEY_write:
9d116dd7
JH
8534#ifdef EBCDIC
8535 {
df3728a2
JH
8536 char ctl_l[2];
8537 ctl_l[0] = toCTRL('L');
8538 ctl_l[1] = '\0';
fafc274c 8539 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8540 }
8541#else
fafc274c
NC
8542 /* Make sure $^L is defined */
8543 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8544#endif
79072805
LW
8545 UNI(OP_ENTERWRITE);
8546
8547 case KEY_x:
78cdf107
Z
8548 if (PL_expect == XOPERATOR) {
8549 if (*s == '=' && !PL_lex_allbrackets &&
8550 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8551 return REPORT(0);
79072805 8552 Mop(OP_REPEAT);
78cdf107 8553 }
79072805
LW
8554 check_uni();
8555 goto just_a_word;
8556
a0d0e21e 8557 case KEY_xor:
78cdf107
Z
8558 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8559 return REPORT(0);
6154021b 8560 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
8561 OPERATOR(OROP);
8562
79072805
LW
8563 case KEY_y:
8564 s = scan_trans(s);
8565 TERM(sublex_start());
8566 }
49dc05e3 8567 }}
79072805 8568}
bf4acbe4
GS
8569#ifdef __SC__
8570#pragma segment Main
8571#endif
79072805 8572
e930465f
JH
8573static int
8574S_pending_ident(pTHX)
8eceec63 8575{
97aff369 8576 dVAR;
bbd11bfc 8577 PADOFFSET tmp = 0;
8eceec63
SC
8578 /* pit holds the identifier we read and pending_ident is reset */
8579 char pit = PL_pending_ident;
9bde8eb0
NC
8580 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8581 /* All routes through this function want to know if there is a colon. */
c099d646 8582 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8583 PL_pending_ident = 0;
8584
cd81e915 8585 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8586 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8587 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8588
8589 /* if we're in a my(), we can't allow dynamics here.
8590 $foo'bar has already been turned into $foo::bar, so
8591 just check for colons.
8592
8593 if it's a legal name, the OP is a PADANY.
8594 */
8595 if (PL_in_my) {
8596 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8597 if (has_colon)
4bca4ee0 8598 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8eceec63 8599 "variable %s in \"our\"",
4bca4ee0 8600 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
bc9b26ca 8601 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8602 }
8603 else {
9bde8eb0 8604 if (has_colon)
58576270
BF
8605 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8606 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8607 UTF ? SVf_UTF8 : 0);
8eceec63 8608
6154021b 8609 pl_yylval.opval = newOP(OP_PADANY, 0);
bc9b26ca
BF
8610 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8611 UTF ? SVf_UTF8 : 0);
8eceec63
SC
8612 return PRIVATEREF;
8613 }
8614 }
8615
8616 /*
8617 build the ops for accesses to a my() variable.
8eceec63
SC
8618 */
8619
9bde8eb0 8620 if (!has_colon) {
8716503d 8621 if (!PL_in_my)
bc9b26ca
BF
8622 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8623 UTF ? SVf_UTF8 : 0);
8716503d 8624 if (tmp != NOT_IN_PAD) {
8eceec63 8625 /* might be an "our" variable" */
00b1698f 8626 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8627 /* build ops for a bareword */
b64e5050
AL
8628 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8629 HEK * const stashname = HvNAME_HEK(stash);
8630 SV * const sym = newSVhek(stashname);
396482e1 8631 sv_catpvs(sym, "::");
2a33114a 8632 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
6154021b
RGS
8633 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8634 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8635 gv_fetchsv(sym,
8eceec63
SC
8636 (PL_in_eval
8637 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8638 : GV_ADDMULTI
8eceec63
SC
8639 ),
8640 ((PL_tokenbuf[0] == '$') ? SVt_PV
8641 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8642 : SVt_PVHV));
8643 return WORD;
8644 }
8645
6154021b
RGS
8646 pl_yylval.opval = newOP(OP_PADANY, 0);
8647 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8648 return PRIVATEREF;
8649 }
8650 }
8651
8652 /*
8653 Whine if they've said @foo in a doublequoted string,
8654 and @foo isn't a variable we can find in the symbol
8655 table.
8656 */
d824713b
NC
8657 if (ckWARN(WARN_AMBIGUOUS) &&
8658 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
0be4d16f
BF
8659 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8660 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8eceec63 8661 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8662 /* DO NOT warn for @- and @+ */
8663 && !( PL_tokenbuf[2] == '\0' &&
8664 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8665 )
8eceec63
SC
8666 {
8667 /* Downgraded from fatal to warning 20000522 mjd */
d824713b 8668 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
29fb1d0e
BF
8669 "Possible unintended interpolation of %"SVf" in string",
8670 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8671 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8eceec63
SC
8672 }
8673 }
8674
8675 /* build ops for a bareword */
0be4d16f
BF
8676 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8677 tokenbuf_len - 1,
8678 UTF ? SVf_UTF8 : 0 ));
6154021b 8679 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7 8680 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
0be4d16f
BF
8681 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8682 | ( UTF ? SVf_UTF8 : 0 ),
223f0fb7
NC
8683 ((PL_tokenbuf[0] == '$') ? SVt_PV
8684 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8685 : SVt_PVHV));
8eceec63
SC
8686 return WORD;
8687}
8688
76e3520e 8689STATIC void
c94115d8 8690S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8691{
97aff369 8692 dVAR;
2f3197b3 8693
7918f24d
NC
8694 PERL_ARGS_ASSERT_CHECKCOMMA;
8695
d008e5eb 8696 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8697 if (ckWARN(WARN_SYNTAX)) {
8698 int level = 1;
26ff0806 8699 const char *w;
d008e5eb
GS
8700 for (w = s+2; *w && level; w++) {
8701 if (*w == '(')
8702 ++level;
8703 else if (*w == ')')
8704 --level;
8705 }
888fea98
NC
8706 while (isSPACE(*w))
8707 ++w;
b1439985
RGS
8708 /* the list of chars below is for end of statements or
8709 * block / parens, boolean operators (&&, ||, //) and branch
8710 * constructs (or, and, if, until, unless, while, err, for).
8711 * Not a very solid hack... */
8712 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8713 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8714 "%s (...) interpreted as function",name);
d008e5eb 8715 }
2f3197b3 8716 }
3280af22 8717 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8718 s++;
a687059c
LW
8719 if (*s == '(')
8720 s++;
3280af22 8721 while (s < PL_bufend && isSPACE(*s))
a687059c 8722 s++;
7e2040f0 8723 if (isIDFIRST_lazy_if(s,UTF)) {
d0fb66e4
BF
8724 const char * const w = s;
8725 s += UTF ? UTF8SKIP(s) : 1;
7e2040f0 8726 while (isALNUM_lazy_if(s,UTF))
d0fb66e4 8727 s += UTF ? UTF8SKIP(s) : 1;
3280af22 8728 while (s < PL_bufend && isSPACE(*s))
a687059c 8729 s++;
e929a76b 8730 if (*s == ',') {
c94115d8 8731 GV* gv;
5458a98a 8732 if (keyword(w, s - w, 0))
e929a76b 8733 return;
c94115d8 8734
2e38bce1 8735 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
c94115d8 8736 if (gv && GvCVu(gv))
abbb3198 8737 return;
cea2e8a9 8738 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8739 }
8740 }
8741}
8742
423cee85
JH
8743/* Either returns sv, or mortalizes sv and returns a new SV*.
8744 Best used as sv=new_constant(..., sv, ...).
8745 If s, pv are NULL, calls subroutine with one argument,
8746 and type is used with error messages only. */
8747
b3ac6de7 8748STATIC SV *
eb0d8d16
NC
8749S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8750 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 8751{
27da23d5 8752 dVAR; dSP;
fbb93542 8753 HV * table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 8754 SV *res;
b3ac6de7
IZ
8755 SV **cvp;
8756 SV *cv, *typesv;
89e33a05 8757 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 8758
7918f24d
NC
8759 PERL_ARGS_ASSERT_NEW_CONSTANT;
8760
f8988b41
KW
8761 /* charnames doesn't work well if there have been errors found */
8762 if (PL_error_count > 0 && strEQ(key,"charnames"))
8763 return &PL_sv_undef;
8764
fbb93542
KW
8765 if (!table
8766 || ! (PL_hints & HINT_LOCALIZE_HH)
8767 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8768 || ! SvOK(*cvp))
8769 {
423cee85
JH
8770 SV *msg;
8771
fbb93542
KW
8772 /* Here haven't found what we're looking for. If it is charnames,
8773 * perhaps it needs to be loaded. Try doing that before giving up */
8774 if (strEQ(key,"charnames")) {
8775 Perl_load_module(aTHX_
8776 0,
8777 newSVpvs("_charnames"),
8778 /* version parameter; no need to specify it, as if
8779 * we get too early a version, will fail anyway,
8780 * not being able to find '_charnames' */
8781 NULL,
8782 newSVpvs(":full"),
8783 newSVpvs(":short"),
8784 NULL);
8785 SPAGAIN;
8786 table = GvHV(PL_hintgv);
8787 if (table
8788 && (PL_hints & HINT_LOCALIZE_HH)
8789 && (cvp = hv_fetch(table, key, keylen, FALSE))
8790 && SvOK(*cvp))
8791 {
8792 goto now_ok;
8793 }
8794 }
8795 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8796 msg = Perl_newSVpvf(aTHX_
8797 "Constant(%s) unknown", (type ? type: "undef"));
8798 }
8799 else {
8800 why1 = "$^H{";
8801 why2 = key;
8802 why3 = "} is not defined";
423cee85 8803 report:
4e553d73 8804 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 8805 (type ? type: "undef"), why1, why2, why3);
fbb93542 8806 }
95a20fc0 8807 yyerror(SvPVX_const(msg));
423cee85
JH
8808 SvREFCNT_dec(msg);
8809 return sv;
8810 }
fbb93542 8811now_ok:
b3ac6de7
IZ
8812 sv_2mortal(sv); /* Parent created it permanently */
8813 cv = *cvp;
423cee85 8814 if (!pv && s)
59cd0e26 8815 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 8816 if (type && pv)
59cd0e26 8817 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 8818 else
423cee85 8819 typesv = &PL_sv_undef;
4e553d73 8820
e788e7d3 8821 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
8822 ENTER ;
8823 SAVETMPS;
4e553d73 8824
423cee85 8825 PUSHMARK(SP) ;
a5845cb7 8826 EXTEND(sp, 3);
423cee85
JH
8827 if (pv)
8828 PUSHs(pv);
b3ac6de7 8829 PUSHs(sv);
423cee85
JH
8830 if (pv)
8831 PUSHs(typesv);
b3ac6de7 8832 PUTBACK;
423cee85 8833 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 8834
423cee85 8835 SPAGAIN ;
4e553d73 8836
423cee85 8837 /* Check the eval first */
9b0e499b 8838 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 8839 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 8840 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 8841 (void)POPs;
b37c2d43 8842 res = SvREFCNT_inc_simple(sv);
423cee85
JH
8843 }
8844 else {
8845 res = POPs;
b37c2d43 8846 SvREFCNT_inc_simple_void(res);
423cee85 8847 }
4e553d73 8848
423cee85
JH
8849 PUTBACK ;
8850 FREETMPS ;
8851 LEAVE ;
b3ac6de7 8852 POPSTACK;
4e553d73 8853
b3ac6de7 8854 if (!SvOK(res)) {
423cee85
JH
8855 why1 = "Call to &{$^H{";
8856 why2 = key;
f0af216f 8857 why3 = "}} did not return a defined value";
423cee85
JH
8858 sv = res;
8859 goto report;
9b0e499b 8860 }
423cee85 8861
9b0e499b 8862 return res;
b3ac6de7 8863}
4e553d73 8864
d0a148a6
NC
8865/* Returns a NUL terminated string, with the length of the string written to
8866 *slp
8867 */
76e3520e 8868STATIC char *
cea2e8a9 8869S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 8870{
97aff369 8871 dVAR;
eb578fdb
KW
8872 char *d = dest;
8873 char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
8874
8875 PERL_ARGS_ASSERT_SCAN_WORD;
8876
463ee0b2 8877 for (;;) {
8903cb82 8878 if (d >= e)
cea2e8a9 8879 Perl_croak(aTHX_ ident_too_long);
5db1eb8d 8880 if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) /* UTF handled below */
463ee0b2 8881 *d++ = *s++;
c35e046a 8882 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8883 *d++ = ':';
8884 *d++ = ':';
8885 s++;
8886 }
c35e046a 8887 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
8888 *d++ = *s++;
8889 *d++ = *s++;
8890 }
fd400ab9 8891 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8892 char *t = s + UTF8SKIP(s);
c35e046a 8893 size_t len;
fd400ab9 8894 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 8895 t += UTF8SKIP(t);
c35e046a
AL
8896 len = t - s;
8897 if (d + len > e)
cea2e8a9 8898 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
8899 Copy(s, d, len, char);
8900 d += len;
a0ed51b3
LW
8901 s = t;
8902 }
463ee0b2
LW
8903 else {
8904 *d = '\0';
8905 *slp = d - dest;
8906 return s;
e929a76b 8907 }
378cc40b
LW
8908 }
8909}
8910
76e3520e 8911STATIC char *
f54cb97a 8912S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 8913{
97aff369 8914 dVAR;
6136c704 8915 char *bracket = NULL;
748a9306 8916 char funny = *s++;
eb578fdb
KW
8917 char *d = dest;
8918 char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 8919
7918f24d
NC
8920 PERL_ARGS_ASSERT_SCAN_IDENT;
8921
a0d0e21e 8922 if (isSPACE(*s))
29595ff2 8923 s = PEEKSPACE(s);
de3bb511 8924 if (isDIGIT(*s)) {
8903cb82 8925 while (isDIGIT(*s)) {
8926 if (d >= e)
cea2e8a9 8927 Perl_croak(aTHX_ ident_too_long);
378cc40b 8928 *d++ = *s++;
8903cb82 8929 }
378cc40b
LW
8930 }
8931 else {
463ee0b2 8932 for (;;) {
8903cb82 8933 if (d >= e)
cea2e8a9 8934 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8935 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8936 *d++ = *s++;
7e2040f0 8937 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8938 *d++ = ':';
8939 *d++ = ':';
8940 s++;
8941 }
a0d0e21e 8942 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
8943 *d++ = *s++;
8944 *d++ = *s++;
8945 }
fd400ab9 8946 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8947 char *t = s + UTF8SKIP(s);
fd400ab9 8948 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
8949 t += UTF8SKIP(t);
8950 if (d + (t - s) > e)
cea2e8a9 8951 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
8952 Copy(s, d, t - s, char);
8953 d += t - s;
8954 s = t;
8955 }
463ee0b2
LW
8956 else
8957 break;
8958 }
378cc40b
LW
8959 }
8960 *d = '\0';
8961 d = dest;
79072805 8962 if (*d) {
3280af22
NIS
8963 if (PL_lex_state != LEX_NORMAL)
8964 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 8965 return s;
378cc40b 8966 }
748a9306 8967 if (*s == '$' && s[1] &&
3792a11b 8968 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 8969 {
4810e5ec 8970 return s;
5cd24f17 8971 }
79072805
LW
8972 if (*s == '{') {
8973 bracket = s;
8974 s++;
8975 }
204e6232
BF
8976 if (s < send) {
8977 if (UTF) {
8978 const STRLEN skip = UTF8SKIP(s);
8979 STRLEN i;
8980 d[skip] = '\0';
8981 for ( i = 0; i < skip; i++ )
8982 d[i] = *s++;
8983 }
8984 else {
8985 *d = *s++;
8986 d[1] = '\0';
8987 }
8988 }
2b92dfce 8989 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 8990 *d = toCTRL(*s);
8991 s++;
de3bb511 8992 }
fbdd83da
DIM
8993 else if (ck_uni && !bracket)
8994 check_uni();
79072805 8995 if (bracket) {
748a9306 8996 if (isSPACE(s[-1])) {
fa83b5b6 8997 while (s < send) {
f54cb97a 8998 const char ch = *s++;
bf4acbe4 8999 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 9000 *d = ch;
9001 break;
9002 }
9003 }
748a9306 9004 }
7e2040f0 9005 if (isIDFIRST_lazy_if(d,UTF)) {
204e6232 9006 d += UTF8SKIP(d);
a0ed51b3 9007 if (UTF) {
6136c704
AL
9008 char *end = s;
9009 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
9010 end += UTF8SKIP(end);
9011 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
9012 end += UTF8SKIP(end);
a0ed51b3 9013 }
6136c704
AL
9014 Copy(s, d, end - s, char);
9015 d += end - s;
9016 s = end;
a0ed51b3
LW
9017 }
9018 else {
2b92dfce 9019 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 9020 *d++ = *s++;
2b92dfce 9021 if (d >= e)
cea2e8a9 9022 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 9023 }
79072805 9024 *d = '\0';
c35e046a
AL
9025 while (s < send && SPACE_OR_TAB(*s))
9026 s++;
ff68c719 9027 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 9028 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
9029 const char * const brack =
9030 (const char *)
9031 ((*s == '[') ? "[...]" : "{...}");
e850844c 9032 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 9033 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 9034 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
9035 funny, dest, brack, funny, dest, brack);
9036 }
79072805 9037 bracket++;
a0be28da 9038 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 9039 PL_lex_allbrackets++;
79072805
LW
9040 return s;
9041 }
4e553d73
NIS
9042 }
9043 /* Handle extended ${^Foo} variables
2b92dfce
GS
9044 * 1999-02-27 mjd-perl-patch@plover.com */
9045 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
9046 && isALNUM(*s))
9047 {
9048 d++;
9049 while (isALNUM(*s) && d < e) {
9050 *d++ = *s++;
9051 }
9052 if (d >= e)
cea2e8a9 9053 Perl_croak(aTHX_ ident_too_long);
2b92dfce 9054 *d = '\0';
79072805
LW
9055 }
9056 if (*s == '}') {
9057 s++;
7df0d042 9058 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 9059 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
9060 PL_expect = XREF;
9061 }
d008e5eb 9062 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 9063 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241 9064 (keyword(dest, d - dest, 0)
5c66c3dd 9065 || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
d008e5eb 9066 {
5c66c3dd
BF
9067 SV *tmp = newSVpvn_flags( dest, d - dest,
9068 SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
c35e046a
AL
9069 if (funny == '#')
9070 funny = '@';
9014280d 9071 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5c66c3dd
BF
9072 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9073 funny, tmp, funny, tmp);
d008e5eb
GS
9074 }
9075 }
79072805
LW
9076 }
9077 else {
9078 s = bracket; /* let the parser handle it */
93a17b20 9079 *dest = '\0';
79072805
LW
9080 }
9081 }
3280af22
NIS
9082 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9083 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
9084 return s;
9085}
9086
858a358b 9087static bool
3955e1a9 9088S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
9089
9090 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9091 * the parse starting at 's', based on the subset that are valid in this
9092 * context input to this routine in 'valid_flags'. Advances s. Returns
96f3bfda
KW
9093 * TRUE if the input should be treated as a valid flag, so the next char
9094 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9095 * first call on the current regex. This routine will set it to any
9096 * charset modifier found. The caller shouldn't change it. This way,
9097 * another charset modifier encountered in the parse can be detected as an
9098 * error, as we have decided to allow only one */
858a358b
KW
9099
9100 const char c = **s;
84159251 9101 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
94b03d7d 9102
84159251
BF
9103 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9104 if (isALNUM_lazy_if(*s, UTF)) {
4f8dbb2d 9105 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
84159251
BF
9106 UTF ? SVf_UTF8 : 0);
9107 (*s) += charlen;
96f3bfda
KW
9108 /* Pretend that it worked, so will continue processing before
9109 * dieing */
0da72d5e 9110 return TRUE;
858a358b
KW
9111 }
9112 return FALSE;
9113 }
9114
9115 switch (c) {
94b03d7d 9116
858a358b
KW
9117 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9118 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9119 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9120 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9121 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9122 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d 9123 case LOCALE_PAT_MOD:
3955e1a9
KW
9124 if (*charset) {
9125 goto multiple_charsets;
9126 }
94b03d7d 9127 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 9128 *charset = c;
94b03d7d
KW
9129 break;
9130 case UNICODE_PAT_MOD:
3955e1a9
KW
9131 if (*charset) {
9132 goto multiple_charsets;
9133 }
94b03d7d 9134 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 9135 *charset = c;
94b03d7d
KW
9136 break;
9137 case ASCII_RESTRICT_PAT_MOD:
ff3f26d2 9138 if (! *charset) {
94b03d7d
KW
9139 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9140 }
ff3f26d2
KW
9141 else {
9142
9143 /* Error if previous modifier wasn't an 'a', but if it was, see
9144 * if, and accept, a second occurrence (only) */
9145 if (*charset != 'a'
9146 || get_regex_charset(*pmfl)
9147 != REGEX_ASCII_RESTRICTED_CHARSET)
9148 {
9149 goto multiple_charsets;
9150 }
9151 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
9152 }
9153 *charset = c;
94b03d7d
KW
9154 break;
9155 case DEPENDS_PAT_MOD:
3955e1a9
KW
9156 if (*charset) {
9157 goto multiple_charsets;
9158 }
94b03d7d 9159 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 9160 *charset = c;
94b03d7d 9161 break;
879d0c72 9162 }
94b03d7d 9163
858a358b
KW
9164 (*s)++;
9165 return TRUE;
94b03d7d 9166
3955e1a9
KW
9167 multiple_charsets:
9168 if (*charset != c) {
9169 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9170 }
ff3f26d2
KW
9171 else if (c == 'a') {
9172 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9173 }
3955e1a9
KW
9174 else {
9175 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9176 }
9177
9178 /* Pretend that it worked, so will continue processing before dieing */
9179 (*s)++;
9180 return TRUE;
879d0c72
NC
9181}
9182
76e3520e 9183STATIC char *
cea2e8a9 9184S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 9185{
97aff369 9186 dVAR;
79072805 9187 PMOP *pm;
d24ca0c5 9188 char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
10edeb5d 9189 const char * const valid_flags =
a20207d7 9190 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 9191 char charset = '\0'; /* character set modifier */
5db06880
NC
9192#ifdef PERL_MAD
9193 char *modstart;
9194#endif
9195
7918f24d 9196 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 9197
d24ca0c5
DM
9198 /* this was only needed for the initial scan_str; set it to false
9199 * so that any (?{}) code blocks etc are parsed normally */
9200 PL_reg_state.re_reparsing = FALSE;
25c09cbf 9201 if (!s) {
6136c704 9202 const char * const delimiter = skipspace(start);
10edeb5d
JH
9203 Perl_croak(aTHX_
9204 (const char *)
9205 (*delimiter == '?'
9206 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9207 : "Search pattern not terminated" ));
25c09cbf 9208 }
bbce6d69 9209
8782bef2 9210 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
9211 if (PL_multi_open == '?') {
9212 /* This is the only point in the code that sets PMf_ONCE: */
79072805 9213 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
9214
9215 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9216 allows us to restrict the list needed by reset to just the ??
9217 matches. */
9218 assert(type != OP_TRANS);
9219 if (PL_curstash) {
daba3364 9220 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
9221 U32 elements;
9222 if (!mg) {
daba3364 9223 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
9224 0);
9225 }
9226 elements = mg->mg_len / sizeof(PMOP**);
9227 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9228 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9229 mg->mg_len = elements * sizeof(PMOP**);
9230 PmopSTASH_set(pm,PL_curstash);
9231 }
9232 }
5db06880
NC
9233#ifdef PERL_MAD
9234 modstart = s;
9235#endif
d63c20f2
DM
9236
9237 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9238 * anon CV. False positives like qr/[(?{]/ are harmless */
9239
9240 if (type == OP_QR) {
6f635923
DM
9241 STRLEN len;
9242 char *e, *p = SvPV(PL_lex_stuff, len);
9243 e = p + len;
9244 for (; p < e; p++) {
d63c20f2
DM
9245 if (p[0] == '(' && p[1] == '?'
9246 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9247 {
9248 pm->op_pmflags |= PMf_HAS_CV;
9249 break;
9250 }
9251 }
6f635923 9252 pm->op_pmflags |= PMf_IS_QR;
d63c20f2
DM
9253 }
9254
3955e1a9 9255 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
9256#ifdef PERL_MAD
9257 if (PL_madskills && modstart != s) {
9258 SV* tmptoken = newSVpvn(modstart, s - modstart);
9259 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9260 }
9261#endif
4ac733c9 9262 /* issue a warning if /c is specified,but /g is not */
a2a5de95 9263 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 9264 {
a2a5de95
NC
9265 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9266 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
9267 }
9268
3280af22 9269 PL_lex_op = (OP*)pm;
6154021b 9270 pl_yylval.ival = OP_MATCH;
378cc40b
LW
9271 return s;
9272}
9273
76e3520e 9274STATIC char *
cea2e8a9 9275S_scan_subst(pTHX_ char *start)
79072805 9276{
27da23d5 9277 dVAR;
22594288 9278 char *s;
eb578fdb 9279 PMOP *pm;
4fdae800 9280 I32 first_start;
79072805 9281 I32 es = 0;
3955e1a9 9282 char charset = '\0'; /* character set modifier */
5db06880
NC
9283#ifdef PERL_MAD
9284 char *modstart;
9285#endif
79072805 9286
7918f24d
NC
9287 PERL_ARGS_ASSERT_SCAN_SUBST;
9288
6154021b 9289 pl_yylval.ival = OP_NULL;
79072805 9290
d24ca0c5 9291 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
79072805 9292
37fd879b 9293 if (!s)
cea2e8a9 9294 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9295
3280af22 9296 if (s[-1] == PL_multi_open)
79072805 9297 s--;
5db06880
NC
9298#ifdef PERL_MAD
9299 if (PL_madskills) {
cd81e915
NC
9300 CURMAD('q', PL_thisopen);
9301 CURMAD('_', PL_thiswhite);
9302 CURMAD('E', PL_thisstuff);
9303 CURMAD('Q', PL_thisclose);
9304 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9305 }
9306#endif
79072805 9307
3280af22 9308 first_start = PL_multi_start;
d24ca0c5 9309 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 9310 if (!s) {
37fd879b 9311 if (PL_lex_stuff) {
3280af22 9312 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9313 PL_lex_stuff = NULL;
37fd879b 9314 }
cea2e8a9 9315 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9316 }
3280af22 9317 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9318
79072805 9319 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9320
9321#ifdef PERL_MAD
9322 if (PL_madskills) {
cd81e915
NC
9323 CURMAD('z', PL_thisopen);
9324 CURMAD('R', PL_thisstuff);
9325 CURMAD('Z', PL_thisclose);
5db06880
NC
9326 }
9327 modstart = s;
9328#endif
9329
48c036b1 9330 while (*s) {
a20207d7 9331 if (*s == EXEC_PAT_MOD) {
a687059c 9332 s++;
2f3197b3 9333 es++;
a687059c 9334 }
3955e1a9
KW
9335 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9336 {
48c036b1 9337 break;
aa78b661 9338 }
378cc40b 9339 }
79072805 9340
5db06880
NC
9341#ifdef PERL_MAD
9342 if (PL_madskills) {
9343 if (modstart != s)
9344 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9345 append_madprops(PL_thismad, (OP*)pm, 0);
9346 PL_thismad = 0;
5db06880
NC
9347 }
9348#endif
a2a5de95
NC
9349 if ((pm->op_pmflags & PMf_CONTINUE)) {
9350 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9351 }
9352
79072805 9353 if (es) {
6136c704
AL
9354 SV * const repl = newSVpvs("");
9355
0244c3a4 9356 PL_multi_end = 0;
79072805 9357 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9358 while (es-- > 0) {
9359 if (es)
9360 sv_catpvs(repl, "eval ");
9361 else
9362 sv_catpvs(repl, "do ");
9363 }
6f43d98f 9364 sv_catpvs(repl, "{");
7cc34111
FC
9365 sv_catsv(repl, PL_sublex_info.repl);
9366 if (strchr(SvPVX(PL_sublex_info.repl), '#'))
9badc361
RGS
9367 sv_catpvs(repl, "\n");
9368 sv_catpvs(repl, "}");
25da4f38 9369 SvEVALED_on(repl);
7cc34111
FC
9370 SvREFCNT_dec(PL_sublex_info.repl);
9371 PL_sublex_info.repl = repl;
378cc40b 9372 }
79072805 9373
3280af22 9374 PL_lex_op = (OP*)pm;
6154021b 9375 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9376 return s;
9377}
9378
76e3520e 9379STATIC char *
cea2e8a9 9380S_scan_trans(pTHX_ char *start)
378cc40b 9381{
97aff369 9382 dVAR;
eb578fdb 9383 char* s;
11343788 9384 OP *o;
b84c11c8
NC
9385 U8 squash;
9386 U8 del;
9387 U8 complement;
bb16bae8 9388 bool nondestruct = 0;
5db06880
NC
9389#ifdef PERL_MAD
9390 char *modstart;
9391#endif
79072805 9392
7918f24d
NC
9393 PERL_ARGS_ASSERT_SCAN_TRANS;
9394
6154021b 9395 pl_yylval.ival = OP_NULL;
79072805 9396
d24ca0c5 9397 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
37fd879b 9398 if (!s)
cea2e8a9 9399 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9400
3280af22 9401 if (s[-1] == PL_multi_open)
2f3197b3 9402 s--;
5db06880
NC
9403#ifdef PERL_MAD
9404 if (PL_madskills) {
cd81e915
NC
9405 CURMAD('q', PL_thisopen);
9406 CURMAD('_', PL_thiswhite);
9407 CURMAD('E', PL_thisstuff);
9408 CURMAD('Q', PL_thisclose);
9409 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9410 }
9411#endif
2f3197b3 9412
d24ca0c5 9413 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 9414 if (!s) {
37fd879b 9415 if (PL_lex_stuff) {
3280af22 9416 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9417 PL_lex_stuff = NULL;
37fd879b 9418 }
cea2e8a9 9419 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9420 }
5db06880 9421 if (PL_madskills) {
cd81e915
NC
9422 CURMAD('z', PL_thisopen);
9423 CURMAD('R', PL_thisstuff);
9424 CURMAD('Z', PL_thisclose);
5db06880 9425 }
79072805 9426
a0ed51b3 9427 complement = del = squash = 0;
5db06880
NC
9428#ifdef PERL_MAD
9429 modstart = s;
9430#endif
7a1e2023
NC
9431 while (1) {
9432 switch (*s) {
9433 case 'c':
79072805 9434 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9435 break;
9436 case 'd':
a0ed51b3 9437 del = OPpTRANS_DELETE;
7a1e2023
NC
9438 break;
9439 case 's':
79072805 9440 squash = OPpTRANS_SQUASH;
7a1e2023 9441 break;
bb16bae8
FC
9442 case 'r':
9443 nondestruct = 1;
9444 break;
7a1e2023
NC
9445 default:
9446 goto no_more;
9447 }
395c3793
LW
9448 s++;
9449 }
7a1e2023 9450 no_more:
8973db79 9451
9100eeb1 9452 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
59f00321
RGS
9453 o->op_private &= ~OPpTRANS_ALL;
9454 o->op_private |= del|squash|complement|
7948272d 9455 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
7cc34111 9456 (DO_UTF8(PL_sublex_info.repl) ? OPpTRANS_TO_UTF : 0);
79072805 9457
3280af22 9458 PL_lex_op = o;
bb16bae8 9459 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9460
9461#ifdef PERL_MAD
9462 if (PL_madskills) {
9463 if (modstart != s)
9464 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9465 append_madprops(PL_thismad, o, 0);
9466 PL_thismad = 0;
5db06880
NC
9467 }
9468#endif
9469
79072805
LW
9470 return s;
9471}
9472
5097bf9b
FC
9473/* scan_heredoc
9474 Takes a pointer to the first < in <<FOO.
9475 Returns a pointer to the byte following <<FOO.
9476
9477 This function scans a heredoc, which involves different methods
9478 depending on whether we are in a string eval, quoted construct, etc.
9479 This is because PL_linestr could containing a single line of input, or
9480 a whole string being evalled, or the contents of the current quote-
9481 like operator.
9482
9483 The three methods are:
9484 - Steal lines from the input stream (stream)
9485 - Scan the heredoc in PL_linestr and remove it therefrom (linestr)
99bd9d90 9486 - Peek at the PL_linestr of outer lexing scopes (peek)
5097bf9b
FC
9487
9488 They are used in these cases:
99bd9d90
FC
9489 file scope or filtered eval stream
9490 string eval linestr
9491 multiline quoted construct linestr
9492 single-line quoted construct in file stream
9493 single-line quoted construct in eval or quote peek
5097bf9b
FC
9494
9495 Single-line also applies to heredocs that begin on the last line of a
9496 quote-like operator.
99bd9d90
FC
9497
9498 Peeking within a quote also involves falling back to the stream method,
9499 if the outer quote-like operators are all on one line (or the heredoc
9500 marker is on the last line).
5097bf9b
FC
9501*/
9502
76e3520e 9503STATIC char *
cea2e8a9 9504S_scan_heredoc(pTHX_ register char *s)
79072805 9505{
97aff369 9506 dVAR;
79072805
LW
9507 SV *herewas;
9508 I32 op_type = OP_SCALAR;
9509 I32 len;
9510 SV *tmpstr;
9511 char term;
5097bf9b 9512 const char *found_newline = 0;
eb578fdb
KW
9513 char *d;
9514 char *e;
4633a7c4 9515 char *peek;
5097bf9b 9516 const bool infile = PL_rsfp || PL_parser->filtered;
5db06880
NC
9517#ifdef PERL_MAD
9518 I32 stuffstart = s - SvPVX(PL_linestr);
9519 char *tstart;
9520
cd81e915 9521 PL_realtokenstart = -1;
5db06880 9522#endif
79072805 9523
7918f24d
NC
9524 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9525
79072805 9526 s += 2;
5097bf9b 9527 d = PL_tokenbuf + 1;
3280af22 9528 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
5097bf9b 9529 *PL_tokenbuf = '\n';
c35e046a
AL
9530 peek = s;
9531 while (SPACE_OR_TAB(*peek))
9532 peek++;
3792a11b 9533 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9534 s = peek;
79072805 9535 term = *s++;
3280af22 9536 s = delimcpy(d, e, s, PL_bufend, term, &len);
6f2d7fc9
FC
9537 if (s == PL_bufend)
9538 Perl_croak(aTHX_ "Unterminated delimiter for here document");
fc36a67e 9539 d += len;
6f2d7fc9 9540 s++;
79072805
LW
9541 }
9542 else {
9543 if (*s == '\\')
458391bd 9544 /* <<\FOO is equivalent to <<'FOO' */
79072805
LW
9545 s++, term = '\'';
9546 else
9547 term = '"';
7e2040f0 9548 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9549 deprecate("bare << to mean <<\"\"");
7e2040f0 9550 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9551 if (d < e)
9552 *d++ = *s;
9553 }
9554 }
3280af22 9555 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9556 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9557 *d++ = '\n';
9558 *d = '\0';
3280af22 9559 len = d - PL_tokenbuf;
5db06880
NC
9560
9561#ifdef PERL_MAD
9562 if (PL_madskills) {
5097bf9b
FC
9563 tstart = PL_tokenbuf + 1;
9564 PL_thisclose = newSVpvn(tstart, len - 1);
5db06880 9565 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9566 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9567 stuffstart = s - SvPVX(PL_linestr);
9568 }
9569#endif
6a27c188 9570#ifndef PERL_STRICT_CR
f63a84b2
LW
9571 d = strchr(s, '\r');
9572 if (d) {
b464bac0 9573 char * const olds = s;
f63a84b2 9574 s = d;
3280af22 9575 while (s < PL_bufend) {
f63a84b2
LW
9576 if (*s == '\r') {
9577 *d++ = '\n';
9578 if (*++s == '\n')
9579 s++;
9580 }
9581 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9582 *d++ = *s++;
9583 s++;
9584 }
9585 else
9586 *d++ = *s++;
9587 }
9588 *d = '\0';
3280af22 9589 PL_bufend = d;
95a20fc0 9590 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9591 s = olds;
9592 }
9593#endif
5097bf9b
FC
9594 if ((infile && !PL_lex_inwhat)
9595 || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s))) {
73d840c0
AL
9596 herewas = newSVpvn(s,PL_bufend-s);
9597 }
9598 else {
5db06880
NC
9599#ifdef PERL_MAD
9600 herewas = newSVpvn(s-1,found_newline-s+1);
9601#else
73d840c0
AL
9602 s--;
9603 herewas = newSVpvn(s,found_newline-s);
5db06880 9604#endif
73d840c0 9605 }
5db06880
NC
9606#ifdef PERL_MAD
9607 if (PL_madskills) {
9608 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9609 if (PL_thisstuff)
9610 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9611 else
cd81e915 9612 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
9613 }
9614#endif
79072805 9615 s += SvCUR(herewas);
748a9306 9616
5db06880
NC
9617#ifdef PERL_MAD
9618 stuffstart = s - SvPVX(PL_linestr);
9619
9620 if (found_newline)
9621 s--;
9622#endif
9623
7d0a29fe
NC
9624 tmpstr = newSV_type(SVt_PVIV);
9625 SvGROW(tmpstr, 80);
748a9306 9626 if (term == '\'') {
79072805 9627 op_type = OP_CONST;
45977657 9628 SvIV_set(tmpstr, -1);
748a9306
LW
9629 }
9630 else if (term == '`') {
79072805 9631 op_type = OP_BACKTICK;
45977657 9632 SvIV_set(tmpstr, '\\');
748a9306 9633 }
79072805
LW
9634
9635 CLINE;
57843af0 9636 PL_multi_start = CopLINE(PL_curcop);
3280af22 9637 PL_multi_open = PL_multi_close = '<';
99bd9d90
FC
9638 if (PL_lex_inwhat && !found_newline) {
9639 /* Peek into the line buffer of the parent lexing scope, going up
9640 as many levels as necessary to find one with a newline after
9641 bufptr. See the comments in sublex_push for how IVX and NVX
9642 are abused.
9643 */
9644 SV *linestr = NUM2PTR(SV *, SvNVX(PL_linestr));
9645 char *bufptr = PL_sublex_info.super_bufptr;
9646 char *bufend = SvEND(linestr);
b464bac0 9647 char * const olds = s - SvCUR(herewas);
99bd9d90
FC
9648 char * const real_olds = s;
9649 if (!bufptr) {
9650 s = real_olds;
9651 goto streaming;
9652 }
9653 while (!(s = (char *)memchr((void *)bufptr, '\n', bufend-bufptr))){
9654 if (SvIVX(linestr)) {
9655 bufptr = INT2PTR(char *, SvIVX(linestr));
9656 linestr = NUM2PTR(SV *, SvNVX(linestr));
9657 bufend = SvEND(linestr);
9658 }
9659 else if (infile) {
9660 s = real_olds;
9661 goto streaming;
9662 }
9663 else {
9664 s = bufend;
9665 break;
9666 }
9667 }
0244c3a4
GS
9668 d = s;
9669 while (s < bufend &&
5bd13da3 9670 (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
0244c3a4 9671 if (*s++ == '\n')
57843af0 9672 CopLINE_inc(PL_curcop);
0244c3a4
GS
9673 }
9674 if (s >= bufend) {
eb160463 9675 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
5097bf9b 9676 missingterm(PL_tokenbuf + 1);
0244c3a4
GS
9677 }
9678 sv_setpvn(herewas,bufptr,d-bufptr+1);
9679 sv_setpvn(tmpstr,d+1,s-d);
9680 s += len - 1;
9681 sv_catpvn(herewas,s,bufend-s);
95a20fc0 9682 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
99bd9d90
FC
9683 SvCUR_set(linestr,
9684 bufptr-SvPVX_const(linestr)
043cc6c6 9685 + SvCUR(herewas));
0244c3a4
GS
9686
9687 s = olds;
9688 goto retval;
9689 }
5097bf9b 9690 else if (!infile || found_newline) {
db444266 9691 char * const olds = s - SvCUR(herewas);
79072805 9692 d = s;
3280af22 9693 while (s < PL_bufend &&
5bd13da3 9694 (*s != '\n' || memNE(s,PL_tokenbuf,len)) ) {
79072805 9695 if (*s++ == '\n')
57843af0 9696 CopLINE_inc(PL_curcop);
79072805 9697 }
3280af22 9698 if (s >= PL_bufend) {
eb160463 9699 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
5097bf9b 9700 missingterm(PL_tokenbuf + 1);
79072805
LW
9701 }
9702 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9703#ifdef PERL_MAD
9704 if (PL_madskills) {
cd81e915
NC
9705 if (PL_thisstuff)
9706 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9707 else
cd81e915 9708 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9709 stuffstart = s - SvPVX(PL_linestr);
9710 }
9711#endif
79072805 9712 s += len - 1;
57843af0 9713 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 9714
db444266
FC
9715 /* s now points to the newline after the heredoc terminator.
9716 d points to the newline before the body of the heredoc.
9717 */
a91428a4
FC
9718 /* See the Paranoia note in case LEX_INTERPEND in yylex, for why we
9719 check PL_sublex_info.re_eval_str. */
9720 if (PL_sublex_info.re_eval_start || PL_sublex_info.re_eval_str) {
db444266
FC
9721 /* Set aside the rest of the regexp */
9722 if (!PL_sublex_info.re_eval_str)
9723 PL_sublex_info.re_eval_str =
9724 newSVpvn(PL_sublex_info.re_eval_start,
9725 PL_bufend - PL_sublex_info.re_eval_start);
9726 PL_sublex_info.re_eval_start -= s-d;
9727 }
9728 /* Copy everything from s onwards back to d. */
9729 Move(s,d,PL_bufend-s + 1,char);
9730 SvCUR_set(PL_linestr, SvCUR(PL_linestr) - (s-d));
3280af22 9731 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
db444266 9732 s = olds;
79072805
LW
9733 }
9734 else
76f68e9b 9735 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
99bd9d90 9736 streaming:
5097bf9b
FC
9737 term = PL_tokenbuf[1];
9738 len--;
3280af22 9739 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
9740#ifdef PERL_MAD
9741 if (PL_madskills) {
9742 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9743 if (PL_thisstuff)
9744 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9745 else
cd81e915 9746 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9747 }
9748#endif
f0e67a1d 9749 PL_bufptr = s;
17cc9359 9750 CopLINE_inc(PL_curcop);
112d1284
FC
9751 if (!lex_next_chunk(LEX_NO_TERM)
9752 && (!SvCUR(tmpstr) || SvEND(tmpstr)[-1] != '\n')) {
eb160463 9753 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
5097bf9b 9754 missingterm(PL_tokenbuf + 1);
79072805 9755 }
17cc9359 9756 CopLINE_dec(PL_curcop);
112d1284
FC
9757 if (!SvCUR(PL_linestr) || PL_bufend[-1] != '\n') {
9758 lex_grow_linestr(SvCUR(PL_linestr) + 2);
9759 sv_catpvs(PL_linestr, "\n\0");
9760 }
f0e67a1d 9761 s = PL_bufptr;
5db06880
NC
9762#ifdef PERL_MAD
9763 stuffstart = s - SvPVX(PL_linestr);
9764#endif
57843af0 9765 CopLINE_inc(PL_curcop);
3280af22 9766 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9767 PL_last_lop = PL_last_uni = NULL;
6a27c188 9768#ifndef PERL_STRICT_CR
3280af22 9769 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
9770 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9771 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 9772 {
3280af22
NIS
9773 PL_bufend[-2] = '\n';
9774 PL_bufend--;
95a20fc0 9775 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 9776 }
3280af22
NIS
9777 else if (PL_bufend[-1] == '\r')
9778 PL_bufend[-1] = '\n';
f63a84b2 9779 }
3280af22
NIS
9780 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9781 PL_bufend[-1] = '\n';
f63a84b2 9782#endif
5097bf9b 9783 if (*s == term && memEQ(s,PL_tokenbuf + 1,len)) {
95a20fc0 9784 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 9785 *(SvPVX(PL_linestr) + off ) = ' ';
37c6a70c 9786 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
3280af22
NIS
9787 sv_catsv(PL_linestr,herewas);
9788 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 9789 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
9790 }
9791 else {
3280af22
NIS
9792 s = PL_bufend;
9793 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
9794 }
9795 }
79072805 9796 s++;
0244c3a4 9797retval:
57843af0 9798 PL_multi_end = CopLINE(PL_curcop);
79072805 9799 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 9800 SvPV_shrink_to_cur(tmpstr);
79072805 9801 }
8990e307 9802 SvREFCNT_dec(herewas);
2f31ce75 9803 if (!IN_BYTES) {
95a20fc0 9804 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
9805 SvUTF8_on(tmpstr);
9806 else if (PL_encoding)
9807 sv_recode_to_utf8(tmpstr, PL_encoding);
9808 }
3280af22 9809 PL_lex_stuff = tmpstr;
6154021b 9810 pl_yylval.ival = op_type;
79072805
LW
9811 return s;
9812}
9813
02aa26ce
NT
9814/* scan_inputsymbol
9815 takes: current position in input buffer
9816 returns: new position in input buffer
6154021b 9817 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
9818
9819 This code handles:
9820
9821 <> read from ARGV
9822 <FH> read from filehandle
9823 <pkg::FH> read from package qualified filehandle
9824 <pkg'FH> read from package qualified filehandle
9825 <$fh> read from filehandle in $fh
9826 <*.h> filename glob
9827
9828*/
9829
76e3520e 9830STATIC char *
cea2e8a9 9831S_scan_inputsymbol(pTHX_ char *start)
79072805 9832{
97aff369 9833 dVAR;
eb578fdb 9834 char *s = start; /* current position in buffer */
1b420867 9835 char *end;
79072805 9836 I32 len;
6136c704
AL
9837 char *d = PL_tokenbuf; /* start of temp holding space */
9838 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9839
7918f24d
NC
9840 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9841
1b420867
GS
9842 end = strchr(s, '\n');
9843 if (!end)
9844 end = PL_bufend;
9845 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
9846
9847 /* die if we didn't have space for the contents of the <>,
1b420867 9848 or if it didn't end, or if we see a newline
02aa26ce
NT
9849 */
9850
bb7a0f54 9851 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 9852 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 9853 if (s >= end)
cea2e8a9 9854 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 9855
fc36a67e 9856 s++;
02aa26ce
NT
9857
9858 /* check for <$fh>
9859 Remember, only scalar variables are interpreted as filehandles by
9860 this code. Anything more complex (e.g., <$fh{$num}>) will be
9861 treated as a glob() call.
9862 This code makes use of the fact that except for the $ at the front,
9863 a scalar variable and a filehandle look the same.
9864 */
4633a7c4 9865 if (*d == '$' && d[1]) d++;
02aa26ce
NT
9866
9867 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 9868 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
2a507800 9869 d += UTF ? UTF8SKIP(d) : 1;
02aa26ce
NT
9870
9871 /* If we've tried to read what we allow filehandles to look like, and
9872 there's still text left, then it must be a glob() and not a getline.
9873 Use scan_str to pull out the stuff between the <> and treat it
9874 as nothing more than a string.
9875 */
9876
3280af22 9877 if (d - PL_tokenbuf != len) {
6154021b 9878 pl_yylval.ival = OP_GLOB;
d24ca0c5 9879 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
79072805 9880 if (!s)
cea2e8a9 9881 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
9882 return s;
9883 }
395c3793 9884 else {
9b3023bc 9885 bool readline_overriden = FALSE;
6136c704 9886 GV *gv_readline;
9b3023bc 9887 GV **gvp;
02aa26ce 9888 /* we're in a filehandle read situation */
3280af22 9889 d = PL_tokenbuf;
02aa26ce
NT
9890
9891 /* turn <> into <ARGV> */
79072805 9892 if (!len)
689badd5 9893 Copy("ARGV",d,5,char);
02aa26ce 9894
9b3023bc 9895 /* Check whether readline() is overriden */
fafc274c 9896 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 9897 if ((gv_readline
ba979b31 9898 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 9899 ||
017a3ce5 9900 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 9901 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 9902 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
9903 readline_overriden = TRUE;
9904
02aa26ce
NT
9905 /* if <$fh>, create the ops to turn the variable into a
9906 filehandle
9907 */
79072805 9908 if (*d == '$') {
02aa26ce
NT
9909 /* try to find it in the pad for this block, otherwise find
9910 add symbol table ops
9911 */
bc9b26ca 9912 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 9913 if (tmp != NOT_IN_PAD) {
00b1698f 9914 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
9915 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9916 HEK * const stashname = HvNAME_HEK(stash);
9917 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 9918 sv_catpvs(sym, "::");
f558d5af
JH
9919 sv_catpv(sym, d+1);
9920 d = SvPVX(sym);
9921 goto intro_sym;
9922 }
9923 else {
6136c704 9924 OP * const o = newOP(OP_PADSV, 0);
f558d5af 9925 o->op_targ = tmp;
9b3023bc
RGS
9926 PL_lex_op = readline_overriden
9927 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9928 op_append_elem(OP_LIST, o,
9b3023bc
RGS
9929 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9930 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 9931 }
a0d0e21e
LW
9932 }
9933 else {
f558d5af
JH
9934 GV *gv;
9935 ++d;
9936intro_sym:
9937 gv = gv_fetchpv(d,
9938 (PL_in_eval
9939 ? (GV_ADDMULTI | GV_ADDINEVAL)
25db2ea6 9940 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
f558d5af 9941 SVt_PV);
9b3023bc
RGS
9942 PL_lex_op = readline_overriden
9943 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9944 op_append_elem(OP_LIST,
9b3023bc
RGS
9945 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9946 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9947 : (OP*)newUNOP(OP_READLINE, 0,
9948 newUNOP(OP_RV2SV, 0,
9949 newGVOP(OP_GV, 0, gv)));
a0d0e21e 9950 }
7c6fadd6
RGS
9951 if (!readline_overriden)
9952 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
9953 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9954 pl_yylval.ival = OP_NULL;
79072805 9955 }
02aa26ce
NT
9956
9957 /* If it's none of the above, it must be a literal filehandle
9958 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 9959 else {
25db2ea6 9960 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9b3023bc
RGS
9961 PL_lex_op = readline_overriden
9962 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9963 op_append_elem(OP_LIST,
9b3023bc
RGS
9964 newGVOP(OP_GV, 0, gv),
9965 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9966 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 9967 pl_yylval.ival = OP_NULL;
79072805
LW
9968 }
9969 }
02aa26ce 9970
79072805
LW
9971 return s;
9972}
9973
02aa26ce
NT
9974
9975/* scan_str
9976 takes: start position in buffer
09bef843
SB
9977 keep_quoted preserve \ on the embedded delimiter(s)
9978 keep_delims preserve the delimiters around the string
d24ca0c5
DM
9979 re_reparse compiling a run-time /(?{})/:
9980 collapse // to /, and skip encoding src
02aa26ce
NT
9981 returns: position to continue reading from buffer
9982 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9983 updates the read buffer.
9984
9985 This subroutine pulls a string out of the input. It is called for:
9986 q single quotes q(literal text)
9987 ' single quotes 'literal text'
9988 qq double quotes qq(interpolate $here please)
9989 " double quotes "interpolate $here please"
9990 qx backticks qx(/bin/ls -l)
9991 ` backticks `/bin/ls -l`
9992 qw quote words @EXPORT_OK = qw( func() $spam )
9993 m// regexp match m/this/
9994 s/// regexp substitute s/this/that/
9995 tr/// string transliterate tr/this/that/
9996 y/// string transliterate y/this/that/
9997 ($*@) sub prototypes sub foo ($)
09bef843 9998 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
9999 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
10000
10001 In most of these cases (all but <>, patterns and transliterate)
10002 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
10003 calls scan_str(). s/// makes yylex() call scan_subst() which calls
10004 scan_str(). tr/// and y/// make yylex() call scan_trans() which
10005 calls scan_str().
4e553d73 10006
02aa26ce
NT
10007 It skips whitespace before the string starts, and treats the first
10008 character as the delimiter. If the delimiter is one of ([{< then
10009 the corresponding "close" character )]}> is used as the closing
10010 delimiter. It allows quoting of delimiters, and if the string has
10011 balanced delimiters ([{<>}]) it allows nesting.
10012
37fd879b
HS
10013 On success, the SV with the resulting string is put into lex_stuff or,
10014 if that is already non-NULL, into lex_repl. The second case occurs only
10015 when parsing the RHS of the special constructs s/// and tr/// (y///).
10016 For convenience, the terminating delimiter character is stuffed into
10017 SvIVX of the SV.
02aa26ce
NT
10018*/
10019
76e3520e 10020STATIC char *
d24ca0c5 10021S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
79072805 10022{
97aff369 10023 dVAR;
02aa26ce 10024 SV *sv; /* scalar value: string */
d3fcec1f 10025 const char *tmps; /* temp string, used for delimiter matching */
eb578fdb
KW
10026 char *s = start; /* current position in the buffer */
10027 char term; /* terminating character */
10028 char *to; /* current position in the sv's data */
02aa26ce 10029 I32 brackets = 1; /* bracket nesting level */
89491803 10030 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 10031 I32 termcode; /* terminating char. code */
89ebb4a3 10032 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 10033 STRLEN termlen; /* length of terminating string */
0331ef07 10034 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
10035#ifdef PERL_MAD
10036 int stuffstart;
10037 char *tstart;
10038#endif
02aa26ce 10039
7918f24d
NC
10040 PERL_ARGS_ASSERT_SCAN_STR;
10041
02aa26ce 10042 /* skip space before the delimiter */
29595ff2
NC
10043 if (isSPACE(*s)) {
10044 s = PEEKSPACE(s);
10045 }
02aa26ce 10046
5db06880 10047#ifdef PERL_MAD
cd81e915
NC
10048 if (PL_realtokenstart >= 0) {
10049 stuffstart = PL_realtokenstart;
10050 PL_realtokenstart = -1;
5db06880
NC
10051 }
10052 else
10053 stuffstart = start - SvPVX(PL_linestr);
10054#endif
02aa26ce 10055 /* mark where we are, in case we need to report errors */
79072805 10056 CLINE;
02aa26ce
NT
10057
10058 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 10059 term = *s;
220e2d4e
IH
10060 if (!UTF) {
10061 termcode = termstr[0] = term;
10062 termlen = 1;
10063 }
10064 else {
4b88fb76 10065 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
220e2d4e
IH
10066 Copy(s, termstr, termlen, U8);
10067 if (!UTF8_IS_INVARIANT(term))
10068 has_utf8 = TRUE;
10069 }
b1c7b182 10070
02aa26ce 10071 /* mark where we are */
57843af0 10072 PL_multi_start = CopLINE(PL_curcop);
3280af22 10073 PL_multi_open = term;
02aa26ce
NT
10074
10075 /* find corresponding closing delimiter */
93a17b20 10076 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
10077 termcode = termstr[0] = term = tmps[5];
10078
3280af22 10079 PL_multi_close = term;
79072805 10080
561b68a9
SH
10081 /* create a new SV to hold the contents. 79 is the SV's initial length.
10082 What a random number. */
7d0a29fe
NC
10083 sv = newSV_type(SVt_PVIV);
10084 SvGROW(sv, 80);
45977657 10085 SvIV_set(sv, termcode);
a0d0e21e 10086 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
10087
10088 /* move past delimiter and try to read a complete string */
09bef843 10089 if (keep_delims)
220e2d4e
IH
10090 sv_catpvn(sv, s, termlen);
10091 s += termlen;
5db06880
NC
10092#ifdef PERL_MAD
10093 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10094 if (!PL_thisopen && !keep_delims) {
10095 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
10096 stuffstart = s - SvPVX(PL_linestr);
10097 }
10098#endif
93a17b20 10099 for (;;) {
d24ca0c5 10100 if (PL_encoding && !UTF && !re_reparse) {
220e2d4e
IH
10101 bool cont = TRUE;
10102
10103 while (cont) {
95a20fc0 10104 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 10105 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 10106 &offset, (char*)termstr, termlen);
6136c704
AL
10107 const char * const ns = SvPVX_const(PL_linestr) + offset;
10108 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
10109
10110 for (; s < ns; s++) {
60d63348 10111 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
220e2d4e
IH
10112 CopLINE_inc(PL_curcop);
10113 }
10114 if (!found)
10115 goto read_more_line;
10116 else {
10117 /* handle quoted delimiters */
52327caf 10118 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 10119 const char *t;
95a20fc0 10120 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
10121 t--;
10122 if ((svlast-1 - t) % 2) {
10123 if (!keep_quoted) {
10124 *(svlast-1) = term;
10125 *svlast = '\0';
10126 SvCUR_set(sv, SvCUR(sv) - 1);
10127 }
10128 continue;
10129 }
10130 }
10131 if (PL_multi_open == PL_multi_close) {
10132 cont = FALSE;
10133 }
10134 else {
f54cb97a
AL
10135 const char *t;
10136 char *w;
0331ef07 10137 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
10138 /* At here, all closes are "was quoted" one,
10139 so we don't check PL_multi_close. */
10140 if (*t == '\\') {
10141 if (!keep_quoted && *(t+1) == PL_multi_open)
10142 t++;
10143 else
10144 *w++ = *t++;
10145 }
10146 else if (*t == PL_multi_open)
10147 brackets++;
10148
10149 *w = *t;
10150 }
10151 if (w < t) {
10152 *w++ = term;
10153 *w = '\0';
95a20fc0 10154 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 10155 }
0331ef07 10156 last_off = w - SvPVX(sv);
220e2d4e
IH
10157 if (--brackets <= 0)
10158 cont = FALSE;
10159 }
10160 }
10161 }
10162 if (!keep_delims) {
10163 SvCUR_set(sv, SvCUR(sv) - 1);
10164 *SvEND(sv) = '\0';
10165 }
10166 break;
10167 }
10168
02aa26ce 10169 /* extend sv if need be */
3280af22 10170 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 10171 /* set 'to' to the next character in the sv's string */
463ee0b2 10172 to = SvPVX(sv)+SvCUR(sv);
09bef843 10173
02aa26ce 10174 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
10175 if (PL_multi_open == PL_multi_close) {
10176 for (; s < PL_bufend; s++,to++) {
02aa26ce 10177 /* embedded newlines increment the current line number */
60d63348 10178 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
57843af0 10179 CopLINE_inc(PL_curcop);
02aa26ce 10180 /* handle quoted delimiters */
3280af22 10181 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
d24ca0c5
DM
10182 if (!keep_quoted
10183 && (s[1] == term
10184 || (re_reparse && s[1] == '\\'))
10185 )
a0d0e21e 10186 s++;
d24ca0c5 10187 /* any other quotes are simply copied straight through */
a0d0e21e
LW
10188 else
10189 *to++ = *s++;
10190 }
02aa26ce
NT
10191 /* terminate when run out of buffer (the for() condition), or
10192 have found the terminator */
220e2d4e
IH
10193 else if (*s == term) {
10194 if (termlen == 1)
10195 break;
f3b9ce0f 10196 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
10197 break;
10198 }
63cd0674 10199 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10200 has_utf8 = TRUE;
93a17b20
LW
10201 *to = *s;
10202 }
10203 }
02aa26ce
NT
10204
10205 /* if the terminator isn't the same as the start character (e.g.,
10206 matched brackets), we have to allow more in the quoting, and
10207 be prepared for nested brackets.
10208 */
93a17b20 10209 else {
02aa26ce 10210 /* read until we run out of string, or we find the terminator */
3280af22 10211 for (; s < PL_bufend; s++,to++) {
02aa26ce 10212 /* embedded newlines increment the line count */
60d63348 10213 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
57843af0 10214 CopLINE_inc(PL_curcop);
02aa26ce 10215 /* backslashes can escape the open or closing characters */
3280af22 10216 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
10217 if (!keep_quoted &&
10218 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
10219 s++;
10220 else
10221 *to++ = *s++;
10222 }
02aa26ce 10223 /* allow nested opens and closes */
3280af22 10224 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 10225 break;
3280af22 10226 else if (*s == PL_multi_open)
93a17b20 10227 brackets++;
63cd0674 10228 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10229 has_utf8 = TRUE;
93a17b20
LW
10230 *to = *s;
10231 }
10232 }
02aa26ce 10233 /* terminate the copied string and update the sv's end-of-string */
93a17b20 10234 *to = '\0';
95a20fc0 10235 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 10236
02aa26ce
NT
10237 /*
10238 * this next chunk reads more into the buffer if we're not done yet
10239 */
10240
b1c7b182
GS
10241 if (s < PL_bufend)
10242 break; /* handle case where we are done yet :-) */
79072805 10243
6a27c188 10244#ifndef PERL_STRICT_CR
95a20fc0 10245 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
10246 if ((to[-2] == '\r' && to[-1] == '\n') ||
10247 (to[-2] == '\n' && to[-1] == '\r'))
10248 {
f63a84b2
LW
10249 to[-2] = '\n';
10250 to--;
95a20fc0 10251 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
10252 }
10253 else if (to[-1] == '\r')
10254 to[-1] = '\n';
10255 }
95a20fc0 10256 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
10257 to[-1] = '\n';
10258#endif
10259
220e2d4e 10260 read_more_line:
02aa26ce
NT
10261 /* if we're out of file, or a read fails, bail and reset the current
10262 line marker so we can report where the unterminated string began
10263 */
5db06880
NC
10264#ifdef PERL_MAD
10265 if (PL_madskills) {
c35e046a 10266 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10267 if (PL_thisstuff)
10268 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10269 else
cd81e915 10270 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10271 }
10272#endif
f0e67a1d
Z
10273 CopLINE_inc(PL_curcop);
10274 PL_bufptr = PL_bufend;
10275 if (!lex_next_chunk(0)) {
c07a80fd 10276 sv_free(sv);
eb160463 10277 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 10278 return NULL;
79072805 10279 }
f0e67a1d 10280 s = PL_bufptr;
5db06880
NC
10281#ifdef PERL_MAD
10282 stuffstart = 0;
10283#endif
378cc40b 10284 }
4e553d73 10285
02aa26ce
NT
10286 /* at this point, we have successfully read the delimited string */
10287
d24ca0c5 10288 if (!PL_encoding || UTF || re_reparse) {
5db06880
NC
10289#ifdef PERL_MAD
10290 if (PL_madskills) {
c35e046a 10291 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 10292 const int len = s - tstart;
cd81e915 10293 if (PL_thisstuff)
c35e046a 10294 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10295 else
c35e046a 10296 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10297 if (!PL_thisclose && !keep_delims)
10298 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
10299 }
10300#endif
10301
220e2d4e
IH
10302 if (keep_delims)
10303 sv_catpvn(sv, s, termlen);
10304 s += termlen;
10305 }
5db06880
NC
10306#ifdef PERL_MAD
10307 else {
10308 if (PL_madskills) {
c35e046a
AL
10309 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10310 const int len = s - tstart - termlen;
cd81e915 10311 if (PL_thisstuff)
c35e046a 10312 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10313 else
c35e046a 10314 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10315 if (!PL_thisclose && !keep_delims)
10316 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
10317 }
10318 }
10319#endif
d24ca0c5 10320 if (has_utf8 || (PL_encoding && !re_reparse))
b1c7b182 10321 SvUTF8_on(sv);
d0063567 10322
57843af0 10323 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
10324
10325 /* if we allocated too much space, give some back */
93a17b20
LW
10326 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10327 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 10328 SvPV_renew(sv, SvLEN(sv));
79072805 10329 }
02aa26ce
NT
10330
10331 /* decide whether this is the first or second quoted string we've read
10332 for this op
10333 */
4e553d73 10334
3280af22 10335 if (PL_lex_stuff)
7cc34111 10336 PL_sublex_info.repl = sv;
79072805 10337 else
3280af22 10338 PL_lex_stuff = sv;
378cc40b
LW
10339 return s;
10340}
10341
02aa26ce
NT
10342/*
10343 scan_num
10344 takes: pointer to position in buffer
10345 returns: pointer to new position in buffer
6154021b 10346 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
10347
10348 Read a number in any of the formats that Perl accepts:
10349
7fd134d9
JH
10350 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10351 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
10352 0b[01](_?[01])*
10353 0[0-7](_?[0-7])*
10354 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10355
3280af22 10356 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10357 thing it reads.
10358
10359 If it reads a number without a decimal point or an exponent, it will
10360 try converting the number to an integer and see if it can do so
10361 without loss of precision.
10362*/
4e553d73 10363
378cc40b 10364char *
bfed75c6 10365Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10366{
97aff369 10367 dVAR;
eb578fdb
KW
10368 const char *s = start; /* current position in buffer */
10369 char *d; /* destination in temp buffer */
10370 char *e; /* end of temp buffer */
86554af2 10371 NV nv; /* number read, as a double */
a0714e2c 10372 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10373 bool floatit; /* boolean: int or float? */
cbbf8932 10374 const char *lastub = NULL; /* position of last underbar */
bfed75c6 10375 static char const number_too_long[] = "Number too long";
378cc40b 10376
7918f24d
NC
10377 PERL_ARGS_ASSERT_SCAN_NUM;
10378
02aa26ce
NT
10379 /* We use the first character to decide what type of number this is */
10380
378cc40b 10381 switch (*s) {
79072805 10382 default:
5637ef5b 10383 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
4e553d73 10384
02aa26ce 10385 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10386 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10387 case '0':
10388 {
02aa26ce
NT
10389 /* variables:
10390 u holds the "number so far"
4f19785b
WSI
10391 shift the power of 2 of the base
10392 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10393 overflowed was the number more than we can hold?
10394
10395 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10396 we in octal/hex/binary?" indicator to disallow hex characters
10397 when in octal mode.
02aa26ce 10398 */
9e24b6e2
JH
10399 NV n = 0.0;
10400 UV u = 0;
79072805 10401 I32 shift;
9e24b6e2 10402 bool overflowed = FALSE;
61f33854 10403 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10404 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10405 static const char* const bases[5] =
10406 { "", "binary", "", "octal", "hexadecimal" };
10407 static const char* const Bases[5] =
10408 { "", "Binary", "", "Octal", "Hexadecimal" };
10409 static const char* const maxima[5] =
10410 { "",
10411 "0b11111111111111111111111111111111",
10412 "",
10413 "037777777777",
10414 "0xffffffff" };
bfed75c6 10415 const char *base, *Base, *max;
378cc40b 10416
02aa26ce 10417 /* check for hex */
a674e8db 10418 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10419 shift = 4;
10420 s += 2;
61f33854 10421 just_zero = FALSE;
a674e8db 10422 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10423 shift = 1;
10424 s += 2;
61f33854 10425 just_zero = FALSE;
378cc40b 10426 }
02aa26ce 10427 /* check for a decimal in disguise */
b78218b7 10428 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10429 goto decimal;
02aa26ce 10430 /* so it must be octal */
928753ea 10431 else {
378cc40b 10432 shift = 3;
928753ea
JH
10433 s++;
10434 }
10435
10436 if (*s == '_') {
a2a5de95 10437 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10438 "Misplaced _ in number");
10439 lastub = s++;
10440 }
9e24b6e2
JH
10441
10442 base = bases[shift];
10443 Base = Bases[shift];
10444 max = maxima[shift];
02aa26ce 10445
4f19785b 10446 /* read the rest of the number */
378cc40b 10447 for (;;) {
9e24b6e2 10448 /* x is used in the overflow test,
893fe2c2 10449 b is the digit we're adding on. */
9e24b6e2 10450 UV x, b;
55497cff 10451
378cc40b 10452 switch (*s) {
02aa26ce
NT
10453
10454 /* if we don't mention it, we're done */
378cc40b
LW
10455 default:
10456 goto out;
02aa26ce 10457
928753ea 10458 /* _ are ignored -- but warned about if consecutive */
de3bb511 10459 case '_':
a2a5de95
NC
10460 if (lastub && s == lastub + 1)
10461 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10462 "Misplaced _ in number");
928753ea 10463 lastub = s++;
de3bb511 10464 break;
02aa26ce
NT
10465
10466 /* 8 and 9 are not octal */
378cc40b 10467 case '8': case '9':
4f19785b 10468 if (shift == 3)
cea2e8a9 10469 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10470 /* FALL THROUGH */
02aa26ce
NT
10471
10472 /* octal digits */
4f19785b 10473 case '2': case '3': case '4':
378cc40b 10474 case '5': case '6': case '7':
4f19785b 10475 if (shift == 1)
cea2e8a9 10476 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10477 /* FALL THROUGH */
10478
10479 case '0': case '1':
02aa26ce 10480 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10481 goto digit;
02aa26ce
NT
10482
10483 /* hex digits */
378cc40b
LW
10484 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10485 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10486 /* make sure they said 0x */
378cc40b
LW
10487 if (shift != 4)
10488 goto out;
55497cff 10489 b = (*s++ & 7) + 9;
02aa26ce
NT
10490
10491 /* Prepare to put the digit we have onto the end
10492 of the number so far. We check for overflows.
10493 */
10494
55497cff 10495 digit:
61f33854 10496 just_zero = FALSE;
9e24b6e2
JH
10497 if (!overflowed) {
10498 x = u << shift; /* make room for the digit */
10499
10500 if ((x >> shift) != u
10501 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10502 overflowed = TRUE;
10503 n = (NV) u;
9b387841
NC
10504 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10505 "Integer overflow in %s number",
10506 base);
9e24b6e2
JH
10507 } else
10508 u = x | b; /* add the digit to the end */
10509 }
10510 if (overflowed) {
10511 n *= nvshift[shift];
10512 /* If an NV has not enough bits in its
10513 * mantissa to represent an UV this summing of
10514 * small low-order numbers is a waste of time
10515 * (because the NV cannot preserve the
10516 * low-order bits anyway): we could just
10517 * remember when did we overflow and in the
10518 * end just multiply n by the right
10519 * amount. */
10520 n += (NV) b;
55497cff 10521 }
378cc40b
LW
10522 break;
10523 }
10524 }
02aa26ce
NT
10525
10526 /* if we get here, we had success: make a scalar value from
10527 the number.
10528 */
378cc40b 10529 out:
928753ea
JH
10530
10531 /* final misplaced underbar check */
10532 if (s[-1] == '_') {
a2a5de95 10533 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10534 }
10535
9e24b6e2 10536 if (overflowed) {
a2a5de95
NC
10537 if (n > 4294967295.0)
10538 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10539 "%s number > %s non-portable",
10540 Base, max);
b081dd7e 10541 sv = newSVnv(n);
9e24b6e2
JH
10542 }
10543 else {
15041a67 10544#if UVSIZE > 4
a2a5de95
NC
10545 if (u > 0xffffffff)
10546 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10547 "%s number > %s non-portable",
10548 Base, max);
2cc4c2dc 10549#endif
b081dd7e 10550 sv = newSVuv(u);
9e24b6e2 10551 }
61f33854 10552 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10553 sv = new_constant(start, s - start, "integer",
eb0d8d16 10554 sv, NULL, NULL, 0);
61f33854 10555 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10556 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10557 }
10558 break;
02aa26ce
NT
10559
10560 /*
10561 handle decimal numbers.
10562 we're also sent here when we read a 0 as the first digit
10563 */
378cc40b
LW
10564 case '1': case '2': case '3': case '4': case '5':
10565 case '6': case '7': case '8': case '9': case '.':
10566 decimal:
3280af22
NIS
10567 d = PL_tokenbuf;
10568 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10569 floatit = FALSE;
02aa26ce
NT
10570
10571 /* read next group of digits and _ and copy into d */
de3bb511 10572 while (isDIGIT(*s) || *s == '_') {
4e553d73 10573 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10574 if -w is on
10575 */
93a17b20 10576 if (*s == '_') {
a2a5de95
NC
10577 if (lastub && s == lastub + 1)
10578 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10579 "Misplaced _ in number");
928753ea 10580 lastub = s++;
93a17b20 10581 }
fc36a67e 10582 else {
02aa26ce 10583 /* check for end of fixed-length buffer */
fc36a67e 10584 if (d >= e)
cea2e8a9 10585 Perl_croak(aTHX_ number_too_long);
02aa26ce 10586 /* if we're ok, copy the character */
378cc40b 10587 *d++ = *s++;
fc36a67e 10588 }
378cc40b 10589 }
02aa26ce
NT
10590
10591 /* final misplaced underbar check */
928753ea 10592 if (lastub && s == lastub + 1) {
a2a5de95 10593 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10594 }
02aa26ce
NT
10595
10596 /* read a decimal portion if there is one. avoid
10597 3..5 being interpreted as the number 3. followed
10598 by .5
10599 */
2f3197b3 10600 if (*s == '.' && s[1] != '.') {
79072805 10601 floatit = TRUE;
378cc40b 10602 *d++ = *s++;
02aa26ce 10603
928753ea 10604 if (*s == '_') {
a2a5de95
NC
10605 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10606 "Misplaced _ in number");
928753ea
JH
10607 lastub = s;
10608 }
10609
10610 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10611 */
fc36a67e 10612 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10613 /* fixed length buffer check */
fc36a67e 10614 if (d >= e)
cea2e8a9 10615 Perl_croak(aTHX_ number_too_long);
928753ea 10616 if (*s == '_') {
a2a5de95
NC
10617 if (lastub && s == lastub + 1)
10618 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10619 "Misplaced _ in number");
928753ea
JH
10620 lastub = s;
10621 }
10622 else
fc36a67e 10623 *d++ = *s;
378cc40b 10624 }
928753ea
JH
10625 /* fractional part ending in underbar? */
10626 if (s[-1] == '_') {
a2a5de95
NC
10627 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10628 "Misplaced _ in number");
928753ea 10629 }
dd629d5b
GS
10630 if (*s == '.' && isDIGIT(s[1])) {
10631 /* oops, it's really a v-string, but without the "v" */
f4758303 10632 s = start;
dd629d5b
GS
10633 goto vstring;
10634 }
378cc40b 10635 }
02aa26ce
NT
10636
10637 /* read exponent part, if present */
3792a11b 10638 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10639 floatit = TRUE;
10640 s++;
02aa26ce
NT
10641
10642 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10643 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10644
7fd134d9
JH
10645 /* stray preinitial _ */
10646 if (*s == '_') {
a2a5de95
NC
10647 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10648 "Misplaced _ in number");
7fd134d9
JH
10649 lastub = s++;
10650 }
10651
02aa26ce 10652 /* allow positive or negative exponent */
378cc40b
LW
10653 if (*s == '+' || *s == '-')
10654 *d++ = *s++;
02aa26ce 10655
7fd134d9
JH
10656 /* stray initial _ */
10657 if (*s == '_') {
a2a5de95
NC
10658 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10659 "Misplaced _ in number");
7fd134d9
JH
10660 lastub = s++;
10661 }
10662
7fd134d9
JH
10663 /* read digits of exponent */
10664 while (isDIGIT(*s) || *s == '_') {
10665 if (isDIGIT(*s)) {
10666 if (d >= e)
10667 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10668 *d++ = *s++;
7fd134d9
JH
10669 }
10670 else {
041457d9 10671 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10672 (!isDIGIT(s[1]) && s[1] != '_')))
10673 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10674 "Misplaced _ in number");
b3b48e3e 10675 lastub = s++;
7fd134d9 10676 }
7fd134d9 10677 }
378cc40b 10678 }
02aa26ce 10679
02aa26ce 10680
0b7fceb9 10681 /*
58bb9ec3
NC
10682 We try to do an integer conversion first if no characters
10683 indicating "float" have been found.
0b7fceb9
MU
10684 */
10685
10686 if (!floatit) {
58bb9ec3 10687 UV uv;
6136c704 10688 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10689
10690 if (flags == IS_NUMBER_IN_UV) {
10691 if (uv <= IV_MAX)
b081dd7e 10692 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10693 else
b081dd7e 10694 sv = newSVuv(uv);
58bb9ec3
NC
10695 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10696 if (uv <= (UV) IV_MIN)
b081dd7e 10697 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10698 else
10699 floatit = TRUE;
10700 } else
10701 floatit = TRUE;
10702 }
0b7fceb9 10703 if (floatit) {
58bb9ec3
NC
10704 /* terminate the string */
10705 *d = '\0';
86554af2 10706 nv = Atof(PL_tokenbuf);
b081dd7e 10707 sv = newSVnv(nv);
86554af2 10708 }
86554af2 10709
eb0d8d16
NC
10710 if ( floatit
10711 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10712 const char *const key = floatit ? "float" : "integer";
10713 const STRLEN keylen = floatit ? 5 : 7;
10714 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10715 key, keylen, sv, NULL, NULL, 0);
10716 }
378cc40b 10717 break;
0b7fceb9 10718
e312add1 10719 /* if it starts with a v, it could be a v-string */
a7cb1f99 10720 case 'v':
dd629d5b 10721vstring:
561b68a9 10722 sv = newSV(5); /* preallocate storage space */
65b06e02 10723 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10724 break;
79072805 10725 }
a687059c 10726
02aa26ce
NT
10727 /* make the op for the constant and return */
10728
a86a20aa 10729 if (sv)
b73d6f50 10730 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10731 else
5f66b61c 10732 lvalp->opval = NULL;
a687059c 10733
73d840c0 10734 return (char *)s;
378cc40b
LW
10735}
10736
76e3520e 10737STATIC char *
cea2e8a9 10738S_scan_formline(pTHX_ register char *s)
378cc40b 10739{
97aff369 10740 dVAR;
eb578fdb
KW
10741 char *eol;
10742 char *t;
6136c704 10743 SV * const stuff = newSVpvs("");
79072805 10744 bool needargs = FALSE;
c5ee2135 10745 bool eofmt = FALSE;
5db06880
NC
10746#ifdef PERL_MAD
10747 char *tokenstart = s;
4f61fd4b
JC
10748 SV* savewhite = NULL;
10749
5db06880 10750 if (PL_madskills) {
cd81e915
NC
10751 savewhite = PL_thiswhite;
10752 PL_thiswhite = 0;
5db06880
NC
10753 }
10754#endif
378cc40b 10755
7918f24d
NC
10756 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10757
79072805 10758 while (!needargs) {
a1b95068 10759 if (*s == '.') {
c35e046a 10760 t = s+1;
51882d45 10761#ifdef PERL_STRICT_CR
c35e046a
AL
10762 while (SPACE_OR_TAB(*t))
10763 t++;
51882d45 10764#else
c35e046a
AL
10765 while (SPACE_OR_TAB(*t) || *t == '\r')
10766 t++;
51882d45 10767#endif
c5ee2135
WL
10768 if (*t == '\n' || t == PL_bufend) {
10769 eofmt = TRUE;
79072805 10770 break;
c5ee2135 10771 }
79072805 10772 }
583c9d5c
FC
10773 eol = (char *) memchr(s,'\n',PL_bufend-s);
10774 if (!eol++)
3280af22 10775 eol = PL_bufend;
79072805 10776 if (*s != '#') {
a0d0e21e
LW
10777 for (t = s; t < eol; t++) {
10778 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10779 needargs = FALSE;
10780 goto enough; /* ~~ must be first line in formline */
378cc40b 10781 }
a0d0e21e
LW
10782 if (*t == '@' || *t == '^')
10783 needargs = TRUE;
378cc40b 10784 }
7121b347
MG
10785 if (eol > s) {
10786 sv_catpvn(stuff, s, eol-s);
2dc4c65b 10787#ifndef PERL_STRICT_CR
7121b347
MG
10788 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10789 char *end = SvPVX(stuff) + SvCUR(stuff);
10790 end[-2] = '\n';
10791 end[-1] = '\0';
b162af07 10792 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 10793 }
2dc4c65b 10794#endif
7121b347
MG
10795 }
10796 else
10797 break;
79072805 10798 }
95a20fc0 10799 s = (char*)eol;
583c9d5c
FC
10800 if ((PL_rsfp || PL_parser->filtered)
10801 && PL_parser->form_lex_state == LEX_NORMAL) {
f0e67a1d 10802 bool got_some;
5db06880
NC
10803#ifdef PERL_MAD
10804 if (PL_madskills) {
cd81e915
NC
10805 if (PL_thistoken)
10806 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 10807 else
cd81e915 10808 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
10809 }
10810#endif
f0e67a1d
Z
10811 PL_bufptr = PL_bufend;
10812 CopLINE_inc(PL_curcop);
10813 got_some = lex_next_chunk(0);
10814 CopLINE_dec(PL_curcop);
10815 s = PL_bufptr;
5db06880 10816#ifdef PERL_MAD
f0e67a1d 10817 tokenstart = PL_bufptr;
5db06880 10818#endif
f0e67a1d 10819 if (!got_some)
378cc40b 10820 break;
378cc40b 10821 }
463ee0b2 10822 incline(s);
79072805 10823 }
a0d0e21e 10824 enough:
5c9ae74d
FC
10825 if (!SvCUR(stuff) || needargs)
10826 PL_lex_state = PL_parser->form_lex_state;
a0d0e21e 10827 if (SvCUR(stuff)) {
705fe0e5 10828 PL_expect = XSTATE;
79072805 10829 if (needargs) {
cd81e915 10830 start_force(PL_curforce);
9ded7720 10831 NEXTVAL_NEXTTOKE.ival = 0;
705fe0e5 10832 force_next(FORMLBRACK);
79072805 10833 }
1bd51a4c 10834 if (!IN_BYTES) {
95a20fc0 10835 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
10836 SvUTF8_on(stuff);
10837 else if (PL_encoding)
10838 sv_recode_to_utf8(stuff, PL_encoding);
10839 }
cd81e915 10840 start_force(PL_curforce);
9ded7720 10841 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 10842 force_next(THING);
378cc40b 10843 }
79072805 10844 else {
8990e307 10845 SvREFCNT_dec(stuff);
c5ee2135
WL
10846 if (eofmt)
10847 PL_lex_formbrack = 0;
79072805 10848 }
5db06880
NC
10849#ifdef PERL_MAD
10850 if (PL_madskills) {
cd81e915
NC
10851 if (PL_thistoken)
10852 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 10853 else
cd81e915
NC
10854 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10855 PL_thiswhite = savewhite;
5db06880
NC
10856 }
10857#endif
79072805 10858 return s;
378cc40b 10859}
a687059c 10860
ba6d6ac9 10861I32
864dbfa3 10862Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 10863{
97aff369 10864 dVAR;
a3b680e6 10865 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 10866 CV* const outsidecv = PL_compcv;
8990e307 10867
7766f137 10868 SAVEI32(PL_subline);
3280af22 10869 save_item(PL_subname);
3280af22 10870 SAVESPTR(PL_compcv);
3280af22 10871
ea726b52 10872 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
10873 CvFLAGS(PL_compcv) |= flags;
10874
57843af0 10875 PL_subline = CopLINE(PL_curcop);
dd2155a4 10876 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 10877 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 10878 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
db4cf31d
FC
10879 if (outsidecv && CvPADLIST(outsidecv))
10880 CvPADLIST(PL_compcv)->xpadl_outid = CvPADLIST(outsidecv)->xpadl_id;
748a9306 10881
8990e307
LW
10882 return oldsavestack_ix;
10883}
10884
084592ab
CN
10885#ifdef __SC__
10886#pragma segment Perl_yylex
10887#endif
af41e527 10888static int
19c62481 10889S_yywarn(pTHX_ const char *const s, U32 flags)
8990e307 10890{
97aff369 10891 dVAR;
7918f24d
NC
10892
10893 PERL_ARGS_ASSERT_YYWARN;
10894
faef0170 10895 PL_in_eval |= EVAL_WARNONLY;
19c62481 10896 yyerror_pv(s, flags);
faef0170 10897 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 10898 return 0;
8990e307
LW
10899}
10900
10901int
15f169a1 10902Perl_yyerror(pTHX_ const char *const s)
463ee0b2 10903{
19c62481
BF
10904 PERL_ARGS_ASSERT_YYERROR;
10905 return yyerror_pvn(s, strlen(s), 0);
10906}
10907
10908int
10909Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10910{
10911 PERL_ARGS_ASSERT_YYERROR_PV;
10912 return yyerror_pvn(s, strlen(s), flags);
10913}
10914
10915int
19c62481
BF
10916Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10917{
97aff369 10918 dVAR;
bfed75c6 10919 const char *context = NULL;
68dc0745 10920 int contlen = -1;
46fc3d4c 10921 SV *msg;
19c62481 10922 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
5912531f 10923 int yychar = PL_parser->yychar;
19c62481 10924 U32 is_utf8 = flags & SVf_UTF8;
463ee0b2 10925
19c62481 10926 PERL_ARGS_ASSERT_YYERROR_PVN;
7918f24d 10927
3280af22 10928 if (!yychar || (yychar == ';' && !PL_rsfp))
19c62481 10929 sv_catpvs(where_sv, "at EOF");
8bcfe651
TM
10930 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10931 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10932 PL_oldbufptr != PL_bufptr) {
f355267c
JH
10933 /*
10934 Only for NetWare:
10935 The code below is removed for NetWare because it abends/crashes on NetWare
10936 when the script has error such as not having the closing quotes like:
10937 if ($var eq "value)
10938 Checking of white spaces is anyway done in NetWare code.
10939 */
10940#ifndef NETWARE
3280af22
NIS
10941 while (isSPACE(*PL_oldoldbufptr))
10942 PL_oldoldbufptr++;
f355267c 10943#endif
3280af22
NIS
10944 context = PL_oldoldbufptr;
10945 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 10946 }
8bcfe651
TM
10947 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10948 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
10949 /*
10950 Only for NetWare:
10951 The code below is removed for NetWare because it abends/crashes on NetWare
10952 when the script has error such as not having the closing quotes like:
10953 if ($var eq "value)
10954 Checking of white spaces is anyway done in NetWare code.
10955 */
10956#ifndef NETWARE
3280af22
NIS
10957 while (isSPACE(*PL_oldbufptr))
10958 PL_oldbufptr++;
f355267c 10959#endif
3280af22
NIS
10960 context = PL_oldbufptr;
10961 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
10962 }
10963 else if (yychar > 255)
19c62481 10964 sv_catpvs(where_sv, "next token ???");
12fbd33b 10965 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
10966 if (PL_lex_state == LEX_NORMAL ||
10967 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
19c62481 10968 sv_catpvs(where_sv, "at end of line");
3280af22 10969 else if (PL_lex_inpat)
19c62481 10970 sv_catpvs(where_sv, "within pattern");
463ee0b2 10971 else
19c62481 10972 sv_catpvs(where_sv, "within string");
463ee0b2 10973 }
46fc3d4c 10974 else {
19c62481 10975 sv_catpvs(where_sv, "next char ");
46fc3d4c 10976 if (yychar < 32)
cea2e8a9 10977 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 10978 else if (isPRINT_LC(yychar)) {
88c9ea1e 10979 const char string = yychar;
5e7aa789
NC
10980 sv_catpvn(where_sv, &string, 1);
10981 }
463ee0b2 10982 else
cea2e8a9 10983 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
463ee0b2 10984 }
19c62481 10985 msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
ed094faf 10986 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 10987 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 10988 if (context)
19c62481
BF
10989 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
10990 SVfARG(newSVpvn_flags(context, contlen,
10991 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
463ee0b2 10992 else
19c62481 10993 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
57843af0 10994 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 10995 Perl_sv_catpvf(aTHX_ msg,
57def98f 10996 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 10997 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 10998 PL_multi_end = 0;
a0d0e21e 10999 }
500960a6 11000 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 11001 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 11002 }
463ee0b2 11003 else
5a844595 11004 qerror(msg);
c7d6bfb2
GS
11005 if (PL_error_count >= 10) {
11006 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 11007 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 11008 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
11009 else
11010 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 11011 OutCopFILE(PL_curcop));
c7d6bfb2 11012 }
3280af22 11013 PL_in_my = 0;
5c284bb0 11014 PL_in_my_stash = NULL;
463ee0b2
LW
11015 return 0;
11016}
084592ab
CN
11017#ifdef __SC__
11018#pragma segment Main
11019#endif
4e35701f 11020
b250498f 11021STATIC char*
3ae08724 11022S_swallow_bom(pTHX_ U8 *s)
01ec43d0 11023{
97aff369 11024 dVAR;
f54cb97a 11025 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
11026
11027 PERL_ARGS_ASSERT_SWALLOW_BOM;
11028
7aa207d6 11029 switch (s[0]) {
4e553d73
NIS
11030 case 0xFF:
11031 if (s[1] == 0xFE) {
ee6ba15d 11032 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 11033 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
dcbac5bb 11034 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11035 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 11036#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 11037 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 11038 s += 2;
dea0fc0b 11039 if (PL_bufend > (char*)s) {
81a923f4 11040 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 11041 }
b250498f 11042#else
dcbac5bb 11043 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11044 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 11045#endif
01ec43d0
GS
11046 }
11047 break;
78ae23f5 11048 case 0xFE:
7aa207d6 11049 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 11050#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11051 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
11052 s += 2;
11053 if (PL_bufend > (char *)s) {
81a923f4 11054 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 11055 }
b250498f 11056#else
dcbac5bb 11057 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11058 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 11059#endif
01ec43d0
GS
11060 }
11061 break;
3ae08724
GS
11062 case 0xEF:
11063 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 11064 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
11065 s += 3; /* UTF-8 */
11066 }
11067 break;
11068 case 0:
7aa207d6
JH
11069 if (slen > 3) {
11070 if (s[1] == 0) {
11071 if (s[2] == 0xFE && s[3] == 0xFF) {
11072 /* UTF-32 big-endian */
dcbac5bb 11073 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 11074 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
11075 }
11076 }
11077 else if (s[2] == 0 && s[3] != 0) {
11078 /* Leading bytes
11079 * 00 xx 00 xx
11080 * are a good indicator of UTF-16BE. */
ee6ba15d 11081#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11082 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
11083 s = add_utf16_textfilter(s, FALSE);
11084#else
dcbac5bb 11085 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11086 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
11087#endif
7aa207d6 11088 }
01ec43d0 11089 }
e294cc5d
JH
11090#ifdef EBCDIC
11091 case 0xDD:
11092 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
11093 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
11094 s += 4; /* UTF-8 */
11095 }
11096 break;
11097#endif
11098
7aa207d6
JH
11099 default:
11100 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
11101 /* Leading bytes
11102 * xx 00 xx 00
11103 * are a good indicator of UTF-16LE. */
ee6ba15d 11104#ifndef PERL_NO_UTF16_FILTER
7aa207d6 11105 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 11106 s = add_utf16_textfilter(s, TRUE);
ee6ba15d 11107#else
dcbac5bb 11108 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
11109 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
11110#endif
7aa207d6 11111 }
01ec43d0 11112 }
b8f84bb2 11113 return (char*)s;
b250498f 11114}
4755096e 11115
6e3aabd6
GS
11116
11117#ifndef PERL_NO_UTF16_FILTER
11118static I32
a28af015 11119S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 11120{
97aff369 11121 dVAR;
f3040f2c 11122 SV *const filter = FILTER_DATA(idx);
2a773401
NC
11123 /* We re-use this each time round, throwing the contents away before we
11124 return. */
2a773401 11125 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 11126 SV *const utf8_buffer = filter;
c28d6105 11127 IV status = IoPAGE(filter);
f2338a2e 11128 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 11129 I32 retval;
c8b0cbae 11130
c85ae797
NC
11131 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11132
c8b0cbae
NC
11133 /* As we're automatically added, at the lowest level, and hence only called
11134 from this file, we can be sure that we're not called in block mode. Hence
11135 don't bother writing code to deal with block mode. */
11136 if (maxlen) {
11137 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11138 }
c28d6105
NC
11139 if (status < 0) {
11140 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11141 }
1de9afcd 11142 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 11143 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 11144 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
11145 reverse ? 'l' : 'b', idx, maxlen, status,
11146 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11147
11148 while (1) {
11149 STRLEN chars;
11150 STRLEN have;
dea0fc0b 11151 I32 newlen;
2a773401 11152 U8 *end;
c28d6105
NC
11153 /* First, look in our buffer of existing UTF-8 data: */
11154 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11155
11156 if (nl) {
11157 ++nl;
11158 } else if (status == 0) {
11159 /* EOF */
11160 IoPAGE(filter) = 0;
11161 nl = SvEND(utf8_buffer);
11162 }
11163 if (nl) {
d2d1d4de
NC
11164 STRLEN got = nl - SvPVX(utf8_buffer);
11165 /* Did we have anything to append? */
11166 retval = got != 0;
11167 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
11168 /* Everything else in this code works just fine if SVp_POK isn't
11169 set. This, however, needs it, and we need it to work, else
11170 we loop infinitely because the buffer is never consumed. */
11171 sv_chop(utf8_buffer, nl);
11172 break;
11173 }
ba77e4cc 11174
c28d6105
NC
11175 /* OK, not a complete line there, so need to read some more UTF-16.
11176 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
11177 while (1) {
11178 if (status <= 0)
11179 break;
11180 if (SvCUR(utf16_buffer) >= 2) {
11181 /* Location of the high octet of the last complete code point.
11182 Gosh, UTF-16 is a pain. All the benefits of variable length,
11183 *coupled* with all the benefits of partial reads and
11184 endianness. */
11185 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11186 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11187
11188 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11189 break;
11190 }
11191
11192 /* We have the first half of a surrogate. Read more. */
11193 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11194 }
c28d6105 11195
c28d6105
NC
11196 status = FILTER_READ(idx + 1, utf16_buffer,
11197 160 + (SvCUR(utf16_buffer) & 1));
11198 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 11199 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
11200 if (status < 0) {
11201 /* Error */
11202 IoPAGE(filter) = status;
11203 return status;
11204 }
11205 }
11206
11207 chars = SvCUR(utf16_buffer) >> 1;
11208 have = SvCUR(utf8_buffer);
11209 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 11210
aa6dbd60 11211 if (reverse) {
c28d6105
NC
11212 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11213 (U8*)SvPVX_const(utf8_buffer) + have,
11214 chars * 2, &newlen);
aa6dbd60 11215 } else {
2a773401 11216 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
11217 (U8*)SvPVX_const(utf8_buffer) + have,
11218 chars * 2, &newlen);
2a773401 11219 }
c28d6105 11220 SvCUR_set(utf8_buffer, have + newlen);
2a773401 11221 *end = '\0';
c28d6105 11222
e07286ed
NC
11223 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11224 it's private to us, and utf16_to_utf8{,reversed} take a
11225 (pointer,length) pair, rather than a NUL-terminated string. */
11226 if(SvCUR(utf16_buffer) & 1) {
11227 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11228 SvCUR_set(utf16_buffer, 1);
11229 } else {
11230 SvCUR_set(utf16_buffer, 0);
11231 }
2a773401 11232 }
c28d6105
NC
11233 DEBUG_P(PerlIO_printf(Perl_debug_log,
11234 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11235 status,
11236 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11237 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 11238 return retval;
6e3aabd6 11239}
81a923f4
NC
11240
11241static U8 *
11242S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11243{
2a773401 11244 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 11245
c85ae797
NC
11246 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11247
c28d6105 11248 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 11249 sv_setpvs(filter, "");
2a773401 11250 IoLINES(filter) = reversed;
c28d6105
NC
11251 IoPAGE(filter) = 1; /* Not EOF */
11252
11253 /* Sadly, we have to return a valid pointer, come what may, so we have to
11254 ignore any error return from this. */
11255 SvCUR_set(PL_linestr, 0);
11256 if (FILTER_READ(0, PL_linestr, 0)) {
11257 SvUTF8_on(PL_linestr);
81a923f4 11258 } else {
c28d6105 11259 SvUTF8_on(PL_linestr);
81a923f4 11260 }
c28d6105 11261 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
11262 return (U8*)SvPVX(PL_linestr);
11263}
6e3aabd6 11264#endif
9f4817db 11265
f333445c
JP
11266/*
11267Returns a pointer to the next character after the parsed
11268vstring, as well as updating the passed in sv.
11269
11270Function must be called like
11271
561b68a9 11272 sv = newSV(5);
65b06e02 11273 s = scan_vstring(s,e,sv);
f333445c 11274
65b06e02 11275where s and e are the start and end of the string.
f333445c
JP
11276The sv should already be large enough to store the vstring
11277passed in, for performance reasons.
11278
11279*/
11280
11281char *
15f169a1 11282Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 11283{
97aff369 11284 dVAR;
bfed75c6
AL
11285 const char *pos = s;
11286 const char *start = s;
7918f24d
NC
11287
11288 PERL_ARGS_ASSERT_SCAN_VSTRING;
11289
f333445c 11290 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 11291 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 11292 pos++;
f333445c
JP
11293 if ( *pos != '.') {
11294 /* this may not be a v-string if followed by => */
bfed75c6 11295 const char *next = pos;
65b06e02 11296 while (next < e && isSPACE(*next))
8fc7bb1c 11297 ++next;
65b06e02 11298 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
11299 /* return string not v-string */
11300 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 11301 return (char *)pos;
f333445c
JP
11302 }
11303 }
11304
11305 if (!isALPHA(*pos)) {
89ebb4a3 11306 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 11307
d4c19fe8
AL
11308 if (*s == 'v')
11309 s++; /* get past 'v' */
f333445c 11310
76f68e9b 11311 sv_setpvs(sv, "");
f333445c
JP
11312
11313 for (;;) {
d4c19fe8 11314 /* this is atoi() that tolerates underscores */
0bd48802
AL
11315 U8 *tmpend;
11316 UV rev = 0;
d4c19fe8
AL
11317 const char *end = pos;
11318 UV mult = 1;
11319 while (--end >= s) {
11320 if (*end != '_') {
11321 const UV orev = rev;
f333445c
JP
11322 rev += (*end - '0') * mult;
11323 mult *= 10;
9b387841 11324 if (orev > rev)
dcbac5bb 11325 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
11326 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11327 "Integer overflow in decimal number");
f333445c
JP
11328 }
11329 }
11330#ifdef EBCDIC
11331 if (rev > 0x7FFFFFFF)
11332 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11333#endif
11334 /* Append native character for the rev point */
11335 tmpend = uvchr_to_utf8(tmpbuf, rev);
11336 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11337 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11338 SvUTF8_on(sv);
65b06e02 11339 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
11340 s = ++pos;
11341 else {
11342 s = pos;
11343 break;
11344 }
65b06e02 11345 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
11346 pos++;
11347 }
11348 SvPOK_on(sv);
11349 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11350 SvRMAGICAL_on(sv);
11351 }
73d840c0 11352 return (char *)s;
f333445c
JP
11353}
11354
88e1f1a2
JV
11355int
11356Perl_keyword_plugin_standard(pTHX_
11357 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11358{
11359 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11360 PERL_UNUSED_CONTEXT;
11361 PERL_UNUSED_ARG(keyword_ptr);
11362 PERL_UNUSED_ARG(keyword_len);
11363 PERL_UNUSED_ARG(op_ptr);
11364 return KEYWORD_PLUGIN_DECLINE;
11365}
11366
78cdf107 11367#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11368static void
78cdf107 11369S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11370{
11371 SAVEI32(PL_lex_brackets);
11372 if (PL_lex_brackets > 100)
11373 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11374 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11375 SAVEI32(PL_lex_allbrackets);
11376 PL_lex_allbrackets = 0;
11377 SAVEI8(PL_lex_fakeeof);
2dcac756 11378 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11379 if(yyparse(gramtype) && !PL_parser->error_count)
11380 qerror(Perl_mess(aTHX_ "Parse error"));
11381}
11382
78cdf107 11383#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11384static OP *
78cdf107 11385S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11386{
11387 OP *o;
11388 ENTER;
11389 SAVEVPTR(PL_eval_root);
11390 PL_eval_root = NULL;
78cdf107 11391 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11392 o = PL_eval_root;
11393 LEAVE;
11394 return o;
11395}
11396
78cdf107
Z
11397#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11398static OP *
11399S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11400{
11401 OP *exprop;
11402 if (flags & ~PARSE_OPTIONAL)
11403 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11404 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11405 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11406 if (!PL_parser->error_count)
11407 qerror(Perl_mess(aTHX_ "Parse error"));
11408 exprop = newOP(OP_NULL, 0);
11409 }
11410 return exprop;
11411}
11412
11413/*
11414=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11415
11416Parse a Perl arithmetic expression. This may contain operators of precedence
11417down to the bit shift operators. The expression must be followed (and thus
11418terminated) either by a comparison or lower-precedence operator or by
11419something that would normally terminate an expression such as semicolon.
11420If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11421otherwise it is mandatory. It is up to the caller to ensure that the
11422dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11423the source of the code to be parsed and the lexical context for the
11424expression.
11425
11426The op tree representing the expression is returned. If an optional
11427expression is absent, a null pointer is returned, otherwise the pointer
11428will be non-null.
11429
11430If an error occurs in parsing or compilation, in most cases a valid op
11431tree is returned anyway. The error is reflected in the parser state,
11432normally resulting in a single exception at the top level of parsing
11433which covers all the compilation errors that occurred. Some compilation
11434errors, however, will throw an exception immediately.
11435
11436=cut
11437*/
11438
11439OP *
11440Perl_parse_arithexpr(pTHX_ U32 flags)
11441{
11442 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11443}
11444
11445/*
11446=for apidoc Amx|OP *|parse_termexpr|U32 flags
11447
11448Parse a Perl term expression. This may contain operators of precedence
11449down to the assignment operators. The expression must be followed (and thus
11450terminated) either by a comma or lower-precedence operator or by
11451something that would normally terminate an expression such as semicolon.
11452If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11453otherwise it is mandatory. It is up to the caller to ensure that the
11454dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11455the source of the code to be parsed and the lexical context for the
11456expression.
11457
11458The op tree representing the expression is returned. If an optional
11459expression is absent, a null pointer is returned, otherwise the pointer
11460will be non-null.
11461
11462If an error occurs in parsing or compilation, in most cases a valid op
11463tree is returned anyway. The error is reflected in the parser state,
11464normally resulting in a single exception at the top level of parsing
11465which covers all the compilation errors that occurred. Some compilation
11466errors, however, will throw an exception immediately.
11467
11468=cut
11469*/
11470
11471OP *
11472Perl_parse_termexpr(pTHX_ U32 flags)
11473{
11474 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11475}
11476
11477/*
11478=for apidoc Amx|OP *|parse_listexpr|U32 flags
11479
11480Parse a Perl list expression. This may contain operators of precedence
11481down to the comma operator. The expression must be followed (and thus
11482terminated) either by a low-precedence logic operator such as C<or> or by
11483something that would normally terminate an expression such as semicolon.
11484If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11485otherwise it is mandatory. It is up to the caller to ensure that the
11486dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11487the source of the code to be parsed and the lexical context for the
11488expression.
11489
11490The op tree representing the expression is returned. If an optional
11491expression is absent, a null pointer is returned, otherwise the pointer
11492will be non-null.
11493
11494If an error occurs in parsing or compilation, in most cases a valid op
11495tree is returned anyway. The error is reflected in the parser state,
11496normally resulting in a single exception at the top level of parsing
11497which covers all the compilation errors that occurred. Some compilation
11498errors, however, will throw an exception immediately.
11499
11500=cut
11501*/
11502
11503OP *
11504Perl_parse_listexpr(pTHX_ U32 flags)
11505{
11506 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11507}
11508
11509/*
11510=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11511
11512Parse a single complete Perl expression. This allows the full
11513expression grammar, including the lowest-precedence operators such
11514as C<or>. The expression must be followed (and thus terminated) by a
11515token that an expression would normally be terminated by: end-of-file,
11516closing bracketing punctuation, semicolon, or one of the keywords that
11517signals a postfix expression-statement modifier. If I<flags> includes
11518C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11519mandatory. It is up to the caller to ensure that the dynamic parser
11520state (L</PL_parser> et al) is correctly set to reflect the source of
11521the code to be parsed and the lexical context for the expression.
11522
11523The op tree representing the expression is returned. If an optional
11524expression is absent, a null pointer is returned, otherwise the pointer
11525will be non-null.
11526
11527If an error occurs in parsing or compilation, in most cases a valid op
11528tree is returned anyway. The error is reflected in the parser state,
11529normally resulting in a single exception at the top level of parsing
11530which covers all the compilation errors that occurred. Some compilation
11531errors, however, will throw an exception immediately.
11532
11533=cut
11534*/
11535
11536OP *
11537Perl_parse_fullexpr(pTHX_ U32 flags)
11538{
11539 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11540}
11541
e53d8f76
Z
11542/*
11543=for apidoc Amx|OP *|parse_block|U32 flags
11544
11545Parse a single complete Perl code block. This consists of an opening
11546brace, a sequence of statements, and a closing brace. The block
11547constitutes a lexical scope, so C<my> variables and various compile-time
11548effects can be contained within it. It is up to the caller to ensure
11549that the dynamic parser state (L</PL_parser> et al) is correctly set to
11550reflect the source of the code to be parsed and the lexical context for
11551the statement.
11552
11553The op tree representing the code block is returned. This is always a
11554real op, never a null pointer. It will normally be a C<lineseq> list,
11555including C<nextstate> or equivalent ops. No ops to construct any kind
11556of runtime scope are included by virtue of it being a block.
11557
11558If an error occurs in parsing or compilation, in most cases a valid op
11559tree (most likely null) is returned anyway. The error is reflected in
11560the parser state, normally resulting in a single exception at the top
11561level of parsing which covers all the compilation errors that occurred.
11562Some compilation errors, however, will throw an exception immediately.
11563
11564The I<flags> parameter is reserved for future use, and must always
11565be zero.
11566
11567=cut
11568*/
11569
11570OP *
11571Perl_parse_block(pTHX_ U32 flags)
11572{
11573 if (flags)
11574 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11575 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11576}
11577
1da4ca5f 11578/*
8359b381
Z
11579=for apidoc Amx|OP *|parse_barestmt|U32 flags
11580
11581Parse a single unadorned Perl statement. This may be a normal imperative
11582statement or a declaration that has compile-time effect. It does not
11583include any label or other affixture. It is up to the caller to ensure
11584that the dynamic parser state (L</PL_parser> et al) is correctly set to
11585reflect the source of the code to be parsed and the lexical context for
11586the statement.
11587
11588The op tree representing the statement is returned. This may be a
11589null pointer if the statement is null, for example if it was actually
11590a subroutine definition (which has compile-time side effects). If not
11591null, it will be ops directly implementing the statement, suitable to
11592pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11593equivalent op (except for those embedded in a scope contained entirely
11594within the statement).
11595
11596If an error occurs in parsing or compilation, in most cases a valid op
11597tree (most likely null) is returned anyway. The error is reflected in
11598the parser state, normally resulting in a single exception at the top
11599level of parsing which covers all the compilation errors that occurred.
11600Some compilation errors, however, will throw an exception immediately.
11601
11602The I<flags> parameter is reserved for future use, and must always
11603be zero.
11604
11605=cut
11606*/
11607
11608OP *
11609Perl_parse_barestmt(pTHX_ U32 flags)
11610{
11611 if (flags)
11612 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11613 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11614}
11615
11616/*
361d9b55
Z
11617=for apidoc Amx|SV *|parse_label|U32 flags
11618
11619Parse a single label, possibly optional, of the type that may prefix a
11620Perl statement. It is up to the caller to ensure that the dynamic parser
11621state (L</PL_parser> et al) is correctly set to reflect the source of
11622the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11623label is optional, otherwise it is mandatory.
11624
11625The name of the label is returned in the form of a fresh scalar. If an
11626optional label is absent, a null pointer is returned.
11627
11628If an error occurs in parsing, which can only occur if the label is
11629mandatory, a valid label is returned anyway. The error is reflected in
11630the parser state, normally resulting in a single exception at the top
11631level of parsing which covers all the compilation errors that occurred.
11632
11633=cut
11634*/
11635
11636SV *
11637Perl_parse_label(pTHX_ U32 flags)
11638{
11639 if (flags & ~PARSE_OPTIONAL)
11640 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11641 if (PL_lex_state == LEX_KNOWNEXT) {
11642 PL_parser->yychar = yylex();
11643 if (PL_parser->yychar == LABEL) {
361d9b55
Z
11644 SV *lsv;
11645 PL_parser->yychar = YYEMPTY;
11646 lsv = newSV_type(SVt_PV);
fefd015f 11647 sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
361d9b55
Z
11648 return lsv;
11649 } else {
11650 yyunlex();
11651 goto no_label;
11652 }
11653 } else {
11654 char *s, *t;
361d9b55
Z
11655 STRLEN wlen, bufptr_pos;
11656 lex_read_space(0);
11657 t = s = PL_bufptr;
5db1eb8d 11658 if (!isIDFIRST_lazy_if(s, UTF))
361d9b55 11659 goto no_label;
5db1eb8d 11660 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
361d9b55
Z
11661 if (word_takes_any_delimeter(s, wlen))
11662 goto no_label;
11663 bufptr_pos = s - SvPVX(PL_linestr);
11664 PL_bufptr = t;
11665 lex_read_space(LEX_KEEP_PREVIOUS);
11666 t = PL_bufptr;
11667 s = SvPVX(PL_linestr) + bufptr_pos;
11668 if (t[0] == ':' && t[1] != ':') {
11669 PL_oldoldbufptr = PL_oldbufptr;
11670 PL_oldbufptr = s;
11671 PL_bufptr = t+1;
5db1eb8d 11672 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
361d9b55
Z
11673 } else {
11674 PL_bufptr = s;
11675 no_label:
11676 if (flags & PARSE_OPTIONAL) {
11677 return NULL;
11678 } else {
11679 qerror(Perl_mess(aTHX_ "Parse error"));
11680 return newSVpvs("x");
11681 }
11682 }
11683 }
11684}
11685
11686/*
28ac2b49
Z
11687=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11688
11689Parse a single complete Perl statement. This may be a normal imperative
8359b381 11690statement or a declaration that has compile-time effect, and may include
8e720305 11691optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11692parser state (L</PL_parser> et al) is correctly set to reflect the source
11693of the code to be parsed and the lexical context for the statement.
11694
11695The op tree representing the statement is returned. This may be a
11696null pointer if the statement is null, for example if it was actually
11697a subroutine definition (which has compile-time side effects). If not
11698null, it will be the result of a L</newSTATEOP> call, normally including
11699a C<nextstate> or equivalent op.
11700
11701If an error occurs in parsing or compilation, in most cases a valid op
11702tree (most likely null) is returned anyway. The error is reflected in
11703the parser state, normally resulting in a single exception at the top
11704level of parsing which covers all the compilation errors that occurred.
11705Some compilation errors, however, will throw an exception immediately.
11706
11707The I<flags> parameter is reserved for future use, and must always
11708be zero.
11709
11710=cut
11711*/
11712
11713OP *
11714Perl_parse_fullstmt(pTHX_ U32 flags)
11715{
28ac2b49
Z
11716 if (flags)
11717 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11718 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11719}
11720
07ffcb73
Z
11721/*
11722=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11723
11724Parse a sequence of zero or more Perl statements. These may be normal
11725imperative statements, including optional labels, or declarations
11726that have compile-time effect, or any mixture thereof. The statement
11727sequence ends when a closing brace or end-of-file is encountered in a
11728place where a new statement could have validly started. It is up to
11729the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11730is correctly set to reflect the source of the code to be parsed and the
11731lexical context for the statements.
11732
11733The op tree representing the statement sequence is returned. This may
11734be a null pointer if the statements were all null, for example if there
11735were no statements or if there were only subroutine definitions (which
11736have compile-time side effects). If not null, it will be a C<lineseq>
11737list, normally including C<nextstate> or equivalent ops.
11738
11739If an error occurs in parsing or compilation, in most cases a valid op
11740tree is returned anyway. The error is reflected in the parser state,
11741normally resulting in a single exception at the top level of parsing
11742which covers all the compilation errors that occurred. Some compilation
11743errors, however, will throw an exception immediately.
11744
11745The I<flags> parameter is reserved for future use, and must always
11746be zero.
11747
11748=cut
11749*/
11750
11751OP *
11752Perl_parse_stmtseq(pTHX_ U32 flags)
11753{
11754 OP *stmtseqop;
e53d8f76 11755 I32 c;
07ffcb73 11756 if (flags)
78cdf107
Z
11757 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11758 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
11759 c = lex_peek_unichar(0);
11760 if (c != -1 && c != /*{*/'}')
07ffcb73 11761 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
11762 return stmtseqop;
11763}
11764
28ac2b49 11765/*
1da4ca5f
NC
11766 * Local variables:
11767 * c-indentation-style: bsd
11768 * c-basic-offset: 4
14d04a33 11769 * indent-tabs-mode: nil
1da4ca5f
NC
11770 * End:
11771 *
14d04a33 11772 * ex: set ts=8 sts=4 sw=4 et:
37442d52 11773 */