This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #108754] perlgit.pod: Stress rebase more
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
802a15e9 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805
LW
150 * can get by with a single comparison (if the compiler is smart enough).
151 */
152
fb73857a 153/* #define LEX_NOTPARSING 11 is done in perl.h. */
154
b6007c36
DM
155#define LEX_NORMAL 10 /* normal code (ie not within "...") */
156#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159#define LEX_INTERPSTART 6 /* expecting the start of a $var */
160
161 /* at end of code, eg "$x" followed by: */
162#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164
165#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167#define LEX_INTERPCONST 2 /* NOT USED */
168#define LEX_FORMLINE 1 /* expecting a format line */
169#define LEX_KNOWNEXT 0 /* next token known; just return it */
170
79072805 171
bbf60fe6 172#ifdef DEBUGGING
27da23d5 173static const char* const lex_state_names[] = {
bbf60fe6
DM
174 "KNOWNEXT",
175 "FORMLINE",
176 "INTERPCONST",
177 "INTERPCONCAT",
178 "INTERPENDMAYBE",
179 "INTERPEND",
180 "INTERPSTART",
181 "INTERPPUSH",
182 "INTERPCASEMOD",
183 "INTERPNORMAL",
184 "NORMAL"
185};
186#endif
187
79072805
LW
188#ifdef ff_next
189#undef ff_next
d48672a2
LW
190#endif
191
79072805 192#include "keywords.h"
fe14fcc3 193
ffb4593c
NT
194/* CLINE is a macro that ensures PL_copline has a sane value */
195
ae986130
LW
196#ifdef CLINE
197#undef CLINE
198#endif
57843af0 199#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 200
5db06880 201#ifdef PERL_MAD
29595ff2
NC
202# define SKIPSPACE0(s) skipspace0(s)
203# define SKIPSPACE1(s) skipspace1(s)
204# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205# define PEEKSPACE(s) skipspace2(s,0)
206#else
207# define SKIPSPACE0(s) skipspace(s)
208# define SKIPSPACE1(s) skipspace(s)
209# define SKIPSPACE2(s,tsv) skipspace(s)
210# define PEEKSPACE(s) skipspace(s)
211#endif
212
ffb4593c
NT
213/*
214 * Convenience functions to return different tokens and prime the
9cbb5ea2 215 * lexer for the next token. They all take an argument.
ffb4593c
NT
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
7eb971ee 227 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 228 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
229 * BOop : bitwise or or xor
230 * BAop : bitwise and
231 * SHop : shift operator
232 * PWop : power operator
9cbb5ea2 233 * PMop : pattern-matching operator
ffb4593c
NT
234 * Aop : addition-level operator
235 * Mop : multiplication-level operator
236 * Eop : equality-testing operator
e5edeb50 237 * Rop : relational operator <= != gt
ffb4593c
NT
238 *
239 * Also see LOP and lop() below.
240 */
241
998054bd 242#ifdef DEBUGGING /* Serve -DT. */
704d4215 243# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 244#else
bbf60fe6 245# define REPORT(retval) (retval)
998054bd
SC
246#endif
247
bbf60fe6
DM
248#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
249#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
250#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
251#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
252#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
253#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
254#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
255#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
256#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
257#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 258#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
259#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
260#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
261#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
262#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
263#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
264#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
265#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
266#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
267#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
268#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 269
a687059c
LW
270/* This bit of chicanery makes a unary function followed by
271 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
272 * The UNIDOR macro is for unary functions that can be followed by the //
273 * operator (such as C<shift // 0>).
a687059c 274 */
376fcdbf 275#define UNI2(f,x) { \
6154021b 276 pl_yylval.ival = f; \
376fcdbf
AL
277 PL_expect = x; \
278 PL_bufptr = s; \
279 PL_last_uni = PL_oldbufptr; \
280 PL_last_lop_op = f; \
281 if (*s == '(') \
282 return REPORT( (int)FUNC1 ); \
29595ff2 283 s = PEEKSPACE(s); \
376fcdbf
AL
284 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
285 }
6f33ba73
RGS
286#define UNI(f) UNI2(f,XTERM)
287#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
b5fb7ce3
FC
288#define UNIPROTO(f,optional) { \
289 if (optional) PL_last_uni = PL_oldbufptr; \
22393538
MH
290 OPERATOR(f); \
291 }
a687059c 292
376fcdbf 293#define UNIBRACK(f) { \
6154021b 294 pl_yylval.ival = f; \
376fcdbf
AL
295 PL_bufptr = s; \
296 PL_last_uni = PL_oldbufptr; \
297 if (*s == '(') \
298 return REPORT( (int)FUNC1 ); \
29595ff2 299 s = PEEKSPACE(s); \
376fcdbf
AL
300 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
301 }
79072805 302
9f68db38 303/* grandfather return to old style */
78cdf107
Z
304#define OLDLOP(f) \
305 do { \
306 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
307 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
308 pl_yylval.ival = (f); \
309 PL_expect = XTERM; \
310 PL_bufptr = s; \
311 return (int)LSTOP; \
312 } while(0)
79072805 313
8fa7f367
JH
314#ifdef DEBUGGING
315
6154021b 316/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
317enum token_type {
318 TOKENTYPE_NONE,
319 TOKENTYPE_IVAL,
6154021b 320 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
321 TOKENTYPE_PVAL,
322 TOKENTYPE_OPVAL,
323 TOKENTYPE_GVVAL
324};
325
6d4a66ac
NC
326static struct debug_tokens {
327 const int token;
328 enum token_type type;
329 const char *name;
330} const debug_tokens[] =
9041c2e3 331{
bbf60fe6
DM
332 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
333 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
334 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
335 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
336 { ARROW, TOKENTYPE_NONE, "ARROW" },
337 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
338 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
339 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
340 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
341 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 342 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
343 { DO, TOKENTYPE_NONE, "DO" },
344 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
345 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
346 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
347 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
348 { ELSE, TOKENTYPE_NONE, "ELSE" },
349 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
350 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
351 { FOR, TOKENTYPE_IVAL, "FOR" },
352 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
353 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
354 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 355 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
356 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
357 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
358 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 359 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
360 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
361 { IF, TOKENTYPE_IVAL, "IF" },
362 { LABEL, TOKENTYPE_PVAL, "LABEL" },
363 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
364 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
365 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
366 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
367 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
368 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
369 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
370 { MY, TOKENTYPE_IVAL, "MY" },
371 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
372 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
373 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
374 { OROP, TOKENTYPE_IVAL, "OROP" },
375 { OROR, TOKENTYPE_NONE, "OROR" },
376 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
377 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
378 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
379 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
380 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
381 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
382 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
383 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
384 { PREINC, TOKENTYPE_NONE, "PREINC" },
385 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
386 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
387 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
388 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
389 { SUB, TOKENTYPE_NONE, "SUB" },
390 { THING, TOKENTYPE_OPVAL, "THING" },
391 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
392 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
393 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
394 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
395 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
396 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 397 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
398 { WHILE, TOKENTYPE_IVAL, "WHILE" },
399 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 400 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 401 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
402};
403
6154021b 404/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 405
bbf60fe6 406STATIC int
704d4215 407S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 408{
97aff369 409 dVAR;
7918f24d
NC
410
411 PERL_ARGS_ASSERT_TOKEREPORT;
412
bbf60fe6 413 if (DEBUG_T_TEST) {
bd61b366 414 const char *name = NULL;
bbf60fe6 415 enum token_type type = TOKENTYPE_NONE;
f54cb97a 416 const struct debug_tokens *p;
396482e1 417 SV* const report = newSVpvs("<== ");
bbf60fe6 418
f54cb97a 419 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
420 if (p->token == (int)rv) {
421 name = p->name;
422 type = p->type;
423 break;
424 }
425 }
426 if (name)
54667de8 427 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
428 else if ((char)rv > ' ' && (char)rv < '~')
429 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
430 else if (!rv)
396482e1 431 sv_catpvs(report, "EOF");
bbf60fe6
DM
432 else
433 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
434 switch (type) {
435 case TOKENTYPE_NONE:
436 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
437 break;
438 case TOKENTYPE_IVAL:
704d4215 439 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_OPNUM:
442 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 443 PL_op_name[lvalp->ival]);
bbf60fe6
DM
444 break;
445 case TOKENTYPE_PVAL:
704d4215 446 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
447 break;
448 case TOKENTYPE_OPVAL:
704d4215 449 if (lvalp->opval) {
401441c0 450 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
451 PL_op_name[lvalp->opval->op_type]);
452 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 453 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 454 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
455 }
456
457 }
401441c0 458 else
396482e1 459 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
460 break;
461 }
b6007c36 462 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
463 };
464 return (int)rv;
998054bd
SC
465}
466
b6007c36
DM
467
468/* print the buffer with suitable escapes */
469
470STATIC void
15f169a1 471S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 472{
396482e1 473 SV* const tmp = newSVpvs("");
7918f24d
NC
474
475 PERL_ARGS_ASSERT_PRINTBUF;
476
b6007c36
DM
477 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
478 SvREFCNT_dec(tmp);
479}
480
8fa7f367
JH
481#endif
482
8290c323
NC
483static int
484S_deprecate_commaless_var_list(pTHX) {
485 PL_expect = XTERM;
486 deprecate("comma-less variable list");
487 return REPORT(','); /* grandfather non-comma-format format */
488}
489
ffb4593c
NT
490/*
491 * S_ao
492 *
c963b151
BD
493 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
494 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
495 */
496
76e3520e 497STATIC int
cea2e8a9 498S_ao(pTHX_ int toketype)
a0d0e21e 499{
97aff369 500 dVAR;
3280af22
NIS
501 if (*PL_bufptr == '=') {
502 PL_bufptr++;
a0d0e21e 503 if (toketype == ANDAND)
6154021b 504 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 505 else if (toketype == OROR)
6154021b 506 pl_yylval.ival = OP_ORASSIGN;
c963b151 507 else if (toketype == DORDOR)
6154021b 508 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
509 toketype = ASSIGNOP;
510 }
511 return toketype;
512}
513
ffb4593c
NT
514/*
515 * S_no_op
516 * When Perl expects an operator and finds something else, no_op
517 * prints the warning. It always prints "<something> found where
518 * operator expected. It prints "Missing semicolon on previous line?"
519 * if the surprise occurs at the start of the line. "do you need to
520 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
521 * where the compiler doesn't know if foo is a method call or a function.
522 * It prints "Missing operator before end of line" if there's nothing
523 * after the missing operator, or "... before <...>" if there is something
524 * after the missing operator.
525 */
526
76e3520e 527STATIC void
15f169a1 528S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 529{
97aff369 530 dVAR;
9d4ba2ae
AL
531 char * const oldbp = PL_bufptr;
532 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 533
7918f24d
NC
534 PERL_ARGS_ASSERT_NO_OP;
535
1189a94a
GS
536 if (!s)
537 s = oldbp;
07c798fb 538 else
1189a94a 539 PL_bufptr = s;
cea2e8a9 540 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
541 if (ckWARN_d(WARN_SYNTAX)) {
542 if (is_first)
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Missing semicolon on previous line?)\n");
545 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 546 const char *t;
c35e046a
AL
547 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
548 NOOP;
56da5a46
RGS
549 if (t < PL_bufptr && isSPACE(*t))
550 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551 "\t(Do you need to predeclare %.*s?)\n",
551405c4 552 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
553 }
554 else {
555 assert(s >= oldbp);
556 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 557 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 558 }
07c798fb 559 }
3280af22 560 PL_bufptr = oldbp;
8990e307
LW
561}
562
ffb4593c
NT
563/*
564 * S_missingterm
565 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 566 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
567 * If we're in a delimited string and the delimiter is a control
568 * character, it's reformatted into a two-char sequence like ^C.
569 * This is fatal.
570 */
571
76e3520e 572STATIC void
cea2e8a9 573S_missingterm(pTHX_ char *s)
8990e307 574{
97aff369 575 dVAR;
8990e307
LW
576 char tmpbuf[3];
577 char q;
578 if (s) {
9d4ba2ae 579 char * const nl = strrchr(s,'\n');
d2719217 580 if (nl)
8990e307
LW
581 *nl = '\0';
582 }
463559e7 583 else if (isCNTRL(PL_multi_close)) {
8990e307 584 *tmpbuf = '^';
585ec06d 585 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
586 tmpbuf[2] = '\0';
587 s = tmpbuf;
588 }
589 else {
eb160463 590 *tmpbuf = (char)PL_multi_close;
8990e307
LW
591 tmpbuf[1] = '\0';
592 s = tmpbuf;
593 }
594 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 595 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 596}
79072805 597
dd0ac2b9
FC
598#include "feature.h"
599
0d863452 600/*
0d863452
RH
601 * Check whether the named feature is enabled.
602 */
26ea9e12 603bool
3fff3427 604Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 605{
97aff369 606 dVAR;
4a731d7b 607 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
608
609 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
ca4d40c4
FC
610
611 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
7918f24d 612
26ea9e12
NC
613 if (namelen > MAX_FEATURE_LEN)
614 return FALSE;
3fff3427 615 memcpy(&he_name[8], name, namelen);
7d69d4a6 616
c8ca97b0
NC
617 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
618 REFCOUNTED_HE_EXISTS));
0d863452
RH
619}
620
ffb4593c 621/*
9cbb5ea2
GS
622 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
623 * utf16-to-utf8-reversed.
ffb4593c
NT
624 */
625
c39cd008
GS
626#ifdef PERL_CR_FILTER
627static void
628strip_return(SV *sv)
629{
95a20fc0 630 register const char *s = SvPVX_const(sv);
9d4ba2ae 631 register const char * const e = s + SvCUR(sv);
7918f24d
NC
632
633 PERL_ARGS_ASSERT_STRIP_RETURN;
634
c39cd008
GS
635 /* outer loop optimized to do nothing if there are no CR-LFs */
636 while (s < e) {
637 if (*s++ == '\r' && *s == '\n') {
638 /* hit a CR-LF, need to copy the rest */
639 register char *d = s - 1;
640 *d++ = *s++;
641 while (s < e) {
642 if (*s == '\r' && s[1] == '\n')
643 s++;
644 *d++ = *s++;
645 }
646 SvCUR(sv) -= s - d;
647 return;
648 }
649 }
650}
a868473f 651
76e3520e 652STATIC I32
c39cd008 653S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 654{
f54cb97a 655 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
656 if (count > 0 && !maxlen)
657 strip_return(sv);
658 return count;
a868473f
NIS
659}
660#endif
661
ffb4593c 662/*
8eaa0acf
Z
663=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
664
665Creates and initialises a new lexer/parser state object, supplying
666a context in which to lex and parse from a new source of Perl code.
667A pointer to the new state object is placed in L</PL_parser>. An entry
668is made on the save stack so that upon unwinding the new state object
669will be destroyed and the former value of L</PL_parser> will be restored.
670Nothing else need be done to clean up the parsing context.
671
672The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
673non-null, provides a string (in SV form) containing code to be parsed.
674A copy of the string is made, so subsequent modification of I<line>
675does not affect parsing. I<rsfp>, if non-null, provides an input stream
676from which code will be read to be parsed. If both are non-null, the
677code in I<line> comes first and must consist of complete lines of input,
678and I<rsfp> supplies the remainder of the source.
679
e368b3bd
FC
680The I<flags> parameter is reserved for future use. Currently it is only
681used by perl internally, so extensions should always pass zero.
8eaa0acf
Z
682
683=cut
684*/
ffb4593c 685
27fcb6ee
FC
686/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
687 can share filters with the current parser. */
688
a0d0e21e 689void
8eaa0acf 690Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 691{
97aff369 692 dVAR;
6ef55633 693 const char *s = NULL;
5486870f 694 yy_parser *parser, *oparser;
60d63348 695 if (flags && flags & ~LEX_START_FLAGS)
8eaa0acf 696 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
697
698 /* create and initialise a parser */
699
199e78b7 700 Newxz(parser, 1, yy_parser);
5486870f 701 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
702 PL_parser = parser;
703
28ac2b49
Z
704 parser->stack = NULL;
705 parser->ps = NULL;
706 parser->stack_size = 0;
acdf0a21 707
e3abe207
DM
708 /* on scope exit, free this parser and restore any outer one */
709 SAVEPARSER(parser);
7c4baf47 710 parser->saved_curcop = PL_curcop;
e3abe207 711
acdf0a21 712 /* initialise lexer state */
8990e307 713
fb205e7a
DM
714#ifdef PERL_MAD
715 parser->curforce = -1;
716#else
717 parser->nexttoke = 0;
718#endif
ca4cfd28 719 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 720 parser->copline = NOLINE;
5afb0a62 721 parser->lex_state = LEX_NORMAL;
c2598295 722 parser->expect = XSTATE;
2f9285f8 723 parser->rsfp = rsfp;
27fcb6ee
FC
724 parser->rsfp_filters =
725 !(flags & LEX_START_SAME_FILTER) || !oparser
d3cd8e11
FC
726 ? NULL
727 : MUTABLE_AV(SvREFCNT_inc(
728 oparser->rsfp_filters
729 ? oparser->rsfp_filters
730 : (oparser->rsfp_filters = newAV())
731 ));
2f9285f8 732
199e78b7
DM
733 Newx(parser->lex_brackstack, 120, char);
734 Newx(parser->lex_casestack, 12, char);
735 *parser->lex_casestack = '\0';
02b34bbe 736
10efb74f 737 if (line) {
0528fd32 738 STRLEN len;
10efb74f 739 s = SvPV_const(line, len);
0abcdfa4
FC
740 parser->linestr = flags & LEX_START_COPIED
741 ? SvREFCNT_inc_simple_NN(line)
742 : newSVpvn_flags(s, len, SvUTF8(line));
37555a72 743 if (!len || s[len-1] != ';')
bdc0bf6f 744 sv_catpvs(parser->linestr, "\n;");
0abcdfa4
FC
745 } else {
746 parser->linestr = newSVpvs("\n;");
8990e307 747 }
f06b5848
DM
748 parser->oldoldbufptr =
749 parser->oldbufptr =
750 parser->bufptr =
751 parser->linestart = SvPVX(parser->linestr);
752 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
753 parser->last_lop = parser->last_uni = NULL;
60d63348 754 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES);
737c24fc 755
60d63348 756 parser->in_pod = parser->filtered = 0;
79072805 757}
a687059c 758
e3abe207
DM
759
760/* delete a parser object */
761
762void
763Perl_parser_free(pTHX_ const yy_parser *parser)
764{
7918f24d
NC
765 PERL_ARGS_ASSERT_PARSER_FREE;
766
7c4baf47 767 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
768 SvREFCNT_dec(parser->linestr);
769
2f9285f8
DM
770 if (parser->rsfp == PerlIO_stdin())
771 PerlIO_clearerr(parser->rsfp);
799361c3
SH
772 else if (parser->rsfp && (!parser->old_parser ||
773 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 774 PerlIO_close(parser->rsfp);
5486870f 775 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 776
e3abe207
DM
777 Safefree(parser->lex_brackstack);
778 Safefree(parser->lex_casestack);
779 PL_parser = parser->old_parser;
780 Safefree(parser);
781}
782
783
ffb4593c 784/*
f0e67a1d
Z
785=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
786
787Buffer scalar containing the chunk currently under consideration of the
788text currently being lexed. This is always a plain string scalar (for
789which C<SvPOK> is true). It is not intended to be used as a scalar by
790normal scalar means; instead refer to the buffer directly by the pointer
791variables described below.
792
793The lexer maintains various C<char*> pointers to things in the
794C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
795reallocated, all of these pointers must be updated. Don't attempt to
796do this manually, but rather use L</lex_grow_linestr> if you need to
797reallocate the buffer.
798
799The content of the text chunk in the buffer is commonly exactly one
800complete line of input, up to and including a newline terminator,
801but there are situations where it is otherwise. The octets of the
802buffer may be intended to be interpreted as either UTF-8 or Latin-1.
803The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
804flag on this scalar, which may disagree with it.
805
806For direct examination of the buffer, the variable
807L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
808lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
809of these pointers is usually preferable to examination of the scalar
810through normal scalar means.
811
812=for apidoc AmxU|char *|PL_parser-E<gt>bufend
813
814Direct pointer to the end of the chunk of text currently being lexed, the
815end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
816+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
817always located at the end of the buffer, and does not count as part of
818the buffer's contents.
819
820=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
821
822Points to the current position of lexing inside the lexer buffer.
823Characters around this point may be freely examined, within
824the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
825L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
826interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
827
828Lexing code (whether in the Perl core or not) moves this pointer past
829the characters that it consumes. It is also expected to perform some
830bookkeeping whenever a newline character is consumed. This movement
831can be more conveniently performed by the function L</lex_read_to>,
832which handles newlines appropriately.
833
834Interpretation of the buffer's octets can be abstracted out by
835using the slightly higher-level functions L</lex_peek_unichar> and
836L</lex_read_unichar>.
837
838=for apidoc AmxU|char *|PL_parser-E<gt>linestart
839
840Points to the start of the current line inside the lexer buffer.
841This is useful for indicating at which column an error occurred, and
842not much else. This must be updated by any lexing code that consumes
843a newline; the function L</lex_read_to> handles this detail.
844
845=cut
846*/
847
848/*
849=for apidoc Amx|bool|lex_bufutf8
850
851Indicates whether the octets in the lexer buffer
852(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
853of Unicode characters. If not, they should be interpreted as Latin-1
854characters. This is analogous to the C<SvUTF8> flag for scalars.
855
856In UTF-8 mode, it is not guaranteed that the lexer buffer actually
857contains valid UTF-8. Lexing code must be robust in the face of invalid
858encoding.
859
860The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
861is significant, but not the whole story regarding the input character
862encoding. Normally, when a file is being read, the scalar contains octets
863and its C<SvUTF8> flag is off, but the octets should be interpreted as
864UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
865however, the scalar may have the C<SvUTF8> flag on, and in this case its
866octets should be interpreted as UTF-8 unless the C<use bytes> pragma
867is in effect. This logic may change in the future; use this function
868instead of implementing the logic yourself.
869
870=cut
871*/
872
873bool
874Perl_lex_bufutf8(pTHX)
875{
876 return UTF;
877}
878
879/*
880=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
881
882Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
883at least I<len> octets (including terminating NUL). Returns a
884pointer to the reallocated buffer. This is necessary before making
885any direct modification of the buffer that would increase its length.
886L</lex_stuff_pvn> provides a more convenient way to insert text into
887the buffer.
888
889Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
890this function updates all of the lexer's variables that point directly
891into the buffer.
892
893=cut
894*/
895
896char *
897Perl_lex_grow_linestr(pTHX_ STRLEN len)
898{
899 SV *linestr;
900 char *buf;
901 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
902 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
903 linestr = PL_parser->linestr;
904 buf = SvPVX(linestr);
905 if (len <= SvLEN(linestr))
906 return buf;
907 bufend_pos = PL_parser->bufend - buf;
908 bufptr_pos = PL_parser->bufptr - buf;
909 oldbufptr_pos = PL_parser->oldbufptr - buf;
910 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
911 linestart_pos = PL_parser->linestart - buf;
912 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
913 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
914 buf = sv_grow(linestr, len);
915 PL_parser->bufend = buf + bufend_pos;
916 PL_parser->bufptr = buf + bufptr_pos;
917 PL_parser->oldbufptr = buf + oldbufptr_pos;
918 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
919 PL_parser->linestart = buf + linestart_pos;
920 if (PL_parser->last_uni)
921 PL_parser->last_uni = buf + last_uni_pos;
922 if (PL_parser->last_lop)
923 PL_parser->last_lop = buf + last_lop_pos;
924 return buf;
925}
926
927/*
83aa740e 928=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
929
930Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
931immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
932reallocating the buffer if necessary. This means that lexing code that
933runs later will see the characters as if they had appeared in the input.
934It is not recommended to do this as part of normal parsing, and most
935uses of this facility run the risk of the inserted characters being
936interpreted in an unintended manner.
937
938The string to be inserted is represented by I<len> octets starting
939at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
940according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
941The characters are recoded for the lexer buffer, according to how the
942buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 943to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
944function is more convenient.
945
946=cut
947*/
948
949void
83aa740e 950Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 951{
749123ff 952 dVAR;
f0e67a1d
Z
953 char *bufptr;
954 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
955 if (flags & ~(LEX_STUFF_UTF8))
956 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
957 if (UTF) {
958 if (flags & LEX_STUFF_UTF8) {
959 goto plain_copy;
960 } else {
961 STRLEN highhalf = 0;
83aa740e 962 const char *p, *e = pv+len;
f0e67a1d
Z
963 for (p = pv; p != e; p++)
964 highhalf += !!(((U8)*p) & 0x80);
965 if (!highhalf)
966 goto plain_copy;
967 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
968 bufptr = PL_parser->bufptr;
969 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
970 SvCUR_set(PL_parser->linestr,
971 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
972 PL_parser->bufend += len+highhalf;
973 for (p = pv; p != e; p++) {
974 U8 c = (U8)*p;
975 if (c & 0x80) {
976 *bufptr++ = (char)(0xc0 | (c >> 6));
977 *bufptr++ = (char)(0x80 | (c & 0x3f));
978 } else {
979 *bufptr++ = (char)c;
980 }
981 }
982 }
983 } else {
984 if (flags & LEX_STUFF_UTF8) {
985 STRLEN highhalf = 0;
83aa740e 986 const char *p, *e = pv+len;
f0e67a1d
Z
987 for (p = pv; p != e; p++) {
988 U8 c = (U8)*p;
989 if (c >= 0xc4) {
990 Perl_croak(aTHX_ "Lexing code attempted to stuff "
991 "non-Latin-1 character into Latin-1 input");
992 } else if (c >= 0xc2 && p+1 != e &&
993 (((U8)p[1]) & 0xc0) == 0x80) {
994 p++;
995 highhalf++;
996 } else if (c >= 0x80) {
997 /* malformed UTF-8 */
998 ENTER;
999 SAVESPTR(PL_warnhook);
1000 PL_warnhook = PERL_WARNHOOK_FATAL;
1001 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1002 LEAVE;
1003 }
1004 }
1005 if (!highhalf)
1006 goto plain_copy;
1007 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1008 bufptr = PL_parser->bufptr;
1009 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1010 SvCUR_set(PL_parser->linestr,
1011 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1012 PL_parser->bufend += len-highhalf;
1013 for (p = pv; p != e; p++) {
1014 U8 c = (U8)*p;
1015 if (c & 0x80) {
1016 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1017 p++;
1018 } else {
1019 *bufptr++ = (char)c;
1020 }
1021 }
1022 } else {
1023 plain_copy:
1024 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1025 bufptr = PL_parser->bufptr;
1026 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1027 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1028 PL_parser->bufend += len;
1029 Copy(pv, bufptr, len, char);
1030 }
1031 }
1032}
1033
1034/*
9dcc53ea
Z
1035=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1036
1037Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1038immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1039reallocating the buffer if necessary. This means that lexing code that
1040runs later will see the characters as if they had appeared in the input.
1041It is not recommended to do this as part of normal parsing, and most
1042uses of this facility run the risk of the inserted characters being
1043interpreted in an unintended manner.
1044
1045The string to be inserted is represented by octets starting at I<pv>
1046and continuing to the first nul. These octets are interpreted as either
1047UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1048in I<flags>. The characters are recoded for the lexer buffer, according
1049to how the buffer is currently being interpreted (L</lex_bufutf8>).
1050If it is not convenient to nul-terminate a string to be inserted, the
1051L</lex_stuff_pvn> function is more appropriate.
1052
1053=cut
1054*/
1055
1056void
1057Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1058{
1059 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1060 lex_stuff_pvn(pv, strlen(pv), flags);
1061}
1062
1063/*
f0e67a1d
Z
1064=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1065
1066Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1067immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1068reallocating the buffer if necessary. This means that lexing code that
1069runs later will see the characters as if they had appeared in the input.
1070It is not recommended to do this as part of normal parsing, and most
1071uses of this facility run the risk of the inserted characters being
1072interpreted in an unintended manner.
1073
1074The string to be inserted is the string value of I<sv>. The characters
1075are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1076being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1077not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1078need to construct a scalar.
1079
1080=cut
1081*/
1082
1083void
1084Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1085{
1086 char *pv;
1087 STRLEN len;
1088 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1089 if (flags)
1090 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1091 pv = SvPV(sv, len);
1092 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1093}
1094
1095/*
1096=for apidoc Amx|void|lex_unstuff|char *ptr
1097
1098Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1099I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1100This hides the discarded text from any lexing code that runs later,
1101as if the text had never appeared.
1102
1103This is not the normal way to consume lexed text. For that, use
1104L</lex_read_to>.
1105
1106=cut
1107*/
1108
1109void
1110Perl_lex_unstuff(pTHX_ char *ptr)
1111{
1112 char *buf, *bufend;
1113 STRLEN unstuff_len;
1114 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1115 buf = PL_parser->bufptr;
1116 if (ptr < buf)
1117 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1118 if (ptr == buf)
1119 return;
1120 bufend = PL_parser->bufend;
1121 if (ptr > bufend)
1122 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1123 unstuff_len = ptr - buf;
1124 Move(ptr, buf, bufend+1-ptr, char);
1125 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1126 PL_parser->bufend = bufend - unstuff_len;
1127}
1128
1129/*
1130=for apidoc Amx|void|lex_read_to|char *ptr
1131
1132Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1133to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1134performing the correct bookkeeping whenever a newline character is passed.
1135This is the normal way to consume lexed text.
1136
1137Interpretation of the buffer's octets can be abstracted out by
1138using the slightly higher-level functions L</lex_peek_unichar> and
1139L</lex_read_unichar>.
1140
1141=cut
1142*/
1143
1144void
1145Perl_lex_read_to(pTHX_ char *ptr)
1146{
1147 char *s;
1148 PERL_ARGS_ASSERT_LEX_READ_TO;
1149 s = PL_parser->bufptr;
1150 if (ptr < s || ptr > PL_parser->bufend)
1151 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1152 for (; s != ptr; s++)
1153 if (*s == '\n') {
1154 CopLINE_inc(PL_curcop);
1155 PL_parser->linestart = s+1;
1156 }
1157 PL_parser->bufptr = ptr;
1158}
1159
1160/*
1161=for apidoc Amx|void|lex_discard_to|char *ptr
1162
1163Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1164up to I<ptr>. The remaining content of the buffer will be moved, and
1165all pointers into the buffer updated appropriately. I<ptr> must not
1166be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1167it is not permitted to discard text that has yet to be lexed.
1168
1169Normally it is not necessarily to do this directly, because it suffices to
1170use the implicit discarding behaviour of L</lex_next_chunk> and things
1171based on it. However, if a token stretches across multiple lines,
1f317c95 1172and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1173that purpose, then after completion of the token it would be wise to
1174explicitly discard the now-unneeded earlier lines, to avoid future
1175multi-line tokens growing the buffer without bound.
1176
1177=cut
1178*/
1179
1180void
1181Perl_lex_discard_to(pTHX_ char *ptr)
1182{
1183 char *buf;
1184 STRLEN discard_len;
1185 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1186 buf = SvPVX(PL_parser->linestr);
1187 if (ptr < buf)
1188 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1189 if (ptr == buf)
1190 return;
1191 if (ptr > PL_parser->bufptr)
1192 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1193 discard_len = ptr - buf;
1194 if (PL_parser->oldbufptr < ptr)
1195 PL_parser->oldbufptr = ptr;
1196 if (PL_parser->oldoldbufptr < ptr)
1197 PL_parser->oldoldbufptr = ptr;
1198 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1199 PL_parser->last_uni = NULL;
1200 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1201 PL_parser->last_lop = NULL;
1202 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1203 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1204 PL_parser->bufend -= discard_len;
1205 PL_parser->bufptr -= discard_len;
1206 PL_parser->oldbufptr -= discard_len;
1207 PL_parser->oldoldbufptr -= discard_len;
1208 if (PL_parser->last_uni)
1209 PL_parser->last_uni -= discard_len;
1210 if (PL_parser->last_lop)
1211 PL_parser->last_lop -= discard_len;
1212}
1213
1214/*
1215=for apidoc Amx|bool|lex_next_chunk|U32 flags
1216
1217Reads in the next chunk of text to be lexed, appending it to
1218L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1219looked to the end of the current chunk and wants to know more. It is
1220usual, but not necessary, for lexing to have consumed the entirety of
1221the current chunk at this time.
1222
1223If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1224chunk (i.e., the current chunk has been entirely consumed), normally the
1225current chunk will be discarded at the same time that the new chunk is
1226read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1227will not be discarded. If the current chunk has not been entirely
1228consumed, then it will not be discarded regardless of the flag.
1229
1230Returns true if some new text was added to the buffer, or false if the
1231buffer has reached the end of the input text.
1232
1233=cut
1234*/
1235
1236#define LEX_FAKE_EOF 0x80000000
1237
1238bool
1239Perl_lex_next_chunk(pTHX_ U32 flags)
1240{
1241 SV *linestr;
1242 char *buf;
1243 STRLEN old_bufend_pos, new_bufend_pos;
1244 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1245 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1246 bool got_some_for_debugger = 0;
f0e67a1d
Z
1247 bool got_some;
1248 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1249 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1250 linestr = PL_parser->linestr;
1251 buf = SvPVX(linestr);
1252 if (!(flags & LEX_KEEP_PREVIOUS) &&
1253 PL_parser->bufptr == PL_parser->bufend) {
1254 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1255 linestart_pos = 0;
1256 if (PL_parser->last_uni != PL_parser->bufend)
1257 PL_parser->last_uni = NULL;
1258 if (PL_parser->last_lop != PL_parser->bufend)
1259 PL_parser->last_lop = NULL;
1260 last_uni_pos = last_lop_pos = 0;
1261 *buf = 0;
1262 SvCUR(linestr) = 0;
1263 } else {
1264 old_bufend_pos = PL_parser->bufend - buf;
1265 bufptr_pos = PL_parser->bufptr - buf;
1266 oldbufptr_pos = PL_parser->oldbufptr - buf;
1267 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1268 linestart_pos = PL_parser->linestart - buf;
1269 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1270 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1271 }
1272 if (flags & LEX_FAKE_EOF) {
1273 goto eof;
60d63348 1274 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
f0e67a1d
Z
1275 got_some = 0;
1276 } else if (filter_gets(linestr, old_bufend_pos)) {
1277 got_some = 1;
17cc9359 1278 got_some_for_debugger = 1;
f0e67a1d 1279 } else {
580561a3
Z
1280 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1281 sv_setpvs(linestr, "");
f0e67a1d
Z
1282 eof:
1283 /* End of real input. Close filehandle (unless it was STDIN),
1284 * then add implicit termination.
1285 */
1286 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1287 PerlIO_clearerr(PL_parser->rsfp);
1288 else if (PL_parser->rsfp)
1289 (void)PerlIO_close(PL_parser->rsfp);
1290 PL_parser->rsfp = NULL;
60d63348 1291 PL_parser->in_pod = PL_parser->filtered = 0;
f0e67a1d
Z
1292#ifdef PERL_MAD
1293 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1294 PL_faketokens = 1;
1295#endif
1296 if (!PL_in_eval && PL_minus_p) {
1297 sv_catpvs(linestr,
1298 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1299 PL_minus_n = PL_minus_p = 0;
1300 } else if (!PL_in_eval && PL_minus_n) {
1301 sv_catpvs(linestr, /*{*/";}");
1302 PL_minus_n = 0;
1303 } else
1304 sv_catpvs(linestr, ";");
1305 got_some = 1;
1306 }
1307 buf = SvPVX(linestr);
1308 new_bufend_pos = SvCUR(linestr);
1309 PL_parser->bufend = buf + new_bufend_pos;
1310 PL_parser->bufptr = buf + bufptr_pos;
1311 PL_parser->oldbufptr = buf + oldbufptr_pos;
1312 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1313 PL_parser->linestart = buf + linestart_pos;
1314 if (PL_parser->last_uni)
1315 PL_parser->last_uni = buf + last_uni_pos;
1316 if (PL_parser->last_lop)
1317 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1318 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1319 PL_curstash != PL_debstash) {
1320 /* debugger active and we're not compiling the debugger code,
1321 * so store the line into the debugger's array of lines
1322 */
1323 update_debugger_info(NULL, buf+old_bufend_pos,
1324 new_bufend_pos-old_bufend_pos);
1325 }
1326 return got_some;
1327}
1328
1329/*
1330=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1331
1332Looks ahead one (Unicode) character in the text currently being lexed.
1333Returns the codepoint (unsigned integer value) of the next character,
1334or -1 if lexing has reached the end of the input text. To consume the
1335peeked character, use L</lex_read_unichar>.
1336
1337If the next character is in (or extends into) the next chunk of input
1338text, the next chunk will be read in. Normally the current chunk will be
1339discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1340then the current chunk will not be discarded.
1341
1342If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1343is encountered, an exception is generated.
1344
1345=cut
1346*/
1347
1348I32
1349Perl_lex_peek_unichar(pTHX_ U32 flags)
1350{
749123ff 1351 dVAR;
f0e67a1d
Z
1352 char *s, *bufend;
1353 if (flags & ~(LEX_KEEP_PREVIOUS))
1354 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1355 s = PL_parser->bufptr;
1356 bufend = PL_parser->bufend;
1357 if (UTF) {
1358 U8 head;
1359 I32 unichar;
1360 STRLEN len, retlen;
1361 if (s == bufend) {
1362 if (!lex_next_chunk(flags))
1363 return -1;
1364 s = PL_parser->bufptr;
1365 bufend = PL_parser->bufend;
1366 }
1367 head = (U8)*s;
1368 if (!(head & 0x80))
1369 return head;
1370 if (head & 0x40) {
1371 len = PL_utf8skip[head];
1372 while ((STRLEN)(bufend-s) < len) {
1373 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1374 break;
1375 s = PL_parser->bufptr;
1376 bufend = PL_parser->bufend;
1377 }
1378 }
1379 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1380 if (retlen == (STRLEN)-1) {
1381 /* malformed UTF-8 */
1382 ENTER;
1383 SAVESPTR(PL_warnhook);
1384 PL_warnhook = PERL_WARNHOOK_FATAL;
1385 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1386 LEAVE;
1387 }
1388 return unichar;
1389 } else {
1390 if (s == bufend) {
1391 if (!lex_next_chunk(flags))
1392 return -1;
1393 s = PL_parser->bufptr;
1394 }
1395 return (U8)*s;
1396 }
1397}
1398
1399/*
1400=for apidoc Amx|I32|lex_read_unichar|U32 flags
1401
1402Reads the next (Unicode) character in the text currently being lexed.
1403Returns the codepoint (unsigned integer value) of the character read,
1404and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1405if lexing has reached the end of the input text. To non-destructively
1406examine the next character, use L</lex_peek_unichar> instead.
1407
1408If the next character is in (or extends into) the next chunk of input
1409text, the next chunk will be read in. Normally the current chunk will be
1410discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1411then the current chunk will not be discarded.
1412
1413If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1414is encountered, an exception is generated.
1415
1416=cut
1417*/
1418
1419I32
1420Perl_lex_read_unichar(pTHX_ U32 flags)
1421{
1422 I32 c;
1423 if (flags & ~(LEX_KEEP_PREVIOUS))
1424 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1425 c = lex_peek_unichar(flags);
1426 if (c != -1) {
1427 if (c == '\n')
1428 CopLINE_inc(PL_curcop);
d9018cbe
EB
1429 if (UTF)
1430 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1431 else
1432 ++(PL_parser->bufptr);
f0e67a1d
Z
1433 }
1434 return c;
1435}
1436
1437/*
1438=for apidoc Amx|void|lex_read_space|U32 flags
1439
1440Reads optional spaces, in Perl style, in the text currently being
1441lexed. The spaces may include ordinary whitespace characters and
1442Perl-style comments. C<#line> directives are processed if encountered.
1443L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1444at a non-space character (or the end of the input text).
1445
1446If spaces extend into the next chunk of input text, the next chunk will
1447be read in. Normally the current chunk will be discarded at the same
1448time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1449chunk will not be discarded.
1450
1451=cut
1452*/
1453
f0998909
Z
1454#define LEX_NO_NEXT_CHUNK 0x80000000
1455
f0e67a1d
Z
1456void
1457Perl_lex_read_space(pTHX_ U32 flags)
1458{
1459 char *s, *bufend;
1460 bool need_incline = 0;
f0998909 1461 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1462 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1463#ifdef PERL_MAD
1464 if (PL_skipwhite) {
1465 sv_free(PL_skipwhite);
1466 PL_skipwhite = NULL;
1467 }
1468 if (PL_madskills)
1469 PL_skipwhite = newSVpvs("");
1470#endif /* PERL_MAD */
1471 s = PL_parser->bufptr;
1472 bufend = PL_parser->bufend;
1473 while (1) {
1474 char c = *s;
1475 if (c == '#') {
1476 do {
1477 c = *++s;
1478 } while (!(c == '\n' || (c == 0 && s == bufend)));
1479 } else if (c == '\n') {
1480 s++;
1481 PL_parser->linestart = s;
1482 if (s == bufend)
1483 need_incline = 1;
1484 else
1485 incline(s);
1486 } else if (isSPACE(c)) {
1487 s++;
1488 } else if (c == 0 && s == bufend) {
1489 bool got_more;
1490#ifdef PERL_MAD
1491 if (PL_madskills)
1492 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1493#endif /* PERL_MAD */
f0998909
Z
1494 if (flags & LEX_NO_NEXT_CHUNK)
1495 break;
f0e67a1d
Z
1496 PL_parser->bufptr = s;
1497 CopLINE_inc(PL_curcop);
1498 got_more = lex_next_chunk(flags);
1499 CopLINE_dec(PL_curcop);
1500 s = PL_parser->bufptr;
1501 bufend = PL_parser->bufend;
1502 if (!got_more)
1503 break;
1504 if (need_incline && PL_parser->rsfp) {
1505 incline(s);
1506 need_incline = 0;
1507 }
1508 } else {
1509 break;
1510 }
1511 }
1512#ifdef PERL_MAD
1513 if (PL_madskills)
1514 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1515#endif /* PERL_MAD */
1516 PL_parser->bufptr = s;
1517}
1518
1519/*
ffb4593c
NT
1520 * S_incline
1521 * This subroutine has nothing to do with tilting, whether at windmills
1522 * or pinball tables. Its name is short for "increment line". It
57843af0 1523 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1524 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1525 * # line 500 "foo.pm"
1526 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1527 */
1528
76e3520e 1529STATIC void
d9095cec 1530S_incline(pTHX_ const char *s)
463ee0b2 1531{
97aff369 1532 dVAR;
d9095cec
NC
1533 const char *t;
1534 const char *n;
1535 const char *e;
8818d409 1536 line_t line_num;
463ee0b2 1537
7918f24d
NC
1538 PERL_ARGS_ASSERT_INCLINE;
1539
57843af0 1540 CopLINE_inc(PL_curcop);
463ee0b2
LW
1541 if (*s++ != '#')
1542 return;
d4c19fe8
AL
1543 while (SPACE_OR_TAB(*s))
1544 s++;
73659bf1
GS
1545 if (strnEQ(s, "line", 4))
1546 s += 4;
1547 else
1548 return;
084592ab 1549 if (SPACE_OR_TAB(*s))
73659bf1 1550 s++;
4e553d73 1551 else
73659bf1 1552 return;
d4c19fe8
AL
1553 while (SPACE_OR_TAB(*s))
1554 s++;
463ee0b2
LW
1555 if (!isDIGIT(*s))
1556 return;
d4c19fe8 1557
463ee0b2
LW
1558 n = s;
1559 while (isDIGIT(*s))
1560 s++;
07714eb4 1561 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1562 return;
bf4acbe4 1563 while (SPACE_OR_TAB(*s))
463ee0b2 1564 s++;
73659bf1 1565 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1566 s++;
73659bf1
GS
1567 e = t + 1;
1568 }
463ee0b2 1569 else {
c35e046a
AL
1570 t = s;
1571 while (!isSPACE(*t))
1572 t++;
73659bf1 1573 e = t;
463ee0b2 1574 }
bf4acbe4 1575 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1576 e++;
1577 if (*e != '\n' && *e != '\0')
1578 return; /* false alarm */
1579
8818d409
FC
1580 line_num = atoi(n)-1;
1581
f4dd75d9 1582 if (t - s > 0) {
d9095cec 1583 const STRLEN len = t - s;
19bad673
NC
1584 SV *const temp_sv = CopFILESV(PL_curcop);
1585 const char *cf;
1586 STRLEN tmplen;
1587
1588 if (temp_sv) {
1589 cf = SvPVX(temp_sv);
1590 tmplen = SvCUR(temp_sv);
1591 } else {
1592 cf = NULL;
1593 tmplen = 0;
1594 }
1595
d1299d44 1596 if (!PL_rsfp && !PL_parser->filtered) {
e66cf94c
RGS
1597 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1598 * to *{"::_<newfilename"} */
44867030
NC
1599 /* However, the long form of evals is only turned on by the
1600 debugger - usually they're "(eval %lu)" */
1601 char smallbuf[128];
1602 char *tmpbuf;
1603 GV **gvp;
d9095cec 1604 STRLEN tmplen2 = len;
798b63bc 1605 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1606 tmpbuf = smallbuf;
1607 else
2ae0db35 1608 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1609 tmpbuf[0] = '_';
1610 tmpbuf[1] = '<';
2ae0db35 1611 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1612 tmplen += 2;
8a5ee598
RGS
1613 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1614 if (gvp) {
44867030
NC
1615 char *tmpbuf2;
1616 GV *gv2;
1617
1618 if (tmplen2 + 2 <= sizeof smallbuf)
1619 tmpbuf2 = smallbuf;
1620 else
1621 Newx(tmpbuf2, tmplen2 + 2, char);
1622
1623 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1624 /* Either they malloc'd it, or we malloc'd it,
1625 so no prefix is present in ours. */
1626 tmpbuf2[0] = '_';
1627 tmpbuf2[1] = '<';
1628 }
1629
1630 memcpy(tmpbuf2 + 2, s, tmplen2);
1631 tmplen2 += 2;
1632
8a5ee598 1633 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1634 if (!isGV(gv2)) {
8a5ee598 1635 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1636 /* adjust ${"::_<newfilename"} to store the new file name */
1637 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1638 /* The line number may differ. If that is the case,
1639 alias the saved lines that are in the array.
1640 Otherwise alias the whole array. */
1641 if (CopLINE(PL_curcop) == line_num) {
1642 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1643 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1644 }
1645 else if (GvAV(*gvp)) {
1646 AV * const av = GvAV(*gvp);
1647 const I32 start = CopLINE(PL_curcop)+1;
1648 I32 items = AvFILLp(av) - start;
1649 if (items > 0) {
1650 AV * const av2 = GvAVn(gv2);
1651 SV **svp = AvARRAY(av) + start;
1652 I32 l = (I32)line_num+1;
1653 while (items--)
1654 av_store(av2, l++, SvREFCNT_inc(*svp++));
1655 }
1656 }
e5527e4b 1657 }
44867030
NC
1658
1659 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1660 }
e66cf94c 1661 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1662 }
05ec9bb3 1663 CopFILE_free(PL_curcop);
d9095cec 1664 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1665 }
8818d409 1666 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1667}
1668
29595ff2 1669#ifdef PERL_MAD
cd81e915 1670/* skip space before PL_thistoken */
29595ff2
NC
1671
1672STATIC char *
1673S_skipspace0(pTHX_ register char *s)
1674{
7918f24d
NC
1675 PERL_ARGS_ASSERT_SKIPSPACE0;
1676
29595ff2
NC
1677 s = skipspace(s);
1678 if (!PL_madskills)
1679 return s;
cd81e915
NC
1680 if (PL_skipwhite) {
1681 if (!PL_thiswhite)
6b29d1f5 1682 PL_thiswhite = newSVpvs("");
cd81e915
NC
1683 sv_catsv(PL_thiswhite, PL_skipwhite);
1684 sv_free(PL_skipwhite);
1685 PL_skipwhite = 0;
1686 }
1687 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1688 return s;
1689}
1690
cd81e915 1691/* skip space after PL_thistoken */
29595ff2
NC
1692
1693STATIC char *
1694S_skipspace1(pTHX_ register char *s)
1695{
d4c19fe8 1696 const char *start = s;
29595ff2
NC
1697 I32 startoff = start - SvPVX(PL_linestr);
1698
7918f24d
NC
1699 PERL_ARGS_ASSERT_SKIPSPACE1;
1700
29595ff2
NC
1701 s = skipspace(s);
1702 if (!PL_madskills)
1703 return s;
1704 start = SvPVX(PL_linestr) + startoff;
cd81e915 1705 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1706 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1707 PL_thistoken = newSVpvn(tstart, start - tstart);
1708 }
1709 PL_realtokenstart = -1;
1710 if (PL_skipwhite) {
1711 if (!PL_nextwhite)
6b29d1f5 1712 PL_nextwhite = newSVpvs("");
cd81e915
NC
1713 sv_catsv(PL_nextwhite, PL_skipwhite);
1714 sv_free(PL_skipwhite);
1715 PL_skipwhite = 0;
29595ff2
NC
1716 }
1717 return s;
1718}
1719
1720STATIC char *
1721S_skipspace2(pTHX_ register char *s, SV **svp)
1722{
c35e046a
AL
1723 char *start;
1724 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1725 const I32 startoff = s - SvPVX(PL_linestr);
1726
7918f24d
NC
1727 PERL_ARGS_ASSERT_SKIPSPACE2;
1728
29595ff2
NC
1729 s = skipspace(s);
1730 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1731 if (!PL_madskills || !svp)
1732 return s;
1733 start = SvPVX(PL_linestr) + startoff;
cd81e915 1734 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1735 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1736 PL_thistoken = newSVpvn(tstart, start - tstart);
1737 PL_realtokenstart = -1;
29595ff2 1738 }
cd81e915 1739 if (PL_skipwhite) {
29595ff2 1740 if (!*svp)
6b29d1f5 1741 *svp = newSVpvs("");
cd81e915
NC
1742 sv_setsv(*svp, PL_skipwhite);
1743 sv_free(PL_skipwhite);
1744 PL_skipwhite = 0;
29595ff2
NC
1745 }
1746
1747 return s;
1748}
1749#endif
1750
80a702cd 1751STATIC void
15f169a1 1752S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1753{
1754 AV *av = CopFILEAVx(PL_curcop);
1755 if (av) {
b9f83d2f 1756 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1757 if (orig_sv)
1758 sv_setsv(sv, orig_sv);
1759 else
1760 sv_setpvn(sv, buf, len);
80a702cd
RGS
1761 (void)SvIOK_on(sv);
1762 SvIV_set(sv, 0);
1763 av_store(av, (I32)CopLINE(PL_curcop), sv);
1764 }
1765}
1766
ffb4593c
NT
1767/*
1768 * S_skipspace
1769 * Called to gobble the appropriate amount and type of whitespace.
1770 * Skips comments as well.
1771 */
1772
76e3520e 1773STATIC char *
cea2e8a9 1774S_skipspace(pTHX_ register char *s)
a687059c 1775{
5db06880 1776#ifdef PERL_MAD
f0e67a1d
Z
1777 char *start = s;
1778#endif /* PERL_MAD */
7918f24d 1779 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1780#ifdef PERL_MAD
cd81e915
NC
1781 if (PL_skipwhite) {
1782 sv_free(PL_skipwhite);
f0e67a1d 1783 PL_skipwhite = NULL;
5db06880 1784 }
f0e67a1d 1785#endif /* PERL_MAD */
3280af22 1786 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1787 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1788 s++;
f0e67a1d
Z
1789 } else {
1790 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1791 PL_bufptr = s;
f0998909
Z
1792 lex_read_space(LEX_KEEP_PREVIOUS |
1793 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1794 LEX_NO_NEXT_CHUNK : 0));
3280af22 1795 s = PL_bufptr;
f0e67a1d
Z
1796 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1797 if (PL_linestart > PL_bufptr)
1798 PL_bufptr = PL_linestart;
1799 return s;
463ee0b2 1800 }
5db06880 1801#ifdef PERL_MAD
f0e67a1d
Z
1802 if (PL_madskills)
1803 PL_skipwhite = newSVpvn(start, s-start);
1804#endif /* PERL_MAD */
5db06880 1805 return s;
a687059c 1806}
378cc40b 1807
ffb4593c
NT
1808/*
1809 * S_check_uni
1810 * Check the unary operators to ensure there's no ambiguity in how they're
1811 * used. An ambiguous piece of code would be:
1812 * rand + 5
1813 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1814 * the +5 is its argument.
1815 */
1816
76e3520e 1817STATIC void
cea2e8a9 1818S_check_uni(pTHX)
ba106d47 1819{
97aff369 1820 dVAR;
d4c19fe8
AL
1821 const char *s;
1822 const char *t;
2f3197b3 1823
3280af22 1824 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1825 return;
3280af22
NIS
1826 while (isSPACE(*PL_last_uni))
1827 PL_last_uni++;
c35e046a
AL
1828 s = PL_last_uni;
1829 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1830 s++;
3280af22 1831 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1832 return;
6136c704 1833
9b387841
NC
1834 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1835 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1836 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1837}
1838
ffb4593c
NT
1839/*
1840 * LOP : macro to build a list operator. Its behaviour has been replaced
1841 * with a subroutine, S_lop() for which LOP is just another name.
1842 */
1843
a0d0e21e
LW
1844#define LOP(f,x) return lop(f,x,s)
1845
ffb4593c
NT
1846/*
1847 * S_lop
1848 * Build a list operator (or something that might be one). The rules:
1849 * - if we have a next token, then it's a list operator [why?]
1850 * - if the next thing is an opening paren, then it's a function
1851 * - else it's a list operator
1852 */
1853
76e3520e 1854STATIC I32
a0be28da 1855S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1856{
97aff369 1857 dVAR;
7918f24d
NC
1858
1859 PERL_ARGS_ASSERT_LOP;
1860
6154021b 1861 pl_yylval.ival = f;
35c8bce7 1862 CLINE;
3280af22
NIS
1863 PL_expect = x;
1864 PL_bufptr = s;
1865 PL_last_lop = PL_oldbufptr;
eb160463 1866 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1867#ifdef PERL_MAD
1868 if (PL_lasttoke)
78cdf107 1869 goto lstop;
5db06880 1870#else
3280af22 1871 if (PL_nexttoke)
78cdf107 1872 goto lstop;
5db06880 1873#endif
79072805 1874 if (*s == '(')
bbf60fe6 1875 return REPORT(FUNC);
29595ff2 1876 s = PEEKSPACE(s);
79072805 1877 if (*s == '(')
bbf60fe6 1878 return REPORT(FUNC);
78cdf107
Z
1879 else {
1880 lstop:
1881 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1882 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1883 return REPORT(LSTOP);
78cdf107 1884 }
79072805
LW
1885}
1886
5db06880
NC
1887#ifdef PERL_MAD
1888 /*
1889 * S_start_force
1890 * Sets up for an eventual force_next(). start_force(0) basically does
1891 * an unshift, while start_force(-1) does a push. yylex removes items
1892 * on the "pop" end.
1893 */
1894
1895STATIC void
1896S_start_force(pTHX_ int where)
1897{
1898 int i;
1899
cd81e915 1900 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1901 where = PL_lasttoke;
cd81e915
NC
1902 assert(PL_curforce < 0 || PL_curforce == where);
1903 if (PL_curforce != where) {
5db06880
NC
1904 for (i = PL_lasttoke; i > where; --i) {
1905 PL_nexttoke[i] = PL_nexttoke[i-1];
1906 }
1907 PL_lasttoke++;
1908 }
cd81e915 1909 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1910 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1911 PL_curforce = where;
1912 if (PL_nextwhite) {
5db06880 1913 if (PL_madskills)
6b29d1f5 1914 curmad('^', newSVpvs(""));
cd81e915 1915 CURMAD('_', PL_nextwhite);
5db06880
NC
1916 }
1917}
1918
1919STATIC void
1920S_curmad(pTHX_ char slot, SV *sv)
1921{
1922 MADPROP **where;
1923
1924 if (!sv)
1925 return;
cd81e915
NC
1926 if (PL_curforce < 0)
1927 where = &PL_thismad;
5db06880 1928 else
cd81e915 1929 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1930
cd81e915 1931 if (PL_faketokens)
76f68e9b 1932 sv_setpvs(sv, "");
5db06880
NC
1933 else {
1934 if (!IN_BYTES) {
1935 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1936 SvUTF8_on(sv);
1937 else if (PL_encoding) {
1938 sv_recode_to_utf8(sv, PL_encoding);
1939 }
1940 }
1941 }
1942
1943 /* keep a slot open for the head of the list? */
1944 if (slot != '_' && *where && (*where)->mad_key == '^') {
1945 (*where)->mad_key = slot;
daba3364 1946 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1947 (*where)->mad_val = (void*)sv;
1948 }
1949 else
1950 addmad(newMADsv(slot, sv), where, 0);
1951}
1952#else
b3f24c00
MHM
1953# define start_force(where) NOOP
1954# define curmad(slot, sv) NOOP
5db06880
NC
1955#endif
1956
ffb4593c
NT
1957/*
1958 * S_force_next
9cbb5ea2 1959 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1960 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1961 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1962 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1963 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1964 */
1965
4e553d73 1966STATIC void
cea2e8a9 1967S_force_next(pTHX_ I32 type)
79072805 1968{
97aff369 1969 dVAR;
704d4215
GG
1970#ifdef DEBUGGING
1971 if (DEBUG_T_TEST) {
1972 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1973 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1974 }
1975#endif
5db06880 1976#ifdef PERL_MAD
cd81e915 1977 if (PL_curforce < 0)
5db06880 1978 start_force(PL_lasttoke);
cd81e915 1979 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1980 if (PL_lex_state != LEX_KNOWNEXT)
1981 PL_lex_defer = PL_lex_state;
1982 PL_lex_state = LEX_KNOWNEXT;
1983 PL_lex_expect = PL_expect;
cd81e915 1984 PL_curforce = -1;
5db06880 1985#else
3280af22
NIS
1986 PL_nexttype[PL_nexttoke] = type;
1987 PL_nexttoke++;
1988 if (PL_lex_state != LEX_KNOWNEXT) {
1989 PL_lex_defer = PL_lex_state;
1990 PL_lex_expect = PL_expect;
1991 PL_lex_state = LEX_KNOWNEXT;
79072805 1992 }
5db06880 1993#endif
79072805
LW
1994}
1995
28ac2b49
Z
1996void
1997Perl_yyunlex(pTHX)
1998{
a7aaec61
Z
1999 int yyc = PL_parser->yychar;
2000 if (yyc != YYEMPTY) {
2001 if (yyc) {
2002 start_force(-1);
2003 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2004 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 2005 PL_lex_allbrackets--;
a7aaec61 2006 PL_lex_brackets--;
78cdf107
Z
2007 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2008 } else if (yyc == '('/*)*/) {
2009 PL_lex_allbrackets--;
2010 yyc |= (2<<24);
a7aaec61
Z
2011 }
2012 force_next(yyc);
2013 }
28ac2b49
Z
2014 PL_parser->yychar = YYEMPTY;
2015 }
2016}
2017
d0a148a6 2018STATIC SV *
15f169a1 2019S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2020{
97aff369 2021 dVAR;
740cce10 2022 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2023 !IN_BYTES
2024 && UTF
2025 && !is_ascii_string((const U8*)start, len)
740cce10 2026 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2027 return sv;
2028}
2029
ffb4593c
NT
2030/*
2031 * S_force_word
2032 * When the lexer knows the next thing is a word (for instance, it has
2033 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2034 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2035 * lookahead.
ffb4593c
NT
2036 *
2037 * Arguments:
b1b65b59 2038 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2039 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2040 * int check_keyword : if true, Perl checks to make sure the word isn't
2041 * a keyword (do this if the word is a label, e.g. goto FOO)
2042 * int allow_pack : if true, : characters will also be allowed (require,
2043 * use, etc. do this)
9cbb5ea2 2044 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2045 */
2046
76e3520e 2047STATIC char *
cea2e8a9 2048S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2049{
97aff369 2050 dVAR;
463ee0b2
LW
2051 register char *s;
2052 STRLEN len;
4e553d73 2053
7918f24d
NC
2054 PERL_ARGS_ASSERT_FORCE_WORD;
2055
29595ff2 2056 start = SKIPSPACE1(start);
463ee0b2 2057 s = start;
7e2040f0 2058 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2059 (allow_pack && *s == ':') ||
15f0808c 2060 (allow_initial_tick && *s == '\'') )
a0d0e21e 2061 {
3280af22 2062 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2063 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2064 return start;
cd81e915 2065 start_force(PL_curforce);
5db06880
NC
2066 if (PL_madskills)
2067 curmad('X', newSVpvn(start,s-start));
463ee0b2 2068 if (token == METHOD) {
29595ff2 2069 s = SKIPSPACE1(s);
463ee0b2 2070 if (*s == '(')
3280af22 2071 PL_expect = XTERM;
463ee0b2 2072 else {
3280af22 2073 PL_expect = XOPERATOR;
463ee0b2 2074 }
79072805 2075 }
e74e6b3d 2076 if (PL_madskills)
63575281 2077 curmad('g', newSVpvs( "forced" ));
9ded7720 2078 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2079 = (OP*)newSVOP(OP_CONST,0,
2080 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2081 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2082 force_next(token);
2083 }
2084 return s;
2085}
2086
ffb4593c
NT
2087/*
2088 * S_force_ident
9cbb5ea2 2089 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2090 * text only contains the "foo" portion. The first argument is a pointer
2091 * to the "foo", and the second argument is the type symbol to prefix.
2092 * Forces the next token to be a "WORD".
9cbb5ea2 2093 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2094 */
2095
76e3520e 2096STATIC void
bfed75c6 2097S_force_ident(pTHX_ register const char *s, int kind)
79072805 2098{
97aff369 2099 dVAR;
7918f24d
NC
2100
2101 PERL_ARGS_ASSERT_FORCE_IDENT;
2102
c35e046a 2103 if (*s) {
90e5519e 2104 const STRLEN len = strlen(s);
728847b1
BF
2105 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2106 UTF ? SVf_UTF8 : 0));
cd81e915 2107 start_force(PL_curforce);
9ded7720 2108 NEXTVAL_NEXTTOKE.opval = o;
79072805 2109 force_next(WORD);
748a9306 2110 if (kind) {
11343788 2111 o->op_private = OPpCONST_ENTERED;
55497cff 2112 /* XXX see note in pp_entereval() for why we forgo typo
2113 warnings if the symbol must be introduced in an eval.
2114 GSAR 96-10-12 */
90e5519e 2115 gv_fetchpvn_flags(s, len,
728847b1
BF
2116 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2117 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
90e5519e
NC
2118 kind == '$' ? SVt_PV :
2119 kind == '@' ? SVt_PVAV :
2120 kind == '%' ? SVt_PVHV :
a0d0e21e 2121 SVt_PVGV
90e5519e 2122 );
748a9306 2123 }
79072805
LW
2124 }
2125}
2126
1571675a
GS
2127NV
2128Perl_str_to_version(pTHX_ SV *sv)
2129{
2130 NV retval = 0.0;
2131 NV nshift = 1.0;
2132 STRLEN len;
cfd0369c 2133 const char *start = SvPV_const(sv,len);
9d4ba2ae 2134 const char * const end = start + len;
504618e9 2135 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2136
2137 PERL_ARGS_ASSERT_STR_TO_VERSION;
2138
1571675a 2139 while (start < end) {
ba210ebe 2140 STRLEN skip;
1571675a
GS
2141 UV n;
2142 if (utf)
9041c2e3 2143 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2144 else {
2145 n = *(U8*)start;
2146 skip = 1;
2147 }
2148 retval += ((NV)n)/nshift;
2149 start += skip;
2150 nshift *= 1000;
2151 }
2152 return retval;
2153}
2154
4e553d73 2155/*
ffb4593c
NT
2156 * S_force_version
2157 * Forces the next token to be a version number.
e759cc13
RGS
2158 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2159 * and if "guessing" is TRUE, then no new token is created (and the caller
2160 * must use an alternative parsing method).
ffb4593c
NT
2161 */
2162
76e3520e 2163STATIC char *
e759cc13 2164S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2165{
97aff369 2166 dVAR;
5f66b61c 2167 OP *version = NULL;
44dcb63b 2168 char *d;
5db06880
NC
2169#ifdef PERL_MAD
2170 I32 startoff = s - SvPVX(PL_linestr);
2171#endif
89bfa8cd 2172
7918f24d
NC
2173 PERL_ARGS_ASSERT_FORCE_VERSION;
2174
29595ff2 2175 s = SKIPSPACE1(s);
89bfa8cd 2176
44dcb63b 2177 d = s;
dd629d5b 2178 if (*d == 'v')
44dcb63b 2179 d++;
44dcb63b 2180 if (isDIGIT(*d)) {
e759cc13
RGS
2181 while (isDIGIT(*d) || *d == '_' || *d == '.')
2182 d++;
5db06880
NC
2183#ifdef PERL_MAD
2184 if (PL_madskills) {
cd81e915 2185 start_force(PL_curforce);
5db06880
NC
2186 curmad('X', newSVpvn(s,d-s));
2187 }
2188#endif
4e4da3ac 2189 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2190 SV *ver;
8d08d9ba 2191#ifdef USE_LOCALE_NUMERIC
909d3787
KW
2192 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2193 setlocale(LC_NUMERIC, "C");
8d08d9ba 2194#endif
6154021b 2195 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2196#ifdef USE_LOCALE_NUMERIC
2197 setlocale(LC_NUMERIC, loc);
909d3787 2198 Safefree(loc);
8d08d9ba 2199#endif
6154021b 2200 version = pl_yylval.opval;
dd629d5b
GS
2201 ver = cSVOPx(version)->op_sv;
2202 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2203 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2204 SvNV_set(ver, str_to_version(ver));
1571675a 2205 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2206 }
89bfa8cd 2207 }
5db06880
NC
2208 else if (guessing) {
2209#ifdef PERL_MAD
2210 if (PL_madskills) {
cd81e915
NC
2211 sv_free(PL_nextwhite); /* let next token collect whitespace */
2212 PL_nextwhite = 0;
5db06880
NC
2213 s = SvPVX(PL_linestr) + startoff;
2214 }
2215#endif
e759cc13 2216 return s;
5db06880 2217 }
89bfa8cd 2218 }
2219
5db06880
NC
2220#ifdef PERL_MAD
2221 if (PL_madskills && !version) {
cd81e915
NC
2222 sv_free(PL_nextwhite); /* let next token collect whitespace */
2223 PL_nextwhite = 0;
5db06880
NC
2224 s = SvPVX(PL_linestr) + startoff;
2225 }
2226#endif
89bfa8cd 2227 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2228 start_force(PL_curforce);
9ded7720 2229 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2230 force_next(WORD);
89bfa8cd 2231
e759cc13 2232 return s;
89bfa8cd 2233}
2234
ffb4593c 2235/*
91152fc1
DG
2236 * S_force_strict_version
2237 * Forces the next token to be a version number using strict syntax rules.
2238 */
2239
2240STATIC char *
2241S_force_strict_version(pTHX_ char *s)
2242{
2243 dVAR;
2244 OP *version = NULL;
2245#ifdef PERL_MAD
2246 I32 startoff = s - SvPVX(PL_linestr);
2247#endif
2248 const char *errstr = NULL;
2249
2250 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2251
2252 while (isSPACE(*s)) /* leading whitespace */
2253 s++;
2254
2255 if (is_STRICT_VERSION(s,&errstr)) {
2256 SV *ver = newSV(0);
2257 s = (char *)scan_version(s, ver, 0);
2258 version = newSVOP(OP_CONST, 0, ver);
2259 }
4e4da3ac
Z
2260 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2261 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2262 {
91152fc1
DG
2263 PL_bufptr = s;
2264 if (errstr)
2265 yyerror(errstr); /* version required */
2266 return s;
2267 }
2268
2269#ifdef PERL_MAD
2270 if (PL_madskills && !version) {
2271 sv_free(PL_nextwhite); /* let next token collect whitespace */
2272 PL_nextwhite = 0;
2273 s = SvPVX(PL_linestr) + startoff;
2274 }
2275#endif
2276 /* NOTE: The parser sees the package name and the VERSION swapped */
2277 start_force(PL_curforce);
2278 NEXTVAL_NEXTTOKE.opval = version;
2279 force_next(WORD);
2280
2281 return s;
2282}
2283
2284/*
ffb4593c
NT
2285 * S_tokeq
2286 * Tokenize a quoted string passed in as an SV. It finds the next
2287 * chunk, up to end of string or a backslash. It may make a new
2288 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2289 * turns \\ into \.
2290 */
2291
76e3520e 2292STATIC SV *
cea2e8a9 2293S_tokeq(pTHX_ SV *sv)
79072805 2294{
97aff369 2295 dVAR;
79072805
LW
2296 register char *s;
2297 register char *send;
2298 register char *d;
b3ac6de7
IZ
2299 STRLEN len = 0;
2300 SV *pv = sv;
79072805 2301
7918f24d
NC
2302 PERL_ARGS_ASSERT_TOKEQ;
2303
79072805 2304 if (!SvLEN(sv))
b3ac6de7 2305 goto finish;
79072805 2306
a0d0e21e 2307 s = SvPV_force(sv, len);
21a311ee 2308 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2309 goto finish;
463ee0b2 2310 send = s + len;
dcb21ed6
NC
2311 /* This is relying on the SV being "well formed" with a trailing '\0' */
2312 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2313 s++;
2314 if (s == send)
b3ac6de7 2315 goto finish;
79072805 2316 d = s;
be4731d2 2317 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2318 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2319 }
79072805
LW
2320 while (s < send) {
2321 if (*s == '\\') {
a0d0e21e 2322 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2323 s++; /* all that, just for this */
2324 }
2325 *d++ = *s++;
2326 }
2327 *d = '\0';
95a20fc0 2328 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2329 finish:
3280af22 2330 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2331 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2332 return sv;
2333}
2334
ffb4593c
NT
2335/*
2336 * Now come three functions related to double-quote context,
2337 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2338 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2339 * interact with PL_lex_state, and create fake ( ... ) argument lists
2340 * to handle functions and concatenation.
2341 * They assume that whoever calls them will be setting up a fake
2342 * join call, because each subthing puts a ',' after it. This lets
2343 * "lower \luPpEr"
2344 * become
2345 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2346 *
2347 * (I'm not sure whether the spurious commas at the end of lcfirst's
2348 * arguments and join's arguments are created or not).
2349 */
2350
2351/*
2352 * S_sublex_start
6154021b 2353 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2354 *
2355 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2356 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2357 *
2358 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2359 *
2360 * Everything else becomes a FUNC.
2361 *
2362 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2363 * had an OP_CONST or OP_READLINE). This just sets us up for a
2364 * call to S_sublex_push().
2365 */
2366
76e3520e 2367STATIC I32
cea2e8a9 2368S_sublex_start(pTHX)
79072805 2369{
97aff369 2370 dVAR;
6154021b 2371 register const I32 op_type = pl_yylval.ival;
79072805
LW
2372
2373 if (op_type == OP_NULL) {
6154021b 2374 pl_yylval.opval = PL_lex_op;
5f66b61c 2375 PL_lex_op = NULL;
79072805
LW
2376 return THING;
2377 }
2378 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2379 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2380
2381 if (SvTYPE(sv) == SVt_PVIV) {
2382 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2383 STRLEN len;
96a5add6 2384 const char * const p = SvPV_const(sv, len);
740cce10 2385 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2386 SvREFCNT_dec(sv);
2387 sv = nsv;
4e553d73 2388 }
6154021b 2389 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2390 PL_lex_stuff = NULL;
6f33ba73
RGS
2391 /* Allow <FH> // "foo" */
2392 if (op_type == OP_READLINE)
2393 PL_expect = XTERMORDORDOR;
79072805
LW
2394 return THING;
2395 }
e3f73d4e
RGS
2396 else if (op_type == OP_BACKTICK && PL_lex_op) {
2397 /* readpipe() vas overriden */
2398 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2399 pl_yylval.opval = PL_lex_op;
9b201d7d 2400 PL_lex_op = NULL;
e3f73d4e
RGS
2401 PL_lex_stuff = NULL;
2402 return THING;
2403 }
79072805 2404
3280af22 2405 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2406 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2407 PL_sublex_info.sub_op = PL_lex_op;
2408 PL_lex_state = LEX_INTERPPUSH;
55497cff 2409
3280af22
NIS
2410 PL_expect = XTERM;
2411 if (PL_lex_op) {
6154021b 2412 pl_yylval.opval = PL_lex_op;
5f66b61c 2413 PL_lex_op = NULL;
55497cff 2414 return PMFUNC;
2415 }
2416 else
2417 return FUNC;
2418}
2419
ffb4593c
NT
2420/*
2421 * S_sublex_push
2422 * Create a new scope to save the lexing state. The scope will be
2423 * ended in S_sublex_done. Returns a '(', starting the function arguments
2424 * to the uc, lc, etc. found before.
2425 * Sets PL_lex_state to LEX_INTERPCONCAT.
2426 */
2427
76e3520e 2428STATIC I32
cea2e8a9 2429S_sublex_push(pTHX)
55497cff 2430{
27da23d5 2431 dVAR;
f46d017c 2432 ENTER;
55497cff 2433
3280af22 2434 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2435 SAVEBOOL(PL_lex_dojoin);
3280af22 2436 SAVEI32(PL_lex_brackets);
78cdf107
Z
2437 SAVEI32(PL_lex_allbrackets);
2438 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2439 SAVEI32(PL_lex_casemods);
2440 SAVEI32(PL_lex_starts);
651b5b28 2441 SAVEI8(PL_lex_state);
7766f137 2442 SAVEVPTR(PL_lex_inpat);
98246f1e 2443 SAVEI16(PL_lex_inwhat);
57843af0 2444 SAVECOPLINE(PL_curcop);
3280af22 2445 SAVEPPTR(PL_bufptr);
8452ff4b 2446 SAVEPPTR(PL_bufend);
3280af22
NIS
2447 SAVEPPTR(PL_oldbufptr);
2448 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2449 SAVEPPTR(PL_last_lop);
2450 SAVEPPTR(PL_last_uni);
3280af22
NIS
2451 SAVEPPTR(PL_linestart);
2452 SAVESPTR(PL_linestr);
8edd5f42
RGS
2453 SAVEGENERICPV(PL_lex_brackstack);
2454 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2455
2456 PL_linestr = PL_lex_stuff;
a0714e2c 2457 PL_lex_stuff = NULL;
3280af22 2458
9cbb5ea2
GS
2459 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2460 = SvPVX(PL_linestr);
3280af22 2461 PL_bufend += SvCUR(PL_linestr);
bd61b366 2462 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2463 SAVEFREESV(PL_linestr);
2464
2465 PL_lex_dojoin = FALSE;
2466 PL_lex_brackets = 0;
78cdf107
Z
2467 PL_lex_allbrackets = 0;
2468 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2469 Newx(PL_lex_brackstack, 120, char);
2470 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2471 PL_lex_casemods = 0;
2472 *PL_lex_casestack = '\0';
2473 PL_lex_starts = 0;
2474 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2475 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2476
2477 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2478 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2479 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2480 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2481 else
5f66b61c 2482 PL_lex_inpat = NULL;
79072805 2483
55497cff 2484 return '(';
79072805
LW
2485}
2486
ffb4593c
NT
2487/*
2488 * S_sublex_done
2489 * Restores lexer state after a S_sublex_push.
2490 */
2491
76e3520e 2492STATIC I32
cea2e8a9 2493S_sublex_done(pTHX)
79072805 2494{
27da23d5 2495 dVAR;
3280af22 2496 if (!PL_lex_starts++) {
396482e1 2497 SV * const sv = newSVpvs("");
9aa983d2
JH
2498 if (SvUTF8(PL_linestr))
2499 SvUTF8_on(sv);
3280af22 2500 PL_expect = XOPERATOR;
6154021b 2501 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2502 return THING;
2503 }
2504
3280af22
NIS
2505 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2506 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2507 return yylex();
79072805
LW
2508 }
2509
ffb4593c 2510 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2511 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2512 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2513 PL_linestr = PL_lex_repl;
2514 PL_lex_inpat = 0;
2515 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2516 PL_bufend += SvCUR(PL_linestr);
bd61b366 2517 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2518 SAVEFREESV(PL_linestr);
2519 PL_lex_dojoin = FALSE;
2520 PL_lex_brackets = 0;
78cdf107
Z
2521 PL_lex_allbrackets = 0;
2522 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2523 PL_lex_casemods = 0;
2524 *PL_lex_casestack = '\0';
2525 PL_lex_starts = 0;
25da4f38 2526 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2527 PL_lex_state = LEX_INTERPNORMAL;
2528 PL_lex_starts++;
e9fa98b2
HS
2529 /* we don't clear PL_lex_repl here, so that we can check later
2530 whether this is an evalled subst; that means we rely on the
2531 logic to ensure sublex_done() is called again only via the
2532 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2533 }
e9fa98b2 2534 else {
3280af22 2535 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2536 PL_lex_repl = NULL;
e9fa98b2 2537 }
79072805 2538 return ',';
ffed7fef
LW
2539 }
2540 else {
5db06880
NC
2541#ifdef PERL_MAD
2542 if (PL_madskills) {
cd81e915
NC
2543 if (PL_thiswhite) {
2544 if (!PL_endwhite)
6b29d1f5 2545 PL_endwhite = newSVpvs("");
cd81e915
NC
2546 sv_catsv(PL_endwhite, PL_thiswhite);
2547 PL_thiswhite = 0;
2548 }
2549 if (PL_thistoken)
76f68e9b 2550 sv_setpvs(PL_thistoken,"");
5db06880 2551 else
cd81e915 2552 PL_realtokenstart = -1;
5db06880
NC
2553 }
2554#endif
f46d017c 2555 LEAVE;
3280af22
NIS
2556 PL_bufend = SvPVX(PL_linestr);
2557 PL_bufend += SvCUR(PL_linestr);
2558 PL_expect = XOPERATOR;
09bef843 2559 PL_sublex_info.sub_inwhat = 0;
79072805 2560 return ')';
ffed7fef
LW
2561 }
2562}
2563
02aa26ce
NT
2564/*
2565 scan_const
2566
2567 Extracts a pattern, double-quoted string, or transliteration. This
2568 is terrifying code.
2569
94def140 2570 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2571 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2572 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2573
94def140
TS
2574 Returns a pointer to the character scanned up to. If this is
2575 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2576 successfully parsed), will leave an OP for the substring scanned
6154021b 2577 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2578 by looking at the next characters herself.
2579
02aa26ce
NT
2580 In patterns:
2581 backslashes:
ff3f963a 2582 constants: \N{NAME} only
02aa26ce
NT
2583 case and quoting: \U \Q \E
2584 stops on @ and $, but not for $ as tail anchor
2585
2586 In transliterations:
2587 characters are VERY literal, except for - not at the start or end
94def140
TS
2588 of the string, which indicates a range. If the range is in bytes,
2589 scan_const expands the range to the full set of intermediate
2590 characters. If the range is in utf8, the hyphen is replaced with
2591 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2592
2593 In double-quoted strings:
2594 backslashes:
2595 double-quoted style: \r and \n
ff3f963a 2596 constants: \x31, etc.
94def140 2597 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2598 case and quoting: \U \Q \E
2599 stops on @ and $
2600
2601 scan_const does *not* construct ops to handle interpolated strings.
2602 It stops processing as soon as it finds an embedded $ or @ variable
2603 and leaves it to the caller to work out what's going on.
2604
94def140
TS
2605 embedded arrays (whether in pattern or not) could be:
2606 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2607
2608 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2609
2610 $ in pattern could be $foo or could be tail anchor. Assumption:
2611 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2612 followed by one of "()| \r\n\t"
02aa26ce
NT
2613
2614 \1 (backreferences) are turned into $1
2615
2616 The structure of the code is
2617 while (there's a character to process) {
94def140
TS
2618 handle transliteration ranges
2619 skip regexp comments /(?#comment)/ and codes /(?{code})/
2620 skip #-initiated comments in //x patterns
2621 check for embedded arrays
02aa26ce
NT
2622 check for embedded scalars
2623 if (backslash) {
94def140 2624 deprecate \1 in substitution replacements
02aa26ce
NT
2625 handle string-changing backslashes \l \U \Q \E, etc.
2626 switch (what was escaped) {
94def140 2627 handle \- in a transliteration (becomes a literal -)
ff3f963a 2628 if a pattern and not \N{, go treat as regular character
94def140
TS
2629 handle \132 (octal characters)
2630 handle \x15 and \x{1234} (hex characters)
ff3f963a 2631 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2632 handle \cV (control characters)
2633 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2634 } (end switch)
77a135fe 2635 continue
02aa26ce 2636 } (end if backslash)
77a135fe 2637 handle regular character
02aa26ce 2638 } (end while character to read)
4e553d73 2639
02aa26ce
NT
2640*/
2641
76e3520e 2642STATIC char *
cea2e8a9 2643S_scan_const(pTHX_ char *start)
79072805 2644{
97aff369 2645 dVAR;
3280af22 2646 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2647 SV *sv = newSV(send - start); /* sv for the constant. See
2648 note below on sizing. */
02aa26ce
NT
2649 register char *s = start; /* start of the constant */
2650 register char *d = SvPVX(sv); /* destination for copies */
2651 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2652 bool didrange = FALSE; /* did we just finish a range? */
b953e60c
KW
2653 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2654 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2655 to be UTF8? But, this can
2656 show as true when the source
2657 isn't utf8, as for example
2658 when it is entirely composed
2659 of hex constants */
2660
2661 /* Note on sizing: The scanned constant is placed into sv, which is
2662 * initialized by newSV() assuming one byte of output for every byte of
2663 * input. This routine expects newSV() to allocate an extra byte for a
2664 * trailing NUL, which this routine will append if it gets to the end of
2665 * the input. There may be more bytes of input than output (eg., \N{LATIN
2666 * CAPITAL LETTER A}), or more output than input if the constant ends up
2667 * recoded to utf8, but each time a construct is found that might increase
2668 * the needed size, SvGROW() is called. Its size parameter each time is
2669 * based on the best guess estimate at the time, namely the length used so
2670 * far, plus the length the current construct will occupy, plus room for
2671 * the trailing NUL, plus one byte for every input byte still unscanned */
2672
012bcf8d 2673 UV uv;
4c3a8340
TS
2674#ifdef EBCDIC
2675 UV literal_endpoint = 0;
e294cc5d 2676 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2677#endif
012bcf8d 2678
7918f24d
NC
2679 PERL_ARGS_ASSERT_SCAN_CONST;
2680
bb16bae8 2681 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2682 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2683 /* If we are doing a trans and we know we want UTF8 set expectation */
2684 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2685 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2686 }
2687
2688
79072805 2689 while (s < send || dorange) {
ff3f963a 2690
02aa26ce 2691 /* get transliterations out of the way (they're most literal) */
3280af22 2692 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2693 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2694 if (dorange) {
1ba5c669
JH
2695 I32 i; /* current expanded character */
2696 I32 min; /* first character in range */
2697 I32 max; /* last character in range */
02aa26ce 2698
e294cc5d
JH
2699#ifdef EBCDIC
2700 UV uvmax = 0;
2701#endif
2702
2703 if (has_utf8
2704#ifdef EBCDIC
2705 && !native_range
2706#endif
2707 ) {
9d4ba2ae 2708 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2709 char *e = d++;
2710 while (e-- > c)
2711 *(e + 1) = *e;
25716404 2712 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2713 /* mark the range as done, and continue */
2714 dorange = FALSE;
2715 didrange = TRUE;
2716 continue;
2717 }
2b9d42f0 2718
95a20fc0 2719 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2720#ifdef EBCDIC
2721 SvGROW(sv,
2722 SvLEN(sv) + (has_utf8 ?
2723 (512 - UTF_CONTINUATION_MARK +
2724 UNISKIP(0x100))
2725 : 256));
2726 /* How many two-byte within 0..255: 128 in UTF-8,
2727 * 96 in UTF-8-mod. */
2728#else
9cbb5ea2 2729 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2730#endif
9cbb5ea2 2731 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2732#ifdef EBCDIC
2733 if (has_utf8) {
2734 int j;
2735 for (j = 0; j <= 1; j++) {
2736 char * const c = (char*)utf8_hop((U8*)d, -1);
2737 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2738 if (j)
2739 min = (U8)uv;
2740 else if (uv < 256)
2741 max = (U8)uv;
2742 else {
2743 max = (U8)0xff; /* only to \xff */
2744 uvmax = uv; /* \x{100} to uvmax */
2745 }
2746 d = c; /* eat endpoint chars */
2747 }
2748 }
2749 else {
2750#endif
2751 d -= 2; /* eat the first char and the - */
2752 min = (U8)*d; /* first char in range */
2753 max = (U8)d[1]; /* last char in range */
2754#ifdef EBCDIC
2755 }
2756#endif
8ada0baa 2757
c2e66d9e 2758 if (min > max) {
01ec43d0 2759 Perl_croak(aTHX_
d1573ac7 2760 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2761 (char)min, (char)max);
c2e66d9e
GS
2762 }
2763
c7f1f016 2764#ifdef EBCDIC
4c3a8340
TS
2765 if (literal_endpoint == 2 &&
2766 ((isLOWER(min) && isLOWER(max)) ||
2767 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2768 if (isLOWER(min)) {
2769 for (i = min; i <= max; i++)
2770 if (isLOWER(i))
db42d148 2771 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2772 } else {
2773 for (i = min; i <= max; i++)
2774 if (isUPPER(i))
db42d148 2775 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2776 }
2777 }
2778 else
2779#endif
2780 for (i = min; i <= max; i++)
e294cc5d
JH
2781#ifdef EBCDIC
2782 if (has_utf8) {
2783 const U8 ch = (U8)NATIVE_TO_UTF(i);
2784 if (UNI_IS_INVARIANT(ch))
2785 *d++ = (U8)i;
2786 else {
2787 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2788 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2789 }
2790 }
2791 else
2792#endif
2793 *d++ = (char)i;
2794
2795#ifdef EBCDIC
2796 if (uvmax) {
2797 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2798 if (uvmax > 0x101)
2799 *d++ = (char)UTF_TO_NATIVE(0xff);
2800 if (uvmax > 0x100)
2801 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2802 }
2803#endif
02aa26ce
NT
2804
2805 /* mark the range as done, and continue */
79072805 2806 dorange = FALSE;
01ec43d0 2807 didrange = TRUE;
4c3a8340
TS
2808#ifdef EBCDIC
2809 literal_endpoint = 0;
2810#endif
79072805 2811 continue;
4e553d73 2812 }
02aa26ce
NT
2813
2814 /* range begins (ignore - as first or last char) */
79072805 2815 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2816 if (didrange) {
1fafa243 2817 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2818 }
e294cc5d
JH
2819 if (has_utf8
2820#ifdef EBCDIC
2821 && !native_range
2822#endif
2823 ) {
25716404 2824 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2825 s++;
2826 continue;
2827 }
79072805
LW
2828 dorange = TRUE;
2829 s++;
01ec43d0
GS
2830 }
2831 else {
2832 didrange = FALSE;
4c3a8340
TS
2833#ifdef EBCDIC
2834 literal_endpoint = 0;
e294cc5d 2835 native_range = TRUE;
4c3a8340 2836#endif
01ec43d0 2837 }
79072805 2838 }
02aa26ce
NT
2839
2840 /* if we get here, we're not doing a transliteration */
2841
0f5d15d6
IZ
2842 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2843 except for the last char, which will be done separately. */
3280af22 2844 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2845 if (s[2] == '#') {
e994fd66 2846 while (s+1 < send && *s != ')')
db42d148 2847 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2848 }
2849 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2850 || (s[2] == '?' && s[3] == '{'))
155aba94 2851 {
cc6b7395 2852 I32 count = 1;
0f5d15d6 2853 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2854 char c;
2855
d9f97599
GS
2856 while (count && (c = *regparse)) {
2857 if (c == '\\' && regparse[1])
2858 regparse++;
4e553d73 2859 else if (c == '{')
cc6b7395 2860 count++;
4e553d73 2861 else if (c == '}')
cc6b7395 2862 count--;
d9f97599 2863 regparse++;
cc6b7395 2864 }
e994fd66 2865 if (*regparse != ')')
5bdf89e7 2866 regparse--; /* Leave one char for continuation. */
0f5d15d6 2867 while (s < regparse)
db42d148 2868 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2869 }
748a9306 2870 }
02aa26ce
NT
2871
2872 /* likewise skip #-initiated comments in //x patterns */
3280af22 2873 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2874 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2875 while (s+1 < send && *s != '\n')
db42d148 2876 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2877 }
02aa26ce 2878
5d1d4326 2879 /* check for embedded arrays
da6eedaa 2880 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2881 */
1749ea0d
TS
2882 else if (*s == '@' && s[1]) {
2883 if (isALNUM_lazy_if(s+1,UTF))
2884 break;
2885 if (strchr(":'{$", s[1]))
2886 break;
2887 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2888 break; /* in regexp, neither @+ nor @- are interpolated */
2889 }
02aa26ce
NT
2890
2891 /* check for embedded scalars. only stop if we're sure it's a
2892 variable.
2893 */
79072805 2894 else if (*s == '$') {
3280af22 2895 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2896 break;
77772344 2897 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2898 if (s[1] == '\\') {
2899 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2900 "Possible unintended interpolation of $\\ in regex");
77772344 2901 }
79072805 2902 break; /* in regexp, $ might be tail anchor */
77772344 2903 }
79072805 2904 }
02aa26ce 2905
2b9d42f0
NIS
2906 /* End of else if chain - OP_TRANS rejoin rest */
2907
02aa26ce 2908 /* backslashes */
79072805 2909 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2910 char* e; /* Can be used for ending '}', etc. */
2911
79072805 2912 s++;
02aa26ce 2913
7d0fc23c
KW
2914 /* warn on \1 - \9 in substitution replacements, but note that \11
2915 * is an octal; and \19 is \1 followed by '9' */
3280af22 2916 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2917 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2918 {
a2a5de95 2919 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2920 *--s = '$';
2921 break;
2922 }
02aa26ce
NT
2923
2924 /* string-change backslash escapes */
838f2281 2925 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
79072805
LW
2926 --s;
2927 break;
2928 }
ff3f963a
KW
2929 /* In a pattern, process \N, but skip any other backslash escapes.
2930 * This is because we don't want to translate an escape sequence
2931 * into a meta symbol and have the regex compiler use the meta
2932 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2933 * in spite of this, we do have to process \N here while the proper
2934 * charnames handler is in scope. See bugs #56444 and #62056.
2935 * There is a complication because \N in a pattern may also stand
2936 * for 'match a non-nl', and not mean a charname, in which case its
2937 * processing should be deferred to the regex compiler. To be a
2938 * charname it must be followed immediately by a '{', and not look
2939 * like \N followed by a curly quantifier, i.e., not something like
2940 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2941 * quantifier */
2942 else if (PL_lex_inpat
2943 && (*s != 'N'
2944 || s[1] != '{'
2945 || regcurly(s + 1)))
2946 {
cc74c5bd
TS
2947 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2948 goto default_action;
2949 }
02aa26ce 2950
79072805 2951 switch (*s) {
02aa26ce
NT
2952
2953 /* quoted - in transliterations */
79072805 2954 case '-':
3280af22 2955 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2956 *d++ = *s++;
2957 continue;
2958 }
2959 /* FALL THROUGH */
2960 default:
11b8faa4 2961 {
a2a5de95
NC
2962 if ((isALPHA(*s) || isDIGIT(*s)))
2963 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2964 "Unrecognized escape \\%c passed through",
2965 *s);
11b8faa4 2966 /* default action is to copy the quoted character */
f9a63242 2967 goto default_action;
11b8faa4 2968 }
02aa26ce 2969
632403cc 2970 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2971 case '0': case '1': case '2': case '3':
2972 case '4': case '5': case '6': case '7':
ba210ebe 2973 {
53305cf1
NC
2974 I32 flags = 0;
2975 STRLEN len = 3;
77a135fe 2976 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2977 s += len;
2978 }
012bcf8d 2979 goto NUM_ESCAPE_INSERT;
02aa26ce 2980
f0a2b745
KW
2981 /* eg. \o{24} indicates the octal constant \024 */
2982 case 'o':
2983 {
2984 STRLEN len;
454155d9 2985 const char* error;
f0a2b745 2986
454155d9 2987 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2988 s += len;
454155d9 2989 if (! valid) {
f0a2b745
KW
2990 yyerror(error);
2991 continue;
2992 }
2993 goto NUM_ESCAPE_INSERT;
2994 }
2995
77a135fe 2996 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2997 case 'x':
a0ed51b3
LW
2998 ++s;
2999 if (*s == '{') {
9d4ba2ae 3000 char* const e = strchr(s, '}');
a4c04bdc
NC
3001 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
3002 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 3003 STRLEN len;
355860ce 3004
53305cf1 3005 ++s;
adaeee49 3006 if (!e) {
a0ed51b3 3007 yyerror("Missing right brace on \\x{}");
355860ce 3008 continue;
ba210ebe 3009 }
53305cf1 3010 len = e - s;
77a135fe 3011 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 3012 s = e + 1;
a0ed51b3
LW
3013 }
3014 else {
ba210ebe 3015 {
53305cf1 3016 STRLEN len = 2;
a4c04bdc 3017 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 3018 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
3019 s += len;
3020 }
012bcf8d
GS
3021 }
3022
3023 NUM_ESCAPE_INSERT:
ff3f963a
KW
3024 /* Insert oct or hex escaped character. There will always be
3025 * enough room in sv since such escapes will be longer than any
3026 * UTF-8 sequence they can end up as, except if they force us
3027 * to recode the rest of the string into utf8 */
ba7cea30 3028
77a135fe 3029 /* Here uv is the ordinal of the next character being added in
ff3f963a 3030 * unicode (converted from native). */
77a135fe 3031 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3032 if (!has_utf8 && uv > 255) {
77a135fe
KW
3033 /* Might need to recode whatever we have accumulated so
3034 * far if it contains any chars variant in utf8 or
3035 * utf-ebcdic. */
3036
3037 SvCUR_set(sv, d - SvPVX_const(sv));
3038 SvPOK_on(sv);
3039 *d = '\0';
77a135fe 3040 /* See Note on sizing above. */
7bf79863
KW
3041 sv_utf8_upgrade_flags_grow(sv,
3042 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3043 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3044 d = SvPVX(sv) + SvCUR(sv);
3045 has_utf8 = TRUE;
012bcf8d
GS
3046 }
3047
77a135fe
KW
3048 if (has_utf8) {
3049 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3050 if (PL_lex_inwhat == OP_TRANS &&
3051 PL_sublex_info.sub_op) {
3052 PL_sublex_info.sub_op->op_private |=
3053 (PL_lex_repl ? OPpTRANS_FROM_UTF
3054 : OPpTRANS_TO_UTF);
f9a63242 3055 }
e294cc5d
JH
3056#ifdef EBCDIC
3057 if (uv > 255 && !dorange)
3058 native_range = FALSE;
3059#endif
012bcf8d 3060 }
a0ed51b3 3061 else {
012bcf8d 3062 *d++ = (char)uv;
a0ed51b3 3063 }
012bcf8d
GS
3064 }
3065 else {
c4d5f83a 3066 *d++ = (char) uv;
a0ed51b3 3067 }
79072805 3068 continue;
02aa26ce 3069
4a2d328f 3070 case 'N':
ff3f963a
KW
3071 /* In a non-pattern \N must be a named character, like \N{LATIN
3072 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3073 * mean to match a non-newline. For non-patterns, named
3074 * characters are converted to their string equivalents. In
3075 * patterns, named characters are not converted to their
3076 * ultimate forms for the same reasons that other escapes
3077 * aren't. Instead, they are converted to the \N{U+...} form
3078 * to get the value from the charnames that is in effect right
3079 * now, while preserving the fact that it was a named character
3080 * so that the regex compiler knows this */
3081
3082 /* This section of code doesn't generally use the
3083 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3084 * a close examination of this macro and determined it is a
3085 * no-op except on utfebcdic variant characters. Every
3086 * character generated by this that would normally need to be
3087 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3088 * needed, and would complicate use of copy(). XXX There are
3089 * other parts of this file where the macro is used
3090 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3091
3092 /* The structure of this section of code (besides checking for
3093 * errors and upgrading to utf8) is:
3094 * Further disambiguate between the two meanings of \N, and if
3095 * not a charname, go process it elsewhere
0a96133f
KW
3096 * If of form \N{U+...}, pass it through if a pattern;
3097 * otherwise convert to utf8
3098 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3099 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3100
3101 /* Here, s points to the 'N'; the test below is guaranteed to
3102 * succeed if we are being called on a pattern as we already
3103 * know from a test above that the next character is a '{'.
3104 * On a non-pattern \N must mean 'named sequence, which
3105 * requires braces */
3106 s++;
3107 if (*s != '{') {
3108 yyerror("Missing braces on \\N{}");
3109 continue;
3110 }
3111 s++;
3112
0a96133f 3113 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3114 if (! (e = strchr(s, '}'))) {
3115 if (! PL_lex_inpat) {
5777a3f7 3116 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3117 } else {
3118 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3119 }
0a96133f 3120 continue;
ff3f963a 3121 }
cddc7ef4 3122
ff3f963a 3123 /* Here it looks like a named character */
cddc7ef4 3124
ff3f963a
KW
3125 if (PL_lex_inpat) {
3126
3127 /* XXX This block is temporary code. \N{} implies that the
3128 * pattern is to have Unicode semantics, and therefore
3129 * currently has to be encoded in utf8. By putting it in
3130 * utf8 now, we save a whole pass in the regular expression
3131 * compiler. Once that code is changed so Unicode
3132 * semantics doesn't necessarily have to be in utf8, this
da3a4baf
KW
3133 * block should be removed. However, the code that parses
3134 * the output of this would have to be changed to not
3135 * necessarily expect utf8 */
ff3f963a 3136 if (!has_utf8) {
77a135fe 3137 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3138 SvPOK_on(sv);
e4f3eed8 3139 *d = '\0';
77a135fe 3140 /* See Note on sizing above. */
7bf79863 3141 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3142 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3143 /* 5 = '\N{' + cur char + NUL */
3144 (STRLEN)(send - s) + 5);
f08d6ad9 3145 d = SvPVX(sv) + SvCUR(sv);
89491803 3146 has_utf8 = TRUE;
ff3f963a
KW
3147 }
3148 }
423cee85 3149
ff3f963a
KW
3150 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3151 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3152 | PERL_SCAN_DISALLOW_PREFIX;
3153 STRLEN len;
3154
3155 /* For \N{U+...}, the '...' is a unicode value even on
3156 * EBCDIC machines */
3157 s += 2; /* Skip to next char after the 'U+' */
3158 len = e - s;
3159 uv = grok_hex(s, &len, &flags, NULL);
3160 if (len == 0 || len != (STRLEN)(e - s)) {
3161 yyerror("Invalid hexadecimal number in \\N{U+...}");
3162 s = e + 1;
3163 continue;
3164 }
3165
3166 if (PL_lex_inpat) {
3167
e2a7e165
KW
3168 /* On non-EBCDIC platforms, pass through to the regex
3169 * compiler unchanged. The reason we evaluated the
3170 * number above is to make sure there wasn't a syntax
3171 * error. But on EBCDIC we convert to native so
3172 * downstream code can continue to assume it's native
3173 */
ff3f963a 3174 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3175#ifdef EBCDIC
3176 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3177 and the \0 */
3178 "\\N{U+%X}",
3179 (unsigned int) UNI_TO_NATIVE(uv));
3180#else
ff3f963a
KW
3181 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3182 d += e - s + 1;
e2a7e165 3183#endif
ff3f963a
KW
3184 }
3185 else { /* Not a pattern: convert the hex to string */
3186
3187 /* If destination is not in utf8, unconditionally
3188 * recode it to be so. This is because \N{} implies
3189 * Unicode semantics, and scalars have to be in utf8
3190 * to guarantee those semantics */
3191 if (! has_utf8) {
3192 SvCUR_set(sv, d - SvPVX_const(sv));
3193 SvPOK_on(sv);
3194 *d = '\0';
3195 /* See Note on sizing above. */
3196 sv_utf8_upgrade_flags_grow(
3197 sv,
3198 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3199 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3200 d = SvPVX(sv) + SvCUR(sv);
3201 has_utf8 = TRUE;
3202 }
3203
3204 /* Add the string to the output */
3205 if (UNI_IS_INVARIANT(uv)) {
3206 *d++ = (char) uv;
3207 }
3208 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3209 }
3210 }
3211 else { /* Here is \N{NAME} but not \N{U+...}. */
3212
3213 SV *res; /* result from charnames */
3214 const char *str; /* the string in 'res' */
3215 STRLEN len; /* its length */
3216
3217 /* Get the value for NAME */
3218 res = newSVpvn(s, e - s);
3219 res = new_constant( NULL, 0, "charnames",
3220 /* includes all of: \N{...} */
3221 res, NULL, s - 3, e - s + 4 );
3222
3223 /* Most likely res will be in utf8 already since the
3224 * standard charnames uses pack U, but a custom translator
3225 * can leave it otherwise, so make sure. XXX This can be
3226 * revisited to not have charnames use utf8 for characters
3227 * that don't need it when regexes don't have to be in utf8
3228 * for Unicode semantics. If doing so, remember EBCDIC */
3229 sv_utf8_upgrade(res);
3230 str = SvPV_const(res, len);
3231
3232 /* Don't accept malformed input */
3233 if (! is_utf8_string((U8 *) str, len)) {
3234 yyerror("Malformed UTF-8 returned by \\N");
3235 }
3236 else if (PL_lex_inpat) {
3237
3238 if (! len) { /* The name resolved to an empty string */
3239 Copy("\\N{}", d, 4, char);
3240 d += 4;
3241 }
3242 else {
3243 /* In order to not lose information for the regex
3244 * compiler, pass the result in the specially made
3245 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3246 * the code points in hex of each character
3247 * returned by charnames */
3248
3249 const char *str_end = str + len;
3250 STRLEN char_length; /* cur char's byte length */
3251 STRLEN output_length; /* and the number of bytes
3252 after this is translated
3253 into hex digits */
3254 const STRLEN off = d - SvPVX_const(sv);
3255
3256 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3257 * max('U+', '.'); and 1 for NUL */
3258 char hex_string[2 * UTF8_MAXBYTES + 5];
3259
3260 /* Get the first character of the result. */
3261 U32 uv = utf8n_to_uvuni((U8 *) str,
3262 len,
3263 &char_length,
3264 UTF8_ALLOW_ANYUV);
3265
3266 /* The call to is_utf8_string() above hopefully
3267 * guarantees that there won't be an error. But
3268 * it's easy here to make sure. The function just
3269 * above warns and returns 0 if invalid utf8, but
3270 * it can also return 0 if the input is validly a
3271 * NUL. Disambiguate */
3272 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3273 uv = UNICODE_REPLACEMENT;
3274 }
3275
3276 /* Convert first code point to hex, including the
e2a7e165
KW
3277 * boiler plate before it. For all these, we
3278 * convert to native format so that downstream code
3279 * can continue to assume the input is native */
78c35590 3280 output_length =
3353de27 3281 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3282 "\\N{U+%X",
3283 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3284
3285 /* Make sure there is enough space to hold it */
3286 d = off + SvGROW(sv, off
3287 + output_length
3288 + (STRLEN)(send - e)
3289 + 2); /* '}' + NUL */
3290 /* And output it */
3291 Copy(hex_string, d, output_length, char);
3292 d += output_length;
3293
3294 /* For each subsequent character, append dot and
3295 * its ordinal in hex */
3296 while ((str += char_length) < str_end) {
3297 const STRLEN off = d - SvPVX_const(sv);
3298 U32 uv = utf8n_to_uvuni((U8 *) str,
3299 str_end - str,
3300 &char_length,
3301 UTF8_ALLOW_ANYUV);
3302 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3303 uv = UNICODE_REPLACEMENT;
3304 }
3305
78c35590 3306 output_length =
3353de27 3307 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3308 ".%X",
3309 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3310
3311 d = off + SvGROW(sv, off
3312 + output_length
3313 + (STRLEN)(send - e)
3314 + 2); /* '}' + NUL */
3315 Copy(hex_string, d, output_length, char);
3316 d += output_length;
3317 }
3318
3319 *d++ = '}'; /* Done. Add the trailing brace */
3320 }
3321 }
3322 else { /* Here, not in a pattern. Convert the name to a
3323 * string. */
3324
3325 /* If destination is not in utf8, unconditionally
3326 * recode it to be so. This is because \N{} implies
3327 * Unicode semantics, and scalars have to be in utf8
3328 * to guarantee those semantics */
3329 if (! has_utf8) {
3330 SvCUR_set(sv, d - SvPVX_const(sv));
3331 SvPOK_on(sv);
3332 *d = '\0';
3333 /* See Note on sizing above. */
3334 sv_utf8_upgrade_flags_grow(sv,
3335 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3336 len + (STRLEN)(send - s) + 1);
3337 d = SvPVX(sv) + SvCUR(sv);
3338 has_utf8 = TRUE;
3339 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3340
3341 /* See Note on sizing above. (NOTE: SvCUR() is not
3342 * set correctly here). */
3343 const STRLEN off = d - SvPVX_const(sv);
3344 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3345 }
3346 Copy(str, d, len, char);
3347 d += len;
423cee85 3348 }
423cee85 3349 SvREFCNT_dec(res);
cb233ae3
KW
3350
3351 /* Deprecate non-approved name syntax */
3352 if (ckWARN_d(WARN_DEPRECATED)) {
3353 bool problematic = FALSE;
3354 char* i = s;
3355
3356 /* For non-ut8 input, look to see that the first
3357 * character is an alpha, then loop through the rest
3358 * checking that each is a continuation */
3359 if (! this_utf8) {
3360 if (! isALPHAU(*i)) problematic = TRUE;
3361 else for (i = s + 1; i < e; i++) {
3362 if (isCHARNAME_CONT(*i)) continue;
3363 problematic = TRUE;
3364 break;
3365 }
3366 }
3367 else {
3368 /* Similarly for utf8. For invariants can check
3369 * directly. We accept anything above the latin1
3370 * range because it is immaterial to Perl if it is
3371 * correct or not, and is expensive to check. But
3372 * it is fairly easy in the latin1 range to convert
3373 * the variants into a single character and check
3374 * those */
3375 if (UTF8_IS_INVARIANT(*i)) {
3376 if (! isALPHAU(*i)) problematic = TRUE;
3377 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3378 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3379 *(i+1)))))
3380 {
3381 problematic = TRUE;
3382 }
3383 }
3384 if (! problematic) for (i = s + UTF8SKIP(s);
3385 i < e;
3386 i+= UTF8SKIP(i))
3387 {
3388 if (UTF8_IS_INVARIANT(*i)) {
3389 if (isCHARNAME_CONT(*i)) continue;
3390 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3391 continue;
3392 } else if (isCHARNAME_CONT(
3393 UNI_TO_NATIVE(
81c14aa2 3394 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3395 {
3396 continue;
3397 }
3398 problematic = TRUE;
3399 break;
3400 }
3401 }
3402 if (problematic) {
6e1bad6c
KW
3403 /* The e-i passed to the final %.*s makes sure that
3404 * should the trailing NUL be missing that this
3405 * print won't run off the end of the string */
cb233ae3 3406 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3407 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3408 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3409 }
3410 }
3411 } /* End \N{NAME} */
ff3f963a
KW
3412#ifdef EBCDIC
3413 if (!dorange)
3414 native_range = FALSE; /* \N{} is defined to be Unicode */
3415#endif
3416 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3417 continue;
3418
02aa26ce 3419 /* \c is a control character */
79072805
LW
3420 case 'c':
3421 s++;
961ce445 3422 if (s < send) {
17a3df4c 3423 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3424 }
961ce445
RGS
3425 else {
3426 yyerror("Missing control char name in \\c");
3427 }
79072805 3428 continue;
02aa26ce
NT
3429
3430 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3431 case 'b':
db42d148 3432 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3433 break;
3434 case 'n':
db42d148 3435 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3436 break;
3437 case 'r':
db42d148 3438 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3439 break;
3440 case 'f':
db42d148 3441 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3442 break;
3443 case 't':
db42d148 3444 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3445 break;
34a3fe2a 3446 case 'e':
db42d148 3447 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3448 break;
3449 case 'a':
db42d148 3450 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3451 break;
02aa26ce
NT
3452 } /* end switch */
3453
79072805
LW
3454 s++;
3455 continue;
02aa26ce 3456 } /* end if (backslash) */
4c3a8340
TS
3457#ifdef EBCDIC
3458 else
3459 literal_endpoint++;
3460#endif
02aa26ce 3461
f9a63242 3462 default_action:
77a135fe
KW
3463 /* If we started with encoded form, or already know we want it,
3464 then encode the next character */
3465 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3466 STRLEN len = 1;
77a135fe
KW
3467
3468
3469 /* One might think that it is wasted effort in the case of the
3470 * source being utf8 (this_utf8 == TRUE) to take the next character
3471 * in the source, convert it to an unsigned value, and then convert
3472 * it back again. But the source has not been validated here. The
3473 * routine that does the conversion checks for errors like
3474 * malformed utf8 */
3475
5f66b61c
AL
3476 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3477 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3478 if (!has_utf8) {
3479 SvCUR_set(sv, d - SvPVX_const(sv));
3480 SvPOK_on(sv);
3481 *d = '\0';
77a135fe 3482 /* See Note on sizing above. */
7bf79863
KW
3483 sv_utf8_upgrade_flags_grow(sv,
3484 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3485 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3486 d = SvPVX(sv) + SvCUR(sv);
3487 has_utf8 = TRUE;
3488 } else if (need > len) {
3489 /* encoded value larger than old, may need extra space (NOTE:
3490 * SvCUR() is not set correctly here). See Note on sizing
3491 * above. */
9d4ba2ae 3492 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3493 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3494 }
77a135fe
KW
3495 s += len;
3496
5f66b61c 3497 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3498#ifdef EBCDIC
3499 if (uv > 255 && !dorange)
3500 native_range = FALSE;
3501#endif
2b9d42f0
NIS
3502 }
3503 else {
3504 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3505 }
02aa26ce
NT
3506 } /* while loop to process each character */
3507
3508 /* terminate the string and set up the sv */
79072805 3509 *d = '\0';
95a20fc0 3510 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3511 if (SvCUR(sv) >= SvLEN(sv))
5637ef5b
NC
3512 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3513 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
2b9d42f0 3514
79072805 3515 SvPOK_on(sv);
9f4817db 3516 if (PL_encoding && !has_utf8) {
d0063567
DK
3517 sv_recode_to_utf8(sv, PL_encoding);
3518 if (SvUTF8(sv))
3519 has_utf8 = TRUE;
9f4817db 3520 }
2b9d42f0 3521 if (has_utf8) {
7e2040f0 3522 SvUTF8_on(sv);
2b9d42f0 3523 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3524 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3525 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3526 }
3527 }
79072805 3528
02aa26ce 3529 /* shrink the sv if we allocated more than we used */
79072805 3530 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3531 SvPV_shrink_to_cur(sv);
79072805 3532 }
02aa26ce 3533
6154021b 3534 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3535 if (s > PL_bufptr) {
eb0d8d16
NC
3536 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3537 const char *const key = PL_lex_inpat ? "qr" : "q";
3538 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3539 const char *type;
3540 STRLEN typelen;
3541
3542 if (PL_lex_inwhat == OP_TRANS) {
3543 type = "tr";
3544 typelen = 2;
3545 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3546 type = "s";
3547 typelen = 1;
3548 } else {
3549 type = "qq";
3550 typelen = 2;
3551 }
3552
3553 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3554 type, typelen);
3555 }
6154021b 3556 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3557 } else
8990e307 3558 SvREFCNT_dec(sv);
79072805
LW
3559 return s;
3560}
3561
ffb4593c
NT
3562/* S_intuit_more
3563 * Returns TRUE if there's more to the expression (e.g., a subscript),
3564 * FALSE otherwise.
ffb4593c
NT
3565 *
3566 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3567 *
3568 * ->[ and ->{ return TRUE
3569 * { and [ outside a pattern are always subscripts, so return TRUE
3570 * if we're outside a pattern and it's not { or [, then return FALSE
3571 * if we're in a pattern and the first char is a {
3572 * {4,5} (any digits around the comma) returns FALSE
3573 * if we're in a pattern and the first char is a [
3574 * [] returns FALSE
3575 * [SOMETHING] has a funky algorithm to decide whether it's a
3576 * character class or not. It has to deal with things like
3577 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3578 * anything else returns TRUE
3579 */
3580
9cbb5ea2
GS
3581/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3582
76e3520e 3583STATIC int
cea2e8a9 3584S_intuit_more(pTHX_ register char *s)
79072805 3585{
97aff369 3586 dVAR;
7918f24d
NC
3587
3588 PERL_ARGS_ASSERT_INTUIT_MORE;
3589
3280af22 3590 if (PL_lex_brackets)
79072805
LW
3591 return TRUE;
3592 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3593 return TRUE;
3594 if (*s != '{' && *s != '[')
3595 return FALSE;
3280af22 3596 if (!PL_lex_inpat)
79072805
LW
3597 return TRUE;
3598
3599 /* In a pattern, so maybe we have {n,m}. */
3600 if (*s == '{') {
b3155d95 3601 if (regcurly(s)) {
79072805 3602 return FALSE;
b3155d95 3603 }
79072805 3604 return TRUE;
79072805
LW
3605 }
3606
3607 /* On the other hand, maybe we have a character class */
3608
3609 s++;
3610 if (*s == ']' || *s == '^')
3611 return FALSE;
3612 else {
ffb4593c 3613 /* this is terrifying, and it works */
79072805
LW
3614 int weight = 2; /* let's weigh the evidence */
3615 char seen[256];
f27ffc4a 3616 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3617 const char * const send = strchr(s,']');
3280af22 3618 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3619
3620 if (!send) /* has to be an expression */
3621 return TRUE;
3622
3623 Zero(seen,256,char);
3624 if (*s == '$')
3625 weight -= 3;
3626 else if (isDIGIT(*s)) {
3627 if (s[1] != ']') {
3628 if (isDIGIT(s[1]) && s[2] == ']')
3629 weight -= 10;
3630 }
3631 else
3632 weight -= 100;
3633 }
3634 for (; s < send; s++) {
3635 last_un_char = un_char;
3636 un_char = (unsigned char)*s;
3637 switch (*s) {
3638 case '@':
3639 case '&':
3640 case '$':
3641 weight -= seen[un_char] * 10;
7e2040f0 3642 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3643 int len;
8903cb82 3644 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e 3645 len = (int)strlen(tmpbuf);
6fbd0d97
BF
3646 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3647 UTF ? SVf_UTF8 : 0, SVt_PV))
79072805
LW
3648 weight -= 100;
3649 else
3650 weight -= 10;
3651 }
3652 else if (*s == '$' && s[1] &&
93a17b20
LW
3653 strchr("[#!%*<>()-=",s[1])) {
3654 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3655 weight -= 10;
3656 else
3657 weight -= 1;
3658 }
3659 break;
3660 case '\\':
3661 un_char = 254;
3662 if (s[1]) {
93a17b20 3663 if (strchr("wds]",s[1]))
79072805 3664 weight += 100;
10edeb5d 3665 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3666 weight += 1;
93a17b20 3667 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3668 weight += 40;
3669 else if (isDIGIT(s[1])) {
3670 weight += 40;
3671 while (s[1] && isDIGIT(s[1]))
3672 s++;
3673 }
3674 }
3675 else
3676 weight += 100;
3677 break;
3678 case '-':
3679 if (s[1] == '\\')
3680 weight += 50;
93a17b20 3681 if (strchr("aA01! ",last_un_char))
79072805 3682 weight += 30;
93a17b20 3683 if (strchr("zZ79~",s[1]))
79072805 3684 weight += 30;
f27ffc4a
GS
3685 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3686 weight -= 5; /* cope with negative subscript */
79072805
LW
3687 break;
3688 default:
3792a11b
NC
3689 if (!isALNUM(last_un_char)
3690 && !(last_un_char == '$' || last_un_char == '@'
3691 || last_un_char == '&')
3692 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3693 char *d = tmpbuf;
3694 while (isALPHA(*s))
3695 *d++ = *s++;
3696 *d = '\0';
5458a98a 3697 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3698 weight -= 150;
3699 }
3700 if (un_char == last_un_char + 1)
3701 weight += 5;
3702 weight -= seen[un_char];
3703 break;
3704 }
3705 seen[un_char]++;
3706 }
3707 if (weight >= 0) /* probably a character class */
3708 return FALSE;
3709 }
3710
3711 return TRUE;
3712}
ffed7fef 3713
ffb4593c
NT
3714/*
3715 * S_intuit_method
3716 *
3717 * Does all the checking to disambiguate
3718 * foo bar
3719 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3720 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3721 *
3722 * First argument is the stuff after the first token, e.g. "bar".
3723 *
3724 * Not a method if bar is a filehandle.
3725 * Not a method if foo is a subroutine prototyped to take a filehandle.
3726 * Not a method if it's really "Foo $bar"
3727 * Method if it's "foo $bar"
3728 * Not a method if it's really "print foo $bar"
3729 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3730 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3731 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3732 * =>
3733 */
3734
76e3520e 3735STATIC int
62d55b22 3736S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3737{
97aff369 3738 dVAR;
a0d0e21e 3739 char *s = start + (*start == '$');
3280af22 3740 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3741 STRLEN len;
3742 GV* indirgv;
5db06880
NC
3743#ifdef PERL_MAD
3744 int soff;
3745#endif
a0d0e21e 3746
7918f24d
NC
3747 PERL_ARGS_ASSERT_INTUIT_METHOD;
3748
a0d0e21e 3749 if (gv) {
62d55b22 3750 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3751 return 0;
62d55b22
NC
3752 if (cv) {
3753 if (SvPOK(cv)) {
8fa6a409 3754 const char *proto = CvPROTO(cv);
62d55b22
NC
3755 if (proto) {
3756 if (*proto == ';')
3757 proto++;
3758 if (*proto == '*')
3759 return 0;
3760 }
b6c543e3
IZ
3761 }
3762 } else
c35e046a 3763 gv = NULL;
a0d0e21e 3764 }
8903cb82 3765 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3766 /* start is the beginning of the possible filehandle/object,
3767 * and s is the end of it
3768 * tmpbuf is a copy of it
3769 */
3770
a0d0e21e 3771 if (*start == '$') {
3ef1310e
RGS
3772 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3773 isUPPER(*PL_tokenbuf))
a0d0e21e 3774 return 0;
5db06880
NC
3775#ifdef PERL_MAD
3776 len = start - SvPVX(PL_linestr);
3777#endif
29595ff2 3778 s = PEEKSPACE(s);
f0092767 3779#ifdef PERL_MAD
5db06880
NC
3780 start = SvPVX(PL_linestr) + len;
3781#endif
3280af22
NIS
3782 PL_bufptr = start;
3783 PL_expect = XREF;
a0d0e21e
LW
3784 return *s == '(' ? FUNCMETH : METHOD;
3785 }
5458a98a 3786 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3787 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3788 len -= 2;
3789 tmpbuf[len] = '\0';
5db06880
NC
3790#ifdef PERL_MAD
3791 soff = s - SvPVX(PL_linestr);
3792#endif
c3e0f903
GS
3793 goto bare_package;
3794 }
38d2cf30 3795 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8ebc5c01 3796 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3797 return 0;
3798 /* filehandle or package name makes it a method */
38d2cf30 3799 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5db06880
NC
3800#ifdef PERL_MAD
3801 soff = s - SvPVX(PL_linestr);
3802#endif
29595ff2 3803 s = PEEKSPACE(s);
3280af22 3804 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3805 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3806 bare_package:
cd81e915 3807 start_force(PL_curforce);
9ded7720 3808 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3809 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3810 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880 3811 if (PL_madskills)
38d2cf30
BF
3812 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3813 ( UTF ? SVf_UTF8 : 0 )));
3280af22 3814 PL_expect = XTERM;
a0d0e21e 3815 force_next(WORD);
3280af22 3816 PL_bufptr = s;
5db06880
NC
3817#ifdef PERL_MAD
3818 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3819#endif
a0d0e21e
LW
3820 return *s == '(' ? FUNCMETH : METHOD;
3821 }
3822 }
3823 return 0;
3824}
3825
16d20bd9 3826/* Encoded script support. filter_add() effectively inserts a
4e553d73 3827 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3828 * Note that the filter function only applies to the current source file
3829 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3830 *
3831 * The datasv parameter (which may be NULL) can be used to pass
3832 * private data to this instance of the filter. The filter function
3833 * can recover the SV using the FILTER_DATA macro and use it to
3834 * store private buffers and state information.
3835 *
3836 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3837 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3838 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3839 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3840 * private use must be set using malloc'd pointers.
3841 */
16d20bd9
AD
3842
3843SV *
864dbfa3 3844Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3845{
97aff369 3846 dVAR;
f4c556ac 3847 if (!funcp)
a0714e2c 3848 return NULL;
f4c556ac 3849
5486870f
DM
3850 if (!PL_parser)
3851 return NULL;
3852
f1c31c52
FC
3853 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3854 Perl_croak(aTHX_ "Source filters apply only to byte streams");
3855
3280af22
NIS
3856 if (!PL_rsfp_filters)
3857 PL_rsfp_filters = newAV();
16d20bd9 3858 if (!datasv)
561b68a9 3859 datasv = newSV(0);
862a34c6 3860 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3861 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3862 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3863 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3864 FPTR2DPTR(void *, IoANY(datasv)),
3865 SvPV_nolen(datasv)));
3280af22
NIS
3866 av_unshift(PL_rsfp_filters, 1);
3867 av_store(PL_rsfp_filters, 0, datasv) ;
60d63348
FC
3868 if (
3869 !PL_parser->filtered
3870 && PL_parser->lex_flags & LEX_EVALBYTES
3871 && PL_bufptr < PL_bufend
3872 ) {
3873 const char *s = PL_bufptr;
3874 while (s < PL_bufend) {
3875 if (*s == '\n') {
3876 SV *linestr = PL_parser->linestr;
3877 char *buf = SvPVX(linestr);
3878 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3879 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3880 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3881 STRLEN const linestart_pos = PL_parser->linestart - buf;
3882 STRLEN const last_uni_pos =
3883 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3884 STRLEN const last_lop_pos =
3885 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3886 av_push(PL_rsfp_filters, linestr);
3887 PL_parser->linestr =
3888 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3889 buf = SvPVX(PL_parser->linestr);
3890 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3891 PL_parser->bufptr = buf + bufptr_pos;
3892 PL_parser->oldbufptr = buf + oldbufptr_pos;
3893 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3894 PL_parser->linestart = buf + linestart_pos;
3895 if (PL_parser->last_uni)
3896 PL_parser->last_uni = buf + last_uni_pos;
3897 if (PL_parser->last_lop)
3898 PL_parser->last_lop = buf + last_lop_pos;
3899 SvLEN(linestr) = SvCUR(linestr);
3900 SvCUR(linestr) = s-SvPVX(linestr);
3901 PL_parser->filtered = 1;
3902 break;
3903 }
3904 s++;
3905 }
3906 }
16d20bd9
AD
3907 return(datasv);
3908}
4e553d73 3909
16d20bd9
AD
3910
3911/* Delete most recently added instance of this filter function. */
a0d0e21e 3912void
864dbfa3 3913Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3914{
97aff369 3915 dVAR;
e0c19803 3916 SV *datasv;
24801a4b 3917
7918f24d
NC
3918 PERL_ARGS_ASSERT_FILTER_DEL;
3919
33073adb 3920#ifdef DEBUGGING
55662e27
JH
3921 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3922 FPTR2DPTR(void*, funcp)));
33073adb 3923#endif
5486870f 3924 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3925 return;
3926 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3927 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3928 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3929 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3930
16d20bd9
AD
3931 return;
3932 }
3933 /* we need to search for the correct entry and clear it */
cea2e8a9 3934 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3935}
3936
3937
1de9afcd
RGS
3938/* Invoke the idxth filter function for the current rsfp. */
3939/* maxlen 0 = read one text line */
16d20bd9 3940I32
864dbfa3 3941Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3942{
97aff369 3943 dVAR;
16d20bd9
AD
3944 filter_t funcp;
3945 SV *datasv = NULL;
f482118e
NC
3946 /* This API is bad. It should have been using unsigned int for maxlen.
3947 Not sure if we want to change the API, but if not we should sanity
3948 check the value here. */
60d63348 3949 unsigned int correct_length
39cd7a59
NC
3950 = maxlen < 0 ?
3951#ifdef PERL_MICRO
3952 0x7FFFFFFF
3953#else
3954 INT_MAX
3955#endif
3956 : maxlen;
e50aee73 3957
7918f24d
NC
3958 PERL_ARGS_ASSERT_FILTER_READ;
3959
5486870f 3960 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3961 return -1;
1de9afcd 3962 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3963 /* Provide a default input filter to make life easy. */
3964 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3965 DEBUG_P(PerlIO_printf(Perl_debug_log,
3966 "filter_read %d: from rsfp\n", idx));
f482118e 3967 if (correct_length) {
16d20bd9
AD
3968 /* Want a block */
3969 int len ;
f54cb97a 3970 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3971
3972 /* ensure buf_sv is large enough */
881d8f0a 3973 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3974 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3975 correct_length)) <= 0) {
3280af22 3976 if (PerlIO_error(PL_rsfp))
37120919
AD
3977 return -1; /* error */
3978 else
3979 return 0 ; /* end of file */
3980 }
16d20bd9 3981 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3982 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3983 } else {
3984 /* Want a line */
3280af22
NIS
3985 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3986 if (PerlIO_error(PL_rsfp))
37120919
AD
3987 return -1; /* error */
3988 else
3989 return 0 ; /* end of file */
3990 }
16d20bd9
AD
3991 }
3992 return SvCUR(buf_sv);
3993 }
3994 /* Skip this filter slot if filter has been deleted */
1de9afcd 3995 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3996 DEBUG_P(PerlIO_printf(Perl_debug_log,
3997 "filter_read %d: skipped (filter deleted)\n",
3998 idx));
f482118e 3999 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9 4000 }
60d63348
FC
4001 if (SvTYPE(datasv) != SVt_PVIO) {
4002 if (correct_length) {
4003 /* Want a block */
4004 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4005 if (!remainder) return 0; /* eof */
4006 if (correct_length > remainder) correct_length = remainder;
4007 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4008 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4009 } else {
4010 /* Want a line */
4011 const char *s = SvEND(datasv);
4012 const char *send = SvPVX(datasv) + SvLEN(datasv);
4013 while (s < send) {
4014 if (*s == '\n') {
4015 s++;
4016 break;
4017 }
4018 s++;
4019 }
4020 if (s == send) return 0; /* eof */
4021 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4022 SvCUR_set(datasv, s-SvPVX(datasv));
4023 }
4024 return SvCUR(buf_sv);
4025 }
16d20bd9 4026 /* Get function pointer hidden within datasv */
8141890a 4027 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
4028 DEBUG_P(PerlIO_printf(Perl_debug_log,
4029 "filter_read %d: via function %p (%s)\n",
ca0270c4 4030 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
4031 /* Call function. The function is expected to */
4032 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 4033 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 4034 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
4035}
4036
76e3520e 4037STATIC char *
5cc814fd 4038S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 4039{
97aff369 4040 dVAR;
7918f24d
NC
4041
4042 PERL_ARGS_ASSERT_FILTER_GETS;
4043
c39cd008 4044#ifdef PERL_CR_FILTER
3280af22 4045 if (!PL_rsfp_filters) {
c39cd008 4046 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
4047 }
4048#endif
3280af22 4049 if (PL_rsfp_filters) {
55497cff 4050 if (!append)
4051 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
4052 if (FILTER_READ(0, sv, 0) > 0)
4053 return ( SvPVX(sv) ) ;
4054 else
bd61b366 4055 return NULL ;
16d20bd9 4056 }
9d116dd7 4057 else
5cc814fd 4058 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
4059}
4060
01ec43d0 4061STATIC HV *
9bde8eb0 4062S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 4063{
97aff369 4064 dVAR;
def3634b
GS
4065 GV *gv;
4066
7918f24d
NC
4067 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4068
01ec43d0 4069 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
4070 return PL_curstash;
4071
4072 if (len > 2 &&
4073 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
acc6da14 4074 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
01ec43d0
GS
4075 {
4076 return GvHV(gv); /* Foo:: */
def3634b
GS
4077 }
4078
4079 /* use constant CLASS => 'MyClass' */
acc6da14 4080 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
c35e046a
AL
4081 if (gv && GvCV(gv)) {
4082 SV * const sv = cv_const_sv(GvCV(gv));
4083 if (sv)
9bde8eb0 4084 pkgname = SvPV_const(sv, len);
def3634b
GS
4085 }
4086
acc6da14 4087 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
def3634b 4088}
a0d0e21e 4089
e3f73d4e
RGS
4090/*
4091 * S_readpipe_override
486ec47a 4092 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4093 * optree, provided sublex_start() is called afterwards.
4094 */
4095STATIC void
1d51329b 4096S_readpipe_override(pTHX)
e3f73d4e
RGS
4097{
4098 GV **gvp;
4099 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4100 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4101 if ((gv_readpipe
4102 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4103 ||
4104 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4105 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4106 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4107 {
4108 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4109 op_append_elem(OP_LIST,
e3f73d4e
RGS
4110 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4111 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4112 }
e3f73d4e
RGS
4113}
4114
5db06880
NC
4115#ifdef PERL_MAD
4116 /*
4117 * Perl_madlex
4118 * The intent of this yylex wrapper is to minimize the changes to the
4119 * tokener when we aren't interested in collecting madprops. It remains
4120 * to be seen how successful this strategy will be...
4121 */
4122
4123int
4124Perl_madlex(pTHX)
4125{
4126 int optype;
4127 char *s = PL_bufptr;
4128
cd81e915
NC
4129 /* make sure PL_thiswhite is initialized */
4130 PL_thiswhite = 0;
4131 PL_thismad = 0;
5db06880 4132
cd81e915 4133 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 4134 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
4135 return S_pending_ident(aTHX);
4136
4137 /* previous token ate up our whitespace? */
cd81e915
NC
4138 if (!PL_lasttoke && PL_nextwhite) {
4139 PL_thiswhite = PL_nextwhite;
4140 PL_nextwhite = 0;
5db06880
NC
4141 }
4142
4143 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4144 PL_realtokenstart = -1;
4145 PL_thistoken = 0;
5db06880
NC
4146 optype = yylex();
4147 s = PL_bufptr;
cd81e915 4148 assert(PL_curforce < 0);
5db06880 4149
cd81e915
NC
4150 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4151 if (!PL_thistoken) {
4152 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4153 PL_thistoken = newSVpvs("");
5db06880 4154 else {
c35e046a 4155 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4156 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4157 }
4158 }
cd81e915
NC
4159 if (PL_thismad) /* install head */
4160 CURMAD('X', PL_thistoken);
5db06880
NC
4161 }
4162
4163 /* last whitespace of a sublex? */
cd81e915
NC
4164 if (optype == ')' && PL_endwhite) {
4165 CURMAD('X', PL_endwhite);
5db06880
NC
4166 }
4167
cd81e915 4168 if (!PL_thismad) {
5db06880
NC
4169
4170 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4171 if (!PL_thiswhite && !PL_endwhite && !optype) {
4172 sv_free(PL_thistoken);
4173 PL_thistoken = 0;
5db06880
NC
4174 return 0;
4175 }
4176
4177 /* put off final whitespace till peg */
60d63348 4178 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
cd81e915
NC
4179 PL_nextwhite = PL_thiswhite;
4180 PL_thiswhite = 0;
5db06880 4181 }
cd81e915
NC
4182 else if (PL_thisopen) {
4183 CURMAD('q', PL_thisopen);
4184 if (PL_thistoken)
4185 sv_free(PL_thistoken);
4186 PL_thistoken = 0;
5db06880
NC
4187 }
4188 else {
4189 /* Store actual token text as madprop X */
cd81e915 4190 CURMAD('X', PL_thistoken);
5db06880
NC
4191 }
4192
cd81e915 4193 if (PL_thiswhite) {
5db06880 4194 /* add preceding whitespace as madprop _ */
cd81e915 4195 CURMAD('_', PL_thiswhite);
5db06880
NC
4196 }
4197
cd81e915 4198 if (PL_thisstuff) {
5db06880 4199 /* add quoted material as madprop = */
cd81e915 4200 CURMAD('=', PL_thisstuff);
5db06880
NC
4201 }
4202
cd81e915 4203 if (PL_thisclose) {
5db06880 4204 /* add terminating quote as madprop Q */
cd81e915 4205 CURMAD('Q', PL_thisclose);
5db06880
NC
4206 }
4207 }
4208
4209 /* special processing based on optype */
4210
4211 switch (optype) {
4212
4213 /* opval doesn't need a TOKEN since it can already store mp */
4214 case WORD:
4215 case METHOD:
4216 case FUNCMETH:
4217 case THING:
4218 case PMFUNC:
4219 case PRIVATEREF:
4220 case FUNC0SUB:
4221 case UNIOPSUB:
4222 case LSTOPSUB:
6154021b
RGS
4223 if (pl_yylval.opval)
4224 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4225 PL_thismad = 0;
5db06880
NC
4226 return optype;
4227
4228 /* fake EOF */
4229 case 0:
4230 optype = PEG;
cd81e915
NC
4231 if (PL_endwhite) {
4232 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4233 PL_endwhite = 0;
5db06880
NC
4234 }
4235 break;
4236
4237 case ']':
4238 case '}':
cd81e915 4239 if (PL_faketokens)
5db06880
NC
4240 break;
4241 /* remember any fake bracket that lexer is about to discard */
4242 if (PL_lex_brackets == 1 &&
4243 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4244 {
4245 s = PL_bufptr;
4246 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4247 s++;
4248 if (*s == '}') {
cd81e915
NC
4249 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4250 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4251 PL_thiswhite = 0;
5db06880
NC
4252 PL_bufptr = s - 1;
4253 break; /* don't bother looking for trailing comment */
4254 }
4255 else
4256 s = PL_bufptr;
4257 }
4258 if (optype == ']')
4259 break;
4260 /* FALLTHROUGH */
4261
4262 /* attach a trailing comment to its statement instead of next token */
4263 case ';':
cd81e915 4264 if (PL_faketokens)
5db06880
NC
4265 break;
4266 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4267 s = PL_bufptr;
4268 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4269 s++;
4270 if (*s == '\n' || *s == '#') {
4271 while (s < PL_bufend && *s != '\n')
4272 s++;
4273 if (s < PL_bufend)
4274 s++;
cd81e915
NC
4275 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4276 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4277 PL_thiswhite = 0;
5db06880
NC
4278 PL_bufptr = s;
4279 }
4280 }
4281 break;
4282
4283 /* pval */
4284 case LABEL:
4285 break;
4286
4287 /* ival */
4288 default:
4289 break;
4290
4291 }
4292
4293 /* Create new token struct. Note: opvals return early above. */
6154021b 4294 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4295 PL_thismad = 0;
5db06880
NC
4296 return optype;
4297}
4298#endif
4299
468aa647 4300STATIC char *
cc6ed77d 4301S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4302 dVAR;
7918f24d
NC
4303
4304 PERL_ARGS_ASSERT_TOKENIZE_USE;
4305
468aa647
RGS
4306 if (PL_expect != XSTATE)
4307 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4308 is_use ? "use" : "no"));
29595ff2 4309 s = SKIPSPACE1(s);
468aa647
RGS
4310 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4311 s = force_version(s, TRUE);
17c59fdf
VP
4312 if (*s == ';' || *s == '}'
4313 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4314 start_force(PL_curforce);
9ded7720 4315 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4316 force_next(WORD);
4317 }
4318 else if (*s == 'v') {
4319 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4320 s = force_version(s, FALSE);
4321 }
4322 }
4323 else {
4324 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4325 s = force_version(s, FALSE);
4326 }
6154021b 4327 pl_yylval.ival = is_use;
468aa647
RGS
4328 return s;
4329}
748a9306 4330#ifdef DEBUGGING
27da23d5 4331 static const char* const exp_name[] =
09bef843 4332 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4333 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4334 };
748a9306 4335#endif
463ee0b2 4336
361d9b55
Z
4337#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4338STATIC bool
4339S_word_takes_any_delimeter(char *p, STRLEN len)
4340{
4341 return (len == 1 && strchr("msyq", p[0])) ||
4342 (len == 2 && (
4343 (p[0] == 't' && p[1] == 'r') ||
4344 (p[0] == 'q' && strchr("qwxr", p[1]))));
4345}
4346
02aa26ce
NT
4347/*
4348 yylex
4349
4350 Works out what to call the token just pulled out of the input
4351 stream. The yacc parser takes care of taking the ops we return and
4352 stitching them into a tree.
4353
4354 Returns:
4355 PRIVATEREF
4356
4357 Structure:
4358 if read an identifier
4359 if we're in a my declaration
4360 croak if they tried to say my($foo::bar)
4361 build the ops for a my() declaration
4362 if it's an access to a my() variable
4363 are we in a sort block?
4364 croak if my($a); $a <=> $b
4365 build ops for access to a my() variable
4366 if in a dq string, and they've said @foo and we can't find @foo
4367 croak
4368 build ops for a bareword
4369 if we already built the token before, use it.
4370*/
4371
20141f0e 4372
dba4d153
JH
4373#ifdef __SC__
4374#pragma segment Perl_yylex
4375#endif
dba4d153 4376int
dba4d153 4377Perl_yylex(pTHX)
20141f0e 4378{
97aff369 4379 dVAR;
3afc138a 4380 register char *s = PL_bufptr;
378cc40b 4381 register char *d;
463ee0b2 4382 STRLEN len;
aa7440fb 4383 bool bof = FALSE;
580561a3 4384 U32 fake_eof = 0;
a687059c 4385
10edeb5d
JH
4386 /* orig_keyword, gvp, and gv are initialized here because
4387 * jump to the label just_a_word_zero can bypass their
4388 * initialization later. */
4389 I32 orig_keyword = 0;
4390 GV *gv = NULL;
4391 GV **gvp = NULL;
4392
bbf60fe6 4393 DEBUG_T( {
396482e1 4394 SV* tmp = newSVpvs("");
b6007c36
DM
4395 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4396 (IV)CopLINE(PL_curcop),
4397 lex_state_names[PL_lex_state],
4398 exp_name[PL_expect],
4399 pv_display(tmp, s, strlen(s), 0, 60));
4400 SvREFCNT_dec(tmp);
bbf60fe6 4401 } );
02aa26ce 4402 /* check if there's an identifier for us to look at */
28ac2b49 4403 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4404 return REPORT(S_pending_ident(aTHX));
bbce6d69 4405
02aa26ce
NT
4406 /* no identifier pending identification */
4407
3280af22 4408 switch (PL_lex_state) {
79072805
LW
4409#ifdef COMMENTARY
4410 case LEX_NORMAL: /* Some compilers will produce faster */
4411 case LEX_INTERPNORMAL: /* code if we comment these out. */
4412 break;
4413#endif
4414
09bef843 4415 /* when we've already built the next token, just pull it out of the queue */
79072805 4416 case LEX_KNOWNEXT:
5db06880
NC
4417#ifdef PERL_MAD
4418 PL_lasttoke--;
6154021b 4419 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4420 if (PL_madskills) {
cd81e915 4421 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4422 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4423 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4424 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4425 PL_thismad->mad_val = 0;
4426 mad_free(PL_thismad);
4427 PL_thismad = 0;
5db06880
NC
4428 }
4429 }
4430 if (!PL_lasttoke) {
4431 PL_lex_state = PL_lex_defer;
4432 PL_expect = PL_lex_expect;
4433 PL_lex_defer = LEX_NORMAL;
4434 if (!PL_nexttoke[PL_lasttoke].next_type)
4435 return yylex();
4436 }
4437#else
3280af22 4438 PL_nexttoke--;
6154021b 4439 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4440 if (!PL_nexttoke) {
4441 PL_lex_state = PL_lex_defer;
4442 PL_expect = PL_lex_expect;
4443 PL_lex_defer = LEX_NORMAL;
463ee0b2 4444 }
5db06880 4445#endif
a7aaec61
Z
4446 {
4447 I32 next_type;
5db06880 4448#ifdef PERL_MAD
a7aaec61 4449 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4450#else
a7aaec61 4451 next_type = PL_nexttype[PL_nexttoke];
5db06880 4452#endif
78cdf107
Z
4453 if (next_type & (7<<24)) {
4454 if (next_type & (1<<24)) {
4455 if (PL_lex_brackets > 100)
4456 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4457 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4458 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4459 }
4460 if (next_type & (2<<24))
4461 PL_lex_allbrackets++;
4462 if (next_type & (4<<24))
4463 PL_lex_allbrackets--;
a7aaec61
Z
4464 next_type &= 0xffff;
4465 }
4466#ifdef PERL_MAD
4467 /* FIXME - can these be merged? */
4468 return next_type;
4469#else
4470 return REPORT(next_type);
4471#endif
4472 }
79072805 4473
02aa26ce 4474 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4475 when we get here, PL_bufptr is at the \
02aa26ce 4476 */
79072805
LW
4477 case LEX_INTERPCASEMOD:
4478#ifdef DEBUGGING
3280af22 4479 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5637ef5b
NC
4480 Perl_croak(aTHX_
4481 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4482 PL_bufptr, PL_bufend, *PL_bufptr);
79072805 4483#endif
02aa26ce 4484 /* handle \E or end of string */
3280af22 4485 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4486 /* if at a \E */
3280af22 4487 if (PL_lex_casemods) {
f54cb97a 4488 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4489 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4490
3792a11b 4491 if (PL_bufptr != PL_bufend
838f2281
BF
4492 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4493 || oldmod == 'F')) {
3280af22
NIS
4494 PL_bufptr += 2;
4495 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4496#ifdef PERL_MAD
4497 if (PL_madskills)
6b29d1f5 4498 PL_thistoken = newSVpvs("\\E");
5db06880 4499#endif
a0d0e21e 4500 }
78cdf107 4501 PL_lex_allbrackets--;
bbf60fe6 4502 return REPORT(')');
79072805 4503 }
52ed07f6
BF
4504 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4505 /* Got an unpaired \E */
4506 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
820438b1 4507 "Useless use of \\E");
52ed07f6 4508 }
5db06880
NC
4509#ifdef PERL_MAD
4510 while (PL_bufptr != PL_bufend &&
4511 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4512 if (!PL_thiswhite)
6b29d1f5 4513 PL_thiswhite = newSVpvs("");
cd81e915 4514 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4515 PL_bufptr += 2;
4516 }
4517#else
3280af22
NIS
4518 if (PL_bufptr != PL_bufend)
4519 PL_bufptr += 2;
5db06880 4520#endif
3280af22 4521 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4522 return yylex();
79072805
LW
4523 }
4524 else {
607df283 4525 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4526 "### Saw case modifier\n"); });
3280af22 4527 s = PL_bufptr + 1;
6e909404 4528 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4529#ifdef PERL_MAD
cd81e915 4530 if (!PL_thiswhite)
6b29d1f5 4531 PL_thiswhite = newSVpvs("");
cd81e915 4532 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4533#endif
89122651 4534 PL_bufptr = s + 3;
6e909404
JH
4535 PL_lex_state = LEX_INTERPCONCAT;
4536 return yylex();
a0d0e21e 4537 }
6e909404 4538 else {
90771dc0 4539 I32 tmp;
5db06880
NC
4540 if (!PL_madskills) /* when just compiling don't need correct */
4541 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4542 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
838f2281
BF
4543 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4544 (strchr(PL_lex_casestack, 'L')
4545 || strchr(PL_lex_casestack, 'U')
4546 || strchr(PL_lex_casestack, 'F'))) {
6e909404 4547 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4548 PL_lex_allbrackets--;
bbf60fe6 4549 return REPORT(')');
6e909404
JH
4550 }
4551 if (PL_lex_casemods > 10)
4552 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4553 PL_lex_casestack[PL_lex_casemods++] = *s;
4554 PL_lex_casestack[PL_lex_casemods] = '\0';
4555 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4556 start_force(PL_curforce);
9ded7720 4557 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4558 force_next((2<<24)|'(');
cd81e915 4559 start_force(PL_curforce);
6e909404 4560 if (*s == 'l')
9ded7720 4561 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4562 else if (*s == 'u')
9ded7720 4563 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4564 else if (*s == 'L')
9ded7720 4565 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4566 else if (*s == 'U')
9ded7720 4567 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4568 else if (*s == 'Q')
9ded7720 4569 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
838f2281
BF
4570 else if (*s == 'F')
4571 NEXTVAL_NEXTTOKE.ival = OP_FC;
6e909404 4572 else
5637ef5b 4573 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5db06880 4574 if (PL_madskills) {
a5849ce5
NC
4575 SV* const tmpsv = newSVpvs("\\ ");
4576 /* replace the space with the character we want to escape
4577 */
4578 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4579 curmad('_', tmpsv);
4580 }
6e909404 4581 PL_bufptr = s + 1;
a0d0e21e 4582 }
79072805 4583 force_next(FUNC);
3280af22
NIS
4584 if (PL_lex_starts) {
4585 s = PL_bufptr;
4586 PL_lex_starts = 0;
5db06880
NC
4587#ifdef PERL_MAD
4588 if (PL_madskills) {
cd81e915
NC
4589 if (PL_thistoken)
4590 sv_free(PL_thistoken);
6b29d1f5 4591 PL_thistoken = newSVpvs("");
5db06880
NC
4592 }
4593#endif
131b3ad0
DM
4594 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4595 if (PL_lex_casemods == 1 && PL_lex_inpat)
4596 OPERATOR(',');
4597 else
4598 Aop(OP_CONCAT);
79072805
LW
4599 }
4600 else
cea2e8a9 4601 return yylex();
79072805
LW
4602 }
4603
55497cff 4604 case LEX_INTERPPUSH:
bbf60fe6 4605 return REPORT(sublex_push());
55497cff 4606
79072805 4607 case LEX_INTERPSTART:
3280af22 4608 if (PL_bufptr == PL_bufend)
bbf60fe6 4609 return REPORT(sublex_done());
607df283 4610 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4611 "### Interpolated variable\n"); });
3280af22
NIS
4612 PL_expect = XTERM;
4613 PL_lex_dojoin = (*PL_bufptr == '@');
4614 PL_lex_state = LEX_INTERPNORMAL;
4615 if (PL_lex_dojoin) {
cd81e915 4616 start_force(PL_curforce);
9ded7720 4617 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4618 force_next(',');
cd81e915 4619 start_force(PL_curforce);
a0d0e21e 4620 force_ident("\"", '$');
cd81e915 4621 start_force(PL_curforce);
9ded7720 4622 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4623 force_next('$');
cd81e915 4624 start_force(PL_curforce);
9ded7720 4625 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4626 force_next((2<<24)|'(');
cd81e915 4627 start_force(PL_curforce);
9ded7720 4628 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4629 force_next(FUNC);
4630 }
3280af22
NIS
4631 if (PL_lex_starts++) {
4632 s = PL_bufptr;
5db06880
NC
4633#ifdef PERL_MAD
4634 if (PL_madskills) {
cd81e915
NC
4635 if (PL_thistoken)
4636 sv_free(PL_thistoken);
6b29d1f5 4637 PL_thistoken = newSVpvs("");
5db06880
NC
4638 }
4639#endif
131b3ad0
DM
4640 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4641 if (!PL_lex_casemods && PL_lex_inpat)
4642 OPERATOR(',');
4643 else
4644 Aop(OP_CONCAT);
79072805 4645 }
cea2e8a9 4646 return yylex();
79072805
LW
4647
4648 case LEX_INTERPENDMAYBE:
3280af22
NIS
4649 if (intuit_more(PL_bufptr)) {
4650 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4651 break;
4652 }
4653 /* FALL THROUGH */
4654
4655 case LEX_INTERPEND:
3280af22
NIS
4656 if (PL_lex_dojoin) {
4657 PL_lex_dojoin = FALSE;
4658 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4659#ifdef PERL_MAD
4660 if (PL_madskills) {
cd81e915
NC
4661 if (PL_thistoken)
4662 sv_free(PL_thistoken);
6b29d1f5 4663 PL_thistoken = newSVpvs("");
5db06880
NC
4664 }
4665#endif
78cdf107 4666 PL_lex_allbrackets--;
bbf60fe6 4667 return REPORT(')');
79072805 4668 }
43a16006 4669 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4670 && SvEVALED(PL_lex_repl))
43a16006 4671 {
e9fa98b2 4672 if (PL_bufptr != PL_bufend)
cea2e8a9 4673 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4674 PL_lex_repl = NULL;
e9fa98b2 4675 }
79072805
LW
4676 /* FALLTHROUGH */
4677 case LEX_INTERPCONCAT:
4678#ifdef DEBUGGING
3280af22 4679 if (PL_lex_brackets)
5637ef5b
NC
4680 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4681 (long) PL_lex_brackets);
79072805 4682#endif
3280af22 4683 if (PL_bufptr == PL_bufend)
bbf60fe6 4684 return REPORT(sublex_done());
79072805 4685
3280af22
NIS
4686 if (SvIVX(PL_linestr) == '\'') {
4687 SV *sv = newSVsv(PL_linestr);
4688 if (!PL_lex_inpat)
76e3520e 4689 sv = tokeq(sv);
3280af22 4690 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4691 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4692 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4693 s = PL_bufend;
79072805
LW
4694 }
4695 else {
3280af22 4696 s = scan_const(PL_bufptr);
79072805 4697 if (*s == '\\')
3280af22 4698 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4699 else
3280af22 4700 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4701 }
4702
3280af22 4703 if (s != PL_bufptr) {
cd81e915 4704 start_force(PL_curforce);
5db06880
NC
4705 if (PL_madskills) {
4706 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4707 }
6154021b 4708 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4709 PL_expect = XTERM;
79072805 4710 force_next(THING);
131b3ad0 4711 if (PL_lex_starts++) {
5db06880
NC
4712#ifdef PERL_MAD
4713 if (PL_madskills) {
cd81e915
NC
4714 if (PL_thistoken)
4715 sv_free(PL_thistoken);
6b29d1f5 4716 PL_thistoken = newSVpvs("");
5db06880
NC
4717 }
4718#endif
131b3ad0
DM
4719 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4720 if (!PL_lex_casemods && PL_lex_inpat)
4721 OPERATOR(',');
4722 else
4723 Aop(OP_CONCAT);
4724 }
79072805 4725 else {
3280af22 4726 PL_bufptr = s;
cea2e8a9 4727 return yylex();
79072805
LW
4728 }
4729 }
4730
cea2e8a9 4731 return yylex();
a0d0e21e 4732 case LEX_FORMLINE:
3280af22
NIS
4733 PL_lex_state = LEX_NORMAL;
4734 s = scan_formline(PL_bufptr);
4735 if (!PL_lex_formbrack)
a0d0e21e
LW
4736 goto rightbracket;
4737 OPERATOR(';');
79072805
LW
4738 }
4739
3280af22
NIS
4740 s = PL_bufptr;
4741 PL_oldoldbufptr = PL_oldbufptr;
4742 PL_oldbufptr = s;
463ee0b2
LW
4743
4744 retry:
5db06880 4745#ifdef PERL_MAD
cd81e915
NC
4746 if (PL_thistoken) {
4747 sv_free(PL_thistoken);
4748 PL_thistoken = 0;
5db06880 4749 }
cd81e915 4750 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4751#endif
378cc40b
LW
4752 switch (*s) {
4753 default:
7e2040f0 4754 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4755 goto keylookup;
b1fc3636
CJ
4756 {
4757 unsigned char c = *s;
4758 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4759 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4760 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4761 } else {
4762 d = PL_linestart;
4763 }
4764 *s = '\0';
4765 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4766 }
e929a76b
LW
4767 case 4:
4768 case 26:
4769 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4770 case 0:
5db06880
NC
4771#ifdef PERL_MAD
4772 if (PL_madskills)
cd81e915 4773 PL_faketokens = 0;
5db06880 4774#endif
60d63348 4775 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
3280af22
NIS
4776 PL_last_uni = 0;
4777 PL_last_lop = 0;
a7aaec61
Z
4778 if (PL_lex_brackets &&
4779 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4780 yyerror((const char *)
4781 (PL_lex_formbrack
4782 ? "Format not terminated"
4783 : "Missing right curly or square bracket"));
c5ee2135 4784 }
4e553d73 4785 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4786 "### Tokener got EOF\n");
5f80b19c 4787 } );
79072805 4788 TOKEN(0);
463ee0b2 4789 }
3280af22 4790 if (s++ < PL_bufend)
a687059c 4791 goto retry; /* ignore stray nulls */
3280af22
NIS
4792 PL_last_uni = 0;
4793 PL_last_lop = 0;
4794 if (!PL_in_eval && !PL_preambled) {
4795 PL_preambled = TRUE;
5db06880
NC
4796#ifdef PERL_MAD
4797 if (PL_madskills)
cd81e915 4798 PL_faketokens = 1;
5db06880 4799#endif
5ab7ff98
NC
4800 if (PL_perldb) {
4801 /* Generate a string of Perl code to load the debugger.
4802 * If PERL5DB is set, it will return the contents of that,
4803 * otherwise a compile-time require of perl5db.pl. */
4804
4805 const char * const pdb = PerlEnv_getenv("PERL5DB");
4806
4807 if (pdb) {
4808 sv_setpv(PL_linestr, pdb);
4809 sv_catpvs(PL_linestr,";");
4810 } else {
4811 SETERRNO(0,SS_NORMAL);
4812 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4813 }
4814 } else
4815 sv_setpvs(PL_linestr,"");
c62eb204
NC
4816 if (PL_preambleav) {
4817 SV **svp = AvARRAY(PL_preambleav);
4818 SV **const end = svp + AvFILLp(PL_preambleav);
4819 while(svp <= end) {
4820 sv_catsv(PL_linestr, *svp);
4821 ++svp;
396482e1 4822 sv_catpvs(PL_linestr, ";");
91b7def8 4823 }
daba3364 4824 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4825 PL_preambleav = NULL;
91b7def8 4826 }
9f639728
FR
4827 if (PL_minus_E)
4828 sv_catpvs(PL_linestr,
4829 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4830 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4831 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4832 if (PL_minus_l)
396482e1 4833 sv_catpvs(PL_linestr,"chomp;");
3280af22 4834 if (PL_minus_a) {
3280af22 4835 if (PL_minus_F) {
3792a11b
NC
4836 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4837 || *PL_splitstr == '"')
3280af22 4838 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4839 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4840 else {
c8ef6a4b
NC
4841 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4842 bytes can be used as quoting characters. :-) */
dd374669 4843 const char *splits = PL_splitstr;
91d456ae 4844 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4845 do {
4846 /* Need to \ \s */
dd374669
AL
4847 if (*splits == '\\')
4848 sv_catpvn(PL_linestr, splits, 1);
4849 sv_catpvn(PL_linestr, splits, 1);
4850 } while (*splits++);
48c4c863
NC
4851 /* This loop will embed the trailing NUL of
4852 PL_linestr as the last thing it does before
4853 terminating. */
396482e1 4854 sv_catpvs(PL_linestr, ");");
54310121 4855 }
2304df62
AD
4856 }
4857 else
396482e1 4858 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4859 }
79072805 4860 }
396482e1 4861 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4862 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4863 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4864 PL_last_lop = PL_last_uni = NULL;
65269a95 4865 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4866 update_debugger_info(PL_linestr, NULL, 0);
79072805 4867 goto retry;
a687059c 4868 }
e929a76b 4869 do {
580561a3
Z
4870 fake_eof = 0;
4871 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4872 if (0) {
7e28d3af 4873 fake_eof:
f0e67a1d
Z
4874 fake_eof = LEX_FAKE_EOF;
4875 }
4876 PL_bufptr = PL_bufend;
17cc9359 4877 CopLINE_inc(PL_curcop);
f0e67a1d 4878 if (!lex_next_chunk(fake_eof)) {
17cc9359 4879 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4880 s = PL_bufptr;
4881 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4882 }
17cc9359 4883 CopLINE_dec(PL_curcop);
5db06880 4884#ifdef PERL_MAD
f0e67a1d 4885 if (!PL_rsfp)
cd81e915 4886 PL_realtokenstart = -1;
5db06880 4887#endif
f0e67a1d 4888 s = PL_bufptr;
7aa207d6
JH
4889 /* If it looks like the start of a BOM or raw UTF-16,
4890 * check if it in fact is. */
580561a3 4891 if (bof && PL_rsfp &&
7aa207d6
JH
4892 (*s == 0 ||
4893 *(U8*)s == 0xEF ||
4894 *(U8*)s >= 0xFE ||
4895 s[1] == 0)) {
879bc93b
DM
4896 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4897 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
4898#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4899 /* offset may include swallowed CR */
4900 if (!bof)
879bc93b 4901 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 4902#endif
7e28d3af 4903 if (bof) {
3280af22 4904 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4905 s = swallow_bom((U8*)s);
e929a76b 4906 }
378cc40b 4907 }
737c24fc 4908 if (PL_parser->in_pod) {
a0d0e21e 4909 /* Incest with pod. */
5db06880
NC
4910#ifdef PERL_MAD
4911 if (PL_madskills)
cd81e915 4912 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4913#endif
01a57ef7 4914 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4915 sv_setpvs(PL_linestr, "");
3280af22
NIS
4916 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4917 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4918 PL_last_lop = PL_last_uni = NULL;
737c24fc 4919 PL_parser->in_pod = 0;
a0d0e21e 4920 }
4e553d73 4921 }
60d63348 4922 if (PL_rsfp || PL_parser->filtered)
85613cab 4923 incline(s);
737c24fc 4924 } while (PL_parser->in_pod);
3280af22 4925 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4926 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4927 PL_last_lop = PL_last_uni = NULL;
57843af0 4928 if (CopLINE(PL_curcop) == 1) {
3280af22 4929 while (s < PL_bufend && isSPACE(*s))
79072805 4930 s++;
a0d0e21e 4931 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4932 s++;
5db06880
NC
4933#ifdef PERL_MAD
4934 if (PL_madskills)
cd81e915 4935 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4936#endif
bd61b366 4937 d = NULL;
3280af22 4938 if (!PL_in_eval) {
44a8e56a 4939 if (*s == '#' && *(s+1) == '!')
4940 d = s + 2;
4941#ifdef ALTERNATE_SHEBANG
4942 else {
bfed75c6 4943 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4944 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4945 d = s + (sizeof(as) - 1);
4946 }
4947#endif /* ALTERNATE_SHEBANG */
4948 }
4949 if (d) {
b8378b72 4950 char *ipath;
774d564b 4951 char *ipathend;
b8378b72 4952
774d564b 4953 while (isSPACE(*d))
b8378b72
CS
4954 d++;
4955 ipath = d;
774d564b 4956 while (*d && !isSPACE(*d))
4957 d++;
4958 ipathend = d;
4959
4960#ifdef ARG_ZERO_IS_SCRIPT
4961 if (ipathend > ipath) {
4962 /*
4963 * HP-UX (at least) sets argv[0] to the script name,
4964 * which makes $^X incorrect. And Digital UNIX and Linux,
4965 * at least, set argv[0] to the basename of the Perl
4966 * interpreter. So, having found "#!", we'll set it right.
4967 */
fafc274c
NC
4968 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4969 SVt_PV)); /* $^X */
774d564b 4970 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4971 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4972 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4973 SvSETMAGIC(x);
4974 }
556c1dec
JH
4975 else {
4976 STRLEN blen;
4977 STRLEN llen;
cfd0369c 4978 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4979 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4980 if (llen < blen) {
4981 bstart += blen - llen;
4982 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4983 sv_setpvn(x, ipath, ipathend - ipath);
4984 SvSETMAGIC(x);
4985 }
4986 }
4987 }
774d564b 4988 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4989 }
774d564b 4990#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4991
4992 /*
4993 * Look for options.
4994 */
748a9306 4995 d = instr(s,"perl -");
84e30d1a 4996 if (!d) {
748a9306 4997 d = instr(s,"perl");
84e30d1a
GS
4998#if defined(DOSISH)
4999 /* avoid getting into infinite loops when shebang
5000 * line contains "Perl" rather than "perl" */
5001 if (!d) {
5002 for (d = ipathend-4; d >= ipath; --d) {
5003 if ((*d == 'p' || *d == 'P')
5004 && !ibcmp(d, "perl", 4))
5005 {
5006 break;
5007 }
5008 }
5009 if (d < ipath)
bd61b366 5010 d = NULL;
84e30d1a
GS
5011 }
5012#endif
5013 }
44a8e56a 5014#ifdef ALTERNATE_SHEBANG
5015 /*
5016 * If the ALTERNATE_SHEBANG on this system starts with a
5017 * character that can be part of a Perl expression, then if
5018 * we see it but not "perl", we're probably looking at the
5019 * start of Perl code, not a request to hand off to some
5020 * other interpreter. Similarly, if "perl" is there, but
5021 * not in the first 'word' of the line, we assume the line
5022 * contains the start of the Perl program.
44a8e56a 5023 */
5024 if (d && *s != '#') {
f54cb97a 5025 const char *c = ipath;
44a8e56a 5026 while (*c && !strchr("; \t\r\n\f\v#", *c))
5027 c++;
5028 if (c < d)
bd61b366 5029 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 5030 else
5031 *s = '#'; /* Don't try to parse shebang line */
5032 }
774d564b 5033#endif /* ALTERNATE_SHEBANG */
748a9306 5034 if (!d &&
44a8e56a 5035 *s == '#' &&
774d564b 5036 ipathend > ipath &&
3280af22 5037 !PL_minus_c &&
748a9306 5038 !instr(s,"indir") &&
3280af22 5039 instr(PL_origargv[0],"perl"))
748a9306 5040 {
27da23d5 5041 dVAR;
9f68db38 5042 char **newargv;
9f68db38 5043
774d564b 5044 *ipathend = '\0';
5045 s = ipathend + 1;
3280af22 5046 while (s < PL_bufend && isSPACE(*s))
9f68db38 5047 s++;
3280af22 5048 if (s < PL_bufend) {
d85f917e 5049 Newx(newargv,PL_origargc+3,char*);
9f68db38 5050 newargv[1] = s;
3280af22 5051 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
5052 s++;
5053 *s = '\0';
3280af22 5054 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
5055 }
5056 else
3280af22 5057 newargv = PL_origargv;
774d564b 5058 newargv[0] = ipath;
b35112e7 5059 PERL_FPU_PRE_EXEC
b4748376 5060 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 5061 PERL_FPU_POST_EXEC
cea2e8a9 5062 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 5063 }
748a9306 5064 if (d) {
c35e046a
AL
5065 while (*d && !isSPACE(*d))
5066 d++;
5067 while (SPACE_OR_TAB(*d))
5068 d++;
748a9306
LW
5069
5070 if (*d++ == '-') {
f54cb97a 5071 const bool switches_done = PL_doswitches;
fb993905
GA
5072 const U32 oldpdb = PL_perldb;
5073 const bool oldn = PL_minus_n;
5074 const bool oldp = PL_minus_p;
c7030b81 5075 const char *d1 = d;
fb993905 5076
8cc95fdb 5077 do {
4ba71d51
FC
5078 bool baduni = FALSE;
5079 if (*d1 == 'C') {
bd0ab00d
NC
5080 const char *d2 = d1 + 1;
5081 if (parse_unicode_opts((const char **)&d2)
5082 != PL_unicode)
5083 baduni = TRUE;
4ba71d51
FC
5084 }
5085 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
5086 const char * const m = d1;
5087 while (*d1 && !isSPACE(*d1))
5088 d1++;
cea2e8a9 5089 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 5090 (int)(d1 - m), m);
8cc95fdb 5091 }
c7030b81
NC
5092 d1 = moreswitches(d1);
5093 } while (d1);
f0b2cf55
YST
5094 if (PL_doswitches && !switches_done) {
5095 int argc = PL_origargc;
5096 char **argv = PL_origargv;
5097 do {
5098 argc--,argv++;
5099 } while (argc && argv[0][0] == '-' && argv[0][1]);
5100 init_argv_symbols(argc,argv);
5101 }
65269a95 5102 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5103 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5104 /* if we have already added "LINE: while (<>) {",
5105 we must not do it again */
748a9306 5106 {
76f68e9b 5107 sv_setpvs(PL_linestr, "");
3280af22
NIS
5108 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5109 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5110 PL_last_lop = PL_last_uni = NULL;
3280af22 5111 PL_preambled = FALSE;
65269a95 5112 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5113 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5114 goto retry;
5115 }
a0d0e21e 5116 }
79072805 5117 }
9f68db38 5118 }
79072805 5119 }
3280af22
NIS
5120 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5121 PL_bufptr = s;
5122 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5123 return yylex();
ae986130 5124 }
378cc40b 5125 goto retry;
4fdae800 5126 case '\r':
6a27c188 5127#ifdef PERL_STRICT_CR
cea2e8a9 5128 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5129 Perl_croak(aTHX_
cc507455 5130 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5131#endif
4fdae800 5132 case ' ': case '\t': case '\f': case 013:
5db06880 5133#ifdef PERL_MAD
cd81e915 5134 PL_realtokenstart = -1;
ac372eb8
RD
5135 if (!PL_thiswhite)
5136 PL_thiswhite = newSVpvs("");
5137 sv_catpvn(PL_thiswhite, s, 1);
5db06880 5138#endif
ac372eb8 5139 s++;
378cc40b 5140 goto retry;
378cc40b 5141 case '#':
e929a76b 5142 case '\n':
5db06880 5143#ifdef PERL_MAD
cd81e915 5144 PL_realtokenstart = -1;
5db06880 5145 if (PL_madskills)
cd81e915 5146 PL_faketokens = 0;
5db06880 5147#endif
60d63348
FC
5148 if (PL_lex_state != LEX_NORMAL ||
5149 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5150 if (*s == '#' && s == PL_linestart && PL_in_eval
5151 && !PL_rsfp && !PL_parser->filtered) {
df0deb90
GS
5152 /* handle eval qq[#line 1 "foo"\n ...] */
5153 CopLINE_dec(PL_curcop);
5154 incline(s);
5155 }
5db06880
NC
5156 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5157 s = SKIPSPACE0(s);
60d63348 5158 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5db06880
NC
5159 incline(s);
5160 }
5161 else {
5162 d = s;
5163 while (d < PL_bufend && *d != '\n')
5164 d++;
5165 if (d < PL_bufend)
5166 d++;
5167 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5637ef5b
NC
5168 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5169 d, PL_bufend);
5db06880
NC
5170#ifdef PERL_MAD
5171 if (PL_madskills)
cd81e915 5172 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5173#endif
5174 s = d;
5175 incline(s);
5176 }
3280af22
NIS
5177 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5178 PL_bufptr = s;
5179 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5180 return yylex();
a687059c 5181 }
378cc40b 5182 }
a687059c 5183 else {
5db06880
NC
5184#ifdef PERL_MAD
5185 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5186 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5187 PL_faketokens = 0;
5db06880
NC
5188 s = SKIPSPACE0(s);
5189 TOKEN(PEG); /* make sure any #! line is accessible */
5190 }
5191 s = SKIPSPACE0(s);
5192 }
5193 else {
5194/* if (PL_madskills && PL_lex_formbrack) { */
5195 d = s;
5196 while (d < PL_bufend && *d != '\n')
5197 d++;
5198 if (d < PL_bufend)
5199 d++;
5200 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5201 Perl_croak(aTHX_ "panic: input overflow");
5202 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5203 if (!PL_thiswhite)
6b29d1f5 5204 PL_thiswhite = newSVpvs("");
5db06880 5205 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5206 sv_setpvs(PL_thiswhite, "");
cd81e915 5207 PL_faketokens = 0;
5db06880 5208 }
cd81e915 5209 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5210 }
5211 s = d;
5212/* }
5213 *s = '\0';
5214 PL_bufend = s; */
5215 }
5216#else
378cc40b 5217 *s = '\0';
3280af22 5218 PL_bufend = s;
5db06880 5219#endif
a687059c 5220 }
378cc40b
LW
5221 goto retry;
5222 case '-':
79072805 5223 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5224 I32 ftst = 0;
90771dc0 5225 char tmp;
e5edeb50 5226
378cc40b 5227 s++;
3280af22 5228 PL_bufptr = s;
748a9306
LW
5229 tmp = *s++;
5230
bf4acbe4 5231 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5232 s++;
5233
5234 if (strnEQ(s,"=>",2)) {
3280af22 5235 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5236 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5237 OPERATOR('-'); /* unary minus */
5238 }
3280af22 5239 PL_last_uni = PL_oldbufptr;
748a9306 5240 switch (tmp) {
e5edeb50
JH
5241 case 'r': ftst = OP_FTEREAD; break;
5242 case 'w': ftst = OP_FTEWRITE; break;
5243 case 'x': ftst = OP_FTEEXEC; break;
5244 case 'o': ftst = OP_FTEOWNED; break;
5245 case 'R': ftst = OP_FTRREAD; break;
5246 case 'W': ftst = OP_FTRWRITE; break;
5247 case 'X': ftst = OP_FTREXEC; break;
5248 case 'O': ftst = OP_FTROWNED; break;
5249 case 'e': ftst = OP_FTIS; break;
5250 case 'z': ftst = OP_FTZERO; break;
5251 case 's': ftst = OP_FTSIZE; break;
5252 case 'f': ftst = OP_FTFILE; break;
5253 case 'd': ftst = OP_FTDIR; break;
5254 case 'l': ftst = OP_FTLINK; break;
5255 case 'p': ftst = OP_FTPIPE; break;
5256 case 'S': ftst = OP_FTSOCK; break;
5257 case 'u': ftst = OP_FTSUID; break;
5258 case 'g': ftst = OP_FTSGID; break;
5259 case 'k': ftst = OP_FTSVTX; break;
5260 case 'b': ftst = OP_FTBLK; break;
5261 case 'c': ftst = OP_FTCHR; break;
5262 case 't': ftst = OP_FTTTY; break;
5263 case 'T': ftst = OP_FTTEXT; break;
5264 case 'B': ftst = OP_FTBINARY; break;
5265 case 'M': case 'A': case 'C':
fafc274c 5266 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5267 switch (tmp) {
5268 case 'M': ftst = OP_FTMTIME; break;
5269 case 'A': ftst = OP_FTATIME; break;
5270 case 'C': ftst = OP_FTCTIME; break;
5271 default: break;
5272 }
5273 break;
378cc40b 5274 default:
378cc40b
LW
5275 break;
5276 }
e5edeb50 5277 if (ftst) {
eb160463 5278 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5279 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5280 "### Saw file test %c\n", (int)tmp);
5f80b19c 5281 } );
e5edeb50
JH
5282 FTST(ftst);
5283 }
5284 else {
5285 /* Assume it was a minus followed by a one-letter named
5286 * subroutine call (or a -bareword), then. */
95c31fe3 5287 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5288 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5289 (int) tmp);
5f80b19c 5290 } );
3cf7b4c4 5291 s = --PL_bufptr;
e5edeb50 5292 }
378cc40b 5293 }
90771dc0
NC
5294 {
5295 const char tmp = *s++;
5296 if (*s == tmp) {
5297 s++;
5298 if (PL_expect == XOPERATOR)
5299 TERM(POSTDEC);
5300 else
5301 OPERATOR(PREDEC);
5302 }
5303 else if (*s == '>') {
5304 s++;
29595ff2 5305 s = SKIPSPACE1(s);
90771dc0
NC
5306 if (isIDFIRST_lazy_if(s,UTF)) {
5307 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5308 TOKEN(ARROW);
5309 }
5310 else if (*s == '$')
5311 OPERATOR(ARROW);
5312 else
5313 TERM(ARROW);
5314 }
78cdf107
Z
5315 if (PL_expect == XOPERATOR) {
5316 if (*s == '=' && !PL_lex_allbrackets &&
5317 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5318 s--;
5319 TOKEN(0);
5320 }
90771dc0 5321 Aop(OP_SUBTRACT);
78cdf107 5322 }
90771dc0
NC
5323 else {
5324 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5325 check_uni();
5326 OPERATOR('-'); /* unary minus */
79072805 5327 }
2f3197b3 5328 }
79072805 5329
378cc40b 5330 case '+':
90771dc0
NC
5331 {
5332 const char tmp = *s++;
5333 if (*s == tmp) {
5334 s++;
5335 if (PL_expect == XOPERATOR)
5336 TERM(POSTINC);
5337 else
5338 OPERATOR(PREINC);
5339 }
78cdf107
Z
5340 if (PL_expect == XOPERATOR) {
5341 if (*s == '=' && !PL_lex_allbrackets &&
5342 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5343 s--;
5344 TOKEN(0);
5345 }
90771dc0 5346 Aop(OP_ADD);
78cdf107 5347 }
90771dc0
NC
5348 else {
5349 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5350 check_uni();
5351 OPERATOR('+');
5352 }
2f3197b3 5353 }
a687059c 5354
378cc40b 5355 case '*':
3280af22
NIS
5356 if (PL_expect != XOPERATOR) {
5357 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5358 PL_expect = XOPERATOR;
5359 force_ident(PL_tokenbuf, '*');
5360 if (!*PL_tokenbuf)
a0d0e21e 5361 PREREF('*');
79072805 5362 TERM('*');
a687059c 5363 }
79072805
LW
5364 s++;
5365 if (*s == '*') {
a687059c 5366 s++;
78cdf107
Z
5367 if (*s == '=' && !PL_lex_allbrackets &&
5368 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5369 s -= 2;
5370 TOKEN(0);
5371 }
79072805 5372 PWop(OP_POW);
a687059c 5373 }
78cdf107
Z
5374 if (*s == '=' && !PL_lex_allbrackets &&
5375 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5376 s--;
5377 TOKEN(0);
5378 }
79072805
LW
5379 Mop(OP_MULTIPLY);
5380
378cc40b 5381 case '%':
3280af22 5382 if (PL_expect == XOPERATOR) {
78cdf107
Z
5383 if (s[1] == '=' && !PL_lex_allbrackets &&
5384 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5385 TOKEN(0);
bbce6d69 5386 ++s;
5387 Mop(OP_MODULO);
a687059c 5388 }
3280af22 5389 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5390 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5391 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5392 if (!PL_tokenbuf[1]) {
bbce6d69 5393 PREREF('%');
a687059c 5394 }
3280af22 5395 PL_pending_ident = '%';
bbce6d69 5396 TERM('%');
a687059c 5397
378cc40b 5398 case '^':
78cdf107
Z
5399 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5400 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5401 TOKEN(0);
79072805 5402 s++;
a0d0e21e 5403 BOop(OP_BIT_XOR);
79072805 5404 case '[':
a7aaec61
Z
5405 if (PL_lex_brackets > 100)
5406 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5407 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5408 PL_lex_allbrackets++;
df3467db
IG
5409 {
5410 const char tmp = *s++;
5411 OPERATOR(tmp);
5412 }
378cc40b 5413 case '~':
0d863452 5414 if (s[1] == '~'
3e7dd34d 5415 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5416 {
78cdf107
Z
5417 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5418 TOKEN(0);
0d863452
RH
5419 s += 2;
5420 Eop(OP_SMARTMATCH);
5421 }
78cdf107
Z
5422 s++;
5423 OPERATOR('~');
378cc40b 5424 case ',':
78cdf107
Z
5425 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5426 TOKEN(0);
5427 s++;
5428 OPERATOR(',');
a0d0e21e
LW
5429 case ':':
5430 if (s[1] == ':') {
5431 len = 0;
0bfa2a8a 5432 goto just_a_word_zero_gv;
a0d0e21e
LW
5433 }
5434 s++;
09bef843
SB
5435 switch (PL_expect) {
5436 OP *attrs;
5db06880
NC
5437#ifdef PERL_MAD
5438 I32 stuffstart;
5439#endif
09bef843
SB
5440 case XOPERATOR:
5441 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5442 break;
5443 PL_bufptr = s; /* update in case we back off */
d83f38d8 5444 if (*s == '=') {
2dc78664
NC
5445 Perl_croak(aTHX_
5446 "Use of := for an empty attribute list is not allowed");
d83f38d8 5447 }
09bef843
SB
5448 goto grabattrs;
5449 case XATTRBLOCK:
5450 PL_expect = XBLOCK;
5451 goto grabattrs;
5452 case XATTRTERM:
5453 PL_expect = XTERMBLOCK;
5454 grabattrs:
5db06880
NC
5455#ifdef PERL_MAD
5456 stuffstart = s - SvPVX(PL_linestr) - 1;
5457#endif
29595ff2 5458 s = PEEKSPACE(s);
5f66b61c 5459 attrs = NULL;
7e2040f0 5460 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5461 I32 tmp;
5cc237b8 5462 SV *sv;
09bef843 5463 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5464 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5465 if (tmp < 0) tmp = -tmp;
5466 switch (tmp) {
5467 case KEY_or:
5468 case KEY_and:
5469 case KEY_for:
11baf631 5470 case KEY_foreach:
f9829d6b
GS
5471 case KEY_unless:
5472 case KEY_if:
5473 case KEY_while:
5474 case KEY_until:
5475 goto got_attrs;
5476 default:
5477 break;
5478 }
5479 }
89a5757c 5480 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
09bef843
SB
5481 if (*d == '(') {
5482 d = scan_str(d,TRUE,TRUE);
5483 if (!d) {
09bef843
SB
5484 /* MUST advance bufptr here to avoid bogus
5485 "at end of line" context messages from yyerror().
5486 */
5487 PL_bufptr = s + len;
5488 yyerror("Unterminated attribute parameter in attribute list");
5489 if (attrs)
5490 op_free(attrs);
5cc237b8 5491 sv_free(sv);
bbf60fe6 5492 return REPORT(0); /* EOF indicator */
09bef843
SB
5493 }
5494 }
5495 if (PL_lex_stuff) {
09bef843 5496 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5497 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5498 newSVOP(OP_CONST, 0, sv));
5499 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5500 PL_lex_stuff = NULL;
09bef843
SB
5501 }
5502 else {
5cc237b8
BS
5503 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5504 sv_free(sv);
1108974d 5505 if (PL_in_my == KEY_our) {
df9a6019 5506 deprecate(":unique");
1108974d 5507 }
bfed75c6 5508 else
371fce9b
DM
5509 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5510 }
5511
d3cea301
SB
5512 /* NOTE: any CV attrs applied here need to be part of
5513 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5514 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5515 sv_free(sv);
78f9721b 5516 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5517 }
5518 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5519 sv_free(sv);
8e5dadda 5520 deprecate(":locked");
5cc237b8
BS
5521 }
5522 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5523 sv_free(sv);
78f9721b 5524 CvMETHOD_on(PL_compcv);
5cc237b8 5525 }
78f9721b
SM
5526 /* After we've set the flags, it could be argued that
5527 we don't need to do the attributes.pm-based setting
5528 process, and shouldn't bother appending recognized
d3cea301
SB
5529 flags. To experiment with that, uncomment the
5530 following "else". (Note that's already been
5531 uncommented. That keeps the above-applied built-in
5532 attributes from being intercepted (and possibly
5533 rejected) by a package's attribute routines, but is
5534 justified by the performance win for the common case
5535 of applying only built-in attributes.) */
0256094b 5536 else
2fcb4757 5537 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5538 newSVOP(OP_CONST, 0,
5cc237b8 5539 sv));
09bef843 5540 }
29595ff2 5541 s = PEEKSPACE(d);
0120eecf 5542 if (*s == ':' && s[1] != ':')
29595ff2 5543 s = PEEKSPACE(s+1);
0120eecf
GS
5544 else if (s == d)
5545 break; /* require real whitespace or :'s */
29595ff2 5546 /* XXX losing whitespace on sequential attributes here */
09bef843 5547 }
90771dc0
NC
5548 {
5549 const char tmp
5550 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5551 if (*s != ';' && *s != '}' && *s != tmp
5552 && (tmp != '=' || *s != ')')) {
5553 const char q = ((*s == '\'') ? '"' : '\'');
5554 /* If here for an expression, and parsed no attrs, back
5555 off. */
5556 if (tmp == '=' && !attrs) {
5557 s = PL_bufptr;
5558 break;
5559 }
5560 /* MUST advance bufptr here to avoid bogus "at end of line"
5561 context messages from yyerror().
5562 */
5563 PL_bufptr = s;
10edeb5d
JH
5564 yyerror( (const char *)
5565 (*s
5566 ? Perl_form(aTHX_ "Invalid separator character "
5567 "%c%c%c in attribute list", q, *s, q)
5568 : "Unterminated attribute list" ) );
90771dc0
NC
5569 if (attrs)
5570 op_free(attrs);
5571 OPERATOR(':');
09bef843 5572 }
09bef843 5573 }
f9829d6b 5574 got_attrs:
09bef843 5575 if (attrs) {
cd81e915 5576 start_force(PL_curforce);
9ded7720 5577 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5578 CURMAD('_', PL_nextwhite);
89122651 5579 force_next(THING);
5db06880
NC
5580 }
5581#ifdef PERL_MAD
5582 if (PL_madskills) {
cd81e915 5583 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5584 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5585 }
5db06880 5586#endif
09bef843
SB
5587 TOKEN(COLONATTR);
5588 }
78cdf107
Z
5589 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5590 s--;
5591 TOKEN(0);
5592 }
5593 PL_lex_allbrackets--;
a0d0e21e 5594 OPERATOR(':');
8990e307
LW
5595 case '(':
5596 s++;
3280af22
NIS
5597 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5598 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5599 else
3280af22 5600 PL_expect = XTERM;
29595ff2 5601 s = SKIPSPACE1(s);
78cdf107 5602 PL_lex_allbrackets++;
a0d0e21e 5603 TOKEN('(');
378cc40b 5604 case ';':
78cdf107
Z
5605 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5606 TOKEN(0);
f4dd75d9 5607 CLINE;
78cdf107
Z
5608 s++;
5609 OPERATOR(';');
378cc40b 5610 case ')':
78cdf107
Z
5611 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5612 TOKEN(0);
5613 s++;
5614 PL_lex_allbrackets--;
5615 s = SKIPSPACE1(s);
5616 if (*s == '{')
5617 PREBLOCK(')');
5618 TERM(')');
79072805 5619 case ']':
a7aaec61
Z
5620 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5621 TOKEN(0);
79072805 5622 s++;
3280af22 5623 if (PL_lex_brackets <= 0)
d98d5fff 5624 yyerror("Unmatched right square bracket");
463ee0b2 5625 else
3280af22 5626 --PL_lex_brackets;
78cdf107 5627 PL_lex_allbrackets--;
3280af22
NIS
5628 if (PL_lex_state == LEX_INTERPNORMAL) {
5629 if (PL_lex_brackets == 0) {
02255c60
FC
5630 if (*s == '-' && s[1] == '>')
5631 PL_lex_state = LEX_INTERPENDMAYBE;
5632 else if (*s != '[' && *s != '{')
3280af22 5633 PL_lex_state = LEX_INTERPEND;
79072805
LW
5634 }
5635 }
4633a7c4 5636 TERM(']');
79072805
LW
5637 case '{':
5638 leftbracket:
79072805 5639 s++;
3280af22 5640 if (PL_lex_brackets > 100) {
8edd5f42 5641 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5642 }
3280af22 5643 switch (PL_expect) {
a0d0e21e 5644 case XTERM:
3280af22 5645 if (PL_lex_formbrack) {
a0d0e21e
LW
5646 s--;
5647 PRETERMBLOCK(DO);
5648 }
3280af22
NIS
5649 if (PL_oldoldbufptr == PL_last_lop)
5650 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5651 else
3280af22 5652 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5653 PL_lex_allbrackets++;
79072805 5654 OPERATOR(HASHBRACK);
a0d0e21e 5655 case XOPERATOR:
bf4acbe4 5656 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5657 s++;
44a8e56a 5658 d = s;
3280af22
NIS
5659 PL_tokenbuf[0] = '\0';
5660 if (d < PL_bufend && *d == '-') {
5661 PL_tokenbuf[0] = '-';
44a8e56a 5662 d++;
bf4acbe4 5663 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5664 d++;
5665 }
7e2040f0 5666 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5667 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5668 FALSE, &len);
bf4acbe4 5669 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5670 d++;
5671 if (*d == '}') {
f54cb97a 5672 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5673 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5674 if (minus)
5675 force_next('-');
748a9306
LW
5676 }
5677 }
5678 /* FALL THROUGH */
09bef843 5679 case XATTRBLOCK:
748a9306 5680 case XBLOCK:
3280af22 5681 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5682 PL_lex_allbrackets++;
3280af22 5683 PL_expect = XSTATE;
a0d0e21e 5684 break;
09bef843 5685 case XATTRTERM:
a0d0e21e 5686 case XTERMBLOCK:
3280af22 5687 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5688 PL_lex_allbrackets++;
3280af22 5689 PL_expect = XSTATE;
a0d0e21e
LW
5690 break;
5691 default: {
f54cb97a 5692 const char *t;
3280af22
NIS
5693 if (PL_oldoldbufptr == PL_last_lop)
5694 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5695 else
3280af22 5696 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5697 PL_lex_allbrackets++;
29595ff2 5698 s = SKIPSPACE1(s);
8452ff4b
SB
5699 if (*s == '}') {
5700 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5701 PL_expect = XTERM;
5702 /* This hack is to get the ${} in the message. */
5703 PL_bufptr = s+1;
5704 yyerror("syntax error");
5705 break;
5706 }
a0d0e21e 5707 OPERATOR(HASHBRACK);
8452ff4b 5708 }
b8a4b1be
GS
5709 /* This hack serves to disambiguate a pair of curlies
5710 * as being a block or an anon hash. Normally, expectation
5711 * determines that, but in cases where we're not in a
5712 * position to expect anything in particular (like inside
5713 * eval"") we have to resolve the ambiguity. This code
5714 * covers the case where the first term in the curlies is a
5715 * quoted string. Most other cases need to be explicitly
a0288114 5716 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5717 * curly in order to force resolution as an anon hash.
5718 *
5719 * XXX should probably propagate the outer expectation
5720 * into eval"" to rely less on this hack, but that could
5721 * potentially break current behavior of eval"".
5722 * GSAR 97-07-21
5723 */
5724 t = s;
5725 if (*s == '\'' || *s == '"' || *s == '`') {
5726 /* common case: get past first string, handling escapes */
3280af22 5727 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5728 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5729 t++;
5730 t++;
a0d0e21e 5731 }
b8a4b1be 5732 else if (*s == 'q') {
3280af22 5733 if (++t < PL_bufend
b8a4b1be 5734 && (!isALNUM(*t)
3280af22 5735 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5736 && !isALNUM(*t))))
5737 {
abc667d1 5738 /* skip q//-like construct */
f54cb97a 5739 const char *tmps;
b8a4b1be
GS
5740 char open, close, term;
5741 I32 brackets = 1;
5742
3280af22 5743 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5744 t++;
abc667d1
DM
5745 /* check for q => */
5746 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5747 OPERATOR(HASHBRACK);
5748 }
b8a4b1be
GS
5749 term = *t;
5750 open = term;
5751 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5752 term = tmps[5];
5753 close = term;
5754 if (open == close)
3280af22
NIS
5755 for (t++; t < PL_bufend; t++) {
5756 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5757 t++;
6d07e5e9 5758 else if (*t == open)
b8a4b1be
GS
5759 break;
5760 }
abc667d1 5761 else {
3280af22
NIS
5762 for (t++; t < PL_bufend; t++) {
5763 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5764 t++;
6d07e5e9 5765 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5766 break;
5767 else if (*t == open)
5768 brackets++;
5769 }
abc667d1
DM
5770 }
5771 t++;
b8a4b1be 5772 }
abc667d1
DM
5773 else
5774 /* skip plain q word */
5775 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5776 t += UTF8SKIP(t);
a0d0e21e 5777 }
7e2040f0 5778 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5779 t += UTF8SKIP(t);
7e2040f0 5780 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5781 t += UTF8SKIP(t);
a0d0e21e 5782 }
3280af22 5783 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5784 t++;
b8a4b1be
GS
5785 /* if comma follows first term, call it an anon hash */
5786 /* XXX it could be a comma expression with loop modifiers */
3280af22 5787 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5788 || (*t == '=' && t[1] == '>')))
a0d0e21e 5789 OPERATOR(HASHBRACK);
3280af22 5790 if (PL_expect == XREF)
4e4e412b 5791 PL_expect = XTERM;
a0d0e21e 5792 else {
3280af22
NIS
5793 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5794 PL_expect = XSTATE;
a0d0e21e 5795 }
8990e307 5796 }
a0d0e21e 5797 break;
463ee0b2 5798 }
6154021b 5799 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5800 if (isSPACE(*s) || *s == '#')
3280af22 5801 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5802 TOKEN('{');
378cc40b 5803 case '}':
a7aaec61
Z
5804 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5805 TOKEN(0);
79072805
LW
5806 rightbracket:
5807 s++;
3280af22 5808 if (PL_lex_brackets <= 0)
d98d5fff 5809 yyerror("Unmatched right curly bracket");
463ee0b2 5810 else
3280af22 5811 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 5812 PL_lex_allbrackets--;
c2e66d9e 5813 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5814 PL_lex_formbrack = 0;
5815 if (PL_lex_state == LEX_INTERPNORMAL) {
5816 if (PL_lex_brackets == 0) {
9059aa12
LW
5817 if (PL_expect & XFAKEBRACK) {
5818 PL_expect &= XENUMMASK;
3280af22
NIS
5819 PL_lex_state = LEX_INTERPEND;
5820 PL_bufptr = s;
5db06880
NC
5821#if 0
5822 if (PL_madskills) {
cd81e915 5823 if (!PL_thiswhite)
6b29d1f5 5824 PL_thiswhite = newSVpvs("");
76f68e9b 5825 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5826 }
5827#endif
cea2e8a9 5828 return yylex(); /* ignore fake brackets */
79072805 5829 }
fa83b5b6 5830 if (*s == '-' && s[1] == '>')
3280af22 5831 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5832 else if (*s != '[' && *s != '{')
3280af22 5833 PL_lex_state = LEX_INTERPEND;
79072805
LW
5834 }
5835 }
9059aa12
LW
5836 if (PL_expect & XFAKEBRACK) {
5837 PL_expect &= XENUMMASK;
3280af22 5838 PL_bufptr = s;
cea2e8a9 5839 return yylex(); /* ignore fake brackets */
748a9306 5840 }
cd81e915 5841 start_force(PL_curforce);
5db06880
NC
5842 if (PL_madskills) {
5843 curmad('X', newSVpvn(s-1,1));
cd81e915 5844 CURMAD('_', PL_thiswhite);
5db06880 5845 }
79072805 5846 force_next('}');
5db06880 5847#ifdef PERL_MAD
cd81e915 5848 if (!PL_thistoken)
6b29d1f5 5849 PL_thistoken = newSVpvs("");
5db06880 5850#endif
79072805 5851 TOKEN(';');
378cc40b
LW
5852 case '&':
5853 s++;
78cdf107
Z
5854 if (*s++ == '&') {
5855 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5856 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5857 s -= 2;
5858 TOKEN(0);
5859 }
a0d0e21e 5860 AOPERATOR(ANDAND);
78cdf107 5861 }
378cc40b 5862 s--;
3280af22 5863 if (PL_expect == XOPERATOR) {
041457d9
DM
5864 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5865 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5866 {
57843af0 5867 CopLINE_dec(PL_curcop);
f1f66076 5868 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5869 CopLINE_inc(PL_curcop);
463ee0b2 5870 }
78cdf107
Z
5871 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5872 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5873 s--;
5874 TOKEN(0);
5875 }
79072805 5876 BAop(OP_BIT_AND);
463ee0b2 5877 }
79072805 5878
3280af22
NIS
5879 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5880 if (*PL_tokenbuf) {
5881 PL_expect = XOPERATOR;
5882 force_ident(PL_tokenbuf, '&');
463ee0b2 5883 }
79072805
LW
5884 else
5885 PREREF('&');
6154021b 5886 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5887 TERM('&');
5888
378cc40b
LW
5889 case '|':
5890 s++;
78cdf107
Z
5891 if (*s++ == '|') {
5892 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5893 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5894 s -= 2;
5895 TOKEN(0);
5896 }
a0d0e21e 5897 AOPERATOR(OROR);
78cdf107 5898 }
378cc40b 5899 s--;
78cdf107
Z
5900 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5901 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5902 s--;
5903 TOKEN(0);
5904 }
79072805 5905 BOop(OP_BIT_OR);
378cc40b
LW
5906 case '=':
5907 s++;
748a9306 5908 {
90771dc0 5909 const char tmp = *s++;
78cdf107
Z
5910 if (tmp == '=') {
5911 if (!PL_lex_allbrackets &&
5912 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5913 s -= 2;
5914 TOKEN(0);
5915 }
90771dc0 5916 Eop(OP_EQ);
78cdf107
Z
5917 }
5918 if (tmp == '>') {
5919 if (!PL_lex_allbrackets &&
5920 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5921 s -= 2;
5922 TOKEN(0);
5923 }
90771dc0 5924 OPERATOR(',');
78cdf107 5925 }
90771dc0
NC
5926 if (tmp == '~')
5927 PMop(OP_MATCH);
5928 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5929 && strchr("+-*/%.^&|<",tmp))
5930 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5931 "Reversed %c= operator",(int)tmp);
5932 s--;
5933 if (PL_expect == XSTATE && isALPHA(tmp) &&
5934 (s == PL_linestart+1 || s[-2] == '\n') )
5935 {
60d63348 5936 if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
90771dc0
NC
5937 d = PL_bufend;
5938 while (s < d) {
5939 if (*s++ == '\n') {
5940 incline(s);
5941 if (strnEQ(s,"=cut",4)) {
5942 s = strchr(s,'\n');
5943 if (s)
5944 s++;
5945 else
5946 s = d;
5947 incline(s);
5948 goto retry;
5949 }
5950 }
a5f75d66 5951 }
90771dc0 5952 goto retry;
a5f75d66 5953 }
5db06880
NC
5954#ifdef PERL_MAD
5955 if (PL_madskills) {
cd81e915 5956 if (!PL_thiswhite)
6b29d1f5 5957 PL_thiswhite = newSVpvs("");
cd81e915 5958 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5959 PL_bufend - PL_linestart);
5960 }
5961#endif
90771dc0 5962 s = PL_bufend;
737c24fc 5963 PL_parser->in_pod = 1;
90771dc0 5964 goto retry;
a5f75d66 5965 }
a0d0e21e 5966 }
3280af22 5967 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5968 const char *t = s;
51882d45 5969#ifdef PERL_STRICT_CR
c35e046a 5970 while (SPACE_OR_TAB(*t))
51882d45 5971#else
c35e046a 5972 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5973#endif
c35e046a 5974 t++;
a0d0e21e
LW
5975 if (*t == '\n' || *t == '#') {
5976 s--;
3280af22 5977 PL_expect = XBLOCK;
a0d0e21e
LW
5978 goto leftbracket;
5979 }
79072805 5980 }
78cdf107
Z
5981 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5982 s--;
5983 TOKEN(0);
5984 }
6154021b 5985 pl_yylval.ival = 0;
a0d0e21e 5986 OPERATOR(ASSIGNOP);
378cc40b
LW
5987 case '!':
5988 s++;
90771dc0
NC
5989 {
5990 const char tmp = *s++;
5991 if (tmp == '=') {
5992 /* was this !=~ where !~ was meant?
5993 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5994
5995 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5996 const char *t = s+1;
5997
5998 while (t < PL_bufend && isSPACE(*t))
5999 ++t;
6000
6001 if (*t == '/' || *t == '?' ||
6002 ((*t == 'm' || *t == 's' || *t == 'y')
6003 && !isALNUM(t[1])) ||
6004 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6005 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6006 "!=~ should be !~");
6007 }
78cdf107
Z
6008 if (!PL_lex_allbrackets &&
6009 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6010 s -= 2;
6011 TOKEN(0);
6012 }
90771dc0
NC
6013 Eop(OP_NE);
6014 }
6015 if (tmp == '~')
6016 PMop(OP_NOT);
6017 }
378cc40b
LW
6018 s--;
6019 OPERATOR('!');
6020 case '<':
3280af22 6021 if (PL_expect != XOPERATOR) {
93a17b20 6022 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 6023 check_uni();
79072805
LW
6024 if (s[1] == '<')
6025 s = scan_heredoc(s);
6026 else
6027 s = scan_inputsymbol(s);
6028 TERM(sublex_start());
378cc40b
LW
6029 }
6030 s++;
90771dc0
NC
6031 {
6032 char tmp = *s++;
78cdf107
Z
6033 if (tmp == '<') {
6034 if (*s == '=' && !PL_lex_allbrackets &&
6035 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6036 s -= 2;
6037 TOKEN(0);
6038 }
90771dc0 6039 SHop(OP_LEFT_SHIFT);
78cdf107 6040 }
90771dc0
NC
6041 if (tmp == '=') {
6042 tmp = *s++;
78cdf107
Z
6043 if (tmp == '>') {
6044 if (!PL_lex_allbrackets &&
6045 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6046 s -= 3;
6047 TOKEN(0);
6048 }
90771dc0 6049 Eop(OP_NCMP);
78cdf107 6050 }
90771dc0 6051 s--;
78cdf107
Z
6052 if (!PL_lex_allbrackets &&
6053 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6054 s -= 2;
6055 TOKEN(0);
6056 }
90771dc0
NC
6057 Rop(OP_LE);
6058 }
395c3793 6059 }
378cc40b 6060 s--;
78cdf107
Z
6061 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6062 s--;
6063 TOKEN(0);
6064 }
79072805 6065 Rop(OP_LT);
378cc40b
LW
6066 case '>':
6067 s++;
90771dc0
NC
6068 {
6069 const char tmp = *s++;
78cdf107
Z
6070 if (tmp == '>') {
6071 if (*s == '=' && !PL_lex_allbrackets &&
6072 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6073 s -= 2;
6074 TOKEN(0);
6075 }
90771dc0 6076 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
6077 }
6078 else if (tmp == '=') {
6079 if (!PL_lex_allbrackets &&
6080 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6081 s -= 2;
6082 TOKEN(0);
6083 }
90771dc0 6084 Rop(OP_GE);
78cdf107 6085 }
90771dc0 6086 }
378cc40b 6087 s--;
78cdf107
Z
6088 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6089 s--;
6090 TOKEN(0);
6091 }
79072805 6092 Rop(OP_GT);
378cc40b
LW
6093
6094 case '$':
bbce6d69 6095 CLINE;
6096
3280af22
NIS
6097 if (PL_expect == XOPERATOR) {
6098 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6099 return deprecate_commaless_var_list();
a0d0e21e 6100 }
8990e307 6101 }
a0d0e21e 6102
c0b977fd 6103 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6104 PL_tokenbuf[0] = '@';
376b8730
SM
6105 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6106 sizeof PL_tokenbuf - 1, FALSE);
6107 if (PL_expect == XOPERATOR)
6108 no_op("Array length", s);
3280af22 6109 if (!PL_tokenbuf[1])
a0d0e21e 6110 PREREF(DOLSHARP);
3280af22
NIS
6111 PL_expect = XOPERATOR;
6112 PL_pending_ident = '#';
463ee0b2 6113 TOKEN(DOLSHARP);
79072805 6114 }
bbce6d69 6115
3280af22 6116 PL_tokenbuf[0] = '$';
376b8730
SM
6117 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6118 sizeof PL_tokenbuf - 1, FALSE);
6119 if (PL_expect == XOPERATOR)
6120 no_op("Scalar", s);
3280af22
NIS
6121 if (!PL_tokenbuf[1]) {
6122 if (s == PL_bufend)
bbce6d69 6123 yyerror("Final $ should be \\$ or $name");
6124 PREREF('$');
8990e307 6125 }
a0d0e21e 6126
ff68c719 6127 d = s;
90771dc0
NC
6128 {
6129 const char tmp = *s;
ae28bb2a 6130 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6131 s = SKIPSPACE1(s);
ff68c719 6132
90771dc0
NC
6133 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6134 && intuit_more(s)) {
6135 if (*s == '[') {
6136 PL_tokenbuf[0] = '@';
6137 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6138 char *t = s+1;
6139
6140 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6141 t++;
90771dc0 6142 if (*t++ == ',') {
29595ff2 6143 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6144 while (t < PL_bufend && *t != ']')
6145 t++;
9014280d 6146 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6147 "Multidimensional syntax %.*s not supported",
36c7798d 6148 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6149 }
748a9306 6150 }
93a17b20 6151 }
90771dc0
NC
6152 else if (*s == '{') {
6153 char *t;
6154 PL_tokenbuf[0] = '%';
6155 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6156 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6157 {
6158 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6159 do {
6160 t++;
6161 } while (isSPACE(*t));
90771dc0 6162 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6163 STRLEN len;
90771dc0 6164 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6165 &len);
c35e046a
AL
6166 while (isSPACE(*t))
6167 t++;
780a5241 6168 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
6169 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6170 "You need to quote \"%s\"",
6171 tmpbuf);
6172 }
6173 }
6174 }
93a17b20 6175 }
bbce6d69 6176
90771dc0
NC
6177 PL_expect = XOPERATOR;
6178 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6179 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6180 if (!islop || PL_last_lop_op == OP_GREPSTART)
6181 PL_expect = XOPERATOR;
6182 else if (strchr("$@\"'`q", *s))
6183 PL_expect = XTERM; /* e.g. print $fh "foo" */
6184 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6185 PL_expect = XTERM; /* e.g. print $fh &sub */
6186 else if (isIDFIRST_lazy_if(s,UTF)) {
6187 char tmpbuf[sizeof PL_tokenbuf];
6188 int t2;
6189 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6190 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6191 /* binary operators exclude handle interpretations */
6192 switch (t2) {
6193 case -KEY_x:
6194 case -KEY_eq:
6195 case -KEY_ne:
6196 case -KEY_gt:
6197 case -KEY_lt:
6198 case -KEY_ge:
6199 case -KEY_le:
6200 case -KEY_cmp:
6201 break;
6202 default:
6203 PL_expect = XTERM; /* e.g. print $fh length() */
6204 break;
6205 }
6206 }
6207 else {
6208 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6209 }
6210 }
90771dc0
NC
6211 else if (isDIGIT(*s))
6212 PL_expect = XTERM; /* e.g. print $fh 3 */
6213 else if (*s == '.' && isDIGIT(s[1]))
6214 PL_expect = XTERM; /* e.g. print $fh .3 */
6215 else if ((*s == '?' || *s == '-' || *s == '+')
6216 && !isSPACE(s[1]) && s[1] != '=')
6217 PL_expect = XTERM; /* e.g. print $fh -1 */
6218 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6219 && s[1] != '/')
6220 PL_expect = XTERM; /* e.g. print $fh /.../
6221 XXX except DORDOR operator
6222 */
6223 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6224 && s[2] != '=')
6225 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6226 }
bbce6d69 6227 }
3280af22 6228 PL_pending_ident = '$';
79072805 6229 TOKEN('$');
378cc40b
LW
6230
6231 case '@':
3280af22 6232 if (PL_expect == XOPERATOR)
bbce6d69 6233 no_op("Array", s);
3280af22
NIS
6234 PL_tokenbuf[0] = '@';
6235 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6236 if (!PL_tokenbuf[1]) {
bbce6d69 6237 PREREF('@');
6238 }
3280af22 6239 if (PL_lex_state == LEX_NORMAL)
29595ff2 6240 s = SKIPSPACE1(s);
3280af22 6241 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6242 if (*s == '{')
3280af22 6243 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6244
6245 /* Warn about @ where they meant $. */
041457d9
DM
6246 if (*s == '[' || *s == '{') {
6247 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6248 const char *t = s + 1;
7e2040f0 6249 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
6250 t++;
6251 if (*t == '}' || *t == ']') {
6252 t++;
29595ff2 6253 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
dcbac5bb 6254 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
9014280d 6255 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 6256 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
6257 (int)(t-PL_bufptr), PL_bufptr,
6258 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 6259 }
93a17b20
LW
6260 }
6261 }
463ee0b2 6262 }
3280af22 6263 PL_pending_ident = '@';
79072805 6264 TERM('@');
378cc40b 6265
c963b151 6266 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6267 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6268 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6269 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6270 TOKEN(0);
6f33ba73
RGS
6271 s += 2;
6272 AOPERATOR(DORDOR);
6273 }
c963b151 6274 case '?': /* may either be conditional or pattern */
be25f609 6275 if (PL_expect == XOPERATOR) {
90771dc0 6276 char tmp = *s++;
c963b151 6277 if(tmp == '?') {
78cdf107
Z
6278 if (!PL_lex_allbrackets &&
6279 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6280 s--;
6281 TOKEN(0);
6282 }
6283 PL_lex_allbrackets++;
be25f609 6284 OPERATOR('?');
c963b151
BD
6285 }
6286 else {
6287 tmp = *s++;
6288 if(tmp == '/') {
6289 /* A // operator. */
78cdf107
Z
6290 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6291 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6292 LEX_FAKEEOF_LOGIC)) {
6293 s -= 2;
6294 TOKEN(0);
6295 }
c963b151
BD
6296 AOPERATOR(DORDOR);
6297 }
6298 else {
6299 s--;
78cdf107
Z
6300 if (*s == '=' && !PL_lex_allbrackets &&
6301 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6302 s--;
6303 TOKEN(0);
6304 }
c963b151
BD
6305 Mop(OP_DIVIDE);
6306 }
6307 }
6308 }
6309 else {
6310 /* Disable warning on "study /blah/" */
6311 if (PL_oldoldbufptr == PL_last_uni
6312 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6313 || memNE(PL_last_uni, "study", 5)
6314 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6315 ))
6316 check_uni();
725a61d7
Z
6317 if (*s == '?')
6318 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6319 s = scan_pat(s,OP_MATCH);
6320 TERM(sublex_start());
6321 }
378cc40b
LW
6322
6323 case '.':
51882d45
GS
6324 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6325#ifdef PERL_STRICT_CR
6326 && s[1] == '\n'
6327#else
6328 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6329#endif
6330 && (s == PL_linestart || s[-1] == '\n') )
6331 {
3280af22
NIS
6332 PL_lex_formbrack = 0;
6333 PL_expect = XSTATE;
79072805
LW
6334 goto rightbracket;
6335 }
be25f609 6336 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6337 s += 3;
6338 OPERATOR(YADAYADA);
6339 }
3280af22 6340 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6341 char tmp = *s++;
a687059c 6342 if (*s == tmp) {
78cdf107
Z
6343 if (!PL_lex_allbrackets &&
6344 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6345 s--;
6346 TOKEN(0);
6347 }
a687059c 6348 s++;
2f3197b3
LW
6349 if (*s == tmp) {
6350 s++;
6154021b 6351 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6352 }
6353 else
6154021b 6354 pl_yylval.ival = 0;
378cc40b 6355 OPERATOR(DOTDOT);
a687059c 6356 }
78cdf107
Z
6357 if (*s == '=' && !PL_lex_allbrackets &&
6358 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6359 s--;
6360 TOKEN(0);
6361 }
79072805 6362 Aop(OP_CONCAT);
378cc40b
LW
6363 }
6364 /* FALL THROUGH */
6365 case '0': case '1': case '2': case '3': case '4':
6366 case '5': case '6': case '7': case '8': case '9':
6154021b 6367 s = scan_num(s, &pl_yylval);
931e0695 6368 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6369 if (PL_expect == XOPERATOR)
8990e307 6370 no_op("Number",s);
79072805
LW
6371 TERM(THING);
6372
6373 case '\'':
5db06880 6374 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6375 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6376 if (PL_expect == XOPERATOR) {
6377 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6378 return deprecate_commaless_var_list();
a0d0e21e 6379 }
463ee0b2 6380 else
8990e307 6381 no_op("String",s);
463ee0b2 6382 }
79072805 6383 if (!s)
d4c19fe8 6384 missingterm(NULL);
6154021b 6385 pl_yylval.ival = OP_CONST;
79072805
LW
6386 TERM(sublex_start());
6387
6388 case '"':
5db06880 6389 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6390 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6391 if (PL_expect == XOPERATOR) {
6392 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6393 return deprecate_commaless_var_list();
a0d0e21e 6394 }
463ee0b2 6395 else
8990e307 6396 no_op("String",s);
463ee0b2 6397 }
79072805 6398 if (!s)
d4c19fe8 6399 missingterm(NULL);
6154021b 6400 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6401 /* FIXME. I think that this can be const if char *d is replaced by
6402 more localised variables. */
3280af22 6403 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6404 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6405 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6406 break;
6407 }
6408 }
79072805
LW
6409 TERM(sublex_start());
6410
6411 case '`':
5db06880 6412 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6413 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6414 if (PL_expect == XOPERATOR)
8990e307 6415 no_op("Backticks",s);
79072805 6416 if (!s)
d4c19fe8 6417 missingterm(NULL);
9b201d7d 6418 readpipe_override();
79072805
LW
6419 TERM(sublex_start());
6420
6421 case '\\':
6422 s++;
a2a5de95
NC
6423 if (PL_lex_inwhat && isDIGIT(*s))
6424 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6425 *s, *s);
3280af22 6426 if (PL_expect == XOPERATOR)
8990e307 6427 no_op("Backslash",s);
79072805
LW
6428 OPERATOR(REFGEN);
6429
a7cb1f99 6430 case 'v':
e526c9e6 6431 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6432 char *start = s + 2;
dd629d5b 6433 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6434 start++;
6435 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6436 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6437 TERM(THING);
6438 }
e526c9e6 6439 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6440 else if (!isALPHA(*start) && (PL_expect == XTERM
6441 || PL_expect == XREF || PL_expect == XSTATE
6442 || PL_expect == XTERMORDORDOR)) {
af9f5953
BF
6443 GV *const gv = gv_fetchpvn_flags(s, start - s,
6444 UTF ? SVf_UTF8 : 0, SVt_PVCV);
e526c9e6 6445 if (!gv) {
6154021b 6446 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6447 TERM(THING);
6448 }
6449 }
a7cb1f99
GS
6450 }
6451 goto keylookup;
79072805 6452 case 'x':
3280af22 6453 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6454 s++;
6455 Mop(OP_REPEAT);
2f3197b3 6456 }
79072805
LW
6457 goto keylookup;
6458
378cc40b 6459 case '_':
79072805
LW
6460 case 'a': case 'A':
6461 case 'b': case 'B':
6462 case 'c': case 'C':
6463 case 'd': case 'D':
6464 case 'e': case 'E':
6465 case 'f': case 'F':
6466 case 'g': case 'G':
6467 case 'h': case 'H':
6468 case 'i': case 'I':
6469 case 'j': case 'J':
6470 case 'k': case 'K':
6471 case 'l': case 'L':
6472 case 'm': case 'M':
6473 case 'n': case 'N':
6474 case 'o': case 'O':
6475 case 'p': case 'P':
6476 case 'q': case 'Q':
6477 case 'r': case 'R':
6478 case 's': case 'S':
6479 case 't': case 'T':
6480 case 'u': case 'U':
a7cb1f99 6481 case 'V':
79072805
LW
6482 case 'w': case 'W':
6483 case 'X':
6484 case 'y': case 'Y':
6485 case 'z': case 'Z':
6486
49dc05e3 6487 keylookup: {
88e1f1a2 6488 bool anydelim;
90771dc0 6489 I32 tmp;
10edeb5d
JH
6490
6491 orig_keyword = 0;
6492 gv = NULL;
6493 gvp = NULL;
49dc05e3 6494
3280af22
NIS
6495 PL_bufptr = s;
6496 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6497
6498 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6499 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6500
6501 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6502 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6503 goto just_a_word;
6504
3643fb5f 6505 d = s;
3280af22 6506 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6507 d++; /* no comments skipped here, or s### is misparsed */
6508
748a9306 6509 /* Is this a word before a => operator? */
1c3923b3 6510 if (*d == '=' && d[1] == '>') {
748a9306 6511 CLINE;
6154021b 6512 pl_yylval.opval
d0a148a6
NC
6513 = (OP*)newSVOP(OP_CONST, 0,
6514 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6515 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6516 TERM(WORD);
6517 }
6518
88e1f1a2
JV
6519 /* Check for plugged-in keyword */
6520 {
6521 OP *o;
6522 int result;
6523 char *saved_bufptr = PL_bufptr;
6524 PL_bufptr = s;
16c91539 6525 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6526 s = PL_bufptr;
6527 if (result == KEYWORD_PLUGIN_DECLINE) {
6528 /* not a plugged-in keyword */
6529 PL_bufptr = saved_bufptr;
6530 } else if (result == KEYWORD_PLUGIN_STMT) {
6531 pl_yylval.opval = o;
6532 CLINE;
6533 PL_expect = XSTATE;
6534 return REPORT(PLUGSTMT);
6535 } else if (result == KEYWORD_PLUGIN_EXPR) {
6536 pl_yylval.opval = o;
6537 CLINE;
6538 PL_expect = XOPERATOR;
6539 return REPORT(PLUGEXPR);
6540 } else {
6541 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6542 PL_tokenbuf);
6543 }
6544 }
6545
6546 /* Check for built-in keyword */
6547 tmp = keyword(PL_tokenbuf, len, 0);
6548
6549 /* Is this a label? */
6550 if (!anydelim && PL_expect == XSTATE
6551 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6552 s = d + 1;
6553 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6554 CLINE;
6555 TOKEN(LABEL);
6556 }
6557
a0d0e21e 6558 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6559 GV *ogv = NULL; /* override (winner) */
6560 GV *hgv = NULL; /* hidden (loser) */
3280af22 6561 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6562 CV *cv;
af9f5953
BF
6563 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6564 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
56f7f34b
CS
6565 (cv = GvCVu(gv)))
6566 {
6567 if (GvIMPORTED_CV(gv))
6568 ogv = gv;
6569 else if (! CvMETHOD(cv))
6570 hgv = gv;
6571 }
6572 if (!ogv &&
af9f5953 6573 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
c60dbbc3 6574 UTF ? -(I32)len : (I32)len, FALSE)) &&
9e0d86f8 6575 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6576 GvCVu(gv) && GvIMPORTED_CV(gv))
6577 {
6578 ogv = gv;
6579 }
6580 }
6581 if (ogv) {
30fe34ed 6582 orig_keyword = tmp;
56f7f34b 6583 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6584 }
6585 else if (gv && !gvp
6586 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6587 && GvCVu(gv))
6e7b2336
GS
6588 {
6589 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6590 }
56f7f34b
CS
6591 else { /* no override */
6592 tmp = -tmp;
a2a5de95
NC
6593 if (tmp == KEY_dump) {
6594 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6595 "dump() better written as CORE::dump()");
ac206dc8 6596 }
a0714e2c 6597 gv = NULL;
56f7f34b 6598 gvp = 0;
a2a5de95
NC
6599 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6600 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6601 "Ambiguous call resolved as CORE::%s(), "
6602 "qualify as such or use &",
6603 GvENAME(hgv));
49dc05e3 6604 }
a0d0e21e
LW
6605 }
6606
6607 reserved_word:
6608 switch (tmp) {
79072805
LW
6609
6610 default: /* not a keyword */
0bfa2a8a
NC
6611 /* Trade off - by using this evil construction we can pull the
6612 variable gv into the block labelled keylookup. If not, then
6613 we have to give it function scope so that the goto from the
6614 earlier ':' case doesn't bypass the initialisation. */
6615 if (0) {
6616 just_a_word_zero_gv:
6617 gv = NULL;
6618 gvp = NULL;
8bee0991 6619 orig_keyword = 0;
0bfa2a8a 6620 }
93a17b20 6621 just_a_word: {
96e4d5b1 6622 SV *sv;
ce29ac45 6623 int pkgname = 0;
f54cb97a 6624 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6625 OP *rv2cv_op;
5069cc75 6626 CV *cv;
5db06880 6627#ifdef PERL_MAD
cd81e915 6628 SV *nextPL_nextwhite = 0;
5db06880
NC
6629#endif
6630
8990e307
LW
6631
6632 /* Get the rest if it looks like a package qualifier */
6633
155aba94 6634 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6635 STRLEN morelen;
3280af22 6636 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6637 TRUE, &morelen);
6638 if (!morelen)
cea2e8a9 6639 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6640 *s == '\'' ? "'" : "::");
c3e0f903 6641 len += morelen;
ce29ac45 6642 pkgname = 1;
a0d0e21e 6643 }
8990e307 6644
3280af22
NIS
6645 if (PL_expect == XOPERATOR) {
6646 if (PL_bufptr == PL_linestart) {
57843af0 6647 CopLINE_dec(PL_curcop);
f1f66076 6648 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6649 CopLINE_inc(PL_curcop);
463ee0b2
LW
6650 }
6651 else
54310121 6652 no_op("Bareword",s);
463ee0b2 6653 }
8990e307 6654
c3e0f903 6655 /* Look for a subroutine with this name in current package,
486ec47a 6656 unless name is "Foo::", in which case Foo is a bareword
c3e0f903
GS
6657 (and a package name). */
6658
5db06880 6659 if (len > 2 && !PL_madskills &&
3280af22 6660 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6661 {
f776e3cd 6662 if (ckWARN(WARN_BAREWORD)
af9f5953 6663 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
9014280d 6664 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6665 "Bareword \"%s\" refers to nonexistent package",
3280af22 6666 PL_tokenbuf);
c3e0f903 6667 len -= 2;
3280af22 6668 PL_tokenbuf[len] = '\0';
a0714e2c 6669 gv = NULL;
c3e0f903
GS
6670 gvp = 0;
6671 }
6672 else {
62d55b22
NC
6673 if (!gv) {
6674 /* Mustn't actually add anything to a symbol table.
6675 But also don't want to "initialise" any placeholder
6676 constants that might already be there into full
6677 blown PVGVs with attached PVCV. */
90e5519e 6678 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
af9f5953
BF
6679 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6680 SVt_PVCV);
62d55b22 6681 }
b3d904f3 6682 len = 0;
c3e0f903
GS
6683 }
6684
6685 /* if we saw a global override before, get the right name */
8990e307 6686
37bb7629
EB
6687 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6688 len ? len : strlen(PL_tokenbuf));
49dc05e3 6689 if (gvp) {
37bb7629 6690 SV * const tmp_sv = sv;
396482e1 6691 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6692 sv_catsv(sv, tmp_sv);
6693 SvREFCNT_dec(tmp_sv);
8a7a129d 6694 }
37bb7629 6695
5db06880 6696#ifdef PERL_MAD
cd81e915
NC
6697 if (PL_madskills && !PL_thistoken) {
6698 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6699 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6700 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6701 }
6702#endif
8990e307 6703
a0d0e21e 6704 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6705 CLINE;
6154021b
RGS
6706 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6707 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6708
c3e0f903 6709 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6710 if (len)
6711 goto safe_bareword;
6712
f7461760 6713 {
d8ebba9f 6714 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
6715 const_op->op_private = OPpCONST_BARE;
6716 rv2cv_op = newCVREF(0, const_op);
6717 }
d9088386 6718 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6719
8990e307
LW
6720 /* See if it's the indirect object for a list operator. */
6721
3280af22
NIS
6722 if (PL_oldoldbufptr &&
6723 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6724 (PL_oldoldbufptr == PL_last_lop
6725 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6726 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6727 (PL_expect == XREF ||
6728 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6729 {
748a9306
LW
6730 bool immediate_paren = *s == '(';
6731
a0d0e21e 6732 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6733 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6734#ifdef PERL_MAD
cd81e915 6735 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6736#endif
a0d0e21e
LW
6737
6738 /* Two barewords in a row may indicate method call. */
6739
62d55b22 6740 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6741 (tmp = intuit_method(s, gv, cv))) {
6742 op_free(rv2cv_op);
78cdf107
Z
6743 if (tmp == METHOD && !PL_lex_allbrackets &&
6744 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6745 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6746 return REPORT(tmp);
f7461760 6747 }
a0d0e21e
LW
6748
6749 /* If not a declared subroutine, it's an indirect object. */
6750 /* (But it's an indir obj regardless for sort.) */
7294df96 6751 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6752
7294df96
RGS
6753 if (
6754 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6755 (!cv &&
a9ef352a 6756 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6757 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6758 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6759 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6760 )
a9ef352a 6761 {
3280af22 6762 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6763 goto bareword;
93a17b20
LW
6764 }
6765 }
8990e307 6766
3280af22 6767 PL_expect = XOPERATOR;
5db06880
NC
6768#ifdef PERL_MAD
6769 if (isSPACE(*s))
cd81e915
NC
6770 s = SKIPSPACE2(s,nextPL_nextwhite);
6771 PL_nextwhite = nextPL_nextwhite;
5db06880 6772#else
8990e307 6773 s = skipspace(s);
5db06880 6774#endif
1c3923b3
GS
6775
6776 /* Is this a word before a => operator? */
ce29ac45 6777 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6778 op_free(rv2cv_op);
1c3923b3 6779 CLINE;
6154021b 6780 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6781 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6782 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6783 TERM(WORD);
6784 }
6785
6786 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6787 if (*s == '(') {
79072805 6788 CLINE;
5069cc75 6789 if (cv) {
c35e046a
AL
6790 d = s + 1;
6791 while (SPACE_OR_TAB(*d))
6792 d++;
f7461760 6793 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6794 s = d + 1;
c631f32b 6795 goto its_constant;
96e4d5b1 6796 }
6797 }
5db06880
NC
6798#ifdef PERL_MAD
6799 if (PL_madskills) {
cd81e915
NC
6800 PL_nextwhite = PL_thiswhite;
6801 PL_thiswhite = 0;
5db06880 6802 }
cd81e915 6803 start_force(PL_curforce);
5db06880 6804#endif
6154021b 6805 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6806 PL_expect = XOPERATOR;
5db06880
NC
6807#ifdef PERL_MAD
6808 if (PL_madskills) {
cd81e915
NC
6809 PL_nextwhite = nextPL_nextwhite;
6810 curmad('X', PL_thistoken);
6b29d1f5 6811 PL_thistoken = newSVpvs("");
5db06880
NC
6812 }
6813#endif
f7461760 6814 op_free(rv2cv_op);
93a17b20 6815 force_next(WORD);
6154021b 6816 pl_yylval.ival = 0;
463ee0b2 6817 TOKEN('&');
79072805 6818 }
93a17b20 6819
a0d0e21e 6820 /* If followed by var or block, call it a method (unless sub) */
8990e307 6821
f7461760
Z
6822 if ((*s == '$' || *s == '{') && !cv) {
6823 op_free(rv2cv_op);
3280af22
NIS
6824 PL_last_lop = PL_oldbufptr;
6825 PL_last_lop_op = OP_METHOD;
78cdf107
Z
6826 if (!PL_lex_allbrackets &&
6827 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6828 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 6829 PREBLOCK(METHOD);
463ee0b2
LW
6830 }
6831
8990e307
LW
6832 /* If followed by a bareword, see if it looks like indir obj. */
6833
30fe34ed
RGS
6834 if (!orig_keyword
6835 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6836 && (tmp = intuit_method(s, gv, cv))) {
6837 op_free(rv2cv_op);
78cdf107
Z
6838 if (tmp == METHOD && !PL_lex_allbrackets &&
6839 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6840 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6841 return REPORT(tmp);
f7461760 6842 }
93a17b20 6843
8990e307
LW
6844 /* Not a method, so call it a subroutine (if defined) */
6845
5069cc75 6846 if (cv) {
9b387841
NC
6847 if (lastchar == '-')
6848 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6849 "Ambiguous use of -%s resolved as -&%s()",
6850 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6851 /* Check for a constant sub */
f7461760 6852 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6853 its_constant:
f7461760 6854 op_free(rv2cv_op);
6154021b
RGS
6855 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6856 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6857 pl_yylval.opval->op_private = 0;
6b7c6d95 6858 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 6859 TOKEN(WORD);
89bfa8cd 6860 }
6861
6154021b 6862 op_free(pl_yylval.opval);
f7461760 6863 pl_yylval.opval = rv2cv_op;
6154021b 6864 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6865 PL_last_lop = PL_oldbufptr;
bf848113 6866 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6867 /* Is there a prototype? */
5db06880
NC
6868 if (
6869#ifdef PERL_MAD
6870 cv &&
6871#endif
d9f2850e
RGS
6872 SvPOK(cv))
6873 {
8fa6a409
FC
6874 STRLEN protolen = CvPROTOLEN(cv);
6875 const char *proto = CvPROTO(cv);
b5fb7ce3 6876 bool optional;
5f66b61c 6877 if (!protolen)
4633a7c4 6878 TERM(FUNC0SUB);
b5fb7ce3
FC
6879 if ((optional = *proto == ';'))
6880 do
0f5d0394 6881 proto++;
b5fb7ce3 6882 while (*proto == ';');
649d02de
FC
6883 if (
6884 (
6885 (
6886 *proto == '$' || *proto == '_'
c035a075 6887 || *proto == '*' || *proto == '+'
649d02de
FC
6888 )
6889 && proto[1] == '\0'
6890 )
6891 || (
6892 *proto == '\\' && proto[1] && proto[2] == '\0'
6893 )
6894 )
b5fb7ce3 6895 UNIPROTO(UNIOPSUB,optional);
649d02de
FC
6896 if (*proto == '\\' && proto[1] == '[') {
6897 const char *p = proto + 2;
6898 while(*p && *p != ']')
6899 ++p;
b5fb7ce3
FC
6900 if(*p == ']' && !p[1])
6901 UNIPROTO(UNIOPSUB,optional);
649d02de 6902 }
7a52d87a 6903 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6904 if (PL_curstash)
6905 sv_setpvs(PL_subname, "__ANON__");
6906 else
6907 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
6908 if (!PL_lex_allbrackets &&
6909 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6910 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
6911 PREBLOCK(LSTOPSUB);
6912 }
a9ef352a 6913 }
5db06880
NC
6914#ifdef PERL_MAD
6915 {
6916 if (PL_madskills) {
cd81e915
NC
6917 PL_nextwhite = PL_thiswhite;
6918 PL_thiswhite = 0;
5db06880 6919 }
cd81e915 6920 start_force(PL_curforce);
6154021b 6921 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6922 PL_expect = XTERM;
6923 if (PL_madskills) {
cd81e915
NC
6924 PL_nextwhite = nextPL_nextwhite;
6925 curmad('X', PL_thistoken);
6b29d1f5 6926 PL_thistoken = newSVpvs("");
5db06880
NC
6927 }
6928 force_next(WORD);
78cdf107
Z
6929 if (!PL_lex_allbrackets &&
6930 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6931 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6932 TOKEN(NOAMP);
6933 }
6934 }
6935
6936 /* Guess harder when madskills require "best effort". */
6937 if (PL_madskills && (!gv || !GvCVu(gv))) {
6938 int probable_sub = 0;
6939 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6940 probable_sub = 1;
6941 else if (isALPHA(*s)) {
6942 char tmpbuf[1024];
6943 STRLEN tmplen;
6944 d = s;
6945 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6946 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6947 probable_sub = 1;
6948 else {
6949 while (d < PL_bufend && isSPACE(*d))
6950 d++;
6951 if (*d == '=' && d[1] == '>')
6952 probable_sub = 1;
6953 }
6954 }
6955 if (probable_sub) {
af9f5953
BF
6956 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
6957 SVt_PVCV);
6154021b 6958 op_free(pl_yylval.opval);
f7461760 6959 pl_yylval.opval = rv2cv_op;
6154021b 6960 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6961 PL_last_lop = PL_oldbufptr;
6962 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6963 PL_nextwhite = PL_thiswhite;
6964 PL_thiswhite = 0;
6965 start_force(PL_curforce);
6154021b 6966 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6967 PL_expect = XTERM;
cd81e915
NC
6968 PL_nextwhite = nextPL_nextwhite;
6969 curmad('X', PL_thistoken);
6b29d1f5 6970 PL_thistoken = newSVpvs("");
5db06880 6971 force_next(WORD);
78cdf107
Z
6972 if (!PL_lex_allbrackets &&
6973 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6974 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6975 TOKEN(NOAMP);
6976 }
6977#else
6154021b 6978 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6979 PL_expect = XTERM;
8990e307 6980 force_next(WORD);
78cdf107
Z
6981 if (!PL_lex_allbrackets &&
6982 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6983 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 6984 TOKEN(NOAMP);
5db06880 6985#endif
8990e307 6986 }
748a9306 6987
8990e307
LW
6988 /* Call it a bare word */
6989
5603f27d 6990 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6991 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6992 else {
9a073a1d
RGS
6993 bareword:
6994 /* after "print" and similar functions (corresponding to
6995 * "F? L" in opcode.pl), whatever wasn't already parsed as
6996 * a filehandle should be subject to "strict subs".
6997 * Likewise for the optional indirect-object argument to system
6998 * or exec, which can't be a bareword */
6999 if ((PL_last_lop_op == OP_PRINT
7000 || PL_last_lop_op == OP_PRTF
7001 || PL_last_lop_op == OP_SAY
7002 || PL_last_lop_op == OP_SYSTEM
7003 || PL_last_lop_op == OP_EXEC)
7004 && (PL_hints & HINT_STRICT_SUBS))
7005 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
7006 if (lastchar != '-') {
7007 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
7008 d = PL_tokenbuf;
7009 while (isLOWER(*d))
7010 d++;
af9f5953 7011 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
9014280d 7012 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
7013 PL_tokenbuf);
7014 }
748a9306
LW
7015 }
7016 }
f7461760 7017 op_free(rv2cv_op);
c3e0f903
GS
7018
7019 safe_bareword:
9b387841
NC
7020 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7021 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7022 "Operator or semicolon missing before %c%s",
7023 lastchar, PL_tokenbuf);
7024 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7025 "Ambiguous use of %c resolved as operator %c",
7026 lastchar, lastchar);
748a9306 7027 }
93a17b20 7028 TOKEN(WORD);
79072805 7029 }
79072805 7030
68dc0745 7031 case KEY___FILE__:
7eb971ee 7032 FUN0OP(
14f0f125 7033 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7eb971ee 7034 );
46fc3d4c 7035
79072805 7036 case KEY___LINE__:
7eb971ee
FC
7037 FUN0OP(
7038 (OP*)newSVOP(OP_CONST, 0,
7039 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7040 );
68dc0745 7041
7042 case KEY___PACKAGE__:
7eb971ee
FC
7043 FUN0OP(
7044 (OP*)newSVOP(OP_CONST, 0,
3280af22 7045 (PL_curstash
5aaec2b4 7046 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
7047 : &PL_sv_undef))
7048 );
79072805 7049
e50aee73 7050 case KEY___DATA__:
79072805
LW
7051 case KEY___END__: {
7052 GV *gv;
3280af22 7053 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 7054 const char *pname = "main";
affc13fc
FC
7055 STRLEN plen = 4;
7056 U32 putf8 = 0;
3280af22 7057 if (PL_tokenbuf[2] == 'D')
affc13fc
FC
7058 {
7059 HV * const stash =
7060 PL_curstash ? PL_curstash : PL_defstash;
7061 pname = HvNAME_get(stash);
7062 plen = HvNAMELEN (stash);
7063 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7064 }
7065 gv = gv_fetchpvn_flags(
7066 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7067 plen+6, GV_ADD|putf8, SVt_PVIO
7068 );
a5f75d66 7069 GvMULTI_on(gv);
79072805 7070 if (!GvIO(gv))
a0d0e21e 7071 GvIOp(gv) = newIO();
3280af22 7072 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
7073#if defined(HAS_FCNTL) && defined(F_SETFD)
7074 {
f54cb97a 7075 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
7076 fcntl(fd,F_SETFD,fd >= 3);
7077 }
79072805 7078#endif
fd049845 7079 /* Mark this internal pseudo-handle as clean */
7080 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 7081 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 7082 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 7083 else
50952442 7084 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
7085#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7086 /* if the script was opened in binmode, we need to revert
53129d29 7087 * it to text mode for compatibility; but only iff it has CRs
c39cd008 7088 * XXX this is a questionable hack at best. */
53129d29
GS
7089 if (PL_bufend-PL_bufptr > 2
7090 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
7091 {
7092 Off_t loc = 0;
50952442 7093 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
7094 loc = PerlIO_tell(PL_rsfp);
7095 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7096 }
2986a63f
JH
7097#ifdef NETWARE
7098 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7099#else
c39cd008 7100 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 7101#endif /* NETWARE */
c39cd008
GS
7102 if (loc > 0)
7103 PerlIO_seek(PL_rsfp, loc, 0);
7104 }
7105 }
7106#endif
7948272d 7107#ifdef PERLIO_LAYERS
52d2e0f4
JH
7108 if (!IN_BYTES) {
7109 if (UTF)
7110 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7111 else if (PL_encoding) {
7112 SV *name;
7113 dSP;
7114 ENTER;
7115 SAVETMPS;
7116 PUSHMARK(sp);
7117 EXTEND(SP, 1);
7118 XPUSHs(PL_encoding);
7119 PUTBACK;
7120 call_method("name", G_SCALAR);
7121 SPAGAIN;
7122 name = POPs;
7123 PUTBACK;
bfed75c6 7124 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7125 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7126 SVfARG(name)));
52d2e0f4
JH
7127 FREETMPS;
7128 LEAVE;
7129 }
7130 }
7948272d 7131#endif
5db06880
NC
7132#ifdef PERL_MAD
7133 if (PL_madskills) {
cd81e915
NC
7134 if (PL_realtokenstart >= 0) {
7135 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7136 if (!PL_endwhite)
6b29d1f5 7137 PL_endwhite = newSVpvs("");
cd81e915
NC
7138 sv_catsv(PL_endwhite, PL_thiswhite);
7139 PL_thiswhite = 0;
7140 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7141 PL_realtokenstart = -1;
5db06880 7142 }
5cc814fd
NC
7143 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7144 != NULL) ;
5db06880
NC
7145 }
7146#endif
4608196e 7147 PL_rsfp = NULL;
79072805
LW
7148 }
7149 goto fake_eof;
e929a76b 7150 }
de3bb511 7151
84ed0108 7152 case KEY___SUB__:
1a35f9ff 7153 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
84ed0108 7154
8990e307 7155 case KEY_AUTOLOAD:
ed6116ce 7156 case KEY_DESTROY:
79072805 7157 case KEY_BEGIN:
3c10abe3 7158 case KEY_UNITCHECK:
7d30b5c4 7159 case KEY_CHECK:
7d07dbc2 7160 case KEY_INIT:
7d30b5c4 7161 case KEY_END:
3280af22
NIS
7162 if (PL_expect == XSTATE) {
7163 s = PL_bufptr;
93a17b20 7164 goto really_sub;
79072805
LW
7165 }
7166 goto just_a_word;
7167
a0d0e21e
LW
7168 case KEY_CORE:
7169 if (*s == ':' && s[1] == ':') {
7170 s += 2;
748a9306 7171 d = s;
3280af22 7172 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
9dcb8368 7173 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
6798c92b 7174 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
7175 if (tmp < 0)
7176 tmp = -tmp;
d67594ff
FC
7177 else if (tmp == KEY_require || tmp == KEY_do
7178 || tmp == KEY_glob)
a72a1c8b 7179 /* that's a way to remember we saw "CORE::" */
850e8516 7180 orig_keyword = tmp;
a0d0e21e
LW
7181 goto reserved_word;
7182 }
7183 goto just_a_word;
7184
463ee0b2
LW
7185 case KEY_abs:
7186 UNI(OP_ABS);
7187
79072805
LW
7188 case KEY_alarm:
7189 UNI(OP_ALARM);
7190
7191 case KEY_accept:
a0d0e21e 7192 LOP(OP_ACCEPT,XTERM);
79072805 7193
463ee0b2 7194 case KEY_and:
78cdf107
Z
7195 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7196 return REPORT(0);
463ee0b2
LW
7197 OPERATOR(ANDOP);
7198
79072805 7199 case KEY_atan2:
a0d0e21e 7200 LOP(OP_ATAN2,XTERM);
85e6fe83 7201
79072805 7202 case KEY_bind:
a0d0e21e 7203 LOP(OP_BIND,XTERM);
79072805
LW
7204
7205 case KEY_binmode:
1c1fc3ea 7206 LOP(OP_BINMODE,XTERM);
79072805
LW
7207
7208 case KEY_bless:
a0d0e21e 7209 LOP(OP_BLESS,XTERM);
79072805 7210
0d863452
RH
7211 case KEY_break:
7212 FUN0(OP_BREAK);
7213
79072805
LW
7214 case KEY_chop:
7215 UNI(OP_CHOP);
7216
7217 case KEY_continue:
0d863452
RH
7218 /* We have to disambiguate the two senses of
7219 "continue". If the next token is a '{' then
7220 treat it as the start of a continue block;
7221 otherwise treat it as a control operator.
7222 */
7223 s = skipspace(s);
7224 if (*s == '{')
79072805 7225 PREBLOCK(CONTINUE);
0d863452
RH
7226 else
7227 FUN0(OP_CONTINUE);
79072805
LW
7228
7229 case KEY_chdir:
fafc274c
NC
7230 /* may use HOME */
7231 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7232 UNI(OP_CHDIR);
7233
7234 case KEY_close:
7235 UNI(OP_CLOSE);
7236
7237 case KEY_closedir:
7238 UNI(OP_CLOSEDIR);
7239
7240 case KEY_cmp:
78cdf107
Z
7241 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7242 return REPORT(0);
79072805
LW
7243 Eop(OP_SCMP);
7244
7245 case KEY_caller:
7246 UNI(OP_CALLER);
7247
7248 case KEY_crypt:
7249#ifdef FCRYPT
f4c556ac
GS
7250 if (!PL_cryptseen) {
7251 PL_cryptseen = TRUE;
de3bb511 7252 init_des();
f4c556ac 7253 }
a687059c 7254#endif
a0d0e21e 7255 LOP(OP_CRYPT,XTERM);
79072805
LW
7256
7257 case KEY_chmod:
a0d0e21e 7258 LOP(OP_CHMOD,XTERM);
79072805
LW
7259
7260 case KEY_chown:
a0d0e21e 7261 LOP(OP_CHOWN,XTERM);
79072805
LW
7262
7263 case KEY_connect:
a0d0e21e 7264 LOP(OP_CONNECT,XTERM);
79072805 7265
463ee0b2
LW
7266 case KEY_chr:
7267 UNI(OP_CHR);
7268
79072805
LW
7269 case KEY_cos:
7270 UNI(OP_COS);
7271
7272 case KEY_chroot:
7273 UNI(OP_CHROOT);
7274
0d863452
RH
7275 case KEY_default:
7276 PREBLOCK(DEFAULT);
7277
79072805 7278 case KEY_do:
29595ff2 7279 s = SKIPSPACE1(s);
79072805 7280 if (*s == '{')
a0d0e21e 7281 PRETERMBLOCK(DO);
79072805 7282 if (*s != '\'')
89c5585f 7283 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
7284 if (orig_keyword == KEY_do) {
7285 orig_keyword = 0;
6154021b 7286 pl_yylval.ival = 1;
850e8516
RGS
7287 }
7288 else
6154021b 7289 pl_yylval.ival = 0;
378cc40b 7290 OPERATOR(DO);
79072805
LW
7291
7292 case KEY_die:
3280af22 7293 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7294 LOP(OP_DIE,XTERM);
79072805
LW
7295
7296 case KEY_defined:
7297 UNI(OP_DEFINED);
7298
7299 case KEY_delete:
a0d0e21e 7300 UNI(OP_DELETE);
79072805
LW
7301
7302 case KEY_dbmopen:
74e8ce34
NC
7303 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7304 STR_WITH_LEN("NDBM_File::"),
7305 STR_WITH_LEN("DB_File::"),
7306 STR_WITH_LEN("GDBM_File::"),
7307 STR_WITH_LEN("SDBM_File::"),
7308 STR_WITH_LEN("ODBM_File::"),
7309 NULL);
a0d0e21e 7310 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7311
7312 case KEY_dbmclose:
7313 UNI(OP_DBMCLOSE);
7314
7315 case KEY_dump:
a0d0e21e 7316 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7317 LOOPX(OP_DUMP);
7318
7319 case KEY_else:
7320 PREBLOCK(ELSE);
7321
7322 case KEY_elsif:
6154021b 7323 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7324 OPERATOR(ELSIF);
7325
7326 case KEY_eq:
78cdf107
Z
7327 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7328 return REPORT(0);
79072805
LW
7329 Eop(OP_SEQ);
7330
a0d0e21e
LW
7331 case KEY_exists:
7332 UNI(OP_EXISTS);
4e553d73 7333
79072805 7334 case KEY_exit:
5db06880
NC
7335 if (PL_madskills)
7336 UNI(OP_INT);
79072805
LW
7337 UNI(OP_EXIT);
7338
7339 case KEY_eval:
29595ff2 7340 s = SKIPSPACE1(s);
32e2a35d
RGS
7341 if (*s == '{') { /* block eval */
7342 PL_expect = XTERMBLOCK;
7343 UNIBRACK(OP_ENTERTRY);
7344 }
7345 else { /* string eval */
7346 PL_expect = XTERM;
7347 UNIBRACK(OP_ENTEREVAL);
7348 }
79072805 7349
7d789282
FC
7350 case KEY_evalbytes:
7351 PL_expect = XTERM;
7352 UNIBRACK(-OP_ENTEREVAL);
7353
79072805
LW
7354 case KEY_eof:
7355 UNI(OP_EOF);
7356
7357 case KEY_exp:
7358 UNI(OP_EXP);
7359
7360 case KEY_each:
7361 UNI(OP_EACH);
7362
7363 case KEY_exec:
a0d0e21e 7364 LOP(OP_EXEC,XREF);
79072805
LW
7365
7366 case KEY_endhostent:
7367 FUN0(OP_EHOSTENT);
7368
7369 case KEY_endnetent:
7370 FUN0(OP_ENETENT);
7371
7372 case KEY_endservent:
7373 FUN0(OP_ESERVENT);
7374
7375 case KEY_endprotoent:
7376 FUN0(OP_EPROTOENT);
7377
7378 case KEY_endpwent:
7379 FUN0(OP_EPWENT);
7380
7381 case KEY_endgrent:
7382 FUN0(OP_EGRENT);
7383
7384 case KEY_for:
7385 case KEY_foreach:
78cdf107
Z
7386 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7387 return REPORT(0);
6154021b 7388 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7389 s = SKIPSPACE1(s);
7e2040f0 7390 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7391 char *p = s;
5db06880
NC
7392#ifdef PERL_MAD
7393 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7394#endif
7395
3280af22 7396 if ((PL_bufend - p) >= 3 &&
55497cff 7397 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7398 p += 2;
77ca0c92
LW
7399 else if ((PL_bufend - p) >= 4 &&
7400 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7401 p += 3;
29595ff2 7402 p = PEEKSPACE(p);
7e2040f0 7403 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7404 p = scan_ident(p, PL_bufend,
7405 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7406 p = PEEKSPACE(p);
77ca0c92
LW
7407 }
7408 if (*p != '$')
cea2e8a9 7409 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7410#ifdef PERL_MAD
7411 s = SvPVX(PL_linestr) + soff;
7412#endif
55497cff 7413 }
79072805
LW
7414 OPERATOR(FOR);
7415
7416 case KEY_formline:
a0d0e21e 7417 LOP(OP_FORMLINE,XTERM);
79072805
LW
7418
7419 case KEY_fork:
7420 FUN0(OP_FORK);
7421
838f2281
BF
7422 case KEY_fc:
7423 UNI(OP_FC);
7424
79072805 7425 case KEY_fcntl:
a0d0e21e 7426 LOP(OP_FCNTL,XTERM);
79072805
LW
7427
7428 case KEY_fileno:
7429 UNI(OP_FILENO);
7430
7431 case KEY_flock:
a0d0e21e 7432 LOP(OP_FLOCK,XTERM);
79072805
LW
7433
7434 case KEY_gt:
78cdf107
Z
7435 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7436 return REPORT(0);
79072805
LW
7437 Rop(OP_SGT);
7438
7439 case KEY_ge:
78cdf107
Z
7440 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7441 return REPORT(0);
79072805
LW
7442 Rop(OP_SGE);
7443
7444 case KEY_grep:
2c38e13d 7445 LOP(OP_GREPSTART, XREF);
79072805
LW
7446
7447 case KEY_goto:
a0d0e21e 7448 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7449 LOOPX(OP_GOTO);
7450
7451 case KEY_gmtime:
7452 UNI(OP_GMTIME);
7453
7454 case KEY_getc:
6f33ba73 7455 UNIDOR(OP_GETC);
79072805
LW
7456
7457 case KEY_getppid:
7458 FUN0(OP_GETPPID);
7459
7460 case KEY_getpgrp:
7461 UNI(OP_GETPGRP);
7462
7463 case KEY_getpriority:
a0d0e21e 7464 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7465
7466 case KEY_getprotobyname:
7467 UNI(OP_GPBYNAME);
7468
7469 case KEY_getprotobynumber:
a0d0e21e 7470 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7471
7472 case KEY_getprotoent:
7473 FUN0(OP_GPROTOENT);
7474
7475 case KEY_getpwent:
7476 FUN0(OP_GPWENT);
7477
7478 case KEY_getpwnam:
ff68c719 7479 UNI(OP_GPWNAM);
79072805
LW
7480
7481 case KEY_getpwuid:
ff68c719 7482 UNI(OP_GPWUID);
79072805
LW
7483
7484 case KEY_getpeername:
7485 UNI(OP_GETPEERNAME);
7486
7487 case KEY_gethostbyname:
7488 UNI(OP_GHBYNAME);
7489
7490 case KEY_gethostbyaddr:
a0d0e21e 7491 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7492
7493 case KEY_gethostent:
7494 FUN0(OP_GHOSTENT);
7495
7496 case KEY_getnetbyname:
7497 UNI(OP_GNBYNAME);
7498
7499 case KEY_getnetbyaddr:
a0d0e21e 7500 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7501
7502 case KEY_getnetent:
7503 FUN0(OP_GNETENT);
7504
7505 case KEY_getservbyname:
a0d0e21e 7506 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7507
7508 case KEY_getservbyport:
a0d0e21e 7509 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7510
7511 case KEY_getservent:
7512 FUN0(OP_GSERVENT);
7513
7514 case KEY_getsockname:
7515 UNI(OP_GETSOCKNAME);
7516
7517 case KEY_getsockopt:
a0d0e21e 7518 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7519
7520 case KEY_getgrent:
7521 FUN0(OP_GGRENT);
7522
7523 case KEY_getgrnam:
ff68c719 7524 UNI(OP_GGRNAM);
79072805
LW
7525
7526 case KEY_getgrgid:
ff68c719 7527 UNI(OP_GGRGID);
79072805
LW
7528
7529 case KEY_getlogin:
7530 FUN0(OP_GETLOGIN);
7531
0d863452 7532 case KEY_given:
6154021b 7533 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7534 OPERATOR(GIVEN);
7535
93a17b20 7536 case KEY_glob:
d67594ff
FC
7537 LOP(
7538 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7539 XTERM
7540 );
93a17b20 7541
79072805
LW
7542 case KEY_hex:
7543 UNI(OP_HEX);
7544
7545 case KEY_if:
78cdf107
Z
7546 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7547 return REPORT(0);
6154021b 7548 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7549 OPERATOR(IF);
7550
7551 case KEY_index:
a0d0e21e 7552 LOP(OP_INDEX,XTERM);
79072805
LW
7553
7554 case KEY_int:
7555 UNI(OP_INT);
7556
7557 case KEY_ioctl:
a0d0e21e 7558 LOP(OP_IOCTL,XTERM);
79072805
LW
7559
7560 case KEY_join:
a0d0e21e 7561 LOP(OP_JOIN,XTERM);
79072805
LW
7562
7563 case KEY_keys:
7564 UNI(OP_KEYS);
7565
7566 case KEY_kill:
a0d0e21e 7567 LOP(OP_KILL,XTERM);
79072805
LW
7568
7569 case KEY_last:
a0d0e21e 7570 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7571 LOOPX(OP_LAST);
4e553d73 7572
79072805
LW
7573 case KEY_lc:
7574 UNI(OP_LC);
7575
7576 case KEY_lcfirst:
7577 UNI(OP_LCFIRST);
7578
7579 case KEY_local:
6154021b 7580 pl_yylval.ival = 0;
79072805
LW
7581 OPERATOR(LOCAL);
7582
7583 case KEY_length:
7584 UNI(OP_LENGTH);
7585
7586 case KEY_lt:
78cdf107
Z
7587 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7588 return REPORT(0);
79072805
LW
7589 Rop(OP_SLT);
7590
7591 case KEY_le:
78cdf107
Z
7592 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7593 return REPORT(0);
79072805
LW
7594 Rop(OP_SLE);
7595
7596 case KEY_localtime:
7597 UNI(OP_LOCALTIME);
7598
7599 case KEY_log:
7600 UNI(OP_LOG);
7601
7602 case KEY_link:
a0d0e21e 7603 LOP(OP_LINK,XTERM);
79072805
LW
7604
7605 case KEY_listen:
a0d0e21e 7606 LOP(OP_LISTEN,XTERM);
79072805 7607
c0329465
MB
7608 case KEY_lock:
7609 UNI(OP_LOCK);
7610
79072805
LW
7611 case KEY_lstat:
7612 UNI(OP_LSTAT);
7613
7614 case KEY_m:
8782bef2 7615 s = scan_pat(s,OP_MATCH);
79072805
LW
7616 TERM(sublex_start());
7617
a0d0e21e 7618 case KEY_map:
2c38e13d 7619 LOP(OP_MAPSTART, XREF);
4e4e412b 7620
79072805 7621 case KEY_mkdir:
a0d0e21e 7622 LOP(OP_MKDIR,XTERM);
79072805
LW
7623
7624 case KEY_msgctl:
a0d0e21e 7625 LOP(OP_MSGCTL,XTERM);
79072805
LW
7626
7627 case KEY_msgget:
a0d0e21e 7628 LOP(OP_MSGGET,XTERM);
79072805
LW
7629
7630 case KEY_msgrcv:
a0d0e21e 7631 LOP(OP_MSGRCV,XTERM);
79072805
LW
7632
7633 case KEY_msgsnd:
a0d0e21e 7634 LOP(OP_MSGSND,XTERM);
79072805 7635
77ca0c92 7636 case KEY_our:
93a17b20 7637 case KEY_my:
952306ac 7638 case KEY_state:
eac04b2e 7639 PL_in_my = (U16)tmp;
29595ff2 7640 s = SKIPSPACE1(s);
7e2040f0 7641 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7642#ifdef PERL_MAD
7643 char* start = s;
7644#endif
3280af22 7645 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7646 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7647 goto really_sub;
def3634b 7648 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7649 if (!PL_in_my_stash) {
c750a3ec 7650 char tmpbuf[1024];
3280af22 7651 PL_bufptr = s;
d9fad198 7652 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7653 yyerror(tmpbuf);
7654 }
5db06880
NC
7655#ifdef PERL_MAD
7656 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7657 sv_catsv(PL_thistoken, PL_nextwhite);
7658 PL_nextwhite = 0;
7659 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7660 }
7661#endif
c750a3ec 7662 }
6154021b 7663 pl_yylval.ival = 1;
55497cff 7664 OPERATOR(MY);
93a17b20 7665
79072805 7666 case KEY_next:
a0d0e21e 7667 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7668 LOOPX(OP_NEXT);
7669
7670 case KEY_ne:
78cdf107
Z
7671 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7672 return REPORT(0);
79072805
LW
7673 Eop(OP_SNE);
7674
a0d0e21e 7675 case KEY_no:
468aa647 7676 s = tokenize_use(0, s);
a0d0e21e
LW
7677 OPERATOR(USE);
7678
7679 case KEY_not:
29595ff2 7680 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 7681 FUN1(OP_NOT);
78cdf107
Z
7682 else {
7683 if (!PL_lex_allbrackets &&
7684 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7685 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 7686 OPERATOR(NOTOP);
78cdf107 7687 }
a0d0e21e 7688
79072805 7689 case KEY_open:
29595ff2 7690 s = SKIPSPACE1(s);
7e2040f0 7691 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7692 const char *t;
c35e046a
AL
7693 for (d = s; isALNUM_lazy_if(d,UTF);)
7694 d++;
7695 for (t=d; isSPACE(*t);)
7696 t++;
e2ab214b 7697 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7698 /* [perl #16184] */
7699 && !(t[0] == '=' && t[1] == '>')
db3abe52 7700 && !(t[0] == ':' && t[1] == ':')
240d1b6f 7701 && !keyword(s, d-s, 0)
66fbe8fb 7702 ) {
5f66b61c 7703 int parms_len = (int)(d-s);
9014280d 7704 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7705 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7706 parms_len, s, parms_len, s);
66fbe8fb 7707 }
93a17b20 7708 }
a0d0e21e 7709 LOP(OP_OPEN,XTERM);
79072805 7710
463ee0b2 7711 case KEY_or:
78cdf107
Z
7712 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7713 return REPORT(0);
6154021b 7714 pl_yylval.ival = OP_OR;
463ee0b2
LW
7715 OPERATOR(OROP);
7716
79072805
LW
7717 case KEY_ord:
7718 UNI(OP_ORD);
7719
7720 case KEY_oct:
7721 UNI(OP_OCT);
7722
7723 case KEY_opendir:
a0d0e21e 7724 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7725
7726 case KEY_print:
3280af22 7727 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7728 LOP(OP_PRINT,XREF);
79072805
LW
7729
7730 case KEY_printf:
3280af22 7731 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7732 LOP(OP_PRTF,XREF);
79072805 7733
c07a80fd 7734 case KEY_prototype:
7735 UNI(OP_PROTOTYPE);
7736
79072805 7737 case KEY_push:
a0d0e21e 7738 LOP(OP_PUSH,XTERM);
79072805
LW
7739
7740 case KEY_pop:
6f33ba73 7741 UNIDOR(OP_POP);
79072805 7742
a0d0e21e 7743 case KEY_pos:
6f33ba73 7744 UNIDOR(OP_POS);
4e553d73 7745
79072805 7746 case KEY_pack:
a0d0e21e 7747 LOP(OP_PACK,XTERM);
79072805
LW
7748
7749 case KEY_package:
a0d0e21e 7750 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7751 s = SKIPSPACE1(s);
91152fc1 7752 s = force_strict_version(s);
4e4da3ac 7753 PL_lex_expect = XBLOCK;
79072805
LW
7754 OPERATOR(PACKAGE);
7755
7756 case KEY_pipe:
a0d0e21e 7757 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7758
7759 case KEY_q:
5db06880 7760 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7761 if (!s)
d4c19fe8 7762 missingterm(NULL);
6154021b 7763 pl_yylval.ival = OP_CONST;
79072805
LW
7764 TERM(sublex_start());
7765
a0d0e21e
LW
7766 case KEY_quotemeta:
7767 UNI(OP_QUOTEMETA);
7768
ea25a9b2
Z
7769 case KEY_qw: {
7770 OP *words = NULL;
5db06880 7771 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7772 if (!s)
d4c19fe8 7773 missingterm(NULL);
3480a8d2 7774 PL_expect = XOPERATOR;
8127e0e3 7775 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
7776 int warned_comma = !ckWARN(WARN_QW);
7777 int warned_comment = warned_comma;
3280af22 7778 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7779 while (len) {
d4c19fe8
AL
7780 for (; isSPACE(*d) && len; --len, ++d)
7781 /**/;
8127e0e3 7782 if (len) {
d4c19fe8 7783 SV *sv;
f54cb97a 7784 const char *b = d;
7e03b518 7785 if (!warned_comma || !warned_comment) {
8127e0e3 7786 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 7787 if (!warned_comma && *d == ',') {
9014280d 7788 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7789 "Possible attempt to separate words with commas");
7e03b518 7790 ++warned_comma;
8127e0e3 7791 }
7e03b518 7792 else if (!warned_comment && *d == '#') {
9014280d 7793 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7794 "Possible attempt to put comments in qw() list");
7e03b518 7795 ++warned_comment;
8127e0e3
GS
7796 }
7797 }
7798 }
7799 else {
d4c19fe8
AL
7800 for (; !isSPACE(*d) && len; --len, ++d)
7801 /**/;
8127e0e3 7802 }
740cce10 7803 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7804 words = op_append_elem(OP_LIST, words,
7948272d 7805 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7806 }
7807 }
7808 }
ea25a9b2
Z
7809 if (!words)
7810 words = newNULLLIST();
37fd879b 7811 if (PL_lex_stuff) {
8127e0e3 7812 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7813 PL_lex_stuff = NULL;
37fd879b 7814 }
ea25a9b2
Z
7815 PL_expect = XOPERATOR;
7816 pl_yylval.opval = sawparens(words);
7817 TOKEN(QWLIST);
7818 }
8990e307 7819
79072805 7820 case KEY_qq:
5db06880 7821 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7822 if (!s)
d4c19fe8 7823 missingterm(NULL);
6154021b 7824 pl_yylval.ival = OP_STRINGIFY;
3280af22 7825 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 7826 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
7827 TERM(sublex_start());
7828
8782bef2
GB
7829 case KEY_qr:
7830 s = scan_pat(s,OP_QR);
7831 TERM(sublex_start());
7832
79072805 7833 case KEY_qx:
5db06880 7834 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7835 if (!s)
d4c19fe8 7836 missingterm(NULL);
9b201d7d 7837 readpipe_override();
79072805
LW
7838 TERM(sublex_start());
7839
7840 case KEY_return:
7841 OLDLOP(OP_RETURN);
7842
7843 case KEY_require:
29595ff2 7844 s = SKIPSPACE1(s);
e759cc13
RGS
7845 if (isDIGIT(*s)) {
7846 s = force_version(s, FALSE);
a7cb1f99 7847 }
e759cc13
RGS
7848 else if (*s != 'v' || !isDIGIT(s[1])
7849 || (s = force_version(s, TRUE), *s == 'v'))
7850 {
a7cb1f99
GS
7851 *PL_tokenbuf = '\0';
7852 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7853 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
af9f5953
BF
7854 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7855 GV_ADD | (UTF ? SVf_UTF8 : 0));
a7cb1f99
GS
7856 else if (*s == '<')
7857 yyerror("<> should be quotes");
7858 }
a72a1c8b
RGS
7859 if (orig_keyword == KEY_require) {
7860 orig_keyword = 0;
6154021b 7861 pl_yylval.ival = 1;
a72a1c8b
RGS
7862 }
7863 else
6154021b 7864 pl_yylval.ival = 0;
a72a1c8b
RGS
7865 PL_expect = XTERM;
7866 PL_bufptr = s;
7867 PL_last_uni = PL_oldbufptr;
7868 PL_last_lop_op = OP_REQUIRE;
7869 s = skipspace(s);
7870 return REPORT( (int)REQUIRE );
79072805
LW
7871
7872 case KEY_reset:
7873 UNI(OP_RESET);
7874
7875 case KEY_redo:
a0d0e21e 7876 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7877 LOOPX(OP_REDO);
7878
7879 case KEY_rename:
a0d0e21e 7880 LOP(OP_RENAME,XTERM);
79072805
LW
7881
7882 case KEY_rand:
7883 UNI(OP_RAND);
7884
7885 case KEY_rmdir:
7886 UNI(OP_RMDIR);
7887
7888 case KEY_rindex:
a0d0e21e 7889 LOP(OP_RINDEX,XTERM);
79072805
LW
7890
7891 case KEY_read:
a0d0e21e 7892 LOP(OP_READ,XTERM);
79072805
LW
7893
7894 case KEY_readdir:
7895 UNI(OP_READDIR);
7896
93a17b20 7897 case KEY_readline:
6f33ba73 7898 UNIDOR(OP_READLINE);
93a17b20
LW
7899
7900 case KEY_readpipe:
0858480c 7901 UNIDOR(OP_BACKTICK);
93a17b20 7902
79072805
LW
7903 case KEY_rewinddir:
7904 UNI(OP_REWINDDIR);
7905
7906 case KEY_recv:
a0d0e21e 7907 LOP(OP_RECV,XTERM);
79072805
LW
7908
7909 case KEY_reverse:
a0d0e21e 7910 LOP(OP_REVERSE,XTERM);
79072805
LW
7911
7912 case KEY_readlink:
6f33ba73 7913 UNIDOR(OP_READLINK);
79072805
LW
7914
7915 case KEY_ref:
7916 UNI(OP_REF);
7917
7918 case KEY_s:
7919 s = scan_subst(s);
6154021b 7920 if (pl_yylval.opval)
79072805
LW
7921 TERM(sublex_start());
7922 else
7923 TOKEN(1); /* force error */
7924
0d863452
RH
7925 case KEY_say:
7926 checkcomma(s,PL_tokenbuf,"filehandle");
7927 LOP(OP_SAY,XREF);
7928
a0d0e21e
LW
7929 case KEY_chomp:
7930 UNI(OP_CHOMP);
4e553d73 7931
79072805
LW
7932 case KEY_scalar:
7933 UNI(OP_SCALAR);
7934
7935 case KEY_select:
a0d0e21e 7936 LOP(OP_SELECT,XTERM);
79072805
LW
7937
7938 case KEY_seek:
a0d0e21e 7939 LOP(OP_SEEK,XTERM);
79072805
LW
7940
7941 case KEY_semctl:
a0d0e21e 7942 LOP(OP_SEMCTL,XTERM);
79072805
LW
7943
7944 case KEY_semget:
a0d0e21e 7945 LOP(OP_SEMGET,XTERM);
79072805
LW
7946
7947 case KEY_semop:
a0d0e21e 7948 LOP(OP_SEMOP,XTERM);
79072805
LW
7949
7950 case KEY_send:
a0d0e21e 7951 LOP(OP_SEND,XTERM);
79072805
LW
7952
7953 case KEY_setpgrp:
a0d0e21e 7954 LOP(OP_SETPGRP,XTERM);
79072805
LW
7955
7956 case KEY_setpriority:
a0d0e21e 7957 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7958
7959 case KEY_sethostent:
ff68c719 7960 UNI(OP_SHOSTENT);
79072805
LW
7961
7962 case KEY_setnetent:
ff68c719 7963 UNI(OP_SNETENT);
79072805
LW
7964
7965 case KEY_setservent:
ff68c719 7966 UNI(OP_SSERVENT);
79072805
LW
7967
7968 case KEY_setprotoent:
ff68c719 7969 UNI(OP_SPROTOENT);
79072805
LW
7970
7971 case KEY_setpwent:
7972 FUN0(OP_SPWENT);
7973
7974 case KEY_setgrent:
7975 FUN0(OP_SGRENT);
7976
7977 case KEY_seekdir:
a0d0e21e 7978 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7979
7980 case KEY_setsockopt:
a0d0e21e 7981 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7982
7983 case KEY_shift:
6f33ba73 7984 UNIDOR(OP_SHIFT);
79072805
LW
7985
7986 case KEY_shmctl:
a0d0e21e 7987 LOP(OP_SHMCTL,XTERM);
79072805
LW
7988
7989 case KEY_shmget:
a0d0e21e 7990 LOP(OP_SHMGET,XTERM);
79072805
LW
7991
7992 case KEY_shmread:
a0d0e21e 7993 LOP(OP_SHMREAD,XTERM);
79072805
LW
7994
7995 case KEY_shmwrite:
a0d0e21e 7996 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7997
7998 case KEY_shutdown:
a0d0e21e 7999 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
8000
8001 case KEY_sin:
8002 UNI(OP_SIN);
8003
8004 case KEY_sleep:
8005 UNI(OP_SLEEP);
8006
8007 case KEY_socket:
a0d0e21e 8008 LOP(OP_SOCKET,XTERM);
79072805
LW
8009
8010 case KEY_socketpair:
a0d0e21e 8011 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
8012
8013 case KEY_sort:
3280af22 8014 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 8015 s = SKIPSPACE1(s);
3280af22 8016 PL_expect = XTERM;
15f0808c 8017 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 8018 LOP(OP_SORT,XREF);
79072805
LW
8019
8020 case KEY_split:
a0d0e21e 8021 LOP(OP_SPLIT,XTERM);
79072805
LW
8022
8023 case KEY_sprintf:
a0d0e21e 8024 LOP(OP_SPRINTF,XTERM);
79072805
LW
8025
8026 case KEY_splice:
a0d0e21e 8027 LOP(OP_SPLICE,XTERM);
79072805
LW
8028
8029 case KEY_sqrt:
8030 UNI(OP_SQRT);
8031
8032 case KEY_srand:
8033 UNI(OP_SRAND);
8034
8035 case KEY_stat:
8036 UNI(OP_STAT);
8037
8038 case KEY_study:
79072805
LW
8039 UNI(OP_STUDY);
8040
8041 case KEY_substr:
a0d0e21e 8042 LOP(OP_SUBSTR,XTERM);
79072805
LW
8043
8044 case KEY_format:
8045 case KEY_sub:
93a17b20 8046 really_sub:
09bef843 8047 {
3280af22 8048 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 8049 SSize_t tboffset = 0;
09bef843 8050 expectation attrful;
28cc6278 8051 bool have_name, have_proto;
f54cb97a 8052 const int key = tmp;
09bef843 8053
5db06880
NC
8054#ifdef PERL_MAD
8055 SV *tmpwhite = 0;
8056
cd81e915 8057 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
af9f5953 8058 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
cd81e915 8059 PL_thistoken = 0;
5db06880
NC
8060
8061 d = s;
8062 s = SKIPSPACE2(s,tmpwhite);
8063#else
09bef843 8064 s = skipspace(s);
5db06880 8065#endif
09bef843 8066
7e2040f0 8067 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
8068 (*s == ':' && s[1] == ':'))
8069 {
5db06880 8070#ifdef PERL_MAD
4f61fd4b 8071 SV *nametoke = NULL;
5db06880
NC
8072#endif
8073
09bef843
SB
8074 PL_expect = XBLOCK;
8075 attrful = XATTRBLOCK;
b1b65b59
JH
8076 /* remember buffer pos'n for later force_word */
8077 tboffset = s - PL_oldbufptr;
09bef843 8078 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
8079#ifdef PERL_MAD
8080 if (PL_madskills)
af9f5953 8081 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
5db06880 8082#endif
6502358f
NC
8083 if (memchr(tmpbuf, ':', len))
8084 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
8085 else {
8086 sv_setsv(PL_subname,PL_curstname);
396482e1 8087 sv_catpvs(PL_subname,"::");
09bef843
SB
8088 sv_catpvn(PL_subname,tmpbuf,len);
8089 }
af9f5953
BF
8090 if (SvUTF8(PL_linestr))
8091 SvUTF8_on(PL_subname);
09bef843 8092 have_name = TRUE;
5db06880
NC
8093
8094#ifdef PERL_MAD
8095
8096 start_force(0);
8097 CURMAD('X', nametoke);
8098 CURMAD('_', tmpwhite);
8099 (void) force_word(PL_oldbufptr + tboffset, WORD,
8100 FALSE, TRUE, TRUE);
8101
8102 s = SKIPSPACE2(d,tmpwhite);
8103#else
8104 s = skipspace(d);
8105#endif
09bef843 8106 }
463ee0b2 8107 else {
09bef843
SB
8108 if (key == KEY_my)
8109 Perl_croak(aTHX_ "Missing name in \"my sub\"");
8110 PL_expect = XTERMBLOCK;
8111 attrful = XATTRTERM;
76f68e9b 8112 sv_setpvs(PL_subname,"?");
09bef843 8113 have_name = FALSE;
463ee0b2 8114 }
4633a7c4 8115
09bef843
SB
8116 if (key == KEY_format) {
8117 if (*s == '=')
8118 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 8119#ifdef PERL_MAD
cd81e915 8120 PL_thistoken = subtoken;
5db06880
NC
8121 s = d;
8122#else
09bef843 8123 if (have_name)
b1b65b59
JH
8124 (void) force_word(PL_oldbufptr + tboffset, WORD,
8125 FALSE, TRUE, TRUE);
5db06880 8126#endif
09bef843
SB
8127 OPERATOR(FORMAT);
8128 }
79072805 8129
09bef843
SB
8130 /* Look for a prototype */
8131 if (*s == '(') {
d9f2850e
RGS
8132 char *p;
8133 bool bad_proto = FALSE;
9e8d7757
RB
8134 bool in_brackets = FALSE;
8135 char greedy_proto = ' ';
8136 bool proto_after_greedy_proto = FALSE;
8137 bool must_be_last = FALSE;
8138 bool underscore = FALSE;
aef2a98a 8139 bool seen_underscore = FALSE;
197afce1 8140 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
dab1c735 8141 STRLEN tmplen;
09bef843 8142
5db06880 8143 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 8144 if (!s)
09bef843 8145 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8146 /* strip spaces and check for bad characters */
dab1c735 8147 d = SvPV(PL_lex_stuff, tmplen);
09bef843 8148 tmp = 0;
dab1c735 8149 for (p = d; tmplen; tmplen--, ++p) {
d9f2850e 8150 if (!isSPACE(*p)) {
dab1c735 8151 d[tmp++] = *p;
9e8d7757 8152
197afce1 8153 if (warnillegalproto) {
9e8d7757
RB
8154 if (must_be_last)
8155 proto_after_greedy_proto = TRUE;
dab1c735 8156 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
9e8d7757
RB
8157 bad_proto = TRUE;
8158 }
8159 else {
8160 if ( underscore ) {
34daab0f 8161 if ( !strchr(";@%", *p) )
9e8d7757
RB
8162 bad_proto = TRUE;
8163 underscore = FALSE;
8164 }
8165 if ( *p == '[' ) {
8166 in_brackets = TRUE;
8167 }
8168 else if ( *p == ']' ) {
8169 in_brackets = FALSE;
8170 }
8171 else if ( (*p == '@' || *p == '%') &&
8172 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8173 !in_brackets ) {
8174 must_be_last = TRUE;
8175 greedy_proto = *p;
8176 }
8177 else if ( *p == '_' ) {
aef2a98a 8178 underscore = seen_underscore = TRUE;
9e8d7757
RB
8179 }
8180 }
8181 }
d37a9538 8182 }
09bef843 8183 }
dab1c735 8184 d[tmp] = '\0';
9e8d7757 8185 if (proto_after_greedy_proto)
197afce1 8186 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8187 "Prototype after '%c' for %"SVf" : %s",
8188 greedy_proto, SVfARG(PL_subname), d);
dab1c735
BF
8189 if (bad_proto) {
8190 SV *dsv = newSVpvs_flags("", SVs_TEMP);
197afce1 8191 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8192 "Illegal character %sin prototype for %"SVf" : %s",
8193 seen_underscore ? "after '_' " : "",
dab1c735
BF
8194 SVfARG(PL_subname),
8195 sv_uni_display(dsv,
8196 newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
8197 tmp, UNI_DISPLAY_ISPRINT));
8198 }
8199 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8200 have_proto = TRUE;
68dc0745 8201
5db06880
NC
8202#ifdef PERL_MAD
8203 start_force(0);
cd81e915 8204 CURMAD('q', PL_thisopen);
5db06880 8205 CURMAD('_', tmpwhite);
cd81e915
NC
8206 CURMAD('=', PL_thisstuff);
8207 CURMAD('Q', PL_thisclose);
5db06880
NC
8208 NEXTVAL_NEXTTOKE.opval =
8209 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8210 PL_lex_stuff = NULL;
5db06880
NC
8211 force_next(THING);
8212
8213 s = SKIPSPACE2(s,tmpwhite);
8214#else
09bef843 8215 s = skipspace(s);
5db06880 8216#endif
4633a7c4 8217 }
09bef843
SB
8218 else
8219 have_proto = FALSE;
8220
8221 if (*s == ':' && s[1] != ':')
8222 PL_expect = attrful;
8e742a20
MHM
8223 else if (*s != '{' && key == KEY_sub) {
8224 if (!have_name)
8225 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8226 else if (*s != ';' && *s != '}')
be2597df 8227 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8228 }
09bef843 8229
5db06880
NC
8230#ifdef PERL_MAD
8231 start_force(0);
8232 if (tmpwhite) {
8233 if (PL_madskills)
6b29d1f5 8234 curmad('^', newSVpvs(""));
5db06880
NC
8235 CURMAD('_', tmpwhite);
8236 }
8237 force_next(0);
8238
cd81e915 8239 PL_thistoken = subtoken;
5db06880 8240#else
09bef843 8241 if (have_proto) {
9ded7720 8242 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8243 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8244 PL_lex_stuff = NULL;
09bef843 8245 force_next(THING);
68dc0745 8246 }
5db06880 8247#endif
09bef843 8248 if (!have_name) {
49a54bbe
NC
8249 if (PL_curstash)
8250 sv_setpvs(PL_subname, "__ANON__");
8251 else
8252 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8253 TOKEN(ANONSUB);
4633a7c4 8254 }
5db06880 8255#ifndef PERL_MAD
b1b65b59
JH
8256 (void) force_word(PL_oldbufptr + tboffset, WORD,
8257 FALSE, TRUE, TRUE);
5db06880 8258#endif
09bef843
SB
8259 if (key == KEY_my)
8260 TOKEN(MYSUB);
8261 TOKEN(SUB);
4633a7c4 8262 }
79072805
LW
8263
8264 case KEY_system:
a0d0e21e 8265 LOP(OP_SYSTEM,XREF);
79072805
LW
8266
8267 case KEY_symlink:
a0d0e21e 8268 LOP(OP_SYMLINK,XTERM);
79072805
LW
8269
8270 case KEY_syscall:
a0d0e21e 8271 LOP(OP_SYSCALL,XTERM);
79072805 8272
c07a80fd 8273 case KEY_sysopen:
8274 LOP(OP_SYSOPEN,XTERM);
8275
137443ea 8276 case KEY_sysseek:
8277 LOP(OP_SYSSEEK,XTERM);
8278
79072805 8279 case KEY_sysread:
a0d0e21e 8280 LOP(OP_SYSREAD,XTERM);
79072805
LW
8281
8282 case KEY_syswrite:
a0d0e21e 8283 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8284
8285 case KEY_tr:
8286 s = scan_trans(s);
8287 TERM(sublex_start());
8288
8289 case KEY_tell:
8290 UNI(OP_TELL);
8291
8292 case KEY_telldir:
8293 UNI(OP_TELLDIR);
8294
463ee0b2 8295 case KEY_tie:
a0d0e21e 8296 LOP(OP_TIE,XTERM);
463ee0b2 8297
c07a80fd 8298 case KEY_tied:
8299 UNI(OP_TIED);
8300
79072805
LW
8301 case KEY_time:
8302 FUN0(OP_TIME);
8303
8304 case KEY_times:
8305 FUN0(OP_TMS);
8306
8307 case KEY_truncate:
a0d0e21e 8308 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8309
8310 case KEY_uc:
8311 UNI(OP_UC);
8312
8313 case KEY_ucfirst:
8314 UNI(OP_UCFIRST);
8315
463ee0b2
LW
8316 case KEY_untie:
8317 UNI(OP_UNTIE);
8318
79072805 8319 case KEY_until:
78cdf107
Z
8320 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8321 return REPORT(0);
6154021b 8322 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8323 OPERATOR(UNTIL);
8324
8325 case KEY_unless:
78cdf107
Z
8326 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8327 return REPORT(0);
6154021b 8328 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8329 OPERATOR(UNLESS);
8330
8331 case KEY_unlink:
a0d0e21e 8332 LOP(OP_UNLINK,XTERM);
79072805
LW
8333
8334 case KEY_undef:
6f33ba73 8335 UNIDOR(OP_UNDEF);
79072805
LW
8336
8337 case KEY_unpack:
a0d0e21e 8338 LOP(OP_UNPACK,XTERM);
79072805
LW
8339
8340 case KEY_utime:
a0d0e21e 8341 LOP(OP_UTIME,XTERM);
79072805
LW
8342
8343 case KEY_umask:
6f33ba73 8344 UNIDOR(OP_UMASK);
79072805
LW
8345
8346 case KEY_unshift:
a0d0e21e
LW
8347 LOP(OP_UNSHIFT,XTERM);
8348
8349 case KEY_use:
468aa647 8350 s = tokenize_use(1, s);
a0d0e21e 8351 OPERATOR(USE);
79072805
LW
8352
8353 case KEY_values:
8354 UNI(OP_VALUES);
8355
8356 case KEY_vec:
a0d0e21e 8357 LOP(OP_VEC,XTERM);
79072805 8358
0d863452 8359 case KEY_when:
78cdf107
Z
8360 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8361 return REPORT(0);
6154021b 8362 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8363 OPERATOR(WHEN);
8364
79072805 8365 case KEY_while:
78cdf107
Z
8366 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8367 return REPORT(0);
6154021b 8368 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8369 OPERATOR(WHILE);
8370
8371 case KEY_warn:
3280af22 8372 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8373 LOP(OP_WARN,XTERM);
79072805
LW
8374
8375 case KEY_wait:
8376 FUN0(OP_WAIT);
8377
8378 case KEY_waitpid:
a0d0e21e 8379 LOP(OP_WAITPID,XTERM);
79072805
LW
8380
8381 case KEY_wantarray:
8382 FUN0(OP_WANTARRAY);
8383
8384 case KEY_write:
9d116dd7
JH
8385#ifdef EBCDIC
8386 {
df3728a2
JH
8387 char ctl_l[2];
8388 ctl_l[0] = toCTRL('L');
8389 ctl_l[1] = '\0';
fafc274c 8390 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8391 }
8392#else
fafc274c
NC
8393 /* Make sure $^L is defined */
8394 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8395#endif
79072805
LW
8396 UNI(OP_ENTERWRITE);
8397
8398 case KEY_x:
78cdf107
Z
8399 if (PL_expect == XOPERATOR) {
8400 if (*s == '=' && !PL_lex_allbrackets &&
8401 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8402 return REPORT(0);
79072805 8403 Mop(OP_REPEAT);
78cdf107 8404 }
79072805
LW
8405 check_uni();
8406 goto just_a_word;
8407
a0d0e21e 8408 case KEY_xor:
78cdf107
Z
8409 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8410 return REPORT(0);
6154021b 8411 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
8412 OPERATOR(OROP);
8413
79072805
LW
8414 case KEY_y:
8415 s = scan_trans(s);
8416 TERM(sublex_start());
8417 }
49dc05e3 8418 }}
79072805 8419}
bf4acbe4
GS
8420#ifdef __SC__
8421#pragma segment Main
8422#endif
79072805 8423
e930465f
JH
8424static int
8425S_pending_ident(pTHX)
8eceec63 8426{
97aff369 8427 dVAR;
8eceec63 8428 register char *d;
bbd11bfc 8429 PADOFFSET tmp = 0;
8eceec63
SC
8430 /* pit holds the identifier we read and pending_ident is reset */
8431 char pit = PL_pending_ident;
9bde8eb0
NC
8432 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8433 /* All routes through this function want to know if there is a colon. */
c099d646 8434 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8435 PL_pending_ident = 0;
8436
cd81e915 8437 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8438 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8439 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8440
8441 /* if we're in a my(), we can't allow dynamics here.
8442 $foo'bar has already been turned into $foo::bar, so
8443 just check for colons.
8444
8445 if it's a legal name, the OP is a PADANY.
8446 */
8447 if (PL_in_my) {
8448 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8449 if (has_colon)
8eceec63
SC
8450 yyerror(Perl_form(aTHX_ "No package name allowed for "
8451 "variable %s in \"our\"",
8452 PL_tokenbuf));
bc9b26ca 8453 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8454 }
8455 else {
9bde8eb0 8456 if (has_colon)
952306ac
RGS
8457 yyerror(Perl_form(aTHX_ PL_no_myglob,
8458 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 8459
6154021b 8460 pl_yylval.opval = newOP(OP_PADANY, 0);
bc9b26ca
BF
8461 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8462 UTF ? SVf_UTF8 : 0);
8eceec63
SC
8463 return PRIVATEREF;
8464 }
8465 }
8466
8467 /*
8468 build the ops for accesses to a my() variable.
8469
8470 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8471 then used in a comparison. This catches most, but not
8472 all cases. For instance, it catches
8473 sort { my($a); $a <=> $b }
8474 but not
8475 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8476 (although why you'd do that is anyone's guess).
8477 */
8478
9bde8eb0 8479 if (!has_colon) {
8716503d 8480 if (!PL_in_my)
bc9b26ca
BF
8481 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8482 UTF ? SVf_UTF8 : 0);
8716503d 8483 if (tmp != NOT_IN_PAD) {
8eceec63 8484 /* might be an "our" variable" */
00b1698f 8485 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8486 /* build ops for a bareword */
b64e5050
AL
8487 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8488 HEK * const stashname = HvNAME_HEK(stash);
8489 SV * const sym = newSVhek(stashname);
396482e1 8490 sv_catpvs(sym, "::");
2a33114a 8491 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
6154021b
RGS
8492 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8493 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8494 gv_fetchsv(sym,
8eceec63
SC
8495 (PL_in_eval
8496 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8497 : GV_ADDMULTI
8eceec63
SC
8498 ),
8499 ((PL_tokenbuf[0] == '$') ? SVt_PV
8500 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8501 : SVt_PVHV));
8502 return WORD;
8503 }
8504
8505 /* if it's a sort block and they're naming $a or $b */
8506 if (PL_last_lop_op == OP_SORT &&
8507 PL_tokenbuf[0] == '$' &&
8508 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8509 && !PL_tokenbuf[2])
8510 {
8511 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8512 d < PL_bufend && *d != '\n';
8513 d++)
8514 {
8515 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8516 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8517 PL_tokenbuf);
8518 }
8519 }
8520 }
8521
6154021b
RGS
8522 pl_yylval.opval = newOP(OP_PADANY, 0);
8523 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8524 return PRIVATEREF;
8525 }
8526 }
8527
8528 /*
8529 Whine if they've said @foo in a doublequoted string,
8530 and @foo isn't a variable we can find in the symbol
8531 table.
8532 */
d824713b
NC
8533 if (ckWARN(WARN_AMBIGUOUS) &&
8534 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
0be4d16f
BF
8535 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8536 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8eceec63 8537 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8538 /* DO NOT warn for @- and @+ */
8539 && !( PL_tokenbuf[2] == '\0' &&
8540 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8541 )
8eceec63
SC
8542 {
8543 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8544 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8545 "Possible unintended interpolation of %s in string",
8546 PL_tokenbuf);
8eceec63
SC
8547 }
8548 }
8549
8550 /* build ops for a bareword */
0be4d16f
BF
8551 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8552 tokenbuf_len - 1,
8553 UTF ? SVf_UTF8 : 0 ));
6154021b 8554 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7 8555 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
0be4d16f
BF
8556 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8557 | ( UTF ? SVf_UTF8 : 0 ),
223f0fb7
NC
8558 ((PL_tokenbuf[0] == '$') ? SVt_PV
8559 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8560 : SVt_PVHV));
8eceec63
SC
8561 return WORD;
8562}
8563
76e3520e 8564STATIC void
c94115d8 8565S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8566{
97aff369 8567 dVAR;
2f3197b3 8568
7918f24d
NC
8569 PERL_ARGS_ASSERT_CHECKCOMMA;
8570
d008e5eb 8571 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8572 if (ckWARN(WARN_SYNTAX)) {
8573 int level = 1;
26ff0806 8574 const char *w;
d008e5eb
GS
8575 for (w = s+2; *w && level; w++) {
8576 if (*w == '(')
8577 ++level;
8578 else if (*w == ')')
8579 --level;
8580 }
888fea98
NC
8581 while (isSPACE(*w))
8582 ++w;
b1439985
RGS
8583 /* the list of chars below is for end of statements or
8584 * block / parens, boolean operators (&&, ||, //) and branch
8585 * constructs (or, and, if, until, unless, while, err, for).
8586 * Not a very solid hack... */
8587 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8588 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8589 "%s (...) interpreted as function",name);
d008e5eb 8590 }
2f3197b3 8591 }
3280af22 8592 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8593 s++;
a687059c
LW
8594 if (*s == '(')
8595 s++;
3280af22 8596 while (s < PL_bufend && isSPACE(*s))
a687059c 8597 s++;
7e2040f0 8598 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 8599 const char * const w = s++;
7e2040f0 8600 while (isALNUM_lazy_if(s,UTF))
a687059c 8601 s++;
3280af22 8602 while (s < PL_bufend && isSPACE(*s))
a687059c 8603 s++;
e929a76b 8604 if (*s == ',') {
c94115d8 8605 GV* gv;
5458a98a 8606 if (keyword(w, s - w, 0))
e929a76b 8607 return;
c94115d8 8608
2e38bce1 8609 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
c94115d8 8610 if (gv && GvCVu(gv))
abbb3198 8611 return;
cea2e8a9 8612 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8613 }
8614 }
8615}
8616
423cee85
JH
8617/* Either returns sv, or mortalizes sv and returns a new SV*.
8618 Best used as sv=new_constant(..., sv, ...).
8619 If s, pv are NULL, calls subroutine with one argument,
8620 and type is used with error messages only. */
8621
b3ac6de7 8622STATIC SV *
eb0d8d16
NC
8623S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8624 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 8625{
27da23d5 8626 dVAR; dSP;
fbb93542 8627 HV * table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 8628 SV *res;
b3ac6de7
IZ
8629 SV **cvp;
8630 SV *cv, *typesv;
89e33a05 8631 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 8632
7918f24d
NC
8633 PERL_ARGS_ASSERT_NEW_CONSTANT;
8634
f8988b41
KW
8635 /* charnames doesn't work well if there have been errors found */
8636 if (PL_error_count > 0 && strEQ(key,"charnames"))
8637 return &PL_sv_undef;
8638
fbb93542
KW
8639 if (!table
8640 || ! (PL_hints & HINT_LOCALIZE_HH)
8641 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8642 || ! SvOK(*cvp))
8643 {
423cee85
JH
8644 SV *msg;
8645
fbb93542
KW
8646 /* Here haven't found what we're looking for. If it is charnames,
8647 * perhaps it needs to be loaded. Try doing that before giving up */
8648 if (strEQ(key,"charnames")) {
8649 Perl_load_module(aTHX_
8650 0,
8651 newSVpvs("_charnames"),
8652 /* version parameter; no need to specify it, as if
8653 * we get too early a version, will fail anyway,
8654 * not being able to find '_charnames' */
8655 NULL,
8656 newSVpvs(":full"),
8657 newSVpvs(":short"),
8658 NULL);
8659 SPAGAIN;
8660 table = GvHV(PL_hintgv);
8661 if (table
8662 && (PL_hints & HINT_LOCALIZE_HH)
8663 && (cvp = hv_fetch(table, key, keylen, FALSE))
8664 && SvOK(*cvp))
8665 {
8666 goto now_ok;
8667 }
8668 }
8669 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8670 msg = Perl_newSVpvf(aTHX_
8671 "Constant(%s) unknown", (type ? type: "undef"));
8672 }
8673 else {
8674 why1 = "$^H{";
8675 why2 = key;
8676 why3 = "} is not defined";
423cee85 8677 report:
4e553d73 8678 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 8679 (type ? type: "undef"), why1, why2, why3);
fbb93542 8680 }
95a20fc0 8681 yyerror(SvPVX_const(msg));
423cee85
JH
8682 SvREFCNT_dec(msg);
8683 return sv;
8684 }
fbb93542 8685now_ok:
b3ac6de7
IZ
8686 sv_2mortal(sv); /* Parent created it permanently */
8687 cv = *cvp;
423cee85 8688 if (!pv && s)
59cd0e26 8689 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 8690 if (type && pv)
59cd0e26 8691 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 8692 else
423cee85 8693 typesv = &PL_sv_undef;
4e553d73 8694
e788e7d3 8695 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
8696 ENTER ;
8697 SAVETMPS;
4e553d73 8698
423cee85 8699 PUSHMARK(SP) ;
a5845cb7 8700 EXTEND(sp, 3);
423cee85
JH
8701 if (pv)
8702 PUSHs(pv);
b3ac6de7 8703 PUSHs(sv);
423cee85
JH
8704 if (pv)
8705 PUSHs(typesv);
b3ac6de7 8706 PUTBACK;
423cee85 8707 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 8708
423cee85 8709 SPAGAIN ;
4e553d73 8710
423cee85 8711 /* Check the eval first */
9b0e499b 8712 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 8713 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 8714 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 8715 (void)POPs;
b37c2d43 8716 res = SvREFCNT_inc_simple(sv);
423cee85
JH
8717 }
8718 else {
8719 res = POPs;
b37c2d43 8720 SvREFCNT_inc_simple_void(res);
423cee85 8721 }
4e553d73 8722
423cee85
JH
8723 PUTBACK ;
8724 FREETMPS ;
8725 LEAVE ;
b3ac6de7 8726 POPSTACK;
4e553d73 8727
b3ac6de7 8728 if (!SvOK(res)) {
423cee85
JH
8729 why1 = "Call to &{$^H{";
8730 why2 = key;
f0af216f 8731 why3 = "}} did not return a defined value";
423cee85
JH
8732 sv = res;
8733 goto report;
9b0e499b 8734 }
423cee85 8735
9b0e499b 8736 return res;
b3ac6de7 8737}
4e553d73 8738
d0a148a6
NC
8739/* Returns a NUL terminated string, with the length of the string written to
8740 *slp
8741 */
76e3520e 8742STATIC char *
cea2e8a9 8743S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 8744{
97aff369 8745 dVAR;
463ee0b2 8746 register char *d = dest;
890ce7af 8747 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
8748
8749 PERL_ARGS_ASSERT_SCAN_WORD;
8750
463ee0b2 8751 for (;;) {
8903cb82 8752 if (d >= e)
cea2e8a9 8753 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8754 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8755 *d++ = *s++;
c35e046a 8756 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8757 *d++ = ':';
8758 *d++ = ':';
8759 s++;
8760 }
c35e046a 8761 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
8762 *d++ = *s++;
8763 *d++ = *s++;
8764 }
fd400ab9 8765 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8766 char *t = s + UTF8SKIP(s);
c35e046a 8767 size_t len;
fd400ab9 8768 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 8769 t += UTF8SKIP(t);
c35e046a
AL
8770 len = t - s;
8771 if (d + len > e)
cea2e8a9 8772 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
8773 Copy(s, d, len, char);
8774 d += len;
a0ed51b3
LW
8775 s = t;
8776 }
463ee0b2
LW
8777 else {
8778 *d = '\0';
8779 *slp = d - dest;
8780 return s;
e929a76b 8781 }
378cc40b
LW
8782 }
8783}
8784
76e3520e 8785STATIC char *
f54cb97a 8786S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 8787{
97aff369 8788 dVAR;
6136c704 8789 char *bracket = NULL;
748a9306 8790 char funny = *s++;
6136c704 8791 register char *d = dest;
0b3da58d 8792 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 8793
7918f24d
NC
8794 PERL_ARGS_ASSERT_SCAN_IDENT;
8795
a0d0e21e 8796 if (isSPACE(*s))
29595ff2 8797 s = PEEKSPACE(s);
de3bb511 8798 if (isDIGIT(*s)) {
8903cb82 8799 while (isDIGIT(*s)) {
8800 if (d >= e)
cea2e8a9 8801 Perl_croak(aTHX_ ident_too_long);
378cc40b 8802 *d++ = *s++;
8903cb82 8803 }
378cc40b
LW
8804 }
8805 else {
463ee0b2 8806 for (;;) {
8903cb82 8807 if (d >= e)
cea2e8a9 8808 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8809 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8810 *d++ = *s++;
7e2040f0 8811 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8812 *d++ = ':';
8813 *d++ = ':';
8814 s++;
8815 }
a0d0e21e 8816 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
8817 *d++ = *s++;
8818 *d++ = *s++;
8819 }
fd400ab9 8820 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8821 char *t = s + UTF8SKIP(s);
fd400ab9 8822 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
8823 t += UTF8SKIP(t);
8824 if (d + (t - s) > e)
cea2e8a9 8825 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
8826 Copy(s, d, t - s, char);
8827 d += t - s;
8828 s = t;
8829 }
463ee0b2
LW
8830 else
8831 break;
8832 }
378cc40b
LW
8833 }
8834 *d = '\0';
8835 d = dest;
79072805 8836 if (*d) {
3280af22
NIS
8837 if (PL_lex_state != LEX_NORMAL)
8838 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 8839 return s;
378cc40b 8840 }
748a9306 8841 if (*s == '$' && s[1] &&
3792a11b 8842 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 8843 {
4810e5ec 8844 return s;
5cd24f17 8845 }
79072805
LW
8846 if (*s == '{') {
8847 bracket = s;
8848 s++;
8849 }
8850 else if (ck_uni)
8851 check_uni();
204e6232
BF
8852 if (s < send) {
8853 if (UTF) {
8854 const STRLEN skip = UTF8SKIP(s);
8855 STRLEN i;
8856 d[skip] = '\0';
8857 for ( i = 0; i < skip; i++ )
8858 d[i] = *s++;
8859 }
8860 else {
8861 *d = *s++;
8862 d[1] = '\0';
8863 }
8864 }
2b92dfce 8865 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 8866 *d = toCTRL(*s);
8867 s++;
de3bb511 8868 }
79072805 8869 if (bracket) {
748a9306 8870 if (isSPACE(s[-1])) {
fa83b5b6 8871 while (s < send) {
f54cb97a 8872 const char ch = *s++;
bf4acbe4 8873 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 8874 *d = ch;
8875 break;
8876 }
8877 }
748a9306 8878 }
7e2040f0 8879 if (isIDFIRST_lazy_if(d,UTF)) {
204e6232 8880 d += UTF8SKIP(d);
a0ed51b3 8881 if (UTF) {
6136c704
AL
8882 char *end = s;
8883 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8884 end += UTF8SKIP(end);
8885 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8886 end += UTF8SKIP(end);
a0ed51b3 8887 }
6136c704
AL
8888 Copy(s, d, end - s, char);
8889 d += end - s;
8890 s = end;
a0ed51b3
LW
8891 }
8892 else {
2b92dfce 8893 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 8894 *d++ = *s++;
2b92dfce 8895 if (d >= e)
cea2e8a9 8896 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 8897 }
79072805 8898 *d = '\0';
c35e046a
AL
8899 while (s < send && SPACE_OR_TAB(*s))
8900 s++;
ff68c719 8901 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 8902 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
8903 const char * const brack =
8904 (const char *)
8905 ((*s == '[') ? "[...]" : "{...}");
e850844c 8906 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 8907 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 8908 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
8909 funny, dest, brack, funny, dest, brack);
8910 }
79072805 8911 bracket++;
a0be28da 8912 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 8913 PL_lex_allbrackets++;
79072805
LW
8914 return s;
8915 }
4e553d73
NIS
8916 }
8917 /* Handle extended ${^Foo} variables
2b92dfce
GS
8918 * 1999-02-27 mjd-perl-patch@plover.com */
8919 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8920 && isALNUM(*s))
8921 {
8922 d++;
8923 while (isALNUM(*s) && d < e) {
8924 *d++ = *s++;
8925 }
8926 if (d >= e)
cea2e8a9 8927 Perl_croak(aTHX_ ident_too_long);
2b92dfce 8928 *d = '\0';
79072805
LW
8929 }
8930 if (*s == '}') {
8931 s++;
7df0d042 8932 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 8933 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
8934 PL_expect = XREF;
8935 }
d008e5eb 8936 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 8937 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
8938 (keyword(dest, d - dest, 0)
8939 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 8940 {
c35e046a
AL
8941 if (funny == '#')
8942 funny = '@';
9014280d 8943 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
8944 "Ambiguous use of %c{%s} resolved to %c%s",
8945 funny, dest, funny, dest);
8946 }
8947 }
79072805
LW
8948 }
8949 else {
8950 s = bracket; /* let the parser handle it */
93a17b20 8951 *dest = '\0';
79072805
LW
8952 }
8953 }
3280af22
NIS
8954 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8955 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
8956 return s;
8957}
8958
858a358b 8959static bool
3955e1a9 8960S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
8961
8962 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8963 * the parse starting at 's', based on the subset that are valid in this
8964 * context input to this routine in 'valid_flags'. Advances s. Returns
8965 * TRUE if the input was a valid flag, so the next char may be as well;
3955e1a9
KW
8966 * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8967 * current regex. This routine will set it to any charset modifier found.
8968 * The caller shouldn't change it. This way, another charset modifier
8969 * encountered in the parse can be detected as an error, as we have decided
8970 * allow only one */
858a358b
KW
8971
8972 const char c = **s;
94b03d7d 8973
858a358b
KW
8974 if (! strchr(valid_flags, c)) {
8975 if (isALNUM(c)) {
94b03d7d 8976 goto deprecate;
858a358b
KW
8977 }
8978 return FALSE;
8979 }
8980
8981 switch (c) {
94b03d7d 8982
858a358b
KW
8983 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8984 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8985 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8986 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8987 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8988 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d
KW
8989 case LOCALE_PAT_MOD:
8990
8991 /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8992 * can't be regex modifiers.
8993 * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
8994 * much as easily done. s///lei, for example, has to mean regex
8995 * modifiers if it's not an error (as does any word character
8996 * following the 'e'). Otherwise, we resolve to the backwards-
8997 * compatible, but less likely 's/// le ...', i.e. as meaning
8998 * less-than-or-equal. The reason it's not likely is that s//
967ac236
KW
8999 * returns a number for code in the field (/r returns a string, but
9000 * that wasn't added until the 5.13 series), and so '<=' should be
9001 * used for comparing, not 'le'. */
94b03d7d
KW
9002 if (*((*s) + 1) == 't') {
9003 goto deprecate;
9004 }
563734a5 9005 else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
aeac89a5
KW
9006
9007 /* 'e' is valid only for substitutes, s///e. If it is not
9008 * valid in the current context, then 'm//le' must mean the
9009 * comparison operator, so use the regular deprecation message.
9010 */
9011 if (! strchr(valid_flags, 'e')) {
9012 goto deprecate;
9013 }
94b03d7d 9014 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
79ef86ee 9015 "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.18, it will be resolved the other way");
94b03d7d
KW
9016 return FALSE;
9017 }
3955e1a9
KW
9018 if (*charset) {
9019 goto multiple_charsets;
9020 }
94b03d7d 9021 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 9022 *charset = c;
94b03d7d
KW
9023 break;
9024 case UNICODE_PAT_MOD:
9025 /* In 5.14, qr//unless and qr//until are legal but deprecated; the
9026 * 'n' means they can't be regex modifiers */
9027 if (*((*s) + 1) == 'n') {
9028 goto deprecate;
9029 }
3955e1a9
KW
9030 if (*charset) {
9031 goto multiple_charsets;
9032 }
94b03d7d 9033 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 9034 *charset = c;
94b03d7d
KW
9035 break;
9036 case ASCII_RESTRICT_PAT_MOD:
9037 /* In 5.14, qr//and is legal but deprecated; the 'n' means they
9038 * can't be regex modifiers */
9039 if (*((*s) + 1) == 'n') {
9040 goto deprecate;
9041 }
ff3f26d2
KW
9042
9043 if (! *charset) {
94b03d7d
KW
9044 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9045 }
ff3f26d2
KW
9046 else {
9047
9048 /* Error if previous modifier wasn't an 'a', but if it was, see
9049 * if, and accept, a second occurrence (only) */
9050 if (*charset != 'a'
9051 || get_regex_charset(*pmfl)
9052 != REGEX_ASCII_RESTRICTED_CHARSET)
9053 {
9054 goto multiple_charsets;
9055 }
9056 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
9057 }
9058 *charset = c;
94b03d7d
KW
9059 break;
9060 case DEPENDS_PAT_MOD:
3955e1a9
KW
9061 if (*charset) {
9062 goto multiple_charsets;
9063 }
94b03d7d 9064 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 9065 *charset = c;
94b03d7d 9066 break;
879d0c72 9067 }
94b03d7d 9068
858a358b
KW
9069 (*s)++;
9070 return TRUE;
94b03d7d
KW
9071
9072 deprecate:
9073 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
9074 "Having no space between pattern and following word is deprecated");
9075 return FALSE;
3955e1a9
KW
9076
9077 multiple_charsets:
9078 if (*charset != c) {
9079 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9080 }
ff3f26d2
KW
9081 else if (c == 'a') {
9082 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9083 }
3955e1a9
KW
9084 else {
9085 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9086 }
9087
9088 /* Pretend that it worked, so will continue processing before dieing */
9089 (*s)++;
9090 return TRUE;
879d0c72
NC
9091}
9092
76e3520e 9093STATIC char *
cea2e8a9 9094S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 9095{
97aff369 9096 dVAR;
79072805 9097 PMOP *pm;
5db06880 9098 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 9099 const char * const valid_flags =
a20207d7 9100 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 9101 char charset = '\0'; /* character set modifier */
5db06880
NC
9102#ifdef PERL_MAD
9103 char *modstart;
9104#endif
9105
7918f24d 9106 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 9107
25c09cbf 9108 if (!s) {
6136c704 9109 const char * const delimiter = skipspace(start);
10edeb5d
JH
9110 Perl_croak(aTHX_
9111 (const char *)
9112 (*delimiter == '?'
9113 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9114 : "Search pattern not terminated" ));
25c09cbf 9115 }
bbce6d69 9116
8782bef2 9117 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
9118 if (PL_multi_open == '?') {
9119 /* This is the only point in the code that sets PMf_ONCE: */
79072805 9120 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
9121
9122 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9123 allows us to restrict the list needed by reset to just the ??
9124 matches. */
9125 assert(type != OP_TRANS);
9126 if (PL_curstash) {
daba3364 9127 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
9128 U32 elements;
9129 if (!mg) {
daba3364 9130 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
9131 0);
9132 }
9133 elements = mg->mg_len / sizeof(PMOP**);
9134 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9135 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9136 mg->mg_len = elements * sizeof(PMOP**);
9137 PmopSTASH_set(pm,PL_curstash);
9138 }
9139 }
5db06880
NC
9140#ifdef PERL_MAD
9141 modstart = s;
9142#endif
3955e1a9 9143 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
9144#ifdef PERL_MAD
9145 if (PL_madskills && modstart != s) {
9146 SV* tmptoken = newSVpvn(modstart, s - modstart);
9147 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9148 }
9149#endif
4ac733c9 9150 /* issue a warning if /c is specified,but /g is not */
a2a5de95 9151 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 9152 {
a2a5de95
NC
9153 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9154 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
9155 }
9156
3280af22 9157 PL_lex_op = (OP*)pm;
6154021b 9158 pl_yylval.ival = OP_MATCH;
378cc40b
LW
9159 return s;
9160}
9161
76e3520e 9162STATIC char *
cea2e8a9 9163S_scan_subst(pTHX_ char *start)
79072805 9164{
27da23d5 9165 dVAR;
22594288 9166 char *s;
79072805 9167 register PMOP *pm;
4fdae800 9168 I32 first_start;
79072805 9169 I32 es = 0;
3955e1a9 9170 char charset = '\0'; /* character set modifier */
5db06880
NC
9171#ifdef PERL_MAD
9172 char *modstart;
9173#endif
79072805 9174
7918f24d
NC
9175 PERL_ARGS_ASSERT_SCAN_SUBST;
9176
6154021b 9177 pl_yylval.ival = OP_NULL;
79072805 9178
5db06880 9179 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9180
37fd879b 9181 if (!s)
cea2e8a9 9182 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9183
3280af22 9184 if (s[-1] == PL_multi_open)
79072805 9185 s--;
5db06880
NC
9186#ifdef PERL_MAD
9187 if (PL_madskills) {
cd81e915
NC
9188 CURMAD('q', PL_thisopen);
9189 CURMAD('_', PL_thiswhite);
9190 CURMAD('E', PL_thisstuff);
9191 CURMAD('Q', PL_thisclose);
9192 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9193 }
9194#endif
79072805 9195
3280af22 9196 first_start = PL_multi_start;
5db06880 9197 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9198 if (!s) {
37fd879b 9199 if (PL_lex_stuff) {
3280af22 9200 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9201 PL_lex_stuff = NULL;
37fd879b 9202 }
cea2e8a9 9203 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9204 }
3280af22 9205 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9206
79072805 9207 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9208
9209#ifdef PERL_MAD
9210 if (PL_madskills) {
cd81e915
NC
9211 CURMAD('z', PL_thisopen);
9212 CURMAD('R', PL_thisstuff);
9213 CURMAD('Z', PL_thisclose);
5db06880
NC
9214 }
9215 modstart = s;
9216#endif
9217
48c036b1 9218 while (*s) {
a20207d7 9219 if (*s == EXEC_PAT_MOD) {
a687059c 9220 s++;
2f3197b3 9221 es++;
a687059c 9222 }
3955e1a9
KW
9223 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9224 {
48c036b1 9225 break;
aa78b661 9226 }
378cc40b 9227 }
79072805 9228
5db06880
NC
9229#ifdef PERL_MAD
9230 if (PL_madskills) {
9231 if (modstart != s)
9232 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9233 append_madprops(PL_thismad, (OP*)pm, 0);
9234 PL_thismad = 0;
5db06880
NC
9235 }
9236#endif
a2a5de95
NC
9237 if ((pm->op_pmflags & PMf_CONTINUE)) {
9238 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9239 }
9240
79072805 9241 if (es) {
6136c704
AL
9242 SV * const repl = newSVpvs("");
9243
0244c3a4
GS
9244 PL_sublex_info.super_bufptr = s;
9245 PL_sublex_info.super_bufend = PL_bufend;
9246 PL_multi_end = 0;
79072805 9247 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9248 while (es-- > 0) {
9249 if (es)
9250 sv_catpvs(repl, "eval ");
9251 else
9252 sv_catpvs(repl, "do ");
9253 }
6f43d98f 9254 sv_catpvs(repl, "{");
3280af22 9255 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
9256 if (strchr(SvPVX(PL_lex_repl), '#'))
9257 sv_catpvs(repl, "\n");
9258 sv_catpvs(repl, "}");
25da4f38 9259 SvEVALED_on(repl);
3280af22
NIS
9260 SvREFCNT_dec(PL_lex_repl);
9261 PL_lex_repl = repl;
378cc40b 9262 }
79072805 9263
3280af22 9264 PL_lex_op = (OP*)pm;
6154021b 9265 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9266 return s;
9267}
9268
76e3520e 9269STATIC char *
cea2e8a9 9270S_scan_trans(pTHX_ char *start)
378cc40b 9271{
97aff369 9272 dVAR;
a0d0e21e 9273 register char* s;
11343788 9274 OP *o;
79072805 9275 short *tbl;
b84c11c8
NC
9276 U8 squash;
9277 U8 del;
9278 U8 complement;
bb16bae8 9279 bool nondestruct = 0;
5db06880
NC
9280#ifdef PERL_MAD
9281 char *modstart;
9282#endif
79072805 9283
7918f24d
NC
9284 PERL_ARGS_ASSERT_SCAN_TRANS;
9285
6154021b 9286 pl_yylval.ival = OP_NULL;
79072805 9287
5db06880 9288 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 9289 if (!s)
cea2e8a9 9290 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9291
3280af22 9292 if (s[-1] == PL_multi_open)
2f3197b3 9293 s--;
5db06880
NC
9294#ifdef PERL_MAD
9295 if (PL_madskills) {
cd81e915
NC
9296 CURMAD('q', PL_thisopen);
9297 CURMAD('_', PL_thiswhite);
9298 CURMAD('E', PL_thisstuff);
9299 CURMAD('Q', PL_thisclose);
9300 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9301 }
9302#endif
2f3197b3 9303
5db06880 9304 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9305 if (!s) {
37fd879b 9306 if (PL_lex_stuff) {
3280af22 9307 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9308 PL_lex_stuff = NULL;
37fd879b 9309 }
cea2e8a9 9310 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9311 }
5db06880 9312 if (PL_madskills) {
cd81e915
NC
9313 CURMAD('z', PL_thisopen);
9314 CURMAD('R', PL_thisstuff);
9315 CURMAD('Z', PL_thisclose);
5db06880 9316 }
79072805 9317
a0ed51b3 9318 complement = del = squash = 0;
5db06880
NC
9319#ifdef PERL_MAD
9320 modstart = s;
9321#endif
7a1e2023
NC
9322 while (1) {
9323 switch (*s) {
9324 case 'c':
79072805 9325 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9326 break;
9327 case 'd':
a0ed51b3 9328 del = OPpTRANS_DELETE;
7a1e2023
NC
9329 break;
9330 case 's':
79072805 9331 squash = OPpTRANS_SQUASH;
7a1e2023 9332 break;
bb16bae8
FC
9333 case 'r':
9334 nondestruct = 1;
9335 break;
7a1e2023
NC
9336 default:
9337 goto no_more;
9338 }
395c3793
LW
9339 s++;
9340 }
7a1e2023 9341 no_more:
8973db79 9342
aa1f7c5b 9343 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
bb16bae8 9344 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
59f00321
RGS
9345 o->op_private &= ~OPpTRANS_ALL;
9346 o->op_private |= del|squash|complement|
7948272d
NIS
9347 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9348 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 9349
3280af22 9350 PL_lex_op = o;
bb16bae8 9351 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9352
9353#ifdef PERL_MAD
9354 if (PL_madskills) {
9355 if (modstart != s)
9356 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9357 append_madprops(PL_thismad, o, 0);
9358 PL_thismad = 0;
5db06880
NC
9359 }
9360#endif
9361
79072805
LW
9362 return s;
9363}
9364
76e3520e 9365STATIC char *
cea2e8a9 9366S_scan_heredoc(pTHX_ register char *s)
79072805 9367{
97aff369 9368 dVAR;
79072805
LW
9369 SV *herewas;
9370 I32 op_type = OP_SCALAR;
9371 I32 len;
9372 SV *tmpstr;
9373 char term;
73d840c0 9374 const char *found_newline;
79072805 9375 register char *d;
fc36a67e 9376 register char *e;
4633a7c4 9377 char *peek;
60d63348
FC
9378 const int outer = (PL_rsfp || PL_parser->filtered)
9379 && !(PL_lex_inwhat == OP_SCALAR);
5db06880
NC
9380#ifdef PERL_MAD
9381 I32 stuffstart = s - SvPVX(PL_linestr);
9382 char *tstart;
9383
cd81e915 9384 PL_realtokenstart = -1;
5db06880 9385#endif
79072805 9386
7918f24d
NC
9387 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9388
79072805 9389 s += 2;
3280af22
NIS
9390 d = PL_tokenbuf;
9391 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 9392 if (!outer)
79072805 9393 *d++ = '\n';
c35e046a
AL
9394 peek = s;
9395 while (SPACE_OR_TAB(*peek))
9396 peek++;
3792a11b 9397 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9398 s = peek;
79072805 9399 term = *s++;
3280af22 9400 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 9401 d += len;
3280af22 9402 if (s < PL_bufend)
79072805 9403 s++;
79072805
LW
9404 }
9405 else {
9406 if (*s == '\\')
9407 s++, term = '\'';
9408 else
9409 term = '"';
7e2040f0 9410 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9411 deprecate("bare << to mean <<\"\"");
7e2040f0 9412 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9413 if (d < e)
9414 *d++ = *s;
9415 }
9416 }
3280af22 9417 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9418 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9419 *d++ = '\n';
9420 *d = '\0';
3280af22 9421 len = d - PL_tokenbuf;
5db06880
NC
9422
9423#ifdef PERL_MAD
9424 if (PL_madskills) {
9425 tstart = PL_tokenbuf + !outer;
cd81e915 9426 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 9427 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9428 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9429 stuffstart = s - SvPVX(PL_linestr);
9430 }
9431#endif
6a27c188 9432#ifndef PERL_STRICT_CR
f63a84b2
LW
9433 d = strchr(s, '\r');
9434 if (d) {
b464bac0 9435 char * const olds = s;
f63a84b2 9436 s = d;
3280af22 9437 while (s < PL_bufend) {
f63a84b2
LW
9438 if (*s == '\r') {
9439 *d++ = '\n';
9440 if (*++s == '\n')
9441 s++;
9442 }
9443 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9444 *d++ = *s++;
9445 s++;
9446 }
9447 else
9448 *d++ = *s++;
9449 }
9450 *d = '\0';
3280af22 9451 PL_bufend = d;
95a20fc0 9452 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9453 s = olds;
9454 }
9455#endif
5db06880
NC
9456#ifdef PERL_MAD
9457 found_newline = 0;
9458#endif
10edeb5d 9459 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
9460 herewas = newSVpvn(s,PL_bufend-s);
9461 }
9462 else {
5db06880
NC
9463#ifdef PERL_MAD
9464 herewas = newSVpvn(s-1,found_newline-s+1);
9465#else
73d840c0
AL
9466 s--;
9467 herewas = newSVpvn(s,found_newline-s);
5db06880 9468#endif
73d840c0 9469 }
5db06880
NC
9470#ifdef PERL_MAD
9471 if (PL_madskills) {
9472 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9473 if (PL_thisstuff)
9474 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9475 else
cd81e915 9476 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
9477 }
9478#endif
79072805 9479 s += SvCUR(herewas);
748a9306 9480
5db06880
NC
9481#ifdef PERL_MAD
9482 stuffstart = s - SvPVX(PL_linestr);
9483
9484 if (found_newline)
9485 s--;
9486#endif
9487
7d0a29fe
NC
9488 tmpstr = newSV_type(SVt_PVIV);
9489 SvGROW(tmpstr, 80);
748a9306 9490 if (term == '\'') {
79072805 9491 op_type = OP_CONST;
45977657 9492 SvIV_set(tmpstr, -1);
748a9306
LW
9493 }
9494 else if (term == '`') {
79072805 9495 op_type = OP_BACKTICK;
45977657 9496 SvIV_set(tmpstr, '\\');
748a9306 9497 }
79072805
LW
9498
9499 CLINE;
57843af0 9500 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
9501 PL_multi_open = PL_multi_close = '<';
9502 term = *PL_tokenbuf;
60d63348
FC
9503 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp
9504 && !PL_parser->filtered) {
6136c704
AL
9505 char * const bufptr = PL_sublex_info.super_bufptr;
9506 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 9507 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
9508 s = strchr(bufptr, '\n');
9509 if (!s)
9510 s = bufend;
9511 d = s;
9512 while (s < bufend &&
9513 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9514 if (*s++ == '\n')
57843af0 9515 CopLINE_inc(PL_curcop);
0244c3a4
GS
9516 }
9517 if (s >= bufend) {
eb160463 9518 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
9519 missingterm(PL_tokenbuf);
9520 }
9521 sv_setpvn(herewas,bufptr,d-bufptr+1);
9522 sv_setpvn(tmpstr,d+1,s-d);
9523 s += len - 1;
9524 sv_catpvn(herewas,s,bufend-s);
95a20fc0 9525 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
9526
9527 s = olds;
9528 goto retval;
9529 }
9530 else if (!outer) {
79072805 9531 d = s;
3280af22
NIS
9532 while (s < PL_bufend &&
9533 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 9534 if (*s++ == '\n')
57843af0 9535 CopLINE_inc(PL_curcop);
79072805 9536 }
3280af22 9537 if (s >= PL_bufend) {
eb160463 9538 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9539 missingterm(PL_tokenbuf);
79072805
LW
9540 }
9541 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9542#ifdef PERL_MAD
9543 if (PL_madskills) {
cd81e915
NC
9544 if (PL_thisstuff)
9545 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9546 else
cd81e915 9547 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9548 stuffstart = s - SvPVX(PL_linestr);
9549 }
9550#endif
79072805 9551 s += len - 1;
57843af0 9552 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 9553
3280af22
NIS
9554 sv_catpvn(herewas,s,PL_bufend-s);
9555 sv_setsv(PL_linestr,herewas);
9556 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9557 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9558 PL_last_lop = PL_last_uni = NULL;
79072805
LW
9559 }
9560 else
76f68e9b 9561 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 9562 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
9563#ifdef PERL_MAD
9564 if (PL_madskills) {
9565 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9566 if (PL_thisstuff)
9567 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9568 else
cd81e915 9569 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9570 }
9571#endif
f0e67a1d 9572 PL_bufptr = s;
17cc9359 9573 CopLINE_inc(PL_curcop);
f0e67a1d 9574 if (!outer || !lex_next_chunk(0)) {
eb160463 9575 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9576 missingterm(PL_tokenbuf);
79072805 9577 }
17cc9359 9578 CopLINE_dec(PL_curcop);
f0e67a1d 9579 s = PL_bufptr;
5db06880
NC
9580#ifdef PERL_MAD
9581 stuffstart = s - SvPVX(PL_linestr);
9582#endif
57843af0 9583 CopLINE_inc(PL_curcop);
3280af22 9584 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9585 PL_last_lop = PL_last_uni = NULL;
6a27c188 9586#ifndef PERL_STRICT_CR
3280af22 9587 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
9588 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9589 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 9590 {
3280af22
NIS
9591 PL_bufend[-2] = '\n';
9592 PL_bufend--;
95a20fc0 9593 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 9594 }
3280af22
NIS
9595 else if (PL_bufend[-1] == '\r')
9596 PL_bufend[-1] = '\n';
f63a84b2 9597 }
3280af22
NIS
9598 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9599 PL_bufend[-1] = '\n';
f63a84b2 9600#endif
3280af22 9601 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 9602 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 9603 *(SvPVX(PL_linestr) + off ) = ' ';
37c6a70c 9604 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
3280af22
NIS
9605 sv_catsv(PL_linestr,herewas);
9606 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 9607 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
9608 }
9609 else {
3280af22
NIS
9610 s = PL_bufend;
9611 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
9612 }
9613 }
79072805 9614 s++;
0244c3a4 9615retval:
57843af0 9616 PL_multi_end = CopLINE(PL_curcop);
79072805 9617 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 9618 SvPV_shrink_to_cur(tmpstr);
79072805 9619 }
8990e307 9620 SvREFCNT_dec(herewas);
2f31ce75 9621 if (!IN_BYTES) {
95a20fc0 9622 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
9623 SvUTF8_on(tmpstr);
9624 else if (PL_encoding)
9625 sv_recode_to_utf8(tmpstr, PL_encoding);
9626 }
3280af22 9627 PL_lex_stuff = tmpstr;
6154021b 9628 pl_yylval.ival = op_type;
79072805
LW
9629 return s;
9630}
9631
02aa26ce
NT
9632/* scan_inputsymbol
9633 takes: current position in input buffer
9634 returns: new position in input buffer
6154021b 9635 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
9636
9637 This code handles:
9638
9639 <> read from ARGV
9640 <FH> read from filehandle
9641 <pkg::FH> read from package qualified filehandle
9642 <pkg'FH> read from package qualified filehandle
9643 <$fh> read from filehandle in $fh
9644 <*.h> filename glob
9645
9646*/
9647
76e3520e 9648STATIC char *
cea2e8a9 9649S_scan_inputsymbol(pTHX_ char *start)
79072805 9650{
97aff369 9651 dVAR;
02aa26ce 9652 register char *s = start; /* current position in buffer */
1b420867 9653 char *end;
79072805 9654 I32 len;
6136c704
AL
9655 char *d = PL_tokenbuf; /* start of temp holding space */
9656 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9657
7918f24d
NC
9658 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9659
1b420867
GS
9660 end = strchr(s, '\n');
9661 if (!end)
9662 end = PL_bufend;
9663 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
9664
9665 /* die if we didn't have space for the contents of the <>,
1b420867 9666 or if it didn't end, or if we see a newline
02aa26ce
NT
9667 */
9668
bb7a0f54 9669 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 9670 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 9671 if (s >= end)
cea2e8a9 9672 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 9673
fc36a67e 9674 s++;
02aa26ce
NT
9675
9676 /* check for <$fh>
9677 Remember, only scalar variables are interpreted as filehandles by
9678 this code. Anything more complex (e.g., <$fh{$num}>) will be
9679 treated as a glob() call.
9680 This code makes use of the fact that except for the $ at the front,
9681 a scalar variable and a filehandle look the same.
9682 */
4633a7c4 9683 if (*d == '$' && d[1]) d++;
02aa26ce
NT
9684
9685 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 9686 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
2a507800 9687 d += UTF ? UTF8SKIP(d) : 1;
02aa26ce
NT
9688
9689 /* If we've tried to read what we allow filehandles to look like, and
9690 there's still text left, then it must be a glob() and not a getline.
9691 Use scan_str to pull out the stuff between the <> and treat it
9692 as nothing more than a string.
9693 */
9694
3280af22 9695 if (d - PL_tokenbuf != len) {
6154021b 9696 pl_yylval.ival = OP_GLOB;
5db06880 9697 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9698 if (!s)
cea2e8a9 9699 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
9700 return s;
9701 }
395c3793 9702 else {
9b3023bc 9703 bool readline_overriden = FALSE;
6136c704 9704 GV *gv_readline;
9b3023bc 9705 GV **gvp;
02aa26ce 9706 /* we're in a filehandle read situation */
3280af22 9707 d = PL_tokenbuf;
02aa26ce
NT
9708
9709 /* turn <> into <ARGV> */
79072805 9710 if (!len)
689badd5 9711 Copy("ARGV",d,5,char);
02aa26ce 9712
9b3023bc 9713 /* Check whether readline() is overriden */
fafc274c 9714 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 9715 if ((gv_readline
ba979b31 9716 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 9717 ||
017a3ce5 9718 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 9719 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 9720 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
9721 readline_overriden = TRUE;
9722
02aa26ce
NT
9723 /* if <$fh>, create the ops to turn the variable into a
9724 filehandle
9725 */
79072805 9726 if (*d == '$') {
02aa26ce
NT
9727 /* try to find it in the pad for this block, otherwise find
9728 add symbol table ops
9729 */
bc9b26ca 9730 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 9731 if (tmp != NOT_IN_PAD) {
00b1698f 9732 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
9733 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9734 HEK * const stashname = HvNAME_HEK(stash);
9735 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 9736 sv_catpvs(sym, "::");
f558d5af
JH
9737 sv_catpv(sym, d+1);
9738 d = SvPVX(sym);
9739 goto intro_sym;
9740 }
9741 else {
6136c704 9742 OP * const o = newOP(OP_PADSV, 0);
f558d5af 9743 o->op_targ = tmp;
9b3023bc
RGS
9744 PL_lex_op = readline_overriden
9745 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9746 op_append_elem(OP_LIST, o,
9b3023bc
RGS
9747 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9748 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 9749 }
a0d0e21e
LW
9750 }
9751 else {
f558d5af
JH
9752 GV *gv;
9753 ++d;
9754intro_sym:
9755 gv = gv_fetchpv(d,
9756 (PL_in_eval
9757 ? (GV_ADDMULTI | GV_ADDINEVAL)
25db2ea6 9758 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
f558d5af 9759 SVt_PV);
9b3023bc
RGS
9760 PL_lex_op = readline_overriden
9761 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9762 op_append_elem(OP_LIST,
9b3023bc
RGS
9763 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9764 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9765 : (OP*)newUNOP(OP_READLINE, 0,
9766 newUNOP(OP_RV2SV, 0,
9767 newGVOP(OP_GV, 0, gv)));
a0d0e21e 9768 }
7c6fadd6
RGS
9769 if (!readline_overriden)
9770 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
9771 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9772 pl_yylval.ival = OP_NULL;
79072805 9773 }
02aa26ce
NT
9774
9775 /* If it's none of the above, it must be a literal filehandle
9776 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 9777 else {
25db2ea6 9778 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9b3023bc
RGS
9779 PL_lex_op = readline_overriden
9780 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9781 op_append_elem(OP_LIST,
9b3023bc
RGS
9782 newGVOP(OP_GV, 0, gv),
9783 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9784 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 9785 pl_yylval.ival = OP_NULL;
79072805
LW
9786 }
9787 }
02aa26ce 9788
79072805
LW
9789 return s;
9790}
9791
02aa26ce
NT
9792
9793/* scan_str
9794 takes: start position in buffer
09bef843
SB
9795 keep_quoted preserve \ on the embedded delimiter(s)
9796 keep_delims preserve the delimiters around the string
02aa26ce
NT
9797 returns: position to continue reading from buffer
9798 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9799 updates the read buffer.
9800
9801 This subroutine pulls a string out of the input. It is called for:
9802 q single quotes q(literal text)
9803 ' single quotes 'literal text'
9804 qq double quotes qq(interpolate $here please)
9805 " double quotes "interpolate $here please"
9806 qx backticks qx(/bin/ls -l)
9807 ` backticks `/bin/ls -l`
9808 qw quote words @EXPORT_OK = qw( func() $spam )
9809 m// regexp match m/this/
9810 s/// regexp substitute s/this/that/
9811 tr/// string transliterate tr/this/that/
9812 y/// string transliterate y/this/that/
9813 ($*@) sub prototypes sub foo ($)
09bef843 9814 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
9815 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9816
9817 In most of these cases (all but <>, patterns and transliterate)
9818 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9819 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9820 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9821 calls scan_str().
4e553d73 9822
02aa26ce
NT
9823 It skips whitespace before the string starts, and treats the first
9824 character as the delimiter. If the delimiter is one of ([{< then
9825 the corresponding "close" character )]}> is used as the closing
9826 delimiter. It allows quoting of delimiters, and if the string has
9827 balanced delimiters ([{<>}]) it allows nesting.
9828
37fd879b
HS
9829 On success, the SV with the resulting string is put into lex_stuff or,
9830 if that is already non-NULL, into lex_repl. The second case occurs only
9831 when parsing the RHS of the special constructs s/// and tr/// (y///).
9832 For convenience, the terminating delimiter character is stuffed into
9833 SvIVX of the SV.
02aa26ce
NT
9834*/
9835
76e3520e 9836STATIC char *
09bef843 9837S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 9838{
97aff369 9839 dVAR;
02aa26ce 9840 SV *sv; /* scalar value: string */
d3fcec1f 9841 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
9842 register char *s = start; /* current position in the buffer */
9843 register char term; /* terminating character */
9844 register char *to; /* current position in the sv's data */
9845 I32 brackets = 1; /* bracket nesting level */
89491803 9846 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 9847 I32 termcode; /* terminating char. code */
89ebb4a3 9848 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 9849 STRLEN termlen; /* length of terminating string */
0331ef07 9850 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
9851#ifdef PERL_MAD
9852 int stuffstart;
9853 char *tstart;
9854#endif
02aa26ce 9855
7918f24d
NC
9856 PERL_ARGS_ASSERT_SCAN_STR;
9857
02aa26ce 9858 /* skip space before the delimiter */
29595ff2
NC
9859 if (isSPACE(*s)) {
9860 s = PEEKSPACE(s);
9861 }
02aa26ce 9862
5db06880 9863#ifdef PERL_MAD
cd81e915
NC
9864 if (PL_realtokenstart >= 0) {
9865 stuffstart = PL_realtokenstart;
9866 PL_realtokenstart = -1;
5db06880
NC
9867 }
9868 else
9869 stuffstart = start - SvPVX(PL_linestr);
9870#endif
02aa26ce 9871 /* mark where we are, in case we need to report errors */
79072805 9872 CLINE;
02aa26ce
NT
9873
9874 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 9875 term = *s;
220e2d4e
IH
9876 if (!UTF) {
9877 termcode = termstr[0] = term;
9878 termlen = 1;
9879 }
9880 else {
f3b9ce0f 9881 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
9882 Copy(s, termstr, termlen, U8);
9883 if (!UTF8_IS_INVARIANT(term))
9884 has_utf8 = TRUE;
9885 }
b1c7b182 9886
02aa26ce 9887 /* mark where we are */
57843af0 9888 PL_multi_start = CopLINE(PL_curcop);
3280af22 9889 PL_multi_open = term;
02aa26ce
NT
9890
9891 /* find corresponding closing delimiter */
93a17b20 9892 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
9893 termcode = termstr[0] = term = tmps[5];
9894
3280af22 9895 PL_multi_close = term;
79072805 9896
561b68a9
SH
9897 /* create a new SV to hold the contents. 79 is the SV's initial length.
9898 What a random number. */
7d0a29fe
NC
9899 sv = newSV_type(SVt_PVIV);
9900 SvGROW(sv, 80);
45977657 9901 SvIV_set(sv, termcode);
a0d0e21e 9902 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
9903
9904 /* move past delimiter and try to read a complete string */
09bef843 9905 if (keep_delims)
220e2d4e
IH
9906 sv_catpvn(sv, s, termlen);
9907 s += termlen;
5db06880
NC
9908#ifdef PERL_MAD
9909 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9910 if (!PL_thisopen && !keep_delims) {
9911 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9912 stuffstart = s - SvPVX(PL_linestr);
9913 }
9914#endif
93a17b20 9915 for (;;) {
220e2d4e
IH
9916 if (PL_encoding && !UTF) {
9917 bool cont = TRUE;
9918
9919 while (cont) {
95a20fc0 9920 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 9921 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 9922 &offset, (char*)termstr, termlen);
6136c704
AL
9923 const char * const ns = SvPVX_const(PL_linestr) + offset;
9924 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
9925
9926 for (; s < ns; s++) {
60d63348 9927 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
220e2d4e
IH
9928 CopLINE_inc(PL_curcop);
9929 }
9930 if (!found)
9931 goto read_more_line;
9932 else {
9933 /* handle quoted delimiters */
52327caf 9934 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 9935 const char *t;
95a20fc0 9936 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
9937 t--;
9938 if ((svlast-1 - t) % 2) {
9939 if (!keep_quoted) {
9940 *(svlast-1) = term;
9941 *svlast = '\0';
9942 SvCUR_set(sv, SvCUR(sv) - 1);
9943 }
9944 continue;
9945 }
9946 }
9947 if (PL_multi_open == PL_multi_close) {
9948 cont = FALSE;
9949 }
9950 else {
f54cb97a
AL
9951 const char *t;
9952 char *w;
0331ef07 9953 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
9954 /* At here, all closes are "was quoted" one,
9955 so we don't check PL_multi_close. */
9956 if (*t == '\\') {
9957 if (!keep_quoted && *(t+1) == PL_multi_open)
9958 t++;
9959 else
9960 *w++ = *t++;
9961 }
9962 else if (*t == PL_multi_open)
9963 brackets++;
9964
9965 *w = *t;
9966 }
9967 if (w < t) {
9968 *w++ = term;
9969 *w = '\0';
95a20fc0 9970 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 9971 }
0331ef07 9972 last_off = w - SvPVX(sv);
220e2d4e
IH
9973 if (--brackets <= 0)
9974 cont = FALSE;
9975 }
9976 }
9977 }
9978 if (!keep_delims) {
9979 SvCUR_set(sv, SvCUR(sv) - 1);
9980 *SvEND(sv) = '\0';
9981 }
9982 break;
9983 }
9984
02aa26ce 9985 /* extend sv if need be */
3280af22 9986 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 9987 /* set 'to' to the next character in the sv's string */
463ee0b2 9988 to = SvPVX(sv)+SvCUR(sv);
09bef843 9989
02aa26ce 9990 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
9991 if (PL_multi_open == PL_multi_close) {
9992 for (; s < PL_bufend; s++,to++) {
02aa26ce 9993 /* embedded newlines increment the current line number */
60d63348 9994 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
57843af0 9995 CopLINE_inc(PL_curcop);
02aa26ce 9996 /* handle quoted delimiters */
3280af22 9997 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 9998 if (!keep_quoted && s[1] == term)
a0d0e21e 9999 s++;
02aa26ce 10000 /* any other quotes are simply copied straight through */
a0d0e21e
LW
10001 else
10002 *to++ = *s++;
10003 }
02aa26ce
NT
10004 /* terminate when run out of buffer (the for() condition), or
10005 have found the terminator */
220e2d4e
IH
10006 else if (*s == term) {
10007 if (termlen == 1)
10008 break;
f3b9ce0f 10009 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
10010 break;
10011 }
63cd0674 10012 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10013 has_utf8 = TRUE;
93a17b20
LW
10014 *to = *s;
10015 }
10016 }
02aa26ce
NT
10017
10018 /* if the terminator isn't the same as the start character (e.g.,
10019 matched brackets), we have to allow more in the quoting, and
10020 be prepared for nested brackets.
10021 */
93a17b20 10022 else {
02aa26ce 10023 /* read until we run out of string, or we find the terminator */
3280af22 10024 for (; s < PL_bufend; s++,to++) {
02aa26ce 10025 /* embedded newlines increment the line count */
60d63348 10026 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
57843af0 10027 CopLINE_inc(PL_curcop);
02aa26ce 10028 /* backslashes can escape the open or closing characters */
3280af22 10029 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
10030 if (!keep_quoted &&
10031 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
10032 s++;
10033 else
10034 *to++ = *s++;
10035 }
02aa26ce 10036 /* allow nested opens and closes */
3280af22 10037 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 10038 break;
3280af22 10039 else if (*s == PL_multi_open)
93a17b20 10040 brackets++;
63cd0674 10041 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10042 has_utf8 = TRUE;
93a17b20
LW
10043 *to = *s;
10044 }
10045 }
02aa26ce 10046 /* terminate the copied string and update the sv's end-of-string */
93a17b20 10047 *to = '\0';
95a20fc0 10048 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 10049
02aa26ce
NT
10050 /*
10051 * this next chunk reads more into the buffer if we're not done yet
10052 */
10053
b1c7b182
GS
10054 if (s < PL_bufend)
10055 break; /* handle case where we are done yet :-) */
79072805 10056
6a27c188 10057#ifndef PERL_STRICT_CR
95a20fc0 10058 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
10059 if ((to[-2] == '\r' && to[-1] == '\n') ||
10060 (to[-2] == '\n' && to[-1] == '\r'))
10061 {
f63a84b2
LW
10062 to[-2] = '\n';
10063 to--;
95a20fc0 10064 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
10065 }
10066 else if (to[-1] == '\r')
10067 to[-1] = '\n';
10068 }
95a20fc0 10069 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
10070 to[-1] = '\n';
10071#endif
10072
220e2d4e 10073 read_more_line:
02aa26ce
NT
10074 /* if we're out of file, or a read fails, bail and reset the current
10075 line marker so we can report where the unterminated string began
10076 */
5db06880
NC
10077#ifdef PERL_MAD
10078 if (PL_madskills) {
c35e046a 10079 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10080 if (PL_thisstuff)
10081 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10082 else
cd81e915 10083 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10084 }
10085#endif
f0e67a1d
Z
10086 CopLINE_inc(PL_curcop);
10087 PL_bufptr = PL_bufend;
10088 if (!lex_next_chunk(0)) {
c07a80fd 10089 sv_free(sv);
eb160463 10090 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 10091 return NULL;
79072805 10092 }
f0e67a1d 10093 s = PL_bufptr;
5db06880
NC
10094#ifdef PERL_MAD
10095 stuffstart = 0;
10096#endif
378cc40b 10097 }
4e553d73 10098
02aa26ce
NT
10099 /* at this point, we have successfully read the delimited string */
10100
220e2d4e 10101 if (!PL_encoding || UTF) {
5db06880
NC
10102#ifdef PERL_MAD
10103 if (PL_madskills) {
c35e046a 10104 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 10105 const int len = s - tstart;
cd81e915 10106 if (PL_thisstuff)
c35e046a 10107 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10108 else
c35e046a 10109 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10110 if (!PL_thisclose && !keep_delims)
10111 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
10112 }
10113#endif
10114
220e2d4e
IH
10115 if (keep_delims)
10116 sv_catpvn(sv, s, termlen);
10117 s += termlen;
10118 }
5db06880
NC
10119#ifdef PERL_MAD
10120 else {
10121 if (PL_madskills) {
c35e046a
AL
10122 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10123 const int len = s - tstart - termlen;
cd81e915 10124 if (PL_thisstuff)
c35e046a 10125 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10126 else
c35e046a 10127 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10128 if (!PL_thisclose && !keep_delims)
10129 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
10130 }
10131 }
10132#endif
220e2d4e 10133 if (has_utf8 || PL_encoding)
b1c7b182 10134 SvUTF8_on(sv);
d0063567 10135
57843af0 10136 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
10137
10138 /* if we allocated too much space, give some back */
93a17b20
LW
10139 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10140 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 10141 SvPV_renew(sv, SvLEN(sv));
79072805 10142 }
02aa26ce
NT
10143
10144 /* decide whether this is the first or second quoted string we've read
10145 for this op
10146 */
4e553d73 10147
3280af22
NIS
10148 if (PL_lex_stuff)
10149 PL_lex_repl = sv;
79072805 10150 else
3280af22 10151 PL_lex_stuff = sv;
378cc40b
LW
10152 return s;
10153}
10154
02aa26ce
NT
10155/*
10156 scan_num
10157 takes: pointer to position in buffer
10158 returns: pointer to new position in buffer
6154021b 10159 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
10160
10161 Read a number in any of the formats that Perl accepts:
10162
7fd134d9
JH
10163 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10164 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
10165 0b[01](_?[01])*
10166 0[0-7](_?[0-7])*
10167 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10168
3280af22 10169 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10170 thing it reads.
10171
10172 If it reads a number without a decimal point or an exponent, it will
10173 try converting the number to an integer and see if it can do so
10174 without loss of precision.
10175*/
4e553d73 10176
378cc40b 10177char *
bfed75c6 10178Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10179{
97aff369 10180 dVAR;
bfed75c6 10181 register const char *s = start; /* current position in buffer */
02aa26ce
NT
10182 register char *d; /* destination in temp buffer */
10183 register char *e; /* end of temp buffer */
86554af2 10184 NV nv; /* number read, as a double */
a0714e2c 10185 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10186 bool floatit; /* boolean: int or float? */
cbbf8932 10187 const char *lastub = NULL; /* position of last underbar */
bfed75c6 10188 static char const number_too_long[] = "Number too long";
378cc40b 10189
7918f24d
NC
10190 PERL_ARGS_ASSERT_SCAN_NUM;
10191
02aa26ce
NT
10192 /* We use the first character to decide what type of number this is */
10193
378cc40b 10194 switch (*s) {
79072805 10195 default:
5637ef5b 10196 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
4e553d73 10197
02aa26ce 10198 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10199 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10200 case '0':
10201 {
02aa26ce
NT
10202 /* variables:
10203 u holds the "number so far"
4f19785b
WSI
10204 shift the power of 2 of the base
10205 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10206 overflowed was the number more than we can hold?
10207
10208 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10209 we in octal/hex/binary?" indicator to disallow hex characters
10210 when in octal mode.
02aa26ce 10211 */
9e24b6e2
JH
10212 NV n = 0.0;
10213 UV u = 0;
79072805 10214 I32 shift;
9e24b6e2 10215 bool overflowed = FALSE;
61f33854 10216 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10217 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10218 static const char* const bases[5] =
10219 { "", "binary", "", "octal", "hexadecimal" };
10220 static const char* const Bases[5] =
10221 { "", "Binary", "", "Octal", "Hexadecimal" };
10222 static const char* const maxima[5] =
10223 { "",
10224 "0b11111111111111111111111111111111",
10225 "",
10226 "037777777777",
10227 "0xffffffff" };
bfed75c6 10228 const char *base, *Base, *max;
378cc40b 10229
02aa26ce 10230 /* check for hex */
a674e8db 10231 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10232 shift = 4;
10233 s += 2;
61f33854 10234 just_zero = FALSE;
a674e8db 10235 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10236 shift = 1;
10237 s += 2;
61f33854 10238 just_zero = FALSE;
378cc40b 10239 }
02aa26ce 10240 /* check for a decimal in disguise */
b78218b7 10241 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10242 goto decimal;
02aa26ce 10243 /* so it must be octal */
928753ea 10244 else {
378cc40b 10245 shift = 3;
928753ea
JH
10246 s++;
10247 }
10248
10249 if (*s == '_') {
a2a5de95 10250 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10251 "Misplaced _ in number");
10252 lastub = s++;
10253 }
9e24b6e2
JH
10254
10255 base = bases[shift];
10256 Base = Bases[shift];
10257 max = maxima[shift];
02aa26ce 10258
4f19785b 10259 /* read the rest of the number */
378cc40b 10260 for (;;) {
9e24b6e2 10261 /* x is used in the overflow test,
893fe2c2 10262 b is the digit we're adding on. */
9e24b6e2 10263 UV x, b;
55497cff 10264
378cc40b 10265 switch (*s) {
02aa26ce
NT
10266
10267 /* if we don't mention it, we're done */
378cc40b
LW
10268 default:
10269 goto out;
02aa26ce 10270
928753ea 10271 /* _ are ignored -- but warned about if consecutive */
de3bb511 10272 case '_':
a2a5de95
NC
10273 if (lastub && s == lastub + 1)
10274 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10275 "Misplaced _ in number");
928753ea 10276 lastub = s++;
de3bb511 10277 break;
02aa26ce
NT
10278
10279 /* 8 and 9 are not octal */
378cc40b 10280 case '8': case '9':
4f19785b 10281 if (shift == 3)
cea2e8a9 10282 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10283 /* FALL THROUGH */
02aa26ce
NT
10284
10285 /* octal digits */
4f19785b 10286 case '2': case '3': case '4':
378cc40b 10287 case '5': case '6': case '7':
4f19785b 10288 if (shift == 1)
cea2e8a9 10289 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10290 /* FALL THROUGH */
10291
10292 case '0': case '1':
02aa26ce 10293 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10294 goto digit;
02aa26ce
NT
10295
10296 /* hex digits */
378cc40b
LW
10297 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10298 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10299 /* make sure they said 0x */
378cc40b
LW
10300 if (shift != 4)
10301 goto out;
55497cff 10302 b = (*s++ & 7) + 9;
02aa26ce
NT
10303
10304 /* Prepare to put the digit we have onto the end
10305 of the number so far. We check for overflows.
10306 */
10307
55497cff 10308 digit:
61f33854 10309 just_zero = FALSE;
9e24b6e2
JH
10310 if (!overflowed) {
10311 x = u << shift; /* make room for the digit */
10312
10313 if ((x >> shift) != u
10314 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10315 overflowed = TRUE;
10316 n = (NV) u;
9b387841
NC
10317 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10318 "Integer overflow in %s number",
10319 base);
9e24b6e2
JH
10320 } else
10321 u = x | b; /* add the digit to the end */
10322 }
10323 if (overflowed) {
10324 n *= nvshift[shift];
10325 /* If an NV has not enough bits in its
10326 * mantissa to represent an UV this summing of
10327 * small low-order numbers is a waste of time
10328 * (because the NV cannot preserve the
10329 * low-order bits anyway): we could just
10330 * remember when did we overflow and in the
10331 * end just multiply n by the right
10332 * amount. */
10333 n += (NV) b;
55497cff 10334 }
378cc40b
LW
10335 break;
10336 }
10337 }
02aa26ce
NT
10338
10339 /* if we get here, we had success: make a scalar value from
10340 the number.
10341 */
378cc40b 10342 out:
928753ea
JH
10343
10344 /* final misplaced underbar check */
10345 if (s[-1] == '_') {
a2a5de95 10346 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10347 }
10348
9e24b6e2 10349 if (overflowed) {
a2a5de95
NC
10350 if (n > 4294967295.0)
10351 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10352 "%s number > %s non-portable",
10353 Base, max);
b081dd7e 10354 sv = newSVnv(n);
9e24b6e2
JH
10355 }
10356 else {
15041a67 10357#if UVSIZE > 4
a2a5de95
NC
10358 if (u > 0xffffffff)
10359 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10360 "%s number > %s non-portable",
10361 Base, max);
2cc4c2dc 10362#endif
b081dd7e 10363 sv = newSVuv(u);
9e24b6e2 10364 }
61f33854 10365 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10366 sv = new_constant(start, s - start, "integer",
eb0d8d16 10367 sv, NULL, NULL, 0);
61f33854 10368 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10369 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10370 }
10371 break;
02aa26ce
NT
10372
10373 /*
10374 handle decimal numbers.
10375 we're also sent here when we read a 0 as the first digit
10376 */
378cc40b
LW
10377 case '1': case '2': case '3': case '4': case '5':
10378 case '6': case '7': case '8': case '9': case '.':
10379 decimal:
3280af22
NIS
10380 d = PL_tokenbuf;
10381 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10382 floatit = FALSE;
02aa26ce
NT
10383
10384 /* read next group of digits and _ and copy into d */
de3bb511 10385 while (isDIGIT(*s) || *s == '_') {
4e553d73 10386 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10387 if -w is on
10388 */
93a17b20 10389 if (*s == '_') {
a2a5de95
NC
10390 if (lastub && s == lastub + 1)
10391 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10392 "Misplaced _ in number");
928753ea 10393 lastub = s++;
93a17b20 10394 }
fc36a67e 10395 else {
02aa26ce 10396 /* check for end of fixed-length buffer */
fc36a67e 10397 if (d >= e)
cea2e8a9 10398 Perl_croak(aTHX_ number_too_long);
02aa26ce 10399 /* if we're ok, copy the character */
378cc40b 10400 *d++ = *s++;
fc36a67e 10401 }
378cc40b 10402 }
02aa26ce
NT
10403
10404 /* final misplaced underbar check */
928753ea 10405 if (lastub && s == lastub + 1) {
a2a5de95 10406 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10407 }
02aa26ce
NT
10408
10409 /* read a decimal portion if there is one. avoid
10410 3..5 being interpreted as the number 3. followed
10411 by .5
10412 */
2f3197b3 10413 if (*s == '.' && s[1] != '.') {
79072805 10414 floatit = TRUE;
378cc40b 10415 *d++ = *s++;
02aa26ce 10416
928753ea 10417 if (*s == '_') {
a2a5de95
NC
10418 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10419 "Misplaced _ in number");
928753ea
JH
10420 lastub = s;
10421 }
10422
10423 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10424 */
fc36a67e 10425 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10426 /* fixed length buffer check */
fc36a67e 10427 if (d >= e)
cea2e8a9 10428 Perl_croak(aTHX_ number_too_long);
928753ea 10429 if (*s == '_') {
a2a5de95
NC
10430 if (lastub && s == lastub + 1)
10431 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10432 "Misplaced _ in number");
928753ea
JH
10433 lastub = s;
10434 }
10435 else
fc36a67e 10436 *d++ = *s;
378cc40b 10437 }
928753ea
JH
10438 /* fractional part ending in underbar? */
10439 if (s[-1] == '_') {
a2a5de95
NC
10440 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10441 "Misplaced _ in number");
928753ea 10442 }
dd629d5b
GS
10443 if (*s == '.' && isDIGIT(s[1])) {
10444 /* oops, it's really a v-string, but without the "v" */
f4758303 10445 s = start;
dd629d5b
GS
10446 goto vstring;
10447 }
378cc40b 10448 }
02aa26ce
NT
10449
10450 /* read exponent part, if present */
3792a11b 10451 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10452 floatit = TRUE;
10453 s++;
02aa26ce
NT
10454
10455 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10456 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10457
7fd134d9
JH
10458 /* stray preinitial _ */
10459 if (*s == '_') {
a2a5de95
NC
10460 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10461 "Misplaced _ in number");
7fd134d9
JH
10462 lastub = s++;
10463 }
10464
02aa26ce 10465 /* allow positive or negative exponent */
378cc40b
LW
10466 if (*s == '+' || *s == '-')
10467 *d++ = *s++;
02aa26ce 10468
7fd134d9
JH
10469 /* stray initial _ */
10470 if (*s == '_') {
a2a5de95
NC
10471 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10472 "Misplaced _ in number");
7fd134d9
JH
10473 lastub = s++;
10474 }
10475
7fd134d9
JH
10476 /* read digits of exponent */
10477 while (isDIGIT(*s) || *s == '_') {
10478 if (isDIGIT(*s)) {
10479 if (d >= e)
10480 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10481 *d++ = *s++;
7fd134d9
JH
10482 }
10483 else {
041457d9 10484 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10485 (!isDIGIT(s[1]) && s[1] != '_')))
10486 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10487 "Misplaced _ in number");
b3b48e3e 10488 lastub = s++;
7fd134d9 10489 }
7fd134d9 10490 }
378cc40b 10491 }
02aa26ce 10492
02aa26ce 10493
0b7fceb9 10494 /*
58bb9ec3
NC
10495 We try to do an integer conversion first if no characters
10496 indicating "float" have been found.
0b7fceb9
MU
10497 */
10498
10499 if (!floatit) {
58bb9ec3 10500 UV uv;
6136c704 10501 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10502
10503 if (flags == IS_NUMBER_IN_UV) {
10504 if (uv <= IV_MAX)
b081dd7e 10505 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10506 else
b081dd7e 10507 sv = newSVuv(uv);
58bb9ec3
NC
10508 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10509 if (uv <= (UV) IV_MIN)
b081dd7e 10510 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10511 else
10512 floatit = TRUE;
10513 } else
10514 floatit = TRUE;
10515 }
0b7fceb9 10516 if (floatit) {
58bb9ec3
NC
10517 /* terminate the string */
10518 *d = '\0';
86554af2 10519 nv = Atof(PL_tokenbuf);
b081dd7e 10520 sv = newSVnv(nv);
86554af2 10521 }
86554af2 10522
eb0d8d16
NC
10523 if ( floatit
10524 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10525 const char *const key = floatit ? "float" : "integer";
10526 const STRLEN keylen = floatit ? 5 : 7;
10527 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10528 key, keylen, sv, NULL, NULL, 0);
10529 }
378cc40b 10530 break;
0b7fceb9 10531
e312add1 10532 /* if it starts with a v, it could be a v-string */
a7cb1f99 10533 case 'v':
dd629d5b 10534vstring:
561b68a9 10535 sv = newSV(5); /* preallocate storage space */
65b06e02 10536 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10537 break;
79072805 10538 }
a687059c 10539
02aa26ce
NT
10540 /* make the op for the constant and return */
10541
a86a20aa 10542 if (sv)
b73d6f50 10543 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10544 else
5f66b61c 10545 lvalp->opval = NULL;
a687059c 10546
73d840c0 10547 return (char *)s;
378cc40b
LW
10548}
10549
76e3520e 10550STATIC char *
cea2e8a9 10551S_scan_formline(pTHX_ register char *s)
378cc40b 10552{
97aff369 10553 dVAR;
79072805 10554 register char *eol;
378cc40b 10555 register char *t;
6136c704 10556 SV * const stuff = newSVpvs("");
79072805 10557 bool needargs = FALSE;
c5ee2135 10558 bool eofmt = FALSE;
5db06880
NC
10559#ifdef PERL_MAD
10560 char *tokenstart = s;
4f61fd4b
JC
10561 SV* savewhite = NULL;
10562
5db06880 10563 if (PL_madskills) {
cd81e915
NC
10564 savewhite = PL_thiswhite;
10565 PL_thiswhite = 0;
5db06880
NC
10566 }
10567#endif
378cc40b 10568
7918f24d
NC
10569 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10570
79072805 10571 while (!needargs) {
a1b95068 10572 if (*s == '.') {
c35e046a 10573 t = s+1;
51882d45 10574#ifdef PERL_STRICT_CR
c35e046a
AL
10575 while (SPACE_OR_TAB(*t))
10576 t++;
51882d45 10577#else
c35e046a
AL
10578 while (SPACE_OR_TAB(*t) || *t == '\r')
10579 t++;
51882d45 10580#endif
c5ee2135
WL
10581 if (*t == '\n' || t == PL_bufend) {
10582 eofmt = TRUE;
79072805 10583 break;
c5ee2135 10584 }
79072805 10585 }
60d63348 10586 if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
07409e01 10587 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 10588 if (!eol++)
3280af22 10589 eol = PL_bufend;
0f85fab0
LW
10590 }
10591 else
3280af22 10592 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 10593 if (*s != '#') {
a0d0e21e
LW
10594 for (t = s; t < eol; t++) {
10595 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10596 needargs = FALSE;
10597 goto enough; /* ~~ must be first line in formline */
378cc40b 10598 }
a0d0e21e
LW
10599 if (*t == '@' || *t == '^')
10600 needargs = TRUE;
378cc40b 10601 }
7121b347
MG
10602 if (eol > s) {
10603 sv_catpvn(stuff, s, eol-s);
2dc4c65b 10604#ifndef PERL_STRICT_CR
7121b347
MG
10605 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10606 char *end = SvPVX(stuff) + SvCUR(stuff);
10607 end[-2] = '\n';
10608 end[-1] = '\0';
b162af07 10609 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 10610 }
2dc4c65b 10611#endif
7121b347
MG
10612 }
10613 else
10614 break;
79072805 10615 }
95a20fc0 10616 s = (char*)eol;
60d63348 10617 if (PL_rsfp || PL_parser->filtered) {
f0e67a1d 10618 bool got_some;
5db06880
NC
10619#ifdef PERL_MAD
10620 if (PL_madskills) {
cd81e915
NC
10621 if (PL_thistoken)
10622 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 10623 else
cd81e915 10624 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
10625 }
10626#endif
f0e67a1d
Z
10627 PL_bufptr = PL_bufend;
10628 CopLINE_inc(PL_curcop);
10629 got_some = lex_next_chunk(0);
10630 CopLINE_dec(PL_curcop);
10631 s = PL_bufptr;
5db06880 10632#ifdef PERL_MAD
f0e67a1d 10633 tokenstart = PL_bufptr;
5db06880 10634#endif
f0e67a1d 10635 if (!got_some)
378cc40b 10636 break;
378cc40b 10637 }
463ee0b2 10638 incline(s);
79072805 10639 }
a0d0e21e
LW
10640 enough:
10641 if (SvCUR(stuff)) {
3280af22 10642 PL_expect = XTERM;
79072805 10643 if (needargs) {
3280af22 10644 PL_lex_state = LEX_NORMAL;
cd81e915 10645 start_force(PL_curforce);
9ded7720 10646 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
10647 force_next(',');
10648 }
a0d0e21e 10649 else
3280af22 10650 PL_lex_state = LEX_FORMLINE;
1bd51a4c 10651 if (!IN_BYTES) {
95a20fc0 10652 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
10653 SvUTF8_on(stuff);
10654 else if (PL_encoding)
10655 sv_recode_to_utf8(stuff, PL_encoding);
10656 }
cd81e915 10657 start_force(PL_curforce);
9ded7720 10658 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 10659 force_next(THING);
cd81e915 10660 start_force(PL_curforce);
9ded7720 10661 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 10662 force_next(LSTOP);
378cc40b 10663 }
79072805 10664 else {
8990e307 10665 SvREFCNT_dec(stuff);
c5ee2135
WL
10666 if (eofmt)
10667 PL_lex_formbrack = 0;
3280af22 10668 PL_bufptr = s;
79072805 10669 }
5db06880
NC
10670#ifdef PERL_MAD
10671 if (PL_madskills) {
cd81e915
NC
10672 if (PL_thistoken)
10673 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 10674 else
cd81e915
NC
10675 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10676 PL_thiswhite = savewhite;
5db06880
NC
10677 }
10678#endif
79072805 10679 return s;
378cc40b 10680}
a687059c 10681
ba6d6ac9 10682I32
864dbfa3 10683Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 10684{
97aff369 10685 dVAR;
a3b680e6 10686 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 10687 CV* const outsidecv = PL_compcv;
8990e307 10688
3280af22
NIS
10689 if (PL_compcv) {
10690 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 10691 }
7766f137 10692 SAVEI32(PL_subline);
3280af22 10693 save_item(PL_subname);
3280af22 10694 SAVESPTR(PL_compcv);
3280af22 10695
ea726b52 10696 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
10697 CvFLAGS(PL_compcv) |= flags;
10698
57843af0 10699 PL_subline = CopLINE(PL_curcop);
dd2155a4 10700 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 10701 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 10702 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 10703
8990e307
LW
10704 return oldsavestack_ix;
10705}
10706
084592ab
CN
10707#ifdef __SC__
10708#pragma segment Perl_yylex
10709#endif
af41e527
NC
10710static int
10711S_yywarn(pTHX_ const char *const s)
8990e307 10712{
97aff369 10713 dVAR;
7918f24d
NC
10714
10715 PERL_ARGS_ASSERT_YYWARN;
10716
faef0170 10717 PL_in_eval |= EVAL_WARNONLY;
748a9306 10718 yyerror(s);
faef0170 10719 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 10720 return 0;
8990e307
LW
10721}
10722
10723int
15f169a1 10724Perl_yyerror(pTHX_ const char *const s)
463ee0b2 10725{
97aff369 10726 dVAR;
bfed75c6
AL
10727 const char *where = NULL;
10728 const char *context = NULL;
68dc0745 10729 int contlen = -1;
46fc3d4c 10730 SV *msg;
5912531f 10731 int yychar = PL_parser->yychar;
463ee0b2 10732
7918f24d
NC
10733 PERL_ARGS_ASSERT_YYERROR;
10734
3280af22 10735 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 10736 where = "at EOF";
8bcfe651
TM
10737 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10738 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10739 PL_oldbufptr != PL_bufptr) {
f355267c
JH
10740 /*
10741 Only for NetWare:
10742 The code below is removed for NetWare because it abends/crashes on NetWare
10743 when the script has error such as not having the closing quotes like:
10744 if ($var eq "value)
10745 Checking of white spaces is anyway done in NetWare code.
10746 */
10747#ifndef NETWARE
3280af22
NIS
10748 while (isSPACE(*PL_oldoldbufptr))
10749 PL_oldoldbufptr++;
f355267c 10750#endif
3280af22
NIS
10751 context = PL_oldoldbufptr;
10752 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 10753 }
8bcfe651
TM
10754 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10755 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
10756 /*
10757 Only for NetWare:
10758 The code below is removed for NetWare because it abends/crashes on NetWare
10759 when the script has error such as not having the closing quotes like:
10760 if ($var eq "value)
10761 Checking of white spaces is anyway done in NetWare code.
10762 */
10763#ifndef NETWARE
3280af22
NIS
10764 while (isSPACE(*PL_oldbufptr))
10765 PL_oldbufptr++;
f355267c 10766#endif
3280af22
NIS
10767 context = PL_oldbufptr;
10768 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
10769 }
10770 else if (yychar > 255)
68dc0745 10771 where = "next token ???";
12fbd33b 10772 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
10773 if (PL_lex_state == LEX_NORMAL ||
10774 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 10775 where = "at end of line";
3280af22 10776 else if (PL_lex_inpat)
68dc0745 10777 where = "within pattern";
463ee0b2 10778 else
68dc0745 10779 where = "within string";
463ee0b2 10780 }
46fc3d4c 10781 else {
84bafc02 10782 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 10783 if (yychar < 32)
cea2e8a9 10784 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 10785 else if (isPRINT_LC(yychar)) {
88c9ea1e 10786 const char string = yychar;
5e7aa789
NC
10787 sv_catpvn(where_sv, &string, 1);
10788 }
463ee0b2 10789 else
cea2e8a9 10790 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 10791 where = SvPVX_const(where_sv);
463ee0b2 10792 }
46fc3d4c 10793 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 10794 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 10795 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 10796 if (context)
cea2e8a9 10797 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 10798 else
cea2e8a9 10799 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 10800 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 10801 Perl_sv_catpvf(aTHX_ msg,
57def98f 10802 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 10803 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 10804 PL_multi_end = 0;
a0d0e21e 10805 }
500960a6 10806 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 10807 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 10808 }
463ee0b2 10809 else
5a844595 10810 qerror(msg);
c7d6bfb2
GS
10811 if (PL_error_count >= 10) {
10812 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 10813 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 10814 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
10815 else
10816 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 10817 OutCopFILE(PL_curcop));
c7d6bfb2 10818 }
3280af22 10819 PL_in_my = 0;
5c284bb0 10820 PL_in_my_stash = NULL;
463ee0b2
LW
10821 return 0;
10822}
084592ab
CN
10823#ifdef __SC__
10824#pragma segment Main
10825#endif
4e35701f 10826
b250498f 10827STATIC char*
3ae08724 10828S_swallow_bom(pTHX_ U8 *s)
01ec43d0 10829{
97aff369 10830 dVAR;
f54cb97a 10831 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
10832
10833 PERL_ARGS_ASSERT_SWALLOW_BOM;
10834
7aa207d6 10835 switch (s[0]) {
4e553d73
NIS
10836 case 0xFF:
10837 if (s[1] == 0xFE) {
ee6ba15d 10838 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 10839 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
dcbac5bb 10840 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 10841 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 10842#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 10843 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 10844 s += 2;
dea0fc0b 10845 if (PL_bufend > (char*)s) {
81a923f4 10846 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 10847 }
b250498f 10848#else
dcbac5bb 10849 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 10850 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 10851#endif
01ec43d0
GS
10852 }
10853 break;
78ae23f5 10854 case 0xFE:
7aa207d6 10855 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 10856#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10857 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
10858 s += 2;
10859 if (PL_bufend > (char *)s) {
81a923f4 10860 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 10861 }
b250498f 10862#else
dcbac5bb 10863 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 10864 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 10865#endif
01ec43d0
GS
10866 }
10867 break;
3ae08724
GS
10868 case 0xEF:
10869 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 10870 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
10871 s += 3; /* UTF-8 */
10872 }
10873 break;
10874 case 0:
7aa207d6
JH
10875 if (slen > 3) {
10876 if (s[1] == 0) {
10877 if (s[2] == 0xFE && s[3] == 0xFF) {
10878 /* UTF-32 big-endian */
dcbac5bb 10879 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 10880 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
10881 }
10882 }
10883 else if (s[2] == 0 && s[3] != 0) {
10884 /* Leading bytes
10885 * 00 xx 00 xx
10886 * are a good indicator of UTF-16BE. */
ee6ba15d 10887#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10888 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
10889 s = add_utf16_textfilter(s, FALSE);
10890#else
dcbac5bb 10891 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
10892 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10893#endif
7aa207d6 10894 }
01ec43d0 10895 }
e294cc5d
JH
10896#ifdef EBCDIC
10897 case 0xDD:
10898 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10899 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10900 s += 4; /* UTF-8 */
10901 }
10902 break;
10903#endif
10904
7aa207d6
JH
10905 default:
10906 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10907 /* Leading bytes
10908 * xx 00 xx 00
10909 * are a good indicator of UTF-16LE. */
ee6ba15d 10910#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10911 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 10912 s = add_utf16_textfilter(s, TRUE);
ee6ba15d 10913#else
dcbac5bb 10914 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
10915 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10916#endif
7aa207d6 10917 }
01ec43d0 10918 }
b8f84bb2 10919 return (char*)s;
b250498f 10920}
4755096e 10921
6e3aabd6
GS
10922
10923#ifndef PERL_NO_UTF16_FILTER
10924static I32
a28af015 10925S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 10926{
97aff369 10927 dVAR;
f3040f2c 10928 SV *const filter = FILTER_DATA(idx);
2a773401
NC
10929 /* We re-use this each time round, throwing the contents away before we
10930 return. */
2a773401 10931 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 10932 SV *const utf8_buffer = filter;
c28d6105 10933 IV status = IoPAGE(filter);
f2338a2e 10934 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 10935 I32 retval;
c8b0cbae 10936
c85ae797
NC
10937 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10938
c8b0cbae
NC
10939 /* As we're automatically added, at the lowest level, and hence only called
10940 from this file, we can be sure that we're not called in block mode. Hence
10941 don't bother writing code to deal with block mode. */
10942 if (maxlen) {
10943 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10944 }
c28d6105
NC
10945 if (status < 0) {
10946 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10947 }
1de9afcd 10948 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 10949 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 10950 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
10951 reverse ? 'l' : 'b', idx, maxlen, status,
10952 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10953
10954 while (1) {
10955 STRLEN chars;
10956 STRLEN have;
dea0fc0b 10957 I32 newlen;
2a773401 10958 U8 *end;
c28d6105
NC
10959 /* First, look in our buffer of existing UTF-8 data: */
10960 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10961
10962 if (nl) {
10963 ++nl;
10964 } else if (status == 0) {
10965 /* EOF */
10966 IoPAGE(filter) = 0;
10967 nl = SvEND(utf8_buffer);
10968 }
10969 if (nl) {
d2d1d4de
NC
10970 STRLEN got = nl - SvPVX(utf8_buffer);
10971 /* Did we have anything to append? */
10972 retval = got != 0;
10973 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
10974 /* Everything else in this code works just fine if SVp_POK isn't
10975 set. This, however, needs it, and we need it to work, else
10976 we loop infinitely because the buffer is never consumed. */
10977 sv_chop(utf8_buffer, nl);
10978 break;
10979 }
ba77e4cc 10980
c28d6105
NC
10981 /* OK, not a complete line there, so need to read some more UTF-16.
10982 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
10983 while (1) {
10984 if (status <= 0)
10985 break;
10986 if (SvCUR(utf16_buffer) >= 2) {
10987 /* Location of the high octet of the last complete code point.
10988 Gosh, UTF-16 is a pain. All the benefits of variable length,
10989 *coupled* with all the benefits of partial reads and
10990 endianness. */
10991 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10992 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10993
10994 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10995 break;
10996 }
10997
10998 /* We have the first half of a surrogate. Read more. */
10999 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11000 }
c28d6105 11001
c28d6105
NC
11002 status = FILTER_READ(idx + 1, utf16_buffer,
11003 160 + (SvCUR(utf16_buffer) & 1));
11004 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 11005 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
11006 if (status < 0) {
11007 /* Error */
11008 IoPAGE(filter) = status;
11009 return status;
11010 }
11011 }
11012
11013 chars = SvCUR(utf16_buffer) >> 1;
11014 have = SvCUR(utf8_buffer);
11015 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 11016
aa6dbd60 11017 if (reverse) {
c28d6105
NC
11018 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11019 (U8*)SvPVX_const(utf8_buffer) + have,
11020 chars * 2, &newlen);
aa6dbd60 11021 } else {
2a773401 11022 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
11023 (U8*)SvPVX_const(utf8_buffer) + have,
11024 chars * 2, &newlen);
2a773401 11025 }
c28d6105 11026 SvCUR_set(utf8_buffer, have + newlen);
2a773401 11027 *end = '\0';
c28d6105 11028
e07286ed
NC
11029 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11030 it's private to us, and utf16_to_utf8{,reversed} take a
11031 (pointer,length) pair, rather than a NUL-terminated string. */
11032 if(SvCUR(utf16_buffer) & 1) {
11033 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11034 SvCUR_set(utf16_buffer, 1);
11035 } else {
11036 SvCUR_set(utf16_buffer, 0);
11037 }
2a773401 11038 }
c28d6105
NC
11039 DEBUG_P(PerlIO_printf(Perl_debug_log,
11040 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11041 status,
11042 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11043 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 11044 return retval;
6e3aabd6 11045}
81a923f4
NC
11046
11047static U8 *
11048S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11049{
2a773401 11050 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 11051
c85ae797
NC
11052 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11053
c28d6105 11054 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 11055 sv_setpvs(filter, "");
2a773401 11056 IoLINES(filter) = reversed;
c28d6105
NC
11057 IoPAGE(filter) = 1; /* Not EOF */
11058
11059 /* Sadly, we have to return a valid pointer, come what may, so we have to
11060 ignore any error return from this. */
11061 SvCUR_set(PL_linestr, 0);
11062 if (FILTER_READ(0, PL_linestr, 0)) {
11063 SvUTF8_on(PL_linestr);
81a923f4 11064 } else {
c28d6105 11065 SvUTF8_on(PL_linestr);
81a923f4 11066 }
c28d6105 11067 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
11068 return (U8*)SvPVX(PL_linestr);
11069}
6e3aabd6 11070#endif
9f4817db 11071
f333445c
JP
11072/*
11073Returns a pointer to the next character after the parsed
11074vstring, as well as updating the passed in sv.
11075
11076Function must be called like
11077
561b68a9 11078 sv = newSV(5);
65b06e02 11079 s = scan_vstring(s,e,sv);
f333445c 11080
65b06e02 11081where s and e are the start and end of the string.
f333445c
JP
11082The sv should already be large enough to store the vstring
11083passed in, for performance reasons.
11084
11085*/
11086
11087char *
15f169a1 11088Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 11089{
97aff369 11090 dVAR;
bfed75c6
AL
11091 const char *pos = s;
11092 const char *start = s;
7918f24d
NC
11093
11094 PERL_ARGS_ASSERT_SCAN_VSTRING;
11095
f333445c 11096 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 11097 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 11098 pos++;
f333445c
JP
11099 if ( *pos != '.') {
11100 /* this may not be a v-string if followed by => */
bfed75c6 11101 const char *next = pos;
65b06e02 11102 while (next < e && isSPACE(*next))
8fc7bb1c 11103 ++next;
65b06e02 11104 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
11105 /* return string not v-string */
11106 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 11107 return (char *)pos;
f333445c
JP
11108 }
11109 }
11110
11111 if (!isALPHA(*pos)) {
89ebb4a3 11112 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 11113
d4c19fe8
AL
11114 if (*s == 'v')
11115 s++; /* get past 'v' */
f333445c 11116
76f68e9b 11117 sv_setpvs(sv, "");
f333445c
JP
11118
11119 for (;;) {
d4c19fe8 11120 /* this is atoi() that tolerates underscores */
0bd48802
AL
11121 U8 *tmpend;
11122 UV rev = 0;
d4c19fe8
AL
11123 const char *end = pos;
11124 UV mult = 1;
11125 while (--end >= s) {
11126 if (*end != '_') {
11127 const UV orev = rev;
f333445c
JP
11128 rev += (*end - '0') * mult;
11129 mult *= 10;
9b387841 11130 if (orev > rev)
dcbac5bb 11131 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
11132 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11133 "Integer overflow in decimal number");
f333445c
JP
11134 }
11135 }
11136#ifdef EBCDIC
11137 if (rev > 0x7FFFFFFF)
11138 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11139#endif
11140 /* Append native character for the rev point */
11141 tmpend = uvchr_to_utf8(tmpbuf, rev);
11142 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11143 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11144 SvUTF8_on(sv);
65b06e02 11145 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
11146 s = ++pos;
11147 else {
11148 s = pos;
11149 break;
11150 }
65b06e02 11151 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
11152 pos++;
11153 }
11154 SvPOK_on(sv);
11155 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11156 SvRMAGICAL_on(sv);
11157 }
73d840c0 11158 return (char *)s;
f333445c
JP
11159}
11160
88e1f1a2
JV
11161int
11162Perl_keyword_plugin_standard(pTHX_
11163 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11164{
11165 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11166 PERL_UNUSED_CONTEXT;
11167 PERL_UNUSED_ARG(keyword_ptr);
11168 PERL_UNUSED_ARG(keyword_len);
11169 PERL_UNUSED_ARG(op_ptr);
11170 return KEYWORD_PLUGIN_DECLINE;
11171}
11172
78cdf107 11173#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11174static void
78cdf107 11175S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11176{
11177 SAVEI32(PL_lex_brackets);
11178 if (PL_lex_brackets > 100)
11179 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11180 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11181 SAVEI32(PL_lex_allbrackets);
11182 PL_lex_allbrackets = 0;
11183 SAVEI8(PL_lex_fakeeof);
2dcac756 11184 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11185 if(yyparse(gramtype) && !PL_parser->error_count)
11186 qerror(Perl_mess(aTHX_ "Parse error"));
11187}
11188
78cdf107 11189#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11190static OP *
78cdf107 11191S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11192{
11193 OP *o;
11194 ENTER;
11195 SAVEVPTR(PL_eval_root);
11196 PL_eval_root = NULL;
78cdf107 11197 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11198 o = PL_eval_root;
11199 LEAVE;
11200 return o;
11201}
11202
78cdf107
Z
11203#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11204static OP *
11205S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11206{
11207 OP *exprop;
11208 if (flags & ~PARSE_OPTIONAL)
11209 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11210 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11211 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11212 if (!PL_parser->error_count)
11213 qerror(Perl_mess(aTHX_ "Parse error"));
11214 exprop = newOP(OP_NULL, 0);
11215 }
11216 return exprop;
11217}
11218
11219/*
11220=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11221
11222Parse a Perl arithmetic expression. This may contain operators of precedence
11223down to the bit shift operators. The expression must be followed (and thus
11224terminated) either by a comparison or lower-precedence operator or by
11225something that would normally terminate an expression such as semicolon.
11226If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11227otherwise it is mandatory. It is up to the caller to ensure that the
11228dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11229the source of the code to be parsed and the lexical context for the
11230expression.
11231
11232The op tree representing the expression is returned. If an optional
11233expression is absent, a null pointer is returned, otherwise the pointer
11234will be non-null.
11235
11236If an error occurs in parsing or compilation, in most cases a valid op
11237tree is returned anyway. The error is reflected in the parser state,
11238normally resulting in a single exception at the top level of parsing
11239which covers all the compilation errors that occurred. Some compilation
11240errors, however, will throw an exception immediately.
11241
11242=cut
11243*/
11244
11245OP *
11246Perl_parse_arithexpr(pTHX_ U32 flags)
11247{
11248 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11249}
11250
11251/*
11252=for apidoc Amx|OP *|parse_termexpr|U32 flags
11253
11254Parse a Perl term expression. This may contain operators of precedence
11255down to the assignment operators. The expression must be followed (and thus
11256terminated) either by a comma or lower-precedence operator or by
11257something that would normally terminate an expression such as semicolon.
11258If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11259otherwise it is mandatory. It is up to the caller to ensure that the
11260dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11261the source of the code to be parsed and the lexical context for the
11262expression.
11263
11264The op tree representing the expression is returned. If an optional
11265expression is absent, a null pointer is returned, otherwise the pointer
11266will be non-null.
11267
11268If an error occurs in parsing or compilation, in most cases a valid op
11269tree is returned anyway. The error is reflected in the parser state,
11270normally resulting in a single exception at the top level of parsing
11271which covers all the compilation errors that occurred. Some compilation
11272errors, however, will throw an exception immediately.
11273
11274=cut
11275*/
11276
11277OP *
11278Perl_parse_termexpr(pTHX_ U32 flags)
11279{
11280 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11281}
11282
11283/*
11284=for apidoc Amx|OP *|parse_listexpr|U32 flags
11285
11286Parse a Perl list expression. This may contain operators of precedence
11287down to the comma operator. The expression must be followed (and thus
11288terminated) either by a low-precedence logic operator such as C<or> or by
11289something that would normally terminate an expression such as semicolon.
11290If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11291otherwise it is mandatory. It is up to the caller to ensure that the
11292dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11293the source of the code to be parsed and the lexical context for the
11294expression.
11295
11296The op tree representing the expression is returned. If an optional
11297expression is absent, a null pointer is returned, otherwise the pointer
11298will be non-null.
11299
11300If an error occurs in parsing or compilation, in most cases a valid op
11301tree is returned anyway. The error is reflected in the parser state,
11302normally resulting in a single exception at the top level of parsing
11303which covers all the compilation errors that occurred. Some compilation
11304errors, however, will throw an exception immediately.
11305
11306=cut
11307*/
11308
11309OP *
11310Perl_parse_listexpr(pTHX_ U32 flags)
11311{
11312 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11313}
11314
11315/*
11316=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11317
11318Parse a single complete Perl expression. This allows the full
11319expression grammar, including the lowest-precedence operators such
11320as C<or>. The expression must be followed (and thus terminated) by a
11321token that an expression would normally be terminated by: end-of-file,
11322closing bracketing punctuation, semicolon, or one of the keywords that
11323signals a postfix expression-statement modifier. If I<flags> includes
11324C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11325mandatory. It is up to the caller to ensure that the dynamic parser
11326state (L</PL_parser> et al) is correctly set to reflect the source of
11327the code to be parsed and the lexical context for the expression.
11328
11329The op tree representing the expression is returned. If an optional
11330expression is absent, a null pointer is returned, otherwise the pointer
11331will be non-null.
11332
11333If an error occurs in parsing or compilation, in most cases a valid op
11334tree is returned anyway. The error is reflected in the parser state,
11335normally resulting in a single exception at the top level of parsing
11336which covers all the compilation errors that occurred. Some compilation
11337errors, however, will throw an exception immediately.
11338
11339=cut
11340*/
11341
11342OP *
11343Perl_parse_fullexpr(pTHX_ U32 flags)
11344{
11345 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11346}
11347
e53d8f76
Z
11348/*
11349=for apidoc Amx|OP *|parse_block|U32 flags
11350
11351Parse a single complete Perl code block. This consists of an opening
11352brace, a sequence of statements, and a closing brace. The block
11353constitutes a lexical scope, so C<my> variables and various compile-time
11354effects can be contained within it. It is up to the caller to ensure
11355that the dynamic parser state (L</PL_parser> et al) is correctly set to
11356reflect the source of the code to be parsed and the lexical context for
11357the statement.
11358
11359The op tree representing the code block is returned. This is always a
11360real op, never a null pointer. It will normally be a C<lineseq> list,
11361including C<nextstate> or equivalent ops. No ops to construct any kind
11362of runtime scope are included by virtue of it being a block.
11363
11364If an error occurs in parsing or compilation, in most cases a valid op
11365tree (most likely null) is returned anyway. The error is reflected in
11366the parser state, normally resulting in a single exception at the top
11367level of parsing which covers all the compilation errors that occurred.
11368Some compilation errors, however, will throw an exception immediately.
11369
11370The I<flags> parameter is reserved for future use, and must always
11371be zero.
11372
11373=cut
11374*/
11375
11376OP *
11377Perl_parse_block(pTHX_ U32 flags)
11378{
11379 if (flags)
11380 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11381 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11382}
11383
1da4ca5f 11384/*
8359b381
Z
11385=for apidoc Amx|OP *|parse_barestmt|U32 flags
11386
11387Parse a single unadorned Perl statement. This may be a normal imperative
11388statement or a declaration that has compile-time effect. It does not
11389include any label or other affixture. It is up to the caller to ensure
11390that the dynamic parser state (L</PL_parser> et al) is correctly set to
11391reflect the source of the code to be parsed and the lexical context for
11392the statement.
11393
11394The op tree representing the statement is returned. This may be a
11395null pointer if the statement is null, for example if it was actually
11396a subroutine definition (which has compile-time side effects). If not
11397null, it will be ops directly implementing the statement, suitable to
11398pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11399equivalent op (except for those embedded in a scope contained entirely
11400within the statement).
11401
11402If an error occurs in parsing or compilation, in most cases a valid op
11403tree (most likely null) is returned anyway. The error is reflected in
11404the parser state, normally resulting in a single exception at the top
11405level of parsing which covers all the compilation errors that occurred.
11406Some compilation errors, however, will throw an exception immediately.
11407
11408The I<flags> parameter is reserved for future use, and must always
11409be zero.
11410
11411=cut
11412*/
11413
11414OP *
11415Perl_parse_barestmt(pTHX_ U32 flags)
11416{
11417 if (flags)
11418 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11419 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11420}
11421
11422/*
361d9b55
Z
11423=for apidoc Amx|SV *|parse_label|U32 flags
11424
11425Parse a single label, possibly optional, of the type that may prefix a
11426Perl statement. It is up to the caller to ensure that the dynamic parser
11427state (L</PL_parser> et al) is correctly set to reflect the source of
11428the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11429label is optional, otherwise it is mandatory.
11430
11431The name of the label is returned in the form of a fresh scalar. If an
11432optional label is absent, a null pointer is returned.
11433
11434If an error occurs in parsing, which can only occur if the label is
11435mandatory, a valid label is returned anyway. The error is reflected in
11436the parser state, normally resulting in a single exception at the top
11437level of parsing which covers all the compilation errors that occurred.
11438
11439=cut
11440*/
11441
11442SV *
11443Perl_parse_label(pTHX_ U32 flags)
11444{
11445 if (flags & ~PARSE_OPTIONAL)
11446 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11447 if (PL_lex_state == LEX_KNOWNEXT) {
11448 PL_parser->yychar = yylex();
11449 if (PL_parser->yychar == LABEL) {
11450 char *lpv = pl_yylval.pval;
11451 STRLEN llen = strlen(lpv);
11452 SV *lsv;
11453 PL_parser->yychar = YYEMPTY;
11454 lsv = newSV_type(SVt_PV);
11455 SvPV_set(lsv, lpv);
11456 SvCUR_set(lsv, llen);
11457 SvLEN_set(lsv, llen+1);
11458 SvPOK_on(lsv);
11459 return lsv;
11460 } else {
11461 yyunlex();
11462 goto no_label;
11463 }
11464 } else {
11465 char *s, *t;
11466 U8 c;
11467 STRLEN wlen, bufptr_pos;
11468 lex_read_space(0);
11469 t = s = PL_bufptr;
11470 c = (U8)*s;
11471 if (!isIDFIRST_A(c))
11472 goto no_label;
11473 do {
11474 c = (U8)*++t;
11475 } while(isWORDCHAR_A(c));
11476 wlen = t - s;
11477 if (word_takes_any_delimeter(s, wlen))
11478 goto no_label;
11479 bufptr_pos = s - SvPVX(PL_linestr);
11480 PL_bufptr = t;
11481 lex_read_space(LEX_KEEP_PREVIOUS);
11482 t = PL_bufptr;
11483 s = SvPVX(PL_linestr) + bufptr_pos;
11484 if (t[0] == ':' && t[1] != ':') {
11485 PL_oldoldbufptr = PL_oldbufptr;
11486 PL_oldbufptr = s;
11487 PL_bufptr = t+1;
11488 return newSVpvn(s, wlen);
11489 } else {
11490 PL_bufptr = s;
11491 no_label:
11492 if (flags & PARSE_OPTIONAL) {
11493 return NULL;
11494 } else {
11495 qerror(Perl_mess(aTHX_ "Parse error"));
11496 return newSVpvs("x");
11497 }
11498 }
11499 }
11500}
11501
11502/*
28ac2b49
Z
11503=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11504
11505Parse a single complete Perl statement. This may be a normal imperative
8359b381 11506statement or a declaration that has compile-time effect, and may include
8e720305 11507optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11508parser state (L</PL_parser> et al) is correctly set to reflect the source
11509of the code to be parsed and the lexical context for the statement.
11510
11511The op tree representing the statement is returned. This may be a
11512null pointer if the statement is null, for example if it was actually
11513a subroutine definition (which has compile-time side effects). If not
11514null, it will be the result of a L</newSTATEOP> call, normally including
11515a C<nextstate> or equivalent op.
11516
11517If an error occurs in parsing or compilation, in most cases a valid op
11518tree (most likely null) is returned anyway. The error is reflected in
11519the parser state, normally resulting in a single exception at the top
11520level of parsing which covers all the compilation errors that occurred.
11521Some compilation errors, however, will throw an exception immediately.
11522
11523The I<flags> parameter is reserved for future use, and must always
11524be zero.
11525
11526=cut
11527*/
11528
11529OP *
11530Perl_parse_fullstmt(pTHX_ U32 flags)
11531{
28ac2b49
Z
11532 if (flags)
11533 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11534 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11535}
11536
07ffcb73
Z
11537/*
11538=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11539
11540Parse a sequence of zero or more Perl statements. These may be normal
11541imperative statements, including optional labels, or declarations
11542that have compile-time effect, or any mixture thereof. The statement
11543sequence ends when a closing brace or end-of-file is encountered in a
11544place where a new statement could have validly started. It is up to
11545the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11546is correctly set to reflect the source of the code to be parsed and the
11547lexical context for the statements.
11548
11549The op tree representing the statement sequence is returned. This may
11550be a null pointer if the statements were all null, for example if there
11551were no statements or if there were only subroutine definitions (which
11552have compile-time side effects). If not null, it will be a C<lineseq>
11553list, normally including C<nextstate> or equivalent ops.
11554
11555If an error occurs in parsing or compilation, in most cases a valid op
11556tree is returned anyway. The error is reflected in the parser state,
11557normally resulting in a single exception at the top level of parsing
11558which covers all the compilation errors that occurred. Some compilation
11559errors, however, will throw an exception immediately.
11560
11561The I<flags> parameter is reserved for future use, and must always
11562be zero.
11563
11564=cut
11565*/
11566
11567OP *
11568Perl_parse_stmtseq(pTHX_ U32 flags)
11569{
11570 OP *stmtseqop;
e53d8f76 11571 I32 c;
07ffcb73 11572 if (flags)
78cdf107
Z
11573 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11574 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
11575 c = lex_peek_unichar(0);
11576 if (c != -1 && c != /*{*/'}')
07ffcb73 11577 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
11578 return stmtseqop;
11579}
11580
ea25a9b2 11581void
f7e3d326 11582Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 11583{
f7e3d326 11584 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2 11585 deprecate("qw(...) as parentheses");
78cdf107 11586 force_next((4<<24)|')');
ea25a9b2
Z
11587 if (qwlist->op_type == OP_STUB) {
11588 op_free(qwlist);
11589 }
11590 else {
3d8e05a0 11591 start_force(PL_curforce);
ea25a9b2
Z
11592 NEXTVAL_NEXTTOKE.opval = qwlist;
11593 force_next(THING);
11594 }
78cdf107 11595 force_next((2<<24)|'(');
ea25a9b2
Z
11596}
11597
28ac2b49 11598/*
1da4ca5f
NC
11599 * Local variables:
11600 * c-indentation-style: bsd
11601 * c-basic-offset: 4
11602 * indent-tabs-mode: t
11603 * End:
11604 *
37442d52
RGS
11605 * ex: set ts=8 sts=4 sw=4 noet:
11606 */