This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade Pod::Simple from version 3.18 to 3.19
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
746b446a 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805
LW
150 * can get by with a single comparison (if the compiler is smart enough).
151 */
152
fb73857a 153/* #define LEX_NOTPARSING 11 is done in perl.h. */
154
b6007c36
DM
155#define LEX_NORMAL 10 /* normal code (ie not within "...") */
156#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159#define LEX_INTERPSTART 6 /* expecting the start of a $var */
160
161 /* at end of code, eg "$x" followed by: */
162#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164
165#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167#define LEX_INTERPCONST 2 /* NOT USED */
168#define LEX_FORMLINE 1 /* expecting a format line */
169#define LEX_KNOWNEXT 0 /* next token known; just return it */
170
79072805 171
bbf60fe6 172#ifdef DEBUGGING
27da23d5 173static const char* const lex_state_names[] = {
bbf60fe6
DM
174 "KNOWNEXT",
175 "FORMLINE",
176 "INTERPCONST",
177 "INTERPCONCAT",
178 "INTERPENDMAYBE",
179 "INTERPEND",
180 "INTERPSTART",
181 "INTERPPUSH",
182 "INTERPCASEMOD",
183 "INTERPNORMAL",
184 "NORMAL"
185};
186#endif
187
79072805
LW
188#ifdef ff_next
189#undef ff_next
d48672a2
LW
190#endif
191
79072805 192#include "keywords.h"
fe14fcc3 193
ffb4593c
NT
194/* CLINE is a macro that ensures PL_copline has a sane value */
195
ae986130
LW
196#ifdef CLINE
197#undef CLINE
198#endif
57843af0 199#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 200
5db06880 201#ifdef PERL_MAD
29595ff2
NC
202# define SKIPSPACE0(s) skipspace0(s)
203# define SKIPSPACE1(s) skipspace1(s)
204# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205# define PEEKSPACE(s) skipspace2(s,0)
206#else
207# define SKIPSPACE0(s) skipspace(s)
208# define SKIPSPACE1(s) skipspace(s)
209# define SKIPSPACE2(s,tsv) skipspace(s)
210# define PEEKSPACE(s) skipspace(s)
211#endif
212
ffb4593c
NT
213/*
214 * Convenience functions to return different tokens and prime the
9cbb5ea2 215 * lexer for the next token. They all take an argument.
ffb4593c
NT
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
7eb971ee 227 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 228 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
229 * BOop : bitwise or or xor
230 * BAop : bitwise and
231 * SHop : shift operator
232 * PWop : power operator
9cbb5ea2 233 * PMop : pattern-matching operator
ffb4593c
NT
234 * Aop : addition-level operator
235 * Mop : multiplication-level operator
236 * Eop : equality-testing operator
e5edeb50 237 * Rop : relational operator <= != gt
ffb4593c
NT
238 *
239 * Also see LOP and lop() below.
240 */
241
998054bd 242#ifdef DEBUGGING /* Serve -DT. */
704d4215 243# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 244#else
bbf60fe6 245# define REPORT(retval) (retval)
998054bd
SC
246#endif
247
bbf60fe6
DM
248#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
249#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
250#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
251#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
252#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
253#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
254#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
255#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
256#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
257#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 258#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
259#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
260#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
261#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
262#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
263#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
264#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
265#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
266#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
267#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
268#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 269
a687059c
LW
270/* This bit of chicanery makes a unary function followed by
271 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
272 * The UNIDOR macro is for unary functions that can be followed by the //
273 * operator (such as C<shift // 0>).
a687059c 274 */
376fcdbf 275#define UNI2(f,x) { \
6154021b 276 pl_yylval.ival = f; \
376fcdbf
AL
277 PL_expect = x; \
278 PL_bufptr = s; \
279 PL_last_uni = PL_oldbufptr; \
280 PL_last_lop_op = f; \
281 if (*s == '(') \
282 return REPORT( (int)FUNC1 ); \
29595ff2 283 s = PEEKSPACE(s); \
376fcdbf
AL
284 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
285 }
6f33ba73
RGS
286#define UNI(f) UNI2(f,XTERM)
287#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 288
376fcdbf 289#define UNIBRACK(f) { \
6154021b 290 pl_yylval.ival = f; \
376fcdbf
AL
291 PL_bufptr = s; \
292 PL_last_uni = PL_oldbufptr; \
293 if (*s == '(') \
294 return REPORT( (int)FUNC1 ); \
29595ff2 295 s = PEEKSPACE(s); \
376fcdbf
AL
296 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
297 }
79072805 298
9f68db38 299/* grandfather return to old style */
78cdf107
Z
300#define OLDLOP(f) \
301 do { \
302 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
303 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
304 pl_yylval.ival = (f); \
305 PL_expect = XTERM; \
306 PL_bufptr = s; \
307 return (int)LSTOP; \
308 } while(0)
79072805 309
8fa7f367
JH
310#ifdef DEBUGGING
311
6154021b 312/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
313enum token_type {
314 TOKENTYPE_NONE,
315 TOKENTYPE_IVAL,
6154021b 316 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
317 TOKENTYPE_PVAL,
318 TOKENTYPE_OPVAL,
319 TOKENTYPE_GVVAL
320};
321
6d4a66ac
NC
322static struct debug_tokens {
323 const int token;
324 enum token_type type;
325 const char *name;
326} const debug_tokens[] =
9041c2e3 327{
bbf60fe6
DM
328 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
329 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
330 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
331 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
332 { ARROW, TOKENTYPE_NONE, "ARROW" },
333 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
334 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
335 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
336 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
337 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 338 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
339 { DO, TOKENTYPE_NONE, "DO" },
340 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
341 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
342 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
343 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
344 { ELSE, TOKENTYPE_NONE, "ELSE" },
345 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
346 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
347 { FOR, TOKENTYPE_IVAL, "FOR" },
348 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
349 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
350 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 351 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
352 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
353 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
354 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 355 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
356 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
357 { IF, TOKENTYPE_IVAL, "IF" },
358 { LABEL, TOKENTYPE_PVAL, "LABEL" },
359 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
360 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
361 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
362 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
363 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
364 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
365 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
366 { MY, TOKENTYPE_IVAL, "MY" },
367 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
368 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
369 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
370 { OROP, TOKENTYPE_IVAL, "OROP" },
371 { OROR, TOKENTYPE_NONE, "OROR" },
372 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
373 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
374 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
375 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
376 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
377 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
378 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
379 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
380 { PREINC, TOKENTYPE_NONE, "PREINC" },
381 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
382 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
383 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
384 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
385 { SUB, TOKENTYPE_NONE, "SUB" },
386 { THING, TOKENTYPE_OPVAL, "THING" },
387 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
388 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
389 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
390 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
391 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
392 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 393 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
394 { WHILE, TOKENTYPE_IVAL, "WHILE" },
395 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 396 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 397 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
398};
399
6154021b 400/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 401
bbf60fe6 402STATIC int
704d4215 403S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 404{
97aff369 405 dVAR;
7918f24d
NC
406
407 PERL_ARGS_ASSERT_TOKEREPORT;
408
bbf60fe6 409 if (DEBUG_T_TEST) {
bd61b366 410 const char *name = NULL;
bbf60fe6 411 enum token_type type = TOKENTYPE_NONE;
f54cb97a 412 const struct debug_tokens *p;
396482e1 413 SV* const report = newSVpvs("<== ");
bbf60fe6 414
f54cb97a 415 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
416 if (p->token == (int)rv) {
417 name = p->name;
418 type = p->type;
419 break;
420 }
421 }
422 if (name)
54667de8 423 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
424 else if ((char)rv > ' ' && (char)rv < '~')
425 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
426 else if (!rv)
396482e1 427 sv_catpvs(report, "EOF");
bbf60fe6
DM
428 else
429 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
430 switch (type) {
431 case TOKENTYPE_NONE:
432 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
433 break;
434 case TOKENTYPE_IVAL:
704d4215 435 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
436 break;
437 case TOKENTYPE_OPNUM:
438 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 439 PL_op_name[lvalp->ival]);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_PVAL:
704d4215 442 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
443 break;
444 case TOKENTYPE_OPVAL:
704d4215 445 if (lvalp->opval) {
401441c0 446 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
447 PL_op_name[lvalp->opval->op_type]);
448 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 449 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 450 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
451 }
452
453 }
401441c0 454 else
396482e1 455 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
456 break;
457 }
b6007c36 458 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
459 };
460 return (int)rv;
998054bd
SC
461}
462
b6007c36
DM
463
464/* print the buffer with suitable escapes */
465
466STATIC void
15f169a1 467S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 468{
396482e1 469 SV* const tmp = newSVpvs("");
7918f24d
NC
470
471 PERL_ARGS_ASSERT_PRINTBUF;
472
b6007c36
DM
473 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
474 SvREFCNT_dec(tmp);
475}
476
8fa7f367
JH
477#endif
478
8290c323
NC
479static int
480S_deprecate_commaless_var_list(pTHX) {
481 PL_expect = XTERM;
482 deprecate("comma-less variable list");
483 return REPORT(','); /* grandfather non-comma-format format */
484}
485
ffb4593c
NT
486/*
487 * S_ao
488 *
c963b151
BD
489 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
490 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
491 */
492
76e3520e 493STATIC int
cea2e8a9 494S_ao(pTHX_ int toketype)
a0d0e21e 495{
97aff369 496 dVAR;
3280af22
NIS
497 if (*PL_bufptr == '=') {
498 PL_bufptr++;
a0d0e21e 499 if (toketype == ANDAND)
6154021b 500 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 501 else if (toketype == OROR)
6154021b 502 pl_yylval.ival = OP_ORASSIGN;
c963b151 503 else if (toketype == DORDOR)
6154021b 504 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
505 toketype = ASSIGNOP;
506 }
507 return toketype;
508}
509
ffb4593c
NT
510/*
511 * S_no_op
512 * When Perl expects an operator and finds something else, no_op
513 * prints the warning. It always prints "<something> found where
514 * operator expected. It prints "Missing semicolon on previous line?"
515 * if the surprise occurs at the start of the line. "do you need to
516 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
517 * where the compiler doesn't know if foo is a method call or a function.
518 * It prints "Missing operator before end of line" if there's nothing
519 * after the missing operator, or "... before <...>" if there is something
520 * after the missing operator.
521 */
522
76e3520e 523STATIC void
15f169a1 524S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 525{
97aff369 526 dVAR;
9d4ba2ae
AL
527 char * const oldbp = PL_bufptr;
528 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 529
7918f24d
NC
530 PERL_ARGS_ASSERT_NO_OP;
531
1189a94a
GS
532 if (!s)
533 s = oldbp;
07c798fb 534 else
1189a94a 535 PL_bufptr = s;
cea2e8a9 536 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
537 if (ckWARN_d(WARN_SYNTAX)) {
538 if (is_first)
539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
540 "\t(Missing semicolon on previous line?)\n");
541 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 542 const char *t;
c35e046a
AL
543 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
544 NOOP;
56da5a46
RGS
545 if (t < PL_bufptr && isSPACE(*t))
546 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
547 "\t(Do you need to predeclare %.*s?)\n",
551405c4 548 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
549 }
550 else {
551 assert(s >= oldbp);
552 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 553 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 554 }
07c798fb 555 }
3280af22 556 PL_bufptr = oldbp;
8990e307
LW
557}
558
ffb4593c
NT
559/*
560 * S_missingterm
561 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 562 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
563 * If we're in a delimited string and the delimiter is a control
564 * character, it's reformatted into a two-char sequence like ^C.
565 * This is fatal.
566 */
567
76e3520e 568STATIC void
cea2e8a9 569S_missingterm(pTHX_ char *s)
8990e307 570{
97aff369 571 dVAR;
8990e307
LW
572 char tmpbuf[3];
573 char q;
574 if (s) {
9d4ba2ae 575 char * const nl = strrchr(s,'\n');
d2719217 576 if (nl)
8990e307
LW
577 *nl = '\0';
578 }
463559e7 579 else if (isCNTRL(PL_multi_close)) {
8990e307 580 *tmpbuf = '^';
585ec06d 581 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
582 tmpbuf[2] = '\0';
583 s = tmpbuf;
584 }
585 else {
eb160463 586 *tmpbuf = (char)PL_multi_close;
8990e307
LW
587 tmpbuf[1] = '\0';
588 s = tmpbuf;
589 }
590 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 591 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 592}
79072805 593
0d863452 594/*
0d863452
RH
595 * Check whether the named feature is enabled.
596 */
26ea9e12
NC
597bool
598Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 599{
97aff369 600 dVAR;
0d863452 601 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 602 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
603
604 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
605
26ea9e12
NC
606 if (namelen > MAX_FEATURE_LEN)
607 return FALSE;
4a731d7b 608 memcpy(&he_name[8], name, namelen);
d4c19fe8 609
7b9ef140 610 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
611}
612
ffb4593c 613/*
9cbb5ea2
GS
614 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
615 * utf16-to-utf8-reversed.
ffb4593c
NT
616 */
617
c39cd008
GS
618#ifdef PERL_CR_FILTER
619static void
620strip_return(SV *sv)
621{
95a20fc0 622 register const char *s = SvPVX_const(sv);
9d4ba2ae 623 register const char * const e = s + SvCUR(sv);
7918f24d
NC
624
625 PERL_ARGS_ASSERT_STRIP_RETURN;
626
c39cd008
GS
627 /* outer loop optimized to do nothing if there are no CR-LFs */
628 while (s < e) {
629 if (*s++ == '\r' && *s == '\n') {
630 /* hit a CR-LF, need to copy the rest */
631 register char *d = s - 1;
632 *d++ = *s++;
633 while (s < e) {
634 if (*s == '\r' && s[1] == '\n')
635 s++;
636 *d++ = *s++;
637 }
638 SvCUR(sv) -= s - d;
639 return;
640 }
641 }
642}
a868473f 643
76e3520e 644STATIC I32
c39cd008 645S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 646{
f54cb97a 647 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
648 if (count > 0 && !maxlen)
649 strip_return(sv);
650 return count;
a868473f
NIS
651}
652#endif
653
ffb4593c 654/*
8eaa0acf
Z
655=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
656
657Creates and initialises a new lexer/parser state object, supplying
658a context in which to lex and parse from a new source of Perl code.
659A pointer to the new state object is placed in L</PL_parser>. An entry
660is made on the save stack so that upon unwinding the new state object
661will be destroyed and the former value of L</PL_parser> will be restored.
662Nothing else need be done to clean up the parsing context.
663
664The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
665non-null, provides a string (in SV form) containing code to be parsed.
666A copy of the string is made, so subsequent modification of I<line>
667does not affect parsing. I<rsfp>, if non-null, provides an input stream
668from which code will be read to be parsed. If both are non-null, the
669code in I<line> comes first and must consist of complete lines of input,
670and I<rsfp> supplies the remainder of the source.
671
672The I<flags> parameter is reserved for future use, and must always
27fcb6ee
FC
673be zero, except for one flag that is currently reserved for perl's internal
674use.
8eaa0acf
Z
675
676=cut
677*/
ffb4593c 678
27fcb6ee
FC
679/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
680 can share filters with the current parser. */
681
a0d0e21e 682void
8eaa0acf 683Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 684{
97aff369 685 dVAR;
6ef55633 686 const char *s = NULL;
8990e307 687 STRLEN len;
5486870f 688 yy_parser *parser, *oparser;
27fcb6ee 689 if (flags && flags != LEX_START_SAME_FILTER)
8eaa0acf 690 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
691
692 /* create and initialise a parser */
693
199e78b7 694 Newxz(parser, 1, yy_parser);
5486870f 695 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
696 PL_parser = parser;
697
28ac2b49
Z
698 parser->stack = NULL;
699 parser->ps = NULL;
700 parser->stack_size = 0;
acdf0a21 701
e3abe207
DM
702 /* on scope exit, free this parser and restore any outer one */
703 SAVEPARSER(parser);
7c4baf47 704 parser->saved_curcop = PL_curcop;
e3abe207 705
acdf0a21 706 /* initialise lexer state */
8990e307 707
fb205e7a
DM
708#ifdef PERL_MAD
709 parser->curforce = -1;
710#else
711 parser->nexttoke = 0;
712#endif
ca4cfd28 713 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 714 parser->copline = NOLINE;
5afb0a62 715 parser->lex_state = LEX_NORMAL;
c2598295 716 parser->expect = XSTATE;
2f9285f8 717 parser->rsfp = rsfp;
27fcb6ee
FC
718 parser->rsfp_filters =
719 !(flags & LEX_START_SAME_FILTER) || !oparser
720 ? newAV()
721 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 722
199e78b7
DM
723 Newx(parser->lex_brackstack, 120, char);
724 Newx(parser->lex_casestack, 12, char);
725 *parser->lex_casestack = '\0';
02b34bbe 726
10efb74f
NC
727 if (line) {
728 s = SvPV_const(line, len);
729 } else {
730 len = 0;
731 }
bdc0bf6f 732
10efb74f 733 if (!len) {
bdc0bf6f 734 parser->linestr = newSVpvs("\n;");
805700c1 735 } else {
719a9bb0 736 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 737 if (s[len-1] != ';')
bdc0bf6f 738 sv_catpvs(parser->linestr, "\n;");
8990e307 739 }
f06b5848
DM
740 parser->oldoldbufptr =
741 parser->oldbufptr =
742 parser->bufptr =
743 parser->linestart = SvPVX(parser->linestr);
744 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
745 parser->last_lop = parser->last_uni = NULL;
737c24fc
Z
746
747 parser->in_pod = 0;
79072805 748}
a687059c 749
e3abe207
DM
750
751/* delete a parser object */
752
753void
754Perl_parser_free(pTHX_ const yy_parser *parser)
755{
7918f24d
NC
756 PERL_ARGS_ASSERT_PARSER_FREE;
757
7c4baf47 758 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
759 SvREFCNT_dec(parser->linestr);
760
2f9285f8
DM
761 if (parser->rsfp == PerlIO_stdin())
762 PerlIO_clearerr(parser->rsfp);
799361c3
SH
763 else if (parser->rsfp && (!parser->old_parser ||
764 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 765 PerlIO_close(parser->rsfp);
5486870f 766 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 767
e3abe207
DM
768 Safefree(parser->lex_brackstack);
769 Safefree(parser->lex_casestack);
770 PL_parser = parser->old_parser;
771 Safefree(parser);
772}
773
774
ffb4593c 775/*
f0e67a1d
Z
776=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
777
778Buffer scalar containing the chunk currently under consideration of the
779text currently being lexed. This is always a plain string scalar (for
780which C<SvPOK> is true). It is not intended to be used as a scalar by
781normal scalar means; instead refer to the buffer directly by the pointer
782variables described below.
783
784The lexer maintains various C<char*> pointers to things in the
785C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
786reallocated, all of these pointers must be updated. Don't attempt to
787do this manually, but rather use L</lex_grow_linestr> if you need to
788reallocate the buffer.
789
790The content of the text chunk in the buffer is commonly exactly one
791complete line of input, up to and including a newline terminator,
792but there are situations where it is otherwise. The octets of the
793buffer may be intended to be interpreted as either UTF-8 or Latin-1.
794The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
795flag on this scalar, which may disagree with it.
796
797For direct examination of the buffer, the variable
798L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
799lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
800of these pointers is usually preferable to examination of the scalar
801through normal scalar means.
802
803=for apidoc AmxU|char *|PL_parser-E<gt>bufend
804
805Direct pointer to the end of the chunk of text currently being lexed, the
806end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
807+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
808always located at the end of the buffer, and does not count as part of
809the buffer's contents.
810
811=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
812
813Points to the current position of lexing inside the lexer buffer.
814Characters around this point may be freely examined, within
815the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
816L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
817interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
818
819Lexing code (whether in the Perl core or not) moves this pointer past
820the characters that it consumes. It is also expected to perform some
821bookkeeping whenever a newline character is consumed. This movement
822can be more conveniently performed by the function L</lex_read_to>,
823which handles newlines appropriately.
824
825Interpretation of the buffer's octets can be abstracted out by
826using the slightly higher-level functions L</lex_peek_unichar> and
827L</lex_read_unichar>.
828
829=for apidoc AmxU|char *|PL_parser-E<gt>linestart
830
831Points to the start of the current line inside the lexer buffer.
832This is useful for indicating at which column an error occurred, and
833not much else. This must be updated by any lexing code that consumes
834a newline; the function L</lex_read_to> handles this detail.
835
836=cut
837*/
838
839/*
840=for apidoc Amx|bool|lex_bufutf8
841
842Indicates whether the octets in the lexer buffer
843(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
844of Unicode characters. If not, they should be interpreted as Latin-1
845characters. This is analogous to the C<SvUTF8> flag for scalars.
846
847In UTF-8 mode, it is not guaranteed that the lexer buffer actually
848contains valid UTF-8. Lexing code must be robust in the face of invalid
849encoding.
850
851The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
852is significant, but not the whole story regarding the input character
853encoding. Normally, when a file is being read, the scalar contains octets
854and its C<SvUTF8> flag is off, but the octets should be interpreted as
855UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
856however, the scalar may have the C<SvUTF8> flag on, and in this case its
857octets should be interpreted as UTF-8 unless the C<use bytes> pragma
858is in effect. This logic may change in the future; use this function
859instead of implementing the logic yourself.
860
861=cut
862*/
863
864bool
865Perl_lex_bufutf8(pTHX)
866{
867 return UTF;
868}
869
870/*
871=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
872
873Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
874at least I<len> octets (including terminating NUL). Returns a
875pointer to the reallocated buffer. This is necessary before making
876any direct modification of the buffer that would increase its length.
877L</lex_stuff_pvn> provides a more convenient way to insert text into
878the buffer.
879
880Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
881this function updates all of the lexer's variables that point directly
882into the buffer.
883
884=cut
885*/
886
887char *
888Perl_lex_grow_linestr(pTHX_ STRLEN len)
889{
890 SV *linestr;
891 char *buf;
892 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
893 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
894 linestr = PL_parser->linestr;
895 buf = SvPVX(linestr);
896 if (len <= SvLEN(linestr))
897 return buf;
898 bufend_pos = PL_parser->bufend - buf;
899 bufptr_pos = PL_parser->bufptr - buf;
900 oldbufptr_pos = PL_parser->oldbufptr - buf;
901 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
902 linestart_pos = PL_parser->linestart - buf;
903 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
904 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
905 buf = sv_grow(linestr, len);
906 PL_parser->bufend = buf + bufend_pos;
907 PL_parser->bufptr = buf + bufptr_pos;
908 PL_parser->oldbufptr = buf + oldbufptr_pos;
909 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
910 PL_parser->linestart = buf + linestart_pos;
911 if (PL_parser->last_uni)
912 PL_parser->last_uni = buf + last_uni_pos;
913 if (PL_parser->last_lop)
914 PL_parser->last_lop = buf + last_lop_pos;
915 return buf;
916}
917
918/*
83aa740e 919=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
920
921Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
922immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
923reallocating the buffer if necessary. This means that lexing code that
924runs later will see the characters as if they had appeared in the input.
925It is not recommended to do this as part of normal parsing, and most
926uses of this facility run the risk of the inserted characters being
927interpreted in an unintended manner.
928
929The string to be inserted is represented by I<len> octets starting
930at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
931according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
932The characters are recoded for the lexer buffer, according to how the
933buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 934to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
935function is more convenient.
936
937=cut
938*/
939
940void
83aa740e 941Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 942{
749123ff 943 dVAR;
f0e67a1d
Z
944 char *bufptr;
945 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
946 if (flags & ~(LEX_STUFF_UTF8))
947 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
948 if (UTF) {
949 if (flags & LEX_STUFF_UTF8) {
950 goto plain_copy;
951 } else {
952 STRLEN highhalf = 0;
83aa740e 953 const char *p, *e = pv+len;
f0e67a1d
Z
954 for (p = pv; p != e; p++)
955 highhalf += !!(((U8)*p) & 0x80);
956 if (!highhalf)
957 goto plain_copy;
958 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
959 bufptr = PL_parser->bufptr;
960 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
961 SvCUR_set(PL_parser->linestr,
962 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
963 PL_parser->bufend += len+highhalf;
964 for (p = pv; p != e; p++) {
965 U8 c = (U8)*p;
966 if (c & 0x80) {
967 *bufptr++ = (char)(0xc0 | (c >> 6));
968 *bufptr++ = (char)(0x80 | (c & 0x3f));
969 } else {
970 *bufptr++ = (char)c;
971 }
972 }
973 }
974 } else {
975 if (flags & LEX_STUFF_UTF8) {
976 STRLEN highhalf = 0;
83aa740e 977 const char *p, *e = pv+len;
f0e67a1d
Z
978 for (p = pv; p != e; p++) {
979 U8 c = (U8)*p;
980 if (c >= 0xc4) {
981 Perl_croak(aTHX_ "Lexing code attempted to stuff "
982 "non-Latin-1 character into Latin-1 input");
983 } else if (c >= 0xc2 && p+1 != e &&
984 (((U8)p[1]) & 0xc0) == 0x80) {
985 p++;
986 highhalf++;
987 } else if (c >= 0x80) {
988 /* malformed UTF-8 */
989 ENTER;
990 SAVESPTR(PL_warnhook);
991 PL_warnhook = PERL_WARNHOOK_FATAL;
992 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
993 LEAVE;
994 }
995 }
996 if (!highhalf)
997 goto plain_copy;
998 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
999 bufptr = PL_parser->bufptr;
1000 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1001 SvCUR_set(PL_parser->linestr,
1002 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1003 PL_parser->bufend += len-highhalf;
1004 for (p = pv; p != e; p++) {
1005 U8 c = (U8)*p;
1006 if (c & 0x80) {
1007 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1008 p++;
1009 } else {
1010 *bufptr++ = (char)c;
1011 }
1012 }
1013 } else {
1014 plain_copy:
1015 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1016 bufptr = PL_parser->bufptr;
1017 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1018 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1019 PL_parser->bufend += len;
1020 Copy(pv, bufptr, len, char);
1021 }
1022 }
1023}
1024
1025/*
9dcc53ea
Z
1026=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1027
1028Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1029immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1030reallocating the buffer if necessary. This means that lexing code that
1031runs later will see the characters as if they had appeared in the input.
1032It is not recommended to do this as part of normal parsing, and most
1033uses of this facility run the risk of the inserted characters being
1034interpreted in an unintended manner.
1035
1036The string to be inserted is represented by octets starting at I<pv>
1037and continuing to the first nul. These octets are interpreted as either
1038UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1039in I<flags>. The characters are recoded for the lexer buffer, according
1040to how the buffer is currently being interpreted (L</lex_bufutf8>).
1041If it is not convenient to nul-terminate a string to be inserted, the
1042L</lex_stuff_pvn> function is more appropriate.
1043
1044=cut
1045*/
1046
1047void
1048Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1049{
1050 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1051 lex_stuff_pvn(pv, strlen(pv), flags);
1052}
1053
1054/*
f0e67a1d
Z
1055=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1056
1057Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1058immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1059reallocating the buffer if necessary. This means that lexing code that
1060runs later will see the characters as if they had appeared in the input.
1061It is not recommended to do this as part of normal parsing, and most
1062uses of this facility run the risk of the inserted characters being
1063interpreted in an unintended manner.
1064
1065The string to be inserted is the string value of I<sv>. The characters
1066are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1067being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1068not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1069need to construct a scalar.
1070
1071=cut
1072*/
1073
1074void
1075Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1076{
1077 char *pv;
1078 STRLEN len;
1079 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1080 if (flags)
1081 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1082 pv = SvPV(sv, len);
1083 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1084}
1085
1086/*
1087=for apidoc Amx|void|lex_unstuff|char *ptr
1088
1089Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1090I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1091This hides the discarded text from any lexing code that runs later,
1092as if the text had never appeared.
1093
1094This is not the normal way to consume lexed text. For that, use
1095L</lex_read_to>.
1096
1097=cut
1098*/
1099
1100void
1101Perl_lex_unstuff(pTHX_ char *ptr)
1102{
1103 char *buf, *bufend;
1104 STRLEN unstuff_len;
1105 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1106 buf = PL_parser->bufptr;
1107 if (ptr < buf)
1108 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1109 if (ptr == buf)
1110 return;
1111 bufend = PL_parser->bufend;
1112 if (ptr > bufend)
1113 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1114 unstuff_len = ptr - buf;
1115 Move(ptr, buf, bufend+1-ptr, char);
1116 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1117 PL_parser->bufend = bufend - unstuff_len;
1118}
1119
1120/*
1121=for apidoc Amx|void|lex_read_to|char *ptr
1122
1123Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1124to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1125performing the correct bookkeeping whenever a newline character is passed.
1126This is the normal way to consume lexed text.
1127
1128Interpretation of the buffer's octets can be abstracted out by
1129using the slightly higher-level functions L</lex_peek_unichar> and
1130L</lex_read_unichar>.
1131
1132=cut
1133*/
1134
1135void
1136Perl_lex_read_to(pTHX_ char *ptr)
1137{
1138 char *s;
1139 PERL_ARGS_ASSERT_LEX_READ_TO;
1140 s = PL_parser->bufptr;
1141 if (ptr < s || ptr > PL_parser->bufend)
1142 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1143 for (; s != ptr; s++)
1144 if (*s == '\n') {
1145 CopLINE_inc(PL_curcop);
1146 PL_parser->linestart = s+1;
1147 }
1148 PL_parser->bufptr = ptr;
1149}
1150
1151/*
1152=for apidoc Amx|void|lex_discard_to|char *ptr
1153
1154Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1155up to I<ptr>. The remaining content of the buffer will be moved, and
1156all pointers into the buffer updated appropriately. I<ptr> must not
1157be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1158it is not permitted to discard text that has yet to be lexed.
1159
1160Normally it is not necessarily to do this directly, because it suffices to
1161use the implicit discarding behaviour of L</lex_next_chunk> and things
1162based on it. However, if a token stretches across multiple lines,
1f317c95 1163and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1164that purpose, then after completion of the token it would be wise to
1165explicitly discard the now-unneeded earlier lines, to avoid future
1166multi-line tokens growing the buffer without bound.
1167
1168=cut
1169*/
1170
1171void
1172Perl_lex_discard_to(pTHX_ char *ptr)
1173{
1174 char *buf;
1175 STRLEN discard_len;
1176 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1177 buf = SvPVX(PL_parser->linestr);
1178 if (ptr < buf)
1179 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180 if (ptr == buf)
1181 return;
1182 if (ptr > PL_parser->bufptr)
1183 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1184 discard_len = ptr - buf;
1185 if (PL_parser->oldbufptr < ptr)
1186 PL_parser->oldbufptr = ptr;
1187 if (PL_parser->oldoldbufptr < ptr)
1188 PL_parser->oldoldbufptr = ptr;
1189 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1190 PL_parser->last_uni = NULL;
1191 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1192 PL_parser->last_lop = NULL;
1193 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1194 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1195 PL_parser->bufend -= discard_len;
1196 PL_parser->bufptr -= discard_len;
1197 PL_parser->oldbufptr -= discard_len;
1198 PL_parser->oldoldbufptr -= discard_len;
1199 if (PL_parser->last_uni)
1200 PL_parser->last_uni -= discard_len;
1201 if (PL_parser->last_lop)
1202 PL_parser->last_lop -= discard_len;
1203}
1204
1205/*
1206=for apidoc Amx|bool|lex_next_chunk|U32 flags
1207
1208Reads in the next chunk of text to be lexed, appending it to
1209L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1210looked to the end of the current chunk and wants to know more. It is
1211usual, but not necessary, for lexing to have consumed the entirety of
1212the current chunk at this time.
1213
1214If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1215chunk (i.e., the current chunk has been entirely consumed), normally the
1216current chunk will be discarded at the same time that the new chunk is
1217read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1218will not be discarded. If the current chunk has not been entirely
1219consumed, then it will not be discarded regardless of the flag.
1220
1221Returns true if some new text was added to the buffer, or false if the
1222buffer has reached the end of the input text.
1223
1224=cut
1225*/
1226
1227#define LEX_FAKE_EOF 0x80000000
1228
1229bool
1230Perl_lex_next_chunk(pTHX_ U32 flags)
1231{
1232 SV *linestr;
1233 char *buf;
1234 STRLEN old_bufend_pos, new_bufend_pos;
1235 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1236 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1237 bool got_some_for_debugger = 0;
f0e67a1d
Z
1238 bool got_some;
1239 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1240 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1241 linestr = PL_parser->linestr;
1242 buf = SvPVX(linestr);
1243 if (!(flags & LEX_KEEP_PREVIOUS) &&
1244 PL_parser->bufptr == PL_parser->bufend) {
1245 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1246 linestart_pos = 0;
1247 if (PL_parser->last_uni != PL_parser->bufend)
1248 PL_parser->last_uni = NULL;
1249 if (PL_parser->last_lop != PL_parser->bufend)
1250 PL_parser->last_lop = NULL;
1251 last_uni_pos = last_lop_pos = 0;
1252 *buf = 0;
1253 SvCUR(linestr) = 0;
1254 } else {
1255 old_bufend_pos = PL_parser->bufend - buf;
1256 bufptr_pos = PL_parser->bufptr - buf;
1257 oldbufptr_pos = PL_parser->oldbufptr - buf;
1258 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1259 linestart_pos = PL_parser->linestart - buf;
1260 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1261 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1262 }
1263 if (flags & LEX_FAKE_EOF) {
1264 goto eof;
1265 } else if (!PL_parser->rsfp) {
1266 got_some = 0;
1267 } else if (filter_gets(linestr, old_bufend_pos)) {
1268 got_some = 1;
17cc9359 1269 got_some_for_debugger = 1;
f0e67a1d 1270 } else {
580561a3
Z
1271 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1272 sv_setpvs(linestr, "");
f0e67a1d
Z
1273 eof:
1274 /* End of real input. Close filehandle (unless it was STDIN),
1275 * then add implicit termination.
1276 */
1277 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1278 PerlIO_clearerr(PL_parser->rsfp);
1279 else if (PL_parser->rsfp)
1280 (void)PerlIO_close(PL_parser->rsfp);
1281 PL_parser->rsfp = NULL;
737c24fc 1282 PL_parser->in_pod = 0;
f0e67a1d
Z
1283#ifdef PERL_MAD
1284 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1285 PL_faketokens = 1;
1286#endif
1287 if (!PL_in_eval && PL_minus_p) {
1288 sv_catpvs(linestr,
1289 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1290 PL_minus_n = PL_minus_p = 0;
1291 } else if (!PL_in_eval && PL_minus_n) {
1292 sv_catpvs(linestr, /*{*/";}");
1293 PL_minus_n = 0;
1294 } else
1295 sv_catpvs(linestr, ";");
1296 got_some = 1;
1297 }
1298 buf = SvPVX(linestr);
1299 new_bufend_pos = SvCUR(linestr);
1300 PL_parser->bufend = buf + new_bufend_pos;
1301 PL_parser->bufptr = buf + bufptr_pos;
1302 PL_parser->oldbufptr = buf + oldbufptr_pos;
1303 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1304 PL_parser->linestart = buf + linestart_pos;
1305 if (PL_parser->last_uni)
1306 PL_parser->last_uni = buf + last_uni_pos;
1307 if (PL_parser->last_lop)
1308 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1309 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1310 PL_curstash != PL_debstash) {
1311 /* debugger active and we're not compiling the debugger code,
1312 * so store the line into the debugger's array of lines
1313 */
1314 update_debugger_info(NULL, buf+old_bufend_pos,
1315 new_bufend_pos-old_bufend_pos);
1316 }
1317 return got_some;
1318}
1319
1320/*
1321=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1322
1323Looks ahead one (Unicode) character in the text currently being lexed.
1324Returns the codepoint (unsigned integer value) of the next character,
1325or -1 if lexing has reached the end of the input text. To consume the
1326peeked character, use L</lex_read_unichar>.
1327
1328If the next character is in (or extends into) the next chunk of input
1329text, the next chunk will be read in. Normally the current chunk will be
1330discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1331then the current chunk will not be discarded.
1332
1333If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1334is encountered, an exception is generated.
1335
1336=cut
1337*/
1338
1339I32
1340Perl_lex_peek_unichar(pTHX_ U32 flags)
1341{
749123ff 1342 dVAR;
f0e67a1d
Z
1343 char *s, *bufend;
1344 if (flags & ~(LEX_KEEP_PREVIOUS))
1345 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1346 s = PL_parser->bufptr;
1347 bufend = PL_parser->bufend;
1348 if (UTF) {
1349 U8 head;
1350 I32 unichar;
1351 STRLEN len, retlen;
1352 if (s == bufend) {
1353 if (!lex_next_chunk(flags))
1354 return -1;
1355 s = PL_parser->bufptr;
1356 bufend = PL_parser->bufend;
1357 }
1358 head = (U8)*s;
1359 if (!(head & 0x80))
1360 return head;
1361 if (head & 0x40) {
1362 len = PL_utf8skip[head];
1363 while ((STRLEN)(bufend-s) < len) {
1364 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1365 break;
1366 s = PL_parser->bufptr;
1367 bufend = PL_parser->bufend;
1368 }
1369 }
1370 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1371 if (retlen == (STRLEN)-1) {
1372 /* malformed UTF-8 */
1373 ENTER;
1374 SAVESPTR(PL_warnhook);
1375 PL_warnhook = PERL_WARNHOOK_FATAL;
1376 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1377 LEAVE;
1378 }
1379 return unichar;
1380 } else {
1381 if (s == bufend) {
1382 if (!lex_next_chunk(flags))
1383 return -1;
1384 s = PL_parser->bufptr;
1385 }
1386 return (U8)*s;
1387 }
1388}
1389
1390/*
1391=for apidoc Amx|I32|lex_read_unichar|U32 flags
1392
1393Reads the next (Unicode) character in the text currently being lexed.
1394Returns the codepoint (unsigned integer value) of the character read,
1395and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1396if lexing has reached the end of the input text. To non-destructively
1397examine the next character, use L</lex_peek_unichar> instead.
1398
1399If the next character is in (or extends into) the next chunk of input
1400text, the next chunk will be read in. Normally the current chunk will be
1401discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1402then the current chunk will not be discarded.
1403
1404If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1405is encountered, an exception is generated.
1406
1407=cut
1408*/
1409
1410I32
1411Perl_lex_read_unichar(pTHX_ U32 flags)
1412{
1413 I32 c;
1414 if (flags & ~(LEX_KEEP_PREVIOUS))
1415 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1416 c = lex_peek_unichar(flags);
1417 if (c != -1) {
1418 if (c == '\n')
1419 CopLINE_inc(PL_curcop);
1420 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1421 }
1422 return c;
1423}
1424
1425/*
1426=for apidoc Amx|void|lex_read_space|U32 flags
1427
1428Reads optional spaces, in Perl style, in the text currently being
1429lexed. The spaces may include ordinary whitespace characters and
1430Perl-style comments. C<#line> directives are processed if encountered.
1431L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1432at a non-space character (or the end of the input text).
1433
1434If spaces extend into the next chunk of input text, the next chunk will
1435be read in. Normally the current chunk will be discarded at the same
1436time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1437chunk will not be discarded.
1438
1439=cut
1440*/
1441
f0998909
Z
1442#define LEX_NO_NEXT_CHUNK 0x80000000
1443
f0e67a1d
Z
1444void
1445Perl_lex_read_space(pTHX_ U32 flags)
1446{
1447 char *s, *bufend;
1448 bool need_incline = 0;
f0998909 1449 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1450 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1451#ifdef PERL_MAD
1452 if (PL_skipwhite) {
1453 sv_free(PL_skipwhite);
1454 PL_skipwhite = NULL;
1455 }
1456 if (PL_madskills)
1457 PL_skipwhite = newSVpvs("");
1458#endif /* PERL_MAD */
1459 s = PL_parser->bufptr;
1460 bufend = PL_parser->bufend;
1461 while (1) {
1462 char c = *s;
1463 if (c == '#') {
1464 do {
1465 c = *++s;
1466 } while (!(c == '\n' || (c == 0 && s == bufend)));
1467 } else if (c == '\n') {
1468 s++;
1469 PL_parser->linestart = s;
1470 if (s == bufend)
1471 need_incline = 1;
1472 else
1473 incline(s);
1474 } else if (isSPACE(c)) {
1475 s++;
1476 } else if (c == 0 && s == bufend) {
1477 bool got_more;
1478#ifdef PERL_MAD
1479 if (PL_madskills)
1480 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1481#endif /* PERL_MAD */
f0998909
Z
1482 if (flags & LEX_NO_NEXT_CHUNK)
1483 break;
f0e67a1d
Z
1484 PL_parser->bufptr = s;
1485 CopLINE_inc(PL_curcop);
1486 got_more = lex_next_chunk(flags);
1487 CopLINE_dec(PL_curcop);
1488 s = PL_parser->bufptr;
1489 bufend = PL_parser->bufend;
1490 if (!got_more)
1491 break;
1492 if (need_incline && PL_parser->rsfp) {
1493 incline(s);
1494 need_incline = 0;
1495 }
1496 } else {
1497 break;
1498 }
1499 }
1500#ifdef PERL_MAD
1501 if (PL_madskills)
1502 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1503#endif /* PERL_MAD */
1504 PL_parser->bufptr = s;
1505}
1506
1507/*
ffb4593c
NT
1508 * S_incline
1509 * This subroutine has nothing to do with tilting, whether at windmills
1510 * or pinball tables. Its name is short for "increment line". It
57843af0 1511 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1512 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1513 * # line 500 "foo.pm"
1514 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1515 */
1516
76e3520e 1517STATIC void
d9095cec 1518S_incline(pTHX_ const char *s)
463ee0b2 1519{
97aff369 1520 dVAR;
d9095cec
NC
1521 const char *t;
1522 const char *n;
1523 const char *e;
8818d409 1524 line_t line_num;
463ee0b2 1525
7918f24d
NC
1526 PERL_ARGS_ASSERT_INCLINE;
1527
57843af0 1528 CopLINE_inc(PL_curcop);
463ee0b2
LW
1529 if (*s++ != '#')
1530 return;
d4c19fe8
AL
1531 while (SPACE_OR_TAB(*s))
1532 s++;
73659bf1
GS
1533 if (strnEQ(s, "line", 4))
1534 s += 4;
1535 else
1536 return;
084592ab 1537 if (SPACE_OR_TAB(*s))
73659bf1 1538 s++;
4e553d73 1539 else
73659bf1 1540 return;
d4c19fe8
AL
1541 while (SPACE_OR_TAB(*s))
1542 s++;
463ee0b2
LW
1543 if (!isDIGIT(*s))
1544 return;
d4c19fe8 1545
463ee0b2
LW
1546 n = s;
1547 while (isDIGIT(*s))
1548 s++;
07714eb4 1549 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1550 return;
bf4acbe4 1551 while (SPACE_OR_TAB(*s))
463ee0b2 1552 s++;
73659bf1 1553 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1554 s++;
73659bf1
GS
1555 e = t + 1;
1556 }
463ee0b2 1557 else {
c35e046a
AL
1558 t = s;
1559 while (!isSPACE(*t))
1560 t++;
73659bf1 1561 e = t;
463ee0b2 1562 }
bf4acbe4 1563 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1564 e++;
1565 if (*e != '\n' && *e != '\0')
1566 return; /* false alarm */
1567
8818d409
FC
1568 line_num = atoi(n)-1;
1569
f4dd75d9 1570 if (t - s > 0) {
d9095cec 1571 const STRLEN len = t - s;
19bad673
NC
1572 SV *const temp_sv = CopFILESV(PL_curcop);
1573 const char *cf;
1574 STRLEN tmplen;
1575
1576 if (temp_sv) {
1577 cf = SvPVX(temp_sv);
1578 tmplen = SvCUR(temp_sv);
1579 } else {
1580 cf = NULL;
1581 tmplen = 0;
1582 }
1583
42d9b98d 1584 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1585 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1586 * to *{"::_<newfilename"} */
44867030
NC
1587 /* However, the long form of evals is only turned on by the
1588 debugger - usually they're "(eval %lu)" */
1589 char smallbuf[128];
1590 char *tmpbuf;
1591 GV **gvp;
d9095cec 1592 STRLEN tmplen2 = len;
798b63bc 1593 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1594 tmpbuf = smallbuf;
1595 else
2ae0db35 1596 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1597 tmpbuf[0] = '_';
1598 tmpbuf[1] = '<';
2ae0db35 1599 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1600 tmplen += 2;
8a5ee598
RGS
1601 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1602 if (gvp) {
44867030
NC
1603 char *tmpbuf2;
1604 GV *gv2;
1605
1606 if (tmplen2 + 2 <= sizeof smallbuf)
1607 tmpbuf2 = smallbuf;
1608 else
1609 Newx(tmpbuf2, tmplen2 + 2, char);
1610
1611 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1612 /* Either they malloc'd it, or we malloc'd it,
1613 so no prefix is present in ours. */
1614 tmpbuf2[0] = '_';
1615 tmpbuf2[1] = '<';
1616 }
1617
1618 memcpy(tmpbuf2 + 2, s, tmplen2);
1619 tmplen2 += 2;
1620
8a5ee598 1621 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1622 if (!isGV(gv2)) {
8a5ee598 1623 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1624 /* adjust ${"::_<newfilename"} to store the new file name */
1625 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1626 /* The line number may differ. If that is the case,
1627 alias the saved lines that are in the array.
1628 Otherwise alias the whole array. */
1629 if (CopLINE(PL_curcop) == line_num) {
1630 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1631 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1632 }
1633 else if (GvAV(*gvp)) {
1634 AV * const av = GvAV(*gvp);
1635 const I32 start = CopLINE(PL_curcop)+1;
1636 I32 items = AvFILLp(av) - start;
1637 if (items > 0) {
1638 AV * const av2 = GvAVn(gv2);
1639 SV **svp = AvARRAY(av) + start;
1640 I32 l = (I32)line_num+1;
1641 while (items--)
1642 av_store(av2, l++, SvREFCNT_inc(*svp++));
1643 }
1644 }
e5527e4b 1645 }
44867030
NC
1646
1647 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1648 }
e66cf94c 1649 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1650 }
05ec9bb3 1651 CopFILE_free(PL_curcop);
d9095cec 1652 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1653 }
8818d409 1654 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1655}
1656
29595ff2 1657#ifdef PERL_MAD
cd81e915 1658/* skip space before PL_thistoken */
29595ff2
NC
1659
1660STATIC char *
1661S_skipspace0(pTHX_ register char *s)
1662{
7918f24d
NC
1663 PERL_ARGS_ASSERT_SKIPSPACE0;
1664
29595ff2
NC
1665 s = skipspace(s);
1666 if (!PL_madskills)
1667 return s;
cd81e915
NC
1668 if (PL_skipwhite) {
1669 if (!PL_thiswhite)
6b29d1f5 1670 PL_thiswhite = newSVpvs("");
cd81e915
NC
1671 sv_catsv(PL_thiswhite, PL_skipwhite);
1672 sv_free(PL_skipwhite);
1673 PL_skipwhite = 0;
1674 }
1675 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1676 return s;
1677}
1678
cd81e915 1679/* skip space after PL_thistoken */
29595ff2
NC
1680
1681STATIC char *
1682S_skipspace1(pTHX_ register char *s)
1683{
d4c19fe8 1684 const char *start = s;
29595ff2
NC
1685 I32 startoff = start - SvPVX(PL_linestr);
1686
7918f24d
NC
1687 PERL_ARGS_ASSERT_SKIPSPACE1;
1688
29595ff2
NC
1689 s = skipspace(s);
1690 if (!PL_madskills)
1691 return s;
1692 start = SvPVX(PL_linestr) + startoff;
cd81e915 1693 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1694 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1695 PL_thistoken = newSVpvn(tstart, start - tstart);
1696 }
1697 PL_realtokenstart = -1;
1698 if (PL_skipwhite) {
1699 if (!PL_nextwhite)
6b29d1f5 1700 PL_nextwhite = newSVpvs("");
cd81e915
NC
1701 sv_catsv(PL_nextwhite, PL_skipwhite);
1702 sv_free(PL_skipwhite);
1703 PL_skipwhite = 0;
29595ff2
NC
1704 }
1705 return s;
1706}
1707
1708STATIC char *
1709S_skipspace2(pTHX_ register char *s, SV **svp)
1710{
c35e046a
AL
1711 char *start;
1712 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1713 const I32 startoff = s - SvPVX(PL_linestr);
1714
7918f24d
NC
1715 PERL_ARGS_ASSERT_SKIPSPACE2;
1716
29595ff2
NC
1717 s = skipspace(s);
1718 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1719 if (!PL_madskills || !svp)
1720 return s;
1721 start = SvPVX(PL_linestr) + startoff;
cd81e915 1722 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1723 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1724 PL_thistoken = newSVpvn(tstart, start - tstart);
1725 PL_realtokenstart = -1;
29595ff2 1726 }
cd81e915 1727 if (PL_skipwhite) {
29595ff2 1728 if (!*svp)
6b29d1f5 1729 *svp = newSVpvs("");
cd81e915
NC
1730 sv_setsv(*svp, PL_skipwhite);
1731 sv_free(PL_skipwhite);
1732 PL_skipwhite = 0;
29595ff2
NC
1733 }
1734
1735 return s;
1736}
1737#endif
1738
80a702cd 1739STATIC void
15f169a1 1740S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1741{
1742 AV *av = CopFILEAVx(PL_curcop);
1743 if (av) {
b9f83d2f 1744 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1745 if (orig_sv)
1746 sv_setsv(sv, orig_sv);
1747 else
1748 sv_setpvn(sv, buf, len);
80a702cd
RGS
1749 (void)SvIOK_on(sv);
1750 SvIV_set(sv, 0);
1751 av_store(av, (I32)CopLINE(PL_curcop), sv);
1752 }
1753}
1754
ffb4593c
NT
1755/*
1756 * S_skipspace
1757 * Called to gobble the appropriate amount and type of whitespace.
1758 * Skips comments as well.
1759 */
1760
76e3520e 1761STATIC char *
cea2e8a9 1762S_skipspace(pTHX_ register char *s)
a687059c 1763{
5db06880 1764#ifdef PERL_MAD
f0e67a1d
Z
1765 char *start = s;
1766#endif /* PERL_MAD */
7918f24d 1767 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1768#ifdef PERL_MAD
cd81e915
NC
1769 if (PL_skipwhite) {
1770 sv_free(PL_skipwhite);
f0e67a1d 1771 PL_skipwhite = NULL;
5db06880 1772 }
f0e67a1d 1773#endif /* PERL_MAD */
3280af22 1774 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1775 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1776 s++;
f0e67a1d
Z
1777 } else {
1778 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1779 PL_bufptr = s;
f0998909
Z
1780 lex_read_space(LEX_KEEP_PREVIOUS |
1781 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1782 LEX_NO_NEXT_CHUNK : 0));
3280af22 1783 s = PL_bufptr;
f0e67a1d
Z
1784 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1785 if (PL_linestart > PL_bufptr)
1786 PL_bufptr = PL_linestart;
1787 return s;
463ee0b2 1788 }
5db06880 1789#ifdef PERL_MAD
f0e67a1d
Z
1790 if (PL_madskills)
1791 PL_skipwhite = newSVpvn(start, s-start);
1792#endif /* PERL_MAD */
5db06880 1793 return s;
a687059c 1794}
378cc40b 1795
ffb4593c
NT
1796/*
1797 * S_check_uni
1798 * Check the unary operators to ensure there's no ambiguity in how they're
1799 * used. An ambiguous piece of code would be:
1800 * rand + 5
1801 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1802 * the +5 is its argument.
1803 */
1804
76e3520e 1805STATIC void
cea2e8a9 1806S_check_uni(pTHX)
ba106d47 1807{
97aff369 1808 dVAR;
d4c19fe8
AL
1809 const char *s;
1810 const char *t;
2f3197b3 1811
3280af22 1812 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1813 return;
3280af22
NIS
1814 while (isSPACE(*PL_last_uni))
1815 PL_last_uni++;
c35e046a
AL
1816 s = PL_last_uni;
1817 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1818 s++;
3280af22 1819 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1820 return;
6136c704 1821
9b387841
NC
1822 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1823 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1824 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1825}
1826
ffb4593c
NT
1827/*
1828 * LOP : macro to build a list operator. Its behaviour has been replaced
1829 * with a subroutine, S_lop() for which LOP is just another name.
1830 */
1831
a0d0e21e
LW
1832#define LOP(f,x) return lop(f,x,s)
1833
ffb4593c
NT
1834/*
1835 * S_lop
1836 * Build a list operator (or something that might be one). The rules:
1837 * - if we have a next token, then it's a list operator [why?]
1838 * - if the next thing is an opening paren, then it's a function
1839 * - else it's a list operator
1840 */
1841
76e3520e 1842STATIC I32
a0be28da 1843S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1844{
97aff369 1845 dVAR;
7918f24d
NC
1846
1847 PERL_ARGS_ASSERT_LOP;
1848
6154021b 1849 pl_yylval.ival = f;
35c8bce7 1850 CLINE;
3280af22
NIS
1851 PL_expect = x;
1852 PL_bufptr = s;
1853 PL_last_lop = PL_oldbufptr;
eb160463 1854 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1855#ifdef PERL_MAD
1856 if (PL_lasttoke)
78cdf107 1857 goto lstop;
5db06880 1858#else
3280af22 1859 if (PL_nexttoke)
78cdf107 1860 goto lstop;
5db06880 1861#endif
79072805 1862 if (*s == '(')
bbf60fe6 1863 return REPORT(FUNC);
29595ff2 1864 s = PEEKSPACE(s);
79072805 1865 if (*s == '(')
bbf60fe6 1866 return REPORT(FUNC);
78cdf107
Z
1867 else {
1868 lstop:
1869 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1870 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1871 return REPORT(LSTOP);
78cdf107 1872 }
79072805
LW
1873}
1874
5db06880
NC
1875#ifdef PERL_MAD
1876 /*
1877 * S_start_force
1878 * Sets up for an eventual force_next(). start_force(0) basically does
1879 * an unshift, while start_force(-1) does a push. yylex removes items
1880 * on the "pop" end.
1881 */
1882
1883STATIC void
1884S_start_force(pTHX_ int where)
1885{
1886 int i;
1887
cd81e915 1888 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1889 where = PL_lasttoke;
cd81e915
NC
1890 assert(PL_curforce < 0 || PL_curforce == where);
1891 if (PL_curforce != where) {
5db06880
NC
1892 for (i = PL_lasttoke; i > where; --i) {
1893 PL_nexttoke[i] = PL_nexttoke[i-1];
1894 }
1895 PL_lasttoke++;
1896 }
cd81e915 1897 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1898 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1899 PL_curforce = where;
1900 if (PL_nextwhite) {
5db06880 1901 if (PL_madskills)
6b29d1f5 1902 curmad('^', newSVpvs(""));
cd81e915 1903 CURMAD('_', PL_nextwhite);
5db06880
NC
1904 }
1905}
1906
1907STATIC void
1908S_curmad(pTHX_ char slot, SV *sv)
1909{
1910 MADPROP **where;
1911
1912 if (!sv)
1913 return;
cd81e915
NC
1914 if (PL_curforce < 0)
1915 where = &PL_thismad;
5db06880 1916 else
cd81e915 1917 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1918
cd81e915 1919 if (PL_faketokens)
76f68e9b 1920 sv_setpvs(sv, "");
5db06880
NC
1921 else {
1922 if (!IN_BYTES) {
1923 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1924 SvUTF8_on(sv);
1925 else if (PL_encoding) {
1926 sv_recode_to_utf8(sv, PL_encoding);
1927 }
1928 }
1929 }
1930
1931 /* keep a slot open for the head of the list? */
1932 if (slot != '_' && *where && (*where)->mad_key == '^') {
1933 (*where)->mad_key = slot;
daba3364 1934 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1935 (*where)->mad_val = (void*)sv;
1936 }
1937 else
1938 addmad(newMADsv(slot, sv), where, 0);
1939}
1940#else
b3f24c00
MHM
1941# define start_force(where) NOOP
1942# define curmad(slot, sv) NOOP
5db06880
NC
1943#endif
1944
ffb4593c
NT
1945/*
1946 * S_force_next
9cbb5ea2 1947 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1948 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1949 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1950 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1951 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1952 */
1953
4e553d73 1954STATIC void
cea2e8a9 1955S_force_next(pTHX_ I32 type)
79072805 1956{
97aff369 1957 dVAR;
704d4215
GG
1958#ifdef DEBUGGING
1959 if (DEBUG_T_TEST) {
1960 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1961 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1962 }
1963#endif
5db06880 1964#ifdef PERL_MAD
cd81e915 1965 if (PL_curforce < 0)
5db06880 1966 start_force(PL_lasttoke);
cd81e915 1967 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1968 if (PL_lex_state != LEX_KNOWNEXT)
1969 PL_lex_defer = PL_lex_state;
1970 PL_lex_state = LEX_KNOWNEXT;
1971 PL_lex_expect = PL_expect;
cd81e915 1972 PL_curforce = -1;
5db06880 1973#else
3280af22
NIS
1974 PL_nexttype[PL_nexttoke] = type;
1975 PL_nexttoke++;
1976 if (PL_lex_state != LEX_KNOWNEXT) {
1977 PL_lex_defer = PL_lex_state;
1978 PL_lex_expect = PL_expect;
1979 PL_lex_state = LEX_KNOWNEXT;
79072805 1980 }
5db06880 1981#endif
79072805
LW
1982}
1983
28ac2b49
Z
1984void
1985Perl_yyunlex(pTHX)
1986{
a7aaec61
Z
1987 int yyc = PL_parser->yychar;
1988 if (yyc != YYEMPTY) {
1989 if (yyc) {
1990 start_force(-1);
1991 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1992 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 1993 PL_lex_allbrackets--;
a7aaec61 1994 PL_lex_brackets--;
78cdf107
Z
1995 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1996 } else if (yyc == '('/*)*/) {
1997 PL_lex_allbrackets--;
1998 yyc |= (2<<24);
a7aaec61
Z
1999 }
2000 force_next(yyc);
2001 }
28ac2b49
Z
2002 PL_parser->yychar = YYEMPTY;
2003 }
2004}
2005
d0a148a6 2006STATIC SV *
15f169a1 2007S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2008{
97aff369 2009 dVAR;
740cce10 2010 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2011 !IN_BYTES
2012 && UTF
2013 && !is_ascii_string((const U8*)start, len)
740cce10 2014 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2015 return sv;
2016}
2017
ffb4593c
NT
2018/*
2019 * S_force_word
2020 * When the lexer knows the next thing is a word (for instance, it has
2021 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2022 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2023 * lookahead.
ffb4593c
NT
2024 *
2025 * Arguments:
b1b65b59 2026 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2027 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2028 * int check_keyword : if true, Perl checks to make sure the word isn't
2029 * a keyword (do this if the word is a label, e.g. goto FOO)
2030 * int allow_pack : if true, : characters will also be allowed (require,
2031 * use, etc. do this)
9cbb5ea2 2032 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2033 */
2034
76e3520e 2035STATIC char *
cea2e8a9 2036S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2037{
97aff369 2038 dVAR;
463ee0b2
LW
2039 register char *s;
2040 STRLEN len;
4e553d73 2041
7918f24d
NC
2042 PERL_ARGS_ASSERT_FORCE_WORD;
2043
29595ff2 2044 start = SKIPSPACE1(start);
463ee0b2 2045 s = start;
7e2040f0 2046 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2047 (allow_pack && *s == ':') ||
15f0808c 2048 (allow_initial_tick && *s == '\'') )
a0d0e21e 2049 {
3280af22 2050 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2051 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2052 return start;
cd81e915 2053 start_force(PL_curforce);
5db06880
NC
2054 if (PL_madskills)
2055 curmad('X', newSVpvn(start,s-start));
463ee0b2 2056 if (token == METHOD) {
29595ff2 2057 s = SKIPSPACE1(s);
463ee0b2 2058 if (*s == '(')
3280af22 2059 PL_expect = XTERM;
463ee0b2 2060 else {
3280af22 2061 PL_expect = XOPERATOR;
463ee0b2 2062 }
79072805 2063 }
e74e6b3d 2064 if (PL_madskills)
63575281 2065 curmad('g', newSVpvs( "forced" ));
9ded7720 2066 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2067 = (OP*)newSVOP(OP_CONST,0,
2068 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2069 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2070 force_next(token);
2071 }
2072 return s;
2073}
2074
ffb4593c
NT
2075/*
2076 * S_force_ident
9cbb5ea2 2077 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2078 * text only contains the "foo" portion. The first argument is a pointer
2079 * to the "foo", and the second argument is the type symbol to prefix.
2080 * Forces the next token to be a "WORD".
9cbb5ea2 2081 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2082 */
2083
76e3520e 2084STATIC void
bfed75c6 2085S_force_ident(pTHX_ register const char *s, int kind)
79072805 2086{
97aff369 2087 dVAR;
7918f24d
NC
2088
2089 PERL_ARGS_ASSERT_FORCE_IDENT;
2090
c35e046a 2091 if (*s) {
90e5519e
NC
2092 const STRLEN len = strlen(s);
2093 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2094 start_force(PL_curforce);
9ded7720 2095 NEXTVAL_NEXTTOKE.opval = o;
79072805 2096 force_next(WORD);
748a9306 2097 if (kind) {
11343788 2098 o->op_private = OPpCONST_ENTERED;
55497cff 2099 /* XXX see note in pp_entereval() for why we forgo typo
2100 warnings if the symbol must be introduced in an eval.
2101 GSAR 96-10-12 */
90e5519e
NC
2102 gv_fetchpvn_flags(s, len,
2103 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2104 : GV_ADD,
2105 kind == '$' ? SVt_PV :
2106 kind == '@' ? SVt_PVAV :
2107 kind == '%' ? SVt_PVHV :
a0d0e21e 2108 SVt_PVGV
90e5519e 2109 );
748a9306 2110 }
79072805
LW
2111 }
2112}
2113
1571675a
GS
2114NV
2115Perl_str_to_version(pTHX_ SV *sv)
2116{
2117 NV retval = 0.0;
2118 NV nshift = 1.0;
2119 STRLEN len;
cfd0369c 2120 const char *start = SvPV_const(sv,len);
9d4ba2ae 2121 const char * const end = start + len;
504618e9 2122 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2123
2124 PERL_ARGS_ASSERT_STR_TO_VERSION;
2125
1571675a 2126 while (start < end) {
ba210ebe 2127 STRLEN skip;
1571675a
GS
2128 UV n;
2129 if (utf)
9041c2e3 2130 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2131 else {
2132 n = *(U8*)start;
2133 skip = 1;
2134 }
2135 retval += ((NV)n)/nshift;
2136 start += skip;
2137 nshift *= 1000;
2138 }
2139 return retval;
2140}
2141
4e553d73 2142/*
ffb4593c
NT
2143 * S_force_version
2144 * Forces the next token to be a version number.
e759cc13
RGS
2145 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2146 * and if "guessing" is TRUE, then no new token is created (and the caller
2147 * must use an alternative parsing method).
ffb4593c
NT
2148 */
2149
76e3520e 2150STATIC char *
e759cc13 2151S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2152{
97aff369 2153 dVAR;
5f66b61c 2154 OP *version = NULL;
44dcb63b 2155 char *d;
5db06880
NC
2156#ifdef PERL_MAD
2157 I32 startoff = s - SvPVX(PL_linestr);
2158#endif
89bfa8cd 2159
7918f24d
NC
2160 PERL_ARGS_ASSERT_FORCE_VERSION;
2161
29595ff2 2162 s = SKIPSPACE1(s);
89bfa8cd 2163
44dcb63b 2164 d = s;
dd629d5b 2165 if (*d == 'v')
44dcb63b 2166 d++;
44dcb63b 2167 if (isDIGIT(*d)) {
e759cc13
RGS
2168 while (isDIGIT(*d) || *d == '_' || *d == '.')
2169 d++;
5db06880
NC
2170#ifdef PERL_MAD
2171 if (PL_madskills) {
cd81e915 2172 start_force(PL_curforce);
5db06880
NC
2173 curmad('X', newSVpvn(s,d-s));
2174 }
2175#endif
4e4da3ac 2176 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2177 SV *ver;
8d08d9ba
DG
2178#ifdef USE_LOCALE_NUMERIC
2179 char *loc = setlocale(LC_NUMERIC, "C");
2180#endif
6154021b 2181 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2182#ifdef USE_LOCALE_NUMERIC
2183 setlocale(LC_NUMERIC, loc);
2184#endif
6154021b 2185 version = pl_yylval.opval;
dd629d5b
GS
2186 ver = cSVOPx(version)->op_sv;
2187 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2188 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2189 SvNV_set(ver, str_to_version(ver));
1571675a 2190 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2191 }
89bfa8cd 2192 }
5db06880
NC
2193 else if (guessing) {
2194#ifdef PERL_MAD
2195 if (PL_madskills) {
cd81e915
NC
2196 sv_free(PL_nextwhite); /* let next token collect whitespace */
2197 PL_nextwhite = 0;
5db06880
NC
2198 s = SvPVX(PL_linestr) + startoff;
2199 }
2200#endif
e759cc13 2201 return s;
5db06880 2202 }
89bfa8cd 2203 }
2204
5db06880
NC
2205#ifdef PERL_MAD
2206 if (PL_madskills && !version) {
cd81e915
NC
2207 sv_free(PL_nextwhite); /* let next token collect whitespace */
2208 PL_nextwhite = 0;
5db06880
NC
2209 s = SvPVX(PL_linestr) + startoff;
2210 }
2211#endif
89bfa8cd 2212 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2213 start_force(PL_curforce);
9ded7720 2214 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2215 force_next(WORD);
89bfa8cd 2216
e759cc13 2217 return s;
89bfa8cd 2218}
2219
ffb4593c 2220/*
91152fc1
DG
2221 * S_force_strict_version
2222 * Forces the next token to be a version number using strict syntax rules.
2223 */
2224
2225STATIC char *
2226S_force_strict_version(pTHX_ char *s)
2227{
2228 dVAR;
2229 OP *version = NULL;
2230#ifdef PERL_MAD
2231 I32 startoff = s - SvPVX(PL_linestr);
2232#endif
2233 const char *errstr = NULL;
2234
2235 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2236
2237 while (isSPACE(*s)) /* leading whitespace */
2238 s++;
2239
2240 if (is_STRICT_VERSION(s,&errstr)) {
2241 SV *ver = newSV(0);
2242 s = (char *)scan_version(s, ver, 0);
2243 version = newSVOP(OP_CONST, 0, ver);
2244 }
4e4da3ac
Z
2245 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2246 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2247 {
91152fc1
DG
2248 PL_bufptr = s;
2249 if (errstr)
2250 yyerror(errstr); /* version required */
2251 return s;
2252 }
2253
2254#ifdef PERL_MAD
2255 if (PL_madskills && !version) {
2256 sv_free(PL_nextwhite); /* let next token collect whitespace */
2257 PL_nextwhite = 0;
2258 s = SvPVX(PL_linestr) + startoff;
2259 }
2260#endif
2261 /* NOTE: The parser sees the package name and the VERSION swapped */
2262 start_force(PL_curforce);
2263 NEXTVAL_NEXTTOKE.opval = version;
2264 force_next(WORD);
2265
2266 return s;
2267}
2268
2269/*
ffb4593c
NT
2270 * S_tokeq
2271 * Tokenize a quoted string passed in as an SV. It finds the next
2272 * chunk, up to end of string or a backslash. It may make a new
2273 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2274 * turns \\ into \.
2275 */
2276
76e3520e 2277STATIC SV *
cea2e8a9 2278S_tokeq(pTHX_ SV *sv)
79072805 2279{
97aff369 2280 dVAR;
79072805
LW
2281 register char *s;
2282 register char *send;
2283 register char *d;
b3ac6de7
IZ
2284 STRLEN len = 0;
2285 SV *pv = sv;
79072805 2286
7918f24d
NC
2287 PERL_ARGS_ASSERT_TOKEQ;
2288
79072805 2289 if (!SvLEN(sv))
b3ac6de7 2290 goto finish;
79072805 2291
a0d0e21e 2292 s = SvPV_force(sv, len);
21a311ee 2293 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2294 goto finish;
463ee0b2 2295 send = s + len;
dcb21ed6
NC
2296 /* This is relying on the SV being "well formed" with a trailing '\0' */
2297 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2298 s++;
2299 if (s == send)
b3ac6de7 2300 goto finish;
79072805 2301 d = s;
be4731d2 2302 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2303 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2304 }
79072805
LW
2305 while (s < send) {
2306 if (*s == '\\') {
a0d0e21e 2307 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2308 s++; /* all that, just for this */
2309 }
2310 *d++ = *s++;
2311 }
2312 *d = '\0';
95a20fc0 2313 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2314 finish:
3280af22 2315 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2316 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2317 return sv;
2318}
2319
ffb4593c
NT
2320/*
2321 * Now come three functions related to double-quote context,
2322 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2323 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2324 * interact with PL_lex_state, and create fake ( ... ) argument lists
2325 * to handle functions and concatenation.
2326 * They assume that whoever calls them will be setting up a fake
2327 * join call, because each subthing puts a ',' after it. This lets
2328 * "lower \luPpEr"
2329 * become
2330 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2331 *
2332 * (I'm not sure whether the spurious commas at the end of lcfirst's
2333 * arguments and join's arguments are created or not).
2334 */
2335
2336/*
2337 * S_sublex_start
6154021b 2338 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2339 *
2340 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2341 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2342 *
2343 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2344 *
2345 * Everything else becomes a FUNC.
2346 *
2347 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2348 * had an OP_CONST or OP_READLINE). This just sets us up for a
2349 * call to S_sublex_push().
2350 */
2351
76e3520e 2352STATIC I32
cea2e8a9 2353S_sublex_start(pTHX)
79072805 2354{
97aff369 2355 dVAR;
6154021b 2356 register const I32 op_type = pl_yylval.ival;
79072805
LW
2357
2358 if (op_type == OP_NULL) {
6154021b 2359 pl_yylval.opval = PL_lex_op;
5f66b61c 2360 PL_lex_op = NULL;
79072805
LW
2361 return THING;
2362 }
2363 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2364 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2365
2366 if (SvTYPE(sv) == SVt_PVIV) {
2367 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2368 STRLEN len;
96a5add6 2369 const char * const p = SvPV_const(sv, len);
740cce10 2370 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2371 SvREFCNT_dec(sv);
2372 sv = nsv;
4e553d73 2373 }
6154021b 2374 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2375 PL_lex_stuff = NULL;
6f33ba73
RGS
2376 /* Allow <FH> // "foo" */
2377 if (op_type == OP_READLINE)
2378 PL_expect = XTERMORDORDOR;
79072805
LW
2379 return THING;
2380 }
e3f73d4e
RGS
2381 else if (op_type == OP_BACKTICK && PL_lex_op) {
2382 /* readpipe() vas overriden */
2383 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2384 pl_yylval.opval = PL_lex_op;
9b201d7d 2385 PL_lex_op = NULL;
e3f73d4e
RGS
2386 PL_lex_stuff = NULL;
2387 return THING;
2388 }
79072805 2389
3280af22 2390 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2391 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2392 PL_sublex_info.sub_op = PL_lex_op;
2393 PL_lex_state = LEX_INTERPPUSH;
55497cff 2394
3280af22
NIS
2395 PL_expect = XTERM;
2396 if (PL_lex_op) {
6154021b 2397 pl_yylval.opval = PL_lex_op;
5f66b61c 2398 PL_lex_op = NULL;
55497cff 2399 return PMFUNC;
2400 }
2401 else
2402 return FUNC;
2403}
2404
ffb4593c
NT
2405/*
2406 * S_sublex_push
2407 * Create a new scope to save the lexing state. The scope will be
2408 * ended in S_sublex_done. Returns a '(', starting the function arguments
2409 * to the uc, lc, etc. found before.
2410 * Sets PL_lex_state to LEX_INTERPCONCAT.
2411 */
2412
76e3520e 2413STATIC I32
cea2e8a9 2414S_sublex_push(pTHX)
55497cff 2415{
27da23d5 2416 dVAR;
f46d017c 2417 ENTER;
55497cff 2418
3280af22 2419 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2420 SAVEBOOL(PL_lex_dojoin);
3280af22 2421 SAVEI32(PL_lex_brackets);
78cdf107
Z
2422 SAVEI32(PL_lex_allbrackets);
2423 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2424 SAVEI32(PL_lex_casemods);
2425 SAVEI32(PL_lex_starts);
651b5b28 2426 SAVEI8(PL_lex_state);
7766f137 2427 SAVEVPTR(PL_lex_inpat);
98246f1e 2428 SAVEI16(PL_lex_inwhat);
57843af0 2429 SAVECOPLINE(PL_curcop);
3280af22 2430 SAVEPPTR(PL_bufptr);
8452ff4b 2431 SAVEPPTR(PL_bufend);
3280af22
NIS
2432 SAVEPPTR(PL_oldbufptr);
2433 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2434 SAVEPPTR(PL_last_lop);
2435 SAVEPPTR(PL_last_uni);
3280af22
NIS
2436 SAVEPPTR(PL_linestart);
2437 SAVESPTR(PL_linestr);
8edd5f42
RGS
2438 SAVEGENERICPV(PL_lex_brackstack);
2439 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2440
2441 PL_linestr = PL_lex_stuff;
a0714e2c 2442 PL_lex_stuff = NULL;
3280af22 2443
9cbb5ea2
GS
2444 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2445 = SvPVX(PL_linestr);
3280af22 2446 PL_bufend += SvCUR(PL_linestr);
bd61b366 2447 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2448 SAVEFREESV(PL_linestr);
2449
2450 PL_lex_dojoin = FALSE;
2451 PL_lex_brackets = 0;
78cdf107
Z
2452 PL_lex_allbrackets = 0;
2453 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2454 Newx(PL_lex_brackstack, 120, char);
2455 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2456 PL_lex_casemods = 0;
2457 *PL_lex_casestack = '\0';
2458 PL_lex_starts = 0;
2459 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2460 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2461
2462 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2463 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2464 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2465 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2466 else
5f66b61c 2467 PL_lex_inpat = NULL;
79072805 2468
55497cff 2469 return '(';
79072805
LW
2470}
2471
ffb4593c
NT
2472/*
2473 * S_sublex_done
2474 * Restores lexer state after a S_sublex_push.
2475 */
2476
76e3520e 2477STATIC I32
cea2e8a9 2478S_sublex_done(pTHX)
79072805 2479{
27da23d5 2480 dVAR;
3280af22 2481 if (!PL_lex_starts++) {
396482e1 2482 SV * const sv = newSVpvs("");
9aa983d2
JH
2483 if (SvUTF8(PL_linestr))
2484 SvUTF8_on(sv);
3280af22 2485 PL_expect = XOPERATOR;
6154021b 2486 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2487 return THING;
2488 }
2489
3280af22
NIS
2490 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2491 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2492 return yylex();
79072805
LW
2493 }
2494
ffb4593c 2495 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2496 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2497 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2498 PL_linestr = PL_lex_repl;
2499 PL_lex_inpat = 0;
2500 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2501 PL_bufend += SvCUR(PL_linestr);
bd61b366 2502 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2503 SAVEFREESV(PL_linestr);
2504 PL_lex_dojoin = FALSE;
2505 PL_lex_brackets = 0;
78cdf107
Z
2506 PL_lex_allbrackets = 0;
2507 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2508 PL_lex_casemods = 0;
2509 *PL_lex_casestack = '\0';
2510 PL_lex_starts = 0;
25da4f38 2511 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2512 PL_lex_state = LEX_INTERPNORMAL;
2513 PL_lex_starts++;
e9fa98b2
HS
2514 /* we don't clear PL_lex_repl here, so that we can check later
2515 whether this is an evalled subst; that means we rely on the
2516 logic to ensure sublex_done() is called again only via the
2517 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2518 }
e9fa98b2 2519 else {
3280af22 2520 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2521 PL_lex_repl = NULL;
e9fa98b2 2522 }
79072805 2523 return ',';
ffed7fef
LW
2524 }
2525 else {
5db06880
NC
2526#ifdef PERL_MAD
2527 if (PL_madskills) {
cd81e915
NC
2528 if (PL_thiswhite) {
2529 if (!PL_endwhite)
6b29d1f5 2530 PL_endwhite = newSVpvs("");
cd81e915
NC
2531 sv_catsv(PL_endwhite, PL_thiswhite);
2532 PL_thiswhite = 0;
2533 }
2534 if (PL_thistoken)
76f68e9b 2535 sv_setpvs(PL_thistoken,"");
5db06880 2536 else
cd81e915 2537 PL_realtokenstart = -1;
5db06880
NC
2538 }
2539#endif
f46d017c 2540 LEAVE;
3280af22
NIS
2541 PL_bufend = SvPVX(PL_linestr);
2542 PL_bufend += SvCUR(PL_linestr);
2543 PL_expect = XOPERATOR;
09bef843 2544 PL_sublex_info.sub_inwhat = 0;
79072805 2545 return ')';
ffed7fef
LW
2546 }
2547}
2548
02aa26ce
NT
2549/*
2550 scan_const
2551
2552 Extracts a pattern, double-quoted string, or transliteration. This
2553 is terrifying code.
2554
94def140 2555 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2556 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2557 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2558
94def140
TS
2559 Returns a pointer to the character scanned up to. If this is
2560 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2561 successfully parsed), will leave an OP for the substring scanned
6154021b 2562 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2563 by looking at the next characters herself.
2564
02aa26ce
NT
2565 In patterns:
2566 backslashes:
ff3f963a 2567 constants: \N{NAME} only
02aa26ce
NT
2568 case and quoting: \U \Q \E
2569 stops on @ and $, but not for $ as tail anchor
2570
2571 In transliterations:
2572 characters are VERY literal, except for - not at the start or end
94def140
TS
2573 of the string, which indicates a range. If the range is in bytes,
2574 scan_const expands the range to the full set of intermediate
2575 characters. If the range is in utf8, the hyphen is replaced with
2576 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2577
2578 In double-quoted strings:
2579 backslashes:
2580 double-quoted style: \r and \n
ff3f963a 2581 constants: \x31, etc.
94def140 2582 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2583 case and quoting: \U \Q \E
2584 stops on @ and $
2585
2586 scan_const does *not* construct ops to handle interpolated strings.
2587 It stops processing as soon as it finds an embedded $ or @ variable
2588 and leaves it to the caller to work out what's going on.
2589
94def140
TS
2590 embedded arrays (whether in pattern or not) could be:
2591 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2592
2593 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2594
2595 $ in pattern could be $foo or could be tail anchor. Assumption:
2596 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2597 followed by one of "()| \r\n\t"
02aa26ce
NT
2598
2599 \1 (backreferences) are turned into $1
2600
2601 The structure of the code is
2602 while (there's a character to process) {
94def140
TS
2603 handle transliteration ranges
2604 skip regexp comments /(?#comment)/ and codes /(?{code})/
2605 skip #-initiated comments in //x patterns
2606 check for embedded arrays
02aa26ce
NT
2607 check for embedded scalars
2608 if (backslash) {
94def140 2609 deprecate \1 in substitution replacements
02aa26ce
NT
2610 handle string-changing backslashes \l \U \Q \E, etc.
2611 switch (what was escaped) {
94def140 2612 handle \- in a transliteration (becomes a literal -)
ff3f963a 2613 if a pattern and not \N{, go treat as regular character
94def140
TS
2614 handle \132 (octal characters)
2615 handle \x15 and \x{1234} (hex characters)
ff3f963a 2616 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2617 handle \cV (control characters)
2618 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2619 } (end switch)
77a135fe 2620 continue
02aa26ce 2621 } (end if backslash)
77a135fe 2622 handle regular character
02aa26ce 2623 } (end while character to read)
4e553d73 2624
02aa26ce
NT
2625*/
2626
76e3520e 2627STATIC char *
cea2e8a9 2628S_scan_const(pTHX_ char *start)
79072805 2629{
97aff369 2630 dVAR;
3280af22 2631 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2632 SV *sv = newSV(send - start); /* sv for the constant. See
2633 note below on sizing. */
02aa26ce
NT
2634 register char *s = start; /* start of the constant */
2635 register char *d = SvPVX(sv); /* destination for copies */
2636 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2637 bool didrange = FALSE; /* did we just finish a range? */
b953e60c
KW
2638 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2639 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2640 to be UTF8? But, this can
2641 show as true when the source
2642 isn't utf8, as for example
2643 when it is entirely composed
2644 of hex constants */
2645
2646 /* Note on sizing: The scanned constant is placed into sv, which is
2647 * initialized by newSV() assuming one byte of output for every byte of
2648 * input. This routine expects newSV() to allocate an extra byte for a
2649 * trailing NUL, which this routine will append if it gets to the end of
2650 * the input. There may be more bytes of input than output (eg., \N{LATIN
2651 * CAPITAL LETTER A}), or more output than input if the constant ends up
2652 * recoded to utf8, but each time a construct is found that might increase
2653 * the needed size, SvGROW() is called. Its size parameter each time is
2654 * based on the best guess estimate at the time, namely the length used so
2655 * far, plus the length the current construct will occupy, plus room for
2656 * the trailing NUL, plus one byte for every input byte still unscanned */
2657
012bcf8d 2658 UV uv;
4c3a8340
TS
2659#ifdef EBCDIC
2660 UV literal_endpoint = 0;
e294cc5d 2661 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2662#endif
012bcf8d 2663
7918f24d
NC
2664 PERL_ARGS_ASSERT_SCAN_CONST;
2665
bb16bae8 2666 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2667 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2668 /* If we are doing a trans and we know we want UTF8 set expectation */
2669 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2670 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2671 }
2672
2673
79072805 2674 while (s < send || dorange) {
ff3f963a 2675
02aa26ce 2676 /* get transliterations out of the way (they're most literal) */
3280af22 2677 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2678 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2679 if (dorange) {
1ba5c669
JH
2680 I32 i; /* current expanded character */
2681 I32 min; /* first character in range */
2682 I32 max; /* last character in range */
02aa26ce 2683
e294cc5d
JH
2684#ifdef EBCDIC
2685 UV uvmax = 0;
2686#endif
2687
2688 if (has_utf8
2689#ifdef EBCDIC
2690 && !native_range
2691#endif
2692 ) {
9d4ba2ae 2693 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2694 char *e = d++;
2695 while (e-- > c)
2696 *(e + 1) = *e;
25716404 2697 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2698 /* mark the range as done, and continue */
2699 dorange = FALSE;
2700 didrange = TRUE;
2701 continue;
2702 }
2b9d42f0 2703
95a20fc0 2704 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2705#ifdef EBCDIC
2706 SvGROW(sv,
2707 SvLEN(sv) + (has_utf8 ?
2708 (512 - UTF_CONTINUATION_MARK +
2709 UNISKIP(0x100))
2710 : 256));
2711 /* How many two-byte within 0..255: 128 in UTF-8,
2712 * 96 in UTF-8-mod. */
2713#else
9cbb5ea2 2714 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2715#endif
9cbb5ea2 2716 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2717#ifdef EBCDIC
2718 if (has_utf8) {
2719 int j;
2720 for (j = 0; j <= 1; j++) {
2721 char * const c = (char*)utf8_hop((U8*)d, -1);
2722 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2723 if (j)
2724 min = (U8)uv;
2725 else if (uv < 256)
2726 max = (U8)uv;
2727 else {
2728 max = (U8)0xff; /* only to \xff */
2729 uvmax = uv; /* \x{100} to uvmax */
2730 }
2731 d = c; /* eat endpoint chars */
2732 }
2733 }
2734 else {
2735#endif
2736 d -= 2; /* eat the first char and the - */
2737 min = (U8)*d; /* first char in range */
2738 max = (U8)d[1]; /* last char in range */
2739#ifdef EBCDIC
2740 }
2741#endif
8ada0baa 2742
c2e66d9e 2743 if (min > max) {
01ec43d0 2744 Perl_croak(aTHX_
d1573ac7 2745 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2746 (char)min, (char)max);
c2e66d9e
GS
2747 }
2748
c7f1f016 2749#ifdef EBCDIC
4c3a8340
TS
2750 if (literal_endpoint == 2 &&
2751 ((isLOWER(min) && isLOWER(max)) ||
2752 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2753 if (isLOWER(min)) {
2754 for (i = min; i <= max; i++)
2755 if (isLOWER(i))
db42d148 2756 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2757 } else {
2758 for (i = min; i <= max; i++)
2759 if (isUPPER(i))
db42d148 2760 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2761 }
2762 }
2763 else
2764#endif
2765 for (i = min; i <= max; i++)
e294cc5d
JH
2766#ifdef EBCDIC
2767 if (has_utf8) {
2768 const U8 ch = (U8)NATIVE_TO_UTF(i);
2769 if (UNI_IS_INVARIANT(ch))
2770 *d++ = (U8)i;
2771 else {
2772 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2773 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2774 }
2775 }
2776 else
2777#endif
2778 *d++ = (char)i;
2779
2780#ifdef EBCDIC
2781 if (uvmax) {
2782 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2783 if (uvmax > 0x101)
2784 *d++ = (char)UTF_TO_NATIVE(0xff);
2785 if (uvmax > 0x100)
2786 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2787 }
2788#endif
02aa26ce
NT
2789
2790 /* mark the range as done, and continue */
79072805 2791 dorange = FALSE;
01ec43d0 2792 didrange = TRUE;
4c3a8340
TS
2793#ifdef EBCDIC
2794 literal_endpoint = 0;
2795#endif
79072805 2796 continue;
4e553d73 2797 }
02aa26ce
NT
2798
2799 /* range begins (ignore - as first or last char) */
79072805 2800 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2801 if (didrange) {
1fafa243 2802 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2803 }
e294cc5d
JH
2804 if (has_utf8
2805#ifdef EBCDIC
2806 && !native_range
2807#endif
2808 ) {
25716404 2809 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2810 s++;
2811 continue;
2812 }
79072805
LW
2813 dorange = TRUE;
2814 s++;
01ec43d0
GS
2815 }
2816 else {
2817 didrange = FALSE;
4c3a8340
TS
2818#ifdef EBCDIC
2819 literal_endpoint = 0;
e294cc5d 2820 native_range = TRUE;
4c3a8340 2821#endif
01ec43d0 2822 }
79072805 2823 }
02aa26ce
NT
2824
2825 /* if we get here, we're not doing a transliteration */
2826
0f5d15d6
IZ
2827 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2828 except for the last char, which will be done separately. */
3280af22 2829 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2830 if (s[2] == '#') {
e994fd66 2831 while (s+1 < send && *s != ')')
db42d148 2832 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2833 }
2834 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2835 || (s[2] == '?' && s[3] == '{'))
155aba94 2836 {
cc6b7395 2837 I32 count = 1;
0f5d15d6 2838 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2839 char c;
2840
d9f97599
GS
2841 while (count && (c = *regparse)) {
2842 if (c == '\\' && regparse[1])
2843 regparse++;
4e553d73 2844 else if (c == '{')
cc6b7395 2845 count++;
4e553d73 2846 else if (c == '}')
cc6b7395 2847 count--;
d9f97599 2848 regparse++;
cc6b7395 2849 }
e994fd66 2850 if (*regparse != ')')
5bdf89e7 2851 regparse--; /* Leave one char for continuation. */
0f5d15d6 2852 while (s < regparse)
db42d148 2853 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2854 }
748a9306 2855 }
02aa26ce
NT
2856
2857 /* likewise skip #-initiated comments in //x patterns */
3280af22 2858 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2859 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2860 while (s+1 < send && *s != '\n')
db42d148 2861 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2862 }
02aa26ce 2863
5d1d4326 2864 /* check for embedded arrays
da6eedaa 2865 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2866 */
1749ea0d
TS
2867 else if (*s == '@' && s[1]) {
2868 if (isALNUM_lazy_if(s+1,UTF))
2869 break;
2870 if (strchr(":'{$", s[1]))
2871 break;
2872 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2873 break; /* in regexp, neither @+ nor @- are interpolated */
2874 }
02aa26ce
NT
2875
2876 /* check for embedded scalars. only stop if we're sure it's a
2877 variable.
2878 */
79072805 2879 else if (*s == '$') {
3280af22 2880 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2881 break;
77772344 2882 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2883 if (s[1] == '\\') {
2884 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2885 "Possible unintended interpolation of $\\ in regex");
77772344 2886 }
79072805 2887 break; /* in regexp, $ might be tail anchor */
77772344 2888 }
79072805 2889 }
02aa26ce 2890
2b9d42f0
NIS
2891 /* End of else if chain - OP_TRANS rejoin rest */
2892
02aa26ce 2893 /* backslashes */
79072805 2894 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2895 char* e; /* Can be used for ending '}', etc. */
2896
79072805 2897 s++;
02aa26ce 2898
7d0fc23c
KW
2899 /* warn on \1 - \9 in substitution replacements, but note that \11
2900 * is an octal; and \19 is \1 followed by '9' */
3280af22 2901 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2902 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2903 {
a2a5de95 2904 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2905 *--s = '$';
2906 break;
2907 }
02aa26ce
NT
2908
2909 /* string-change backslash escapes */
3280af22 2910 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2911 --s;
2912 break;
2913 }
ff3f963a
KW
2914 /* In a pattern, process \N, but skip any other backslash escapes.
2915 * This is because we don't want to translate an escape sequence
2916 * into a meta symbol and have the regex compiler use the meta
2917 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2918 * in spite of this, we do have to process \N here while the proper
2919 * charnames handler is in scope. See bugs #56444 and #62056.
2920 * There is a complication because \N in a pattern may also stand
2921 * for 'match a non-nl', and not mean a charname, in which case its
2922 * processing should be deferred to the regex compiler. To be a
2923 * charname it must be followed immediately by a '{', and not look
2924 * like \N followed by a curly quantifier, i.e., not something like
2925 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2926 * quantifier */
2927 else if (PL_lex_inpat
2928 && (*s != 'N'
2929 || s[1] != '{'
2930 || regcurly(s + 1)))
2931 {
cc74c5bd
TS
2932 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2933 goto default_action;
2934 }
02aa26ce 2935
79072805 2936 switch (*s) {
02aa26ce
NT
2937
2938 /* quoted - in transliterations */
79072805 2939 case '-':
3280af22 2940 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2941 *d++ = *s++;
2942 continue;
2943 }
2944 /* FALL THROUGH */
2945 default:
11b8faa4 2946 {
a2a5de95
NC
2947 if ((isALPHA(*s) || isDIGIT(*s)))
2948 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2949 "Unrecognized escape \\%c passed through",
2950 *s);
11b8faa4 2951 /* default action is to copy the quoted character */
f9a63242 2952 goto default_action;
11b8faa4 2953 }
02aa26ce 2954
632403cc 2955 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2956 case '0': case '1': case '2': case '3':
2957 case '4': case '5': case '6': case '7':
ba210ebe 2958 {
53305cf1
NC
2959 I32 flags = 0;
2960 STRLEN len = 3;
77a135fe 2961 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2962 s += len;
2963 }
012bcf8d 2964 goto NUM_ESCAPE_INSERT;
02aa26ce 2965
f0a2b745
KW
2966 /* eg. \o{24} indicates the octal constant \024 */
2967 case 'o':
2968 {
2969 STRLEN len;
454155d9 2970 const char* error;
f0a2b745 2971
454155d9 2972 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2973 s += len;
454155d9 2974 if (! valid) {
f0a2b745
KW
2975 yyerror(error);
2976 continue;
2977 }
2978 goto NUM_ESCAPE_INSERT;
2979 }
2980
77a135fe 2981 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2982 case 'x':
a0ed51b3
LW
2983 ++s;
2984 if (*s == '{') {
9d4ba2ae 2985 char* const e = strchr(s, '}');
a4c04bdc
NC
2986 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2987 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2988 STRLEN len;
355860ce 2989
53305cf1 2990 ++s;
adaeee49 2991 if (!e) {
a0ed51b3 2992 yyerror("Missing right brace on \\x{}");
355860ce 2993 continue;
ba210ebe 2994 }
53305cf1 2995 len = e - s;
77a135fe 2996 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2997 s = e + 1;
a0ed51b3
LW
2998 }
2999 else {
ba210ebe 3000 {
53305cf1 3001 STRLEN len = 2;
a4c04bdc 3002 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 3003 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
3004 s += len;
3005 }
012bcf8d
GS
3006 }
3007
3008 NUM_ESCAPE_INSERT:
ff3f963a
KW
3009 /* Insert oct or hex escaped character. There will always be
3010 * enough room in sv since such escapes will be longer than any
3011 * UTF-8 sequence they can end up as, except if they force us
3012 * to recode the rest of the string into utf8 */
ba7cea30 3013
77a135fe 3014 /* Here uv is the ordinal of the next character being added in
ff3f963a 3015 * unicode (converted from native). */
77a135fe 3016 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3017 if (!has_utf8 && uv > 255) {
77a135fe
KW
3018 /* Might need to recode whatever we have accumulated so
3019 * far if it contains any chars variant in utf8 or
3020 * utf-ebcdic. */
3021
3022 SvCUR_set(sv, d - SvPVX_const(sv));
3023 SvPOK_on(sv);
3024 *d = '\0';
77a135fe 3025 /* See Note on sizing above. */
7bf79863
KW
3026 sv_utf8_upgrade_flags_grow(sv,
3027 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3028 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3029 d = SvPVX(sv) + SvCUR(sv);
3030 has_utf8 = TRUE;
012bcf8d
GS
3031 }
3032
77a135fe
KW
3033 if (has_utf8) {
3034 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3035 if (PL_lex_inwhat == OP_TRANS &&
3036 PL_sublex_info.sub_op) {
3037 PL_sublex_info.sub_op->op_private |=
3038 (PL_lex_repl ? OPpTRANS_FROM_UTF
3039 : OPpTRANS_TO_UTF);
f9a63242 3040 }
e294cc5d
JH
3041#ifdef EBCDIC
3042 if (uv > 255 && !dorange)
3043 native_range = FALSE;
3044#endif
012bcf8d 3045 }
a0ed51b3 3046 else {
012bcf8d 3047 *d++ = (char)uv;
a0ed51b3 3048 }
012bcf8d
GS
3049 }
3050 else {
c4d5f83a 3051 *d++ = (char) uv;
a0ed51b3 3052 }
79072805 3053 continue;
02aa26ce 3054
4a2d328f 3055 case 'N':
ff3f963a
KW
3056 /* In a non-pattern \N must be a named character, like \N{LATIN
3057 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3058 * mean to match a non-newline. For non-patterns, named
3059 * characters are converted to their string equivalents. In
3060 * patterns, named characters are not converted to their
3061 * ultimate forms for the same reasons that other escapes
3062 * aren't. Instead, they are converted to the \N{U+...} form
3063 * to get the value from the charnames that is in effect right
3064 * now, while preserving the fact that it was a named character
3065 * so that the regex compiler knows this */
3066
3067 /* This section of code doesn't generally use the
3068 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3069 * a close examination of this macro and determined it is a
3070 * no-op except on utfebcdic variant characters. Every
3071 * character generated by this that would normally need to be
3072 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3073 * needed, and would complicate use of copy(). XXX There are
3074 * other parts of this file where the macro is used
3075 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3076
3077 /* The structure of this section of code (besides checking for
3078 * errors and upgrading to utf8) is:
3079 * Further disambiguate between the two meanings of \N, and if
3080 * not a charname, go process it elsewhere
0a96133f
KW
3081 * If of form \N{U+...}, pass it through if a pattern;
3082 * otherwise convert to utf8
3083 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3084 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3085
3086 /* Here, s points to the 'N'; the test below is guaranteed to
3087 * succeed if we are being called on a pattern as we already
3088 * know from a test above that the next character is a '{'.
3089 * On a non-pattern \N must mean 'named sequence, which
3090 * requires braces */
3091 s++;
3092 if (*s != '{') {
3093 yyerror("Missing braces on \\N{}");
3094 continue;
3095 }
3096 s++;
3097
0a96133f 3098 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3099 if (! (e = strchr(s, '}'))) {
3100 if (! PL_lex_inpat) {
5777a3f7 3101 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3102 } else {
3103 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3104 }
0a96133f 3105 continue;
ff3f963a 3106 }
cddc7ef4 3107
ff3f963a 3108 /* Here it looks like a named character */
cddc7ef4 3109
ff3f963a
KW
3110 if (PL_lex_inpat) {
3111
3112 /* XXX This block is temporary code. \N{} implies that the
3113 * pattern is to have Unicode semantics, and therefore
3114 * currently has to be encoded in utf8. By putting it in
3115 * utf8 now, we save a whole pass in the regular expression
3116 * compiler. Once that code is changed so Unicode
3117 * semantics doesn't necessarily have to be in utf8, this
da3a4baf
KW
3118 * block should be removed. However, the code that parses
3119 * the output of this would have to be changed to not
3120 * necessarily expect utf8 */
ff3f963a 3121 if (!has_utf8) {
77a135fe 3122 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3123 SvPOK_on(sv);
e4f3eed8 3124 *d = '\0';
77a135fe 3125 /* See Note on sizing above. */
7bf79863 3126 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3127 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3128 /* 5 = '\N{' + cur char + NUL */
3129 (STRLEN)(send - s) + 5);
f08d6ad9 3130 d = SvPVX(sv) + SvCUR(sv);
89491803 3131 has_utf8 = TRUE;
ff3f963a
KW
3132 }
3133 }
423cee85 3134
ff3f963a
KW
3135 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3136 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3137 | PERL_SCAN_DISALLOW_PREFIX;
3138 STRLEN len;
3139
3140 /* For \N{U+...}, the '...' is a unicode value even on
3141 * EBCDIC machines */
3142 s += 2; /* Skip to next char after the 'U+' */
3143 len = e - s;
3144 uv = grok_hex(s, &len, &flags, NULL);
3145 if (len == 0 || len != (STRLEN)(e - s)) {
3146 yyerror("Invalid hexadecimal number in \\N{U+...}");
3147 s = e + 1;
3148 continue;
3149 }
3150
3151 if (PL_lex_inpat) {
3152
e2a7e165
KW
3153 /* On non-EBCDIC platforms, pass through to the regex
3154 * compiler unchanged. The reason we evaluated the
3155 * number above is to make sure there wasn't a syntax
3156 * error. But on EBCDIC we convert to native so
3157 * downstream code can continue to assume it's native
3158 */
ff3f963a 3159 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3160#ifdef EBCDIC
3161 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3162 and the \0 */
3163 "\\N{U+%X}",
3164 (unsigned int) UNI_TO_NATIVE(uv));
3165#else
ff3f963a
KW
3166 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3167 d += e - s + 1;
e2a7e165 3168#endif
ff3f963a
KW
3169 }
3170 else { /* Not a pattern: convert the hex to string */
3171
3172 /* If destination is not in utf8, unconditionally
3173 * recode it to be so. This is because \N{} implies
3174 * Unicode semantics, and scalars have to be in utf8
3175 * to guarantee those semantics */
3176 if (! has_utf8) {
3177 SvCUR_set(sv, d - SvPVX_const(sv));
3178 SvPOK_on(sv);
3179 *d = '\0';
3180 /* See Note on sizing above. */
3181 sv_utf8_upgrade_flags_grow(
3182 sv,
3183 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3184 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3185 d = SvPVX(sv) + SvCUR(sv);
3186 has_utf8 = TRUE;
3187 }
3188
3189 /* Add the string to the output */
3190 if (UNI_IS_INVARIANT(uv)) {
3191 *d++ = (char) uv;
3192 }
3193 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3194 }
3195 }
3196 else { /* Here is \N{NAME} but not \N{U+...}. */
3197
3198 SV *res; /* result from charnames */
3199 const char *str; /* the string in 'res' */
3200 STRLEN len; /* its length */
3201
3202 /* Get the value for NAME */
3203 res = newSVpvn(s, e - s);
3204 res = new_constant( NULL, 0, "charnames",
3205 /* includes all of: \N{...} */
3206 res, NULL, s - 3, e - s + 4 );
3207
3208 /* Most likely res will be in utf8 already since the
3209 * standard charnames uses pack U, but a custom translator
3210 * can leave it otherwise, so make sure. XXX This can be
3211 * revisited to not have charnames use utf8 for characters
3212 * that don't need it when regexes don't have to be in utf8
3213 * for Unicode semantics. If doing so, remember EBCDIC */
3214 sv_utf8_upgrade(res);
3215 str = SvPV_const(res, len);
3216
3217 /* Don't accept malformed input */
3218 if (! is_utf8_string((U8 *) str, len)) {
3219 yyerror("Malformed UTF-8 returned by \\N");
3220 }
3221 else if (PL_lex_inpat) {
3222
3223 if (! len) { /* The name resolved to an empty string */
3224 Copy("\\N{}", d, 4, char);
3225 d += 4;
3226 }
3227 else {
3228 /* In order to not lose information for the regex
3229 * compiler, pass the result in the specially made
3230 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3231 * the code points in hex of each character
3232 * returned by charnames */
3233
3234 const char *str_end = str + len;
3235 STRLEN char_length; /* cur char's byte length */
3236 STRLEN output_length; /* and the number of bytes
3237 after this is translated
3238 into hex digits */
3239 const STRLEN off = d - SvPVX_const(sv);
3240
3241 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3242 * max('U+', '.'); and 1 for NUL */
3243 char hex_string[2 * UTF8_MAXBYTES + 5];
3244
3245 /* Get the first character of the result. */
3246 U32 uv = utf8n_to_uvuni((U8 *) str,
3247 len,
3248 &char_length,
3249 UTF8_ALLOW_ANYUV);
3250
3251 /* The call to is_utf8_string() above hopefully
3252 * guarantees that there won't be an error. But
3253 * it's easy here to make sure. The function just
3254 * above warns and returns 0 if invalid utf8, but
3255 * it can also return 0 if the input is validly a
3256 * NUL. Disambiguate */
3257 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3258 uv = UNICODE_REPLACEMENT;
3259 }
3260
3261 /* Convert first code point to hex, including the
e2a7e165
KW
3262 * boiler plate before it. For all these, we
3263 * convert to native format so that downstream code
3264 * can continue to assume the input is native */
78c35590 3265 output_length =
3353de27 3266 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3267 "\\N{U+%X",
3268 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3269
3270 /* Make sure there is enough space to hold it */
3271 d = off + SvGROW(sv, off
3272 + output_length
3273 + (STRLEN)(send - e)
3274 + 2); /* '}' + NUL */
3275 /* And output it */
3276 Copy(hex_string, d, output_length, char);
3277 d += output_length;
3278
3279 /* For each subsequent character, append dot and
3280 * its ordinal in hex */
3281 while ((str += char_length) < str_end) {
3282 const STRLEN off = d - SvPVX_const(sv);
3283 U32 uv = utf8n_to_uvuni((U8 *) str,
3284 str_end - str,
3285 &char_length,
3286 UTF8_ALLOW_ANYUV);
3287 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3288 uv = UNICODE_REPLACEMENT;
3289 }
3290
78c35590 3291 output_length =
3353de27 3292 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3293 ".%X",
3294 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3295
3296 d = off + SvGROW(sv, off
3297 + output_length
3298 + (STRLEN)(send - e)
3299 + 2); /* '}' + NUL */
3300 Copy(hex_string, d, output_length, char);
3301 d += output_length;
3302 }
3303
3304 *d++ = '}'; /* Done. Add the trailing brace */
3305 }
3306 }
3307 else { /* Here, not in a pattern. Convert the name to a
3308 * string. */
3309
3310 /* If destination is not in utf8, unconditionally
3311 * recode it to be so. This is because \N{} implies
3312 * Unicode semantics, and scalars have to be in utf8
3313 * to guarantee those semantics */
3314 if (! has_utf8) {
3315 SvCUR_set(sv, d - SvPVX_const(sv));
3316 SvPOK_on(sv);
3317 *d = '\0';
3318 /* See Note on sizing above. */
3319 sv_utf8_upgrade_flags_grow(sv,
3320 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3321 len + (STRLEN)(send - s) + 1);
3322 d = SvPVX(sv) + SvCUR(sv);
3323 has_utf8 = TRUE;
3324 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3325
3326 /* See Note on sizing above. (NOTE: SvCUR() is not
3327 * set correctly here). */
3328 const STRLEN off = d - SvPVX_const(sv);
3329 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3330 }
3331 Copy(str, d, len, char);
3332 d += len;
423cee85 3333 }
423cee85 3334 SvREFCNT_dec(res);
cb233ae3
KW
3335
3336 /* Deprecate non-approved name syntax */
3337 if (ckWARN_d(WARN_DEPRECATED)) {
3338 bool problematic = FALSE;
3339 char* i = s;
3340
3341 /* For non-ut8 input, look to see that the first
3342 * character is an alpha, then loop through the rest
3343 * checking that each is a continuation */
3344 if (! this_utf8) {
3345 if (! isALPHAU(*i)) problematic = TRUE;
3346 else for (i = s + 1; i < e; i++) {
3347 if (isCHARNAME_CONT(*i)) continue;
3348 problematic = TRUE;
3349 break;
3350 }
3351 }
3352 else {
3353 /* Similarly for utf8. For invariants can check
3354 * directly. We accept anything above the latin1
3355 * range because it is immaterial to Perl if it is
3356 * correct or not, and is expensive to check. But
3357 * it is fairly easy in the latin1 range to convert
3358 * the variants into a single character and check
3359 * those */
3360 if (UTF8_IS_INVARIANT(*i)) {
3361 if (! isALPHAU(*i)) problematic = TRUE;
3362 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3363 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3364 *(i+1)))))
3365 {
3366 problematic = TRUE;
3367 }
3368 }
3369 if (! problematic) for (i = s + UTF8SKIP(s);
3370 i < e;
3371 i+= UTF8SKIP(i))
3372 {
3373 if (UTF8_IS_INVARIANT(*i)) {
3374 if (isCHARNAME_CONT(*i)) continue;
3375 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3376 continue;
3377 } else if (isCHARNAME_CONT(
3378 UNI_TO_NATIVE(
81c14aa2 3379 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3380 {
3381 continue;
3382 }
3383 problematic = TRUE;
3384 break;
3385 }
3386 }
3387 if (problematic) {
6e1bad6c
KW
3388 /* The e-i passed to the final %.*s makes sure that
3389 * should the trailing NUL be missing that this
3390 * print won't run off the end of the string */
cb233ae3 3391 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3392 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3393 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3394 }
3395 }
3396 } /* End \N{NAME} */
ff3f963a
KW
3397#ifdef EBCDIC
3398 if (!dorange)
3399 native_range = FALSE; /* \N{} is defined to be Unicode */
3400#endif
3401 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3402 continue;
3403
02aa26ce 3404 /* \c is a control character */
79072805
LW
3405 case 'c':
3406 s++;
961ce445 3407 if (s < send) {
17a3df4c 3408 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3409 }
961ce445
RGS
3410 else {
3411 yyerror("Missing control char name in \\c");
3412 }
79072805 3413 continue;
02aa26ce
NT
3414
3415 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3416 case 'b':
db42d148 3417 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3418 break;
3419 case 'n':
db42d148 3420 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3421 break;
3422 case 'r':
db42d148 3423 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3424 break;
3425 case 'f':
db42d148 3426 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3427 break;
3428 case 't':
db42d148 3429 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3430 break;
34a3fe2a 3431 case 'e':
db42d148 3432 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3433 break;
3434 case 'a':
db42d148 3435 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3436 break;
02aa26ce
NT
3437 } /* end switch */
3438
79072805
LW
3439 s++;
3440 continue;
02aa26ce 3441 } /* end if (backslash) */
4c3a8340
TS
3442#ifdef EBCDIC
3443 else
3444 literal_endpoint++;
3445#endif
02aa26ce 3446
f9a63242 3447 default_action:
77a135fe
KW
3448 /* If we started with encoded form, or already know we want it,
3449 then encode the next character */
3450 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3451 STRLEN len = 1;
77a135fe
KW
3452
3453
3454 /* One might think that it is wasted effort in the case of the
3455 * source being utf8 (this_utf8 == TRUE) to take the next character
3456 * in the source, convert it to an unsigned value, and then convert
3457 * it back again. But the source has not been validated here. The
3458 * routine that does the conversion checks for errors like
3459 * malformed utf8 */
3460
5f66b61c
AL
3461 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3462 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3463 if (!has_utf8) {
3464 SvCUR_set(sv, d - SvPVX_const(sv));
3465 SvPOK_on(sv);
3466 *d = '\0';
77a135fe 3467 /* See Note on sizing above. */
7bf79863
KW
3468 sv_utf8_upgrade_flags_grow(sv,
3469 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3470 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3471 d = SvPVX(sv) + SvCUR(sv);
3472 has_utf8 = TRUE;
3473 } else if (need > len) {
3474 /* encoded value larger than old, may need extra space (NOTE:
3475 * SvCUR() is not set correctly here). See Note on sizing
3476 * above. */
9d4ba2ae 3477 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3478 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3479 }
77a135fe
KW
3480 s += len;
3481
5f66b61c 3482 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3483#ifdef EBCDIC
3484 if (uv > 255 && !dorange)
3485 native_range = FALSE;
3486#endif
2b9d42f0
NIS
3487 }
3488 else {
3489 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3490 }
02aa26ce
NT
3491 } /* while loop to process each character */
3492
3493 /* terminate the string and set up the sv */
79072805 3494 *d = '\0';
95a20fc0 3495 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3496 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3497 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3498
79072805 3499 SvPOK_on(sv);
9f4817db 3500 if (PL_encoding && !has_utf8) {
d0063567
DK
3501 sv_recode_to_utf8(sv, PL_encoding);
3502 if (SvUTF8(sv))
3503 has_utf8 = TRUE;
9f4817db 3504 }
2b9d42f0 3505 if (has_utf8) {
7e2040f0 3506 SvUTF8_on(sv);
2b9d42f0 3507 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3508 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3509 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3510 }
3511 }
79072805 3512
02aa26ce 3513 /* shrink the sv if we allocated more than we used */
79072805 3514 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3515 SvPV_shrink_to_cur(sv);
79072805 3516 }
02aa26ce 3517
6154021b 3518 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3519 if (s > PL_bufptr) {
eb0d8d16
NC
3520 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3521 const char *const key = PL_lex_inpat ? "qr" : "q";
3522 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3523 const char *type;
3524 STRLEN typelen;
3525
3526 if (PL_lex_inwhat == OP_TRANS) {
3527 type = "tr";
3528 typelen = 2;
3529 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3530 type = "s";
3531 typelen = 1;
3532 } else {
3533 type = "qq";
3534 typelen = 2;
3535 }
3536
3537 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3538 type, typelen);
3539 }
6154021b 3540 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3541 } else
8990e307 3542 SvREFCNT_dec(sv);
79072805
LW
3543 return s;
3544}
3545
ffb4593c
NT
3546/* S_intuit_more
3547 * Returns TRUE if there's more to the expression (e.g., a subscript),
3548 * FALSE otherwise.
ffb4593c
NT
3549 *
3550 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3551 *
3552 * ->[ and ->{ return TRUE
3553 * { and [ outside a pattern are always subscripts, so return TRUE
3554 * if we're outside a pattern and it's not { or [, then return FALSE
3555 * if we're in a pattern and the first char is a {
3556 * {4,5} (any digits around the comma) returns FALSE
3557 * if we're in a pattern and the first char is a [
3558 * [] returns FALSE
3559 * [SOMETHING] has a funky algorithm to decide whether it's a
3560 * character class or not. It has to deal with things like
3561 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3562 * anything else returns TRUE
3563 */
3564
9cbb5ea2
GS
3565/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3566
76e3520e 3567STATIC int
cea2e8a9 3568S_intuit_more(pTHX_ register char *s)
79072805 3569{
97aff369 3570 dVAR;
7918f24d
NC
3571
3572 PERL_ARGS_ASSERT_INTUIT_MORE;
3573
3280af22 3574 if (PL_lex_brackets)
79072805
LW
3575 return TRUE;
3576 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3577 return TRUE;
3578 if (*s != '{' && *s != '[')
3579 return FALSE;
3280af22 3580 if (!PL_lex_inpat)
79072805
LW
3581 return TRUE;
3582
3583 /* In a pattern, so maybe we have {n,m}. */
3584 if (*s == '{') {
b3155d95 3585 if (regcurly(s)) {
79072805 3586 return FALSE;
b3155d95 3587 }
79072805 3588 return TRUE;
79072805
LW
3589 }
3590
3591 /* On the other hand, maybe we have a character class */
3592
3593 s++;
3594 if (*s == ']' || *s == '^')
3595 return FALSE;
3596 else {
ffb4593c 3597 /* this is terrifying, and it works */
79072805
LW
3598 int weight = 2; /* let's weigh the evidence */
3599 char seen[256];
f27ffc4a 3600 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3601 const char * const send = strchr(s,']');
3280af22 3602 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3603
3604 if (!send) /* has to be an expression */
3605 return TRUE;
3606
3607 Zero(seen,256,char);
3608 if (*s == '$')
3609 weight -= 3;
3610 else if (isDIGIT(*s)) {
3611 if (s[1] != ']') {
3612 if (isDIGIT(s[1]) && s[2] == ']')
3613 weight -= 10;
3614 }
3615 else
3616 weight -= 100;
3617 }
3618 for (; s < send; s++) {
3619 last_un_char = un_char;
3620 un_char = (unsigned char)*s;
3621 switch (*s) {
3622 case '@':
3623 case '&':
3624 case '$':
3625 weight -= seen[un_char] * 10;
7e2040f0 3626 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3627 int len;
8903cb82 3628 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3629 len = (int)strlen(tmpbuf);
3630 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3631 weight -= 100;
3632 else
3633 weight -= 10;
3634 }
3635 else if (*s == '$' && s[1] &&
93a17b20
LW
3636 strchr("[#!%*<>()-=",s[1])) {
3637 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3638 weight -= 10;
3639 else
3640 weight -= 1;
3641 }
3642 break;
3643 case '\\':
3644 un_char = 254;
3645 if (s[1]) {
93a17b20 3646 if (strchr("wds]",s[1]))
79072805 3647 weight += 100;
10edeb5d 3648 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3649 weight += 1;
93a17b20 3650 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3651 weight += 40;
3652 else if (isDIGIT(s[1])) {
3653 weight += 40;
3654 while (s[1] && isDIGIT(s[1]))
3655 s++;
3656 }
3657 }
3658 else
3659 weight += 100;
3660 break;
3661 case '-':
3662 if (s[1] == '\\')
3663 weight += 50;
93a17b20 3664 if (strchr("aA01! ",last_un_char))
79072805 3665 weight += 30;
93a17b20 3666 if (strchr("zZ79~",s[1]))
79072805 3667 weight += 30;
f27ffc4a
GS
3668 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3669 weight -= 5; /* cope with negative subscript */
79072805
LW
3670 break;
3671 default:
3792a11b
NC
3672 if (!isALNUM(last_un_char)
3673 && !(last_un_char == '$' || last_un_char == '@'
3674 || last_un_char == '&')
3675 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3676 char *d = tmpbuf;
3677 while (isALPHA(*s))
3678 *d++ = *s++;
3679 *d = '\0';
5458a98a 3680 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3681 weight -= 150;
3682 }
3683 if (un_char == last_un_char + 1)
3684 weight += 5;
3685 weight -= seen[un_char];
3686 break;
3687 }
3688 seen[un_char]++;
3689 }
3690 if (weight >= 0) /* probably a character class */
3691 return FALSE;
3692 }
3693
3694 return TRUE;
3695}
ffed7fef 3696
ffb4593c
NT
3697/*
3698 * S_intuit_method
3699 *
3700 * Does all the checking to disambiguate
3701 * foo bar
3702 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3703 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3704 *
3705 * First argument is the stuff after the first token, e.g. "bar".
3706 *
3707 * Not a method if bar is a filehandle.
3708 * Not a method if foo is a subroutine prototyped to take a filehandle.
3709 * Not a method if it's really "Foo $bar"
3710 * Method if it's "foo $bar"
3711 * Not a method if it's really "print foo $bar"
3712 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3713 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3714 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3715 * =>
3716 */
3717
76e3520e 3718STATIC int
62d55b22 3719S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3720{
97aff369 3721 dVAR;
a0d0e21e 3722 char *s = start + (*start == '$');
3280af22 3723 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3724 STRLEN len;
3725 GV* indirgv;
5db06880
NC
3726#ifdef PERL_MAD
3727 int soff;
3728#endif
a0d0e21e 3729
7918f24d
NC
3730 PERL_ARGS_ASSERT_INTUIT_METHOD;
3731
a0d0e21e 3732 if (gv) {
62d55b22 3733 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3734 return 0;
62d55b22
NC
3735 if (cv) {
3736 if (SvPOK(cv)) {
3737 const char *proto = SvPVX_const(cv);
3738 if (proto) {
3739 if (*proto == ';')
3740 proto++;
3741 if (*proto == '*')
3742 return 0;
3743 }
b6c543e3
IZ
3744 }
3745 } else
c35e046a 3746 gv = NULL;
a0d0e21e 3747 }
8903cb82 3748 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3749 /* start is the beginning of the possible filehandle/object,
3750 * and s is the end of it
3751 * tmpbuf is a copy of it
3752 */
3753
a0d0e21e 3754 if (*start == '$') {
3ef1310e
RGS
3755 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3756 isUPPER(*PL_tokenbuf))
a0d0e21e 3757 return 0;
5db06880
NC
3758#ifdef PERL_MAD
3759 len = start - SvPVX(PL_linestr);
3760#endif
29595ff2 3761 s = PEEKSPACE(s);
f0092767 3762#ifdef PERL_MAD
5db06880
NC
3763 start = SvPVX(PL_linestr) + len;
3764#endif
3280af22
NIS
3765 PL_bufptr = start;
3766 PL_expect = XREF;
a0d0e21e
LW
3767 return *s == '(' ? FUNCMETH : METHOD;
3768 }
5458a98a 3769 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3770 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3771 len -= 2;
3772 tmpbuf[len] = '\0';
5db06880
NC
3773#ifdef PERL_MAD
3774 soff = s - SvPVX(PL_linestr);
3775#endif
c3e0f903
GS
3776 goto bare_package;
3777 }
90e5519e 3778 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3779 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3780 return 0;
3781 /* filehandle or package name makes it a method */
da51bb9b 3782 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3783#ifdef PERL_MAD
3784 soff = s - SvPVX(PL_linestr);
3785#endif
29595ff2 3786 s = PEEKSPACE(s);
3280af22 3787 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3788 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3789 bare_package:
cd81e915 3790 start_force(PL_curforce);
9ded7720 3791 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3792 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3793 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3794 if (PL_madskills)
3795 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3796 PL_expect = XTERM;
a0d0e21e 3797 force_next(WORD);
3280af22 3798 PL_bufptr = s;
5db06880
NC
3799#ifdef PERL_MAD
3800 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3801#endif
a0d0e21e
LW
3802 return *s == '(' ? FUNCMETH : METHOD;
3803 }
3804 }
3805 return 0;
3806}
3807
16d20bd9 3808/* Encoded script support. filter_add() effectively inserts a
4e553d73 3809 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3810 * Note that the filter function only applies to the current source file
3811 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3812 *
3813 * The datasv parameter (which may be NULL) can be used to pass
3814 * private data to this instance of the filter. The filter function
3815 * can recover the SV using the FILTER_DATA macro and use it to
3816 * store private buffers and state information.
3817 *
3818 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3819 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3820 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3821 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3822 * private use must be set using malloc'd pointers.
3823 */
16d20bd9
AD
3824
3825SV *
864dbfa3 3826Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3827{
97aff369 3828 dVAR;
f4c556ac 3829 if (!funcp)
a0714e2c 3830 return NULL;
f4c556ac 3831
5486870f
DM
3832 if (!PL_parser)
3833 return NULL;
3834
3280af22
NIS
3835 if (!PL_rsfp_filters)
3836 PL_rsfp_filters = newAV();
16d20bd9 3837 if (!datasv)
561b68a9 3838 datasv = newSV(0);
862a34c6 3839 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3840 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3841 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3842 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3843 FPTR2DPTR(void *, IoANY(datasv)),
3844 SvPV_nolen(datasv)));
3280af22
NIS
3845 av_unshift(PL_rsfp_filters, 1);
3846 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3847 return(datasv);
3848}
4e553d73 3849
16d20bd9
AD
3850
3851/* Delete most recently added instance of this filter function. */
a0d0e21e 3852void
864dbfa3 3853Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3854{
97aff369 3855 dVAR;
e0c19803 3856 SV *datasv;
24801a4b 3857
7918f24d
NC
3858 PERL_ARGS_ASSERT_FILTER_DEL;
3859
33073adb 3860#ifdef DEBUGGING
55662e27
JH
3861 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3862 FPTR2DPTR(void*, funcp)));
33073adb 3863#endif
5486870f 3864 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3865 return;
3866 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3867 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3868 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3869 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3870
16d20bd9
AD
3871 return;
3872 }
3873 /* we need to search for the correct entry and clear it */
cea2e8a9 3874 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3875}
3876
3877
1de9afcd
RGS
3878/* Invoke the idxth filter function for the current rsfp. */
3879/* maxlen 0 = read one text line */
16d20bd9 3880I32
864dbfa3 3881Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3882{
97aff369 3883 dVAR;
16d20bd9
AD
3884 filter_t funcp;
3885 SV *datasv = NULL;
f482118e
NC
3886 /* This API is bad. It should have been using unsigned int for maxlen.
3887 Not sure if we want to change the API, but if not we should sanity
3888 check the value here. */
39cd7a59
NC
3889 const unsigned int correct_length
3890 = maxlen < 0 ?
3891#ifdef PERL_MICRO
3892 0x7FFFFFFF
3893#else
3894 INT_MAX
3895#endif
3896 : maxlen;
e50aee73 3897
7918f24d
NC
3898 PERL_ARGS_ASSERT_FILTER_READ;
3899
5486870f 3900 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3901 return -1;
1de9afcd 3902 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3903 /* Provide a default input filter to make life easy. */
3904 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3905 DEBUG_P(PerlIO_printf(Perl_debug_log,
3906 "filter_read %d: from rsfp\n", idx));
f482118e 3907 if (correct_length) {
16d20bd9
AD
3908 /* Want a block */
3909 int len ;
f54cb97a 3910 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3911
3912 /* ensure buf_sv is large enough */
881d8f0a 3913 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3914 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3915 correct_length)) <= 0) {
3280af22 3916 if (PerlIO_error(PL_rsfp))
37120919
AD
3917 return -1; /* error */
3918 else
3919 return 0 ; /* end of file */
3920 }
16d20bd9 3921 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3922 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3923 } else {
3924 /* Want a line */
3280af22
NIS
3925 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3926 if (PerlIO_error(PL_rsfp))
37120919
AD
3927 return -1; /* error */
3928 else
3929 return 0 ; /* end of file */
3930 }
16d20bd9
AD
3931 }
3932 return SvCUR(buf_sv);
3933 }
3934 /* Skip this filter slot if filter has been deleted */
1de9afcd 3935 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3936 DEBUG_P(PerlIO_printf(Perl_debug_log,
3937 "filter_read %d: skipped (filter deleted)\n",
3938 idx));
f482118e 3939 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3940 }
3941 /* Get function pointer hidden within datasv */
8141890a 3942 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3943 DEBUG_P(PerlIO_printf(Perl_debug_log,
3944 "filter_read %d: via function %p (%s)\n",
ca0270c4 3945 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3946 /* Call function. The function is expected to */
3947 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3948 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3949 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3950}
3951
76e3520e 3952STATIC char *
5cc814fd 3953S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3954{
97aff369 3955 dVAR;
7918f24d
NC
3956
3957 PERL_ARGS_ASSERT_FILTER_GETS;
3958
c39cd008 3959#ifdef PERL_CR_FILTER
3280af22 3960 if (!PL_rsfp_filters) {
c39cd008 3961 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3962 }
3963#endif
3280af22 3964 if (PL_rsfp_filters) {
55497cff 3965 if (!append)
3966 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3967 if (FILTER_READ(0, sv, 0) > 0)
3968 return ( SvPVX(sv) ) ;
3969 else
bd61b366 3970 return NULL ;
16d20bd9 3971 }
9d116dd7 3972 else
5cc814fd 3973 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3974}
3975
01ec43d0 3976STATIC HV *
9bde8eb0 3977S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3978{
97aff369 3979 dVAR;
def3634b
GS
3980 GV *gv;
3981
7918f24d
NC
3982 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3983
01ec43d0 3984 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3985 return PL_curstash;
3986
3987 if (len > 2 &&
3988 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3989 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3990 {
3991 return GvHV(gv); /* Foo:: */
def3634b
GS
3992 }
3993
3994 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3995 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3996 if (gv && GvCV(gv)) {
3997 SV * const sv = cv_const_sv(GvCV(gv));
3998 if (sv)
9bde8eb0 3999 pkgname = SvPV_const(sv, len);
def3634b
GS
4000 }
4001
9bde8eb0 4002 return gv_stashpvn(pkgname, len, 0);
def3634b 4003}
a0d0e21e 4004
e3f73d4e
RGS
4005/*
4006 * S_readpipe_override
486ec47a 4007 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4008 * optree, provided sublex_start() is called afterwards.
4009 */
4010STATIC void
1d51329b 4011S_readpipe_override(pTHX)
e3f73d4e
RGS
4012{
4013 GV **gvp;
4014 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4015 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4016 if ((gv_readpipe
4017 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4018 ||
4019 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4020 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4021 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4022 {
4023 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4024 op_append_elem(OP_LIST,
e3f73d4e
RGS
4025 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4026 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4027 }
e3f73d4e
RGS
4028}
4029
5db06880
NC
4030#ifdef PERL_MAD
4031 /*
4032 * Perl_madlex
4033 * The intent of this yylex wrapper is to minimize the changes to the
4034 * tokener when we aren't interested in collecting madprops. It remains
4035 * to be seen how successful this strategy will be...
4036 */
4037
4038int
4039Perl_madlex(pTHX)
4040{
4041 int optype;
4042 char *s = PL_bufptr;
4043
cd81e915
NC
4044 /* make sure PL_thiswhite is initialized */
4045 PL_thiswhite = 0;
4046 PL_thismad = 0;
5db06880 4047
cd81e915 4048 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 4049 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
4050 return S_pending_ident(aTHX);
4051
4052 /* previous token ate up our whitespace? */
cd81e915
NC
4053 if (!PL_lasttoke && PL_nextwhite) {
4054 PL_thiswhite = PL_nextwhite;
4055 PL_nextwhite = 0;
5db06880
NC
4056 }
4057
4058 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4059 PL_realtokenstart = -1;
4060 PL_thistoken = 0;
5db06880
NC
4061 optype = yylex();
4062 s = PL_bufptr;
cd81e915 4063 assert(PL_curforce < 0);
5db06880 4064
cd81e915
NC
4065 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4066 if (!PL_thistoken) {
4067 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4068 PL_thistoken = newSVpvs("");
5db06880 4069 else {
c35e046a 4070 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4071 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4072 }
4073 }
cd81e915
NC
4074 if (PL_thismad) /* install head */
4075 CURMAD('X', PL_thistoken);
5db06880
NC
4076 }
4077
4078 /* last whitespace of a sublex? */
cd81e915
NC
4079 if (optype == ')' && PL_endwhite) {
4080 CURMAD('X', PL_endwhite);
5db06880
NC
4081 }
4082
cd81e915 4083 if (!PL_thismad) {
5db06880
NC
4084
4085 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4086 if (!PL_thiswhite && !PL_endwhite && !optype) {
4087 sv_free(PL_thistoken);
4088 PL_thistoken = 0;
5db06880
NC
4089 return 0;
4090 }
4091
4092 /* put off final whitespace till peg */
4093 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4094 PL_nextwhite = PL_thiswhite;
4095 PL_thiswhite = 0;
5db06880 4096 }
cd81e915
NC
4097 else if (PL_thisopen) {
4098 CURMAD('q', PL_thisopen);
4099 if (PL_thistoken)
4100 sv_free(PL_thistoken);
4101 PL_thistoken = 0;
5db06880
NC
4102 }
4103 else {
4104 /* Store actual token text as madprop X */
cd81e915 4105 CURMAD('X', PL_thistoken);
5db06880
NC
4106 }
4107
cd81e915 4108 if (PL_thiswhite) {
5db06880 4109 /* add preceding whitespace as madprop _ */
cd81e915 4110 CURMAD('_', PL_thiswhite);
5db06880
NC
4111 }
4112
cd81e915 4113 if (PL_thisstuff) {
5db06880 4114 /* add quoted material as madprop = */
cd81e915 4115 CURMAD('=', PL_thisstuff);
5db06880
NC
4116 }
4117
cd81e915 4118 if (PL_thisclose) {
5db06880 4119 /* add terminating quote as madprop Q */
cd81e915 4120 CURMAD('Q', PL_thisclose);
5db06880
NC
4121 }
4122 }
4123
4124 /* special processing based on optype */
4125
4126 switch (optype) {
4127
4128 /* opval doesn't need a TOKEN since it can already store mp */
4129 case WORD:
4130 case METHOD:
4131 case FUNCMETH:
4132 case THING:
4133 case PMFUNC:
4134 case PRIVATEREF:
4135 case FUNC0SUB:
4136 case UNIOPSUB:
4137 case LSTOPSUB:
6154021b
RGS
4138 if (pl_yylval.opval)
4139 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4140 PL_thismad = 0;
5db06880
NC
4141 return optype;
4142
4143 /* fake EOF */
4144 case 0:
4145 optype = PEG;
cd81e915
NC
4146 if (PL_endwhite) {
4147 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4148 PL_endwhite = 0;
5db06880
NC
4149 }
4150 break;
4151
4152 case ']':
4153 case '}':
cd81e915 4154 if (PL_faketokens)
5db06880
NC
4155 break;
4156 /* remember any fake bracket that lexer is about to discard */
4157 if (PL_lex_brackets == 1 &&
4158 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4159 {
4160 s = PL_bufptr;
4161 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4162 s++;
4163 if (*s == '}') {
cd81e915
NC
4164 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4165 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4166 PL_thiswhite = 0;
5db06880
NC
4167 PL_bufptr = s - 1;
4168 break; /* don't bother looking for trailing comment */
4169 }
4170 else
4171 s = PL_bufptr;
4172 }
4173 if (optype == ']')
4174 break;
4175 /* FALLTHROUGH */
4176
4177 /* attach a trailing comment to its statement instead of next token */
4178 case ';':
cd81e915 4179 if (PL_faketokens)
5db06880
NC
4180 break;
4181 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4182 s = PL_bufptr;
4183 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4184 s++;
4185 if (*s == '\n' || *s == '#') {
4186 while (s < PL_bufend && *s != '\n')
4187 s++;
4188 if (s < PL_bufend)
4189 s++;
cd81e915
NC
4190 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4191 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4192 PL_thiswhite = 0;
5db06880
NC
4193 PL_bufptr = s;
4194 }
4195 }
4196 break;
4197
4198 /* pval */
4199 case LABEL:
4200 break;
4201
4202 /* ival */
4203 default:
4204 break;
4205
4206 }
4207
4208 /* Create new token struct. Note: opvals return early above. */
6154021b 4209 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4210 PL_thismad = 0;
5db06880
NC
4211 return optype;
4212}
4213#endif
4214
468aa647 4215STATIC char *
cc6ed77d 4216S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4217 dVAR;
7918f24d
NC
4218
4219 PERL_ARGS_ASSERT_TOKENIZE_USE;
4220
468aa647
RGS
4221 if (PL_expect != XSTATE)
4222 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4223 is_use ? "use" : "no"));
29595ff2 4224 s = SKIPSPACE1(s);
468aa647
RGS
4225 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4226 s = force_version(s, TRUE);
17c59fdf
VP
4227 if (*s == ';' || *s == '}'
4228 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4229 start_force(PL_curforce);
9ded7720 4230 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4231 force_next(WORD);
4232 }
4233 else if (*s == 'v') {
4234 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4235 s = force_version(s, FALSE);
4236 }
4237 }
4238 else {
4239 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4240 s = force_version(s, FALSE);
4241 }
6154021b 4242 pl_yylval.ival = is_use;
468aa647
RGS
4243 return s;
4244}
748a9306 4245#ifdef DEBUGGING
27da23d5 4246 static const char* const exp_name[] =
09bef843 4247 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4248 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4249 };
748a9306 4250#endif
463ee0b2 4251
361d9b55
Z
4252#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4253STATIC bool
4254S_word_takes_any_delimeter(char *p, STRLEN len)
4255{
4256 return (len == 1 && strchr("msyq", p[0])) ||
4257 (len == 2 && (
4258 (p[0] == 't' && p[1] == 'r') ||
4259 (p[0] == 'q' && strchr("qwxr", p[1]))));
4260}
4261
02aa26ce
NT
4262/*
4263 yylex
4264
4265 Works out what to call the token just pulled out of the input
4266 stream. The yacc parser takes care of taking the ops we return and
4267 stitching them into a tree.
4268
4269 Returns:
4270 PRIVATEREF
4271
4272 Structure:
4273 if read an identifier
4274 if we're in a my declaration
4275 croak if they tried to say my($foo::bar)
4276 build the ops for a my() declaration
4277 if it's an access to a my() variable
4278 are we in a sort block?
4279 croak if my($a); $a <=> $b
4280 build ops for access to a my() variable
4281 if in a dq string, and they've said @foo and we can't find @foo
4282 croak
4283 build ops for a bareword
4284 if we already built the token before, use it.
4285*/
4286
20141f0e 4287
dba4d153
JH
4288#ifdef __SC__
4289#pragma segment Perl_yylex
4290#endif
dba4d153 4291int
dba4d153 4292Perl_yylex(pTHX)
20141f0e 4293{
97aff369 4294 dVAR;
3afc138a 4295 register char *s = PL_bufptr;
378cc40b 4296 register char *d;
463ee0b2 4297 STRLEN len;
aa7440fb 4298 bool bof = FALSE;
580561a3 4299 U32 fake_eof = 0;
a687059c 4300
10edeb5d
JH
4301 /* orig_keyword, gvp, and gv are initialized here because
4302 * jump to the label just_a_word_zero can bypass their
4303 * initialization later. */
4304 I32 orig_keyword = 0;
4305 GV *gv = NULL;
4306 GV **gvp = NULL;
4307
bbf60fe6 4308 DEBUG_T( {
396482e1 4309 SV* tmp = newSVpvs("");
b6007c36
DM
4310 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4311 (IV)CopLINE(PL_curcop),
4312 lex_state_names[PL_lex_state],
4313 exp_name[PL_expect],
4314 pv_display(tmp, s, strlen(s), 0, 60));
4315 SvREFCNT_dec(tmp);
bbf60fe6 4316 } );
02aa26ce 4317 /* check if there's an identifier for us to look at */
28ac2b49 4318 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4319 return REPORT(S_pending_ident(aTHX));
bbce6d69 4320
02aa26ce
NT
4321 /* no identifier pending identification */
4322
3280af22 4323 switch (PL_lex_state) {
79072805
LW
4324#ifdef COMMENTARY
4325 case LEX_NORMAL: /* Some compilers will produce faster */
4326 case LEX_INTERPNORMAL: /* code if we comment these out. */
4327 break;
4328#endif
4329
09bef843 4330 /* when we've already built the next token, just pull it out of the queue */
79072805 4331 case LEX_KNOWNEXT:
5db06880
NC
4332#ifdef PERL_MAD
4333 PL_lasttoke--;
6154021b 4334 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4335 if (PL_madskills) {
cd81e915 4336 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4337 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4338 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4339 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4340 PL_thismad->mad_val = 0;
4341 mad_free(PL_thismad);
4342 PL_thismad = 0;
5db06880
NC
4343 }
4344 }
4345 if (!PL_lasttoke) {
4346 PL_lex_state = PL_lex_defer;
4347 PL_expect = PL_lex_expect;
4348 PL_lex_defer = LEX_NORMAL;
4349 if (!PL_nexttoke[PL_lasttoke].next_type)
4350 return yylex();
4351 }
4352#else
3280af22 4353 PL_nexttoke--;
6154021b 4354 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4355 if (!PL_nexttoke) {
4356 PL_lex_state = PL_lex_defer;
4357 PL_expect = PL_lex_expect;
4358 PL_lex_defer = LEX_NORMAL;
463ee0b2 4359 }
5db06880 4360#endif
a7aaec61
Z
4361 {
4362 I32 next_type;
5db06880 4363#ifdef PERL_MAD
a7aaec61 4364 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4365#else
a7aaec61 4366 next_type = PL_nexttype[PL_nexttoke];
5db06880 4367#endif
78cdf107
Z
4368 if (next_type & (7<<24)) {
4369 if (next_type & (1<<24)) {
4370 if (PL_lex_brackets > 100)
4371 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4372 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4373 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4374 }
4375 if (next_type & (2<<24))
4376 PL_lex_allbrackets++;
4377 if (next_type & (4<<24))
4378 PL_lex_allbrackets--;
a7aaec61
Z
4379 next_type &= 0xffff;
4380 }
4381#ifdef PERL_MAD
4382 /* FIXME - can these be merged? */
4383 return next_type;
4384#else
4385 return REPORT(next_type);
4386#endif
4387 }
79072805 4388
02aa26ce 4389 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4390 when we get here, PL_bufptr is at the \
02aa26ce 4391 */
79072805
LW
4392 case LEX_INTERPCASEMOD:
4393#ifdef DEBUGGING
3280af22 4394 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4395 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4396#endif
02aa26ce 4397 /* handle \E or end of string */
3280af22 4398 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4399 /* if at a \E */
3280af22 4400 if (PL_lex_casemods) {
f54cb97a 4401 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4402 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4403
3792a11b
NC
4404 if (PL_bufptr != PL_bufend
4405 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4406 PL_bufptr += 2;
4407 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4408#ifdef PERL_MAD
4409 if (PL_madskills)
6b29d1f5 4410 PL_thistoken = newSVpvs("\\E");
5db06880 4411#endif
a0d0e21e 4412 }
78cdf107 4413 PL_lex_allbrackets--;
bbf60fe6 4414 return REPORT(')');
79072805 4415 }
5db06880
NC
4416#ifdef PERL_MAD
4417 while (PL_bufptr != PL_bufend &&
4418 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4419 if (!PL_thiswhite)
6b29d1f5 4420 PL_thiswhite = newSVpvs("");
cd81e915 4421 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4422 PL_bufptr += 2;
4423 }
4424#else
3280af22
NIS
4425 if (PL_bufptr != PL_bufend)
4426 PL_bufptr += 2;
5db06880 4427#endif
3280af22 4428 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4429 return yylex();
79072805
LW
4430 }
4431 else {
607df283 4432 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4433 "### Saw case modifier\n"); });
3280af22 4434 s = PL_bufptr + 1;
6e909404 4435 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4436#ifdef PERL_MAD
cd81e915 4437 if (!PL_thiswhite)
6b29d1f5 4438 PL_thiswhite = newSVpvs("");
cd81e915 4439 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4440#endif
89122651 4441 PL_bufptr = s + 3;
6e909404
JH
4442 PL_lex_state = LEX_INTERPCONCAT;
4443 return yylex();
a0d0e21e 4444 }
6e909404 4445 else {
90771dc0 4446 I32 tmp;
5db06880
NC
4447 if (!PL_madskills) /* when just compiling don't need correct */
4448 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4449 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4450 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4451 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4452 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4453 PL_lex_allbrackets--;
bbf60fe6 4454 return REPORT(')');
6e909404
JH
4455 }
4456 if (PL_lex_casemods > 10)
4457 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4458 PL_lex_casestack[PL_lex_casemods++] = *s;
4459 PL_lex_casestack[PL_lex_casemods] = '\0';
4460 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4461 start_force(PL_curforce);
9ded7720 4462 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4463 force_next((2<<24)|'(');
cd81e915 4464 start_force(PL_curforce);
6e909404 4465 if (*s == 'l')
9ded7720 4466 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4467 else if (*s == 'u')
9ded7720 4468 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4469 else if (*s == 'L')
9ded7720 4470 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4471 else if (*s == 'U')
9ded7720 4472 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4473 else if (*s == 'Q')
9ded7720 4474 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4475 else
4476 Perl_croak(aTHX_ "panic: yylex");
5db06880 4477 if (PL_madskills) {
a5849ce5
NC
4478 SV* const tmpsv = newSVpvs("\\ ");
4479 /* replace the space with the character we want to escape
4480 */
4481 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4482 curmad('_', tmpsv);
4483 }
6e909404 4484 PL_bufptr = s + 1;
a0d0e21e 4485 }
79072805 4486 force_next(FUNC);
3280af22
NIS
4487 if (PL_lex_starts) {
4488 s = PL_bufptr;
4489 PL_lex_starts = 0;
5db06880
NC
4490#ifdef PERL_MAD
4491 if (PL_madskills) {
cd81e915
NC
4492 if (PL_thistoken)
4493 sv_free(PL_thistoken);
6b29d1f5 4494 PL_thistoken = newSVpvs("");
5db06880
NC
4495 }
4496#endif
131b3ad0
DM
4497 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4498 if (PL_lex_casemods == 1 && PL_lex_inpat)
4499 OPERATOR(',');
4500 else
4501 Aop(OP_CONCAT);
79072805
LW
4502 }
4503 else
cea2e8a9 4504 return yylex();
79072805
LW
4505 }
4506
55497cff 4507 case LEX_INTERPPUSH:
bbf60fe6 4508 return REPORT(sublex_push());
55497cff 4509
79072805 4510 case LEX_INTERPSTART:
3280af22 4511 if (PL_bufptr == PL_bufend)
bbf60fe6 4512 return REPORT(sublex_done());
607df283 4513 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4514 "### Interpolated variable\n"); });
3280af22
NIS
4515 PL_expect = XTERM;
4516 PL_lex_dojoin = (*PL_bufptr == '@');
4517 PL_lex_state = LEX_INTERPNORMAL;
4518 if (PL_lex_dojoin) {
cd81e915 4519 start_force(PL_curforce);
9ded7720 4520 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4521 force_next(',');
cd81e915 4522 start_force(PL_curforce);
a0d0e21e 4523 force_ident("\"", '$');
cd81e915 4524 start_force(PL_curforce);
9ded7720 4525 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4526 force_next('$');
cd81e915 4527 start_force(PL_curforce);
9ded7720 4528 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4529 force_next((2<<24)|'(');
cd81e915 4530 start_force(PL_curforce);
9ded7720 4531 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4532 force_next(FUNC);
4533 }
3280af22
NIS
4534 if (PL_lex_starts++) {
4535 s = PL_bufptr;
5db06880
NC
4536#ifdef PERL_MAD
4537 if (PL_madskills) {
cd81e915
NC
4538 if (PL_thistoken)
4539 sv_free(PL_thistoken);
6b29d1f5 4540 PL_thistoken = newSVpvs("");
5db06880
NC
4541 }
4542#endif
131b3ad0
DM
4543 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4544 if (!PL_lex_casemods && PL_lex_inpat)
4545 OPERATOR(',');
4546 else
4547 Aop(OP_CONCAT);
79072805 4548 }
cea2e8a9 4549 return yylex();
79072805
LW
4550
4551 case LEX_INTERPENDMAYBE:
3280af22
NIS
4552 if (intuit_more(PL_bufptr)) {
4553 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4554 break;
4555 }
4556 /* FALL THROUGH */
4557
4558 case LEX_INTERPEND:
3280af22
NIS
4559 if (PL_lex_dojoin) {
4560 PL_lex_dojoin = FALSE;
4561 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4562#ifdef PERL_MAD
4563 if (PL_madskills) {
cd81e915
NC
4564 if (PL_thistoken)
4565 sv_free(PL_thistoken);
6b29d1f5 4566 PL_thistoken = newSVpvs("");
5db06880
NC
4567 }
4568#endif
78cdf107 4569 PL_lex_allbrackets--;
bbf60fe6 4570 return REPORT(')');
79072805 4571 }
43a16006 4572 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4573 && SvEVALED(PL_lex_repl))
43a16006 4574 {
e9fa98b2 4575 if (PL_bufptr != PL_bufend)
cea2e8a9 4576 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4577 PL_lex_repl = NULL;
e9fa98b2 4578 }
79072805
LW
4579 /* FALLTHROUGH */
4580 case LEX_INTERPCONCAT:
4581#ifdef DEBUGGING
3280af22 4582 if (PL_lex_brackets)
cea2e8a9 4583 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4584#endif
3280af22 4585 if (PL_bufptr == PL_bufend)
bbf60fe6 4586 return REPORT(sublex_done());
79072805 4587
3280af22
NIS
4588 if (SvIVX(PL_linestr) == '\'') {
4589 SV *sv = newSVsv(PL_linestr);
4590 if (!PL_lex_inpat)
76e3520e 4591 sv = tokeq(sv);
3280af22 4592 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4593 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4594 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4595 s = PL_bufend;
79072805
LW
4596 }
4597 else {
3280af22 4598 s = scan_const(PL_bufptr);
79072805 4599 if (*s == '\\')
3280af22 4600 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4601 else
3280af22 4602 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4603 }
4604
3280af22 4605 if (s != PL_bufptr) {
cd81e915 4606 start_force(PL_curforce);
5db06880
NC
4607 if (PL_madskills) {
4608 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4609 }
6154021b 4610 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4611 PL_expect = XTERM;
79072805 4612 force_next(THING);
131b3ad0 4613 if (PL_lex_starts++) {
5db06880
NC
4614#ifdef PERL_MAD
4615 if (PL_madskills) {
cd81e915
NC
4616 if (PL_thistoken)
4617 sv_free(PL_thistoken);
6b29d1f5 4618 PL_thistoken = newSVpvs("");
5db06880
NC
4619 }
4620#endif
131b3ad0
DM
4621 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4622 if (!PL_lex_casemods && PL_lex_inpat)
4623 OPERATOR(',');
4624 else
4625 Aop(OP_CONCAT);
4626 }
79072805 4627 else {
3280af22 4628 PL_bufptr = s;
cea2e8a9 4629 return yylex();
79072805
LW
4630 }
4631 }
4632
cea2e8a9 4633 return yylex();
a0d0e21e 4634 case LEX_FORMLINE:
3280af22
NIS
4635 PL_lex_state = LEX_NORMAL;
4636 s = scan_formline(PL_bufptr);
4637 if (!PL_lex_formbrack)
a0d0e21e
LW
4638 goto rightbracket;
4639 OPERATOR(';');
79072805
LW
4640 }
4641
3280af22
NIS
4642 s = PL_bufptr;
4643 PL_oldoldbufptr = PL_oldbufptr;
4644 PL_oldbufptr = s;
463ee0b2
LW
4645
4646 retry:
5db06880 4647#ifdef PERL_MAD
cd81e915
NC
4648 if (PL_thistoken) {
4649 sv_free(PL_thistoken);
4650 PL_thistoken = 0;
5db06880 4651 }
cd81e915 4652 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4653#endif
378cc40b
LW
4654 switch (*s) {
4655 default:
7e2040f0 4656 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4657 goto keylookup;
b1fc3636
CJ
4658 {
4659 unsigned char c = *s;
4660 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4661 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4662 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4663 } else {
4664 d = PL_linestart;
4665 }
4666 *s = '\0';
4667 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4668 }
e929a76b
LW
4669 case 4:
4670 case 26:
4671 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4672 case 0:
5db06880
NC
4673#ifdef PERL_MAD
4674 if (PL_madskills)
cd81e915 4675 PL_faketokens = 0;
5db06880 4676#endif
3280af22
NIS
4677 if (!PL_rsfp) {
4678 PL_last_uni = 0;
4679 PL_last_lop = 0;
a7aaec61
Z
4680 if (PL_lex_brackets &&
4681 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4682 yyerror((const char *)
4683 (PL_lex_formbrack
4684 ? "Format not terminated"
4685 : "Missing right curly or square bracket"));
c5ee2135 4686 }
4e553d73 4687 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4688 "### Tokener got EOF\n");
5f80b19c 4689 } );
79072805 4690 TOKEN(0);
463ee0b2 4691 }
3280af22 4692 if (s++ < PL_bufend)
a687059c 4693 goto retry; /* ignore stray nulls */
3280af22
NIS
4694 PL_last_uni = 0;
4695 PL_last_lop = 0;
4696 if (!PL_in_eval && !PL_preambled) {
4697 PL_preambled = TRUE;
5db06880
NC
4698#ifdef PERL_MAD
4699 if (PL_madskills)
cd81e915 4700 PL_faketokens = 1;
5db06880 4701#endif
5ab7ff98
NC
4702 if (PL_perldb) {
4703 /* Generate a string of Perl code to load the debugger.
4704 * If PERL5DB is set, it will return the contents of that,
4705 * otherwise a compile-time require of perl5db.pl. */
4706
4707 const char * const pdb = PerlEnv_getenv("PERL5DB");
4708
4709 if (pdb) {
4710 sv_setpv(PL_linestr, pdb);
4711 sv_catpvs(PL_linestr,";");
4712 } else {
4713 SETERRNO(0,SS_NORMAL);
4714 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4715 }
4716 } else
4717 sv_setpvs(PL_linestr,"");
c62eb204
NC
4718 if (PL_preambleav) {
4719 SV **svp = AvARRAY(PL_preambleav);
4720 SV **const end = svp + AvFILLp(PL_preambleav);
4721 while(svp <= end) {
4722 sv_catsv(PL_linestr, *svp);
4723 ++svp;
396482e1 4724 sv_catpvs(PL_linestr, ";");
91b7def8 4725 }
daba3364 4726 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4727 PL_preambleav = NULL;
91b7def8 4728 }
9f639728
FR
4729 if (PL_minus_E)
4730 sv_catpvs(PL_linestr,
4731 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4732 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4733 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4734 if (PL_minus_l)
396482e1 4735 sv_catpvs(PL_linestr,"chomp;");
3280af22 4736 if (PL_minus_a) {
3280af22 4737 if (PL_minus_F) {
3792a11b
NC
4738 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4739 || *PL_splitstr == '"')
3280af22 4740 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4741 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4742 else {
c8ef6a4b
NC
4743 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4744 bytes can be used as quoting characters. :-) */
dd374669 4745 const char *splits = PL_splitstr;
91d456ae 4746 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4747 do {
4748 /* Need to \ \s */
dd374669
AL
4749 if (*splits == '\\')
4750 sv_catpvn(PL_linestr, splits, 1);
4751 sv_catpvn(PL_linestr, splits, 1);
4752 } while (*splits++);
48c4c863
NC
4753 /* This loop will embed the trailing NUL of
4754 PL_linestr as the last thing it does before
4755 terminating. */
396482e1 4756 sv_catpvs(PL_linestr, ");");
54310121 4757 }
2304df62
AD
4758 }
4759 else
396482e1 4760 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4761 }
79072805 4762 }
396482e1 4763 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4764 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4765 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4766 PL_last_lop = PL_last_uni = NULL;
65269a95 4767 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4768 update_debugger_info(PL_linestr, NULL, 0);
79072805 4769 goto retry;
a687059c 4770 }
e929a76b 4771 do {
580561a3
Z
4772 fake_eof = 0;
4773 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4774 if (0) {
7e28d3af 4775 fake_eof:
f0e67a1d
Z
4776 fake_eof = LEX_FAKE_EOF;
4777 }
4778 PL_bufptr = PL_bufend;
17cc9359 4779 CopLINE_inc(PL_curcop);
f0e67a1d 4780 if (!lex_next_chunk(fake_eof)) {
17cc9359 4781 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4782 s = PL_bufptr;
4783 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4784 }
17cc9359 4785 CopLINE_dec(PL_curcop);
5db06880 4786#ifdef PERL_MAD
f0e67a1d 4787 if (!PL_rsfp)
cd81e915 4788 PL_realtokenstart = -1;
5db06880 4789#endif
f0e67a1d 4790 s = PL_bufptr;
7aa207d6
JH
4791 /* If it looks like the start of a BOM or raw UTF-16,
4792 * check if it in fact is. */
580561a3 4793 if (bof && PL_rsfp &&
7aa207d6
JH
4794 (*s == 0 ||
4795 *(U8*)s == 0xEF ||
4796 *(U8*)s >= 0xFE ||
4797 s[1] == 0)) {
879bc93b
DM
4798 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4799 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
4800#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4801 /* offset may include swallowed CR */
4802 if (!bof)
879bc93b 4803 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 4804#endif
7e28d3af 4805 if (bof) {
3280af22 4806 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4807 s = swallow_bom((U8*)s);
e929a76b 4808 }
378cc40b 4809 }
737c24fc 4810 if (PL_parser->in_pod) {
a0d0e21e 4811 /* Incest with pod. */
5db06880
NC
4812#ifdef PERL_MAD
4813 if (PL_madskills)
cd81e915 4814 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4815#endif
01a57ef7 4816 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4817 sv_setpvs(PL_linestr, "");
3280af22
NIS
4818 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4819 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4820 PL_last_lop = PL_last_uni = NULL;
737c24fc 4821 PL_parser->in_pod = 0;
a0d0e21e 4822 }
4e553d73 4823 }
85613cab
Z
4824 if (PL_rsfp)
4825 incline(s);
737c24fc 4826 } while (PL_parser->in_pod);
3280af22 4827 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4828 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4829 PL_last_lop = PL_last_uni = NULL;
57843af0 4830 if (CopLINE(PL_curcop) == 1) {
3280af22 4831 while (s < PL_bufend && isSPACE(*s))
79072805 4832 s++;
a0d0e21e 4833 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4834 s++;
5db06880
NC
4835#ifdef PERL_MAD
4836 if (PL_madskills)
cd81e915 4837 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4838#endif
bd61b366 4839 d = NULL;
3280af22 4840 if (!PL_in_eval) {
44a8e56a 4841 if (*s == '#' && *(s+1) == '!')
4842 d = s + 2;
4843#ifdef ALTERNATE_SHEBANG
4844 else {
bfed75c6 4845 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4846 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4847 d = s + (sizeof(as) - 1);
4848 }
4849#endif /* ALTERNATE_SHEBANG */
4850 }
4851 if (d) {
b8378b72 4852 char *ipath;
774d564b 4853 char *ipathend;
b8378b72 4854
774d564b 4855 while (isSPACE(*d))
b8378b72
CS
4856 d++;
4857 ipath = d;
774d564b 4858 while (*d && !isSPACE(*d))
4859 d++;
4860 ipathend = d;
4861
4862#ifdef ARG_ZERO_IS_SCRIPT
4863 if (ipathend > ipath) {
4864 /*
4865 * HP-UX (at least) sets argv[0] to the script name,
4866 * which makes $^X incorrect. And Digital UNIX and Linux,
4867 * at least, set argv[0] to the basename of the Perl
4868 * interpreter. So, having found "#!", we'll set it right.
4869 */
fafc274c
NC
4870 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4871 SVt_PV)); /* $^X */
774d564b 4872 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4873 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4874 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4875 SvSETMAGIC(x);
4876 }
556c1dec
JH
4877 else {
4878 STRLEN blen;
4879 STRLEN llen;
cfd0369c 4880 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4881 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4882 if (llen < blen) {
4883 bstart += blen - llen;
4884 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4885 sv_setpvn(x, ipath, ipathend - ipath);
4886 SvSETMAGIC(x);
4887 }
4888 }
4889 }
774d564b 4890 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4891 }
774d564b 4892#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4893
4894 /*
4895 * Look for options.
4896 */
748a9306 4897 d = instr(s,"perl -");
84e30d1a 4898 if (!d) {
748a9306 4899 d = instr(s,"perl");
84e30d1a
GS
4900#if defined(DOSISH)
4901 /* avoid getting into infinite loops when shebang
4902 * line contains "Perl" rather than "perl" */
4903 if (!d) {
4904 for (d = ipathend-4; d >= ipath; --d) {
4905 if ((*d == 'p' || *d == 'P')
4906 && !ibcmp(d, "perl", 4))
4907 {
4908 break;
4909 }
4910 }
4911 if (d < ipath)
bd61b366 4912 d = NULL;
84e30d1a
GS
4913 }
4914#endif
4915 }
44a8e56a 4916#ifdef ALTERNATE_SHEBANG
4917 /*
4918 * If the ALTERNATE_SHEBANG on this system starts with a
4919 * character that can be part of a Perl expression, then if
4920 * we see it but not "perl", we're probably looking at the
4921 * start of Perl code, not a request to hand off to some
4922 * other interpreter. Similarly, if "perl" is there, but
4923 * not in the first 'word' of the line, we assume the line
4924 * contains the start of the Perl program.
44a8e56a 4925 */
4926 if (d && *s != '#') {
f54cb97a 4927 const char *c = ipath;
44a8e56a 4928 while (*c && !strchr("; \t\r\n\f\v#", *c))
4929 c++;
4930 if (c < d)
bd61b366 4931 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4932 else
4933 *s = '#'; /* Don't try to parse shebang line */
4934 }
774d564b 4935#endif /* ALTERNATE_SHEBANG */
748a9306 4936 if (!d &&
44a8e56a 4937 *s == '#' &&
774d564b 4938 ipathend > ipath &&
3280af22 4939 !PL_minus_c &&
748a9306 4940 !instr(s,"indir") &&
3280af22 4941 instr(PL_origargv[0],"perl"))
748a9306 4942 {
27da23d5 4943 dVAR;
9f68db38 4944 char **newargv;
9f68db38 4945
774d564b 4946 *ipathend = '\0';
4947 s = ipathend + 1;
3280af22 4948 while (s < PL_bufend && isSPACE(*s))
9f68db38 4949 s++;
3280af22 4950 if (s < PL_bufend) {
d85f917e 4951 Newx(newargv,PL_origargc+3,char*);
9f68db38 4952 newargv[1] = s;
3280af22 4953 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4954 s++;
4955 *s = '\0';
3280af22 4956 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4957 }
4958 else
3280af22 4959 newargv = PL_origargv;
774d564b 4960 newargv[0] = ipath;
b35112e7 4961 PERL_FPU_PRE_EXEC
b4748376 4962 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4963 PERL_FPU_POST_EXEC
cea2e8a9 4964 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4965 }
748a9306 4966 if (d) {
c35e046a
AL
4967 while (*d && !isSPACE(*d))
4968 d++;
4969 while (SPACE_OR_TAB(*d))
4970 d++;
748a9306
LW
4971
4972 if (*d++ == '-') {
f54cb97a 4973 const bool switches_done = PL_doswitches;
fb993905
GA
4974 const U32 oldpdb = PL_perldb;
4975 const bool oldn = PL_minus_n;
4976 const bool oldp = PL_minus_p;
c7030b81 4977 const char *d1 = d;
fb993905 4978
8cc95fdb 4979 do {
4ba71d51
FC
4980 bool baduni = FALSE;
4981 if (*d1 == 'C') {
bd0ab00d
NC
4982 const char *d2 = d1 + 1;
4983 if (parse_unicode_opts((const char **)&d2)
4984 != PL_unicode)
4985 baduni = TRUE;
4ba71d51
FC
4986 }
4987 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4988 const char * const m = d1;
4989 while (*d1 && !isSPACE(*d1))
4990 d1++;
cea2e8a9 4991 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4992 (int)(d1 - m), m);
8cc95fdb 4993 }
c7030b81
NC
4994 d1 = moreswitches(d1);
4995 } while (d1);
f0b2cf55
YST
4996 if (PL_doswitches && !switches_done) {
4997 int argc = PL_origargc;
4998 char **argv = PL_origargv;
4999 do {
5000 argc--,argv++;
5001 } while (argc && argv[0][0] == '-' && argv[0][1]);
5002 init_argv_symbols(argc,argv);
5003 }
65269a95 5004 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5005 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5006 /* if we have already added "LINE: while (<>) {",
5007 we must not do it again */
748a9306 5008 {
76f68e9b 5009 sv_setpvs(PL_linestr, "");
3280af22
NIS
5010 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5011 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5012 PL_last_lop = PL_last_uni = NULL;
3280af22 5013 PL_preambled = FALSE;
65269a95 5014 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5015 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5016 goto retry;
5017 }
a0d0e21e 5018 }
79072805 5019 }
9f68db38 5020 }
79072805 5021 }
3280af22
NIS
5022 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5023 PL_bufptr = s;
5024 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5025 return yylex();
ae986130 5026 }
378cc40b 5027 goto retry;
4fdae800 5028 case '\r':
6a27c188 5029#ifdef PERL_STRICT_CR
cea2e8a9 5030 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5031 Perl_croak(aTHX_
cc507455 5032 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5033#endif
4fdae800 5034 case ' ': case '\t': case '\f': case 013:
5db06880 5035#ifdef PERL_MAD
cd81e915 5036 PL_realtokenstart = -1;
ac372eb8
RD
5037 if (!PL_thiswhite)
5038 PL_thiswhite = newSVpvs("");
5039 sv_catpvn(PL_thiswhite, s, 1);
5db06880 5040#endif
ac372eb8 5041 s++;
378cc40b 5042 goto retry;
378cc40b 5043 case '#':
e929a76b 5044 case '\n':
5db06880 5045#ifdef PERL_MAD
cd81e915 5046 PL_realtokenstart = -1;
5db06880 5047 if (PL_madskills)
cd81e915 5048 PL_faketokens = 0;
5db06880 5049#endif
3280af22 5050 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
5051 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5052 /* handle eval qq[#line 1 "foo"\n ...] */
5053 CopLINE_dec(PL_curcop);
5054 incline(s);
5055 }
5db06880
NC
5056 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5057 s = SKIPSPACE0(s);
5058 if (!PL_in_eval || PL_rsfp)
5059 incline(s);
5060 }
5061 else {
5062 d = s;
5063 while (d < PL_bufend && *d != '\n')
5064 d++;
5065 if (d < PL_bufend)
5066 d++;
5067 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5068 Perl_croak(aTHX_ "panic: input overflow");
5069#ifdef PERL_MAD
5070 if (PL_madskills)
cd81e915 5071 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5072#endif
5073 s = d;
5074 incline(s);
5075 }
3280af22
NIS
5076 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5077 PL_bufptr = s;
5078 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5079 return yylex();
a687059c 5080 }
378cc40b 5081 }
a687059c 5082 else {
5db06880
NC
5083#ifdef PERL_MAD
5084 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5085 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5086 PL_faketokens = 0;
5db06880
NC
5087 s = SKIPSPACE0(s);
5088 TOKEN(PEG); /* make sure any #! line is accessible */
5089 }
5090 s = SKIPSPACE0(s);
5091 }
5092 else {
5093/* if (PL_madskills && PL_lex_formbrack) { */
5094 d = s;
5095 while (d < PL_bufend && *d != '\n')
5096 d++;
5097 if (d < PL_bufend)
5098 d++;
5099 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5100 Perl_croak(aTHX_ "panic: input overflow");
5101 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5102 if (!PL_thiswhite)
6b29d1f5 5103 PL_thiswhite = newSVpvs("");
5db06880 5104 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5105 sv_setpvs(PL_thiswhite, "");
cd81e915 5106 PL_faketokens = 0;
5db06880 5107 }
cd81e915 5108 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5109 }
5110 s = d;
5111/* }
5112 *s = '\0';
5113 PL_bufend = s; */
5114 }
5115#else
378cc40b 5116 *s = '\0';
3280af22 5117 PL_bufend = s;
5db06880 5118#endif
a687059c 5119 }
378cc40b
LW
5120 goto retry;
5121 case '-':
79072805 5122 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5123 I32 ftst = 0;
90771dc0 5124 char tmp;
e5edeb50 5125
378cc40b 5126 s++;
3280af22 5127 PL_bufptr = s;
748a9306
LW
5128 tmp = *s++;
5129
bf4acbe4 5130 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5131 s++;
5132
5133 if (strnEQ(s,"=>",2)) {
3280af22 5134 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5135 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5136 OPERATOR('-'); /* unary minus */
5137 }
3280af22 5138 PL_last_uni = PL_oldbufptr;
748a9306 5139 switch (tmp) {
e5edeb50
JH
5140 case 'r': ftst = OP_FTEREAD; break;
5141 case 'w': ftst = OP_FTEWRITE; break;
5142 case 'x': ftst = OP_FTEEXEC; break;
5143 case 'o': ftst = OP_FTEOWNED; break;
5144 case 'R': ftst = OP_FTRREAD; break;
5145 case 'W': ftst = OP_FTRWRITE; break;
5146 case 'X': ftst = OP_FTREXEC; break;
5147 case 'O': ftst = OP_FTROWNED; break;
5148 case 'e': ftst = OP_FTIS; break;
5149 case 'z': ftst = OP_FTZERO; break;
5150 case 's': ftst = OP_FTSIZE; break;
5151 case 'f': ftst = OP_FTFILE; break;
5152 case 'd': ftst = OP_FTDIR; break;
5153 case 'l': ftst = OP_FTLINK; break;
5154 case 'p': ftst = OP_FTPIPE; break;
5155 case 'S': ftst = OP_FTSOCK; break;
5156 case 'u': ftst = OP_FTSUID; break;
5157 case 'g': ftst = OP_FTSGID; break;
5158 case 'k': ftst = OP_FTSVTX; break;
5159 case 'b': ftst = OP_FTBLK; break;
5160 case 'c': ftst = OP_FTCHR; break;
5161 case 't': ftst = OP_FTTTY; break;
5162 case 'T': ftst = OP_FTTEXT; break;
5163 case 'B': ftst = OP_FTBINARY; break;
5164 case 'M': case 'A': case 'C':
fafc274c 5165 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5166 switch (tmp) {
5167 case 'M': ftst = OP_FTMTIME; break;
5168 case 'A': ftst = OP_FTATIME; break;
5169 case 'C': ftst = OP_FTCTIME; break;
5170 default: break;
5171 }
5172 break;
378cc40b 5173 default:
378cc40b
LW
5174 break;
5175 }
e5edeb50 5176 if (ftst) {
eb160463 5177 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5178 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5179 "### Saw file test %c\n", (int)tmp);
5f80b19c 5180 } );
e5edeb50
JH
5181 FTST(ftst);
5182 }
5183 else {
5184 /* Assume it was a minus followed by a one-letter named
5185 * subroutine call (or a -bareword), then. */
95c31fe3 5186 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5187 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5188 (int) tmp);
5f80b19c 5189 } );
3cf7b4c4 5190 s = --PL_bufptr;
e5edeb50 5191 }
378cc40b 5192 }
90771dc0
NC
5193 {
5194 const char tmp = *s++;
5195 if (*s == tmp) {
5196 s++;
5197 if (PL_expect == XOPERATOR)
5198 TERM(POSTDEC);
5199 else
5200 OPERATOR(PREDEC);
5201 }
5202 else if (*s == '>') {
5203 s++;
29595ff2 5204 s = SKIPSPACE1(s);
90771dc0
NC
5205 if (isIDFIRST_lazy_if(s,UTF)) {
5206 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5207 TOKEN(ARROW);
5208 }
5209 else if (*s == '$')
5210 OPERATOR(ARROW);
5211 else
5212 TERM(ARROW);
5213 }
78cdf107
Z
5214 if (PL_expect == XOPERATOR) {
5215 if (*s == '=' && !PL_lex_allbrackets &&
5216 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5217 s--;
5218 TOKEN(0);
5219 }
90771dc0 5220 Aop(OP_SUBTRACT);
78cdf107 5221 }
90771dc0
NC
5222 else {
5223 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5224 check_uni();
5225 OPERATOR('-'); /* unary minus */
79072805 5226 }
2f3197b3 5227 }
79072805 5228
378cc40b 5229 case '+':
90771dc0
NC
5230 {
5231 const char tmp = *s++;
5232 if (*s == tmp) {
5233 s++;
5234 if (PL_expect == XOPERATOR)
5235 TERM(POSTINC);
5236 else
5237 OPERATOR(PREINC);
5238 }
78cdf107
Z
5239 if (PL_expect == XOPERATOR) {
5240 if (*s == '=' && !PL_lex_allbrackets &&
5241 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5242 s--;
5243 TOKEN(0);
5244 }
90771dc0 5245 Aop(OP_ADD);
78cdf107 5246 }
90771dc0
NC
5247 else {
5248 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5249 check_uni();
5250 OPERATOR('+');
5251 }
2f3197b3 5252 }
a687059c 5253
378cc40b 5254 case '*':
3280af22
NIS
5255 if (PL_expect != XOPERATOR) {
5256 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5257 PL_expect = XOPERATOR;
5258 force_ident(PL_tokenbuf, '*');
5259 if (!*PL_tokenbuf)
a0d0e21e 5260 PREREF('*');
79072805 5261 TERM('*');
a687059c 5262 }
79072805
LW
5263 s++;
5264 if (*s == '*') {
a687059c 5265 s++;
78cdf107
Z
5266 if (*s == '=' && !PL_lex_allbrackets &&
5267 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5268 s -= 2;
5269 TOKEN(0);
5270 }
79072805 5271 PWop(OP_POW);
a687059c 5272 }
78cdf107
Z
5273 if (*s == '=' && !PL_lex_allbrackets &&
5274 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5275 s--;
5276 TOKEN(0);
5277 }
79072805
LW
5278 Mop(OP_MULTIPLY);
5279
378cc40b 5280 case '%':
3280af22 5281 if (PL_expect == XOPERATOR) {
78cdf107
Z
5282 if (s[1] == '=' && !PL_lex_allbrackets &&
5283 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5284 TOKEN(0);
bbce6d69 5285 ++s;
5286 Mop(OP_MODULO);
a687059c 5287 }
3280af22 5288 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5289 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5290 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5291 if (!PL_tokenbuf[1]) {
bbce6d69 5292 PREREF('%');
a687059c 5293 }
3280af22 5294 PL_pending_ident = '%';
bbce6d69 5295 TERM('%');
a687059c 5296
378cc40b 5297 case '^':
78cdf107
Z
5298 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5299 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5300 TOKEN(0);
79072805 5301 s++;
a0d0e21e 5302 BOop(OP_BIT_XOR);
79072805 5303 case '[':
a7aaec61
Z
5304 if (PL_lex_brackets > 100)
5305 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5306 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5307 PL_lex_allbrackets++;
df3467db
IG
5308 {
5309 const char tmp = *s++;
5310 OPERATOR(tmp);
5311 }
378cc40b 5312 case '~':
0d863452 5313 if (s[1] == '~'
3e7dd34d 5314 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5315 {
78cdf107
Z
5316 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5317 TOKEN(0);
0d863452
RH
5318 s += 2;
5319 Eop(OP_SMARTMATCH);
5320 }
78cdf107
Z
5321 s++;
5322 OPERATOR('~');
378cc40b 5323 case ',':
78cdf107
Z
5324 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5325 TOKEN(0);
5326 s++;
5327 OPERATOR(',');
a0d0e21e
LW
5328 case ':':
5329 if (s[1] == ':') {
5330 len = 0;
0bfa2a8a 5331 goto just_a_word_zero_gv;
a0d0e21e
LW
5332 }
5333 s++;
09bef843
SB
5334 switch (PL_expect) {
5335 OP *attrs;
5db06880
NC
5336#ifdef PERL_MAD
5337 I32 stuffstart;
5338#endif
09bef843
SB
5339 case XOPERATOR:
5340 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5341 break;
5342 PL_bufptr = s; /* update in case we back off */
d83f38d8 5343 if (*s == '=') {
2dc78664
NC
5344 Perl_croak(aTHX_
5345 "Use of := for an empty attribute list is not allowed");
d83f38d8 5346 }
09bef843
SB
5347 goto grabattrs;
5348 case XATTRBLOCK:
5349 PL_expect = XBLOCK;
5350 goto grabattrs;
5351 case XATTRTERM:
5352 PL_expect = XTERMBLOCK;
5353 grabattrs:
5db06880
NC
5354#ifdef PERL_MAD
5355 stuffstart = s - SvPVX(PL_linestr) - 1;
5356#endif
29595ff2 5357 s = PEEKSPACE(s);
5f66b61c 5358 attrs = NULL;
7e2040f0 5359 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5360 I32 tmp;
5cc237b8 5361 SV *sv;
09bef843 5362 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5363 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5364 if (tmp < 0) tmp = -tmp;
5365 switch (tmp) {
5366 case KEY_or:
5367 case KEY_and:
5368 case KEY_for:
11baf631 5369 case KEY_foreach:
f9829d6b
GS
5370 case KEY_unless:
5371 case KEY_if:
5372 case KEY_while:
5373 case KEY_until:
5374 goto got_attrs;
5375 default:
5376 break;
5377 }
5378 }
5cc237b8 5379 sv = newSVpvn(s, len);
09bef843
SB
5380 if (*d == '(') {
5381 d = scan_str(d,TRUE,TRUE);
5382 if (!d) {
09bef843
SB
5383 /* MUST advance bufptr here to avoid bogus
5384 "at end of line" context messages from yyerror().
5385 */
5386 PL_bufptr = s + len;
5387 yyerror("Unterminated attribute parameter in attribute list");
5388 if (attrs)
5389 op_free(attrs);
5cc237b8 5390 sv_free(sv);
bbf60fe6 5391 return REPORT(0); /* EOF indicator */
09bef843
SB
5392 }
5393 }
5394 if (PL_lex_stuff) {
09bef843 5395 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5396 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5397 newSVOP(OP_CONST, 0, sv));
5398 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5399 PL_lex_stuff = NULL;
09bef843
SB
5400 }
5401 else {
5cc237b8
BS
5402 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5403 sv_free(sv);
1108974d 5404 if (PL_in_my == KEY_our) {
df9a6019 5405 deprecate(":unique");
1108974d 5406 }
bfed75c6 5407 else
371fce9b
DM
5408 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5409 }
5410
d3cea301
SB
5411 /* NOTE: any CV attrs applied here need to be part of
5412 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5413 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5414 sv_free(sv);
78f9721b 5415 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5416 }
5417 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5418 sv_free(sv);
8e5dadda 5419 deprecate(":locked");
5cc237b8
BS
5420 }
5421 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5422 sv_free(sv);
78f9721b 5423 CvMETHOD_on(PL_compcv);
5cc237b8 5424 }
78f9721b
SM
5425 /* After we've set the flags, it could be argued that
5426 we don't need to do the attributes.pm-based setting
5427 process, and shouldn't bother appending recognized
d3cea301
SB
5428 flags. To experiment with that, uncomment the
5429 following "else". (Note that's already been
5430 uncommented. That keeps the above-applied built-in
5431 attributes from being intercepted (and possibly
5432 rejected) by a package's attribute routines, but is
5433 justified by the performance win for the common case
5434 of applying only built-in attributes.) */
0256094b 5435 else
2fcb4757 5436 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5437 newSVOP(OP_CONST, 0,
5cc237b8 5438 sv));
09bef843 5439 }
29595ff2 5440 s = PEEKSPACE(d);
0120eecf 5441 if (*s == ':' && s[1] != ':')
29595ff2 5442 s = PEEKSPACE(s+1);
0120eecf
GS
5443 else if (s == d)
5444 break; /* require real whitespace or :'s */
29595ff2 5445 /* XXX losing whitespace on sequential attributes here */
09bef843 5446 }
90771dc0
NC
5447 {
5448 const char tmp
5449 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5450 if (*s != ';' && *s != '}' && *s != tmp
5451 && (tmp != '=' || *s != ')')) {
5452 const char q = ((*s == '\'') ? '"' : '\'');
5453 /* If here for an expression, and parsed no attrs, back
5454 off. */
5455 if (tmp == '=' && !attrs) {
5456 s = PL_bufptr;
5457 break;
5458 }
5459 /* MUST advance bufptr here to avoid bogus "at end of line"
5460 context messages from yyerror().
5461 */
5462 PL_bufptr = s;
10edeb5d
JH
5463 yyerror( (const char *)
5464 (*s
5465 ? Perl_form(aTHX_ "Invalid separator character "
5466 "%c%c%c in attribute list", q, *s, q)
5467 : "Unterminated attribute list" ) );
90771dc0
NC
5468 if (attrs)
5469 op_free(attrs);
5470 OPERATOR(':');
09bef843 5471 }
09bef843 5472 }
f9829d6b 5473 got_attrs:
09bef843 5474 if (attrs) {
cd81e915 5475 start_force(PL_curforce);
9ded7720 5476 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5477 CURMAD('_', PL_nextwhite);
89122651 5478 force_next(THING);
5db06880
NC
5479 }
5480#ifdef PERL_MAD
5481 if (PL_madskills) {
cd81e915 5482 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5483 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5484 }
5db06880 5485#endif
09bef843
SB
5486 TOKEN(COLONATTR);
5487 }
78cdf107
Z
5488 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5489 s--;
5490 TOKEN(0);
5491 }
5492 PL_lex_allbrackets--;
a0d0e21e 5493 OPERATOR(':');
8990e307
LW
5494 case '(':
5495 s++;
3280af22
NIS
5496 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5497 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5498 else
3280af22 5499 PL_expect = XTERM;
29595ff2 5500 s = SKIPSPACE1(s);
78cdf107 5501 PL_lex_allbrackets++;
a0d0e21e 5502 TOKEN('(');
378cc40b 5503 case ';':
78cdf107
Z
5504 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5505 TOKEN(0);
f4dd75d9 5506 CLINE;
78cdf107
Z
5507 s++;
5508 OPERATOR(';');
378cc40b 5509 case ')':
78cdf107
Z
5510 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5511 TOKEN(0);
5512 s++;
5513 PL_lex_allbrackets--;
5514 s = SKIPSPACE1(s);
5515 if (*s == '{')
5516 PREBLOCK(')');
5517 TERM(')');
79072805 5518 case ']':
a7aaec61
Z
5519 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5520 TOKEN(0);
79072805 5521 s++;
3280af22 5522 if (PL_lex_brackets <= 0)
d98d5fff 5523 yyerror("Unmatched right square bracket");
463ee0b2 5524 else
3280af22 5525 --PL_lex_brackets;
78cdf107 5526 PL_lex_allbrackets--;
3280af22
NIS
5527 if (PL_lex_state == LEX_INTERPNORMAL) {
5528 if (PL_lex_brackets == 0) {
02255c60
FC
5529 if (*s == '-' && s[1] == '>')
5530 PL_lex_state = LEX_INTERPENDMAYBE;
5531 else if (*s != '[' && *s != '{')
3280af22 5532 PL_lex_state = LEX_INTERPEND;
79072805
LW
5533 }
5534 }
4633a7c4 5535 TERM(']');
79072805
LW
5536 case '{':
5537 leftbracket:
79072805 5538 s++;
3280af22 5539 if (PL_lex_brackets > 100) {
8edd5f42 5540 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5541 }
3280af22 5542 switch (PL_expect) {
a0d0e21e 5543 case XTERM:
3280af22 5544 if (PL_lex_formbrack) {
a0d0e21e
LW
5545 s--;
5546 PRETERMBLOCK(DO);
5547 }
3280af22
NIS
5548 if (PL_oldoldbufptr == PL_last_lop)
5549 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5550 else
3280af22 5551 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5552 PL_lex_allbrackets++;
79072805 5553 OPERATOR(HASHBRACK);
a0d0e21e 5554 case XOPERATOR:
bf4acbe4 5555 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5556 s++;
44a8e56a 5557 d = s;
3280af22
NIS
5558 PL_tokenbuf[0] = '\0';
5559 if (d < PL_bufend && *d == '-') {
5560 PL_tokenbuf[0] = '-';
44a8e56a 5561 d++;
bf4acbe4 5562 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5563 d++;
5564 }
7e2040f0 5565 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5566 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5567 FALSE, &len);
bf4acbe4 5568 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5569 d++;
5570 if (*d == '}') {
f54cb97a 5571 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5572 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5573 if (minus)
5574 force_next('-');
748a9306
LW
5575 }
5576 }
5577 /* FALL THROUGH */
09bef843 5578 case XATTRBLOCK:
748a9306 5579 case XBLOCK:
3280af22 5580 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5581 PL_lex_allbrackets++;
3280af22 5582 PL_expect = XSTATE;
a0d0e21e 5583 break;
09bef843 5584 case XATTRTERM:
a0d0e21e 5585 case XTERMBLOCK:
3280af22 5586 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5587 PL_lex_allbrackets++;
3280af22 5588 PL_expect = XSTATE;
a0d0e21e
LW
5589 break;
5590 default: {
f54cb97a 5591 const char *t;
3280af22
NIS
5592 if (PL_oldoldbufptr == PL_last_lop)
5593 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5594 else
3280af22 5595 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5596 PL_lex_allbrackets++;
29595ff2 5597 s = SKIPSPACE1(s);
8452ff4b
SB
5598 if (*s == '}') {
5599 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5600 PL_expect = XTERM;
5601 /* This hack is to get the ${} in the message. */
5602 PL_bufptr = s+1;
5603 yyerror("syntax error");
5604 break;
5605 }
a0d0e21e 5606 OPERATOR(HASHBRACK);
8452ff4b 5607 }
b8a4b1be
GS
5608 /* This hack serves to disambiguate a pair of curlies
5609 * as being a block or an anon hash. Normally, expectation
5610 * determines that, but in cases where we're not in a
5611 * position to expect anything in particular (like inside
5612 * eval"") we have to resolve the ambiguity. This code
5613 * covers the case where the first term in the curlies is a
5614 * quoted string. Most other cases need to be explicitly
a0288114 5615 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5616 * curly in order to force resolution as an anon hash.
5617 *
5618 * XXX should probably propagate the outer expectation
5619 * into eval"" to rely less on this hack, but that could
5620 * potentially break current behavior of eval"".
5621 * GSAR 97-07-21
5622 */
5623 t = s;
5624 if (*s == '\'' || *s == '"' || *s == '`') {
5625 /* common case: get past first string, handling escapes */
3280af22 5626 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5627 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5628 t++;
5629 t++;
a0d0e21e 5630 }
b8a4b1be 5631 else if (*s == 'q') {
3280af22 5632 if (++t < PL_bufend
b8a4b1be 5633 && (!isALNUM(*t)
3280af22 5634 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5635 && !isALNUM(*t))))
5636 {
abc667d1 5637 /* skip q//-like construct */
f54cb97a 5638 const char *tmps;
b8a4b1be
GS
5639 char open, close, term;
5640 I32 brackets = 1;
5641
3280af22 5642 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5643 t++;
abc667d1
DM
5644 /* check for q => */
5645 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5646 OPERATOR(HASHBRACK);
5647 }
b8a4b1be
GS
5648 term = *t;
5649 open = term;
5650 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5651 term = tmps[5];
5652 close = term;
5653 if (open == close)
3280af22
NIS
5654 for (t++; t < PL_bufend; t++) {
5655 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5656 t++;
6d07e5e9 5657 else if (*t == open)
b8a4b1be
GS
5658 break;
5659 }
abc667d1 5660 else {
3280af22
NIS
5661 for (t++; t < PL_bufend; t++) {
5662 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5663 t++;
6d07e5e9 5664 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5665 break;
5666 else if (*t == open)
5667 brackets++;
5668 }
abc667d1
DM
5669 }
5670 t++;
b8a4b1be 5671 }
abc667d1
DM
5672 else
5673 /* skip plain q word */
5674 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5675 t += UTF8SKIP(t);
a0d0e21e 5676 }
7e2040f0 5677 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5678 t += UTF8SKIP(t);
7e2040f0 5679 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5680 t += UTF8SKIP(t);
a0d0e21e 5681 }
3280af22 5682 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5683 t++;
b8a4b1be
GS
5684 /* if comma follows first term, call it an anon hash */
5685 /* XXX it could be a comma expression with loop modifiers */
3280af22 5686 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5687 || (*t == '=' && t[1] == '>')))
a0d0e21e 5688 OPERATOR(HASHBRACK);
3280af22 5689 if (PL_expect == XREF)
4e4e412b 5690 PL_expect = XTERM;
a0d0e21e 5691 else {
3280af22
NIS
5692 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5693 PL_expect = XSTATE;
a0d0e21e 5694 }
8990e307 5695 }
a0d0e21e 5696 break;
463ee0b2 5697 }
6154021b 5698 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5699 if (isSPACE(*s) || *s == '#')
3280af22 5700 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5701 TOKEN('{');
378cc40b 5702 case '}':
a7aaec61
Z
5703 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5704 TOKEN(0);
79072805
LW
5705 rightbracket:
5706 s++;
3280af22 5707 if (PL_lex_brackets <= 0)
d98d5fff 5708 yyerror("Unmatched right curly bracket");
463ee0b2 5709 else
3280af22 5710 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 5711 PL_lex_allbrackets--;
c2e66d9e 5712 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5713 PL_lex_formbrack = 0;
5714 if (PL_lex_state == LEX_INTERPNORMAL) {
5715 if (PL_lex_brackets == 0) {
9059aa12
LW
5716 if (PL_expect & XFAKEBRACK) {
5717 PL_expect &= XENUMMASK;
3280af22
NIS
5718 PL_lex_state = LEX_INTERPEND;
5719 PL_bufptr = s;
5db06880
NC
5720#if 0
5721 if (PL_madskills) {
cd81e915 5722 if (!PL_thiswhite)
6b29d1f5 5723 PL_thiswhite = newSVpvs("");
76f68e9b 5724 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5725 }
5726#endif
cea2e8a9 5727 return yylex(); /* ignore fake brackets */
79072805 5728 }
fa83b5b6 5729 if (*s == '-' && s[1] == '>')
3280af22 5730 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5731 else if (*s != '[' && *s != '{')
3280af22 5732 PL_lex_state = LEX_INTERPEND;
79072805
LW
5733 }
5734 }
9059aa12
LW
5735 if (PL_expect & XFAKEBRACK) {
5736 PL_expect &= XENUMMASK;
3280af22 5737 PL_bufptr = s;
cea2e8a9 5738 return yylex(); /* ignore fake brackets */
748a9306 5739 }
cd81e915 5740 start_force(PL_curforce);
5db06880
NC
5741 if (PL_madskills) {
5742 curmad('X', newSVpvn(s-1,1));
cd81e915 5743 CURMAD('_', PL_thiswhite);
5db06880 5744 }
79072805 5745 force_next('}');
5db06880 5746#ifdef PERL_MAD
cd81e915 5747 if (!PL_thistoken)
6b29d1f5 5748 PL_thistoken = newSVpvs("");
5db06880 5749#endif
79072805 5750 TOKEN(';');
378cc40b
LW
5751 case '&':
5752 s++;
78cdf107
Z
5753 if (*s++ == '&') {
5754 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5755 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5756 s -= 2;
5757 TOKEN(0);
5758 }
a0d0e21e 5759 AOPERATOR(ANDAND);
78cdf107 5760 }
378cc40b 5761 s--;
3280af22 5762 if (PL_expect == XOPERATOR) {
041457d9
DM
5763 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5764 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5765 {
57843af0 5766 CopLINE_dec(PL_curcop);
f1f66076 5767 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5768 CopLINE_inc(PL_curcop);
463ee0b2 5769 }
78cdf107
Z
5770 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5771 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5772 s--;
5773 TOKEN(0);
5774 }
79072805 5775 BAop(OP_BIT_AND);
463ee0b2 5776 }
79072805 5777
3280af22
NIS
5778 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5779 if (*PL_tokenbuf) {
5780 PL_expect = XOPERATOR;
5781 force_ident(PL_tokenbuf, '&');
463ee0b2 5782 }
79072805
LW
5783 else
5784 PREREF('&');
6154021b 5785 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5786 TERM('&');
5787
378cc40b
LW
5788 case '|':
5789 s++;
78cdf107
Z
5790 if (*s++ == '|') {
5791 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5792 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5793 s -= 2;
5794 TOKEN(0);
5795 }
a0d0e21e 5796 AOPERATOR(OROR);
78cdf107 5797 }
378cc40b 5798 s--;
78cdf107
Z
5799 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5800 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5801 s--;
5802 TOKEN(0);
5803 }
79072805 5804 BOop(OP_BIT_OR);
378cc40b
LW
5805 case '=':
5806 s++;
748a9306 5807 {
90771dc0 5808 const char tmp = *s++;
78cdf107
Z
5809 if (tmp == '=') {
5810 if (!PL_lex_allbrackets &&
5811 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5812 s -= 2;
5813 TOKEN(0);
5814 }
90771dc0 5815 Eop(OP_EQ);
78cdf107
Z
5816 }
5817 if (tmp == '>') {
5818 if (!PL_lex_allbrackets &&
5819 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5820 s -= 2;
5821 TOKEN(0);
5822 }
90771dc0 5823 OPERATOR(',');
78cdf107 5824 }
90771dc0
NC
5825 if (tmp == '~')
5826 PMop(OP_MATCH);
5827 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5828 && strchr("+-*/%.^&|<",tmp))
5829 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5830 "Reversed %c= operator",(int)tmp);
5831 s--;
5832 if (PL_expect == XSTATE && isALPHA(tmp) &&
5833 (s == PL_linestart+1 || s[-2] == '\n') )
5834 {
5835 if (PL_in_eval && !PL_rsfp) {
5836 d = PL_bufend;
5837 while (s < d) {
5838 if (*s++ == '\n') {
5839 incline(s);
5840 if (strnEQ(s,"=cut",4)) {
5841 s = strchr(s,'\n');
5842 if (s)
5843 s++;
5844 else
5845 s = d;
5846 incline(s);
5847 goto retry;
5848 }
5849 }
a5f75d66 5850 }
90771dc0 5851 goto retry;
a5f75d66 5852 }
5db06880
NC
5853#ifdef PERL_MAD
5854 if (PL_madskills) {
cd81e915 5855 if (!PL_thiswhite)
6b29d1f5 5856 PL_thiswhite = newSVpvs("");
cd81e915 5857 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5858 PL_bufend - PL_linestart);
5859 }
5860#endif
90771dc0 5861 s = PL_bufend;
737c24fc 5862 PL_parser->in_pod = 1;
90771dc0 5863 goto retry;
a5f75d66 5864 }
a0d0e21e 5865 }
3280af22 5866 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5867 const char *t = s;
51882d45 5868#ifdef PERL_STRICT_CR
c35e046a 5869 while (SPACE_OR_TAB(*t))
51882d45 5870#else
c35e046a 5871 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5872#endif
c35e046a 5873 t++;
a0d0e21e
LW
5874 if (*t == '\n' || *t == '#') {
5875 s--;
3280af22 5876 PL_expect = XBLOCK;
a0d0e21e
LW
5877 goto leftbracket;
5878 }
79072805 5879 }
78cdf107
Z
5880 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5881 s--;
5882 TOKEN(0);
5883 }
6154021b 5884 pl_yylval.ival = 0;
a0d0e21e 5885 OPERATOR(ASSIGNOP);
378cc40b
LW
5886 case '!':
5887 s++;
90771dc0
NC
5888 {
5889 const char tmp = *s++;
5890 if (tmp == '=') {
5891 /* was this !=~ where !~ was meant?
5892 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5893
5894 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5895 const char *t = s+1;
5896
5897 while (t < PL_bufend && isSPACE(*t))
5898 ++t;
5899
5900 if (*t == '/' || *t == '?' ||
5901 ((*t == 'm' || *t == 's' || *t == 'y')
5902 && !isALNUM(t[1])) ||
5903 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5904 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5905 "!=~ should be !~");
5906 }
78cdf107
Z
5907 if (!PL_lex_allbrackets &&
5908 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5909 s -= 2;
5910 TOKEN(0);
5911 }
90771dc0
NC
5912 Eop(OP_NE);
5913 }
5914 if (tmp == '~')
5915 PMop(OP_NOT);
5916 }
378cc40b
LW
5917 s--;
5918 OPERATOR('!');
5919 case '<':
3280af22 5920 if (PL_expect != XOPERATOR) {
93a17b20 5921 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5922 check_uni();
79072805
LW
5923 if (s[1] == '<')
5924 s = scan_heredoc(s);
5925 else
5926 s = scan_inputsymbol(s);
5927 TERM(sublex_start());
378cc40b
LW
5928 }
5929 s++;
90771dc0
NC
5930 {
5931 char tmp = *s++;
78cdf107
Z
5932 if (tmp == '<') {
5933 if (*s == '=' && !PL_lex_allbrackets &&
5934 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5935 s -= 2;
5936 TOKEN(0);
5937 }
90771dc0 5938 SHop(OP_LEFT_SHIFT);
78cdf107 5939 }
90771dc0
NC
5940 if (tmp == '=') {
5941 tmp = *s++;
78cdf107
Z
5942 if (tmp == '>') {
5943 if (!PL_lex_allbrackets &&
5944 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5945 s -= 3;
5946 TOKEN(0);
5947 }
90771dc0 5948 Eop(OP_NCMP);
78cdf107 5949 }
90771dc0 5950 s--;
78cdf107
Z
5951 if (!PL_lex_allbrackets &&
5952 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5953 s -= 2;
5954 TOKEN(0);
5955 }
90771dc0
NC
5956 Rop(OP_LE);
5957 }
395c3793 5958 }
378cc40b 5959 s--;
78cdf107
Z
5960 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5961 s--;
5962 TOKEN(0);
5963 }
79072805 5964 Rop(OP_LT);
378cc40b
LW
5965 case '>':
5966 s++;
90771dc0
NC
5967 {
5968 const char tmp = *s++;
78cdf107
Z
5969 if (tmp == '>') {
5970 if (*s == '=' && !PL_lex_allbrackets &&
5971 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5972 s -= 2;
5973 TOKEN(0);
5974 }
90771dc0 5975 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
5976 }
5977 else if (tmp == '=') {
5978 if (!PL_lex_allbrackets &&
5979 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5980 s -= 2;
5981 TOKEN(0);
5982 }
90771dc0 5983 Rop(OP_GE);
78cdf107 5984 }
90771dc0 5985 }
378cc40b 5986 s--;
78cdf107
Z
5987 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5988 s--;
5989 TOKEN(0);
5990 }
79072805 5991 Rop(OP_GT);
378cc40b
LW
5992
5993 case '$':
bbce6d69 5994 CLINE;
5995
3280af22
NIS
5996 if (PL_expect == XOPERATOR) {
5997 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5998 return deprecate_commaless_var_list();
a0d0e21e 5999 }
8990e307 6000 }
a0d0e21e 6001
c0b977fd 6002 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6003 PL_tokenbuf[0] = '@';
376b8730
SM
6004 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6005 sizeof PL_tokenbuf - 1, FALSE);
6006 if (PL_expect == XOPERATOR)
6007 no_op("Array length", s);
3280af22 6008 if (!PL_tokenbuf[1])
a0d0e21e 6009 PREREF(DOLSHARP);
3280af22
NIS
6010 PL_expect = XOPERATOR;
6011 PL_pending_ident = '#';
463ee0b2 6012 TOKEN(DOLSHARP);
79072805 6013 }
bbce6d69 6014
3280af22 6015 PL_tokenbuf[0] = '$';
376b8730
SM
6016 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6017 sizeof PL_tokenbuf - 1, FALSE);
6018 if (PL_expect == XOPERATOR)
6019 no_op("Scalar", s);
3280af22
NIS
6020 if (!PL_tokenbuf[1]) {
6021 if (s == PL_bufend)
bbce6d69 6022 yyerror("Final $ should be \\$ or $name");
6023 PREREF('$');
8990e307 6024 }
a0d0e21e 6025
bbce6d69 6026 /* This kludge not intended to be bulletproof. */
3280af22 6027 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 6028 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 6029 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 6030 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 6031 TERM(THING);
6032 }
6033
ff68c719 6034 d = s;
90771dc0
NC
6035 {
6036 const char tmp = *s;
ae28bb2a 6037 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6038 s = SKIPSPACE1(s);
ff68c719 6039
90771dc0
NC
6040 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6041 && intuit_more(s)) {
6042 if (*s == '[') {
6043 PL_tokenbuf[0] = '@';
6044 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6045 char *t = s+1;
6046
6047 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6048 t++;
90771dc0 6049 if (*t++ == ',') {
29595ff2 6050 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6051 while (t < PL_bufend && *t != ']')
6052 t++;
9014280d 6053 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6054 "Multidimensional syntax %.*s not supported",
36c7798d 6055 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6056 }
748a9306 6057 }
93a17b20 6058 }
90771dc0
NC
6059 else if (*s == '{') {
6060 char *t;
6061 PL_tokenbuf[0] = '%';
6062 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6063 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6064 {
6065 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6066 do {
6067 t++;
6068 } while (isSPACE(*t));
90771dc0 6069 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6070 STRLEN len;
90771dc0 6071 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6072 &len);
c35e046a
AL
6073 while (isSPACE(*t))
6074 t++;
780a5241 6075 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
6076 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6077 "You need to quote \"%s\"",
6078 tmpbuf);
6079 }
6080 }
6081 }
93a17b20 6082 }
bbce6d69 6083
90771dc0
NC
6084 PL_expect = XOPERATOR;
6085 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6086 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6087 if (!islop || PL_last_lop_op == OP_GREPSTART)
6088 PL_expect = XOPERATOR;
6089 else if (strchr("$@\"'`q", *s))
6090 PL_expect = XTERM; /* e.g. print $fh "foo" */
6091 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6092 PL_expect = XTERM; /* e.g. print $fh &sub */
6093 else if (isIDFIRST_lazy_if(s,UTF)) {
6094 char tmpbuf[sizeof PL_tokenbuf];
6095 int t2;
6096 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6097 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6098 /* binary operators exclude handle interpretations */
6099 switch (t2) {
6100 case -KEY_x:
6101 case -KEY_eq:
6102 case -KEY_ne:
6103 case -KEY_gt:
6104 case -KEY_lt:
6105 case -KEY_ge:
6106 case -KEY_le:
6107 case -KEY_cmp:
6108 break;
6109 default:
6110 PL_expect = XTERM; /* e.g. print $fh length() */
6111 break;
6112 }
6113 }
6114 else {
6115 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6116 }
6117 }
90771dc0
NC
6118 else if (isDIGIT(*s))
6119 PL_expect = XTERM; /* e.g. print $fh 3 */
6120 else if (*s == '.' && isDIGIT(s[1]))
6121 PL_expect = XTERM; /* e.g. print $fh .3 */
6122 else if ((*s == '?' || *s == '-' || *s == '+')
6123 && !isSPACE(s[1]) && s[1] != '=')
6124 PL_expect = XTERM; /* e.g. print $fh -1 */
6125 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6126 && s[1] != '/')
6127 PL_expect = XTERM; /* e.g. print $fh /.../
6128 XXX except DORDOR operator
6129 */
6130 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6131 && s[2] != '=')
6132 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6133 }
bbce6d69 6134 }
3280af22 6135 PL_pending_ident = '$';
79072805 6136 TOKEN('$');
378cc40b
LW
6137
6138 case '@':
3280af22 6139 if (PL_expect == XOPERATOR)
bbce6d69 6140 no_op("Array", s);
3280af22
NIS
6141 PL_tokenbuf[0] = '@';
6142 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6143 if (!PL_tokenbuf[1]) {
bbce6d69 6144 PREREF('@');
6145 }
3280af22 6146 if (PL_lex_state == LEX_NORMAL)
29595ff2 6147 s = SKIPSPACE1(s);
3280af22 6148 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6149 if (*s == '{')
3280af22 6150 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6151
6152 /* Warn about @ where they meant $. */
041457d9
DM
6153 if (*s == '[' || *s == '{') {
6154 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6155 const char *t = s + 1;
7e2040f0 6156 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
6157 t++;
6158 if (*t == '}' || *t == ']') {
6159 t++;
29595ff2 6160 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 6161 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 6162 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
6163 (int)(t-PL_bufptr), PL_bufptr,
6164 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 6165 }
93a17b20
LW
6166 }
6167 }
463ee0b2 6168 }
3280af22 6169 PL_pending_ident = '@';
79072805 6170 TERM('@');
378cc40b 6171
c963b151 6172 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6173 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6174 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6175 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6176 TOKEN(0);
6f33ba73
RGS
6177 s += 2;
6178 AOPERATOR(DORDOR);
6179 }
c963b151 6180 case '?': /* may either be conditional or pattern */
be25f609 6181 if (PL_expect == XOPERATOR) {
90771dc0 6182 char tmp = *s++;
c963b151 6183 if(tmp == '?') {
78cdf107
Z
6184 if (!PL_lex_allbrackets &&
6185 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6186 s--;
6187 TOKEN(0);
6188 }
6189 PL_lex_allbrackets++;
be25f609 6190 OPERATOR('?');
c963b151
BD
6191 }
6192 else {
6193 tmp = *s++;
6194 if(tmp == '/') {
6195 /* A // operator. */
78cdf107
Z
6196 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6197 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6198 LEX_FAKEEOF_LOGIC)) {
6199 s -= 2;
6200 TOKEN(0);
6201 }
c963b151
BD
6202 AOPERATOR(DORDOR);
6203 }
6204 else {
6205 s--;
78cdf107
Z
6206 if (*s == '=' && !PL_lex_allbrackets &&
6207 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6208 s--;
6209 TOKEN(0);
6210 }
c963b151
BD
6211 Mop(OP_DIVIDE);
6212 }
6213 }
6214 }
6215 else {
6216 /* Disable warning on "study /blah/" */
6217 if (PL_oldoldbufptr == PL_last_uni
6218 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6219 || memNE(PL_last_uni, "study", 5)
6220 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6221 ))
6222 check_uni();
725a61d7
Z
6223 if (*s == '?')
6224 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6225 s = scan_pat(s,OP_MATCH);
6226 TERM(sublex_start());
6227 }
378cc40b
LW
6228
6229 case '.':
51882d45
GS
6230 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6231#ifdef PERL_STRICT_CR
6232 && s[1] == '\n'
6233#else
6234 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6235#endif
6236 && (s == PL_linestart || s[-1] == '\n') )
6237 {
3280af22
NIS
6238 PL_lex_formbrack = 0;
6239 PL_expect = XSTATE;
79072805
LW
6240 goto rightbracket;
6241 }
be25f609 6242 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6243 s += 3;
6244 OPERATOR(YADAYADA);
6245 }
3280af22 6246 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6247 char tmp = *s++;
a687059c 6248 if (*s == tmp) {
78cdf107
Z
6249 if (!PL_lex_allbrackets &&
6250 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6251 s--;
6252 TOKEN(0);
6253 }
a687059c 6254 s++;
2f3197b3
LW
6255 if (*s == tmp) {
6256 s++;
6154021b 6257 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6258 }
6259 else
6154021b 6260 pl_yylval.ival = 0;
378cc40b 6261 OPERATOR(DOTDOT);
a687059c 6262 }
78cdf107
Z
6263 if (*s == '=' && !PL_lex_allbrackets &&
6264 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6265 s--;
6266 TOKEN(0);
6267 }
79072805 6268 Aop(OP_CONCAT);
378cc40b
LW
6269 }
6270 /* FALL THROUGH */
6271 case '0': case '1': case '2': case '3': case '4':
6272 case '5': case '6': case '7': case '8': case '9':
6154021b 6273 s = scan_num(s, &pl_yylval);
931e0695 6274 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6275 if (PL_expect == XOPERATOR)
8990e307 6276 no_op("Number",s);
79072805
LW
6277 TERM(THING);
6278
6279 case '\'':
5db06880 6280 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6281 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6282 if (PL_expect == XOPERATOR) {
6283 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6284 return deprecate_commaless_var_list();
a0d0e21e 6285 }
463ee0b2 6286 else
8990e307 6287 no_op("String",s);
463ee0b2 6288 }
79072805 6289 if (!s)
d4c19fe8 6290 missingterm(NULL);
6154021b 6291 pl_yylval.ival = OP_CONST;
79072805
LW
6292 TERM(sublex_start());
6293
6294 case '"':
5db06880 6295 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6296 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6297 if (PL_expect == XOPERATOR) {
6298 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6299 return deprecate_commaless_var_list();
a0d0e21e 6300 }
463ee0b2 6301 else
8990e307 6302 no_op("String",s);
463ee0b2 6303 }
79072805 6304 if (!s)
d4c19fe8 6305 missingterm(NULL);
6154021b 6306 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6307 /* FIXME. I think that this can be const if char *d is replaced by
6308 more localised variables. */
3280af22 6309 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6310 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6311 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6312 break;
6313 }
6314 }
79072805
LW
6315 TERM(sublex_start());
6316
6317 case '`':
5db06880 6318 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6319 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6320 if (PL_expect == XOPERATOR)
8990e307 6321 no_op("Backticks",s);
79072805 6322 if (!s)
d4c19fe8 6323 missingterm(NULL);
9b201d7d 6324 readpipe_override();
79072805
LW
6325 TERM(sublex_start());
6326
6327 case '\\':
6328 s++;
a2a5de95
NC
6329 if (PL_lex_inwhat && isDIGIT(*s))
6330 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6331 *s, *s);
3280af22 6332 if (PL_expect == XOPERATOR)
8990e307 6333 no_op("Backslash",s);
79072805
LW
6334 OPERATOR(REFGEN);
6335
a7cb1f99 6336 case 'v':
e526c9e6 6337 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6338 char *start = s + 2;
dd629d5b 6339 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6340 start++;
6341 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6342 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6343 TERM(THING);
6344 }
e526c9e6 6345 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6346 else if (!isALPHA(*start) && (PL_expect == XTERM
6347 || PL_expect == XREF || PL_expect == XSTATE
6348 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6349 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6350 if (!gv) {
6154021b 6351 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6352 TERM(THING);
6353 }
6354 }
a7cb1f99
GS
6355 }
6356 goto keylookup;
79072805 6357 case 'x':
3280af22 6358 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6359 s++;
6360 Mop(OP_REPEAT);
2f3197b3 6361 }
79072805
LW
6362 goto keylookup;
6363
378cc40b 6364 case '_':
79072805
LW
6365 case 'a': case 'A':
6366 case 'b': case 'B':
6367 case 'c': case 'C':
6368 case 'd': case 'D':
6369 case 'e': case 'E':
6370 case 'f': case 'F':
6371 case 'g': case 'G':
6372 case 'h': case 'H':
6373 case 'i': case 'I':
6374 case 'j': case 'J':
6375 case 'k': case 'K':
6376 case 'l': case 'L':
6377 case 'm': case 'M':
6378 case 'n': case 'N':
6379 case 'o': case 'O':
6380 case 'p': case 'P':
6381 case 'q': case 'Q':
6382 case 'r': case 'R':
6383 case 's': case 'S':
6384 case 't': case 'T':
6385 case 'u': case 'U':
a7cb1f99 6386 case 'V':
79072805
LW
6387 case 'w': case 'W':
6388 case 'X':
6389 case 'y': case 'Y':
6390 case 'z': case 'Z':
6391
49dc05e3 6392 keylookup: {
88e1f1a2 6393 bool anydelim;
90771dc0 6394 I32 tmp;
10edeb5d
JH
6395
6396 orig_keyword = 0;
6397 gv = NULL;
6398 gvp = NULL;
49dc05e3 6399
3280af22
NIS
6400 PL_bufptr = s;
6401 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6402
6403 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6404 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6405
6406 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6407 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6408 goto just_a_word;
6409
3643fb5f 6410 d = s;
3280af22 6411 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6412 d++; /* no comments skipped here, or s### is misparsed */
6413
748a9306 6414 /* Is this a word before a => operator? */
1c3923b3 6415 if (*d == '=' && d[1] == '>') {
748a9306 6416 CLINE;
6154021b 6417 pl_yylval.opval
d0a148a6
NC
6418 = (OP*)newSVOP(OP_CONST, 0,
6419 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6420 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6421 TERM(WORD);
6422 }
6423
88e1f1a2
JV
6424 /* Check for plugged-in keyword */
6425 {
6426 OP *o;
6427 int result;
6428 char *saved_bufptr = PL_bufptr;
6429 PL_bufptr = s;
16c91539 6430 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6431 s = PL_bufptr;
6432 if (result == KEYWORD_PLUGIN_DECLINE) {
6433 /* not a plugged-in keyword */
6434 PL_bufptr = saved_bufptr;
6435 } else if (result == KEYWORD_PLUGIN_STMT) {
6436 pl_yylval.opval = o;
6437 CLINE;
6438 PL_expect = XSTATE;
6439 return REPORT(PLUGSTMT);
6440 } else if (result == KEYWORD_PLUGIN_EXPR) {
6441 pl_yylval.opval = o;
6442 CLINE;
6443 PL_expect = XOPERATOR;
6444 return REPORT(PLUGEXPR);
6445 } else {
6446 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6447 PL_tokenbuf);
6448 }
6449 }
6450
6451 /* Check for built-in keyword */
6452 tmp = keyword(PL_tokenbuf, len, 0);
6453
6454 /* Is this a label? */
6455 if (!anydelim && PL_expect == XSTATE
6456 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6457 s = d + 1;
6458 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6459 CLINE;
6460 TOKEN(LABEL);
6461 }
6462
a0d0e21e 6463 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6464 GV *ogv = NULL; /* override (winner) */
6465 GV *hgv = NULL; /* hidden (loser) */
3280af22 6466 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6467 CV *cv;
90e5519e 6468 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6469 (cv = GvCVu(gv)))
6470 {
6471 if (GvIMPORTED_CV(gv))
6472 ogv = gv;
6473 else if (! CvMETHOD(cv))
6474 hgv = gv;
6475 }
6476 if (!ogv &&
3280af22 6477 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6478 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6479 GvCVu(gv) && GvIMPORTED_CV(gv))
6480 {
6481 ogv = gv;
6482 }
6483 }
6484 if (ogv) {
30fe34ed 6485 orig_keyword = tmp;
56f7f34b 6486 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6487 }
6488 else if (gv && !gvp
6489 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6490 && GvCVu(gv))
6e7b2336
GS
6491 {
6492 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6493 }
56f7f34b
CS
6494 else { /* no override */
6495 tmp = -tmp;
a2a5de95
NC
6496 if (tmp == KEY_dump) {
6497 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6498 "dump() better written as CORE::dump()");
ac206dc8 6499 }
a0714e2c 6500 gv = NULL;
56f7f34b 6501 gvp = 0;
a2a5de95
NC
6502 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6503 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6504 "Ambiguous call resolved as CORE::%s(), "
6505 "qualify as such or use &",
6506 GvENAME(hgv));
49dc05e3 6507 }
a0d0e21e
LW
6508 }
6509
6510 reserved_word:
6511 switch (tmp) {
79072805
LW
6512
6513 default: /* not a keyword */
0bfa2a8a
NC
6514 /* Trade off - by using this evil construction we can pull the
6515 variable gv into the block labelled keylookup. If not, then
6516 we have to give it function scope so that the goto from the
6517 earlier ':' case doesn't bypass the initialisation. */
6518 if (0) {
6519 just_a_word_zero_gv:
6520 gv = NULL;
6521 gvp = NULL;
8bee0991 6522 orig_keyword = 0;
0bfa2a8a 6523 }
93a17b20 6524 just_a_word: {
96e4d5b1 6525 SV *sv;
ce29ac45 6526 int pkgname = 0;
f54cb97a 6527 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6528 OP *rv2cv_op;
5069cc75 6529 CV *cv;
5db06880 6530#ifdef PERL_MAD
cd81e915 6531 SV *nextPL_nextwhite = 0;
5db06880
NC
6532#endif
6533
8990e307
LW
6534
6535 /* Get the rest if it looks like a package qualifier */
6536
155aba94 6537 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6538 STRLEN morelen;
3280af22 6539 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6540 TRUE, &morelen);
6541 if (!morelen)
cea2e8a9 6542 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6543 *s == '\'' ? "'" : "::");
c3e0f903 6544 len += morelen;
ce29ac45 6545 pkgname = 1;
a0d0e21e 6546 }
8990e307 6547
3280af22
NIS
6548 if (PL_expect == XOPERATOR) {
6549 if (PL_bufptr == PL_linestart) {
57843af0 6550 CopLINE_dec(PL_curcop);
f1f66076 6551 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6552 CopLINE_inc(PL_curcop);
463ee0b2
LW
6553 }
6554 else
54310121 6555 no_op("Bareword",s);
463ee0b2 6556 }
8990e307 6557
c3e0f903 6558 /* Look for a subroutine with this name in current package,
486ec47a 6559 unless name is "Foo::", in which case Foo is a bareword
c3e0f903
GS
6560 (and a package name). */
6561
5db06880 6562 if (len > 2 && !PL_madskills &&
3280af22 6563 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6564 {
f776e3cd 6565 if (ckWARN(WARN_BAREWORD)
90e5519e 6566 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6567 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6568 "Bareword \"%s\" refers to nonexistent package",
3280af22 6569 PL_tokenbuf);
c3e0f903 6570 len -= 2;
3280af22 6571 PL_tokenbuf[len] = '\0';
a0714e2c 6572 gv = NULL;
c3e0f903
GS
6573 gvp = 0;
6574 }
6575 else {
62d55b22
NC
6576 if (!gv) {
6577 /* Mustn't actually add anything to a symbol table.
6578 But also don't want to "initialise" any placeholder
6579 constants that might already be there into full
6580 blown PVGVs with attached PVCV. */
90e5519e
NC
6581 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6582 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6583 }
b3d904f3 6584 len = 0;
c3e0f903
GS
6585 }
6586
6587 /* if we saw a global override before, get the right name */
8990e307 6588
37bb7629
EB
6589 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6590 len ? len : strlen(PL_tokenbuf));
49dc05e3 6591 if (gvp) {
37bb7629 6592 SV * const tmp_sv = sv;
396482e1 6593 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6594 sv_catsv(sv, tmp_sv);
6595 SvREFCNT_dec(tmp_sv);
8a7a129d 6596 }
37bb7629 6597
5db06880 6598#ifdef PERL_MAD
cd81e915
NC
6599 if (PL_madskills && !PL_thistoken) {
6600 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6601 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6602 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6603 }
6604#endif
8990e307 6605
a0d0e21e 6606 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6607 CLINE;
6154021b
RGS
6608 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6609 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6610
c3e0f903 6611 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6612 if (len)
6613 goto safe_bareword;
6614
f7461760 6615 {
d8ebba9f 6616 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
6617 const_op->op_private = OPpCONST_BARE;
6618 rv2cv_op = newCVREF(0, const_op);
6619 }
d9088386 6620 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6621
8990e307
LW
6622 /* See if it's the indirect object for a list operator. */
6623
3280af22
NIS
6624 if (PL_oldoldbufptr &&
6625 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6626 (PL_oldoldbufptr == PL_last_lop
6627 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6628 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6629 (PL_expect == XREF ||
6630 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6631 {
748a9306
LW
6632 bool immediate_paren = *s == '(';
6633
a0d0e21e 6634 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6635 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6636#ifdef PERL_MAD
cd81e915 6637 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6638#endif
a0d0e21e
LW
6639
6640 /* Two barewords in a row may indicate method call. */
6641
62d55b22 6642 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6643 (tmp = intuit_method(s, gv, cv))) {
6644 op_free(rv2cv_op);
78cdf107
Z
6645 if (tmp == METHOD && !PL_lex_allbrackets &&
6646 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6647 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6648 return REPORT(tmp);
f7461760 6649 }
a0d0e21e
LW
6650
6651 /* If not a declared subroutine, it's an indirect object. */
6652 /* (But it's an indir obj regardless for sort.) */
7294df96 6653 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6654
7294df96
RGS
6655 if (
6656 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6657 (!cv &&
a9ef352a 6658 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6659 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6660 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6661 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6662 )
a9ef352a 6663 {
3280af22 6664 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6665 goto bareword;
93a17b20
LW
6666 }
6667 }
8990e307 6668
3280af22 6669 PL_expect = XOPERATOR;
5db06880
NC
6670#ifdef PERL_MAD
6671 if (isSPACE(*s))
cd81e915
NC
6672 s = SKIPSPACE2(s,nextPL_nextwhite);
6673 PL_nextwhite = nextPL_nextwhite;
5db06880 6674#else
8990e307 6675 s = skipspace(s);
5db06880 6676#endif
1c3923b3
GS
6677
6678 /* Is this a word before a => operator? */
ce29ac45 6679 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6680 op_free(rv2cv_op);
1c3923b3 6681 CLINE;
6154021b 6682 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6683 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6684 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6685 TERM(WORD);
6686 }
6687
6688 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6689 if (*s == '(') {
79072805 6690 CLINE;
5069cc75 6691 if (cv) {
c35e046a
AL
6692 d = s + 1;
6693 while (SPACE_OR_TAB(*d))
6694 d++;
f7461760 6695 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6696 s = d + 1;
c631f32b 6697 goto its_constant;
96e4d5b1 6698 }
6699 }
5db06880
NC
6700#ifdef PERL_MAD
6701 if (PL_madskills) {
cd81e915
NC
6702 PL_nextwhite = PL_thiswhite;
6703 PL_thiswhite = 0;
5db06880 6704 }
cd81e915 6705 start_force(PL_curforce);
5db06880 6706#endif
6154021b 6707 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6708 PL_expect = XOPERATOR;
5db06880
NC
6709#ifdef PERL_MAD
6710 if (PL_madskills) {
cd81e915
NC
6711 PL_nextwhite = nextPL_nextwhite;
6712 curmad('X', PL_thistoken);
6b29d1f5 6713 PL_thistoken = newSVpvs("");
5db06880
NC
6714 }
6715#endif
f7461760 6716 op_free(rv2cv_op);
93a17b20 6717 force_next(WORD);
6154021b 6718 pl_yylval.ival = 0;
463ee0b2 6719 TOKEN('&');
79072805 6720 }
93a17b20 6721
a0d0e21e 6722 /* If followed by var or block, call it a method (unless sub) */
8990e307 6723
f7461760
Z
6724 if ((*s == '$' || *s == '{') && !cv) {
6725 op_free(rv2cv_op);
3280af22
NIS
6726 PL_last_lop = PL_oldbufptr;
6727 PL_last_lop_op = OP_METHOD;
78cdf107
Z
6728 if (!PL_lex_allbrackets &&
6729 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6730 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 6731 PREBLOCK(METHOD);
463ee0b2
LW
6732 }
6733
8990e307
LW
6734 /* If followed by a bareword, see if it looks like indir obj. */
6735
30fe34ed
RGS
6736 if (!orig_keyword
6737 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6738 && (tmp = intuit_method(s, gv, cv))) {
6739 op_free(rv2cv_op);
78cdf107
Z
6740 if (tmp == METHOD && !PL_lex_allbrackets &&
6741 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6742 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6743 return REPORT(tmp);
f7461760 6744 }
93a17b20 6745
8990e307
LW
6746 /* Not a method, so call it a subroutine (if defined) */
6747
5069cc75 6748 if (cv) {
9b387841
NC
6749 if (lastchar == '-')
6750 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6751 "Ambiguous use of -%s resolved as -&%s()",
6752 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6753 /* Check for a constant sub */
f7461760 6754 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6755 its_constant:
f7461760 6756 op_free(rv2cv_op);
6154021b
RGS
6757 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6758 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6759 pl_yylval.opval->op_private = 0;
6b7c6d95 6760 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 6761 TOKEN(WORD);
89bfa8cd 6762 }
6763
6154021b 6764 op_free(pl_yylval.opval);
f7461760 6765 pl_yylval.opval = rv2cv_op;
6154021b 6766 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6767 PL_last_lop = PL_oldbufptr;
bf848113 6768 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6769 /* Is there a prototype? */
5db06880
NC
6770 if (
6771#ifdef PERL_MAD
6772 cv &&
6773#endif
d9f2850e
RGS
6774 SvPOK(cv))
6775 {
5f66b61c 6776 STRLEN protolen;
daba3364 6777 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6778 if (!protolen)
4633a7c4 6779 TERM(FUNC0SUB);
0f5d0394
AE
6780 while (*proto == ';')
6781 proto++;
649d02de
FC
6782 if (
6783 (
6784 (
6785 *proto == '$' || *proto == '_'
c035a075 6786 || *proto == '*' || *proto == '+'
649d02de
FC
6787 )
6788 && proto[1] == '\0'
6789 )
6790 || (
6791 *proto == '\\' && proto[1] && proto[2] == '\0'
6792 )
6793 )
6794 OPERATOR(UNIOPSUB);
6795 if (*proto == '\\' && proto[1] == '[') {
6796 const char *p = proto + 2;
6797 while(*p && *p != ']')
6798 ++p;
6799 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6800 }
7a52d87a 6801 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6802 if (PL_curstash)
6803 sv_setpvs(PL_subname, "__ANON__");
6804 else
6805 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
6806 if (!PL_lex_allbrackets &&
6807 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6808 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
6809 PREBLOCK(LSTOPSUB);
6810 }
a9ef352a 6811 }
5db06880
NC
6812#ifdef PERL_MAD
6813 {
6814 if (PL_madskills) {
cd81e915
NC
6815 PL_nextwhite = PL_thiswhite;
6816 PL_thiswhite = 0;
5db06880 6817 }
cd81e915 6818 start_force(PL_curforce);
6154021b 6819 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6820 PL_expect = XTERM;
6821 if (PL_madskills) {
cd81e915
NC
6822 PL_nextwhite = nextPL_nextwhite;
6823 curmad('X', PL_thistoken);
6b29d1f5 6824 PL_thistoken = newSVpvs("");
5db06880
NC
6825 }
6826 force_next(WORD);
78cdf107
Z
6827 if (!PL_lex_allbrackets &&
6828 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6829 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6830 TOKEN(NOAMP);
6831 }
6832 }
6833
6834 /* Guess harder when madskills require "best effort". */
6835 if (PL_madskills && (!gv || !GvCVu(gv))) {
6836 int probable_sub = 0;
6837 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6838 probable_sub = 1;
6839 else if (isALPHA(*s)) {
6840 char tmpbuf[1024];
6841 STRLEN tmplen;
6842 d = s;
6843 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6844 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6845 probable_sub = 1;
6846 else {
6847 while (d < PL_bufend && isSPACE(*d))
6848 d++;
6849 if (*d == '=' && d[1] == '>')
6850 probable_sub = 1;
6851 }
6852 }
6853 if (probable_sub) {
7a6d04f4 6854 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6855 op_free(pl_yylval.opval);
f7461760 6856 pl_yylval.opval = rv2cv_op;
6154021b 6857 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6858 PL_last_lop = PL_oldbufptr;
6859 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6860 PL_nextwhite = PL_thiswhite;
6861 PL_thiswhite = 0;
6862 start_force(PL_curforce);
6154021b 6863 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6864 PL_expect = XTERM;
cd81e915
NC
6865 PL_nextwhite = nextPL_nextwhite;
6866 curmad('X', PL_thistoken);
6b29d1f5 6867 PL_thistoken = newSVpvs("");
5db06880 6868 force_next(WORD);
78cdf107
Z
6869 if (!PL_lex_allbrackets &&
6870 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6871 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6872 TOKEN(NOAMP);
6873 }
6874#else
6154021b 6875 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6876 PL_expect = XTERM;
8990e307 6877 force_next(WORD);
78cdf107
Z
6878 if (!PL_lex_allbrackets &&
6879 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6880 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 6881 TOKEN(NOAMP);
5db06880 6882#endif
8990e307 6883 }
748a9306 6884
8990e307
LW
6885 /* Call it a bare word */
6886
5603f27d 6887 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6888 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6889 else {
9a073a1d
RGS
6890 bareword:
6891 /* after "print" and similar functions (corresponding to
6892 * "F? L" in opcode.pl), whatever wasn't already parsed as
6893 * a filehandle should be subject to "strict subs".
6894 * Likewise for the optional indirect-object argument to system
6895 * or exec, which can't be a bareword */
6896 if ((PL_last_lop_op == OP_PRINT
6897 || PL_last_lop_op == OP_PRTF
6898 || PL_last_lop_op == OP_SAY
6899 || PL_last_lop_op == OP_SYSTEM
6900 || PL_last_lop_op == OP_EXEC)
6901 && (PL_hints & HINT_STRICT_SUBS))
6902 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6903 if (lastchar != '-') {
6904 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6905 d = PL_tokenbuf;
6906 while (isLOWER(*d))
6907 d++;
da51bb9b 6908 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6909 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6910 PL_tokenbuf);
6911 }
748a9306
LW
6912 }
6913 }
f7461760 6914 op_free(rv2cv_op);
c3e0f903
GS
6915
6916 safe_bareword:
9b387841
NC
6917 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6918 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6919 "Operator or semicolon missing before %c%s",
6920 lastchar, PL_tokenbuf);
6921 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6922 "Ambiguous use of %c resolved as operator %c",
6923 lastchar, lastchar);
748a9306 6924 }
93a17b20 6925 TOKEN(WORD);
79072805 6926 }
79072805 6927
68dc0745 6928 case KEY___FILE__:
7eb971ee
FC
6929 FUN0OP(
6930 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6931 newSVpv(CopFILE(PL_curcop),0))
6932 );
46fc3d4c 6933
79072805 6934 case KEY___LINE__:
7eb971ee
FC
6935 FUN0OP(
6936 (OP*)newSVOP(OP_CONST, 0,
6937 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6938 );
68dc0745 6939
6940 case KEY___PACKAGE__:
7eb971ee
FC
6941 FUN0OP(
6942 (OP*)newSVOP(OP_CONST, 0,
3280af22 6943 (PL_curstash
5aaec2b4 6944 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
6945 : &PL_sv_undef))
6946 );
79072805 6947
e50aee73 6948 case KEY___DATA__:
79072805
LW
6949 case KEY___END__: {
6950 GV *gv;
3280af22 6951 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6952 const char *pname = "main";
3280af22 6953 if (PL_tokenbuf[2] == 'D')
bfcb3514 6954 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6955 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6956 SVt_PVIO);
a5f75d66 6957 GvMULTI_on(gv);
79072805 6958 if (!GvIO(gv))
a0d0e21e 6959 GvIOp(gv) = newIO();
3280af22 6960 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6961#if defined(HAS_FCNTL) && defined(F_SETFD)
6962 {
f54cb97a 6963 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6964 fcntl(fd,F_SETFD,fd >= 3);
6965 }
79072805 6966#endif
fd049845 6967 /* Mark this internal pseudo-handle as clean */
6968 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6969 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6970 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6971 else
50952442 6972 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6973#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6974 /* if the script was opened in binmode, we need to revert
53129d29 6975 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6976 * XXX this is a questionable hack at best. */
53129d29
GS
6977 if (PL_bufend-PL_bufptr > 2
6978 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6979 {
6980 Off_t loc = 0;
50952442 6981 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6982 loc = PerlIO_tell(PL_rsfp);
6983 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6984 }
2986a63f
JH
6985#ifdef NETWARE
6986 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6987#else
c39cd008 6988 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6989#endif /* NETWARE */
1143fce0
JH
6990#ifdef PERLIO_IS_STDIO /* really? */
6991# if defined(__BORLANDC__)
cb359b41
JH
6992 /* XXX see note in do_binmode() */
6993 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6994# endif
6995#endif
c39cd008
GS
6996 if (loc > 0)
6997 PerlIO_seek(PL_rsfp, loc, 0);
6998 }
6999 }
7000#endif
7948272d 7001#ifdef PERLIO_LAYERS
52d2e0f4
JH
7002 if (!IN_BYTES) {
7003 if (UTF)
7004 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7005 else if (PL_encoding) {
7006 SV *name;
7007 dSP;
7008 ENTER;
7009 SAVETMPS;
7010 PUSHMARK(sp);
7011 EXTEND(SP, 1);
7012 XPUSHs(PL_encoding);
7013 PUTBACK;
7014 call_method("name", G_SCALAR);
7015 SPAGAIN;
7016 name = POPs;
7017 PUTBACK;
bfed75c6 7018 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7019 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7020 SVfARG(name)));
52d2e0f4
JH
7021 FREETMPS;
7022 LEAVE;
7023 }
7024 }
7948272d 7025#endif
5db06880
NC
7026#ifdef PERL_MAD
7027 if (PL_madskills) {
cd81e915
NC
7028 if (PL_realtokenstart >= 0) {
7029 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7030 if (!PL_endwhite)
6b29d1f5 7031 PL_endwhite = newSVpvs("");
cd81e915
NC
7032 sv_catsv(PL_endwhite, PL_thiswhite);
7033 PL_thiswhite = 0;
7034 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7035 PL_realtokenstart = -1;
5db06880 7036 }
5cc814fd
NC
7037 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7038 != NULL) ;
5db06880
NC
7039 }
7040#endif
4608196e 7041 PL_rsfp = NULL;
79072805
LW
7042 }
7043 goto fake_eof;
e929a76b 7044 }
de3bb511 7045
8990e307 7046 case KEY_AUTOLOAD:
ed6116ce 7047 case KEY_DESTROY:
79072805 7048 case KEY_BEGIN:
3c10abe3 7049 case KEY_UNITCHECK:
7d30b5c4 7050 case KEY_CHECK:
7d07dbc2 7051 case KEY_INIT:
7d30b5c4 7052 case KEY_END:
3280af22
NIS
7053 if (PL_expect == XSTATE) {
7054 s = PL_bufptr;
93a17b20 7055 goto really_sub;
79072805
LW
7056 }
7057 goto just_a_word;
7058
a0d0e21e
LW
7059 case KEY_CORE:
7060 if (*s == ':' && s[1] == ':') {
7061 s += 2;
748a9306 7062 d = s;
3280af22 7063 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
9dcb8368 7064 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
6798c92b 7065 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
7066 if (tmp < 0)
7067 tmp = -tmp;
a5c70c4d 7068 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 7069 /* that's a way to remember we saw "CORE::" */
850e8516 7070 orig_keyword = tmp;
a0d0e21e
LW
7071 goto reserved_word;
7072 }
7073 goto just_a_word;
7074
463ee0b2
LW
7075 case KEY_abs:
7076 UNI(OP_ABS);
7077
79072805
LW
7078 case KEY_alarm:
7079 UNI(OP_ALARM);
7080
7081 case KEY_accept:
a0d0e21e 7082 LOP(OP_ACCEPT,XTERM);
79072805 7083
463ee0b2 7084 case KEY_and:
78cdf107
Z
7085 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7086 return REPORT(0);
463ee0b2
LW
7087 OPERATOR(ANDOP);
7088
79072805 7089 case KEY_atan2:
a0d0e21e 7090 LOP(OP_ATAN2,XTERM);
85e6fe83 7091
79072805 7092 case KEY_bind:
a0d0e21e 7093 LOP(OP_BIND,XTERM);
79072805
LW
7094
7095 case KEY_binmode:
1c1fc3ea 7096 LOP(OP_BINMODE,XTERM);
79072805
LW
7097
7098 case KEY_bless:
a0d0e21e 7099 LOP(OP_BLESS,XTERM);
79072805 7100
0d863452
RH
7101 case KEY_break:
7102 FUN0(OP_BREAK);
7103
79072805
LW
7104 case KEY_chop:
7105 UNI(OP_CHOP);
7106
7107 case KEY_continue:
0d863452
RH
7108 /* We have to disambiguate the two senses of
7109 "continue". If the next token is a '{' then
7110 treat it as the start of a continue block;
7111 otherwise treat it as a control operator.
7112 */
7113 s = skipspace(s);
7114 if (*s == '{')
79072805 7115 PREBLOCK(CONTINUE);
0d863452
RH
7116 else
7117 FUN0(OP_CONTINUE);
79072805
LW
7118
7119 case KEY_chdir:
fafc274c
NC
7120 /* may use HOME */
7121 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7122 UNI(OP_CHDIR);
7123
7124 case KEY_close:
7125 UNI(OP_CLOSE);
7126
7127 case KEY_closedir:
7128 UNI(OP_CLOSEDIR);
7129
7130 case KEY_cmp:
78cdf107
Z
7131 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7132 return REPORT(0);
79072805
LW
7133 Eop(OP_SCMP);
7134
7135 case KEY_caller:
7136 UNI(OP_CALLER);
7137
7138 case KEY_crypt:
7139#ifdef FCRYPT
f4c556ac
GS
7140 if (!PL_cryptseen) {
7141 PL_cryptseen = TRUE;
de3bb511 7142 init_des();
f4c556ac 7143 }
a687059c 7144#endif
a0d0e21e 7145 LOP(OP_CRYPT,XTERM);
79072805
LW
7146
7147 case KEY_chmod:
a0d0e21e 7148 LOP(OP_CHMOD,XTERM);
79072805
LW
7149
7150 case KEY_chown:
a0d0e21e 7151 LOP(OP_CHOWN,XTERM);
79072805
LW
7152
7153 case KEY_connect:
a0d0e21e 7154 LOP(OP_CONNECT,XTERM);
79072805 7155
463ee0b2
LW
7156 case KEY_chr:
7157 UNI(OP_CHR);
7158
79072805
LW
7159 case KEY_cos:
7160 UNI(OP_COS);
7161
7162 case KEY_chroot:
7163 UNI(OP_CHROOT);
7164
0d863452
RH
7165 case KEY_default:
7166 PREBLOCK(DEFAULT);
7167
79072805 7168 case KEY_do:
29595ff2 7169 s = SKIPSPACE1(s);
79072805 7170 if (*s == '{')
a0d0e21e 7171 PRETERMBLOCK(DO);
79072805 7172 if (*s != '\'')
89c5585f 7173 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
7174 if (orig_keyword == KEY_do) {
7175 orig_keyword = 0;
6154021b 7176 pl_yylval.ival = 1;
850e8516
RGS
7177 }
7178 else
6154021b 7179 pl_yylval.ival = 0;
378cc40b 7180 OPERATOR(DO);
79072805
LW
7181
7182 case KEY_die:
3280af22 7183 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7184 LOP(OP_DIE,XTERM);
79072805
LW
7185
7186 case KEY_defined:
7187 UNI(OP_DEFINED);
7188
7189 case KEY_delete:
a0d0e21e 7190 UNI(OP_DELETE);
79072805
LW
7191
7192 case KEY_dbmopen:
74e8ce34
NC
7193 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7194 STR_WITH_LEN("NDBM_File::"),
7195 STR_WITH_LEN("DB_File::"),
7196 STR_WITH_LEN("GDBM_File::"),
7197 STR_WITH_LEN("SDBM_File::"),
7198 STR_WITH_LEN("ODBM_File::"),
7199 NULL);
a0d0e21e 7200 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7201
7202 case KEY_dbmclose:
7203 UNI(OP_DBMCLOSE);
7204
7205 case KEY_dump:
a0d0e21e 7206 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7207 LOOPX(OP_DUMP);
7208
7209 case KEY_else:
7210 PREBLOCK(ELSE);
7211
7212 case KEY_elsif:
6154021b 7213 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7214 OPERATOR(ELSIF);
7215
7216 case KEY_eq:
78cdf107
Z
7217 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7218 return REPORT(0);
79072805
LW
7219 Eop(OP_SEQ);
7220
a0d0e21e
LW
7221 case KEY_exists:
7222 UNI(OP_EXISTS);
4e553d73 7223
79072805 7224 case KEY_exit:
5db06880
NC
7225 if (PL_madskills)
7226 UNI(OP_INT);
79072805
LW
7227 UNI(OP_EXIT);
7228
7229 case KEY_eval:
29595ff2 7230 s = SKIPSPACE1(s);
32e2a35d
RGS
7231 if (*s == '{') { /* block eval */
7232 PL_expect = XTERMBLOCK;
7233 UNIBRACK(OP_ENTERTRY);
7234 }
7235 else { /* string eval */
7236 PL_expect = XTERM;
7237 UNIBRACK(OP_ENTEREVAL);
7238 }
79072805
LW
7239
7240 case KEY_eof:
7241 UNI(OP_EOF);
7242
7243 case KEY_exp:
7244 UNI(OP_EXP);
7245
7246 case KEY_each:
7247 UNI(OP_EACH);
7248
7249 case KEY_exec:
a0d0e21e 7250 LOP(OP_EXEC,XREF);
79072805
LW
7251
7252 case KEY_endhostent:
7253 FUN0(OP_EHOSTENT);
7254
7255 case KEY_endnetent:
7256 FUN0(OP_ENETENT);
7257
7258 case KEY_endservent:
7259 FUN0(OP_ESERVENT);
7260
7261 case KEY_endprotoent:
7262 FUN0(OP_EPROTOENT);
7263
7264 case KEY_endpwent:
7265 FUN0(OP_EPWENT);
7266
7267 case KEY_endgrent:
7268 FUN0(OP_EGRENT);
7269
7270 case KEY_for:
7271 case KEY_foreach:
78cdf107
Z
7272 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7273 return REPORT(0);
6154021b 7274 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7275 s = SKIPSPACE1(s);
7e2040f0 7276 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7277 char *p = s;
5db06880
NC
7278#ifdef PERL_MAD
7279 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7280#endif
7281
3280af22 7282 if ((PL_bufend - p) >= 3 &&
55497cff 7283 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7284 p += 2;
77ca0c92
LW
7285 else if ((PL_bufend - p) >= 4 &&
7286 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7287 p += 3;
29595ff2 7288 p = PEEKSPACE(p);
7e2040f0 7289 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7290 p = scan_ident(p, PL_bufend,
7291 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7292 p = PEEKSPACE(p);
77ca0c92
LW
7293 }
7294 if (*p != '$')
cea2e8a9 7295 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7296#ifdef PERL_MAD
7297 s = SvPVX(PL_linestr) + soff;
7298#endif
55497cff 7299 }
79072805
LW
7300 OPERATOR(FOR);
7301
7302 case KEY_formline:
a0d0e21e 7303 LOP(OP_FORMLINE,XTERM);
79072805
LW
7304
7305 case KEY_fork:
7306 FUN0(OP_FORK);
7307
7308 case KEY_fcntl:
a0d0e21e 7309 LOP(OP_FCNTL,XTERM);
79072805
LW
7310
7311 case KEY_fileno:
7312 UNI(OP_FILENO);
7313
7314 case KEY_flock:
a0d0e21e 7315 LOP(OP_FLOCK,XTERM);
79072805
LW
7316
7317 case KEY_gt:
78cdf107
Z
7318 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7319 return REPORT(0);
79072805
LW
7320 Rop(OP_SGT);
7321
7322 case KEY_ge:
78cdf107
Z
7323 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7324 return REPORT(0);
79072805
LW
7325 Rop(OP_SGE);
7326
7327 case KEY_grep:
2c38e13d 7328 LOP(OP_GREPSTART, XREF);
79072805
LW
7329
7330 case KEY_goto:
a0d0e21e 7331 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7332 LOOPX(OP_GOTO);
7333
7334 case KEY_gmtime:
7335 UNI(OP_GMTIME);
7336
7337 case KEY_getc:
6f33ba73 7338 UNIDOR(OP_GETC);
79072805
LW
7339
7340 case KEY_getppid:
7341 FUN0(OP_GETPPID);
7342
7343 case KEY_getpgrp:
7344 UNI(OP_GETPGRP);
7345
7346 case KEY_getpriority:
a0d0e21e 7347 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7348
7349 case KEY_getprotobyname:
7350 UNI(OP_GPBYNAME);
7351
7352 case KEY_getprotobynumber:
a0d0e21e 7353 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7354
7355 case KEY_getprotoent:
7356 FUN0(OP_GPROTOENT);
7357
7358 case KEY_getpwent:
7359 FUN0(OP_GPWENT);
7360
7361 case KEY_getpwnam:
ff68c719 7362 UNI(OP_GPWNAM);
79072805
LW
7363
7364 case KEY_getpwuid:
ff68c719 7365 UNI(OP_GPWUID);
79072805
LW
7366
7367 case KEY_getpeername:
7368 UNI(OP_GETPEERNAME);
7369
7370 case KEY_gethostbyname:
7371 UNI(OP_GHBYNAME);
7372
7373 case KEY_gethostbyaddr:
a0d0e21e 7374 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7375
7376 case KEY_gethostent:
7377 FUN0(OP_GHOSTENT);
7378
7379 case KEY_getnetbyname:
7380 UNI(OP_GNBYNAME);
7381
7382 case KEY_getnetbyaddr:
a0d0e21e 7383 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7384
7385 case KEY_getnetent:
7386 FUN0(OP_GNETENT);
7387
7388 case KEY_getservbyname:
a0d0e21e 7389 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7390
7391 case KEY_getservbyport:
a0d0e21e 7392 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7393
7394 case KEY_getservent:
7395 FUN0(OP_GSERVENT);
7396
7397 case KEY_getsockname:
7398 UNI(OP_GETSOCKNAME);
7399
7400 case KEY_getsockopt:
a0d0e21e 7401 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7402
7403 case KEY_getgrent:
7404 FUN0(OP_GGRENT);
7405
7406 case KEY_getgrnam:
ff68c719 7407 UNI(OP_GGRNAM);
79072805
LW
7408
7409 case KEY_getgrgid:
ff68c719 7410 UNI(OP_GGRGID);
79072805
LW
7411
7412 case KEY_getlogin:
7413 FUN0(OP_GETLOGIN);
7414
0d863452 7415 case KEY_given:
6154021b 7416 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7417 OPERATOR(GIVEN);
7418
93a17b20 7419 case KEY_glob:
a0d0e21e 7420 LOP(OP_GLOB,XTERM);
93a17b20 7421
79072805
LW
7422 case KEY_hex:
7423 UNI(OP_HEX);
7424
7425 case KEY_if:
78cdf107
Z
7426 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7427 return REPORT(0);
6154021b 7428 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7429 OPERATOR(IF);
7430
7431 case KEY_index:
a0d0e21e 7432 LOP(OP_INDEX,XTERM);
79072805
LW
7433
7434 case KEY_int:
7435 UNI(OP_INT);
7436
7437 case KEY_ioctl:
a0d0e21e 7438 LOP(OP_IOCTL,XTERM);
79072805
LW
7439
7440 case KEY_join:
a0d0e21e 7441 LOP(OP_JOIN,XTERM);
79072805
LW
7442
7443 case KEY_keys:
7444 UNI(OP_KEYS);
7445
7446 case KEY_kill:
a0d0e21e 7447 LOP(OP_KILL,XTERM);
79072805
LW
7448
7449 case KEY_last:
a0d0e21e 7450 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7451 LOOPX(OP_LAST);
4e553d73 7452
79072805
LW
7453 case KEY_lc:
7454 UNI(OP_LC);
7455
7456 case KEY_lcfirst:
7457 UNI(OP_LCFIRST);
7458
7459 case KEY_local:
6154021b 7460 pl_yylval.ival = 0;
79072805
LW
7461 OPERATOR(LOCAL);
7462
7463 case KEY_length:
7464 UNI(OP_LENGTH);
7465
7466 case KEY_lt:
78cdf107
Z
7467 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7468 return REPORT(0);
79072805
LW
7469 Rop(OP_SLT);
7470
7471 case KEY_le:
78cdf107
Z
7472 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7473 return REPORT(0);
79072805
LW
7474 Rop(OP_SLE);
7475
7476 case KEY_localtime:
7477 UNI(OP_LOCALTIME);
7478
7479 case KEY_log:
7480 UNI(OP_LOG);
7481
7482 case KEY_link:
a0d0e21e 7483 LOP(OP_LINK,XTERM);
79072805
LW
7484
7485 case KEY_listen:
a0d0e21e 7486 LOP(OP_LISTEN,XTERM);
79072805 7487
c0329465
MB
7488 case KEY_lock:
7489 UNI(OP_LOCK);
7490
79072805
LW
7491 case KEY_lstat:
7492 UNI(OP_LSTAT);
7493
7494 case KEY_m:
8782bef2 7495 s = scan_pat(s,OP_MATCH);
79072805
LW
7496 TERM(sublex_start());
7497
a0d0e21e 7498 case KEY_map:
2c38e13d 7499 LOP(OP_MAPSTART, XREF);
4e4e412b 7500
79072805 7501 case KEY_mkdir:
a0d0e21e 7502 LOP(OP_MKDIR,XTERM);
79072805
LW
7503
7504 case KEY_msgctl:
a0d0e21e 7505 LOP(OP_MSGCTL,XTERM);
79072805
LW
7506
7507 case KEY_msgget:
a0d0e21e 7508 LOP(OP_MSGGET,XTERM);
79072805
LW
7509
7510 case KEY_msgrcv:
a0d0e21e 7511 LOP(OP_MSGRCV,XTERM);
79072805
LW
7512
7513 case KEY_msgsnd:
a0d0e21e 7514 LOP(OP_MSGSND,XTERM);
79072805 7515
77ca0c92 7516 case KEY_our:
93a17b20 7517 case KEY_my:
952306ac 7518 case KEY_state:
eac04b2e 7519 PL_in_my = (U16)tmp;
29595ff2 7520 s = SKIPSPACE1(s);
7e2040f0 7521 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7522#ifdef PERL_MAD
7523 char* start = s;
7524#endif
3280af22 7525 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7526 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7527 goto really_sub;
def3634b 7528 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7529 if (!PL_in_my_stash) {
c750a3ec 7530 char tmpbuf[1024];
3280af22 7531 PL_bufptr = s;
d9fad198 7532 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7533 yyerror(tmpbuf);
7534 }
5db06880
NC
7535#ifdef PERL_MAD
7536 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7537 sv_catsv(PL_thistoken, PL_nextwhite);
7538 PL_nextwhite = 0;
7539 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7540 }
7541#endif
c750a3ec 7542 }
6154021b 7543 pl_yylval.ival = 1;
55497cff 7544 OPERATOR(MY);
93a17b20 7545
79072805 7546 case KEY_next:
a0d0e21e 7547 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7548 LOOPX(OP_NEXT);
7549
7550 case KEY_ne:
78cdf107
Z
7551 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7552 return REPORT(0);
79072805
LW
7553 Eop(OP_SNE);
7554
a0d0e21e 7555 case KEY_no:
468aa647 7556 s = tokenize_use(0, s);
a0d0e21e
LW
7557 OPERATOR(USE);
7558
7559 case KEY_not:
29595ff2 7560 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 7561 FUN1(OP_NOT);
78cdf107
Z
7562 else {
7563 if (!PL_lex_allbrackets &&
7564 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7565 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 7566 OPERATOR(NOTOP);
78cdf107 7567 }
a0d0e21e 7568
79072805 7569 case KEY_open:
29595ff2 7570 s = SKIPSPACE1(s);
7e2040f0 7571 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7572 const char *t;
c35e046a
AL
7573 for (d = s; isALNUM_lazy_if(d,UTF);)
7574 d++;
7575 for (t=d; isSPACE(*t);)
7576 t++;
e2ab214b 7577 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7578 /* [perl #16184] */
7579 && !(t[0] == '=' && t[1] == '>')
7580 ) {
5f66b61c 7581 int parms_len = (int)(d-s);
9014280d 7582 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7583 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7584 parms_len, s, parms_len, s);
66fbe8fb 7585 }
93a17b20 7586 }
a0d0e21e 7587 LOP(OP_OPEN,XTERM);
79072805 7588
463ee0b2 7589 case KEY_or:
78cdf107
Z
7590 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7591 return REPORT(0);
6154021b 7592 pl_yylval.ival = OP_OR;
463ee0b2
LW
7593 OPERATOR(OROP);
7594
79072805
LW
7595 case KEY_ord:
7596 UNI(OP_ORD);
7597
7598 case KEY_oct:
7599 UNI(OP_OCT);
7600
7601 case KEY_opendir:
a0d0e21e 7602 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7603
7604 case KEY_print:
3280af22 7605 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7606 LOP(OP_PRINT,XREF);
79072805
LW
7607
7608 case KEY_printf:
3280af22 7609 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7610 LOP(OP_PRTF,XREF);
79072805 7611
c07a80fd 7612 case KEY_prototype:
7613 UNI(OP_PROTOTYPE);
7614
79072805 7615 case KEY_push:
a0d0e21e 7616 LOP(OP_PUSH,XTERM);
79072805
LW
7617
7618 case KEY_pop:
6f33ba73 7619 UNIDOR(OP_POP);
79072805 7620
a0d0e21e 7621 case KEY_pos:
6f33ba73 7622 UNIDOR(OP_POS);
4e553d73 7623
79072805 7624 case KEY_pack:
a0d0e21e 7625 LOP(OP_PACK,XTERM);
79072805
LW
7626
7627 case KEY_package:
a0d0e21e 7628 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7629 s = SKIPSPACE1(s);
91152fc1 7630 s = force_strict_version(s);
4e4da3ac 7631 PL_lex_expect = XBLOCK;
79072805
LW
7632 OPERATOR(PACKAGE);
7633
7634 case KEY_pipe:
a0d0e21e 7635 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7636
7637 case KEY_q:
5db06880 7638 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7639 if (!s)
d4c19fe8 7640 missingterm(NULL);
6154021b 7641 pl_yylval.ival = OP_CONST;
79072805
LW
7642 TERM(sublex_start());
7643
a0d0e21e
LW
7644 case KEY_quotemeta:
7645 UNI(OP_QUOTEMETA);
7646
ea25a9b2
Z
7647 case KEY_qw: {
7648 OP *words = NULL;
5db06880 7649 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7650 if (!s)
d4c19fe8 7651 missingterm(NULL);
3480a8d2 7652 PL_expect = XOPERATOR;
8127e0e3 7653 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
7654 int warned_comma = !ckWARN(WARN_QW);
7655 int warned_comment = warned_comma;
3280af22 7656 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7657 while (len) {
d4c19fe8
AL
7658 for (; isSPACE(*d) && len; --len, ++d)
7659 /**/;
8127e0e3 7660 if (len) {
d4c19fe8 7661 SV *sv;
f54cb97a 7662 const char *b = d;
7e03b518 7663 if (!warned_comma || !warned_comment) {
8127e0e3 7664 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 7665 if (!warned_comma && *d == ',') {
9014280d 7666 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7667 "Possible attempt to separate words with commas");
7e03b518 7668 ++warned_comma;
8127e0e3 7669 }
7e03b518 7670 else if (!warned_comment && *d == '#') {
9014280d 7671 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7672 "Possible attempt to put comments in qw() list");
7e03b518 7673 ++warned_comment;
8127e0e3
GS
7674 }
7675 }
7676 }
7677 else {
d4c19fe8
AL
7678 for (; !isSPACE(*d) && len; --len, ++d)
7679 /**/;
8127e0e3 7680 }
740cce10 7681 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7682 words = op_append_elem(OP_LIST, words,
7948272d 7683 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7684 }
7685 }
7686 }
ea25a9b2
Z
7687 if (!words)
7688 words = newNULLLIST();
37fd879b 7689 if (PL_lex_stuff) {
8127e0e3 7690 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7691 PL_lex_stuff = NULL;
37fd879b 7692 }
ea25a9b2
Z
7693 PL_expect = XOPERATOR;
7694 pl_yylval.opval = sawparens(words);
7695 TOKEN(QWLIST);
7696 }
8990e307 7697
79072805 7698 case KEY_qq:
5db06880 7699 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7700 if (!s)
d4c19fe8 7701 missingterm(NULL);
6154021b 7702 pl_yylval.ival = OP_STRINGIFY;
3280af22 7703 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 7704 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
7705 TERM(sublex_start());
7706
8782bef2
GB
7707 case KEY_qr:
7708 s = scan_pat(s,OP_QR);
7709 TERM(sublex_start());
7710
79072805 7711 case KEY_qx:
5db06880 7712 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7713 if (!s)
d4c19fe8 7714 missingterm(NULL);
9b201d7d 7715 readpipe_override();
79072805
LW
7716 TERM(sublex_start());
7717
7718 case KEY_return:
7719 OLDLOP(OP_RETURN);
7720
7721 case KEY_require:
29595ff2 7722 s = SKIPSPACE1(s);
e759cc13
RGS
7723 if (isDIGIT(*s)) {
7724 s = force_version(s, FALSE);
a7cb1f99 7725 }
e759cc13
RGS
7726 else if (*s != 'v' || !isDIGIT(s[1])
7727 || (s = force_version(s, TRUE), *s == 'v'))
7728 {
a7cb1f99
GS
7729 *PL_tokenbuf = '\0';
7730 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7731 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7732 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7733 else if (*s == '<')
7734 yyerror("<> should be quotes");
7735 }
a72a1c8b
RGS
7736 if (orig_keyword == KEY_require) {
7737 orig_keyword = 0;
6154021b 7738 pl_yylval.ival = 1;
a72a1c8b
RGS
7739 }
7740 else
6154021b 7741 pl_yylval.ival = 0;
a72a1c8b
RGS
7742 PL_expect = XTERM;
7743 PL_bufptr = s;
7744 PL_last_uni = PL_oldbufptr;
7745 PL_last_lop_op = OP_REQUIRE;
7746 s = skipspace(s);
7747 return REPORT( (int)REQUIRE );
79072805
LW
7748
7749 case KEY_reset:
7750 UNI(OP_RESET);
7751
7752 case KEY_redo:
a0d0e21e 7753 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7754 LOOPX(OP_REDO);
7755
7756 case KEY_rename:
a0d0e21e 7757 LOP(OP_RENAME,XTERM);
79072805
LW
7758
7759 case KEY_rand:
7760 UNI(OP_RAND);
7761
7762 case KEY_rmdir:
7763 UNI(OP_RMDIR);
7764
7765 case KEY_rindex:
a0d0e21e 7766 LOP(OP_RINDEX,XTERM);
79072805
LW
7767
7768 case KEY_read:
a0d0e21e 7769 LOP(OP_READ,XTERM);
79072805
LW
7770
7771 case KEY_readdir:
7772 UNI(OP_READDIR);
7773
93a17b20 7774 case KEY_readline:
6f33ba73 7775 UNIDOR(OP_READLINE);
93a17b20
LW
7776
7777 case KEY_readpipe:
0858480c 7778 UNIDOR(OP_BACKTICK);
93a17b20 7779
79072805
LW
7780 case KEY_rewinddir:
7781 UNI(OP_REWINDDIR);
7782
7783 case KEY_recv:
a0d0e21e 7784 LOP(OP_RECV,XTERM);
79072805
LW
7785
7786 case KEY_reverse:
a0d0e21e 7787 LOP(OP_REVERSE,XTERM);
79072805
LW
7788
7789 case KEY_readlink:
6f33ba73 7790 UNIDOR(OP_READLINK);
79072805
LW
7791
7792 case KEY_ref:
7793 UNI(OP_REF);
7794
7795 case KEY_s:
7796 s = scan_subst(s);
6154021b 7797 if (pl_yylval.opval)
79072805
LW
7798 TERM(sublex_start());
7799 else
7800 TOKEN(1); /* force error */
7801
0d863452
RH
7802 case KEY_say:
7803 checkcomma(s,PL_tokenbuf,"filehandle");
7804 LOP(OP_SAY,XREF);
7805
a0d0e21e
LW
7806 case KEY_chomp:
7807 UNI(OP_CHOMP);
4e553d73 7808
79072805
LW
7809 case KEY_scalar:
7810 UNI(OP_SCALAR);
7811
7812 case KEY_select:
a0d0e21e 7813 LOP(OP_SELECT,XTERM);
79072805
LW
7814
7815 case KEY_seek:
a0d0e21e 7816 LOP(OP_SEEK,XTERM);
79072805
LW
7817
7818 case KEY_semctl:
a0d0e21e 7819 LOP(OP_SEMCTL,XTERM);
79072805
LW
7820
7821 case KEY_semget:
a0d0e21e 7822 LOP(OP_SEMGET,XTERM);
79072805
LW
7823
7824 case KEY_semop:
a0d0e21e 7825 LOP(OP_SEMOP,XTERM);
79072805
LW
7826
7827 case KEY_send:
a0d0e21e 7828 LOP(OP_SEND,XTERM);
79072805
LW
7829
7830 case KEY_setpgrp:
a0d0e21e 7831 LOP(OP_SETPGRP,XTERM);
79072805
LW
7832
7833 case KEY_setpriority:
a0d0e21e 7834 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7835
7836 case KEY_sethostent:
ff68c719 7837 UNI(OP_SHOSTENT);
79072805
LW
7838
7839 case KEY_setnetent:
ff68c719 7840 UNI(OP_SNETENT);
79072805
LW
7841
7842 case KEY_setservent:
ff68c719 7843 UNI(OP_SSERVENT);
79072805
LW
7844
7845 case KEY_setprotoent:
ff68c719 7846 UNI(OP_SPROTOENT);
79072805
LW
7847
7848 case KEY_setpwent:
7849 FUN0(OP_SPWENT);
7850
7851 case KEY_setgrent:
7852 FUN0(OP_SGRENT);
7853
7854 case KEY_seekdir:
a0d0e21e 7855 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7856
7857 case KEY_setsockopt:
a0d0e21e 7858 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7859
7860 case KEY_shift:
6f33ba73 7861 UNIDOR(OP_SHIFT);
79072805
LW
7862
7863 case KEY_shmctl:
a0d0e21e 7864 LOP(OP_SHMCTL,XTERM);
79072805
LW
7865
7866 case KEY_shmget:
a0d0e21e 7867 LOP(OP_SHMGET,XTERM);
79072805
LW
7868
7869 case KEY_shmread:
a0d0e21e 7870 LOP(OP_SHMREAD,XTERM);
79072805
LW
7871
7872 case KEY_shmwrite:
a0d0e21e 7873 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7874
7875 case KEY_shutdown:
a0d0e21e 7876 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7877
7878 case KEY_sin:
7879 UNI(OP_SIN);
7880
7881 case KEY_sleep:
7882 UNI(OP_SLEEP);
7883
7884 case KEY_socket:
a0d0e21e 7885 LOP(OP_SOCKET,XTERM);
79072805
LW
7886
7887 case KEY_socketpair:
a0d0e21e 7888 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7889
7890 case KEY_sort:
3280af22 7891 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7892 s = SKIPSPACE1(s);
79072805 7893 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7894 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7895 PL_expect = XTERM;
15f0808c 7896 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7897 LOP(OP_SORT,XREF);
79072805
LW
7898
7899 case KEY_split:
a0d0e21e 7900 LOP(OP_SPLIT,XTERM);
79072805
LW
7901
7902 case KEY_sprintf:
a0d0e21e 7903 LOP(OP_SPRINTF,XTERM);
79072805
LW
7904
7905 case KEY_splice:
a0d0e21e 7906 LOP(OP_SPLICE,XTERM);
79072805
LW
7907
7908 case KEY_sqrt:
7909 UNI(OP_SQRT);
7910
7911 case KEY_srand:
7912 UNI(OP_SRAND);
7913
7914 case KEY_stat:
7915 UNI(OP_STAT);
7916
7917 case KEY_study:
79072805
LW
7918 UNI(OP_STUDY);
7919
7920 case KEY_substr:
a0d0e21e 7921 LOP(OP_SUBSTR,XTERM);
79072805
LW
7922
7923 case KEY_format:
7924 case KEY_sub:
93a17b20 7925 really_sub:
09bef843 7926 {
3280af22 7927 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7928 SSize_t tboffset = 0;
09bef843 7929 expectation attrful;
28cc6278 7930 bool have_name, have_proto;
f54cb97a 7931 const int key = tmp;
09bef843 7932
5db06880
NC
7933#ifdef PERL_MAD
7934 SV *tmpwhite = 0;
7935
cd81e915 7936 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7937 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7938 PL_thistoken = 0;
5db06880
NC
7939
7940 d = s;
7941 s = SKIPSPACE2(s,tmpwhite);
7942#else
09bef843 7943 s = skipspace(s);
5db06880 7944#endif
09bef843 7945
7e2040f0 7946 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7947 (*s == ':' && s[1] == ':'))
7948 {
5db06880 7949#ifdef PERL_MAD
4f61fd4b 7950 SV *nametoke = NULL;
5db06880
NC
7951#endif
7952
09bef843
SB
7953 PL_expect = XBLOCK;
7954 attrful = XATTRBLOCK;
b1b65b59
JH
7955 /* remember buffer pos'n for later force_word */
7956 tboffset = s - PL_oldbufptr;
09bef843 7957 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7958#ifdef PERL_MAD
7959 if (PL_madskills)
7960 nametoke = newSVpvn(s, d - s);
7961#endif
6502358f
NC
7962 if (memchr(tmpbuf, ':', len))
7963 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7964 else {
7965 sv_setsv(PL_subname,PL_curstname);
396482e1 7966 sv_catpvs(PL_subname,"::");
09bef843
SB
7967 sv_catpvn(PL_subname,tmpbuf,len);
7968 }
09bef843 7969 have_name = TRUE;
5db06880
NC
7970
7971#ifdef PERL_MAD
7972
7973 start_force(0);
7974 CURMAD('X', nametoke);
7975 CURMAD('_', tmpwhite);
7976 (void) force_word(PL_oldbufptr + tboffset, WORD,
7977 FALSE, TRUE, TRUE);
7978
7979 s = SKIPSPACE2(d,tmpwhite);
7980#else
7981 s = skipspace(d);
7982#endif
09bef843 7983 }
463ee0b2 7984 else {
09bef843
SB
7985 if (key == KEY_my)
7986 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7987 PL_expect = XTERMBLOCK;
7988 attrful = XATTRTERM;
76f68e9b 7989 sv_setpvs(PL_subname,"?");
09bef843 7990 have_name = FALSE;
463ee0b2 7991 }
4633a7c4 7992
09bef843
SB
7993 if (key == KEY_format) {
7994 if (*s == '=')
7995 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7996#ifdef PERL_MAD
cd81e915 7997 PL_thistoken = subtoken;
5db06880
NC
7998 s = d;
7999#else
09bef843 8000 if (have_name)
b1b65b59
JH
8001 (void) force_word(PL_oldbufptr + tboffset, WORD,
8002 FALSE, TRUE, TRUE);
5db06880 8003#endif
09bef843
SB
8004 OPERATOR(FORMAT);
8005 }
79072805 8006
09bef843
SB
8007 /* Look for a prototype */
8008 if (*s == '(') {
d9f2850e
RGS
8009 char *p;
8010 bool bad_proto = FALSE;
9e8d7757
RB
8011 bool in_brackets = FALSE;
8012 char greedy_proto = ' ';
8013 bool proto_after_greedy_proto = FALSE;
8014 bool must_be_last = FALSE;
8015 bool underscore = FALSE;
aef2a98a 8016 bool seen_underscore = FALSE;
197afce1 8017 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 8018
5db06880 8019 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 8020 if (!s)
09bef843 8021 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8022 /* strip spaces and check for bad characters */
09bef843
SB
8023 d = SvPVX(PL_lex_stuff);
8024 tmp = 0;
d9f2850e
RGS
8025 for (p = d; *p; ++p) {
8026 if (!isSPACE(*p)) {
8027 d[tmp++] = *p;
9e8d7757 8028
197afce1 8029 if (warnillegalproto) {
9e8d7757
RB
8030 if (must_be_last)
8031 proto_after_greedy_proto = TRUE;
c035a075 8032 if (!strchr("$@%*;[]&\\_+", *p)) {
9e8d7757
RB
8033 bad_proto = TRUE;
8034 }
8035 else {
8036 if ( underscore ) {
8037 if ( *p != ';' )
8038 bad_proto = TRUE;
8039 underscore = FALSE;
8040 }
8041 if ( *p == '[' ) {
8042 in_brackets = TRUE;
8043 }
8044 else if ( *p == ']' ) {
8045 in_brackets = FALSE;
8046 }
8047 else if ( (*p == '@' || *p == '%') &&
8048 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8049 !in_brackets ) {
8050 must_be_last = TRUE;
8051 greedy_proto = *p;
8052 }
8053 else if ( *p == '_' ) {
aef2a98a 8054 underscore = seen_underscore = TRUE;
9e8d7757
RB
8055 }
8056 }
8057 }
d37a9538 8058 }
09bef843 8059 }
d9f2850e 8060 d[tmp] = '\0';
9e8d7757 8061 if (proto_after_greedy_proto)
197afce1 8062 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8063 "Prototype after '%c' for %"SVf" : %s",
8064 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 8065 if (bad_proto)
197afce1 8066 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8067 "Illegal character %sin prototype for %"SVf" : %s",
8068 seen_underscore ? "after '_' " : "",
be2597df 8069 SVfARG(PL_subname), d);
b162af07 8070 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8071 have_proto = TRUE;
68dc0745 8072
5db06880
NC
8073#ifdef PERL_MAD
8074 start_force(0);
cd81e915 8075 CURMAD('q', PL_thisopen);
5db06880 8076 CURMAD('_', tmpwhite);
cd81e915
NC
8077 CURMAD('=', PL_thisstuff);
8078 CURMAD('Q', PL_thisclose);
5db06880
NC
8079 NEXTVAL_NEXTTOKE.opval =
8080 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8081 PL_lex_stuff = NULL;
5db06880
NC
8082 force_next(THING);
8083
8084 s = SKIPSPACE2(s,tmpwhite);
8085#else
09bef843 8086 s = skipspace(s);
5db06880 8087#endif
4633a7c4 8088 }
09bef843
SB
8089 else
8090 have_proto = FALSE;
8091
8092 if (*s == ':' && s[1] != ':')
8093 PL_expect = attrful;
8e742a20
MHM
8094 else if (*s != '{' && key == KEY_sub) {
8095 if (!have_name)
8096 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8097 else if (*s != ';' && *s != '}')
be2597df 8098 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8099 }
09bef843 8100
5db06880
NC
8101#ifdef PERL_MAD
8102 start_force(0);
8103 if (tmpwhite) {
8104 if (PL_madskills)
6b29d1f5 8105 curmad('^', newSVpvs(""));
5db06880
NC
8106 CURMAD('_', tmpwhite);
8107 }
8108 force_next(0);
8109
cd81e915 8110 PL_thistoken = subtoken;
5db06880 8111#else
09bef843 8112 if (have_proto) {
9ded7720 8113 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8114 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8115 PL_lex_stuff = NULL;
09bef843 8116 force_next(THING);
68dc0745 8117 }
5db06880 8118#endif
09bef843 8119 if (!have_name) {
49a54bbe
NC
8120 if (PL_curstash)
8121 sv_setpvs(PL_subname, "__ANON__");
8122 else
8123 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8124 TOKEN(ANONSUB);
4633a7c4 8125 }
5db06880 8126#ifndef PERL_MAD
b1b65b59
JH
8127 (void) force_word(PL_oldbufptr + tboffset, WORD,
8128 FALSE, TRUE, TRUE);
5db06880 8129#endif
09bef843
SB
8130 if (key == KEY_my)
8131 TOKEN(MYSUB);
8132 TOKEN(SUB);
4633a7c4 8133 }
79072805
LW
8134
8135 case KEY_system:
a0d0e21e 8136 LOP(OP_SYSTEM,XREF);
79072805
LW
8137
8138 case KEY_symlink:
a0d0e21e 8139 LOP(OP_SYMLINK,XTERM);
79072805
LW
8140
8141 case KEY_syscall:
a0d0e21e 8142 LOP(OP_SYSCALL,XTERM);
79072805 8143
c07a80fd 8144 case KEY_sysopen:
8145 LOP(OP_SYSOPEN,XTERM);
8146
137443ea 8147 case KEY_sysseek:
8148 LOP(OP_SYSSEEK,XTERM);
8149
79072805 8150 case KEY_sysread:
a0d0e21e 8151 LOP(OP_SYSREAD,XTERM);
79072805
LW
8152
8153 case KEY_syswrite:
a0d0e21e 8154 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8155
8156 case KEY_tr:
8157 s = scan_trans(s);
8158 TERM(sublex_start());
8159
8160 case KEY_tell:
8161 UNI(OP_TELL);
8162
8163 case KEY_telldir:
8164 UNI(OP_TELLDIR);
8165
463ee0b2 8166 case KEY_tie:
a0d0e21e 8167 LOP(OP_TIE,XTERM);
463ee0b2 8168
c07a80fd 8169 case KEY_tied:
8170 UNI(OP_TIED);
8171
79072805
LW
8172 case KEY_time:
8173 FUN0(OP_TIME);
8174
8175 case KEY_times:
8176 FUN0(OP_TMS);
8177
8178 case KEY_truncate:
a0d0e21e 8179 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8180
8181 case KEY_uc:
8182 UNI(OP_UC);
8183
8184 case KEY_ucfirst:
8185 UNI(OP_UCFIRST);
8186
463ee0b2
LW
8187 case KEY_untie:
8188 UNI(OP_UNTIE);
8189
79072805 8190 case KEY_until:
78cdf107
Z
8191 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8192 return REPORT(0);
6154021b 8193 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8194 OPERATOR(UNTIL);
8195
8196 case KEY_unless:
78cdf107
Z
8197 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8198 return REPORT(0);
6154021b 8199 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8200 OPERATOR(UNLESS);
8201
8202 case KEY_unlink:
a0d0e21e 8203 LOP(OP_UNLINK,XTERM);
79072805
LW
8204
8205 case KEY_undef:
6f33ba73 8206 UNIDOR(OP_UNDEF);
79072805
LW
8207
8208 case KEY_unpack:
a0d0e21e 8209 LOP(OP_UNPACK,XTERM);
79072805
LW
8210
8211 case KEY_utime:
a0d0e21e 8212 LOP(OP_UTIME,XTERM);
79072805
LW
8213
8214 case KEY_umask:
6f33ba73 8215 UNIDOR(OP_UMASK);
79072805
LW
8216
8217 case KEY_unshift:
a0d0e21e
LW
8218 LOP(OP_UNSHIFT,XTERM);
8219
8220 case KEY_use:
468aa647 8221 s = tokenize_use(1, s);
a0d0e21e 8222 OPERATOR(USE);
79072805
LW
8223
8224 case KEY_values:
8225 UNI(OP_VALUES);
8226
8227 case KEY_vec:
a0d0e21e 8228 LOP(OP_VEC,XTERM);
79072805 8229
0d863452 8230 case KEY_when:
78cdf107
Z
8231 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8232 return REPORT(0);
6154021b 8233 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8234 OPERATOR(WHEN);
8235
79072805 8236 case KEY_while:
78cdf107
Z
8237 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8238 return REPORT(0);
6154021b 8239 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8240 OPERATOR(WHILE);
8241
8242 case KEY_warn:
3280af22 8243 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8244 LOP(OP_WARN,XTERM);
79072805
LW
8245
8246 case KEY_wait:
8247 FUN0(OP_WAIT);
8248
8249 case KEY_waitpid:
a0d0e21e 8250 LOP(OP_WAITPID,XTERM);
79072805
LW
8251
8252 case KEY_wantarray:
8253 FUN0(OP_WANTARRAY);
8254
8255 case KEY_write:
9d116dd7
JH
8256#ifdef EBCDIC
8257 {
df3728a2
JH
8258 char ctl_l[2];
8259 ctl_l[0] = toCTRL('L');
8260 ctl_l[1] = '\0';
fafc274c 8261 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8262 }
8263#else
fafc274c
NC
8264 /* Make sure $^L is defined */
8265 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8266#endif
79072805
LW
8267 UNI(OP_ENTERWRITE);
8268
8269 case KEY_x:
78cdf107
Z
8270 if (PL_expect == XOPERATOR) {
8271 if (*s == '=' && !PL_lex_allbrackets &&
8272 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8273 return REPORT(0);
79072805 8274 Mop(OP_REPEAT);
78cdf107 8275 }
79072805
LW
8276 check_uni();
8277 goto just_a_word;
8278
a0d0e21e 8279 case KEY_xor:
78cdf107
Z
8280 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8281 return REPORT(0);
6154021b 8282 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
8283 OPERATOR(OROP);
8284
79072805
LW
8285 case KEY_y:
8286 s = scan_trans(s);
8287 TERM(sublex_start());
8288 }
49dc05e3 8289 }}
79072805 8290}
bf4acbe4
GS
8291#ifdef __SC__
8292#pragma segment Main
8293#endif
79072805 8294
e930465f
JH
8295static int
8296S_pending_ident(pTHX)
8eceec63 8297{
97aff369 8298 dVAR;
8eceec63 8299 register char *d;
bbd11bfc 8300 PADOFFSET tmp = 0;
8eceec63
SC
8301 /* pit holds the identifier we read and pending_ident is reset */
8302 char pit = PL_pending_ident;
9bde8eb0
NC
8303 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8304 /* All routes through this function want to know if there is a colon. */
c099d646 8305 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8306 PL_pending_ident = 0;
8307
cd81e915 8308 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8309 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8310 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8311
8312 /* if we're in a my(), we can't allow dynamics here.
8313 $foo'bar has already been turned into $foo::bar, so
8314 just check for colons.
8315
8316 if it's a legal name, the OP is a PADANY.
8317 */
8318 if (PL_in_my) {
8319 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8320 if (has_colon)
8eceec63
SC
8321 yyerror(Perl_form(aTHX_ "No package name allowed for "
8322 "variable %s in \"our\"",
8323 PL_tokenbuf));
bc9b26ca 8324 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8325 }
8326 else {
9bde8eb0 8327 if (has_colon)
952306ac
RGS
8328 yyerror(Perl_form(aTHX_ PL_no_myglob,
8329 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 8330
6154021b 8331 pl_yylval.opval = newOP(OP_PADANY, 0);
bc9b26ca
BF
8332 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8333 UTF ? SVf_UTF8 : 0);
8eceec63
SC
8334 return PRIVATEREF;
8335 }
8336 }
8337
8338 /*
8339 build the ops for accesses to a my() variable.
8340
8341 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8342 then used in a comparison. This catches most, but not
8343 all cases. For instance, it catches
8344 sort { my($a); $a <=> $b }
8345 but not
8346 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8347 (although why you'd do that is anyone's guess).
8348 */
8349
9bde8eb0 8350 if (!has_colon) {
8716503d 8351 if (!PL_in_my)
bc9b26ca
BF
8352 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8353 UTF ? SVf_UTF8 : 0);
8716503d 8354 if (tmp != NOT_IN_PAD) {
8eceec63 8355 /* might be an "our" variable" */
00b1698f 8356 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8357 /* build ops for a bareword */
b64e5050
AL
8358 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8359 HEK * const stashname = HvNAME_HEK(stash);
8360 SV * const sym = newSVhek(stashname);
396482e1 8361 sv_catpvs(sym, "::");
9bde8eb0 8362 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8363 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8364 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8365 gv_fetchsv(sym,
8eceec63
SC
8366 (PL_in_eval
8367 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8368 : GV_ADDMULTI
8eceec63
SC
8369 ),
8370 ((PL_tokenbuf[0] == '$') ? SVt_PV
8371 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8372 : SVt_PVHV));
8373 return WORD;
8374 }
8375
8376 /* if it's a sort block and they're naming $a or $b */
8377 if (PL_last_lop_op == OP_SORT &&
8378 PL_tokenbuf[0] == '$' &&
8379 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8380 && !PL_tokenbuf[2])
8381 {
8382 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8383 d < PL_bufend && *d != '\n';
8384 d++)
8385 {
8386 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8387 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8388 PL_tokenbuf);
8389 }
8390 }
8391 }
8392
6154021b
RGS
8393 pl_yylval.opval = newOP(OP_PADANY, 0);
8394 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8395 return PRIVATEREF;
8396 }
8397 }
8398
8399 /*
8400 Whine if they've said @foo in a doublequoted string,
8401 and @foo isn't a variable we can find in the symbol
8402 table.
8403 */
d824713b
NC
8404 if (ckWARN(WARN_AMBIGUOUS) &&
8405 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8406 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8407 SVt_PVAV);
8eceec63 8408 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8409 /* DO NOT warn for @- and @+ */
8410 && !( PL_tokenbuf[2] == '\0' &&
8411 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8412 )
8eceec63
SC
8413 {
8414 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8415 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8416 "Possible unintended interpolation of %s in string",
8417 PL_tokenbuf);
8eceec63
SC
8418 }
8419 }
8420
8421 /* build ops for a bareword */
6154021b 8422 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8423 tokenbuf_len - 1));
6154021b 8424 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8425 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8426 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8427 ((PL_tokenbuf[0] == '$') ? SVt_PV
8428 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8429 : SVt_PVHV));
8eceec63
SC
8430 return WORD;
8431}
8432
76e3520e 8433STATIC void
c94115d8 8434S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8435{
97aff369 8436 dVAR;
2f3197b3 8437
7918f24d
NC
8438 PERL_ARGS_ASSERT_CHECKCOMMA;
8439
d008e5eb 8440 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8441 if (ckWARN(WARN_SYNTAX)) {
8442 int level = 1;
26ff0806 8443 const char *w;
d008e5eb
GS
8444 for (w = s+2; *w && level; w++) {
8445 if (*w == '(')
8446 ++level;
8447 else if (*w == ')')
8448 --level;
8449 }
888fea98
NC
8450 while (isSPACE(*w))
8451 ++w;
b1439985
RGS
8452 /* the list of chars below is for end of statements or
8453 * block / parens, boolean operators (&&, ||, //) and branch
8454 * constructs (or, and, if, until, unless, while, err, for).
8455 * Not a very solid hack... */
8456 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8457 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8458 "%s (...) interpreted as function",name);
d008e5eb 8459 }
2f3197b3 8460 }
3280af22 8461 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8462 s++;
a687059c
LW
8463 if (*s == '(')
8464 s++;
3280af22 8465 while (s < PL_bufend && isSPACE(*s))
a687059c 8466 s++;
7e2040f0 8467 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 8468 const char * const w = s++;
7e2040f0 8469 while (isALNUM_lazy_if(s,UTF))
a687059c 8470 s++;
3280af22 8471 while (s < PL_bufend && isSPACE(*s))
a687059c 8472 s++;
e929a76b 8473 if (*s == ',') {
c94115d8 8474 GV* gv;
5458a98a 8475 if (keyword(w, s - w, 0))
e929a76b 8476 return;
c94115d8
NC
8477
8478 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8479 if (gv && GvCVu(gv))
abbb3198 8480 return;
cea2e8a9 8481 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8482 }
8483 }
8484}
8485
423cee85
JH
8486/* Either returns sv, or mortalizes sv and returns a new SV*.
8487 Best used as sv=new_constant(..., sv, ...).
8488 If s, pv are NULL, calls subroutine with one argument,
8489 and type is used with error messages only. */
8490
b3ac6de7 8491STATIC SV *
eb0d8d16
NC
8492S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8493 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 8494{
27da23d5 8495 dVAR; dSP;
890ce7af 8496 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 8497 SV *res;
b3ac6de7
IZ
8498 SV **cvp;
8499 SV *cv, *typesv;
89e33a05 8500 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 8501
7918f24d
NC
8502 PERL_ARGS_ASSERT_NEW_CONSTANT;
8503
f0af216f 8504 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
8505 SV *msg;
8506
10edeb5d
JH
8507 why2 = (const char *)
8508 (strEQ(key,"charnames")
8509 ? "(possibly a missing \"use charnames ...\")"
8510 : "");
4e553d73 8511 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
8512 (type ? type: "undef"), why2);
8513
8514 /* This is convoluted and evil ("goto considered harmful")
8515 * but I do not understand the intricacies of all the different
8516 * failure modes of %^H in here. The goal here is to make
8517 * the most probable error message user-friendly. --jhi */
8518
8519 goto msgdone;
8520
423cee85 8521 report:
4e553d73 8522 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 8523 (type ? type: "undef"), why1, why2, why3);
41ab332f 8524 msgdone:
95a20fc0 8525 yyerror(SvPVX_const(msg));
423cee85
JH
8526 SvREFCNT_dec(msg);
8527 return sv;
8528 }
ff3f963a
KW
8529
8530 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
8531 if (PL_error_count > 0 && strEQ(key,"charnames"))
8532 return &PL_sv_undef;
ff3f963a 8533
eb0d8d16 8534 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 8535 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
8536 why1 = "$^H{";
8537 why2 = key;
f0af216f 8538 why3 = "} is not defined";
423cee85 8539 goto report;
b3ac6de7
IZ
8540 }
8541 sv_2mortal(sv); /* Parent created it permanently */
8542 cv = *cvp;
423cee85 8543 if (!pv && s)
59cd0e26 8544 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 8545 if (type && pv)
59cd0e26 8546 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 8547 else
423cee85 8548 typesv = &PL_sv_undef;
4e553d73 8549
e788e7d3 8550 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
8551 ENTER ;
8552 SAVETMPS;
4e553d73 8553
423cee85 8554 PUSHMARK(SP) ;
a5845cb7 8555 EXTEND(sp, 3);
423cee85
JH
8556 if (pv)
8557 PUSHs(pv);
b3ac6de7 8558 PUSHs(sv);
423cee85
JH
8559 if (pv)
8560 PUSHs(typesv);
b3ac6de7 8561 PUTBACK;
423cee85 8562 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 8563
423cee85 8564 SPAGAIN ;
4e553d73 8565
423cee85 8566 /* Check the eval first */
9b0e499b 8567 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 8568 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 8569 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 8570 (void)POPs;
b37c2d43 8571 res = SvREFCNT_inc_simple(sv);
423cee85
JH
8572 }
8573 else {
8574 res = POPs;
b37c2d43 8575 SvREFCNT_inc_simple_void(res);
423cee85 8576 }
4e553d73 8577
423cee85
JH
8578 PUTBACK ;
8579 FREETMPS ;
8580 LEAVE ;
b3ac6de7 8581 POPSTACK;
4e553d73 8582
b3ac6de7 8583 if (!SvOK(res)) {
423cee85
JH
8584 why1 = "Call to &{$^H{";
8585 why2 = key;
f0af216f 8586 why3 = "}} did not return a defined value";
423cee85
JH
8587 sv = res;
8588 goto report;
9b0e499b 8589 }
423cee85 8590
9b0e499b 8591 return res;
b3ac6de7 8592}
4e553d73 8593
d0a148a6
NC
8594/* Returns a NUL terminated string, with the length of the string written to
8595 *slp
8596 */
76e3520e 8597STATIC char *
cea2e8a9 8598S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 8599{
97aff369 8600 dVAR;
463ee0b2 8601 register char *d = dest;
890ce7af 8602 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
8603
8604 PERL_ARGS_ASSERT_SCAN_WORD;
8605
463ee0b2 8606 for (;;) {
8903cb82 8607 if (d >= e)
cea2e8a9 8608 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8609 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8610 *d++ = *s++;
c35e046a 8611 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8612 *d++ = ':';
8613 *d++ = ':';
8614 s++;
8615 }
c35e046a 8616 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
8617 *d++ = *s++;
8618 *d++ = *s++;
8619 }
fd400ab9 8620 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8621 char *t = s + UTF8SKIP(s);
c35e046a 8622 size_t len;
fd400ab9 8623 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 8624 t += UTF8SKIP(t);
c35e046a
AL
8625 len = t - s;
8626 if (d + len > e)
cea2e8a9 8627 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
8628 Copy(s, d, len, char);
8629 d += len;
a0ed51b3
LW
8630 s = t;
8631 }
463ee0b2
LW
8632 else {
8633 *d = '\0';
8634 *slp = d - dest;
8635 return s;
e929a76b 8636 }
378cc40b
LW
8637 }
8638}
8639
76e3520e 8640STATIC char *
f54cb97a 8641S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 8642{
97aff369 8643 dVAR;
6136c704 8644 char *bracket = NULL;
748a9306 8645 char funny = *s++;
6136c704 8646 register char *d = dest;
0b3da58d 8647 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 8648
7918f24d
NC
8649 PERL_ARGS_ASSERT_SCAN_IDENT;
8650
a0d0e21e 8651 if (isSPACE(*s))
29595ff2 8652 s = PEEKSPACE(s);
de3bb511 8653 if (isDIGIT(*s)) {
8903cb82 8654 while (isDIGIT(*s)) {
8655 if (d >= e)
cea2e8a9 8656 Perl_croak(aTHX_ ident_too_long);
378cc40b 8657 *d++ = *s++;
8903cb82 8658 }
378cc40b
LW
8659 }
8660 else {
463ee0b2 8661 for (;;) {
8903cb82 8662 if (d >= e)
cea2e8a9 8663 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8664 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8665 *d++ = *s++;
7e2040f0 8666 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8667 *d++ = ':';
8668 *d++ = ':';
8669 s++;
8670 }
a0d0e21e 8671 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
8672 *d++ = *s++;
8673 *d++ = *s++;
8674 }
fd400ab9 8675 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8676 char *t = s + UTF8SKIP(s);
fd400ab9 8677 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
8678 t += UTF8SKIP(t);
8679 if (d + (t - s) > e)
cea2e8a9 8680 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
8681 Copy(s, d, t - s, char);
8682 d += t - s;
8683 s = t;
8684 }
463ee0b2
LW
8685 else
8686 break;
8687 }
378cc40b
LW
8688 }
8689 *d = '\0';
8690 d = dest;
79072805 8691 if (*d) {
3280af22
NIS
8692 if (PL_lex_state != LEX_NORMAL)
8693 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 8694 return s;
378cc40b 8695 }
748a9306 8696 if (*s == '$' && s[1] &&
3792a11b 8697 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 8698 {
4810e5ec 8699 return s;
5cd24f17 8700 }
79072805
LW
8701 if (*s == '{') {
8702 bracket = s;
8703 s++;
8704 }
8705 else if (ck_uni)
8706 check_uni();
93a17b20 8707 if (s < send)
79072805
LW
8708 *d = *s++;
8709 d[1] = '\0';
2b92dfce 8710 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 8711 *d = toCTRL(*s);
8712 s++;
de3bb511 8713 }
79072805 8714 if (bracket) {
748a9306 8715 if (isSPACE(s[-1])) {
fa83b5b6 8716 while (s < send) {
f54cb97a 8717 const char ch = *s++;
bf4acbe4 8718 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 8719 *d = ch;
8720 break;
8721 }
8722 }
748a9306 8723 }
7e2040f0 8724 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 8725 d++;
a0ed51b3 8726 if (UTF) {
6136c704
AL
8727 char *end = s;
8728 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8729 end += UTF8SKIP(end);
8730 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8731 end += UTF8SKIP(end);
a0ed51b3 8732 }
6136c704
AL
8733 Copy(s, d, end - s, char);
8734 d += end - s;
8735 s = end;
a0ed51b3
LW
8736 }
8737 else {
2b92dfce 8738 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 8739 *d++ = *s++;
2b92dfce 8740 if (d >= e)
cea2e8a9 8741 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 8742 }
79072805 8743 *d = '\0';
c35e046a
AL
8744 while (s < send && SPACE_OR_TAB(*s))
8745 s++;
ff68c719 8746 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 8747 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
8748 const char * const brack =
8749 (const char *)
8750 ((*s == '[') ? "[...]" : "{...}");
e850844c 8751 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 8752 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 8753 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
8754 funny, dest, brack, funny, dest, brack);
8755 }
79072805 8756 bracket++;
a0be28da 8757 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 8758 PL_lex_allbrackets++;
79072805
LW
8759 return s;
8760 }
4e553d73
NIS
8761 }
8762 /* Handle extended ${^Foo} variables
2b92dfce
GS
8763 * 1999-02-27 mjd-perl-patch@plover.com */
8764 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8765 && isALNUM(*s))
8766 {
8767 d++;
8768 while (isALNUM(*s) && d < e) {
8769 *d++ = *s++;
8770 }
8771 if (d >= e)
cea2e8a9 8772 Perl_croak(aTHX_ ident_too_long);
2b92dfce 8773 *d = '\0';
79072805
LW
8774 }
8775 if (*s == '}') {
8776 s++;
7df0d042 8777 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 8778 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
8779 PL_expect = XREF;
8780 }
d008e5eb 8781 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 8782 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
8783 (keyword(dest, d - dest, 0)
8784 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 8785 {
c35e046a
AL
8786 if (funny == '#')
8787 funny = '@';
9014280d 8788 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
8789 "Ambiguous use of %c{%s} resolved to %c%s",
8790 funny, dest, funny, dest);
8791 }
8792 }
79072805
LW
8793 }
8794 else {
8795 s = bracket; /* let the parser handle it */
93a17b20 8796 *dest = '\0';
79072805
LW
8797 }
8798 }
3280af22
NIS
8799 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8800 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
8801 return s;
8802}
8803
858a358b 8804static bool
3955e1a9 8805S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
8806
8807 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8808 * the parse starting at 's', based on the subset that are valid in this
8809 * context input to this routine in 'valid_flags'. Advances s. Returns
8810 * TRUE if the input was a valid flag, so the next char may be as well;
3955e1a9
KW
8811 * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8812 * current regex. This routine will set it to any charset modifier found.
8813 * The caller shouldn't change it. This way, another charset modifier
8814 * encountered in the parse can be detected as an error, as we have decided
8815 * allow only one */
858a358b
KW
8816
8817 const char c = **s;
94b03d7d 8818
858a358b
KW
8819 if (! strchr(valid_flags, c)) {
8820 if (isALNUM(c)) {
94b03d7d 8821 goto deprecate;
858a358b
KW
8822 }
8823 return FALSE;
8824 }
8825
8826 switch (c) {
94b03d7d 8827
858a358b
KW
8828 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8829 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8830 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8831 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8832 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8833 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d
KW
8834 case LOCALE_PAT_MOD:
8835
8836 /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8837 * can't be regex modifiers.
8838 * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
8839 * much as easily done. s///lei, for example, has to mean regex
8840 * modifiers if it's not an error (as does any word character
8841 * following the 'e'). Otherwise, we resolve to the backwards-
8842 * compatible, but less likely 's/// le ...', i.e. as meaning
8843 * less-than-or-equal. The reason it's not likely is that s//
967ac236
KW
8844 * returns a number for code in the field (/r returns a string, but
8845 * that wasn't added until the 5.13 series), and so '<=' should be
8846 * used for comparing, not 'le'. */
94b03d7d
KW
8847 if (*((*s) + 1) == 't') {
8848 goto deprecate;
8849 }
563734a5 8850 else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
aeac89a5
KW
8851
8852 /* 'e' is valid only for substitutes, s///e. If it is not
8853 * valid in the current context, then 'm//le' must mean the
8854 * comparison operator, so use the regular deprecation message.
8855 */
8856 if (! strchr(valid_flags, 'e')) {
8857 goto deprecate;
8858 }
94b03d7d
KW
8859 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8860 "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
8861 return FALSE;
8862 }
3955e1a9
KW
8863 if (*charset) {
8864 goto multiple_charsets;
8865 }
94b03d7d 8866 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 8867 *charset = c;
94b03d7d
KW
8868 break;
8869 case UNICODE_PAT_MOD:
8870 /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8871 * 'n' means they can't be regex modifiers */
8872 if (*((*s) + 1) == 'n') {
8873 goto deprecate;
8874 }
3955e1a9
KW
8875 if (*charset) {
8876 goto multiple_charsets;
8877 }
94b03d7d 8878 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 8879 *charset = c;
94b03d7d
KW
8880 break;
8881 case ASCII_RESTRICT_PAT_MOD:
8882 /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8883 * can't be regex modifiers */
8884 if (*((*s) + 1) == 'n') {
8885 goto deprecate;
8886 }
ff3f26d2
KW
8887
8888 if (! *charset) {
94b03d7d
KW
8889 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8890 }
ff3f26d2
KW
8891 else {
8892
8893 /* Error if previous modifier wasn't an 'a', but if it was, see
8894 * if, and accept, a second occurrence (only) */
8895 if (*charset != 'a'
8896 || get_regex_charset(*pmfl)
8897 != REGEX_ASCII_RESTRICTED_CHARSET)
8898 {
8899 goto multiple_charsets;
8900 }
8901 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
8902 }
8903 *charset = c;
94b03d7d
KW
8904 break;
8905 case DEPENDS_PAT_MOD:
3955e1a9
KW
8906 if (*charset) {
8907 goto multiple_charsets;
8908 }
94b03d7d 8909 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 8910 *charset = c;
94b03d7d 8911 break;
879d0c72 8912 }
94b03d7d 8913
858a358b
KW
8914 (*s)++;
8915 return TRUE;
94b03d7d
KW
8916
8917 deprecate:
8918 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8919 "Having no space between pattern and following word is deprecated");
8920 return FALSE;
3955e1a9
KW
8921
8922 multiple_charsets:
8923 if (*charset != c) {
8924 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8925 }
ff3f26d2
KW
8926 else if (c == 'a') {
8927 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8928 }
3955e1a9
KW
8929 else {
8930 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8931 }
8932
8933 /* Pretend that it worked, so will continue processing before dieing */
8934 (*s)++;
8935 return TRUE;
879d0c72
NC
8936}
8937
76e3520e 8938STATIC char *
cea2e8a9 8939S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 8940{
97aff369 8941 dVAR;
79072805 8942 PMOP *pm;
5db06880 8943 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 8944 const char * const valid_flags =
a20207d7 8945 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 8946 char charset = '\0'; /* character set modifier */
5db06880
NC
8947#ifdef PERL_MAD
8948 char *modstart;
8949#endif
8950
7918f24d 8951 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 8952
25c09cbf 8953 if (!s) {
6136c704 8954 const char * const delimiter = skipspace(start);
10edeb5d
JH
8955 Perl_croak(aTHX_
8956 (const char *)
8957 (*delimiter == '?'
8958 ? "Search pattern not terminated or ternary operator parsed as search pattern"
8959 : "Search pattern not terminated" ));
25c09cbf 8960 }
bbce6d69 8961
8782bef2 8962 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
8963 if (PL_multi_open == '?') {
8964 /* This is the only point in the code that sets PMf_ONCE: */
79072805 8965 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
8966
8967 /* Hence it's safe to do this bit of PMOP book-keeping here, which
8968 allows us to restrict the list needed by reset to just the ??
8969 matches. */
8970 assert(type != OP_TRANS);
8971 if (PL_curstash) {
daba3364 8972 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
8973 U32 elements;
8974 if (!mg) {
daba3364 8975 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
8976 0);
8977 }
8978 elements = mg->mg_len / sizeof(PMOP**);
8979 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8980 ((PMOP**)mg->mg_ptr) [elements++] = pm;
8981 mg->mg_len = elements * sizeof(PMOP**);
8982 PmopSTASH_set(pm,PL_curstash);
8983 }
8984 }
5db06880
NC
8985#ifdef PERL_MAD
8986 modstart = s;
8987#endif
3955e1a9 8988 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
8989#ifdef PERL_MAD
8990 if (PL_madskills && modstart != s) {
8991 SV* tmptoken = newSVpvn(modstart, s - modstart);
8992 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8993 }
8994#endif
4ac733c9 8995 /* issue a warning if /c is specified,but /g is not */
a2a5de95 8996 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 8997 {
a2a5de95
NC
8998 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8999 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
9000 }
9001
3280af22 9002 PL_lex_op = (OP*)pm;
6154021b 9003 pl_yylval.ival = OP_MATCH;
378cc40b
LW
9004 return s;
9005}
9006
76e3520e 9007STATIC char *
cea2e8a9 9008S_scan_subst(pTHX_ char *start)
79072805 9009{
27da23d5 9010 dVAR;
22594288 9011 char *s;
79072805 9012 register PMOP *pm;
4fdae800 9013 I32 first_start;
79072805 9014 I32 es = 0;
3955e1a9 9015 char charset = '\0'; /* character set modifier */
5db06880
NC
9016#ifdef PERL_MAD
9017 char *modstart;
9018#endif
79072805 9019
7918f24d
NC
9020 PERL_ARGS_ASSERT_SCAN_SUBST;
9021
6154021b 9022 pl_yylval.ival = OP_NULL;
79072805 9023
5db06880 9024 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9025
37fd879b 9026 if (!s)
cea2e8a9 9027 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9028
3280af22 9029 if (s[-1] == PL_multi_open)
79072805 9030 s--;
5db06880
NC
9031#ifdef PERL_MAD
9032 if (PL_madskills) {
cd81e915
NC
9033 CURMAD('q', PL_thisopen);
9034 CURMAD('_', PL_thiswhite);
9035 CURMAD('E', PL_thisstuff);
9036 CURMAD('Q', PL_thisclose);
9037 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9038 }
9039#endif
79072805 9040
3280af22 9041 first_start = PL_multi_start;
5db06880 9042 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9043 if (!s) {
37fd879b 9044 if (PL_lex_stuff) {
3280af22 9045 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9046 PL_lex_stuff = NULL;
37fd879b 9047 }
cea2e8a9 9048 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9049 }
3280af22 9050 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9051
79072805 9052 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9053
9054#ifdef PERL_MAD
9055 if (PL_madskills) {
cd81e915
NC
9056 CURMAD('z', PL_thisopen);
9057 CURMAD('R', PL_thisstuff);
9058 CURMAD('Z', PL_thisclose);
5db06880
NC
9059 }
9060 modstart = s;
9061#endif
9062
48c036b1 9063 while (*s) {
a20207d7 9064 if (*s == EXEC_PAT_MOD) {
a687059c 9065 s++;
2f3197b3 9066 es++;
a687059c 9067 }
3955e1a9
KW
9068 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9069 {
48c036b1 9070 break;
aa78b661 9071 }
378cc40b 9072 }
79072805 9073
5db06880
NC
9074#ifdef PERL_MAD
9075 if (PL_madskills) {
9076 if (modstart != s)
9077 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9078 append_madprops(PL_thismad, (OP*)pm, 0);
9079 PL_thismad = 0;
5db06880
NC
9080 }
9081#endif
a2a5de95
NC
9082 if ((pm->op_pmflags & PMf_CONTINUE)) {
9083 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9084 }
9085
79072805 9086 if (es) {
6136c704
AL
9087 SV * const repl = newSVpvs("");
9088
0244c3a4
GS
9089 PL_sublex_info.super_bufptr = s;
9090 PL_sublex_info.super_bufend = PL_bufend;
9091 PL_multi_end = 0;
79072805 9092 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9093 while (es-- > 0) {
9094 if (es)
9095 sv_catpvs(repl, "eval ");
9096 else
9097 sv_catpvs(repl, "do ");
9098 }
6f43d98f 9099 sv_catpvs(repl, "{");
3280af22 9100 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
9101 if (strchr(SvPVX(PL_lex_repl), '#'))
9102 sv_catpvs(repl, "\n");
9103 sv_catpvs(repl, "}");
25da4f38 9104 SvEVALED_on(repl);
3280af22
NIS
9105 SvREFCNT_dec(PL_lex_repl);
9106 PL_lex_repl = repl;
378cc40b 9107 }
79072805 9108
3280af22 9109 PL_lex_op = (OP*)pm;
6154021b 9110 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9111 return s;
9112}
9113
76e3520e 9114STATIC char *
cea2e8a9 9115S_scan_trans(pTHX_ char *start)
378cc40b 9116{
97aff369 9117 dVAR;
a0d0e21e 9118 register char* s;
11343788 9119 OP *o;
79072805 9120 short *tbl;
b84c11c8
NC
9121 U8 squash;
9122 U8 del;
9123 U8 complement;
bb16bae8 9124 bool nondestruct = 0;
5db06880
NC
9125#ifdef PERL_MAD
9126 char *modstart;
9127#endif
79072805 9128
7918f24d
NC
9129 PERL_ARGS_ASSERT_SCAN_TRANS;
9130
6154021b 9131 pl_yylval.ival = OP_NULL;
79072805 9132
5db06880 9133 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 9134 if (!s)
cea2e8a9 9135 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9136
3280af22 9137 if (s[-1] == PL_multi_open)
2f3197b3 9138 s--;
5db06880
NC
9139#ifdef PERL_MAD
9140 if (PL_madskills) {
cd81e915
NC
9141 CURMAD('q', PL_thisopen);
9142 CURMAD('_', PL_thiswhite);
9143 CURMAD('E', PL_thisstuff);
9144 CURMAD('Q', PL_thisclose);
9145 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9146 }
9147#endif
2f3197b3 9148
5db06880 9149 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9150 if (!s) {
37fd879b 9151 if (PL_lex_stuff) {
3280af22 9152 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9153 PL_lex_stuff = NULL;
37fd879b 9154 }
cea2e8a9 9155 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9156 }
5db06880 9157 if (PL_madskills) {
cd81e915
NC
9158 CURMAD('z', PL_thisopen);
9159 CURMAD('R', PL_thisstuff);
9160 CURMAD('Z', PL_thisclose);
5db06880 9161 }
79072805 9162
a0ed51b3 9163 complement = del = squash = 0;
5db06880
NC
9164#ifdef PERL_MAD
9165 modstart = s;
9166#endif
7a1e2023
NC
9167 while (1) {
9168 switch (*s) {
9169 case 'c':
79072805 9170 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9171 break;
9172 case 'd':
a0ed51b3 9173 del = OPpTRANS_DELETE;
7a1e2023
NC
9174 break;
9175 case 's':
79072805 9176 squash = OPpTRANS_SQUASH;
7a1e2023 9177 break;
bb16bae8
FC
9178 case 'r':
9179 nondestruct = 1;
9180 break;
7a1e2023
NC
9181 default:
9182 goto no_more;
9183 }
395c3793
LW
9184 s++;
9185 }
7a1e2023 9186 no_more:
8973db79 9187
aa1f7c5b 9188 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
bb16bae8 9189 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
59f00321
RGS
9190 o->op_private &= ~OPpTRANS_ALL;
9191 o->op_private |= del|squash|complement|
7948272d
NIS
9192 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9193 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 9194
3280af22 9195 PL_lex_op = o;
bb16bae8 9196 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9197
9198#ifdef PERL_MAD
9199 if (PL_madskills) {
9200 if (modstart != s)
9201 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9202 append_madprops(PL_thismad, o, 0);
9203 PL_thismad = 0;
5db06880
NC
9204 }
9205#endif
9206
79072805
LW
9207 return s;
9208}
9209
76e3520e 9210STATIC char *
cea2e8a9 9211S_scan_heredoc(pTHX_ register char *s)
79072805 9212{
97aff369 9213 dVAR;
79072805
LW
9214 SV *herewas;
9215 I32 op_type = OP_SCALAR;
9216 I32 len;
9217 SV *tmpstr;
9218 char term;
73d840c0 9219 const char *found_newline;
79072805 9220 register char *d;
fc36a67e 9221 register char *e;
4633a7c4 9222 char *peek;
f54cb97a 9223 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
9224#ifdef PERL_MAD
9225 I32 stuffstart = s - SvPVX(PL_linestr);
9226 char *tstart;
9227
cd81e915 9228 PL_realtokenstart = -1;
5db06880 9229#endif
79072805 9230
7918f24d
NC
9231 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9232
79072805 9233 s += 2;
3280af22
NIS
9234 d = PL_tokenbuf;
9235 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 9236 if (!outer)
79072805 9237 *d++ = '\n';
c35e046a
AL
9238 peek = s;
9239 while (SPACE_OR_TAB(*peek))
9240 peek++;
3792a11b 9241 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9242 s = peek;
79072805 9243 term = *s++;
3280af22 9244 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 9245 d += len;
3280af22 9246 if (s < PL_bufend)
79072805 9247 s++;
79072805
LW
9248 }
9249 else {
9250 if (*s == '\\')
9251 s++, term = '\'';
9252 else
9253 term = '"';
7e2040f0 9254 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9255 deprecate("bare << to mean <<\"\"");
7e2040f0 9256 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9257 if (d < e)
9258 *d++ = *s;
9259 }
9260 }
3280af22 9261 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9262 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9263 *d++ = '\n';
9264 *d = '\0';
3280af22 9265 len = d - PL_tokenbuf;
5db06880
NC
9266
9267#ifdef PERL_MAD
9268 if (PL_madskills) {
9269 tstart = PL_tokenbuf + !outer;
cd81e915 9270 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 9271 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9272 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9273 stuffstart = s - SvPVX(PL_linestr);
9274 }
9275#endif
6a27c188 9276#ifndef PERL_STRICT_CR
f63a84b2
LW
9277 d = strchr(s, '\r');
9278 if (d) {
b464bac0 9279 char * const olds = s;
f63a84b2 9280 s = d;
3280af22 9281 while (s < PL_bufend) {
f63a84b2
LW
9282 if (*s == '\r') {
9283 *d++ = '\n';
9284 if (*++s == '\n')
9285 s++;
9286 }
9287 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9288 *d++ = *s++;
9289 s++;
9290 }
9291 else
9292 *d++ = *s++;
9293 }
9294 *d = '\0';
3280af22 9295 PL_bufend = d;
95a20fc0 9296 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9297 s = olds;
9298 }
9299#endif
5db06880
NC
9300#ifdef PERL_MAD
9301 found_newline = 0;
9302#endif
10edeb5d 9303 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
9304 herewas = newSVpvn(s,PL_bufend-s);
9305 }
9306 else {
5db06880
NC
9307#ifdef PERL_MAD
9308 herewas = newSVpvn(s-1,found_newline-s+1);
9309#else
73d840c0
AL
9310 s--;
9311 herewas = newSVpvn(s,found_newline-s);
5db06880 9312#endif
73d840c0 9313 }
5db06880
NC
9314#ifdef PERL_MAD
9315 if (PL_madskills) {
9316 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9317 if (PL_thisstuff)
9318 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9319 else
cd81e915 9320 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
9321 }
9322#endif
79072805 9323 s += SvCUR(herewas);
748a9306 9324
5db06880
NC
9325#ifdef PERL_MAD
9326 stuffstart = s - SvPVX(PL_linestr);
9327
9328 if (found_newline)
9329 s--;
9330#endif
9331
7d0a29fe
NC
9332 tmpstr = newSV_type(SVt_PVIV);
9333 SvGROW(tmpstr, 80);
748a9306 9334 if (term == '\'') {
79072805 9335 op_type = OP_CONST;
45977657 9336 SvIV_set(tmpstr, -1);
748a9306
LW
9337 }
9338 else if (term == '`') {
79072805 9339 op_type = OP_BACKTICK;
45977657 9340 SvIV_set(tmpstr, '\\');
748a9306 9341 }
79072805
LW
9342
9343 CLINE;
57843af0 9344 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
9345 PL_multi_open = PL_multi_close = '<';
9346 term = *PL_tokenbuf;
0244c3a4 9347 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
9348 char * const bufptr = PL_sublex_info.super_bufptr;
9349 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 9350 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
9351 s = strchr(bufptr, '\n');
9352 if (!s)
9353 s = bufend;
9354 d = s;
9355 while (s < bufend &&
9356 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9357 if (*s++ == '\n')
57843af0 9358 CopLINE_inc(PL_curcop);
0244c3a4
GS
9359 }
9360 if (s >= bufend) {
eb160463 9361 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
9362 missingterm(PL_tokenbuf);
9363 }
9364 sv_setpvn(herewas,bufptr,d-bufptr+1);
9365 sv_setpvn(tmpstr,d+1,s-d);
9366 s += len - 1;
9367 sv_catpvn(herewas,s,bufend-s);
95a20fc0 9368 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
9369
9370 s = olds;
9371 goto retval;
9372 }
9373 else if (!outer) {
79072805 9374 d = s;
3280af22
NIS
9375 while (s < PL_bufend &&
9376 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 9377 if (*s++ == '\n')
57843af0 9378 CopLINE_inc(PL_curcop);
79072805 9379 }
3280af22 9380 if (s >= PL_bufend) {
eb160463 9381 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9382 missingterm(PL_tokenbuf);
79072805
LW
9383 }
9384 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9385#ifdef PERL_MAD
9386 if (PL_madskills) {
cd81e915
NC
9387 if (PL_thisstuff)
9388 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9389 else
cd81e915 9390 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9391 stuffstart = s - SvPVX(PL_linestr);
9392 }
9393#endif
79072805 9394 s += len - 1;
57843af0 9395 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 9396
3280af22
NIS
9397 sv_catpvn(herewas,s,PL_bufend-s);
9398 sv_setsv(PL_linestr,herewas);
9399 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9400 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9401 PL_last_lop = PL_last_uni = NULL;
79072805
LW
9402 }
9403 else
76f68e9b 9404 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 9405 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
9406#ifdef PERL_MAD
9407 if (PL_madskills) {
9408 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9409 if (PL_thisstuff)
9410 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9411 else
cd81e915 9412 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9413 }
9414#endif
f0e67a1d 9415 PL_bufptr = s;
17cc9359 9416 CopLINE_inc(PL_curcop);
f0e67a1d 9417 if (!outer || !lex_next_chunk(0)) {
eb160463 9418 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9419 missingterm(PL_tokenbuf);
79072805 9420 }
17cc9359 9421 CopLINE_dec(PL_curcop);
f0e67a1d 9422 s = PL_bufptr;
5db06880
NC
9423#ifdef PERL_MAD
9424 stuffstart = s - SvPVX(PL_linestr);
9425#endif
57843af0 9426 CopLINE_inc(PL_curcop);
3280af22 9427 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9428 PL_last_lop = PL_last_uni = NULL;
6a27c188 9429#ifndef PERL_STRICT_CR
3280af22 9430 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
9431 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9432 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 9433 {
3280af22
NIS
9434 PL_bufend[-2] = '\n';
9435 PL_bufend--;
95a20fc0 9436 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 9437 }
3280af22
NIS
9438 else if (PL_bufend[-1] == '\r')
9439 PL_bufend[-1] = '\n';
f63a84b2 9440 }
3280af22
NIS
9441 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9442 PL_bufend[-1] = '\n';
f63a84b2 9443#endif
3280af22 9444 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 9445 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 9446 *(SvPVX(PL_linestr) + off ) = ' ';
37c6a70c 9447 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
3280af22
NIS
9448 sv_catsv(PL_linestr,herewas);
9449 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 9450 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
9451 }
9452 else {
3280af22
NIS
9453 s = PL_bufend;
9454 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
9455 }
9456 }
79072805 9457 s++;
0244c3a4 9458retval:
57843af0 9459 PL_multi_end = CopLINE(PL_curcop);
79072805 9460 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 9461 SvPV_shrink_to_cur(tmpstr);
79072805 9462 }
8990e307 9463 SvREFCNT_dec(herewas);
2f31ce75 9464 if (!IN_BYTES) {
95a20fc0 9465 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
9466 SvUTF8_on(tmpstr);
9467 else if (PL_encoding)
9468 sv_recode_to_utf8(tmpstr, PL_encoding);
9469 }
3280af22 9470 PL_lex_stuff = tmpstr;
6154021b 9471 pl_yylval.ival = op_type;
79072805
LW
9472 return s;
9473}
9474
02aa26ce
NT
9475/* scan_inputsymbol
9476 takes: current position in input buffer
9477 returns: new position in input buffer
6154021b 9478 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
9479
9480 This code handles:
9481
9482 <> read from ARGV
9483 <FH> read from filehandle
9484 <pkg::FH> read from package qualified filehandle
9485 <pkg'FH> read from package qualified filehandle
9486 <$fh> read from filehandle in $fh
9487 <*.h> filename glob
9488
9489*/
9490
76e3520e 9491STATIC char *
cea2e8a9 9492S_scan_inputsymbol(pTHX_ char *start)
79072805 9493{
97aff369 9494 dVAR;
02aa26ce 9495 register char *s = start; /* current position in buffer */
1b420867 9496 char *end;
79072805 9497 I32 len;
6136c704
AL
9498 char *d = PL_tokenbuf; /* start of temp holding space */
9499 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9500
7918f24d
NC
9501 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9502
1b420867
GS
9503 end = strchr(s, '\n');
9504 if (!end)
9505 end = PL_bufend;
9506 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
9507
9508 /* die if we didn't have space for the contents of the <>,
1b420867 9509 or if it didn't end, or if we see a newline
02aa26ce
NT
9510 */
9511
bb7a0f54 9512 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 9513 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 9514 if (s >= end)
cea2e8a9 9515 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 9516
fc36a67e 9517 s++;
02aa26ce
NT
9518
9519 /* check for <$fh>
9520 Remember, only scalar variables are interpreted as filehandles by
9521 this code. Anything more complex (e.g., <$fh{$num}>) will be
9522 treated as a glob() call.
9523 This code makes use of the fact that except for the $ at the front,
9524 a scalar variable and a filehandle look the same.
9525 */
4633a7c4 9526 if (*d == '$' && d[1]) d++;
02aa26ce
NT
9527
9528 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 9529 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 9530 d++;
02aa26ce
NT
9531
9532 /* If we've tried to read what we allow filehandles to look like, and
9533 there's still text left, then it must be a glob() and not a getline.
9534 Use scan_str to pull out the stuff between the <> and treat it
9535 as nothing more than a string.
9536 */
9537
3280af22 9538 if (d - PL_tokenbuf != len) {
6154021b 9539 pl_yylval.ival = OP_GLOB;
5db06880 9540 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9541 if (!s)
cea2e8a9 9542 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
9543 return s;
9544 }
395c3793 9545 else {
9b3023bc 9546 bool readline_overriden = FALSE;
6136c704 9547 GV *gv_readline;
9b3023bc 9548 GV **gvp;
02aa26ce 9549 /* we're in a filehandle read situation */
3280af22 9550 d = PL_tokenbuf;
02aa26ce
NT
9551
9552 /* turn <> into <ARGV> */
79072805 9553 if (!len)
689badd5 9554 Copy("ARGV",d,5,char);
02aa26ce 9555
9b3023bc 9556 /* Check whether readline() is overriden */
fafc274c 9557 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 9558 if ((gv_readline
ba979b31 9559 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 9560 ||
017a3ce5 9561 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 9562 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 9563 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
9564 readline_overriden = TRUE;
9565
02aa26ce
NT
9566 /* if <$fh>, create the ops to turn the variable into a
9567 filehandle
9568 */
79072805 9569 if (*d == '$') {
02aa26ce
NT
9570 /* try to find it in the pad for this block, otherwise find
9571 add symbol table ops
9572 */
bc9b26ca 9573 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 9574 if (tmp != NOT_IN_PAD) {
00b1698f 9575 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
9576 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9577 HEK * const stashname = HvNAME_HEK(stash);
9578 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 9579 sv_catpvs(sym, "::");
f558d5af
JH
9580 sv_catpv(sym, d+1);
9581 d = SvPVX(sym);
9582 goto intro_sym;
9583 }
9584 else {
6136c704 9585 OP * const o = newOP(OP_PADSV, 0);
f558d5af 9586 o->op_targ = tmp;
9b3023bc
RGS
9587 PL_lex_op = readline_overriden
9588 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9589 op_append_elem(OP_LIST, o,
9b3023bc
RGS
9590 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9591 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 9592 }
a0d0e21e
LW
9593 }
9594 else {
f558d5af
JH
9595 GV *gv;
9596 ++d;
9597intro_sym:
9598 gv = gv_fetchpv(d,
9599 (PL_in_eval
9600 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 9601 : GV_ADDMULTI),
f558d5af 9602 SVt_PV);
9b3023bc
RGS
9603 PL_lex_op = readline_overriden
9604 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9605 op_append_elem(OP_LIST,
9b3023bc
RGS
9606 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9607 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9608 : (OP*)newUNOP(OP_READLINE, 0,
9609 newUNOP(OP_RV2SV, 0,
9610 newGVOP(OP_GV, 0, gv)));
a0d0e21e 9611 }
7c6fadd6
RGS
9612 if (!readline_overriden)
9613 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
9614 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9615 pl_yylval.ival = OP_NULL;
79072805 9616 }
02aa26ce
NT
9617
9618 /* If it's none of the above, it must be a literal filehandle
9619 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 9620 else {
6136c704 9621 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
9622 PL_lex_op = readline_overriden
9623 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9624 op_append_elem(OP_LIST,
9b3023bc
RGS
9625 newGVOP(OP_GV, 0, gv),
9626 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9627 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 9628 pl_yylval.ival = OP_NULL;
79072805
LW
9629 }
9630 }
02aa26ce 9631
79072805
LW
9632 return s;
9633}
9634
02aa26ce
NT
9635
9636/* scan_str
9637 takes: start position in buffer
09bef843
SB
9638 keep_quoted preserve \ on the embedded delimiter(s)
9639 keep_delims preserve the delimiters around the string
02aa26ce
NT
9640 returns: position to continue reading from buffer
9641 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9642 updates the read buffer.
9643
9644 This subroutine pulls a string out of the input. It is called for:
9645 q single quotes q(literal text)
9646 ' single quotes 'literal text'
9647 qq double quotes qq(interpolate $here please)
9648 " double quotes "interpolate $here please"
9649 qx backticks qx(/bin/ls -l)
9650 ` backticks `/bin/ls -l`
9651 qw quote words @EXPORT_OK = qw( func() $spam )
9652 m// regexp match m/this/
9653 s/// regexp substitute s/this/that/
9654 tr/// string transliterate tr/this/that/
9655 y/// string transliterate y/this/that/
9656 ($*@) sub prototypes sub foo ($)
09bef843 9657 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
9658 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9659
9660 In most of these cases (all but <>, patterns and transliterate)
9661 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9662 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9663 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9664 calls scan_str().
4e553d73 9665
02aa26ce
NT
9666 It skips whitespace before the string starts, and treats the first
9667 character as the delimiter. If the delimiter is one of ([{< then
9668 the corresponding "close" character )]}> is used as the closing
9669 delimiter. It allows quoting of delimiters, and if the string has
9670 balanced delimiters ([{<>}]) it allows nesting.
9671
37fd879b
HS
9672 On success, the SV with the resulting string is put into lex_stuff or,
9673 if that is already non-NULL, into lex_repl. The second case occurs only
9674 when parsing the RHS of the special constructs s/// and tr/// (y///).
9675 For convenience, the terminating delimiter character is stuffed into
9676 SvIVX of the SV.
02aa26ce
NT
9677*/
9678
76e3520e 9679STATIC char *
09bef843 9680S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 9681{
97aff369 9682 dVAR;
02aa26ce 9683 SV *sv; /* scalar value: string */
d3fcec1f 9684 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
9685 register char *s = start; /* current position in the buffer */
9686 register char term; /* terminating character */
9687 register char *to; /* current position in the sv's data */
9688 I32 brackets = 1; /* bracket nesting level */
89491803 9689 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 9690 I32 termcode; /* terminating char. code */
89ebb4a3 9691 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 9692 STRLEN termlen; /* length of terminating string */
0331ef07 9693 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
9694#ifdef PERL_MAD
9695 int stuffstart;
9696 char *tstart;
9697#endif
02aa26ce 9698
7918f24d
NC
9699 PERL_ARGS_ASSERT_SCAN_STR;
9700
02aa26ce 9701 /* skip space before the delimiter */
29595ff2
NC
9702 if (isSPACE(*s)) {
9703 s = PEEKSPACE(s);
9704 }
02aa26ce 9705
5db06880 9706#ifdef PERL_MAD
cd81e915
NC
9707 if (PL_realtokenstart >= 0) {
9708 stuffstart = PL_realtokenstart;
9709 PL_realtokenstart = -1;
5db06880
NC
9710 }
9711 else
9712 stuffstart = start - SvPVX(PL_linestr);
9713#endif
02aa26ce 9714 /* mark where we are, in case we need to report errors */
79072805 9715 CLINE;
02aa26ce
NT
9716
9717 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 9718 term = *s;
220e2d4e
IH
9719 if (!UTF) {
9720 termcode = termstr[0] = term;
9721 termlen = 1;
9722 }
9723 else {
f3b9ce0f 9724 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
9725 Copy(s, termstr, termlen, U8);
9726 if (!UTF8_IS_INVARIANT(term))
9727 has_utf8 = TRUE;
9728 }
b1c7b182 9729
02aa26ce 9730 /* mark where we are */
57843af0 9731 PL_multi_start = CopLINE(PL_curcop);
3280af22 9732 PL_multi_open = term;
02aa26ce
NT
9733
9734 /* find corresponding closing delimiter */
93a17b20 9735 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
9736 termcode = termstr[0] = term = tmps[5];
9737
3280af22 9738 PL_multi_close = term;
79072805 9739
561b68a9
SH
9740 /* create a new SV to hold the contents. 79 is the SV's initial length.
9741 What a random number. */
7d0a29fe
NC
9742 sv = newSV_type(SVt_PVIV);
9743 SvGROW(sv, 80);
45977657 9744 SvIV_set(sv, termcode);
a0d0e21e 9745 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
9746
9747 /* move past delimiter and try to read a complete string */
09bef843 9748 if (keep_delims)
220e2d4e
IH
9749 sv_catpvn(sv, s, termlen);
9750 s += termlen;
5db06880
NC
9751#ifdef PERL_MAD
9752 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9753 if (!PL_thisopen && !keep_delims) {
9754 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9755 stuffstart = s - SvPVX(PL_linestr);
9756 }
9757#endif
93a17b20 9758 for (;;) {
220e2d4e
IH
9759 if (PL_encoding && !UTF) {
9760 bool cont = TRUE;
9761
9762 while (cont) {
95a20fc0 9763 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 9764 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 9765 &offset, (char*)termstr, termlen);
6136c704
AL
9766 const char * const ns = SvPVX_const(PL_linestr) + offset;
9767 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
9768
9769 for (; s < ns; s++) {
9770 if (*s == '\n' && !PL_rsfp)
9771 CopLINE_inc(PL_curcop);
9772 }
9773 if (!found)
9774 goto read_more_line;
9775 else {
9776 /* handle quoted delimiters */
52327caf 9777 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 9778 const char *t;
95a20fc0 9779 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
9780 t--;
9781 if ((svlast-1 - t) % 2) {
9782 if (!keep_quoted) {
9783 *(svlast-1) = term;
9784 *svlast = '\0';
9785 SvCUR_set(sv, SvCUR(sv) - 1);
9786 }
9787 continue;
9788 }
9789 }
9790 if (PL_multi_open == PL_multi_close) {
9791 cont = FALSE;
9792 }
9793 else {
f54cb97a
AL
9794 const char *t;
9795 char *w;
0331ef07 9796 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
9797 /* At here, all closes are "was quoted" one,
9798 so we don't check PL_multi_close. */
9799 if (*t == '\\') {
9800 if (!keep_quoted && *(t+1) == PL_multi_open)
9801 t++;
9802 else
9803 *w++ = *t++;
9804 }
9805 else if (*t == PL_multi_open)
9806 brackets++;
9807
9808 *w = *t;
9809 }
9810 if (w < t) {
9811 *w++ = term;
9812 *w = '\0';
95a20fc0 9813 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 9814 }
0331ef07 9815 last_off = w - SvPVX(sv);
220e2d4e
IH
9816 if (--brackets <= 0)
9817 cont = FALSE;
9818 }
9819 }
9820 }
9821 if (!keep_delims) {
9822 SvCUR_set(sv, SvCUR(sv) - 1);
9823 *SvEND(sv) = '\0';
9824 }
9825 break;
9826 }
9827
02aa26ce 9828 /* extend sv if need be */
3280af22 9829 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 9830 /* set 'to' to the next character in the sv's string */
463ee0b2 9831 to = SvPVX(sv)+SvCUR(sv);
09bef843 9832
02aa26ce 9833 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
9834 if (PL_multi_open == PL_multi_close) {
9835 for (; s < PL_bufend; s++,to++) {
02aa26ce 9836 /* embedded newlines increment the current line number */
3280af22 9837 if (*s == '\n' && !PL_rsfp)
57843af0 9838 CopLINE_inc(PL_curcop);
02aa26ce 9839 /* handle quoted delimiters */
3280af22 9840 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 9841 if (!keep_quoted && s[1] == term)
a0d0e21e 9842 s++;
02aa26ce 9843 /* any other quotes are simply copied straight through */
a0d0e21e
LW
9844 else
9845 *to++ = *s++;
9846 }
02aa26ce
NT
9847 /* terminate when run out of buffer (the for() condition), or
9848 have found the terminator */
220e2d4e
IH
9849 else if (*s == term) {
9850 if (termlen == 1)
9851 break;
f3b9ce0f 9852 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
9853 break;
9854 }
63cd0674 9855 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9856 has_utf8 = TRUE;
93a17b20
LW
9857 *to = *s;
9858 }
9859 }
02aa26ce
NT
9860
9861 /* if the terminator isn't the same as the start character (e.g.,
9862 matched brackets), we have to allow more in the quoting, and
9863 be prepared for nested brackets.
9864 */
93a17b20 9865 else {
02aa26ce 9866 /* read until we run out of string, or we find the terminator */
3280af22 9867 for (; s < PL_bufend; s++,to++) {
02aa26ce 9868 /* embedded newlines increment the line count */
3280af22 9869 if (*s == '\n' && !PL_rsfp)
57843af0 9870 CopLINE_inc(PL_curcop);
02aa26ce 9871 /* backslashes can escape the open or closing characters */
3280af22 9872 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
9873 if (!keep_quoted &&
9874 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
9875 s++;
9876 else
9877 *to++ = *s++;
9878 }
02aa26ce 9879 /* allow nested opens and closes */
3280af22 9880 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 9881 break;
3280af22 9882 else if (*s == PL_multi_open)
93a17b20 9883 brackets++;
63cd0674 9884 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9885 has_utf8 = TRUE;
93a17b20
LW
9886 *to = *s;
9887 }
9888 }
02aa26ce 9889 /* terminate the copied string and update the sv's end-of-string */
93a17b20 9890 *to = '\0';
95a20fc0 9891 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 9892
02aa26ce
NT
9893 /*
9894 * this next chunk reads more into the buffer if we're not done yet
9895 */
9896
b1c7b182
GS
9897 if (s < PL_bufend)
9898 break; /* handle case where we are done yet :-) */
79072805 9899
6a27c188 9900#ifndef PERL_STRICT_CR
95a20fc0 9901 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
9902 if ((to[-2] == '\r' && to[-1] == '\n') ||
9903 (to[-2] == '\n' && to[-1] == '\r'))
9904 {
f63a84b2
LW
9905 to[-2] = '\n';
9906 to--;
95a20fc0 9907 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
9908 }
9909 else if (to[-1] == '\r')
9910 to[-1] = '\n';
9911 }
95a20fc0 9912 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
9913 to[-1] = '\n';
9914#endif
9915
220e2d4e 9916 read_more_line:
02aa26ce
NT
9917 /* if we're out of file, or a read fails, bail and reset the current
9918 line marker so we can report where the unterminated string began
9919 */
5db06880
NC
9920#ifdef PERL_MAD
9921 if (PL_madskills) {
c35e046a 9922 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9923 if (PL_thisstuff)
9924 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9925 else
cd81e915 9926 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9927 }
9928#endif
f0e67a1d
Z
9929 CopLINE_inc(PL_curcop);
9930 PL_bufptr = PL_bufend;
9931 if (!lex_next_chunk(0)) {
c07a80fd 9932 sv_free(sv);
eb160463 9933 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 9934 return NULL;
79072805 9935 }
f0e67a1d 9936 s = PL_bufptr;
5db06880
NC
9937#ifdef PERL_MAD
9938 stuffstart = 0;
9939#endif
378cc40b 9940 }
4e553d73 9941
02aa26ce
NT
9942 /* at this point, we have successfully read the delimited string */
9943
220e2d4e 9944 if (!PL_encoding || UTF) {
5db06880
NC
9945#ifdef PERL_MAD
9946 if (PL_madskills) {
c35e046a 9947 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 9948 const int len = s - tstart;
cd81e915 9949 if (PL_thisstuff)
c35e046a 9950 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9951 else
c35e046a 9952 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9953 if (!PL_thisclose && !keep_delims)
9954 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
9955 }
9956#endif
9957
220e2d4e
IH
9958 if (keep_delims)
9959 sv_catpvn(sv, s, termlen);
9960 s += termlen;
9961 }
5db06880
NC
9962#ifdef PERL_MAD
9963 else {
9964 if (PL_madskills) {
c35e046a
AL
9965 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9966 const int len = s - tstart - termlen;
cd81e915 9967 if (PL_thisstuff)
c35e046a 9968 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9969 else
c35e046a 9970 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9971 if (!PL_thisclose && !keep_delims)
9972 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
9973 }
9974 }
9975#endif
220e2d4e 9976 if (has_utf8 || PL_encoding)
b1c7b182 9977 SvUTF8_on(sv);
d0063567 9978
57843af0 9979 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
9980
9981 /* if we allocated too much space, give some back */
93a17b20
LW
9982 if (SvCUR(sv) + 5 < SvLEN(sv)) {
9983 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 9984 SvPV_renew(sv, SvLEN(sv));
79072805 9985 }
02aa26ce
NT
9986
9987 /* decide whether this is the first or second quoted string we've read
9988 for this op
9989 */
4e553d73 9990
3280af22
NIS
9991 if (PL_lex_stuff)
9992 PL_lex_repl = sv;
79072805 9993 else
3280af22 9994 PL_lex_stuff = sv;
378cc40b
LW
9995 return s;
9996}
9997
02aa26ce
NT
9998/*
9999 scan_num
10000 takes: pointer to position in buffer
10001 returns: pointer to new position in buffer
6154021b 10002 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
10003
10004 Read a number in any of the formats that Perl accepts:
10005
7fd134d9
JH
10006 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10007 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
10008 0b[01](_?[01])*
10009 0[0-7](_?[0-7])*
10010 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10011
3280af22 10012 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10013 thing it reads.
10014
10015 If it reads a number without a decimal point or an exponent, it will
10016 try converting the number to an integer and see if it can do so
10017 without loss of precision.
10018*/
4e553d73 10019
378cc40b 10020char *
bfed75c6 10021Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10022{
97aff369 10023 dVAR;
bfed75c6 10024 register const char *s = start; /* current position in buffer */
02aa26ce
NT
10025 register char *d; /* destination in temp buffer */
10026 register char *e; /* end of temp buffer */
86554af2 10027 NV nv; /* number read, as a double */
a0714e2c 10028 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10029 bool floatit; /* boolean: int or float? */
cbbf8932 10030 const char *lastub = NULL; /* position of last underbar */
bfed75c6 10031 static char const number_too_long[] = "Number too long";
378cc40b 10032
7918f24d
NC
10033 PERL_ARGS_ASSERT_SCAN_NUM;
10034
02aa26ce
NT
10035 /* We use the first character to decide what type of number this is */
10036
378cc40b 10037 switch (*s) {
79072805 10038 default:
cea2e8a9 10039 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 10040
02aa26ce 10041 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10042 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10043 case '0':
10044 {
02aa26ce
NT
10045 /* variables:
10046 u holds the "number so far"
4f19785b
WSI
10047 shift the power of 2 of the base
10048 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10049 overflowed was the number more than we can hold?
10050
10051 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10052 we in octal/hex/binary?" indicator to disallow hex characters
10053 when in octal mode.
02aa26ce 10054 */
9e24b6e2
JH
10055 NV n = 0.0;
10056 UV u = 0;
79072805 10057 I32 shift;
9e24b6e2 10058 bool overflowed = FALSE;
61f33854 10059 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10060 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10061 static const char* const bases[5] =
10062 { "", "binary", "", "octal", "hexadecimal" };
10063 static const char* const Bases[5] =
10064 { "", "Binary", "", "Octal", "Hexadecimal" };
10065 static const char* const maxima[5] =
10066 { "",
10067 "0b11111111111111111111111111111111",
10068 "",
10069 "037777777777",
10070 "0xffffffff" };
bfed75c6 10071 const char *base, *Base, *max;
378cc40b 10072
02aa26ce 10073 /* check for hex */
a674e8db 10074 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10075 shift = 4;
10076 s += 2;
61f33854 10077 just_zero = FALSE;
a674e8db 10078 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10079 shift = 1;
10080 s += 2;
61f33854 10081 just_zero = FALSE;
378cc40b 10082 }
02aa26ce 10083 /* check for a decimal in disguise */
b78218b7 10084 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10085 goto decimal;
02aa26ce 10086 /* so it must be octal */
928753ea 10087 else {
378cc40b 10088 shift = 3;
928753ea
JH
10089 s++;
10090 }
10091
10092 if (*s == '_') {
a2a5de95 10093 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10094 "Misplaced _ in number");
10095 lastub = s++;
10096 }
9e24b6e2
JH
10097
10098 base = bases[shift];
10099 Base = Bases[shift];
10100 max = maxima[shift];
02aa26ce 10101
4f19785b 10102 /* read the rest of the number */
378cc40b 10103 for (;;) {
9e24b6e2 10104 /* x is used in the overflow test,
893fe2c2 10105 b is the digit we're adding on. */
9e24b6e2 10106 UV x, b;
55497cff 10107
378cc40b 10108 switch (*s) {
02aa26ce
NT
10109
10110 /* if we don't mention it, we're done */
378cc40b
LW
10111 default:
10112 goto out;
02aa26ce 10113
928753ea 10114 /* _ are ignored -- but warned about if consecutive */
de3bb511 10115 case '_':
a2a5de95
NC
10116 if (lastub && s == lastub + 1)
10117 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10118 "Misplaced _ in number");
928753ea 10119 lastub = s++;
de3bb511 10120 break;
02aa26ce
NT
10121
10122 /* 8 and 9 are not octal */
378cc40b 10123 case '8': case '9':
4f19785b 10124 if (shift == 3)
cea2e8a9 10125 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10126 /* FALL THROUGH */
02aa26ce
NT
10127
10128 /* octal digits */
4f19785b 10129 case '2': case '3': case '4':
378cc40b 10130 case '5': case '6': case '7':
4f19785b 10131 if (shift == 1)
cea2e8a9 10132 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10133 /* FALL THROUGH */
10134
10135 case '0': case '1':
02aa26ce 10136 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10137 goto digit;
02aa26ce
NT
10138
10139 /* hex digits */
378cc40b
LW
10140 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10141 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10142 /* make sure they said 0x */
378cc40b
LW
10143 if (shift != 4)
10144 goto out;
55497cff 10145 b = (*s++ & 7) + 9;
02aa26ce
NT
10146
10147 /* Prepare to put the digit we have onto the end
10148 of the number so far. We check for overflows.
10149 */
10150
55497cff 10151 digit:
61f33854 10152 just_zero = FALSE;
9e24b6e2
JH
10153 if (!overflowed) {
10154 x = u << shift; /* make room for the digit */
10155
10156 if ((x >> shift) != u
10157 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10158 overflowed = TRUE;
10159 n = (NV) u;
9b387841
NC
10160 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10161 "Integer overflow in %s number",
10162 base);
9e24b6e2
JH
10163 } else
10164 u = x | b; /* add the digit to the end */
10165 }
10166 if (overflowed) {
10167 n *= nvshift[shift];
10168 /* If an NV has not enough bits in its
10169 * mantissa to represent an UV this summing of
10170 * small low-order numbers is a waste of time
10171 * (because the NV cannot preserve the
10172 * low-order bits anyway): we could just
10173 * remember when did we overflow and in the
10174 * end just multiply n by the right
10175 * amount. */
10176 n += (NV) b;
55497cff 10177 }
378cc40b
LW
10178 break;
10179 }
10180 }
02aa26ce
NT
10181
10182 /* if we get here, we had success: make a scalar value from
10183 the number.
10184 */
378cc40b 10185 out:
928753ea
JH
10186
10187 /* final misplaced underbar check */
10188 if (s[-1] == '_') {
a2a5de95 10189 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10190 }
10191
9e24b6e2 10192 if (overflowed) {
a2a5de95
NC
10193 if (n > 4294967295.0)
10194 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10195 "%s number > %s non-portable",
10196 Base, max);
b081dd7e 10197 sv = newSVnv(n);
9e24b6e2
JH
10198 }
10199 else {
15041a67 10200#if UVSIZE > 4
a2a5de95
NC
10201 if (u > 0xffffffff)
10202 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10203 "%s number > %s non-portable",
10204 Base, max);
2cc4c2dc 10205#endif
b081dd7e 10206 sv = newSVuv(u);
9e24b6e2 10207 }
61f33854 10208 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10209 sv = new_constant(start, s - start, "integer",
eb0d8d16 10210 sv, NULL, NULL, 0);
61f33854 10211 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10212 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10213 }
10214 break;
02aa26ce
NT
10215
10216 /*
10217 handle decimal numbers.
10218 we're also sent here when we read a 0 as the first digit
10219 */
378cc40b
LW
10220 case '1': case '2': case '3': case '4': case '5':
10221 case '6': case '7': case '8': case '9': case '.':
10222 decimal:
3280af22
NIS
10223 d = PL_tokenbuf;
10224 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10225 floatit = FALSE;
02aa26ce
NT
10226
10227 /* read next group of digits and _ and copy into d */
de3bb511 10228 while (isDIGIT(*s) || *s == '_') {
4e553d73 10229 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10230 if -w is on
10231 */
93a17b20 10232 if (*s == '_') {
a2a5de95
NC
10233 if (lastub && s == lastub + 1)
10234 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10235 "Misplaced _ in number");
928753ea 10236 lastub = s++;
93a17b20 10237 }
fc36a67e 10238 else {
02aa26ce 10239 /* check for end of fixed-length buffer */
fc36a67e 10240 if (d >= e)
cea2e8a9 10241 Perl_croak(aTHX_ number_too_long);
02aa26ce 10242 /* if we're ok, copy the character */
378cc40b 10243 *d++ = *s++;
fc36a67e 10244 }
378cc40b 10245 }
02aa26ce
NT
10246
10247 /* final misplaced underbar check */
928753ea 10248 if (lastub && s == lastub + 1) {
a2a5de95 10249 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10250 }
02aa26ce
NT
10251
10252 /* read a decimal portion if there is one. avoid
10253 3..5 being interpreted as the number 3. followed
10254 by .5
10255 */
2f3197b3 10256 if (*s == '.' && s[1] != '.') {
79072805 10257 floatit = TRUE;
378cc40b 10258 *d++ = *s++;
02aa26ce 10259
928753ea 10260 if (*s == '_') {
a2a5de95
NC
10261 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10262 "Misplaced _ in number");
928753ea
JH
10263 lastub = s;
10264 }
10265
10266 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10267 */
fc36a67e 10268 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10269 /* fixed length buffer check */
fc36a67e 10270 if (d >= e)
cea2e8a9 10271 Perl_croak(aTHX_ number_too_long);
928753ea 10272 if (*s == '_') {
a2a5de95
NC
10273 if (lastub && s == lastub + 1)
10274 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10275 "Misplaced _ in number");
928753ea
JH
10276 lastub = s;
10277 }
10278 else
fc36a67e 10279 *d++ = *s;
378cc40b 10280 }
928753ea
JH
10281 /* fractional part ending in underbar? */
10282 if (s[-1] == '_') {
a2a5de95
NC
10283 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10284 "Misplaced _ in number");
928753ea 10285 }
dd629d5b
GS
10286 if (*s == '.' && isDIGIT(s[1])) {
10287 /* oops, it's really a v-string, but without the "v" */
f4758303 10288 s = start;
dd629d5b
GS
10289 goto vstring;
10290 }
378cc40b 10291 }
02aa26ce
NT
10292
10293 /* read exponent part, if present */
3792a11b 10294 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10295 floatit = TRUE;
10296 s++;
02aa26ce
NT
10297
10298 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10299 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10300
7fd134d9
JH
10301 /* stray preinitial _ */
10302 if (*s == '_') {
a2a5de95
NC
10303 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10304 "Misplaced _ in number");
7fd134d9
JH
10305 lastub = s++;
10306 }
10307
02aa26ce 10308 /* allow positive or negative exponent */
378cc40b
LW
10309 if (*s == '+' || *s == '-')
10310 *d++ = *s++;
02aa26ce 10311
7fd134d9
JH
10312 /* stray initial _ */
10313 if (*s == '_') {
a2a5de95
NC
10314 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10315 "Misplaced _ in number");
7fd134d9
JH
10316 lastub = s++;
10317 }
10318
7fd134d9
JH
10319 /* read digits of exponent */
10320 while (isDIGIT(*s) || *s == '_') {
10321 if (isDIGIT(*s)) {
10322 if (d >= e)
10323 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10324 *d++ = *s++;
7fd134d9
JH
10325 }
10326 else {
041457d9 10327 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10328 (!isDIGIT(s[1]) && s[1] != '_')))
10329 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10330 "Misplaced _ in number");
b3b48e3e 10331 lastub = s++;
7fd134d9 10332 }
7fd134d9 10333 }
378cc40b 10334 }
02aa26ce 10335
02aa26ce 10336
0b7fceb9 10337 /*
58bb9ec3
NC
10338 We try to do an integer conversion first if no characters
10339 indicating "float" have been found.
0b7fceb9
MU
10340 */
10341
10342 if (!floatit) {
58bb9ec3 10343 UV uv;
6136c704 10344 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10345
10346 if (flags == IS_NUMBER_IN_UV) {
10347 if (uv <= IV_MAX)
b081dd7e 10348 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10349 else
b081dd7e 10350 sv = newSVuv(uv);
58bb9ec3
NC
10351 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10352 if (uv <= (UV) IV_MIN)
b081dd7e 10353 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10354 else
10355 floatit = TRUE;
10356 } else
10357 floatit = TRUE;
10358 }
0b7fceb9 10359 if (floatit) {
58bb9ec3
NC
10360 /* terminate the string */
10361 *d = '\0';
86554af2 10362 nv = Atof(PL_tokenbuf);
b081dd7e 10363 sv = newSVnv(nv);
86554af2 10364 }
86554af2 10365
eb0d8d16
NC
10366 if ( floatit
10367 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10368 const char *const key = floatit ? "float" : "integer";
10369 const STRLEN keylen = floatit ? 5 : 7;
10370 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10371 key, keylen, sv, NULL, NULL, 0);
10372 }
378cc40b 10373 break;
0b7fceb9 10374
e312add1 10375 /* if it starts with a v, it could be a v-string */
a7cb1f99 10376 case 'v':
dd629d5b 10377vstring:
561b68a9 10378 sv = newSV(5); /* preallocate storage space */
65b06e02 10379 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10380 break;
79072805 10381 }
a687059c 10382
02aa26ce
NT
10383 /* make the op for the constant and return */
10384
a86a20aa 10385 if (sv)
b73d6f50 10386 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10387 else
5f66b61c 10388 lvalp->opval = NULL;
a687059c 10389
73d840c0 10390 return (char *)s;
378cc40b
LW
10391}
10392
76e3520e 10393STATIC char *
cea2e8a9 10394S_scan_formline(pTHX_ register char *s)
378cc40b 10395{
97aff369 10396 dVAR;
79072805 10397 register char *eol;
378cc40b 10398 register char *t;
6136c704 10399 SV * const stuff = newSVpvs("");
79072805 10400 bool needargs = FALSE;
c5ee2135 10401 bool eofmt = FALSE;
5db06880
NC
10402#ifdef PERL_MAD
10403 char *tokenstart = s;
4f61fd4b
JC
10404 SV* savewhite = NULL;
10405
5db06880 10406 if (PL_madskills) {
cd81e915
NC
10407 savewhite = PL_thiswhite;
10408 PL_thiswhite = 0;
5db06880
NC
10409 }
10410#endif
378cc40b 10411
7918f24d
NC
10412 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10413
79072805 10414 while (!needargs) {
a1b95068 10415 if (*s == '.') {
c35e046a 10416 t = s+1;
51882d45 10417#ifdef PERL_STRICT_CR
c35e046a
AL
10418 while (SPACE_OR_TAB(*t))
10419 t++;
51882d45 10420#else
c35e046a
AL
10421 while (SPACE_OR_TAB(*t) || *t == '\r')
10422 t++;
51882d45 10423#endif
c5ee2135
WL
10424 if (*t == '\n' || t == PL_bufend) {
10425 eofmt = TRUE;
79072805 10426 break;
c5ee2135 10427 }
79072805 10428 }
3280af22 10429 if (PL_in_eval && !PL_rsfp) {
07409e01 10430 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 10431 if (!eol++)
3280af22 10432 eol = PL_bufend;
0f85fab0
LW
10433 }
10434 else
3280af22 10435 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 10436 if (*s != '#') {
a0d0e21e
LW
10437 for (t = s; t < eol; t++) {
10438 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10439 needargs = FALSE;
10440 goto enough; /* ~~ must be first line in formline */
378cc40b 10441 }
a0d0e21e
LW
10442 if (*t == '@' || *t == '^')
10443 needargs = TRUE;
378cc40b 10444 }
7121b347
MG
10445 if (eol > s) {
10446 sv_catpvn(stuff, s, eol-s);
2dc4c65b 10447#ifndef PERL_STRICT_CR
7121b347
MG
10448 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10449 char *end = SvPVX(stuff) + SvCUR(stuff);
10450 end[-2] = '\n';
10451 end[-1] = '\0';
b162af07 10452 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 10453 }
2dc4c65b 10454#endif
7121b347
MG
10455 }
10456 else
10457 break;
79072805 10458 }
95a20fc0 10459 s = (char*)eol;
3280af22 10460 if (PL_rsfp) {
f0e67a1d 10461 bool got_some;
5db06880
NC
10462#ifdef PERL_MAD
10463 if (PL_madskills) {
cd81e915
NC
10464 if (PL_thistoken)
10465 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 10466 else
cd81e915 10467 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
10468 }
10469#endif
f0e67a1d
Z
10470 PL_bufptr = PL_bufend;
10471 CopLINE_inc(PL_curcop);
10472 got_some = lex_next_chunk(0);
10473 CopLINE_dec(PL_curcop);
10474 s = PL_bufptr;
5db06880 10475#ifdef PERL_MAD
f0e67a1d 10476 tokenstart = PL_bufptr;
5db06880 10477#endif
f0e67a1d 10478 if (!got_some)
378cc40b 10479 break;
378cc40b 10480 }
463ee0b2 10481 incline(s);
79072805 10482 }
a0d0e21e
LW
10483 enough:
10484 if (SvCUR(stuff)) {
3280af22 10485 PL_expect = XTERM;
79072805 10486 if (needargs) {
3280af22 10487 PL_lex_state = LEX_NORMAL;
cd81e915 10488 start_force(PL_curforce);
9ded7720 10489 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
10490 force_next(',');
10491 }
a0d0e21e 10492 else
3280af22 10493 PL_lex_state = LEX_FORMLINE;
1bd51a4c 10494 if (!IN_BYTES) {
95a20fc0 10495 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
10496 SvUTF8_on(stuff);
10497 else if (PL_encoding)
10498 sv_recode_to_utf8(stuff, PL_encoding);
10499 }
cd81e915 10500 start_force(PL_curforce);
9ded7720 10501 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 10502 force_next(THING);
cd81e915 10503 start_force(PL_curforce);
9ded7720 10504 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 10505 force_next(LSTOP);
378cc40b 10506 }
79072805 10507 else {
8990e307 10508 SvREFCNT_dec(stuff);
c5ee2135
WL
10509 if (eofmt)
10510 PL_lex_formbrack = 0;
3280af22 10511 PL_bufptr = s;
79072805 10512 }
5db06880
NC
10513#ifdef PERL_MAD
10514 if (PL_madskills) {
cd81e915
NC
10515 if (PL_thistoken)
10516 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 10517 else
cd81e915
NC
10518 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10519 PL_thiswhite = savewhite;
5db06880
NC
10520 }
10521#endif
79072805 10522 return s;
378cc40b 10523}
a687059c 10524
ba6d6ac9 10525I32
864dbfa3 10526Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 10527{
97aff369 10528 dVAR;
a3b680e6 10529 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 10530 CV* const outsidecv = PL_compcv;
8990e307 10531
3280af22
NIS
10532 if (PL_compcv) {
10533 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 10534 }
7766f137 10535 SAVEI32(PL_subline);
3280af22 10536 save_item(PL_subname);
3280af22 10537 SAVESPTR(PL_compcv);
3280af22 10538
ea726b52 10539 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
10540 CvFLAGS(PL_compcv) |= flags;
10541
57843af0 10542 PL_subline = CopLINE(PL_curcop);
dd2155a4 10543 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 10544 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 10545 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 10546
8990e307
LW
10547 return oldsavestack_ix;
10548}
10549
084592ab
CN
10550#ifdef __SC__
10551#pragma segment Perl_yylex
10552#endif
af41e527
NC
10553static int
10554S_yywarn(pTHX_ const char *const s)
8990e307 10555{
97aff369 10556 dVAR;
7918f24d
NC
10557
10558 PERL_ARGS_ASSERT_YYWARN;
10559
faef0170 10560 PL_in_eval |= EVAL_WARNONLY;
748a9306 10561 yyerror(s);
faef0170 10562 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 10563 return 0;
8990e307
LW
10564}
10565
10566int
15f169a1 10567Perl_yyerror(pTHX_ const char *const s)
463ee0b2 10568{
97aff369 10569 dVAR;
bfed75c6
AL
10570 const char *where = NULL;
10571 const char *context = NULL;
68dc0745 10572 int contlen = -1;
46fc3d4c 10573 SV *msg;
5912531f 10574 int yychar = PL_parser->yychar;
463ee0b2 10575
7918f24d
NC
10576 PERL_ARGS_ASSERT_YYERROR;
10577
3280af22 10578 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 10579 where = "at EOF";
8bcfe651
TM
10580 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10581 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10582 PL_oldbufptr != PL_bufptr) {
f355267c
JH
10583 /*
10584 Only for NetWare:
10585 The code below is removed for NetWare because it abends/crashes on NetWare
10586 when the script has error such as not having the closing quotes like:
10587 if ($var eq "value)
10588 Checking of white spaces is anyway done in NetWare code.
10589 */
10590#ifndef NETWARE
3280af22
NIS
10591 while (isSPACE(*PL_oldoldbufptr))
10592 PL_oldoldbufptr++;
f355267c 10593#endif
3280af22
NIS
10594 context = PL_oldoldbufptr;
10595 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 10596 }
8bcfe651
TM
10597 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10598 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
10599 /*
10600 Only for NetWare:
10601 The code below is removed for NetWare because it abends/crashes on NetWare
10602 when the script has error such as not having the closing quotes like:
10603 if ($var eq "value)
10604 Checking of white spaces is anyway done in NetWare code.
10605 */
10606#ifndef NETWARE
3280af22
NIS
10607 while (isSPACE(*PL_oldbufptr))
10608 PL_oldbufptr++;
f355267c 10609#endif
3280af22
NIS
10610 context = PL_oldbufptr;
10611 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
10612 }
10613 else if (yychar > 255)
68dc0745 10614 where = "next token ???";
12fbd33b 10615 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
10616 if (PL_lex_state == LEX_NORMAL ||
10617 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 10618 where = "at end of line";
3280af22 10619 else if (PL_lex_inpat)
68dc0745 10620 where = "within pattern";
463ee0b2 10621 else
68dc0745 10622 where = "within string";
463ee0b2 10623 }
46fc3d4c 10624 else {
84bafc02 10625 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 10626 if (yychar < 32)
cea2e8a9 10627 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 10628 else if (isPRINT_LC(yychar)) {
88c9ea1e 10629 const char string = yychar;
5e7aa789
NC
10630 sv_catpvn(where_sv, &string, 1);
10631 }
463ee0b2 10632 else
cea2e8a9 10633 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 10634 where = SvPVX_const(where_sv);
463ee0b2 10635 }
46fc3d4c 10636 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 10637 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 10638 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 10639 if (context)
cea2e8a9 10640 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 10641 else
cea2e8a9 10642 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 10643 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 10644 Perl_sv_catpvf(aTHX_ msg,
57def98f 10645 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 10646 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 10647 PL_multi_end = 0;
a0d0e21e 10648 }
500960a6 10649 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 10650 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 10651 }
463ee0b2 10652 else
5a844595 10653 qerror(msg);
c7d6bfb2
GS
10654 if (PL_error_count >= 10) {
10655 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 10656 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 10657 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
10658 else
10659 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 10660 OutCopFILE(PL_curcop));
c7d6bfb2 10661 }
3280af22 10662 PL_in_my = 0;
5c284bb0 10663 PL_in_my_stash = NULL;
463ee0b2
LW
10664 return 0;
10665}
084592ab
CN
10666#ifdef __SC__
10667#pragma segment Main
10668#endif
4e35701f 10669
b250498f 10670STATIC char*
3ae08724 10671S_swallow_bom(pTHX_ U8 *s)
01ec43d0 10672{
97aff369 10673 dVAR;
f54cb97a 10674 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
10675
10676 PERL_ARGS_ASSERT_SWALLOW_BOM;
10677
7aa207d6 10678 switch (s[0]) {
4e553d73
NIS
10679 case 0xFF:
10680 if (s[1] == 0xFE) {
ee6ba15d 10681 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 10682 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 10683 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 10684#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 10685 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 10686 s += 2;
dea0fc0b 10687 if (PL_bufend > (char*)s) {
81a923f4 10688 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 10689 }
b250498f 10690#else
ee6ba15d 10691 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 10692#endif
01ec43d0
GS
10693 }
10694 break;
78ae23f5 10695 case 0xFE:
7aa207d6 10696 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 10697#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10698 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
10699 s += 2;
10700 if (PL_bufend > (char *)s) {
81a923f4 10701 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 10702 }
b250498f 10703#else
ee6ba15d 10704 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 10705#endif
01ec43d0
GS
10706 }
10707 break;
3ae08724
GS
10708 case 0xEF:
10709 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 10710 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
10711 s += 3; /* UTF-8 */
10712 }
10713 break;
10714 case 0:
7aa207d6
JH
10715 if (slen > 3) {
10716 if (s[1] == 0) {
10717 if (s[2] == 0xFE && s[3] == 0xFF) {
10718 /* UTF-32 big-endian */
ee6ba15d 10719 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
10720 }
10721 }
10722 else if (s[2] == 0 && s[3] != 0) {
10723 /* Leading bytes
10724 * 00 xx 00 xx
10725 * are a good indicator of UTF-16BE. */
ee6ba15d 10726#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10727 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
10728 s = add_utf16_textfilter(s, FALSE);
10729#else
10730 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10731#endif
7aa207d6 10732 }
01ec43d0 10733 }
e294cc5d
JH
10734#ifdef EBCDIC
10735 case 0xDD:
10736 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10737 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10738 s += 4; /* UTF-8 */
10739 }
10740 break;
10741#endif
10742
7aa207d6
JH
10743 default:
10744 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10745 /* Leading bytes
10746 * xx 00 xx 00
10747 * are a good indicator of UTF-16LE. */
ee6ba15d 10748#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10749 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 10750 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
10751#else
10752 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10753#endif
7aa207d6 10754 }
01ec43d0 10755 }
b8f84bb2 10756 return (char*)s;
b250498f 10757}
4755096e 10758
6e3aabd6
GS
10759
10760#ifndef PERL_NO_UTF16_FILTER
10761static I32
a28af015 10762S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 10763{
97aff369 10764 dVAR;
f3040f2c 10765 SV *const filter = FILTER_DATA(idx);
2a773401
NC
10766 /* We re-use this each time round, throwing the contents away before we
10767 return. */
2a773401 10768 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 10769 SV *const utf8_buffer = filter;
c28d6105 10770 IV status = IoPAGE(filter);
f2338a2e 10771 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 10772 I32 retval;
c8b0cbae 10773
c85ae797
NC
10774 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10775
c8b0cbae
NC
10776 /* As we're automatically added, at the lowest level, and hence only called
10777 from this file, we can be sure that we're not called in block mode. Hence
10778 don't bother writing code to deal with block mode. */
10779 if (maxlen) {
10780 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10781 }
c28d6105
NC
10782 if (status < 0) {
10783 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10784 }
1de9afcd 10785 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 10786 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 10787 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
10788 reverse ? 'l' : 'b', idx, maxlen, status,
10789 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10790
10791 while (1) {
10792 STRLEN chars;
10793 STRLEN have;
dea0fc0b 10794 I32 newlen;
2a773401 10795 U8 *end;
c28d6105
NC
10796 /* First, look in our buffer of existing UTF-8 data: */
10797 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10798
10799 if (nl) {
10800 ++nl;
10801 } else if (status == 0) {
10802 /* EOF */
10803 IoPAGE(filter) = 0;
10804 nl = SvEND(utf8_buffer);
10805 }
10806 if (nl) {
d2d1d4de
NC
10807 STRLEN got = nl - SvPVX(utf8_buffer);
10808 /* Did we have anything to append? */
10809 retval = got != 0;
10810 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
10811 /* Everything else in this code works just fine if SVp_POK isn't
10812 set. This, however, needs it, and we need it to work, else
10813 we loop infinitely because the buffer is never consumed. */
10814 sv_chop(utf8_buffer, nl);
10815 break;
10816 }
ba77e4cc 10817
c28d6105
NC
10818 /* OK, not a complete line there, so need to read some more UTF-16.
10819 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
10820 while (1) {
10821 if (status <= 0)
10822 break;
10823 if (SvCUR(utf16_buffer) >= 2) {
10824 /* Location of the high octet of the last complete code point.
10825 Gosh, UTF-16 is a pain. All the benefits of variable length,
10826 *coupled* with all the benefits of partial reads and
10827 endianness. */
10828 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10829 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10830
10831 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10832 break;
10833 }
10834
10835 /* We have the first half of a surrogate. Read more. */
10836 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10837 }
c28d6105 10838
c28d6105
NC
10839 status = FILTER_READ(idx + 1, utf16_buffer,
10840 160 + (SvCUR(utf16_buffer) & 1));
10841 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 10842 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
10843 if (status < 0) {
10844 /* Error */
10845 IoPAGE(filter) = status;
10846 return status;
10847 }
10848 }
10849
10850 chars = SvCUR(utf16_buffer) >> 1;
10851 have = SvCUR(utf8_buffer);
10852 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 10853
aa6dbd60 10854 if (reverse) {
c28d6105
NC
10855 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10856 (U8*)SvPVX_const(utf8_buffer) + have,
10857 chars * 2, &newlen);
aa6dbd60 10858 } else {
2a773401 10859 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
10860 (U8*)SvPVX_const(utf8_buffer) + have,
10861 chars * 2, &newlen);
2a773401 10862 }
c28d6105 10863 SvCUR_set(utf8_buffer, have + newlen);
2a773401 10864 *end = '\0';
c28d6105 10865
e07286ed
NC
10866 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10867 it's private to us, and utf16_to_utf8{,reversed} take a
10868 (pointer,length) pair, rather than a NUL-terminated string. */
10869 if(SvCUR(utf16_buffer) & 1) {
10870 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10871 SvCUR_set(utf16_buffer, 1);
10872 } else {
10873 SvCUR_set(utf16_buffer, 0);
10874 }
2a773401 10875 }
c28d6105
NC
10876 DEBUG_P(PerlIO_printf(Perl_debug_log,
10877 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10878 status,
10879 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10880 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 10881 return retval;
6e3aabd6 10882}
81a923f4
NC
10883
10884static U8 *
10885S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10886{
2a773401 10887 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 10888
c85ae797
NC
10889 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10890
c28d6105 10891 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 10892 sv_setpvs(filter, "");
2a773401 10893 IoLINES(filter) = reversed;
c28d6105
NC
10894 IoPAGE(filter) = 1; /* Not EOF */
10895
10896 /* Sadly, we have to return a valid pointer, come what may, so we have to
10897 ignore any error return from this. */
10898 SvCUR_set(PL_linestr, 0);
10899 if (FILTER_READ(0, PL_linestr, 0)) {
10900 SvUTF8_on(PL_linestr);
81a923f4 10901 } else {
c28d6105 10902 SvUTF8_on(PL_linestr);
81a923f4 10903 }
c28d6105 10904 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
10905 return (U8*)SvPVX(PL_linestr);
10906}
6e3aabd6 10907#endif
9f4817db 10908
f333445c
JP
10909/*
10910Returns a pointer to the next character after the parsed
10911vstring, as well as updating the passed in sv.
10912
10913Function must be called like
10914
561b68a9 10915 sv = newSV(5);
65b06e02 10916 s = scan_vstring(s,e,sv);
f333445c 10917
65b06e02 10918where s and e are the start and end of the string.
f333445c
JP
10919The sv should already be large enough to store the vstring
10920passed in, for performance reasons.
10921
10922*/
10923
10924char *
15f169a1 10925Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 10926{
97aff369 10927 dVAR;
bfed75c6
AL
10928 const char *pos = s;
10929 const char *start = s;
7918f24d
NC
10930
10931 PERL_ARGS_ASSERT_SCAN_VSTRING;
10932
f333445c 10933 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 10934 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 10935 pos++;
f333445c
JP
10936 if ( *pos != '.') {
10937 /* this may not be a v-string if followed by => */
bfed75c6 10938 const char *next = pos;
65b06e02 10939 while (next < e && isSPACE(*next))
8fc7bb1c 10940 ++next;
65b06e02 10941 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
10942 /* return string not v-string */
10943 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 10944 return (char *)pos;
f333445c
JP
10945 }
10946 }
10947
10948 if (!isALPHA(*pos)) {
89ebb4a3 10949 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 10950
d4c19fe8
AL
10951 if (*s == 'v')
10952 s++; /* get past 'v' */
f333445c 10953
76f68e9b 10954 sv_setpvs(sv, "");
f333445c
JP
10955
10956 for (;;) {
d4c19fe8 10957 /* this is atoi() that tolerates underscores */
0bd48802
AL
10958 U8 *tmpend;
10959 UV rev = 0;
d4c19fe8
AL
10960 const char *end = pos;
10961 UV mult = 1;
10962 while (--end >= s) {
10963 if (*end != '_') {
10964 const UV orev = rev;
f333445c
JP
10965 rev += (*end - '0') * mult;
10966 mult *= 10;
9b387841
NC
10967 if (orev > rev)
10968 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10969 "Integer overflow in decimal number");
f333445c
JP
10970 }
10971 }
10972#ifdef EBCDIC
10973 if (rev > 0x7FFFFFFF)
10974 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10975#endif
10976 /* Append native character for the rev point */
10977 tmpend = uvchr_to_utf8(tmpbuf, rev);
10978 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10979 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10980 SvUTF8_on(sv);
65b06e02 10981 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
10982 s = ++pos;
10983 else {
10984 s = pos;
10985 break;
10986 }
65b06e02 10987 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
10988 pos++;
10989 }
10990 SvPOK_on(sv);
10991 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10992 SvRMAGICAL_on(sv);
10993 }
73d840c0 10994 return (char *)s;
f333445c
JP
10995}
10996
88e1f1a2
JV
10997int
10998Perl_keyword_plugin_standard(pTHX_
10999 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11000{
11001 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11002 PERL_UNUSED_CONTEXT;
11003 PERL_UNUSED_ARG(keyword_ptr);
11004 PERL_UNUSED_ARG(keyword_len);
11005 PERL_UNUSED_ARG(op_ptr);
11006 return KEYWORD_PLUGIN_DECLINE;
11007}
11008
78cdf107 11009#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11010static void
78cdf107 11011S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11012{
11013 SAVEI32(PL_lex_brackets);
11014 if (PL_lex_brackets > 100)
11015 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11016 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11017 SAVEI32(PL_lex_allbrackets);
11018 PL_lex_allbrackets = 0;
11019 SAVEI8(PL_lex_fakeeof);
2dcac756 11020 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11021 if(yyparse(gramtype) && !PL_parser->error_count)
11022 qerror(Perl_mess(aTHX_ "Parse error"));
11023}
11024
78cdf107 11025#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11026static OP *
78cdf107 11027S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11028{
11029 OP *o;
11030 ENTER;
11031 SAVEVPTR(PL_eval_root);
11032 PL_eval_root = NULL;
78cdf107 11033 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11034 o = PL_eval_root;
11035 LEAVE;
11036 return o;
11037}
11038
78cdf107
Z
11039#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11040static OP *
11041S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11042{
11043 OP *exprop;
11044 if (flags & ~PARSE_OPTIONAL)
11045 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11046 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11047 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11048 if (!PL_parser->error_count)
11049 qerror(Perl_mess(aTHX_ "Parse error"));
11050 exprop = newOP(OP_NULL, 0);
11051 }
11052 return exprop;
11053}
11054
11055/*
11056=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11057
11058Parse a Perl arithmetic expression. This may contain operators of precedence
11059down to the bit shift operators. The expression must be followed (and thus
11060terminated) either by a comparison or lower-precedence operator or by
11061something that would normally terminate an expression such as semicolon.
11062If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11063otherwise it is mandatory. It is up to the caller to ensure that the
11064dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11065the source of the code to be parsed and the lexical context for the
11066expression.
11067
11068The op tree representing the expression is returned. If an optional
11069expression is absent, a null pointer is returned, otherwise the pointer
11070will be non-null.
11071
11072If an error occurs in parsing or compilation, in most cases a valid op
11073tree is returned anyway. The error is reflected in the parser state,
11074normally resulting in a single exception at the top level of parsing
11075which covers all the compilation errors that occurred. Some compilation
11076errors, however, will throw an exception immediately.
11077
11078=cut
11079*/
11080
11081OP *
11082Perl_parse_arithexpr(pTHX_ U32 flags)
11083{
11084 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11085}
11086
11087/*
11088=for apidoc Amx|OP *|parse_termexpr|U32 flags
11089
11090Parse a Perl term expression. This may contain operators of precedence
11091down to the assignment operators. The expression must be followed (and thus
11092terminated) either by a comma or lower-precedence operator or by
11093something that would normally terminate an expression such as semicolon.
11094If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11095otherwise it is mandatory. It is up to the caller to ensure that the
11096dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11097the source of the code to be parsed and the lexical context for the
11098expression.
11099
11100The op tree representing the expression is returned. If an optional
11101expression is absent, a null pointer is returned, otherwise the pointer
11102will be non-null.
11103
11104If an error occurs in parsing or compilation, in most cases a valid op
11105tree is returned anyway. The error is reflected in the parser state,
11106normally resulting in a single exception at the top level of parsing
11107which covers all the compilation errors that occurred. Some compilation
11108errors, however, will throw an exception immediately.
11109
11110=cut
11111*/
11112
11113OP *
11114Perl_parse_termexpr(pTHX_ U32 flags)
11115{
11116 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11117}
11118
11119/*
11120=for apidoc Amx|OP *|parse_listexpr|U32 flags
11121
11122Parse a Perl list expression. This may contain operators of precedence
11123down to the comma operator. The expression must be followed (and thus
11124terminated) either by a low-precedence logic operator such as C<or> or by
11125something that would normally terminate an expression such as semicolon.
11126If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11127otherwise it is mandatory. It is up to the caller to ensure that the
11128dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11129the source of the code to be parsed and the lexical context for the
11130expression.
11131
11132The op tree representing the expression is returned. If an optional
11133expression is absent, a null pointer is returned, otherwise the pointer
11134will be non-null.
11135
11136If an error occurs in parsing or compilation, in most cases a valid op
11137tree is returned anyway. The error is reflected in the parser state,
11138normally resulting in a single exception at the top level of parsing
11139which covers all the compilation errors that occurred. Some compilation
11140errors, however, will throw an exception immediately.
11141
11142=cut
11143*/
11144
11145OP *
11146Perl_parse_listexpr(pTHX_ U32 flags)
11147{
11148 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11149}
11150
11151/*
11152=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11153
11154Parse a single complete Perl expression. This allows the full
11155expression grammar, including the lowest-precedence operators such
11156as C<or>. The expression must be followed (and thus terminated) by a
11157token that an expression would normally be terminated by: end-of-file,
11158closing bracketing punctuation, semicolon, or one of the keywords that
11159signals a postfix expression-statement modifier. If I<flags> includes
11160C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11161mandatory. It is up to the caller to ensure that the dynamic parser
11162state (L</PL_parser> et al) is correctly set to reflect the source of
11163the code to be parsed and the lexical context for the expression.
11164
11165The op tree representing the expression is returned. If an optional
11166expression is absent, a null pointer is returned, otherwise the pointer
11167will be non-null.
11168
11169If an error occurs in parsing or compilation, in most cases a valid op
11170tree is returned anyway. The error is reflected in the parser state,
11171normally resulting in a single exception at the top level of parsing
11172which covers all the compilation errors that occurred. Some compilation
11173errors, however, will throw an exception immediately.
11174
11175=cut
11176*/
11177
11178OP *
11179Perl_parse_fullexpr(pTHX_ U32 flags)
11180{
11181 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11182}
11183
e53d8f76
Z
11184/*
11185=for apidoc Amx|OP *|parse_block|U32 flags
11186
11187Parse a single complete Perl code block. This consists of an opening
11188brace, a sequence of statements, and a closing brace. The block
11189constitutes a lexical scope, so C<my> variables and various compile-time
11190effects can be contained within it. It is up to the caller to ensure
11191that the dynamic parser state (L</PL_parser> et al) is correctly set to
11192reflect the source of the code to be parsed and the lexical context for
11193the statement.
11194
11195The op tree representing the code block is returned. This is always a
11196real op, never a null pointer. It will normally be a C<lineseq> list,
11197including C<nextstate> or equivalent ops. No ops to construct any kind
11198of runtime scope are included by virtue of it being a block.
11199
11200If an error occurs in parsing or compilation, in most cases a valid op
11201tree (most likely null) is returned anyway. The error is reflected in
11202the parser state, normally resulting in a single exception at the top
11203level of parsing which covers all the compilation errors that occurred.
11204Some compilation errors, however, will throw an exception immediately.
11205
11206The I<flags> parameter is reserved for future use, and must always
11207be zero.
11208
11209=cut
11210*/
11211
11212OP *
11213Perl_parse_block(pTHX_ U32 flags)
11214{
11215 if (flags)
11216 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11217 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11218}
11219
1da4ca5f 11220/*
8359b381
Z
11221=for apidoc Amx|OP *|parse_barestmt|U32 flags
11222
11223Parse a single unadorned Perl statement. This may be a normal imperative
11224statement or a declaration that has compile-time effect. It does not
11225include any label or other affixture. It is up to the caller to ensure
11226that the dynamic parser state (L</PL_parser> et al) is correctly set to
11227reflect the source of the code to be parsed and the lexical context for
11228the statement.
11229
11230The op tree representing the statement is returned. This may be a
11231null pointer if the statement is null, for example if it was actually
11232a subroutine definition (which has compile-time side effects). If not
11233null, it will be ops directly implementing the statement, suitable to
11234pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11235equivalent op (except for those embedded in a scope contained entirely
11236within the statement).
11237
11238If an error occurs in parsing or compilation, in most cases a valid op
11239tree (most likely null) is returned anyway. The error is reflected in
11240the parser state, normally resulting in a single exception at the top
11241level of parsing which covers all the compilation errors that occurred.
11242Some compilation errors, however, will throw an exception immediately.
11243
11244The I<flags> parameter is reserved for future use, and must always
11245be zero.
11246
11247=cut
11248*/
11249
11250OP *
11251Perl_parse_barestmt(pTHX_ U32 flags)
11252{
11253 if (flags)
11254 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11255 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11256}
11257
11258/*
361d9b55
Z
11259=for apidoc Amx|SV *|parse_label|U32 flags
11260
11261Parse a single label, possibly optional, of the type that may prefix a
11262Perl statement. It is up to the caller to ensure that the dynamic parser
11263state (L</PL_parser> et al) is correctly set to reflect the source of
11264the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11265label is optional, otherwise it is mandatory.
11266
11267The name of the label is returned in the form of a fresh scalar. If an
11268optional label is absent, a null pointer is returned.
11269
11270If an error occurs in parsing, which can only occur if the label is
11271mandatory, a valid label is returned anyway. The error is reflected in
11272the parser state, normally resulting in a single exception at the top
11273level of parsing which covers all the compilation errors that occurred.
11274
11275=cut
11276*/
11277
11278SV *
11279Perl_parse_label(pTHX_ U32 flags)
11280{
11281 if (flags & ~PARSE_OPTIONAL)
11282 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11283 if (PL_lex_state == LEX_KNOWNEXT) {
11284 PL_parser->yychar = yylex();
11285 if (PL_parser->yychar == LABEL) {
11286 char *lpv = pl_yylval.pval;
11287 STRLEN llen = strlen(lpv);
11288 SV *lsv;
11289 PL_parser->yychar = YYEMPTY;
11290 lsv = newSV_type(SVt_PV);
11291 SvPV_set(lsv, lpv);
11292 SvCUR_set(lsv, llen);
11293 SvLEN_set(lsv, llen+1);
11294 SvPOK_on(lsv);
11295 return lsv;
11296 } else {
11297 yyunlex();
11298 goto no_label;
11299 }
11300 } else {
11301 char *s, *t;
11302 U8 c;
11303 STRLEN wlen, bufptr_pos;
11304 lex_read_space(0);
11305 t = s = PL_bufptr;
11306 c = (U8)*s;
11307 if (!isIDFIRST_A(c))
11308 goto no_label;
11309 do {
11310 c = (U8)*++t;
11311 } while(isWORDCHAR_A(c));
11312 wlen = t - s;
11313 if (word_takes_any_delimeter(s, wlen))
11314 goto no_label;
11315 bufptr_pos = s - SvPVX(PL_linestr);
11316 PL_bufptr = t;
11317 lex_read_space(LEX_KEEP_PREVIOUS);
11318 t = PL_bufptr;
11319 s = SvPVX(PL_linestr) + bufptr_pos;
11320 if (t[0] == ':' && t[1] != ':') {
11321 PL_oldoldbufptr = PL_oldbufptr;
11322 PL_oldbufptr = s;
11323 PL_bufptr = t+1;
11324 return newSVpvn(s, wlen);
11325 } else {
11326 PL_bufptr = s;
11327 no_label:
11328 if (flags & PARSE_OPTIONAL) {
11329 return NULL;
11330 } else {
11331 qerror(Perl_mess(aTHX_ "Parse error"));
11332 return newSVpvs("x");
11333 }
11334 }
11335 }
11336}
11337
11338/*
28ac2b49
Z
11339=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11340
11341Parse a single complete Perl statement. This may be a normal imperative
8359b381 11342statement or a declaration that has compile-time effect, and may include
8e720305 11343optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11344parser state (L</PL_parser> et al) is correctly set to reflect the source
11345of the code to be parsed and the lexical context for the statement.
11346
11347The op tree representing the statement is returned. This may be a
11348null pointer if the statement is null, for example if it was actually
11349a subroutine definition (which has compile-time side effects). If not
11350null, it will be the result of a L</newSTATEOP> call, normally including
11351a C<nextstate> or equivalent op.
11352
11353If an error occurs in parsing or compilation, in most cases a valid op
11354tree (most likely null) is returned anyway. The error is reflected in
11355the parser state, normally resulting in a single exception at the top
11356level of parsing which covers all the compilation errors that occurred.
11357Some compilation errors, however, will throw an exception immediately.
11358
11359The I<flags> parameter is reserved for future use, and must always
11360be zero.
11361
11362=cut
11363*/
11364
11365OP *
11366Perl_parse_fullstmt(pTHX_ U32 flags)
11367{
28ac2b49
Z
11368 if (flags)
11369 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11370 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11371}
11372
07ffcb73
Z
11373/*
11374=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11375
11376Parse a sequence of zero or more Perl statements. These may be normal
11377imperative statements, including optional labels, or declarations
11378that have compile-time effect, or any mixture thereof. The statement
11379sequence ends when a closing brace or end-of-file is encountered in a
11380place where a new statement could have validly started. It is up to
11381the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11382is correctly set to reflect the source of the code to be parsed and the
11383lexical context for the statements.
11384
11385The op tree representing the statement sequence is returned. This may
11386be a null pointer if the statements were all null, for example if there
11387were no statements or if there were only subroutine definitions (which
11388have compile-time side effects). If not null, it will be a C<lineseq>
11389list, normally including C<nextstate> or equivalent ops.
11390
11391If an error occurs in parsing or compilation, in most cases a valid op
11392tree is returned anyway. The error is reflected in the parser state,
11393normally resulting in a single exception at the top level of parsing
11394which covers all the compilation errors that occurred. Some compilation
11395errors, however, will throw an exception immediately.
11396
11397The I<flags> parameter is reserved for future use, and must always
11398be zero.
11399
11400=cut
11401*/
11402
11403OP *
11404Perl_parse_stmtseq(pTHX_ U32 flags)
11405{
11406 OP *stmtseqop;
e53d8f76 11407 I32 c;
07ffcb73 11408 if (flags)
78cdf107
Z
11409 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11410 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
11411 c = lex_peek_unichar(0);
11412 if (c != -1 && c != /*{*/'}')
07ffcb73 11413 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
11414 return stmtseqop;
11415}
11416
ea25a9b2 11417void
f7e3d326 11418Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 11419{
f7e3d326 11420 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2 11421 deprecate("qw(...) as parentheses");
78cdf107 11422 force_next((4<<24)|')');
ea25a9b2
Z
11423 if (qwlist->op_type == OP_STUB) {
11424 op_free(qwlist);
11425 }
11426 else {
3d8e05a0 11427 start_force(PL_curforce);
ea25a9b2
Z
11428 NEXTVAL_NEXTTOKE.opval = qwlist;
11429 force_next(THING);
11430 }
78cdf107 11431 force_next((2<<24)|'(');
ea25a9b2
Z
11432}
11433
28ac2b49 11434/*
1da4ca5f
NC
11435 * Local variables:
11436 * c-indentation-style: bsd
11437 * c-basic-offset: 4
11438 * indent-tabs-mode: t
11439 * End:
11440 *
37442d52
RGS
11441 * ex: set ts=8 sts=4 sw=4 noet:
11442 */