This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
make reg_eval_scope.t TODOs consistently fail
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
746b446a 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805
LW
150 * can get by with a single comparison (if the compiler is smart enough).
151 */
152
fb73857a 153/* #define LEX_NOTPARSING 11 is done in perl.h. */
154
b6007c36
DM
155#define LEX_NORMAL 10 /* normal code (ie not within "...") */
156#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159#define LEX_INTERPSTART 6 /* expecting the start of a $var */
160
161 /* at end of code, eg "$x" followed by: */
162#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164
165#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167#define LEX_INTERPCONST 2 /* NOT USED */
168#define LEX_FORMLINE 1 /* expecting a format line */
169#define LEX_KNOWNEXT 0 /* next token known; just return it */
170
79072805 171
bbf60fe6 172#ifdef DEBUGGING
27da23d5 173static const char* const lex_state_names[] = {
bbf60fe6
DM
174 "KNOWNEXT",
175 "FORMLINE",
176 "INTERPCONST",
177 "INTERPCONCAT",
178 "INTERPENDMAYBE",
179 "INTERPEND",
180 "INTERPSTART",
181 "INTERPPUSH",
182 "INTERPCASEMOD",
183 "INTERPNORMAL",
184 "NORMAL"
185};
186#endif
187
79072805
LW
188#ifdef ff_next
189#undef ff_next
d48672a2
LW
190#endif
191
79072805 192#include "keywords.h"
fe14fcc3 193
ffb4593c
NT
194/* CLINE is a macro that ensures PL_copline has a sane value */
195
ae986130
LW
196#ifdef CLINE
197#undef CLINE
198#endif
57843af0 199#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 200
5db06880 201#ifdef PERL_MAD
29595ff2
NC
202# define SKIPSPACE0(s) skipspace0(s)
203# define SKIPSPACE1(s) skipspace1(s)
204# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205# define PEEKSPACE(s) skipspace2(s,0)
206#else
207# define SKIPSPACE0(s) skipspace(s)
208# define SKIPSPACE1(s) skipspace(s)
209# define SKIPSPACE2(s,tsv) skipspace(s)
210# define PEEKSPACE(s) skipspace(s)
211#endif
212
ffb4593c
NT
213/*
214 * Convenience functions to return different tokens and prime the
9cbb5ea2 215 * lexer for the next token. They all take an argument.
ffb4593c
NT
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
2d2e263d 227 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
228 * BOop : bitwise or or xor
229 * BAop : bitwise and
230 * SHop : shift operator
231 * PWop : power operator
9cbb5ea2 232 * PMop : pattern-matching operator
ffb4593c
NT
233 * Aop : addition-level operator
234 * Mop : multiplication-level operator
235 * Eop : equality-testing operator
e5edeb50 236 * Rop : relational operator <= != gt
ffb4593c
NT
237 *
238 * Also see LOP and lop() below.
239 */
240
998054bd 241#ifdef DEBUGGING /* Serve -DT. */
704d4215 242# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 243#else
bbf60fe6 244# define REPORT(retval) (retval)
998054bd
SC
245#endif
246
bbf60fe6
DM
247#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
248#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
249#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
250#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
251#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
252#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
253#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
254#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
255#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
256#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
257#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
258#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
259#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
260#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
261#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
262#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
263#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
264#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
265#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
266#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 267
a687059c
LW
268/* This bit of chicanery makes a unary function followed by
269 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
270 * The UNIDOR macro is for unary functions that can be followed by the //
271 * operator (such as C<shift // 0>).
a687059c 272 */
376fcdbf 273#define UNI2(f,x) { \
6154021b 274 pl_yylval.ival = f; \
376fcdbf
AL
275 PL_expect = x; \
276 PL_bufptr = s; \
277 PL_last_uni = PL_oldbufptr; \
278 PL_last_lop_op = f; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 }
6f33ba73
RGS
284#define UNI(f) UNI2(f,XTERM)
285#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 286
376fcdbf 287#define UNIBRACK(f) { \
6154021b 288 pl_yylval.ival = f; \
376fcdbf
AL
289 PL_bufptr = s; \
290 PL_last_uni = PL_oldbufptr; \
291 if (*s == '(') \
292 return REPORT( (int)FUNC1 ); \
29595ff2 293 s = PEEKSPACE(s); \
376fcdbf
AL
294 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
295 }
79072805 296
9f68db38 297/* grandfather return to old style */
78cdf107
Z
298#define OLDLOP(f) \
299 do { \
300 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
301 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
302 pl_yylval.ival = (f); \
303 PL_expect = XTERM; \
304 PL_bufptr = s; \
305 return (int)LSTOP; \
306 } while(0)
79072805 307
8fa7f367
JH
308#ifdef DEBUGGING
309
6154021b 310/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
311enum token_type {
312 TOKENTYPE_NONE,
313 TOKENTYPE_IVAL,
6154021b 314 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
315 TOKENTYPE_PVAL,
316 TOKENTYPE_OPVAL,
317 TOKENTYPE_GVVAL
318};
319
6d4a66ac
NC
320static struct debug_tokens {
321 const int token;
322 enum token_type type;
323 const char *name;
324} const debug_tokens[] =
9041c2e3 325{
bbf60fe6
DM
326 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
327 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
328 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
329 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
330 { ARROW, TOKENTYPE_NONE, "ARROW" },
331 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
332 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
333 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
334 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
335 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 336 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
337 { DO, TOKENTYPE_NONE, "DO" },
338 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
339 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
340 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
341 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
342 { ELSE, TOKENTYPE_NONE, "ELSE" },
343 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
344 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
345 { FOR, TOKENTYPE_IVAL, "FOR" },
346 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
347 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
348 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
349 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
350 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
351 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 352 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
353 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
354 { IF, TOKENTYPE_IVAL, "IF" },
355 { LABEL, TOKENTYPE_PVAL, "LABEL" },
356 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
357 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
358 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
359 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
360 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
361 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
362 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
363 { MY, TOKENTYPE_IVAL, "MY" },
364 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
365 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
366 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
367 { OROP, TOKENTYPE_IVAL, "OROP" },
368 { OROR, TOKENTYPE_NONE, "OROR" },
369 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
370 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
371 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
372 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
373 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
374 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
375 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
376 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
377 { PREINC, TOKENTYPE_NONE, "PREINC" },
378 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
379 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
380 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
381 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
382 { SUB, TOKENTYPE_NONE, "SUB" },
383 { THING, TOKENTYPE_OPVAL, "THING" },
384 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
385 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
386 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
387 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
388 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
389 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 390 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
391 { WHILE, TOKENTYPE_IVAL, "WHILE" },
392 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 393 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 394 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
395};
396
6154021b 397/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 398
bbf60fe6 399STATIC int
704d4215 400S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 401{
97aff369 402 dVAR;
7918f24d
NC
403
404 PERL_ARGS_ASSERT_TOKEREPORT;
405
bbf60fe6 406 if (DEBUG_T_TEST) {
bd61b366 407 const char *name = NULL;
bbf60fe6 408 enum token_type type = TOKENTYPE_NONE;
f54cb97a 409 const struct debug_tokens *p;
396482e1 410 SV* const report = newSVpvs("<== ");
bbf60fe6 411
f54cb97a 412 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
413 if (p->token == (int)rv) {
414 name = p->name;
415 type = p->type;
416 break;
417 }
418 }
419 if (name)
54667de8 420 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
421 else if ((char)rv > ' ' && (char)rv < '~')
422 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
423 else if (!rv)
396482e1 424 sv_catpvs(report, "EOF");
bbf60fe6
DM
425 else
426 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
427 switch (type) {
428 case TOKENTYPE_NONE:
429 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
430 break;
431 case TOKENTYPE_IVAL:
704d4215 432 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
433 break;
434 case TOKENTYPE_OPNUM:
435 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 436 PL_op_name[lvalp->ival]);
bbf60fe6
DM
437 break;
438 case TOKENTYPE_PVAL:
704d4215 439 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_OPVAL:
704d4215 442 if (lvalp->opval) {
401441c0 443 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
444 PL_op_name[lvalp->opval->op_type]);
445 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 446 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 447 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
448 }
449
450 }
401441c0 451 else
396482e1 452 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
453 break;
454 }
b6007c36 455 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
456 };
457 return (int)rv;
998054bd
SC
458}
459
b6007c36
DM
460
461/* print the buffer with suitable escapes */
462
463STATIC void
15f169a1 464S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 465{
396482e1 466 SV* const tmp = newSVpvs("");
7918f24d
NC
467
468 PERL_ARGS_ASSERT_PRINTBUF;
469
b6007c36
DM
470 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
471 SvREFCNT_dec(tmp);
472}
473
8fa7f367
JH
474#endif
475
8290c323
NC
476static int
477S_deprecate_commaless_var_list(pTHX) {
478 PL_expect = XTERM;
479 deprecate("comma-less variable list");
480 return REPORT(','); /* grandfather non-comma-format format */
481}
482
ffb4593c
NT
483/*
484 * S_ao
485 *
c963b151
BD
486 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
487 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
488 */
489
76e3520e 490STATIC int
cea2e8a9 491S_ao(pTHX_ int toketype)
a0d0e21e 492{
97aff369 493 dVAR;
3280af22
NIS
494 if (*PL_bufptr == '=') {
495 PL_bufptr++;
a0d0e21e 496 if (toketype == ANDAND)
6154021b 497 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 498 else if (toketype == OROR)
6154021b 499 pl_yylval.ival = OP_ORASSIGN;
c963b151 500 else if (toketype == DORDOR)
6154021b 501 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
502 toketype = ASSIGNOP;
503 }
504 return toketype;
505}
506
ffb4593c
NT
507/*
508 * S_no_op
509 * When Perl expects an operator and finds something else, no_op
510 * prints the warning. It always prints "<something> found where
511 * operator expected. It prints "Missing semicolon on previous line?"
512 * if the surprise occurs at the start of the line. "do you need to
513 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
514 * where the compiler doesn't know if foo is a method call or a function.
515 * It prints "Missing operator before end of line" if there's nothing
516 * after the missing operator, or "... before <...>" if there is something
517 * after the missing operator.
518 */
519
76e3520e 520STATIC void
15f169a1 521S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 522{
97aff369 523 dVAR;
9d4ba2ae
AL
524 char * const oldbp = PL_bufptr;
525 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 526
7918f24d
NC
527 PERL_ARGS_ASSERT_NO_OP;
528
1189a94a
GS
529 if (!s)
530 s = oldbp;
07c798fb 531 else
1189a94a 532 PL_bufptr = s;
cea2e8a9 533 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
534 if (ckWARN_d(WARN_SYNTAX)) {
535 if (is_first)
536 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
537 "\t(Missing semicolon on previous line?)\n");
538 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 539 const char *t;
c35e046a
AL
540 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
541 NOOP;
56da5a46
RGS
542 if (t < PL_bufptr && isSPACE(*t))
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Do you need to predeclare %.*s?)\n",
551405c4 545 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
546 }
547 else {
548 assert(s >= oldbp);
549 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 550 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 551 }
07c798fb 552 }
3280af22 553 PL_bufptr = oldbp;
8990e307
LW
554}
555
ffb4593c
NT
556/*
557 * S_missingterm
558 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 559 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
560 * If we're in a delimited string and the delimiter is a control
561 * character, it's reformatted into a two-char sequence like ^C.
562 * This is fatal.
563 */
564
76e3520e 565STATIC void
cea2e8a9 566S_missingterm(pTHX_ char *s)
8990e307 567{
97aff369 568 dVAR;
8990e307
LW
569 char tmpbuf[3];
570 char q;
571 if (s) {
9d4ba2ae 572 char * const nl = strrchr(s,'\n');
d2719217 573 if (nl)
8990e307
LW
574 *nl = '\0';
575 }
463559e7 576 else if (isCNTRL(PL_multi_close)) {
8990e307 577 *tmpbuf = '^';
585ec06d 578 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
579 tmpbuf[2] = '\0';
580 s = tmpbuf;
581 }
582 else {
eb160463 583 *tmpbuf = (char)PL_multi_close;
8990e307
LW
584 tmpbuf[1] = '\0';
585 s = tmpbuf;
586 }
587 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 588 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 589}
79072805 590
0d863452 591/*
0d863452
RH
592 * Check whether the named feature is enabled.
593 */
26ea9e12
NC
594bool
595Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 596{
97aff369 597 dVAR;
0d863452 598 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 599 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
600
601 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
602
26ea9e12
NC
603 if (namelen > MAX_FEATURE_LEN)
604 return FALSE;
4a731d7b 605 memcpy(&he_name[8], name, namelen);
d4c19fe8 606
7b9ef140 607 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
608}
609
ffb4593c 610/*
9cbb5ea2
GS
611 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
612 * utf16-to-utf8-reversed.
ffb4593c
NT
613 */
614
c39cd008
GS
615#ifdef PERL_CR_FILTER
616static void
617strip_return(SV *sv)
618{
95a20fc0 619 register const char *s = SvPVX_const(sv);
9d4ba2ae 620 register const char * const e = s + SvCUR(sv);
7918f24d
NC
621
622 PERL_ARGS_ASSERT_STRIP_RETURN;
623
c39cd008
GS
624 /* outer loop optimized to do nothing if there are no CR-LFs */
625 while (s < e) {
626 if (*s++ == '\r' && *s == '\n') {
627 /* hit a CR-LF, need to copy the rest */
628 register char *d = s - 1;
629 *d++ = *s++;
630 while (s < e) {
631 if (*s == '\r' && s[1] == '\n')
632 s++;
633 *d++ = *s++;
634 }
635 SvCUR(sv) -= s - d;
636 return;
637 }
638 }
639}
a868473f 640
76e3520e 641STATIC I32
c39cd008 642S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 643{
f54cb97a 644 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
645 if (count > 0 && !maxlen)
646 strip_return(sv);
647 return count;
a868473f
NIS
648}
649#endif
650
ffb4593c 651/*
8eaa0acf
Z
652=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
653
654Creates and initialises a new lexer/parser state object, supplying
655a context in which to lex and parse from a new source of Perl code.
656A pointer to the new state object is placed in L</PL_parser>. An entry
657is made on the save stack so that upon unwinding the new state object
658will be destroyed and the former value of L</PL_parser> will be restored.
659Nothing else need be done to clean up the parsing context.
660
661The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
662non-null, provides a string (in SV form) containing code to be parsed.
663A copy of the string is made, so subsequent modification of I<line>
664does not affect parsing. I<rsfp>, if non-null, provides an input stream
665from which code will be read to be parsed. If both are non-null, the
666code in I<line> comes first and must consist of complete lines of input,
667and I<rsfp> supplies the remainder of the source.
668
669The I<flags> parameter is reserved for future use, and must always
670be zero.
671
672=cut
673*/
ffb4593c 674
a0d0e21e 675void
8eaa0acf 676Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 677{
97aff369 678 dVAR;
6ef55633 679 const char *s = NULL;
8990e307 680 STRLEN len;
5486870f 681 yy_parser *parser, *oparser;
8eaa0acf
Z
682 if (flags)
683 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
684
685 /* create and initialise a parser */
686
199e78b7 687 Newxz(parser, 1, yy_parser);
5486870f 688 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
689 PL_parser = parser;
690
28ac2b49
Z
691 parser->stack = NULL;
692 parser->ps = NULL;
693 parser->stack_size = 0;
acdf0a21 694
e3abe207
DM
695 /* on scope exit, free this parser and restore any outer one */
696 SAVEPARSER(parser);
7c4baf47 697 parser->saved_curcop = PL_curcop;
e3abe207 698
acdf0a21 699 /* initialise lexer state */
8990e307 700
fb205e7a
DM
701#ifdef PERL_MAD
702 parser->curforce = -1;
703#else
704 parser->nexttoke = 0;
705#endif
ca4cfd28 706 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 707 parser->copline = NOLINE;
5afb0a62 708 parser->lex_state = LEX_NORMAL;
c2598295 709 parser->expect = XSTATE;
2f9285f8 710 parser->rsfp = rsfp;
f07ec6dd 711 parser->rsfp_filters = newAV();
2f9285f8 712
199e78b7
DM
713 Newx(parser->lex_brackstack, 120, char);
714 Newx(parser->lex_casestack, 12, char);
715 *parser->lex_casestack = '\0';
02b34bbe 716
10efb74f
NC
717 if (line) {
718 s = SvPV_const(line, len);
719 } else {
720 len = 0;
721 }
bdc0bf6f 722
10efb74f 723 if (!len) {
bdc0bf6f 724 parser->linestr = newSVpvs("\n;");
805700c1 725 } else {
719a9bb0 726 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 727 if (s[len-1] != ';')
bdc0bf6f 728 sv_catpvs(parser->linestr, "\n;");
8990e307 729 }
f06b5848
DM
730 parser->oldoldbufptr =
731 parser->oldbufptr =
732 parser->bufptr =
733 parser->linestart = SvPVX(parser->linestr);
734 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
735 parser->last_lop = parser->last_uni = NULL;
737c24fc
Z
736
737 parser->in_pod = 0;
79072805 738}
a687059c 739
e3abe207
DM
740
741/* delete a parser object */
742
743void
744Perl_parser_free(pTHX_ const yy_parser *parser)
745{
7918f24d
NC
746 PERL_ARGS_ASSERT_PARSER_FREE;
747
7c4baf47 748 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
749 SvREFCNT_dec(parser->linestr);
750
2f9285f8
DM
751 if (parser->rsfp == PerlIO_stdin())
752 PerlIO_clearerr(parser->rsfp);
799361c3
SH
753 else if (parser->rsfp && (!parser->old_parser ||
754 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 755 PerlIO_close(parser->rsfp);
5486870f 756 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 757
e3abe207
DM
758 Safefree(parser->lex_brackstack);
759 Safefree(parser->lex_casestack);
760 PL_parser = parser->old_parser;
761 Safefree(parser);
762}
763
764
ffb4593c 765/*
f0e67a1d
Z
766=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
767
768Buffer scalar containing the chunk currently under consideration of the
769text currently being lexed. This is always a plain string scalar (for
770which C<SvPOK> is true). It is not intended to be used as a scalar by
771normal scalar means; instead refer to the buffer directly by the pointer
772variables described below.
773
774The lexer maintains various C<char*> pointers to things in the
775C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
776reallocated, all of these pointers must be updated. Don't attempt to
777do this manually, but rather use L</lex_grow_linestr> if you need to
778reallocate the buffer.
779
780The content of the text chunk in the buffer is commonly exactly one
781complete line of input, up to and including a newline terminator,
782but there are situations where it is otherwise. The octets of the
783buffer may be intended to be interpreted as either UTF-8 or Latin-1.
784The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
785flag on this scalar, which may disagree with it.
786
787For direct examination of the buffer, the variable
788L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
789lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
790of these pointers is usually preferable to examination of the scalar
791through normal scalar means.
792
793=for apidoc AmxU|char *|PL_parser-E<gt>bufend
794
795Direct pointer to the end of the chunk of text currently being lexed, the
796end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
797+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
798always located at the end of the buffer, and does not count as part of
799the buffer's contents.
800
801=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
802
803Points to the current position of lexing inside the lexer buffer.
804Characters around this point may be freely examined, within
805the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
806L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
807interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
808
809Lexing code (whether in the Perl core or not) moves this pointer past
810the characters that it consumes. It is also expected to perform some
811bookkeeping whenever a newline character is consumed. This movement
812can be more conveniently performed by the function L</lex_read_to>,
813which handles newlines appropriately.
814
815Interpretation of the buffer's octets can be abstracted out by
816using the slightly higher-level functions L</lex_peek_unichar> and
817L</lex_read_unichar>.
818
819=for apidoc AmxU|char *|PL_parser-E<gt>linestart
820
821Points to the start of the current line inside the lexer buffer.
822This is useful for indicating at which column an error occurred, and
823not much else. This must be updated by any lexing code that consumes
824a newline; the function L</lex_read_to> handles this detail.
825
826=cut
827*/
828
829/*
830=for apidoc Amx|bool|lex_bufutf8
831
832Indicates whether the octets in the lexer buffer
833(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
834of Unicode characters. If not, they should be interpreted as Latin-1
835characters. This is analogous to the C<SvUTF8> flag for scalars.
836
837In UTF-8 mode, it is not guaranteed that the lexer buffer actually
838contains valid UTF-8. Lexing code must be robust in the face of invalid
839encoding.
840
841The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
842is significant, but not the whole story regarding the input character
843encoding. Normally, when a file is being read, the scalar contains octets
844and its C<SvUTF8> flag is off, but the octets should be interpreted as
845UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
846however, the scalar may have the C<SvUTF8> flag on, and in this case its
847octets should be interpreted as UTF-8 unless the C<use bytes> pragma
848is in effect. This logic may change in the future; use this function
849instead of implementing the logic yourself.
850
851=cut
852*/
853
854bool
855Perl_lex_bufutf8(pTHX)
856{
857 return UTF;
858}
859
860/*
861=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
862
863Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
864at least I<len> octets (including terminating NUL). Returns a
865pointer to the reallocated buffer. This is necessary before making
866any direct modification of the buffer that would increase its length.
867L</lex_stuff_pvn> provides a more convenient way to insert text into
868the buffer.
869
870Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
871this function updates all of the lexer's variables that point directly
872into the buffer.
873
874=cut
875*/
876
877char *
878Perl_lex_grow_linestr(pTHX_ STRLEN len)
879{
880 SV *linestr;
881 char *buf;
882 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
883 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
884 linestr = PL_parser->linestr;
885 buf = SvPVX(linestr);
886 if (len <= SvLEN(linestr))
887 return buf;
888 bufend_pos = PL_parser->bufend - buf;
889 bufptr_pos = PL_parser->bufptr - buf;
890 oldbufptr_pos = PL_parser->oldbufptr - buf;
891 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
892 linestart_pos = PL_parser->linestart - buf;
893 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
894 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
895 buf = sv_grow(linestr, len);
896 PL_parser->bufend = buf + bufend_pos;
897 PL_parser->bufptr = buf + bufptr_pos;
898 PL_parser->oldbufptr = buf + oldbufptr_pos;
899 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
900 PL_parser->linestart = buf + linestart_pos;
901 if (PL_parser->last_uni)
902 PL_parser->last_uni = buf + last_uni_pos;
903 if (PL_parser->last_lop)
904 PL_parser->last_lop = buf + last_lop_pos;
905 return buf;
906}
907
908/*
83aa740e 909=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
910
911Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
912immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
913reallocating the buffer if necessary. This means that lexing code that
914runs later will see the characters as if they had appeared in the input.
915It is not recommended to do this as part of normal parsing, and most
916uses of this facility run the risk of the inserted characters being
917interpreted in an unintended manner.
918
919The string to be inserted is represented by I<len> octets starting
920at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
921according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
922The characters are recoded for the lexer buffer, according to how the
923buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 924to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
925function is more convenient.
926
927=cut
928*/
929
930void
83aa740e 931Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 932{
749123ff 933 dVAR;
f0e67a1d
Z
934 char *bufptr;
935 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
936 if (flags & ~(LEX_STUFF_UTF8))
937 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
938 if (UTF) {
939 if (flags & LEX_STUFF_UTF8) {
940 goto plain_copy;
941 } else {
942 STRLEN highhalf = 0;
83aa740e 943 const char *p, *e = pv+len;
f0e67a1d
Z
944 for (p = pv; p != e; p++)
945 highhalf += !!(((U8)*p) & 0x80);
946 if (!highhalf)
947 goto plain_copy;
948 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
949 bufptr = PL_parser->bufptr;
950 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
951 SvCUR_set(PL_parser->linestr,
952 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
953 PL_parser->bufend += len+highhalf;
954 for (p = pv; p != e; p++) {
955 U8 c = (U8)*p;
956 if (c & 0x80) {
957 *bufptr++ = (char)(0xc0 | (c >> 6));
958 *bufptr++ = (char)(0x80 | (c & 0x3f));
959 } else {
960 *bufptr++ = (char)c;
961 }
962 }
963 }
964 } else {
965 if (flags & LEX_STUFF_UTF8) {
966 STRLEN highhalf = 0;
83aa740e 967 const char *p, *e = pv+len;
f0e67a1d
Z
968 for (p = pv; p != e; p++) {
969 U8 c = (U8)*p;
970 if (c >= 0xc4) {
971 Perl_croak(aTHX_ "Lexing code attempted to stuff "
972 "non-Latin-1 character into Latin-1 input");
973 } else if (c >= 0xc2 && p+1 != e &&
974 (((U8)p[1]) & 0xc0) == 0x80) {
975 p++;
976 highhalf++;
977 } else if (c >= 0x80) {
978 /* malformed UTF-8 */
979 ENTER;
980 SAVESPTR(PL_warnhook);
981 PL_warnhook = PERL_WARNHOOK_FATAL;
982 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
983 LEAVE;
984 }
985 }
986 if (!highhalf)
987 goto plain_copy;
988 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
989 bufptr = PL_parser->bufptr;
990 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
991 SvCUR_set(PL_parser->linestr,
992 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
993 PL_parser->bufend += len-highhalf;
994 for (p = pv; p != e; p++) {
995 U8 c = (U8)*p;
996 if (c & 0x80) {
997 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
998 p++;
999 } else {
1000 *bufptr++ = (char)c;
1001 }
1002 }
1003 } else {
1004 plain_copy:
1005 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1006 bufptr = PL_parser->bufptr;
1007 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1008 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1009 PL_parser->bufend += len;
1010 Copy(pv, bufptr, len, char);
1011 }
1012 }
1013}
1014
1015/*
9dcc53ea
Z
1016=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1017
1018Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1019immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1020reallocating the buffer if necessary. This means that lexing code that
1021runs later will see the characters as if they had appeared in the input.
1022It is not recommended to do this as part of normal parsing, and most
1023uses of this facility run the risk of the inserted characters being
1024interpreted in an unintended manner.
1025
1026The string to be inserted is represented by octets starting at I<pv>
1027and continuing to the first nul. These octets are interpreted as either
1028UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1029in I<flags>. The characters are recoded for the lexer buffer, according
1030to how the buffer is currently being interpreted (L</lex_bufutf8>).
1031If it is not convenient to nul-terminate a string to be inserted, the
1032L</lex_stuff_pvn> function is more appropriate.
1033
1034=cut
1035*/
1036
1037void
1038Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1039{
1040 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1041 lex_stuff_pvn(pv, strlen(pv), flags);
1042}
1043
1044/*
f0e67a1d
Z
1045=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1046
1047Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1048immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1049reallocating the buffer if necessary. This means that lexing code that
1050runs later will see the characters as if they had appeared in the input.
1051It is not recommended to do this as part of normal parsing, and most
1052uses of this facility run the risk of the inserted characters being
1053interpreted in an unintended manner.
1054
1055The string to be inserted is the string value of I<sv>. The characters
1056are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1057being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1058not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1059need to construct a scalar.
1060
1061=cut
1062*/
1063
1064void
1065Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1066{
1067 char *pv;
1068 STRLEN len;
1069 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1070 if (flags)
1071 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1072 pv = SvPV(sv, len);
1073 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1074}
1075
1076/*
1077=for apidoc Amx|void|lex_unstuff|char *ptr
1078
1079Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1080I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1081This hides the discarded text from any lexing code that runs later,
1082as if the text had never appeared.
1083
1084This is not the normal way to consume lexed text. For that, use
1085L</lex_read_to>.
1086
1087=cut
1088*/
1089
1090void
1091Perl_lex_unstuff(pTHX_ char *ptr)
1092{
1093 char *buf, *bufend;
1094 STRLEN unstuff_len;
1095 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1096 buf = PL_parser->bufptr;
1097 if (ptr < buf)
1098 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1099 if (ptr == buf)
1100 return;
1101 bufend = PL_parser->bufend;
1102 if (ptr > bufend)
1103 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1104 unstuff_len = ptr - buf;
1105 Move(ptr, buf, bufend+1-ptr, char);
1106 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1107 PL_parser->bufend = bufend - unstuff_len;
1108}
1109
1110/*
1111=for apidoc Amx|void|lex_read_to|char *ptr
1112
1113Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1114to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1115performing the correct bookkeeping whenever a newline character is passed.
1116This is the normal way to consume lexed text.
1117
1118Interpretation of the buffer's octets can be abstracted out by
1119using the slightly higher-level functions L</lex_peek_unichar> and
1120L</lex_read_unichar>.
1121
1122=cut
1123*/
1124
1125void
1126Perl_lex_read_to(pTHX_ char *ptr)
1127{
1128 char *s;
1129 PERL_ARGS_ASSERT_LEX_READ_TO;
1130 s = PL_parser->bufptr;
1131 if (ptr < s || ptr > PL_parser->bufend)
1132 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1133 for (; s != ptr; s++)
1134 if (*s == '\n') {
1135 CopLINE_inc(PL_curcop);
1136 PL_parser->linestart = s+1;
1137 }
1138 PL_parser->bufptr = ptr;
1139}
1140
1141/*
1142=for apidoc Amx|void|lex_discard_to|char *ptr
1143
1144Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1145up to I<ptr>. The remaining content of the buffer will be moved, and
1146all pointers into the buffer updated appropriately. I<ptr> must not
1147be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1148it is not permitted to discard text that has yet to be lexed.
1149
1150Normally it is not necessarily to do this directly, because it suffices to
1151use the implicit discarding behaviour of L</lex_next_chunk> and things
1152based on it. However, if a token stretches across multiple lines,
1f317c95 1153and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1154that purpose, then after completion of the token it would be wise to
1155explicitly discard the now-unneeded earlier lines, to avoid future
1156multi-line tokens growing the buffer without bound.
1157
1158=cut
1159*/
1160
1161void
1162Perl_lex_discard_to(pTHX_ char *ptr)
1163{
1164 char *buf;
1165 STRLEN discard_len;
1166 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1167 buf = SvPVX(PL_parser->linestr);
1168 if (ptr < buf)
1169 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1170 if (ptr == buf)
1171 return;
1172 if (ptr > PL_parser->bufptr)
1173 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1174 discard_len = ptr - buf;
1175 if (PL_parser->oldbufptr < ptr)
1176 PL_parser->oldbufptr = ptr;
1177 if (PL_parser->oldoldbufptr < ptr)
1178 PL_parser->oldoldbufptr = ptr;
1179 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1180 PL_parser->last_uni = NULL;
1181 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1182 PL_parser->last_lop = NULL;
1183 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1184 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1185 PL_parser->bufend -= discard_len;
1186 PL_parser->bufptr -= discard_len;
1187 PL_parser->oldbufptr -= discard_len;
1188 PL_parser->oldoldbufptr -= discard_len;
1189 if (PL_parser->last_uni)
1190 PL_parser->last_uni -= discard_len;
1191 if (PL_parser->last_lop)
1192 PL_parser->last_lop -= discard_len;
1193}
1194
1195/*
1196=for apidoc Amx|bool|lex_next_chunk|U32 flags
1197
1198Reads in the next chunk of text to be lexed, appending it to
1199L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1200looked to the end of the current chunk and wants to know more. It is
1201usual, but not necessary, for lexing to have consumed the entirety of
1202the current chunk at this time.
1203
1204If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1205chunk (i.e., the current chunk has been entirely consumed), normally the
1206current chunk will be discarded at the same time that the new chunk is
1207read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1208will not be discarded. If the current chunk has not been entirely
1209consumed, then it will not be discarded regardless of the flag.
1210
1211Returns true if some new text was added to the buffer, or false if the
1212buffer has reached the end of the input text.
1213
1214=cut
1215*/
1216
1217#define LEX_FAKE_EOF 0x80000000
1218
1219bool
1220Perl_lex_next_chunk(pTHX_ U32 flags)
1221{
1222 SV *linestr;
1223 char *buf;
1224 STRLEN old_bufend_pos, new_bufend_pos;
1225 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1226 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1227 bool got_some_for_debugger = 0;
f0e67a1d
Z
1228 bool got_some;
1229 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1230 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1231 linestr = PL_parser->linestr;
1232 buf = SvPVX(linestr);
1233 if (!(flags & LEX_KEEP_PREVIOUS) &&
1234 PL_parser->bufptr == PL_parser->bufend) {
1235 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1236 linestart_pos = 0;
1237 if (PL_parser->last_uni != PL_parser->bufend)
1238 PL_parser->last_uni = NULL;
1239 if (PL_parser->last_lop != PL_parser->bufend)
1240 PL_parser->last_lop = NULL;
1241 last_uni_pos = last_lop_pos = 0;
1242 *buf = 0;
1243 SvCUR(linestr) = 0;
1244 } else {
1245 old_bufend_pos = PL_parser->bufend - buf;
1246 bufptr_pos = PL_parser->bufptr - buf;
1247 oldbufptr_pos = PL_parser->oldbufptr - buf;
1248 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1249 linestart_pos = PL_parser->linestart - buf;
1250 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1251 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1252 }
1253 if (flags & LEX_FAKE_EOF) {
1254 goto eof;
1255 } else if (!PL_parser->rsfp) {
1256 got_some = 0;
1257 } else if (filter_gets(linestr, old_bufend_pos)) {
1258 got_some = 1;
17cc9359 1259 got_some_for_debugger = 1;
f0e67a1d 1260 } else {
580561a3
Z
1261 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1262 sv_setpvs(linestr, "");
f0e67a1d
Z
1263 eof:
1264 /* End of real input. Close filehandle (unless it was STDIN),
1265 * then add implicit termination.
1266 */
1267 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1268 PerlIO_clearerr(PL_parser->rsfp);
1269 else if (PL_parser->rsfp)
1270 (void)PerlIO_close(PL_parser->rsfp);
1271 PL_parser->rsfp = NULL;
737c24fc 1272 PL_parser->in_pod = 0;
f0e67a1d
Z
1273#ifdef PERL_MAD
1274 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1275 PL_faketokens = 1;
1276#endif
1277 if (!PL_in_eval && PL_minus_p) {
1278 sv_catpvs(linestr,
1279 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1280 PL_minus_n = PL_minus_p = 0;
1281 } else if (!PL_in_eval && PL_minus_n) {
1282 sv_catpvs(linestr, /*{*/";}");
1283 PL_minus_n = 0;
1284 } else
1285 sv_catpvs(linestr, ";");
1286 got_some = 1;
1287 }
1288 buf = SvPVX(linestr);
1289 new_bufend_pos = SvCUR(linestr);
1290 PL_parser->bufend = buf + new_bufend_pos;
1291 PL_parser->bufptr = buf + bufptr_pos;
1292 PL_parser->oldbufptr = buf + oldbufptr_pos;
1293 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1294 PL_parser->linestart = buf + linestart_pos;
1295 if (PL_parser->last_uni)
1296 PL_parser->last_uni = buf + last_uni_pos;
1297 if (PL_parser->last_lop)
1298 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1299 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1300 PL_curstash != PL_debstash) {
1301 /* debugger active and we're not compiling the debugger code,
1302 * so store the line into the debugger's array of lines
1303 */
1304 update_debugger_info(NULL, buf+old_bufend_pos,
1305 new_bufend_pos-old_bufend_pos);
1306 }
1307 return got_some;
1308}
1309
1310/*
1311=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1312
1313Looks ahead one (Unicode) character in the text currently being lexed.
1314Returns the codepoint (unsigned integer value) of the next character,
1315or -1 if lexing has reached the end of the input text. To consume the
1316peeked character, use L</lex_read_unichar>.
1317
1318If the next character is in (or extends into) the next chunk of input
1319text, the next chunk will be read in. Normally the current chunk will be
1320discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1321then the current chunk will not be discarded.
1322
1323If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1324is encountered, an exception is generated.
1325
1326=cut
1327*/
1328
1329I32
1330Perl_lex_peek_unichar(pTHX_ U32 flags)
1331{
749123ff 1332 dVAR;
f0e67a1d
Z
1333 char *s, *bufend;
1334 if (flags & ~(LEX_KEEP_PREVIOUS))
1335 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1336 s = PL_parser->bufptr;
1337 bufend = PL_parser->bufend;
1338 if (UTF) {
1339 U8 head;
1340 I32 unichar;
1341 STRLEN len, retlen;
1342 if (s == bufend) {
1343 if (!lex_next_chunk(flags))
1344 return -1;
1345 s = PL_parser->bufptr;
1346 bufend = PL_parser->bufend;
1347 }
1348 head = (U8)*s;
1349 if (!(head & 0x80))
1350 return head;
1351 if (head & 0x40) {
1352 len = PL_utf8skip[head];
1353 while ((STRLEN)(bufend-s) < len) {
1354 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1355 break;
1356 s = PL_parser->bufptr;
1357 bufend = PL_parser->bufend;
1358 }
1359 }
1360 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1361 if (retlen == (STRLEN)-1) {
1362 /* malformed UTF-8 */
1363 ENTER;
1364 SAVESPTR(PL_warnhook);
1365 PL_warnhook = PERL_WARNHOOK_FATAL;
1366 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1367 LEAVE;
1368 }
1369 return unichar;
1370 } else {
1371 if (s == bufend) {
1372 if (!lex_next_chunk(flags))
1373 return -1;
1374 s = PL_parser->bufptr;
1375 }
1376 return (U8)*s;
1377 }
1378}
1379
1380/*
1381=for apidoc Amx|I32|lex_read_unichar|U32 flags
1382
1383Reads the next (Unicode) character in the text currently being lexed.
1384Returns the codepoint (unsigned integer value) of the character read,
1385and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1386if lexing has reached the end of the input text. To non-destructively
1387examine the next character, use L</lex_peek_unichar> instead.
1388
1389If the next character is in (or extends into) the next chunk of input
1390text, the next chunk will be read in. Normally the current chunk will be
1391discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1392then the current chunk will not be discarded.
1393
1394If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1395is encountered, an exception is generated.
1396
1397=cut
1398*/
1399
1400I32
1401Perl_lex_read_unichar(pTHX_ U32 flags)
1402{
1403 I32 c;
1404 if (flags & ~(LEX_KEEP_PREVIOUS))
1405 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1406 c = lex_peek_unichar(flags);
1407 if (c != -1) {
1408 if (c == '\n')
1409 CopLINE_inc(PL_curcop);
1410 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1411 }
1412 return c;
1413}
1414
1415/*
1416=for apidoc Amx|void|lex_read_space|U32 flags
1417
1418Reads optional spaces, in Perl style, in the text currently being
1419lexed. The spaces may include ordinary whitespace characters and
1420Perl-style comments. C<#line> directives are processed if encountered.
1421L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1422at a non-space character (or the end of the input text).
1423
1424If spaces extend into the next chunk of input text, the next chunk will
1425be read in. Normally the current chunk will be discarded at the same
1426time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1427chunk will not be discarded.
1428
1429=cut
1430*/
1431
f0998909
Z
1432#define LEX_NO_NEXT_CHUNK 0x80000000
1433
f0e67a1d
Z
1434void
1435Perl_lex_read_space(pTHX_ U32 flags)
1436{
1437 char *s, *bufend;
1438 bool need_incline = 0;
f0998909 1439 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1440 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1441#ifdef PERL_MAD
1442 if (PL_skipwhite) {
1443 sv_free(PL_skipwhite);
1444 PL_skipwhite = NULL;
1445 }
1446 if (PL_madskills)
1447 PL_skipwhite = newSVpvs("");
1448#endif /* PERL_MAD */
1449 s = PL_parser->bufptr;
1450 bufend = PL_parser->bufend;
1451 while (1) {
1452 char c = *s;
1453 if (c == '#') {
1454 do {
1455 c = *++s;
1456 } while (!(c == '\n' || (c == 0 && s == bufend)));
1457 } else if (c == '\n') {
1458 s++;
1459 PL_parser->linestart = s;
1460 if (s == bufend)
1461 need_incline = 1;
1462 else
1463 incline(s);
1464 } else if (isSPACE(c)) {
1465 s++;
1466 } else if (c == 0 && s == bufend) {
1467 bool got_more;
1468#ifdef PERL_MAD
1469 if (PL_madskills)
1470 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1471#endif /* PERL_MAD */
f0998909
Z
1472 if (flags & LEX_NO_NEXT_CHUNK)
1473 break;
f0e67a1d
Z
1474 PL_parser->bufptr = s;
1475 CopLINE_inc(PL_curcop);
1476 got_more = lex_next_chunk(flags);
1477 CopLINE_dec(PL_curcop);
1478 s = PL_parser->bufptr;
1479 bufend = PL_parser->bufend;
1480 if (!got_more)
1481 break;
1482 if (need_incline && PL_parser->rsfp) {
1483 incline(s);
1484 need_incline = 0;
1485 }
1486 } else {
1487 break;
1488 }
1489 }
1490#ifdef PERL_MAD
1491 if (PL_madskills)
1492 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1493#endif /* PERL_MAD */
1494 PL_parser->bufptr = s;
1495}
1496
1497/*
ffb4593c
NT
1498 * S_incline
1499 * This subroutine has nothing to do with tilting, whether at windmills
1500 * or pinball tables. Its name is short for "increment line". It
57843af0 1501 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1502 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1503 * # line 500 "foo.pm"
1504 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1505 */
1506
76e3520e 1507STATIC void
d9095cec 1508S_incline(pTHX_ const char *s)
463ee0b2 1509{
97aff369 1510 dVAR;
d9095cec
NC
1511 const char *t;
1512 const char *n;
1513 const char *e;
463ee0b2 1514
7918f24d
NC
1515 PERL_ARGS_ASSERT_INCLINE;
1516
57843af0 1517 CopLINE_inc(PL_curcop);
463ee0b2
LW
1518 if (*s++ != '#')
1519 return;
d4c19fe8
AL
1520 while (SPACE_OR_TAB(*s))
1521 s++;
73659bf1
GS
1522 if (strnEQ(s, "line", 4))
1523 s += 4;
1524 else
1525 return;
084592ab 1526 if (SPACE_OR_TAB(*s))
73659bf1 1527 s++;
4e553d73 1528 else
73659bf1 1529 return;
d4c19fe8
AL
1530 while (SPACE_OR_TAB(*s))
1531 s++;
463ee0b2
LW
1532 if (!isDIGIT(*s))
1533 return;
d4c19fe8 1534
463ee0b2
LW
1535 n = s;
1536 while (isDIGIT(*s))
1537 s++;
07714eb4 1538 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1539 return;
bf4acbe4 1540 while (SPACE_OR_TAB(*s))
463ee0b2 1541 s++;
73659bf1 1542 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1543 s++;
73659bf1
GS
1544 e = t + 1;
1545 }
463ee0b2 1546 else {
c35e046a
AL
1547 t = s;
1548 while (!isSPACE(*t))
1549 t++;
73659bf1 1550 e = t;
463ee0b2 1551 }
bf4acbe4 1552 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1553 e++;
1554 if (*e != '\n' && *e != '\0')
1555 return; /* false alarm */
1556
f4dd75d9 1557 if (t - s > 0) {
d9095cec 1558 const STRLEN len = t - s;
8a5ee598 1559#ifndef USE_ITHREADS
19bad673
NC
1560 SV *const temp_sv = CopFILESV(PL_curcop);
1561 const char *cf;
1562 STRLEN tmplen;
1563
1564 if (temp_sv) {
1565 cf = SvPVX(temp_sv);
1566 tmplen = SvCUR(temp_sv);
1567 } else {
1568 cf = NULL;
1569 tmplen = 0;
1570 }
1571
42d9b98d 1572 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1573 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1574 * to *{"::_<newfilename"} */
44867030
NC
1575 /* However, the long form of evals is only turned on by the
1576 debugger - usually they're "(eval %lu)" */
1577 char smallbuf[128];
1578 char *tmpbuf;
1579 GV **gvp;
d9095cec 1580 STRLEN tmplen2 = len;
798b63bc 1581 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1582 tmpbuf = smallbuf;
1583 else
2ae0db35 1584 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1585 tmpbuf[0] = '_';
1586 tmpbuf[1] = '<';
2ae0db35 1587 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1588 tmplen += 2;
8a5ee598
RGS
1589 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1590 if (gvp) {
44867030
NC
1591 char *tmpbuf2;
1592 GV *gv2;
1593
1594 if (tmplen2 + 2 <= sizeof smallbuf)
1595 tmpbuf2 = smallbuf;
1596 else
1597 Newx(tmpbuf2, tmplen2 + 2, char);
1598
1599 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1600 /* Either they malloc'd it, or we malloc'd it,
1601 so no prefix is present in ours. */
1602 tmpbuf2[0] = '_';
1603 tmpbuf2[1] = '<';
1604 }
1605
1606 memcpy(tmpbuf2 + 2, s, tmplen2);
1607 tmplen2 += 2;
1608
8a5ee598 1609 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1610 if (!isGV(gv2)) {
8a5ee598 1611 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1612 /* adjust ${"::_<newfilename"} to store the new file name */
1613 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1614 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1615 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1616 }
44867030
NC
1617
1618 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1619 }
e66cf94c 1620 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1621 }
8a5ee598 1622#endif
05ec9bb3 1623 CopFILE_free(PL_curcop);
d9095cec 1624 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1625 }
57843af0 1626 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1627}
1628
29595ff2 1629#ifdef PERL_MAD
cd81e915 1630/* skip space before PL_thistoken */
29595ff2
NC
1631
1632STATIC char *
1633S_skipspace0(pTHX_ register char *s)
1634{
7918f24d
NC
1635 PERL_ARGS_ASSERT_SKIPSPACE0;
1636
29595ff2
NC
1637 s = skipspace(s);
1638 if (!PL_madskills)
1639 return s;
cd81e915
NC
1640 if (PL_skipwhite) {
1641 if (!PL_thiswhite)
6b29d1f5 1642 PL_thiswhite = newSVpvs("");
cd81e915
NC
1643 sv_catsv(PL_thiswhite, PL_skipwhite);
1644 sv_free(PL_skipwhite);
1645 PL_skipwhite = 0;
1646 }
1647 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1648 return s;
1649}
1650
cd81e915 1651/* skip space after PL_thistoken */
29595ff2
NC
1652
1653STATIC char *
1654S_skipspace1(pTHX_ register char *s)
1655{
d4c19fe8 1656 const char *start = s;
29595ff2
NC
1657 I32 startoff = start - SvPVX(PL_linestr);
1658
7918f24d
NC
1659 PERL_ARGS_ASSERT_SKIPSPACE1;
1660
29595ff2
NC
1661 s = skipspace(s);
1662 if (!PL_madskills)
1663 return s;
1664 start = SvPVX(PL_linestr) + startoff;
cd81e915 1665 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1666 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1667 PL_thistoken = newSVpvn(tstart, start - tstart);
1668 }
1669 PL_realtokenstart = -1;
1670 if (PL_skipwhite) {
1671 if (!PL_nextwhite)
6b29d1f5 1672 PL_nextwhite = newSVpvs("");
cd81e915
NC
1673 sv_catsv(PL_nextwhite, PL_skipwhite);
1674 sv_free(PL_skipwhite);
1675 PL_skipwhite = 0;
29595ff2
NC
1676 }
1677 return s;
1678}
1679
1680STATIC char *
1681S_skipspace2(pTHX_ register char *s, SV **svp)
1682{
c35e046a
AL
1683 char *start;
1684 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1685 const I32 startoff = s - SvPVX(PL_linestr);
1686
7918f24d
NC
1687 PERL_ARGS_ASSERT_SKIPSPACE2;
1688
29595ff2
NC
1689 s = skipspace(s);
1690 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1691 if (!PL_madskills || !svp)
1692 return s;
1693 start = SvPVX(PL_linestr) + startoff;
cd81e915 1694 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1695 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1696 PL_thistoken = newSVpvn(tstart, start - tstart);
1697 PL_realtokenstart = -1;
29595ff2 1698 }
cd81e915 1699 if (PL_skipwhite) {
29595ff2 1700 if (!*svp)
6b29d1f5 1701 *svp = newSVpvs("");
cd81e915
NC
1702 sv_setsv(*svp, PL_skipwhite);
1703 sv_free(PL_skipwhite);
1704 PL_skipwhite = 0;
29595ff2
NC
1705 }
1706
1707 return s;
1708}
1709#endif
1710
80a702cd 1711STATIC void
15f169a1 1712S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1713{
1714 AV *av = CopFILEAVx(PL_curcop);
1715 if (av) {
b9f83d2f 1716 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1717 if (orig_sv)
1718 sv_setsv(sv, orig_sv);
1719 else
1720 sv_setpvn(sv, buf, len);
80a702cd
RGS
1721 (void)SvIOK_on(sv);
1722 SvIV_set(sv, 0);
1723 av_store(av, (I32)CopLINE(PL_curcop), sv);
1724 }
1725}
1726
ffb4593c
NT
1727/*
1728 * S_skipspace
1729 * Called to gobble the appropriate amount and type of whitespace.
1730 * Skips comments as well.
1731 */
1732
76e3520e 1733STATIC char *
cea2e8a9 1734S_skipspace(pTHX_ register char *s)
a687059c 1735{
5db06880 1736#ifdef PERL_MAD
f0e67a1d
Z
1737 char *start = s;
1738#endif /* PERL_MAD */
7918f24d 1739 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1740#ifdef PERL_MAD
cd81e915
NC
1741 if (PL_skipwhite) {
1742 sv_free(PL_skipwhite);
f0e67a1d 1743 PL_skipwhite = NULL;
5db06880 1744 }
f0e67a1d 1745#endif /* PERL_MAD */
3280af22 1746 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1747 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1748 s++;
f0e67a1d
Z
1749 } else {
1750 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1751 PL_bufptr = s;
f0998909
Z
1752 lex_read_space(LEX_KEEP_PREVIOUS |
1753 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1754 LEX_NO_NEXT_CHUNK : 0));
3280af22 1755 s = PL_bufptr;
f0e67a1d
Z
1756 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1757 if (PL_linestart > PL_bufptr)
1758 PL_bufptr = PL_linestart;
1759 return s;
463ee0b2 1760 }
5db06880 1761#ifdef PERL_MAD
f0e67a1d
Z
1762 if (PL_madskills)
1763 PL_skipwhite = newSVpvn(start, s-start);
1764#endif /* PERL_MAD */
5db06880 1765 return s;
a687059c 1766}
378cc40b 1767
ffb4593c
NT
1768/*
1769 * S_check_uni
1770 * Check the unary operators to ensure there's no ambiguity in how they're
1771 * used. An ambiguous piece of code would be:
1772 * rand + 5
1773 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1774 * the +5 is its argument.
1775 */
1776
76e3520e 1777STATIC void
cea2e8a9 1778S_check_uni(pTHX)
ba106d47 1779{
97aff369 1780 dVAR;
d4c19fe8
AL
1781 const char *s;
1782 const char *t;
2f3197b3 1783
3280af22 1784 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1785 return;
3280af22
NIS
1786 while (isSPACE(*PL_last_uni))
1787 PL_last_uni++;
c35e046a
AL
1788 s = PL_last_uni;
1789 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1790 s++;
3280af22 1791 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1792 return;
6136c704 1793
9b387841
NC
1794 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1795 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1796 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1797}
1798
ffb4593c
NT
1799/*
1800 * LOP : macro to build a list operator. Its behaviour has been replaced
1801 * with a subroutine, S_lop() for which LOP is just another name.
1802 */
1803
a0d0e21e
LW
1804#define LOP(f,x) return lop(f,x,s)
1805
ffb4593c
NT
1806/*
1807 * S_lop
1808 * Build a list operator (or something that might be one). The rules:
1809 * - if we have a next token, then it's a list operator [why?]
1810 * - if the next thing is an opening paren, then it's a function
1811 * - else it's a list operator
1812 */
1813
76e3520e 1814STATIC I32
a0be28da 1815S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1816{
97aff369 1817 dVAR;
7918f24d
NC
1818
1819 PERL_ARGS_ASSERT_LOP;
1820
6154021b 1821 pl_yylval.ival = f;
35c8bce7 1822 CLINE;
3280af22
NIS
1823 PL_expect = x;
1824 PL_bufptr = s;
1825 PL_last_lop = PL_oldbufptr;
eb160463 1826 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1827#ifdef PERL_MAD
1828 if (PL_lasttoke)
78cdf107 1829 goto lstop;
5db06880 1830#else
3280af22 1831 if (PL_nexttoke)
78cdf107 1832 goto lstop;
5db06880 1833#endif
79072805 1834 if (*s == '(')
bbf60fe6 1835 return REPORT(FUNC);
29595ff2 1836 s = PEEKSPACE(s);
79072805 1837 if (*s == '(')
bbf60fe6 1838 return REPORT(FUNC);
78cdf107
Z
1839 else {
1840 lstop:
1841 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1842 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1843 return REPORT(LSTOP);
78cdf107 1844 }
79072805
LW
1845}
1846
5db06880
NC
1847#ifdef PERL_MAD
1848 /*
1849 * S_start_force
1850 * Sets up for an eventual force_next(). start_force(0) basically does
1851 * an unshift, while start_force(-1) does a push. yylex removes items
1852 * on the "pop" end.
1853 */
1854
1855STATIC void
1856S_start_force(pTHX_ int where)
1857{
1858 int i;
1859
cd81e915 1860 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1861 where = PL_lasttoke;
cd81e915
NC
1862 assert(PL_curforce < 0 || PL_curforce == where);
1863 if (PL_curforce != where) {
5db06880
NC
1864 for (i = PL_lasttoke; i > where; --i) {
1865 PL_nexttoke[i] = PL_nexttoke[i-1];
1866 }
1867 PL_lasttoke++;
1868 }
cd81e915 1869 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1870 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1871 PL_curforce = where;
1872 if (PL_nextwhite) {
5db06880 1873 if (PL_madskills)
6b29d1f5 1874 curmad('^', newSVpvs(""));
cd81e915 1875 CURMAD('_', PL_nextwhite);
5db06880
NC
1876 }
1877}
1878
1879STATIC void
1880S_curmad(pTHX_ char slot, SV *sv)
1881{
1882 MADPROP **where;
1883
1884 if (!sv)
1885 return;
cd81e915
NC
1886 if (PL_curforce < 0)
1887 where = &PL_thismad;
5db06880 1888 else
cd81e915 1889 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1890
cd81e915 1891 if (PL_faketokens)
76f68e9b 1892 sv_setpvs(sv, "");
5db06880
NC
1893 else {
1894 if (!IN_BYTES) {
1895 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1896 SvUTF8_on(sv);
1897 else if (PL_encoding) {
1898 sv_recode_to_utf8(sv, PL_encoding);
1899 }
1900 }
1901 }
1902
1903 /* keep a slot open for the head of the list? */
1904 if (slot != '_' && *where && (*where)->mad_key == '^') {
1905 (*where)->mad_key = slot;
daba3364 1906 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1907 (*where)->mad_val = (void*)sv;
1908 }
1909 else
1910 addmad(newMADsv(slot, sv), where, 0);
1911}
1912#else
b3f24c00
MHM
1913# define start_force(where) NOOP
1914# define curmad(slot, sv) NOOP
5db06880
NC
1915#endif
1916
ffb4593c
NT
1917/*
1918 * S_force_next
9cbb5ea2 1919 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1920 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1921 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1922 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1923 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1924 */
1925
4e553d73 1926STATIC void
cea2e8a9 1927S_force_next(pTHX_ I32 type)
79072805 1928{
97aff369 1929 dVAR;
704d4215
GG
1930#ifdef DEBUGGING
1931 if (DEBUG_T_TEST) {
1932 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1933 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1934 }
1935#endif
5db06880 1936#ifdef PERL_MAD
cd81e915 1937 if (PL_curforce < 0)
5db06880 1938 start_force(PL_lasttoke);
cd81e915 1939 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1940 if (PL_lex_state != LEX_KNOWNEXT)
1941 PL_lex_defer = PL_lex_state;
1942 PL_lex_state = LEX_KNOWNEXT;
1943 PL_lex_expect = PL_expect;
cd81e915 1944 PL_curforce = -1;
5db06880 1945#else
3280af22
NIS
1946 PL_nexttype[PL_nexttoke] = type;
1947 PL_nexttoke++;
1948 if (PL_lex_state != LEX_KNOWNEXT) {
1949 PL_lex_defer = PL_lex_state;
1950 PL_lex_expect = PL_expect;
1951 PL_lex_state = LEX_KNOWNEXT;
79072805 1952 }
5db06880 1953#endif
79072805
LW
1954}
1955
28ac2b49
Z
1956void
1957Perl_yyunlex(pTHX)
1958{
a7aaec61
Z
1959 int yyc = PL_parser->yychar;
1960 if (yyc != YYEMPTY) {
1961 if (yyc) {
1962 start_force(-1);
1963 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1964 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 1965 PL_lex_allbrackets--;
a7aaec61 1966 PL_lex_brackets--;
78cdf107
Z
1967 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1968 } else if (yyc == '('/*)*/) {
1969 PL_lex_allbrackets--;
1970 yyc |= (2<<24);
a7aaec61
Z
1971 }
1972 force_next(yyc);
1973 }
28ac2b49
Z
1974 PL_parser->yychar = YYEMPTY;
1975 }
1976}
1977
d0a148a6 1978STATIC SV *
15f169a1 1979S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1980{
97aff369 1981 dVAR;
740cce10 1982 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1983 !IN_BYTES
1984 && UTF
1985 && !is_ascii_string((const U8*)start, len)
740cce10 1986 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1987 return sv;
1988}
1989
ffb4593c
NT
1990/*
1991 * S_force_word
1992 * When the lexer knows the next thing is a word (for instance, it has
1993 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1994 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1995 * lookahead.
ffb4593c
NT
1996 *
1997 * Arguments:
b1b65b59 1998 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1999 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2000 * int check_keyword : if true, Perl checks to make sure the word isn't
2001 * a keyword (do this if the word is a label, e.g. goto FOO)
2002 * int allow_pack : if true, : characters will also be allowed (require,
2003 * use, etc. do this)
9cbb5ea2 2004 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2005 */
2006
76e3520e 2007STATIC char *
cea2e8a9 2008S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2009{
97aff369 2010 dVAR;
463ee0b2
LW
2011 register char *s;
2012 STRLEN len;
4e553d73 2013
7918f24d
NC
2014 PERL_ARGS_ASSERT_FORCE_WORD;
2015
29595ff2 2016 start = SKIPSPACE1(start);
463ee0b2 2017 s = start;
7e2040f0 2018 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2019 (allow_pack && *s == ':') ||
15f0808c 2020 (allow_initial_tick && *s == '\'') )
a0d0e21e 2021 {
3280af22 2022 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2023 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2024 return start;
cd81e915 2025 start_force(PL_curforce);
5db06880
NC
2026 if (PL_madskills)
2027 curmad('X', newSVpvn(start,s-start));
463ee0b2 2028 if (token == METHOD) {
29595ff2 2029 s = SKIPSPACE1(s);
463ee0b2 2030 if (*s == '(')
3280af22 2031 PL_expect = XTERM;
463ee0b2 2032 else {
3280af22 2033 PL_expect = XOPERATOR;
463ee0b2 2034 }
79072805 2035 }
e74e6b3d 2036 if (PL_madskills)
63575281 2037 curmad('g', newSVpvs( "forced" ));
9ded7720 2038 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2039 = (OP*)newSVOP(OP_CONST,0,
2040 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2041 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2042 force_next(token);
2043 }
2044 return s;
2045}
2046
ffb4593c
NT
2047/*
2048 * S_force_ident
9cbb5ea2 2049 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2050 * text only contains the "foo" portion. The first argument is a pointer
2051 * to the "foo", and the second argument is the type symbol to prefix.
2052 * Forces the next token to be a "WORD".
9cbb5ea2 2053 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2054 */
2055
76e3520e 2056STATIC void
bfed75c6 2057S_force_ident(pTHX_ register const char *s, int kind)
79072805 2058{
97aff369 2059 dVAR;
7918f24d
NC
2060
2061 PERL_ARGS_ASSERT_FORCE_IDENT;
2062
c35e046a 2063 if (*s) {
90e5519e
NC
2064 const STRLEN len = strlen(s);
2065 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2066 start_force(PL_curforce);
9ded7720 2067 NEXTVAL_NEXTTOKE.opval = o;
79072805 2068 force_next(WORD);
748a9306 2069 if (kind) {
11343788 2070 o->op_private = OPpCONST_ENTERED;
55497cff 2071 /* XXX see note in pp_entereval() for why we forgo typo
2072 warnings if the symbol must be introduced in an eval.
2073 GSAR 96-10-12 */
90e5519e
NC
2074 gv_fetchpvn_flags(s, len,
2075 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2076 : GV_ADD,
2077 kind == '$' ? SVt_PV :
2078 kind == '@' ? SVt_PVAV :
2079 kind == '%' ? SVt_PVHV :
a0d0e21e 2080 SVt_PVGV
90e5519e 2081 );
748a9306 2082 }
79072805
LW
2083 }
2084}
2085
1571675a
GS
2086NV
2087Perl_str_to_version(pTHX_ SV *sv)
2088{
2089 NV retval = 0.0;
2090 NV nshift = 1.0;
2091 STRLEN len;
cfd0369c 2092 const char *start = SvPV_const(sv,len);
9d4ba2ae 2093 const char * const end = start + len;
504618e9 2094 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2095
2096 PERL_ARGS_ASSERT_STR_TO_VERSION;
2097
1571675a 2098 while (start < end) {
ba210ebe 2099 STRLEN skip;
1571675a
GS
2100 UV n;
2101 if (utf)
9041c2e3 2102 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2103 else {
2104 n = *(U8*)start;
2105 skip = 1;
2106 }
2107 retval += ((NV)n)/nshift;
2108 start += skip;
2109 nshift *= 1000;
2110 }
2111 return retval;
2112}
2113
4e553d73 2114/*
ffb4593c
NT
2115 * S_force_version
2116 * Forces the next token to be a version number.
e759cc13
RGS
2117 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2118 * and if "guessing" is TRUE, then no new token is created (and the caller
2119 * must use an alternative parsing method).
ffb4593c
NT
2120 */
2121
76e3520e 2122STATIC char *
e759cc13 2123S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2124{
97aff369 2125 dVAR;
5f66b61c 2126 OP *version = NULL;
44dcb63b 2127 char *d;
5db06880
NC
2128#ifdef PERL_MAD
2129 I32 startoff = s - SvPVX(PL_linestr);
2130#endif
89bfa8cd 2131
7918f24d
NC
2132 PERL_ARGS_ASSERT_FORCE_VERSION;
2133
29595ff2 2134 s = SKIPSPACE1(s);
89bfa8cd 2135
44dcb63b 2136 d = s;
dd629d5b 2137 if (*d == 'v')
44dcb63b 2138 d++;
44dcb63b 2139 if (isDIGIT(*d)) {
e759cc13
RGS
2140 while (isDIGIT(*d) || *d == '_' || *d == '.')
2141 d++;
5db06880
NC
2142#ifdef PERL_MAD
2143 if (PL_madskills) {
cd81e915 2144 start_force(PL_curforce);
5db06880
NC
2145 curmad('X', newSVpvn(s,d-s));
2146 }
2147#endif
4e4da3ac 2148 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2149 SV *ver;
8d08d9ba
DG
2150#ifdef USE_LOCALE_NUMERIC
2151 char *loc = setlocale(LC_NUMERIC, "C");
2152#endif
6154021b 2153 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2154#ifdef USE_LOCALE_NUMERIC
2155 setlocale(LC_NUMERIC, loc);
2156#endif
6154021b 2157 version = pl_yylval.opval;
dd629d5b
GS
2158 ver = cSVOPx(version)->op_sv;
2159 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2160 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2161 SvNV_set(ver, str_to_version(ver));
1571675a 2162 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2163 }
89bfa8cd 2164 }
5db06880
NC
2165 else if (guessing) {
2166#ifdef PERL_MAD
2167 if (PL_madskills) {
cd81e915
NC
2168 sv_free(PL_nextwhite); /* let next token collect whitespace */
2169 PL_nextwhite = 0;
5db06880
NC
2170 s = SvPVX(PL_linestr) + startoff;
2171 }
2172#endif
e759cc13 2173 return s;
5db06880 2174 }
89bfa8cd 2175 }
2176
5db06880
NC
2177#ifdef PERL_MAD
2178 if (PL_madskills && !version) {
cd81e915
NC
2179 sv_free(PL_nextwhite); /* let next token collect whitespace */
2180 PL_nextwhite = 0;
5db06880
NC
2181 s = SvPVX(PL_linestr) + startoff;
2182 }
2183#endif
89bfa8cd 2184 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2185 start_force(PL_curforce);
9ded7720 2186 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2187 force_next(WORD);
89bfa8cd 2188
e759cc13 2189 return s;
89bfa8cd 2190}
2191
ffb4593c 2192/*
91152fc1
DG
2193 * S_force_strict_version
2194 * Forces the next token to be a version number using strict syntax rules.
2195 */
2196
2197STATIC char *
2198S_force_strict_version(pTHX_ char *s)
2199{
2200 dVAR;
2201 OP *version = NULL;
2202#ifdef PERL_MAD
2203 I32 startoff = s - SvPVX(PL_linestr);
2204#endif
2205 const char *errstr = NULL;
2206
2207 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2208
2209 while (isSPACE(*s)) /* leading whitespace */
2210 s++;
2211
2212 if (is_STRICT_VERSION(s,&errstr)) {
2213 SV *ver = newSV(0);
2214 s = (char *)scan_version(s, ver, 0);
2215 version = newSVOP(OP_CONST, 0, ver);
2216 }
4e4da3ac
Z
2217 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2218 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2219 {
91152fc1
DG
2220 PL_bufptr = s;
2221 if (errstr)
2222 yyerror(errstr); /* version required */
2223 return s;
2224 }
2225
2226#ifdef PERL_MAD
2227 if (PL_madskills && !version) {
2228 sv_free(PL_nextwhite); /* let next token collect whitespace */
2229 PL_nextwhite = 0;
2230 s = SvPVX(PL_linestr) + startoff;
2231 }
2232#endif
2233 /* NOTE: The parser sees the package name and the VERSION swapped */
2234 start_force(PL_curforce);
2235 NEXTVAL_NEXTTOKE.opval = version;
2236 force_next(WORD);
2237
2238 return s;
2239}
2240
2241/*
ffb4593c
NT
2242 * S_tokeq
2243 * Tokenize a quoted string passed in as an SV. It finds the next
2244 * chunk, up to end of string or a backslash. It may make a new
2245 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2246 * turns \\ into \.
2247 */
2248
76e3520e 2249STATIC SV *
cea2e8a9 2250S_tokeq(pTHX_ SV *sv)
79072805 2251{
97aff369 2252 dVAR;
79072805
LW
2253 register char *s;
2254 register char *send;
2255 register char *d;
b3ac6de7
IZ
2256 STRLEN len = 0;
2257 SV *pv = sv;
79072805 2258
7918f24d
NC
2259 PERL_ARGS_ASSERT_TOKEQ;
2260
79072805 2261 if (!SvLEN(sv))
b3ac6de7 2262 goto finish;
79072805 2263
a0d0e21e 2264 s = SvPV_force(sv, len);
21a311ee 2265 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2266 goto finish;
463ee0b2 2267 send = s + len;
dcb21ed6
NC
2268 /* This is relying on the SV being "well formed" with a trailing '\0' */
2269 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2270 s++;
2271 if (s == send)
b3ac6de7 2272 goto finish;
79072805 2273 d = s;
be4731d2 2274 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2275 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2276 }
79072805
LW
2277 while (s < send) {
2278 if (*s == '\\') {
a0d0e21e 2279 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2280 s++; /* all that, just for this */
2281 }
2282 *d++ = *s++;
2283 }
2284 *d = '\0';
95a20fc0 2285 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2286 finish:
3280af22 2287 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2288 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2289 return sv;
2290}
2291
ffb4593c
NT
2292/*
2293 * Now come three functions related to double-quote context,
2294 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2295 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2296 * interact with PL_lex_state, and create fake ( ... ) argument lists
2297 * to handle functions and concatenation.
2298 * They assume that whoever calls them will be setting up a fake
2299 * join call, because each subthing puts a ',' after it. This lets
2300 * "lower \luPpEr"
2301 * become
2302 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2303 *
2304 * (I'm not sure whether the spurious commas at the end of lcfirst's
2305 * arguments and join's arguments are created or not).
2306 */
2307
2308/*
2309 * S_sublex_start
6154021b 2310 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2311 *
2312 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2313 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2314 *
2315 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2316 *
2317 * Everything else becomes a FUNC.
2318 *
2319 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2320 * had an OP_CONST or OP_READLINE). This just sets us up for a
2321 * call to S_sublex_push().
2322 */
2323
76e3520e 2324STATIC I32
cea2e8a9 2325S_sublex_start(pTHX)
79072805 2326{
97aff369 2327 dVAR;
6154021b 2328 register const I32 op_type = pl_yylval.ival;
79072805
LW
2329
2330 if (op_type == OP_NULL) {
6154021b 2331 pl_yylval.opval = PL_lex_op;
5f66b61c 2332 PL_lex_op = NULL;
79072805
LW
2333 return THING;
2334 }
2335 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2336 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2337
2338 if (SvTYPE(sv) == SVt_PVIV) {
2339 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2340 STRLEN len;
96a5add6 2341 const char * const p = SvPV_const(sv, len);
740cce10 2342 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2343 SvREFCNT_dec(sv);
2344 sv = nsv;
4e553d73 2345 }
6154021b 2346 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2347 PL_lex_stuff = NULL;
6f33ba73
RGS
2348 /* Allow <FH> // "foo" */
2349 if (op_type == OP_READLINE)
2350 PL_expect = XTERMORDORDOR;
79072805
LW
2351 return THING;
2352 }
e3f73d4e
RGS
2353 else if (op_type == OP_BACKTICK && PL_lex_op) {
2354 /* readpipe() vas overriden */
2355 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2356 pl_yylval.opval = PL_lex_op;
9b201d7d 2357 PL_lex_op = NULL;
e3f73d4e
RGS
2358 PL_lex_stuff = NULL;
2359 return THING;
2360 }
79072805 2361
3280af22 2362 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2363 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2364 PL_sublex_info.sub_op = PL_lex_op;
2365 PL_lex_state = LEX_INTERPPUSH;
55497cff 2366
3280af22
NIS
2367 PL_expect = XTERM;
2368 if (PL_lex_op) {
6154021b 2369 pl_yylval.opval = PL_lex_op;
5f66b61c 2370 PL_lex_op = NULL;
55497cff 2371 return PMFUNC;
2372 }
2373 else
2374 return FUNC;
2375}
2376
ffb4593c
NT
2377/*
2378 * S_sublex_push
2379 * Create a new scope to save the lexing state. The scope will be
2380 * ended in S_sublex_done. Returns a '(', starting the function arguments
2381 * to the uc, lc, etc. found before.
2382 * Sets PL_lex_state to LEX_INTERPCONCAT.
2383 */
2384
76e3520e 2385STATIC I32
cea2e8a9 2386S_sublex_push(pTHX)
55497cff 2387{
27da23d5 2388 dVAR;
f46d017c 2389 ENTER;
55497cff 2390
3280af22 2391 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2392 SAVEBOOL(PL_lex_dojoin);
3280af22 2393 SAVEI32(PL_lex_brackets);
78cdf107
Z
2394 SAVEI32(PL_lex_allbrackets);
2395 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2396 SAVEI32(PL_lex_casemods);
2397 SAVEI32(PL_lex_starts);
651b5b28 2398 SAVEI8(PL_lex_state);
7766f137 2399 SAVEVPTR(PL_lex_inpat);
98246f1e 2400 SAVEI16(PL_lex_inwhat);
57843af0 2401 SAVECOPLINE(PL_curcop);
3280af22 2402 SAVEPPTR(PL_bufptr);
8452ff4b 2403 SAVEPPTR(PL_bufend);
3280af22
NIS
2404 SAVEPPTR(PL_oldbufptr);
2405 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2406 SAVEPPTR(PL_last_lop);
2407 SAVEPPTR(PL_last_uni);
3280af22
NIS
2408 SAVEPPTR(PL_linestart);
2409 SAVESPTR(PL_linestr);
8edd5f42
RGS
2410 SAVEGENERICPV(PL_lex_brackstack);
2411 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2412
2413 PL_linestr = PL_lex_stuff;
a0714e2c 2414 PL_lex_stuff = NULL;
3280af22 2415
9cbb5ea2
GS
2416 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2417 = SvPVX(PL_linestr);
3280af22 2418 PL_bufend += SvCUR(PL_linestr);
bd61b366 2419 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2420 SAVEFREESV(PL_linestr);
2421
2422 PL_lex_dojoin = FALSE;
2423 PL_lex_brackets = 0;
78cdf107
Z
2424 PL_lex_allbrackets = 0;
2425 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2426 Newx(PL_lex_brackstack, 120, char);
2427 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2428 PL_lex_casemods = 0;
2429 *PL_lex_casestack = '\0';
2430 PL_lex_starts = 0;
2431 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2432 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2433
2434 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2435 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2436 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2437 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2438 else
5f66b61c 2439 PL_lex_inpat = NULL;
79072805 2440
55497cff 2441 return '(';
79072805
LW
2442}
2443
ffb4593c
NT
2444/*
2445 * S_sublex_done
2446 * Restores lexer state after a S_sublex_push.
2447 */
2448
76e3520e 2449STATIC I32
cea2e8a9 2450S_sublex_done(pTHX)
79072805 2451{
27da23d5 2452 dVAR;
3280af22 2453 if (!PL_lex_starts++) {
396482e1 2454 SV * const sv = newSVpvs("");
9aa983d2
JH
2455 if (SvUTF8(PL_linestr))
2456 SvUTF8_on(sv);
3280af22 2457 PL_expect = XOPERATOR;
6154021b 2458 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2459 return THING;
2460 }
2461
3280af22
NIS
2462 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2463 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2464 return yylex();
79072805
LW
2465 }
2466
ffb4593c 2467 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2468 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2469 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2470 PL_linestr = PL_lex_repl;
2471 PL_lex_inpat = 0;
2472 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2473 PL_bufend += SvCUR(PL_linestr);
bd61b366 2474 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2475 SAVEFREESV(PL_linestr);
2476 PL_lex_dojoin = FALSE;
2477 PL_lex_brackets = 0;
78cdf107
Z
2478 PL_lex_allbrackets = 0;
2479 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2480 PL_lex_casemods = 0;
2481 *PL_lex_casestack = '\0';
2482 PL_lex_starts = 0;
25da4f38 2483 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2484 PL_lex_state = LEX_INTERPNORMAL;
2485 PL_lex_starts++;
e9fa98b2
HS
2486 /* we don't clear PL_lex_repl here, so that we can check later
2487 whether this is an evalled subst; that means we rely on the
2488 logic to ensure sublex_done() is called again only via the
2489 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2490 }
e9fa98b2 2491 else {
3280af22 2492 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2493 PL_lex_repl = NULL;
e9fa98b2 2494 }
79072805 2495 return ',';
ffed7fef
LW
2496 }
2497 else {
5db06880
NC
2498#ifdef PERL_MAD
2499 if (PL_madskills) {
cd81e915
NC
2500 if (PL_thiswhite) {
2501 if (!PL_endwhite)
6b29d1f5 2502 PL_endwhite = newSVpvs("");
cd81e915
NC
2503 sv_catsv(PL_endwhite, PL_thiswhite);
2504 PL_thiswhite = 0;
2505 }
2506 if (PL_thistoken)
76f68e9b 2507 sv_setpvs(PL_thistoken,"");
5db06880 2508 else
cd81e915 2509 PL_realtokenstart = -1;
5db06880
NC
2510 }
2511#endif
f46d017c 2512 LEAVE;
3280af22
NIS
2513 PL_bufend = SvPVX(PL_linestr);
2514 PL_bufend += SvCUR(PL_linestr);
2515 PL_expect = XOPERATOR;
09bef843 2516 PL_sublex_info.sub_inwhat = 0;
79072805 2517 return ')';
ffed7fef
LW
2518 }
2519}
2520
02aa26ce
NT
2521/*
2522 scan_const
2523
2524 Extracts a pattern, double-quoted string, or transliteration. This
2525 is terrifying code.
2526
94def140 2527 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2528 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2529 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2530
94def140
TS
2531 Returns a pointer to the character scanned up to. If this is
2532 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2533 successfully parsed), will leave an OP for the substring scanned
6154021b 2534 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2535 by looking at the next characters herself.
2536
02aa26ce
NT
2537 In patterns:
2538 backslashes:
ff3f963a 2539 constants: \N{NAME} only
02aa26ce
NT
2540 case and quoting: \U \Q \E
2541 stops on @ and $, but not for $ as tail anchor
2542
2543 In transliterations:
2544 characters are VERY literal, except for - not at the start or end
94def140
TS
2545 of the string, which indicates a range. If the range is in bytes,
2546 scan_const expands the range to the full set of intermediate
2547 characters. If the range is in utf8, the hyphen is replaced with
2548 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2549
2550 In double-quoted strings:
2551 backslashes:
2552 double-quoted style: \r and \n
ff3f963a 2553 constants: \x31, etc.
94def140 2554 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2555 case and quoting: \U \Q \E
2556 stops on @ and $
2557
2558 scan_const does *not* construct ops to handle interpolated strings.
2559 It stops processing as soon as it finds an embedded $ or @ variable
2560 and leaves it to the caller to work out what's going on.
2561
94def140
TS
2562 embedded arrays (whether in pattern or not) could be:
2563 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2564
2565 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2566
2567 $ in pattern could be $foo or could be tail anchor. Assumption:
2568 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2569 followed by one of "()| \r\n\t"
02aa26ce
NT
2570
2571 \1 (backreferences) are turned into $1
2572
2573 The structure of the code is
2574 while (there's a character to process) {
94def140
TS
2575 handle transliteration ranges
2576 skip regexp comments /(?#comment)/ and codes /(?{code})/
2577 skip #-initiated comments in //x patterns
2578 check for embedded arrays
02aa26ce
NT
2579 check for embedded scalars
2580 if (backslash) {
94def140 2581 deprecate \1 in substitution replacements
02aa26ce
NT
2582 handle string-changing backslashes \l \U \Q \E, etc.
2583 switch (what was escaped) {
94def140 2584 handle \- in a transliteration (becomes a literal -)
ff3f963a 2585 if a pattern and not \N{, go treat as regular character
94def140
TS
2586 handle \132 (octal characters)
2587 handle \x15 and \x{1234} (hex characters)
ff3f963a 2588 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2589 handle \cV (control characters)
2590 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2591 } (end switch)
77a135fe 2592 continue
02aa26ce 2593 } (end if backslash)
77a135fe 2594 handle regular character
02aa26ce 2595 } (end while character to read)
4e553d73 2596
02aa26ce
NT
2597*/
2598
76e3520e 2599STATIC char *
cea2e8a9 2600S_scan_const(pTHX_ char *start)
79072805 2601{
97aff369 2602 dVAR;
3280af22 2603 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2604 SV *sv = newSV(send - start); /* sv for the constant. See
2605 note below on sizing. */
02aa26ce
NT
2606 register char *s = start; /* start of the constant */
2607 register char *d = SvPVX(sv); /* destination for copies */
2608 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2609 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2610 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2611 I32 this_utf8 = UTF; /* Is the source string assumed
2612 to be UTF8? But, this can
2613 show as true when the source
2614 isn't utf8, as for example
2615 when it is entirely composed
2616 of hex constants */
2617
2618 /* Note on sizing: The scanned constant is placed into sv, which is
2619 * initialized by newSV() assuming one byte of output for every byte of
2620 * input. This routine expects newSV() to allocate an extra byte for a
2621 * trailing NUL, which this routine will append if it gets to the end of
2622 * the input. There may be more bytes of input than output (eg., \N{LATIN
2623 * CAPITAL LETTER A}), or more output than input if the constant ends up
2624 * recoded to utf8, but each time a construct is found that might increase
2625 * the needed size, SvGROW() is called. Its size parameter each time is
2626 * based on the best guess estimate at the time, namely the length used so
2627 * far, plus the length the current construct will occupy, plus room for
2628 * the trailing NUL, plus one byte for every input byte still unscanned */
2629
012bcf8d 2630 UV uv;
4c3a8340
TS
2631#ifdef EBCDIC
2632 UV literal_endpoint = 0;
e294cc5d 2633 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2634#endif
012bcf8d 2635
7918f24d
NC
2636 PERL_ARGS_ASSERT_SCAN_CONST;
2637
bb16bae8 2638 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2639 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2640 /* If we are doing a trans and we know we want UTF8 set expectation */
2641 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2642 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2643 }
2644
2645
79072805 2646 while (s < send || dorange) {
ff3f963a 2647
02aa26ce 2648 /* get transliterations out of the way (they're most literal) */
3280af22 2649 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2650 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2651 if (dorange) {
1ba5c669
JH
2652 I32 i; /* current expanded character */
2653 I32 min; /* first character in range */
2654 I32 max; /* last character in range */
02aa26ce 2655
e294cc5d
JH
2656#ifdef EBCDIC
2657 UV uvmax = 0;
2658#endif
2659
2660 if (has_utf8
2661#ifdef EBCDIC
2662 && !native_range
2663#endif
2664 ) {
9d4ba2ae 2665 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2666 char *e = d++;
2667 while (e-- > c)
2668 *(e + 1) = *e;
25716404 2669 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2670 /* mark the range as done, and continue */
2671 dorange = FALSE;
2672 didrange = TRUE;
2673 continue;
2674 }
2b9d42f0 2675
95a20fc0 2676 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2677#ifdef EBCDIC
2678 SvGROW(sv,
2679 SvLEN(sv) + (has_utf8 ?
2680 (512 - UTF_CONTINUATION_MARK +
2681 UNISKIP(0x100))
2682 : 256));
2683 /* How many two-byte within 0..255: 128 in UTF-8,
2684 * 96 in UTF-8-mod. */
2685#else
9cbb5ea2 2686 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2687#endif
9cbb5ea2 2688 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2689#ifdef EBCDIC
2690 if (has_utf8) {
2691 int j;
2692 for (j = 0; j <= 1; j++) {
2693 char * const c = (char*)utf8_hop((U8*)d, -1);
2694 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2695 if (j)
2696 min = (U8)uv;
2697 else if (uv < 256)
2698 max = (U8)uv;
2699 else {
2700 max = (U8)0xff; /* only to \xff */
2701 uvmax = uv; /* \x{100} to uvmax */
2702 }
2703 d = c; /* eat endpoint chars */
2704 }
2705 }
2706 else {
2707#endif
2708 d -= 2; /* eat the first char and the - */
2709 min = (U8)*d; /* first char in range */
2710 max = (U8)d[1]; /* last char in range */
2711#ifdef EBCDIC
2712 }
2713#endif
8ada0baa 2714
c2e66d9e 2715 if (min > max) {
01ec43d0 2716 Perl_croak(aTHX_
d1573ac7 2717 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2718 (char)min, (char)max);
c2e66d9e
GS
2719 }
2720
c7f1f016 2721#ifdef EBCDIC
4c3a8340
TS
2722 if (literal_endpoint == 2 &&
2723 ((isLOWER(min) && isLOWER(max)) ||
2724 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2725 if (isLOWER(min)) {
2726 for (i = min; i <= max; i++)
2727 if (isLOWER(i))
db42d148 2728 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2729 } else {
2730 for (i = min; i <= max; i++)
2731 if (isUPPER(i))
db42d148 2732 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2733 }
2734 }
2735 else
2736#endif
2737 for (i = min; i <= max; i++)
e294cc5d
JH
2738#ifdef EBCDIC
2739 if (has_utf8) {
2740 const U8 ch = (U8)NATIVE_TO_UTF(i);
2741 if (UNI_IS_INVARIANT(ch))
2742 *d++ = (U8)i;
2743 else {
2744 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2745 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2746 }
2747 }
2748 else
2749#endif
2750 *d++ = (char)i;
2751
2752#ifdef EBCDIC
2753 if (uvmax) {
2754 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2755 if (uvmax > 0x101)
2756 *d++ = (char)UTF_TO_NATIVE(0xff);
2757 if (uvmax > 0x100)
2758 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2759 }
2760#endif
02aa26ce
NT
2761
2762 /* mark the range as done, and continue */
79072805 2763 dorange = FALSE;
01ec43d0 2764 didrange = TRUE;
4c3a8340
TS
2765#ifdef EBCDIC
2766 literal_endpoint = 0;
2767#endif
79072805 2768 continue;
4e553d73 2769 }
02aa26ce
NT
2770
2771 /* range begins (ignore - as first or last char) */
79072805 2772 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2773 if (didrange) {
1fafa243 2774 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2775 }
e294cc5d
JH
2776 if (has_utf8
2777#ifdef EBCDIC
2778 && !native_range
2779#endif
2780 ) {
25716404 2781 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2782 s++;
2783 continue;
2784 }
79072805
LW
2785 dorange = TRUE;
2786 s++;
01ec43d0
GS
2787 }
2788 else {
2789 didrange = FALSE;
4c3a8340
TS
2790#ifdef EBCDIC
2791 literal_endpoint = 0;
e294cc5d 2792 native_range = TRUE;
4c3a8340 2793#endif
01ec43d0 2794 }
79072805 2795 }
02aa26ce
NT
2796
2797 /* if we get here, we're not doing a transliteration */
2798
0f5d15d6
IZ
2799 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2800 except for the last char, which will be done separately. */
3280af22 2801 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2802 if (s[2] == '#') {
e994fd66 2803 while (s+1 < send && *s != ')')
db42d148 2804 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2805 }
2806 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2807 || (s[2] == '?' && s[3] == '{'))
155aba94 2808 {
cc6b7395 2809 I32 count = 1;
0f5d15d6 2810 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2811 char c;
2812
d9f97599
GS
2813 while (count && (c = *regparse)) {
2814 if (c == '\\' && regparse[1])
2815 regparse++;
4e553d73 2816 else if (c == '{')
cc6b7395 2817 count++;
4e553d73 2818 else if (c == '}')
cc6b7395 2819 count--;
d9f97599 2820 regparse++;
cc6b7395 2821 }
e994fd66 2822 if (*regparse != ')')
5bdf89e7 2823 regparse--; /* Leave one char for continuation. */
0f5d15d6 2824 while (s < regparse)
db42d148 2825 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2826 }
748a9306 2827 }
02aa26ce
NT
2828
2829 /* likewise skip #-initiated comments in //x patterns */
3280af22 2830 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2831 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2832 while (s+1 < send && *s != '\n')
db42d148 2833 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2834 }
02aa26ce 2835
5d1d4326 2836 /* check for embedded arrays
da6eedaa 2837 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2838 */
1749ea0d
TS
2839 else if (*s == '@' && s[1]) {
2840 if (isALNUM_lazy_if(s+1,UTF))
2841 break;
2842 if (strchr(":'{$", s[1]))
2843 break;
2844 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2845 break; /* in regexp, neither @+ nor @- are interpolated */
2846 }
02aa26ce
NT
2847
2848 /* check for embedded scalars. only stop if we're sure it's a
2849 variable.
2850 */
79072805 2851 else if (*s == '$') {
3280af22 2852 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2853 break;
77772344 2854 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2855 if (s[1] == '\\') {
2856 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2857 "Possible unintended interpolation of $\\ in regex");
77772344 2858 }
79072805 2859 break; /* in regexp, $ might be tail anchor */
77772344 2860 }
79072805 2861 }
02aa26ce 2862
2b9d42f0
NIS
2863 /* End of else if chain - OP_TRANS rejoin rest */
2864
02aa26ce 2865 /* backslashes */
79072805 2866 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2867 char* e; /* Can be used for ending '}', etc. */
2868
79072805 2869 s++;
02aa26ce 2870
7d0fc23c
KW
2871 /* warn on \1 - \9 in substitution replacements, but note that \11
2872 * is an octal; and \19 is \1 followed by '9' */
3280af22 2873 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2874 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2875 {
a2a5de95 2876 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2877 *--s = '$';
2878 break;
2879 }
02aa26ce
NT
2880
2881 /* string-change backslash escapes */
3280af22 2882 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2883 --s;
2884 break;
2885 }
ff3f963a
KW
2886 /* In a pattern, process \N, but skip any other backslash escapes.
2887 * This is because we don't want to translate an escape sequence
2888 * into a meta symbol and have the regex compiler use the meta
2889 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2890 * in spite of this, we do have to process \N here while the proper
2891 * charnames handler is in scope. See bugs #56444 and #62056.
2892 * There is a complication because \N in a pattern may also stand
2893 * for 'match a non-nl', and not mean a charname, in which case its
2894 * processing should be deferred to the regex compiler. To be a
2895 * charname it must be followed immediately by a '{', and not look
2896 * like \N followed by a curly quantifier, i.e., not something like
2897 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2898 * quantifier */
2899 else if (PL_lex_inpat
2900 && (*s != 'N'
2901 || s[1] != '{'
2902 || regcurly(s + 1)))
2903 {
cc74c5bd
TS
2904 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2905 goto default_action;
2906 }
02aa26ce 2907
79072805 2908 switch (*s) {
02aa26ce
NT
2909
2910 /* quoted - in transliterations */
79072805 2911 case '-':
3280af22 2912 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2913 *d++ = *s++;
2914 continue;
2915 }
2916 /* FALL THROUGH */
2917 default:
11b8faa4 2918 {
a2a5de95
NC
2919 if ((isALPHA(*s) || isDIGIT(*s)))
2920 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2921 "Unrecognized escape \\%c passed through",
2922 *s);
11b8faa4 2923 /* default action is to copy the quoted character */
f9a63242 2924 goto default_action;
11b8faa4 2925 }
02aa26ce 2926
632403cc 2927 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2928 case '0': case '1': case '2': case '3':
2929 case '4': case '5': case '6': case '7':
ba210ebe 2930 {
53305cf1
NC
2931 I32 flags = 0;
2932 STRLEN len = 3;
77a135fe 2933 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2934 s += len;
2935 }
012bcf8d 2936 goto NUM_ESCAPE_INSERT;
02aa26ce 2937
f0a2b745
KW
2938 /* eg. \o{24} indicates the octal constant \024 */
2939 case 'o':
2940 {
2941 STRLEN len;
454155d9 2942 const char* error;
f0a2b745 2943
454155d9 2944 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2945 s += len;
454155d9 2946 if (! valid) {
f0a2b745
KW
2947 yyerror(error);
2948 continue;
2949 }
2950 goto NUM_ESCAPE_INSERT;
2951 }
2952
77a135fe 2953 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2954 case 'x':
a0ed51b3
LW
2955 ++s;
2956 if (*s == '{') {
9d4ba2ae 2957 char* const e = strchr(s, '}');
a4c04bdc
NC
2958 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2959 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2960 STRLEN len;
355860ce 2961
53305cf1 2962 ++s;
adaeee49 2963 if (!e) {
a0ed51b3 2964 yyerror("Missing right brace on \\x{}");
355860ce 2965 continue;
ba210ebe 2966 }
53305cf1 2967 len = e - s;
77a135fe 2968 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2969 s = e + 1;
a0ed51b3
LW
2970 }
2971 else {
ba210ebe 2972 {
53305cf1 2973 STRLEN len = 2;
a4c04bdc 2974 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2975 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2976 s += len;
2977 }
012bcf8d
GS
2978 }
2979
2980 NUM_ESCAPE_INSERT:
ff3f963a
KW
2981 /* Insert oct or hex escaped character. There will always be
2982 * enough room in sv since such escapes will be longer than any
2983 * UTF-8 sequence they can end up as, except if they force us
2984 * to recode the rest of the string into utf8 */
ba7cea30 2985
77a135fe 2986 /* Here uv is the ordinal of the next character being added in
ff3f963a 2987 * unicode (converted from native). */
77a135fe 2988 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2989 if (!has_utf8 && uv > 255) {
77a135fe
KW
2990 /* Might need to recode whatever we have accumulated so
2991 * far if it contains any chars variant in utf8 or
2992 * utf-ebcdic. */
2993
2994 SvCUR_set(sv, d - SvPVX_const(sv));
2995 SvPOK_on(sv);
2996 *d = '\0';
77a135fe 2997 /* See Note on sizing above. */
7bf79863
KW
2998 sv_utf8_upgrade_flags_grow(sv,
2999 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3000 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3001 d = SvPVX(sv) + SvCUR(sv);
3002 has_utf8 = TRUE;
012bcf8d
GS
3003 }
3004
77a135fe
KW
3005 if (has_utf8) {
3006 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3007 if (PL_lex_inwhat == OP_TRANS &&
3008 PL_sublex_info.sub_op) {
3009 PL_sublex_info.sub_op->op_private |=
3010 (PL_lex_repl ? OPpTRANS_FROM_UTF
3011 : OPpTRANS_TO_UTF);
f9a63242 3012 }
e294cc5d
JH
3013#ifdef EBCDIC
3014 if (uv > 255 && !dorange)
3015 native_range = FALSE;
3016#endif
012bcf8d 3017 }
a0ed51b3 3018 else {
012bcf8d 3019 *d++ = (char)uv;
a0ed51b3 3020 }
012bcf8d
GS
3021 }
3022 else {
c4d5f83a 3023 *d++ = (char) uv;
a0ed51b3 3024 }
79072805 3025 continue;
02aa26ce 3026
4a2d328f 3027 case 'N':
ff3f963a
KW
3028 /* In a non-pattern \N must be a named character, like \N{LATIN
3029 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3030 * mean to match a non-newline. For non-patterns, named
3031 * characters are converted to their string equivalents. In
3032 * patterns, named characters are not converted to their
3033 * ultimate forms for the same reasons that other escapes
3034 * aren't. Instead, they are converted to the \N{U+...} form
3035 * to get the value from the charnames that is in effect right
3036 * now, while preserving the fact that it was a named character
3037 * so that the regex compiler knows this */
3038
3039 /* This section of code doesn't generally use the
3040 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3041 * a close examination of this macro and determined it is a
3042 * no-op except on utfebcdic variant characters. Every
3043 * character generated by this that would normally need to be
3044 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3045 * needed, and would complicate use of copy(). XXX There are
3046 * other parts of this file where the macro is used
3047 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3048
3049 /* The structure of this section of code (besides checking for
3050 * errors and upgrading to utf8) is:
3051 * Further disambiguate between the two meanings of \N, and if
3052 * not a charname, go process it elsewhere
0a96133f
KW
3053 * If of form \N{U+...}, pass it through if a pattern;
3054 * otherwise convert to utf8
3055 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3056 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3057
3058 /* Here, s points to the 'N'; the test below is guaranteed to
3059 * succeed if we are being called on a pattern as we already
3060 * know from a test above that the next character is a '{'.
3061 * On a non-pattern \N must mean 'named sequence, which
3062 * requires braces */
3063 s++;
3064 if (*s != '{') {
3065 yyerror("Missing braces on \\N{}");
3066 continue;
3067 }
3068 s++;
3069
0a96133f 3070 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3071 if (! (e = strchr(s, '}'))) {
3072 if (! PL_lex_inpat) {
5777a3f7 3073 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3074 } else {
3075 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3076 }
0a96133f 3077 continue;
ff3f963a 3078 }
cddc7ef4 3079
ff3f963a 3080 /* Here it looks like a named character */
cddc7ef4 3081
ff3f963a
KW
3082 if (PL_lex_inpat) {
3083
3084 /* XXX This block is temporary code. \N{} implies that the
3085 * pattern is to have Unicode semantics, and therefore
3086 * currently has to be encoded in utf8. By putting it in
3087 * utf8 now, we save a whole pass in the regular expression
3088 * compiler. Once that code is changed so Unicode
3089 * semantics doesn't necessarily have to be in utf8, this
3090 * block should be removed */
3091 if (!has_utf8) {
77a135fe 3092 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3093 SvPOK_on(sv);
e4f3eed8 3094 *d = '\0';
77a135fe 3095 /* See Note on sizing above. */
7bf79863 3096 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3097 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3098 /* 5 = '\N{' + cur char + NUL */
3099 (STRLEN)(send - s) + 5);
f08d6ad9 3100 d = SvPVX(sv) + SvCUR(sv);
89491803 3101 has_utf8 = TRUE;
ff3f963a
KW
3102 }
3103 }
423cee85 3104
ff3f963a
KW
3105 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3106 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3107 | PERL_SCAN_DISALLOW_PREFIX;
3108 STRLEN len;
3109
3110 /* For \N{U+...}, the '...' is a unicode value even on
3111 * EBCDIC machines */
3112 s += 2; /* Skip to next char after the 'U+' */
3113 len = e - s;
3114 uv = grok_hex(s, &len, &flags, NULL);
3115 if (len == 0 || len != (STRLEN)(e - s)) {
3116 yyerror("Invalid hexadecimal number in \\N{U+...}");
3117 s = e + 1;
3118 continue;
3119 }
3120
3121 if (PL_lex_inpat) {
3122
3123 /* Pass through to the regex compiler unchanged. The
3124 * reason we evaluated the number above is to make sure
0a96133f 3125 * there wasn't a syntax error. */
ff3f963a
KW
3126 s -= 5; /* Include the '\N{U+' */
3127 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3128 d += e - s + 1;
3129 }
3130 else { /* Not a pattern: convert the hex to string */
3131
3132 /* If destination is not in utf8, unconditionally
3133 * recode it to be so. This is because \N{} implies
3134 * Unicode semantics, and scalars have to be in utf8
3135 * to guarantee those semantics */
3136 if (! has_utf8) {
3137 SvCUR_set(sv, d - SvPVX_const(sv));
3138 SvPOK_on(sv);
3139 *d = '\0';
3140 /* See Note on sizing above. */
3141 sv_utf8_upgrade_flags_grow(
3142 sv,
3143 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3144 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3145 d = SvPVX(sv) + SvCUR(sv);
3146 has_utf8 = TRUE;
3147 }
3148
3149 /* Add the string to the output */
3150 if (UNI_IS_INVARIANT(uv)) {
3151 *d++ = (char) uv;
3152 }
3153 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3154 }
3155 }
3156 else { /* Here is \N{NAME} but not \N{U+...}. */
3157
3158 SV *res; /* result from charnames */
3159 const char *str; /* the string in 'res' */
3160 STRLEN len; /* its length */
3161
3162 /* Get the value for NAME */
3163 res = newSVpvn(s, e - s);
3164 res = new_constant( NULL, 0, "charnames",
3165 /* includes all of: \N{...} */
3166 res, NULL, s - 3, e - s + 4 );
3167
3168 /* Most likely res will be in utf8 already since the
3169 * standard charnames uses pack U, but a custom translator
3170 * can leave it otherwise, so make sure. XXX This can be
3171 * revisited to not have charnames use utf8 for characters
3172 * that don't need it when regexes don't have to be in utf8
3173 * for Unicode semantics. If doing so, remember EBCDIC */
3174 sv_utf8_upgrade(res);
3175 str = SvPV_const(res, len);
3176
3177 /* Don't accept malformed input */
3178 if (! is_utf8_string((U8 *) str, len)) {
3179 yyerror("Malformed UTF-8 returned by \\N");
3180 }
3181 else if (PL_lex_inpat) {
3182
3183 if (! len) { /* The name resolved to an empty string */
3184 Copy("\\N{}", d, 4, char);
3185 d += 4;
3186 }
3187 else {
3188 /* In order to not lose information for the regex
3189 * compiler, pass the result in the specially made
3190 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3191 * the code points in hex of each character
3192 * returned by charnames */
3193
3194 const char *str_end = str + len;
3195 STRLEN char_length; /* cur char's byte length */
3196 STRLEN output_length; /* and the number of bytes
3197 after this is translated
3198 into hex digits */
3199 const STRLEN off = d - SvPVX_const(sv);
3200
3201 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3202 * max('U+', '.'); and 1 for NUL */
3203 char hex_string[2 * UTF8_MAXBYTES + 5];
3204
3205 /* Get the first character of the result. */
3206 U32 uv = utf8n_to_uvuni((U8 *) str,
3207 len,
3208 &char_length,
3209 UTF8_ALLOW_ANYUV);
3210
3211 /* The call to is_utf8_string() above hopefully
3212 * guarantees that there won't be an error. But
3213 * it's easy here to make sure. The function just
3214 * above warns and returns 0 if invalid utf8, but
3215 * it can also return 0 if the input is validly a
3216 * NUL. Disambiguate */
3217 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3218 uv = UNICODE_REPLACEMENT;
3219 }
3220
3221 /* Convert first code point to hex, including the
3222 * boiler plate before it */
78c35590 3223 output_length =
3353de27
NC
3224 my_snprintf(hex_string, sizeof(hex_string),
3225 "\\N{U+%X", (unsigned int) uv);
ff3f963a
KW
3226
3227 /* Make sure there is enough space to hold it */
3228 d = off + SvGROW(sv, off
3229 + output_length
3230 + (STRLEN)(send - e)
3231 + 2); /* '}' + NUL */
3232 /* And output it */
3233 Copy(hex_string, d, output_length, char);
3234 d += output_length;
3235
3236 /* For each subsequent character, append dot and
3237 * its ordinal in hex */
3238 while ((str += char_length) < str_end) {
3239 const STRLEN off = d - SvPVX_const(sv);
3240 U32 uv = utf8n_to_uvuni((U8 *) str,
3241 str_end - str,
3242 &char_length,
3243 UTF8_ALLOW_ANYUV);
3244 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3245 uv = UNICODE_REPLACEMENT;
3246 }
3247
78c35590 3248 output_length =
3353de27
NC
3249 my_snprintf(hex_string, sizeof(hex_string),
3250 ".%X", (unsigned int) uv);
ff3f963a
KW
3251
3252 d = off + SvGROW(sv, off
3253 + output_length
3254 + (STRLEN)(send - e)
3255 + 2); /* '}' + NUL */
3256 Copy(hex_string, d, output_length, char);
3257 d += output_length;
3258 }
3259
3260 *d++ = '}'; /* Done. Add the trailing brace */
3261 }
3262 }
3263 else { /* Here, not in a pattern. Convert the name to a
3264 * string. */
3265
3266 /* If destination is not in utf8, unconditionally
3267 * recode it to be so. This is because \N{} implies
3268 * Unicode semantics, and scalars have to be in utf8
3269 * to guarantee those semantics */
3270 if (! has_utf8) {
3271 SvCUR_set(sv, d - SvPVX_const(sv));
3272 SvPOK_on(sv);
3273 *d = '\0';
3274 /* See Note on sizing above. */
3275 sv_utf8_upgrade_flags_grow(sv,
3276 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3277 len + (STRLEN)(send - s) + 1);
3278 d = SvPVX(sv) + SvCUR(sv);
3279 has_utf8 = TRUE;
3280 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3281
3282 /* See Note on sizing above. (NOTE: SvCUR() is not
3283 * set correctly here). */
3284 const STRLEN off = d - SvPVX_const(sv);
3285 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3286 }
3287 Copy(str, d, len, char);
3288 d += len;
423cee85 3289 }
423cee85 3290 SvREFCNT_dec(res);
cb233ae3
KW
3291
3292 /* Deprecate non-approved name syntax */
3293 if (ckWARN_d(WARN_DEPRECATED)) {
3294 bool problematic = FALSE;
3295 char* i = s;
3296
3297 /* For non-ut8 input, look to see that the first
3298 * character is an alpha, then loop through the rest
3299 * checking that each is a continuation */
3300 if (! this_utf8) {
3301 if (! isALPHAU(*i)) problematic = TRUE;
3302 else for (i = s + 1; i < e; i++) {
3303 if (isCHARNAME_CONT(*i)) continue;
3304 problematic = TRUE;
3305 break;
3306 }
3307 }
3308 else {
3309 /* Similarly for utf8. For invariants can check
3310 * directly. We accept anything above the latin1
3311 * range because it is immaterial to Perl if it is
3312 * correct or not, and is expensive to check. But
3313 * it is fairly easy in the latin1 range to convert
3314 * the variants into a single character and check
3315 * those */
3316 if (UTF8_IS_INVARIANT(*i)) {
3317 if (! isALPHAU(*i)) problematic = TRUE;
3318 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3319 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3320 *(i+1)))))
3321 {
3322 problematic = TRUE;
3323 }
3324 }
3325 if (! problematic) for (i = s + UTF8SKIP(s);
3326 i < e;
3327 i+= UTF8SKIP(i))
3328 {
3329 if (UTF8_IS_INVARIANT(*i)) {
3330 if (isCHARNAME_CONT(*i)) continue;
3331 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3332 continue;
3333 } else if (isCHARNAME_CONT(
3334 UNI_TO_NATIVE(
81c14aa2 3335 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3336 {
3337 continue;
3338 }
3339 problematic = TRUE;
3340 break;
3341 }
3342 }
3343 if (problematic) {
6e1bad6c
KW
3344 /* The e-i passed to the final %.*s makes sure that
3345 * should the trailing NUL be missing that this
3346 * print won't run off the end of the string */
cb233ae3 3347 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3348 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3349 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3350 }
3351 }
3352 } /* End \N{NAME} */
ff3f963a
KW
3353#ifdef EBCDIC
3354 if (!dorange)
3355 native_range = FALSE; /* \N{} is defined to be Unicode */
3356#endif
3357 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3358 continue;
3359
02aa26ce 3360 /* \c is a control character */
79072805
LW
3361 case 'c':
3362 s++;
961ce445 3363 if (s < send) {
f9d13529 3364 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3365 }
961ce445
RGS
3366 else {
3367 yyerror("Missing control char name in \\c");
3368 }
79072805 3369 continue;
02aa26ce
NT
3370
3371 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3372 case 'b':
db42d148 3373 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3374 break;
3375 case 'n':
db42d148 3376 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3377 break;
3378 case 'r':
db42d148 3379 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3380 break;
3381 case 'f':
db42d148 3382 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3383 break;
3384 case 't':
db42d148 3385 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3386 break;
34a3fe2a 3387 case 'e':
db42d148 3388 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3389 break;
3390 case 'a':
db42d148 3391 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3392 break;
02aa26ce
NT
3393 } /* end switch */
3394
79072805
LW
3395 s++;
3396 continue;
02aa26ce 3397 } /* end if (backslash) */
4c3a8340
TS
3398#ifdef EBCDIC
3399 else
3400 literal_endpoint++;
3401#endif
02aa26ce 3402
f9a63242 3403 default_action:
77a135fe
KW
3404 /* If we started with encoded form, or already know we want it,
3405 then encode the next character */
3406 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3407 STRLEN len = 1;
77a135fe
KW
3408
3409
3410 /* One might think that it is wasted effort in the case of the
3411 * source being utf8 (this_utf8 == TRUE) to take the next character
3412 * in the source, convert it to an unsigned value, and then convert
3413 * it back again. But the source has not been validated here. The
3414 * routine that does the conversion checks for errors like
3415 * malformed utf8 */
3416
5f66b61c
AL
3417 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3418 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3419 if (!has_utf8) {
3420 SvCUR_set(sv, d - SvPVX_const(sv));
3421 SvPOK_on(sv);
3422 *d = '\0';
77a135fe 3423 /* See Note on sizing above. */
7bf79863
KW
3424 sv_utf8_upgrade_flags_grow(sv,
3425 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3426 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3427 d = SvPVX(sv) + SvCUR(sv);
3428 has_utf8 = TRUE;
3429 } else if (need > len) {
3430 /* encoded value larger than old, may need extra space (NOTE:
3431 * SvCUR() is not set correctly here). See Note on sizing
3432 * above. */
9d4ba2ae 3433 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3434 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3435 }
77a135fe
KW
3436 s += len;
3437
5f66b61c 3438 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3439#ifdef EBCDIC
3440 if (uv > 255 && !dorange)
3441 native_range = FALSE;
3442#endif
2b9d42f0
NIS
3443 }
3444 else {
3445 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3446 }
02aa26ce
NT
3447 } /* while loop to process each character */
3448
3449 /* terminate the string and set up the sv */
79072805 3450 *d = '\0';
95a20fc0 3451 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3452 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3453 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3454
79072805 3455 SvPOK_on(sv);
9f4817db 3456 if (PL_encoding && !has_utf8) {
d0063567
DK
3457 sv_recode_to_utf8(sv, PL_encoding);
3458 if (SvUTF8(sv))
3459 has_utf8 = TRUE;
9f4817db 3460 }
2b9d42f0 3461 if (has_utf8) {
7e2040f0 3462 SvUTF8_on(sv);
2b9d42f0 3463 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3464 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3465 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3466 }
3467 }
79072805 3468
02aa26ce 3469 /* shrink the sv if we allocated more than we used */
79072805 3470 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3471 SvPV_shrink_to_cur(sv);
79072805 3472 }
02aa26ce 3473
6154021b 3474 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3475 if (s > PL_bufptr) {
eb0d8d16
NC
3476 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3477 const char *const key = PL_lex_inpat ? "qr" : "q";
3478 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3479 const char *type;
3480 STRLEN typelen;
3481
3482 if (PL_lex_inwhat == OP_TRANS) {
3483 type = "tr";
3484 typelen = 2;
3485 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3486 type = "s";
3487 typelen = 1;
3488 } else {
3489 type = "qq";
3490 typelen = 2;
3491 }
3492
3493 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3494 type, typelen);
3495 }
6154021b 3496 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3497 } else
8990e307 3498 SvREFCNT_dec(sv);
79072805
LW
3499 return s;
3500}
3501
ffb4593c
NT
3502/* S_intuit_more
3503 * Returns TRUE if there's more to the expression (e.g., a subscript),
3504 * FALSE otherwise.
ffb4593c
NT
3505 *
3506 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3507 *
3508 * ->[ and ->{ return TRUE
3509 * { and [ outside a pattern are always subscripts, so return TRUE
3510 * if we're outside a pattern and it's not { or [, then return FALSE
3511 * if we're in a pattern and the first char is a {
3512 * {4,5} (any digits around the comma) returns FALSE
3513 * if we're in a pattern and the first char is a [
3514 * [] returns FALSE
3515 * [SOMETHING] has a funky algorithm to decide whether it's a
3516 * character class or not. It has to deal with things like
3517 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3518 * anything else returns TRUE
3519 */
3520
9cbb5ea2
GS
3521/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3522
76e3520e 3523STATIC int
cea2e8a9 3524S_intuit_more(pTHX_ register char *s)
79072805 3525{
97aff369 3526 dVAR;
7918f24d
NC
3527
3528 PERL_ARGS_ASSERT_INTUIT_MORE;
3529
3280af22 3530 if (PL_lex_brackets)
79072805
LW
3531 return TRUE;
3532 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3533 return TRUE;
3534 if (*s != '{' && *s != '[')
3535 return FALSE;
3280af22 3536 if (!PL_lex_inpat)
79072805
LW
3537 return TRUE;
3538
3539 /* In a pattern, so maybe we have {n,m}. */
3540 if (*s == '{') {
b3155d95 3541 if (regcurly(s)) {
79072805 3542 return FALSE;
b3155d95 3543 }
79072805 3544 return TRUE;
79072805
LW
3545 }
3546
3547 /* On the other hand, maybe we have a character class */
3548
3549 s++;
3550 if (*s == ']' || *s == '^')
3551 return FALSE;
3552 else {
ffb4593c 3553 /* this is terrifying, and it works */
79072805
LW
3554 int weight = 2; /* let's weigh the evidence */
3555 char seen[256];
f27ffc4a 3556 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3557 const char * const send = strchr(s,']');
3280af22 3558 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3559
3560 if (!send) /* has to be an expression */
3561 return TRUE;
3562
3563 Zero(seen,256,char);
3564 if (*s == '$')
3565 weight -= 3;
3566 else if (isDIGIT(*s)) {
3567 if (s[1] != ']') {
3568 if (isDIGIT(s[1]) && s[2] == ']')
3569 weight -= 10;
3570 }
3571 else
3572 weight -= 100;
3573 }
3574 for (; s < send; s++) {
3575 last_un_char = un_char;
3576 un_char = (unsigned char)*s;
3577 switch (*s) {
3578 case '@':
3579 case '&':
3580 case '$':
3581 weight -= seen[un_char] * 10;
7e2040f0 3582 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3583 int len;
8903cb82 3584 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3585 len = (int)strlen(tmpbuf);
3586 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3587 weight -= 100;
3588 else
3589 weight -= 10;
3590 }
3591 else if (*s == '$' && s[1] &&
93a17b20
LW
3592 strchr("[#!%*<>()-=",s[1])) {
3593 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3594 weight -= 10;
3595 else
3596 weight -= 1;
3597 }
3598 break;
3599 case '\\':
3600 un_char = 254;
3601 if (s[1]) {
93a17b20 3602 if (strchr("wds]",s[1]))
79072805 3603 weight += 100;
10edeb5d 3604 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3605 weight += 1;
93a17b20 3606 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3607 weight += 40;
3608 else if (isDIGIT(s[1])) {
3609 weight += 40;
3610 while (s[1] && isDIGIT(s[1]))
3611 s++;
3612 }
3613 }
3614 else
3615 weight += 100;
3616 break;
3617 case '-':
3618 if (s[1] == '\\')
3619 weight += 50;
93a17b20 3620 if (strchr("aA01! ",last_un_char))
79072805 3621 weight += 30;
93a17b20 3622 if (strchr("zZ79~",s[1]))
79072805 3623 weight += 30;
f27ffc4a
GS
3624 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3625 weight -= 5; /* cope with negative subscript */
79072805
LW
3626 break;
3627 default:
3792a11b
NC
3628 if (!isALNUM(last_un_char)
3629 && !(last_un_char == '$' || last_un_char == '@'
3630 || last_un_char == '&')
3631 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3632 char *d = tmpbuf;
3633 while (isALPHA(*s))
3634 *d++ = *s++;
3635 *d = '\0';
5458a98a 3636 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3637 weight -= 150;
3638 }
3639 if (un_char == last_un_char + 1)
3640 weight += 5;
3641 weight -= seen[un_char];
3642 break;
3643 }
3644 seen[un_char]++;
3645 }
3646 if (weight >= 0) /* probably a character class */
3647 return FALSE;
3648 }
3649
3650 return TRUE;
3651}
ffed7fef 3652
ffb4593c
NT
3653/*
3654 * S_intuit_method
3655 *
3656 * Does all the checking to disambiguate
3657 * foo bar
3658 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3659 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3660 *
3661 * First argument is the stuff after the first token, e.g. "bar".
3662 *
3663 * Not a method if bar is a filehandle.
3664 * Not a method if foo is a subroutine prototyped to take a filehandle.
3665 * Not a method if it's really "Foo $bar"
3666 * Method if it's "foo $bar"
3667 * Not a method if it's really "print foo $bar"
3668 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3669 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3670 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3671 * =>
3672 */
3673
76e3520e 3674STATIC int
62d55b22 3675S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3676{
97aff369 3677 dVAR;
a0d0e21e 3678 char *s = start + (*start == '$');
3280af22 3679 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3680 STRLEN len;
3681 GV* indirgv;
5db06880
NC
3682#ifdef PERL_MAD
3683 int soff;
3684#endif
a0d0e21e 3685
7918f24d
NC
3686 PERL_ARGS_ASSERT_INTUIT_METHOD;
3687
a0d0e21e 3688 if (gv) {
62d55b22 3689 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3690 return 0;
62d55b22
NC
3691 if (cv) {
3692 if (SvPOK(cv)) {
3693 const char *proto = SvPVX_const(cv);
3694 if (proto) {
3695 if (*proto == ';')
3696 proto++;
3697 if (*proto == '*')
3698 return 0;
3699 }
b6c543e3
IZ
3700 }
3701 } else
c35e046a 3702 gv = NULL;
a0d0e21e 3703 }
8903cb82 3704 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3705 /* start is the beginning of the possible filehandle/object,
3706 * and s is the end of it
3707 * tmpbuf is a copy of it
3708 */
3709
a0d0e21e 3710 if (*start == '$') {
3ef1310e
RGS
3711 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3712 isUPPER(*PL_tokenbuf))
a0d0e21e 3713 return 0;
5db06880
NC
3714#ifdef PERL_MAD
3715 len = start - SvPVX(PL_linestr);
3716#endif
29595ff2 3717 s = PEEKSPACE(s);
f0092767 3718#ifdef PERL_MAD
5db06880
NC
3719 start = SvPVX(PL_linestr) + len;
3720#endif
3280af22
NIS
3721 PL_bufptr = start;
3722 PL_expect = XREF;
a0d0e21e
LW
3723 return *s == '(' ? FUNCMETH : METHOD;
3724 }
5458a98a 3725 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3726 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3727 len -= 2;
3728 tmpbuf[len] = '\0';
5db06880
NC
3729#ifdef PERL_MAD
3730 soff = s - SvPVX(PL_linestr);
3731#endif
c3e0f903
GS
3732 goto bare_package;
3733 }
90e5519e 3734 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3735 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3736 return 0;
3737 /* filehandle or package name makes it a method */
da51bb9b 3738 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3739#ifdef PERL_MAD
3740 soff = s - SvPVX(PL_linestr);
3741#endif
29595ff2 3742 s = PEEKSPACE(s);
3280af22 3743 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3744 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3745 bare_package:
cd81e915 3746 start_force(PL_curforce);
9ded7720 3747 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3748 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3749 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3750 if (PL_madskills)
3751 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3752 PL_expect = XTERM;
a0d0e21e 3753 force_next(WORD);
3280af22 3754 PL_bufptr = s;
5db06880
NC
3755#ifdef PERL_MAD
3756 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3757#endif
a0d0e21e
LW
3758 return *s == '(' ? FUNCMETH : METHOD;
3759 }
3760 }
3761 return 0;
3762}
3763
16d20bd9 3764/* Encoded script support. filter_add() effectively inserts a
4e553d73 3765 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3766 * Note that the filter function only applies to the current source file
3767 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3768 *
3769 * The datasv parameter (which may be NULL) can be used to pass
3770 * private data to this instance of the filter. The filter function
3771 * can recover the SV using the FILTER_DATA macro and use it to
3772 * store private buffers and state information.
3773 *
3774 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3775 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3776 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3777 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3778 * private use must be set using malloc'd pointers.
3779 */
16d20bd9
AD
3780
3781SV *
864dbfa3 3782Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3783{
97aff369 3784 dVAR;
f4c556ac 3785 if (!funcp)
a0714e2c 3786 return NULL;
f4c556ac 3787
5486870f
DM
3788 if (!PL_parser)
3789 return NULL;
3790
3280af22
NIS
3791 if (!PL_rsfp_filters)
3792 PL_rsfp_filters = newAV();
16d20bd9 3793 if (!datasv)
561b68a9 3794 datasv = newSV(0);
862a34c6 3795 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3796 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3797 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3798 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3799 FPTR2DPTR(void *, IoANY(datasv)),
3800 SvPV_nolen(datasv)));
3280af22
NIS
3801 av_unshift(PL_rsfp_filters, 1);
3802 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3803 return(datasv);
3804}
4e553d73 3805
16d20bd9
AD
3806
3807/* Delete most recently added instance of this filter function. */
a0d0e21e 3808void
864dbfa3 3809Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3810{
97aff369 3811 dVAR;
e0c19803 3812 SV *datasv;
24801a4b 3813
7918f24d
NC
3814 PERL_ARGS_ASSERT_FILTER_DEL;
3815
33073adb 3816#ifdef DEBUGGING
55662e27
JH
3817 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3818 FPTR2DPTR(void*, funcp)));
33073adb 3819#endif
5486870f 3820 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3821 return;
3822 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3823 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3824 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3825 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3826
16d20bd9
AD
3827 return;
3828 }
3829 /* we need to search for the correct entry and clear it */
cea2e8a9 3830 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3831}
3832
3833
1de9afcd
RGS
3834/* Invoke the idxth filter function for the current rsfp. */
3835/* maxlen 0 = read one text line */
16d20bd9 3836I32
864dbfa3 3837Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3838{
97aff369 3839 dVAR;
16d20bd9
AD
3840 filter_t funcp;
3841 SV *datasv = NULL;
f482118e
NC
3842 /* This API is bad. It should have been using unsigned int for maxlen.
3843 Not sure if we want to change the API, but if not we should sanity
3844 check the value here. */
39cd7a59
NC
3845 const unsigned int correct_length
3846 = maxlen < 0 ?
3847#ifdef PERL_MICRO
3848 0x7FFFFFFF
3849#else
3850 INT_MAX
3851#endif
3852 : maxlen;
e50aee73 3853
7918f24d
NC
3854 PERL_ARGS_ASSERT_FILTER_READ;
3855
5486870f 3856 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3857 return -1;
1de9afcd 3858 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3859 /* Provide a default input filter to make life easy. */
3860 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3861 DEBUG_P(PerlIO_printf(Perl_debug_log,
3862 "filter_read %d: from rsfp\n", idx));
f482118e 3863 if (correct_length) {
16d20bd9
AD
3864 /* Want a block */
3865 int len ;
f54cb97a 3866 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3867
3868 /* ensure buf_sv is large enough */
881d8f0a 3869 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3870 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3871 correct_length)) <= 0) {
3280af22 3872 if (PerlIO_error(PL_rsfp))
37120919
AD
3873 return -1; /* error */
3874 else
3875 return 0 ; /* end of file */
3876 }
16d20bd9 3877 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3878 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3879 } else {
3880 /* Want a line */
3280af22
NIS
3881 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3882 if (PerlIO_error(PL_rsfp))
37120919
AD
3883 return -1; /* error */
3884 else
3885 return 0 ; /* end of file */
3886 }
16d20bd9
AD
3887 }
3888 return SvCUR(buf_sv);
3889 }
3890 /* Skip this filter slot if filter has been deleted */
1de9afcd 3891 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3892 DEBUG_P(PerlIO_printf(Perl_debug_log,
3893 "filter_read %d: skipped (filter deleted)\n",
3894 idx));
f482118e 3895 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3896 }
3897 /* Get function pointer hidden within datasv */
8141890a 3898 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3899 DEBUG_P(PerlIO_printf(Perl_debug_log,
3900 "filter_read %d: via function %p (%s)\n",
ca0270c4 3901 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3902 /* Call function. The function is expected to */
3903 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3904 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3905 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3906}
3907
76e3520e 3908STATIC char *
5cc814fd 3909S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3910{
97aff369 3911 dVAR;
7918f24d
NC
3912
3913 PERL_ARGS_ASSERT_FILTER_GETS;
3914
c39cd008 3915#ifdef PERL_CR_FILTER
3280af22 3916 if (!PL_rsfp_filters) {
c39cd008 3917 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3918 }
3919#endif
3280af22 3920 if (PL_rsfp_filters) {
55497cff 3921 if (!append)
3922 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3923 if (FILTER_READ(0, sv, 0) > 0)
3924 return ( SvPVX(sv) ) ;
3925 else
bd61b366 3926 return NULL ;
16d20bd9 3927 }
9d116dd7 3928 else
5cc814fd 3929 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3930}
3931
01ec43d0 3932STATIC HV *
9bde8eb0 3933S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3934{
97aff369 3935 dVAR;
def3634b
GS
3936 GV *gv;
3937
7918f24d
NC
3938 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3939
01ec43d0 3940 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3941 return PL_curstash;
3942
3943 if (len > 2 &&
3944 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3945 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3946 {
3947 return GvHV(gv); /* Foo:: */
def3634b
GS
3948 }
3949
3950 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3951 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3952 if (gv && GvCV(gv)) {
3953 SV * const sv = cv_const_sv(GvCV(gv));
3954 if (sv)
9bde8eb0 3955 pkgname = SvPV_const(sv, len);
def3634b
GS
3956 }
3957
9bde8eb0 3958 return gv_stashpvn(pkgname, len, 0);
def3634b 3959}
a0d0e21e 3960
e3f73d4e
RGS
3961/*
3962 * S_readpipe_override
486ec47a 3963 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
3964 * optree, provided sublex_start() is called afterwards.
3965 */
3966STATIC void
1d51329b 3967S_readpipe_override(pTHX)
e3f73d4e
RGS
3968{
3969 GV **gvp;
3970 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3971 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3972 if ((gv_readpipe
3973 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3974 ||
3975 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3976 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3977 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3978 {
3979 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 3980 op_append_elem(OP_LIST,
e3f73d4e
RGS
3981 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3982 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3983 }
e3f73d4e
RGS
3984}
3985
5db06880
NC
3986#ifdef PERL_MAD
3987 /*
3988 * Perl_madlex
3989 * The intent of this yylex wrapper is to minimize the changes to the
3990 * tokener when we aren't interested in collecting madprops. It remains
3991 * to be seen how successful this strategy will be...
3992 */
3993
3994int
3995Perl_madlex(pTHX)
3996{
3997 int optype;
3998 char *s = PL_bufptr;
3999
cd81e915
NC
4000 /* make sure PL_thiswhite is initialized */
4001 PL_thiswhite = 0;
4002 PL_thismad = 0;
5db06880 4003
cd81e915 4004 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 4005 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
4006 return S_pending_ident(aTHX);
4007
4008 /* previous token ate up our whitespace? */
cd81e915
NC
4009 if (!PL_lasttoke && PL_nextwhite) {
4010 PL_thiswhite = PL_nextwhite;
4011 PL_nextwhite = 0;
5db06880
NC
4012 }
4013
4014 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4015 PL_realtokenstart = -1;
4016 PL_thistoken = 0;
5db06880
NC
4017 optype = yylex();
4018 s = PL_bufptr;
cd81e915 4019 assert(PL_curforce < 0);
5db06880 4020
cd81e915
NC
4021 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4022 if (!PL_thistoken) {
4023 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4024 PL_thistoken = newSVpvs("");
5db06880 4025 else {
c35e046a 4026 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4027 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4028 }
4029 }
cd81e915
NC
4030 if (PL_thismad) /* install head */
4031 CURMAD('X', PL_thistoken);
5db06880
NC
4032 }
4033
4034 /* last whitespace of a sublex? */
cd81e915
NC
4035 if (optype == ')' && PL_endwhite) {
4036 CURMAD('X', PL_endwhite);
5db06880
NC
4037 }
4038
cd81e915 4039 if (!PL_thismad) {
5db06880
NC
4040
4041 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4042 if (!PL_thiswhite && !PL_endwhite && !optype) {
4043 sv_free(PL_thistoken);
4044 PL_thistoken = 0;
5db06880
NC
4045 return 0;
4046 }
4047
4048 /* put off final whitespace till peg */
4049 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4050 PL_nextwhite = PL_thiswhite;
4051 PL_thiswhite = 0;
5db06880 4052 }
cd81e915
NC
4053 else if (PL_thisopen) {
4054 CURMAD('q', PL_thisopen);
4055 if (PL_thistoken)
4056 sv_free(PL_thistoken);
4057 PL_thistoken = 0;
5db06880
NC
4058 }
4059 else {
4060 /* Store actual token text as madprop X */
cd81e915 4061 CURMAD('X', PL_thistoken);
5db06880
NC
4062 }
4063
cd81e915 4064 if (PL_thiswhite) {
5db06880 4065 /* add preceding whitespace as madprop _ */
cd81e915 4066 CURMAD('_', PL_thiswhite);
5db06880
NC
4067 }
4068
cd81e915 4069 if (PL_thisstuff) {
5db06880 4070 /* add quoted material as madprop = */
cd81e915 4071 CURMAD('=', PL_thisstuff);
5db06880
NC
4072 }
4073
cd81e915 4074 if (PL_thisclose) {
5db06880 4075 /* add terminating quote as madprop Q */
cd81e915 4076 CURMAD('Q', PL_thisclose);
5db06880
NC
4077 }
4078 }
4079
4080 /* special processing based on optype */
4081
4082 switch (optype) {
4083
4084 /* opval doesn't need a TOKEN since it can already store mp */
4085 case WORD:
4086 case METHOD:
4087 case FUNCMETH:
4088 case THING:
4089 case PMFUNC:
4090 case PRIVATEREF:
4091 case FUNC0SUB:
4092 case UNIOPSUB:
4093 case LSTOPSUB:
6154021b
RGS
4094 if (pl_yylval.opval)
4095 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4096 PL_thismad = 0;
5db06880
NC
4097 return optype;
4098
4099 /* fake EOF */
4100 case 0:
4101 optype = PEG;
cd81e915
NC
4102 if (PL_endwhite) {
4103 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4104 PL_endwhite = 0;
5db06880
NC
4105 }
4106 break;
4107
4108 case ']':
4109 case '}':
cd81e915 4110 if (PL_faketokens)
5db06880
NC
4111 break;
4112 /* remember any fake bracket that lexer is about to discard */
4113 if (PL_lex_brackets == 1 &&
4114 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4115 {
4116 s = PL_bufptr;
4117 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4118 s++;
4119 if (*s == '}') {
cd81e915
NC
4120 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4121 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4122 PL_thiswhite = 0;
5db06880
NC
4123 PL_bufptr = s - 1;
4124 break; /* don't bother looking for trailing comment */
4125 }
4126 else
4127 s = PL_bufptr;
4128 }
4129 if (optype == ']')
4130 break;
4131 /* FALLTHROUGH */
4132
4133 /* attach a trailing comment to its statement instead of next token */
4134 case ';':
cd81e915 4135 if (PL_faketokens)
5db06880
NC
4136 break;
4137 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4138 s = PL_bufptr;
4139 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4140 s++;
4141 if (*s == '\n' || *s == '#') {
4142 while (s < PL_bufend && *s != '\n')
4143 s++;
4144 if (s < PL_bufend)
4145 s++;
cd81e915
NC
4146 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4147 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4148 PL_thiswhite = 0;
5db06880
NC
4149 PL_bufptr = s;
4150 }
4151 }
4152 break;
4153
4154 /* pval */
4155 case LABEL:
4156 break;
4157
4158 /* ival */
4159 default:
4160 break;
4161
4162 }
4163
4164 /* Create new token struct. Note: opvals return early above. */
6154021b 4165 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4166 PL_thismad = 0;
5db06880
NC
4167 return optype;
4168}
4169#endif
4170
468aa647 4171STATIC char *
cc6ed77d 4172S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4173 dVAR;
7918f24d
NC
4174
4175 PERL_ARGS_ASSERT_TOKENIZE_USE;
4176
468aa647
RGS
4177 if (PL_expect != XSTATE)
4178 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4179 is_use ? "use" : "no"));
29595ff2 4180 s = SKIPSPACE1(s);
468aa647
RGS
4181 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4182 s = force_version(s, TRUE);
17c59fdf
VP
4183 if (*s == ';' || *s == '}'
4184 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4185 start_force(PL_curforce);
9ded7720 4186 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4187 force_next(WORD);
4188 }
4189 else if (*s == 'v') {
4190 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4191 s = force_version(s, FALSE);
4192 }
4193 }
4194 else {
4195 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4196 s = force_version(s, FALSE);
4197 }
6154021b 4198 pl_yylval.ival = is_use;
468aa647
RGS
4199 return s;
4200}
748a9306 4201#ifdef DEBUGGING
27da23d5 4202 static const char* const exp_name[] =
09bef843 4203 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4204 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4205 };
748a9306 4206#endif
463ee0b2 4207
361d9b55
Z
4208#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4209STATIC bool
4210S_word_takes_any_delimeter(char *p, STRLEN len)
4211{
4212 return (len == 1 && strchr("msyq", p[0])) ||
4213 (len == 2 && (
4214 (p[0] == 't' && p[1] == 'r') ||
4215 (p[0] == 'q' && strchr("qwxr", p[1]))));
4216}
4217
02aa26ce
NT
4218/*
4219 yylex
4220
4221 Works out what to call the token just pulled out of the input
4222 stream. The yacc parser takes care of taking the ops we return and
4223 stitching them into a tree.
4224
4225 Returns:
4226 PRIVATEREF
4227
4228 Structure:
4229 if read an identifier
4230 if we're in a my declaration
4231 croak if they tried to say my($foo::bar)
4232 build the ops for a my() declaration
4233 if it's an access to a my() variable
4234 are we in a sort block?
4235 croak if my($a); $a <=> $b
4236 build ops for access to a my() variable
4237 if in a dq string, and they've said @foo and we can't find @foo
4238 croak
4239 build ops for a bareword
4240 if we already built the token before, use it.
4241*/
4242
20141f0e 4243
dba4d153
JH
4244#ifdef __SC__
4245#pragma segment Perl_yylex
4246#endif
dba4d153 4247int
dba4d153 4248Perl_yylex(pTHX)
20141f0e 4249{
97aff369 4250 dVAR;
3afc138a 4251 register char *s = PL_bufptr;
378cc40b 4252 register char *d;
463ee0b2 4253 STRLEN len;
aa7440fb 4254 bool bof = FALSE;
580561a3 4255 U32 fake_eof = 0;
a687059c 4256
10edeb5d
JH
4257 /* orig_keyword, gvp, and gv are initialized here because
4258 * jump to the label just_a_word_zero can bypass their
4259 * initialization later. */
4260 I32 orig_keyword = 0;
4261 GV *gv = NULL;
4262 GV **gvp = NULL;
4263
bbf60fe6 4264 DEBUG_T( {
396482e1 4265 SV* tmp = newSVpvs("");
b6007c36
DM
4266 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4267 (IV)CopLINE(PL_curcop),
4268 lex_state_names[PL_lex_state],
4269 exp_name[PL_expect],
4270 pv_display(tmp, s, strlen(s), 0, 60));
4271 SvREFCNT_dec(tmp);
bbf60fe6 4272 } );
02aa26ce 4273 /* check if there's an identifier for us to look at */
28ac2b49 4274 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4275 return REPORT(S_pending_ident(aTHX));
bbce6d69 4276
02aa26ce
NT
4277 /* no identifier pending identification */
4278
3280af22 4279 switch (PL_lex_state) {
79072805
LW
4280#ifdef COMMENTARY
4281 case LEX_NORMAL: /* Some compilers will produce faster */
4282 case LEX_INTERPNORMAL: /* code if we comment these out. */
4283 break;
4284#endif
4285
09bef843 4286 /* when we've already built the next token, just pull it out of the queue */
79072805 4287 case LEX_KNOWNEXT:
5db06880
NC
4288#ifdef PERL_MAD
4289 PL_lasttoke--;
6154021b 4290 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4291 if (PL_madskills) {
cd81e915 4292 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4293 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4294 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4295 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4296 PL_thismad->mad_val = 0;
4297 mad_free(PL_thismad);
4298 PL_thismad = 0;
5db06880
NC
4299 }
4300 }
4301 if (!PL_lasttoke) {
4302 PL_lex_state = PL_lex_defer;
4303 PL_expect = PL_lex_expect;
4304 PL_lex_defer = LEX_NORMAL;
4305 if (!PL_nexttoke[PL_lasttoke].next_type)
4306 return yylex();
4307 }
4308#else
3280af22 4309 PL_nexttoke--;
6154021b 4310 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4311 if (!PL_nexttoke) {
4312 PL_lex_state = PL_lex_defer;
4313 PL_expect = PL_lex_expect;
4314 PL_lex_defer = LEX_NORMAL;
463ee0b2 4315 }
5db06880 4316#endif
a7aaec61
Z
4317 {
4318 I32 next_type;
5db06880 4319#ifdef PERL_MAD
a7aaec61 4320 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4321#else
a7aaec61 4322 next_type = PL_nexttype[PL_nexttoke];
5db06880 4323#endif
78cdf107
Z
4324 if (next_type & (7<<24)) {
4325 if (next_type & (1<<24)) {
4326 if (PL_lex_brackets > 100)
4327 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4328 PL_lex_brackstack[PL_lex_brackets++] =
4329 (next_type >> 16) & 0xff;
4330 }
4331 if (next_type & (2<<24))
4332 PL_lex_allbrackets++;
4333 if (next_type & (4<<24))
4334 PL_lex_allbrackets--;
a7aaec61
Z
4335 next_type &= 0xffff;
4336 }
4337#ifdef PERL_MAD
4338 /* FIXME - can these be merged? */
4339 return next_type;
4340#else
4341 return REPORT(next_type);
4342#endif
4343 }
79072805 4344
02aa26ce 4345 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4346 when we get here, PL_bufptr is at the \
02aa26ce 4347 */
79072805
LW
4348 case LEX_INTERPCASEMOD:
4349#ifdef DEBUGGING
3280af22 4350 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4351 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4352#endif
02aa26ce 4353 /* handle \E or end of string */
3280af22 4354 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4355 /* if at a \E */
3280af22 4356 if (PL_lex_casemods) {
f54cb97a 4357 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4358 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4359
3792a11b
NC
4360 if (PL_bufptr != PL_bufend
4361 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4362 PL_bufptr += 2;
4363 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4364#ifdef PERL_MAD
4365 if (PL_madskills)
6b29d1f5 4366 PL_thistoken = newSVpvs("\\E");
5db06880 4367#endif
a0d0e21e 4368 }
78cdf107 4369 PL_lex_allbrackets--;
bbf60fe6 4370 return REPORT(')');
79072805 4371 }
5db06880
NC
4372#ifdef PERL_MAD
4373 while (PL_bufptr != PL_bufend &&
4374 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4375 if (!PL_thiswhite)
6b29d1f5 4376 PL_thiswhite = newSVpvs("");
cd81e915 4377 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4378 PL_bufptr += 2;
4379 }
4380#else
3280af22
NIS
4381 if (PL_bufptr != PL_bufend)
4382 PL_bufptr += 2;
5db06880 4383#endif
3280af22 4384 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4385 return yylex();
79072805
LW
4386 }
4387 else {
607df283 4388 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4389 "### Saw case modifier\n"); });
3280af22 4390 s = PL_bufptr + 1;
6e909404 4391 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4392#ifdef PERL_MAD
cd81e915 4393 if (!PL_thiswhite)
6b29d1f5 4394 PL_thiswhite = newSVpvs("");
cd81e915 4395 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4396#endif
89122651 4397 PL_bufptr = s + 3;
6e909404
JH
4398 PL_lex_state = LEX_INTERPCONCAT;
4399 return yylex();
a0d0e21e 4400 }
6e909404 4401 else {
90771dc0 4402 I32 tmp;
5db06880
NC
4403 if (!PL_madskills) /* when just compiling don't need correct */
4404 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4405 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4406 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4407 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4408 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4409 PL_lex_allbrackets--;
bbf60fe6 4410 return REPORT(')');
6e909404
JH
4411 }
4412 if (PL_lex_casemods > 10)
4413 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4414 PL_lex_casestack[PL_lex_casemods++] = *s;
4415 PL_lex_casestack[PL_lex_casemods] = '\0';
4416 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4417 start_force(PL_curforce);
9ded7720 4418 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4419 force_next((2<<24)|'(');
cd81e915 4420 start_force(PL_curforce);
6e909404 4421 if (*s == 'l')
9ded7720 4422 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4423 else if (*s == 'u')
9ded7720 4424 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4425 else if (*s == 'L')
9ded7720 4426 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4427 else if (*s == 'U')
9ded7720 4428 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4429 else if (*s == 'Q')
9ded7720 4430 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4431 else
4432 Perl_croak(aTHX_ "panic: yylex");
5db06880 4433 if (PL_madskills) {
a5849ce5
NC
4434 SV* const tmpsv = newSVpvs("\\ ");
4435 /* replace the space with the character we want to escape
4436 */
4437 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4438 curmad('_', tmpsv);
4439 }
6e909404 4440 PL_bufptr = s + 1;
a0d0e21e 4441 }
79072805 4442 force_next(FUNC);
3280af22
NIS
4443 if (PL_lex_starts) {
4444 s = PL_bufptr;
4445 PL_lex_starts = 0;
5db06880
NC
4446#ifdef PERL_MAD
4447 if (PL_madskills) {
cd81e915
NC
4448 if (PL_thistoken)
4449 sv_free(PL_thistoken);
6b29d1f5 4450 PL_thistoken = newSVpvs("");
5db06880
NC
4451 }
4452#endif
131b3ad0
DM
4453 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4454 if (PL_lex_casemods == 1 && PL_lex_inpat)
4455 OPERATOR(',');
4456 else
4457 Aop(OP_CONCAT);
79072805
LW
4458 }
4459 else
cea2e8a9 4460 return yylex();
79072805
LW
4461 }
4462
55497cff 4463 case LEX_INTERPPUSH:
bbf60fe6 4464 return REPORT(sublex_push());
55497cff 4465
79072805 4466 case LEX_INTERPSTART:
3280af22 4467 if (PL_bufptr == PL_bufend)
bbf60fe6 4468 return REPORT(sublex_done());
607df283 4469 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4470 "### Interpolated variable\n"); });
3280af22
NIS
4471 PL_expect = XTERM;
4472 PL_lex_dojoin = (*PL_bufptr == '@');
4473 PL_lex_state = LEX_INTERPNORMAL;
4474 if (PL_lex_dojoin) {
cd81e915 4475 start_force(PL_curforce);
9ded7720 4476 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4477 force_next(',');
cd81e915 4478 start_force(PL_curforce);
a0d0e21e 4479 force_ident("\"", '$');
cd81e915 4480 start_force(PL_curforce);
9ded7720 4481 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4482 force_next('$');
cd81e915 4483 start_force(PL_curforce);
9ded7720 4484 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4485 force_next((2<<24)|'(');
cd81e915 4486 start_force(PL_curforce);
9ded7720 4487 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4488 force_next(FUNC);
4489 }
3280af22
NIS
4490 if (PL_lex_starts++) {
4491 s = PL_bufptr;
5db06880
NC
4492#ifdef PERL_MAD
4493 if (PL_madskills) {
cd81e915
NC
4494 if (PL_thistoken)
4495 sv_free(PL_thistoken);
6b29d1f5 4496 PL_thistoken = newSVpvs("");
5db06880
NC
4497 }
4498#endif
131b3ad0
DM
4499 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4500 if (!PL_lex_casemods && PL_lex_inpat)
4501 OPERATOR(',');
4502 else
4503 Aop(OP_CONCAT);
79072805 4504 }
cea2e8a9 4505 return yylex();
79072805
LW
4506
4507 case LEX_INTERPENDMAYBE:
3280af22
NIS
4508 if (intuit_more(PL_bufptr)) {
4509 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4510 break;
4511 }
4512 /* FALL THROUGH */
4513
4514 case LEX_INTERPEND:
3280af22
NIS
4515 if (PL_lex_dojoin) {
4516 PL_lex_dojoin = FALSE;
4517 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4518#ifdef PERL_MAD
4519 if (PL_madskills) {
cd81e915
NC
4520 if (PL_thistoken)
4521 sv_free(PL_thistoken);
6b29d1f5 4522 PL_thistoken = newSVpvs("");
5db06880
NC
4523 }
4524#endif
78cdf107 4525 PL_lex_allbrackets--;
bbf60fe6 4526 return REPORT(')');
79072805 4527 }
43a16006 4528 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4529 && SvEVALED(PL_lex_repl))
43a16006 4530 {
e9fa98b2 4531 if (PL_bufptr != PL_bufend)
cea2e8a9 4532 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4533 PL_lex_repl = NULL;
e9fa98b2 4534 }
79072805
LW
4535 /* FALLTHROUGH */
4536 case LEX_INTERPCONCAT:
4537#ifdef DEBUGGING
3280af22 4538 if (PL_lex_brackets)
cea2e8a9 4539 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4540#endif
3280af22 4541 if (PL_bufptr == PL_bufend)
bbf60fe6 4542 return REPORT(sublex_done());
79072805 4543
3280af22
NIS
4544 if (SvIVX(PL_linestr) == '\'') {
4545 SV *sv = newSVsv(PL_linestr);
4546 if (!PL_lex_inpat)
76e3520e 4547 sv = tokeq(sv);
3280af22 4548 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4549 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4550 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4551 s = PL_bufend;
79072805
LW
4552 }
4553 else {
3280af22 4554 s = scan_const(PL_bufptr);
79072805 4555 if (*s == '\\')
3280af22 4556 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4557 else
3280af22 4558 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4559 }
4560
3280af22 4561 if (s != PL_bufptr) {
cd81e915 4562 start_force(PL_curforce);
5db06880
NC
4563 if (PL_madskills) {
4564 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4565 }
6154021b 4566 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4567 PL_expect = XTERM;
79072805 4568 force_next(THING);
131b3ad0 4569 if (PL_lex_starts++) {
5db06880
NC
4570#ifdef PERL_MAD
4571 if (PL_madskills) {
cd81e915
NC
4572 if (PL_thistoken)
4573 sv_free(PL_thistoken);
6b29d1f5 4574 PL_thistoken = newSVpvs("");
5db06880
NC
4575 }
4576#endif
131b3ad0
DM
4577 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4578 if (!PL_lex_casemods && PL_lex_inpat)
4579 OPERATOR(',');
4580 else
4581 Aop(OP_CONCAT);
4582 }
79072805 4583 else {
3280af22 4584 PL_bufptr = s;
cea2e8a9 4585 return yylex();
79072805
LW
4586 }
4587 }
4588
cea2e8a9 4589 return yylex();
a0d0e21e 4590 case LEX_FORMLINE:
3280af22
NIS
4591 PL_lex_state = LEX_NORMAL;
4592 s = scan_formline(PL_bufptr);
4593 if (!PL_lex_formbrack)
a0d0e21e
LW
4594 goto rightbracket;
4595 OPERATOR(';');
79072805
LW
4596 }
4597
3280af22
NIS
4598 s = PL_bufptr;
4599 PL_oldoldbufptr = PL_oldbufptr;
4600 PL_oldbufptr = s;
463ee0b2
LW
4601
4602 retry:
5db06880 4603#ifdef PERL_MAD
cd81e915
NC
4604 if (PL_thistoken) {
4605 sv_free(PL_thistoken);
4606 PL_thistoken = 0;
5db06880 4607 }
cd81e915 4608 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4609#endif
378cc40b
LW
4610 switch (*s) {
4611 default:
7e2040f0 4612 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4613 goto keylookup;
b1fc3636
CJ
4614 {
4615 unsigned char c = *s;
4616 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4617 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4618 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4619 } else {
4620 d = PL_linestart;
4621 }
4622 *s = '\0';
4623 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4624 }
e929a76b
LW
4625 case 4:
4626 case 26:
4627 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4628 case 0:
5db06880
NC
4629#ifdef PERL_MAD
4630 if (PL_madskills)
cd81e915 4631 PL_faketokens = 0;
5db06880 4632#endif
3280af22
NIS
4633 if (!PL_rsfp) {
4634 PL_last_uni = 0;
4635 PL_last_lop = 0;
a7aaec61
Z
4636 if (PL_lex_brackets &&
4637 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4638 yyerror((const char *)
4639 (PL_lex_formbrack
4640 ? "Format not terminated"
4641 : "Missing right curly or square bracket"));
c5ee2135 4642 }
4e553d73 4643 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4644 "### Tokener got EOF\n");
5f80b19c 4645 } );
79072805 4646 TOKEN(0);
463ee0b2 4647 }
3280af22 4648 if (s++ < PL_bufend)
a687059c 4649 goto retry; /* ignore stray nulls */
3280af22
NIS
4650 PL_last_uni = 0;
4651 PL_last_lop = 0;
4652 if (!PL_in_eval && !PL_preambled) {
4653 PL_preambled = TRUE;
5db06880
NC
4654#ifdef PERL_MAD
4655 if (PL_madskills)
cd81e915 4656 PL_faketokens = 1;
5db06880 4657#endif
5ab7ff98
NC
4658 if (PL_perldb) {
4659 /* Generate a string of Perl code to load the debugger.
4660 * If PERL5DB is set, it will return the contents of that,
4661 * otherwise a compile-time require of perl5db.pl. */
4662
4663 const char * const pdb = PerlEnv_getenv("PERL5DB");
4664
4665 if (pdb) {
4666 sv_setpv(PL_linestr, pdb);
4667 sv_catpvs(PL_linestr,";");
4668 } else {
4669 SETERRNO(0,SS_NORMAL);
4670 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4671 }
4672 } else
4673 sv_setpvs(PL_linestr,"");
c62eb204
NC
4674 if (PL_preambleav) {
4675 SV **svp = AvARRAY(PL_preambleav);
4676 SV **const end = svp + AvFILLp(PL_preambleav);
4677 while(svp <= end) {
4678 sv_catsv(PL_linestr, *svp);
4679 ++svp;
396482e1 4680 sv_catpvs(PL_linestr, ";");
91b7def8 4681 }
daba3364 4682 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4683 PL_preambleav = NULL;
91b7def8 4684 }
9f639728
FR
4685 if (PL_minus_E)
4686 sv_catpvs(PL_linestr,
4687 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4688 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4689 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4690 if (PL_minus_l)
396482e1 4691 sv_catpvs(PL_linestr,"chomp;");
3280af22 4692 if (PL_minus_a) {
3280af22 4693 if (PL_minus_F) {
3792a11b
NC
4694 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4695 || *PL_splitstr == '"')
3280af22 4696 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4697 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4698 else {
c8ef6a4b
NC
4699 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4700 bytes can be used as quoting characters. :-) */
dd374669 4701 const char *splits = PL_splitstr;
91d456ae 4702 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4703 do {
4704 /* Need to \ \s */
dd374669
AL
4705 if (*splits == '\\')
4706 sv_catpvn(PL_linestr, splits, 1);
4707 sv_catpvn(PL_linestr, splits, 1);
4708 } while (*splits++);
48c4c863
NC
4709 /* This loop will embed the trailing NUL of
4710 PL_linestr as the last thing it does before
4711 terminating. */
396482e1 4712 sv_catpvs(PL_linestr, ");");
54310121 4713 }
2304df62
AD
4714 }
4715 else
396482e1 4716 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4717 }
79072805 4718 }
396482e1 4719 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4720 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4721 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4722 PL_last_lop = PL_last_uni = NULL;
65269a95 4723 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4724 update_debugger_info(PL_linestr, NULL, 0);
79072805 4725 goto retry;
a687059c 4726 }
e929a76b 4727 do {
580561a3
Z
4728 fake_eof = 0;
4729 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4730 if (0) {
7e28d3af 4731 fake_eof:
f0e67a1d
Z
4732 fake_eof = LEX_FAKE_EOF;
4733 }
4734 PL_bufptr = PL_bufend;
17cc9359 4735 CopLINE_inc(PL_curcop);
f0e67a1d 4736 if (!lex_next_chunk(fake_eof)) {
17cc9359 4737 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4738 s = PL_bufptr;
4739 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4740 }
17cc9359 4741 CopLINE_dec(PL_curcop);
5db06880 4742#ifdef PERL_MAD
f0e67a1d 4743 if (!PL_rsfp)
cd81e915 4744 PL_realtokenstart = -1;
5db06880 4745#endif
f0e67a1d 4746 s = PL_bufptr;
7aa207d6
JH
4747 /* If it looks like the start of a BOM or raw UTF-16,
4748 * check if it in fact is. */
580561a3 4749 if (bof && PL_rsfp &&
7aa207d6
JH
4750 (*s == 0 ||
4751 *(U8*)s == 0xEF ||
4752 *(U8*)s >= 0xFE ||
4753 s[1] == 0)) {
eb160463 4754 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4755 if (bof) {
3280af22 4756 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4757 s = swallow_bom((U8*)s);
e929a76b 4758 }
378cc40b 4759 }
737c24fc 4760 if (PL_parser->in_pod) {
a0d0e21e 4761 /* Incest with pod. */
5db06880
NC
4762#ifdef PERL_MAD
4763 if (PL_madskills)
cd81e915 4764 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4765#endif
01a57ef7 4766 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4767 sv_setpvs(PL_linestr, "");
3280af22
NIS
4768 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4769 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4770 PL_last_lop = PL_last_uni = NULL;
737c24fc 4771 PL_parser->in_pod = 0;
a0d0e21e 4772 }
4e553d73 4773 }
85613cab
Z
4774 if (PL_rsfp)
4775 incline(s);
737c24fc 4776 } while (PL_parser->in_pod);
3280af22 4777 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4778 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4779 PL_last_lop = PL_last_uni = NULL;
57843af0 4780 if (CopLINE(PL_curcop) == 1) {
3280af22 4781 while (s < PL_bufend && isSPACE(*s))
79072805 4782 s++;
a0d0e21e 4783 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4784 s++;
5db06880
NC
4785#ifdef PERL_MAD
4786 if (PL_madskills)
cd81e915 4787 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4788#endif
bd61b366 4789 d = NULL;
3280af22 4790 if (!PL_in_eval) {
44a8e56a 4791 if (*s == '#' && *(s+1) == '!')
4792 d = s + 2;
4793#ifdef ALTERNATE_SHEBANG
4794 else {
bfed75c6 4795 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4796 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4797 d = s + (sizeof(as) - 1);
4798 }
4799#endif /* ALTERNATE_SHEBANG */
4800 }
4801 if (d) {
b8378b72 4802 char *ipath;
774d564b 4803 char *ipathend;
b8378b72 4804
774d564b 4805 while (isSPACE(*d))
b8378b72
CS
4806 d++;
4807 ipath = d;
774d564b 4808 while (*d && !isSPACE(*d))
4809 d++;
4810 ipathend = d;
4811
4812#ifdef ARG_ZERO_IS_SCRIPT
4813 if (ipathend > ipath) {
4814 /*
4815 * HP-UX (at least) sets argv[0] to the script name,
4816 * which makes $^X incorrect. And Digital UNIX and Linux,
4817 * at least, set argv[0] to the basename of the Perl
4818 * interpreter. So, having found "#!", we'll set it right.
4819 */
fafc274c
NC
4820 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4821 SVt_PV)); /* $^X */
774d564b 4822 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4823 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4824 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4825 SvSETMAGIC(x);
4826 }
556c1dec
JH
4827 else {
4828 STRLEN blen;
4829 STRLEN llen;
cfd0369c 4830 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4831 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4832 if (llen < blen) {
4833 bstart += blen - llen;
4834 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4835 sv_setpvn(x, ipath, ipathend - ipath);
4836 SvSETMAGIC(x);
4837 }
4838 }
4839 }
774d564b 4840 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4841 }
774d564b 4842#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4843
4844 /*
4845 * Look for options.
4846 */
748a9306 4847 d = instr(s,"perl -");
84e30d1a 4848 if (!d) {
748a9306 4849 d = instr(s,"perl");
84e30d1a
GS
4850#if defined(DOSISH)
4851 /* avoid getting into infinite loops when shebang
4852 * line contains "Perl" rather than "perl" */
4853 if (!d) {
4854 for (d = ipathend-4; d >= ipath; --d) {
4855 if ((*d == 'p' || *d == 'P')
4856 && !ibcmp(d, "perl", 4))
4857 {
4858 break;
4859 }
4860 }
4861 if (d < ipath)
bd61b366 4862 d = NULL;
84e30d1a
GS
4863 }
4864#endif
4865 }
44a8e56a 4866#ifdef ALTERNATE_SHEBANG
4867 /*
4868 * If the ALTERNATE_SHEBANG on this system starts with a
4869 * character that can be part of a Perl expression, then if
4870 * we see it but not "perl", we're probably looking at the
4871 * start of Perl code, not a request to hand off to some
4872 * other interpreter. Similarly, if "perl" is there, but
4873 * not in the first 'word' of the line, we assume the line
4874 * contains the start of the Perl program.
44a8e56a 4875 */
4876 if (d && *s != '#') {
f54cb97a 4877 const char *c = ipath;
44a8e56a 4878 while (*c && !strchr("; \t\r\n\f\v#", *c))
4879 c++;
4880 if (c < d)
bd61b366 4881 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4882 else
4883 *s = '#'; /* Don't try to parse shebang line */
4884 }
774d564b 4885#endif /* ALTERNATE_SHEBANG */
748a9306 4886 if (!d &&
44a8e56a 4887 *s == '#' &&
774d564b 4888 ipathend > ipath &&
3280af22 4889 !PL_minus_c &&
748a9306 4890 !instr(s,"indir") &&
3280af22 4891 instr(PL_origargv[0],"perl"))
748a9306 4892 {
27da23d5 4893 dVAR;
9f68db38 4894 char **newargv;
9f68db38 4895
774d564b 4896 *ipathend = '\0';
4897 s = ipathend + 1;
3280af22 4898 while (s < PL_bufend && isSPACE(*s))
9f68db38 4899 s++;
3280af22 4900 if (s < PL_bufend) {
d85f917e 4901 Newx(newargv,PL_origargc+3,char*);
9f68db38 4902 newargv[1] = s;
3280af22 4903 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4904 s++;
4905 *s = '\0';
3280af22 4906 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4907 }
4908 else
3280af22 4909 newargv = PL_origargv;
774d564b 4910 newargv[0] = ipath;
b35112e7 4911 PERL_FPU_PRE_EXEC
b4748376 4912 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4913 PERL_FPU_POST_EXEC
cea2e8a9 4914 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4915 }
748a9306 4916 if (d) {
c35e046a
AL
4917 while (*d && !isSPACE(*d))
4918 d++;
4919 while (SPACE_OR_TAB(*d))
4920 d++;
748a9306
LW
4921
4922 if (*d++ == '-') {
f54cb97a 4923 const bool switches_done = PL_doswitches;
fb993905
GA
4924 const U32 oldpdb = PL_perldb;
4925 const bool oldn = PL_minus_n;
4926 const bool oldp = PL_minus_p;
c7030b81 4927 const char *d1 = d;
fb993905 4928
8cc95fdb 4929 do {
4ba71d51
FC
4930 bool baduni = FALSE;
4931 if (*d1 == 'C') {
bd0ab00d
NC
4932 const char *d2 = d1 + 1;
4933 if (parse_unicode_opts((const char **)&d2)
4934 != PL_unicode)
4935 baduni = TRUE;
4ba71d51
FC
4936 }
4937 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4938 const char * const m = d1;
4939 while (*d1 && !isSPACE(*d1))
4940 d1++;
cea2e8a9 4941 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4942 (int)(d1 - m), m);
8cc95fdb 4943 }
c7030b81
NC
4944 d1 = moreswitches(d1);
4945 } while (d1);
f0b2cf55
YST
4946 if (PL_doswitches && !switches_done) {
4947 int argc = PL_origargc;
4948 char **argv = PL_origargv;
4949 do {
4950 argc--,argv++;
4951 } while (argc && argv[0][0] == '-' && argv[0][1]);
4952 init_argv_symbols(argc,argv);
4953 }
65269a95 4954 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4955 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4956 /* if we have already added "LINE: while (<>) {",
4957 we must not do it again */
748a9306 4958 {
76f68e9b 4959 sv_setpvs(PL_linestr, "");
3280af22
NIS
4960 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4961 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4962 PL_last_lop = PL_last_uni = NULL;
3280af22 4963 PL_preambled = FALSE;
65269a95 4964 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4965 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4966 goto retry;
4967 }
a0d0e21e 4968 }
79072805 4969 }
9f68db38 4970 }
79072805 4971 }
3280af22
NIS
4972 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4973 PL_bufptr = s;
4974 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4975 return yylex();
ae986130 4976 }
378cc40b 4977 goto retry;
4fdae800 4978 case '\r':
6a27c188 4979#ifdef PERL_STRICT_CR
cea2e8a9 4980 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4981 Perl_croak(aTHX_
cc507455 4982 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4983#endif
4fdae800 4984 case ' ': case '\t': case '\f': case 013:
5db06880 4985#ifdef PERL_MAD
cd81e915 4986 PL_realtokenstart = -1;
ac372eb8
RD
4987 if (!PL_thiswhite)
4988 PL_thiswhite = newSVpvs("");
4989 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4990#endif
ac372eb8 4991 s++;
378cc40b 4992 goto retry;
378cc40b 4993 case '#':
e929a76b 4994 case '\n':
5db06880 4995#ifdef PERL_MAD
cd81e915 4996 PL_realtokenstart = -1;
5db06880 4997 if (PL_madskills)
cd81e915 4998 PL_faketokens = 0;
5db06880 4999#endif
3280af22 5000 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
5001 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5002 /* handle eval qq[#line 1 "foo"\n ...] */
5003 CopLINE_dec(PL_curcop);
5004 incline(s);
5005 }
5db06880
NC
5006 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5007 s = SKIPSPACE0(s);
5008 if (!PL_in_eval || PL_rsfp)
5009 incline(s);
5010 }
5011 else {
5012 d = s;
5013 while (d < PL_bufend && *d != '\n')
5014 d++;
5015 if (d < PL_bufend)
5016 d++;
5017 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5018 Perl_croak(aTHX_ "panic: input overflow");
5019#ifdef PERL_MAD
5020 if (PL_madskills)
cd81e915 5021 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5022#endif
5023 s = d;
5024 incline(s);
5025 }
3280af22
NIS
5026 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5027 PL_bufptr = s;
5028 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5029 return yylex();
a687059c 5030 }
378cc40b 5031 }
a687059c 5032 else {
5db06880
NC
5033#ifdef PERL_MAD
5034 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5035 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5036 PL_faketokens = 0;
5db06880
NC
5037 s = SKIPSPACE0(s);
5038 TOKEN(PEG); /* make sure any #! line is accessible */
5039 }
5040 s = SKIPSPACE0(s);
5041 }
5042 else {
5043/* if (PL_madskills && PL_lex_formbrack) { */
5044 d = s;
5045 while (d < PL_bufend && *d != '\n')
5046 d++;
5047 if (d < PL_bufend)
5048 d++;
5049 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5050 Perl_croak(aTHX_ "panic: input overflow");
5051 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5052 if (!PL_thiswhite)
6b29d1f5 5053 PL_thiswhite = newSVpvs("");
5db06880 5054 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5055 sv_setpvs(PL_thiswhite, "");
cd81e915 5056 PL_faketokens = 0;
5db06880 5057 }
cd81e915 5058 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5059 }
5060 s = d;
5061/* }
5062 *s = '\0';
5063 PL_bufend = s; */
5064 }
5065#else
378cc40b 5066 *s = '\0';
3280af22 5067 PL_bufend = s;
5db06880 5068#endif
a687059c 5069 }
378cc40b
LW
5070 goto retry;
5071 case '-':
79072805 5072 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5073 I32 ftst = 0;
90771dc0 5074 char tmp;
e5edeb50 5075
378cc40b 5076 s++;
3280af22 5077 PL_bufptr = s;
748a9306
LW
5078 tmp = *s++;
5079
bf4acbe4 5080 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5081 s++;
5082
5083 if (strnEQ(s,"=>",2)) {
3280af22 5084 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5085 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5086 OPERATOR('-'); /* unary minus */
5087 }
3280af22 5088 PL_last_uni = PL_oldbufptr;
748a9306 5089 switch (tmp) {
e5edeb50
JH
5090 case 'r': ftst = OP_FTEREAD; break;
5091 case 'w': ftst = OP_FTEWRITE; break;
5092 case 'x': ftst = OP_FTEEXEC; break;
5093 case 'o': ftst = OP_FTEOWNED; break;
5094 case 'R': ftst = OP_FTRREAD; break;
5095 case 'W': ftst = OP_FTRWRITE; break;
5096 case 'X': ftst = OP_FTREXEC; break;
5097 case 'O': ftst = OP_FTROWNED; break;
5098 case 'e': ftst = OP_FTIS; break;
5099 case 'z': ftst = OP_FTZERO; break;
5100 case 's': ftst = OP_FTSIZE; break;
5101 case 'f': ftst = OP_FTFILE; break;
5102 case 'd': ftst = OP_FTDIR; break;
5103 case 'l': ftst = OP_FTLINK; break;
5104 case 'p': ftst = OP_FTPIPE; break;
5105 case 'S': ftst = OP_FTSOCK; break;
5106 case 'u': ftst = OP_FTSUID; break;
5107 case 'g': ftst = OP_FTSGID; break;
5108 case 'k': ftst = OP_FTSVTX; break;
5109 case 'b': ftst = OP_FTBLK; break;
5110 case 'c': ftst = OP_FTCHR; break;
5111 case 't': ftst = OP_FTTTY; break;
5112 case 'T': ftst = OP_FTTEXT; break;
5113 case 'B': ftst = OP_FTBINARY; break;
5114 case 'M': case 'A': case 'C':
fafc274c 5115 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5116 switch (tmp) {
5117 case 'M': ftst = OP_FTMTIME; break;
5118 case 'A': ftst = OP_FTATIME; break;
5119 case 'C': ftst = OP_FTCTIME; break;
5120 default: break;
5121 }
5122 break;
378cc40b 5123 default:
378cc40b
LW
5124 break;
5125 }
e5edeb50 5126 if (ftst) {
eb160463 5127 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5128 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5129 "### Saw file test %c\n", (int)tmp);
5f80b19c 5130 } );
e5edeb50
JH
5131 FTST(ftst);
5132 }
5133 else {
5134 /* Assume it was a minus followed by a one-letter named
5135 * subroutine call (or a -bareword), then. */
95c31fe3 5136 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5137 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5138 (int) tmp);
5f80b19c 5139 } );
3cf7b4c4 5140 s = --PL_bufptr;
e5edeb50 5141 }
378cc40b 5142 }
90771dc0
NC
5143 {
5144 const char tmp = *s++;
5145 if (*s == tmp) {
5146 s++;
5147 if (PL_expect == XOPERATOR)
5148 TERM(POSTDEC);
5149 else
5150 OPERATOR(PREDEC);
5151 }
5152 else if (*s == '>') {
5153 s++;
29595ff2 5154 s = SKIPSPACE1(s);
90771dc0
NC
5155 if (isIDFIRST_lazy_if(s,UTF)) {
5156 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5157 TOKEN(ARROW);
5158 }
5159 else if (*s == '$')
5160 OPERATOR(ARROW);
5161 else
5162 TERM(ARROW);
5163 }
78cdf107
Z
5164 if (PL_expect == XOPERATOR) {
5165 if (*s == '=' && !PL_lex_allbrackets &&
5166 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5167 s--;
5168 TOKEN(0);
5169 }
90771dc0 5170 Aop(OP_SUBTRACT);
78cdf107 5171 }
90771dc0
NC
5172 else {
5173 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5174 check_uni();
5175 OPERATOR('-'); /* unary minus */
79072805 5176 }
2f3197b3 5177 }
79072805 5178
378cc40b 5179 case '+':
90771dc0
NC
5180 {
5181 const char tmp = *s++;
5182 if (*s == tmp) {
5183 s++;
5184 if (PL_expect == XOPERATOR)
5185 TERM(POSTINC);
5186 else
5187 OPERATOR(PREINC);
5188 }
78cdf107
Z
5189 if (PL_expect == XOPERATOR) {
5190 if (*s == '=' && !PL_lex_allbrackets &&
5191 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5192 s--;
5193 TOKEN(0);
5194 }
90771dc0 5195 Aop(OP_ADD);
78cdf107 5196 }
90771dc0
NC
5197 else {
5198 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5199 check_uni();
5200 OPERATOR('+');
5201 }
2f3197b3 5202 }
a687059c 5203
378cc40b 5204 case '*':
3280af22
NIS
5205 if (PL_expect != XOPERATOR) {
5206 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5207 PL_expect = XOPERATOR;
5208 force_ident(PL_tokenbuf, '*');
5209 if (!*PL_tokenbuf)
a0d0e21e 5210 PREREF('*');
79072805 5211 TERM('*');
a687059c 5212 }
79072805
LW
5213 s++;
5214 if (*s == '*') {
a687059c 5215 s++;
78cdf107
Z
5216 if (*s == '=' && !PL_lex_allbrackets &&
5217 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5218 s -= 2;
5219 TOKEN(0);
5220 }
79072805 5221 PWop(OP_POW);
a687059c 5222 }
78cdf107
Z
5223 if (*s == '=' && !PL_lex_allbrackets &&
5224 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5225 s--;
5226 TOKEN(0);
5227 }
79072805
LW
5228 Mop(OP_MULTIPLY);
5229
378cc40b 5230 case '%':
3280af22 5231 if (PL_expect == XOPERATOR) {
78cdf107
Z
5232 if (s[1] == '=' && !PL_lex_allbrackets &&
5233 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5234 TOKEN(0);
bbce6d69 5235 ++s;
5236 Mop(OP_MODULO);
a687059c 5237 }
3280af22 5238 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5239 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5240 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5241 if (!PL_tokenbuf[1]) {
bbce6d69 5242 PREREF('%');
a687059c 5243 }
3280af22 5244 PL_pending_ident = '%';
bbce6d69 5245 TERM('%');
a687059c 5246
378cc40b 5247 case '^':
78cdf107
Z
5248 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5249 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5250 TOKEN(0);
79072805 5251 s++;
a0d0e21e 5252 BOop(OP_BIT_XOR);
79072805 5253 case '[':
a7aaec61
Z
5254 if (PL_lex_brackets > 100)
5255 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5256 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5257 PL_lex_allbrackets++;
df3467db
IG
5258 {
5259 const char tmp = *s++;
5260 OPERATOR(tmp);
5261 }
378cc40b 5262 case '~':
0d863452 5263 if (s[1] == '~'
3e7dd34d 5264 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5265 {
78cdf107
Z
5266 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5267 TOKEN(0);
0d863452
RH
5268 s += 2;
5269 Eop(OP_SMARTMATCH);
5270 }
78cdf107
Z
5271 s++;
5272 OPERATOR('~');
378cc40b 5273 case ',':
78cdf107
Z
5274 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5275 TOKEN(0);
5276 s++;
5277 OPERATOR(',');
a0d0e21e
LW
5278 case ':':
5279 if (s[1] == ':') {
5280 len = 0;
0bfa2a8a 5281 goto just_a_word_zero_gv;
a0d0e21e
LW
5282 }
5283 s++;
09bef843
SB
5284 switch (PL_expect) {
5285 OP *attrs;
5db06880
NC
5286#ifdef PERL_MAD
5287 I32 stuffstart;
5288#endif
09bef843
SB
5289 case XOPERATOR:
5290 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5291 break;
5292 PL_bufptr = s; /* update in case we back off */
d83f38d8 5293 if (*s == '=') {
2dc78664
NC
5294 Perl_croak(aTHX_
5295 "Use of := for an empty attribute list is not allowed");
d83f38d8 5296 }
09bef843
SB
5297 goto grabattrs;
5298 case XATTRBLOCK:
5299 PL_expect = XBLOCK;
5300 goto grabattrs;
5301 case XATTRTERM:
5302 PL_expect = XTERMBLOCK;
5303 grabattrs:
5db06880
NC
5304#ifdef PERL_MAD
5305 stuffstart = s - SvPVX(PL_linestr) - 1;
5306#endif
29595ff2 5307 s = PEEKSPACE(s);
5f66b61c 5308 attrs = NULL;
7e2040f0 5309 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5310 I32 tmp;
5cc237b8 5311 SV *sv;
09bef843 5312 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5313 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5314 if (tmp < 0) tmp = -tmp;
5315 switch (tmp) {
5316 case KEY_or:
5317 case KEY_and:
5318 case KEY_for:
11baf631 5319 case KEY_foreach:
f9829d6b
GS
5320 case KEY_unless:
5321 case KEY_if:
5322 case KEY_while:
5323 case KEY_until:
5324 goto got_attrs;
5325 default:
5326 break;
5327 }
5328 }
5cc237b8 5329 sv = newSVpvn(s, len);
09bef843
SB
5330 if (*d == '(') {
5331 d = scan_str(d,TRUE,TRUE);
5332 if (!d) {
09bef843
SB
5333 /* MUST advance bufptr here to avoid bogus
5334 "at end of line" context messages from yyerror().
5335 */
5336 PL_bufptr = s + len;
5337 yyerror("Unterminated attribute parameter in attribute list");
5338 if (attrs)
5339 op_free(attrs);
5cc237b8 5340 sv_free(sv);
bbf60fe6 5341 return REPORT(0); /* EOF indicator */
09bef843
SB
5342 }
5343 }
5344 if (PL_lex_stuff) {
09bef843 5345 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5346 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5347 newSVOP(OP_CONST, 0, sv));
5348 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5349 PL_lex_stuff = NULL;
09bef843
SB
5350 }
5351 else {
5cc237b8
BS
5352 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5353 sv_free(sv);
1108974d 5354 if (PL_in_my == KEY_our) {
df9a6019 5355 deprecate(":unique");
1108974d 5356 }
bfed75c6 5357 else
371fce9b
DM
5358 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5359 }
5360
d3cea301
SB
5361 /* NOTE: any CV attrs applied here need to be part of
5362 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5363 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5364 sv_free(sv);
78f9721b 5365 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5366 }
5367 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5368 sv_free(sv);
8e5dadda 5369 deprecate(":locked");
5cc237b8
BS
5370 }
5371 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5372 sv_free(sv);
78f9721b 5373 CvMETHOD_on(PL_compcv);
5cc237b8 5374 }
78f9721b
SM
5375 /* After we've set the flags, it could be argued that
5376 we don't need to do the attributes.pm-based setting
5377 process, and shouldn't bother appending recognized
d3cea301
SB
5378 flags. To experiment with that, uncomment the
5379 following "else". (Note that's already been
5380 uncommented. That keeps the above-applied built-in
5381 attributes from being intercepted (and possibly
5382 rejected) by a package's attribute routines, but is
5383 justified by the performance win for the common case
5384 of applying only built-in attributes.) */
0256094b 5385 else
2fcb4757 5386 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5387 newSVOP(OP_CONST, 0,
5cc237b8 5388 sv));
09bef843 5389 }
29595ff2 5390 s = PEEKSPACE(d);
0120eecf 5391 if (*s == ':' && s[1] != ':')
29595ff2 5392 s = PEEKSPACE(s+1);
0120eecf
GS
5393 else if (s == d)
5394 break; /* require real whitespace or :'s */
29595ff2 5395 /* XXX losing whitespace on sequential attributes here */
09bef843 5396 }
90771dc0
NC
5397 {
5398 const char tmp
5399 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5400 if (*s != ';' && *s != '}' && *s != tmp
5401 && (tmp != '=' || *s != ')')) {
5402 const char q = ((*s == '\'') ? '"' : '\'');
5403 /* If here for an expression, and parsed no attrs, back
5404 off. */
5405 if (tmp == '=' && !attrs) {
5406 s = PL_bufptr;
5407 break;
5408 }
5409 /* MUST advance bufptr here to avoid bogus "at end of line"
5410 context messages from yyerror().
5411 */
5412 PL_bufptr = s;
10edeb5d
JH
5413 yyerror( (const char *)
5414 (*s
5415 ? Perl_form(aTHX_ "Invalid separator character "
5416 "%c%c%c in attribute list", q, *s, q)
5417 : "Unterminated attribute list" ) );
90771dc0
NC
5418 if (attrs)
5419 op_free(attrs);
5420 OPERATOR(':');
09bef843 5421 }
09bef843 5422 }
f9829d6b 5423 got_attrs:
09bef843 5424 if (attrs) {
cd81e915 5425 start_force(PL_curforce);
9ded7720 5426 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5427 CURMAD('_', PL_nextwhite);
89122651 5428 force_next(THING);
5db06880
NC
5429 }
5430#ifdef PERL_MAD
5431 if (PL_madskills) {
cd81e915 5432 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5433 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5434 }
5db06880 5435#endif
09bef843
SB
5436 TOKEN(COLONATTR);
5437 }
78cdf107
Z
5438 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5439 s--;
5440 TOKEN(0);
5441 }
5442 PL_lex_allbrackets--;
a0d0e21e 5443 OPERATOR(':');
8990e307
LW
5444 case '(':
5445 s++;
3280af22
NIS
5446 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5447 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5448 else
3280af22 5449 PL_expect = XTERM;
29595ff2 5450 s = SKIPSPACE1(s);
78cdf107 5451 PL_lex_allbrackets++;
a0d0e21e 5452 TOKEN('(');
378cc40b 5453 case ';':
78cdf107
Z
5454 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5455 TOKEN(0);
f4dd75d9 5456 CLINE;
78cdf107
Z
5457 s++;
5458 OPERATOR(';');
378cc40b 5459 case ')':
78cdf107
Z
5460 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5461 TOKEN(0);
5462 s++;
5463 PL_lex_allbrackets--;
5464 s = SKIPSPACE1(s);
5465 if (*s == '{')
5466 PREBLOCK(')');
5467 TERM(')');
79072805 5468 case ']':
a7aaec61
Z
5469 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5470 TOKEN(0);
79072805 5471 s++;
3280af22 5472 if (PL_lex_brackets <= 0)
d98d5fff 5473 yyerror("Unmatched right square bracket");
463ee0b2 5474 else
3280af22 5475 --PL_lex_brackets;
78cdf107 5476 PL_lex_allbrackets--;
3280af22
NIS
5477 if (PL_lex_state == LEX_INTERPNORMAL) {
5478 if (PL_lex_brackets == 0) {
02255c60
FC
5479 if (*s == '-' && s[1] == '>')
5480 PL_lex_state = LEX_INTERPENDMAYBE;
5481 else if (*s != '[' && *s != '{')
3280af22 5482 PL_lex_state = LEX_INTERPEND;
79072805
LW
5483 }
5484 }
4633a7c4 5485 TERM(']');
79072805
LW
5486 case '{':
5487 leftbracket:
79072805 5488 s++;
3280af22 5489 if (PL_lex_brackets > 100) {
8edd5f42 5490 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5491 }
3280af22 5492 switch (PL_expect) {
a0d0e21e 5493 case XTERM:
3280af22 5494 if (PL_lex_formbrack) {
a0d0e21e
LW
5495 s--;
5496 PRETERMBLOCK(DO);
5497 }
3280af22
NIS
5498 if (PL_oldoldbufptr == PL_last_lop)
5499 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5500 else
3280af22 5501 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5502 PL_lex_allbrackets++;
79072805 5503 OPERATOR(HASHBRACK);
a0d0e21e 5504 case XOPERATOR:
bf4acbe4 5505 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5506 s++;
44a8e56a 5507 d = s;
3280af22
NIS
5508 PL_tokenbuf[0] = '\0';
5509 if (d < PL_bufend && *d == '-') {
5510 PL_tokenbuf[0] = '-';
44a8e56a 5511 d++;
bf4acbe4 5512 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5513 d++;
5514 }
7e2040f0 5515 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5516 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5517 FALSE, &len);
bf4acbe4 5518 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5519 d++;
5520 if (*d == '}') {
f54cb97a 5521 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5522 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5523 if (minus)
5524 force_next('-');
748a9306
LW
5525 }
5526 }
5527 /* FALL THROUGH */
09bef843 5528 case XATTRBLOCK:
748a9306 5529 case XBLOCK:
3280af22 5530 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5531 PL_lex_allbrackets++;
3280af22 5532 PL_expect = XSTATE;
a0d0e21e 5533 break;
09bef843 5534 case XATTRTERM:
a0d0e21e 5535 case XTERMBLOCK:
3280af22 5536 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5537 PL_lex_allbrackets++;
3280af22 5538 PL_expect = XSTATE;
a0d0e21e
LW
5539 break;
5540 default: {
f54cb97a 5541 const char *t;
3280af22
NIS
5542 if (PL_oldoldbufptr == PL_last_lop)
5543 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5544 else
3280af22 5545 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5546 PL_lex_allbrackets++;
29595ff2 5547 s = SKIPSPACE1(s);
8452ff4b
SB
5548 if (*s == '}') {
5549 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5550 PL_expect = XTERM;
5551 /* This hack is to get the ${} in the message. */
5552 PL_bufptr = s+1;
5553 yyerror("syntax error");
5554 break;
5555 }
a0d0e21e 5556 OPERATOR(HASHBRACK);
8452ff4b 5557 }
b8a4b1be
GS
5558 /* This hack serves to disambiguate a pair of curlies
5559 * as being a block or an anon hash. Normally, expectation
5560 * determines that, but in cases where we're not in a
5561 * position to expect anything in particular (like inside
5562 * eval"") we have to resolve the ambiguity. This code
5563 * covers the case where the first term in the curlies is a
5564 * quoted string. Most other cases need to be explicitly
a0288114 5565 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5566 * curly in order to force resolution as an anon hash.
5567 *
5568 * XXX should probably propagate the outer expectation
5569 * into eval"" to rely less on this hack, but that could
5570 * potentially break current behavior of eval"".
5571 * GSAR 97-07-21
5572 */
5573 t = s;
5574 if (*s == '\'' || *s == '"' || *s == '`') {
5575 /* common case: get past first string, handling escapes */
3280af22 5576 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5577 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5578 t++;
5579 t++;
a0d0e21e 5580 }
b8a4b1be 5581 else if (*s == 'q') {
3280af22 5582 if (++t < PL_bufend
b8a4b1be 5583 && (!isALNUM(*t)
3280af22 5584 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5585 && !isALNUM(*t))))
5586 {
abc667d1 5587 /* skip q//-like construct */
f54cb97a 5588 const char *tmps;
b8a4b1be
GS
5589 char open, close, term;
5590 I32 brackets = 1;
5591
3280af22 5592 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5593 t++;
abc667d1
DM
5594 /* check for q => */
5595 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5596 OPERATOR(HASHBRACK);
5597 }
b8a4b1be
GS
5598 term = *t;
5599 open = term;
5600 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5601 term = tmps[5];
5602 close = term;
5603 if (open == close)
3280af22
NIS
5604 for (t++; t < PL_bufend; t++) {
5605 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5606 t++;
6d07e5e9 5607 else if (*t == open)
b8a4b1be
GS
5608 break;
5609 }
abc667d1 5610 else {
3280af22
NIS
5611 for (t++; t < PL_bufend; t++) {
5612 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5613 t++;
6d07e5e9 5614 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5615 break;
5616 else if (*t == open)
5617 brackets++;
5618 }
abc667d1
DM
5619 }
5620 t++;
b8a4b1be 5621 }
abc667d1
DM
5622 else
5623 /* skip plain q word */
5624 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5625 t += UTF8SKIP(t);
a0d0e21e 5626 }
7e2040f0 5627 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5628 t += UTF8SKIP(t);
7e2040f0 5629 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5630 t += UTF8SKIP(t);
a0d0e21e 5631 }
3280af22 5632 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5633 t++;
b8a4b1be
GS
5634 /* if comma follows first term, call it an anon hash */
5635 /* XXX it could be a comma expression with loop modifiers */
3280af22 5636 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5637 || (*t == '=' && t[1] == '>')))
a0d0e21e 5638 OPERATOR(HASHBRACK);
3280af22 5639 if (PL_expect == XREF)
4e4e412b 5640 PL_expect = XTERM;
a0d0e21e 5641 else {
3280af22
NIS
5642 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5643 PL_expect = XSTATE;
a0d0e21e 5644 }
8990e307 5645 }
a0d0e21e 5646 break;
463ee0b2 5647 }
6154021b 5648 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5649 if (isSPACE(*s) || *s == '#')
3280af22 5650 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5651 TOKEN('{');
378cc40b 5652 case '}':
a7aaec61
Z
5653 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5654 TOKEN(0);
79072805
LW
5655 rightbracket:
5656 s++;
3280af22 5657 if (PL_lex_brackets <= 0)
d98d5fff 5658 yyerror("Unmatched right curly bracket");
463ee0b2 5659 else
3280af22 5660 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 5661 PL_lex_allbrackets--;
c2e66d9e 5662 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5663 PL_lex_formbrack = 0;
5664 if (PL_lex_state == LEX_INTERPNORMAL) {
5665 if (PL_lex_brackets == 0) {
9059aa12
LW
5666 if (PL_expect & XFAKEBRACK) {
5667 PL_expect &= XENUMMASK;
3280af22
NIS
5668 PL_lex_state = LEX_INTERPEND;
5669 PL_bufptr = s;
5db06880
NC
5670#if 0
5671 if (PL_madskills) {
cd81e915 5672 if (!PL_thiswhite)
6b29d1f5 5673 PL_thiswhite = newSVpvs("");
76f68e9b 5674 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5675 }
5676#endif
cea2e8a9 5677 return yylex(); /* ignore fake brackets */
79072805 5678 }
fa83b5b6 5679 if (*s == '-' && s[1] == '>')
3280af22 5680 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5681 else if (*s != '[' && *s != '{')
3280af22 5682 PL_lex_state = LEX_INTERPEND;
79072805
LW
5683 }
5684 }
9059aa12
LW
5685 if (PL_expect & XFAKEBRACK) {
5686 PL_expect &= XENUMMASK;
3280af22 5687 PL_bufptr = s;
cea2e8a9 5688 return yylex(); /* ignore fake brackets */
748a9306 5689 }
cd81e915 5690 start_force(PL_curforce);
5db06880
NC
5691 if (PL_madskills) {
5692 curmad('X', newSVpvn(s-1,1));
cd81e915 5693 CURMAD('_', PL_thiswhite);
5db06880 5694 }
79072805 5695 force_next('}');
5db06880 5696#ifdef PERL_MAD
cd81e915 5697 if (!PL_thistoken)
6b29d1f5 5698 PL_thistoken = newSVpvs("");
5db06880 5699#endif
79072805 5700 TOKEN(';');
378cc40b
LW
5701 case '&':
5702 s++;
78cdf107
Z
5703 if (*s++ == '&') {
5704 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5705 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5706 s -= 2;
5707 TOKEN(0);
5708 }
a0d0e21e 5709 AOPERATOR(ANDAND);
78cdf107 5710 }
378cc40b 5711 s--;
3280af22 5712 if (PL_expect == XOPERATOR) {
041457d9
DM
5713 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5714 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5715 {
57843af0 5716 CopLINE_dec(PL_curcop);
f1f66076 5717 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5718 CopLINE_inc(PL_curcop);
463ee0b2 5719 }
78cdf107
Z
5720 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5721 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5722 s--;
5723 TOKEN(0);
5724 }
79072805 5725 BAop(OP_BIT_AND);
463ee0b2 5726 }
79072805 5727
3280af22
NIS
5728 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5729 if (*PL_tokenbuf) {
5730 PL_expect = XOPERATOR;
5731 force_ident(PL_tokenbuf, '&');
463ee0b2 5732 }
79072805
LW
5733 else
5734 PREREF('&');
6154021b 5735 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5736 TERM('&');
5737
378cc40b
LW
5738 case '|':
5739 s++;
78cdf107
Z
5740 if (*s++ == '|') {
5741 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5742 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5743 s -= 2;
5744 TOKEN(0);
5745 }
a0d0e21e 5746 AOPERATOR(OROR);
78cdf107 5747 }
378cc40b 5748 s--;
78cdf107
Z
5749 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5750 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5751 s--;
5752 TOKEN(0);
5753 }
79072805 5754 BOop(OP_BIT_OR);
378cc40b
LW
5755 case '=':
5756 s++;
748a9306 5757 {
90771dc0 5758 const char tmp = *s++;
78cdf107
Z
5759 if (tmp == '=') {
5760 if (!PL_lex_allbrackets &&
5761 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5762 s -= 2;
5763 TOKEN(0);
5764 }
90771dc0 5765 Eop(OP_EQ);
78cdf107
Z
5766 }
5767 if (tmp == '>') {
5768 if (!PL_lex_allbrackets &&
5769 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5770 s -= 2;
5771 TOKEN(0);
5772 }
90771dc0 5773 OPERATOR(',');
78cdf107 5774 }
90771dc0
NC
5775 if (tmp == '~')
5776 PMop(OP_MATCH);
5777 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5778 && strchr("+-*/%.^&|<",tmp))
5779 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5780 "Reversed %c= operator",(int)tmp);
5781 s--;
5782 if (PL_expect == XSTATE && isALPHA(tmp) &&
5783 (s == PL_linestart+1 || s[-2] == '\n') )
5784 {
5785 if (PL_in_eval && !PL_rsfp) {
5786 d = PL_bufend;
5787 while (s < d) {
5788 if (*s++ == '\n') {
5789 incline(s);
5790 if (strnEQ(s,"=cut",4)) {
5791 s = strchr(s,'\n');
5792 if (s)
5793 s++;
5794 else
5795 s = d;
5796 incline(s);
5797 goto retry;
5798 }
5799 }
a5f75d66 5800 }
90771dc0 5801 goto retry;
a5f75d66 5802 }
5db06880
NC
5803#ifdef PERL_MAD
5804 if (PL_madskills) {
cd81e915 5805 if (!PL_thiswhite)
6b29d1f5 5806 PL_thiswhite = newSVpvs("");
cd81e915 5807 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5808 PL_bufend - PL_linestart);
5809 }
5810#endif
90771dc0 5811 s = PL_bufend;
737c24fc 5812 PL_parser->in_pod = 1;
90771dc0 5813 goto retry;
a5f75d66 5814 }
a0d0e21e 5815 }
3280af22 5816 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5817 const char *t = s;
51882d45 5818#ifdef PERL_STRICT_CR
c35e046a 5819 while (SPACE_OR_TAB(*t))
51882d45 5820#else
c35e046a 5821 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5822#endif
c35e046a 5823 t++;
a0d0e21e
LW
5824 if (*t == '\n' || *t == '#') {
5825 s--;
3280af22 5826 PL_expect = XBLOCK;
a0d0e21e
LW
5827 goto leftbracket;
5828 }
79072805 5829 }
78cdf107
Z
5830 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5831 s--;
5832 TOKEN(0);
5833 }
6154021b 5834 pl_yylval.ival = 0;
a0d0e21e 5835 OPERATOR(ASSIGNOP);
378cc40b
LW
5836 case '!':
5837 s++;
90771dc0
NC
5838 {
5839 const char tmp = *s++;
5840 if (tmp == '=') {
5841 /* was this !=~ where !~ was meant?
5842 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5843
5844 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5845 const char *t = s+1;
5846
5847 while (t < PL_bufend && isSPACE(*t))
5848 ++t;
5849
5850 if (*t == '/' || *t == '?' ||
5851 ((*t == 'm' || *t == 's' || *t == 'y')
5852 && !isALNUM(t[1])) ||
5853 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5854 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5855 "!=~ should be !~");
5856 }
78cdf107
Z
5857 if (!PL_lex_allbrackets &&
5858 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5859 s -= 2;
5860 TOKEN(0);
5861 }
90771dc0
NC
5862 Eop(OP_NE);
5863 }
5864 if (tmp == '~')
5865 PMop(OP_NOT);
5866 }
378cc40b
LW
5867 s--;
5868 OPERATOR('!');
5869 case '<':
3280af22 5870 if (PL_expect != XOPERATOR) {
93a17b20 5871 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5872 check_uni();
79072805
LW
5873 if (s[1] == '<')
5874 s = scan_heredoc(s);
5875 else
5876 s = scan_inputsymbol(s);
5877 TERM(sublex_start());
378cc40b
LW
5878 }
5879 s++;
90771dc0
NC
5880 {
5881 char tmp = *s++;
78cdf107
Z
5882 if (tmp == '<') {
5883 if (*s == '=' && !PL_lex_allbrackets &&
5884 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5885 s -= 2;
5886 TOKEN(0);
5887 }
90771dc0 5888 SHop(OP_LEFT_SHIFT);
78cdf107 5889 }
90771dc0
NC
5890 if (tmp == '=') {
5891 tmp = *s++;
78cdf107
Z
5892 if (tmp == '>') {
5893 if (!PL_lex_allbrackets &&
5894 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5895 s -= 3;
5896 TOKEN(0);
5897 }
90771dc0 5898 Eop(OP_NCMP);
78cdf107 5899 }
90771dc0 5900 s--;
78cdf107
Z
5901 if (!PL_lex_allbrackets &&
5902 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5903 s -= 2;
5904 TOKEN(0);
5905 }
90771dc0
NC
5906 Rop(OP_LE);
5907 }
395c3793 5908 }
378cc40b 5909 s--;
78cdf107
Z
5910 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5911 s--;
5912 TOKEN(0);
5913 }
79072805 5914 Rop(OP_LT);
378cc40b
LW
5915 case '>':
5916 s++;
90771dc0
NC
5917 {
5918 const char tmp = *s++;
78cdf107
Z
5919 if (tmp == '>') {
5920 if (*s == '=' && !PL_lex_allbrackets &&
5921 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5922 s -= 2;
5923 TOKEN(0);
5924 }
90771dc0 5925 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
5926 }
5927 else if (tmp == '=') {
5928 if (!PL_lex_allbrackets &&
5929 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5930 s -= 2;
5931 TOKEN(0);
5932 }
90771dc0 5933 Rop(OP_GE);
78cdf107 5934 }
90771dc0 5935 }
378cc40b 5936 s--;
78cdf107
Z
5937 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5938 s--;
5939 TOKEN(0);
5940 }
79072805 5941 Rop(OP_GT);
378cc40b
LW
5942
5943 case '$':
bbce6d69 5944 CLINE;
5945
3280af22
NIS
5946 if (PL_expect == XOPERATOR) {
5947 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5948 return deprecate_commaless_var_list();
a0d0e21e 5949 }
8990e307 5950 }
a0d0e21e 5951
c0b977fd 5952 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5953 PL_tokenbuf[0] = '@';
376b8730
SM
5954 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5955 sizeof PL_tokenbuf - 1, FALSE);
5956 if (PL_expect == XOPERATOR)
5957 no_op("Array length", s);
3280af22 5958 if (!PL_tokenbuf[1])
a0d0e21e 5959 PREREF(DOLSHARP);
3280af22
NIS
5960 PL_expect = XOPERATOR;
5961 PL_pending_ident = '#';
463ee0b2 5962 TOKEN(DOLSHARP);
79072805 5963 }
bbce6d69 5964
3280af22 5965 PL_tokenbuf[0] = '$';
376b8730
SM
5966 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5967 sizeof PL_tokenbuf - 1, FALSE);
5968 if (PL_expect == XOPERATOR)
5969 no_op("Scalar", s);
3280af22
NIS
5970 if (!PL_tokenbuf[1]) {
5971 if (s == PL_bufend)
bbce6d69 5972 yyerror("Final $ should be \\$ or $name");
5973 PREREF('$');
8990e307 5974 }
a0d0e21e 5975
bbce6d69 5976 /* This kludge not intended to be bulletproof. */
3280af22 5977 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5978 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5979 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5980 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5981 TERM(THING);
5982 }
5983
ff68c719 5984 d = s;
90771dc0
NC
5985 {
5986 const char tmp = *s;
ae28bb2a 5987 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5988 s = SKIPSPACE1(s);
ff68c719 5989
90771dc0
NC
5990 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5991 && intuit_more(s)) {
5992 if (*s == '[') {
5993 PL_tokenbuf[0] = '@';
5994 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5995 char *t = s+1;
5996
5997 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5998 t++;
90771dc0 5999 if (*t++ == ',') {
29595ff2 6000 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6001 while (t < PL_bufend && *t != ']')
6002 t++;
9014280d 6003 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6004 "Multidimensional syntax %.*s not supported",
36c7798d 6005 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6006 }
748a9306 6007 }
93a17b20 6008 }
90771dc0
NC
6009 else if (*s == '{') {
6010 char *t;
6011 PL_tokenbuf[0] = '%';
6012 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6013 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6014 {
6015 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6016 do {
6017 t++;
6018 } while (isSPACE(*t));
90771dc0 6019 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6020 STRLEN len;
90771dc0 6021 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6022 &len);
c35e046a
AL
6023 while (isSPACE(*t))
6024 t++;
780a5241 6025 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
6026 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6027 "You need to quote \"%s\"",
6028 tmpbuf);
6029 }
6030 }
6031 }
93a17b20 6032 }
bbce6d69 6033
90771dc0
NC
6034 PL_expect = XOPERATOR;
6035 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6036 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6037 if (!islop || PL_last_lop_op == OP_GREPSTART)
6038 PL_expect = XOPERATOR;
6039 else if (strchr("$@\"'`q", *s))
6040 PL_expect = XTERM; /* e.g. print $fh "foo" */
6041 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6042 PL_expect = XTERM; /* e.g. print $fh &sub */
6043 else if (isIDFIRST_lazy_if(s,UTF)) {
6044 char tmpbuf[sizeof PL_tokenbuf];
6045 int t2;
6046 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6047 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6048 /* binary operators exclude handle interpretations */
6049 switch (t2) {
6050 case -KEY_x:
6051 case -KEY_eq:
6052 case -KEY_ne:
6053 case -KEY_gt:
6054 case -KEY_lt:
6055 case -KEY_ge:
6056 case -KEY_le:
6057 case -KEY_cmp:
6058 break;
6059 default:
6060 PL_expect = XTERM; /* e.g. print $fh length() */
6061 break;
6062 }
6063 }
6064 else {
6065 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6066 }
6067 }
90771dc0
NC
6068 else if (isDIGIT(*s))
6069 PL_expect = XTERM; /* e.g. print $fh 3 */
6070 else if (*s == '.' && isDIGIT(s[1]))
6071 PL_expect = XTERM; /* e.g. print $fh .3 */
6072 else if ((*s == '?' || *s == '-' || *s == '+')
6073 && !isSPACE(s[1]) && s[1] != '=')
6074 PL_expect = XTERM; /* e.g. print $fh -1 */
6075 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6076 && s[1] != '/')
6077 PL_expect = XTERM; /* e.g. print $fh /.../
6078 XXX except DORDOR operator
6079 */
6080 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6081 && s[2] != '=')
6082 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6083 }
bbce6d69 6084 }
3280af22 6085 PL_pending_ident = '$';
79072805 6086 TOKEN('$');
378cc40b
LW
6087
6088 case '@':
3280af22 6089 if (PL_expect == XOPERATOR)
bbce6d69 6090 no_op("Array", s);
3280af22
NIS
6091 PL_tokenbuf[0] = '@';
6092 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6093 if (!PL_tokenbuf[1]) {
bbce6d69 6094 PREREF('@');
6095 }
3280af22 6096 if (PL_lex_state == LEX_NORMAL)
29595ff2 6097 s = SKIPSPACE1(s);
3280af22 6098 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6099 if (*s == '{')
3280af22 6100 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6101
6102 /* Warn about @ where they meant $. */
041457d9
DM
6103 if (*s == '[' || *s == '{') {
6104 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6105 const char *t = s + 1;
7e2040f0 6106 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
6107 t++;
6108 if (*t == '}' || *t == ']') {
6109 t++;
29595ff2 6110 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 6111 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 6112 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
6113 (int)(t-PL_bufptr), PL_bufptr,
6114 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 6115 }
93a17b20
LW
6116 }
6117 }
463ee0b2 6118 }
3280af22 6119 PL_pending_ident = '@';
79072805 6120 TERM('@');
378cc40b 6121
c963b151 6122 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6123 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6124 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6125 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6126 TOKEN(0);
6f33ba73
RGS
6127 s += 2;
6128 AOPERATOR(DORDOR);
6129 }
c963b151 6130 case '?': /* may either be conditional or pattern */
be25f609 6131 if (PL_expect == XOPERATOR) {
90771dc0 6132 char tmp = *s++;
c963b151 6133 if(tmp == '?') {
78cdf107
Z
6134 if (!PL_lex_allbrackets &&
6135 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6136 s--;
6137 TOKEN(0);
6138 }
6139 PL_lex_allbrackets++;
be25f609 6140 OPERATOR('?');
c963b151
BD
6141 }
6142 else {
6143 tmp = *s++;
6144 if(tmp == '/') {
6145 /* A // operator. */
78cdf107
Z
6146 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6147 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6148 LEX_FAKEEOF_LOGIC)) {
6149 s -= 2;
6150 TOKEN(0);
6151 }
c963b151
BD
6152 AOPERATOR(DORDOR);
6153 }
6154 else {
6155 s--;
78cdf107
Z
6156 if (*s == '=' && !PL_lex_allbrackets &&
6157 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6158 s--;
6159 TOKEN(0);
6160 }
c963b151
BD
6161 Mop(OP_DIVIDE);
6162 }
6163 }
6164 }
6165 else {
6166 /* Disable warning on "study /blah/" */
6167 if (PL_oldoldbufptr == PL_last_uni
6168 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6169 || memNE(PL_last_uni, "study", 5)
6170 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6171 ))
6172 check_uni();
725a61d7
Z
6173 if (*s == '?')
6174 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6175 s = scan_pat(s,OP_MATCH);
6176 TERM(sublex_start());
6177 }
378cc40b
LW
6178
6179 case '.':
51882d45
GS
6180 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6181#ifdef PERL_STRICT_CR
6182 && s[1] == '\n'
6183#else
6184 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6185#endif
6186 && (s == PL_linestart || s[-1] == '\n') )
6187 {
3280af22
NIS
6188 PL_lex_formbrack = 0;
6189 PL_expect = XSTATE;
79072805
LW
6190 goto rightbracket;
6191 }
be25f609 6192 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6193 s += 3;
6194 OPERATOR(YADAYADA);
6195 }
3280af22 6196 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6197 char tmp = *s++;
a687059c 6198 if (*s == tmp) {
78cdf107
Z
6199 if (!PL_lex_allbrackets &&
6200 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6201 s--;
6202 TOKEN(0);
6203 }
a687059c 6204 s++;
2f3197b3
LW
6205 if (*s == tmp) {
6206 s++;
6154021b 6207 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6208 }
6209 else
6154021b 6210 pl_yylval.ival = 0;
378cc40b 6211 OPERATOR(DOTDOT);
a687059c 6212 }
78cdf107
Z
6213 if (*s == '=' && !PL_lex_allbrackets &&
6214 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6215 s--;
6216 TOKEN(0);
6217 }
79072805 6218 Aop(OP_CONCAT);
378cc40b
LW
6219 }
6220 /* FALL THROUGH */
6221 case '0': case '1': case '2': case '3': case '4':
6222 case '5': case '6': case '7': case '8': case '9':
6154021b 6223 s = scan_num(s, &pl_yylval);
931e0695 6224 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6225 if (PL_expect == XOPERATOR)
8990e307 6226 no_op("Number",s);
79072805
LW
6227 TERM(THING);
6228
6229 case '\'':
5db06880 6230 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6231 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6232 if (PL_expect == XOPERATOR) {
6233 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6234 return deprecate_commaless_var_list();
a0d0e21e 6235 }
463ee0b2 6236 else
8990e307 6237 no_op("String",s);
463ee0b2 6238 }
79072805 6239 if (!s)
d4c19fe8 6240 missingterm(NULL);
6154021b 6241 pl_yylval.ival = OP_CONST;
79072805
LW
6242 TERM(sublex_start());
6243
6244 case '"':
5db06880 6245 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6246 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6247 if (PL_expect == XOPERATOR) {
6248 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6249 return deprecate_commaless_var_list();
a0d0e21e 6250 }
463ee0b2 6251 else
8990e307 6252 no_op("String",s);
463ee0b2 6253 }
79072805 6254 if (!s)
d4c19fe8 6255 missingterm(NULL);
6154021b 6256 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6257 /* FIXME. I think that this can be const if char *d is replaced by
6258 more localised variables. */
3280af22 6259 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6260 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6261 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6262 break;
6263 }
6264 }
79072805
LW
6265 TERM(sublex_start());
6266
6267 case '`':
5db06880 6268 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6269 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6270 if (PL_expect == XOPERATOR)
8990e307 6271 no_op("Backticks",s);
79072805 6272 if (!s)
d4c19fe8 6273 missingterm(NULL);
9b201d7d 6274 readpipe_override();
79072805
LW
6275 TERM(sublex_start());
6276
6277 case '\\':
6278 s++;
a2a5de95
NC
6279 if (PL_lex_inwhat && isDIGIT(*s))
6280 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6281 *s, *s);
3280af22 6282 if (PL_expect == XOPERATOR)
8990e307 6283 no_op("Backslash",s);
79072805
LW
6284 OPERATOR(REFGEN);
6285
a7cb1f99 6286 case 'v':
e526c9e6 6287 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6288 char *start = s + 2;
dd629d5b 6289 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6290 start++;
6291 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6292 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6293 TERM(THING);
6294 }
e526c9e6 6295 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6296 else if (!isALPHA(*start) && (PL_expect == XTERM
6297 || PL_expect == XREF || PL_expect == XSTATE
6298 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6299 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6300 if (!gv) {
6154021b 6301 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6302 TERM(THING);
6303 }
6304 }
a7cb1f99
GS
6305 }
6306 goto keylookup;
79072805 6307 case 'x':
3280af22 6308 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6309 s++;
6310 Mop(OP_REPEAT);
2f3197b3 6311 }
79072805
LW
6312 goto keylookup;
6313
378cc40b 6314 case '_':
79072805
LW
6315 case 'a': case 'A':
6316 case 'b': case 'B':
6317 case 'c': case 'C':
6318 case 'd': case 'D':
6319 case 'e': case 'E':
6320 case 'f': case 'F':
6321 case 'g': case 'G':
6322 case 'h': case 'H':
6323 case 'i': case 'I':
6324 case 'j': case 'J':
6325 case 'k': case 'K':
6326 case 'l': case 'L':
6327 case 'm': case 'M':
6328 case 'n': case 'N':
6329 case 'o': case 'O':
6330 case 'p': case 'P':
6331 case 'q': case 'Q':
6332 case 'r': case 'R':
6333 case 's': case 'S':
6334 case 't': case 'T':
6335 case 'u': case 'U':
a7cb1f99 6336 case 'V':
79072805
LW
6337 case 'w': case 'W':
6338 case 'X':
6339 case 'y': case 'Y':
6340 case 'z': case 'Z':
6341
49dc05e3 6342 keylookup: {
88e1f1a2 6343 bool anydelim;
90771dc0 6344 I32 tmp;
10edeb5d
JH
6345
6346 orig_keyword = 0;
6347 gv = NULL;
6348 gvp = NULL;
49dc05e3 6349
3280af22
NIS
6350 PL_bufptr = s;
6351 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6352
6353 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6354 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6355
6356 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6357 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6358 goto just_a_word;
6359
3643fb5f 6360 d = s;
3280af22 6361 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6362 d++; /* no comments skipped here, or s### is misparsed */
6363
748a9306 6364 /* Is this a word before a => operator? */
1c3923b3 6365 if (*d == '=' && d[1] == '>') {
748a9306 6366 CLINE;
6154021b 6367 pl_yylval.opval
d0a148a6
NC
6368 = (OP*)newSVOP(OP_CONST, 0,
6369 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6370 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6371 TERM(WORD);
6372 }
6373
88e1f1a2
JV
6374 /* Check for plugged-in keyword */
6375 {
6376 OP *o;
6377 int result;
6378 char *saved_bufptr = PL_bufptr;
6379 PL_bufptr = s;
16c91539 6380 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6381 s = PL_bufptr;
6382 if (result == KEYWORD_PLUGIN_DECLINE) {
6383 /* not a plugged-in keyword */
6384 PL_bufptr = saved_bufptr;
6385 } else if (result == KEYWORD_PLUGIN_STMT) {
6386 pl_yylval.opval = o;
6387 CLINE;
6388 PL_expect = XSTATE;
6389 return REPORT(PLUGSTMT);
6390 } else if (result == KEYWORD_PLUGIN_EXPR) {
6391 pl_yylval.opval = o;
6392 CLINE;
6393 PL_expect = XOPERATOR;
6394 return REPORT(PLUGEXPR);
6395 } else {
6396 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6397 PL_tokenbuf);
6398 }
6399 }
6400
6401 /* Check for built-in keyword */
6402 tmp = keyword(PL_tokenbuf, len, 0);
6403
6404 /* Is this a label? */
6405 if (!anydelim && PL_expect == XSTATE
6406 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6407 s = d + 1;
6408 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6409 CLINE;
6410 TOKEN(LABEL);
6411 }
6412
a0d0e21e 6413 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6414 GV *ogv = NULL; /* override (winner) */
6415 GV *hgv = NULL; /* hidden (loser) */
3280af22 6416 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6417 CV *cv;
90e5519e 6418 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6419 (cv = GvCVu(gv)))
6420 {
6421 if (GvIMPORTED_CV(gv))
6422 ogv = gv;
6423 else if (! CvMETHOD(cv))
6424 hgv = gv;
6425 }
6426 if (!ogv &&
3280af22 6427 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6428 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6429 GvCVu(gv) && GvIMPORTED_CV(gv))
6430 {
6431 ogv = gv;
6432 }
6433 }
6434 if (ogv) {
30fe34ed 6435 orig_keyword = tmp;
56f7f34b 6436 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6437 }
6438 else if (gv && !gvp
6439 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6440 && GvCVu(gv))
6e7b2336
GS
6441 {
6442 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6443 }
56f7f34b
CS
6444 else { /* no override */
6445 tmp = -tmp;
a2a5de95
NC
6446 if (tmp == KEY_dump) {
6447 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6448 "dump() better written as CORE::dump()");
ac206dc8 6449 }
a0714e2c 6450 gv = NULL;
56f7f34b 6451 gvp = 0;
a2a5de95
NC
6452 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6453 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6454 "Ambiguous call resolved as CORE::%s(), "
6455 "qualify as such or use &",
6456 GvENAME(hgv));
49dc05e3 6457 }
a0d0e21e
LW
6458 }
6459
6460 reserved_word:
6461 switch (tmp) {
79072805
LW
6462
6463 default: /* not a keyword */
0bfa2a8a
NC
6464 /* Trade off - by using this evil construction we can pull the
6465 variable gv into the block labelled keylookup. If not, then
6466 we have to give it function scope so that the goto from the
6467 earlier ':' case doesn't bypass the initialisation. */
6468 if (0) {
6469 just_a_word_zero_gv:
6470 gv = NULL;
6471 gvp = NULL;
8bee0991 6472 orig_keyword = 0;
0bfa2a8a 6473 }
93a17b20 6474 just_a_word: {
96e4d5b1 6475 SV *sv;
ce29ac45 6476 int pkgname = 0;
f54cb97a 6477 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6478 OP *rv2cv_op;
5069cc75 6479 CV *cv;
5db06880 6480#ifdef PERL_MAD
cd81e915 6481 SV *nextPL_nextwhite = 0;
5db06880
NC
6482#endif
6483
8990e307
LW
6484
6485 /* Get the rest if it looks like a package qualifier */
6486
155aba94 6487 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6488 STRLEN morelen;
3280af22 6489 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6490 TRUE, &morelen);
6491 if (!morelen)
cea2e8a9 6492 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6493 *s == '\'' ? "'" : "::");
c3e0f903 6494 len += morelen;
ce29ac45 6495 pkgname = 1;
a0d0e21e 6496 }
8990e307 6497
3280af22
NIS
6498 if (PL_expect == XOPERATOR) {
6499 if (PL_bufptr == PL_linestart) {
57843af0 6500 CopLINE_dec(PL_curcop);
f1f66076 6501 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6502 CopLINE_inc(PL_curcop);
463ee0b2
LW
6503 }
6504 else
54310121 6505 no_op("Bareword",s);
463ee0b2 6506 }
8990e307 6507
c3e0f903 6508 /* Look for a subroutine with this name in current package,
486ec47a 6509 unless name is "Foo::", in which case Foo is a bareword
c3e0f903
GS
6510 (and a package name). */
6511
5db06880 6512 if (len > 2 && !PL_madskills &&
3280af22 6513 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6514 {
f776e3cd 6515 if (ckWARN(WARN_BAREWORD)
90e5519e 6516 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6517 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6518 "Bareword \"%s\" refers to nonexistent package",
3280af22 6519 PL_tokenbuf);
c3e0f903 6520 len -= 2;
3280af22 6521 PL_tokenbuf[len] = '\0';
a0714e2c 6522 gv = NULL;
c3e0f903
GS
6523 gvp = 0;
6524 }
6525 else {
62d55b22
NC
6526 if (!gv) {
6527 /* Mustn't actually add anything to a symbol table.
6528 But also don't want to "initialise" any placeholder
6529 constants that might already be there into full
6530 blown PVGVs with attached PVCV. */
90e5519e
NC
6531 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6532 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6533 }
b3d904f3 6534 len = 0;
c3e0f903
GS
6535 }
6536
6537 /* if we saw a global override before, get the right name */
8990e307 6538
37bb7629
EB
6539 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6540 len ? len : strlen(PL_tokenbuf));
49dc05e3 6541 if (gvp) {
37bb7629 6542 SV * const tmp_sv = sv;
396482e1 6543 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6544 sv_catsv(sv, tmp_sv);
6545 SvREFCNT_dec(tmp_sv);
8a7a129d 6546 }
37bb7629 6547
5db06880 6548#ifdef PERL_MAD
cd81e915
NC
6549 if (PL_madskills && !PL_thistoken) {
6550 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6551 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6552 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6553 }
6554#endif
8990e307 6555
a0d0e21e 6556 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6557 CLINE;
6154021b
RGS
6558 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6559 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6560
c3e0f903 6561 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6562 if (len)
6563 goto safe_bareword;
6564
f7461760
Z
6565 {
6566 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6567 const_op->op_private = OPpCONST_BARE;
6568 rv2cv_op = newCVREF(0, const_op);
6569 }
d9088386 6570 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6571
8990e307
LW
6572 /* See if it's the indirect object for a list operator. */
6573
3280af22
NIS
6574 if (PL_oldoldbufptr &&
6575 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6576 (PL_oldoldbufptr == PL_last_lop
6577 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6578 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6579 (PL_expect == XREF ||
6580 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6581 {
748a9306
LW
6582 bool immediate_paren = *s == '(';
6583
a0d0e21e 6584 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6585 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6586#ifdef PERL_MAD
cd81e915 6587 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6588#endif
a0d0e21e
LW
6589
6590 /* Two barewords in a row may indicate method call. */
6591
62d55b22 6592 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6593 (tmp = intuit_method(s, gv, cv))) {
6594 op_free(rv2cv_op);
78cdf107
Z
6595 if (tmp == METHOD && !PL_lex_allbrackets &&
6596 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6597 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6598 return REPORT(tmp);
f7461760 6599 }
a0d0e21e
LW
6600
6601 /* If not a declared subroutine, it's an indirect object. */
6602 /* (But it's an indir obj regardless for sort.) */
7294df96 6603 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6604
7294df96
RGS
6605 if (
6606 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6607 (!cv &&
a9ef352a 6608 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6609 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6610 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6611 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6612 )
a9ef352a 6613 {
3280af22 6614 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6615 goto bareword;
93a17b20
LW
6616 }
6617 }
8990e307 6618
3280af22 6619 PL_expect = XOPERATOR;
5db06880
NC
6620#ifdef PERL_MAD
6621 if (isSPACE(*s))
cd81e915
NC
6622 s = SKIPSPACE2(s,nextPL_nextwhite);
6623 PL_nextwhite = nextPL_nextwhite;
5db06880 6624#else
8990e307 6625 s = skipspace(s);
5db06880 6626#endif
1c3923b3
GS
6627
6628 /* Is this a word before a => operator? */
ce29ac45 6629 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6630 op_free(rv2cv_op);
1c3923b3 6631 CLINE;
6154021b 6632 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6633 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6634 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6635 TERM(WORD);
6636 }
6637
6638 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6639 if (*s == '(') {
79072805 6640 CLINE;
5069cc75 6641 if (cv) {
c35e046a
AL
6642 d = s + 1;
6643 while (SPACE_OR_TAB(*d))
6644 d++;
f7461760 6645 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6646 s = d + 1;
c631f32b 6647 goto its_constant;
96e4d5b1 6648 }
6649 }
5db06880
NC
6650#ifdef PERL_MAD
6651 if (PL_madskills) {
cd81e915
NC
6652 PL_nextwhite = PL_thiswhite;
6653 PL_thiswhite = 0;
5db06880 6654 }
cd81e915 6655 start_force(PL_curforce);
5db06880 6656#endif
6154021b 6657 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6658 PL_expect = XOPERATOR;
5db06880
NC
6659#ifdef PERL_MAD
6660 if (PL_madskills) {
cd81e915
NC
6661 PL_nextwhite = nextPL_nextwhite;
6662 curmad('X', PL_thistoken);
6b29d1f5 6663 PL_thistoken = newSVpvs("");
5db06880
NC
6664 }
6665#endif
f7461760 6666 op_free(rv2cv_op);
93a17b20 6667 force_next(WORD);
6154021b 6668 pl_yylval.ival = 0;
463ee0b2 6669 TOKEN('&');
79072805 6670 }
93a17b20 6671
a0d0e21e 6672 /* If followed by var or block, call it a method (unless sub) */
8990e307 6673
f7461760
Z
6674 if ((*s == '$' || *s == '{') && !cv) {
6675 op_free(rv2cv_op);
3280af22
NIS
6676 PL_last_lop = PL_oldbufptr;
6677 PL_last_lop_op = OP_METHOD;
78cdf107
Z
6678 if (!PL_lex_allbrackets &&
6679 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6680 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 6681 PREBLOCK(METHOD);
463ee0b2
LW
6682 }
6683
8990e307
LW
6684 /* If followed by a bareword, see if it looks like indir obj. */
6685
30fe34ed
RGS
6686 if (!orig_keyword
6687 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6688 && (tmp = intuit_method(s, gv, cv))) {
6689 op_free(rv2cv_op);
78cdf107
Z
6690 if (tmp == METHOD && !PL_lex_allbrackets &&
6691 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6692 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6693 return REPORT(tmp);
f7461760 6694 }
93a17b20 6695
8990e307
LW
6696 /* Not a method, so call it a subroutine (if defined) */
6697
5069cc75 6698 if (cv) {
9b387841
NC
6699 if (lastchar == '-')
6700 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6701 "Ambiguous use of -%s resolved as -&%s()",
6702 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6703 /* Check for a constant sub */
f7461760 6704 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6705 its_constant:
f7461760 6706 op_free(rv2cv_op);
6154021b
RGS
6707 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6708 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6709 pl_yylval.opval->op_private = 0;
6b7c6d95 6710 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 6711 TOKEN(WORD);
89bfa8cd 6712 }
6713
6154021b 6714 op_free(pl_yylval.opval);
f7461760 6715 pl_yylval.opval = rv2cv_op;
6154021b 6716 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6717 PL_last_lop = PL_oldbufptr;
bf848113 6718 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6719 /* Is there a prototype? */
5db06880
NC
6720 if (
6721#ifdef PERL_MAD
6722 cv &&
6723#endif
d9f2850e
RGS
6724 SvPOK(cv))
6725 {
5f66b61c 6726 STRLEN protolen;
daba3364 6727 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6728 if (!protolen)
4633a7c4 6729 TERM(FUNC0SUB);
0f5d0394
AE
6730 while (*proto == ';')
6731 proto++;
649d02de
FC
6732 if (
6733 (
6734 (
6735 *proto == '$' || *proto == '_'
c035a075 6736 || *proto == '*' || *proto == '+'
649d02de
FC
6737 )
6738 && proto[1] == '\0'
6739 )
6740 || (
6741 *proto == '\\' && proto[1] && proto[2] == '\0'
6742 )
6743 )
6744 OPERATOR(UNIOPSUB);
6745 if (*proto == '\\' && proto[1] == '[') {
6746 const char *p = proto + 2;
6747 while(*p && *p != ']')
6748 ++p;
6749 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6750 }
7a52d87a 6751 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6752 if (PL_curstash)
6753 sv_setpvs(PL_subname, "__ANON__");
6754 else
6755 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
6756 if (!PL_lex_allbrackets &&
6757 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6758 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
6759 PREBLOCK(LSTOPSUB);
6760 }
a9ef352a 6761 }
5db06880
NC
6762#ifdef PERL_MAD
6763 {
6764 if (PL_madskills) {
cd81e915
NC
6765 PL_nextwhite = PL_thiswhite;
6766 PL_thiswhite = 0;
5db06880 6767 }
cd81e915 6768 start_force(PL_curforce);
6154021b 6769 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6770 PL_expect = XTERM;
6771 if (PL_madskills) {
cd81e915
NC
6772 PL_nextwhite = nextPL_nextwhite;
6773 curmad('X', PL_thistoken);
6b29d1f5 6774 PL_thistoken = newSVpvs("");
5db06880
NC
6775 }
6776 force_next(WORD);
78cdf107
Z
6777 if (!PL_lex_allbrackets &&
6778 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6779 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6780 TOKEN(NOAMP);
6781 }
6782 }
6783
6784 /* Guess harder when madskills require "best effort". */
6785 if (PL_madskills && (!gv || !GvCVu(gv))) {
6786 int probable_sub = 0;
6787 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6788 probable_sub = 1;
6789 else if (isALPHA(*s)) {
6790 char tmpbuf[1024];
6791 STRLEN tmplen;
6792 d = s;
6793 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6794 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6795 probable_sub = 1;
6796 else {
6797 while (d < PL_bufend && isSPACE(*d))
6798 d++;
6799 if (*d == '=' && d[1] == '>')
6800 probable_sub = 1;
6801 }
6802 }
6803 if (probable_sub) {
7a6d04f4 6804 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6805 op_free(pl_yylval.opval);
f7461760 6806 pl_yylval.opval = rv2cv_op;
6154021b 6807 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6808 PL_last_lop = PL_oldbufptr;
6809 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6810 PL_nextwhite = PL_thiswhite;
6811 PL_thiswhite = 0;
6812 start_force(PL_curforce);
6154021b 6813 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6814 PL_expect = XTERM;
cd81e915
NC
6815 PL_nextwhite = nextPL_nextwhite;
6816 curmad('X', PL_thistoken);
6b29d1f5 6817 PL_thistoken = newSVpvs("");
5db06880 6818 force_next(WORD);
78cdf107
Z
6819 if (!PL_lex_allbrackets &&
6820 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6821 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6822 TOKEN(NOAMP);
6823 }
6824#else
6154021b 6825 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6826 PL_expect = XTERM;
8990e307 6827 force_next(WORD);
78cdf107
Z
6828 if (!PL_lex_allbrackets &&
6829 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6830 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 6831 TOKEN(NOAMP);
5db06880 6832#endif
8990e307 6833 }
748a9306 6834
8990e307
LW
6835 /* Call it a bare word */
6836
5603f27d 6837 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6838 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6839 else {
9a073a1d
RGS
6840 bareword:
6841 /* after "print" and similar functions (corresponding to
6842 * "F? L" in opcode.pl), whatever wasn't already parsed as
6843 * a filehandle should be subject to "strict subs".
6844 * Likewise for the optional indirect-object argument to system
6845 * or exec, which can't be a bareword */
6846 if ((PL_last_lop_op == OP_PRINT
6847 || PL_last_lop_op == OP_PRTF
6848 || PL_last_lop_op == OP_SAY
6849 || PL_last_lop_op == OP_SYSTEM
6850 || PL_last_lop_op == OP_EXEC)
6851 && (PL_hints & HINT_STRICT_SUBS))
6852 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6853 if (lastchar != '-') {
6854 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6855 d = PL_tokenbuf;
6856 while (isLOWER(*d))
6857 d++;
da51bb9b 6858 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6859 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6860 PL_tokenbuf);
6861 }
748a9306
LW
6862 }
6863 }
f7461760 6864 op_free(rv2cv_op);
c3e0f903
GS
6865
6866 safe_bareword:
9b387841
NC
6867 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6868 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6869 "Operator or semicolon missing before %c%s",
6870 lastchar, PL_tokenbuf);
6871 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6872 "Ambiguous use of %c resolved as operator %c",
6873 lastchar, lastchar);
748a9306 6874 }
93a17b20 6875 TOKEN(WORD);
79072805 6876 }
79072805 6877
68dc0745 6878 case KEY___FILE__:
6154021b 6879 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6880 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6881 TERM(THING);
6882
79072805 6883 case KEY___LINE__:
6154021b 6884 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6885 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6886 TERM(THING);
68dc0745 6887
6888 case KEY___PACKAGE__:
6154021b 6889 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6890 (PL_curstash
5aaec2b4 6891 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6892 : &PL_sv_undef));
79072805 6893 TERM(THING);
79072805 6894
e50aee73 6895 case KEY___DATA__:
79072805
LW
6896 case KEY___END__: {
6897 GV *gv;
3280af22 6898 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6899 const char *pname = "main";
3280af22 6900 if (PL_tokenbuf[2] == 'D')
bfcb3514 6901 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6902 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6903 SVt_PVIO);
a5f75d66 6904 GvMULTI_on(gv);
79072805 6905 if (!GvIO(gv))
a0d0e21e 6906 GvIOp(gv) = newIO();
3280af22 6907 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6908#if defined(HAS_FCNTL) && defined(F_SETFD)
6909 {
f54cb97a 6910 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6911 fcntl(fd,F_SETFD,fd >= 3);
6912 }
79072805 6913#endif
fd049845 6914 /* Mark this internal pseudo-handle as clean */
6915 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6916 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6917 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6918 else
50952442 6919 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6920#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6921 /* if the script was opened in binmode, we need to revert
53129d29 6922 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6923 * XXX this is a questionable hack at best. */
53129d29
GS
6924 if (PL_bufend-PL_bufptr > 2
6925 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6926 {
6927 Off_t loc = 0;
50952442 6928 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6929 loc = PerlIO_tell(PL_rsfp);
6930 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6931 }
2986a63f
JH
6932#ifdef NETWARE
6933 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6934#else
c39cd008 6935 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6936#endif /* NETWARE */
1143fce0
JH
6937#ifdef PERLIO_IS_STDIO /* really? */
6938# if defined(__BORLANDC__)
cb359b41
JH
6939 /* XXX see note in do_binmode() */
6940 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6941# endif
6942#endif
c39cd008
GS
6943 if (loc > 0)
6944 PerlIO_seek(PL_rsfp, loc, 0);
6945 }
6946 }
6947#endif
7948272d 6948#ifdef PERLIO_LAYERS
52d2e0f4
JH
6949 if (!IN_BYTES) {
6950 if (UTF)
6951 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6952 else if (PL_encoding) {
6953 SV *name;
6954 dSP;
6955 ENTER;
6956 SAVETMPS;
6957 PUSHMARK(sp);
6958 EXTEND(SP, 1);
6959 XPUSHs(PL_encoding);
6960 PUTBACK;
6961 call_method("name", G_SCALAR);
6962 SPAGAIN;
6963 name = POPs;
6964 PUTBACK;
bfed75c6 6965 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6966 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6967 SVfARG(name)));
52d2e0f4
JH
6968 FREETMPS;
6969 LEAVE;
6970 }
6971 }
7948272d 6972#endif
5db06880
NC
6973#ifdef PERL_MAD
6974 if (PL_madskills) {
cd81e915
NC
6975 if (PL_realtokenstart >= 0) {
6976 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6977 if (!PL_endwhite)
6b29d1f5 6978 PL_endwhite = newSVpvs("");
cd81e915
NC
6979 sv_catsv(PL_endwhite, PL_thiswhite);
6980 PL_thiswhite = 0;
6981 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6982 PL_realtokenstart = -1;
5db06880 6983 }
5cc814fd
NC
6984 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6985 != NULL) ;
5db06880
NC
6986 }
6987#endif
4608196e 6988 PL_rsfp = NULL;
79072805
LW
6989 }
6990 goto fake_eof;
e929a76b 6991 }
de3bb511 6992
8990e307 6993 case KEY_AUTOLOAD:
ed6116ce 6994 case KEY_DESTROY:
79072805 6995 case KEY_BEGIN:
3c10abe3 6996 case KEY_UNITCHECK:
7d30b5c4 6997 case KEY_CHECK:
7d07dbc2 6998 case KEY_INIT:
7d30b5c4 6999 case KEY_END:
3280af22
NIS
7000 if (PL_expect == XSTATE) {
7001 s = PL_bufptr;
93a17b20 7002 goto really_sub;
79072805
LW
7003 }
7004 goto just_a_word;
7005
a0d0e21e
LW
7006 case KEY_CORE:
7007 if (*s == ':' && s[1] == ':') {
7008 s += 2;
748a9306 7009 d = s;
3280af22 7010 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 7011 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 7012 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
7013 if (tmp < 0)
7014 tmp = -tmp;
850e8516 7015 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 7016 /* that's a way to remember we saw "CORE::" */
850e8516 7017 orig_keyword = tmp;
a0d0e21e
LW
7018 goto reserved_word;
7019 }
7020 goto just_a_word;
7021
463ee0b2
LW
7022 case KEY_abs:
7023 UNI(OP_ABS);
7024
79072805
LW
7025 case KEY_alarm:
7026 UNI(OP_ALARM);
7027
7028 case KEY_accept:
a0d0e21e 7029 LOP(OP_ACCEPT,XTERM);
79072805 7030
463ee0b2 7031 case KEY_and:
78cdf107
Z
7032 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7033 return REPORT(0);
463ee0b2
LW
7034 OPERATOR(ANDOP);
7035
79072805 7036 case KEY_atan2:
a0d0e21e 7037 LOP(OP_ATAN2,XTERM);
85e6fe83 7038
79072805 7039 case KEY_bind:
a0d0e21e 7040 LOP(OP_BIND,XTERM);
79072805
LW
7041
7042 case KEY_binmode:
1c1fc3ea 7043 LOP(OP_BINMODE,XTERM);
79072805
LW
7044
7045 case KEY_bless:
a0d0e21e 7046 LOP(OP_BLESS,XTERM);
79072805 7047
0d863452
RH
7048 case KEY_break:
7049 FUN0(OP_BREAK);
7050
79072805
LW
7051 case KEY_chop:
7052 UNI(OP_CHOP);
7053
7054 case KEY_continue:
0d863452
RH
7055 /* When 'use switch' is in effect, continue has a dual
7056 life as a control operator. */
7057 {
ef89dcc3 7058 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
7059 PREBLOCK(CONTINUE);
7060 else {
7061 /* We have to disambiguate the two senses of
7062 "continue". If the next token is a '{' then
7063 treat it as the start of a continue block;
7064 otherwise treat it as a control operator.
7065 */
7066 s = skipspace(s);
7067 if (*s == '{')
79072805 7068 PREBLOCK(CONTINUE);
0d863452
RH
7069 else
7070 FUN0(OP_CONTINUE);
7071 }
7072 }
79072805
LW
7073
7074 case KEY_chdir:
fafc274c
NC
7075 /* may use HOME */
7076 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7077 UNI(OP_CHDIR);
7078
7079 case KEY_close:
7080 UNI(OP_CLOSE);
7081
7082 case KEY_closedir:
7083 UNI(OP_CLOSEDIR);
7084
7085 case KEY_cmp:
78cdf107
Z
7086 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7087 return REPORT(0);
79072805
LW
7088 Eop(OP_SCMP);
7089
7090 case KEY_caller:
7091 UNI(OP_CALLER);
7092
7093 case KEY_crypt:
7094#ifdef FCRYPT
f4c556ac
GS
7095 if (!PL_cryptseen) {
7096 PL_cryptseen = TRUE;
de3bb511 7097 init_des();
f4c556ac 7098 }
a687059c 7099#endif
a0d0e21e 7100 LOP(OP_CRYPT,XTERM);
79072805
LW
7101
7102 case KEY_chmod:
a0d0e21e 7103 LOP(OP_CHMOD,XTERM);
79072805
LW
7104
7105 case KEY_chown:
a0d0e21e 7106 LOP(OP_CHOWN,XTERM);
79072805
LW
7107
7108 case KEY_connect:
a0d0e21e 7109 LOP(OP_CONNECT,XTERM);
79072805 7110
463ee0b2
LW
7111 case KEY_chr:
7112 UNI(OP_CHR);
7113
79072805
LW
7114 case KEY_cos:
7115 UNI(OP_COS);
7116
7117 case KEY_chroot:
7118 UNI(OP_CHROOT);
7119
0d863452
RH
7120 case KEY_default:
7121 PREBLOCK(DEFAULT);
7122
79072805 7123 case KEY_do:
29595ff2 7124 s = SKIPSPACE1(s);
79072805 7125 if (*s == '{')
a0d0e21e 7126 PRETERMBLOCK(DO);
79072805 7127 if (*s != '\'')
89c5585f 7128 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
7129 if (orig_keyword == KEY_do) {
7130 orig_keyword = 0;
6154021b 7131 pl_yylval.ival = 1;
850e8516
RGS
7132 }
7133 else
6154021b 7134 pl_yylval.ival = 0;
378cc40b 7135 OPERATOR(DO);
79072805
LW
7136
7137 case KEY_die:
3280af22 7138 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7139 LOP(OP_DIE,XTERM);
79072805
LW
7140
7141 case KEY_defined:
7142 UNI(OP_DEFINED);
7143
7144 case KEY_delete:
a0d0e21e 7145 UNI(OP_DELETE);
79072805
LW
7146
7147 case KEY_dbmopen:
74e8ce34
NC
7148 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7149 STR_WITH_LEN("NDBM_File::"),
7150 STR_WITH_LEN("DB_File::"),
7151 STR_WITH_LEN("GDBM_File::"),
7152 STR_WITH_LEN("SDBM_File::"),
7153 STR_WITH_LEN("ODBM_File::"),
7154 NULL);
a0d0e21e 7155 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7156
7157 case KEY_dbmclose:
7158 UNI(OP_DBMCLOSE);
7159
7160 case KEY_dump:
a0d0e21e 7161 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7162 LOOPX(OP_DUMP);
7163
7164 case KEY_else:
7165 PREBLOCK(ELSE);
7166
7167 case KEY_elsif:
6154021b 7168 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7169 OPERATOR(ELSIF);
7170
7171 case KEY_eq:
78cdf107
Z
7172 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7173 return REPORT(0);
79072805
LW
7174 Eop(OP_SEQ);
7175
a0d0e21e
LW
7176 case KEY_exists:
7177 UNI(OP_EXISTS);
4e553d73 7178
79072805 7179 case KEY_exit:
5db06880
NC
7180 if (PL_madskills)
7181 UNI(OP_INT);
79072805
LW
7182 UNI(OP_EXIT);
7183
7184 case KEY_eval:
29595ff2 7185 s = SKIPSPACE1(s);
32e2a35d
RGS
7186 if (*s == '{') { /* block eval */
7187 PL_expect = XTERMBLOCK;
7188 UNIBRACK(OP_ENTERTRY);
7189 }
7190 else { /* string eval */
7191 PL_expect = XTERM;
7192 UNIBRACK(OP_ENTEREVAL);
7193 }
79072805
LW
7194
7195 case KEY_eof:
7196 UNI(OP_EOF);
7197
7198 case KEY_exp:
7199 UNI(OP_EXP);
7200
7201 case KEY_each:
7202 UNI(OP_EACH);
7203
7204 case KEY_exec:
a0d0e21e 7205 LOP(OP_EXEC,XREF);
79072805
LW
7206
7207 case KEY_endhostent:
7208 FUN0(OP_EHOSTENT);
7209
7210 case KEY_endnetent:
7211 FUN0(OP_ENETENT);
7212
7213 case KEY_endservent:
7214 FUN0(OP_ESERVENT);
7215
7216 case KEY_endprotoent:
7217 FUN0(OP_EPROTOENT);
7218
7219 case KEY_endpwent:
7220 FUN0(OP_EPWENT);
7221
7222 case KEY_endgrent:
7223 FUN0(OP_EGRENT);
7224
7225 case KEY_for:
7226 case KEY_foreach:
78cdf107
Z
7227 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7228 return REPORT(0);
6154021b 7229 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7230 s = SKIPSPACE1(s);
7e2040f0 7231 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7232 char *p = s;
5db06880
NC
7233#ifdef PERL_MAD
7234 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7235#endif
7236
3280af22 7237 if ((PL_bufend - p) >= 3 &&
55497cff 7238 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7239 p += 2;
77ca0c92
LW
7240 else if ((PL_bufend - p) >= 4 &&
7241 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7242 p += 3;
29595ff2 7243 p = PEEKSPACE(p);
7e2040f0 7244 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7245 p = scan_ident(p, PL_bufend,
7246 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7247 p = PEEKSPACE(p);
77ca0c92
LW
7248 }
7249 if (*p != '$')
cea2e8a9 7250 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7251#ifdef PERL_MAD
7252 s = SvPVX(PL_linestr) + soff;
7253#endif
55497cff 7254 }
79072805
LW
7255 OPERATOR(FOR);
7256
7257 case KEY_formline:
a0d0e21e 7258 LOP(OP_FORMLINE,XTERM);
79072805
LW
7259
7260 case KEY_fork:
7261 FUN0(OP_FORK);
7262
7263 case KEY_fcntl:
a0d0e21e 7264 LOP(OP_FCNTL,XTERM);
79072805
LW
7265
7266 case KEY_fileno:
7267 UNI(OP_FILENO);
7268
7269 case KEY_flock:
a0d0e21e 7270 LOP(OP_FLOCK,XTERM);
79072805
LW
7271
7272 case KEY_gt:
78cdf107
Z
7273 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7274 return REPORT(0);
79072805
LW
7275 Rop(OP_SGT);
7276
7277 case KEY_ge:
78cdf107
Z
7278 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7279 return REPORT(0);
79072805
LW
7280 Rop(OP_SGE);
7281
7282 case KEY_grep:
2c38e13d 7283 LOP(OP_GREPSTART, XREF);
79072805
LW
7284
7285 case KEY_goto:
a0d0e21e 7286 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7287 LOOPX(OP_GOTO);
7288
7289 case KEY_gmtime:
7290 UNI(OP_GMTIME);
7291
7292 case KEY_getc:
6f33ba73 7293 UNIDOR(OP_GETC);
79072805
LW
7294
7295 case KEY_getppid:
7296 FUN0(OP_GETPPID);
7297
7298 case KEY_getpgrp:
7299 UNI(OP_GETPGRP);
7300
7301 case KEY_getpriority:
a0d0e21e 7302 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7303
7304 case KEY_getprotobyname:
7305 UNI(OP_GPBYNAME);
7306
7307 case KEY_getprotobynumber:
a0d0e21e 7308 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7309
7310 case KEY_getprotoent:
7311 FUN0(OP_GPROTOENT);
7312
7313 case KEY_getpwent:
7314 FUN0(OP_GPWENT);
7315
7316 case KEY_getpwnam:
ff68c719 7317 UNI(OP_GPWNAM);
79072805
LW
7318
7319 case KEY_getpwuid:
ff68c719 7320 UNI(OP_GPWUID);
79072805
LW
7321
7322 case KEY_getpeername:
7323 UNI(OP_GETPEERNAME);
7324
7325 case KEY_gethostbyname:
7326 UNI(OP_GHBYNAME);
7327
7328 case KEY_gethostbyaddr:
a0d0e21e 7329 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7330
7331 case KEY_gethostent:
7332 FUN0(OP_GHOSTENT);
7333
7334 case KEY_getnetbyname:
7335 UNI(OP_GNBYNAME);
7336
7337 case KEY_getnetbyaddr:
a0d0e21e 7338 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7339
7340 case KEY_getnetent:
7341 FUN0(OP_GNETENT);
7342
7343 case KEY_getservbyname:
a0d0e21e 7344 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7345
7346 case KEY_getservbyport:
a0d0e21e 7347 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7348
7349 case KEY_getservent:
7350 FUN0(OP_GSERVENT);
7351
7352 case KEY_getsockname:
7353 UNI(OP_GETSOCKNAME);
7354
7355 case KEY_getsockopt:
a0d0e21e 7356 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7357
7358 case KEY_getgrent:
7359 FUN0(OP_GGRENT);
7360
7361 case KEY_getgrnam:
ff68c719 7362 UNI(OP_GGRNAM);
79072805
LW
7363
7364 case KEY_getgrgid:
ff68c719 7365 UNI(OP_GGRGID);
79072805
LW
7366
7367 case KEY_getlogin:
7368 FUN0(OP_GETLOGIN);
7369
0d863452 7370 case KEY_given:
6154021b 7371 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7372 OPERATOR(GIVEN);
7373
93a17b20 7374 case KEY_glob:
a0d0e21e 7375 LOP(OP_GLOB,XTERM);
93a17b20 7376
79072805
LW
7377 case KEY_hex:
7378 UNI(OP_HEX);
7379
7380 case KEY_if:
78cdf107
Z
7381 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7382 return REPORT(0);
6154021b 7383 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7384 OPERATOR(IF);
7385
7386 case KEY_index:
a0d0e21e 7387 LOP(OP_INDEX,XTERM);
79072805
LW
7388
7389 case KEY_int:
7390 UNI(OP_INT);
7391
7392 case KEY_ioctl:
a0d0e21e 7393 LOP(OP_IOCTL,XTERM);
79072805
LW
7394
7395 case KEY_join:
a0d0e21e 7396 LOP(OP_JOIN,XTERM);
79072805
LW
7397
7398 case KEY_keys:
7399 UNI(OP_KEYS);
7400
7401 case KEY_kill:
a0d0e21e 7402 LOP(OP_KILL,XTERM);
79072805
LW
7403
7404 case KEY_last:
a0d0e21e 7405 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7406 LOOPX(OP_LAST);
4e553d73 7407
79072805
LW
7408 case KEY_lc:
7409 UNI(OP_LC);
7410
7411 case KEY_lcfirst:
7412 UNI(OP_LCFIRST);
7413
7414 case KEY_local:
6154021b 7415 pl_yylval.ival = 0;
79072805
LW
7416 OPERATOR(LOCAL);
7417
7418 case KEY_length:
7419 UNI(OP_LENGTH);
7420
7421 case KEY_lt:
78cdf107
Z
7422 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7423 return REPORT(0);
79072805
LW
7424 Rop(OP_SLT);
7425
7426 case KEY_le:
78cdf107
Z
7427 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7428 return REPORT(0);
79072805
LW
7429 Rop(OP_SLE);
7430
7431 case KEY_localtime:
7432 UNI(OP_LOCALTIME);
7433
7434 case KEY_log:
7435 UNI(OP_LOG);
7436
7437 case KEY_link:
a0d0e21e 7438 LOP(OP_LINK,XTERM);
79072805
LW
7439
7440 case KEY_listen:
a0d0e21e 7441 LOP(OP_LISTEN,XTERM);
79072805 7442
c0329465
MB
7443 case KEY_lock:
7444 UNI(OP_LOCK);
7445
79072805
LW
7446 case KEY_lstat:
7447 UNI(OP_LSTAT);
7448
7449 case KEY_m:
8782bef2 7450 s = scan_pat(s,OP_MATCH);
79072805
LW
7451 TERM(sublex_start());
7452
a0d0e21e 7453 case KEY_map:
2c38e13d 7454 LOP(OP_MAPSTART, XREF);
4e4e412b 7455
79072805 7456 case KEY_mkdir:
a0d0e21e 7457 LOP(OP_MKDIR,XTERM);
79072805
LW
7458
7459 case KEY_msgctl:
a0d0e21e 7460 LOP(OP_MSGCTL,XTERM);
79072805
LW
7461
7462 case KEY_msgget:
a0d0e21e 7463 LOP(OP_MSGGET,XTERM);
79072805
LW
7464
7465 case KEY_msgrcv:
a0d0e21e 7466 LOP(OP_MSGRCV,XTERM);
79072805
LW
7467
7468 case KEY_msgsnd:
a0d0e21e 7469 LOP(OP_MSGSND,XTERM);
79072805 7470
77ca0c92 7471 case KEY_our:
93a17b20 7472 case KEY_my:
952306ac 7473 case KEY_state:
eac04b2e 7474 PL_in_my = (U16)tmp;
29595ff2 7475 s = SKIPSPACE1(s);
7e2040f0 7476 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7477#ifdef PERL_MAD
7478 char* start = s;
7479#endif
3280af22 7480 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7481 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7482 goto really_sub;
def3634b 7483 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7484 if (!PL_in_my_stash) {
c750a3ec 7485 char tmpbuf[1024];
3280af22 7486 PL_bufptr = s;
d9fad198 7487 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7488 yyerror(tmpbuf);
7489 }
5db06880
NC
7490#ifdef PERL_MAD
7491 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7492 sv_catsv(PL_thistoken, PL_nextwhite);
7493 PL_nextwhite = 0;
7494 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7495 }
7496#endif
c750a3ec 7497 }
6154021b 7498 pl_yylval.ival = 1;
55497cff 7499 OPERATOR(MY);
93a17b20 7500
79072805 7501 case KEY_next:
a0d0e21e 7502 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7503 LOOPX(OP_NEXT);
7504
7505 case KEY_ne:
78cdf107
Z
7506 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7507 return REPORT(0);
79072805
LW
7508 Eop(OP_SNE);
7509
a0d0e21e 7510 case KEY_no:
468aa647 7511 s = tokenize_use(0, s);
a0d0e21e
LW
7512 OPERATOR(USE);
7513
7514 case KEY_not:
29595ff2 7515 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 7516 FUN1(OP_NOT);
78cdf107
Z
7517 else {
7518 if (!PL_lex_allbrackets &&
7519 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7520 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 7521 OPERATOR(NOTOP);
78cdf107 7522 }
a0d0e21e 7523
79072805 7524 case KEY_open:
29595ff2 7525 s = SKIPSPACE1(s);
7e2040f0 7526 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7527 const char *t;
c35e046a
AL
7528 for (d = s; isALNUM_lazy_if(d,UTF);)
7529 d++;
7530 for (t=d; isSPACE(*t);)
7531 t++;
e2ab214b 7532 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7533 /* [perl #16184] */
7534 && !(t[0] == '=' && t[1] == '>')
7535 ) {
5f66b61c 7536 int parms_len = (int)(d-s);
9014280d 7537 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7538 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7539 parms_len, s, parms_len, s);
66fbe8fb 7540 }
93a17b20 7541 }
a0d0e21e 7542 LOP(OP_OPEN,XTERM);
79072805 7543
463ee0b2 7544 case KEY_or:
78cdf107
Z
7545 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7546 return REPORT(0);
6154021b 7547 pl_yylval.ival = OP_OR;
463ee0b2
LW
7548 OPERATOR(OROP);
7549
79072805
LW
7550 case KEY_ord:
7551 UNI(OP_ORD);
7552
7553 case KEY_oct:
7554 UNI(OP_OCT);
7555
7556 case KEY_opendir:
a0d0e21e 7557 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7558
7559 case KEY_print:
3280af22 7560 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7561 LOP(OP_PRINT,XREF);
79072805
LW
7562
7563 case KEY_printf:
3280af22 7564 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7565 LOP(OP_PRTF,XREF);
79072805 7566
c07a80fd 7567 case KEY_prototype:
7568 UNI(OP_PROTOTYPE);
7569
79072805 7570 case KEY_push:
a0d0e21e 7571 LOP(OP_PUSH,XTERM);
79072805
LW
7572
7573 case KEY_pop:
6f33ba73 7574 UNIDOR(OP_POP);
79072805 7575
a0d0e21e 7576 case KEY_pos:
6f33ba73 7577 UNIDOR(OP_POS);
4e553d73 7578
79072805 7579 case KEY_pack:
a0d0e21e 7580 LOP(OP_PACK,XTERM);
79072805
LW
7581
7582 case KEY_package:
a0d0e21e 7583 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7584 s = SKIPSPACE1(s);
91152fc1 7585 s = force_strict_version(s);
4e4da3ac 7586 PL_lex_expect = XBLOCK;
79072805
LW
7587 OPERATOR(PACKAGE);
7588
7589 case KEY_pipe:
a0d0e21e 7590 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7591
7592 case KEY_q:
5db06880 7593 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7594 if (!s)
d4c19fe8 7595 missingterm(NULL);
6154021b 7596 pl_yylval.ival = OP_CONST;
79072805
LW
7597 TERM(sublex_start());
7598
a0d0e21e
LW
7599 case KEY_quotemeta:
7600 UNI(OP_QUOTEMETA);
7601
ea25a9b2
Z
7602 case KEY_qw: {
7603 OP *words = NULL;
5db06880 7604 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7605 if (!s)
d4c19fe8 7606 missingterm(NULL);
3480a8d2 7607 PL_expect = XOPERATOR;
8127e0e3 7608 if (SvCUR(PL_lex_stuff)) {
8127e0e3 7609 int warned = 0;
3280af22 7610 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7611 while (len) {
d4c19fe8
AL
7612 for (; isSPACE(*d) && len; --len, ++d)
7613 /**/;
8127e0e3 7614 if (len) {
d4c19fe8 7615 SV *sv;
f54cb97a 7616 const char *b = d;
e476b1b5 7617 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7618 for (; !isSPACE(*d) && len; --len, ++d) {
7619 if (*d == ',') {
9014280d 7620 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7621 "Possible attempt to separate words with commas");
7622 ++warned;
7623 }
7624 else if (*d == '#') {
9014280d 7625 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7626 "Possible attempt to put comments in qw() list");
7627 ++warned;
7628 }
7629 }
7630 }
7631 else {
d4c19fe8
AL
7632 for (; !isSPACE(*d) && len; --len, ++d)
7633 /**/;
8127e0e3 7634 }
740cce10 7635 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7636 words = op_append_elem(OP_LIST, words,
7948272d 7637 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7638 }
7639 }
7640 }
ea25a9b2
Z
7641 if (!words)
7642 words = newNULLLIST();
37fd879b 7643 if (PL_lex_stuff) {
8127e0e3 7644 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7645 PL_lex_stuff = NULL;
37fd879b 7646 }
ea25a9b2
Z
7647 PL_expect = XOPERATOR;
7648 pl_yylval.opval = sawparens(words);
7649 TOKEN(QWLIST);
7650 }
8990e307 7651
79072805 7652 case KEY_qq:
5db06880 7653 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7654 if (!s)
d4c19fe8 7655 missingterm(NULL);
6154021b 7656 pl_yylval.ival = OP_STRINGIFY;
3280af22 7657 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 7658 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
7659 TERM(sublex_start());
7660
8782bef2
GB
7661 case KEY_qr:
7662 s = scan_pat(s,OP_QR);
7663 TERM(sublex_start());
7664
79072805 7665 case KEY_qx:
5db06880 7666 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7667 if (!s)
d4c19fe8 7668 missingterm(NULL);
9b201d7d 7669 readpipe_override();
79072805
LW
7670 TERM(sublex_start());
7671
7672 case KEY_return:
7673 OLDLOP(OP_RETURN);
7674
7675 case KEY_require:
29595ff2 7676 s = SKIPSPACE1(s);
e759cc13
RGS
7677 if (isDIGIT(*s)) {
7678 s = force_version(s, FALSE);
a7cb1f99 7679 }
e759cc13
RGS
7680 else if (*s != 'v' || !isDIGIT(s[1])
7681 || (s = force_version(s, TRUE), *s == 'v'))
7682 {
a7cb1f99
GS
7683 *PL_tokenbuf = '\0';
7684 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7685 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7686 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7687 else if (*s == '<')
7688 yyerror("<> should be quotes");
7689 }
a72a1c8b
RGS
7690 if (orig_keyword == KEY_require) {
7691 orig_keyword = 0;
6154021b 7692 pl_yylval.ival = 1;
a72a1c8b
RGS
7693 }
7694 else
6154021b 7695 pl_yylval.ival = 0;
a72a1c8b
RGS
7696 PL_expect = XTERM;
7697 PL_bufptr = s;
7698 PL_last_uni = PL_oldbufptr;
7699 PL_last_lop_op = OP_REQUIRE;
7700 s = skipspace(s);
7701 return REPORT( (int)REQUIRE );
79072805
LW
7702
7703 case KEY_reset:
7704 UNI(OP_RESET);
7705
7706 case KEY_redo:
a0d0e21e 7707 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7708 LOOPX(OP_REDO);
7709
7710 case KEY_rename:
a0d0e21e 7711 LOP(OP_RENAME,XTERM);
79072805
LW
7712
7713 case KEY_rand:
7714 UNI(OP_RAND);
7715
7716 case KEY_rmdir:
7717 UNI(OP_RMDIR);
7718
7719 case KEY_rindex:
a0d0e21e 7720 LOP(OP_RINDEX,XTERM);
79072805
LW
7721
7722 case KEY_read:
a0d0e21e 7723 LOP(OP_READ,XTERM);
79072805
LW
7724
7725 case KEY_readdir:
7726 UNI(OP_READDIR);
7727
93a17b20 7728 case KEY_readline:
6f33ba73 7729 UNIDOR(OP_READLINE);
93a17b20
LW
7730
7731 case KEY_readpipe:
0858480c 7732 UNIDOR(OP_BACKTICK);
93a17b20 7733
79072805
LW
7734 case KEY_rewinddir:
7735 UNI(OP_REWINDDIR);
7736
7737 case KEY_recv:
a0d0e21e 7738 LOP(OP_RECV,XTERM);
79072805
LW
7739
7740 case KEY_reverse:
a0d0e21e 7741 LOP(OP_REVERSE,XTERM);
79072805
LW
7742
7743 case KEY_readlink:
6f33ba73 7744 UNIDOR(OP_READLINK);
79072805
LW
7745
7746 case KEY_ref:
7747 UNI(OP_REF);
7748
7749 case KEY_s:
7750 s = scan_subst(s);
6154021b 7751 if (pl_yylval.opval)
79072805
LW
7752 TERM(sublex_start());
7753 else
7754 TOKEN(1); /* force error */
7755
0d863452
RH
7756 case KEY_say:
7757 checkcomma(s,PL_tokenbuf,"filehandle");
7758 LOP(OP_SAY,XREF);
7759
a0d0e21e
LW
7760 case KEY_chomp:
7761 UNI(OP_CHOMP);
4e553d73 7762
79072805
LW
7763 case KEY_scalar:
7764 UNI(OP_SCALAR);
7765
7766 case KEY_select:
a0d0e21e 7767 LOP(OP_SELECT,XTERM);
79072805
LW
7768
7769 case KEY_seek:
a0d0e21e 7770 LOP(OP_SEEK,XTERM);
79072805
LW
7771
7772 case KEY_semctl:
a0d0e21e 7773 LOP(OP_SEMCTL,XTERM);
79072805
LW
7774
7775 case KEY_semget:
a0d0e21e 7776 LOP(OP_SEMGET,XTERM);
79072805
LW
7777
7778 case KEY_semop:
a0d0e21e 7779 LOP(OP_SEMOP,XTERM);
79072805
LW
7780
7781 case KEY_send:
a0d0e21e 7782 LOP(OP_SEND,XTERM);
79072805
LW
7783
7784 case KEY_setpgrp:
a0d0e21e 7785 LOP(OP_SETPGRP,XTERM);
79072805
LW
7786
7787 case KEY_setpriority:
a0d0e21e 7788 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7789
7790 case KEY_sethostent:
ff68c719 7791 UNI(OP_SHOSTENT);
79072805
LW
7792
7793 case KEY_setnetent:
ff68c719 7794 UNI(OP_SNETENT);
79072805
LW
7795
7796 case KEY_setservent:
ff68c719 7797 UNI(OP_SSERVENT);
79072805
LW
7798
7799 case KEY_setprotoent:
ff68c719 7800 UNI(OP_SPROTOENT);
79072805
LW
7801
7802 case KEY_setpwent:
7803 FUN0(OP_SPWENT);
7804
7805 case KEY_setgrent:
7806 FUN0(OP_SGRENT);
7807
7808 case KEY_seekdir:
a0d0e21e 7809 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7810
7811 case KEY_setsockopt:
a0d0e21e 7812 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7813
7814 case KEY_shift:
6f33ba73 7815 UNIDOR(OP_SHIFT);
79072805
LW
7816
7817 case KEY_shmctl:
a0d0e21e 7818 LOP(OP_SHMCTL,XTERM);
79072805
LW
7819
7820 case KEY_shmget:
a0d0e21e 7821 LOP(OP_SHMGET,XTERM);
79072805
LW
7822
7823 case KEY_shmread:
a0d0e21e 7824 LOP(OP_SHMREAD,XTERM);
79072805
LW
7825
7826 case KEY_shmwrite:
a0d0e21e 7827 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7828
7829 case KEY_shutdown:
a0d0e21e 7830 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7831
7832 case KEY_sin:
7833 UNI(OP_SIN);
7834
7835 case KEY_sleep:
7836 UNI(OP_SLEEP);
7837
7838 case KEY_socket:
a0d0e21e 7839 LOP(OP_SOCKET,XTERM);
79072805
LW
7840
7841 case KEY_socketpair:
a0d0e21e 7842 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7843
7844 case KEY_sort:
3280af22 7845 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7846 s = SKIPSPACE1(s);
79072805 7847 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7848 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7849 PL_expect = XTERM;
15f0808c 7850 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7851 LOP(OP_SORT,XREF);
79072805
LW
7852
7853 case KEY_split:
a0d0e21e 7854 LOP(OP_SPLIT,XTERM);
79072805
LW
7855
7856 case KEY_sprintf:
a0d0e21e 7857 LOP(OP_SPRINTF,XTERM);
79072805
LW
7858
7859 case KEY_splice:
a0d0e21e 7860 LOP(OP_SPLICE,XTERM);
79072805
LW
7861
7862 case KEY_sqrt:
7863 UNI(OP_SQRT);
7864
7865 case KEY_srand:
7866 UNI(OP_SRAND);
7867
7868 case KEY_stat:
7869 UNI(OP_STAT);
7870
7871 case KEY_study:
79072805
LW
7872 UNI(OP_STUDY);
7873
7874 case KEY_substr:
a0d0e21e 7875 LOP(OP_SUBSTR,XTERM);
79072805
LW
7876
7877 case KEY_format:
7878 case KEY_sub:
93a17b20 7879 really_sub:
09bef843 7880 {
3280af22 7881 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7882 SSize_t tboffset = 0;
09bef843 7883 expectation attrful;
28cc6278 7884 bool have_name, have_proto;
f54cb97a 7885 const int key = tmp;
09bef843 7886
5db06880
NC
7887#ifdef PERL_MAD
7888 SV *tmpwhite = 0;
7889
cd81e915 7890 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7891 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7892 PL_thistoken = 0;
5db06880
NC
7893
7894 d = s;
7895 s = SKIPSPACE2(s,tmpwhite);
7896#else
09bef843 7897 s = skipspace(s);
5db06880 7898#endif
09bef843 7899
7e2040f0 7900 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7901 (*s == ':' && s[1] == ':'))
7902 {
5db06880 7903#ifdef PERL_MAD
4f61fd4b 7904 SV *nametoke = NULL;
5db06880
NC
7905#endif
7906
09bef843
SB
7907 PL_expect = XBLOCK;
7908 attrful = XATTRBLOCK;
b1b65b59
JH
7909 /* remember buffer pos'n for later force_word */
7910 tboffset = s - PL_oldbufptr;
09bef843 7911 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7912#ifdef PERL_MAD
7913 if (PL_madskills)
7914 nametoke = newSVpvn(s, d - s);
7915#endif
6502358f
NC
7916 if (memchr(tmpbuf, ':', len))
7917 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7918 else {
7919 sv_setsv(PL_subname,PL_curstname);
396482e1 7920 sv_catpvs(PL_subname,"::");
09bef843
SB
7921 sv_catpvn(PL_subname,tmpbuf,len);
7922 }
09bef843 7923 have_name = TRUE;
5db06880
NC
7924
7925#ifdef PERL_MAD
7926
7927 start_force(0);
7928 CURMAD('X', nametoke);
7929 CURMAD('_', tmpwhite);
7930 (void) force_word(PL_oldbufptr + tboffset, WORD,
7931 FALSE, TRUE, TRUE);
7932
7933 s = SKIPSPACE2(d,tmpwhite);
7934#else
7935 s = skipspace(d);
7936#endif
09bef843 7937 }
463ee0b2 7938 else {
09bef843
SB
7939 if (key == KEY_my)
7940 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7941 PL_expect = XTERMBLOCK;
7942 attrful = XATTRTERM;
76f68e9b 7943 sv_setpvs(PL_subname,"?");
09bef843 7944 have_name = FALSE;
463ee0b2 7945 }
4633a7c4 7946
09bef843
SB
7947 if (key == KEY_format) {
7948 if (*s == '=')
7949 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7950#ifdef PERL_MAD
cd81e915 7951 PL_thistoken = subtoken;
5db06880
NC
7952 s = d;
7953#else
09bef843 7954 if (have_name)
b1b65b59
JH
7955 (void) force_word(PL_oldbufptr + tboffset, WORD,
7956 FALSE, TRUE, TRUE);
5db06880 7957#endif
09bef843
SB
7958 OPERATOR(FORMAT);
7959 }
79072805 7960
09bef843
SB
7961 /* Look for a prototype */
7962 if (*s == '(') {
d9f2850e
RGS
7963 char *p;
7964 bool bad_proto = FALSE;
9e8d7757
RB
7965 bool in_brackets = FALSE;
7966 char greedy_proto = ' ';
7967 bool proto_after_greedy_proto = FALSE;
7968 bool must_be_last = FALSE;
7969 bool underscore = FALSE;
aef2a98a 7970 bool seen_underscore = FALSE;
197afce1 7971 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7972
5db06880 7973 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7974 if (!s)
09bef843 7975 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7976 /* strip spaces and check for bad characters */
09bef843
SB
7977 d = SvPVX(PL_lex_stuff);
7978 tmp = 0;
d9f2850e
RGS
7979 for (p = d; *p; ++p) {
7980 if (!isSPACE(*p)) {
7981 d[tmp++] = *p;
9e8d7757 7982
197afce1 7983 if (warnillegalproto) {
9e8d7757
RB
7984 if (must_be_last)
7985 proto_after_greedy_proto = TRUE;
c035a075 7986 if (!strchr("$@%*;[]&\\_+", *p)) {
9e8d7757
RB
7987 bad_proto = TRUE;
7988 }
7989 else {
7990 if ( underscore ) {
7991 if ( *p != ';' )
7992 bad_proto = TRUE;
7993 underscore = FALSE;
7994 }
7995 if ( *p == '[' ) {
7996 in_brackets = TRUE;
7997 }
7998 else if ( *p == ']' ) {
7999 in_brackets = FALSE;
8000 }
8001 else if ( (*p == '@' || *p == '%') &&
8002 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8003 !in_brackets ) {
8004 must_be_last = TRUE;
8005 greedy_proto = *p;
8006 }
8007 else if ( *p == '_' ) {
aef2a98a 8008 underscore = seen_underscore = TRUE;
9e8d7757
RB
8009 }
8010 }
8011 }
d37a9538 8012 }
09bef843 8013 }
d9f2850e 8014 d[tmp] = '\0';
9e8d7757 8015 if (proto_after_greedy_proto)
197afce1 8016 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8017 "Prototype after '%c' for %"SVf" : %s",
8018 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 8019 if (bad_proto)
197afce1 8020 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8021 "Illegal character %sin prototype for %"SVf" : %s",
8022 seen_underscore ? "after '_' " : "",
be2597df 8023 SVfARG(PL_subname), d);
b162af07 8024 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8025 have_proto = TRUE;
68dc0745 8026
5db06880
NC
8027#ifdef PERL_MAD
8028 start_force(0);
cd81e915 8029 CURMAD('q', PL_thisopen);
5db06880 8030 CURMAD('_', tmpwhite);
cd81e915
NC
8031 CURMAD('=', PL_thisstuff);
8032 CURMAD('Q', PL_thisclose);
5db06880
NC
8033 NEXTVAL_NEXTTOKE.opval =
8034 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8035 PL_lex_stuff = NULL;
5db06880
NC
8036 force_next(THING);
8037
8038 s = SKIPSPACE2(s,tmpwhite);
8039#else
09bef843 8040 s = skipspace(s);
5db06880 8041#endif
4633a7c4 8042 }
09bef843
SB
8043 else
8044 have_proto = FALSE;
8045
8046 if (*s == ':' && s[1] != ':')
8047 PL_expect = attrful;
8e742a20
MHM
8048 else if (*s != '{' && key == KEY_sub) {
8049 if (!have_name)
8050 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8051 else if (*s != ';' && *s != '}')
be2597df 8052 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8053 }
09bef843 8054
5db06880
NC
8055#ifdef PERL_MAD
8056 start_force(0);
8057 if (tmpwhite) {
8058 if (PL_madskills)
6b29d1f5 8059 curmad('^', newSVpvs(""));
5db06880
NC
8060 CURMAD('_', tmpwhite);
8061 }
8062 force_next(0);
8063
cd81e915 8064 PL_thistoken = subtoken;
5db06880 8065#else
09bef843 8066 if (have_proto) {
9ded7720 8067 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8068 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8069 PL_lex_stuff = NULL;
09bef843 8070 force_next(THING);
68dc0745 8071 }
5db06880 8072#endif
09bef843 8073 if (!have_name) {
49a54bbe
NC
8074 if (PL_curstash)
8075 sv_setpvs(PL_subname, "__ANON__");
8076 else
8077 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8078 TOKEN(ANONSUB);
4633a7c4 8079 }
5db06880 8080#ifndef PERL_MAD
b1b65b59
JH
8081 (void) force_word(PL_oldbufptr + tboffset, WORD,
8082 FALSE, TRUE, TRUE);
5db06880 8083#endif
09bef843
SB
8084 if (key == KEY_my)
8085 TOKEN(MYSUB);
8086 TOKEN(SUB);
4633a7c4 8087 }
79072805
LW
8088
8089 case KEY_system:
a0d0e21e 8090 LOP(OP_SYSTEM,XREF);
79072805
LW
8091
8092 case KEY_symlink:
a0d0e21e 8093 LOP(OP_SYMLINK,XTERM);
79072805
LW
8094
8095 case KEY_syscall:
a0d0e21e 8096 LOP(OP_SYSCALL,XTERM);
79072805 8097
c07a80fd 8098 case KEY_sysopen:
8099 LOP(OP_SYSOPEN,XTERM);
8100
137443ea 8101 case KEY_sysseek:
8102 LOP(OP_SYSSEEK,XTERM);
8103
79072805 8104 case KEY_sysread:
a0d0e21e 8105 LOP(OP_SYSREAD,XTERM);
79072805
LW
8106
8107 case KEY_syswrite:
a0d0e21e 8108 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8109
8110 case KEY_tr:
8111 s = scan_trans(s);
8112 TERM(sublex_start());
8113
8114 case KEY_tell:
8115 UNI(OP_TELL);
8116
8117 case KEY_telldir:
8118 UNI(OP_TELLDIR);
8119
463ee0b2 8120 case KEY_tie:
a0d0e21e 8121 LOP(OP_TIE,XTERM);
463ee0b2 8122
c07a80fd 8123 case KEY_tied:
8124 UNI(OP_TIED);
8125
79072805
LW
8126 case KEY_time:
8127 FUN0(OP_TIME);
8128
8129 case KEY_times:
8130 FUN0(OP_TMS);
8131
8132 case KEY_truncate:
a0d0e21e 8133 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8134
8135 case KEY_uc:
8136 UNI(OP_UC);
8137
8138 case KEY_ucfirst:
8139 UNI(OP_UCFIRST);
8140
463ee0b2
LW
8141 case KEY_untie:
8142 UNI(OP_UNTIE);
8143
79072805 8144 case KEY_until:
78cdf107
Z
8145 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8146 return REPORT(0);
6154021b 8147 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8148 OPERATOR(UNTIL);
8149
8150 case KEY_unless:
78cdf107
Z
8151 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8152 return REPORT(0);
6154021b 8153 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8154 OPERATOR(UNLESS);
8155
8156 case KEY_unlink:
a0d0e21e 8157 LOP(OP_UNLINK,XTERM);
79072805
LW
8158
8159 case KEY_undef:
6f33ba73 8160 UNIDOR(OP_UNDEF);
79072805
LW
8161
8162 case KEY_unpack:
a0d0e21e 8163 LOP(OP_UNPACK,XTERM);
79072805
LW
8164
8165 case KEY_utime:
a0d0e21e 8166 LOP(OP_UTIME,XTERM);
79072805
LW
8167
8168 case KEY_umask:
6f33ba73 8169 UNIDOR(OP_UMASK);
79072805
LW
8170
8171 case KEY_unshift:
a0d0e21e
LW
8172 LOP(OP_UNSHIFT,XTERM);
8173
8174 case KEY_use:
468aa647 8175 s = tokenize_use(1, s);
a0d0e21e 8176 OPERATOR(USE);
79072805
LW
8177
8178 case KEY_values:
8179 UNI(OP_VALUES);
8180
8181 case KEY_vec:
a0d0e21e 8182 LOP(OP_VEC,XTERM);
79072805 8183
0d863452 8184 case KEY_when:
78cdf107
Z
8185 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8186 return REPORT(0);
6154021b 8187 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8188 OPERATOR(WHEN);
8189
79072805 8190 case KEY_while:
78cdf107
Z
8191 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8192 return REPORT(0);
6154021b 8193 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8194 OPERATOR(WHILE);
8195
8196 case KEY_warn:
3280af22 8197 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8198 LOP(OP_WARN,XTERM);
79072805
LW
8199
8200 case KEY_wait:
8201 FUN0(OP_WAIT);
8202
8203 case KEY_waitpid:
a0d0e21e 8204 LOP(OP_WAITPID,XTERM);
79072805
LW
8205
8206 case KEY_wantarray:
8207 FUN0(OP_WANTARRAY);
8208
8209 case KEY_write:
9d116dd7
JH
8210#ifdef EBCDIC
8211 {
df3728a2
JH
8212 char ctl_l[2];
8213 ctl_l[0] = toCTRL('L');
8214 ctl_l[1] = '\0';
fafc274c 8215 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8216 }
8217#else
fafc274c
NC
8218 /* Make sure $^L is defined */
8219 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8220#endif
79072805
LW
8221 UNI(OP_ENTERWRITE);
8222
8223 case KEY_x:
78cdf107
Z
8224 if (PL_expect == XOPERATOR) {
8225 if (*s == '=' && !PL_lex_allbrackets &&
8226 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8227 return REPORT(0);
79072805 8228 Mop(OP_REPEAT);
78cdf107 8229 }
79072805
LW
8230 check_uni();
8231 goto just_a_word;
8232
a0d0e21e 8233 case KEY_xor:
78cdf107
Z
8234 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8235 return REPORT(0);
6154021b 8236 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
8237 OPERATOR(OROP);
8238
79072805
LW
8239 case KEY_y:
8240 s = scan_trans(s);
8241 TERM(sublex_start());
8242 }
49dc05e3 8243 }}
79072805 8244}
bf4acbe4
GS
8245#ifdef __SC__
8246#pragma segment Main
8247#endif
79072805 8248
e930465f
JH
8249static int
8250S_pending_ident(pTHX)
8eceec63 8251{
97aff369 8252 dVAR;
8eceec63 8253 register char *d;
bbd11bfc 8254 PADOFFSET tmp = 0;
8eceec63
SC
8255 /* pit holds the identifier we read and pending_ident is reset */
8256 char pit = PL_pending_ident;
9bde8eb0
NC
8257 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8258 /* All routes through this function want to know if there is a colon. */
c099d646 8259 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8260 PL_pending_ident = 0;
8261
cd81e915 8262 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8263 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8264 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8265
8266 /* if we're in a my(), we can't allow dynamics here.
8267 $foo'bar has already been turned into $foo::bar, so
8268 just check for colons.
8269
8270 if it's a legal name, the OP is a PADANY.
8271 */
8272 if (PL_in_my) {
8273 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8274 if (has_colon)
8eceec63
SC
8275 yyerror(Perl_form(aTHX_ "No package name allowed for "
8276 "variable %s in \"our\"",
8277 PL_tokenbuf));
d6447115 8278 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8279 }
8280 else {
9bde8eb0 8281 if (has_colon)
952306ac
RGS
8282 yyerror(Perl_form(aTHX_ PL_no_myglob,
8283 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 8284
6154021b 8285 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 8286 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8287 return PRIVATEREF;
8288 }
8289 }
8290
8291 /*
8292 build the ops for accesses to a my() variable.
8293
8294 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8295 then used in a comparison. This catches most, but not
8296 all cases. For instance, it catches
8297 sort { my($a); $a <=> $b }
8298 but not
8299 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8300 (although why you'd do that is anyone's guess).
8301 */
8302
9bde8eb0 8303 if (!has_colon) {
8716503d 8304 if (!PL_in_my)
f8f98e0a 8305 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 8306 if (tmp != NOT_IN_PAD) {
8eceec63 8307 /* might be an "our" variable" */
00b1698f 8308 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8309 /* build ops for a bareword */
b64e5050
AL
8310 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8311 HEK * const stashname = HvNAME_HEK(stash);
8312 SV * const sym = newSVhek(stashname);
396482e1 8313 sv_catpvs(sym, "::");
9bde8eb0 8314 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8315 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8316 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8317 gv_fetchsv(sym,
8eceec63
SC
8318 (PL_in_eval
8319 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8320 : GV_ADDMULTI
8eceec63
SC
8321 ),
8322 ((PL_tokenbuf[0] == '$') ? SVt_PV
8323 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8324 : SVt_PVHV));
8325 return WORD;
8326 }
8327
8328 /* if it's a sort block and they're naming $a or $b */
8329 if (PL_last_lop_op == OP_SORT &&
8330 PL_tokenbuf[0] == '$' &&
8331 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8332 && !PL_tokenbuf[2])
8333 {
8334 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8335 d < PL_bufend && *d != '\n';
8336 d++)
8337 {
8338 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8339 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8340 PL_tokenbuf);
8341 }
8342 }
8343 }
8344
6154021b
RGS
8345 pl_yylval.opval = newOP(OP_PADANY, 0);
8346 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8347 return PRIVATEREF;
8348 }
8349 }
8350
8351 /*
8352 Whine if they've said @foo in a doublequoted string,
8353 and @foo isn't a variable we can find in the symbol
8354 table.
8355 */
d824713b
NC
8356 if (ckWARN(WARN_AMBIGUOUS) &&
8357 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8358 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8359 SVt_PVAV);
8eceec63 8360 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8361 /* DO NOT warn for @- and @+ */
8362 && !( PL_tokenbuf[2] == '\0' &&
8363 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8364 )
8eceec63
SC
8365 {
8366 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8367 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8368 "Possible unintended interpolation of %s in string",
8369 PL_tokenbuf);
8eceec63
SC
8370 }
8371 }
8372
8373 /* build ops for a bareword */
6154021b 8374 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8375 tokenbuf_len - 1));
6154021b 8376 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8377 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8378 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8379 ((PL_tokenbuf[0] == '$') ? SVt_PV
8380 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8381 : SVt_PVHV));
8eceec63
SC
8382 return WORD;
8383}
8384
76e3520e 8385STATIC void
c94115d8 8386S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8387{
97aff369 8388 dVAR;
2f3197b3 8389
7918f24d
NC
8390 PERL_ARGS_ASSERT_CHECKCOMMA;
8391
d008e5eb 8392 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8393 if (ckWARN(WARN_SYNTAX)) {
8394 int level = 1;
26ff0806 8395 const char *w;
d008e5eb
GS
8396 for (w = s+2; *w && level; w++) {
8397 if (*w == '(')
8398 ++level;
8399 else if (*w == ')')
8400 --level;
8401 }
888fea98
NC
8402 while (isSPACE(*w))
8403 ++w;
b1439985
RGS
8404 /* the list of chars below is for end of statements or
8405 * block / parens, boolean operators (&&, ||, //) and branch
8406 * constructs (or, and, if, until, unless, while, err, for).
8407 * Not a very solid hack... */
8408 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8409 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8410 "%s (...) interpreted as function",name);
d008e5eb 8411 }
2f3197b3 8412 }
3280af22 8413 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8414 s++;
a687059c
LW
8415 if (*s == '(')
8416 s++;
3280af22 8417 while (s < PL_bufend && isSPACE(*s))
a687059c 8418 s++;
7e2040f0 8419 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 8420 const char * const w = s++;
7e2040f0 8421 while (isALNUM_lazy_if(s,UTF))
a687059c 8422 s++;
3280af22 8423 while (s < PL_bufend && isSPACE(*s))
a687059c 8424 s++;
e929a76b 8425 if (*s == ',') {
c94115d8 8426 GV* gv;
5458a98a 8427 if (keyword(w, s - w, 0))
e929a76b 8428 return;
c94115d8
NC
8429
8430 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8431 if (gv && GvCVu(gv))
abbb3198 8432 return;
cea2e8a9 8433 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8434 }
8435 }
8436}
8437
423cee85
JH
8438/* Either returns sv, or mortalizes sv and returns a new SV*.
8439 Best used as sv=new_constant(..., sv, ...).
8440 If s, pv are NULL, calls subroutine with one argument,
8441 and type is used with error messages only. */
8442
b3ac6de7 8443STATIC SV *
eb0d8d16
NC
8444S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8445 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 8446{
27da23d5 8447 dVAR; dSP;
890ce7af 8448 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 8449 SV *res;
b3ac6de7
IZ
8450 SV **cvp;
8451 SV *cv, *typesv;
89e33a05 8452 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 8453
7918f24d
NC
8454 PERL_ARGS_ASSERT_NEW_CONSTANT;
8455
f0af216f 8456 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
8457 SV *msg;
8458
10edeb5d
JH
8459 why2 = (const char *)
8460 (strEQ(key,"charnames")
8461 ? "(possibly a missing \"use charnames ...\")"
8462 : "");
4e553d73 8463 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
8464 (type ? type: "undef"), why2);
8465
8466 /* This is convoluted and evil ("goto considered harmful")
8467 * but I do not understand the intricacies of all the different
8468 * failure modes of %^H in here. The goal here is to make
8469 * the most probable error message user-friendly. --jhi */
8470
8471 goto msgdone;
8472
423cee85 8473 report:
4e553d73 8474 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 8475 (type ? type: "undef"), why1, why2, why3);
41ab332f 8476 msgdone:
95a20fc0 8477 yyerror(SvPVX_const(msg));
423cee85
JH
8478 SvREFCNT_dec(msg);
8479 return sv;
8480 }
ff3f963a
KW
8481
8482 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
8483 if (PL_error_count > 0 && strEQ(key,"charnames"))
8484 return &PL_sv_undef;
ff3f963a 8485
eb0d8d16 8486 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 8487 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
8488 why1 = "$^H{";
8489 why2 = key;
f0af216f 8490 why3 = "} is not defined";
423cee85 8491 goto report;
b3ac6de7
IZ
8492 }
8493 sv_2mortal(sv); /* Parent created it permanently */
8494 cv = *cvp;
423cee85 8495 if (!pv && s)
59cd0e26 8496 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 8497 if (type && pv)
59cd0e26 8498 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 8499 else
423cee85 8500 typesv = &PL_sv_undef;
4e553d73 8501
e788e7d3 8502 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
8503 ENTER ;
8504 SAVETMPS;
4e553d73 8505
423cee85 8506 PUSHMARK(SP) ;
a5845cb7 8507 EXTEND(sp, 3);
423cee85
JH
8508 if (pv)
8509 PUSHs(pv);
b3ac6de7 8510 PUSHs(sv);
423cee85
JH
8511 if (pv)
8512 PUSHs(typesv);
b3ac6de7 8513 PUTBACK;
423cee85 8514 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 8515
423cee85 8516 SPAGAIN ;
4e553d73 8517
423cee85 8518 /* Check the eval first */
9b0e499b 8519 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 8520 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 8521 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 8522 (void)POPs;
b37c2d43 8523 res = SvREFCNT_inc_simple(sv);
423cee85
JH
8524 }
8525 else {
8526 res = POPs;
b37c2d43 8527 SvREFCNT_inc_simple_void(res);
423cee85 8528 }
4e553d73 8529
423cee85
JH
8530 PUTBACK ;
8531 FREETMPS ;
8532 LEAVE ;
b3ac6de7 8533 POPSTACK;
4e553d73 8534
b3ac6de7 8535 if (!SvOK(res)) {
423cee85
JH
8536 why1 = "Call to &{$^H{";
8537 why2 = key;
f0af216f 8538 why3 = "}} did not return a defined value";
423cee85
JH
8539 sv = res;
8540 goto report;
9b0e499b 8541 }
423cee85 8542
9b0e499b 8543 return res;
b3ac6de7 8544}
4e553d73 8545
d0a148a6
NC
8546/* Returns a NUL terminated string, with the length of the string written to
8547 *slp
8548 */
76e3520e 8549STATIC char *
cea2e8a9 8550S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 8551{
97aff369 8552 dVAR;
463ee0b2 8553 register char *d = dest;
890ce7af 8554 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
8555
8556 PERL_ARGS_ASSERT_SCAN_WORD;
8557
463ee0b2 8558 for (;;) {
8903cb82 8559 if (d >= e)
cea2e8a9 8560 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8561 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8562 *d++ = *s++;
c35e046a 8563 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8564 *d++ = ':';
8565 *d++ = ':';
8566 s++;
8567 }
c35e046a 8568 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
8569 *d++ = *s++;
8570 *d++ = *s++;
8571 }
fd400ab9 8572 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8573 char *t = s + UTF8SKIP(s);
c35e046a 8574 size_t len;
fd400ab9 8575 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 8576 t += UTF8SKIP(t);
c35e046a
AL
8577 len = t - s;
8578 if (d + len > e)
cea2e8a9 8579 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
8580 Copy(s, d, len, char);
8581 d += len;
a0ed51b3
LW
8582 s = t;
8583 }
463ee0b2
LW
8584 else {
8585 *d = '\0';
8586 *slp = d - dest;
8587 return s;
e929a76b 8588 }
378cc40b
LW
8589 }
8590}
8591
76e3520e 8592STATIC char *
f54cb97a 8593S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 8594{
97aff369 8595 dVAR;
6136c704 8596 char *bracket = NULL;
748a9306 8597 char funny = *s++;
6136c704 8598 register char *d = dest;
0b3da58d 8599 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 8600
7918f24d
NC
8601 PERL_ARGS_ASSERT_SCAN_IDENT;
8602
a0d0e21e 8603 if (isSPACE(*s))
29595ff2 8604 s = PEEKSPACE(s);
de3bb511 8605 if (isDIGIT(*s)) {
8903cb82 8606 while (isDIGIT(*s)) {
8607 if (d >= e)
cea2e8a9 8608 Perl_croak(aTHX_ ident_too_long);
378cc40b 8609 *d++ = *s++;
8903cb82 8610 }
378cc40b
LW
8611 }
8612 else {
463ee0b2 8613 for (;;) {
8903cb82 8614 if (d >= e)
cea2e8a9 8615 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8616 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8617 *d++ = *s++;
7e2040f0 8618 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8619 *d++ = ':';
8620 *d++ = ':';
8621 s++;
8622 }
a0d0e21e 8623 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
8624 *d++ = *s++;
8625 *d++ = *s++;
8626 }
fd400ab9 8627 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8628 char *t = s + UTF8SKIP(s);
fd400ab9 8629 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
8630 t += UTF8SKIP(t);
8631 if (d + (t - s) > e)
cea2e8a9 8632 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
8633 Copy(s, d, t - s, char);
8634 d += t - s;
8635 s = t;
8636 }
463ee0b2
LW
8637 else
8638 break;
8639 }
378cc40b
LW
8640 }
8641 *d = '\0';
8642 d = dest;
79072805 8643 if (*d) {
3280af22
NIS
8644 if (PL_lex_state != LEX_NORMAL)
8645 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 8646 return s;
378cc40b 8647 }
748a9306 8648 if (*s == '$' && s[1] &&
3792a11b 8649 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 8650 {
4810e5ec 8651 return s;
5cd24f17 8652 }
79072805
LW
8653 if (*s == '{') {
8654 bracket = s;
8655 s++;
8656 }
8657 else if (ck_uni)
8658 check_uni();
93a17b20 8659 if (s < send)
79072805
LW
8660 *d = *s++;
8661 d[1] = '\0';
2b92dfce 8662 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 8663 *d = toCTRL(*s);
8664 s++;
de3bb511 8665 }
79072805 8666 if (bracket) {
748a9306 8667 if (isSPACE(s[-1])) {
fa83b5b6 8668 while (s < send) {
f54cb97a 8669 const char ch = *s++;
bf4acbe4 8670 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 8671 *d = ch;
8672 break;
8673 }
8674 }
748a9306 8675 }
7e2040f0 8676 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 8677 d++;
a0ed51b3 8678 if (UTF) {
6136c704
AL
8679 char *end = s;
8680 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8681 end += UTF8SKIP(end);
8682 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8683 end += UTF8SKIP(end);
a0ed51b3 8684 }
6136c704
AL
8685 Copy(s, d, end - s, char);
8686 d += end - s;
8687 s = end;
a0ed51b3
LW
8688 }
8689 else {
2b92dfce 8690 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 8691 *d++ = *s++;
2b92dfce 8692 if (d >= e)
cea2e8a9 8693 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 8694 }
79072805 8695 *d = '\0';
c35e046a
AL
8696 while (s < send && SPACE_OR_TAB(*s))
8697 s++;
ff68c719 8698 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 8699 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
8700 const char * const brack =
8701 (const char *)
8702 ((*s == '[') ? "[...]" : "{...}");
9014280d 8703 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 8704 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
8705 funny, dest, brack, funny, dest, brack);
8706 }
79072805 8707 bracket++;
a0be28da 8708 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 8709 PL_lex_allbrackets++;
79072805
LW
8710 return s;
8711 }
4e553d73
NIS
8712 }
8713 /* Handle extended ${^Foo} variables
2b92dfce
GS
8714 * 1999-02-27 mjd-perl-patch@plover.com */
8715 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8716 && isALNUM(*s))
8717 {
8718 d++;
8719 while (isALNUM(*s) && d < e) {
8720 *d++ = *s++;
8721 }
8722 if (d >= e)
cea2e8a9 8723 Perl_croak(aTHX_ ident_too_long);
2b92dfce 8724 *d = '\0';
79072805
LW
8725 }
8726 if (*s == '}') {
8727 s++;
7df0d042 8728 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 8729 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
8730 PL_expect = XREF;
8731 }
d008e5eb 8732 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 8733 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
8734 (keyword(dest, d - dest, 0)
8735 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 8736 {
c35e046a
AL
8737 if (funny == '#')
8738 funny = '@';
9014280d 8739 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
8740 "Ambiguous use of %c{%s} resolved to %c%s",
8741 funny, dest, funny, dest);
8742 }
8743 }
79072805
LW
8744 }
8745 else {
8746 s = bracket; /* let the parser handle it */
93a17b20 8747 *dest = '\0';
79072805
LW
8748 }
8749 }
3280af22
NIS
8750 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8751 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
8752 return s;
8753}
8754
879d0c72
NC
8755static U32
8756S_pmflag(U32 pmfl, const char ch) {
8757 switch (ch) {
8758 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
4f4d7508
DC
8759 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
8760 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
8761 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
73134a2e 8762 case KEEPCOPY_PAT_MOD: pmfl |= RXf_PMf_KEEPCOPY; break;
4f4d7508 8763 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
879d0c72
NC
8764 }
8765 return pmfl;
8766}
8767
76e3520e 8768STATIC char *
cea2e8a9 8769S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 8770{
97aff369 8771 dVAR;
79072805 8772 PMOP *pm;
5db06880 8773 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 8774 const char * const valid_flags =
a20207d7 8775 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
8776#ifdef PERL_MAD
8777 char *modstart;
8778#endif
8779
7918f24d 8780 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 8781
25c09cbf 8782 if (!s) {
6136c704 8783 const char * const delimiter = skipspace(start);
10edeb5d
JH
8784 Perl_croak(aTHX_
8785 (const char *)
8786 (*delimiter == '?'
8787 ? "Search pattern not terminated or ternary operator parsed as search pattern"
8788 : "Search pattern not terminated" ));
25c09cbf 8789 }
bbce6d69 8790
8782bef2 8791 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
8792 if (PL_multi_open == '?') {
8793 /* This is the only point in the code that sets PMf_ONCE: */
79072805 8794 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
8795
8796 /* Hence it's safe to do this bit of PMOP book-keeping here, which
8797 allows us to restrict the list needed by reset to just the ??
8798 matches. */
8799 assert(type != OP_TRANS);
8800 if (PL_curstash) {
daba3364 8801 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
8802 U32 elements;
8803 if (!mg) {
daba3364 8804 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
8805 0);
8806 }
8807 elements = mg->mg_len / sizeof(PMOP**);
8808 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8809 ((PMOP**)mg->mg_ptr) [elements++] = pm;
8810 mg->mg_len = elements * sizeof(PMOP**);
8811 PmopSTASH_set(pm,PL_curstash);
8812 }
8813 }
5db06880
NC
8814#ifdef PERL_MAD
8815 modstart = s;
8816#endif
6136c704 8817 while (*s && strchr(valid_flags, *s))
879d0c72 8818 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
e6897b1a
KW
8819
8820 if (isALNUM(*s)) {
8821 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8822 "Having no space between pattern and following word is deprecated");
8823
8824 }
5db06880
NC
8825#ifdef PERL_MAD
8826 if (PL_madskills && modstart != s) {
8827 SV* tmptoken = newSVpvn(modstart, s - modstart);
8828 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8829 }
8830#endif
4ac733c9 8831 /* issue a warning if /c is specified,but /g is not */
a2a5de95 8832 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 8833 {
a2a5de95
NC
8834 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8835 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
8836 }
8837
3280af22 8838 PL_lex_op = (OP*)pm;
6154021b 8839 pl_yylval.ival = OP_MATCH;
378cc40b
LW
8840 return s;
8841}
8842
76e3520e 8843STATIC char *
cea2e8a9 8844S_scan_subst(pTHX_ char *start)
79072805 8845{
27da23d5 8846 dVAR;
a0d0e21e 8847 register char *s;
79072805 8848 register PMOP *pm;
4fdae800 8849 I32 first_start;
79072805 8850 I32 es = 0;
5db06880
NC
8851#ifdef PERL_MAD
8852 char *modstart;
8853#endif
79072805 8854
7918f24d
NC
8855 PERL_ARGS_ASSERT_SCAN_SUBST;
8856
6154021b 8857 pl_yylval.ival = OP_NULL;
79072805 8858
5db06880 8859 s = scan_str(start,!!PL_madskills,FALSE);
79072805 8860
37fd879b 8861 if (!s)
cea2e8a9 8862 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 8863
3280af22 8864 if (s[-1] == PL_multi_open)
79072805 8865 s--;
5db06880
NC
8866#ifdef PERL_MAD
8867 if (PL_madskills) {
cd81e915
NC
8868 CURMAD('q', PL_thisopen);
8869 CURMAD('_', PL_thiswhite);
8870 CURMAD('E', PL_thisstuff);
8871 CURMAD('Q', PL_thisclose);
8872 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
8873 }
8874#endif
79072805 8875
3280af22 8876 first_start = PL_multi_start;
5db06880 8877 s = scan_str(s,!!PL_madskills,FALSE);
79072805 8878 if (!s) {
37fd879b 8879 if (PL_lex_stuff) {
3280af22 8880 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 8881 PL_lex_stuff = NULL;
37fd879b 8882 }
cea2e8a9 8883 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 8884 }
3280af22 8885 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 8886
79072805 8887 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
8888
8889#ifdef PERL_MAD
8890 if (PL_madskills) {
cd81e915
NC
8891 CURMAD('z', PL_thisopen);
8892 CURMAD('R', PL_thisstuff);
8893 CURMAD('Z', PL_thisclose);
5db06880
NC
8894 }
8895 modstart = s;
8896#endif
8897
48c036b1 8898 while (*s) {
a20207d7 8899 if (*s == EXEC_PAT_MOD) {
a687059c 8900 s++;
2f3197b3 8901 es++;
a687059c 8902 }
a20207d7 8903 else if (strchr(S_PAT_MODS, *s))
879d0c72 8904 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
aa78b661
KW
8905 else {
8906 if (isALNUM(*s)) {
8907 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8908 "Having no space between pattern and following word is deprecated");
8909
8910 }
48c036b1 8911 break;
aa78b661 8912 }
378cc40b 8913 }
79072805 8914
5db06880
NC
8915#ifdef PERL_MAD
8916 if (PL_madskills) {
8917 if (modstart != s)
8918 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
8919 append_madprops(PL_thismad, (OP*)pm, 0);
8920 PL_thismad = 0;
5db06880
NC
8921 }
8922#endif
a2a5de95
NC
8923 if ((pm->op_pmflags & PMf_CONTINUE)) {
8924 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
8925 }
8926
79072805 8927 if (es) {
6136c704
AL
8928 SV * const repl = newSVpvs("");
8929
0244c3a4
GS
8930 PL_sublex_info.super_bufptr = s;
8931 PL_sublex_info.super_bufend = PL_bufend;
8932 PL_multi_end = 0;
79072805 8933 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
8934 while (es-- > 0) {
8935 if (es)
8936 sv_catpvs(repl, "eval ");
8937 else
8938 sv_catpvs(repl, "do ");
8939 }
6f43d98f 8940 sv_catpvs(repl, "{");
3280af22 8941 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
8942 if (strchr(SvPVX(PL_lex_repl), '#'))
8943 sv_catpvs(repl, "\n");
8944 sv_catpvs(repl, "}");
25da4f38 8945 SvEVALED_on(repl);
3280af22
NIS
8946 SvREFCNT_dec(PL_lex_repl);
8947 PL_lex_repl = repl;
378cc40b 8948 }
79072805 8949
3280af22 8950 PL_lex_op = (OP*)pm;
6154021b 8951 pl_yylval.ival = OP_SUBST;
378cc40b
LW
8952 return s;
8953}
8954
76e3520e 8955STATIC char *
cea2e8a9 8956S_scan_trans(pTHX_ char *start)
378cc40b 8957{
97aff369 8958 dVAR;
a0d0e21e 8959 register char* s;
11343788 8960 OP *o;
79072805 8961 short *tbl;
b84c11c8
NC
8962 U8 squash;
8963 U8 del;
8964 U8 complement;
bb16bae8 8965 bool nondestruct = 0;
5db06880
NC
8966#ifdef PERL_MAD
8967 char *modstart;
8968#endif
79072805 8969
7918f24d
NC
8970 PERL_ARGS_ASSERT_SCAN_TRANS;
8971
6154021b 8972 pl_yylval.ival = OP_NULL;
79072805 8973
5db06880 8974 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 8975 if (!s)
cea2e8a9 8976 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 8977
3280af22 8978 if (s[-1] == PL_multi_open)
2f3197b3 8979 s--;
5db06880
NC
8980#ifdef PERL_MAD
8981 if (PL_madskills) {
cd81e915
NC
8982 CURMAD('q', PL_thisopen);
8983 CURMAD('_', PL_thiswhite);
8984 CURMAD('E', PL_thisstuff);
8985 CURMAD('Q', PL_thisclose);
8986 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
8987 }
8988#endif
2f3197b3 8989
5db06880 8990 s = scan_str(s,!!PL_madskills,FALSE);
79072805 8991 if (!s) {
37fd879b 8992 if (PL_lex_stuff) {
3280af22 8993 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 8994 PL_lex_stuff = NULL;
37fd879b 8995 }
cea2e8a9 8996 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 8997 }
5db06880 8998 if (PL_madskills) {
cd81e915
NC
8999 CURMAD('z', PL_thisopen);
9000 CURMAD('R', PL_thisstuff);
9001 CURMAD('Z', PL_thisclose);
5db06880 9002 }
79072805 9003
a0ed51b3 9004 complement = del = squash = 0;
5db06880
NC
9005#ifdef PERL_MAD
9006 modstart = s;
9007#endif
7a1e2023
NC
9008 while (1) {
9009 switch (*s) {
9010 case 'c':
79072805 9011 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9012 break;
9013 case 'd':
a0ed51b3 9014 del = OPpTRANS_DELETE;
7a1e2023
NC
9015 break;
9016 case 's':
79072805 9017 squash = OPpTRANS_SQUASH;
7a1e2023 9018 break;
bb16bae8
FC
9019 case 'r':
9020 nondestruct = 1;
9021 break;
7a1e2023
NC
9022 default:
9023 goto no_more;
9024 }
395c3793
LW
9025 s++;
9026 }
7a1e2023 9027 no_more:
8973db79 9028
aa1f7c5b 9029 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
bb16bae8 9030 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
59f00321
RGS
9031 o->op_private &= ~OPpTRANS_ALL;
9032 o->op_private |= del|squash|complement|
7948272d
NIS
9033 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9034 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 9035
3280af22 9036 PL_lex_op = o;
bb16bae8 9037 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9038
9039#ifdef PERL_MAD
9040 if (PL_madskills) {
9041 if (modstart != s)
9042 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9043 append_madprops(PL_thismad, o, 0);
9044 PL_thismad = 0;
5db06880
NC
9045 }
9046#endif
9047
79072805
LW
9048 return s;
9049}
9050
76e3520e 9051STATIC char *
cea2e8a9 9052S_scan_heredoc(pTHX_ register char *s)
79072805 9053{
97aff369 9054 dVAR;
79072805
LW
9055 SV *herewas;
9056 I32 op_type = OP_SCALAR;
9057 I32 len;
9058 SV *tmpstr;
9059 char term;
73d840c0 9060 const char *found_newline;
79072805 9061 register char *d;
fc36a67e 9062 register char *e;
4633a7c4 9063 char *peek;
f54cb97a 9064 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
9065#ifdef PERL_MAD
9066 I32 stuffstart = s - SvPVX(PL_linestr);
9067 char *tstart;
9068
cd81e915 9069 PL_realtokenstart = -1;
5db06880 9070#endif
79072805 9071
7918f24d
NC
9072 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9073
79072805 9074 s += 2;
3280af22
NIS
9075 d = PL_tokenbuf;
9076 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 9077 if (!outer)
79072805 9078 *d++ = '\n';
c35e046a
AL
9079 peek = s;
9080 while (SPACE_OR_TAB(*peek))
9081 peek++;
3792a11b 9082 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9083 s = peek;
79072805 9084 term = *s++;
3280af22 9085 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 9086 d += len;
3280af22 9087 if (s < PL_bufend)
79072805 9088 s++;
79072805
LW
9089 }
9090 else {
9091 if (*s == '\\')
9092 s++, term = '\'';
9093 else
9094 term = '"';
7e2040f0 9095 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9096 deprecate("bare << to mean <<\"\"");
7e2040f0 9097 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9098 if (d < e)
9099 *d++ = *s;
9100 }
9101 }
3280af22 9102 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9103 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9104 *d++ = '\n';
9105 *d = '\0';
3280af22 9106 len = d - PL_tokenbuf;
5db06880
NC
9107
9108#ifdef PERL_MAD
9109 if (PL_madskills) {
9110 tstart = PL_tokenbuf + !outer;
cd81e915 9111 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 9112 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9113 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9114 stuffstart = s - SvPVX(PL_linestr);
9115 }
9116#endif
6a27c188 9117#ifndef PERL_STRICT_CR
f63a84b2
LW
9118 d = strchr(s, '\r');
9119 if (d) {
b464bac0 9120 char * const olds = s;
f63a84b2 9121 s = d;
3280af22 9122 while (s < PL_bufend) {
f63a84b2
LW
9123 if (*s == '\r') {
9124 *d++ = '\n';
9125 if (*++s == '\n')
9126 s++;
9127 }
9128 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9129 *d++ = *s++;
9130 s++;
9131 }
9132 else
9133 *d++ = *s++;
9134 }
9135 *d = '\0';
3280af22 9136 PL_bufend = d;
95a20fc0 9137 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9138 s = olds;
9139 }
9140#endif
5db06880
NC
9141#ifdef PERL_MAD
9142 found_newline = 0;
9143#endif
10edeb5d 9144 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
9145 herewas = newSVpvn(s,PL_bufend-s);
9146 }
9147 else {
5db06880
NC
9148#ifdef PERL_MAD
9149 herewas = newSVpvn(s-1,found_newline-s+1);
9150#else
73d840c0
AL
9151 s--;
9152 herewas = newSVpvn(s,found_newline-s);
5db06880 9153#endif
73d840c0 9154 }
5db06880
NC
9155#ifdef PERL_MAD
9156 if (PL_madskills) {
9157 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9158 if (PL_thisstuff)
9159 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9160 else
cd81e915 9161 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
9162 }
9163#endif
79072805 9164 s += SvCUR(herewas);
748a9306 9165
5db06880
NC
9166#ifdef PERL_MAD
9167 stuffstart = s - SvPVX(PL_linestr);
9168
9169 if (found_newline)
9170 s--;
9171#endif
9172
7d0a29fe
NC
9173 tmpstr = newSV_type(SVt_PVIV);
9174 SvGROW(tmpstr, 80);
748a9306 9175 if (term == '\'') {
79072805 9176 op_type = OP_CONST;
45977657 9177 SvIV_set(tmpstr, -1);
748a9306
LW
9178 }
9179 else if (term == '`') {
79072805 9180 op_type = OP_BACKTICK;
45977657 9181 SvIV_set(tmpstr, '\\');
748a9306 9182 }
79072805
LW
9183
9184 CLINE;
57843af0 9185 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
9186 PL_multi_open = PL_multi_close = '<';
9187 term = *PL_tokenbuf;
0244c3a4 9188 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
9189 char * const bufptr = PL_sublex_info.super_bufptr;
9190 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 9191 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
9192 s = strchr(bufptr, '\n');
9193 if (!s)
9194 s = bufend;
9195 d = s;
9196 while (s < bufend &&
9197 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9198 if (*s++ == '\n')
57843af0 9199 CopLINE_inc(PL_curcop);
0244c3a4
GS
9200 }
9201 if (s >= bufend) {
eb160463 9202 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
9203 missingterm(PL_tokenbuf);
9204 }
9205 sv_setpvn(herewas,bufptr,d-bufptr+1);
9206 sv_setpvn(tmpstr,d+1,s-d);
9207 s += len - 1;
9208 sv_catpvn(herewas,s,bufend-s);
95a20fc0 9209 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
9210
9211 s = olds;
9212 goto retval;
9213 }
9214 else if (!outer) {
79072805 9215 d = s;
3280af22
NIS
9216 while (s < PL_bufend &&
9217 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 9218 if (*s++ == '\n')
57843af0 9219 CopLINE_inc(PL_curcop);
79072805 9220 }
3280af22 9221 if (s >= PL_bufend) {
eb160463 9222 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9223 missingterm(PL_tokenbuf);
79072805
LW
9224 }
9225 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9226#ifdef PERL_MAD
9227 if (PL_madskills) {
cd81e915
NC
9228 if (PL_thisstuff)
9229 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9230 else
cd81e915 9231 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9232 stuffstart = s - SvPVX(PL_linestr);
9233 }
9234#endif
79072805 9235 s += len - 1;
57843af0 9236 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 9237
3280af22
NIS
9238 sv_catpvn(herewas,s,PL_bufend-s);
9239 sv_setsv(PL_linestr,herewas);
9240 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9241 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9242 PL_last_lop = PL_last_uni = NULL;
79072805
LW
9243 }
9244 else
76f68e9b 9245 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 9246 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
9247#ifdef PERL_MAD
9248 if (PL_madskills) {
9249 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9250 if (PL_thisstuff)
9251 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9252 else
cd81e915 9253 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9254 }
9255#endif
f0e67a1d 9256 PL_bufptr = s;
17cc9359 9257 CopLINE_inc(PL_curcop);
f0e67a1d 9258 if (!outer || !lex_next_chunk(0)) {
eb160463 9259 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9260 missingterm(PL_tokenbuf);
79072805 9261 }
17cc9359 9262 CopLINE_dec(PL_curcop);
f0e67a1d 9263 s = PL_bufptr;
5db06880
NC
9264#ifdef PERL_MAD
9265 stuffstart = s - SvPVX(PL_linestr);
9266#endif
57843af0 9267 CopLINE_inc(PL_curcop);
3280af22 9268 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9269 PL_last_lop = PL_last_uni = NULL;
6a27c188 9270#ifndef PERL_STRICT_CR
3280af22 9271 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
9272 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9273 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 9274 {
3280af22
NIS
9275 PL_bufend[-2] = '\n';
9276 PL_bufend--;
95a20fc0 9277 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 9278 }
3280af22
NIS
9279 else if (PL_bufend[-1] == '\r')
9280 PL_bufend[-1] = '\n';
f63a84b2 9281 }
3280af22
NIS
9282 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9283 PL_bufend[-1] = '\n';
f63a84b2 9284#endif
3280af22 9285 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 9286 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 9287 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
9288 sv_catsv(PL_linestr,herewas);
9289 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 9290 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
9291 }
9292 else {
3280af22
NIS
9293 s = PL_bufend;
9294 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
9295 }
9296 }
79072805 9297 s++;
0244c3a4 9298retval:
57843af0 9299 PL_multi_end = CopLINE(PL_curcop);
79072805 9300 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 9301 SvPV_shrink_to_cur(tmpstr);
79072805 9302 }
8990e307 9303 SvREFCNT_dec(herewas);
2f31ce75 9304 if (!IN_BYTES) {
95a20fc0 9305 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
9306 SvUTF8_on(tmpstr);
9307 else if (PL_encoding)
9308 sv_recode_to_utf8(tmpstr, PL_encoding);
9309 }
3280af22 9310 PL_lex_stuff = tmpstr;
6154021b 9311 pl_yylval.ival = op_type;
79072805
LW
9312 return s;
9313}
9314
02aa26ce
NT
9315/* scan_inputsymbol
9316 takes: current position in input buffer
9317 returns: new position in input buffer
6154021b 9318 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
9319
9320 This code handles:
9321
9322 <> read from ARGV
9323 <FH> read from filehandle
9324 <pkg::FH> read from package qualified filehandle
9325 <pkg'FH> read from package qualified filehandle
9326 <$fh> read from filehandle in $fh
9327 <*.h> filename glob
9328
9329*/
9330
76e3520e 9331STATIC char *
cea2e8a9 9332S_scan_inputsymbol(pTHX_ char *start)
79072805 9333{
97aff369 9334 dVAR;
02aa26ce 9335 register char *s = start; /* current position in buffer */
1b420867 9336 char *end;
79072805 9337 I32 len;
6136c704
AL
9338 char *d = PL_tokenbuf; /* start of temp holding space */
9339 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9340
7918f24d
NC
9341 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9342
1b420867
GS
9343 end = strchr(s, '\n');
9344 if (!end)
9345 end = PL_bufend;
9346 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
9347
9348 /* die if we didn't have space for the contents of the <>,
1b420867 9349 or if it didn't end, or if we see a newline
02aa26ce
NT
9350 */
9351
bb7a0f54 9352 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 9353 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 9354 if (s >= end)
cea2e8a9 9355 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 9356
fc36a67e 9357 s++;
02aa26ce
NT
9358
9359 /* check for <$fh>
9360 Remember, only scalar variables are interpreted as filehandles by
9361 this code. Anything more complex (e.g., <$fh{$num}>) will be
9362 treated as a glob() call.
9363 This code makes use of the fact that except for the $ at the front,
9364 a scalar variable and a filehandle look the same.
9365 */
4633a7c4 9366 if (*d == '$' && d[1]) d++;
02aa26ce
NT
9367
9368 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 9369 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 9370 d++;
02aa26ce
NT
9371
9372 /* If we've tried to read what we allow filehandles to look like, and
9373 there's still text left, then it must be a glob() and not a getline.
9374 Use scan_str to pull out the stuff between the <> and treat it
9375 as nothing more than a string.
9376 */
9377
3280af22 9378 if (d - PL_tokenbuf != len) {
6154021b 9379 pl_yylval.ival = OP_GLOB;
5db06880 9380 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9381 if (!s)
cea2e8a9 9382 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
9383 return s;
9384 }
395c3793 9385 else {
9b3023bc 9386 bool readline_overriden = FALSE;
6136c704 9387 GV *gv_readline;
9b3023bc 9388 GV **gvp;
02aa26ce 9389 /* we're in a filehandle read situation */
3280af22 9390 d = PL_tokenbuf;
02aa26ce
NT
9391
9392 /* turn <> into <ARGV> */
79072805 9393 if (!len)
689badd5 9394 Copy("ARGV",d,5,char);
02aa26ce 9395
9b3023bc 9396 /* Check whether readline() is overriden */
fafc274c 9397 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 9398 if ((gv_readline
ba979b31 9399 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 9400 ||
017a3ce5 9401 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 9402 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 9403 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
9404 readline_overriden = TRUE;
9405
02aa26ce
NT
9406 /* if <$fh>, create the ops to turn the variable into a
9407 filehandle
9408 */
79072805 9409 if (*d == '$') {
02aa26ce
NT
9410 /* try to find it in the pad for this block, otherwise find
9411 add symbol table ops
9412 */
f8f98e0a 9413 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 9414 if (tmp != NOT_IN_PAD) {
00b1698f 9415 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
9416 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9417 HEK * const stashname = HvNAME_HEK(stash);
9418 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 9419 sv_catpvs(sym, "::");
f558d5af
JH
9420 sv_catpv(sym, d+1);
9421 d = SvPVX(sym);
9422 goto intro_sym;
9423 }
9424 else {
6136c704 9425 OP * const o = newOP(OP_PADSV, 0);
f558d5af 9426 o->op_targ = tmp;
9b3023bc
RGS
9427 PL_lex_op = readline_overriden
9428 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9429 op_append_elem(OP_LIST, o,
9b3023bc
RGS
9430 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9431 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 9432 }
a0d0e21e
LW
9433 }
9434 else {
f558d5af
JH
9435 GV *gv;
9436 ++d;
9437intro_sym:
9438 gv = gv_fetchpv(d,
9439 (PL_in_eval
9440 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 9441 : GV_ADDMULTI),
f558d5af 9442 SVt_PV);
9b3023bc
RGS
9443 PL_lex_op = readline_overriden
9444 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9445 op_append_elem(OP_LIST,
9b3023bc
RGS
9446 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9447 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9448 : (OP*)newUNOP(OP_READLINE, 0,
9449 newUNOP(OP_RV2SV, 0,
9450 newGVOP(OP_GV, 0, gv)));
a0d0e21e 9451 }
7c6fadd6
RGS
9452 if (!readline_overriden)
9453 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
9454 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9455 pl_yylval.ival = OP_NULL;
79072805 9456 }
02aa26ce
NT
9457
9458 /* If it's none of the above, it must be a literal filehandle
9459 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 9460 else {
6136c704 9461 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
9462 PL_lex_op = readline_overriden
9463 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9464 op_append_elem(OP_LIST,
9b3023bc
RGS
9465 newGVOP(OP_GV, 0, gv),
9466 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9467 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 9468 pl_yylval.ival = OP_NULL;
79072805
LW
9469 }
9470 }
02aa26ce 9471
79072805
LW
9472 return s;
9473}
9474
02aa26ce
NT
9475
9476/* scan_str
9477 takes: start position in buffer
09bef843
SB
9478 keep_quoted preserve \ on the embedded delimiter(s)
9479 keep_delims preserve the delimiters around the string
02aa26ce
NT
9480 returns: position to continue reading from buffer
9481 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9482 updates the read buffer.
9483
9484 This subroutine pulls a string out of the input. It is called for:
9485 q single quotes q(literal text)
9486 ' single quotes 'literal text'
9487 qq double quotes qq(interpolate $here please)
9488 " double quotes "interpolate $here please"
9489 qx backticks qx(/bin/ls -l)
9490 ` backticks `/bin/ls -l`
9491 qw quote words @EXPORT_OK = qw( func() $spam )
9492 m// regexp match m/this/
9493 s/// regexp substitute s/this/that/
9494 tr/// string transliterate tr/this/that/
9495 y/// string transliterate y/this/that/
9496 ($*@) sub prototypes sub foo ($)
09bef843 9497 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
9498 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9499
9500 In most of these cases (all but <>, patterns and transliterate)
9501 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9502 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9503 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9504 calls scan_str().
4e553d73 9505
02aa26ce
NT
9506 It skips whitespace before the string starts, and treats the first
9507 character as the delimiter. If the delimiter is one of ([{< then
9508 the corresponding "close" character )]}> is used as the closing
9509 delimiter. It allows quoting of delimiters, and if the string has
9510 balanced delimiters ([{<>}]) it allows nesting.
9511
37fd879b
HS
9512 On success, the SV with the resulting string is put into lex_stuff or,
9513 if that is already non-NULL, into lex_repl. The second case occurs only
9514 when parsing the RHS of the special constructs s/// and tr/// (y///).
9515 For convenience, the terminating delimiter character is stuffed into
9516 SvIVX of the SV.
02aa26ce
NT
9517*/
9518
76e3520e 9519STATIC char *
09bef843 9520S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 9521{
97aff369 9522 dVAR;
02aa26ce 9523 SV *sv; /* scalar value: string */
d3fcec1f 9524 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
9525 register char *s = start; /* current position in the buffer */
9526 register char term; /* terminating character */
9527 register char *to; /* current position in the sv's data */
9528 I32 brackets = 1; /* bracket nesting level */
89491803 9529 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 9530 I32 termcode; /* terminating char. code */
89ebb4a3 9531 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 9532 STRLEN termlen; /* length of terminating string */
0331ef07 9533 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
9534#ifdef PERL_MAD
9535 int stuffstart;
9536 char *tstart;
9537#endif
02aa26ce 9538
7918f24d
NC
9539 PERL_ARGS_ASSERT_SCAN_STR;
9540
02aa26ce 9541 /* skip space before the delimiter */
29595ff2
NC
9542 if (isSPACE(*s)) {
9543 s = PEEKSPACE(s);
9544 }
02aa26ce 9545
5db06880 9546#ifdef PERL_MAD
cd81e915
NC
9547 if (PL_realtokenstart >= 0) {
9548 stuffstart = PL_realtokenstart;
9549 PL_realtokenstart = -1;
5db06880
NC
9550 }
9551 else
9552 stuffstart = start - SvPVX(PL_linestr);
9553#endif
02aa26ce 9554 /* mark where we are, in case we need to report errors */
79072805 9555 CLINE;
02aa26ce
NT
9556
9557 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 9558 term = *s;
220e2d4e
IH
9559 if (!UTF) {
9560 termcode = termstr[0] = term;
9561 termlen = 1;
9562 }
9563 else {
f3b9ce0f 9564 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
9565 Copy(s, termstr, termlen, U8);
9566 if (!UTF8_IS_INVARIANT(term))
9567 has_utf8 = TRUE;
9568 }
b1c7b182 9569
02aa26ce 9570 /* mark where we are */
57843af0 9571 PL_multi_start = CopLINE(PL_curcop);
3280af22 9572 PL_multi_open = term;
02aa26ce
NT
9573
9574 /* find corresponding closing delimiter */
93a17b20 9575 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
9576 termcode = termstr[0] = term = tmps[5];
9577
3280af22 9578 PL_multi_close = term;
79072805 9579
561b68a9
SH
9580 /* create a new SV to hold the contents. 79 is the SV's initial length.
9581 What a random number. */
7d0a29fe
NC
9582 sv = newSV_type(SVt_PVIV);
9583 SvGROW(sv, 80);
45977657 9584 SvIV_set(sv, termcode);
a0d0e21e 9585 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
9586
9587 /* move past delimiter and try to read a complete string */
09bef843 9588 if (keep_delims)
220e2d4e
IH
9589 sv_catpvn(sv, s, termlen);
9590 s += termlen;
5db06880
NC
9591#ifdef PERL_MAD
9592 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9593 if (!PL_thisopen && !keep_delims) {
9594 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9595 stuffstart = s - SvPVX(PL_linestr);
9596 }
9597#endif
93a17b20 9598 for (;;) {
220e2d4e
IH
9599 if (PL_encoding && !UTF) {
9600 bool cont = TRUE;
9601
9602 while (cont) {
95a20fc0 9603 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 9604 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 9605 &offset, (char*)termstr, termlen);
6136c704
AL
9606 const char * const ns = SvPVX_const(PL_linestr) + offset;
9607 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
9608
9609 for (; s < ns; s++) {
9610 if (*s == '\n' && !PL_rsfp)
9611 CopLINE_inc(PL_curcop);
9612 }
9613 if (!found)
9614 goto read_more_line;
9615 else {
9616 /* handle quoted delimiters */
52327caf 9617 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 9618 const char *t;
95a20fc0 9619 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
9620 t--;
9621 if ((svlast-1 - t) % 2) {
9622 if (!keep_quoted) {
9623 *(svlast-1) = term;
9624 *svlast = '\0';
9625 SvCUR_set(sv, SvCUR(sv) - 1);
9626 }
9627 continue;
9628 }
9629 }
9630 if (PL_multi_open == PL_multi_close) {
9631 cont = FALSE;
9632 }
9633 else {
f54cb97a
AL
9634 const char *t;
9635 char *w;
0331ef07 9636 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
9637 /* At here, all closes are "was quoted" one,
9638 so we don't check PL_multi_close. */
9639 if (*t == '\\') {
9640 if (!keep_quoted && *(t+1) == PL_multi_open)
9641 t++;
9642 else
9643 *w++ = *t++;
9644 }
9645 else if (*t == PL_multi_open)
9646 brackets++;
9647
9648 *w = *t;
9649 }
9650 if (w < t) {
9651 *w++ = term;
9652 *w = '\0';
95a20fc0 9653 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 9654 }
0331ef07 9655 last_off = w - SvPVX(sv);
220e2d4e
IH
9656 if (--brackets <= 0)
9657 cont = FALSE;
9658 }
9659 }
9660 }
9661 if (!keep_delims) {
9662 SvCUR_set(sv, SvCUR(sv) - 1);
9663 *SvEND(sv) = '\0';
9664 }
9665 break;
9666 }
9667
02aa26ce 9668 /* extend sv if need be */
3280af22 9669 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 9670 /* set 'to' to the next character in the sv's string */
463ee0b2 9671 to = SvPVX(sv)+SvCUR(sv);
09bef843 9672
02aa26ce 9673 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
9674 if (PL_multi_open == PL_multi_close) {
9675 for (; s < PL_bufend; s++,to++) {
02aa26ce 9676 /* embedded newlines increment the current line number */
3280af22 9677 if (*s == '\n' && !PL_rsfp)
57843af0 9678 CopLINE_inc(PL_curcop);
02aa26ce 9679 /* handle quoted delimiters */
3280af22 9680 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 9681 if (!keep_quoted && s[1] == term)
a0d0e21e 9682 s++;
02aa26ce 9683 /* any other quotes are simply copied straight through */
a0d0e21e
LW
9684 else
9685 *to++ = *s++;
9686 }
02aa26ce
NT
9687 /* terminate when run out of buffer (the for() condition), or
9688 have found the terminator */
220e2d4e
IH
9689 else if (*s == term) {
9690 if (termlen == 1)
9691 break;
f3b9ce0f 9692 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
9693 break;
9694 }
63cd0674 9695 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9696 has_utf8 = TRUE;
93a17b20
LW
9697 *to = *s;
9698 }
9699 }
02aa26ce
NT
9700
9701 /* if the terminator isn't the same as the start character (e.g.,
9702 matched brackets), we have to allow more in the quoting, and
9703 be prepared for nested brackets.
9704 */
93a17b20 9705 else {
02aa26ce 9706 /* read until we run out of string, or we find the terminator */
3280af22 9707 for (; s < PL_bufend; s++,to++) {
02aa26ce 9708 /* embedded newlines increment the line count */
3280af22 9709 if (*s == '\n' && !PL_rsfp)
57843af0 9710 CopLINE_inc(PL_curcop);
02aa26ce 9711 /* backslashes can escape the open or closing characters */
3280af22 9712 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
9713 if (!keep_quoted &&
9714 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
9715 s++;
9716 else
9717 *to++ = *s++;
9718 }
02aa26ce 9719 /* allow nested opens and closes */
3280af22 9720 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 9721 break;
3280af22 9722 else if (*s == PL_multi_open)
93a17b20 9723 brackets++;
63cd0674 9724 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9725 has_utf8 = TRUE;
93a17b20
LW
9726 *to = *s;
9727 }
9728 }
02aa26ce 9729 /* terminate the copied string and update the sv's end-of-string */
93a17b20 9730 *to = '\0';
95a20fc0 9731 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 9732
02aa26ce
NT
9733 /*
9734 * this next chunk reads more into the buffer if we're not done yet
9735 */
9736
b1c7b182
GS
9737 if (s < PL_bufend)
9738 break; /* handle case where we are done yet :-) */
79072805 9739
6a27c188 9740#ifndef PERL_STRICT_CR
95a20fc0 9741 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
9742 if ((to[-2] == '\r' && to[-1] == '\n') ||
9743 (to[-2] == '\n' && to[-1] == '\r'))
9744 {
f63a84b2
LW
9745 to[-2] = '\n';
9746 to--;
95a20fc0 9747 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
9748 }
9749 else if (to[-1] == '\r')
9750 to[-1] = '\n';
9751 }
95a20fc0 9752 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
9753 to[-1] = '\n';
9754#endif
9755
220e2d4e 9756 read_more_line:
02aa26ce
NT
9757 /* if we're out of file, or a read fails, bail and reset the current
9758 line marker so we can report where the unterminated string began
9759 */
5db06880
NC
9760#ifdef PERL_MAD
9761 if (PL_madskills) {
c35e046a 9762 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9763 if (PL_thisstuff)
9764 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9765 else
cd81e915 9766 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9767 }
9768#endif
f0e67a1d
Z
9769 CopLINE_inc(PL_curcop);
9770 PL_bufptr = PL_bufend;
9771 if (!lex_next_chunk(0)) {
c07a80fd 9772 sv_free(sv);
eb160463 9773 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 9774 return NULL;
79072805 9775 }
f0e67a1d 9776 s = PL_bufptr;
5db06880
NC
9777#ifdef PERL_MAD
9778 stuffstart = 0;
9779#endif
378cc40b 9780 }
4e553d73 9781
02aa26ce
NT
9782 /* at this point, we have successfully read the delimited string */
9783
220e2d4e 9784 if (!PL_encoding || UTF) {
5db06880
NC
9785#ifdef PERL_MAD
9786 if (PL_madskills) {
c35e046a 9787 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 9788 const int len = s - tstart;
cd81e915 9789 if (PL_thisstuff)
c35e046a 9790 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9791 else
c35e046a 9792 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9793 if (!PL_thisclose && !keep_delims)
9794 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
9795 }
9796#endif
9797
220e2d4e
IH
9798 if (keep_delims)
9799 sv_catpvn(sv, s, termlen);
9800 s += termlen;
9801 }
5db06880
NC
9802#ifdef PERL_MAD
9803 else {
9804 if (PL_madskills) {
c35e046a
AL
9805 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9806 const int len = s - tstart - termlen;
cd81e915 9807 if (PL_thisstuff)
c35e046a 9808 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9809 else
c35e046a 9810 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9811 if (!PL_thisclose && !keep_delims)
9812 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
9813 }
9814 }
9815#endif
220e2d4e 9816 if (has_utf8 || PL_encoding)
b1c7b182 9817 SvUTF8_on(sv);
d0063567 9818
57843af0 9819 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
9820
9821 /* if we allocated too much space, give some back */
93a17b20
LW
9822 if (SvCUR(sv) + 5 < SvLEN(sv)) {
9823 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 9824 SvPV_renew(sv, SvLEN(sv));
79072805 9825 }
02aa26ce
NT
9826
9827 /* decide whether this is the first or second quoted string we've read
9828 for this op
9829 */
4e553d73 9830
3280af22
NIS
9831 if (PL_lex_stuff)
9832 PL_lex_repl = sv;
79072805 9833 else
3280af22 9834 PL_lex_stuff = sv;
378cc40b
LW
9835 return s;
9836}
9837
02aa26ce
NT
9838/*
9839 scan_num
9840 takes: pointer to position in buffer
9841 returns: pointer to new position in buffer
6154021b 9842 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
9843
9844 Read a number in any of the formats that Perl accepts:
9845
7fd134d9
JH
9846 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
9847 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
9848 0b[01](_?[01])*
9849 0[0-7](_?[0-7])*
9850 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 9851
3280af22 9852 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
9853 thing it reads.
9854
9855 If it reads a number without a decimal point or an exponent, it will
9856 try converting the number to an integer and see if it can do so
9857 without loss of precision.
9858*/
4e553d73 9859
378cc40b 9860char *
bfed75c6 9861Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 9862{
97aff369 9863 dVAR;
bfed75c6 9864 register const char *s = start; /* current position in buffer */
02aa26ce
NT
9865 register char *d; /* destination in temp buffer */
9866 register char *e; /* end of temp buffer */
86554af2 9867 NV nv; /* number read, as a double */
a0714e2c 9868 SV *sv = NULL; /* place to put the converted number */
a86a20aa 9869 bool floatit; /* boolean: int or float? */
cbbf8932 9870 const char *lastub = NULL; /* position of last underbar */
bfed75c6 9871 static char const number_too_long[] = "Number too long";
378cc40b 9872
7918f24d
NC
9873 PERL_ARGS_ASSERT_SCAN_NUM;
9874
02aa26ce
NT
9875 /* We use the first character to decide what type of number this is */
9876
378cc40b 9877 switch (*s) {
79072805 9878 default:
cea2e8a9 9879 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 9880
02aa26ce 9881 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 9882 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
9883 case '0':
9884 {
02aa26ce
NT
9885 /* variables:
9886 u holds the "number so far"
4f19785b
WSI
9887 shift the power of 2 of the base
9888 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
9889 overflowed was the number more than we can hold?
9890
9891 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
9892 we in octal/hex/binary?" indicator to disallow hex characters
9893 when in octal mode.
02aa26ce 9894 */
9e24b6e2
JH
9895 NV n = 0.0;
9896 UV u = 0;
79072805 9897 I32 shift;
9e24b6e2 9898 bool overflowed = FALSE;
61f33854 9899 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
9900 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
9901 static const char* const bases[5] =
9902 { "", "binary", "", "octal", "hexadecimal" };
9903 static const char* const Bases[5] =
9904 { "", "Binary", "", "Octal", "Hexadecimal" };
9905 static const char* const maxima[5] =
9906 { "",
9907 "0b11111111111111111111111111111111",
9908 "",
9909 "037777777777",
9910 "0xffffffff" };
bfed75c6 9911 const char *base, *Base, *max;
378cc40b 9912
02aa26ce 9913 /* check for hex */
a674e8db 9914 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
9915 shift = 4;
9916 s += 2;
61f33854 9917 just_zero = FALSE;
a674e8db 9918 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
9919 shift = 1;
9920 s += 2;
61f33854 9921 just_zero = FALSE;
378cc40b 9922 }
02aa26ce 9923 /* check for a decimal in disguise */
b78218b7 9924 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 9925 goto decimal;
02aa26ce 9926 /* so it must be octal */
928753ea 9927 else {
378cc40b 9928 shift = 3;
928753ea
JH
9929 s++;
9930 }
9931
9932 if (*s == '_') {
a2a5de95 9933 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
9934 "Misplaced _ in number");
9935 lastub = s++;
9936 }
9e24b6e2
JH
9937
9938 base = bases[shift];
9939 Base = Bases[shift];
9940 max = maxima[shift];
02aa26ce 9941
4f19785b 9942 /* read the rest of the number */
378cc40b 9943 for (;;) {
9e24b6e2 9944 /* x is used in the overflow test,
893fe2c2 9945 b is the digit we're adding on. */
9e24b6e2 9946 UV x, b;
55497cff 9947
378cc40b 9948 switch (*s) {
02aa26ce
NT
9949
9950 /* if we don't mention it, we're done */
378cc40b
LW
9951 default:
9952 goto out;
02aa26ce 9953
928753ea 9954 /* _ are ignored -- but warned about if consecutive */
de3bb511 9955 case '_':
a2a5de95
NC
9956 if (lastub && s == lastub + 1)
9957 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
9958 "Misplaced _ in number");
928753ea 9959 lastub = s++;
de3bb511 9960 break;
02aa26ce
NT
9961
9962 /* 8 and 9 are not octal */
378cc40b 9963 case '8': case '9':
4f19785b 9964 if (shift == 3)
cea2e8a9 9965 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 9966 /* FALL THROUGH */
02aa26ce
NT
9967
9968 /* octal digits */
4f19785b 9969 case '2': case '3': case '4':
378cc40b 9970 case '5': case '6': case '7':
4f19785b 9971 if (shift == 1)
cea2e8a9 9972 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
9973 /* FALL THROUGH */
9974
9975 case '0': case '1':
02aa26ce 9976 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 9977 goto digit;
02aa26ce
NT
9978
9979 /* hex digits */
378cc40b
LW
9980 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
9981 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 9982 /* make sure they said 0x */
378cc40b
LW
9983 if (shift != 4)
9984 goto out;
55497cff 9985 b = (*s++ & 7) + 9;
02aa26ce
NT
9986
9987 /* Prepare to put the digit we have onto the end
9988 of the number so far. We check for overflows.
9989 */
9990
55497cff 9991 digit:
61f33854 9992 just_zero = FALSE;
9e24b6e2
JH
9993 if (!overflowed) {
9994 x = u << shift; /* make room for the digit */
9995
9996 if ((x >> shift) != u
9997 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
9998 overflowed = TRUE;
9999 n = (NV) u;
9b387841
NC
10000 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10001 "Integer overflow in %s number",
10002 base);
9e24b6e2
JH
10003 } else
10004 u = x | b; /* add the digit to the end */
10005 }
10006 if (overflowed) {
10007 n *= nvshift[shift];
10008 /* If an NV has not enough bits in its
10009 * mantissa to represent an UV this summing of
10010 * small low-order numbers is a waste of time
10011 * (because the NV cannot preserve the
10012 * low-order bits anyway): we could just
10013 * remember when did we overflow and in the
10014 * end just multiply n by the right
10015 * amount. */
10016 n += (NV) b;
55497cff 10017 }
378cc40b
LW
10018 break;
10019 }
10020 }
02aa26ce
NT
10021
10022 /* if we get here, we had success: make a scalar value from
10023 the number.
10024 */
378cc40b 10025 out:
928753ea
JH
10026
10027 /* final misplaced underbar check */
10028 if (s[-1] == '_') {
a2a5de95 10029 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10030 }
10031
9e24b6e2 10032 if (overflowed) {
a2a5de95
NC
10033 if (n > 4294967295.0)
10034 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10035 "%s number > %s non-portable",
10036 Base, max);
b081dd7e 10037 sv = newSVnv(n);
9e24b6e2
JH
10038 }
10039 else {
15041a67 10040#if UVSIZE > 4
a2a5de95
NC
10041 if (u > 0xffffffff)
10042 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10043 "%s number > %s non-portable",
10044 Base, max);
2cc4c2dc 10045#endif
b081dd7e 10046 sv = newSVuv(u);
9e24b6e2 10047 }
61f33854 10048 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10049 sv = new_constant(start, s - start, "integer",
eb0d8d16 10050 sv, NULL, NULL, 0);
61f33854 10051 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10052 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10053 }
10054 break;
02aa26ce
NT
10055
10056 /*
10057 handle decimal numbers.
10058 we're also sent here when we read a 0 as the first digit
10059 */
378cc40b
LW
10060 case '1': case '2': case '3': case '4': case '5':
10061 case '6': case '7': case '8': case '9': case '.':
10062 decimal:
3280af22
NIS
10063 d = PL_tokenbuf;
10064 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10065 floatit = FALSE;
02aa26ce
NT
10066
10067 /* read next group of digits and _ and copy into d */
de3bb511 10068 while (isDIGIT(*s) || *s == '_') {
4e553d73 10069 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10070 if -w is on
10071 */
93a17b20 10072 if (*s == '_') {
a2a5de95
NC
10073 if (lastub && s == lastub + 1)
10074 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10075 "Misplaced _ in number");
928753ea 10076 lastub = s++;
93a17b20 10077 }
fc36a67e 10078 else {
02aa26ce 10079 /* check for end of fixed-length buffer */
fc36a67e 10080 if (d >= e)
cea2e8a9 10081 Perl_croak(aTHX_ number_too_long);
02aa26ce 10082 /* if we're ok, copy the character */
378cc40b 10083 *d++ = *s++;
fc36a67e 10084 }
378cc40b 10085 }
02aa26ce
NT
10086
10087 /* final misplaced underbar check */
928753ea 10088 if (lastub && s == lastub + 1) {
a2a5de95 10089 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10090 }
02aa26ce
NT
10091
10092 /* read a decimal portion if there is one. avoid
10093 3..5 being interpreted as the number 3. followed
10094 by .5
10095 */
2f3197b3 10096 if (*s == '.' && s[1] != '.') {
79072805 10097 floatit = TRUE;
378cc40b 10098 *d++ = *s++;
02aa26ce 10099
928753ea 10100 if (*s == '_') {
a2a5de95
NC
10101 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10102 "Misplaced _ in number");
928753ea
JH
10103 lastub = s;
10104 }
10105
10106 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10107 */
fc36a67e 10108 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10109 /* fixed length buffer check */
fc36a67e 10110 if (d >= e)
cea2e8a9 10111 Perl_croak(aTHX_ number_too_long);
928753ea 10112 if (*s == '_') {
a2a5de95
NC
10113 if (lastub && s == lastub + 1)
10114 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10115 "Misplaced _ in number");
928753ea
JH
10116 lastub = s;
10117 }
10118 else
fc36a67e 10119 *d++ = *s;
378cc40b 10120 }
928753ea
JH
10121 /* fractional part ending in underbar? */
10122 if (s[-1] == '_') {
a2a5de95
NC
10123 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10124 "Misplaced _ in number");
928753ea 10125 }
dd629d5b
GS
10126 if (*s == '.' && isDIGIT(s[1])) {
10127 /* oops, it's really a v-string, but without the "v" */
f4758303 10128 s = start;
dd629d5b
GS
10129 goto vstring;
10130 }
378cc40b 10131 }
02aa26ce
NT
10132
10133 /* read exponent part, if present */
3792a11b 10134 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10135 floatit = TRUE;
10136 s++;
02aa26ce
NT
10137
10138 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10139 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10140
7fd134d9
JH
10141 /* stray preinitial _ */
10142 if (*s == '_') {
a2a5de95
NC
10143 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10144 "Misplaced _ in number");
7fd134d9
JH
10145 lastub = s++;
10146 }
10147
02aa26ce 10148 /* allow positive or negative exponent */
378cc40b
LW
10149 if (*s == '+' || *s == '-')
10150 *d++ = *s++;
02aa26ce 10151
7fd134d9
JH
10152 /* stray initial _ */
10153 if (*s == '_') {
a2a5de95
NC
10154 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10155 "Misplaced _ in number");
7fd134d9
JH
10156 lastub = s++;
10157 }
10158
7fd134d9
JH
10159 /* read digits of exponent */
10160 while (isDIGIT(*s) || *s == '_') {
10161 if (isDIGIT(*s)) {
10162 if (d >= e)
10163 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10164 *d++ = *s++;
7fd134d9
JH
10165 }
10166 else {
041457d9 10167 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10168 (!isDIGIT(s[1]) && s[1] != '_')))
10169 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10170 "Misplaced _ in number");
b3b48e3e 10171 lastub = s++;
7fd134d9 10172 }
7fd134d9 10173 }
378cc40b 10174 }
02aa26ce 10175
02aa26ce 10176
0b7fceb9 10177 /*
58bb9ec3
NC
10178 We try to do an integer conversion first if no characters
10179 indicating "float" have been found.
0b7fceb9
MU
10180 */
10181
10182 if (!floatit) {
58bb9ec3 10183 UV uv;
6136c704 10184 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10185
10186 if (flags == IS_NUMBER_IN_UV) {
10187 if (uv <= IV_MAX)
b081dd7e 10188 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10189 else
b081dd7e 10190 sv = newSVuv(uv);
58bb9ec3
NC
10191 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10192 if (uv <= (UV) IV_MIN)
b081dd7e 10193 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10194 else
10195 floatit = TRUE;
10196 } else
10197 floatit = TRUE;
10198 }
0b7fceb9 10199 if (floatit) {
58bb9ec3
NC
10200 /* terminate the string */
10201 *d = '\0';
86554af2 10202 nv = Atof(PL_tokenbuf);
b081dd7e 10203 sv = newSVnv(nv);
86554af2 10204 }
86554af2 10205
eb0d8d16
NC
10206 if ( floatit
10207 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10208 const char *const key = floatit ? "float" : "integer";
10209 const STRLEN keylen = floatit ? 5 : 7;
10210 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10211 key, keylen, sv, NULL, NULL, 0);
10212 }
378cc40b 10213 break;
0b7fceb9 10214
e312add1 10215 /* if it starts with a v, it could be a v-string */
a7cb1f99 10216 case 'v':
dd629d5b 10217vstring:
561b68a9 10218 sv = newSV(5); /* preallocate storage space */
65b06e02 10219 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10220 break;
79072805 10221 }
a687059c 10222
02aa26ce
NT
10223 /* make the op for the constant and return */
10224
a86a20aa 10225 if (sv)
b73d6f50 10226 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10227 else
5f66b61c 10228 lvalp->opval = NULL;
a687059c 10229
73d840c0 10230 return (char *)s;
378cc40b
LW
10231}
10232
76e3520e 10233STATIC char *
cea2e8a9 10234S_scan_formline(pTHX_ register char *s)
378cc40b 10235{
97aff369 10236 dVAR;
79072805 10237 register char *eol;
378cc40b 10238 register char *t;
6136c704 10239 SV * const stuff = newSVpvs("");
79072805 10240 bool needargs = FALSE;
c5ee2135 10241 bool eofmt = FALSE;
5db06880
NC
10242#ifdef PERL_MAD
10243 char *tokenstart = s;
4f61fd4b
JC
10244 SV* savewhite = NULL;
10245
5db06880 10246 if (PL_madskills) {
cd81e915
NC
10247 savewhite = PL_thiswhite;
10248 PL_thiswhite = 0;
5db06880
NC
10249 }
10250#endif
378cc40b 10251
7918f24d
NC
10252 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10253
79072805 10254 while (!needargs) {
a1b95068 10255 if (*s == '.') {
c35e046a 10256 t = s+1;
51882d45 10257#ifdef PERL_STRICT_CR
c35e046a
AL
10258 while (SPACE_OR_TAB(*t))
10259 t++;
51882d45 10260#else
c35e046a
AL
10261 while (SPACE_OR_TAB(*t) || *t == '\r')
10262 t++;
51882d45 10263#endif
c5ee2135
WL
10264 if (*t == '\n' || t == PL_bufend) {
10265 eofmt = TRUE;
79072805 10266 break;
c5ee2135 10267 }
79072805 10268 }
3280af22 10269 if (PL_in_eval && !PL_rsfp) {
07409e01 10270 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 10271 if (!eol++)
3280af22 10272 eol = PL_bufend;
0f85fab0
LW
10273 }
10274 else
3280af22 10275 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 10276 if (*s != '#') {
a0d0e21e
LW
10277 for (t = s; t < eol; t++) {
10278 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10279 needargs = FALSE;
10280 goto enough; /* ~~ must be first line in formline */
378cc40b 10281 }
a0d0e21e
LW
10282 if (*t == '@' || *t == '^')
10283 needargs = TRUE;
378cc40b 10284 }
7121b347
MG
10285 if (eol > s) {
10286 sv_catpvn(stuff, s, eol-s);
2dc4c65b 10287#ifndef PERL_STRICT_CR
7121b347
MG
10288 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10289 char *end = SvPVX(stuff) + SvCUR(stuff);
10290 end[-2] = '\n';
10291 end[-1] = '\0';
b162af07 10292 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 10293 }
2dc4c65b 10294#endif
7121b347
MG
10295 }
10296 else
10297 break;
79072805 10298 }
95a20fc0 10299 s = (char*)eol;
3280af22 10300 if (PL_rsfp) {
f0e67a1d 10301 bool got_some;
5db06880
NC
10302#ifdef PERL_MAD
10303 if (PL_madskills) {
cd81e915
NC
10304 if (PL_thistoken)
10305 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 10306 else
cd81e915 10307 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
10308 }
10309#endif
f0e67a1d
Z
10310 PL_bufptr = PL_bufend;
10311 CopLINE_inc(PL_curcop);
10312 got_some = lex_next_chunk(0);
10313 CopLINE_dec(PL_curcop);
10314 s = PL_bufptr;
5db06880 10315#ifdef PERL_MAD
f0e67a1d 10316 tokenstart = PL_bufptr;
5db06880 10317#endif
f0e67a1d 10318 if (!got_some)
378cc40b 10319 break;
378cc40b 10320 }
463ee0b2 10321 incline(s);
79072805 10322 }
a0d0e21e
LW
10323 enough:
10324 if (SvCUR(stuff)) {
3280af22 10325 PL_expect = XTERM;
79072805 10326 if (needargs) {
3280af22 10327 PL_lex_state = LEX_NORMAL;
cd81e915 10328 start_force(PL_curforce);
9ded7720 10329 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
10330 force_next(',');
10331 }
a0d0e21e 10332 else
3280af22 10333 PL_lex_state = LEX_FORMLINE;
1bd51a4c 10334 if (!IN_BYTES) {
95a20fc0 10335 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
10336 SvUTF8_on(stuff);
10337 else if (PL_encoding)
10338 sv_recode_to_utf8(stuff, PL_encoding);
10339 }
cd81e915 10340 start_force(PL_curforce);
9ded7720 10341 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 10342 force_next(THING);
cd81e915 10343 start_force(PL_curforce);
9ded7720 10344 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 10345 force_next(LSTOP);
378cc40b 10346 }
79072805 10347 else {
8990e307 10348 SvREFCNT_dec(stuff);
c5ee2135
WL
10349 if (eofmt)
10350 PL_lex_formbrack = 0;
3280af22 10351 PL_bufptr = s;
79072805 10352 }
5db06880
NC
10353#ifdef PERL_MAD
10354 if (PL_madskills) {
cd81e915
NC
10355 if (PL_thistoken)
10356 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 10357 else
cd81e915
NC
10358 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10359 PL_thiswhite = savewhite;
5db06880
NC
10360 }
10361#endif
79072805 10362 return s;
378cc40b 10363}
a687059c 10364
ba6d6ac9 10365I32
864dbfa3 10366Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 10367{
97aff369 10368 dVAR;
a3b680e6 10369 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 10370 CV* const outsidecv = PL_compcv;
8990e307 10371
3280af22
NIS
10372 if (PL_compcv) {
10373 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 10374 }
7766f137 10375 SAVEI32(PL_subline);
3280af22 10376 save_item(PL_subname);
3280af22 10377 SAVESPTR(PL_compcv);
3280af22 10378
ea726b52 10379 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
10380 CvFLAGS(PL_compcv) |= flags;
10381
57843af0 10382 PL_subline = CopLINE(PL_curcop);
dd2155a4 10383 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 10384 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 10385 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 10386
8990e307
LW
10387 return oldsavestack_ix;
10388}
10389
084592ab
CN
10390#ifdef __SC__
10391#pragma segment Perl_yylex
10392#endif
af41e527
NC
10393static int
10394S_yywarn(pTHX_ const char *const s)
8990e307 10395{
97aff369 10396 dVAR;
7918f24d
NC
10397
10398 PERL_ARGS_ASSERT_YYWARN;
10399
faef0170 10400 PL_in_eval |= EVAL_WARNONLY;
748a9306 10401 yyerror(s);
faef0170 10402 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 10403 return 0;
8990e307
LW
10404}
10405
10406int
15f169a1 10407Perl_yyerror(pTHX_ const char *const s)
463ee0b2 10408{
97aff369 10409 dVAR;
bfed75c6
AL
10410 const char *where = NULL;
10411 const char *context = NULL;
68dc0745 10412 int contlen = -1;
46fc3d4c 10413 SV *msg;
5912531f 10414 int yychar = PL_parser->yychar;
463ee0b2 10415
7918f24d
NC
10416 PERL_ARGS_ASSERT_YYERROR;
10417
3280af22 10418 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 10419 where = "at EOF";
8bcfe651
TM
10420 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10421 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10422 PL_oldbufptr != PL_bufptr) {
f355267c
JH
10423 /*
10424 Only for NetWare:
10425 The code below is removed for NetWare because it abends/crashes on NetWare
10426 when the script has error such as not having the closing quotes like:
10427 if ($var eq "value)
10428 Checking of white spaces is anyway done in NetWare code.
10429 */
10430#ifndef NETWARE
3280af22
NIS
10431 while (isSPACE(*PL_oldoldbufptr))
10432 PL_oldoldbufptr++;
f355267c 10433#endif
3280af22
NIS
10434 context = PL_oldoldbufptr;
10435 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 10436 }
8bcfe651
TM
10437 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10438 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
10439 /*
10440 Only for NetWare:
10441 The code below is removed for NetWare because it abends/crashes on NetWare
10442 when the script has error such as not having the closing quotes like:
10443 if ($var eq "value)
10444 Checking of white spaces is anyway done in NetWare code.
10445 */
10446#ifndef NETWARE
3280af22
NIS
10447 while (isSPACE(*PL_oldbufptr))
10448 PL_oldbufptr++;
f355267c 10449#endif
3280af22
NIS
10450 context = PL_oldbufptr;
10451 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
10452 }
10453 else if (yychar > 255)
68dc0745 10454 where = "next token ???";
12fbd33b 10455 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
10456 if (PL_lex_state == LEX_NORMAL ||
10457 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 10458 where = "at end of line";
3280af22 10459 else if (PL_lex_inpat)
68dc0745 10460 where = "within pattern";
463ee0b2 10461 else
68dc0745 10462 where = "within string";
463ee0b2 10463 }
46fc3d4c 10464 else {
84bafc02 10465 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 10466 if (yychar < 32)
cea2e8a9 10467 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 10468 else if (isPRINT_LC(yychar)) {
88c9ea1e 10469 const char string = yychar;
5e7aa789
NC
10470 sv_catpvn(where_sv, &string, 1);
10471 }
463ee0b2 10472 else
cea2e8a9 10473 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 10474 where = SvPVX_const(where_sv);
463ee0b2 10475 }
46fc3d4c 10476 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 10477 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 10478 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 10479 if (context)
cea2e8a9 10480 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 10481 else
cea2e8a9 10482 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 10483 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 10484 Perl_sv_catpvf(aTHX_ msg,
57def98f 10485 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 10486 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 10487 PL_multi_end = 0;
a0d0e21e 10488 }
500960a6 10489 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 10490 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 10491 }
463ee0b2 10492 else
5a844595 10493 qerror(msg);
c7d6bfb2
GS
10494 if (PL_error_count >= 10) {
10495 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 10496 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 10497 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
10498 else
10499 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 10500 OutCopFILE(PL_curcop));
c7d6bfb2 10501 }
3280af22 10502 PL_in_my = 0;
5c284bb0 10503 PL_in_my_stash = NULL;
463ee0b2
LW
10504 return 0;
10505}
084592ab
CN
10506#ifdef __SC__
10507#pragma segment Main
10508#endif
4e35701f 10509
b250498f 10510STATIC char*
3ae08724 10511S_swallow_bom(pTHX_ U8 *s)
01ec43d0 10512{
97aff369 10513 dVAR;
f54cb97a 10514 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
10515
10516 PERL_ARGS_ASSERT_SWALLOW_BOM;
10517
7aa207d6 10518 switch (s[0]) {
4e553d73
NIS
10519 case 0xFF:
10520 if (s[1] == 0xFE) {
ee6ba15d 10521 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 10522 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 10523 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 10524#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 10525 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 10526 s += 2;
dea0fc0b 10527 if (PL_bufend > (char*)s) {
81a923f4 10528 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 10529 }
b250498f 10530#else
ee6ba15d 10531 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 10532#endif
01ec43d0
GS
10533 }
10534 break;
78ae23f5 10535 case 0xFE:
7aa207d6 10536 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 10537#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10538 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
10539 s += 2;
10540 if (PL_bufend > (char *)s) {
81a923f4 10541 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 10542 }
b250498f 10543#else
ee6ba15d 10544 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 10545#endif
01ec43d0
GS
10546 }
10547 break;
3ae08724
GS
10548 case 0xEF:
10549 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 10550 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
10551 s += 3; /* UTF-8 */
10552 }
10553 break;
10554 case 0:
7aa207d6
JH
10555 if (slen > 3) {
10556 if (s[1] == 0) {
10557 if (s[2] == 0xFE && s[3] == 0xFF) {
10558 /* UTF-32 big-endian */
ee6ba15d 10559 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
10560 }
10561 }
10562 else if (s[2] == 0 && s[3] != 0) {
10563 /* Leading bytes
10564 * 00 xx 00 xx
10565 * are a good indicator of UTF-16BE. */
ee6ba15d 10566#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10567 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
10568 s = add_utf16_textfilter(s, FALSE);
10569#else
10570 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10571#endif
7aa207d6 10572 }
01ec43d0 10573 }
e294cc5d
JH
10574#ifdef EBCDIC
10575 case 0xDD:
10576 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10577 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10578 s += 4; /* UTF-8 */
10579 }
10580 break;
10581#endif
10582
7aa207d6
JH
10583 default:
10584 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10585 /* Leading bytes
10586 * xx 00 xx 00
10587 * are a good indicator of UTF-16LE. */
ee6ba15d 10588#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10589 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 10590 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
10591#else
10592 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10593#endif
7aa207d6 10594 }
01ec43d0 10595 }
b8f84bb2 10596 return (char*)s;
b250498f 10597}
4755096e 10598
6e3aabd6
GS
10599
10600#ifndef PERL_NO_UTF16_FILTER
10601static I32
a28af015 10602S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 10603{
97aff369 10604 dVAR;
f3040f2c 10605 SV *const filter = FILTER_DATA(idx);
2a773401
NC
10606 /* We re-use this each time round, throwing the contents away before we
10607 return. */
2a773401 10608 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 10609 SV *const utf8_buffer = filter;
c28d6105 10610 IV status = IoPAGE(filter);
f2338a2e 10611 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 10612 I32 retval;
c8b0cbae 10613
c85ae797
NC
10614 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10615
c8b0cbae
NC
10616 /* As we're automatically added, at the lowest level, and hence only called
10617 from this file, we can be sure that we're not called in block mode. Hence
10618 don't bother writing code to deal with block mode. */
10619 if (maxlen) {
10620 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10621 }
c28d6105
NC
10622 if (status < 0) {
10623 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10624 }
1de9afcd 10625 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 10626 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 10627 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
10628 reverse ? 'l' : 'b', idx, maxlen, status,
10629 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10630
10631 while (1) {
10632 STRLEN chars;
10633 STRLEN have;
dea0fc0b 10634 I32 newlen;
2a773401 10635 U8 *end;
c28d6105
NC
10636 /* First, look in our buffer of existing UTF-8 data: */
10637 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10638
10639 if (nl) {
10640 ++nl;
10641 } else if (status == 0) {
10642 /* EOF */
10643 IoPAGE(filter) = 0;
10644 nl = SvEND(utf8_buffer);
10645 }
10646 if (nl) {
d2d1d4de
NC
10647 STRLEN got = nl - SvPVX(utf8_buffer);
10648 /* Did we have anything to append? */
10649 retval = got != 0;
10650 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
10651 /* Everything else in this code works just fine if SVp_POK isn't
10652 set. This, however, needs it, and we need it to work, else
10653 we loop infinitely because the buffer is never consumed. */
10654 sv_chop(utf8_buffer, nl);
10655 break;
10656 }
ba77e4cc 10657
c28d6105
NC
10658 /* OK, not a complete line there, so need to read some more UTF-16.
10659 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
10660 while (1) {
10661 if (status <= 0)
10662 break;
10663 if (SvCUR(utf16_buffer) >= 2) {
10664 /* Location of the high octet of the last complete code point.
10665 Gosh, UTF-16 is a pain. All the benefits of variable length,
10666 *coupled* with all the benefits of partial reads and
10667 endianness. */
10668 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10669 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10670
10671 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10672 break;
10673 }
10674
10675 /* We have the first half of a surrogate. Read more. */
10676 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10677 }
c28d6105 10678
c28d6105
NC
10679 status = FILTER_READ(idx + 1, utf16_buffer,
10680 160 + (SvCUR(utf16_buffer) & 1));
10681 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 10682 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
10683 if (status < 0) {
10684 /* Error */
10685 IoPAGE(filter) = status;
10686 return status;
10687 }
10688 }
10689
10690 chars = SvCUR(utf16_buffer) >> 1;
10691 have = SvCUR(utf8_buffer);
10692 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 10693
aa6dbd60 10694 if (reverse) {
c28d6105
NC
10695 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10696 (U8*)SvPVX_const(utf8_buffer) + have,
10697 chars * 2, &newlen);
aa6dbd60 10698 } else {
2a773401 10699 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
10700 (U8*)SvPVX_const(utf8_buffer) + have,
10701 chars * 2, &newlen);
2a773401 10702 }
c28d6105 10703 SvCUR_set(utf8_buffer, have + newlen);
2a773401 10704 *end = '\0';
c28d6105 10705
e07286ed
NC
10706 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10707 it's private to us, and utf16_to_utf8{,reversed} take a
10708 (pointer,length) pair, rather than a NUL-terminated string. */
10709 if(SvCUR(utf16_buffer) & 1) {
10710 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10711 SvCUR_set(utf16_buffer, 1);
10712 } else {
10713 SvCUR_set(utf16_buffer, 0);
10714 }
2a773401 10715 }
c28d6105
NC
10716 DEBUG_P(PerlIO_printf(Perl_debug_log,
10717 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10718 status,
10719 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10720 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 10721 return retval;
6e3aabd6 10722}
81a923f4
NC
10723
10724static U8 *
10725S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10726{
2a773401 10727 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 10728
c85ae797
NC
10729 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10730
c28d6105 10731 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 10732 sv_setpvs(filter, "");
2a773401 10733 IoLINES(filter) = reversed;
c28d6105
NC
10734 IoPAGE(filter) = 1; /* Not EOF */
10735
10736 /* Sadly, we have to return a valid pointer, come what may, so we have to
10737 ignore any error return from this. */
10738 SvCUR_set(PL_linestr, 0);
10739 if (FILTER_READ(0, PL_linestr, 0)) {
10740 SvUTF8_on(PL_linestr);
81a923f4 10741 } else {
c28d6105 10742 SvUTF8_on(PL_linestr);
81a923f4 10743 }
c28d6105 10744 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
10745 return (U8*)SvPVX(PL_linestr);
10746}
6e3aabd6 10747#endif
9f4817db 10748
f333445c
JP
10749/*
10750Returns a pointer to the next character after the parsed
10751vstring, as well as updating the passed in sv.
10752
10753Function must be called like
10754
561b68a9 10755 sv = newSV(5);
65b06e02 10756 s = scan_vstring(s,e,sv);
f333445c 10757
65b06e02 10758where s and e are the start and end of the string.
f333445c
JP
10759The sv should already be large enough to store the vstring
10760passed in, for performance reasons.
10761
10762*/
10763
10764char *
15f169a1 10765Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 10766{
97aff369 10767 dVAR;
bfed75c6
AL
10768 const char *pos = s;
10769 const char *start = s;
7918f24d
NC
10770
10771 PERL_ARGS_ASSERT_SCAN_VSTRING;
10772
f333445c 10773 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 10774 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 10775 pos++;
f333445c
JP
10776 if ( *pos != '.') {
10777 /* this may not be a v-string if followed by => */
bfed75c6 10778 const char *next = pos;
65b06e02 10779 while (next < e && isSPACE(*next))
8fc7bb1c 10780 ++next;
65b06e02 10781 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
10782 /* return string not v-string */
10783 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 10784 return (char *)pos;
f333445c
JP
10785 }
10786 }
10787
10788 if (!isALPHA(*pos)) {
89ebb4a3 10789 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 10790
d4c19fe8
AL
10791 if (*s == 'v')
10792 s++; /* get past 'v' */
f333445c 10793
76f68e9b 10794 sv_setpvs(sv, "");
f333445c
JP
10795
10796 for (;;) {
d4c19fe8 10797 /* this is atoi() that tolerates underscores */
0bd48802
AL
10798 U8 *tmpend;
10799 UV rev = 0;
d4c19fe8
AL
10800 const char *end = pos;
10801 UV mult = 1;
10802 while (--end >= s) {
10803 if (*end != '_') {
10804 const UV orev = rev;
f333445c
JP
10805 rev += (*end - '0') * mult;
10806 mult *= 10;
9b387841
NC
10807 if (orev > rev)
10808 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10809 "Integer overflow in decimal number");
f333445c
JP
10810 }
10811 }
10812#ifdef EBCDIC
10813 if (rev > 0x7FFFFFFF)
10814 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10815#endif
10816 /* Append native character for the rev point */
10817 tmpend = uvchr_to_utf8(tmpbuf, rev);
10818 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10819 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10820 SvUTF8_on(sv);
65b06e02 10821 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
10822 s = ++pos;
10823 else {
10824 s = pos;
10825 break;
10826 }
65b06e02 10827 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
10828 pos++;
10829 }
10830 SvPOK_on(sv);
10831 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10832 SvRMAGICAL_on(sv);
10833 }
73d840c0 10834 return (char *)s;
f333445c
JP
10835}
10836
88e1f1a2
JV
10837int
10838Perl_keyword_plugin_standard(pTHX_
10839 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10840{
10841 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10842 PERL_UNUSED_CONTEXT;
10843 PERL_UNUSED_ARG(keyword_ptr);
10844 PERL_UNUSED_ARG(keyword_len);
10845 PERL_UNUSED_ARG(op_ptr);
10846 return KEYWORD_PLUGIN_DECLINE;
10847}
10848
78cdf107 10849#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 10850static void
78cdf107 10851S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
10852{
10853 SAVEI32(PL_lex_brackets);
10854 if (PL_lex_brackets > 100)
10855 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
10856 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
10857 SAVEI32(PL_lex_allbrackets);
10858 PL_lex_allbrackets = 0;
10859 SAVEI8(PL_lex_fakeeof);
2dcac756 10860 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
10861 if(yyparse(gramtype) && !PL_parser->error_count)
10862 qerror(Perl_mess(aTHX_ "Parse error"));
10863}
10864
78cdf107 10865#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 10866static OP *
78cdf107 10867S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
10868{
10869 OP *o;
10870 ENTER;
10871 SAVEVPTR(PL_eval_root);
10872 PL_eval_root = NULL;
78cdf107 10873 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
10874 o = PL_eval_root;
10875 LEAVE;
10876 return o;
10877}
10878
78cdf107
Z
10879#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
10880static OP *
10881S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
10882{
10883 OP *exprop;
10884 if (flags & ~PARSE_OPTIONAL)
10885 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
10886 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
10887 if (!exprop && !(flags & PARSE_OPTIONAL)) {
10888 if (!PL_parser->error_count)
10889 qerror(Perl_mess(aTHX_ "Parse error"));
10890 exprop = newOP(OP_NULL, 0);
10891 }
10892 return exprop;
10893}
10894
10895/*
10896=for apidoc Amx|OP *|parse_arithexpr|U32 flags
10897
10898Parse a Perl arithmetic expression. This may contain operators of precedence
10899down to the bit shift operators. The expression must be followed (and thus
10900terminated) either by a comparison or lower-precedence operator or by
10901something that would normally terminate an expression such as semicolon.
10902If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
10903otherwise it is mandatory. It is up to the caller to ensure that the
10904dynamic parser state (L</PL_parser> et al) is correctly set to reflect
10905the source of the code to be parsed and the lexical context for the
10906expression.
10907
10908The op tree representing the expression is returned. If an optional
10909expression is absent, a null pointer is returned, otherwise the pointer
10910will be non-null.
10911
10912If an error occurs in parsing or compilation, in most cases a valid op
10913tree is returned anyway. The error is reflected in the parser state,
10914normally resulting in a single exception at the top level of parsing
10915which covers all the compilation errors that occurred. Some compilation
10916errors, however, will throw an exception immediately.
10917
10918=cut
10919*/
10920
10921OP *
10922Perl_parse_arithexpr(pTHX_ U32 flags)
10923{
10924 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
10925}
10926
10927/*
10928=for apidoc Amx|OP *|parse_termexpr|U32 flags
10929
10930Parse a Perl term expression. This may contain operators of precedence
10931down to the assignment operators. The expression must be followed (and thus
10932terminated) either by a comma or lower-precedence operator or by
10933something that would normally terminate an expression such as semicolon.
10934If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
10935otherwise it is mandatory. It is up to the caller to ensure that the
10936dynamic parser state (L</PL_parser> et al) is correctly set to reflect
10937the source of the code to be parsed and the lexical context for the
10938expression.
10939
10940The op tree representing the expression is returned. If an optional
10941expression is absent, a null pointer is returned, otherwise the pointer
10942will be non-null.
10943
10944If an error occurs in parsing or compilation, in most cases a valid op
10945tree is returned anyway. The error is reflected in the parser state,
10946normally resulting in a single exception at the top level of parsing
10947which covers all the compilation errors that occurred. Some compilation
10948errors, however, will throw an exception immediately.
10949
10950=cut
10951*/
10952
10953OP *
10954Perl_parse_termexpr(pTHX_ U32 flags)
10955{
10956 return parse_expr(LEX_FAKEEOF_COMMA, flags);
10957}
10958
10959/*
10960=for apidoc Amx|OP *|parse_listexpr|U32 flags
10961
10962Parse a Perl list expression. This may contain operators of precedence
10963down to the comma operator. The expression must be followed (and thus
10964terminated) either by a low-precedence logic operator such as C<or> or by
10965something that would normally terminate an expression such as semicolon.
10966If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
10967otherwise it is mandatory. It is up to the caller to ensure that the
10968dynamic parser state (L</PL_parser> et al) is correctly set to reflect
10969the source of the code to be parsed and the lexical context for the
10970expression.
10971
10972The op tree representing the expression is returned. If an optional
10973expression is absent, a null pointer is returned, otherwise the pointer
10974will be non-null.
10975
10976If an error occurs in parsing or compilation, in most cases a valid op
10977tree is returned anyway. The error is reflected in the parser state,
10978normally resulting in a single exception at the top level of parsing
10979which covers all the compilation errors that occurred. Some compilation
10980errors, however, will throw an exception immediately.
10981
10982=cut
10983*/
10984
10985OP *
10986Perl_parse_listexpr(pTHX_ U32 flags)
10987{
10988 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
10989}
10990
10991/*
10992=for apidoc Amx|OP *|parse_fullexpr|U32 flags
10993
10994Parse a single complete Perl expression. This allows the full
10995expression grammar, including the lowest-precedence operators such
10996as C<or>. The expression must be followed (and thus terminated) by a
10997token that an expression would normally be terminated by: end-of-file,
10998closing bracketing punctuation, semicolon, or one of the keywords that
10999signals a postfix expression-statement modifier. If I<flags> includes
11000C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11001mandatory. It is up to the caller to ensure that the dynamic parser
11002state (L</PL_parser> et al) is correctly set to reflect the source of
11003the code to be parsed and the lexical context for the expression.
11004
11005The op tree representing the expression is returned. If an optional
11006expression is absent, a null pointer is returned, otherwise the pointer
11007will be non-null.
11008
11009If an error occurs in parsing or compilation, in most cases a valid op
11010tree is returned anyway. The error is reflected in the parser state,
11011normally resulting in a single exception at the top level of parsing
11012which covers all the compilation errors that occurred. Some compilation
11013errors, however, will throw an exception immediately.
11014
11015=cut
11016*/
11017
11018OP *
11019Perl_parse_fullexpr(pTHX_ U32 flags)
11020{
11021 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11022}
11023
e53d8f76
Z
11024/*
11025=for apidoc Amx|OP *|parse_block|U32 flags
11026
11027Parse a single complete Perl code block. This consists of an opening
11028brace, a sequence of statements, and a closing brace. The block
11029constitutes a lexical scope, so C<my> variables and various compile-time
11030effects can be contained within it. It is up to the caller to ensure
11031that the dynamic parser state (L</PL_parser> et al) is correctly set to
11032reflect the source of the code to be parsed and the lexical context for
11033the statement.
11034
11035The op tree representing the code block is returned. This is always a
11036real op, never a null pointer. It will normally be a C<lineseq> list,
11037including C<nextstate> or equivalent ops. No ops to construct any kind
11038of runtime scope are included by virtue of it being a block.
11039
11040If an error occurs in parsing or compilation, in most cases a valid op
11041tree (most likely null) is returned anyway. The error is reflected in
11042the parser state, normally resulting in a single exception at the top
11043level of parsing which covers all the compilation errors that occurred.
11044Some compilation errors, however, will throw an exception immediately.
11045
11046The I<flags> parameter is reserved for future use, and must always
11047be zero.
11048
11049=cut
11050*/
11051
11052OP *
11053Perl_parse_block(pTHX_ U32 flags)
11054{
11055 if (flags)
11056 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11057 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11058}
11059
1da4ca5f 11060/*
8359b381
Z
11061=for apidoc Amx|OP *|parse_barestmt|U32 flags
11062
11063Parse a single unadorned Perl statement. This may be a normal imperative
11064statement or a declaration that has compile-time effect. It does not
11065include any label or other affixture. It is up to the caller to ensure
11066that the dynamic parser state (L</PL_parser> et al) is correctly set to
11067reflect the source of the code to be parsed and the lexical context for
11068the statement.
11069
11070The op tree representing the statement is returned. This may be a
11071null pointer if the statement is null, for example if it was actually
11072a subroutine definition (which has compile-time side effects). If not
11073null, it will be ops directly implementing the statement, suitable to
11074pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11075equivalent op (except for those embedded in a scope contained entirely
11076within the statement).
11077
11078If an error occurs in parsing or compilation, in most cases a valid op
11079tree (most likely null) is returned anyway. The error is reflected in
11080the parser state, normally resulting in a single exception at the top
11081level of parsing which covers all the compilation errors that occurred.
11082Some compilation errors, however, will throw an exception immediately.
11083
11084The I<flags> parameter is reserved for future use, and must always
11085be zero.
11086
11087=cut
11088*/
11089
11090OP *
11091Perl_parse_barestmt(pTHX_ U32 flags)
11092{
11093 if (flags)
11094 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11095 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11096}
11097
11098/*
361d9b55
Z
11099=for apidoc Amx|SV *|parse_label|U32 flags
11100
11101Parse a single label, possibly optional, of the type that may prefix a
11102Perl statement. It is up to the caller to ensure that the dynamic parser
11103state (L</PL_parser> et al) is correctly set to reflect the source of
11104the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11105label is optional, otherwise it is mandatory.
11106
11107The name of the label is returned in the form of a fresh scalar. If an
11108optional label is absent, a null pointer is returned.
11109
11110If an error occurs in parsing, which can only occur if the label is
11111mandatory, a valid label is returned anyway. The error is reflected in
11112the parser state, normally resulting in a single exception at the top
11113level of parsing which covers all the compilation errors that occurred.
11114
11115=cut
11116*/
11117
11118SV *
11119Perl_parse_label(pTHX_ U32 flags)
11120{
11121 if (flags & ~PARSE_OPTIONAL)
11122 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11123 if (PL_lex_state == LEX_KNOWNEXT) {
11124 PL_parser->yychar = yylex();
11125 if (PL_parser->yychar == LABEL) {
11126 char *lpv = pl_yylval.pval;
11127 STRLEN llen = strlen(lpv);
11128 SV *lsv;
11129 PL_parser->yychar = YYEMPTY;
11130 lsv = newSV_type(SVt_PV);
11131 SvPV_set(lsv, lpv);
11132 SvCUR_set(lsv, llen);
11133 SvLEN_set(lsv, llen+1);
11134 SvPOK_on(lsv);
11135 return lsv;
11136 } else {
11137 yyunlex();
11138 goto no_label;
11139 }
11140 } else {
11141 char *s, *t;
11142 U8 c;
11143 STRLEN wlen, bufptr_pos;
11144 lex_read_space(0);
11145 t = s = PL_bufptr;
11146 c = (U8)*s;
11147 if (!isIDFIRST_A(c))
11148 goto no_label;
11149 do {
11150 c = (U8)*++t;
11151 } while(isWORDCHAR_A(c));
11152 wlen = t - s;
11153 if (word_takes_any_delimeter(s, wlen))
11154 goto no_label;
11155 bufptr_pos = s - SvPVX(PL_linestr);
11156 PL_bufptr = t;
11157 lex_read_space(LEX_KEEP_PREVIOUS);
11158 t = PL_bufptr;
11159 s = SvPVX(PL_linestr) + bufptr_pos;
11160 if (t[0] == ':' && t[1] != ':') {
11161 PL_oldoldbufptr = PL_oldbufptr;
11162 PL_oldbufptr = s;
11163 PL_bufptr = t+1;
11164 return newSVpvn(s, wlen);
11165 } else {
11166 PL_bufptr = s;
11167 no_label:
11168 if (flags & PARSE_OPTIONAL) {
11169 return NULL;
11170 } else {
11171 qerror(Perl_mess(aTHX_ "Parse error"));
11172 return newSVpvs("x");
11173 }
11174 }
11175 }
11176}
11177
11178/*
28ac2b49
Z
11179=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11180
11181Parse a single complete Perl statement. This may be a normal imperative
8359b381 11182statement or a declaration that has compile-time effect, and may include
8e720305 11183optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11184parser state (L</PL_parser> et al) is correctly set to reflect the source
11185of the code to be parsed and the lexical context for the statement.
11186
11187The op tree representing the statement is returned. This may be a
11188null pointer if the statement is null, for example if it was actually
11189a subroutine definition (which has compile-time side effects). If not
11190null, it will be the result of a L</newSTATEOP> call, normally including
11191a C<nextstate> or equivalent op.
11192
11193If an error occurs in parsing or compilation, in most cases a valid op
11194tree (most likely null) is returned anyway. The error is reflected in
11195the parser state, normally resulting in a single exception at the top
11196level of parsing which covers all the compilation errors that occurred.
11197Some compilation errors, however, will throw an exception immediately.
11198
11199The I<flags> parameter is reserved for future use, and must always
11200be zero.
11201
11202=cut
11203*/
11204
11205OP *
11206Perl_parse_fullstmt(pTHX_ U32 flags)
11207{
28ac2b49
Z
11208 if (flags)
11209 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11210 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11211}
11212
07ffcb73
Z
11213/*
11214=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11215
11216Parse a sequence of zero or more Perl statements. These may be normal
11217imperative statements, including optional labels, or declarations
11218that have compile-time effect, or any mixture thereof. The statement
11219sequence ends when a closing brace or end-of-file is encountered in a
11220place where a new statement could have validly started. It is up to
11221the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11222is correctly set to reflect the source of the code to be parsed and the
11223lexical context for the statements.
11224
11225The op tree representing the statement sequence is returned. This may
11226be a null pointer if the statements were all null, for example if there
11227were no statements or if there were only subroutine definitions (which
11228have compile-time side effects). If not null, it will be a C<lineseq>
11229list, normally including C<nextstate> or equivalent ops.
11230
11231If an error occurs in parsing or compilation, in most cases a valid op
11232tree is returned anyway. The error is reflected in the parser state,
11233normally resulting in a single exception at the top level of parsing
11234which covers all the compilation errors that occurred. Some compilation
11235errors, however, will throw an exception immediately.
11236
11237The I<flags> parameter is reserved for future use, and must always
11238be zero.
11239
11240=cut
11241*/
11242
11243OP *
11244Perl_parse_stmtseq(pTHX_ U32 flags)
11245{
11246 OP *stmtseqop;
e53d8f76 11247 I32 c;
07ffcb73 11248 if (flags)
78cdf107
Z
11249 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11250 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
11251 c = lex_peek_unichar(0);
11252 if (c != -1 && c != /*{*/'}')
07ffcb73 11253 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
11254 return stmtseqop;
11255}
11256
ea25a9b2 11257void
f7e3d326 11258Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 11259{
f7e3d326 11260 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2 11261 deprecate("qw(...) as parentheses");
78cdf107 11262 force_next((4<<24)|')');
ea25a9b2
Z
11263 if (qwlist->op_type == OP_STUB) {
11264 op_free(qwlist);
11265 }
11266 else {
3d8e05a0 11267 start_force(PL_curforce);
ea25a9b2
Z
11268 NEXTVAL_NEXTTOKE.opval = qwlist;
11269 force_next(THING);
11270 }
78cdf107 11271 force_next((2<<24)|'(');
ea25a9b2
Z
11272}
11273
28ac2b49 11274/*
1da4ca5f
NC
11275 * Local variables:
11276 * c-indentation-style: bsd
11277 * c-basic-offset: 4
11278 * indent-tabs-mode: t
11279 * End:
11280 *
37442d52
RGS
11281 * ex: set ts=8 sts=4 sw=4 noet:
11282 */