This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Note the if upgrade in perldelta
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
746b446a 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805
LW
150 * can get by with a single comparison (if the compiler is smart enough).
151 */
152
fb73857a 153/* #define LEX_NOTPARSING 11 is done in perl.h. */
154
b6007c36
DM
155#define LEX_NORMAL 10 /* normal code (ie not within "...") */
156#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159#define LEX_INTERPSTART 6 /* expecting the start of a $var */
160
161 /* at end of code, eg "$x" followed by: */
162#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164
165#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167#define LEX_INTERPCONST 2 /* NOT USED */
168#define LEX_FORMLINE 1 /* expecting a format line */
169#define LEX_KNOWNEXT 0 /* next token known; just return it */
170
79072805 171
bbf60fe6 172#ifdef DEBUGGING
27da23d5 173static const char* const lex_state_names[] = {
bbf60fe6
DM
174 "KNOWNEXT",
175 "FORMLINE",
176 "INTERPCONST",
177 "INTERPCONCAT",
178 "INTERPENDMAYBE",
179 "INTERPEND",
180 "INTERPSTART",
181 "INTERPPUSH",
182 "INTERPCASEMOD",
183 "INTERPNORMAL",
184 "NORMAL"
185};
186#endif
187
79072805
LW
188#ifdef ff_next
189#undef ff_next
d48672a2
LW
190#endif
191
79072805 192#include "keywords.h"
fe14fcc3 193
ffb4593c
NT
194/* CLINE is a macro that ensures PL_copline has a sane value */
195
ae986130
LW
196#ifdef CLINE
197#undef CLINE
198#endif
57843af0 199#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 200
5db06880 201#ifdef PERL_MAD
29595ff2
NC
202# define SKIPSPACE0(s) skipspace0(s)
203# define SKIPSPACE1(s) skipspace1(s)
204# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205# define PEEKSPACE(s) skipspace2(s,0)
206#else
207# define SKIPSPACE0(s) skipspace(s)
208# define SKIPSPACE1(s) skipspace(s)
209# define SKIPSPACE2(s,tsv) skipspace(s)
210# define PEEKSPACE(s) skipspace(s)
211#endif
212
ffb4593c
NT
213/*
214 * Convenience functions to return different tokens and prime the
9cbb5ea2 215 * lexer for the next token. They all take an argument.
ffb4593c
NT
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
2d2e263d 227 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
228 * BOop : bitwise or or xor
229 * BAop : bitwise and
230 * SHop : shift operator
231 * PWop : power operator
9cbb5ea2 232 * PMop : pattern-matching operator
ffb4593c
NT
233 * Aop : addition-level operator
234 * Mop : multiplication-level operator
235 * Eop : equality-testing operator
e5edeb50 236 * Rop : relational operator <= != gt
ffb4593c
NT
237 *
238 * Also see LOP and lop() below.
239 */
240
998054bd 241#ifdef DEBUGGING /* Serve -DT. */
704d4215 242# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 243#else
bbf60fe6 244# define REPORT(retval) (retval)
998054bd
SC
245#endif
246
bbf60fe6
DM
247#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
248#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
249#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
250#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
251#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
252#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
253#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
254#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
255#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
256#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
257#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
258#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
259#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
260#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
261#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
262#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
263#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
264#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
265#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
266#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 267
a687059c
LW
268/* This bit of chicanery makes a unary function followed by
269 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
270 * The UNIDOR macro is for unary functions that can be followed by the //
271 * operator (such as C<shift // 0>).
a687059c 272 */
376fcdbf 273#define UNI2(f,x) { \
6154021b 274 pl_yylval.ival = f; \
376fcdbf
AL
275 PL_expect = x; \
276 PL_bufptr = s; \
277 PL_last_uni = PL_oldbufptr; \
278 PL_last_lop_op = f; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 }
6f33ba73
RGS
284#define UNI(f) UNI2(f,XTERM)
285#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 286
376fcdbf 287#define UNIBRACK(f) { \
6154021b 288 pl_yylval.ival = f; \
376fcdbf
AL
289 PL_bufptr = s; \
290 PL_last_uni = PL_oldbufptr; \
291 if (*s == '(') \
292 return REPORT( (int)FUNC1 ); \
29595ff2 293 s = PEEKSPACE(s); \
376fcdbf
AL
294 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
295 }
79072805 296
9f68db38 297/* grandfather return to old style */
78cdf107
Z
298#define OLDLOP(f) \
299 do { \
300 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
301 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
302 pl_yylval.ival = (f); \
303 PL_expect = XTERM; \
304 PL_bufptr = s; \
305 return (int)LSTOP; \
306 } while(0)
79072805 307
8fa7f367
JH
308#ifdef DEBUGGING
309
6154021b 310/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
311enum token_type {
312 TOKENTYPE_NONE,
313 TOKENTYPE_IVAL,
6154021b 314 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
315 TOKENTYPE_PVAL,
316 TOKENTYPE_OPVAL,
317 TOKENTYPE_GVVAL
318};
319
6d4a66ac
NC
320static struct debug_tokens {
321 const int token;
322 enum token_type type;
323 const char *name;
324} const debug_tokens[] =
9041c2e3 325{
bbf60fe6
DM
326 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
327 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
328 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
329 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
330 { ARROW, TOKENTYPE_NONE, "ARROW" },
331 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
332 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
333 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
334 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
335 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 336 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
337 { DO, TOKENTYPE_NONE, "DO" },
338 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
339 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
340 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
341 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
342 { ELSE, TOKENTYPE_NONE, "ELSE" },
343 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
344 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
345 { FOR, TOKENTYPE_IVAL, "FOR" },
346 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
347 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
348 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
349 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
350 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
351 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 352 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
353 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
354 { IF, TOKENTYPE_IVAL, "IF" },
355 { LABEL, TOKENTYPE_PVAL, "LABEL" },
356 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
357 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
358 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
359 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
360 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
361 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
362 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
363 { MY, TOKENTYPE_IVAL, "MY" },
364 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
365 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
366 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
367 { OROP, TOKENTYPE_IVAL, "OROP" },
368 { OROR, TOKENTYPE_NONE, "OROR" },
369 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
370 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
371 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
372 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
373 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
374 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
375 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
376 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
377 { PREINC, TOKENTYPE_NONE, "PREINC" },
378 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
379 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
380 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
381 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
382 { SUB, TOKENTYPE_NONE, "SUB" },
383 { THING, TOKENTYPE_OPVAL, "THING" },
384 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
385 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
386 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
387 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
388 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
389 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 390 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
391 { WHILE, TOKENTYPE_IVAL, "WHILE" },
392 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 393 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 394 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
395};
396
6154021b 397/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 398
bbf60fe6 399STATIC int
704d4215 400S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 401{
97aff369 402 dVAR;
7918f24d
NC
403
404 PERL_ARGS_ASSERT_TOKEREPORT;
405
bbf60fe6 406 if (DEBUG_T_TEST) {
bd61b366 407 const char *name = NULL;
bbf60fe6 408 enum token_type type = TOKENTYPE_NONE;
f54cb97a 409 const struct debug_tokens *p;
396482e1 410 SV* const report = newSVpvs("<== ");
bbf60fe6 411
f54cb97a 412 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
413 if (p->token == (int)rv) {
414 name = p->name;
415 type = p->type;
416 break;
417 }
418 }
419 if (name)
54667de8 420 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
421 else if ((char)rv > ' ' && (char)rv < '~')
422 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
423 else if (!rv)
396482e1 424 sv_catpvs(report, "EOF");
bbf60fe6
DM
425 else
426 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
427 switch (type) {
428 case TOKENTYPE_NONE:
429 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
430 break;
431 case TOKENTYPE_IVAL:
704d4215 432 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
433 break;
434 case TOKENTYPE_OPNUM:
435 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 436 PL_op_name[lvalp->ival]);
bbf60fe6
DM
437 break;
438 case TOKENTYPE_PVAL:
704d4215 439 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_OPVAL:
704d4215 442 if (lvalp->opval) {
401441c0 443 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
444 PL_op_name[lvalp->opval->op_type]);
445 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 446 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 447 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
448 }
449
450 }
401441c0 451 else
396482e1 452 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
453 break;
454 }
b6007c36 455 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
456 };
457 return (int)rv;
998054bd
SC
458}
459
b6007c36
DM
460
461/* print the buffer with suitable escapes */
462
463STATIC void
15f169a1 464S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 465{
396482e1 466 SV* const tmp = newSVpvs("");
7918f24d
NC
467
468 PERL_ARGS_ASSERT_PRINTBUF;
469
b6007c36
DM
470 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
471 SvREFCNT_dec(tmp);
472}
473
8fa7f367
JH
474#endif
475
8290c323
NC
476static int
477S_deprecate_commaless_var_list(pTHX) {
478 PL_expect = XTERM;
479 deprecate("comma-less variable list");
480 return REPORT(','); /* grandfather non-comma-format format */
481}
482
ffb4593c
NT
483/*
484 * S_ao
485 *
c963b151
BD
486 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
487 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
488 */
489
76e3520e 490STATIC int
cea2e8a9 491S_ao(pTHX_ int toketype)
a0d0e21e 492{
97aff369 493 dVAR;
3280af22
NIS
494 if (*PL_bufptr == '=') {
495 PL_bufptr++;
a0d0e21e 496 if (toketype == ANDAND)
6154021b 497 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 498 else if (toketype == OROR)
6154021b 499 pl_yylval.ival = OP_ORASSIGN;
c963b151 500 else if (toketype == DORDOR)
6154021b 501 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
502 toketype = ASSIGNOP;
503 }
504 return toketype;
505}
506
ffb4593c
NT
507/*
508 * S_no_op
509 * When Perl expects an operator and finds something else, no_op
510 * prints the warning. It always prints "<something> found where
511 * operator expected. It prints "Missing semicolon on previous line?"
512 * if the surprise occurs at the start of the line. "do you need to
513 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
514 * where the compiler doesn't know if foo is a method call or a function.
515 * It prints "Missing operator before end of line" if there's nothing
516 * after the missing operator, or "... before <...>" if there is something
517 * after the missing operator.
518 */
519
76e3520e 520STATIC void
15f169a1 521S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 522{
97aff369 523 dVAR;
9d4ba2ae
AL
524 char * const oldbp = PL_bufptr;
525 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 526
7918f24d
NC
527 PERL_ARGS_ASSERT_NO_OP;
528
1189a94a
GS
529 if (!s)
530 s = oldbp;
07c798fb 531 else
1189a94a 532 PL_bufptr = s;
cea2e8a9 533 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
534 if (ckWARN_d(WARN_SYNTAX)) {
535 if (is_first)
536 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
537 "\t(Missing semicolon on previous line?)\n");
538 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 539 const char *t;
c35e046a
AL
540 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
541 NOOP;
56da5a46
RGS
542 if (t < PL_bufptr && isSPACE(*t))
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Do you need to predeclare %.*s?)\n",
551405c4 545 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
546 }
547 else {
548 assert(s >= oldbp);
549 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 550 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 551 }
07c798fb 552 }
3280af22 553 PL_bufptr = oldbp;
8990e307
LW
554}
555
ffb4593c
NT
556/*
557 * S_missingterm
558 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 559 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
560 * If we're in a delimited string and the delimiter is a control
561 * character, it's reformatted into a two-char sequence like ^C.
562 * This is fatal.
563 */
564
76e3520e 565STATIC void
cea2e8a9 566S_missingterm(pTHX_ char *s)
8990e307 567{
97aff369 568 dVAR;
8990e307
LW
569 char tmpbuf[3];
570 char q;
571 if (s) {
9d4ba2ae 572 char * const nl = strrchr(s,'\n');
d2719217 573 if (nl)
8990e307
LW
574 *nl = '\0';
575 }
463559e7 576 else if (isCNTRL(PL_multi_close)) {
8990e307 577 *tmpbuf = '^';
585ec06d 578 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
579 tmpbuf[2] = '\0';
580 s = tmpbuf;
581 }
582 else {
eb160463 583 *tmpbuf = (char)PL_multi_close;
8990e307
LW
584 tmpbuf[1] = '\0';
585 s = tmpbuf;
586 }
587 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 588 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 589}
79072805 590
0d863452 591/*
0d863452
RH
592 * Check whether the named feature is enabled.
593 */
26ea9e12
NC
594bool
595Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 596{
97aff369 597 dVAR;
0d863452 598 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 599 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
600
601 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
602
26ea9e12
NC
603 if (namelen > MAX_FEATURE_LEN)
604 return FALSE;
4a731d7b 605 memcpy(&he_name[8], name, namelen);
d4c19fe8 606
7b9ef140 607 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
608}
609
ffb4593c 610/*
9cbb5ea2
GS
611 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
612 * utf16-to-utf8-reversed.
ffb4593c
NT
613 */
614
c39cd008
GS
615#ifdef PERL_CR_FILTER
616static void
617strip_return(SV *sv)
618{
95a20fc0 619 register const char *s = SvPVX_const(sv);
9d4ba2ae 620 register const char * const e = s + SvCUR(sv);
7918f24d
NC
621
622 PERL_ARGS_ASSERT_STRIP_RETURN;
623
c39cd008
GS
624 /* outer loop optimized to do nothing if there are no CR-LFs */
625 while (s < e) {
626 if (*s++ == '\r' && *s == '\n') {
627 /* hit a CR-LF, need to copy the rest */
628 register char *d = s - 1;
629 *d++ = *s++;
630 while (s < e) {
631 if (*s == '\r' && s[1] == '\n')
632 s++;
633 *d++ = *s++;
634 }
635 SvCUR(sv) -= s - d;
636 return;
637 }
638 }
639}
a868473f 640
76e3520e 641STATIC I32
c39cd008 642S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 643{
f54cb97a 644 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
645 if (count > 0 && !maxlen)
646 strip_return(sv);
647 return count;
a868473f
NIS
648}
649#endif
650
ffb4593c 651/*
8eaa0acf
Z
652=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
653
654Creates and initialises a new lexer/parser state object, supplying
655a context in which to lex and parse from a new source of Perl code.
656A pointer to the new state object is placed in L</PL_parser>. An entry
657is made on the save stack so that upon unwinding the new state object
658will be destroyed and the former value of L</PL_parser> will be restored.
659Nothing else need be done to clean up the parsing context.
660
661The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
662non-null, provides a string (in SV form) containing code to be parsed.
663A copy of the string is made, so subsequent modification of I<line>
664does not affect parsing. I<rsfp>, if non-null, provides an input stream
665from which code will be read to be parsed. If both are non-null, the
666code in I<line> comes first and must consist of complete lines of input,
667and I<rsfp> supplies the remainder of the source.
668
669The I<flags> parameter is reserved for future use, and must always
670be zero.
671
672=cut
673*/
ffb4593c 674
a0d0e21e 675void
8eaa0acf 676Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 677{
97aff369 678 dVAR;
6ef55633 679 const char *s = NULL;
8990e307 680 STRLEN len;
5486870f 681 yy_parser *parser, *oparser;
8eaa0acf
Z
682 if (flags)
683 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
684
685 /* create and initialise a parser */
686
199e78b7 687 Newxz(parser, 1, yy_parser);
5486870f 688 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
689 PL_parser = parser;
690
28ac2b49
Z
691 parser->stack = NULL;
692 parser->ps = NULL;
693 parser->stack_size = 0;
acdf0a21 694
e3abe207
DM
695 /* on scope exit, free this parser and restore any outer one */
696 SAVEPARSER(parser);
7c4baf47 697 parser->saved_curcop = PL_curcop;
e3abe207 698
acdf0a21 699 /* initialise lexer state */
8990e307 700
fb205e7a
DM
701#ifdef PERL_MAD
702 parser->curforce = -1;
703#else
704 parser->nexttoke = 0;
705#endif
ca4cfd28 706 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 707 parser->copline = NOLINE;
5afb0a62 708 parser->lex_state = LEX_NORMAL;
c2598295 709 parser->expect = XSTATE;
2f9285f8 710 parser->rsfp = rsfp;
f07ec6dd 711 parser->rsfp_filters = newAV();
2f9285f8 712
199e78b7
DM
713 Newx(parser->lex_brackstack, 120, char);
714 Newx(parser->lex_casestack, 12, char);
715 *parser->lex_casestack = '\0';
02b34bbe 716
10efb74f
NC
717 if (line) {
718 s = SvPV_const(line, len);
719 } else {
720 len = 0;
721 }
bdc0bf6f 722
10efb74f 723 if (!len) {
bdc0bf6f 724 parser->linestr = newSVpvs("\n;");
805700c1 725 } else {
719a9bb0 726 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 727 if (s[len-1] != ';')
bdc0bf6f 728 sv_catpvs(parser->linestr, "\n;");
8990e307 729 }
f06b5848
DM
730 parser->oldoldbufptr =
731 parser->oldbufptr =
732 parser->bufptr =
733 parser->linestart = SvPVX(parser->linestr);
734 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
735 parser->last_lop = parser->last_uni = NULL;
737c24fc
Z
736
737 parser->in_pod = 0;
79072805 738}
a687059c 739
e3abe207
DM
740
741/* delete a parser object */
742
743void
744Perl_parser_free(pTHX_ const yy_parser *parser)
745{
7918f24d
NC
746 PERL_ARGS_ASSERT_PARSER_FREE;
747
7c4baf47 748 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
749 SvREFCNT_dec(parser->linestr);
750
2f9285f8
DM
751 if (parser->rsfp == PerlIO_stdin())
752 PerlIO_clearerr(parser->rsfp);
799361c3
SH
753 else if (parser->rsfp && (!parser->old_parser ||
754 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 755 PerlIO_close(parser->rsfp);
5486870f 756 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 757
e3abe207
DM
758 Safefree(parser->lex_brackstack);
759 Safefree(parser->lex_casestack);
760 PL_parser = parser->old_parser;
761 Safefree(parser);
762}
763
764
ffb4593c 765/*
f0e67a1d
Z
766=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
767
768Buffer scalar containing the chunk currently under consideration of the
769text currently being lexed. This is always a plain string scalar (for
770which C<SvPOK> is true). It is not intended to be used as a scalar by
771normal scalar means; instead refer to the buffer directly by the pointer
772variables described below.
773
774The lexer maintains various C<char*> pointers to things in the
775C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
776reallocated, all of these pointers must be updated. Don't attempt to
777do this manually, but rather use L</lex_grow_linestr> if you need to
778reallocate the buffer.
779
780The content of the text chunk in the buffer is commonly exactly one
781complete line of input, up to and including a newline terminator,
782but there are situations where it is otherwise. The octets of the
783buffer may be intended to be interpreted as either UTF-8 or Latin-1.
784The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
785flag on this scalar, which may disagree with it.
786
787For direct examination of the buffer, the variable
788L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
789lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
790of these pointers is usually preferable to examination of the scalar
791through normal scalar means.
792
793=for apidoc AmxU|char *|PL_parser-E<gt>bufend
794
795Direct pointer to the end of the chunk of text currently being lexed, the
796end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
797+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
798always located at the end of the buffer, and does not count as part of
799the buffer's contents.
800
801=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
802
803Points to the current position of lexing inside the lexer buffer.
804Characters around this point may be freely examined, within
805the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
806L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
807interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
808
809Lexing code (whether in the Perl core or not) moves this pointer past
810the characters that it consumes. It is also expected to perform some
811bookkeeping whenever a newline character is consumed. This movement
812can be more conveniently performed by the function L</lex_read_to>,
813which handles newlines appropriately.
814
815Interpretation of the buffer's octets can be abstracted out by
816using the slightly higher-level functions L</lex_peek_unichar> and
817L</lex_read_unichar>.
818
819=for apidoc AmxU|char *|PL_parser-E<gt>linestart
820
821Points to the start of the current line inside the lexer buffer.
822This is useful for indicating at which column an error occurred, and
823not much else. This must be updated by any lexing code that consumes
824a newline; the function L</lex_read_to> handles this detail.
825
826=cut
827*/
828
829/*
830=for apidoc Amx|bool|lex_bufutf8
831
832Indicates whether the octets in the lexer buffer
833(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
834of Unicode characters. If not, they should be interpreted as Latin-1
835characters. This is analogous to the C<SvUTF8> flag for scalars.
836
837In UTF-8 mode, it is not guaranteed that the lexer buffer actually
838contains valid UTF-8. Lexing code must be robust in the face of invalid
839encoding.
840
841The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
842is significant, but not the whole story regarding the input character
843encoding. Normally, when a file is being read, the scalar contains octets
844and its C<SvUTF8> flag is off, but the octets should be interpreted as
845UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
846however, the scalar may have the C<SvUTF8> flag on, and in this case its
847octets should be interpreted as UTF-8 unless the C<use bytes> pragma
848is in effect. This logic may change in the future; use this function
849instead of implementing the logic yourself.
850
851=cut
852*/
853
854bool
855Perl_lex_bufutf8(pTHX)
856{
857 return UTF;
858}
859
860/*
861=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
862
863Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
864at least I<len> octets (including terminating NUL). Returns a
865pointer to the reallocated buffer. This is necessary before making
866any direct modification of the buffer that would increase its length.
867L</lex_stuff_pvn> provides a more convenient way to insert text into
868the buffer.
869
870Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
871this function updates all of the lexer's variables that point directly
872into the buffer.
873
874=cut
875*/
876
877char *
878Perl_lex_grow_linestr(pTHX_ STRLEN len)
879{
880 SV *linestr;
881 char *buf;
882 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
883 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
884 linestr = PL_parser->linestr;
885 buf = SvPVX(linestr);
886 if (len <= SvLEN(linestr))
887 return buf;
888 bufend_pos = PL_parser->bufend - buf;
889 bufptr_pos = PL_parser->bufptr - buf;
890 oldbufptr_pos = PL_parser->oldbufptr - buf;
891 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
892 linestart_pos = PL_parser->linestart - buf;
893 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
894 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
895 buf = sv_grow(linestr, len);
896 PL_parser->bufend = buf + bufend_pos;
897 PL_parser->bufptr = buf + bufptr_pos;
898 PL_parser->oldbufptr = buf + oldbufptr_pos;
899 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
900 PL_parser->linestart = buf + linestart_pos;
901 if (PL_parser->last_uni)
902 PL_parser->last_uni = buf + last_uni_pos;
903 if (PL_parser->last_lop)
904 PL_parser->last_lop = buf + last_lop_pos;
905 return buf;
906}
907
908/*
83aa740e 909=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
910
911Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
912immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
913reallocating the buffer if necessary. This means that lexing code that
914runs later will see the characters as if they had appeared in the input.
915It is not recommended to do this as part of normal parsing, and most
916uses of this facility run the risk of the inserted characters being
917interpreted in an unintended manner.
918
919The string to be inserted is represented by I<len> octets starting
920at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
921according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
922The characters are recoded for the lexer buffer, according to how the
923buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 924to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
925function is more convenient.
926
927=cut
928*/
929
930void
83aa740e 931Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 932{
749123ff 933 dVAR;
f0e67a1d
Z
934 char *bufptr;
935 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
936 if (flags & ~(LEX_STUFF_UTF8))
937 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
938 if (UTF) {
939 if (flags & LEX_STUFF_UTF8) {
940 goto plain_copy;
941 } else {
942 STRLEN highhalf = 0;
83aa740e 943 const char *p, *e = pv+len;
f0e67a1d
Z
944 for (p = pv; p != e; p++)
945 highhalf += !!(((U8)*p) & 0x80);
946 if (!highhalf)
947 goto plain_copy;
948 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
949 bufptr = PL_parser->bufptr;
950 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
951 SvCUR_set(PL_parser->linestr,
952 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
953 PL_parser->bufend += len+highhalf;
954 for (p = pv; p != e; p++) {
955 U8 c = (U8)*p;
956 if (c & 0x80) {
957 *bufptr++ = (char)(0xc0 | (c >> 6));
958 *bufptr++ = (char)(0x80 | (c & 0x3f));
959 } else {
960 *bufptr++ = (char)c;
961 }
962 }
963 }
964 } else {
965 if (flags & LEX_STUFF_UTF8) {
966 STRLEN highhalf = 0;
83aa740e 967 const char *p, *e = pv+len;
f0e67a1d
Z
968 for (p = pv; p != e; p++) {
969 U8 c = (U8)*p;
970 if (c >= 0xc4) {
971 Perl_croak(aTHX_ "Lexing code attempted to stuff "
972 "non-Latin-1 character into Latin-1 input");
973 } else if (c >= 0xc2 && p+1 != e &&
974 (((U8)p[1]) & 0xc0) == 0x80) {
975 p++;
976 highhalf++;
977 } else if (c >= 0x80) {
978 /* malformed UTF-8 */
979 ENTER;
980 SAVESPTR(PL_warnhook);
981 PL_warnhook = PERL_WARNHOOK_FATAL;
982 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
983 LEAVE;
984 }
985 }
986 if (!highhalf)
987 goto plain_copy;
988 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
989 bufptr = PL_parser->bufptr;
990 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
991 SvCUR_set(PL_parser->linestr,
992 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
993 PL_parser->bufend += len-highhalf;
994 for (p = pv; p != e; p++) {
995 U8 c = (U8)*p;
996 if (c & 0x80) {
997 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
998 p++;
999 } else {
1000 *bufptr++ = (char)c;
1001 }
1002 }
1003 } else {
1004 plain_copy:
1005 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1006 bufptr = PL_parser->bufptr;
1007 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1008 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1009 PL_parser->bufend += len;
1010 Copy(pv, bufptr, len, char);
1011 }
1012 }
1013}
1014
1015/*
9dcc53ea
Z
1016=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1017
1018Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1019immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1020reallocating the buffer if necessary. This means that lexing code that
1021runs later will see the characters as if they had appeared in the input.
1022It is not recommended to do this as part of normal parsing, and most
1023uses of this facility run the risk of the inserted characters being
1024interpreted in an unintended manner.
1025
1026The string to be inserted is represented by octets starting at I<pv>
1027and continuing to the first nul. These octets are interpreted as either
1028UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1029in I<flags>. The characters are recoded for the lexer buffer, according
1030to how the buffer is currently being interpreted (L</lex_bufutf8>).
1031If it is not convenient to nul-terminate a string to be inserted, the
1032L</lex_stuff_pvn> function is more appropriate.
1033
1034=cut
1035*/
1036
1037void
1038Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1039{
1040 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1041 lex_stuff_pvn(pv, strlen(pv), flags);
1042}
1043
1044/*
f0e67a1d
Z
1045=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1046
1047Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1048immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1049reallocating the buffer if necessary. This means that lexing code that
1050runs later will see the characters as if they had appeared in the input.
1051It is not recommended to do this as part of normal parsing, and most
1052uses of this facility run the risk of the inserted characters being
1053interpreted in an unintended manner.
1054
1055The string to be inserted is the string value of I<sv>. The characters
1056are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1057being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1058not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1059need to construct a scalar.
1060
1061=cut
1062*/
1063
1064void
1065Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1066{
1067 char *pv;
1068 STRLEN len;
1069 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1070 if (flags)
1071 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1072 pv = SvPV(sv, len);
1073 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1074}
1075
1076/*
1077=for apidoc Amx|void|lex_unstuff|char *ptr
1078
1079Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1080I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1081This hides the discarded text from any lexing code that runs later,
1082as if the text had never appeared.
1083
1084This is not the normal way to consume lexed text. For that, use
1085L</lex_read_to>.
1086
1087=cut
1088*/
1089
1090void
1091Perl_lex_unstuff(pTHX_ char *ptr)
1092{
1093 char *buf, *bufend;
1094 STRLEN unstuff_len;
1095 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1096 buf = PL_parser->bufptr;
1097 if (ptr < buf)
1098 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1099 if (ptr == buf)
1100 return;
1101 bufend = PL_parser->bufend;
1102 if (ptr > bufend)
1103 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1104 unstuff_len = ptr - buf;
1105 Move(ptr, buf, bufend+1-ptr, char);
1106 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1107 PL_parser->bufend = bufend - unstuff_len;
1108}
1109
1110/*
1111=for apidoc Amx|void|lex_read_to|char *ptr
1112
1113Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1114to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1115performing the correct bookkeeping whenever a newline character is passed.
1116This is the normal way to consume lexed text.
1117
1118Interpretation of the buffer's octets can be abstracted out by
1119using the slightly higher-level functions L</lex_peek_unichar> and
1120L</lex_read_unichar>.
1121
1122=cut
1123*/
1124
1125void
1126Perl_lex_read_to(pTHX_ char *ptr)
1127{
1128 char *s;
1129 PERL_ARGS_ASSERT_LEX_READ_TO;
1130 s = PL_parser->bufptr;
1131 if (ptr < s || ptr > PL_parser->bufend)
1132 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1133 for (; s != ptr; s++)
1134 if (*s == '\n') {
1135 CopLINE_inc(PL_curcop);
1136 PL_parser->linestart = s+1;
1137 }
1138 PL_parser->bufptr = ptr;
1139}
1140
1141/*
1142=for apidoc Amx|void|lex_discard_to|char *ptr
1143
1144Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1145up to I<ptr>. The remaining content of the buffer will be moved, and
1146all pointers into the buffer updated appropriately. I<ptr> must not
1147be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1148it is not permitted to discard text that has yet to be lexed.
1149
1150Normally it is not necessarily to do this directly, because it suffices to
1151use the implicit discarding behaviour of L</lex_next_chunk> and things
1152based on it. However, if a token stretches across multiple lines,
1f317c95 1153and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1154that purpose, then after completion of the token it would be wise to
1155explicitly discard the now-unneeded earlier lines, to avoid future
1156multi-line tokens growing the buffer without bound.
1157
1158=cut
1159*/
1160
1161void
1162Perl_lex_discard_to(pTHX_ char *ptr)
1163{
1164 char *buf;
1165 STRLEN discard_len;
1166 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1167 buf = SvPVX(PL_parser->linestr);
1168 if (ptr < buf)
1169 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1170 if (ptr == buf)
1171 return;
1172 if (ptr > PL_parser->bufptr)
1173 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1174 discard_len = ptr - buf;
1175 if (PL_parser->oldbufptr < ptr)
1176 PL_parser->oldbufptr = ptr;
1177 if (PL_parser->oldoldbufptr < ptr)
1178 PL_parser->oldoldbufptr = ptr;
1179 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1180 PL_parser->last_uni = NULL;
1181 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1182 PL_parser->last_lop = NULL;
1183 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1184 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1185 PL_parser->bufend -= discard_len;
1186 PL_parser->bufptr -= discard_len;
1187 PL_parser->oldbufptr -= discard_len;
1188 PL_parser->oldoldbufptr -= discard_len;
1189 if (PL_parser->last_uni)
1190 PL_parser->last_uni -= discard_len;
1191 if (PL_parser->last_lop)
1192 PL_parser->last_lop -= discard_len;
1193}
1194
1195/*
1196=for apidoc Amx|bool|lex_next_chunk|U32 flags
1197
1198Reads in the next chunk of text to be lexed, appending it to
1199L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1200looked to the end of the current chunk and wants to know more. It is
1201usual, but not necessary, for lexing to have consumed the entirety of
1202the current chunk at this time.
1203
1204If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1205chunk (i.e., the current chunk has been entirely consumed), normally the
1206current chunk will be discarded at the same time that the new chunk is
1207read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1208will not be discarded. If the current chunk has not been entirely
1209consumed, then it will not be discarded regardless of the flag.
1210
1211Returns true if some new text was added to the buffer, or false if the
1212buffer has reached the end of the input text.
1213
1214=cut
1215*/
1216
1217#define LEX_FAKE_EOF 0x80000000
1218
1219bool
1220Perl_lex_next_chunk(pTHX_ U32 flags)
1221{
1222 SV *linestr;
1223 char *buf;
1224 STRLEN old_bufend_pos, new_bufend_pos;
1225 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1226 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1227 bool got_some_for_debugger = 0;
f0e67a1d
Z
1228 bool got_some;
1229 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1230 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1231 linestr = PL_parser->linestr;
1232 buf = SvPVX(linestr);
1233 if (!(flags & LEX_KEEP_PREVIOUS) &&
1234 PL_parser->bufptr == PL_parser->bufend) {
1235 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1236 linestart_pos = 0;
1237 if (PL_parser->last_uni != PL_parser->bufend)
1238 PL_parser->last_uni = NULL;
1239 if (PL_parser->last_lop != PL_parser->bufend)
1240 PL_parser->last_lop = NULL;
1241 last_uni_pos = last_lop_pos = 0;
1242 *buf = 0;
1243 SvCUR(linestr) = 0;
1244 } else {
1245 old_bufend_pos = PL_parser->bufend - buf;
1246 bufptr_pos = PL_parser->bufptr - buf;
1247 oldbufptr_pos = PL_parser->oldbufptr - buf;
1248 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1249 linestart_pos = PL_parser->linestart - buf;
1250 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1251 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1252 }
1253 if (flags & LEX_FAKE_EOF) {
1254 goto eof;
1255 } else if (!PL_parser->rsfp) {
1256 got_some = 0;
1257 } else if (filter_gets(linestr, old_bufend_pos)) {
1258 got_some = 1;
17cc9359 1259 got_some_for_debugger = 1;
f0e67a1d 1260 } else {
580561a3
Z
1261 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1262 sv_setpvs(linestr, "");
f0e67a1d
Z
1263 eof:
1264 /* End of real input. Close filehandle (unless it was STDIN),
1265 * then add implicit termination.
1266 */
1267 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1268 PerlIO_clearerr(PL_parser->rsfp);
1269 else if (PL_parser->rsfp)
1270 (void)PerlIO_close(PL_parser->rsfp);
1271 PL_parser->rsfp = NULL;
737c24fc 1272 PL_parser->in_pod = 0;
f0e67a1d
Z
1273#ifdef PERL_MAD
1274 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1275 PL_faketokens = 1;
1276#endif
1277 if (!PL_in_eval && PL_minus_p) {
1278 sv_catpvs(linestr,
1279 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1280 PL_minus_n = PL_minus_p = 0;
1281 } else if (!PL_in_eval && PL_minus_n) {
1282 sv_catpvs(linestr, /*{*/";}");
1283 PL_minus_n = 0;
1284 } else
1285 sv_catpvs(linestr, ";");
1286 got_some = 1;
1287 }
1288 buf = SvPVX(linestr);
1289 new_bufend_pos = SvCUR(linestr);
1290 PL_parser->bufend = buf + new_bufend_pos;
1291 PL_parser->bufptr = buf + bufptr_pos;
1292 PL_parser->oldbufptr = buf + oldbufptr_pos;
1293 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1294 PL_parser->linestart = buf + linestart_pos;
1295 if (PL_parser->last_uni)
1296 PL_parser->last_uni = buf + last_uni_pos;
1297 if (PL_parser->last_lop)
1298 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1299 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1300 PL_curstash != PL_debstash) {
1301 /* debugger active and we're not compiling the debugger code,
1302 * so store the line into the debugger's array of lines
1303 */
1304 update_debugger_info(NULL, buf+old_bufend_pos,
1305 new_bufend_pos-old_bufend_pos);
1306 }
1307 return got_some;
1308}
1309
1310/*
1311=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1312
1313Looks ahead one (Unicode) character in the text currently being lexed.
1314Returns the codepoint (unsigned integer value) of the next character,
1315or -1 if lexing has reached the end of the input text. To consume the
1316peeked character, use L</lex_read_unichar>.
1317
1318If the next character is in (or extends into) the next chunk of input
1319text, the next chunk will be read in. Normally the current chunk will be
1320discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1321then the current chunk will not be discarded.
1322
1323If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1324is encountered, an exception is generated.
1325
1326=cut
1327*/
1328
1329I32
1330Perl_lex_peek_unichar(pTHX_ U32 flags)
1331{
749123ff 1332 dVAR;
f0e67a1d
Z
1333 char *s, *bufend;
1334 if (flags & ~(LEX_KEEP_PREVIOUS))
1335 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1336 s = PL_parser->bufptr;
1337 bufend = PL_parser->bufend;
1338 if (UTF) {
1339 U8 head;
1340 I32 unichar;
1341 STRLEN len, retlen;
1342 if (s == bufend) {
1343 if (!lex_next_chunk(flags))
1344 return -1;
1345 s = PL_parser->bufptr;
1346 bufend = PL_parser->bufend;
1347 }
1348 head = (U8)*s;
1349 if (!(head & 0x80))
1350 return head;
1351 if (head & 0x40) {
1352 len = PL_utf8skip[head];
1353 while ((STRLEN)(bufend-s) < len) {
1354 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1355 break;
1356 s = PL_parser->bufptr;
1357 bufend = PL_parser->bufend;
1358 }
1359 }
1360 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1361 if (retlen == (STRLEN)-1) {
1362 /* malformed UTF-8 */
1363 ENTER;
1364 SAVESPTR(PL_warnhook);
1365 PL_warnhook = PERL_WARNHOOK_FATAL;
1366 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1367 LEAVE;
1368 }
1369 return unichar;
1370 } else {
1371 if (s == bufend) {
1372 if (!lex_next_chunk(flags))
1373 return -1;
1374 s = PL_parser->bufptr;
1375 }
1376 return (U8)*s;
1377 }
1378}
1379
1380/*
1381=for apidoc Amx|I32|lex_read_unichar|U32 flags
1382
1383Reads the next (Unicode) character in the text currently being lexed.
1384Returns the codepoint (unsigned integer value) of the character read,
1385and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1386if lexing has reached the end of the input text. To non-destructively
1387examine the next character, use L</lex_peek_unichar> instead.
1388
1389If the next character is in (or extends into) the next chunk of input
1390text, the next chunk will be read in. Normally the current chunk will be
1391discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1392then the current chunk will not be discarded.
1393
1394If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1395is encountered, an exception is generated.
1396
1397=cut
1398*/
1399
1400I32
1401Perl_lex_read_unichar(pTHX_ U32 flags)
1402{
1403 I32 c;
1404 if (flags & ~(LEX_KEEP_PREVIOUS))
1405 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1406 c = lex_peek_unichar(flags);
1407 if (c != -1) {
1408 if (c == '\n')
1409 CopLINE_inc(PL_curcop);
1410 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1411 }
1412 return c;
1413}
1414
1415/*
1416=for apidoc Amx|void|lex_read_space|U32 flags
1417
1418Reads optional spaces, in Perl style, in the text currently being
1419lexed. The spaces may include ordinary whitespace characters and
1420Perl-style comments. C<#line> directives are processed if encountered.
1421L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1422at a non-space character (or the end of the input text).
1423
1424If spaces extend into the next chunk of input text, the next chunk will
1425be read in. Normally the current chunk will be discarded at the same
1426time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1427chunk will not be discarded.
1428
1429=cut
1430*/
1431
f0998909
Z
1432#define LEX_NO_NEXT_CHUNK 0x80000000
1433
f0e67a1d
Z
1434void
1435Perl_lex_read_space(pTHX_ U32 flags)
1436{
1437 char *s, *bufend;
1438 bool need_incline = 0;
f0998909 1439 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1440 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1441#ifdef PERL_MAD
1442 if (PL_skipwhite) {
1443 sv_free(PL_skipwhite);
1444 PL_skipwhite = NULL;
1445 }
1446 if (PL_madskills)
1447 PL_skipwhite = newSVpvs("");
1448#endif /* PERL_MAD */
1449 s = PL_parser->bufptr;
1450 bufend = PL_parser->bufend;
1451 while (1) {
1452 char c = *s;
1453 if (c == '#') {
1454 do {
1455 c = *++s;
1456 } while (!(c == '\n' || (c == 0 && s == bufend)));
1457 } else if (c == '\n') {
1458 s++;
1459 PL_parser->linestart = s;
1460 if (s == bufend)
1461 need_incline = 1;
1462 else
1463 incline(s);
1464 } else if (isSPACE(c)) {
1465 s++;
1466 } else if (c == 0 && s == bufend) {
1467 bool got_more;
1468#ifdef PERL_MAD
1469 if (PL_madskills)
1470 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1471#endif /* PERL_MAD */
f0998909
Z
1472 if (flags & LEX_NO_NEXT_CHUNK)
1473 break;
f0e67a1d
Z
1474 PL_parser->bufptr = s;
1475 CopLINE_inc(PL_curcop);
1476 got_more = lex_next_chunk(flags);
1477 CopLINE_dec(PL_curcop);
1478 s = PL_parser->bufptr;
1479 bufend = PL_parser->bufend;
1480 if (!got_more)
1481 break;
1482 if (need_incline && PL_parser->rsfp) {
1483 incline(s);
1484 need_incline = 0;
1485 }
1486 } else {
1487 break;
1488 }
1489 }
1490#ifdef PERL_MAD
1491 if (PL_madskills)
1492 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1493#endif /* PERL_MAD */
1494 PL_parser->bufptr = s;
1495}
1496
1497/*
ffb4593c
NT
1498 * S_incline
1499 * This subroutine has nothing to do with tilting, whether at windmills
1500 * or pinball tables. Its name is short for "increment line". It
57843af0 1501 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1502 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1503 * # line 500 "foo.pm"
1504 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1505 */
1506
76e3520e 1507STATIC void
d9095cec 1508S_incline(pTHX_ const char *s)
463ee0b2 1509{
97aff369 1510 dVAR;
d9095cec
NC
1511 const char *t;
1512 const char *n;
1513 const char *e;
8818d409 1514 line_t line_num;
463ee0b2 1515
7918f24d
NC
1516 PERL_ARGS_ASSERT_INCLINE;
1517
57843af0 1518 CopLINE_inc(PL_curcop);
463ee0b2
LW
1519 if (*s++ != '#')
1520 return;
d4c19fe8
AL
1521 while (SPACE_OR_TAB(*s))
1522 s++;
73659bf1
GS
1523 if (strnEQ(s, "line", 4))
1524 s += 4;
1525 else
1526 return;
084592ab 1527 if (SPACE_OR_TAB(*s))
73659bf1 1528 s++;
4e553d73 1529 else
73659bf1 1530 return;
d4c19fe8
AL
1531 while (SPACE_OR_TAB(*s))
1532 s++;
463ee0b2
LW
1533 if (!isDIGIT(*s))
1534 return;
d4c19fe8 1535
463ee0b2
LW
1536 n = s;
1537 while (isDIGIT(*s))
1538 s++;
07714eb4 1539 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1540 return;
bf4acbe4 1541 while (SPACE_OR_TAB(*s))
463ee0b2 1542 s++;
73659bf1 1543 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1544 s++;
73659bf1
GS
1545 e = t + 1;
1546 }
463ee0b2 1547 else {
c35e046a
AL
1548 t = s;
1549 while (!isSPACE(*t))
1550 t++;
73659bf1 1551 e = t;
463ee0b2 1552 }
bf4acbe4 1553 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1554 e++;
1555 if (*e != '\n' && *e != '\0')
1556 return; /* false alarm */
1557
8818d409
FC
1558 line_num = atoi(n)-1;
1559
f4dd75d9 1560 if (t - s > 0) {
d9095cec 1561 const STRLEN len = t - s;
19bad673
NC
1562 SV *const temp_sv = CopFILESV(PL_curcop);
1563 const char *cf;
1564 STRLEN tmplen;
1565
1566 if (temp_sv) {
1567 cf = SvPVX(temp_sv);
1568 tmplen = SvCUR(temp_sv);
1569 } else {
1570 cf = NULL;
1571 tmplen = 0;
1572 }
1573
42d9b98d 1574 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1575 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1576 * to *{"::_<newfilename"} */
44867030
NC
1577 /* However, the long form of evals is only turned on by the
1578 debugger - usually they're "(eval %lu)" */
1579 char smallbuf[128];
1580 char *tmpbuf;
1581 GV **gvp;
d9095cec 1582 STRLEN tmplen2 = len;
798b63bc 1583 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1584 tmpbuf = smallbuf;
1585 else
2ae0db35 1586 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1587 tmpbuf[0] = '_';
1588 tmpbuf[1] = '<';
2ae0db35 1589 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1590 tmplen += 2;
8a5ee598
RGS
1591 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1592 if (gvp) {
44867030
NC
1593 char *tmpbuf2;
1594 GV *gv2;
1595
1596 if (tmplen2 + 2 <= sizeof smallbuf)
1597 tmpbuf2 = smallbuf;
1598 else
1599 Newx(tmpbuf2, tmplen2 + 2, char);
1600
1601 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1602 /* Either they malloc'd it, or we malloc'd it,
1603 so no prefix is present in ours. */
1604 tmpbuf2[0] = '_';
1605 tmpbuf2[1] = '<';
1606 }
1607
1608 memcpy(tmpbuf2 + 2, s, tmplen2);
1609 tmplen2 += 2;
1610
8a5ee598 1611 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1612 if (!isGV(gv2)) {
8a5ee598 1613 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1614 /* adjust ${"::_<newfilename"} to store the new file name */
1615 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1616 /* The line number may differ. If that is the case,
1617 alias the saved lines that are in the array.
1618 Otherwise alias the whole array. */
1619 if (CopLINE(PL_curcop) == line_num) {
1620 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1621 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1622 }
1623 else if (GvAV(*gvp)) {
1624 AV * const av = GvAV(*gvp);
1625 const I32 start = CopLINE(PL_curcop)+1;
1626 I32 items = AvFILLp(av) - start;
1627 if (items > 0) {
1628 AV * const av2 = GvAVn(gv2);
1629 SV **svp = AvARRAY(av) + start;
1630 I32 l = (I32)line_num+1;
1631 while (items--)
1632 av_store(av2, l++, SvREFCNT_inc(*svp++));
1633 }
1634 }
e5527e4b 1635 }
44867030
NC
1636
1637 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1638 }
e66cf94c 1639 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1640 }
05ec9bb3 1641 CopFILE_free(PL_curcop);
d9095cec 1642 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1643 }
8818d409 1644 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1645}
1646
29595ff2 1647#ifdef PERL_MAD
cd81e915 1648/* skip space before PL_thistoken */
29595ff2
NC
1649
1650STATIC char *
1651S_skipspace0(pTHX_ register char *s)
1652{
7918f24d
NC
1653 PERL_ARGS_ASSERT_SKIPSPACE0;
1654
29595ff2
NC
1655 s = skipspace(s);
1656 if (!PL_madskills)
1657 return s;
cd81e915
NC
1658 if (PL_skipwhite) {
1659 if (!PL_thiswhite)
6b29d1f5 1660 PL_thiswhite = newSVpvs("");
cd81e915
NC
1661 sv_catsv(PL_thiswhite, PL_skipwhite);
1662 sv_free(PL_skipwhite);
1663 PL_skipwhite = 0;
1664 }
1665 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1666 return s;
1667}
1668
cd81e915 1669/* skip space after PL_thistoken */
29595ff2
NC
1670
1671STATIC char *
1672S_skipspace1(pTHX_ register char *s)
1673{
d4c19fe8 1674 const char *start = s;
29595ff2
NC
1675 I32 startoff = start - SvPVX(PL_linestr);
1676
7918f24d
NC
1677 PERL_ARGS_ASSERT_SKIPSPACE1;
1678
29595ff2
NC
1679 s = skipspace(s);
1680 if (!PL_madskills)
1681 return s;
1682 start = SvPVX(PL_linestr) + startoff;
cd81e915 1683 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1684 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1685 PL_thistoken = newSVpvn(tstart, start - tstart);
1686 }
1687 PL_realtokenstart = -1;
1688 if (PL_skipwhite) {
1689 if (!PL_nextwhite)
6b29d1f5 1690 PL_nextwhite = newSVpvs("");
cd81e915
NC
1691 sv_catsv(PL_nextwhite, PL_skipwhite);
1692 sv_free(PL_skipwhite);
1693 PL_skipwhite = 0;
29595ff2
NC
1694 }
1695 return s;
1696}
1697
1698STATIC char *
1699S_skipspace2(pTHX_ register char *s, SV **svp)
1700{
c35e046a
AL
1701 char *start;
1702 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1703 const I32 startoff = s - SvPVX(PL_linestr);
1704
7918f24d
NC
1705 PERL_ARGS_ASSERT_SKIPSPACE2;
1706
29595ff2
NC
1707 s = skipspace(s);
1708 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1709 if (!PL_madskills || !svp)
1710 return s;
1711 start = SvPVX(PL_linestr) + startoff;
cd81e915 1712 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1713 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1714 PL_thistoken = newSVpvn(tstart, start - tstart);
1715 PL_realtokenstart = -1;
29595ff2 1716 }
cd81e915 1717 if (PL_skipwhite) {
29595ff2 1718 if (!*svp)
6b29d1f5 1719 *svp = newSVpvs("");
cd81e915
NC
1720 sv_setsv(*svp, PL_skipwhite);
1721 sv_free(PL_skipwhite);
1722 PL_skipwhite = 0;
29595ff2
NC
1723 }
1724
1725 return s;
1726}
1727#endif
1728
80a702cd 1729STATIC void
15f169a1 1730S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1731{
1732 AV *av = CopFILEAVx(PL_curcop);
1733 if (av) {
b9f83d2f 1734 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1735 if (orig_sv)
1736 sv_setsv(sv, orig_sv);
1737 else
1738 sv_setpvn(sv, buf, len);
80a702cd
RGS
1739 (void)SvIOK_on(sv);
1740 SvIV_set(sv, 0);
1741 av_store(av, (I32)CopLINE(PL_curcop), sv);
1742 }
1743}
1744
ffb4593c
NT
1745/*
1746 * S_skipspace
1747 * Called to gobble the appropriate amount and type of whitespace.
1748 * Skips comments as well.
1749 */
1750
76e3520e 1751STATIC char *
cea2e8a9 1752S_skipspace(pTHX_ register char *s)
a687059c 1753{
5db06880 1754#ifdef PERL_MAD
f0e67a1d
Z
1755 char *start = s;
1756#endif /* PERL_MAD */
7918f24d 1757 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1758#ifdef PERL_MAD
cd81e915
NC
1759 if (PL_skipwhite) {
1760 sv_free(PL_skipwhite);
f0e67a1d 1761 PL_skipwhite = NULL;
5db06880 1762 }
f0e67a1d 1763#endif /* PERL_MAD */
3280af22 1764 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1765 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1766 s++;
f0e67a1d
Z
1767 } else {
1768 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1769 PL_bufptr = s;
f0998909
Z
1770 lex_read_space(LEX_KEEP_PREVIOUS |
1771 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1772 LEX_NO_NEXT_CHUNK : 0));
3280af22 1773 s = PL_bufptr;
f0e67a1d
Z
1774 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1775 if (PL_linestart > PL_bufptr)
1776 PL_bufptr = PL_linestart;
1777 return s;
463ee0b2 1778 }
5db06880 1779#ifdef PERL_MAD
f0e67a1d
Z
1780 if (PL_madskills)
1781 PL_skipwhite = newSVpvn(start, s-start);
1782#endif /* PERL_MAD */
5db06880 1783 return s;
a687059c 1784}
378cc40b 1785
ffb4593c
NT
1786/*
1787 * S_check_uni
1788 * Check the unary operators to ensure there's no ambiguity in how they're
1789 * used. An ambiguous piece of code would be:
1790 * rand + 5
1791 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1792 * the +5 is its argument.
1793 */
1794
76e3520e 1795STATIC void
cea2e8a9 1796S_check_uni(pTHX)
ba106d47 1797{
97aff369 1798 dVAR;
d4c19fe8
AL
1799 const char *s;
1800 const char *t;
2f3197b3 1801
3280af22 1802 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1803 return;
3280af22
NIS
1804 while (isSPACE(*PL_last_uni))
1805 PL_last_uni++;
c35e046a
AL
1806 s = PL_last_uni;
1807 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1808 s++;
3280af22 1809 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1810 return;
6136c704 1811
9b387841
NC
1812 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1813 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1814 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1815}
1816
ffb4593c
NT
1817/*
1818 * LOP : macro to build a list operator. Its behaviour has been replaced
1819 * with a subroutine, S_lop() for which LOP is just another name.
1820 */
1821
a0d0e21e
LW
1822#define LOP(f,x) return lop(f,x,s)
1823
ffb4593c
NT
1824/*
1825 * S_lop
1826 * Build a list operator (or something that might be one). The rules:
1827 * - if we have a next token, then it's a list operator [why?]
1828 * - if the next thing is an opening paren, then it's a function
1829 * - else it's a list operator
1830 */
1831
76e3520e 1832STATIC I32
a0be28da 1833S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1834{
97aff369 1835 dVAR;
7918f24d
NC
1836
1837 PERL_ARGS_ASSERT_LOP;
1838
6154021b 1839 pl_yylval.ival = f;
35c8bce7 1840 CLINE;
3280af22
NIS
1841 PL_expect = x;
1842 PL_bufptr = s;
1843 PL_last_lop = PL_oldbufptr;
eb160463 1844 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1845#ifdef PERL_MAD
1846 if (PL_lasttoke)
78cdf107 1847 goto lstop;
5db06880 1848#else
3280af22 1849 if (PL_nexttoke)
78cdf107 1850 goto lstop;
5db06880 1851#endif
79072805 1852 if (*s == '(')
bbf60fe6 1853 return REPORT(FUNC);
29595ff2 1854 s = PEEKSPACE(s);
79072805 1855 if (*s == '(')
bbf60fe6 1856 return REPORT(FUNC);
78cdf107
Z
1857 else {
1858 lstop:
1859 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1860 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1861 return REPORT(LSTOP);
78cdf107 1862 }
79072805
LW
1863}
1864
5db06880
NC
1865#ifdef PERL_MAD
1866 /*
1867 * S_start_force
1868 * Sets up for an eventual force_next(). start_force(0) basically does
1869 * an unshift, while start_force(-1) does a push. yylex removes items
1870 * on the "pop" end.
1871 */
1872
1873STATIC void
1874S_start_force(pTHX_ int where)
1875{
1876 int i;
1877
cd81e915 1878 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1879 where = PL_lasttoke;
cd81e915
NC
1880 assert(PL_curforce < 0 || PL_curforce == where);
1881 if (PL_curforce != where) {
5db06880
NC
1882 for (i = PL_lasttoke; i > where; --i) {
1883 PL_nexttoke[i] = PL_nexttoke[i-1];
1884 }
1885 PL_lasttoke++;
1886 }
cd81e915 1887 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1888 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1889 PL_curforce = where;
1890 if (PL_nextwhite) {
5db06880 1891 if (PL_madskills)
6b29d1f5 1892 curmad('^', newSVpvs(""));
cd81e915 1893 CURMAD('_', PL_nextwhite);
5db06880
NC
1894 }
1895}
1896
1897STATIC void
1898S_curmad(pTHX_ char slot, SV *sv)
1899{
1900 MADPROP **where;
1901
1902 if (!sv)
1903 return;
cd81e915
NC
1904 if (PL_curforce < 0)
1905 where = &PL_thismad;
5db06880 1906 else
cd81e915 1907 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1908
cd81e915 1909 if (PL_faketokens)
76f68e9b 1910 sv_setpvs(sv, "");
5db06880
NC
1911 else {
1912 if (!IN_BYTES) {
1913 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1914 SvUTF8_on(sv);
1915 else if (PL_encoding) {
1916 sv_recode_to_utf8(sv, PL_encoding);
1917 }
1918 }
1919 }
1920
1921 /* keep a slot open for the head of the list? */
1922 if (slot != '_' && *where && (*where)->mad_key == '^') {
1923 (*where)->mad_key = slot;
daba3364 1924 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1925 (*where)->mad_val = (void*)sv;
1926 }
1927 else
1928 addmad(newMADsv(slot, sv), where, 0);
1929}
1930#else
b3f24c00
MHM
1931# define start_force(where) NOOP
1932# define curmad(slot, sv) NOOP
5db06880
NC
1933#endif
1934
ffb4593c
NT
1935/*
1936 * S_force_next
9cbb5ea2 1937 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1938 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1939 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1940 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1941 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1942 */
1943
4e553d73 1944STATIC void
cea2e8a9 1945S_force_next(pTHX_ I32 type)
79072805 1946{
97aff369 1947 dVAR;
704d4215
GG
1948#ifdef DEBUGGING
1949 if (DEBUG_T_TEST) {
1950 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1951 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1952 }
1953#endif
5db06880 1954#ifdef PERL_MAD
cd81e915 1955 if (PL_curforce < 0)
5db06880 1956 start_force(PL_lasttoke);
cd81e915 1957 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1958 if (PL_lex_state != LEX_KNOWNEXT)
1959 PL_lex_defer = PL_lex_state;
1960 PL_lex_state = LEX_KNOWNEXT;
1961 PL_lex_expect = PL_expect;
cd81e915 1962 PL_curforce = -1;
5db06880 1963#else
3280af22
NIS
1964 PL_nexttype[PL_nexttoke] = type;
1965 PL_nexttoke++;
1966 if (PL_lex_state != LEX_KNOWNEXT) {
1967 PL_lex_defer = PL_lex_state;
1968 PL_lex_expect = PL_expect;
1969 PL_lex_state = LEX_KNOWNEXT;
79072805 1970 }
5db06880 1971#endif
79072805
LW
1972}
1973
28ac2b49
Z
1974void
1975Perl_yyunlex(pTHX)
1976{
a7aaec61
Z
1977 int yyc = PL_parser->yychar;
1978 if (yyc != YYEMPTY) {
1979 if (yyc) {
1980 start_force(-1);
1981 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1982 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 1983 PL_lex_allbrackets--;
a7aaec61 1984 PL_lex_brackets--;
78cdf107
Z
1985 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1986 } else if (yyc == '('/*)*/) {
1987 PL_lex_allbrackets--;
1988 yyc |= (2<<24);
a7aaec61
Z
1989 }
1990 force_next(yyc);
1991 }
28ac2b49
Z
1992 PL_parser->yychar = YYEMPTY;
1993 }
1994}
1995
d0a148a6 1996STATIC SV *
15f169a1 1997S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1998{
97aff369 1999 dVAR;
740cce10 2000 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2001 !IN_BYTES
2002 && UTF
2003 && !is_ascii_string((const U8*)start, len)
740cce10 2004 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2005 return sv;
2006}
2007
ffb4593c
NT
2008/*
2009 * S_force_word
2010 * When the lexer knows the next thing is a word (for instance, it has
2011 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2012 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2013 * lookahead.
ffb4593c
NT
2014 *
2015 * Arguments:
b1b65b59 2016 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2017 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2018 * int check_keyword : if true, Perl checks to make sure the word isn't
2019 * a keyword (do this if the word is a label, e.g. goto FOO)
2020 * int allow_pack : if true, : characters will also be allowed (require,
2021 * use, etc. do this)
9cbb5ea2 2022 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2023 */
2024
76e3520e 2025STATIC char *
cea2e8a9 2026S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2027{
97aff369 2028 dVAR;
463ee0b2
LW
2029 register char *s;
2030 STRLEN len;
4e553d73 2031
7918f24d
NC
2032 PERL_ARGS_ASSERT_FORCE_WORD;
2033
29595ff2 2034 start = SKIPSPACE1(start);
463ee0b2 2035 s = start;
7e2040f0 2036 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2037 (allow_pack && *s == ':') ||
15f0808c 2038 (allow_initial_tick && *s == '\'') )
a0d0e21e 2039 {
3280af22 2040 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2041 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2042 return start;
cd81e915 2043 start_force(PL_curforce);
5db06880
NC
2044 if (PL_madskills)
2045 curmad('X', newSVpvn(start,s-start));
463ee0b2 2046 if (token == METHOD) {
29595ff2 2047 s = SKIPSPACE1(s);
463ee0b2 2048 if (*s == '(')
3280af22 2049 PL_expect = XTERM;
463ee0b2 2050 else {
3280af22 2051 PL_expect = XOPERATOR;
463ee0b2 2052 }
79072805 2053 }
e74e6b3d 2054 if (PL_madskills)
63575281 2055 curmad('g', newSVpvs( "forced" ));
9ded7720 2056 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2057 = (OP*)newSVOP(OP_CONST,0,
2058 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2059 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2060 force_next(token);
2061 }
2062 return s;
2063}
2064
ffb4593c
NT
2065/*
2066 * S_force_ident
9cbb5ea2 2067 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2068 * text only contains the "foo" portion. The first argument is a pointer
2069 * to the "foo", and the second argument is the type symbol to prefix.
2070 * Forces the next token to be a "WORD".
9cbb5ea2 2071 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2072 */
2073
76e3520e 2074STATIC void
bfed75c6 2075S_force_ident(pTHX_ register const char *s, int kind)
79072805 2076{
97aff369 2077 dVAR;
7918f24d
NC
2078
2079 PERL_ARGS_ASSERT_FORCE_IDENT;
2080
c35e046a 2081 if (*s) {
90e5519e
NC
2082 const STRLEN len = strlen(s);
2083 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2084 start_force(PL_curforce);
9ded7720 2085 NEXTVAL_NEXTTOKE.opval = o;
79072805 2086 force_next(WORD);
748a9306 2087 if (kind) {
11343788 2088 o->op_private = OPpCONST_ENTERED;
55497cff 2089 /* XXX see note in pp_entereval() for why we forgo typo
2090 warnings if the symbol must be introduced in an eval.
2091 GSAR 96-10-12 */
90e5519e
NC
2092 gv_fetchpvn_flags(s, len,
2093 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2094 : GV_ADD,
2095 kind == '$' ? SVt_PV :
2096 kind == '@' ? SVt_PVAV :
2097 kind == '%' ? SVt_PVHV :
a0d0e21e 2098 SVt_PVGV
90e5519e 2099 );
748a9306 2100 }
79072805
LW
2101 }
2102}
2103
1571675a
GS
2104NV
2105Perl_str_to_version(pTHX_ SV *sv)
2106{
2107 NV retval = 0.0;
2108 NV nshift = 1.0;
2109 STRLEN len;
cfd0369c 2110 const char *start = SvPV_const(sv,len);
9d4ba2ae 2111 const char * const end = start + len;
504618e9 2112 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2113
2114 PERL_ARGS_ASSERT_STR_TO_VERSION;
2115
1571675a 2116 while (start < end) {
ba210ebe 2117 STRLEN skip;
1571675a
GS
2118 UV n;
2119 if (utf)
9041c2e3 2120 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2121 else {
2122 n = *(U8*)start;
2123 skip = 1;
2124 }
2125 retval += ((NV)n)/nshift;
2126 start += skip;
2127 nshift *= 1000;
2128 }
2129 return retval;
2130}
2131
4e553d73 2132/*
ffb4593c
NT
2133 * S_force_version
2134 * Forces the next token to be a version number.
e759cc13
RGS
2135 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2136 * and if "guessing" is TRUE, then no new token is created (and the caller
2137 * must use an alternative parsing method).
ffb4593c
NT
2138 */
2139
76e3520e 2140STATIC char *
e759cc13 2141S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2142{
97aff369 2143 dVAR;
5f66b61c 2144 OP *version = NULL;
44dcb63b 2145 char *d;
5db06880
NC
2146#ifdef PERL_MAD
2147 I32 startoff = s - SvPVX(PL_linestr);
2148#endif
89bfa8cd 2149
7918f24d
NC
2150 PERL_ARGS_ASSERT_FORCE_VERSION;
2151
29595ff2 2152 s = SKIPSPACE1(s);
89bfa8cd 2153
44dcb63b 2154 d = s;
dd629d5b 2155 if (*d == 'v')
44dcb63b 2156 d++;
44dcb63b 2157 if (isDIGIT(*d)) {
e759cc13
RGS
2158 while (isDIGIT(*d) || *d == '_' || *d == '.')
2159 d++;
5db06880
NC
2160#ifdef PERL_MAD
2161 if (PL_madskills) {
cd81e915 2162 start_force(PL_curforce);
5db06880
NC
2163 curmad('X', newSVpvn(s,d-s));
2164 }
2165#endif
4e4da3ac 2166 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2167 SV *ver;
8d08d9ba
DG
2168#ifdef USE_LOCALE_NUMERIC
2169 char *loc = setlocale(LC_NUMERIC, "C");
2170#endif
6154021b 2171 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2172#ifdef USE_LOCALE_NUMERIC
2173 setlocale(LC_NUMERIC, loc);
2174#endif
6154021b 2175 version = pl_yylval.opval;
dd629d5b
GS
2176 ver = cSVOPx(version)->op_sv;
2177 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2178 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2179 SvNV_set(ver, str_to_version(ver));
1571675a 2180 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2181 }
89bfa8cd 2182 }
5db06880
NC
2183 else if (guessing) {
2184#ifdef PERL_MAD
2185 if (PL_madskills) {
cd81e915
NC
2186 sv_free(PL_nextwhite); /* let next token collect whitespace */
2187 PL_nextwhite = 0;
5db06880
NC
2188 s = SvPVX(PL_linestr) + startoff;
2189 }
2190#endif
e759cc13 2191 return s;
5db06880 2192 }
89bfa8cd 2193 }
2194
5db06880
NC
2195#ifdef PERL_MAD
2196 if (PL_madskills && !version) {
cd81e915
NC
2197 sv_free(PL_nextwhite); /* let next token collect whitespace */
2198 PL_nextwhite = 0;
5db06880
NC
2199 s = SvPVX(PL_linestr) + startoff;
2200 }
2201#endif
89bfa8cd 2202 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2203 start_force(PL_curforce);
9ded7720 2204 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2205 force_next(WORD);
89bfa8cd 2206
e759cc13 2207 return s;
89bfa8cd 2208}
2209
ffb4593c 2210/*
91152fc1
DG
2211 * S_force_strict_version
2212 * Forces the next token to be a version number using strict syntax rules.
2213 */
2214
2215STATIC char *
2216S_force_strict_version(pTHX_ char *s)
2217{
2218 dVAR;
2219 OP *version = NULL;
2220#ifdef PERL_MAD
2221 I32 startoff = s - SvPVX(PL_linestr);
2222#endif
2223 const char *errstr = NULL;
2224
2225 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2226
2227 while (isSPACE(*s)) /* leading whitespace */
2228 s++;
2229
2230 if (is_STRICT_VERSION(s,&errstr)) {
2231 SV *ver = newSV(0);
2232 s = (char *)scan_version(s, ver, 0);
2233 version = newSVOP(OP_CONST, 0, ver);
2234 }
4e4da3ac
Z
2235 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2236 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2237 {
91152fc1
DG
2238 PL_bufptr = s;
2239 if (errstr)
2240 yyerror(errstr); /* version required */
2241 return s;
2242 }
2243
2244#ifdef PERL_MAD
2245 if (PL_madskills && !version) {
2246 sv_free(PL_nextwhite); /* let next token collect whitespace */
2247 PL_nextwhite = 0;
2248 s = SvPVX(PL_linestr) + startoff;
2249 }
2250#endif
2251 /* NOTE: The parser sees the package name and the VERSION swapped */
2252 start_force(PL_curforce);
2253 NEXTVAL_NEXTTOKE.opval = version;
2254 force_next(WORD);
2255
2256 return s;
2257}
2258
2259/*
ffb4593c
NT
2260 * S_tokeq
2261 * Tokenize a quoted string passed in as an SV. It finds the next
2262 * chunk, up to end of string or a backslash. It may make a new
2263 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2264 * turns \\ into \.
2265 */
2266
76e3520e 2267STATIC SV *
cea2e8a9 2268S_tokeq(pTHX_ SV *sv)
79072805 2269{
97aff369 2270 dVAR;
79072805
LW
2271 register char *s;
2272 register char *send;
2273 register char *d;
b3ac6de7
IZ
2274 STRLEN len = 0;
2275 SV *pv = sv;
79072805 2276
7918f24d
NC
2277 PERL_ARGS_ASSERT_TOKEQ;
2278
79072805 2279 if (!SvLEN(sv))
b3ac6de7 2280 goto finish;
79072805 2281
a0d0e21e 2282 s = SvPV_force(sv, len);
21a311ee 2283 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2284 goto finish;
463ee0b2 2285 send = s + len;
dcb21ed6
NC
2286 /* This is relying on the SV being "well formed" with a trailing '\0' */
2287 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2288 s++;
2289 if (s == send)
b3ac6de7 2290 goto finish;
79072805 2291 d = s;
be4731d2 2292 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2293 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2294 }
79072805
LW
2295 while (s < send) {
2296 if (*s == '\\') {
a0d0e21e 2297 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2298 s++; /* all that, just for this */
2299 }
2300 *d++ = *s++;
2301 }
2302 *d = '\0';
95a20fc0 2303 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2304 finish:
3280af22 2305 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2306 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2307 return sv;
2308}
2309
ffb4593c
NT
2310/*
2311 * Now come three functions related to double-quote context,
2312 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2313 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2314 * interact with PL_lex_state, and create fake ( ... ) argument lists
2315 * to handle functions and concatenation.
2316 * They assume that whoever calls them will be setting up a fake
2317 * join call, because each subthing puts a ',' after it. This lets
2318 * "lower \luPpEr"
2319 * become
2320 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2321 *
2322 * (I'm not sure whether the spurious commas at the end of lcfirst's
2323 * arguments and join's arguments are created or not).
2324 */
2325
2326/*
2327 * S_sublex_start
6154021b 2328 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2329 *
2330 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2331 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2332 *
2333 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2334 *
2335 * Everything else becomes a FUNC.
2336 *
2337 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2338 * had an OP_CONST or OP_READLINE). This just sets us up for a
2339 * call to S_sublex_push().
2340 */
2341
76e3520e 2342STATIC I32
cea2e8a9 2343S_sublex_start(pTHX)
79072805 2344{
97aff369 2345 dVAR;
6154021b 2346 register const I32 op_type = pl_yylval.ival;
79072805
LW
2347
2348 if (op_type == OP_NULL) {
6154021b 2349 pl_yylval.opval = PL_lex_op;
5f66b61c 2350 PL_lex_op = NULL;
79072805
LW
2351 return THING;
2352 }
2353 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2354 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2355
2356 if (SvTYPE(sv) == SVt_PVIV) {
2357 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2358 STRLEN len;
96a5add6 2359 const char * const p = SvPV_const(sv, len);
740cce10 2360 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2361 SvREFCNT_dec(sv);
2362 sv = nsv;
4e553d73 2363 }
6154021b 2364 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2365 PL_lex_stuff = NULL;
6f33ba73
RGS
2366 /* Allow <FH> // "foo" */
2367 if (op_type == OP_READLINE)
2368 PL_expect = XTERMORDORDOR;
79072805
LW
2369 return THING;
2370 }
e3f73d4e
RGS
2371 else if (op_type == OP_BACKTICK && PL_lex_op) {
2372 /* readpipe() vas overriden */
2373 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2374 pl_yylval.opval = PL_lex_op;
9b201d7d 2375 PL_lex_op = NULL;
e3f73d4e
RGS
2376 PL_lex_stuff = NULL;
2377 return THING;
2378 }
79072805 2379
3280af22 2380 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2381 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2382 PL_sublex_info.sub_op = PL_lex_op;
2383 PL_lex_state = LEX_INTERPPUSH;
55497cff 2384
3280af22
NIS
2385 PL_expect = XTERM;
2386 if (PL_lex_op) {
6154021b 2387 pl_yylval.opval = PL_lex_op;
5f66b61c 2388 PL_lex_op = NULL;
55497cff 2389 return PMFUNC;
2390 }
2391 else
2392 return FUNC;
2393}
2394
ffb4593c
NT
2395/*
2396 * S_sublex_push
2397 * Create a new scope to save the lexing state. The scope will be
2398 * ended in S_sublex_done. Returns a '(', starting the function arguments
2399 * to the uc, lc, etc. found before.
2400 * Sets PL_lex_state to LEX_INTERPCONCAT.
2401 */
2402
76e3520e 2403STATIC I32
cea2e8a9 2404S_sublex_push(pTHX)
55497cff 2405{
27da23d5 2406 dVAR;
f46d017c 2407 ENTER;
55497cff 2408
3280af22 2409 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2410 SAVEBOOL(PL_lex_dojoin);
3280af22 2411 SAVEI32(PL_lex_brackets);
78cdf107
Z
2412 SAVEI32(PL_lex_allbrackets);
2413 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2414 SAVEI32(PL_lex_casemods);
2415 SAVEI32(PL_lex_starts);
651b5b28 2416 SAVEI8(PL_lex_state);
7766f137 2417 SAVEVPTR(PL_lex_inpat);
98246f1e 2418 SAVEI16(PL_lex_inwhat);
57843af0 2419 SAVECOPLINE(PL_curcop);
3280af22 2420 SAVEPPTR(PL_bufptr);
8452ff4b 2421 SAVEPPTR(PL_bufend);
3280af22
NIS
2422 SAVEPPTR(PL_oldbufptr);
2423 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2424 SAVEPPTR(PL_last_lop);
2425 SAVEPPTR(PL_last_uni);
3280af22
NIS
2426 SAVEPPTR(PL_linestart);
2427 SAVESPTR(PL_linestr);
8edd5f42
RGS
2428 SAVEGENERICPV(PL_lex_brackstack);
2429 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2430
2431 PL_linestr = PL_lex_stuff;
a0714e2c 2432 PL_lex_stuff = NULL;
3280af22 2433
9cbb5ea2
GS
2434 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2435 = SvPVX(PL_linestr);
3280af22 2436 PL_bufend += SvCUR(PL_linestr);
bd61b366 2437 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2438 SAVEFREESV(PL_linestr);
2439
2440 PL_lex_dojoin = FALSE;
2441 PL_lex_brackets = 0;
78cdf107
Z
2442 PL_lex_allbrackets = 0;
2443 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2444 Newx(PL_lex_brackstack, 120, char);
2445 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2446 PL_lex_casemods = 0;
2447 *PL_lex_casestack = '\0';
2448 PL_lex_starts = 0;
2449 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2450 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2451
2452 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2453 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2454 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2455 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2456 else
5f66b61c 2457 PL_lex_inpat = NULL;
79072805 2458
55497cff 2459 return '(';
79072805
LW
2460}
2461
ffb4593c
NT
2462/*
2463 * S_sublex_done
2464 * Restores lexer state after a S_sublex_push.
2465 */
2466
76e3520e 2467STATIC I32
cea2e8a9 2468S_sublex_done(pTHX)
79072805 2469{
27da23d5 2470 dVAR;
3280af22 2471 if (!PL_lex_starts++) {
396482e1 2472 SV * const sv = newSVpvs("");
9aa983d2
JH
2473 if (SvUTF8(PL_linestr))
2474 SvUTF8_on(sv);
3280af22 2475 PL_expect = XOPERATOR;
6154021b 2476 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2477 return THING;
2478 }
2479
3280af22
NIS
2480 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2481 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2482 return yylex();
79072805
LW
2483 }
2484
ffb4593c 2485 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2486 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2487 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2488 PL_linestr = PL_lex_repl;
2489 PL_lex_inpat = 0;
2490 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2491 PL_bufend += SvCUR(PL_linestr);
bd61b366 2492 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2493 SAVEFREESV(PL_linestr);
2494 PL_lex_dojoin = FALSE;
2495 PL_lex_brackets = 0;
78cdf107
Z
2496 PL_lex_allbrackets = 0;
2497 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2498 PL_lex_casemods = 0;
2499 *PL_lex_casestack = '\0';
2500 PL_lex_starts = 0;
25da4f38 2501 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2502 PL_lex_state = LEX_INTERPNORMAL;
2503 PL_lex_starts++;
e9fa98b2
HS
2504 /* we don't clear PL_lex_repl here, so that we can check later
2505 whether this is an evalled subst; that means we rely on the
2506 logic to ensure sublex_done() is called again only via the
2507 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2508 }
e9fa98b2 2509 else {
3280af22 2510 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2511 PL_lex_repl = NULL;
e9fa98b2 2512 }
79072805 2513 return ',';
ffed7fef
LW
2514 }
2515 else {
5db06880
NC
2516#ifdef PERL_MAD
2517 if (PL_madskills) {
cd81e915
NC
2518 if (PL_thiswhite) {
2519 if (!PL_endwhite)
6b29d1f5 2520 PL_endwhite = newSVpvs("");
cd81e915
NC
2521 sv_catsv(PL_endwhite, PL_thiswhite);
2522 PL_thiswhite = 0;
2523 }
2524 if (PL_thistoken)
76f68e9b 2525 sv_setpvs(PL_thistoken,"");
5db06880 2526 else
cd81e915 2527 PL_realtokenstart = -1;
5db06880
NC
2528 }
2529#endif
f46d017c 2530 LEAVE;
3280af22
NIS
2531 PL_bufend = SvPVX(PL_linestr);
2532 PL_bufend += SvCUR(PL_linestr);
2533 PL_expect = XOPERATOR;
09bef843 2534 PL_sublex_info.sub_inwhat = 0;
79072805 2535 return ')';
ffed7fef
LW
2536 }
2537}
2538
02aa26ce
NT
2539/*
2540 scan_const
2541
2542 Extracts a pattern, double-quoted string, or transliteration. This
2543 is terrifying code.
2544
94def140 2545 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2546 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2547 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2548
94def140
TS
2549 Returns a pointer to the character scanned up to. If this is
2550 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2551 successfully parsed), will leave an OP for the substring scanned
6154021b 2552 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2553 by looking at the next characters herself.
2554
02aa26ce
NT
2555 In patterns:
2556 backslashes:
ff3f963a 2557 constants: \N{NAME} only
02aa26ce
NT
2558 case and quoting: \U \Q \E
2559 stops on @ and $, but not for $ as tail anchor
2560
2561 In transliterations:
2562 characters are VERY literal, except for - not at the start or end
94def140
TS
2563 of the string, which indicates a range. If the range is in bytes,
2564 scan_const expands the range to the full set of intermediate
2565 characters. If the range is in utf8, the hyphen is replaced with
2566 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2567
2568 In double-quoted strings:
2569 backslashes:
2570 double-quoted style: \r and \n
ff3f963a 2571 constants: \x31, etc.
94def140 2572 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2573 case and quoting: \U \Q \E
2574 stops on @ and $
2575
2576 scan_const does *not* construct ops to handle interpolated strings.
2577 It stops processing as soon as it finds an embedded $ or @ variable
2578 and leaves it to the caller to work out what's going on.
2579
94def140
TS
2580 embedded arrays (whether in pattern or not) could be:
2581 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2582
2583 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2584
2585 $ in pattern could be $foo or could be tail anchor. Assumption:
2586 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2587 followed by one of "()| \r\n\t"
02aa26ce
NT
2588
2589 \1 (backreferences) are turned into $1
2590
2591 The structure of the code is
2592 while (there's a character to process) {
94def140
TS
2593 handle transliteration ranges
2594 skip regexp comments /(?#comment)/ and codes /(?{code})/
2595 skip #-initiated comments in //x patterns
2596 check for embedded arrays
02aa26ce
NT
2597 check for embedded scalars
2598 if (backslash) {
94def140 2599 deprecate \1 in substitution replacements
02aa26ce
NT
2600 handle string-changing backslashes \l \U \Q \E, etc.
2601 switch (what was escaped) {
94def140 2602 handle \- in a transliteration (becomes a literal -)
ff3f963a 2603 if a pattern and not \N{, go treat as regular character
94def140
TS
2604 handle \132 (octal characters)
2605 handle \x15 and \x{1234} (hex characters)
ff3f963a 2606 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2607 handle \cV (control characters)
2608 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2609 } (end switch)
77a135fe 2610 continue
02aa26ce 2611 } (end if backslash)
77a135fe 2612 handle regular character
02aa26ce 2613 } (end while character to read)
4e553d73 2614
02aa26ce
NT
2615*/
2616
76e3520e 2617STATIC char *
cea2e8a9 2618S_scan_const(pTHX_ char *start)
79072805 2619{
97aff369 2620 dVAR;
3280af22 2621 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2622 SV *sv = newSV(send - start); /* sv for the constant. See
2623 note below on sizing. */
02aa26ce
NT
2624 register char *s = start; /* start of the constant */
2625 register char *d = SvPVX(sv); /* destination for copies */
2626 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2627 bool didrange = FALSE; /* did we just finish a range? */
b953e60c
KW
2628 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2629 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2630 to be UTF8? But, this can
2631 show as true when the source
2632 isn't utf8, as for example
2633 when it is entirely composed
2634 of hex constants */
2635
2636 /* Note on sizing: The scanned constant is placed into sv, which is
2637 * initialized by newSV() assuming one byte of output for every byte of
2638 * input. This routine expects newSV() to allocate an extra byte for a
2639 * trailing NUL, which this routine will append if it gets to the end of
2640 * the input. There may be more bytes of input than output (eg., \N{LATIN
2641 * CAPITAL LETTER A}), or more output than input if the constant ends up
2642 * recoded to utf8, but each time a construct is found that might increase
2643 * the needed size, SvGROW() is called. Its size parameter each time is
2644 * based on the best guess estimate at the time, namely the length used so
2645 * far, plus the length the current construct will occupy, plus room for
2646 * the trailing NUL, plus one byte for every input byte still unscanned */
2647
012bcf8d 2648 UV uv;
4c3a8340
TS
2649#ifdef EBCDIC
2650 UV literal_endpoint = 0;
e294cc5d 2651 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2652#endif
012bcf8d 2653
7918f24d
NC
2654 PERL_ARGS_ASSERT_SCAN_CONST;
2655
bb16bae8 2656 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2657 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2658 /* If we are doing a trans and we know we want UTF8 set expectation */
2659 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2660 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2661 }
2662
2663
79072805 2664 while (s < send || dorange) {
ff3f963a 2665
02aa26ce 2666 /* get transliterations out of the way (they're most literal) */
3280af22 2667 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2668 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2669 if (dorange) {
1ba5c669
JH
2670 I32 i; /* current expanded character */
2671 I32 min; /* first character in range */
2672 I32 max; /* last character in range */
02aa26ce 2673
e294cc5d
JH
2674#ifdef EBCDIC
2675 UV uvmax = 0;
2676#endif
2677
2678 if (has_utf8
2679#ifdef EBCDIC
2680 && !native_range
2681#endif
2682 ) {
9d4ba2ae 2683 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2684 char *e = d++;
2685 while (e-- > c)
2686 *(e + 1) = *e;
25716404 2687 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2688 /* mark the range as done, and continue */
2689 dorange = FALSE;
2690 didrange = TRUE;
2691 continue;
2692 }
2b9d42f0 2693
95a20fc0 2694 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2695#ifdef EBCDIC
2696 SvGROW(sv,
2697 SvLEN(sv) + (has_utf8 ?
2698 (512 - UTF_CONTINUATION_MARK +
2699 UNISKIP(0x100))
2700 : 256));
2701 /* How many two-byte within 0..255: 128 in UTF-8,
2702 * 96 in UTF-8-mod. */
2703#else
9cbb5ea2 2704 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2705#endif
9cbb5ea2 2706 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2707#ifdef EBCDIC
2708 if (has_utf8) {
2709 int j;
2710 for (j = 0; j <= 1; j++) {
2711 char * const c = (char*)utf8_hop((U8*)d, -1);
2712 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2713 if (j)
2714 min = (U8)uv;
2715 else if (uv < 256)
2716 max = (U8)uv;
2717 else {
2718 max = (U8)0xff; /* only to \xff */
2719 uvmax = uv; /* \x{100} to uvmax */
2720 }
2721 d = c; /* eat endpoint chars */
2722 }
2723 }
2724 else {
2725#endif
2726 d -= 2; /* eat the first char and the - */
2727 min = (U8)*d; /* first char in range */
2728 max = (U8)d[1]; /* last char in range */
2729#ifdef EBCDIC
2730 }
2731#endif
8ada0baa 2732
c2e66d9e 2733 if (min > max) {
01ec43d0 2734 Perl_croak(aTHX_
d1573ac7 2735 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2736 (char)min, (char)max);
c2e66d9e
GS
2737 }
2738
c7f1f016 2739#ifdef EBCDIC
4c3a8340
TS
2740 if (literal_endpoint == 2 &&
2741 ((isLOWER(min) && isLOWER(max)) ||
2742 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2743 if (isLOWER(min)) {
2744 for (i = min; i <= max; i++)
2745 if (isLOWER(i))
db42d148 2746 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2747 } else {
2748 for (i = min; i <= max; i++)
2749 if (isUPPER(i))
db42d148 2750 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2751 }
2752 }
2753 else
2754#endif
2755 for (i = min; i <= max; i++)
e294cc5d
JH
2756#ifdef EBCDIC
2757 if (has_utf8) {
2758 const U8 ch = (U8)NATIVE_TO_UTF(i);
2759 if (UNI_IS_INVARIANT(ch))
2760 *d++ = (U8)i;
2761 else {
2762 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2763 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2764 }
2765 }
2766 else
2767#endif
2768 *d++ = (char)i;
2769
2770#ifdef EBCDIC
2771 if (uvmax) {
2772 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2773 if (uvmax > 0x101)
2774 *d++ = (char)UTF_TO_NATIVE(0xff);
2775 if (uvmax > 0x100)
2776 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2777 }
2778#endif
02aa26ce
NT
2779
2780 /* mark the range as done, and continue */
79072805 2781 dorange = FALSE;
01ec43d0 2782 didrange = TRUE;
4c3a8340
TS
2783#ifdef EBCDIC
2784 literal_endpoint = 0;
2785#endif
79072805 2786 continue;
4e553d73 2787 }
02aa26ce
NT
2788
2789 /* range begins (ignore - as first or last char) */
79072805 2790 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2791 if (didrange) {
1fafa243 2792 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2793 }
e294cc5d
JH
2794 if (has_utf8
2795#ifdef EBCDIC
2796 && !native_range
2797#endif
2798 ) {
25716404 2799 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2800 s++;
2801 continue;
2802 }
79072805
LW
2803 dorange = TRUE;
2804 s++;
01ec43d0
GS
2805 }
2806 else {
2807 didrange = FALSE;
4c3a8340
TS
2808#ifdef EBCDIC
2809 literal_endpoint = 0;
e294cc5d 2810 native_range = TRUE;
4c3a8340 2811#endif
01ec43d0 2812 }
79072805 2813 }
02aa26ce
NT
2814
2815 /* if we get here, we're not doing a transliteration */
2816
0f5d15d6
IZ
2817 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2818 except for the last char, which will be done separately. */
3280af22 2819 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2820 if (s[2] == '#') {
e994fd66 2821 while (s+1 < send && *s != ')')
db42d148 2822 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2823 }
2824 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2825 || (s[2] == '?' && s[3] == '{'))
155aba94 2826 {
cc6b7395 2827 I32 count = 1;
0f5d15d6 2828 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2829 char c;
2830
d9f97599
GS
2831 while (count && (c = *regparse)) {
2832 if (c == '\\' && regparse[1])
2833 regparse++;
4e553d73 2834 else if (c == '{')
cc6b7395 2835 count++;
4e553d73 2836 else if (c == '}')
cc6b7395 2837 count--;
d9f97599 2838 regparse++;
cc6b7395 2839 }
e994fd66 2840 if (*regparse != ')')
5bdf89e7 2841 regparse--; /* Leave one char for continuation. */
0f5d15d6 2842 while (s < regparse)
db42d148 2843 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2844 }
748a9306 2845 }
02aa26ce
NT
2846
2847 /* likewise skip #-initiated comments in //x patterns */
3280af22 2848 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2849 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2850 while (s+1 < send && *s != '\n')
db42d148 2851 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2852 }
02aa26ce 2853
5d1d4326 2854 /* check for embedded arrays
da6eedaa 2855 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2856 */
1749ea0d
TS
2857 else if (*s == '@' && s[1]) {
2858 if (isALNUM_lazy_if(s+1,UTF))
2859 break;
2860 if (strchr(":'{$", s[1]))
2861 break;
2862 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2863 break; /* in regexp, neither @+ nor @- are interpolated */
2864 }
02aa26ce
NT
2865
2866 /* check for embedded scalars. only stop if we're sure it's a
2867 variable.
2868 */
79072805 2869 else if (*s == '$') {
3280af22 2870 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2871 break;
77772344 2872 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2873 if (s[1] == '\\') {
2874 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2875 "Possible unintended interpolation of $\\ in regex");
77772344 2876 }
79072805 2877 break; /* in regexp, $ might be tail anchor */
77772344 2878 }
79072805 2879 }
02aa26ce 2880
2b9d42f0
NIS
2881 /* End of else if chain - OP_TRANS rejoin rest */
2882
02aa26ce 2883 /* backslashes */
79072805 2884 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2885 char* e; /* Can be used for ending '}', etc. */
2886
79072805 2887 s++;
02aa26ce 2888
7d0fc23c
KW
2889 /* warn on \1 - \9 in substitution replacements, but note that \11
2890 * is an octal; and \19 is \1 followed by '9' */
3280af22 2891 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2892 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2893 {
a2a5de95 2894 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2895 *--s = '$';
2896 break;
2897 }
02aa26ce
NT
2898
2899 /* string-change backslash escapes */
3280af22 2900 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2901 --s;
2902 break;
2903 }
ff3f963a
KW
2904 /* In a pattern, process \N, but skip any other backslash escapes.
2905 * This is because we don't want to translate an escape sequence
2906 * into a meta symbol and have the regex compiler use the meta
2907 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2908 * in spite of this, we do have to process \N here while the proper
2909 * charnames handler is in scope. See bugs #56444 and #62056.
2910 * There is a complication because \N in a pattern may also stand
2911 * for 'match a non-nl', and not mean a charname, in which case its
2912 * processing should be deferred to the regex compiler. To be a
2913 * charname it must be followed immediately by a '{', and not look
2914 * like \N followed by a curly quantifier, i.e., not something like
2915 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2916 * quantifier */
2917 else if (PL_lex_inpat
2918 && (*s != 'N'
2919 || s[1] != '{'
2920 || regcurly(s + 1)))
2921 {
cc74c5bd
TS
2922 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2923 goto default_action;
2924 }
02aa26ce 2925
79072805 2926 switch (*s) {
02aa26ce
NT
2927
2928 /* quoted - in transliterations */
79072805 2929 case '-':
3280af22 2930 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2931 *d++ = *s++;
2932 continue;
2933 }
2934 /* FALL THROUGH */
2935 default:
11b8faa4 2936 {
a2a5de95
NC
2937 if ((isALPHA(*s) || isDIGIT(*s)))
2938 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2939 "Unrecognized escape \\%c passed through",
2940 *s);
11b8faa4 2941 /* default action is to copy the quoted character */
f9a63242 2942 goto default_action;
11b8faa4 2943 }
02aa26ce 2944
632403cc 2945 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2946 case '0': case '1': case '2': case '3':
2947 case '4': case '5': case '6': case '7':
ba210ebe 2948 {
53305cf1
NC
2949 I32 flags = 0;
2950 STRLEN len = 3;
77a135fe 2951 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2952 s += len;
2953 }
012bcf8d 2954 goto NUM_ESCAPE_INSERT;
02aa26ce 2955
f0a2b745
KW
2956 /* eg. \o{24} indicates the octal constant \024 */
2957 case 'o':
2958 {
2959 STRLEN len;
454155d9 2960 const char* error;
f0a2b745 2961
454155d9 2962 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2963 s += len;
454155d9 2964 if (! valid) {
f0a2b745
KW
2965 yyerror(error);
2966 continue;
2967 }
2968 goto NUM_ESCAPE_INSERT;
2969 }
2970
77a135fe 2971 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2972 case 'x':
a0ed51b3
LW
2973 ++s;
2974 if (*s == '{') {
9d4ba2ae 2975 char* const e = strchr(s, '}');
a4c04bdc
NC
2976 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2977 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2978 STRLEN len;
355860ce 2979
53305cf1 2980 ++s;
adaeee49 2981 if (!e) {
a0ed51b3 2982 yyerror("Missing right brace on \\x{}");
355860ce 2983 continue;
ba210ebe 2984 }
53305cf1 2985 len = e - s;
77a135fe 2986 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2987 s = e + 1;
a0ed51b3
LW
2988 }
2989 else {
ba210ebe 2990 {
53305cf1 2991 STRLEN len = 2;
a4c04bdc 2992 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2993 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2994 s += len;
2995 }
012bcf8d
GS
2996 }
2997
2998 NUM_ESCAPE_INSERT:
ff3f963a
KW
2999 /* Insert oct or hex escaped character. There will always be
3000 * enough room in sv since such escapes will be longer than any
3001 * UTF-8 sequence they can end up as, except if they force us
3002 * to recode the rest of the string into utf8 */
ba7cea30 3003
77a135fe 3004 /* Here uv is the ordinal of the next character being added in
ff3f963a 3005 * unicode (converted from native). */
77a135fe 3006 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3007 if (!has_utf8 && uv > 255) {
77a135fe
KW
3008 /* Might need to recode whatever we have accumulated so
3009 * far if it contains any chars variant in utf8 or
3010 * utf-ebcdic. */
3011
3012 SvCUR_set(sv, d - SvPVX_const(sv));
3013 SvPOK_on(sv);
3014 *d = '\0';
77a135fe 3015 /* See Note on sizing above. */
7bf79863
KW
3016 sv_utf8_upgrade_flags_grow(sv,
3017 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3018 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3019 d = SvPVX(sv) + SvCUR(sv);
3020 has_utf8 = TRUE;
012bcf8d
GS
3021 }
3022
77a135fe
KW
3023 if (has_utf8) {
3024 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3025 if (PL_lex_inwhat == OP_TRANS &&
3026 PL_sublex_info.sub_op) {
3027 PL_sublex_info.sub_op->op_private |=
3028 (PL_lex_repl ? OPpTRANS_FROM_UTF
3029 : OPpTRANS_TO_UTF);
f9a63242 3030 }
e294cc5d
JH
3031#ifdef EBCDIC
3032 if (uv > 255 && !dorange)
3033 native_range = FALSE;
3034#endif
012bcf8d 3035 }
a0ed51b3 3036 else {
012bcf8d 3037 *d++ = (char)uv;
a0ed51b3 3038 }
012bcf8d
GS
3039 }
3040 else {
c4d5f83a 3041 *d++ = (char) uv;
a0ed51b3 3042 }
79072805 3043 continue;
02aa26ce 3044
4a2d328f 3045 case 'N':
ff3f963a
KW
3046 /* In a non-pattern \N must be a named character, like \N{LATIN
3047 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3048 * mean to match a non-newline. For non-patterns, named
3049 * characters are converted to their string equivalents. In
3050 * patterns, named characters are not converted to their
3051 * ultimate forms for the same reasons that other escapes
3052 * aren't. Instead, they are converted to the \N{U+...} form
3053 * to get the value from the charnames that is in effect right
3054 * now, while preserving the fact that it was a named character
3055 * so that the regex compiler knows this */
3056
3057 /* This section of code doesn't generally use the
3058 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3059 * a close examination of this macro and determined it is a
3060 * no-op except on utfebcdic variant characters. Every
3061 * character generated by this that would normally need to be
3062 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3063 * needed, and would complicate use of copy(). XXX There are
3064 * other parts of this file where the macro is used
3065 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3066
3067 /* The structure of this section of code (besides checking for
3068 * errors and upgrading to utf8) is:
3069 * Further disambiguate between the two meanings of \N, and if
3070 * not a charname, go process it elsewhere
0a96133f
KW
3071 * If of form \N{U+...}, pass it through if a pattern;
3072 * otherwise convert to utf8
3073 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3074 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3075
3076 /* Here, s points to the 'N'; the test below is guaranteed to
3077 * succeed if we are being called on a pattern as we already
3078 * know from a test above that the next character is a '{'.
3079 * On a non-pattern \N must mean 'named sequence, which
3080 * requires braces */
3081 s++;
3082 if (*s != '{') {
3083 yyerror("Missing braces on \\N{}");
3084 continue;
3085 }
3086 s++;
3087
0a96133f 3088 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3089 if (! (e = strchr(s, '}'))) {
3090 if (! PL_lex_inpat) {
5777a3f7 3091 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3092 } else {
3093 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3094 }
0a96133f 3095 continue;
ff3f963a 3096 }
cddc7ef4 3097
ff3f963a 3098 /* Here it looks like a named character */
cddc7ef4 3099
ff3f963a
KW
3100 if (PL_lex_inpat) {
3101
3102 /* XXX This block is temporary code. \N{} implies that the
3103 * pattern is to have Unicode semantics, and therefore
3104 * currently has to be encoded in utf8. By putting it in
3105 * utf8 now, we save a whole pass in the regular expression
3106 * compiler. Once that code is changed so Unicode
3107 * semantics doesn't necessarily have to be in utf8, this
da3a4baf
KW
3108 * block should be removed. However, the code that parses
3109 * the output of this would have to be changed to not
3110 * necessarily expect utf8 */
ff3f963a 3111 if (!has_utf8) {
77a135fe 3112 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3113 SvPOK_on(sv);
e4f3eed8 3114 *d = '\0';
77a135fe 3115 /* See Note on sizing above. */
7bf79863 3116 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3117 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3118 /* 5 = '\N{' + cur char + NUL */
3119 (STRLEN)(send - s) + 5);
f08d6ad9 3120 d = SvPVX(sv) + SvCUR(sv);
89491803 3121 has_utf8 = TRUE;
ff3f963a
KW
3122 }
3123 }
423cee85 3124
ff3f963a
KW
3125 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3126 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3127 | PERL_SCAN_DISALLOW_PREFIX;
3128 STRLEN len;
3129
3130 /* For \N{U+...}, the '...' is a unicode value even on
3131 * EBCDIC machines */
3132 s += 2; /* Skip to next char after the 'U+' */
3133 len = e - s;
3134 uv = grok_hex(s, &len, &flags, NULL);
3135 if (len == 0 || len != (STRLEN)(e - s)) {
3136 yyerror("Invalid hexadecimal number in \\N{U+...}");
3137 s = e + 1;
3138 continue;
3139 }
3140
3141 if (PL_lex_inpat) {
3142
e2a7e165
KW
3143 /* On non-EBCDIC platforms, pass through to the regex
3144 * compiler unchanged. The reason we evaluated the
3145 * number above is to make sure there wasn't a syntax
3146 * error. But on EBCDIC we convert to native so
3147 * downstream code can continue to assume it's native
3148 */
ff3f963a 3149 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3150#ifdef EBCDIC
3151 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3152 and the \0 */
3153 "\\N{U+%X}",
3154 (unsigned int) UNI_TO_NATIVE(uv));
3155#else
ff3f963a
KW
3156 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3157 d += e - s + 1;
e2a7e165 3158#endif
ff3f963a
KW
3159 }
3160 else { /* Not a pattern: convert the hex to string */
3161
3162 /* If destination is not in utf8, unconditionally
3163 * recode it to be so. This is because \N{} implies
3164 * Unicode semantics, and scalars have to be in utf8
3165 * to guarantee those semantics */
3166 if (! has_utf8) {
3167 SvCUR_set(sv, d - SvPVX_const(sv));
3168 SvPOK_on(sv);
3169 *d = '\0';
3170 /* See Note on sizing above. */
3171 sv_utf8_upgrade_flags_grow(
3172 sv,
3173 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3174 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3175 d = SvPVX(sv) + SvCUR(sv);
3176 has_utf8 = TRUE;
3177 }
3178
3179 /* Add the string to the output */
3180 if (UNI_IS_INVARIANT(uv)) {
3181 *d++ = (char) uv;
3182 }
3183 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3184 }
3185 }
3186 else { /* Here is \N{NAME} but not \N{U+...}. */
3187
3188 SV *res; /* result from charnames */
3189 const char *str; /* the string in 'res' */
3190 STRLEN len; /* its length */
3191
3192 /* Get the value for NAME */
3193 res = newSVpvn(s, e - s);
3194 res = new_constant( NULL, 0, "charnames",
3195 /* includes all of: \N{...} */
3196 res, NULL, s - 3, e - s + 4 );
3197
3198 /* Most likely res will be in utf8 already since the
3199 * standard charnames uses pack U, but a custom translator
3200 * can leave it otherwise, so make sure. XXX This can be
3201 * revisited to not have charnames use utf8 for characters
3202 * that don't need it when regexes don't have to be in utf8
3203 * for Unicode semantics. If doing so, remember EBCDIC */
3204 sv_utf8_upgrade(res);
3205 str = SvPV_const(res, len);
3206
3207 /* Don't accept malformed input */
3208 if (! is_utf8_string((U8 *) str, len)) {
3209 yyerror("Malformed UTF-8 returned by \\N");
3210 }
3211 else if (PL_lex_inpat) {
3212
3213 if (! len) { /* The name resolved to an empty string */
3214 Copy("\\N{}", d, 4, char);
3215 d += 4;
3216 }
3217 else {
3218 /* In order to not lose information for the regex
3219 * compiler, pass the result in the specially made
3220 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3221 * the code points in hex of each character
3222 * returned by charnames */
3223
3224 const char *str_end = str + len;
3225 STRLEN char_length; /* cur char's byte length */
3226 STRLEN output_length; /* and the number of bytes
3227 after this is translated
3228 into hex digits */
3229 const STRLEN off = d - SvPVX_const(sv);
3230
3231 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3232 * max('U+', '.'); and 1 for NUL */
3233 char hex_string[2 * UTF8_MAXBYTES + 5];
3234
3235 /* Get the first character of the result. */
3236 U32 uv = utf8n_to_uvuni((U8 *) str,
3237 len,
3238 &char_length,
3239 UTF8_ALLOW_ANYUV);
3240
3241 /* The call to is_utf8_string() above hopefully
3242 * guarantees that there won't be an error. But
3243 * it's easy here to make sure. The function just
3244 * above warns and returns 0 if invalid utf8, but
3245 * it can also return 0 if the input is validly a
3246 * NUL. Disambiguate */
3247 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3248 uv = UNICODE_REPLACEMENT;
3249 }
3250
3251 /* Convert first code point to hex, including the
e2a7e165
KW
3252 * boiler plate before it. For all these, we
3253 * convert to native format so that downstream code
3254 * can continue to assume the input is native */
78c35590 3255 output_length =
3353de27 3256 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3257 "\\N{U+%X",
3258 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3259
3260 /* Make sure there is enough space to hold it */
3261 d = off + SvGROW(sv, off
3262 + output_length
3263 + (STRLEN)(send - e)
3264 + 2); /* '}' + NUL */
3265 /* And output it */
3266 Copy(hex_string, d, output_length, char);
3267 d += output_length;
3268
3269 /* For each subsequent character, append dot and
3270 * its ordinal in hex */
3271 while ((str += char_length) < str_end) {
3272 const STRLEN off = d - SvPVX_const(sv);
3273 U32 uv = utf8n_to_uvuni((U8 *) str,
3274 str_end - str,
3275 &char_length,
3276 UTF8_ALLOW_ANYUV);
3277 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3278 uv = UNICODE_REPLACEMENT;
3279 }
3280
78c35590 3281 output_length =
3353de27 3282 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3283 ".%X",
3284 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3285
3286 d = off + SvGROW(sv, off
3287 + output_length
3288 + (STRLEN)(send - e)
3289 + 2); /* '}' + NUL */
3290 Copy(hex_string, d, output_length, char);
3291 d += output_length;
3292 }
3293
3294 *d++ = '}'; /* Done. Add the trailing brace */
3295 }
3296 }
3297 else { /* Here, not in a pattern. Convert the name to a
3298 * string. */
3299
3300 /* If destination is not in utf8, unconditionally
3301 * recode it to be so. This is because \N{} implies
3302 * Unicode semantics, and scalars have to be in utf8
3303 * to guarantee those semantics */
3304 if (! has_utf8) {
3305 SvCUR_set(sv, d - SvPVX_const(sv));
3306 SvPOK_on(sv);
3307 *d = '\0';
3308 /* See Note on sizing above. */
3309 sv_utf8_upgrade_flags_grow(sv,
3310 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3311 len + (STRLEN)(send - s) + 1);
3312 d = SvPVX(sv) + SvCUR(sv);
3313 has_utf8 = TRUE;
3314 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3315
3316 /* See Note on sizing above. (NOTE: SvCUR() is not
3317 * set correctly here). */
3318 const STRLEN off = d - SvPVX_const(sv);
3319 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3320 }
3321 Copy(str, d, len, char);
3322 d += len;
423cee85 3323 }
423cee85 3324 SvREFCNT_dec(res);
cb233ae3
KW
3325
3326 /* Deprecate non-approved name syntax */
3327 if (ckWARN_d(WARN_DEPRECATED)) {
3328 bool problematic = FALSE;
3329 char* i = s;
3330
3331 /* For non-ut8 input, look to see that the first
3332 * character is an alpha, then loop through the rest
3333 * checking that each is a continuation */
3334 if (! this_utf8) {
3335 if (! isALPHAU(*i)) problematic = TRUE;
3336 else for (i = s + 1; i < e; i++) {
3337 if (isCHARNAME_CONT(*i)) continue;
3338 problematic = TRUE;
3339 break;
3340 }
3341 }
3342 else {
3343 /* Similarly for utf8. For invariants can check
3344 * directly. We accept anything above the latin1
3345 * range because it is immaterial to Perl if it is
3346 * correct or not, and is expensive to check. But
3347 * it is fairly easy in the latin1 range to convert
3348 * the variants into a single character and check
3349 * those */
3350 if (UTF8_IS_INVARIANT(*i)) {
3351 if (! isALPHAU(*i)) problematic = TRUE;
3352 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3353 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3354 *(i+1)))))
3355 {
3356 problematic = TRUE;
3357 }
3358 }
3359 if (! problematic) for (i = s + UTF8SKIP(s);
3360 i < e;
3361 i+= UTF8SKIP(i))
3362 {
3363 if (UTF8_IS_INVARIANT(*i)) {
3364 if (isCHARNAME_CONT(*i)) continue;
3365 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3366 continue;
3367 } else if (isCHARNAME_CONT(
3368 UNI_TO_NATIVE(
81c14aa2 3369 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3370 {
3371 continue;
3372 }
3373 problematic = TRUE;
3374 break;
3375 }
3376 }
3377 if (problematic) {
6e1bad6c
KW
3378 /* The e-i passed to the final %.*s makes sure that
3379 * should the trailing NUL be missing that this
3380 * print won't run off the end of the string */
cb233ae3 3381 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3382 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3383 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3384 }
3385 }
3386 } /* End \N{NAME} */
ff3f963a
KW
3387#ifdef EBCDIC
3388 if (!dorange)
3389 native_range = FALSE; /* \N{} is defined to be Unicode */
3390#endif
3391 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3392 continue;
3393
02aa26ce 3394 /* \c is a control character */
79072805
LW
3395 case 'c':
3396 s++;
961ce445 3397 if (s < send) {
17a3df4c 3398 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3399 }
961ce445
RGS
3400 else {
3401 yyerror("Missing control char name in \\c");
3402 }
79072805 3403 continue;
02aa26ce
NT
3404
3405 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3406 case 'b':
db42d148 3407 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3408 break;
3409 case 'n':
db42d148 3410 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3411 break;
3412 case 'r':
db42d148 3413 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3414 break;
3415 case 'f':
db42d148 3416 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3417 break;
3418 case 't':
db42d148 3419 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3420 break;
34a3fe2a 3421 case 'e':
db42d148 3422 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3423 break;
3424 case 'a':
db42d148 3425 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3426 break;
02aa26ce
NT
3427 } /* end switch */
3428
79072805
LW
3429 s++;
3430 continue;
02aa26ce 3431 } /* end if (backslash) */
4c3a8340
TS
3432#ifdef EBCDIC
3433 else
3434 literal_endpoint++;
3435#endif
02aa26ce 3436
f9a63242 3437 default_action:
77a135fe
KW
3438 /* If we started with encoded form, or already know we want it,
3439 then encode the next character */
3440 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3441 STRLEN len = 1;
77a135fe
KW
3442
3443
3444 /* One might think that it is wasted effort in the case of the
3445 * source being utf8 (this_utf8 == TRUE) to take the next character
3446 * in the source, convert it to an unsigned value, and then convert
3447 * it back again. But the source has not been validated here. The
3448 * routine that does the conversion checks for errors like
3449 * malformed utf8 */
3450
5f66b61c
AL
3451 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3452 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3453 if (!has_utf8) {
3454 SvCUR_set(sv, d - SvPVX_const(sv));
3455 SvPOK_on(sv);
3456 *d = '\0';
77a135fe 3457 /* See Note on sizing above. */
7bf79863
KW
3458 sv_utf8_upgrade_flags_grow(sv,
3459 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3460 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3461 d = SvPVX(sv) + SvCUR(sv);
3462 has_utf8 = TRUE;
3463 } else if (need > len) {
3464 /* encoded value larger than old, may need extra space (NOTE:
3465 * SvCUR() is not set correctly here). See Note on sizing
3466 * above. */
9d4ba2ae 3467 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3468 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3469 }
77a135fe
KW
3470 s += len;
3471
5f66b61c 3472 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3473#ifdef EBCDIC
3474 if (uv > 255 && !dorange)
3475 native_range = FALSE;
3476#endif
2b9d42f0
NIS
3477 }
3478 else {
3479 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3480 }
02aa26ce
NT
3481 } /* while loop to process each character */
3482
3483 /* terminate the string and set up the sv */
79072805 3484 *d = '\0';
95a20fc0 3485 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3486 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3487 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3488
79072805 3489 SvPOK_on(sv);
9f4817db 3490 if (PL_encoding && !has_utf8) {
d0063567
DK
3491 sv_recode_to_utf8(sv, PL_encoding);
3492 if (SvUTF8(sv))
3493 has_utf8 = TRUE;
9f4817db 3494 }
2b9d42f0 3495 if (has_utf8) {
7e2040f0 3496 SvUTF8_on(sv);
2b9d42f0 3497 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3498 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3499 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3500 }
3501 }
79072805 3502
02aa26ce 3503 /* shrink the sv if we allocated more than we used */
79072805 3504 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3505 SvPV_shrink_to_cur(sv);
79072805 3506 }
02aa26ce 3507
6154021b 3508 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3509 if (s > PL_bufptr) {
eb0d8d16
NC
3510 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3511 const char *const key = PL_lex_inpat ? "qr" : "q";
3512 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3513 const char *type;
3514 STRLEN typelen;
3515
3516 if (PL_lex_inwhat == OP_TRANS) {
3517 type = "tr";
3518 typelen = 2;
3519 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3520 type = "s";
3521 typelen = 1;
3522 } else {
3523 type = "qq";
3524 typelen = 2;
3525 }
3526
3527 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3528 type, typelen);
3529 }
6154021b 3530 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3531 } else
8990e307 3532 SvREFCNT_dec(sv);
79072805
LW
3533 return s;
3534}
3535
ffb4593c
NT
3536/* S_intuit_more
3537 * Returns TRUE if there's more to the expression (e.g., a subscript),
3538 * FALSE otherwise.
ffb4593c
NT
3539 *
3540 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3541 *
3542 * ->[ and ->{ return TRUE
3543 * { and [ outside a pattern are always subscripts, so return TRUE
3544 * if we're outside a pattern and it's not { or [, then return FALSE
3545 * if we're in a pattern and the first char is a {
3546 * {4,5} (any digits around the comma) returns FALSE
3547 * if we're in a pattern and the first char is a [
3548 * [] returns FALSE
3549 * [SOMETHING] has a funky algorithm to decide whether it's a
3550 * character class or not. It has to deal with things like
3551 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3552 * anything else returns TRUE
3553 */
3554
9cbb5ea2
GS
3555/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3556
76e3520e 3557STATIC int
cea2e8a9 3558S_intuit_more(pTHX_ register char *s)
79072805 3559{
97aff369 3560 dVAR;
7918f24d
NC
3561
3562 PERL_ARGS_ASSERT_INTUIT_MORE;
3563
3280af22 3564 if (PL_lex_brackets)
79072805
LW
3565 return TRUE;
3566 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3567 return TRUE;
3568 if (*s != '{' && *s != '[')
3569 return FALSE;
3280af22 3570 if (!PL_lex_inpat)
79072805
LW
3571 return TRUE;
3572
3573 /* In a pattern, so maybe we have {n,m}. */
3574 if (*s == '{') {
b3155d95 3575 if (regcurly(s)) {
79072805 3576 return FALSE;
b3155d95 3577 }
79072805 3578 return TRUE;
79072805
LW
3579 }
3580
3581 /* On the other hand, maybe we have a character class */
3582
3583 s++;
3584 if (*s == ']' || *s == '^')
3585 return FALSE;
3586 else {
ffb4593c 3587 /* this is terrifying, and it works */
79072805
LW
3588 int weight = 2; /* let's weigh the evidence */
3589 char seen[256];
f27ffc4a 3590 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3591 const char * const send = strchr(s,']');
3280af22 3592 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3593
3594 if (!send) /* has to be an expression */
3595 return TRUE;
3596
3597 Zero(seen,256,char);
3598 if (*s == '$')
3599 weight -= 3;
3600 else if (isDIGIT(*s)) {
3601 if (s[1] != ']') {
3602 if (isDIGIT(s[1]) && s[2] == ']')
3603 weight -= 10;
3604 }
3605 else
3606 weight -= 100;
3607 }
3608 for (; s < send; s++) {
3609 last_un_char = un_char;
3610 un_char = (unsigned char)*s;
3611 switch (*s) {
3612 case '@':
3613 case '&':
3614 case '$':
3615 weight -= seen[un_char] * 10;
7e2040f0 3616 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3617 int len;
8903cb82 3618 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3619 len = (int)strlen(tmpbuf);
3620 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3621 weight -= 100;
3622 else
3623 weight -= 10;
3624 }
3625 else if (*s == '$' && s[1] &&
93a17b20
LW
3626 strchr("[#!%*<>()-=",s[1])) {
3627 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3628 weight -= 10;
3629 else
3630 weight -= 1;
3631 }
3632 break;
3633 case '\\':
3634 un_char = 254;
3635 if (s[1]) {
93a17b20 3636 if (strchr("wds]",s[1]))
79072805 3637 weight += 100;
10edeb5d 3638 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3639 weight += 1;
93a17b20 3640 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3641 weight += 40;
3642 else if (isDIGIT(s[1])) {
3643 weight += 40;
3644 while (s[1] && isDIGIT(s[1]))
3645 s++;
3646 }
3647 }
3648 else
3649 weight += 100;
3650 break;
3651 case '-':
3652 if (s[1] == '\\')
3653 weight += 50;
93a17b20 3654 if (strchr("aA01! ",last_un_char))
79072805 3655 weight += 30;
93a17b20 3656 if (strchr("zZ79~",s[1]))
79072805 3657 weight += 30;
f27ffc4a
GS
3658 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3659 weight -= 5; /* cope with negative subscript */
79072805
LW
3660 break;
3661 default:
3792a11b
NC
3662 if (!isALNUM(last_un_char)
3663 && !(last_un_char == '$' || last_un_char == '@'
3664 || last_un_char == '&')
3665 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3666 char *d = tmpbuf;
3667 while (isALPHA(*s))
3668 *d++ = *s++;
3669 *d = '\0';
5458a98a 3670 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3671 weight -= 150;
3672 }
3673 if (un_char == last_un_char + 1)
3674 weight += 5;
3675 weight -= seen[un_char];
3676 break;
3677 }
3678 seen[un_char]++;
3679 }
3680 if (weight >= 0) /* probably a character class */
3681 return FALSE;
3682 }
3683
3684 return TRUE;
3685}
ffed7fef 3686
ffb4593c
NT
3687/*
3688 * S_intuit_method
3689 *
3690 * Does all the checking to disambiguate
3691 * foo bar
3692 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3693 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3694 *
3695 * First argument is the stuff after the first token, e.g. "bar".
3696 *
3697 * Not a method if bar is a filehandle.
3698 * Not a method if foo is a subroutine prototyped to take a filehandle.
3699 * Not a method if it's really "Foo $bar"
3700 * Method if it's "foo $bar"
3701 * Not a method if it's really "print foo $bar"
3702 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3703 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3704 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3705 * =>
3706 */
3707
76e3520e 3708STATIC int
62d55b22 3709S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3710{
97aff369 3711 dVAR;
a0d0e21e 3712 char *s = start + (*start == '$');
3280af22 3713 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3714 STRLEN len;
3715 GV* indirgv;
5db06880
NC
3716#ifdef PERL_MAD
3717 int soff;
3718#endif
a0d0e21e 3719
7918f24d
NC
3720 PERL_ARGS_ASSERT_INTUIT_METHOD;
3721
a0d0e21e 3722 if (gv) {
62d55b22 3723 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3724 return 0;
62d55b22
NC
3725 if (cv) {
3726 if (SvPOK(cv)) {
3727 const char *proto = SvPVX_const(cv);
3728 if (proto) {
3729 if (*proto == ';')
3730 proto++;
3731 if (*proto == '*')
3732 return 0;
3733 }
b6c543e3
IZ
3734 }
3735 } else
c35e046a 3736 gv = NULL;
a0d0e21e 3737 }
8903cb82 3738 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3739 /* start is the beginning of the possible filehandle/object,
3740 * and s is the end of it
3741 * tmpbuf is a copy of it
3742 */
3743
a0d0e21e 3744 if (*start == '$') {
3ef1310e
RGS
3745 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3746 isUPPER(*PL_tokenbuf))
a0d0e21e 3747 return 0;
5db06880
NC
3748#ifdef PERL_MAD
3749 len = start - SvPVX(PL_linestr);
3750#endif
29595ff2 3751 s = PEEKSPACE(s);
f0092767 3752#ifdef PERL_MAD
5db06880
NC
3753 start = SvPVX(PL_linestr) + len;
3754#endif
3280af22
NIS
3755 PL_bufptr = start;
3756 PL_expect = XREF;
a0d0e21e
LW
3757 return *s == '(' ? FUNCMETH : METHOD;
3758 }
5458a98a 3759 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3760 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3761 len -= 2;
3762 tmpbuf[len] = '\0';
5db06880
NC
3763#ifdef PERL_MAD
3764 soff = s - SvPVX(PL_linestr);
3765#endif
c3e0f903
GS
3766 goto bare_package;
3767 }
90e5519e 3768 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3769 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3770 return 0;
3771 /* filehandle or package name makes it a method */
da51bb9b 3772 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3773#ifdef PERL_MAD
3774 soff = s - SvPVX(PL_linestr);
3775#endif
29595ff2 3776 s = PEEKSPACE(s);
3280af22 3777 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3778 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3779 bare_package:
cd81e915 3780 start_force(PL_curforce);
9ded7720 3781 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3782 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3783 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3784 if (PL_madskills)
3785 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3786 PL_expect = XTERM;
a0d0e21e 3787 force_next(WORD);
3280af22 3788 PL_bufptr = s;
5db06880
NC
3789#ifdef PERL_MAD
3790 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3791#endif
a0d0e21e
LW
3792 return *s == '(' ? FUNCMETH : METHOD;
3793 }
3794 }
3795 return 0;
3796}
3797
16d20bd9 3798/* Encoded script support. filter_add() effectively inserts a
4e553d73 3799 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3800 * Note that the filter function only applies to the current source file
3801 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3802 *
3803 * The datasv parameter (which may be NULL) can be used to pass
3804 * private data to this instance of the filter. The filter function
3805 * can recover the SV using the FILTER_DATA macro and use it to
3806 * store private buffers and state information.
3807 *
3808 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3809 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3810 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3811 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3812 * private use must be set using malloc'd pointers.
3813 */
16d20bd9
AD
3814
3815SV *
864dbfa3 3816Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3817{
97aff369 3818 dVAR;
f4c556ac 3819 if (!funcp)
a0714e2c 3820 return NULL;
f4c556ac 3821
5486870f
DM
3822 if (!PL_parser)
3823 return NULL;
3824
3280af22
NIS
3825 if (!PL_rsfp_filters)
3826 PL_rsfp_filters = newAV();
16d20bd9 3827 if (!datasv)
561b68a9 3828 datasv = newSV(0);
862a34c6 3829 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3830 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3831 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3832 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3833 FPTR2DPTR(void *, IoANY(datasv)),
3834 SvPV_nolen(datasv)));
3280af22
NIS
3835 av_unshift(PL_rsfp_filters, 1);
3836 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3837 return(datasv);
3838}
4e553d73 3839
16d20bd9
AD
3840
3841/* Delete most recently added instance of this filter function. */
a0d0e21e 3842void
864dbfa3 3843Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3844{
97aff369 3845 dVAR;
e0c19803 3846 SV *datasv;
24801a4b 3847
7918f24d
NC
3848 PERL_ARGS_ASSERT_FILTER_DEL;
3849
33073adb 3850#ifdef DEBUGGING
55662e27
JH
3851 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3852 FPTR2DPTR(void*, funcp)));
33073adb 3853#endif
5486870f 3854 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3855 return;
3856 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3857 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3858 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3859 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3860
16d20bd9
AD
3861 return;
3862 }
3863 /* we need to search for the correct entry and clear it */
cea2e8a9 3864 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3865}
3866
3867
1de9afcd
RGS
3868/* Invoke the idxth filter function for the current rsfp. */
3869/* maxlen 0 = read one text line */
16d20bd9 3870I32
864dbfa3 3871Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3872{
97aff369 3873 dVAR;
16d20bd9
AD
3874 filter_t funcp;
3875 SV *datasv = NULL;
f482118e
NC
3876 /* This API is bad. It should have been using unsigned int for maxlen.
3877 Not sure if we want to change the API, but if not we should sanity
3878 check the value here. */
39cd7a59
NC
3879 const unsigned int correct_length
3880 = maxlen < 0 ?
3881#ifdef PERL_MICRO
3882 0x7FFFFFFF
3883#else
3884 INT_MAX
3885#endif
3886 : maxlen;
e50aee73 3887
7918f24d
NC
3888 PERL_ARGS_ASSERT_FILTER_READ;
3889
5486870f 3890 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3891 return -1;
1de9afcd 3892 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3893 /* Provide a default input filter to make life easy. */
3894 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3895 DEBUG_P(PerlIO_printf(Perl_debug_log,
3896 "filter_read %d: from rsfp\n", idx));
f482118e 3897 if (correct_length) {
16d20bd9
AD
3898 /* Want a block */
3899 int len ;
f54cb97a 3900 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3901
3902 /* ensure buf_sv is large enough */
881d8f0a 3903 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3904 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3905 correct_length)) <= 0) {
3280af22 3906 if (PerlIO_error(PL_rsfp))
37120919
AD
3907 return -1; /* error */
3908 else
3909 return 0 ; /* end of file */
3910 }
16d20bd9 3911 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3912 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3913 } else {
3914 /* Want a line */
3280af22
NIS
3915 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3916 if (PerlIO_error(PL_rsfp))
37120919
AD
3917 return -1; /* error */
3918 else
3919 return 0 ; /* end of file */
3920 }
16d20bd9
AD
3921 }
3922 return SvCUR(buf_sv);
3923 }
3924 /* Skip this filter slot if filter has been deleted */
1de9afcd 3925 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3926 DEBUG_P(PerlIO_printf(Perl_debug_log,
3927 "filter_read %d: skipped (filter deleted)\n",
3928 idx));
f482118e 3929 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3930 }
3931 /* Get function pointer hidden within datasv */
8141890a 3932 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3933 DEBUG_P(PerlIO_printf(Perl_debug_log,
3934 "filter_read %d: via function %p (%s)\n",
ca0270c4 3935 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3936 /* Call function. The function is expected to */
3937 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3938 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3939 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3940}
3941
76e3520e 3942STATIC char *
5cc814fd 3943S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3944{
97aff369 3945 dVAR;
7918f24d
NC
3946
3947 PERL_ARGS_ASSERT_FILTER_GETS;
3948
c39cd008 3949#ifdef PERL_CR_FILTER
3280af22 3950 if (!PL_rsfp_filters) {
c39cd008 3951 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3952 }
3953#endif
3280af22 3954 if (PL_rsfp_filters) {
55497cff 3955 if (!append)
3956 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3957 if (FILTER_READ(0, sv, 0) > 0)
3958 return ( SvPVX(sv) ) ;
3959 else
bd61b366 3960 return NULL ;
16d20bd9 3961 }
9d116dd7 3962 else
5cc814fd 3963 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3964}
3965
01ec43d0 3966STATIC HV *
9bde8eb0 3967S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3968{
97aff369 3969 dVAR;
def3634b
GS
3970 GV *gv;
3971
7918f24d
NC
3972 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3973
01ec43d0 3974 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3975 return PL_curstash;
3976
3977 if (len > 2 &&
3978 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3979 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3980 {
3981 return GvHV(gv); /* Foo:: */
def3634b
GS
3982 }
3983
3984 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3985 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3986 if (gv && GvCV(gv)) {
3987 SV * const sv = cv_const_sv(GvCV(gv));
3988 if (sv)
9bde8eb0 3989 pkgname = SvPV_const(sv, len);
def3634b
GS
3990 }
3991
9bde8eb0 3992 return gv_stashpvn(pkgname, len, 0);
def3634b 3993}
a0d0e21e 3994
e3f73d4e
RGS
3995/*
3996 * S_readpipe_override
486ec47a 3997 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
3998 * optree, provided sublex_start() is called afterwards.
3999 */
4000STATIC void
1d51329b 4001S_readpipe_override(pTHX)
e3f73d4e
RGS
4002{
4003 GV **gvp;
4004 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4005 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4006 if ((gv_readpipe
4007 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4008 ||
4009 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4010 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4011 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4012 {
4013 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4014 op_append_elem(OP_LIST,
e3f73d4e
RGS
4015 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4016 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4017 }
e3f73d4e
RGS
4018}
4019
5db06880
NC
4020#ifdef PERL_MAD
4021 /*
4022 * Perl_madlex
4023 * The intent of this yylex wrapper is to minimize the changes to the
4024 * tokener when we aren't interested in collecting madprops. It remains
4025 * to be seen how successful this strategy will be...
4026 */
4027
4028int
4029Perl_madlex(pTHX)
4030{
4031 int optype;
4032 char *s = PL_bufptr;
4033
cd81e915
NC
4034 /* make sure PL_thiswhite is initialized */
4035 PL_thiswhite = 0;
4036 PL_thismad = 0;
5db06880 4037
cd81e915 4038 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 4039 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
4040 return S_pending_ident(aTHX);
4041
4042 /* previous token ate up our whitespace? */
cd81e915
NC
4043 if (!PL_lasttoke && PL_nextwhite) {
4044 PL_thiswhite = PL_nextwhite;
4045 PL_nextwhite = 0;
5db06880
NC
4046 }
4047
4048 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4049 PL_realtokenstart = -1;
4050 PL_thistoken = 0;
5db06880
NC
4051 optype = yylex();
4052 s = PL_bufptr;
cd81e915 4053 assert(PL_curforce < 0);
5db06880 4054
cd81e915
NC
4055 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4056 if (!PL_thistoken) {
4057 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4058 PL_thistoken = newSVpvs("");
5db06880 4059 else {
c35e046a 4060 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4061 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4062 }
4063 }
cd81e915
NC
4064 if (PL_thismad) /* install head */
4065 CURMAD('X', PL_thistoken);
5db06880
NC
4066 }
4067
4068 /* last whitespace of a sublex? */
cd81e915
NC
4069 if (optype == ')' && PL_endwhite) {
4070 CURMAD('X', PL_endwhite);
5db06880
NC
4071 }
4072
cd81e915 4073 if (!PL_thismad) {
5db06880
NC
4074
4075 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4076 if (!PL_thiswhite && !PL_endwhite && !optype) {
4077 sv_free(PL_thistoken);
4078 PL_thistoken = 0;
5db06880
NC
4079 return 0;
4080 }
4081
4082 /* put off final whitespace till peg */
4083 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4084 PL_nextwhite = PL_thiswhite;
4085 PL_thiswhite = 0;
5db06880 4086 }
cd81e915
NC
4087 else if (PL_thisopen) {
4088 CURMAD('q', PL_thisopen);
4089 if (PL_thistoken)
4090 sv_free(PL_thistoken);
4091 PL_thistoken = 0;
5db06880
NC
4092 }
4093 else {
4094 /* Store actual token text as madprop X */
cd81e915 4095 CURMAD('X', PL_thistoken);
5db06880
NC
4096 }
4097
cd81e915 4098 if (PL_thiswhite) {
5db06880 4099 /* add preceding whitespace as madprop _ */
cd81e915 4100 CURMAD('_', PL_thiswhite);
5db06880
NC
4101 }
4102
cd81e915 4103 if (PL_thisstuff) {
5db06880 4104 /* add quoted material as madprop = */
cd81e915 4105 CURMAD('=', PL_thisstuff);
5db06880
NC
4106 }
4107
cd81e915 4108 if (PL_thisclose) {
5db06880 4109 /* add terminating quote as madprop Q */
cd81e915 4110 CURMAD('Q', PL_thisclose);
5db06880
NC
4111 }
4112 }
4113
4114 /* special processing based on optype */
4115
4116 switch (optype) {
4117
4118 /* opval doesn't need a TOKEN since it can already store mp */
4119 case WORD:
4120 case METHOD:
4121 case FUNCMETH:
4122 case THING:
4123 case PMFUNC:
4124 case PRIVATEREF:
4125 case FUNC0SUB:
4126 case UNIOPSUB:
4127 case LSTOPSUB:
6154021b
RGS
4128 if (pl_yylval.opval)
4129 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4130 PL_thismad = 0;
5db06880
NC
4131 return optype;
4132
4133 /* fake EOF */
4134 case 0:
4135 optype = PEG;
cd81e915
NC
4136 if (PL_endwhite) {
4137 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4138 PL_endwhite = 0;
5db06880
NC
4139 }
4140 break;
4141
4142 case ']':
4143 case '}':
cd81e915 4144 if (PL_faketokens)
5db06880
NC
4145 break;
4146 /* remember any fake bracket that lexer is about to discard */
4147 if (PL_lex_brackets == 1 &&
4148 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4149 {
4150 s = PL_bufptr;
4151 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4152 s++;
4153 if (*s == '}') {
cd81e915
NC
4154 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4155 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4156 PL_thiswhite = 0;
5db06880
NC
4157 PL_bufptr = s - 1;
4158 break; /* don't bother looking for trailing comment */
4159 }
4160 else
4161 s = PL_bufptr;
4162 }
4163 if (optype == ']')
4164 break;
4165 /* FALLTHROUGH */
4166
4167 /* attach a trailing comment to its statement instead of next token */
4168 case ';':
cd81e915 4169 if (PL_faketokens)
5db06880
NC
4170 break;
4171 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4172 s = PL_bufptr;
4173 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4174 s++;
4175 if (*s == '\n' || *s == '#') {
4176 while (s < PL_bufend && *s != '\n')
4177 s++;
4178 if (s < PL_bufend)
4179 s++;
cd81e915
NC
4180 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4181 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4182 PL_thiswhite = 0;
5db06880
NC
4183 PL_bufptr = s;
4184 }
4185 }
4186 break;
4187
4188 /* pval */
4189 case LABEL:
4190 break;
4191
4192 /* ival */
4193 default:
4194 break;
4195
4196 }
4197
4198 /* Create new token struct. Note: opvals return early above. */
6154021b 4199 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4200 PL_thismad = 0;
5db06880
NC
4201 return optype;
4202}
4203#endif
4204
468aa647 4205STATIC char *
cc6ed77d 4206S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4207 dVAR;
7918f24d
NC
4208
4209 PERL_ARGS_ASSERT_TOKENIZE_USE;
4210
468aa647
RGS
4211 if (PL_expect != XSTATE)
4212 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4213 is_use ? "use" : "no"));
29595ff2 4214 s = SKIPSPACE1(s);
468aa647
RGS
4215 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4216 s = force_version(s, TRUE);
17c59fdf
VP
4217 if (*s == ';' || *s == '}'
4218 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4219 start_force(PL_curforce);
9ded7720 4220 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4221 force_next(WORD);
4222 }
4223 else if (*s == 'v') {
4224 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4225 s = force_version(s, FALSE);
4226 }
4227 }
4228 else {
4229 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4230 s = force_version(s, FALSE);
4231 }
6154021b 4232 pl_yylval.ival = is_use;
468aa647
RGS
4233 return s;
4234}
748a9306 4235#ifdef DEBUGGING
27da23d5 4236 static const char* const exp_name[] =
09bef843 4237 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4238 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4239 };
748a9306 4240#endif
463ee0b2 4241
361d9b55
Z
4242#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4243STATIC bool
4244S_word_takes_any_delimeter(char *p, STRLEN len)
4245{
4246 return (len == 1 && strchr("msyq", p[0])) ||
4247 (len == 2 && (
4248 (p[0] == 't' && p[1] == 'r') ||
4249 (p[0] == 'q' && strchr("qwxr", p[1]))));
4250}
4251
02aa26ce
NT
4252/*
4253 yylex
4254
4255 Works out what to call the token just pulled out of the input
4256 stream. The yacc parser takes care of taking the ops we return and
4257 stitching them into a tree.
4258
4259 Returns:
4260 PRIVATEREF
4261
4262 Structure:
4263 if read an identifier
4264 if we're in a my declaration
4265 croak if they tried to say my($foo::bar)
4266 build the ops for a my() declaration
4267 if it's an access to a my() variable
4268 are we in a sort block?
4269 croak if my($a); $a <=> $b
4270 build ops for access to a my() variable
4271 if in a dq string, and they've said @foo and we can't find @foo
4272 croak
4273 build ops for a bareword
4274 if we already built the token before, use it.
4275*/
4276
20141f0e 4277
dba4d153
JH
4278#ifdef __SC__
4279#pragma segment Perl_yylex
4280#endif
dba4d153 4281int
dba4d153 4282Perl_yylex(pTHX)
20141f0e 4283{
97aff369 4284 dVAR;
3afc138a 4285 register char *s = PL_bufptr;
378cc40b 4286 register char *d;
463ee0b2 4287 STRLEN len;
aa7440fb 4288 bool bof = FALSE;
580561a3 4289 U32 fake_eof = 0;
a687059c 4290
10edeb5d
JH
4291 /* orig_keyword, gvp, and gv are initialized here because
4292 * jump to the label just_a_word_zero can bypass their
4293 * initialization later. */
4294 I32 orig_keyword = 0;
4295 GV *gv = NULL;
4296 GV **gvp = NULL;
4297
bbf60fe6 4298 DEBUG_T( {
396482e1 4299 SV* tmp = newSVpvs("");
b6007c36
DM
4300 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4301 (IV)CopLINE(PL_curcop),
4302 lex_state_names[PL_lex_state],
4303 exp_name[PL_expect],
4304 pv_display(tmp, s, strlen(s), 0, 60));
4305 SvREFCNT_dec(tmp);
bbf60fe6 4306 } );
02aa26ce 4307 /* check if there's an identifier for us to look at */
28ac2b49 4308 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4309 return REPORT(S_pending_ident(aTHX));
bbce6d69 4310
02aa26ce
NT
4311 /* no identifier pending identification */
4312
3280af22 4313 switch (PL_lex_state) {
79072805
LW
4314#ifdef COMMENTARY
4315 case LEX_NORMAL: /* Some compilers will produce faster */
4316 case LEX_INTERPNORMAL: /* code if we comment these out. */
4317 break;
4318#endif
4319
09bef843 4320 /* when we've already built the next token, just pull it out of the queue */
79072805 4321 case LEX_KNOWNEXT:
5db06880
NC
4322#ifdef PERL_MAD
4323 PL_lasttoke--;
6154021b 4324 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4325 if (PL_madskills) {
cd81e915 4326 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4327 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4328 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4329 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4330 PL_thismad->mad_val = 0;
4331 mad_free(PL_thismad);
4332 PL_thismad = 0;
5db06880
NC
4333 }
4334 }
4335 if (!PL_lasttoke) {
4336 PL_lex_state = PL_lex_defer;
4337 PL_expect = PL_lex_expect;
4338 PL_lex_defer = LEX_NORMAL;
4339 if (!PL_nexttoke[PL_lasttoke].next_type)
4340 return yylex();
4341 }
4342#else
3280af22 4343 PL_nexttoke--;
6154021b 4344 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4345 if (!PL_nexttoke) {
4346 PL_lex_state = PL_lex_defer;
4347 PL_expect = PL_lex_expect;
4348 PL_lex_defer = LEX_NORMAL;
463ee0b2 4349 }
5db06880 4350#endif
a7aaec61
Z
4351 {
4352 I32 next_type;
5db06880 4353#ifdef PERL_MAD
a7aaec61 4354 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4355#else
a7aaec61 4356 next_type = PL_nexttype[PL_nexttoke];
5db06880 4357#endif
78cdf107
Z
4358 if (next_type & (7<<24)) {
4359 if (next_type & (1<<24)) {
4360 if (PL_lex_brackets > 100)
4361 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4362 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4363 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4364 }
4365 if (next_type & (2<<24))
4366 PL_lex_allbrackets++;
4367 if (next_type & (4<<24))
4368 PL_lex_allbrackets--;
a7aaec61
Z
4369 next_type &= 0xffff;
4370 }
4371#ifdef PERL_MAD
4372 /* FIXME - can these be merged? */
4373 return next_type;
4374#else
4375 return REPORT(next_type);
4376#endif
4377 }
79072805 4378
02aa26ce 4379 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4380 when we get here, PL_bufptr is at the \
02aa26ce 4381 */
79072805
LW
4382 case LEX_INTERPCASEMOD:
4383#ifdef DEBUGGING
3280af22 4384 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4385 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4386#endif
02aa26ce 4387 /* handle \E or end of string */
3280af22 4388 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4389 /* if at a \E */
3280af22 4390 if (PL_lex_casemods) {
f54cb97a 4391 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4392 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4393
3792a11b
NC
4394 if (PL_bufptr != PL_bufend
4395 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4396 PL_bufptr += 2;
4397 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4398#ifdef PERL_MAD
4399 if (PL_madskills)
6b29d1f5 4400 PL_thistoken = newSVpvs("\\E");
5db06880 4401#endif
a0d0e21e 4402 }
78cdf107 4403 PL_lex_allbrackets--;
bbf60fe6 4404 return REPORT(')');
79072805 4405 }
5db06880
NC
4406#ifdef PERL_MAD
4407 while (PL_bufptr != PL_bufend &&
4408 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4409 if (!PL_thiswhite)
6b29d1f5 4410 PL_thiswhite = newSVpvs("");
cd81e915 4411 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4412 PL_bufptr += 2;
4413 }
4414#else
3280af22
NIS
4415 if (PL_bufptr != PL_bufend)
4416 PL_bufptr += 2;
5db06880 4417#endif
3280af22 4418 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4419 return yylex();
79072805
LW
4420 }
4421 else {
607df283 4422 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4423 "### Saw case modifier\n"); });
3280af22 4424 s = PL_bufptr + 1;
6e909404 4425 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4426#ifdef PERL_MAD
cd81e915 4427 if (!PL_thiswhite)
6b29d1f5 4428 PL_thiswhite = newSVpvs("");
cd81e915 4429 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4430#endif
89122651 4431 PL_bufptr = s + 3;
6e909404
JH
4432 PL_lex_state = LEX_INTERPCONCAT;
4433 return yylex();
a0d0e21e 4434 }
6e909404 4435 else {
90771dc0 4436 I32 tmp;
5db06880
NC
4437 if (!PL_madskills) /* when just compiling don't need correct */
4438 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4439 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4440 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4441 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4442 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4443 PL_lex_allbrackets--;
bbf60fe6 4444 return REPORT(')');
6e909404
JH
4445 }
4446 if (PL_lex_casemods > 10)
4447 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4448 PL_lex_casestack[PL_lex_casemods++] = *s;
4449 PL_lex_casestack[PL_lex_casemods] = '\0';
4450 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4451 start_force(PL_curforce);
9ded7720 4452 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4453 force_next((2<<24)|'(');
cd81e915 4454 start_force(PL_curforce);
6e909404 4455 if (*s == 'l')
9ded7720 4456 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4457 else if (*s == 'u')
9ded7720 4458 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4459 else if (*s == 'L')
9ded7720 4460 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4461 else if (*s == 'U')
9ded7720 4462 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4463 else if (*s == 'Q')
9ded7720 4464 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4465 else
4466 Perl_croak(aTHX_ "panic: yylex");
5db06880 4467 if (PL_madskills) {
a5849ce5
NC
4468 SV* const tmpsv = newSVpvs("\\ ");
4469 /* replace the space with the character we want to escape
4470 */
4471 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4472 curmad('_', tmpsv);
4473 }
6e909404 4474 PL_bufptr = s + 1;
a0d0e21e 4475 }
79072805 4476 force_next(FUNC);
3280af22
NIS
4477 if (PL_lex_starts) {
4478 s = PL_bufptr;
4479 PL_lex_starts = 0;
5db06880
NC
4480#ifdef PERL_MAD
4481 if (PL_madskills) {
cd81e915
NC
4482 if (PL_thistoken)
4483 sv_free(PL_thistoken);
6b29d1f5 4484 PL_thistoken = newSVpvs("");
5db06880
NC
4485 }
4486#endif
131b3ad0
DM
4487 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4488 if (PL_lex_casemods == 1 && PL_lex_inpat)
4489 OPERATOR(',');
4490 else
4491 Aop(OP_CONCAT);
79072805
LW
4492 }
4493 else
cea2e8a9 4494 return yylex();
79072805
LW
4495 }
4496
55497cff 4497 case LEX_INTERPPUSH:
bbf60fe6 4498 return REPORT(sublex_push());
55497cff 4499
79072805 4500 case LEX_INTERPSTART:
3280af22 4501 if (PL_bufptr == PL_bufend)
bbf60fe6 4502 return REPORT(sublex_done());
607df283 4503 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4504 "### Interpolated variable\n"); });
3280af22
NIS
4505 PL_expect = XTERM;
4506 PL_lex_dojoin = (*PL_bufptr == '@');
4507 PL_lex_state = LEX_INTERPNORMAL;
4508 if (PL_lex_dojoin) {
cd81e915 4509 start_force(PL_curforce);
9ded7720 4510 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4511 force_next(',');
cd81e915 4512 start_force(PL_curforce);
a0d0e21e 4513 force_ident("\"", '$');
cd81e915 4514 start_force(PL_curforce);
9ded7720 4515 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4516 force_next('$');
cd81e915 4517 start_force(PL_curforce);
9ded7720 4518 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4519 force_next((2<<24)|'(');
cd81e915 4520 start_force(PL_curforce);
9ded7720 4521 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4522 force_next(FUNC);
4523 }
3280af22
NIS
4524 if (PL_lex_starts++) {
4525 s = PL_bufptr;
5db06880
NC
4526#ifdef PERL_MAD
4527 if (PL_madskills) {
cd81e915
NC
4528 if (PL_thistoken)
4529 sv_free(PL_thistoken);
6b29d1f5 4530 PL_thistoken = newSVpvs("");
5db06880
NC
4531 }
4532#endif
131b3ad0
DM
4533 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4534 if (!PL_lex_casemods && PL_lex_inpat)
4535 OPERATOR(',');
4536 else
4537 Aop(OP_CONCAT);
79072805 4538 }
cea2e8a9 4539 return yylex();
79072805
LW
4540
4541 case LEX_INTERPENDMAYBE:
3280af22
NIS
4542 if (intuit_more(PL_bufptr)) {
4543 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4544 break;
4545 }
4546 /* FALL THROUGH */
4547
4548 case LEX_INTERPEND:
3280af22
NIS
4549 if (PL_lex_dojoin) {
4550 PL_lex_dojoin = FALSE;
4551 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4552#ifdef PERL_MAD
4553 if (PL_madskills) {
cd81e915
NC
4554 if (PL_thistoken)
4555 sv_free(PL_thistoken);
6b29d1f5 4556 PL_thistoken = newSVpvs("");
5db06880
NC
4557 }
4558#endif
78cdf107 4559 PL_lex_allbrackets--;
bbf60fe6 4560 return REPORT(')');
79072805 4561 }
43a16006 4562 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4563 && SvEVALED(PL_lex_repl))
43a16006 4564 {
e9fa98b2 4565 if (PL_bufptr != PL_bufend)
cea2e8a9 4566 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4567 PL_lex_repl = NULL;
e9fa98b2 4568 }
79072805
LW
4569 /* FALLTHROUGH */
4570 case LEX_INTERPCONCAT:
4571#ifdef DEBUGGING
3280af22 4572 if (PL_lex_brackets)
cea2e8a9 4573 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4574#endif
3280af22 4575 if (PL_bufptr == PL_bufend)
bbf60fe6 4576 return REPORT(sublex_done());
79072805 4577
3280af22
NIS
4578 if (SvIVX(PL_linestr) == '\'') {
4579 SV *sv = newSVsv(PL_linestr);
4580 if (!PL_lex_inpat)
76e3520e 4581 sv = tokeq(sv);
3280af22 4582 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4583 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4584 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4585 s = PL_bufend;
79072805
LW
4586 }
4587 else {
3280af22 4588 s = scan_const(PL_bufptr);
79072805 4589 if (*s == '\\')
3280af22 4590 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4591 else
3280af22 4592 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4593 }
4594
3280af22 4595 if (s != PL_bufptr) {
cd81e915 4596 start_force(PL_curforce);
5db06880
NC
4597 if (PL_madskills) {
4598 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4599 }
6154021b 4600 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4601 PL_expect = XTERM;
79072805 4602 force_next(THING);
131b3ad0 4603 if (PL_lex_starts++) {
5db06880
NC
4604#ifdef PERL_MAD
4605 if (PL_madskills) {
cd81e915
NC
4606 if (PL_thistoken)
4607 sv_free(PL_thistoken);
6b29d1f5 4608 PL_thistoken = newSVpvs("");
5db06880
NC
4609 }
4610#endif
131b3ad0
DM
4611 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4612 if (!PL_lex_casemods && PL_lex_inpat)
4613 OPERATOR(',');
4614 else
4615 Aop(OP_CONCAT);
4616 }
79072805 4617 else {
3280af22 4618 PL_bufptr = s;
cea2e8a9 4619 return yylex();
79072805
LW
4620 }
4621 }
4622
cea2e8a9 4623 return yylex();
a0d0e21e 4624 case LEX_FORMLINE:
3280af22
NIS
4625 PL_lex_state = LEX_NORMAL;
4626 s = scan_formline(PL_bufptr);
4627 if (!PL_lex_formbrack)
a0d0e21e
LW
4628 goto rightbracket;
4629 OPERATOR(';');
79072805
LW
4630 }
4631
3280af22
NIS
4632 s = PL_bufptr;
4633 PL_oldoldbufptr = PL_oldbufptr;
4634 PL_oldbufptr = s;
463ee0b2
LW
4635
4636 retry:
5db06880 4637#ifdef PERL_MAD
cd81e915
NC
4638 if (PL_thistoken) {
4639 sv_free(PL_thistoken);
4640 PL_thistoken = 0;
5db06880 4641 }
cd81e915 4642 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4643#endif
378cc40b
LW
4644 switch (*s) {
4645 default:
7e2040f0 4646 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4647 goto keylookup;
b1fc3636
CJ
4648 {
4649 unsigned char c = *s;
4650 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4651 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4652 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4653 } else {
4654 d = PL_linestart;
4655 }
4656 *s = '\0';
4657 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4658 }
e929a76b
LW
4659 case 4:
4660 case 26:
4661 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4662 case 0:
5db06880
NC
4663#ifdef PERL_MAD
4664 if (PL_madskills)
cd81e915 4665 PL_faketokens = 0;
5db06880 4666#endif
3280af22
NIS
4667 if (!PL_rsfp) {
4668 PL_last_uni = 0;
4669 PL_last_lop = 0;
a7aaec61
Z
4670 if (PL_lex_brackets &&
4671 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4672 yyerror((const char *)
4673 (PL_lex_formbrack
4674 ? "Format not terminated"
4675 : "Missing right curly or square bracket"));
c5ee2135 4676 }
4e553d73 4677 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4678 "### Tokener got EOF\n");
5f80b19c 4679 } );
79072805 4680 TOKEN(0);
463ee0b2 4681 }
3280af22 4682 if (s++ < PL_bufend)
a687059c 4683 goto retry; /* ignore stray nulls */
3280af22
NIS
4684 PL_last_uni = 0;
4685 PL_last_lop = 0;
4686 if (!PL_in_eval && !PL_preambled) {
4687 PL_preambled = TRUE;
5db06880
NC
4688#ifdef PERL_MAD
4689 if (PL_madskills)
cd81e915 4690 PL_faketokens = 1;
5db06880 4691#endif
5ab7ff98
NC
4692 if (PL_perldb) {
4693 /* Generate a string of Perl code to load the debugger.
4694 * If PERL5DB is set, it will return the contents of that,
4695 * otherwise a compile-time require of perl5db.pl. */
4696
4697 const char * const pdb = PerlEnv_getenv("PERL5DB");
4698
4699 if (pdb) {
4700 sv_setpv(PL_linestr, pdb);
4701 sv_catpvs(PL_linestr,";");
4702 } else {
4703 SETERRNO(0,SS_NORMAL);
4704 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4705 }
4706 } else
4707 sv_setpvs(PL_linestr,"");
c62eb204
NC
4708 if (PL_preambleav) {
4709 SV **svp = AvARRAY(PL_preambleav);
4710 SV **const end = svp + AvFILLp(PL_preambleav);
4711 while(svp <= end) {
4712 sv_catsv(PL_linestr, *svp);
4713 ++svp;
396482e1 4714 sv_catpvs(PL_linestr, ";");
91b7def8 4715 }
daba3364 4716 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4717 PL_preambleav = NULL;
91b7def8 4718 }
9f639728
FR
4719 if (PL_minus_E)
4720 sv_catpvs(PL_linestr,
4721 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4722 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4723 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4724 if (PL_minus_l)
396482e1 4725 sv_catpvs(PL_linestr,"chomp;");
3280af22 4726 if (PL_minus_a) {
3280af22 4727 if (PL_minus_F) {
3792a11b
NC
4728 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4729 || *PL_splitstr == '"')
3280af22 4730 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4731 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4732 else {
c8ef6a4b
NC
4733 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4734 bytes can be used as quoting characters. :-) */
dd374669 4735 const char *splits = PL_splitstr;
91d456ae 4736 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4737 do {
4738 /* Need to \ \s */
dd374669
AL
4739 if (*splits == '\\')
4740 sv_catpvn(PL_linestr, splits, 1);
4741 sv_catpvn(PL_linestr, splits, 1);
4742 } while (*splits++);
48c4c863
NC
4743 /* This loop will embed the trailing NUL of
4744 PL_linestr as the last thing it does before
4745 terminating. */
396482e1 4746 sv_catpvs(PL_linestr, ");");
54310121 4747 }
2304df62
AD
4748 }
4749 else
396482e1 4750 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4751 }
79072805 4752 }
396482e1 4753 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4754 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4755 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4756 PL_last_lop = PL_last_uni = NULL;
65269a95 4757 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4758 update_debugger_info(PL_linestr, NULL, 0);
79072805 4759 goto retry;
a687059c 4760 }
e929a76b 4761 do {
580561a3
Z
4762 fake_eof = 0;
4763 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4764 if (0) {
7e28d3af 4765 fake_eof:
f0e67a1d
Z
4766 fake_eof = LEX_FAKE_EOF;
4767 }
4768 PL_bufptr = PL_bufend;
17cc9359 4769 CopLINE_inc(PL_curcop);
f0e67a1d 4770 if (!lex_next_chunk(fake_eof)) {
17cc9359 4771 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4772 s = PL_bufptr;
4773 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4774 }
17cc9359 4775 CopLINE_dec(PL_curcop);
5db06880 4776#ifdef PERL_MAD
f0e67a1d 4777 if (!PL_rsfp)
cd81e915 4778 PL_realtokenstart = -1;
5db06880 4779#endif
f0e67a1d 4780 s = PL_bufptr;
7aa207d6
JH
4781 /* If it looks like the start of a BOM or raw UTF-16,
4782 * check if it in fact is. */
580561a3 4783 if (bof && PL_rsfp &&
7aa207d6
JH
4784 (*s == 0 ||
4785 *(U8*)s == 0xEF ||
4786 *(U8*)s >= 0xFE ||
4787 s[1] == 0)) {
eb160463 4788 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4789 if (bof) {
3280af22 4790 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4791 s = swallow_bom((U8*)s);
e929a76b 4792 }
378cc40b 4793 }
737c24fc 4794 if (PL_parser->in_pod) {
a0d0e21e 4795 /* Incest with pod. */
5db06880
NC
4796#ifdef PERL_MAD
4797 if (PL_madskills)
cd81e915 4798 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4799#endif
01a57ef7 4800 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4801 sv_setpvs(PL_linestr, "");
3280af22
NIS
4802 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4803 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4804 PL_last_lop = PL_last_uni = NULL;
737c24fc 4805 PL_parser->in_pod = 0;
a0d0e21e 4806 }
4e553d73 4807 }
85613cab
Z
4808 if (PL_rsfp)
4809 incline(s);
737c24fc 4810 } while (PL_parser->in_pod);
3280af22 4811 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4812 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4813 PL_last_lop = PL_last_uni = NULL;
57843af0 4814 if (CopLINE(PL_curcop) == 1) {
3280af22 4815 while (s < PL_bufend && isSPACE(*s))
79072805 4816 s++;
a0d0e21e 4817 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4818 s++;
5db06880
NC
4819#ifdef PERL_MAD
4820 if (PL_madskills)
cd81e915 4821 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4822#endif
bd61b366 4823 d = NULL;
3280af22 4824 if (!PL_in_eval) {
44a8e56a 4825 if (*s == '#' && *(s+1) == '!')
4826 d = s + 2;
4827#ifdef ALTERNATE_SHEBANG
4828 else {
bfed75c6 4829 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4830 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4831 d = s + (sizeof(as) - 1);
4832 }
4833#endif /* ALTERNATE_SHEBANG */
4834 }
4835 if (d) {
b8378b72 4836 char *ipath;
774d564b 4837 char *ipathend;
b8378b72 4838
774d564b 4839 while (isSPACE(*d))
b8378b72
CS
4840 d++;
4841 ipath = d;
774d564b 4842 while (*d && !isSPACE(*d))
4843 d++;
4844 ipathend = d;
4845
4846#ifdef ARG_ZERO_IS_SCRIPT
4847 if (ipathend > ipath) {
4848 /*
4849 * HP-UX (at least) sets argv[0] to the script name,
4850 * which makes $^X incorrect. And Digital UNIX and Linux,
4851 * at least, set argv[0] to the basename of the Perl
4852 * interpreter. So, having found "#!", we'll set it right.
4853 */
fafc274c
NC
4854 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4855 SVt_PV)); /* $^X */
774d564b 4856 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4857 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4858 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4859 SvSETMAGIC(x);
4860 }
556c1dec
JH
4861 else {
4862 STRLEN blen;
4863 STRLEN llen;
cfd0369c 4864 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4865 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4866 if (llen < blen) {
4867 bstart += blen - llen;
4868 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4869 sv_setpvn(x, ipath, ipathend - ipath);
4870 SvSETMAGIC(x);
4871 }
4872 }
4873 }
774d564b 4874 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4875 }
774d564b 4876#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4877
4878 /*
4879 * Look for options.
4880 */
748a9306 4881 d = instr(s,"perl -");
84e30d1a 4882 if (!d) {
748a9306 4883 d = instr(s,"perl");
84e30d1a
GS
4884#if defined(DOSISH)
4885 /* avoid getting into infinite loops when shebang
4886 * line contains "Perl" rather than "perl" */
4887 if (!d) {
4888 for (d = ipathend-4; d >= ipath; --d) {
4889 if ((*d == 'p' || *d == 'P')
4890 && !ibcmp(d, "perl", 4))
4891 {
4892 break;
4893 }
4894 }
4895 if (d < ipath)
bd61b366 4896 d = NULL;
84e30d1a
GS
4897 }
4898#endif
4899 }
44a8e56a 4900#ifdef ALTERNATE_SHEBANG
4901 /*
4902 * If the ALTERNATE_SHEBANG on this system starts with a
4903 * character that can be part of a Perl expression, then if
4904 * we see it but not "perl", we're probably looking at the
4905 * start of Perl code, not a request to hand off to some
4906 * other interpreter. Similarly, if "perl" is there, but
4907 * not in the first 'word' of the line, we assume the line
4908 * contains the start of the Perl program.
44a8e56a 4909 */
4910 if (d && *s != '#') {
f54cb97a 4911 const char *c = ipath;
44a8e56a 4912 while (*c && !strchr("; \t\r\n\f\v#", *c))
4913 c++;
4914 if (c < d)
bd61b366 4915 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4916 else
4917 *s = '#'; /* Don't try to parse shebang line */
4918 }
774d564b 4919#endif /* ALTERNATE_SHEBANG */
748a9306 4920 if (!d &&
44a8e56a 4921 *s == '#' &&
774d564b 4922 ipathend > ipath &&
3280af22 4923 !PL_minus_c &&
748a9306 4924 !instr(s,"indir") &&
3280af22 4925 instr(PL_origargv[0],"perl"))
748a9306 4926 {
27da23d5 4927 dVAR;
9f68db38 4928 char **newargv;
9f68db38 4929
774d564b 4930 *ipathend = '\0';
4931 s = ipathend + 1;
3280af22 4932 while (s < PL_bufend && isSPACE(*s))
9f68db38 4933 s++;
3280af22 4934 if (s < PL_bufend) {
d85f917e 4935 Newx(newargv,PL_origargc+3,char*);
9f68db38 4936 newargv[1] = s;
3280af22 4937 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4938 s++;
4939 *s = '\0';
3280af22 4940 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4941 }
4942 else
3280af22 4943 newargv = PL_origargv;
774d564b 4944 newargv[0] = ipath;
b35112e7 4945 PERL_FPU_PRE_EXEC
b4748376 4946 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4947 PERL_FPU_POST_EXEC
cea2e8a9 4948 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4949 }
748a9306 4950 if (d) {
c35e046a
AL
4951 while (*d && !isSPACE(*d))
4952 d++;
4953 while (SPACE_OR_TAB(*d))
4954 d++;
748a9306
LW
4955
4956 if (*d++ == '-') {
f54cb97a 4957 const bool switches_done = PL_doswitches;
fb993905
GA
4958 const U32 oldpdb = PL_perldb;
4959 const bool oldn = PL_minus_n;
4960 const bool oldp = PL_minus_p;
c7030b81 4961 const char *d1 = d;
fb993905 4962
8cc95fdb 4963 do {
4ba71d51
FC
4964 bool baduni = FALSE;
4965 if (*d1 == 'C') {
bd0ab00d
NC
4966 const char *d2 = d1 + 1;
4967 if (parse_unicode_opts((const char **)&d2)
4968 != PL_unicode)
4969 baduni = TRUE;
4ba71d51
FC
4970 }
4971 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4972 const char * const m = d1;
4973 while (*d1 && !isSPACE(*d1))
4974 d1++;
cea2e8a9 4975 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4976 (int)(d1 - m), m);
8cc95fdb 4977 }
c7030b81
NC
4978 d1 = moreswitches(d1);
4979 } while (d1);
f0b2cf55
YST
4980 if (PL_doswitches && !switches_done) {
4981 int argc = PL_origargc;
4982 char **argv = PL_origargv;
4983 do {
4984 argc--,argv++;
4985 } while (argc && argv[0][0] == '-' && argv[0][1]);
4986 init_argv_symbols(argc,argv);
4987 }
65269a95 4988 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4989 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4990 /* if we have already added "LINE: while (<>) {",
4991 we must not do it again */
748a9306 4992 {
76f68e9b 4993 sv_setpvs(PL_linestr, "");
3280af22
NIS
4994 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4995 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4996 PL_last_lop = PL_last_uni = NULL;
3280af22 4997 PL_preambled = FALSE;
65269a95 4998 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4999 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5000 goto retry;
5001 }
a0d0e21e 5002 }
79072805 5003 }
9f68db38 5004 }
79072805 5005 }
3280af22
NIS
5006 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5007 PL_bufptr = s;
5008 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5009 return yylex();
ae986130 5010 }
378cc40b 5011 goto retry;
4fdae800 5012 case '\r':
6a27c188 5013#ifdef PERL_STRICT_CR
cea2e8a9 5014 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5015 Perl_croak(aTHX_
cc507455 5016 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5017#endif
4fdae800 5018 case ' ': case '\t': case '\f': case 013:
5db06880 5019#ifdef PERL_MAD
cd81e915 5020 PL_realtokenstart = -1;
ac372eb8
RD
5021 if (!PL_thiswhite)
5022 PL_thiswhite = newSVpvs("");
5023 sv_catpvn(PL_thiswhite, s, 1);
5db06880 5024#endif
ac372eb8 5025 s++;
378cc40b 5026 goto retry;
378cc40b 5027 case '#':
e929a76b 5028 case '\n':
5db06880 5029#ifdef PERL_MAD
cd81e915 5030 PL_realtokenstart = -1;
5db06880 5031 if (PL_madskills)
cd81e915 5032 PL_faketokens = 0;
5db06880 5033#endif
3280af22 5034 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
5035 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5036 /* handle eval qq[#line 1 "foo"\n ...] */
5037 CopLINE_dec(PL_curcop);
5038 incline(s);
5039 }
5db06880
NC
5040 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5041 s = SKIPSPACE0(s);
5042 if (!PL_in_eval || PL_rsfp)
5043 incline(s);
5044 }
5045 else {
5046 d = s;
5047 while (d < PL_bufend && *d != '\n')
5048 d++;
5049 if (d < PL_bufend)
5050 d++;
5051 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5052 Perl_croak(aTHX_ "panic: input overflow");
5053#ifdef PERL_MAD
5054 if (PL_madskills)
cd81e915 5055 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5056#endif
5057 s = d;
5058 incline(s);
5059 }
3280af22
NIS
5060 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5061 PL_bufptr = s;
5062 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5063 return yylex();
a687059c 5064 }
378cc40b 5065 }
a687059c 5066 else {
5db06880
NC
5067#ifdef PERL_MAD
5068 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5069 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5070 PL_faketokens = 0;
5db06880
NC
5071 s = SKIPSPACE0(s);
5072 TOKEN(PEG); /* make sure any #! line is accessible */
5073 }
5074 s = SKIPSPACE0(s);
5075 }
5076 else {
5077/* if (PL_madskills && PL_lex_formbrack) { */
5078 d = s;
5079 while (d < PL_bufend && *d != '\n')
5080 d++;
5081 if (d < PL_bufend)
5082 d++;
5083 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5084 Perl_croak(aTHX_ "panic: input overflow");
5085 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5086 if (!PL_thiswhite)
6b29d1f5 5087 PL_thiswhite = newSVpvs("");
5db06880 5088 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5089 sv_setpvs(PL_thiswhite, "");
cd81e915 5090 PL_faketokens = 0;
5db06880 5091 }
cd81e915 5092 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5093 }
5094 s = d;
5095/* }
5096 *s = '\0';
5097 PL_bufend = s; */
5098 }
5099#else
378cc40b 5100 *s = '\0';
3280af22 5101 PL_bufend = s;
5db06880 5102#endif
a687059c 5103 }
378cc40b
LW
5104 goto retry;
5105 case '-':
79072805 5106 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5107 I32 ftst = 0;
90771dc0 5108 char tmp;
e5edeb50 5109
378cc40b 5110 s++;
3280af22 5111 PL_bufptr = s;
748a9306
LW
5112 tmp = *s++;
5113
bf4acbe4 5114 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5115 s++;
5116
5117 if (strnEQ(s,"=>",2)) {
3280af22 5118 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5119 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5120 OPERATOR('-'); /* unary minus */
5121 }
3280af22 5122 PL_last_uni = PL_oldbufptr;
748a9306 5123 switch (tmp) {
e5edeb50
JH
5124 case 'r': ftst = OP_FTEREAD; break;
5125 case 'w': ftst = OP_FTEWRITE; break;
5126 case 'x': ftst = OP_FTEEXEC; break;
5127 case 'o': ftst = OP_FTEOWNED; break;
5128 case 'R': ftst = OP_FTRREAD; break;
5129 case 'W': ftst = OP_FTRWRITE; break;
5130 case 'X': ftst = OP_FTREXEC; break;
5131 case 'O': ftst = OP_FTROWNED; break;
5132 case 'e': ftst = OP_FTIS; break;
5133 case 'z': ftst = OP_FTZERO; break;
5134 case 's': ftst = OP_FTSIZE; break;
5135 case 'f': ftst = OP_FTFILE; break;
5136 case 'd': ftst = OP_FTDIR; break;
5137 case 'l': ftst = OP_FTLINK; break;
5138 case 'p': ftst = OP_FTPIPE; break;
5139 case 'S': ftst = OP_FTSOCK; break;
5140 case 'u': ftst = OP_FTSUID; break;
5141 case 'g': ftst = OP_FTSGID; break;
5142 case 'k': ftst = OP_FTSVTX; break;
5143 case 'b': ftst = OP_FTBLK; break;
5144 case 'c': ftst = OP_FTCHR; break;
5145 case 't': ftst = OP_FTTTY; break;
5146 case 'T': ftst = OP_FTTEXT; break;
5147 case 'B': ftst = OP_FTBINARY; break;
5148 case 'M': case 'A': case 'C':
fafc274c 5149 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5150 switch (tmp) {
5151 case 'M': ftst = OP_FTMTIME; break;
5152 case 'A': ftst = OP_FTATIME; break;
5153 case 'C': ftst = OP_FTCTIME; break;
5154 default: break;
5155 }
5156 break;
378cc40b 5157 default:
378cc40b
LW
5158 break;
5159 }
e5edeb50 5160 if (ftst) {
eb160463 5161 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5162 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5163 "### Saw file test %c\n", (int)tmp);
5f80b19c 5164 } );
e5edeb50
JH
5165 FTST(ftst);
5166 }
5167 else {
5168 /* Assume it was a minus followed by a one-letter named
5169 * subroutine call (or a -bareword), then. */
95c31fe3 5170 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5171 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5172 (int) tmp);
5f80b19c 5173 } );
3cf7b4c4 5174 s = --PL_bufptr;
e5edeb50 5175 }
378cc40b 5176 }
90771dc0
NC
5177 {
5178 const char tmp = *s++;
5179 if (*s == tmp) {
5180 s++;
5181 if (PL_expect == XOPERATOR)
5182 TERM(POSTDEC);
5183 else
5184 OPERATOR(PREDEC);
5185 }
5186 else if (*s == '>') {
5187 s++;
29595ff2 5188 s = SKIPSPACE1(s);
90771dc0
NC
5189 if (isIDFIRST_lazy_if(s,UTF)) {
5190 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5191 TOKEN(ARROW);
5192 }
5193 else if (*s == '$')
5194 OPERATOR(ARROW);
5195 else
5196 TERM(ARROW);
5197 }
78cdf107
Z
5198 if (PL_expect == XOPERATOR) {
5199 if (*s == '=' && !PL_lex_allbrackets &&
5200 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5201 s--;
5202 TOKEN(0);
5203 }
90771dc0 5204 Aop(OP_SUBTRACT);
78cdf107 5205 }
90771dc0
NC
5206 else {
5207 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5208 check_uni();
5209 OPERATOR('-'); /* unary minus */
79072805 5210 }
2f3197b3 5211 }
79072805 5212
378cc40b 5213 case '+':
90771dc0
NC
5214 {
5215 const char tmp = *s++;
5216 if (*s == tmp) {
5217 s++;
5218 if (PL_expect == XOPERATOR)
5219 TERM(POSTINC);
5220 else
5221 OPERATOR(PREINC);
5222 }
78cdf107
Z
5223 if (PL_expect == XOPERATOR) {
5224 if (*s == '=' && !PL_lex_allbrackets &&
5225 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5226 s--;
5227 TOKEN(0);
5228 }
90771dc0 5229 Aop(OP_ADD);
78cdf107 5230 }
90771dc0
NC
5231 else {
5232 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5233 check_uni();
5234 OPERATOR('+');
5235 }
2f3197b3 5236 }
a687059c 5237
378cc40b 5238 case '*':
3280af22
NIS
5239 if (PL_expect != XOPERATOR) {
5240 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5241 PL_expect = XOPERATOR;
5242 force_ident(PL_tokenbuf, '*');
5243 if (!*PL_tokenbuf)
a0d0e21e 5244 PREREF('*');
79072805 5245 TERM('*');
a687059c 5246 }
79072805
LW
5247 s++;
5248 if (*s == '*') {
a687059c 5249 s++;
78cdf107
Z
5250 if (*s == '=' && !PL_lex_allbrackets &&
5251 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5252 s -= 2;
5253 TOKEN(0);
5254 }
79072805 5255 PWop(OP_POW);
a687059c 5256 }
78cdf107
Z
5257 if (*s == '=' && !PL_lex_allbrackets &&
5258 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5259 s--;
5260 TOKEN(0);
5261 }
79072805
LW
5262 Mop(OP_MULTIPLY);
5263
378cc40b 5264 case '%':
3280af22 5265 if (PL_expect == XOPERATOR) {
78cdf107
Z
5266 if (s[1] == '=' && !PL_lex_allbrackets &&
5267 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5268 TOKEN(0);
bbce6d69 5269 ++s;
5270 Mop(OP_MODULO);
a687059c 5271 }
3280af22 5272 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5273 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5274 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5275 if (!PL_tokenbuf[1]) {
bbce6d69 5276 PREREF('%');
a687059c 5277 }
3280af22 5278 PL_pending_ident = '%';
bbce6d69 5279 TERM('%');
a687059c 5280
378cc40b 5281 case '^':
78cdf107
Z
5282 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5283 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5284 TOKEN(0);
79072805 5285 s++;
a0d0e21e 5286 BOop(OP_BIT_XOR);
79072805 5287 case '[':
a7aaec61
Z
5288 if (PL_lex_brackets > 100)
5289 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5290 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5291 PL_lex_allbrackets++;
df3467db
IG
5292 {
5293 const char tmp = *s++;
5294 OPERATOR(tmp);
5295 }
378cc40b 5296 case '~':
0d863452 5297 if (s[1] == '~'
3e7dd34d 5298 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5299 {
78cdf107
Z
5300 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5301 TOKEN(0);
0d863452
RH
5302 s += 2;
5303 Eop(OP_SMARTMATCH);
5304 }
78cdf107
Z
5305 s++;
5306 OPERATOR('~');
378cc40b 5307 case ',':
78cdf107
Z
5308 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5309 TOKEN(0);
5310 s++;
5311 OPERATOR(',');
a0d0e21e
LW
5312 case ':':
5313 if (s[1] == ':') {
5314 len = 0;
0bfa2a8a 5315 goto just_a_word_zero_gv;
a0d0e21e
LW
5316 }
5317 s++;
09bef843
SB
5318 switch (PL_expect) {
5319 OP *attrs;
5db06880
NC
5320#ifdef PERL_MAD
5321 I32 stuffstart;
5322#endif
09bef843
SB
5323 case XOPERATOR:
5324 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5325 break;
5326 PL_bufptr = s; /* update in case we back off */
d83f38d8 5327 if (*s == '=') {
2dc78664
NC
5328 Perl_croak(aTHX_
5329 "Use of := for an empty attribute list is not allowed");
d83f38d8 5330 }
09bef843
SB
5331 goto grabattrs;
5332 case XATTRBLOCK:
5333 PL_expect = XBLOCK;
5334 goto grabattrs;
5335 case XATTRTERM:
5336 PL_expect = XTERMBLOCK;
5337 grabattrs:
5db06880
NC
5338#ifdef PERL_MAD
5339 stuffstart = s - SvPVX(PL_linestr) - 1;
5340#endif
29595ff2 5341 s = PEEKSPACE(s);
5f66b61c 5342 attrs = NULL;
7e2040f0 5343 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5344 I32 tmp;
5cc237b8 5345 SV *sv;
09bef843 5346 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5347 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5348 if (tmp < 0) tmp = -tmp;
5349 switch (tmp) {
5350 case KEY_or:
5351 case KEY_and:
5352 case KEY_for:
11baf631 5353 case KEY_foreach:
f9829d6b
GS
5354 case KEY_unless:
5355 case KEY_if:
5356 case KEY_while:
5357 case KEY_until:
5358 goto got_attrs;
5359 default:
5360 break;
5361 }
5362 }
5cc237b8 5363 sv = newSVpvn(s, len);
09bef843
SB
5364 if (*d == '(') {
5365 d = scan_str(d,TRUE,TRUE);
5366 if (!d) {
09bef843
SB
5367 /* MUST advance bufptr here to avoid bogus
5368 "at end of line" context messages from yyerror().
5369 */
5370 PL_bufptr = s + len;
5371 yyerror("Unterminated attribute parameter in attribute list");
5372 if (attrs)
5373 op_free(attrs);
5cc237b8 5374 sv_free(sv);
bbf60fe6 5375 return REPORT(0); /* EOF indicator */
09bef843
SB
5376 }
5377 }
5378 if (PL_lex_stuff) {
09bef843 5379 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5380 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5381 newSVOP(OP_CONST, 0, sv));
5382 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5383 PL_lex_stuff = NULL;
09bef843
SB
5384 }
5385 else {
5cc237b8
BS
5386 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5387 sv_free(sv);
1108974d 5388 if (PL_in_my == KEY_our) {
df9a6019 5389 deprecate(":unique");
1108974d 5390 }
bfed75c6 5391 else
371fce9b
DM
5392 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5393 }
5394
d3cea301
SB
5395 /* NOTE: any CV attrs applied here need to be part of
5396 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5397 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5398 sv_free(sv);
78f9721b 5399 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5400 }
5401 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5402 sv_free(sv);
8e5dadda 5403 deprecate(":locked");
5cc237b8
BS
5404 }
5405 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5406 sv_free(sv);
78f9721b 5407 CvMETHOD_on(PL_compcv);
5cc237b8 5408 }
78f9721b
SM
5409 /* After we've set the flags, it could be argued that
5410 we don't need to do the attributes.pm-based setting
5411 process, and shouldn't bother appending recognized
d3cea301
SB
5412 flags. To experiment with that, uncomment the
5413 following "else". (Note that's already been
5414 uncommented. That keeps the above-applied built-in
5415 attributes from being intercepted (and possibly
5416 rejected) by a package's attribute routines, but is
5417 justified by the performance win for the common case
5418 of applying only built-in attributes.) */
0256094b 5419 else
2fcb4757 5420 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5421 newSVOP(OP_CONST, 0,
5cc237b8 5422 sv));
09bef843 5423 }
29595ff2 5424 s = PEEKSPACE(d);
0120eecf 5425 if (*s == ':' && s[1] != ':')
29595ff2 5426 s = PEEKSPACE(s+1);
0120eecf
GS
5427 else if (s == d)
5428 break; /* require real whitespace or :'s */
29595ff2 5429 /* XXX losing whitespace on sequential attributes here */
09bef843 5430 }
90771dc0
NC
5431 {
5432 const char tmp
5433 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5434 if (*s != ';' && *s != '}' && *s != tmp
5435 && (tmp != '=' || *s != ')')) {
5436 const char q = ((*s == '\'') ? '"' : '\'');
5437 /* If here for an expression, and parsed no attrs, back
5438 off. */
5439 if (tmp == '=' && !attrs) {
5440 s = PL_bufptr;
5441 break;
5442 }
5443 /* MUST advance bufptr here to avoid bogus "at end of line"
5444 context messages from yyerror().
5445 */
5446 PL_bufptr = s;
10edeb5d
JH
5447 yyerror( (const char *)
5448 (*s
5449 ? Perl_form(aTHX_ "Invalid separator character "
5450 "%c%c%c in attribute list", q, *s, q)
5451 : "Unterminated attribute list" ) );
90771dc0
NC
5452 if (attrs)
5453 op_free(attrs);
5454 OPERATOR(':');
09bef843 5455 }
09bef843 5456 }
f9829d6b 5457 got_attrs:
09bef843 5458 if (attrs) {
cd81e915 5459 start_force(PL_curforce);
9ded7720 5460 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5461 CURMAD('_', PL_nextwhite);
89122651 5462 force_next(THING);
5db06880
NC
5463 }
5464#ifdef PERL_MAD
5465 if (PL_madskills) {
cd81e915 5466 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5467 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5468 }
5db06880 5469#endif
09bef843
SB
5470 TOKEN(COLONATTR);
5471 }
78cdf107
Z
5472 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5473 s--;
5474 TOKEN(0);
5475 }
5476 PL_lex_allbrackets--;
a0d0e21e 5477 OPERATOR(':');
8990e307
LW
5478 case '(':
5479 s++;
3280af22
NIS
5480 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5481 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5482 else
3280af22 5483 PL_expect = XTERM;
29595ff2 5484 s = SKIPSPACE1(s);
78cdf107 5485 PL_lex_allbrackets++;
a0d0e21e 5486 TOKEN('(');
378cc40b 5487 case ';':
78cdf107
Z
5488 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5489 TOKEN(0);
f4dd75d9 5490 CLINE;
78cdf107
Z
5491 s++;
5492 OPERATOR(';');
378cc40b 5493 case ')':
78cdf107
Z
5494 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5495 TOKEN(0);
5496 s++;
5497 PL_lex_allbrackets--;
5498 s = SKIPSPACE1(s);
5499 if (*s == '{')
5500 PREBLOCK(')');
5501 TERM(')');
79072805 5502 case ']':
a7aaec61
Z
5503 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5504 TOKEN(0);
79072805 5505 s++;
3280af22 5506 if (PL_lex_brackets <= 0)
d98d5fff 5507 yyerror("Unmatched right square bracket");
463ee0b2 5508 else
3280af22 5509 --PL_lex_brackets;
78cdf107 5510 PL_lex_allbrackets--;
3280af22
NIS
5511 if (PL_lex_state == LEX_INTERPNORMAL) {
5512 if (PL_lex_brackets == 0) {
02255c60
FC
5513 if (*s == '-' && s[1] == '>')
5514 PL_lex_state = LEX_INTERPENDMAYBE;
5515 else if (*s != '[' && *s != '{')
3280af22 5516 PL_lex_state = LEX_INTERPEND;
79072805
LW
5517 }
5518 }
4633a7c4 5519 TERM(']');
79072805
LW
5520 case '{':
5521 leftbracket:
79072805 5522 s++;
3280af22 5523 if (PL_lex_brackets > 100) {
8edd5f42 5524 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5525 }
3280af22 5526 switch (PL_expect) {
a0d0e21e 5527 case XTERM:
3280af22 5528 if (PL_lex_formbrack) {
a0d0e21e
LW
5529 s--;
5530 PRETERMBLOCK(DO);
5531 }
3280af22
NIS
5532 if (PL_oldoldbufptr == PL_last_lop)
5533 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5534 else
3280af22 5535 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5536 PL_lex_allbrackets++;
79072805 5537 OPERATOR(HASHBRACK);
a0d0e21e 5538 case XOPERATOR:
bf4acbe4 5539 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5540 s++;
44a8e56a 5541 d = s;
3280af22
NIS
5542 PL_tokenbuf[0] = '\0';
5543 if (d < PL_bufend && *d == '-') {
5544 PL_tokenbuf[0] = '-';
44a8e56a 5545 d++;
bf4acbe4 5546 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5547 d++;
5548 }
7e2040f0 5549 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5550 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5551 FALSE, &len);
bf4acbe4 5552 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5553 d++;
5554 if (*d == '}') {
f54cb97a 5555 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5556 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5557 if (minus)
5558 force_next('-');
748a9306
LW
5559 }
5560 }
5561 /* FALL THROUGH */
09bef843 5562 case XATTRBLOCK:
748a9306 5563 case XBLOCK:
3280af22 5564 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5565 PL_lex_allbrackets++;
3280af22 5566 PL_expect = XSTATE;
a0d0e21e 5567 break;
09bef843 5568 case XATTRTERM:
a0d0e21e 5569 case XTERMBLOCK:
3280af22 5570 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5571 PL_lex_allbrackets++;
3280af22 5572 PL_expect = XSTATE;
a0d0e21e
LW
5573 break;
5574 default: {
f54cb97a 5575 const char *t;
3280af22
NIS
5576 if (PL_oldoldbufptr == PL_last_lop)
5577 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5578 else
3280af22 5579 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5580 PL_lex_allbrackets++;
29595ff2 5581 s = SKIPSPACE1(s);
8452ff4b
SB
5582 if (*s == '}') {
5583 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5584 PL_expect = XTERM;
5585 /* This hack is to get the ${} in the message. */
5586 PL_bufptr = s+1;
5587 yyerror("syntax error");
5588 break;
5589 }
a0d0e21e 5590 OPERATOR(HASHBRACK);
8452ff4b 5591 }
b8a4b1be
GS
5592 /* This hack serves to disambiguate a pair of curlies
5593 * as being a block or an anon hash. Normally, expectation
5594 * determines that, but in cases where we're not in a
5595 * position to expect anything in particular (like inside
5596 * eval"") we have to resolve the ambiguity. This code
5597 * covers the case where the first term in the curlies is a
5598 * quoted string. Most other cases need to be explicitly
a0288114 5599 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5600 * curly in order to force resolution as an anon hash.
5601 *
5602 * XXX should probably propagate the outer expectation
5603 * into eval"" to rely less on this hack, but that could
5604 * potentially break current behavior of eval"".
5605 * GSAR 97-07-21
5606 */
5607 t = s;
5608 if (*s == '\'' || *s == '"' || *s == '`') {
5609 /* common case: get past first string, handling escapes */
3280af22 5610 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5611 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5612 t++;
5613 t++;
a0d0e21e 5614 }
b8a4b1be 5615 else if (*s == 'q') {
3280af22 5616 if (++t < PL_bufend
b8a4b1be 5617 && (!isALNUM(*t)
3280af22 5618 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5619 && !isALNUM(*t))))
5620 {
abc667d1 5621 /* skip q//-like construct */
f54cb97a 5622 const char *tmps;
b8a4b1be
GS
5623 char open, close, term;
5624 I32 brackets = 1;
5625
3280af22 5626 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5627 t++;
abc667d1
DM
5628 /* check for q => */
5629 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5630 OPERATOR(HASHBRACK);
5631 }
b8a4b1be
GS
5632 term = *t;
5633 open = term;
5634 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5635 term = tmps[5];
5636 close = term;
5637 if (open == close)
3280af22
NIS
5638 for (t++; t < PL_bufend; t++) {
5639 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5640 t++;
6d07e5e9 5641 else if (*t == open)
b8a4b1be
GS
5642 break;
5643 }
abc667d1 5644 else {
3280af22
NIS
5645 for (t++; t < PL_bufend; t++) {
5646 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5647 t++;
6d07e5e9 5648 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5649 break;
5650 else if (*t == open)
5651 brackets++;
5652 }
abc667d1
DM
5653 }
5654 t++;
b8a4b1be 5655 }
abc667d1
DM
5656 else
5657 /* skip plain q word */
5658 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5659 t += UTF8SKIP(t);
a0d0e21e 5660 }
7e2040f0 5661 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5662 t += UTF8SKIP(t);
7e2040f0 5663 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5664 t += UTF8SKIP(t);
a0d0e21e 5665 }
3280af22 5666 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5667 t++;
b8a4b1be
GS
5668 /* if comma follows first term, call it an anon hash */
5669 /* XXX it could be a comma expression with loop modifiers */
3280af22 5670 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5671 || (*t == '=' && t[1] == '>')))
a0d0e21e 5672 OPERATOR(HASHBRACK);
3280af22 5673 if (PL_expect == XREF)
4e4e412b 5674 PL_expect = XTERM;
a0d0e21e 5675 else {
3280af22
NIS
5676 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5677 PL_expect = XSTATE;
a0d0e21e 5678 }
8990e307 5679 }
a0d0e21e 5680 break;
463ee0b2 5681 }
6154021b 5682 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5683 if (isSPACE(*s) || *s == '#')
3280af22 5684 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5685 TOKEN('{');
378cc40b 5686 case '}':
a7aaec61
Z
5687 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5688 TOKEN(0);
79072805
LW
5689 rightbracket:
5690 s++;
3280af22 5691 if (PL_lex_brackets <= 0)
d98d5fff 5692 yyerror("Unmatched right curly bracket");
463ee0b2 5693 else
3280af22 5694 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 5695 PL_lex_allbrackets--;
c2e66d9e 5696 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5697 PL_lex_formbrack = 0;
5698 if (PL_lex_state == LEX_INTERPNORMAL) {
5699 if (PL_lex_brackets == 0) {
9059aa12
LW
5700 if (PL_expect & XFAKEBRACK) {
5701 PL_expect &= XENUMMASK;
3280af22
NIS
5702 PL_lex_state = LEX_INTERPEND;
5703 PL_bufptr = s;
5db06880
NC
5704#if 0
5705 if (PL_madskills) {
cd81e915 5706 if (!PL_thiswhite)
6b29d1f5 5707 PL_thiswhite = newSVpvs("");
76f68e9b 5708 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5709 }
5710#endif
cea2e8a9 5711 return yylex(); /* ignore fake brackets */
79072805 5712 }
fa83b5b6 5713 if (*s == '-' && s[1] == '>')
3280af22 5714 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5715 else if (*s != '[' && *s != '{')
3280af22 5716 PL_lex_state = LEX_INTERPEND;
79072805
LW
5717 }
5718 }
9059aa12
LW
5719 if (PL_expect & XFAKEBRACK) {
5720 PL_expect &= XENUMMASK;
3280af22 5721 PL_bufptr = s;
cea2e8a9 5722 return yylex(); /* ignore fake brackets */
748a9306 5723 }
cd81e915 5724 start_force(PL_curforce);
5db06880
NC
5725 if (PL_madskills) {
5726 curmad('X', newSVpvn(s-1,1));
cd81e915 5727 CURMAD('_', PL_thiswhite);
5db06880 5728 }
79072805 5729 force_next('}');
5db06880 5730#ifdef PERL_MAD
cd81e915 5731 if (!PL_thistoken)
6b29d1f5 5732 PL_thistoken = newSVpvs("");
5db06880 5733#endif
79072805 5734 TOKEN(';');
378cc40b
LW
5735 case '&':
5736 s++;
78cdf107
Z
5737 if (*s++ == '&') {
5738 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5739 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5740 s -= 2;
5741 TOKEN(0);
5742 }
a0d0e21e 5743 AOPERATOR(ANDAND);
78cdf107 5744 }
378cc40b 5745 s--;
3280af22 5746 if (PL_expect == XOPERATOR) {
041457d9
DM
5747 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5748 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5749 {
57843af0 5750 CopLINE_dec(PL_curcop);
f1f66076 5751 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5752 CopLINE_inc(PL_curcop);
463ee0b2 5753 }
78cdf107
Z
5754 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5755 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5756 s--;
5757 TOKEN(0);
5758 }
79072805 5759 BAop(OP_BIT_AND);
463ee0b2 5760 }
79072805 5761
3280af22
NIS
5762 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5763 if (*PL_tokenbuf) {
5764 PL_expect = XOPERATOR;
5765 force_ident(PL_tokenbuf, '&');
463ee0b2 5766 }
79072805
LW
5767 else
5768 PREREF('&');
6154021b 5769 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5770 TERM('&');
5771
378cc40b
LW
5772 case '|':
5773 s++;
78cdf107
Z
5774 if (*s++ == '|') {
5775 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5776 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5777 s -= 2;
5778 TOKEN(0);
5779 }
a0d0e21e 5780 AOPERATOR(OROR);
78cdf107 5781 }
378cc40b 5782 s--;
78cdf107
Z
5783 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5784 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5785 s--;
5786 TOKEN(0);
5787 }
79072805 5788 BOop(OP_BIT_OR);
378cc40b
LW
5789 case '=':
5790 s++;
748a9306 5791 {
90771dc0 5792 const char tmp = *s++;
78cdf107
Z
5793 if (tmp == '=') {
5794 if (!PL_lex_allbrackets &&
5795 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5796 s -= 2;
5797 TOKEN(0);
5798 }
90771dc0 5799 Eop(OP_EQ);
78cdf107
Z
5800 }
5801 if (tmp == '>') {
5802 if (!PL_lex_allbrackets &&
5803 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5804 s -= 2;
5805 TOKEN(0);
5806 }
90771dc0 5807 OPERATOR(',');
78cdf107 5808 }
90771dc0
NC
5809 if (tmp == '~')
5810 PMop(OP_MATCH);
5811 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5812 && strchr("+-*/%.^&|<",tmp))
5813 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5814 "Reversed %c= operator",(int)tmp);
5815 s--;
5816 if (PL_expect == XSTATE && isALPHA(tmp) &&
5817 (s == PL_linestart+1 || s[-2] == '\n') )
5818 {
5819 if (PL_in_eval && !PL_rsfp) {
5820 d = PL_bufend;
5821 while (s < d) {
5822 if (*s++ == '\n') {
5823 incline(s);
5824 if (strnEQ(s,"=cut",4)) {
5825 s = strchr(s,'\n');
5826 if (s)
5827 s++;
5828 else
5829 s = d;
5830 incline(s);
5831 goto retry;
5832 }
5833 }
a5f75d66 5834 }
90771dc0 5835 goto retry;
a5f75d66 5836 }
5db06880
NC
5837#ifdef PERL_MAD
5838 if (PL_madskills) {
cd81e915 5839 if (!PL_thiswhite)
6b29d1f5 5840 PL_thiswhite = newSVpvs("");
cd81e915 5841 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5842 PL_bufend - PL_linestart);
5843 }
5844#endif
90771dc0 5845 s = PL_bufend;
737c24fc 5846 PL_parser->in_pod = 1;
90771dc0 5847 goto retry;
a5f75d66 5848 }
a0d0e21e 5849 }
3280af22 5850 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5851 const char *t = s;
51882d45 5852#ifdef PERL_STRICT_CR
c35e046a 5853 while (SPACE_OR_TAB(*t))
51882d45 5854#else
c35e046a 5855 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5856#endif
c35e046a 5857 t++;
a0d0e21e
LW
5858 if (*t == '\n' || *t == '#') {
5859 s--;
3280af22 5860 PL_expect = XBLOCK;
a0d0e21e
LW
5861 goto leftbracket;
5862 }
79072805 5863 }
78cdf107
Z
5864 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5865 s--;
5866 TOKEN(0);
5867 }
6154021b 5868 pl_yylval.ival = 0;
a0d0e21e 5869 OPERATOR(ASSIGNOP);
378cc40b
LW
5870 case '!':
5871 s++;
90771dc0
NC
5872 {
5873 const char tmp = *s++;
5874 if (tmp == '=') {
5875 /* was this !=~ where !~ was meant?
5876 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5877
5878 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5879 const char *t = s+1;
5880
5881 while (t < PL_bufend && isSPACE(*t))
5882 ++t;
5883
5884 if (*t == '/' || *t == '?' ||
5885 ((*t == 'm' || *t == 's' || *t == 'y')
5886 && !isALNUM(t[1])) ||
5887 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5888 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5889 "!=~ should be !~");
5890 }
78cdf107
Z
5891 if (!PL_lex_allbrackets &&
5892 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5893 s -= 2;
5894 TOKEN(0);
5895 }
90771dc0
NC
5896 Eop(OP_NE);
5897 }
5898 if (tmp == '~')
5899 PMop(OP_NOT);
5900 }
378cc40b
LW
5901 s--;
5902 OPERATOR('!');
5903 case '<':
3280af22 5904 if (PL_expect != XOPERATOR) {
93a17b20 5905 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5906 check_uni();
79072805
LW
5907 if (s[1] == '<')
5908 s = scan_heredoc(s);
5909 else
5910 s = scan_inputsymbol(s);
5911 TERM(sublex_start());
378cc40b
LW
5912 }
5913 s++;
90771dc0
NC
5914 {
5915 char tmp = *s++;
78cdf107
Z
5916 if (tmp == '<') {
5917 if (*s == '=' && !PL_lex_allbrackets &&
5918 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5919 s -= 2;
5920 TOKEN(0);
5921 }
90771dc0 5922 SHop(OP_LEFT_SHIFT);
78cdf107 5923 }
90771dc0
NC
5924 if (tmp == '=') {
5925 tmp = *s++;
78cdf107
Z
5926 if (tmp == '>') {
5927 if (!PL_lex_allbrackets &&
5928 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5929 s -= 3;
5930 TOKEN(0);
5931 }
90771dc0 5932 Eop(OP_NCMP);
78cdf107 5933 }
90771dc0 5934 s--;
78cdf107
Z
5935 if (!PL_lex_allbrackets &&
5936 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5937 s -= 2;
5938 TOKEN(0);
5939 }
90771dc0
NC
5940 Rop(OP_LE);
5941 }
395c3793 5942 }
378cc40b 5943 s--;
78cdf107
Z
5944 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5945 s--;
5946 TOKEN(0);
5947 }
79072805 5948 Rop(OP_LT);
378cc40b
LW
5949 case '>':
5950 s++;
90771dc0
NC
5951 {
5952 const char tmp = *s++;
78cdf107
Z
5953 if (tmp == '>') {
5954 if (*s == '=' && !PL_lex_allbrackets &&
5955 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5956 s -= 2;
5957 TOKEN(0);
5958 }
90771dc0 5959 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
5960 }
5961 else if (tmp == '=') {
5962 if (!PL_lex_allbrackets &&
5963 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5964 s -= 2;
5965 TOKEN(0);
5966 }
90771dc0 5967 Rop(OP_GE);
78cdf107 5968 }
90771dc0 5969 }
378cc40b 5970 s--;
78cdf107
Z
5971 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5972 s--;
5973 TOKEN(0);
5974 }
79072805 5975 Rop(OP_GT);
378cc40b
LW
5976
5977 case '$':
bbce6d69 5978 CLINE;
5979
3280af22
NIS
5980 if (PL_expect == XOPERATOR) {
5981 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5982 return deprecate_commaless_var_list();
a0d0e21e 5983 }
8990e307 5984 }
a0d0e21e 5985
c0b977fd 5986 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5987 PL_tokenbuf[0] = '@';
376b8730
SM
5988 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5989 sizeof PL_tokenbuf - 1, FALSE);
5990 if (PL_expect == XOPERATOR)
5991 no_op("Array length", s);
3280af22 5992 if (!PL_tokenbuf[1])
a0d0e21e 5993 PREREF(DOLSHARP);
3280af22
NIS
5994 PL_expect = XOPERATOR;
5995 PL_pending_ident = '#';
463ee0b2 5996 TOKEN(DOLSHARP);
79072805 5997 }
bbce6d69 5998
3280af22 5999 PL_tokenbuf[0] = '$';
376b8730
SM
6000 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6001 sizeof PL_tokenbuf - 1, FALSE);
6002 if (PL_expect == XOPERATOR)
6003 no_op("Scalar", s);
3280af22
NIS
6004 if (!PL_tokenbuf[1]) {
6005 if (s == PL_bufend)
bbce6d69 6006 yyerror("Final $ should be \\$ or $name");
6007 PREREF('$');
8990e307 6008 }
a0d0e21e 6009
bbce6d69 6010 /* This kludge not intended to be bulletproof. */
3280af22 6011 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 6012 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 6013 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 6014 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 6015 TERM(THING);
6016 }
6017
ff68c719 6018 d = s;
90771dc0
NC
6019 {
6020 const char tmp = *s;
ae28bb2a 6021 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6022 s = SKIPSPACE1(s);
ff68c719 6023
90771dc0
NC
6024 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6025 && intuit_more(s)) {
6026 if (*s == '[') {
6027 PL_tokenbuf[0] = '@';
6028 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6029 char *t = s+1;
6030
6031 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6032 t++;
90771dc0 6033 if (*t++ == ',') {
29595ff2 6034 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6035 while (t < PL_bufend && *t != ']')
6036 t++;
9014280d 6037 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6038 "Multidimensional syntax %.*s not supported",
36c7798d 6039 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6040 }
748a9306 6041 }
93a17b20 6042 }
90771dc0
NC
6043 else if (*s == '{') {
6044 char *t;
6045 PL_tokenbuf[0] = '%';
6046 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6047 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6048 {
6049 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6050 do {
6051 t++;
6052 } while (isSPACE(*t));
90771dc0 6053 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6054 STRLEN len;
90771dc0 6055 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6056 &len);
c35e046a
AL
6057 while (isSPACE(*t))
6058 t++;
780a5241 6059 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
6060 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6061 "You need to quote \"%s\"",
6062 tmpbuf);
6063 }
6064 }
6065 }
93a17b20 6066 }
bbce6d69 6067
90771dc0
NC
6068 PL_expect = XOPERATOR;
6069 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6070 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6071 if (!islop || PL_last_lop_op == OP_GREPSTART)
6072 PL_expect = XOPERATOR;
6073 else if (strchr("$@\"'`q", *s))
6074 PL_expect = XTERM; /* e.g. print $fh "foo" */
6075 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6076 PL_expect = XTERM; /* e.g. print $fh &sub */
6077 else if (isIDFIRST_lazy_if(s,UTF)) {
6078 char tmpbuf[sizeof PL_tokenbuf];
6079 int t2;
6080 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6081 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6082 /* binary operators exclude handle interpretations */
6083 switch (t2) {
6084 case -KEY_x:
6085 case -KEY_eq:
6086 case -KEY_ne:
6087 case -KEY_gt:
6088 case -KEY_lt:
6089 case -KEY_ge:
6090 case -KEY_le:
6091 case -KEY_cmp:
6092 break;
6093 default:
6094 PL_expect = XTERM; /* e.g. print $fh length() */
6095 break;
6096 }
6097 }
6098 else {
6099 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6100 }
6101 }
90771dc0
NC
6102 else if (isDIGIT(*s))
6103 PL_expect = XTERM; /* e.g. print $fh 3 */
6104 else if (*s == '.' && isDIGIT(s[1]))
6105 PL_expect = XTERM; /* e.g. print $fh .3 */
6106 else if ((*s == '?' || *s == '-' || *s == '+')
6107 && !isSPACE(s[1]) && s[1] != '=')
6108 PL_expect = XTERM; /* e.g. print $fh -1 */
6109 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6110 && s[1] != '/')
6111 PL_expect = XTERM; /* e.g. print $fh /.../
6112 XXX except DORDOR operator
6113 */
6114 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6115 && s[2] != '=')
6116 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6117 }
bbce6d69 6118 }
3280af22 6119 PL_pending_ident = '$';
79072805 6120 TOKEN('$');
378cc40b
LW
6121
6122 case '@':
3280af22 6123 if (PL_expect == XOPERATOR)
bbce6d69 6124 no_op("Array", s);
3280af22
NIS
6125 PL_tokenbuf[0] = '@';
6126 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6127 if (!PL_tokenbuf[1]) {
bbce6d69 6128 PREREF('@');
6129 }
3280af22 6130 if (PL_lex_state == LEX_NORMAL)
29595ff2 6131 s = SKIPSPACE1(s);
3280af22 6132 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6133 if (*s == '{')
3280af22 6134 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6135
6136 /* Warn about @ where they meant $. */
041457d9
DM
6137 if (*s == '[' || *s == '{') {
6138 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6139 const char *t = s + 1;
7e2040f0 6140 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
6141 t++;
6142 if (*t == '}' || *t == ']') {
6143 t++;
29595ff2 6144 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 6145 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 6146 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
6147 (int)(t-PL_bufptr), PL_bufptr,
6148 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 6149 }
93a17b20
LW
6150 }
6151 }
463ee0b2 6152 }
3280af22 6153 PL_pending_ident = '@';
79072805 6154 TERM('@');
378cc40b 6155
c963b151 6156 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6157 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6158 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6159 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6160 TOKEN(0);
6f33ba73
RGS
6161 s += 2;
6162 AOPERATOR(DORDOR);
6163 }
c963b151 6164 case '?': /* may either be conditional or pattern */
be25f609 6165 if (PL_expect == XOPERATOR) {
90771dc0 6166 char tmp = *s++;
c963b151 6167 if(tmp == '?') {
78cdf107
Z
6168 if (!PL_lex_allbrackets &&
6169 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6170 s--;
6171 TOKEN(0);
6172 }
6173 PL_lex_allbrackets++;
be25f609 6174 OPERATOR('?');
c963b151
BD
6175 }
6176 else {
6177 tmp = *s++;
6178 if(tmp == '/') {
6179 /* A // operator. */
78cdf107
Z
6180 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6181 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6182 LEX_FAKEEOF_LOGIC)) {
6183 s -= 2;
6184 TOKEN(0);
6185 }
c963b151
BD
6186 AOPERATOR(DORDOR);
6187 }
6188 else {
6189 s--;
78cdf107
Z
6190 if (*s == '=' && !PL_lex_allbrackets &&
6191 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6192 s--;
6193 TOKEN(0);
6194 }
c963b151
BD
6195 Mop(OP_DIVIDE);
6196 }
6197 }
6198 }
6199 else {
6200 /* Disable warning on "study /blah/" */
6201 if (PL_oldoldbufptr == PL_last_uni
6202 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6203 || memNE(PL_last_uni, "study", 5)
6204 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6205 ))
6206 check_uni();
725a61d7
Z
6207 if (*s == '?')
6208 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6209 s = scan_pat(s,OP_MATCH);
6210 TERM(sublex_start());
6211 }
378cc40b
LW
6212
6213 case '.':
51882d45
GS
6214 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6215#ifdef PERL_STRICT_CR
6216 && s[1] == '\n'
6217#else
6218 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6219#endif
6220 && (s == PL_linestart || s[-1] == '\n') )
6221 {
3280af22
NIS
6222 PL_lex_formbrack = 0;
6223 PL_expect = XSTATE;
79072805
LW
6224 goto rightbracket;
6225 }
be25f609 6226 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6227 s += 3;
6228 OPERATOR(YADAYADA);
6229 }
3280af22 6230 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6231 char tmp = *s++;
a687059c 6232 if (*s == tmp) {
78cdf107
Z
6233 if (!PL_lex_allbrackets &&
6234 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6235 s--;
6236 TOKEN(0);
6237 }
a687059c 6238 s++;
2f3197b3
LW
6239 if (*s == tmp) {
6240 s++;
6154021b 6241 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6242 }
6243 else
6154021b 6244 pl_yylval.ival = 0;
378cc40b 6245 OPERATOR(DOTDOT);
a687059c 6246 }
78cdf107
Z
6247 if (*s == '=' && !PL_lex_allbrackets &&
6248 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6249 s--;
6250 TOKEN(0);
6251 }
79072805 6252 Aop(OP_CONCAT);
378cc40b
LW
6253 }
6254 /* FALL THROUGH */
6255 case '0': case '1': case '2': case '3': case '4':
6256 case '5': case '6': case '7': case '8': case '9':
6154021b 6257 s = scan_num(s, &pl_yylval);
931e0695 6258 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6259 if (PL_expect == XOPERATOR)
8990e307 6260 no_op("Number",s);
79072805
LW
6261 TERM(THING);
6262
6263 case '\'':
5db06880 6264 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6265 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6266 if (PL_expect == XOPERATOR) {
6267 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6268 return deprecate_commaless_var_list();
a0d0e21e 6269 }
463ee0b2 6270 else
8990e307 6271 no_op("String",s);
463ee0b2 6272 }
79072805 6273 if (!s)
d4c19fe8 6274 missingterm(NULL);
6154021b 6275 pl_yylval.ival = OP_CONST;
79072805
LW
6276 TERM(sublex_start());
6277
6278 case '"':
5db06880 6279 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6280 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6281 if (PL_expect == XOPERATOR) {
6282 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6283 return deprecate_commaless_var_list();
a0d0e21e 6284 }
463ee0b2 6285 else
8990e307 6286 no_op("String",s);
463ee0b2 6287 }
79072805 6288 if (!s)
d4c19fe8 6289 missingterm(NULL);
6154021b 6290 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6291 /* FIXME. I think that this can be const if char *d is replaced by
6292 more localised variables. */
3280af22 6293 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6294 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6295 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6296 break;
6297 }
6298 }
79072805
LW
6299 TERM(sublex_start());
6300
6301 case '`':
5db06880 6302 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6303 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6304 if (PL_expect == XOPERATOR)
8990e307 6305 no_op("Backticks",s);
79072805 6306 if (!s)
d4c19fe8 6307 missingterm(NULL);
9b201d7d 6308 readpipe_override();
79072805
LW
6309 TERM(sublex_start());
6310
6311 case '\\':
6312 s++;
a2a5de95
NC
6313 if (PL_lex_inwhat && isDIGIT(*s))
6314 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6315 *s, *s);
3280af22 6316 if (PL_expect == XOPERATOR)
8990e307 6317 no_op("Backslash",s);
79072805
LW
6318 OPERATOR(REFGEN);
6319
a7cb1f99 6320 case 'v':
e526c9e6 6321 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6322 char *start = s + 2;
dd629d5b 6323 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6324 start++;
6325 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6326 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6327 TERM(THING);
6328 }
e526c9e6 6329 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6330 else if (!isALPHA(*start) && (PL_expect == XTERM
6331 || PL_expect == XREF || PL_expect == XSTATE
6332 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6333 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6334 if (!gv) {
6154021b 6335 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6336 TERM(THING);
6337 }
6338 }
a7cb1f99
GS
6339 }
6340 goto keylookup;
79072805 6341 case 'x':
3280af22 6342 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6343 s++;
6344 Mop(OP_REPEAT);
2f3197b3 6345 }
79072805
LW
6346 goto keylookup;
6347
378cc40b 6348 case '_':
79072805
LW
6349 case 'a': case 'A':
6350 case 'b': case 'B':
6351 case 'c': case 'C':
6352 case 'd': case 'D':
6353 case 'e': case 'E':
6354 case 'f': case 'F':
6355 case 'g': case 'G':
6356 case 'h': case 'H':
6357 case 'i': case 'I':
6358 case 'j': case 'J':
6359 case 'k': case 'K':
6360 case 'l': case 'L':
6361 case 'm': case 'M':
6362 case 'n': case 'N':
6363 case 'o': case 'O':
6364 case 'p': case 'P':
6365 case 'q': case 'Q':
6366 case 'r': case 'R':
6367 case 's': case 'S':
6368 case 't': case 'T':
6369 case 'u': case 'U':
a7cb1f99 6370 case 'V':
79072805
LW
6371 case 'w': case 'W':
6372 case 'X':
6373 case 'y': case 'Y':
6374 case 'z': case 'Z':
6375
49dc05e3 6376 keylookup: {
88e1f1a2 6377 bool anydelim;
90771dc0 6378 I32 tmp;
10edeb5d
JH
6379
6380 orig_keyword = 0;
6381 gv = NULL;
6382 gvp = NULL;
49dc05e3 6383
3280af22
NIS
6384 PL_bufptr = s;
6385 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6386
6387 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6388 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6389
6390 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6391 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6392 goto just_a_word;
6393
3643fb5f 6394 d = s;
3280af22 6395 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6396 d++; /* no comments skipped here, or s### is misparsed */
6397
748a9306 6398 /* Is this a word before a => operator? */
1c3923b3 6399 if (*d == '=' && d[1] == '>') {
748a9306 6400 CLINE;
6154021b 6401 pl_yylval.opval
d0a148a6
NC
6402 = (OP*)newSVOP(OP_CONST, 0,
6403 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6404 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6405 TERM(WORD);
6406 }
6407
88e1f1a2
JV
6408 /* Check for plugged-in keyword */
6409 {
6410 OP *o;
6411 int result;
6412 char *saved_bufptr = PL_bufptr;
6413 PL_bufptr = s;
16c91539 6414 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6415 s = PL_bufptr;
6416 if (result == KEYWORD_PLUGIN_DECLINE) {
6417 /* not a plugged-in keyword */
6418 PL_bufptr = saved_bufptr;
6419 } else if (result == KEYWORD_PLUGIN_STMT) {
6420 pl_yylval.opval = o;
6421 CLINE;
6422 PL_expect = XSTATE;
6423 return REPORT(PLUGSTMT);
6424 } else if (result == KEYWORD_PLUGIN_EXPR) {
6425 pl_yylval.opval = o;
6426 CLINE;
6427 PL_expect = XOPERATOR;
6428 return REPORT(PLUGEXPR);
6429 } else {
6430 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6431 PL_tokenbuf);
6432 }
6433 }
6434
6435 /* Check for built-in keyword */
6436 tmp = keyword(PL_tokenbuf, len, 0);
6437
6438 /* Is this a label? */
6439 if (!anydelim && PL_expect == XSTATE
6440 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6441 s = d + 1;
6442 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6443 CLINE;
6444 TOKEN(LABEL);
6445 }
6446
a0d0e21e 6447 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6448 GV *ogv = NULL; /* override (winner) */
6449 GV *hgv = NULL; /* hidden (loser) */
3280af22 6450 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6451 CV *cv;
90e5519e 6452 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6453 (cv = GvCVu(gv)))
6454 {
6455 if (GvIMPORTED_CV(gv))
6456 ogv = gv;
6457 else if (! CvMETHOD(cv))
6458 hgv = gv;
6459 }
6460 if (!ogv &&
3280af22 6461 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6462 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6463 GvCVu(gv) && GvIMPORTED_CV(gv))
6464 {
6465 ogv = gv;
6466 }
6467 }
6468 if (ogv) {
30fe34ed 6469 orig_keyword = tmp;
56f7f34b 6470 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6471 }
6472 else if (gv && !gvp
6473 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6474 && GvCVu(gv))
6e7b2336
GS
6475 {
6476 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6477 }
56f7f34b
CS
6478 else { /* no override */
6479 tmp = -tmp;
a2a5de95
NC
6480 if (tmp == KEY_dump) {
6481 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6482 "dump() better written as CORE::dump()");
ac206dc8 6483 }
a0714e2c 6484 gv = NULL;
56f7f34b 6485 gvp = 0;
a2a5de95
NC
6486 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6487 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6488 "Ambiguous call resolved as CORE::%s(), "
6489 "qualify as such or use &",
6490 GvENAME(hgv));
49dc05e3 6491 }
a0d0e21e
LW
6492 }
6493
6494 reserved_word:
6495 switch (tmp) {
79072805
LW
6496
6497 default: /* not a keyword */
0bfa2a8a
NC
6498 /* Trade off - by using this evil construction we can pull the
6499 variable gv into the block labelled keylookup. If not, then
6500 we have to give it function scope so that the goto from the
6501 earlier ':' case doesn't bypass the initialisation. */
6502 if (0) {
6503 just_a_word_zero_gv:
6504 gv = NULL;
6505 gvp = NULL;
8bee0991 6506 orig_keyword = 0;
0bfa2a8a 6507 }
93a17b20 6508 just_a_word: {
96e4d5b1 6509 SV *sv;
ce29ac45 6510 int pkgname = 0;
f54cb97a 6511 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6512 OP *rv2cv_op;
5069cc75 6513 CV *cv;
5db06880 6514#ifdef PERL_MAD
cd81e915 6515 SV *nextPL_nextwhite = 0;
5db06880
NC
6516#endif
6517
8990e307
LW
6518
6519 /* Get the rest if it looks like a package qualifier */
6520
155aba94 6521 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6522 STRLEN morelen;
3280af22 6523 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6524 TRUE, &morelen);
6525 if (!morelen)
cea2e8a9 6526 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6527 *s == '\'' ? "'" : "::");
c3e0f903 6528 len += morelen;
ce29ac45 6529 pkgname = 1;
a0d0e21e 6530 }
8990e307 6531
3280af22
NIS
6532 if (PL_expect == XOPERATOR) {
6533 if (PL_bufptr == PL_linestart) {
57843af0 6534 CopLINE_dec(PL_curcop);
f1f66076 6535 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6536 CopLINE_inc(PL_curcop);
463ee0b2
LW
6537 }
6538 else
54310121 6539 no_op("Bareword",s);
463ee0b2 6540 }
8990e307 6541
c3e0f903 6542 /* Look for a subroutine with this name in current package,
486ec47a 6543 unless name is "Foo::", in which case Foo is a bareword
c3e0f903
GS
6544 (and a package name). */
6545
5db06880 6546 if (len > 2 && !PL_madskills &&
3280af22 6547 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6548 {
f776e3cd 6549 if (ckWARN(WARN_BAREWORD)
90e5519e 6550 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6551 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6552 "Bareword \"%s\" refers to nonexistent package",
3280af22 6553 PL_tokenbuf);
c3e0f903 6554 len -= 2;
3280af22 6555 PL_tokenbuf[len] = '\0';
a0714e2c 6556 gv = NULL;
c3e0f903
GS
6557 gvp = 0;
6558 }
6559 else {
62d55b22
NC
6560 if (!gv) {
6561 /* Mustn't actually add anything to a symbol table.
6562 But also don't want to "initialise" any placeholder
6563 constants that might already be there into full
6564 blown PVGVs with attached PVCV. */
90e5519e
NC
6565 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6566 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6567 }
b3d904f3 6568 len = 0;
c3e0f903
GS
6569 }
6570
6571 /* if we saw a global override before, get the right name */
8990e307 6572
37bb7629
EB
6573 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6574 len ? len : strlen(PL_tokenbuf));
49dc05e3 6575 if (gvp) {
37bb7629 6576 SV * const tmp_sv = sv;
396482e1 6577 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6578 sv_catsv(sv, tmp_sv);
6579 SvREFCNT_dec(tmp_sv);
8a7a129d 6580 }
37bb7629 6581
5db06880 6582#ifdef PERL_MAD
cd81e915
NC
6583 if (PL_madskills && !PL_thistoken) {
6584 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6585 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6586 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6587 }
6588#endif
8990e307 6589
a0d0e21e 6590 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6591 CLINE;
6154021b
RGS
6592 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6593 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6594
c3e0f903 6595 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6596 if (len)
6597 goto safe_bareword;
6598
f7461760
Z
6599 {
6600 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6601 const_op->op_private = OPpCONST_BARE;
6602 rv2cv_op = newCVREF(0, const_op);
6603 }
d9088386 6604 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6605
8990e307
LW
6606 /* See if it's the indirect object for a list operator. */
6607
3280af22
NIS
6608 if (PL_oldoldbufptr &&
6609 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6610 (PL_oldoldbufptr == PL_last_lop
6611 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6612 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6613 (PL_expect == XREF ||
6614 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6615 {
748a9306
LW
6616 bool immediate_paren = *s == '(';
6617
a0d0e21e 6618 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6619 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6620#ifdef PERL_MAD
cd81e915 6621 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6622#endif
a0d0e21e
LW
6623
6624 /* Two barewords in a row may indicate method call. */
6625
62d55b22 6626 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6627 (tmp = intuit_method(s, gv, cv))) {
6628 op_free(rv2cv_op);
78cdf107
Z
6629 if (tmp == METHOD && !PL_lex_allbrackets &&
6630 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6631 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6632 return REPORT(tmp);
f7461760 6633 }
a0d0e21e
LW
6634
6635 /* If not a declared subroutine, it's an indirect object. */
6636 /* (But it's an indir obj regardless for sort.) */
7294df96 6637 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6638
7294df96
RGS
6639 if (
6640 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6641 (!cv &&
a9ef352a 6642 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6643 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6644 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6645 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6646 )
a9ef352a 6647 {
3280af22 6648 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6649 goto bareword;
93a17b20
LW
6650 }
6651 }
8990e307 6652
3280af22 6653 PL_expect = XOPERATOR;
5db06880
NC
6654#ifdef PERL_MAD
6655 if (isSPACE(*s))
cd81e915
NC
6656 s = SKIPSPACE2(s,nextPL_nextwhite);
6657 PL_nextwhite = nextPL_nextwhite;
5db06880 6658#else
8990e307 6659 s = skipspace(s);
5db06880 6660#endif
1c3923b3
GS
6661
6662 /* Is this a word before a => operator? */
ce29ac45 6663 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6664 op_free(rv2cv_op);
1c3923b3 6665 CLINE;
6154021b 6666 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6667 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6668 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6669 TERM(WORD);
6670 }
6671
6672 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6673 if (*s == '(') {
79072805 6674 CLINE;
5069cc75 6675 if (cv) {
c35e046a
AL
6676 d = s + 1;
6677 while (SPACE_OR_TAB(*d))
6678 d++;
f7461760 6679 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6680 s = d + 1;
c631f32b 6681 goto its_constant;
96e4d5b1 6682 }
6683 }
5db06880
NC
6684#ifdef PERL_MAD
6685 if (PL_madskills) {
cd81e915
NC
6686 PL_nextwhite = PL_thiswhite;
6687 PL_thiswhite = 0;
5db06880 6688 }
cd81e915 6689 start_force(PL_curforce);
5db06880 6690#endif
6154021b 6691 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6692 PL_expect = XOPERATOR;
5db06880
NC
6693#ifdef PERL_MAD
6694 if (PL_madskills) {
cd81e915
NC
6695 PL_nextwhite = nextPL_nextwhite;
6696 curmad('X', PL_thistoken);
6b29d1f5 6697 PL_thistoken = newSVpvs("");
5db06880
NC
6698 }
6699#endif
f7461760 6700 op_free(rv2cv_op);
93a17b20 6701 force_next(WORD);
6154021b 6702 pl_yylval.ival = 0;
463ee0b2 6703 TOKEN('&');
79072805 6704 }
93a17b20 6705
a0d0e21e 6706 /* If followed by var or block, call it a method (unless sub) */
8990e307 6707
f7461760
Z
6708 if ((*s == '$' || *s == '{') && !cv) {
6709 op_free(rv2cv_op);
3280af22
NIS
6710 PL_last_lop = PL_oldbufptr;
6711 PL_last_lop_op = OP_METHOD;
78cdf107
Z
6712 if (!PL_lex_allbrackets &&
6713 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6714 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 6715 PREBLOCK(METHOD);
463ee0b2
LW
6716 }
6717
8990e307
LW
6718 /* If followed by a bareword, see if it looks like indir obj. */
6719
30fe34ed
RGS
6720 if (!orig_keyword
6721 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6722 && (tmp = intuit_method(s, gv, cv))) {
6723 op_free(rv2cv_op);
78cdf107
Z
6724 if (tmp == METHOD && !PL_lex_allbrackets &&
6725 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6726 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6727 return REPORT(tmp);
f7461760 6728 }
93a17b20 6729
8990e307
LW
6730 /* Not a method, so call it a subroutine (if defined) */
6731
5069cc75 6732 if (cv) {
9b387841
NC
6733 if (lastchar == '-')
6734 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6735 "Ambiguous use of -%s resolved as -&%s()",
6736 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6737 /* Check for a constant sub */
f7461760 6738 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6739 its_constant:
f7461760 6740 op_free(rv2cv_op);
6154021b
RGS
6741 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6742 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6743 pl_yylval.opval->op_private = 0;
6b7c6d95 6744 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 6745 TOKEN(WORD);
89bfa8cd 6746 }
6747
6154021b 6748 op_free(pl_yylval.opval);
f7461760 6749 pl_yylval.opval = rv2cv_op;
6154021b 6750 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6751 PL_last_lop = PL_oldbufptr;
bf848113 6752 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6753 /* Is there a prototype? */
5db06880
NC
6754 if (
6755#ifdef PERL_MAD
6756 cv &&
6757#endif
d9f2850e
RGS
6758 SvPOK(cv))
6759 {
5f66b61c 6760 STRLEN protolen;
daba3364 6761 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6762 if (!protolen)
4633a7c4 6763 TERM(FUNC0SUB);
0f5d0394
AE
6764 while (*proto == ';')
6765 proto++;
649d02de
FC
6766 if (
6767 (
6768 (
6769 *proto == '$' || *proto == '_'
c035a075 6770 || *proto == '*' || *proto == '+'
649d02de
FC
6771 )
6772 && proto[1] == '\0'
6773 )
6774 || (
6775 *proto == '\\' && proto[1] && proto[2] == '\0'
6776 )
6777 )
6778 OPERATOR(UNIOPSUB);
6779 if (*proto == '\\' && proto[1] == '[') {
6780 const char *p = proto + 2;
6781 while(*p && *p != ']')
6782 ++p;
6783 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6784 }
7a52d87a 6785 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6786 if (PL_curstash)
6787 sv_setpvs(PL_subname, "__ANON__");
6788 else
6789 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
6790 if (!PL_lex_allbrackets &&
6791 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6792 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
6793 PREBLOCK(LSTOPSUB);
6794 }
a9ef352a 6795 }
5db06880
NC
6796#ifdef PERL_MAD
6797 {
6798 if (PL_madskills) {
cd81e915
NC
6799 PL_nextwhite = PL_thiswhite;
6800 PL_thiswhite = 0;
5db06880 6801 }
cd81e915 6802 start_force(PL_curforce);
6154021b 6803 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6804 PL_expect = XTERM;
6805 if (PL_madskills) {
cd81e915
NC
6806 PL_nextwhite = nextPL_nextwhite;
6807 curmad('X', PL_thistoken);
6b29d1f5 6808 PL_thistoken = newSVpvs("");
5db06880
NC
6809 }
6810 force_next(WORD);
78cdf107
Z
6811 if (!PL_lex_allbrackets &&
6812 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6813 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6814 TOKEN(NOAMP);
6815 }
6816 }
6817
6818 /* Guess harder when madskills require "best effort". */
6819 if (PL_madskills && (!gv || !GvCVu(gv))) {
6820 int probable_sub = 0;
6821 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6822 probable_sub = 1;
6823 else if (isALPHA(*s)) {
6824 char tmpbuf[1024];
6825 STRLEN tmplen;
6826 d = s;
6827 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6828 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6829 probable_sub = 1;
6830 else {
6831 while (d < PL_bufend && isSPACE(*d))
6832 d++;
6833 if (*d == '=' && d[1] == '>')
6834 probable_sub = 1;
6835 }
6836 }
6837 if (probable_sub) {
7a6d04f4 6838 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6839 op_free(pl_yylval.opval);
f7461760 6840 pl_yylval.opval = rv2cv_op;
6154021b 6841 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6842 PL_last_lop = PL_oldbufptr;
6843 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6844 PL_nextwhite = PL_thiswhite;
6845 PL_thiswhite = 0;
6846 start_force(PL_curforce);
6154021b 6847 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6848 PL_expect = XTERM;
cd81e915
NC
6849 PL_nextwhite = nextPL_nextwhite;
6850 curmad('X', PL_thistoken);
6b29d1f5 6851 PL_thistoken = newSVpvs("");
5db06880 6852 force_next(WORD);
78cdf107
Z
6853 if (!PL_lex_allbrackets &&
6854 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6855 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6856 TOKEN(NOAMP);
6857 }
6858#else
6154021b 6859 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6860 PL_expect = XTERM;
8990e307 6861 force_next(WORD);
78cdf107
Z
6862 if (!PL_lex_allbrackets &&
6863 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6864 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 6865 TOKEN(NOAMP);
5db06880 6866#endif
8990e307 6867 }
748a9306 6868
8990e307
LW
6869 /* Call it a bare word */
6870
5603f27d 6871 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6872 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6873 else {
9a073a1d
RGS
6874 bareword:
6875 /* after "print" and similar functions (corresponding to
6876 * "F? L" in opcode.pl), whatever wasn't already parsed as
6877 * a filehandle should be subject to "strict subs".
6878 * Likewise for the optional indirect-object argument to system
6879 * or exec, which can't be a bareword */
6880 if ((PL_last_lop_op == OP_PRINT
6881 || PL_last_lop_op == OP_PRTF
6882 || PL_last_lop_op == OP_SAY
6883 || PL_last_lop_op == OP_SYSTEM
6884 || PL_last_lop_op == OP_EXEC)
6885 && (PL_hints & HINT_STRICT_SUBS))
6886 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6887 if (lastchar != '-') {
6888 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6889 d = PL_tokenbuf;
6890 while (isLOWER(*d))
6891 d++;
da51bb9b 6892 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6893 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6894 PL_tokenbuf);
6895 }
748a9306
LW
6896 }
6897 }
f7461760 6898 op_free(rv2cv_op);
c3e0f903
GS
6899
6900 safe_bareword:
9b387841
NC
6901 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6902 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6903 "Operator or semicolon missing before %c%s",
6904 lastchar, PL_tokenbuf);
6905 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6906 "Ambiguous use of %c resolved as operator %c",
6907 lastchar, lastchar);
748a9306 6908 }
93a17b20 6909 TOKEN(WORD);
79072805 6910 }
79072805 6911
68dc0745 6912 case KEY___FILE__:
6154021b 6913 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6914 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6915 TERM(THING);
6916
79072805 6917 case KEY___LINE__:
6154021b 6918 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6919 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6920 TERM(THING);
68dc0745 6921
6922 case KEY___PACKAGE__:
6154021b 6923 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6924 (PL_curstash
5aaec2b4 6925 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6926 : &PL_sv_undef));
79072805 6927 TERM(THING);
79072805 6928
e50aee73 6929 case KEY___DATA__:
79072805
LW
6930 case KEY___END__: {
6931 GV *gv;
3280af22 6932 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6933 const char *pname = "main";
3280af22 6934 if (PL_tokenbuf[2] == 'D')
bfcb3514 6935 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6936 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6937 SVt_PVIO);
a5f75d66 6938 GvMULTI_on(gv);
79072805 6939 if (!GvIO(gv))
a0d0e21e 6940 GvIOp(gv) = newIO();
3280af22 6941 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6942#if defined(HAS_FCNTL) && defined(F_SETFD)
6943 {
f54cb97a 6944 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6945 fcntl(fd,F_SETFD,fd >= 3);
6946 }
79072805 6947#endif
fd049845 6948 /* Mark this internal pseudo-handle as clean */
6949 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6950 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6951 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6952 else
50952442 6953 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6954#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6955 /* if the script was opened in binmode, we need to revert
53129d29 6956 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6957 * XXX this is a questionable hack at best. */
53129d29
GS
6958 if (PL_bufend-PL_bufptr > 2
6959 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6960 {
6961 Off_t loc = 0;
50952442 6962 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6963 loc = PerlIO_tell(PL_rsfp);
6964 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6965 }
2986a63f
JH
6966#ifdef NETWARE
6967 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6968#else
c39cd008 6969 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6970#endif /* NETWARE */
1143fce0
JH
6971#ifdef PERLIO_IS_STDIO /* really? */
6972# if defined(__BORLANDC__)
cb359b41
JH
6973 /* XXX see note in do_binmode() */
6974 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6975# endif
6976#endif
c39cd008
GS
6977 if (loc > 0)
6978 PerlIO_seek(PL_rsfp, loc, 0);
6979 }
6980 }
6981#endif
7948272d 6982#ifdef PERLIO_LAYERS
52d2e0f4
JH
6983 if (!IN_BYTES) {
6984 if (UTF)
6985 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6986 else if (PL_encoding) {
6987 SV *name;
6988 dSP;
6989 ENTER;
6990 SAVETMPS;
6991 PUSHMARK(sp);
6992 EXTEND(SP, 1);
6993 XPUSHs(PL_encoding);
6994 PUTBACK;
6995 call_method("name", G_SCALAR);
6996 SPAGAIN;
6997 name = POPs;
6998 PUTBACK;
bfed75c6 6999 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7000 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7001 SVfARG(name)));
52d2e0f4
JH
7002 FREETMPS;
7003 LEAVE;
7004 }
7005 }
7948272d 7006#endif
5db06880
NC
7007#ifdef PERL_MAD
7008 if (PL_madskills) {
cd81e915
NC
7009 if (PL_realtokenstart >= 0) {
7010 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7011 if (!PL_endwhite)
6b29d1f5 7012 PL_endwhite = newSVpvs("");
cd81e915
NC
7013 sv_catsv(PL_endwhite, PL_thiswhite);
7014 PL_thiswhite = 0;
7015 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7016 PL_realtokenstart = -1;
5db06880 7017 }
5cc814fd
NC
7018 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7019 != NULL) ;
5db06880
NC
7020 }
7021#endif
4608196e 7022 PL_rsfp = NULL;
79072805
LW
7023 }
7024 goto fake_eof;
e929a76b 7025 }
de3bb511 7026
8990e307 7027 case KEY_AUTOLOAD:
ed6116ce 7028 case KEY_DESTROY:
79072805 7029 case KEY_BEGIN:
3c10abe3 7030 case KEY_UNITCHECK:
7d30b5c4 7031 case KEY_CHECK:
7d07dbc2 7032 case KEY_INIT:
7d30b5c4 7033 case KEY_END:
3280af22
NIS
7034 if (PL_expect == XSTATE) {
7035 s = PL_bufptr;
93a17b20 7036 goto really_sub;
79072805
LW
7037 }
7038 goto just_a_word;
7039
a0d0e21e
LW
7040 case KEY_CORE:
7041 if (*s == ':' && s[1] == ':') {
7042 s += 2;
748a9306 7043 d = s;
3280af22 7044 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 7045 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 7046 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
7047 if (tmp < 0)
7048 tmp = -tmp;
850e8516 7049 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 7050 /* that's a way to remember we saw "CORE::" */
850e8516 7051 orig_keyword = tmp;
a0d0e21e
LW
7052 goto reserved_word;
7053 }
7054 goto just_a_word;
7055
463ee0b2
LW
7056 case KEY_abs:
7057 UNI(OP_ABS);
7058
79072805
LW
7059 case KEY_alarm:
7060 UNI(OP_ALARM);
7061
7062 case KEY_accept:
a0d0e21e 7063 LOP(OP_ACCEPT,XTERM);
79072805 7064
463ee0b2 7065 case KEY_and:
78cdf107
Z
7066 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7067 return REPORT(0);
463ee0b2
LW
7068 OPERATOR(ANDOP);
7069
79072805 7070 case KEY_atan2:
a0d0e21e 7071 LOP(OP_ATAN2,XTERM);
85e6fe83 7072
79072805 7073 case KEY_bind:
a0d0e21e 7074 LOP(OP_BIND,XTERM);
79072805
LW
7075
7076 case KEY_binmode:
1c1fc3ea 7077 LOP(OP_BINMODE,XTERM);
79072805
LW
7078
7079 case KEY_bless:
a0d0e21e 7080 LOP(OP_BLESS,XTERM);
79072805 7081
0d863452
RH
7082 case KEY_break:
7083 FUN0(OP_BREAK);
7084
79072805
LW
7085 case KEY_chop:
7086 UNI(OP_CHOP);
7087
7088 case KEY_continue:
0d863452
RH
7089 /* When 'use switch' is in effect, continue has a dual
7090 life as a control operator. */
7091 {
ef89dcc3 7092 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
7093 PREBLOCK(CONTINUE);
7094 else {
7095 /* We have to disambiguate the two senses of
7096 "continue". If the next token is a '{' then
7097 treat it as the start of a continue block;
7098 otherwise treat it as a control operator.
7099 */
7100 s = skipspace(s);
7101 if (*s == '{')
79072805 7102 PREBLOCK(CONTINUE);
0d863452
RH
7103 else
7104 FUN0(OP_CONTINUE);
7105 }
7106 }
79072805
LW
7107
7108 case KEY_chdir:
fafc274c
NC
7109 /* may use HOME */
7110 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7111 UNI(OP_CHDIR);
7112
7113 case KEY_close:
7114 UNI(OP_CLOSE);
7115
7116 case KEY_closedir:
7117 UNI(OP_CLOSEDIR);
7118
7119 case KEY_cmp:
78cdf107
Z
7120 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7121 return REPORT(0);
79072805
LW
7122 Eop(OP_SCMP);
7123
7124 case KEY_caller:
7125 UNI(OP_CALLER);
7126
7127 case KEY_crypt:
7128#ifdef FCRYPT
f4c556ac
GS
7129 if (!PL_cryptseen) {
7130 PL_cryptseen = TRUE;
de3bb511 7131 init_des();
f4c556ac 7132 }
a687059c 7133#endif
a0d0e21e 7134 LOP(OP_CRYPT,XTERM);
79072805
LW
7135
7136 case KEY_chmod:
a0d0e21e 7137 LOP(OP_CHMOD,XTERM);
79072805
LW
7138
7139 case KEY_chown:
a0d0e21e 7140 LOP(OP_CHOWN,XTERM);
79072805
LW
7141
7142 case KEY_connect:
a0d0e21e 7143 LOP(OP_CONNECT,XTERM);
79072805 7144
463ee0b2
LW
7145 case KEY_chr:
7146 UNI(OP_CHR);
7147
79072805
LW
7148 case KEY_cos:
7149 UNI(OP_COS);
7150
7151 case KEY_chroot:
7152 UNI(OP_CHROOT);
7153
0d863452
RH
7154 case KEY_default:
7155 PREBLOCK(DEFAULT);
7156
79072805 7157 case KEY_do:
29595ff2 7158 s = SKIPSPACE1(s);
79072805 7159 if (*s == '{')
a0d0e21e 7160 PRETERMBLOCK(DO);
79072805 7161 if (*s != '\'')
89c5585f 7162 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
7163 if (orig_keyword == KEY_do) {
7164 orig_keyword = 0;
6154021b 7165 pl_yylval.ival = 1;
850e8516
RGS
7166 }
7167 else
6154021b 7168 pl_yylval.ival = 0;
378cc40b 7169 OPERATOR(DO);
79072805
LW
7170
7171 case KEY_die:
3280af22 7172 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7173 LOP(OP_DIE,XTERM);
79072805
LW
7174
7175 case KEY_defined:
7176 UNI(OP_DEFINED);
7177
7178 case KEY_delete:
a0d0e21e 7179 UNI(OP_DELETE);
79072805
LW
7180
7181 case KEY_dbmopen:
74e8ce34
NC
7182 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7183 STR_WITH_LEN("NDBM_File::"),
7184 STR_WITH_LEN("DB_File::"),
7185 STR_WITH_LEN("GDBM_File::"),
7186 STR_WITH_LEN("SDBM_File::"),
7187 STR_WITH_LEN("ODBM_File::"),
7188 NULL);
a0d0e21e 7189 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7190
7191 case KEY_dbmclose:
7192 UNI(OP_DBMCLOSE);
7193
7194 case KEY_dump:
a0d0e21e 7195 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7196 LOOPX(OP_DUMP);
7197
7198 case KEY_else:
7199 PREBLOCK(ELSE);
7200
7201 case KEY_elsif:
6154021b 7202 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7203 OPERATOR(ELSIF);
7204
7205 case KEY_eq:
78cdf107
Z
7206 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7207 return REPORT(0);
79072805
LW
7208 Eop(OP_SEQ);
7209
a0d0e21e
LW
7210 case KEY_exists:
7211 UNI(OP_EXISTS);
4e553d73 7212
79072805 7213 case KEY_exit:
5db06880
NC
7214 if (PL_madskills)
7215 UNI(OP_INT);
79072805
LW
7216 UNI(OP_EXIT);
7217
7218 case KEY_eval:
29595ff2 7219 s = SKIPSPACE1(s);
32e2a35d
RGS
7220 if (*s == '{') { /* block eval */
7221 PL_expect = XTERMBLOCK;
7222 UNIBRACK(OP_ENTERTRY);
7223 }
7224 else { /* string eval */
7225 PL_expect = XTERM;
7226 UNIBRACK(OP_ENTEREVAL);
7227 }
79072805
LW
7228
7229 case KEY_eof:
7230 UNI(OP_EOF);
7231
7232 case KEY_exp:
7233 UNI(OP_EXP);
7234
7235 case KEY_each:
7236 UNI(OP_EACH);
7237
7238 case KEY_exec:
a0d0e21e 7239 LOP(OP_EXEC,XREF);
79072805
LW
7240
7241 case KEY_endhostent:
7242 FUN0(OP_EHOSTENT);
7243
7244 case KEY_endnetent:
7245 FUN0(OP_ENETENT);
7246
7247 case KEY_endservent:
7248 FUN0(OP_ESERVENT);
7249
7250 case KEY_endprotoent:
7251 FUN0(OP_EPROTOENT);
7252
7253 case KEY_endpwent:
7254 FUN0(OP_EPWENT);
7255
7256 case KEY_endgrent:
7257 FUN0(OP_EGRENT);
7258
7259 case KEY_for:
7260 case KEY_foreach:
78cdf107
Z
7261 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7262 return REPORT(0);
6154021b 7263 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7264 s = SKIPSPACE1(s);
7e2040f0 7265 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7266 char *p = s;
5db06880
NC
7267#ifdef PERL_MAD
7268 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7269#endif
7270
3280af22 7271 if ((PL_bufend - p) >= 3 &&
55497cff 7272 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7273 p += 2;
77ca0c92
LW
7274 else if ((PL_bufend - p) >= 4 &&
7275 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7276 p += 3;
29595ff2 7277 p = PEEKSPACE(p);
7e2040f0 7278 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7279 p = scan_ident(p, PL_bufend,
7280 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7281 p = PEEKSPACE(p);
77ca0c92
LW
7282 }
7283 if (*p != '$')
cea2e8a9 7284 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7285#ifdef PERL_MAD
7286 s = SvPVX(PL_linestr) + soff;
7287#endif
55497cff 7288 }
79072805
LW
7289 OPERATOR(FOR);
7290
7291 case KEY_formline:
a0d0e21e 7292 LOP(OP_FORMLINE,XTERM);
79072805
LW
7293
7294 case KEY_fork:
7295 FUN0(OP_FORK);
7296
7297 case KEY_fcntl:
a0d0e21e 7298 LOP(OP_FCNTL,XTERM);
79072805
LW
7299
7300 case KEY_fileno:
7301 UNI(OP_FILENO);
7302
7303 case KEY_flock:
a0d0e21e 7304 LOP(OP_FLOCK,XTERM);
79072805
LW
7305
7306 case KEY_gt:
78cdf107
Z
7307 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7308 return REPORT(0);
79072805
LW
7309 Rop(OP_SGT);
7310
7311 case KEY_ge:
78cdf107
Z
7312 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7313 return REPORT(0);
79072805
LW
7314 Rop(OP_SGE);
7315
7316 case KEY_grep:
2c38e13d 7317 LOP(OP_GREPSTART, XREF);
79072805
LW
7318
7319 case KEY_goto:
a0d0e21e 7320 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7321 LOOPX(OP_GOTO);
7322
7323 case KEY_gmtime:
7324 UNI(OP_GMTIME);
7325
7326 case KEY_getc:
6f33ba73 7327 UNIDOR(OP_GETC);
79072805
LW
7328
7329 case KEY_getppid:
7330 FUN0(OP_GETPPID);
7331
7332 case KEY_getpgrp:
7333 UNI(OP_GETPGRP);
7334
7335 case KEY_getpriority:
a0d0e21e 7336 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7337
7338 case KEY_getprotobyname:
7339 UNI(OP_GPBYNAME);
7340
7341 case KEY_getprotobynumber:
a0d0e21e 7342 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7343
7344 case KEY_getprotoent:
7345 FUN0(OP_GPROTOENT);
7346
7347 case KEY_getpwent:
7348 FUN0(OP_GPWENT);
7349
7350 case KEY_getpwnam:
ff68c719 7351 UNI(OP_GPWNAM);
79072805
LW
7352
7353 case KEY_getpwuid:
ff68c719 7354 UNI(OP_GPWUID);
79072805
LW
7355
7356 case KEY_getpeername:
7357 UNI(OP_GETPEERNAME);
7358
7359 case KEY_gethostbyname:
7360 UNI(OP_GHBYNAME);
7361
7362 case KEY_gethostbyaddr:
a0d0e21e 7363 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7364
7365 case KEY_gethostent:
7366 FUN0(OP_GHOSTENT);
7367
7368 case KEY_getnetbyname:
7369 UNI(OP_GNBYNAME);
7370
7371 case KEY_getnetbyaddr:
a0d0e21e 7372 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7373
7374 case KEY_getnetent:
7375 FUN0(OP_GNETENT);
7376
7377 case KEY_getservbyname:
a0d0e21e 7378 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7379
7380 case KEY_getservbyport:
a0d0e21e 7381 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7382
7383 case KEY_getservent:
7384 FUN0(OP_GSERVENT);
7385
7386 case KEY_getsockname:
7387 UNI(OP_GETSOCKNAME);
7388
7389 case KEY_getsockopt:
a0d0e21e 7390 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7391
7392 case KEY_getgrent:
7393 FUN0(OP_GGRENT);
7394
7395 case KEY_getgrnam:
ff68c719 7396 UNI(OP_GGRNAM);
79072805
LW
7397
7398 case KEY_getgrgid:
ff68c719 7399 UNI(OP_GGRGID);
79072805
LW
7400
7401 case KEY_getlogin:
7402 FUN0(OP_GETLOGIN);
7403
0d863452 7404 case KEY_given:
6154021b 7405 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7406 OPERATOR(GIVEN);
7407
93a17b20 7408 case KEY_glob:
a0d0e21e 7409 LOP(OP_GLOB,XTERM);
93a17b20 7410
79072805
LW
7411 case KEY_hex:
7412 UNI(OP_HEX);
7413
7414 case KEY_if:
78cdf107
Z
7415 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7416 return REPORT(0);
6154021b 7417 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7418 OPERATOR(IF);
7419
7420 case KEY_index:
a0d0e21e 7421 LOP(OP_INDEX,XTERM);
79072805
LW
7422
7423 case KEY_int:
7424 UNI(OP_INT);
7425
7426 case KEY_ioctl:
a0d0e21e 7427 LOP(OP_IOCTL,XTERM);
79072805
LW
7428
7429 case KEY_join:
a0d0e21e 7430 LOP(OP_JOIN,XTERM);
79072805
LW
7431
7432 case KEY_keys:
7433 UNI(OP_KEYS);
7434
7435 case KEY_kill:
a0d0e21e 7436 LOP(OP_KILL,XTERM);
79072805
LW
7437
7438 case KEY_last:
a0d0e21e 7439 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7440 LOOPX(OP_LAST);
4e553d73 7441
79072805
LW
7442 case KEY_lc:
7443 UNI(OP_LC);
7444
7445 case KEY_lcfirst:
7446 UNI(OP_LCFIRST);
7447
7448 case KEY_local:
6154021b 7449 pl_yylval.ival = 0;
79072805
LW
7450 OPERATOR(LOCAL);
7451
7452 case KEY_length:
7453 UNI(OP_LENGTH);
7454
7455 case KEY_lt:
78cdf107
Z
7456 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7457 return REPORT(0);
79072805
LW
7458 Rop(OP_SLT);
7459
7460 case KEY_le:
78cdf107
Z
7461 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7462 return REPORT(0);
79072805
LW
7463 Rop(OP_SLE);
7464
7465 case KEY_localtime:
7466 UNI(OP_LOCALTIME);
7467
7468 case KEY_log:
7469 UNI(OP_LOG);
7470
7471 case KEY_link:
a0d0e21e 7472 LOP(OP_LINK,XTERM);
79072805
LW
7473
7474 case KEY_listen:
a0d0e21e 7475 LOP(OP_LISTEN,XTERM);
79072805 7476
c0329465
MB
7477 case KEY_lock:
7478 UNI(OP_LOCK);
7479
79072805
LW
7480 case KEY_lstat:
7481 UNI(OP_LSTAT);
7482
7483 case KEY_m:
8782bef2 7484 s = scan_pat(s,OP_MATCH);
79072805
LW
7485 TERM(sublex_start());
7486
a0d0e21e 7487 case KEY_map:
2c38e13d 7488 LOP(OP_MAPSTART, XREF);
4e4e412b 7489
79072805 7490 case KEY_mkdir:
a0d0e21e 7491 LOP(OP_MKDIR,XTERM);
79072805
LW
7492
7493 case KEY_msgctl:
a0d0e21e 7494 LOP(OP_MSGCTL,XTERM);
79072805
LW
7495
7496 case KEY_msgget:
a0d0e21e 7497 LOP(OP_MSGGET,XTERM);
79072805
LW
7498
7499 case KEY_msgrcv:
a0d0e21e 7500 LOP(OP_MSGRCV,XTERM);
79072805
LW
7501
7502 case KEY_msgsnd:
a0d0e21e 7503 LOP(OP_MSGSND,XTERM);
79072805 7504
77ca0c92 7505 case KEY_our:
93a17b20 7506 case KEY_my:
952306ac 7507 case KEY_state:
eac04b2e 7508 PL_in_my = (U16)tmp;
29595ff2 7509 s = SKIPSPACE1(s);
7e2040f0 7510 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7511#ifdef PERL_MAD
7512 char* start = s;
7513#endif
3280af22 7514 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7515 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7516 goto really_sub;
def3634b 7517 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7518 if (!PL_in_my_stash) {
c750a3ec 7519 char tmpbuf[1024];
3280af22 7520 PL_bufptr = s;
d9fad198 7521 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7522 yyerror(tmpbuf);
7523 }
5db06880
NC
7524#ifdef PERL_MAD
7525 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7526 sv_catsv(PL_thistoken, PL_nextwhite);
7527 PL_nextwhite = 0;
7528 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7529 }
7530#endif
c750a3ec 7531 }
6154021b 7532 pl_yylval.ival = 1;
55497cff 7533 OPERATOR(MY);
93a17b20 7534
79072805 7535 case KEY_next:
a0d0e21e 7536 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7537 LOOPX(OP_NEXT);
7538
7539 case KEY_ne:
78cdf107
Z
7540 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7541 return REPORT(0);
79072805
LW
7542 Eop(OP_SNE);
7543
a0d0e21e 7544 case KEY_no:
468aa647 7545 s = tokenize_use(0, s);
a0d0e21e
LW
7546 OPERATOR(USE);
7547
7548 case KEY_not:
29595ff2 7549 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 7550 FUN1(OP_NOT);
78cdf107
Z
7551 else {
7552 if (!PL_lex_allbrackets &&
7553 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7554 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 7555 OPERATOR(NOTOP);
78cdf107 7556 }
a0d0e21e 7557
79072805 7558 case KEY_open:
29595ff2 7559 s = SKIPSPACE1(s);
7e2040f0 7560 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7561 const char *t;
c35e046a
AL
7562 for (d = s; isALNUM_lazy_if(d,UTF);)
7563 d++;
7564 for (t=d; isSPACE(*t);)
7565 t++;
e2ab214b 7566 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7567 /* [perl #16184] */
7568 && !(t[0] == '=' && t[1] == '>')
7569 ) {
5f66b61c 7570 int parms_len = (int)(d-s);
9014280d 7571 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7572 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7573 parms_len, s, parms_len, s);
66fbe8fb 7574 }
93a17b20 7575 }
a0d0e21e 7576 LOP(OP_OPEN,XTERM);
79072805 7577
463ee0b2 7578 case KEY_or:
78cdf107
Z
7579 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7580 return REPORT(0);
6154021b 7581 pl_yylval.ival = OP_OR;
463ee0b2
LW
7582 OPERATOR(OROP);
7583
79072805
LW
7584 case KEY_ord:
7585 UNI(OP_ORD);
7586
7587 case KEY_oct:
7588 UNI(OP_OCT);
7589
7590 case KEY_opendir:
a0d0e21e 7591 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7592
7593 case KEY_print:
3280af22 7594 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7595 LOP(OP_PRINT,XREF);
79072805
LW
7596
7597 case KEY_printf:
3280af22 7598 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7599 LOP(OP_PRTF,XREF);
79072805 7600
c07a80fd 7601 case KEY_prototype:
7602 UNI(OP_PROTOTYPE);
7603
79072805 7604 case KEY_push:
a0d0e21e 7605 LOP(OP_PUSH,XTERM);
79072805
LW
7606
7607 case KEY_pop:
6f33ba73 7608 UNIDOR(OP_POP);
79072805 7609
a0d0e21e 7610 case KEY_pos:
6f33ba73 7611 UNIDOR(OP_POS);
4e553d73 7612
79072805 7613 case KEY_pack:
a0d0e21e 7614 LOP(OP_PACK,XTERM);
79072805
LW
7615
7616 case KEY_package:
a0d0e21e 7617 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7618 s = SKIPSPACE1(s);
91152fc1 7619 s = force_strict_version(s);
4e4da3ac 7620 PL_lex_expect = XBLOCK;
79072805
LW
7621 OPERATOR(PACKAGE);
7622
7623 case KEY_pipe:
a0d0e21e 7624 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7625
7626 case KEY_q:
5db06880 7627 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7628 if (!s)
d4c19fe8 7629 missingterm(NULL);
6154021b 7630 pl_yylval.ival = OP_CONST;
79072805
LW
7631 TERM(sublex_start());
7632
a0d0e21e
LW
7633 case KEY_quotemeta:
7634 UNI(OP_QUOTEMETA);
7635
ea25a9b2
Z
7636 case KEY_qw: {
7637 OP *words = NULL;
5db06880 7638 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7639 if (!s)
d4c19fe8 7640 missingterm(NULL);
3480a8d2 7641 PL_expect = XOPERATOR;
8127e0e3 7642 if (SvCUR(PL_lex_stuff)) {
8127e0e3 7643 int warned = 0;
3280af22 7644 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7645 while (len) {
d4c19fe8
AL
7646 for (; isSPACE(*d) && len; --len, ++d)
7647 /**/;
8127e0e3 7648 if (len) {
d4c19fe8 7649 SV *sv;
f54cb97a 7650 const char *b = d;
e476b1b5 7651 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7652 for (; !isSPACE(*d) && len; --len, ++d) {
7653 if (*d == ',') {
9014280d 7654 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7655 "Possible attempt to separate words with commas");
7656 ++warned;
7657 }
7658 else if (*d == '#') {
9014280d 7659 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7660 "Possible attempt to put comments in qw() list");
7661 ++warned;
7662 }
7663 }
7664 }
7665 else {
d4c19fe8
AL
7666 for (; !isSPACE(*d) && len; --len, ++d)
7667 /**/;
8127e0e3 7668 }
740cce10 7669 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7670 words = op_append_elem(OP_LIST, words,
7948272d 7671 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7672 }
7673 }
7674 }
ea25a9b2
Z
7675 if (!words)
7676 words = newNULLLIST();
37fd879b 7677 if (PL_lex_stuff) {
8127e0e3 7678 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7679 PL_lex_stuff = NULL;
37fd879b 7680 }
ea25a9b2
Z
7681 PL_expect = XOPERATOR;
7682 pl_yylval.opval = sawparens(words);
7683 TOKEN(QWLIST);
7684 }
8990e307 7685
79072805 7686 case KEY_qq:
5db06880 7687 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7688 if (!s)
d4c19fe8 7689 missingterm(NULL);
6154021b 7690 pl_yylval.ival = OP_STRINGIFY;
3280af22 7691 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 7692 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
7693 TERM(sublex_start());
7694
8782bef2
GB
7695 case KEY_qr:
7696 s = scan_pat(s,OP_QR);
7697 TERM(sublex_start());
7698
79072805 7699 case KEY_qx:
5db06880 7700 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7701 if (!s)
d4c19fe8 7702 missingterm(NULL);
9b201d7d 7703 readpipe_override();
79072805
LW
7704 TERM(sublex_start());
7705
7706 case KEY_return:
7707 OLDLOP(OP_RETURN);
7708
7709 case KEY_require:
29595ff2 7710 s = SKIPSPACE1(s);
e759cc13
RGS
7711 if (isDIGIT(*s)) {
7712 s = force_version(s, FALSE);
a7cb1f99 7713 }
e759cc13
RGS
7714 else if (*s != 'v' || !isDIGIT(s[1])
7715 || (s = force_version(s, TRUE), *s == 'v'))
7716 {
a7cb1f99
GS
7717 *PL_tokenbuf = '\0';
7718 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7719 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7720 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7721 else if (*s == '<')
7722 yyerror("<> should be quotes");
7723 }
a72a1c8b
RGS
7724 if (orig_keyword == KEY_require) {
7725 orig_keyword = 0;
6154021b 7726 pl_yylval.ival = 1;
a72a1c8b
RGS
7727 }
7728 else
6154021b 7729 pl_yylval.ival = 0;
a72a1c8b
RGS
7730 PL_expect = XTERM;
7731 PL_bufptr = s;
7732 PL_last_uni = PL_oldbufptr;
7733 PL_last_lop_op = OP_REQUIRE;
7734 s = skipspace(s);
7735 return REPORT( (int)REQUIRE );
79072805
LW
7736
7737 case KEY_reset:
7738 UNI(OP_RESET);
7739
7740 case KEY_redo:
a0d0e21e 7741 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7742 LOOPX(OP_REDO);
7743
7744 case KEY_rename:
a0d0e21e 7745 LOP(OP_RENAME,XTERM);
79072805
LW
7746
7747 case KEY_rand:
7748 UNI(OP_RAND);
7749
7750 case KEY_rmdir:
7751 UNI(OP_RMDIR);
7752
7753 case KEY_rindex:
a0d0e21e 7754 LOP(OP_RINDEX,XTERM);
79072805
LW
7755
7756 case KEY_read:
a0d0e21e 7757 LOP(OP_READ,XTERM);
79072805
LW
7758
7759 case KEY_readdir:
7760 UNI(OP_READDIR);
7761
93a17b20 7762 case KEY_readline:
6f33ba73 7763 UNIDOR(OP_READLINE);
93a17b20
LW
7764
7765 case KEY_readpipe:
0858480c 7766 UNIDOR(OP_BACKTICK);
93a17b20 7767
79072805
LW
7768 case KEY_rewinddir:
7769 UNI(OP_REWINDDIR);
7770
7771 case KEY_recv:
a0d0e21e 7772 LOP(OP_RECV,XTERM);
79072805
LW
7773
7774 case KEY_reverse:
a0d0e21e 7775 LOP(OP_REVERSE,XTERM);
79072805
LW
7776
7777 case KEY_readlink:
6f33ba73 7778 UNIDOR(OP_READLINK);
79072805
LW
7779
7780 case KEY_ref:
7781 UNI(OP_REF);
7782
7783 case KEY_s:
7784 s = scan_subst(s);
6154021b 7785 if (pl_yylval.opval)
79072805
LW
7786 TERM(sublex_start());
7787 else
7788 TOKEN(1); /* force error */
7789
0d863452
RH
7790 case KEY_say:
7791 checkcomma(s,PL_tokenbuf,"filehandle");
7792 LOP(OP_SAY,XREF);
7793
a0d0e21e
LW
7794 case KEY_chomp:
7795 UNI(OP_CHOMP);
4e553d73 7796
79072805
LW
7797 case KEY_scalar:
7798 UNI(OP_SCALAR);
7799
7800 case KEY_select:
a0d0e21e 7801 LOP(OP_SELECT,XTERM);
79072805
LW
7802
7803 case KEY_seek:
a0d0e21e 7804 LOP(OP_SEEK,XTERM);
79072805
LW
7805
7806 case KEY_semctl:
a0d0e21e 7807 LOP(OP_SEMCTL,XTERM);
79072805
LW
7808
7809 case KEY_semget:
a0d0e21e 7810 LOP(OP_SEMGET,XTERM);
79072805
LW
7811
7812 case KEY_semop:
a0d0e21e 7813 LOP(OP_SEMOP,XTERM);
79072805
LW
7814
7815 case KEY_send:
a0d0e21e 7816 LOP(OP_SEND,XTERM);
79072805
LW
7817
7818 case KEY_setpgrp:
a0d0e21e 7819 LOP(OP_SETPGRP,XTERM);
79072805
LW
7820
7821 case KEY_setpriority:
a0d0e21e 7822 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7823
7824 case KEY_sethostent:
ff68c719 7825 UNI(OP_SHOSTENT);
79072805
LW
7826
7827 case KEY_setnetent:
ff68c719 7828 UNI(OP_SNETENT);
79072805
LW
7829
7830 case KEY_setservent:
ff68c719 7831 UNI(OP_SSERVENT);
79072805
LW
7832
7833 case KEY_setprotoent:
ff68c719 7834 UNI(OP_SPROTOENT);
79072805
LW
7835
7836 case KEY_setpwent:
7837 FUN0(OP_SPWENT);
7838
7839 case KEY_setgrent:
7840 FUN0(OP_SGRENT);
7841
7842 case KEY_seekdir:
a0d0e21e 7843 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7844
7845 case KEY_setsockopt:
a0d0e21e 7846 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7847
7848 case KEY_shift:
6f33ba73 7849 UNIDOR(OP_SHIFT);
79072805
LW
7850
7851 case KEY_shmctl:
a0d0e21e 7852 LOP(OP_SHMCTL,XTERM);
79072805
LW
7853
7854 case KEY_shmget:
a0d0e21e 7855 LOP(OP_SHMGET,XTERM);
79072805
LW
7856
7857 case KEY_shmread:
a0d0e21e 7858 LOP(OP_SHMREAD,XTERM);
79072805
LW
7859
7860 case KEY_shmwrite:
a0d0e21e 7861 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7862
7863 case KEY_shutdown:
a0d0e21e 7864 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7865
7866 case KEY_sin:
7867 UNI(OP_SIN);
7868
7869 case KEY_sleep:
7870 UNI(OP_SLEEP);
7871
7872 case KEY_socket:
a0d0e21e 7873 LOP(OP_SOCKET,XTERM);
79072805
LW
7874
7875 case KEY_socketpair:
a0d0e21e 7876 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7877
7878 case KEY_sort:
3280af22 7879 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7880 s = SKIPSPACE1(s);
79072805 7881 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7882 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7883 PL_expect = XTERM;
15f0808c 7884 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7885 LOP(OP_SORT,XREF);
79072805
LW
7886
7887 case KEY_split:
a0d0e21e 7888 LOP(OP_SPLIT,XTERM);
79072805
LW
7889
7890 case KEY_sprintf:
a0d0e21e 7891 LOP(OP_SPRINTF,XTERM);
79072805
LW
7892
7893 case KEY_splice:
a0d0e21e 7894 LOP(OP_SPLICE,XTERM);
79072805
LW
7895
7896 case KEY_sqrt:
7897 UNI(OP_SQRT);
7898
7899 case KEY_srand:
7900 UNI(OP_SRAND);
7901
7902 case KEY_stat:
7903 UNI(OP_STAT);
7904
7905 case KEY_study:
79072805
LW
7906 UNI(OP_STUDY);
7907
7908 case KEY_substr:
a0d0e21e 7909 LOP(OP_SUBSTR,XTERM);
79072805
LW
7910
7911 case KEY_format:
7912 case KEY_sub:
93a17b20 7913 really_sub:
09bef843 7914 {
3280af22 7915 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7916 SSize_t tboffset = 0;
09bef843 7917 expectation attrful;
28cc6278 7918 bool have_name, have_proto;
f54cb97a 7919 const int key = tmp;
09bef843 7920
5db06880
NC
7921#ifdef PERL_MAD
7922 SV *tmpwhite = 0;
7923
cd81e915 7924 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7925 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7926 PL_thistoken = 0;
5db06880
NC
7927
7928 d = s;
7929 s = SKIPSPACE2(s,tmpwhite);
7930#else
09bef843 7931 s = skipspace(s);
5db06880 7932#endif
09bef843 7933
7e2040f0 7934 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7935 (*s == ':' && s[1] == ':'))
7936 {
5db06880 7937#ifdef PERL_MAD
4f61fd4b 7938 SV *nametoke = NULL;
5db06880
NC
7939#endif
7940
09bef843
SB
7941 PL_expect = XBLOCK;
7942 attrful = XATTRBLOCK;
b1b65b59
JH
7943 /* remember buffer pos'n for later force_word */
7944 tboffset = s - PL_oldbufptr;
09bef843 7945 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7946#ifdef PERL_MAD
7947 if (PL_madskills)
7948 nametoke = newSVpvn(s, d - s);
7949#endif
6502358f
NC
7950 if (memchr(tmpbuf, ':', len))
7951 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7952 else {
7953 sv_setsv(PL_subname,PL_curstname);
396482e1 7954 sv_catpvs(PL_subname,"::");
09bef843
SB
7955 sv_catpvn(PL_subname,tmpbuf,len);
7956 }
09bef843 7957 have_name = TRUE;
5db06880
NC
7958
7959#ifdef PERL_MAD
7960
7961 start_force(0);
7962 CURMAD('X', nametoke);
7963 CURMAD('_', tmpwhite);
7964 (void) force_word(PL_oldbufptr + tboffset, WORD,
7965 FALSE, TRUE, TRUE);
7966
7967 s = SKIPSPACE2(d,tmpwhite);
7968#else
7969 s = skipspace(d);
7970#endif
09bef843 7971 }
463ee0b2 7972 else {
09bef843
SB
7973 if (key == KEY_my)
7974 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7975 PL_expect = XTERMBLOCK;
7976 attrful = XATTRTERM;
76f68e9b 7977 sv_setpvs(PL_subname,"?");
09bef843 7978 have_name = FALSE;
463ee0b2 7979 }
4633a7c4 7980
09bef843
SB
7981 if (key == KEY_format) {
7982 if (*s == '=')
7983 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7984#ifdef PERL_MAD
cd81e915 7985 PL_thistoken = subtoken;
5db06880
NC
7986 s = d;
7987#else
09bef843 7988 if (have_name)
b1b65b59
JH
7989 (void) force_word(PL_oldbufptr + tboffset, WORD,
7990 FALSE, TRUE, TRUE);
5db06880 7991#endif
09bef843
SB
7992 OPERATOR(FORMAT);
7993 }
79072805 7994
09bef843
SB
7995 /* Look for a prototype */
7996 if (*s == '(') {
d9f2850e
RGS
7997 char *p;
7998 bool bad_proto = FALSE;
9e8d7757
RB
7999 bool in_brackets = FALSE;
8000 char greedy_proto = ' ';
8001 bool proto_after_greedy_proto = FALSE;
8002 bool must_be_last = FALSE;
8003 bool underscore = FALSE;
aef2a98a 8004 bool seen_underscore = FALSE;
197afce1 8005 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 8006
5db06880 8007 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 8008 if (!s)
09bef843 8009 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8010 /* strip spaces and check for bad characters */
09bef843
SB
8011 d = SvPVX(PL_lex_stuff);
8012 tmp = 0;
d9f2850e
RGS
8013 for (p = d; *p; ++p) {
8014 if (!isSPACE(*p)) {
8015 d[tmp++] = *p;
9e8d7757 8016
197afce1 8017 if (warnillegalproto) {
9e8d7757
RB
8018 if (must_be_last)
8019 proto_after_greedy_proto = TRUE;
c035a075 8020 if (!strchr("$@%*;[]&\\_+", *p)) {
9e8d7757
RB
8021 bad_proto = TRUE;
8022 }
8023 else {
8024 if ( underscore ) {
8025 if ( *p != ';' )
8026 bad_proto = TRUE;
8027 underscore = FALSE;
8028 }
8029 if ( *p == '[' ) {
8030 in_brackets = TRUE;
8031 }
8032 else if ( *p == ']' ) {
8033 in_brackets = FALSE;
8034 }
8035 else if ( (*p == '@' || *p == '%') &&
8036 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8037 !in_brackets ) {
8038 must_be_last = TRUE;
8039 greedy_proto = *p;
8040 }
8041 else if ( *p == '_' ) {
aef2a98a 8042 underscore = seen_underscore = TRUE;
9e8d7757
RB
8043 }
8044 }
8045 }
d37a9538 8046 }
09bef843 8047 }
d9f2850e 8048 d[tmp] = '\0';
9e8d7757 8049 if (proto_after_greedy_proto)
197afce1 8050 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8051 "Prototype after '%c' for %"SVf" : %s",
8052 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 8053 if (bad_proto)
197afce1 8054 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8055 "Illegal character %sin prototype for %"SVf" : %s",
8056 seen_underscore ? "after '_' " : "",
be2597df 8057 SVfARG(PL_subname), d);
b162af07 8058 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8059 have_proto = TRUE;
68dc0745 8060
5db06880
NC
8061#ifdef PERL_MAD
8062 start_force(0);
cd81e915 8063 CURMAD('q', PL_thisopen);
5db06880 8064 CURMAD('_', tmpwhite);
cd81e915
NC
8065 CURMAD('=', PL_thisstuff);
8066 CURMAD('Q', PL_thisclose);
5db06880
NC
8067 NEXTVAL_NEXTTOKE.opval =
8068 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8069 PL_lex_stuff = NULL;
5db06880
NC
8070 force_next(THING);
8071
8072 s = SKIPSPACE2(s,tmpwhite);
8073#else
09bef843 8074 s = skipspace(s);
5db06880 8075#endif
4633a7c4 8076 }
09bef843
SB
8077 else
8078 have_proto = FALSE;
8079
8080 if (*s == ':' && s[1] != ':')
8081 PL_expect = attrful;
8e742a20
MHM
8082 else if (*s != '{' && key == KEY_sub) {
8083 if (!have_name)
8084 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8085 else if (*s != ';' && *s != '}')
be2597df 8086 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8087 }
09bef843 8088
5db06880
NC
8089#ifdef PERL_MAD
8090 start_force(0);
8091 if (tmpwhite) {
8092 if (PL_madskills)
6b29d1f5 8093 curmad('^', newSVpvs(""));
5db06880
NC
8094 CURMAD('_', tmpwhite);
8095 }
8096 force_next(0);
8097
cd81e915 8098 PL_thistoken = subtoken;
5db06880 8099#else
09bef843 8100 if (have_proto) {
9ded7720 8101 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8102 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8103 PL_lex_stuff = NULL;
09bef843 8104 force_next(THING);
68dc0745 8105 }
5db06880 8106#endif
09bef843 8107 if (!have_name) {
49a54bbe
NC
8108 if (PL_curstash)
8109 sv_setpvs(PL_subname, "__ANON__");
8110 else
8111 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8112 TOKEN(ANONSUB);
4633a7c4 8113 }
5db06880 8114#ifndef PERL_MAD
b1b65b59
JH
8115 (void) force_word(PL_oldbufptr + tboffset, WORD,
8116 FALSE, TRUE, TRUE);
5db06880 8117#endif
09bef843
SB
8118 if (key == KEY_my)
8119 TOKEN(MYSUB);
8120 TOKEN(SUB);
4633a7c4 8121 }
79072805
LW
8122
8123 case KEY_system:
a0d0e21e 8124 LOP(OP_SYSTEM,XREF);
79072805
LW
8125
8126 case KEY_symlink:
a0d0e21e 8127 LOP(OP_SYMLINK,XTERM);
79072805
LW
8128
8129 case KEY_syscall:
a0d0e21e 8130 LOP(OP_SYSCALL,XTERM);
79072805 8131
c07a80fd 8132 case KEY_sysopen:
8133 LOP(OP_SYSOPEN,XTERM);
8134
137443ea 8135 case KEY_sysseek:
8136 LOP(OP_SYSSEEK,XTERM);
8137
79072805 8138 case KEY_sysread:
a0d0e21e 8139 LOP(OP_SYSREAD,XTERM);
79072805
LW
8140
8141 case KEY_syswrite:
a0d0e21e 8142 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8143
8144 case KEY_tr:
8145 s = scan_trans(s);
8146 TERM(sublex_start());
8147
8148 case KEY_tell:
8149 UNI(OP_TELL);
8150
8151 case KEY_telldir:
8152 UNI(OP_TELLDIR);
8153
463ee0b2 8154 case KEY_tie:
a0d0e21e 8155 LOP(OP_TIE,XTERM);
463ee0b2 8156
c07a80fd 8157 case KEY_tied:
8158 UNI(OP_TIED);
8159
79072805
LW
8160 case KEY_time:
8161 FUN0(OP_TIME);
8162
8163 case KEY_times:
8164 FUN0(OP_TMS);
8165
8166 case KEY_truncate:
a0d0e21e 8167 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8168
8169 case KEY_uc:
8170 UNI(OP_UC);
8171
8172 case KEY_ucfirst:
8173 UNI(OP_UCFIRST);
8174
463ee0b2
LW
8175 case KEY_untie:
8176 UNI(OP_UNTIE);
8177
79072805 8178 case KEY_until:
78cdf107
Z
8179 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8180 return REPORT(0);
6154021b 8181 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8182 OPERATOR(UNTIL);
8183
8184 case KEY_unless:
78cdf107
Z
8185 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8186 return REPORT(0);
6154021b 8187 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8188 OPERATOR(UNLESS);
8189
8190 case KEY_unlink:
a0d0e21e 8191 LOP(OP_UNLINK,XTERM);
79072805
LW
8192
8193 case KEY_undef:
6f33ba73 8194 UNIDOR(OP_UNDEF);
79072805
LW
8195
8196 case KEY_unpack:
a0d0e21e 8197 LOP(OP_UNPACK,XTERM);
79072805
LW
8198
8199 case KEY_utime:
a0d0e21e 8200 LOP(OP_UTIME,XTERM);
79072805
LW
8201
8202 case KEY_umask:
6f33ba73 8203 UNIDOR(OP_UMASK);
79072805
LW
8204
8205 case KEY_unshift:
a0d0e21e
LW
8206 LOP(OP_UNSHIFT,XTERM);
8207
8208 case KEY_use:
468aa647 8209 s = tokenize_use(1, s);
a0d0e21e 8210 OPERATOR(USE);
79072805
LW
8211
8212 case KEY_values:
8213 UNI(OP_VALUES);
8214
8215 case KEY_vec:
a0d0e21e 8216 LOP(OP_VEC,XTERM);
79072805 8217
0d863452 8218 case KEY_when:
78cdf107
Z
8219 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8220 return REPORT(0);
6154021b 8221 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8222 OPERATOR(WHEN);
8223
79072805 8224 case KEY_while:
78cdf107
Z
8225 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8226 return REPORT(0);
6154021b 8227 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8228 OPERATOR(WHILE);
8229
8230 case KEY_warn:
3280af22 8231 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8232 LOP(OP_WARN,XTERM);
79072805
LW
8233
8234 case KEY_wait:
8235 FUN0(OP_WAIT);
8236
8237 case KEY_waitpid:
a0d0e21e 8238 LOP(OP_WAITPID,XTERM);
79072805
LW
8239
8240 case KEY_wantarray:
8241 FUN0(OP_WANTARRAY);
8242
8243 case KEY_write:
9d116dd7
JH
8244#ifdef EBCDIC
8245 {
df3728a2
JH
8246 char ctl_l[2];
8247 ctl_l[0] = toCTRL('L');
8248 ctl_l[1] = '\0';
fafc274c 8249 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8250 }
8251#else
fafc274c
NC
8252 /* Make sure $^L is defined */
8253 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8254#endif
79072805
LW
8255 UNI(OP_ENTERWRITE);
8256
8257 case KEY_x:
78cdf107
Z
8258 if (PL_expect == XOPERATOR) {
8259 if (*s == '=' && !PL_lex_allbrackets &&
8260 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8261 return REPORT(0);
79072805 8262 Mop(OP_REPEAT);
78cdf107 8263 }
79072805
LW
8264 check_uni();
8265 goto just_a_word;
8266
a0d0e21e 8267 case KEY_xor:
78cdf107
Z
8268 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8269 return REPORT(0);
6154021b 8270 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
8271 OPERATOR(OROP);
8272
79072805
LW
8273 case KEY_y:
8274 s = scan_trans(s);
8275 TERM(sublex_start());
8276 }
49dc05e3 8277 }}
79072805 8278}
bf4acbe4
GS
8279#ifdef __SC__
8280#pragma segment Main
8281#endif
79072805 8282
e930465f
JH
8283static int
8284S_pending_ident(pTHX)
8eceec63 8285{
97aff369 8286 dVAR;
8eceec63 8287 register char *d;
bbd11bfc 8288 PADOFFSET tmp = 0;
8eceec63
SC
8289 /* pit holds the identifier we read and pending_ident is reset */
8290 char pit = PL_pending_ident;
9bde8eb0
NC
8291 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8292 /* All routes through this function want to know if there is a colon. */
c099d646 8293 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8294 PL_pending_ident = 0;
8295
cd81e915 8296 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8297 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8298 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8299
8300 /* if we're in a my(), we can't allow dynamics here.
8301 $foo'bar has already been turned into $foo::bar, so
8302 just check for colons.
8303
8304 if it's a legal name, the OP is a PADANY.
8305 */
8306 if (PL_in_my) {
8307 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8308 if (has_colon)
8eceec63
SC
8309 yyerror(Perl_form(aTHX_ "No package name allowed for "
8310 "variable %s in \"our\"",
8311 PL_tokenbuf));
d6447115 8312 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8313 }
8314 else {
9bde8eb0 8315 if (has_colon)
952306ac
RGS
8316 yyerror(Perl_form(aTHX_ PL_no_myglob,
8317 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 8318
6154021b 8319 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 8320 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8321 return PRIVATEREF;
8322 }
8323 }
8324
8325 /*
8326 build the ops for accesses to a my() variable.
8327
8328 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8329 then used in a comparison. This catches most, but not
8330 all cases. For instance, it catches
8331 sort { my($a); $a <=> $b }
8332 but not
8333 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8334 (although why you'd do that is anyone's guess).
8335 */
8336
9bde8eb0 8337 if (!has_colon) {
8716503d 8338 if (!PL_in_my)
f8f98e0a 8339 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 8340 if (tmp != NOT_IN_PAD) {
8eceec63 8341 /* might be an "our" variable" */
00b1698f 8342 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8343 /* build ops for a bareword */
b64e5050
AL
8344 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8345 HEK * const stashname = HvNAME_HEK(stash);
8346 SV * const sym = newSVhek(stashname);
396482e1 8347 sv_catpvs(sym, "::");
9bde8eb0 8348 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8349 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8350 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8351 gv_fetchsv(sym,
8eceec63
SC
8352 (PL_in_eval
8353 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8354 : GV_ADDMULTI
8eceec63
SC
8355 ),
8356 ((PL_tokenbuf[0] == '$') ? SVt_PV
8357 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8358 : SVt_PVHV));
8359 return WORD;
8360 }
8361
8362 /* if it's a sort block and they're naming $a or $b */
8363 if (PL_last_lop_op == OP_SORT &&
8364 PL_tokenbuf[0] == '$' &&
8365 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8366 && !PL_tokenbuf[2])
8367 {
8368 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8369 d < PL_bufend && *d != '\n';
8370 d++)
8371 {
8372 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8373 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8374 PL_tokenbuf);
8375 }
8376 }
8377 }
8378
6154021b
RGS
8379 pl_yylval.opval = newOP(OP_PADANY, 0);
8380 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8381 return PRIVATEREF;
8382 }
8383 }
8384
8385 /*
8386 Whine if they've said @foo in a doublequoted string,
8387 and @foo isn't a variable we can find in the symbol
8388 table.
8389 */
d824713b
NC
8390 if (ckWARN(WARN_AMBIGUOUS) &&
8391 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8392 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8393 SVt_PVAV);
8eceec63 8394 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8395 /* DO NOT warn for @- and @+ */
8396 && !( PL_tokenbuf[2] == '\0' &&
8397 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8398 )
8eceec63
SC
8399 {
8400 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8401 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8402 "Possible unintended interpolation of %s in string",
8403 PL_tokenbuf);
8eceec63
SC
8404 }
8405 }
8406
8407 /* build ops for a bareword */
6154021b 8408 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8409 tokenbuf_len - 1));
6154021b 8410 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8411 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8412 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8413 ((PL_tokenbuf[0] == '$') ? SVt_PV
8414 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8415 : SVt_PVHV));
8eceec63
SC
8416 return WORD;
8417}
8418
76e3520e 8419STATIC void
c94115d8 8420S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8421{
97aff369 8422 dVAR;
2f3197b3 8423
7918f24d
NC
8424 PERL_ARGS_ASSERT_CHECKCOMMA;
8425
d008e5eb 8426 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8427 if (ckWARN(WARN_SYNTAX)) {
8428 int level = 1;
26ff0806 8429 const char *w;
d008e5eb
GS
8430 for (w = s+2; *w && level; w++) {
8431 if (*w == '(')
8432 ++level;
8433 else if (*w == ')')
8434 --level;
8435 }
888fea98
NC
8436 while (isSPACE(*w))
8437 ++w;
b1439985
RGS
8438 /* the list of chars below is for end of statements or
8439 * block / parens, boolean operators (&&, ||, //) and branch
8440 * constructs (or, and, if, until, unless, while, err, for).
8441 * Not a very solid hack... */
8442 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8443 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8444 "%s (...) interpreted as function",name);
d008e5eb 8445 }
2f3197b3 8446 }
3280af22 8447 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8448 s++;
a687059c
LW
8449 if (*s == '(')
8450 s++;
3280af22 8451 while (s < PL_bufend && isSPACE(*s))
a687059c 8452 s++;
7e2040f0 8453 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 8454 const char * const w = s++;
7e2040f0 8455 while (isALNUM_lazy_if(s,UTF))
a687059c 8456 s++;
3280af22 8457 while (s < PL_bufend && isSPACE(*s))
a687059c 8458 s++;
e929a76b 8459 if (*s == ',') {
c94115d8 8460 GV* gv;
5458a98a 8461 if (keyword(w, s - w, 0))
e929a76b 8462 return;
c94115d8
NC
8463
8464 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8465 if (gv && GvCVu(gv))
abbb3198 8466 return;
cea2e8a9 8467 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8468 }
8469 }
8470}
8471
423cee85
JH
8472/* Either returns sv, or mortalizes sv and returns a new SV*.
8473 Best used as sv=new_constant(..., sv, ...).
8474 If s, pv are NULL, calls subroutine with one argument,
8475 and type is used with error messages only. */
8476
b3ac6de7 8477STATIC SV *
eb0d8d16
NC
8478S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8479 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 8480{
27da23d5 8481 dVAR; dSP;
890ce7af 8482 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 8483 SV *res;
b3ac6de7
IZ
8484 SV **cvp;
8485 SV *cv, *typesv;
89e33a05 8486 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 8487
7918f24d
NC
8488 PERL_ARGS_ASSERT_NEW_CONSTANT;
8489
f0af216f 8490 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
8491 SV *msg;
8492
10edeb5d
JH
8493 why2 = (const char *)
8494 (strEQ(key,"charnames")
8495 ? "(possibly a missing \"use charnames ...\")"
8496 : "");
4e553d73 8497 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
8498 (type ? type: "undef"), why2);
8499
8500 /* This is convoluted and evil ("goto considered harmful")
8501 * but I do not understand the intricacies of all the different
8502 * failure modes of %^H in here. The goal here is to make
8503 * the most probable error message user-friendly. --jhi */
8504
8505 goto msgdone;
8506
423cee85 8507 report:
4e553d73 8508 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 8509 (type ? type: "undef"), why1, why2, why3);
41ab332f 8510 msgdone:
95a20fc0 8511 yyerror(SvPVX_const(msg));
423cee85
JH
8512 SvREFCNT_dec(msg);
8513 return sv;
8514 }
ff3f963a
KW
8515
8516 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
8517 if (PL_error_count > 0 && strEQ(key,"charnames"))
8518 return &PL_sv_undef;
ff3f963a 8519
eb0d8d16 8520 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 8521 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
8522 why1 = "$^H{";
8523 why2 = key;
f0af216f 8524 why3 = "} is not defined";
423cee85 8525 goto report;
b3ac6de7
IZ
8526 }
8527 sv_2mortal(sv); /* Parent created it permanently */
8528 cv = *cvp;
423cee85 8529 if (!pv && s)
59cd0e26 8530 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 8531 if (type && pv)
59cd0e26 8532 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 8533 else
423cee85 8534 typesv = &PL_sv_undef;
4e553d73 8535
e788e7d3 8536 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
8537 ENTER ;
8538 SAVETMPS;
4e553d73 8539
423cee85 8540 PUSHMARK(SP) ;
a5845cb7 8541 EXTEND(sp, 3);
423cee85
JH
8542 if (pv)
8543 PUSHs(pv);
b3ac6de7 8544 PUSHs(sv);
423cee85
JH
8545 if (pv)
8546 PUSHs(typesv);
b3ac6de7 8547 PUTBACK;
423cee85 8548 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 8549
423cee85 8550 SPAGAIN ;
4e553d73 8551
423cee85 8552 /* Check the eval first */
9b0e499b 8553 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 8554 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 8555 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 8556 (void)POPs;
b37c2d43 8557 res = SvREFCNT_inc_simple(sv);
423cee85
JH
8558 }
8559 else {
8560 res = POPs;
b37c2d43 8561 SvREFCNT_inc_simple_void(res);
423cee85 8562 }
4e553d73 8563
423cee85
JH
8564 PUTBACK ;
8565 FREETMPS ;
8566 LEAVE ;
b3ac6de7 8567 POPSTACK;
4e553d73 8568
b3ac6de7 8569 if (!SvOK(res)) {
423cee85
JH
8570 why1 = "Call to &{$^H{";
8571 why2 = key;
f0af216f 8572 why3 = "}} did not return a defined value";
423cee85
JH
8573 sv = res;
8574 goto report;
9b0e499b 8575 }
423cee85 8576
9b0e499b 8577 return res;
b3ac6de7 8578}
4e553d73 8579
d0a148a6
NC
8580/* Returns a NUL terminated string, with the length of the string written to
8581 *slp
8582 */
76e3520e 8583STATIC char *
cea2e8a9 8584S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 8585{
97aff369 8586 dVAR;
463ee0b2 8587 register char *d = dest;
890ce7af 8588 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
8589
8590 PERL_ARGS_ASSERT_SCAN_WORD;
8591
463ee0b2 8592 for (;;) {
8903cb82 8593 if (d >= e)
cea2e8a9 8594 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8595 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8596 *d++ = *s++;
c35e046a 8597 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8598 *d++ = ':';
8599 *d++ = ':';
8600 s++;
8601 }
c35e046a 8602 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
8603 *d++ = *s++;
8604 *d++ = *s++;
8605 }
fd400ab9 8606 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8607 char *t = s + UTF8SKIP(s);
c35e046a 8608 size_t len;
fd400ab9 8609 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 8610 t += UTF8SKIP(t);
c35e046a
AL
8611 len = t - s;
8612 if (d + len > e)
cea2e8a9 8613 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
8614 Copy(s, d, len, char);
8615 d += len;
a0ed51b3
LW
8616 s = t;
8617 }
463ee0b2
LW
8618 else {
8619 *d = '\0';
8620 *slp = d - dest;
8621 return s;
e929a76b 8622 }
378cc40b
LW
8623 }
8624}
8625
76e3520e 8626STATIC char *
f54cb97a 8627S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 8628{
97aff369 8629 dVAR;
6136c704 8630 char *bracket = NULL;
748a9306 8631 char funny = *s++;
6136c704 8632 register char *d = dest;
0b3da58d 8633 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 8634
7918f24d
NC
8635 PERL_ARGS_ASSERT_SCAN_IDENT;
8636
a0d0e21e 8637 if (isSPACE(*s))
29595ff2 8638 s = PEEKSPACE(s);
de3bb511 8639 if (isDIGIT(*s)) {
8903cb82 8640 while (isDIGIT(*s)) {
8641 if (d >= e)
cea2e8a9 8642 Perl_croak(aTHX_ ident_too_long);
378cc40b 8643 *d++ = *s++;
8903cb82 8644 }
378cc40b
LW
8645 }
8646 else {
463ee0b2 8647 for (;;) {
8903cb82 8648 if (d >= e)
cea2e8a9 8649 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8650 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8651 *d++ = *s++;
7e2040f0 8652 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8653 *d++ = ':';
8654 *d++ = ':';
8655 s++;
8656 }
a0d0e21e 8657 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
8658 *d++ = *s++;
8659 *d++ = *s++;
8660 }
fd400ab9 8661 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8662 char *t = s + UTF8SKIP(s);
fd400ab9 8663 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
8664 t += UTF8SKIP(t);
8665 if (d + (t - s) > e)
cea2e8a9 8666 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
8667 Copy(s, d, t - s, char);
8668 d += t - s;
8669 s = t;
8670 }
463ee0b2
LW
8671 else
8672 break;
8673 }
378cc40b
LW
8674 }
8675 *d = '\0';
8676 d = dest;
79072805 8677 if (*d) {
3280af22
NIS
8678 if (PL_lex_state != LEX_NORMAL)
8679 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 8680 return s;
378cc40b 8681 }
748a9306 8682 if (*s == '$' && s[1] &&
3792a11b 8683 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 8684 {
4810e5ec 8685 return s;
5cd24f17 8686 }
79072805
LW
8687 if (*s == '{') {
8688 bracket = s;
8689 s++;
8690 }
8691 else if (ck_uni)
8692 check_uni();
93a17b20 8693 if (s < send)
79072805
LW
8694 *d = *s++;
8695 d[1] = '\0';
2b92dfce 8696 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 8697 *d = toCTRL(*s);
8698 s++;
de3bb511 8699 }
79072805 8700 if (bracket) {
748a9306 8701 if (isSPACE(s[-1])) {
fa83b5b6 8702 while (s < send) {
f54cb97a 8703 const char ch = *s++;
bf4acbe4 8704 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 8705 *d = ch;
8706 break;
8707 }
8708 }
748a9306 8709 }
7e2040f0 8710 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 8711 d++;
a0ed51b3 8712 if (UTF) {
6136c704
AL
8713 char *end = s;
8714 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8715 end += UTF8SKIP(end);
8716 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8717 end += UTF8SKIP(end);
a0ed51b3 8718 }
6136c704
AL
8719 Copy(s, d, end - s, char);
8720 d += end - s;
8721 s = end;
a0ed51b3
LW
8722 }
8723 else {
2b92dfce 8724 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 8725 *d++ = *s++;
2b92dfce 8726 if (d >= e)
cea2e8a9 8727 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 8728 }
79072805 8729 *d = '\0';
c35e046a
AL
8730 while (s < send && SPACE_OR_TAB(*s))
8731 s++;
ff68c719 8732 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 8733 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
8734 const char * const brack =
8735 (const char *)
8736 ((*s == '[') ? "[...]" : "{...}");
e850844c 8737 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 8738 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 8739 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
8740 funny, dest, brack, funny, dest, brack);
8741 }
79072805 8742 bracket++;
a0be28da 8743 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 8744 PL_lex_allbrackets++;
79072805
LW
8745 return s;
8746 }
4e553d73
NIS
8747 }
8748 /* Handle extended ${^Foo} variables
2b92dfce
GS
8749 * 1999-02-27 mjd-perl-patch@plover.com */
8750 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8751 && isALNUM(*s))
8752 {
8753 d++;
8754 while (isALNUM(*s) && d < e) {
8755 *d++ = *s++;
8756 }
8757 if (d >= e)
cea2e8a9 8758 Perl_croak(aTHX_ ident_too_long);
2b92dfce 8759 *d = '\0';
79072805
LW
8760 }
8761 if (*s == '}') {
8762 s++;
7df0d042 8763 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 8764 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
8765 PL_expect = XREF;
8766 }
d008e5eb 8767 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 8768 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
8769 (keyword(dest, d - dest, 0)
8770 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 8771 {
c35e046a
AL
8772 if (funny == '#')
8773 funny = '@';
9014280d 8774 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
8775 "Ambiguous use of %c{%s} resolved to %c%s",
8776 funny, dest, funny, dest);
8777 }
8778 }
79072805
LW
8779 }
8780 else {
8781 s = bracket; /* let the parser handle it */
93a17b20 8782 *dest = '\0';
79072805
LW
8783 }
8784 }
3280af22
NIS
8785 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8786 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
8787 return s;
8788}
8789
858a358b 8790static bool
3955e1a9 8791S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
8792
8793 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8794 * the parse starting at 's', based on the subset that are valid in this
8795 * context input to this routine in 'valid_flags'. Advances s. Returns
8796 * TRUE if the input was a valid flag, so the next char may be as well;
3955e1a9
KW
8797 * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8798 * current regex. This routine will set it to any charset modifier found.
8799 * The caller shouldn't change it. This way, another charset modifier
8800 * encountered in the parse can be detected as an error, as we have decided
8801 * allow only one */
858a358b
KW
8802
8803 const char c = **s;
94b03d7d 8804
858a358b
KW
8805 if (! strchr(valid_flags, c)) {
8806 if (isALNUM(c)) {
94b03d7d 8807 goto deprecate;
858a358b
KW
8808 }
8809 return FALSE;
8810 }
8811
8812 switch (c) {
94b03d7d 8813
858a358b
KW
8814 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8815 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8816 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8817 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8818 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8819 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d
KW
8820 case LOCALE_PAT_MOD:
8821
8822 /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8823 * can't be regex modifiers.
8824 * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
8825 * much as easily done. s///lei, for example, has to mean regex
8826 * modifiers if it's not an error (as does any word character
8827 * following the 'e'). Otherwise, we resolve to the backwards-
8828 * compatible, but less likely 's/// le ...', i.e. as meaning
8829 * less-than-or-equal. The reason it's not likely is that s//
967ac236
KW
8830 * returns a number for code in the field (/r returns a string, but
8831 * that wasn't added until the 5.13 series), and so '<=' should be
8832 * used for comparing, not 'le'. */
94b03d7d
KW
8833 if (*((*s) + 1) == 't') {
8834 goto deprecate;
8835 }
563734a5 8836 else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
aeac89a5
KW
8837
8838 /* 'e' is valid only for substitutes, s///e. If it is not
8839 * valid in the current context, then 'm//le' must mean the
8840 * comparison operator, so use the regular deprecation message.
8841 */
8842 if (! strchr(valid_flags, 'e')) {
8843 goto deprecate;
8844 }
94b03d7d
KW
8845 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8846 "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
8847 return FALSE;
8848 }
3955e1a9
KW
8849 if (*charset) {
8850 goto multiple_charsets;
8851 }
94b03d7d 8852 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 8853 *charset = c;
94b03d7d
KW
8854 break;
8855 case UNICODE_PAT_MOD:
8856 /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8857 * 'n' means they can't be regex modifiers */
8858 if (*((*s) + 1) == 'n') {
8859 goto deprecate;
8860 }
3955e1a9
KW
8861 if (*charset) {
8862 goto multiple_charsets;
8863 }
94b03d7d 8864 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 8865 *charset = c;
94b03d7d
KW
8866 break;
8867 case ASCII_RESTRICT_PAT_MOD:
8868 /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8869 * can't be regex modifiers */
8870 if (*((*s) + 1) == 'n') {
8871 goto deprecate;
8872 }
8873 if (*((*s) + 1) == ASCII_RESTRICT_PAT_MOD) {
8874 /* Doubled modifier implies more restricted */
8875 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
8876 (*s)++;
8877 }
8878 else {
8879 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8880 }
3955e1a9
KW
8881 if (*charset) { /* Do this after the increment of *s in /aa, so
8882 the return advances the ptr correctly */
8883 goto multiple_charsets;
8884 }
8885 *charset = c;
94b03d7d
KW
8886 break;
8887 case DEPENDS_PAT_MOD:
3955e1a9
KW
8888 if (*charset) {
8889 goto multiple_charsets;
8890 }
94b03d7d 8891 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 8892 *charset = c;
94b03d7d 8893 break;
879d0c72 8894 }
94b03d7d 8895
858a358b
KW
8896 (*s)++;
8897 return TRUE;
94b03d7d
KW
8898
8899 deprecate:
8900 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8901 "Having no space between pattern and following word is deprecated");
8902 return FALSE;
3955e1a9
KW
8903
8904 multiple_charsets:
8905 if (*charset != c) {
8906 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8907 }
8908 else {
8909 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8910 }
8911
8912 /* Pretend that it worked, so will continue processing before dieing */
8913 (*s)++;
8914 return TRUE;
879d0c72
NC
8915}
8916
76e3520e 8917STATIC char *
cea2e8a9 8918S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 8919{
97aff369 8920 dVAR;
79072805 8921 PMOP *pm;
5db06880 8922 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 8923 const char * const valid_flags =
a20207d7 8924 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 8925 char charset = '\0'; /* character set modifier */
5db06880
NC
8926#ifdef PERL_MAD
8927 char *modstart;
8928#endif
8929
7918f24d 8930 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 8931
25c09cbf 8932 if (!s) {
6136c704 8933 const char * const delimiter = skipspace(start);
10edeb5d
JH
8934 Perl_croak(aTHX_
8935 (const char *)
8936 (*delimiter == '?'
8937 ? "Search pattern not terminated or ternary operator parsed as search pattern"
8938 : "Search pattern not terminated" ));
25c09cbf 8939 }
bbce6d69 8940
8782bef2 8941 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
8942 if (PL_multi_open == '?') {
8943 /* This is the only point in the code that sets PMf_ONCE: */
79072805 8944 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
8945
8946 /* Hence it's safe to do this bit of PMOP book-keeping here, which
8947 allows us to restrict the list needed by reset to just the ??
8948 matches. */
8949 assert(type != OP_TRANS);
8950 if (PL_curstash) {
daba3364 8951 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
8952 U32 elements;
8953 if (!mg) {
daba3364 8954 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
8955 0);
8956 }
8957 elements = mg->mg_len / sizeof(PMOP**);
8958 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8959 ((PMOP**)mg->mg_ptr) [elements++] = pm;
8960 mg->mg_len = elements * sizeof(PMOP**);
8961 PmopSTASH_set(pm,PL_curstash);
8962 }
8963 }
5db06880
NC
8964#ifdef PERL_MAD
8965 modstart = s;
8966#endif
3955e1a9 8967 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
8968#ifdef PERL_MAD
8969 if (PL_madskills && modstart != s) {
8970 SV* tmptoken = newSVpvn(modstart, s - modstart);
8971 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8972 }
8973#endif
4ac733c9 8974 /* issue a warning if /c is specified,but /g is not */
a2a5de95 8975 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 8976 {
a2a5de95
NC
8977 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8978 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
8979 }
8980
3280af22 8981 PL_lex_op = (OP*)pm;
6154021b 8982 pl_yylval.ival = OP_MATCH;
378cc40b
LW
8983 return s;
8984}
8985
76e3520e 8986STATIC char *
cea2e8a9 8987S_scan_subst(pTHX_ char *start)
79072805 8988{
27da23d5 8989 dVAR;
22594288 8990 char *s;
79072805 8991 register PMOP *pm;
4fdae800 8992 I32 first_start;
79072805 8993 I32 es = 0;
3955e1a9 8994 char charset = '\0'; /* character set modifier */
5db06880
NC
8995#ifdef PERL_MAD
8996 char *modstart;
8997#endif
79072805 8998
7918f24d
NC
8999 PERL_ARGS_ASSERT_SCAN_SUBST;
9000
6154021b 9001 pl_yylval.ival = OP_NULL;
79072805 9002
5db06880 9003 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9004
37fd879b 9005 if (!s)
cea2e8a9 9006 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9007
3280af22 9008 if (s[-1] == PL_multi_open)
79072805 9009 s--;
5db06880
NC
9010#ifdef PERL_MAD
9011 if (PL_madskills) {
cd81e915
NC
9012 CURMAD('q', PL_thisopen);
9013 CURMAD('_', PL_thiswhite);
9014 CURMAD('E', PL_thisstuff);
9015 CURMAD('Q', PL_thisclose);
9016 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9017 }
9018#endif
79072805 9019
3280af22 9020 first_start = PL_multi_start;
5db06880 9021 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9022 if (!s) {
37fd879b 9023 if (PL_lex_stuff) {
3280af22 9024 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9025 PL_lex_stuff = NULL;
37fd879b 9026 }
cea2e8a9 9027 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9028 }
3280af22 9029 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9030
79072805 9031 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9032
9033#ifdef PERL_MAD
9034 if (PL_madskills) {
cd81e915
NC
9035 CURMAD('z', PL_thisopen);
9036 CURMAD('R', PL_thisstuff);
9037 CURMAD('Z', PL_thisclose);
5db06880
NC
9038 }
9039 modstart = s;
9040#endif
9041
48c036b1 9042 while (*s) {
a20207d7 9043 if (*s == EXEC_PAT_MOD) {
a687059c 9044 s++;
2f3197b3 9045 es++;
a687059c 9046 }
3955e1a9
KW
9047 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9048 {
48c036b1 9049 break;
aa78b661 9050 }
378cc40b 9051 }
79072805 9052
5db06880
NC
9053#ifdef PERL_MAD
9054 if (PL_madskills) {
9055 if (modstart != s)
9056 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9057 append_madprops(PL_thismad, (OP*)pm, 0);
9058 PL_thismad = 0;
5db06880
NC
9059 }
9060#endif
a2a5de95
NC
9061 if ((pm->op_pmflags & PMf_CONTINUE)) {
9062 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9063 }
9064
79072805 9065 if (es) {
6136c704
AL
9066 SV * const repl = newSVpvs("");
9067
0244c3a4
GS
9068 PL_sublex_info.super_bufptr = s;
9069 PL_sublex_info.super_bufend = PL_bufend;
9070 PL_multi_end = 0;
79072805 9071 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9072 while (es-- > 0) {
9073 if (es)
9074 sv_catpvs(repl, "eval ");
9075 else
9076 sv_catpvs(repl, "do ");
9077 }
6f43d98f 9078 sv_catpvs(repl, "{");
3280af22 9079 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
9080 if (strchr(SvPVX(PL_lex_repl), '#'))
9081 sv_catpvs(repl, "\n");
9082 sv_catpvs(repl, "}");
25da4f38 9083 SvEVALED_on(repl);
3280af22
NIS
9084 SvREFCNT_dec(PL_lex_repl);
9085 PL_lex_repl = repl;
378cc40b 9086 }
79072805 9087
3280af22 9088 PL_lex_op = (OP*)pm;
6154021b 9089 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9090 return s;
9091}
9092
76e3520e 9093STATIC char *
cea2e8a9 9094S_scan_trans(pTHX_ char *start)
378cc40b 9095{
97aff369 9096 dVAR;
a0d0e21e 9097 register char* s;
11343788 9098 OP *o;
79072805 9099 short *tbl;
b84c11c8
NC
9100 U8 squash;
9101 U8 del;
9102 U8 complement;
bb16bae8 9103 bool nondestruct = 0;
5db06880
NC
9104#ifdef PERL_MAD
9105 char *modstart;
9106#endif
79072805 9107
7918f24d
NC
9108 PERL_ARGS_ASSERT_SCAN_TRANS;
9109
6154021b 9110 pl_yylval.ival = OP_NULL;
79072805 9111
5db06880 9112 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 9113 if (!s)
cea2e8a9 9114 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9115
3280af22 9116 if (s[-1] == PL_multi_open)
2f3197b3 9117 s--;
5db06880
NC
9118#ifdef PERL_MAD
9119 if (PL_madskills) {
cd81e915
NC
9120 CURMAD('q', PL_thisopen);
9121 CURMAD('_', PL_thiswhite);
9122 CURMAD('E', PL_thisstuff);
9123 CURMAD('Q', PL_thisclose);
9124 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9125 }
9126#endif
2f3197b3 9127
5db06880 9128 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9129 if (!s) {
37fd879b 9130 if (PL_lex_stuff) {
3280af22 9131 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9132 PL_lex_stuff = NULL;
37fd879b 9133 }
cea2e8a9 9134 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9135 }
5db06880 9136 if (PL_madskills) {
cd81e915
NC
9137 CURMAD('z', PL_thisopen);
9138 CURMAD('R', PL_thisstuff);
9139 CURMAD('Z', PL_thisclose);
5db06880 9140 }
79072805 9141
a0ed51b3 9142 complement = del = squash = 0;
5db06880
NC
9143#ifdef PERL_MAD
9144 modstart = s;
9145#endif
7a1e2023
NC
9146 while (1) {
9147 switch (*s) {
9148 case 'c':
79072805 9149 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9150 break;
9151 case 'd':
a0ed51b3 9152 del = OPpTRANS_DELETE;
7a1e2023
NC
9153 break;
9154 case 's':
79072805 9155 squash = OPpTRANS_SQUASH;
7a1e2023 9156 break;
bb16bae8
FC
9157 case 'r':
9158 nondestruct = 1;
9159 break;
7a1e2023
NC
9160 default:
9161 goto no_more;
9162 }
395c3793
LW
9163 s++;
9164 }
7a1e2023 9165 no_more:
8973db79 9166
aa1f7c5b 9167 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
bb16bae8 9168 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
59f00321
RGS
9169 o->op_private &= ~OPpTRANS_ALL;
9170 o->op_private |= del|squash|complement|
7948272d
NIS
9171 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9172 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 9173
3280af22 9174 PL_lex_op = o;
bb16bae8 9175 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9176
9177#ifdef PERL_MAD
9178 if (PL_madskills) {
9179 if (modstart != s)
9180 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9181 append_madprops(PL_thismad, o, 0);
9182 PL_thismad = 0;
5db06880
NC
9183 }
9184#endif
9185
79072805
LW
9186 return s;
9187}
9188
76e3520e 9189STATIC char *
cea2e8a9 9190S_scan_heredoc(pTHX_ register char *s)
79072805 9191{
97aff369 9192 dVAR;
79072805
LW
9193 SV *herewas;
9194 I32 op_type = OP_SCALAR;
9195 I32 len;
9196 SV *tmpstr;
9197 char term;
73d840c0 9198 const char *found_newline;
79072805 9199 register char *d;
fc36a67e 9200 register char *e;
4633a7c4 9201 char *peek;
f54cb97a 9202 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
9203#ifdef PERL_MAD
9204 I32 stuffstart = s - SvPVX(PL_linestr);
9205 char *tstart;
9206
cd81e915 9207 PL_realtokenstart = -1;
5db06880 9208#endif
79072805 9209
7918f24d
NC
9210 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9211
79072805 9212 s += 2;
3280af22
NIS
9213 d = PL_tokenbuf;
9214 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 9215 if (!outer)
79072805 9216 *d++ = '\n';
c35e046a
AL
9217 peek = s;
9218 while (SPACE_OR_TAB(*peek))
9219 peek++;
3792a11b 9220 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9221 s = peek;
79072805 9222 term = *s++;
3280af22 9223 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 9224 d += len;
3280af22 9225 if (s < PL_bufend)
79072805 9226 s++;
79072805
LW
9227 }
9228 else {
9229 if (*s == '\\')
9230 s++, term = '\'';
9231 else
9232 term = '"';
7e2040f0 9233 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9234 deprecate("bare << to mean <<\"\"");
7e2040f0 9235 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9236 if (d < e)
9237 *d++ = *s;
9238 }
9239 }
3280af22 9240 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9241 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9242 *d++ = '\n';
9243 *d = '\0';
3280af22 9244 len = d - PL_tokenbuf;
5db06880
NC
9245
9246#ifdef PERL_MAD
9247 if (PL_madskills) {
9248 tstart = PL_tokenbuf + !outer;
cd81e915 9249 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 9250 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9251 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9252 stuffstart = s - SvPVX(PL_linestr);
9253 }
9254#endif
6a27c188 9255#ifndef PERL_STRICT_CR
f63a84b2
LW
9256 d = strchr(s, '\r');
9257 if (d) {
b464bac0 9258 char * const olds = s;
f63a84b2 9259 s = d;
3280af22 9260 while (s < PL_bufend) {
f63a84b2
LW
9261 if (*s == '\r') {
9262 *d++ = '\n';
9263 if (*++s == '\n')
9264 s++;
9265 }
9266 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9267 *d++ = *s++;
9268 s++;
9269 }
9270 else
9271 *d++ = *s++;
9272 }
9273 *d = '\0';
3280af22 9274 PL_bufend = d;
95a20fc0 9275 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9276 s = olds;
9277 }
9278#endif
5db06880
NC
9279#ifdef PERL_MAD
9280 found_newline = 0;
9281#endif
10edeb5d 9282 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
9283 herewas = newSVpvn(s,PL_bufend-s);
9284 }
9285 else {
5db06880
NC
9286#ifdef PERL_MAD
9287 herewas = newSVpvn(s-1,found_newline-s+1);
9288#else
73d840c0
AL
9289 s--;
9290 herewas = newSVpvn(s,found_newline-s);
5db06880 9291#endif
73d840c0 9292 }
5db06880
NC
9293#ifdef PERL_MAD
9294 if (PL_madskills) {
9295 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9296 if (PL_thisstuff)
9297 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9298 else
cd81e915 9299 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
9300 }
9301#endif
79072805 9302 s += SvCUR(herewas);
748a9306 9303
5db06880
NC
9304#ifdef PERL_MAD
9305 stuffstart = s - SvPVX(PL_linestr);
9306
9307 if (found_newline)
9308 s--;
9309#endif
9310
7d0a29fe
NC
9311 tmpstr = newSV_type(SVt_PVIV);
9312 SvGROW(tmpstr, 80);
748a9306 9313 if (term == '\'') {
79072805 9314 op_type = OP_CONST;
45977657 9315 SvIV_set(tmpstr, -1);
748a9306
LW
9316 }
9317 else if (term == '`') {
79072805 9318 op_type = OP_BACKTICK;
45977657 9319 SvIV_set(tmpstr, '\\');
748a9306 9320 }
79072805
LW
9321
9322 CLINE;
57843af0 9323 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
9324 PL_multi_open = PL_multi_close = '<';
9325 term = *PL_tokenbuf;
0244c3a4 9326 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
9327 char * const bufptr = PL_sublex_info.super_bufptr;
9328 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 9329 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
9330 s = strchr(bufptr, '\n');
9331 if (!s)
9332 s = bufend;
9333 d = s;
9334 while (s < bufend &&
9335 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9336 if (*s++ == '\n')
57843af0 9337 CopLINE_inc(PL_curcop);
0244c3a4
GS
9338 }
9339 if (s >= bufend) {
eb160463 9340 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
9341 missingterm(PL_tokenbuf);
9342 }
9343 sv_setpvn(herewas,bufptr,d-bufptr+1);
9344 sv_setpvn(tmpstr,d+1,s-d);
9345 s += len - 1;
9346 sv_catpvn(herewas,s,bufend-s);
95a20fc0 9347 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
9348
9349 s = olds;
9350 goto retval;
9351 }
9352 else if (!outer) {
79072805 9353 d = s;
3280af22
NIS
9354 while (s < PL_bufend &&
9355 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 9356 if (*s++ == '\n')
57843af0 9357 CopLINE_inc(PL_curcop);
79072805 9358 }
3280af22 9359 if (s >= PL_bufend) {
eb160463 9360 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9361 missingterm(PL_tokenbuf);
79072805
LW
9362 }
9363 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9364#ifdef PERL_MAD
9365 if (PL_madskills) {
cd81e915
NC
9366 if (PL_thisstuff)
9367 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9368 else
cd81e915 9369 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9370 stuffstart = s - SvPVX(PL_linestr);
9371 }
9372#endif
79072805 9373 s += len - 1;
57843af0 9374 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 9375
3280af22
NIS
9376 sv_catpvn(herewas,s,PL_bufend-s);
9377 sv_setsv(PL_linestr,herewas);
9378 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9379 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9380 PL_last_lop = PL_last_uni = NULL;
79072805
LW
9381 }
9382 else
76f68e9b 9383 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 9384 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
9385#ifdef PERL_MAD
9386 if (PL_madskills) {
9387 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9388 if (PL_thisstuff)
9389 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9390 else
cd81e915 9391 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9392 }
9393#endif
f0e67a1d 9394 PL_bufptr = s;
17cc9359 9395 CopLINE_inc(PL_curcop);
f0e67a1d 9396 if (!outer || !lex_next_chunk(0)) {
eb160463 9397 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9398 missingterm(PL_tokenbuf);
79072805 9399 }
17cc9359 9400 CopLINE_dec(PL_curcop);
f0e67a1d 9401 s = PL_bufptr;
5db06880
NC
9402#ifdef PERL_MAD
9403 stuffstart = s - SvPVX(PL_linestr);
9404#endif
57843af0 9405 CopLINE_inc(PL_curcop);
3280af22 9406 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9407 PL_last_lop = PL_last_uni = NULL;
6a27c188 9408#ifndef PERL_STRICT_CR
3280af22 9409 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
9410 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9411 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 9412 {
3280af22
NIS
9413 PL_bufend[-2] = '\n';
9414 PL_bufend--;
95a20fc0 9415 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 9416 }
3280af22
NIS
9417 else if (PL_bufend[-1] == '\r')
9418 PL_bufend[-1] = '\n';
f63a84b2 9419 }
3280af22
NIS
9420 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9421 PL_bufend[-1] = '\n';
f63a84b2 9422#endif
3280af22 9423 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 9424 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 9425 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
9426 sv_catsv(PL_linestr,herewas);
9427 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 9428 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
9429 }
9430 else {
3280af22
NIS
9431 s = PL_bufend;
9432 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
9433 }
9434 }
79072805 9435 s++;
0244c3a4 9436retval:
57843af0 9437 PL_multi_end = CopLINE(PL_curcop);
79072805 9438 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 9439 SvPV_shrink_to_cur(tmpstr);
79072805 9440 }
8990e307 9441 SvREFCNT_dec(herewas);
2f31ce75 9442 if (!IN_BYTES) {
95a20fc0 9443 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
9444 SvUTF8_on(tmpstr);
9445 else if (PL_encoding)
9446 sv_recode_to_utf8(tmpstr, PL_encoding);
9447 }
3280af22 9448 PL_lex_stuff = tmpstr;
6154021b 9449 pl_yylval.ival = op_type;
79072805
LW
9450 return s;
9451}
9452
02aa26ce
NT
9453/* scan_inputsymbol
9454 takes: current position in input buffer
9455 returns: new position in input buffer
6154021b 9456 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
9457
9458 This code handles:
9459
9460 <> read from ARGV
9461 <FH> read from filehandle
9462 <pkg::FH> read from package qualified filehandle
9463 <pkg'FH> read from package qualified filehandle
9464 <$fh> read from filehandle in $fh
9465 <*.h> filename glob
9466
9467*/
9468
76e3520e 9469STATIC char *
cea2e8a9 9470S_scan_inputsymbol(pTHX_ char *start)
79072805 9471{
97aff369 9472 dVAR;
02aa26ce 9473 register char *s = start; /* current position in buffer */
1b420867 9474 char *end;
79072805 9475 I32 len;
6136c704
AL
9476 char *d = PL_tokenbuf; /* start of temp holding space */
9477 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9478
7918f24d
NC
9479 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9480
1b420867
GS
9481 end = strchr(s, '\n');
9482 if (!end)
9483 end = PL_bufend;
9484 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
9485
9486 /* die if we didn't have space for the contents of the <>,
1b420867 9487 or if it didn't end, or if we see a newline
02aa26ce
NT
9488 */
9489
bb7a0f54 9490 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 9491 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 9492 if (s >= end)
cea2e8a9 9493 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 9494
fc36a67e 9495 s++;
02aa26ce
NT
9496
9497 /* check for <$fh>
9498 Remember, only scalar variables are interpreted as filehandles by
9499 this code. Anything more complex (e.g., <$fh{$num}>) will be
9500 treated as a glob() call.
9501 This code makes use of the fact that except for the $ at the front,
9502 a scalar variable and a filehandle look the same.
9503 */
4633a7c4 9504 if (*d == '$' && d[1]) d++;
02aa26ce
NT
9505
9506 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 9507 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 9508 d++;
02aa26ce
NT
9509
9510 /* If we've tried to read what we allow filehandles to look like, and
9511 there's still text left, then it must be a glob() and not a getline.
9512 Use scan_str to pull out the stuff between the <> and treat it
9513 as nothing more than a string.
9514 */
9515
3280af22 9516 if (d - PL_tokenbuf != len) {
6154021b 9517 pl_yylval.ival = OP_GLOB;
5db06880 9518 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9519 if (!s)
cea2e8a9 9520 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
9521 return s;
9522 }
395c3793 9523 else {
9b3023bc 9524 bool readline_overriden = FALSE;
6136c704 9525 GV *gv_readline;
9b3023bc 9526 GV **gvp;
02aa26ce 9527 /* we're in a filehandle read situation */
3280af22 9528 d = PL_tokenbuf;
02aa26ce
NT
9529
9530 /* turn <> into <ARGV> */
79072805 9531 if (!len)
689badd5 9532 Copy("ARGV",d,5,char);
02aa26ce 9533
9b3023bc 9534 /* Check whether readline() is overriden */
fafc274c 9535 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 9536 if ((gv_readline
ba979b31 9537 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 9538 ||
017a3ce5 9539 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 9540 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 9541 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
9542 readline_overriden = TRUE;
9543
02aa26ce
NT
9544 /* if <$fh>, create the ops to turn the variable into a
9545 filehandle
9546 */
79072805 9547 if (*d == '$') {
02aa26ce
NT
9548 /* try to find it in the pad for this block, otherwise find
9549 add symbol table ops
9550 */
f8f98e0a 9551 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 9552 if (tmp != NOT_IN_PAD) {
00b1698f 9553 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
9554 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9555 HEK * const stashname = HvNAME_HEK(stash);
9556 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 9557 sv_catpvs(sym, "::");
f558d5af
JH
9558 sv_catpv(sym, d+1);
9559 d = SvPVX(sym);
9560 goto intro_sym;
9561 }
9562 else {
6136c704 9563 OP * const o = newOP(OP_PADSV, 0);
f558d5af 9564 o->op_targ = tmp;
9b3023bc
RGS
9565 PL_lex_op = readline_overriden
9566 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9567 op_append_elem(OP_LIST, o,
9b3023bc
RGS
9568 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9569 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 9570 }
a0d0e21e
LW
9571 }
9572 else {
f558d5af
JH
9573 GV *gv;
9574 ++d;
9575intro_sym:
9576 gv = gv_fetchpv(d,
9577 (PL_in_eval
9578 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 9579 : GV_ADDMULTI),
f558d5af 9580 SVt_PV);
9b3023bc
RGS
9581 PL_lex_op = readline_overriden
9582 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9583 op_append_elem(OP_LIST,
9b3023bc
RGS
9584 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9585 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9586 : (OP*)newUNOP(OP_READLINE, 0,
9587 newUNOP(OP_RV2SV, 0,
9588 newGVOP(OP_GV, 0, gv)));
a0d0e21e 9589 }
7c6fadd6
RGS
9590 if (!readline_overriden)
9591 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
9592 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9593 pl_yylval.ival = OP_NULL;
79072805 9594 }
02aa26ce
NT
9595
9596 /* If it's none of the above, it must be a literal filehandle
9597 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 9598 else {
6136c704 9599 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
9600 PL_lex_op = readline_overriden
9601 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9602 op_append_elem(OP_LIST,
9b3023bc
RGS
9603 newGVOP(OP_GV, 0, gv),
9604 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9605 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 9606 pl_yylval.ival = OP_NULL;
79072805
LW
9607 }
9608 }
02aa26ce 9609
79072805
LW
9610 return s;
9611}
9612
02aa26ce
NT
9613
9614/* scan_str
9615 takes: start position in buffer
09bef843
SB
9616 keep_quoted preserve \ on the embedded delimiter(s)
9617 keep_delims preserve the delimiters around the string
02aa26ce
NT
9618 returns: position to continue reading from buffer
9619 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9620 updates the read buffer.
9621
9622 This subroutine pulls a string out of the input. It is called for:
9623 q single quotes q(literal text)
9624 ' single quotes 'literal text'
9625 qq double quotes qq(interpolate $here please)
9626 " double quotes "interpolate $here please"
9627 qx backticks qx(/bin/ls -l)
9628 ` backticks `/bin/ls -l`
9629 qw quote words @EXPORT_OK = qw( func() $spam )
9630 m// regexp match m/this/
9631 s/// regexp substitute s/this/that/
9632 tr/// string transliterate tr/this/that/
9633 y/// string transliterate y/this/that/
9634 ($*@) sub prototypes sub foo ($)
09bef843 9635 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
9636 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9637
9638 In most of these cases (all but <>, patterns and transliterate)
9639 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9640 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9641 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9642 calls scan_str().
4e553d73 9643
02aa26ce
NT
9644 It skips whitespace before the string starts, and treats the first
9645 character as the delimiter. If the delimiter is one of ([{< then
9646 the corresponding "close" character )]}> is used as the closing
9647 delimiter. It allows quoting of delimiters, and if the string has
9648 balanced delimiters ([{<>}]) it allows nesting.
9649
37fd879b
HS
9650 On success, the SV with the resulting string is put into lex_stuff or,
9651 if that is already non-NULL, into lex_repl. The second case occurs only
9652 when parsing the RHS of the special constructs s/// and tr/// (y///).
9653 For convenience, the terminating delimiter character is stuffed into
9654 SvIVX of the SV.
02aa26ce
NT
9655*/
9656
76e3520e 9657STATIC char *
09bef843 9658S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 9659{
97aff369 9660 dVAR;
02aa26ce 9661 SV *sv; /* scalar value: string */
d3fcec1f 9662 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
9663 register char *s = start; /* current position in the buffer */
9664 register char term; /* terminating character */
9665 register char *to; /* current position in the sv's data */
9666 I32 brackets = 1; /* bracket nesting level */
89491803 9667 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 9668 I32 termcode; /* terminating char. code */
89ebb4a3 9669 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 9670 STRLEN termlen; /* length of terminating string */
0331ef07 9671 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
9672#ifdef PERL_MAD
9673 int stuffstart;
9674 char *tstart;
9675#endif
02aa26ce 9676
7918f24d
NC
9677 PERL_ARGS_ASSERT_SCAN_STR;
9678
02aa26ce 9679 /* skip space before the delimiter */
29595ff2
NC
9680 if (isSPACE(*s)) {
9681 s = PEEKSPACE(s);
9682 }
02aa26ce 9683
5db06880 9684#ifdef PERL_MAD
cd81e915
NC
9685 if (PL_realtokenstart >= 0) {
9686 stuffstart = PL_realtokenstart;
9687 PL_realtokenstart = -1;
5db06880
NC
9688 }
9689 else
9690 stuffstart = start - SvPVX(PL_linestr);
9691#endif
02aa26ce 9692 /* mark where we are, in case we need to report errors */
79072805 9693 CLINE;
02aa26ce
NT
9694
9695 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 9696 term = *s;
220e2d4e
IH
9697 if (!UTF) {
9698 termcode = termstr[0] = term;
9699 termlen = 1;
9700 }
9701 else {
f3b9ce0f 9702 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
9703 Copy(s, termstr, termlen, U8);
9704 if (!UTF8_IS_INVARIANT(term))
9705 has_utf8 = TRUE;
9706 }
b1c7b182 9707
02aa26ce 9708 /* mark where we are */
57843af0 9709 PL_multi_start = CopLINE(PL_curcop);
3280af22 9710 PL_multi_open = term;
02aa26ce
NT
9711
9712 /* find corresponding closing delimiter */
93a17b20 9713 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
9714 termcode = termstr[0] = term = tmps[5];
9715
3280af22 9716 PL_multi_close = term;
79072805 9717
561b68a9
SH
9718 /* create a new SV to hold the contents. 79 is the SV's initial length.
9719 What a random number. */
7d0a29fe
NC
9720 sv = newSV_type(SVt_PVIV);
9721 SvGROW(sv, 80);
45977657 9722 SvIV_set(sv, termcode);
a0d0e21e 9723 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
9724
9725 /* move past delimiter and try to read a complete string */
09bef843 9726 if (keep_delims)
220e2d4e
IH
9727 sv_catpvn(sv, s, termlen);
9728 s += termlen;
5db06880
NC
9729#ifdef PERL_MAD
9730 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9731 if (!PL_thisopen && !keep_delims) {
9732 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9733 stuffstart = s - SvPVX(PL_linestr);
9734 }
9735#endif
93a17b20 9736 for (;;) {
220e2d4e
IH
9737 if (PL_encoding && !UTF) {
9738 bool cont = TRUE;
9739
9740 while (cont) {
95a20fc0 9741 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 9742 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 9743 &offset, (char*)termstr, termlen);
6136c704
AL
9744 const char * const ns = SvPVX_const(PL_linestr) + offset;
9745 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
9746
9747 for (; s < ns; s++) {
9748 if (*s == '\n' && !PL_rsfp)
9749 CopLINE_inc(PL_curcop);
9750 }
9751 if (!found)
9752 goto read_more_line;
9753 else {
9754 /* handle quoted delimiters */
52327caf 9755 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 9756 const char *t;
95a20fc0 9757 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
9758 t--;
9759 if ((svlast-1 - t) % 2) {
9760 if (!keep_quoted) {
9761 *(svlast-1) = term;
9762 *svlast = '\0';
9763 SvCUR_set(sv, SvCUR(sv) - 1);
9764 }
9765 continue;
9766 }
9767 }
9768 if (PL_multi_open == PL_multi_close) {
9769 cont = FALSE;
9770 }
9771 else {
f54cb97a
AL
9772 const char *t;
9773 char *w;
0331ef07 9774 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
9775 /* At here, all closes are "was quoted" one,
9776 so we don't check PL_multi_close. */
9777 if (*t == '\\') {
9778 if (!keep_quoted && *(t+1) == PL_multi_open)
9779 t++;
9780 else
9781 *w++ = *t++;
9782 }
9783 else if (*t == PL_multi_open)
9784 brackets++;
9785
9786 *w = *t;
9787 }
9788 if (w < t) {
9789 *w++ = term;
9790 *w = '\0';
95a20fc0 9791 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 9792 }
0331ef07 9793 last_off = w - SvPVX(sv);
220e2d4e
IH
9794 if (--brackets <= 0)
9795 cont = FALSE;
9796 }
9797 }
9798 }
9799 if (!keep_delims) {
9800 SvCUR_set(sv, SvCUR(sv) - 1);
9801 *SvEND(sv) = '\0';
9802 }
9803 break;
9804 }
9805
02aa26ce 9806 /* extend sv if need be */
3280af22 9807 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 9808 /* set 'to' to the next character in the sv's string */
463ee0b2 9809 to = SvPVX(sv)+SvCUR(sv);
09bef843 9810
02aa26ce 9811 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
9812 if (PL_multi_open == PL_multi_close) {
9813 for (; s < PL_bufend; s++,to++) {
02aa26ce 9814 /* embedded newlines increment the current line number */
3280af22 9815 if (*s == '\n' && !PL_rsfp)
57843af0 9816 CopLINE_inc(PL_curcop);
02aa26ce 9817 /* handle quoted delimiters */
3280af22 9818 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 9819 if (!keep_quoted && s[1] == term)
a0d0e21e 9820 s++;
02aa26ce 9821 /* any other quotes are simply copied straight through */
a0d0e21e
LW
9822 else
9823 *to++ = *s++;
9824 }
02aa26ce
NT
9825 /* terminate when run out of buffer (the for() condition), or
9826 have found the terminator */
220e2d4e
IH
9827 else if (*s == term) {
9828 if (termlen == 1)
9829 break;
f3b9ce0f 9830 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
9831 break;
9832 }
63cd0674 9833 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9834 has_utf8 = TRUE;
93a17b20
LW
9835 *to = *s;
9836 }
9837 }
02aa26ce
NT
9838
9839 /* if the terminator isn't the same as the start character (e.g.,
9840 matched brackets), we have to allow more in the quoting, and
9841 be prepared for nested brackets.
9842 */
93a17b20 9843 else {
02aa26ce 9844 /* read until we run out of string, or we find the terminator */
3280af22 9845 for (; s < PL_bufend; s++,to++) {
02aa26ce 9846 /* embedded newlines increment the line count */
3280af22 9847 if (*s == '\n' && !PL_rsfp)
57843af0 9848 CopLINE_inc(PL_curcop);
02aa26ce 9849 /* backslashes can escape the open or closing characters */
3280af22 9850 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
9851 if (!keep_quoted &&
9852 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
9853 s++;
9854 else
9855 *to++ = *s++;
9856 }
02aa26ce 9857 /* allow nested opens and closes */
3280af22 9858 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 9859 break;
3280af22 9860 else if (*s == PL_multi_open)
93a17b20 9861 brackets++;
63cd0674 9862 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9863 has_utf8 = TRUE;
93a17b20
LW
9864 *to = *s;
9865 }
9866 }
02aa26ce 9867 /* terminate the copied string and update the sv's end-of-string */
93a17b20 9868 *to = '\0';
95a20fc0 9869 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 9870
02aa26ce
NT
9871 /*
9872 * this next chunk reads more into the buffer if we're not done yet
9873 */
9874
b1c7b182
GS
9875 if (s < PL_bufend)
9876 break; /* handle case where we are done yet :-) */
79072805 9877
6a27c188 9878#ifndef PERL_STRICT_CR
95a20fc0 9879 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
9880 if ((to[-2] == '\r' && to[-1] == '\n') ||
9881 (to[-2] == '\n' && to[-1] == '\r'))
9882 {
f63a84b2
LW
9883 to[-2] = '\n';
9884 to--;
95a20fc0 9885 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
9886 }
9887 else if (to[-1] == '\r')
9888 to[-1] = '\n';
9889 }
95a20fc0 9890 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
9891 to[-1] = '\n';
9892#endif
9893
220e2d4e 9894 read_more_line:
02aa26ce
NT
9895 /* if we're out of file, or a read fails, bail and reset the current
9896 line marker so we can report where the unterminated string began
9897 */
5db06880
NC
9898#ifdef PERL_MAD
9899 if (PL_madskills) {
c35e046a 9900 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9901 if (PL_thisstuff)
9902 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9903 else
cd81e915 9904 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9905 }
9906#endif
f0e67a1d
Z
9907 CopLINE_inc(PL_curcop);
9908 PL_bufptr = PL_bufend;
9909 if (!lex_next_chunk(0)) {
c07a80fd 9910 sv_free(sv);
eb160463 9911 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 9912 return NULL;
79072805 9913 }
f0e67a1d 9914 s = PL_bufptr;
5db06880
NC
9915#ifdef PERL_MAD
9916 stuffstart = 0;
9917#endif
378cc40b 9918 }
4e553d73 9919
02aa26ce
NT
9920 /* at this point, we have successfully read the delimited string */
9921
220e2d4e 9922 if (!PL_encoding || UTF) {
5db06880
NC
9923#ifdef PERL_MAD
9924 if (PL_madskills) {
c35e046a 9925 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 9926 const int len = s - tstart;
cd81e915 9927 if (PL_thisstuff)
c35e046a 9928 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9929 else
c35e046a 9930 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9931 if (!PL_thisclose && !keep_delims)
9932 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
9933 }
9934#endif
9935
220e2d4e
IH
9936 if (keep_delims)
9937 sv_catpvn(sv, s, termlen);
9938 s += termlen;
9939 }
5db06880
NC
9940#ifdef PERL_MAD
9941 else {
9942 if (PL_madskills) {
c35e046a
AL
9943 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9944 const int len = s - tstart - termlen;
cd81e915 9945 if (PL_thisstuff)
c35e046a 9946 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9947 else
c35e046a 9948 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9949 if (!PL_thisclose && !keep_delims)
9950 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
9951 }
9952 }
9953#endif
220e2d4e 9954 if (has_utf8 || PL_encoding)
b1c7b182 9955 SvUTF8_on(sv);
d0063567 9956
57843af0 9957 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
9958
9959 /* if we allocated too much space, give some back */
93a17b20
LW
9960 if (SvCUR(sv) + 5 < SvLEN(sv)) {
9961 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 9962 SvPV_renew(sv, SvLEN(sv));
79072805 9963 }
02aa26ce
NT
9964
9965 /* decide whether this is the first or second quoted string we've read
9966 for this op
9967 */
4e553d73 9968
3280af22
NIS
9969 if (PL_lex_stuff)
9970 PL_lex_repl = sv;
79072805 9971 else
3280af22 9972 PL_lex_stuff = sv;
378cc40b
LW
9973 return s;
9974}
9975
02aa26ce
NT
9976/*
9977 scan_num
9978 takes: pointer to position in buffer
9979 returns: pointer to new position in buffer
6154021b 9980 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
9981
9982 Read a number in any of the formats that Perl accepts:
9983
7fd134d9
JH
9984 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
9985 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
9986 0b[01](_?[01])*
9987 0[0-7](_?[0-7])*
9988 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 9989
3280af22 9990 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
9991 thing it reads.
9992
9993 If it reads a number without a decimal point or an exponent, it will
9994 try converting the number to an integer and see if it can do so
9995 without loss of precision.
9996*/
4e553d73 9997
378cc40b 9998char *
bfed75c6 9999Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10000{
97aff369 10001 dVAR;
bfed75c6 10002 register const char *s = start; /* current position in buffer */
02aa26ce
NT
10003 register char *d; /* destination in temp buffer */
10004 register char *e; /* end of temp buffer */
86554af2 10005 NV nv; /* number read, as a double */
a0714e2c 10006 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10007 bool floatit; /* boolean: int or float? */
cbbf8932 10008 const char *lastub = NULL; /* position of last underbar */
bfed75c6 10009 static char const number_too_long[] = "Number too long";
378cc40b 10010
7918f24d
NC
10011 PERL_ARGS_ASSERT_SCAN_NUM;
10012
02aa26ce
NT
10013 /* We use the first character to decide what type of number this is */
10014
378cc40b 10015 switch (*s) {
79072805 10016 default:
cea2e8a9 10017 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 10018
02aa26ce 10019 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10020 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10021 case '0':
10022 {
02aa26ce
NT
10023 /* variables:
10024 u holds the "number so far"
4f19785b
WSI
10025 shift the power of 2 of the base
10026 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10027 overflowed was the number more than we can hold?
10028
10029 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10030 we in octal/hex/binary?" indicator to disallow hex characters
10031 when in octal mode.
02aa26ce 10032 */
9e24b6e2
JH
10033 NV n = 0.0;
10034 UV u = 0;
79072805 10035 I32 shift;
9e24b6e2 10036 bool overflowed = FALSE;
61f33854 10037 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10038 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10039 static const char* const bases[5] =
10040 { "", "binary", "", "octal", "hexadecimal" };
10041 static const char* const Bases[5] =
10042 { "", "Binary", "", "Octal", "Hexadecimal" };
10043 static const char* const maxima[5] =
10044 { "",
10045 "0b11111111111111111111111111111111",
10046 "",
10047 "037777777777",
10048 "0xffffffff" };
bfed75c6 10049 const char *base, *Base, *max;
378cc40b 10050
02aa26ce 10051 /* check for hex */
a674e8db 10052 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10053 shift = 4;
10054 s += 2;
61f33854 10055 just_zero = FALSE;
a674e8db 10056 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10057 shift = 1;
10058 s += 2;
61f33854 10059 just_zero = FALSE;
378cc40b 10060 }
02aa26ce 10061 /* check for a decimal in disguise */
b78218b7 10062 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10063 goto decimal;
02aa26ce 10064 /* so it must be octal */
928753ea 10065 else {
378cc40b 10066 shift = 3;
928753ea
JH
10067 s++;
10068 }
10069
10070 if (*s == '_') {
a2a5de95 10071 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10072 "Misplaced _ in number");
10073 lastub = s++;
10074 }
9e24b6e2
JH
10075
10076 base = bases[shift];
10077 Base = Bases[shift];
10078 max = maxima[shift];
02aa26ce 10079
4f19785b 10080 /* read the rest of the number */
378cc40b 10081 for (;;) {
9e24b6e2 10082 /* x is used in the overflow test,
893fe2c2 10083 b is the digit we're adding on. */
9e24b6e2 10084 UV x, b;
55497cff 10085
378cc40b 10086 switch (*s) {
02aa26ce
NT
10087
10088 /* if we don't mention it, we're done */
378cc40b
LW
10089 default:
10090 goto out;
02aa26ce 10091
928753ea 10092 /* _ are ignored -- but warned about if consecutive */
de3bb511 10093 case '_':
a2a5de95
NC
10094 if (lastub && s == lastub + 1)
10095 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10096 "Misplaced _ in number");
928753ea 10097 lastub = s++;
de3bb511 10098 break;
02aa26ce
NT
10099
10100 /* 8 and 9 are not octal */
378cc40b 10101 case '8': case '9':
4f19785b 10102 if (shift == 3)
cea2e8a9 10103 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10104 /* FALL THROUGH */
02aa26ce
NT
10105
10106 /* octal digits */
4f19785b 10107 case '2': case '3': case '4':
378cc40b 10108 case '5': case '6': case '7':
4f19785b 10109 if (shift == 1)
cea2e8a9 10110 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10111 /* FALL THROUGH */
10112
10113 case '0': case '1':
02aa26ce 10114 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10115 goto digit;
02aa26ce
NT
10116
10117 /* hex digits */
378cc40b
LW
10118 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10119 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10120 /* make sure they said 0x */
378cc40b
LW
10121 if (shift != 4)
10122 goto out;
55497cff 10123 b = (*s++ & 7) + 9;
02aa26ce
NT
10124
10125 /* Prepare to put the digit we have onto the end
10126 of the number so far. We check for overflows.
10127 */
10128
55497cff 10129 digit:
61f33854 10130 just_zero = FALSE;
9e24b6e2
JH
10131 if (!overflowed) {
10132 x = u << shift; /* make room for the digit */
10133
10134 if ((x >> shift) != u
10135 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10136 overflowed = TRUE;
10137 n = (NV) u;
9b387841
NC
10138 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10139 "Integer overflow in %s number",
10140 base);
9e24b6e2
JH
10141 } else
10142 u = x | b; /* add the digit to the end */
10143 }
10144 if (overflowed) {
10145 n *= nvshift[shift];
10146 /* If an NV has not enough bits in its
10147 * mantissa to represent an UV this summing of
10148 * small low-order numbers is a waste of time
10149 * (because the NV cannot preserve the
10150 * low-order bits anyway): we could just
10151 * remember when did we overflow and in the
10152 * end just multiply n by the right
10153 * amount. */
10154 n += (NV) b;
55497cff 10155 }
378cc40b
LW
10156 break;
10157 }
10158 }
02aa26ce
NT
10159
10160 /* if we get here, we had success: make a scalar value from
10161 the number.
10162 */
378cc40b 10163 out:
928753ea
JH
10164
10165 /* final misplaced underbar check */
10166 if (s[-1] == '_') {
a2a5de95 10167 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10168 }
10169
9e24b6e2 10170 if (overflowed) {
a2a5de95
NC
10171 if (n > 4294967295.0)
10172 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10173 "%s number > %s non-portable",
10174 Base, max);
b081dd7e 10175 sv = newSVnv(n);
9e24b6e2
JH
10176 }
10177 else {
15041a67 10178#if UVSIZE > 4
a2a5de95
NC
10179 if (u > 0xffffffff)
10180 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10181 "%s number > %s non-portable",
10182 Base, max);
2cc4c2dc 10183#endif
b081dd7e 10184 sv = newSVuv(u);
9e24b6e2 10185 }
61f33854 10186 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10187 sv = new_constant(start, s - start, "integer",
eb0d8d16 10188 sv, NULL, NULL, 0);
61f33854 10189 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10190 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10191 }
10192 break;
02aa26ce
NT
10193
10194 /*
10195 handle decimal numbers.
10196 we're also sent here when we read a 0 as the first digit
10197 */
378cc40b
LW
10198 case '1': case '2': case '3': case '4': case '5':
10199 case '6': case '7': case '8': case '9': case '.':
10200 decimal:
3280af22
NIS
10201 d = PL_tokenbuf;
10202 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10203 floatit = FALSE;
02aa26ce
NT
10204
10205 /* read next group of digits and _ and copy into d */
de3bb511 10206 while (isDIGIT(*s) || *s == '_') {
4e553d73 10207 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10208 if -w is on
10209 */
93a17b20 10210 if (*s == '_') {
a2a5de95
NC
10211 if (lastub && s == lastub + 1)
10212 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10213 "Misplaced _ in number");
928753ea 10214 lastub = s++;
93a17b20 10215 }
fc36a67e 10216 else {
02aa26ce 10217 /* check for end of fixed-length buffer */
fc36a67e 10218 if (d >= e)
cea2e8a9 10219 Perl_croak(aTHX_ number_too_long);
02aa26ce 10220 /* if we're ok, copy the character */
378cc40b 10221 *d++ = *s++;
fc36a67e 10222 }
378cc40b 10223 }
02aa26ce
NT
10224
10225 /* final misplaced underbar check */
928753ea 10226 if (lastub && s == lastub + 1) {
a2a5de95 10227 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10228 }
02aa26ce
NT
10229
10230 /* read a decimal portion if there is one. avoid
10231 3..5 being interpreted as the number 3. followed
10232 by .5
10233 */
2f3197b3 10234 if (*s == '.' && s[1] != '.') {
79072805 10235 floatit = TRUE;
378cc40b 10236 *d++ = *s++;
02aa26ce 10237
928753ea 10238 if (*s == '_') {
a2a5de95
NC
10239 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10240 "Misplaced _ in number");
928753ea
JH
10241 lastub = s;
10242 }
10243
10244 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10245 */
fc36a67e 10246 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10247 /* fixed length buffer check */
fc36a67e 10248 if (d >= e)
cea2e8a9 10249 Perl_croak(aTHX_ number_too_long);
928753ea 10250 if (*s == '_') {
a2a5de95
NC
10251 if (lastub && s == lastub + 1)
10252 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10253 "Misplaced _ in number");
928753ea
JH
10254 lastub = s;
10255 }
10256 else
fc36a67e 10257 *d++ = *s;
378cc40b 10258 }
928753ea
JH
10259 /* fractional part ending in underbar? */
10260 if (s[-1] == '_') {
a2a5de95
NC
10261 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10262 "Misplaced _ in number");
928753ea 10263 }
dd629d5b
GS
10264 if (*s == '.' && isDIGIT(s[1])) {
10265 /* oops, it's really a v-string, but without the "v" */
f4758303 10266 s = start;
dd629d5b
GS
10267 goto vstring;
10268 }
378cc40b 10269 }
02aa26ce
NT
10270
10271 /* read exponent part, if present */
3792a11b 10272 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10273 floatit = TRUE;
10274 s++;
02aa26ce
NT
10275
10276 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10277 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10278
7fd134d9
JH
10279 /* stray preinitial _ */
10280 if (*s == '_') {
a2a5de95
NC
10281 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10282 "Misplaced _ in number");
7fd134d9
JH
10283 lastub = s++;
10284 }
10285
02aa26ce 10286 /* allow positive or negative exponent */
378cc40b
LW
10287 if (*s == '+' || *s == '-')
10288 *d++ = *s++;
02aa26ce 10289
7fd134d9
JH
10290 /* stray initial _ */
10291 if (*s == '_') {
a2a5de95
NC
10292 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10293 "Misplaced _ in number");
7fd134d9
JH
10294 lastub = s++;
10295 }
10296
7fd134d9
JH
10297 /* read digits of exponent */
10298 while (isDIGIT(*s) || *s == '_') {
10299 if (isDIGIT(*s)) {
10300 if (d >= e)
10301 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10302 *d++ = *s++;
7fd134d9
JH
10303 }
10304 else {
041457d9 10305 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10306 (!isDIGIT(s[1]) && s[1] != '_')))
10307 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10308 "Misplaced _ in number");
b3b48e3e 10309 lastub = s++;
7fd134d9 10310 }
7fd134d9 10311 }
378cc40b 10312 }
02aa26ce 10313
02aa26ce 10314
0b7fceb9 10315 /*
58bb9ec3
NC
10316 We try to do an integer conversion first if no characters
10317 indicating "float" have been found.
0b7fceb9
MU
10318 */
10319
10320 if (!floatit) {
58bb9ec3 10321 UV uv;
6136c704 10322 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10323
10324 if (flags == IS_NUMBER_IN_UV) {
10325 if (uv <= IV_MAX)
b081dd7e 10326 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10327 else
b081dd7e 10328 sv = newSVuv(uv);
58bb9ec3
NC
10329 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10330 if (uv <= (UV) IV_MIN)
b081dd7e 10331 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10332 else
10333 floatit = TRUE;
10334 } else
10335 floatit = TRUE;
10336 }
0b7fceb9 10337 if (floatit) {
58bb9ec3
NC
10338 /* terminate the string */
10339 *d = '\0';
86554af2 10340 nv = Atof(PL_tokenbuf);
b081dd7e 10341 sv = newSVnv(nv);
86554af2 10342 }
86554af2 10343
eb0d8d16
NC
10344 if ( floatit
10345 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10346 const char *const key = floatit ? "float" : "integer";
10347 const STRLEN keylen = floatit ? 5 : 7;
10348 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10349 key, keylen, sv, NULL, NULL, 0);
10350 }
378cc40b 10351 break;
0b7fceb9 10352
e312add1 10353 /* if it starts with a v, it could be a v-string */
a7cb1f99 10354 case 'v':
dd629d5b 10355vstring:
561b68a9 10356 sv = newSV(5); /* preallocate storage space */
65b06e02 10357 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10358 break;
79072805 10359 }
a687059c 10360
02aa26ce
NT
10361 /* make the op for the constant and return */
10362
a86a20aa 10363 if (sv)
b73d6f50 10364 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10365 else
5f66b61c 10366 lvalp->opval = NULL;
a687059c 10367
73d840c0 10368 return (char *)s;
378cc40b
LW
10369}
10370
76e3520e 10371STATIC char *
cea2e8a9 10372S_scan_formline(pTHX_ register char *s)
378cc40b 10373{
97aff369 10374 dVAR;
79072805 10375 register char *eol;
378cc40b 10376 register char *t;
6136c704 10377 SV * const stuff = newSVpvs("");
79072805 10378 bool needargs = FALSE;
c5ee2135 10379 bool eofmt = FALSE;
5db06880
NC
10380#ifdef PERL_MAD
10381 char *tokenstart = s;
4f61fd4b
JC
10382 SV* savewhite = NULL;
10383
5db06880 10384 if (PL_madskills) {
cd81e915
NC
10385 savewhite = PL_thiswhite;
10386 PL_thiswhite = 0;
5db06880
NC
10387 }
10388#endif
378cc40b 10389
7918f24d
NC
10390 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10391
79072805 10392 while (!needargs) {
a1b95068 10393 if (*s == '.') {
c35e046a 10394 t = s+1;
51882d45 10395#ifdef PERL_STRICT_CR
c35e046a
AL
10396 while (SPACE_OR_TAB(*t))
10397 t++;
51882d45 10398#else
c35e046a
AL
10399 while (SPACE_OR_TAB(*t) || *t == '\r')
10400 t++;
51882d45 10401#endif
c5ee2135
WL
10402 if (*t == '\n' || t == PL_bufend) {
10403 eofmt = TRUE;
79072805 10404 break;
c5ee2135 10405 }
79072805 10406 }
3280af22 10407 if (PL_in_eval && !PL_rsfp) {
07409e01 10408 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 10409 if (!eol++)
3280af22 10410 eol = PL_bufend;
0f85fab0
LW
10411 }
10412 else
3280af22 10413 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 10414 if (*s != '#') {
a0d0e21e
LW
10415 for (t = s; t < eol; t++) {
10416 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10417 needargs = FALSE;
10418 goto enough; /* ~~ must be first line in formline */
378cc40b 10419 }
a0d0e21e
LW
10420 if (*t == '@' || *t == '^')
10421 needargs = TRUE;
378cc40b 10422 }
7121b347
MG
10423 if (eol > s) {
10424 sv_catpvn(stuff, s, eol-s);
2dc4c65b 10425#ifndef PERL_STRICT_CR
7121b347
MG
10426 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10427 char *end = SvPVX(stuff) + SvCUR(stuff);
10428 end[-2] = '\n';
10429 end[-1] = '\0';
b162af07 10430 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 10431 }
2dc4c65b 10432#endif
7121b347
MG
10433 }
10434 else
10435 break;
79072805 10436 }
95a20fc0 10437 s = (char*)eol;
3280af22 10438 if (PL_rsfp) {
f0e67a1d 10439 bool got_some;
5db06880
NC
10440#ifdef PERL_MAD
10441 if (PL_madskills) {
cd81e915
NC
10442 if (PL_thistoken)
10443 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 10444 else
cd81e915 10445 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
10446 }
10447#endif
f0e67a1d
Z
10448 PL_bufptr = PL_bufend;
10449 CopLINE_inc(PL_curcop);
10450 got_some = lex_next_chunk(0);
10451 CopLINE_dec(PL_curcop);
10452 s = PL_bufptr;
5db06880 10453#ifdef PERL_MAD
f0e67a1d 10454 tokenstart = PL_bufptr;
5db06880 10455#endif
f0e67a1d 10456 if (!got_some)
378cc40b 10457 break;
378cc40b 10458 }
463ee0b2 10459 incline(s);
79072805 10460 }
a0d0e21e
LW
10461 enough:
10462 if (SvCUR(stuff)) {
3280af22 10463 PL_expect = XTERM;
79072805 10464 if (needargs) {
3280af22 10465 PL_lex_state = LEX_NORMAL;
cd81e915 10466 start_force(PL_curforce);
9ded7720 10467 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
10468 force_next(',');
10469 }
a0d0e21e 10470 else
3280af22 10471 PL_lex_state = LEX_FORMLINE;
1bd51a4c 10472 if (!IN_BYTES) {
95a20fc0 10473 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
10474 SvUTF8_on(stuff);
10475 else if (PL_encoding)
10476 sv_recode_to_utf8(stuff, PL_encoding);
10477 }
cd81e915 10478 start_force(PL_curforce);
9ded7720 10479 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 10480 force_next(THING);
cd81e915 10481 start_force(PL_curforce);
9ded7720 10482 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 10483 force_next(LSTOP);
378cc40b 10484 }
79072805 10485 else {
8990e307 10486 SvREFCNT_dec(stuff);
c5ee2135
WL
10487 if (eofmt)
10488 PL_lex_formbrack = 0;
3280af22 10489 PL_bufptr = s;
79072805 10490 }
5db06880
NC
10491#ifdef PERL_MAD
10492 if (PL_madskills) {
cd81e915
NC
10493 if (PL_thistoken)
10494 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 10495 else
cd81e915
NC
10496 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10497 PL_thiswhite = savewhite;
5db06880
NC
10498 }
10499#endif
79072805 10500 return s;
378cc40b 10501}
a687059c 10502
ba6d6ac9 10503I32
864dbfa3 10504Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 10505{
97aff369 10506 dVAR;
a3b680e6 10507 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 10508 CV* const outsidecv = PL_compcv;
8990e307 10509
3280af22
NIS
10510 if (PL_compcv) {
10511 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 10512 }
7766f137 10513 SAVEI32(PL_subline);
3280af22 10514 save_item(PL_subname);
3280af22 10515 SAVESPTR(PL_compcv);
3280af22 10516
ea726b52 10517 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
10518 CvFLAGS(PL_compcv) |= flags;
10519
57843af0 10520 PL_subline = CopLINE(PL_curcop);
dd2155a4 10521 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 10522 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 10523 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 10524
8990e307
LW
10525 return oldsavestack_ix;
10526}
10527
084592ab
CN
10528#ifdef __SC__
10529#pragma segment Perl_yylex
10530#endif
af41e527
NC
10531static int
10532S_yywarn(pTHX_ const char *const s)
8990e307 10533{
97aff369 10534 dVAR;
7918f24d
NC
10535
10536 PERL_ARGS_ASSERT_YYWARN;
10537
faef0170 10538 PL_in_eval |= EVAL_WARNONLY;
748a9306 10539 yyerror(s);
faef0170 10540 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 10541 return 0;
8990e307
LW
10542}
10543
10544int
15f169a1 10545Perl_yyerror(pTHX_ const char *const s)
463ee0b2 10546{
97aff369 10547 dVAR;
bfed75c6
AL
10548 const char *where = NULL;
10549 const char *context = NULL;
68dc0745 10550 int contlen = -1;
46fc3d4c 10551 SV *msg;
5912531f 10552 int yychar = PL_parser->yychar;
463ee0b2 10553
7918f24d
NC
10554 PERL_ARGS_ASSERT_YYERROR;
10555
3280af22 10556 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 10557 where = "at EOF";
8bcfe651
TM
10558 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10559 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10560 PL_oldbufptr != PL_bufptr) {
f355267c
JH
10561 /*
10562 Only for NetWare:
10563 The code below is removed for NetWare because it abends/crashes on NetWare
10564 when the script has error such as not having the closing quotes like:
10565 if ($var eq "value)
10566 Checking of white spaces is anyway done in NetWare code.
10567 */
10568#ifndef NETWARE
3280af22
NIS
10569 while (isSPACE(*PL_oldoldbufptr))
10570 PL_oldoldbufptr++;
f355267c 10571#endif
3280af22
NIS
10572 context = PL_oldoldbufptr;
10573 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 10574 }
8bcfe651
TM
10575 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10576 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
10577 /*
10578 Only for NetWare:
10579 The code below is removed for NetWare because it abends/crashes on NetWare
10580 when the script has error such as not having the closing quotes like:
10581 if ($var eq "value)
10582 Checking of white spaces is anyway done in NetWare code.
10583 */
10584#ifndef NETWARE
3280af22
NIS
10585 while (isSPACE(*PL_oldbufptr))
10586 PL_oldbufptr++;
f355267c 10587#endif
3280af22
NIS
10588 context = PL_oldbufptr;
10589 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
10590 }
10591 else if (yychar > 255)
68dc0745 10592 where = "next token ???";
12fbd33b 10593 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
10594 if (PL_lex_state == LEX_NORMAL ||
10595 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 10596 where = "at end of line";
3280af22 10597 else if (PL_lex_inpat)
68dc0745 10598 where = "within pattern";
463ee0b2 10599 else
68dc0745 10600 where = "within string";
463ee0b2 10601 }
46fc3d4c 10602 else {
84bafc02 10603 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 10604 if (yychar < 32)
cea2e8a9 10605 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 10606 else if (isPRINT_LC(yychar)) {
88c9ea1e 10607 const char string = yychar;
5e7aa789
NC
10608 sv_catpvn(where_sv, &string, 1);
10609 }
463ee0b2 10610 else
cea2e8a9 10611 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 10612 where = SvPVX_const(where_sv);
463ee0b2 10613 }
46fc3d4c 10614 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 10615 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 10616 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 10617 if (context)
cea2e8a9 10618 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 10619 else
cea2e8a9 10620 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 10621 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 10622 Perl_sv_catpvf(aTHX_ msg,
57def98f 10623 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 10624 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 10625 PL_multi_end = 0;
a0d0e21e 10626 }
500960a6 10627 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 10628 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 10629 }
463ee0b2 10630 else
5a844595 10631 qerror(msg);
c7d6bfb2
GS
10632 if (PL_error_count >= 10) {
10633 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 10634 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 10635 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
10636 else
10637 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 10638 OutCopFILE(PL_curcop));
c7d6bfb2 10639 }
3280af22 10640 PL_in_my = 0;
5c284bb0 10641 PL_in_my_stash = NULL;
463ee0b2
LW
10642 return 0;
10643}
084592ab
CN
10644#ifdef __SC__
10645#pragma segment Main
10646#endif
4e35701f 10647
b250498f 10648STATIC char*
3ae08724 10649S_swallow_bom(pTHX_ U8 *s)
01ec43d0 10650{
97aff369 10651 dVAR;
f54cb97a 10652 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
10653
10654 PERL_ARGS_ASSERT_SWALLOW_BOM;
10655
7aa207d6 10656 switch (s[0]) {
4e553d73
NIS
10657 case 0xFF:
10658 if (s[1] == 0xFE) {
ee6ba15d 10659 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 10660 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 10661 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 10662#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 10663 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 10664 s += 2;
dea0fc0b 10665 if (PL_bufend > (char*)s) {
81a923f4 10666 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 10667 }
b250498f 10668#else
ee6ba15d 10669 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 10670#endif
01ec43d0
GS
10671 }
10672 break;
78ae23f5 10673 case 0xFE:
7aa207d6 10674 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 10675#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10676 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
10677 s += 2;
10678 if (PL_bufend > (char *)s) {
81a923f4 10679 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 10680 }
b250498f 10681#else
ee6ba15d 10682 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 10683#endif
01ec43d0
GS
10684 }
10685 break;
3ae08724
GS
10686 case 0xEF:
10687 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 10688 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
10689 s += 3; /* UTF-8 */
10690 }
10691 break;
10692 case 0:
7aa207d6
JH
10693 if (slen > 3) {
10694 if (s[1] == 0) {
10695 if (s[2] == 0xFE && s[3] == 0xFF) {
10696 /* UTF-32 big-endian */
ee6ba15d 10697 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
10698 }
10699 }
10700 else if (s[2] == 0 && s[3] != 0) {
10701 /* Leading bytes
10702 * 00 xx 00 xx
10703 * are a good indicator of UTF-16BE. */
ee6ba15d 10704#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10705 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
10706 s = add_utf16_textfilter(s, FALSE);
10707#else
10708 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10709#endif
7aa207d6 10710 }
01ec43d0 10711 }
e294cc5d
JH
10712#ifdef EBCDIC
10713 case 0xDD:
10714 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10715 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10716 s += 4; /* UTF-8 */
10717 }
10718 break;
10719#endif
10720
7aa207d6
JH
10721 default:
10722 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10723 /* Leading bytes
10724 * xx 00 xx 00
10725 * are a good indicator of UTF-16LE. */
ee6ba15d 10726#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10727 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 10728 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
10729#else
10730 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10731#endif
7aa207d6 10732 }
01ec43d0 10733 }
b8f84bb2 10734 return (char*)s;
b250498f 10735}
4755096e 10736
6e3aabd6
GS
10737
10738#ifndef PERL_NO_UTF16_FILTER
10739static I32
a28af015 10740S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 10741{
97aff369 10742 dVAR;
f3040f2c 10743 SV *const filter = FILTER_DATA(idx);
2a773401
NC
10744 /* We re-use this each time round, throwing the contents away before we
10745 return. */
2a773401 10746 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 10747 SV *const utf8_buffer = filter;
c28d6105 10748 IV status = IoPAGE(filter);
f2338a2e 10749 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 10750 I32 retval;
c8b0cbae 10751
c85ae797
NC
10752 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10753
c8b0cbae
NC
10754 /* As we're automatically added, at the lowest level, and hence only called
10755 from this file, we can be sure that we're not called in block mode. Hence
10756 don't bother writing code to deal with block mode. */
10757 if (maxlen) {
10758 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10759 }
c28d6105
NC
10760 if (status < 0) {
10761 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10762 }
1de9afcd 10763 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 10764 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 10765 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
10766 reverse ? 'l' : 'b', idx, maxlen, status,
10767 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10768
10769 while (1) {
10770 STRLEN chars;
10771 STRLEN have;
dea0fc0b 10772 I32 newlen;
2a773401 10773 U8 *end;
c28d6105
NC
10774 /* First, look in our buffer of existing UTF-8 data: */
10775 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10776
10777 if (nl) {
10778 ++nl;
10779 } else if (status == 0) {
10780 /* EOF */
10781 IoPAGE(filter) = 0;
10782 nl = SvEND(utf8_buffer);
10783 }
10784 if (nl) {
d2d1d4de
NC
10785 STRLEN got = nl - SvPVX(utf8_buffer);
10786 /* Did we have anything to append? */
10787 retval = got != 0;
10788 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
10789 /* Everything else in this code works just fine if SVp_POK isn't
10790 set. This, however, needs it, and we need it to work, else
10791 we loop infinitely because the buffer is never consumed. */
10792 sv_chop(utf8_buffer, nl);
10793 break;
10794 }
ba77e4cc 10795
c28d6105
NC
10796 /* OK, not a complete line there, so need to read some more UTF-16.
10797 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
10798 while (1) {
10799 if (status <= 0)
10800 break;
10801 if (SvCUR(utf16_buffer) >= 2) {
10802 /* Location of the high octet of the last complete code point.
10803 Gosh, UTF-16 is a pain. All the benefits of variable length,
10804 *coupled* with all the benefits of partial reads and
10805 endianness. */
10806 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10807 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10808
10809 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10810 break;
10811 }
10812
10813 /* We have the first half of a surrogate. Read more. */
10814 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10815 }
c28d6105 10816
c28d6105
NC
10817 status = FILTER_READ(idx + 1, utf16_buffer,
10818 160 + (SvCUR(utf16_buffer) & 1));
10819 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 10820 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
10821 if (status < 0) {
10822 /* Error */
10823 IoPAGE(filter) = status;
10824 return status;
10825 }
10826 }
10827
10828 chars = SvCUR(utf16_buffer) >> 1;
10829 have = SvCUR(utf8_buffer);
10830 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 10831
aa6dbd60 10832 if (reverse) {
c28d6105
NC
10833 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10834 (U8*)SvPVX_const(utf8_buffer) + have,
10835 chars * 2, &newlen);
aa6dbd60 10836 } else {
2a773401 10837 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
10838 (U8*)SvPVX_const(utf8_buffer) + have,
10839 chars * 2, &newlen);
2a773401 10840 }
c28d6105 10841 SvCUR_set(utf8_buffer, have + newlen);
2a773401 10842 *end = '\0';
c28d6105 10843
e07286ed
NC
10844 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10845 it's private to us, and utf16_to_utf8{,reversed} take a
10846 (pointer,length) pair, rather than a NUL-terminated string. */
10847 if(SvCUR(utf16_buffer) & 1) {
10848 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10849 SvCUR_set(utf16_buffer, 1);
10850 } else {
10851 SvCUR_set(utf16_buffer, 0);
10852 }
2a773401 10853 }
c28d6105
NC
10854 DEBUG_P(PerlIO_printf(Perl_debug_log,
10855 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10856 status,
10857 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10858 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 10859 return retval;
6e3aabd6 10860}
81a923f4
NC
10861
10862static U8 *
10863S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10864{
2a773401 10865 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 10866
c85ae797
NC
10867 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10868
c28d6105 10869 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 10870 sv_setpvs(filter, "");
2a773401 10871 IoLINES(filter) = reversed;
c28d6105
NC
10872 IoPAGE(filter) = 1; /* Not EOF */
10873
10874 /* Sadly, we have to return a valid pointer, come what may, so we have to
10875 ignore any error return from this. */
10876 SvCUR_set(PL_linestr, 0);
10877 if (FILTER_READ(0, PL_linestr, 0)) {
10878 SvUTF8_on(PL_linestr);
81a923f4 10879 } else {
c28d6105 10880 SvUTF8_on(PL_linestr);
81a923f4 10881 }
c28d6105 10882 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
10883 return (U8*)SvPVX(PL_linestr);
10884}
6e3aabd6 10885#endif
9f4817db 10886
f333445c
JP
10887/*
10888Returns a pointer to the next character after the parsed
10889vstring, as well as updating the passed in sv.
10890
10891Function must be called like
10892
561b68a9 10893 sv = newSV(5);
65b06e02 10894 s = scan_vstring(s,e,sv);
f333445c 10895
65b06e02 10896where s and e are the start and end of the string.
f333445c
JP
10897The sv should already be large enough to store the vstring
10898passed in, for performance reasons.
10899
10900*/
10901
10902char *
15f169a1 10903Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 10904{
97aff369 10905 dVAR;
bfed75c6
AL
10906 const char *pos = s;
10907 const char *start = s;
7918f24d
NC
10908
10909 PERL_ARGS_ASSERT_SCAN_VSTRING;
10910
f333445c 10911 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 10912 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 10913 pos++;
f333445c
JP
10914 if ( *pos != '.') {
10915 /* this may not be a v-string if followed by => */
bfed75c6 10916 const char *next = pos;
65b06e02 10917 while (next < e && isSPACE(*next))
8fc7bb1c 10918 ++next;
65b06e02 10919 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
10920 /* return string not v-string */
10921 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 10922 return (char *)pos;
f333445c
JP
10923 }
10924 }
10925
10926 if (!isALPHA(*pos)) {
89ebb4a3 10927 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 10928
d4c19fe8
AL
10929 if (*s == 'v')
10930 s++; /* get past 'v' */
f333445c 10931
76f68e9b 10932 sv_setpvs(sv, "");
f333445c
JP
10933
10934 for (;;) {
d4c19fe8 10935 /* this is atoi() that tolerates underscores */
0bd48802
AL
10936 U8 *tmpend;
10937 UV rev = 0;
d4c19fe8
AL
10938 const char *end = pos;
10939 UV mult = 1;
10940 while (--end >= s) {
10941 if (*end != '_') {
10942 const UV orev = rev;
f333445c
JP
10943 rev += (*end - '0') * mult;
10944 mult *= 10;
9b387841
NC
10945 if (orev > rev)
10946 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10947 "Integer overflow in decimal number");
f333445c
JP
10948 }
10949 }
10950#ifdef EBCDIC
10951 if (rev > 0x7FFFFFFF)
10952 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10953#endif
10954 /* Append native character for the rev point */
10955 tmpend = uvchr_to_utf8(tmpbuf, rev);
10956 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10957 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10958 SvUTF8_on(sv);
65b06e02 10959 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
10960 s = ++pos;
10961 else {
10962 s = pos;
10963 break;
10964 }
65b06e02 10965 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
10966 pos++;
10967 }
10968 SvPOK_on(sv);
10969 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10970 SvRMAGICAL_on(sv);
10971 }
73d840c0 10972 return (char *)s;
f333445c
JP
10973}
10974
88e1f1a2
JV
10975int
10976Perl_keyword_plugin_standard(pTHX_
10977 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10978{
10979 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10980 PERL_UNUSED_CONTEXT;
10981 PERL_UNUSED_ARG(keyword_ptr);
10982 PERL_UNUSED_ARG(keyword_len);
10983 PERL_UNUSED_ARG(op_ptr);
10984 return KEYWORD_PLUGIN_DECLINE;
10985}
10986
78cdf107 10987#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 10988static void
78cdf107 10989S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
10990{
10991 SAVEI32(PL_lex_brackets);
10992 if (PL_lex_brackets > 100)
10993 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
10994 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
10995 SAVEI32(PL_lex_allbrackets);
10996 PL_lex_allbrackets = 0;
10997 SAVEI8(PL_lex_fakeeof);
2dcac756 10998 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
10999 if(yyparse(gramtype) && !PL_parser->error_count)
11000 qerror(Perl_mess(aTHX_ "Parse error"));
11001}
11002
78cdf107 11003#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11004static OP *
78cdf107 11005S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11006{
11007 OP *o;
11008 ENTER;
11009 SAVEVPTR(PL_eval_root);
11010 PL_eval_root = NULL;
78cdf107 11011 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11012 o = PL_eval_root;
11013 LEAVE;
11014 return o;
11015}
11016
78cdf107
Z
11017#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11018static OP *
11019S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11020{
11021 OP *exprop;
11022 if (flags & ~PARSE_OPTIONAL)
11023 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11024 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11025 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11026 if (!PL_parser->error_count)
11027 qerror(Perl_mess(aTHX_ "Parse error"));
11028 exprop = newOP(OP_NULL, 0);
11029 }
11030 return exprop;
11031}
11032
11033/*
11034=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11035
11036Parse a Perl arithmetic expression. This may contain operators of precedence
11037down to the bit shift operators. The expression must be followed (and thus
11038terminated) either by a comparison or lower-precedence operator or by
11039something that would normally terminate an expression such as semicolon.
11040If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11041otherwise it is mandatory. It is up to the caller to ensure that the
11042dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11043the source of the code to be parsed and the lexical context for the
11044expression.
11045
11046The op tree representing the expression is returned. If an optional
11047expression is absent, a null pointer is returned, otherwise the pointer
11048will be non-null.
11049
11050If an error occurs in parsing or compilation, in most cases a valid op
11051tree is returned anyway. The error is reflected in the parser state,
11052normally resulting in a single exception at the top level of parsing
11053which covers all the compilation errors that occurred. Some compilation
11054errors, however, will throw an exception immediately.
11055
11056=cut
11057*/
11058
11059OP *
11060Perl_parse_arithexpr(pTHX_ U32 flags)
11061{
11062 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11063}
11064
11065/*
11066=for apidoc Amx|OP *|parse_termexpr|U32 flags
11067
11068Parse a Perl term expression. This may contain operators of precedence
11069down to the assignment operators. The expression must be followed (and thus
11070terminated) either by a comma or lower-precedence operator or by
11071something that would normally terminate an expression such as semicolon.
11072If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11073otherwise it is mandatory. It is up to the caller to ensure that the
11074dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11075the source of the code to be parsed and the lexical context for the
11076expression.
11077
11078The op tree representing the expression is returned. If an optional
11079expression is absent, a null pointer is returned, otherwise the pointer
11080will be non-null.
11081
11082If an error occurs in parsing or compilation, in most cases a valid op
11083tree is returned anyway. The error is reflected in the parser state,
11084normally resulting in a single exception at the top level of parsing
11085which covers all the compilation errors that occurred. Some compilation
11086errors, however, will throw an exception immediately.
11087
11088=cut
11089*/
11090
11091OP *
11092Perl_parse_termexpr(pTHX_ U32 flags)
11093{
11094 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11095}
11096
11097/*
11098=for apidoc Amx|OP *|parse_listexpr|U32 flags
11099
11100Parse a Perl list expression. This may contain operators of precedence
11101down to the comma operator. The expression must be followed (and thus
11102terminated) either by a low-precedence logic operator such as C<or> or by
11103something that would normally terminate an expression such as semicolon.
11104If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11105otherwise it is mandatory. It is up to the caller to ensure that the
11106dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11107the source of the code to be parsed and the lexical context for the
11108expression.
11109
11110The op tree representing the expression is returned. If an optional
11111expression is absent, a null pointer is returned, otherwise the pointer
11112will be non-null.
11113
11114If an error occurs in parsing or compilation, in most cases a valid op
11115tree is returned anyway. The error is reflected in the parser state,
11116normally resulting in a single exception at the top level of parsing
11117which covers all the compilation errors that occurred. Some compilation
11118errors, however, will throw an exception immediately.
11119
11120=cut
11121*/
11122
11123OP *
11124Perl_parse_listexpr(pTHX_ U32 flags)
11125{
11126 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11127}
11128
11129/*
11130=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11131
11132Parse a single complete Perl expression. This allows the full
11133expression grammar, including the lowest-precedence operators such
11134as C<or>. The expression must be followed (and thus terminated) by a
11135token that an expression would normally be terminated by: end-of-file,
11136closing bracketing punctuation, semicolon, or one of the keywords that
11137signals a postfix expression-statement modifier. If I<flags> includes
11138C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11139mandatory. It is up to the caller to ensure that the dynamic parser
11140state (L</PL_parser> et al) is correctly set to reflect the source of
11141the code to be parsed and the lexical context for the expression.
11142
11143The op tree representing the expression is returned. If an optional
11144expression is absent, a null pointer is returned, otherwise the pointer
11145will be non-null.
11146
11147If an error occurs in parsing or compilation, in most cases a valid op
11148tree is returned anyway. The error is reflected in the parser state,
11149normally resulting in a single exception at the top level of parsing
11150which covers all the compilation errors that occurred. Some compilation
11151errors, however, will throw an exception immediately.
11152
11153=cut
11154*/
11155
11156OP *
11157Perl_parse_fullexpr(pTHX_ U32 flags)
11158{
11159 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11160}
11161
e53d8f76
Z
11162/*
11163=for apidoc Amx|OP *|parse_block|U32 flags
11164
11165Parse a single complete Perl code block. This consists of an opening
11166brace, a sequence of statements, and a closing brace. The block
11167constitutes a lexical scope, so C<my> variables and various compile-time
11168effects can be contained within it. It is up to the caller to ensure
11169that the dynamic parser state (L</PL_parser> et al) is correctly set to
11170reflect the source of the code to be parsed and the lexical context for
11171the statement.
11172
11173The op tree representing the code block is returned. This is always a
11174real op, never a null pointer. It will normally be a C<lineseq> list,
11175including C<nextstate> or equivalent ops. No ops to construct any kind
11176of runtime scope are included by virtue of it being a block.
11177
11178If an error occurs in parsing or compilation, in most cases a valid op
11179tree (most likely null) is returned anyway. The error is reflected in
11180the parser state, normally resulting in a single exception at the top
11181level of parsing which covers all the compilation errors that occurred.
11182Some compilation errors, however, will throw an exception immediately.
11183
11184The I<flags> parameter is reserved for future use, and must always
11185be zero.
11186
11187=cut
11188*/
11189
11190OP *
11191Perl_parse_block(pTHX_ U32 flags)
11192{
11193 if (flags)
11194 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11195 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11196}
11197
1da4ca5f 11198/*
8359b381
Z
11199=for apidoc Amx|OP *|parse_barestmt|U32 flags
11200
11201Parse a single unadorned Perl statement. This may be a normal imperative
11202statement or a declaration that has compile-time effect. It does not
11203include any label or other affixture. It is up to the caller to ensure
11204that the dynamic parser state (L</PL_parser> et al) is correctly set to
11205reflect the source of the code to be parsed and the lexical context for
11206the statement.
11207
11208The op tree representing the statement is returned. This may be a
11209null pointer if the statement is null, for example if it was actually
11210a subroutine definition (which has compile-time side effects). If not
11211null, it will be ops directly implementing the statement, suitable to
11212pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11213equivalent op (except for those embedded in a scope contained entirely
11214within the statement).
11215
11216If an error occurs in parsing or compilation, in most cases a valid op
11217tree (most likely null) is returned anyway. The error is reflected in
11218the parser state, normally resulting in a single exception at the top
11219level of parsing which covers all the compilation errors that occurred.
11220Some compilation errors, however, will throw an exception immediately.
11221
11222The I<flags> parameter is reserved for future use, and must always
11223be zero.
11224
11225=cut
11226*/
11227
11228OP *
11229Perl_parse_barestmt(pTHX_ U32 flags)
11230{
11231 if (flags)
11232 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11233 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11234}
11235
11236/*
361d9b55
Z
11237=for apidoc Amx|SV *|parse_label|U32 flags
11238
11239Parse a single label, possibly optional, of the type that may prefix a
11240Perl statement. It is up to the caller to ensure that the dynamic parser
11241state (L</PL_parser> et al) is correctly set to reflect the source of
11242the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11243label is optional, otherwise it is mandatory.
11244
11245The name of the label is returned in the form of a fresh scalar. If an
11246optional label is absent, a null pointer is returned.
11247
11248If an error occurs in parsing, which can only occur if the label is
11249mandatory, a valid label is returned anyway. The error is reflected in
11250the parser state, normally resulting in a single exception at the top
11251level of parsing which covers all the compilation errors that occurred.
11252
11253=cut
11254*/
11255
11256SV *
11257Perl_parse_label(pTHX_ U32 flags)
11258{
11259 if (flags & ~PARSE_OPTIONAL)
11260 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11261 if (PL_lex_state == LEX_KNOWNEXT) {
11262 PL_parser->yychar = yylex();
11263 if (PL_parser->yychar == LABEL) {
11264 char *lpv = pl_yylval.pval;
11265 STRLEN llen = strlen(lpv);
11266 SV *lsv;
11267 PL_parser->yychar = YYEMPTY;
11268 lsv = newSV_type(SVt_PV);
11269 SvPV_set(lsv, lpv);
11270 SvCUR_set(lsv, llen);
11271 SvLEN_set(lsv, llen+1);
11272 SvPOK_on(lsv);
11273 return lsv;
11274 } else {
11275 yyunlex();
11276 goto no_label;
11277 }
11278 } else {
11279 char *s, *t;
11280 U8 c;
11281 STRLEN wlen, bufptr_pos;
11282 lex_read_space(0);
11283 t = s = PL_bufptr;
11284 c = (U8)*s;
11285 if (!isIDFIRST_A(c))
11286 goto no_label;
11287 do {
11288 c = (U8)*++t;
11289 } while(isWORDCHAR_A(c));
11290 wlen = t - s;
11291 if (word_takes_any_delimeter(s, wlen))
11292 goto no_label;
11293 bufptr_pos = s - SvPVX(PL_linestr);
11294 PL_bufptr = t;
11295 lex_read_space(LEX_KEEP_PREVIOUS);
11296 t = PL_bufptr;
11297 s = SvPVX(PL_linestr) + bufptr_pos;
11298 if (t[0] == ':' && t[1] != ':') {
11299 PL_oldoldbufptr = PL_oldbufptr;
11300 PL_oldbufptr = s;
11301 PL_bufptr = t+1;
11302 return newSVpvn(s, wlen);
11303 } else {
11304 PL_bufptr = s;
11305 no_label:
11306 if (flags & PARSE_OPTIONAL) {
11307 return NULL;
11308 } else {
11309 qerror(Perl_mess(aTHX_ "Parse error"));
11310 return newSVpvs("x");
11311 }
11312 }
11313 }
11314}
11315
11316/*
28ac2b49
Z
11317=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11318
11319Parse a single complete Perl statement. This may be a normal imperative
8359b381 11320statement or a declaration that has compile-time effect, and may include
8e720305 11321optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11322parser state (L</PL_parser> et al) is correctly set to reflect the source
11323of the code to be parsed and the lexical context for the statement.
11324
11325The op tree representing the statement is returned. This may be a
11326null pointer if the statement is null, for example if it was actually
11327a subroutine definition (which has compile-time side effects). If not
11328null, it will be the result of a L</newSTATEOP> call, normally including
11329a C<nextstate> or equivalent op.
11330
11331If an error occurs in parsing or compilation, in most cases a valid op
11332tree (most likely null) is returned anyway. The error is reflected in
11333the parser state, normally resulting in a single exception at the top
11334level of parsing which covers all the compilation errors that occurred.
11335Some compilation errors, however, will throw an exception immediately.
11336
11337The I<flags> parameter is reserved for future use, and must always
11338be zero.
11339
11340=cut
11341*/
11342
11343OP *
11344Perl_parse_fullstmt(pTHX_ U32 flags)
11345{
28ac2b49
Z
11346 if (flags)
11347 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11348 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11349}
11350
07ffcb73
Z
11351/*
11352=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11353
11354Parse a sequence of zero or more Perl statements. These may be normal
11355imperative statements, including optional labels, or declarations
11356that have compile-time effect, or any mixture thereof. The statement
11357sequence ends when a closing brace or end-of-file is encountered in a
11358place where a new statement could have validly started. It is up to
11359the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11360is correctly set to reflect the source of the code to be parsed and the
11361lexical context for the statements.
11362
11363The op tree representing the statement sequence is returned. This may
11364be a null pointer if the statements were all null, for example if there
11365were no statements or if there were only subroutine definitions (which
11366have compile-time side effects). If not null, it will be a C<lineseq>
11367list, normally including C<nextstate> or equivalent ops.
11368
11369If an error occurs in parsing or compilation, in most cases a valid op
11370tree is returned anyway. The error is reflected in the parser state,
11371normally resulting in a single exception at the top level of parsing
11372which covers all the compilation errors that occurred. Some compilation
11373errors, however, will throw an exception immediately.
11374
11375The I<flags> parameter is reserved for future use, and must always
11376be zero.
11377
11378=cut
11379*/
11380
11381OP *
11382Perl_parse_stmtseq(pTHX_ U32 flags)
11383{
11384 OP *stmtseqop;
e53d8f76 11385 I32 c;
07ffcb73 11386 if (flags)
78cdf107
Z
11387 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11388 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
11389 c = lex_peek_unichar(0);
11390 if (c != -1 && c != /*{*/'}')
07ffcb73 11391 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
11392 return stmtseqop;
11393}
11394
ea25a9b2 11395void
f7e3d326 11396Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 11397{
f7e3d326 11398 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2 11399 deprecate("qw(...) as parentheses");
78cdf107 11400 force_next((4<<24)|')');
ea25a9b2
Z
11401 if (qwlist->op_type == OP_STUB) {
11402 op_free(qwlist);
11403 }
11404 else {
3d8e05a0 11405 start_force(PL_curforce);
ea25a9b2
Z
11406 NEXTVAL_NEXTTOKE.opval = qwlist;
11407 force_next(THING);
11408 }
78cdf107 11409 force_next((2<<24)|'(');
ea25a9b2
Z
11410}
11411
28ac2b49 11412/*
1da4ca5f
NC
11413 * Local variables:
11414 * c-indentation-style: bsd
11415 * c-basic-offset: 4
11416 * indent-tabs-mode: t
11417 * End:
11418 *
37442d52
RGS
11419 * ex: set ts=8 sts=4 sw=4 noet:
11420 */