This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Add another address for John Peacock
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
746b446a 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805
LW
150 * can get by with a single comparison (if the compiler is smart enough).
151 */
152
fb73857a 153/* #define LEX_NOTPARSING 11 is done in perl.h. */
154
b6007c36
DM
155#define LEX_NORMAL 10 /* normal code (ie not within "...") */
156#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159#define LEX_INTERPSTART 6 /* expecting the start of a $var */
160
161 /* at end of code, eg "$x" followed by: */
162#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164
165#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167#define LEX_INTERPCONST 2 /* NOT USED */
168#define LEX_FORMLINE 1 /* expecting a format line */
169#define LEX_KNOWNEXT 0 /* next token known; just return it */
170
79072805 171
bbf60fe6 172#ifdef DEBUGGING
27da23d5 173static const char* const lex_state_names[] = {
bbf60fe6
DM
174 "KNOWNEXT",
175 "FORMLINE",
176 "INTERPCONST",
177 "INTERPCONCAT",
178 "INTERPENDMAYBE",
179 "INTERPEND",
180 "INTERPSTART",
181 "INTERPPUSH",
182 "INTERPCASEMOD",
183 "INTERPNORMAL",
184 "NORMAL"
185};
186#endif
187
79072805
LW
188#ifdef ff_next
189#undef ff_next
d48672a2
LW
190#endif
191
79072805 192#include "keywords.h"
fe14fcc3 193
ffb4593c
NT
194/* CLINE is a macro that ensures PL_copline has a sane value */
195
ae986130
LW
196#ifdef CLINE
197#undef CLINE
198#endif
57843af0 199#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 200
5db06880 201#ifdef PERL_MAD
29595ff2
NC
202# define SKIPSPACE0(s) skipspace0(s)
203# define SKIPSPACE1(s) skipspace1(s)
204# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205# define PEEKSPACE(s) skipspace2(s,0)
206#else
207# define SKIPSPACE0(s) skipspace(s)
208# define SKIPSPACE1(s) skipspace(s)
209# define SKIPSPACE2(s,tsv) skipspace(s)
210# define PEEKSPACE(s) skipspace(s)
211#endif
212
ffb4593c
NT
213/*
214 * Convenience functions to return different tokens and prime the
9cbb5ea2 215 * lexer for the next token. They all take an argument.
ffb4593c
NT
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
2d2e263d 227 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
228 * BOop : bitwise or or xor
229 * BAop : bitwise and
230 * SHop : shift operator
231 * PWop : power operator
9cbb5ea2 232 * PMop : pattern-matching operator
ffb4593c
NT
233 * Aop : addition-level operator
234 * Mop : multiplication-level operator
235 * Eop : equality-testing operator
e5edeb50 236 * Rop : relational operator <= != gt
ffb4593c
NT
237 *
238 * Also see LOP and lop() below.
239 */
240
998054bd 241#ifdef DEBUGGING /* Serve -DT. */
704d4215 242# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 243#else
bbf60fe6 244# define REPORT(retval) (retval)
998054bd
SC
245#endif
246
bbf60fe6
DM
247#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
248#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
249#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
250#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
251#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
252#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
253#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
254#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
255#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
256#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
257#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
258#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
259#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
260#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
261#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
262#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
263#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
264#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
265#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
266#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 267
a687059c
LW
268/* This bit of chicanery makes a unary function followed by
269 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
270 * The UNIDOR macro is for unary functions that can be followed by the //
271 * operator (such as C<shift // 0>).
a687059c 272 */
376fcdbf 273#define UNI2(f,x) { \
6154021b 274 pl_yylval.ival = f; \
376fcdbf
AL
275 PL_expect = x; \
276 PL_bufptr = s; \
277 PL_last_uni = PL_oldbufptr; \
278 PL_last_lop_op = f; \
279 if (*s == '(') \
280 return REPORT( (int)FUNC1 ); \
29595ff2 281 s = PEEKSPACE(s); \
376fcdbf
AL
282 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
283 }
6f33ba73
RGS
284#define UNI(f) UNI2(f,XTERM)
285#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 286
376fcdbf 287#define UNIBRACK(f) { \
6154021b 288 pl_yylval.ival = f; \
376fcdbf
AL
289 PL_bufptr = s; \
290 PL_last_uni = PL_oldbufptr; \
291 if (*s == '(') \
292 return REPORT( (int)FUNC1 ); \
29595ff2 293 s = PEEKSPACE(s); \
376fcdbf
AL
294 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
295 }
79072805 296
9f68db38 297/* grandfather return to old style */
78cdf107
Z
298#define OLDLOP(f) \
299 do { \
300 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
301 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
302 pl_yylval.ival = (f); \
303 PL_expect = XTERM; \
304 PL_bufptr = s; \
305 return (int)LSTOP; \
306 } while(0)
79072805 307
8fa7f367
JH
308#ifdef DEBUGGING
309
6154021b 310/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
311enum token_type {
312 TOKENTYPE_NONE,
313 TOKENTYPE_IVAL,
6154021b 314 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
315 TOKENTYPE_PVAL,
316 TOKENTYPE_OPVAL,
317 TOKENTYPE_GVVAL
318};
319
6d4a66ac
NC
320static struct debug_tokens {
321 const int token;
322 enum token_type type;
323 const char *name;
324} const debug_tokens[] =
9041c2e3 325{
bbf60fe6
DM
326 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
327 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
328 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
329 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
330 { ARROW, TOKENTYPE_NONE, "ARROW" },
331 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
332 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
333 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
334 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
335 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 336 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
337 { DO, TOKENTYPE_NONE, "DO" },
338 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
339 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
340 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
341 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
342 { ELSE, TOKENTYPE_NONE, "ELSE" },
343 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
344 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
345 { FOR, TOKENTYPE_IVAL, "FOR" },
346 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
347 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
348 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
349 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
350 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
351 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 352 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
353 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
354 { IF, TOKENTYPE_IVAL, "IF" },
355 { LABEL, TOKENTYPE_PVAL, "LABEL" },
356 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
357 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
358 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
359 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
360 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
361 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
362 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
363 { MY, TOKENTYPE_IVAL, "MY" },
364 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
365 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
366 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
367 { OROP, TOKENTYPE_IVAL, "OROP" },
368 { OROR, TOKENTYPE_NONE, "OROR" },
369 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
370 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
371 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
372 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
373 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
374 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
375 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
376 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
377 { PREINC, TOKENTYPE_NONE, "PREINC" },
378 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
379 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
380 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
381 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
382 { SUB, TOKENTYPE_NONE, "SUB" },
383 { THING, TOKENTYPE_OPVAL, "THING" },
384 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
385 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
386 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
387 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
388 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
389 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 390 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
391 { WHILE, TOKENTYPE_IVAL, "WHILE" },
392 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 393 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 394 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
395};
396
6154021b 397/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 398
bbf60fe6 399STATIC int
704d4215 400S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 401{
97aff369 402 dVAR;
7918f24d
NC
403
404 PERL_ARGS_ASSERT_TOKEREPORT;
405
bbf60fe6 406 if (DEBUG_T_TEST) {
bd61b366 407 const char *name = NULL;
bbf60fe6 408 enum token_type type = TOKENTYPE_NONE;
f54cb97a 409 const struct debug_tokens *p;
396482e1 410 SV* const report = newSVpvs("<== ");
bbf60fe6 411
f54cb97a 412 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
413 if (p->token == (int)rv) {
414 name = p->name;
415 type = p->type;
416 break;
417 }
418 }
419 if (name)
54667de8 420 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
421 else if ((char)rv > ' ' && (char)rv < '~')
422 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
423 else if (!rv)
396482e1 424 sv_catpvs(report, "EOF");
bbf60fe6
DM
425 else
426 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
427 switch (type) {
428 case TOKENTYPE_NONE:
429 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
430 break;
431 case TOKENTYPE_IVAL:
704d4215 432 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
433 break;
434 case TOKENTYPE_OPNUM:
435 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 436 PL_op_name[lvalp->ival]);
bbf60fe6
DM
437 break;
438 case TOKENTYPE_PVAL:
704d4215 439 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_OPVAL:
704d4215 442 if (lvalp->opval) {
401441c0 443 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
444 PL_op_name[lvalp->opval->op_type]);
445 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 446 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 447 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
448 }
449
450 }
401441c0 451 else
396482e1 452 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
453 break;
454 }
b6007c36 455 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
456 };
457 return (int)rv;
998054bd
SC
458}
459
b6007c36
DM
460
461/* print the buffer with suitable escapes */
462
463STATIC void
15f169a1 464S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 465{
396482e1 466 SV* const tmp = newSVpvs("");
7918f24d
NC
467
468 PERL_ARGS_ASSERT_PRINTBUF;
469
b6007c36
DM
470 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
471 SvREFCNT_dec(tmp);
472}
473
8fa7f367
JH
474#endif
475
8290c323
NC
476static int
477S_deprecate_commaless_var_list(pTHX) {
478 PL_expect = XTERM;
479 deprecate("comma-less variable list");
480 return REPORT(','); /* grandfather non-comma-format format */
481}
482
ffb4593c
NT
483/*
484 * S_ao
485 *
c963b151
BD
486 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
487 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
488 */
489
76e3520e 490STATIC int
cea2e8a9 491S_ao(pTHX_ int toketype)
a0d0e21e 492{
97aff369 493 dVAR;
3280af22
NIS
494 if (*PL_bufptr == '=') {
495 PL_bufptr++;
a0d0e21e 496 if (toketype == ANDAND)
6154021b 497 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 498 else if (toketype == OROR)
6154021b 499 pl_yylval.ival = OP_ORASSIGN;
c963b151 500 else if (toketype == DORDOR)
6154021b 501 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
502 toketype = ASSIGNOP;
503 }
504 return toketype;
505}
506
ffb4593c
NT
507/*
508 * S_no_op
509 * When Perl expects an operator and finds something else, no_op
510 * prints the warning. It always prints "<something> found where
511 * operator expected. It prints "Missing semicolon on previous line?"
512 * if the surprise occurs at the start of the line. "do you need to
513 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
514 * where the compiler doesn't know if foo is a method call or a function.
515 * It prints "Missing operator before end of line" if there's nothing
516 * after the missing operator, or "... before <...>" if there is something
517 * after the missing operator.
518 */
519
76e3520e 520STATIC void
15f169a1 521S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 522{
97aff369 523 dVAR;
9d4ba2ae
AL
524 char * const oldbp = PL_bufptr;
525 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 526
7918f24d
NC
527 PERL_ARGS_ASSERT_NO_OP;
528
1189a94a
GS
529 if (!s)
530 s = oldbp;
07c798fb 531 else
1189a94a 532 PL_bufptr = s;
cea2e8a9 533 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
534 if (ckWARN_d(WARN_SYNTAX)) {
535 if (is_first)
536 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
537 "\t(Missing semicolon on previous line?)\n");
538 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 539 const char *t;
c35e046a
AL
540 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
541 NOOP;
56da5a46
RGS
542 if (t < PL_bufptr && isSPACE(*t))
543 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
544 "\t(Do you need to predeclare %.*s?)\n",
551405c4 545 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
546 }
547 else {
548 assert(s >= oldbp);
549 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 550 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 551 }
07c798fb 552 }
3280af22 553 PL_bufptr = oldbp;
8990e307
LW
554}
555
ffb4593c
NT
556/*
557 * S_missingterm
558 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 559 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
560 * If we're in a delimited string and the delimiter is a control
561 * character, it's reformatted into a two-char sequence like ^C.
562 * This is fatal.
563 */
564
76e3520e 565STATIC void
cea2e8a9 566S_missingterm(pTHX_ char *s)
8990e307 567{
97aff369 568 dVAR;
8990e307
LW
569 char tmpbuf[3];
570 char q;
571 if (s) {
9d4ba2ae 572 char * const nl = strrchr(s,'\n');
d2719217 573 if (nl)
8990e307
LW
574 *nl = '\0';
575 }
463559e7 576 else if (isCNTRL(PL_multi_close)) {
8990e307 577 *tmpbuf = '^';
585ec06d 578 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
579 tmpbuf[2] = '\0';
580 s = tmpbuf;
581 }
582 else {
eb160463 583 *tmpbuf = (char)PL_multi_close;
8990e307
LW
584 tmpbuf[1] = '\0';
585 s = tmpbuf;
586 }
587 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 588 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 589}
79072805 590
0d863452 591/*
0d863452
RH
592 * Check whether the named feature is enabled.
593 */
26ea9e12
NC
594bool
595Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 596{
97aff369 597 dVAR;
0d863452 598 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 599 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
600
601 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
602
26ea9e12
NC
603 if (namelen > MAX_FEATURE_LEN)
604 return FALSE;
4a731d7b 605 memcpy(&he_name[8], name, namelen);
d4c19fe8 606
7b9ef140 607 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
608}
609
ffb4593c 610/*
9cbb5ea2
GS
611 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
612 * utf16-to-utf8-reversed.
ffb4593c
NT
613 */
614
c39cd008
GS
615#ifdef PERL_CR_FILTER
616static void
617strip_return(SV *sv)
618{
95a20fc0 619 register const char *s = SvPVX_const(sv);
9d4ba2ae 620 register const char * const e = s + SvCUR(sv);
7918f24d
NC
621
622 PERL_ARGS_ASSERT_STRIP_RETURN;
623
c39cd008
GS
624 /* outer loop optimized to do nothing if there are no CR-LFs */
625 while (s < e) {
626 if (*s++ == '\r' && *s == '\n') {
627 /* hit a CR-LF, need to copy the rest */
628 register char *d = s - 1;
629 *d++ = *s++;
630 while (s < e) {
631 if (*s == '\r' && s[1] == '\n')
632 s++;
633 *d++ = *s++;
634 }
635 SvCUR(sv) -= s - d;
636 return;
637 }
638 }
639}
a868473f 640
76e3520e 641STATIC I32
c39cd008 642S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 643{
f54cb97a 644 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
645 if (count > 0 && !maxlen)
646 strip_return(sv);
647 return count;
a868473f
NIS
648}
649#endif
650
ffb4593c 651/*
8eaa0acf
Z
652=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
653
654Creates and initialises a new lexer/parser state object, supplying
655a context in which to lex and parse from a new source of Perl code.
656A pointer to the new state object is placed in L</PL_parser>. An entry
657is made on the save stack so that upon unwinding the new state object
658will be destroyed and the former value of L</PL_parser> will be restored.
659Nothing else need be done to clean up the parsing context.
660
661The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
662non-null, provides a string (in SV form) containing code to be parsed.
663A copy of the string is made, so subsequent modification of I<line>
664does not affect parsing. I<rsfp>, if non-null, provides an input stream
665from which code will be read to be parsed. If both are non-null, the
666code in I<line> comes first and must consist of complete lines of input,
667and I<rsfp> supplies the remainder of the source.
668
669The I<flags> parameter is reserved for future use, and must always
27fcb6ee
FC
670be zero, except for one flag that is currently reserved for perl's internal
671use.
8eaa0acf
Z
672
673=cut
674*/
ffb4593c 675
27fcb6ee
FC
676/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
677 can share filters with the current parser. */
678
a0d0e21e 679void
8eaa0acf 680Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 681{
97aff369 682 dVAR;
6ef55633 683 const char *s = NULL;
8990e307 684 STRLEN len;
5486870f 685 yy_parser *parser, *oparser;
27fcb6ee 686 if (flags && flags != LEX_START_SAME_FILTER)
8eaa0acf 687 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
688
689 /* create and initialise a parser */
690
199e78b7 691 Newxz(parser, 1, yy_parser);
5486870f 692 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
693 PL_parser = parser;
694
28ac2b49
Z
695 parser->stack = NULL;
696 parser->ps = NULL;
697 parser->stack_size = 0;
acdf0a21 698
e3abe207
DM
699 /* on scope exit, free this parser and restore any outer one */
700 SAVEPARSER(parser);
7c4baf47 701 parser->saved_curcop = PL_curcop;
e3abe207 702
acdf0a21 703 /* initialise lexer state */
8990e307 704
fb205e7a
DM
705#ifdef PERL_MAD
706 parser->curforce = -1;
707#else
708 parser->nexttoke = 0;
709#endif
ca4cfd28 710 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 711 parser->copline = NOLINE;
5afb0a62 712 parser->lex_state = LEX_NORMAL;
c2598295 713 parser->expect = XSTATE;
2f9285f8 714 parser->rsfp = rsfp;
27fcb6ee
FC
715 parser->rsfp_filters =
716 !(flags & LEX_START_SAME_FILTER) || !oparser
717 ? newAV()
718 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 719
199e78b7
DM
720 Newx(parser->lex_brackstack, 120, char);
721 Newx(parser->lex_casestack, 12, char);
722 *parser->lex_casestack = '\0';
02b34bbe 723
10efb74f
NC
724 if (line) {
725 s = SvPV_const(line, len);
726 } else {
727 len = 0;
728 }
bdc0bf6f 729
10efb74f 730 if (!len) {
bdc0bf6f 731 parser->linestr = newSVpvs("\n;");
805700c1 732 } else {
719a9bb0 733 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 734 if (s[len-1] != ';')
bdc0bf6f 735 sv_catpvs(parser->linestr, "\n;");
8990e307 736 }
f06b5848
DM
737 parser->oldoldbufptr =
738 parser->oldbufptr =
739 parser->bufptr =
740 parser->linestart = SvPVX(parser->linestr);
741 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
742 parser->last_lop = parser->last_uni = NULL;
737c24fc
Z
743
744 parser->in_pod = 0;
79072805 745}
a687059c 746
e3abe207
DM
747
748/* delete a parser object */
749
750void
751Perl_parser_free(pTHX_ const yy_parser *parser)
752{
7918f24d
NC
753 PERL_ARGS_ASSERT_PARSER_FREE;
754
7c4baf47 755 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
756 SvREFCNT_dec(parser->linestr);
757
2f9285f8
DM
758 if (parser->rsfp == PerlIO_stdin())
759 PerlIO_clearerr(parser->rsfp);
799361c3
SH
760 else if (parser->rsfp && (!parser->old_parser ||
761 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 762 PerlIO_close(parser->rsfp);
5486870f 763 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 764
e3abe207
DM
765 Safefree(parser->lex_brackstack);
766 Safefree(parser->lex_casestack);
767 PL_parser = parser->old_parser;
768 Safefree(parser);
769}
770
771
ffb4593c 772/*
f0e67a1d
Z
773=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
774
775Buffer scalar containing the chunk currently under consideration of the
776text currently being lexed. This is always a plain string scalar (for
777which C<SvPOK> is true). It is not intended to be used as a scalar by
778normal scalar means; instead refer to the buffer directly by the pointer
779variables described below.
780
781The lexer maintains various C<char*> pointers to things in the
782C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
783reallocated, all of these pointers must be updated. Don't attempt to
784do this manually, but rather use L</lex_grow_linestr> if you need to
785reallocate the buffer.
786
787The content of the text chunk in the buffer is commonly exactly one
788complete line of input, up to and including a newline terminator,
789but there are situations where it is otherwise. The octets of the
790buffer may be intended to be interpreted as either UTF-8 or Latin-1.
791The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
792flag on this scalar, which may disagree with it.
793
794For direct examination of the buffer, the variable
795L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
796lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
797of these pointers is usually preferable to examination of the scalar
798through normal scalar means.
799
800=for apidoc AmxU|char *|PL_parser-E<gt>bufend
801
802Direct pointer to the end of the chunk of text currently being lexed, the
803end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
804+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
805always located at the end of the buffer, and does not count as part of
806the buffer's contents.
807
808=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
809
810Points to the current position of lexing inside the lexer buffer.
811Characters around this point may be freely examined, within
812the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
813L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
814interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
815
816Lexing code (whether in the Perl core or not) moves this pointer past
817the characters that it consumes. It is also expected to perform some
818bookkeeping whenever a newline character is consumed. This movement
819can be more conveniently performed by the function L</lex_read_to>,
820which handles newlines appropriately.
821
822Interpretation of the buffer's octets can be abstracted out by
823using the slightly higher-level functions L</lex_peek_unichar> and
824L</lex_read_unichar>.
825
826=for apidoc AmxU|char *|PL_parser-E<gt>linestart
827
828Points to the start of the current line inside the lexer buffer.
829This is useful for indicating at which column an error occurred, and
830not much else. This must be updated by any lexing code that consumes
831a newline; the function L</lex_read_to> handles this detail.
832
833=cut
834*/
835
836/*
837=for apidoc Amx|bool|lex_bufutf8
838
839Indicates whether the octets in the lexer buffer
840(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
841of Unicode characters. If not, they should be interpreted as Latin-1
842characters. This is analogous to the C<SvUTF8> flag for scalars.
843
844In UTF-8 mode, it is not guaranteed that the lexer buffer actually
845contains valid UTF-8. Lexing code must be robust in the face of invalid
846encoding.
847
848The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
849is significant, but not the whole story regarding the input character
850encoding. Normally, when a file is being read, the scalar contains octets
851and its C<SvUTF8> flag is off, but the octets should be interpreted as
852UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
853however, the scalar may have the C<SvUTF8> flag on, and in this case its
854octets should be interpreted as UTF-8 unless the C<use bytes> pragma
855is in effect. This logic may change in the future; use this function
856instead of implementing the logic yourself.
857
858=cut
859*/
860
861bool
862Perl_lex_bufutf8(pTHX)
863{
864 return UTF;
865}
866
867/*
868=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
869
870Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
871at least I<len> octets (including terminating NUL). Returns a
872pointer to the reallocated buffer. This is necessary before making
873any direct modification of the buffer that would increase its length.
874L</lex_stuff_pvn> provides a more convenient way to insert text into
875the buffer.
876
877Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
878this function updates all of the lexer's variables that point directly
879into the buffer.
880
881=cut
882*/
883
884char *
885Perl_lex_grow_linestr(pTHX_ STRLEN len)
886{
887 SV *linestr;
888 char *buf;
889 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
890 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
891 linestr = PL_parser->linestr;
892 buf = SvPVX(linestr);
893 if (len <= SvLEN(linestr))
894 return buf;
895 bufend_pos = PL_parser->bufend - buf;
896 bufptr_pos = PL_parser->bufptr - buf;
897 oldbufptr_pos = PL_parser->oldbufptr - buf;
898 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
899 linestart_pos = PL_parser->linestart - buf;
900 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
901 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
902 buf = sv_grow(linestr, len);
903 PL_parser->bufend = buf + bufend_pos;
904 PL_parser->bufptr = buf + bufptr_pos;
905 PL_parser->oldbufptr = buf + oldbufptr_pos;
906 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
907 PL_parser->linestart = buf + linestart_pos;
908 if (PL_parser->last_uni)
909 PL_parser->last_uni = buf + last_uni_pos;
910 if (PL_parser->last_lop)
911 PL_parser->last_lop = buf + last_lop_pos;
912 return buf;
913}
914
915/*
83aa740e 916=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
917
918Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
919immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
920reallocating the buffer if necessary. This means that lexing code that
921runs later will see the characters as if they had appeared in the input.
922It is not recommended to do this as part of normal parsing, and most
923uses of this facility run the risk of the inserted characters being
924interpreted in an unintended manner.
925
926The string to be inserted is represented by I<len> octets starting
927at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
928according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
929The characters are recoded for the lexer buffer, according to how the
930buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 931to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
932function is more convenient.
933
934=cut
935*/
936
937void
83aa740e 938Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 939{
749123ff 940 dVAR;
f0e67a1d
Z
941 char *bufptr;
942 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
943 if (flags & ~(LEX_STUFF_UTF8))
944 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
945 if (UTF) {
946 if (flags & LEX_STUFF_UTF8) {
947 goto plain_copy;
948 } else {
949 STRLEN highhalf = 0;
83aa740e 950 const char *p, *e = pv+len;
f0e67a1d
Z
951 for (p = pv; p != e; p++)
952 highhalf += !!(((U8)*p) & 0x80);
953 if (!highhalf)
954 goto plain_copy;
955 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
956 bufptr = PL_parser->bufptr;
957 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
958 SvCUR_set(PL_parser->linestr,
959 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
960 PL_parser->bufend += len+highhalf;
961 for (p = pv; p != e; p++) {
962 U8 c = (U8)*p;
963 if (c & 0x80) {
964 *bufptr++ = (char)(0xc0 | (c >> 6));
965 *bufptr++ = (char)(0x80 | (c & 0x3f));
966 } else {
967 *bufptr++ = (char)c;
968 }
969 }
970 }
971 } else {
972 if (flags & LEX_STUFF_UTF8) {
973 STRLEN highhalf = 0;
83aa740e 974 const char *p, *e = pv+len;
f0e67a1d
Z
975 for (p = pv; p != e; p++) {
976 U8 c = (U8)*p;
977 if (c >= 0xc4) {
978 Perl_croak(aTHX_ "Lexing code attempted to stuff "
979 "non-Latin-1 character into Latin-1 input");
980 } else if (c >= 0xc2 && p+1 != e &&
981 (((U8)p[1]) & 0xc0) == 0x80) {
982 p++;
983 highhalf++;
984 } else if (c >= 0x80) {
985 /* malformed UTF-8 */
986 ENTER;
987 SAVESPTR(PL_warnhook);
988 PL_warnhook = PERL_WARNHOOK_FATAL;
989 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
990 LEAVE;
991 }
992 }
993 if (!highhalf)
994 goto plain_copy;
995 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
996 bufptr = PL_parser->bufptr;
997 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
998 SvCUR_set(PL_parser->linestr,
999 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1000 PL_parser->bufend += len-highhalf;
1001 for (p = pv; p != e; p++) {
1002 U8 c = (U8)*p;
1003 if (c & 0x80) {
1004 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1005 p++;
1006 } else {
1007 *bufptr++ = (char)c;
1008 }
1009 }
1010 } else {
1011 plain_copy:
1012 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1013 bufptr = PL_parser->bufptr;
1014 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1015 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1016 PL_parser->bufend += len;
1017 Copy(pv, bufptr, len, char);
1018 }
1019 }
1020}
1021
1022/*
9dcc53ea
Z
1023=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1024
1025Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1026immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1027reallocating the buffer if necessary. This means that lexing code that
1028runs later will see the characters as if they had appeared in the input.
1029It is not recommended to do this as part of normal parsing, and most
1030uses of this facility run the risk of the inserted characters being
1031interpreted in an unintended manner.
1032
1033The string to be inserted is represented by octets starting at I<pv>
1034and continuing to the first nul. These octets are interpreted as either
1035UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1036in I<flags>. The characters are recoded for the lexer buffer, according
1037to how the buffer is currently being interpreted (L</lex_bufutf8>).
1038If it is not convenient to nul-terminate a string to be inserted, the
1039L</lex_stuff_pvn> function is more appropriate.
1040
1041=cut
1042*/
1043
1044void
1045Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1046{
1047 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1048 lex_stuff_pvn(pv, strlen(pv), flags);
1049}
1050
1051/*
f0e67a1d
Z
1052=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1053
1054Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1055immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1056reallocating the buffer if necessary. This means that lexing code that
1057runs later will see the characters as if they had appeared in the input.
1058It is not recommended to do this as part of normal parsing, and most
1059uses of this facility run the risk of the inserted characters being
1060interpreted in an unintended manner.
1061
1062The string to be inserted is the string value of I<sv>. The characters
1063are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1064being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1065not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1066need to construct a scalar.
1067
1068=cut
1069*/
1070
1071void
1072Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1073{
1074 char *pv;
1075 STRLEN len;
1076 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1077 if (flags)
1078 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1079 pv = SvPV(sv, len);
1080 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1081}
1082
1083/*
1084=for apidoc Amx|void|lex_unstuff|char *ptr
1085
1086Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1087I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1088This hides the discarded text from any lexing code that runs later,
1089as if the text had never appeared.
1090
1091This is not the normal way to consume lexed text. For that, use
1092L</lex_read_to>.
1093
1094=cut
1095*/
1096
1097void
1098Perl_lex_unstuff(pTHX_ char *ptr)
1099{
1100 char *buf, *bufend;
1101 STRLEN unstuff_len;
1102 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1103 buf = PL_parser->bufptr;
1104 if (ptr < buf)
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1106 if (ptr == buf)
1107 return;
1108 bufend = PL_parser->bufend;
1109 if (ptr > bufend)
1110 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1111 unstuff_len = ptr - buf;
1112 Move(ptr, buf, bufend+1-ptr, char);
1113 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1114 PL_parser->bufend = bufend - unstuff_len;
1115}
1116
1117/*
1118=for apidoc Amx|void|lex_read_to|char *ptr
1119
1120Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1121to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1122performing the correct bookkeeping whenever a newline character is passed.
1123This is the normal way to consume lexed text.
1124
1125Interpretation of the buffer's octets can be abstracted out by
1126using the slightly higher-level functions L</lex_peek_unichar> and
1127L</lex_read_unichar>.
1128
1129=cut
1130*/
1131
1132void
1133Perl_lex_read_to(pTHX_ char *ptr)
1134{
1135 char *s;
1136 PERL_ARGS_ASSERT_LEX_READ_TO;
1137 s = PL_parser->bufptr;
1138 if (ptr < s || ptr > PL_parser->bufend)
1139 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1140 for (; s != ptr; s++)
1141 if (*s == '\n') {
1142 CopLINE_inc(PL_curcop);
1143 PL_parser->linestart = s+1;
1144 }
1145 PL_parser->bufptr = ptr;
1146}
1147
1148/*
1149=for apidoc Amx|void|lex_discard_to|char *ptr
1150
1151Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1152up to I<ptr>. The remaining content of the buffer will be moved, and
1153all pointers into the buffer updated appropriately. I<ptr> must not
1154be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1155it is not permitted to discard text that has yet to be lexed.
1156
1157Normally it is not necessarily to do this directly, because it suffices to
1158use the implicit discarding behaviour of L</lex_next_chunk> and things
1159based on it. However, if a token stretches across multiple lines,
1f317c95 1160and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1161that purpose, then after completion of the token it would be wise to
1162explicitly discard the now-unneeded earlier lines, to avoid future
1163multi-line tokens growing the buffer without bound.
1164
1165=cut
1166*/
1167
1168void
1169Perl_lex_discard_to(pTHX_ char *ptr)
1170{
1171 char *buf;
1172 STRLEN discard_len;
1173 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1174 buf = SvPVX(PL_parser->linestr);
1175 if (ptr < buf)
1176 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1177 if (ptr == buf)
1178 return;
1179 if (ptr > PL_parser->bufptr)
1180 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1181 discard_len = ptr - buf;
1182 if (PL_parser->oldbufptr < ptr)
1183 PL_parser->oldbufptr = ptr;
1184 if (PL_parser->oldoldbufptr < ptr)
1185 PL_parser->oldoldbufptr = ptr;
1186 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1187 PL_parser->last_uni = NULL;
1188 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1189 PL_parser->last_lop = NULL;
1190 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1191 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1192 PL_parser->bufend -= discard_len;
1193 PL_parser->bufptr -= discard_len;
1194 PL_parser->oldbufptr -= discard_len;
1195 PL_parser->oldoldbufptr -= discard_len;
1196 if (PL_parser->last_uni)
1197 PL_parser->last_uni -= discard_len;
1198 if (PL_parser->last_lop)
1199 PL_parser->last_lop -= discard_len;
1200}
1201
1202/*
1203=for apidoc Amx|bool|lex_next_chunk|U32 flags
1204
1205Reads in the next chunk of text to be lexed, appending it to
1206L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1207looked to the end of the current chunk and wants to know more. It is
1208usual, but not necessary, for lexing to have consumed the entirety of
1209the current chunk at this time.
1210
1211If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1212chunk (i.e., the current chunk has been entirely consumed), normally the
1213current chunk will be discarded at the same time that the new chunk is
1214read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1215will not be discarded. If the current chunk has not been entirely
1216consumed, then it will not be discarded regardless of the flag.
1217
1218Returns true if some new text was added to the buffer, or false if the
1219buffer has reached the end of the input text.
1220
1221=cut
1222*/
1223
1224#define LEX_FAKE_EOF 0x80000000
1225
1226bool
1227Perl_lex_next_chunk(pTHX_ U32 flags)
1228{
1229 SV *linestr;
1230 char *buf;
1231 STRLEN old_bufend_pos, new_bufend_pos;
1232 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1233 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1234 bool got_some_for_debugger = 0;
f0e67a1d
Z
1235 bool got_some;
1236 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1237 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1238 linestr = PL_parser->linestr;
1239 buf = SvPVX(linestr);
1240 if (!(flags & LEX_KEEP_PREVIOUS) &&
1241 PL_parser->bufptr == PL_parser->bufend) {
1242 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1243 linestart_pos = 0;
1244 if (PL_parser->last_uni != PL_parser->bufend)
1245 PL_parser->last_uni = NULL;
1246 if (PL_parser->last_lop != PL_parser->bufend)
1247 PL_parser->last_lop = NULL;
1248 last_uni_pos = last_lop_pos = 0;
1249 *buf = 0;
1250 SvCUR(linestr) = 0;
1251 } else {
1252 old_bufend_pos = PL_parser->bufend - buf;
1253 bufptr_pos = PL_parser->bufptr - buf;
1254 oldbufptr_pos = PL_parser->oldbufptr - buf;
1255 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1256 linestart_pos = PL_parser->linestart - buf;
1257 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1258 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1259 }
1260 if (flags & LEX_FAKE_EOF) {
1261 goto eof;
1262 } else if (!PL_parser->rsfp) {
1263 got_some = 0;
1264 } else if (filter_gets(linestr, old_bufend_pos)) {
1265 got_some = 1;
17cc9359 1266 got_some_for_debugger = 1;
f0e67a1d 1267 } else {
580561a3
Z
1268 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1269 sv_setpvs(linestr, "");
f0e67a1d
Z
1270 eof:
1271 /* End of real input. Close filehandle (unless it was STDIN),
1272 * then add implicit termination.
1273 */
1274 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1275 PerlIO_clearerr(PL_parser->rsfp);
1276 else if (PL_parser->rsfp)
1277 (void)PerlIO_close(PL_parser->rsfp);
1278 PL_parser->rsfp = NULL;
737c24fc 1279 PL_parser->in_pod = 0;
f0e67a1d
Z
1280#ifdef PERL_MAD
1281 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1282 PL_faketokens = 1;
1283#endif
1284 if (!PL_in_eval && PL_minus_p) {
1285 sv_catpvs(linestr,
1286 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1287 PL_minus_n = PL_minus_p = 0;
1288 } else if (!PL_in_eval && PL_minus_n) {
1289 sv_catpvs(linestr, /*{*/";}");
1290 PL_minus_n = 0;
1291 } else
1292 sv_catpvs(linestr, ";");
1293 got_some = 1;
1294 }
1295 buf = SvPVX(linestr);
1296 new_bufend_pos = SvCUR(linestr);
1297 PL_parser->bufend = buf + new_bufend_pos;
1298 PL_parser->bufptr = buf + bufptr_pos;
1299 PL_parser->oldbufptr = buf + oldbufptr_pos;
1300 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1301 PL_parser->linestart = buf + linestart_pos;
1302 if (PL_parser->last_uni)
1303 PL_parser->last_uni = buf + last_uni_pos;
1304 if (PL_parser->last_lop)
1305 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1306 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1307 PL_curstash != PL_debstash) {
1308 /* debugger active and we're not compiling the debugger code,
1309 * so store the line into the debugger's array of lines
1310 */
1311 update_debugger_info(NULL, buf+old_bufend_pos,
1312 new_bufend_pos-old_bufend_pos);
1313 }
1314 return got_some;
1315}
1316
1317/*
1318=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1319
1320Looks ahead one (Unicode) character in the text currently being lexed.
1321Returns the codepoint (unsigned integer value) of the next character,
1322or -1 if lexing has reached the end of the input text. To consume the
1323peeked character, use L</lex_read_unichar>.
1324
1325If the next character is in (or extends into) the next chunk of input
1326text, the next chunk will be read in. Normally the current chunk will be
1327discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1328then the current chunk will not be discarded.
1329
1330If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1331is encountered, an exception is generated.
1332
1333=cut
1334*/
1335
1336I32
1337Perl_lex_peek_unichar(pTHX_ U32 flags)
1338{
749123ff 1339 dVAR;
f0e67a1d
Z
1340 char *s, *bufend;
1341 if (flags & ~(LEX_KEEP_PREVIOUS))
1342 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1343 s = PL_parser->bufptr;
1344 bufend = PL_parser->bufend;
1345 if (UTF) {
1346 U8 head;
1347 I32 unichar;
1348 STRLEN len, retlen;
1349 if (s == bufend) {
1350 if (!lex_next_chunk(flags))
1351 return -1;
1352 s = PL_parser->bufptr;
1353 bufend = PL_parser->bufend;
1354 }
1355 head = (U8)*s;
1356 if (!(head & 0x80))
1357 return head;
1358 if (head & 0x40) {
1359 len = PL_utf8skip[head];
1360 while ((STRLEN)(bufend-s) < len) {
1361 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1362 break;
1363 s = PL_parser->bufptr;
1364 bufend = PL_parser->bufend;
1365 }
1366 }
1367 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1368 if (retlen == (STRLEN)-1) {
1369 /* malformed UTF-8 */
1370 ENTER;
1371 SAVESPTR(PL_warnhook);
1372 PL_warnhook = PERL_WARNHOOK_FATAL;
1373 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1374 LEAVE;
1375 }
1376 return unichar;
1377 } else {
1378 if (s == bufend) {
1379 if (!lex_next_chunk(flags))
1380 return -1;
1381 s = PL_parser->bufptr;
1382 }
1383 return (U8)*s;
1384 }
1385}
1386
1387/*
1388=for apidoc Amx|I32|lex_read_unichar|U32 flags
1389
1390Reads the next (Unicode) character in the text currently being lexed.
1391Returns the codepoint (unsigned integer value) of the character read,
1392and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1393if lexing has reached the end of the input text. To non-destructively
1394examine the next character, use L</lex_peek_unichar> instead.
1395
1396If the next character is in (or extends into) the next chunk of input
1397text, the next chunk will be read in. Normally the current chunk will be
1398discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1399then the current chunk will not be discarded.
1400
1401If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1402is encountered, an exception is generated.
1403
1404=cut
1405*/
1406
1407I32
1408Perl_lex_read_unichar(pTHX_ U32 flags)
1409{
1410 I32 c;
1411 if (flags & ~(LEX_KEEP_PREVIOUS))
1412 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1413 c = lex_peek_unichar(flags);
1414 if (c != -1) {
1415 if (c == '\n')
1416 CopLINE_inc(PL_curcop);
1417 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1418 }
1419 return c;
1420}
1421
1422/*
1423=for apidoc Amx|void|lex_read_space|U32 flags
1424
1425Reads optional spaces, in Perl style, in the text currently being
1426lexed. The spaces may include ordinary whitespace characters and
1427Perl-style comments. C<#line> directives are processed if encountered.
1428L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1429at a non-space character (or the end of the input text).
1430
1431If spaces extend into the next chunk of input text, the next chunk will
1432be read in. Normally the current chunk will be discarded at the same
1433time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1434chunk will not be discarded.
1435
1436=cut
1437*/
1438
f0998909
Z
1439#define LEX_NO_NEXT_CHUNK 0x80000000
1440
f0e67a1d
Z
1441void
1442Perl_lex_read_space(pTHX_ U32 flags)
1443{
1444 char *s, *bufend;
1445 bool need_incline = 0;
f0998909 1446 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1447 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1448#ifdef PERL_MAD
1449 if (PL_skipwhite) {
1450 sv_free(PL_skipwhite);
1451 PL_skipwhite = NULL;
1452 }
1453 if (PL_madskills)
1454 PL_skipwhite = newSVpvs("");
1455#endif /* PERL_MAD */
1456 s = PL_parser->bufptr;
1457 bufend = PL_parser->bufend;
1458 while (1) {
1459 char c = *s;
1460 if (c == '#') {
1461 do {
1462 c = *++s;
1463 } while (!(c == '\n' || (c == 0 && s == bufend)));
1464 } else if (c == '\n') {
1465 s++;
1466 PL_parser->linestart = s;
1467 if (s == bufend)
1468 need_incline = 1;
1469 else
1470 incline(s);
1471 } else if (isSPACE(c)) {
1472 s++;
1473 } else if (c == 0 && s == bufend) {
1474 bool got_more;
1475#ifdef PERL_MAD
1476 if (PL_madskills)
1477 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1478#endif /* PERL_MAD */
f0998909
Z
1479 if (flags & LEX_NO_NEXT_CHUNK)
1480 break;
f0e67a1d
Z
1481 PL_parser->bufptr = s;
1482 CopLINE_inc(PL_curcop);
1483 got_more = lex_next_chunk(flags);
1484 CopLINE_dec(PL_curcop);
1485 s = PL_parser->bufptr;
1486 bufend = PL_parser->bufend;
1487 if (!got_more)
1488 break;
1489 if (need_incline && PL_parser->rsfp) {
1490 incline(s);
1491 need_incline = 0;
1492 }
1493 } else {
1494 break;
1495 }
1496 }
1497#ifdef PERL_MAD
1498 if (PL_madskills)
1499 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1500#endif /* PERL_MAD */
1501 PL_parser->bufptr = s;
1502}
1503
1504/*
ffb4593c
NT
1505 * S_incline
1506 * This subroutine has nothing to do with tilting, whether at windmills
1507 * or pinball tables. Its name is short for "increment line". It
57843af0 1508 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1509 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1510 * # line 500 "foo.pm"
1511 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1512 */
1513
76e3520e 1514STATIC void
d9095cec 1515S_incline(pTHX_ const char *s)
463ee0b2 1516{
97aff369 1517 dVAR;
d9095cec
NC
1518 const char *t;
1519 const char *n;
1520 const char *e;
8818d409 1521 line_t line_num;
463ee0b2 1522
7918f24d
NC
1523 PERL_ARGS_ASSERT_INCLINE;
1524
57843af0 1525 CopLINE_inc(PL_curcop);
463ee0b2
LW
1526 if (*s++ != '#')
1527 return;
d4c19fe8
AL
1528 while (SPACE_OR_TAB(*s))
1529 s++;
73659bf1
GS
1530 if (strnEQ(s, "line", 4))
1531 s += 4;
1532 else
1533 return;
084592ab 1534 if (SPACE_OR_TAB(*s))
73659bf1 1535 s++;
4e553d73 1536 else
73659bf1 1537 return;
d4c19fe8
AL
1538 while (SPACE_OR_TAB(*s))
1539 s++;
463ee0b2
LW
1540 if (!isDIGIT(*s))
1541 return;
d4c19fe8 1542
463ee0b2
LW
1543 n = s;
1544 while (isDIGIT(*s))
1545 s++;
07714eb4 1546 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1547 return;
bf4acbe4 1548 while (SPACE_OR_TAB(*s))
463ee0b2 1549 s++;
73659bf1 1550 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1551 s++;
73659bf1
GS
1552 e = t + 1;
1553 }
463ee0b2 1554 else {
c35e046a
AL
1555 t = s;
1556 while (!isSPACE(*t))
1557 t++;
73659bf1 1558 e = t;
463ee0b2 1559 }
bf4acbe4 1560 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1561 e++;
1562 if (*e != '\n' && *e != '\0')
1563 return; /* false alarm */
1564
8818d409
FC
1565 line_num = atoi(n)-1;
1566
f4dd75d9 1567 if (t - s > 0) {
d9095cec 1568 const STRLEN len = t - s;
19bad673
NC
1569 SV *const temp_sv = CopFILESV(PL_curcop);
1570 const char *cf;
1571 STRLEN tmplen;
1572
1573 if (temp_sv) {
1574 cf = SvPVX(temp_sv);
1575 tmplen = SvCUR(temp_sv);
1576 } else {
1577 cf = NULL;
1578 tmplen = 0;
1579 }
1580
42d9b98d 1581 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1582 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1583 * to *{"::_<newfilename"} */
44867030
NC
1584 /* However, the long form of evals is only turned on by the
1585 debugger - usually they're "(eval %lu)" */
1586 char smallbuf[128];
1587 char *tmpbuf;
1588 GV **gvp;
d9095cec 1589 STRLEN tmplen2 = len;
798b63bc 1590 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1591 tmpbuf = smallbuf;
1592 else
2ae0db35 1593 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1594 tmpbuf[0] = '_';
1595 tmpbuf[1] = '<';
2ae0db35 1596 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1597 tmplen += 2;
8a5ee598
RGS
1598 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1599 if (gvp) {
44867030
NC
1600 char *tmpbuf2;
1601 GV *gv2;
1602
1603 if (tmplen2 + 2 <= sizeof smallbuf)
1604 tmpbuf2 = smallbuf;
1605 else
1606 Newx(tmpbuf2, tmplen2 + 2, char);
1607
1608 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1609 /* Either they malloc'd it, or we malloc'd it,
1610 so no prefix is present in ours. */
1611 tmpbuf2[0] = '_';
1612 tmpbuf2[1] = '<';
1613 }
1614
1615 memcpy(tmpbuf2 + 2, s, tmplen2);
1616 tmplen2 += 2;
1617
8a5ee598 1618 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1619 if (!isGV(gv2)) {
8a5ee598 1620 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1621 /* adjust ${"::_<newfilename"} to store the new file name */
1622 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1623 /* The line number may differ. If that is the case,
1624 alias the saved lines that are in the array.
1625 Otherwise alias the whole array. */
1626 if (CopLINE(PL_curcop) == line_num) {
1627 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1628 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1629 }
1630 else if (GvAV(*gvp)) {
1631 AV * const av = GvAV(*gvp);
1632 const I32 start = CopLINE(PL_curcop)+1;
1633 I32 items = AvFILLp(av) - start;
1634 if (items > 0) {
1635 AV * const av2 = GvAVn(gv2);
1636 SV **svp = AvARRAY(av) + start;
1637 I32 l = (I32)line_num+1;
1638 while (items--)
1639 av_store(av2, l++, SvREFCNT_inc(*svp++));
1640 }
1641 }
e5527e4b 1642 }
44867030
NC
1643
1644 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1645 }
e66cf94c 1646 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1647 }
05ec9bb3 1648 CopFILE_free(PL_curcop);
d9095cec 1649 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1650 }
8818d409 1651 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1652}
1653
29595ff2 1654#ifdef PERL_MAD
cd81e915 1655/* skip space before PL_thistoken */
29595ff2
NC
1656
1657STATIC char *
1658S_skipspace0(pTHX_ register char *s)
1659{
7918f24d
NC
1660 PERL_ARGS_ASSERT_SKIPSPACE0;
1661
29595ff2
NC
1662 s = skipspace(s);
1663 if (!PL_madskills)
1664 return s;
cd81e915
NC
1665 if (PL_skipwhite) {
1666 if (!PL_thiswhite)
6b29d1f5 1667 PL_thiswhite = newSVpvs("");
cd81e915
NC
1668 sv_catsv(PL_thiswhite, PL_skipwhite);
1669 sv_free(PL_skipwhite);
1670 PL_skipwhite = 0;
1671 }
1672 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1673 return s;
1674}
1675
cd81e915 1676/* skip space after PL_thistoken */
29595ff2
NC
1677
1678STATIC char *
1679S_skipspace1(pTHX_ register char *s)
1680{
d4c19fe8 1681 const char *start = s;
29595ff2
NC
1682 I32 startoff = start - SvPVX(PL_linestr);
1683
7918f24d
NC
1684 PERL_ARGS_ASSERT_SKIPSPACE1;
1685
29595ff2
NC
1686 s = skipspace(s);
1687 if (!PL_madskills)
1688 return s;
1689 start = SvPVX(PL_linestr) + startoff;
cd81e915 1690 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1691 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1692 PL_thistoken = newSVpvn(tstart, start - tstart);
1693 }
1694 PL_realtokenstart = -1;
1695 if (PL_skipwhite) {
1696 if (!PL_nextwhite)
6b29d1f5 1697 PL_nextwhite = newSVpvs("");
cd81e915
NC
1698 sv_catsv(PL_nextwhite, PL_skipwhite);
1699 sv_free(PL_skipwhite);
1700 PL_skipwhite = 0;
29595ff2
NC
1701 }
1702 return s;
1703}
1704
1705STATIC char *
1706S_skipspace2(pTHX_ register char *s, SV **svp)
1707{
c35e046a
AL
1708 char *start;
1709 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1710 const I32 startoff = s - SvPVX(PL_linestr);
1711
7918f24d
NC
1712 PERL_ARGS_ASSERT_SKIPSPACE2;
1713
29595ff2
NC
1714 s = skipspace(s);
1715 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1716 if (!PL_madskills || !svp)
1717 return s;
1718 start = SvPVX(PL_linestr) + startoff;
cd81e915 1719 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1720 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1721 PL_thistoken = newSVpvn(tstart, start - tstart);
1722 PL_realtokenstart = -1;
29595ff2 1723 }
cd81e915 1724 if (PL_skipwhite) {
29595ff2 1725 if (!*svp)
6b29d1f5 1726 *svp = newSVpvs("");
cd81e915
NC
1727 sv_setsv(*svp, PL_skipwhite);
1728 sv_free(PL_skipwhite);
1729 PL_skipwhite = 0;
29595ff2
NC
1730 }
1731
1732 return s;
1733}
1734#endif
1735
80a702cd 1736STATIC void
15f169a1 1737S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1738{
1739 AV *av = CopFILEAVx(PL_curcop);
1740 if (av) {
b9f83d2f 1741 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1742 if (orig_sv)
1743 sv_setsv(sv, orig_sv);
1744 else
1745 sv_setpvn(sv, buf, len);
80a702cd
RGS
1746 (void)SvIOK_on(sv);
1747 SvIV_set(sv, 0);
1748 av_store(av, (I32)CopLINE(PL_curcop), sv);
1749 }
1750}
1751
ffb4593c
NT
1752/*
1753 * S_skipspace
1754 * Called to gobble the appropriate amount and type of whitespace.
1755 * Skips comments as well.
1756 */
1757
76e3520e 1758STATIC char *
cea2e8a9 1759S_skipspace(pTHX_ register char *s)
a687059c 1760{
5db06880 1761#ifdef PERL_MAD
f0e67a1d
Z
1762 char *start = s;
1763#endif /* PERL_MAD */
7918f24d 1764 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1765#ifdef PERL_MAD
cd81e915
NC
1766 if (PL_skipwhite) {
1767 sv_free(PL_skipwhite);
f0e67a1d 1768 PL_skipwhite = NULL;
5db06880 1769 }
f0e67a1d 1770#endif /* PERL_MAD */
3280af22 1771 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1772 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1773 s++;
f0e67a1d
Z
1774 } else {
1775 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1776 PL_bufptr = s;
f0998909
Z
1777 lex_read_space(LEX_KEEP_PREVIOUS |
1778 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1779 LEX_NO_NEXT_CHUNK : 0));
3280af22 1780 s = PL_bufptr;
f0e67a1d
Z
1781 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1782 if (PL_linestart > PL_bufptr)
1783 PL_bufptr = PL_linestart;
1784 return s;
463ee0b2 1785 }
5db06880 1786#ifdef PERL_MAD
f0e67a1d
Z
1787 if (PL_madskills)
1788 PL_skipwhite = newSVpvn(start, s-start);
1789#endif /* PERL_MAD */
5db06880 1790 return s;
a687059c 1791}
378cc40b 1792
ffb4593c
NT
1793/*
1794 * S_check_uni
1795 * Check the unary operators to ensure there's no ambiguity in how they're
1796 * used. An ambiguous piece of code would be:
1797 * rand + 5
1798 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1799 * the +5 is its argument.
1800 */
1801
76e3520e 1802STATIC void
cea2e8a9 1803S_check_uni(pTHX)
ba106d47 1804{
97aff369 1805 dVAR;
d4c19fe8
AL
1806 const char *s;
1807 const char *t;
2f3197b3 1808
3280af22 1809 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1810 return;
3280af22
NIS
1811 while (isSPACE(*PL_last_uni))
1812 PL_last_uni++;
c35e046a
AL
1813 s = PL_last_uni;
1814 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1815 s++;
3280af22 1816 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1817 return;
6136c704 1818
9b387841
NC
1819 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1820 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1821 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1822}
1823
ffb4593c
NT
1824/*
1825 * LOP : macro to build a list operator. Its behaviour has been replaced
1826 * with a subroutine, S_lop() for which LOP is just another name.
1827 */
1828
a0d0e21e
LW
1829#define LOP(f,x) return lop(f,x,s)
1830
ffb4593c
NT
1831/*
1832 * S_lop
1833 * Build a list operator (or something that might be one). The rules:
1834 * - if we have a next token, then it's a list operator [why?]
1835 * - if the next thing is an opening paren, then it's a function
1836 * - else it's a list operator
1837 */
1838
76e3520e 1839STATIC I32
a0be28da 1840S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1841{
97aff369 1842 dVAR;
7918f24d
NC
1843
1844 PERL_ARGS_ASSERT_LOP;
1845
6154021b 1846 pl_yylval.ival = f;
35c8bce7 1847 CLINE;
3280af22
NIS
1848 PL_expect = x;
1849 PL_bufptr = s;
1850 PL_last_lop = PL_oldbufptr;
eb160463 1851 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1852#ifdef PERL_MAD
1853 if (PL_lasttoke)
78cdf107 1854 goto lstop;
5db06880 1855#else
3280af22 1856 if (PL_nexttoke)
78cdf107 1857 goto lstop;
5db06880 1858#endif
79072805 1859 if (*s == '(')
bbf60fe6 1860 return REPORT(FUNC);
29595ff2 1861 s = PEEKSPACE(s);
79072805 1862 if (*s == '(')
bbf60fe6 1863 return REPORT(FUNC);
78cdf107
Z
1864 else {
1865 lstop:
1866 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1867 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1868 return REPORT(LSTOP);
78cdf107 1869 }
79072805
LW
1870}
1871
5db06880
NC
1872#ifdef PERL_MAD
1873 /*
1874 * S_start_force
1875 * Sets up for an eventual force_next(). start_force(0) basically does
1876 * an unshift, while start_force(-1) does a push. yylex removes items
1877 * on the "pop" end.
1878 */
1879
1880STATIC void
1881S_start_force(pTHX_ int where)
1882{
1883 int i;
1884
cd81e915 1885 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1886 where = PL_lasttoke;
cd81e915
NC
1887 assert(PL_curforce < 0 || PL_curforce == where);
1888 if (PL_curforce != where) {
5db06880
NC
1889 for (i = PL_lasttoke; i > where; --i) {
1890 PL_nexttoke[i] = PL_nexttoke[i-1];
1891 }
1892 PL_lasttoke++;
1893 }
cd81e915 1894 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1895 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1896 PL_curforce = where;
1897 if (PL_nextwhite) {
5db06880 1898 if (PL_madskills)
6b29d1f5 1899 curmad('^', newSVpvs(""));
cd81e915 1900 CURMAD('_', PL_nextwhite);
5db06880
NC
1901 }
1902}
1903
1904STATIC void
1905S_curmad(pTHX_ char slot, SV *sv)
1906{
1907 MADPROP **where;
1908
1909 if (!sv)
1910 return;
cd81e915
NC
1911 if (PL_curforce < 0)
1912 where = &PL_thismad;
5db06880 1913 else
cd81e915 1914 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1915
cd81e915 1916 if (PL_faketokens)
76f68e9b 1917 sv_setpvs(sv, "");
5db06880
NC
1918 else {
1919 if (!IN_BYTES) {
1920 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1921 SvUTF8_on(sv);
1922 else if (PL_encoding) {
1923 sv_recode_to_utf8(sv, PL_encoding);
1924 }
1925 }
1926 }
1927
1928 /* keep a slot open for the head of the list? */
1929 if (slot != '_' && *where && (*where)->mad_key == '^') {
1930 (*where)->mad_key = slot;
daba3364 1931 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1932 (*where)->mad_val = (void*)sv;
1933 }
1934 else
1935 addmad(newMADsv(slot, sv), where, 0);
1936}
1937#else
b3f24c00
MHM
1938# define start_force(where) NOOP
1939# define curmad(slot, sv) NOOP
5db06880
NC
1940#endif
1941
ffb4593c
NT
1942/*
1943 * S_force_next
9cbb5ea2 1944 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1945 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1946 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1947 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1948 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1949 */
1950
4e553d73 1951STATIC void
cea2e8a9 1952S_force_next(pTHX_ I32 type)
79072805 1953{
97aff369 1954 dVAR;
704d4215
GG
1955#ifdef DEBUGGING
1956 if (DEBUG_T_TEST) {
1957 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1958 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1959 }
1960#endif
5db06880 1961#ifdef PERL_MAD
cd81e915 1962 if (PL_curforce < 0)
5db06880 1963 start_force(PL_lasttoke);
cd81e915 1964 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1965 if (PL_lex_state != LEX_KNOWNEXT)
1966 PL_lex_defer = PL_lex_state;
1967 PL_lex_state = LEX_KNOWNEXT;
1968 PL_lex_expect = PL_expect;
cd81e915 1969 PL_curforce = -1;
5db06880 1970#else
3280af22
NIS
1971 PL_nexttype[PL_nexttoke] = type;
1972 PL_nexttoke++;
1973 if (PL_lex_state != LEX_KNOWNEXT) {
1974 PL_lex_defer = PL_lex_state;
1975 PL_lex_expect = PL_expect;
1976 PL_lex_state = LEX_KNOWNEXT;
79072805 1977 }
5db06880 1978#endif
79072805
LW
1979}
1980
28ac2b49
Z
1981void
1982Perl_yyunlex(pTHX)
1983{
a7aaec61
Z
1984 int yyc = PL_parser->yychar;
1985 if (yyc != YYEMPTY) {
1986 if (yyc) {
1987 start_force(-1);
1988 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1989 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 1990 PL_lex_allbrackets--;
a7aaec61 1991 PL_lex_brackets--;
78cdf107
Z
1992 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1993 } else if (yyc == '('/*)*/) {
1994 PL_lex_allbrackets--;
1995 yyc |= (2<<24);
a7aaec61
Z
1996 }
1997 force_next(yyc);
1998 }
28ac2b49
Z
1999 PL_parser->yychar = YYEMPTY;
2000 }
2001}
2002
d0a148a6 2003STATIC SV *
15f169a1 2004S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2005{
97aff369 2006 dVAR;
740cce10 2007 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2008 !IN_BYTES
2009 && UTF
2010 && !is_ascii_string((const U8*)start, len)
740cce10 2011 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2012 return sv;
2013}
2014
ffb4593c
NT
2015/*
2016 * S_force_word
2017 * When the lexer knows the next thing is a word (for instance, it has
2018 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2019 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2020 * lookahead.
ffb4593c
NT
2021 *
2022 * Arguments:
b1b65b59 2023 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2024 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2025 * int check_keyword : if true, Perl checks to make sure the word isn't
2026 * a keyword (do this if the word is a label, e.g. goto FOO)
2027 * int allow_pack : if true, : characters will also be allowed (require,
2028 * use, etc. do this)
9cbb5ea2 2029 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2030 */
2031
76e3520e 2032STATIC char *
cea2e8a9 2033S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2034{
97aff369 2035 dVAR;
463ee0b2
LW
2036 register char *s;
2037 STRLEN len;
4e553d73 2038
7918f24d
NC
2039 PERL_ARGS_ASSERT_FORCE_WORD;
2040
29595ff2 2041 start = SKIPSPACE1(start);
463ee0b2 2042 s = start;
7e2040f0 2043 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2044 (allow_pack && *s == ':') ||
15f0808c 2045 (allow_initial_tick && *s == '\'') )
a0d0e21e 2046 {
3280af22 2047 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2048 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2049 return start;
cd81e915 2050 start_force(PL_curforce);
5db06880
NC
2051 if (PL_madskills)
2052 curmad('X', newSVpvn(start,s-start));
463ee0b2 2053 if (token == METHOD) {
29595ff2 2054 s = SKIPSPACE1(s);
463ee0b2 2055 if (*s == '(')
3280af22 2056 PL_expect = XTERM;
463ee0b2 2057 else {
3280af22 2058 PL_expect = XOPERATOR;
463ee0b2 2059 }
79072805 2060 }
e74e6b3d 2061 if (PL_madskills)
63575281 2062 curmad('g', newSVpvs( "forced" ));
9ded7720 2063 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2064 = (OP*)newSVOP(OP_CONST,0,
2065 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2066 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2067 force_next(token);
2068 }
2069 return s;
2070}
2071
ffb4593c
NT
2072/*
2073 * S_force_ident
9cbb5ea2 2074 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2075 * text only contains the "foo" portion. The first argument is a pointer
2076 * to the "foo", and the second argument is the type symbol to prefix.
2077 * Forces the next token to be a "WORD".
9cbb5ea2 2078 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2079 */
2080
76e3520e 2081STATIC void
bfed75c6 2082S_force_ident(pTHX_ register const char *s, int kind)
79072805 2083{
97aff369 2084 dVAR;
7918f24d
NC
2085
2086 PERL_ARGS_ASSERT_FORCE_IDENT;
2087
c35e046a 2088 if (*s) {
90e5519e
NC
2089 const STRLEN len = strlen(s);
2090 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2091 start_force(PL_curforce);
9ded7720 2092 NEXTVAL_NEXTTOKE.opval = o;
79072805 2093 force_next(WORD);
748a9306 2094 if (kind) {
11343788 2095 o->op_private = OPpCONST_ENTERED;
55497cff 2096 /* XXX see note in pp_entereval() for why we forgo typo
2097 warnings if the symbol must be introduced in an eval.
2098 GSAR 96-10-12 */
90e5519e
NC
2099 gv_fetchpvn_flags(s, len,
2100 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2101 : GV_ADD,
2102 kind == '$' ? SVt_PV :
2103 kind == '@' ? SVt_PVAV :
2104 kind == '%' ? SVt_PVHV :
a0d0e21e 2105 SVt_PVGV
90e5519e 2106 );
748a9306 2107 }
79072805
LW
2108 }
2109}
2110
1571675a
GS
2111NV
2112Perl_str_to_version(pTHX_ SV *sv)
2113{
2114 NV retval = 0.0;
2115 NV nshift = 1.0;
2116 STRLEN len;
cfd0369c 2117 const char *start = SvPV_const(sv,len);
9d4ba2ae 2118 const char * const end = start + len;
504618e9 2119 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2120
2121 PERL_ARGS_ASSERT_STR_TO_VERSION;
2122
1571675a 2123 while (start < end) {
ba210ebe 2124 STRLEN skip;
1571675a
GS
2125 UV n;
2126 if (utf)
9041c2e3 2127 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2128 else {
2129 n = *(U8*)start;
2130 skip = 1;
2131 }
2132 retval += ((NV)n)/nshift;
2133 start += skip;
2134 nshift *= 1000;
2135 }
2136 return retval;
2137}
2138
4e553d73 2139/*
ffb4593c
NT
2140 * S_force_version
2141 * Forces the next token to be a version number.
e759cc13
RGS
2142 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2143 * and if "guessing" is TRUE, then no new token is created (and the caller
2144 * must use an alternative parsing method).
ffb4593c
NT
2145 */
2146
76e3520e 2147STATIC char *
e759cc13 2148S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2149{
97aff369 2150 dVAR;
5f66b61c 2151 OP *version = NULL;
44dcb63b 2152 char *d;
5db06880
NC
2153#ifdef PERL_MAD
2154 I32 startoff = s - SvPVX(PL_linestr);
2155#endif
89bfa8cd 2156
7918f24d
NC
2157 PERL_ARGS_ASSERT_FORCE_VERSION;
2158
29595ff2 2159 s = SKIPSPACE1(s);
89bfa8cd 2160
44dcb63b 2161 d = s;
dd629d5b 2162 if (*d == 'v')
44dcb63b 2163 d++;
44dcb63b 2164 if (isDIGIT(*d)) {
e759cc13
RGS
2165 while (isDIGIT(*d) || *d == '_' || *d == '.')
2166 d++;
5db06880
NC
2167#ifdef PERL_MAD
2168 if (PL_madskills) {
cd81e915 2169 start_force(PL_curforce);
5db06880
NC
2170 curmad('X', newSVpvn(s,d-s));
2171 }
2172#endif
4e4da3ac 2173 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2174 SV *ver;
8d08d9ba
DG
2175#ifdef USE_LOCALE_NUMERIC
2176 char *loc = setlocale(LC_NUMERIC, "C");
2177#endif
6154021b 2178 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2179#ifdef USE_LOCALE_NUMERIC
2180 setlocale(LC_NUMERIC, loc);
2181#endif
6154021b 2182 version = pl_yylval.opval;
dd629d5b
GS
2183 ver = cSVOPx(version)->op_sv;
2184 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2185 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2186 SvNV_set(ver, str_to_version(ver));
1571675a 2187 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2188 }
89bfa8cd 2189 }
5db06880
NC
2190 else if (guessing) {
2191#ifdef PERL_MAD
2192 if (PL_madskills) {
cd81e915
NC
2193 sv_free(PL_nextwhite); /* let next token collect whitespace */
2194 PL_nextwhite = 0;
5db06880
NC
2195 s = SvPVX(PL_linestr) + startoff;
2196 }
2197#endif
e759cc13 2198 return s;
5db06880 2199 }
89bfa8cd 2200 }
2201
5db06880
NC
2202#ifdef PERL_MAD
2203 if (PL_madskills && !version) {
cd81e915
NC
2204 sv_free(PL_nextwhite); /* let next token collect whitespace */
2205 PL_nextwhite = 0;
5db06880
NC
2206 s = SvPVX(PL_linestr) + startoff;
2207 }
2208#endif
89bfa8cd 2209 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2210 start_force(PL_curforce);
9ded7720 2211 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2212 force_next(WORD);
89bfa8cd 2213
e759cc13 2214 return s;
89bfa8cd 2215}
2216
ffb4593c 2217/*
91152fc1
DG
2218 * S_force_strict_version
2219 * Forces the next token to be a version number using strict syntax rules.
2220 */
2221
2222STATIC char *
2223S_force_strict_version(pTHX_ char *s)
2224{
2225 dVAR;
2226 OP *version = NULL;
2227#ifdef PERL_MAD
2228 I32 startoff = s - SvPVX(PL_linestr);
2229#endif
2230 const char *errstr = NULL;
2231
2232 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2233
2234 while (isSPACE(*s)) /* leading whitespace */
2235 s++;
2236
2237 if (is_STRICT_VERSION(s,&errstr)) {
2238 SV *ver = newSV(0);
2239 s = (char *)scan_version(s, ver, 0);
2240 version = newSVOP(OP_CONST, 0, ver);
2241 }
4e4da3ac
Z
2242 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2243 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2244 {
91152fc1
DG
2245 PL_bufptr = s;
2246 if (errstr)
2247 yyerror(errstr); /* version required */
2248 return s;
2249 }
2250
2251#ifdef PERL_MAD
2252 if (PL_madskills && !version) {
2253 sv_free(PL_nextwhite); /* let next token collect whitespace */
2254 PL_nextwhite = 0;
2255 s = SvPVX(PL_linestr) + startoff;
2256 }
2257#endif
2258 /* NOTE: The parser sees the package name and the VERSION swapped */
2259 start_force(PL_curforce);
2260 NEXTVAL_NEXTTOKE.opval = version;
2261 force_next(WORD);
2262
2263 return s;
2264}
2265
2266/*
ffb4593c
NT
2267 * S_tokeq
2268 * Tokenize a quoted string passed in as an SV. It finds the next
2269 * chunk, up to end of string or a backslash. It may make a new
2270 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2271 * turns \\ into \.
2272 */
2273
76e3520e 2274STATIC SV *
cea2e8a9 2275S_tokeq(pTHX_ SV *sv)
79072805 2276{
97aff369 2277 dVAR;
79072805
LW
2278 register char *s;
2279 register char *send;
2280 register char *d;
b3ac6de7
IZ
2281 STRLEN len = 0;
2282 SV *pv = sv;
79072805 2283
7918f24d
NC
2284 PERL_ARGS_ASSERT_TOKEQ;
2285
79072805 2286 if (!SvLEN(sv))
b3ac6de7 2287 goto finish;
79072805 2288
a0d0e21e 2289 s = SvPV_force(sv, len);
21a311ee 2290 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2291 goto finish;
463ee0b2 2292 send = s + len;
dcb21ed6
NC
2293 /* This is relying on the SV being "well formed" with a trailing '\0' */
2294 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2295 s++;
2296 if (s == send)
b3ac6de7 2297 goto finish;
79072805 2298 d = s;
be4731d2 2299 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2300 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2301 }
79072805
LW
2302 while (s < send) {
2303 if (*s == '\\') {
a0d0e21e 2304 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2305 s++; /* all that, just for this */
2306 }
2307 *d++ = *s++;
2308 }
2309 *d = '\0';
95a20fc0 2310 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2311 finish:
3280af22 2312 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2313 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2314 return sv;
2315}
2316
ffb4593c
NT
2317/*
2318 * Now come three functions related to double-quote context,
2319 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2320 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2321 * interact with PL_lex_state, and create fake ( ... ) argument lists
2322 * to handle functions and concatenation.
2323 * They assume that whoever calls them will be setting up a fake
2324 * join call, because each subthing puts a ',' after it. This lets
2325 * "lower \luPpEr"
2326 * become
2327 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2328 *
2329 * (I'm not sure whether the spurious commas at the end of lcfirst's
2330 * arguments and join's arguments are created or not).
2331 */
2332
2333/*
2334 * S_sublex_start
6154021b 2335 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2336 *
2337 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2338 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2339 *
2340 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2341 *
2342 * Everything else becomes a FUNC.
2343 *
2344 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2345 * had an OP_CONST or OP_READLINE). This just sets us up for a
2346 * call to S_sublex_push().
2347 */
2348
76e3520e 2349STATIC I32
cea2e8a9 2350S_sublex_start(pTHX)
79072805 2351{
97aff369 2352 dVAR;
6154021b 2353 register const I32 op_type = pl_yylval.ival;
79072805
LW
2354
2355 if (op_type == OP_NULL) {
6154021b 2356 pl_yylval.opval = PL_lex_op;
5f66b61c 2357 PL_lex_op = NULL;
79072805
LW
2358 return THING;
2359 }
2360 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2361 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2362
2363 if (SvTYPE(sv) == SVt_PVIV) {
2364 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2365 STRLEN len;
96a5add6 2366 const char * const p = SvPV_const(sv, len);
740cce10 2367 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2368 SvREFCNT_dec(sv);
2369 sv = nsv;
4e553d73 2370 }
6154021b 2371 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2372 PL_lex_stuff = NULL;
6f33ba73
RGS
2373 /* Allow <FH> // "foo" */
2374 if (op_type == OP_READLINE)
2375 PL_expect = XTERMORDORDOR;
79072805
LW
2376 return THING;
2377 }
e3f73d4e
RGS
2378 else if (op_type == OP_BACKTICK && PL_lex_op) {
2379 /* readpipe() vas overriden */
2380 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2381 pl_yylval.opval = PL_lex_op;
9b201d7d 2382 PL_lex_op = NULL;
e3f73d4e
RGS
2383 PL_lex_stuff = NULL;
2384 return THING;
2385 }
79072805 2386
3280af22 2387 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2388 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2389 PL_sublex_info.sub_op = PL_lex_op;
2390 PL_lex_state = LEX_INTERPPUSH;
55497cff 2391
3280af22
NIS
2392 PL_expect = XTERM;
2393 if (PL_lex_op) {
6154021b 2394 pl_yylval.opval = PL_lex_op;
5f66b61c 2395 PL_lex_op = NULL;
55497cff 2396 return PMFUNC;
2397 }
2398 else
2399 return FUNC;
2400}
2401
ffb4593c
NT
2402/*
2403 * S_sublex_push
2404 * Create a new scope to save the lexing state. The scope will be
2405 * ended in S_sublex_done. Returns a '(', starting the function arguments
2406 * to the uc, lc, etc. found before.
2407 * Sets PL_lex_state to LEX_INTERPCONCAT.
2408 */
2409
76e3520e 2410STATIC I32
cea2e8a9 2411S_sublex_push(pTHX)
55497cff 2412{
27da23d5 2413 dVAR;
f46d017c 2414 ENTER;
55497cff 2415
3280af22 2416 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2417 SAVEBOOL(PL_lex_dojoin);
3280af22 2418 SAVEI32(PL_lex_brackets);
78cdf107
Z
2419 SAVEI32(PL_lex_allbrackets);
2420 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2421 SAVEI32(PL_lex_casemods);
2422 SAVEI32(PL_lex_starts);
651b5b28 2423 SAVEI8(PL_lex_state);
7766f137 2424 SAVEVPTR(PL_lex_inpat);
98246f1e 2425 SAVEI16(PL_lex_inwhat);
57843af0 2426 SAVECOPLINE(PL_curcop);
3280af22 2427 SAVEPPTR(PL_bufptr);
8452ff4b 2428 SAVEPPTR(PL_bufend);
3280af22
NIS
2429 SAVEPPTR(PL_oldbufptr);
2430 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2431 SAVEPPTR(PL_last_lop);
2432 SAVEPPTR(PL_last_uni);
3280af22
NIS
2433 SAVEPPTR(PL_linestart);
2434 SAVESPTR(PL_linestr);
8edd5f42
RGS
2435 SAVEGENERICPV(PL_lex_brackstack);
2436 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2437
2438 PL_linestr = PL_lex_stuff;
a0714e2c 2439 PL_lex_stuff = NULL;
3280af22 2440
9cbb5ea2
GS
2441 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2442 = SvPVX(PL_linestr);
3280af22 2443 PL_bufend += SvCUR(PL_linestr);
bd61b366 2444 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2445 SAVEFREESV(PL_linestr);
2446
2447 PL_lex_dojoin = FALSE;
2448 PL_lex_brackets = 0;
78cdf107
Z
2449 PL_lex_allbrackets = 0;
2450 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2451 Newx(PL_lex_brackstack, 120, char);
2452 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2453 PL_lex_casemods = 0;
2454 *PL_lex_casestack = '\0';
2455 PL_lex_starts = 0;
2456 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2457 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2458
2459 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2460 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2461 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2462 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2463 else
5f66b61c 2464 PL_lex_inpat = NULL;
79072805 2465
55497cff 2466 return '(';
79072805
LW
2467}
2468
ffb4593c
NT
2469/*
2470 * S_sublex_done
2471 * Restores lexer state after a S_sublex_push.
2472 */
2473
76e3520e 2474STATIC I32
cea2e8a9 2475S_sublex_done(pTHX)
79072805 2476{
27da23d5 2477 dVAR;
3280af22 2478 if (!PL_lex_starts++) {
396482e1 2479 SV * const sv = newSVpvs("");
9aa983d2
JH
2480 if (SvUTF8(PL_linestr))
2481 SvUTF8_on(sv);
3280af22 2482 PL_expect = XOPERATOR;
6154021b 2483 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2484 return THING;
2485 }
2486
3280af22
NIS
2487 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2488 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2489 return yylex();
79072805
LW
2490 }
2491
ffb4593c 2492 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2493 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2494 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2495 PL_linestr = PL_lex_repl;
2496 PL_lex_inpat = 0;
2497 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2498 PL_bufend += SvCUR(PL_linestr);
bd61b366 2499 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2500 SAVEFREESV(PL_linestr);
2501 PL_lex_dojoin = FALSE;
2502 PL_lex_brackets = 0;
78cdf107
Z
2503 PL_lex_allbrackets = 0;
2504 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2505 PL_lex_casemods = 0;
2506 *PL_lex_casestack = '\0';
2507 PL_lex_starts = 0;
25da4f38 2508 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2509 PL_lex_state = LEX_INTERPNORMAL;
2510 PL_lex_starts++;
e9fa98b2
HS
2511 /* we don't clear PL_lex_repl here, so that we can check later
2512 whether this is an evalled subst; that means we rely on the
2513 logic to ensure sublex_done() is called again only via the
2514 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2515 }
e9fa98b2 2516 else {
3280af22 2517 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2518 PL_lex_repl = NULL;
e9fa98b2 2519 }
79072805 2520 return ',';
ffed7fef
LW
2521 }
2522 else {
5db06880
NC
2523#ifdef PERL_MAD
2524 if (PL_madskills) {
cd81e915
NC
2525 if (PL_thiswhite) {
2526 if (!PL_endwhite)
6b29d1f5 2527 PL_endwhite = newSVpvs("");
cd81e915
NC
2528 sv_catsv(PL_endwhite, PL_thiswhite);
2529 PL_thiswhite = 0;
2530 }
2531 if (PL_thistoken)
76f68e9b 2532 sv_setpvs(PL_thistoken,"");
5db06880 2533 else
cd81e915 2534 PL_realtokenstart = -1;
5db06880
NC
2535 }
2536#endif
f46d017c 2537 LEAVE;
3280af22
NIS
2538 PL_bufend = SvPVX(PL_linestr);
2539 PL_bufend += SvCUR(PL_linestr);
2540 PL_expect = XOPERATOR;
09bef843 2541 PL_sublex_info.sub_inwhat = 0;
79072805 2542 return ')';
ffed7fef
LW
2543 }
2544}
2545
02aa26ce
NT
2546/*
2547 scan_const
2548
2549 Extracts a pattern, double-quoted string, or transliteration. This
2550 is terrifying code.
2551
94def140 2552 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2553 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2554 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2555
94def140
TS
2556 Returns a pointer to the character scanned up to. If this is
2557 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2558 successfully parsed), will leave an OP for the substring scanned
6154021b 2559 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2560 by looking at the next characters herself.
2561
02aa26ce
NT
2562 In patterns:
2563 backslashes:
ff3f963a 2564 constants: \N{NAME} only
02aa26ce
NT
2565 case and quoting: \U \Q \E
2566 stops on @ and $, but not for $ as tail anchor
2567
2568 In transliterations:
2569 characters are VERY literal, except for - not at the start or end
94def140
TS
2570 of the string, which indicates a range. If the range is in bytes,
2571 scan_const expands the range to the full set of intermediate
2572 characters. If the range is in utf8, the hyphen is replaced with
2573 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2574
2575 In double-quoted strings:
2576 backslashes:
2577 double-quoted style: \r and \n
ff3f963a 2578 constants: \x31, etc.
94def140 2579 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2580 case and quoting: \U \Q \E
2581 stops on @ and $
2582
2583 scan_const does *not* construct ops to handle interpolated strings.
2584 It stops processing as soon as it finds an embedded $ or @ variable
2585 and leaves it to the caller to work out what's going on.
2586
94def140
TS
2587 embedded arrays (whether in pattern or not) could be:
2588 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2589
2590 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2591
2592 $ in pattern could be $foo or could be tail anchor. Assumption:
2593 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2594 followed by one of "()| \r\n\t"
02aa26ce
NT
2595
2596 \1 (backreferences) are turned into $1
2597
2598 The structure of the code is
2599 while (there's a character to process) {
94def140
TS
2600 handle transliteration ranges
2601 skip regexp comments /(?#comment)/ and codes /(?{code})/
2602 skip #-initiated comments in //x patterns
2603 check for embedded arrays
02aa26ce
NT
2604 check for embedded scalars
2605 if (backslash) {
94def140 2606 deprecate \1 in substitution replacements
02aa26ce
NT
2607 handle string-changing backslashes \l \U \Q \E, etc.
2608 switch (what was escaped) {
94def140 2609 handle \- in a transliteration (becomes a literal -)
ff3f963a 2610 if a pattern and not \N{, go treat as regular character
94def140
TS
2611 handle \132 (octal characters)
2612 handle \x15 and \x{1234} (hex characters)
ff3f963a 2613 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2614 handle \cV (control characters)
2615 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2616 } (end switch)
77a135fe 2617 continue
02aa26ce 2618 } (end if backslash)
77a135fe 2619 handle regular character
02aa26ce 2620 } (end while character to read)
4e553d73 2621
02aa26ce
NT
2622*/
2623
76e3520e 2624STATIC char *
cea2e8a9 2625S_scan_const(pTHX_ char *start)
79072805 2626{
97aff369 2627 dVAR;
3280af22 2628 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2629 SV *sv = newSV(send - start); /* sv for the constant. See
2630 note below on sizing. */
02aa26ce
NT
2631 register char *s = start; /* start of the constant */
2632 register char *d = SvPVX(sv); /* destination for copies */
2633 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2634 bool didrange = FALSE; /* did we just finish a range? */
b953e60c
KW
2635 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2636 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2637 to be UTF8? But, this can
2638 show as true when the source
2639 isn't utf8, as for example
2640 when it is entirely composed
2641 of hex constants */
2642
2643 /* Note on sizing: The scanned constant is placed into sv, which is
2644 * initialized by newSV() assuming one byte of output for every byte of
2645 * input. This routine expects newSV() to allocate an extra byte for a
2646 * trailing NUL, which this routine will append if it gets to the end of
2647 * the input. There may be more bytes of input than output (eg., \N{LATIN
2648 * CAPITAL LETTER A}), or more output than input if the constant ends up
2649 * recoded to utf8, but each time a construct is found that might increase
2650 * the needed size, SvGROW() is called. Its size parameter each time is
2651 * based on the best guess estimate at the time, namely the length used so
2652 * far, plus the length the current construct will occupy, plus room for
2653 * the trailing NUL, plus one byte for every input byte still unscanned */
2654
012bcf8d 2655 UV uv;
4c3a8340
TS
2656#ifdef EBCDIC
2657 UV literal_endpoint = 0;
e294cc5d 2658 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2659#endif
012bcf8d 2660
7918f24d
NC
2661 PERL_ARGS_ASSERT_SCAN_CONST;
2662
bb16bae8 2663 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2664 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2665 /* If we are doing a trans and we know we want UTF8 set expectation */
2666 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2667 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2668 }
2669
2670
79072805 2671 while (s < send || dorange) {
ff3f963a 2672
02aa26ce 2673 /* get transliterations out of the way (they're most literal) */
3280af22 2674 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2675 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2676 if (dorange) {
1ba5c669
JH
2677 I32 i; /* current expanded character */
2678 I32 min; /* first character in range */
2679 I32 max; /* last character in range */
02aa26ce 2680
e294cc5d
JH
2681#ifdef EBCDIC
2682 UV uvmax = 0;
2683#endif
2684
2685 if (has_utf8
2686#ifdef EBCDIC
2687 && !native_range
2688#endif
2689 ) {
9d4ba2ae 2690 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2691 char *e = d++;
2692 while (e-- > c)
2693 *(e + 1) = *e;
25716404 2694 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2695 /* mark the range as done, and continue */
2696 dorange = FALSE;
2697 didrange = TRUE;
2698 continue;
2699 }
2b9d42f0 2700
95a20fc0 2701 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2702#ifdef EBCDIC
2703 SvGROW(sv,
2704 SvLEN(sv) + (has_utf8 ?
2705 (512 - UTF_CONTINUATION_MARK +
2706 UNISKIP(0x100))
2707 : 256));
2708 /* How many two-byte within 0..255: 128 in UTF-8,
2709 * 96 in UTF-8-mod. */
2710#else
9cbb5ea2 2711 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2712#endif
9cbb5ea2 2713 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2714#ifdef EBCDIC
2715 if (has_utf8) {
2716 int j;
2717 for (j = 0; j <= 1; j++) {
2718 char * const c = (char*)utf8_hop((U8*)d, -1);
2719 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2720 if (j)
2721 min = (U8)uv;
2722 else if (uv < 256)
2723 max = (U8)uv;
2724 else {
2725 max = (U8)0xff; /* only to \xff */
2726 uvmax = uv; /* \x{100} to uvmax */
2727 }
2728 d = c; /* eat endpoint chars */
2729 }
2730 }
2731 else {
2732#endif
2733 d -= 2; /* eat the first char and the - */
2734 min = (U8)*d; /* first char in range */
2735 max = (U8)d[1]; /* last char in range */
2736#ifdef EBCDIC
2737 }
2738#endif
8ada0baa 2739
c2e66d9e 2740 if (min > max) {
01ec43d0 2741 Perl_croak(aTHX_
d1573ac7 2742 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2743 (char)min, (char)max);
c2e66d9e
GS
2744 }
2745
c7f1f016 2746#ifdef EBCDIC
4c3a8340
TS
2747 if (literal_endpoint == 2 &&
2748 ((isLOWER(min) && isLOWER(max)) ||
2749 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2750 if (isLOWER(min)) {
2751 for (i = min; i <= max; i++)
2752 if (isLOWER(i))
db42d148 2753 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2754 } else {
2755 for (i = min; i <= max; i++)
2756 if (isUPPER(i))
db42d148 2757 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2758 }
2759 }
2760 else
2761#endif
2762 for (i = min; i <= max; i++)
e294cc5d
JH
2763#ifdef EBCDIC
2764 if (has_utf8) {
2765 const U8 ch = (U8)NATIVE_TO_UTF(i);
2766 if (UNI_IS_INVARIANT(ch))
2767 *d++ = (U8)i;
2768 else {
2769 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2770 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2771 }
2772 }
2773 else
2774#endif
2775 *d++ = (char)i;
2776
2777#ifdef EBCDIC
2778 if (uvmax) {
2779 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2780 if (uvmax > 0x101)
2781 *d++ = (char)UTF_TO_NATIVE(0xff);
2782 if (uvmax > 0x100)
2783 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2784 }
2785#endif
02aa26ce
NT
2786
2787 /* mark the range as done, and continue */
79072805 2788 dorange = FALSE;
01ec43d0 2789 didrange = TRUE;
4c3a8340
TS
2790#ifdef EBCDIC
2791 literal_endpoint = 0;
2792#endif
79072805 2793 continue;
4e553d73 2794 }
02aa26ce
NT
2795
2796 /* range begins (ignore - as first or last char) */
79072805 2797 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2798 if (didrange) {
1fafa243 2799 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2800 }
e294cc5d
JH
2801 if (has_utf8
2802#ifdef EBCDIC
2803 && !native_range
2804#endif
2805 ) {
25716404 2806 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2807 s++;
2808 continue;
2809 }
79072805
LW
2810 dorange = TRUE;
2811 s++;
01ec43d0
GS
2812 }
2813 else {
2814 didrange = FALSE;
4c3a8340
TS
2815#ifdef EBCDIC
2816 literal_endpoint = 0;
e294cc5d 2817 native_range = TRUE;
4c3a8340 2818#endif
01ec43d0 2819 }
79072805 2820 }
02aa26ce
NT
2821
2822 /* if we get here, we're not doing a transliteration */
2823
0f5d15d6
IZ
2824 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2825 except for the last char, which will be done separately. */
3280af22 2826 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2827 if (s[2] == '#') {
e994fd66 2828 while (s+1 < send && *s != ')')
db42d148 2829 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2830 }
2831 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2832 || (s[2] == '?' && s[3] == '{'))
155aba94 2833 {
cc6b7395 2834 I32 count = 1;
0f5d15d6 2835 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2836 char c;
2837
d9f97599
GS
2838 while (count && (c = *regparse)) {
2839 if (c == '\\' && regparse[1])
2840 regparse++;
4e553d73 2841 else if (c == '{')
cc6b7395 2842 count++;
4e553d73 2843 else if (c == '}')
cc6b7395 2844 count--;
d9f97599 2845 regparse++;
cc6b7395 2846 }
e994fd66 2847 if (*regparse != ')')
5bdf89e7 2848 regparse--; /* Leave one char for continuation. */
0f5d15d6 2849 while (s < regparse)
db42d148 2850 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2851 }
748a9306 2852 }
02aa26ce
NT
2853
2854 /* likewise skip #-initiated comments in //x patterns */
3280af22 2855 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2856 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2857 while (s+1 < send && *s != '\n')
db42d148 2858 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2859 }
02aa26ce 2860
5d1d4326 2861 /* check for embedded arrays
da6eedaa 2862 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2863 */
1749ea0d
TS
2864 else if (*s == '@' && s[1]) {
2865 if (isALNUM_lazy_if(s+1,UTF))
2866 break;
2867 if (strchr(":'{$", s[1]))
2868 break;
2869 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2870 break; /* in regexp, neither @+ nor @- are interpolated */
2871 }
02aa26ce
NT
2872
2873 /* check for embedded scalars. only stop if we're sure it's a
2874 variable.
2875 */
79072805 2876 else if (*s == '$') {
3280af22 2877 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2878 break;
77772344 2879 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2880 if (s[1] == '\\') {
2881 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2882 "Possible unintended interpolation of $\\ in regex");
77772344 2883 }
79072805 2884 break; /* in regexp, $ might be tail anchor */
77772344 2885 }
79072805 2886 }
02aa26ce 2887
2b9d42f0
NIS
2888 /* End of else if chain - OP_TRANS rejoin rest */
2889
02aa26ce 2890 /* backslashes */
79072805 2891 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2892 char* e; /* Can be used for ending '}', etc. */
2893
79072805 2894 s++;
02aa26ce 2895
7d0fc23c
KW
2896 /* warn on \1 - \9 in substitution replacements, but note that \11
2897 * is an octal; and \19 is \1 followed by '9' */
3280af22 2898 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2899 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2900 {
a2a5de95 2901 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2902 *--s = '$';
2903 break;
2904 }
02aa26ce
NT
2905
2906 /* string-change backslash escapes */
3280af22 2907 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2908 --s;
2909 break;
2910 }
ff3f963a
KW
2911 /* In a pattern, process \N, but skip any other backslash escapes.
2912 * This is because we don't want to translate an escape sequence
2913 * into a meta symbol and have the regex compiler use the meta
2914 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2915 * in spite of this, we do have to process \N here while the proper
2916 * charnames handler is in scope. See bugs #56444 and #62056.
2917 * There is a complication because \N in a pattern may also stand
2918 * for 'match a non-nl', and not mean a charname, in which case its
2919 * processing should be deferred to the regex compiler. To be a
2920 * charname it must be followed immediately by a '{', and not look
2921 * like \N followed by a curly quantifier, i.e., not something like
2922 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2923 * quantifier */
2924 else if (PL_lex_inpat
2925 && (*s != 'N'
2926 || s[1] != '{'
2927 || regcurly(s + 1)))
2928 {
cc74c5bd
TS
2929 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2930 goto default_action;
2931 }
02aa26ce 2932
79072805 2933 switch (*s) {
02aa26ce
NT
2934
2935 /* quoted - in transliterations */
79072805 2936 case '-':
3280af22 2937 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2938 *d++ = *s++;
2939 continue;
2940 }
2941 /* FALL THROUGH */
2942 default:
11b8faa4 2943 {
a2a5de95
NC
2944 if ((isALPHA(*s) || isDIGIT(*s)))
2945 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2946 "Unrecognized escape \\%c passed through",
2947 *s);
11b8faa4 2948 /* default action is to copy the quoted character */
f9a63242 2949 goto default_action;
11b8faa4 2950 }
02aa26ce 2951
632403cc 2952 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2953 case '0': case '1': case '2': case '3':
2954 case '4': case '5': case '6': case '7':
ba210ebe 2955 {
53305cf1
NC
2956 I32 flags = 0;
2957 STRLEN len = 3;
77a135fe 2958 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2959 s += len;
2960 }
012bcf8d 2961 goto NUM_ESCAPE_INSERT;
02aa26ce 2962
f0a2b745
KW
2963 /* eg. \o{24} indicates the octal constant \024 */
2964 case 'o':
2965 {
2966 STRLEN len;
454155d9 2967 const char* error;
f0a2b745 2968
454155d9 2969 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2970 s += len;
454155d9 2971 if (! valid) {
f0a2b745
KW
2972 yyerror(error);
2973 continue;
2974 }
2975 goto NUM_ESCAPE_INSERT;
2976 }
2977
77a135fe 2978 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2979 case 'x':
a0ed51b3
LW
2980 ++s;
2981 if (*s == '{') {
9d4ba2ae 2982 char* const e = strchr(s, '}');
a4c04bdc
NC
2983 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2984 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2985 STRLEN len;
355860ce 2986
53305cf1 2987 ++s;
adaeee49 2988 if (!e) {
a0ed51b3 2989 yyerror("Missing right brace on \\x{}");
355860ce 2990 continue;
ba210ebe 2991 }
53305cf1 2992 len = e - s;
77a135fe 2993 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2994 s = e + 1;
a0ed51b3
LW
2995 }
2996 else {
ba210ebe 2997 {
53305cf1 2998 STRLEN len = 2;
a4c04bdc 2999 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 3000 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
3001 s += len;
3002 }
012bcf8d
GS
3003 }
3004
3005 NUM_ESCAPE_INSERT:
ff3f963a
KW
3006 /* Insert oct or hex escaped character. There will always be
3007 * enough room in sv since such escapes will be longer than any
3008 * UTF-8 sequence they can end up as, except if they force us
3009 * to recode the rest of the string into utf8 */
ba7cea30 3010
77a135fe 3011 /* Here uv is the ordinal of the next character being added in
ff3f963a 3012 * unicode (converted from native). */
77a135fe 3013 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3014 if (!has_utf8 && uv > 255) {
77a135fe
KW
3015 /* Might need to recode whatever we have accumulated so
3016 * far if it contains any chars variant in utf8 or
3017 * utf-ebcdic. */
3018
3019 SvCUR_set(sv, d - SvPVX_const(sv));
3020 SvPOK_on(sv);
3021 *d = '\0';
77a135fe 3022 /* See Note on sizing above. */
7bf79863
KW
3023 sv_utf8_upgrade_flags_grow(sv,
3024 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3025 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3026 d = SvPVX(sv) + SvCUR(sv);
3027 has_utf8 = TRUE;
012bcf8d
GS
3028 }
3029
77a135fe
KW
3030 if (has_utf8) {
3031 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3032 if (PL_lex_inwhat == OP_TRANS &&
3033 PL_sublex_info.sub_op) {
3034 PL_sublex_info.sub_op->op_private |=
3035 (PL_lex_repl ? OPpTRANS_FROM_UTF
3036 : OPpTRANS_TO_UTF);
f9a63242 3037 }
e294cc5d
JH
3038#ifdef EBCDIC
3039 if (uv > 255 && !dorange)
3040 native_range = FALSE;
3041#endif
012bcf8d 3042 }
a0ed51b3 3043 else {
012bcf8d 3044 *d++ = (char)uv;
a0ed51b3 3045 }
012bcf8d
GS
3046 }
3047 else {
c4d5f83a 3048 *d++ = (char) uv;
a0ed51b3 3049 }
79072805 3050 continue;
02aa26ce 3051
4a2d328f 3052 case 'N':
ff3f963a
KW
3053 /* In a non-pattern \N must be a named character, like \N{LATIN
3054 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3055 * mean to match a non-newline. For non-patterns, named
3056 * characters are converted to their string equivalents. In
3057 * patterns, named characters are not converted to their
3058 * ultimate forms for the same reasons that other escapes
3059 * aren't. Instead, they are converted to the \N{U+...} form
3060 * to get the value from the charnames that is in effect right
3061 * now, while preserving the fact that it was a named character
3062 * so that the regex compiler knows this */
3063
3064 /* This section of code doesn't generally use the
3065 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3066 * a close examination of this macro and determined it is a
3067 * no-op except on utfebcdic variant characters. Every
3068 * character generated by this that would normally need to be
3069 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3070 * needed, and would complicate use of copy(). XXX There are
3071 * other parts of this file where the macro is used
3072 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3073
3074 /* The structure of this section of code (besides checking for
3075 * errors and upgrading to utf8) is:
3076 * Further disambiguate between the two meanings of \N, and if
3077 * not a charname, go process it elsewhere
0a96133f
KW
3078 * If of form \N{U+...}, pass it through if a pattern;
3079 * otherwise convert to utf8
3080 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3081 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3082
3083 /* Here, s points to the 'N'; the test below is guaranteed to
3084 * succeed if we are being called on a pattern as we already
3085 * know from a test above that the next character is a '{'.
3086 * On a non-pattern \N must mean 'named sequence, which
3087 * requires braces */
3088 s++;
3089 if (*s != '{') {
3090 yyerror("Missing braces on \\N{}");
3091 continue;
3092 }
3093 s++;
3094
0a96133f 3095 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3096 if (! (e = strchr(s, '}'))) {
3097 if (! PL_lex_inpat) {
5777a3f7 3098 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3099 } else {
3100 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3101 }
0a96133f 3102 continue;
ff3f963a 3103 }
cddc7ef4 3104
ff3f963a 3105 /* Here it looks like a named character */
cddc7ef4 3106
ff3f963a
KW
3107 if (PL_lex_inpat) {
3108
3109 /* XXX This block is temporary code. \N{} implies that the
3110 * pattern is to have Unicode semantics, and therefore
3111 * currently has to be encoded in utf8. By putting it in
3112 * utf8 now, we save a whole pass in the regular expression
3113 * compiler. Once that code is changed so Unicode
3114 * semantics doesn't necessarily have to be in utf8, this
da3a4baf
KW
3115 * block should be removed. However, the code that parses
3116 * the output of this would have to be changed to not
3117 * necessarily expect utf8 */
ff3f963a 3118 if (!has_utf8) {
77a135fe 3119 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3120 SvPOK_on(sv);
e4f3eed8 3121 *d = '\0';
77a135fe 3122 /* See Note on sizing above. */
7bf79863 3123 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3124 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3125 /* 5 = '\N{' + cur char + NUL */
3126 (STRLEN)(send - s) + 5);
f08d6ad9 3127 d = SvPVX(sv) + SvCUR(sv);
89491803 3128 has_utf8 = TRUE;
ff3f963a
KW
3129 }
3130 }
423cee85 3131
ff3f963a
KW
3132 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3133 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3134 | PERL_SCAN_DISALLOW_PREFIX;
3135 STRLEN len;
3136
3137 /* For \N{U+...}, the '...' is a unicode value even on
3138 * EBCDIC machines */
3139 s += 2; /* Skip to next char after the 'U+' */
3140 len = e - s;
3141 uv = grok_hex(s, &len, &flags, NULL);
3142 if (len == 0 || len != (STRLEN)(e - s)) {
3143 yyerror("Invalid hexadecimal number in \\N{U+...}");
3144 s = e + 1;
3145 continue;
3146 }
3147
3148 if (PL_lex_inpat) {
3149
e2a7e165
KW
3150 /* On non-EBCDIC platforms, pass through to the regex
3151 * compiler unchanged. The reason we evaluated the
3152 * number above is to make sure there wasn't a syntax
3153 * error. But on EBCDIC we convert to native so
3154 * downstream code can continue to assume it's native
3155 */
ff3f963a 3156 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3157#ifdef EBCDIC
3158 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3159 and the \0 */
3160 "\\N{U+%X}",
3161 (unsigned int) UNI_TO_NATIVE(uv));
3162#else
ff3f963a
KW
3163 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3164 d += e - s + 1;
e2a7e165 3165#endif
ff3f963a
KW
3166 }
3167 else { /* Not a pattern: convert the hex to string */
3168
3169 /* If destination is not in utf8, unconditionally
3170 * recode it to be so. This is because \N{} implies
3171 * Unicode semantics, and scalars have to be in utf8
3172 * to guarantee those semantics */
3173 if (! has_utf8) {
3174 SvCUR_set(sv, d - SvPVX_const(sv));
3175 SvPOK_on(sv);
3176 *d = '\0';
3177 /* See Note on sizing above. */
3178 sv_utf8_upgrade_flags_grow(
3179 sv,
3180 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3181 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3182 d = SvPVX(sv) + SvCUR(sv);
3183 has_utf8 = TRUE;
3184 }
3185
3186 /* Add the string to the output */
3187 if (UNI_IS_INVARIANT(uv)) {
3188 *d++ = (char) uv;
3189 }
3190 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3191 }
3192 }
3193 else { /* Here is \N{NAME} but not \N{U+...}. */
3194
3195 SV *res; /* result from charnames */
3196 const char *str; /* the string in 'res' */
3197 STRLEN len; /* its length */
3198
3199 /* Get the value for NAME */
3200 res = newSVpvn(s, e - s);
3201 res = new_constant( NULL, 0, "charnames",
3202 /* includes all of: \N{...} */
3203 res, NULL, s - 3, e - s + 4 );
3204
3205 /* Most likely res will be in utf8 already since the
3206 * standard charnames uses pack U, but a custom translator
3207 * can leave it otherwise, so make sure. XXX This can be
3208 * revisited to not have charnames use utf8 for characters
3209 * that don't need it when regexes don't have to be in utf8
3210 * for Unicode semantics. If doing so, remember EBCDIC */
3211 sv_utf8_upgrade(res);
3212 str = SvPV_const(res, len);
3213
3214 /* Don't accept malformed input */
3215 if (! is_utf8_string((U8 *) str, len)) {
3216 yyerror("Malformed UTF-8 returned by \\N");
3217 }
3218 else if (PL_lex_inpat) {
3219
3220 if (! len) { /* The name resolved to an empty string */
3221 Copy("\\N{}", d, 4, char);
3222 d += 4;
3223 }
3224 else {
3225 /* In order to not lose information for the regex
3226 * compiler, pass the result in the specially made
3227 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3228 * the code points in hex of each character
3229 * returned by charnames */
3230
3231 const char *str_end = str + len;
3232 STRLEN char_length; /* cur char's byte length */
3233 STRLEN output_length; /* and the number of bytes
3234 after this is translated
3235 into hex digits */
3236 const STRLEN off = d - SvPVX_const(sv);
3237
3238 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3239 * max('U+', '.'); and 1 for NUL */
3240 char hex_string[2 * UTF8_MAXBYTES + 5];
3241
3242 /* Get the first character of the result. */
3243 U32 uv = utf8n_to_uvuni((U8 *) str,
3244 len,
3245 &char_length,
3246 UTF8_ALLOW_ANYUV);
3247
3248 /* The call to is_utf8_string() above hopefully
3249 * guarantees that there won't be an error. But
3250 * it's easy here to make sure. The function just
3251 * above warns and returns 0 if invalid utf8, but
3252 * it can also return 0 if the input is validly a
3253 * NUL. Disambiguate */
3254 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3255 uv = UNICODE_REPLACEMENT;
3256 }
3257
3258 /* Convert first code point to hex, including the
e2a7e165
KW
3259 * boiler plate before it. For all these, we
3260 * convert to native format so that downstream code
3261 * can continue to assume the input is native */
78c35590 3262 output_length =
3353de27 3263 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3264 "\\N{U+%X",
3265 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3266
3267 /* Make sure there is enough space to hold it */
3268 d = off + SvGROW(sv, off
3269 + output_length
3270 + (STRLEN)(send - e)
3271 + 2); /* '}' + NUL */
3272 /* And output it */
3273 Copy(hex_string, d, output_length, char);
3274 d += output_length;
3275
3276 /* For each subsequent character, append dot and
3277 * its ordinal in hex */
3278 while ((str += char_length) < str_end) {
3279 const STRLEN off = d - SvPVX_const(sv);
3280 U32 uv = utf8n_to_uvuni((U8 *) str,
3281 str_end - str,
3282 &char_length,
3283 UTF8_ALLOW_ANYUV);
3284 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3285 uv = UNICODE_REPLACEMENT;
3286 }
3287
78c35590 3288 output_length =
3353de27 3289 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3290 ".%X",
3291 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3292
3293 d = off + SvGROW(sv, off
3294 + output_length
3295 + (STRLEN)(send - e)
3296 + 2); /* '}' + NUL */
3297 Copy(hex_string, d, output_length, char);
3298 d += output_length;
3299 }
3300
3301 *d++ = '}'; /* Done. Add the trailing brace */
3302 }
3303 }
3304 else { /* Here, not in a pattern. Convert the name to a
3305 * string. */
3306
3307 /* If destination is not in utf8, unconditionally
3308 * recode it to be so. This is because \N{} implies
3309 * Unicode semantics, and scalars have to be in utf8
3310 * to guarantee those semantics */
3311 if (! has_utf8) {
3312 SvCUR_set(sv, d - SvPVX_const(sv));
3313 SvPOK_on(sv);
3314 *d = '\0';
3315 /* See Note on sizing above. */
3316 sv_utf8_upgrade_flags_grow(sv,
3317 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3318 len + (STRLEN)(send - s) + 1);
3319 d = SvPVX(sv) + SvCUR(sv);
3320 has_utf8 = TRUE;
3321 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3322
3323 /* See Note on sizing above. (NOTE: SvCUR() is not
3324 * set correctly here). */
3325 const STRLEN off = d - SvPVX_const(sv);
3326 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3327 }
3328 Copy(str, d, len, char);
3329 d += len;
423cee85 3330 }
423cee85 3331 SvREFCNT_dec(res);
cb233ae3
KW
3332
3333 /* Deprecate non-approved name syntax */
3334 if (ckWARN_d(WARN_DEPRECATED)) {
3335 bool problematic = FALSE;
3336 char* i = s;
3337
3338 /* For non-ut8 input, look to see that the first
3339 * character is an alpha, then loop through the rest
3340 * checking that each is a continuation */
3341 if (! this_utf8) {
3342 if (! isALPHAU(*i)) problematic = TRUE;
3343 else for (i = s + 1; i < e; i++) {
3344 if (isCHARNAME_CONT(*i)) continue;
3345 problematic = TRUE;
3346 break;
3347 }
3348 }
3349 else {
3350 /* Similarly for utf8. For invariants can check
3351 * directly. We accept anything above the latin1
3352 * range because it is immaterial to Perl if it is
3353 * correct or not, and is expensive to check. But
3354 * it is fairly easy in the latin1 range to convert
3355 * the variants into a single character and check
3356 * those */
3357 if (UTF8_IS_INVARIANT(*i)) {
3358 if (! isALPHAU(*i)) problematic = TRUE;
3359 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3360 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3361 *(i+1)))))
3362 {
3363 problematic = TRUE;
3364 }
3365 }
3366 if (! problematic) for (i = s + UTF8SKIP(s);
3367 i < e;
3368 i+= UTF8SKIP(i))
3369 {
3370 if (UTF8_IS_INVARIANT(*i)) {
3371 if (isCHARNAME_CONT(*i)) continue;
3372 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3373 continue;
3374 } else if (isCHARNAME_CONT(
3375 UNI_TO_NATIVE(
81c14aa2 3376 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3377 {
3378 continue;
3379 }
3380 problematic = TRUE;
3381 break;
3382 }
3383 }
3384 if (problematic) {
6e1bad6c
KW
3385 /* The e-i passed to the final %.*s makes sure that
3386 * should the trailing NUL be missing that this
3387 * print won't run off the end of the string */
cb233ae3 3388 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3389 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3390 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3391 }
3392 }
3393 } /* End \N{NAME} */
ff3f963a
KW
3394#ifdef EBCDIC
3395 if (!dorange)
3396 native_range = FALSE; /* \N{} is defined to be Unicode */
3397#endif
3398 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3399 continue;
3400
02aa26ce 3401 /* \c is a control character */
79072805
LW
3402 case 'c':
3403 s++;
961ce445 3404 if (s < send) {
17a3df4c 3405 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3406 }
961ce445
RGS
3407 else {
3408 yyerror("Missing control char name in \\c");
3409 }
79072805 3410 continue;
02aa26ce
NT
3411
3412 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3413 case 'b':
db42d148 3414 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3415 break;
3416 case 'n':
db42d148 3417 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3418 break;
3419 case 'r':
db42d148 3420 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3421 break;
3422 case 'f':
db42d148 3423 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3424 break;
3425 case 't':
db42d148 3426 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3427 break;
34a3fe2a 3428 case 'e':
db42d148 3429 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3430 break;
3431 case 'a':
db42d148 3432 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3433 break;
02aa26ce
NT
3434 } /* end switch */
3435
79072805
LW
3436 s++;
3437 continue;
02aa26ce 3438 } /* end if (backslash) */
4c3a8340
TS
3439#ifdef EBCDIC
3440 else
3441 literal_endpoint++;
3442#endif
02aa26ce 3443
f9a63242 3444 default_action:
77a135fe
KW
3445 /* If we started with encoded form, or already know we want it,
3446 then encode the next character */
3447 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3448 STRLEN len = 1;
77a135fe
KW
3449
3450
3451 /* One might think that it is wasted effort in the case of the
3452 * source being utf8 (this_utf8 == TRUE) to take the next character
3453 * in the source, convert it to an unsigned value, and then convert
3454 * it back again. But the source has not been validated here. The
3455 * routine that does the conversion checks for errors like
3456 * malformed utf8 */
3457
5f66b61c
AL
3458 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3459 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3460 if (!has_utf8) {
3461 SvCUR_set(sv, d - SvPVX_const(sv));
3462 SvPOK_on(sv);
3463 *d = '\0';
77a135fe 3464 /* See Note on sizing above. */
7bf79863
KW
3465 sv_utf8_upgrade_flags_grow(sv,
3466 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3467 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3468 d = SvPVX(sv) + SvCUR(sv);
3469 has_utf8 = TRUE;
3470 } else if (need > len) {
3471 /* encoded value larger than old, may need extra space (NOTE:
3472 * SvCUR() is not set correctly here). See Note on sizing
3473 * above. */
9d4ba2ae 3474 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3475 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3476 }
77a135fe
KW
3477 s += len;
3478
5f66b61c 3479 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3480#ifdef EBCDIC
3481 if (uv > 255 && !dorange)
3482 native_range = FALSE;
3483#endif
2b9d42f0
NIS
3484 }
3485 else {
3486 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3487 }
02aa26ce
NT
3488 } /* while loop to process each character */
3489
3490 /* terminate the string and set up the sv */
79072805 3491 *d = '\0';
95a20fc0 3492 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3493 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3494 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3495
79072805 3496 SvPOK_on(sv);
9f4817db 3497 if (PL_encoding && !has_utf8) {
d0063567
DK
3498 sv_recode_to_utf8(sv, PL_encoding);
3499 if (SvUTF8(sv))
3500 has_utf8 = TRUE;
9f4817db 3501 }
2b9d42f0 3502 if (has_utf8) {
7e2040f0 3503 SvUTF8_on(sv);
2b9d42f0 3504 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3505 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3506 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3507 }
3508 }
79072805 3509
02aa26ce 3510 /* shrink the sv if we allocated more than we used */
79072805 3511 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3512 SvPV_shrink_to_cur(sv);
79072805 3513 }
02aa26ce 3514
6154021b 3515 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3516 if (s > PL_bufptr) {
eb0d8d16
NC
3517 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3518 const char *const key = PL_lex_inpat ? "qr" : "q";
3519 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3520 const char *type;
3521 STRLEN typelen;
3522
3523 if (PL_lex_inwhat == OP_TRANS) {
3524 type = "tr";
3525 typelen = 2;
3526 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3527 type = "s";
3528 typelen = 1;
3529 } else {
3530 type = "qq";
3531 typelen = 2;
3532 }
3533
3534 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3535 type, typelen);
3536 }
6154021b 3537 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3538 } else
8990e307 3539 SvREFCNT_dec(sv);
79072805
LW
3540 return s;
3541}
3542
ffb4593c
NT
3543/* S_intuit_more
3544 * Returns TRUE if there's more to the expression (e.g., a subscript),
3545 * FALSE otherwise.
ffb4593c
NT
3546 *
3547 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3548 *
3549 * ->[ and ->{ return TRUE
3550 * { and [ outside a pattern are always subscripts, so return TRUE
3551 * if we're outside a pattern and it's not { or [, then return FALSE
3552 * if we're in a pattern and the first char is a {
3553 * {4,5} (any digits around the comma) returns FALSE
3554 * if we're in a pattern and the first char is a [
3555 * [] returns FALSE
3556 * [SOMETHING] has a funky algorithm to decide whether it's a
3557 * character class or not. It has to deal with things like
3558 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3559 * anything else returns TRUE
3560 */
3561
9cbb5ea2
GS
3562/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3563
76e3520e 3564STATIC int
cea2e8a9 3565S_intuit_more(pTHX_ register char *s)
79072805 3566{
97aff369 3567 dVAR;
7918f24d
NC
3568
3569 PERL_ARGS_ASSERT_INTUIT_MORE;
3570
3280af22 3571 if (PL_lex_brackets)
79072805
LW
3572 return TRUE;
3573 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3574 return TRUE;
3575 if (*s != '{' && *s != '[')
3576 return FALSE;
3280af22 3577 if (!PL_lex_inpat)
79072805
LW
3578 return TRUE;
3579
3580 /* In a pattern, so maybe we have {n,m}. */
3581 if (*s == '{') {
b3155d95 3582 if (regcurly(s)) {
79072805 3583 return FALSE;
b3155d95 3584 }
79072805 3585 return TRUE;
79072805
LW
3586 }
3587
3588 /* On the other hand, maybe we have a character class */
3589
3590 s++;
3591 if (*s == ']' || *s == '^')
3592 return FALSE;
3593 else {
ffb4593c 3594 /* this is terrifying, and it works */
79072805
LW
3595 int weight = 2; /* let's weigh the evidence */
3596 char seen[256];
f27ffc4a 3597 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3598 const char * const send = strchr(s,']');
3280af22 3599 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3600
3601 if (!send) /* has to be an expression */
3602 return TRUE;
3603
3604 Zero(seen,256,char);
3605 if (*s == '$')
3606 weight -= 3;
3607 else if (isDIGIT(*s)) {
3608 if (s[1] != ']') {
3609 if (isDIGIT(s[1]) && s[2] == ']')
3610 weight -= 10;
3611 }
3612 else
3613 weight -= 100;
3614 }
3615 for (; s < send; s++) {
3616 last_un_char = un_char;
3617 un_char = (unsigned char)*s;
3618 switch (*s) {
3619 case '@':
3620 case '&':
3621 case '$':
3622 weight -= seen[un_char] * 10;
7e2040f0 3623 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3624 int len;
8903cb82 3625 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3626 len = (int)strlen(tmpbuf);
3627 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3628 weight -= 100;
3629 else
3630 weight -= 10;
3631 }
3632 else if (*s == '$' && s[1] &&
93a17b20
LW
3633 strchr("[#!%*<>()-=",s[1])) {
3634 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3635 weight -= 10;
3636 else
3637 weight -= 1;
3638 }
3639 break;
3640 case '\\':
3641 un_char = 254;
3642 if (s[1]) {
93a17b20 3643 if (strchr("wds]",s[1]))
79072805 3644 weight += 100;
10edeb5d 3645 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3646 weight += 1;
93a17b20 3647 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3648 weight += 40;
3649 else if (isDIGIT(s[1])) {
3650 weight += 40;
3651 while (s[1] && isDIGIT(s[1]))
3652 s++;
3653 }
3654 }
3655 else
3656 weight += 100;
3657 break;
3658 case '-':
3659 if (s[1] == '\\')
3660 weight += 50;
93a17b20 3661 if (strchr("aA01! ",last_un_char))
79072805 3662 weight += 30;
93a17b20 3663 if (strchr("zZ79~",s[1]))
79072805 3664 weight += 30;
f27ffc4a
GS
3665 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3666 weight -= 5; /* cope with negative subscript */
79072805
LW
3667 break;
3668 default:
3792a11b
NC
3669 if (!isALNUM(last_un_char)
3670 && !(last_un_char == '$' || last_un_char == '@'
3671 || last_un_char == '&')
3672 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3673 char *d = tmpbuf;
3674 while (isALPHA(*s))
3675 *d++ = *s++;
3676 *d = '\0';
5458a98a 3677 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3678 weight -= 150;
3679 }
3680 if (un_char == last_un_char + 1)
3681 weight += 5;
3682 weight -= seen[un_char];
3683 break;
3684 }
3685 seen[un_char]++;
3686 }
3687 if (weight >= 0) /* probably a character class */
3688 return FALSE;
3689 }
3690
3691 return TRUE;
3692}
ffed7fef 3693
ffb4593c
NT
3694/*
3695 * S_intuit_method
3696 *
3697 * Does all the checking to disambiguate
3698 * foo bar
3699 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3700 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3701 *
3702 * First argument is the stuff after the first token, e.g. "bar".
3703 *
3704 * Not a method if bar is a filehandle.
3705 * Not a method if foo is a subroutine prototyped to take a filehandle.
3706 * Not a method if it's really "Foo $bar"
3707 * Method if it's "foo $bar"
3708 * Not a method if it's really "print foo $bar"
3709 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3710 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3711 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3712 * =>
3713 */
3714
76e3520e 3715STATIC int
62d55b22 3716S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3717{
97aff369 3718 dVAR;
a0d0e21e 3719 char *s = start + (*start == '$');
3280af22 3720 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3721 STRLEN len;
3722 GV* indirgv;
5db06880
NC
3723#ifdef PERL_MAD
3724 int soff;
3725#endif
a0d0e21e 3726
7918f24d
NC
3727 PERL_ARGS_ASSERT_INTUIT_METHOD;
3728
a0d0e21e 3729 if (gv) {
62d55b22 3730 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3731 return 0;
62d55b22
NC
3732 if (cv) {
3733 if (SvPOK(cv)) {
3734 const char *proto = SvPVX_const(cv);
3735 if (proto) {
3736 if (*proto == ';')
3737 proto++;
3738 if (*proto == '*')
3739 return 0;
3740 }
b6c543e3
IZ
3741 }
3742 } else
c35e046a 3743 gv = NULL;
a0d0e21e 3744 }
8903cb82 3745 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3746 /* start is the beginning of the possible filehandle/object,
3747 * and s is the end of it
3748 * tmpbuf is a copy of it
3749 */
3750
a0d0e21e 3751 if (*start == '$') {
3ef1310e
RGS
3752 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3753 isUPPER(*PL_tokenbuf))
a0d0e21e 3754 return 0;
5db06880
NC
3755#ifdef PERL_MAD
3756 len = start - SvPVX(PL_linestr);
3757#endif
29595ff2 3758 s = PEEKSPACE(s);
f0092767 3759#ifdef PERL_MAD
5db06880
NC
3760 start = SvPVX(PL_linestr) + len;
3761#endif
3280af22
NIS
3762 PL_bufptr = start;
3763 PL_expect = XREF;
a0d0e21e
LW
3764 return *s == '(' ? FUNCMETH : METHOD;
3765 }
5458a98a 3766 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3767 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3768 len -= 2;
3769 tmpbuf[len] = '\0';
5db06880
NC
3770#ifdef PERL_MAD
3771 soff = s - SvPVX(PL_linestr);
3772#endif
c3e0f903
GS
3773 goto bare_package;
3774 }
90e5519e 3775 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3776 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3777 return 0;
3778 /* filehandle or package name makes it a method */
da51bb9b 3779 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3780#ifdef PERL_MAD
3781 soff = s - SvPVX(PL_linestr);
3782#endif
29595ff2 3783 s = PEEKSPACE(s);
3280af22 3784 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3785 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3786 bare_package:
cd81e915 3787 start_force(PL_curforce);
9ded7720 3788 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3789 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3790 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3791 if (PL_madskills)
3792 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3793 PL_expect = XTERM;
a0d0e21e 3794 force_next(WORD);
3280af22 3795 PL_bufptr = s;
5db06880
NC
3796#ifdef PERL_MAD
3797 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3798#endif
a0d0e21e
LW
3799 return *s == '(' ? FUNCMETH : METHOD;
3800 }
3801 }
3802 return 0;
3803}
3804
16d20bd9 3805/* Encoded script support. filter_add() effectively inserts a
4e553d73 3806 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3807 * Note that the filter function only applies to the current source file
3808 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3809 *
3810 * The datasv parameter (which may be NULL) can be used to pass
3811 * private data to this instance of the filter. The filter function
3812 * can recover the SV using the FILTER_DATA macro and use it to
3813 * store private buffers and state information.
3814 *
3815 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3816 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3817 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3818 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3819 * private use must be set using malloc'd pointers.
3820 */
16d20bd9
AD
3821
3822SV *
864dbfa3 3823Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3824{
97aff369 3825 dVAR;
f4c556ac 3826 if (!funcp)
a0714e2c 3827 return NULL;
f4c556ac 3828
5486870f
DM
3829 if (!PL_parser)
3830 return NULL;
3831
3280af22
NIS
3832 if (!PL_rsfp_filters)
3833 PL_rsfp_filters = newAV();
16d20bd9 3834 if (!datasv)
561b68a9 3835 datasv = newSV(0);
862a34c6 3836 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3837 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3838 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3839 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3840 FPTR2DPTR(void *, IoANY(datasv)),
3841 SvPV_nolen(datasv)));
3280af22
NIS
3842 av_unshift(PL_rsfp_filters, 1);
3843 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3844 return(datasv);
3845}
4e553d73 3846
16d20bd9
AD
3847
3848/* Delete most recently added instance of this filter function. */
a0d0e21e 3849void
864dbfa3 3850Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3851{
97aff369 3852 dVAR;
e0c19803 3853 SV *datasv;
24801a4b 3854
7918f24d
NC
3855 PERL_ARGS_ASSERT_FILTER_DEL;
3856
33073adb 3857#ifdef DEBUGGING
55662e27
JH
3858 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3859 FPTR2DPTR(void*, funcp)));
33073adb 3860#endif
5486870f 3861 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3862 return;
3863 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3864 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3865 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3866 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3867
16d20bd9
AD
3868 return;
3869 }
3870 /* we need to search for the correct entry and clear it */
cea2e8a9 3871 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3872}
3873
3874
1de9afcd
RGS
3875/* Invoke the idxth filter function for the current rsfp. */
3876/* maxlen 0 = read one text line */
16d20bd9 3877I32
864dbfa3 3878Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3879{
97aff369 3880 dVAR;
16d20bd9
AD
3881 filter_t funcp;
3882 SV *datasv = NULL;
f482118e
NC
3883 /* This API is bad. It should have been using unsigned int for maxlen.
3884 Not sure if we want to change the API, but if not we should sanity
3885 check the value here. */
39cd7a59
NC
3886 const unsigned int correct_length
3887 = maxlen < 0 ?
3888#ifdef PERL_MICRO
3889 0x7FFFFFFF
3890#else
3891 INT_MAX
3892#endif
3893 : maxlen;
e50aee73 3894
7918f24d
NC
3895 PERL_ARGS_ASSERT_FILTER_READ;
3896
5486870f 3897 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3898 return -1;
1de9afcd 3899 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3900 /* Provide a default input filter to make life easy. */
3901 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3902 DEBUG_P(PerlIO_printf(Perl_debug_log,
3903 "filter_read %d: from rsfp\n", idx));
f482118e 3904 if (correct_length) {
16d20bd9
AD
3905 /* Want a block */
3906 int len ;
f54cb97a 3907 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3908
3909 /* ensure buf_sv is large enough */
881d8f0a 3910 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3911 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3912 correct_length)) <= 0) {
3280af22 3913 if (PerlIO_error(PL_rsfp))
37120919
AD
3914 return -1; /* error */
3915 else
3916 return 0 ; /* end of file */
3917 }
16d20bd9 3918 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3919 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3920 } else {
3921 /* Want a line */
3280af22
NIS
3922 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3923 if (PerlIO_error(PL_rsfp))
37120919
AD
3924 return -1; /* error */
3925 else
3926 return 0 ; /* end of file */
3927 }
16d20bd9
AD
3928 }
3929 return SvCUR(buf_sv);
3930 }
3931 /* Skip this filter slot if filter has been deleted */
1de9afcd 3932 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3933 DEBUG_P(PerlIO_printf(Perl_debug_log,
3934 "filter_read %d: skipped (filter deleted)\n",
3935 idx));
f482118e 3936 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3937 }
3938 /* Get function pointer hidden within datasv */
8141890a 3939 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3940 DEBUG_P(PerlIO_printf(Perl_debug_log,
3941 "filter_read %d: via function %p (%s)\n",
ca0270c4 3942 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3943 /* Call function. The function is expected to */
3944 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3945 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3946 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3947}
3948
76e3520e 3949STATIC char *
5cc814fd 3950S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3951{
97aff369 3952 dVAR;
7918f24d
NC
3953
3954 PERL_ARGS_ASSERT_FILTER_GETS;
3955
c39cd008 3956#ifdef PERL_CR_FILTER
3280af22 3957 if (!PL_rsfp_filters) {
c39cd008 3958 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3959 }
3960#endif
3280af22 3961 if (PL_rsfp_filters) {
55497cff 3962 if (!append)
3963 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3964 if (FILTER_READ(0, sv, 0) > 0)
3965 return ( SvPVX(sv) ) ;
3966 else
bd61b366 3967 return NULL ;
16d20bd9 3968 }
9d116dd7 3969 else
5cc814fd 3970 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3971}
3972
01ec43d0 3973STATIC HV *
9bde8eb0 3974S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3975{
97aff369 3976 dVAR;
def3634b
GS
3977 GV *gv;
3978
7918f24d
NC
3979 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3980
01ec43d0 3981 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3982 return PL_curstash;
3983
3984 if (len > 2 &&
3985 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3986 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3987 {
3988 return GvHV(gv); /* Foo:: */
def3634b
GS
3989 }
3990
3991 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3992 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3993 if (gv && GvCV(gv)) {
3994 SV * const sv = cv_const_sv(GvCV(gv));
3995 if (sv)
9bde8eb0 3996 pkgname = SvPV_const(sv, len);
def3634b
GS
3997 }
3998
9bde8eb0 3999 return gv_stashpvn(pkgname, len, 0);
def3634b 4000}
a0d0e21e 4001
e3f73d4e
RGS
4002/*
4003 * S_readpipe_override
486ec47a 4004 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4005 * optree, provided sublex_start() is called afterwards.
4006 */
4007STATIC void
1d51329b 4008S_readpipe_override(pTHX)
e3f73d4e
RGS
4009{
4010 GV **gvp;
4011 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4012 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4013 if ((gv_readpipe
4014 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4015 ||
4016 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4017 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4018 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4019 {
4020 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4021 op_append_elem(OP_LIST,
e3f73d4e
RGS
4022 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4023 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4024 }
e3f73d4e
RGS
4025}
4026
5db06880
NC
4027#ifdef PERL_MAD
4028 /*
4029 * Perl_madlex
4030 * The intent of this yylex wrapper is to minimize the changes to the
4031 * tokener when we aren't interested in collecting madprops. It remains
4032 * to be seen how successful this strategy will be...
4033 */
4034
4035int
4036Perl_madlex(pTHX)
4037{
4038 int optype;
4039 char *s = PL_bufptr;
4040
cd81e915
NC
4041 /* make sure PL_thiswhite is initialized */
4042 PL_thiswhite = 0;
4043 PL_thismad = 0;
5db06880 4044
cd81e915 4045 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 4046 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
4047 return S_pending_ident(aTHX);
4048
4049 /* previous token ate up our whitespace? */
cd81e915
NC
4050 if (!PL_lasttoke && PL_nextwhite) {
4051 PL_thiswhite = PL_nextwhite;
4052 PL_nextwhite = 0;
5db06880
NC
4053 }
4054
4055 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4056 PL_realtokenstart = -1;
4057 PL_thistoken = 0;
5db06880
NC
4058 optype = yylex();
4059 s = PL_bufptr;
cd81e915 4060 assert(PL_curforce < 0);
5db06880 4061
cd81e915
NC
4062 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4063 if (!PL_thistoken) {
4064 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4065 PL_thistoken = newSVpvs("");
5db06880 4066 else {
c35e046a 4067 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4068 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4069 }
4070 }
cd81e915
NC
4071 if (PL_thismad) /* install head */
4072 CURMAD('X', PL_thistoken);
5db06880
NC
4073 }
4074
4075 /* last whitespace of a sublex? */
cd81e915
NC
4076 if (optype == ')' && PL_endwhite) {
4077 CURMAD('X', PL_endwhite);
5db06880
NC
4078 }
4079
cd81e915 4080 if (!PL_thismad) {
5db06880
NC
4081
4082 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4083 if (!PL_thiswhite && !PL_endwhite && !optype) {
4084 sv_free(PL_thistoken);
4085 PL_thistoken = 0;
5db06880
NC
4086 return 0;
4087 }
4088
4089 /* put off final whitespace till peg */
4090 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4091 PL_nextwhite = PL_thiswhite;
4092 PL_thiswhite = 0;
5db06880 4093 }
cd81e915
NC
4094 else if (PL_thisopen) {
4095 CURMAD('q', PL_thisopen);
4096 if (PL_thistoken)
4097 sv_free(PL_thistoken);
4098 PL_thistoken = 0;
5db06880
NC
4099 }
4100 else {
4101 /* Store actual token text as madprop X */
cd81e915 4102 CURMAD('X', PL_thistoken);
5db06880
NC
4103 }
4104
cd81e915 4105 if (PL_thiswhite) {
5db06880 4106 /* add preceding whitespace as madprop _ */
cd81e915 4107 CURMAD('_', PL_thiswhite);
5db06880
NC
4108 }
4109
cd81e915 4110 if (PL_thisstuff) {
5db06880 4111 /* add quoted material as madprop = */
cd81e915 4112 CURMAD('=', PL_thisstuff);
5db06880
NC
4113 }
4114
cd81e915 4115 if (PL_thisclose) {
5db06880 4116 /* add terminating quote as madprop Q */
cd81e915 4117 CURMAD('Q', PL_thisclose);
5db06880
NC
4118 }
4119 }
4120
4121 /* special processing based on optype */
4122
4123 switch (optype) {
4124
4125 /* opval doesn't need a TOKEN since it can already store mp */
4126 case WORD:
4127 case METHOD:
4128 case FUNCMETH:
4129 case THING:
4130 case PMFUNC:
4131 case PRIVATEREF:
4132 case FUNC0SUB:
4133 case UNIOPSUB:
4134 case LSTOPSUB:
6154021b
RGS
4135 if (pl_yylval.opval)
4136 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4137 PL_thismad = 0;
5db06880
NC
4138 return optype;
4139
4140 /* fake EOF */
4141 case 0:
4142 optype = PEG;
cd81e915
NC
4143 if (PL_endwhite) {
4144 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4145 PL_endwhite = 0;
5db06880
NC
4146 }
4147 break;
4148
4149 case ']':
4150 case '}':
cd81e915 4151 if (PL_faketokens)
5db06880
NC
4152 break;
4153 /* remember any fake bracket that lexer is about to discard */
4154 if (PL_lex_brackets == 1 &&
4155 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4156 {
4157 s = PL_bufptr;
4158 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4159 s++;
4160 if (*s == '}') {
cd81e915
NC
4161 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4162 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4163 PL_thiswhite = 0;
5db06880
NC
4164 PL_bufptr = s - 1;
4165 break; /* don't bother looking for trailing comment */
4166 }
4167 else
4168 s = PL_bufptr;
4169 }
4170 if (optype == ']')
4171 break;
4172 /* FALLTHROUGH */
4173
4174 /* attach a trailing comment to its statement instead of next token */
4175 case ';':
cd81e915 4176 if (PL_faketokens)
5db06880
NC
4177 break;
4178 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4179 s = PL_bufptr;
4180 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4181 s++;
4182 if (*s == '\n' || *s == '#') {
4183 while (s < PL_bufend && *s != '\n')
4184 s++;
4185 if (s < PL_bufend)
4186 s++;
cd81e915
NC
4187 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4188 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4189 PL_thiswhite = 0;
5db06880
NC
4190 PL_bufptr = s;
4191 }
4192 }
4193 break;
4194
4195 /* pval */
4196 case LABEL:
4197 break;
4198
4199 /* ival */
4200 default:
4201 break;
4202
4203 }
4204
4205 /* Create new token struct. Note: opvals return early above. */
6154021b 4206 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4207 PL_thismad = 0;
5db06880
NC
4208 return optype;
4209}
4210#endif
4211
468aa647 4212STATIC char *
cc6ed77d 4213S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4214 dVAR;
7918f24d
NC
4215
4216 PERL_ARGS_ASSERT_TOKENIZE_USE;
4217
468aa647
RGS
4218 if (PL_expect != XSTATE)
4219 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4220 is_use ? "use" : "no"));
29595ff2 4221 s = SKIPSPACE1(s);
468aa647
RGS
4222 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4223 s = force_version(s, TRUE);
17c59fdf
VP
4224 if (*s == ';' || *s == '}'
4225 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4226 start_force(PL_curforce);
9ded7720 4227 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4228 force_next(WORD);
4229 }
4230 else if (*s == 'v') {
4231 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4232 s = force_version(s, FALSE);
4233 }
4234 }
4235 else {
4236 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4237 s = force_version(s, FALSE);
4238 }
6154021b 4239 pl_yylval.ival = is_use;
468aa647
RGS
4240 return s;
4241}
748a9306 4242#ifdef DEBUGGING
27da23d5 4243 static const char* const exp_name[] =
09bef843 4244 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4245 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4246 };
748a9306 4247#endif
463ee0b2 4248
361d9b55
Z
4249#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4250STATIC bool
4251S_word_takes_any_delimeter(char *p, STRLEN len)
4252{
4253 return (len == 1 && strchr("msyq", p[0])) ||
4254 (len == 2 && (
4255 (p[0] == 't' && p[1] == 'r') ||
4256 (p[0] == 'q' && strchr("qwxr", p[1]))));
4257}
4258
02aa26ce
NT
4259/*
4260 yylex
4261
4262 Works out what to call the token just pulled out of the input
4263 stream. The yacc parser takes care of taking the ops we return and
4264 stitching them into a tree.
4265
4266 Returns:
4267 PRIVATEREF
4268
4269 Structure:
4270 if read an identifier
4271 if we're in a my declaration
4272 croak if they tried to say my($foo::bar)
4273 build the ops for a my() declaration
4274 if it's an access to a my() variable
4275 are we in a sort block?
4276 croak if my($a); $a <=> $b
4277 build ops for access to a my() variable
4278 if in a dq string, and they've said @foo and we can't find @foo
4279 croak
4280 build ops for a bareword
4281 if we already built the token before, use it.
4282*/
4283
20141f0e 4284
dba4d153
JH
4285#ifdef __SC__
4286#pragma segment Perl_yylex
4287#endif
dba4d153 4288int
dba4d153 4289Perl_yylex(pTHX)
20141f0e 4290{
97aff369 4291 dVAR;
3afc138a 4292 register char *s = PL_bufptr;
378cc40b 4293 register char *d;
463ee0b2 4294 STRLEN len;
aa7440fb 4295 bool bof = FALSE;
580561a3 4296 U32 fake_eof = 0;
a687059c 4297
10edeb5d
JH
4298 /* orig_keyword, gvp, and gv are initialized here because
4299 * jump to the label just_a_word_zero can bypass their
4300 * initialization later. */
4301 I32 orig_keyword = 0;
4302 GV *gv = NULL;
4303 GV **gvp = NULL;
4304
bbf60fe6 4305 DEBUG_T( {
396482e1 4306 SV* tmp = newSVpvs("");
b6007c36
DM
4307 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4308 (IV)CopLINE(PL_curcop),
4309 lex_state_names[PL_lex_state],
4310 exp_name[PL_expect],
4311 pv_display(tmp, s, strlen(s), 0, 60));
4312 SvREFCNT_dec(tmp);
bbf60fe6 4313 } );
02aa26ce 4314 /* check if there's an identifier for us to look at */
28ac2b49 4315 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4316 return REPORT(S_pending_ident(aTHX));
bbce6d69 4317
02aa26ce
NT
4318 /* no identifier pending identification */
4319
3280af22 4320 switch (PL_lex_state) {
79072805
LW
4321#ifdef COMMENTARY
4322 case LEX_NORMAL: /* Some compilers will produce faster */
4323 case LEX_INTERPNORMAL: /* code if we comment these out. */
4324 break;
4325#endif
4326
09bef843 4327 /* when we've already built the next token, just pull it out of the queue */
79072805 4328 case LEX_KNOWNEXT:
5db06880
NC
4329#ifdef PERL_MAD
4330 PL_lasttoke--;
6154021b 4331 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4332 if (PL_madskills) {
cd81e915 4333 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4334 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4335 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4336 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4337 PL_thismad->mad_val = 0;
4338 mad_free(PL_thismad);
4339 PL_thismad = 0;
5db06880
NC
4340 }
4341 }
4342 if (!PL_lasttoke) {
4343 PL_lex_state = PL_lex_defer;
4344 PL_expect = PL_lex_expect;
4345 PL_lex_defer = LEX_NORMAL;
4346 if (!PL_nexttoke[PL_lasttoke].next_type)
4347 return yylex();
4348 }
4349#else
3280af22 4350 PL_nexttoke--;
6154021b 4351 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4352 if (!PL_nexttoke) {
4353 PL_lex_state = PL_lex_defer;
4354 PL_expect = PL_lex_expect;
4355 PL_lex_defer = LEX_NORMAL;
463ee0b2 4356 }
5db06880 4357#endif
a7aaec61
Z
4358 {
4359 I32 next_type;
5db06880 4360#ifdef PERL_MAD
a7aaec61 4361 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4362#else
a7aaec61 4363 next_type = PL_nexttype[PL_nexttoke];
5db06880 4364#endif
78cdf107
Z
4365 if (next_type & (7<<24)) {
4366 if (next_type & (1<<24)) {
4367 if (PL_lex_brackets > 100)
4368 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4369 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4370 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4371 }
4372 if (next_type & (2<<24))
4373 PL_lex_allbrackets++;
4374 if (next_type & (4<<24))
4375 PL_lex_allbrackets--;
a7aaec61
Z
4376 next_type &= 0xffff;
4377 }
4378#ifdef PERL_MAD
4379 /* FIXME - can these be merged? */
4380 return next_type;
4381#else
4382 return REPORT(next_type);
4383#endif
4384 }
79072805 4385
02aa26ce 4386 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4387 when we get here, PL_bufptr is at the \
02aa26ce 4388 */
79072805
LW
4389 case LEX_INTERPCASEMOD:
4390#ifdef DEBUGGING
3280af22 4391 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4392 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4393#endif
02aa26ce 4394 /* handle \E or end of string */
3280af22 4395 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4396 /* if at a \E */
3280af22 4397 if (PL_lex_casemods) {
f54cb97a 4398 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4399 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4400
3792a11b
NC
4401 if (PL_bufptr != PL_bufend
4402 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4403 PL_bufptr += 2;
4404 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4405#ifdef PERL_MAD
4406 if (PL_madskills)
6b29d1f5 4407 PL_thistoken = newSVpvs("\\E");
5db06880 4408#endif
a0d0e21e 4409 }
78cdf107 4410 PL_lex_allbrackets--;
bbf60fe6 4411 return REPORT(')');
79072805 4412 }
5db06880
NC
4413#ifdef PERL_MAD
4414 while (PL_bufptr != PL_bufend &&
4415 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4416 if (!PL_thiswhite)
6b29d1f5 4417 PL_thiswhite = newSVpvs("");
cd81e915 4418 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4419 PL_bufptr += 2;
4420 }
4421#else
3280af22
NIS
4422 if (PL_bufptr != PL_bufend)
4423 PL_bufptr += 2;
5db06880 4424#endif
3280af22 4425 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4426 return yylex();
79072805
LW
4427 }
4428 else {
607df283 4429 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4430 "### Saw case modifier\n"); });
3280af22 4431 s = PL_bufptr + 1;
6e909404 4432 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4433#ifdef PERL_MAD
cd81e915 4434 if (!PL_thiswhite)
6b29d1f5 4435 PL_thiswhite = newSVpvs("");
cd81e915 4436 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4437#endif
89122651 4438 PL_bufptr = s + 3;
6e909404
JH
4439 PL_lex_state = LEX_INTERPCONCAT;
4440 return yylex();
a0d0e21e 4441 }
6e909404 4442 else {
90771dc0 4443 I32 tmp;
5db06880
NC
4444 if (!PL_madskills) /* when just compiling don't need correct */
4445 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4446 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4447 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4448 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4449 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4450 PL_lex_allbrackets--;
bbf60fe6 4451 return REPORT(')');
6e909404
JH
4452 }
4453 if (PL_lex_casemods > 10)
4454 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4455 PL_lex_casestack[PL_lex_casemods++] = *s;
4456 PL_lex_casestack[PL_lex_casemods] = '\0';
4457 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4458 start_force(PL_curforce);
9ded7720 4459 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4460 force_next((2<<24)|'(');
cd81e915 4461 start_force(PL_curforce);
6e909404 4462 if (*s == 'l')
9ded7720 4463 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4464 else if (*s == 'u')
9ded7720 4465 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4466 else if (*s == 'L')
9ded7720 4467 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4468 else if (*s == 'U')
9ded7720 4469 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4470 else if (*s == 'Q')
9ded7720 4471 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4472 else
4473 Perl_croak(aTHX_ "panic: yylex");
5db06880 4474 if (PL_madskills) {
a5849ce5
NC
4475 SV* const tmpsv = newSVpvs("\\ ");
4476 /* replace the space with the character we want to escape
4477 */
4478 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4479 curmad('_', tmpsv);
4480 }
6e909404 4481 PL_bufptr = s + 1;
a0d0e21e 4482 }
79072805 4483 force_next(FUNC);
3280af22
NIS
4484 if (PL_lex_starts) {
4485 s = PL_bufptr;
4486 PL_lex_starts = 0;
5db06880
NC
4487#ifdef PERL_MAD
4488 if (PL_madskills) {
cd81e915
NC
4489 if (PL_thistoken)
4490 sv_free(PL_thistoken);
6b29d1f5 4491 PL_thistoken = newSVpvs("");
5db06880
NC
4492 }
4493#endif
131b3ad0
DM
4494 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4495 if (PL_lex_casemods == 1 && PL_lex_inpat)
4496 OPERATOR(',');
4497 else
4498 Aop(OP_CONCAT);
79072805
LW
4499 }
4500 else
cea2e8a9 4501 return yylex();
79072805
LW
4502 }
4503
55497cff 4504 case LEX_INTERPPUSH:
bbf60fe6 4505 return REPORT(sublex_push());
55497cff 4506
79072805 4507 case LEX_INTERPSTART:
3280af22 4508 if (PL_bufptr == PL_bufend)
bbf60fe6 4509 return REPORT(sublex_done());
607df283 4510 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4511 "### Interpolated variable\n"); });
3280af22
NIS
4512 PL_expect = XTERM;
4513 PL_lex_dojoin = (*PL_bufptr == '@');
4514 PL_lex_state = LEX_INTERPNORMAL;
4515 if (PL_lex_dojoin) {
cd81e915 4516 start_force(PL_curforce);
9ded7720 4517 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4518 force_next(',');
cd81e915 4519 start_force(PL_curforce);
a0d0e21e 4520 force_ident("\"", '$');
cd81e915 4521 start_force(PL_curforce);
9ded7720 4522 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4523 force_next('$');
cd81e915 4524 start_force(PL_curforce);
9ded7720 4525 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4526 force_next((2<<24)|'(');
cd81e915 4527 start_force(PL_curforce);
9ded7720 4528 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4529 force_next(FUNC);
4530 }
3280af22
NIS
4531 if (PL_lex_starts++) {
4532 s = PL_bufptr;
5db06880
NC
4533#ifdef PERL_MAD
4534 if (PL_madskills) {
cd81e915
NC
4535 if (PL_thistoken)
4536 sv_free(PL_thistoken);
6b29d1f5 4537 PL_thistoken = newSVpvs("");
5db06880
NC
4538 }
4539#endif
131b3ad0
DM
4540 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4541 if (!PL_lex_casemods && PL_lex_inpat)
4542 OPERATOR(',');
4543 else
4544 Aop(OP_CONCAT);
79072805 4545 }
cea2e8a9 4546 return yylex();
79072805
LW
4547
4548 case LEX_INTERPENDMAYBE:
3280af22
NIS
4549 if (intuit_more(PL_bufptr)) {
4550 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4551 break;
4552 }
4553 /* FALL THROUGH */
4554
4555 case LEX_INTERPEND:
3280af22
NIS
4556 if (PL_lex_dojoin) {
4557 PL_lex_dojoin = FALSE;
4558 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4559#ifdef PERL_MAD
4560 if (PL_madskills) {
cd81e915
NC
4561 if (PL_thistoken)
4562 sv_free(PL_thistoken);
6b29d1f5 4563 PL_thistoken = newSVpvs("");
5db06880
NC
4564 }
4565#endif
78cdf107 4566 PL_lex_allbrackets--;
bbf60fe6 4567 return REPORT(')');
79072805 4568 }
43a16006 4569 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4570 && SvEVALED(PL_lex_repl))
43a16006 4571 {
e9fa98b2 4572 if (PL_bufptr != PL_bufend)
cea2e8a9 4573 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4574 PL_lex_repl = NULL;
e9fa98b2 4575 }
79072805
LW
4576 /* FALLTHROUGH */
4577 case LEX_INTERPCONCAT:
4578#ifdef DEBUGGING
3280af22 4579 if (PL_lex_brackets)
cea2e8a9 4580 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4581#endif
3280af22 4582 if (PL_bufptr == PL_bufend)
bbf60fe6 4583 return REPORT(sublex_done());
79072805 4584
3280af22
NIS
4585 if (SvIVX(PL_linestr) == '\'') {
4586 SV *sv = newSVsv(PL_linestr);
4587 if (!PL_lex_inpat)
76e3520e 4588 sv = tokeq(sv);
3280af22 4589 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4590 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4591 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4592 s = PL_bufend;
79072805
LW
4593 }
4594 else {
3280af22 4595 s = scan_const(PL_bufptr);
79072805 4596 if (*s == '\\')
3280af22 4597 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4598 else
3280af22 4599 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4600 }
4601
3280af22 4602 if (s != PL_bufptr) {
cd81e915 4603 start_force(PL_curforce);
5db06880
NC
4604 if (PL_madskills) {
4605 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4606 }
6154021b 4607 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4608 PL_expect = XTERM;
79072805 4609 force_next(THING);
131b3ad0 4610 if (PL_lex_starts++) {
5db06880
NC
4611#ifdef PERL_MAD
4612 if (PL_madskills) {
cd81e915
NC
4613 if (PL_thistoken)
4614 sv_free(PL_thistoken);
6b29d1f5 4615 PL_thistoken = newSVpvs("");
5db06880
NC
4616 }
4617#endif
131b3ad0
DM
4618 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4619 if (!PL_lex_casemods && PL_lex_inpat)
4620 OPERATOR(',');
4621 else
4622 Aop(OP_CONCAT);
4623 }
79072805 4624 else {
3280af22 4625 PL_bufptr = s;
cea2e8a9 4626 return yylex();
79072805
LW
4627 }
4628 }
4629
cea2e8a9 4630 return yylex();
a0d0e21e 4631 case LEX_FORMLINE:
3280af22
NIS
4632 PL_lex_state = LEX_NORMAL;
4633 s = scan_formline(PL_bufptr);
4634 if (!PL_lex_formbrack)
a0d0e21e
LW
4635 goto rightbracket;
4636 OPERATOR(';');
79072805
LW
4637 }
4638
3280af22
NIS
4639 s = PL_bufptr;
4640 PL_oldoldbufptr = PL_oldbufptr;
4641 PL_oldbufptr = s;
463ee0b2
LW
4642
4643 retry:
5db06880 4644#ifdef PERL_MAD
cd81e915
NC
4645 if (PL_thistoken) {
4646 sv_free(PL_thistoken);
4647 PL_thistoken = 0;
5db06880 4648 }
cd81e915 4649 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4650#endif
378cc40b
LW
4651 switch (*s) {
4652 default:
7e2040f0 4653 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4654 goto keylookup;
b1fc3636
CJ
4655 {
4656 unsigned char c = *s;
4657 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4658 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4659 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4660 } else {
4661 d = PL_linestart;
4662 }
4663 *s = '\0';
4664 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4665 }
e929a76b
LW
4666 case 4:
4667 case 26:
4668 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4669 case 0:
5db06880
NC
4670#ifdef PERL_MAD
4671 if (PL_madskills)
cd81e915 4672 PL_faketokens = 0;
5db06880 4673#endif
3280af22
NIS
4674 if (!PL_rsfp) {
4675 PL_last_uni = 0;
4676 PL_last_lop = 0;
a7aaec61
Z
4677 if (PL_lex_brackets &&
4678 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4679 yyerror((const char *)
4680 (PL_lex_formbrack
4681 ? "Format not terminated"
4682 : "Missing right curly or square bracket"));
c5ee2135 4683 }
4e553d73 4684 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4685 "### Tokener got EOF\n");
5f80b19c 4686 } );
79072805 4687 TOKEN(0);
463ee0b2 4688 }
3280af22 4689 if (s++ < PL_bufend)
a687059c 4690 goto retry; /* ignore stray nulls */
3280af22
NIS
4691 PL_last_uni = 0;
4692 PL_last_lop = 0;
4693 if (!PL_in_eval && !PL_preambled) {
4694 PL_preambled = TRUE;
5db06880
NC
4695#ifdef PERL_MAD
4696 if (PL_madskills)
cd81e915 4697 PL_faketokens = 1;
5db06880 4698#endif
5ab7ff98
NC
4699 if (PL_perldb) {
4700 /* Generate a string of Perl code to load the debugger.
4701 * If PERL5DB is set, it will return the contents of that,
4702 * otherwise a compile-time require of perl5db.pl. */
4703
4704 const char * const pdb = PerlEnv_getenv("PERL5DB");
4705
4706 if (pdb) {
4707 sv_setpv(PL_linestr, pdb);
4708 sv_catpvs(PL_linestr,";");
4709 } else {
4710 SETERRNO(0,SS_NORMAL);
4711 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4712 }
4713 } else
4714 sv_setpvs(PL_linestr,"");
c62eb204
NC
4715 if (PL_preambleav) {
4716 SV **svp = AvARRAY(PL_preambleav);
4717 SV **const end = svp + AvFILLp(PL_preambleav);
4718 while(svp <= end) {
4719 sv_catsv(PL_linestr, *svp);
4720 ++svp;
396482e1 4721 sv_catpvs(PL_linestr, ";");
91b7def8 4722 }
daba3364 4723 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4724 PL_preambleav = NULL;
91b7def8 4725 }
9f639728
FR
4726 if (PL_minus_E)
4727 sv_catpvs(PL_linestr,
4728 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4729 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4730 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4731 if (PL_minus_l)
396482e1 4732 sv_catpvs(PL_linestr,"chomp;");
3280af22 4733 if (PL_minus_a) {
3280af22 4734 if (PL_minus_F) {
3792a11b
NC
4735 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4736 || *PL_splitstr == '"')
3280af22 4737 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4738 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4739 else {
c8ef6a4b
NC
4740 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4741 bytes can be used as quoting characters. :-) */
dd374669 4742 const char *splits = PL_splitstr;
91d456ae 4743 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4744 do {
4745 /* Need to \ \s */
dd374669
AL
4746 if (*splits == '\\')
4747 sv_catpvn(PL_linestr, splits, 1);
4748 sv_catpvn(PL_linestr, splits, 1);
4749 } while (*splits++);
48c4c863
NC
4750 /* This loop will embed the trailing NUL of
4751 PL_linestr as the last thing it does before
4752 terminating. */
396482e1 4753 sv_catpvs(PL_linestr, ");");
54310121 4754 }
2304df62
AD
4755 }
4756 else
396482e1 4757 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4758 }
79072805 4759 }
396482e1 4760 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4761 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4762 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4763 PL_last_lop = PL_last_uni = NULL;
65269a95 4764 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4765 update_debugger_info(PL_linestr, NULL, 0);
79072805 4766 goto retry;
a687059c 4767 }
e929a76b 4768 do {
580561a3
Z
4769 fake_eof = 0;
4770 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4771 if (0) {
7e28d3af 4772 fake_eof:
f0e67a1d
Z
4773 fake_eof = LEX_FAKE_EOF;
4774 }
4775 PL_bufptr = PL_bufend;
17cc9359 4776 CopLINE_inc(PL_curcop);
f0e67a1d 4777 if (!lex_next_chunk(fake_eof)) {
17cc9359 4778 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4779 s = PL_bufptr;
4780 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4781 }
17cc9359 4782 CopLINE_dec(PL_curcop);
5db06880 4783#ifdef PERL_MAD
f0e67a1d 4784 if (!PL_rsfp)
cd81e915 4785 PL_realtokenstart = -1;
5db06880 4786#endif
f0e67a1d 4787 s = PL_bufptr;
7aa207d6
JH
4788 /* If it looks like the start of a BOM or raw UTF-16,
4789 * check if it in fact is. */
580561a3 4790 if (bof && PL_rsfp &&
7aa207d6
JH
4791 (*s == 0 ||
4792 *(U8*)s == 0xEF ||
4793 *(U8*)s >= 0xFE ||
4794 s[1] == 0)) {
879bc93b
DM
4795 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4796 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
4797#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4798 /* offset may include swallowed CR */
4799 if (!bof)
879bc93b 4800 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 4801#endif
7e28d3af 4802 if (bof) {
3280af22 4803 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4804 s = swallow_bom((U8*)s);
e929a76b 4805 }
378cc40b 4806 }
737c24fc 4807 if (PL_parser->in_pod) {
a0d0e21e 4808 /* Incest with pod. */
5db06880
NC
4809#ifdef PERL_MAD
4810 if (PL_madskills)
cd81e915 4811 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4812#endif
01a57ef7 4813 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4814 sv_setpvs(PL_linestr, "");
3280af22
NIS
4815 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4816 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4817 PL_last_lop = PL_last_uni = NULL;
737c24fc 4818 PL_parser->in_pod = 0;
a0d0e21e 4819 }
4e553d73 4820 }
85613cab
Z
4821 if (PL_rsfp)
4822 incline(s);
737c24fc 4823 } while (PL_parser->in_pod);
3280af22 4824 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4825 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4826 PL_last_lop = PL_last_uni = NULL;
57843af0 4827 if (CopLINE(PL_curcop) == 1) {
3280af22 4828 while (s < PL_bufend && isSPACE(*s))
79072805 4829 s++;
a0d0e21e 4830 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4831 s++;
5db06880
NC
4832#ifdef PERL_MAD
4833 if (PL_madskills)
cd81e915 4834 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4835#endif
bd61b366 4836 d = NULL;
3280af22 4837 if (!PL_in_eval) {
44a8e56a 4838 if (*s == '#' && *(s+1) == '!')
4839 d = s + 2;
4840#ifdef ALTERNATE_SHEBANG
4841 else {
bfed75c6 4842 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4843 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4844 d = s + (sizeof(as) - 1);
4845 }
4846#endif /* ALTERNATE_SHEBANG */
4847 }
4848 if (d) {
b8378b72 4849 char *ipath;
774d564b 4850 char *ipathend;
b8378b72 4851
774d564b 4852 while (isSPACE(*d))
b8378b72
CS
4853 d++;
4854 ipath = d;
774d564b 4855 while (*d && !isSPACE(*d))
4856 d++;
4857 ipathend = d;
4858
4859#ifdef ARG_ZERO_IS_SCRIPT
4860 if (ipathend > ipath) {
4861 /*
4862 * HP-UX (at least) sets argv[0] to the script name,
4863 * which makes $^X incorrect. And Digital UNIX and Linux,
4864 * at least, set argv[0] to the basename of the Perl
4865 * interpreter. So, having found "#!", we'll set it right.
4866 */
fafc274c
NC
4867 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4868 SVt_PV)); /* $^X */
774d564b 4869 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4870 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4871 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4872 SvSETMAGIC(x);
4873 }
556c1dec
JH
4874 else {
4875 STRLEN blen;
4876 STRLEN llen;
cfd0369c 4877 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4878 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4879 if (llen < blen) {
4880 bstart += blen - llen;
4881 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4882 sv_setpvn(x, ipath, ipathend - ipath);
4883 SvSETMAGIC(x);
4884 }
4885 }
4886 }
774d564b 4887 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4888 }
774d564b 4889#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4890
4891 /*
4892 * Look for options.
4893 */
748a9306 4894 d = instr(s,"perl -");
84e30d1a 4895 if (!d) {
748a9306 4896 d = instr(s,"perl");
84e30d1a
GS
4897#if defined(DOSISH)
4898 /* avoid getting into infinite loops when shebang
4899 * line contains "Perl" rather than "perl" */
4900 if (!d) {
4901 for (d = ipathend-4; d >= ipath; --d) {
4902 if ((*d == 'p' || *d == 'P')
4903 && !ibcmp(d, "perl", 4))
4904 {
4905 break;
4906 }
4907 }
4908 if (d < ipath)
bd61b366 4909 d = NULL;
84e30d1a
GS
4910 }
4911#endif
4912 }
44a8e56a 4913#ifdef ALTERNATE_SHEBANG
4914 /*
4915 * If the ALTERNATE_SHEBANG on this system starts with a
4916 * character that can be part of a Perl expression, then if
4917 * we see it but not "perl", we're probably looking at the
4918 * start of Perl code, not a request to hand off to some
4919 * other interpreter. Similarly, if "perl" is there, but
4920 * not in the first 'word' of the line, we assume the line
4921 * contains the start of the Perl program.
44a8e56a 4922 */
4923 if (d && *s != '#') {
f54cb97a 4924 const char *c = ipath;
44a8e56a 4925 while (*c && !strchr("; \t\r\n\f\v#", *c))
4926 c++;
4927 if (c < d)
bd61b366 4928 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4929 else
4930 *s = '#'; /* Don't try to parse shebang line */
4931 }
774d564b 4932#endif /* ALTERNATE_SHEBANG */
748a9306 4933 if (!d &&
44a8e56a 4934 *s == '#' &&
774d564b 4935 ipathend > ipath &&
3280af22 4936 !PL_minus_c &&
748a9306 4937 !instr(s,"indir") &&
3280af22 4938 instr(PL_origargv[0],"perl"))
748a9306 4939 {
27da23d5 4940 dVAR;
9f68db38 4941 char **newargv;
9f68db38 4942
774d564b 4943 *ipathend = '\0';
4944 s = ipathend + 1;
3280af22 4945 while (s < PL_bufend && isSPACE(*s))
9f68db38 4946 s++;
3280af22 4947 if (s < PL_bufend) {
d85f917e 4948 Newx(newargv,PL_origargc+3,char*);
9f68db38 4949 newargv[1] = s;
3280af22 4950 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4951 s++;
4952 *s = '\0';
3280af22 4953 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4954 }
4955 else
3280af22 4956 newargv = PL_origargv;
774d564b 4957 newargv[0] = ipath;
b35112e7 4958 PERL_FPU_PRE_EXEC
b4748376 4959 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4960 PERL_FPU_POST_EXEC
cea2e8a9 4961 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4962 }
748a9306 4963 if (d) {
c35e046a
AL
4964 while (*d && !isSPACE(*d))
4965 d++;
4966 while (SPACE_OR_TAB(*d))
4967 d++;
748a9306
LW
4968
4969 if (*d++ == '-') {
f54cb97a 4970 const bool switches_done = PL_doswitches;
fb993905
GA
4971 const U32 oldpdb = PL_perldb;
4972 const bool oldn = PL_minus_n;
4973 const bool oldp = PL_minus_p;
c7030b81 4974 const char *d1 = d;
fb993905 4975
8cc95fdb 4976 do {
4ba71d51
FC
4977 bool baduni = FALSE;
4978 if (*d1 == 'C') {
bd0ab00d
NC
4979 const char *d2 = d1 + 1;
4980 if (parse_unicode_opts((const char **)&d2)
4981 != PL_unicode)
4982 baduni = TRUE;
4ba71d51
FC
4983 }
4984 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4985 const char * const m = d1;
4986 while (*d1 && !isSPACE(*d1))
4987 d1++;
cea2e8a9 4988 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4989 (int)(d1 - m), m);
8cc95fdb 4990 }
c7030b81
NC
4991 d1 = moreswitches(d1);
4992 } while (d1);
f0b2cf55
YST
4993 if (PL_doswitches && !switches_done) {
4994 int argc = PL_origargc;
4995 char **argv = PL_origargv;
4996 do {
4997 argc--,argv++;
4998 } while (argc && argv[0][0] == '-' && argv[0][1]);
4999 init_argv_symbols(argc,argv);
5000 }
65269a95 5001 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5002 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5003 /* if we have already added "LINE: while (<>) {",
5004 we must not do it again */
748a9306 5005 {
76f68e9b 5006 sv_setpvs(PL_linestr, "");
3280af22
NIS
5007 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5008 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5009 PL_last_lop = PL_last_uni = NULL;
3280af22 5010 PL_preambled = FALSE;
65269a95 5011 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5012 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5013 goto retry;
5014 }
a0d0e21e 5015 }
79072805 5016 }
9f68db38 5017 }
79072805 5018 }
3280af22
NIS
5019 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5020 PL_bufptr = s;
5021 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5022 return yylex();
ae986130 5023 }
378cc40b 5024 goto retry;
4fdae800 5025 case '\r':
6a27c188 5026#ifdef PERL_STRICT_CR
cea2e8a9 5027 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5028 Perl_croak(aTHX_
cc507455 5029 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5030#endif
4fdae800 5031 case ' ': case '\t': case '\f': case 013:
5db06880 5032#ifdef PERL_MAD
cd81e915 5033 PL_realtokenstart = -1;
ac372eb8
RD
5034 if (!PL_thiswhite)
5035 PL_thiswhite = newSVpvs("");
5036 sv_catpvn(PL_thiswhite, s, 1);
5db06880 5037#endif
ac372eb8 5038 s++;
378cc40b 5039 goto retry;
378cc40b 5040 case '#':
e929a76b 5041 case '\n':
5db06880 5042#ifdef PERL_MAD
cd81e915 5043 PL_realtokenstart = -1;
5db06880 5044 if (PL_madskills)
cd81e915 5045 PL_faketokens = 0;
5db06880 5046#endif
3280af22 5047 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
5048 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5049 /* handle eval qq[#line 1 "foo"\n ...] */
5050 CopLINE_dec(PL_curcop);
5051 incline(s);
5052 }
5db06880
NC
5053 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5054 s = SKIPSPACE0(s);
5055 if (!PL_in_eval || PL_rsfp)
5056 incline(s);
5057 }
5058 else {
5059 d = s;
5060 while (d < PL_bufend && *d != '\n')
5061 d++;
5062 if (d < PL_bufend)
5063 d++;
5064 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5065 Perl_croak(aTHX_ "panic: input overflow");
5066#ifdef PERL_MAD
5067 if (PL_madskills)
cd81e915 5068 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5069#endif
5070 s = d;
5071 incline(s);
5072 }
3280af22
NIS
5073 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5074 PL_bufptr = s;
5075 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5076 return yylex();
a687059c 5077 }
378cc40b 5078 }
a687059c 5079 else {
5db06880
NC
5080#ifdef PERL_MAD
5081 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5082 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5083 PL_faketokens = 0;
5db06880
NC
5084 s = SKIPSPACE0(s);
5085 TOKEN(PEG); /* make sure any #! line is accessible */
5086 }
5087 s = SKIPSPACE0(s);
5088 }
5089 else {
5090/* if (PL_madskills && PL_lex_formbrack) { */
5091 d = s;
5092 while (d < PL_bufend && *d != '\n')
5093 d++;
5094 if (d < PL_bufend)
5095 d++;
5096 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5097 Perl_croak(aTHX_ "panic: input overflow");
5098 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5099 if (!PL_thiswhite)
6b29d1f5 5100 PL_thiswhite = newSVpvs("");
5db06880 5101 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5102 sv_setpvs(PL_thiswhite, "");
cd81e915 5103 PL_faketokens = 0;
5db06880 5104 }
cd81e915 5105 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5106 }
5107 s = d;
5108/* }
5109 *s = '\0';
5110 PL_bufend = s; */
5111 }
5112#else
378cc40b 5113 *s = '\0';
3280af22 5114 PL_bufend = s;
5db06880 5115#endif
a687059c 5116 }
378cc40b
LW
5117 goto retry;
5118 case '-':
79072805 5119 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5120 I32 ftst = 0;
90771dc0 5121 char tmp;
e5edeb50 5122
378cc40b 5123 s++;
3280af22 5124 PL_bufptr = s;
748a9306
LW
5125 tmp = *s++;
5126
bf4acbe4 5127 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5128 s++;
5129
5130 if (strnEQ(s,"=>",2)) {
3280af22 5131 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5132 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5133 OPERATOR('-'); /* unary minus */
5134 }
3280af22 5135 PL_last_uni = PL_oldbufptr;
748a9306 5136 switch (tmp) {
e5edeb50
JH
5137 case 'r': ftst = OP_FTEREAD; break;
5138 case 'w': ftst = OP_FTEWRITE; break;
5139 case 'x': ftst = OP_FTEEXEC; break;
5140 case 'o': ftst = OP_FTEOWNED; break;
5141 case 'R': ftst = OP_FTRREAD; break;
5142 case 'W': ftst = OP_FTRWRITE; break;
5143 case 'X': ftst = OP_FTREXEC; break;
5144 case 'O': ftst = OP_FTROWNED; break;
5145 case 'e': ftst = OP_FTIS; break;
5146 case 'z': ftst = OP_FTZERO; break;
5147 case 's': ftst = OP_FTSIZE; break;
5148 case 'f': ftst = OP_FTFILE; break;
5149 case 'd': ftst = OP_FTDIR; break;
5150 case 'l': ftst = OP_FTLINK; break;
5151 case 'p': ftst = OP_FTPIPE; break;
5152 case 'S': ftst = OP_FTSOCK; break;
5153 case 'u': ftst = OP_FTSUID; break;
5154 case 'g': ftst = OP_FTSGID; break;
5155 case 'k': ftst = OP_FTSVTX; break;
5156 case 'b': ftst = OP_FTBLK; break;
5157 case 'c': ftst = OP_FTCHR; break;
5158 case 't': ftst = OP_FTTTY; break;
5159 case 'T': ftst = OP_FTTEXT; break;
5160 case 'B': ftst = OP_FTBINARY; break;
5161 case 'M': case 'A': case 'C':
fafc274c 5162 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5163 switch (tmp) {
5164 case 'M': ftst = OP_FTMTIME; break;
5165 case 'A': ftst = OP_FTATIME; break;
5166 case 'C': ftst = OP_FTCTIME; break;
5167 default: break;
5168 }
5169 break;
378cc40b 5170 default:
378cc40b
LW
5171 break;
5172 }
e5edeb50 5173 if (ftst) {
eb160463 5174 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5175 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5176 "### Saw file test %c\n", (int)tmp);
5f80b19c 5177 } );
e5edeb50
JH
5178 FTST(ftst);
5179 }
5180 else {
5181 /* Assume it was a minus followed by a one-letter named
5182 * subroutine call (or a -bareword), then. */
95c31fe3 5183 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5184 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5185 (int) tmp);
5f80b19c 5186 } );
3cf7b4c4 5187 s = --PL_bufptr;
e5edeb50 5188 }
378cc40b 5189 }
90771dc0
NC
5190 {
5191 const char tmp = *s++;
5192 if (*s == tmp) {
5193 s++;
5194 if (PL_expect == XOPERATOR)
5195 TERM(POSTDEC);
5196 else
5197 OPERATOR(PREDEC);
5198 }
5199 else if (*s == '>') {
5200 s++;
29595ff2 5201 s = SKIPSPACE1(s);
90771dc0
NC
5202 if (isIDFIRST_lazy_if(s,UTF)) {
5203 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5204 TOKEN(ARROW);
5205 }
5206 else if (*s == '$')
5207 OPERATOR(ARROW);
5208 else
5209 TERM(ARROW);
5210 }
78cdf107
Z
5211 if (PL_expect == XOPERATOR) {
5212 if (*s == '=' && !PL_lex_allbrackets &&
5213 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5214 s--;
5215 TOKEN(0);
5216 }
90771dc0 5217 Aop(OP_SUBTRACT);
78cdf107 5218 }
90771dc0
NC
5219 else {
5220 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5221 check_uni();
5222 OPERATOR('-'); /* unary minus */
79072805 5223 }
2f3197b3 5224 }
79072805 5225
378cc40b 5226 case '+':
90771dc0
NC
5227 {
5228 const char tmp = *s++;
5229 if (*s == tmp) {
5230 s++;
5231 if (PL_expect == XOPERATOR)
5232 TERM(POSTINC);
5233 else
5234 OPERATOR(PREINC);
5235 }
78cdf107
Z
5236 if (PL_expect == XOPERATOR) {
5237 if (*s == '=' && !PL_lex_allbrackets &&
5238 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5239 s--;
5240 TOKEN(0);
5241 }
90771dc0 5242 Aop(OP_ADD);
78cdf107 5243 }
90771dc0
NC
5244 else {
5245 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5246 check_uni();
5247 OPERATOR('+');
5248 }
2f3197b3 5249 }
a687059c 5250
378cc40b 5251 case '*':
3280af22
NIS
5252 if (PL_expect != XOPERATOR) {
5253 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5254 PL_expect = XOPERATOR;
5255 force_ident(PL_tokenbuf, '*');
5256 if (!*PL_tokenbuf)
a0d0e21e 5257 PREREF('*');
79072805 5258 TERM('*');
a687059c 5259 }
79072805
LW
5260 s++;
5261 if (*s == '*') {
a687059c 5262 s++;
78cdf107
Z
5263 if (*s == '=' && !PL_lex_allbrackets &&
5264 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5265 s -= 2;
5266 TOKEN(0);
5267 }
79072805 5268 PWop(OP_POW);
a687059c 5269 }
78cdf107
Z
5270 if (*s == '=' && !PL_lex_allbrackets &&
5271 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5272 s--;
5273 TOKEN(0);
5274 }
79072805
LW
5275 Mop(OP_MULTIPLY);
5276
378cc40b 5277 case '%':
3280af22 5278 if (PL_expect == XOPERATOR) {
78cdf107
Z
5279 if (s[1] == '=' && !PL_lex_allbrackets &&
5280 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5281 TOKEN(0);
bbce6d69 5282 ++s;
5283 Mop(OP_MODULO);
a687059c 5284 }
3280af22 5285 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5286 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5287 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5288 if (!PL_tokenbuf[1]) {
bbce6d69 5289 PREREF('%');
a687059c 5290 }
3280af22 5291 PL_pending_ident = '%';
bbce6d69 5292 TERM('%');
a687059c 5293
378cc40b 5294 case '^':
78cdf107
Z
5295 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5296 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5297 TOKEN(0);
79072805 5298 s++;
a0d0e21e 5299 BOop(OP_BIT_XOR);
79072805 5300 case '[':
a7aaec61
Z
5301 if (PL_lex_brackets > 100)
5302 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5303 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5304 PL_lex_allbrackets++;
df3467db
IG
5305 {
5306 const char tmp = *s++;
5307 OPERATOR(tmp);
5308 }
378cc40b 5309 case '~':
0d863452 5310 if (s[1] == '~'
3e7dd34d 5311 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5312 {
78cdf107
Z
5313 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5314 TOKEN(0);
0d863452
RH
5315 s += 2;
5316 Eop(OP_SMARTMATCH);
5317 }
78cdf107
Z
5318 s++;
5319 OPERATOR('~');
378cc40b 5320 case ',':
78cdf107
Z
5321 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5322 TOKEN(0);
5323 s++;
5324 OPERATOR(',');
a0d0e21e
LW
5325 case ':':
5326 if (s[1] == ':') {
5327 len = 0;
0bfa2a8a 5328 goto just_a_word_zero_gv;
a0d0e21e
LW
5329 }
5330 s++;
09bef843
SB
5331 switch (PL_expect) {
5332 OP *attrs;
5db06880
NC
5333#ifdef PERL_MAD
5334 I32 stuffstart;
5335#endif
09bef843
SB
5336 case XOPERATOR:
5337 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5338 break;
5339 PL_bufptr = s; /* update in case we back off */
d83f38d8 5340 if (*s == '=') {
2dc78664
NC
5341 Perl_croak(aTHX_
5342 "Use of := for an empty attribute list is not allowed");
d83f38d8 5343 }
09bef843
SB
5344 goto grabattrs;
5345 case XATTRBLOCK:
5346 PL_expect = XBLOCK;
5347 goto grabattrs;
5348 case XATTRTERM:
5349 PL_expect = XTERMBLOCK;
5350 grabattrs:
5db06880
NC
5351#ifdef PERL_MAD
5352 stuffstart = s - SvPVX(PL_linestr) - 1;
5353#endif
29595ff2 5354 s = PEEKSPACE(s);
5f66b61c 5355 attrs = NULL;
7e2040f0 5356 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5357 I32 tmp;
5cc237b8 5358 SV *sv;
09bef843 5359 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5360 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5361 if (tmp < 0) tmp = -tmp;
5362 switch (tmp) {
5363 case KEY_or:
5364 case KEY_and:
5365 case KEY_for:
11baf631 5366 case KEY_foreach:
f9829d6b
GS
5367 case KEY_unless:
5368 case KEY_if:
5369 case KEY_while:
5370 case KEY_until:
5371 goto got_attrs;
5372 default:
5373 break;
5374 }
5375 }
5cc237b8 5376 sv = newSVpvn(s, len);
09bef843
SB
5377 if (*d == '(') {
5378 d = scan_str(d,TRUE,TRUE);
5379 if (!d) {
09bef843
SB
5380 /* MUST advance bufptr here to avoid bogus
5381 "at end of line" context messages from yyerror().
5382 */
5383 PL_bufptr = s + len;
5384 yyerror("Unterminated attribute parameter in attribute list");
5385 if (attrs)
5386 op_free(attrs);
5cc237b8 5387 sv_free(sv);
bbf60fe6 5388 return REPORT(0); /* EOF indicator */
09bef843
SB
5389 }
5390 }
5391 if (PL_lex_stuff) {
09bef843 5392 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5393 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5394 newSVOP(OP_CONST, 0, sv));
5395 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5396 PL_lex_stuff = NULL;
09bef843
SB
5397 }
5398 else {
5cc237b8
BS
5399 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5400 sv_free(sv);
1108974d 5401 if (PL_in_my == KEY_our) {
df9a6019 5402 deprecate(":unique");
1108974d 5403 }
bfed75c6 5404 else
371fce9b
DM
5405 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5406 }
5407
d3cea301
SB
5408 /* NOTE: any CV attrs applied here need to be part of
5409 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5410 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5411 sv_free(sv);
78f9721b 5412 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5413 }
5414 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5415 sv_free(sv);
8e5dadda 5416 deprecate(":locked");
5cc237b8
BS
5417 }
5418 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5419 sv_free(sv);
78f9721b 5420 CvMETHOD_on(PL_compcv);
5cc237b8 5421 }
78f9721b
SM
5422 /* After we've set the flags, it could be argued that
5423 we don't need to do the attributes.pm-based setting
5424 process, and shouldn't bother appending recognized
d3cea301
SB
5425 flags. To experiment with that, uncomment the
5426 following "else". (Note that's already been
5427 uncommented. That keeps the above-applied built-in
5428 attributes from being intercepted (and possibly
5429 rejected) by a package's attribute routines, but is
5430 justified by the performance win for the common case
5431 of applying only built-in attributes.) */
0256094b 5432 else
2fcb4757 5433 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5434 newSVOP(OP_CONST, 0,
5cc237b8 5435 sv));
09bef843 5436 }
29595ff2 5437 s = PEEKSPACE(d);
0120eecf 5438 if (*s == ':' && s[1] != ':')
29595ff2 5439 s = PEEKSPACE(s+1);
0120eecf
GS
5440 else if (s == d)
5441 break; /* require real whitespace or :'s */
29595ff2 5442 /* XXX losing whitespace on sequential attributes here */
09bef843 5443 }
90771dc0
NC
5444 {
5445 const char tmp
5446 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5447 if (*s != ';' && *s != '}' && *s != tmp
5448 && (tmp != '=' || *s != ')')) {
5449 const char q = ((*s == '\'') ? '"' : '\'');
5450 /* If here for an expression, and parsed no attrs, back
5451 off. */
5452 if (tmp == '=' && !attrs) {
5453 s = PL_bufptr;
5454 break;
5455 }
5456 /* MUST advance bufptr here to avoid bogus "at end of line"
5457 context messages from yyerror().
5458 */
5459 PL_bufptr = s;
10edeb5d
JH
5460 yyerror( (const char *)
5461 (*s
5462 ? Perl_form(aTHX_ "Invalid separator character "
5463 "%c%c%c in attribute list", q, *s, q)
5464 : "Unterminated attribute list" ) );
90771dc0
NC
5465 if (attrs)
5466 op_free(attrs);
5467 OPERATOR(':');
09bef843 5468 }
09bef843 5469 }
f9829d6b 5470 got_attrs:
09bef843 5471 if (attrs) {
cd81e915 5472 start_force(PL_curforce);
9ded7720 5473 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5474 CURMAD('_', PL_nextwhite);
89122651 5475 force_next(THING);
5db06880
NC
5476 }
5477#ifdef PERL_MAD
5478 if (PL_madskills) {
cd81e915 5479 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5480 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5481 }
5db06880 5482#endif
09bef843
SB
5483 TOKEN(COLONATTR);
5484 }
78cdf107
Z
5485 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5486 s--;
5487 TOKEN(0);
5488 }
5489 PL_lex_allbrackets--;
a0d0e21e 5490 OPERATOR(':');
8990e307
LW
5491 case '(':
5492 s++;
3280af22
NIS
5493 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5494 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5495 else
3280af22 5496 PL_expect = XTERM;
29595ff2 5497 s = SKIPSPACE1(s);
78cdf107 5498 PL_lex_allbrackets++;
a0d0e21e 5499 TOKEN('(');
378cc40b 5500 case ';':
78cdf107
Z
5501 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5502 TOKEN(0);
f4dd75d9 5503 CLINE;
78cdf107
Z
5504 s++;
5505 OPERATOR(';');
378cc40b 5506 case ')':
78cdf107
Z
5507 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5508 TOKEN(0);
5509 s++;
5510 PL_lex_allbrackets--;
5511 s = SKIPSPACE1(s);
5512 if (*s == '{')
5513 PREBLOCK(')');
5514 TERM(')');
79072805 5515 case ']':
a7aaec61
Z
5516 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5517 TOKEN(0);
79072805 5518 s++;
3280af22 5519 if (PL_lex_brackets <= 0)
d98d5fff 5520 yyerror("Unmatched right square bracket");
463ee0b2 5521 else
3280af22 5522 --PL_lex_brackets;
78cdf107 5523 PL_lex_allbrackets--;
3280af22
NIS
5524 if (PL_lex_state == LEX_INTERPNORMAL) {
5525 if (PL_lex_brackets == 0) {
02255c60
FC
5526 if (*s == '-' && s[1] == '>')
5527 PL_lex_state = LEX_INTERPENDMAYBE;
5528 else if (*s != '[' && *s != '{')
3280af22 5529 PL_lex_state = LEX_INTERPEND;
79072805
LW
5530 }
5531 }
4633a7c4 5532 TERM(']');
79072805
LW
5533 case '{':
5534 leftbracket:
79072805 5535 s++;
3280af22 5536 if (PL_lex_brackets > 100) {
8edd5f42 5537 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5538 }
3280af22 5539 switch (PL_expect) {
a0d0e21e 5540 case XTERM:
3280af22 5541 if (PL_lex_formbrack) {
a0d0e21e
LW
5542 s--;
5543 PRETERMBLOCK(DO);
5544 }
3280af22
NIS
5545 if (PL_oldoldbufptr == PL_last_lop)
5546 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5547 else
3280af22 5548 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5549 PL_lex_allbrackets++;
79072805 5550 OPERATOR(HASHBRACK);
a0d0e21e 5551 case XOPERATOR:
bf4acbe4 5552 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5553 s++;
44a8e56a 5554 d = s;
3280af22
NIS
5555 PL_tokenbuf[0] = '\0';
5556 if (d < PL_bufend && *d == '-') {
5557 PL_tokenbuf[0] = '-';
44a8e56a 5558 d++;
bf4acbe4 5559 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5560 d++;
5561 }
7e2040f0 5562 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5563 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5564 FALSE, &len);
bf4acbe4 5565 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5566 d++;
5567 if (*d == '}') {
f54cb97a 5568 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5569 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5570 if (minus)
5571 force_next('-');
748a9306
LW
5572 }
5573 }
5574 /* FALL THROUGH */
09bef843 5575 case XATTRBLOCK:
748a9306 5576 case XBLOCK:
3280af22 5577 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5578 PL_lex_allbrackets++;
3280af22 5579 PL_expect = XSTATE;
a0d0e21e 5580 break;
09bef843 5581 case XATTRTERM:
a0d0e21e 5582 case XTERMBLOCK:
3280af22 5583 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5584 PL_lex_allbrackets++;
3280af22 5585 PL_expect = XSTATE;
a0d0e21e
LW
5586 break;
5587 default: {
f54cb97a 5588 const char *t;
3280af22
NIS
5589 if (PL_oldoldbufptr == PL_last_lop)
5590 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5591 else
3280af22 5592 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5593 PL_lex_allbrackets++;
29595ff2 5594 s = SKIPSPACE1(s);
8452ff4b
SB
5595 if (*s == '}') {
5596 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5597 PL_expect = XTERM;
5598 /* This hack is to get the ${} in the message. */
5599 PL_bufptr = s+1;
5600 yyerror("syntax error");
5601 break;
5602 }
a0d0e21e 5603 OPERATOR(HASHBRACK);
8452ff4b 5604 }
b8a4b1be
GS
5605 /* This hack serves to disambiguate a pair of curlies
5606 * as being a block or an anon hash. Normally, expectation
5607 * determines that, but in cases where we're not in a
5608 * position to expect anything in particular (like inside
5609 * eval"") we have to resolve the ambiguity. This code
5610 * covers the case where the first term in the curlies is a
5611 * quoted string. Most other cases need to be explicitly
a0288114 5612 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5613 * curly in order to force resolution as an anon hash.
5614 *
5615 * XXX should probably propagate the outer expectation
5616 * into eval"" to rely less on this hack, but that could
5617 * potentially break current behavior of eval"".
5618 * GSAR 97-07-21
5619 */
5620 t = s;
5621 if (*s == '\'' || *s == '"' || *s == '`') {
5622 /* common case: get past first string, handling escapes */
3280af22 5623 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5624 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5625 t++;
5626 t++;
a0d0e21e 5627 }
b8a4b1be 5628 else if (*s == 'q') {
3280af22 5629 if (++t < PL_bufend
b8a4b1be 5630 && (!isALNUM(*t)
3280af22 5631 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5632 && !isALNUM(*t))))
5633 {
abc667d1 5634 /* skip q//-like construct */
f54cb97a 5635 const char *tmps;
b8a4b1be
GS
5636 char open, close, term;
5637 I32 brackets = 1;
5638
3280af22 5639 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5640 t++;
abc667d1
DM
5641 /* check for q => */
5642 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5643 OPERATOR(HASHBRACK);
5644 }
b8a4b1be
GS
5645 term = *t;
5646 open = term;
5647 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5648 term = tmps[5];
5649 close = term;
5650 if (open == close)
3280af22
NIS
5651 for (t++; t < PL_bufend; t++) {
5652 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5653 t++;
6d07e5e9 5654 else if (*t == open)
b8a4b1be
GS
5655 break;
5656 }
abc667d1 5657 else {
3280af22
NIS
5658 for (t++; t < PL_bufend; t++) {
5659 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5660 t++;
6d07e5e9 5661 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5662 break;
5663 else if (*t == open)
5664 brackets++;
5665 }
abc667d1
DM
5666 }
5667 t++;
b8a4b1be 5668 }
abc667d1
DM
5669 else
5670 /* skip plain q word */
5671 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5672 t += UTF8SKIP(t);
a0d0e21e 5673 }
7e2040f0 5674 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5675 t += UTF8SKIP(t);
7e2040f0 5676 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5677 t += UTF8SKIP(t);
a0d0e21e 5678 }
3280af22 5679 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5680 t++;
b8a4b1be
GS
5681 /* if comma follows first term, call it an anon hash */
5682 /* XXX it could be a comma expression with loop modifiers */
3280af22 5683 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5684 || (*t == '=' && t[1] == '>')))
a0d0e21e 5685 OPERATOR(HASHBRACK);
3280af22 5686 if (PL_expect == XREF)
4e4e412b 5687 PL_expect = XTERM;
a0d0e21e 5688 else {
3280af22
NIS
5689 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5690 PL_expect = XSTATE;
a0d0e21e 5691 }
8990e307 5692 }
a0d0e21e 5693 break;
463ee0b2 5694 }
6154021b 5695 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5696 if (isSPACE(*s) || *s == '#')
3280af22 5697 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5698 TOKEN('{');
378cc40b 5699 case '}':
a7aaec61
Z
5700 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5701 TOKEN(0);
79072805
LW
5702 rightbracket:
5703 s++;
3280af22 5704 if (PL_lex_brackets <= 0)
d98d5fff 5705 yyerror("Unmatched right curly bracket");
463ee0b2 5706 else
3280af22 5707 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 5708 PL_lex_allbrackets--;
c2e66d9e 5709 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5710 PL_lex_formbrack = 0;
5711 if (PL_lex_state == LEX_INTERPNORMAL) {
5712 if (PL_lex_brackets == 0) {
9059aa12
LW
5713 if (PL_expect & XFAKEBRACK) {
5714 PL_expect &= XENUMMASK;
3280af22
NIS
5715 PL_lex_state = LEX_INTERPEND;
5716 PL_bufptr = s;
5db06880
NC
5717#if 0
5718 if (PL_madskills) {
cd81e915 5719 if (!PL_thiswhite)
6b29d1f5 5720 PL_thiswhite = newSVpvs("");
76f68e9b 5721 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5722 }
5723#endif
cea2e8a9 5724 return yylex(); /* ignore fake brackets */
79072805 5725 }
fa83b5b6 5726 if (*s == '-' && s[1] == '>')
3280af22 5727 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5728 else if (*s != '[' && *s != '{')
3280af22 5729 PL_lex_state = LEX_INTERPEND;
79072805
LW
5730 }
5731 }
9059aa12
LW
5732 if (PL_expect & XFAKEBRACK) {
5733 PL_expect &= XENUMMASK;
3280af22 5734 PL_bufptr = s;
cea2e8a9 5735 return yylex(); /* ignore fake brackets */
748a9306 5736 }
cd81e915 5737 start_force(PL_curforce);
5db06880
NC
5738 if (PL_madskills) {
5739 curmad('X', newSVpvn(s-1,1));
cd81e915 5740 CURMAD('_', PL_thiswhite);
5db06880 5741 }
79072805 5742 force_next('}');
5db06880 5743#ifdef PERL_MAD
cd81e915 5744 if (!PL_thistoken)
6b29d1f5 5745 PL_thistoken = newSVpvs("");
5db06880 5746#endif
79072805 5747 TOKEN(';');
378cc40b
LW
5748 case '&':
5749 s++;
78cdf107
Z
5750 if (*s++ == '&') {
5751 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5752 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5753 s -= 2;
5754 TOKEN(0);
5755 }
a0d0e21e 5756 AOPERATOR(ANDAND);
78cdf107 5757 }
378cc40b 5758 s--;
3280af22 5759 if (PL_expect == XOPERATOR) {
041457d9
DM
5760 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5761 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5762 {
57843af0 5763 CopLINE_dec(PL_curcop);
f1f66076 5764 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5765 CopLINE_inc(PL_curcop);
463ee0b2 5766 }
78cdf107
Z
5767 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5768 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5769 s--;
5770 TOKEN(0);
5771 }
79072805 5772 BAop(OP_BIT_AND);
463ee0b2 5773 }
79072805 5774
3280af22
NIS
5775 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5776 if (*PL_tokenbuf) {
5777 PL_expect = XOPERATOR;
5778 force_ident(PL_tokenbuf, '&');
463ee0b2 5779 }
79072805
LW
5780 else
5781 PREREF('&');
6154021b 5782 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5783 TERM('&');
5784
378cc40b
LW
5785 case '|':
5786 s++;
78cdf107
Z
5787 if (*s++ == '|') {
5788 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5789 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5790 s -= 2;
5791 TOKEN(0);
5792 }
a0d0e21e 5793 AOPERATOR(OROR);
78cdf107 5794 }
378cc40b 5795 s--;
78cdf107
Z
5796 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5797 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5798 s--;
5799 TOKEN(0);
5800 }
79072805 5801 BOop(OP_BIT_OR);
378cc40b
LW
5802 case '=':
5803 s++;
748a9306 5804 {
90771dc0 5805 const char tmp = *s++;
78cdf107
Z
5806 if (tmp == '=') {
5807 if (!PL_lex_allbrackets &&
5808 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5809 s -= 2;
5810 TOKEN(0);
5811 }
90771dc0 5812 Eop(OP_EQ);
78cdf107
Z
5813 }
5814 if (tmp == '>') {
5815 if (!PL_lex_allbrackets &&
5816 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5817 s -= 2;
5818 TOKEN(0);
5819 }
90771dc0 5820 OPERATOR(',');
78cdf107 5821 }
90771dc0
NC
5822 if (tmp == '~')
5823 PMop(OP_MATCH);
5824 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5825 && strchr("+-*/%.^&|<",tmp))
5826 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5827 "Reversed %c= operator",(int)tmp);
5828 s--;
5829 if (PL_expect == XSTATE && isALPHA(tmp) &&
5830 (s == PL_linestart+1 || s[-2] == '\n') )
5831 {
5832 if (PL_in_eval && !PL_rsfp) {
5833 d = PL_bufend;
5834 while (s < d) {
5835 if (*s++ == '\n') {
5836 incline(s);
5837 if (strnEQ(s,"=cut",4)) {
5838 s = strchr(s,'\n');
5839 if (s)
5840 s++;
5841 else
5842 s = d;
5843 incline(s);
5844 goto retry;
5845 }
5846 }
a5f75d66 5847 }
90771dc0 5848 goto retry;
a5f75d66 5849 }
5db06880
NC
5850#ifdef PERL_MAD
5851 if (PL_madskills) {
cd81e915 5852 if (!PL_thiswhite)
6b29d1f5 5853 PL_thiswhite = newSVpvs("");
cd81e915 5854 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5855 PL_bufend - PL_linestart);
5856 }
5857#endif
90771dc0 5858 s = PL_bufend;
737c24fc 5859 PL_parser->in_pod = 1;
90771dc0 5860 goto retry;
a5f75d66 5861 }
a0d0e21e 5862 }
3280af22 5863 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5864 const char *t = s;
51882d45 5865#ifdef PERL_STRICT_CR
c35e046a 5866 while (SPACE_OR_TAB(*t))
51882d45 5867#else
c35e046a 5868 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5869#endif
c35e046a 5870 t++;
a0d0e21e
LW
5871 if (*t == '\n' || *t == '#') {
5872 s--;
3280af22 5873 PL_expect = XBLOCK;
a0d0e21e
LW
5874 goto leftbracket;
5875 }
79072805 5876 }
78cdf107
Z
5877 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5878 s--;
5879 TOKEN(0);
5880 }
6154021b 5881 pl_yylval.ival = 0;
a0d0e21e 5882 OPERATOR(ASSIGNOP);
378cc40b
LW
5883 case '!':
5884 s++;
90771dc0
NC
5885 {
5886 const char tmp = *s++;
5887 if (tmp == '=') {
5888 /* was this !=~ where !~ was meant?
5889 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5890
5891 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5892 const char *t = s+1;
5893
5894 while (t < PL_bufend && isSPACE(*t))
5895 ++t;
5896
5897 if (*t == '/' || *t == '?' ||
5898 ((*t == 'm' || *t == 's' || *t == 'y')
5899 && !isALNUM(t[1])) ||
5900 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5901 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5902 "!=~ should be !~");
5903 }
78cdf107
Z
5904 if (!PL_lex_allbrackets &&
5905 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5906 s -= 2;
5907 TOKEN(0);
5908 }
90771dc0
NC
5909 Eop(OP_NE);
5910 }
5911 if (tmp == '~')
5912 PMop(OP_NOT);
5913 }
378cc40b
LW
5914 s--;
5915 OPERATOR('!');
5916 case '<':
3280af22 5917 if (PL_expect != XOPERATOR) {
93a17b20 5918 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5919 check_uni();
79072805
LW
5920 if (s[1] == '<')
5921 s = scan_heredoc(s);
5922 else
5923 s = scan_inputsymbol(s);
5924 TERM(sublex_start());
378cc40b
LW
5925 }
5926 s++;
90771dc0
NC
5927 {
5928 char tmp = *s++;
78cdf107
Z
5929 if (tmp == '<') {
5930 if (*s == '=' && !PL_lex_allbrackets &&
5931 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5932 s -= 2;
5933 TOKEN(0);
5934 }
90771dc0 5935 SHop(OP_LEFT_SHIFT);
78cdf107 5936 }
90771dc0
NC
5937 if (tmp == '=') {
5938 tmp = *s++;
78cdf107
Z
5939 if (tmp == '>') {
5940 if (!PL_lex_allbrackets &&
5941 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5942 s -= 3;
5943 TOKEN(0);
5944 }
90771dc0 5945 Eop(OP_NCMP);
78cdf107 5946 }
90771dc0 5947 s--;
78cdf107
Z
5948 if (!PL_lex_allbrackets &&
5949 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5950 s -= 2;
5951 TOKEN(0);
5952 }
90771dc0
NC
5953 Rop(OP_LE);
5954 }
395c3793 5955 }
378cc40b 5956 s--;
78cdf107
Z
5957 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5958 s--;
5959 TOKEN(0);
5960 }
79072805 5961 Rop(OP_LT);
378cc40b
LW
5962 case '>':
5963 s++;
90771dc0
NC
5964 {
5965 const char tmp = *s++;
78cdf107
Z
5966 if (tmp == '>') {
5967 if (*s == '=' && !PL_lex_allbrackets &&
5968 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5969 s -= 2;
5970 TOKEN(0);
5971 }
90771dc0 5972 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
5973 }
5974 else if (tmp == '=') {
5975 if (!PL_lex_allbrackets &&
5976 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5977 s -= 2;
5978 TOKEN(0);
5979 }
90771dc0 5980 Rop(OP_GE);
78cdf107 5981 }
90771dc0 5982 }
378cc40b 5983 s--;
78cdf107
Z
5984 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5985 s--;
5986 TOKEN(0);
5987 }
79072805 5988 Rop(OP_GT);
378cc40b
LW
5989
5990 case '$':
bbce6d69 5991 CLINE;
5992
3280af22
NIS
5993 if (PL_expect == XOPERATOR) {
5994 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5995 return deprecate_commaless_var_list();
a0d0e21e 5996 }
8990e307 5997 }
a0d0e21e 5998
c0b977fd 5999 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6000 PL_tokenbuf[0] = '@';
376b8730
SM
6001 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6002 sizeof PL_tokenbuf - 1, FALSE);
6003 if (PL_expect == XOPERATOR)
6004 no_op("Array length", s);
3280af22 6005 if (!PL_tokenbuf[1])
a0d0e21e 6006 PREREF(DOLSHARP);
3280af22
NIS
6007 PL_expect = XOPERATOR;
6008 PL_pending_ident = '#';
463ee0b2 6009 TOKEN(DOLSHARP);
79072805 6010 }
bbce6d69 6011
3280af22 6012 PL_tokenbuf[0] = '$';
376b8730
SM
6013 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6014 sizeof PL_tokenbuf - 1, FALSE);
6015 if (PL_expect == XOPERATOR)
6016 no_op("Scalar", s);
3280af22
NIS
6017 if (!PL_tokenbuf[1]) {
6018 if (s == PL_bufend)
bbce6d69 6019 yyerror("Final $ should be \\$ or $name");
6020 PREREF('$');
8990e307 6021 }
a0d0e21e 6022
bbce6d69 6023 /* This kludge not intended to be bulletproof. */
3280af22 6024 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 6025 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 6026 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 6027 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 6028 TERM(THING);
6029 }
6030
ff68c719 6031 d = s;
90771dc0
NC
6032 {
6033 const char tmp = *s;
ae28bb2a 6034 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6035 s = SKIPSPACE1(s);
ff68c719 6036
90771dc0
NC
6037 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6038 && intuit_more(s)) {
6039 if (*s == '[') {
6040 PL_tokenbuf[0] = '@';
6041 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6042 char *t = s+1;
6043
6044 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6045 t++;
90771dc0 6046 if (*t++ == ',') {
29595ff2 6047 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6048 while (t < PL_bufend && *t != ']')
6049 t++;
9014280d 6050 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6051 "Multidimensional syntax %.*s not supported",
36c7798d 6052 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6053 }
748a9306 6054 }
93a17b20 6055 }
90771dc0
NC
6056 else if (*s == '{') {
6057 char *t;
6058 PL_tokenbuf[0] = '%';
6059 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6060 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6061 {
6062 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6063 do {
6064 t++;
6065 } while (isSPACE(*t));
90771dc0 6066 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6067 STRLEN len;
90771dc0 6068 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6069 &len);
c35e046a
AL
6070 while (isSPACE(*t))
6071 t++;
780a5241 6072 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
6073 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6074 "You need to quote \"%s\"",
6075 tmpbuf);
6076 }
6077 }
6078 }
93a17b20 6079 }
bbce6d69 6080
90771dc0
NC
6081 PL_expect = XOPERATOR;
6082 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6083 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6084 if (!islop || PL_last_lop_op == OP_GREPSTART)
6085 PL_expect = XOPERATOR;
6086 else if (strchr("$@\"'`q", *s))
6087 PL_expect = XTERM; /* e.g. print $fh "foo" */
6088 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6089 PL_expect = XTERM; /* e.g. print $fh &sub */
6090 else if (isIDFIRST_lazy_if(s,UTF)) {
6091 char tmpbuf[sizeof PL_tokenbuf];
6092 int t2;
6093 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6094 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6095 /* binary operators exclude handle interpretations */
6096 switch (t2) {
6097 case -KEY_x:
6098 case -KEY_eq:
6099 case -KEY_ne:
6100 case -KEY_gt:
6101 case -KEY_lt:
6102 case -KEY_ge:
6103 case -KEY_le:
6104 case -KEY_cmp:
6105 break;
6106 default:
6107 PL_expect = XTERM; /* e.g. print $fh length() */
6108 break;
6109 }
6110 }
6111 else {
6112 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6113 }
6114 }
90771dc0
NC
6115 else if (isDIGIT(*s))
6116 PL_expect = XTERM; /* e.g. print $fh 3 */
6117 else if (*s == '.' && isDIGIT(s[1]))
6118 PL_expect = XTERM; /* e.g. print $fh .3 */
6119 else if ((*s == '?' || *s == '-' || *s == '+')
6120 && !isSPACE(s[1]) && s[1] != '=')
6121 PL_expect = XTERM; /* e.g. print $fh -1 */
6122 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6123 && s[1] != '/')
6124 PL_expect = XTERM; /* e.g. print $fh /.../
6125 XXX except DORDOR operator
6126 */
6127 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6128 && s[2] != '=')
6129 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6130 }
bbce6d69 6131 }
3280af22 6132 PL_pending_ident = '$';
79072805 6133 TOKEN('$');
378cc40b
LW
6134
6135 case '@':
3280af22 6136 if (PL_expect == XOPERATOR)
bbce6d69 6137 no_op("Array", s);
3280af22
NIS
6138 PL_tokenbuf[0] = '@';
6139 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6140 if (!PL_tokenbuf[1]) {
bbce6d69 6141 PREREF('@');
6142 }
3280af22 6143 if (PL_lex_state == LEX_NORMAL)
29595ff2 6144 s = SKIPSPACE1(s);
3280af22 6145 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6146 if (*s == '{')
3280af22 6147 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6148
6149 /* Warn about @ where they meant $. */
041457d9
DM
6150 if (*s == '[' || *s == '{') {
6151 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6152 const char *t = s + 1;
7e2040f0 6153 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
6154 t++;
6155 if (*t == '}' || *t == ']') {
6156 t++;
29595ff2 6157 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 6158 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 6159 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
6160 (int)(t-PL_bufptr), PL_bufptr,
6161 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 6162 }
93a17b20
LW
6163 }
6164 }
463ee0b2 6165 }
3280af22 6166 PL_pending_ident = '@';
79072805 6167 TERM('@');
378cc40b 6168
c963b151 6169 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6170 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6171 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6172 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6173 TOKEN(0);
6f33ba73
RGS
6174 s += 2;
6175 AOPERATOR(DORDOR);
6176 }
c963b151 6177 case '?': /* may either be conditional or pattern */
be25f609 6178 if (PL_expect == XOPERATOR) {
90771dc0 6179 char tmp = *s++;
c963b151 6180 if(tmp == '?') {
78cdf107
Z
6181 if (!PL_lex_allbrackets &&
6182 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6183 s--;
6184 TOKEN(0);
6185 }
6186 PL_lex_allbrackets++;
be25f609 6187 OPERATOR('?');
c963b151
BD
6188 }
6189 else {
6190 tmp = *s++;
6191 if(tmp == '/') {
6192 /* A // operator. */
78cdf107
Z
6193 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6194 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6195 LEX_FAKEEOF_LOGIC)) {
6196 s -= 2;
6197 TOKEN(0);
6198 }
c963b151
BD
6199 AOPERATOR(DORDOR);
6200 }
6201 else {
6202 s--;
78cdf107
Z
6203 if (*s == '=' && !PL_lex_allbrackets &&
6204 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6205 s--;
6206 TOKEN(0);
6207 }
c963b151
BD
6208 Mop(OP_DIVIDE);
6209 }
6210 }
6211 }
6212 else {
6213 /* Disable warning on "study /blah/" */
6214 if (PL_oldoldbufptr == PL_last_uni
6215 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6216 || memNE(PL_last_uni, "study", 5)
6217 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6218 ))
6219 check_uni();
725a61d7
Z
6220 if (*s == '?')
6221 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6222 s = scan_pat(s,OP_MATCH);
6223 TERM(sublex_start());
6224 }
378cc40b
LW
6225
6226 case '.':
51882d45
GS
6227 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6228#ifdef PERL_STRICT_CR
6229 && s[1] == '\n'
6230#else
6231 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6232#endif
6233 && (s == PL_linestart || s[-1] == '\n') )
6234 {
3280af22
NIS
6235 PL_lex_formbrack = 0;
6236 PL_expect = XSTATE;
79072805
LW
6237 goto rightbracket;
6238 }
be25f609 6239 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6240 s += 3;
6241 OPERATOR(YADAYADA);
6242 }
3280af22 6243 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6244 char tmp = *s++;
a687059c 6245 if (*s == tmp) {
78cdf107
Z
6246 if (!PL_lex_allbrackets &&
6247 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6248 s--;
6249 TOKEN(0);
6250 }
a687059c 6251 s++;
2f3197b3
LW
6252 if (*s == tmp) {
6253 s++;
6154021b 6254 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6255 }
6256 else
6154021b 6257 pl_yylval.ival = 0;
378cc40b 6258 OPERATOR(DOTDOT);
a687059c 6259 }
78cdf107
Z
6260 if (*s == '=' && !PL_lex_allbrackets &&
6261 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6262 s--;
6263 TOKEN(0);
6264 }
79072805 6265 Aop(OP_CONCAT);
378cc40b
LW
6266 }
6267 /* FALL THROUGH */
6268 case '0': case '1': case '2': case '3': case '4':
6269 case '5': case '6': case '7': case '8': case '9':
6154021b 6270 s = scan_num(s, &pl_yylval);
931e0695 6271 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6272 if (PL_expect == XOPERATOR)
8990e307 6273 no_op("Number",s);
79072805
LW
6274 TERM(THING);
6275
6276 case '\'':
5db06880 6277 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6278 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6279 if (PL_expect == XOPERATOR) {
6280 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6281 return deprecate_commaless_var_list();
a0d0e21e 6282 }
463ee0b2 6283 else
8990e307 6284 no_op("String",s);
463ee0b2 6285 }
79072805 6286 if (!s)
d4c19fe8 6287 missingterm(NULL);
6154021b 6288 pl_yylval.ival = OP_CONST;
79072805
LW
6289 TERM(sublex_start());
6290
6291 case '"':
5db06880 6292 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6293 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6294 if (PL_expect == XOPERATOR) {
6295 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6296 return deprecate_commaless_var_list();
a0d0e21e 6297 }
463ee0b2 6298 else
8990e307 6299 no_op("String",s);
463ee0b2 6300 }
79072805 6301 if (!s)
d4c19fe8 6302 missingterm(NULL);
6154021b 6303 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6304 /* FIXME. I think that this can be const if char *d is replaced by
6305 more localised variables. */
3280af22 6306 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6307 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6308 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6309 break;
6310 }
6311 }
79072805
LW
6312 TERM(sublex_start());
6313
6314 case '`':
5db06880 6315 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6316 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6317 if (PL_expect == XOPERATOR)
8990e307 6318 no_op("Backticks",s);
79072805 6319 if (!s)
d4c19fe8 6320 missingterm(NULL);
9b201d7d 6321 readpipe_override();
79072805
LW
6322 TERM(sublex_start());
6323
6324 case '\\':
6325 s++;
a2a5de95
NC
6326 if (PL_lex_inwhat && isDIGIT(*s))
6327 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6328 *s, *s);
3280af22 6329 if (PL_expect == XOPERATOR)
8990e307 6330 no_op("Backslash",s);
79072805
LW
6331 OPERATOR(REFGEN);
6332
a7cb1f99 6333 case 'v':
e526c9e6 6334 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6335 char *start = s + 2;
dd629d5b 6336 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6337 start++;
6338 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6339 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6340 TERM(THING);
6341 }
e526c9e6 6342 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6343 else if (!isALPHA(*start) && (PL_expect == XTERM
6344 || PL_expect == XREF || PL_expect == XSTATE
6345 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6346 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6347 if (!gv) {
6154021b 6348 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6349 TERM(THING);
6350 }
6351 }
a7cb1f99
GS
6352 }
6353 goto keylookup;
79072805 6354 case 'x':
3280af22 6355 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6356 s++;
6357 Mop(OP_REPEAT);
2f3197b3 6358 }
79072805
LW
6359 goto keylookup;
6360
378cc40b 6361 case '_':
79072805
LW
6362 case 'a': case 'A':
6363 case 'b': case 'B':
6364 case 'c': case 'C':
6365 case 'd': case 'D':
6366 case 'e': case 'E':
6367 case 'f': case 'F':
6368 case 'g': case 'G':
6369 case 'h': case 'H':
6370 case 'i': case 'I':
6371 case 'j': case 'J':
6372 case 'k': case 'K':
6373 case 'l': case 'L':
6374 case 'm': case 'M':
6375 case 'n': case 'N':
6376 case 'o': case 'O':
6377 case 'p': case 'P':
6378 case 'q': case 'Q':
6379 case 'r': case 'R':
6380 case 's': case 'S':
6381 case 't': case 'T':
6382 case 'u': case 'U':
a7cb1f99 6383 case 'V':
79072805
LW
6384 case 'w': case 'W':
6385 case 'X':
6386 case 'y': case 'Y':
6387 case 'z': case 'Z':
6388
49dc05e3 6389 keylookup: {
88e1f1a2 6390 bool anydelim;
90771dc0 6391 I32 tmp;
10edeb5d
JH
6392
6393 orig_keyword = 0;
6394 gv = NULL;
6395 gvp = NULL;
49dc05e3 6396
3280af22
NIS
6397 PL_bufptr = s;
6398 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6399
6400 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6401 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6402
6403 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6404 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6405 goto just_a_word;
6406
3643fb5f 6407 d = s;
3280af22 6408 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6409 d++; /* no comments skipped here, or s### is misparsed */
6410
748a9306 6411 /* Is this a word before a => operator? */
1c3923b3 6412 if (*d == '=' && d[1] == '>') {
748a9306 6413 CLINE;
6154021b 6414 pl_yylval.opval
d0a148a6
NC
6415 = (OP*)newSVOP(OP_CONST, 0,
6416 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6417 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6418 TERM(WORD);
6419 }
6420
88e1f1a2
JV
6421 /* Check for plugged-in keyword */
6422 {
6423 OP *o;
6424 int result;
6425 char *saved_bufptr = PL_bufptr;
6426 PL_bufptr = s;
16c91539 6427 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6428 s = PL_bufptr;
6429 if (result == KEYWORD_PLUGIN_DECLINE) {
6430 /* not a plugged-in keyword */
6431 PL_bufptr = saved_bufptr;
6432 } else if (result == KEYWORD_PLUGIN_STMT) {
6433 pl_yylval.opval = o;
6434 CLINE;
6435 PL_expect = XSTATE;
6436 return REPORT(PLUGSTMT);
6437 } else if (result == KEYWORD_PLUGIN_EXPR) {
6438 pl_yylval.opval = o;
6439 CLINE;
6440 PL_expect = XOPERATOR;
6441 return REPORT(PLUGEXPR);
6442 } else {
6443 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6444 PL_tokenbuf);
6445 }
6446 }
6447
6448 /* Check for built-in keyword */
6449 tmp = keyword(PL_tokenbuf, len, 0);
6450
6451 /* Is this a label? */
6452 if (!anydelim && PL_expect == XSTATE
6453 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6454 s = d + 1;
6455 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6456 CLINE;
6457 TOKEN(LABEL);
6458 }
6459
a0d0e21e 6460 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6461 GV *ogv = NULL; /* override (winner) */
6462 GV *hgv = NULL; /* hidden (loser) */
3280af22 6463 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6464 CV *cv;
90e5519e 6465 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6466 (cv = GvCVu(gv)))
6467 {
6468 if (GvIMPORTED_CV(gv))
6469 ogv = gv;
6470 else if (! CvMETHOD(cv))
6471 hgv = gv;
6472 }
6473 if (!ogv &&
3280af22 6474 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6475 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6476 GvCVu(gv) && GvIMPORTED_CV(gv))
6477 {
6478 ogv = gv;
6479 }
6480 }
6481 if (ogv) {
30fe34ed 6482 orig_keyword = tmp;
56f7f34b 6483 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6484 }
6485 else if (gv && !gvp
6486 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6487 && GvCVu(gv))
6e7b2336
GS
6488 {
6489 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6490 }
56f7f34b
CS
6491 else { /* no override */
6492 tmp = -tmp;
a2a5de95
NC
6493 if (tmp == KEY_dump) {
6494 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6495 "dump() better written as CORE::dump()");
ac206dc8 6496 }
a0714e2c 6497 gv = NULL;
56f7f34b 6498 gvp = 0;
a2a5de95
NC
6499 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6500 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6501 "Ambiguous call resolved as CORE::%s(), "
6502 "qualify as such or use &",
6503 GvENAME(hgv));
49dc05e3 6504 }
a0d0e21e
LW
6505 }
6506
6507 reserved_word:
6508 switch (tmp) {
79072805
LW
6509
6510 default: /* not a keyword */
0bfa2a8a
NC
6511 /* Trade off - by using this evil construction we can pull the
6512 variable gv into the block labelled keylookup. If not, then
6513 we have to give it function scope so that the goto from the
6514 earlier ':' case doesn't bypass the initialisation. */
6515 if (0) {
6516 just_a_word_zero_gv:
6517 gv = NULL;
6518 gvp = NULL;
8bee0991 6519 orig_keyword = 0;
0bfa2a8a 6520 }
93a17b20 6521 just_a_word: {
96e4d5b1 6522 SV *sv;
ce29ac45 6523 int pkgname = 0;
f54cb97a 6524 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6525 OP *rv2cv_op;
5069cc75 6526 CV *cv;
5db06880 6527#ifdef PERL_MAD
cd81e915 6528 SV *nextPL_nextwhite = 0;
5db06880
NC
6529#endif
6530
8990e307
LW
6531
6532 /* Get the rest if it looks like a package qualifier */
6533
155aba94 6534 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6535 STRLEN morelen;
3280af22 6536 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6537 TRUE, &morelen);
6538 if (!morelen)
cea2e8a9 6539 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6540 *s == '\'' ? "'" : "::");
c3e0f903 6541 len += morelen;
ce29ac45 6542 pkgname = 1;
a0d0e21e 6543 }
8990e307 6544
3280af22
NIS
6545 if (PL_expect == XOPERATOR) {
6546 if (PL_bufptr == PL_linestart) {
57843af0 6547 CopLINE_dec(PL_curcop);
f1f66076 6548 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6549 CopLINE_inc(PL_curcop);
463ee0b2
LW
6550 }
6551 else
54310121 6552 no_op("Bareword",s);
463ee0b2 6553 }
8990e307 6554
c3e0f903 6555 /* Look for a subroutine with this name in current package,
486ec47a 6556 unless name is "Foo::", in which case Foo is a bareword
c3e0f903
GS
6557 (and a package name). */
6558
5db06880 6559 if (len > 2 && !PL_madskills &&
3280af22 6560 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6561 {
f776e3cd 6562 if (ckWARN(WARN_BAREWORD)
90e5519e 6563 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6564 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6565 "Bareword \"%s\" refers to nonexistent package",
3280af22 6566 PL_tokenbuf);
c3e0f903 6567 len -= 2;
3280af22 6568 PL_tokenbuf[len] = '\0';
a0714e2c 6569 gv = NULL;
c3e0f903
GS
6570 gvp = 0;
6571 }
6572 else {
62d55b22
NC
6573 if (!gv) {
6574 /* Mustn't actually add anything to a symbol table.
6575 But also don't want to "initialise" any placeholder
6576 constants that might already be there into full
6577 blown PVGVs with attached PVCV. */
90e5519e
NC
6578 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6579 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6580 }
b3d904f3 6581 len = 0;
c3e0f903
GS
6582 }
6583
6584 /* if we saw a global override before, get the right name */
8990e307 6585
37bb7629
EB
6586 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6587 len ? len : strlen(PL_tokenbuf));
49dc05e3 6588 if (gvp) {
37bb7629 6589 SV * const tmp_sv = sv;
396482e1 6590 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6591 sv_catsv(sv, tmp_sv);
6592 SvREFCNT_dec(tmp_sv);
8a7a129d 6593 }
37bb7629 6594
5db06880 6595#ifdef PERL_MAD
cd81e915
NC
6596 if (PL_madskills && !PL_thistoken) {
6597 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6598 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6599 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6600 }
6601#endif
8990e307 6602
a0d0e21e 6603 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6604 CLINE;
6154021b
RGS
6605 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6606 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6607
c3e0f903 6608 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6609 if (len)
6610 goto safe_bareword;
6611
f7461760 6612 {
d8ebba9f 6613 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
6614 const_op->op_private = OPpCONST_BARE;
6615 rv2cv_op = newCVREF(0, const_op);
6616 }
d9088386 6617 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6618
8990e307
LW
6619 /* See if it's the indirect object for a list operator. */
6620
3280af22
NIS
6621 if (PL_oldoldbufptr &&
6622 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6623 (PL_oldoldbufptr == PL_last_lop
6624 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6625 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6626 (PL_expect == XREF ||
6627 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6628 {
748a9306
LW
6629 bool immediate_paren = *s == '(';
6630
a0d0e21e 6631 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6632 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6633#ifdef PERL_MAD
cd81e915 6634 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6635#endif
a0d0e21e
LW
6636
6637 /* Two barewords in a row may indicate method call. */
6638
62d55b22 6639 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6640 (tmp = intuit_method(s, gv, cv))) {
6641 op_free(rv2cv_op);
78cdf107
Z
6642 if (tmp == METHOD && !PL_lex_allbrackets &&
6643 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6644 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6645 return REPORT(tmp);
f7461760 6646 }
a0d0e21e
LW
6647
6648 /* If not a declared subroutine, it's an indirect object. */
6649 /* (But it's an indir obj regardless for sort.) */
7294df96 6650 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6651
7294df96
RGS
6652 if (
6653 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6654 (!cv &&
a9ef352a 6655 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6656 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6657 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6658 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6659 )
a9ef352a 6660 {
3280af22 6661 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6662 goto bareword;
93a17b20
LW
6663 }
6664 }
8990e307 6665
3280af22 6666 PL_expect = XOPERATOR;
5db06880
NC
6667#ifdef PERL_MAD
6668 if (isSPACE(*s))
cd81e915
NC
6669 s = SKIPSPACE2(s,nextPL_nextwhite);
6670 PL_nextwhite = nextPL_nextwhite;
5db06880 6671#else
8990e307 6672 s = skipspace(s);
5db06880 6673#endif
1c3923b3
GS
6674
6675 /* Is this a word before a => operator? */
ce29ac45 6676 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6677 op_free(rv2cv_op);
1c3923b3 6678 CLINE;
6154021b 6679 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6680 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6681 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6682 TERM(WORD);
6683 }
6684
6685 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6686 if (*s == '(') {
79072805 6687 CLINE;
5069cc75 6688 if (cv) {
c35e046a
AL
6689 d = s + 1;
6690 while (SPACE_OR_TAB(*d))
6691 d++;
f7461760 6692 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6693 s = d + 1;
c631f32b 6694 goto its_constant;
96e4d5b1 6695 }
6696 }
5db06880
NC
6697#ifdef PERL_MAD
6698 if (PL_madskills) {
cd81e915
NC
6699 PL_nextwhite = PL_thiswhite;
6700 PL_thiswhite = 0;
5db06880 6701 }
cd81e915 6702 start_force(PL_curforce);
5db06880 6703#endif
6154021b 6704 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6705 PL_expect = XOPERATOR;
5db06880
NC
6706#ifdef PERL_MAD
6707 if (PL_madskills) {
cd81e915
NC
6708 PL_nextwhite = nextPL_nextwhite;
6709 curmad('X', PL_thistoken);
6b29d1f5 6710 PL_thistoken = newSVpvs("");
5db06880
NC
6711 }
6712#endif
f7461760 6713 op_free(rv2cv_op);
93a17b20 6714 force_next(WORD);
6154021b 6715 pl_yylval.ival = 0;
463ee0b2 6716 TOKEN('&');
79072805 6717 }
93a17b20 6718
a0d0e21e 6719 /* If followed by var or block, call it a method (unless sub) */
8990e307 6720
f7461760
Z
6721 if ((*s == '$' || *s == '{') && !cv) {
6722 op_free(rv2cv_op);
3280af22
NIS
6723 PL_last_lop = PL_oldbufptr;
6724 PL_last_lop_op = OP_METHOD;
78cdf107
Z
6725 if (!PL_lex_allbrackets &&
6726 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6727 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 6728 PREBLOCK(METHOD);
463ee0b2
LW
6729 }
6730
8990e307
LW
6731 /* If followed by a bareword, see if it looks like indir obj. */
6732
30fe34ed
RGS
6733 if (!orig_keyword
6734 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6735 && (tmp = intuit_method(s, gv, cv))) {
6736 op_free(rv2cv_op);
78cdf107
Z
6737 if (tmp == METHOD && !PL_lex_allbrackets &&
6738 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6739 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6740 return REPORT(tmp);
f7461760 6741 }
93a17b20 6742
8990e307
LW
6743 /* Not a method, so call it a subroutine (if defined) */
6744
5069cc75 6745 if (cv) {
9b387841
NC
6746 if (lastchar == '-')
6747 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6748 "Ambiguous use of -%s resolved as -&%s()",
6749 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6750 /* Check for a constant sub */
f7461760 6751 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6752 its_constant:
f7461760 6753 op_free(rv2cv_op);
6154021b
RGS
6754 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6755 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6756 pl_yylval.opval->op_private = 0;
6b7c6d95 6757 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 6758 TOKEN(WORD);
89bfa8cd 6759 }
6760
6154021b 6761 op_free(pl_yylval.opval);
f7461760 6762 pl_yylval.opval = rv2cv_op;
6154021b 6763 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6764 PL_last_lop = PL_oldbufptr;
bf848113 6765 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6766 /* Is there a prototype? */
5db06880
NC
6767 if (
6768#ifdef PERL_MAD
6769 cv &&
6770#endif
d9f2850e
RGS
6771 SvPOK(cv))
6772 {
5f66b61c 6773 STRLEN protolen;
daba3364 6774 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6775 if (!protolen)
4633a7c4 6776 TERM(FUNC0SUB);
0f5d0394
AE
6777 while (*proto == ';')
6778 proto++;
649d02de
FC
6779 if (
6780 (
6781 (
6782 *proto == '$' || *proto == '_'
c035a075 6783 || *proto == '*' || *proto == '+'
649d02de
FC
6784 )
6785 && proto[1] == '\0'
6786 )
6787 || (
6788 *proto == '\\' && proto[1] && proto[2] == '\0'
6789 )
6790 )
6791 OPERATOR(UNIOPSUB);
6792 if (*proto == '\\' && proto[1] == '[') {
6793 const char *p = proto + 2;
6794 while(*p && *p != ']')
6795 ++p;
6796 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6797 }
7a52d87a 6798 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6799 if (PL_curstash)
6800 sv_setpvs(PL_subname, "__ANON__");
6801 else
6802 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
6803 if (!PL_lex_allbrackets &&
6804 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6805 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
6806 PREBLOCK(LSTOPSUB);
6807 }
a9ef352a 6808 }
5db06880
NC
6809#ifdef PERL_MAD
6810 {
6811 if (PL_madskills) {
cd81e915
NC
6812 PL_nextwhite = PL_thiswhite;
6813 PL_thiswhite = 0;
5db06880 6814 }
cd81e915 6815 start_force(PL_curforce);
6154021b 6816 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6817 PL_expect = XTERM;
6818 if (PL_madskills) {
cd81e915
NC
6819 PL_nextwhite = nextPL_nextwhite;
6820 curmad('X', PL_thistoken);
6b29d1f5 6821 PL_thistoken = newSVpvs("");
5db06880
NC
6822 }
6823 force_next(WORD);
78cdf107
Z
6824 if (!PL_lex_allbrackets &&
6825 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6826 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6827 TOKEN(NOAMP);
6828 }
6829 }
6830
6831 /* Guess harder when madskills require "best effort". */
6832 if (PL_madskills && (!gv || !GvCVu(gv))) {
6833 int probable_sub = 0;
6834 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6835 probable_sub = 1;
6836 else if (isALPHA(*s)) {
6837 char tmpbuf[1024];
6838 STRLEN tmplen;
6839 d = s;
6840 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6841 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6842 probable_sub = 1;
6843 else {
6844 while (d < PL_bufend && isSPACE(*d))
6845 d++;
6846 if (*d == '=' && d[1] == '>')
6847 probable_sub = 1;
6848 }
6849 }
6850 if (probable_sub) {
7a6d04f4 6851 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6852 op_free(pl_yylval.opval);
f7461760 6853 pl_yylval.opval = rv2cv_op;
6154021b 6854 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6855 PL_last_lop = PL_oldbufptr;
6856 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6857 PL_nextwhite = PL_thiswhite;
6858 PL_thiswhite = 0;
6859 start_force(PL_curforce);
6154021b 6860 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6861 PL_expect = XTERM;
cd81e915
NC
6862 PL_nextwhite = nextPL_nextwhite;
6863 curmad('X', PL_thistoken);
6b29d1f5 6864 PL_thistoken = newSVpvs("");
5db06880 6865 force_next(WORD);
78cdf107
Z
6866 if (!PL_lex_allbrackets &&
6867 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6868 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6869 TOKEN(NOAMP);
6870 }
6871#else
6154021b 6872 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6873 PL_expect = XTERM;
8990e307 6874 force_next(WORD);
78cdf107
Z
6875 if (!PL_lex_allbrackets &&
6876 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6877 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 6878 TOKEN(NOAMP);
5db06880 6879#endif
8990e307 6880 }
748a9306 6881
8990e307
LW
6882 /* Call it a bare word */
6883
5603f27d 6884 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6885 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6886 else {
9a073a1d
RGS
6887 bareword:
6888 /* after "print" and similar functions (corresponding to
6889 * "F? L" in opcode.pl), whatever wasn't already parsed as
6890 * a filehandle should be subject to "strict subs".
6891 * Likewise for the optional indirect-object argument to system
6892 * or exec, which can't be a bareword */
6893 if ((PL_last_lop_op == OP_PRINT
6894 || PL_last_lop_op == OP_PRTF
6895 || PL_last_lop_op == OP_SAY
6896 || PL_last_lop_op == OP_SYSTEM
6897 || PL_last_lop_op == OP_EXEC)
6898 && (PL_hints & HINT_STRICT_SUBS))
6899 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6900 if (lastchar != '-') {
6901 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6902 d = PL_tokenbuf;
6903 while (isLOWER(*d))
6904 d++;
da51bb9b 6905 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6906 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6907 PL_tokenbuf);
6908 }
748a9306
LW
6909 }
6910 }
f7461760 6911 op_free(rv2cv_op);
c3e0f903
GS
6912
6913 safe_bareword:
9b387841
NC
6914 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6915 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6916 "Operator or semicolon missing before %c%s",
6917 lastchar, PL_tokenbuf);
6918 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6919 "Ambiguous use of %c resolved as operator %c",
6920 lastchar, lastchar);
748a9306 6921 }
93a17b20 6922 TOKEN(WORD);
79072805 6923 }
79072805 6924
68dc0745 6925 case KEY___FILE__:
6154021b 6926 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6927 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6928 TERM(THING);
6929
79072805 6930 case KEY___LINE__:
6154021b 6931 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6932 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6933 TERM(THING);
68dc0745 6934
6935 case KEY___PACKAGE__:
6154021b 6936 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6937 (PL_curstash
5aaec2b4 6938 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6939 : &PL_sv_undef));
79072805 6940 TERM(THING);
79072805 6941
e50aee73 6942 case KEY___DATA__:
79072805
LW
6943 case KEY___END__: {
6944 GV *gv;
3280af22 6945 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6946 const char *pname = "main";
3280af22 6947 if (PL_tokenbuf[2] == 'D')
bfcb3514 6948 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6949 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6950 SVt_PVIO);
a5f75d66 6951 GvMULTI_on(gv);
79072805 6952 if (!GvIO(gv))
a0d0e21e 6953 GvIOp(gv) = newIO();
3280af22 6954 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6955#if defined(HAS_FCNTL) && defined(F_SETFD)
6956 {
f54cb97a 6957 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6958 fcntl(fd,F_SETFD,fd >= 3);
6959 }
79072805 6960#endif
fd049845 6961 /* Mark this internal pseudo-handle as clean */
6962 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6963 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6964 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6965 else
50952442 6966 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6967#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6968 /* if the script was opened in binmode, we need to revert
53129d29 6969 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6970 * XXX this is a questionable hack at best. */
53129d29
GS
6971 if (PL_bufend-PL_bufptr > 2
6972 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6973 {
6974 Off_t loc = 0;
50952442 6975 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6976 loc = PerlIO_tell(PL_rsfp);
6977 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6978 }
2986a63f
JH
6979#ifdef NETWARE
6980 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6981#else
c39cd008 6982 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6983#endif /* NETWARE */
1143fce0
JH
6984#ifdef PERLIO_IS_STDIO /* really? */
6985# if defined(__BORLANDC__)
cb359b41
JH
6986 /* XXX see note in do_binmode() */
6987 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6988# endif
6989#endif
c39cd008
GS
6990 if (loc > 0)
6991 PerlIO_seek(PL_rsfp, loc, 0);
6992 }
6993 }
6994#endif
7948272d 6995#ifdef PERLIO_LAYERS
52d2e0f4
JH
6996 if (!IN_BYTES) {
6997 if (UTF)
6998 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6999 else if (PL_encoding) {
7000 SV *name;
7001 dSP;
7002 ENTER;
7003 SAVETMPS;
7004 PUSHMARK(sp);
7005 EXTEND(SP, 1);
7006 XPUSHs(PL_encoding);
7007 PUTBACK;
7008 call_method("name", G_SCALAR);
7009 SPAGAIN;
7010 name = POPs;
7011 PUTBACK;
bfed75c6 7012 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7013 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7014 SVfARG(name)));
52d2e0f4
JH
7015 FREETMPS;
7016 LEAVE;
7017 }
7018 }
7948272d 7019#endif
5db06880
NC
7020#ifdef PERL_MAD
7021 if (PL_madskills) {
cd81e915
NC
7022 if (PL_realtokenstart >= 0) {
7023 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7024 if (!PL_endwhite)
6b29d1f5 7025 PL_endwhite = newSVpvs("");
cd81e915
NC
7026 sv_catsv(PL_endwhite, PL_thiswhite);
7027 PL_thiswhite = 0;
7028 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7029 PL_realtokenstart = -1;
5db06880 7030 }
5cc814fd
NC
7031 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7032 != NULL) ;
5db06880
NC
7033 }
7034#endif
4608196e 7035 PL_rsfp = NULL;
79072805
LW
7036 }
7037 goto fake_eof;
e929a76b 7038 }
de3bb511 7039
8990e307 7040 case KEY_AUTOLOAD:
ed6116ce 7041 case KEY_DESTROY:
79072805 7042 case KEY_BEGIN:
3c10abe3 7043 case KEY_UNITCHECK:
7d30b5c4 7044 case KEY_CHECK:
7d07dbc2 7045 case KEY_INIT:
7d30b5c4 7046 case KEY_END:
3280af22
NIS
7047 if (PL_expect == XSTATE) {
7048 s = PL_bufptr;
93a17b20 7049 goto really_sub;
79072805
LW
7050 }
7051 goto just_a_word;
7052
a0d0e21e
LW
7053 case KEY_CORE:
7054 if (*s == ':' && s[1] == ':') {
7055 s += 2;
748a9306 7056 d = s;
3280af22 7057 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
9dcb8368 7058 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
6798c92b 7059 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
7060 if (tmp < 0)
7061 tmp = -tmp;
a5c70c4d 7062 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 7063 /* that's a way to remember we saw "CORE::" */
850e8516 7064 orig_keyword = tmp;
a0d0e21e
LW
7065 goto reserved_word;
7066 }
7067 goto just_a_word;
7068
463ee0b2
LW
7069 case KEY_abs:
7070 UNI(OP_ABS);
7071
79072805
LW
7072 case KEY_alarm:
7073 UNI(OP_ALARM);
7074
7075 case KEY_accept:
a0d0e21e 7076 LOP(OP_ACCEPT,XTERM);
79072805 7077
463ee0b2 7078 case KEY_and:
78cdf107
Z
7079 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7080 return REPORT(0);
463ee0b2
LW
7081 OPERATOR(ANDOP);
7082
79072805 7083 case KEY_atan2:
a0d0e21e 7084 LOP(OP_ATAN2,XTERM);
85e6fe83 7085
79072805 7086 case KEY_bind:
a0d0e21e 7087 LOP(OP_BIND,XTERM);
79072805
LW
7088
7089 case KEY_binmode:
1c1fc3ea 7090 LOP(OP_BINMODE,XTERM);
79072805
LW
7091
7092 case KEY_bless:
a0d0e21e 7093 LOP(OP_BLESS,XTERM);
79072805 7094
0d863452
RH
7095 case KEY_break:
7096 FUN0(OP_BREAK);
7097
79072805
LW
7098 case KEY_chop:
7099 UNI(OP_CHOP);
7100
7101 case KEY_continue:
0d863452
RH
7102 /* We have to disambiguate the two senses of
7103 "continue". If the next token is a '{' then
7104 treat it as the start of a continue block;
7105 otherwise treat it as a control operator.
7106 */
7107 s = skipspace(s);
7108 if (*s == '{')
79072805 7109 PREBLOCK(CONTINUE);
0d863452
RH
7110 else
7111 FUN0(OP_CONTINUE);
79072805
LW
7112
7113 case KEY_chdir:
fafc274c
NC
7114 /* may use HOME */
7115 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7116 UNI(OP_CHDIR);
7117
7118 case KEY_close:
7119 UNI(OP_CLOSE);
7120
7121 case KEY_closedir:
7122 UNI(OP_CLOSEDIR);
7123
7124 case KEY_cmp:
78cdf107
Z
7125 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7126 return REPORT(0);
79072805
LW
7127 Eop(OP_SCMP);
7128
7129 case KEY_caller:
7130 UNI(OP_CALLER);
7131
7132 case KEY_crypt:
7133#ifdef FCRYPT
f4c556ac
GS
7134 if (!PL_cryptseen) {
7135 PL_cryptseen = TRUE;
de3bb511 7136 init_des();
f4c556ac 7137 }
a687059c 7138#endif
a0d0e21e 7139 LOP(OP_CRYPT,XTERM);
79072805
LW
7140
7141 case KEY_chmod:
a0d0e21e 7142 LOP(OP_CHMOD,XTERM);
79072805
LW
7143
7144 case KEY_chown:
a0d0e21e 7145 LOP(OP_CHOWN,XTERM);
79072805
LW
7146
7147 case KEY_connect:
a0d0e21e 7148 LOP(OP_CONNECT,XTERM);
79072805 7149
463ee0b2
LW
7150 case KEY_chr:
7151 UNI(OP_CHR);
7152
79072805
LW
7153 case KEY_cos:
7154 UNI(OP_COS);
7155
7156 case KEY_chroot:
7157 UNI(OP_CHROOT);
7158
0d863452
RH
7159 case KEY_default:
7160 PREBLOCK(DEFAULT);
7161
79072805 7162 case KEY_do:
29595ff2 7163 s = SKIPSPACE1(s);
79072805 7164 if (*s == '{')
a0d0e21e 7165 PRETERMBLOCK(DO);
79072805 7166 if (*s != '\'')
89c5585f 7167 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
7168 if (orig_keyword == KEY_do) {
7169 orig_keyword = 0;
6154021b 7170 pl_yylval.ival = 1;
850e8516
RGS
7171 }
7172 else
6154021b 7173 pl_yylval.ival = 0;
378cc40b 7174 OPERATOR(DO);
79072805
LW
7175
7176 case KEY_die:
3280af22 7177 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7178 LOP(OP_DIE,XTERM);
79072805
LW
7179
7180 case KEY_defined:
7181 UNI(OP_DEFINED);
7182
7183 case KEY_delete:
a0d0e21e 7184 UNI(OP_DELETE);
79072805
LW
7185
7186 case KEY_dbmopen:
74e8ce34
NC
7187 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7188 STR_WITH_LEN("NDBM_File::"),
7189 STR_WITH_LEN("DB_File::"),
7190 STR_WITH_LEN("GDBM_File::"),
7191 STR_WITH_LEN("SDBM_File::"),
7192 STR_WITH_LEN("ODBM_File::"),
7193 NULL);
a0d0e21e 7194 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7195
7196 case KEY_dbmclose:
7197 UNI(OP_DBMCLOSE);
7198
7199 case KEY_dump:
a0d0e21e 7200 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7201 LOOPX(OP_DUMP);
7202
7203 case KEY_else:
7204 PREBLOCK(ELSE);
7205
7206 case KEY_elsif:
6154021b 7207 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7208 OPERATOR(ELSIF);
7209
7210 case KEY_eq:
78cdf107
Z
7211 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7212 return REPORT(0);
79072805
LW
7213 Eop(OP_SEQ);
7214
a0d0e21e
LW
7215 case KEY_exists:
7216 UNI(OP_EXISTS);
4e553d73 7217
79072805 7218 case KEY_exit:
5db06880
NC
7219 if (PL_madskills)
7220 UNI(OP_INT);
79072805
LW
7221 UNI(OP_EXIT);
7222
7223 case KEY_eval:
29595ff2 7224 s = SKIPSPACE1(s);
32e2a35d
RGS
7225 if (*s == '{') { /* block eval */
7226 PL_expect = XTERMBLOCK;
7227 UNIBRACK(OP_ENTERTRY);
7228 }
7229 else { /* string eval */
7230 PL_expect = XTERM;
7231 UNIBRACK(OP_ENTEREVAL);
7232 }
79072805
LW
7233
7234 case KEY_eof:
7235 UNI(OP_EOF);
7236
7237 case KEY_exp:
7238 UNI(OP_EXP);
7239
7240 case KEY_each:
7241 UNI(OP_EACH);
7242
7243 case KEY_exec:
a0d0e21e 7244 LOP(OP_EXEC,XREF);
79072805
LW
7245
7246 case KEY_endhostent:
7247 FUN0(OP_EHOSTENT);
7248
7249 case KEY_endnetent:
7250 FUN0(OP_ENETENT);
7251
7252 case KEY_endservent:
7253 FUN0(OP_ESERVENT);
7254
7255 case KEY_endprotoent:
7256 FUN0(OP_EPROTOENT);
7257
7258 case KEY_endpwent:
7259 FUN0(OP_EPWENT);
7260
7261 case KEY_endgrent:
7262 FUN0(OP_EGRENT);
7263
7264 case KEY_for:
7265 case KEY_foreach:
78cdf107
Z
7266 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7267 return REPORT(0);
6154021b 7268 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7269 s = SKIPSPACE1(s);
7e2040f0 7270 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7271 char *p = s;
5db06880
NC
7272#ifdef PERL_MAD
7273 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7274#endif
7275
3280af22 7276 if ((PL_bufend - p) >= 3 &&
55497cff 7277 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7278 p += 2;
77ca0c92
LW
7279 else if ((PL_bufend - p) >= 4 &&
7280 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7281 p += 3;
29595ff2 7282 p = PEEKSPACE(p);
7e2040f0 7283 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7284 p = scan_ident(p, PL_bufend,
7285 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7286 p = PEEKSPACE(p);
77ca0c92
LW
7287 }
7288 if (*p != '$')
cea2e8a9 7289 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7290#ifdef PERL_MAD
7291 s = SvPVX(PL_linestr) + soff;
7292#endif
55497cff 7293 }
79072805
LW
7294 OPERATOR(FOR);
7295
7296 case KEY_formline:
a0d0e21e 7297 LOP(OP_FORMLINE,XTERM);
79072805
LW
7298
7299 case KEY_fork:
7300 FUN0(OP_FORK);
7301
7302 case KEY_fcntl:
a0d0e21e 7303 LOP(OP_FCNTL,XTERM);
79072805
LW
7304
7305 case KEY_fileno:
7306 UNI(OP_FILENO);
7307
7308 case KEY_flock:
a0d0e21e 7309 LOP(OP_FLOCK,XTERM);
79072805
LW
7310
7311 case KEY_gt:
78cdf107
Z
7312 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7313 return REPORT(0);
79072805
LW
7314 Rop(OP_SGT);
7315
7316 case KEY_ge:
78cdf107
Z
7317 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7318 return REPORT(0);
79072805
LW
7319 Rop(OP_SGE);
7320
7321 case KEY_grep:
2c38e13d 7322 LOP(OP_GREPSTART, XREF);
79072805
LW
7323
7324 case KEY_goto:
a0d0e21e 7325 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7326 LOOPX(OP_GOTO);
7327
7328 case KEY_gmtime:
7329 UNI(OP_GMTIME);
7330
7331 case KEY_getc:
6f33ba73 7332 UNIDOR(OP_GETC);
79072805
LW
7333
7334 case KEY_getppid:
7335 FUN0(OP_GETPPID);
7336
7337 case KEY_getpgrp:
7338 UNI(OP_GETPGRP);
7339
7340 case KEY_getpriority:
a0d0e21e 7341 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7342
7343 case KEY_getprotobyname:
7344 UNI(OP_GPBYNAME);
7345
7346 case KEY_getprotobynumber:
a0d0e21e 7347 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7348
7349 case KEY_getprotoent:
7350 FUN0(OP_GPROTOENT);
7351
7352 case KEY_getpwent:
7353 FUN0(OP_GPWENT);
7354
7355 case KEY_getpwnam:
ff68c719 7356 UNI(OP_GPWNAM);
79072805
LW
7357
7358 case KEY_getpwuid:
ff68c719 7359 UNI(OP_GPWUID);
79072805
LW
7360
7361 case KEY_getpeername:
7362 UNI(OP_GETPEERNAME);
7363
7364 case KEY_gethostbyname:
7365 UNI(OP_GHBYNAME);
7366
7367 case KEY_gethostbyaddr:
a0d0e21e 7368 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7369
7370 case KEY_gethostent:
7371 FUN0(OP_GHOSTENT);
7372
7373 case KEY_getnetbyname:
7374 UNI(OP_GNBYNAME);
7375
7376 case KEY_getnetbyaddr:
a0d0e21e 7377 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7378
7379 case KEY_getnetent:
7380 FUN0(OP_GNETENT);
7381
7382 case KEY_getservbyname:
a0d0e21e 7383 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7384
7385 case KEY_getservbyport:
a0d0e21e 7386 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7387
7388 case KEY_getservent:
7389 FUN0(OP_GSERVENT);
7390
7391 case KEY_getsockname:
7392 UNI(OP_GETSOCKNAME);
7393
7394 case KEY_getsockopt:
a0d0e21e 7395 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7396
7397 case KEY_getgrent:
7398 FUN0(OP_GGRENT);
7399
7400 case KEY_getgrnam:
ff68c719 7401 UNI(OP_GGRNAM);
79072805
LW
7402
7403 case KEY_getgrgid:
ff68c719 7404 UNI(OP_GGRGID);
79072805
LW
7405
7406 case KEY_getlogin:
7407 FUN0(OP_GETLOGIN);
7408
0d863452 7409 case KEY_given:
6154021b 7410 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7411 OPERATOR(GIVEN);
7412
93a17b20 7413 case KEY_glob:
a0d0e21e 7414 LOP(OP_GLOB,XTERM);
93a17b20 7415
79072805
LW
7416 case KEY_hex:
7417 UNI(OP_HEX);
7418
7419 case KEY_if:
78cdf107
Z
7420 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7421 return REPORT(0);
6154021b 7422 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7423 OPERATOR(IF);
7424
7425 case KEY_index:
a0d0e21e 7426 LOP(OP_INDEX,XTERM);
79072805
LW
7427
7428 case KEY_int:
7429 UNI(OP_INT);
7430
7431 case KEY_ioctl:
a0d0e21e 7432 LOP(OP_IOCTL,XTERM);
79072805
LW
7433
7434 case KEY_join:
a0d0e21e 7435 LOP(OP_JOIN,XTERM);
79072805
LW
7436
7437 case KEY_keys:
7438 UNI(OP_KEYS);
7439
7440 case KEY_kill:
a0d0e21e 7441 LOP(OP_KILL,XTERM);
79072805
LW
7442
7443 case KEY_last:
a0d0e21e 7444 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7445 LOOPX(OP_LAST);
4e553d73 7446
79072805
LW
7447 case KEY_lc:
7448 UNI(OP_LC);
7449
7450 case KEY_lcfirst:
7451 UNI(OP_LCFIRST);
7452
7453 case KEY_local:
6154021b 7454 pl_yylval.ival = 0;
79072805
LW
7455 OPERATOR(LOCAL);
7456
7457 case KEY_length:
7458 UNI(OP_LENGTH);
7459
7460 case KEY_lt:
78cdf107
Z
7461 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7462 return REPORT(0);
79072805
LW
7463 Rop(OP_SLT);
7464
7465 case KEY_le:
78cdf107
Z
7466 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7467 return REPORT(0);
79072805
LW
7468 Rop(OP_SLE);
7469
7470 case KEY_localtime:
7471 UNI(OP_LOCALTIME);
7472
7473 case KEY_log:
7474 UNI(OP_LOG);
7475
7476 case KEY_link:
a0d0e21e 7477 LOP(OP_LINK,XTERM);
79072805
LW
7478
7479 case KEY_listen:
a0d0e21e 7480 LOP(OP_LISTEN,XTERM);
79072805 7481
c0329465
MB
7482 case KEY_lock:
7483 UNI(OP_LOCK);
7484
79072805
LW
7485 case KEY_lstat:
7486 UNI(OP_LSTAT);
7487
7488 case KEY_m:
8782bef2 7489 s = scan_pat(s,OP_MATCH);
79072805
LW
7490 TERM(sublex_start());
7491
a0d0e21e 7492 case KEY_map:
2c38e13d 7493 LOP(OP_MAPSTART, XREF);
4e4e412b 7494
79072805 7495 case KEY_mkdir:
a0d0e21e 7496 LOP(OP_MKDIR,XTERM);
79072805
LW
7497
7498 case KEY_msgctl:
a0d0e21e 7499 LOP(OP_MSGCTL,XTERM);
79072805
LW
7500
7501 case KEY_msgget:
a0d0e21e 7502 LOP(OP_MSGGET,XTERM);
79072805
LW
7503
7504 case KEY_msgrcv:
a0d0e21e 7505 LOP(OP_MSGRCV,XTERM);
79072805
LW
7506
7507 case KEY_msgsnd:
a0d0e21e 7508 LOP(OP_MSGSND,XTERM);
79072805 7509
77ca0c92 7510 case KEY_our:
93a17b20 7511 case KEY_my:
952306ac 7512 case KEY_state:
eac04b2e 7513 PL_in_my = (U16)tmp;
29595ff2 7514 s = SKIPSPACE1(s);
7e2040f0 7515 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7516#ifdef PERL_MAD
7517 char* start = s;
7518#endif
3280af22 7519 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7520 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7521 goto really_sub;
def3634b 7522 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7523 if (!PL_in_my_stash) {
c750a3ec 7524 char tmpbuf[1024];
3280af22 7525 PL_bufptr = s;
d9fad198 7526 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7527 yyerror(tmpbuf);
7528 }
5db06880
NC
7529#ifdef PERL_MAD
7530 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7531 sv_catsv(PL_thistoken, PL_nextwhite);
7532 PL_nextwhite = 0;
7533 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7534 }
7535#endif
c750a3ec 7536 }
6154021b 7537 pl_yylval.ival = 1;
55497cff 7538 OPERATOR(MY);
93a17b20 7539
79072805 7540 case KEY_next:
a0d0e21e 7541 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7542 LOOPX(OP_NEXT);
7543
7544 case KEY_ne:
78cdf107
Z
7545 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7546 return REPORT(0);
79072805
LW
7547 Eop(OP_SNE);
7548
a0d0e21e 7549 case KEY_no:
468aa647 7550 s = tokenize_use(0, s);
a0d0e21e
LW
7551 OPERATOR(USE);
7552
7553 case KEY_not:
29595ff2 7554 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 7555 FUN1(OP_NOT);
78cdf107
Z
7556 else {
7557 if (!PL_lex_allbrackets &&
7558 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7559 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 7560 OPERATOR(NOTOP);
78cdf107 7561 }
a0d0e21e 7562
79072805 7563 case KEY_open:
29595ff2 7564 s = SKIPSPACE1(s);
7e2040f0 7565 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7566 const char *t;
c35e046a
AL
7567 for (d = s; isALNUM_lazy_if(d,UTF);)
7568 d++;
7569 for (t=d; isSPACE(*t);)
7570 t++;
e2ab214b 7571 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7572 /* [perl #16184] */
7573 && !(t[0] == '=' && t[1] == '>')
7574 ) {
5f66b61c 7575 int parms_len = (int)(d-s);
9014280d 7576 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7577 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7578 parms_len, s, parms_len, s);
66fbe8fb 7579 }
93a17b20 7580 }
a0d0e21e 7581 LOP(OP_OPEN,XTERM);
79072805 7582
463ee0b2 7583 case KEY_or:
78cdf107
Z
7584 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7585 return REPORT(0);
6154021b 7586 pl_yylval.ival = OP_OR;
463ee0b2
LW
7587 OPERATOR(OROP);
7588
79072805
LW
7589 case KEY_ord:
7590 UNI(OP_ORD);
7591
7592 case KEY_oct:
7593 UNI(OP_OCT);
7594
7595 case KEY_opendir:
a0d0e21e 7596 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7597
7598 case KEY_print:
3280af22 7599 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7600 LOP(OP_PRINT,XREF);
79072805
LW
7601
7602 case KEY_printf:
3280af22 7603 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7604 LOP(OP_PRTF,XREF);
79072805 7605
c07a80fd 7606 case KEY_prototype:
7607 UNI(OP_PROTOTYPE);
7608
79072805 7609 case KEY_push:
a0d0e21e 7610 LOP(OP_PUSH,XTERM);
79072805
LW
7611
7612 case KEY_pop:
6f33ba73 7613 UNIDOR(OP_POP);
79072805 7614
a0d0e21e 7615 case KEY_pos:
6f33ba73 7616 UNIDOR(OP_POS);
4e553d73 7617
79072805 7618 case KEY_pack:
a0d0e21e 7619 LOP(OP_PACK,XTERM);
79072805
LW
7620
7621 case KEY_package:
a0d0e21e 7622 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7623 s = SKIPSPACE1(s);
91152fc1 7624 s = force_strict_version(s);
4e4da3ac 7625 PL_lex_expect = XBLOCK;
79072805
LW
7626 OPERATOR(PACKAGE);
7627
7628 case KEY_pipe:
a0d0e21e 7629 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7630
7631 case KEY_q:
5db06880 7632 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7633 if (!s)
d4c19fe8 7634 missingterm(NULL);
6154021b 7635 pl_yylval.ival = OP_CONST;
79072805
LW
7636 TERM(sublex_start());
7637
a0d0e21e
LW
7638 case KEY_quotemeta:
7639 UNI(OP_QUOTEMETA);
7640
ea25a9b2
Z
7641 case KEY_qw: {
7642 OP *words = NULL;
5db06880 7643 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7644 if (!s)
d4c19fe8 7645 missingterm(NULL);
3480a8d2 7646 PL_expect = XOPERATOR;
8127e0e3 7647 if (SvCUR(PL_lex_stuff)) {
8127e0e3 7648 int warned = 0;
3280af22 7649 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7650 while (len) {
d4c19fe8
AL
7651 for (; isSPACE(*d) && len; --len, ++d)
7652 /**/;
8127e0e3 7653 if (len) {
d4c19fe8 7654 SV *sv;
f54cb97a 7655 const char *b = d;
e476b1b5 7656 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7657 for (; !isSPACE(*d) && len; --len, ++d) {
7658 if (*d == ',') {
9014280d 7659 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7660 "Possible attempt to separate words with commas");
7661 ++warned;
7662 }
7663 else if (*d == '#') {
9014280d 7664 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7665 "Possible attempt to put comments in qw() list");
7666 ++warned;
7667 }
7668 }
7669 }
7670 else {
d4c19fe8
AL
7671 for (; !isSPACE(*d) && len; --len, ++d)
7672 /**/;
8127e0e3 7673 }
740cce10 7674 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7675 words = op_append_elem(OP_LIST, words,
7948272d 7676 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7677 }
7678 }
7679 }
ea25a9b2
Z
7680 if (!words)
7681 words = newNULLLIST();
37fd879b 7682 if (PL_lex_stuff) {
8127e0e3 7683 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7684 PL_lex_stuff = NULL;
37fd879b 7685 }
ea25a9b2
Z
7686 PL_expect = XOPERATOR;
7687 pl_yylval.opval = sawparens(words);
7688 TOKEN(QWLIST);
7689 }
8990e307 7690
79072805 7691 case KEY_qq:
5db06880 7692 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7693 if (!s)
d4c19fe8 7694 missingterm(NULL);
6154021b 7695 pl_yylval.ival = OP_STRINGIFY;
3280af22 7696 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 7697 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
7698 TERM(sublex_start());
7699
8782bef2
GB
7700 case KEY_qr:
7701 s = scan_pat(s,OP_QR);
7702 TERM(sublex_start());
7703
79072805 7704 case KEY_qx:
5db06880 7705 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7706 if (!s)
d4c19fe8 7707 missingterm(NULL);
9b201d7d 7708 readpipe_override();
79072805
LW
7709 TERM(sublex_start());
7710
7711 case KEY_return:
7712 OLDLOP(OP_RETURN);
7713
7714 case KEY_require:
29595ff2 7715 s = SKIPSPACE1(s);
e759cc13
RGS
7716 if (isDIGIT(*s)) {
7717 s = force_version(s, FALSE);
a7cb1f99 7718 }
e759cc13
RGS
7719 else if (*s != 'v' || !isDIGIT(s[1])
7720 || (s = force_version(s, TRUE), *s == 'v'))
7721 {
a7cb1f99
GS
7722 *PL_tokenbuf = '\0';
7723 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7724 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7725 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7726 else if (*s == '<')
7727 yyerror("<> should be quotes");
7728 }
a72a1c8b
RGS
7729 if (orig_keyword == KEY_require) {
7730 orig_keyword = 0;
6154021b 7731 pl_yylval.ival = 1;
a72a1c8b
RGS
7732 }
7733 else
6154021b 7734 pl_yylval.ival = 0;
a72a1c8b
RGS
7735 PL_expect = XTERM;
7736 PL_bufptr = s;
7737 PL_last_uni = PL_oldbufptr;
7738 PL_last_lop_op = OP_REQUIRE;
7739 s = skipspace(s);
7740 return REPORT( (int)REQUIRE );
79072805
LW
7741
7742 case KEY_reset:
7743 UNI(OP_RESET);
7744
7745 case KEY_redo:
a0d0e21e 7746 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7747 LOOPX(OP_REDO);
7748
7749 case KEY_rename:
a0d0e21e 7750 LOP(OP_RENAME,XTERM);
79072805
LW
7751
7752 case KEY_rand:
7753 UNI(OP_RAND);
7754
7755 case KEY_rmdir:
7756 UNI(OP_RMDIR);
7757
7758 case KEY_rindex:
a0d0e21e 7759 LOP(OP_RINDEX,XTERM);
79072805
LW
7760
7761 case KEY_read:
a0d0e21e 7762 LOP(OP_READ,XTERM);
79072805
LW
7763
7764 case KEY_readdir:
7765 UNI(OP_READDIR);
7766
93a17b20 7767 case KEY_readline:
6f33ba73 7768 UNIDOR(OP_READLINE);
93a17b20
LW
7769
7770 case KEY_readpipe:
0858480c 7771 UNIDOR(OP_BACKTICK);
93a17b20 7772
79072805
LW
7773 case KEY_rewinddir:
7774 UNI(OP_REWINDDIR);
7775
7776 case KEY_recv:
a0d0e21e 7777 LOP(OP_RECV,XTERM);
79072805
LW
7778
7779 case KEY_reverse:
a0d0e21e 7780 LOP(OP_REVERSE,XTERM);
79072805
LW
7781
7782 case KEY_readlink:
6f33ba73 7783 UNIDOR(OP_READLINK);
79072805
LW
7784
7785 case KEY_ref:
7786 UNI(OP_REF);
7787
7788 case KEY_s:
7789 s = scan_subst(s);
6154021b 7790 if (pl_yylval.opval)
79072805
LW
7791 TERM(sublex_start());
7792 else
7793 TOKEN(1); /* force error */
7794
0d863452
RH
7795 case KEY_say:
7796 checkcomma(s,PL_tokenbuf,"filehandle");
7797 LOP(OP_SAY,XREF);
7798
a0d0e21e
LW
7799 case KEY_chomp:
7800 UNI(OP_CHOMP);
4e553d73 7801
79072805
LW
7802 case KEY_scalar:
7803 UNI(OP_SCALAR);
7804
7805 case KEY_select:
a0d0e21e 7806 LOP(OP_SELECT,XTERM);
79072805
LW
7807
7808 case KEY_seek:
a0d0e21e 7809 LOP(OP_SEEK,XTERM);
79072805
LW
7810
7811 case KEY_semctl:
a0d0e21e 7812 LOP(OP_SEMCTL,XTERM);
79072805
LW
7813
7814 case KEY_semget:
a0d0e21e 7815 LOP(OP_SEMGET,XTERM);
79072805
LW
7816
7817 case KEY_semop:
a0d0e21e 7818 LOP(OP_SEMOP,XTERM);
79072805
LW
7819
7820 case KEY_send:
a0d0e21e 7821 LOP(OP_SEND,XTERM);
79072805
LW
7822
7823 case KEY_setpgrp:
a0d0e21e 7824 LOP(OP_SETPGRP,XTERM);
79072805
LW
7825
7826 case KEY_setpriority:
a0d0e21e 7827 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7828
7829 case KEY_sethostent:
ff68c719 7830 UNI(OP_SHOSTENT);
79072805
LW
7831
7832 case KEY_setnetent:
ff68c719 7833 UNI(OP_SNETENT);
79072805
LW
7834
7835 case KEY_setservent:
ff68c719 7836 UNI(OP_SSERVENT);
79072805
LW
7837
7838 case KEY_setprotoent:
ff68c719 7839 UNI(OP_SPROTOENT);
79072805
LW
7840
7841 case KEY_setpwent:
7842 FUN0(OP_SPWENT);
7843
7844 case KEY_setgrent:
7845 FUN0(OP_SGRENT);
7846
7847 case KEY_seekdir:
a0d0e21e 7848 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7849
7850 case KEY_setsockopt:
a0d0e21e 7851 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7852
7853 case KEY_shift:
6f33ba73 7854 UNIDOR(OP_SHIFT);
79072805
LW
7855
7856 case KEY_shmctl:
a0d0e21e 7857 LOP(OP_SHMCTL,XTERM);
79072805
LW
7858
7859 case KEY_shmget:
a0d0e21e 7860 LOP(OP_SHMGET,XTERM);
79072805
LW
7861
7862 case KEY_shmread:
a0d0e21e 7863 LOP(OP_SHMREAD,XTERM);
79072805
LW
7864
7865 case KEY_shmwrite:
a0d0e21e 7866 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7867
7868 case KEY_shutdown:
a0d0e21e 7869 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7870
7871 case KEY_sin:
7872 UNI(OP_SIN);
7873
7874 case KEY_sleep:
7875 UNI(OP_SLEEP);
7876
7877 case KEY_socket:
a0d0e21e 7878 LOP(OP_SOCKET,XTERM);
79072805
LW
7879
7880 case KEY_socketpair:
a0d0e21e 7881 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7882
7883 case KEY_sort:
3280af22 7884 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7885 s = SKIPSPACE1(s);
79072805 7886 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7887 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7888 PL_expect = XTERM;
15f0808c 7889 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7890 LOP(OP_SORT,XREF);
79072805
LW
7891
7892 case KEY_split:
a0d0e21e 7893 LOP(OP_SPLIT,XTERM);
79072805
LW
7894
7895 case KEY_sprintf:
a0d0e21e 7896 LOP(OP_SPRINTF,XTERM);
79072805
LW
7897
7898 case KEY_splice:
a0d0e21e 7899 LOP(OP_SPLICE,XTERM);
79072805
LW
7900
7901 case KEY_sqrt:
7902 UNI(OP_SQRT);
7903
7904 case KEY_srand:
7905 UNI(OP_SRAND);
7906
7907 case KEY_stat:
7908 UNI(OP_STAT);
7909
7910 case KEY_study:
79072805
LW
7911 UNI(OP_STUDY);
7912
7913 case KEY_substr:
a0d0e21e 7914 LOP(OP_SUBSTR,XTERM);
79072805
LW
7915
7916 case KEY_format:
7917 case KEY_sub:
93a17b20 7918 really_sub:
09bef843 7919 {
3280af22 7920 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7921 SSize_t tboffset = 0;
09bef843 7922 expectation attrful;
28cc6278 7923 bool have_name, have_proto;
f54cb97a 7924 const int key = tmp;
09bef843 7925
5db06880
NC
7926#ifdef PERL_MAD
7927 SV *tmpwhite = 0;
7928
cd81e915 7929 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7930 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7931 PL_thistoken = 0;
5db06880
NC
7932
7933 d = s;
7934 s = SKIPSPACE2(s,tmpwhite);
7935#else
09bef843 7936 s = skipspace(s);
5db06880 7937#endif
09bef843 7938
7e2040f0 7939 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7940 (*s == ':' && s[1] == ':'))
7941 {
5db06880 7942#ifdef PERL_MAD
4f61fd4b 7943 SV *nametoke = NULL;
5db06880
NC
7944#endif
7945
09bef843
SB
7946 PL_expect = XBLOCK;
7947 attrful = XATTRBLOCK;
b1b65b59
JH
7948 /* remember buffer pos'n for later force_word */
7949 tboffset = s - PL_oldbufptr;
09bef843 7950 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7951#ifdef PERL_MAD
7952 if (PL_madskills)
7953 nametoke = newSVpvn(s, d - s);
7954#endif
6502358f
NC
7955 if (memchr(tmpbuf, ':', len))
7956 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7957 else {
7958 sv_setsv(PL_subname,PL_curstname);
396482e1 7959 sv_catpvs(PL_subname,"::");
09bef843
SB
7960 sv_catpvn(PL_subname,tmpbuf,len);
7961 }
09bef843 7962 have_name = TRUE;
5db06880
NC
7963
7964#ifdef PERL_MAD
7965
7966 start_force(0);
7967 CURMAD('X', nametoke);
7968 CURMAD('_', tmpwhite);
7969 (void) force_word(PL_oldbufptr + tboffset, WORD,
7970 FALSE, TRUE, TRUE);
7971
7972 s = SKIPSPACE2(d,tmpwhite);
7973#else
7974 s = skipspace(d);
7975#endif
09bef843 7976 }
463ee0b2 7977 else {
09bef843
SB
7978 if (key == KEY_my)
7979 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7980 PL_expect = XTERMBLOCK;
7981 attrful = XATTRTERM;
76f68e9b 7982 sv_setpvs(PL_subname,"?");
09bef843 7983 have_name = FALSE;
463ee0b2 7984 }
4633a7c4 7985
09bef843
SB
7986 if (key == KEY_format) {
7987 if (*s == '=')
7988 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7989#ifdef PERL_MAD
cd81e915 7990 PL_thistoken = subtoken;
5db06880
NC
7991 s = d;
7992#else
09bef843 7993 if (have_name)
b1b65b59
JH
7994 (void) force_word(PL_oldbufptr + tboffset, WORD,
7995 FALSE, TRUE, TRUE);
5db06880 7996#endif
09bef843
SB
7997 OPERATOR(FORMAT);
7998 }
79072805 7999
09bef843
SB
8000 /* Look for a prototype */
8001 if (*s == '(') {
d9f2850e
RGS
8002 char *p;
8003 bool bad_proto = FALSE;
9e8d7757
RB
8004 bool in_brackets = FALSE;
8005 char greedy_proto = ' ';
8006 bool proto_after_greedy_proto = FALSE;
8007 bool must_be_last = FALSE;
8008 bool underscore = FALSE;
aef2a98a 8009 bool seen_underscore = FALSE;
197afce1 8010 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 8011
5db06880 8012 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 8013 if (!s)
09bef843 8014 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8015 /* strip spaces and check for bad characters */
09bef843
SB
8016 d = SvPVX(PL_lex_stuff);
8017 tmp = 0;
d9f2850e
RGS
8018 for (p = d; *p; ++p) {
8019 if (!isSPACE(*p)) {
8020 d[tmp++] = *p;
9e8d7757 8021
197afce1 8022 if (warnillegalproto) {
9e8d7757
RB
8023 if (must_be_last)
8024 proto_after_greedy_proto = TRUE;
c035a075 8025 if (!strchr("$@%*;[]&\\_+", *p)) {
9e8d7757
RB
8026 bad_proto = TRUE;
8027 }
8028 else {
8029 if ( underscore ) {
8030 if ( *p != ';' )
8031 bad_proto = TRUE;
8032 underscore = FALSE;
8033 }
8034 if ( *p == '[' ) {
8035 in_brackets = TRUE;
8036 }
8037 else if ( *p == ']' ) {
8038 in_brackets = FALSE;
8039 }
8040 else if ( (*p == '@' || *p == '%') &&
8041 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8042 !in_brackets ) {
8043 must_be_last = TRUE;
8044 greedy_proto = *p;
8045 }
8046 else if ( *p == '_' ) {
aef2a98a 8047 underscore = seen_underscore = TRUE;
9e8d7757
RB
8048 }
8049 }
8050 }
d37a9538 8051 }
09bef843 8052 }
d9f2850e 8053 d[tmp] = '\0';
9e8d7757 8054 if (proto_after_greedy_proto)
197afce1 8055 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8056 "Prototype after '%c' for %"SVf" : %s",
8057 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 8058 if (bad_proto)
197afce1 8059 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8060 "Illegal character %sin prototype for %"SVf" : %s",
8061 seen_underscore ? "after '_' " : "",
be2597df 8062 SVfARG(PL_subname), d);
b162af07 8063 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8064 have_proto = TRUE;
68dc0745 8065
5db06880
NC
8066#ifdef PERL_MAD
8067 start_force(0);
cd81e915 8068 CURMAD('q', PL_thisopen);
5db06880 8069 CURMAD('_', tmpwhite);
cd81e915
NC
8070 CURMAD('=', PL_thisstuff);
8071 CURMAD('Q', PL_thisclose);
5db06880
NC
8072 NEXTVAL_NEXTTOKE.opval =
8073 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8074 PL_lex_stuff = NULL;
5db06880
NC
8075 force_next(THING);
8076
8077 s = SKIPSPACE2(s,tmpwhite);
8078#else
09bef843 8079 s = skipspace(s);
5db06880 8080#endif
4633a7c4 8081 }
09bef843
SB
8082 else
8083 have_proto = FALSE;
8084
8085 if (*s == ':' && s[1] != ':')
8086 PL_expect = attrful;
8e742a20
MHM
8087 else if (*s != '{' && key == KEY_sub) {
8088 if (!have_name)
8089 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8090 else if (*s != ';' && *s != '}')
be2597df 8091 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8092 }
09bef843 8093
5db06880
NC
8094#ifdef PERL_MAD
8095 start_force(0);
8096 if (tmpwhite) {
8097 if (PL_madskills)
6b29d1f5 8098 curmad('^', newSVpvs(""));
5db06880
NC
8099 CURMAD('_', tmpwhite);
8100 }
8101 force_next(0);
8102
cd81e915 8103 PL_thistoken = subtoken;
5db06880 8104#else
09bef843 8105 if (have_proto) {
9ded7720 8106 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8107 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8108 PL_lex_stuff = NULL;
09bef843 8109 force_next(THING);
68dc0745 8110 }
5db06880 8111#endif
09bef843 8112 if (!have_name) {
49a54bbe
NC
8113 if (PL_curstash)
8114 sv_setpvs(PL_subname, "__ANON__");
8115 else
8116 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8117 TOKEN(ANONSUB);
4633a7c4 8118 }
5db06880 8119#ifndef PERL_MAD
b1b65b59
JH
8120 (void) force_word(PL_oldbufptr + tboffset, WORD,
8121 FALSE, TRUE, TRUE);
5db06880 8122#endif
09bef843
SB
8123 if (key == KEY_my)
8124 TOKEN(MYSUB);
8125 TOKEN(SUB);
4633a7c4 8126 }
79072805
LW
8127
8128 case KEY_system:
a0d0e21e 8129 LOP(OP_SYSTEM,XREF);
79072805
LW
8130
8131 case KEY_symlink:
a0d0e21e 8132 LOP(OP_SYMLINK,XTERM);
79072805
LW
8133
8134 case KEY_syscall:
a0d0e21e 8135 LOP(OP_SYSCALL,XTERM);
79072805 8136
c07a80fd 8137 case KEY_sysopen:
8138 LOP(OP_SYSOPEN,XTERM);
8139
137443ea 8140 case KEY_sysseek:
8141 LOP(OP_SYSSEEK,XTERM);
8142
79072805 8143 case KEY_sysread:
a0d0e21e 8144 LOP(OP_SYSREAD,XTERM);
79072805
LW
8145
8146 case KEY_syswrite:
a0d0e21e 8147 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8148
8149 case KEY_tr:
8150 s = scan_trans(s);
8151 TERM(sublex_start());
8152
8153 case KEY_tell:
8154 UNI(OP_TELL);
8155
8156 case KEY_telldir:
8157 UNI(OP_TELLDIR);
8158
463ee0b2 8159 case KEY_tie:
a0d0e21e 8160 LOP(OP_TIE,XTERM);
463ee0b2 8161
c07a80fd 8162 case KEY_tied:
8163 UNI(OP_TIED);
8164
79072805
LW
8165 case KEY_time:
8166 FUN0(OP_TIME);
8167
8168 case KEY_times:
8169 FUN0(OP_TMS);
8170
8171 case KEY_truncate:
a0d0e21e 8172 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8173
8174 case KEY_uc:
8175 UNI(OP_UC);
8176
8177 case KEY_ucfirst:
8178 UNI(OP_UCFIRST);
8179
463ee0b2
LW
8180 case KEY_untie:
8181 UNI(OP_UNTIE);
8182
79072805 8183 case KEY_until:
78cdf107
Z
8184 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8185 return REPORT(0);
6154021b 8186 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8187 OPERATOR(UNTIL);
8188
8189 case KEY_unless:
78cdf107
Z
8190 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8191 return REPORT(0);
6154021b 8192 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8193 OPERATOR(UNLESS);
8194
8195 case KEY_unlink:
a0d0e21e 8196 LOP(OP_UNLINK,XTERM);
79072805
LW
8197
8198 case KEY_undef:
6f33ba73 8199 UNIDOR(OP_UNDEF);
79072805
LW
8200
8201 case KEY_unpack:
a0d0e21e 8202 LOP(OP_UNPACK,XTERM);
79072805
LW
8203
8204 case KEY_utime:
a0d0e21e 8205 LOP(OP_UTIME,XTERM);
79072805
LW
8206
8207 case KEY_umask:
6f33ba73 8208 UNIDOR(OP_UMASK);
79072805
LW
8209
8210 case KEY_unshift:
a0d0e21e
LW
8211 LOP(OP_UNSHIFT,XTERM);
8212
8213 case KEY_use:
468aa647 8214 s = tokenize_use(1, s);
a0d0e21e 8215 OPERATOR(USE);
79072805
LW
8216
8217 case KEY_values:
8218 UNI(OP_VALUES);
8219
8220 case KEY_vec:
a0d0e21e 8221 LOP(OP_VEC,XTERM);
79072805 8222
0d863452 8223 case KEY_when:
78cdf107
Z
8224 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8225 return REPORT(0);
6154021b 8226 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8227 OPERATOR(WHEN);
8228
79072805 8229 case KEY_while:
78cdf107
Z
8230 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8231 return REPORT(0);
6154021b 8232 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8233 OPERATOR(WHILE);
8234
8235 case KEY_warn:
3280af22 8236 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8237 LOP(OP_WARN,XTERM);
79072805
LW
8238
8239 case KEY_wait:
8240 FUN0(OP_WAIT);
8241
8242 case KEY_waitpid:
a0d0e21e 8243 LOP(OP_WAITPID,XTERM);
79072805
LW
8244
8245 case KEY_wantarray:
8246 FUN0(OP_WANTARRAY);
8247
8248 case KEY_write:
9d116dd7
JH
8249#ifdef EBCDIC
8250 {
df3728a2
JH
8251 char ctl_l[2];
8252 ctl_l[0] = toCTRL('L');
8253 ctl_l[1] = '\0';
fafc274c 8254 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8255 }
8256#else
fafc274c
NC
8257 /* Make sure $^L is defined */
8258 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8259#endif
79072805
LW
8260 UNI(OP_ENTERWRITE);
8261
8262 case KEY_x:
78cdf107
Z
8263 if (PL_expect == XOPERATOR) {
8264 if (*s == '=' && !PL_lex_allbrackets &&
8265 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8266 return REPORT(0);
79072805 8267 Mop(OP_REPEAT);
78cdf107 8268 }
79072805
LW
8269 check_uni();
8270 goto just_a_word;
8271
a0d0e21e 8272 case KEY_xor:
78cdf107
Z
8273 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8274 return REPORT(0);
6154021b 8275 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
8276 OPERATOR(OROP);
8277
79072805
LW
8278 case KEY_y:
8279 s = scan_trans(s);
8280 TERM(sublex_start());
8281 }
49dc05e3 8282 }}
79072805 8283}
bf4acbe4
GS
8284#ifdef __SC__
8285#pragma segment Main
8286#endif
79072805 8287
e930465f
JH
8288static int
8289S_pending_ident(pTHX)
8eceec63 8290{
97aff369 8291 dVAR;
8eceec63 8292 register char *d;
bbd11bfc 8293 PADOFFSET tmp = 0;
8eceec63
SC
8294 /* pit holds the identifier we read and pending_ident is reset */
8295 char pit = PL_pending_ident;
9bde8eb0
NC
8296 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8297 /* All routes through this function want to know if there is a colon. */
c099d646 8298 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8299 PL_pending_ident = 0;
8300
cd81e915 8301 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8302 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8303 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8304
8305 /* if we're in a my(), we can't allow dynamics here.
8306 $foo'bar has already been turned into $foo::bar, so
8307 just check for colons.
8308
8309 if it's a legal name, the OP is a PADANY.
8310 */
8311 if (PL_in_my) {
8312 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8313 if (has_colon)
8eceec63
SC
8314 yyerror(Perl_form(aTHX_ "No package name allowed for "
8315 "variable %s in \"our\"",
8316 PL_tokenbuf));
d6447115 8317 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8318 }
8319 else {
9bde8eb0 8320 if (has_colon)
952306ac
RGS
8321 yyerror(Perl_form(aTHX_ PL_no_myglob,
8322 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 8323
6154021b 8324 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 8325 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8326 return PRIVATEREF;
8327 }
8328 }
8329
8330 /*
8331 build the ops for accesses to a my() variable.
8332
8333 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8334 then used in a comparison. This catches most, but not
8335 all cases. For instance, it catches
8336 sort { my($a); $a <=> $b }
8337 but not
8338 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8339 (although why you'd do that is anyone's guess).
8340 */
8341
9bde8eb0 8342 if (!has_colon) {
8716503d 8343 if (!PL_in_my)
f8f98e0a 8344 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 8345 if (tmp != NOT_IN_PAD) {
8eceec63 8346 /* might be an "our" variable" */
00b1698f 8347 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8348 /* build ops for a bareword */
b64e5050
AL
8349 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8350 HEK * const stashname = HvNAME_HEK(stash);
8351 SV * const sym = newSVhek(stashname);
396482e1 8352 sv_catpvs(sym, "::");
9bde8eb0 8353 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8354 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8355 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8356 gv_fetchsv(sym,
8eceec63
SC
8357 (PL_in_eval
8358 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8359 : GV_ADDMULTI
8eceec63
SC
8360 ),
8361 ((PL_tokenbuf[0] == '$') ? SVt_PV
8362 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8363 : SVt_PVHV));
8364 return WORD;
8365 }
8366
8367 /* if it's a sort block and they're naming $a or $b */
8368 if (PL_last_lop_op == OP_SORT &&
8369 PL_tokenbuf[0] == '$' &&
8370 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8371 && !PL_tokenbuf[2])
8372 {
8373 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8374 d < PL_bufend && *d != '\n';
8375 d++)
8376 {
8377 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8378 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8379 PL_tokenbuf);
8380 }
8381 }
8382 }
8383
6154021b
RGS
8384 pl_yylval.opval = newOP(OP_PADANY, 0);
8385 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8386 return PRIVATEREF;
8387 }
8388 }
8389
8390 /*
8391 Whine if they've said @foo in a doublequoted string,
8392 and @foo isn't a variable we can find in the symbol
8393 table.
8394 */
d824713b
NC
8395 if (ckWARN(WARN_AMBIGUOUS) &&
8396 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8397 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8398 SVt_PVAV);
8eceec63 8399 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8400 /* DO NOT warn for @- and @+ */
8401 && !( PL_tokenbuf[2] == '\0' &&
8402 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8403 )
8eceec63
SC
8404 {
8405 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8406 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8407 "Possible unintended interpolation of %s in string",
8408 PL_tokenbuf);
8eceec63
SC
8409 }
8410 }
8411
8412 /* build ops for a bareword */
6154021b 8413 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8414 tokenbuf_len - 1));
6154021b 8415 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8416 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8417 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8418 ((PL_tokenbuf[0] == '$') ? SVt_PV
8419 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8420 : SVt_PVHV));
8eceec63
SC
8421 return WORD;
8422}
8423
76e3520e 8424STATIC void
c94115d8 8425S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8426{
97aff369 8427 dVAR;
2f3197b3 8428
7918f24d
NC
8429 PERL_ARGS_ASSERT_CHECKCOMMA;
8430
d008e5eb 8431 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8432 if (ckWARN(WARN_SYNTAX)) {
8433 int level = 1;
26ff0806 8434 const char *w;
d008e5eb
GS
8435 for (w = s+2; *w && level; w++) {
8436 if (*w == '(')
8437 ++level;
8438 else if (*w == ')')
8439 --level;
8440 }
888fea98
NC
8441 while (isSPACE(*w))
8442 ++w;
b1439985
RGS
8443 /* the list of chars below is for end of statements or
8444 * block / parens, boolean operators (&&, ||, //) and branch
8445 * constructs (or, and, if, until, unless, while, err, for).
8446 * Not a very solid hack... */
8447 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8448 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8449 "%s (...) interpreted as function",name);
d008e5eb 8450 }
2f3197b3 8451 }
3280af22 8452 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8453 s++;
a687059c
LW
8454 if (*s == '(')
8455 s++;
3280af22 8456 while (s < PL_bufend && isSPACE(*s))
a687059c 8457 s++;
7e2040f0 8458 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 8459 const char * const w = s++;
7e2040f0 8460 while (isALNUM_lazy_if(s,UTF))
a687059c 8461 s++;
3280af22 8462 while (s < PL_bufend && isSPACE(*s))
a687059c 8463 s++;
e929a76b 8464 if (*s == ',') {
c94115d8 8465 GV* gv;
5458a98a 8466 if (keyword(w, s - w, 0))
e929a76b 8467 return;
c94115d8
NC
8468
8469 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8470 if (gv && GvCVu(gv))
abbb3198 8471 return;
cea2e8a9 8472 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8473 }
8474 }
8475}
8476
423cee85
JH
8477/* Either returns sv, or mortalizes sv and returns a new SV*.
8478 Best used as sv=new_constant(..., sv, ...).
8479 If s, pv are NULL, calls subroutine with one argument,
8480 and type is used with error messages only. */
8481
b3ac6de7 8482STATIC SV *
eb0d8d16
NC
8483S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8484 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 8485{
27da23d5 8486 dVAR; dSP;
890ce7af 8487 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 8488 SV *res;
b3ac6de7
IZ
8489 SV **cvp;
8490 SV *cv, *typesv;
89e33a05 8491 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 8492
7918f24d
NC
8493 PERL_ARGS_ASSERT_NEW_CONSTANT;
8494
f0af216f 8495 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
8496 SV *msg;
8497
10edeb5d
JH
8498 why2 = (const char *)
8499 (strEQ(key,"charnames")
8500 ? "(possibly a missing \"use charnames ...\")"
8501 : "");
4e553d73 8502 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
8503 (type ? type: "undef"), why2);
8504
8505 /* This is convoluted and evil ("goto considered harmful")
8506 * but I do not understand the intricacies of all the different
8507 * failure modes of %^H in here. The goal here is to make
8508 * the most probable error message user-friendly. --jhi */
8509
8510 goto msgdone;
8511
423cee85 8512 report:
4e553d73 8513 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 8514 (type ? type: "undef"), why1, why2, why3);
41ab332f 8515 msgdone:
95a20fc0 8516 yyerror(SvPVX_const(msg));
423cee85
JH
8517 SvREFCNT_dec(msg);
8518 return sv;
8519 }
ff3f963a
KW
8520
8521 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
8522 if (PL_error_count > 0 && strEQ(key,"charnames"))
8523 return &PL_sv_undef;
ff3f963a 8524
eb0d8d16 8525 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 8526 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
8527 why1 = "$^H{";
8528 why2 = key;
f0af216f 8529 why3 = "} is not defined";
423cee85 8530 goto report;
b3ac6de7
IZ
8531 }
8532 sv_2mortal(sv); /* Parent created it permanently */
8533 cv = *cvp;
423cee85 8534 if (!pv && s)
59cd0e26 8535 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 8536 if (type && pv)
59cd0e26 8537 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 8538 else
423cee85 8539 typesv = &PL_sv_undef;
4e553d73 8540
e788e7d3 8541 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
8542 ENTER ;
8543 SAVETMPS;
4e553d73 8544
423cee85 8545 PUSHMARK(SP) ;
a5845cb7 8546 EXTEND(sp, 3);
423cee85
JH
8547 if (pv)
8548 PUSHs(pv);
b3ac6de7 8549 PUSHs(sv);
423cee85
JH
8550 if (pv)
8551 PUSHs(typesv);
b3ac6de7 8552 PUTBACK;
423cee85 8553 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 8554
423cee85 8555 SPAGAIN ;
4e553d73 8556
423cee85 8557 /* Check the eval first */
9b0e499b 8558 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 8559 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 8560 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 8561 (void)POPs;
b37c2d43 8562 res = SvREFCNT_inc_simple(sv);
423cee85
JH
8563 }
8564 else {
8565 res = POPs;
b37c2d43 8566 SvREFCNT_inc_simple_void(res);
423cee85 8567 }
4e553d73 8568
423cee85
JH
8569 PUTBACK ;
8570 FREETMPS ;
8571 LEAVE ;
b3ac6de7 8572 POPSTACK;
4e553d73 8573
b3ac6de7 8574 if (!SvOK(res)) {
423cee85
JH
8575 why1 = "Call to &{$^H{";
8576 why2 = key;
f0af216f 8577 why3 = "}} did not return a defined value";
423cee85
JH
8578 sv = res;
8579 goto report;
9b0e499b 8580 }
423cee85 8581
9b0e499b 8582 return res;
b3ac6de7 8583}
4e553d73 8584
d0a148a6
NC
8585/* Returns a NUL terminated string, with the length of the string written to
8586 *slp
8587 */
76e3520e 8588STATIC char *
cea2e8a9 8589S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 8590{
97aff369 8591 dVAR;
463ee0b2 8592 register char *d = dest;
890ce7af 8593 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
8594
8595 PERL_ARGS_ASSERT_SCAN_WORD;
8596
463ee0b2 8597 for (;;) {
8903cb82 8598 if (d >= e)
cea2e8a9 8599 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8600 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8601 *d++ = *s++;
c35e046a 8602 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8603 *d++ = ':';
8604 *d++ = ':';
8605 s++;
8606 }
c35e046a 8607 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
8608 *d++ = *s++;
8609 *d++ = *s++;
8610 }
fd400ab9 8611 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8612 char *t = s + UTF8SKIP(s);
c35e046a 8613 size_t len;
fd400ab9 8614 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 8615 t += UTF8SKIP(t);
c35e046a
AL
8616 len = t - s;
8617 if (d + len > e)
cea2e8a9 8618 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
8619 Copy(s, d, len, char);
8620 d += len;
a0ed51b3
LW
8621 s = t;
8622 }
463ee0b2
LW
8623 else {
8624 *d = '\0';
8625 *slp = d - dest;
8626 return s;
e929a76b 8627 }
378cc40b
LW
8628 }
8629}
8630
76e3520e 8631STATIC char *
f54cb97a 8632S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 8633{
97aff369 8634 dVAR;
6136c704 8635 char *bracket = NULL;
748a9306 8636 char funny = *s++;
6136c704 8637 register char *d = dest;
0b3da58d 8638 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 8639
7918f24d
NC
8640 PERL_ARGS_ASSERT_SCAN_IDENT;
8641
a0d0e21e 8642 if (isSPACE(*s))
29595ff2 8643 s = PEEKSPACE(s);
de3bb511 8644 if (isDIGIT(*s)) {
8903cb82 8645 while (isDIGIT(*s)) {
8646 if (d >= e)
cea2e8a9 8647 Perl_croak(aTHX_ ident_too_long);
378cc40b 8648 *d++ = *s++;
8903cb82 8649 }
378cc40b
LW
8650 }
8651 else {
463ee0b2 8652 for (;;) {
8903cb82 8653 if (d >= e)
cea2e8a9 8654 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8655 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8656 *d++ = *s++;
7e2040f0 8657 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8658 *d++ = ':';
8659 *d++ = ':';
8660 s++;
8661 }
a0d0e21e 8662 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
8663 *d++ = *s++;
8664 *d++ = *s++;
8665 }
fd400ab9 8666 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8667 char *t = s + UTF8SKIP(s);
fd400ab9 8668 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
8669 t += UTF8SKIP(t);
8670 if (d + (t - s) > e)
cea2e8a9 8671 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
8672 Copy(s, d, t - s, char);
8673 d += t - s;
8674 s = t;
8675 }
463ee0b2
LW
8676 else
8677 break;
8678 }
378cc40b
LW
8679 }
8680 *d = '\0';
8681 d = dest;
79072805 8682 if (*d) {
3280af22
NIS
8683 if (PL_lex_state != LEX_NORMAL)
8684 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 8685 return s;
378cc40b 8686 }
748a9306 8687 if (*s == '$' && s[1] &&
3792a11b 8688 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 8689 {
4810e5ec 8690 return s;
5cd24f17 8691 }
79072805
LW
8692 if (*s == '{') {
8693 bracket = s;
8694 s++;
8695 }
8696 else if (ck_uni)
8697 check_uni();
93a17b20 8698 if (s < send)
79072805
LW
8699 *d = *s++;
8700 d[1] = '\0';
2b92dfce 8701 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 8702 *d = toCTRL(*s);
8703 s++;
de3bb511 8704 }
79072805 8705 if (bracket) {
748a9306 8706 if (isSPACE(s[-1])) {
fa83b5b6 8707 while (s < send) {
f54cb97a 8708 const char ch = *s++;
bf4acbe4 8709 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 8710 *d = ch;
8711 break;
8712 }
8713 }
748a9306 8714 }
7e2040f0 8715 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 8716 d++;
a0ed51b3 8717 if (UTF) {
6136c704
AL
8718 char *end = s;
8719 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8720 end += UTF8SKIP(end);
8721 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8722 end += UTF8SKIP(end);
a0ed51b3 8723 }
6136c704
AL
8724 Copy(s, d, end - s, char);
8725 d += end - s;
8726 s = end;
a0ed51b3
LW
8727 }
8728 else {
2b92dfce 8729 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 8730 *d++ = *s++;
2b92dfce 8731 if (d >= e)
cea2e8a9 8732 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 8733 }
79072805 8734 *d = '\0';
c35e046a
AL
8735 while (s < send && SPACE_OR_TAB(*s))
8736 s++;
ff68c719 8737 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 8738 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
8739 const char * const brack =
8740 (const char *)
8741 ((*s == '[') ? "[...]" : "{...}");
e850844c 8742 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 8743 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 8744 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
8745 funny, dest, brack, funny, dest, brack);
8746 }
79072805 8747 bracket++;
a0be28da 8748 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 8749 PL_lex_allbrackets++;
79072805
LW
8750 return s;
8751 }
4e553d73
NIS
8752 }
8753 /* Handle extended ${^Foo} variables
2b92dfce
GS
8754 * 1999-02-27 mjd-perl-patch@plover.com */
8755 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8756 && isALNUM(*s))
8757 {
8758 d++;
8759 while (isALNUM(*s) && d < e) {
8760 *d++ = *s++;
8761 }
8762 if (d >= e)
cea2e8a9 8763 Perl_croak(aTHX_ ident_too_long);
2b92dfce 8764 *d = '\0';
79072805
LW
8765 }
8766 if (*s == '}') {
8767 s++;
7df0d042 8768 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 8769 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
8770 PL_expect = XREF;
8771 }
d008e5eb 8772 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 8773 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
8774 (keyword(dest, d - dest, 0)
8775 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 8776 {
c35e046a
AL
8777 if (funny == '#')
8778 funny = '@';
9014280d 8779 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
8780 "Ambiguous use of %c{%s} resolved to %c%s",
8781 funny, dest, funny, dest);
8782 }
8783 }
79072805
LW
8784 }
8785 else {
8786 s = bracket; /* let the parser handle it */
93a17b20 8787 *dest = '\0';
79072805
LW
8788 }
8789 }
3280af22
NIS
8790 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8791 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
8792 return s;
8793}
8794
858a358b 8795static bool
3955e1a9 8796S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
8797
8798 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8799 * the parse starting at 's', based on the subset that are valid in this
8800 * context input to this routine in 'valid_flags'. Advances s. Returns
8801 * TRUE if the input was a valid flag, so the next char may be as well;
3955e1a9
KW
8802 * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8803 * current regex. This routine will set it to any charset modifier found.
8804 * The caller shouldn't change it. This way, another charset modifier
8805 * encountered in the parse can be detected as an error, as we have decided
8806 * allow only one */
858a358b
KW
8807
8808 const char c = **s;
94b03d7d 8809
858a358b
KW
8810 if (! strchr(valid_flags, c)) {
8811 if (isALNUM(c)) {
94b03d7d 8812 goto deprecate;
858a358b
KW
8813 }
8814 return FALSE;
8815 }
8816
8817 switch (c) {
94b03d7d 8818
858a358b
KW
8819 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8820 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8821 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8822 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8823 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8824 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d
KW
8825 case LOCALE_PAT_MOD:
8826
8827 /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8828 * can't be regex modifiers.
8829 * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
8830 * much as easily done. s///lei, for example, has to mean regex
8831 * modifiers if it's not an error (as does any word character
8832 * following the 'e'). Otherwise, we resolve to the backwards-
8833 * compatible, but less likely 's/// le ...', i.e. as meaning
8834 * less-than-or-equal. The reason it's not likely is that s//
967ac236
KW
8835 * returns a number for code in the field (/r returns a string, but
8836 * that wasn't added until the 5.13 series), and so '<=' should be
8837 * used for comparing, not 'le'. */
94b03d7d
KW
8838 if (*((*s) + 1) == 't') {
8839 goto deprecate;
8840 }
563734a5 8841 else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
aeac89a5
KW
8842
8843 /* 'e' is valid only for substitutes, s///e. If it is not
8844 * valid in the current context, then 'm//le' must mean the
8845 * comparison operator, so use the regular deprecation message.
8846 */
8847 if (! strchr(valid_flags, 'e')) {
8848 goto deprecate;
8849 }
94b03d7d
KW
8850 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8851 "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
8852 return FALSE;
8853 }
3955e1a9
KW
8854 if (*charset) {
8855 goto multiple_charsets;
8856 }
94b03d7d 8857 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 8858 *charset = c;
94b03d7d
KW
8859 break;
8860 case UNICODE_PAT_MOD:
8861 /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8862 * 'n' means they can't be regex modifiers */
8863 if (*((*s) + 1) == 'n') {
8864 goto deprecate;
8865 }
3955e1a9
KW
8866 if (*charset) {
8867 goto multiple_charsets;
8868 }
94b03d7d 8869 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 8870 *charset = c;
94b03d7d
KW
8871 break;
8872 case ASCII_RESTRICT_PAT_MOD:
8873 /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8874 * can't be regex modifiers */
8875 if (*((*s) + 1) == 'n') {
8876 goto deprecate;
8877 }
ff3f26d2
KW
8878
8879 if (! *charset) {
94b03d7d
KW
8880 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8881 }
ff3f26d2
KW
8882 else {
8883
8884 /* Error if previous modifier wasn't an 'a', but if it was, see
8885 * if, and accept, a second occurrence (only) */
8886 if (*charset != 'a'
8887 || get_regex_charset(*pmfl)
8888 != REGEX_ASCII_RESTRICTED_CHARSET)
8889 {
8890 goto multiple_charsets;
8891 }
8892 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
8893 }
8894 *charset = c;
94b03d7d
KW
8895 break;
8896 case DEPENDS_PAT_MOD:
3955e1a9
KW
8897 if (*charset) {
8898 goto multiple_charsets;
8899 }
94b03d7d 8900 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 8901 *charset = c;
94b03d7d 8902 break;
879d0c72 8903 }
94b03d7d 8904
858a358b
KW
8905 (*s)++;
8906 return TRUE;
94b03d7d
KW
8907
8908 deprecate:
8909 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8910 "Having no space between pattern and following word is deprecated");
8911 return FALSE;
3955e1a9
KW
8912
8913 multiple_charsets:
8914 if (*charset != c) {
8915 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8916 }
ff3f26d2
KW
8917 else if (c == 'a') {
8918 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8919 }
3955e1a9
KW
8920 else {
8921 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8922 }
8923
8924 /* Pretend that it worked, so will continue processing before dieing */
8925 (*s)++;
8926 return TRUE;
879d0c72
NC
8927}
8928
76e3520e 8929STATIC char *
cea2e8a9 8930S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 8931{
97aff369 8932 dVAR;
79072805 8933 PMOP *pm;
5db06880 8934 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 8935 const char * const valid_flags =
a20207d7 8936 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 8937 char charset = '\0'; /* character set modifier */
5db06880
NC
8938#ifdef PERL_MAD
8939 char *modstart;
8940#endif
8941
7918f24d 8942 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 8943
25c09cbf 8944 if (!s) {
6136c704 8945 const char * const delimiter = skipspace(start);
10edeb5d
JH
8946 Perl_croak(aTHX_
8947 (const char *)
8948 (*delimiter == '?'
8949 ? "Search pattern not terminated or ternary operator parsed as search pattern"
8950 : "Search pattern not terminated" ));
25c09cbf 8951 }
bbce6d69 8952
8782bef2 8953 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
8954 if (PL_multi_open == '?') {
8955 /* This is the only point in the code that sets PMf_ONCE: */
79072805 8956 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
8957
8958 /* Hence it's safe to do this bit of PMOP book-keeping here, which
8959 allows us to restrict the list needed by reset to just the ??
8960 matches. */
8961 assert(type != OP_TRANS);
8962 if (PL_curstash) {
daba3364 8963 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
8964 U32 elements;
8965 if (!mg) {
daba3364 8966 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
8967 0);
8968 }
8969 elements = mg->mg_len / sizeof(PMOP**);
8970 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8971 ((PMOP**)mg->mg_ptr) [elements++] = pm;
8972 mg->mg_len = elements * sizeof(PMOP**);
8973 PmopSTASH_set(pm,PL_curstash);
8974 }
8975 }
5db06880
NC
8976#ifdef PERL_MAD
8977 modstart = s;
8978#endif
3955e1a9 8979 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
8980#ifdef PERL_MAD
8981 if (PL_madskills && modstart != s) {
8982 SV* tmptoken = newSVpvn(modstart, s - modstart);
8983 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8984 }
8985#endif
4ac733c9 8986 /* issue a warning if /c is specified,but /g is not */
a2a5de95 8987 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 8988 {
a2a5de95
NC
8989 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8990 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
8991 }
8992
3280af22 8993 PL_lex_op = (OP*)pm;
6154021b 8994 pl_yylval.ival = OP_MATCH;
378cc40b
LW
8995 return s;
8996}
8997
76e3520e 8998STATIC char *
cea2e8a9 8999S_scan_subst(pTHX_ char *start)
79072805 9000{
27da23d5 9001 dVAR;
22594288 9002 char *s;
79072805 9003 register PMOP *pm;
4fdae800 9004 I32 first_start;
79072805 9005 I32 es = 0;
3955e1a9 9006 char charset = '\0'; /* character set modifier */
5db06880
NC
9007#ifdef PERL_MAD
9008 char *modstart;
9009#endif
79072805 9010
7918f24d
NC
9011 PERL_ARGS_ASSERT_SCAN_SUBST;
9012
6154021b 9013 pl_yylval.ival = OP_NULL;
79072805 9014
5db06880 9015 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9016
37fd879b 9017 if (!s)
cea2e8a9 9018 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9019
3280af22 9020 if (s[-1] == PL_multi_open)
79072805 9021 s--;
5db06880
NC
9022#ifdef PERL_MAD
9023 if (PL_madskills) {
cd81e915
NC
9024 CURMAD('q', PL_thisopen);
9025 CURMAD('_', PL_thiswhite);
9026 CURMAD('E', PL_thisstuff);
9027 CURMAD('Q', PL_thisclose);
9028 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9029 }
9030#endif
79072805 9031
3280af22 9032 first_start = PL_multi_start;
5db06880 9033 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9034 if (!s) {
37fd879b 9035 if (PL_lex_stuff) {
3280af22 9036 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9037 PL_lex_stuff = NULL;
37fd879b 9038 }
cea2e8a9 9039 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9040 }
3280af22 9041 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9042
79072805 9043 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9044
9045#ifdef PERL_MAD
9046 if (PL_madskills) {
cd81e915
NC
9047 CURMAD('z', PL_thisopen);
9048 CURMAD('R', PL_thisstuff);
9049 CURMAD('Z', PL_thisclose);
5db06880
NC
9050 }
9051 modstart = s;
9052#endif
9053
48c036b1 9054 while (*s) {
a20207d7 9055 if (*s == EXEC_PAT_MOD) {
a687059c 9056 s++;
2f3197b3 9057 es++;
a687059c 9058 }
3955e1a9
KW
9059 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9060 {
48c036b1 9061 break;
aa78b661 9062 }
378cc40b 9063 }
79072805 9064
5db06880
NC
9065#ifdef PERL_MAD
9066 if (PL_madskills) {
9067 if (modstart != s)
9068 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9069 append_madprops(PL_thismad, (OP*)pm, 0);
9070 PL_thismad = 0;
5db06880
NC
9071 }
9072#endif
a2a5de95
NC
9073 if ((pm->op_pmflags & PMf_CONTINUE)) {
9074 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9075 }
9076
79072805 9077 if (es) {
6136c704
AL
9078 SV * const repl = newSVpvs("");
9079
0244c3a4
GS
9080 PL_sublex_info.super_bufptr = s;
9081 PL_sublex_info.super_bufend = PL_bufend;
9082 PL_multi_end = 0;
79072805 9083 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9084 while (es-- > 0) {
9085 if (es)
9086 sv_catpvs(repl, "eval ");
9087 else
9088 sv_catpvs(repl, "do ");
9089 }
6f43d98f 9090 sv_catpvs(repl, "{");
3280af22 9091 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
9092 if (strchr(SvPVX(PL_lex_repl), '#'))
9093 sv_catpvs(repl, "\n");
9094 sv_catpvs(repl, "}");
25da4f38 9095 SvEVALED_on(repl);
3280af22
NIS
9096 SvREFCNT_dec(PL_lex_repl);
9097 PL_lex_repl = repl;
378cc40b 9098 }
79072805 9099
3280af22 9100 PL_lex_op = (OP*)pm;
6154021b 9101 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9102 return s;
9103}
9104
76e3520e 9105STATIC char *
cea2e8a9 9106S_scan_trans(pTHX_ char *start)
378cc40b 9107{
97aff369 9108 dVAR;
a0d0e21e 9109 register char* s;
11343788 9110 OP *o;
79072805 9111 short *tbl;
b84c11c8
NC
9112 U8 squash;
9113 U8 del;
9114 U8 complement;
bb16bae8 9115 bool nondestruct = 0;
5db06880
NC
9116#ifdef PERL_MAD
9117 char *modstart;
9118#endif
79072805 9119
7918f24d
NC
9120 PERL_ARGS_ASSERT_SCAN_TRANS;
9121
6154021b 9122 pl_yylval.ival = OP_NULL;
79072805 9123
5db06880 9124 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 9125 if (!s)
cea2e8a9 9126 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9127
3280af22 9128 if (s[-1] == PL_multi_open)
2f3197b3 9129 s--;
5db06880
NC
9130#ifdef PERL_MAD
9131 if (PL_madskills) {
cd81e915
NC
9132 CURMAD('q', PL_thisopen);
9133 CURMAD('_', PL_thiswhite);
9134 CURMAD('E', PL_thisstuff);
9135 CURMAD('Q', PL_thisclose);
9136 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9137 }
9138#endif
2f3197b3 9139
5db06880 9140 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9141 if (!s) {
37fd879b 9142 if (PL_lex_stuff) {
3280af22 9143 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9144 PL_lex_stuff = NULL;
37fd879b 9145 }
cea2e8a9 9146 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9147 }
5db06880 9148 if (PL_madskills) {
cd81e915
NC
9149 CURMAD('z', PL_thisopen);
9150 CURMAD('R', PL_thisstuff);
9151 CURMAD('Z', PL_thisclose);
5db06880 9152 }
79072805 9153
a0ed51b3 9154 complement = del = squash = 0;
5db06880
NC
9155#ifdef PERL_MAD
9156 modstart = s;
9157#endif
7a1e2023
NC
9158 while (1) {
9159 switch (*s) {
9160 case 'c':
79072805 9161 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9162 break;
9163 case 'd':
a0ed51b3 9164 del = OPpTRANS_DELETE;
7a1e2023
NC
9165 break;
9166 case 's':
79072805 9167 squash = OPpTRANS_SQUASH;
7a1e2023 9168 break;
bb16bae8
FC
9169 case 'r':
9170 nondestruct = 1;
9171 break;
7a1e2023
NC
9172 default:
9173 goto no_more;
9174 }
395c3793
LW
9175 s++;
9176 }
7a1e2023 9177 no_more:
8973db79 9178
aa1f7c5b 9179 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
bb16bae8 9180 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
59f00321
RGS
9181 o->op_private &= ~OPpTRANS_ALL;
9182 o->op_private |= del|squash|complement|
7948272d
NIS
9183 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9184 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 9185
3280af22 9186 PL_lex_op = o;
bb16bae8 9187 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9188
9189#ifdef PERL_MAD
9190 if (PL_madskills) {
9191 if (modstart != s)
9192 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9193 append_madprops(PL_thismad, o, 0);
9194 PL_thismad = 0;
5db06880
NC
9195 }
9196#endif
9197
79072805
LW
9198 return s;
9199}
9200
76e3520e 9201STATIC char *
cea2e8a9 9202S_scan_heredoc(pTHX_ register char *s)
79072805 9203{
97aff369 9204 dVAR;
79072805
LW
9205 SV *herewas;
9206 I32 op_type = OP_SCALAR;
9207 I32 len;
9208 SV *tmpstr;
9209 char term;
73d840c0 9210 const char *found_newline;
79072805 9211 register char *d;
fc36a67e 9212 register char *e;
4633a7c4 9213 char *peek;
f54cb97a 9214 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
9215#ifdef PERL_MAD
9216 I32 stuffstart = s - SvPVX(PL_linestr);
9217 char *tstart;
9218
cd81e915 9219 PL_realtokenstart = -1;
5db06880 9220#endif
79072805 9221
7918f24d
NC
9222 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9223
79072805 9224 s += 2;
3280af22
NIS
9225 d = PL_tokenbuf;
9226 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 9227 if (!outer)
79072805 9228 *d++ = '\n';
c35e046a
AL
9229 peek = s;
9230 while (SPACE_OR_TAB(*peek))
9231 peek++;
3792a11b 9232 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9233 s = peek;
79072805 9234 term = *s++;
3280af22 9235 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 9236 d += len;
3280af22 9237 if (s < PL_bufend)
79072805 9238 s++;
79072805
LW
9239 }
9240 else {
9241 if (*s == '\\')
9242 s++, term = '\'';
9243 else
9244 term = '"';
7e2040f0 9245 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9246 deprecate("bare << to mean <<\"\"");
7e2040f0 9247 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9248 if (d < e)
9249 *d++ = *s;
9250 }
9251 }
3280af22 9252 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9253 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9254 *d++ = '\n';
9255 *d = '\0';
3280af22 9256 len = d - PL_tokenbuf;
5db06880
NC
9257
9258#ifdef PERL_MAD
9259 if (PL_madskills) {
9260 tstart = PL_tokenbuf + !outer;
cd81e915 9261 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 9262 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9263 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9264 stuffstart = s - SvPVX(PL_linestr);
9265 }
9266#endif
6a27c188 9267#ifndef PERL_STRICT_CR
f63a84b2
LW
9268 d = strchr(s, '\r');
9269 if (d) {
b464bac0 9270 char * const olds = s;
f63a84b2 9271 s = d;
3280af22 9272 while (s < PL_bufend) {
f63a84b2
LW
9273 if (*s == '\r') {
9274 *d++ = '\n';
9275 if (*++s == '\n')
9276 s++;
9277 }
9278 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9279 *d++ = *s++;
9280 s++;
9281 }
9282 else
9283 *d++ = *s++;
9284 }
9285 *d = '\0';
3280af22 9286 PL_bufend = d;
95a20fc0 9287 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9288 s = olds;
9289 }
9290#endif
5db06880
NC
9291#ifdef PERL_MAD
9292 found_newline = 0;
9293#endif
10edeb5d 9294 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
9295 herewas = newSVpvn(s,PL_bufend-s);
9296 }
9297 else {
5db06880
NC
9298#ifdef PERL_MAD
9299 herewas = newSVpvn(s-1,found_newline-s+1);
9300#else
73d840c0
AL
9301 s--;
9302 herewas = newSVpvn(s,found_newline-s);
5db06880 9303#endif
73d840c0 9304 }
5db06880
NC
9305#ifdef PERL_MAD
9306 if (PL_madskills) {
9307 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9308 if (PL_thisstuff)
9309 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9310 else
cd81e915 9311 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
9312 }
9313#endif
79072805 9314 s += SvCUR(herewas);
748a9306 9315
5db06880
NC
9316#ifdef PERL_MAD
9317 stuffstart = s - SvPVX(PL_linestr);
9318
9319 if (found_newline)
9320 s--;
9321#endif
9322
7d0a29fe
NC
9323 tmpstr = newSV_type(SVt_PVIV);
9324 SvGROW(tmpstr, 80);
748a9306 9325 if (term == '\'') {
79072805 9326 op_type = OP_CONST;
45977657 9327 SvIV_set(tmpstr, -1);
748a9306
LW
9328 }
9329 else if (term == '`') {
79072805 9330 op_type = OP_BACKTICK;
45977657 9331 SvIV_set(tmpstr, '\\');
748a9306 9332 }
79072805
LW
9333
9334 CLINE;
57843af0 9335 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
9336 PL_multi_open = PL_multi_close = '<';
9337 term = *PL_tokenbuf;
0244c3a4 9338 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
9339 char * const bufptr = PL_sublex_info.super_bufptr;
9340 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 9341 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
9342 s = strchr(bufptr, '\n');
9343 if (!s)
9344 s = bufend;
9345 d = s;
9346 while (s < bufend &&
9347 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9348 if (*s++ == '\n')
57843af0 9349 CopLINE_inc(PL_curcop);
0244c3a4
GS
9350 }
9351 if (s >= bufend) {
eb160463 9352 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
9353 missingterm(PL_tokenbuf);
9354 }
9355 sv_setpvn(herewas,bufptr,d-bufptr+1);
9356 sv_setpvn(tmpstr,d+1,s-d);
9357 s += len - 1;
9358 sv_catpvn(herewas,s,bufend-s);
95a20fc0 9359 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
9360
9361 s = olds;
9362 goto retval;
9363 }
9364 else if (!outer) {
79072805 9365 d = s;
3280af22
NIS
9366 while (s < PL_bufend &&
9367 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 9368 if (*s++ == '\n')
57843af0 9369 CopLINE_inc(PL_curcop);
79072805 9370 }
3280af22 9371 if (s >= PL_bufend) {
eb160463 9372 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9373 missingterm(PL_tokenbuf);
79072805
LW
9374 }
9375 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9376#ifdef PERL_MAD
9377 if (PL_madskills) {
cd81e915
NC
9378 if (PL_thisstuff)
9379 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9380 else
cd81e915 9381 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9382 stuffstart = s - SvPVX(PL_linestr);
9383 }
9384#endif
79072805 9385 s += len - 1;
57843af0 9386 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 9387
3280af22
NIS
9388 sv_catpvn(herewas,s,PL_bufend-s);
9389 sv_setsv(PL_linestr,herewas);
9390 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9391 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9392 PL_last_lop = PL_last_uni = NULL;
79072805
LW
9393 }
9394 else
76f68e9b 9395 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 9396 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
9397#ifdef PERL_MAD
9398 if (PL_madskills) {
9399 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9400 if (PL_thisstuff)
9401 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9402 else
cd81e915 9403 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9404 }
9405#endif
f0e67a1d 9406 PL_bufptr = s;
17cc9359 9407 CopLINE_inc(PL_curcop);
f0e67a1d 9408 if (!outer || !lex_next_chunk(0)) {
eb160463 9409 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9410 missingterm(PL_tokenbuf);
79072805 9411 }
17cc9359 9412 CopLINE_dec(PL_curcop);
f0e67a1d 9413 s = PL_bufptr;
5db06880
NC
9414#ifdef PERL_MAD
9415 stuffstart = s - SvPVX(PL_linestr);
9416#endif
57843af0 9417 CopLINE_inc(PL_curcop);
3280af22 9418 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9419 PL_last_lop = PL_last_uni = NULL;
6a27c188 9420#ifndef PERL_STRICT_CR
3280af22 9421 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
9422 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9423 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 9424 {
3280af22
NIS
9425 PL_bufend[-2] = '\n';
9426 PL_bufend--;
95a20fc0 9427 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 9428 }
3280af22
NIS
9429 else if (PL_bufend[-1] == '\r')
9430 PL_bufend[-1] = '\n';
f63a84b2 9431 }
3280af22
NIS
9432 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9433 PL_bufend[-1] = '\n';
f63a84b2 9434#endif
3280af22 9435 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 9436 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 9437 *(SvPVX(PL_linestr) + off ) = ' ';
37c6a70c 9438 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
3280af22
NIS
9439 sv_catsv(PL_linestr,herewas);
9440 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 9441 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
9442 }
9443 else {
3280af22
NIS
9444 s = PL_bufend;
9445 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
9446 }
9447 }
79072805 9448 s++;
0244c3a4 9449retval:
57843af0 9450 PL_multi_end = CopLINE(PL_curcop);
79072805 9451 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 9452 SvPV_shrink_to_cur(tmpstr);
79072805 9453 }
8990e307 9454 SvREFCNT_dec(herewas);
2f31ce75 9455 if (!IN_BYTES) {
95a20fc0 9456 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
9457 SvUTF8_on(tmpstr);
9458 else if (PL_encoding)
9459 sv_recode_to_utf8(tmpstr, PL_encoding);
9460 }
3280af22 9461 PL_lex_stuff = tmpstr;
6154021b 9462 pl_yylval.ival = op_type;
79072805
LW
9463 return s;
9464}
9465
02aa26ce
NT
9466/* scan_inputsymbol
9467 takes: current position in input buffer
9468 returns: new position in input buffer
6154021b 9469 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
9470
9471 This code handles:
9472
9473 <> read from ARGV
9474 <FH> read from filehandle
9475 <pkg::FH> read from package qualified filehandle
9476 <pkg'FH> read from package qualified filehandle
9477 <$fh> read from filehandle in $fh
9478 <*.h> filename glob
9479
9480*/
9481
76e3520e 9482STATIC char *
cea2e8a9 9483S_scan_inputsymbol(pTHX_ char *start)
79072805 9484{
97aff369 9485 dVAR;
02aa26ce 9486 register char *s = start; /* current position in buffer */
1b420867 9487 char *end;
79072805 9488 I32 len;
6136c704
AL
9489 char *d = PL_tokenbuf; /* start of temp holding space */
9490 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9491
7918f24d
NC
9492 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9493
1b420867
GS
9494 end = strchr(s, '\n');
9495 if (!end)
9496 end = PL_bufend;
9497 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
9498
9499 /* die if we didn't have space for the contents of the <>,
1b420867 9500 or if it didn't end, or if we see a newline
02aa26ce
NT
9501 */
9502
bb7a0f54 9503 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 9504 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 9505 if (s >= end)
cea2e8a9 9506 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 9507
fc36a67e 9508 s++;
02aa26ce
NT
9509
9510 /* check for <$fh>
9511 Remember, only scalar variables are interpreted as filehandles by
9512 this code. Anything more complex (e.g., <$fh{$num}>) will be
9513 treated as a glob() call.
9514 This code makes use of the fact that except for the $ at the front,
9515 a scalar variable and a filehandle look the same.
9516 */
4633a7c4 9517 if (*d == '$' && d[1]) d++;
02aa26ce
NT
9518
9519 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 9520 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 9521 d++;
02aa26ce
NT
9522
9523 /* If we've tried to read what we allow filehandles to look like, and
9524 there's still text left, then it must be a glob() and not a getline.
9525 Use scan_str to pull out the stuff between the <> and treat it
9526 as nothing more than a string.
9527 */
9528
3280af22 9529 if (d - PL_tokenbuf != len) {
6154021b 9530 pl_yylval.ival = OP_GLOB;
5db06880 9531 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9532 if (!s)
cea2e8a9 9533 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
9534 return s;
9535 }
395c3793 9536 else {
9b3023bc 9537 bool readline_overriden = FALSE;
6136c704 9538 GV *gv_readline;
9b3023bc 9539 GV **gvp;
02aa26ce 9540 /* we're in a filehandle read situation */
3280af22 9541 d = PL_tokenbuf;
02aa26ce
NT
9542
9543 /* turn <> into <ARGV> */
79072805 9544 if (!len)
689badd5 9545 Copy("ARGV",d,5,char);
02aa26ce 9546
9b3023bc 9547 /* Check whether readline() is overriden */
fafc274c 9548 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 9549 if ((gv_readline
ba979b31 9550 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 9551 ||
017a3ce5 9552 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 9553 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 9554 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
9555 readline_overriden = TRUE;
9556
02aa26ce
NT
9557 /* if <$fh>, create the ops to turn the variable into a
9558 filehandle
9559 */
79072805 9560 if (*d == '$') {
02aa26ce
NT
9561 /* try to find it in the pad for this block, otherwise find
9562 add symbol table ops
9563 */
f8f98e0a 9564 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 9565 if (tmp != NOT_IN_PAD) {
00b1698f 9566 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
9567 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9568 HEK * const stashname = HvNAME_HEK(stash);
9569 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 9570 sv_catpvs(sym, "::");
f558d5af
JH
9571 sv_catpv(sym, d+1);
9572 d = SvPVX(sym);
9573 goto intro_sym;
9574 }
9575 else {
6136c704 9576 OP * const o = newOP(OP_PADSV, 0);
f558d5af 9577 o->op_targ = tmp;
9b3023bc
RGS
9578 PL_lex_op = readline_overriden
9579 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9580 op_append_elem(OP_LIST, o,
9b3023bc
RGS
9581 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9582 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 9583 }
a0d0e21e
LW
9584 }
9585 else {
f558d5af
JH
9586 GV *gv;
9587 ++d;
9588intro_sym:
9589 gv = gv_fetchpv(d,
9590 (PL_in_eval
9591 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 9592 : GV_ADDMULTI),
f558d5af 9593 SVt_PV);
9b3023bc
RGS
9594 PL_lex_op = readline_overriden
9595 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9596 op_append_elem(OP_LIST,
9b3023bc
RGS
9597 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9598 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9599 : (OP*)newUNOP(OP_READLINE, 0,
9600 newUNOP(OP_RV2SV, 0,
9601 newGVOP(OP_GV, 0, gv)));
a0d0e21e 9602 }
7c6fadd6
RGS
9603 if (!readline_overriden)
9604 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
9605 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9606 pl_yylval.ival = OP_NULL;
79072805 9607 }
02aa26ce
NT
9608
9609 /* If it's none of the above, it must be a literal filehandle
9610 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 9611 else {
6136c704 9612 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
9613 PL_lex_op = readline_overriden
9614 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9615 op_append_elem(OP_LIST,
9b3023bc
RGS
9616 newGVOP(OP_GV, 0, gv),
9617 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9618 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 9619 pl_yylval.ival = OP_NULL;
79072805
LW
9620 }
9621 }
02aa26ce 9622
79072805
LW
9623 return s;
9624}
9625
02aa26ce
NT
9626
9627/* scan_str
9628 takes: start position in buffer
09bef843
SB
9629 keep_quoted preserve \ on the embedded delimiter(s)
9630 keep_delims preserve the delimiters around the string
02aa26ce
NT
9631 returns: position to continue reading from buffer
9632 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9633 updates the read buffer.
9634
9635 This subroutine pulls a string out of the input. It is called for:
9636 q single quotes q(literal text)
9637 ' single quotes 'literal text'
9638 qq double quotes qq(interpolate $here please)
9639 " double quotes "interpolate $here please"
9640 qx backticks qx(/bin/ls -l)
9641 ` backticks `/bin/ls -l`
9642 qw quote words @EXPORT_OK = qw( func() $spam )
9643 m// regexp match m/this/
9644 s/// regexp substitute s/this/that/
9645 tr/// string transliterate tr/this/that/
9646 y/// string transliterate y/this/that/
9647 ($*@) sub prototypes sub foo ($)
09bef843 9648 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
9649 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9650
9651 In most of these cases (all but <>, patterns and transliterate)
9652 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9653 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9654 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9655 calls scan_str().
4e553d73 9656
02aa26ce
NT
9657 It skips whitespace before the string starts, and treats the first
9658 character as the delimiter. If the delimiter is one of ([{< then
9659 the corresponding "close" character )]}> is used as the closing
9660 delimiter. It allows quoting of delimiters, and if the string has
9661 balanced delimiters ([{<>}]) it allows nesting.
9662
37fd879b
HS
9663 On success, the SV with the resulting string is put into lex_stuff or,
9664 if that is already non-NULL, into lex_repl. The second case occurs only
9665 when parsing the RHS of the special constructs s/// and tr/// (y///).
9666 For convenience, the terminating delimiter character is stuffed into
9667 SvIVX of the SV.
02aa26ce
NT
9668*/
9669
76e3520e 9670STATIC char *
09bef843 9671S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 9672{
97aff369 9673 dVAR;
02aa26ce 9674 SV *sv; /* scalar value: string */
d3fcec1f 9675 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
9676 register char *s = start; /* current position in the buffer */
9677 register char term; /* terminating character */
9678 register char *to; /* current position in the sv's data */
9679 I32 brackets = 1; /* bracket nesting level */
89491803 9680 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 9681 I32 termcode; /* terminating char. code */
89ebb4a3 9682 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 9683 STRLEN termlen; /* length of terminating string */
0331ef07 9684 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
9685#ifdef PERL_MAD
9686 int stuffstart;
9687 char *tstart;
9688#endif
02aa26ce 9689
7918f24d
NC
9690 PERL_ARGS_ASSERT_SCAN_STR;
9691
02aa26ce 9692 /* skip space before the delimiter */
29595ff2
NC
9693 if (isSPACE(*s)) {
9694 s = PEEKSPACE(s);
9695 }
02aa26ce 9696
5db06880 9697#ifdef PERL_MAD
cd81e915
NC
9698 if (PL_realtokenstart >= 0) {
9699 stuffstart = PL_realtokenstart;
9700 PL_realtokenstart = -1;
5db06880
NC
9701 }
9702 else
9703 stuffstart = start - SvPVX(PL_linestr);
9704#endif
02aa26ce 9705 /* mark where we are, in case we need to report errors */
79072805 9706 CLINE;
02aa26ce
NT
9707
9708 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 9709 term = *s;
220e2d4e
IH
9710 if (!UTF) {
9711 termcode = termstr[0] = term;
9712 termlen = 1;
9713 }
9714 else {
f3b9ce0f 9715 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
9716 Copy(s, termstr, termlen, U8);
9717 if (!UTF8_IS_INVARIANT(term))
9718 has_utf8 = TRUE;
9719 }
b1c7b182 9720
02aa26ce 9721 /* mark where we are */
57843af0 9722 PL_multi_start = CopLINE(PL_curcop);
3280af22 9723 PL_multi_open = term;
02aa26ce
NT
9724
9725 /* find corresponding closing delimiter */
93a17b20 9726 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
9727 termcode = termstr[0] = term = tmps[5];
9728
3280af22 9729 PL_multi_close = term;
79072805 9730
561b68a9
SH
9731 /* create a new SV to hold the contents. 79 is the SV's initial length.
9732 What a random number. */
7d0a29fe
NC
9733 sv = newSV_type(SVt_PVIV);
9734 SvGROW(sv, 80);
45977657 9735 SvIV_set(sv, termcode);
a0d0e21e 9736 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
9737
9738 /* move past delimiter and try to read a complete string */
09bef843 9739 if (keep_delims)
220e2d4e
IH
9740 sv_catpvn(sv, s, termlen);
9741 s += termlen;
5db06880
NC
9742#ifdef PERL_MAD
9743 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9744 if (!PL_thisopen && !keep_delims) {
9745 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9746 stuffstart = s - SvPVX(PL_linestr);
9747 }
9748#endif
93a17b20 9749 for (;;) {
220e2d4e
IH
9750 if (PL_encoding && !UTF) {
9751 bool cont = TRUE;
9752
9753 while (cont) {
95a20fc0 9754 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 9755 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 9756 &offset, (char*)termstr, termlen);
6136c704
AL
9757 const char * const ns = SvPVX_const(PL_linestr) + offset;
9758 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
9759
9760 for (; s < ns; s++) {
9761 if (*s == '\n' && !PL_rsfp)
9762 CopLINE_inc(PL_curcop);
9763 }
9764 if (!found)
9765 goto read_more_line;
9766 else {
9767 /* handle quoted delimiters */
52327caf 9768 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 9769 const char *t;
95a20fc0 9770 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
9771 t--;
9772 if ((svlast-1 - t) % 2) {
9773 if (!keep_quoted) {
9774 *(svlast-1) = term;
9775 *svlast = '\0';
9776 SvCUR_set(sv, SvCUR(sv) - 1);
9777 }
9778 continue;
9779 }
9780 }
9781 if (PL_multi_open == PL_multi_close) {
9782 cont = FALSE;
9783 }
9784 else {
f54cb97a
AL
9785 const char *t;
9786 char *w;
0331ef07 9787 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
9788 /* At here, all closes are "was quoted" one,
9789 so we don't check PL_multi_close. */
9790 if (*t == '\\') {
9791 if (!keep_quoted && *(t+1) == PL_multi_open)
9792 t++;
9793 else
9794 *w++ = *t++;
9795 }
9796 else if (*t == PL_multi_open)
9797 brackets++;
9798
9799 *w = *t;
9800 }
9801 if (w < t) {
9802 *w++ = term;
9803 *w = '\0';
95a20fc0 9804 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 9805 }
0331ef07 9806 last_off = w - SvPVX(sv);
220e2d4e
IH
9807 if (--brackets <= 0)
9808 cont = FALSE;
9809 }
9810 }
9811 }
9812 if (!keep_delims) {
9813 SvCUR_set(sv, SvCUR(sv) - 1);
9814 *SvEND(sv) = '\0';
9815 }
9816 break;
9817 }
9818
02aa26ce 9819 /* extend sv if need be */
3280af22 9820 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 9821 /* set 'to' to the next character in the sv's string */
463ee0b2 9822 to = SvPVX(sv)+SvCUR(sv);
09bef843 9823
02aa26ce 9824 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
9825 if (PL_multi_open == PL_multi_close) {
9826 for (; s < PL_bufend; s++,to++) {
02aa26ce 9827 /* embedded newlines increment the current line number */
3280af22 9828 if (*s == '\n' && !PL_rsfp)
57843af0 9829 CopLINE_inc(PL_curcop);
02aa26ce 9830 /* handle quoted delimiters */
3280af22 9831 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 9832 if (!keep_quoted && s[1] == term)
a0d0e21e 9833 s++;
02aa26ce 9834 /* any other quotes are simply copied straight through */
a0d0e21e
LW
9835 else
9836 *to++ = *s++;
9837 }
02aa26ce
NT
9838 /* terminate when run out of buffer (the for() condition), or
9839 have found the terminator */
220e2d4e
IH
9840 else if (*s == term) {
9841 if (termlen == 1)
9842 break;
f3b9ce0f 9843 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
9844 break;
9845 }
63cd0674 9846 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9847 has_utf8 = TRUE;
93a17b20
LW
9848 *to = *s;
9849 }
9850 }
02aa26ce
NT
9851
9852 /* if the terminator isn't the same as the start character (e.g.,
9853 matched brackets), we have to allow more in the quoting, and
9854 be prepared for nested brackets.
9855 */
93a17b20 9856 else {
02aa26ce 9857 /* read until we run out of string, or we find the terminator */
3280af22 9858 for (; s < PL_bufend; s++,to++) {
02aa26ce 9859 /* embedded newlines increment the line count */
3280af22 9860 if (*s == '\n' && !PL_rsfp)
57843af0 9861 CopLINE_inc(PL_curcop);
02aa26ce 9862 /* backslashes can escape the open or closing characters */
3280af22 9863 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
9864 if (!keep_quoted &&
9865 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
9866 s++;
9867 else
9868 *to++ = *s++;
9869 }
02aa26ce 9870 /* allow nested opens and closes */
3280af22 9871 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 9872 break;
3280af22 9873 else if (*s == PL_multi_open)
93a17b20 9874 brackets++;
63cd0674 9875 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9876 has_utf8 = TRUE;
93a17b20
LW
9877 *to = *s;
9878 }
9879 }
02aa26ce 9880 /* terminate the copied string and update the sv's end-of-string */
93a17b20 9881 *to = '\0';
95a20fc0 9882 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 9883
02aa26ce
NT
9884 /*
9885 * this next chunk reads more into the buffer if we're not done yet
9886 */
9887
b1c7b182
GS
9888 if (s < PL_bufend)
9889 break; /* handle case where we are done yet :-) */
79072805 9890
6a27c188 9891#ifndef PERL_STRICT_CR
95a20fc0 9892 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
9893 if ((to[-2] == '\r' && to[-1] == '\n') ||
9894 (to[-2] == '\n' && to[-1] == '\r'))
9895 {
f63a84b2
LW
9896 to[-2] = '\n';
9897 to--;
95a20fc0 9898 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
9899 }
9900 else if (to[-1] == '\r')
9901 to[-1] = '\n';
9902 }
95a20fc0 9903 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
9904 to[-1] = '\n';
9905#endif
9906
220e2d4e 9907 read_more_line:
02aa26ce
NT
9908 /* if we're out of file, or a read fails, bail and reset the current
9909 line marker so we can report where the unterminated string began
9910 */
5db06880
NC
9911#ifdef PERL_MAD
9912 if (PL_madskills) {
c35e046a 9913 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9914 if (PL_thisstuff)
9915 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9916 else
cd81e915 9917 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9918 }
9919#endif
f0e67a1d
Z
9920 CopLINE_inc(PL_curcop);
9921 PL_bufptr = PL_bufend;
9922 if (!lex_next_chunk(0)) {
c07a80fd 9923 sv_free(sv);
eb160463 9924 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 9925 return NULL;
79072805 9926 }
f0e67a1d 9927 s = PL_bufptr;
5db06880
NC
9928#ifdef PERL_MAD
9929 stuffstart = 0;
9930#endif
378cc40b 9931 }
4e553d73 9932
02aa26ce
NT
9933 /* at this point, we have successfully read the delimited string */
9934
220e2d4e 9935 if (!PL_encoding || UTF) {
5db06880
NC
9936#ifdef PERL_MAD
9937 if (PL_madskills) {
c35e046a 9938 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 9939 const int len = s - tstart;
cd81e915 9940 if (PL_thisstuff)
c35e046a 9941 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9942 else
c35e046a 9943 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9944 if (!PL_thisclose && !keep_delims)
9945 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
9946 }
9947#endif
9948
220e2d4e
IH
9949 if (keep_delims)
9950 sv_catpvn(sv, s, termlen);
9951 s += termlen;
9952 }
5db06880
NC
9953#ifdef PERL_MAD
9954 else {
9955 if (PL_madskills) {
c35e046a
AL
9956 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9957 const int len = s - tstart - termlen;
cd81e915 9958 if (PL_thisstuff)
c35e046a 9959 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9960 else
c35e046a 9961 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9962 if (!PL_thisclose && !keep_delims)
9963 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
9964 }
9965 }
9966#endif
220e2d4e 9967 if (has_utf8 || PL_encoding)
b1c7b182 9968 SvUTF8_on(sv);
d0063567 9969
57843af0 9970 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
9971
9972 /* if we allocated too much space, give some back */
93a17b20
LW
9973 if (SvCUR(sv) + 5 < SvLEN(sv)) {
9974 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 9975 SvPV_renew(sv, SvLEN(sv));
79072805 9976 }
02aa26ce
NT
9977
9978 /* decide whether this is the first or second quoted string we've read
9979 for this op
9980 */
4e553d73 9981
3280af22
NIS
9982 if (PL_lex_stuff)
9983 PL_lex_repl = sv;
79072805 9984 else
3280af22 9985 PL_lex_stuff = sv;
378cc40b
LW
9986 return s;
9987}
9988
02aa26ce
NT
9989/*
9990 scan_num
9991 takes: pointer to position in buffer
9992 returns: pointer to new position in buffer
6154021b 9993 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
9994
9995 Read a number in any of the formats that Perl accepts:
9996
7fd134d9
JH
9997 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
9998 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
9999 0b[01](_?[01])*
10000 0[0-7](_?[0-7])*
10001 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10002
3280af22 10003 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10004 thing it reads.
10005
10006 If it reads a number without a decimal point or an exponent, it will
10007 try converting the number to an integer and see if it can do so
10008 without loss of precision.
10009*/
4e553d73 10010
378cc40b 10011char *
bfed75c6 10012Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10013{
97aff369 10014 dVAR;
bfed75c6 10015 register const char *s = start; /* current position in buffer */
02aa26ce
NT
10016 register char *d; /* destination in temp buffer */
10017 register char *e; /* end of temp buffer */
86554af2 10018 NV nv; /* number read, as a double */
a0714e2c 10019 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10020 bool floatit; /* boolean: int or float? */
cbbf8932 10021 const char *lastub = NULL; /* position of last underbar */
bfed75c6 10022 static char const number_too_long[] = "Number too long";
378cc40b 10023
7918f24d
NC
10024 PERL_ARGS_ASSERT_SCAN_NUM;
10025
02aa26ce
NT
10026 /* We use the first character to decide what type of number this is */
10027
378cc40b 10028 switch (*s) {
79072805 10029 default:
cea2e8a9 10030 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 10031
02aa26ce 10032 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10033 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10034 case '0':
10035 {
02aa26ce
NT
10036 /* variables:
10037 u holds the "number so far"
4f19785b
WSI
10038 shift the power of 2 of the base
10039 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10040 overflowed was the number more than we can hold?
10041
10042 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10043 we in octal/hex/binary?" indicator to disallow hex characters
10044 when in octal mode.
02aa26ce 10045 */
9e24b6e2
JH
10046 NV n = 0.0;
10047 UV u = 0;
79072805 10048 I32 shift;
9e24b6e2 10049 bool overflowed = FALSE;
61f33854 10050 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10051 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10052 static const char* const bases[5] =
10053 { "", "binary", "", "octal", "hexadecimal" };
10054 static const char* const Bases[5] =
10055 { "", "Binary", "", "Octal", "Hexadecimal" };
10056 static const char* const maxima[5] =
10057 { "",
10058 "0b11111111111111111111111111111111",
10059 "",
10060 "037777777777",
10061 "0xffffffff" };
bfed75c6 10062 const char *base, *Base, *max;
378cc40b 10063
02aa26ce 10064 /* check for hex */
a674e8db 10065 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10066 shift = 4;
10067 s += 2;
61f33854 10068 just_zero = FALSE;
a674e8db 10069 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10070 shift = 1;
10071 s += 2;
61f33854 10072 just_zero = FALSE;
378cc40b 10073 }
02aa26ce 10074 /* check for a decimal in disguise */
b78218b7 10075 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10076 goto decimal;
02aa26ce 10077 /* so it must be octal */
928753ea 10078 else {
378cc40b 10079 shift = 3;
928753ea
JH
10080 s++;
10081 }
10082
10083 if (*s == '_') {
a2a5de95 10084 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10085 "Misplaced _ in number");
10086 lastub = s++;
10087 }
9e24b6e2
JH
10088
10089 base = bases[shift];
10090 Base = Bases[shift];
10091 max = maxima[shift];
02aa26ce 10092
4f19785b 10093 /* read the rest of the number */
378cc40b 10094 for (;;) {
9e24b6e2 10095 /* x is used in the overflow test,
893fe2c2 10096 b is the digit we're adding on. */
9e24b6e2 10097 UV x, b;
55497cff 10098
378cc40b 10099 switch (*s) {
02aa26ce
NT
10100
10101 /* if we don't mention it, we're done */
378cc40b
LW
10102 default:
10103 goto out;
02aa26ce 10104
928753ea 10105 /* _ are ignored -- but warned about if consecutive */
de3bb511 10106 case '_':
a2a5de95
NC
10107 if (lastub && s == lastub + 1)
10108 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10109 "Misplaced _ in number");
928753ea 10110 lastub = s++;
de3bb511 10111 break;
02aa26ce
NT
10112
10113 /* 8 and 9 are not octal */
378cc40b 10114 case '8': case '9':
4f19785b 10115 if (shift == 3)
cea2e8a9 10116 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10117 /* FALL THROUGH */
02aa26ce
NT
10118
10119 /* octal digits */
4f19785b 10120 case '2': case '3': case '4':
378cc40b 10121 case '5': case '6': case '7':
4f19785b 10122 if (shift == 1)
cea2e8a9 10123 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10124 /* FALL THROUGH */
10125
10126 case '0': case '1':
02aa26ce 10127 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10128 goto digit;
02aa26ce
NT
10129
10130 /* hex digits */
378cc40b
LW
10131 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10132 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10133 /* make sure they said 0x */
378cc40b
LW
10134 if (shift != 4)
10135 goto out;
55497cff 10136 b = (*s++ & 7) + 9;
02aa26ce
NT
10137
10138 /* Prepare to put the digit we have onto the end
10139 of the number so far. We check for overflows.
10140 */
10141
55497cff 10142 digit:
61f33854 10143 just_zero = FALSE;
9e24b6e2
JH
10144 if (!overflowed) {
10145 x = u << shift; /* make room for the digit */
10146
10147 if ((x >> shift) != u
10148 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10149 overflowed = TRUE;
10150 n = (NV) u;
9b387841
NC
10151 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10152 "Integer overflow in %s number",
10153 base);
9e24b6e2
JH
10154 } else
10155 u = x | b; /* add the digit to the end */
10156 }
10157 if (overflowed) {
10158 n *= nvshift[shift];
10159 /* If an NV has not enough bits in its
10160 * mantissa to represent an UV this summing of
10161 * small low-order numbers is a waste of time
10162 * (because the NV cannot preserve the
10163 * low-order bits anyway): we could just
10164 * remember when did we overflow and in the
10165 * end just multiply n by the right
10166 * amount. */
10167 n += (NV) b;
55497cff 10168 }
378cc40b
LW
10169 break;
10170 }
10171 }
02aa26ce
NT
10172
10173 /* if we get here, we had success: make a scalar value from
10174 the number.
10175 */
378cc40b 10176 out:
928753ea
JH
10177
10178 /* final misplaced underbar check */
10179 if (s[-1] == '_') {
a2a5de95 10180 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10181 }
10182
9e24b6e2 10183 if (overflowed) {
a2a5de95
NC
10184 if (n > 4294967295.0)
10185 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10186 "%s number > %s non-portable",
10187 Base, max);
b081dd7e 10188 sv = newSVnv(n);
9e24b6e2
JH
10189 }
10190 else {
15041a67 10191#if UVSIZE > 4
a2a5de95
NC
10192 if (u > 0xffffffff)
10193 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10194 "%s number > %s non-portable",
10195 Base, max);
2cc4c2dc 10196#endif
b081dd7e 10197 sv = newSVuv(u);
9e24b6e2 10198 }
61f33854 10199 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10200 sv = new_constant(start, s - start, "integer",
eb0d8d16 10201 sv, NULL, NULL, 0);
61f33854 10202 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10203 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10204 }
10205 break;
02aa26ce
NT
10206
10207 /*
10208 handle decimal numbers.
10209 we're also sent here when we read a 0 as the first digit
10210 */
378cc40b
LW
10211 case '1': case '2': case '3': case '4': case '5':
10212 case '6': case '7': case '8': case '9': case '.':
10213 decimal:
3280af22
NIS
10214 d = PL_tokenbuf;
10215 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10216 floatit = FALSE;
02aa26ce
NT
10217
10218 /* read next group of digits and _ and copy into d */
de3bb511 10219 while (isDIGIT(*s) || *s == '_') {
4e553d73 10220 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10221 if -w is on
10222 */
93a17b20 10223 if (*s == '_') {
a2a5de95
NC
10224 if (lastub && s == lastub + 1)
10225 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10226 "Misplaced _ in number");
928753ea 10227 lastub = s++;
93a17b20 10228 }
fc36a67e 10229 else {
02aa26ce 10230 /* check for end of fixed-length buffer */
fc36a67e 10231 if (d >= e)
cea2e8a9 10232 Perl_croak(aTHX_ number_too_long);
02aa26ce 10233 /* if we're ok, copy the character */
378cc40b 10234 *d++ = *s++;
fc36a67e 10235 }
378cc40b 10236 }
02aa26ce
NT
10237
10238 /* final misplaced underbar check */
928753ea 10239 if (lastub && s == lastub + 1) {
a2a5de95 10240 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10241 }
02aa26ce
NT
10242
10243 /* read a decimal portion if there is one. avoid
10244 3..5 being interpreted as the number 3. followed
10245 by .5
10246 */
2f3197b3 10247 if (*s == '.' && s[1] != '.') {
79072805 10248 floatit = TRUE;
378cc40b 10249 *d++ = *s++;
02aa26ce 10250
928753ea 10251 if (*s == '_') {
a2a5de95
NC
10252 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10253 "Misplaced _ in number");
928753ea
JH
10254 lastub = s;
10255 }
10256
10257 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10258 */
fc36a67e 10259 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10260 /* fixed length buffer check */
fc36a67e 10261 if (d >= e)
cea2e8a9 10262 Perl_croak(aTHX_ number_too_long);
928753ea 10263 if (*s == '_') {
a2a5de95
NC
10264 if (lastub && s == lastub + 1)
10265 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10266 "Misplaced _ in number");
928753ea
JH
10267 lastub = s;
10268 }
10269 else
fc36a67e 10270 *d++ = *s;
378cc40b 10271 }
928753ea
JH
10272 /* fractional part ending in underbar? */
10273 if (s[-1] == '_') {
a2a5de95
NC
10274 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10275 "Misplaced _ in number");
928753ea 10276 }
dd629d5b
GS
10277 if (*s == '.' && isDIGIT(s[1])) {
10278 /* oops, it's really a v-string, but without the "v" */
f4758303 10279 s = start;
dd629d5b
GS
10280 goto vstring;
10281 }
378cc40b 10282 }
02aa26ce
NT
10283
10284 /* read exponent part, if present */
3792a11b 10285 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10286 floatit = TRUE;
10287 s++;
02aa26ce
NT
10288
10289 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10290 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10291
7fd134d9
JH
10292 /* stray preinitial _ */
10293 if (*s == '_') {
a2a5de95
NC
10294 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10295 "Misplaced _ in number");
7fd134d9
JH
10296 lastub = s++;
10297 }
10298
02aa26ce 10299 /* allow positive or negative exponent */
378cc40b
LW
10300 if (*s == '+' || *s == '-')
10301 *d++ = *s++;
02aa26ce 10302
7fd134d9
JH
10303 /* stray initial _ */
10304 if (*s == '_') {
a2a5de95
NC
10305 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10306 "Misplaced _ in number");
7fd134d9
JH
10307 lastub = s++;
10308 }
10309
7fd134d9
JH
10310 /* read digits of exponent */
10311 while (isDIGIT(*s) || *s == '_') {
10312 if (isDIGIT(*s)) {
10313 if (d >= e)
10314 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10315 *d++ = *s++;
7fd134d9
JH
10316 }
10317 else {
041457d9 10318 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10319 (!isDIGIT(s[1]) && s[1] != '_')))
10320 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10321 "Misplaced _ in number");
b3b48e3e 10322 lastub = s++;
7fd134d9 10323 }
7fd134d9 10324 }
378cc40b 10325 }
02aa26ce 10326
02aa26ce 10327
0b7fceb9 10328 /*
58bb9ec3
NC
10329 We try to do an integer conversion first if no characters
10330 indicating "float" have been found.
0b7fceb9
MU
10331 */
10332
10333 if (!floatit) {
58bb9ec3 10334 UV uv;
6136c704 10335 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10336
10337 if (flags == IS_NUMBER_IN_UV) {
10338 if (uv <= IV_MAX)
b081dd7e 10339 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10340 else
b081dd7e 10341 sv = newSVuv(uv);
58bb9ec3
NC
10342 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10343 if (uv <= (UV) IV_MIN)
b081dd7e 10344 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10345 else
10346 floatit = TRUE;
10347 } else
10348 floatit = TRUE;
10349 }
0b7fceb9 10350 if (floatit) {
58bb9ec3
NC
10351 /* terminate the string */
10352 *d = '\0';
86554af2 10353 nv = Atof(PL_tokenbuf);
b081dd7e 10354 sv = newSVnv(nv);
86554af2 10355 }
86554af2 10356
eb0d8d16
NC
10357 if ( floatit
10358 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10359 const char *const key = floatit ? "float" : "integer";
10360 const STRLEN keylen = floatit ? 5 : 7;
10361 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10362 key, keylen, sv, NULL, NULL, 0);
10363 }
378cc40b 10364 break;
0b7fceb9 10365
e312add1 10366 /* if it starts with a v, it could be a v-string */
a7cb1f99 10367 case 'v':
dd629d5b 10368vstring:
561b68a9 10369 sv = newSV(5); /* preallocate storage space */
65b06e02 10370 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10371 break;
79072805 10372 }
a687059c 10373
02aa26ce
NT
10374 /* make the op for the constant and return */
10375
a86a20aa 10376 if (sv)
b73d6f50 10377 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10378 else
5f66b61c 10379 lvalp->opval = NULL;
a687059c 10380
73d840c0 10381 return (char *)s;
378cc40b
LW
10382}
10383
76e3520e 10384STATIC char *
cea2e8a9 10385S_scan_formline(pTHX_ register char *s)
378cc40b 10386{
97aff369 10387 dVAR;
79072805 10388 register char *eol;
378cc40b 10389 register char *t;
6136c704 10390 SV * const stuff = newSVpvs("");
79072805 10391 bool needargs = FALSE;
c5ee2135 10392 bool eofmt = FALSE;
5db06880
NC
10393#ifdef PERL_MAD
10394 char *tokenstart = s;
4f61fd4b
JC
10395 SV* savewhite = NULL;
10396
5db06880 10397 if (PL_madskills) {
cd81e915
NC
10398 savewhite = PL_thiswhite;
10399 PL_thiswhite = 0;
5db06880
NC
10400 }
10401#endif
378cc40b 10402
7918f24d
NC
10403 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10404
79072805 10405 while (!needargs) {
a1b95068 10406 if (*s == '.') {
c35e046a 10407 t = s+1;
51882d45 10408#ifdef PERL_STRICT_CR
c35e046a
AL
10409 while (SPACE_OR_TAB(*t))
10410 t++;
51882d45 10411#else
c35e046a
AL
10412 while (SPACE_OR_TAB(*t) || *t == '\r')
10413 t++;
51882d45 10414#endif
c5ee2135
WL
10415 if (*t == '\n' || t == PL_bufend) {
10416 eofmt = TRUE;
79072805 10417 break;
c5ee2135 10418 }
79072805 10419 }
3280af22 10420 if (PL_in_eval && !PL_rsfp) {
07409e01 10421 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 10422 if (!eol++)
3280af22 10423 eol = PL_bufend;
0f85fab0
LW
10424 }
10425 else
3280af22 10426 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 10427 if (*s != '#') {
a0d0e21e
LW
10428 for (t = s; t < eol; t++) {
10429 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10430 needargs = FALSE;
10431 goto enough; /* ~~ must be first line in formline */
378cc40b 10432 }
a0d0e21e
LW
10433 if (*t == '@' || *t == '^')
10434 needargs = TRUE;
378cc40b 10435 }
7121b347
MG
10436 if (eol > s) {
10437 sv_catpvn(stuff, s, eol-s);
2dc4c65b 10438#ifndef PERL_STRICT_CR
7121b347
MG
10439 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10440 char *end = SvPVX(stuff) + SvCUR(stuff);
10441 end[-2] = '\n';
10442 end[-1] = '\0';
b162af07 10443 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 10444 }
2dc4c65b 10445#endif
7121b347
MG
10446 }
10447 else
10448 break;
79072805 10449 }
95a20fc0 10450 s = (char*)eol;
3280af22 10451 if (PL_rsfp) {
f0e67a1d 10452 bool got_some;
5db06880
NC
10453#ifdef PERL_MAD
10454 if (PL_madskills) {
cd81e915
NC
10455 if (PL_thistoken)
10456 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 10457 else
cd81e915 10458 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
10459 }
10460#endif
f0e67a1d
Z
10461 PL_bufptr = PL_bufend;
10462 CopLINE_inc(PL_curcop);
10463 got_some = lex_next_chunk(0);
10464 CopLINE_dec(PL_curcop);
10465 s = PL_bufptr;
5db06880 10466#ifdef PERL_MAD
f0e67a1d 10467 tokenstart = PL_bufptr;
5db06880 10468#endif
f0e67a1d 10469 if (!got_some)
378cc40b 10470 break;
378cc40b 10471 }
463ee0b2 10472 incline(s);
79072805 10473 }
a0d0e21e
LW
10474 enough:
10475 if (SvCUR(stuff)) {
3280af22 10476 PL_expect = XTERM;
79072805 10477 if (needargs) {
3280af22 10478 PL_lex_state = LEX_NORMAL;
cd81e915 10479 start_force(PL_curforce);
9ded7720 10480 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
10481 force_next(',');
10482 }
a0d0e21e 10483 else
3280af22 10484 PL_lex_state = LEX_FORMLINE;
1bd51a4c 10485 if (!IN_BYTES) {
95a20fc0 10486 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
10487 SvUTF8_on(stuff);
10488 else if (PL_encoding)
10489 sv_recode_to_utf8(stuff, PL_encoding);
10490 }
cd81e915 10491 start_force(PL_curforce);
9ded7720 10492 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 10493 force_next(THING);
cd81e915 10494 start_force(PL_curforce);
9ded7720 10495 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 10496 force_next(LSTOP);
378cc40b 10497 }
79072805 10498 else {
8990e307 10499 SvREFCNT_dec(stuff);
c5ee2135
WL
10500 if (eofmt)
10501 PL_lex_formbrack = 0;
3280af22 10502 PL_bufptr = s;
79072805 10503 }
5db06880
NC
10504#ifdef PERL_MAD
10505 if (PL_madskills) {
cd81e915
NC
10506 if (PL_thistoken)
10507 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 10508 else
cd81e915
NC
10509 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10510 PL_thiswhite = savewhite;
5db06880
NC
10511 }
10512#endif
79072805 10513 return s;
378cc40b 10514}
a687059c 10515
ba6d6ac9 10516I32
864dbfa3 10517Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 10518{
97aff369 10519 dVAR;
a3b680e6 10520 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 10521 CV* const outsidecv = PL_compcv;
8990e307 10522
3280af22
NIS
10523 if (PL_compcv) {
10524 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 10525 }
7766f137 10526 SAVEI32(PL_subline);
3280af22 10527 save_item(PL_subname);
3280af22 10528 SAVESPTR(PL_compcv);
3280af22 10529
ea726b52 10530 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
10531 CvFLAGS(PL_compcv) |= flags;
10532
57843af0 10533 PL_subline = CopLINE(PL_curcop);
dd2155a4 10534 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 10535 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 10536 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 10537
8990e307
LW
10538 return oldsavestack_ix;
10539}
10540
084592ab
CN
10541#ifdef __SC__
10542#pragma segment Perl_yylex
10543#endif
af41e527
NC
10544static int
10545S_yywarn(pTHX_ const char *const s)
8990e307 10546{
97aff369 10547 dVAR;
7918f24d
NC
10548
10549 PERL_ARGS_ASSERT_YYWARN;
10550
faef0170 10551 PL_in_eval |= EVAL_WARNONLY;
748a9306 10552 yyerror(s);
faef0170 10553 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 10554 return 0;
8990e307
LW
10555}
10556
10557int
15f169a1 10558Perl_yyerror(pTHX_ const char *const s)
463ee0b2 10559{
97aff369 10560 dVAR;
bfed75c6
AL
10561 const char *where = NULL;
10562 const char *context = NULL;
68dc0745 10563 int contlen = -1;
46fc3d4c 10564 SV *msg;
5912531f 10565 int yychar = PL_parser->yychar;
463ee0b2 10566
7918f24d
NC
10567 PERL_ARGS_ASSERT_YYERROR;
10568
3280af22 10569 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 10570 where = "at EOF";
8bcfe651
TM
10571 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10572 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10573 PL_oldbufptr != PL_bufptr) {
f355267c
JH
10574 /*
10575 Only for NetWare:
10576 The code below is removed for NetWare because it abends/crashes on NetWare
10577 when the script has error such as not having the closing quotes like:
10578 if ($var eq "value)
10579 Checking of white spaces is anyway done in NetWare code.
10580 */
10581#ifndef NETWARE
3280af22
NIS
10582 while (isSPACE(*PL_oldoldbufptr))
10583 PL_oldoldbufptr++;
f355267c 10584#endif
3280af22
NIS
10585 context = PL_oldoldbufptr;
10586 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 10587 }
8bcfe651
TM
10588 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10589 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
10590 /*
10591 Only for NetWare:
10592 The code below is removed for NetWare because it abends/crashes on NetWare
10593 when the script has error such as not having the closing quotes like:
10594 if ($var eq "value)
10595 Checking of white spaces is anyway done in NetWare code.
10596 */
10597#ifndef NETWARE
3280af22
NIS
10598 while (isSPACE(*PL_oldbufptr))
10599 PL_oldbufptr++;
f355267c 10600#endif
3280af22
NIS
10601 context = PL_oldbufptr;
10602 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
10603 }
10604 else if (yychar > 255)
68dc0745 10605 where = "next token ???";
12fbd33b 10606 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
10607 if (PL_lex_state == LEX_NORMAL ||
10608 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 10609 where = "at end of line";
3280af22 10610 else if (PL_lex_inpat)
68dc0745 10611 where = "within pattern";
463ee0b2 10612 else
68dc0745 10613 where = "within string";
463ee0b2 10614 }
46fc3d4c 10615 else {
84bafc02 10616 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 10617 if (yychar < 32)
cea2e8a9 10618 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 10619 else if (isPRINT_LC(yychar)) {
88c9ea1e 10620 const char string = yychar;
5e7aa789
NC
10621 sv_catpvn(where_sv, &string, 1);
10622 }
463ee0b2 10623 else
cea2e8a9 10624 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 10625 where = SvPVX_const(where_sv);
463ee0b2 10626 }
46fc3d4c 10627 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 10628 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 10629 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 10630 if (context)
cea2e8a9 10631 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 10632 else
cea2e8a9 10633 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 10634 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 10635 Perl_sv_catpvf(aTHX_ msg,
57def98f 10636 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 10637 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 10638 PL_multi_end = 0;
a0d0e21e 10639 }
500960a6 10640 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 10641 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 10642 }
463ee0b2 10643 else
5a844595 10644 qerror(msg);
c7d6bfb2
GS
10645 if (PL_error_count >= 10) {
10646 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 10647 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 10648 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
10649 else
10650 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 10651 OutCopFILE(PL_curcop));
c7d6bfb2 10652 }
3280af22 10653 PL_in_my = 0;
5c284bb0 10654 PL_in_my_stash = NULL;
463ee0b2
LW
10655 return 0;
10656}
084592ab
CN
10657#ifdef __SC__
10658#pragma segment Main
10659#endif
4e35701f 10660
b250498f 10661STATIC char*
3ae08724 10662S_swallow_bom(pTHX_ U8 *s)
01ec43d0 10663{
97aff369 10664 dVAR;
f54cb97a 10665 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
10666
10667 PERL_ARGS_ASSERT_SWALLOW_BOM;
10668
7aa207d6 10669 switch (s[0]) {
4e553d73
NIS
10670 case 0xFF:
10671 if (s[1] == 0xFE) {
ee6ba15d 10672 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 10673 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 10674 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 10675#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 10676 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 10677 s += 2;
dea0fc0b 10678 if (PL_bufend > (char*)s) {
81a923f4 10679 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 10680 }
b250498f 10681#else
ee6ba15d 10682 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 10683#endif
01ec43d0
GS
10684 }
10685 break;
78ae23f5 10686 case 0xFE:
7aa207d6 10687 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 10688#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10689 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
10690 s += 2;
10691 if (PL_bufend > (char *)s) {
81a923f4 10692 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 10693 }
b250498f 10694#else
ee6ba15d 10695 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 10696#endif
01ec43d0
GS
10697 }
10698 break;
3ae08724
GS
10699 case 0xEF:
10700 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 10701 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
10702 s += 3; /* UTF-8 */
10703 }
10704 break;
10705 case 0:
7aa207d6
JH
10706 if (slen > 3) {
10707 if (s[1] == 0) {
10708 if (s[2] == 0xFE && s[3] == 0xFF) {
10709 /* UTF-32 big-endian */
ee6ba15d 10710 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
10711 }
10712 }
10713 else if (s[2] == 0 && s[3] != 0) {
10714 /* Leading bytes
10715 * 00 xx 00 xx
10716 * are a good indicator of UTF-16BE. */
ee6ba15d 10717#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10718 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
10719 s = add_utf16_textfilter(s, FALSE);
10720#else
10721 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10722#endif
7aa207d6 10723 }
01ec43d0 10724 }
e294cc5d
JH
10725#ifdef EBCDIC
10726 case 0xDD:
10727 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10728 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10729 s += 4; /* UTF-8 */
10730 }
10731 break;
10732#endif
10733
7aa207d6
JH
10734 default:
10735 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10736 /* Leading bytes
10737 * xx 00 xx 00
10738 * are a good indicator of UTF-16LE. */
ee6ba15d 10739#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10740 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 10741 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
10742#else
10743 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10744#endif
7aa207d6 10745 }
01ec43d0 10746 }
b8f84bb2 10747 return (char*)s;
b250498f 10748}
4755096e 10749
6e3aabd6
GS
10750
10751#ifndef PERL_NO_UTF16_FILTER
10752static I32
a28af015 10753S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 10754{
97aff369 10755 dVAR;
f3040f2c 10756 SV *const filter = FILTER_DATA(idx);
2a773401
NC
10757 /* We re-use this each time round, throwing the contents away before we
10758 return. */
2a773401 10759 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 10760 SV *const utf8_buffer = filter;
c28d6105 10761 IV status = IoPAGE(filter);
f2338a2e 10762 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 10763 I32 retval;
c8b0cbae 10764
c85ae797
NC
10765 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10766
c8b0cbae
NC
10767 /* As we're automatically added, at the lowest level, and hence only called
10768 from this file, we can be sure that we're not called in block mode. Hence
10769 don't bother writing code to deal with block mode. */
10770 if (maxlen) {
10771 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10772 }
c28d6105
NC
10773 if (status < 0) {
10774 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10775 }
1de9afcd 10776 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 10777 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 10778 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
10779 reverse ? 'l' : 'b', idx, maxlen, status,
10780 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10781
10782 while (1) {
10783 STRLEN chars;
10784 STRLEN have;
dea0fc0b 10785 I32 newlen;
2a773401 10786 U8 *end;
c28d6105
NC
10787 /* First, look in our buffer of existing UTF-8 data: */
10788 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10789
10790 if (nl) {
10791 ++nl;
10792 } else if (status == 0) {
10793 /* EOF */
10794 IoPAGE(filter) = 0;
10795 nl = SvEND(utf8_buffer);
10796 }
10797 if (nl) {
d2d1d4de
NC
10798 STRLEN got = nl - SvPVX(utf8_buffer);
10799 /* Did we have anything to append? */
10800 retval = got != 0;
10801 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
10802 /* Everything else in this code works just fine if SVp_POK isn't
10803 set. This, however, needs it, and we need it to work, else
10804 we loop infinitely because the buffer is never consumed. */
10805 sv_chop(utf8_buffer, nl);
10806 break;
10807 }
ba77e4cc 10808
c28d6105
NC
10809 /* OK, not a complete line there, so need to read some more UTF-16.
10810 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
10811 while (1) {
10812 if (status <= 0)
10813 break;
10814 if (SvCUR(utf16_buffer) >= 2) {
10815 /* Location of the high octet of the last complete code point.
10816 Gosh, UTF-16 is a pain. All the benefits of variable length,
10817 *coupled* with all the benefits of partial reads and
10818 endianness. */
10819 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10820 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10821
10822 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10823 break;
10824 }
10825
10826 /* We have the first half of a surrogate. Read more. */
10827 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10828 }
c28d6105 10829
c28d6105
NC
10830 status = FILTER_READ(idx + 1, utf16_buffer,
10831 160 + (SvCUR(utf16_buffer) & 1));
10832 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 10833 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
10834 if (status < 0) {
10835 /* Error */
10836 IoPAGE(filter) = status;
10837 return status;
10838 }
10839 }
10840
10841 chars = SvCUR(utf16_buffer) >> 1;
10842 have = SvCUR(utf8_buffer);
10843 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 10844
aa6dbd60 10845 if (reverse) {
c28d6105
NC
10846 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10847 (U8*)SvPVX_const(utf8_buffer) + have,
10848 chars * 2, &newlen);
aa6dbd60 10849 } else {
2a773401 10850 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
10851 (U8*)SvPVX_const(utf8_buffer) + have,
10852 chars * 2, &newlen);
2a773401 10853 }
c28d6105 10854 SvCUR_set(utf8_buffer, have + newlen);
2a773401 10855 *end = '\0';
c28d6105 10856
e07286ed
NC
10857 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10858 it's private to us, and utf16_to_utf8{,reversed} take a
10859 (pointer,length) pair, rather than a NUL-terminated string. */
10860 if(SvCUR(utf16_buffer) & 1) {
10861 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10862 SvCUR_set(utf16_buffer, 1);
10863 } else {
10864 SvCUR_set(utf16_buffer, 0);
10865 }
2a773401 10866 }
c28d6105
NC
10867 DEBUG_P(PerlIO_printf(Perl_debug_log,
10868 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10869 status,
10870 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10871 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 10872 return retval;
6e3aabd6 10873}
81a923f4
NC
10874
10875static U8 *
10876S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10877{
2a773401 10878 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 10879
c85ae797
NC
10880 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10881
c28d6105 10882 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 10883 sv_setpvs(filter, "");
2a773401 10884 IoLINES(filter) = reversed;
c28d6105
NC
10885 IoPAGE(filter) = 1; /* Not EOF */
10886
10887 /* Sadly, we have to return a valid pointer, come what may, so we have to
10888 ignore any error return from this. */
10889 SvCUR_set(PL_linestr, 0);
10890 if (FILTER_READ(0, PL_linestr, 0)) {
10891 SvUTF8_on(PL_linestr);
81a923f4 10892 } else {
c28d6105 10893 SvUTF8_on(PL_linestr);
81a923f4 10894 }
c28d6105 10895 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
10896 return (U8*)SvPVX(PL_linestr);
10897}
6e3aabd6 10898#endif
9f4817db 10899
f333445c
JP
10900/*
10901Returns a pointer to the next character after the parsed
10902vstring, as well as updating the passed in sv.
10903
10904Function must be called like
10905
561b68a9 10906 sv = newSV(5);
65b06e02 10907 s = scan_vstring(s,e,sv);
f333445c 10908
65b06e02 10909where s and e are the start and end of the string.
f333445c
JP
10910The sv should already be large enough to store the vstring
10911passed in, for performance reasons.
10912
10913*/
10914
10915char *
15f169a1 10916Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 10917{
97aff369 10918 dVAR;
bfed75c6
AL
10919 const char *pos = s;
10920 const char *start = s;
7918f24d
NC
10921
10922 PERL_ARGS_ASSERT_SCAN_VSTRING;
10923
f333445c 10924 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 10925 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 10926 pos++;
f333445c
JP
10927 if ( *pos != '.') {
10928 /* this may not be a v-string if followed by => */
bfed75c6 10929 const char *next = pos;
65b06e02 10930 while (next < e && isSPACE(*next))
8fc7bb1c 10931 ++next;
65b06e02 10932 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
10933 /* return string not v-string */
10934 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 10935 return (char *)pos;
f333445c
JP
10936 }
10937 }
10938
10939 if (!isALPHA(*pos)) {
89ebb4a3 10940 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 10941
d4c19fe8
AL
10942 if (*s == 'v')
10943 s++; /* get past 'v' */
f333445c 10944
76f68e9b 10945 sv_setpvs(sv, "");
f333445c
JP
10946
10947 for (;;) {
d4c19fe8 10948 /* this is atoi() that tolerates underscores */
0bd48802
AL
10949 U8 *tmpend;
10950 UV rev = 0;
d4c19fe8
AL
10951 const char *end = pos;
10952 UV mult = 1;
10953 while (--end >= s) {
10954 if (*end != '_') {
10955 const UV orev = rev;
f333445c
JP
10956 rev += (*end - '0') * mult;
10957 mult *= 10;
9b387841
NC
10958 if (orev > rev)
10959 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10960 "Integer overflow in decimal number");
f333445c
JP
10961 }
10962 }
10963#ifdef EBCDIC
10964 if (rev > 0x7FFFFFFF)
10965 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10966#endif
10967 /* Append native character for the rev point */
10968 tmpend = uvchr_to_utf8(tmpbuf, rev);
10969 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10970 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10971 SvUTF8_on(sv);
65b06e02 10972 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
10973 s = ++pos;
10974 else {
10975 s = pos;
10976 break;
10977 }
65b06e02 10978 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
10979 pos++;
10980 }
10981 SvPOK_on(sv);
10982 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10983 SvRMAGICAL_on(sv);
10984 }
73d840c0 10985 return (char *)s;
f333445c
JP
10986}
10987
88e1f1a2
JV
10988int
10989Perl_keyword_plugin_standard(pTHX_
10990 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10991{
10992 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10993 PERL_UNUSED_CONTEXT;
10994 PERL_UNUSED_ARG(keyword_ptr);
10995 PERL_UNUSED_ARG(keyword_len);
10996 PERL_UNUSED_ARG(op_ptr);
10997 return KEYWORD_PLUGIN_DECLINE;
10998}
10999
78cdf107 11000#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11001static void
78cdf107 11002S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11003{
11004 SAVEI32(PL_lex_brackets);
11005 if (PL_lex_brackets > 100)
11006 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11007 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11008 SAVEI32(PL_lex_allbrackets);
11009 PL_lex_allbrackets = 0;
11010 SAVEI8(PL_lex_fakeeof);
2dcac756 11011 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11012 if(yyparse(gramtype) && !PL_parser->error_count)
11013 qerror(Perl_mess(aTHX_ "Parse error"));
11014}
11015
78cdf107 11016#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11017static OP *
78cdf107 11018S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11019{
11020 OP *o;
11021 ENTER;
11022 SAVEVPTR(PL_eval_root);
11023 PL_eval_root = NULL;
78cdf107 11024 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11025 o = PL_eval_root;
11026 LEAVE;
11027 return o;
11028}
11029
78cdf107
Z
11030#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11031static OP *
11032S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11033{
11034 OP *exprop;
11035 if (flags & ~PARSE_OPTIONAL)
11036 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11037 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11038 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11039 if (!PL_parser->error_count)
11040 qerror(Perl_mess(aTHX_ "Parse error"));
11041 exprop = newOP(OP_NULL, 0);
11042 }
11043 return exprop;
11044}
11045
11046/*
11047=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11048
11049Parse a Perl arithmetic expression. This may contain operators of precedence
11050down to the bit shift operators. The expression must be followed (and thus
11051terminated) either by a comparison or lower-precedence operator or by
11052something that would normally terminate an expression such as semicolon.
11053If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11054otherwise it is mandatory. It is up to the caller to ensure that the
11055dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11056the source of the code to be parsed and the lexical context for the
11057expression.
11058
11059The op tree representing the expression is returned. If an optional
11060expression is absent, a null pointer is returned, otherwise the pointer
11061will be non-null.
11062
11063If an error occurs in parsing or compilation, in most cases a valid op
11064tree is returned anyway. The error is reflected in the parser state,
11065normally resulting in a single exception at the top level of parsing
11066which covers all the compilation errors that occurred. Some compilation
11067errors, however, will throw an exception immediately.
11068
11069=cut
11070*/
11071
11072OP *
11073Perl_parse_arithexpr(pTHX_ U32 flags)
11074{
11075 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11076}
11077
11078/*
11079=for apidoc Amx|OP *|parse_termexpr|U32 flags
11080
11081Parse a Perl term expression. This may contain operators of precedence
11082down to the assignment operators. The expression must be followed (and thus
11083terminated) either by a comma or lower-precedence operator or by
11084something that would normally terminate an expression such as semicolon.
11085If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11086otherwise it is mandatory. It is up to the caller to ensure that the
11087dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11088the source of the code to be parsed and the lexical context for the
11089expression.
11090
11091The op tree representing the expression is returned. If an optional
11092expression is absent, a null pointer is returned, otherwise the pointer
11093will be non-null.
11094
11095If an error occurs in parsing or compilation, in most cases a valid op
11096tree is returned anyway. The error is reflected in the parser state,
11097normally resulting in a single exception at the top level of parsing
11098which covers all the compilation errors that occurred. Some compilation
11099errors, however, will throw an exception immediately.
11100
11101=cut
11102*/
11103
11104OP *
11105Perl_parse_termexpr(pTHX_ U32 flags)
11106{
11107 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11108}
11109
11110/*
11111=for apidoc Amx|OP *|parse_listexpr|U32 flags
11112
11113Parse a Perl list expression. This may contain operators of precedence
11114down to the comma operator. The expression must be followed (and thus
11115terminated) either by a low-precedence logic operator such as C<or> or by
11116something that would normally terminate an expression such as semicolon.
11117If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11118otherwise it is mandatory. It is up to the caller to ensure that the
11119dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11120the source of the code to be parsed and the lexical context for the
11121expression.
11122
11123The op tree representing the expression is returned. If an optional
11124expression is absent, a null pointer is returned, otherwise the pointer
11125will be non-null.
11126
11127If an error occurs in parsing or compilation, in most cases a valid op
11128tree is returned anyway. The error is reflected in the parser state,
11129normally resulting in a single exception at the top level of parsing
11130which covers all the compilation errors that occurred. Some compilation
11131errors, however, will throw an exception immediately.
11132
11133=cut
11134*/
11135
11136OP *
11137Perl_parse_listexpr(pTHX_ U32 flags)
11138{
11139 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11140}
11141
11142/*
11143=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11144
11145Parse a single complete Perl expression. This allows the full
11146expression grammar, including the lowest-precedence operators such
11147as C<or>. The expression must be followed (and thus terminated) by a
11148token that an expression would normally be terminated by: end-of-file,
11149closing bracketing punctuation, semicolon, or one of the keywords that
11150signals a postfix expression-statement modifier. If I<flags> includes
11151C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11152mandatory. It is up to the caller to ensure that the dynamic parser
11153state (L</PL_parser> et al) is correctly set to reflect the source of
11154the code to be parsed and the lexical context for the expression.
11155
11156The op tree representing the expression is returned. If an optional
11157expression is absent, a null pointer is returned, otherwise the pointer
11158will be non-null.
11159
11160If an error occurs in parsing or compilation, in most cases a valid op
11161tree is returned anyway. The error is reflected in the parser state,
11162normally resulting in a single exception at the top level of parsing
11163which covers all the compilation errors that occurred. Some compilation
11164errors, however, will throw an exception immediately.
11165
11166=cut
11167*/
11168
11169OP *
11170Perl_parse_fullexpr(pTHX_ U32 flags)
11171{
11172 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11173}
11174
e53d8f76
Z
11175/*
11176=for apidoc Amx|OP *|parse_block|U32 flags
11177
11178Parse a single complete Perl code block. This consists of an opening
11179brace, a sequence of statements, and a closing brace. The block
11180constitutes a lexical scope, so C<my> variables and various compile-time
11181effects can be contained within it. It is up to the caller to ensure
11182that the dynamic parser state (L</PL_parser> et al) is correctly set to
11183reflect the source of the code to be parsed and the lexical context for
11184the statement.
11185
11186The op tree representing the code block is returned. This is always a
11187real op, never a null pointer. It will normally be a C<lineseq> list,
11188including C<nextstate> or equivalent ops. No ops to construct any kind
11189of runtime scope are included by virtue of it being a block.
11190
11191If an error occurs in parsing or compilation, in most cases a valid op
11192tree (most likely null) is returned anyway. The error is reflected in
11193the parser state, normally resulting in a single exception at the top
11194level of parsing which covers all the compilation errors that occurred.
11195Some compilation errors, however, will throw an exception immediately.
11196
11197The I<flags> parameter is reserved for future use, and must always
11198be zero.
11199
11200=cut
11201*/
11202
11203OP *
11204Perl_parse_block(pTHX_ U32 flags)
11205{
11206 if (flags)
11207 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11208 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11209}
11210
1da4ca5f 11211/*
8359b381
Z
11212=for apidoc Amx|OP *|parse_barestmt|U32 flags
11213
11214Parse a single unadorned Perl statement. This may be a normal imperative
11215statement or a declaration that has compile-time effect. It does not
11216include any label or other affixture. It is up to the caller to ensure
11217that the dynamic parser state (L</PL_parser> et al) is correctly set to
11218reflect the source of the code to be parsed and the lexical context for
11219the statement.
11220
11221The op tree representing the statement is returned. This may be a
11222null pointer if the statement is null, for example if it was actually
11223a subroutine definition (which has compile-time side effects). If not
11224null, it will be ops directly implementing the statement, suitable to
11225pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11226equivalent op (except for those embedded in a scope contained entirely
11227within the statement).
11228
11229If an error occurs in parsing or compilation, in most cases a valid op
11230tree (most likely null) is returned anyway. The error is reflected in
11231the parser state, normally resulting in a single exception at the top
11232level of parsing which covers all the compilation errors that occurred.
11233Some compilation errors, however, will throw an exception immediately.
11234
11235The I<flags> parameter is reserved for future use, and must always
11236be zero.
11237
11238=cut
11239*/
11240
11241OP *
11242Perl_parse_barestmt(pTHX_ U32 flags)
11243{
11244 if (flags)
11245 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11246 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11247}
11248
11249/*
361d9b55
Z
11250=for apidoc Amx|SV *|parse_label|U32 flags
11251
11252Parse a single label, possibly optional, of the type that may prefix a
11253Perl statement. It is up to the caller to ensure that the dynamic parser
11254state (L</PL_parser> et al) is correctly set to reflect the source of
11255the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11256label is optional, otherwise it is mandatory.
11257
11258The name of the label is returned in the form of a fresh scalar. If an
11259optional label is absent, a null pointer is returned.
11260
11261If an error occurs in parsing, which can only occur if the label is
11262mandatory, a valid label is returned anyway. The error is reflected in
11263the parser state, normally resulting in a single exception at the top
11264level of parsing which covers all the compilation errors that occurred.
11265
11266=cut
11267*/
11268
11269SV *
11270Perl_parse_label(pTHX_ U32 flags)
11271{
11272 if (flags & ~PARSE_OPTIONAL)
11273 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11274 if (PL_lex_state == LEX_KNOWNEXT) {
11275 PL_parser->yychar = yylex();
11276 if (PL_parser->yychar == LABEL) {
11277 char *lpv = pl_yylval.pval;
11278 STRLEN llen = strlen(lpv);
11279 SV *lsv;
11280 PL_parser->yychar = YYEMPTY;
11281 lsv = newSV_type(SVt_PV);
11282 SvPV_set(lsv, lpv);
11283 SvCUR_set(lsv, llen);
11284 SvLEN_set(lsv, llen+1);
11285 SvPOK_on(lsv);
11286 return lsv;
11287 } else {
11288 yyunlex();
11289 goto no_label;
11290 }
11291 } else {
11292 char *s, *t;
11293 U8 c;
11294 STRLEN wlen, bufptr_pos;
11295 lex_read_space(0);
11296 t = s = PL_bufptr;
11297 c = (U8)*s;
11298 if (!isIDFIRST_A(c))
11299 goto no_label;
11300 do {
11301 c = (U8)*++t;
11302 } while(isWORDCHAR_A(c));
11303 wlen = t - s;
11304 if (word_takes_any_delimeter(s, wlen))
11305 goto no_label;
11306 bufptr_pos = s - SvPVX(PL_linestr);
11307 PL_bufptr = t;
11308 lex_read_space(LEX_KEEP_PREVIOUS);
11309 t = PL_bufptr;
11310 s = SvPVX(PL_linestr) + bufptr_pos;
11311 if (t[0] == ':' && t[1] != ':') {
11312 PL_oldoldbufptr = PL_oldbufptr;
11313 PL_oldbufptr = s;
11314 PL_bufptr = t+1;
11315 return newSVpvn(s, wlen);
11316 } else {
11317 PL_bufptr = s;
11318 no_label:
11319 if (flags & PARSE_OPTIONAL) {
11320 return NULL;
11321 } else {
11322 qerror(Perl_mess(aTHX_ "Parse error"));
11323 return newSVpvs("x");
11324 }
11325 }
11326 }
11327}
11328
11329/*
28ac2b49
Z
11330=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11331
11332Parse a single complete Perl statement. This may be a normal imperative
8359b381 11333statement or a declaration that has compile-time effect, and may include
8e720305 11334optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11335parser state (L</PL_parser> et al) is correctly set to reflect the source
11336of the code to be parsed and the lexical context for the statement.
11337
11338The op tree representing the statement is returned. This may be a
11339null pointer if the statement is null, for example if it was actually
11340a subroutine definition (which has compile-time side effects). If not
11341null, it will be the result of a L</newSTATEOP> call, normally including
11342a C<nextstate> or equivalent op.
11343
11344If an error occurs in parsing or compilation, in most cases a valid op
11345tree (most likely null) is returned anyway. The error is reflected in
11346the parser state, normally resulting in a single exception at the top
11347level of parsing which covers all the compilation errors that occurred.
11348Some compilation errors, however, will throw an exception immediately.
11349
11350The I<flags> parameter is reserved for future use, and must always
11351be zero.
11352
11353=cut
11354*/
11355
11356OP *
11357Perl_parse_fullstmt(pTHX_ U32 flags)
11358{
28ac2b49
Z
11359 if (flags)
11360 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11361 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11362}
11363
07ffcb73
Z
11364/*
11365=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11366
11367Parse a sequence of zero or more Perl statements. These may be normal
11368imperative statements, including optional labels, or declarations
11369that have compile-time effect, or any mixture thereof. The statement
11370sequence ends when a closing brace or end-of-file is encountered in a
11371place where a new statement could have validly started. It is up to
11372the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11373is correctly set to reflect the source of the code to be parsed and the
11374lexical context for the statements.
11375
11376The op tree representing the statement sequence is returned. This may
11377be a null pointer if the statements were all null, for example if there
11378were no statements or if there were only subroutine definitions (which
11379have compile-time side effects). If not null, it will be a C<lineseq>
11380list, normally including C<nextstate> or equivalent ops.
11381
11382If an error occurs in parsing or compilation, in most cases a valid op
11383tree is returned anyway. The error is reflected in the parser state,
11384normally resulting in a single exception at the top level of parsing
11385which covers all the compilation errors that occurred. Some compilation
11386errors, however, will throw an exception immediately.
11387
11388The I<flags> parameter is reserved for future use, and must always
11389be zero.
11390
11391=cut
11392*/
11393
11394OP *
11395Perl_parse_stmtseq(pTHX_ U32 flags)
11396{
11397 OP *stmtseqop;
e53d8f76 11398 I32 c;
07ffcb73 11399 if (flags)
78cdf107
Z
11400 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11401 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
11402 c = lex_peek_unichar(0);
11403 if (c != -1 && c != /*{*/'}')
07ffcb73 11404 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
11405 return stmtseqop;
11406}
11407
ea25a9b2 11408void
f7e3d326 11409Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 11410{
f7e3d326 11411 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2 11412 deprecate("qw(...) as parentheses");
78cdf107 11413 force_next((4<<24)|')');
ea25a9b2
Z
11414 if (qwlist->op_type == OP_STUB) {
11415 op_free(qwlist);
11416 }
11417 else {
3d8e05a0 11418 start_force(PL_curforce);
ea25a9b2
Z
11419 NEXTVAL_NEXTTOKE.opval = qwlist;
11420 force_next(THING);
11421 }
78cdf107 11422 force_next((2<<24)|'(');
ea25a9b2
Z
11423}
11424
28ac2b49 11425/*
1da4ca5f
NC
11426 * Local variables:
11427 * c-indentation-style: bsd
11428 * c-basic-offset: 4
11429 * indent-tabs-mode: t
11430 * End:
11431 *
37442d52
RGS
11432 * ex: set ts=8 sts=4 sw=4 noet:
11433 */