This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Don’t let ?: folding affect truncate
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
802a15e9 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805 150 * can get by with a single comparison (if the compiler is smart enough).
9da1dd8f
DM
151 *
152 * These values refer to the various states within a sublex parse,
153 * i.e. within a double quotish string
79072805
LW
154 */
155
fb73857a 156/* #define LEX_NOTPARSING 11 is done in perl.h. */
157
b6007c36
DM
158#define LEX_NORMAL 10 /* normal code (ie not within "...") */
159#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
160#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
161#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
162#define LEX_INTERPSTART 6 /* expecting the start of a $var */
163
164 /* at end of code, eg "$x" followed by: */
165#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
166#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
167
168#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
169 string or after \E, $foo, etc */
170#define LEX_INTERPCONST 2 /* NOT USED */
171#define LEX_FORMLINE 1 /* expecting a format line */
172#define LEX_KNOWNEXT 0 /* next token known; just return it */
173
79072805 174
bbf60fe6 175#ifdef DEBUGGING
27da23d5 176static const char* const lex_state_names[] = {
bbf60fe6
DM
177 "KNOWNEXT",
178 "FORMLINE",
179 "INTERPCONST",
180 "INTERPCONCAT",
181 "INTERPENDMAYBE",
182 "INTERPEND",
183 "INTERPSTART",
184 "INTERPPUSH",
185 "INTERPCASEMOD",
186 "INTERPNORMAL",
187 "NORMAL"
188};
189#endif
190
79072805
LW
191#ifdef ff_next
192#undef ff_next
d48672a2
LW
193#endif
194
79072805 195#include "keywords.h"
fe14fcc3 196
ffb4593c
NT
197/* CLINE is a macro that ensures PL_copline has a sane value */
198
ae986130
LW
199#ifdef CLINE
200#undef CLINE
201#endif
57843af0 202#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 203
5db06880 204#ifdef PERL_MAD
29595ff2
NC
205# define SKIPSPACE0(s) skipspace0(s)
206# define SKIPSPACE1(s) skipspace1(s)
207# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
208# define PEEKSPACE(s) skipspace2(s,0)
209#else
210# define SKIPSPACE0(s) skipspace(s)
211# define SKIPSPACE1(s) skipspace(s)
212# define SKIPSPACE2(s,tsv) skipspace(s)
213# define PEEKSPACE(s) skipspace(s)
214#endif
215
ffb4593c
NT
216/*
217 * Convenience functions to return different tokens and prime the
9cbb5ea2 218 * lexer for the next token. They all take an argument.
ffb4593c
NT
219 *
220 * TOKEN : generic token (used for '(', DOLSHARP, etc)
221 * OPERATOR : generic operator
222 * AOPERATOR : assignment operator
223 * PREBLOCK : beginning the block after an if, while, foreach, ...
224 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
225 * PREREF : *EXPR where EXPR is not a simple identifier
226 * TERM : expression term
227 * LOOPX : loop exiting command (goto, last, dump, etc)
228 * FTST : file test operator
229 * FUN0 : zero-argument function
7eb971ee 230 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 231 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
232 * BOop : bitwise or or xor
233 * BAop : bitwise and
234 * SHop : shift operator
235 * PWop : power operator
9cbb5ea2 236 * PMop : pattern-matching operator
ffb4593c
NT
237 * Aop : addition-level operator
238 * Mop : multiplication-level operator
239 * Eop : equality-testing operator
e5edeb50 240 * Rop : relational operator <= != gt
ffb4593c
NT
241 *
242 * Also see LOP and lop() below.
243 */
244
998054bd 245#ifdef DEBUGGING /* Serve -DT. */
704d4215 246# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 247#else
bbf60fe6 248# define REPORT(retval) (retval)
998054bd
SC
249#endif
250
bbf60fe6
DM
251#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
252#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
253#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
254#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
255#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
256#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
257#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
258#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
259#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
260#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 261#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
262#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
263#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
264#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
265#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
266#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
267#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
268#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
269#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
270#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
271#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 272
a687059c
LW
273/* This bit of chicanery makes a unary function followed by
274 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
275 * The UNIDOR macro is for unary functions that can be followed by the //
276 * operator (such as C<shift // 0>).
a687059c 277 */
d68ce4ac 278#define UNI3(f,x,have_x) { \
6154021b 279 pl_yylval.ival = f; \
d68ce4ac 280 if (have_x) PL_expect = x; \
376fcdbf
AL
281 PL_bufptr = s; \
282 PL_last_uni = PL_oldbufptr; \
283 PL_last_lop_op = f; \
284 if (*s == '(') \
285 return REPORT( (int)FUNC1 ); \
29595ff2 286 s = PEEKSPACE(s); \
376fcdbf
AL
287 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
288 }
d68ce4ac
FC
289#define UNI(f) UNI3(f,XTERM,1)
290#define UNIDOR(f) UNI3(f,XTERMORDORDOR,1)
b5fb7ce3
FC
291#define UNIPROTO(f,optional) { \
292 if (optional) PL_last_uni = PL_oldbufptr; \
22393538
MH
293 OPERATOR(f); \
294 }
a687059c 295
d68ce4ac 296#define UNIBRACK(f) UNI3(f,0,0)
79072805 297
9f68db38 298/* grandfather return to old style */
78cdf107
Z
299#define OLDLOP(f) \
300 do { \
301 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
302 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
303 pl_yylval.ival = (f); \
304 PL_expect = XTERM; \
305 PL_bufptr = s; \
306 return (int)LSTOP; \
307 } while(0)
79072805 308
8fa7f367
JH
309#ifdef DEBUGGING
310
6154021b 311/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
312enum token_type {
313 TOKENTYPE_NONE,
314 TOKENTYPE_IVAL,
6154021b 315 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
316 TOKENTYPE_PVAL,
317 TOKENTYPE_OPVAL,
318 TOKENTYPE_GVVAL
319};
320
6d4a66ac
NC
321static struct debug_tokens {
322 const int token;
323 enum token_type type;
324 const char *name;
325} const debug_tokens[] =
9041c2e3 326{
bbf60fe6
DM
327 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
328 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
329 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
330 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
331 { ARROW, TOKENTYPE_NONE, "ARROW" },
332 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
333 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
334 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
335 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
336 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 337 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
338 { DO, TOKENTYPE_NONE, "DO" },
339 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
340 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
341 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
342 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
343 { ELSE, TOKENTYPE_NONE, "ELSE" },
344 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
345 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
346 { FOR, TOKENTYPE_IVAL, "FOR" },
347 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
348 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
349 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 350 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
351 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
352 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
353 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 354 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
355 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
356 { IF, TOKENTYPE_IVAL, "IF" },
5db1eb8d 357 { LABEL, TOKENTYPE_OPVAL, "LABEL" },
bbf60fe6
DM
358 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
359 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
360 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
361 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
362 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
363 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
364 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
365 { MY, TOKENTYPE_IVAL, "MY" },
366 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
367 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
368 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
369 { OROP, TOKENTYPE_IVAL, "OROP" },
370 { OROR, TOKENTYPE_NONE, "OROR" },
371 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
372 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
373 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
374 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
375 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
376 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
377 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
378 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
379 { PREINC, TOKENTYPE_NONE, "PREINC" },
380 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
381 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
382 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
383 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
384 { SUB, TOKENTYPE_NONE, "SUB" },
385 { THING, TOKENTYPE_OPVAL, "THING" },
386 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
387 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
388 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
389 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
390 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
391 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 392 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
393 { WHILE, TOKENTYPE_IVAL, "WHILE" },
394 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 395 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 396 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
397};
398
6154021b 399/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 400
bbf60fe6 401STATIC int
704d4215 402S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 403{
97aff369 404 dVAR;
7918f24d
NC
405
406 PERL_ARGS_ASSERT_TOKEREPORT;
407
bbf60fe6 408 if (DEBUG_T_TEST) {
bd61b366 409 const char *name = NULL;
bbf60fe6 410 enum token_type type = TOKENTYPE_NONE;
f54cb97a 411 const struct debug_tokens *p;
396482e1 412 SV* const report = newSVpvs("<== ");
bbf60fe6 413
f54cb97a 414 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
415 if (p->token == (int)rv) {
416 name = p->name;
417 type = p->type;
418 break;
419 }
420 }
421 if (name)
54667de8 422 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
423 else if ((char)rv > ' ' && (char)rv < '~')
424 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
425 else if (!rv)
396482e1 426 sv_catpvs(report, "EOF");
bbf60fe6
DM
427 else
428 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
429 switch (type) {
430 case TOKENTYPE_NONE:
431 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
432 break;
433 case TOKENTYPE_IVAL:
704d4215 434 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
435 break;
436 case TOKENTYPE_OPNUM:
437 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 438 PL_op_name[lvalp->ival]);
bbf60fe6
DM
439 break;
440 case TOKENTYPE_PVAL:
704d4215 441 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
442 break;
443 case TOKENTYPE_OPVAL:
704d4215 444 if (lvalp->opval) {
401441c0 445 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
446 PL_op_name[lvalp->opval->op_type]);
447 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 448 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 449 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
450 }
451
452 }
401441c0 453 else
396482e1 454 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
455 break;
456 }
b6007c36 457 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
458 };
459 return (int)rv;
998054bd
SC
460}
461
b6007c36
DM
462
463/* print the buffer with suitable escapes */
464
465STATIC void
15f169a1 466S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 467{
396482e1 468 SV* const tmp = newSVpvs("");
7918f24d
NC
469
470 PERL_ARGS_ASSERT_PRINTBUF;
471
b6007c36
DM
472 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
473 SvREFCNT_dec(tmp);
474}
475
8fa7f367
JH
476#endif
477
8290c323
NC
478static int
479S_deprecate_commaless_var_list(pTHX) {
480 PL_expect = XTERM;
481 deprecate("comma-less variable list");
482 return REPORT(','); /* grandfather non-comma-format format */
483}
484
ffb4593c
NT
485/*
486 * S_ao
487 *
c963b151
BD
488 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
489 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
490 */
491
76e3520e 492STATIC int
cea2e8a9 493S_ao(pTHX_ int toketype)
a0d0e21e 494{
97aff369 495 dVAR;
3280af22
NIS
496 if (*PL_bufptr == '=') {
497 PL_bufptr++;
a0d0e21e 498 if (toketype == ANDAND)
6154021b 499 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 500 else if (toketype == OROR)
6154021b 501 pl_yylval.ival = OP_ORASSIGN;
c963b151 502 else if (toketype == DORDOR)
6154021b 503 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
504 toketype = ASSIGNOP;
505 }
506 return toketype;
507}
508
ffb4593c
NT
509/*
510 * S_no_op
511 * When Perl expects an operator and finds something else, no_op
512 * prints the warning. It always prints "<something> found where
513 * operator expected. It prints "Missing semicolon on previous line?"
514 * if the surprise occurs at the start of the line. "do you need to
515 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
516 * where the compiler doesn't know if foo is a method call or a function.
517 * It prints "Missing operator before end of line" if there's nothing
518 * after the missing operator, or "... before <...>" if there is something
519 * after the missing operator.
520 */
521
76e3520e 522STATIC void
15f169a1 523S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 524{
97aff369 525 dVAR;
9d4ba2ae
AL
526 char * const oldbp = PL_bufptr;
527 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 528
7918f24d
NC
529 PERL_ARGS_ASSERT_NO_OP;
530
1189a94a
GS
531 if (!s)
532 s = oldbp;
07c798fb 533 else
1189a94a 534 PL_bufptr = s;
734ab321 535 yywarn(Perl_form(aTHX_ "%s found where operator expected", what), UTF ? SVf_UTF8 : 0);
56da5a46
RGS
536 if (ckWARN_d(WARN_SYNTAX)) {
537 if (is_first)
538 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
539 "\t(Missing semicolon on previous line?)\n");
540 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 541 const char *t;
734ab321
BF
542 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':');
543 t += UTF ? UTF8SKIP(t) : 1)
c35e046a 544 NOOP;
56da5a46
RGS
545 if (t < PL_bufptr && isSPACE(*t))
546 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
547 "\t(Do you need to predeclare %"SVf"?)\n",
548 SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
549 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46
RGS
550 }
551 else {
552 assert(s >= oldbp);
553 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
734ab321
BF
554 "\t(Missing operator before %"SVf"?)\n",
555 SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
556 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
56da5a46 557 }
07c798fb 558 }
3280af22 559 PL_bufptr = oldbp;
8990e307
LW
560}
561
ffb4593c
NT
562/*
563 * S_missingterm
564 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 565 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
566 * If we're in a delimited string and the delimiter is a control
567 * character, it's reformatted into a two-char sequence like ^C.
568 * This is fatal.
569 */
570
76e3520e 571STATIC void
cea2e8a9 572S_missingterm(pTHX_ char *s)
8990e307 573{
97aff369 574 dVAR;
8990e307
LW
575 char tmpbuf[3];
576 char q;
577 if (s) {
9d4ba2ae 578 char * const nl = strrchr(s,'\n');
d2719217 579 if (nl)
8990e307
LW
580 *nl = '\0';
581 }
463559e7 582 else if (isCNTRL(PL_multi_close)) {
8990e307 583 *tmpbuf = '^';
585ec06d 584 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
585 tmpbuf[2] = '\0';
586 s = tmpbuf;
587 }
588 else {
eb160463 589 *tmpbuf = (char)PL_multi_close;
8990e307
LW
590 tmpbuf[1] = '\0';
591 s = tmpbuf;
592 }
593 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 594 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 595}
79072805 596
dd0ac2b9
FC
597#include "feature.h"
598
0d863452 599/*
0d863452
RH
600 * Check whether the named feature is enabled.
601 */
26ea9e12 602bool
3fff3427 603Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 604{
97aff369 605 dVAR;
4a731d7b 606 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
607
608 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
ca4d40c4
FC
609
610 assert(CURRENT_FEATURE_BUNDLE == FEATURE_BUNDLE_CUSTOM);
7918f24d 611
26ea9e12
NC
612 if (namelen > MAX_FEATURE_LEN)
613 return FALSE;
3fff3427 614 memcpy(&he_name[8], name, namelen);
7d69d4a6 615
c8ca97b0
NC
616 return cBOOL(cop_hints_fetch_pvn(PL_curcop, he_name, 8 + namelen, 0,
617 REFCOUNTED_HE_EXISTS));
0d863452
RH
618}
619
ffb4593c 620/*
9cbb5ea2
GS
621 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
622 * utf16-to-utf8-reversed.
ffb4593c
NT
623 */
624
c39cd008
GS
625#ifdef PERL_CR_FILTER
626static void
627strip_return(SV *sv)
628{
95a20fc0 629 register const char *s = SvPVX_const(sv);
9d4ba2ae 630 register const char * const e = s + SvCUR(sv);
7918f24d
NC
631
632 PERL_ARGS_ASSERT_STRIP_RETURN;
633
c39cd008
GS
634 /* outer loop optimized to do nothing if there are no CR-LFs */
635 while (s < e) {
636 if (*s++ == '\r' && *s == '\n') {
637 /* hit a CR-LF, need to copy the rest */
638 register char *d = s - 1;
639 *d++ = *s++;
640 while (s < e) {
641 if (*s == '\r' && s[1] == '\n')
642 s++;
643 *d++ = *s++;
644 }
645 SvCUR(sv) -= s - d;
646 return;
647 }
648 }
649}
a868473f 650
76e3520e 651STATIC I32
c39cd008 652S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 653{
f54cb97a 654 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
655 if (count > 0 && !maxlen)
656 strip_return(sv);
657 return count;
a868473f
NIS
658}
659#endif
660
ffb4593c 661/*
8eaa0acf
Z
662=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
663
664Creates and initialises a new lexer/parser state object, supplying
665a context in which to lex and parse from a new source of Perl code.
666A pointer to the new state object is placed in L</PL_parser>. An entry
667is made on the save stack so that upon unwinding the new state object
668will be destroyed and the former value of L</PL_parser> will be restored.
669Nothing else need be done to clean up the parsing context.
670
671The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
672non-null, provides a string (in SV form) containing code to be parsed.
673A copy of the string is made, so subsequent modification of I<line>
674does not affect parsing. I<rsfp>, if non-null, provides an input stream
675from which code will be read to be parsed. If both are non-null, the
676code in I<line> comes first and must consist of complete lines of input,
677and I<rsfp> supplies the remainder of the source.
678
e368b3bd
FC
679The I<flags> parameter is reserved for future use. Currently it is only
680used by perl internally, so extensions should always pass zero.
8eaa0acf
Z
681
682=cut
683*/
ffb4593c 684
27fcb6ee 685/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
87606032
NC
686 can share filters with the current parser.
687 LEX_START_DONT_CLOSE indicates that the file handle wasn't opened by the
688 caller, hence isn't owned by the parser, so shouldn't be closed on parser
689 destruction. This is used to handle the case of defaulting to reading the
690 script from the standard input because no filename was given on the command
691 line (without getting confused by situation where STDIN has been closed, so
692 the script handle is opened on fd 0) */
27fcb6ee 693
a0d0e21e 694void
8eaa0acf 695Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 696{
97aff369 697 dVAR;
6ef55633 698 const char *s = NULL;
5486870f 699 yy_parser *parser, *oparser;
60d63348 700 if (flags && flags & ~LEX_START_FLAGS)
8eaa0acf 701 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
702
703 /* create and initialise a parser */
704
199e78b7 705 Newxz(parser, 1, yy_parser);
5486870f 706 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
707 PL_parser = parser;
708
28ac2b49
Z
709 parser->stack = NULL;
710 parser->ps = NULL;
711 parser->stack_size = 0;
acdf0a21 712
e3abe207
DM
713 /* on scope exit, free this parser and restore any outer one */
714 SAVEPARSER(parser);
7c4baf47 715 parser->saved_curcop = PL_curcop;
e3abe207 716
acdf0a21 717 /* initialise lexer state */
8990e307 718
fb205e7a
DM
719#ifdef PERL_MAD
720 parser->curforce = -1;
721#else
722 parser->nexttoke = 0;
723#endif
ca4cfd28 724 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 725 parser->copline = NOLINE;
5afb0a62 726 parser->lex_state = LEX_NORMAL;
c2598295 727 parser->expect = XSTATE;
2f9285f8 728 parser->rsfp = rsfp;
27fcb6ee
FC
729 parser->rsfp_filters =
730 !(flags & LEX_START_SAME_FILTER) || !oparser
d3cd8e11
FC
731 ? NULL
732 : MUTABLE_AV(SvREFCNT_inc(
733 oparser->rsfp_filters
734 ? oparser->rsfp_filters
735 : (oparser->rsfp_filters = newAV())
736 ));
2f9285f8 737
199e78b7
DM
738 Newx(parser->lex_brackstack, 120, char);
739 Newx(parser->lex_casestack, 12, char);
740 *parser->lex_casestack = '\0';
02b34bbe 741
10efb74f 742 if (line) {
0528fd32 743 STRLEN len;
10efb74f 744 s = SvPV_const(line, len);
0abcdfa4
FC
745 parser->linestr = flags & LEX_START_COPIED
746 ? SvREFCNT_inc_simple_NN(line)
747 : newSVpvn_flags(s, len, SvUTF8(line));
37555a72 748 if (!len || s[len-1] != ';')
bdc0bf6f 749 sv_catpvs(parser->linestr, "\n;");
0abcdfa4
FC
750 } else {
751 parser->linestr = newSVpvs("\n;");
8990e307 752 }
f06b5848
DM
753 parser->oldoldbufptr =
754 parser->oldbufptr =
755 parser->bufptr =
756 parser->linestart = SvPVX(parser->linestr);
757 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
758 parser->last_lop = parser->last_uni = NULL;
87606032
NC
759 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES
760 |LEX_DONT_CLOSE_RSFP);
737c24fc 761
60d63348 762 parser->in_pod = parser->filtered = 0;
79072805 763}
a687059c 764
e3abe207
DM
765
766/* delete a parser object */
767
768void
769Perl_parser_free(pTHX_ const yy_parser *parser)
770{
7918f24d
NC
771 PERL_ARGS_ASSERT_PARSER_FREE;
772
7c4baf47 773 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
774 SvREFCNT_dec(parser->linestr);
775
87606032 776 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
2f9285f8 777 PerlIO_clearerr(parser->rsfp);
799361c3
SH
778 else if (parser->rsfp && (!parser->old_parser ||
779 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 780 PerlIO_close(parser->rsfp);
5486870f 781 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 782
e3abe207
DM
783 Safefree(parser->lex_brackstack);
784 Safefree(parser->lex_casestack);
785 PL_parser = parser->old_parser;
786 Safefree(parser);
787}
788
789
ffb4593c 790/*
f0e67a1d
Z
791=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
792
793Buffer scalar containing the chunk currently under consideration of the
794text currently being lexed. This is always a plain string scalar (for
795which C<SvPOK> is true). It is not intended to be used as a scalar by
796normal scalar means; instead refer to the buffer directly by the pointer
797variables described below.
798
799The lexer maintains various C<char*> pointers to things in the
800C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
801reallocated, all of these pointers must be updated. Don't attempt to
802do this manually, but rather use L</lex_grow_linestr> if you need to
803reallocate the buffer.
804
805The content of the text chunk in the buffer is commonly exactly one
806complete line of input, up to and including a newline terminator,
807but there are situations where it is otherwise. The octets of the
808buffer may be intended to be interpreted as either UTF-8 or Latin-1.
809The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
810flag on this scalar, which may disagree with it.
811
812For direct examination of the buffer, the variable
813L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
814lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
815of these pointers is usually preferable to examination of the scalar
816through normal scalar means.
817
818=for apidoc AmxU|char *|PL_parser-E<gt>bufend
819
820Direct pointer to the end of the chunk of text currently being lexed, the
821end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
822+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
823always located at the end of the buffer, and does not count as part of
824the buffer's contents.
825
826=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
827
828Points to the current position of lexing inside the lexer buffer.
829Characters around this point may be freely examined, within
830the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
831L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
832interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
833
834Lexing code (whether in the Perl core or not) moves this pointer past
835the characters that it consumes. It is also expected to perform some
836bookkeeping whenever a newline character is consumed. This movement
837can be more conveniently performed by the function L</lex_read_to>,
838which handles newlines appropriately.
839
840Interpretation of the buffer's octets can be abstracted out by
841using the slightly higher-level functions L</lex_peek_unichar> and
842L</lex_read_unichar>.
843
844=for apidoc AmxU|char *|PL_parser-E<gt>linestart
845
846Points to the start of the current line inside the lexer buffer.
847This is useful for indicating at which column an error occurred, and
848not much else. This must be updated by any lexing code that consumes
849a newline; the function L</lex_read_to> handles this detail.
850
851=cut
852*/
853
854/*
855=for apidoc Amx|bool|lex_bufutf8
856
857Indicates whether the octets in the lexer buffer
858(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
859of Unicode characters. If not, they should be interpreted as Latin-1
860characters. This is analogous to the C<SvUTF8> flag for scalars.
861
862In UTF-8 mode, it is not guaranteed that the lexer buffer actually
863contains valid UTF-8. Lexing code must be robust in the face of invalid
864encoding.
865
866The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
867is significant, but not the whole story regarding the input character
868encoding. Normally, when a file is being read, the scalar contains octets
869and its C<SvUTF8> flag is off, but the octets should be interpreted as
870UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
871however, the scalar may have the C<SvUTF8> flag on, and in this case its
872octets should be interpreted as UTF-8 unless the C<use bytes> pragma
873is in effect. This logic may change in the future; use this function
874instead of implementing the logic yourself.
875
876=cut
877*/
878
879bool
880Perl_lex_bufutf8(pTHX)
881{
882 return UTF;
883}
884
885/*
886=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
887
888Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
889at least I<len> octets (including terminating NUL). Returns a
890pointer to the reallocated buffer. This is necessary before making
891any direct modification of the buffer that would increase its length.
892L</lex_stuff_pvn> provides a more convenient way to insert text into
893the buffer.
894
895Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
896this function updates all of the lexer's variables that point directly
897into the buffer.
898
899=cut
900*/
901
902char *
903Perl_lex_grow_linestr(pTHX_ STRLEN len)
904{
905 SV *linestr;
906 char *buf;
907 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
c7641931 908 STRLEN linestart_pos, last_uni_pos, last_lop_pos, re_eval_start_pos;
f0e67a1d
Z
909 linestr = PL_parser->linestr;
910 buf = SvPVX(linestr);
911 if (len <= SvLEN(linestr))
912 return buf;
913 bufend_pos = PL_parser->bufend - buf;
914 bufptr_pos = PL_parser->bufptr - buf;
915 oldbufptr_pos = PL_parser->oldbufptr - buf;
916 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
917 linestart_pos = PL_parser->linestart - buf;
918 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
919 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
c7641931
DM
920 re_eval_start_pos = PL_sublex_info.re_eval_start ?
921 PL_sublex_info.re_eval_start - buf : 0;
922
f0e67a1d 923 buf = sv_grow(linestr, len);
c7641931 924
f0e67a1d
Z
925 PL_parser->bufend = buf + bufend_pos;
926 PL_parser->bufptr = buf + bufptr_pos;
927 PL_parser->oldbufptr = buf + oldbufptr_pos;
928 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
929 PL_parser->linestart = buf + linestart_pos;
930 if (PL_parser->last_uni)
931 PL_parser->last_uni = buf + last_uni_pos;
932 if (PL_parser->last_lop)
933 PL_parser->last_lop = buf + last_lop_pos;
c7641931
DM
934 if (PL_sublex_info.re_eval_start)
935 PL_sublex_info.re_eval_start = buf + re_eval_start_pos;
f0e67a1d
Z
936 return buf;
937}
938
939/*
83aa740e 940=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
941
942Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
943immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
944reallocating the buffer if necessary. This means that lexing code that
945runs later will see the characters as if they had appeared in the input.
946It is not recommended to do this as part of normal parsing, and most
947uses of this facility run the risk of the inserted characters being
948interpreted in an unintended manner.
949
950The string to be inserted is represented by I<len> octets starting
951at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
952according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
953The characters are recoded for the lexer buffer, according to how the
954buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 955to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
956function is more convenient.
957
958=cut
959*/
960
961void
83aa740e 962Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 963{
749123ff 964 dVAR;
f0e67a1d
Z
965 char *bufptr;
966 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
967 if (flags & ~(LEX_STUFF_UTF8))
968 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
969 if (UTF) {
970 if (flags & LEX_STUFF_UTF8) {
971 goto plain_copy;
972 } else {
973 STRLEN highhalf = 0;
83aa740e 974 const char *p, *e = pv+len;
f0e67a1d
Z
975 for (p = pv; p != e; p++)
976 highhalf += !!(((U8)*p) & 0x80);
977 if (!highhalf)
978 goto plain_copy;
979 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
980 bufptr = PL_parser->bufptr;
981 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
982 SvCUR_set(PL_parser->linestr,
983 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
984 PL_parser->bufend += len+highhalf;
985 for (p = pv; p != e; p++) {
986 U8 c = (U8)*p;
987 if (c & 0x80) {
988 *bufptr++ = (char)(0xc0 | (c >> 6));
989 *bufptr++ = (char)(0x80 | (c & 0x3f));
990 } else {
991 *bufptr++ = (char)c;
992 }
993 }
994 }
995 } else {
996 if (flags & LEX_STUFF_UTF8) {
997 STRLEN highhalf = 0;
83aa740e 998 const char *p, *e = pv+len;
f0e67a1d
Z
999 for (p = pv; p != e; p++) {
1000 U8 c = (U8)*p;
1001 if (c >= 0xc4) {
1002 Perl_croak(aTHX_ "Lexing code attempted to stuff "
1003 "non-Latin-1 character into Latin-1 input");
1004 } else if (c >= 0xc2 && p+1 != e &&
1005 (((U8)p[1]) & 0xc0) == 0x80) {
1006 p++;
1007 highhalf++;
1008 } else if (c >= 0x80) {
1009 /* malformed UTF-8 */
1010 ENTER;
1011 SAVESPTR(PL_warnhook);
1012 PL_warnhook = PERL_WARNHOOK_FATAL;
1013 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
1014 LEAVE;
1015 }
1016 }
1017 if (!highhalf)
1018 goto plain_copy;
1019 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
1020 bufptr = PL_parser->bufptr;
1021 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1022 SvCUR_set(PL_parser->linestr,
1023 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1024 PL_parser->bufend += len-highhalf;
1025 for (p = pv; p != e; p++) {
1026 U8 c = (U8)*p;
1027 if (c & 0x80) {
1028 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1029 p++;
1030 } else {
1031 *bufptr++ = (char)c;
1032 }
1033 }
1034 } else {
1035 plain_copy:
1036 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1037 bufptr = PL_parser->bufptr;
1038 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1039 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1040 PL_parser->bufend += len;
1041 Copy(pv, bufptr, len, char);
1042 }
1043 }
1044}
1045
1046/*
9dcc53ea
Z
1047=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1048
1049Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1050immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1051reallocating the buffer if necessary. This means that lexing code that
1052runs later will see the characters as if they had appeared in the input.
1053It is not recommended to do this as part of normal parsing, and most
1054uses of this facility run the risk of the inserted characters being
1055interpreted in an unintended manner.
1056
1057The string to be inserted is represented by octets starting at I<pv>
1058and continuing to the first nul. These octets are interpreted as either
1059UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1060in I<flags>. The characters are recoded for the lexer buffer, according
1061to how the buffer is currently being interpreted (L</lex_bufutf8>).
1062If it is not convenient to nul-terminate a string to be inserted, the
1063L</lex_stuff_pvn> function is more appropriate.
1064
1065=cut
1066*/
1067
1068void
1069Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1070{
1071 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1072 lex_stuff_pvn(pv, strlen(pv), flags);
1073}
1074
1075/*
f0e67a1d
Z
1076=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1077
1078Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1079immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1080reallocating the buffer if necessary. This means that lexing code that
1081runs later will see the characters as if they had appeared in the input.
1082It is not recommended to do this as part of normal parsing, and most
1083uses of this facility run the risk of the inserted characters being
1084interpreted in an unintended manner.
1085
1086The string to be inserted is the string value of I<sv>. The characters
1087are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1088being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1089not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1090need to construct a scalar.
1091
1092=cut
1093*/
1094
1095void
1096Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1097{
1098 char *pv;
1099 STRLEN len;
1100 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1101 if (flags)
1102 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1103 pv = SvPV(sv, len);
1104 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1105}
1106
1107/*
1108=for apidoc Amx|void|lex_unstuff|char *ptr
1109
1110Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1111I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1112This hides the discarded text from any lexing code that runs later,
1113as if the text had never appeared.
1114
1115This is not the normal way to consume lexed text. For that, use
1116L</lex_read_to>.
1117
1118=cut
1119*/
1120
1121void
1122Perl_lex_unstuff(pTHX_ char *ptr)
1123{
1124 char *buf, *bufend;
1125 STRLEN unstuff_len;
1126 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1127 buf = PL_parser->bufptr;
1128 if (ptr < buf)
1129 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1130 if (ptr == buf)
1131 return;
1132 bufend = PL_parser->bufend;
1133 if (ptr > bufend)
1134 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1135 unstuff_len = ptr - buf;
1136 Move(ptr, buf, bufend+1-ptr, char);
1137 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1138 PL_parser->bufend = bufend - unstuff_len;
1139}
1140
1141/*
1142=for apidoc Amx|void|lex_read_to|char *ptr
1143
1144Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1145to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1146performing the correct bookkeeping whenever a newline character is passed.
1147This is the normal way to consume lexed text.
1148
1149Interpretation of the buffer's octets can be abstracted out by
1150using the slightly higher-level functions L</lex_peek_unichar> and
1151L</lex_read_unichar>.
1152
1153=cut
1154*/
1155
1156void
1157Perl_lex_read_to(pTHX_ char *ptr)
1158{
1159 char *s;
1160 PERL_ARGS_ASSERT_LEX_READ_TO;
1161 s = PL_parser->bufptr;
1162 if (ptr < s || ptr > PL_parser->bufend)
1163 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1164 for (; s != ptr; s++)
1165 if (*s == '\n') {
1166 CopLINE_inc(PL_curcop);
1167 PL_parser->linestart = s+1;
1168 }
1169 PL_parser->bufptr = ptr;
1170}
1171
1172/*
1173=for apidoc Amx|void|lex_discard_to|char *ptr
1174
1175Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1176up to I<ptr>. The remaining content of the buffer will be moved, and
1177all pointers into the buffer updated appropriately. I<ptr> must not
1178be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1179it is not permitted to discard text that has yet to be lexed.
1180
1181Normally it is not necessarily to do this directly, because it suffices to
1182use the implicit discarding behaviour of L</lex_next_chunk> and things
1183based on it. However, if a token stretches across multiple lines,
1f317c95 1184and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1185that purpose, then after completion of the token it would be wise to
1186explicitly discard the now-unneeded earlier lines, to avoid future
1187multi-line tokens growing the buffer without bound.
1188
1189=cut
1190*/
1191
1192void
1193Perl_lex_discard_to(pTHX_ char *ptr)
1194{
1195 char *buf;
1196 STRLEN discard_len;
1197 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1198 buf = SvPVX(PL_parser->linestr);
1199 if (ptr < buf)
1200 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1201 if (ptr == buf)
1202 return;
1203 if (ptr > PL_parser->bufptr)
1204 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1205 discard_len = ptr - buf;
1206 if (PL_parser->oldbufptr < ptr)
1207 PL_parser->oldbufptr = ptr;
1208 if (PL_parser->oldoldbufptr < ptr)
1209 PL_parser->oldoldbufptr = ptr;
1210 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1211 PL_parser->last_uni = NULL;
1212 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1213 PL_parser->last_lop = NULL;
1214 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1215 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1216 PL_parser->bufend -= discard_len;
1217 PL_parser->bufptr -= discard_len;
1218 PL_parser->oldbufptr -= discard_len;
1219 PL_parser->oldoldbufptr -= discard_len;
1220 if (PL_parser->last_uni)
1221 PL_parser->last_uni -= discard_len;
1222 if (PL_parser->last_lop)
1223 PL_parser->last_lop -= discard_len;
1224}
1225
1226/*
1227=for apidoc Amx|bool|lex_next_chunk|U32 flags
1228
1229Reads in the next chunk of text to be lexed, appending it to
1230L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1231looked to the end of the current chunk and wants to know more. It is
1232usual, but not necessary, for lexing to have consumed the entirety of
1233the current chunk at this time.
1234
1235If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1236chunk (i.e., the current chunk has been entirely consumed), normally the
1237current chunk will be discarded at the same time that the new chunk is
1238read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1239will not be discarded. If the current chunk has not been entirely
1240consumed, then it will not be discarded regardless of the flag.
1241
1242Returns true if some new text was added to the buffer, or false if the
1243buffer has reached the end of the input text.
1244
1245=cut
1246*/
1247
1248#define LEX_FAKE_EOF 0x80000000
1249
1250bool
1251Perl_lex_next_chunk(pTHX_ U32 flags)
1252{
1253 SV *linestr;
1254 char *buf;
1255 STRLEN old_bufend_pos, new_bufend_pos;
1256 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1257 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1258 bool got_some_for_debugger = 0;
f0e67a1d
Z
1259 bool got_some;
1260 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1261 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1262 linestr = PL_parser->linestr;
1263 buf = SvPVX(linestr);
1264 if (!(flags & LEX_KEEP_PREVIOUS) &&
1265 PL_parser->bufptr == PL_parser->bufend) {
1266 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1267 linestart_pos = 0;
1268 if (PL_parser->last_uni != PL_parser->bufend)
1269 PL_parser->last_uni = NULL;
1270 if (PL_parser->last_lop != PL_parser->bufend)
1271 PL_parser->last_lop = NULL;
1272 last_uni_pos = last_lop_pos = 0;
1273 *buf = 0;
1274 SvCUR(linestr) = 0;
1275 } else {
1276 old_bufend_pos = PL_parser->bufend - buf;
1277 bufptr_pos = PL_parser->bufptr - buf;
1278 oldbufptr_pos = PL_parser->oldbufptr - buf;
1279 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1280 linestart_pos = PL_parser->linestart - buf;
1281 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1282 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1283 }
1284 if (flags & LEX_FAKE_EOF) {
1285 goto eof;
60d63348 1286 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
f0e67a1d
Z
1287 got_some = 0;
1288 } else if (filter_gets(linestr, old_bufend_pos)) {
1289 got_some = 1;
17cc9359 1290 got_some_for_debugger = 1;
f0e67a1d 1291 } else {
580561a3
Z
1292 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1293 sv_setpvs(linestr, "");
f0e67a1d
Z
1294 eof:
1295 /* End of real input. Close filehandle (unless it was STDIN),
1296 * then add implicit termination.
1297 */
87606032 1298 if (PL_parser->lex_flags & LEX_DONT_CLOSE_RSFP)
f0e67a1d
Z
1299 PerlIO_clearerr(PL_parser->rsfp);
1300 else if (PL_parser->rsfp)
1301 (void)PerlIO_close(PL_parser->rsfp);
1302 PL_parser->rsfp = NULL;
60d63348 1303 PL_parser->in_pod = PL_parser->filtered = 0;
f0e67a1d
Z
1304#ifdef PERL_MAD
1305 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1306 PL_faketokens = 1;
1307#endif
1308 if (!PL_in_eval && PL_minus_p) {
1309 sv_catpvs(linestr,
1310 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1311 PL_minus_n = PL_minus_p = 0;
1312 } else if (!PL_in_eval && PL_minus_n) {
1313 sv_catpvs(linestr, /*{*/";}");
1314 PL_minus_n = 0;
1315 } else
1316 sv_catpvs(linestr, ";");
1317 got_some = 1;
1318 }
1319 buf = SvPVX(linestr);
1320 new_bufend_pos = SvCUR(linestr);
1321 PL_parser->bufend = buf + new_bufend_pos;
1322 PL_parser->bufptr = buf + bufptr_pos;
1323 PL_parser->oldbufptr = buf + oldbufptr_pos;
1324 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1325 PL_parser->linestart = buf + linestart_pos;
1326 if (PL_parser->last_uni)
1327 PL_parser->last_uni = buf + last_uni_pos;
1328 if (PL_parser->last_lop)
1329 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1330 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1331 PL_curstash != PL_debstash) {
1332 /* debugger active and we're not compiling the debugger code,
1333 * so store the line into the debugger's array of lines
1334 */
1335 update_debugger_info(NULL, buf+old_bufend_pos,
1336 new_bufend_pos-old_bufend_pos);
1337 }
1338 return got_some;
1339}
1340
1341/*
1342=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1343
1344Looks ahead one (Unicode) character in the text currently being lexed.
1345Returns the codepoint (unsigned integer value) of the next character,
1346or -1 if lexing has reached the end of the input text. To consume the
1347peeked character, use L</lex_read_unichar>.
1348
1349If the next character is in (or extends into) the next chunk of input
1350text, the next chunk will be read in. Normally the current chunk will be
1351discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1352then the current chunk will not be discarded.
1353
1354If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1355is encountered, an exception is generated.
1356
1357=cut
1358*/
1359
1360I32
1361Perl_lex_peek_unichar(pTHX_ U32 flags)
1362{
749123ff 1363 dVAR;
f0e67a1d
Z
1364 char *s, *bufend;
1365 if (flags & ~(LEX_KEEP_PREVIOUS))
1366 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1367 s = PL_parser->bufptr;
1368 bufend = PL_parser->bufend;
1369 if (UTF) {
1370 U8 head;
1371 I32 unichar;
1372 STRLEN len, retlen;
1373 if (s == bufend) {
1374 if (!lex_next_chunk(flags))
1375 return -1;
1376 s = PL_parser->bufptr;
1377 bufend = PL_parser->bufend;
1378 }
1379 head = (U8)*s;
1380 if (!(head & 0x80))
1381 return head;
1382 if (head & 0x40) {
1383 len = PL_utf8skip[head];
1384 while ((STRLEN)(bufend-s) < len) {
1385 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1386 break;
1387 s = PL_parser->bufptr;
1388 bufend = PL_parser->bufend;
1389 }
1390 }
1391 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1392 if (retlen == (STRLEN)-1) {
1393 /* malformed UTF-8 */
1394 ENTER;
1395 SAVESPTR(PL_warnhook);
1396 PL_warnhook = PERL_WARNHOOK_FATAL;
1397 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1398 LEAVE;
1399 }
1400 return unichar;
1401 } else {
1402 if (s == bufend) {
1403 if (!lex_next_chunk(flags))
1404 return -1;
1405 s = PL_parser->bufptr;
1406 }
1407 return (U8)*s;
1408 }
1409}
1410
1411/*
1412=for apidoc Amx|I32|lex_read_unichar|U32 flags
1413
1414Reads the next (Unicode) character in the text currently being lexed.
1415Returns the codepoint (unsigned integer value) of the character read,
1416and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1417if lexing has reached the end of the input text. To non-destructively
1418examine the next character, use L</lex_peek_unichar> instead.
1419
1420If the next character is in (or extends into) the next chunk of input
1421text, the next chunk will be read in. Normally the current chunk will be
1422discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1423then the current chunk will not be discarded.
1424
1425If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1426is encountered, an exception is generated.
1427
1428=cut
1429*/
1430
1431I32
1432Perl_lex_read_unichar(pTHX_ U32 flags)
1433{
1434 I32 c;
1435 if (flags & ~(LEX_KEEP_PREVIOUS))
1436 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1437 c = lex_peek_unichar(flags);
1438 if (c != -1) {
1439 if (c == '\n')
1440 CopLINE_inc(PL_curcop);
d9018cbe
EB
1441 if (UTF)
1442 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1443 else
1444 ++(PL_parser->bufptr);
f0e67a1d
Z
1445 }
1446 return c;
1447}
1448
1449/*
1450=for apidoc Amx|void|lex_read_space|U32 flags
1451
1452Reads optional spaces, in Perl style, in the text currently being
1453lexed. The spaces may include ordinary whitespace characters and
1454Perl-style comments. C<#line> directives are processed if encountered.
1455L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1456at a non-space character (or the end of the input text).
1457
1458If spaces extend into the next chunk of input text, the next chunk will
1459be read in. Normally the current chunk will be discarded at the same
1460time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1461chunk will not be discarded.
1462
1463=cut
1464*/
1465
f0998909
Z
1466#define LEX_NO_NEXT_CHUNK 0x80000000
1467
f0e67a1d
Z
1468void
1469Perl_lex_read_space(pTHX_ U32 flags)
1470{
1471 char *s, *bufend;
1472 bool need_incline = 0;
f0998909 1473 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1474 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1475#ifdef PERL_MAD
1476 if (PL_skipwhite) {
1477 sv_free(PL_skipwhite);
1478 PL_skipwhite = NULL;
1479 }
1480 if (PL_madskills)
1481 PL_skipwhite = newSVpvs("");
1482#endif /* PERL_MAD */
1483 s = PL_parser->bufptr;
1484 bufend = PL_parser->bufend;
1485 while (1) {
1486 char c = *s;
1487 if (c == '#') {
1488 do {
1489 c = *++s;
1490 } while (!(c == '\n' || (c == 0 && s == bufend)));
1491 } else if (c == '\n') {
1492 s++;
1493 PL_parser->linestart = s;
1494 if (s == bufend)
1495 need_incline = 1;
1496 else
1497 incline(s);
1498 } else if (isSPACE(c)) {
1499 s++;
1500 } else if (c == 0 && s == bufend) {
1501 bool got_more;
1502#ifdef PERL_MAD
1503 if (PL_madskills)
1504 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1505#endif /* PERL_MAD */
f0998909
Z
1506 if (flags & LEX_NO_NEXT_CHUNK)
1507 break;
f0e67a1d
Z
1508 PL_parser->bufptr = s;
1509 CopLINE_inc(PL_curcop);
1510 got_more = lex_next_chunk(flags);
1511 CopLINE_dec(PL_curcop);
1512 s = PL_parser->bufptr;
1513 bufend = PL_parser->bufend;
1514 if (!got_more)
1515 break;
1516 if (need_incline && PL_parser->rsfp) {
1517 incline(s);
1518 need_incline = 0;
1519 }
1520 } else {
1521 break;
1522 }
1523 }
1524#ifdef PERL_MAD
1525 if (PL_madskills)
1526 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1527#endif /* PERL_MAD */
1528 PL_parser->bufptr = s;
1529}
1530
1531/*
ffb4593c
NT
1532 * S_incline
1533 * This subroutine has nothing to do with tilting, whether at windmills
1534 * or pinball tables. Its name is short for "increment line". It
57843af0 1535 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1536 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1537 * # line 500 "foo.pm"
1538 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1539 */
1540
76e3520e 1541STATIC void
d9095cec 1542S_incline(pTHX_ const char *s)
463ee0b2 1543{
97aff369 1544 dVAR;
d9095cec
NC
1545 const char *t;
1546 const char *n;
1547 const char *e;
8818d409 1548 line_t line_num;
463ee0b2 1549
7918f24d
NC
1550 PERL_ARGS_ASSERT_INCLINE;
1551
57843af0 1552 CopLINE_inc(PL_curcop);
463ee0b2
LW
1553 if (*s++ != '#')
1554 return;
d4c19fe8
AL
1555 while (SPACE_OR_TAB(*s))
1556 s++;
73659bf1
GS
1557 if (strnEQ(s, "line", 4))
1558 s += 4;
1559 else
1560 return;
084592ab 1561 if (SPACE_OR_TAB(*s))
73659bf1 1562 s++;
4e553d73 1563 else
73659bf1 1564 return;
d4c19fe8
AL
1565 while (SPACE_OR_TAB(*s))
1566 s++;
463ee0b2
LW
1567 if (!isDIGIT(*s))
1568 return;
d4c19fe8 1569
463ee0b2
LW
1570 n = s;
1571 while (isDIGIT(*s))
1572 s++;
07714eb4 1573 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1574 return;
bf4acbe4 1575 while (SPACE_OR_TAB(*s))
463ee0b2 1576 s++;
73659bf1 1577 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1578 s++;
73659bf1
GS
1579 e = t + 1;
1580 }
463ee0b2 1581 else {
c35e046a
AL
1582 t = s;
1583 while (!isSPACE(*t))
1584 t++;
73659bf1 1585 e = t;
463ee0b2 1586 }
bf4acbe4 1587 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1588 e++;
1589 if (*e != '\n' && *e != '\0')
1590 return; /* false alarm */
1591
8818d409
FC
1592 line_num = atoi(n)-1;
1593
f4dd75d9 1594 if (t - s > 0) {
d9095cec 1595 const STRLEN len = t - s;
19bad673
NC
1596 SV *const temp_sv = CopFILESV(PL_curcop);
1597 const char *cf;
1598 STRLEN tmplen;
1599
1600 if (temp_sv) {
1601 cf = SvPVX(temp_sv);
1602 tmplen = SvCUR(temp_sv);
1603 } else {
1604 cf = NULL;
1605 tmplen = 0;
1606 }
1607
d1299d44 1608 if (!PL_rsfp && !PL_parser->filtered) {
e66cf94c
RGS
1609 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1610 * to *{"::_<newfilename"} */
44867030
NC
1611 /* However, the long form of evals is only turned on by the
1612 debugger - usually they're "(eval %lu)" */
1613 char smallbuf[128];
1614 char *tmpbuf;
1615 GV **gvp;
d9095cec 1616 STRLEN tmplen2 = len;
798b63bc 1617 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1618 tmpbuf = smallbuf;
1619 else
2ae0db35 1620 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1621 tmpbuf[0] = '_';
1622 tmpbuf[1] = '<';
2ae0db35 1623 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1624 tmplen += 2;
8a5ee598
RGS
1625 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1626 if (gvp) {
44867030
NC
1627 char *tmpbuf2;
1628 GV *gv2;
1629
1630 if (tmplen2 + 2 <= sizeof smallbuf)
1631 tmpbuf2 = smallbuf;
1632 else
1633 Newx(tmpbuf2, tmplen2 + 2, char);
1634
1635 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1636 /* Either they malloc'd it, or we malloc'd it,
1637 so no prefix is present in ours. */
1638 tmpbuf2[0] = '_';
1639 tmpbuf2[1] = '<';
1640 }
1641
1642 memcpy(tmpbuf2 + 2, s, tmplen2);
1643 tmplen2 += 2;
1644
8a5ee598 1645 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1646 if (!isGV(gv2)) {
8a5ee598 1647 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1648 /* adjust ${"::_<newfilename"} to store the new file name */
1649 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1650 /* The line number may differ. If that is the case,
1651 alias the saved lines that are in the array.
1652 Otherwise alias the whole array. */
1653 if (CopLINE(PL_curcop) == line_num) {
1654 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1655 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1656 }
1657 else if (GvAV(*gvp)) {
1658 AV * const av = GvAV(*gvp);
1659 const I32 start = CopLINE(PL_curcop)+1;
1660 I32 items = AvFILLp(av) - start;
1661 if (items > 0) {
1662 AV * const av2 = GvAVn(gv2);
1663 SV **svp = AvARRAY(av) + start;
1664 I32 l = (I32)line_num+1;
1665 while (items--)
1666 av_store(av2, l++, SvREFCNT_inc(*svp++));
1667 }
1668 }
e5527e4b 1669 }
44867030
NC
1670
1671 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1672 }
e66cf94c 1673 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1674 }
05ec9bb3 1675 CopFILE_free(PL_curcop);
d9095cec 1676 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1677 }
8818d409 1678 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1679}
1680
29595ff2 1681#ifdef PERL_MAD
cd81e915 1682/* skip space before PL_thistoken */
29595ff2
NC
1683
1684STATIC char *
1685S_skipspace0(pTHX_ register char *s)
1686{
7918f24d
NC
1687 PERL_ARGS_ASSERT_SKIPSPACE0;
1688
29595ff2
NC
1689 s = skipspace(s);
1690 if (!PL_madskills)
1691 return s;
cd81e915
NC
1692 if (PL_skipwhite) {
1693 if (!PL_thiswhite)
6b29d1f5 1694 PL_thiswhite = newSVpvs("");
cd81e915
NC
1695 sv_catsv(PL_thiswhite, PL_skipwhite);
1696 sv_free(PL_skipwhite);
1697 PL_skipwhite = 0;
1698 }
1699 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1700 return s;
1701}
1702
cd81e915 1703/* skip space after PL_thistoken */
29595ff2
NC
1704
1705STATIC char *
1706S_skipspace1(pTHX_ register char *s)
1707{
d4c19fe8 1708 const char *start = s;
29595ff2
NC
1709 I32 startoff = start - SvPVX(PL_linestr);
1710
7918f24d
NC
1711 PERL_ARGS_ASSERT_SKIPSPACE1;
1712
29595ff2
NC
1713 s = skipspace(s);
1714 if (!PL_madskills)
1715 return s;
1716 start = SvPVX(PL_linestr) + startoff;
cd81e915 1717 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1718 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1719 PL_thistoken = newSVpvn(tstart, start - tstart);
1720 }
1721 PL_realtokenstart = -1;
1722 if (PL_skipwhite) {
1723 if (!PL_nextwhite)
6b29d1f5 1724 PL_nextwhite = newSVpvs("");
cd81e915
NC
1725 sv_catsv(PL_nextwhite, PL_skipwhite);
1726 sv_free(PL_skipwhite);
1727 PL_skipwhite = 0;
29595ff2
NC
1728 }
1729 return s;
1730}
1731
1732STATIC char *
1733S_skipspace2(pTHX_ register char *s, SV **svp)
1734{
c35e046a
AL
1735 char *start;
1736 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1737 const I32 startoff = s - SvPVX(PL_linestr);
1738
7918f24d
NC
1739 PERL_ARGS_ASSERT_SKIPSPACE2;
1740
29595ff2
NC
1741 s = skipspace(s);
1742 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1743 if (!PL_madskills || !svp)
1744 return s;
1745 start = SvPVX(PL_linestr) + startoff;
cd81e915 1746 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1747 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1748 PL_thistoken = newSVpvn(tstart, start - tstart);
1749 PL_realtokenstart = -1;
29595ff2 1750 }
cd81e915 1751 if (PL_skipwhite) {
29595ff2 1752 if (!*svp)
6b29d1f5 1753 *svp = newSVpvs("");
cd81e915
NC
1754 sv_setsv(*svp, PL_skipwhite);
1755 sv_free(PL_skipwhite);
1756 PL_skipwhite = 0;
29595ff2
NC
1757 }
1758
1759 return s;
1760}
1761#endif
1762
80a702cd 1763STATIC void
15f169a1 1764S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1765{
1766 AV *av = CopFILEAVx(PL_curcop);
1767 if (av) {
b9f83d2f 1768 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1769 if (orig_sv)
1770 sv_setsv(sv, orig_sv);
1771 else
1772 sv_setpvn(sv, buf, len);
80a702cd
RGS
1773 (void)SvIOK_on(sv);
1774 SvIV_set(sv, 0);
1775 av_store(av, (I32)CopLINE(PL_curcop), sv);
1776 }
1777}
1778
ffb4593c
NT
1779/*
1780 * S_skipspace
1781 * Called to gobble the appropriate amount and type of whitespace.
1782 * Skips comments as well.
1783 */
1784
76e3520e 1785STATIC char *
cea2e8a9 1786S_skipspace(pTHX_ register char *s)
a687059c 1787{
5db06880 1788#ifdef PERL_MAD
f0e67a1d
Z
1789 char *start = s;
1790#endif /* PERL_MAD */
7918f24d 1791 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1792#ifdef PERL_MAD
cd81e915
NC
1793 if (PL_skipwhite) {
1794 sv_free(PL_skipwhite);
f0e67a1d 1795 PL_skipwhite = NULL;
5db06880 1796 }
f0e67a1d 1797#endif /* PERL_MAD */
3280af22 1798 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1799 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1800 s++;
f0e67a1d
Z
1801 } else {
1802 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1803 PL_bufptr = s;
f0998909
Z
1804 lex_read_space(LEX_KEEP_PREVIOUS |
1805 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1806 LEX_NO_NEXT_CHUNK : 0));
3280af22 1807 s = PL_bufptr;
f0e67a1d
Z
1808 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1809 if (PL_linestart > PL_bufptr)
1810 PL_bufptr = PL_linestart;
1811 return s;
463ee0b2 1812 }
5db06880 1813#ifdef PERL_MAD
f0e67a1d
Z
1814 if (PL_madskills)
1815 PL_skipwhite = newSVpvn(start, s-start);
1816#endif /* PERL_MAD */
5db06880 1817 return s;
a687059c 1818}
378cc40b 1819
ffb4593c
NT
1820/*
1821 * S_check_uni
1822 * Check the unary operators to ensure there's no ambiguity in how they're
1823 * used. An ambiguous piece of code would be:
1824 * rand + 5
1825 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1826 * the +5 is its argument.
1827 */
1828
76e3520e 1829STATIC void
cea2e8a9 1830S_check_uni(pTHX)
ba106d47 1831{
97aff369 1832 dVAR;
d4c19fe8
AL
1833 const char *s;
1834 const char *t;
2f3197b3 1835
3280af22 1836 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1837 return;
3280af22
NIS
1838 while (isSPACE(*PL_last_uni))
1839 PL_last_uni++;
c35e046a
AL
1840 s = PL_last_uni;
1841 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1842 s++;
3280af22 1843 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1844 return;
6136c704 1845
9b387841
NC
1846 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1847 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1848 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1849}
1850
ffb4593c
NT
1851/*
1852 * LOP : macro to build a list operator. Its behaviour has been replaced
1853 * with a subroutine, S_lop() for which LOP is just another name.
1854 */
1855
a0d0e21e
LW
1856#define LOP(f,x) return lop(f,x,s)
1857
ffb4593c
NT
1858/*
1859 * S_lop
1860 * Build a list operator (or something that might be one). The rules:
1861 * - if we have a next token, then it's a list operator [why?]
1862 * - if the next thing is an opening paren, then it's a function
1863 * - else it's a list operator
1864 */
1865
76e3520e 1866STATIC I32
a0be28da 1867S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1868{
97aff369 1869 dVAR;
7918f24d
NC
1870
1871 PERL_ARGS_ASSERT_LOP;
1872
6154021b 1873 pl_yylval.ival = f;
35c8bce7 1874 CLINE;
3280af22
NIS
1875 PL_expect = x;
1876 PL_bufptr = s;
1877 PL_last_lop = PL_oldbufptr;
eb160463 1878 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1879#ifdef PERL_MAD
1880 if (PL_lasttoke)
78cdf107 1881 goto lstop;
5db06880 1882#else
3280af22 1883 if (PL_nexttoke)
78cdf107 1884 goto lstop;
5db06880 1885#endif
79072805 1886 if (*s == '(')
bbf60fe6 1887 return REPORT(FUNC);
29595ff2 1888 s = PEEKSPACE(s);
79072805 1889 if (*s == '(')
bbf60fe6 1890 return REPORT(FUNC);
78cdf107
Z
1891 else {
1892 lstop:
1893 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1894 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1895 return REPORT(LSTOP);
78cdf107 1896 }
79072805
LW
1897}
1898
5db06880
NC
1899#ifdef PERL_MAD
1900 /*
1901 * S_start_force
1902 * Sets up for an eventual force_next(). start_force(0) basically does
1903 * an unshift, while start_force(-1) does a push. yylex removes items
1904 * on the "pop" end.
1905 */
1906
1907STATIC void
1908S_start_force(pTHX_ int where)
1909{
1910 int i;
1911
cd81e915 1912 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1913 where = PL_lasttoke;
cd81e915
NC
1914 assert(PL_curforce < 0 || PL_curforce == where);
1915 if (PL_curforce != where) {
5db06880
NC
1916 for (i = PL_lasttoke; i > where; --i) {
1917 PL_nexttoke[i] = PL_nexttoke[i-1];
1918 }
1919 PL_lasttoke++;
1920 }
cd81e915 1921 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1922 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1923 PL_curforce = where;
1924 if (PL_nextwhite) {
5db06880 1925 if (PL_madskills)
6b29d1f5 1926 curmad('^', newSVpvs(""));
cd81e915 1927 CURMAD('_', PL_nextwhite);
5db06880
NC
1928 }
1929}
1930
1931STATIC void
1932S_curmad(pTHX_ char slot, SV *sv)
1933{
1934 MADPROP **where;
1935
1936 if (!sv)
1937 return;
cd81e915
NC
1938 if (PL_curforce < 0)
1939 where = &PL_thismad;
5db06880 1940 else
cd81e915 1941 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1942
cd81e915 1943 if (PL_faketokens)
76f68e9b 1944 sv_setpvs(sv, "");
5db06880
NC
1945 else {
1946 if (!IN_BYTES) {
1947 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1948 SvUTF8_on(sv);
1949 else if (PL_encoding) {
1950 sv_recode_to_utf8(sv, PL_encoding);
1951 }
1952 }
1953 }
1954
1955 /* keep a slot open for the head of the list? */
1956 if (slot != '_' && *where && (*where)->mad_key == '^') {
1957 (*where)->mad_key = slot;
daba3364 1958 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1959 (*where)->mad_val = (void*)sv;
1960 }
1961 else
1962 addmad(newMADsv(slot, sv), where, 0);
1963}
1964#else
b3f24c00
MHM
1965# define start_force(where) NOOP
1966# define curmad(slot, sv) NOOP
5db06880
NC
1967#endif
1968
ffb4593c
NT
1969/*
1970 * S_force_next
9cbb5ea2 1971 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1972 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1973 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1974 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1975 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1976 */
1977
4e553d73 1978STATIC void
cea2e8a9 1979S_force_next(pTHX_ I32 type)
79072805 1980{
97aff369 1981 dVAR;
704d4215
GG
1982#ifdef DEBUGGING
1983 if (DEBUG_T_TEST) {
1984 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1985 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1986 }
1987#endif
5db06880 1988#ifdef PERL_MAD
cd81e915 1989 if (PL_curforce < 0)
5db06880 1990 start_force(PL_lasttoke);
cd81e915 1991 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1992 if (PL_lex_state != LEX_KNOWNEXT)
1993 PL_lex_defer = PL_lex_state;
1994 PL_lex_state = LEX_KNOWNEXT;
1995 PL_lex_expect = PL_expect;
cd81e915 1996 PL_curforce = -1;
5db06880 1997#else
3280af22
NIS
1998 PL_nexttype[PL_nexttoke] = type;
1999 PL_nexttoke++;
2000 if (PL_lex_state != LEX_KNOWNEXT) {
2001 PL_lex_defer = PL_lex_state;
2002 PL_lex_expect = PL_expect;
2003 PL_lex_state = LEX_KNOWNEXT;
79072805 2004 }
5db06880 2005#endif
79072805
LW
2006}
2007
28ac2b49
Z
2008void
2009Perl_yyunlex(pTHX)
2010{
a7aaec61
Z
2011 int yyc = PL_parser->yychar;
2012 if (yyc != YYEMPTY) {
2013 if (yyc) {
2014 start_force(-1);
2015 NEXTVAL_NEXTTOKE = PL_parser->yylval;
2016 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 2017 PL_lex_allbrackets--;
a7aaec61 2018 PL_lex_brackets--;
78cdf107
Z
2019 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
2020 } else if (yyc == '('/*)*/) {
2021 PL_lex_allbrackets--;
2022 yyc |= (2<<24);
a7aaec61
Z
2023 }
2024 force_next(yyc);
2025 }
28ac2b49
Z
2026 PL_parser->yychar = YYEMPTY;
2027 }
2028}
2029
d0a148a6 2030STATIC SV *
15f169a1 2031S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2032{
97aff369 2033 dVAR;
740cce10 2034 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2035 !IN_BYTES
2036 && UTF
2037 && !is_ascii_string((const U8*)start, len)
740cce10 2038 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2039 return sv;
2040}
2041
ffb4593c
NT
2042/*
2043 * S_force_word
2044 * When the lexer knows the next thing is a word (for instance, it has
2045 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2046 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2047 * lookahead.
ffb4593c
NT
2048 *
2049 * Arguments:
b1b65b59 2050 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2051 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2052 * int check_keyword : if true, Perl checks to make sure the word isn't
2053 * a keyword (do this if the word is a label, e.g. goto FOO)
2054 * int allow_pack : if true, : characters will also be allowed (require,
2055 * use, etc. do this)
9cbb5ea2 2056 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2057 */
2058
76e3520e 2059STATIC char *
cea2e8a9 2060S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2061{
97aff369 2062 dVAR;
463ee0b2
LW
2063 register char *s;
2064 STRLEN len;
4e553d73 2065
7918f24d
NC
2066 PERL_ARGS_ASSERT_FORCE_WORD;
2067
29595ff2 2068 start = SKIPSPACE1(start);
463ee0b2 2069 s = start;
7e2040f0 2070 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2071 (allow_pack && *s == ':') ||
15f0808c 2072 (allow_initial_tick && *s == '\'') )
a0d0e21e 2073 {
3280af22 2074 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2075 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2076 return start;
cd81e915 2077 start_force(PL_curforce);
5db06880
NC
2078 if (PL_madskills)
2079 curmad('X', newSVpvn(start,s-start));
463ee0b2 2080 if (token == METHOD) {
29595ff2 2081 s = SKIPSPACE1(s);
463ee0b2 2082 if (*s == '(')
3280af22 2083 PL_expect = XTERM;
463ee0b2 2084 else {
3280af22 2085 PL_expect = XOPERATOR;
463ee0b2 2086 }
79072805 2087 }
e74e6b3d 2088 if (PL_madskills)
63575281 2089 curmad('g', newSVpvs( "forced" ));
9ded7720 2090 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2091 = (OP*)newSVOP(OP_CONST,0,
2092 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2093 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2094 force_next(token);
2095 }
2096 return s;
2097}
2098
ffb4593c
NT
2099/*
2100 * S_force_ident
9cbb5ea2 2101 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2102 * text only contains the "foo" portion. The first argument is a pointer
2103 * to the "foo", and the second argument is the type symbol to prefix.
2104 * Forces the next token to be a "WORD".
9cbb5ea2 2105 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2106 */
2107
76e3520e 2108STATIC void
bfed75c6 2109S_force_ident(pTHX_ register const char *s, int kind)
79072805 2110{
97aff369 2111 dVAR;
7918f24d
NC
2112
2113 PERL_ARGS_ASSERT_FORCE_IDENT;
2114
c35e046a 2115 if (*s) {
90e5519e 2116 const STRLEN len = strlen(s);
728847b1
BF
2117 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2118 UTF ? SVf_UTF8 : 0));
cd81e915 2119 start_force(PL_curforce);
9ded7720 2120 NEXTVAL_NEXTTOKE.opval = o;
79072805 2121 force_next(WORD);
748a9306 2122 if (kind) {
11343788 2123 o->op_private = OPpCONST_ENTERED;
55497cff 2124 /* XXX see note in pp_entereval() for why we forgo typo
2125 warnings if the symbol must be introduced in an eval.
2126 GSAR 96-10-12 */
90e5519e 2127 gv_fetchpvn_flags(s, len,
728847b1
BF
2128 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2129 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
90e5519e
NC
2130 kind == '$' ? SVt_PV :
2131 kind == '@' ? SVt_PVAV :
2132 kind == '%' ? SVt_PVHV :
a0d0e21e 2133 SVt_PVGV
90e5519e 2134 );
748a9306 2135 }
79072805
LW
2136 }
2137}
2138
1571675a
GS
2139NV
2140Perl_str_to_version(pTHX_ SV *sv)
2141{
2142 NV retval = 0.0;
2143 NV nshift = 1.0;
2144 STRLEN len;
cfd0369c 2145 const char *start = SvPV_const(sv,len);
9d4ba2ae 2146 const char * const end = start + len;
504618e9 2147 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2148
2149 PERL_ARGS_ASSERT_STR_TO_VERSION;
2150
1571675a 2151 while (start < end) {
ba210ebe 2152 STRLEN skip;
1571675a
GS
2153 UV n;
2154 if (utf)
9041c2e3 2155 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2156 else {
2157 n = *(U8*)start;
2158 skip = 1;
2159 }
2160 retval += ((NV)n)/nshift;
2161 start += skip;
2162 nshift *= 1000;
2163 }
2164 return retval;
2165}
2166
4e553d73 2167/*
ffb4593c
NT
2168 * S_force_version
2169 * Forces the next token to be a version number.
e759cc13
RGS
2170 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2171 * and if "guessing" is TRUE, then no new token is created (and the caller
2172 * must use an alternative parsing method).
ffb4593c
NT
2173 */
2174
76e3520e 2175STATIC char *
e759cc13 2176S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2177{
97aff369 2178 dVAR;
5f66b61c 2179 OP *version = NULL;
44dcb63b 2180 char *d;
5db06880
NC
2181#ifdef PERL_MAD
2182 I32 startoff = s - SvPVX(PL_linestr);
2183#endif
89bfa8cd 2184
7918f24d
NC
2185 PERL_ARGS_ASSERT_FORCE_VERSION;
2186
29595ff2 2187 s = SKIPSPACE1(s);
89bfa8cd 2188
44dcb63b 2189 d = s;
dd629d5b 2190 if (*d == 'v')
44dcb63b 2191 d++;
44dcb63b 2192 if (isDIGIT(*d)) {
e759cc13
RGS
2193 while (isDIGIT(*d) || *d == '_' || *d == '.')
2194 d++;
5db06880
NC
2195#ifdef PERL_MAD
2196 if (PL_madskills) {
cd81e915 2197 start_force(PL_curforce);
5db06880
NC
2198 curmad('X', newSVpvn(s,d-s));
2199 }
2200#endif
4e4da3ac 2201 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2202 SV *ver;
8d08d9ba 2203#ifdef USE_LOCALE_NUMERIC
909d3787
KW
2204 char *loc = savepv(setlocale(LC_NUMERIC, NULL));
2205 setlocale(LC_NUMERIC, "C");
8d08d9ba 2206#endif
6154021b 2207 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2208#ifdef USE_LOCALE_NUMERIC
2209 setlocale(LC_NUMERIC, loc);
909d3787 2210 Safefree(loc);
8d08d9ba 2211#endif
6154021b 2212 version = pl_yylval.opval;
dd629d5b
GS
2213 ver = cSVOPx(version)->op_sv;
2214 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2215 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2216 SvNV_set(ver, str_to_version(ver));
1571675a 2217 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2218 }
89bfa8cd 2219 }
5db06880
NC
2220 else if (guessing) {
2221#ifdef PERL_MAD
2222 if (PL_madskills) {
cd81e915
NC
2223 sv_free(PL_nextwhite); /* let next token collect whitespace */
2224 PL_nextwhite = 0;
5db06880
NC
2225 s = SvPVX(PL_linestr) + startoff;
2226 }
2227#endif
e759cc13 2228 return s;
5db06880 2229 }
89bfa8cd 2230 }
2231
5db06880
NC
2232#ifdef PERL_MAD
2233 if (PL_madskills && !version) {
cd81e915
NC
2234 sv_free(PL_nextwhite); /* let next token collect whitespace */
2235 PL_nextwhite = 0;
5db06880
NC
2236 s = SvPVX(PL_linestr) + startoff;
2237 }
2238#endif
89bfa8cd 2239 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2240 start_force(PL_curforce);
9ded7720 2241 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2242 force_next(WORD);
89bfa8cd 2243
e759cc13 2244 return s;
89bfa8cd 2245}
2246
ffb4593c 2247/*
91152fc1
DG
2248 * S_force_strict_version
2249 * Forces the next token to be a version number using strict syntax rules.
2250 */
2251
2252STATIC char *
2253S_force_strict_version(pTHX_ char *s)
2254{
2255 dVAR;
2256 OP *version = NULL;
2257#ifdef PERL_MAD
2258 I32 startoff = s - SvPVX(PL_linestr);
2259#endif
2260 const char *errstr = NULL;
2261
2262 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2263
2264 while (isSPACE(*s)) /* leading whitespace */
2265 s++;
2266
2267 if (is_STRICT_VERSION(s,&errstr)) {
2268 SV *ver = newSV(0);
2269 s = (char *)scan_version(s, ver, 0);
2270 version = newSVOP(OP_CONST, 0, ver);
2271 }
4e4da3ac
Z
2272 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2273 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2274 {
91152fc1
DG
2275 PL_bufptr = s;
2276 if (errstr)
2277 yyerror(errstr); /* version required */
2278 return s;
2279 }
2280
2281#ifdef PERL_MAD
2282 if (PL_madskills && !version) {
2283 sv_free(PL_nextwhite); /* let next token collect whitespace */
2284 PL_nextwhite = 0;
2285 s = SvPVX(PL_linestr) + startoff;
2286 }
2287#endif
2288 /* NOTE: The parser sees the package name and the VERSION swapped */
2289 start_force(PL_curforce);
2290 NEXTVAL_NEXTTOKE.opval = version;
2291 force_next(WORD);
2292
2293 return s;
2294}
2295
2296/*
ffb4593c
NT
2297 * S_tokeq
2298 * Tokenize a quoted string passed in as an SV. It finds the next
2299 * chunk, up to end of string or a backslash. It may make a new
2300 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2301 * turns \\ into \.
2302 */
2303
76e3520e 2304STATIC SV *
cea2e8a9 2305S_tokeq(pTHX_ SV *sv)
79072805 2306{
97aff369 2307 dVAR;
79072805
LW
2308 register char *s;
2309 register char *send;
2310 register char *d;
b3ac6de7
IZ
2311 STRLEN len = 0;
2312 SV *pv = sv;
79072805 2313
7918f24d
NC
2314 PERL_ARGS_ASSERT_TOKEQ;
2315
79072805 2316 if (!SvLEN(sv))
b3ac6de7 2317 goto finish;
79072805 2318
a0d0e21e 2319 s = SvPV_force(sv, len);
21a311ee 2320 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2321 goto finish;
463ee0b2 2322 send = s + len;
dcb21ed6
NC
2323 /* This is relying on the SV being "well formed" with a trailing '\0' */
2324 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2325 s++;
2326 if (s == send)
b3ac6de7 2327 goto finish;
79072805 2328 d = s;
be4731d2 2329 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2330 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2331 }
79072805
LW
2332 while (s < send) {
2333 if (*s == '\\') {
a0d0e21e 2334 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2335 s++; /* all that, just for this */
2336 }
2337 *d++ = *s++;
2338 }
2339 *d = '\0';
95a20fc0 2340 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2341 finish:
3280af22 2342 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2343 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2344 return sv;
2345}
2346
ffb4593c
NT
2347/*
2348 * Now come three functions related to double-quote context,
2349 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2350 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2351 * interact with PL_lex_state, and create fake ( ... ) argument lists
2352 * to handle functions and concatenation.
ecd24171
DM
2353 * For example,
2354 * "foo\lbar"
2355 * is tokenised as
2356 * stringify ( const[foo] concat lcfirst ( const[bar] ) )
ffb4593c
NT
2357 */
2358
2359/*
2360 * S_sublex_start
6154021b 2361 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2362 *
2363 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2364 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2365 *
2366 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2367 *
2368 * Everything else becomes a FUNC.
2369 *
2370 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2371 * had an OP_CONST or OP_READLINE). This just sets us up for a
2372 * call to S_sublex_push().
2373 */
2374
76e3520e 2375STATIC I32
cea2e8a9 2376S_sublex_start(pTHX)
79072805 2377{
97aff369 2378 dVAR;
6154021b 2379 register const I32 op_type = pl_yylval.ival;
79072805
LW
2380
2381 if (op_type == OP_NULL) {
6154021b 2382 pl_yylval.opval = PL_lex_op;
5f66b61c 2383 PL_lex_op = NULL;
79072805
LW
2384 return THING;
2385 }
2386 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2387 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2388
2389 if (SvTYPE(sv) == SVt_PVIV) {
2390 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2391 STRLEN len;
96a5add6 2392 const char * const p = SvPV_const(sv, len);
740cce10 2393 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2394 SvREFCNT_dec(sv);
2395 sv = nsv;
4e553d73 2396 }
6154021b 2397 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2398 PL_lex_stuff = NULL;
6f33ba73
RGS
2399 /* Allow <FH> // "foo" */
2400 if (op_type == OP_READLINE)
2401 PL_expect = XTERMORDORDOR;
79072805
LW
2402 return THING;
2403 }
e3f73d4e
RGS
2404 else if (op_type == OP_BACKTICK && PL_lex_op) {
2405 /* readpipe() vas overriden */
2406 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2407 pl_yylval.opval = PL_lex_op;
9b201d7d 2408 PL_lex_op = NULL;
e3f73d4e
RGS
2409 PL_lex_stuff = NULL;
2410 return THING;
2411 }
79072805 2412
3280af22 2413 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2414 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2415 PL_sublex_info.sub_op = PL_lex_op;
2416 PL_lex_state = LEX_INTERPPUSH;
55497cff 2417
3280af22
NIS
2418 PL_expect = XTERM;
2419 if (PL_lex_op) {
6154021b 2420 pl_yylval.opval = PL_lex_op;
5f66b61c 2421 PL_lex_op = NULL;
55497cff 2422 return PMFUNC;
2423 }
2424 else
2425 return FUNC;
2426}
2427
ffb4593c
NT
2428/*
2429 * S_sublex_push
2430 * Create a new scope to save the lexing state. The scope will be
2431 * ended in S_sublex_done. Returns a '(', starting the function arguments
2432 * to the uc, lc, etc. found before.
2433 * Sets PL_lex_state to LEX_INTERPCONCAT.
2434 */
2435
76e3520e 2436STATIC I32
cea2e8a9 2437S_sublex_push(pTHX)
55497cff 2438{
27da23d5 2439 dVAR;
f46d017c 2440 ENTER;
55497cff 2441
3280af22 2442 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2443 SAVEBOOL(PL_lex_dojoin);
3280af22 2444 SAVEI32(PL_lex_brackets);
78cdf107
Z
2445 SAVEI32(PL_lex_allbrackets);
2446 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2447 SAVEI32(PL_lex_casemods);
2448 SAVEI32(PL_lex_starts);
651b5b28 2449 SAVEI8(PL_lex_state);
9da1dd8f 2450 SAVEPPTR(PL_sublex_info.re_eval_start);
7766f137 2451 SAVEVPTR(PL_lex_inpat);
98246f1e 2452 SAVEI16(PL_lex_inwhat);
57843af0 2453 SAVECOPLINE(PL_curcop);
3280af22 2454 SAVEPPTR(PL_bufptr);
8452ff4b 2455 SAVEPPTR(PL_bufend);
3280af22
NIS
2456 SAVEPPTR(PL_oldbufptr);
2457 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2458 SAVEPPTR(PL_last_lop);
2459 SAVEPPTR(PL_last_uni);
3280af22
NIS
2460 SAVEPPTR(PL_linestart);
2461 SAVESPTR(PL_linestr);
8edd5f42
RGS
2462 SAVEGENERICPV(PL_lex_brackstack);
2463 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2464
2465 PL_linestr = PL_lex_stuff;
a0714e2c 2466 PL_lex_stuff = NULL;
9da1dd8f 2467 PL_sublex_info.re_eval_start = NULL;
3280af22 2468
9cbb5ea2
GS
2469 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2470 = SvPVX(PL_linestr);
3280af22 2471 PL_bufend += SvCUR(PL_linestr);
bd61b366 2472 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2473 SAVEFREESV(PL_linestr);
2474
2475 PL_lex_dojoin = FALSE;
2476 PL_lex_brackets = 0;
78cdf107
Z
2477 PL_lex_allbrackets = 0;
2478 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2479 Newx(PL_lex_brackstack, 120, char);
2480 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2481 PL_lex_casemods = 0;
2482 *PL_lex_casestack = '\0';
2483 PL_lex_starts = 0;
2484 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2485 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2486
2487 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2488 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2489 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2490 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2491 else
5f66b61c 2492 PL_lex_inpat = NULL;
79072805 2493
55497cff 2494 return '(';
79072805
LW
2495}
2496
ffb4593c
NT
2497/*
2498 * S_sublex_done
2499 * Restores lexer state after a S_sublex_push.
2500 */
2501
76e3520e 2502STATIC I32
cea2e8a9 2503S_sublex_done(pTHX)
79072805 2504{
27da23d5 2505 dVAR;
3280af22 2506 if (!PL_lex_starts++) {
396482e1 2507 SV * const sv = newSVpvs("");
9aa983d2
JH
2508 if (SvUTF8(PL_linestr))
2509 SvUTF8_on(sv);
3280af22 2510 PL_expect = XOPERATOR;
6154021b 2511 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2512 return THING;
2513 }
2514
3280af22
NIS
2515 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2516 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2517 return yylex();
79072805
LW
2518 }
2519
ffb4593c 2520 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2521 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2522 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2523 PL_linestr = PL_lex_repl;
2524 PL_lex_inpat = 0;
2525 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2526 PL_bufend += SvCUR(PL_linestr);
bd61b366 2527 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2528 SAVEFREESV(PL_linestr);
2529 PL_lex_dojoin = FALSE;
2530 PL_lex_brackets = 0;
78cdf107
Z
2531 PL_lex_allbrackets = 0;
2532 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2533 PL_lex_casemods = 0;
2534 *PL_lex_casestack = '\0';
2535 PL_lex_starts = 0;
25da4f38 2536 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2537 PL_lex_state = LEX_INTERPNORMAL;
2538 PL_lex_starts++;
e9fa98b2
HS
2539 /* we don't clear PL_lex_repl here, so that we can check later
2540 whether this is an evalled subst; that means we rely on the
2541 logic to ensure sublex_done() is called again only via the
2542 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2543 }
e9fa98b2 2544 else {
3280af22 2545 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2546 PL_lex_repl = NULL;
e9fa98b2 2547 }
79072805 2548 return ',';
ffed7fef
LW
2549 }
2550 else {
5db06880
NC
2551#ifdef PERL_MAD
2552 if (PL_madskills) {
cd81e915
NC
2553 if (PL_thiswhite) {
2554 if (!PL_endwhite)
6b29d1f5 2555 PL_endwhite = newSVpvs("");
cd81e915
NC
2556 sv_catsv(PL_endwhite, PL_thiswhite);
2557 PL_thiswhite = 0;
2558 }
2559 if (PL_thistoken)
76f68e9b 2560 sv_setpvs(PL_thistoken,"");
5db06880 2561 else
cd81e915 2562 PL_realtokenstart = -1;
5db06880
NC
2563 }
2564#endif
f46d017c 2565 LEAVE;
3280af22
NIS
2566 PL_bufend = SvPVX(PL_linestr);
2567 PL_bufend += SvCUR(PL_linestr);
2568 PL_expect = XOPERATOR;
09bef843 2569 PL_sublex_info.sub_inwhat = 0;
79072805 2570 return ')';
ffed7fef
LW
2571 }
2572}
2573
02aa26ce
NT
2574/*
2575 scan_const
2576
9da1dd8f
DM
2577 Extracts the next constant part of a pattern, double-quoted string,
2578 or transliteration. This is terrifying code.
2579
2580 For example, in parsing the double-quoted string "ab\x63$d", it would
2581 stop at the '$' and return an OP_CONST containing 'abc'.
02aa26ce 2582
94def140 2583 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2584 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2585 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2586
94def140
TS
2587 Returns a pointer to the character scanned up to. If this is
2588 advanced from the start pointer supplied (i.e. if anything was
9da1dd8f 2589 successfully parsed), will leave an OP_CONST for the substring scanned
6154021b 2590 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2591 by looking at the next characters herself.
2592
02aa26ce 2593 In patterns:
9da1dd8f
DM
2594 expand:
2595 \N{ABC} => \N{U+41.42.43}
2596
2597 pass through:
2598 all other \-char, including \N and \N{ apart from \N{ABC}
2599
2600 stops on:
2601 @ and $ where it appears to be a var, but not for $ as tail anchor
2602 \l \L \u \U \Q \E
2603 (?{ or (??{
2604
02aa26ce
NT
2605
2606 In transliterations:
2607 characters are VERY literal, except for - not at the start or end
94def140
TS
2608 of the string, which indicates a range. If the range is in bytes,
2609 scan_const expands the range to the full set of intermediate
2610 characters. If the range is in utf8, the hyphen is replaced with
2611 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2612
2613 In double-quoted strings:
2614 backslashes:
2615 double-quoted style: \r and \n
ff3f963a 2616 constants: \x31, etc.
94def140 2617 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2618 case and quoting: \U \Q \E
2619 stops on @ and $
2620
2621 scan_const does *not* construct ops to handle interpolated strings.
2622 It stops processing as soon as it finds an embedded $ or @ variable
2623 and leaves it to the caller to work out what's going on.
2624
94def140
TS
2625 embedded arrays (whether in pattern or not) could be:
2626 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2627
2628 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2629
2630 $ in pattern could be $foo or could be tail anchor. Assumption:
2631 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2632 followed by one of "()| \r\n\t"
02aa26ce 2633
9da1dd8f 2634 \1 (backreferences) are turned into $1 in substitutions
02aa26ce
NT
2635
2636 The structure of the code is
2637 while (there's a character to process) {
94def140
TS
2638 handle transliteration ranges
2639 skip regexp comments /(?#comment)/ and codes /(?{code})/
2640 skip #-initiated comments in //x patterns
2641 check for embedded arrays
02aa26ce
NT
2642 check for embedded scalars
2643 if (backslash) {
94def140 2644 deprecate \1 in substitution replacements
02aa26ce
NT
2645 handle string-changing backslashes \l \U \Q \E, etc.
2646 switch (what was escaped) {
94def140 2647 handle \- in a transliteration (becomes a literal -)
ff3f963a 2648 if a pattern and not \N{, go treat as regular character
94def140
TS
2649 handle \132 (octal characters)
2650 handle \x15 and \x{1234} (hex characters)
ff3f963a 2651 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2652 handle \cV (control characters)
2653 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2654 } (end switch)
77a135fe 2655 continue
02aa26ce 2656 } (end if backslash)
77a135fe 2657 handle regular character
02aa26ce 2658 } (end while character to read)
4e553d73 2659
02aa26ce
NT
2660*/
2661
76e3520e 2662STATIC char *
cea2e8a9 2663S_scan_const(pTHX_ char *start)
79072805 2664{
97aff369 2665 dVAR;
3280af22 2666 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2667 SV *sv = newSV(send - start); /* sv for the constant. See
2668 note below on sizing. */
02aa26ce
NT
2669 register char *s = start; /* start of the constant */
2670 register char *d = SvPVX(sv); /* destination for copies */
2671 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2672 bool didrange = FALSE; /* did we just finish a range? */
2866decb 2673 bool in_charclass = FALSE; /* within /[...]/ */
b953e60c
KW
2674 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2675 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2676 to be UTF8? But, this can
2677 show as true when the source
2678 isn't utf8, as for example
2679 when it is entirely composed
2680 of hex constants */
2681
2682 /* Note on sizing: The scanned constant is placed into sv, which is
2683 * initialized by newSV() assuming one byte of output for every byte of
2684 * input. This routine expects newSV() to allocate an extra byte for a
2685 * trailing NUL, which this routine will append if it gets to the end of
2686 * the input. There may be more bytes of input than output (eg., \N{LATIN
2687 * CAPITAL LETTER A}), or more output than input if the constant ends up
2688 * recoded to utf8, but each time a construct is found that might increase
2689 * the needed size, SvGROW() is called. Its size parameter each time is
2690 * based on the best guess estimate at the time, namely the length used so
2691 * far, plus the length the current construct will occupy, plus room for
2692 * the trailing NUL, plus one byte for every input byte still unscanned */
2693
012bcf8d 2694 UV uv;
4c3a8340
TS
2695#ifdef EBCDIC
2696 UV literal_endpoint = 0;
e294cc5d 2697 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2698#endif
012bcf8d 2699
7918f24d
NC
2700 PERL_ARGS_ASSERT_SCAN_CONST;
2701
bb16bae8 2702 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2703 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2704 /* If we are doing a trans and we know we want UTF8 set expectation */
2705 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2706 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2707 }
2708
2709
79072805 2710 while (s < send || dorange) {
ff3f963a 2711
02aa26ce 2712 /* get transliterations out of the way (they're most literal) */
3280af22 2713 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2714 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2715 if (dorange) {
1ba5c669
JH
2716 I32 i; /* current expanded character */
2717 I32 min; /* first character in range */
2718 I32 max; /* last character in range */
02aa26ce 2719
e294cc5d
JH
2720#ifdef EBCDIC
2721 UV uvmax = 0;
2722#endif
2723
2724 if (has_utf8
2725#ifdef EBCDIC
2726 && !native_range
2727#endif
2728 ) {
9d4ba2ae 2729 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2730 char *e = d++;
2731 while (e-- > c)
2732 *(e + 1) = *e;
25716404 2733 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2734 /* mark the range as done, and continue */
2735 dorange = FALSE;
2736 didrange = TRUE;
2737 continue;
2738 }
2b9d42f0 2739
95a20fc0 2740 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2741#ifdef EBCDIC
2742 SvGROW(sv,
2743 SvLEN(sv) + (has_utf8 ?
2744 (512 - UTF_CONTINUATION_MARK +
2745 UNISKIP(0x100))
2746 : 256));
2747 /* How many two-byte within 0..255: 128 in UTF-8,
2748 * 96 in UTF-8-mod. */
2749#else
9cbb5ea2 2750 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2751#endif
9cbb5ea2 2752 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2753#ifdef EBCDIC
2754 if (has_utf8) {
2755 int j;
2756 for (j = 0; j <= 1; j++) {
2757 char * const c = (char*)utf8_hop((U8*)d, -1);
2758 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2759 if (j)
2760 min = (U8)uv;
2761 else if (uv < 256)
2762 max = (U8)uv;
2763 else {
2764 max = (U8)0xff; /* only to \xff */
2765 uvmax = uv; /* \x{100} to uvmax */
2766 }
2767 d = c; /* eat endpoint chars */
2768 }
2769 }
2770 else {
2771#endif
2772 d -= 2; /* eat the first char and the - */
2773 min = (U8)*d; /* first char in range */
2774 max = (U8)d[1]; /* last char in range */
2775#ifdef EBCDIC
2776 }
2777#endif
8ada0baa 2778
c2e66d9e 2779 if (min > max) {
01ec43d0 2780 Perl_croak(aTHX_
d1573ac7 2781 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2782 (char)min, (char)max);
c2e66d9e
GS
2783 }
2784
c7f1f016 2785#ifdef EBCDIC
4c3a8340
TS
2786 if (literal_endpoint == 2 &&
2787 ((isLOWER(min) && isLOWER(max)) ||
2788 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2789 if (isLOWER(min)) {
2790 for (i = min; i <= max; i++)
2791 if (isLOWER(i))
db42d148 2792 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2793 } else {
2794 for (i = min; i <= max; i++)
2795 if (isUPPER(i))
db42d148 2796 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2797 }
2798 }
2799 else
2800#endif
2801 for (i = min; i <= max; i++)
e294cc5d
JH
2802#ifdef EBCDIC
2803 if (has_utf8) {
2804 const U8 ch = (U8)NATIVE_TO_UTF(i);
2805 if (UNI_IS_INVARIANT(ch))
2806 *d++ = (U8)i;
2807 else {
2808 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2809 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2810 }
2811 }
2812 else
2813#endif
2814 *d++ = (char)i;
2815
2816#ifdef EBCDIC
2817 if (uvmax) {
2818 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2819 if (uvmax > 0x101)
2820 *d++ = (char)UTF_TO_NATIVE(0xff);
2821 if (uvmax > 0x100)
2822 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2823 }
2824#endif
02aa26ce
NT
2825
2826 /* mark the range as done, and continue */
79072805 2827 dorange = FALSE;
01ec43d0 2828 didrange = TRUE;
4c3a8340
TS
2829#ifdef EBCDIC
2830 literal_endpoint = 0;
2831#endif
79072805 2832 continue;
4e553d73 2833 }
02aa26ce
NT
2834
2835 /* range begins (ignore - as first or last char) */
79072805 2836 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2837 if (didrange) {
1fafa243 2838 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2839 }
e294cc5d
JH
2840 if (has_utf8
2841#ifdef EBCDIC
2842 && !native_range
2843#endif
2844 ) {
25716404 2845 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2846 s++;
2847 continue;
2848 }
79072805
LW
2849 dorange = TRUE;
2850 s++;
01ec43d0
GS
2851 }
2852 else {
2853 didrange = FALSE;
4c3a8340
TS
2854#ifdef EBCDIC
2855 literal_endpoint = 0;
e294cc5d 2856 native_range = TRUE;
4c3a8340 2857#endif
01ec43d0 2858 }
79072805 2859 }
02aa26ce
NT
2860
2861 /* if we get here, we're not doing a transliteration */
2862
e4a2df84
DM
2863 else if (*s == '[' && PL_lex_inpat && !in_charclass) {
2864 char *s1 = s-1;
2865 int esc = 0;
2866 while (s1 >= start && *s1-- == '\\')
2867 esc = !esc;
2868 if (!esc)
2869 in_charclass = TRUE;
2870 }
2866decb 2871
e4a2df84
DM
2872 else if (*s == ']' && PL_lex_inpat && in_charclass) {
2873 char *s1 = s-1;
2874 int esc = 0;
2875 while (s1 >= start && *s1-- == '\\')
2876 esc = !esc;
2877 if (!esc)
2878 in_charclass = FALSE;
2879 }
2866decb 2880
9da1dd8f
DM
2881 /* skip for regexp comments /(?#comment)/, except for the last
2882 * char, which will be done separately.
2883 * Stop on (?{..}) and friends */
2884
3280af22 2885 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2886 if (s[2] == '#') {
e994fd66 2887 while (s+1 < send && *s != ')')
db42d148 2888 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94 2889 }
2866decb 2890 else if (!PL_lex_casemods && !in_charclass &&
d3cec5e5
DM
2891 ( s[2] == '{' /* This should match regcomp.c */
2892 || (s[2] == '?' && s[3] == '{')))
155aba94 2893 {
9da1dd8f 2894 break;
cc6b7395 2895 }
748a9306 2896 }
02aa26ce
NT
2897
2898 /* likewise skip #-initiated comments in //x patterns */
3280af22 2899 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2900 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2901 while (s+1 < send && *s != '\n')
db42d148 2902 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2903 }
02aa26ce 2904
9da1dd8f
DM
2905 /* no further processing of single-quoted regex */
2906 else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'')
2907 goto default_action;
2908
5d1d4326 2909 /* check for embedded arrays
da6eedaa 2910 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2911 */
1749ea0d
TS
2912 else if (*s == '@' && s[1]) {
2913 if (isALNUM_lazy_if(s+1,UTF))
2914 break;
2915 if (strchr(":'{$", s[1]))
2916 break;
2917 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2918 break; /* in regexp, neither @+ nor @- are interpolated */
2919 }
02aa26ce
NT
2920
2921 /* check for embedded scalars. only stop if we're sure it's a
2922 variable.
2923 */
79072805 2924 else if (*s == '$') {
3280af22 2925 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2926 break;
77772344 2927 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2928 if (s[1] == '\\') {
2929 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2930 "Possible unintended interpolation of $\\ in regex");
77772344 2931 }
79072805 2932 break; /* in regexp, $ might be tail anchor */
77772344 2933 }
79072805 2934 }
02aa26ce 2935
2b9d42f0
NIS
2936 /* End of else if chain - OP_TRANS rejoin rest */
2937
02aa26ce 2938 /* backslashes */
79072805 2939 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2940 char* e; /* Can be used for ending '}', etc. */
2941
79072805 2942 s++;
02aa26ce 2943
7d0fc23c
KW
2944 /* warn on \1 - \9 in substitution replacements, but note that \11
2945 * is an octal; and \19 is \1 followed by '9' */
3280af22 2946 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2947 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2948 {
a2a5de95 2949 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2950 *--s = '$';
2951 break;
2952 }
02aa26ce
NT
2953
2954 /* string-change backslash escapes */
838f2281 2955 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQF", *s)) {
79072805
LW
2956 --s;
2957 break;
2958 }
ff3f963a
KW
2959 /* In a pattern, process \N, but skip any other backslash escapes.
2960 * This is because we don't want to translate an escape sequence
2961 * into a meta symbol and have the regex compiler use the meta
2962 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2963 * in spite of this, we do have to process \N here while the proper
2964 * charnames handler is in scope. See bugs #56444 and #62056.
2965 * There is a complication because \N in a pattern may also stand
2966 * for 'match a non-nl', and not mean a charname, in which case its
2967 * processing should be deferred to the regex compiler. To be a
2968 * charname it must be followed immediately by a '{', and not look
2969 * like \N followed by a curly quantifier, i.e., not something like
2970 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2971 * quantifier */
2972 else if (PL_lex_inpat
2973 && (*s != 'N'
2974 || s[1] != '{'
2975 || regcurly(s + 1)))
2976 {
cc74c5bd
TS
2977 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2978 goto default_action;
2979 }
02aa26ce 2980
79072805 2981 switch (*s) {
02aa26ce
NT
2982
2983 /* quoted - in transliterations */
79072805 2984 case '-':
3280af22 2985 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2986 *d++ = *s++;
2987 continue;
2988 }
2989 /* FALL THROUGH */
2990 default:
11b8faa4 2991 {
e4ca4584 2992 if ((isALNUMC(*s)))
a2a5de95
NC
2993 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2994 "Unrecognized escape \\%c passed through",
2995 *s);
11b8faa4 2996 /* default action is to copy the quoted character */
f9a63242 2997 goto default_action;
11b8faa4 2998 }
02aa26ce 2999
632403cc 3000 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
3001 case '0': case '1': case '2': case '3':
3002 case '4': case '5': case '6': case '7':
ba210ebe 3003 {
53305cf1
NC
3004 I32 flags = 0;
3005 STRLEN len = 3;
77a135fe 3006 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
3007 s += len;
3008 }
012bcf8d 3009 goto NUM_ESCAPE_INSERT;
02aa26ce 3010
f0a2b745
KW
3011 /* eg. \o{24} indicates the octal constant \024 */
3012 case 'o':
3013 {
3014 STRLEN len;
454155d9 3015 const char* error;
f0a2b745 3016
454155d9 3017 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 3018 s += len;
454155d9 3019 if (! valid) {
f0a2b745
KW
3020 yyerror(error);
3021 continue;
3022 }
3023 goto NUM_ESCAPE_INSERT;
3024 }
3025
77a135fe 3026 /* eg. \x24 indicates the hex constant 0x24 */
79072805 3027 case 'x':
a0481293 3028 {
53305cf1 3029 STRLEN len;
a0481293 3030 const char* error;
355860ce 3031
a0481293
KW
3032 bool valid = grok_bslash_x(s, &uv, &len, &error, 1);
3033 s += len;
3034 if (! valid) {
3035 yyerror(error);
355860ce 3036 continue;
ba210ebe 3037 }
012bcf8d
GS
3038 }
3039
3040 NUM_ESCAPE_INSERT:
ff3f963a
KW
3041 /* Insert oct or hex escaped character. There will always be
3042 * enough room in sv since such escapes will be longer than any
3043 * UTF-8 sequence they can end up as, except if they force us
3044 * to recode the rest of the string into utf8 */
ba7cea30 3045
77a135fe 3046 /* Here uv is the ordinal of the next character being added in
ff3f963a 3047 * unicode (converted from native). */
77a135fe 3048 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3049 if (!has_utf8 && uv > 255) {
77a135fe
KW
3050 /* Might need to recode whatever we have accumulated so
3051 * far if it contains any chars variant in utf8 or
3052 * utf-ebcdic. */
3053
3054 SvCUR_set(sv, d - SvPVX_const(sv));
3055 SvPOK_on(sv);
3056 *d = '\0';
77a135fe 3057 /* See Note on sizing above. */
7bf79863
KW
3058 sv_utf8_upgrade_flags_grow(sv,
3059 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3060 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3061 d = SvPVX(sv) + SvCUR(sv);
3062 has_utf8 = TRUE;
012bcf8d
GS
3063 }
3064
77a135fe
KW
3065 if (has_utf8) {
3066 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3067 if (PL_lex_inwhat == OP_TRANS &&
3068 PL_sublex_info.sub_op) {
3069 PL_sublex_info.sub_op->op_private |=
3070 (PL_lex_repl ? OPpTRANS_FROM_UTF
3071 : OPpTRANS_TO_UTF);
f9a63242 3072 }
e294cc5d
JH
3073#ifdef EBCDIC
3074 if (uv > 255 && !dorange)
3075 native_range = FALSE;
3076#endif
012bcf8d 3077 }
a0ed51b3 3078 else {
012bcf8d 3079 *d++ = (char)uv;
a0ed51b3 3080 }
012bcf8d
GS
3081 }
3082 else {
c4d5f83a 3083 *d++ = (char) uv;
a0ed51b3 3084 }
79072805 3085 continue;
02aa26ce 3086
4a2d328f 3087 case 'N':
ff3f963a
KW
3088 /* In a non-pattern \N must be a named character, like \N{LATIN
3089 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3090 * mean to match a non-newline. For non-patterns, named
3091 * characters are converted to their string equivalents. In
3092 * patterns, named characters are not converted to their
3093 * ultimate forms for the same reasons that other escapes
3094 * aren't. Instead, they are converted to the \N{U+...} form
3095 * to get the value from the charnames that is in effect right
3096 * now, while preserving the fact that it was a named character
3097 * so that the regex compiler knows this */
3098
3099 /* This section of code doesn't generally use the
3100 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3101 * a close examination of this macro and determined it is a
3102 * no-op except on utfebcdic variant characters. Every
3103 * character generated by this that would normally need to be
3104 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3105 * needed, and would complicate use of copy(). XXX There are
3106 * other parts of this file where the macro is used
3107 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3108
3109 /* The structure of this section of code (besides checking for
3110 * errors and upgrading to utf8) is:
3111 * Further disambiguate between the two meanings of \N, and if
3112 * not a charname, go process it elsewhere
0a96133f
KW
3113 * If of form \N{U+...}, pass it through if a pattern;
3114 * otherwise convert to utf8
3115 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3116 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3117
3118 /* Here, s points to the 'N'; the test below is guaranteed to
3119 * succeed if we are being called on a pattern as we already
3120 * know from a test above that the next character is a '{'.
3121 * On a non-pattern \N must mean 'named sequence, which
3122 * requires braces */
3123 s++;
3124 if (*s != '{') {
3125 yyerror("Missing braces on \\N{}");
3126 continue;
3127 }
3128 s++;
3129
0a96133f 3130 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3131 if (! (e = strchr(s, '}'))) {
3132 if (! PL_lex_inpat) {
5777a3f7 3133 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3134 } else {
3135 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3136 }
0a96133f 3137 continue;
ff3f963a 3138 }
cddc7ef4 3139
ff3f963a 3140 /* Here it looks like a named character */
cddc7ef4 3141
ff3f963a
KW
3142 if (PL_lex_inpat) {
3143
3144 /* XXX This block is temporary code. \N{} implies that the
3145 * pattern is to have Unicode semantics, and therefore
3146 * currently has to be encoded in utf8. By putting it in
3147 * utf8 now, we save a whole pass in the regular expression
3148 * compiler. Once that code is changed so Unicode
3149 * semantics doesn't necessarily have to be in utf8, this
da3a4baf
KW
3150 * block should be removed. However, the code that parses
3151 * the output of this would have to be changed to not
3152 * necessarily expect utf8 */
ff3f963a 3153 if (!has_utf8) {
77a135fe 3154 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3155 SvPOK_on(sv);
e4f3eed8 3156 *d = '\0';
77a135fe 3157 /* See Note on sizing above. */
7bf79863 3158 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3159 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3160 /* 5 = '\N{' + cur char + NUL */
3161 (STRLEN)(send - s) + 5);
f08d6ad9 3162 d = SvPVX(sv) + SvCUR(sv);
89491803 3163 has_utf8 = TRUE;
ff3f963a
KW
3164 }
3165 }
423cee85 3166
ff3f963a
KW
3167 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3168 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3169 | PERL_SCAN_DISALLOW_PREFIX;
3170 STRLEN len;
3171
3172 /* For \N{U+...}, the '...' is a unicode value even on
3173 * EBCDIC machines */
3174 s += 2; /* Skip to next char after the 'U+' */
3175 len = e - s;
3176 uv = grok_hex(s, &len, &flags, NULL);
3177 if (len == 0 || len != (STRLEN)(e - s)) {
3178 yyerror("Invalid hexadecimal number in \\N{U+...}");
3179 s = e + 1;
3180 continue;
3181 }
3182
3183 if (PL_lex_inpat) {
3184
e2a7e165
KW
3185 /* On non-EBCDIC platforms, pass through to the regex
3186 * compiler unchanged. The reason we evaluated the
3187 * number above is to make sure there wasn't a syntax
3188 * error. But on EBCDIC we convert to native so
3189 * downstream code can continue to assume it's native
3190 */
ff3f963a 3191 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3192#ifdef EBCDIC
3193 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3194 and the \0 */
3195 "\\N{U+%X}",
3196 (unsigned int) UNI_TO_NATIVE(uv));
3197#else
ff3f963a
KW
3198 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3199 d += e - s + 1;
e2a7e165 3200#endif
ff3f963a
KW
3201 }
3202 else { /* Not a pattern: convert the hex to string */
3203
3204 /* If destination is not in utf8, unconditionally
3205 * recode it to be so. This is because \N{} implies
3206 * Unicode semantics, and scalars have to be in utf8
3207 * to guarantee those semantics */
3208 if (! has_utf8) {
3209 SvCUR_set(sv, d - SvPVX_const(sv));
3210 SvPOK_on(sv);
3211 *d = '\0';
3212 /* See Note on sizing above. */
3213 sv_utf8_upgrade_flags_grow(
3214 sv,
3215 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3216 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3217 d = SvPVX(sv) + SvCUR(sv);
3218 has_utf8 = TRUE;
3219 }
3220
3221 /* Add the string to the output */
3222 if (UNI_IS_INVARIANT(uv)) {
3223 *d++ = (char) uv;
3224 }
3225 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3226 }
3227 }
3228 else { /* Here is \N{NAME} but not \N{U+...}. */
3229
3230 SV *res; /* result from charnames */
3231 const char *str; /* the string in 'res' */
3232 STRLEN len; /* its length */
3233
3234 /* Get the value for NAME */
3235 res = newSVpvn(s, e - s);
3236 res = new_constant( NULL, 0, "charnames",
3237 /* includes all of: \N{...} */
3238 res, NULL, s - 3, e - s + 4 );
3239
3240 /* Most likely res will be in utf8 already since the
3241 * standard charnames uses pack U, but a custom translator
3242 * can leave it otherwise, so make sure. XXX This can be
3243 * revisited to not have charnames use utf8 for characters
3244 * that don't need it when regexes don't have to be in utf8
3245 * for Unicode semantics. If doing so, remember EBCDIC */
3246 sv_utf8_upgrade(res);
3247 str = SvPV_const(res, len);
3248
3249 /* Don't accept malformed input */
3250 if (! is_utf8_string((U8 *) str, len)) {
3251 yyerror("Malformed UTF-8 returned by \\N");
3252 }
3253 else if (PL_lex_inpat) {
3254
3255 if (! len) { /* The name resolved to an empty string */
3256 Copy("\\N{}", d, 4, char);
3257 d += 4;
3258 }
3259 else {
3260 /* In order to not lose information for the regex
3261 * compiler, pass the result in the specially made
3262 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3263 * the code points in hex of each character
3264 * returned by charnames */
3265
3266 const char *str_end = str + len;
3267 STRLEN char_length; /* cur char's byte length */
3268 STRLEN output_length; /* and the number of bytes
3269 after this is translated
3270 into hex digits */
3271 const STRLEN off = d - SvPVX_const(sv);
3272
3273 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3274 * max('U+', '.'); and 1 for NUL */
3275 char hex_string[2 * UTF8_MAXBYTES + 5];
3276
3277 /* Get the first character of the result. */
3278 U32 uv = utf8n_to_uvuni((U8 *) str,
3279 len,
3280 &char_length,
3281 UTF8_ALLOW_ANYUV);
3282
3283 /* The call to is_utf8_string() above hopefully
3284 * guarantees that there won't be an error. But
3285 * it's easy here to make sure. The function just
3286 * above warns and returns 0 if invalid utf8, but
3287 * it can also return 0 if the input is validly a
3288 * NUL. Disambiguate */
3289 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3290 uv = UNICODE_REPLACEMENT;
3291 }
3292
3293 /* Convert first code point to hex, including the
e2a7e165
KW
3294 * boiler plate before it. For all these, we
3295 * convert to native format so that downstream code
3296 * can continue to assume the input is native */
78c35590 3297 output_length =
3353de27 3298 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3299 "\\N{U+%X",
3300 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3301
3302 /* Make sure there is enough space to hold it */
3303 d = off + SvGROW(sv, off
3304 + output_length
3305 + (STRLEN)(send - e)
3306 + 2); /* '}' + NUL */
3307 /* And output it */
3308 Copy(hex_string, d, output_length, char);
3309 d += output_length;
3310
3311 /* For each subsequent character, append dot and
3312 * its ordinal in hex */
3313 while ((str += char_length) < str_end) {
3314 const STRLEN off = d - SvPVX_const(sv);
3315 U32 uv = utf8n_to_uvuni((U8 *) str,
3316 str_end - str,
3317 &char_length,
3318 UTF8_ALLOW_ANYUV);
3319 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3320 uv = UNICODE_REPLACEMENT;
3321 }
3322
78c35590 3323 output_length =
3353de27 3324 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3325 ".%X",
3326 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3327
3328 d = off + SvGROW(sv, off
3329 + output_length
3330 + (STRLEN)(send - e)
3331 + 2); /* '}' + NUL */
3332 Copy(hex_string, d, output_length, char);
3333 d += output_length;
3334 }
3335
3336 *d++ = '}'; /* Done. Add the trailing brace */
3337 }
3338 }
3339 else { /* Here, not in a pattern. Convert the name to a
3340 * string. */
3341
3342 /* If destination is not in utf8, unconditionally
3343 * recode it to be so. This is because \N{} implies
3344 * Unicode semantics, and scalars have to be in utf8
3345 * to guarantee those semantics */
3346 if (! has_utf8) {
3347 SvCUR_set(sv, d - SvPVX_const(sv));
3348 SvPOK_on(sv);
3349 *d = '\0';
3350 /* See Note on sizing above. */
3351 sv_utf8_upgrade_flags_grow(sv,
3352 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3353 len + (STRLEN)(send - s) + 1);
3354 d = SvPVX(sv) + SvCUR(sv);
3355 has_utf8 = TRUE;
3356 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3357
3358 /* See Note on sizing above. (NOTE: SvCUR() is not
3359 * set correctly here). */
3360 const STRLEN off = d - SvPVX_const(sv);
3361 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3362 }
3363 Copy(str, d, len, char);
3364 d += len;
423cee85 3365 }
423cee85 3366 SvREFCNT_dec(res);
cb233ae3
KW
3367
3368 /* Deprecate non-approved name syntax */
3369 if (ckWARN_d(WARN_DEPRECATED)) {
3370 bool problematic = FALSE;
3371 char* i = s;
3372
3373 /* For non-ut8 input, look to see that the first
3374 * character is an alpha, then loop through the rest
3375 * checking that each is a continuation */
3376 if (! this_utf8) {
3377 if (! isALPHAU(*i)) problematic = TRUE;
3378 else for (i = s + 1; i < e; i++) {
3379 if (isCHARNAME_CONT(*i)) continue;
3380 problematic = TRUE;
3381 break;
3382 }
3383 }
3384 else {
3385 /* Similarly for utf8. For invariants can check
3386 * directly. We accept anything above the latin1
3387 * range because it is immaterial to Perl if it is
3388 * correct or not, and is expensive to check. But
3389 * it is fairly easy in the latin1 range to convert
3390 * the variants into a single character and check
3391 * those */
3392 if (UTF8_IS_INVARIANT(*i)) {
3393 if (! isALPHAU(*i)) problematic = TRUE;
3394 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3395 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3396 *(i+1)))))
3397 {
3398 problematic = TRUE;
3399 }
3400 }
3401 if (! problematic) for (i = s + UTF8SKIP(s);
3402 i < e;
3403 i+= UTF8SKIP(i))
3404 {
3405 if (UTF8_IS_INVARIANT(*i)) {
3406 if (isCHARNAME_CONT(*i)) continue;
3407 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3408 continue;
3409 } else if (isCHARNAME_CONT(
3410 UNI_TO_NATIVE(
81c14aa2 3411 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3412 {
3413 continue;
3414 }
3415 problematic = TRUE;
3416 break;
3417 }
3418 }
3419 if (problematic) {
6e1bad6c
KW
3420 /* The e-i passed to the final %.*s makes sure that
3421 * should the trailing NUL be missing that this
3422 * print won't run off the end of the string */
cb233ae3 3423 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3424 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3425 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3426 }
3427 }
3428 } /* End \N{NAME} */
ff3f963a
KW
3429#ifdef EBCDIC
3430 if (!dorange)
3431 native_range = FALSE; /* \N{} is defined to be Unicode */
3432#endif
3433 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3434 continue;
3435
02aa26ce 3436 /* \c is a control character */
79072805
LW
3437 case 'c':
3438 s++;
961ce445 3439 if (s < send) {
17a3df4c 3440 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3441 }
961ce445
RGS
3442 else {
3443 yyerror("Missing control char name in \\c");
3444 }
79072805 3445 continue;
02aa26ce
NT
3446
3447 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3448 case 'b':
db42d148 3449 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3450 break;
3451 case 'n':
db42d148 3452 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3453 break;
3454 case 'r':
db42d148 3455 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3456 break;
3457 case 'f':
db42d148 3458 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3459 break;
3460 case 't':
db42d148 3461 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3462 break;
34a3fe2a 3463 case 'e':
db42d148 3464 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3465 break;
3466 case 'a':
db42d148 3467 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3468 break;
02aa26ce
NT
3469 } /* end switch */
3470
79072805
LW
3471 s++;
3472 continue;
02aa26ce 3473 } /* end if (backslash) */
4c3a8340
TS
3474#ifdef EBCDIC
3475 else
3476 literal_endpoint++;
3477#endif
02aa26ce 3478
f9a63242 3479 default_action:
77a135fe
KW
3480 /* If we started with encoded form, or already know we want it,
3481 then encode the next character */
3482 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3483 STRLEN len = 1;
77a135fe
KW
3484
3485
3486 /* One might think that it is wasted effort in the case of the
3487 * source being utf8 (this_utf8 == TRUE) to take the next character
3488 * in the source, convert it to an unsigned value, and then convert
3489 * it back again. But the source has not been validated here. The
3490 * routine that does the conversion checks for errors like
3491 * malformed utf8 */
3492
5f66b61c
AL
3493 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3494 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3495 if (!has_utf8) {
3496 SvCUR_set(sv, d - SvPVX_const(sv));
3497 SvPOK_on(sv);
3498 *d = '\0';
77a135fe 3499 /* See Note on sizing above. */
7bf79863
KW
3500 sv_utf8_upgrade_flags_grow(sv,
3501 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3502 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3503 d = SvPVX(sv) + SvCUR(sv);
3504 has_utf8 = TRUE;
3505 } else if (need > len) {
3506 /* encoded value larger than old, may need extra space (NOTE:
3507 * SvCUR() is not set correctly here). See Note on sizing
3508 * above. */
9d4ba2ae 3509 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3510 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3511 }
77a135fe
KW
3512 s += len;
3513
5f66b61c 3514 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3515#ifdef EBCDIC
3516 if (uv > 255 && !dorange)
3517 native_range = FALSE;
3518#endif
2b9d42f0
NIS
3519 }
3520 else {
3521 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3522 }
02aa26ce
NT
3523 } /* while loop to process each character */
3524
3525 /* terminate the string and set up the sv */
79072805 3526 *d = '\0';
95a20fc0 3527 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3528 if (SvCUR(sv) >= SvLEN(sv))
5637ef5b
NC
3529 Perl_croak(aTHX_ "panic: constant overflowed allocated space, %"UVuf
3530 " >= %"UVuf, (UV)SvCUR(sv), (UV)SvLEN(sv));
2b9d42f0 3531
79072805 3532 SvPOK_on(sv);
9f4817db 3533 if (PL_encoding && !has_utf8) {
d0063567
DK
3534 sv_recode_to_utf8(sv, PL_encoding);
3535 if (SvUTF8(sv))
3536 has_utf8 = TRUE;
9f4817db 3537 }
2b9d42f0 3538 if (has_utf8) {
7e2040f0 3539 SvUTF8_on(sv);
2b9d42f0 3540 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3541 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3542 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3543 }
3544 }
79072805 3545
02aa26ce 3546 /* shrink the sv if we allocated more than we used */
79072805 3547 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3548 SvPV_shrink_to_cur(sv);
79072805 3549 }
02aa26ce 3550
6154021b 3551 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3552 if (s > PL_bufptr) {
eb0d8d16
NC
3553 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3554 const char *const key = PL_lex_inpat ? "qr" : "q";
3555 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3556 const char *type;
3557 STRLEN typelen;
3558
3559 if (PL_lex_inwhat == OP_TRANS) {
3560 type = "tr";
3561 typelen = 2;
3562 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3563 type = "s";
3564 typelen = 1;
9da1dd8f
DM
3565 } else if (PL_lex_inpat && SvIVX(PL_linestr) == '\'') {
3566 type = "q";
3567 typelen = 1;
eb0d8d16
NC
3568 } else {
3569 type = "qq";
3570 typelen = 2;
3571 }
3572
3573 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3574 type, typelen);
3575 }
6154021b 3576 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3577 } else
8990e307 3578 SvREFCNT_dec(sv);
79072805
LW
3579 return s;
3580}
3581
ffb4593c
NT
3582/* S_intuit_more
3583 * Returns TRUE if there's more to the expression (e.g., a subscript),
3584 * FALSE otherwise.
ffb4593c
NT
3585 *
3586 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3587 *
3588 * ->[ and ->{ return TRUE
3589 * { and [ outside a pattern are always subscripts, so return TRUE
3590 * if we're outside a pattern and it's not { or [, then return FALSE
3591 * if we're in a pattern and the first char is a {
3592 * {4,5} (any digits around the comma) returns FALSE
3593 * if we're in a pattern and the first char is a [
3594 * [] returns FALSE
3595 * [SOMETHING] has a funky algorithm to decide whether it's a
3596 * character class or not. It has to deal with things like
3597 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3598 * anything else returns TRUE
3599 */
3600
9cbb5ea2
GS
3601/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3602
76e3520e 3603STATIC int
cea2e8a9 3604S_intuit_more(pTHX_ register char *s)
79072805 3605{
97aff369 3606 dVAR;
7918f24d
NC
3607
3608 PERL_ARGS_ASSERT_INTUIT_MORE;
3609
3280af22 3610 if (PL_lex_brackets)
79072805
LW
3611 return TRUE;
3612 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3613 return TRUE;
3614 if (*s != '{' && *s != '[')
3615 return FALSE;
3280af22 3616 if (!PL_lex_inpat)
79072805
LW
3617 return TRUE;
3618
3619 /* In a pattern, so maybe we have {n,m}. */
3620 if (*s == '{') {
b3155d95 3621 if (regcurly(s)) {
79072805 3622 return FALSE;
b3155d95 3623 }
79072805 3624 return TRUE;
79072805
LW
3625 }
3626
3627 /* On the other hand, maybe we have a character class */
3628
3629 s++;
3630 if (*s == ']' || *s == '^')
3631 return FALSE;
3632 else {
ffb4593c 3633 /* this is terrifying, and it works */
79072805
LW
3634 int weight = 2; /* let's weigh the evidence */
3635 char seen[256];
f27ffc4a 3636 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3637 const char * const send = strchr(s,']');
3280af22 3638 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3639
3640 if (!send) /* has to be an expression */
3641 return TRUE;
3642
3643 Zero(seen,256,char);
3644 if (*s == '$')
3645 weight -= 3;
3646 else if (isDIGIT(*s)) {
3647 if (s[1] != ']') {
3648 if (isDIGIT(s[1]) && s[2] == ']')
3649 weight -= 10;
3650 }
3651 else
3652 weight -= 100;
3653 }
3654 for (; s < send; s++) {
3655 last_un_char = un_char;
3656 un_char = (unsigned char)*s;
3657 switch (*s) {
3658 case '@':
3659 case '&':
3660 case '$':
3661 weight -= seen[un_char] * 10;
7e2040f0 3662 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3663 int len;
8903cb82 3664 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e 3665 len = (int)strlen(tmpbuf);
6fbd0d97
BF
3666 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3667 UTF ? SVf_UTF8 : 0, SVt_PV))
79072805
LW
3668 weight -= 100;
3669 else
3670 weight -= 10;
3671 }
3672 else if (*s == '$' && s[1] &&
93a17b20
LW
3673 strchr("[#!%*<>()-=",s[1])) {
3674 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3675 weight -= 10;
3676 else
3677 weight -= 1;
3678 }
3679 break;
3680 case '\\':
3681 un_char = 254;
3682 if (s[1]) {
93a17b20 3683 if (strchr("wds]",s[1]))
79072805 3684 weight += 100;
10edeb5d 3685 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3686 weight += 1;
93a17b20 3687 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3688 weight += 40;
3689 else if (isDIGIT(s[1])) {
3690 weight += 40;
3691 while (s[1] && isDIGIT(s[1]))
3692 s++;
3693 }
3694 }
3695 else
3696 weight += 100;
3697 break;
3698 case '-':
3699 if (s[1] == '\\')
3700 weight += 50;
93a17b20 3701 if (strchr("aA01! ",last_un_char))
79072805 3702 weight += 30;
93a17b20 3703 if (strchr("zZ79~",s[1]))
79072805 3704 weight += 30;
f27ffc4a
GS
3705 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3706 weight -= 5; /* cope with negative subscript */
79072805
LW
3707 break;
3708 default:
3792a11b
NC
3709 if (!isALNUM(last_un_char)
3710 && !(last_un_char == '$' || last_un_char == '@'
3711 || last_un_char == '&')
3712 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3713 char *d = tmpbuf;
3714 while (isALPHA(*s))
3715 *d++ = *s++;
3716 *d = '\0';
5458a98a 3717 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3718 weight -= 150;
3719 }
3720 if (un_char == last_un_char + 1)
3721 weight += 5;
3722 weight -= seen[un_char];
3723 break;
3724 }
3725 seen[un_char]++;
3726 }
3727 if (weight >= 0) /* probably a character class */
3728 return FALSE;
3729 }
3730
3731 return TRUE;
3732}
ffed7fef 3733
ffb4593c
NT
3734/*
3735 * S_intuit_method
3736 *
3737 * Does all the checking to disambiguate
3738 * foo bar
3739 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3740 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3741 *
3742 * First argument is the stuff after the first token, e.g. "bar".
3743 *
a4fd4a89 3744 * Not a method if foo is a filehandle.
ffb4593c
NT
3745 * Not a method if foo is a subroutine prototyped to take a filehandle.
3746 * Not a method if it's really "Foo $bar"
3747 * Method if it's "foo $bar"
3748 * Not a method if it's really "print foo $bar"
3749 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3750 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3751 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3752 * =>
3753 */
3754
76e3520e 3755STATIC int
62d55b22 3756S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3757{
97aff369 3758 dVAR;
a0d0e21e 3759 char *s = start + (*start == '$');
3280af22 3760 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3761 STRLEN len;
3762 GV* indirgv;
5db06880
NC
3763#ifdef PERL_MAD
3764 int soff;
3765#endif
a0d0e21e 3766
7918f24d
NC
3767 PERL_ARGS_ASSERT_INTUIT_METHOD;
3768
aca88b25 3769 if (gv && SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3770 return 0;
aca88b25 3771 if (cv && SvPOK(cv)) {
8fa6a409 3772 const char *proto = CvPROTO(cv);
62d55b22
NC
3773 if (proto) {
3774 if (*proto == ';')
3775 proto++;
3776 if (*proto == '*')
3777 return 0;
3778 }
a0d0e21e 3779 }
8903cb82 3780 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3781 /* start is the beginning of the possible filehandle/object,
3782 * and s is the end of it
3783 * tmpbuf is a copy of it
3784 */
3785
a0d0e21e 3786 if (*start == '$') {
39c012bc 3787 if (cv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3ef1310e 3788 isUPPER(*PL_tokenbuf))
a0d0e21e 3789 return 0;
5db06880
NC
3790#ifdef PERL_MAD
3791 len = start - SvPVX(PL_linestr);
3792#endif
29595ff2 3793 s = PEEKSPACE(s);
f0092767 3794#ifdef PERL_MAD
5db06880
NC
3795 start = SvPVX(PL_linestr) + len;
3796#endif
3280af22
NIS
3797 PL_bufptr = start;
3798 PL_expect = XREF;
a0d0e21e
LW
3799 return *s == '(' ? FUNCMETH : METHOD;
3800 }
5458a98a 3801 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3802 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3803 len -= 2;
3804 tmpbuf[len] = '\0';
5db06880
NC
3805#ifdef PERL_MAD
3806 soff = s - SvPVX(PL_linestr);
3807#endif
c3e0f903
GS
3808 goto bare_package;
3809 }
38d2cf30 3810 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8ebc5c01 3811 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3812 return 0;
3813 /* filehandle or package name makes it a method */
39c012bc 3814 if (!cv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5db06880
NC
3815#ifdef PERL_MAD
3816 soff = s - SvPVX(PL_linestr);
3817#endif
29595ff2 3818 s = PEEKSPACE(s);
3280af22 3819 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3820 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3821 bare_package:
cd81e915 3822 start_force(PL_curforce);
9ded7720 3823 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3824 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3825 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880 3826 if (PL_madskills)
38d2cf30
BF
3827 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3828 ( UTF ? SVf_UTF8 : 0 )));
3280af22 3829 PL_expect = XTERM;
a0d0e21e 3830 force_next(WORD);
3280af22 3831 PL_bufptr = s;
5db06880
NC
3832#ifdef PERL_MAD
3833 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3834#endif
a0d0e21e
LW
3835 return *s == '(' ? FUNCMETH : METHOD;
3836 }
3837 }
3838 return 0;
3839}
3840
16d20bd9 3841/* Encoded script support. filter_add() effectively inserts a
4e553d73 3842 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3843 * Note that the filter function only applies to the current source file
3844 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3845 *
3846 * The datasv parameter (which may be NULL) can be used to pass
3847 * private data to this instance of the filter. The filter function
3848 * can recover the SV using the FILTER_DATA macro and use it to
3849 * store private buffers and state information.
3850 *
3851 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3852 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3853 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3854 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3855 * private use must be set using malloc'd pointers.
3856 */
16d20bd9
AD
3857
3858SV *
864dbfa3 3859Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3860{
97aff369 3861 dVAR;
f4c556ac 3862 if (!funcp)
a0714e2c 3863 return NULL;
f4c556ac 3864
5486870f
DM
3865 if (!PL_parser)
3866 return NULL;
3867
f1c31c52
FC
3868 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3869 Perl_croak(aTHX_ "Source filters apply only to byte streams");
3870
3280af22
NIS
3871 if (!PL_rsfp_filters)
3872 PL_rsfp_filters = newAV();
16d20bd9 3873 if (!datasv)
561b68a9 3874 datasv = newSV(0);
862a34c6 3875 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3876 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3877 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3878 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3879 FPTR2DPTR(void *, IoANY(datasv)),
3880 SvPV_nolen(datasv)));
3280af22
NIS
3881 av_unshift(PL_rsfp_filters, 1);
3882 av_store(PL_rsfp_filters, 0, datasv) ;
60d63348
FC
3883 if (
3884 !PL_parser->filtered
3885 && PL_parser->lex_flags & LEX_EVALBYTES
3886 && PL_bufptr < PL_bufend
3887 ) {
3888 const char *s = PL_bufptr;
3889 while (s < PL_bufend) {
3890 if (*s == '\n') {
3891 SV *linestr = PL_parser->linestr;
3892 char *buf = SvPVX(linestr);
3893 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3894 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3895 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3896 STRLEN const linestart_pos = PL_parser->linestart - buf;
3897 STRLEN const last_uni_pos =
3898 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3899 STRLEN const last_lop_pos =
3900 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3901 av_push(PL_rsfp_filters, linestr);
3902 PL_parser->linestr =
3903 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3904 buf = SvPVX(PL_parser->linestr);
3905 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3906 PL_parser->bufptr = buf + bufptr_pos;
3907 PL_parser->oldbufptr = buf + oldbufptr_pos;
3908 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3909 PL_parser->linestart = buf + linestart_pos;
3910 if (PL_parser->last_uni)
3911 PL_parser->last_uni = buf + last_uni_pos;
3912 if (PL_parser->last_lop)
3913 PL_parser->last_lop = buf + last_lop_pos;
3914 SvLEN(linestr) = SvCUR(linestr);
3915 SvCUR(linestr) = s-SvPVX(linestr);
3916 PL_parser->filtered = 1;
3917 break;
3918 }
3919 s++;
3920 }
3921 }
16d20bd9
AD
3922 return(datasv);
3923}
4e553d73 3924
16d20bd9
AD
3925
3926/* Delete most recently added instance of this filter function. */
a0d0e21e 3927void
864dbfa3 3928Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3929{
97aff369 3930 dVAR;
e0c19803 3931 SV *datasv;
24801a4b 3932
7918f24d
NC
3933 PERL_ARGS_ASSERT_FILTER_DEL;
3934
33073adb 3935#ifdef DEBUGGING
55662e27
JH
3936 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3937 FPTR2DPTR(void*, funcp)));
33073adb 3938#endif
5486870f 3939 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3940 return;
3941 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3942 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3943 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3944 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3945
16d20bd9
AD
3946 return;
3947 }
3948 /* we need to search for the correct entry and clear it */
cea2e8a9 3949 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3950}
3951
3952
1de9afcd
RGS
3953/* Invoke the idxth filter function for the current rsfp. */
3954/* maxlen 0 = read one text line */
16d20bd9 3955I32
864dbfa3 3956Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3957{
97aff369 3958 dVAR;
16d20bd9
AD
3959 filter_t funcp;
3960 SV *datasv = NULL;
f482118e
NC
3961 /* This API is bad. It should have been using unsigned int for maxlen.
3962 Not sure if we want to change the API, but if not we should sanity
3963 check the value here. */
60d63348 3964 unsigned int correct_length
39cd7a59
NC
3965 = maxlen < 0 ?
3966#ifdef PERL_MICRO
3967 0x7FFFFFFF
3968#else
3969 INT_MAX
3970#endif
3971 : maxlen;
e50aee73 3972
7918f24d
NC
3973 PERL_ARGS_ASSERT_FILTER_READ;
3974
5486870f 3975 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3976 return -1;
1de9afcd 3977 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3978 /* Provide a default input filter to make life easy. */
3979 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3980 DEBUG_P(PerlIO_printf(Perl_debug_log,
3981 "filter_read %d: from rsfp\n", idx));
f482118e 3982 if (correct_length) {
16d20bd9
AD
3983 /* Want a block */
3984 int len ;
f54cb97a 3985 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3986
3987 /* ensure buf_sv is large enough */
881d8f0a 3988 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3989 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3990 correct_length)) <= 0) {
3280af22 3991 if (PerlIO_error(PL_rsfp))
37120919
AD
3992 return -1; /* error */
3993 else
3994 return 0 ; /* end of file */
3995 }
16d20bd9 3996 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3997 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3998 } else {
3999 /* Want a line */
3280af22
NIS
4000 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
4001 if (PerlIO_error(PL_rsfp))
37120919
AD
4002 return -1; /* error */
4003 else
4004 return 0 ; /* end of file */
4005 }
16d20bd9
AD
4006 }
4007 return SvCUR(buf_sv);
4008 }
4009 /* Skip this filter slot if filter has been deleted */
1de9afcd 4010 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
4011 DEBUG_P(PerlIO_printf(Perl_debug_log,
4012 "filter_read %d: skipped (filter deleted)\n",
4013 idx));
f482118e 4014 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9 4015 }
60d63348
FC
4016 if (SvTYPE(datasv) != SVt_PVIO) {
4017 if (correct_length) {
4018 /* Want a block */
4019 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
4020 if (!remainder) return 0; /* eof */
4021 if (correct_length > remainder) correct_length = remainder;
4022 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
4023 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
4024 } else {
4025 /* Want a line */
4026 const char *s = SvEND(datasv);
4027 const char *send = SvPVX(datasv) + SvLEN(datasv);
4028 while (s < send) {
4029 if (*s == '\n') {
4030 s++;
4031 break;
4032 }
4033 s++;
4034 }
4035 if (s == send) return 0; /* eof */
4036 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4037 SvCUR_set(datasv, s-SvPVX(datasv));
4038 }
4039 return SvCUR(buf_sv);
4040 }
16d20bd9 4041 /* Get function pointer hidden within datasv */
8141890a 4042 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
4043 DEBUG_P(PerlIO_printf(Perl_debug_log,
4044 "filter_read %d: via function %p (%s)\n",
ca0270c4 4045 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
4046 /* Call function. The function is expected to */
4047 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 4048 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 4049 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
4050}
4051
76e3520e 4052STATIC char *
5cc814fd 4053S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 4054{
97aff369 4055 dVAR;
7918f24d
NC
4056
4057 PERL_ARGS_ASSERT_FILTER_GETS;
4058
c39cd008 4059#ifdef PERL_CR_FILTER
3280af22 4060 if (!PL_rsfp_filters) {
c39cd008 4061 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
4062 }
4063#endif
3280af22 4064 if (PL_rsfp_filters) {
55497cff 4065 if (!append)
4066 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
4067 if (FILTER_READ(0, sv, 0) > 0)
4068 return ( SvPVX(sv) ) ;
4069 else
bd61b366 4070 return NULL ;
16d20bd9 4071 }
9d116dd7 4072 else
5cc814fd 4073 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
4074}
4075
01ec43d0 4076STATIC HV *
9bde8eb0 4077S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 4078{
97aff369 4079 dVAR;
def3634b
GS
4080 GV *gv;
4081
7918f24d
NC
4082 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4083
01ec43d0 4084 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
4085 return PL_curstash;
4086
4087 if (len > 2 &&
4088 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
acc6da14 4089 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
01ec43d0
GS
4090 {
4091 return GvHV(gv); /* Foo:: */
def3634b
GS
4092 }
4093
4094 /* use constant CLASS => 'MyClass' */
acc6da14 4095 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
c35e046a
AL
4096 if (gv && GvCV(gv)) {
4097 SV * const sv = cv_const_sv(GvCV(gv));
4098 if (sv)
9bde8eb0 4099 pkgname = SvPV_const(sv, len);
def3634b
GS
4100 }
4101
acc6da14 4102 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
def3634b 4103}
a0d0e21e 4104
e3f73d4e
RGS
4105/*
4106 * S_readpipe_override
486ec47a 4107 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4108 * optree, provided sublex_start() is called afterwards.
4109 */
4110STATIC void
1d51329b 4111S_readpipe_override(pTHX)
e3f73d4e
RGS
4112{
4113 GV **gvp;
4114 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4115 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4116 if ((gv_readpipe
4117 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4118 ||
4119 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4120 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4121 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4122 {
4123 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4124 op_append_elem(OP_LIST,
e3f73d4e
RGS
4125 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4126 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4127 }
e3f73d4e
RGS
4128}
4129
5db06880
NC
4130#ifdef PERL_MAD
4131 /*
4132 * Perl_madlex
4133 * The intent of this yylex wrapper is to minimize the changes to the
4134 * tokener when we aren't interested in collecting madprops. It remains
4135 * to be seen how successful this strategy will be...
4136 */
4137
4138int
4139Perl_madlex(pTHX)
4140{
4141 int optype;
4142 char *s = PL_bufptr;
4143
cd81e915
NC
4144 /* make sure PL_thiswhite is initialized */
4145 PL_thiswhite = 0;
4146 PL_thismad = 0;
5db06880 4147
cd81e915 4148 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 4149 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
4150 return S_pending_ident(aTHX);
4151
4152 /* previous token ate up our whitespace? */
cd81e915
NC
4153 if (!PL_lasttoke && PL_nextwhite) {
4154 PL_thiswhite = PL_nextwhite;
4155 PL_nextwhite = 0;
5db06880
NC
4156 }
4157
4158 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4159 PL_realtokenstart = -1;
4160 PL_thistoken = 0;
5db06880
NC
4161 optype = yylex();
4162 s = PL_bufptr;
cd81e915 4163 assert(PL_curforce < 0);
5db06880 4164
cd81e915
NC
4165 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4166 if (!PL_thistoken) {
4167 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4168 PL_thistoken = newSVpvs("");
5db06880 4169 else {
c35e046a 4170 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4171 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4172 }
4173 }
cd81e915
NC
4174 if (PL_thismad) /* install head */
4175 CURMAD('X', PL_thistoken);
5db06880
NC
4176 }
4177
4178 /* last whitespace of a sublex? */
cd81e915
NC
4179 if (optype == ')' && PL_endwhite) {
4180 CURMAD('X', PL_endwhite);
5db06880
NC
4181 }
4182
cd81e915 4183 if (!PL_thismad) {
5db06880
NC
4184
4185 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4186 if (!PL_thiswhite && !PL_endwhite && !optype) {
4187 sv_free(PL_thistoken);
4188 PL_thistoken = 0;
5db06880
NC
4189 return 0;
4190 }
4191
4192 /* put off final whitespace till peg */
60d63348 4193 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
cd81e915
NC
4194 PL_nextwhite = PL_thiswhite;
4195 PL_thiswhite = 0;
5db06880 4196 }
cd81e915
NC
4197 else if (PL_thisopen) {
4198 CURMAD('q', PL_thisopen);
4199 if (PL_thistoken)
4200 sv_free(PL_thistoken);
4201 PL_thistoken = 0;
5db06880
NC
4202 }
4203 else {
4204 /* Store actual token text as madprop X */
cd81e915 4205 CURMAD('X', PL_thistoken);
5db06880
NC
4206 }
4207
cd81e915 4208 if (PL_thiswhite) {
5db06880 4209 /* add preceding whitespace as madprop _ */
cd81e915 4210 CURMAD('_', PL_thiswhite);
5db06880
NC
4211 }
4212
cd81e915 4213 if (PL_thisstuff) {
5db06880 4214 /* add quoted material as madprop = */
cd81e915 4215 CURMAD('=', PL_thisstuff);
5db06880
NC
4216 }
4217
cd81e915 4218 if (PL_thisclose) {
5db06880 4219 /* add terminating quote as madprop Q */
cd81e915 4220 CURMAD('Q', PL_thisclose);
5db06880
NC
4221 }
4222 }
4223
4224 /* special processing based on optype */
4225
4226 switch (optype) {
4227
4228 /* opval doesn't need a TOKEN since it can already store mp */
4229 case WORD:
4230 case METHOD:
4231 case FUNCMETH:
4232 case THING:
4233 case PMFUNC:
4234 case PRIVATEREF:
4235 case FUNC0SUB:
4236 case UNIOPSUB:
4237 case LSTOPSUB:
5db1eb8d 4238 case LABEL:
6154021b
RGS
4239 if (pl_yylval.opval)
4240 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4241 PL_thismad = 0;
5db06880
NC
4242 return optype;
4243
4244 /* fake EOF */
4245 case 0:
4246 optype = PEG;
cd81e915
NC
4247 if (PL_endwhite) {
4248 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4249 PL_endwhite = 0;
5db06880
NC
4250 }
4251 break;
4252
4253 case ']':
4254 case '}':
cd81e915 4255 if (PL_faketokens)
5db06880
NC
4256 break;
4257 /* remember any fake bracket that lexer is about to discard */
4258 if (PL_lex_brackets == 1 &&
4259 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4260 {
4261 s = PL_bufptr;
4262 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4263 s++;
4264 if (*s == '}') {
cd81e915
NC
4265 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4266 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4267 PL_thiswhite = 0;
5db06880
NC
4268 PL_bufptr = s - 1;
4269 break; /* don't bother looking for trailing comment */
4270 }
4271 else
4272 s = PL_bufptr;
4273 }
4274 if (optype == ']')
4275 break;
4276 /* FALLTHROUGH */
4277
4278 /* attach a trailing comment to its statement instead of next token */
4279 case ';':
cd81e915 4280 if (PL_faketokens)
5db06880
NC
4281 break;
4282 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4283 s = PL_bufptr;
4284 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4285 s++;
4286 if (*s == '\n' || *s == '#') {
4287 while (s < PL_bufend && *s != '\n')
4288 s++;
4289 if (s < PL_bufend)
4290 s++;
cd81e915
NC
4291 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4292 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4293 PL_thiswhite = 0;
5db06880
NC
4294 PL_bufptr = s;
4295 }
4296 }
4297 break;
4298
5db06880
NC
4299 /* ival */
4300 default:
4301 break;
4302
4303 }
4304
4305 /* Create new token struct. Note: opvals return early above. */
6154021b 4306 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4307 PL_thismad = 0;
5db06880
NC
4308 return optype;
4309}
4310#endif
4311
468aa647 4312STATIC char *
cc6ed77d 4313S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4314 dVAR;
7918f24d
NC
4315
4316 PERL_ARGS_ASSERT_TOKENIZE_USE;
4317
468aa647
RGS
4318 if (PL_expect != XSTATE)
4319 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4320 is_use ? "use" : "no"));
29595ff2 4321 s = SKIPSPACE1(s);
468aa647
RGS
4322 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4323 s = force_version(s, TRUE);
17c59fdf
VP
4324 if (*s == ';' || *s == '}'
4325 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4326 start_force(PL_curforce);
9ded7720 4327 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4328 force_next(WORD);
4329 }
4330 else if (*s == 'v') {
4331 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4332 s = force_version(s, FALSE);
4333 }
4334 }
4335 else {
4336 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4337 s = force_version(s, FALSE);
4338 }
6154021b 4339 pl_yylval.ival = is_use;
468aa647
RGS
4340 return s;
4341}
748a9306 4342#ifdef DEBUGGING
27da23d5 4343 static const char* const exp_name[] =
09bef843 4344 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4345 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4346 };
748a9306 4347#endif
463ee0b2 4348
361d9b55
Z
4349#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4350STATIC bool
4351S_word_takes_any_delimeter(char *p, STRLEN len)
4352{
4353 return (len == 1 && strchr("msyq", p[0])) ||
4354 (len == 2 && (
4355 (p[0] == 't' && p[1] == 'r') ||
4356 (p[0] == 'q' && strchr("qwxr", p[1]))));
4357}
4358
02aa26ce
NT
4359/*
4360 yylex
4361
4362 Works out what to call the token just pulled out of the input
4363 stream. The yacc parser takes care of taking the ops we return and
4364 stitching them into a tree.
4365
4366 Returns:
4367 PRIVATEREF
4368
4369 Structure:
4370 if read an identifier
4371 if we're in a my declaration
4372 croak if they tried to say my($foo::bar)
4373 build the ops for a my() declaration
4374 if it's an access to a my() variable
4375 are we in a sort block?
4376 croak if my($a); $a <=> $b
4377 build ops for access to a my() variable
4378 if in a dq string, and they've said @foo and we can't find @foo
4379 croak
4380 build ops for a bareword
4381 if we already built the token before, use it.
4382*/
4383
20141f0e 4384
dba4d153
JH
4385#ifdef __SC__
4386#pragma segment Perl_yylex
4387#endif
dba4d153 4388int
dba4d153 4389Perl_yylex(pTHX)
20141f0e 4390{
97aff369 4391 dVAR;
3afc138a 4392 register char *s = PL_bufptr;
378cc40b 4393 register char *d;
463ee0b2 4394 STRLEN len;
aa7440fb 4395 bool bof = FALSE;
580561a3 4396 U32 fake_eof = 0;
a687059c 4397
10edeb5d
JH
4398 /* orig_keyword, gvp, and gv are initialized here because
4399 * jump to the label just_a_word_zero can bypass their
4400 * initialization later. */
4401 I32 orig_keyword = 0;
4402 GV *gv = NULL;
4403 GV **gvp = NULL;
4404
bbf60fe6 4405 DEBUG_T( {
396482e1 4406 SV* tmp = newSVpvs("");
b6007c36
DM
4407 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4408 (IV)CopLINE(PL_curcop),
4409 lex_state_names[PL_lex_state],
4410 exp_name[PL_expect],
4411 pv_display(tmp, s, strlen(s), 0, 60));
4412 SvREFCNT_dec(tmp);
bbf60fe6 4413 } );
02aa26ce 4414 /* check if there's an identifier for us to look at */
28ac2b49 4415 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4416 return REPORT(S_pending_ident(aTHX));
bbce6d69 4417
02aa26ce
NT
4418 /* no identifier pending identification */
4419
3280af22 4420 switch (PL_lex_state) {
79072805
LW
4421#ifdef COMMENTARY
4422 case LEX_NORMAL: /* Some compilers will produce faster */
4423 case LEX_INTERPNORMAL: /* code if we comment these out. */
4424 break;
4425#endif
4426
09bef843 4427 /* when we've already built the next token, just pull it out of the queue */
79072805 4428 case LEX_KNOWNEXT:
5db06880
NC
4429#ifdef PERL_MAD
4430 PL_lasttoke--;
6154021b 4431 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4432 if (PL_madskills) {
cd81e915 4433 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4434 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4435 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4436 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4437 PL_thismad->mad_val = 0;
4438 mad_free(PL_thismad);
4439 PL_thismad = 0;
5db06880
NC
4440 }
4441 }
4442 if (!PL_lasttoke) {
4443 PL_lex_state = PL_lex_defer;
4444 PL_expect = PL_lex_expect;
4445 PL_lex_defer = LEX_NORMAL;
4446 if (!PL_nexttoke[PL_lasttoke].next_type)
4447 return yylex();
4448 }
4449#else
3280af22 4450 PL_nexttoke--;
6154021b 4451 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4452 if (!PL_nexttoke) {
4453 PL_lex_state = PL_lex_defer;
4454 PL_expect = PL_lex_expect;
4455 PL_lex_defer = LEX_NORMAL;
463ee0b2 4456 }
5db06880 4457#endif
a7aaec61
Z
4458 {
4459 I32 next_type;
5db06880 4460#ifdef PERL_MAD
a7aaec61 4461 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4462#else
a7aaec61 4463 next_type = PL_nexttype[PL_nexttoke];
5db06880 4464#endif
78cdf107
Z
4465 if (next_type & (7<<24)) {
4466 if (next_type & (1<<24)) {
4467 if (PL_lex_brackets > 100)
4468 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4469 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4470 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4471 }
4472 if (next_type & (2<<24))
4473 PL_lex_allbrackets++;
4474 if (next_type & (4<<24))
4475 PL_lex_allbrackets--;
a7aaec61
Z
4476 next_type &= 0xffff;
4477 }
4478#ifdef PERL_MAD
4479 /* FIXME - can these be merged? */
4480 return next_type;
4481#else
4482 return REPORT(next_type);
4483#endif
4484 }
79072805 4485
02aa26ce 4486 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4487 when we get here, PL_bufptr is at the \
02aa26ce 4488 */
79072805
LW
4489 case LEX_INTERPCASEMOD:
4490#ifdef DEBUGGING
3280af22 4491 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
5637ef5b
NC
4492 Perl_croak(aTHX_
4493 "panic: INTERPCASEMOD bufptr=%p, bufend=%p, *bufptr=%u",
4494 PL_bufptr, PL_bufend, *PL_bufptr);
79072805 4495#endif
02aa26ce 4496 /* handle \E or end of string */
3280af22 4497 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4498 /* if at a \E */
3280af22 4499 if (PL_lex_casemods) {
f54cb97a 4500 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4501 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4502
3792a11b 4503 if (PL_bufptr != PL_bufend
838f2281
BF
4504 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q'
4505 || oldmod == 'F')) {
3280af22
NIS
4506 PL_bufptr += 2;
4507 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4508#ifdef PERL_MAD
4509 if (PL_madskills)
6b29d1f5 4510 PL_thistoken = newSVpvs("\\E");
5db06880 4511#endif
a0d0e21e 4512 }
78cdf107 4513 PL_lex_allbrackets--;
bbf60fe6 4514 return REPORT(')');
79072805 4515 }
52ed07f6
BF
4516 else if ( PL_bufptr != PL_bufend && PL_bufptr[1] == 'E' ) {
4517 /* Got an unpaired \E */
4518 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
820438b1 4519 "Useless use of \\E");
52ed07f6 4520 }
5db06880
NC
4521#ifdef PERL_MAD
4522 while (PL_bufptr != PL_bufend &&
4523 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4524 if (!PL_thiswhite)
6b29d1f5 4525 PL_thiswhite = newSVpvs("");
cd81e915 4526 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4527 PL_bufptr += 2;
4528 }
4529#else
3280af22
NIS
4530 if (PL_bufptr != PL_bufend)
4531 PL_bufptr += 2;
5db06880 4532#endif
3280af22 4533 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4534 return yylex();
79072805
LW
4535 }
4536 else {
607df283 4537 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4538 "### Saw case modifier\n"); });
3280af22 4539 s = PL_bufptr + 1;
6e909404 4540 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4541#ifdef PERL_MAD
cd81e915 4542 if (!PL_thiswhite)
6b29d1f5 4543 PL_thiswhite = newSVpvs("");
cd81e915 4544 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4545#endif
89122651 4546 PL_bufptr = s + 3;
6e909404
JH
4547 PL_lex_state = LEX_INTERPCONCAT;
4548 return yylex();
a0d0e21e 4549 }
6e909404 4550 else {
90771dc0 4551 I32 tmp;
5db06880
NC
4552 if (!PL_madskills) /* when just compiling don't need correct */
4553 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4554 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
838f2281
BF
4555 if ((*s == 'L' || *s == 'U' || *s == 'F') &&
4556 (strchr(PL_lex_casestack, 'L')
4557 || strchr(PL_lex_casestack, 'U')
4558 || strchr(PL_lex_casestack, 'F'))) {
6e909404 4559 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4560 PL_lex_allbrackets--;
bbf60fe6 4561 return REPORT(')');
6e909404
JH
4562 }
4563 if (PL_lex_casemods > 10)
4564 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4565 PL_lex_casestack[PL_lex_casemods++] = *s;
4566 PL_lex_casestack[PL_lex_casemods] = '\0';
4567 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4568 start_force(PL_curforce);
9ded7720 4569 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4570 force_next((2<<24)|'(');
cd81e915 4571 start_force(PL_curforce);
6e909404 4572 if (*s == 'l')
9ded7720 4573 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4574 else if (*s == 'u')
9ded7720 4575 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4576 else if (*s == 'L')
9ded7720 4577 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4578 else if (*s == 'U')
9ded7720 4579 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4580 else if (*s == 'Q')
9ded7720 4581 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
838f2281
BF
4582 else if (*s == 'F')
4583 NEXTVAL_NEXTTOKE.ival = OP_FC;
6e909404 4584 else
5637ef5b 4585 Perl_croak(aTHX_ "panic: yylex, *s=%u", *s);
5db06880 4586 if (PL_madskills) {
a5849ce5
NC
4587 SV* const tmpsv = newSVpvs("\\ ");
4588 /* replace the space with the character we want to escape
4589 */
4590 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4591 curmad('_', tmpsv);
4592 }
6e909404 4593 PL_bufptr = s + 1;
a0d0e21e 4594 }
79072805 4595 force_next(FUNC);
3280af22
NIS
4596 if (PL_lex_starts) {
4597 s = PL_bufptr;
4598 PL_lex_starts = 0;
5db06880
NC
4599#ifdef PERL_MAD
4600 if (PL_madskills) {
cd81e915
NC
4601 if (PL_thistoken)
4602 sv_free(PL_thistoken);
6b29d1f5 4603 PL_thistoken = newSVpvs("");
5db06880
NC
4604 }
4605#endif
131b3ad0
DM
4606 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4607 if (PL_lex_casemods == 1 && PL_lex_inpat)
4608 OPERATOR(',');
4609 else
4610 Aop(OP_CONCAT);
79072805
LW
4611 }
4612 else
cea2e8a9 4613 return yylex();
79072805
LW
4614 }
4615
55497cff 4616 case LEX_INTERPPUSH:
bbf60fe6 4617 return REPORT(sublex_push());
55497cff 4618
79072805 4619 case LEX_INTERPSTART:
3280af22 4620 if (PL_bufptr == PL_bufend)
bbf60fe6 4621 return REPORT(sublex_done());
9da1dd8f 4622 DEBUG_T({ if(*PL_bufptr != '(') PerlIO_printf(Perl_debug_log,
b6007c36 4623 "### Interpolated variable\n"); });
3280af22
NIS
4624 PL_expect = XTERM;
4625 PL_lex_dojoin = (*PL_bufptr == '@');
4626 PL_lex_state = LEX_INTERPNORMAL;
4627 if (PL_lex_dojoin) {
cd81e915 4628 start_force(PL_curforce);
9ded7720 4629 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4630 force_next(',');
cd81e915 4631 start_force(PL_curforce);
a0d0e21e 4632 force_ident("\"", '$');
cd81e915 4633 start_force(PL_curforce);
9ded7720 4634 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4635 force_next('$');
cd81e915 4636 start_force(PL_curforce);
9ded7720 4637 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4638 force_next((2<<24)|'(');
cd81e915 4639 start_force(PL_curforce);
9ded7720 4640 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4641 force_next(FUNC);
4642 }
9da1dd8f
DM
4643 /* Convert (?{...}) and friends to 'do {...}' */
4644 if (PL_lex_inpat && *PL_bufptr == '(') {
4645 PL_sublex_info.re_eval_start = PL_bufptr;
4646 PL_bufptr += 2;
4647 if (*PL_bufptr != '{')
4648 PL_bufptr++;
6165f85b
DM
4649 start_force(PL_curforce);
4650 /* XXX probably need a CURMAD(something) here */
9da1dd8f
DM
4651 PL_expect = XTERMBLOCK;
4652 force_next(DO);
4653 }
4654
3280af22
NIS
4655 if (PL_lex_starts++) {
4656 s = PL_bufptr;
5db06880
NC
4657#ifdef PERL_MAD
4658 if (PL_madskills) {
cd81e915
NC
4659 if (PL_thistoken)
4660 sv_free(PL_thistoken);
6b29d1f5 4661 PL_thistoken = newSVpvs("");
5db06880
NC
4662 }
4663#endif
131b3ad0
DM
4664 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4665 if (!PL_lex_casemods && PL_lex_inpat)
4666 OPERATOR(',');
4667 else
4668 Aop(OP_CONCAT);
79072805 4669 }
cea2e8a9 4670 return yylex();
79072805
LW
4671
4672 case LEX_INTERPENDMAYBE:
3280af22
NIS
4673 if (intuit_more(PL_bufptr)) {
4674 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4675 break;
4676 }
4677 /* FALL THROUGH */
4678
4679 case LEX_INTERPEND:
3280af22
NIS
4680 if (PL_lex_dojoin) {
4681 PL_lex_dojoin = FALSE;
4682 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4683#ifdef PERL_MAD
4684 if (PL_madskills) {
cd81e915
NC
4685 if (PL_thistoken)
4686 sv_free(PL_thistoken);
6b29d1f5 4687 PL_thistoken = newSVpvs("");
5db06880
NC
4688 }
4689#endif
78cdf107 4690 PL_lex_allbrackets--;
bbf60fe6 4691 return REPORT(')');
79072805 4692 }
43a16006 4693 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4694 && SvEVALED(PL_lex_repl))
43a16006 4695 {
e9fa98b2 4696 if (PL_bufptr != PL_bufend)
cea2e8a9 4697 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4698 PL_lex_repl = NULL;
e9fa98b2 4699 }
9da1dd8f
DM
4700 if (PL_sublex_info.re_eval_start) {
4701 if (*PL_bufptr != ')')
4702 Perl_croak(aTHX_ "Sequence (?{...}) not terminated with ')'");
4703 PL_bufptr++;
4704 /* having compiled a (?{..}) expression, return the original
4705 * text too, as a const */
6165f85b
DM
4706 start_force(PL_curforce);
4707 /* XXX probably need a CURMAD(something) here */
4708 NEXTVAL_NEXTTOKE.opval =
9da1dd8f
DM
4709 (OP*)newSVOP(OP_CONST, 0,
4710 newSVpvn(PL_sublex_info.re_eval_start,
4711 PL_bufptr - PL_sublex_info.re_eval_start));
4712 force_next(THING);
4713 PL_sublex_info.re_eval_start = NULL;
4714 PL_expect = XTERM;
4715 return REPORT(',');
4716 }
4717
79072805
LW
4718 /* FALLTHROUGH */
4719 case LEX_INTERPCONCAT:
4720#ifdef DEBUGGING
3280af22 4721 if (PL_lex_brackets)
5637ef5b
NC
4722 Perl_croak(aTHX_ "panic: INTERPCONCAT, lex_brackets=%ld",
4723 (long) PL_lex_brackets);
79072805 4724#endif
3280af22 4725 if (PL_bufptr == PL_bufend)
bbf60fe6 4726 return REPORT(sublex_done());
79072805 4727
9da1dd8f
DM
4728 /* m'foo' still needs to be parsed for possible (?{...}) */
4729 if (SvIVX(PL_linestr) == '\'' && !PL_lex_inpat) {
3280af22 4730 SV *sv = newSVsv(PL_linestr);
9da1dd8f 4731 sv = tokeq(sv);
6154021b 4732 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4733 s = PL_bufend;
79072805
LW
4734 }
4735 else {
3280af22 4736 s = scan_const(PL_bufptr);
79072805 4737 if (*s == '\\')
3280af22 4738 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4739 else
3280af22 4740 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4741 }
4742
3280af22 4743 if (s != PL_bufptr) {
cd81e915 4744 start_force(PL_curforce);
5db06880
NC
4745 if (PL_madskills) {
4746 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4747 }
6154021b 4748 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4749 PL_expect = XTERM;
79072805 4750 force_next(THING);
131b3ad0 4751 if (PL_lex_starts++) {
5db06880
NC
4752#ifdef PERL_MAD
4753 if (PL_madskills) {
cd81e915
NC
4754 if (PL_thistoken)
4755 sv_free(PL_thistoken);
6b29d1f5 4756 PL_thistoken = newSVpvs("");
5db06880
NC
4757 }
4758#endif
131b3ad0
DM
4759 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4760 if (!PL_lex_casemods && PL_lex_inpat)
4761 OPERATOR(',');
4762 else
4763 Aop(OP_CONCAT);
4764 }
79072805 4765 else {
3280af22 4766 PL_bufptr = s;
cea2e8a9 4767 return yylex();
79072805
LW
4768 }
4769 }
4770
cea2e8a9 4771 return yylex();
a0d0e21e 4772 case LEX_FORMLINE:
3280af22
NIS
4773 PL_lex_state = LEX_NORMAL;
4774 s = scan_formline(PL_bufptr);
4775 if (!PL_lex_formbrack)
a0d0e21e
LW
4776 goto rightbracket;
4777 OPERATOR(';');
79072805
LW
4778 }
4779
3280af22
NIS
4780 s = PL_bufptr;
4781 PL_oldoldbufptr = PL_oldbufptr;
4782 PL_oldbufptr = s;
463ee0b2
LW
4783
4784 retry:
5db06880 4785#ifdef PERL_MAD
cd81e915
NC
4786 if (PL_thistoken) {
4787 sv_free(PL_thistoken);
4788 PL_thistoken = 0;
5db06880 4789 }
cd81e915 4790 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4791#endif
378cc40b
LW
4792 switch (*s) {
4793 default:
7e2040f0 4794 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4795 goto keylookup;
b1fc3636 4796 {
e2f06df0
BF
4797 SV *dsv = newSVpvs_flags("", SVs_TEMP);
4798 const char *c = UTF ? savepv(sv_uni_display(dsv, newSVpvn_flags(s,
4799 UTF8SKIP(s),
4800 SVs_TEMP | SVf_UTF8),
4801 10, UNI_DISPLAY_ISPRINT))
4802 : Perl_form(aTHX_ "\\x%02X", (unsigned char)*s);
b1fc3636
CJ
4803 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4804 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4805 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4806 } else {
4807 d = PL_linestart;
4808 }
4809 *s = '\0';
e2f06df0
BF
4810 sv_setpv(dsv, d);
4811 if (UTF)
4812 SvUTF8_on(dsv);
4813 Perl_croak(aTHX_ "Unrecognized character %s; marked by <-- HERE after %"SVf"<-- HERE near column %d", c, SVfARG(dsv), (int) len + 1);
b1fc3636 4814 }
e929a76b
LW
4815 case 4:
4816 case 26:
4817 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4818 case 0:
5db06880
NC
4819#ifdef PERL_MAD
4820 if (PL_madskills)
cd81e915 4821 PL_faketokens = 0;
5db06880 4822#endif
60d63348 4823 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
3280af22
NIS
4824 PL_last_uni = 0;
4825 PL_last_lop = 0;
a7aaec61
Z
4826 if (PL_lex_brackets &&
4827 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4828 yyerror((const char *)
4829 (PL_lex_formbrack
4830 ? "Format not terminated"
4831 : "Missing right curly or square bracket"));
c5ee2135 4832 }
4e553d73 4833 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4834 "### Tokener got EOF\n");
5f80b19c 4835 } );
79072805 4836 TOKEN(0);
463ee0b2 4837 }
3280af22 4838 if (s++ < PL_bufend)
a687059c 4839 goto retry; /* ignore stray nulls */
3280af22
NIS
4840 PL_last_uni = 0;
4841 PL_last_lop = 0;
4842 if (!PL_in_eval && !PL_preambled) {
4843 PL_preambled = TRUE;
5db06880
NC
4844#ifdef PERL_MAD
4845 if (PL_madskills)
cd81e915 4846 PL_faketokens = 1;
5db06880 4847#endif
5ab7ff98
NC
4848 if (PL_perldb) {
4849 /* Generate a string of Perl code to load the debugger.
4850 * If PERL5DB is set, it will return the contents of that,
4851 * otherwise a compile-time require of perl5db.pl. */
4852
4853 const char * const pdb = PerlEnv_getenv("PERL5DB");
4854
4855 if (pdb) {
4856 sv_setpv(PL_linestr, pdb);
4857 sv_catpvs(PL_linestr,";");
4858 } else {
4859 SETERRNO(0,SS_NORMAL);
4860 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4861 }
4862 } else
4863 sv_setpvs(PL_linestr,"");
c62eb204
NC
4864 if (PL_preambleav) {
4865 SV **svp = AvARRAY(PL_preambleav);
4866 SV **const end = svp + AvFILLp(PL_preambleav);
4867 while(svp <= end) {
4868 sv_catsv(PL_linestr, *svp);
4869 ++svp;
396482e1 4870 sv_catpvs(PL_linestr, ";");
91b7def8 4871 }
daba3364 4872 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4873 PL_preambleav = NULL;
91b7def8 4874 }
9f639728
FR
4875 if (PL_minus_E)
4876 sv_catpvs(PL_linestr,
4877 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4878 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4879 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4880 if (PL_minus_l)
396482e1 4881 sv_catpvs(PL_linestr,"chomp;");
3280af22 4882 if (PL_minus_a) {
3280af22 4883 if (PL_minus_F) {
3792a11b
NC
4884 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4885 || *PL_splitstr == '"')
3280af22 4886 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4887 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4888 else {
c8ef6a4b
NC
4889 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4890 bytes can be used as quoting characters. :-) */
dd374669 4891 const char *splits = PL_splitstr;
91d456ae 4892 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4893 do {
4894 /* Need to \ \s */
dd374669
AL
4895 if (*splits == '\\')
4896 sv_catpvn(PL_linestr, splits, 1);
4897 sv_catpvn(PL_linestr, splits, 1);
4898 } while (*splits++);
48c4c863
NC
4899 /* This loop will embed the trailing NUL of
4900 PL_linestr as the last thing it does before
4901 terminating. */
396482e1 4902 sv_catpvs(PL_linestr, ");");
54310121 4903 }
2304df62
AD
4904 }
4905 else
396482e1 4906 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4907 }
79072805 4908 }
396482e1 4909 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4910 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4911 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4912 PL_last_lop = PL_last_uni = NULL;
65269a95 4913 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4914 update_debugger_info(PL_linestr, NULL, 0);
79072805 4915 goto retry;
a687059c 4916 }
e929a76b 4917 do {
580561a3
Z
4918 fake_eof = 0;
4919 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4920 if (0) {
7e28d3af 4921 fake_eof:
f0e67a1d
Z
4922 fake_eof = LEX_FAKE_EOF;
4923 }
4924 PL_bufptr = PL_bufend;
17cc9359 4925 CopLINE_inc(PL_curcop);
f0e67a1d 4926 if (!lex_next_chunk(fake_eof)) {
17cc9359 4927 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4928 s = PL_bufptr;
4929 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4930 }
17cc9359 4931 CopLINE_dec(PL_curcop);
5db06880 4932#ifdef PERL_MAD
f0e67a1d 4933 if (!PL_rsfp)
cd81e915 4934 PL_realtokenstart = -1;
5db06880 4935#endif
f0e67a1d 4936 s = PL_bufptr;
7aa207d6
JH
4937 /* If it looks like the start of a BOM or raw UTF-16,
4938 * check if it in fact is. */
580561a3 4939 if (bof && PL_rsfp &&
7aa207d6
JH
4940 (*s == 0 ||
4941 *(U8*)s == 0xEF ||
4942 *(U8*)s >= 0xFE ||
4943 s[1] == 0)) {
879bc93b
DM
4944 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4945 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
4946#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4947 /* offset may include swallowed CR */
4948 if (!bof)
879bc93b 4949 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 4950#endif
7e28d3af 4951 if (bof) {
3280af22 4952 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4953 s = swallow_bom((U8*)s);
e929a76b 4954 }
378cc40b 4955 }
737c24fc 4956 if (PL_parser->in_pod) {
a0d0e21e 4957 /* Incest with pod. */
5db06880
NC
4958#ifdef PERL_MAD
4959 if (PL_madskills)
cd81e915 4960 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4961#endif
01a57ef7 4962 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4963 sv_setpvs(PL_linestr, "");
3280af22
NIS
4964 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4965 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4966 PL_last_lop = PL_last_uni = NULL;
737c24fc 4967 PL_parser->in_pod = 0;
a0d0e21e 4968 }
4e553d73 4969 }
60d63348 4970 if (PL_rsfp || PL_parser->filtered)
85613cab 4971 incline(s);
737c24fc 4972 } while (PL_parser->in_pod);
3280af22 4973 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4974 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4975 PL_last_lop = PL_last_uni = NULL;
57843af0 4976 if (CopLINE(PL_curcop) == 1) {
3280af22 4977 while (s < PL_bufend && isSPACE(*s))
79072805 4978 s++;
a0d0e21e 4979 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4980 s++;
5db06880
NC
4981#ifdef PERL_MAD
4982 if (PL_madskills)
cd81e915 4983 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4984#endif
bd61b366 4985 d = NULL;
3280af22 4986 if (!PL_in_eval) {
44a8e56a 4987 if (*s == '#' && *(s+1) == '!')
4988 d = s + 2;
4989#ifdef ALTERNATE_SHEBANG
4990 else {
bfed75c6 4991 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4992 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4993 d = s + (sizeof(as) - 1);
4994 }
4995#endif /* ALTERNATE_SHEBANG */
4996 }
4997 if (d) {
b8378b72 4998 char *ipath;
774d564b 4999 char *ipathend;
b8378b72 5000
774d564b 5001 while (isSPACE(*d))
b8378b72
CS
5002 d++;
5003 ipath = d;
774d564b 5004 while (*d && !isSPACE(*d))
5005 d++;
5006 ipathend = d;
5007
5008#ifdef ARG_ZERO_IS_SCRIPT
5009 if (ipathend > ipath) {
5010 /*
5011 * HP-UX (at least) sets argv[0] to the script name,
5012 * which makes $^X incorrect. And Digital UNIX and Linux,
5013 * at least, set argv[0] to the basename of the Perl
5014 * interpreter. So, having found "#!", we'll set it right.
5015 */
fafc274c
NC
5016 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
5017 SVt_PV)); /* $^X */
774d564b 5018 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 5019 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 5020 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 5021 SvSETMAGIC(x);
5022 }
556c1dec
JH
5023 else {
5024 STRLEN blen;
5025 STRLEN llen;
cfd0369c 5026 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 5027 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
5028 if (llen < blen) {
5029 bstart += blen - llen;
5030 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
5031 sv_setpvn(x, ipath, ipathend - ipath);
5032 SvSETMAGIC(x);
5033 }
5034 }
5035 }
774d564b 5036 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 5037 }
774d564b 5038#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
5039
5040 /*
5041 * Look for options.
5042 */
748a9306 5043 d = instr(s,"perl -");
84e30d1a 5044 if (!d) {
748a9306 5045 d = instr(s,"perl");
84e30d1a
GS
5046#if defined(DOSISH)
5047 /* avoid getting into infinite loops when shebang
5048 * line contains "Perl" rather than "perl" */
5049 if (!d) {
5050 for (d = ipathend-4; d >= ipath; --d) {
5051 if ((*d == 'p' || *d == 'P')
5052 && !ibcmp(d, "perl", 4))
5053 {
5054 break;
5055 }
5056 }
5057 if (d < ipath)
bd61b366 5058 d = NULL;
84e30d1a
GS
5059 }
5060#endif
5061 }
44a8e56a 5062#ifdef ALTERNATE_SHEBANG
5063 /*
5064 * If the ALTERNATE_SHEBANG on this system starts with a
5065 * character that can be part of a Perl expression, then if
5066 * we see it but not "perl", we're probably looking at the
5067 * start of Perl code, not a request to hand off to some
5068 * other interpreter. Similarly, if "perl" is there, but
5069 * not in the first 'word' of the line, we assume the line
5070 * contains the start of the Perl program.
44a8e56a 5071 */
5072 if (d && *s != '#') {
f54cb97a 5073 const char *c = ipath;
44a8e56a 5074 while (*c && !strchr("; \t\r\n\f\v#", *c))
5075 c++;
5076 if (c < d)
bd61b366 5077 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 5078 else
5079 *s = '#'; /* Don't try to parse shebang line */
5080 }
774d564b 5081#endif /* ALTERNATE_SHEBANG */
748a9306 5082 if (!d &&
44a8e56a 5083 *s == '#' &&
774d564b 5084 ipathend > ipath &&
3280af22 5085 !PL_minus_c &&
748a9306 5086 !instr(s,"indir") &&
3280af22 5087 instr(PL_origargv[0],"perl"))
748a9306 5088 {
27da23d5 5089 dVAR;
9f68db38 5090 char **newargv;
9f68db38 5091
774d564b 5092 *ipathend = '\0';
5093 s = ipathend + 1;
3280af22 5094 while (s < PL_bufend && isSPACE(*s))
9f68db38 5095 s++;
3280af22 5096 if (s < PL_bufend) {
d85f917e 5097 Newx(newargv,PL_origargc+3,char*);
9f68db38 5098 newargv[1] = s;
3280af22 5099 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
5100 s++;
5101 *s = '\0';
3280af22 5102 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
5103 }
5104 else
3280af22 5105 newargv = PL_origargv;
774d564b 5106 newargv[0] = ipath;
b35112e7 5107 PERL_FPU_PRE_EXEC
b4748376 5108 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 5109 PERL_FPU_POST_EXEC
cea2e8a9 5110 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 5111 }
748a9306 5112 if (d) {
c35e046a
AL
5113 while (*d && !isSPACE(*d))
5114 d++;
5115 while (SPACE_OR_TAB(*d))
5116 d++;
748a9306
LW
5117
5118 if (*d++ == '-') {
f54cb97a 5119 const bool switches_done = PL_doswitches;
fb993905
GA
5120 const U32 oldpdb = PL_perldb;
5121 const bool oldn = PL_minus_n;
5122 const bool oldp = PL_minus_p;
c7030b81 5123 const char *d1 = d;
fb993905 5124
8cc95fdb 5125 do {
4ba71d51
FC
5126 bool baduni = FALSE;
5127 if (*d1 == 'C') {
bd0ab00d
NC
5128 const char *d2 = d1 + 1;
5129 if (parse_unicode_opts((const char **)&d2)
5130 != PL_unicode)
5131 baduni = TRUE;
4ba71d51
FC
5132 }
5133 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
5134 const char * const m = d1;
5135 while (*d1 && !isSPACE(*d1))
5136 d1++;
cea2e8a9 5137 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 5138 (int)(d1 - m), m);
8cc95fdb 5139 }
c7030b81
NC
5140 d1 = moreswitches(d1);
5141 } while (d1);
f0b2cf55
YST
5142 if (PL_doswitches && !switches_done) {
5143 int argc = PL_origargc;
5144 char **argv = PL_origargv;
5145 do {
5146 argc--,argv++;
5147 } while (argc && argv[0][0] == '-' && argv[0][1]);
5148 init_argv_symbols(argc,argv);
5149 }
65269a95 5150 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5151 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5152 /* if we have already added "LINE: while (<>) {",
5153 we must not do it again */
748a9306 5154 {
76f68e9b 5155 sv_setpvs(PL_linestr, "");
3280af22
NIS
5156 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5157 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5158 PL_last_lop = PL_last_uni = NULL;
3280af22 5159 PL_preambled = FALSE;
65269a95 5160 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5161 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5162 goto retry;
5163 }
a0d0e21e 5164 }
79072805 5165 }
9f68db38 5166 }
79072805 5167 }
3280af22
NIS
5168 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5169 PL_bufptr = s;
5170 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5171 return yylex();
ae986130 5172 }
378cc40b 5173 goto retry;
4fdae800 5174 case '\r':
6a27c188 5175#ifdef PERL_STRICT_CR
cea2e8a9 5176 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5177 Perl_croak(aTHX_
cc507455 5178 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5179#endif
4fdae800 5180 case ' ': case '\t': case '\f': case 013:
5db06880 5181#ifdef PERL_MAD
cd81e915 5182 PL_realtokenstart = -1;
ac372eb8
RD
5183 if (!PL_thiswhite)
5184 PL_thiswhite = newSVpvs("");
5185 sv_catpvn(PL_thiswhite, s, 1);
5db06880 5186#endif
ac372eb8 5187 s++;
378cc40b 5188 goto retry;
378cc40b 5189 case '#':
e929a76b 5190 case '\n':
5db06880 5191#ifdef PERL_MAD
cd81e915 5192 PL_realtokenstart = -1;
5db06880 5193 if (PL_madskills)
cd81e915 5194 PL_faketokens = 0;
5db06880 5195#endif
60d63348
FC
5196 if (PL_lex_state != LEX_NORMAL ||
5197 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5198 if (*s == '#' && s == PL_linestart && PL_in_eval
5199 && !PL_rsfp && !PL_parser->filtered) {
df0deb90
GS
5200 /* handle eval qq[#line 1 "foo"\n ...] */
5201 CopLINE_dec(PL_curcop);
5202 incline(s);
5203 }
5db06880
NC
5204 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5205 s = SKIPSPACE0(s);
60d63348 5206 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5db06880
NC
5207 incline(s);
5208 }
5209 else {
5210 d = s;
5211 while (d < PL_bufend && *d != '\n')
5212 d++;
5213 if (d < PL_bufend)
5214 d++;
5215 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5637ef5b
NC
5216 Perl_croak(aTHX_ "panic: input overflow, %p > %p",
5217 d, PL_bufend);
5db06880
NC
5218#ifdef PERL_MAD
5219 if (PL_madskills)
cd81e915 5220 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5221#endif
5222 s = d;
5223 incline(s);
5224 }
3280af22
NIS
5225 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5226 PL_bufptr = s;
5227 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5228 return yylex();
a687059c 5229 }
378cc40b 5230 }
a687059c 5231 else {
5db06880
NC
5232#ifdef PERL_MAD
5233 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5234 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5235 PL_faketokens = 0;
5db06880
NC
5236 s = SKIPSPACE0(s);
5237 TOKEN(PEG); /* make sure any #! line is accessible */
5238 }
5239 s = SKIPSPACE0(s);
5240 }
5241 else {
5242/* if (PL_madskills && PL_lex_formbrack) { */
5243 d = s;
5244 while (d < PL_bufend && *d != '\n')
5245 d++;
5246 if (d < PL_bufend)
5247 d++;
5248 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5249 Perl_croak(aTHX_ "panic: input overflow");
5250 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5251 if (!PL_thiswhite)
6b29d1f5 5252 PL_thiswhite = newSVpvs("");
5db06880 5253 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5254 sv_setpvs(PL_thiswhite, "");
cd81e915 5255 PL_faketokens = 0;
5db06880 5256 }
cd81e915 5257 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5258 }
5259 s = d;
5260/* }
5261 *s = '\0';
5262 PL_bufend = s; */
5263 }
5264#else
378cc40b 5265 *s = '\0';
3280af22 5266 PL_bufend = s;
5db06880 5267#endif
a687059c 5268 }
378cc40b
LW
5269 goto retry;
5270 case '-':
79072805 5271 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5272 I32 ftst = 0;
90771dc0 5273 char tmp;
e5edeb50 5274
378cc40b 5275 s++;
3280af22 5276 PL_bufptr = s;
748a9306
LW
5277 tmp = *s++;
5278
bf4acbe4 5279 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5280 s++;
5281
5282 if (strnEQ(s,"=>",2)) {
3280af22 5283 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5284 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5285 OPERATOR('-'); /* unary minus */
5286 }
3280af22 5287 PL_last_uni = PL_oldbufptr;
748a9306 5288 switch (tmp) {
e5edeb50
JH
5289 case 'r': ftst = OP_FTEREAD; break;
5290 case 'w': ftst = OP_FTEWRITE; break;
5291 case 'x': ftst = OP_FTEEXEC; break;
5292 case 'o': ftst = OP_FTEOWNED; break;
5293 case 'R': ftst = OP_FTRREAD; break;
5294 case 'W': ftst = OP_FTRWRITE; break;
5295 case 'X': ftst = OP_FTREXEC; break;
5296 case 'O': ftst = OP_FTROWNED; break;
5297 case 'e': ftst = OP_FTIS; break;
5298 case 'z': ftst = OP_FTZERO; break;
5299 case 's': ftst = OP_FTSIZE; break;
5300 case 'f': ftst = OP_FTFILE; break;
5301 case 'd': ftst = OP_FTDIR; break;
5302 case 'l': ftst = OP_FTLINK; break;
5303 case 'p': ftst = OP_FTPIPE; break;
5304 case 'S': ftst = OP_FTSOCK; break;
5305 case 'u': ftst = OP_FTSUID; break;
5306 case 'g': ftst = OP_FTSGID; break;
5307 case 'k': ftst = OP_FTSVTX; break;
5308 case 'b': ftst = OP_FTBLK; break;
5309 case 'c': ftst = OP_FTCHR; break;
5310 case 't': ftst = OP_FTTTY; break;
5311 case 'T': ftst = OP_FTTEXT; break;
5312 case 'B': ftst = OP_FTBINARY; break;
5313 case 'M': case 'A': case 'C':
fafc274c 5314 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5315 switch (tmp) {
5316 case 'M': ftst = OP_FTMTIME; break;
5317 case 'A': ftst = OP_FTATIME; break;
5318 case 'C': ftst = OP_FTCTIME; break;
5319 default: break;
5320 }
5321 break;
378cc40b 5322 default:
378cc40b
LW
5323 break;
5324 }
e5edeb50 5325 if (ftst) {
eb160463 5326 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5327 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5328 "### Saw file test %c\n", (int)tmp);
5f80b19c 5329 } );
e5edeb50
JH
5330 FTST(ftst);
5331 }
5332 else {
5333 /* Assume it was a minus followed by a one-letter named
5334 * subroutine call (or a -bareword), then. */
95c31fe3 5335 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5336 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5337 (int) tmp);
5f80b19c 5338 } );
3cf7b4c4 5339 s = --PL_bufptr;
e5edeb50 5340 }
378cc40b 5341 }
90771dc0
NC
5342 {
5343 const char tmp = *s++;
5344 if (*s == tmp) {
5345 s++;
5346 if (PL_expect == XOPERATOR)
5347 TERM(POSTDEC);
5348 else
5349 OPERATOR(PREDEC);
5350 }
5351 else if (*s == '>') {
5352 s++;
29595ff2 5353 s = SKIPSPACE1(s);
90771dc0
NC
5354 if (isIDFIRST_lazy_if(s,UTF)) {
5355 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5356 TOKEN(ARROW);
5357 }
5358 else if (*s == '$')
5359 OPERATOR(ARROW);
5360 else
5361 TERM(ARROW);
5362 }
78cdf107
Z
5363 if (PL_expect == XOPERATOR) {
5364 if (*s == '=' && !PL_lex_allbrackets &&
5365 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5366 s--;
5367 TOKEN(0);
5368 }
90771dc0 5369 Aop(OP_SUBTRACT);
78cdf107 5370 }
90771dc0
NC
5371 else {
5372 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5373 check_uni();
5374 OPERATOR('-'); /* unary minus */
79072805 5375 }
2f3197b3 5376 }
79072805 5377
378cc40b 5378 case '+':
90771dc0
NC
5379 {
5380 const char tmp = *s++;
5381 if (*s == tmp) {
5382 s++;
5383 if (PL_expect == XOPERATOR)
5384 TERM(POSTINC);
5385 else
5386 OPERATOR(PREINC);
5387 }
78cdf107
Z
5388 if (PL_expect == XOPERATOR) {
5389 if (*s == '=' && !PL_lex_allbrackets &&
5390 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5391 s--;
5392 TOKEN(0);
5393 }
90771dc0 5394 Aop(OP_ADD);
78cdf107 5395 }
90771dc0
NC
5396 else {
5397 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5398 check_uni();
5399 OPERATOR('+');
5400 }
2f3197b3 5401 }
a687059c 5402
378cc40b 5403 case '*':
3280af22
NIS
5404 if (PL_expect != XOPERATOR) {
5405 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5406 PL_expect = XOPERATOR;
5407 force_ident(PL_tokenbuf, '*');
5408 if (!*PL_tokenbuf)
a0d0e21e 5409 PREREF('*');
79072805 5410 TERM('*');
a687059c 5411 }
79072805
LW
5412 s++;
5413 if (*s == '*') {
a687059c 5414 s++;
78cdf107
Z
5415 if (*s == '=' && !PL_lex_allbrackets &&
5416 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5417 s -= 2;
5418 TOKEN(0);
5419 }
79072805 5420 PWop(OP_POW);
a687059c 5421 }
78cdf107
Z
5422 if (*s == '=' && !PL_lex_allbrackets &&
5423 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5424 s--;
5425 TOKEN(0);
5426 }
79072805
LW
5427 Mop(OP_MULTIPLY);
5428
378cc40b 5429 case '%':
3280af22 5430 if (PL_expect == XOPERATOR) {
78cdf107
Z
5431 if (s[1] == '=' && !PL_lex_allbrackets &&
5432 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5433 TOKEN(0);
bbce6d69 5434 ++s;
5435 Mop(OP_MODULO);
a687059c 5436 }
3280af22 5437 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5438 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5439 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5440 if (!PL_tokenbuf[1]) {
bbce6d69 5441 PREREF('%');
a687059c 5442 }
3280af22 5443 PL_pending_ident = '%';
bbce6d69 5444 TERM('%');
a687059c 5445
378cc40b 5446 case '^':
78cdf107
Z
5447 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5448 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5449 TOKEN(0);
79072805 5450 s++;
a0d0e21e 5451 BOop(OP_BIT_XOR);
79072805 5452 case '[':
a7aaec61
Z
5453 if (PL_lex_brackets > 100)
5454 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5455 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5456 PL_lex_allbrackets++;
df3467db
IG
5457 {
5458 const char tmp = *s++;
5459 OPERATOR(tmp);
5460 }
378cc40b 5461 case '~':
0d863452 5462 if (s[1] == '~'
3e7dd34d 5463 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5464 {
78cdf107
Z
5465 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5466 TOKEN(0);
0d863452
RH
5467 s += 2;
5468 Eop(OP_SMARTMATCH);
5469 }
78cdf107
Z
5470 s++;
5471 OPERATOR('~');
378cc40b 5472 case ',':
78cdf107
Z
5473 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5474 TOKEN(0);
5475 s++;
5476 OPERATOR(',');
a0d0e21e
LW
5477 case ':':
5478 if (s[1] == ':') {
5479 len = 0;
0bfa2a8a 5480 goto just_a_word_zero_gv;
a0d0e21e
LW
5481 }
5482 s++;
09bef843
SB
5483 switch (PL_expect) {
5484 OP *attrs;
5db06880
NC
5485#ifdef PERL_MAD
5486 I32 stuffstart;
5487#endif
09bef843
SB
5488 case XOPERATOR:
5489 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5490 break;
5491 PL_bufptr = s; /* update in case we back off */
d83f38d8 5492 if (*s == '=') {
2dc78664
NC
5493 Perl_croak(aTHX_
5494 "Use of := for an empty attribute list is not allowed");
d83f38d8 5495 }
09bef843
SB
5496 goto grabattrs;
5497 case XATTRBLOCK:
5498 PL_expect = XBLOCK;
5499 goto grabattrs;
5500 case XATTRTERM:
5501 PL_expect = XTERMBLOCK;
5502 grabattrs:
5db06880
NC
5503#ifdef PERL_MAD
5504 stuffstart = s - SvPVX(PL_linestr) - 1;
5505#endif
29595ff2 5506 s = PEEKSPACE(s);
5f66b61c 5507 attrs = NULL;
7e2040f0 5508 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5509 I32 tmp;
5cc237b8 5510 SV *sv;
09bef843 5511 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5512 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5513 if (tmp < 0) tmp = -tmp;
5514 switch (tmp) {
5515 case KEY_or:
5516 case KEY_and:
5517 case KEY_for:
11baf631 5518 case KEY_foreach:
f9829d6b
GS
5519 case KEY_unless:
5520 case KEY_if:
5521 case KEY_while:
5522 case KEY_until:
5523 goto got_attrs;
5524 default:
5525 break;
5526 }
5527 }
89a5757c 5528 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
09bef843 5529 if (*d == '(') {
d24ca0c5 5530 d = scan_str(d,TRUE,TRUE,FALSE);
09bef843 5531 if (!d) {
09bef843
SB
5532 /* MUST advance bufptr here to avoid bogus
5533 "at end of line" context messages from yyerror().
5534 */
5535 PL_bufptr = s + len;
5536 yyerror("Unterminated attribute parameter in attribute list");
5537 if (attrs)
5538 op_free(attrs);
5cc237b8 5539 sv_free(sv);
bbf60fe6 5540 return REPORT(0); /* EOF indicator */
09bef843
SB
5541 }
5542 }
5543 if (PL_lex_stuff) {
09bef843 5544 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5545 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5546 newSVOP(OP_CONST, 0, sv));
5547 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5548 PL_lex_stuff = NULL;
09bef843
SB
5549 }
5550 else {
5cc237b8
BS
5551 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5552 sv_free(sv);
1108974d 5553 if (PL_in_my == KEY_our) {
df9a6019 5554 deprecate(":unique");
1108974d 5555 }
bfed75c6 5556 else
371fce9b
DM
5557 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5558 }
5559
d3cea301
SB
5560 /* NOTE: any CV attrs applied here need to be part of
5561 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5562 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5563 sv_free(sv);
78f9721b 5564 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5565 }
5566 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5567 sv_free(sv);
8e5dadda 5568 deprecate(":locked");
5cc237b8
BS
5569 }
5570 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5571 sv_free(sv);
78f9721b 5572 CvMETHOD_on(PL_compcv);
5cc237b8 5573 }
78f9721b
SM
5574 /* After we've set the flags, it could be argued that
5575 we don't need to do the attributes.pm-based setting
5576 process, and shouldn't bother appending recognized
d3cea301
SB
5577 flags. To experiment with that, uncomment the
5578 following "else". (Note that's already been
5579 uncommented. That keeps the above-applied built-in
5580 attributes from being intercepted (and possibly
5581 rejected) by a package's attribute routines, but is
5582 justified by the performance win for the common case
5583 of applying only built-in attributes.) */
0256094b 5584 else
2fcb4757 5585 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5586 newSVOP(OP_CONST, 0,
5cc237b8 5587 sv));
09bef843 5588 }
29595ff2 5589 s = PEEKSPACE(d);
0120eecf 5590 if (*s == ':' && s[1] != ':')
29595ff2 5591 s = PEEKSPACE(s+1);
0120eecf
GS
5592 else if (s == d)
5593 break; /* require real whitespace or :'s */
29595ff2 5594 /* XXX losing whitespace on sequential attributes here */
09bef843 5595 }
90771dc0
NC
5596 {
5597 const char tmp
5598 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5599 if (*s != ';' && *s != '}' && *s != tmp
5600 && (tmp != '=' || *s != ')')) {
5601 const char q = ((*s == '\'') ? '"' : '\'');
5602 /* If here for an expression, and parsed no attrs, back
5603 off. */
5604 if (tmp == '=' && !attrs) {
5605 s = PL_bufptr;
5606 break;
5607 }
5608 /* MUST advance bufptr here to avoid bogus "at end of line"
5609 context messages from yyerror().
5610 */
5611 PL_bufptr = s;
10edeb5d
JH
5612 yyerror( (const char *)
5613 (*s
5614 ? Perl_form(aTHX_ "Invalid separator character "
5615 "%c%c%c in attribute list", q, *s, q)
5616 : "Unterminated attribute list" ) );
90771dc0
NC
5617 if (attrs)
5618 op_free(attrs);
5619 OPERATOR(':');
09bef843 5620 }
09bef843 5621 }
f9829d6b 5622 got_attrs:
09bef843 5623 if (attrs) {
cd81e915 5624 start_force(PL_curforce);
9ded7720 5625 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5626 CURMAD('_', PL_nextwhite);
89122651 5627 force_next(THING);
5db06880
NC
5628 }
5629#ifdef PERL_MAD
5630 if (PL_madskills) {
cd81e915 5631 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5632 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5633 }
5db06880 5634#endif
09bef843
SB
5635 TOKEN(COLONATTR);
5636 }
78cdf107
Z
5637 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5638 s--;
5639 TOKEN(0);
5640 }
5641 PL_lex_allbrackets--;
a0d0e21e 5642 OPERATOR(':');
8990e307
LW
5643 case '(':
5644 s++;
3280af22
NIS
5645 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5646 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5647 else
3280af22 5648 PL_expect = XTERM;
29595ff2 5649 s = SKIPSPACE1(s);
78cdf107 5650 PL_lex_allbrackets++;
a0d0e21e 5651 TOKEN('(');
378cc40b 5652 case ';':
78cdf107
Z
5653 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5654 TOKEN(0);
f4dd75d9 5655 CLINE;
78cdf107
Z
5656 s++;
5657 OPERATOR(';');
378cc40b 5658 case ')':
78cdf107
Z
5659 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5660 TOKEN(0);
5661 s++;
5662 PL_lex_allbrackets--;
5663 s = SKIPSPACE1(s);
5664 if (*s == '{')
5665 PREBLOCK(')');
5666 TERM(')');
79072805 5667 case ']':
a7aaec61
Z
5668 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5669 TOKEN(0);
79072805 5670 s++;
3280af22 5671 if (PL_lex_brackets <= 0)
d98d5fff 5672 yyerror("Unmatched right square bracket");
463ee0b2 5673 else
3280af22 5674 --PL_lex_brackets;
78cdf107 5675 PL_lex_allbrackets--;
3280af22
NIS
5676 if (PL_lex_state == LEX_INTERPNORMAL) {
5677 if (PL_lex_brackets == 0) {
02255c60
FC
5678 if (*s == '-' && s[1] == '>')
5679 PL_lex_state = LEX_INTERPENDMAYBE;
5680 else if (*s != '[' && *s != '{')
3280af22 5681 PL_lex_state = LEX_INTERPEND;
79072805
LW
5682 }
5683 }
4633a7c4 5684 TERM(']');
79072805
LW
5685 case '{':
5686 leftbracket:
79072805 5687 s++;
3280af22 5688 if (PL_lex_brackets > 100) {
8edd5f42 5689 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5690 }
3280af22 5691 switch (PL_expect) {
a0d0e21e 5692 case XTERM:
3280af22 5693 if (PL_lex_formbrack) {
a0d0e21e
LW
5694 s--;
5695 PRETERMBLOCK(DO);
5696 }
3280af22
NIS
5697 if (PL_oldoldbufptr == PL_last_lop)
5698 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5699 else
3280af22 5700 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5701 PL_lex_allbrackets++;
79072805 5702 OPERATOR(HASHBRACK);
a0d0e21e 5703 case XOPERATOR:
bf4acbe4 5704 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5705 s++;
44a8e56a 5706 d = s;
3280af22
NIS
5707 PL_tokenbuf[0] = '\0';
5708 if (d < PL_bufend && *d == '-') {
5709 PL_tokenbuf[0] = '-';
44a8e56a 5710 d++;
bf4acbe4 5711 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5712 d++;
5713 }
7e2040f0 5714 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5715 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5716 FALSE, &len);
bf4acbe4 5717 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5718 d++;
5719 if (*d == '}') {
f54cb97a 5720 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5721 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5722 if (minus)
5723 force_next('-');
748a9306
LW
5724 }
5725 }
5726 /* FALL THROUGH */
09bef843 5727 case XATTRBLOCK:
748a9306 5728 case XBLOCK:
3280af22 5729 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5730 PL_lex_allbrackets++;
3280af22 5731 PL_expect = XSTATE;
a0d0e21e 5732 break;
09bef843 5733 case XATTRTERM:
a0d0e21e 5734 case XTERMBLOCK:
3280af22 5735 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5736 PL_lex_allbrackets++;
3280af22 5737 PL_expect = XSTATE;
a0d0e21e
LW
5738 break;
5739 default: {
f54cb97a 5740 const char *t;
3280af22
NIS
5741 if (PL_oldoldbufptr == PL_last_lop)
5742 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5743 else
3280af22 5744 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5745 PL_lex_allbrackets++;
29595ff2 5746 s = SKIPSPACE1(s);
8452ff4b
SB
5747 if (*s == '}') {
5748 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5749 PL_expect = XTERM;
5750 /* This hack is to get the ${} in the message. */
5751 PL_bufptr = s+1;
5752 yyerror("syntax error");
5753 break;
5754 }
a0d0e21e 5755 OPERATOR(HASHBRACK);
8452ff4b 5756 }
b8a4b1be
GS
5757 /* This hack serves to disambiguate a pair of curlies
5758 * as being a block or an anon hash. Normally, expectation
5759 * determines that, but in cases where we're not in a
5760 * position to expect anything in particular (like inside
5761 * eval"") we have to resolve the ambiguity. This code
5762 * covers the case where the first term in the curlies is a
5763 * quoted string. Most other cases need to be explicitly
a0288114 5764 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5765 * curly in order to force resolution as an anon hash.
5766 *
5767 * XXX should probably propagate the outer expectation
5768 * into eval"" to rely less on this hack, but that could
5769 * potentially break current behavior of eval"".
5770 * GSAR 97-07-21
5771 */
5772 t = s;
5773 if (*s == '\'' || *s == '"' || *s == '`') {
5774 /* common case: get past first string, handling escapes */
3280af22 5775 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5776 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5777 t++;
5778 t++;
a0d0e21e 5779 }
b8a4b1be 5780 else if (*s == 'q') {
3280af22 5781 if (++t < PL_bufend
b8a4b1be 5782 && (!isALNUM(*t)
3280af22 5783 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5784 && !isALNUM(*t))))
5785 {
abc667d1 5786 /* skip q//-like construct */
f54cb97a 5787 const char *tmps;
b8a4b1be
GS
5788 char open, close, term;
5789 I32 brackets = 1;
5790
3280af22 5791 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5792 t++;
abc667d1
DM
5793 /* check for q => */
5794 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5795 OPERATOR(HASHBRACK);
5796 }
b8a4b1be
GS
5797 term = *t;
5798 open = term;
5799 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5800 term = tmps[5];
5801 close = term;
5802 if (open == close)
3280af22
NIS
5803 for (t++; t < PL_bufend; t++) {
5804 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5805 t++;
6d07e5e9 5806 else if (*t == open)
b8a4b1be
GS
5807 break;
5808 }
abc667d1 5809 else {
3280af22
NIS
5810 for (t++; t < PL_bufend; t++) {
5811 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5812 t++;
6d07e5e9 5813 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5814 break;
5815 else if (*t == open)
5816 brackets++;
5817 }
abc667d1
DM
5818 }
5819 t++;
b8a4b1be 5820 }
abc667d1
DM
5821 else
5822 /* skip plain q word */
5823 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5824 t += UTF8SKIP(t);
a0d0e21e 5825 }
7e2040f0 5826 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5827 t += UTF8SKIP(t);
7e2040f0 5828 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5829 t += UTF8SKIP(t);
a0d0e21e 5830 }
3280af22 5831 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5832 t++;
b8a4b1be
GS
5833 /* if comma follows first term, call it an anon hash */
5834 /* XXX it could be a comma expression with loop modifiers */
3280af22 5835 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5836 || (*t == '=' && t[1] == '>')))
a0d0e21e 5837 OPERATOR(HASHBRACK);
3280af22 5838 if (PL_expect == XREF)
4e4e412b 5839 PL_expect = XTERM;
a0d0e21e 5840 else {
3280af22
NIS
5841 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5842 PL_expect = XSTATE;
a0d0e21e 5843 }
8990e307 5844 }
a0d0e21e 5845 break;
463ee0b2 5846 }
6154021b 5847 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5848 if (isSPACE(*s) || *s == '#')
3280af22 5849 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5850 TOKEN('{');
378cc40b 5851 case '}':
a7aaec61
Z
5852 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5853 TOKEN(0);
79072805
LW
5854 rightbracket:
5855 s++;
3280af22 5856 if (PL_lex_brackets <= 0)
d98d5fff 5857 yyerror("Unmatched right curly bracket");
463ee0b2 5858 else
3280af22 5859 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 5860 PL_lex_allbrackets--;
c2e66d9e 5861 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5862 PL_lex_formbrack = 0;
5863 if (PL_lex_state == LEX_INTERPNORMAL) {
5864 if (PL_lex_brackets == 0) {
9059aa12
LW
5865 if (PL_expect & XFAKEBRACK) {
5866 PL_expect &= XENUMMASK;
3280af22
NIS
5867 PL_lex_state = LEX_INTERPEND;
5868 PL_bufptr = s;
5db06880
NC
5869#if 0
5870 if (PL_madskills) {
cd81e915 5871 if (!PL_thiswhite)
6b29d1f5 5872 PL_thiswhite = newSVpvs("");
76f68e9b 5873 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5874 }
5875#endif
cea2e8a9 5876 return yylex(); /* ignore fake brackets */
79072805 5877 }
fa83b5b6 5878 if (*s == '-' && s[1] == '>')
3280af22 5879 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5880 else if (*s != '[' && *s != '{')
3280af22 5881 PL_lex_state = LEX_INTERPEND;
79072805
LW
5882 }
5883 }
9059aa12
LW
5884 if (PL_expect & XFAKEBRACK) {
5885 PL_expect &= XENUMMASK;
3280af22 5886 PL_bufptr = s;
cea2e8a9 5887 return yylex(); /* ignore fake brackets */
748a9306 5888 }
cd81e915 5889 start_force(PL_curforce);
5db06880
NC
5890 if (PL_madskills) {
5891 curmad('X', newSVpvn(s-1,1));
cd81e915 5892 CURMAD('_', PL_thiswhite);
5db06880 5893 }
79072805 5894 force_next('}');
5db06880 5895#ifdef PERL_MAD
cd81e915 5896 if (!PL_thistoken)
6b29d1f5 5897 PL_thistoken = newSVpvs("");
5db06880 5898#endif
79072805 5899 TOKEN(';');
378cc40b
LW
5900 case '&':
5901 s++;
78cdf107
Z
5902 if (*s++ == '&') {
5903 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5904 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5905 s -= 2;
5906 TOKEN(0);
5907 }
a0d0e21e 5908 AOPERATOR(ANDAND);
78cdf107 5909 }
378cc40b 5910 s--;
3280af22 5911 if (PL_expect == XOPERATOR) {
041457d9
DM
5912 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5913 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5914 {
57843af0 5915 CopLINE_dec(PL_curcop);
f1f66076 5916 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5917 CopLINE_inc(PL_curcop);
463ee0b2 5918 }
78cdf107
Z
5919 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5920 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5921 s--;
5922 TOKEN(0);
5923 }
79072805 5924 BAop(OP_BIT_AND);
463ee0b2 5925 }
79072805 5926
3280af22
NIS
5927 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5928 if (*PL_tokenbuf) {
5929 PL_expect = XOPERATOR;
5930 force_ident(PL_tokenbuf, '&');
463ee0b2 5931 }
79072805
LW
5932 else
5933 PREREF('&');
6154021b 5934 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5935 TERM('&');
5936
378cc40b
LW
5937 case '|':
5938 s++;
78cdf107
Z
5939 if (*s++ == '|') {
5940 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5941 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5942 s -= 2;
5943 TOKEN(0);
5944 }
a0d0e21e 5945 AOPERATOR(OROR);
78cdf107 5946 }
378cc40b 5947 s--;
78cdf107
Z
5948 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5949 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5950 s--;
5951 TOKEN(0);
5952 }
79072805 5953 BOop(OP_BIT_OR);
378cc40b
LW
5954 case '=':
5955 s++;
748a9306 5956 {
90771dc0 5957 const char tmp = *s++;
78cdf107
Z
5958 if (tmp == '=') {
5959 if (!PL_lex_allbrackets &&
5960 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5961 s -= 2;
5962 TOKEN(0);
5963 }
90771dc0 5964 Eop(OP_EQ);
78cdf107
Z
5965 }
5966 if (tmp == '>') {
5967 if (!PL_lex_allbrackets &&
5968 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5969 s -= 2;
5970 TOKEN(0);
5971 }
90771dc0 5972 OPERATOR(',');
78cdf107 5973 }
90771dc0
NC
5974 if (tmp == '~')
5975 PMop(OP_MATCH);
5976 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5977 && strchr("+-*/%.^&|<",tmp))
5978 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5979 "Reversed %c= operator",(int)tmp);
5980 s--;
5981 if (PL_expect == XSTATE && isALPHA(tmp) &&
5982 (s == PL_linestart+1 || s[-2] == '\n') )
5983 {
60d63348 5984 if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
90771dc0
NC
5985 d = PL_bufend;
5986 while (s < d) {
5987 if (*s++ == '\n') {
5988 incline(s);
5989 if (strnEQ(s,"=cut",4)) {
5990 s = strchr(s,'\n');
5991 if (s)
5992 s++;
5993 else
5994 s = d;
5995 incline(s);
5996 goto retry;
5997 }
5998 }
a5f75d66 5999 }
90771dc0 6000 goto retry;
a5f75d66 6001 }
5db06880
NC
6002#ifdef PERL_MAD
6003 if (PL_madskills) {
cd81e915 6004 if (!PL_thiswhite)
6b29d1f5 6005 PL_thiswhite = newSVpvs("");
cd81e915 6006 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
6007 PL_bufend - PL_linestart);
6008 }
6009#endif
90771dc0 6010 s = PL_bufend;
737c24fc 6011 PL_parser->in_pod = 1;
90771dc0 6012 goto retry;
a5f75d66 6013 }
a0d0e21e 6014 }
3280af22 6015 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 6016 const char *t = s;
51882d45 6017#ifdef PERL_STRICT_CR
c35e046a 6018 while (SPACE_OR_TAB(*t))
51882d45 6019#else
c35e046a 6020 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 6021#endif
c35e046a 6022 t++;
a0d0e21e
LW
6023 if (*t == '\n' || *t == '#') {
6024 s--;
3280af22 6025 PL_expect = XBLOCK;
a0d0e21e
LW
6026 goto leftbracket;
6027 }
79072805 6028 }
78cdf107
Z
6029 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6030 s--;
6031 TOKEN(0);
6032 }
6154021b 6033 pl_yylval.ival = 0;
a0d0e21e 6034 OPERATOR(ASSIGNOP);
378cc40b
LW
6035 case '!':
6036 s++;
90771dc0
NC
6037 {
6038 const char tmp = *s++;
6039 if (tmp == '=') {
6040 /* was this !=~ where !~ was meant?
6041 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
6042
6043 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
6044 const char *t = s+1;
6045
6046 while (t < PL_bufend && isSPACE(*t))
6047 ++t;
6048
6049 if (*t == '/' || *t == '?' ||
6050 ((*t == 'm' || *t == 's' || *t == 'y')
6051 && !isALNUM(t[1])) ||
6052 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
6053 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6054 "!=~ should be !~");
6055 }
78cdf107
Z
6056 if (!PL_lex_allbrackets &&
6057 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6058 s -= 2;
6059 TOKEN(0);
6060 }
90771dc0
NC
6061 Eop(OP_NE);
6062 }
6063 if (tmp == '~')
6064 PMop(OP_NOT);
6065 }
378cc40b
LW
6066 s--;
6067 OPERATOR('!');
6068 case '<':
3280af22 6069 if (PL_expect != XOPERATOR) {
93a17b20 6070 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 6071 check_uni();
79072805
LW
6072 if (s[1] == '<')
6073 s = scan_heredoc(s);
6074 else
6075 s = scan_inputsymbol(s);
6076 TERM(sublex_start());
378cc40b
LW
6077 }
6078 s++;
90771dc0
NC
6079 {
6080 char tmp = *s++;
78cdf107
Z
6081 if (tmp == '<') {
6082 if (*s == '=' && !PL_lex_allbrackets &&
6083 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6084 s -= 2;
6085 TOKEN(0);
6086 }
90771dc0 6087 SHop(OP_LEFT_SHIFT);
78cdf107 6088 }
90771dc0
NC
6089 if (tmp == '=') {
6090 tmp = *s++;
78cdf107
Z
6091 if (tmp == '>') {
6092 if (!PL_lex_allbrackets &&
6093 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6094 s -= 3;
6095 TOKEN(0);
6096 }
90771dc0 6097 Eop(OP_NCMP);
78cdf107 6098 }
90771dc0 6099 s--;
78cdf107
Z
6100 if (!PL_lex_allbrackets &&
6101 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6102 s -= 2;
6103 TOKEN(0);
6104 }
90771dc0
NC
6105 Rop(OP_LE);
6106 }
395c3793 6107 }
378cc40b 6108 s--;
78cdf107
Z
6109 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6110 s--;
6111 TOKEN(0);
6112 }
79072805 6113 Rop(OP_LT);
378cc40b
LW
6114 case '>':
6115 s++;
90771dc0
NC
6116 {
6117 const char tmp = *s++;
78cdf107
Z
6118 if (tmp == '>') {
6119 if (*s == '=' && !PL_lex_allbrackets &&
6120 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6121 s -= 2;
6122 TOKEN(0);
6123 }
90771dc0 6124 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
6125 }
6126 else if (tmp == '=') {
6127 if (!PL_lex_allbrackets &&
6128 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6129 s -= 2;
6130 TOKEN(0);
6131 }
90771dc0 6132 Rop(OP_GE);
78cdf107 6133 }
90771dc0 6134 }
378cc40b 6135 s--;
78cdf107
Z
6136 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6137 s--;
6138 TOKEN(0);
6139 }
79072805 6140 Rop(OP_GT);
378cc40b
LW
6141
6142 case '$':
bbce6d69 6143 CLINE;
6144
3280af22
NIS
6145 if (PL_expect == XOPERATOR) {
6146 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6147 return deprecate_commaless_var_list();
a0d0e21e 6148 }
8990e307 6149 }
a0d0e21e 6150
c0b977fd 6151 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6152 PL_tokenbuf[0] = '@';
376b8730
SM
6153 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6154 sizeof PL_tokenbuf - 1, FALSE);
6155 if (PL_expect == XOPERATOR)
6156 no_op("Array length", s);
3280af22 6157 if (!PL_tokenbuf[1])
a0d0e21e 6158 PREREF(DOLSHARP);
3280af22
NIS
6159 PL_expect = XOPERATOR;
6160 PL_pending_ident = '#';
463ee0b2 6161 TOKEN(DOLSHARP);
79072805 6162 }
bbce6d69 6163
3280af22 6164 PL_tokenbuf[0] = '$';
376b8730
SM
6165 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6166 sizeof PL_tokenbuf - 1, FALSE);
6167 if (PL_expect == XOPERATOR)
6168 no_op("Scalar", s);
3280af22
NIS
6169 if (!PL_tokenbuf[1]) {
6170 if (s == PL_bufend)
bbce6d69 6171 yyerror("Final $ should be \\$ or $name");
6172 PREREF('$');
8990e307 6173 }
a0d0e21e 6174
ff68c719 6175 d = s;
90771dc0
NC
6176 {
6177 const char tmp = *s;
ae28bb2a 6178 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6179 s = SKIPSPACE1(s);
ff68c719 6180
90771dc0
NC
6181 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6182 && intuit_more(s)) {
6183 if (*s == '[') {
6184 PL_tokenbuf[0] = '@';
6185 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6186 char *t = s+1;
6187
6188 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6189 t++;
90771dc0 6190 if (*t++ == ',') {
29595ff2 6191 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6192 while (t < PL_bufend && *t != ']')
6193 t++;
9014280d 6194 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6195 "Multidimensional syntax %.*s not supported",
36c7798d 6196 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6197 }
748a9306 6198 }
93a17b20 6199 }
90771dc0
NC
6200 else if (*s == '{') {
6201 char *t;
6202 PL_tokenbuf[0] = '%';
6203 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6204 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6205 {
6206 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6207 do {
6208 t++;
6209 } while (isSPACE(*t));
90771dc0 6210 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6211 STRLEN len;
90771dc0 6212 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6213 &len);
c35e046a
AL
6214 while (isSPACE(*t))
6215 t++;
4c01a014
BF
6216 if (*t == ';'
6217 && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
90771dc0 6218 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
4c01a014
BF
6219 "You need to quote \"%"SVf"\"",
6220 SVfARG(newSVpvn_flags(tmpbuf, len,
6221 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
90771dc0
NC
6222 }
6223 }
6224 }
93a17b20 6225 }
bbce6d69 6226
90771dc0
NC
6227 PL_expect = XOPERATOR;
6228 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6229 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6230 if (!islop || PL_last_lop_op == OP_GREPSTART)
6231 PL_expect = XOPERATOR;
6232 else if (strchr("$@\"'`q", *s))
6233 PL_expect = XTERM; /* e.g. print $fh "foo" */
6234 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6235 PL_expect = XTERM; /* e.g. print $fh &sub */
6236 else if (isIDFIRST_lazy_if(s,UTF)) {
6237 char tmpbuf[sizeof PL_tokenbuf];
6238 int t2;
6239 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6240 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6241 /* binary operators exclude handle interpretations */
6242 switch (t2) {
6243 case -KEY_x:
6244 case -KEY_eq:
6245 case -KEY_ne:
6246 case -KEY_gt:
6247 case -KEY_lt:
6248 case -KEY_ge:
6249 case -KEY_le:
6250 case -KEY_cmp:
6251 break;
6252 default:
6253 PL_expect = XTERM; /* e.g. print $fh length() */
6254 break;
6255 }
6256 }
6257 else {
6258 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6259 }
6260 }
90771dc0
NC
6261 else if (isDIGIT(*s))
6262 PL_expect = XTERM; /* e.g. print $fh 3 */
6263 else if (*s == '.' && isDIGIT(s[1]))
6264 PL_expect = XTERM; /* e.g. print $fh .3 */
6265 else if ((*s == '?' || *s == '-' || *s == '+')
6266 && !isSPACE(s[1]) && s[1] != '=')
6267 PL_expect = XTERM; /* e.g. print $fh -1 */
6268 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6269 && s[1] != '/')
6270 PL_expect = XTERM; /* e.g. print $fh /.../
6271 XXX except DORDOR operator
6272 */
6273 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6274 && s[2] != '=')
6275 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6276 }
bbce6d69 6277 }
3280af22 6278 PL_pending_ident = '$';
79072805 6279 TOKEN('$');
378cc40b
LW
6280
6281 case '@':
3280af22 6282 if (PL_expect == XOPERATOR)
bbce6d69 6283 no_op("Array", s);
3280af22
NIS
6284 PL_tokenbuf[0] = '@';
6285 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6286 if (!PL_tokenbuf[1]) {
bbce6d69 6287 PREREF('@');
6288 }
3280af22 6289 if (PL_lex_state == LEX_NORMAL)
29595ff2 6290 s = SKIPSPACE1(s);
3280af22 6291 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6292 if (*s == '{')
3280af22 6293 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6294
6295 /* Warn about @ where they meant $. */
041457d9
DM
6296 if (*s == '[' || *s == '{') {
6297 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6298 const char *t = s + 1;
7e2040f0 6299 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
b9e186cd 6300 t += UTF ? UTF8SKIP(t) : 1;
a0d0e21e
LW
6301 if (*t == '}' || *t == ']') {
6302 t++;
29595ff2 6303 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
dcbac5bb 6304 /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
9014280d 6305 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
b9e186cd
BF
6306 "Scalar value %"SVf" better written as $%"SVf,
6307 SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
6308 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
6309 SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
6310 SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
a0d0e21e 6311 }
93a17b20
LW
6312 }
6313 }
463ee0b2 6314 }
3280af22 6315 PL_pending_ident = '@';
79072805 6316 TERM('@');
378cc40b 6317
c963b151 6318 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6319 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6320 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6321 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6322 TOKEN(0);
6f33ba73
RGS
6323 s += 2;
6324 AOPERATOR(DORDOR);
6325 }
c963b151 6326 case '?': /* may either be conditional or pattern */
be25f609 6327 if (PL_expect == XOPERATOR) {
90771dc0 6328 char tmp = *s++;
c963b151 6329 if(tmp == '?') {
78cdf107
Z
6330 if (!PL_lex_allbrackets &&
6331 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6332 s--;
6333 TOKEN(0);
6334 }
6335 PL_lex_allbrackets++;
be25f609 6336 OPERATOR('?');
c963b151
BD
6337 }
6338 else {
6339 tmp = *s++;
6340 if(tmp == '/') {
6341 /* A // operator. */
78cdf107
Z
6342 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6343 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6344 LEX_FAKEEOF_LOGIC)) {
6345 s -= 2;
6346 TOKEN(0);
6347 }
c963b151
BD
6348 AOPERATOR(DORDOR);
6349 }
6350 else {
6351 s--;
78cdf107
Z
6352 if (*s == '=' && !PL_lex_allbrackets &&
6353 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6354 s--;
6355 TOKEN(0);
6356 }
c963b151
BD
6357 Mop(OP_DIVIDE);
6358 }
6359 }
6360 }
6361 else {
6362 /* Disable warning on "study /blah/" */
6363 if (PL_oldoldbufptr == PL_last_uni
6364 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6365 || memNE(PL_last_uni, "study", 5)
6366 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6367 ))
6368 check_uni();
725a61d7
Z
6369 if (*s == '?')
6370 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6371 s = scan_pat(s,OP_MATCH);
6372 TERM(sublex_start());
6373 }
378cc40b
LW
6374
6375 case '.':
51882d45
GS
6376 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6377#ifdef PERL_STRICT_CR
6378 && s[1] == '\n'
6379#else
6380 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6381#endif
6382 && (s == PL_linestart || s[-1] == '\n') )
6383 {
3280af22
NIS
6384 PL_lex_formbrack = 0;
6385 PL_expect = XSTATE;
79072805
LW
6386 goto rightbracket;
6387 }
be25f609 6388 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6389 s += 3;
6390 OPERATOR(YADAYADA);
6391 }
3280af22 6392 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6393 char tmp = *s++;
a687059c 6394 if (*s == tmp) {
78cdf107
Z
6395 if (!PL_lex_allbrackets &&
6396 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6397 s--;
6398 TOKEN(0);
6399 }
a687059c 6400 s++;
2f3197b3
LW
6401 if (*s == tmp) {
6402 s++;
6154021b 6403 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6404 }
6405 else
6154021b 6406 pl_yylval.ival = 0;
378cc40b 6407 OPERATOR(DOTDOT);
a687059c 6408 }
78cdf107
Z
6409 if (*s == '=' && !PL_lex_allbrackets &&
6410 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6411 s--;
6412 TOKEN(0);
6413 }
79072805 6414 Aop(OP_CONCAT);
378cc40b
LW
6415 }
6416 /* FALL THROUGH */
6417 case '0': case '1': case '2': case '3': case '4':
6418 case '5': case '6': case '7': case '8': case '9':
6154021b 6419 s = scan_num(s, &pl_yylval);
931e0695 6420 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6421 if (PL_expect == XOPERATOR)
8990e307 6422 no_op("Number",s);
79072805
LW
6423 TERM(THING);
6424
6425 case '\'':
d24ca0c5 6426 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6427 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6428 if (PL_expect == XOPERATOR) {
6429 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6430 return deprecate_commaless_var_list();
a0d0e21e 6431 }
463ee0b2 6432 else
8990e307 6433 no_op("String",s);
463ee0b2 6434 }
79072805 6435 if (!s)
d4c19fe8 6436 missingterm(NULL);
6154021b 6437 pl_yylval.ival = OP_CONST;
79072805
LW
6438 TERM(sublex_start());
6439
6440 case '"':
d24ca0c5 6441 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6442 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6443 if (PL_expect == XOPERATOR) {
6444 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6445 return deprecate_commaless_var_list();
a0d0e21e 6446 }
463ee0b2 6447 else
8990e307 6448 no_op("String",s);
463ee0b2 6449 }
79072805 6450 if (!s)
d4c19fe8 6451 missingterm(NULL);
6154021b 6452 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6453 /* FIXME. I think that this can be const if char *d is replaced by
6454 more localised variables. */
3280af22 6455 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6456 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6457 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6458 break;
6459 }
6460 }
79072805
LW
6461 TERM(sublex_start());
6462
6463 case '`':
d24ca0c5 6464 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
931e0695 6465 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6466 if (PL_expect == XOPERATOR)
8990e307 6467 no_op("Backticks",s);
79072805 6468 if (!s)
d4c19fe8 6469 missingterm(NULL);
9b201d7d 6470 readpipe_override();
79072805
LW
6471 TERM(sublex_start());
6472
6473 case '\\':
6474 s++;
a2a5de95
NC
6475 if (PL_lex_inwhat && isDIGIT(*s))
6476 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6477 *s, *s);
3280af22 6478 if (PL_expect == XOPERATOR)
8990e307 6479 no_op("Backslash",s);
79072805
LW
6480 OPERATOR(REFGEN);
6481
a7cb1f99 6482 case 'v':
e526c9e6 6483 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6484 char *start = s + 2;
dd629d5b 6485 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6486 start++;
6487 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6488 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6489 TERM(THING);
6490 }
e526c9e6 6491 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6492 else if (!isALPHA(*start) && (PL_expect == XTERM
6493 || PL_expect == XREF || PL_expect == XSTATE
6494 || PL_expect == XTERMORDORDOR)) {
af9f5953
BF
6495 GV *const gv = gv_fetchpvn_flags(s, start - s,
6496 UTF ? SVf_UTF8 : 0, SVt_PVCV);
e526c9e6 6497 if (!gv) {
6154021b 6498 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6499 TERM(THING);
6500 }
6501 }
a7cb1f99
GS
6502 }
6503 goto keylookup;
79072805 6504 case 'x':
3280af22 6505 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6506 s++;
6507 Mop(OP_REPEAT);
2f3197b3 6508 }
79072805
LW
6509 goto keylookup;
6510
378cc40b 6511 case '_':
79072805
LW
6512 case 'a': case 'A':
6513 case 'b': case 'B':
6514 case 'c': case 'C':
6515 case 'd': case 'D':
6516 case 'e': case 'E':
6517 case 'f': case 'F':
6518 case 'g': case 'G':
6519 case 'h': case 'H':
6520 case 'i': case 'I':
6521 case 'j': case 'J':
6522 case 'k': case 'K':
6523 case 'l': case 'L':
6524 case 'm': case 'M':
6525 case 'n': case 'N':
6526 case 'o': case 'O':
6527 case 'p': case 'P':
6528 case 'q': case 'Q':
6529 case 'r': case 'R':
6530 case 's': case 'S':
6531 case 't': case 'T':
6532 case 'u': case 'U':
a7cb1f99 6533 case 'V':
79072805
LW
6534 case 'w': case 'W':
6535 case 'X':
6536 case 'y': case 'Y':
6537 case 'z': case 'Z':
6538
49dc05e3 6539 keylookup: {
88e1f1a2 6540 bool anydelim;
90771dc0 6541 I32 tmp;
10edeb5d
JH
6542
6543 orig_keyword = 0;
6544 gv = NULL;
6545 gvp = NULL;
49dc05e3 6546
3280af22
NIS
6547 PL_bufptr = s;
6548 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6549
6550 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6551 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6552
6553 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6554 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6555 goto just_a_word;
6556
3643fb5f 6557 d = s;
3280af22 6558 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6559 d++; /* no comments skipped here, or s### is misparsed */
6560
748a9306 6561 /* Is this a word before a => operator? */
1c3923b3 6562 if (*d == '=' && d[1] == '>') {
748a9306 6563 CLINE;
6154021b 6564 pl_yylval.opval
d0a148a6
NC
6565 = (OP*)newSVOP(OP_CONST, 0,
6566 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6567 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6568 TERM(WORD);
6569 }
6570
88e1f1a2
JV
6571 /* Check for plugged-in keyword */
6572 {
6573 OP *o;
6574 int result;
6575 char *saved_bufptr = PL_bufptr;
6576 PL_bufptr = s;
16c91539 6577 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6578 s = PL_bufptr;
6579 if (result == KEYWORD_PLUGIN_DECLINE) {
6580 /* not a plugged-in keyword */
6581 PL_bufptr = saved_bufptr;
6582 } else if (result == KEYWORD_PLUGIN_STMT) {
6583 pl_yylval.opval = o;
6584 CLINE;
6585 PL_expect = XSTATE;
6586 return REPORT(PLUGSTMT);
6587 } else if (result == KEYWORD_PLUGIN_EXPR) {
6588 pl_yylval.opval = o;
6589 CLINE;
6590 PL_expect = XOPERATOR;
6591 return REPORT(PLUGEXPR);
6592 } else {
6593 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6594 PL_tokenbuf);
6595 }
6596 }
6597
6598 /* Check for built-in keyword */
6599 tmp = keyword(PL_tokenbuf, len, 0);
6600
6601 /* Is this a label? */
6602 if (!anydelim && PL_expect == XSTATE
6603 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2 6604 s = d + 1;
5db1eb8d
BF
6605 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
6606 newSVpvn_flags(PL_tokenbuf,
6607 len, UTF ? SVf_UTF8 : 0));
88e1f1a2
JV
6608 CLINE;
6609 TOKEN(LABEL);
6610 }
6611
a0d0e21e 6612 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6613 GV *ogv = NULL; /* override (winner) */
6614 GV *hgv = NULL; /* hidden (loser) */
3280af22 6615 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6616 CV *cv;
af9f5953
BF
6617 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6618 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
56f7f34b
CS
6619 (cv = GvCVu(gv)))
6620 {
6621 if (GvIMPORTED_CV(gv))
6622 ogv = gv;
6623 else if (! CvMETHOD(cv))
6624 hgv = gv;
6625 }
6626 if (!ogv &&
af9f5953 6627 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
c60dbbc3 6628 UTF ? -(I32)len : (I32)len, FALSE)) &&
9e0d86f8 6629 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6630 GvCVu(gv) && GvIMPORTED_CV(gv))
6631 {
6632 ogv = gv;
6633 }
6634 }
6635 if (ogv) {
30fe34ed 6636 orig_keyword = tmp;
56f7f34b 6637 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6638 }
6639 else if (gv && !gvp
6640 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6641 && GvCVu(gv))
6e7b2336
GS
6642 {
6643 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6644 }
56f7f34b
CS
6645 else { /* no override */
6646 tmp = -tmp;
a2a5de95
NC
6647 if (tmp == KEY_dump) {
6648 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6649 "dump() better written as CORE::dump()");
ac206dc8 6650 }
a0714e2c 6651 gv = NULL;
56f7f34b 6652 gvp = 0;
a2a5de95
NC
6653 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6654 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6655 "Ambiguous call resolved as CORE::%s(), "
6656 "qualify as such or use &",
6657 GvENAME(hgv));
49dc05e3 6658 }
a0d0e21e
LW
6659 }
6660
6661 reserved_word:
6662 switch (tmp) {
79072805
LW
6663
6664 default: /* not a keyword */
0bfa2a8a
NC
6665 /* Trade off - by using this evil construction we can pull the
6666 variable gv into the block labelled keylookup. If not, then
6667 we have to give it function scope so that the goto from the
6668 earlier ':' case doesn't bypass the initialisation. */
6669 if (0) {
6670 just_a_word_zero_gv:
6671 gv = NULL;
6672 gvp = NULL;
8bee0991 6673 orig_keyword = 0;
0bfa2a8a 6674 }
93a17b20 6675 just_a_word: {
96e4d5b1 6676 SV *sv;
ce29ac45 6677 int pkgname = 0;
f54cb97a 6678 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6679 OP *rv2cv_op;
5069cc75 6680 CV *cv;
5db06880 6681#ifdef PERL_MAD
cd81e915 6682 SV *nextPL_nextwhite = 0;
5db06880
NC
6683#endif
6684
8990e307
LW
6685
6686 /* Get the rest if it looks like a package qualifier */
6687
155aba94 6688 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6689 STRLEN morelen;
3280af22 6690 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6691 TRUE, &morelen);
6692 if (!morelen)
86fe3f36
BF
6693 Perl_croak(aTHX_ "Bad name after %"SVf"%s",
6694 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6695 (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
ec2ab091 6696 *s == '\'' ? "'" : "::");
c3e0f903 6697 len += morelen;
ce29ac45 6698 pkgname = 1;
a0d0e21e 6699 }
8990e307 6700
3280af22
NIS
6701 if (PL_expect == XOPERATOR) {
6702 if (PL_bufptr == PL_linestart) {
57843af0 6703 CopLINE_dec(PL_curcop);
f1f66076 6704 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6705 CopLINE_inc(PL_curcop);
463ee0b2
LW
6706 }
6707 else
54310121 6708 no_op("Bareword",s);
463ee0b2 6709 }
8990e307 6710
c3e0f903 6711 /* Look for a subroutine with this name in current package,
486ec47a 6712 unless name is "Foo::", in which case Foo is a bareword
c3e0f903
GS
6713 (and a package name). */
6714
5db06880 6715 if (len > 2 && !PL_madskills &&
3280af22 6716 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6717 {
f776e3cd 6718 if (ckWARN(WARN_BAREWORD)
af9f5953 6719 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
9014280d 6720 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
979a1401
BF
6721 "Bareword \"%"SVf"\" refers to nonexistent package",
6722 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
6723 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
c3e0f903 6724 len -= 2;
3280af22 6725 PL_tokenbuf[len] = '\0';
a0714e2c 6726 gv = NULL;
c3e0f903
GS
6727 gvp = 0;
6728 }
6729 else {
62d55b22
NC
6730 if (!gv) {
6731 /* Mustn't actually add anything to a symbol table.
6732 But also don't want to "initialise" any placeholder
6733 constants that might already be there into full
6734 blown PVGVs with attached PVCV. */
90e5519e 6735 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
af9f5953
BF
6736 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6737 SVt_PVCV);
62d55b22 6738 }
b3d904f3 6739 len = 0;
c3e0f903
GS
6740 }
6741
6742 /* if we saw a global override before, get the right name */
8990e307 6743
37bb7629
EB
6744 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6745 len ? len : strlen(PL_tokenbuf));
49dc05e3 6746 if (gvp) {
37bb7629 6747 SV * const tmp_sv = sv;
396482e1 6748 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6749 sv_catsv(sv, tmp_sv);
6750 SvREFCNT_dec(tmp_sv);
8a7a129d 6751 }
37bb7629 6752
5db06880 6753#ifdef PERL_MAD
cd81e915
NC
6754 if (PL_madskills && !PL_thistoken) {
6755 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6756 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6757 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6758 }
6759#endif
8990e307 6760
a0d0e21e 6761 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6762 CLINE;
6154021b
RGS
6763 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6764 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6765
c3e0f903 6766 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6767 if (len)
6768 goto safe_bareword;
6769
f7461760 6770 {
d8ebba9f 6771 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
6772 const_op->op_private = OPpCONST_BARE;
6773 rv2cv_op = newCVREF(0, const_op);
6774 }
d9088386 6775 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6776
8990e307
LW
6777 /* See if it's the indirect object for a list operator. */
6778
3280af22
NIS
6779 if (PL_oldoldbufptr &&
6780 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6781 (PL_oldoldbufptr == PL_last_lop
6782 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6783 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6784 (PL_expect == XREF ||
6785 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6786 {
748a9306
LW
6787 bool immediate_paren = *s == '(';
6788
a0d0e21e 6789 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6790 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6791#ifdef PERL_MAD
cd81e915 6792 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6793#endif
a0d0e21e
LW
6794
6795 /* Two barewords in a row may indicate method call. */
6796
62d55b22 6797 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6798 (tmp = intuit_method(s, gv, cv))) {
6799 op_free(rv2cv_op);
78cdf107
Z
6800 if (tmp == METHOD && !PL_lex_allbrackets &&
6801 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6802 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6803 return REPORT(tmp);
f7461760 6804 }
a0d0e21e
LW
6805
6806 /* If not a declared subroutine, it's an indirect object. */
6807 /* (But it's an indir obj regardless for sort.) */
7294df96 6808 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6809
7294df96
RGS
6810 if (
6811 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6812 (!cv &&
a9ef352a 6813 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6814 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6815 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6816 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6817 )
a9ef352a 6818 {
3280af22 6819 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6820 goto bareword;
93a17b20
LW
6821 }
6822 }
8990e307 6823
3280af22 6824 PL_expect = XOPERATOR;
5db06880
NC
6825#ifdef PERL_MAD
6826 if (isSPACE(*s))
cd81e915
NC
6827 s = SKIPSPACE2(s,nextPL_nextwhite);
6828 PL_nextwhite = nextPL_nextwhite;
5db06880 6829#else
8990e307 6830 s = skipspace(s);
5db06880 6831#endif
1c3923b3
GS
6832
6833 /* Is this a word before a => operator? */
ce29ac45 6834 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6835 op_free(rv2cv_op);
1c3923b3 6836 CLINE;
6154021b 6837 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6838 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6839 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6840 TERM(WORD);
6841 }
6842
6843 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6844 if (*s == '(') {
79072805 6845 CLINE;
5069cc75 6846 if (cv) {
c35e046a
AL
6847 d = s + 1;
6848 while (SPACE_OR_TAB(*d))
6849 d++;
f7461760 6850 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6851 s = d + 1;
c631f32b 6852 goto its_constant;
96e4d5b1 6853 }
6854 }
5db06880
NC
6855#ifdef PERL_MAD
6856 if (PL_madskills) {
cd81e915
NC
6857 PL_nextwhite = PL_thiswhite;
6858 PL_thiswhite = 0;
5db06880 6859 }
cd81e915 6860 start_force(PL_curforce);
5db06880 6861#endif
6154021b 6862 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6863 PL_expect = XOPERATOR;
5db06880
NC
6864#ifdef PERL_MAD
6865 if (PL_madskills) {
cd81e915
NC
6866 PL_nextwhite = nextPL_nextwhite;
6867 curmad('X', PL_thistoken);
6b29d1f5 6868 PL_thistoken = newSVpvs("");
5db06880
NC
6869 }
6870#endif
f7461760 6871 op_free(rv2cv_op);
93a17b20 6872 force_next(WORD);
6154021b 6873 pl_yylval.ival = 0;
463ee0b2 6874 TOKEN('&');
79072805 6875 }
93a17b20 6876
a0d0e21e 6877 /* If followed by var or block, call it a method (unless sub) */
8990e307 6878
f7461760
Z
6879 if ((*s == '$' || *s == '{') && !cv) {
6880 op_free(rv2cv_op);
3280af22
NIS
6881 PL_last_lop = PL_oldbufptr;
6882 PL_last_lop_op = OP_METHOD;
78cdf107
Z
6883 if (!PL_lex_allbrackets &&
6884 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6885 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 6886 PREBLOCK(METHOD);
463ee0b2
LW
6887 }
6888
8990e307
LW
6889 /* If followed by a bareword, see if it looks like indir obj. */
6890
30fe34ed
RGS
6891 if (!orig_keyword
6892 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6893 && (tmp = intuit_method(s, gv, cv))) {
6894 op_free(rv2cv_op);
78cdf107
Z
6895 if (tmp == METHOD && !PL_lex_allbrackets &&
6896 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6897 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6898 return REPORT(tmp);
f7461760 6899 }
93a17b20 6900
8990e307
LW
6901 /* Not a method, so call it a subroutine (if defined) */
6902
5069cc75 6903 if (cv) {
43b5ab4c
BF
6904 if (lastchar == '-') {
6905 const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
6906 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6907 "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
6908 SVfARG(tmpsv), SVfARG(tmpsv));
6909 }
89bfa8cd 6910 /* Check for a constant sub */
f7461760 6911 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6912 its_constant:
f7461760 6913 op_free(rv2cv_op);
6154021b
RGS
6914 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6915 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
cc2ebcd7 6916 pl_yylval.opval->op_private = OPpCONST_FOLDED;
6b7c6d95 6917 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 6918 TOKEN(WORD);
89bfa8cd 6919 }
6920
6154021b 6921 op_free(pl_yylval.opval);
f7461760 6922 pl_yylval.opval = rv2cv_op;
6154021b 6923 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6924 PL_last_lop = PL_oldbufptr;
bf848113 6925 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6926 /* Is there a prototype? */
5db06880
NC
6927 if (
6928#ifdef PERL_MAD
6929 cv &&
6930#endif
d9f2850e
RGS
6931 SvPOK(cv))
6932 {
8fa6a409
FC
6933 STRLEN protolen = CvPROTOLEN(cv);
6934 const char *proto = CvPROTO(cv);
b5fb7ce3 6935 bool optional;
5f66b61c 6936 if (!protolen)
4633a7c4 6937 TERM(FUNC0SUB);
b5fb7ce3
FC
6938 if ((optional = *proto == ';'))
6939 do
0f5d0394 6940 proto++;
b5fb7ce3 6941 while (*proto == ';');
649d02de
FC
6942 if (
6943 (
6944 (
6945 *proto == '$' || *proto == '_'
c035a075 6946 || *proto == '*' || *proto == '+'
649d02de
FC
6947 )
6948 && proto[1] == '\0'
6949 )
6950 || (
6951 *proto == '\\' && proto[1] && proto[2] == '\0'
6952 )
6953 )
b5fb7ce3 6954 UNIPROTO(UNIOPSUB,optional);
649d02de
FC
6955 if (*proto == '\\' && proto[1] == '[') {
6956 const char *p = proto + 2;
6957 while(*p && *p != ']')
6958 ++p;
b5fb7ce3
FC
6959 if(*p == ']' && !p[1])
6960 UNIPROTO(UNIOPSUB,optional);
649d02de 6961 }
7a52d87a 6962 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6963 if (PL_curstash)
6964 sv_setpvs(PL_subname, "__ANON__");
6965 else
6966 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
6967 if (!PL_lex_allbrackets &&
6968 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6969 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
6970 PREBLOCK(LSTOPSUB);
6971 }
a9ef352a 6972 }
5db06880
NC
6973#ifdef PERL_MAD
6974 {
6975 if (PL_madskills) {
cd81e915
NC
6976 PL_nextwhite = PL_thiswhite;
6977 PL_thiswhite = 0;
5db06880 6978 }
cd81e915 6979 start_force(PL_curforce);
6154021b 6980 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6981 PL_expect = XTERM;
6982 if (PL_madskills) {
cd81e915
NC
6983 PL_nextwhite = nextPL_nextwhite;
6984 curmad('X', PL_thistoken);
6b29d1f5 6985 PL_thistoken = newSVpvs("");
5db06880
NC
6986 }
6987 force_next(WORD);
78cdf107
Z
6988 if (!PL_lex_allbrackets &&
6989 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6990 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6991 TOKEN(NOAMP);
6992 }
6993 }
6994
6995 /* Guess harder when madskills require "best effort". */
6996 if (PL_madskills && (!gv || !GvCVu(gv))) {
6997 int probable_sub = 0;
6998 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6999 probable_sub = 1;
7000 else if (isALPHA(*s)) {
7001 char tmpbuf[1024];
7002 STRLEN tmplen;
7003 d = s;
7004 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 7005 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
7006 probable_sub = 1;
7007 else {
7008 while (d < PL_bufend && isSPACE(*d))
7009 d++;
7010 if (*d == '=' && d[1] == '>')
7011 probable_sub = 1;
7012 }
7013 }
7014 if (probable_sub) {
af9f5953
BF
7015 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
7016 SVt_PVCV);
6154021b 7017 op_free(pl_yylval.opval);
f7461760 7018 pl_yylval.opval = rv2cv_op;
6154021b 7019 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
7020 PL_last_lop = PL_oldbufptr;
7021 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
7022 PL_nextwhite = PL_thiswhite;
7023 PL_thiswhite = 0;
7024 start_force(PL_curforce);
6154021b 7025 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 7026 PL_expect = XTERM;
cd81e915
NC
7027 PL_nextwhite = nextPL_nextwhite;
7028 curmad('X', PL_thistoken);
6b29d1f5 7029 PL_thistoken = newSVpvs("");
5db06880 7030 force_next(WORD);
78cdf107
Z
7031 if (!PL_lex_allbrackets &&
7032 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7033 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
7034 TOKEN(NOAMP);
7035 }
7036#else
6154021b 7037 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 7038 PL_expect = XTERM;
8990e307 7039 force_next(WORD);
78cdf107
Z
7040 if (!PL_lex_allbrackets &&
7041 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7042 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 7043 TOKEN(NOAMP);
5db06880 7044#endif
8990e307 7045 }
748a9306 7046
8990e307
LW
7047 /* Call it a bare word */
7048
5603f27d 7049 if (PL_hints & HINT_STRICT_SUBS)
6154021b 7050 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 7051 else {
9a073a1d
RGS
7052 bareword:
7053 /* after "print" and similar functions (corresponding to
7054 * "F? L" in opcode.pl), whatever wasn't already parsed as
7055 * a filehandle should be subject to "strict subs".
7056 * Likewise for the optional indirect-object argument to system
7057 * or exec, which can't be a bareword */
7058 if ((PL_last_lop_op == OP_PRINT
7059 || PL_last_lop_op == OP_PRTF
7060 || PL_last_lop_op == OP_SAY
7061 || PL_last_lop_op == OP_SYSTEM
7062 || PL_last_lop_op == OP_EXEC)
7063 && (PL_hints & HINT_STRICT_SUBS))
7064 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
7065 if (lastchar != '-') {
7066 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
7067 d = PL_tokenbuf;
7068 while (isLOWER(*d))
7069 d++;
af9f5953 7070 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
9014280d 7071 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
7072 PL_tokenbuf);
7073 }
748a9306
LW
7074 }
7075 }
f7461760 7076 op_free(rv2cv_op);
c3e0f903
GS
7077
7078 safe_bareword:
9b387841
NC
7079 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
7080 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
02571fe8
BF
7081 "Operator or semicolon missing before %c%"SVf,
7082 lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
7083 strlen(PL_tokenbuf),
7084 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
9b387841
NC
7085 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
7086 "Ambiguous use of %c resolved as operator %c",
7087 lastchar, lastchar);
748a9306 7088 }
93a17b20 7089 TOKEN(WORD);
79072805 7090 }
79072805 7091
68dc0745 7092 case KEY___FILE__:
7eb971ee 7093 FUN0OP(
14f0f125 7094 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7eb971ee 7095 );
46fc3d4c 7096
79072805 7097 case KEY___LINE__:
7eb971ee
FC
7098 FUN0OP(
7099 (OP*)newSVOP(OP_CONST, 0,
7100 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7101 );
68dc0745 7102
7103 case KEY___PACKAGE__:
7eb971ee
FC
7104 FUN0OP(
7105 (OP*)newSVOP(OP_CONST, 0,
3280af22 7106 (PL_curstash
5aaec2b4 7107 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
7108 : &PL_sv_undef))
7109 );
79072805 7110
e50aee73 7111 case KEY___DATA__:
79072805
LW
7112 case KEY___END__: {
7113 GV *gv;
3280af22 7114 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 7115 const char *pname = "main";
affc13fc
FC
7116 STRLEN plen = 4;
7117 U32 putf8 = 0;
3280af22 7118 if (PL_tokenbuf[2] == 'D')
affc13fc
FC
7119 {
7120 HV * const stash =
7121 PL_curstash ? PL_curstash : PL_defstash;
7122 pname = HvNAME_get(stash);
7123 plen = HvNAMELEN (stash);
7124 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7125 }
7126 gv = gv_fetchpvn_flags(
7127 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7128 plen+6, GV_ADD|putf8, SVt_PVIO
7129 );
a5f75d66 7130 GvMULTI_on(gv);
79072805 7131 if (!GvIO(gv))
a0d0e21e 7132 GvIOp(gv) = newIO();
3280af22 7133 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
7134#if defined(HAS_FCNTL) && defined(F_SETFD)
7135 {
f54cb97a 7136 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
7137 fcntl(fd,F_SETFD,fd >= 3);
7138 }
79072805 7139#endif
fd049845 7140 /* Mark this internal pseudo-handle as clean */
7141 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 7142 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 7143 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 7144 else
50952442 7145 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
7146#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7147 /* if the script was opened in binmode, we need to revert
53129d29 7148 * it to text mode for compatibility; but only iff it has CRs
c39cd008 7149 * XXX this is a questionable hack at best. */
53129d29
GS
7150 if (PL_bufend-PL_bufptr > 2
7151 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
7152 {
7153 Off_t loc = 0;
50952442 7154 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
7155 loc = PerlIO_tell(PL_rsfp);
7156 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7157 }
2986a63f
JH
7158#ifdef NETWARE
7159 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7160#else
c39cd008 7161 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 7162#endif /* NETWARE */
c39cd008
GS
7163 if (loc > 0)
7164 PerlIO_seek(PL_rsfp, loc, 0);
7165 }
7166 }
7167#endif
7948272d 7168#ifdef PERLIO_LAYERS
52d2e0f4
JH
7169 if (!IN_BYTES) {
7170 if (UTF)
7171 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7172 else if (PL_encoding) {
7173 SV *name;
7174 dSP;
7175 ENTER;
7176 SAVETMPS;
7177 PUSHMARK(sp);
7178 EXTEND(SP, 1);
7179 XPUSHs(PL_encoding);
7180 PUTBACK;
7181 call_method("name", G_SCALAR);
7182 SPAGAIN;
7183 name = POPs;
7184 PUTBACK;
bfed75c6 7185 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7186 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7187 SVfARG(name)));
52d2e0f4
JH
7188 FREETMPS;
7189 LEAVE;
7190 }
7191 }
7948272d 7192#endif
5db06880
NC
7193#ifdef PERL_MAD
7194 if (PL_madskills) {
cd81e915
NC
7195 if (PL_realtokenstart >= 0) {
7196 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7197 if (!PL_endwhite)
6b29d1f5 7198 PL_endwhite = newSVpvs("");
cd81e915
NC
7199 sv_catsv(PL_endwhite, PL_thiswhite);
7200 PL_thiswhite = 0;
7201 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7202 PL_realtokenstart = -1;
5db06880 7203 }
5cc814fd
NC
7204 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7205 != NULL) ;
5db06880
NC
7206 }
7207#endif
4608196e 7208 PL_rsfp = NULL;
79072805
LW
7209 }
7210 goto fake_eof;
e929a76b 7211 }
de3bb511 7212
84ed0108 7213 case KEY___SUB__:
1a35f9ff 7214 FUN0OP(newPVOP(OP_RUNCV,0,NULL));
84ed0108 7215
8990e307 7216 case KEY_AUTOLOAD:
ed6116ce 7217 case KEY_DESTROY:
79072805 7218 case KEY_BEGIN:
3c10abe3 7219 case KEY_UNITCHECK:
7d30b5c4 7220 case KEY_CHECK:
7d07dbc2 7221 case KEY_INIT:
7d30b5c4 7222 case KEY_END:
3280af22
NIS
7223 if (PL_expect == XSTATE) {
7224 s = PL_bufptr;
93a17b20 7225 goto really_sub;
79072805
LW
7226 }
7227 goto just_a_word;
7228
a0d0e21e
LW
7229 case KEY_CORE:
7230 if (*s == ':' && s[1] == ':') {
ee36fb64 7231 STRLEN olen = len;
748a9306 7232 d = s;
ee36fb64 7233 s += 2;
3280af22 7234 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
ee36fb64
FC
7235 if ((*s == ':' && s[1] == ':')
7236 || (!(tmp = keyword(PL_tokenbuf, len, 1)) && *s == '\''))
7237 {
7238 s = d;
7239 len = olen;
7240 Copy(PL_bufptr, PL_tokenbuf, olen, char);
7241 goto just_a_word;
7242 }
7243 if (!tmp)
3773592b
BF
7244 Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
7245 SVfARG(newSVpvn_flags(PL_tokenbuf, len,
7246 (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
a0d0e21e
LW
7247 if (tmp < 0)
7248 tmp = -tmp;
d67594ff
FC
7249 else if (tmp == KEY_require || tmp == KEY_do
7250 || tmp == KEY_glob)
a72a1c8b 7251 /* that's a way to remember we saw "CORE::" */
850e8516 7252 orig_keyword = tmp;
a0d0e21e
LW
7253 goto reserved_word;
7254 }
7255 goto just_a_word;
7256
463ee0b2
LW
7257 case KEY_abs:
7258 UNI(OP_ABS);
7259
79072805
LW
7260 case KEY_alarm:
7261 UNI(OP_ALARM);
7262
7263 case KEY_accept:
a0d0e21e 7264 LOP(OP_ACCEPT,XTERM);
79072805 7265
463ee0b2 7266 case KEY_and:
78cdf107
Z
7267 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7268 return REPORT(0);
463ee0b2
LW
7269 OPERATOR(ANDOP);
7270
79072805 7271 case KEY_atan2:
a0d0e21e 7272 LOP(OP_ATAN2,XTERM);
85e6fe83 7273
79072805 7274 case KEY_bind:
a0d0e21e 7275 LOP(OP_BIND,XTERM);
79072805
LW
7276
7277 case KEY_binmode:
1c1fc3ea 7278 LOP(OP_BINMODE,XTERM);
79072805
LW
7279
7280 case KEY_bless:
a0d0e21e 7281 LOP(OP_BLESS,XTERM);
79072805 7282
0d863452
RH
7283 case KEY_break:
7284 FUN0(OP_BREAK);
7285
79072805
LW
7286 case KEY_chop:
7287 UNI(OP_CHOP);
7288
7289 case KEY_continue:
0d863452
RH
7290 /* We have to disambiguate the two senses of
7291 "continue". If the next token is a '{' then
7292 treat it as the start of a continue block;
7293 otherwise treat it as a control operator.
7294 */
7295 s = skipspace(s);
7296 if (*s == '{')
79072805 7297 PREBLOCK(CONTINUE);
0d863452
RH
7298 else
7299 FUN0(OP_CONTINUE);
79072805
LW
7300
7301 case KEY_chdir:
fafc274c
NC
7302 /* may use HOME */
7303 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7304 UNI(OP_CHDIR);
7305
7306 case KEY_close:
7307 UNI(OP_CLOSE);
7308
7309 case KEY_closedir:
7310 UNI(OP_CLOSEDIR);
7311
7312 case KEY_cmp:
78cdf107
Z
7313 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7314 return REPORT(0);
79072805
LW
7315 Eop(OP_SCMP);
7316
7317 case KEY_caller:
7318 UNI(OP_CALLER);
7319
7320 case KEY_crypt:
7321#ifdef FCRYPT
f4c556ac
GS
7322 if (!PL_cryptseen) {
7323 PL_cryptseen = TRUE;
de3bb511 7324 init_des();
f4c556ac 7325 }
a687059c 7326#endif
a0d0e21e 7327 LOP(OP_CRYPT,XTERM);
79072805
LW
7328
7329 case KEY_chmod:
a0d0e21e 7330 LOP(OP_CHMOD,XTERM);
79072805
LW
7331
7332 case KEY_chown:
a0d0e21e 7333 LOP(OP_CHOWN,XTERM);
79072805
LW
7334
7335 case KEY_connect:
a0d0e21e 7336 LOP(OP_CONNECT,XTERM);
79072805 7337
463ee0b2
LW
7338 case KEY_chr:
7339 UNI(OP_CHR);
7340
79072805
LW
7341 case KEY_cos:
7342 UNI(OP_COS);
7343
7344 case KEY_chroot:
7345 UNI(OP_CHROOT);
7346
0d863452
RH
7347 case KEY_default:
7348 PREBLOCK(DEFAULT);
7349
79072805 7350 case KEY_do:
29595ff2 7351 s = SKIPSPACE1(s);
79072805 7352 if (*s == '{')
a0d0e21e 7353 PRETERMBLOCK(DO);
c2900bb8
FC
7354 if (*s != '\'') {
7355 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, 1, &len);
7356 if (len) {
7357 d = SKIPSPACE1(d);
7358 if (*d == '(') s = force_word(s,WORD,TRUE,TRUE,FALSE);
7359 }
7360 }
850e8516
RGS
7361 if (orig_keyword == KEY_do) {
7362 orig_keyword = 0;
6154021b 7363 pl_yylval.ival = 1;
850e8516
RGS
7364 }
7365 else
6154021b 7366 pl_yylval.ival = 0;
378cc40b 7367 OPERATOR(DO);
79072805
LW
7368
7369 case KEY_die:
3280af22 7370 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7371 LOP(OP_DIE,XTERM);
79072805
LW
7372
7373 case KEY_defined:
7374 UNI(OP_DEFINED);
7375
7376 case KEY_delete:
a0d0e21e 7377 UNI(OP_DELETE);
79072805
LW
7378
7379 case KEY_dbmopen:
74e8ce34
NC
7380 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7381 STR_WITH_LEN("NDBM_File::"),
7382 STR_WITH_LEN("DB_File::"),
7383 STR_WITH_LEN("GDBM_File::"),
7384 STR_WITH_LEN("SDBM_File::"),
7385 STR_WITH_LEN("ODBM_File::"),
7386 NULL);
a0d0e21e 7387 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7388
7389 case KEY_dbmclose:
7390 UNI(OP_DBMCLOSE);
7391
7392 case KEY_dump:
a0d0e21e 7393 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7394 LOOPX(OP_DUMP);
7395
7396 case KEY_else:
7397 PREBLOCK(ELSE);
7398
7399 case KEY_elsif:
6154021b 7400 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7401 OPERATOR(ELSIF);
7402
7403 case KEY_eq:
78cdf107
Z
7404 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7405 return REPORT(0);
79072805
LW
7406 Eop(OP_SEQ);
7407
a0d0e21e
LW
7408 case KEY_exists:
7409 UNI(OP_EXISTS);
4e553d73 7410
79072805 7411 case KEY_exit:
5db06880
NC
7412 if (PL_madskills)
7413 UNI(OP_INT);
79072805
LW
7414 UNI(OP_EXIT);
7415
7416 case KEY_eval:
29595ff2 7417 s = SKIPSPACE1(s);
32e2a35d
RGS
7418 if (*s == '{') { /* block eval */
7419 PL_expect = XTERMBLOCK;
7420 UNIBRACK(OP_ENTERTRY);
7421 }
7422 else { /* string eval */
7423 PL_expect = XTERM;
7424 UNIBRACK(OP_ENTEREVAL);
7425 }
79072805 7426
7d789282
FC
7427 case KEY_evalbytes:
7428 PL_expect = XTERM;
7429 UNIBRACK(-OP_ENTEREVAL);
7430
79072805
LW
7431 case KEY_eof:
7432 UNI(OP_EOF);
7433
7434 case KEY_exp:
7435 UNI(OP_EXP);
7436
7437 case KEY_each:
7438 UNI(OP_EACH);
7439
7440 case KEY_exec:
a0d0e21e 7441 LOP(OP_EXEC,XREF);
79072805
LW
7442
7443 case KEY_endhostent:
7444 FUN0(OP_EHOSTENT);
7445
7446 case KEY_endnetent:
7447 FUN0(OP_ENETENT);
7448
7449 case KEY_endservent:
7450 FUN0(OP_ESERVENT);
7451
7452 case KEY_endprotoent:
7453 FUN0(OP_EPROTOENT);
7454
7455 case KEY_endpwent:
7456 FUN0(OP_EPWENT);
7457
7458 case KEY_endgrent:
7459 FUN0(OP_EGRENT);
7460
7461 case KEY_for:
7462 case KEY_foreach:
78cdf107
Z
7463 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7464 return REPORT(0);
6154021b 7465 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7466 s = SKIPSPACE1(s);
7e2040f0 7467 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7468 char *p = s;
5db06880
NC
7469#ifdef PERL_MAD
7470 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7471#endif
7472
3280af22 7473 if ((PL_bufend - p) >= 3 &&
55497cff 7474 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7475 p += 2;
77ca0c92
LW
7476 else if ((PL_bufend - p) >= 4 &&
7477 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7478 p += 3;
29595ff2 7479 p = PEEKSPACE(p);
7e2040f0 7480 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7481 p = scan_ident(p, PL_bufend,
7482 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7483 p = PEEKSPACE(p);
77ca0c92
LW
7484 }
7485 if (*p != '$')
cea2e8a9 7486 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7487#ifdef PERL_MAD
7488 s = SvPVX(PL_linestr) + soff;
7489#endif
55497cff 7490 }
79072805
LW
7491 OPERATOR(FOR);
7492
7493 case KEY_formline:
a0d0e21e 7494 LOP(OP_FORMLINE,XTERM);
79072805
LW
7495
7496 case KEY_fork:
7497 FUN0(OP_FORK);
7498
838f2281
BF
7499 case KEY_fc:
7500 UNI(OP_FC);
7501
79072805 7502 case KEY_fcntl:
a0d0e21e 7503 LOP(OP_FCNTL,XTERM);
79072805
LW
7504
7505 case KEY_fileno:
7506 UNI(OP_FILENO);
7507
7508 case KEY_flock:
a0d0e21e 7509 LOP(OP_FLOCK,XTERM);
79072805
LW
7510
7511 case KEY_gt:
78cdf107
Z
7512 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7513 return REPORT(0);
79072805
LW
7514 Rop(OP_SGT);
7515
7516 case KEY_ge:
78cdf107
Z
7517 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7518 return REPORT(0);
79072805
LW
7519 Rop(OP_SGE);
7520
7521 case KEY_grep:
2c38e13d 7522 LOP(OP_GREPSTART, XREF);
79072805
LW
7523
7524 case KEY_goto:
a0d0e21e 7525 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7526 LOOPX(OP_GOTO);
7527
7528 case KEY_gmtime:
7529 UNI(OP_GMTIME);
7530
7531 case KEY_getc:
6f33ba73 7532 UNIDOR(OP_GETC);
79072805
LW
7533
7534 case KEY_getppid:
7535 FUN0(OP_GETPPID);
7536
7537 case KEY_getpgrp:
7538 UNI(OP_GETPGRP);
7539
7540 case KEY_getpriority:
a0d0e21e 7541 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7542
7543 case KEY_getprotobyname:
7544 UNI(OP_GPBYNAME);
7545
7546 case KEY_getprotobynumber:
a0d0e21e 7547 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7548
7549 case KEY_getprotoent:
7550 FUN0(OP_GPROTOENT);
7551
7552 case KEY_getpwent:
7553 FUN0(OP_GPWENT);
7554
7555 case KEY_getpwnam:
ff68c719 7556 UNI(OP_GPWNAM);
79072805
LW
7557
7558 case KEY_getpwuid:
ff68c719 7559 UNI(OP_GPWUID);
79072805
LW
7560
7561 case KEY_getpeername:
7562 UNI(OP_GETPEERNAME);
7563
7564 case KEY_gethostbyname:
7565 UNI(OP_GHBYNAME);
7566
7567 case KEY_gethostbyaddr:
a0d0e21e 7568 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7569
7570 case KEY_gethostent:
7571 FUN0(OP_GHOSTENT);
7572
7573 case KEY_getnetbyname:
7574 UNI(OP_GNBYNAME);
7575
7576 case KEY_getnetbyaddr:
a0d0e21e 7577 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7578
7579 case KEY_getnetent:
7580 FUN0(OP_GNETENT);
7581
7582 case KEY_getservbyname:
a0d0e21e 7583 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7584
7585 case KEY_getservbyport:
a0d0e21e 7586 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7587
7588 case KEY_getservent:
7589 FUN0(OP_GSERVENT);
7590
7591 case KEY_getsockname:
7592 UNI(OP_GETSOCKNAME);
7593
7594 case KEY_getsockopt:
a0d0e21e 7595 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7596
7597 case KEY_getgrent:
7598 FUN0(OP_GGRENT);
7599
7600 case KEY_getgrnam:
ff68c719 7601 UNI(OP_GGRNAM);
79072805
LW
7602
7603 case KEY_getgrgid:
ff68c719 7604 UNI(OP_GGRGID);
79072805
LW
7605
7606 case KEY_getlogin:
7607 FUN0(OP_GETLOGIN);
7608
0d863452 7609 case KEY_given:
6154021b 7610 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7611 OPERATOR(GIVEN);
7612
93a17b20 7613 case KEY_glob:
d67594ff
FC
7614 LOP(
7615 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7616 XTERM
7617 );
93a17b20 7618
79072805
LW
7619 case KEY_hex:
7620 UNI(OP_HEX);
7621
7622 case KEY_if:
78cdf107
Z
7623 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7624 return REPORT(0);
6154021b 7625 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7626 OPERATOR(IF);
7627
7628 case KEY_index:
a0d0e21e 7629 LOP(OP_INDEX,XTERM);
79072805
LW
7630
7631 case KEY_int:
7632 UNI(OP_INT);
7633
7634 case KEY_ioctl:
a0d0e21e 7635 LOP(OP_IOCTL,XTERM);
79072805
LW
7636
7637 case KEY_join:
a0d0e21e 7638 LOP(OP_JOIN,XTERM);
79072805
LW
7639
7640 case KEY_keys:
7641 UNI(OP_KEYS);
7642
7643 case KEY_kill:
a0d0e21e 7644 LOP(OP_KILL,XTERM);
79072805
LW
7645
7646 case KEY_last:
a0d0e21e 7647 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7648 LOOPX(OP_LAST);
4e553d73 7649
79072805
LW
7650 case KEY_lc:
7651 UNI(OP_LC);
7652
7653 case KEY_lcfirst:
7654 UNI(OP_LCFIRST);
7655
7656 case KEY_local:
6154021b 7657 pl_yylval.ival = 0;
79072805
LW
7658 OPERATOR(LOCAL);
7659
7660 case KEY_length:
7661 UNI(OP_LENGTH);
7662
7663 case KEY_lt:
78cdf107
Z
7664 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7665 return REPORT(0);
79072805
LW
7666 Rop(OP_SLT);
7667
7668 case KEY_le:
78cdf107
Z
7669 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7670 return REPORT(0);
79072805
LW
7671 Rop(OP_SLE);
7672
7673 case KEY_localtime:
7674 UNI(OP_LOCALTIME);
7675
7676 case KEY_log:
7677 UNI(OP_LOG);
7678
7679 case KEY_link:
a0d0e21e 7680 LOP(OP_LINK,XTERM);
79072805
LW
7681
7682 case KEY_listen:
a0d0e21e 7683 LOP(OP_LISTEN,XTERM);
79072805 7684
c0329465
MB
7685 case KEY_lock:
7686 UNI(OP_LOCK);
7687
79072805
LW
7688 case KEY_lstat:
7689 UNI(OP_LSTAT);
7690
7691 case KEY_m:
8782bef2 7692 s = scan_pat(s,OP_MATCH);
79072805
LW
7693 TERM(sublex_start());
7694
a0d0e21e 7695 case KEY_map:
2c38e13d 7696 LOP(OP_MAPSTART, XREF);
4e4e412b 7697
79072805 7698 case KEY_mkdir:
a0d0e21e 7699 LOP(OP_MKDIR,XTERM);
79072805
LW
7700
7701 case KEY_msgctl:
a0d0e21e 7702 LOP(OP_MSGCTL,XTERM);
79072805
LW
7703
7704 case KEY_msgget:
a0d0e21e 7705 LOP(OP_MSGGET,XTERM);
79072805
LW
7706
7707 case KEY_msgrcv:
a0d0e21e 7708 LOP(OP_MSGRCV,XTERM);
79072805
LW
7709
7710 case KEY_msgsnd:
a0d0e21e 7711 LOP(OP_MSGSND,XTERM);
79072805 7712
77ca0c92 7713 case KEY_our:
93a17b20 7714 case KEY_my:
952306ac 7715 case KEY_state:
eac04b2e 7716 PL_in_my = (U16)tmp;
29595ff2 7717 s = SKIPSPACE1(s);
7e2040f0 7718 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7719#ifdef PERL_MAD
7720 char* start = s;
7721#endif
3280af22 7722 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7723 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7724 goto really_sub;
def3634b 7725 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7726 if (!PL_in_my_stash) {
c750a3ec 7727 char tmpbuf[1024];
3280af22 7728 PL_bufptr = s;
d9fad198 7729 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
3c54b17a 7730 yyerror_pv(tmpbuf, UTF ? SVf_UTF8 : 0);
c750a3ec 7731 }
5db06880
NC
7732#ifdef PERL_MAD
7733 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7734 sv_catsv(PL_thistoken, PL_nextwhite);
7735 PL_nextwhite = 0;
7736 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7737 }
7738#endif
c750a3ec 7739 }
6154021b 7740 pl_yylval.ival = 1;
55497cff 7741 OPERATOR(MY);
93a17b20 7742
79072805 7743 case KEY_next:
a0d0e21e 7744 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7745 LOOPX(OP_NEXT);
7746
7747 case KEY_ne:
78cdf107
Z
7748 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7749 return REPORT(0);
79072805
LW
7750 Eop(OP_SNE);
7751
a0d0e21e 7752 case KEY_no:
468aa647 7753 s = tokenize_use(0, s);
a0d0e21e
LW
7754 OPERATOR(USE);
7755
7756 case KEY_not:
29595ff2 7757 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 7758 FUN1(OP_NOT);
78cdf107
Z
7759 else {
7760 if (!PL_lex_allbrackets &&
7761 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7762 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 7763 OPERATOR(NOTOP);
78cdf107 7764 }
a0d0e21e 7765
79072805 7766 case KEY_open:
29595ff2 7767 s = SKIPSPACE1(s);
7e2040f0 7768 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7769 const char *t;
71aa9713
BF
7770 for (d = s; isALNUM_lazy_if(d,UTF);) {
7771 d += UTF ? UTF8SKIP(d) : 1;
7772 if (UTF) {
7773 while (UTF8_IS_CONTINUED(*d) && is_utf8_mark((U8*)d)) {
7774 d += UTF ? UTF8SKIP(d) : 1;
7775 }
7776 }
7777 }
c35e046a
AL
7778 for (t=d; isSPACE(*t);)
7779 t++;
e2ab214b 7780 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7781 /* [perl #16184] */
7782 && !(t[0] == '=' && t[1] == '>')
db3abe52 7783 && !(t[0] == ':' && t[1] == ':')
240d1b6f 7784 && !keyword(s, d-s, 0)
66fbe8fb 7785 ) {
71aa9713
BF
7786 SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
7787 SVs_TEMP | (UTF ? SVf_UTF8 : 0));
9014280d 7788 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
71aa9713
BF
7789 "Precedence problem: open %"SVf" should be open(%"SVf")",
7790 SVfARG(tmpsv), SVfARG(tmpsv));
66fbe8fb 7791 }
93a17b20 7792 }
a0d0e21e 7793 LOP(OP_OPEN,XTERM);
79072805 7794
463ee0b2 7795 case KEY_or:
78cdf107
Z
7796 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7797 return REPORT(0);
6154021b 7798 pl_yylval.ival = OP_OR;
463ee0b2
LW
7799 OPERATOR(OROP);
7800
79072805
LW
7801 case KEY_ord:
7802 UNI(OP_ORD);
7803
7804 case KEY_oct:
7805 UNI(OP_OCT);
7806
7807 case KEY_opendir:
a0d0e21e 7808 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7809
7810 case KEY_print:
3280af22 7811 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7812 LOP(OP_PRINT,XREF);
79072805
LW
7813
7814 case KEY_printf:
3280af22 7815 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7816 LOP(OP_PRTF,XREF);
79072805 7817
c07a80fd 7818 case KEY_prototype:
7819 UNI(OP_PROTOTYPE);
7820
79072805 7821 case KEY_push:
a0d0e21e 7822 LOP(OP_PUSH,XTERM);
79072805
LW
7823
7824 case KEY_pop:
6f33ba73 7825 UNIDOR(OP_POP);
79072805 7826
a0d0e21e 7827 case KEY_pos:
6f33ba73 7828 UNIDOR(OP_POS);
4e553d73 7829
79072805 7830 case KEY_pack:
a0d0e21e 7831 LOP(OP_PACK,XTERM);
79072805
LW
7832
7833 case KEY_package:
a0d0e21e 7834 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7835 s = SKIPSPACE1(s);
91152fc1 7836 s = force_strict_version(s);
4e4da3ac 7837 PL_lex_expect = XBLOCK;
79072805
LW
7838 OPERATOR(PACKAGE);
7839
7840 case KEY_pipe:
a0d0e21e 7841 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7842
7843 case KEY_q:
d24ca0c5 7844 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 7845 if (!s)
d4c19fe8 7846 missingterm(NULL);
6154021b 7847 pl_yylval.ival = OP_CONST;
79072805
LW
7848 TERM(sublex_start());
7849
a0d0e21e
LW
7850 case KEY_quotemeta:
7851 UNI(OP_QUOTEMETA);
7852
ea25a9b2
Z
7853 case KEY_qw: {
7854 OP *words = NULL;
d24ca0c5 7855 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
8990e307 7856 if (!s)
d4c19fe8 7857 missingterm(NULL);
3480a8d2 7858 PL_expect = XOPERATOR;
8127e0e3 7859 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
7860 int warned_comma = !ckWARN(WARN_QW);
7861 int warned_comment = warned_comma;
3280af22 7862 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7863 while (len) {
d4c19fe8
AL
7864 for (; isSPACE(*d) && len; --len, ++d)
7865 /**/;
8127e0e3 7866 if (len) {
d4c19fe8 7867 SV *sv;
f54cb97a 7868 const char *b = d;
7e03b518 7869 if (!warned_comma || !warned_comment) {
8127e0e3 7870 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 7871 if (!warned_comma && *d == ',') {
9014280d 7872 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7873 "Possible attempt to separate words with commas");
7e03b518 7874 ++warned_comma;
8127e0e3 7875 }
7e03b518 7876 else if (!warned_comment && *d == '#') {
9014280d 7877 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7878 "Possible attempt to put comments in qw() list");
7e03b518 7879 ++warned_comment;
8127e0e3
GS
7880 }
7881 }
7882 }
7883 else {
d4c19fe8
AL
7884 for (; !isSPACE(*d) && len; --len, ++d)
7885 /**/;
8127e0e3 7886 }
740cce10 7887 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7888 words = op_append_elem(OP_LIST, words,
7948272d 7889 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7890 }
7891 }
7892 }
ea25a9b2
Z
7893 if (!words)
7894 words = newNULLLIST();
37fd879b 7895 if (PL_lex_stuff) {
8127e0e3 7896 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7897 PL_lex_stuff = NULL;
37fd879b 7898 }
ea25a9b2
Z
7899 PL_expect = XOPERATOR;
7900 pl_yylval.opval = sawparens(words);
7901 TOKEN(QWLIST);
7902 }
8990e307 7903
79072805 7904 case KEY_qq:
d24ca0c5 7905 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 7906 if (!s)
d4c19fe8 7907 missingterm(NULL);
6154021b 7908 pl_yylval.ival = OP_STRINGIFY;
3280af22 7909 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 7910 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
7911 TERM(sublex_start());
7912
8782bef2
GB
7913 case KEY_qr:
7914 s = scan_pat(s,OP_QR);
7915 TERM(sublex_start());
7916
79072805 7917 case KEY_qx:
d24ca0c5 7918 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 7919 if (!s)
d4c19fe8 7920 missingterm(NULL);
9b201d7d 7921 readpipe_override();
79072805
LW
7922 TERM(sublex_start());
7923
7924 case KEY_return:
7925 OLDLOP(OP_RETURN);
7926
7927 case KEY_require:
29595ff2 7928 s = SKIPSPACE1(s);
e759cc13
RGS
7929 if (isDIGIT(*s)) {
7930 s = force_version(s, FALSE);
a7cb1f99 7931 }
e759cc13
RGS
7932 else if (*s != 'v' || !isDIGIT(s[1])
7933 || (s = force_version(s, TRUE), *s == 'v'))
7934 {
a7cb1f99
GS
7935 *PL_tokenbuf = '\0';
7936 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7937 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
af9f5953
BF
7938 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7939 GV_ADD | (UTF ? SVf_UTF8 : 0));
a7cb1f99
GS
7940 else if (*s == '<')
7941 yyerror("<> should be quotes");
7942 }
a72a1c8b
RGS
7943 if (orig_keyword == KEY_require) {
7944 orig_keyword = 0;
6154021b 7945 pl_yylval.ival = 1;
a72a1c8b
RGS
7946 }
7947 else
6154021b 7948 pl_yylval.ival = 0;
a72a1c8b
RGS
7949 PL_expect = XTERM;
7950 PL_bufptr = s;
7951 PL_last_uni = PL_oldbufptr;
7952 PL_last_lop_op = OP_REQUIRE;
7953 s = skipspace(s);
7954 return REPORT( (int)REQUIRE );
79072805
LW
7955
7956 case KEY_reset:
7957 UNI(OP_RESET);
7958
7959 case KEY_redo:
a0d0e21e 7960 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7961 LOOPX(OP_REDO);
7962
7963 case KEY_rename:
a0d0e21e 7964 LOP(OP_RENAME,XTERM);
79072805
LW
7965
7966 case KEY_rand:
7967 UNI(OP_RAND);
7968
7969 case KEY_rmdir:
7970 UNI(OP_RMDIR);
7971
7972 case KEY_rindex:
a0d0e21e 7973 LOP(OP_RINDEX,XTERM);
79072805
LW
7974
7975 case KEY_read:
a0d0e21e 7976 LOP(OP_READ,XTERM);
79072805
LW
7977
7978 case KEY_readdir:
7979 UNI(OP_READDIR);
7980
93a17b20 7981 case KEY_readline:
6f33ba73 7982 UNIDOR(OP_READLINE);
93a17b20
LW
7983
7984 case KEY_readpipe:
0858480c 7985 UNIDOR(OP_BACKTICK);
93a17b20 7986
79072805
LW
7987 case KEY_rewinddir:
7988 UNI(OP_REWINDDIR);
7989
7990 case KEY_recv:
a0d0e21e 7991 LOP(OP_RECV,XTERM);
79072805
LW
7992
7993 case KEY_reverse:
a0d0e21e 7994 LOP(OP_REVERSE,XTERM);
79072805
LW
7995
7996 case KEY_readlink:
6f33ba73 7997 UNIDOR(OP_READLINK);
79072805
LW
7998
7999 case KEY_ref:
8000 UNI(OP_REF);
8001
8002 case KEY_s:
8003 s = scan_subst(s);
6154021b 8004 if (pl_yylval.opval)
79072805
LW
8005 TERM(sublex_start());
8006 else
8007 TOKEN(1); /* force error */
8008
0d863452
RH
8009 case KEY_say:
8010 checkcomma(s,PL_tokenbuf,"filehandle");
8011 LOP(OP_SAY,XREF);
8012
a0d0e21e
LW
8013 case KEY_chomp:
8014 UNI(OP_CHOMP);
4e553d73 8015
79072805
LW
8016 case KEY_scalar:
8017 UNI(OP_SCALAR);
8018
8019 case KEY_select:
a0d0e21e 8020 LOP(OP_SELECT,XTERM);
79072805
LW
8021
8022 case KEY_seek:
a0d0e21e 8023 LOP(OP_SEEK,XTERM);
79072805
LW
8024
8025 case KEY_semctl:
a0d0e21e 8026 LOP(OP_SEMCTL,XTERM);
79072805
LW
8027
8028 case KEY_semget:
a0d0e21e 8029 LOP(OP_SEMGET,XTERM);
79072805
LW
8030
8031 case KEY_semop:
a0d0e21e 8032 LOP(OP_SEMOP,XTERM);
79072805
LW
8033
8034 case KEY_send:
a0d0e21e 8035 LOP(OP_SEND,XTERM);
79072805
LW
8036
8037 case KEY_setpgrp:
a0d0e21e 8038 LOP(OP_SETPGRP,XTERM);
79072805
LW
8039
8040 case KEY_setpriority:
a0d0e21e 8041 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
8042
8043 case KEY_sethostent:
ff68c719 8044 UNI(OP_SHOSTENT);
79072805
LW
8045
8046 case KEY_setnetent:
ff68c719 8047 UNI(OP_SNETENT);
79072805
LW
8048
8049 case KEY_setservent:
ff68c719 8050 UNI(OP_SSERVENT);
79072805
LW
8051
8052 case KEY_setprotoent:
ff68c719 8053 UNI(OP_SPROTOENT);
79072805
LW
8054
8055 case KEY_setpwent:
8056 FUN0(OP_SPWENT);
8057
8058 case KEY_setgrent:
8059 FUN0(OP_SGRENT);
8060
8061 case KEY_seekdir:
a0d0e21e 8062 LOP(OP_SEEKDIR,XTERM);
79072805
LW
8063
8064 case KEY_setsockopt:
a0d0e21e 8065 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
8066
8067 case KEY_shift:
6f33ba73 8068 UNIDOR(OP_SHIFT);
79072805
LW
8069
8070 case KEY_shmctl:
a0d0e21e 8071 LOP(OP_SHMCTL,XTERM);
79072805
LW
8072
8073 case KEY_shmget:
a0d0e21e 8074 LOP(OP_SHMGET,XTERM);
79072805
LW
8075
8076 case KEY_shmread:
a0d0e21e 8077 LOP(OP_SHMREAD,XTERM);
79072805
LW
8078
8079 case KEY_shmwrite:
a0d0e21e 8080 LOP(OP_SHMWRITE,XTERM);
79072805
LW
8081
8082 case KEY_shutdown:
a0d0e21e 8083 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
8084
8085 case KEY_sin:
8086 UNI(OP_SIN);
8087
8088 case KEY_sleep:
8089 UNI(OP_SLEEP);
8090
8091 case KEY_socket:
a0d0e21e 8092 LOP(OP_SOCKET,XTERM);
79072805
LW
8093
8094 case KEY_socketpair:
a0d0e21e 8095 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
8096
8097 case KEY_sort:
3280af22 8098 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 8099 s = SKIPSPACE1(s);
3280af22 8100 PL_expect = XTERM;
15f0808c 8101 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 8102 LOP(OP_SORT,XREF);
79072805
LW
8103
8104 case KEY_split:
a0d0e21e 8105 LOP(OP_SPLIT,XTERM);
79072805
LW
8106
8107 case KEY_sprintf:
a0d0e21e 8108 LOP(OP_SPRINTF,XTERM);
79072805
LW
8109
8110 case KEY_splice:
a0d0e21e 8111 LOP(OP_SPLICE,XTERM);
79072805
LW
8112
8113 case KEY_sqrt:
8114 UNI(OP_SQRT);
8115
8116 case KEY_srand:
8117 UNI(OP_SRAND);
8118
8119 case KEY_stat:
8120 UNI(OP_STAT);
8121
8122 case KEY_study:
79072805
LW
8123 UNI(OP_STUDY);
8124
8125 case KEY_substr:
a0d0e21e 8126 LOP(OP_SUBSTR,XTERM);
79072805
LW
8127
8128 case KEY_format:
8129 case KEY_sub:
93a17b20 8130 really_sub:
09bef843 8131 {
3280af22 8132 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 8133 SSize_t tboffset = 0;
09bef843 8134 expectation attrful;
28cc6278 8135 bool have_name, have_proto;
f54cb97a 8136 const int key = tmp;
09bef843 8137
5db06880
NC
8138#ifdef PERL_MAD
8139 SV *tmpwhite = 0;
8140
cd81e915 8141 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
af9f5953 8142 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
cd81e915 8143 PL_thistoken = 0;
5db06880
NC
8144
8145 d = s;
8146 s = SKIPSPACE2(s,tmpwhite);
8147#else
09bef843 8148 s = skipspace(s);
5db06880 8149#endif
09bef843 8150
7e2040f0 8151 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
8152 (*s == ':' && s[1] == ':'))
8153 {
5db06880 8154#ifdef PERL_MAD
4f61fd4b 8155 SV *nametoke = NULL;
5db06880
NC
8156#endif
8157
09bef843
SB
8158 PL_expect = XBLOCK;
8159 attrful = XATTRBLOCK;
b1b65b59
JH
8160 /* remember buffer pos'n for later force_word */
8161 tboffset = s - PL_oldbufptr;
09bef843 8162 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
8163#ifdef PERL_MAD
8164 if (PL_madskills)
af9f5953 8165 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
5db06880 8166#endif
6502358f
NC
8167 if (memchr(tmpbuf, ':', len))
8168 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
8169 else {
8170 sv_setsv(PL_subname,PL_curstname);
396482e1 8171 sv_catpvs(PL_subname,"::");
09bef843
SB
8172 sv_catpvn(PL_subname,tmpbuf,len);
8173 }
af9f5953
BF
8174 if (SvUTF8(PL_linestr))
8175 SvUTF8_on(PL_subname);
09bef843 8176 have_name = TRUE;
5db06880
NC
8177
8178#ifdef PERL_MAD
8179
8180 start_force(0);
8181 CURMAD('X', nametoke);
8182 CURMAD('_', tmpwhite);
8183 (void) force_word(PL_oldbufptr + tboffset, WORD,
8184 FALSE, TRUE, TRUE);
8185
8186 s = SKIPSPACE2(d,tmpwhite);
8187#else
8188 s = skipspace(d);
8189#endif
09bef843 8190 }
463ee0b2 8191 else {
09bef843
SB
8192 if (key == KEY_my)
8193 Perl_croak(aTHX_ "Missing name in \"my sub\"");
8194 PL_expect = XTERMBLOCK;
8195 attrful = XATTRTERM;
76f68e9b 8196 sv_setpvs(PL_subname,"?");
09bef843 8197 have_name = FALSE;
463ee0b2 8198 }
4633a7c4 8199
09bef843
SB
8200 if (key == KEY_format) {
8201 if (*s == '=')
8202 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 8203#ifdef PERL_MAD
cd81e915 8204 PL_thistoken = subtoken;
5db06880
NC
8205 s = d;
8206#else
09bef843 8207 if (have_name)
b1b65b59
JH
8208 (void) force_word(PL_oldbufptr + tboffset, WORD,
8209 FALSE, TRUE, TRUE);
5db06880 8210#endif
09bef843
SB
8211 OPERATOR(FORMAT);
8212 }
79072805 8213
09bef843
SB
8214 /* Look for a prototype */
8215 if (*s == '(') {
d9f2850e
RGS
8216 char *p;
8217 bool bad_proto = FALSE;
9e8d7757
RB
8218 bool in_brackets = FALSE;
8219 char greedy_proto = ' ';
8220 bool proto_after_greedy_proto = FALSE;
8221 bool must_be_last = FALSE;
8222 bool underscore = FALSE;
aef2a98a 8223 bool seen_underscore = FALSE;
197afce1 8224 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
dab1c735 8225 STRLEN tmplen;
09bef843 8226
d24ca0c5 8227 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
37fd879b 8228 if (!s)
09bef843 8229 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8230 /* strip spaces and check for bad characters */
dab1c735 8231 d = SvPV(PL_lex_stuff, tmplen);
09bef843 8232 tmp = 0;
dab1c735 8233 for (p = d; tmplen; tmplen--, ++p) {
d9f2850e 8234 if (!isSPACE(*p)) {
dab1c735 8235 d[tmp++] = *p;
9e8d7757 8236
197afce1 8237 if (warnillegalproto) {
9e8d7757
RB
8238 if (must_be_last)
8239 proto_after_greedy_proto = TRUE;
dab1c735 8240 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
9e8d7757
RB
8241 bad_proto = TRUE;
8242 }
8243 else {
8244 if ( underscore ) {
34daab0f 8245 if ( !strchr(";@%", *p) )
9e8d7757
RB
8246 bad_proto = TRUE;
8247 underscore = FALSE;
8248 }
8249 if ( *p == '[' ) {
8250 in_brackets = TRUE;
8251 }
8252 else if ( *p == ']' ) {
8253 in_brackets = FALSE;
8254 }
8255 else if ( (*p == '@' || *p == '%') &&
8256 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8257 !in_brackets ) {
8258 must_be_last = TRUE;
8259 greedy_proto = *p;
8260 }
8261 else if ( *p == '_' ) {
aef2a98a 8262 underscore = seen_underscore = TRUE;
9e8d7757
RB
8263 }
8264 }
8265 }
d37a9538 8266 }
09bef843 8267 }
dab1c735 8268 d[tmp] = '\0';
9e8d7757 8269 if (proto_after_greedy_proto)
197afce1 8270 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8271 "Prototype after '%c' for %"SVf" : %s",
8272 greedy_proto, SVfARG(PL_subname), d);
dab1c735
BF
8273 if (bad_proto) {
8274 SV *dsv = newSVpvs_flags("", SVs_TEMP);
197afce1 8275 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8276 "Illegal character %sin prototype for %"SVf" : %s",
8277 seen_underscore ? "after '_' " : "",
dab1c735 8278 SVfARG(PL_subname),
97eb901d
BF
8279 SvUTF8(PL_lex_stuff)
8280 ? sv_uni_display(dsv,
8281 newSVpvn_flags(d, tmp, SVs_TEMP | SVf_UTF8),
8282 tmp,
8283 UNI_DISPLAY_ISPRINT)
8284 : pv_pretty(dsv, d, tmp, 60, NULL, NULL,
8285 PERL_PV_ESCAPE_NONASCII));
dab1c735
BF
8286 }
8287 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8288 have_proto = TRUE;
68dc0745 8289
5db06880
NC
8290#ifdef PERL_MAD
8291 start_force(0);
cd81e915 8292 CURMAD('q', PL_thisopen);
5db06880 8293 CURMAD('_', tmpwhite);
cd81e915
NC
8294 CURMAD('=', PL_thisstuff);
8295 CURMAD('Q', PL_thisclose);
5db06880
NC
8296 NEXTVAL_NEXTTOKE.opval =
8297 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8298 PL_lex_stuff = NULL;
5db06880
NC
8299 force_next(THING);
8300
8301 s = SKIPSPACE2(s,tmpwhite);
8302#else
09bef843 8303 s = skipspace(s);
5db06880 8304#endif
4633a7c4 8305 }
09bef843
SB
8306 else
8307 have_proto = FALSE;
8308
8309 if (*s == ':' && s[1] != ':')
8310 PL_expect = attrful;
8e742a20
MHM
8311 else if (*s != '{' && key == KEY_sub) {
8312 if (!have_name)
8313 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8314 else if (*s != ';' && *s != '}')
be2597df 8315 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8316 }
09bef843 8317
5db06880
NC
8318#ifdef PERL_MAD
8319 start_force(0);
8320 if (tmpwhite) {
8321 if (PL_madskills)
6b29d1f5 8322 curmad('^', newSVpvs(""));
5db06880
NC
8323 CURMAD('_', tmpwhite);
8324 }
8325 force_next(0);
8326
cd81e915 8327 PL_thistoken = subtoken;
5db06880 8328#else
09bef843 8329 if (have_proto) {
9ded7720 8330 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8331 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8332 PL_lex_stuff = NULL;
09bef843 8333 force_next(THING);
68dc0745 8334 }
5db06880 8335#endif
09bef843 8336 if (!have_name) {
49a54bbe
NC
8337 if (PL_curstash)
8338 sv_setpvs(PL_subname, "__ANON__");
8339 else
8340 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8341 TOKEN(ANONSUB);
4633a7c4 8342 }
5db06880 8343#ifndef PERL_MAD
b1b65b59
JH
8344 (void) force_word(PL_oldbufptr + tboffset, WORD,
8345 FALSE, TRUE, TRUE);
5db06880 8346#endif
09bef843
SB
8347 if (key == KEY_my)
8348 TOKEN(MYSUB);
8349 TOKEN(SUB);
4633a7c4 8350 }
79072805
LW
8351
8352 case KEY_system:
a0d0e21e 8353 LOP(OP_SYSTEM,XREF);
79072805
LW
8354
8355 case KEY_symlink:
a0d0e21e 8356 LOP(OP_SYMLINK,XTERM);
79072805
LW
8357
8358 case KEY_syscall:
a0d0e21e 8359 LOP(OP_SYSCALL,XTERM);
79072805 8360
c07a80fd 8361 case KEY_sysopen:
8362 LOP(OP_SYSOPEN,XTERM);
8363
137443ea 8364 case KEY_sysseek:
8365 LOP(OP_SYSSEEK,XTERM);
8366
79072805 8367 case KEY_sysread:
a0d0e21e 8368 LOP(OP_SYSREAD,XTERM);
79072805
LW
8369
8370 case KEY_syswrite:
a0d0e21e 8371 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8372
8373 case KEY_tr:
8374 s = scan_trans(s);
8375 TERM(sublex_start());
8376
8377 case KEY_tell:
8378 UNI(OP_TELL);
8379
8380 case KEY_telldir:
8381 UNI(OP_TELLDIR);
8382
463ee0b2 8383 case KEY_tie:
a0d0e21e 8384 LOP(OP_TIE,XTERM);
463ee0b2 8385
c07a80fd 8386 case KEY_tied:
8387 UNI(OP_TIED);
8388
79072805
LW
8389 case KEY_time:
8390 FUN0(OP_TIME);
8391
8392 case KEY_times:
8393 FUN0(OP_TMS);
8394
8395 case KEY_truncate:
a0d0e21e 8396 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8397
8398 case KEY_uc:
8399 UNI(OP_UC);
8400
8401 case KEY_ucfirst:
8402 UNI(OP_UCFIRST);
8403
463ee0b2
LW
8404 case KEY_untie:
8405 UNI(OP_UNTIE);
8406
79072805 8407 case KEY_until:
78cdf107
Z
8408 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8409 return REPORT(0);
6154021b 8410 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8411 OPERATOR(UNTIL);
8412
8413 case KEY_unless:
78cdf107
Z
8414 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8415 return REPORT(0);
6154021b 8416 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8417 OPERATOR(UNLESS);
8418
8419 case KEY_unlink:
a0d0e21e 8420 LOP(OP_UNLINK,XTERM);
79072805
LW
8421
8422 case KEY_undef:
6f33ba73 8423 UNIDOR(OP_UNDEF);
79072805
LW
8424
8425 case KEY_unpack:
a0d0e21e 8426 LOP(OP_UNPACK,XTERM);
79072805
LW
8427
8428 case KEY_utime:
a0d0e21e 8429 LOP(OP_UTIME,XTERM);
79072805
LW
8430
8431 case KEY_umask:
6f33ba73 8432 UNIDOR(OP_UMASK);
79072805
LW
8433
8434 case KEY_unshift:
a0d0e21e
LW
8435 LOP(OP_UNSHIFT,XTERM);
8436
8437 case KEY_use:
468aa647 8438 s = tokenize_use(1, s);
a0d0e21e 8439 OPERATOR(USE);
79072805
LW
8440
8441 case KEY_values:
8442 UNI(OP_VALUES);
8443
8444 case KEY_vec:
a0d0e21e 8445 LOP(OP_VEC,XTERM);
79072805 8446
0d863452 8447 case KEY_when:
78cdf107
Z
8448 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8449 return REPORT(0);
6154021b 8450 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8451 OPERATOR(WHEN);
8452
79072805 8453 case KEY_while:
78cdf107
Z
8454 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8455 return REPORT(0);
6154021b 8456 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8457 OPERATOR(WHILE);
8458
8459 case KEY_warn:
3280af22 8460 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8461 LOP(OP_WARN,XTERM);
79072805
LW
8462
8463 case KEY_wait:
8464 FUN0(OP_WAIT);
8465
8466 case KEY_waitpid:
a0d0e21e 8467 LOP(OP_WAITPID,XTERM);
79072805
LW
8468
8469 case KEY_wantarray:
8470 FUN0(OP_WANTARRAY);
8471
8472 case KEY_write:
9d116dd7
JH
8473#ifdef EBCDIC
8474 {
df3728a2
JH
8475 char ctl_l[2];
8476 ctl_l[0] = toCTRL('L');
8477 ctl_l[1] = '\0';
fafc274c 8478 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8479 }
8480#else
fafc274c
NC
8481 /* Make sure $^L is defined */
8482 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8483#endif
79072805
LW
8484 UNI(OP_ENTERWRITE);
8485
8486 case KEY_x:
78cdf107
Z
8487 if (PL_expect == XOPERATOR) {
8488 if (*s == '=' && !PL_lex_allbrackets &&
8489 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8490 return REPORT(0);
79072805 8491 Mop(OP_REPEAT);
78cdf107 8492 }
79072805
LW
8493 check_uni();
8494 goto just_a_word;
8495
a0d0e21e 8496 case KEY_xor:
78cdf107
Z
8497 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8498 return REPORT(0);
6154021b 8499 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
8500 OPERATOR(OROP);
8501
79072805
LW
8502 case KEY_y:
8503 s = scan_trans(s);
8504 TERM(sublex_start());
8505 }
49dc05e3 8506 }}
79072805 8507}
bf4acbe4
GS
8508#ifdef __SC__
8509#pragma segment Main
8510#endif
79072805 8511
e930465f
JH
8512static int
8513S_pending_ident(pTHX)
8eceec63 8514{
97aff369 8515 dVAR;
bbd11bfc 8516 PADOFFSET tmp = 0;
8eceec63
SC
8517 /* pit holds the identifier we read and pending_ident is reset */
8518 char pit = PL_pending_ident;
9bde8eb0
NC
8519 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8520 /* All routes through this function want to know if there is a colon. */
c099d646 8521 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8522 PL_pending_ident = 0;
8523
cd81e915 8524 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8525 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8526 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8527
8528 /* if we're in a my(), we can't allow dynamics here.
8529 $foo'bar has already been turned into $foo::bar, so
8530 just check for colons.
8531
8532 if it's a legal name, the OP is a PADANY.
8533 */
8534 if (PL_in_my) {
8535 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8536 if (has_colon)
4bca4ee0 8537 yyerror_pv(Perl_form(aTHX_ "No package name allowed for "
8eceec63 8538 "variable %s in \"our\"",
4bca4ee0 8539 PL_tokenbuf), UTF ? SVf_UTF8 : 0);
bc9b26ca 8540 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8541 }
8542 else {
9bde8eb0 8543 if (has_colon)
58576270
BF
8544 yyerror_pv(Perl_form(aTHX_ PL_no_myglob,
8545 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf),
8546 UTF ? SVf_UTF8 : 0);
8eceec63 8547
6154021b 8548 pl_yylval.opval = newOP(OP_PADANY, 0);
bc9b26ca
BF
8549 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8550 UTF ? SVf_UTF8 : 0);
8eceec63
SC
8551 return PRIVATEREF;
8552 }
8553 }
8554
8555 /*
8556 build the ops for accesses to a my() variable.
8eceec63
SC
8557 */
8558
9bde8eb0 8559 if (!has_colon) {
8716503d 8560 if (!PL_in_my)
bc9b26ca
BF
8561 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8562 UTF ? SVf_UTF8 : 0);
8716503d 8563 if (tmp != NOT_IN_PAD) {
8eceec63 8564 /* might be an "our" variable" */
00b1698f 8565 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8566 /* build ops for a bareword */
b64e5050
AL
8567 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8568 HEK * const stashname = HvNAME_HEK(stash);
8569 SV * const sym = newSVhek(stashname);
396482e1 8570 sv_catpvs(sym, "::");
2a33114a 8571 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
6154021b
RGS
8572 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8573 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8574 gv_fetchsv(sym,
8eceec63
SC
8575 (PL_in_eval
8576 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8577 : GV_ADDMULTI
8eceec63
SC
8578 ),
8579 ((PL_tokenbuf[0] == '$') ? SVt_PV
8580 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8581 : SVt_PVHV));
8582 return WORD;
8583 }
8584
6154021b
RGS
8585 pl_yylval.opval = newOP(OP_PADANY, 0);
8586 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8587 return PRIVATEREF;
8588 }
8589 }
8590
8591 /*
8592 Whine if they've said @foo in a doublequoted string,
8593 and @foo isn't a variable we can find in the symbol
8594 table.
8595 */
d824713b
NC
8596 if (ckWARN(WARN_AMBIGUOUS) &&
8597 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
0be4d16f
BF
8598 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8599 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8eceec63 8600 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8601 /* DO NOT warn for @- and @+ */
8602 && !( PL_tokenbuf[2] == '\0' &&
8603 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8604 )
8eceec63
SC
8605 {
8606 /* Downgraded from fatal to warning 20000522 mjd */
d824713b 8607 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
29fb1d0e
BF
8608 "Possible unintended interpolation of %"SVf" in string",
8609 SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
8610 SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
8eceec63
SC
8611 }
8612 }
8613
8614 /* build ops for a bareword */
0be4d16f
BF
8615 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8616 tokenbuf_len - 1,
8617 UTF ? SVf_UTF8 : 0 ));
6154021b 8618 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7 8619 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
0be4d16f
BF
8620 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8621 | ( UTF ? SVf_UTF8 : 0 ),
223f0fb7
NC
8622 ((PL_tokenbuf[0] == '$') ? SVt_PV
8623 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8624 : SVt_PVHV));
8eceec63
SC
8625 return WORD;
8626}
8627
76e3520e 8628STATIC void
c94115d8 8629S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8630{
97aff369 8631 dVAR;
2f3197b3 8632
7918f24d
NC
8633 PERL_ARGS_ASSERT_CHECKCOMMA;
8634
d008e5eb 8635 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8636 if (ckWARN(WARN_SYNTAX)) {
8637 int level = 1;
26ff0806 8638 const char *w;
d008e5eb
GS
8639 for (w = s+2; *w && level; w++) {
8640 if (*w == '(')
8641 ++level;
8642 else if (*w == ')')
8643 --level;
8644 }
888fea98
NC
8645 while (isSPACE(*w))
8646 ++w;
b1439985
RGS
8647 /* the list of chars below is for end of statements or
8648 * block / parens, boolean operators (&&, ||, //) and branch
8649 * constructs (or, and, if, until, unless, while, err, for).
8650 * Not a very solid hack... */
8651 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8652 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8653 "%s (...) interpreted as function",name);
d008e5eb 8654 }
2f3197b3 8655 }
3280af22 8656 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8657 s++;
a687059c
LW
8658 if (*s == '(')
8659 s++;
3280af22 8660 while (s < PL_bufend && isSPACE(*s))
a687059c 8661 s++;
7e2040f0 8662 if (isIDFIRST_lazy_if(s,UTF)) {
d0fb66e4
BF
8663 const char * const w = s;
8664 s += UTF ? UTF8SKIP(s) : 1;
7e2040f0 8665 while (isALNUM_lazy_if(s,UTF))
d0fb66e4 8666 s += UTF ? UTF8SKIP(s) : 1;
3280af22 8667 while (s < PL_bufend && isSPACE(*s))
a687059c 8668 s++;
e929a76b 8669 if (*s == ',') {
c94115d8 8670 GV* gv;
5458a98a 8671 if (keyword(w, s - w, 0))
e929a76b 8672 return;
c94115d8 8673
2e38bce1 8674 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
c94115d8 8675 if (gv && GvCVu(gv))
abbb3198 8676 return;
cea2e8a9 8677 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8678 }
8679 }
8680}
8681
423cee85
JH
8682/* Either returns sv, or mortalizes sv and returns a new SV*.
8683 Best used as sv=new_constant(..., sv, ...).
8684 If s, pv are NULL, calls subroutine with one argument,
8685 and type is used with error messages only. */
8686
b3ac6de7 8687STATIC SV *
eb0d8d16
NC
8688S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8689 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 8690{
27da23d5 8691 dVAR; dSP;
fbb93542 8692 HV * table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 8693 SV *res;
b3ac6de7
IZ
8694 SV **cvp;
8695 SV *cv, *typesv;
89e33a05 8696 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 8697
7918f24d
NC
8698 PERL_ARGS_ASSERT_NEW_CONSTANT;
8699
f8988b41
KW
8700 /* charnames doesn't work well if there have been errors found */
8701 if (PL_error_count > 0 && strEQ(key,"charnames"))
8702 return &PL_sv_undef;
8703
fbb93542
KW
8704 if (!table
8705 || ! (PL_hints & HINT_LOCALIZE_HH)
8706 || ! (cvp = hv_fetch(table, key, keylen, FALSE))
8707 || ! SvOK(*cvp))
8708 {
423cee85
JH
8709 SV *msg;
8710
fbb93542
KW
8711 /* Here haven't found what we're looking for. If it is charnames,
8712 * perhaps it needs to be loaded. Try doing that before giving up */
8713 if (strEQ(key,"charnames")) {
8714 Perl_load_module(aTHX_
8715 0,
8716 newSVpvs("_charnames"),
8717 /* version parameter; no need to specify it, as if
8718 * we get too early a version, will fail anyway,
8719 * not being able to find '_charnames' */
8720 NULL,
8721 newSVpvs(":full"),
8722 newSVpvs(":short"),
8723 NULL);
8724 SPAGAIN;
8725 table = GvHV(PL_hintgv);
8726 if (table
8727 && (PL_hints & HINT_LOCALIZE_HH)
8728 && (cvp = hv_fetch(table, key, keylen, FALSE))
8729 && SvOK(*cvp))
8730 {
8731 goto now_ok;
8732 }
8733 }
8734 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
8735 msg = Perl_newSVpvf(aTHX_
8736 "Constant(%s) unknown", (type ? type: "undef"));
8737 }
8738 else {
8739 why1 = "$^H{";
8740 why2 = key;
8741 why3 = "} is not defined";
423cee85 8742 report:
4e553d73 8743 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 8744 (type ? type: "undef"), why1, why2, why3);
fbb93542 8745 }
95a20fc0 8746 yyerror(SvPVX_const(msg));
423cee85
JH
8747 SvREFCNT_dec(msg);
8748 return sv;
8749 }
fbb93542 8750now_ok:
b3ac6de7
IZ
8751 sv_2mortal(sv); /* Parent created it permanently */
8752 cv = *cvp;
423cee85 8753 if (!pv && s)
59cd0e26 8754 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 8755 if (type && pv)
59cd0e26 8756 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 8757 else
423cee85 8758 typesv = &PL_sv_undef;
4e553d73 8759
e788e7d3 8760 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
8761 ENTER ;
8762 SAVETMPS;
4e553d73 8763
423cee85 8764 PUSHMARK(SP) ;
a5845cb7 8765 EXTEND(sp, 3);
423cee85
JH
8766 if (pv)
8767 PUSHs(pv);
b3ac6de7 8768 PUSHs(sv);
423cee85
JH
8769 if (pv)
8770 PUSHs(typesv);
b3ac6de7 8771 PUTBACK;
423cee85 8772 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 8773
423cee85 8774 SPAGAIN ;
4e553d73 8775
423cee85 8776 /* Check the eval first */
9b0e499b 8777 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 8778 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 8779 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 8780 (void)POPs;
b37c2d43 8781 res = SvREFCNT_inc_simple(sv);
423cee85
JH
8782 }
8783 else {
8784 res = POPs;
b37c2d43 8785 SvREFCNT_inc_simple_void(res);
423cee85 8786 }
4e553d73 8787
423cee85
JH
8788 PUTBACK ;
8789 FREETMPS ;
8790 LEAVE ;
b3ac6de7 8791 POPSTACK;
4e553d73 8792
b3ac6de7 8793 if (!SvOK(res)) {
423cee85
JH
8794 why1 = "Call to &{$^H{";
8795 why2 = key;
f0af216f 8796 why3 = "}} did not return a defined value";
423cee85
JH
8797 sv = res;
8798 goto report;
9b0e499b 8799 }
423cee85 8800
9b0e499b 8801 return res;
b3ac6de7 8802}
4e553d73 8803
d0a148a6
NC
8804/* Returns a NUL terminated string, with the length of the string written to
8805 *slp
8806 */
76e3520e 8807STATIC char *
cea2e8a9 8808S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 8809{
97aff369 8810 dVAR;
463ee0b2 8811 register char *d = dest;
890ce7af 8812 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
8813
8814 PERL_ARGS_ASSERT_SCAN_WORD;
8815
463ee0b2 8816 for (;;) {
8903cb82 8817 if (d >= e)
cea2e8a9 8818 Perl_croak(aTHX_ ident_too_long);
5db1eb8d 8819 if (isALNUM(*s) || (!UTF && isALNUMC_L1(*s))) /* UTF handled below */
463ee0b2 8820 *d++ = *s++;
c35e046a 8821 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8822 *d++ = ':';
8823 *d++ = ':';
8824 s++;
8825 }
c35e046a 8826 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
8827 *d++ = *s++;
8828 *d++ = *s++;
8829 }
fd400ab9 8830 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8831 char *t = s + UTF8SKIP(s);
c35e046a 8832 size_t len;
fd400ab9 8833 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 8834 t += UTF8SKIP(t);
c35e046a
AL
8835 len = t - s;
8836 if (d + len > e)
cea2e8a9 8837 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
8838 Copy(s, d, len, char);
8839 d += len;
a0ed51b3
LW
8840 s = t;
8841 }
463ee0b2
LW
8842 else {
8843 *d = '\0';
8844 *slp = d - dest;
8845 return s;
e929a76b 8846 }
378cc40b
LW
8847 }
8848}
8849
76e3520e 8850STATIC char *
f54cb97a 8851S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 8852{
97aff369 8853 dVAR;
6136c704 8854 char *bracket = NULL;
748a9306 8855 char funny = *s++;
6136c704 8856 register char *d = dest;
0b3da58d 8857 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 8858
7918f24d
NC
8859 PERL_ARGS_ASSERT_SCAN_IDENT;
8860
a0d0e21e 8861 if (isSPACE(*s))
29595ff2 8862 s = PEEKSPACE(s);
de3bb511 8863 if (isDIGIT(*s)) {
8903cb82 8864 while (isDIGIT(*s)) {
8865 if (d >= e)
cea2e8a9 8866 Perl_croak(aTHX_ ident_too_long);
378cc40b 8867 *d++ = *s++;
8903cb82 8868 }
378cc40b
LW
8869 }
8870 else {
463ee0b2 8871 for (;;) {
8903cb82 8872 if (d >= e)
cea2e8a9 8873 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8874 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8875 *d++ = *s++;
7e2040f0 8876 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8877 *d++ = ':';
8878 *d++ = ':';
8879 s++;
8880 }
a0d0e21e 8881 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
8882 *d++ = *s++;
8883 *d++ = *s++;
8884 }
fd400ab9 8885 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8886 char *t = s + UTF8SKIP(s);
fd400ab9 8887 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
8888 t += UTF8SKIP(t);
8889 if (d + (t - s) > e)
cea2e8a9 8890 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
8891 Copy(s, d, t - s, char);
8892 d += t - s;
8893 s = t;
8894 }
463ee0b2
LW
8895 else
8896 break;
8897 }
378cc40b
LW
8898 }
8899 *d = '\0';
8900 d = dest;
79072805 8901 if (*d) {
3280af22
NIS
8902 if (PL_lex_state != LEX_NORMAL)
8903 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 8904 return s;
378cc40b 8905 }
748a9306 8906 if (*s == '$' && s[1] &&
3792a11b 8907 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 8908 {
4810e5ec 8909 return s;
5cd24f17 8910 }
79072805
LW
8911 if (*s == '{') {
8912 bracket = s;
8913 s++;
8914 }
204e6232
BF
8915 if (s < send) {
8916 if (UTF) {
8917 const STRLEN skip = UTF8SKIP(s);
8918 STRLEN i;
8919 d[skip] = '\0';
8920 for ( i = 0; i < skip; i++ )
8921 d[i] = *s++;
8922 }
8923 else {
8924 *d = *s++;
8925 d[1] = '\0';
8926 }
8927 }
2b92dfce 8928 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 8929 *d = toCTRL(*s);
8930 s++;
de3bb511 8931 }
fbdd83da
DIM
8932 else if (ck_uni && !bracket)
8933 check_uni();
79072805 8934 if (bracket) {
748a9306 8935 if (isSPACE(s[-1])) {
fa83b5b6 8936 while (s < send) {
f54cb97a 8937 const char ch = *s++;
bf4acbe4 8938 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 8939 *d = ch;
8940 break;
8941 }
8942 }
748a9306 8943 }
7e2040f0 8944 if (isIDFIRST_lazy_if(d,UTF)) {
204e6232 8945 d += UTF8SKIP(d);
a0ed51b3 8946 if (UTF) {
6136c704
AL
8947 char *end = s;
8948 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8949 end += UTF8SKIP(end);
8950 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8951 end += UTF8SKIP(end);
a0ed51b3 8952 }
6136c704
AL
8953 Copy(s, d, end - s, char);
8954 d += end - s;
8955 s = end;
a0ed51b3
LW
8956 }
8957 else {
2b92dfce 8958 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 8959 *d++ = *s++;
2b92dfce 8960 if (d >= e)
cea2e8a9 8961 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 8962 }
79072805 8963 *d = '\0';
c35e046a
AL
8964 while (s < send && SPACE_OR_TAB(*s))
8965 s++;
ff68c719 8966 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 8967 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
8968 const char * const brack =
8969 (const char *)
8970 ((*s == '[') ? "[...]" : "{...}");
e850844c 8971 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 8972 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 8973 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
8974 funny, dest, brack, funny, dest, brack);
8975 }
79072805 8976 bracket++;
a0be28da 8977 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 8978 PL_lex_allbrackets++;
79072805
LW
8979 return s;
8980 }
4e553d73
NIS
8981 }
8982 /* Handle extended ${^Foo} variables
2b92dfce
GS
8983 * 1999-02-27 mjd-perl-patch@plover.com */
8984 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8985 && isALNUM(*s))
8986 {
8987 d++;
8988 while (isALNUM(*s) && d < e) {
8989 *d++ = *s++;
8990 }
8991 if (d >= e)
cea2e8a9 8992 Perl_croak(aTHX_ ident_too_long);
2b92dfce 8993 *d = '\0';
79072805
LW
8994 }
8995 if (*s == '}') {
8996 s++;
7df0d042 8997 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 8998 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
8999 PL_expect = XREF;
9000 }
d008e5eb 9001 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 9002 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241 9003 (keyword(dest, d - dest, 0)
5c66c3dd 9004 || get_cvn_flags(dest, d - dest, UTF ? SVf_UTF8 : 0)))
d008e5eb 9005 {
5c66c3dd
BF
9006 SV *tmp = newSVpvn_flags( dest, d - dest,
9007 SVs_TEMP | (UTF ? SVf_UTF8 : 0) );
c35e046a
AL
9008 if (funny == '#')
9009 funny = '@';
9014280d 9010 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5c66c3dd
BF
9011 "Ambiguous use of %c{%"SVf"} resolved to %c%"SVf,
9012 funny, tmp, funny, tmp);
d008e5eb
GS
9013 }
9014 }
79072805
LW
9015 }
9016 else {
9017 s = bracket; /* let the parser handle it */
93a17b20 9018 *dest = '\0';
79072805
LW
9019 }
9020 }
3280af22
NIS
9021 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
9022 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
9023 return s;
9024}
9025
858a358b 9026static bool
3955e1a9 9027S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
9028
9029 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
9030 * the parse starting at 's', based on the subset that are valid in this
9031 * context input to this routine in 'valid_flags'. Advances s. Returns
96f3bfda
KW
9032 * TRUE if the input should be treated as a valid flag, so the next char
9033 * may be as well; otherwise FALSE. 'charset' should point to a NUL upon
9034 * first call on the current regex. This routine will set it to any
9035 * charset modifier found. The caller shouldn't change it. This way,
9036 * another charset modifier encountered in the parse can be detected as an
9037 * error, as we have decided to allow only one */
858a358b
KW
9038
9039 const char c = **s;
84159251 9040 STRLEN charlen = UTF ? UTF8SKIP(*s) : 1;
94b03d7d 9041
84159251
BF
9042 if ( charlen != 1 || ! strchr(valid_flags, c) ) {
9043 if (isALNUM_lazy_if(*s, UTF)) {
4f8dbb2d 9044 yyerror_pv(Perl_form(aTHX_ "Unknown regexp modifier \"/%.*s\"", (int)charlen, *s),
84159251
BF
9045 UTF ? SVf_UTF8 : 0);
9046 (*s) += charlen;
96f3bfda
KW
9047 /* Pretend that it worked, so will continue processing before
9048 * dieing */
0da72d5e 9049 return TRUE;
858a358b
KW
9050 }
9051 return FALSE;
9052 }
9053
9054 switch (c) {
94b03d7d 9055
858a358b
KW
9056 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
9057 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
9058 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
9059 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
9060 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
9061 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d 9062 case LOCALE_PAT_MOD:
3955e1a9
KW
9063 if (*charset) {
9064 goto multiple_charsets;
9065 }
94b03d7d 9066 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 9067 *charset = c;
94b03d7d
KW
9068 break;
9069 case UNICODE_PAT_MOD:
3955e1a9
KW
9070 if (*charset) {
9071 goto multiple_charsets;
9072 }
94b03d7d 9073 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 9074 *charset = c;
94b03d7d
KW
9075 break;
9076 case ASCII_RESTRICT_PAT_MOD:
ff3f26d2 9077 if (! *charset) {
94b03d7d
KW
9078 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
9079 }
ff3f26d2
KW
9080 else {
9081
9082 /* Error if previous modifier wasn't an 'a', but if it was, see
9083 * if, and accept, a second occurrence (only) */
9084 if (*charset != 'a'
9085 || get_regex_charset(*pmfl)
9086 != REGEX_ASCII_RESTRICTED_CHARSET)
9087 {
9088 goto multiple_charsets;
9089 }
9090 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
9091 }
9092 *charset = c;
94b03d7d
KW
9093 break;
9094 case DEPENDS_PAT_MOD:
3955e1a9
KW
9095 if (*charset) {
9096 goto multiple_charsets;
9097 }
94b03d7d 9098 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 9099 *charset = c;
94b03d7d 9100 break;
879d0c72 9101 }
94b03d7d 9102
858a358b
KW
9103 (*s)++;
9104 return TRUE;
94b03d7d 9105
3955e1a9
KW
9106 multiple_charsets:
9107 if (*charset != c) {
9108 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9109 }
ff3f26d2
KW
9110 else if (c == 'a') {
9111 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9112 }
3955e1a9
KW
9113 else {
9114 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9115 }
9116
9117 /* Pretend that it worked, so will continue processing before dieing */
9118 (*s)++;
9119 return TRUE;
879d0c72
NC
9120}
9121
76e3520e 9122STATIC char *
cea2e8a9 9123S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 9124{
97aff369 9125 dVAR;
79072805 9126 PMOP *pm;
d24ca0c5 9127 char *s = scan_str(start,!!PL_madskills,FALSE, PL_reg_state.re_reparsing);
10edeb5d 9128 const char * const valid_flags =
a20207d7 9129 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 9130 char charset = '\0'; /* character set modifier */
5db06880
NC
9131#ifdef PERL_MAD
9132 char *modstart;
9133#endif
9134
7918f24d 9135 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 9136
d24ca0c5
DM
9137 /* this was only needed for the initial scan_str; set it to false
9138 * so that any (?{}) code blocks etc are parsed normally */
9139 PL_reg_state.re_reparsing = FALSE;
25c09cbf 9140 if (!s) {
6136c704 9141 const char * const delimiter = skipspace(start);
10edeb5d
JH
9142 Perl_croak(aTHX_
9143 (const char *)
9144 (*delimiter == '?'
9145 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9146 : "Search pattern not terminated" ));
25c09cbf 9147 }
bbce6d69 9148
8782bef2 9149 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
9150 if (PL_multi_open == '?') {
9151 /* This is the only point in the code that sets PMf_ONCE: */
79072805 9152 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
9153
9154 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9155 allows us to restrict the list needed by reset to just the ??
9156 matches. */
9157 assert(type != OP_TRANS);
9158 if (PL_curstash) {
daba3364 9159 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
9160 U32 elements;
9161 if (!mg) {
daba3364 9162 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
9163 0);
9164 }
9165 elements = mg->mg_len / sizeof(PMOP**);
9166 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9167 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9168 mg->mg_len = elements * sizeof(PMOP**);
9169 PmopSTASH_set(pm,PL_curstash);
9170 }
9171 }
5db06880
NC
9172#ifdef PERL_MAD
9173 modstart = s;
9174#endif
d63c20f2
DM
9175
9176 /* if qr/...(?{..}).../, then need to parse the pattern within a new
9177 * anon CV. False positives like qr/[(?{]/ are harmless */
9178
9179 if (type == OP_QR) {
6f635923
DM
9180 STRLEN len;
9181 char *e, *p = SvPV(PL_lex_stuff, len);
9182 e = p + len;
9183 for (; p < e; p++) {
d63c20f2
DM
9184 if (p[0] == '(' && p[1] == '?'
9185 && (p[2] == '{' || (p[2] == '?' && p[3] == '{')))
9186 {
9187 pm->op_pmflags |= PMf_HAS_CV;
9188 break;
9189 }
9190 }
6f635923 9191 pm->op_pmflags |= PMf_IS_QR;
d63c20f2
DM
9192 }
9193
3955e1a9 9194 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
9195#ifdef PERL_MAD
9196 if (PL_madskills && modstart != s) {
9197 SV* tmptoken = newSVpvn(modstart, s - modstart);
9198 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9199 }
9200#endif
4ac733c9 9201 /* issue a warning if /c is specified,but /g is not */
a2a5de95 9202 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 9203 {
a2a5de95
NC
9204 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9205 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
9206 }
9207
3280af22 9208 PL_lex_op = (OP*)pm;
6154021b 9209 pl_yylval.ival = OP_MATCH;
378cc40b
LW
9210 return s;
9211}
9212
76e3520e 9213STATIC char *
cea2e8a9 9214S_scan_subst(pTHX_ char *start)
79072805 9215{
27da23d5 9216 dVAR;
22594288 9217 char *s;
79072805 9218 register PMOP *pm;
4fdae800 9219 I32 first_start;
79072805 9220 I32 es = 0;
3955e1a9 9221 char charset = '\0'; /* character set modifier */
5db06880
NC
9222#ifdef PERL_MAD
9223 char *modstart;
9224#endif
79072805 9225
7918f24d
NC
9226 PERL_ARGS_ASSERT_SCAN_SUBST;
9227
6154021b 9228 pl_yylval.ival = OP_NULL;
79072805 9229
d24ca0c5 9230 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
79072805 9231
37fd879b 9232 if (!s)
cea2e8a9 9233 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9234
3280af22 9235 if (s[-1] == PL_multi_open)
79072805 9236 s--;
5db06880
NC
9237#ifdef PERL_MAD
9238 if (PL_madskills) {
cd81e915
NC
9239 CURMAD('q', PL_thisopen);
9240 CURMAD('_', PL_thiswhite);
9241 CURMAD('E', PL_thisstuff);
9242 CURMAD('Q', PL_thisclose);
9243 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9244 }
9245#endif
79072805 9246
3280af22 9247 first_start = PL_multi_start;
d24ca0c5 9248 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 9249 if (!s) {
37fd879b 9250 if (PL_lex_stuff) {
3280af22 9251 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9252 PL_lex_stuff = NULL;
37fd879b 9253 }
cea2e8a9 9254 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9255 }
3280af22 9256 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9257
79072805 9258 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9259
9260#ifdef PERL_MAD
9261 if (PL_madskills) {
cd81e915
NC
9262 CURMAD('z', PL_thisopen);
9263 CURMAD('R', PL_thisstuff);
9264 CURMAD('Z', PL_thisclose);
5db06880
NC
9265 }
9266 modstart = s;
9267#endif
9268
48c036b1 9269 while (*s) {
a20207d7 9270 if (*s == EXEC_PAT_MOD) {
a687059c 9271 s++;
2f3197b3 9272 es++;
a687059c 9273 }
3955e1a9
KW
9274 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9275 {
48c036b1 9276 break;
aa78b661 9277 }
378cc40b 9278 }
79072805 9279
5db06880
NC
9280#ifdef PERL_MAD
9281 if (PL_madskills) {
9282 if (modstart != s)
9283 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9284 append_madprops(PL_thismad, (OP*)pm, 0);
9285 PL_thismad = 0;
5db06880
NC
9286 }
9287#endif
a2a5de95
NC
9288 if ((pm->op_pmflags & PMf_CONTINUE)) {
9289 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9290 }
9291
79072805 9292 if (es) {
6136c704
AL
9293 SV * const repl = newSVpvs("");
9294
0244c3a4
GS
9295 PL_sublex_info.super_bufptr = s;
9296 PL_sublex_info.super_bufend = PL_bufend;
9297 PL_multi_end = 0;
79072805 9298 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9299 while (es-- > 0) {
9300 if (es)
9301 sv_catpvs(repl, "eval ");
9302 else
9303 sv_catpvs(repl, "do ");
9304 }
6f43d98f 9305 sv_catpvs(repl, "{");
3280af22 9306 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
9307 if (strchr(SvPVX(PL_lex_repl), '#'))
9308 sv_catpvs(repl, "\n");
9309 sv_catpvs(repl, "}");
25da4f38 9310 SvEVALED_on(repl);
3280af22
NIS
9311 SvREFCNT_dec(PL_lex_repl);
9312 PL_lex_repl = repl;
378cc40b 9313 }
79072805 9314
3280af22 9315 PL_lex_op = (OP*)pm;
6154021b 9316 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9317 return s;
9318}
9319
76e3520e 9320STATIC char *
cea2e8a9 9321S_scan_trans(pTHX_ char *start)
378cc40b 9322{
97aff369 9323 dVAR;
a0d0e21e 9324 register char* s;
11343788 9325 OP *o;
b84c11c8
NC
9326 U8 squash;
9327 U8 del;
9328 U8 complement;
bb16bae8 9329 bool nondestruct = 0;
5db06880
NC
9330#ifdef PERL_MAD
9331 char *modstart;
9332#endif
79072805 9333
7918f24d
NC
9334 PERL_ARGS_ASSERT_SCAN_TRANS;
9335
6154021b 9336 pl_yylval.ival = OP_NULL;
79072805 9337
d24ca0c5 9338 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
37fd879b 9339 if (!s)
cea2e8a9 9340 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9341
3280af22 9342 if (s[-1] == PL_multi_open)
2f3197b3 9343 s--;
5db06880
NC
9344#ifdef PERL_MAD
9345 if (PL_madskills) {
cd81e915
NC
9346 CURMAD('q', PL_thisopen);
9347 CURMAD('_', PL_thiswhite);
9348 CURMAD('E', PL_thisstuff);
9349 CURMAD('Q', PL_thisclose);
9350 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9351 }
9352#endif
2f3197b3 9353
d24ca0c5 9354 s = scan_str(s,!!PL_madskills,FALSE,FALSE);
79072805 9355 if (!s) {
37fd879b 9356 if (PL_lex_stuff) {
3280af22 9357 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9358 PL_lex_stuff = NULL;
37fd879b 9359 }
cea2e8a9 9360 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9361 }
5db06880 9362 if (PL_madskills) {
cd81e915
NC
9363 CURMAD('z', PL_thisopen);
9364 CURMAD('R', PL_thisstuff);
9365 CURMAD('Z', PL_thisclose);
5db06880 9366 }
79072805 9367
a0ed51b3 9368 complement = del = squash = 0;
5db06880
NC
9369#ifdef PERL_MAD
9370 modstart = s;
9371#endif
7a1e2023
NC
9372 while (1) {
9373 switch (*s) {
9374 case 'c':
79072805 9375 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9376 break;
9377 case 'd':
a0ed51b3 9378 del = OPpTRANS_DELETE;
7a1e2023
NC
9379 break;
9380 case 's':
79072805 9381 squash = OPpTRANS_SQUASH;
7a1e2023 9382 break;
bb16bae8
FC
9383 case 'r':
9384 nondestruct = 1;
9385 break;
7a1e2023
NC
9386 default:
9387 goto no_more;
9388 }
395c3793
LW
9389 s++;
9390 }
7a1e2023 9391 no_more:
8973db79 9392
9100eeb1 9393 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)NULL);
59f00321
RGS
9394 o->op_private &= ~OPpTRANS_ALL;
9395 o->op_private |= del|squash|complement|
7948272d
NIS
9396 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9397 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 9398
3280af22 9399 PL_lex_op = o;
bb16bae8 9400 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9401
9402#ifdef PERL_MAD
9403 if (PL_madskills) {
9404 if (modstart != s)
9405 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9406 append_madprops(PL_thismad, o, 0);
9407 PL_thismad = 0;
5db06880
NC
9408 }
9409#endif
9410
79072805
LW
9411 return s;
9412}
9413
76e3520e 9414STATIC char *
cea2e8a9 9415S_scan_heredoc(pTHX_ register char *s)
79072805 9416{
97aff369 9417 dVAR;
79072805
LW
9418 SV *herewas;
9419 I32 op_type = OP_SCALAR;
9420 I32 len;
9421 SV *tmpstr;
9422 char term;
73d840c0 9423 const char *found_newline;
79072805 9424 register char *d;
fc36a67e 9425 register char *e;
4633a7c4 9426 char *peek;
60d63348
FC
9427 const int outer = (PL_rsfp || PL_parser->filtered)
9428 && !(PL_lex_inwhat == OP_SCALAR);
5db06880
NC
9429#ifdef PERL_MAD
9430 I32 stuffstart = s - SvPVX(PL_linestr);
9431 char *tstart;
9432
cd81e915 9433 PL_realtokenstart = -1;
5db06880 9434#endif
79072805 9435
7918f24d
NC
9436 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9437
79072805 9438 s += 2;
3280af22
NIS
9439 d = PL_tokenbuf;
9440 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 9441 if (!outer)
79072805 9442 *d++ = '\n';
c35e046a
AL
9443 peek = s;
9444 while (SPACE_OR_TAB(*peek))
9445 peek++;
3792a11b 9446 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9447 s = peek;
79072805 9448 term = *s++;
3280af22 9449 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 9450 d += len;
3280af22 9451 if (s < PL_bufend)
79072805 9452 s++;
79072805
LW
9453 }
9454 else {
9455 if (*s == '\\')
9456 s++, term = '\'';
9457 else
9458 term = '"';
7e2040f0 9459 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9460 deprecate("bare << to mean <<\"\"");
7e2040f0 9461 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9462 if (d < e)
9463 *d++ = *s;
9464 }
9465 }
3280af22 9466 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9467 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9468 *d++ = '\n';
9469 *d = '\0';
3280af22 9470 len = d - PL_tokenbuf;
5db06880
NC
9471
9472#ifdef PERL_MAD
9473 if (PL_madskills) {
9474 tstart = PL_tokenbuf + !outer;
cd81e915 9475 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 9476 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9477 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9478 stuffstart = s - SvPVX(PL_linestr);
9479 }
9480#endif
6a27c188 9481#ifndef PERL_STRICT_CR
f63a84b2
LW
9482 d = strchr(s, '\r');
9483 if (d) {
b464bac0 9484 char * const olds = s;
f63a84b2 9485 s = d;
3280af22 9486 while (s < PL_bufend) {
f63a84b2
LW
9487 if (*s == '\r') {
9488 *d++ = '\n';
9489 if (*++s == '\n')
9490 s++;
9491 }
9492 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9493 *d++ = *s++;
9494 s++;
9495 }
9496 else
9497 *d++ = *s++;
9498 }
9499 *d = '\0';
3280af22 9500 PL_bufend = d;
95a20fc0 9501 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9502 s = olds;
9503 }
9504#endif
5db06880
NC
9505#ifdef PERL_MAD
9506 found_newline = 0;
9507#endif
10edeb5d 9508 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
9509 herewas = newSVpvn(s,PL_bufend-s);
9510 }
9511 else {
5db06880
NC
9512#ifdef PERL_MAD
9513 herewas = newSVpvn(s-1,found_newline-s+1);
9514#else
73d840c0
AL
9515 s--;
9516 herewas = newSVpvn(s,found_newline-s);
5db06880 9517#endif
73d840c0 9518 }
5db06880
NC
9519#ifdef PERL_MAD
9520 if (PL_madskills) {
9521 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9522 if (PL_thisstuff)
9523 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9524 else
cd81e915 9525 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
9526 }
9527#endif
79072805 9528 s += SvCUR(herewas);
748a9306 9529
5db06880
NC
9530#ifdef PERL_MAD
9531 stuffstart = s - SvPVX(PL_linestr);
9532
9533 if (found_newline)
9534 s--;
9535#endif
9536
7d0a29fe
NC
9537 tmpstr = newSV_type(SVt_PVIV);
9538 SvGROW(tmpstr, 80);
748a9306 9539 if (term == '\'') {
79072805 9540 op_type = OP_CONST;
45977657 9541 SvIV_set(tmpstr, -1);
748a9306
LW
9542 }
9543 else if (term == '`') {
79072805 9544 op_type = OP_BACKTICK;
45977657 9545 SvIV_set(tmpstr, '\\');
748a9306 9546 }
79072805
LW
9547
9548 CLINE;
57843af0 9549 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
9550 PL_multi_open = PL_multi_close = '<';
9551 term = *PL_tokenbuf;
60d63348
FC
9552 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp
9553 && !PL_parser->filtered) {
6136c704
AL
9554 char * const bufptr = PL_sublex_info.super_bufptr;
9555 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 9556 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
9557 s = strchr(bufptr, '\n');
9558 if (!s)
9559 s = bufend;
9560 d = s;
9561 while (s < bufend &&
9562 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9563 if (*s++ == '\n')
57843af0 9564 CopLINE_inc(PL_curcop);
0244c3a4
GS
9565 }
9566 if (s >= bufend) {
eb160463 9567 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
9568 missingterm(PL_tokenbuf);
9569 }
9570 sv_setpvn(herewas,bufptr,d-bufptr+1);
9571 sv_setpvn(tmpstr,d+1,s-d);
9572 s += len - 1;
9573 sv_catpvn(herewas,s,bufend-s);
95a20fc0 9574 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
9575
9576 s = olds;
9577 goto retval;
9578 }
9579 else if (!outer) {
79072805 9580 d = s;
3280af22
NIS
9581 while (s < PL_bufend &&
9582 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 9583 if (*s++ == '\n')
57843af0 9584 CopLINE_inc(PL_curcop);
79072805 9585 }
3280af22 9586 if (s >= PL_bufend) {
eb160463 9587 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9588 missingterm(PL_tokenbuf);
79072805
LW
9589 }
9590 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9591#ifdef PERL_MAD
9592 if (PL_madskills) {
cd81e915
NC
9593 if (PL_thisstuff)
9594 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9595 else
cd81e915 9596 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9597 stuffstart = s - SvPVX(PL_linestr);
9598 }
9599#endif
79072805 9600 s += len - 1;
57843af0 9601 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 9602
3280af22
NIS
9603 sv_catpvn(herewas,s,PL_bufend-s);
9604 sv_setsv(PL_linestr,herewas);
9605 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9606 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9607 PL_last_lop = PL_last_uni = NULL;
79072805
LW
9608 }
9609 else
76f68e9b 9610 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 9611 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
9612#ifdef PERL_MAD
9613 if (PL_madskills) {
9614 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9615 if (PL_thisstuff)
9616 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9617 else
cd81e915 9618 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9619 }
9620#endif
f0e67a1d 9621 PL_bufptr = s;
17cc9359 9622 CopLINE_inc(PL_curcop);
f0e67a1d 9623 if (!outer || !lex_next_chunk(0)) {
eb160463 9624 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9625 missingterm(PL_tokenbuf);
79072805 9626 }
17cc9359 9627 CopLINE_dec(PL_curcop);
f0e67a1d 9628 s = PL_bufptr;
5db06880
NC
9629#ifdef PERL_MAD
9630 stuffstart = s - SvPVX(PL_linestr);
9631#endif
57843af0 9632 CopLINE_inc(PL_curcop);
3280af22 9633 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9634 PL_last_lop = PL_last_uni = NULL;
6a27c188 9635#ifndef PERL_STRICT_CR
3280af22 9636 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
9637 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9638 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 9639 {
3280af22
NIS
9640 PL_bufend[-2] = '\n';
9641 PL_bufend--;
95a20fc0 9642 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 9643 }
3280af22
NIS
9644 else if (PL_bufend[-1] == '\r')
9645 PL_bufend[-1] = '\n';
f63a84b2 9646 }
3280af22
NIS
9647 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9648 PL_bufend[-1] = '\n';
f63a84b2 9649#endif
3280af22 9650 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 9651 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 9652 *(SvPVX(PL_linestr) + off ) = ' ';
37c6a70c 9653 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
3280af22
NIS
9654 sv_catsv(PL_linestr,herewas);
9655 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 9656 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
9657 }
9658 else {
3280af22
NIS
9659 s = PL_bufend;
9660 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
9661 }
9662 }
79072805 9663 s++;
0244c3a4 9664retval:
57843af0 9665 PL_multi_end = CopLINE(PL_curcop);
79072805 9666 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 9667 SvPV_shrink_to_cur(tmpstr);
79072805 9668 }
8990e307 9669 SvREFCNT_dec(herewas);
2f31ce75 9670 if (!IN_BYTES) {
95a20fc0 9671 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
9672 SvUTF8_on(tmpstr);
9673 else if (PL_encoding)
9674 sv_recode_to_utf8(tmpstr, PL_encoding);
9675 }
3280af22 9676 PL_lex_stuff = tmpstr;
6154021b 9677 pl_yylval.ival = op_type;
79072805
LW
9678 return s;
9679}
9680
02aa26ce
NT
9681/* scan_inputsymbol
9682 takes: current position in input buffer
9683 returns: new position in input buffer
6154021b 9684 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
9685
9686 This code handles:
9687
9688 <> read from ARGV
9689 <FH> read from filehandle
9690 <pkg::FH> read from package qualified filehandle
9691 <pkg'FH> read from package qualified filehandle
9692 <$fh> read from filehandle in $fh
9693 <*.h> filename glob
9694
9695*/
9696
76e3520e 9697STATIC char *
cea2e8a9 9698S_scan_inputsymbol(pTHX_ char *start)
79072805 9699{
97aff369 9700 dVAR;
02aa26ce 9701 register char *s = start; /* current position in buffer */
1b420867 9702 char *end;
79072805 9703 I32 len;
6136c704
AL
9704 char *d = PL_tokenbuf; /* start of temp holding space */
9705 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9706
7918f24d
NC
9707 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9708
1b420867
GS
9709 end = strchr(s, '\n');
9710 if (!end)
9711 end = PL_bufend;
9712 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
9713
9714 /* die if we didn't have space for the contents of the <>,
1b420867 9715 or if it didn't end, or if we see a newline
02aa26ce
NT
9716 */
9717
bb7a0f54 9718 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 9719 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 9720 if (s >= end)
cea2e8a9 9721 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 9722
fc36a67e 9723 s++;
02aa26ce
NT
9724
9725 /* check for <$fh>
9726 Remember, only scalar variables are interpreted as filehandles by
9727 this code. Anything more complex (e.g., <$fh{$num}>) will be
9728 treated as a glob() call.
9729 This code makes use of the fact that except for the $ at the front,
9730 a scalar variable and a filehandle look the same.
9731 */
4633a7c4 9732 if (*d == '$' && d[1]) d++;
02aa26ce
NT
9733
9734 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 9735 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
2a507800 9736 d += UTF ? UTF8SKIP(d) : 1;
02aa26ce
NT
9737
9738 /* If we've tried to read what we allow filehandles to look like, and
9739 there's still text left, then it must be a glob() and not a getline.
9740 Use scan_str to pull out the stuff between the <> and treat it
9741 as nothing more than a string.
9742 */
9743
3280af22 9744 if (d - PL_tokenbuf != len) {
6154021b 9745 pl_yylval.ival = OP_GLOB;
d24ca0c5 9746 s = scan_str(start,!!PL_madskills,FALSE,FALSE);
79072805 9747 if (!s)
cea2e8a9 9748 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
9749 return s;
9750 }
395c3793 9751 else {
9b3023bc 9752 bool readline_overriden = FALSE;
6136c704 9753 GV *gv_readline;
9b3023bc 9754 GV **gvp;
02aa26ce 9755 /* we're in a filehandle read situation */
3280af22 9756 d = PL_tokenbuf;
02aa26ce
NT
9757
9758 /* turn <> into <ARGV> */
79072805 9759 if (!len)
689badd5 9760 Copy("ARGV",d,5,char);
02aa26ce 9761
9b3023bc 9762 /* Check whether readline() is overriden */
fafc274c 9763 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 9764 if ((gv_readline
ba979b31 9765 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 9766 ||
017a3ce5 9767 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 9768 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 9769 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
9770 readline_overriden = TRUE;
9771
02aa26ce
NT
9772 /* if <$fh>, create the ops to turn the variable into a
9773 filehandle
9774 */
79072805 9775 if (*d == '$') {
02aa26ce
NT
9776 /* try to find it in the pad for this block, otherwise find
9777 add symbol table ops
9778 */
bc9b26ca 9779 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 9780 if (tmp != NOT_IN_PAD) {
00b1698f 9781 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
9782 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9783 HEK * const stashname = HvNAME_HEK(stash);
9784 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 9785 sv_catpvs(sym, "::");
f558d5af
JH
9786 sv_catpv(sym, d+1);
9787 d = SvPVX(sym);
9788 goto intro_sym;
9789 }
9790 else {
6136c704 9791 OP * const o = newOP(OP_PADSV, 0);
f558d5af 9792 o->op_targ = tmp;
9b3023bc
RGS
9793 PL_lex_op = readline_overriden
9794 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9795 op_append_elem(OP_LIST, o,
9b3023bc
RGS
9796 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9797 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 9798 }
a0d0e21e
LW
9799 }
9800 else {
f558d5af
JH
9801 GV *gv;
9802 ++d;
9803intro_sym:
9804 gv = gv_fetchpv(d,
9805 (PL_in_eval
9806 ? (GV_ADDMULTI | GV_ADDINEVAL)
25db2ea6 9807 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
f558d5af 9808 SVt_PV);
9b3023bc
RGS
9809 PL_lex_op = readline_overriden
9810 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9811 op_append_elem(OP_LIST,
9b3023bc
RGS
9812 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9813 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9814 : (OP*)newUNOP(OP_READLINE, 0,
9815 newUNOP(OP_RV2SV, 0,
9816 newGVOP(OP_GV, 0, gv)));
a0d0e21e 9817 }
7c6fadd6
RGS
9818 if (!readline_overriden)
9819 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
9820 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9821 pl_yylval.ival = OP_NULL;
79072805 9822 }
02aa26ce
NT
9823
9824 /* If it's none of the above, it must be a literal filehandle
9825 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 9826 else {
25db2ea6 9827 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9b3023bc
RGS
9828 PL_lex_op = readline_overriden
9829 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9830 op_append_elem(OP_LIST,
9b3023bc
RGS
9831 newGVOP(OP_GV, 0, gv),
9832 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9833 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 9834 pl_yylval.ival = OP_NULL;
79072805
LW
9835 }
9836 }
02aa26ce 9837
79072805
LW
9838 return s;
9839}
9840
02aa26ce
NT
9841
9842/* scan_str
9843 takes: start position in buffer
09bef843
SB
9844 keep_quoted preserve \ on the embedded delimiter(s)
9845 keep_delims preserve the delimiters around the string
d24ca0c5
DM
9846 re_reparse compiling a run-time /(?{})/:
9847 collapse // to /, and skip encoding src
02aa26ce
NT
9848 returns: position to continue reading from buffer
9849 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9850 updates the read buffer.
9851
9852 This subroutine pulls a string out of the input. It is called for:
9853 q single quotes q(literal text)
9854 ' single quotes 'literal text'
9855 qq double quotes qq(interpolate $here please)
9856 " double quotes "interpolate $here please"
9857 qx backticks qx(/bin/ls -l)
9858 ` backticks `/bin/ls -l`
9859 qw quote words @EXPORT_OK = qw( func() $spam )
9860 m// regexp match m/this/
9861 s/// regexp substitute s/this/that/
9862 tr/// string transliterate tr/this/that/
9863 y/// string transliterate y/this/that/
9864 ($*@) sub prototypes sub foo ($)
09bef843 9865 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
9866 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9867
9868 In most of these cases (all but <>, patterns and transliterate)
9869 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9870 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9871 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9872 calls scan_str().
4e553d73 9873
02aa26ce
NT
9874 It skips whitespace before the string starts, and treats the first
9875 character as the delimiter. If the delimiter is one of ([{< then
9876 the corresponding "close" character )]}> is used as the closing
9877 delimiter. It allows quoting of delimiters, and if the string has
9878 balanced delimiters ([{<>}]) it allows nesting.
9879
37fd879b
HS
9880 On success, the SV with the resulting string is put into lex_stuff or,
9881 if that is already non-NULL, into lex_repl. The second case occurs only
9882 when parsing the RHS of the special constructs s/// and tr/// (y///).
9883 For convenience, the terminating delimiter character is stuffed into
9884 SvIVX of the SV.
02aa26ce
NT
9885*/
9886
76e3520e 9887STATIC char *
d24ca0c5 9888S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims, int re_reparse)
79072805 9889{
97aff369 9890 dVAR;
02aa26ce 9891 SV *sv; /* scalar value: string */
d3fcec1f 9892 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
9893 register char *s = start; /* current position in the buffer */
9894 register char term; /* terminating character */
9895 register char *to; /* current position in the sv's data */
9896 I32 brackets = 1; /* bracket nesting level */
89491803 9897 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 9898 I32 termcode; /* terminating char. code */
89ebb4a3 9899 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 9900 STRLEN termlen; /* length of terminating string */
0331ef07 9901 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
9902#ifdef PERL_MAD
9903 int stuffstart;
9904 char *tstart;
9905#endif
02aa26ce 9906
7918f24d
NC
9907 PERL_ARGS_ASSERT_SCAN_STR;
9908
02aa26ce 9909 /* skip space before the delimiter */
29595ff2
NC
9910 if (isSPACE(*s)) {
9911 s = PEEKSPACE(s);
9912 }
02aa26ce 9913
5db06880 9914#ifdef PERL_MAD
cd81e915
NC
9915 if (PL_realtokenstart >= 0) {
9916 stuffstart = PL_realtokenstart;
9917 PL_realtokenstart = -1;
5db06880
NC
9918 }
9919 else
9920 stuffstart = start - SvPVX(PL_linestr);
9921#endif
02aa26ce 9922 /* mark where we are, in case we need to report errors */
79072805 9923 CLINE;
02aa26ce
NT
9924
9925 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 9926 term = *s;
220e2d4e
IH
9927 if (!UTF) {
9928 termcode = termstr[0] = term;
9929 termlen = 1;
9930 }
9931 else {
4b88fb76 9932 termcode = utf8_to_uvchr_buf((U8*)s, (U8*)PL_bufend, &termlen);
220e2d4e
IH
9933 Copy(s, termstr, termlen, U8);
9934 if (!UTF8_IS_INVARIANT(term))
9935 has_utf8 = TRUE;
9936 }
b1c7b182 9937
02aa26ce 9938 /* mark where we are */
57843af0 9939 PL_multi_start = CopLINE(PL_curcop);
3280af22 9940 PL_multi_open = term;
02aa26ce
NT
9941
9942 /* find corresponding closing delimiter */
93a17b20 9943 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
9944 termcode = termstr[0] = term = tmps[5];
9945
3280af22 9946 PL_multi_close = term;
79072805 9947
561b68a9
SH
9948 /* create a new SV to hold the contents. 79 is the SV's initial length.
9949 What a random number. */
7d0a29fe
NC
9950 sv = newSV_type(SVt_PVIV);
9951 SvGROW(sv, 80);
45977657 9952 SvIV_set(sv, termcode);
a0d0e21e 9953 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
9954
9955 /* move past delimiter and try to read a complete string */
09bef843 9956 if (keep_delims)
220e2d4e
IH
9957 sv_catpvn(sv, s, termlen);
9958 s += termlen;
5db06880
NC
9959#ifdef PERL_MAD
9960 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9961 if (!PL_thisopen && !keep_delims) {
9962 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9963 stuffstart = s - SvPVX(PL_linestr);
9964 }
9965#endif
93a17b20 9966 for (;;) {
d24ca0c5 9967 if (PL_encoding && !UTF && !re_reparse) {
220e2d4e
IH
9968 bool cont = TRUE;
9969
9970 while (cont) {
95a20fc0 9971 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 9972 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 9973 &offset, (char*)termstr, termlen);
6136c704
AL
9974 const char * const ns = SvPVX_const(PL_linestr) + offset;
9975 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
9976
9977 for (; s < ns; s++) {
60d63348 9978 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
220e2d4e
IH
9979 CopLINE_inc(PL_curcop);
9980 }
9981 if (!found)
9982 goto read_more_line;
9983 else {
9984 /* handle quoted delimiters */
52327caf 9985 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 9986 const char *t;
95a20fc0 9987 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
9988 t--;
9989 if ((svlast-1 - t) % 2) {
9990 if (!keep_quoted) {
9991 *(svlast-1) = term;
9992 *svlast = '\0';
9993 SvCUR_set(sv, SvCUR(sv) - 1);
9994 }
9995 continue;
9996 }
9997 }
9998 if (PL_multi_open == PL_multi_close) {
9999 cont = FALSE;
10000 }
10001 else {
f54cb97a
AL
10002 const char *t;
10003 char *w;
0331ef07 10004 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
10005 /* At here, all closes are "was quoted" one,
10006 so we don't check PL_multi_close. */
10007 if (*t == '\\') {
10008 if (!keep_quoted && *(t+1) == PL_multi_open)
10009 t++;
10010 else
10011 *w++ = *t++;
10012 }
10013 else if (*t == PL_multi_open)
10014 brackets++;
10015
10016 *w = *t;
10017 }
10018 if (w < t) {
10019 *w++ = term;
10020 *w = '\0';
95a20fc0 10021 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 10022 }
0331ef07 10023 last_off = w - SvPVX(sv);
220e2d4e
IH
10024 if (--brackets <= 0)
10025 cont = FALSE;
10026 }
10027 }
10028 }
10029 if (!keep_delims) {
10030 SvCUR_set(sv, SvCUR(sv) - 1);
10031 *SvEND(sv) = '\0';
10032 }
10033 break;
10034 }
10035
02aa26ce 10036 /* extend sv if need be */
3280af22 10037 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 10038 /* set 'to' to the next character in the sv's string */
463ee0b2 10039 to = SvPVX(sv)+SvCUR(sv);
09bef843 10040
02aa26ce 10041 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
10042 if (PL_multi_open == PL_multi_close) {
10043 for (; s < PL_bufend; s++,to++) {
02aa26ce 10044 /* embedded newlines increment the current line number */
60d63348 10045 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
57843af0 10046 CopLINE_inc(PL_curcop);
02aa26ce 10047 /* handle quoted delimiters */
3280af22 10048 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
d24ca0c5
DM
10049 if (!keep_quoted
10050 && (s[1] == term
10051 || (re_reparse && s[1] == '\\'))
10052 )
a0d0e21e 10053 s++;
d24ca0c5 10054 /* any other quotes are simply copied straight through */
a0d0e21e
LW
10055 else
10056 *to++ = *s++;
10057 }
02aa26ce
NT
10058 /* terminate when run out of buffer (the for() condition), or
10059 have found the terminator */
220e2d4e
IH
10060 else if (*s == term) {
10061 if (termlen == 1)
10062 break;
f3b9ce0f 10063 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
10064 break;
10065 }
63cd0674 10066 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10067 has_utf8 = TRUE;
93a17b20
LW
10068 *to = *s;
10069 }
10070 }
02aa26ce
NT
10071
10072 /* if the terminator isn't the same as the start character (e.g.,
10073 matched brackets), we have to allow more in the quoting, and
10074 be prepared for nested brackets.
10075 */
93a17b20 10076 else {
02aa26ce 10077 /* read until we run out of string, or we find the terminator */
3280af22 10078 for (; s < PL_bufend; s++,to++) {
02aa26ce 10079 /* embedded newlines increment the line count */
60d63348 10080 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
57843af0 10081 CopLINE_inc(PL_curcop);
02aa26ce 10082 /* backslashes can escape the open or closing characters */
3280af22 10083 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
10084 if (!keep_quoted &&
10085 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
10086 s++;
10087 else
10088 *to++ = *s++;
10089 }
02aa26ce 10090 /* allow nested opens and closes */
3280af22 10091 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 10092 break;
3280af22 10093 else if (*s == PL_multi_open)
93a17b20 10094 brackets++;
63cd0674 10095 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 10096 has_utf8 = TRUE;
93a17b20
LW
10097 *to = *s;
10098 }
10099 }
02aa26ce 10100 /* terminate the copied string and update the sv's end-of-string */
93a17b20 10101 *to = '\0';
95a20fc0 10102 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 10103
02aa26ce
NT
10104 /*
10105 * this next chunk reads more into the buffer if we're not done yet
10106 */
10107
b1c7b182
GS
10108 if (s < PL_bufend)
10109 break; /* handle case where we are done yet :-) */
79072805 10110
6a27c188 10111#ifndef PERL_STRICT_CR
95a20fc0 10112 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
10113 if ((to[-2] == '\r' && to[-1] == '\n') ||
10114 (to[-2] == '\n' && to[-1] == '\r'))
10115 {
f63a84b2
LW
10116 to[-2] = '\n';
10117 to--;
95a20fc0 10118 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
10119 }
10120 else if (to[-1] == '\r')
10121 to[-1] = '\n';
10122 }
95a20fc0 10123 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
10124 to[-1] = '\n';
10125#endif
10126
220e2d4e 10127 read_more_line:
02aa26ce
NT
10128 /* if we're out of file, or a read fails, bail and reset the current
10129 line marker so we can report where the unterminated string began
10130 */
5db06880
NC
10131#ifdef PERL_MAD
10132 if (PL_madskills) {
c35e046a 10133 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10134 if (PL_thisstuff)
10135 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10136 else
cd81e915 10137 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10138 }
10139#endif
f0e67a1d
Z
10140 CopLINE_inc(PL_curcop);
10141 PL_bufptr = PL_bufend;
10142 if (!lex_next_chunk(0)) {
c07a80fd 10143 sv_free(sv);
eb160463 10144 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 10145 return NULL;
79072805 10146 }
f0e67a1d 10147 s = PL_bufptr;
5db06880
NC
10148#ifdef PERL_MAD
10149 stuffstart = 0;
10150#endif
378cc40b 10151 }
4e553d73 10152
02aa26ce
NT
10153 /* at this point, we have successfully read the delimited string */
10154
d24ca0c5 10155 if (!PL_encoding || UTF || re_reparse) {
5db06880
NC
10156#ifdef PERL_MAD
10157 if (PL_madskills) {
c35e046a 10158 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 10159 const int len = s - tstart;
cd81e915 10160 if (PL_thisstuff)
c35e046a 10161 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10162 else
c35e046a 10163 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10164 if (!PL_thisclose && !keep_delims)
10165 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
10166 }
10167#endif
10168
220e2d4e
IH
10169 if (keep_delims)
10170 sv_catpvn(sv, s, termlen);
10171 s += termlen;
10172 }
5db06880
NC
10173#ifdef PERL_MAD
10174 else {
10175 if (PL_madskills) {
c35e046a
AL
10176 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10177 const int len = s - tstart - termlen;
cd81e915 10178 if (PL_thisstuff)
c35e046a 10179 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10180 else
c35e046a 10181 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10182 if (!PL_thisclose && !keep_delims)
10183 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
10184 }
10185 }
10186#endif
d24ca0c5 10187 if (has_utf8 || (PL_encoding && !re_reparse))
b1c7b182 10188 SvUTF8_on(sv);
d0063567 10189
57843af0 10190 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
10191
10192 /* if we allocated too much space, give some back */
93a17b20
LW
10193 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10194 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 10195 SvPV_renew(sv, SvLEN(sv));
79072805 10196 }
02aa26ce
NT
10197
10198 /* decide whether this is the first or second quoted string we've read
10199 for this op
10200 */
4e553d73 10201
3280af22
NIS
10202 if (PL_lex_stuff)
10203 PL_lex_repl = sv;
79072805 10204 else
3280af22 10205 PL_lex_stuff = sv;
378cc40b
LW
10206 return s;
10207}
10208
02aa26ce
NT
10209/*
10210 scan_num
10211 takes: pointer to position in buffer
10212 returns: pointer to new position in buffer
6154021b 10213 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
10214
10215 Read a number in any of the formats that Perl accepts:
10216
7fd134d9
JH
10217 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10218 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
10219 0b[01](_?[01])*
10220 0[0-7](_?[0-7])*
10221 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10222
3280af22 10223 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10224 thing it reads.
10225
10226 If it reads a number without a decimal point or an exponent, it will
10227 try converting the number to an integer and see if it can do so
10228 without loss of precision.
10229*/
4e553d73 10230
378cc40b 10231char *
bfed75c6 10232Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10233{
97aff369 10234 dVAR;
bfed75c6 10235 register const char *s = start; /* current position in buffer */
02aa26ce
NT
10236 register char *d; /* destination in temp buffer */
10237 register char *e; /* end of temp buffer */
86554af2 10238 NV nv; /* number read, as a double */
a0714e2c 10239 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10240 bool floatit; /* boolean: int or float? */
cbbf8932 10241 const char *lastub = NULL; /* position of last underbar */
bfed75c6 10242 static char const number_too_long[] = "Number too long";
378cc40b 10243
7918f24d
NC
10244 PERL_ARGS_ASSERT_SCAN_NUM;
10245
02aa26ce
NT
10246 /* We use the first character to decide what type of number this is */
10247
378cc40b 10248 switch (*s) {
79072805 10249 default:
5637ef5b 10250 Perl_croak(aTHX_ "panic: scan_num, *s=%d", *s);
4e553d73 10251
02aa26ce 10252 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10253 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10254 case '0':
10255 {
02aa26ce
NT
10256 /* variables:
10257 u holds the "number so far"
4f19785b
WSI
10258 shift the power of 2 of the base
10259 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10260 overflowed was the number more than we can hold?
10261
10262 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10263 we in octal/hex/binary?" indicator to disallow hex characters
10264 when in octal mode.
02aa26ce 10265 */
9e24b6e2
JH
10266 NV n = 0.0;
10267 UV u = 0;
79072805 10268 I32 shift;
9e24b6e2 10269 bool overflowed = FALSE;
61f33854 10270 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10271 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10272 static const char* const bases[5] =
10273 { "", "binary", "", "octal", "hexadecimal" };
10274 static const char* const Bases[5] =
10275 { "", "Binary", "", "Octal", "Hexadecimal" };
10276 static const char* const maxima[5] =
10277 { "",
10278 "0b11111111111111111111111111111111",
10279 "",
10280 "037777777777",
10281 "0xffffffff" };
bfed75c6 10282 const char *base, *Base, *max;
378cc40b 10283
02aa26ce 10284 /* check for hex */
a674e8db 10285 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10286 shift = 4;
10287 s += 2;
61f33854 10288 just_zero = FALSE;
a674e8db 10289 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10290 shift = 1;
10291 s += 2;
61f33854 10292 just_zero = FALSE;
378cc40b 10293 }
02aa26ce 10294 /* check for a decimal in disguise */
b78218b7 10295 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10296 goto decimal;
02aa26ce 10297 /* so it must be octal */
928753ea 10298 else {
378cc40b 10299 shift = 3;
928753ea
JH
10300 s++;
10301 }
10302
10303 if (*s == '_') {
a2a5de95 10304 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10305 "Misplaced _ in number");
10306 lastub = s++;
10307 }
9e24b6e2
JH
10308
10309 base = bases[shift];
10310 Base = Bases[shift];
10311 max = maxima[shift];
02aa26ce 10312
4f19785b 10313 /* read the rest of the number */
378cc40b 10314 for (;;) {
9e24b6e2 10315 /* x is used in the overflow test,
893fe2c2 10316 b is the digit we're adding on. */
9e24b6e2 10317 UV x, b;
55497cff 10318
378cc40b 10319 switch (*s) {
02aa26ce
NT
10320
10321 /* if we don't mention it, we're done */
378cc40b
LW
10322 default:
10323 goto out;
02aa26ce 10324
928753ea 10325 /* _ are ignored -- but warned about if consecutive */
de3bb511 10326 case '_':
a2a5de95
NC
10327 if (lastub && s == lastub + 1)
10328 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10329 "Misplaced _ in number");
928753ea 10330 lastub = s++;
de3bb511 10331 break;
02aa26ce
NT
10332
10333 /* 8 and 9 are not octal */
378cc40b 10334 case '8': case '9':
4f19785b 10335 if (shift == 3)
cea2e8a9 10336 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10337 /* FALL THROUGH */
02aa26ce
NT
10338
10339 /* octal digits */
4f19785b 10340 case '2': case '3': case '4':
378cc40b 10341 case '5': case '6': case '7':
4f19785b 10342 if (shift == 1)
cea2e8a9 10343 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10344 /* FALL THROUGH */
10345
10346 case '0': case '1':
02aa26ce 10347 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10348 goto digit;
02aa26ce
NT
10349
10350 /* hex digits */
378cc40b
LW
10351 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10352 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10353 /* make sure they said 0x */
378cc40b
LW
10354 if (shift != 4)
10355 goto out;
55497cff 10356 b = (*s++ & 7) + 9;
02aa26ce
NT
10357
10358 /* Prepare to put the digit we have onto the end
10359 of the number so far. We check for overflows.
10360 */
10361
55497cff 10362 digit:
61f33854 10363 just_zero = FALSE;
9e24b6e2
JH
10364 if (!overflowed) {
10365 x = u << shift; /* make room for the digit */
10366
10367 if ((x >> shift) != u
10368 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10369 overflowed = TRUE;
10370 n = (NV) u;
9b387841
NC
10371 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10372 "Integer overflow in %s number",
10373 base);
9e24b6e2
JH
10374 } else
10375 u = x | b; /* add the digit to the end */
10376 }
10377 if (overflowed) {
10378 n *= nvshift[shift];
10379 /* If an NV has not enough bits in its
10380 * mantissa to represent an UV this summing of
10381 * small low-order numbers is a waste of time
10382 * (because the NV cannot preserve the
10383 * low-order bits anyway): we could just
10384 * remember when did we overflow and in the
10385 * end just multiply n by the right
10386 * amount. */
10387 n += (NV) b;
55497cff 10388 }
378cc40b
LW
10389 break;
10390 }
10391 }
02aa26ce
NT
10392
10393 /* if we get here, we had success: make a scalar value from
10394 the number.
10395 */
378cc40b 10396 out:
928753ea
JH
10397
10398 /* final misplaced underbar check */
10399 if (s[-1] == '_') {
a2a5de95 10400 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10401 }
10402
9e24b6e2 10403 if (overflowed) {
a2a5de95
NC
10404 if (n > 4294967295.0)
10405 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10406 "%s number > %s non-portable",
10407 Base, max);
b081dd7e 10408 sv = newSVnv(n);
9e24b6e2
JH
10409 }
10410 else {
15041a67 10411#if UVSIZE > 4
a2a5de95
NC
10412 if (u > 0xffffffff)
10413 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10414 "%s number > %s non-portable",
10415 Base, max);
2cc4c2dc 10416#endif
b081dd7e 10417 sv = newSVuv(u);
9e24b6e2 10418 }
61f33854 10419 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10420 sv = new_constant(start, s - start, "integer",
eb0d8d16 10421 sv, NULL, NULL, 0);
61f33854 10422 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10423 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10424 }
10425 break;
02aa26ce
NT
10426
10427 /*
10428 handle decimal numbers.
10429 we're also sent here when we read a 0 as the first digit
10430 */
378cc40b
LW
10431 case '1': case '2': case '3': case '4': case '5':
10432 case '6': case '7': case '8': case '9': case '.':
10433 decimal:
3280af22
NIS
10434 d = PL_tokenbuf;
10435 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10436 floatit = FALSE;
02aa26ce
NT
10437
10438 /* read next group of digits and _ and copy into d */
de3bb511 10439 while (isDIGIT(*s) || *s == '_') {
4e553d73 10440 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10441 if -w is on
10442 */
93a17b20 10443 if (*s == '_') {
a2a5de95
NC
10444 if (lastub && s == lastub + 1)
10445 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10446 "Misplaced _ in number");
928753ea 10447 lastub = s++;
93a17b20 10448 }
fc36a67e 10449 else {
02aa26ce 10450 /* check for end of fixed-length buffer */
fc36a67e 10451 if (d >= e)
cea2e8a9 10452 Perl_croak(aTHX_ number_too_long);
02aa26ce 10453 /* if we're ok, copy the character */
378cc40b 10454 *d++ = *s++;
fc36a67e 10455 }
378cc40b 10456 }
02aa26ce
NT
10457
10458 /* final misplaced underbar check */
928753ea 10459 if (lastub && s == lastub + 1) {
a2a5de95 10460 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10461 }
02aa26ce
NT
10462
10463 /* read a decimal portion if there is one. avoid
10464 3..5 being interpreted as the number 3. followed
10465 by .5
10466 */
2f3197b3 10467 if (*s == '.' && s[1] != '.') {
79072805 10468 floatit = TRUE;
378cc40b 10469 *d++ = *s++;
02aa26ce 10470
928753ea 10471 if (*s == '_') {
a2a5de95
NC
10472 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10473 "Misplaced _ in number");
928753ea
JH
10474 lastub = s;
10475 }
10476
10477 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10478 */
fc36a67e 10479 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10480 /* fixed length buffer check */
fc36a67e 10481 if (d >= e)
cea2e8a9 10482 Perl_croak(aTHX_ number_too_long);
928753ea 10483 if (*s == '_') {
a2a5de95
NC
10484 if (lastub && s == lastub + 1)
10485 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10486 "Misplaced _ in number");
928753ea
JH
10487 lastub = s;
10488 }
10489 else
fc36a67e 10490 *d++ = *s;
378cc40b 10491 }
928753ea
JH
10492 /* fractional part ending in underbar? */
10493 if (s[-1] == '_') {
a2a5de95
NC
10494 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10495 "Misplaced _ in number");
928753ea 10496 }
dd629d5b
GS
10497 if (*s == '.' && isDIGIT(s[1])) {
10498 /* oops, it's really a v-string, but without the "v" */
f4758303 10499 s = start;
dd629d5b
GS
10500 goto vstring;
10501 }
378cc40b 10502 }
02aa26ce
NT
10503
10504 /* read exponent part, if present */
3792a11b 10505 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10506 floatit = TRUE;
10507 s++;
02aa26ce
NT
10508
10509 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10510 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10511
7fd134d9
JH
10512 /* stray preinitial _ */
10513 if (*s == '_') {
a2a5de95
NC
10514 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10515 "Misplaced _ in number");
7fd134d9
JH
10516 lastub = s++;
10517 }
10518
02aa26ce 10519 /* allow positive or negative exponent */
378cc40b
LW
10520 if (*s == '+' || *s == '-')
10521 *d++ = *s++;
02aa26ce 10522
7fd134d9
JH
10523 /* stray initial _ */
10524 if (*s == '_') {
a2a5de95
NC
10525 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10526 "Misplaced _ in number");
7fd134d9
JH
10527 lastub = s++;
10528 }
10529
7fd134d9
JH
10530 /* read digits of exponent */
10531 while (isDIGIT(*s) || *s == '_') {
10532 if (isDIGIT(*s)) {
10533 if (d >= e)
10534 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10535 *d++ = *s++;
7fd134d9
JH
10536 }
10537 else {
041457d9 10538 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10539 (!isDIGIT(s[1]) && s[1] != '_')))
10540 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10541 "Misplaced _ in number");
b3b48e3e 10542 lastub = s++;
7fd134d9 10543 }
7fd134d9 10544 }
378cc40b 10545 }
02aa26ce 10546
02aa26ce 10547
0b7fceb9 10548 /*
58bb9ec3
NC
10549 We try to do an integer conversion first if no characters
10550 indicating "float" have been found.
0b7fceb9
MU
10551 */
10552
10553 if (!floatit) {
58bb9ec3 10554 UV uv;
6136c704 10555 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10556
10557 if (flags == IS_NUMBER_IN_UV) {
10558 if (uv <= IV_MAX)
b081dd7e 10559 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10560 else
b081dd7e 10561 sv = newSVuv(uv);
58bb9ec3
NC
10562 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10563 if (uv <= (UV) IV_MIN)
b081dd7e 10564 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10565 else
10566 floatit = TRUE;
10567 } else
10568 floatit = TRUE;
10569 }
0b7fceb9 10570 if (floatit) {
58bb9ec3
NC
10571 /* terminate the string */
10572 *d = '\0';
86554af2 10573 nv = Atof(PL_tokenbuf);
b081dd7e 10574 sv = newSVnv(nv);
86554af2 10575 }
86554af2 10576
eb0d8d16
NC
10577 if ( floatit
10578 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10579 const char *const key = floatit ? "float" : "integer";
10580 const STRLEN keylen = floatit ? 5 : 7;
10581 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10582 key, keylen, sv, NULL, NULL, 0);
10583 }
378cc40b 10584 break;
0b7fceb9 10585
e312add1 10586 /* if it starts with a v, it could be a v-string */
a7cb1f99 10587 case 'v':
dd629d5b 10588vstring:
561b68a9 10589 sv = newSV(5); /* preallocate storage space */
65b06e02 10590 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10591 break;
79072805 10592 }
a687059c 10593
02aa26ce
NT
10594 /* make the op for the constant and return */
10595
a86a20aa 10596 if (sv)
b73d6f50 10597 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10598 else
5f66b61c 10599 lvalp->opval = NULL;
a687059c 10600
73d840c0 10601 return (char *)s;
378cc40b
LW
10602}
10603
76e3520e 10604STATIC char *
cea2e8a9 10605S_scan_formline(pTHX_ register char *s)
378cc40b 10606{
97aff369 10607 dVAR;
79072805 10608 register char *eol;
378cc40b 10609 register char *t;
6136c704 10610 SV * const stuff = newSVpvs("");
79072805 10611 bool needargs = FALSE;
c5ee2135 10612 bool eofmt = FALSE;
5db06880
NC
10613#ifdef PERL_MAD
10614 char *tokenstart = s;
4f61fd4b
JC
10615 SV* savewhite = NULL;
10616
5db06880 10617 if (PL_madskills) {
cd81e915
NC
10618 savewhite = PL_thiswhite;
10619 PL_thiswhite = 0;
5db06880
NC
10620 }
10621#endif
378cc40b 10622
7918f24d
NC
10623 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10624
79072805 10625 while (!needargs) {
a1b95068 10626 if (*s == '.') {
c35e046a 10627 t = s+1;
51882d45 10628#ifdef PERL_STRICT_CR
c35e046a
AL
10629 while (SPACE_OR_TAB(*t))
10630 t++;
51882d45 10631#else
c35e046a
AL
10632 while (SPACE_OR_TAB(*t) || *t == '\r')
10633 t++;
51882d45 10634#endif
c5ee2135
WL
10635 if (*t == '\n' || t == PL_bufend) {
10636 eofmt = TRUE;
79072805 10637 break;
c5ee2135 10638 }
79072805 10639 }
60d63348 10640 if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
07409e01 10641 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 10642 if (!eol++)
3280af22 10643 eol = PL_bufend;
0f85fab0
LW
10644 }
10645 else
3280af22 10646 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 10647 if (*s != '#') {
a0d0e21e
LW
10648 for (t = s; t < eol; t++) {
10649 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10650 needargs = FALSE;
10651 goto enough; /* ~~ must be first line in formline */
378cc40b 10652 }
a0d0e21e
LW
10653 if (*t == '@' || *t == '^')
10654 needargs = TRUE;
378cc40b 10655 }
7121b347
MG
10656 if (eol > s) {
10657 sv_catpvn(stuff, s, eol-s);
2dc4c65b 10658#ifndef PERL_STRICT_CR
7121b347
MG
10659 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10660 char *end = SvPVX(stuff) + SvCUR(stuff);
10661 end[-2] = '\n';
10662 end[-1] = '\0';
b162af07 10663 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 10664 }
2dc4c65b 10665#endif
7121b347
MG
10666 }
10667 else
10668 break;
79072805 10669 }
95a20fc0 10670 s = (char*)eol;
60d63348 10671 if (PL_rsfp || PL_parser->filtered) {
f0e67a1d 10672 bool got_some;
5db06880
NC
10673#ifdef PERL_MAD
10674 if (PL_madskills) {
cd81e915
NC
10675 if (PL_thistoken)
10676 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 10677 else
cd81e915 10678 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
10679 }
10680#endif
f0e67a1d
Z
10681 PL_bufptr = PL_bufend;
10682 CopLINE_inc(PL_curcop);
10683 got_some = lex_next_chunk(0);
10684 CopLINE_dec(PL_curcop);
10685 s = PL_bufptr;
5db06880 10686#ifdef PERL_MAD
f0e67a1d 10687 tokenstart = PL_bufptr;
5db06880 10688#endif
f0e67a1d 10689 if (!got_some)
378cc40b 10690 break;
378cc40b 10691 }
463ee0b2 10692 incline(s);
79072805 10693 }
a0d0e21e
LW
10694 enough:
10695 if (SvCUR(stuff)) {
3280af22 10696 PL_expect = XTERM;
79072805 10697 if (needargs) {
3280af22 10698 PL_lex_state = LEX_NORMAL;
cd81e915 10699 start_force(PL_curforce);
9ded7720 10700 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
10701 force_next(',');
10702 }
a0d0e21e 10703 else
3280af22 10704 PL_lex_state = LEX_FORMLINE;
1bd51a4c 10705 if (!IN_BYTES) {
95a20fc0 10706 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
10707 SvUTF8_on(stuff);
10708 else if (PL_encoding)
10709 sv_recode_to_utf8(stuff, PL_encoding);
10710 }
cd81e915 10711 start_force(PL_curforce);
9ded7720 10712 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 10713 force_next(THING);
cd81e915 10714 start_force(PL_curforce);
9ded7720 10715 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 10716 force_next(LSTOP);
378cc40b 10717 }
79072805 10718 else {
8990e307 10719 SvREFCNT_dec(stuff);
c5ee2135
WL
10720 if (eofmt)
10721 PL_lex_formbrack = 0;
3280af22 10722 PL_bufptr = s;
79072805 10723 }
5db06880
NC
10724#ifdef PERL_MAD
10725 if (PL_madskills) {
cd81e915
NC
10726 if (PL_thistoken)
10727 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 10728 else
cd81e915
NC
10729 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10730 PL_thiswhite = savewhite;
5db06880
NC
10731 }
10732#endif
79072805 10733 return s;
378cc40b 10734}
a687059c 10735
ba6d6ac9 10736I32
864dbfa3 10737Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 10738{
97aff369 10739 dVAR;
a3b680e6 10740 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 10741 CV* const outsidecv = PL_compcv;
8990e307 10742
3280af22
NIS
10743 if (PL_compcv) {
10744 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 10745 }
7766f137 10746 SAVEI32(PL_subline);
3280af22 10747 save_item(PL_subname);
3280af22 10748 SAVESPTR(PL_compcv);
3280af22 10749
ea726b52 10750 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
10751 CvFLAGS(PL_compcv) |= flags;
10752
57843af0 10753 PL_subline = CopLINE(PL_curcop);
dd2155a4 10754 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 10755 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 10756 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 10757
8990e307
LW
10758 return oldsavestack_ix;
10759}
10760
084592ab
CN
10761#ifdef __SC__
10762#pragma segment Perl_yylex
10763#endif
af41e527 10764static int
19c62481 10765S_yywarn(pTHX_ const char *const s, U32 flags)
8990e307 10766{
97aff369 10767 dVAR;
7918f24d
NC
10768
10769 PERL_ARGS_ASSERT_YYWARN;
10770
faef0170 10771 PL_in_eval |= EVAL_WARNONLY;
19c62481 10772 yyerror_pv(s, flags);
faef0170 10773 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 10774 return 0;
8990e307
LW
10775}
10776
10777int
15f169a1 10778Perl_yyerror(pTHX_ const char *const s)
463ee0b2 10779{
19c62481
BF
10780 PERL_ARGS_ASSERT_YYERROR;
10781 return yyerror_pvn(s, strlen(s), 0);
10782}
10783
10784int
10785Perl_yyerror_pv(pTHX_ const char *const s, U32 flags)
10786{
10787 PERL_ARGS_ASSERT_YYERROR_PV;
10788 return yyerror_pvn(s, strlen(s), flags);
10789}
10790
10791int
19c62481
BF
10792Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
10793{
97aff369 10794 dVAR;
bfed75c6 10795 const char *context = NULL;
68dc0745 10796 int contlen = -1;
46fc3d4c 10797 SV *msg;
19c62481 10798 SV * const where_sv = newSVpvs_flags("", SVs_TEMP);
5912531f 10799 int yychar = PL_parser->yychar;
19c62481 10800 U32 is_utf8 = flags & SVf_UTF8;
463ee0b2 10801
19c62481 10802 PERL_ARGS_ASSERT_YYERROR_PVN;
7918f24d 10803
3280af22 10804 if (!yychar || (yychar == ';' && !PL_rsfp))
19c62481 10805 sv_catpvs(where_sv, "at EOF");
8bcfe651
TM
10806 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10807 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10808 PL_oldbufptr != PL_bufptr) {
f355267c
JH
10809 /*
10810 Only for NetWare:
10811 The code below is removed for NetWare because it abends/crashes on NetWare
10812 when the script has error such as not having the closing quotes like:
10813 if ($var eq "value)
10814 Checking of white spaces is anyway done in NetWare code.
10815 */
10816#ifndef NETWARE
3280af22
NIS
10817 while (isSPACE(*PL_oldoldbufptr))
10818 PL_oldoldbufptr++;
f355267c 10819#endif
3280af22
NIS
10820 context = PL_oldoldbufptr;
10821 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 10822 }
8bcfe651
TM
10823 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10824 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
10825 /*
10826 Only for NetWare:
10827 The code below is removed for NetWare because it abends/crashes on NetWare
10828 when the script has error such as not having the closing quotes like:
10829 if ($var eq "value)
10830 Checking of white spaces is anyway done in NetWare code.
10831 */
10832#ifndef NETWARE
3280af22
NIS
10833 while (isSPACE(*PL_oldbufptr))
10834 PL_oldbufptr++;
f355267c 10835#endif
3280af22
NIS
10836 context = PL_oldbufptr;
10837 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
10838 }
10839 else if (yychar > 255)
19c62481 10840 sv_catpvs(where_sv, "next token ???");
12fbd33b 10841 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
10842 if (PL_lex_state == LEX_NORMAL ||
10843 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
19c62481 10844 sv_catpvs(where_sv, "at end of line");
3280af22 10845 else if (PL_lex_inpat)
19c62481 10846 sv_catpvs(where_sv, "within pattern");
463ee0b2 10847 else
19c62481 10848 sv_catpvs(where_sv, "within string");
463ee0b2 10849 }
46fc3d4c 10850 else {
19c62481 10851 sv_catpvs(where_sv, "next char ");
46fc3d4c 10852 if (yychar < 32)
cea2e8a9 10853 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 10854 else if (isPRINT_LC(yychar)) {
88c9ea1e 10855 const char string = yychar;
5e7aa789
NC
10856 sv_catpvn(where_sv, &string, 1);
10857 }
463ee0b2 10858 else
cea2e8a9 10859 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
463ee0b2 10860 }
19c62481 10861 msg = sv_2mortal(newSVpvn_flags(s, len, is_utf8));
ed094faf 10862 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 10863 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 10864 if (context)
19c62481
BF
10865 Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
10866 SVfARG(newSVpvn_flags(context, contlen,
10867 SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
463ee0b2 10868 else
19c62481 10869 Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
57843af0 10870 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 10871 Perl_sv_catpvf(aTHX_ msg,
57def98f 10872 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 10873 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 10874 PL_multi_end = 0;
a0d0e21e 10875 }
500960a6 10876 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 10877 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 10878 }
463ee0b2 10879 else
5a844595 10880 qerror(msg);
c7d6bfb2
GS
10881 if (PL_error_count >= 10) {
10882 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 10883 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 10884 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
10885 else
10886 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 10887 OutCopFILE(PL_curcop));
c7d6bfb2 10888 }
3280af22 10889 PL_in_my = 0;
5c284bb0 10890 PL_in_my_stash = NULL;
463ee0b2
LW
10891 return 0;
10892}
084592ab
CN
10893#ifdef __SC__
10894#pragma segment Main
10895#endif
4e35701f 10896
b250498f 10897STATIC char*
3ae08724 10898S_swallow_bom(pTHX_ U8 *s)
01ec43d0 10899{
97aff369 10900 dVAR;
f54cb97a 10901 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
10902
10903 PERL_ARGS_ASSERT_SWALLOW_BOM;
10904
7aa207d6 10905 switch (s[0]) {
4e553d73
NIS
10906 case 0xFF:
10907 if (s[1] == 0xFE) {
ee6ba15d 10908 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 10909 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
dcbac5bb 10910 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 10911 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 10912#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 10913 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 10914 s += 2;
dea0fc0b 10915 if (PL_bufend > (char*)s) {
81a923f4 10916 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 10917 }
b250498f 10918#else
dcbac5bb 10919 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 10920 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 10921#endif
01ec43d0
GS
10922 }
10923 break;
78ae23f5 10924 case 0xFE:
7aa207d6 10925 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 10926#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10927 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
10928 s += 2;
10929 if (PL_bufend > (char *)s) {
81a923f4 10930 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 10931 }
b250498f 10932#else
dcbac5bb 10933 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 10934 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 10935#endif
01ec43d0
GS
10936 }
10937 break;
3ae08724
GS
10938 case 0xEF:
10939 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 10940 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
10941 s += 3; /* UTF-8 */
10942 }
10943 break;
10944 case 0:
7aa207d6
JH
10945 if (slen > 3) {
10946 if (s[1] == 0) {
10947 if (s[2] == 0xFE && s[3] == 0xFF) {
10948 /* UTF-32 big-endian */
dcbac5bb 10949 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d 10950 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
10951 }
10952 }
10953 else if (s[2] == 0 && s[3] != 0) {
10954 /* Leading bytes
10955 * 00 xx 00 xx
10956 * are a good indicator of UTF-16BE. */
ee6ba15d 10957#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10958 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
10959 s = add_utf16_textfilter(s, FALSE);
10960#else
dcbac5bb 10961 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
10962 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10963#endif
7aa207d6 10964 }
01ec43d0 10965 }
e294cc5d
JH
10966#ifdef EBCDIC
10967 case 0xDD:
10968 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10969 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10970 s += 4; /* UTF-8 */
10971 }
10972 break;
10973#endif
10974
7aa207d6
JH
10975 default:
10976 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10977 /* Leading bytes
10978 * xx 00 xx 00
10979 * are a good indicator of UTF-16LE. */
ee6ba15d 10980#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10981 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 10982 s = add_utf16_textfilter(s, TRUE);
ee6ba15d 10983#else
dcbac5bb 10984 /* diag_listed_as: Unsupported script encoding %s */
ee6ba15d
EB
10985 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10986#endif
7aa207d6 10987 }
01ec43d0 10988 }
b8f84bb2 10989 return (char*)s;
b250498f 10990}
4755096e 10991
6e3aabd6
GS
10992
10993#ifndef PERL_NO_UTF16_FILTER
10994static I32
a28af015 10995S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 10996{
97aff369 10997 dVAR;
f3040f2c 10998 SV *const filter = FILTER_DATA(idx);
2a773401
NC
10999 /* We re-use this each time round, throwing the contents away before we
11000 return. */
2a773401 11001 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 11002 SV *const utf8_buffer = filter;
c28d6105 11003 IV status = IoPAGE(filter);
f2338a2e 11004 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 11005 I32 retval;
c8b0cbae 11006
c85ae797
NC
11007 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
11008
c8b0cbae
NC
11009 /* As we're automatically added, at the lowest level, and hence only called
11010 from this file, we can be sure that we're not called in block mode. Hence
11011 don't bother writing code to deal with block mode. */
11012 if (maxlen) {
11013 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
11014 }
c28d6105
NC
11015 if (status < 0) {
11016 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
11017 }
1de9afcd 11018 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 11019 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 11020 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
11021 reverse ? 'l' : 'b', idx, maxlen, status,
11022 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11023
11024 while (1) {
11025 STRLEN chars;
11026 STRLEN have;
dea0fc0b 11027 I32 newlen;
2a773401 11028 U8 *end;
c28d6105
NC
11029 /* First, look in our buffer of existing UTF-8 data: */
11030 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
11031
11032 if (nl) {
11033 ++nl;
11034 } else if (status == 0) {
11035 /* EOF */
11036 IoPAGE(filter) = 0;
11037 nl = SvEND(utf8_buffer);
11038 }
11039 if (nl) {
d2d1d4de
NC
11040 STRLEN got = nl - SvPVX(utf8_buffer);
11041 /* Did we have anything to append? */
11042 retval = got != 0;
11043 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
11044 /* Everything else in this code works just fine if SVp_POK isn't
11045 set. This, however, needs it, and we need it to work, else
11046 we loop infinitely because the buffer is never consumed. */
11047 sv_chop(utf8_buffer, nl);
11048 break;
11049 }
ba77e4cc 11050
c28d6105
NC
11051 /* OK, not a complete line there, so need to read some more UTF-16.
11052 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
11053 while (1) {
11054 if (status <= 0)
11055 break;
11056 if (SvCUR(utf16_buffer) >= 2) {
11057 /* Location of the high octet of the last complete code point.
11058 Gosh, UTF-16 is a pain. All the benefits of variable length,
11059 *coupled* with all the benefits of partial reads and
11060 endianness. */
11061 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
11062 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
11063
11064 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
11065 break;
11066 }
11067
11068 /* We have the first half of a surrogate. Read more. */
11069 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
11070 }
c28d6105 11071
c28d6105
NC
11072 status = FILTER_READ(idx + 1, utf16_buffer,
11073 160 + (SvCUR(utf16_buffer) & 1));
11074 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 11075 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
11076 if (status < 0) {
11077 /* Error */
11078 IoPAGE(filter) = status;
11079 return status;
11080 }
11081 }
11082
11083 chars = SvCUR(utf16_buffer) >> 1;
11084 have = SvCUR(utf8_buffer);
11085 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 11086
aa6dbd60 11087 if (reverse) {
c28d6105
NC
11088 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
11089 (U8*)SvPVX_const(utf8_buffer) + have,
11090 chars * 2, &newlen);
aa6dbd60 11091 } else {
2a773401 11092 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
11093 (U8*)SvPVX_const(utf8_buffer) + have,
11094 chars * 2, &newlen);
2a773401 11095 }
c28d6105 11096 SvCUR_set(utf8_buffer, have + newlen);
2a773401 11097 *end = '\0';
c28d6105 11098
e07286ed
NC
11099 /* No need to keep this SV "well-formed" with a '\0' after the end, as
11100 it's private to us, and utf16_to_utf8{,reversed} take a
11101 (pointer,length) pair, rather than a NUL-terminated string. */
11102 if(SvCUR(utf16_buffer) & 1) {
11103 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
11104 SvCUR_set(utf16_buffer, 1);
11105 } else {
11106 SvCUR_set(utf16_buffer, 0);
11107 }
2a773401 11108 }
c28d6105
NC
11109 DEBUG_P(PerlIO_printf(Perl_debug_log,
11110 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
11111 status,
11112 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
11113 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 11114 return retval;
6e3aabd6 11115}
81a923f4
NC
11116
11117static U8 *
11118S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
11119{
2a773401 11120 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 11121
c85ae797
NC
11122 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
11123
c28d6105 11124 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 11125 sv_setpvs(filter, "");
2a773401 11126 IoLINES(filter) = reversed;
c28d6105
NC
11127 IoPAGE(filter) = 1; /* Not EOF */
11128
11129 /* Sadly, we have to return a valid pointer, come what may, so we have to
11130 ignore any error return from this. */
11131 SvCUR_set(PL_linestr, 0);
11132 if (FILTER_READ(0, PL_linestr, 0)) {
11133 SvUTF8_on(PL_linestr);
81a923f4 11134 } else {
c28d6105 11135 SvUTF8_on(PL_linestr);
81a923f4 11136 }
c28d6105 11137 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
11138 return (U8*)SvPVX(PL_linestr);
11139}
6e3aabd6 11140#endif
9f4817db 11141
f333445c
JP
11142/*
11143Returns a pointer to the next character after the parsed
11144vstring, as well as updating the passed in sv.
11145
11146Function must be called like
11147
561b68a9 11148 sv = newSV(5);
65b06e02 11149 s = scan_vstring(s,e,sv);
f333445c 11150
65b06e02 11151where s and e are the start and end of the string.
f333445c
JP
11152The sv should already be large enough to store the vstring
11153passed in, for performance reasons.
11154
11155*/
11156
11157char *
15f169a1 11158Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 11159{
97aff369 11160 dVAR;
bfed75c6
AL
11161 const char *pos = s;
11162 const char *start = s;
7918f24d
NC
11163
11164 PERL_ARGS_ASSERT_SCAN_VSTRING;
11165
f333445c 11166 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 11167 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 11168 pos++;
f333445c
JP
11169 if ( *pos != '.') {
11170 /* this may not be a v-string if followed by => */
bfed75c6 11171 const char *next = pos;
65b06e02 11172 while (next < e && isSPACE(*next))
8fc7bb1c 11173 ++next;
65b06e02 11174 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
11175 /* return string not v-string */
11176 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 11177 return (char *)pos;
f333445c
JP
11178 }
11179 }
11180
11181 if (!isALPHA(*pos)) {
89ebb4a3 11182 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 11183
d4c19fe8
AL
11184 if (*s == 'v')
11185 s++; /* get past 'v' */
f333445c 11186
76f68e9b 11187 sv_setpvs(sv, "");
f333445c
JP
11188
11189 for (;;) {
d4c19fe8 11190 /* this is atoi() that tolerates underscores */
0bd48802
AL
11191 U8 *tmpend;
11192 UV rev = 0;
d4c19fe8
AL
11193 const char *end = pos;
11194 UV mult = 1;
11195 while (--end >= s) {
11196 if (*end != '_') {
11197 const UV orev = rev;
f333445c
JP
11198 rev += (*end - '0') * mult;
11199 mult *= 10;
9b387841 11200 if (orev > rev)
dcbac5bb 11201 /* diag_listed_as: Integer overflow in %s number */
9b387841
NC
11202 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11203 "Integer overflow in decimal number");
f333445c
JP
11204 }
11205 }
11206#ifdef EBCDIC
11207 if (rev > 0x7FFFFFFF)
11208 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11209#endif
11210 /* Append native character for the rev point */
11211 tmpend = uvchr_to_utf8(tmpbuf, rev);
11212 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11213 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11214 SvUTF8_on(sv);
65b06e02 11215 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
11216 s = ++pos;
11217 else {
11218 s = pos;
11219 break;
11220 }
65b06e02 11221 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
11222 pos++;
11223 }
11224 SvPOK_on(sv);
11225 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11226 SvRMAGICAL_on(sv);
11227 }
73d840c0 11228 return (char *)s;
f333445c
JP
11229}
11230
88e1f1a2
JV
11231int
11232Perl_keyword_plugin_standard(pTHX_
11233 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11234{
11235 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11236 PERL_UNUSED_CONTEXT;
11237 PERL_UNUSED_ARG(keyword_ptr);
11238 PERL_UNUSED_ARG(keyword_len);
11239 PERL_UNUSED_ARG(op_ptr);
11240 return KEYWORD_PLUGIN_DECLINE;
11241}
11242
78cdf107 11243#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11244static void
78cdf107 11245S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11246{
11247 SAVEI32(PL_lex_brackets);
11248 if (PL_lex_brackets > 100)
11249 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11250 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11251 SAVEI32(PL_lex_allbrackets);
11252 PL_lex_allbrackets = 0;
11253 SAVEI8(PL_lex_fakeeof);
2dcac756 11254 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11255 if(yyparse(gramtype) && !PL_parser->error_count)
11256 qerror(Perl_mess(aTHX_ "Parse error"));
11257}
11258
78cdf107 11259#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11260static OP *
78cdf107 11261S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11262{
11263 OP *o;
11264 ENTER;
11265 SAVEVPTR(PL_eval_root);
11266 PL_eval_root = NULL;
78cdf107 11267 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11268 o = PL_eval_root;
11269 LEAVE;
11270 return o;
11271}
11272
78cdf107
Z
11273#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11274static OP *
11275S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11276{
11277 OP *exprop;
11278 if (flags & ~PARSE_OPTIONAL)
11279 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11280 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11281 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11282 if (!PL_parser->error_count)
11283 qerror(Perl_mess(aTHX_ "Parse error"));
11284 exprop = newOP(OP_NULL, 0);
11285 }
11286 return exprop;
11287}
11288
11289/*
11290=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11291
11292Parse a Perl arithmetic expression. This may contain operators of precedence
11293down to the bit shift operators. The expression must be followed (and thus
11294terminated) either by a comparison or lower-precedence operator or by
11295something that would normally terminate an expression such as semicolon.
11296If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11297otherwise it is mandatory. It is up to the caller to ensure that the
11298dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11299the source of the code to be parsed and the lexical context for the
11300expression.
11301
11302The op tree representing the expression is returned. If an optional
11303expression is absent, a null pointer is returned, otherwise the pointer
11304will be non-null.
11305
11306If an error occurs in parsing or compilation, in most cases a valid op
11307tree is returned anyway. The error is reflected in the parser state,
11308normally resulting in a single exception at the top level of parsing
11309which covers all the compilation errors that occurred. Some compilation
11310errors, however, will throw an exception immediately.
11311
11312=cut
11313*/
11314
11315OP *
11316Perl_parse_arithexpr(pTHX_ U32 flags)
11317{
11318 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11319}
11320
11321/*
11322=for apidoc Amx|OP *|parse_termexpr|U32 flags
11323
11324Parse a Perl term expression. This may contain operators of precedence
11325down to the assignment operators. The expression must be followed (and thus
11326terminated) either by a comma or lower-precedence operator or by
11327something that would normally terminate an expression such as semicolon.
11328If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11329otherwise it is mandatory. It is up to the caller to ensure that the
11330dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11331the source of the code to be parsed and the lexical context for the
11332expression.
11333
11334The op tree representing the expression is returned. If an optional
11335expression is absent, a null pointer is returned, otherwise the pointer
11336will be non-null.
11337
11338If an error occurs in parsing or compilation, in most cases a valid op
11339tree is returned anyway. The error is reflected in the parser state,
11340normally resulting in a single exception at the top level of parsing
11341which covers all the compilation errors that occurred. Some compilation
11342errors, however, will throw an exception immediately.
11343
11344=cut
11345*/
11346
11347OP *
11348Perl_parse_termexpr(pTHX_ U32 flags)
11349{
11350 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11351}
11352
11353/*
11354=for apidoc Amx|OP *|parse_listexpr|U32 flags
11355
11356Parse a Perl list expression. This may contain operators of precedence
11357down to the comma operator. The expression must be followed (and thus
11358terminated) either by a low-precedence logic operator such as C<or> or by
11359something that would normally terminate an expression such as semicolon.
11360If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11361otherwise it is mandatory. It is up to the caller to ensure that the
11362dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11363the source of the code to be parsed and the lexical context for the
11364expression.
11365
11366The op tree representing the expression is returned. If an optional
11367expression is absent, a null pointer is returned, otherwise the pointer
11368will be non-null.
11369
11370If an error occurs in parsing or compilation, in most cases a valid op
11371tree is returned anyway. The error is reflected in the parser state,
11372normally resulting in a single exception at the top level of parsing
11373which covers all the compilation errors that occurred. Some compilation
11374errors, however, will throw an exception immediately.
11375
11376=cut
11377*/
11378
11379OP *
11380Perl_parse_listexpr(pTHX_ U32 flags)
11381{
11382 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11383}
11384
11385/*
11386=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11387
11388Parse a single complete Perl expression. This allows the full
11389expression grammar, including the lowest-precedence operators such
11390as C<or>. The expression must be followed (and thus terminated) by a
11391token that an expression would normally be terminated by: end-of-file,
11392closing bracketing punctuation, semicolon, or one of the keywords that
11393signals a postfix expression-statement modifier. If I<flags> includes
11394C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11395mandatory. It is up to the caller to ensure that the dynamic parser
11396state (L</PL_parser> et al) is correctly set to reflect the source of
11397the code to be parsed and the lexical context for the expression.
11398
11399The op tree representing the expression is returned. If an optional
11400expression is absent, a null pointer is returned, otherwise the pointer
11401will be non-null.
11402
11403If an error occurs in parsing or compilation, in most cases a valid op
11404tree is returned anyway. The error is reflected in the parser state,
11405normally resulting in a single exception at the top level of parsing
11406which covers all the compilation errors that occurred. Some compilation
11407errors, however, will throw an exception immediately.
11408
11409=cut
11410*/
11411
11412OP *
11413Perl_parse_fullexpr(pTHX_ U32 flags)
11414{
11415 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11416}
11417
e53d8f76
Z
11418/*
11419=for apidoc Amx|OP *|parse_block|U32 flags
11420
11421Parse a single complete Perl code block. This consists of an opening
11422brace, a sequence of statements, and a closing brace. The block
11423constitutes a lexical scope, so C<my> variables and various compile-time
11424effects can be contained within it. It is up to the caller to ensure
11425that the dynamic parser state (L</PL_parser> et al) is correctly set to
11426reflect the source of the code to be parsed and the lexical context for
11427the statement.
11428
11429The op tree representing the code block is returned. This is always a
11430real op, never a null pointer. It will normally be a C<lineseq> list,
11431including C<nextstate> or equivalent ops. No ops to construct any kind
11432of runtime scope are included by virtue of it being a block.
11433
11434If an error occurs in parsing or compilation, in most cases a valid op
11435tree (most likely null) is returned anyway. The error is reflected in
11436the parser state, normally resulting in a single exception at the top
11437level of parsing which covers all the compilation errors that occurred.
11438Some compilation errors, however, will throw an exception immediately.
11439
11440The I<flags> parameter is reserved for future use, and must always
11441be zero.
11442
11443=cut
11444*/
11445
11446OP *
11447Perl_parse_block(pTHX_ U32 flags)
11448{
11449 if (flags)
11450 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11451 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11452}
11453
1da4ca5f 11454/*
8359b381
Z
11455=for apidoc Amx|OP *|parse_barestmt|U32 flags
11456
11457Parse a single unadorned Perl statement. This may be a normal imperative
11458statement or a declaration that has compile-time effect. It does not
11459include any label or other affixture. It is up to the caller to ensure
11460that the dynamic parser state (L</PL_parser> et al) is correctly set to
11461reflect the source of the code to be parsed and the lexical context for
11462the statement.
11463
11464The op tree representing the statement is returned. This may be a
11465null pointer if the statement is null, for example if it was actually
11466a subroutine definition (which has compile-time side effects). If not
11467null, it will be ops directly implementing the statement, suitable to
11468pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11469equivalent op (except for those embedded in a scope contained entirely
11470within the statement).
11471
11472If an error occurs in parsing or compilation, in most cases a valid op
11473tree (most likely null) is returned anyway. The error is reflected in
11474the parser state, normally resulting in a single exception at the top
11475level of parsing which covers all the compilation errors that occurred.
11476Some compilation errors, however, will throw an exception immediately.
11477
11478The I<flags> parameter is reserved for future use, and must always
11479be zero.
11480
11481=cut
11482*/
11483
11484OP *
11485Perl_parse_barestmt(pTHX_ U32 flags)
11486{
11487 if (flags)
11488 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11489 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11490}
11491
11492/*
361d9b55
Z
11493=for apidoc Amx|SV *|parse_label|U32 flags
11494
11495Parse a single label, possibly optional, of the type that may prefix a
11496Perl statement. It is up to the caller to ensure that the dynamic parser
11497state (L</PL_parser> et al) is correctly set to reflect the source of
11498the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11499label is optional, otherwise it is mandatory.
11500
11501The name of the label is returned in the form of a fresh scalar. If an
11502optional label is absent, a null pointer is returned.
11503
11504If an error occurs in parsing, which can only occur if the label is
11505mandatory, a valid label is returned anyway. The error is reflected in
11506the parser state, normally resulting in a single exception at the top
11507level of parsing which covers all the compilation errors that occurred.
11508
11509=cut
11510*/
11511
11512SV *
11513Perl_parse_label(pTHX_ U32 flags)
11514{
11515 if (flags & ~PARSE_OPTIONAL)
11516 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11517 if (PL_lex_state == LEX_KNOWNEXT) {
11518 PL_parser->yychar = yylex();
11519 if (PL_parser->yychar == LABEL) {
361d9b55
Z
11520 SV *lsv;
11521 PL_parser->yychar = YYEMPTY;
11522 lsv = newSV_type(SVt_PV);
fefd015f 11523 sv_copypv(lsv, cSVOPx(pl_yylval.opval)->op_sv);
361d9b55
Z
11524 return lsv;
11525 } else {
11526 yyunlex();
11527 goto no_label;
11528 }
11529 } else {
11530 char *s, *t;
361d9b55
Z
11531 STRLEN wlen, bufptr_pos;
11532 lex_read_space(0);
11533 t = s = PL_bufptr;
5db1eb8d 11534 if (!isIDFIRST_lazy_if(s, UTF))
361d9b55 11535 goto no_label;
5db1eb8d 11536 t = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &wlen);
361d9b55
Z
11537 if (word_takes_any_delimeter(s, wlen))
11538 goto no_label;
11539 bufptr_pos = s - SvPVX(PL_linestr);
11540 PL_bufptr = t;
11541 lex_read_space(LEX_KEEP_PREVIOUS);
11542 t = PL_bufptr;
11543 s = SvPVX(PL_linestr) + bufptr_pos;
11544 if (t[0] == ':' && t[1] != ':') {
11545 PL_oldoldbufptr = PL_oldbufptr;
11546 PL_oldbufptr = s;
11547 PL_bufptr = t+1;
5db1eb8d 11548 return newSVpvn_flags(s, wlen, UTF ? SVf_UTF8 : 0);
361d9b55
Z
11549 } else {
11550 PL_bufptr = s;
11551 no_label:
11552 if (flags & PARSE_OPTIONAL) {
11553 return NULL;
11554 } else {
11555 qerror(Perl_mess(aTHX_ "Parse error"));
11556 return newSVpvs("x");
11557 }
11558 }
11559 }
11560}
11561
11562/*
28ac2b49
Z
11563=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11564
11565Parse a single complete Perl statement. This may be a normal imperative
8359b381 11566statement or a declaration that has compile-time effect, and may include
8e720305 11567optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11568parser state (L</PL_parser> et al) is correctly set to reflect the source
11569of the code to be parsed and the lexical context for the statement.
11570
11571The op tree representing the statement is returned. This may be a
11572null pointer if the statement is null, for example if it was actually
11573a subroutine definition (which has compile-time side effects). If not
11574null, it will be the result of a L</newSTATEOP> call, normally including
11575a C<nextstate> or equivalent op.
11576
11577If an error occurs in parsing or compilation, in most cases a valid op
11578tree (most likely null) is returned anyway. The error is reflected in
11579the parser state, normally resulting in a single exception at the top
11580level of parsing which covers all the compilation errors that occurred.
11581Some compilation errors, however, will throw an exception immediately.
11582
11583The I<flags> parameter is reserved for future use, and must always
11584be zero.
11585
11586=cut
11587*/
11588
11589OP *
11590Perl_parse_fullstmt(pTHX_ U32 flags)
11591{
28ac2b49
Z
11592 if (flags)
11593 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11594 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11595}
11596
07ffcb73
Z
11597/*
11598=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11599
11600Parse a sequence of zero or more Perl statements. These may be normal
11601imperative statements, including optional labels, or declarations
11602that have compile-time effect, or any mixture thereof. The statement
11603sequence ends when a closing brace or end-of-file is encountered in a
11604place where a new statement could have validly started. It is up to
11605the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11606is correctly set to reflect the source of the code to be parsed and the
11607lexical context for the statements.
11608
11609The op tree representing the statement sequence is returned. This may
11610be a null pointer if the statements were all null, for example if there
11611were no statements or if there were only subroutine definitions (which
11612have compile-time side effects). If not null, it will be a C<lineseq>
11613list, normally including C<nextstate> or equivalent ops.
11614
11615If an error occurs in parsing or compilation, in most cases a valid op
11616tree is returned anyway. The error is reflected in the parser state,
11617normally resulting in a single exception at the top level of parsing
11618which covers all the compilation errors that occurred. Some compilation
11619errors, however, will throw an exception immediately.
11620
11621The I<flags> parameter is reserved for future use, and must always
11622be zero.
11623
11624=cut
11625*/
11626
11627OP *
11628Perl_parse_stmtseq(pTHX_ U32 flags)
11629{
11630 OP *stmtseqop;
e53d8f76 11631 I32 c;
07ffcb73 11632 if (flags)
78cdf107
Z
11633 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11634 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
11635 c = lex_peek_unichar(0);
11636 if (c != -1 && c != /*{*/'}')
07ffcb73 11637 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
11638 return stmtseqop;
11639}
11640
28ac2b49 11641/*
1da4ca5f
NC
11642 * Local variables:
11643 * c-indentation-style: bsd
11644 * c-basic-offset: 4
14d04a33 11645 * indent-tabs-mode: nil
1da4ca5f
NC
11646 * End:
11647 *
14d04a33 11648 * ex: set ts=8 sts=4 sw=4 et:
37442d52 11649 */