This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
mktables: Output ISO_Comment table
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
802a15e9 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || ( !(PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS) && (PL_hints & HINT_UTF8)))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805
LW
150 * can get by with a single comparison (if the compiler is smart enough).
151 */
152
fb73857a 153/* #define LEX_NOTPARSING 11 is done in perl.h. */
154
b6007c36
DM
155#define LEX_NORMAL 10 /* normal code (ie not within "...") */
156#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159#define LEX_INTERPSTART 6 /* expecting the start of a $var */
160
161 /* at end of code, eg "$x" followed by: */
162#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164
165#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167#define LEX_INTERPCONST 2 /* NOT USED */
168#define LEX_FORMLINE 1 /* expecting a format line */
169#define LEX_KNOWNEXT 0 /* next token known; just return it */
170
79072805 171
bbf60fe6 172#ifdef DEBUGGING
27da23d5 173static const char* const lex_state_names[] = {
bbf60fe6
DM
174 "KNOWNEXT",
175 "FORMLINE",
176 "INTERPCONST",
177 "INTERPCONCAT",
178 "INTERPENDMAYBE",
179 "INTERPEND",
180 "INTERPSTART",
181 "INTERPPUSH",
182 "INTERPCASEMOD",
183 "INTERPNORMAL",
184 "NORMAL"
185};
186#endif
187
79072805
LW
188#ifdef ff_next
189#undef ff_next
d48672a2
LW
190#endif
191
79072805 192#include "keywords.h"
fe14fcc3 193
ffb4593c
NT
194/* CLINE is a macro that ensures PL_copline has a sane value */
195
ae986130
LW
196#ifdef CLINE
197#undef CLINE
198#endif
57843af0 199#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 200
5db06880 201#ifdef PERL_MAD
29595ff2
NC
202# define SKIPSPACE0(s) skipspace0(s)
203# define SKIPSPACE1(s) skipspace1(s)
204# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205# define PEEKSPACE(s) skipspace2(s,0)
206#else
207# define SKIPSPACE0(s) skipspace(s)
208# define SKIPSPACE1(s) skipspace(s)
209# define SKIPSPACE2(s,tsv) skipspace(s)
210# define PEEKSPACE(s) skipspace(s)
211#endif
212
ffb4593c
NT
213/*
214 * Convenience functions to return different tokens and prime the
9cbb5ea2 215 * lexer for the next token. They all take an argument.
ffb4593c
NT
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
7eb971ee 227 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 228 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
229 * BOop : bitwise or or xor
230 * BAop : bitwise and
231 * SHop : shift operator
232 * PWop : power operator
9cbb5ea2 233 * PMop : pattern-matching operator
ffb4593c
NT
234 * Aop : addition-level operator
235 * Mop : multiplication-level operator
236 * Eop : equality-testing operator
e5edeb50 237 * Rop : relational operator <= != gt
ffb4593c
NT
238 *
239 * Also see LOP and lop() below.
240 */
241
998054bd 242#ifdef DEBUGGING /* Serve -DT. */
704d4215 243# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 244#else
bbf60fe6 245# define REPORT(retval) (retval)
998054bd
SC
246#endif
247
bbf60fe6
DM
248#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
249#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
250#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
251#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
252#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
253#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
254#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
255#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
256#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
257#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 258#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
259#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
260#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
261#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
262#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
263#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
264#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
265#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
266#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
267#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
268#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 269
a687059c
LW
270/* This bit of chicanery makes a unary function followed by
271 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
272 * The UNIDOR macro is for unary functions that can be followed by the //
273 * operator (such as C<shift // 0>).
a687059c 274 */
376fcdbf 275#define UNI2(f,x) { \
6154021b 276 pl_yylval.ival = f; \
376fcdbf
AL
277 PL_expect = x; \
278 PL_bufptr = s; \
279 PL_last_uni = PL_oldbufptr; \
280 PL_last_lop_op = f; \
281 if (*s == '(') \
282 return REPORT( (int)FUNC1 ); \
29595ff2 283 s = PEEKSPACE(s); \
376fcdbf
AL
284 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
285 }
6f33ba73
RGS
286#define UNI(f) UNI2(f,XTERM)
287#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 288
376fcdbf 289#define UNIBRACK(f) { \
6154021b 290 pl_yylval.ival = f; \
376fcdbf
AL
291 PL_bufptr = s; \
292 PL_last_uni = PL_oldbufptr; \
293 if (*s == '(') \
294 return REPORT( (int)FUNC1 ); \
29595ff2 295 s = PEEKSPACE(s); \
376fcdbf
AL
296 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
297 }
79072805 298
9f68db38 299/* grandfather return to old style */
78cdf107
Z
300#define OLDLOP(f) \
301 do { \
302 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
303 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
304 pl_yylval.ival = (f); \
305 PL_expect = XTERM; \
306 PL_bufptr = s; \
307 return (int)LSTOP; \
308 } while(0)
79072805 309
8fa7f367
JH
310#ifdef DEBUGGING
311
6154021b 312/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
313enum token_type {
314 TOKENTYPE_NONE,
315 TOKENTYPE_IVAL,
6154021b 316 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
317 TOKENTYPE_PVAL,
318 TOKENTYPE_OPVAL,
319 TOKENTYPE_GVVAL
320};
321
6d4a66ac
NC
322static struct debug_tokens {
323 const int token;
324 enum token_type type;
325 const char *name;
326} const debug_tokens[] =
9041c2e3 327{
bbf60fe6
DM
328 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
329 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
330 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
331 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
332 { ARROW, TOKENTYPE_NONE, "ARROW" },
333 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
334 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
335 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
336 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
337 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 338 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
339 { DO, TOKENTYPE_NONE, "DO" },
340 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
341 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
342 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
343 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
344 { ELSE, TOKENTYPE_NONE, "ELSE" },
345 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
346 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
347 { FOR, TOKENTYPE_IVAL, "FOR" },
348 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
349 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
350 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 351 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
352 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
353 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
354 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 355 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
356 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
357 { IF, TOKENTYPE_IVAL, "IF" },
358 { LABEL, TOKENTYPE_PVAL, "LABEL" },
359 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
360 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
361 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
362 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
363 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
364 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
365 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
366 { MY, TOKENTYPE_IVAL, "MY" },
367 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
368 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
369 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
370 { OROP, TOKENTYPE_IVAL, "OROP" },
371 { OROR, TOKENTYPE_NONE, "OROR" },
372 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
373 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
374 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
375 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
376 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
377 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
378 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
379 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
380 { PREINC, TOKENTYPE_NONE, "PREINC" },
381 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
382 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
383 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
384 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
385 { SUB, TOKENTYPE_NONE, "SUB" },
386 { THING, TOKENTYPE_OPVAL, "THING" },
387 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
388 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
389 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
390 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
391 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
392 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 393 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
394 { WHILE, TOKENTYPE_IVAL, "WHILE" },
395 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 396 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 397 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
398};
399
6154021b 400/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 401
bbf60fe6 402STATIC int
704d4215 403S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 404{
97aff369 405 dVAR;
7918f24d
NC
406
407 PERL_ARGS_ASSERT_TOKEREPORT;
408
bbf60fe6 409 if (DEBUG_T_TEST) {
bd61b366 410 const char *name = NULL;
bbf60fe6 411 enum token_type type = TOKENTYPE_NONE;
f54cb97a 412 const struct debug_tokens *p;
396482e1 413 SV* const report = newSVpvs("<== ");
bbf60fe6 414
f54cb97a 415 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
416 if (p->token == (int)rv) {
417 name = p->name;
418 type = p->type;
419 break;
420 }
421 }
422 if (name)
54667de8 423 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
424 else if ((char)rv > ' ' && (char)rv < '~')
425 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
426 else if (!rv)
396482e1 427 sv_catpvs(report, "EOF");
bbf60fe6
DM
428 else
429 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
430 switch (type) {
431 case TOKENTYPE_NONE:
432 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
433 break;
434 case TOKENTYPE_IVAL:
704d4215 435 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
436 break;
437 case TOKENTYPE_OPNUM:
438 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 439 PL_op_name[lvalp->ival]);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_PVAL:
704d4215 442 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
443 break;
444 case TOKENTYPE_OPVAL:
704d4215 445 if (lvalp->opval) {
401441c0 446 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
447 PL_op_name[lvalp->opval->op_type]);
448 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 449 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 450 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
451 }
452
453 }
401441c0 454 else
396482e1 455 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
456 break;
457 }
b6007c36 458 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
459 };
460 return (int)rv;
998054bd
SC
461}
462
b6007c36
DM
463
464/* print the buffer with suitable escapes */
465
466STATIC void
15f169a1 467S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 468{
396482e1 469 SV* const tmp = newSVpvs("");
7918f24d
NC
470
471 PERL_ARGS_ASSERT_PRINTBUF;
472
b6007c36
DM
473 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
474 SvREFCNT_dec(tmp);
475}
476
8fa7f367
JH
477#endif
478
8290c323
NC
479static int
480S_deprecate_commaless_var_list(pTHX) {
481 PL_expect = XTERM;
482 deprecate("comma-less variable list");
483 return REPORT(','); /* grandfather non-comma-format format */
484}
485
ffb4593c
NT
486/*
487 * S_ao
488 *
c963b151
BD
489 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
490 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
491 */
492
76e3520e 493STATIC int
cea2e8a9 494S_ao(pTHX_ int toketype)
a0d0e21e 495{
97aff369 496 dVAR;
3280af22
NIS
497 if (*PL_bufptr == '=') {
498 PL_bufptr++;
a0d0e21e 499 if (toketype == ANDAND)
6154021b 500 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 501 else if (toketype == OROR)
6154021b 502 pl_yylval.ival = OP_ORASSIGN;
c963b151 503 else if (toketype == DORDOR)
6154021b 504 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
505 toketype = ASSIGNOP;
506 }
507 return toketype;
508}
509
ffb4593c
NT
510/*
511 * S_no_op
512 * When Perl expects an operator and finds something else, no_op
513 * prints the warning. It always prints "<something> found where
514 * operator expected. It prints "Missing semicolon on previous line?"
515 * if the surprise occurs at the start of the line. "do you need to
516 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
517 * where the compiler doesn't know if foo is a method call or a function.
518 * It prints "Missing operator before end of line" if there's nothing
519 * after the missing operator, or "... before <...>" if there is something
520 * after the missing operator.
521 */
522
76e3520e 523STATIC void
15f169a1 524S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 525{
97aff369 526 dVAR;
9d4ba2ae
AL
527 char * const oldbp = PL_bufptr;
528 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 529
7918f24d
NC
530 PERL_ARGS_ASSERT_NO_OP;
531
1189a94a
GS
532 if (!s)
533 s = oldbp;
07c798fb 534 else
1189a94a 535 PL_bufptr = s;
cea2e8a9 536 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
537 if (ckWARN_d(WARN_SYNTAX)) {
538 if (is_first)
539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
540 "\t(Missing semicolon on previous line?)\n");
541 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 542 const char *t;
c35e046a
AL
543 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
544 NOOP;
56da5a46
RGS
545 if (t < PL_bufptr && isSPACE(*t))
546 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
547 "\t(Do you need to predeclare %.*s?)\n",
551405c4 548 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
549 }
550 else {
551 assert(s >= oldbp);
552 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 553 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 554 }
07c798fb 555 }
3280af22 556 PL_bufptr = oldbp;
8990e307
LW
557}
558
ffb4593c
NT
559/*
560 * S_missingterm
561 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 562 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
563 * If we're in a delimited string and the delimiter is a control
564 * character, it's reformatted into a two-char sequence like ^C.
565 * This is fatal.
566 */
567
76e3520e 568STATIC void
cea2e8a9 569S_missingterm(pTHX_ char *s)
8990e307 570{
97aff369 571 dVAR;
8990e307
LW
572 char tmpbuf[3];
573 char q;
574 if (s) {
9d4ba2ae 575 char * const nl = strrchr(s,'\n');
d2719217 576 if (nl)
8990e307
LW
577 *nl = '\0';
578 }
463559e7 579 else if (isCNTRL(PL_multi_close)) {
8990e307 580 *tmpbuf = '^';
585ec06d 581 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
582 tmpbuf[2] = '\0';
583 s = tmpbuf;
584 }
585 else {
eb160463 586 *tmpbuf = (char)PL_multi_close;
8990e307
LW
587 tmpbuf[1] = '\0';
588 s = tmpbuf;
589 }
590 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 591 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 592}
79072805 593
0d863452 594/*
0d863452
RH
595 * Check whether the named feature is enabled.
596 */
26ea9e12
NC
597bool
598Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 599{
97aff369 600 dVAR;
0d863452 601 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 602 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
603
604 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
605
26ea9e12
NC
606 if (namelen > MAX_FEATURE_LEN)
607 return FALSE;
4a731d7b 608 memcpy(&he_name[8], name, namelen);
d4c19fe8 609
7b9ef140 610 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
611}
612
ffb4593c 613/*
9cbb5ea2
GS
614 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
615 * utf16-to-utf8-reversed.
ffb4593c
NT
616 */
617
c39cd008
GS
618#ifdef PERL_CR_FILTER
619static void
620strip_return(SV *sv)
621{
95a20fc0 622 register const char *s = SvPVX_const(sv);
9d4ba2ae 623 register const char * const e = s + SvCUR(sv);
7918f24d
NC
624
625 PERL_ARGS_ASSERT_STRIP_RETURN;
626
c39cd008
GS
627 /* outer loop optimized to do nothing if there are no CR-LFs */
628 while (s < e) {
629 if (*s++ == '\r' && *s == '\n') {
630 /* hit a CR-LF, need to copy the rest */
631 register char *d = s - 1;
632 *d++ = *s++;
633 while (s < e) {
634 if (*s == '\r' && s[1] == '\n')
635 s++;
636 *d++ = *s++;
637 }
638 SvCUR(sv) -= s - d;
639 return;
640 }
641 }
642}
a868473f 643
76e3520e 644STATIC I32
c39cd008 645S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 646{
f54cb97a 647 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
648 if (count > 0 && !maxlen)
649 strip_return(sv);
650 return count;
a868473f
NIS
651}
652#endif
653
ffb4593c 654/*
8eaa0acf
Z
655=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
656
657Creates and initialises a new lexer/parser state object, supplying
658a context in which to lex and parse from a new source of Perl code.
659A pointer to the new state object is placed in L</PL_parser>. An entry
660is made on the save stack so that upon unwinding the new state object
661will be destroyed and the former value of L</PL_parser> will be restored.
662Nothing else need be done to clean up the parsing context.
663
664The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
665non-null, provides a string (in SV form) containing code to be parsed.
666A copy of the string is made, so subsequent modification of I<line>
667does not affect parsing. I<rsfp>, if non-null, provides an input stream
668from which code will be read to be parsed. If both are non-null, the
669code in I<line> comes first and must consist of complete lines of input,
670and I<rsfp> supplies the remainder of the source.
671
e368b3bd
FC
672The I<flags> parameter is reserved for future use. Currently it is only
673used by perl internally, so extensions should always pass zero.
8eaa0acf
Z
674
675=cut
676*/
ffb4593c 677
27fcb6ee
FC
678/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
679 can share filters with the current parser. */
680
a0d0e21e 681void
8eaa0acf 682Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 683{
97aff369 684 dVAR;
6ef55633 685 const char *s = NULL;
5486870f 686 yy_parser *parser, *oparser;
60d63348 687 if (flags && flags & ~LEX_START_FLAGS)
8eaa0acf 688 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
689
690 /* create and initialise a parser */
691
199e78b7 692 Newxz(parser, 1, yy_parser);
5486870f 693 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
694 PL_parser = parser;
695
28ac2b49
Z
696 parser->stack = NULL;
697 parser->ps = NULL;
698 parser->stack_size = 0;
acdf0a21 699
e3abe207
DM
700 /* on scope exit, free this parser and restore any outer one */
701 SAVEPARSER(parser);
7c4baf47 702 parser->saved_curcop = PL_curcop;
e3abe207 703
acdf0a21 704 /* initialise lexer state */
8990e307 705
fb205e7a
DM
706#ifdef PERL_MAD
707 parser->curforce = -1;
708#else
709 parser->nexttoke = 0;
710#endif
ca4cfd28 711 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 712 parser->copline = NOLINE;
5afb0a62 713 parser->lex_state = LEX_NORMAL;
c2598295 714 parser->expect = XSTATE;
2f9285f8 715 parser->rsfp = rsfp;
27fcb6ee
FC
716 parser->rsfp_filters =
717 !(flags & LEX_START_SAME_FILTER) || !oparser
718 ? newAV()
719 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 720
199e78b7
DM
721 Newx(parser->lex_brackstack, 120, char);
722 Newx(parser->lex_casestack, 12, char);
723 *parser->lex_casestack = '\0';
02b34bbe 724
10efb74f 725 if (line) {
0528fd32 726 STRLEN len;
10efb74f 727 s = SvPV_const(line, len);
0abcdfa4
FC
728 parser->linestr = flags & LEX_START_COPIED
729 ? SvREFCNT_inc_simple_NN(line)
730 : newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 731 if (s[len-1] != ';')
bdc0bf6f 732 sv_catpvs(parser->linestr, "\n;");
0abcdfa4
FC
733 } else {
734 parser->linestr = newSVpvs("\n;");
8990e307 735 }
f06b5848
DM
736 parser->oldoldbufptr =
737 parser->oldbufptr =
738 parser->bufptr =
739 parser->linestart = SvPVX(parser->linestr);
740 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
741 parser->last_lop = parser->last_uni = NULL;
60d63348 742 parser->lex_flags = flags & (LEX_IGNORE_UTF8_HINTS|LEX_EVALBYTES);
737c24fc 743
60d63348 744 parser->in_pod = parser->filtered = 0;
79072805 745}
a687059c 746
e3abe207
DM
747
748/* delete a parser object */
749
750void
751Perl_parser_free(pTHX_ const yy_parser *parser)
752{
7918f24d
NC
753 PERL_ARGS_ASSERT_PARSER_FREE;
754
7c4baf47 755 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
756 SvREFCNT_dec(parser->linestr);
757
2f9285f8
DM
758 if (parser->rsfp == PerlIO_stdin())
759 PerlIO_clearerr(parser->rsfp);
799361c3
SH
760 else if (parser->rsfp && (!parser->old_parser ||
761 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 762 PerlIO_close(parser->rsfp);
5486870f 763 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 764
e3abe207
DM
765 Safefree(parser->lex_brackstack);
766 Safefree(parser->lex_casestack);
767 PL_parser = parser->old_parser;
768 Safefree(parser);
769}
770
771
ffb4593c 772/*
f0e67a1d
Z
773=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
774
775Buffer scalar containing the chunk currently under consideration of the
776text currently being lexed. This is always a plain string scalar (for
777which C<SvPOK> is true). It is not intended to be used as a scalar by
778normal scalar means; instead refer to the buffer directly by the pointer
779variables described below.
780
781The lexer maintains various C<char*> pointers to things in the
782C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
783reallocated, all of these pointers must be updated. Don't attempt to
784do this manually, but rather use L</lex_grow_linestr> if you need to
785reallocate the buffer.
786
787The content of the text chunk in the buffer is commonly exactly one
788complete line of input, up to and including a newline terminator,
789but there are situations where it is otherwise. The octets of the
790buffer may be intended to be interpreted as either UTF-8 or Latin-1.
791The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
792flag on this scalar, which may disagree with it.
793
794For direct examination of the buffer, the variable
795L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
796lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
797of these pointers is usually preferable to examination of the scalar
798through normal scalar means.
799
800=for apidoc AmxU|char *|PL_parser-E<gt>bufend
801
802Direct pointer to the end of the chunk of text currently being lexed, the
803end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
804+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
805always located at the end of the buffer, and does not count as part of
806the buffer's contents.
807
808=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
809
810Points to the current position of lexing inside the lexer buffer.
811Characters around this point may be freely examined, within
812the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
813L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
814interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
815
816Lexing code (whether in the Perl core or not) moves this pointer past
817the characters that it consumes. It is also expected to perform some
818bookkeeping whenever a newline character is consumed. This movement
819can be more conveniently performed by the function L</lex_read_to>,
820which handles newlines appropriately.
821
822Interpretation of the buffer's octets can be abstracted out by
823using the slightly higher-level functions L</lex_peek_unichar> and
824L</lex_read_unichar>.
825
826=for apidoc AmxU|char *|PL_parser-E<gt>linestart
827
828Points to the start of the current line inside the lexer buffer.
829This is useful for indicating at which column an error occurred, and
830not much else. This must be updated by any lexing code that consumes
831a newline; the function L</lex_read_to> handles this detail.
832
833=cut
834*/
835
836/*
837=for apidoc Amx|bool|lex_bufutf8
838
839Indicates whether the octets in the lexer buffer
840(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
841of Unicode characters. If not, they should be interpreted as Latin-1
842characters. This is analogous to the C<SvUTF8> flag for scalars.
843
844In UTF-8 mode, it is not guaranteed that the lexer buffer actually
845contains valid UTF-8. Lexing code must be robust in the face of invalid
846encoding.
847
848The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
849is significant, but not the whole story regarding the input character
850encoding. Normally, when a file is being read, the scalar contains octets
851and its C<SvUTF8> flag is off, but the octets should be interpreted as
852UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
853however, the scalar may have the C<SvUTF8> flag on, and in this case its
854octets should be interpreted as UTF-8 unless the C<use bytes> pragma
855is in effect. This logic may change in the future; use this function
856instead of implementing the logic yourself.
857
858=cut
859*/
860
861bool
862Perl_lex_bufutf8(pTHX)
863{
864 return UTF;
865}
866
867/*
868=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
869
870Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
871at least I<len> octets (including terminating NUL). Returns a
872pointer to the reallocated buffer. This is necessary before making
873any direct modification of the buffer that would increase its length.
874L</lex_stuff_pvn> provides a more convenient way to insert text into
875the buffer.
876
877Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
878this function updates all of the lexer's variables that point directly
879into the buffer.
880
881=cut
882*/
883
884char *
885Perl_lex_grow_linestr(pTHX_ STRLEN len)
886{
887 SV *linestr;
888 char *buf;
889 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
890 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
891 linestr = PL_parser->linestr;
892 buf = SvPVX(linestr);
893 if (len <= SvLEN(linestr))
894 return buf;
895 bufend_pos = PL_parser->bufend - buf;
896 bufptr_pos = PL_parser->bufptr - buf;
897 oldbufptr_pos = PL_parser->oldbufptr - buf;
898 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
899 linestart_pos = PL_parser->linestart - buf;
900 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
901 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
902 buf = sv_grow(linestr, len);
903 PL_parser->bufend = buf + bufend_pos;
904 PL_parser->bufptr = buf + bufptr_pos;
905 PL_parser->oldbufptr = buf + oldbufptr_pos;
906 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
907 PL_parser->linestart = buf + linestart_pos;
908 if (PL_parser->last_uni)
909 PL_parser->last_uni = buf + last_uni_pos;
910 if (PL_parser->last_lop)
911 PL_parser->last_lop = buf + last_lop_pos;
912 return buf;
913}
914
915/*
83aa740e 916=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
917
918Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
919immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
920reallocating the buffer if necessary. This means that lexing code that
921runs later will see the characters as if they had appeared in the input.
922It is not recommended to do this as part of normal parsing, and most
923uses of this facility run the risk of the inserted characters being
924interpreted in an unintended manner.
925
926The string to be inserted is represented by I<len> octets starting
927at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
928according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
929The characters are recoded for the lexer buffer, according to how the
930buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 931to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
932function is more convenient.
933
934=cut
935*/
936
937void
83aa740e 938Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 939{
749123ff 940 dVAR;
f0e67a1d
Z
941 char *bufptr;
942 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
943 if (flags & ~(LEX_STUFF_UTF8))
944 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
945 if (UTF) {
946 if (flags & LEX_STUFF_UTF8) {
947 goto plain_copy;
948 } else {
949 STRLEN highhalf = 0;
83aa740e 950 const char *p, *e = pv+len;
f0e67a1d
Z
951 for (p = pv; p != e; p++)
952 highhalf += !!(((U8)*p) & 0x80);
953 if (!highhalf)
954 goto plain_copy;
955 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
956 bufptr = PL_parser->bufptr;
957 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
958 SvCUR_set(PL_parser->linestr,
959 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
960 PL_parser->bufend += len+highhalf;
961 for (p = pv; p != e; p++) {
962 U8 c = (U8)*p;
963 if (c & 0x80) {
964 *bufptr++ = (char)(0xc0 | (c >> 6));
965 *bufptr++ = (char)(0x80 | (c & 0x3f));
966 } else {
967 *bufptr++ = (char)c;
968 }
969 }
970 }
971 } else {
972 if (flags & LEX_STUFF_UTF8) {
973 STRLEN highhalf = 0;
83aa740e 974 const char *p, *e = pv+len;
f0e67a1d
Z
975 for (p = pv; p != e; p++) {
976 U8 c = (U8)*p;
977 if (c >= 0xc4) {
978 Perl_croak(aTHX_ "Lexing code attempted to stuff "
979 "non-Latin-1 character into Latin-1 input");
980 } else if (c >= 0xc2 && p+1 != e &&
981 (((U8)p[1]) & 0xc0) == 0x80) {
982 p++;
983 highhalf++;
984 } else if (c >= 0x80) {
985 /* malformed UTF-8 */
986 ENTER;
987 SAVESPTR(PL_warnhook);
988 PL_warnhook = PERL_WARNHOOK_FATAL;
989 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
990 LEAVE;
991 }
992 }
993 if (!highhalf)
994 goto plain_copy;
995 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
996 bufptr = PL_parser->bufptr;
997 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
998 SvCUR_set(PL_parser->linestr,
999 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1000 PL_parser->bufend += len-highhalf;
1001 for (p = pv; p != e; p++) {
1002 U8 c = (U8)*p;
1003 if (c & 0x80) {
1004 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1005 p++;
1006 } else {
1007 *bufptr++ = (char)c;
1008 }
1009 }
1010 } else {
1011 plain_copy:
1012 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1013 bufptr = PL_parser->bufptr;
1014 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1015 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1016 PL_parser->bufend += len;
1017 Copy(pv, bufptr, len, char);
1018 }
1019 }
1020}
1021
1022/*
9dcc53ea
Z
1023=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1024
1025Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1026immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1027reallocating the buffer if necessary. This means that lexing code that
1028runs later will see the characters as if they had appeared in the input.
1029It is not recommended to do this as part of normal parsing, and most
1030uses of this facility run the risk of the inserted characters being
1031interpreted in an unintended manner.
1032
1033The string to be inserted is represented by octets starting at I<pv>
1034and continuing to the first nul. These octets are interpreted as either
1035UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1036in I<flags>. The characters are recoded for the lexer buffer, according
1037to how the buffer is currently being interpreted (L</lex_bufutf8>).
1038If it is not convenient to nul-terminate a string to be inserted, the
1039L</lex_stuff_pvn> function is more appropriate.
1040
1041=cut
1042*/
1043
1044void
1045Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1046{
1047 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1048 lex_stuff_pvn(pv, strlen(pv), flags);
1049}
1050
1051/*
f0e67a1d
Z
1052=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1053
1054Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1055immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1056reallocating the buffer if necessary. This means that lexing code that
1057runs later will see the characters as if they had appeared in the input.
1058It is not recommended to do this as part of normal parsing, and most
1059uses of this facility run the risk of the inserted characters being
1060interpreted in an unintended manner.
1061
1062The string to be inserted is the string value of I<sv>. The characters
1063are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1064being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1065not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1066need to construct a scalar.
1067
1068=cut
1069*/
1070
1071void
1072Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1073{
1074 char *pv;
1075 STRLEN len;
1076 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1077 if (flags)
1078 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1079 pv = SvPV(sv, len);
1080 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1081}
1082
1083/*
1084=for apidoc Amx|void|lex_unstuff|char *ptr
1085
1086Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1087I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1088This hides the discarded text from any lexing code that runs later,
1089as if the text had never appeared.
1090
1091This is not the normal way to consume lexed text. For that, use
1092L</lex_read_to>.
1093
1094=cut
1095*/
1096
1097void
1098Perl_lex_unstuff(pTHX_ char *ptr)
1099{
1100 char *buf, *bufend;
1101 STRLEN unstuff_len;
1102 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1103 buf = PL_parser->bufptr;
1104 if (ptr < buf)
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1106 if (ptr == buf)
1107 return;
1108 bufend = PL_parser->bufend;
1109 if (ptr > bufend)
1110 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1111 unstuff_len = ptr - buf;
1112 Move(ptr, buf, bufend+1-ptr, char);
1113 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1114 PL_parser->bufend = bufend - unstuff_len;
1115}
1116
1117/*
1118=for apidoc Amx|void|lex_read_to|char *ptr
1119
1120Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1121to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1122performing the correct bookkeeping whenever a newline character is passed.
1123This is the normal way to consume lexed text.
1124
1125Interpretation of the buffer's octets can be abstracted out by
1126using the slightly higher-level functions L</lex_peek_unichar> and
1127L</lex_read_unichar>.
1128
1129=cut
1130*/
1131
1132void
1133Perl_lex_read_to(pTHX_ char *ptr)
1134{
1135 char *s;
1136 PERL_ARGS_ASSERT_LEX_READ_TO;
1137 s = PL_parser->bufptr;
1138 if (ptr < s || ptr > PL_parser->bufend)
1139 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1140 for (; s != ptr; s++)
1141 if (*s == '\n') {
1142 CopLINE_inc(PL_curcop);
1143 PL_parser->linestart = s+1;
1144 }
1145 PL_parser->bufptr = ptr;
1146}
1147
1148/*
1149=for apidoc Amx|void|lex_discard_to|char *ptr
1150
1151Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1152up to I<ptr>. The remaining content of the buffer will be moved, and
1153all pointers into the buffer updated appropriately. I<ptr> must not
1154be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1155it is not permitted to discard text that has yet to be lexed.
1156
1157Normally it is not necessarily to do this directly, because it suffices to
1158use the implicit discarding behaviour of L</lex_next_chunk> and things
1159based on it. However, if a token stretches across multiple lines,
1f317c95 1160and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1161that purpose, then after completion of the token it would be wise to
1162explicitly discard the now-unneeded earlier lines, to avoid future
1163multi-line tokens growing the buffer without bound.
1164
1165=cut
1166*/
1167
1168void
1169Perl_lex_discard_to(pTHX_ char *ptr)
1170{
1171 char *buf;
1172 STRLEN discard_len;
1173 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1174 buf = SvPVX(PL_parser->linestr);
1175 if (ptr < buf)
1176 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1177 if (ptr == buf)
1178 return;
1179 if (ptr > PL_parser->bufptr)
1180 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1181 discard_len = ptr - buf;
1182 if (PL_parser->oldbufptr < ptr)
1183 PL_parser->oldbufptr = ptr;
1184 if (PL_parser->oldoldbufptr < ptr)
1185 PL_parser->oldoldbufptr = ptr;
1186 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1187 PL_parser->last_uni = NULL;
1188 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1189 PL_parser->last_lop = NULL;
1190 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1191 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1192 PL_parser->bufend -= discard_len;
1193 PL_parser->bufptr -= discard_len;
1194 PL_parser->oldbufptr -= discard_len;
1195 PL_parser->oldoldbufptr -= discard_len;
1196 if (PL_parser->last_uni)
1197 PL_parser->last_uni -= discard_len;
1198 if (PL_parser->last_lop)
1199 PL_parser->last_lop -= discard_len;
1200}
1201
1202/*
1203=for apidoc Amx|bool|lex_next_chunk|U32 flags
1204
1205Reads in the next chunk of text to be lexed, appending it to
1206L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1207looked to the end of the current chunk and wants to know more. It is
1208usual, but not necessary, for lexing to have consumed the entirety of
1209the current chunk at this time.
1210
1211If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1212chunk (i.e., the current chunk has been entirely consumed), normally the
1213current chunk will be discarded at the same time that the new chunk is
1214read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1215will not be discarded. If the current chunk has not been entirely
1216consumed, then it will not be discarded regardless of the flag.
1217
1218Returns true if some new text was added to the buffer, or false if the
1219buffer has reached the end of the input text.
1220
1221=cut
1222*/
1223
1224#define LEX_FAKE_EOF 0x80000000
1225
1226bool
1227Perl_lex_next_chunk(pTHX_ U32 flags)
1228{
1229 SV *linestr;
1230 char *buf;
1231 STRLEN old_bufend_pos, new_bufend_pos;
1232 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1233 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1234 bool got_some_for_debugger = 0;
f0e67a1d
Z
1235 bool got_some;
1236 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1237 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1238 linestr = PL_parser->linestr;
1239 buf = SvPVX(linestr);
1240 if (!(flags & LEX_KEEP_PREVIOUS) &&
1241 PL_parser->bufptr == PL_parser->bufend) {
1242 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1243 linestart_pos = 0;
1244 if (PL_parser->last_uni != PL_parser->bufend)
1245 PL_parser->last_uni = NULL;
1246 if (PL_parser->last_lop != PL_parser->bufend)
1247 PL_parser->last_lop = NULL;
1248 last_uni_pos = last_lop_pos = 0;
1249 *buf = 0;
1250 SvCUR(linestr) = 0;
1251 } else {
1252 old_bufend_pos = PL_parser->bufend - buf;
1253 bufptr_pos = PL_parser->bufptr - buf;
1254 oldbufptr_pos = PL_parser->oldbufptr - buf;
1255 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1256 linestart_pos = PL_parser->linestart - buf;
1257 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1258 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1259 }
1260 if (flags & LEX_FAKE_EOF) {
1261 goto eof;
60d63348 1262 } else if (!PL_parser->rsfp && !PL_parser->filtered) {
f0e67a1d
Z
1263 got_some = 0;
1264 } else if (filter_gets(linestr, old_bufend_pos)) {
1265 got_some = 1;
17cc9359 1266 got_some_for_debugger = 1;
f0e67a1d 1267 } else {
580561a3
Z
1268 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1269 sv_setpvs(linestr, "");
f0e67a1d
Z
1270 eof:
1271 /* End of real input. Close filehandle (unless it was STDIN),
1272 * then add implicit termination.
1273 */
1274 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1275 PerlIO_clearerr(PL_parser->rsfp);
1276 else if (PL_parser->rsfp)
1277 (void)PerlIO_close(PL_parser->rsfp);
1278 PL_parser->rsfp = NULL;
60d63348 1279 PL_parser->in_pod = PL_parser->filtered = 0;
f0e67a1d
Z
1280#ifdef PERL_MAD
1281 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1282 PL_faketokens = 1;
1283#endif
1284 if (!PL_in_eval && PL_minus_p) {
1285 sv_catpvs(linestr,
1286 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1287 PL_minus_n = PL_minus_p = 0;
1288 } else if (!PL_in_eval && PL_minus_n) {
1289 sv_catpvs(linestr, /*{*/";}");
1290 PL_minus_n = 0;
1291 } else
1292 sv_catpvs(linestr, ";");
1293 got_some = 1;
1294 }
1295 buf = SvPVX(linestr);
1296 new_bufend_pos = SvCUR(linestr);
1297 PL_parser->bufend = buf + new_bufend_pos;
1298 PL_parser->bufptr = buf + bufptr_pos;
1299 PL_parser->oldbufptr = buf + oldbufptr_pos;
1300 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1301 PL_parser->linestart = buf + linestart_pos;
1302 if (PL_parser->last_uni)
1303 PL_parser->last_uni = buf + last_uni_pos;
1304 if (PL_parser->last_lop)
1305 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1306 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1307 PL_curstash != PL_debstash) {
1308 /* debugger active and we're not compiling the debugger code,
1309 * so store the line into the debugger's array of lines
1310 */
1311 update_debugger_info(NULL, buf+old_bufend_pos,
1312 new_bufend_pos-old_bufend_pos);
1313 }
1314 return got_some;
1315}
1316
1317/*
1318=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1319
1320Looks ahead one (Unicode) character in the text currently being lexed.
1321Returns the codepoint (unsigned integer value) of the next character,
1322or -1 if lexing has reached the end of the input text. To consume the
1323peeked character, use L</lex_read_unichar>.
1324
1325If the next character is in (or extends into) the next chunk of input
1326text, the next chunk will be read in. Normally the current chunk will be
1327discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1328then the current chunk will not be discarded.
1329
1330If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1331is encountered, an exception is generated.
1332
1333=cut
1334*/
1335
1336I32
1337Perl_lex_peek_unichar(pTHX_ U32 flags)
1338{
749123ff 1339 dVAR;
f0e67a1d
Z
1340 char *s, *bufend;
1341 if (flags & ~(LEX_KEEP_PREVIOUS))
1342 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1343 s = PL_parser->bufptr;
1344 bufend = PL_parser->bufend;
1345 if (UTF) {
1346 U8 head;
1347 I32 unichar;
1348 STRLEN len, retlen;
1349 if (s == bufend) {
1350 if (!lex_next_chunk(flags))
1351 return -1;
1352 s = PL_parser->bufptr;
1353 bufend = PL_parser->bufend;
1354 }
1355 head = (U8)*s;
1356 if (!(head & 0x80))
1357 return head;
1358 if (head & 0x40) {
1359 len = PL_utf8skip[head];
1360 while ((STRLEN)(bufend-s) < len) {
1361 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1362 break;
1363 s = PL_parser->bufptr;
1364 bufend = PL_parser->bufend;
1365 }
1366 }
1367 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1368 if (retlen == (STRLEN)-1) {
1369 /* malformed UTF-8 */
1370 ENTER;
1371 SAVESPTR(PL_warnhook);
1372 PL_warnhook = PERL_WARNHOOK_FATAL;
1373 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1374 LEAVE;
1375 }
1376 return unichar;
1377 } else {
1378 if (s == bufend) {
1379 if (!lex_next_chunk(flags))
1380 return -1;
1381 s = PL_parser->bufptr;
1382 }
1383 return (U8)*s;
1384 }
1385}
1386
1387/*
1388=for apidoc Amx|I32|lex_read_unichar|U32 flags
1389
1390Reads the next (Unicode) character in the text currently being lexed.
1391Returns the codepoint (unsigned integer value) of the character read,
1392and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1393if lexing has reached the end of the input text. To non-destructively
1394examine the next character, use L</lex_peek_unichar> instead.
1395
1396If the next character is in (or extends into) the next chunk of input
1397text, the next chunk will be read in. Normally the current chunk will be
1398discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1399then the current chunk will not be discarded.
1400
1401If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1402is encountered, an exception is generated.
1403
1404=cut
1405*/
1406
1407I32
1408Perl_lex_read_unichar(pTHX_ U32 flags)
1409{
1410 I32 c;
1411 if (flags & ~(LEX_KEEP_PREVIOUS))
1412 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1413 c = lex_peek_unichar(flags);
1414 if (c != -1) {
1415 if (c == '\n')
1416 CopLINE_inc(PL_curcop);
d9018cbe
EB
1417 if (UTF)
1418 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1419 else
1420 ++(PL_parser->bufptr);
f0e67a1d
Z
1421 }
1422 return c;
1423}
1424
1425/*
1426=for apidoc Amx|void|lex_read_space|U32 flags
1427
1428Reads optional spaces, in Perl style, in the text currently being
1429lexed. The spaces may include ordinary whitespace characters and
1430Perl-style comments. C<#line> directives are processed if encountered.
1431L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1432at a non-space character (or the end of the input text).
1433
1434If spaces extend into the next chunk of input text, the next chunk will
1435be read in. Normally the current chunk will be discarded at the same
1436time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1437chunk will not be discarded.
1438
1439=cut
1440*/
1441
f0998909
Z
1442#define LEX_NO_NEXT_CHUNK 0x80000000
1443
f0e67a1d
Z
1444void
1445Perl_lex_read_space(pTHX_ U32 flags)
1446{
1447 char *s, *bufend;
1448 bool need_incline = 0;
f0998909 1449 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1450 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1451#ifdef PERL_MAD
1452 if (PL_skipwhite) {
1453 sv_free(PL_skipwhite);
1454 PL_skipwhite = NULL;
1455 }
1456 if (PL_madskills)
1457 PL_skipwhite = newSVpvs("");
1458#endif /* PERL_MAD */
1459 s = PL_parser->bufptr;
1460 bufend = PL_parser->bufend;
1461 while (1) {
1462 char c = *s;
1463 if (c == '#') {
1464 do {
1465 c = *++s;
1466 } while (!(c == '\n' || (c == 0 && s == bufend)));
1467 } else if (c == '\n') {
1468 s++;
1469 PL_parser->linestart = s;
1470 if (s == bufend)
1471 need_incline = 1;
1472 else
1473 incline(s);
1474 } else if (isSPACE(c)) {
1475 s++;
1476 } else if (c == 0 && s == bufend) {
1477 bool got_more;
1478#ifdef PERL_MAD
1479 if (PL_madskills)
1480 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1481#endif /* PERL_MAD */
f0998909
Z
1482 if (flags & LEX_NO_NEXT_CHUNK)
1483 break;
f0e67a1d
Z
1484 PL_parser->bufptr = s;
1485 CopLINE_inc(PL_curcop);
1486 got_more = lex_next_chunk(flags);
1487 CopLINE_dec(PL_curcop);
1488 s = PL_parser->bufptr;
1489 bufend = PL_parser->bufend;
1490 if (!got_more)
1491 break;
1492 if (need_incline && PL_parser->rsfp) {
1493 incline(s);
1494 need_incline = 0;
1495 }
1496 } else {
1497 break;
1498 }
1499 }
1500#ifdef PERL_MAD
1501 if (PL_madskills)
1502 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1503#endif /* PERL_MAD */
1504 PL_parser->bufptr = s;
1505}
1506
1507/*
ffb4593c
NT
1508 * S_incline
1509 * This subroutine has nothing to do with tilting, whether at windmills
1510 * or pinball tables. Its name is short for "increment line". It
57843af0 1511 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1512 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1513 * # line 500 "foo.pm"
1514 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1515 */
1516
76e3520e 1517STATIC void
d9095cec 1518S_incline(pTHX_ const char *s)
463ee0b2 1519{
97aff369 1520 dVAR;
d9095cec
NC
1521 const char *t;
1522 const char *n;
1523 const char *e;
8818d409 1524 line_t line_num;
463ee0b2 1525
7918f24d
NC
1526 PERL_ARGS_ASSERT_INCLINE;
1527
57843af0 1528 CopLINE_inc(PL_curcop);
463ee0b2
LW
1529 if (*s++ != '#')
1530 return;
d4c19fe8
AL
1531 while (SPACE_OR_TAB(*s))
1532 s++;
73659bf1
GS
1533 if (strnEQ(s, "line", 4))
1534 s += 4;
1535 else
1536 return;
084592ab 1537 if (SPACE_OR_TAB(*s))
73659bf1 1538 s++;
4e553d73 1539 else
73659bf1 1540 return;
d4c19fe8
AL
1541 while (SPACE_OR_TAB(*s))
1542 s++;
463ee0b2
LW
1543 if (!isDIGIT(*s))
1544 return;
d4c19fe8 1545
463ee0b2
LW
1546 n = s;
1547 while (isDIGIT(*s))
1548 s++;
07714eb4 1549 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1550 return;
bf4acbe4 1551 while (SPACE_OR_TAB(*s))
463ee0b2 1552 s++;
73659bf1 1553 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1554 s++;
73659bf1
GS
1555 e = t + 1;
1556 }
463ee0b2 1557 else {
c35e046a
AL
1558 t = s;
1559 while (!isSPACE(*t))
1560 t++;
73659bf1 1561 e = t;
463ee0b2 1562 }
bf4acbe4 1563 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1564 e++;
1565 if (*e != '\n' && *e != '\0')
1566 return; /* false alarm */
1567
8818d409
FC
1568 line_num = atoi(n)-1;
1569
f4dd75d9 1570 if (t - s > 0) {
d9095cec 1571 const STRLEN len = t - s;
19bad673
NC
1572 SV *const temp_sv = CopFILESV(PL_curcop);
1573 const char *cf;
1574 STRLEN tmplen;
1575
1576 if (temp_sv) {
1577 cf = SvPVX(temp_sv);
1578 tmplen = SvCUR(temp_sv);
1579 } else {
1580 cf = NULL;
1581 tmplen = 0;
1582 }
1583
d1299d44 1584 if (!PL_rsfp && !PL_parser->filtered) {
e66cf94c
RGS
1585 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1586 * to *{"::_<newfilename"} */
44867030
NC
1587 /* However, the long form of evals is only turned on by the
1588 debugger - usually they're "(eval %lu)" */
1589 char smallbuf[128];
1590 char *tmpbuf;
1591 GV **gvp;
d9095cec 1592 STRLEN tmplen2 = len;
798b63bc 1593 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1594 tmpbuf = smallbuf;
1595 else
2ae0db35 1596 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1597 tmpbuf[0] = '_';
1598 tmpbuf[1] = '<';
2ae0db35 1599 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1600 tmplen += 2;
8a5ee598
RGS
1601 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1602 if (gvp) {
44867030
NC
1603 char *tmpbuf2;
1604 GV *gv2;
1605
1606 if (tmplen2 + 2 <= sizeof smallbuf)
1607 tmpbuf2 = smallbuf;
1608 else
1609 Newx(tmpbuf2, tmplen2 + 2, char);
1610
1611 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1612 /* Either they malloc'd it, or we malloc'd it,
1613 so no prefix is present in ours. */
1614 tmpbuf2[0] = '_';
1615 tmpbuf2[1] = '<';
1616 }
1617
1618 memcpy(tmpbuf2 + 2, s, tmplen2);
1619 tmplen2 += 2;
1620
8a5ee598 1621 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1622 if (!isGV(gv2)) {
8a5ee598 1623 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1624 /* adjust ${"::_<newfilename"} to store the new file name */
1625 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1626 /* The line number may differ. If that is the case,
1627 alias the saved lines that are in the array.
1628 Otherwise alias the whole array. */
1629 if (CopLINE(PL_curcop) == line_num) {
1630 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1631 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1632 }
1633 else if (GvAV(*gvp)) {
1634 AV * const av = GvAV(*gvp);
1635 const I32 start = CopLINE(PL_curcop)+1;
1636 I32 items = AvFILLp(av) - start;
1637 if (items > 0) {
1638 AV * const av2 = GvAVn(gv2);
1639 SV **svp = AvARRAY(av) + start;
1640 I32 l = (I32)line_num+1;
1641 while (items--)
1642 av_store(av2, l++, SvREFCNT_inc(*svp++));
1643 }
1644 }
e5527e4b 1645 }
44867030
NC
1646
1647 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1648 }
e66cf94c 1649 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1650 }
05ec9bb3 1651 CopFILE_free(PL_curcop);
d9095cec 1652 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1653 }
8818d409 1654 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1655}
1656
29595ff2 1657#ifdef PERL_MAD
cd81e915 1658/* skip space before PL_thistoken */
29595ff2
NC
1659
1660STATIC char *
1661S_skipspace0(pTHX_ register char *s)
1662{
7918f24d
NC
1663 PERL_ARGS_ASSERT_SKIPSPACE0;
1664
29595ff2
NC
1665 s = skipspace(s);
1666 if (!PL_madskills)
1667 return s;
cd81e915
NC
1668 if (PL_skipwhite) {
1669 if (!PL_thiswhite)
6b29d1f5 1670 PL_thiswhite = newSVpvs("");
cd81e915
NC
1671 sv_catsv(PL_thiswhite, PL_skipwhite);
1672 sv_free(PL_skipwhite);
1673 PL_skipwhite = 0;
1674 }
1675 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1676 return s;
1677}
1678
cd81e915 1679/* skip space after PL_thistoken */
29595ff2
NC
1680
1681STATIC char *
1682S_skipspace1(pTHX_ register char *s)
1683{
d4c19fe8 1684 const char *start = s;
29595ff2
NC
1685 I32 startoff = start - SvPVX(PL_linestr);
1686
7918f24d
NC
1687 PERL_ARGS_ASSERT_SKIPSPACE1;
1688
29595ff2
NC
1689 s = skipspace(s);
1690 if (!PL_madskills)
1691 return s;
1692 start = SvPVX(PL_linestr) + startoff;
cd81e915 1693 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1694 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1695 PL_thistoken = newSVpvn(tstart, start - tstart);
1696 }
1697 PL_realtokenstart = -1;
1698 if (PL_skipwhite) {
1699 if (!PL_nextwhite)
6b29d1f5 1700 PL_nextwhite = newSVpvs("");
cd81e915
NC
1701 sv_catsv(PL_nextwhite, PL_skipwhite);
1702 sv_free(PL_skipwhite);
1703 PL_skipwhite = 0;
29595ff2
NC
1704 }
1705 return s;
1706}
1707
1708STATIC char *
1709S_skipspace2(pTHX_ register char *s, SV **svp)
1710{
c35e046a
AL
1711 char *start;
1712 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1713 const I32 startoff = s - SvPVX(PL_linestr);
1714
7918f24d
NC
1715 PERL_ARGS_ASSERT_SKIPSPACE2;
1716
29595ff2
NC
1717 s = skipspace(s);
1718 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1719 if (!PL_madskills || !svp)
1720 return s;
1721 start = SvPVX(PL_linestr) + startoff;
cd81e915 1722 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1723 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1724 PL_thistoken = newSVpvn(tstart, start - tstart);
1725 PL_realtokenstart = -1;
29595ff2 1726 }
cd81e915 1727 if (PL_skipwhite) {
29595ff2 1728 if (!*svp)
6b29d1f5 1729 *svp = newSVpvs("");
cd81e915
NC
1730 sv_setsv(*svp, PL_skipwhite);
1731 sv_free(PL_skipwhite);
1732 PL_skipwhite = 0;
29595ff2
NC
1733 }
1734
1735 return s;
1736}
1737#endif
1738
80a702cd 1739STATIC void
15f169a1 1740S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1741{
1742 AV *av = CopFILEAVx(PL_curcop);
1743 if (av) {
b9f83d2f 1744 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1745 if (orig_sv)
1746 sv_setsv(sv, orig_sv);
1747 else
1748 sv_setpvn(sv, buf, len);
80a702cd
RGS
1749 (void)SvIOK_on(sv);
1750 SvIV_set(sv, 0);
1751 av_store(av, (I32)CopLINE(PL_curcop), sv);
1752 }
1753}
1754
ffb4593c
NT
1755/*
1756 * S_skipspace
1757 * Called to gobble the appropriate amount and type of whitespace.
1758 * Skips comments as well.
1759 */
1760
76e3520e 1761STATIC char *
cea2e8a9 1762S_skipspace(pTHX_ register char *s)
a687059c 1763{
5db06880 1764#ifdef PERL_MAD
f0e67a1d
Z
1765 char *start = s;
1766#endif /* PERL_MAD */
7918f24d 1767 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1768#ifdef PERL_MAD
cd81e915
NC
1769 if (PL_skipwhite) {
1770 sv_free(PL_skipwhite);
f0e67a1d 1771 PL_skipwhite = NULL;
5db06880 1772 }
f0e67a1d 1773#endif /* PERL_MAD */
3280af22 1774 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1775 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1776 s++;
f0e67a1d
Z
1777 } else {
1778 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1779 PL_bufptr = s;
f0998909
Z
1780 lex_read_space(LEX_KEEP_PREVIOUS |
1781 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1782 LEX_NO_NEXT_CHUNK : 0));
3280af22 1783 s = PL_bufptr;
f0e67a1d
Z
1784 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1785 if (PL_linestart > PL_bufptr)
1786 PL_bufptr = PL_linestart;
1787 return s;
463ee0b2 1788 }
5db06880 1789#ifdef PERL_MAD
f0e67a1d
Z
1790 if (PL_madskills)
1791 PL_skipwhite = newSVpvn(start, s-start);
1792#endif /* PERL_MAD */
5db06880 1793 return s;
a687059c 1794}
378cc40b 1795
ffb4593c
NT
1796/*
1797 * S_check_uni
1798 * Check the unary operators to ensure there's no ambiguity in how they're
1799 * used. An ambiguous piece of code would be:
1800 * rand + 5
1801 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1802 * the +5 is its argument.
1803 */
1804
76e3520e 1805STATIC void
cea2e8a9 1806S_check_uni(pTHX)
ba106d47 1807{
97aff369 1808 dVAR;
d4c19fe8
AL
1809 const char *s;
1810 const char *t;
2f3197b3 1811
3280af22 1812 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1813 return;
3280af22
NIS
1814 while (isSPACE(*PL_last_uni))
1815 PL_last_uni++;
c35e046a
AL
1816 s = PL_last_uni;
1817 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1818 s++;
3280af22 1819 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1820 return;
6136c704 1821
9b387841
NC
1822 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1823 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1824 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1825}
1826
ffb4593c
NT
1827/*
1828 * LOP : macro to build a list operator. Its behaviour has been replaced
1829 * with a subroutine, S_lop() for which LOP is just another name.
1830 */
1831
a0d0e21e
LW
1832#define LOP(f,x) return lop(f,x,s)
1833
ffb4593c
NT
1834/*
1835 * S_lop
1836 * Build a list operator (or something that might be one). The rules:
1837 * - if we have a next token, then it's a list operator [why?]
1838 * - if the next thing is an opening paren, then it's a function
1839 * - else it's a list operator
1840 */
1841
76e3520e 1842STATIC I32
a0be28da 1843S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1844{
97aff369 1845 dVAR;
7918f24d
NC
1846
1847 PERL_ARGS_ASSERT_LOP;
1848
6154021b 1849 pl_yylval.ival = f;
35c8bce7 1850 CLINE;
3280af22
NIS
1851 PL_expect = x;
1852 PL_bufptr = s;
1853 PL_last_lop = PL_oldbufptr;
eb160463 1854 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1855#ifdef PERL_MAD
1856 if (PL_lasttoke)
78cdf107 1857 goto lstop;
5db06880 1858#else
3280af22 1859 if (PL_nexttoke)
78cdf107 1860 goto lstop;
5db06880 1861#endif
79072805 1862 if (*s == '(')
bbf60fe6 1863 return REPORT(FUNC);
29595ff2 1864 s = PEEKSPACE(s);
79072805 1865 if (*s == '(')
bbf60fe6 1866 return REPORT(FUNC);
78cdf107
Z
1867 else {
1868 lstop:
1869 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1870 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1871 return REPORT(LSTOP);
78cdf107 1872 }
79072805
LW
1873}
1874
5db06880
NC
1875#ifdef PERL_MAD
1876 /*
1877 * S_start_force
1878 * Sets up for an eventual force_next(). start_force(0) basically does
1879 * an unshift, while start_force(-1) does a push. yylex removes items
1880 * on the "pop" end.
1881 */
1882
1883STATIC void
1884S_start_force(pTHX_ int where)
1885{
1886 int i;
1887
cd81e915 1888 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1889 where = PL_lasttoke;
cd81e915
NC
1890 assert(PL_curforce < 0 || PL_curforce == where);
1891 if (PL_curforce != where) {
5db06880
NC
1892 for (i = PL_lasttoke; i > where; --i) {
1893 PL_nexttoke[i] = PL_nexttoke[i-1];
1894 }
1895 PL_lasttoke++;
1896 }
cd81e915 1897 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1898 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1899 PL_curforce = where;
1900 if (PL_nextwhite) {
5db06880 1901 if (PL_madskills)
6b29d1f5 1902 curmad('^', newSVpvs(""));
cd81e915 1903 CURMAD('_', PL_nextwhite);
5db06880
NC
1904 }
1905}
1906
1907STATIC void
1908S_curmad(pTHX_ char slot, SV *sv)
1909{
1910 MADPROP **where;
1911
1912 if (!sv)
1913 return;
cd81e915
NC
1914 if (PL_curforce < 0)
1915 where = &PL_thismad;
5db06880 1916 else
cd81e915 1917 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1918
cd81e915 1919 if (PL_faketokens)
76f68e9b 1920 sv_setpvs(sv, "");
5db06880
NC
1921 else {
1922 if (!IN_BYTES) {
1923 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1924 SvUTF8_on(sv);
1925 else if (PL_encoding) {
1926 sv_recode_to_utf8(sv, PL_encoding);
1927 }
1928 }
1929 }
1930
1931 /* keep a slot open for the head of the list? */
1932 if (slot != '_' && *where && (*where)->mad_key == '^') {
1933 (*where)->mad_key = slot;
daba3364 1934 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1935 (*where)->mad_val = (void*)sv;
1936 }
1937 else
1938 addmad(newMADsv(slot, sv), where, 0);
1939}
1940#else
b3f24c00
MHM
1941# define start_force(where) NOOP
1942# define curmad(slot, sv) NOOP
5db06880
NC
1943#endif
1944
ffb4593c
NT
1945/*
1946 * S_force_next
9cbb5ea2 1947 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1948 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1949 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1950 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1951 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1952 */
1953
4e553d73 1954STATIC void
cea2e8a9 1955S_force_next(pTHX_ I32 type)
79072805 1956{
97aff369 1957 dVAR;
704d4215
GG
1958#ifdef DEBUGGING
1959 if (DEBUG_T_TEST) {
1960 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1961 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1962 }
1963#endif
5db06880 1964#ifdef PERL_MAD
cd81e915 1965 if (PL_curforce < 0)
5db06880 1966 start_force(PL_lasttoke);
cd81e915 1967 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1968 if (PL_lex_state != LEX_KNOWNEXT)
1969 PL_lex_defer = PL_lex_state;
1970 PL_lex_state = LEX_KNOWNEXT;
1971 PL_lex_expect = PL_expect;
cd81e915 1972 PL_curforce = -1;
5db06880 1973#else
3280af22
NIS
1974 PL_nexttype[PL_nexttoke] = type;
1975 PL_nexttoke++;
1976 if (PL_lex_state != LEX_KNOWNEXT) {
1977 PL_lex_defer = PL_lex_state;
1978 PL_lex_expect = PL_expect;
1979 PL_lex_state = LEX_KNOWNEXT;
79072805 1980 }
5db06880 1981#endif
79072805
LW
1982}
1983
28ac2b49
Z
1984void
1985Perl_yyunlex(pTHX)
1986{
a7aaec61
Z
1987 int yyc = PL_parser->yychar;
1988 if (yyc != YYEMPTY) {
1989 if (yyc) {
1990 start_force(-1);
1991 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1992 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 1993 PL_lex_allbrackets--;
a7aaec61 1994 PL_lex_brackets--;
78cdf107
Z
1995 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1996 } else if (yyc == '('/*)*/) {
1997 PL_lex_allbrackets--;
1998 yyc |= (2<<24);
a7aaec61
Z
1999 }
2000 force_next(yyc);
2001 }
28ac2b49
Z
2002 PL_parser->yychar = YYEMPTY;
2003 }
2004}
2005
d0a148a6 2006STATIC SV *
15f169a1 2007S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2008{
97aff369 2009 dVAR;
740cce10 2010 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2011 !IN_BYTES
2012 && UTF
2013 && !is_ascii_string((const U8*)start, len)
740cce10 2014 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2015 return sv;
2016}
2017
ffb4593c
NT
2018/*
2019 * S_force_word
2020 * When the lexer knows the next thing is a word (for instance, it has
2021 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2022 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2023 * lookahead.
ffb4593c
NT
2024 *
2025 * Arguments:
b1b65b59 2026 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2027 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2028 * int check_keyword : if true, Perl checks to make sure the word isn't
2029 * a keyword (do this if the word is a label, e.g. goto FOO)
2030 * int allow_pack : if true, : characters will also be allowed (require,
2031 * use, etc. do this)
9cbb5ea2 2032 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2033 */
2034
76e3520e 2035STATIC char *
cea2e8a9 2036S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2037{
97aff369 2038 dVAR;
463ee0b2
LW
2039 register char *s;
2040 STRLEN len;
4e553d73 2041
7918f24d
NC
2042 PERL_ARGS_ASSERT_FORCE_WORD;
2043
29595ff2 2044 start = SKIPSPACE1(start);
463ee0b2 2045 s = start;
7e2040f0 2046 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2047 (allow_pack && *s == ':') ||
15f0808c 2048 (allow_initial_tick && *s == '\'') )
a0d0e21e 2049 {
3280af22 2050 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2051 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2052 return start;
cd81e915 2053 start_force(PL_curforce);
5db06880
NC
2054 if (PL_madskills)
2055 curmad('X', newSVpvn(start,s-start));
463ee0b2 2056 if (token == METHOD) {
29595ff2 2057 s = SKIPSPACE1(s);
463ee0b2 2058 if (*s == '(')
3280af22 2059 PL_expect = XTERM;
463ee0b2 2060 else {
3280af22 2061 PL_expect = XOPERATOR;
463ee0b2 2062 }
79072805 2063 }
e74e6b3d 2064 if (PL_madskills)
63575281 2065 curmad('g', newSVpvs( "forced" ));
9ded7720 2066 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2067 = (OP*)newSVOP(OP_CONST,0,
2068 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2069 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2070 force_next(token);
2071 }
2072 return s;
2073}
2074
ffb4593c
NT
2075/*
2076 * S_force_ident
9cbb5ea2 2077 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2078 * text only contains the "foo" portion. The first argument is a pointer
2079 * to the "foo", and the second argument is the type symbol to prefix.
2080 * Forces the next token to be a "WORD".
9cbb5ea2 2081 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2082 */
2083
76e3520e 2084STATIC void
bfed75c6 2085S_force_ident(pTHX_ register const char *s, int kind)
79072805 2086{
97aff369 2087 dVAR;
7918f24d
NC
2088
2089 PERL_ARGS_ASSERT_FORCE_IDENT;
2090
c35e046a 2091 if (*s) {
90e5519e 2092 const STRLEN len = strlen(s);
728847b1
BF
2093 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2094 UTF ? SVf_UTF8 : 0));
cd81e915 2095 start_force(PL_curforce);
9ded7720 2096 NEXTVAL_NEXTTOKE.opval = o;
79072805 2097 force_next(WORD);
748a9306 2098 if (kind) {
11343788 2099 o->op_private = OPpCONST_ENTERED;
55497cff 2100 /* XXX see note in pp_entereval() for why we forgo typo
2101 warnings if the symbol must be introduced in an eval.
2102 GSAR 96-10-12 */
90e5519e 2103 gv_fetchpvn_flags(s, len,
728847b1
BF
2104 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2105 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
90e5519e
NC
2106 kind == '$' ? SVt_PV :
2107 kind == '@' ? SVt_PVAV :
2108 kind == '%' ? SVt_PVHV :
a0d0e21e 2109 SVt_PVGV
90e5519e 2110 );
748a9306 2111 }
79072805
LW
2112 }
2113}
2114
1571675a
GS
2115NV
2116Perl_str_to_version(pTHX_ SV *sv)
2117{
2118 NV retval = 0.0;
2119 NV nshift = 1.0;
2120 STRLEN len;
cfd0369c 2121 const char *start = SvPV_const(sv,len);
9d4ba2ae 2122 const char * const end = start + len;
504618e9 2123 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2124
2125 PERL_ARGS_ASSERT_STR_TO_VERSION;
2126
1571675a 2127 while (start < end) {
ba210ebe 2128 STRLEN skip;
1571675a
GS
2129 UV n;
2130 if (utf)
9041c2e3 2131 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2132 else {
2133 n = *(U8*)start;
2134 skip = 1;
2135 }
2136 retval += ((NV)n)/nshift;
2137 start += skip;
2138 nshift *= 1000;
2139 }
2140 return retval;
2141}
2142
4e553d73 2143/*
ffb4593c
NT
2144 * S_force_version
2145 * Forces the next token to be a version number.
e759cc13
RGS
2146 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2147 * and if "guessing" is TRUE, then no new token is created (and the caller
2148 * must use an alternative parsing method).
ffb4593c
NT
2149 */
2150
76e3520e 2151STATIC char *
e759cc13 2152S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2153{
97aff369 2154 dVAR;
5f66b61c 2155 OP *version = NULL;
44dcb63b 2156 char *d;
5db06880
NC
2157#ifdef PERL_MAD
2158 I32 startoff = s - SvPVX(PL_linestr);
2159#endif
89bfa8cd 2160
7918f24d
NC
2161 PERL_ARGS_ASSERT_FORCE_VERSION;
2162
29595ff2 2163 s = SKIPSPACE1(s);
89bfa8cd 2164
44dcb63b 2165 d = s;
dd629d5b 2166 if (*d == 'v')
44dcb63b 2167 d++;
44dcb63b 2168 if (isDIGIT(*d)) {
e759cc13
RGS
2169 while (isDIGIT(*d) || *d == '_' || *d == '.')
2170 d++;
5db06880
NC
2171#ifdef PERL_MAD
2172 if (PL_madskills) {
cd81e915 2173 start_force(PL_curforce);
5db06880
NC
2174 curmad('X', newSVpvn(s,d-s));
2175 }
2176#endif
4e4da3ac 2177 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2178 SV *ver;
8d08d9ba
DG
2179#ifdef USE_LOCALE_NUMERIC
2180 char *loc = setlocale(LC_NUMERIC, "C");
2181#endif
6154021b 2182 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2183#ifdef USE_LOCALE_NUMERIC
2184 setlocale(LC_NUMERIC, loc);
2185#endif
6154021b 2186 version = pl_yylval.opval;
dd629d5b
GS
2187 ver = cSVOPx(version)->op_sv;
2188 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2189 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2190 SvNV_set(ver, str_to_version(ver));
1571675a 2191 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2192 }
89bfa8cd 2193 }
5db06880
NC
2194 else if (guessing) {
2195#ifdef PERL_MAD
2196 if (PL_madskills) {
cd81e915
NC
2197 sv_free(PL_nextwhite); /* let next token collect whitespace */
2198 PL_nextwhite = 0;
5db06880
NC
2199 s = SvPVX(PL_linestr) + startoff;
2200 }
2201#endif
e759cc13 2202 return s;
5db06880 2203 }
89bfa8cd 2204 }
2205
5db06880
NC
2206#ifdef PERL_MAD
2207 if (PL_madskills && !version) {
cd81e915
NC
2208 sv_free(PL_nextwhite); /* let next token collect whitespace */
2209 PL_nextwhite = 0;
5db06880
NC
2210 s = SvPVX(PL_linestr) + startoff;
2211 }
2212#endif
89bfa8cd 2213 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2214 start_force(PL_curforce);
9ded7720 2215 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2216 force_next(WORD);
89bfa8cd 2217
e759cc13 2218 return s;
89bfa8cd 2219}
2220
ffb4593c 2221/*
91152fc1
DG
2222 * S_force_strict_version
2223 * Forces the next token to be a version number using strict syntax rules.
2224 */
2225
2226STATIC char *
2227S_force_strict_version(pTHX_ char *s)
2228{
2229 dVAR;
2230 OP *version = NULL;
2231#ifdef PERL_MAD
2232 I32 startoff = s - SvPVX(PL_linestr);
2233#endif
2234 const char *errstr = NULL;
2235
2236 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2237
2238 while (isSPACE(*s)) /* leading whitespace */
2239 s++;
2240
2241 if (is_STRICT_VERSION(s,&errstr)) {
2242 SV *ver = newSV(0);
2243 s = (char *)scan_version(s, ver, 0);
2244 version = newSVOP(OP_CONST, 0, ver);
2245 }
4e4da3ac
Z
2246 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2247 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2248 {
91152fc1
DG
2249 PL_bufptr = s;
2250 if (errstr)
2251 yyerror(errstr); /* version required */
2252 return s;
2253 }
2254
2255#ifdef PERL_MAD
2256 if (PL_madskills && !version) {
2257 sv_free(PL_nextwhite); /* let next token collect whitespace */
2258 PL_nextwhite = 0;
2259 s = SvPVX(PL_linestr) + startoff;
2260 }
2261#endif
2262 /* NOTE: The parser sees the package name and the VERSION swapped */
2263 start_force(PL_curforce);
2264 NEXTVAL_NEXTTOKE.opval = version;
2265 force_next(WORD);
2266
2267 return s;
2268}
2269
2270/*
ffb4593c
NT
2271 * S_tokeq
2272 * Tokenize a quoted string passed in as an SV. It finds the next
2273 * chunk, up to end of string or a backslash. It may make a new
2274 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2275 * turns \\ into \.
2276 */
2277
76e3520e 2278STATIC SV *
cea2e8a9 2279S_tokeq(pTHX_ SV *sv)
79072805 2280{
97aff369 2281 dVAR;
79072805
LW
2282 register char *s;
2283 register char *send;
2284 register char *d;
b3ac6de7
IZ
2285 STRLEN len = 0;
2286 SV *pv = sv;
79072805 2287
7918f24d
NC
2288 PERL_ARGS_ASSERT_TOKEQ;
2289
79072805 2290 if (!SvLEN(sv))
b3ac6de7 2291 goto finish;
79072805 2292
a0d0e21e 2293 s = SvPV_force(sv, len);
21a311ee 2294 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2295 goto finish;
463ee0b2 2296 send = s + len;
dcb21ed6
NC
2297 /* This is relying on the SV being "well formed" with a trailing '\0' */
2298 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2299 s++;
2300 if (s == send)
b3ac6de7 2301 goto finish;
79072805 2302 d = s;
be4731d2 2303 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2304 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2305 }
79072805
LW
2306 while (s < send) {
2307 if (*s == '\\') {
a0d0e21e 2308 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2309 s++; /* all that, just for this */
2310 }
2311 *d++ = *s++;
2312 }
2313 *d = '\0';
95a20fc0 2314 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2315 finish:
3280af22 2316 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2317 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2318 return sv;
2319}
2320
ffb4593c
NT
2321/*
2322 * Now come three functions related to double-quote context,
2323 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2324 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2325 * interact with PL_lex_state, and create fake ( ... ) argument lists
2326 * to handle functions and concatenation.
2327 * They assume that whoever calls them will be setting up a fake
2328 * join call, because each subthing puts a ',' after it. This lets
2329 * "lower \luPpEr"
2330 * become
2331 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2332 *
2333 * (I'm not sure whether the spurious commas at the end of lcfirst's
2334 * arguments and join's arguments are created or not).
2335 */
2336
2337/*
2338 * S_sublex_start
6154021b 2339 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2340 *
2341 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2342 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2343 *
2344 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2345 *
2346 * Everything else becomes a FUNC.
2347 *
2348 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2349 * had an OP_CONST or OP_READLINE). This just sets us up for a
2350 * call to S_sublex_push().
2351 */
2352
76e3520e 2353STATIC I32
cea2e8a9 2354S_sublex_start(pTHX)
79072805 2355{
97aff369 2356 dVAR;
6154021b 2357 register const I32 op_type = pl_yylval.ival;
79072805
LW
2358
2359 if (op_type == OP_NULL) {
6154021b 2360 pl_yylval.opval = PL_lex_op;
5f66b61c 2361 PL_lex_op = NULL;
79072805
LW
2362 return THING;
2363 }
2364 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2365 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2366
2367 if (SvTYPE(sv) == SVt_PVIV) {
2368 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2369 STRLEN len;
96a5add6 2370 const char * const p = SvPV_const(sv, len);
740cce10 2371 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2372 SvREFCNT_dec(sv);
2373 sv = nsv;
4e553d73 2374 }
6154021b 2375 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2376 PL_lex_stuff = NULL;
6f33ba73
RGS
2377 /* Allow <FH> // "foo" */
2378 if (op_type == OP_READLINE)
2379 PL_expect = XTERMORDORDOR;
79072805
LW
2380 return THING;
2381 }
e3f73d4e
RGS
2382 else if (op_type == OP_BACKTICK && PL_lex_op) {
2383 /* readpipe() vas overriden */
2384 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2385 pl_yylval.opval = PL_lex_op;
9b201d7d 2386 PL_lex_op = NULL;
e3f73d4e
RGS
2387 PL_lex_stuff = NULL;
2388 return THING;
2389 }
79072805 2390
3280af22 2391 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2392 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2393 PL_sublex_info.sub_op = PL_lex_op;
2394 PL_lex_state = LEX_INTERPPUSH;
55497cff 2395
3280af22
NIS
2396 PL_expect = XTERM;
2397 if (PL_lex_op) {
6154021b 2398 pl_yylval.opval = PL_lex_op;
5f66b61c 2399 PL_lex_op = NULL;
55497cff 2400 return PMFUNC;
2401 }
2402 else
2403 return FUNC;
2404}
2405
ffb4593c
NT
2406/*
2407 * S_sublex_push
2408 * Create a new scope to save the lexing state. The scope will be
2409 * ended in S_sublex_done. Returns a '(', starting the function arguments
2410 * to the uc, lc, etc. found before.
2411 * Sets PL_lex_state to LEX_INTERPCONCAT.
2412 */
2413
76e3520e 2414STATIC I32
cea2e8a9 2415S_sublex_push(pTHX)
55497cff 2416{
27da23d5 2417 dVAR;
f46d017c 2418 ENTER;
55497cff 2419
3280af22 2420 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2421 SAVEBOOL(PL_lex_dojoin);
3280af22 2422 SAVEI32(PL_lex_brackets);
78cdf107
Z
2423 SAVEI32(PL_lex_allbrackets);
2424 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2425 SAVEI32(PL_lex_casemods);
2426 SAVEI32(PL_lex_starts);
651b5b28 2427 SAVEI8(PL_lex_state);
7766f137 2428 SAVEVPTR(PL_lex_inpat);
98246f1e 2429 SAVEI16(PL_lex_inwhat);
57843af0 2430 SAVECOPLINE(PL_curcop);
3280af22 2431 SAVEPPTR(PL_bufptr);
8452ff4b 2432 SAVEPPTR(PL_bufend);
3280af22
NIS
2433 SAVEPPTR(PL_oldbufptr);
2434 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2435 SAVEPPTR(PL_last_lop);
2436 SAVEPPTR(PL_last_uni);
3280af22
NIS
2437 SAVEPPTR(PL_linestart);
2438 SAVESPTR(PL_linestr);
8edd5f42
RGS
2439 SAVEGENERICPV(PL_lex_brackstack);
2440 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2441
2442 PL_linestr = PL_lex_stuff;
a0714e2c 2443 PL_lex_stuff = NULL;
3280af22 2444
9cbb5ea2
GS
2445 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2446 = SvPVX(PL_linestr);
3280af22 2447 PL_bufend += SvCUR(PL_linestr);
bd61b366 2448 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2449 SAVEFREESV(PL_linestr);
2450
2451 PL_lex_dojoin = FALSE;
2452 PL_lex_brackets = 0;
78cdf107
Z
2453 PL_lex_allbrackets = 0;
2454 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2455 Newx(PL_lex_brackstack, 120, char);
2456 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2457 PL_lex_casemods = 0;
2458 *PL_lex_casestack = '\0';
2459 PL_lex_starts = 0;
2460 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2461 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2462
2463 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2464 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2465 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2466 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2467 else
5f66b61c 2468 PL_lex_inpat = NULL;
79072805 2469
55497cff 2470 return '(';
79072805
LW
2471}
2472
ffb4593c
NT
2473/*
2474 * S_sublex_done
2475 * Restores lexer state after a S_sublex_push.
2476 */
2477
76e3520e 2478STATIC I32
cea2e8a9 2479S_sublex_done(pTHX)
79072805 2480{
27da23d5 2481 dVAR;
3280af22 2482 if (!PL_lex_starts++) {
396482e1 2483 SV * const sv = newSVpvs("");
9aa983d2
JH
2484 if (SvUTF8(PL_linestr))
2485 SvUTF8_on(sv);
3280af22 2486 PL_expect = XOPERATOR;
6154021b 2487 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2488 return THING;
2489 }
2490
3280af22
NIS
2491 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2492 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2493 return yylex();
79072805
LW
2494 }
2495
ffb4593c 2496 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2497 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2498 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2499 PL_linestr = PL_lex_repl;
2500 PL_lex_inpat = 0;
2501 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2502 PL_bufend += SvCUR(PL_linestr);
bd61b366 2503 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2504 SAVEFREESV(PL_linestr);
2505 PL_lex_dojoin = FALSE;
2506 PL_lex_brackets = 0;
78cdf107
Z
2507 PL_lex_allbrackets = 0;
2508 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2509 PL_lex_casemods = 0;
2510 *PL_lex_casestack = '\0';
2511 PL_lex_starts = 0;
25da4f38 2512 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2513 PL_lex_state = LEX_INTERPNORMAL;
2514 PL_lex_starts++;
e9fa98b2
HS
2515 /* we don't clear PL_lex_repl here, so that we can check later
2516 whether this is an evalled subst; that means we rely on the
2517 logic to ensure sublex_done() is called again only via the
2518 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2519 }
e9fa98b2 2520 else {
3280af22 2521 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2522 PL_lex_repl = NULL;
e9fa98b2 2523 }
79072805 2524 return ',';
ffed7fef
LW
2525 }
2526 else {
5db06880
NC
2527#ifdef PERL_MAD
2528 if (PL_madskills) {
cd81e915
NC
2529 if (PL_thiswhite) {
2530 if (!PL_endwhite)
6b29d1f5 2531 PL_endwhite = newSVpvs("");
cd81e915
NC
2532 sv_catsv(PL_endwhite, PL_thiswhite);
2533 PL_thiswhite = 0;
2534 }
2535 if (PL_thistoken)
76f68e9b 2536 sv_setpvs(PL_thistoken,"");
5db06880 2537 else
cd81e915 2538 PL_realtokenstart = -1;
5db06880
NC
2539 }
2540#endif
f46d017c 2541 LEAVE;
3280af22
NIS
2542 PL_bufend = SvPVX(PL_linestr);
2543 PL_bufend += SvCUR(PL_linestr);
2544 PL_expect = XOPERATOR;
09bef843 2545 PL_sublex_info.sub_inwhat = 0;
79072805 2546 return ')';
ffed7fef
LW
2547 }
2548}
2549
02aa26ce
NT
2550/*
2551 scan_const
2552
2553 Extracts a pattern, double-quoted string, or transliteration. This
2554 is terrifying code.
2555
94def140 2556 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2557 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2558 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2559
94def140
TS
2560 Returns a pointer to the character scanned up to. If this is
2561 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2562 successfully parsed), will leave an OP for the substring scanned
6154021b 2563 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2564 by looking at the next characters herself.
2565
02aa26ce
NT
2566 In patterns:
2567 backslashes:
ff3f963a 2568 constants: \N{NAME} only
02aa26ce
NT
2569 case and quoting: \U \Q \E
2570 stops on @ and $, but not for $ as tail anchor
2571
2572 In transliterations:
2573 characters are VERY literal, except for - not at the start or end
94def140
TS
2574 of the string, which indicates a range. If the range is in bytes,
2575 scan_const expands the range to the full set of intermediate
2576 characters. If the range is in utf8, the hyphen is replaced with
2577 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2578
2579 In double-quoted strings:
2580 backslashes:
2581 double-quoted style: \r and \n
ff3f963a 2582 constants: \x31, etc.
94def140 2583 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2584 case and quoting: \U \Q \E
2585 stops on @ and $
2586
2587 scan_const does *not* construct ops to handle interpolated strings.
2588 It stops processing as soon as it finds an embedded $ or @ variable
2589 and leaves it to the caller to work out what's going on.
2590
94def140
TS
2591 embedded arrays (whether in pattern or not) could be:
2592 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2593
2594 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2595
2596 $ in pattern could be $foo or could be tail anchor. Assumption:
2597 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2598 followed by one of "()| \r\n\t"
02aa26ce
NT
2599
2600 \1 (backreferences) are turned into $1
2601
2602 The structure of the code is
2603 while (there's a character to process) {
94def140
TS
2604 handle transliteration ranges
2605 skip regexp comments /(?#comment)/ and codes /(?{code})/
2606 skip #-initiated comments in //x patterns
2607 check for embedded arrays
02aa26ce
NT
2608 check for embedded scalars
2609 if (backslash) {
94def140 2610 deprecate \1 in substitution replacements
02aa26ce
NT
2611 handle string-changing backslashes \l \U \Q \E, etc.
2612 switch (what was escaped) {
94def140 2613 handle \- in a transliteration (becomes a literal -)
ff3f963a 2614 if a pattern and not \N{, go treat as regular character
94def140
TS
2615 handle \132 (octal characters)
2616 handle \x15 and \x{1234} (hex characters)
ff3f963a 2617 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2618 handle \cV (control characters)
2619 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2620 } (end switch)
77a135fe 2621 continue
02aa26ce 2622 } (end if backslash)
77a135fe 2623 handle regular character
02aa26ce 2624 } (end while character to read)
4e553d73 2625
02aa26ce
NT
2626*/
2627
76e3520e 2628STATIC char *
cea2e8a9 2629S_scan_const(pTHX_ char *start)
79072805 2630{
97aff369 2631 dVAR;
3280af22 2632 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2633 SV *sv = newSV(send - start); /* sv for the constant. See
2634 note below on sizing. */
02aa26ce
NT
2635 register char *s = start; /* start of the constant */
2636 register char *d = SvPVX(sv); /* destination for copies */
2637 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2638 bool didrange = FALSE; /* did we just finish a range? */
b953e60c
KW
2639 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2640 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2641 to be UTF8? But, this can
2642 show as true when the source
2643 isn't utf8, as for example
2644 when it is entirely composed
2645 of hex constants */
2646
2647 /* Note on sizing: The scanned constant is placed into sv, which is
2648 * initialized by newSV() assuming one byte of output for every byte of
2649 * input. This routine expects newSV() to allocate an extra byte for a
2650 * trailing NUL, which this routine will append if it gets to the end of
2651 * the input. There may be more bytes of input than output (eg., \N{LATIN
2652 * CAPITAL LETTER A}), or more output than input if the constant ends up
2653 * recoded to utf8, but each time a construct is found that might increase
2654 * the needed size, SvGROW() is called. Its size parameter each time is
2655 * based on the best guess estimate at the time, namely the length used so
2656 * far, plus the length the current construct will occupy, plus room for
2657 * the trailing NUL, plus one byte for every input byte still unscanned */
2658
012bcf8d 2659 UV uv;
4c3a8340
TS
2660#ifdef EBCDIC
2661 UV literal_endpoint = 0;
e294cc5d 2662 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2663#endif
012bcf8d 2664
7918f24d
NC
2665 PERL_ARGS_ASSERT_SCAN_CONST;
2666
bb16bae8 2667 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2668 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2669 /* If we are doing a trans and we know we want UTF8 set expectation */
2670 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2671 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2672 }
2673
2674
79072805 2675 while (s < send || dorange) {
ff3f963a 2676
02aa26ce 2677 /* get transliterations out of the way (they're most literal) */
3280af22 2678 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2679 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2680 if (dorange) {
1ba5c669
JH
2681 I32 i; /* current expanded character */
2682 I32 min; /* first character in range */
2683 I32 max; /* last character in range */
02aa26ce 2684
e294cc5d
JH
2685#ifdef EBCDIC
2686 UV uvmax = 0;
2687#endif
2688
2689 if (has_utf8
2690#ifdef EBCDIC
2691 && !native_range
2692#endif
2693 ) {
9d4ba2ae 2694 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2695 char *e = d++;
2696 while (e-- > c)
2697 *(e + 1) = *e;
25716404 2698 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2699 /* mark the range as done, and continue */
2700 dorange = FALSE;
2701 didrange = TRUE;
2702 continue;
2703 }
2b9d42f0 2704
95a20fc0 2705 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2706#ifdef EBCDIC
2707 SvGROW(sv,
2708 SvLEN(sv) + (has_utf8 ?
2709 (512 - UTF_CONTINUATION_MARK +
2710 UNISKIP(0x100))
2711 : 256));
2712 /* How many two-byte within 0..255: 128 in UTF-8,
2713 * 96 in UTF-8-mod. */
2714#else
9cbb5ea2 2715 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2716#endif
9cbb5ea2 2717 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2718#ifdef EBCDIC
2719 if (has_utf8) {
2720 int j;
2721 for (j = 0; j <= 1; j++) {
2722 char * const c = (char*)utf8_hop((U8*)d, -1);
2723 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2724 if (j)
2725 min = (U8)uv;
2726 else if (uv < 256)
2727 max = (U8)uv;
2728 else {
2729 max = (U8)0xff; /* only to \xff */
2730 uvmax = uv; /* \x{100} to uvmax */
2731 }
2732 d = c; /* eat endpoint chars */
2733 }
2734 }
2735 else {
2736#endif
2737 d -= 2; /* eat the first char and the - */
2738 min = (U8)*d; /* first char in range */
2739 max = (U8)d[1]; /* last char in range */
2740#ifdef EBCDIC
2741 }
2742#endif
8ada0baa 2743
c2e66d9e 2744 if (min > max) {
01ec43d0 2745 Perl_croak(aTHX_
d1573ac7 2746 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2747 (char)min, (char)max);
c2e66d9e
GS
2748 }
2749
c7f1f016 2750#ifdef EBCDIC
4c3a8340
TS
2751 if (literal_endpoint == 2 &&
2752 ((isLOWER(min) && isLOWER(max)) ||
2753 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2754 if (isLOWER(min)) {
2755 for (i = min; i <= max; i++)
2756 if (isLOWER(i))
db42d148 2757 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2758 } else {
2759 for (i = min; i <= max; i++)
2760 if (isUPPER(i))
db42d148 2761 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2762 }
2763 }
2764 else
2765#endif
2766 for (i = min; i <= max; i++)
e294cc5d
JH
2767#ifdef EBCDIC
2768 if (has_utf8) {
2769 const U8 ch = (U8)NATIVE_TO_UTF(i);
2770 if (UNI_IS_INVARIANT(ch))
2771 *d++ = (U8)i;
2772 else {
2773 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2774 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2775 }
2776 }
2777 else
2778#endif
2779 *d++ = (char)i;
2780
2781#ifdef EBCDIC
2782 if (uvmax) {
2783 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2784 if (uvmax > 0x101)
2785 *d++ = (char)UTF_TO_NATIVE(0xff);
2786 if (uvmax > 0x100)
2787 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2788 }
2789#endif
02aa26ce
NT
2790
2791 /* mark the range as done, and continue */
79072805 2792 dorange = FALSE;
01ec43d0 2793 didrange = TRUE;
4c3a8340
TS
2794#ifdef EBCDIC
2795 literal_endpoint = 0;
2796#endif
79072805 2797 continue;
4e553d73 2798 }
02aa26ce
NT
2799
2800 /* range begins (ignore - as first or last char) */
79072805 2801 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2802 if (didrange) {
1fafa243 2803 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2804 }
e294cc5d
JH
2805 if (has_utf8
2806#ifdef EBCDIC
2807 && !native_range
2808#endif
2809 ) {
25716404 2810 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2811 s++;
2812 continue;
2813 }
79072805
LW
2814 dorange = TRUE;
2815 s++;
01ec43d0
GS
2816 }
2817 else {
2818 didrange = FALSE;
4c3a8340
TS
2819#ifdef EBCDIC
2820 literal_endpoint = 0;
e294cc5d 2821 native_range = TRUE;
4c3a8340 2822#endif
01ec43d0 2823 }
79072805 2824 }
02aa26ce
NT
2825
2826 /* if we get here, we're not doing a transliteration */
2827
0f5d15d6
IZ
2828 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2829 except for the last char, which will be done separately. */
3280af22 2830 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2831 if (s[2] == '#') {
e994fd66 2832 while (s+1 < send && *s != ')')
db42d148 2833 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2834 }
2835 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2836 || (s[2] == '?' && s[3] == '{'))
155aba94 2837 {
cc6b7395 2838 I32 count = 1;
0f5d15d6 2839 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2840 char c;
2841
d9f97599
GS
2842 while (count && (c = *regparse)) {
2843 if (c == '\\' && regparse[1])
2844 regparse++;
4e553d73 2845 else if (c == '{')
cc6b7395 2846 count++;
4e553d73 2847 else if (c == '}')
cc6b7395 2848 count--;
d9f97599 2849 regparse++;
cc6b7395 2850 }
e994fd66 2851 if (*regparse != ')')
5bdf89e7 2852 regparse--; /* Leave one char for continuation. */
0f5d15d6 2853 while (s < regparse)
db42d148 2854 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2855 }
748a9306 2856 }
02aa26ce
NT
2857
2858 /* likewise skip #-initiated comments in //x patterns */
3280af22 2859 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2860 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2861 while (s+1 < send && *s != '\n')
db42d148 2862 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2863 }
02aa26ce 2864
5d1d4326 2865 /* check for embedded arrays
da6eedaa 2866 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2867 */
1749ea0d
TS
2868 else if (*s == '@' && s[1]) {
2869 if (isALNUM_lazy_if(s+1,UTF))
2870 break;
2871 if (strchr(":'{$", s[1]))
2872 break;
2873 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2874 break; /* in regexp, neither @+ nor @- are interpolated */
2875 }
02aa26ce
NT
2876
2877 /* check for embedded scalars. only stop if we're sure it's a
2878 variable.
2879 */
79072805 2880 else if (*s == '$') {
3280af22 2881 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2882 break;
77772344 2883 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2884 if (s[1] == '\\') {
2885 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2886 "Possible unintended interpolation of $\\ in regex");
77772344 2887 }
79072805 2888 break; /* in regexp, $ might be tail anchor */
77772344 2889 }
79072805 2890 }
02aa26ce 2891
2b9d42f0
NIS
2892 /* End of else if chain - OP_TRANS rejoin rest */
2893
02aa26ce 2894 /* backslashes */
79072805 2895 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2896 char* e; /* Can be used for ending '}', etc. */
2897
79072805 2898 s++;
02aa26ce 2899
7d0fc23c
KW
2900 /* warn on \1 - \9 in substitution replacements, but note that \11
2901 * is an octal; and \19 is \1 followed by '9' */
3280af22 2902 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2903 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2904 {
a2a5de95 2905 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2906 *--s = '$';
2907 break;
2908 }
02aa26ce
NT
2909
2910 /* string-change backslash escapes */
3280af22 2911 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2912 --s;
2913 break;
2914 }
ff3f963a
KW
2915 /* In a pattern, process \N, but skip any other backslash escapes.
2916 * This is because we don't want to translate an escape sequence
2917 * into a meta symbol and have the regex compiler use the meta
2918 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2919 * in spite of this, we do have to process \N here while the proper
2920 * charnames handler is in scope. See bugs #56444 and #62056.
2921 * There is a complication because \N in a pattern may also stand
2922 * for 'match a non-nl', and not mean a charname, in which case its
2923 * processing should be deferred to the regex compiler. To be a
2924 * charname it must be followed immediately by a '{', and not look
2925 * like \N followed by a curly quantifier, i.e., not something like
2926 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2927 * quantifier */
2928 else if (PL_lex_inpat
2929 && (*s != 'N'
2930 || s[1] != '{'
2931 || regcurly(s + 1)))
2932 {
cc74c5bd
TS
2933 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2934 goto default_action;
2935 }
02aa26ce 2936
79072805 2937 switch (*s) {
02aa26ce
NT
2938
2939 /* quoted - in transliterations */
79072805 2940 case '-':
3280af22 2941 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2942 *d++ = *s++;
2943 continue;
2944 }
2945 /* FALL THROUGH */
2946 default:
11b8faa4 2947 {
a2a5de95
NC
2948 if ((isALPHA(*s) || isDIGIT(*s)))
2949 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2950 "Unrecognized escape \\%c passed through",
2951 *s);
11b8faa4 2952 /* default action is to copy the quoted character */
f9a63242 2953 goto default_action;
11b8faa4 2954 }
02aa26ce 2955
632403cc 2956 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2957 case '0': case '1': case '2': case '3':
2958 case '4': case '5': case '6': case '7':
ba210ebe 2959 {
53305cf1
NC
2960 I32 flags = 0;
2961 STRLEN len = 3;
77a135fe 2962 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2963 s += len;
2964 }
012bcf8d 2965 goto NUM_ESCAPE_INSERT;
02aa26ce 2966
f0a2b745
KW
2967 /* eg. \o{24} indicates the octal constant \024 */
2968 case 'o':
2969 {
2970 STRLEN len;
454155d9 2971 const char* error;
f0a2b745 2972
454155d9 2973 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2974 s += len;
454155d9 2975 if (! valid) {
f0a2b745
KW
2976 yyerror(error);
2977 continue;
2978 }
2979 goto NUM_ESCAPE_INSERT;
2980 }
2981
77a135fe 2982 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2983 case 'x':
a0ed51b3
LW
2984 ++s;
2985 if (*s == '{') {
9d4ba2ae 2986 char* const e = strchr(s, '}');
a4c04bdc
NC
2987 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2988 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2989 STRLEN len;
355860ce 2990
53305cf1 2991 ++s;
adaeee49 2992 if (!e) {
a0ed51b3 2993 yyerror("Missing right brace on \\x{}");
355860ce 2994 continue;
ba210ebe 2995 }
53305cf1 2996 len = e - s;
77a135fe 2997 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2998 s = e + 1;
a0ed51b3
LW
2999 }
3000 else {
ba210ebe 3001 {
53305cf1 3002 STRLEN len = 2;
a4c04bdc 3003 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 3004 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
3005 s += len;
3006 }
012bcf8d
GS
3007 }
3008
3009 NUM_ESCAPE_INSERT:
ff3f963a
KW
3010 /* Insert oct or hex escaped character. There will always be
3011 * enough room in sv since such escapes will be longer than any
3012 * UTF-8 sequence they can end up as, except if they force us
3013 * to recode the rest of the string into utf8 */
ba7cea30 3014
77a135fe 3015 /* Here uv is the ordinal of the next character being added in
ff3f963a 3016 * unicode (converted from native). */
77a135fe 3017 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3018 if (!has_utf8 && uv > 255) {
77a135fe
KW
3019 /* Might need to recode whatever we have accumulated so
3020 * far if it contains any chars variant in utf8 or
3021 * utf-ebcdic. */
3022
3023 SvCUR_set(sv, d - SvPVX_const(sv));
3024 SvPOK_on(sv);
3025 *d = '\0';
77a135fe 3026 /* See Note on sizing above. */
7bf79863
KW
3027 sv_utf8_upgrade_flags_grow(sv,
3028 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3029 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3030 d = SvPVX(sv) + SvCUR(sv);
3031 has_utf8 = TRUE;
012bcf8d
GS
3032 }
3033
77a135fe
KW
3034 if (has_utf8) {
3035 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3036 if (PL_lex_inwhat == OP_TRANS &&
3037 PL_sublex_info.sub_op) {
3038 PL_sublex_info.sub_op->op_private |=
3039 (PL_lex_repl ? OPpTRANS_FROM_UTF
3040 : OPpTRANS_TO_UTF);
f9a63242 3041 }
e294cc5d
JH
3042#ifdef EBCDIC
3043 if (uv > 255 && !dorange)
3044 native_range = FALSE;
3045#endif
012bcf8d 3046 }
a0ed51b3 3047 else {
012bcf8d 3048 *d++ = (char)uv;
a0ed51b3 3049 }
012bcf8d
GS
3050 }
3051 else {
c4d5f83a 3052 *d++ = (char) uv;
a0ed51b3 3053 }
79072805 3054 continue;
02aa26ce 3055
4a2d328f 3056 case 'N':
ff3f963a
KW
3057 /* In a non-pattern \N must be a named character, like \N{LATIN
3058 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3059 * mean to match a non-newline. For non-patterns, named
3060 * characters are converted to their string equivalents. In
3061 * patterns, named characters are not converted to their
3062 * ultimate forms for the same reasons that other escapes
3063 * aren't. Instead, they are converted to the \N{U+...} form
3064 * to get the value from the charnames that is in effect right
3065 * now, while preserving the fact that it was a named character
3066 * so that the regex compiler knows this */
3067
3068 /* This section of code doesn't generally use the
3069 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3070 * a close examination of this macro and determined it is a
3071 * no-op except on utfebcdic variant characters. Every
3072 * character generated by this that would normally need to be
3073 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3074 * needed, and would complicate use of copy(). XXX There are
3075 * other parts of this file where the macro is used
3076 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3077
3078 /* The structure of this section of code (besides checking for
3079 * errors and upgrading to utf8) is:
3080 * Further disambiguate between the two meanings of \N, and if
3081 * not a charname, go process it elsewhere
0a96133f
KW
3082 * If of form \N{U+...}, pass it through if a pattern;
3083 * otherwise convert to utf8
3084 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3085 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3086
3087 /* Here, s points to the 'N'; the test below is guaranteed to
3088 * succeed if we are being called on a pattern as we already
3089 * know from a test above that the next character is a '{'.
3090 * On a non-pattern \N must mean 'named sequence, which
3091 * requires braces */
3092 s++;
3093 if (*s != '{') {
3094 yyerror("Missing braces on \\N{}");
3095 continue;
3096 }
3097 s++;
3098
0a96133f 3099 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3100 if (! (e = strchr(s, '}'))) {
3101 if (! PL_lex_inpat) {
5777a3f7 3102 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3103 } else {
3104 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3105 }
0a96133f 3106 continue;
ff3f963a 3107 }
cddc7ef4 3108
ff3f963a 3109 /* Here it looks like a named character */
cddc7ef4 3110
ff3f963a
KW
3111 if (PL_lex_inpat) {
3112
3113 /* XXX This block is temporary code. \N{} implies that the
3114 * pattern is to have Unicode semantics, and therefore
3115 * currently has to be encoded in utf8. By putting it in
3116 * utf8 now, we save a whole pass in the regular expression
3117 * compiler. Once that code is changed so Unicode
3118 * semantics doesn't necessarily have to be in utf8, this
da3a4baf
KW
3119 * block should be removed. However, the code that parses
3120 * the output of this would have to be changed to not
3121 * necessarily expect utf8 */
ff3f963a 3122 if (!has_utf8) {
77a135fe 3123 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3124 SvPOK_on(sv);
e4f3eed8 3125 *d = '\0';
77a135fe 3126 /* See Note on sizing above. */
7bf79863 3127 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3128 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3129 /* 5 = '\N{' + cur char + NUL */
3130 (STRLEN)(send - s) + 5);
f08d6ad9 3131 d = SvPVX(sv) + SvCUR(sv);
89491803 3132 has_utf8 = TRUE;
ff3f963a
KW
3133 }
3134 }
423cee85 3135
ff3f963a
KW
3136 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3137 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3138 | PERL_SCAN_DISALLOW_PREFIX;
3139 STRLEN len;
3140
3141 /* For \N{U+...}, the '...' is a unicode value even on
3142 * EBCDIC machines */
3143 s += 2; /* Skip to next char after the 'U+' */
3144 len = e - s;
3145 uv = grok_hex(s, &len, &flags, NULL);
3146 if (len == 0 || len != (STRLEN)(e - s)) {
3147 yyerror("Invalid hexadecimal number in \\N{U+...}");
3148 s = e + 1;
3149 continue;
3150 }
3151
3152 if (PL_lex_inpat) {
3153
e2a7e165
KW
3154 /* On non-EBCDIC platforms, pass through to the regex
3155 * compiler unchanged. The reason we evaluated the
3156 * number above is to make sure there wasn't a syntax
3157 * error. But on EBCDIC we convert to native so
3158 * downstream code can continue to assume it's native
3159 */
ff3f963a 3160 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3161#ifdef EBCDIC
3162 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3163 and the \0 */
3164 "\\N{U+%X}",
3165 (unsigned int) UNI_TO_NATIVE(uv));
3166#else
ff3f963a
KW
3167 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3168 d += e - s + 1;
e2a7e165 3169#endif
ff3f963a
KW
3170 }
3171 else { /* Not a pattern: convert the hex to string */
3172
3173 /* If destination is not in utf8, unconditionally
3174 * recode it to be so. This is because \N{} implies
3175 * Unicode semantics, and scalars have to be in utf8
3176 * to guarantee those semantics */
3177 if (! has_utf8) {
3178 SvCUR_set(sv, d - SvPVX_const(sv));
3179 SvPOK_on(sv);
3180 *d = '\0';
3181 /* See Note on sizing above. */
3182 sv_utf8_upgrade_flags_grow(
3183 sv,
3184 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3185 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3186 d = SvPVX(sv) + SvCUR(sv);
3187 has_utf8 = TRUE;
3188 }
3189
3190 /* Add the string to the output */
3191 if (UNI_IS_INVARIANT(uv)) {
3192 *d++ = (char) uv;
3193 }
3194 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3195 }
3196 }
3197 else { /* Here is \N{NAME} but not \N{U+...}. */
3198
3199 SV *res; /* result from charnames */
3200 const char *str; /* the string in 'res' */
3201 STRLEN len; /* its length */
3202
3203 /* Get the value for NAME */
3204 res = newSVpvn(s, e - s);
3205 res = new_constant( NULL, 0, "charnames",
3206 /* includes all of: \N{...} */
3207 res, NULL, s - 3, e - s + 4 );
3208
3209 /* Most likely res will be in utf8 already since the
3210 * standard charnames uses pack U, but a custom translator
3211 * can leave it otherwise, so make sure. XXX This can be
3212 * revisited to not have charnames use utf8 for characters
3213 * that don't need it when regexes don't have to be in utf8
3214 * for Unicode semantics. If doing so, remember EBCDIC */
3215 sv_utf8_upgrade(res);
3216 str = SvPV_const(res, len);
3217
3218 /* Don't accept malformed input */
3219 if (! is_utf8_string((U8 *) str, len)) {
3220 yyerror("Malformed UTF-8 returned by \\N");
3221 }
3222 else if (PL_lex_inpat) {
3223
3224 if (! len) { /* The name resolved to an empty string */
3225 Copy("\\N{}", d, 4, char);
3226 d += 4;
3227 }
3228 else {
3229 /* In order to not lose information for the regex
3230 * compiler, pass the result in the specially made
3231 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3232 * the code points in hex of each character
3233 * returned by charnames */
3234
3235 const char *str_end = str + len;
3236 STRLEN char_length; /* cur char's byte length */
3237 STRLEN output_length; /* and the number of bytes
3238 after this is translated
3239 into hex digits */
3240 const STRLEN off = d - SvPVX_const(sv);
3241
3242 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3243 * max('U+', '.'); and 1 for NUL */
3244 char hex_string[2 * UTF8_MAXBYTES + 5];
3245
3246 /* Get the first character of the result. */
3247 U32 uv = utf8n_to_uvuni((U8 *) str,
3248 len,
3249 &char_length,
3250 UTF8_ALLOW_ANYUV);
3251
3252 /* The call to is_utf8_string() above hopefully
3253 * guarantees that there won't be an error. But
3254 * it's easy here to make sure. The function just
3255 * above warns and returns 0 if invalid utf8, but
3256 * it can also return 0 if the input is validly a
3257 * NUL. Disambiguate */
3258 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3259 uv = UNICODE_REPLACEMENT;
3260 }
3261
3262 /* Convert first code point to hex, including the
e2a7e165
KW
3263 * boiler plate before it. For all these, we
3264 * convert to native format so that downstream code
3265 * can continue to assume the input is native */
78c35590 3266 output_length =
3353de27 3267 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3268 "\\N{U+%X",
3269 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3270
3271 /* Make sure there is enough space to hold it */
3272 d = off + SvGROW(sv, off
3273 + output_length
3274 + (STRLEN)(send - e)
3275 + 2); /* '}' + NUL */
3276 /* And output it */
3277 Copy(hex_string, d, output_length, char);
3278 d += output_length;
3279
3280 /* For each subsequent character, append dot and
3281 * its ordinal in hex */
3282 while ((str += char_length) < str_end) {
3283 const STRLEN off = d - SvPVX_const(sv);
3284 U32 uv = utf8n_to_uvuni((U8 *) str,
3285 str_end - str,
3286 &char_length,
3287 UTF8_ALLOW_ANYUV);
3288 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3289 uv = UNICODE_REPLACEMENT;
3290 }
3291
78c35590 3292 output_length =
3353de27 3293 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3294 ".%X",
3295 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3296
3297 d = off + SvGROW(sv, off
3298 + output_length
3299 + (STRLEN)(send - e)
3300 + 2); /* '}' + NUL */
3301 Copy(hex_string, d, output_length, char);
3302 d += output_length;
3303 }
3304
3305 *d++ = '}'; /* Done. Add the trailing brace */
3306 }
3307 }
3308 else { /* Here, not in a pattern. Convert the name to a
3309 * string. */
3310
3311 /* If destination is not in utf8, unconditionally
3312 * recode it to be so. This is because \N{} implies
3313 * Unicode semantics, and scalars have to be in utf8
3314 * to guarantee those semantics */
3315 if (! has_utf8) {
3316 SvCUR_set(sv, d - SvPVX_const(sv));
3317 SvPOK_on(sv);
3318 *d = '\0';
3319 /* See Note on sizing above. */
3320 sv_utf8_upgrade_flags_grow(sv,
3321 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3322 len + (STRLEN)(send - s) + 1);
3323 d = SvPVX(sv) + SvCUR(sv);
3324 has_utf8 = TRUE;
3325 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3326
3327 /* See Note on sizing above. (NOTE: SvCUR() is not
3328 * set correctly here). */
3329 const STRLEN off = d - SvPVX_const(sv);
3330 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3331 }
3332 Copy(str, d, len, char);
3333 d += len;
423cee85 3334 }
423cee85 3335 SvREFCNT_dec(res);
cb233ae3
KW
3336
3337 /* Deprecate non-approved name syntax */
3338 if (ckWARN_d(WARN_DEPRECATED)) {
3339 bool problematic = FALSE;
3340 char* i = s;
3341
3342 /* For non-ut8 input, look to see that the first
3343 * character is an alpha, then loop through the rest
3344 * checking that each is a continuation */
3345 if (! this_utf8) {
3346 if (! isALPHAU(*i)) problematic = TRUE;
3347 else for (i = s + 1; i < e; i++) {
3348 if (isCHARNAME_CONT(*i)) continue;
3349 problematic = TRUE;
3350 break;
3351 }
3352 }
3353 else {
3354 /* Similarly for utf8. For invariants can check
3355 * directly. We accept anything above the latin1
3356 * range because it is immaterial to Perl if it is
3357 * correct or not, and is expensive to check. But
3358 * it is fairly easy in the latin1 range to convert
3359 * the variants into a single character and check
3360 * those */
3361 if (UTF8_IS_INVARIANT(*i)) {
3362 if (! isALPHAU(*i)) problematic = TRUE;
3363 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3364 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3365 *(i+1)))))
3366 {
3367 problematic = TRUE;
3368 }
3369 }
3370 if (! problematic) for (i = s + UTF8SKIP(s);
3371 i < e;
3372 i+= UTF8SKIP(i))
3373 {
3374 if (UTF8_IS_INVARIANT(*i)) {
3375 if (isCHARNAME_CONT(*i)) continue;
3376 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3377 continue;
3378 } else if (isCHARNAME_CONT(
3379 UNI_TO_NATIVE(
81c14aa2 3380 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3381 {
3382 continue;
3383 }
3384 problematic = TRUE;
3385 break;
3386 }
3387 }
3388 if (problematic) {
6e1bad6c
KW
3389 /* The e-i passed to the final %.*s makes sure that
3390 * should the trailing NUL be missing that this
3391 * print won't run off the end of the string */
cb233ae3 3392 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3393 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3394 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3395 }
3396 }
3397 } /* End \N{NAME} */
ff3f963a
KW
3398#ifdef EBCDIC
3399 if (!dorange)
3400 native_range = FALSE; /* \N{} is defined to be Unicode */
3401#endif
3402 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3403 continue;
3404
02aa26ce 3405 /* \c is a control character */
79072805
LW
3406 case 'c':
3407 s++;
961ce445 3408 if (s < send) {
17a3df4c 3409 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3410 }
961ce445
RGS
3411 else {
3412 yyerror("Missing control char name in \\c");
3413 }
79072805 3414 continue;
02aa26ce
NT
3415
3416 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3417 case 'b':
db42d148 3418 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3419 break;
3420 case 'n':
db42d148 3421 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3422 break;
3423 case 'r':
db42d148 3424 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3425 break;
3426 case 'f':
db42d148 3427 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3428 break;
3429 case 't':
db42d148 3430 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3431 break;
34a3fe2a 3432 case 'e':
db42d148 3433 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3434 break;
3435 case 'a':
db42d148 3436 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3437 break;
02aa26ce
NT
3438 } /* end switch */
3439
79072805
LW
3440 s++;
3441 continue;
02aa26ce 3442 } /* end if (backslash) */
4c3a8340
TS
3443#ifdef EBCDIC
3444 else
3445 literal_endpoint++;
3446#endif
02aa26ce 3447
f9a63242 3448 default_action:
77a135fe
KW
3449 /* If we started with encoded form, or already know we want it,
3450 then encode the next character */
3451 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3452 STRLEN len = 1;
77a135fe
KW
3453
3454
3455 /* One might think that it is wasted effort in the case of the
3456 * source being utf8 (this_utf8 == TRUE) to take the next character
3457 * in the source, convert it to an unsigned value, and then convert
3458 * it back again. But the source has not been validated here. The
3459 * routine that does the conversion checks for errors like
3460 * malformed utf8 */
3461
5f66b61c
AL
3462 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3463 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3464 if (!has_utf8) {
3465 SvCUR_set(sv, d - SvPVX_const(sv));
3466 SvPOK_on(sv);
3467 *d = '\0';
77a135fe 3468 /* See Note on sizing above. */
7bf79863
KW
3469 sv_utf8_upgrade_flags_grow(sv,
3470 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3471 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3472 d = SvPVX(sv) + SvCUR(sv);
3473 has_utf8 = TRUE;
3474 } else if (need > len) {
3475 /* encoded value larger than old, may need extra space (NOTE:
3476 * SvCUR() is not set correctly here). See Note on sizing
3477 * above. */
9d4ba2ae 3478 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3479 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3480 }
77a135fe
KW
3481 s += len;
3482
5f66b61c 3483 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3484#ifdef EBCDIC
3485 if (uv > 255 && !dorange)
3486 native_range = FALSE;
3487#endif
2b9d42f0
NIS
3488 }
3489 else {
3490 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3491 }
02aa26ce
NT
3492 } /* while loop to process each character */
3493
3494 /* terminate the string and set up the sv */
79072805 3495 *d = '\0';
95a20fc0 3496 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3497 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3498 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3499
79072805 3500 SvPOK_on(sv);
9f4817db 3501 if (PL_encoding && !has_utf8) {
d0063567
DK
3502 sv_recode_to_utf8(sv, PL_encoding);
3503 if (SvUTF8(sv))
3504 has_utf8 = TRUE;
9f4817db 3505 }
2b9d42f0 3506 if (has_utf8) {
7e2040f0 3507 SvUTF8_on(sv);
2b9d42f0 3508 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3509 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3510 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3511 }
3512 }
79072805 3513
02aa26ce 3514 /* shrink the sv if we allocated more than we used */
79072805 3515 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3516 SvPV_shrink_to_cur(sv);
79072805 3517 }
02aa26ce 3518
6154021b 3519 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3520 if (s > PL_bufptr) {
eb0d8d16
NC
3521 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3522 const char *const key = PL_lex_inpat ? "qr" : "q";
3523 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3524 const char *type;
3525 STRLEN typelen;
3526
3527 if (PL_lex_inwhat == OP_TRANS) {
3528 type = "tr";
3529 typelen = 2;
3530 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3531 type = "s";
3532 typelen = 1;
3533 } else {
3534 type = "qq";
3535 typelen = 2;
3536 }
3537
3538 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3539 type, typelen);
3540 }
6154021b 3541 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3542 } else
8990e307 3543 SvREFCNT_dec(sv);
79072805
LW
3544 return s;
3545}
3546
ffb4593c
NT
3547/* S_intuit_more
3548 * Returns TRUE if there's more to the expression (e.g., a subscript),
3549 * FALSE otherwise.
ffb4593c
NT
3550 *
3551 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3552 *
3553 * ->[ and ->{ return TRUE
3554 * { and [ outside a pattern are always subscripts, so return TRUE
3555 * if we're outside a pattern and it's not { or [, then return FALSE
3556 * if we're in a pattern and the first char is a {
3557 * {4,5} (any digits around the comma) returns FALSE
3558 * if we're in a pattern and the first char is a [
3559 * [] returns FALSE
3560 * [SOMETHING] has a funky algorithm to decide whether it's a
3561 * character class or not. It has to deal with things like
3562 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3563 * anything else returns TRUE
3564 */
3565
9cbb5ea2
GS
3566/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3567
76e3520e 3568STATIC int
cea2e8a9 3569S_intuit_more(pTHX_ register char *s)
79072805 3570{
97aff369 3571 dVAR;
7918f24d
NC
3572
3573 PERL_ARGS_ASSERT_INTUIT_MORE;
3574
3280af22 3575 if (PL_lex_brackets)
79072805
LW
3576 return TRUE;
3577 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3578 return TRUE;
3579 if (*s != '{' && *s != '[')
3580 return FALSE;
3280af22 3581 if (!PL_lex_inpat)
79072805
LW
3582 return TRUE;
3583
3584 /* In a pattern, so maybe we have {n,m}. */
3585 if (*s == '{') {
b3155d95 3586 if (regcurly(s)) {
79072805 3587 return FALSE;
b3155d95 3588 }
79072805 3589 return TRUE;
79072805
LW
3590 }
3591
3592 /* On the other hand, maybe we have a character class */
3593
3594 s++;
3595 if (*s == ']' || *s == '^')
3596 return FALSE;
3597 else {
ffb4593c 3598 /* this is terrifying, and it works */
79072805
LW
3599 int weight = 2; /* let's weigh the evidence */
3600 char seen[256];
f27ffc4a 3601 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3602 const char * const send = strchr(s,']');
3280af22 3603 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3604
3605 if (!send) /* has to be an expression */
3606 return TRUE;
3607
3608 Zero(seen,256,char);
3609 if (*s == '$')
3610 weight -= 3;
3611 else if (isDIGIT(*s)) {
3612 if (s[1] != ']') {
3613 if (isDIGIT(s[1]) && s[2] == ']')
3614 weight -= 10;
3615 }
3616 else
3617 weight -= 100;
3618 }
3619 for (; s < send; s++) {
3620 last_un_char = un_char;
3621 un_char = (unsigned char)*s;
3622 switch (*s) {
3623 case '@':
3624 case '&':
3625 case '$':
3626 weight -= seen[un_char] * 10;
7e2040f0 3627 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3628 int len;
8903cb82 3629 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e 3630 len = (int)strlen(tmpbuf);
6fbd0d97
BF
3631 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3632 UTF ? SVf_UTF8 : 0, SVt_PV))
79072805
LW
3633 weight -= 100;
3634 else
3635 weight -= 10;
3636 }
3637 else if (*s == '$' && s[1] &&
93a17b20
LW
3638 strchr("[#!%*<>()-=",s[1])) {
3639 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3640 weight -= 10;
3641 else
3642 weight -= 1;
3643 }
3644 break;
3645 case '\\':
3646 un_char = 254;
3647 if (s[1]) {
93a17b20 3648 if (strchr("wds]",s[1]))
79072805 3649 weight += 100;
10edeb5d 3650 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3651 weight += 1;
93a17b20 3652 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3653 weight += 40;
3654 else if (isDIGIT(s[1])) {
3655 weight += 40;
3656 while (s[1] && isDIGIT(s[1]))
3657 s++;
3658 }
3659 }
3660 else
3661 weight += 100;
3662 break;
3663 case '-':
3664 if (s[1] == '\\')
3665 weight += 50;
93a17b20 3666 if (strchr("aA01! ",last_un_char))
79072805 3667 weight += 30;
93a17b20 3668 if (strchr("zZ79~",s[1]))
79072805 3669 weight += 30;
f27ffc4a
GS
3670 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3671 weight -= 5; /* cope with negative subscript */
79072805
LW
3672 break;
3673 default:
3792a11b
NC
3674 if (!isALNUM(last_un_char)
3675 && !(last_un_char == '$' || last_un_char == '@'
3676 || last_un_char == '&')
3677 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3678 char *d = tmpbuf;
3679 while (isALPHA(*s))
3680 *d++ = *s++;
3681 *d = '\0';
5458a98a 3682 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3683 weight -= 150;
3684 }
3685 if (un_char == last_un_char + 1)
3686 weight += 5;
3687 weight -= seen[un_char];
3688 break;
3689 }
3690 seen[un_char]++;
3691 }
3692 if (weight >= 0) /* probably a character class */
3693 return FALSE;
3694 }
3695
3696 return TRUE;
3697}
ffed7fef 3698
ffb4593c
NT
3699/*
3700 * S_intuit_method
3701 *
3702 * Does all the checking to disambiguate
3703 * foo bar
3704 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3705 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3706 *
3707 * First argument is the stuff after the first token, e.g. "bar".
3708 *
3709 * Not a method if bar is a filehandle.
3710 * Not a method if foo is a subroutine prototyped to take a filehandle.
3711 * Not a method if it's really "Foo $bar"
3712 * Method if it's "foo $bar"
3713 * Not a method if it's really "print foo $bar"
3714 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3715 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3716 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3717 * =>
3718 */
3719
76e3520e 3720STATIC int
62d55b22 3721S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3722{
97aff369 3723 dVAR;
a0d0e21e 3724 char *s = start + (*start == '$');
3280af22 3725 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3726 STRLEN len;
3727 GV* indirgv;
5db06880
NC
3728#ifdef PERL_MAD
3729 int soff;
3730#endif
a0d0e21e 3731
7918f24d
NC
3732 PERL_ARGS_ASSERT_INTUIT_METHOD;
3733
a0d0e21e 3734 if (gv) {
62d55b22 3735 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3736 return 0;
62d55b22
NC
3737 if (cv) {
3738 if (SvPOK(cv)) {
8fa6a409 3739 const char *proto = CvPROTO(cv);
62d55b22
NC
3740 if (proto) {
3741 if (*proto == ';')
3742 proto++;
3743 if (*proto == '*')
3744 return 0;
3745 }
b6c543e3
IZ
3746 }
3747 } else
c35e046a 3748 gv = NULL;
a0d0e21e 3749 }
8903cb82 3750 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3751 /* start is the beginning of the possible filehandle/object,
3752 * and s is the end of it
3753 * tmpbuf is a copy of it
3754 */
3755
a0d0e21e 3756 if (*start == '$') {
3ef1310e
RGS
3757 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3758 isUPPER(*PL_tokenbuf))
a0d0e21e 3759 return 0;
5db06880
NC
3760#ifdef PERL_MAD
3761 len = start - SvPVX(PL_linestr);
3762#endif
29595ff2 3763 s = PEEKSPACE(s);
f0092767 3764#ifdef PERL_MAD
5db06880
NC
3765 start = SvPVX(PL_linestr) + len;
3766#endif
3280af22
NIS
3767 PL_bufptr = start;
3768 PL_expect = XREF;
a0d0e21e
LW
3769 return *s == '(' ? FUNCMETH : METHOD;
3770 }
5458a98a 3771 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3772 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3773 len -= 2;
3774 tmpbuf[len] = '\0';
5db06880
NC
3775#ifdef PERL_MAD
3776 soff = s - SvPVX(PL_linestr);
3777#endif
c3e0f903
GS
3778 goto bare_package;
3779 }
38d2cf30 3780 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8ebc5c01 3781 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3782 return 0;
3783 /* filehandle or package name makes it a method */
38d2cf30 3784 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5db06880
NC
3785#ifdef PERL_MAD
3786 soff = s - SvPVX(PL_linestr);
3787#endif
29595ff2 3788 s = PEEKSPACE(s);
3280af22 3789 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3790 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3791 bare_package:
cd81e915 3792 start_force(PL_curforce);
9ded7720 3793 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3794 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3795 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880 3796 if (PL_madskills)
38d2cf30
BF
3797 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3798 ( UTF ? SVf_UTF8 : 0 )));
3280af22 3799 PL_expect = XTERM;
a0d0e21e 3800 force_next(WORD);
3280af22 3801 PL_bufptr = s;
5db06880
NC
3802#ifdef PERL_MAD
3803 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3804#endif
a0d0e21e
LW
3805 return *s == '(' ? FUNCMETH : METHOD;
3806 }
3807 }
3808 return 0;
3809}
3810
16d20bd9 3811/* Encoded script support. filter_add() effectively inserts a
4e553d73 3812 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3813 * Note that the filter function only applies to the current source file
3814 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3815 *
3816 * The datasv parameter (which may be NULL) can be used to pass
3817 * private data to this instance of the filter. The filter function
3818 * can recover the SV using the FILTER_DATA macro and use it to
3819 * store private buffers and state information.
3820 *
3821 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3822 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3823 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3824 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3825 * private use must be set using malloc'd pointers.
3826 */
16d20bd9
AD
3827
3828SV *
864dbfa3 3829Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3830{
97aff369 3831 dVAR;
f4c556ac 3832 if (!funcp)
a0714e2c 3833 return NULL;
f4c556ac 3834
5486870f
DM
3835 if (!PL_parser)
3836 return NULL;
3837
f1c31c52
FC
3838 if (PL_parser->lex_flags & LEX_IGNORE_UTF8_HINTS)
3839 Perl_croak(aTHX_ "Source filters apply only to byte streams");
3840
3280af22
NIS
3841 if (!PL_rsfp_filters)
3842 PL_rsfp_filters = newAV();
16d20bd9 3843 if (!datasv)
561b68a9 3844 datasv = newSV(0);
862a34c6 3845 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3846 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3847 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3848 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3849 FPTR2DPTR(void *, IoANY(datasv)),
3850 SvPV_nolen(datasv)));
3280af22
NIS
3851 av_unshift(PL_rsfp_filters, 1);
3852 av_store(PL_rsfp_filters, 0, datasv) ;
60d63348
FC
3853 if (
3854 !PL_parser->filtered
3855 && PL_parser->lex_flags & LEX_EVALBYTES
3856 && PL_bufptr < PL_bufend
3857 ) {
3858 const char *s = PL_bufptr;
3859 while (s < PL_bufend) {
3860 if (*s == '\n') {
3861 SV *linestr = PL_parser->linestr;
3862 char *buf = SvPVX(linestr);
3863 STRLEN const bufptr_pos = PL_parser->bufptr - buf;
3864 STRLEN const oldbufptr_pos = PL_parser->oldbufptr - buf;
3865 STRLEN const oldoldbufptr_pos=PL_parser->oldoldbufptr-buf;
3866 STRLEN const linestart_pos = PL_parser->linestart - buf;
3867 STRLEN const last_uni_pos =
3868 PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
3869 STRLEN const last_lop_pos =
3870 PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
3871 av_push(PL_rsfp_filters, linestr);
3872 PL_parser->linestr =
3873 newSVpvn(SvPVX(linestr), ++s-SvPVX(linestr));
3874 buf = SvPVX(PL_parser->linestr);
3875 PL_parser->bufend = buf + SvCUR(PL_parser->linestr);
3876 PL_parser->bufptr = buf + bufptr_pos;
3877 PL_parser->oldbufptr = buf + oldbufptr_pos;
3878 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
3879 PL_parser->linestart = buf + linestart_pos;
3880 if (PL_parser->last_uni)
3881 PL_parser->last_uni = buf + last_uni_pos;
3882 if (PL_parser->last_lop)
3883 PL_parser->last_lop = buf + last_lop_pos;
3884 SvLEN(linestr) = SvCUR(linestr);
3885 SvCUR(linestr) = s-SvPVX(linestr);
3886 PL_parser->filtered = 1;
3887 break;
3888 }
3889 s++;
3890 }
3891 }
16d20bd9
AD
3892 return(datasv);
3893}
4e553d73 3894
16d20bd9
AD
3895
3896/* Delete most recently added instance of this filter function. */
a0d0e21e 3897void
864dbfa3 3898Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3899{
97aff369 3900 dVAR;
e0c19803 3901 SV *datasv;
24801a4b 3902
7918f24d
NC
3903 PERL_ARGS_ASSERT_FILTER_DEL;
3904
33073adb 3905#ifdef DEBUGGING
55662e27
JH
3906 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3907 FPTR2DPTR(void*, funcp)));
33073adb 3908#endif
5486870f 3909 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3910 return;
3911 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3912 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3913 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3914 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3915
16d20bd9
AD
3916 return;
3917 }
3918 /* we need to search for the correct entry and clear it */
cea2e8a9 3919 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3920}
3921
3922
1de9afcd
RGS
3923/* Invoke the idxth filter function for the current rsfp. */
3924/* maxlen 0 = read one text line */
16d20bd9 3925I32
864dbfa3 3926Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3927{
97aff369 3928 dVAR;
16d20bd9
AD
3929 filter_t funcp;
3930 SV *datasv = NULL;
f482118e
NC
3931 /* This API is bad. It should have been using unsigned int for maxlen.
3932 Not sure if we want to change the API, but if not we should sanity
3933 check the value here. */
60d63348 3934 unsigned int correct_length
39cd7a59
NC
3935 = maxlen < 0 ?
3936#ifdef PERL_MICRO
3937 0x7FFFFFFF
3938#else
3939 INT_MAX
3940#endif
3941 : maxlen;
e50aee73 3942
7918f24d
NC
3943 PERL_ARGS_ASSERT_FILTER_READ;
3944
5486870f 3945 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3946 return -1;
1de9afcd 3947 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3948 /* Provide a default input filter to make life easy. */
3949 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3950 DEBUG_P(PerlIO_printf(Perl_debug_log,
3951 "filter_read %d: from rsfp\n", idx));
f482118e 3952 if (correct_length) {
16d20bd9
AD
3953 /* Want a block */
3954 int len ;
f54cb97a 3955 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3956
3957 /* ensure buf_sv is large enough */
881d8f0a 3958 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3959 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3960 correct_length)) <= 0) {
3280af22 3961 if (PerlIO_error(PL_rsfp))
37120919
AD
3962 return -1; /* error */
3963 else
3964 return 0 ; /* end of file */
3965 }
16d20bd9 3966 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3967 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3968 } else {
3969 /* Want a line */
3280af22
NIS
3970 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3971 if (PerlIO_error(PL_rsfp))
37120919
AD
3972 return -1; /* error */
3973 else
3974 return 0 ; /* end of file */
3975 }
16d20bd9
AD
3976 }
3977 return SvCUR(buf_sv);
3978 }
3979 /* Skip this filter slot if filter has been deleted */
1de9afcd 3980 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3981 DEBUG_P(PerlIO_printf(Perl_debug_log,
3982 "filter_read %d: skipped (filter deleted)\n",
3983 idx));
f482118e 3984 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9 3985 }
60d63348
FC
3986 if (SvTYPE(datasv) != SVt_PVIO) {
3987 if (correct_length) {
3988 /* Want a block */
3989 const STRLEN remainder = SvLEN(datasv) - SvCUR(datasv);
3990 if (!remainder) return 0; /* eof */
3991 if (correct_length > remainder) correct_length = remainder;
3992 sv_catpvn(buf_sv, SvEND(datasv), correct_length);
3993 SvCUR_set(datasv, SvCUR(datasv) + correct_length);
3994 } else {
3995 /* Want a line */
3996 const char *s = SvEND(datasv);
3997 const char *send = SvPVX(datasv) + SvLEN(datasv);
3998 while (s < send) {
3999 if (*s == '\n') {
4000 s++;
4001 break;
4002 }
4003 s++;
4004 }
4005 if (s == send) return 0; /* eof */
4006 sv_catpvn(buf_sv, SvEND(datasv), s-SvEND(datasv));
4007 SvCUR_set(datasv, s-SvPVX(datasv));
4008 }
4009 return SvCUR(buf_sv);
4010 }
16d20bd9 4011 /* Get function pointer hidden within datasv */
8141890a 4012 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
4013 DEBUG_P(PerlIO_printf(Perl_debug_log,
4014 "filter_read %d: via function %p (%s)\n",
ca0270c4 4015 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
4016 /* Call function. The function is expected to */
4017 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 4018 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 4019 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
4020}
4021
76e3520e 4022STATIC char *
5cc814fd 4023S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 4024{
97aff369 4025 dVAR;
7918f24d
NC
4026
4027 PERL_ARGS_ASSERT_FILTER_GETS;
4028
c39cd008 4029#ifdef PERL_CR_FILTER
3280af22 4030 if (!PL_rsfp_filters) {
c39cd008 4031 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
4032 }
4033#endif
3280af22 4034 if (PL_rsfp_filters) {
55497cff 4035 if (!append)
4036 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
4037 if (FILTER_READ(0, sv, 0) > 0)
4038 return ( SvPVX(sv) ) ;
4039 else
bd61b366 4040 return NULL ;
16d20bd9 4041 }
9d116dd7 4042 else
5cc814fd 4043 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
4044}
4045
01ec43d0 4046STATIC HV *
9bde8eb0 4047S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 4048{
97aff369 4049 dVAR;
def3634b
GS
4050 GV *gv;
4051
7918f24d
NC
4052 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
4053
01ec43d0 4054 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
4055 return PL_curstash;
4056
4057 if (len > 2 &&
4058 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
acc6da14 4059 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
01ec43d0
GS
4060 {
4061 return GvHV(gv); /* Foo:: */
def3634b
GS
4062 }
4063
4064 /* use constant CLASS => 'MyClass' */
acc6da14 4065 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
c35e046a
AL
4066 if (gv && GvCV(gv)) {
4067 SV * const sv = cv_const_sv(GvCV(gv));
4068 if (sv)
9bde8eb0 4069 pkgname = SvPV_const(sv, len);
def3634b
GS
4070 }
4071
acc6da14 4072 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
def3634b 4073}
a0d0e21e 4074
e3f73d4e
RGS
4075/*
4076 * S_readpipe_override
486ec47a 4077 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4078 * optree, provided sublex_start() is called afterwards.
4079 */
4080STATIC void
1d51329b 4081S_readpipe_override(pTHX)
e3f73d4e
RGS
4082{
4083 GV **gvp;
4084 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4085 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4086 if ((gv_readpipe
4087 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4088 ||
4089 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4090 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4091 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4092 {
4093 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4094 op_append_elem(OP_LIST,
e3f73d4e
RGS
4095 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4096 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4097 }
e3f73d4e
RGS
4098}
4099
5db06880
NC
4100#ifdef PERL_MAD
4101 /*
4102 * Perl_madlex
4103 * The intent of this yylex wrapper is to minimize the changes to the
4104 * tokener when we aren't interested in collecting madprops. It remains
4105 * to be seen how successful this strategy will be...
4106 */
4107
4108int
4109Perl_madlex(pTHX)
4110{
4111 int optype;
4112 char *s = PL_bufptr;
4113
cd81e915
NC
4114 /* make sure PL_thiswhite is initialized */
4115 PL_thiswhite = 0;
4116 PL_thismad = 0;
5db06880 4117
cd81e915 4118 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 4119 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
4120 return S_pending_ident(aTHX);
4121
4122 /* previous token ate up our whitespace? */
cd81e915
NC
4123 if (!PL_lasttoke && PL_nextwhite) {
4124 PL_thiswhite = PL_nextwhite;
4125 PL_nextwhite = 0;
5db06880
NC
4126 }
4127
4128 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4129 PL_realtokenstart = -1;
4130 PL_thistoken = 0;
5db06880
NC
4131 optype = yylex();
4132 s = PL_bufptr;
cd81e915 4133 assert(PL_curforce < 0);
5db06880 4134
cd81e915
NC
4135 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4136 if (!PL_thistoken) {
4137 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4138 PL_thistoken = newSVpvs("");
5db06880 4139 else {
c35e046a 4140 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4141 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4142 }
4143 }
cd81e915
NC
4144 if (PL_thismad) /* install head */
4145 CURMAD('X', PL_thistoken);
5db06880
NC
4146 }
4147
4148 /* last whitespace of a sublex? */
cd81e915
NC
4149 if (optype == ')' && PL_endwhite) {
4150 CURMAD('X', PL_endwhite);
5db06880
NC
4151 }
4152
cd81e915 4153 if (!PL_thismad) {
5db06880
NC
4154
4155 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4156 if (!PL_thiswhite && !PL_endwhite && !optype) {
4157 sv_free(PL_thistoken);
4158 PL_thistoken = 0;
5db06880
NC
4159 return 0;
4160 }
4161
4162 /* put off final whitespace till peg */
60d63348 4163 if (optype == ';' && !PL_rsfp && !PL_parser->filtered) {
cd81e915
NC
4164 PL_nextwhite = PL_thiswhite;
4165 PL_thiswhite = 0;
5db06880 4166 }
cd81e915
NC
4167 else if (PL_thisopen) {
4168 CURMAD('q', PL_thisopen);
4169 if (PL_thistoken)
4170 sv_free(PL_thistoken);
4171 PL_thistoken = 0;
5db06880
NC
4172 }
4173 else {
4174 /* Store actual token text as madprop X */
cd81e915 4175 CURMAD('X', PL_thistoken);
5db06880
NC
4176 }
4177
cd81e915 4178 if (PL_thiswhite) {
5db06880 4179 /* add preceding whitespace as madprop _ */
cd81e915 4180 CURMAD('_', PL_thiswhite);
5db06880
NC
4181 }
4182
cd81e915 4183 if (PL_thisstuff) {
5db06880 4184 /* add quoted material as madprop = */
cd81e915 4185 CURMAD('=', PL_thisstuff);
5db06880
NC
4186 }
4187
cd81e915 4188 if (PL_thisclose) {
5db06880 4189 /* add terminating quote as madprop Q */
cd81e915 4190 CURMAD('Q', PL_thisclose);
5db06880
NC
4191 }
4192 }
4193
4194 /* special processing based on optype */
4195
4196 switch (optype) {
4197
4198 /* opval doesn't need a TOKEN since it can already store mp */
4199 case WORD:
4200 case METHOD:
4201 case FUNCMETH:
4202 case THING:
4203 case PMFUNC:
4204 case PRIVATEREF:
4205 case FUNC0SUB:
4206 case UNIOPSUB:
4207 case LSTOPSUB:
6154021b
RGS
4208 if (pl_yylval.opval)
4209 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4210 PL_thismad = 0;
5db06880
NC
4211 return optype;
4212
4213 /* fake EOF */
4214 case 0:
4215 optype = PEG;
cd81e915
NC
4216 if (PL_endwhite) {
4217 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4218 PL_endwhite = 0;
5db06880
NC
4219 }
4220 break;
4221
4222 case ']':
4223 case '}':
cd81e915 4224 if (PL_faketokens)
5db06880
NC
4225 break;
4226 /* remember any fake bracket that lexer is about to discard */
4227 if (PL_lex_brackets == 1 &&
4228 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4229 {
4230 s = PL_bufptr;
4231 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4232 s++;
4233 if (*s == '}') {
cd81e915
NC
4234 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4235 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4236 PL_thiswhite = 0;
5db06880
NC
4237 PL_bufptr = s - 1;
4238 break; /* don't bother looking for trailing comment */
4239 }
4240 else
4241 s = PL_bufptr;
4242 }
4243 if (optype == ']')
4244 break;
4245 /* FALLTHROUGH */
4246
4247 /* attach a trailing comment to its statement instead of next token */
4248 case ';':
cd81e915 4249 if (PL_faketokens)
5db06880
NC
4250 break;
4251 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4252 s = PL_bufptr;
4253 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4254 s++;
4255 if (*s == '\n' || *s == '#') {
4256 while (s < PL_bufend && *s != '\n')
4257 s++;
4258 if (s < PL_bufend)
4259 s++;
cd81e915
NC
4260 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4261 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4262 PL_thiswhite = 0;
5db06880
NC
4263 PL_bufptr = s;
4264 }
4265 }
4266 break;
4267
4268 /* pval */
4269 case LABEL:
4270 break;
4271
4272 /* ival */
4273 default:
4274 break;
4275
4276 }
4277
4278 /* Create new token struct. Note: opvals return early above. */
6154021b 4279 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4280 PL_thismad = 0;
5db06880
NC
4281 return optype;
4282}
4283#endif
4284
468aa647 4285STATIC char *
cc6ed77d 4286S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4287 dVAR;
7918f24d
NC
4288
4289 PERL_ARGS_ASSERT_TOKENIZE_USE;
4290
468aa647
RGS
4291 if (PL_expect != XSTATE)
4292 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4293 is_use ? "use" : "no"));
29595ff2 4294 s = SKIPSPACE1(s);
468aa647
RGS
4295 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4296 s = force_version(s, TRUE);
17c59fdf
VP
4297 if (*s == ';' || *s == '}'
4298 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4299 start_force(PL_curforce);
9ded7720 4300 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4301 force_next(WORD);
4302 }
4303 else if (*s == 'v') {
4304 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4305 s = force_version(s, FALSE);
4306 }
4307 }
4308 else {
4309 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4310 s = force_version(s, FALSE);
4311 }
6154021b 4312 pl_yylval.ival = is_use;
468aa647
RGS
4313 return s;
4314}
748a9306 4315#ifdef DEBUGGING
27da23d5 4316 static const char* const exp_name[] =
09bef843 4317 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4318 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4319 };
748a9306 4320#endif
463ee0b2 4321
361d9b55
Z
4322#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4323STATIC bool
4324S_word_takes_any_delimeter(char *p, STRLEN len)
4325{
4326 return (len == 1 && strchr("msyq", p[0])) ||
4327 (len == 2 && (
4328 (p[0] == 't' && p[1] == 'r') ||
4329 (p[0] == 'q' && strchr("qwxr", p[1]))));
4330}
4331
02aa26ce
NT
4332/*
4333 yylex
4334
4335 Works out what to call the token just pulled out of the input
4336 stream. The yacc parser takes care of taking the ops we return and
4337 stitching them into a tree.
4338
4339 Returns:
4340 PRIVATEREF
4341
4342 Structure:
4343 if read an identifier
4344 if we're in a my declaration
4345 croak if they tried to say my($foo::bar)
4346 build the ops for a my() declaration
4347 if it's an access to a my() variable
4348 are we in a sort block?
4349 croak if my($a); $a <=> $b
4350 build ops for access to a my() variable
4351 if in a dq string, and they've said @foo and we can't find @foo
4352 croak
4353 build ops for a bareword
4354 if we already built the token before, use it.
4355*/
4356
20141f0e 4357
dba4d153
JH
4358#ifdef __SC__
4359#pragma segment Perl_yylex
4360#endif
dba4d153 4361int
dba4d153 4362Perl_yylex(pTHX)
20141f0e 4363{
97aff369 4364 dVAR;
3afc138a 4365 register char *s = PL_bufptr;
378cc40b 4366 register char *d;
463ee0b2 4367 STRLEN len;
aa7440fb 4368 bool bof = FALSE;
580561a3 4369 U32 fake_eof = 0;
a687059c 4370
10edeb5d
JH
4371 /* orig_keyword, gvp, and gv are initialized here because
4372 * jump to the label just_a_word_zero can bypass their
4373 * initialization later. */
4374 I32 orig_keyword = 0;
4375 GV *gv = NULL;
4376 GV **gvp = NULL;
4377
bbf60fe6 4378 DEBUG_T( {
396482e1 4379 SV* tmp = newSVpvs("");
b6007c36
DM
4380 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4381 (IV)CopLINE(PL_curcop),
4382 lex_state_names[PL_lex_state],
4383 exp_name[PL_expect],
4384 pv_display(tmp, s, strlen(s), 0, 60));
4385 SvREFCNT_dec(tmp);
bbf60fe6 4386 } );
02aa26ce 4387 /* check if there's an identifier for us to look at */
28ac2b49 4388 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4389 return REPORT(S_pending_ident(aTHX));
bbce6d69 4390
02aa26ce
NT
4391 /* no identifier pending identification */
4392
3280af22 4393 switch (PL_lex_state) {
79072805
LW
4394#ifdef COMMENTARY
4395 case LEX_NORMAL: /* Some compilers will produce faster */
4396 case LEX_INTERPNORMAL: /* code if we comment these out. */
4397 break;
4398#endif
4399
09bef843 4400 /* when we've already built the next token, just pull it out of the queue */
79072805 4401 case LEX_KNOWNEXT:
5db06880
NC
4402#ifdef PERL_MAD
4403 PL_lasttoke--;
6154021b 4404 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4405 if (PL_madskills) {
cd81e915 4406 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4407 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4408 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4409 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4410 PL_thismad->mad_val = 0;
4411 mad_free(PL_thismad);
4412 PL_thismad = 0;
5db06880
NC
4413 }
4414 }
4415 if (!PL_lasttoke) {
4416 PL_lex_state = PL_lex_defer;
4417 PL_expect = PL_lex_expect;
4418 PL_lex_defer = LEX_NORMAL;
4419 if (!PL_nexttoke[PL_lasttoke].next_type)
4420 return yylex();
4421 }
4422#else
3280af22 4423 PL_nexttoke--;
6154021b 4424 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4425 if (!PL_nexttoke) {
4426 PL_lex_state = PL_lex_defer;
4427 PL_expect = PL_lex_expect;
4428 PL_lex_defer = LEX_NORMAL;
463ee0b2 4429 }
5db06880 4430#endif
a7aaec61
Z
4431 {
4432 I32 next_type;
5db06880 4433#ifdef PERL_MAD
a7aaec61 4434 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4435#else
a7aaec61 4436 next_type = PL_nexttype[PL_nexttoke];
5db06880 4437#endif
78cdf107
Z
4438 if (next_type & (7<<24)) {
4439 if (next_type & (1<<24)) {
4440 if (PL_lex_brackets > 100)
4441 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4442 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4443 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4444 }
4445 if (next_type & (2<<24))
4446 PL_lex_allbrackets++;
4447 if (next_type & (4<<24))
4448 PL_lex_allbrackets--;
a7aaec61
Z
4449 next_type &= 0xffff;
4450 }
4451#ifdef PERL_MAD
4452 /* FIXME - can these be merged? */
4453 return next_type;
4454#else
4455 return REPORT(next_type);
4456#endif
4457 }
79072805 4458
02aa26ce 4459 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4460 when we get here, PL_bufptr is at the \
02aa26ce 4461 */
79072805
LW
4462 case LEX_INTERPCASEMOD:
4463#ifdef DEBUGGING
3280af22 4464 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4465 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4466#endif
02aa26ce 4467 /* handle \E or end of string */
3280af22 4468 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4469 /* if at a \E */
3280af22 4470 if (PL_lex_casemods) {
f54cb97a 4471 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4472 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4473
3792a11b
NC
4474 if (PL_bufptr != PL_bufend
4475 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4476 PL_bufptr += 2;
4477 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4478#ifdef PERL_MAD
4479 if (PL_madskills)
6b29d1f5 4480 PL_thistoken = newSVpvs("\\E");
5db06880 4481#endif
a0d0e21e 4482 }
78cdf107 4483 PL_lex_allbrackets--;
bbf60fe6 4484 return REPORT(')');
79072805 4485 }
5db06880
NC
4486#ifdef PERL_MAD
4487 while (PL_bufptr != PL_bufend &&
4488 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4489 if (!PL_thiswhite)
6b29d1f5 4490 PL_thiswhite = newSVpvs("");
cd81e915 4491 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4492 PL_bufptr += 2;
4493 }
4494#else
3280af22
NIS
4495 if (PL_bufptr != PL_bufend)
4496 PL_bufptr += 2;
5db06880 4497#endif
3280af22 4498 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4499 return yylex();
79072805
LW
4500 }
4501 else {
607df283 4502 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4503 "### Saw case modifier\n"); });
3280af22 4504 s = PL_bufptr + 1;
6e909404 4505 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4506#ifdef PERL_MAD
cd81e915 4507 if (!PL_thiswhite)
6b29d1f5 4508 PL_thiswhite = newSVpvs("");
cd81e915 4509 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4510#endif
89122651 4511 PL_bufptr = s + 3;
6e909404
JH
4512 PL_lex_state = LEX_INTERPCONCAT;
4513 return yylex();
a0d0e21e 4514 }
6e909404 4515 else {
90771dc0 4516 I32 tmp;
5db06880
NC
4517 if (!PL_madskills) /* when just compiling don't need correct */
4518 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4519 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4520 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4521 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4522 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4523 PL_lex_allbrackets--;
bbf60fe6 4524 return REPORT(')');
6e909404
JH
4525 }
4526 if (PL_lex_casemods > 10)
4527 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4528 PL_lex_casestack[PL_lex_casemods++] = *s;
4529 PL_lex_casestack[PL_lex_casemods] = '\0';
4530 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4531 start_force(PL_curforce);
9ded7720 4532 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4533 force_next((2<<24)|'(');
cd81e915 4534 start_force(PL_curforce);
6e909404 4535 if (*s == 'l')
9ded7720 4536 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4537 else if (*s == 'u')
9ded7720 4538 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4539 else if (*s == 'L')
9ded7720 4540 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4541 else if (*s == 'U')
9ded7720 4542 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4543 else if (*s == 'Q')
9ded7720 4544 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4545 else
4546 Perl_croak(aTHX_ "panic: yylex");
5db06880 4547 if (PL_madskills) {
a5849ce5
NC
4548 SV* const tmpsv = newSVpvs("\\ ");
4549 /* replace the space with the character we want to escape
4550 */
4551 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4552 curmad('_', tmpsv);
4553 }
6e909404 4554 PL_bufptr = s + 1;
a0d0e21e 4555 }
79072805 4556 force_next(FUNC);
3280af22
NIS
4557 if (PL_lex_starts) {
4558 s = PL_bufptr;
4559 PL_lex_starts = 0;
5db06880
NC
4560#ifdef PERL_MAD
4561 if (PL_madskills) {
cd81e915
NC
4562 if (PL_thistoken)
4563 sv_free(PL_thistoken);
6b29d1f5 4564 PL_thistoken = newSVpvs("");
5db06880
NC
4565 }
4566#endif
131b3ad0
DM
4567 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4568 if (PL_lex_casemods == 1 && PL_lex_inpat)
4569 OPERATOR(',');
4570 else
4571 Aop(OP_CONCAT);
79072805
LW
4572 }
4573 else
cea2e8a9 4574 return yylex();
79072805
LW
4575 }
4576
55497cff 4577 case LEX_INTERPPUSH:
bbf60fe6 4578 return REPORT(sublex_push());
55497cff 4579
79072805 4580 case LEX_INTERPSTART:
3280af22 4581 if (PL_bufptr == PL_bufend)
bbf60fe6 4582 return REPORT(sublex_done());
607df283 4583 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4584 "### Interpolated variable\n"); });
3280af22
NIS
4585 PL_expect = XTERM;
4586 PL_lex_dojoin = (*PL_bufptr == '@');
4587 PL_lex_state = LEX_INTERPNORMAL;
4588 if (PL_lex_dojoin) {
cd81e915 4589 start_force(PL_curforce);
9ded7720 4590 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4591 force_next(',');
cd81e915 4592 start_force(PL_curforce);
a0d0e21e 4593 force_ident("\"", '$');
cd81e915 4594 start_force(PL_curforce);
9ded7720 4595 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4596 force_next('$');
cd81e915 4597 start_force(PL_curforce);
9ded7720 4598 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4599 force_next((2<<24)|'(');
cd81e915 4600 start_force(PL_curforce);
9ded7720 4601 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4602 force_next(FUNC);
4603 }
3280af22
NIS
4604 if (PL_lex_starts++) {
4605 s = PL_bufptr;
5db06880
NC
4606#ifdef PERL_MAD
4607 if (PL_madskills) {
cd81e915
NC
4608 if (PL_thistoken)
4609 sv_free(PL_thistoken);
6b29d1f5 4610 PL_thistoken = newSVpvs("");
5db06880
NC
4611 }
4612#endif
131b3ad0
DM
4613 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4614 if (!PL_lex_casemods && PL_lex_inpat)
4615 OPERATOR(',');
4616 else
4617 Aop(OP_CONCAT);
79072805 4618 }
cea2e8a9 4619 return yylex();
79072805
LW
4620
4621 case LEX_INTERPENDMAYBE:
3280af22
NIS
4622 if (intuit_more(PL_bufptr)) {
4623 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4624 break;
4625 }
4626 /* FALL THROUGH */
4627
4628 case LEX_INTERPEND:
3280af22
NIS
4629 if (PL_lex_dojoin) {
4630 PL_lex_dojoin = FALSE;
4631 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4632#ifdef PERL_MAD
4633 if (PL_madskills) {
cd81e915
NC
4634 if (PL_thistoken)
4635 sv_free(PL_thistoken);
6b29d1f5 4636 PL_thistoken = newSVpvs("");
5db06880
NC
4637 }
4638#endif
78cdf107 4639 PL_lex_allbrackets--;
bbf60fe6 4640 return REPORT(')');
79072805 4641 }
43a16006 4642 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4643 && SvEVALED(PL_lex_repl))
43a16006 4644 {
e9fa98b2 4645 if (PL_bufptr != PL_bufend)
cea2e8a9 4646 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4647 PL_lex_repl = NULL;
e9fa98b2 4648 }
79072805
LW
4649 /* FALLTHROUGH */
4650 case LEX_INTERPCONCAT:
4651#ifdef DEBUGGING
3280af22 4652 if (PL_lex_brackets)
cea2e8a9 4653 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4654#endif
3280af22 4655 if (PL_bufptr == PL_bufend)
bbf60fe6 4656 return REPORT(sublex_done());
79072805 4657
3280af22
NIS
4658 if (SvIVX(PL_linestr) == '\'') {
4659 SV *sv = newSVsv(PL_linestr);
4660 if (!PL_lex_inpat)
76e3520e 4661 sv = tokeq(sv);
3280af22 4662 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4663 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4664 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4665 s = PL_bufend;
79072805
LW
4666 }
4667 else {
3280af22 4668 s = scan_const(PL_bufptr);
79072805 4669 if (*s == '\\')
3280af22 4670 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4671 else
3280af22 4672 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4673 }
4674
3280af22 4675 if (s != PL_bufptr) {
cd81e915 4676 start_force(PL_curforce);
5db06880
NC
4677 if (PL_madskills) {
4678 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4679 }
6154021b 4680 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4681 PL_expect = XTERM;
79072805 4682 force_next(THING);
131b3ad0 4683 if (PL_lex_starts++) {
5db06880
NC
4684#ifdef PERL_MAD
4685 if (PL_madskills) {
cd81e915
NC
4686 if (PL_thistoken)
4687 sv_free(PL_thistoken);
6b29d1f5 4688 PL_thistoken = newSVpvs("");
5db06880
NC
4689 }
4690#endif
131b3ad0
DM
4691 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4692 if (!PL_lex_casemods && PL_lex_inpat)
4693 OPERATOR(',');
4694 else
4695 Aop(OP_CONCAT);
4696 }
79072805 4697 else {
3280af22 4698 PL_bufptr = s;
cea2e8a9 4699 return yylex();
79072805
LW
4700 }
4701 }
4702
cea2e8a9 4703 return yylex();
a0d0e21e 4704 case LEX_FORMLINE:
3280af22
NIS
4705 PL_lex_state = LEX_NORMAL;
4706 s = scan_formline(PL_bufptr);
4707 if (!PL_lex_formbrack)
a0d0e21e
LW
4708 goto rightbracket;
4709 OPERATOR(';');
79072805
LW
4710 }
4711
3280af22
NIS
4712 s = PL_bufptr;
4713 PL_oldoldbufptr = PL_oldbufptr;
4714 PL_oldbufptr = s;
463ee0b2
LW
4715
4716 retry:
5db06880 4717#ifdef PERL_MAD
cd81e915
NC
4718 if (PL_thistoken) {
4719 sv_free(PL_thistoken);
4720 PL_thistoken = 0;
5db06880 4721 }
cd81e915 4722 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4723#endif
378cc40b
LW
4724 switch (*s) {
4725 default:
7e2040f0 4726 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4727 goto keylookup;
b1fc3636
CJ
4728 {
4729 unsigned char c = *s;
4730 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4731 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4732 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4733 } else {
4734 d = PL_linestart;
4735 }
4736 *s = '\0';
4737 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4738 }
e929a76b
LW
4739 case 4:
4740 case 26:
4741 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4742 case 0:
5db06880
NC
4743#ifdef PERL_MAD
4744 if (PL_madskills)
cd81e915 4745 PL_faketokens = 0;
5db06880 4746#endif
60d63348 4747 if (!PL_rsfp && (!PL_parser->filtered || s+1 < PL_bufend)) {
3280af22
NIS
4748 PL_last_uni = 0;
4749 PL_last_lop = 0;
a7aaec61
Z
4750 if (PL_lex_brackets &&
4751 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4752 yyerror((const char *)
4753 (PL_lex_formbrack
4754 ? "Format not terminated"
4755 : "Missing right curly or square bracket"));
c5ee2135 4756 }
4e553d73 4757 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4758 "### Tokener got EOF\n");
5f80b19c 4759 } );
79072805 4760 TOKEN(0);
463ee0b2 4761 }
3280af22 4762 if (s++ < PL_bufend)
a687059c 4763 goto retry; /* ignore stray nulls */
3280af22
NIS
4764 PL_last_uni = 0;
4765 PL_last_lop = 0;
4766 if (!PL_in_eval && !PL_preambled) {
4767 PL_preambled = TRUE;
5db06880
NC
4768#ifdef PERL_MAD
4769 if (PL_madskills)
cd81e915 4770 PL_faketokens = 1;
5db06880 4771#endif
5ab7ff98
NC
4772 if (PL_perldb) {
4773 /* Generate a string of Perl code to load the debugger.
4774 * If PERL5DB is set, it will return the contents of that,
4775 * otherwise a compile-time require of perl5db.pl. */
4776
4777 const char * const pdb = PerlEnv_getenv("PERL5DB");
4778
4779 if (pdb) {
4780 sv_setpv(PL_linestr, pdb);
4781 sv_catpvs(PL_linestr,";");
4782 } else {
4783 SETERRNO(0,SS_NORMAL);
4784 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4785 }
4786 } else
4787 sv_setpvs(PL_linestr,"");
c62eb204
NC
4788 if (PL_preambleav) {
4789 SV **svp = AvARRAY(PL_preambleav);
4790 SV **const end = svp + AvFILLp(PL_preambleav);
4791 while(svp <= end) {
4792 sv_catsv(PL_linestr, *svp);
4793 ++svp;
396482e1 4794 sv_catpvs(PL_linestr, ";");
91b7def8 4795 }
daba3364 4796 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4797 PL_preambleav = NULL;
91b7def8 4798 }
9f639728
FR
4799 if (PL_minus_E)
4800 sv_catpvs(PL_linestr,
4801 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4802 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4803 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4804 if (PL_minus_l)
396482e1 4805 sv_catpvs(PL_linestr,"chomp;");
3280af22 4806 if (PL_minus_a) {
3280af22 4807 if (PL_minus_F) {
3792a11b
NC
4808 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4809 || *PL_splitstr == '"')
3280af22 4810 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4811 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4812 else {
c8ef6a4b
NC
4813 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4814 bytes can be used as quoting characters. :-) */
dd374669 4815 const char *splits = PL_splitstr;
91d456ae 4816 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4817 do {
4818 /* Need to \ \s */
dd374669
AL
4819 if (*splits == '\\')
4820 sv_catpvn(PL_linestr, splits, 1);
4821 sv_catpvn(PL_linestr, splits, 1);
4822 } while (*splits++);
48c4c863
NC
4823 /* This loop will embed the trailing NUL of
4824 PL_linestr as the last thing it does before
4825 terminating. */
396482e1 4826 sv_catpvs(PL_linestr, ");");
54310121 4827 }
2304df62
AD
4828 }
4829 else
396482e1 4830 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4831 }
79072805 4832 }
396482e1 4833 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4834 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4835 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4836 PL_last_lop = PL_last_uni = NULL;
65269a95 4837 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4838 update_debugger_info(PL_linestr, NULL, 0);
79072805 4839 goto retry;
a687059c 4840 }
e929a76b 4841 do {
580561a3
Z
4842 fake_eof = 0;
4843 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4844 if (0) {
7e28d3af 4845 fake_eof:
f0e67a1d
Z
4846 fake_eof = LEX_FAKE_EOF;
4847 }
4848 PL_bufptr = PL_bufend;
17cc9359 4849 CopLINE_inc(PL_curcop);
f0e67a1d 4850 if (!lex_next_chunk(fake_eof)) {
17cc9359 4851 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4852 s = PL_bufptr;
4853 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4854 }
17cc9359 4855 CopLINE_dec(PL_curcop);
5db06880 4856#ifdef PERL_MAD
f0e67a1d 4857 if (!PL_rsfp)
cd81e915 4858 PL_realtokenstart = -1;
5db06880 4859#endif
f0e67a1d 4860 s = PL_bufptr;
7aa207d6
JH
4861 /* If it looks like the start of a BOM or raw UTF-16,
4862 * check if it in fact is. */
580561a3 4863 if (bof && PL_rsfp &&
7aa207d6
JH
4864 (*s == 0 ||
4865 *(U8*)s == 0xEF ||
4866 *(U8*)s >= 0xFE ||
4867 s[1] == 0)) {
879bc93b
DM
4868 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4869 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
4870#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4871 /* offset may include swallowed CR */
4872 if (!bof)
879bc93b 4873 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 4874#endif
7e28d3af 4875 if (bof) {
3280af22 4876 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4877 s = swallow_bom((U8*)s);
e929a76b 4878 }
378cc40b 4879 }
737c24fc 4880 if (PL_parser->in_pod) {
a0d0e21e 4881 /* Incest with pod. */
5db06880
NC
4882#ifdef PERL_MAD
4883 if (PL_madskills)
cd81e915 4884 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4885#endif
01a57ef7 4886 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4887 sv_setpvs(PL_linestr, "");
3280af22
NIS
4888 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4889 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4890 PL_last_lop = PL_last_uni = NULL;
737c24fc 4891 PL_parser->in_pod = 0;
a0d0e21e 4892 }
4e553d73 4893 }
60d63348 4894 if (PL_rsfp || PL_parser->filtered)
85613cab 4895 incline(s);
737c24fc 4896 } while (PL_parser->in_pod);
3280af22 4897 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4898 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4899 PL_last_lop = PL_last_uni = NULL;
57843af0 4900 if (CopLINE(PL_curcop) == 1) {
3280af22 4901 while (s < PL_bufend && isSPACE(*s))
79072805 4902 s++;
a0d0e21e 4903 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4904 s++;
5db06880
NC
4905#ifdef PERL_MAD
4906 if (PL_madskills)
cd81e915 4907 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4908#endif
bd61b366 4909 d = NULL;
3280af22 4910 if (!PL_in_eval) {
44a8e56a 4911 if (*s == '#' && *(s+1) == '!')
4912 d = s + 2;
4913#ifdef ALTERNATE_SHEBANG
4914 else {
bfed75c6 4915 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4916 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4917 d = s + (sizeof(as) - 1);
4918 }
4919#endif /* ALTERNATE_SHEBANG */
4920 }
4921 if (d) {
b8378b72 4922 char *ipath;
774d564b 4923 char *ipathend;
b8378b72 4924
774d564b 4925 while (isSPACE(*d))
b8378b72
CS
4926 d++;
4927 ipath = d;
774d564b 4928 while (*d && !isSPACE(*d))
4929 d++;
4930 ipathend = d;
4931
4932#ifdef ARG_ZERO_IS_SCRIPT
4933 if (ipathend > ipath) {
4934 /*
4935 * HP-UX (at least) sets argv[0] to the script name,
4936 * which makes $^X incorrect. And Digital UNIX and Linux,
4937 * at least, set argv[0] to the basename of the Perl
4938 * interpreter. So, having found "#!", we'll set it right.
4939 */
fafc274c
NC
4940 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4941 SVt_PV)); /* $^X */
774d564b 4942 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4943 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4944 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4945 SvSETMAGIC(x);
4946 }
556c1dec
JH
4947 else {
4948 STRLEN blen;
4949 STRLEN llen;
cfd0369c 4950 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4951 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4952 if (llen < blen) {
4953 bstart += blen - llen;
4954 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4955 sv_setpvn(x, ipath, ipathend - ipath);
4956 SvSETMAGIC(x);
4957 }
4958 }
4959 }
774d564b 4960 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4961 }
774d564b 4962#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4963
4964 /*
4965 * Look for options.
4966 */
748a9306 4967 d = instr(s,"perl -");
84e30d1a 4968 if (!d) {
748a9306 4969 d = instr(s,"perl");
84e30d1a
GS
4970#if defined(DOSISH)
4971 /* avoid getting into infinite loops when shebang
4972 * line contains "Perl" rather than "perl" */
4973 if (!d) {
4974 for (d = ipathend-4; d >= ipath; --d) {
4975 if ((*d == 'p' || *d == 'P')
4976 && !ibcmp(d, "perl", 4))
4977 {
4978 break;
4979 }
4980 }
4981 if (d < ipath)
bd61b366 4982 d = NULL;
84e30d1a
GS
4983 }
4984#endif
4985 }
44a8e56a 4986#ifdef ALTERNATE_SHEBANG
4987 /*
4988 * If the ALTERNATE_SHEBANG on this system starts with a
4989 * character that can be part of a Perl expression, then if
4990 * we see it but not "perl", we're probably looking at the
4991 * start of Perl code, not a request to hand off to some
4992 * other interpreter. Similarly, if "perl" is there, but
4993 * not in the first 'word' of the line, we assume the line
4994 * contains the start of the Perl program.
44a8e56a 4995 */
4996 if (d && *s != '#') {
f54cb97a 4997 const char *c = ipath;
44a8e56a 4998 while (*c && !strchr("; \t\r\n\f\v#", *c))
4999 c++;
5000 if (c < d)
bd61b366 5001 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 5002 else
5003 *s = '#'; /* Don't try to parse shebang line */
5004 }
774d564b 5005#endif /* ALTERNATE_SHEBANG */
748a9306 5006 if (!d &&
44a8e56a 5007 *s == '#' &&
774d564b 5008 ipathend > ipath &&
3280af22 5009 !PL_minus_c &&
748a9306 5010 !instr(s,"indir") &&
3280af22 5011 instr(PL_origargv[0],"perl"))
748a9306 5012 {
27da23d5 5013 dVAR;
9f68db38 5014 char **newargv;
9f68db38 5015
774d564b 5016 *ipathend = '\0';
5017 s = ipathend + 1;
3280af22 5018 while (s < PL_bufend && isSPACE(*s))
9f68db38 5019 s++;
3280af22 5020 if (s < PL_bufend) {
d85f917e 5021 Newx(newargv,PL_origargc+3,char*);
9f68db38 5022 newargv[1] = s;
3280af22 5023 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
5024 s++;
5025 *s = '\0';
3280af22 5026 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
5027 }
5028 else
3280af22 5029 newargv = PL_origargv;
774d564b 5030 newargv[0] = ipath;
b35112e7 5031 PERL_FPU_PRE_EXEC
b4748376 5032 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 5033 PERL_FPU_POST_EXEC
cea2e8a9 5034 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 5035 }
748a9306 5036 if (d) {
c35e046a
AL
5037 while (*d && !isSPACE(*d))
5038 d++;
5039 while (SPACE_OR_TAB(*d))
5040 d++;
748a9306
LW
5041
5042 if (*d++ == '-') {
f54cb97a 5043 const bool switches_done = PL_doswitches;
fb993905
GA
5044 const U32 oldpdb = PL_perldb;
5045 const bool oldn = PL_minus_n;
5046 const bool oldp = PL_minus_p;
c7030b81 5047 const char *d1 = d;
fb993905 5048
8cc95fdb 5049 do {
4ba71d51
FC
5050 bool baduni = FALSE;
5051 if (*d1 == 'C') {
bd0ab00d
NC
5052 const char *d2 = d1 + 1;
5053 if (parse_unicode_opts((const char **)&d2)
5054 != PL_unicode)
5055 baduni = TRUE;
4ba71d51
FC
5056 }
5057 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
5058 const char * const m = d1;
5059 while (*d1 && !isSPACE(*d1))
5060 d1++;
cea2e8a9 5061 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 5062 (int)(d1 - m), m);
8cc95fdb 5063 }
c7030b81
NC
5064 d1 = moreswitches(d1);
5065 } while (d1);
f0b2cf55
YST
5066 if (PL_doswitches && !switches_done) {
5067 int argc = PL_origargc;
5068 char **argv = PL_origargv;
5069 do {
5070 argc--,argv++;
5071 } while (argc && argv[0][0] == '-' && argv[0][1]);
5072 init_argv_symbols(argc,argv);
5073 }
65269a95 5074 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5075 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5076 /* if we have already added "LINE: while (<>) {",
5077 we must not do it again */
748a9306 5078 {
76f68e9b 5079 sv_setpvs(PL_linestr, "");
3280af22
NIS
5080 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5081 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5082 PL_last_lop = PL_last_uni = NULL;
3280af22 5083 PL_preambled = FALSE;
65269a95 5084 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5085 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5086 goto retry;
5087 }
a0d0e21e 5088 }
79072805 5089 }
9f68db38 5090 }
79072805 5091 }
3280af22
NIS
5092 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5093 PL_bufptr = s;
5094 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5095 return yylex();
ae986130 5096 }
378cc40b 5097 goto retry;
4fdae800 5098 case '\r':
6a27c188 5099#ifdef PERL_STRICT_CR
cea2e8a9 5100 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5101 Perl_croak(aTHX_
cc507455 5102 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5103#endif
4fdae800 5104 case ' ': case '\t': case '\f': case 013:
5db06880 5105#ifdef PERL_MAD
cd81e915 5106 PL_realtokenstart = -1;
ac372eb8
RD
5107 if (!PL_thiswhite)
5108 PL_thiswhite = newSVpvs("");
5109 sv_catpvn(PL_thiswhite, s, 1);
5db06880 5110#endif
ac372eb8 5111 s++;
378cc40b 5112 goto retry;
378cc40b 5113 case '#':
e929a76b 5114 case '\n':
5db06880 5115#ifdef PERL_MAD
cd81e915 5116 PL_realtokenstart = -1;
5db06880 5117 if (PL_madskills)
cd81e915 5118 PL_faketokens = 0;
5db06880 5119#endif
60d63348
FC
5120 if (PL_lex_state != LEX_NORMAL ||
5121 (PL_in_eval && !PL_rsfp && !PL_parser->filtered)) {
5122 if (*s == '#' && s == PL_linestart && PL_in_eval
5123 && !PL_rsfp && !PL_parser->filtered) {
df0deb90
GS
5124 /* handle eval qq[#line 1 "foo"\n ...] */
5125 CopLINE_dec(PL_curcop);
5126 incline(s);
5127 }
5db06880
NC
5128 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5129 s = SKIPSPACE0(s);
60d63348 5130 if (!PL_in_eval || PL_rsfp || PL_parser->filtered)
5db06880
NC
5131 incline(s);
5132 }
5133 else {
5134 d = s;
5135 while (d < PL_bufend && *d != '\n')
5136 d++;
5137 if (d < PL_bufend)
5138 d++;
5139 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5140 Perl_croak(aTHX_ "panic: input overflow");
5141#ifdef PERL_MAD
5142 if (PL_madskills)
cd81e915 5143 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5144#endif
5145 s = d;
5146 incline(s);
5147 }
3280af22
NIS
5148 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5149 PL_bufptr = s;
5150 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5151 return yylex();
a687059c 5152 }
378cc40b 5153 }
a687059c 5154 else {
5db06880
NC
5155#ifdef PERL_MAD
5156 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5157 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5158 PL_faketokens = 0;
5db06880
NC
5159 s = SKIPSPACE0(s);
5160 TOKEN(PEG); /* make sure any #! line is accessible */
5161 }
5162 s = SKIPSPACE0(s);
5163 }
5164 else {
5165/* if (PL_madskills && PL_lex_formbrack) { */
5166 d = s;
5167 while (d < PL_bufend && *d != '\n')
5168 d++;
5169 if (d < PL_bufend)
5170 d++;
5171 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5172 Perl_croak(aTHX_ "panic: input overflow");
5173 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5174 if (!PL_thiswhite)
6b29d1f5 5175 PL_thiswhite = newSVpvs("");
5db06880 5176 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5177 sv_setpvs(PL_thiswhite, "");
cd81e915 5178 PL_faketokens = 0;
5db06880 5179 }
cd81e915 5180 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5181 }
5182 s = d;
5183/* }
5184 *s = '\0';
5185 PL_bufend = s; */
5186 }
5187#else
378cc40b 5188 *s = '\0';
3280af22 5189 PL_bufend = s;
5db06880 5190#endif
a687059c 5191 }
378cc40b
LW
5192 goto retry;
5193 case '-':
79072805 5194 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5195 I32 ftst = 0;
90771dc0 5196 char tmp;
e5edeb50 5197
378cc40b 5198 s++;
3280af22 5199 PL_bufptr = s;
748a9306
LW
5200 tmp = *s++;
5201
bf4acbe4 5202 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5203 s++;
5204
5205 if (strnEQ(s,"=>",2)) {
3280af22 5206 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5207 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5208 OPERATOR('-'); /* unary minus */
5209 }
3280af22 5210 PL_last_uni = PL_oldbufptr;
748a9306 5211 switch (tmp) {
e5edeb50
JH
5212 case 'r': ftst = OP_FTEREAD; break;
5213 case 'w': ftst = OP_FTEWRITE; break;
5214 case 'x': ftst = OP_FTEEXEC; break;
5215 case 'o': ftst = OP_FTEOWNED; break;
5216 case 'R': ftst = OP_FTRREAD; break;
5217 case 'W': ftst = OP_FTRWRITE; break;
5218 case 'X': ftst = OP_FTREXEC; break;
5219 case 'O': ftst = OP_FTROWNED; break;
5220 case 'e': ftst = OP_FTIS; break;
5221 case 'z': ftst = OP_FTZERO; break;
5222 case 's': ftst = OP_FTSIZE; break;
5223 case 'f': ftst = OP_FTFILE; break;
5224 case 'd': ftst = OP_FTDIR; break;
5225 case 'l': ftst = OP_FTLINK; break;
5226 case 'p': ftst = OP_FTPIPE; break;
5227 case 'S': ftst = OP_FTSOCK; break;
5228 case 'u': ftst = OP_FTSUID; break;
5229 case 'g': ftst = OP_FTSGID; break;
5230 case 'k': ftst = OP_FTSVTX; break;
5231 case 'b': ftst = OP_FTBLK; break;
5232 case 'c': ftst = OP_FTCHR; break;
5233 case 't': ftst = OP_FTTTY; break;
5234 case 'T': ftst = OP_FTTEXT; break;
5235 case 'B': ftst = OP_FTBINARY; break;
5236 case 'M': case 'A': case 'C':
fafc274c 5237 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5238 switch (tmp) {
5239 case 'M': ftst = OP_FTMTIME; break;
5240 case 'A': ftst = OP_FTATIME; break;
5241 case 'C': ftst = OP_FTCTIME; break;
5242 default: break;
5243 }
5244 break;
378cc40b 5245 default:
378cc40b
LW
5246 break;
5247 }
e5edeb50 5248 if (ftst) {
eb160463 5249 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5250 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5251 "### Saw file test %c\n", (int)tmp);
5f80b19c 5252 } );
e5edeb50
JH
5253 FTST(ftst);
5254 }
5255 else {
5256 /* Assume it was a minus followed by a one-letter named
5257 * subroutine call (or a -bareword), then. */
95c31fe3 5258 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5259 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5260 (int) tmp);
5f80b19c 5261 } );
3cf7b4c4 5262 s = --PL_bufptr;
e5edeb50 5263 }
378cc40b 5264 }
90771dc0
NC
5265 {
5266 const char tmp = *s++;
5267 if (*s == tmp) {
5268 s++;
5269 if (PL_expect == XOPERATOR)
5270 TERM(POSTDEC);
5271 else
5272 OPERATOR(PREDEC);
5273 }
5274 else if (*s == '>') {
5275 s++;
29595ff2 5276 s = SKIPSPACE1(s);
90771dc0
NC
5277 if (isIDFIRST_lazy_if(s,UTF)) {
5278 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5279 TOKEN(ARROW);
5280 }
5281 else if (*s == '$')
5282 OPERATOR(ARROW);
5283 else
5284 TERM(ARROW);
5285 }
78cdf107
Z
5286 if (PL_expect == XOPERATOR) {
5287 if (*s == '=' && !PL_lex_allbrackets &&
5288 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5289 s--;
5290 TOKEN(0);
5291 }
90771dc0 5292 Aop(OP_SUBTRACT);
78cdf107 5293 }
90771dc0
NC
5294 else {
5295 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5296 check_uni();
5297 OPERATOR('-'); /* unary minus */
79072805 5298 }
2f3197b3 5299 }
79072805 5300
378cc40b 5301 case '+':
90771dc0
NC
5302 {
5303 const char tmp = *s++;
5304 if (*s == tmp) {
5305 s++;
5306 if (PL_expect == XOPERATOR)
5307 TERM(POSTINC);
5308 else
5309 OPERATOR(PREINC);
5310 }
78cdf107
Z
5311 if (PL_expect == XOPERATOR) {
5312 if (*s == '=' && !PL_lex_allbrackets &&
5313 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5314 s--;
5315 TOKEN(0);
5316 }
90771dc0 5317 Aop(OP_ADD);
78cdf107 5318 }
90771dc0
NC
5319 else {
5320 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5321 check_uni();
5322 OPERATOR('+');
5323 }
2f3197b3 5324 }
a687059c 5325
378cc40b 5326 case '*':
3280af22
NIS
5327 if (PL_expect != XOPERATOR) {
5328 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5329 PL_expect = XOPERATOR;
5330 force_ident(PL_tokenbuf, '*');
5331 if (!*PL_tokenbuf)
a0d0e21e 5332 PREREF('*');
79072805 5333 TERM('*');
a687059c 5334 }
79072805
LW
5335 s++;
5336 if (*s == '*') {
a687059c 5337 s++;
78cdf107
Z
5338 if (*s == '=' && !PL_lex_allbrackets &&
5339 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5340 s -= 2;
5341 TOKEN(0);
5342 }
79072805 5343 PWop(OP_POW);
a687059c 5344 }
78cdf107
Z
5345 if (*s == '=' && !PL_lex_allbrackets &&
5346 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5347 s--;
5348 TOKEN(0);
5349 }
79072805
LW
5350 Mop(OP_MULTIPLY);
5351
378cc40b 5352 case '%':
3280af22 5353 if (PL_expect == XOPERATOR) {
78cdf107
Z
5354 if (s[1] == '=' && !PL_lex_allbrackets &&
5355 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5356 TOKEN(0);
bbce6d69 5357 ++s;
5358 Mop(OP_MODULO);
a687059c 5359 }
3280af22 5360 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5361 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5362 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5363 if (!PL_tokenbuf[1]) {
bbce6d69 5364 PREREF('%');
a687059c 5365 }
3280af22 5366 PL_pending_ident = '%';
bbce6d69 5367 TERM('%');
a687059c 5368
378cc40b 5369 case '^':
78cdf107
Z
5370 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5371 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5372 TOKEN(0);
79072805 5373 s++;
a0d0e21e 5374 BOop(OP_BIT_XOR);
79072805 5375 case '[':
a7aaec61
Z
5376 if (PL_lex_brackets > 100)
5377 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5378 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5379 PL_lex_allbrackets++;
df3467db
IG
5380 {
5381 const char tmp = *s++;
5382 OPERATOR(tmp);
5383 }
378cc40b 5384 case '~':
0d863452 5385 if (s[1] == '~'
3e7dd34d 5386 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5387 {
78cdf107
Z
5388 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5389 TOKEN(0);
0d863452
RH
5390 s += 2;
5391 Eop(OP_SMARTMATCH);
5392 }
78cdf107
Z
5393 s++;
5394 OPERATOR('~');
378cc40b 5395 case ',':
78cdf107
Z
5396 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5397 TOKEN(0);
5398 s++;
5399 OPERATOR(',');
a0d0e21e
LW
5400 case ':':
5401 if (s[1] == ':') {
5402 len = 0;
0bfa2a8a 5403 goto just_a_word_zero_gv;
a0d0e21e
LW
5404 }
5405 s++;
09bef843
SB
5406 switch (PL_expect) {
5407 OP *attrs;
5db06880
NC
5408#ifdef PERL_MAD
5409 I32 stuffstart;
5410#endif
09bef843
SB
5411 case XOPERATOR:
5412 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5413 break;
5414 PL_bufptr = s; /* update in case we back off */
d83f38d8 5415 if (*s == '=') {
2dc78664
NC
5416 Perl_croak(aTHX_
5417 "Use of := for an empty attribute list is not allowed");
d83f38d8 5418 }
09bef843
SB
5419 goto grabattrs;
5420 case XATTRBLOCK:
5421 PL_expect = XBLOCK;
5422 goto grabattrs;
5423 case XATTRTERM:
5424 PL_expect = XTERMBLOCK;
5425 grabattrs:
5db06880
NC
5426#ifdef PERL_MAD
5427 stuffstart = s - SvPVX(PL_linestr) - 1;
5428#endif
29595ff2 5429 s = PEEKSPACE(s);
5f66b61c 5430 attrs = NULL;
7e2040f0 5431 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5432 I32 tmp;
5cc237b8 5433 SV *sv;
09bef843 5434 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5435 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5436 if (tmp < 0) tmp = -tmp;
5437 switch (tmp) {
5438 case KEY_or:
5439 case KEY_and:
5440 case KEY_for:
11baf631 5441 case KEY_foreach:
f9829d6b
GS
5442 case KEY_unless:
5443 case KEY_if:
5444 case KEY_while:
5445 case KEY_until:
5446 goto got_attrs;
5447 default:
5448 break;
5449 }
5450 }
89a5757c 5451 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
09bef843
SB
5452 if (*d == '(') {
5453 d = scan_str(d,TRUE,TRUE);
5454 if (!d) {
09bef843
SB
5455 /* MUST advance bufptr here to avoid bogus
5456 "at end of line" context messages from yyerror().
5457 */
5458 PL_bufptr = s + len;
5459 yyerror("Unterminated attribute parameter in attribute list");
5460 if (attrs)
5461 op_free(attrs);
5cc237b8 5462 sv_free(sv);
bbf60fe6 5463 return REPORT(0); /* EOF indicator */
09bef843
SB
5464 }
5465 }
5466 if (PL_lex_stuff) {
09bef843 5467 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5468 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5469 newSVOP(OP_CONST, 0, sv));
5470 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5471 PL_lex_stuff = NULL;
09bef843
SB
5472 }
5473 else {
5cc237b8
BS
5474 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5475 sv_free(sv);
1108974d 5476 if (PL_in_my == KEY_our) {
df9a6019 5477 deprecate(":unique");
1108974d 5478 }
bfed75c6 5479 else
371fce9b
DM
5480 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5481 }
5482
d3cea301
SB
5483 /* NOTE: any CV attrs applied here need to be part of
5484 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5485 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5486 sv_free(sv);
78f9721b 5487 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5488 }
5489 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5490 sv_free(sv);
8e5dadda 5491 deprecate(":locked");
5cc237b8
BS
5492 }
5493 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5494 sv_free(sv);
78f9721b 5495 CvMETHOD_on(PL_compcv);
5cc237b8 5496 }
78f9721b
SM
5497 /* After we've set the flags, it could be argued that
5498 we don't need to do the attributes.pm-based setting
5499 process, and shouldn't bother appending recognized
d3cea301
SB
5500 flags. To experiment with that, uncomment the
5501 following "else". (Note that's already been
5502 uncommented. That keeps the above-applied built-in
5503 attributes from being intercepted (and possibly
5504 rejected) by a package's attribute routines, but is
5505 justified by the performance win for the common case
5506 of applying only built-in attributes.) */
0256094b 5507 else
2fcb4757 5508 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5509 newSVOP(OP_CONST, 0,
5cc237b8 5510 sv));
09bef843 5511 }
29595ff2 5512 s = PEEKSPACE(d);
0120eecf 5513 if (*s == ':' && s[1] != ':')
29595ff2 5514 s = PEEKSPACE(s+1);
0120eecf
GS
5515 else if (s == d)
5516 break; /* require real whitespace or :'s */
29595ff2 5517 /* XXX losing whitespace on sequential attributes here */
09bef843 5518 }
90771dc0
NC
5519 {
5520 const char tmp
5521 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5522 if (*s != ';' && *s != '}' && *s != tmp
5523 && (tmp != '=' || *s != ')')) {
5524 const char q = ((*s == '\'') ? '"' : '\'');
5525 /* If here for an expression, and parsed no attrs, back
5526 off. */
5527 if (tmp == '=' && !attrs) {
5528 s = PL_bufptr;
5529 break;
5530 }
5531 /* MUST advance bufptr here to avoid bogus "at end of line"
5532 context messages from yyerror().
5533 */
5534 PL_bufptr = s;
10edeb5d
JH
5535 yyerror( (const char *)
5536 (*s
5537 ? Perl_form(aTHX_ "Invalid separator character "
5538 "%c%c%c in attribute list", q, *s, q)
5539 : "Unterminated attribute list" ) );
90771dc0
NC
5540 if (attrs)
5541 op_free(attrs);
5542 OPERATOR(':');
09bef843 5543 }
09bef843 5544 }
f9829d6b 5545 got_attrs:
09bef843 5546 if (attrs) {
cd81e915 5547 start_force(PL_curforce);
9ded7720 5548 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5549 CURMAD('_', PL_nextwhite);
89122651 5550 force_next(THING);
5db06880
NC
5551 }
5552#ifdef PERL_MAD
5553 if (PL_madskills) {
cd81e915 5554 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5555 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5556 }
5db06880 5557#endif
09bef843
SB
5558 TOKEN(COLONATTR);
5559 }
78cdf107
Z
5560 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5561 s--;
5562 TOKEN(0);
5563 }
5564 PL_lex_allbrackets--;
a0d0e21e 5565 OPERATOR(':');
8990e307
LW
5566 case '(':
5567 s++;
3280af22
NIS
5568 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5569 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5570 else
3280af22 5571 PL_expect = XTERM;
29595ff2 5572 s = SKIPSPACE1(s);
78cdf107 5573 PL_lex_allbrackets++;
a0d0e21e 5574 TOKEN('(');
378cc40b 5575 case ';':
78cdf107
Z
5576 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5577 TOKEN(0);
f4dd75d9 5578 CLINE;
78cdf107
Z
5579 s++;
5580 OPERATOR(';');
378cc40b 5581 case ')':
78cdf107
Z
5582 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5583 TOKEN(0);
5584 s++;
5585 PL_lex_allbrackets--;
5586 s = SKIPSPACE1(s);
5587 if (*s == '{')
5588 PREBLOCK(')');
5589 TERM(')');
79072805 5590 case ']':
a7aaec61
Z
5591 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5592 TOKEN(0);
79072805 5593 s++;
3280af22 5594 if (PL_lex_brackets <= 0)
d98d5fff 5595 yyerror("Unmatched right square bracket");
463ee0b2 5596 else
3280af22 5597 --PL_lex_brackets;
78cdf107 5598 PL_lex_allbrackets--;
3280af22
NIS
5599 if (PL_lex_state == LEX_INTERPNORMAL) {
5600 if (PL_lex_brackets == 0) {
02255c60
FC
5601 if (*s == '-' && s[1] == '>')
5602 PL_lex_state = LEX_INTERPENDMAYBE;
5603 else if (*s != '[' && *s != '{')
3280af22 5604 PL_lex_state = LEX_INTERPEND;
79072805
LW
5605 }
5606 }
4633a7c4 5607 TERM(']');
79072805
LW
5608 case '{':
5609 leftbracket:
79072805 5610 s++;
3280af22 5611 if (PL_lex_brackets > 100) {
8edd5f42 5612 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5613 }
3280af22 5614 switch (PL_expect) {
a0d0e21e 5615 case XTERM:
3280af22 5616 if (PL_lex_formbrack) {
a0d0e21e
LW
5617 s--;
5618 PRETERMBLOCK(DO);
5619 }
3280af22
NIS
5620 if (PL_oldoldbufptr == PL_last_lop)
5621 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5622 else
3280af22 5623 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5624 PL_lex_allbrackets++;
79072805 5625 OPERATOR(HASHBRACK);
a0d0e21e 5626 case XOPERATOR:
bf4acbe4 5627 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5628 s++;
44a8e56a 5629 d = s;
3280af22
NIS
5630 PL_tokenbuf[0] = '\0';
5631 if (d < PL_bufend && *d == '-') {
5632 PL_tokenbuf[0] = '-';
44a8e56a 5633 d++;
bf4acbe4 5634 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5635 d++;
5636 }
7e2040f0 5637 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5638 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5639 FALSE, &len);
bf4acbe4 5640 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5641 d++;
5642 if (*d == '}') {
f54cb97a 5643 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5644 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5645 if (minus)
5646 force_next('-');
748a9306
LW
5647 }
5648 }
5649 /* FALL THROUGH */
09bef843 5650 case XATTRBLOCK:
748a9306 5651 case XBLOCK:
3280af22 5652 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5653 PL_lex_allbrackets++;
3280af22 5654 PL_expect = XSTATE;
a0d0e21e 5655 break;
09bef843 5656 case XATTRTERM:
a0d0e21e 5657 case XTERMBLOCK:
3280af22 5658 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5659 PL_lex_allbrackets++;
3280af22 5660 PL_expect = XSTATE;
a0d0e21e
LW
5661 break;
5662 default: {
f54cb97a 5663 const char *t;
3280af22
NIS
5664 if (PL_oldoldbufptr == PL_last_lop)
5665 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5666 else
3280af22 5667 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5668 PL_lex_allbrackets++;
29595ff2 5669 s = SKIPSPACE1(s);
8452ff4b
SB
5670 if (*s == '}') {
5671 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5672 PL_expect = XTERM;
5673 /* This hack is to get the ${} in the message. */
5674 PL_bufptr = s+1;
5675 yyerror("syntax error");
5676 break;
5677 }
a0d0e21e 5678 OPERATOR(HASHBRACK);
8452ff4b 5679 }
b8a4b1be
GS
5680 /* This hack serves to disambiguate a pair of curlies
5681 * as being a block or an anon hash. Normally, expectation
5682 * determines that, but in cases where we're not in a
5683 * position to expect anything in particular (like inside
5684 * eval"") we have to resolve the ambiguity. This code
5685 * covers the case where the first term in the curlies is a
5686 * quoted string. Most other cases need to be explicitly
a0288114 5687 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5688 * curly in order to force resolution as an anon hash.
5689 *
5690 * XXX should probably propagate the outer expectation
5691 * into eval"" to rely less on this hack, but that could
5692 * potentially break current behavior of eval"".
5693 * GSAR 97-07-21
5694 */
5695 t = s;
5696 if (*s == '\'' || *s == '"' || *s == '`') {
5697 /* common case: get past first string, handling escapes */
3280af22 5698 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5699 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5700 t++;
5701 t++;
a0d0e21e 5702 }
b8a4b1be 5703 else if (*s == 'q') {
3280af22 5704 if (++t < PL_bufend
b8a4b1be 5705 && (!isALNUM(*t)
3280af22 5706 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5707 && !isALNUM(*t))))
5708 {
abc667d1 5709 /* skip q//-like construct */
f54cb97a 5710 const char *tmps;
b8a4b1be
GS
5711 char open, close, term;
5712 I32 brackets = 1;
5713
3280af22 5714 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5715 t++;
abc667d1
DM
5716 /* check for q => */
5717 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5718 OPERATOR(HASHBRACK);
5719 }
b8a4b1be
GS
5720 term = *t;
5721 open = term;
5722 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5723 term = tmps[5];
5724 close = term;
5725 if (open == close)
3280af22
NIS
5726 for (t++; t < PL_bufend; t++) {
5727 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5728 t++;
6d07e5e9 5729 else if (*t == open)
b8a4b1be
GS
5730 break;
5731 }
abc667d1 5732 else {
3280af22
NIS
5733 for (t++; t < PL_bufend; t++) {
5734 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5735 t++;
6d07e5e9 5736 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5737 break;
5738 else if (*t == open)
5739 brackets++;
5740 }
abc667d1
DM
5741 }
5742 t++;
b8a4b1be 5743 }
abc667d1
DM
5744 else
5745 /* skip plain q word */
5746 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5747 t += UTF8SKIP(t);
a0d0e21e 5748 }
7e2040f0 5749 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5750 t += UTF8SKIP(t);
7e2040f0 5751 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5752 t += UTF8SKIP(t);
a0d0e21e 5753 }
3280af22 5754 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5755 t++;
b8a4b1be
GS
5756 /* if comma follows first term, call it an anon hash */
5757 /* XXX it could be a comma expression with loop modifiers */
3280af22 5758 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5759 || (*t == '=' && t[1] == '>')))
a0d0e21e 5760 OPERATOR(HASHBRACK);
3280af22 5761 if (PL_expect == XREF)
4e4e412b 5762 PL_expect = XTERM;
a0d0e21e 5763 else {
3280af22
NIS
5764 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5765 PL_expect = XSTATE;
a0d0e21e 5766 }
8990e307 5767 }
a0d0e21e 5768 break;
463ee0b2 5769 }
6154021b 5770 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5771 if (isSPACE(*s) || *s == '#')
3280af22 5772 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5773 TOKEN('{');
378cc40b 5774 case '}':
a7aaec61
Z
5775 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5776 TOKEN(0);
79072805
LW
5777 rightbracket:
5778 s++;
3280af22 5779 if (PL_lex_brackets <= 0)
d98d5fff 5780 yyerror("Unmatched right curly bracket");
463ee0b2 5781 else
3280af22 5782 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 5783 PL_lex_allbrackets--;
c2e66d9e 5784 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5785 PL_lex_formbrack = 0;
5786 if (PL_lex_state == LEX_INTERPNORMAL) {
5787 if (PL_lex_brackets == 0) {
9059aa12
LW
5788 if (PL_expect & XFAKEBRACK) {
5789 PL_expect &= XENUMMASK;
3280af22
NIS
5790 PL_lex_state = LEX_INTERPEND;
5791 PL_bufptr = s;
5db06880
NC
5792#if 0
5793 if (PL_madskills) {
cd81e915 5794 if (!PL_thiswhite)
6b29d1f5 5795 PL_thiswhite = newSVpvs("");
76f68e9b 5796 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5797 }
5798#endif
cea2e8a9 5799 return yylex(); /* ignore fake brackets */
79072805 5800 }
fa83b5b6 5801 if (*s == '-' && s[1] == '>')
3280af22 5802 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5803 else if (*s != '[' && *s != '{')
3280af22 5804 PL_lex_state = LEX_INTERPEND;
79072805
LW
5805 }
5806 }
9059aa12
LW
5807 if (PL_expect & XFAKEBRACK) {
5808 PL_expect &= XENUMMASK;
3280af22 5809 PL_bufptr = s;
cea2e8a9 5810 return yylex(); /* ignore fake brackets */
748a9306 5811 }
cd81e915 5812 start_force(PL_curforce);
5db06880
NC
5813 if (PL_madskills) {
5814 curmad('X', newSVpvn(s-1,1));
cd81e915 5815 CURMAD('_', PL_thiswhite);
5db06880 5816 }
79072805 5817 force_next('}');
5db06880 5818#ifdef PERL_MAD
cd81e915 5819 if (!PL_thistoken)
6b29d1f5 5820 PL_thistoken = newSVpvs("");
5db06880 5821#endif
79072805 5822 TOKEN(';');
378cc40b
LW
5823 case '&':
5824 s++;
78cdf107
Z
5825 if (*s++ == '&') {
5826 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5827 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5828 s -= 2;
5829 TOKEN(0);
5830 }
a0d0e21e 5831 AOPERATOR(ANDAND);
78cdf107 5832 }
378cc40b 5833 s--;
3280af22 5834 if (PL_expect == XOPERATOR) {
041457d9
DM
5835 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5836 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5837 {
57843af0 5838 CopLINE_dec(PL_curcop);
f1f66076 5839 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5840 CopLINE_inc(PL_curcop);
463ee0b2 5841 }
78cdf107
Z
5842 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5843 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5844 s--;
5845 TOKEN(0);
5846 }
79072805 5847 BAop(OP_BIT_AND);
463ee0b2 5848 }
79072805 5849
3280af22
NIS
5850 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5851 if (*PL_tokenbuf) {
5852 PL_expect = XOPERATOR;
5853 force_ident(PL_tokenbuf, '&');
463ee0b2 5854 }
79072805
LW
5855 else
5856 PREREF('&');
6154021b 5857 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5858 TERM('&');
5859
378cc40b
LW
5860 case '|':
5861 s++;
78cdf107
Z
5862 if (*s++ == '|') {
5863 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5864 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5865 s -= 2;
5866 TOKEN(0);
5867 }
a0d0e21e 5868 AOPERATOR(OROR);
78cdf107 5869 }
378cc40b 5870 s--;
78cdf107
Z
5871 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5872 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5873 s--;
5874 TOKEN(0);
5875 }
79072805 5876 BOop(OP_BIT_OR);
378cc40b
LW
5877 case '=':
5878 s++;
748a9306 5879 {
90771dc0 5880 const char tmp = *s++;
78cdf107
Z
5881 if (tmp == '=') {
5882 if (!PL_lex_allbrackets &&
5883 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5884 s -= 2;
5885 TOKEN(0);
5886 }
90771dc0 5887 Eop(OP_EQ);
78cdf107
Z
5888 }
5889 if (tmp == '>') {
5890 if (!PL_lex_allbrackets &&
5891 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5892 s -= 2;
5893 TOKEN(0);
5894 }
90771dc0 5895 OPERATOR(',');
78cdf107 5896 }
90771dc0
NC
5897 if (tmp == '~')
5898 PMop(OP_MATCH);
5899 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5900 && strchr("+-*/%.^&|<",tmp))
5901 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5902 "Reversed %c= operator",(int)tmp);
5903 s--;
5904 if (PL_expect == XSTATE && isALPHA(tmp) &&
5905 (s == PL_linestart+1 || s[-2] == '\n') )
5906 {
60d63348 5907 if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
90771dc0
NC
5908 d = PL_bufend;
5909 while (s < d) {
5910 if (*s++ == '\n') {
5911 incline(s);
5912 if (strnEQ(s,"=cut",4)) {
5913 s = strchr(s,'\n');
5914 if (s)
5915 s++;
5916 else
5917 s = d;
5918 incline(s);
5919 goto retry;
5920 }
5921 }
a5f75d66 5922 }
90771dc0 5923 goto retry;
a5f75d66 5924 }
5db06880
NC
5925#ifdef PERL_MAD
5926 if (PL_madskills) {
cd81e915 5927 if (!PL_thiswhite)
6b29d1f5 5928 PL_thiswhite = newSVpvs("");
cd81e915 5929 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5930 PL_bufend - PL_linestart);
5931 }
5932#endif
90771dc0 5933 s = PL_bufend;
737c24fc 5934 PL_parser->in_pod = 1;
90771dc0 5935 goto retry;
a5f75d66 5936 }
a0d0e21e 5937 }
3280af22 5938 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5939 const char *t = s;
51882d45 5940#ifdef PERL_STRICT_CR
c35e046a 5941 while (SPACE_OR_TAB(*t))
51882d45 5942#else
c35e046a 5943 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5944#endif
c35e046a 5945 t++;
a0d0e21e
LW
5946 if (*t == '\n' || *t == '#') {
5947 s--;
3280af22 5948 PL_expect = XBLOCK;
a0d0e21e
LW
5949 goto leftbracket;
5950 }
79072805 5951 }
78cdf107
Z
5952 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5953 s--;
5954 TOKEN(0);
5955 }
6154021b 5956 pl_yylval.ival = 0;
a0d0e21e 5957 OPERATOR(ASSIGNOP);
378cc40b
LW
5958 case '!':
5959 s++;
90771dc0
NC
5960 {
5961 const char tmp = *s++;
5962 if (tmp == '=') {
5963 /* was this !=~ where !~ was meant?
5964 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5965
5966 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5967 const char *t = s+1;
5968
5969 while (t < PL_bufend && isSPACE(*t))
5970 ++t;
5971
5972 if (*t == '/' || *t == '?' ||
5973 ((*t == 'm' || *t == 's' || *t == 'y')
5974 && !isALNUM(t[1])) ||
5975 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5976 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5977 "!=~ should be !~");
5978 }
78cdf107
Z
5979 if (!PL_lex_allbrackets &&
5980 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5981 s -= 2;
5982 TOKEN(0);
5983 }
90771dc0
NC
5984 Eop(OP_NE);
5985 }
5986 if (tmp == '~')
5987 PMop(OP_NOT);
5988 }
378cc40b
LW
5989 s--;
5990 OPERATOR('!');
5991 case '<':
3280af22 5992 if (PL_expect != XOPERATOR) {
93a17b20 5993 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5994 check_uni();
79072805
LW
5995 if (s[1] == '<')
5996 s = scan_heredoc(s);
5997 else
5998 s = scan_inputsymbol(s);
5999 TERM(sublex_start());
378cc40b
LW
6000 }
6001 s++;
90771dc0
NC
6002 {
6003 char tmp = *s++;
78cdf107
Z
6004 if (tmp == '<') {
6005 if (*s == '=' && !PL_lex_allbrackets &&
6006 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6007 s -= 2;
6008 TOKEN(0);
6009 }
90771dc0 6010 SHop(OP_LEFT_SHIFT);
78cdf107 6011 }
90771dc0
NC
6012 if (tmp == '=') {
6013 tmp = *s++;
78cdf107
Z
6014 if (tmp == '>') {
6015 if (!PL_lex_allbrackets &&
6016 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6017 s -= 3;
6018 TOKEN(0);
6019 }
90771dc0 6020 Eop(OP_NCMP);
78cdf107 6021 }
90771dc0 6022 s--;
78cdf107
Z
6023 if (!PL_lex_allbrackets &&
6024 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6025 s -= 2;
6026 TOKEN(0);
6027 }
90771dc0
NC
6028 Rop(OP_LE);
6029 }
395c3793 6030 }
378cc40b 6031 s--;
78cdf107
Z
6032 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6033 s--;
6034 TOKEN(0);
6035 }
79072805 6036 Rop(OP_LT);
378cc40b
LW
6037 case '>':
6038 s++;
90771dc0
NC
6039 {
6040 const char tmp = *s++;
78cdf107
Z
6041 if (tmp == '>') {
6042 if (*s == '=' && !PL_lex_allbrackets &&
6043 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6044 s -= 2;
6045 TOKEN(0);
6046 }
90771dc0 6047 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
6048 }
6049 else if (tmp == '=') {
6050 if (!PL_lex_allbrackets &&
6051 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6052 s -= 2;
6053 TOKEN(0);
6054 }
90771dc0 6055 Rop(OP_GE);
78cdf107 6056 }
90771dc0 6057 }
378cc40b 6058 s--;
78cdf107
Z
6059 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
6060 s--;
6061 TOKEN(0);
6062 }
79072805 6063 Rop(OP_GT);
378cc40b
LW
6064
6065 case '$':
bbce6d69 6066 CLINE;
6067
3280af22
NIS
6068 if (PL_expect == XOPERATOR) {
6069 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6070 return deprecate_commaless_var_list();
a0d0e21e 6071 }
8990e307 6072 }
a0d0e21e 6073
c0b977fd 6074 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6075 PL_tokenbuf[0] = '@';
376b8730
SM
6076 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6077 sizeof PL_tokenbuf - 1, FALSE);
6078 if (PL_expect == XOPERATOR)
6079 no_op("Array length", s);
3280af22 6080 if (!PL_tokenbuf[1])
a0d0e21e 6081 PREREF(DOLSHARP);
3280af22
NIS
6082 PL_expect = XOPERATOR;
6083 PL_pending_ident = '#';
463ee0b2 6084 TOKEN(DOLSHARP);
79072805 6085 }
bbce6d69 6086
3280af22 6087 PL_tokenbuf[0] = '$';
376b8730
SM
6088 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6089 sizeof PL_tokenbuf - 1, FALSE);
6090 if (PL_expect == XOPERATOR)
6091 no_op("Scalar", s);
3280af22
NIS
6092 if (!PL_tokenbuf[1]) {
6093 if (s == PL_bufend)
bbce6d69 6094 yyerror("Final $ should be \\$ or $name");
6095 PREREF('$');
8990e307 6096 }
a0d0e21e 6097
ff68c719 6098 d = s;
90771dc0
NC
6099 {
6100 const char tmp = *s;
ae28bb2a 6101 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6102 s = SKIPSPACE1(s);
ff68c719 6103
90771dc0
NC
6104 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6105 && intuit_more(s)) {
6106 if (*s == '[') {
6107 PL_tokenbuf[0] = '@';
6108 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6109 char *t = s+1;
6110
6111 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6112 t++;
90771dc0 6113 if (*t++ == ',') {
29595ff2 6114 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6115 while (t < PL_bufend && *t != ']')
6116 t++;
9014280d 6117 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6118 "Multidimensional syntax %.*s not supported",
36c7798d 6119 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6120 }
748a9306 6121 }
93a17b20 6122 }
90771dc0
NC
6123 else if (*s == '{') {
6124 char *t;
6125 PL_tokenbuf[0] = '%';
6126 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6127 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6128 {
6129 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6130 do {
6131 t++;
6132 } while (isSPACE(*t));
90771dc0 6133 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6134 STRLEN len;
90771dc0 6135 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6136 &len);
c35e046a
AL
6137 while (isSPACE(*t))
6138 t++;
780a5241 6139 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
6140 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6141 "You need to quote \"%s\"",
6142 tmpbuf);
6143 }
6144 }
6145 }
93a17b20 6146 }
bbce6d69 6147
90771dc0
NC
6148 PL_expect = XOPERATOR;
6149 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6150 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6151 if (!islop || PL_last_lop_op == OP_GREPSTART)
6152 PL_expect = XOPERATOR;
6153 else if (strchr("$@\"'`q", *s))
6154 PL_expect = XTERM; /* e.g. print $fh "foo" */
6155 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6156 PL_expect = XTERM; /* e.g. print $fh &sub */
6157 else if (isIDFIRST_lazy_if(s,UTF)) {
6158 char tmpbuf[sizeof PL_tokenbuf];
6159 int t2;
6160 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6161 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6162 /* binary operators exclude handle interpretations */
6163 switch (t2) {
6164 case -KEY_x:
6165 case -KEY_eq:
6166 case -KEY_ne:
6167 case -KEY_gt:
6168 case -KEY_lt:
6169 case -KEY_ge:
6170 case -KEY_le:
6171 case -KEY_cmp:
6172 break;
6173 default:
6174 PL_expect = XTERM; /* e.g. print $fh length() */
6175 break;
6176 }
6177 }
6178 else {
6179 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6180 }
6181 }
90771dc0
NC
6182 else if (isDIGIT(*s))
6183 PL_expect = XTERM; /* e.g. print $fh 3 */
6184 else if (*s == '.' && isDIGIT(s[1]))
6185 PL_expect = XTERM; /* e.g. print $fh .3 */
6186 else if ((*s == '?' || *s == '-' || *s == '+')
6187 && !isSPACE(s[1]) && s[1] != '=')
6188 PL_expect = XTERM; /* e.g. print $fh -1 */
6189 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6190 && s[1] != '/')
6191 PL_expect = XTERM; /* e.g. print $fh /.../
6192 XXX except DORDOR operator
6193 */
6194 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6195 && s[2] != '=')
6196 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6197 }
bbce6d69 6198 }
3280af22 6199 PL_pending_ident = '$';
79072805 6200 TOKEN('$');
378cc40b
LW
6201
6202 case '@':
3280af22 6203 if (PL_expect == XOPERATOR)
bbce6d69 6204 no_op("Array", s);
3280af22
NIS
6205 PL_tokenbuf[0] = '@';
6206 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6207 if (!PL_tokenbuf[1]) {
bbce6d69 6208 PREREF('@');
6209 }
3280af22 6210 if (PL_lex_state == LEX_NORMAL)
29595ff2 6211 s = SKIPSPACE1(s);
3280af22 6212 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6213 if (*s == '{')
3280af22 6214 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6215
6216 /* Warn about @ where they meant $. */
041457d9
DM
6217 if (*s == '[' || *s == '{') {
6218 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6219 const char *t = s + 1;
7e2040f0 6220 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
6221 t++;
6222 if (*t == '}' || *t == ']') {
6223 t++;
29595ff2 6224 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 6225 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 6226 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
6227 (int)(t-PL_bufptr), PL_bufptr,
6228 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 6229 }
93a17b20
LW
6230 }
6231 }
463ee0b2 6232 }
3280af22 6233 PL_pending_ident = '@';
79072805 6234 TERM('@');
378cc40b 6235
c963b151 6236 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6237 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6238 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6239 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6240 TOKEN(0);
6f33ba73
RGS
6241 s += 2;
6242 AOPERATOR(DORDOR);
6243 }
c963b151 6244 case '?': /* may either be conditional or pattern */
be25f609 6245 if (PL_expect == XOPERATOR) {
90771dc0 6246 char tmp = *s++;
c963b151 6247 if(tmp == '?') {
78cdf107
Z
6248 if (!PL_lex_allbrackets &&
6249 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6250 s--;
6251 TOKEN(0);
6252 }
6253 PL_lex_allbrackets++;
be25f609 6254 OPERATOR('?');
c963b151
BD
6255 }
6256 else {
6257 tmp = *s++;
6258 if(tmp == '/') {
6259 /* A // operator. */
78cdf107
Z
6260 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6261 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6262 LEX_FAKEEOF_LOGIC)) {
6263 s -= 2;
6264 TOKEN(0);
6265 }
c963b151
BD
6266 AOPERATOR(DORDOR);
6267 }
6268 else {
6269 s--;
78cdf107
Z
6270 if (*s == '=' && !PL_lex_allbrackets &&
6271 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6272 s--;
6273 TOKEN(0);
6274 }
c963b151
BD
6275 Mop(OP_DIVIDE);
6276 }
6277 }
6278 }
6279 else {
6280 /* Disable warning on "study /blah/" */
6281 if (PL_oldoldbufptr == PL_last_uni
6282 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6283 || memNE(PL_last_uni, "study", 5)
6284 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6285 ))
6286 check_uni();
725a61d7
Z
6287 if (*s == '?')
6288 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6289 s = scan_pat(s,OP_MATCH);
6290 TERM(sublex_start());
6291 }
378cc40b
LW
6292
6293 case '.':
51882d45
GS
6294 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6295#ifdef PERL_STRICT_CR
6296 && s[1] == '\n'
6297#else
6298 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6299#endif
6300 && (s == PL_linestart || s[-1] == '\n') )
6301 {
3280af22
NIS
6302 PL_lex_formbrack = 0;
6303 PL_expect = XSTATE;
79072805
LW
6304 goto rightbracket;
6305 }
be25f609 6306 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6307 s += 3;
6308 OPERATOR(YADAYADA);
6309 }
3280af22 6310 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6311 char tmp = *s++;
a687059c 6312 if (*s == tmp) {
78cdf107
Z
6313 if (!PL_lex_allbrackets &&
6314 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6315 s--;
6316 TOKEN(0);
6317 }
a687059c 6318 s++;
2f3197b3
LW
6319 if (*s == tmp) {
6320 s++;
6154021b 6321 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6322 }
6323 else
6154021b 6324 pl_yylval.ival = 0;
378cc40b 6325 OPERATOR(DOTDOT);
a687059c 6326 }
78cdf107
Z
6327 if (*s == '=' && !PL_lex_allbrackets &&
6328 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6329 s--;
6330 TOKEN(0);
6331 }
79072805 6332 Aop(OP_CONCAT);
378cc40b
LW
6333 }
6334 /* FALL THROUGH */
6335 case '0': case '1': case '2': case '3': case '4':
6336 case '5': case '6': case '7': case '8': case '9':
6154021b 6337 s = scan_num(s, &pl_yylval);
931e0695 6338 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6339 if (PL_expect == XOPERATOR)
8990e307 6340 no_op("Number",s);
79072805
LW
6341 TERM(THING);
6342
6343 case '\'':
5db06880 6344 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6345 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6346 if (PL_expect == XOPERATOR) {
6347 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6348 return deprecate_commaless_var_list();
a0d0e21e 6349 }
463ee0b2 6350 else
8990e307 6351 no_op("String",s);
463ee0b2 6352 }
79072805 6353 if (!s)
d4c19fe8 6354 missingterm(NULL);
6154021b 6355 pl_yylval.ival = OP_CONST;
79072805
LW
6356 TERM(sublex_start());
6357
6358 case '"':
5db06880 6359 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6360 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6361 if (PL_expect == XOPERATOR) {
6362 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6363 return deprecate_commaless_var_list();
a0d0e21e 6364 }
463ee0b2 6365 else
8990e307 6366 no_op("String",s);
463ee0b2 6367 }
79072805 6368 if (!s)
d4c19fe8 6369 missingterm(NULL);
6154021b 6370 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6371 /* FIXME. I think that this can be const if char *d is replaced by
6372 more localised variables. */
3280af22 6373 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6374 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6375 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6376 break;
6377 }
6378 }
79072805
LW
6379 TERM(sublex_start());
6380
6381 case '`':
5db06880 6382 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6383 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6384 if (PL_expect == XOPERATOR)
8990e307 6385 no_op("Backticks",s);
79072805 6386 if (!s)
d4c19fe8 6387 missingterm(NULL);
9b201d7d 6388 readpipe_override();
79072805
LW
6389 TERM(sublex_start());
6390
6391 case '\\':
6392 s++;
a2a5de95
NC
6393 if (PL_lex_inwhat && isDIGIT(*s))
6394 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6395 *s, *s);
3280af22 6396 if (PL_expect == XOPERATOR)
8990e307 6397 no_op("Backslash",s);
79072805
LW
6398 OPERATOR(REFGEN);
6399
a7cb1f99 6400 case 'v':
e526c9e6 6401 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6402 char *start = s + 2;
dd629d5b 6403 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6404 start++;
6405 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6406 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6407 TERM(THING);
6408 }
e526c9e6 6409 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6410 else if (!isALPHA(*start) && (PL_expect == XTERM
6411 || PL_expect == XREF || PL_expect == XSTATE
6412 || PL_expect == XTERMORDORDOR)) {
af9f5953
BF
6413 GV *const gv = gv_fetchpvn_flags(s, start - s,
6414 UTF ? SVf_UTF8 : 0, SVt_PVCV);
e526c9e6 6415 if (!gv) {
6154021b 6416 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6417 TERM(THING);
6418 }
6419 }
a7cb1f99
GS
6420 }
6421 goto keylookup;
79072805 6422 case 'x':
3280af22 6423 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6424 s++;
6425 Mop(OP_REPEAT);
2f3197b3 6426 }
79072805
LW
6427 goto keylookup;
6428
378cc40b 6429 case '_':
79072805
LW
6430 case 'a': case 'A':
6431 case 'b': case 'B':
6432 case 'c': case 'C':
6433 case 'd': case 'D':
6434 case 'e': case 'E':
6435 case 'f': case 'F':
6436 case 'g': case 'G':
6437 case 'h': case 'H':
6438 case 'i': case 'I':
6439 case 'j': case 'J':
6440 case 'k': case 'K':
6441 case 'l': case 'L':
6442 case 'm': case 'M':
6443 case 'n': case 'N':
6444 case 'o': case 'O':
6445 case 'p': case 'P':
6446 case 'q': case 'Q':
6447 case 'r': case 'R':
6448 case 's': case 'S':
6449 case 't': case 'T':
6450 case 'u': case 'U':
a7cb1f99 6451 case 'V':
79072805
LW
6452 case 'w': case 'W':
6453 case 'X':
6454 case 'y': case 'Y':
6455 case 'z': case 'Z':
6456
49dc05e3 6457 keylookup: {
88e1f1a2 6458 bool anydelim;
90771dc0 6459 I32 tmp;
10edeb5d
JH
6460
6461 orig_keyword = 0;
6462 gv = NULL;
6463 gvp = NULL;
49dc05e3 6464
3280af22
NIS
6465 PL_bufptr = s;
6466 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6467
6468 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6469 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6470
6471 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6472 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6473 goto just_a_word;
6474
3643fb5f 6475 d = s;
3280af22 6476 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6477 d++; /* no comments skipped here, or s### is misparsed */
6478
748a9306 6479 /* Is this a word before a => operator? */
1c3923b3 6480 if (*d == '=' && d[1] == '>') {
748a9306 6481 CLINE;
6154021b 6482 pl_yylval.opval
d0a148a6
NC
6483 = (OP*)newSVOP(OP_CONST, 0,
6484 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6485 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6486 TERM(WORD);
6487 }
6488
88e1f1a2
JV
6489 /* Check for plugged-in keyword */
6490 {
6491 OP *o;
6492 int result;
6493 char *saved_bufptr = PL_bufptr;
6494 PL_bufptr = s;
16c91539 6495 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6496 s = PL_bufptr;
6497 if (result == KEYWORD_PLUGIN_DECLINE) {
6498 /* not a plugged-in keyword */
6499 PL_bufptr = saved_bufptr;
6500 } else if (result == KEYWORD_PLUGIN_STMT) {
6501 pl_yylval.opval = o;
6502 CLINE;
6503 PL_expect = XSTATE;
6504 return REPORT(PLUGSTMT);
6505 } else if (result == KEYWORD_PLUGIN_EXPR) {
6506 pl_yylval.opval = o;
6507 CLINE;
6508 PL_expect = XOPERATOR;
6509 return REPORT(PLUGEXPR);
6510 } else {
6511 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6512 PL_tokenbuf);
6513 }
6514 }
6515
6516 /* Check for built-in keyword */
6517 tmp = keyword(PL_tokenbuf, len, 0);
6518
6519 /* Is this a label? */
6520 if (!anydelim && PL_expect == XSTATE
6521 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6522 s = d + 1;
6523 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6524 CLINE;
6525 TOKEN(LABEL);
6526 }
6527
a0d0e21e 6528 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6529 GV *ogv = NULL; /* override (winner) */
6530 GV *hgv = NULL; /* hidden (loser) */
3280af22 6531 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6532 CV *cv;
af9f5953
BF
6533 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6534 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
56f7f34b
CS
6535 (cv = GvCVu(gv)))
6536 {
6537 if (GvIMPORTED_CV(gv))
6538 ogv = gv;
6539 else if (! CvMETHOD(cv))
6540 hgv = gv;
6541 }
6542 if (!ogv &&
af9f5953 6543 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
c60dbbc3 6544 UTF ? -(I32)len : (I32)len, FALSE)) &&
9e0d86f8 6545 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6546 GvCVu(gv) && GvIMPORTED_CV(gv))
6547 {
6548 ogv = gv;
6549 }
6550 }
6551 if (ogv) {
30fe34ed 6552 orig_keyword = tmp;
56f7f34b 6553 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6554 }
6555 else if (gv && !gvp
6556 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6557 && GvCVu(gv))
6e7b2336
GS
6558 {
6559 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6560 }
56f7f34b
CS
6561 else { /* no override */
6562 tmp = -tmp;
a2a5de95
NC
6563 if (tmp == KEY_dump) {
6564 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6565 "dump() better written as CORE::dump()");
ac206dc8 6566 }
a0714e2c 6567 gv = NULL;
56f7f34b 6568 gvp = 0;
a2a5de95
NC
6569 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6570 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6571 "Ambiguous call resolved as CORE::%s(), "
6572 "qualify as such or use &",
6573 GvENAME(hgv));
49dc05e3 6574 }
a0d0e21e
LW
6575 }
6576
6577 reserved_word:
6578 switch (tmp) {
79072805
LW
6579
6580 default: /* not a keyword */
0bfa2a8a
NC
6581 /* Trade off - by using this evil construction we can pull the
6582 variable gv into the block labelled keylookup. If not, then
6583 we have to give it function scope so that the goto from the
6584 earlier ':' case doesn't bypass the initialisation. */
6585 if (0) {
6586 just_a_word_zero_gv:
6587 gv = NULL;
6588 gvp = NULL;
8bee0991 6589 orig_keyword = 0;
0bfa2a8a 6590 }
93a17b20 6591 just_a_word: {
96e4d5b1 6592 SV *sv;
ce29ac45 6593 int pkgname = 0;
f54cb97a 6594 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6595 OP *rv2cv_op;
5069cc75 6596 CV *cv;
5db06880 6597#ifdef PERL_MAD
cd81e915 6598 SV *nextPL_nextwhite = 0;
5db06880
NC
6599#endif
6600
8990e307
LW
6601
6602 /* Get the rest if it looks like a package qualifier */
6603
155aba94 6604 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6605 STRLEN morelen;
3280af22 6606 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6607 TRUE, &morelen);
6608 if (!morelen)
cea2e8a9 6609 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6610 *s == '\'' ? "'" : "::");
c3e0f903 6611 len += morelen;
ce29ac45 6612 pkgname = 1;
a0d0e21e 6613 }
8990e307 6614
3280af22
NIS
6615 if (PL_expect == XOPERATOR) {
6616 if (PL_bufptr == PL_linestart) {
57843af0 6617 CopLINE_dec(PL_curcop);
f1f66076 6618 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6619 CopLINE_inc(PL_curcop);
463ee0b2
LW
6620 }
6621 else
54310121 6622 no_op("Bareword",s);
463ee0b2 6623 }
8990e307 6624
c3e0f903 6625 /* Look for a subroutine with this name in current package,
486ec47a 6626 unless name is "Foo::", in which case Foo is a bareword
c3e0f903
GS
6627 (and a package name). */
6628
5db06880 6629 if (len > 2 && !PL_madskills &&
3280af22 6630 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6631 {
f776e3cd 6632 if (ckWARN(WARN_BAREWORD)
af9f5953 6633 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
9014280d 6634 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6635 "Bareword \"%s\" refers to nonexistent package",
3280af22 6636 PL_tokenbuf);
c3e0f903 6637 len -= 2;
3280af22 6638 PL_tokenbuf[len] = '\0';
a0714e2c 6639 gv = NULL;
c3e0f903
GS
6640 gvp = 0;
6641 }
6642 else {
62d55b22
NC
6643 if (!gv) {
6644 /* Mustn't actually add anything to a symbol table.
6645 But also don't want to "initialise" any placeholder
6646 constants that might already be there into full
6647 blown PVGVs with attached PVCV. */
90e5519e 6648 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
af9f5953
BF
6649 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6650 SVt_PVCV);
62d55b22 6651 }
b3d904f3 6652 len = 0;
c3e0f903
GS
6653 }
6654
6655 /* if we saw a global override before, get the right name */
8990e307 6656
37bb7629
EB
6657 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6658 len ? len : strlen(PL_tokenbuf));
49dc05e3 6659 if (gvp) {
37bb7629 6660 SV * const tmp_sv = sv;
396482e1 6661 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6662 sv_catsv(sv, tmp_sv);
6663 SvREFCNT_dec(tmp_sv);
8a7a129d 6664 }
37bb7629 6665
5db06880 6666#ifdef PERL_MAD
cd81e915
NC
6667 if (PL_madskills && !PL_thistoken) {
6668 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6669 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6670 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6671 }
6672#endif
8990e307 6673
a0d0e21e 6674 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6675 CLINE;
6154021b
RGS
6676 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6677 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6678
c3e0f903 6679 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6680 if (len)
6681 goto safe_bareword;
6682
f7461760 6683 {
d8ebba9f 6684 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
6685 const_op->op_private = OPpCONST_BARE;
6686 rv2cv_op = newCVREF(0, const_op);
6687 }
d9088386 6688 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6689
8990e307
LW
6690 /* See if it's the indirect object for a list operator. */
6691
3280af22
NIS
6692 if (PL_oldoldbufptr &&
6693 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6694 (PL_oldoldbufptr == PL_last_lop
6695 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6696 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6697 (PL_expect == XREF ||
6698 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6699 {
748a9306
LW
6700 bool immediate_paren = *s == '(';
6701
a0d0e21e 6702 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6703 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6704#ifdef PERL_MAD
cd81e915 6705 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6706#endif
a0d0e21e
LW
6707
6708 /* Two barewords in a row may indicate method call. */
6709
62d55b22 6710 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6711 (tmp = intuit_method(s, gv, cv))) {
6712 op_free(rv2cv_op);
78cdf107
Z
6713 if (tmp == METHOD && !PL_lex_allbrackets &&
6714 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6715 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6716 return REPORT(tmp);
f7461760 6717 }
a0d0e21e
LW
6718
6719 /* If not a declared subroutine, it's an indirect object. */
6720 /* (But it's an indir obj regardless for sort.) */
7294df96 6721 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6722
7294df96
RGS
6723 if (
6724 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6725 (!cv &&
a9ef352a 6726 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6727 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6728 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6729 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6730 )
a9ef352a 6731 {
3280af22 6732 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6733 goto bareword;
93a17b20
LW
6734 }
6735 }
8990e307 6736
3280af22 6737 PL_expect = XOPERATOR;
5db06880
NC
6738#ifdef PERL_MAD
6739 if (isSPACE(*s))
cd81e915
NC
6740 s = SKIPSPACE2(s,nextPL_nextwhite);
6741 PL_nextwhite = nextPL_nextwhite;
5db06880 6742#else
8990e307 6743 s = skipspace(s);
5db06880 6744#endif
1c3923b3
GS
6745
6746 /* Is this a word before a => operator? */
ce29ac45 6747 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6748 op_free(rv2cv_op);
1c3923b3 6749 CLINE;
6154021b 6750 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6751 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6752 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6753 TERM(WORD);
6754 }
6755
6756 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6757 if (*s == '(') {
79072805 6758 CLINE;
5069cc75 6759 if (cv) {
c35e046a
AL
6760 d = s + 1;
6761 while (SPACE_OR_TAB(*d))
6762 d++;
f7461760 6763 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6764 s = d + 1;
c631f32b 6765 goto its_constant;
96e4d5b1 6766 }
6767 }
5db06880
NC
6768#ifdef PERL_MAD
6769 if (PL_madskills) {
cd81e915
NC
6770 PL_nextwhite = PL_thiswhite;
6771 PL_thiswhite = 0;
5db06880 6772 }
cd81e915 6773 start_force(PL_curforce);
5db06880 6774#endif
6154021b 6775 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6776 PL_expect = XOPERATOR;
5db06880
NC
6777#ifdef PERL_MAD
6778 if (PL_madskills) {
cd81e915
NC
6779 PL_nextwhite = nextPL_nextwhite;
6780 curmad('X', PL_thistoken);
6b29d1f5 6781 PL_thistoken = newSVpvs("");
5db06880
NC
6782 }
6783#endif
f7461760 6784 op_free(rv2cv_op);
93a17b20 6785 force_next(WORD);
6154021b 6786 pl_yylval.ival = 0;
463ee0b2 6787 TOKEN('&');
79072805 6788 }
93a17b20 6789
a0d0e21e 6790 /* If followed by var or block, call it a method (unless sub) */
8990e307 6791
f7461760
Z
6792 if ((*s == '$' || *s == '{') && !cv) {
6793 op_free(rv2cv_op);
3280af22
NIS
6794 PL_last_lop = PL_oldbufptr;
6795 PL_last_lop_op = OP_METHOD;
78cdf107
Z
6796 if (!PL_lex_allbrackets &&
6797 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6798 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 6799 PREBLOCK(METHOD);
463ee0b2
LW
6800 }
6801
8990e307
LW
6802 /* If followed by a bareword, see if it looks like indir obj. */
6803
30fe34ed
RGS
6804 if (!orig_keyword
6805 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6806 && (tmp = intuit_method(s, gv, cv))) {
6807 op_free(rv2cv_op);
78cdf107
Z
6808 if (tmp == METHOD && !PL_lex_allbrackets &&
6809 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6810 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6811 return REPORT(tmp);
f7461760 6812 }
93a17b20 6813
8990e307
LW
6814 /* Not a method, so call it a subroutine (if defined) */
6815
5069cc75 6816 if (cv) {
9b387841
NC
6817 if (lastchar == '-')
6818 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6819 "Ambiguous use of -%s resolved as -&%s()",
6820 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6821 /* Check for a constant sub */
f7461760 6822 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6823 its_constant:
f7461760 6824 op_free(rv2cv_op);
6154021b
RGS
6825 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6826 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6827 pl_yylval.opval->op_private = 0;
6b7c6d95 6828 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 6829 TOKEN(WORD);
89bfa8cd 6830 }
6831
6154021b 6832 op_free(pl_yylval.opval);
f7461760 6833 pl_yylval.opval = rv2cv_op;
6154021b 6834 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6835 PL_last_lop = PL_oldbufptr;
bf848113 6836 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6837 /* Is there a prototype? */
5db06880
NC
6838 if (
6839#ifdef PERL_MAD
6840 cv &&
6841#endif
d9f2850e
RGS
6842 SvPOK(cv))
6843 {
8fa6a409
FC
6844 STRLEN protolen = CvPROTOLEN(cv);
6845 const char *proto = CvPROTO(cv);
5f66b61c 6846 if (!protolen)
4633a7c4 6847 TERM(FUNC0SUB);
0f5d0394
AE
6848 while (*proto == ';')
6849 proto++;
649d02de
FC
6850 if (
6851 (
6852 (
6853 *proto == '$' || *proto == '_'
c035a075 6854 || *proto == '*' || *proto == '+'
649d02de
FC
6855 )
6856 && proto[1] == '\0'
6857 )
6858 || (
6859 *proto == '\\' && proto[1] && proto[2] == '\0'
6860 )
6861 )
6862 OPERATOR(UNIOPSUB);
6863 if (*proto == '\\' && proto[1] == '[') {
6864 const char *p = proto + 2;
6865 while(*p && *p != ']')
6866 ++p;
6867 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6868 }
7a52d87a 6869 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6870 if (PL_curstash)
6871 sv_setpvs(PL_subname, "__ANON__");
6872 else
6873 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
6874 if (!PL_lex_allbrackets &&
6875 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6876 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
6877 PREBLOCK(LSTOPSUB);
6878 }
a9ef352a 6879 }
5db06880
NC
6880#ifdef PERL_MAD
6881 {
6882 if (PL_madskills) {
cd81e915
NC
6883 PL_nextwhite = PL_thiswhite;
6884 PL_thiswhite = 0;
5db06880 6885 }
cd81e915 6886 start_force(PL_curforce);
6154021b 6887 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6888 PL_expect = XTERM;
6889 if (PL_madskills) {
cd81e915
NC
6890 PL_nextwhite = nextPL_nextwhite;
6891 curmad('X', PL_thistoken);
6b29d1f5 6892 PL_thistoken = newSVpvs("");
5db06880
NC
6893 }
6894 force_next(WORD);
78cdf107
Z
6895 if (!PL_lex_allbrackets &&
6896 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6897 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6898 TOKEN(NOAMP);
6899 }
6900 }
6901
6902 /* Guess harder when madskills require "best effort". */
6903 if (PL_madskills && (!gv || !GvCVu(gv))) {
6904 int probable_sub = 0;
6905 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6906 probable_sub = 1;
6907 else if (isALPHA(*s)) {
6908 char tmpbuf[1024];
6909 STRLEN tmplen;
6910 d = s;
6911 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6912 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6913 probable_sub = 1;
6914 else {
6915 while (d < PL_bufend && isSPACE(*d))
6916 d++;
6917 if (*d == '=' && d[1] == '>')
6918 probable_sub = 1;
6919 }
6920 }
6921 if (probable_sub) {
af9f5953
BF
6922 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
6923 SVt_PVCV);
6154021b 6924 op_free(pl_yylval.opval);
f7461760 6925 pl_yylval.opval = rv2cv_op;
6154021b 6926 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6927 PL_last_lop = PL_oldbufptr;
6928 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6929 PL_nextwhite = PL_thiswhite;
6930 PL_thiswhite = 0;
6931 start_force(PL_curforce);
6154021b 6932 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6933 PL_expect = XTERM;
cd81e915
NC
6934 PL_nextwhite = nextPL_nextwhite;
6935 curmad('X', PL_thistoken);
6b29d1f5 6936 PL_thistoken = newSVpvs("");
5db06880 6937 force_next(WORD);
78cdf107
Z
6938 if (!PL_lex_allbrackets &&
6939 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6940 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6941 TOKEN(NOAMP);
6942 }
6943#else
6154021b 6944 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6945 PL_expect = XTERM;
8990e307 6946 force_next(WORD);
78cdf107
Z
6947 if (!PL_lex_allbrackets &&
6948 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6949 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 6950 TOKEN(NOAMP);
5db06880 6951#endif
8990e307 6952 }
748a9306 6953
8990e307
LW
6954 /* Call it a bare word */
6955
5603f27d 6956 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6957 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6958 else {
9a073a1d
RGS
6959 bareword:
6960 /* after "print" and similar functions (corresponding to
6961 * "F? L" in opcode.pl), whatever wasn't already parsed as
6962 * a filehandle should be subject to "strict subs".
6963 * Likewise for the optional indirect-object argument to system
6964 * or exec, which can't be a bareword */
6965 if ((PL_last_lop_op == OP_PRINT
6966 || PL_last_lop_op == OP_PRTF
6967 || PL_last_lop_op == OP_SAY
6968 || PL_last_lop_op == OP_SYSTEM
6969 || PL_last_lop_op == OP_EXEC)
6970 && (PL_hints & HINT_STRICT_SUBS))
6971 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6972 if (lastchar != '-') {
6973 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6974 d = PL_tokenbuf;
6975 while (isLOWER(*d))
6976 d++;
af9f5953 6977 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
9014280d 6978 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6979 PL_tokenbuf);
6980 }
748a9306
LW
6981 }
6982 }
f7461760 6983 op_free(rv2cv_op);
c3e0f903
GS
6984
6985 safe_bareword:
9b387841
NC
6986 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6987 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6988 "Operator or semicolon missing before %c%s",
6989 lastchar, PL_tokenbuf);
6990 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6991 "Ambiguous use of %c resolved as operator %c",
6992 lastchar, lastchar);
748a9306 6993 }
93a17b20 6994 TOKEN(WORD);
79072805 6995 }
79072805 6996
68dc0745 6997 case KEY___FILE__:
7eb971ee 6998 FUN0OP(
14f0f125 6999 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7eb971ee 7000 );
46fc3d4c 7001
79072805 7002 case KEY___LINE__:
7eb971ee
FC
7003 FUN0OP(
7004 (OP*)newSVOP(OP_CONST, 0,
7005 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
7006 );
68dc0745 7007
7008 case KEY___PACKAGE__:
7eb971ee
FC
7009 FUN0OP(
7010 (OP*)newSVOP(OP_CONST, 0,
3280af22 7011 (PL_curstash
5aaec2b4 7012 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
7013 : &PL_sv_undef))
7014 );
79072805 7015
e50aee73 7016 case KEY___DATA__:
79072805
LW
7017 case KEY___END__: {
7018 GV *gv;
3280af22 7019 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 7020 const char *pname = "main";
affc13fc
FC
7021 STRLEN plen = 4;
7022 U32 putf8 = 0;
3280af22 7023 if (PL_tokenbuf[2] == 'D')
affc13fc
FC
7024 {
7025 HV * const stash =
7026 PL_curstash ? PL_curstash : PL_defstash;
7027 pname = HvNAME_get(stash);
7028 plen = HvNAMELEN (stash);
7029 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
7030 }
7031 gv = gv_fetchpvn_flags(
7032 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
7033 plen+6, GV_ADD|putf8, SVt_PVIO
7034 );
a5f75d66 7035 GvMULTI_on(gv);
79072805 7036 if (!GvIO(gv))
a0d0e21e 7037 GvIOp(gv) = newIO();
3280af22 7038 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
7039#if defined(HAS_FCNTL) && defined(F_SETFD)
7040 {
f54cb97a 7041 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
7042 fcntl(fd,F_SETFD,fd >= 3);
7043 }
79072805 7044#endif
fd049845 7045 /* Mark this internal pseudo-handle as clean */
7046 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 7047 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 7048 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 7049 else
50952442 7050 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
7051#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
7052 /* if the script was opened in binmode, we need to revert
53129d29 7053 * it to text mode for compatibility; but only iff it has CRs
c39cd008 7054 * XXX this is a questionable hack at best. */
53129d29
GS
7055 if (PL_bufend-PL_bufptr > 2
7056 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
7057 {
7058 Off_t loc = 0;
50952442 7059 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
7060 loc = PerlIO_tell(PL_rsfp);
7061 (void)PerlIO_seek(PL_rsfp, 0L, 0);
7062 }
2986a63f
JH
7063#ifdef NETWARE
7064 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
7065#else
c39cd008 7066 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 7067#endif /* NETWARE */
c39cd008
GS
7068 if (loc > 0)
7069 PerlIO_seek(PL_rsfp, loc, 0);
7070 }
7071 }
7072#endif
7948272d 7073#ifdef PERLIO_LAYERS
52d2e0f4
JH
7074 if (!IN_BYTES) {
7075 if (UTF)
7076 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7077 else if (PL_encoding) {
7078 SV *name;
7079 dSP;
7080 ENTER;
7081 SAVETMPS;
7082 PUSHMARK(sp);
7083 EXTEND(SP, 1);
7084 XPUSHs(PL_encoding);
7085 PUTBACK;
7086 call_method("name", G_SCALAR);
7087 SPAGAIN;
7088 name = POPs;
7089 PUTBACK;
bfed75c6 7090 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7091 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7092 SVfARG(name)));
52d2e0f4
JH
7093 FREETMPS;
7094 LEAVE;
7095 }
7096 }
7948272d 7097#endif
5db06880
NC
7098#ifdef PERL_MAD
7099 if (PL_madskills) {
cd81e915
NC
7100 if (PL_realtokenstart >= 0) {
7101 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7102 if (!PL_endwhite)
6b29d1f5 7103 PL_endwhite = newSVpvs("");
cd81e915
NC
7104 sv_catsv(PL_endwhite, PL_thiswhite);
7105 PL_thiswhite = 0;
7106 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7107 PL_realtokenstart = -1;
5db06880 7108 }
5cc814fd
NC
7109 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7110 != NULL) ;
5db06880
NC
7111 }
7112#endif
4608196e 7113 PL_rsfp = NULL;
79072805
LW
7114 }
7115 goto fake_eof;
e929a76b 7116 }
de3bb511 7117
8990e307 7118 case KEY_AUTOLOAD:
ed6116ce 7119 case KEY_DESTROY:
79072805 7120 case KEY_BEGIN:
3c10abe3 7121 case KEY_UNITCHECK:
7d30b5c4 7122 case KEY_CHECK:
7d07dbc2 7123 case KEY_INIT:
7d30b5c4 7124 case KEY_END:
3280af22
NIS
7125 if (PL_expect == XSTATE) {
7126 s = PL_bufptr;
93a17b20 7127 goto really_sub;
79072805
LW
7128 }
7129 goto just_a_word;
7130
a0d0e21e
LW
7131 case KEY_CORE:
7132 if (*s == ':' && s[1] == ':') {
7133 s += 2;
748a9306 7134 d = s;
3280af22 7135 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
9dcb8368 7136 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
6798c92b 7137 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
7138 if (tmp < 0)
7139 tmp = -tmp;
d67594ff
FC
7140 else if (tmp == KEY_require || tmp == KEY_do
7141 || tmp == KEY_glob)
a72a1c8b 7142 /* that's a way to remember we saw "CORE::" */
850e8516 7143 orig_keyword = tmp;
a0d0e21e
LW
7144 goto reserved_word;
7145 }
7146 goto just_a_word;
7147
463ee0b2
LW
7148 case KEY_abs:
7149 UNI(OP_ABS);
7150
79072805
LW
7151 case KEY_alarm:
7152 UNI(OP_ALARM);
7153
7154 case KEY_accept:
a0d0e21e 7155 LOP(OP_ACCEPT,XTERM);
79072805 7156
463ee0b2 7157 case KEY_and:
78cdf107
Z
7158 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7159 return REPORT(0);
463ee0b2
LW
7160 OPERATOR(ANDOP);
7161
79072805 7162 case KEY_atan2:
a0d0e21e 7163 LOP(OP_ATAN2,XTERM);
85e6fe83 7164
79072805 7165 case KEY_bind:
a0d0e21e 7166 LOP(OP_BIND,XTERM);
79072805
LW
7167
7168 case KEY_binmode:
1c1fc3ea 7169 LOP(OP_BINMODE,XTERM);
79072805
LW
7170
7171 case KEY_bless:
a0d0e21e 7172 LOP(OP_BLESS,XTERM);
79072805 7173
0d863452
RH
7174 case KEY_break:
7175 FUN0(OP_BREAK);
7176
79072805
LW
7177 case KEY_chop:
7178 UNI(OP_CHOP);
7179
7180 case KEY_continue:
0d863452
RH
7181 /* We have to disambiguate the two senses of
7182 "continue". If the next token is a '{' then
7183 treat it as the start of a continue block;
7184 otherwise treat it as a control operator.
7185 */
7186 s = skipspace(s);
7187 if (*s == '{')
79072805 7188 PREBLOCK(CONTINUE);
0d863452
RH
7189 else
7190 FUN0(OP_CONTINUE);
79072805
LW
7191
7192 case KEY_chdir:
fafc274c
NC
7193 /* may use HOME */
7194 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7195 UNI(OP_CHDIR);
7196
7197 case KEY_close:
7198 UNI(OP_CLOSE);
7199
7200 case KEY_closedir:
7201 UNI(OP_CLOSEDIR);
7202
7203 case KEY_cmp:
78cdf107
Z
7204 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7205 return REPORT(0);
79072805
LW
7206 Eop(OP_SCMP);
7207
7208 case KEY_caller:
7209 UNI(OP_CALLER);
7210
7211 case KEY_crypt:
7212#ifdef FCRYPT
f4c556ac
GS
7213 if (!PL_cryptseen) {
7214 PL_cryptseen = TRUE;
de3bb511 7215 init_des();
f4c556ac 7216 }
a687059c 7217#endif
a0d0e21e 7218 LOP(OP_CRYPT,XTERM);
79072805
LW
7219
7220 case KEY_chmod:
a0d0e21e 7221 LOP(OP_CHMOD,XTERM);
79072805
LW
7222
7223 case KEY_chown:
a0d0e21e 7224 LOP(OP_CHOWN,XTERM);
79072805
LW
7225
7226 case KEY_connect:
a0d0e21e 7227 LOP(OP_CONNECT,XTERM);
79072805 7228
463ee0b2
LW
7229 case KEY_chr:
7230 UNI(OP_CHR);
7231
79072805
LW
7232 case KEY_cos:
7233 UNI(OP_COS);
7234
7235 case KEY_chroot:
7236 UNI(OP_CHROOT);
7237
0d863452
RH
7238 case KEY_default:
7239 PREBLOCK(DEFAULT);
7240
79072805 7241 case KEY_do:
29595ff2 7242 s = SKIPSPACE1(s);
79072805 7243 if (*s == '{')
a0d0e21e 7244 PRETERMBLOCK(DO);
79072805 7245 if (*s != '\'')
89c5585f 7246 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
7247 if (orig_keyword == KEY_do) {
7248 orig_keyword = 0;
6154021b 7249 pl_yylval.ival = 1;
850e8516
RGS
7250 }
7251 else
6154021b 7252 pl_yylval.ival = 0;
378cc40b 7253 OPERATOR(DO);
79072805
LW
7254
7255 case KEY_die:
3280af22 7256 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7257 LOP(OP_DIE,XTERM);
79072805
LW
7258
7259 case KEY_defined:
7260 UNI(OP_DEFINED);
7261
7262 case KEY_delete:
a0d0e21e 7263 UNI(OP_DELETE);
79072805
LW
7264
7265 case KEY_dbmopen:
74e8ce34
NC
7266 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7267 STR_WITH_LEN("NDBM_File::"),
7268 STR_WITH_LEN("DB_File::"),
7269 STR_WITH_LEN("GDBM_File::"),
7270 STR_WITH_LEN("SDBM_File::"),
7271 STR_WITH_LEN("ODBM_File::"),
7272 NULL);
a0d0e21e 7273 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7274
7275 case KEY_dbmclose:
7276 UNI(OP_DBMCLOSE);
7277
7278 case KEY_dump:
a0d0e21e 7279 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7280 LOOPX(OP_DUMP);
7281
7282 case KEY_else:
7283 PREBLOCK(ELSE);
7284
7285 case KEY_elsif:
6154021b 7286 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7287 OPERATOR(ELSIF);
7288
7289 case KEY_eq:
78cdf107
Z
7290 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7291 return REPORT(0);
79072805
LW
7292 Eop(OP_SEQ);
7293
a0d0e21e
LW
7294 case KEY_exists:
7295 UNI(OP_EXISTS);
4e553d73 7296
79072805 7297 case KEY_exit:
5db06880
NC
7298 if (PL_madskills)
7299 UNI(OP_INT);
79072805
LW
7300 UNI(OP_EXIT);
7301
7302 case KEY_eval:
29595ff2 7303 s = SKIPSPACE1(s);
32e2a35d
RGS
7304 if (*s == '{') { /* block eval */
7305 PL_expect = XTERMBLOCK;
7306 UNIBRACK(OP_ENTERTRY);
7307 }
7308 else { /* string eval */
7309 PL_expect = XTERM;
7310 UNIBRACK(OP_ENTEREVAL);
7311 }
79072805 7312
7d789282
FC
7313 case KEY_evalbytes:
7314 PL_expect = XTERM;
7315 UNIBRACK(-OP_ENTEREVAL);
7316
79072805
LW
7317 case KEY_eof:
7318 UNI(OP_EOF);
7319
7320 case KEY_exp:
7321 UNI(OP_EXP);
7322
7323 case KEY_each:
7324 UNI(OP_EACH);
7325
7326 case KEY_exec:
a0d0e21e 7327 LOP(OP_EXEC,XREF);
79072805
LW
7328
7329 case KEY_endhostent:
7330 FUN0(OP_EHOSTENT);
7331
7332 case KEY_endnetent:
7333 FUN0(OP_ENETENT);
7334
7335 case KEY_endservent:
7336 FUN0(OP_ESERVENT);
7337
7338 case KEY_endprotoent:
7339 FUN0(OP_EPROTOENT);
7340
7341 case KEY_endpwent:
7342 FUN0(OP_EPWENT);
7343
7344 case KEY_endgrent:
7345 FUN0(OP_EGRENT);
7346
7347 case KEY_for:
7348 case KEY_foreach:
78cdf107
Z
7349 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7350 return REPORT(0);
6154021b 7351 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7352 s = SKIPSPACE1(s);
7e2040f0 7353 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7354 char *p = s;
5db06880
NC
7355#ifdef PERL_MAD
7356 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7357#endif
7358
3280af22 7359 if ((PL_bufend - p) >= 3 &&
55497cff 7360 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7361 p += 2;
77ca0c92
LW
7362 else if ((PL_bufend - p) >= 4 &&
7363 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7364 p += 3;
29595ff2 7365 p = PEEKSPACE(p);
7e2040f0 7366 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7367 p = scan_ident(p, PL_bufend,
7368 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7369 p = PEEKSPACE(p);
77ca0c92
LW
7370 }
7371 if (*p != '$')
cea2e8a9 7372 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7373#ifdef PERL_MAD
7374 s = SvPVX(PL_linestr) + soff;
7375#endif
55497cff 7376 }
79072805
LW
7377 OPERATOR(FOR);
7378
7379 case KEY_formline:
a0d0e21e 7380 LOP(OP_FORMLINE,XTERM);
79072805
LW
7381
7382 case KEY_fork:
7383 FUN0(OP_FORK);
7384
7385 case KEY_fcntl:
a0d0e21e 7386 LOP(OP_FCNTL,XTERM);
79072805
LW
7387
7388 case KEY_fileno:
7389 UNI(OP_FILENO);
7390
7391 case KEY_flock:
a0d0e21e 7392 LOP(OP_FLOCK,XTERM);
79072805
LW
7393
7394 case KEY_gt:
78cdf107
Z
7395 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7396 return REPORT(0);
79072805
LW
7397 Rop(OP_SGT);
7398
7399 case KEY_ge:
78cdf107
Z
7400 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7401 return REPORT(0);
79072805
LW
7402 Rop(OP_SGE);
7403
7404 case KEY_grep:
2c38e13d 7405 LOP(OP_GREPSTART, XREF);
79072805
LW
7406
7407 case KEY_goto:
a0d0e21e 7408 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7409 LOOPX(OP_GOTO);
7410
7411 case KEY_gmtime:
7412 UNI(OP_GMTIME);
7413
7414 case KEY_getc:
6f33ba73 7415 UNIDOR(OP_GETC);
79072805
LW
7416
7417 case KEY_getppid:
7418 FUN0(OP_GETPPID);
7419
7420 case KEY_getpgrp:
7421 UNI(OP_GETPGRP);
7422
7423 case KEY_getpriority:
a0d0e21e 7424 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7425
7426 case KEY_getprotobyname:
7427 UNI(OP_GPBYNAME);
7428
7429 case KEY_getprotobynumber:
a0d0e21e 7430 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7431
7432 case KEY_getprotoent:
7433 FUN0(OP_GPROTOENT);
7434
7435 case KEY_getpwent:
7436 FUN0(OP_GPWENT);
7437
7438 case KEY_getpwnam:
ff68c719 7439 UNI(OP_GPWNAM);
79072805
LW
7440
7441 case KEY_getpwuid:
ff68c719 7442 UNI(OP_GPWUID);
79072805
LW
7443
7444 case KEY_getpeername:
7445 UNI(OP_GETPEERNAME);
7446
7447 case KEY_gethostbyname:
7448 UNI(OP_GHBYNAME);
7449
7450 case KEY_gethostbyaddr:
a0d0e21e 7451 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7452
7453 case KEY_gethostent:
7454 FUN0(OP_GHOSTENT);
7455
7456 case KEY_getnetbyname:
7457 UNI(OP_GNBYNAME);
7458
7459 case KEY_getnetbyaddr:
a0d0e21e 7460 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7461
7462 case KEY_getnetent:
7463 FUN0(OP_GNETENT);
7464
7465 case KEY_getservbyname:
a0d0e21e 7466 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7467
7468 case KEY_getservbyport:
a0d0e21e 7469 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7470
7471 case KEY_getservent:
7472 FUN0(OP_GSERVENT);
7473
7474 case KEY_getsockname:
7475 UNI(OP_GETSOCKNAME);
7476
7477 case KEY_getsockopt:
a0d0e21e 7478 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7479
7480 case KEY_getgrent:
7481 FUN0(OP_GGRENT);
7482
7483 case KEY_getgrnam:
ff68c719 7484 UNI(OP_GGRNAM);
79072805
LW
7485
7486 case KEY_getgrgid:
ff68c719 7487 UNI(OP_GGRGID);
79072805
LW
7488
7489 case KEY_getlogin:
7490 FUN0(OP_GETLOGIN);
7491
0d863452 7492 case KEY_given:
6154021b 7493 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7494 OPERATOR(GIVEN);
7495
93a17b20 7496 case KEY_glob:
d67594ff
FC
7497 LOP(
7498 orig_keyword==KEY_glob ? (orig_keyword=0, -OP_GLOB) : OP_GLOB,
7499 XTERM
7500 );
93a17b20 7501
79072805
LW
7502 case KEY_hex:
7503 UNI(OP_HEX);
7504
7505 case KEY_if:
78cdf107
Z
7506 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7507 return REPORT(0);
6154021b 7508 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7509 OPERATOR(IF);
7510
7511 case KEY_index:
a0d0e21e 7512 LOP(OP_INDEX,XTERM);
79072805
LW
7513
7514 case KEY_int:
7515 UNI(OP_INT);
7516
7517 case KEY_ioctl:
a0d0e21e 7518 LOP(OP_IOCTL,XTERM);
79072805
LW
7519
7520 case KEY_join:
a0d0e21e 7521 LOP(OP_JOIN,XTERM);
79072805
LW
7522
7523 case KEY_keys:
7524 UNI(OP_KEYS);
7525
7526 case KEY_kill:
a0d0e21e 7527 LOP(OP_KILL,XTERM);
79072805
LW
7528
7529 case KEY_last:
a0d0e21e 7530 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7531 LOOPX(OP_LAST);
4e553d73 7532
79072805
LW
7533 case KEY_lc:
7534 UNI(OP_LC);
7535
7536 case KEY_lcfirst:
7537 UNI(OP_LCFIRST);
7538
7539 case KEY_local:
6154021b 7540 pl_yylval.ival = 0;
79072805
LW
7541 OPERATOR(LOCAL);
7542
7543 case KEY_length:
7544 UNI(OP_LENGTH);
7545
7546 case KEY_lt:
78cdf107
Z
7547 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7548 return REPORT(0);
79072805
LW
7549 Rop(OP_SLT);
7550
7551 case KEY_le:
78cdf107
Z
7552 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7553 return REPORT(0);
79072805
LW
7554 Rop(OP_SLE);
7555
7556 case KEY_localtime:
7557 UNI(OP_LOCALTIME);
7558
7559 case KEY_log:
7560 UNI(OP_LOG);
7561
7562 case KEY_link:
a0d0e21e 7563 LOP(OP_LINK,XTERM);
79072805
LW
7564
7565 case KEY_listen:
a0d0e21e 7566 LOP(OP_LISTEN,XTERM);
79072805 7567
c0329465
MB
7568 case KEY_lock:
7569 UNI(OP_LOCK);
7570
79072805
LW
7571 case KEY_lstat:
7572 UNI(OP_LSTAT);
7573
7574 case KEY_m:
8782bef2 7575 s = scan_pat(s,OP_MATCH);
79072805
LW
7576 TERM(sublex_start());
7577
a0d0e21e 7578 case KEY_map:
2c38e13d 7579 LOP(OP_MAPSTART, XREF);
4e4e412b 7580
79072805 7581 case KEY_mkdir:
a0d0e21e 7582 LOP(OP_MKDIR,XTERM);
79072805
LW
7583
7584 case KEY_msgctl:
a0d0e21e 7585 LOP(OP_MSGCTL,XTERM);
79072805
LW
7586
7587 case KEY_msgget:
a0d0e21e 7588 LOP(OP_MSGGET,XTERM);
79072805
LW
7589
7590 case KEY_msgrcv:
a0d0e21e 7591 LOP(OP_MSGRCV,XTERM);
79072805
LW
7592
7593 case KEY_msgsnd:
a0d0e21e 7594 LOP(OP_MSGSND,XTERM);
79072805 7595
77ca0c92 7596 case KEY_our:
93a17b20 7597 case KEY_my:
952306ac 7598 case KEY_state:
eac04b2e 7599 PL_in_my = (U16)tmp;
29595ff2 7600 s = SKIPSPACE1(s);
7e2040f0 7601 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7602#ifdef PERL_MAD
7603 char* start = s;
7604#endif
3280af22 7605 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7606 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7607 goto really_sub;
def3634b 7608 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7609 if (!PL_in_my_stash) {
c750a3ec 7610 char tmpbuf[1024];
3280af22 7611 PL_bufptr = s;
d9fad198 7612 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7613 yyerror(tmpbuf);
7614 }
5db06880
NC
7615#ifdef PERL_MAD
7616 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7617 sv_catsv(PL_thistoken, PL_nextwhite);
7618 PL_nextwhite = 0;
7619 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7620 }
7621#endif
c750a3ec 7622 }
6154021b 7623 pl_yylval.ival = 1;
55497cff 7624 OPERATOR(MY);
93a17b20 7625
79072805 7626 case KEY_next:
a0d0e21e 7627 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7628 LOOPX(OP_NEXT);
7629
7630 case KEY_ne:
78cdf107
Z
7631 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7632 return REPORT(0);
79072805
LW
7633 Eop(OP_SNE);
7634
a0d0e21e 7635 case KEY_no:
468aa647 7636 s = tokenize_use(0, s);
a0d0e21e
LW
7637 OPERATOR(USE);
7638
7639 case KEY_not:
29595ff2 7640 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 7641 FUN1(OP_NOT);
78cdf107
Z
7642 else {
7643 if (!PL_lex_allbrackets &&
7644 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7645 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 7646 OPERATOR(NOTOP);
78cdf107 7647 }
a0d0e21e 7648
79072805 7649 case KEY_open:
29595ff2 7650 s = SKIPSPACE1(s);
7e2040f0 7651 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7652 const char *t;
c35e046a
AL
7653 for (d = s; isALNUM_lazy_if(d,UTF);)
7654 d++;
7655 for (t=d; isSPACE(*t);)
7656 t++;
e2ab214b 7657 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7658 /* [perl #16184] */
7659 && !(t[0] == '=' && t[1] == '>')
7660 ) {
5f66b61c 7661 int parms_len = (int)(d-s);
9014280d 7662 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7663 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7664 parms_len, s, parms_len, s);
66fbe8fb 7665 }
93a17b20 7666 }
a0d0e21e 7667 LOP(OP_OPEN,XTERM);
79072805 7668
463ee0b2 7669 case KEY_or:
78cdf107
Z
7670 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7671 return REPORT(0);
6154021b 7672 pl_yylval.ival = OP_OR;
463ee0b2
LW
7673 OPERATOR(OROP);
7674
79072805
LW
7675 case KEY_ord:
7676 UNI(OP_ORD);
7677
7678 case KEY_oct:
7679 UNI(OP_OCT);
7680
7681 case KEY_opendir:
a0d0e21e 7682 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7683
7684 case KEY_print:
3280af22 7685 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7686 LOP(OP_PRINT,XREF);
79072805
LW
7687
7688 case KEY_printf:
3280af22 7689 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7690 LOP(OP_PRTF,XREF);
79072805 7691
c07a80fd 7692 case KEY_prototype:
7693 UNI(OP_PROTOTYPE);
7694
79072805 7695 case KEY_push:
a0d0e21e 7696 LOP(OP_PUSH,XTERM);
79072805
LW
7697
7698 case KEY_pop:
6f33ba73 7699 UNIDOR(OP_POP);
79072805 7700
a0d0e21e 7701 case KEY_pos:
6f33ba73 7702 UNIDOR(OP_POS);
4e553d73 7703
79072805 7704 case KEY_pack:
a0d0e21e 7705 LOP(OP_PACK,XTERM);
79072805
LW
7706
7707 case KEY_package:
a0d0e21e 7708 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7709 s = SKIPSPACE1(s);
91152fc1 7710 s = force_strict_version(s);
4e4da3ac 7711 PL_lex_expect = XBLOCK;
79072805
LW
7712 OPERATOR(PACKAGE);
7713
7714 case KEY_pipe:
a0d0e21e 7715 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7716
7717 case KEY_q:
5db06880 7718 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7719 if (!s)
d4c19fe8 7720 missingterm(NULL);
6154021b 7721 pl_yylval.ival = OP_CONST;
79072805
LW
7722 TERM(sublex_start());
7723
a0d0e21e
LW
7724 case KEY_quotemeta:
7725 UNI(OP_QUOTEMETA);
7726
ea25a9b2
Z
7727 case KEY_qw: {
7728 OP *words = NULL;
5db06880 7729 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7730 if (!s)
d4c19fe8 7731 missingterm(NULL);
3480a8d2 7732 PL_expect = XOPERATOR;
8127e0e3 7733 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
7734 int warned_comma = !ckWARN(WARN_QW);
7735 int warned_comment = warned_comma;
3280af22 7736 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7737 while (len) {
d4c19fe8
AL
7738 for (; isSPACE(*d) && len; --len, ++d)
7739 /**/;
8127e0e3 7740 if (len) {
d4c19fe8 7741 SV *sv;
f54cb97a 7742 const char *b = d;
7e03b518 7743 if (!warned_comma || !warned_comment) {
8127e0e3 7744 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 7745 if (!warned_comma && *d == ',') {
9014280d 7746 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7747 "Possible attempt to separate words with commas");
7e03b518 7748 ++warned_comma;
8127e0e3 7749 }
7e03b518 7750 else if (!warned_comment && *d == '#') {
9014280d 7751 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7752 "Possible attempt to put comments in qw() list");
7e03b518 7753 ++warned_comment;
8127e0e3
GS
7754 }
7755 }
7756 }
7757 else {
d4c19fe8
AL
7758 for (; !isSPACE(*d) && len; --len, ++d)
7759 /**/;
8127e0e3 7760 }
740cce10 7761 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7762 words = op_append_elem(OP_LIST, words,
7948272d 7763 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7764 }
7765 }
7766 }
ea25a9b2
Z
7767 if (!words)
7768 words = newNULLLIST();
37fd879b 7769 if (PL_lex_stuff) {
8127e0e3 7770 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7771 PL_lex_stuff = NULL;
37fd879b 7772 }
ea25a9b2
Z
7773 PL_expect = XOPERATOR;
7774 pl_yylval.opval = sawparens(words);
7775 TOKEN(QWLIST);
7776 }
8990e307 7777
79072805 7778 case KEY_qq:
5db06880 7779 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7780 if (!s)
d4c19fe8 7781 missingterm(NULL);
6154021b 7782 pl_yylval.ival = OP_STRINGIFY;
3280af22 7783 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 7784 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
7785 TERM(sublex_start());
7786
8782bef2
GB
7787 case KEY_qr:
7788 s = scan_pat(s,OP_QR);
7789 TERM(sublex_start());
7790
79072805 7791 case KEY_qx:
5db06880 7792 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7793 if (!s)
d4c19fe8 7794 missingterm(NULL);
9b201d7d 7795 readpipe_override();
79072805
LW
7796 TERM(sublex_start());
7797
7798 case KEY_return:
7799 OLDLOP(OP_RETURN);
7800
7801 case KEY_require:
29595ff2 7802 s = SKIPSPACE1(s);
e759cc13
RGS
7803 if (isDIGIT(*s)) {
7804 s = force_version(s, FALSE);
a7cb1f99 7805 }
e759cc13
RGS
7806 else if (*s != 'v' || !isDIGIT(s[1])
7807 || (s = force_version(s, TRUE), *s == 'v'))
7808 {
a7cb1f99
GS
7809 *PL_tokenbuf = '\0';
7810 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7811 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
af9f5953
BF
7812 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7813 GV_ADD | (UTF ? SVf_UTF8 : 0));
a7cb1f99
GS
7814 else if (*s == '<')
7815 yyerror("<> should be quotes");
7816 }
a72a1c8b
RGS
7817 if (orig_keyword == KEY_require) {
7818 orig_keyword = 0;
6154021b 7819 pl_yylval.ival = 1;
a72a1c8b
RGS
7820 }
7821 else
6154021b 7822 pl_yylval.ival = 0;
a72a1c8b
RGS
7823 PL_expect = XTERM;
7824 PL_bufptr = s;
7825 PL_last_uni = PL_oldbufptr;
7826 PL_last_lop_op = OP_REQUIRE;
7827 s = skipspace(s);
7828 return REPORT( (int)REQUIRE );
79072805
LW
7829
7830 case KEY_reset:
7831 UNI(OP_RESET);
7832
7833 case KEY_redo:
a0d0e21e 7834 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7835 LOOPX(OP_REDO);
7836
7837 case KEY_rename:
a0d0e21e 7838 LOP(OP_RENAME,XTERM);
79072805
LW
7839
7840 case KEY_rand:
7841 UNI(OP_RAND);
7842
7843 case KEY_rmdir:
7844 UNI(OP_RMDIR);
7845
7846 case KEY_rindex:
a0d0e21e 7847 LOP(OP_RINDEX,XTERM);
79072805
LW
7848
7849 case KEY_read:
a0d0e21e 7850 LOP(OP_READ,XTERM);
79072805
LW
7851
7852 case KEY_readdir:
7853 UNI(OP_READDIR);
7854
93a17b20 7855 case KEY_readline:
6f33ba73 7856 UNIDOR(OP_READLINE);
93a17b20
LW
7857
7858 case KEY_readpipe:
0858480c 7859 UNIDOR(OP_BACKTICK);
93a17b20 7860
79072805
LW
7861 case KEY_rewinddir:
7862 UNI(OP_REWINDDIR);
7863
7864 case KEY_recv:
a0d0e21e 7865 LOP(OP_RECV,XTERM);
79072805
LW
7866
7867 case KEY_reverse:
a0d0e21e 7868 LOP(OP_REVERSE,XTERM);
79072805
LW
7869
7870 case KEY_readlink:
6f33ba73 7871 UNIDOR(OP_READLINK);
79072805
LW
7872
7873 case KEY_ref:
7874 UNI(OP_REF);
7875
7876 case KEY_s:
7877 s = scan_subst(s);
6154021b 7878 if (pl_yylval.opval)
79072805
LW
7879 TERM(sublex_start());
7880 else
7881 TOKEN(1); /* force error */
7882
0d863452
RH
7883 case KEY_say:
7884 checkcomma(s,PL_tokenbuf,"filehandle");
7885 LOP(OP_SAY,XREF);
7886
a0d0e21e
LW
7887 case KEY_chomp:
7888 UNI(OP_CHOMP);
4e553d73 7889
79072805
LW
7890 case KEY_scalar:
7891 UNI(OP_SCALAR);
7892
7893 case KEY_select:
a0d0e21e 7894 LOP(OP_SELECT,XTERM);
79072805
LW
7895
7896 case KEY_seek:
a0d0e21e 7897 LOP(OP_SEEK,XTERM);
79072805
LW
7898
7899 case KEY_semctl:
a0d0e21e 7900 LOP(OP_SEMCTL,XTERM);
79072805
LW
7901
7902 case KEY_semget:
a0d0e21e 7903 LOP(OP_SEMGET,XTERM);
79072805
LW
7904
7905 case KEY_semop:
a0d0e21e 7906 LOP(OP_SEMOP,XTERM);
79072805
LW
7907
7908 case KEY_send:
a0d0e21e 7909 LOP(OP_SEND,XTERM);
79072805
LW
7910
7911 case KEY_setpgrp:
a0d0e21e 7912 LOP(OP_SETPGRP,XTERM);
79072805
LW
7913
7914 case KEY_setpriority:
a0d0e21e 7915 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7916
7917 case KEY_sethostent:
ff68c719 7918 UNI(OP_SHOSTENT);
79072805
LW
7919
7920 case KEY_setnetent:
ff68c719 7921 UNI(OP_SNETENT);
79072805
LW
7922
7923 case KEY_setservent:
ff68c719 7924 UNI(OP_SSERVENT);
79072805
LW
7925
7926 case KEY_setprotoent:
ff68c719 7927 UNI(OP_SPROTOENT);
79072805
LW
7928
7929 case KEY_setpwent:
7930 FUN0(OP_SPWENT);
7931
7932 case KEY_setgrent:
7933 FUN0(OP_SGRENT);
7934
7935 case KEY_seekdir:
a0d0e21e 7936 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7937
7938 case KEY_setsockopt:
a0d0e21e 7939 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7940
7941 case KEY_shift:
6f33ba73 7942 UNIDOR(OP_SHIFT);
79072805
LW
7943
7944 case KEY_shmctl:
a0d0e21e 7945 LOP(OP_SHMCTL,XTERM);
79072805
LW
7946
7947 case KEY_shmget:
a0d0e21e 7948 LOP(OP_SHMGET,XTERM);
79072805
LW
7949
7950 case KEY_shmread:
a0d0e21e 7951 LOP(OP_SHMREAD,XTERM);
79072805
LW
7952
7953 case KEY_shmwrite:
a0d0e21e 7954 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7955
7956 case KEY_shutdown:
a0d0e21e 7957 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7958
7959 case KEY_sin:
7960 UNI(OP_SIN);
7961
7962 case KEY_sleep:
7963 UNI(OP_SLEEP);
7964
7965 case KEY_socket:
a0d0e21e 7966 LOP(OP_SOCKET,XTERM);
79072805
LW
7967
7968 case KEY_socketpair:
a0d0e21e 7969 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7970
7971 case KEY_sort:
3280af22 7972 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7973 s = SKIPSPACE1(s);
79072805 7974 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7975 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7976 PL_expect = XTERM;
15f0808c 7977 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7978 LOP(OP_SORT,XREF);
79072805
LW
7979
7980 case KEY_split:
a0d0e21e 7981 LOP(OP_SPLIT,XTERM);
79072805
LW
7982
7983 case KEY_sprintf:
a0d0e21e 7984 LOP(OP_SPRINTF,XTERM);
79072805
LW
7985
7986 case KEY_splice:
a0d0e21e 7987 LOP(OP_SPLICE,XTERM);
79072805
LW
7988
7989 case KEY_sqrt:
7990 UNI(OP_SQRT);
7991
7992 case KEY_srand:
7993 UNI(OP_SRAND);
7994
7995 case KEY_stat:
7996 UNI(OP_STAT);
7997
7998 case KEY_study:
79072805
LW
7999 UNI(OP_STUDY);
8000
8001 case KEY_substr:
a0d0e21e 8002 LOP(OP_SUBSTR,XTERM);
79072805
LW
8003
8004 case KEY_format:
8005 case KEY_sub:
93a17b20 8006 really_sub:
09bef843 8007 {
3280af22 8008 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 8009 SSize_t tboffset = 0;
09bef843 8010 expectation attrful;
28cc6278 8011 bool have_name, have_proto;
f54cb97a 8012 const int key = tmp;
09bef843 8013
5db06880
NC
8014#ifdef PERL_MAD
8015 SV *tmpwhite = 0;
8016
cd81e915 8017 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
af9f5953 8018 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
cd81e915 8019 PL_thistoken = 0;
5db06880
NC
8020
8021 d = s;
8022 s = SKIPSPACE2(s,tmpwhite);
8023#else
09bef843 8024 s = skipspace(s);
5db06880 8025#endif
09bef843 8026
7e2040f0 8027 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
8028 (*s == ':' && s[1] == ':'))
8029 {
5db06880 8030#ifdef PERL_MAD
4f61fd4b 8031 SV *nametoke = NULL;
5db06880
NC
8032#endif
8033
09bef843
SB
8034 PL_expect = XBLOCK;
8035 attrful = XATTRBLOCK;
b1b65b59
JH
8036 /* remember buffer pos'n for later force_word */
8037 tboffset = s - PL_oldbufptr;
09bef843 8038 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
8039#ifdef PERL_MAD
8040 if (PL_madskills)
af9f5953 8041 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
5db06880 8042#endif
6502358f
NC
8043 if (memchr(tmpbuf, ':', len))
8044 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
8045 else {
8046 sv_setsv(PL_subname,PL_curstname);
396482e1 8047 sv_catpvs(PL_subname,"::");
09bef843
SB
8048 sv_catpvn(PL_subname,tmpbuf,len);
8049 }
af9f5953
BF
8050 if (SvUTF8(PL_linestr))
8051 SvUTF8_on(PL_subname);
09bef843 8052 have_name = TRUE;
5db06880
NC
8053
8054#ifdef PERL_MAD
8055
8056 start_force(0);
8057 CURMAD('X', nametoke);
8058 CURMAD('_', tmpwhite);
8059 (void) force_word(PL_oldbufptr + tboffset, WORD,
8060 FALSE, TRUE, TRUE);
8061
8062 s = SKIPSPACE2(d,tmpwhite);
8063#else
8064 s = skipspace(d);
8065#endif
09bef843 8066 }
463ee0b2 8067 else {
09bef843
SB
8068 if (key == KEY_my)
8069 Perl_croak(aTHX_ "Missing name in \"my sub\"");
8070 PL_expect = XTERMBLOCK;
8071 attrful = XATTRTERM;
76f68e9b 8072 sv_setpvs(PL_subname,"?");
09bef843 8073 have_name = FALSE;
463ee0b2 8074 }
4633a7c4 8075
09bef843
SB
8076 if (key == KEY_format) {
8077 if (*s == '=')
8078 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 8079#ifdef PERL_MAD
cd81e915 8080 PL_thistoken = subtoken;
5db06880
NC
8081 s = d;
8082#else
09bef843 8083 if (have_name)
b1b65b59
JH
8084 (void) force_word(PL_oldbufptr + tboffset, WORD,
8085 FALSE, TRUE, TRUE);
5db06880 8086#endif
09bef843
SB
8087 OPERATOR(FORMAT);
8088 }
79072805 8089
09bef843
SB
8090 /* Look for a prototype */
8091 if (*s == '(') {
d9f2850e
RGS
8092 char *p;
8093 bool bad_proto = FALSE;
9e8d7757
RB
8094 bool in_brackets = FALSE;
8095 char greedy_proto = ' ';
8096 bool proto_after_greedy_proto = FALSE;
8097 bool must_be_last = FALSE;
8098 bool underscore = FALSE;
aef2a98a 8099 bool seen_underscore = FALSE;
197afce1 8100 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
dab1c735 8101 STRLEN tmplen;
09bef843 8102
5db06880 8103 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 8104 if (!s)
09bef843 8105 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8106 /* strip spaces and check for bad characters */
dab1c735 8107 d = SvPV(PL_lex_stuff, tmplen);
09bef843 8108 tmp = 0;
dab1c735 8109 for (p = d; tmplen; tmplen--, ++p) {
d9f2850e 8110 if (!isSPACE(*p)) {
dab1c735 8111 d[tmp++] = *p;
9e8d7757 8112
197afce1 8113 if (warnillegalproto) {
9e8d7757
RB
8114 if (must_be_last)
8115 proto_after_greedy_proto = TRUE;
dab1c735 8116 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
9e8d7757
RB
8117 bad_proto = TRUE;
8118 }
8119 else {
8120 if ( underscore ) {
8121 if ( *p != ';' )
8122 bad_proto = TRUE;
8123 underscore = FALSE;
8124 }
8125 if ( *p == '[' ) {
8126 in_brackets = TRUE;
8127 }
8128 else if ( *p == ']' ) {
8129 in_brackets = FALSE;
8130 }
8131 else if ( (*p == '@' || *p == '%') &&
8132 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8133 !in_brackets ) {
8134 must_be_last = TRUE;
8135 greedy_proto = *p;
8136 }
8137 else if ( *p == '_' ) {
aef2a98a 8138 underscore = seen_underscore = TRUE;
9e8d7757
RB
8139 }
8140 }
8141 }
d37a9538 8142 }
09bef843 8143 }
dab1c735 8144 d[tmp] = '\0';
9e8d7757 8145 if (proto_after_greedy_proto)
197afce1 8146 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8147 "Prototype after '%c' for %"SVf" : %s",
8148 greedy_proto, SVfARG(PL_subname), d);
dab1c735
BF
8149 if (bad_proto) {
8150 SV *dsv = newSVpvs_flags("", SVs_TEMP);
197afce1 8151 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8152 "Illegal character %sin prototype for %"SVf" : %s",
8153 seen_underscore ? "after '_' " : "",
dab1c735
BF
8154 SVfARG(PL_subname),
8155 sv_uni_display(dsv,
8156 newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
8157 tmp, UNI_DISPLAY_ISPRINT));
8158 }
8159 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8160 have_proto = TRUE;
68dc0745 8161
5db06880
NC
8162#ifdef PERL_MAD
8163 start_force(0);
cd81e915 8164 CURMAD('q', PL_thisopen);
5db06880 8165 CURMAD('_', tmpwhite);
cd81e915
NC
8166 CURMAD('=', PL_thisstuff);
8167 CURMAD('Q', PL_thisclose);
5db06880
NC
8168 NEXTVAL_NEXTTOKE.opval =
8169 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8170 PL_lex_stuff = NULL;
5db06880
NC
8171 force_next(THING);
8172
8173 s = SKIPSPACE2(s,tmpwhite);
8174#else
09bef843 8175 s = skipspace(s);
5db06880 8176#endif
4633a7c4 8177 }
09bef843
SB
8178 else
8179 have_proto = FALSE;
8180
8181 if (*s == ':' && s[1] != ':')
8182 PL_expect = attrful;
8e742a20
MHM
8183 else if (*s != '{' && key == KEY_sub) {
8184 if (!have_name)
8185 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8186 else if (*s != ';' && *s != '}')
be2597df 8187 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8188 }
09bef843 8189
5db06880
NC
8190#ifdef PERL_MAD
8191 start_force(0);
8192 if (tmpwhite) {
8193 if (PL_madskills)
6b29d1f5 8194 curmad('^', newSVpvs(""));
5db06880
NC
8195 CURMAD('_', tmpwhite);
8196 }
8197 force_next(0);
8198
cd81e915 8199 PL_thistoken = subtoken;
5db06880 8200#else
09bef843 8201 if (have_proto) {
9ded7720 8202 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8203 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8204 PL_lex_stuff = NULL;
09bef843 8205 force_next(THING);
68dc0745 8206 }
5db06880 8207#endif
09bef843 8208 if (!have_name) {
49a54bbe
NC
8209 if (PL_curstash)
8210 sv_setpvs(PL_subname, "__ANON__");
8211 else
8212 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8213 TOKEN(ANONSUB);
4633a7c4 8214 }
5db06880 8215#ifndef PERL_MAD
b1b65b59
JH
8216 (void) force_word(PL_oldbufptr + tboffset, WORD,
8217 FALSE, TRUE, TRUE);
5db06880 8218#endif
09bef843
SB
8219 if (key == KEY_my)
8220 TOKEN(MYSUB);
8221 TOKEN(SUB);
4633a7c4 8222 }
79072805
LW
8223
8224 case KEY_system:
a0d0e21e 8225 LOP(OP_SYSTEM,XREF);
79072805
LW
8226
8227 case KEY_symlink:
a0d0e21e 8228 LOP(OP_SYMLINK,XTERM);
79072805
LW
8229
8230 case KEY_syscall:
a0d0e21e 8231 LOP(OP_SYSCALL,XTERM);
79072805 8232
c07a80fd 8233 case KEY_sysopen:
8234 LOP(OP_SYSOPEN,XTERM);
8235
137443ea 8236 case KEY_sysseek:
8237 LOP(OP_SYSSEEK,XTERM);
8238
79072805 8239 case KEY_sysread:
a0d0e21e 8240 LOP(OP_SYSREAD,XTERM);
79072805
LW
8241
8242 case KEY_syswrite:
a0d0e21e 8243 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8244
8245 case KEY_tr:
8246 s = scan_trans(s);
8247 TERM(sublex_start());
8248
8249 case KEY_tell:
8250 UNI(OP_TELL);
8251
8252 case KEY_telldir:
8253 UNI(OP_TELLDIR);
8254
463ee0b2 8255 case KEY_tie:
a0d0e21e 8256 LOP(OP_TIE,XTERM);
463ee0b2 8257
c07a80fd 8258 case KEY_tied:
8259 UNI(OP_TIED);
8260
79072805
LW
8261 case KEY_time:
8262 FUN0(OP_TIME);
8263
8264 case KEY_times:
8265 FUN0(OP_TMS);
8266
8267 case KEY_truncate:
a0d0e21e 8268 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8269
8270 case KEY_uc:
8271 UNI(OP_UC);
8272
8273 case KEY_ucfirst:
8274 UNI(OP_UCFIRST);
8275
463ee0b2
LW
8276 case KEY_untie:
8277 UNI(OP_UNTIE);
8278
79072805 8279 case KEY_until:
78cdf107
Z
8280 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8281 return REPORT(0);
6154021b 8282 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8283 OPERATOR(UNTIL);
8284
8285 case KEY_unless:
78cdf107
Z
8286 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8287 return REPORT(0);
6154021b 8288 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8289 OPERATOR(UNLESS);
8290
8291 case KEY_unlink:
a0d0e21e 8292 LOP(OP_UNLINK,XTERM);
79072805
LW
8293
8294 case KEY_undef:
6f33ba73 8295 UNIDOR(OP_UNDEF);
79072805
LW
8296
8297 case KEY_unpack:
a0d0e21e 8298 LOP(OP_UNPACK,XTERM);
79072805
LW
8299
8300 case KEY_utime:
a0d0e21e 8301 LOP(OP_UTIME,XTERM);
79072805
LW
8302
8303 case KEY_umask:
6f33ba73 8304 UNIDOR(OP_UMASK);
79072805
LW
8305
8306 case KEY_unshift:
a0d0e21e
LW
8307 LOP(OP_UNSHIFT,XTERM);
8308
8309 case KEY_use:
468aa647 8310 s = tokenize_use(1, s);
a0d0e21e 8311 OPERATOR(USE);
79072805
LW
8312
8313 case KEY_values:
8314 UNI(OP_VALUES);
8315
8316 case KEY_vec:
a0d0e21e 8317 LOP(OP_VEC,XTERM);
79072805 8318
0d863452 8319 case KEY_when:
78cdf107
Z
8320 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8321 return REPORT(0);
6154021b 8322 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8323 OPERATOR(WHEN);
8324
79072805 8325 case KEY_while:
78cdf107
Z
8326 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8327 return REPORT(0);
6154021b 8328 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8329 OPERATOR(WHILE);
8330
8331 case KEY_warn:
3280af22 8332 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8333 LOP(OP_WARN,XTERM);
79072805
LW
8334
8335 case KEY_wait:
8336 FUN0(OP_WAIT);
8337
8338 case KEY_waitpid:
a0d0e21e 8339 LOP(OP_WAITPID,XTERM);
79072805
LW
8340
8341 case KEY_wantarray:
8342 FUN0(OP_WANTARRAY);
8343
8344 case KEY_write:
9d116dd7
JH
8345#ifdef EBCDIC
8346 {
df3728a2
JH
8347 char ctl_l[2];
8348 ctl_l[0] = toCTRL('L');
8349 ctl_l[1] = '\0';
fafc274c 8350 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8351 }
8352#else
fafc274c
NC
8353 /* Make sure $^L is defined */
8354 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8355#endif
79072805
LW
8356 UNI(OP_ENTERWRITE);
8357
8358 case KEY_x:
78cdf107
Z
8359 if (PL_expect == XOPERATOR) {
8360 if (*s == '=' && !PL_lex_allbrackets &&
8361 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8362 return REPORT(0);
79072805 8363 Mop(OP_REPEAT);
78cdf107 8364 }
79072805
LW
8365 check_uni();
8366 goto just_a_word;
8367
a0d0e21e 8368 case KEY_xor:
78cdf107
Z
8369 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8370 return REPORT(0);
6154021b 8371 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
8372 OPERATOR(OROP);
8373
79072805
LW
8374 case KEY_y:
8375 s = scan_trans(s);
8376 TERM(sublex_start());
8377 }
49dc05e3 8378 }}
79072805 8379}
bf4acbe4
GS
8380#ifdef __SC__
8381#pragma segment Main
8382#endif
79072805 8383
e930465f
JH
8384static int
8385S_pending_ident(pTHX)
8eceec63 8386{
97aff369 8387 dVAR;
8eceec63 8388 register char *d;
bbd11bfc 8389 PADOFFSET tmp = 0;
8eceec63
SC
8390 /* pit holds the identifier we read and pending_ident is reset */
8391 char pit = PL_pending_ident;
9bde8eb0
NC
8392 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8393 /* All routes through this function want to know if there is a colon. */
c099d646 8394 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8395 PL_pending_ident = 0;
8396
cd81e915 8397 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8398 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8399 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8400
8401 /* if we're in a my(), we can't allow dynamics here.
8402 $foo'bar has already been turned into $foo::bar, so
8403 just check for colons.
8404
8405 if it's a legal name, the OP is a PADANY.
8406 */
8407 if (PL_in_my) {
8408 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8409 if (has_colon)
8eceec63
SC
8410 yyerror(Perl_form(aTHX_ "No package name allowed for "
8411 "variable %s in \"our\"",
8412 PL_tokenbuf));
bc9b26ca 8413 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8414 }
8415 else {
9bde8eb0 8416 if (has_colon)
952306ac
RGS
8417 yyerror(Perl_form(aTHX_ PL_no_myglob,
8418 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 8419
6154021b 8420 pl_yylval.opval = newOP(OP_PADANY, 0);
bc9b26ca
BF
8421 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8422 UTF ? SVf_UTF8 : 0);
8eceec63
SC
8423 return PRIVATEREF;
8424 }
8425 }
8426
8427 /*
8428 build the ops for accesses to a my() variable.
8429
8430 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8431 then used in a comparison. This catches most, but not
8432 all cases. For instance, it catches
8433 sort { my($a); $a <=> $b }
8434 but not
8435 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8436 (although why you'd do that is anyone's guess).
8437 */
8438
9bde8eb0 8439 if (!has_colon) {
8716503d 8440 if (!PL_in_my)
bc9b26ca
BF
8441 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8442 UTF ? SVf_UTF8 : 0);
8716503d 8443 if (tmp != NOT_IN_PAD) {
8eceec63 8444 /* might be an "our" variable" */
00b1698f 8445 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8446 /* build ops for a bareword */
b64e5050
AL
8447 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8448 HEK * const stashname = HvNAME_HEK(stash);
8449 SV * const sym = newSVhek(stashname);
396482e1 8450 sv_catpvs(sym, "::");
2a33114a 8451 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
6154021b
RGS
8452 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8453 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8454 gv_fetchsv(sym,
8eceec63
SC
8455 (PL_in_eval
8456 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8457 : GV_ADDMULTI
8eceec63
SC
8458 ),
8459 ((PL_tokenbuf[0] == '$') ? SVt_PV
8460 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8461 : SVt_PVHV));
8462 return WORD;
8463 }
8464
8465 /* if it's a sort block and they're naming $a or $b */
8466 if (PL_last_lop_op == OP_SORT &&
8467 PL_tokenbuf[0] == '$' &&
8468 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8469 && !PL_tokenbuf[2])
8470 {
8471 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8472 d < PL_bufend && *d != '\n';
8473 d++)
8474 {
8475 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8476 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8477 PL_tokenbuf);
8478 }
8479 }
8480 }
8481
6154021b
RGS
8482 pl_yylval.opval = newOP(OP_PADANY, 0);
8483 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8484 return PRIVATEREF;
8485 }
8486 }
8487
8488 /*
8489 Whine if they've said @foo in a doublequoted string,
8490 and @foo isn't a variable we can find in the symbol
8491 table.
8492 */
d824713b
NC
8493 if (ckWARN(WARN_AMBIGUOUS) &&
8494 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
0be4d16f
BF
8495 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8496 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8eceec63 8497 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8498 /* DO NOT warn for @- and @+ */
8499 && !( PL_tokenbuf[2] == '\0' &&
8500 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8501 )
8eceec63
SC
8502 {
8503 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8504 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8505 "Possible unintended interpolation of %s in string",
8506 PL_tokenbuf);
8eceec63
SC
8507 }
8508 }
8509
8510 /* build ops for a bareword */
0be4d16f
BF
8511 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8512 tokenbuf_len - 1,
8513 UTF ? SVf_UTF8 : 0 ));
6154021b 8514 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7 8515 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
0be4d16f
BF
8516 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8517 | ( UTF ? SVf_UTF8 : 0 ),
223f0fb7
NC
8518 ((PL_tokenbuf[0] == '$') ? SVt_PV
8519 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8520 : SVt_PVHV));
8eceec63
SC
8521 return WORD;
8522}
8523
76e3520e 8524STATIC void
c94115d8 8525S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8526{
97aff369 8527 dVAR;
2f3197b3 8528
7918f24d
NC
8529 PERL_ARGS_ASSERT_CHECKCOMMA;
8530
d008e5eb 8531 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8532 if (ckWARN(WARN_SYNTAX)) {
8533 int level = 1;
26ff0806 8534 const char *w;
d008e5eb
GS
8535 for (w = s+2; *w && level; w++) {
8536 if (*w == '(')
8537 ++level;
8538 else if (*w == ')')
8539 --level;
8540 }
888fea98
NC
8541 while (isSPACE(*w))
8542 ++w;
b1439985
RGS
8543 /* the list of chars below is for end of statements or
8544 * block / parens, boolean operators (&&, ||, //) and branch
8545 * constructs (or, and, if, until, unless, while, err, for).
8546 * Not a very solid hack... */
8547 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8548 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8549 "%s (...) interpreted as function",name);
d008e5eb 8550 }
2f3197b3 8551 }
3280af22 8552 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8553 s++;
a687059c
LW
8554 if (*s == '(')
8555 s++;
3280af22 8556 while (s < PL_bufend && isSPACE(*s))
a687059c 8557 s++;
7e2040f0 8558 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 8559 const char * const w = s++;
7e2040f0 8560 while (isALNUM_lazy_if(s,UTF))
a687059c 8561 s++;
3280af22 8562 while (s < PL_bufend && isSPACE(*s))
a687059c 8563 s++;
e929a76b 8564 if (*s == ',') {
c94115d8 8565 GV* gv;
5458a98a 8566 if (keyword(w, s - w, 0))
e929a76b 8567 return;
c94115d8 8568
2e38bce1 8569 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
c94115d8 8570 if (gv && GvCVu(gv))
abbb3198 8571 return;
cea2e8a9 8572 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8573 }
8574 }
8575}
8576
423cee85
JH
8577/* Either returns sv, or mortalizes sv and returns a new SV*.
8578 Best used as sv=new_constant(..., sv, ...).
8579 If s, pv are NULL, calls subroutine with one argument,
8580 and type is used with error messages only. */
8581
b3ac6de7 8582STATIC SV *
eb0d8d16
NC
8583S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8584 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 8585{
27da23d5 8586 dVAR; dSP;
890ce7af 8587 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 8588 SV *res;
b3ac6de7
IZ
8589 SV **cvp;
8590 SV *cv, *typesv;
89e33a05 8591 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 8592
7918f24d
NC
8593 PERL_ARGS_ASSERT_NEW_CONSTANT;
8594
f0af216f 8595 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
8596 SV *msg;
8597
10edeb5d
JH
8598 why2 = (const char *)
8599 (strEQ(key,"charnames")
8600 ? "(possibly a missing \"use charnames ...\")"
8601 : "");
4e553d73 8602 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
8603 (type ? type: "undef"), why2);
8604
8605 /* This is convoluted and evil ("goto considered harmful")
8606 * but I do not understand the intricacies of all the different
8607 * failure modes of %^H in here. The goal here is to make
8608 * the most probable error message user-friendly. --jhi */
8609
8610 goto msgdone;
8611
423cee85 8612 report:
4e553d73 8613 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 8614 (type ? type: "undef"), why1, why2, why3);
41ab332f 8615 msgdone:
95a20fc0 8616 yyerror(SvPVX_const(msg));
423cee85
JH
8617 SvREFCNT_dec(msg);
8618 return sv;
8619 }
ff3f963a
KW
8620
8621 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
8622 if (PL_error_count > 0 && strEQ(key,"charnames"))
8623 return &PL_sv_undef;
ff3f963a 8624
eb0d8d16 8625 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 8626 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
8627 why1 = "$^H{";
8628 why2 = key;
f0af216f 8629 why3 = "} is not defined";
423cee85 8630 goto report;
b3ac6de7
IZ
8631 }
8632 sv_2mortal(sv); /* Parent created it permanently */
8633 cv = *cvp;
423cee85 8634 if (!pv && s)
59cd0e26 8635 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 8636 if (type && pv)
59cd0e26 8637 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 8638 else
423cee85 8639 typesv = &PL_sv_undef;
4e553d73 8640
e788e7d3 8641 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
8642 ENTER ;
8643 SAVETMPS;
4e553d73 8644
423cee85 8645 PUSHMARK(SP) ;
a5845cb7 8646 EXTEND(sp, 3);
423cee85
JH
8647 if (pv)
8648 PUSHs(pv);
b3ac6de7 8649 PUSHs(sv);
423cee85
JH
8650 if (pv)
8651 PUSHs(typesv);
b3ac6de7 8652 PUTBACK;
423cee85 8653 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 8654
423cee85 8655 SPAGAIN ;
4e553d73 8656
423cee85 8657 /* Check the eval first */
9b0e499b 8658 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 8659 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 8660 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 8661 (void)POPs;
b37c2d43 8662 res = SvREFCNT_inc_simple(sv);
423cee85
JH
8663 }
8664 else {
8665 res = POPs;
b37c2d43 8666 SvREFCNT_inc_simple_void(res);
423cee85 8667 }
4e553d73 8668
423cee85
JH
8669 PUTBACK ;
8670 FREETMPS ;
8671 LEAVE ;
b3ac6de7 8672 POPSTACK;
4e553d73 8673
b3ac6de7 8674 if (!SvOK(res)) {
423cee85
JH
8675 why1 = "Call to &{$^H{";
8676 why2 = key;
f0af216f 8677 why3 = "}} did not return a defined value";
423cee85
JH
8678 sv = res;
8679 goto report;
9b0e499b 8680 }
423cee85 8681
9b0e499b 8682 return res;
b3ac6de7 8683}
4e553d73 8684
d0a148a6
NC
8685/* Returns a NUL terminated string, with the length of the string written to
8686 *slp
8687 */
76e3520e 8688STATIC char *
cea2e8a9 8689S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 8690{
97aff369 8691 dVAR;
463ee0b2 8692 register char *d = dest;
890ce7af 8693 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
8694
8695 PERL_ARGS_ASSERT_SCAN_WORD;
8696
463ee0b2 8697 for (;;) {
8903cb82 8698 if (d >= e)
cea2e8a9 8699 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8700 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8701 *d++ = *s++;
c35e046a 8702 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8703 *d++ = ':';
8704 *d++ = ':';
8705 s++;
8706 }
c35e046a 8707 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
8708 *d++ = *s++;
8709 *d++ = *s++;
8710 }
fd400ab9 8711 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8712 char *t = s + UTF8SKIP(s);
c35e046a 8713 size_t len;
fd400ab9 8714 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 8715 t += UTF8SKIP(t);
c35e046a
AL
8716 len = t - s;
8717 if (d + len > e)
cea2e8a9 8718 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
8719 Copy(s, d, len, char);
8720 d += len;
a0ed51b3
LW
8721 s = t;
8722 }
463ee0b2
LW
8723 else {
8724 *d = '\0';
8725 *slp = d - dest;
8726 return s;
e929a76b 8727 }
378cc40b
LW
8728 }
8729}
8730
76e3520e 8731STATIC char *
f54cb97a 8732S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 8733{
97aff369 8734 dVAR;
6136c704 8735 char *bracket = NULL;
748a9306 8736 char funny = *s++;
6136c704 8737 register char *d = dest;
0b3da58d 8738 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 8739
7918f24d
NC
8740 PERL_ARGS_ASSERT_SCAN_IDENT;
8741
a0d0e21e 8742 if (isSPACE(*s))
29595ff2 8743 s = PEEKSPACE(s);
de3bb511 8744 if (isDIGIT(*s)) {
8903cb82 8745 while (isDIGIT(*s)) {
8746 if (d >= e)
cea2e8a9 8747 Perl_croak(aTHX_ ident_too_long);
378cc40b 8748 *d++ = *s++;
8903cb82 8749 }
378cc40b
LW
8750 }
8751 else {
463ee0b2 8752 for (;;) {
8903cb82 8753 if (d >= e)
cea2e8a9 8754 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8755 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8756 *d++ = *s++;
7e2040f0 8757 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8758 *d++ = ':';
8759 *d++ = ':';
8760 s++;
8761 }
a0d0e21e 8762 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
8763 *d++ = *s++;
8764 *d++ = *s++;
8765 }
fd400ab9 8766 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8767 char *t = s + UTF8SKIP(s);
fd400ab9 8768 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
8769 t += UTF8SKIP(t);
8770 if (d + (t - s) > e)
cea2e8a9 8771 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
8772 Copy(s, d, t - s, char);
8773 d += t - s;
8774 s = t;
8775 }
463ee0b2
LW
8776 else
8777 break;
8778 }
378cc40b
LW
8779 }
8780 *d = '\0';
8781 d = dest;
79072805 8782 if (*d) {
3280af22
NIS
8783 if (PL_lex_state != LEX_NORMAL)
8784 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 8785 return s;
378cc40b 8786 }
748a9306 8787 if (*s == '$' && s[1] &&
3792a11b 8788 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 8789 {
4810e5ec 8790 return s;
5cd24f17 8791 }
79072805
LW
8792 if (*s == '{') {
8793 bracket = s;
8794 s++;
8795 }
8796 else if (ck_uni)
8797 check_uni();
204e6232
BF
8798 if (s < send) {
8799 if (UTF) {
8800 const STRLEN skip = UTF8SKIP(s);
8801 STRLEN i;
8802 d[skip] = '\0';
8803 for ( i = 0; i < skip; i++ )
8804 d[i] = *s++;
8805 }
8806 else {
8807 *d = *s++;
8808 d[1] = '\0';
8809 }
8810 }
2b92dfce 8811 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 8812 *d = toCTRL(*s);
8813 s++;
de3bb511 8814 }
79072805 8815 if (bracket) {
748a9306 8816 if (isSPACE(s[-1])) {
fa83b5b6 8817 while (s < send) {
f54cb97a 8818 const char ch = *s++;
bf4acbe4 8819 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 8820 *d = ch;
8821 break;
8822 }
8823 }
748a9306 8824 }
7e2040f0 8825 if (isIDFIRST_lazy_if(d,UTF)) {
204e6232 8826 d += UTF8SKIP(d);
a0ed51b3 8827 if (UTF) {
6136c704
AL
8828 char *end = s;
8829 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8830 end += UTF8SKIP(end);
8831 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8832 end += UTF8SKIP(end);
a0ed51b3 8833 }
6136c704
AL
8834 Copy(s, d, end - s, char);
8835 d += end - s;
8836 s = end;
a0ed51b3
LW
8837 }
8838 else {
2b92dfce 8839 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 8840 *d++ = *s++;
2b92dfce 8841 if (d >= e)
cea2e8a9 8842 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 8843 }
79072805 8844 *d = '\0';
c35e046a
AL
8845 while (s < send && SPACE_OR_TAB(*s))
8846 s++;
ff68c719 8847 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 8848 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
8849 const char * const brack =
8850 (const char *)
8851 ((*s == '[') ? "[...]" : "{...}");
e850844c 8852 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 8853 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 8854 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
8855 funny, dest, brack, funny, dest, brack);
8856 }
79072805 8857 bracket++;
a0be28da 8858 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 8859 PL_lex_allbrackets++;
79072805
LW
8860 return s;
8861 }
4e553d73
NIS
8862 }
8863 /* Handle extended ${^Foo} variables
2b92dfce
GS
8864 * 1999-02-27 mjd-perl-patch@plover.com */
8865 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8866 && isALNUM(*s))
8867 {
8868 d++;
8869 while (isALNUM(*s) && d < e) {
8870 *d++ = *s++;
8871 }
8872 if (d >= e)
cea2e8a9 8873 Perl_croak(aTHX_ ident_too_long);
2b92dfce 8874 *d = '\0';
79072805
LW
8875 }
8876 if (*s == '}') {
8877 s++;
7df0d042 8878 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 8879 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
8880 PL_expect = XREF;
8881 }
d008e5eb 8882 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 8883 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
8884 (keyword(dest, d - dest, 0)
8885 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 8886 {
c35e046a
AL
8887 if (funny == '#')
8888 funny = '@';
9014280d 8889 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
8890 "Ambiguous use of %c{%s} resolved to %c%s",
8891 funny, dest, funny, dest);
8892 }
8893 }
79072805
LW
8894 }
8895 else {
8896 s = bracket; /* let the parser handle it */
93a17b20 8897 *dest = '\0';
79072805
LW
8898 }
8899 }
3280af22
NIS
8900 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8901 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
8902 return s;
8903}
8904
858a358b 8905static bool
3955e1a9 8906S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
8907
8908 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8909 * the parse starting at 's', based on the subset that are valid in this
8910 * context input to this routine in 'valid_flags'. Advances s. Returns
8911 * TRUE if the input was a valid flag, so the next char may be as well;
3955e1a9
KW
8912 * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8913 * current regex. This routine will set it to any charset modifier found.
8914 * The caller shouldn't change it. This way, another charset modifier
8915 * encountered in the parse can be detected as an error, as we have decided
8916 * allow only one */
858a358b
KW
8917
8918 const char c = **s;
94b03d7d 8919
858a358b
KW
8920 if (! strchr(valid_flags, c)) {
8921 if (isALNUM(c)) {
94b03d7d 8922 goto deprecate;
858a358b
KW
8923 }
8924 return FALSE;
8925 }
8926
8927 switch (c) {
94b03d7d 8928
858a358b
KW
8929 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8930 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8931 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8932 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8933 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8934 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d
KW
8935 case LOCALE_PAT_MOD:
8936
8937 /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8938 * can't be regex modifiers.
8939 * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
8940 * much as easily done. s///lei, for example, has to mean regex
8941 * modifiers if it's not an error (as does any word character
8942 * following the 'e'). Otherwise, we resolve to the backwards-
8943 * compatible, but less likely 's/// le ...', i.e. as meaning
8944 * less-than-or-equal. The reason it's not likely is that s//
967ac236
KW
8945 * returns a number for code in the field (/r returns a string, but
8946 * that wasn't added until the 5.13 series), and so '<=' should be
8947 * used for comparing, not 'le'. */
94b03d7d
KW
8948 if (*((*s) + 1) == 't') {
8949 goto deprecate;
8950 }
563734a5 8951 else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
aeac89a5
KW
8952
8953 /* 'e' is valid only for substitutes, s///e. If it is not
8954 * valid in the current context, then 'm//le' must mean the
8955 * comparison operator, so use the regular deprecation message.
8956 */
8957 if (! strchr(valid_flags, 'e')) {
8958 goto deprecate;
8959 }
94b03d7d
KW
8960 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8961 "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
8962 return FALSE;
8963 }
3955e1a9
KW
8964 if (*charset) {
8965 goto multiple_charsets;
8966 }
94b03d7d 8967 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 8968 *charset = c;
94b03d7d
KW
8969 break;
8970 case UNICODE_PAT_MOD:
8971 /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8972 * 'n' means they can't be regex modifiers */
8973 if (*((*s) + 1) == 'n') {
8974 goto deprecate;
8975 }
3955e1a9
KW
8976 if (*charset) {
8977 goto multiple_charsets;
8978 }
94b03d7d 8979 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 8980 *charset = c;
94b03d7d
KW
8981 break;
8982 case ASCII_RESTRICT_PAT_MOD:
8983 /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8984 * can't be regex modifiers */
8985 if (*((*s) + 1) == 'n') {
8986 goto deprecate;
8987 }
ff3f26d2
KW
8988
8989 if (! *charset) {
94b03d7d
KW
8990 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8991 }
ff3f26d2
KW
8992 else {
8993
8994 /* Error if previous modifier wasn't an 'a', but if it was, see
8995 * if, and accept, a second occurrence (only) */
8996 if (*charset != 'a'
8997 || get_regex_charset(*pmfl)
8998 != REGEX_ASCII_RESTRICTED_CHARSET)
8999 {
9000 goto multiple_charsets;
9001 }
9002 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
9003 }
9004 *charset = c;
94b03d7d
KW
9005 break;
9006 case DEPENDS_PAT_MOD:
3955e1a9
KW
9007 if (*charset) {
9008 goto multiple_charsets;
9009 }
94b03d7d 9010 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 9011 *charset = c;
94b03d7d 9012 break;
879d0c72 9013 }
94b03d7d 9014
858a358b
KW
9015 (*s)++;
9016 return TRUE;
94b03d7d
KW
9017
9018 deprecate:
9019 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
9020 "Having no space between pattern and following word is deprecated");
9021 return FALSE;
3955e1a9
KW
9022
9023 multiple_charsets:
9024 if (*charset != c) {
9025 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
9026 }
ff3f26d2
KW
9027 else if (c == 'a') {
9028 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
9029 }
3955e1a9
KW
9030 else {
9031 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
9032 }
9033
9034 /* Pretend that it worked, so will continue processing before dieing */
9035 (*s)++;
9036 return TRUE;
879d0c72
NC
9037}
9038
76e3520e 9039STATIC char *
cea2e8a9 9040S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 9041{
97aff369 9042 dVAR;
79072805 9043 PMOP *pm;
5db06880 9044 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 9045 const char * const valid_flags =
a20207d7 9046 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 9047 char charset = '\0'; /* character set modifier */
5db06880
NC
9048#ifdef PERL_MAD
9049 char *modstart;
9050#endif
9051
7918f24d 9052 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 9053
25c09cbf 9054 if (!s) {
6136c704 9055 const char * const delimiter = skipspace(start);
10edeb5d
JH
9056 Perl_croak(aTHX_
9057 (const char *)
9058 (*delimiter == '?'
9059 ? "Search pattern not terminated or ternary operator parsed as search pattern"
9060 : "Search pattern not terminated" ));
25c09cbf 9061 }
bbce6d69 9062
8782bef2 9063 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
9064 if (PL_multi_open == '?') {
9065 /* This is the only point in the code that sets PMf_ONCE: */
79072805 9066 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
9067
9068 /* Hence it's safe to do this bit of PMOP book-keeping here, which
9069 allows us to restrict the list needed by reset to just the ??
9070 matches. */
9071 assert(type != OP_TRANS);
9072 if (PL_curstash) {
daba3364 9073 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
9074 U32 elements;
9075 if (!mg) {
daba3364 9076 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
9077 0);
9078 }
9079 elements = mg->mg_len / sizeof(PMOP**);
9080 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9081 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9082 mg->mg_len = elements * sizeof(PMOP**);
9083 PmopSTASH_set(pm,PL_curstash);
9084 }
9085 }
5db06880
NC
9086#ifdef PERL_MAD
9087 modstart = s;
9088#endif
3955e1a9 9089 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
9090#ifdef PERL_MAD
9091 if (PL_madskills && modstart != s) {
9092 SV* tmptoken = newSVpvn(modstart, s - modstart);
9093 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9094 }
9095#endif
4ac733c9 9096 /* issue a warning if /c is specified,but /g is not */
a2a5de95 9097 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 9098 {
a2a5de95
NC
9099 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9100 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
9101 }
9102
3280af22 9103 PL_lex_op = (OP*)pm;
6154021b 9104 pl_yylval.ival = OP_MATCH;
378cc40b
LW
9105 return s;
9106}
9107
76e3520e 9108STATIC char *
cea2e8a9 9109S_scan_subst(pTHX_ char *start)
79072805 9110{
27da23d5 9111 dVAR;
22594288 9112 char *s;
79072805 9113 register PMOP *pm;
4fdae800 9114 I32 first_start;
79072805 9115 I32 es = 0;
3955e1a9 9116 char charset = '\0'; /* character set modifier */
5db06880
NC
9117#ifdef PERL_MAD
9118 char *modstart;
9119#endif
79072805 9120
7918f24d
NC
9121 PERL_ARGS_ASSERT_SCAN_SUBST;
9122
6154021b 9123 pl_yylval.ival = OP_NULL;
79072805 9124
5db06880 9125 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9126
37fd879b 9127 if (!s)
cea2e8a9 9128 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9129
3280af22 9130 if (s[-1] == PL_multi_open)
79072805 9131 s--;
5db06880
NC
9132#ifdef PERL_MAD
9133 if (PL_madskills) {
cd81e915
NC
9134 CURMAD('q', PL_thisopen);
9135 CURMAD('_', PL_thiswhite);
9136 CURMAD('E', PL_thisstuff);
9137 CURMAD('Q', PL_thisclose);
9138 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9139 }
9140#endif
79072805 9141
3280af22 9142 first_start = PL_multi_start;
5db06880 9143 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9144 if (!s) {
37fd879b 9145 if (PL_lex_stuff) {
3280af22 9146 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9147 PL_lex_stuff = NULL;
37fd879b 9148 }
cea2e8a9 9149 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9150 }
3280af22 9151 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9152
79072805 9153 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9154
9155#ifdef PERL_MAD
9156 if (PL_madskills) {
cd81e915
NC
9157 CURMAD('z', PL_thisopen);
9158 CURMAD('R', PL_thisstuff);
9159 CURMAD('Z', PL_thisclose);
5db06880
NC
9160 }
9161 modstart = s;
9162#endif
9163
48c036b1 9164 while (*s) {
a20207d7 9165 if (*s == EXEC_PAT_MOD) {
a687059c 9166 s++;
2f3197b3 9167 es++;
a687059c 9168 }
3955e1a9
KW
9169 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9170 {
48c036b1 9171 break;
aa78b661 9172 }
378cc40b 9173 }
79072805 9174
5db06880
NC
9175#ifdef PERL_MAD
9176 if (PL_madskills) {
9177 if (modstart != s)
9178 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9179 append_madprops(PL_thismad, (OP*)pm, 0);
9180 PL_thismad = 0;
5db06880
NC
9181 }
9182#endif
a2a5de95
NC
9183 if ((pm->op_pmflags & PMf_CONTINUE)) {
9184 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9185 }
9186
79072805 9187 if (es) {
6136c704
AL
9188 SV * const repl = newSVpvs("");
9189
0244c3a4
GS
9190 PL_sublex_info.super_bufptr = s;
9191 PL_sublex_info.super_bufend = PL_bufend;
9192 PL_multi_end = 0;
79072805 9193 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9194 while (es-- > 0) {
9195 if (es)
9196 sv_catpvs(repl, "eval ");
9197 else
9198 sv_catpvs(repl, "do ");
9199 }
6f43d98f 9200 sv_catpvs(repl, "{");
3280af22 9201 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
9202 if (strchr(SvPVX(PL_lex_repl), '#'))
9203 sv_catpvs(repl, "\n");
9204 sv_catpvs(repl, "}");
25da4f38 9205 SvEVALED_on(repl);
3280af22
NIS
9206 SvREFCNT_dec(PL_lex_repl);
9207 PL_lex_repl = repl;
378cc40b 9208 }
79072805 9209
3280af22 9210 PL_lex_op = (OP*)pm;
6154021b 9211 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9212 return s;
9213}
9214
76e3520e 9215STATIC char *
cea2e8a9 9216S_scan_trans(pTHX_ char *start)
378cc40b 9217{
97aff369 9218 dVAR;
a0d0e21e 9219 register char* s;
11343788 9220 OP *o;
79072805 9221 short *tbl;
b84c11c8
NC
9222 U8 squash;
9223 U8 del;
9224 U8 complement;
bb16bae8 9225 bool nondestruct = 0;
5db06880
NC
9226#ifdef PERL_MAD
9227 char *modstart;
9228#endif
79072805 9229
7918f24d
NC
9230 PERL_ARGS_ASSERT_SCAN_TRANS;
9231
6154021b 9232 pl_yylval.ival = OP_NULL;
79072805 9233
5db06880 9234 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 9235 if (!s)
cea2e8a9 9236 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9237
3280af22 9238 if (s[-1] == PL_multi_open)
2f3197b3 9239 s--;
5db06880
NC
9240#ifdef PERL_MAD
9241 if (PL_madskills) {
cd81e915
NC
9242 CURMAD('q', PL_thisopen);
9243 CURMAD('_', PL_thiswhite);
9244 CURMAD('E', PL_thisstuff);
9245 CURMAD('Q', PL_thisclose);
9246 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9247 }
9248#endif
2f3197b3 9249
5db06880 9250 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9251 if (!s) {
37fd879b 9252 if (PL_lex_stuff) {
3280af22 9253 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9254 PL_lex_stuff = NULL;
37fd879b 9255 }
cea2e8a9 9256 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9257 }
5db06880 9258 if (PL_madskills) {
cd81e915
NC
9259 CURMAD('z', PL_thisopen);
9260 CURMAD('R', PL_thisstuff);
9261 CURMAD('Z', PL_thisclose);
5db06880 9262 }
79072805 9263
a0ed51b3 9264 complement = del = squash = 0;
5db06880
NC
9265#ifdef PERL_MAD
9266 modstart = s;
9267#endif
7a1e2023
NC
9268 while (1) {
9269 switch (*s) {
9270 case 'c':
79072805 9271 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9272 break;
9273 case 'd':
a0ed51b3 9274 del = OPpTRANS_DELETE;
7a1e2023
NC
9275 break;
9276 case 's':
79072805 9277 squash = OPpTRANS_SQUASH;
7a1e2023 9278 break;
bb16bae8
FC
9279 case 'r':
9280 nondestruct = 1;
9281 break;
7a1e2023
NC
9282 default:
9283 goto no_more;
9284 }
395c3793
LW
9285 s++;
9286 }
7a1e2023 9287 no_more:
8973db79 9288
aa1f7c5b 9289 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
bb16bae8 9290 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
59f00321
RGS
9291 o->op_private &= ~OPpTRANS_ALL;
9292 o->op_private |= del|squash|complement|
7948272d
NIS
9293 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9294 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 9295
3280af22 9296 PL_lex_op = o;
bb16bae8 9297 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9298
9299#ifdef PERL_MAD
9300 if (PL_madskills) {
9301 if (modstart != s)
9302 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9303 append_madprops(PL_thismad, o, 0);
9304 PL_thismad = 0;
5db06880
NC
9305 }
9306#endif
9307
79072805
LW
9308 return s;
9309}
9310
76e3520e 9311STATIC char *
cea2e8a9 9312S_scan_heredoc(pTHX_ register char *s)
79072805 9313{
97aff369 9314 dVAR;
79072805
LW
9315 SV *herewas;
9316 I32 op_type = OP_SCALAR;
9317 I32 len;
9318 SV *tmpstr;
9319 char term;
73d840c0 9320 const char *found_newline;
79072805 9321 register char *d;
fc36a67e 9322 register char *e;
4633a7c4 9323 char *peek;
60d63348
FC
9324 const int outer = (PL_rsfp || PL_parser->filtered)
9325 && !(PL_lex_inwhat == OP_SCALAR);
5db06880
NC
9326#ifdef PERL_MAD
9327 I32 stuffstart = s - SvPVX(PL_linestr);
9328 char *tstart;
9329
cd81e915 9330 PL_realtokenstart = -1;
5db06880 9331#endif
79072805 9332
7918f24d
NC
9333 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9334
79072805 9335 s += 2;
3280af22
NIS
9336 d = PL_tokenbuf;
9337 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 9338 if (!outer)
79072805 9339 *d++ = '\n';
c35e046a
AL
9340 peek = s;
9341 while (SPACE_OR_TAB(*peek))
9342 peek++;
3792a11b 9343 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9344 s = peek;
79072805 9345 term = *s++;
3280af22 9346 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 9347 d += len;
3280af22 9348 if (s < PL_bufend)
79072805 9349 s++;
79072805
LW
9350 }
9351 else {
9352 if (*s == '\\')
9353 s++, term = '\'';
9354 else
9355 term = '"';
7e2040f0 9356 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9357 deprecate("bare << to mean <<\"\"");
7e2040f0 9358 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9359 if (d < e)
9360 *d++ = *s;
9361 }
9362 }
3280af22 9363 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9364 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9365 *d++ = '\n';
9366 *d = '\0';
3280af22 9367 len = d - PL_tokenbuf;
5db06880
NC
9368
9369#ifdef PERL_MAD
9370 if (PL_madskills) {
9371 tstart = PL_tokenbuf + !outer;
cd81e915 9372 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 9373 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9374 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9375 stuffstart = s - SvPVX(PL_linestr);
9376 }
9377#endif
6a27c188 9378#ifndef PERL_STRICT_CR
f63a84b2
LW
9379 d = strchr(s, '\r');
9380 if (d) {
b464bac0 9381 char * const olds = s;
f63a84b2 9382 s = d;
3280af22 9383 while (s < PL_bufend) {
f63a84b2
LW
9384 if (*s == '\r') {
9385 *d++ = '\n';
9386 if (*++s == '\n')
9387 s++;
9388 }
9389 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9390 *d++ = *s++;
9391 s++;
9392 }
9393 else
9394 *d++ = *s++;
9395 }
9396 *d = '\0';
3280af22 9397 PL_bufend = d;
95a20fc0 9398 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9399 s = olds;
9400 }
9401#endif
5db06880
NC
9402#ifdef PERL_MAD
9403 found_newline = 0;
9404#endif
10edeb5d 9405 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
9406 herewas = newSVpvn(s,PL_bufend-s);
9407 }
9408 else {
5db06880
NC
9409#ifdef PERL_MAD
9410 herewas = newSVpvn(s-1,found_newline-s+1);
9411#else
73d840c0
AL
9412 s--;
9413 herewas = newSVpvn(s,found_newline-s);
5db06880 9414#endif
73d840c0 9415 }
5db06880
NC
9416#ifdef PERL_MAD
9417 if (PL_madskills) {
9418 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9419 if (PL_thisstuff)
9420 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9421 else
cd81e915 9422 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
9423 }
9424#endif
79072805 9425 s += SvCUR(herewas);
748a9306 9426
5db06880
NC
9427#ifdef PERL_MAD
9428 stuffstart = s - SvPVX(PL_linestr);
9429
9430 if (found_newline)
9431 s--;
9432#endif
9433
7d0a29fe
NC
9434 tmpstr = newSV_type(SVt_PVIV);
9435 SvGROW(tmpstr, 80);
748a9306 9436 if (term == '\'') {
79072805 9437 op_type = OP_CONST;
45977657 9438 SvIV_set(tmpstr, -1);
748a9306
LW
9439 }
9440 else if (term == '`') {
79072805 9441 op_type = OP_BACKTICK;
45977657 9442 SvIV_set(tmpstr, '\\');
748a9306 9443 }
79072805
LW
9444
9445 CLINE;
57843af0 9446 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
9447 PL_multi_open = PL_multi_close = '<';
9448 term = *PL_tokenbuf;
60d63348
FC
9449 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp
9450 && !PL_parser->filtered) {
6136c704
AL
9451 char * const bufptr = PL_sublex_info.super_bufptr;
9452 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 9453 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
9454 s = strchr(bufptr, '\n');
9455 if (!s)
9456 s = bufend;
9457 d = s;
9458 while (s < bufend &&
9459 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9460 if (*s++ == '\n')
57843af0 9461 CopLINE_inc(PL_curcop);
0244c3a4
GS
9462 }
9463 if (s >= bufend) {
eb160463 9464 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
9465 missingterm(PL_tokenbuf);
9466 }
9467 sv_setpvn(herewas,bufptr,d-bufptr+1);
9468 sv_setpvn(tmpstr,d+1,s-d);
9469 s += len - 1;
9470 sv_catpvn(herewas,s,bufend-s);
95a20fc0 9471 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
9472
9473 s = olds;
9474 goto retval;
9475 }
9476 else if (!outer) {
79072805 9477 d = s;
3280af22
NIS
9478 while (s < PL_bufend &&
9479 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 9480 if (*s++ == '\n')
57843af0 9481 CopLINE_inc(PL_curcop);
79072805 9482 }
3280af22 9483 if (s >= PL_bufend) {
eb160463 9484 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9485 missingterm(PL_tokenbuf);
79072805
LW
9486 }
9487 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9488#ifdef PERL_MAD
9489 if (PL_madskills) {
cd81e915
NC
9490 if (PL_thisstuff)
9491 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9492 else
cd81e915 9493 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9494 stuffstart = s - SvPVX(PL_linestr);
9495 }
9496#endif
79072805 9497 s += len - 1;
57843af0 9498 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 9499
3280af22
NIS
9500 sv_catpvn(herewas,s,PL_bufend-s);
9501 sv_setsv(PL_linestr,herewas);
9502 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9503 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9504 PL_last_lop = PL_last_uni = NULL;
79072805
LW
9505 }
9506 else
76f68e9b 9507 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 9508 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
9509#ifdef PERL_MAD
9510 if (PL_madskills) {
9511 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9512 if (PL_thisstuff)
9513 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9514 else
cd81e915 9515 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9516 }
9517#endif
f0e67a1d 9518 PL_bufptr = s;
17cc9359 9519 CopLINE_inc(PL_curcop);
f0e67a1d 9520 if (!outer || !lex_next_chunk(0)) {
eb160463 9521 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9522 missingterm(PL_tokenbuf);
79072805 9523 }
17cc9359 9524 CopLINE_dec(PL_curcop);
f0e67a1d 9525 s = PL_bufptr;
5db06880
NC
9526#ifdef PERL_MAD
9527 stuffstart = s - SvPVX(PL_linestr);
9528#endif
57843af0 9529 CopLINE_inc(PL_curcop);
3280af22 9530 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9531 PL_last_lop = PL_last_uni = NULL;
6a27c188 9532#ifndef PERL_STRICT_CR
3280af22 9533 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
9534 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9535 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 9536 {
3280af22
NIS
9537 PL_bufend[-2] = '\n';
9538 PL_bufend--;
95a20fc0 9539 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 9540 }
3280af22
NIS
9541 else if (PL_bufend[-1] == '\r')
9542 PL_bufend[-1] = '\n';
f63a84b2 9543 }
3280af22
NIS
9544 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9545 PL_bufend[-1] = '\n';
f63a84b2 9546#endif
3280af22 9547 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 9548 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 9549 *(SvPVX(PL_linestr) + off ) = ' ';
37c6a70c 9550 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
3280af22
NIS
9551 sv_catsv(PL_linestr,herewas);
9552 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 9553 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
9554 }
9555 else {
3280af22
NIS
9556 s = PL_bufend;
9557 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
9558 }
9559 }
79072805 9560 s++;
0244c3a4 9561retval:
57843af0 9562 PL_multi_end = CopLINE(PL_curcop);
79072805 9563 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 9564 SvPV_shrink_to_cur(tmpstr);
79072805 9565 }
8990e307 9566 SvREFCNT_dec(herewas);
2f31ce75 9567 if (!IN_BYTES) {
95a20fc0 9568 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
9569 SvUTF8_on(tmpstr);
9570 else if (PL_encoding)
9571 sv_recode_to_utf8(tmpstr, PL_encoding);
9572 }
3280af22 9573 PL_lex_stuff = tmpstr;
6154021b 9574 pl_yylval.ival = op_type;
79072805
LW
9575 return s;
9576}
9577
02aa26ce
NT
9578/* scan_inputsymbol
9579 takes: current position in input buffer
9580 returns: new position in input buffer
6154021b 9581 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
9582
9583 This code handles:
9584
9585 <> read from ARGV
9586 <FH> read from filehandle
9587 <pkg::FH> read from package qualified filehandle
9588 <pkg'FH> read from package qualified filehandle
9589 <$fh> read from filehandle in $fh
9590 <*.h> filename glob
9591
9592*/
9593
76e3520e 9594STATIC char *
cea2e8a9 9595S_scan_inputsymbol(pTHX_ char *start)
79072805 9596{
97aff369 9597 dVAR;
02aa26ce 9598 register char *s = start; /* current position in buffer */
1b420867 9599 char *end;
79072805 9600 I32 len;
6136c704
AL
9601 char *d = PL_tokenbuf; /* start of temp holding space */
9602 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9603
7918f24d
NC
9604 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9605
1b420867
GS
9606 end = strchr(s, '\n');
9607 if (!end)
9608 end = PL_bufend;
9609 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
9610
9611 /* die if we didn't have space for the contents of the <>,
1b420867 9612 or if it didn't end, or if we see a newline
02aa26ce
NT
9613 */
9614
bb7a0f54 9615 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 9616 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 9617 if (s >= end)
cea2e8a9 9618 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 9619
fc36a67e 9620 s++;
02aa26ce
NT
9621
9622 /* check for <$fh>
9623 Remember, only scalar variables are interpreted as filehandles by
9624 this code. Anything more complex (e.g., <$fh{$num}>) will be
9625 treated as a glob() call.
9626 This code makes use of the fact that except for the $ at the front,
9627 a scalar variable and a filehandle look the same.
9628 */
4633a7c4 9629 if (*d == '$' && d[1]) d++;
02aa26ce
NT
9630
9631 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 9632 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
2a507800 9633 d += UTF ? UTF8SKIP(d) : 1;
02aa26ce
NT
9634
9635 /* If we've tried to read what we allow filehandles to look like, and
9636 there's still text left, then it must be a glob() and not a getline.
9637 Use scan_str to pull out the stuff between the <> and treat it
9638 as nothing more than a string.
9639 */
9640
3280af22 9641 if (d - PL_tokenbuf != len) {
6154021b 9642 pl_yylval.ival = OP_GLOB;
5db06880 9643 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9644 if (!s)
cea2e8a9 9645 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
9646 return s;
9647 }
395c3793 9648 else {
9b3023bc 9649 bool readline_overriden = FALSE;
6136c704 9650 GV *gv_readline;
9b3023bc 9651 GV **gvp;
02aa26ce 9652 /* we're in a filehandle read situation */
3280af22 9653 d = PL_tokenbuf;
02aa26ce
NT
9654
9655 /* turn <> into <ARGV> */
79072805 9656 if (!len)
689badd5 9657 Copy("ARGV",d,5,char);
02aa26ce 9658
9b3023bc 9659 /* Check whether readline() is overriden */
fafc274c 9660 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 9661 if ((gv_readline
ba979b31 9662 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 9663 ||
017a3ce5 9664 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 9665 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 9666 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
9667 readline_overriden = TRUE;
9668
02aa26ce
NT
9669 /* if <$fh>, create the ops to turn the variable into a
9670 filehandle
9671 */
79072805 9672 if (*d == '$') {
02aa26ce
NT
9673 /* try to find it in the pad for this block, otherwise find
9674 add symbol table ops
9675 */
bc9b26ca 9676 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 9677 if (tmp != NOT_IN_PAD) {
00b1698f 9678 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
9679 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9680 HEK * const stashname = HvNAME_HEK(stash);
9681 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 9682 sv_catpvs(sym, "::");
f558d5af
JH
9683 sv_catpv(sym, d+1);
9684 d = SvPVX(sym);
9685 goto intro_sym;
9686 }
9687 else {
6136c704 9688 OP * const o = newOP(OP_PADSV, 0);
f558d5af 9689 o->op_targ = tmp;
9b3023bc
RGS
9690 PL_lex_op = readline_overriden
9691 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9692 op_append_elem(OP_LIST, o,
9b3023bc
RGS
9693 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9694 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 9695 }
a0d0e21e
LW
9696 }
9697 else {
f558d5af
JH
9698 GV *gv;
9699 ++d;
9700intro_sym:
9701 gv = gv_fetchpv(d,
9702 (PL_in_eval
9703 ? (GV_ADDMULTI | GV_ADDINEVAL)
25db2ea6 9704 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
f558d5af 9705 SVt_PV);
9b3023bc
RGS
9706 PL_lex_op = readline_overriden
9707 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9708 op_append_elem(OP_LIST,
9b3023bc
RGS
9709 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9710 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9711 : (OP*)newUNOP(OP_READLINE, 0,
9712 newUNOP(OP_RV2SV, 0,
9713 newGVOP(OP_GV, 0, gv)));
a0d0e21e 9714 }
7c6fadd6
RGS
9715 if (!readline_overriden)
9716 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
9717 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9718 pl_yylval.ival = OP_NULL;
79072805 9719 }
02aa26ce
NT
9720
9721 /* If it's none of the above, it must be a literal filehandle
9722 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 9723 else {
25db2ea6 9724 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9b3023bc
RGS
9725 PL_lex_op = readline_overriden
9726 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9727 op_append_elem(OP_LIST,
9b3023bc
RGS
9728 newGVOP(OP_GV, 0, gv),
9729 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9730 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 9731 pl_yylval.ival = OP_NULL;
79072805
LW
9732 }
9733 }
02aa26ce 9734
79072805
LW
9735 return s;
9736}
9737
02aa26ce
NT
9738
9739/* scan_str
9740 takes: start position in buffer
09bef843
SB
9741 keep_quoted preserve \ on the embedded delimiter(s)
9742 keep_delims preserve the delimiters around the string
02aa26ce
NT
9743 returns: position to continue reading from buffer
9744 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9745 updates the read buffer.
9746
9747 This subroutine pulls a string out of the input. It is called for:
9748 q single quotes q(literal text)
9749 ' single quotes 'literal text'
9750 qq double quotes qq(interpolate $here please)
9751 " double quotes "interpolate $here please"
9752 qx backticks qx(/bin/ls -l)
9753 ` backticks `/bin/ls -l`
9754 qw quote words @EXPORT_OK = qw( func() $spam )
9755 m// regexp match m/this/
9756 s/// regexp substitute s/this/that/
9757 tr/// string transliterate tr/this/that/
9758 y/// string transliterate y/this/that/
9759 ($*@) sub prototypes sub foo ($)
09bef843 9760 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
9761 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9762
9763 In most of these cases (all but <>, patterns and transliterate)
9764 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9765 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9766 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9767 calls scan_str().
4e553d73 9768
02aa26ce
NT
9769 It skips whitespace before the string starts, and treats the first
9770 character as the delimiter. If the delimiter is one of ([{< then
9771 the corresponding "close" character )]}> is used as the closing
9772 delimiter. It allows quoting of delimiters, and if the string has
9773 balanced delimiters ([{<>}]) it allows nesting.
9774
37fd879b
HS
9775 On success, the SV with the resulting string is put into lex_stuff or,
9776 if that is already non-NULL, into lex_repl. The second case occurs only
9777 when parsing the RHS of the special constructs s/// and tr/// (y///).
9778 For convenience, the terminating delimiter character is stuffed into
9779 SvIVX of the SV.
02aa26ce
NT
9780*/
9781
76e3520e 9782STATIC char *
09bef843 9783S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 9784{
97aff369 9785 dVAR;
02aa26ce 9786 SV *sv; /* scalar value: string */
d3fcec1f 9787 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
9788 register char *s = start; /* current position in the buffer */
9789 register char term; /* terminating character */
9790 register char *to; /* current position in the sv's data */
9791 I32 brackets = 1; /* bracket nesting level */
89491803 9792 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 9793 I32 termcode; /* terminating char. code */
89ebb4a3 9794 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 9795 STRLEN termlen; /* length of terminating string */
0331ef07 9796 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
9797#ifdef PERL_MAD
9798 int stuffstart;
9799 char *tstart;
9800#endif
02aa26ce 9801
7918f24d
NC
9802 PERL_ARGS_ASSERT_SCAN_STR;
9803
02aa26ce 9804 /* skip space before the delimiter */
29595ff2
NC
9805 if (isSPACE(*s)) {
9806 s = PEEKSPACE(s);
9807 }
02aa26ce 9808
5db06880 9809#ifdef PERL_MAD
cd81e915
NC
9810 if (PL_realtokenstart >= 0) {
9811 stuffstart = PL_realtokenstart;
9812 PL_realtokenstart = -1;
5db06880
NC
9813 }
9814 else
9815 stuffstart = start - SvPVX(PL_linestr);
9816#endif
02aa26ce 9817 /* mark where we are, in case we need to report errors */
79072805 9818 CLINE;
02aa26ce
NT
9819
9820 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 9821 term = *s;
220e2d4e
IH
9822 if (!UTF) {
9823 termcode = termstr[0] = term;
9824 termlen = 1;
9825 }
9826 else {
f3b9ce0f 9827 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
9828 Copy(s, termstr, termlen, U8);
9829 if (!UTF8_IS_INVARIANT(term))
9830 has_utf8 = TRUE;
9831 }
b1c7b182 9832
02aa26ce 9833 /* mark where we are */
57843af0 9834 PL_multi_start = CopLINE(PL_curcop);
3280af22 9835 PL_multi_open = term;
02aa26ce
NT
9836
9837 /* find corresponding closing delimiter */
93a17b20 9838 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
9839 termcode = termstr[0] = term = tmps[5];
9840
3280af22 9841 PL_multi_close = term;
79072805 9842
561b68a9
SH
9843 /* create a new SV to hold the contents. 79 is the SV's initial length.
9844 What a random number. */
7d0a29fe
NC
9845 sv = newSV_type(SVt_PVIV);
9846 SvGROW(sv, 80);
45977657 9847 SvIV_set(sv, termcode);
a0d0e21e 9848 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
9849
9850 /* move past delimiter and try to read a complete string */
09bef843 9851 if (keep_delims)
220e2d4e
IH
9852 sv_catpvn(sv, s, termlen);
9853 s += termlen;
5db06880
NC
9854#ifdef PERL_MAD
9855 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9856 if (!PL_thisopen && !keep_delims) {
9857 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9858 stuffstart = s - SvPVX(PL_linestr);
9859 }
9860#endif
93a17b20 9861 for (;;) {
220e2d4e
IH
9862 if (PL_encoding && !UTF) {
9863 bool cont = TRUE;
9864
9865 while (cont) {
95a20fc0 9866 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 9867 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 9868 &offset, (char*)termstr, termlen);
6136c704
AL
9869 const char * const ns = SvPVX_const(PL_linestr) + offset;
9870 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
9871
9872 for (; s < ns; s++) {
60d63348 9873 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
220e2d4e
IH
9874 CopLINE_inc(PL_curcop);
9875 }
9876 if (!found)
9877 goto read_more_line;
9878 else {
9879 /* handle quoted delimiters */
52327caf 9880 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 9881 const char *t;
95a20fc0 9882 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
9883 t--;
9884 if ((svlast-1 - t) % 2) {
9885 if (!keep_quoted) {
9886 *(svlast-1) = term;
9887 *svlast = '\0';
9888 SvCUR_set(sv, SvCUR(sv) - 1);
9889 }
9890 continue;
9891 }
9892 }
9893 if (PL_multi_open == PL_multi_close) {
9894 cont = FALSE;
9895 }
9896 else {
f54cb97a
AL
9897 const char *t;
9898 char *w;
0331ef07 9899 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
9900 /* At here, all closes are "was quoted" one,
9901 so we don't check PL_multi_close. */
9902 if (*t == '\\') {
9903 if (!keep_quoted && *(t+1) == PL_multi_open)
9904 t++;
9905 else
9906 *w++ = *t++;
9907 }
9908 else if (*t == PL_multi_open)
9909 brackets++;
9910
9911 *w = *t;
9912 }
9913 if (w < t) {
9914 *w++ = term;
9915 *w = '\0';
95a20fc0 9916 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 9917 }
0331ef07 9918 last_off = w - SvPVX(sv);
220e2d4e
IH
9919 if (--brackets <= 0)
9920 cont = FALSE;
9921 }
9922 }
9923 }
9924 if (!keep_delims) {
9925 SvCUR_set(sv, SvCUR(sv) - 1);
9926 *SvEND(sv) = '\0';
9927 }
9928 break;
9929 }
9930
02aa26ce 9931 /* extend sv if need be */
3280af22 9932 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 9933 /* set 'to' to the next character in the sv's string */
463ee0b2 9934 to = SvPVX(sv)+SvCUR(sv);
09bef843 9935
02aa26ce 9936 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
9937 if (PL_multi_open == PL_multi_close) {
9938 for (; s < PL_bufend; s++,to++) {
02aa26ce 9939 /* embedded newlines increment the current line number */
60d63348 9940 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
57843af0 9941 CopLINE_inc(PL_curcop);
02aa26ce 9942 /* handle quoted delimiters */
3280af22 9943 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 9944 if (!keep_quoted && s[1] == term)
a0d0e21e 9945 s++;
02aa26ce 9946 /* any other quotes are simply copied straight through */
a0d0e21e
LW
9947 else
9948 *to++ = *s++;
9949 }
02aa26ce
NT
9950 /* terminate when run out of buffer (the for() condition), or
9951 have found the terminator */
220e2d4e
IH
9952 else if (*s == term) {
9953 if (termlen == 1)
9954 break;
f3b9ce0f 9955 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
9956 break;
9957 }
63cd0674 9958 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9959 has_utf8 = TRUE;
93a17b20
LW
9960 *to = *s;
9961 }
9962 }
02aa26ce
NT
9963
9964 /* if the terminator isn't the same as the start character (e.g.,
9965 matched brackets), we have to allow more in the quoting, and
9966 be prepared for nested brackets.
9967 */
93a17b20 9968 else {
02aa26ce 9969 /* read until we run out of string, or we find the terminator */
3280af22 9970 for (; s < PL_bufend; s++,to++) {
02aa26ce 9971 /* embedded newlines increment the line count */
60d63348 9972 if (*s == '\n' && !PL_rsfp && !PL_parser->filtered)
57843af0 9973 CopLINE_inc(PL_curcop);
02aa26ce 9974 /* backslashes can escape the open or closing characters */
3280af22 9975 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
9976 if (!keep_quoted &&
9977 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
9978 s++;
9979 else
9980 *to++ = *s++;
9981 }
02aa26ce 9982 /* allow nested opens and closes */
3280af22 9983 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 9984 break;
3280af22 9985 else if (*s == PL_multi_open)
93a17b20 9986 brackets++;
63cd0674 9987 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9988 has_utf8 = TRUE;
93a17b20
LW
9989 *to = *s;
9990 }
9991 }
02aa26ce 9992 /* terminate the copied string and update the sv's end-of-string */
93a17b20 9993 *to = '\0';
95a20fc0 9994 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 9995
02aa26ce
NT
9996 /*
9997 * this next chunk reads more into the buffer if we're not done yet
9998 */
9999
b1c7b182
GS
10000 if (s < PL_bufend)
10001 break; /* handle case where we are done yet :-) */
79072805 10002
6a27c188 10003#ifndef PERL_STRICT_CR
95a20fc0 10004 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
10005 if ((to[-2] == '\r' && to[-1] == '\n') ||
10006 (to[-2] == '\n' && to[-1] == '\r'))
10007 {
f63a84b2
LW
10008 to[-2] = '\n';
10009 to--;
95a20fc0 10010 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
10011 }
10012 else if (to[-1] == '\r')
10013 to[-1] = '\n';
10014 }
95a20fc0 10015 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
10016 to[-1] = '\n';
10017#endif
10018
220e2d4e 10019 read_more_line:
02aa26ce
NT
10020 /* if we're out of file, or a read fails, bail and reset the current
10021 line marker so we can report where the unterminated string began
10022 */
5db06880
NC
10023#ifdef PERL_MAD
10024 if (PL_madskills) {
c35e046a 10025 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
10026 if (PL_thisstuff)
10027 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 10028 else
cd81e915 10029 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
10030 }
10031#endif
f0e67a1d
Z
10032 CopLINE_inc(PL_curcop);
10033 PL_bufptr = PL_bufend;
10034 if (!lex_next_chunk(0)) {
c07a80fd 10035 sv_free(sv);
eb160463 10036 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 10037 return NULL;
79072805 10038 }
f0e67a1d 10039 s = PL_bufptr;
5db06880
NC
10040#ifdef PERL_MAD
10041 stuffstart = 0;
10042#endif
378cc40b 10043 }
4e553d73 10044
02aa26ce
NT
10045 /* at this point, we have successfully read the delimited string */
10046
220e2d4e 10047 if (!PL_encoding || UTF) {
5db06880
NC
10048#ifdef PERL_MAD
10049 if (PL_madskills) {
c35e046a 10050 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 10051 const int len = s - tstart;
cd81e915 10052 if (PL_thisstuff)
c35e046a 10053 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10054 else
c35e046a 10055 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10056 if (!PL_thisclose && !keep_delims)
10057 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
10058 }
10059#endif
10060
220e2d4e
IH
10061 if (keep_delims)
10062 sv_catpvn(sv, s, termlen);
10063 s += termlen;
10064 }
5db06880
NC
10065#ifdef PERL_MAD
10066 else {
10067 if (PL_madskills) {
c35e046a
AL
10068 char * const tstart = SvPVX(PL_linestr) + stuffstart;
10069 const int len = s - tstart - termlen;
cd81e915 10070 if (PL_thisstuff)
c35e046a 10071 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 10072 else
c35e046a 10073 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
10074 if (!PL_thisclose && !keep_delims)
10075 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
10076 }
10077 }
10078#endif
220e2d4e 10079 if (has_utf8 || PL_encoding)
b1c7b182 10080 SvUTF8_on(sv);
d0063567 10081
57843af0 10082 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
10083
10084 /* if we allocated too much space, give some back */
93a17b20
LW
10085 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10086 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 10087 SvPV_renew(sv, SvLEN(sv));
79072805 10088 }
02aa26ce
NT
10089
10090 /* decide whether this is the first or second quoted string we've read
10091 for this op
10092 */
4e553d73 10093
3280af22
NIS
10094 if (PL_lex_stuff)
10095 PL_lex_repl = sv;
79072805 10096 else
3280af22 10097 PL_lex_stuff = sv;
378cc40b
LW
10098 return s;
10099}
10100
02aa26ce
NT
10101/*
10102 scan_num
10103 takes: pointer to position in buffer
10104 returns: pointer to new position in buffer
6154021b 10105 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
10106
10107 Read a number in any of the formats that Perl accepts:
10108
7fd134d9
JH
10109 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10110 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
10111 0b[01](_?[01])*
10112 0[0-7](_?[0-7])*
10113 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10114
3280af22 10115 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10116 thing it reads.
10117
10118 If it reads a number without a decimal point or an exponent, it will
10119 try converting the number to an integer and see if it can do so
10120 without loss of precision.
10121*/
4e553d73 10122
378cc40b 10123char *
bfed75c6 10124Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10125{
97aff369 10126 dVAR;
bfed75c6 10127 register const char *s = start; /* current position in buffer */
02aa26ce
NT
10128 register char *d; /* destination in temp buffer */
10129 register char *e; /* end of temp buffer */
86554af2 10130 NV nv; /* number read, as a double */
a0714e2c 10131 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10132 bool floatit; /* boolean: int or float? */
cbbf8932 10133 const char *lastub = NULL; /* position of last underbar */
bfed75c6 10134 static char const number_too_long[] = "Number too long";
378cc40b 10135
7918f24d
NC
10136 PERL_ARGS_ASSERT_SCAN_NUM;
10137
02aa26ce
NT
10138 /* We use the first character to decide what type of number this is */
10139
378cc40b 10140 switch (*s) {
79072805 10141 default:
cea2e8a9 10142 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 10143
02aa26ce 10144 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10145 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10146 case '0':
10147 {
02aa26ce
NT
10148 /* variables:
10149 u holds the "number so far"
4f19785b
WSI
10150 shift the power of 2 of the base
10151 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10152 overflowed was the number more than we can hold?
10153
10154 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10155 we in octal/hex/binary?" indicator to disallow hex characters
10156 when in octal mode.
02aa26ce 10157 */
9e24b6e2
JH
10158 NV n = 0.0;
10159 UV u = 0;
79072805 10160 I32 shift;
9e24b6e2 10161 bool overflowed = FALSE;
61f33854 10162 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10163 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10164 static const char* const bases[5] =
10165 { "", "binary", "", "octal", "hexadecimal" };
10166 static const char* const Bases[5] =
10167 { "", "Binary", "", "Octal", "Hexadecimal" };
10168 static const char* const maxima[5] =
10169 { "",
10170 "0b11111111111111111111111111111111",
10171 "",
10172 "037777777777",
10173 "0xffffffff" };
bfed75c6 10174 const char *base, *Base, *max;
378cc40b 10175
02aa26ce 10176 /* check for hex */
a674e8db 10177 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10178 shift = 4;
10179 s += 2;
61f33854 10180 just_zero = FALSE;
a674e8db 10181 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10182 shift = 1;
10183 s += 2;
61f33854 10184 just_zero = FALSE;
378cc40b 10185 }
02aa26ce 10186 /* check for a decimal in disguise */
b78218b7 10187 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10188 goto decimal;
02aa26ce 10189 /* so it must be octal */
928753ea 10190 else {
378cc40b 10191 shift = 3;
928753ea
JH
10192 s++;
10193 }
10194
10195 if (*s == '_') {
a2a5de95 10196 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10197 "Misplaced _ in number");
10198 lastub = s++;
10199 }
9e24b6e2
JH
10200
10201 base = bases[shift];
10202 Base = Bases[shift];
10203 max = maxima[shift];
02aa26ce 10204
4f19785b 10205 /* read the rest of the number */
378cc40b 10206 for (;;) {
9e24b6e2 10207 /* x is used in the overflow test,
893fe2c2 10208 b is the digit we're adding on. */
9e24b6e2 10209 UV x, b;
55497cff 10210
378cc40b 10211 switch (*s) {
02aa26ce
NT
10212
10213 /* if we don't mention it, we're done */
378cc40b
LW
10214 default:
10215 goto out;
02aa26ce 10216
928753ea 10217 /* _ are ignored -- but warned about if consecutive */
de3bb511 10218 case '_':
a2a5de95
NC
10219 if (lastub && s == lastub + 1)
10220 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10221 "Misplaced _ in number");
928753ea 10222 lastub = s++;
de3bb511 10223 break;
02aa26ce
NT
10224
10225 /* 8 and 9 are not octal */
378cc40b 10226 case '8': case '9':
4f19785b 10227 if (shift == 3)
cea2e8a9 10228 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10229 /* FALL THROUGH */
02aa26ce
NT
10230
10231 /* octal digits */
4f19785b 10232 case '2': case '3': case '4':
378cc40b 10233 case '5': case '6': case '7':
4f19785b 10234 if (shift == 1)
cea2e8a9 10235 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10236 /* FALL THROUGH */
10237
10238 case '0': case '1':
02aa26ce 10239 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10240 goto digit;
02aa26ce
NT
10241
10242 /* hex digits */
378cc40b
LW
10243 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10244 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10245 /* make sure they said 0x */
378cc40b
LW
10246 if (shift != 4)
10247 goto out;
55497cff 10248 b = (*s++ & 7) + 9;
02aa26ce
NT
10249
10250 /* Prepare to put the digit we have onto the end
10251 of the number so far. We check for overflows.
10252 */
10253
55497cff 10254 digit:
61f33854 10255 just_zero = FALSE;
9e24b6e2
JH
10256 if (!overflowed) {
10257 x = u << shift; /* make room for the digit */
10258
10259 if ((x >> shift) != u
10260 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10261 overflowed = TRUE;
10262 n = (NV) u;
9b387841
NC
10263 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10264 "Integer overflow in %s number",
10265 base);
9e24b6e2
JH
10266 } else
10267 u = x | b; /* add the digit to the end */
10268 }
10269 if (overflowed) {
10270 n *= nvshift[shift];
10271 /* If an NV has not enough bits in its
10272 * mantissa to represent an UV this summing of
10273 * small low-order numbers is a waste of time
10274 * (because the NV cannot preserve the
10275 * low-order bits anyway): we could just
10276 * remember when did we overflow and in the
10277 * end just multiply n by the right
10278 * amount. */
10279 n += (NV) b;
55497cff 10280 }
378cc40b
LW
10281 break;
10282 }
10283 }
02aa26ce
NT
10284
10285 /* if we get here, we had success: make a scalar value from
10286 the number.
10287 */
378cc40b 10288 out:
928753ea
JH
10289
10290 /* final misplaced underbar check */
10291 if (s[-1] == '_') {
a2a5de95 10292 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10293 }
10294
9e24b6e2 10295 if (overflowed) {
a2a5de95
NC
10296 if (n > 4294967295.0)
10297 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10298 "%s number > %s non-portable",
10299 Base, max);
b081dd7e 10300 sv = newSVnv(n);
9e24b6e2
JH
10301 }
10302 else {
15041a67 10303#if UVSIZE > 4
a2a5de95
NC
10304 if (u > 0xffffffff)
10305 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10306 "%s number > %s non-portable",
10307 Base, max);
2cc4c2dc 10308#endif
b081dd7e 10309 sv = newSVuv(u);
9e24b6e2 10310 }
61f33854 10311 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10312 sv = new_constant(start, s - start, "integer",
eb0d8d16 10313 sv, NULL, NULL, 0);
61f33854 10314 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10315 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10316 }
10317 break;
02aa26ce
NT
10318
10319 /*
10320 handle decimal numbers.
10321 we're also sent here when we read a 0 as the first digit
10322 */
378cc40b
LW
10323 case '1': case '2': case '3': case '4': case '5':
10324 case '6': case '7': case '8': case '9': case '.':
10325 decimal:
3280af22
NIS
10326 d = PL_tokenbuf;
10327 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10328 floatit = FALSE;
02aa26ce
NT
10329
10330 /* read next group of digits and _ and copy into d */
de3bb511 10331 while (isDIGIT(*s) || *s == '_') {
4e553d73 10332 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10333 if -w is on
10334 */
93a17b20 10335 if (*s == '_') {
a2a5de95
NC
10336 if (lastub && s == lastub + 1)
10337 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10338 "Misplaced _ in number");
928753ea 10339 lastub = s++;
93a17b20 10340 }
fc36a67e 10341 else {
02aa26ce 10342 /* check for end of fixed-length buffer */
fc36a67e 10343 if (d >= e)
cea2e8a9 10344 Perl_croak(aTHX_ number_too_long);
02aa26ce 10345 /* if we're ok, copy the character */
378cc40b 10346 *d++ = *s++;
fc36a67e 10347 }
378cc40b 10348 }
02aa26ce
NT
10349
10350 /* final misplaced underbar check */
928753ea 10351 if (lastub && s == lastub + 1) {
a2a5de95 10352 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10353 }
02aa26ce
NT
10354
10355 /* read a decimal portion if there is one. avoid
10356 3..5 being interpreted as the number 3. followed
10357 by .5
10358 */
2f3197b3 10359 if (*s == '.' && s[1] != '.') {
79072805 10360 floatit = TRUE;
378cc40b 10361 *d++ = *s++;
02aa26ce 10362
928753ea 10363 if (*s == '_') {
a2a5de95
NC
10364 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10365 "Misplaced _ in number");
928753ea
JH
10366 lastub = s;
10367 }
10368
10369 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10370 */
fc36a67e 10371 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10372 /* fixed length buffer check */
fc36a67e 10373 if (d >= e)
cea2e8a9 10374 Perl_croak(aTHX_ number_too_long);
928753ea 10375 if (*s == '_') {
a2a5de95
NC
10376 if (lastub && s == lastub + 1)
10377 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10378 "Misplaced _ in number");
928753ea
JH
10379 lastub = s;
10380 }
10381 else
fc36a67e 10382 *d++ = *s;
378cc40b 10383 }
928753ea
JH
10384 /* fractional part ending in underbar? */
10385 if (s[-1] == '_') {
a2a5de95
NC
10386 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10387 "Misplaced _ in number");
928753ea 10388 }
dd629d5b
GS
10389 if (*s == '.' && isDIGIT(s[1])) {
10390 /* oops, it's really a v-string, but without the "v" */
f4758303 10391 s = start;
dd629d5b
GS
10392 goto vstring;
10393 }
378cc40b 10394 }
02aa26ce
NT
10395
10396 /* read exponent part, if present */
3792a11b 10397 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10398 floatit = TRUE;
10399 s++;
02aa26ce
NT
10400
10401 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10402 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10403
7fd134d9
JH
10404 /* stray preinitial _ */
10405 if (*s == '_') {
a2a5de95
NC
10406 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10407 "Misplaced _ in number");
7fd134d9
JH
10408 lastub = s++;
10409 }
10410
02aa26ce 10411 /* allow positive or negative exponent */
378cc40b
LW
10412 if (*s == '+' || *s == '-')
10413 *d++ = *s++;
02aa26ce 10414
7fd134d9
JH
10415 /* stray initial _ */
10416 if (*s == '_') {
a2a5de95
NC
10417 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10418 "Misplaced _ in number");
7fd134d9
JH
10419 lastub = s++;
10420 }
10421
7fd134d9
JH
10422 /* read digits of exponent */
10423 while (isDIGIT(*s) || *s == '_') {
10424 if (isDIGIT(*s)) {
10425 if (d >= e)
10426 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10427 *d++ = *s++;
7fd134d9
JH
10428 }
10429 else {
041457d9 10430 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10431 (!isDIGIT(s[1]) && s[1] != '_')))
10432 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10433 "Misplaced _ in number");
b3b48e3e 10434 lastub = s++;
7fd134d9 10435 }
7fd134d9 10436 }
378cc40b 10437 }
02aa26ce 10438
02aa26ce 10439
0b7fceb9 10440 /*
58bb9ec3
NC
10441 We try to do an integer conversion first if no characters
10442 indicating "float" have been found.
0b7fceb9
MU
10443 */
10444
10445 if (!floatit) {
58bb9ec3 10446 UV uv;
6136c704 10447 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10448
10449 if (flags == IS_NUMBER_IN_UV) {
10450 if (uv <= IV_MAX)
b081dd7e 10451 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10452 else
b081dd7e 10453 sv = newSVuv(uv);
58bb9ec3
NC
10454 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10455 if (uv <= (UV) IV_MIN)
b081dd7e 10456 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10457 else
10458 floatit = TRUE;
10459 } else
10460 floatit = TRUE;
10461 }
0b7fceb9 10462 if (floatit) {
58bb9ec3
NC
10463 /* terminate the string */
10464 *d = '\0';
86554af2 10465 nv = Atof(PL_tokenbuf);
b081dd7e 10466 sv = newSVnv(nv);
86554af2 10467 }
86554af2 10468
eb0d8d16
NC
10469 if ( floatit
10470 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10471 const char *const key = floatit ? "float" : "integer";
10472 const STRLEN keylen = floatit ? 5 : 7;
10473 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10474 key, keylen, sv, NULL, NULL, 0);
10475 }
378cc40b 10476 break;
0b7fceb9 10477
e312add1 10478 /* if it starts with a v, it could be a v-string */
a7cb1f99 10479 case 'v':
dd629d5b 10480vstring:
561b68a9 10481 sv = newSV(5); /* preallocate storage space */
65b06e02 10482 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10483 break;
79072805 10484 }
a687059c 10485
02aa26ce
NT
10486 /* make the op for the constant and return */
10487
a86a20aa 10488 if (sv)
b73d6f50 10489 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10490 else
5f66b61c 10491 lvalp->opval = NULL;
a687059c 10492
73d840c0 10493 return (char *)s;
378cc40b
LW
10494}
10495
76e3520e 10496STATIC char *
cea2e8a9 10497S_scan_formline(pTHX_ register char *s)
378cc40b 10498{
97aff369 10499 dVAR;
79072805 10500 register char *eol;
378cc40b 10501 register char *t;
6136c704 10502 SV * const stuff = newSVpvs("");
79072805 10503 bool needargs = FALSE;
c5ee2135 10504 bool eofmt = FALSE;
5db06880
NC
10505#ifdef PERL_MAD
10506 char *tokenstart = s;
4f61fd4b
JC
10507 SV* savewhite = NULL;
10508
5db06880 10509 if (PL_madskills) {
cd81e915
NC
10510 savewhite = PL_thiswhite;
10511 PL_thiswhite = 0;
5db06880
NC
10512 }
10513#endif
378cc40b 10514
7918f24d
NC
10515 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10516
79072805 10517 while (!needargs) {
a1b95068 10518 if (*s == '.') {
c35e046a 10519 t = s+1;
51882d45 10520#ifdef PERL_STRICT_CR
c35e046a
AL
10521 while (SPACE_OR_TAB(*t))
10522 t++;
51882d45 10523#else
c35e046a
AL
10524 while (SPACE_OR_TAB(*t) || *t == '\r')
10525 t++;
51882d45 10526#endif
c5ee2135
WL
10527 if (*t == '\n' || t == PL_bufend) {
10528 eofmt = TRUE;
79072805 10529 break;
c5ee2135 10530 }
79072805 10531 }
60d63348 10532 if (PL_in_eval && !PL_rsfp && !PL_parser->filtered) {
07409e01 10533 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 10534 if (!eol++)
3280af22 10535 eol = PL_bufend;
0f85fab0
LW
10536 }
10537 else
3280af22 10538 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 10539 if (*s != '#') {
a0d0e21e
LW
10540 for (t = s; t < eol; t++) {
10541 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10542 needargs = FALSE;
10543 goto enough; /* ~~ must be first line in formline */
378cc40b 10544 }
a0d0e21e
LW
10545 if (*t == '@' || *t == '^')
10546 needargs = TRUE;
378cc40b 10547 }
7121b347
MG
10548 if (eol > s) {
10549 sv_catpvn(stuff, s, eol-s);
2dc4c65b 10550#ifndef PERL_STRICT_CR
7121b347
MG
10551 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10552 char *end = SvPVX(stuff) + SvCUR(stuff);
10553 end[-2] = '\n';
10554 end[-1] = '\0';
b162af07 10555 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 10556 }
2dc4c65b 10557#endif
7121b347
MG
10558 }
10559 else
10560 break;
79072805 10561 }
95a20fc0 10562 s = (char*)eol;
60d63348 10563 if (PL_rsfp || PL_parser->filtered) {
f0e67a1d 10564 bool got_some;
5db06880
NC
10565#ifdef PERL_MAD
10566 if (PL_madskills) {
cd81e915
NC
10567 if (PL_thistoken)
10568 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 10569 else
cd81e915 10570 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
10571 }
10572#endif
f0e67a1d
Z
10573 PL_bufptr = PL_bufend;
10574 CopLINE_inc(PL_curcop);
10575 got_some = lex_next_chunk(0);
10576 CopLINE_dec(PL_curcop);
10577 s = PL_bufptr;
5db06880 10578#ifdef PERL_MAD
f0e67a1d 10579 tokenstart = PL_bufptr;
5db06880 10580#endif
f0e67a1d 10581 if (!got_some)
378cc40b 10582 break;
378cc40b 10583 }
463ee0b2 10584 incline(s);
79072805 10585 }
a0d0e21e
LW
10586 enough:
10587 if (SvCUR(stuff)) {
3280af22 10588 PL_expect = XTERM;
79072805 10589 if (needargs) {
3280af22 10590 PL_lex_state = LEX_NORMAL;
cd81e915 10591 start_force(PL_curforce);
9ded7720 10592 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
10593 force_next(',');
10594 }
a0d0e21e 10595 else
3280af22 10596 PL_lex_state = LEX_FORMLINE;
1bd51a4c 10597 if (!IN_BYTES) {
95a20fc0 10598 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
10599 SvUTF8_on(stuff);
10600 else if (PL_encoding)
10601 sv_recode_to_utf8(stuff, PL_encoding);
10602 }
cd81e915 10603 start_force(PL_curforce);
9ded7720 10604 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 10605 force_next(THING);
cd81e915 10606 start_force(PL_curforce);
9ded7720 10607 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 10608 force_next(LSTOP);
378cc40b 10609 }
79072805 10610 else {
8990e307 10611 SvREFCNT_dec(stuff);
c5ee2135
WL
10612 if (eofmt)
10613 PL_lex_formbrack = 0;
3280af22 10614 PL_bufptr = s;
79072805 10615 }
5db06880
NC
10616#ifdef PERL_MAD
10617 if (PL_madskills) {
cd81e915
NC
10618 if (PL_thistoken)
10619 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 10620 else
cd81e915
NC
10621 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10622 PL_thiswhite = savewhite;
5db06880
NC
10623 }
10624#endif
79072805 10625 return s;
378cc40b 10626}
a687059c 10627
ba6d6ac9 10628I32
864dbfa3 10629Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 10630{
97aff369 10631 dVAR;
a3b680e6 10632 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 10633 CV* const outsidecv = PL_compcv;
8990e307 10634
3280af22
NIS
10635 if (PL_compcv) {
10636 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 10637 }
7766f137 10638 SAVEI32(PL_subline);
3280af22 10639 save_item(PL_subname);
3280af22 10640 SAVESPTR(PL_compcv);
3280af22 10641
ea726b52 10642 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
10643 CvFLAGS(PL_compcv) |= flags;
10644
57843af0 10645 PL_subline = CopLINE(PL_curcop);
dd2155a4 10646 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 10647 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 10648 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 10649
8990e307
LW
10650 return oldsavestack_ix;
10651}
10652
084592ab
CN
10653#ifdef __SC__
10654#pragma segment Perl_yylex
10655#endif
af41e527
NC
10656static int
10657S_yywarn(pTHX_ const char *const s)
8990e307 10658{
97aff369 10659 dVAR;
7918f24d
NC
10660
10661 PERL_ARGS_ASSERT_YYWARN;
10662
faef0170 10663 PL_in_eval |= EVAL_WARNONLY;
748a9306 10664 yyerror(s);
faef0170 10665 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 10666 return 0;
8990e307
LW
10667}
10668
10669int
15f169a1 10670Perl_yyerror(pTHX_ const char *const s)
463ee0b2 10671{
97aff369 10672 dVAR;
bfed75c6
AL
10673 const char *where = NULL;
10674 const char *context = NULL;
68dc0745 10675 int contlen = -1;
46fc3d4c 10676 SV *msg;
5912531f 10677 int yychar = PL_parser->yychar;
463ee0b2 10678
7918f24d
NC
10679 PERL_ARGS_ASSERT_YYERROR;
10680
3280af22 10681 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 10682 where = "at EOF";
8bcfe651
TM
10683 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10684 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10685 PL_oldbufptr != PL_bufptr) {
f355267c
JH
10686 /*
10687 Only for NetWare:
10688 The code below is removed for NetWare because it abends/crashes on NetWare
10689 when the script has error such as not having the closing quotes like:
10690 if ($var eq "value)
10691 Checking of white spaces is anyway done in NetWare code.
10692 */
10693#ifndef NETWARE
3280af22
NIS
10694 while (isSPACE(*PL_oldoldbufptr))
10695 PL_oldoldbufptr++;
f355267c 10696#endif
3280af22
NIS
10697 context = PL_oldoldbufptr;
10698 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 10699 }
8bcfe651
TM
10700 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10701 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
10702 /*
10703 Only for NetWare:
10704 The code below is removed for NetWare because it abends/crashes on NetWare
10705 when the script has error such as not having the closing quotes like:
10706 if ($var eq "value)
10707 Checking of white spaces is anyway done in NetWare code.
10708 */
10709#ifndef NETWARE
3280af22
NIS
10710 while (isSPACE(*PL_oldbufptr))
10711 PL_oldbufptr++;
f355267c 10712#endif
3280af22
NIS
10713 context = PL_oldbufptr;
10714 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
10715 }
10716 else if (yychar > 255)
68dc0745 10717 where = "next token ???";
12fbd33b 10718 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
10719 if (PL_lex_state == LEX_NORMAL ||
10720 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 10721 where = "at end of line";
3280af22 10722 else if (PL_lex_inpat)
68dc0745 10723 where = "within pattern";
463ee0b2 10724 else
68dc0745 10725 where = "within string";
463ee0b2 10726 }
46fc3d4c 10727 else {
84bafc02 10728 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 10729 if (yychar < 32)
cea2e8a9 10730 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 10731 else if (isPRINT_LC(yychar)) {
88c9ea1e 10732 const char string = yychar;
5e7aa789
NC
10733 sv_catpvn(where_sv, &string, 1);
10734 }
463ee0b2 10735 else
cea2e8a9 10736 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 10737 where = SvPVX_const(where_sv);
463ee0b2 10738 }
46fc3d4c 10739 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 10740 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 10741 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 10742 if (context)
cea2e8a9 10743 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 10744 else
cea2e8a9 10745 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 10746 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 10747 Perl_sv_catpvf(aTHX_ msg,
57def98f 10748 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 10749 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 10750 PL_multi_end = 0;
a0d0e21e 10751 }
500960a6 10752 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 10753 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 10754 }
463ee0b2 10755 else
5a844595 10756 qerror(msg);
c7d6bfb2
GS
10757 if (PL_error_count >= 10) {
10758 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 10759 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 10760 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
10761 else
10762 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 10763 OutCopFILE(PL_curcop));
c7d6bfb2 10764 }
3280af22 10765 PL_in_my = 0;
5c284bb0 10766 PL_in_my_stash = NULL;
463ee0b2
LW
10767 return 0;
10768}
084592ab
CN
10769#ifdef __SC__
10770#pragma segment Main
10771#endif
4e35701f 10772
b250498f 10773STATIC char*
3ae08724 10774S_swallow_bom(pTHX_ U8 *s)
01ec43d0 10775{
97aff369 10776 dVAR;
f54cb97a 10777 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
10778
10779 PERL_ARGS_ASSERT_SWALLOW_BOM;
10780
7aa207d6 10781 switch (s[0]) {
4e553d73
NIS
10782 case 0xFF:
10783 if (s[1] == 0xFE) {
ee6ba15d 10784 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 10785 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 10786 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 10787#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 10788 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 10789 s += 2;
dea0fc0b 10790 if (PL_bufend > (char*)s) {
81a923f4 10791 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 10792 }
b250498f 10793#else
ee6ba15d 10794 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 10795#endif
01ec43d0
GS
10796 }
10797 break;
78ae23f5 10798 case 0xFE:
7aa207d6 10799 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 10800#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10801 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
10802 s += 2;
10803 if (PL_bufend > (char *)s) {
81a923f4 10804 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 10805 }
b250498f 10806#else
ee6ba15d 10807 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 10808#endif
01ec43d0
GS
10809 }
10810 break;
3ae08724
GS
10811 case 0xEF:
10812 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 10813 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
10814 s += 3; /* UTF-8 */
10815 }
10816 break;
10817 case 0:
7aa207d6
JH
10818 if (slen > 3) {
10819 if (s[1] == 0) {
10820 if (s[2] == 0xFE && s[3] == 0xFF) {
10821 /* UTF-32 big-endian */
ee6ba15d 10822 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
10823 }
10824 }
10825 else if (s[2] == 0 && s[3] != 0) {
10826 /* Leading bytes
10827 * 00 xx 00 xx
10828 * are a good indicator of UTF-16BE. */
ee6ba15d 10829#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10830 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
10831 s = add_utf16_textfilter(s, FALSE);
10832#else
10833 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10834#endif
7aa207d6 10835 }
01ec43d0 10836 }
e294cc5d
JH
10837#ifdef EBCDIC
10838 case 0xDD:
10839 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10840 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10841 s += 4; /* UTF-8 */
10842 }
10843 break;
10844#endif
10845
7aa207d6
JH
10846 default:
10847 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10848 /* Leading bytes
10849 * xx 00 xx 00
10850 * are a good indicator of UTF-16LE. */
ee6ba15d 10851#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10852 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 10853 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
10854#else
10855 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10856#endif
7aa207d6 10857 }
01ec43d0 10858 }
b8f84bb2 10859 return (char*)s;
b250498f 10860}
4755096e 10861
6e3aabd6
GS
10862
10863#ifndef PERL_NO_UTF16_FILTER
10864static I32
a28af015 10865S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 10866{
97aff369 10867 dVAR;
f3040f2c 10868 SV *const filter = FILTER_DATA(idx);
2a773401
NC
10869 /* We re-use this each time round, throwing the contents away before we
10870 return. */
2a773401 10871 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 10872 SV *const utf8_buffer = filter;
c28d6105 10873 IV status = IoPAGE(filter);
f2338a2e 10874 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 10875 I32 retval;
c8b0cbae 10876
c85ae797
NC
10877 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10878
c8b0cbae
NC
10879 /* As we're automatically added, at the lowest level, and hence only called
10880 from this file, we can be sure that we're not called in block mode. Hence
10881 don't bother writing code to deal with block mode. */
10882 if (maxlen) {
10883 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10884 }
c28d6105
NC
10885 if (status < 0) {
10886 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10887 }
1de9afcd 10888 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 10889 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 10890 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
10891 reverse ? 'l' : 'b', idx, maxlen, status,
10892 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10893
10894 while (1) {
10895 STRLEN chars;
10896 STRLEN have;
dea0fc0b 10897 I32 newlen;
2a773401 10898 U8 *end;
c28d6105
NC
10899 /* First, look in our buffer of existing UTF-8 data: */
10900 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10901
10902 if (nl) {
10903 ++nl;
10904 } else if (status == 0) {
10905 /* EOF */
10906 IoPAGE(filter) = 0;
10907 nl = SvEND(utf8_buffer);
10908 }
10909 if (nl) {
d2d1d4de
NC
10910 STRLEN got = nl - SvPVX(utf8_buffer);
10911 /* Did we have anything to append? */
10912 retval = got != 0;
10913 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
10914 /* Everything else in this code works just fine if SVp_POK isn't
10915 set. This, however, needs it, and we need it to work, else
10916 we loop infinitely because the buffer is never consumed. */
10917 sv_chop(utf8_buffer, nl);
10918 break;
10919 }
ba77e4cc 10920
c28d6105
NC
10921 /* OK, not a complete line there, so need to read some more UTF-16.
10922 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
10923 while (1) {
10924 if (status <= 0)
10925 break;
10926 if (SvCUR(utf16_buffer) >= 2) {
10927 /* Location of the high octet of the last complete code point.
10928 Gosh, UTF-16 is a pain. All the benefits of variable length,
10929 *coupled* with all the benefits of partial reads and
10930 endianness. */
10931 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10932 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10933
10934 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10935 break;
10936 }
10937
10938 /* We have the first half of a surrogate. Read more. */
10939 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10940 }
c28d6105 10941
c28d6105
NC
10942 status = FILTER_READ(idx + 1, utf16_buffer,
10943 160 + (SvCUR(utf16_buffer) & 1));
10944 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 10945 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
10946 if (status < 0) {
10947 /* Error */
10948 IoPAGE(filter) = status;
10949 return status;
10950 }
10951 }
10952
10953 chars = SvCUR(utf16_buffer) >> 1;
10954 have = SvCUR(utf8_buffer);
10955 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 10956
aa6dbd60 10957 if (reverse) {
c28d6105
NC
10958 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10959 (U8*)SvPVX_const(utf8_buffer) + have,
10960 chars * 2, &newlen);
aa6dbd60 10961 } else {
2a773401 10962 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
10963 (U8*)SvPVX_const(utf8_buffer) + have,
10964 chars * 2, &newlen);
2a773401 10965 }
c28d6105 10966 SvCUR_set(utf8_buffer, have + newlen);
2a773401 10967 *end = '\0';
c28d6105 10968
e07286ed
NC
10969 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10970 it's private to us, and utf16_to_utf8{,reversed} take a
10971 (pointer,length) pair, rather than a NUL-terminated string. */
10972 if(SvCUR(utf16_buffer) & 1) {
10973 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10974 SvCUR_set(utf16_buffer, 1);
10975 } else {
10976 SvCUR_set(utf16_buffer, 0);
10977 }
2a773401 10978 }
c28d6105
NC
10979 DEBUG_P(PerlIO_printf(Perl_debug_log,
10980 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10981 status,
10982 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10983 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 10984 return retval;
6e3aabd6 10985}
81a923f4
NC
10986
10987static U8 *
10988S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10989{
2a773401 10990 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 10991
c85ae797
NC
10992 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10993
c28d6105 10994 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 10995 sv_setpvs(filter, "");
2a773401 10996 IoLINES(filter) = reversed;
c28d6105
NC
10997 IoPAGE(filter) = 1; /* Not EOF */
10998
10999 /* Sadly, we have to return a valid pointer, come what may, so we have to
11000 ignore any error return from this. */
11001 SvCUR_set(PL_linestr, 0);
11002 if (FILTER_READ(0, PL_linestr, 0)) {
11003 SvUTF8_on(PL_linestr);
81a923f4 11004 } else {
c28d6105 11005 SvUTF8_on(PL_linestr);
81a923f4 11006 }
c28d6105 11007 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
11008 return (U8*)SvPVX(PL_linestr);
11009}
6e3aabd6 11010#endif
9f4817db 11011
f333445c
JP
11012/*
11013Returns a pointer to the next character after the parsed
11014vstring, as well as updating the passed in sv.
11015
11016Function must be called like
11017
561b68a9 11018 sv = newSV(5);
65b06e02 11019 s = scan_vstring(s,e,sv);
f333445c 11020
65b06e02 11021where s and e are the start and end of the string.
f333445c
JP
11022The sv should already be large enough to store the vstring
11023passed in, for performance reasons.
11024
11025*/
11026
11027char *
15f169a1 11028Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 11029{
97aff369 11030 dVAR;
bfed75c6
AL
11031 const char *pos = s;
11032 const char *start = s;
7918f24d
NC
11033
11034 PERL_ARGS_ASSERT_SCAN_VSTRING;
11035
f333445c 11036 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 11037 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 11038 pos++;
f333445c
JP
11039 if ( *pos != '.') {
11040 /* this may not be a v-string if followed by => */
bfed75c6 11041 const char *next = pos;
65b06e02 11042 while (next < e && isSPACE(*next))
8fc7bb1c 11043 ++next;
65b06e02 11044 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
11045 /* return string not v-string */
11046 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 11047 return (char *)pos;
f333445c
JP
11048 }
11049 }
11050
11051 if (!isALPHA(*pos)) {
89ebb4a3 11052 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 11053
d4c19fe8
AL
11054 if (*s == 'v')
11055 s++; /* get past 'v' */
f333445c 11056
76f68e9b 11057 sv_setpvs(sv, "");
f333445c
JP
11058
11059 for (;;) {
d4c19fe8 11060 /* this is atoi() that tolerates underscores */
0bd48802
AL
11061 U8 *tmpend;
11062 UV rev = 0;
d4c19fe8
AL
11063 const char *end = pos;
11064 UV mult = 1;
11065 while (--end >= s) {
11066 if (*end != '_') {
11067 const UV orev = rev;
f333445c
JP
11068 rev += (*end - '0') * mult;
11069 mult *= 10;
9b387841
NC
11070 if (orev > rev)
11071 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
11072 "Integer overflow in decimal number");
f333445c
JP
11073 }
11074 }
11075#ifdef EBCDIC
11076 if (rev > 0x7FFFFFFF)
11077 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11078#endif
11079 /* Append native character for the rev point */
11080 tmpend = uvchr_to_utf8(tmpbuf, rev);
11081 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11082 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11083 SvUTF8_on(sv);
65b06e02 11084 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
11085 s = ++pos;
11086 else {
11087 s = pos;
11088 break;
11089 }
65b06e02 11090 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
11091 pos++;
11092 }
11093 SvPOK_on(sv);
11094 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11095 SvRMAGICAL_on(sv);
11096 }
73d840c0 11097 return (char *)s;
f333445c
JP
11098}
11099
88e1f1a2
JV
11100int
11101Perl_keyword_plugin_standard(pTHX_
11102 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11103{
11104 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11105 PERL_UNUSED_CONTEXT;
11106 PERL_UNUSED_ARG(keyword_ptr);
11107 PERL_UNUSED_ARG(keyword_len);
11108 PERL_UNUSED_ARG(op_ptr);
11109 return KEYWORD_PLUGIN_DECLINE;
11110}
11111
78cdf107 11112#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11113static void
78cdf107 11114S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11115{
11116 SAVEI32(PL_lex_brackets);
11117 if (PL_lex_brackets > 100)
11118 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11119 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11120 SAVEI32(PL_lex_allbrackets);
11121 PL_lex_allbrackets = 0;
11122 SAVEI8(PL_lex_fakeeof);
2dcac756 11123 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11124 if(yyparse(gramtype) && !PL_parser->error_count)
11125 qerror(Perl_mess(aTHX_ "Parse error"));
11126}
11127
78cdf107 11128#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11129static OP *
78cdf107 11130S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11131{
11132 OP *o;
11133 ENTER;
11134 SAVEVPTR(PL_eval_root);
11135 PL_eval_root = NULL;
78cdf107 11136 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11137 o = PL_eval_root;
11138 LEAVE;
11139 return o;
11140}
11141
78cdf107
Z
11142#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11143static OP *
11144S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11145{
11146 OP *exprop;
11147 if (flags & ~PARSE_OPTIONAL)
11148 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11149 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11150 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11151 if (!PL_parser->error_count)
11152 qerror(Perl_mess(aTHX_ "Parse error"));
11153 exprop = newOP(OP_NULL, 0);
11154 }
11155 return exprop;
11156}
11157
11158/*
11159=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11160
11161Parse a Perl arithmetic expression. This may contain operators of precedence
11162down to the bit shift operators. The expression must be followed (and thus
11163terminated) either by a comparison or lower-precedence operator or by
11164something that would normally terminate an expression such as semicolon.
11165If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11166otherwise it is mandatory. It is up to the caller to ensure that the
11167dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11168the source of the code to be parsed and the lexical context for the
11169expression.
11170
11171The op tree representing the expression is returned. If an optional
11172expression is absent, a null pointer is returned, otherwise the pointer
11173will be non-null.
11174
11175If an error occurs in parsing or compilation, in most cases a valid op
11176tree is returned anyway. The error is reflected in the parser state,
11177normally resulting in a single exception at the top level of parsing
11178which covers all the compilation errors that occurred. Some compilation
11179errors, however, will throw an exception immediately.
11180
11181=cut
11182*/
11183
11184OP *
11185Perl_parse_arithexpr(pTHX_ U32 flags)
11186{
11187 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11188}
11189
11190/*
11191=for apidoc Amx|OP *|parse_termexpr|U32 flags
11192
11193Parse a Perl term expression. This may contain operators of precedence
11194down to the assignment operators. The expression must be followed (and thus
11195terminated) either by a comma or lower-precedence operator or by
11196something that would normally terminate an expression such as semicolon.
11197If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11198otherwise it is mandatory. It is up to the caller to ensure that the
11199dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11200the source of the code to be parsed and the lexical context for the
11201expression.
11202
11203The op tree representing the expression is returned. If an optional
11204expression is absent, a null pointer is returned, otherwise the pointer
11205will be non-null.
11206
11207If an error occurs in parsing or compilation, in most cases a valid op
11208tree is returned anyway. The error is reflected in the parser state,
11209normally resulting in a single exception at the top level of parsing
11210which covers all the compilation errors that occurred. Some compilation
11211errors, however, will throw an exception immediately.
11212
11213=cut
11214*/
11215
11216OP *
11217Perl_parse_termexpr(pTHX_ U32 flags)
11218{
11219 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11220}
11221
11222/*
11223=for apidoc Amx|OP *|parse_listexpr|U32 flags
11224
11225Parse a Perl list expression. This may contain operators of precedence
11226down to the comma operator. The expression must be followed (and thus
11227terminated) either by a low-precedence logic operator such as C<or> or by
11228something that would normally terminate an expression such as semicolon.
11229If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11230otherwise it is mandatory. It is up to the caller to ensure that the
11231dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11232the source of the code to be parsed and the lexical context for the
11233expression.
11234
11235The op tree representing the expression is returned. If an optional
11236expression is absent, a null pointer is returned, otherwise the pointer
11237will be non-null.
11238
11239If an error occurs in parsing or compilation, in most cases a valid op
11240tree is returned anyway. The error is reflected in the parser state,
11241normally resulting in a single exception at the top level of parsing
11242which covers all the compilation errors that occurred. Some compilation
11243errors, however, will throw an exception immediately.
11244
11245=cut
11246*/
11247
11248OP *
11249Perl_parse_listexpr(pTHX_ U32 flags)
11250{
11251 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11252}
11253
11254/*
11255=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11256
11257Parse a single complete Perl expression. This allows the full
11258expression grammar, including the lowest-precedence operators such
11259as C<or>. The expression must be followed (and thus terminated) by a
11260token that an expression would normally be terminated by: end-of-file,
11261closing bracketing punctuation, semicolon, or one of the keywords that
11262signals a postfix expression-statement modifier. If I<flags> includes
11263C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11264mandatory. It is up to the caller to ensure that the dynamic parser
11265state (L</PL_parser> et al) is correctly set to reflect the source of
11266the code to be parsed and the lexical context for the expression.
11267
11268The op tree representing the expression is returned. If an optional
11269expression is absent, a null pointer is returned, otherwise the pointer
11270will be non-null.
11271
11272If an error occurs in parsing or compilation, in most cases a valid op
11273tree is returned anyway. The error is reflected in the parser state,
11274normally resulting in a single exception at the top level of parsing
11275which covers all the compilation errors that occurred. Some compilation
11276errors, however, will throw an exception immediately.
11277
11278=cut
11279*/
11280
11281OP *
11282Perl_parse_fullexpr(pTHX_ U32 flags)
11283{
11284 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11285}
11286
e53d8f76
Z
11287/*
11288=for apidoc Amx|OP *|parse_block|U32 flags
11289
11290Parse a single complete Perl code block. This consists of an opening
11291brace, a sequence of statements, and a closing brace. The block
11292constitutes a lexical scope, so C<my> variables and various compile-time
11293effects can be contained within it. It is up to the caller to ensure
11294that the dynamic parser state (L</PL_parser> et al) is correctly set to
11295reflect the source of the code to be parsed and the lexical context for
11296the statement.
11297
11298The op tree representing the code block is returned. This is always a
11299real op, never a null pointer. It will normally be a C<lineseq> list,
11300including C<nextstate> or equivalent ops. No ops to construct any kind
11301of runtime scope are included by virtue of it being a block.
11302
11303If an error occurs in parsing or compilation, in most cases a valid op
11304tree (most likely null) is returned anyway. The error is reflected in
11305the parser state, normally resulting in a single exception at the top
11306level of parsing which covers all the compilation errors that occurred.
11307Some compilation errors, however, will throw an exception immediately.
11308
11309The I<flags> parameter is reserved for future use, and must always
11310be zero.
11311
11312=cut
11313*/
11314
11315OP *
11316Perl_parse_block(pTHX_ U32 flags)
11317{
11318 if (flags)
11319 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11320 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11321}
11322
1da4ca5f 11323/*
8359b381
Z
11324=for apidoc Amx|OP *|parse_barestmt|U32 flags
11325
11326Parse a single unadorned Perl statement. This may be a normal imperative
11327statement or a declaration that has compile-time effect. It does not
11328include any label or other affixture. It is up to the caller to ensure
11329that the dynamic parser state (L</PL_parser> et al) is correctly set to
11330reflect the source of the code to be parsed and the lexical context for
11331the statement.
11332
11333The op tree representing the statement is returned. This may be a
11334null pointer if the statement is null, for example if it was actually
11335a subroutine definition (which has compile-time side effects). If not
11336null, it will be ops directly implementing the statement, suitable to
11337pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11338equivalent op (except for those embedded in a scope contained entirely
11339within the statement).
11340
11341If an error occurs in parsing or compilation, in most cases a valid op
11342tree (most likely null) is returned anyway. The error is reflected in
11343the parser state, normally resulting in a single exception at the top
11344level of parsing which covers all the compilation errors that occurred.
11345Some compilation errors, however, will throw an exception immediately.
11346
11347The I<flags> parameter is reserved for future use, and must always
11348be zero.
11349
11350=cut
11351*/
11352
11353OP *
11354Perl_parse_barestmt(pTHX_ U32 flags)
11355{
11356 if (flags)
11357 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11358 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11359}
11360
11361/*
361d9b55
Z
11362=for apidoc Amx|SV *|parse_label|U32 flags
11363
11364Parse a single label, possibly optional, of the type that may prefix a
11365Perl statement. It is up to the caller to ensure that the dynamic parser
11366state (L</PL_parser> et al) is correctly set to reflect the source of
11367the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11368label is optional, otherwise it is mandatory.
11369
11370The name of the label is returned in the form of a fresh scalar. If an
11371optional label is absent, a null pointer is returned.
11372
11373If an error occurs in parsing, which can only occur if the label is
11374mandatory, a valid label is returned anyway. The error is reflected in
11375the parser state, normally resulting in a single exception at the top
11376level of parsing which covers all the compilation errors that occurred.
11377
11378=cut
11379*/
11380
11381SV *
11382Perl_parse_label(pTHX_ U32 flags)
11383{
11384 if (flags & ~PARSE_OPTIONAL)
11385 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11386 if (PL_lex_state == LEX_KNOWNEXT) {
11387 PL_parser->yychar = yylex();
11388 if (PL_parser->yychar == LABEL) {
11389 char *lpv = pl_yylval.pval;
11390 STRLEN llen = strlen(lpv);
11391 SV *lsv;
11392 PL_parser->yychar = YYEMPTY;
11393 lsv = newSV_type(SVt_PV);
11394 SvPV_set(lsv, lpv);
11395 SvCUR_set(lsv, llen);
11396 SvLEN_set(lsv, llen+1);
11397 SvPOK_on(lsv);
11398 return lsv;
11399 } else {
11400 yyunlex();
11401 goto no_label;
11402 }
11403 } else {
11404 char *s, *t;
11405 U8 c;
11406 STRLEN wlen, bufptr_pos;
11407 lex_read_space(0);
11408 t = s = PL_bufptr;
11409 c = (U8)*s;
11410 if (!isIDFIRST_A(c))
11411 goto no_label;
11412 do {
11413 c = (U8)*++t;
11414 } while(isWORDCHAR_A(c));
11415 wlen = t - s;
11416 if (word_takes_any_delimeter(s, wlen))
11417 goto no_label;
11418 bufptr_pos = s - SvPVX(PL_linestr);
11419 PL_bufptr = t;
11420 lex_read_space(LEX_KEEP_PREVIOUS);
11421 t = PL_bufptr;
11422 s = SvPVX(PL_linestr) + bufptr_pos;
11423 if (t[0] == ':' && t[1] != ':') {
11424 PL_oldoldbufptr = PL_oldbufptr;
11425 PL_oldbufptr = s;
11426 PL_bufptr = t+1;
11427 return newSVpvn(s, wlen);
11428 } else {
11429 PL_bufptr = s;
11430 no_label:
11431 if (flags & PARSE_OPTIONAL) {
11432 return NULL;
11433 } else {
11434 qerror(Perl_mess(aTHX_ "Parse error"));
11435 return newSVpvs("x");
11436 }
11437 }
11438 }
11439}
11440
11441/*
28ac2b49
Z
11442=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11443
11444Parse a single complete Perl statement. This may be a normal imperative
8359b381 11445statement or a declaration that has compile-time effect, and may include
8e720305 11446optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11447parser state (L</PL_parser> et al) is correctly set to reflect the source
11448of the code to be parsed and the lexical context for the statement.
11449
11450The op tree representing the statement is returned. This may be a
11451null pointer if the statement is null, for example if it was actually
11452a subroutine definition (which has compile-time side effects). If not
11453null, it will be the result of a L</newSTATEOP> call, normally including
11454a C<nextstate> or equivalent op.
11455
11456If an error occurs in parsing or compilation, in most cases a valid op
11457tree (most likely null) is returned anyway. The error is reflected in
11458the parser state, normally resulting in a single exception at the top
11459level of parsing which covers all the compilation errors that occurred.
11460Some compilation errors, however, will throw an exception immediately.
11461
11462The I<flags> parameter is reserved for future use, and must always
11463be zero.
11464
11465=cut
11466*/
11467
11468OP *
11469Perl_parse_fullstmt(pTHX_ U32 flags)
11470{
28ac2b49
Z
11471 if (flags)
11472 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11473 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11474}
11475
07ffcb73
Z
11476/*
11477=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11478
11479Parse a sequence of zero or more Perl statements. These may be normal
11480imperative statements, including optional labels, or declarations
11481that have compile-time effect, or any mixture thereof. The statement
11482sequence ends when a closing brace or end-of-file is encountered in a
11483place where a new statement could have validly started. It is up to
11484the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11485is correctly set to reflect the source of the code to be parsed and the
11486lexical context for the statements.
11487
11488The op tree representing the statement sequence is returned. This may
11489be a null pointer if the statements were all null, for example if there
11490were no statements or if there were only subroutine definitions (which
11491have compile-time side effects). If not null, it will be a C<lineseq>
11492list, normally including C<nextstate> or equivalent ops.
11493
11494If an error occurs in parsing or compilation, in most cases a valid op
11495tree is returned anyway. The error is reflected in the parser state,
11496normally resulting in a single exception at the top level of parsing
11497which covers all the compilation errors that occurred. Some compilation
11498errors, however, will throw an exception immediately.
11499
11500The I<flags> parameter is reserved for future use, and must always
11501be zero.
11502
11503=cut
11504*/
11505
11506OP *
11507Perl_parse_stmtseq(pTHX_ U32 flags)
11508{
11509 OP *stmtseqop;
e53d8f76 11510 I32 c;
07ffcb73 11511 if (flags)
78cdf107
Z
11512 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11513 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
11514 c = lex_peek_unichar(0);
11515 if (c != -1 && c != /*{*/'}')
07ffcb73 11516 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
11517 return stmtseqop;
11518}
11519
ea25a9b2 11520void
f7e3d326 11521Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 11522{
f7e3d326 11523 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2 11524 deprecate("qw(...) as parentheses");
78cdf107 11525 force_next((4<<24)|')');
ea25a9b2
Z
11526 if (qwlist->op_type == OP_STUB) {
11527 op_free(qwlist);
11528 }
11529 else {
3d8e05a0 11530 start_force(PL_curforce);
ea25a9b2
Z
11531 NEXTVAL_NEXTTOKE.opval = qwlist;
11532 force_next(THING);
11533 }
78cdf107 11534 force_next((2<<24)|'(');
ea25a9b2
Z
11535}
11536
28ac2b49 11537/*
1da4ca5f
NC
11538 * Local variables:
11539 * c-indentation-style: bsd
11540 * c-basic-offset: 4
11541 * indent-tabs-mode: t
11542 * End:
11543 *
37442d52
RGS
11544 * ex: set ts=8 sts=4 sw=4 noet:
11545 */