This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Upgrade ExtUtils::ParseXS to 3.05
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
746b446a 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805
LW
150 * can get by with a single comparison (if the compiler is smart enough).
151 */
152
fb73857a 153/* #define LEX_NOTPARSING 11 is done in perl.h. */
154
b6007c36
DM
155#define LEX_NORMAL 10 /* normal code (ie not within "...") */
156#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159#define LEX_INTERPSTART 6 /* expecting the start of a $var */
160
161 /* at end of code, eg "$x" followed by: */
162#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164
165#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167#define LEX_INTERPCONST 2 /* NOT USED */
168#define LEX_FORMLINE 1 /* expecting a format line */
169#define LEX_KNOWNEXT 0 /* next token known; just return it */
170
79072805 171
bbf60fe6 172#ifdef DEBUGGING
27da23d5 173static const char* const lex_state_names[] = {
bbf60fe6
DM
174 "KNOWNEXT",
175 "FORMLINE",
176 "INTERPCONST",
177 "INTERPCONCAT",
178 "INTERPENDMAYBE",
179 "INTERPEND",
180 "INTERPSTART",
181 "INTERPPUSH",
182 "INTERPCASEMOD",
183 "INTERPNORMAL",
184 "NORMAL"
185};
186#endif
187
79072805
LW
188#ifdef ff_next
189#undef ff_next
d48672a2
LW
190#endif
191
79072805 192#include "keywords.h"
fe14fcc3 193
ffb4593c
NT
194/* CLINE is a macro that ensures PL_copline has a sane value */
195
ae986130
LW
196#ifdef CLINE
197#undef CLINE
198#endif
57843af0 199#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 200
5db06880 201#ifdef PERL_MAD
29595ff2
NC
202# define SKIPSPACE0(s) skipspace0(s)
203# define SKIPSPACE1(s) skipspace1(s)
204# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205# define PEEKSPACE(s) skipspace2(s,0)
206#else
207# define SKIPSPACE0(s) skipspace(s)
208# define SKIPSPACE1(s) skipspace(s)
209# define SKIPSPACE2(s,tsv) skipspace(s)
210# define PEEKSPACE(s) skipspace(s)
211#endif
212
ffb4593c
NT
213/*
214 * Convenience functions to return different tokens and prime the
9cbb5ea2 215 * lexer for the next token. They all take an argument.
ffb4593c
NT
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
7eb971ee 227 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 228 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
229 * BOop : bitwise or or xor
230 * BAop : bitwise and
231 * SHop : shift operator
232 * PWop : power operator
9cbb5ea2 233 * PMop : pattern-matching operator
ffb4593c
NT
234 * Aop : addition-level operator
235 * Mop : multiplication-level operator
236 * Eop : equality-testing operator
e5edeb50 237 * Rop : relational operator <= != gt
ffb4593c
NT
238 *
239 * Also see LOP and lop() below.
240 */
241
998054bd 242#ifdef DEBUGGING /* Serve -DT. */
704d4215 243# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 244#else
bbf60fe6 245# define REPORT(retval) (retval)
998054bd
SC
246#endif
247
bbf60fe6
DM
248#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
249#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
250#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
251#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
252#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
253#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
254#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
255#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
256#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
257#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 258#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
259#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
260#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
261#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
262#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
263#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
264#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
265#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
266#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
267#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
268#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 269
a687059c
LW
270/* This bit of chicanery makes a unary function followed by
271 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
272 * The UNIDOR macro is for unary functions that can be followed by the //
273 * operator (such as C<shift // 0>).
a687059c 274 */
376fcdbf 275#define UNI2(f,x) { \
6154021b 276 pl_yylval.ival = f; \
376fcdbf
AL
277 PL_expect = x; \
278 PL_bufptr = s; \
279 PL_last_uni = PL_oldbufptr; \
280 PL_last_lop_op = f; \
281 if (*s == '(') \
282 return REPORT( (int)FUNC1 ); \
29595ff2 283 s = PEEKSPACE(s); \
376fcdbf
AL
284 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
285 }
6f33ba73
RGS
286#define UNI(f) UNI2(f,XTERM)
287#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 288
376fcdbf 289#define UNIBRACK(f) { \
6154021b 290 pl_yylval.ival = f; \
376fcdbf
AL
291 PL_bufptr = s; \
292 PL_last_uni = PL_oldbufptr; \
293 if (*s == '(') \
294 return REPORT( (int)FUNC1 ); \
29595ff2 295 s = PEEKSPACE(s); \
376fcdbf
AL
296 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
297 }
79072805 298
9f68db38 299/* grandfather return to old style */
78cdf107
Z
300#define OLDLOP(f) \
301 do { \
302 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
303 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
304 pl_yylval.ival = (f); \
305 PL_expect = XTERM; \
306 PL_bufptr = s; \
307 return (int)LSTOP; \
308 } while(0)
79072805 309
8fa7f367
JH
310#ifdef DEBUGGING
311
6154021b 312/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
313enum token_type {
314 TOKENTYPE_NONE,
315 TOKENTYPE_IVAL,
6154021b 316 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
317 TOKENTYPE_PVAL,
318 TOKENTYPE_OPVAL,
319 TOKENTYPE_GVVAL
320};
321
6d4a66ac
NC
322static struct debug_tokens {
323 const int token;
324 enum token_type type;
325 const char *name;
326} const debug_tokens[] =
9041c2e3 327{
bbf60fe6
DM
328 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
329 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
330 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
331 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
332 { ARROW, TOKENTYPE_NONE, "ARROW" },
333 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
334 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
335 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
336 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
337 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 338 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
339 { DO, TOKENTYPE_NONE, "DO" },
340 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
341 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
342 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
343 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
344 { ELSE, TOKENTYPE_NONE, "ELSE" },
345 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
346 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
347 { FOR, TOKENTYPE_IVAL, "FOR" },
348 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
349 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
350 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 351 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
352 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
353 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
354 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 355 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
356 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
357 { IF, TOKENTYPE_IVAL, "IF" },
358 { LABEL, TOKENTYPE_PVAL, "LABEL" },
359 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
360 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
361 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
362 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
363 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
364 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
365 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
366 { MY, TOKENTYPE_IVAL, "MY" },
367 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
368 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
369 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
370 { OROP, TOKENTYPE_IVAL, "OROP" },
371 { OROR, TOKENTYPE_NONE, "OROR" },
372 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
373 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
374 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
375 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
376 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
377 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
378 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
379 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
380 { PREINC, TOKENTYPE_NONE, "PREINC" },
381 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
382 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
383 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
384 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
385 { SUB, TOKENTYPE_NONE, "SUB" },
386 { THING, TOKENTYPE_OPVAL, "THING" },
387 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
388 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
389 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
390 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
391 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
392 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 393 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
394 { WHILE, TOKENTYPE_IVAL, "WHILE" },
395 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 396 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 397 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
398};
399
6154021b 400/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 401
bbf60fe6 402STATIC int
704d4215 403S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 404{
97aff369 405 dVAR;
7918f24d
NC
406
407 PERL_ARGS_ASSERT_TOKEREPORT;
408
bbf60fe6 409 if (DEBUG_T_TEST) {
bd61b366 410 const char *name = NULL;
bbf60fe6 411 enum token_type type = TOKENTYPE_NONE;
f54cb97a 412 const struct debug_tokens *p;
396482e1 413 SV* const report = newSVpvs("<== ");
bbf60fe6 414
f54cb97a 415 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
416 if (p->token == (int)rv) {
417 name = p->name;
418 type = p->type;
419 break;
420 }
421 }
422 if (name)
54667de8 423 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
424 else if ((char)rv > ' ' && (char)rv < '~')
425 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
426 else if (!rv)
396482e1 427 sv_catpvs(report, "EOF");
bbf60fe6
DM
428 else
429 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
430 switch (type) {
431 case TOKENTYPE_NONE:
432 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
433 break;
434 case TOKENTYPE_IVAL:
704d4215 435 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
436 break;
437 case TOKENTYPE_OPNUM:
438 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 439 PL_op_name[lvalp->ival]);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_PVAL:
704d4215 442 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
443 break;
444 case TOKENTYPE_OPVAL:
704d4215 445 if (lvalp->opval) {
401441c0 446 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
447 PL_op_name[lvalp->opval->op_type]);
448 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 449 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 450 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
451 }
452
453 }
401441c0 454 else
396482e1 455 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
456 break;
457 }
b6007c36 458 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
459 };
460 return (int)rv;
998054bd
SC
461}
462
b6007c36
DM
463
464/* print the buffer with suitable escapes */
465
466STATIC void
15f169a1 467S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 468{
396482e1 469 SV* const tmp = newSVpvs("");
7918f24d
NC
470
471 PERL_ARGS_ASSERT_PRINTBUF;
472
b6007c36
DM
473 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
474 SvREFCNT_dec(tmp);
475}
476
8fa7f367
JH
477#endif
478
8290c323
NC
479static int
480S_deprecate_commaless_var_list(pTHX) {
481 PL_expect = XTERM;
482 deprecate("comma-less variable list");
483 return REPORT(','); /* grandfather non-comma-format format */
484}
485
ffb4593c
NT
486/*
487 * S_ao
488 *
c963b151
BD
489 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
490 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
491 */
492
76e3520e 493STATIC int
cea2e8a9 494S_ao(pTHX_ int toketype)
a0d0e21e 495{
97aff369 496 dVAR;
3280af22
NIS
497 if (*PL_bufptr == '=') {
498 PL_bufptr++;
a0d0e21e 499 if (toketype == ANDAND)
6154021b 500 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 501 else if (toketype == OROR)
6154021b 502 pl_yylval.ival = OP_ORASSIGN;
c963b151 503 else if (toketype == DORDOR)
6154021b 504 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
505 toketype = ASSIGNOP;
506 }
507 return toketype;
508}
509
ffb4593c
NT
510/*
511 * S_no_op
512 * When Perl expects an operator and finds something else, no_op
513 * prints the warning. It always prints "<something> found where
514 * operator expected. It prints "Missing semicolon on previous line?"
515 * if the surprise occurs at the start of the line. "do you need to
516 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
517 * where the compiler doesn't know if foo is a method call or a function.
518 * It prints "Missing operator before end of line" if there's nothing
519 * after the missing operator, or "... before <...>" if there is something
520 * after the missing operator.
521 */
522
76e3520e 523STATIC void
15f169a1 524S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 525{
97aff369 526 dVAR;
9d4ba2ae
AL
527 char * const oldbp = PL_bufptr;
528 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 529
7918f24d
NC
530 PERL_ARGS_ASSERT_NO_OP;
531
1189a94a
GS
532 if (!s)
533 s = oldbp;
07c798fb 534 else
1189a94a 535 PL_bufptr = s;
cea2e8a9 536 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
537 if (ckWARN_d(WARN_SYNTAX)) {
538 if (is_first)
539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
540 "\t(Missing semicolon on previous line?)\n");
541 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 542 const char *t;
c35e046a
AL
543 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
544 NOOP;
56da5a46
RGS
545 if (t < PL_bufptr && isSPACE(*t))
546 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
547 "\t(Do you need to predeclare %.*s?)\n",
551405c4 548 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
549 }
550 else {
551 assert(s >= oldbp);
552 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 553 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 554 }
07c798fb 555 }
3280af22 556 PL_bufptr = oldbp;
8990e307
LW
557}
558
ffb4593c
NT
559/*
560 * S_missingterm
561 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 562 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
563 * If we're in a delimited string and the delimiter is a control
564 * character, it's reformatted into a two-char sequence like ^C.
565 * This is fatal.
566 */
567
76e3520e 568STATIC void
cea2e8a9 569S_missingterm(pTHX_ char *s)
8990e307 570{
97aff369 571 dVAR;
8990e307
LW
572 char tmpbuf[3];
573 char q;
574 if (s) {
9d4ba2ae 575 char * const nl = strrchr(s,'\n');
d2719217 576 if (nl)
8990e307
LW
577 *nl = '\0';
578 }
463559e7 579 else if (isCNTRL(PL_multi_close)) {
8990e307 580 *tmpbuf = '^';
585ec06d 581 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
582 tmpbuf[2] = '\0';
583 s = tmpbuf;
584 }
585 else {
eb160463 586 *tmpbuf = (char)PL_multi_close;
8990e307
LW
587 tmpbuf[1] = '\0';
588 s = tmpbuf;
589 }
590 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 591 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 592}
79072805 593
0d863452 594/*
0d863452
RH
595 * Check whether the named feature is enabled.
596 */
26ea9e12
NC
597bool
598Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 599{
97aff369 600 dVAR;
0d863452 601 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 602 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
603
604 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
605
26ea9e12
NC
606 if (namelen > MAX_FEATURE_LEN)
607 return FALSE;
4a731d7b 608 memcpy(&he_name[8], name, namelen);
d4c19fe8 609
7b9ef140 610 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
611}
612
ffb4593c 613/*
9cbb5ea2
GS
614 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
615 * utf16-to-utf8-reversed.
ffb4593c
NT
616 */
617
c39cd008
GS
618#ifdef PERL_CR_FILTER
619static void
620strip_return(SV *sv)
621{
95a20fc0 622 register const char *s = SvPVX_const(sv);
9d4ba2ae 623 register const char * const e = s + SvCUR(sv);
7918f24d
NC
624
625 PERL_ARGS_ASSERT_STRIP_RETURN;
626
c39cd008
GS
627 /* outer loop optimized to do nothing if there are no CR-LFs */
628 while (s < e) {
629 if (*s++ == '\r' && *s == '\n') {
630 /* hit a CR-LF, need to copy the rest */
631 register char *d = s - 1;
632 *d++ = *s++;
633 while (s < e) {
634 if (*s == '\r' && s[1] == '\n')
635 s++;
636 *d++ = *s++;
637 }
638 SvCUR(sv) -= s - d;
639 return;
640 }
641 }
642}
a868473f 643
76e3520e 644STATIC I32
c39cd008 645S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 646{
f54cb97a 647 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
648 if (count > 0 && !maxlen)
649 strip_return(sv);
650 return count;
a868473f
NIS
651}
652#endif
653
ffb4593c 654/*
8eaa0acf
Z
655=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
656
657Creates and initialises a new lexer/parser state object, supplying
658a context in which to lex and parse from a new source of Perl code.
659A pointer to the new state object is placed in L</PL_parser>. An entry
660is made on the save stack so that upon unwinding the new state object
661will be destroyed and the former value of L</PL_parser> will be restored.
662Nothing else need be done to clean up the parsing context.
663
664The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
665non-null, provides a string (in SV form) containing code to be parsed.
666A copy of the string is made, so subsequent modification of I<line>
667does not affect parsing. I<rsfp>, if non-null, provides an input stream
668from which code will be read to be parsed. If both are non-null, the
669code in I<line> comes first and must consist of complete lines of input,
670and I<rsfp> supplies the remainder of the source.
671
672The I<flags> parameter is reserved for future use, and must always
27fcb6ee
FC
673be zero, except for one flag that is currently reserved for perl's internal
674use.
8eaa0acf
Z
675
676=cut
677*/
ffb4593c 678
27fcb6ee
FC
679/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
680 can share filters with the current parser. */
681
a0d0e21e 682void
8eaa0acf 683Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 684{
97aff369 685 dVAR;
6ef55633 686 const char *s = NULL;
8990e307 687 STRLEN len;
5486870f 688 yy_parser *parser, *oparser;
27fcb6ee 689 if (flags && flags != LEX_START_SAME_FILTER)
8eaa0acf 690 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
691
692 /* create and initialise a parser */
693
199e78b7 694 Newxz(parser, 1, yy_parser);
5486870f 695 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
696 PL_parser = parser;
697
28ac2b49
Z
698 parser->stack = NULL;
699 parser->ps = NULL;
700 parser->stack_size = 0;
acdf0a21 701
e3abe207
DM
702 /* on scope exit, free this parser and restore any outer one */
703 SAVEPARSER(parser);
7c4baf47 704 parser->saved_curcop = PL_curcop;
e3abe207 705
acdf0a21 706 /* initialise lexer state */
8990e307 707
fb205e7a
DM
708#ifdef PERL_MAD
709 parser->curforce = -1;
710#else
711 parser->nexttoke = 0;
712#endif
ca4cfd28 713 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 714 parser->copline = NOLINE;
5afb0a62 715 parser->lex_state = LEX_NORMAL;
c2598295 716 parser->expect = XSTATE;
2f9285f8 717 parser->rsfp = rsfp;
27fcb6ee
FC
718 parser->rsfp_filters =
719 !(flags & LEX_START_SAME_FILTER) || !oparser
720 ? newAV()
721 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 722
199e78b7
DM
723 Newx(parser->lex_brackstack, 120, char);
724 Newx(parser->lex_casestack, 12, char);
725 *parser->lex_casestack = '\0';
02b34bbe 726
10efb74f
NC
727 if (line) {
728 s = SvPV_const(line, len);
729 } else {
730 len = 0;
731 }
bdc0bf6f 732
10efb74f 733 if (!len) {
bdc0bf6f 734 parser->linestr = newSVpvs("\n;");
805700c1 735 } else {
719a9bb0 736 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 737 if (s[len-1] != ';')
bdc0bf6f 738 sv_catpvs(parser->linestr, "\n;");
8990e307 739 }
f06b5848
DM
740 parser->oldoldbufptr =
741 parser->oldbufptr =
742 parser->bufptr =
743 parser->linestart = SvPVX(parser->linestr);
744 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
745 parser->last_lop = parser->last_uni = NULL;
737c24fc
Z
746
747 parser->in_pod = 0;
79072805 748}
a687059c 749
e3abe207
DM
750
751/* delete a parser object */
752
753void
754Perl_parser_free(pTHX_ const yy_parser *parser)
755{
7918f24d
NC
756 PERL_ARGS_ASSERT_PARSER_FREE;
757
7c4baf47 758 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
759 SvREFCNT_dec(parser->linestr);
760
2f9285f8
DM
761 if (parser->rsfp == PerlIO_stdin())
762 PerlIO_clearerr(parser->rsfp);
799361c3
SH
763 else if (parser->rsfp && (!parser->old_parser ||
764 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 765 PerlIO_close(parser->rsfp);
5486870f 766 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 767
e3abe207
DM
768 Safefree(parser->lex_brackstack);
769 Safefree(parser->lex_casestack);
770 PL_parser = parser->old_parser;
771 Safefree(parser);
772}
773
774
ffb4593c 775/*
f0e67a1d
Z
776=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
777
778Buffer scalar containing the chunk currently under consideration of the
779text currently being lexed. This is always a plain string scalar (for
780which C<SvPOK> is true). It is not intended to be used as a scalar by
781normal scalar means; instead refer to the buffer directly by the pointer
782variables described below.
783
784The lexer maintains various C<char*> pointers to things in the
785C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
786reallocated, all of these pointers must be updated. Don't attempt to
787do this manually, but rather use L</lex_grow_linestr> if you need to
788reallocate the buffer.
789
790The content of the text chunk in the buffer is commonly exactly one
791complete line of input, up to and including a newline terminator,
792but there are situations where it is otherwise. The octets of the
793buffer may be intended to be interpreted as either UTF-8 or Latin-1.
794The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
795flag on this scalar, which may disagree with it.
796
797For direct examination of the buffer, the variable
798L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
799lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
800of these pointers is usually preferable to examination of the scalar
801through normal scalar means.
802
803=for apidoc AmxU|char *|PL_parser-E<gt>bufend
804
805Direct pointer to the end of the chunk of text currently being lexed, the
806end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
807+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
808always located at the end of the buffer, and does not count as part of
809the buffer's contents.
810
811=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
812
813Points to the current position of lexing inside the lexer buffer.
814Characters around this point may be freely examined, within
815the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
816L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
817interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
818
819Lexing code (whether in the Perl core or not) moves this pointer past
820the characters that it consumes. It is also expected to perform some
821bookkeeping whenever a newline character is consumed. This movement
822can be more conveniently performed by the function L</lex_read_to>,
823which handles newlines appropriately.
824
825Interpretation of the buffer's octets can be abstracted out by
826using the slightly higher-level functions L</lex_peek_unichar> and
827L</lex_read_unichar>.
828
829=for apidoc AmxU|char *|PL_parser-E<gt>linestart
830
831Points to the start of the current line inside the lexer buffer.
832This is useful for indicating at which column an error occurred, and
833not much else. This must be updated by any lexing code that consumes
834a newline; the function L</lex_read_to> handles this detail.
835
836=cut
837*/
838
839/*
840=for apidoc Amx|bool|lex_bufutf8
841
842Indicates whether the octets in the lexer buffer
843(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
844of Unicode characters. If not, they should be interpreted as Latin-1
845characters. This is analogous to the C<SvUTF8> flag for scalars.
846
847In UTF-8 mode, it is not guaranteed that the lexer buffer actually
848contains valid UTF-8. Lexing code must be robust in the face of invalid
849encoding.
850
851The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
852is significant, but not the whole story regarding the input character
853encoding. Normally, when a file is being read, the scalar contains octets
854and its C<SvUTF8> flag is off, but the octets should be interpreted as
855UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
856however, the scalar may have the C<SvUTF8> flag on, and in this case its
857octets should be interpreted as UTF-8 unless the C<use bytes> pragma
858is in effect. This logic may change in the future; use this function
859instead of implementing the logic yourself.
860
861=cut
862*/
863
864bool
865Perl_lex_bufutf8(pTHX)
866{
867 return UTF;
868}
869
870/*
871=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
872
873Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
874at least I<len> octets (including terminating NUL). Returns a
875pointer to the reallocated buffer. This is necessary before making
876any direct modification of the buffer that would increase its length.
877L</lex_stuff_pvn> provides a more convenient way to insert text into
878the buffer.
879
880Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
881this function updates all of the lexer's variables that point directly
882into the buffer.
883
884=cut
885*/
886
887char *
888Perl_lex_grow_linestr(pTHX_ STRLEN len)
889{
890 SV *linestr;
891 char *buf;
892 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
893 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
894 linestr = PL_parser->linestr;
895 buf = SvPVX(linestr);
896 if (len <= SvLEN(linestr))
897 return buf;
898 bufend_pos = PL_parser->bufend - buf;
899 bufptr_pos = PL_parser->bufptr - buf;
900 oldbufptr_pos = PL_parser->oldbufptr - buf;
901 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
902 linestart_pos = PL_parser->linestart - buf;
903 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
904 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
905 buf = sv_grow(linestr, len);
906 PL_parser->bufend = buf + bufend_pos;
907 PL_parser->bufptr = buf + bufptr_pos;
908 PL_parser->oldbufptr = buf + oldbufptr_pos;
909 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
910 PL_parser->linestart = buf + linestart_pos;
911 if (PL_parser->last_uni)
912 PL_parser->last_uni = buf + last_uni_pos;
913 if (PL_parser->last_lop)
914 PL_parser->last_lop = buf + last_lop_pos;
915 return buf;
916}
917
918/*
83aa740e 919=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
920
921Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
922immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
923reallocating the buffer if necessary. This means that lexing code that
924runs later will see the characters as if they had appeared in the input.
925It is not recommended to do this as part of normal parsing, and most
926uses of this facility run the risk of the inserted characters being
927interpreted in an unintended manner.
928
929The string to be inserted is represented by I<len> octets starting
930at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
931according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
932The characters are recoded for the lexer buffer, according to how the
933buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 934to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
935function is more convenient.
936
937=cut
938*/
939
940void
83aa740e 941Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 942{
749123ff 943 dVAR;
f0e67a1d
Z
944 char *bufptr;
945 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
946 if (flags & ~(LEX_STUFF_UTF8))
947 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
948 if (UTF) {
949 if (flags & LEX_STUFF_UTF8) {
950 goto plain_copy;
951 } else {
952 STRLEN highhalf = 0;
83aa740e 953 const char *p, *e = pv+len;
f0e67a1d
Z
954 for (p = pv; p != e; p++)
955 highhalf += !!(((U8)*p) & 0x80);
956 if (!highhalf)
957 goto plain_copy;
958 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
959 bufptr = PL_parser->bufptr;
960 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
961 SvCUR_set(PL_parser->linestr,
962 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
963 PL_parser->bufend += len+highhalf;
964 for (p = pv; p != e; p++) {
965 U8 c = (U8)*p;
966 if (c & 0x80) {
967 *bufptr++ = (char)(0xc0 | (c >> 6));
968 *bufptr++ = (char)(0x80 | (c & 0x3f));
969 } else {
970 *bufptr++ = (char)c;
971 }
972 }
973 }
974 } else {
975 if (flags & LEX_STUFF_UTF8) {
976 STRLEN highhalf = 0;
83aa740e 977 const char *p, *e = pv+len;
f0e67a1d
Z
978 for (p = pv; p != e; p++) {
979 U8 c = (U8)*p;
980 if (c >= 0xc4) {
981 Perl_croak(aTHX_ "Lexing code attempted to stuff "
982 "non-Latin-1 character into Latin-1 input");
983 } else if (c >= 0xc2 && p+1 != e &&
984 (((U8)p[1]) & 0xc0) == 0x80) {
985 p++;
986 highhalf++;
987 } else if (c >= 0x80) {
988 /* malformed UTF-8 */
989 ENTER;
990 SAVESPTR(PL_warnhook);
991 PL_warnhook = PERL_WARNHOOK_FATAL;
992 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
993 LEAVE;
994 }
995 }
996 if (!highhalf)
997 goto plain_copy;
998 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
999 bufptr = PL_parser->bufptr;
1000 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1001 SvCUR_set(PL_parser->linestr,
1002 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1003 PL_parser->bufend += len-highhalf;
1004 for (p = pv; p != e; p++) {
1005 U8 c = (U8)*p;
1006 if (c & 0x80) {
1007 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1008 p++;
1009 } else {
1010 *bufptr++ = (char)c;
1011 }
1012 }
1013 } else {
1014 plain_copy:
1015 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1016 bufptr = PL_parser->bufptr;
1017 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1018 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1019 PL_parser->bufend += len;
1020 Copy(pv, bufptr, len, char);
1021 }
1022 }
1023}
1024
1025/*
9dcc53ea
Z
1026=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1027
1028Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1029immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1030reallocating the buffer if necessary. This means that lexing code that
1031runs later will see the characters as if they had appeared in the input.
1032It is not recommended to do this as part of normal parsing, and most
1033uses of this facility run the risk of the inserted characters being
1034interpreted in an unintended manner.
1035
1036The string to be inserted is represented by octets starting at I<pv>
1037and continuing to the first nul. These octets are interpreted as either
1038UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1039in I<flags>. The characters are recoded for the lexer buffer, according
1040to how the buffer is currently being interpreted (L</lex_bufutf8>).
1041If it is not convenient to nul-terminate a string to be inserted, the
1042L</lex_stuff_pvn> function is more appropriate.
1043
1044=cut
1045*/
1046
1047void
1048Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1049{
1050 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1051 lex_stuff_pvn(pv, strlen(pv), flags);
1052}
1053
1054/*
f0e67a1d
Z
1055=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1056
1057Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1058immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1059reallocating the buffer if necessary. This means that lexing code that
1060runs later will see the characters as if they had appeared in the input.
1061It is not recommended to do this as part of normal parsing, and most
1062uses of this facility run the risk of the inserted characters being
1063interpreted in an unintended manner.
1064
1065The string to be inserted is the string value of I<sv>. The characters
1066are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1067being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1068not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1069need to construct a scalar.
1070
1071=cut
1072*/
1073
1074void
1075Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1076{
1077 char *pv;
1078 STRLEN len;
1079 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1080 if (flags)
1081 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1082 pv = SvPV(sv, len);
1083 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1084}
1085
1086/*
1087=for apidoc Amx|void|lex_unstuff|char *ptr
1088
1089Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1090I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1091This hides the discarded text from any lexing code that runs later,
1092as if the text had never appeared.
1093
1094This is not the normal way to consume lexed text. For that, use
1095L</lex_read_to>.
1096
1097=cut
1098*/
1099
1100void
1101Perl_lex_unstuff(pTHX_ char *ptr)
1102{
1103 char *buf, *bufend;
1104 STRLEN unstuff_len;
1105 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1106 buf = PL_parser->bufptr;
1107 if (ptr < buf)
1108 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1109 if (ptr == buf)
1110 return;
1111 bufend = PL_parser->bufend;
1112 if (ptr > bufend)
1113 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1114 unstuff_len = ptr - buf;
1115 Move(ptr, buf, bufend+1-ptr, char);
1116 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1117 PL_parser->bufend = bufend - unstuff_len;
1118}
1119
1120/*
1121=for apidoc Amx|void|lex_read_to|char *ptr
1122
1123Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1124to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1125performing the correct bookkeeping whenever a newline character is passed.
1126This is the normal way to consume lexed text.
1127
1128Interpretation of the buffer's octets can be abstracted out by
1129using the slightly higher-level functions L</lex_peek_unichar> and
1130L</lex_read_unichar>.
1131
1132=cut
1133*/
1134
1135void
1136Perl_lex_read_to(pTHX_ char *ptr)
1137{
1138 char *s;
1139 PERL_ARGS_ASSERT_LEX_READ_TO;
1140 s = PL_parser->bufptr;
1141 if (ptr < s || ptr > PL_parser->bufend)
1142 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1143 for (; s != ptr; s++)
1144 if (*s == '\n') {
1145 CopLINE_inc(PL_curcop);
1146 PL_parser->linestart = s+1;
1147 }
1148 PL_parser->bufptr = ptr;
1149}
1150
1151/*
1152=for apidoc Amx|void|lex_discard_to|char *ptr
1153
1154Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1155up to I<ptr>. The remaining content of the buffer will be moved, and
1156all pointers into the buffer updated appropriately. I<ptr> must not
1157be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1158it is not permitted to discard text that has yet to be lexed.
1159
1160Normally it is not necessarily to do this directly, because it suffices to
1161use the implicit discarding behaviour of L</lex_next_chunk> and things
1162based on it. However, if a token stretches across multiple lines,
1f317c95 1163and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1164that purpose, then after completion of the token it would be wise to
1165explicitly discard the now-unneeded earlier lines, to avoid future
1166multi-line tokens growing the buffer without bound.
1167
1168=cut
1169*/
1170
1171void
1172Perl_lex_discard_to(pTHX_ char *ptr)
1173{
1174 char *buf;
1175 STRLEN discard_len;
1176 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1177 buf = SvPVX(PL_parser->linestr);
1178 if (ptr < buf)
1179 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180 if (ptr == buf)
1181 return;
1182 if (ptr > PL_parser->bufptr)
1183 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1184 discard_len = ptr - buf;
1185 if (PL_parser->oldbufptr < ptr)
1186 PL_parser->oldbufptr = ptr;
1187 if (PL_parser->oldoldbufptr < ptr)
1188 PL_parser->oldoldbufptr = ptr;
1189 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1190 PL_parser->last_uni = NULL;
1191 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1192 PL_parser->last_lop = NULL;
1193 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1194 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1195 PL_parser->bufend -= discard_len;
1196 PL_parser->bufptr -= discard_len;
1197 PL_parser->oldbufptr -= discard_len;
1198 PL_parser->oldoldbufptr -= discard_len;
1199 if (PL_parser->last_uni)
1200 PL_parser->last_uni -= discard_len;
1201 if (PL_parser->last_lop)
1202 PL_parser->last_lop -= discard_len;
1203}
1204
1205/*
1206=for apidoc Amx|bool|lex_next_chunk|U32 flags
1207
1208Reads in the next chunk of text to be lexed, appending it to
1209L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1210looked to the end of the current chunk and wants to know more. It is
1211usual, but not necessary, for lexing to have consumed the entirety of
1212the current chunk at this time.
1213
1214If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1215chunk (i.e., the current chunk has been entirely consumed), normally the
1216current chunk will be discarded at the same time that the new chunk is
1217read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1218will not be discarded. If the current chunk has not been entirely
1219consumed, then it will not be discarded regardless of the flag.
1220
1221Returns true if some new text was added to the buffer, or false if the
1222buffer has reached the end of the input text.
1223
1224=cut
1225*/
1226
1227#define LEX_FAKE_EOF 0x80000000
1228
1229bool
1230Perl_lex_next_chunk(pTHX_ U32 flags)
1231{
1232 SV *linestr;
1233 char *buf;
1234 STRLEN old_bufend_pos, new_bufend_pos;
1235 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1236 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1237 bool got_some_for_debugger = 0;
f0e67a1d
Z
1238 bool got_some;
1239 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1240 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1241 linestr = PL_parser->linestr;
1242 buf = SvPVX(linestr);
1243 if (!(flags & LEX_KEEP_PREVIOUS) &&
1244 PL_parser->bufptr == PL_parser->bufend) {
1245 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1246 linestart_pos = 0;
1247 if (PL_parser->last_uni != PL_parser->bufend)
1248 PL_parser->last_uni = NULL;
1249 if (PL_parser->last_lop != PL_parser->bufend)
1250 PL_parser->last_lop = NULL;
1251 last_uni_pos = last_lop_pos = 0;
1252 *buf = 0;
1253 SvCUR(linestr) = 0;
1254 } else {
1255 old_bufend_pos = PL_parser->bufend - buf;
1256 bufptr_pos = PL_parser->bufptr - buf;
1257 oldbufptr_pos = PL_parser->oldbufptr - buf;
1258 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1259 linestart_pos = PL_parser->linestart - buf;
1260 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1261 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1262 }
1263 if (flags & LEX_FAKE_EOF) {
1264 goto eof;
1265 } else if (!PL_parser->rsfp) {
1266 got_some = 0;
1267 } else if (filter_gets(linestr, old_bufend_pos)) {
1268 got_some = 1;
17cc9359 1269 got_some_for_debugger = 1;
f0e67a1d 1270 } else {
580561a3
Z
1271 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1272 sv_setpvs(linestr, "");
f0e67a1d
Z
1273 eof:
1274 /* End of real input. Close filehandle (unless it was STDIN),
1275 * then add implicit termination.
1276 */
1277 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1278 PerlIO_clearerr(PL_parser->rsfp);
1279 else if (PL_parser->rsfp)
1280 (void)PerlIO_close(PL_parser->rsfp);
1281 PL_parser->rsfp = NULL;
737c24fc 1282 PL_parser->in_pod = 0;
f0e67a1d
Z
1283#ifdef PERL_MAD
1284 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1285 PL_faketokens = 1;
1286#endif
1287 if (!PL_in_eval && PL_minus_p) {
1288 sv_catpvs(linestr,
1289 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1290 PL_minus_n = PL_minus_p = 0;
1291 } else if (!PL_in_eval && PL_minus_n) {
1292 sv_catpvs(linestr, /*{*/";}");
1293 PL_minus_n = 0;
1294 } else
1295 sv_catpvs(linestr, ";");
1296 got_some = 1;
1297 }
1298 buf = SvPVX(linestr);
1299 new_bufend_pos = SvCUR(linestr);
1300 PL_parser->bufend = buf + new_bufend_pos;
1301 PL_parser->bufptr = buf + bufptr_pos;
1302 PL_parser->oldbufptr = buf + oldbufptr_pos;
1303 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1304 PL_parser->linestart = buf + linestart_pos;
1305 if (PL_parser->last_uni)
1306 PL_parser->last_uni = buf + last_uni_pos;
1307 if (PL_parser->last_lop)
1308 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1309 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1310 PL_curstash != PL_debstash) {
1311 /* debugger active and we're not compiling the debugger code,
1312 * so store the line into the debugger's array of lines
1313 */
1314 update_debugger_info(NULL, buf+old_bufend_pos,
1315 new_bufend_pos-old_bufend_pos);
1316 }
1317 return got_some;
1318}
1319
1320/*
1321=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1322
1323Looks ahead one (Unicode) character in the text currently being lexed.
1324Returns the codepoint (unsigned integer value) of the next character,
1325or -1 if lexing has reached the end of the input text. To consume the
1326peeked character, use L</lex_read_unichar>.
1327
1328If the next character is in (or extends into) the next chunk of input
1329text, the next chunk will be read in. Normally the current chunk will be
1330discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1331then the current chunk will not be discarded.
1332
1333If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1334is encountered, an exception is generated.
1335
1336=cut
1337*/
1338
1339I32
1340Perl_lex_peek_unichar(pTHX_ U32 flags)
1341{
749123ff 1342 dVAR;
f0e67a1d
Z
1343 char *s, *bufend;
1344 if (flags & ~(LEX_KEEP_PREVIOUS))
1345 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1346 s = PL_parser->bufptr;
1347 bufend = PL_parser->bufend;
1348 if (UTF) {
1349 U8 head;
1350 I32 unichar;
1351 STRLEN len, retlen;
1352 if (s == bufend) {
1353 if (!lex_next_chunk(flags))
1354 return -1;
1355 s = PL_parser->bufptr;
1356 bufend = PL_parser->bufend;
1357 }
1358 head = (U8)*s;
1359 if (!(head & 0x80))
1360 return head;
1361 if (head & 0x40) {
1362 len = PL_utf8skip[head];
1363 while ((STRLEN)(bufend-s) < len) {
1364 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1365 break;
1366 s = PL_parser->bufptr;
1367 bufend = PL_parser->bufend;
1368 }
1369 }
1370 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1371 if (retlen == (STRLEN)-1) {
1372 /* malformed UTF-8 */
1373 ENTER;
1374 SAVESPTR(PL_warnhook);
1375 PL_warnhook = PERL_WARNHOOK_FATAL;
1376 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1377 LEAVE;
1378 }
1379 return unichar;
1380 } else {
1381 if (s == bufend) {
1382 if (!lex_next_chunk(flags))
1383 return -1;
1384 s = PL_parser->bufptr;
1385 }
1386 return (U8)*s;
1387 }
1388}
1389
1390/*
1391=for apidoc Amx|I32|lex_read_unichar|U32 flags
1392
1393Reads the next (Unicode) character in the text currently being lexed.
1394Returns the codepoint (unsigned integer value) of the character read,
1395and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1396if lexing has reached the end of the input text. To non-destructively
1397examine the next character, use L</lex_peek_unichar> instead.
1398
1399If the next character is in (or extends into) the next chunk of input
1400text, the next chunk will be read in. Normally the current chunk will be
1401discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1402then the current chunk will not be discarded.
1403
1404If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1405is encountered, an exception is generated.
1406
1407=cut
1408*/
1409
1410I32
1411Perl_lex_read_unichar(pTHX_ U32 flags)
1412{
1413 I32 c;
1414 if (flags & ~(LEX_KEEP_PREVIOUS))
1415 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1416 c = lex_peek_unichar(flags);
1417 if (c != -1) {
1418 if (c == '\n')
1419 CopLINE_inc(PL_curcop);
d9018cbe
EB
1420 if (UTF)
1421 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1422 else
1423 ++(PL_parser->bufptr);
f0e67a1d
Z
1424 }
1425 return c;
1426}
1427
1428/*
1429=for apidoc Amx|void|lex_read_space|U32 flags
1430
1431Reads optional spaces, in Perl style, in the text currently being
1432lexed. The spaces may include ordinary whitespace characters and
1433Perl-style comments. C<#line> directives are processed if encountered.
1434L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1435at a non-space character (or the end of the input text).
1436
1437If spaces extend into the next chunk of input text, the next chunk will
1438be read in. Normally the current chunk will be discarded at the same
1439time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1440chunk will not be discarded.
1441
1442=cut
1443*/
1444
f0998909
Z
1445#define LEX_NO_NEXT_CHUNK 0x80000000
1446
f0e67a1d
Z
1447void
1448Perl_lex_read_space(pTHX_ U32 flags)
1449{
1450 char *s, *bufend;
1451 bool need_incline = 0;
f0998909 1452 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1453 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1454#ifdef PERL_MAD
1455 if (PL_skipwhite) {
1456 sv_free(PL_skipwhite);
1457 PL_skipwhite = NULL;
1458 }
1459 if (PL_madskills)
1460 PL_skipwhite = newSVpvs("");
1461#endif /* PERL_MAD */
1462 s = PL_parser->bufptr;
1463 bufend = PL_parser->bufend;
1464 while (1) {
1465 char c = *s;
1466 if (c == '#') {
1467 do {
1468 c = *++s;
1469 } while (!(c == '\n' || (c == 0 && s == bufend)));
1470 } else if (c == '\n') {
1471 s++;
1472 PL_parser->linestart = s;
1473 if (s == bufend)
1474 need_incline = 1;
1475 else
1476 incline(s);
1477 } else if (isSPACE(c)) {
1478 s++;
1479 } else if (c == 0 && s == bufend) {
1480 bool got_more;
1481#ifdef PERL_MAD
1482 if (PL_madskills)
1483 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1484#endif /* PERL_MAD */
f0998909
Z
1485 if (flags & LEX_NO_NEXT_CHUNK)
1486 break;
f0e67a1d
Z
1487 PL_parser->bufptr = s;
1488 CopLINE_inc(PL_curcop);
1489 got_more = lex_next_chunk(flags);
1490 CopLINE_dec(PL_curcop);
1491 s = PL_parser->bufptr;
1492 bufend = PL_parser->bufend;
1493 if (!got_more)
1494 break;
1495 if (need_incline && PL_parser->rsfp) {
1496 incline(s);
1497 need_incline = 0;
1498 }
1499 } else {
1500 break;
1501 }
1502 }
1503#ifdef PERL_MAD
1504 if (PL_madskills)
1505 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1506#endif /* PERL_MAD */
1507 PL_parser->bufptr = s;
1508}
1509
1510/*
ffb4593c
NT
1511 * S_incline
1512 * This subroutine has nothing to do with tilting, whether at windmills
1513 * or pinball tables. Its name is short for "increment line". It
57843af0 1514 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1515 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1516 * # line 500 "foo.pm"
1517 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1518 */
1519
76e3520e 1520STATIC void
d9095cec 1521S_incline(pTHX_ const char *s)
463ee0b2 1522{
97aff369 1523 dVAR;
d9095cec
NC
1524 const char *t;
1525 const char *n;
1526 const char *e;
8818d409 1527 line_t line_num;
463ee0b2 1528
7918f24d
NC
1529 PERL_ARGS_ASSERT_INCLINE;
1530
57843af0 1531 CopLINE_inc(PL_curcop);
463ee0b2
LW
1532 if (*s++ != '#')
1533 return;
d4c19fe8
AL
1534 while (SPACE_OR_TAB(*s))
1535 s++;
73659bf1
GS
1536 if (strnEQ(s, "line", 4))
1537 s += 4;
1538 else
1539 return;
084592ab 1540 if (SPACE_OR_TAB(*s))
73659bf1 1541 s++;
4e553d73 1542 else
73659bf1 1543 return;
d4c19fe8
AL
1544 while (SPACE_OR_TAB(*s))
1545 s++;
463ee0b2
LW
1546 if (!isDIGIT(*s))
1547 return;
d4c19fe8 1548
463ee0b2
LW
1549 n = s;
1550 while (isDIGIT(*s))
1551 s++;
07714eb4 1552 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1553 return;
bf4acbe4 1554 while (SPACE_OR_TAB(*s))
463ee0b2 1555 s++;
73659bf1 1556 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1557 s++;
73659bf1
GS
1558 e = t + 1;
1559 }
463ee0b2 1560 else {
c35e046a
AL
1561 t = s;
1562 while (!isSPACE(*t))
1563 t++;
73659bf1 1564 e = t;
463ee0b2 1565 }
bf4acbe4 1566 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1567 e++;
1568 if (*e != '\n' && *e != '\0')
1569 return; /* false alarm */
1570
8818d409
FC
1571 line_num = atoi(n)-1;
1572
f4dd75d9 1573 if (t - s > 0) {
d9095cec 1574 const STRLEN len = t - s;
19bad673
NC
1575 SV *const temp_sv = CopFILESV(PL_curcop);
1576 const char *cf;
1577 STRLEN tmplen;
1578
1579 if (temp_sv) {
1580 cf = SvPVX(temp_sv);
1581 tmplen = SvCUR(temp_sv);
1582 } else {
1583 cf = NULL;
1584 tmplen = 0;
1585 }
1586
42d9b98d 1587 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1588 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1589 * to *{"::_<newfilename"} */
44867030
NC
1590 /* However, the long form of evals is only turned on by the
1591 debugger - usually they're "(eval %lu)" */
1592 char smallbuf[128];
1593 char *tmpbuf;
1594 GV **gvp;
d9095cec 1595 STRLEN tmplen2 = len;
798b63bc 1596 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1597 tmpbuf = smallbuf;
1598 else
2ae0db35 1599 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1600 tmpbuf[0] = '_';
1601 tmpbuf[1] = '<';
2ae0db35 1602 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1603 tmplen += 2;
8a5ee598
RGS
1604 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1605 if (gvp) {
44867030
NC
1606 char *tmpbuf2;
1607 GV *gv2;
1608
1609 if (tmplen2 + 2 <= sizeof smallbuf)
1610 tmpbuf2 = smallbuf;
1611 else
1612 Newx(tmpbuf2, tmplen2 + 2, char);
1613
1614 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1615 /* Either they malloc'd it, or we malloc'd it,
1616 so no prefix is present in ours. */
1617 tmpbuf2[0] = '_';
1618 tmpbuf2[1] = '<';
1619 }
1620
1621 memcpy(tmpbuf2 + 2, s, tmplen2);
1622 tmplen2 += 2;
1623
8a5ee598 1624 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1625 if (!isGV(gv2)) {
8a5ee598 1626 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1627 /* adjust ${"::_<newfilename"} to store the new file name */
1628 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1629 /* The line number may differ. If that is the case,
1630 alias the saved lines that are in the array.
1631 Otherwise alias the whole array. */
1632 if (CopLINE(PL_curcop) == line_num) {
1633 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1634 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1635 }
1636 else if (GvAV(*gvp)) {
1637 AV * const av = GvAV(*gvp);
1638 const I32 start = CopLINE(PL_curcop)+1;
1639 I32 items = AvFILLp(av) - start;
1640 if (items > 0) {
1641 AV * const av2 = GvAVn(gv2);
1642 SV **svp = AvARRAY(av) + start;
1643 I32 l = (I32)line_num+1;
1644 while (items--)
1645 av_store(av2, l++, SvREFCNT_inc(*svp++));
1646 }
1647 }
e5527e4b 1648 }
44867030
NC
1649
1650 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1651 }
e66cf94c 1652 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1653 }
05ec9bb3 1654 CopFILE_free(PL_curcop);
d9095cec 1655 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1656 }
8818d409 1657 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1658}
1659
29595ff2 1660#ifdef PERL_MAD
cd81e915 1661/* skip space before PL_thistoken */
29595ff2
NC
1662
1663STATIC char *
1664S_skipspace0(pTHX_ register char *s)
1665{
7918f24d
NC
1666 PERL_ARGS_ASSERT_SKIPSPACE0;
1667
29595ff2
NC
1668 s = skipspace(s);
1669 if (!PL_madskills)
1670 return s;
cd81e915
NC
1671 if (PL_skipwhite) {
1672 if (!PL_thiswhite)
6b29d1f5 1673 PL_thiswhite = newSVpvs("");
cd81e915
NC
1674 sv_catsv(PL_thiswhite, PL_skipwhite);
1675 sv_free(PL_skipwhite);
1676 PL_skipwhite = 0;
1677 }
1678 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1679 return s;
1680}
1681
cd81e915 1682/* skip space after PL_thistoken */
29595ff2
NC
1683
1684STATIC char *
1685S_skipspace1(pTHX_ register char *s)
1686{
d4c19fe8 1687 const char *start = s;
29595ff2
NC
1688 I32 startoff = start - SvPVX(PL_linestr);
1689
7918f24d
NC
1690 PERL_ARGS_ASSERT_SKIPSPACE1;
1691
29595ff2
NC
1692 s = skipspace(s);
1693 if (!PL_madskills)
1694 return s;
1695 start = SvPVX(PL_linestr) + startoff;
cd81e915 1696 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1697 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1698 PL_thistoken = newSVpvn(tstart, start - tstart);
1699 }
1700 PL_realtokenstart = -1;
1701 if (PL_skipwhite) {
1702 if (!PL_nextwhite)
6b29d1f5 1703 PL_nextwhite = newSVpvs("");
cd81e915
NC
1704 sv_catsv(PL_nextwhite, PL_skipwhite);
1705 sv_free(PL_skipwhite);
1706 PL_skipwhite = 0;
29595ff2
NC
1707 }
1708 return s;
1709}
1710
1711STATIC char *
1712S_skipspace2(pTHX_ register char *s, SV **svp)
1713{
c35e046a
AL
1714 char *start;
1715 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1716 const I32 startoff = s - SvPVX(PL_linestr);
1717
7918f24d
NC
1718 PERL_ARGS_ASSERT_SKIPSPACE2;
1719
29595ff2
NC
1720 s = skipspace(s);
1721 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1722 if (!PL_madskills || !svp)
1723 return s;
1724 start = SvPVX(PL_linestr) + startoff;
cd81e915 1725 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1726 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1727 PL_thistoken = newSVpvn(tstart, start - tstart);
1728 PL_realtokenstart = -1;
29595ff2 1729 }
cd81e915 1730 if (PL_skipwhite) {
29595ff2 1731 if (!*svp)
6b29d1f5 1732 *svp = newSVpvs("");
cd81e915
NC
1733 sv_setsv(*svp, PL_skipwhite);
1734 sv_free(PL_skipwhite);
1735 PL_skipwhite = 0;
29595ff2
NC
1736 }
1737
1738 return s;
1739}
1740#endif
1741
80a702cd 1742STATIC void
15f169a1 1743S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1744{
1745 AV *av = CopFILEAVx(PL_curcop);
1746 if (av) {
b9f83d2f 1747 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1748 if (orig_sv)
1749 sv_setsv(sv, orig_sv);
1750 else
1751 sv_setpvn(sv, buf, len);
80a702cd
RGS
1752 (void)SvIOK_on(sv);
1753 SvIV_set(sv, 0);
1754 av_store(av, (I32)CopLINE(PL_curcop), sv);
1755 }
1756}
1757
ffb4593c
NT
1758/*
1759 * S_skipspace
1760 * Called to gobble the appropriate amount and type of whitespace.
1761 * Skips comments as well.
1762 */
1763
76e3520e 1764STATIC char *
cea2e8a9 1765S_skipspace(pTHX_ register char *s)
a687059c 1766{
5db06880 1767#ifdef PERL_MAD
f0e67a1d
Z
1768 char *start = s;
1769#endif /* PERL_MAD */
7918f24d 1770 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1771#ifdef PERL_MAD
cd81e915
NC
1772 if (PL_skipwhite) {
1773 sv_free(PL_skipwhite);
f0e67a1d 1774 PL_skipwhite = NULL;
5db06880 1775 }
f0e67a1d 1776#endif /* PERL_MAD */
3280af22 1777 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1778 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1779 s++;
f0e67a1d
Z
1780 } else {
1781 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1782 PL_bufptr = s;
f0998909
Z
1783 lex_read_space(LEX_KEEP_PREVIOUS |
1784 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1785 LEX_NO_NEXT_CHUNK : 0));
3280af22 1786 s = PL_bufptr;
f0e67a1d
Z
1787 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1788 if (PL_linestart > PL_bufptr)
1789 PL_bufptr = PL_linestart;
1790 return s;
463ee0b2 1791 }
5db06880 1792#ifdef PERL_MAD
f0e67a1d
Z
1793 if (PL_madskills)
1794 PL_skipwhite = newSVpvn(start, s-start);
1795#endif /* PERL_MAD */
5db06880 1796 return s;
a687059c 1797}
378cc40b 1798
ffb4593c
NT
1799/*
1800 * S_check_uni
1801 * Check the unary operators to ensure there's no ambiguity in how they're
1802 * used. An ambiguous piece of code would be:
1803 * rand + 5
1804 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1805 * the +5 is its argument.
1806 */
1807
76e3520e 1808STATIC void
cea2e8a9 1809S_check_uni(pTHX)
ba106d47 1810{
97aff369 1811 dVAR;
d4c19fe8
AL
1812 const char *s;
1813 const char *t;
2f3197b3 1814
3280af22 1815 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1816 return;
3280af22
NIS
1817 while (isSPACE(*PL_last_uni))
1818 PL_last_uni++;
c35e046a
AL
1819 s = PL_last_uni;
1820 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1821 s++;
3280af22 1822 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1823 return;
6136c704 1824
9b387841
NC
1825 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1826 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1827 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1828}
1829
ffb4593c
NT
1830/*
1831 * LOP : macro to build a list operator. Its behaviour has been replaced
1832 * with a subroutine, S_lop() for which LOP is just another name.
1833 */
1834
a0d0e21e
LW
1835#define LOP(f,x) return lop(f,x,s)
1836
ffb4593c
NT
1837/*
1838 * S_lop
1839 * Build a list operator (or something that might be one). The rules:
1840 * - if we have a next token, then it's a list operator [why?]
1841 * - if the next thing is an opening paren, then it's a function
1842 * - else it's a list operator
1843 */
1844
76e3520e 1845STATIC I32
a0be28da 1846S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1847{
97aff369 1848 dVAR;
7918f24d
NC
1849
1850 PERL_ARGS_ASSERT_LOP;
1851
6154021b 1852 pl_yylval.ival = f;
35c8bce7 1853 CLINE;
3280af22
NIS
1854 PL_expect = x;
1855 PL_bufptr = s;
1856 PL_last_lop = PL_oldbufptr;
eb160463 1857 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1858#ifdef PERL_MAD
1859 if (PL_lasttoke)
78cdf107 1860 goto lstop;
5db06880 1861#else
3280af22 1862 if (PL_nexttoke)
78cdf107 1863 goto lstop;
5db06880 1864#endif
79072805 1865 if (*s == '(')
bbf60fe6 1866 return REPORT(FUNC);
29595ff2 1867 s = PEEKSPACE(s);
79072805 1868 if (*s == '(')
bbf60fe6 1869 return REPORT(FUNC);
78cdf107
Z
1870 else {
1871 lstop:
1872 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1873 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1874 return REPORT(LSTOP);
78cdf107 1875 }
79072805
LW
1876}
1877
5db06880
NC
1878#ifdef PERL_MAD
1879 /*
1880 * S_start_force
1881 * Sets up for an eventual force_next(). start_force(0) basically does
1882 * an unshift, while start_force(-1) does a push. yylex removes items
1883 * on the "pop" end.
1884 */
1885
1886STATIC void
1887S_start_force(pTHX_ int where)
1888{
1889 int i;
1890
cd81e915 1891 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1892 where = PL_lasttoke;
cd81e915
NC
1893 assert(PL_curforce < 0 || PL_curforce == where);
1894 if (PL_curforce != where) {
5db06880
NC
1895 for (i = PL_lasttoke; i > where; --i) {
1896 PL_nexttoke[i] = PL_nexttoke[i-1];
1897 }
1898 PL_lasttoke++;
1899 }
cd81e915 1900 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1901 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1902 PL_curforce = where;
1903 if (PL_nextwhite) {
5db06880 1904 if (PL_madskills)
6b29d1f5 1905 curmad('^', newSVpvs(""));
cd81e915 1906 CURMAD('_', PL_nextwhite);
5db06880
NC
1907 }
1908}
1909
1910STATIC void
1911S_curmad(pTHX_ char slot, SV *sv)
1912{
1913 MADPROP **where;
1914
1915 if (!sv)
1916 return;
cd81e915
NC
1917 if (PL_curforce < 0)
1918 where = &PL_thismad;
5db06880 1919 else
cd81e915 1920 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1921
cd81e915 1922 if (PL_faketokens)
76f68e9b 1923 sv_setpvs(sv, "");
5db06880
NC
1924 else {
1925 if (!IN_BYTES) {
1926 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1927 SvUTF8_on(sv);
1928 else if (PL_encoding) {
1929 sv_recode_to_utf8(sv, PL_encoding);
1930 }
1931 }
1932 }
1933
1934 /* keep a slot open for the head of the list? */
1935 if (slot != '_' && *where && (*where)->mad_key == '^') {
1936 (*where)->mad_key = slot;
daba3364 1937 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1938 (*where)->mad_val = (void*)sv;
1939 }
1940 else
1941 addmad(newMADsv(slot, sv), where, 0);
1942}
1943#else
b3f24c00
MHM
1944# define start_force(where) NOOP
1945# define curmad(slot, sv) NOOP
5db06880
NC
1946#endif
1947
ffb4593c
NT
1948/*
1949 * S_force_next
9cbb5ea2 1950 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1951 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1952 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1953 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1954 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1955 */
1956
4e553d73 1957STATIC void
cea2e8a9 1958S_force_next(pTHX_ I32 type)
79072805 1959{
97aff369 1960 dVAR;
704d4215
GG
1961#ifdef DEBUGGING
1962 if (DEBUG_T_TEST) {
1963 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1964 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1965 }
1966#endif
5db06880 1967#ifdef PERL_MAD
cd81e915 1968 if (PL_curforce < 0)
5db06880 1969 start_force(PL_lasttoke);
cd81e915 1970 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1971 if (PL_lex_state != LEX_KNOWNEXT)
1972 PL_lex_defer = PL_lex_state;
1973 PL_lex_state = LEX_KNOWNEXT;
1974 PL_lex_expect = PL_expect;
cd81e915 1975 PL_curforce = -1;
5db06880 1976#else
3280af22
NIS
1977 PL_nexttype[PL_nexttoke] = type;
1978 PL_nexttoke++;
1979 if (PL_lex_state != LEX_KNOWNEXT) {
1980 PL_lex_defer = PL_lex_state;
1981 PL_lex_expect = PL_expect;
1982 PL_lex_state = LEX_KNOWNEXT;
79072805 1983 }
5db06880 1984#endif
79072805
LW
1985}
1986
28ac2b49
Z
1987void
1988Perl_yyunlex(pTHX)
1989{
a7aaec61
Z
1990 int yyc = PL_parser->yychar;
1991 if (yyc != YYEMPTY) {
1992 if (yyc) {
1993 start_force(-1);
1994 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1995 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 1996 PL_lex_allbrackets--;
a7aaec61 1997 PL_lex_brackets--;
78cdf107
Z
1998 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1999 } else if (yyc == '('/*)*/) {
2000 PL_lex_allbrackets--;
2001 yyc |= (2<<24);
a7aaec61
Z
2002 }
2003 force_next(yyc);
2004 }
28ac2b49
Z
2005 PL_parser->yychar = YYEMPTY;
2006 }
2007}
2008
d0a148a6 2009STATIC SV *
15f169a1 2010S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2011{
97aff369 2012 dVAR;
740cce10 2013 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2014 !IN_BYTES
2015 && UTF
2016 && !is_ascii_string((const U8*)start, len)
740cce10 2017 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2018 return sv;
2019}
2020
ffb4593c
NT
2021/*
2022 * S_force_word
2023 * When the lexer knows the next thing is a word (for instance, it has
2024 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2025 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2026 * lookahead.
ffb4593c
NT
2027 *
2028 * Arguments:
b1b65b59 2029 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2030 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2031 * int check_keyword : if true, Perl checks to make sure the word isn't
2032 * a keyword (do this if the word is a label, e.g. goto FOO)
2033 * int allow_pack : if true, : characters will also be allowed (require,
2034 * use, etc. do this)
9cbb5ea2 2035 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2036 */
2037
76e3520e 2038STATIC char *
cea2e8a9 2039S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2040{
97aff369 2041 dVAR;
463ee0b2
LW
2042 register char *s;
2043 STRLEN len;
4e553d73 2044
7918f24d
NC
2045 PERL_ARGS_ASSERT_FORCE_WORD;
2046
29595ff2 2047 start = SKIPSPACE1(start);
463ee0b2 2048 s = start;
7e2040f0 2049 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2050 (allow_pack && *s == ':') ||
15f0808c 2051 (allow_initial_tick && *s == '\'') )
a0d0e21e 2052 {
3280af22 2053 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2054 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2055 return start;
cd81e915 2056 start_force(PL_curforce);
5db06880
NC
2057 if (PL_madskills)
2058 curmad('X', newSVpvn(start,s-start));
463ee0b2 2059 if (token == METHOD) {
29595ff2 2060 s = SKIPSPACE1(s);
463ee0b2 2061 if (*s == '(')
3280af22 2062 PL_expect = XTERM;
463ee0b2 2063 else {
3280af22 2064 PL_expect = XOPERATOR;
463ee0b2 2065 }
79072805 2066 }
e74e6b3d 2067 if (PL_madskills)
63575281 2068 curmad('g', newSVpvs( "forced" ));
9ded7720 2069 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2070 = (OP*)newSVOP(OP_CONST,0,
2071 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2072 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2073 force_next(token);
2074 }
2075 return s;
2076}
2077
ffb4593c
NT
2078/*
2079 * S_force_ident
9cbb5ea2 2080 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2081 * text only contains the "foo" portion. The first argument is a pointer
2082 * to the "foo", and the second argument is the type symbol to prefix.
2083 * Forces the next token to be a "WORD".
9cbb5ea2 2084 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2085 */
2086
76e3520e 2087STATIC void
bfed75c6 2088S_force_ident(pTHX_ register const char *s, int kind)
79072805 2089{
97aff369 2090 dVAR;
7918f24d
NC
2091
2092 PERL_ARGS_ASSERT_FORCE_IDENT;
2093
c35e046a 2094 if (*s) {
90e5519e
NC
2095 const STRLEN len = strlen(s);
2096 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2097 start_force(PL_curforce);
9ded7720 2098 NEXTVAL_NEXTTOKE.opval = o;
79072805 2099 force_next(WORD);
748a9306 2100 if (kind) {
11343788 2101 o->op_private = OPpCONST_ENTERED;
55497cff 2102 /* XXX see note in pp_entereval() for why we forgo typo
2103 warnings if the symbol must be introduced in an eval.
2104 GSAR 96-10-12 */
90e5519e
NC
2105 gv_fetchpvn_flags(s, len,
2106 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2107 : GV_ADD,
2108 kind == '$' ? SVt_PV :
2109 kind == '@' ? SVt_PVAV :
2110 kind == '%' ? SVt_PVHV :
a0d0e21e 2111 SVt_PVGV
90e5519e 2112 );
748a9306 2113 }
79072805
LW
2114 }
2115}
2116
1571675a
GS
2117NV
2118Perl_str_to_version(pTHX_ SV *sv)
2119{
2120 NV retval = 0.0;
2121 NV nshift = 1.0;
2122 STRLEN len;
cfd0369c 2123 const char *start = SvPV_const(sv,len);
9d4ba2ae 2124 const char * const end = start + len;
504618e9 2125 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2126
2127 PERL_ARGS_ASSERT_STR_TO_VERSION;
2128
1571675a 2129 while (start < end) {
ba210ebe 2130 STRLEN skip;
1571675a
GS
2131 UV n;
2132 if (utf)
9041c2e3 2133 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2134 else {
2135 n = *(U8*)start;
2136 skip = 1;
2137 }
2138 retval += ((NV)n)/nshift;
2139 start += skip;
2140 nshift *= 1000;
2141 }
2142 return retval;
2143}
2144
4e553d73 2145/*
ffb4593c
NT
2146 * S_force_version
2147 * Forces the next token to be a version number.
e759cc13
RGS
2148 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2149 * and if "guessing" is TRUE, then no new token is created (and the caller
2150 * must use an alternative parsing method).
ffb4593c
NT
2151 */
2152
76e3520e 2153STATIC char *
e759cc13 2154S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2155{
97aff369 2156 dVAR;
5f66b61c 2157 OP *version = NULL;
44dcb63b 2158 char *d;
5db06880
NC
2159#ifdef PERL_MAD
2160 I32 startoff = s - SvPVX(PL_linestr);
2161#endif
89bfa8cd 2162
7918f24d
NC
2163 PERL_ARGS_ASSERT_FORCE_VERSION;
2164
29595ff2 2165 s = SKIPSPACE1(s);
89bfa8cd 2166
44dcb63b 2167 d = s;
dd629d5b 2168 if (*d == 'v')
44dcb63b 2169 d++;
44dcb63b 2170 if (isDIGIT(*d)) {
e759cc13
RGS
2171 while (isDIGIT(*d) || *d == '_' || *d == '.')
2172 d++;
5db06880
NC
2173#ifdef PERL_MAD
2174 if (PL_madskills) {
cd81e915 2175 start_force(PL_curforce);
5db06880
NC
2176 curmad('X', newSVpvn(s,d-s));
2177 }
2178#endif
4e4da3ac 2179 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2180 SV *ver;
8d08d9ba
DG
2181#ifdef USE_LOCALE_NUMERIC
2182 char *loc = setlocale(LC_NUMERIC, "C");
2183#endif
6154021b 2184 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2185#ifdef USE_LOCALE_NUMERIC
2186 setlocale(LC_NUMERIC, loc);
2187#endif
6154021b 2188 version = pl_yylval.opval;
dd629d5b
GS
2189 ver = cSVOPx(version)->op_sv;
2190 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2191 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2192 SvNV_set(ver, str_to_version(ver));
1571675a 2193 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2194 }
89bfa8cd 2195 }
5db06880
NC
2196 else if (guessing) {
2197#ifdef PERL_MAD
2198 if (PL_madskills) {
cd81e915
NC
2199 sv_free(PL_nextwhite); /* let next token collect whitespace */
2200 PL_nextwhite = 0;
5db06880
NC
2201 s = SvPVX(PL_linestr) + startoff;
2202 }
2203#endif
e759cc13 2204 return s;
5db06880 2205 }
89bfa8cd 2206 }
2207
5db06880
NC
2208#ifdef PERL_MAD
2209 if (PL_madskills && !version) {
cd81e915
NC
2210 sv_free(PL_nextwhite); /* let next token collect whitespace */
2211 PL_nextwhite = 0;
5db06880
NC
2212 s = SvPVX(PL_linestr) + startoff;
2213 }
2214#endif
89bfa8cd 2215 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2216 start_force(PL_curforce);
9ded7720 2217 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2218 force_next(WORD);
89bfa8cd 2219
e759cc13 2220 return s;
89bfa8cd 2221}
2222
ffb4593c 2223/*
91152fc1
DG
2224 * S_force_strict_version
2225 * Forces the next token to be a version number using strict syntax rules.
2226 */
2227
2228STATIC char *
2229S_force_strict_version(pTHX_ char *s)
2230{
2231 dVAR;
2232 OP *version = NULL;
2233#ifdef PERL_MAD
2234 I32 startoff = s - SvPVX(PL_linestr);
2235#endif
2236 const char *errstr = NULL;
2237
2238 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2239
2240 while (isSPACE(*s)) /* leading whitespace */
2241 s++;
2242
2243 if (is_STRICT_VERSION(s,&errstr)) {
2244 SV *ver = newSV(0);
2245 s = (char *)scan_version(s, ver, 0);
2246 version = newSVOP(OP_CONST, 0, ver);
2247 }
4e4da3ac
Z
2248 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2249 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2250 {
91152fc1
DG
2251 PL_bufptr = s;
2252 if (errstr)
2253 yyerror(errstr); /* version required */
2254 return s;
2255 }
2256
2257#ifdef PERL_MAD
2258 if (PL_madskills && !version) {
2259 sv_free(PL_nextwhite); /* let next token collect whitespace */
2260 PL_nextwhite = 0;
2261 s = SvPVX(PL_linestr) + startoff;
2262 }
2263#endif
2264 /* NOTE: The parser sees the package name and the VERSION swapped */
2265 start_force(PL_curforce);
2266 NEXTVAL_NEXTTOKE.opval = version;
2267 force_next(WORD);
2268
2269 return s;
2270}
2271
2272/*
ffb4593c
NT
2273 * S_tokeq
2274 * Tokenize a quoted string passed in as an SV. It finds the next
2275 * chunk, up to end of string or a backslash. It may make a new
2276 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2277 * turns \\ into \.
2278 */
2279
76e3520e 2280STATIC SV *
cea2e8a9 2281S_tokeq(pTHX_ SV *sv)
79072805 2282{
97aff369 2283 dVAR;
79072805
LW
2284 register char *s;
2285 register char *send;
2286 register char *d;
b3ac6de7
IZ
2287 STRLEN len = 0;
2288 SV *pv = sv;
79072805 2289
7918f24d
NC
2290 PERL_ARGS_ASSERT_TOKEQ;
2291
79072805 2292 if (!SvLEN(sv))
b3ac6de7 2293 goto finish;
79072805 2294
a0d0e21e 2295 s = SvPV_force(sv, len);
21a311ee 2296 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2297 goto finish;
463ee0b2 2298 send = s + len;
dcb21ed6
NC
2299 /* This is relying on the SV being "well formed" with a trailing '\0' */
2300 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2301 s++;
2302 if (s == send)
b3ac6de7 2303 goto finish;
79072805 2304 d = s;
be4731d2 2305 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2306 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2307 }
79072805
LW
2308 while (s < send) {
2309 if (*s == '\\') {
a0d0e21e 2310 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2311 s++; /* all that, just for this */
2312 }
2313 *d++ = *s++;
2314 }
2315 *d = '\0';
95a20fc0 2316 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2317 finish:
3280af22 2318 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2319 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2320 return sv;
2321}
2322
ffb4593c
NT
2323/*
2324 * Now come three functions related to double-quote context,
2325 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2326 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2327 * interact with PL_lex_state, and create fake ( ... ) argument lists
2328 * to handle functions and concatenation.
2329 * They assume that whoever calls them will be setting up a fake
2330 * join call, because each subthing puts a ',' after it. This lets
2331 * "lower \luPpEr"
2332 * become
2333 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2334 *
2335 * (I'm not sure whether the spurious commas at the end of lcfirst's
2336 * arguments and join's arguments are created or not).
2337 */
2338
2339/*
2340 * S_sublex_start
6154021b 2341 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2342 *
2343 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2344 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2345 *
2346 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2347 *
2348 * Everything else becomes a FUNC.
2349 *
2350 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2351 * had an OP_CONST or OP_READLINE). This just sets us up for a
2352 * call to S_sublex_push().
2353 */
2354
76e3520e 2355STATIC I32
cea2e8a9 2356S_sublex_start(pTHX)
79072805 2357{
97aff369 2358 dVAR;
6154021b 2359 register const I32 op_type = pl_yylval.ival;
79072805
LW
2360
2361 if (op_type == OP_NULL) {
6154021b 2362 pl_yylval.opval = PL_lex_op;
5f66b61c 2363 PL_lex_op = NULL;
79072805
LW
2364 return THING;
2365 }
2366 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2367 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2368
2369 if (SvTYPE(sv) == SVt_PVIV) {
2370 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2371 STRLEN len;
96a5add6 2372 const char * const p = SvPV_const(sv, len);
740cce10 2373 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2374 SvREFCNT_dec(sv);
2375 sv = nsv;
4e553d73 2376 }
6154021b 2377 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2378 PL_lex_stuff = NULL;
6f33ba73
RGS
2379 /* Allow <FH> // "foo" */
2380 if (op_type == OP_READLINE)
2381 PL_expect = XTERMORDORDOR;
79072805
LW
2382 return THING;
2383 }
e3f73d4e
RGS
2384 else if (op_type == OP_BACKTICK && PL_lex_op) {
2385 /* readpipe() vas overriden */
2386 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2387 pl_yylval.opval = PL_lex_op;
9b201d7d 2388 PL_lex_op = NULL;
e3f73d4e
RGS
2389 PL_lex_stuff = NULL;
2390 return THING;
2391 }
79072805 2392
3280af22 2393 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2394 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2395 PL_sublex_info.sub_op = PL_lex_op;
2396 PL_lex_state = LEX_INTERPPUSH;
55497cff 2397
3280af22
NIS
2398 PL_expect = XTERM;
2399 if (PL_lex_op) {
6154021b 2400 pl_yylval.opval = PL_lex_op;
5f66b61c 2401 PL_lex_op = NULL;
55497cff 2402 return PMFUNC;
2403 }
2404 else
2405 return FUNC;
2406}
2407
ffb4593c
NT
2408/*
2409 * S_sublex_push
2410 * Create a new scope to save the lexing state. The scope will be
2411 * ended in S_sublex_done. Returns a '(', starting the function arguments
2412 * to the uc, lc, etc. found before.
2413 * Sets PL_lex_state to LEX_INTERPCONCAT.
2414 */
2415
76e3520e 2416STATIC I32
cea2e8a9 2417S_sublex_push(pTHX)
55497cff 2418{
27da23d5 2419 dVAR;
f46d017c 2420 ENTER;
55497cff 2421
3280af22 2422 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2423 SAVEBOOL(PL_lex_dojoin);
3280af22 2424 SAVEI32(PL_lex_brackets);
78cdf107
Z
2425 SAVEI32(PL_lex_allbrackets);
2426 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2427 SAVEI32(PL_lex_casemods);
2428 SAVEI32(PL_lex_starts);
651b5b28 2429 SAVEI8(PL_lex_state);
7766f137 2430 SAVEVPTR(PL_lex_inpat);
98246f1e 2431 SAVEI16(PL_lex_inwhat);
57843af0 2432 SAVECOPLINE(PL_curcop);
3280af22 2433 SAVEPPTR(PL_bufptr);
8452ff4b 2434 SAVEPPTR(PL_bufend);
3280af22
NIS
2435 SAVEPPTR(PL_oldbufptr);
2436 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2437 SAVEPPTR(PL_last_lop);
2438 SAVEPPTR(PL_last_uni);
3280af22
NIS
2439 SAVEPPTR(PL_linestart);
2440 SAVESPTR(PL_linestr);
8edd5f42
RGS
2441 SAVEGENERICPV(PL_lex_brackstack);
2442 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2443
2444 PL_linestr = PL_lex_stuff;
a0714e2c 2445 PL_lex_stuff = NULL;
3280af22 2446
9cbb5ea2
GS
2447 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2448 = SvPVX(PL_linestr);
3280af22 2449 PL_bufend += SvCUR(PL_linestr);
bd61b366 2450 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2451 SAVEFREESV(PL_linestr);
2452
2453 PL_lex_dojoin = FALSE;
2454 PL_lex_brackets = 0;
78cdf107
Z
2455 PL_lex_allbrackets = 0;
2456 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2457 Newx(PL_lex_brackstack, 120, char);
2458 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2459 PL_lex_casemods = 0;
2460 *PL_lex_casestack = '\0';
2461 PL_lex_starts = 0;
2462 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2463 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2464
2465 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2466 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2467 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2468 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2469 else
5f66b61c 2470 PL_lex_inpat = NULL;
79072805 2471
55497cff 2472 return '(';
79072805
LW
2473}
2474
ffb4593c
NT
2475/*
2476 * S_sublex_done
2477 * Restores lexer state after a S_sublex_push.
2478 */
2479
76e3520e 2480STATIC I32
cea2e8a9 2481S_sublex_done(pTHX)
79072805 2482{
27da23d5 2483 dVAR;
3280af22 2484 if (!PL_lex_starts++) {
396482e1 2485 SV * const sv = newSVpvs("");
9aa983d2
JH
2486 if (SvUTF8(PL_linestr))
2487 SvUTF8_on(sv);
3280af22 2488 PL_expect = XOPERATOR;
6154021b 2489 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2490 return THING;
2491 }
2492
3280af22
NIS
2493 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2494 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2495 return yylex();
79072805
LW
2496 }
2497
ffb4593c 2498 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2499 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2500 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2501 PL_linestr = PL_lex_repl;
2502 PL_lex_inpat = 0;
2503 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2504 PL_bufend += SvCUR(PL_linestr);
bd61b366 2505 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2506 SAVEFREESV(PL_linestr);
2507 PL_lex_dojoin = FALSE;
2508 PL_lex_brackets = 0;
78cdf107
Z
2509 PL_lex_allbrackets = 0;
2510 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2511 PL_lex_casemods = 0;
2512 *PL_lex_casestack = '\0';
2513 PL_lex_starts = 0;
25da4f38 2514 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2515 PL_lex_state = LEX_INTERPNORMAL;
2516 PL_lex_starts++;
e9fa98b2
HS
2517 /* we don't clear PL_lex_repl here, so that we can check later
2518 whether this is an evalled subst; that means we rely on the
2519 logic to ensure sublex_done() is called again only via the
2520 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2521 }
e9fa98b2 2522 else {
3280af22 2523 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2524 PL_lex_repl = NULL;
e9fa98b2 2525 }
79072805 2526 return ',';
ffed7fef
LW
2527 }
2528 else {
5db06880
NC
2529#ifdef PERL_MAD
2530 if (PL_madskills) {
cd81e915
NC
2531 if (PL_thiswhite) {
2532 if (!PL_endwhite)
6b29d1f5 2533 PL_endwhite = newSVpvs("");
cd81e915
NC
2534 sv_catsv(PL_endwhite, PL_thiswhite);
2535 PL_thiswhite = 0;
2536 }
2537 if (PL_thistoken)
76f68e9b 2538 sv_setpvs(PL_thistoken,"");
5db06880 2539 else
cd81e915 2540 PL_realtokenstart = -1;
5db06880
NC
2541 }
2542#endif
f46d017c 2543 LEAVE;
3280af22
NIS
2544 PL_bufend = SvPVX(PL_linestr);
2545 PL_bufend += SvCUR(PL_linestr);
2546 PL_expect = XOPERATOR;
09bef843 2547 PL_sublex_info.sub_inwhat = 0;
79072805 2548 return ')';
ffed7fef
LW
2549 }
2550}
2551
02aa26ce
NT
2552/*
2553 scan_const
2554
2555 Extracts a pattern, double-quoted string, or transliteration. This
2556 is terrifying code.
2557
94def140 2558 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2559 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2560 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2561
94def140
TS
2562 Returns a pointer to the character scanned up to. If this is
2563 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2564 successfully parsed), will leave an OP for the substring scanned
6154021b 2565 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2566 by looking at the next characters herself.
2567
02aa26ce
NT
2568 In patterns:
2569 backslashes:
ff3f963a 2570 constants: \N{NAME} only
02aa26ce
NT
2571 case and quoting: \U \Q \E
2572 stops on @ and $, but not for $ as tail anchor
2573
2574 In transliterations:
2575 characters are VERY literal, except for - not at the start or end
94def140
TS
2576 of the string, which indicates a range. If the range is in bytes,
2577 scan_const expands the range to the full set of intermediate
2578 characters. If the range is in utf8, the hyphen is replaced with
2579 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2580
2581 In double-quoted strings:
2582 backslashes:
2583 double-quoted style: \r and \n
ff3f963a 2584 constants: \x31, etc.
94def140 2585 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2586 case and quoting: \U \Q \E
2587 stops on @ and $
2588
2589 scan_const does *not* construct ops to handle interpolated strings.
2590 It stops processing as soon as it finds an embedded $ or @ variable
2591 and leaves it to the caller to work out what's going on.
2592
94def140
TS
2593 embedded arrays (whether in pattern or not) could be:
2594 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2595
2596 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2597
2598 $ in pattern could be $foo or could be tail anchor. Assumption:
2599 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2600 followed by one of "()| \r\n\t"
02aa26ce
NT
2601
2602 \1 (backreferences) are turned into $1
2603
2604 The structure of the code is
2605 while (there's a character to process) {
94def140
TS
2606 handle transliteration ranges
2607 skip regexp comments /(?#comment)/ and codes /(?{code})/
2608 skip #-initiated comments in //x patterns
2609 check for embedded arrays
02aa26ce
NT
2610 check for embedded scalars
2611 if (backslash) {
94def140 2612 deprecate \1 in substitution replacements
02aa26ce
NT
2613 handle string-changing backslashes \l \U \Q \E, etc.
2614 switch (what was escaped) {
94def140 2615 handle \- in a transliteration (becomes a literal -)
ff3f963a 2616 if a pattern and not \N{, go treat as regular character
94def140
TS
2617 handle \132 (octal characters)
2618 handle \x15 and \x{1234} (hex characters)
ff3f963a 2619 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2620 handle \cV (control characters)
2621 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2622 } (end switch)
77a135fe 2623 continue
02aa26ce 2624 } (end if backslash)
77a135fe 2625 handle regular character
02aa26ce 2626 } (end while character to read)
4e553d73 2627
02aa26ce
NT
2628*/
2629
76e3520e 2630STATIC char *
cea2e8a9 2631S_scan_const(pTHX_ char *start)
79072805 2632{
97aff369 2633 dVAR;
3280af22 2634 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2635 SV *sv = newSV(send - start); /* sv for the constant. See
2636 note below on sizing. */
02aa26ce
NT
2637 register char *s = start; /* start of the constant */
2638 register char *d = SvPVX(sv); /* destination for copies */
2639 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2640 bool didrange = FALSE; /* did we just finish a range? */
b953e60c
KW
2641 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2642 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2643 to be UTF8? But, this can
2644 show as true when the source
2645 isn't utf8, as for example
2646 when it is entirely composed
2647 of hex constants */
2648
2649 /* Note on sizing: The scanned constant is placed into sv, which is
2650 * initialized by newSV() assuming one byte of output for every byte of
2651 * input. This routine expects newSV() to allocate an extra byte for a
2652 * trailing NUL, which this routine will append if it gets to the end of
2653 * the input. There may be more bytes of input than output (eg., \N{LATIN
2654 * CAPITAL LETTER A}), or more output than input if the constant ends up
2655 * recoded to utf8, but each time a construct is found that might increase
2656 * the needed size, SvGROW() is called. Its size parameter each time is
2657 * based on the best guess estimate at the time, namely the length used so
2658 * far, plus the length the current construct will occupy, plus room for
2659 * the trailing NUL, plus one byte for every input byte still unscanned */
2660
012bcf8d 2661 UV uv;
4c3a8340
TS
2662#ifdef EBCDIC
2663 UV literal_endpoint = 0;
e294cc5d 2664 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2665#endif
012bcf8d 2666
7918f24d
NC
2667 PERL_ARGS_ASSERT_SCAN_CONST;
2668
bb16bae8 2669 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2670 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2671 /* If we are doing a trans and we know we want UTF8 set expectation */
2672 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2673 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2674 }
2675
2676
79072805 2677 while (s < send || dorange) {
ff3f963a 2678
02aa26ce 2679 /* get transliterations out of the way (they're most literal) */
3280af22 2680 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2681 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2682 if (dorange) {
1ba5c669
JH
2683 I32 i; /* current expanded character */
2684 I32 min; /* first character in range */
2685 I32 max; /* last character in range */
02aa26ce 2686
e294cc5d
JH
2687#ifdef EBCDIC
2688 UV uvmax = 0;
2689#endif
2690
2691 if (has_utf8
2692#ifdef EBCDIC
2693 && !native_range
2694#endif
2695 ) {
9d4ba2ae 2696 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2697 char *e = d++;
2698 while (e-- > c)
2699 *(e + 1) = *e;
25716404 2700 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2701 /* mark the range as done, and continue */
2702 dorange = FALSE;
2703 didrange = TRUE;
2704 continue;
2705 }
2b9d42f0 2706
95a20fc0 2707 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2708#ifdef EBCDIC
2709 SvGROW(sv,
2710 SvLEN(sv) + (has_utf8 ?
2711 (512 - UTF_CONTINUATION_MARK +
2712 UNISKIP(0x100))
2713 : 256));
2714 /* How many two-byte within 0..255: 128 in UTF-8,
2715 * 96 in UTF-8-mod. */
2716#else
9cbb5ea2 2717 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2718#endif
9cbb5ea2 2719 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2720#ifdef EBCDIC
2721 if (has_utf8) {
2722 int j;
2723 for (j = 0; j <= 1; j++) {
2724 char * const c = (char*)utf8_hop((U8*)d, -1);
2725 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2726 if (j)
2727 min = (U8)uv;
2728 else if (uv < 256)
2729 max = (U8)uv;
2730 else {
2731 max = (U8)0xff; /* only to \xff */
2732 uvmax = uv; /* \x{100} to uvmax */
2733 }
2734 d = c; /* eat endpoint chars */
2735 }
2736 }
2737 else {
2738#endif
2739 d -= 2; /* eat the first char and the - */
2740 min = (U8)*d; /* first char in range */
2741 max = (U8)d[1]; /* last char in range */
2742#ifdef EBCDIC
2743 }
2744#endif
8ada0baa 2745
c2e66d9e 2746 if (min > max) {
01ec43d0 2747 Perl_croak(aTHX_
d1573ac7 2748 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2749 (char)min, (char)max);
c2e66d9e
GS
2750 }
2751
c7f1f016 2752#ifdef EBCDIC
4c3a8340
TS
2753 if (literal_endpoint == 2 &&
2754 ((isLOWER(min) && isLOWER(max)) ||
2755 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2756 if (isLOWER(min)) {
2757 for (i = min; i <= max; i++)
2758 if (isLOWER(i))
db42d148 2759 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2760 } else {
2761 for (i = min; i <= max; i++)
2762 if (isUPPER(i))
db42d148 2763 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2764 }
2765 }
2766 else
2767#endif
2768 for (i = min; i <= max; i++)
e294cc5d
JH
2769#ifdef EBCDIC
2770 if (has_utf8) {
2771 const U8 ch = (U8)NATIVE_TO_UTF(i);
2772 if (UNI_IS_INVARIANT(ch))
2773 *d++ = (U8)i;
2774 else {
2775 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2776 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2777 }
2778 }
2779 else
2780#endif
2781 *d++ = (char)i;
2782
2783#ifdef EBCDIC
2784 if (uvmax) {
2785 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2786 if (uvmax > 0x101)
2787 *d++ = (char)UTF_TO_NATIVE(0xff);
2788 if (uvmax > 0x100)
2789 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2790 }
2791#endif
02aa26ce
NT
2792
2793 /* mark the range as done, and continue */
79072805 2794 dorange = FALSE;
01ec43d0 2795 didrange = TRUE;
4c3a8340
TS
2796#ifdef EBCDIC
2797 literal_endpoint = 0;
2798#endif
79072805 2799 continue;
4e553d73 2800 }
02aa26ce
NT
2801
2802 /* range begins (ignore - as first or last char) */
79072805 2803 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2804 if (didrange) {
1fafa243 2805 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2806 }
e294cc5d
JH
2807 if (has_utf8
2808#ifdef EBCDIC
2809 && !native_range
2810#endif
2811 ) {
25716404 2812 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2813 s++;
2814 continue;
2815 }
79072805
LW
2816 dorange = TRUE;
2817 s++;
01ec43d0
GS
2818 }
2819 else {
2820 didrange = FALSE;
4c3a8340
TS
2821#ifdef EBCDIC
2822 literal_endpoint = 0;
e294cc5d 2823 native_range = TRUE;
4c3a8340 2824#endif
01ec43d0 2825 }
79072805 2826 }
02aa26ce
NT
2827
2828 /* if we get here, we're not doing a transliteration */
2829
0f5d15d6
IZ
2830 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2831 except for the last char, which will be done separately. */
3280af22 2832 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2833 if (s[2] == '#') {
e994fd66 2834 while (s+1 < send && *s != ')')
db42d148 2835 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2836 }
2837 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2838 || (s[2] == '?' && s[3] == '{'))
155aba94 2839 {
cc6b7395 2840 I32 count = 1;
0f5d15d6 2841 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2842 char c;
2843
d9f97599
GS
2844 while (count && (c = *regparse)) {
2845 if (c == '\\' && regparse[1])
2846 regparse++;
4e553d73 2847 else if (c == '{')
cc6b7395 2848 count++;
4e553d73 2849 else if (c == '}')
cc6b7395 2850 count--;
d9f97599 2851 regparse++;
cc6b7395 2852 }
e994fd66 2853 if (*regparse != ')')
5bdf89e7 2854 regparse--; /* Leave one char for continuation. */
0f5d15d6 2855 while (s < regparse)
db42d148 2856 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2857 }
748a9306 2858 }
02aa26ce
NT
2859
2860 /* likewise skip #-initiated comments in //x patterns */
3280af22 2861 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2862 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2863 while (s+1 < send && *s != '\n')
db42d148 2864 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2865 }
02aa26ce 2866
5d1d4326 2867 /* check for embedded arrays
da6eedaa 2868 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2869 */
1749ea0d
TS
2870 else if (*s == '@' && s[1]) {
2871 if (isALNUM_lazy_if(s+1,UTF))
2872 break;
2873 if (strchr(":'{$", s[1]))
2874 break;
2875 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2876 break; /* in regexp, neither @+ nor @- are interpolated */
2877 }
02aa26ce
NT
2878
2879 /* check for embedded scalars. only stop if we're sure it's a
2880 variable.
2881 */
79072805 2882 else if (*s == '$') {
3280af22 2883 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2884 break;
77772344 2885 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2886 if (s[1] == '\\') {
2887 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2888 "Possible unintended interpolation of $\\ in regex");
77772344 2889 }
79072805 2890 break; /* in regexp, $ might be tail anchor */
77772344 2891 }
79072805 2892 }
02aa26ce 2893
2b9d42f0
NIS
2894 /* End of else if chain - OP_TRANS rejoin rest */
2895
02aa26ce 2896 /* backslashes */
79072805 2897 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2898 char* e; /* Can be used for ending '}', etc. */
2899
79072805 2900 s++;
02aa26ce 2901
7d0fc23c
KW
2902 /* warn on \1 - \9 in substitution replacements, but note that \11
2903 * is an octal; and \19 is \1 followed by '9' */
3280af22 2904 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2905 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2906 {
a2a5de95 2907 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2908 *--s = '$';
2909 break;
2910 }
02aa26ce
NT
2911
2912 /* string-change backslash escapes */
3280af22 2913 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2914 --s;
2915 break;
2916 }
ff3f963a
KW
2917 /* In a pattern, process \N, but skip any other backslash escapes.
2918 * This is because we don't want to translate an escape sequence
2919 * into a meta symbol and have the regex compiler use the meta
2920 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2921 * in spite of this, we do have to process \N here while the proper
2922 * charnames handler is in scope. See bugs #56444 and #62056.
2923 * There is a complication because \N in a pattern may also stand
2924 * for 'match a non-nl', and not mean a charname, in which case its
2925 * processing should be deferred to the regex compiler. To be a
2926 * charname it must be followed immediately by a '{', and not look
2927 * like \N followed by a curly quantifier, i.e., not something like
2928 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2929 * quantifier */
2930 else if (PL_lex_inpat
2931 && (*s != 'N'
2932 || s[1] != '{'
2933 || regcurly(s + 1)))
2934 {
cc74c5bd
TS
2935 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2936 goto default_action;
2937 }
02aa26ce 2938
79072805 2939 switch (*s) {
02aa26ce
NT
2940
2941 /* quoted - in transliterations */
79072805 2942 case '-':
3280af22 2943 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2944 *d++ = *s++;
2945 continue;
2946 }
2947 /* FALL THROUGH */
2948 default:
11b8faa4 2949 {
a2a5de95
NC
2950 if ((isALPHA(*s) || isDIGIT(*s)))
2951 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2952 "Unrecognized escape \\%c passed through",
2953 *s);
11b8faa4 2954 /* default action is to copy the quoted character */
f9a63242 2955 goto default_action;
11b8faa4 2956 }
02aa26ce 2957
632403cc 2958 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2959 case '0': case '1': case '2': case '3':
2960 case '4': case '5': case '6': case '7':
ba210ebe 2961 {
53305cf1
NC
2962 I32 flags = 0;
2963 STRLEN len = 3;
77a135fe 2964 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2965 s += len;
2966 }
012bcf8d 2967 goto NUM_ESCAPE_INSERT;
02aa26ce 2968
f0a2b745
KW
2969 /* eg. \o{24} indicates the octal constant \024 */
2970 case 'o':
2971 {
2972 STRLEN len;
454155d9 2973 const char* error;
f0a2b745 2974
454155d9 2975 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2976 s += len;
454155d9 2977 if (! valid) {
f0a2b745
KW
2978 yyerror(error);
2979 continue;
2980 }
2981 goto NUM_ESCAPE_INSERT;
2982 }
2983
77a135fe 2984 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2985 case 'x':
a0ed51b3
LW
2986 ++s;
2987 if (*s == '{') {
9d4ba2ae 2988 char* const e = strchr(s, '}');
a4c04bdc
NC
2989 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2990 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2991 STRLEN len;
355860ce 2992
53305cf1 2993 ++s;
adaeee49 2994 if (!e) {
a0ed51b3 2995 yyerror("Missing right brace on \\x{}");
355860ce 2996 continue;
ba210ebe 2997 }
53305cf1 2998 len = e - s;
77a135fe 2999 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 3000 s = e + 1;
a0ed51b3
LW
3001 }
3002 else {
ba210ebe 3003 {
53305cf1 3004 STRLEN len = 2;
a4c04bdc 3005 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 3006 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
3007 s += len;
3008 }
012bcf8d
GS
3009 }
3010
3011 NUM_ESCAPE_INSERT:
ff3f963a
KW
3012 /* Insert oct or hex escaped character. There will always be
3013 * enough room in sv since such escapes will be longer than any
3014 * UTF-8 sequence they can end up as, except if they force us
3015 * to recode the rest of the string into utf8 */
ba7cea30 3016
77a135fe 3017 /* Here uv is the ordinal of the next character being added in
ff3f963a 3018 * unicode (converted from native). */
77a135fe 3019 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3020 if (!has_utf8 && uv > 255) {
77a135fe
KW
3021 /* Might need to recode whatever we have accumulated so
3022 * far if it contains any chars variant in utf8 or
3023 * utf-ebcdic. */
3024
3025 SvCUR_set(sv, d - SvPVX_const(sv));
3026 SvPOK_on(sv);
3027 *d = '\0';
77a135fe 3028 /* See Note on sizing above. */
7bf79863
KW
3029 sv_utf8_upgrade_flags_grow(sv,
3030 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3031 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3032 d = SvPVX(sv) + SvCUR(sv);
3033 has_utf8 = TRUE;
012bcf8d
GS
3034 }
3035
77a135fe
KW
3036 if (has_utf8) {
3037 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3038 if (PL_lex_inwhat == OP_TRANS &&
3039 PL_sublex_info.sub_op) {
3040 PL_sublex_info.sub_op->op_private |=
3041 (PL_lex_repl ? OPpTRANS_FROM_UTF
3042 : OPpTRANS_TO_UTF);
f9a63242 3043 }
e294cc5d
JH
3044#ifdef EBCDIC
3045 if (uv > 255 && !dorange)
3046 native_range = FALSE;
3047#endif
012bcf8d 3048 }
a0ed51b3 3049 else {
012bcf8d 3050 *d++ = (char)uv;
a0ed51b3 3051 }
012bcf8d
GS
3052 }
3053 else {
c4d5f83a 3054 *d++ = (char) uv;
a0ed51b3 3055 }
79072805 3056 continue;
02aa26ce 3057
4a2d328f 3058 case 'N':
ff3f963a
KW
3059 /* In a non-pattern \N must be a named character, like \N{LATIN
3060 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3061 * mean to match a non-newline. For non-patterns, named
3062 * characters are converted to their string equivalents. In
3063 * patterns, named characters are not converted to their
3064 * ultimate forms for the same reasons that other escapes
3065 * aren't. Instead, they are converted to the \N{U+...} form
3066 * to get the value from the charnames that is in effect right
3067 * now, while preserving the fact that it was a named character
3068 * so that the regex compiler knows this */
3069
3070 /* This section of code doesn't generally use the
3071 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3072 * a close examination of this macro and determined it is a
3073 * no-op except on utfebcdic variant characters. Every
3074 * character generated by this that would normally need to be
3075 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3076 * needed, and would complicate use of copy(). XXX There are
3077 * other parts of this file where the macro is used
3078 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3079
3080 /* The structure of this section of code (besides checking for
3081 * errors and upgrading to utf8) is:
3082 * Further disambiguate between the two meanings of \N, and if
3083 * not a charname, go process it elsewhere
0a96133f
KW
3084 * If of form \N{U+...}, pass it through if a pattern;
3085 * otherwise convert to utf8
3086 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3087 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3088
3089 /* Here, s points to the 'N'; the test below is guaranteed to
3090 * succeed if we are being called on a pattern as we already
3091 * know from a test above that the next character is a '{'.
3092 * On a non-pattern \N must mean 'named sequence, which
3093 * requires braces */
3094 s++;
3095 if (*s != '{') {
3096 yyerror("Missing braces on \\N{}");
3097 continue;
3098 }
3099 s++;
3100
0a96133f 3101 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3102 if (! (e = strchr(s, '}'))) {
3103 if (! PL_lex_inpat) {
5777a3f7 3104 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3105 } else {
3106 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3107 }
0a96133f 3108 continue;
ff3f963a 3109 }
cddc7ef4 3110
ff3f963a 3111 /* Here it looks like a named character */
cddc7ef4 3112
ff3f963a
KW
3113 if (PL_lex_inpat) {
3114
3115 /* XXX This block is temporary code. \N{} implies that the
3116 * pattern is to have Unicode semantics, and therefore
3117 * currently has to be encoded in utf8. By putting it in
3118 * utf8 now, we save a whole pass in the regular expression
3119 * compiler. Once that code is changed so Unicode
3120 * semantics doesn't necessarily have to be in utf8, this
da3a4baf
KW
3121 * block should be removed. However, the code that parses
3122 * the output of this would have to be changed to not
3123 * necessarily expect utf8 */
ff3f963a 3124 if (!has_utf8) {
77a135fe 3125 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3126 SvPOK_on(sv);
e4f3eed8 3127 *d = '\0';
77a135fe 3128 /* See Note on sizing above. */
7bf79863 3129 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3130 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3131 /* 5 = '\N{' + cur char + NUL */
3132 (STRLEN)(send - s) + 5);
f08d6ad9 3133 d = SvPVX(sv) + SvCUR(sv);
89491803 3134 has_utf8 = TRUE;
ff3f963a
KW
3135 }
3136 }
423cee85 3137
ff3f963a
KW
3138 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3139 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3140 | PERL_SCAN_DISALLOW_PREFIX;
3141 STRLEN len;
3142
3143 /* For \N{U+...}, the '...' is a unicode value even on
3144 * EBCDIC machines */
3145 s += 2; /* Skip to next char after the 'U+' */
3146 len = e - s;
3147 uv = grok_hex(s, &len, &flags, NULL);
3148 if (len == 0 || len != (STRLEN)(e - s)) {
3149 yyerror("Invalid hexadecimal number in \\N{U+...}");
3150 s = e + 1;
3151 continue;
3152 }
3153
3154 if (PL_lex_inpat) {
3155
e2a7e165
KW
3156 /* On non-EBCDIC platforms, pass through to the regex
3157 * compiler unchanged. The reason we evaluated the
3158 * number above is to make sure there wasn't a syntax
3159 * error. But on EBCDIC we convert to native so
3160 * downstream code can continue to assume it's native
3161 */
ff3f963a 3162 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3163#ifdef EBCDIC
3164 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3165 and the \0 */
3166 "\\N{U+%X}",
3167 (unsigned int) UNI_TO_NATIVE(uv));
3168#else
ff3f963a
KW
3169 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3170 d += e - s + 1;
e2a7e165 3171#endif
ff3f963a
KW
3172 }
3173 else { /* Not a pattern: convert the hex to string */
3174
3175 /* If destination is not in utf8, unconditionally
3176 * recode it to be so. This is because \N{} implies
3177 * Unicode semantics, and scalars have to be in utf8
3178 * to guarantee those semantics */
3179 if (! has_utf8) {
3180 SvCUR_set(sv, d - SvPVX_const(sv));
3181 SvPOK_on(sv);
3182 *d = '\0';
3183 /* See Note on sizing above. */
3184 sv_utf8_upgrade_flags_grow(
3185 sv,
3186 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3187 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3188 d = SvPVX(sv) + SvCUR(sv);
3189 has_utf8 = TRUE;
3190 }
3191
3192 /* Add the string to the output */
3193 if (UNI_IS_INVARIANT(uv)) {
3194 *d++ = (char) uv;
3195 }
3196 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3197 }
3198 }
3199 else { /* Here is \N{NAME} but not \N{U+...}. */
3200
3201 SV *res; /* result from charnames */
3202 const char *str; /* the string in 'res' */
3203 STRLEN len; /* its length */
3204
3205 /* Get the value for NAME */
3206 res = newSVpvn(s, e - s);
3207 res = new_constant( NULL, 0, "charnames",
3208 /* includes all of: \N{...} */
3209 res, NULL, s - 3, e - s + 4 );
3210
3211 /* Most likely res will be in utf8 already since the
3212 * standard charnames uses pack U, but a custom translator
3213 * can leave it otherwise, so make sure. XXX This can be
3214 * revisited to not have charnames use utf8 for characters
3215 * that don't need it when regexes don't have to be in utf8
3216 * for Unicode semantics. If doing so, remember EBCDIC */
3217 sv_utf8_upgrade(res);
3218 str = SvPV_const(res, len);
3219
3220 /* Don't accept malformed input */
3221 if (! is_utf8_string((U8 *) str, len)) {
3222 yyerror("Malformed UTF-8 returned by \\N");
3223 }
3224 else if (PL_lex_inpat) {
3225
3226 if (! len) { /* The name resolved to an empty string */
3227 Copy("\\N{}", d, 4, char);
3228 d += 4;
3229 }
3230 else {
3231 /* In order to not lose information for the regex
3232 * compiler, pass the result in the specially made
3233 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3234 * the code points in hex of each character
3235 * returned by charnames */
3236
3237 const char *str_end = str + len;
3238 STRLEN char_length; /* cur char's byte length */
3239 STRLEN output_length; /* and the number of bytes
3240 after this is translated
3241 into hex digits */
3242 const STRLEN off = d - SvPVX_const(sv);
3243
3244 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3245 * max('U+', '.'); and 1 for NUL */
3246 char hex_string[2 * UTF8_MAXBYTES + 5];
3247
3248 /* Get the first character of the result. */
3249 U32 uv = utf8n_to_uvuni((U8 *) str,
3250 len,
3251 &char_length,
3252 UTF8_ALLOW_ANYUV);
3253
3254 /* The call to is_utf8_string() above hopefully
3255 * guarantees that there won't be an error. But
3256 * it's easy here to make sure. The function just
3257 * above warns and returns 0 if invalid utf8, but
3258 * it can also return 0 if the input is validly a
3259 * NUL. Disambiguate */
3260 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3261 uv = UNICODE_REPLACEMENT;
3262 }
3263
3264 /* Convert first code point to hex, including the
e2a7e165
KW
3265 * boiler plate before it. For all these, we
3266 * convert to native format so that downstream code
3267 * can continue to assume the input is native */
78c35590 3268 output_length =
3353de27 3269 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3270 "\\N{U+%X",
3271 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3272
3273 /* Make sure there is enough space to hold it */
3274 d = off + SvGROW(sv, off
3275 + output_length
3276 + (STRLEN)(send - e)
3277 + 2); /* '}' + NUL */
3278 /* And output it */
3279 Copy(hex_string, d, output_length, char);
3280 d += output_length;
3281
3282 /* For each subsequent character, append dot and
3283 * its ordinal in hex */
3284 while ((str += char_length) < str_end) {
3285 const STRLEN off = d - SvPVX_const(sv);
3286 U32 uv = utf8n_to_uvuni((U8 *) str,
3287 str_end - str,
3288 &char_length,
3289 UTF8_ALLOW_ANYUV);
3290 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3291 uv = UNICODE_REPLACEMENT;
3292 }
3293
78c35590 3294 output_length =
3353de27 3295 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3296 ".%X",
3297 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3298
3299 d = off + SvGROW(sv, off
3300 + output_length
3301 + (STRLEN)(send - e)
3302 + 2); /* '}' + NUL */
3303 Copy(hex_string, d, output_length, char);
3304 d += output_length;
3305 }
3306
3307 *d++ = '}'; /* Done. Add the trailing brace */
3308 }
3309 }
3310 else { /* Here, not in a pattern. Convert the name to a
3311 * string. */
3312
3313 /* If destination is not in utf8, unconditionally
3314 * recode it to be so. This is because \N{} implies
3315 * Unicode semantics, and scalars have to be in utf8
3316 * to guarantee those semantics */
3317 if (! has_utf8) {
3318 SvCUR_set(sv, d - SvPVX_const(sv));
3319 SvPOK_on(sv);
3320 *d = '\0';
3321 /* See Note on sizing above. */
3322 sv_utf8_upgrade_flags_grow(sv,
3323 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3324 len + (STRLEN)(send - s) + 1);
3325 d = SvPVX(sv) + SvCUR(sv);
3326 has_utf8 = TRUE;
3327 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3328
3329 /* See Note on sizing above. (NOTE: SvCUR() is not
3330 * set correctly here). */
3331 const STRLEN off = d - SvPVX_const(sv);
3332 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3333 }
3334 Copy(str, d, len, char);
3335 d += len;
423cee85 3336 }
423cee85 3337 SvREFCNT_dec(res);
cb233ae3
KW
3338
3339 /* Deprecate non-approved name syntax */
3340 if (ckWARN_d(WARN_DEPRECATED)) {
3341 bool problematic = FALSE;
3342 char* i = s;
3343
3344 /* For non-ut8 input, look to see that the first
3345 * character is an alpha, then loop through the rest
3346 * checking that each is a continuation */
3347 if (! this_utf8) {
3348 if (! isALPHAU(*i)) problematic = TRUE;
3349 else for (i = s + 1; i < e; i++) {
3350 if (isCHARNAME_CONT(*i)) continue;
3351 problematic = TRUE;
3352 break;
3353 }
3354 }
3355 else {
3356 /* Similarly for utf8. For invariants can check
3357 * directly. We accept anything above the latin1
3358 * range because it is immaterial to Perl if it is
3359 * correct or not, and is expensive to check. But
3360 * it is fairly easy in the latin1 range to convert
3361 * the variants into a single character and check
3362 * those */
3363 if (UTF8_IS_INVARIANT(*i)) {
3364 if (! isALPHAU(*i)) problematic = TRUE;
3365 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3366 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3367 *(i+1)))))
3368 {
3369 problematic = TRUE;
3370 }
3371 }
3372 if (! problematic) for (i = s + UTF8SKIP(s);
3373 i < e;
3374 i+= UTF8SKIP(i))
3375 {
3376 if (UTF8_IS_INVARIANT(*i)) {
3377 if (isCHARNAME_CONT(*i)) continue;
3378 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3379 continue;
3380 } else if (isCHARNAME_CONT(
3381 UNI_TO_NATIVE(
81c14aa2 3382 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3383 {
3384 continue;
3385 }
3386 problematic = TRUE;
3387 break;
3388 }
3389 }
3390 if (problematic) {
6e1bad6c
KW
3391 /* The e-i passed to the final %.*s makes sure that
3392 * should the trailing NUL be missing that this
3393 * print won't run off the end of the string */
cb233ae3 3394 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3395 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3396 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3397 }
3398 }
3399 } /* End \N{NAME} */
ff3f963a
KW
3400#ifdef EBCDIC
3401 if (!dorange)
3402 native_range = FALSE; /* \N{} is defined to be Unicode */
3403#endif
3404 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3405 continue;
3406
02aa26ce 3407 /* \c is a control character */
79072805
LW
3408 case 'c':
3409 s++;
961ce445 3410 if (s < send) {
17a3df4c 3411 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3412 }
961ce445
RGS
3413 else {
3414 yyerror("Missing control char name in \\c");
3415 }
79072805 3416 continue;
02aa26ce
NT
3417
3418 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3419 case 'b':
db42d148 3420 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3421 break;
3422 case 'n':
db42d148 3423 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3424 break;
3425 case 'r':
db42d148 3426 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3427 break;
3428 case 'f':
db42d148 3429 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3430 break;
3431 case 't':
db42d148 3432 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3433 break;
34a3fe2a 3434 case 'e':
db42d148 3435 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3436 break;
3437 case 'a':
db42d148 3438 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3439 break;
02aa26ce
NT
3440 } /* end switch */
3441
79072805
LW
3442 s++;
3443 continue;
02aa26ce 3444 } /* end if (backslash) */
4c3a8340
TS
3445#ifdef EBCDIC
3446 else
3447 literal_endpoint++;
3448#endif
02aa26ce 3449
f9a63242 3450 default_action:
77a135fe
KW
3451 /* If we started with encoded form, or already know we want it,
3452 then encode the next character */
3453 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3454 STRLEN len = 1;
77a135fe
KW
3455
3456
3457 /* One might think that it is wasted effort in the case of the
3458 * source being utf8 (this_utf8 == TRUE) to take the next character
3459 * in the source, convert it to an unsigned value, and then convert
3460 * it back again. But the source has not been validated here. The
3461 * routine that does the conversion checks for errors like
3462 * malformed utf8 */
3463
5f66b61c
AL
3464 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3465 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3466 if (!has_utf8) {
3467 SvCUR_set(sv, d - SvPVX_const(sv));
3468 SvPOK_on(sv);
3469 *d = '\0';
77a135fe 3470 /* See Note on sizing above. */
7bf79863
KW
3471 sv_utf8_upgrade_flags_grow(sv,
3472 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3473 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3474 d = SvPVX(sv) + SvCUR(sv);
3475 has_utf8 = TRUE;
3476 } else if (need > len) {
3477 /* encoded value larger than old, may need extra space (NOTE:
3478 * SvCUR() is not set correctly here). See Note on sizing
3479 * above. */
9d4ba2ae 3480 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3481 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3482 }
77a135fe
KW
3483 s += len;
3484
5f66b61c 3485 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3486#ifdef EBCDIC
3487 if (uv > 255 && !dorange)
3488 native_range = FALSE;
3489#endif
2b9d42f0
NIS
3490 }
3491 else {
3492 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3493 }
02aa26ce
NT
3494 } /* while loop to process each character */
3495
3496 /* terminate the string and set up the sv */
79072805 3497 *d = '\0';
95a20fc0 3498 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3499 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3500 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3501
79072805 3502 SvPOK_on(sv);
9f4817db 3503 if (PL_encoding && !has_utf8) {
d0063567
DK
3504 sv_recode_to_utf8(sv, PL_encoding);
3505 if (SvUTF8(sv))
3506 has_utf8 = TRUE;
9f4817db 3507 }
2b9d42f0 3508 if (has_utf8) {
7e2040f0 3509 SvUTF8_on(sv);
2b9d42f0 3510 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3511 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3512 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3513 }
3514 }
79072805 3515
02aa26ce 3516 /* shrink the sv if we allocated more than we used */
79072805 3517 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3518 SvPV_shrink_to_cur(sv);
79072805 3519 }
02aa26ce 3520
6154021b 3521 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3522 if (s > PL_bufptr) {
eb0d8d16
NC
3523 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3524 const char *const key = PL_lex_inpat ? "qr" : "q";
3525 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3526 const char *type;
3527 STRLEN typelen;
3528
3529 if (PL_lex_inwhat == OP_TRANS) {
3530 type = "tr";
3531 typelen = 2;
3532 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3533 type = "s";
3534 typelen = 1;
3535 } else {
3536 type = "qq";
3537 typelen = 2;
3538 }
3539
3540 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3541 type, typelen);
3542 }
6154021b 3543 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3544 } else
8990e307 3545 SvREFCNT_dec(sv);
79072805
LW
3546 return s;
3547}
3548
ffb4593c
NT
3549/* S_intuit_more
3550 * Returns TRUE if there's more to the expression (e.g., a subscript),
3551 * FALSE otherwise.
ffb4593c
NT
3552 *
3553 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3554 *
3555 * ->[ and ->{ return TRUE
3556 * { and [ outside a pattern are always subscripts, so return TRUE
3557 * if we're outside a pattern and it's not { or [, then return FALSE
3558 * if we're in a pattern and the first char is a {
3559 * {4,5} (any digits around the comma) returns FALSE
3560 * if we're in a pattern and the first char is a [
3561 * [] returns FALSE
3562 * [SOMETHING] has a funky algorithm to decide whether it's a
3563 * character class or not. It has to deal with things like
3564 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3565 * anything else returns TRUE
3566 */
3567
9cbb5ea2
GS
3568/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3569
76e3520e 3570STATIC int
cea2e8a9 3571S_intuit_more(pTHX_ register char *s)
79072805 3572{
97aff369 3573 dVAR;
7918f24d
NC
3574
3575 PERL_ARGS_ASSERT_INTUIT_MORE;
3576
3280af22 3577 if (PL_lex_brackets)
79072805
LW
3578 return TRUE;
3579 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3580 return TRUE;
3581 if (*s != '{' && *s != '[')
3582 return FALSE;
3280af22 3583 if (!PL_lex_inpat)
79072805
LW
3584 return TRUE;
3585
3586 /* In a pattern, so maybe we have {n,m}. */
3587 if (*s == '{') {
b3155d95 3588 if (regcurly(s)) {
79072805 3589 return FALSE;
b3155d95 3590 }
79072805 3591 return TRUE;
79072805
LW
3592 }
3593
3594 /* On the other hand, maybe we have a character class */
3595
3596 s++;
3597 if (*s == ']' || *s == '^')
3598 return FALSE;
3599 else {
ffb4593c 3600 /* this is terrifying, and it works */
79072805
LW
3601 int weight = 2; /* let's weigh the evidence */
3602 char seen[256];
f27ffc4a 3603 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3604 const char * const send = strchr(s,']');
3280af22 3605 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3606
3607 if (!send) /* has to be an expression */
3608 return TRUE;
3609
3610 Zero(seen,256,char);
3611 if (*s == '$')
3612 weight -= 3;
3613 else if (isDIGIT(*s)) {
3614 if (s[1] != ']') {
3615 if (isDIGIT(s[1]) && s[2] == ']')
3616 weight -= 10;
3617 }
3618 else
3619 weight -= 100;
3620 }
3621 for (; s < send; s++) {
3622 last_un_char = un_char;
3623 un_char = (unsigned char)*s;
3624 switch (*s) {
3625 case '@':
3626 case '&':
3627 case '$':
3628 weight -= seen[un_char] * 10;
7e2040f0 3629 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3630 int len;
8903cb82 3631 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3632 len = (int)strlen(tmpbuf);
3633 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3634 weight -= 100;
3635 else
3636 weight -= 10;
3637 }
3638 else if (*s == '$' && s[1] &&
93a17b20
LW
3639 strchr("[#!%*<>()-=",s[1])) {
3640 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3641 weight -= 10;
3642 else
3643 weight -= 1;
3644 }
3645 break;
3646 case '\\':
3647 un_char = 254;
3648 if (s[1]) {
93a17b20 3649 if (strchr("wds]",s[1]))
79072805 3650 weight += 100;
10edeb5d 3651 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3652 weight += 1;
93a17b20 3653 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3654 weight += 40;
3655 else if (isDIGIT(s[1])) {
3656 weight += 40;
3657 while (s[1] && isDIGIT(s[1]))
3658 s++;
3659 }
3660 }
3661 else
3662 weight += 100;
3663 break;
3664 case '-':
3665 if (s[1] == '\\')
3666 weight += 50;
93a17b20 3667 if (strchr("aA01! ",last_un_char))
79072805 3668 weight += 30;
93a17b20 3669 if (strchr("zZ79~",s[1]))
79072805 3670 weight += 30;
f27ffc4a
GS
3671 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3672 weight -= 5; /* cope with negative subscript */
79072805
LW
3673 break;
3674 default:
3792a11b
NC
3675 if (!isALNUM(last_un_char)
3676 && !(last_un_char == '$' || last_un_char == '@'
3677 || last_un_char == '&')
3678 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3679 char *d = tmpbuf;
3680 while (isALPHA(*s))
3681 *d++ = *s++;
3682 *d = '\0';
5458a98a 3683 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3684 weight -= 150;
3685 }
3686 if (un_char == last_un_char + 1)
3687 weight += 5;
3688 weight -= seen[un_char];
3689 break;
3690 }
3691 seen[un_char]++;
3692 }
3693 if (weight >= 0) /* probably a character class */
3694 return FALSE;
3695 }
3696
3697 return TRUE;
3698}
ffed7fef 3699
ffb4593c
NT
3700/*
3701 * S_intuit_method
3702 *
3703 * Does all the checking to disambiguate
3704 * foo bar
3705 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3706 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3707 *
3708 * First argument is the stuff after the first token, e.g. "bar".
3709 *
3710 * Not a method if bar is a filehandle.
3711 * Not a method if foo is a subroutine prototyped to take a filehandle.
3712 * Not a method if it's really "Foo $bar"
3713 * Method if it's "foo $bar"
3714 * Not a method if it's really "print foo $bar"
3715 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3716 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3717 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3718 * =>
3719 */
3720
76e3520e 3721STATIC int
62d55b22 3722S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3723{
97aff369 3724 dVAR;
a0d0e21e 3725 char *s = start + (*start == '$');
3280af22 3726 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3727 STRLEN len;
3728 GV* indirgv;
5db06880
NC
3729#ifdef PERL_MAD
3730 int soff;
3731#endif
a0d0e21e 3732
7918f24d
NC
3733 PERL_ARGS_ASSERT_INTUIT_METHOD;
3734
a0d0e21e 3735 if (gv) {
62d55b22 3736 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3737 return 0;
62d55b22
NC
3738 if (cv) {
3739 if (SvPOK(cv)) {
3740 const char *proto = SvPVX_const(cv);
3741 if (proto) {
3742 if (*proto == ';')
3743 proto++;
3744 if (*proto == '*')
3745 return 0;
3746 }
b6c543e3
IZ
3747 }
3748 } else
c35e046a 3749 gv = NULL;
a0d0e21e 3750 }
8903cb82 3751 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3752 /* start is the beginning of the possible filehandle/object,
3753 * and s is the end of it
3754 * tmpbuf is a copy of it
3755 */
3756
a0d0e21e 3757 if (*start == '$') {
3ef1310e
RGS
3758 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3759 isUPPER(*PL_tokenbuf))
a0d0e21e 3760 return 0;
5db06880
NC
3761#ifdef PERL_MAD
3762 len = start - SvPVX(PL_linestr);
3763#endif
29595ff2 3764 s = PEEKSPACE(s);
f0092767 3765#ifdef PERL_MAD
5db06880
NC
3766 start = SvPVX(PL_linestr) + len;
3767#endif
3280af22
NIS
3768 PL_bufptr = start;
3769 PL_expect = XREF;
a0d0e21e
LW
3770 return *s == '(' ? FUNCMETH : METHOD;
3771 }
5458a98a 3772 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3773 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3774 len -= 2;
3775 tmpbuf[len] = '\0';
5db06880
NC
3776#ifdef PERL_MAD
3777 soff = s - SvPVX(PL_linestr);
3778#endif
c3e0f903
GS
3779 goto bare_package;
3780 }
90e5519e 3781 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3782 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3783 return 0;
3784 /* filehandle or package name makes it a method */
da51bb9b 3785 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3786#ifdef PERL_MAD
3787 soff = s - SvPVX(PL_linestr);
3788#endif
29595ff2 3789 s = PEEKSPACE(s);
3280af22 3790 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3791 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3792 bare_package:
cd81e915 3793 start_force(PL_curforce);
9ded7720 3794 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3795 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3796 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3797 if (PL_madskills)
3798 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3799 PL_expect = XTERM;
a0d0e21e 3800 force_next(WORD);
3280af22 3801 PL_bufptr = s;
5db06880
NC
3802#ifdef PERL_MAD
3803 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3804#endif
a0d0e21e
LW
3805 return *s == '(' ? FUNCMETH : METHOD;
3806 }
3807 }
3808 return 0;
3809}
3810
16d20bd9 3811/* Encoded script support. filter_add() effectively inserts a
4e553d73 3812 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3813 * Note that the filter function only applies to the current source file
3814 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3815 *
3816 * The datasv parameter (which may be NULL) can be used to pass
3817 * private data to this instance of the filter. The filter function
3818 * can recover the SV using the FILTER_DATA macro and use it to
3819 * store private buffers and state information.
3820 *
3821 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3822 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3823 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3824 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3825 * private use must be set using malloc'd pointers.
3826 */
16d20bd9
AD
3827
3828SV *
864dbfa3 3829Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3830{
97aff369 3831 dVAR;
f4c556ac 3832 if (!funcp)
a0714e2c 3833 return NULL;
f4c556ac 3834
5486870f
DM
3835 if (!PL_parser)
3836 return NULL;
3837
3280af22
NIS
3838 if (!PL_rsfp_filters)
3839 PL_rsfp_filters = newAV();
16d20bd9 3840 if (!datasv)
561b68a9 3841 datasv = newSV(0);
862a34c6 3842 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3843 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3844 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3845 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3846 FPTR2DPTR(void *, IoANY(datasv)),
3847 SvPV_nolen(datasv)));
3280af22
NIS
3848 av_unshift(PL_rsfp_filters, 1);
3849 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3850 return(datasv);
3851}
4e553d73 3852
16d20bd9
AD
3853
3854/* Delete most recently added instance of this filter function. */
a0d0e21e 3855void
864dbfa3 3856Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3857{
97aff369 3858 dVAR;
e0c19803 3859 SV *datasv;
24801a4b 3860
7918f24d
NC
3861 PERL_ARGS_ASSERT_FILTER_DEL;
3862
33073adb 3863#ifdef DEBUGGING
55662e27
JH
3864 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3865 FPTR2DPTR(void*, funcp)));
33073adb 3866#endif
5486870f 3867 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3868 return;
3869 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3870 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3871 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3872 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3873
16d20bd9
AD
3874 return;
3875 }
3876 /* we need to search for the correct entry and clear it */
cea2e8a9 3877 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3878}
3879
3880
1de9afcd
RGS
3881/* Invoke the idxth filter function for the current rsfp. */
3882/* maxlen 0 = read one text line */
16d20bd9 3883I32
864dbfa3 3884Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3885{
97aff369 3886 dVAR;
16d20bd9
AD
3887 filter_t funcp;
3888 SV *datasv = NULL;
f482118e
NC
3889 /* This API is bad. It should have been using unsigned int for maxlen.
3890 Not sure if we want to change the API, but if not we should sanity
3891 check the value here. */
39cd7a59
NC
3892 const unsigned int correct_length
3893 = maxlen < 0 ?
3894#ifdef PERL_MICRO
3895 0x7FFFFFFF
3896#else
3897 INT_MAX
3898#endif
3899 : maxlen;
e50aee73 3900
7918f24d
NC
3901 PERL_ARGS_ASSERT_FILTER_READ;
3902
5486870f 3903 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3904 return -1;
1de9afcd 3905 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3906 /* Provide a default input filter to make life easy. */
3907 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3908 DEBUG_P(PerlIO_printf(Perl_debug_log,
3909 "filter_read %d: from rsfp\n", idx));
f482118e 3910 if (correct_length) {
16d20bd9
AD
3911 /* Want a block */
3912 int len ;
f54cb97a 3913 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3914
3915 /* ensure buf_sv is large enough */
881d8f0a 3916 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3917 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3918 correct_length)) <= 0) {
3280af22 3919 if (PerlIO_error(PL_rsfp))
37120919
AD
3920 return -1; /* error */
3921 else
3922 return 0 ; /* end of file */
3923 }
16d20bd9 3924 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3925 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3926 } else {
3927 /* Want a line */
3280af22
NIS
3928 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3929 if (PerlIO_error(PL_rsfp))
37120919
AD
3930 return -1; /* error */
3931 else
3932 return 0 ; /* end of file */
3933 }
16d20bd9
AD
3934 }
3935 return SvCUR(buf_sv);
3936 }
3937 /* Skip this filter slot if filter has been deleted */
1de9afcd 3938 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3939 DEBUG_P(PerlIO_printf(Perl_debug_log,
3940 "filter_read %d: skipped (filter deleted)\n",
3941 idx));
f482118e 3942 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3943 }
3944 /* Get function pointer hidden within datasv */
8141890a 3945 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3946 DEBUG_P(PerlIO_printf(Perl_debug_log,
3947 "filter_read %d: via function %p (%s)\n",
ca0270c4 3948 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3949 /* Call function. The function is expected to */
3950 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3951 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3952 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3953}
3954
76e3520e 3955STATIC char *
5cc814fd 3956S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3957{
97aff369 3958 dVAR;
7918f24d
NC
3959
3960 PERL_ARGS_ASSERT_FILTER_GETS;
3961
c39cd008 3962#ifdef PERL_CR_FILTER
3280af22 3963 if (!PL_rsfp_filters) {
c39cd008 3964 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3965 }
3966#endif
3280af22 3967 if (PL_rsfp_filters) {
55497cff 3968 if (!append)
3969 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3970 if (FILTER_READ(0, sv, 0) > 0)
3971 return ( SvPVX(sv) ) ;
3972 else
bd61b366 3973 return NULL ;
16d20bd9 3974 }
9d116dd7 3975 else
5cc814fd 3976 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3977}
3978
01ec43d0 3979STATIC HV *
9bde8eb0 3980S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3981{
97aff369 3982 dVAR;
def3634b
GS
3983 GV *gv;
3984
7918f24d
NC
3985 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3986
01ec43d0 3987 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3988 return PL_curstash;
3989
3990 if (len > 2 &&
3991 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3992 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3993 {
3994 return GvHV(gv); /* Foo:: */
def3634b
GS
3995 }
3996
3997 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3998 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3999 if (gv && GvCV(gv)) {
4000 SV * const sv = cv_const_sv(GvCV(gv));
4001 if (sv)
9bde8eb0 4002 pkgname = SvPV_const(sv, len);
def3634b
GS
4003 }
4004
9bde8eb0 4005 return gv_stashpvn(pkgname, len, 0);
def3634b 4006}
a0d0e21e 4007
e3f73d4e
RGS
4008/*
4009 * S_readpipe_override
486ec47a 4010 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4011 * optree, provided sublex_start() is called afterwards.
4012 */
4013STATIC void
1d51329b 4014S_readpipe_override(pTHX)
e3f73d4e
RGS
4015{
4016 GV **gvp;
4017 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4018 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4019 if ((gv_readpipe
4020 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4021 ||
4022 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4023 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4024 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4025 {
4026 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4027 op_append_elem(OP_LIST,
e3f73d4e
RGS
4028 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4029 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4030 }
e3f73d4e
RGS
4031}
4032
5db06880
NC
4033#ifdef PERL_MAD
4034 /*
4035 * Perl_madlex
4036 * The intent of this yylex wrapper is to minimize the changes to the
4037 * tokener when we aren't interested in collecting madprops. It remains
4038 * to be seen how successful this strategy will be...
4039 */
4040
4041int
4042Perl_madlex(pTHX)
4043{
4044 int optype;
4045 char *s = PL_bufptr;
4046
cd81e915
NC
4047 /* make sure PL_thiswhite is initialized */
4048 PL_thiswhite = 0;
4049 PL_thismad = 0;
5db06880 4050
cd81e915 4051 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 4052 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
4053 return S_pending_ident(aTHX);
4054
4055 /* previous token ate up our whitespace? */
cd81e915
NC
4056 if (!PL_lasttoke && PL_nextwhite) {
4057 PL_thiswhite = PL_nextwhite;
4058 PL_nextwhite = 0;
5db06880
NC
4059 }
4060
4061 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4062 PL_realtokenstart = -1;
4063 PL_thistoken = 0;
5db06880
NC
4064 optype = yylex();
4065 s = PL_bufptr;
cd81e915 4066 assert(PL_curforce < 0);
5db06880 4067
cd81e915
NC
4068 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4069 if (!PL_thistoken) {
4070 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4071 PL_thistoken = newSVpvs("");
5db06880 4072 else {
c35e046a 4073 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4074 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4075 }
4076 }
cd81e915
NC
4077 if (PL_thismad) /* install head */
4078 CURMAD('X', PL_thistoken);
5db06880
NC
4079 }
4080
4081 /* last whitespace of a sublex? */
cd81e915
NC
4082 if (optype == ')' && PL_endwhite) {
4083 CURMAD('X', PL_endwhite);
5db06880
NC
4084 }
4085
cd81e915 4086 if (!PL_thismad) {
5db06880
NC
4087
4088 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4089 if (!PL_thiswhite && !PL_endwhite && !optype) {
4090 sv_free(PL_thistoken);
4091 PL_thistoken = 0;
5db06880
NC
4092 return 0;
4093 }
4094
4095 /* put off final whitespace till peg */
4096 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4097 PL_nextwhite = PL_thiswhite;
4098 PL_thiswhite = 0;
5db06880 4099 }
cd81e915
NC
4100 else if (PL_thisopen) {
4101 CURMAD('q', PL_thisopen);
4102 if (PL_thistoken)
4103 sv_free(PL_thistoken);
4104 PL_thistoken = 0;
5db06880
NC
4105 }
4106 else {
4107 /* Store actual token text as madprop X */
cd81e915 4108 CURMAD('X', PL_thistoken);
5db06880
NC
4109 }
4110
cd81e915 4111 if (PL_thiswhite) {
5db06880 4112 /* add preceding whitespace as madprop _ */
cd81e915 4113 CURMAD('_', PL_thiswhite);
5db06880
NC
4114 }
4115
cd81e915 4116 if (PL_thisstuff) {
5db06880 4117 /* add quoted material as madprop = */
cd81e915 4118 CURMAD('=', PL_thisstuff);
5db06880
NC
4119 }
4120
cd81e915 4121 if (PL_thisclose) {
5db06880 4122 /* add terminating quote as madprop Q */
cd81e915 4123 CURMAD('Q', PL_thisclose);
5db06880
NC
4124 }
4125 }
4126
4127 /* special processing based on optype */
4128
4129 switch (optype) {
4130
4131 /* opval doesn't need a TOKEN since it can already store mp */
4132 case WORD:
4133 case METHOD:
4134 case FUNCMETH:
4135 case THING:
4136 case PMFUNC:
4137 case PRIVATEREF:
4138 case FUNC0SUB:
4139 case UNIOPSUB:
4140 case LSTOPSUB:
6154021b
RGS
4141 if (pl_yylval.opval)
4142 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4143 PL_thismad = 0;
5db06880
NC
4144 return optype;
4145
4146 /* fake EOF */
4147 case 0:
4148 optype = PEG;
cd81e915
NC
4149 if (PL_endwhite) {
4150 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4151 PL_endwhite = 0;
5db06880
NC
4152 }
4153 break;
4154
4155 case ']':
4156 case '}':
cd81e915 4157 if (PL_faketokens)
5db06880
NC
4158 break;
4159 /* remember any fake bracket that lexer is about to discard */
4160 if (PL_lex_brackets == 1 &&
4161 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4162 {
4163 s = PL_bufptr;
4164 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4165 s++;
4166 if (*s == '}') {
cd81e915
NC
4167 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4168 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4169 PL_thiswhite = 0;
5db06880
NC
4170 PL_bufptr = s - 1;
4171 break; /* don't bother looking for trailing comment */
4172 }
4173 else
4174 s = PL_bufptr;
4175 }
4176 if (optype == ']')
4177 break;
4178 /* FALLTHROUGH */
4179
4180 /* attach a trailing comment to its statement instead of next token */
4181 case ';':
cd81e915 4182 if (PL_faketokens)
5db06880
NC
4183 break;
4184 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4185 s = PL_bufptr;
4186 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4187 s++;
4188 if (*s == '\n' || *s == '#') {
4189 while (s < PL_bufend && *s != '\n')
4190 s++;
4191 if (s < PL_bufend)
4192 s++;
cd81e915
NC
4193 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4194 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4195 PL_thiswhite = 0;
5db06880
NC
4196 PL_bufptr = s;
4197 }
4198 }
4199 break;
4200
4201 /* pval */
4202 case LABEL:
4203 break;
4204
4205 /* ival */
4206 default:
4207 break;
4208
4209 }
4210
4211 /* Create new token struct. Note: opvals return early above. */
6154021b 4212 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4213 PL_thismad = 0;
5db06880
NC
4214 return optype;
4215}
4216#endif
4217
468aa647 4218STATIC char *
cc6ed77d 4219S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4220 dVAR;
7918f24d
NC
4221
4222 PERL_ARGS_ASSERT_TOKENIZE_USE;
4223
468aa647
RGS
4224 if (PL_expect != XSTATE)
4225 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4226 is_use ? "use" : "no"));
29595ff2 4227 s = SKIPSPACE1(s);
468aa647
RGS
4228 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4229 s = force_version(s, TRUE);
17c59fdf
VP
4230 if (*s == ';' || *s == '}'
4231 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4232 start_force(PL_curforce);
9ded7720 4233 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4234 force_next(WORD);
4235 }
4236 else if (*s == 'v') {
4237 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4238 s = force_version(s, FALSE);
4239 }
4240 }
4241 else {
4242 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4243 s = force_version(s, FALSE);
4244 }
6154021b 4245 pl_yylval.ival = is_use;
468aa647
RGS
4246 return s;
4247}
748a9306 4248#ifdef DEBUGGING
27da23d5 4249 static const char* const exp_name[] =
09bef843 4250 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4251 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4252 };
748a9306 4253#endif
463ee0b2 4254
361d9b55
Z
4255#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4256STATIC bool
4257S_word_takes_any_delimeter(char *p, STRLEN len)
4258{
4259 return (len == 1 && strchr("msyq", p[0])) ||
4260 (len == 2 && (
4261 (p[0] == 't' && p[1] == 'r') ||
4262 (p[0] == 'q' && strchr("qwxr", p[1]))));
4263}
4264
02aa26ce
NT
4265/*
4266 yylex
4267
4268 Works out what to call the token just pulled out of the input
4269 stream. The yacc parser takes care of taking the ops we return and
4270 stitching them into a tree.
4271
4272 Returns:
4273 PRIVATEREF
4274
4275 Structure:
4276 if read an identifier
4277 if we're in a my declaration
4278 croak if they tried to say my($foo::bar)
4279 build the ops for a my() declaration
4280 if it's an access to a my() variable
4281 are we in a sort block?
4282 croak if my($a); $a <=> $b
4283 build ops for access to a my() variable
4284 if in a dq string, and they've said @foo and we can't find @foo
4285 croak
4286 build ops for a bareword
4287 if we already built the token before, use it.
4288*/
4289
20141f0e 4290
dba4d153
JH
4291#ifdef __SC__
4292#pragma segment Perl_yylex
4293#endif
dba4d153 4294int
dba4d153 4295Perl_yylex(pTHX)
20141f0e 4296{
97aff369 4297 dVAR;
3afc138a 4298 register char *s = PL_bufptr;
378cc40b 4299 register char *d;
463ee0b2 4300 STRLEN len;
aa7440fb 4301 bool bof = FALSE;
580561a3 4302 U32 fake_eof = 0;
a687059c 4303
10edeb5d
JH
4304 /* orig_keyword, gvp, and gv are initialized here because
4305 * jump to the label just_a_word_zero can bypass their
4306 * initialization later. */
4307 I32 orig_keyword = 0;
4308 GV *gv = NULL;
4309 GV **gvp = NULL;
4310
bbf60fe6 4311 DEBUG_T( {
396482e1 4312 SV* tmp = newSVpvs("");
b6007c36
DM
4313 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4314 (IV)CopLINE(PL_curcop),
4315 lex_state_names[PL_lex_state],
4316 exp_name[PL_expect],
4317 pv_display(tmp, s, strlen(s), 0, 60));
4318 SvREFCNT_dec(tmp);
bbf60fe6 4319 } );
02aa26ce 4320 /* check if there's an identifier for us to look at */
28ac2b49 4321 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4322 return REPORT(S_pending_ident(aTHX));
bbce6d69 4323
02aa26ce
NT
4324 /* no identifier pending identification */
4325
3280af22 4326 switch (PL_lex_state) {
79072805
LW
4327#ifdef COMMENTARY
4328 case LEX_NORMAL: /* Some compilers will produce faster */
4329 case LEX_INTERPNORMAL: /* code if we comment these out. */
4330 break;
4331#endif
4332
09bef843 4333 /* when we've already built the next token, just pull it out of the queue */
79072805 4334 case LEX_KNOWNEXT:
5db06880
NC
4335#ifdef PERL_MAD
4336 PL_lasttoke--;
6154021b 4337 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4338 if (PL_madskills) {
cd81e915 4339 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4340 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4341 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4342 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4343 PL_thismad->mad_val = 0;
4344 mad_free(PL_thismad);
4345 PL_thismad = 0;
5db06880
NC
4346 }
4347 }
4348 if (!PL_lasttoke) {
4349 PL_lex_state = PL_lex_defer;
4350 PL_expect = PL_lex_expect;
4351 PL_lex_defer = LEX_NORMAL;
4352 if (!PL_nexttoke[PL_lasttoke].next_type)
4353 return yylex();
4354 }
4355#else
3280af22 4356 PL_nexttoke--;
6154021b 4357 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4358 if (!PL_nexttoke) {
4359 PL_lex_state = PL_lex_defer;
4360 PL_expect = PL_lex_expect;
4361 PL_lex_defer = LEX_NORMAL;
463ee0b2 4362 }
5db06880 4363#endif
a7aaec61
Z
4364 {
4365 I32 next_type;
5db06880 4366#ifdef PERL_MAD
a7aaec61 4367 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4368#else
a7aaec61 4369 next_type = PL_nexttype[PL_nexttoke];
5db06880 4370#endif
78cdf107
Z
4371 if (next_type & (7<<24)) {
4372 if (next_type & (1<<24)) {
4373 if (PL_lex_brackets > 100)
4374 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4375 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4376 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4377 }
4378 if (next_type & (2<<24))
4379 PL_lex_allbrackets++;
4380 if (next_type & (4<<24))
4381 PL_lex_allbrackets--;
a7aaec61
Z
4382 next_type &= 0xffff;
4383 }
4384#ifdef PERL_MAD
4385 /* FIXME - can these be merged? */
4386 return next_type;
4387#else
4388 return REPORT(next_type);
4389#endif
4390 }
79072805 4391
02aa26ce 4392 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4393 when we get here, PL_bufptr is at the \
02aa26ce 4394 */
79072805
LW
4395 case LEX_INTERPCASEMOD:
4396#ifdef DEBUGGING
3280af22 4397 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4398 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4399#endif
02aa26ce 4400 /* handle \E or end of string */
3280af22 4401 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4402 /* if at a \E */
3280af22 4403 if (PL_lex_casemods) {
f54cb97a 4404 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4405 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4406
3792a11b
NC
4407 if (PL_bufptr != PL_bufend
4408 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4409 PL_bufptr += 2;
4410 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4411#ifdef PERL_MAD
4412 if (PL_madskills)
6b29d1f5 4413 PL_thistoken = newSVpvs("\\E");
5db06880 4414#endif
a0d0e21e 4415 }
78cdf107 4416 PL_lex_allbrackets--;
bbf60fe6 4417 return REPORT(')');
79072805 4418 }
5db06880
NC
4419#ifdef PERL_MAD
4420 while (PL_bufptr != PL_bufend &&
4421 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4422 if (!PL_thiswhite)
6b29d1f5 4423 PL_thiswhite = newSVpvs("");
cd81e915 4424 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4425 PL_bufptr += 2;
4426 }
4427#else
3280af22
NIS
4428 if (PL_bufptr != PL_bufend)
4429 PL_bufptr += 2;
5db06880 4430#endif
3280af22 4431 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4432 return yylex();
79072805
LW
4433 }
4434 else {
607df283 4435 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4436 "### Saw case modifier\n"); });
3280af22 4437 s = PL_bufptr + 1;
6e909404 4438 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4439#ifdef PERL_MAD
cd81e915 4440 if (!PL_thiswhite)
6b29d1f5 4441 PL_thiswhite = newSVpvs("");
cd81e915 4442 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4443#endif
89122651 4444 PL_bufptr = s + 3;
6e909404
JH
4445 PL_lex_state = LEX_INTERPCONCAT;
4446 return yylex();
a0d0e21e 4447 }
6e909404 4448 else {
90771dc0 4449 I32 tmp;
5db06880
NC
4450 if (!PL_madskills) /* when just compiling don't need correct */
4451 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4452 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4453 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4454 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4455 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4456 PL_lex_allbrackets--;
bbf60fe6 4457 return REPORT(')');
6e909404
JH
4458 }
4459 if (PL_lex_casemods > 10)
4460 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4461 PL_lex_casestack[PL_lex_casemods++] = *s;
4462 PL_lex_casestack[PL_lex_casemods] = '\0';
4463 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4464 start_force(PL_curforce);
9ded7720 4465 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4466 force_next((2<<24)|'(');
cd81e915 4467 start_force(PL_curforce);
6e909404 4468 if (*s == 'l')
9ded7720 4469 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4470 else if (*s == 'u')
9ded7720 4471 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4472 else if (*s == 'L')
9ded7720 4473 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4474 else if (*s == 'U')
9ded7720 4475 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4476 else if (*s == 'Q')
9ded7720 4477 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4478 else
4479 Perl_croak(aTHX_ "panic: yylex");
5db06880 4480 if (PL_madskills) {
a5849ce5
NC
4481 SV* const tmpsv = newSVpvs("\\ ");
4482 /* replace the space with the character we want to escape
4483 */
4484 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4485 curmad('_', tmpsv);
4486 }
6e909404 4487 PL_bufptr = s + 1;
a0d0e21e 4488 }
79072805 4489 force_next(FUNC);
3280af22
NIS
4490 if (PL_lex_starts) {
4491 s = PL_bufptr;
4492 PL_lex_starts = 0;
5db06880
NC
4493#ifdef PERL_MAD
4494 if (PL_madskills) {
cd81e915
NC
4495 if (PL_thistoken)
4496 sv_free(PL_thistoken);
6b29d1f5 4497 PL_thistoken = newSVpvs("");
5db06880
NC
4498 }
4499#endif
131b3ad0
DM
4500 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4501 if (PL_lex_casemods == 1 && PL_lex_inpat)
4502 OPERATOR(',');
4503 else
4504 Aop(OP_CONCAT);
79072805
LW
4505 }
4506 else
cea2e8a9 4507 return yylex();
79072805
LW
4508 }
4509
55497cff 4510 case LEX_INTERPPUSH:
bbf60fe6 4511 return REPORT(sublex_push());
55497cff 4512
79072805 4513 case LEX_INTERPSTART:
3280af22 4514 if (PL_bufptr == PL_bufend)
bbf60fe6 4515 return REPORT(sublex_done());
607df283 4516 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4517 "### Interpolated variable\n"); });
3280af22
NIS
4518 PL_expect = XTERM;
4519 PL_lex_dojoin = (*PL_bufptr == '@');
4520 PL_lex_state = LEX_INTERPNORMAL;
4521 if (PL_lex_dojoin) {
cd81e915 4522 start_force(PL_curforce);
9ded7720 4523 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4524 force_next(',');
cd81e915 4525 start_force(PL_curforce);
a0d0e21e 4526 force_ident("\"", '$');
cd81e915 4527 start_force(PL_curforce);
9ded7720 4528 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4529 force_next('$');
cd81e915 4530 start_force(PL_curforce);
9ded7720 4531 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4532 force_next((2<<24)|'(');
cd81e915 4533 start_force(PL_curforce);
9ded7720 4534 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4535 force_next(FUNC);
4536 }
3280af22
NIS
4537 if (PL_lex_starts++) {
4538 s = PL_bufptr;
5db06880
NC
4539#ifdef PERL_MAD
4540 if (PL_madskills) {
cd81e915
NC
4541 if (PL_thistoken)
4542 sv_free(PL_thistoken);
6b29d1f5 4543 PL_thistoken = newSVpvs("");
5db06880
NC
4544 }
4545#endif
131b3ad0
DM
4546 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4547 if (!PL_lex_casemods && PL_lex_inpat)
4548 OPERATOR(',');
4549 else
4550 Aop(OP_CONCAT);
79072805 4551 }
cea2e8a9 4552 return yylex();
79072805
LW
4553
4554 case LEX_INTERPENDMAYBE:
3280af22
NIS
4555 if (intuit_more(PL_bufptr)) {
4556 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4557 break;
4558 }
4559 /* FALL THROUGH */
4560
4561 case LEX_INTERPEND:
3280af22
NIS
4562 if (PL_lex_dojoin) {
4563 PL_lex_dojoin = FALSE;
4564 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4565#ifdef PERL_MAD
4566 if (PL_madskills) {
cd81e915
NC
4567 if (PL_thistoken)
4568 sv_free(PL_thistoken);
6b29d1f5 4569 PL_thistoken = newSVpvs("");
5db06880
NC
4570 }
4571#endif
78cdf107 4572 PL_lex_allbrackets--;
bbf60fe6 4573 return REPORT(')');
79072805 4574 }
43a16006 4575 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4576 && SvEVALED(PL_lex_repl))
43a16006 4577 {
e9fa98b2 4578 if (PL_bufptr != PL_bufend)
cea2e8a9 4579 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4580 PL_lex_repl = NULL;
e9fa98b2 4581 }
79072805
LW
4582 /* FALLTHROUGH */
4583 case LEX_INTERPCONCAT:
4584#ifdef DEBUGGING
3280af22 4585 if (PL_lex_brackets)
cea2e8a9 4586 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4587#endif
3280af22 4588 if (PL_bufptr == PL_bufend)
bbf60fe6 4589 return REPORT(sublex_done());
79072805 4590
3280af22
NIS
4591 if (SvIVX(PL_linestr) == '\'') {
4592 SV *sv = newSVsv(PL_linestr);
4593 if (!PL_lex_inpat)
76e3520e 4594 sv = tokeq(sv);
3280af22 4595 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4596 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4597 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4598 s = PL_bufend;
79072805
LW
4599 }
4600 else {
3280af22 4601 s = scan_const(PL_bufptr);
79072805 4602 if (*s == '\\')
3280af22 4603 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4604 else
3280af22 4605 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4606 }
4607
3280af22 4608 if (s != PL_bufptr) {
cd81e915 4609 start_force(PL_curforce);
5db06880
NC
4610 if (PL_madskills) {
4611 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4612 }
6154021b 4613 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4614 PL_expect = XTERM;
79072805 4615 force_next(THING);
131b3ad0 4616 if (PL_lex_starts++) {
5db06880
NC
4617#ifdef PERL_MAD
4618 if (PL_madskills) {
cd81e915
NC
4619 if (PL_thistoken)
4620 sv_free(PL_thistoken);
6b29d1f5 4621 PL_thistoken = newSVpvs("");
5db06880
NC
4622 }
4623#endif
131b3ad0
DM
4624 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4625 if (!PL_lex_casemods && PL_lex_inpat)
4626 OPERATOR(',');
4627 else
4628 Aop(OP_CONCAT);
4629 }
79072805 4630 else {
3280af22 4631 PL_bufptr = s;
cea2e8a9 4632 return yylex();
79072805
LW
4633 }
4634 }
4635
cea2e8a9 4636 return yylex();
a0d0e21e 4637 case LEX_FORMLINE:
3280af22
NIS
4638 PL_lex_state = LEX_NORMAL;
4639 s = scan_formline(PL_bufptr);
4640 if (!PL_lex_formbrack)
a0d0e21e
LW
4641 goto rightbracket;
4642 OPERATOR(';');
79072805
LW
4643 }
4644
3280af22
NIS
4645 s = PL_bufptr;
4646 PL_oldoldbufptr = PL_oldbufptr;
4647 PL_oldbufptr = s;
463ee0b2
LW
4648
4649 retry:
5db06880 4650#ifdef PERL_MAD
cd81e915
NC
4651 if (PL_thistoken) {
4652 sv_free(PL_thistoken);
4653 PL_thistoken = 0;
5db06880 4654 }
cd81e915 4655 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4656#endif
378cc40b
LW
4657 switch (*s) {
4658 default:
7e2040f0 4659 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4660 goto keylookup;
b1fc3636
CJ
4661 {
4662 unsigned char c = *s;
4663 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4664 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4665 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4666 } else {
4667 d = PL_linestart;
4668 }
4669 *s = '\0';
4670 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4671 }
e929a76b
LW
4672 case 4:
4673 case 26:
4674 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4675 case 0:
5db06880
NC
4676#ifdef PERL_MAD
4677 if (PL_madskills)
cd81e915 4678 PL_faketokens = 0;
5db06880 4679#endif
3280af22
NIS
4680 if (!PL_rsfp) {
4681 PL_last_uni = 0;
4682 PL_last_lop = 0;
a7aaec61
Z
4683 if (PL_lex_brackets &&
4684 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4685 yyerror((const char *)
4686 (PL_lex_formbrack
4687 ? "Format not terminated"
4688 : "Missing right curly or square bracket"));
c5ee2135 4689 }
4e553d73 4690 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4691 "### Tokener got EOF\n");
5f80b19c 4692 } );
79072805 4693 TOKEN(0);
463ee0b2 4694 }
3280af22 4695 if (s++ < PL_bufend)
a687059c 4696 goto retry; /* ignore stray nulls */
3280af22
NIS
4697 PL_last_uni = 0;
4698 PL_last_lop = 0;
4699 if (!PL_in_eval && !PL_preambled) {
4700 PL_preambled = TRUE;
5db06880
NC
4701#ifdef PERL_MAD
4702 if (PL_madskills)
cd81e915 4703 PL_faketokens = 1;
5db06880 4704#endif
5ab7ff98
NC
4705 if (PL_perldb) {
4706 /* Generate a string of Perl code to load the debugger.
4707 * If PERL5DB is set, it will return the contents of that,
4708 * otherwise a compile-time require of perl5db.pl. */
4709
4710 const char * const pdb = PerlEnv_getenv("PERL5DB");
4711
4712 if (pdb) {
4713 sv_setpv(PL_linestr, pdb);
4714 sv_catpvs(PL_linestr,";");
4715 } else {
4716 SETERRNO(0,SS_NORMAL);
4717 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4718 }
4719 } else
4720 sv_setpvs(PL_linestr,"");
c62eb204
NC
4721 if (PL_preambleav) {
4722 SV **svp = AvARRAY(PL_preambleav);
4723 SV **const end = svp + AvFILLp(PL_preambleav);
4724 while(svp <= end) {
4725 sv_catsv(PL_linestr, *svp);
4726 ++svp;
396482e1 4727 sv_catpvs(PL_linestr, ";");
91b7def8 4728 }
daba3364 4729 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4730 PL_preambleav = NULL;
91b7def8 4731 }
9f639728
FR
4732 if (PL_minus_E)
4733 sv_catpvs(PL_linestr,
4734 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4735 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4736 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4737 if (PL_minus_l)
396482e1 4738 sv_catpvs(PL_linestr,"chomp;");
3280af22 4739 if (PL_minus_a) {
3280af22 4740 if (PL_minus_F) {
3792a11b
NC
4741 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4742 || *PL_splitstr == '"')
3280af22 4743 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4744 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4745 else {
c8ef6a4b
NC
4746 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4747 bytes can be used as quoting characters. :-) */
dd374669 4748 const char *splits = PL_splitstr;
91d456ae 4749 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4750 do {
4751 /* Need to \ \s */
dd374669
AL
4752 if (*splits == '\\')
4753 sv_catpvn(PL_linestr, splits, 1);
4754 sv_catpvn(PL_linestr, splits, 1);
4755 } while (*splits++);
48c4c863
NC
4756 /* This loop will embed the trailing NUL of
4757 PL_linestr as the last thing it does before
4758 terminating. */
396482e1 4759 sv_catpvs(PL_linestr, ");");
54310121 4760 }
2304df62
AD
4761 }
4762 else
396482e1 4763 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4764 }
79072805 4765 }
396482e1 4766 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4767 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4768 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4769 PL_last_lop = PL_last_uni = NULL;
65269a95 4770 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4771 update_debugger_info(PL_linestr, NULL, 0);
79072805 4772 goto retry;
a687059c 4773 }
e929a76b 4774 do {
580561a3
Z
4775 fake_eof = 0;
4776 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4777 if (0) {
7e28d3af 4778 fake_eof:
f0e67a1d
Z
4779 fake_eof = LEX_FAKE_EOF;
4780 }
4781 PL_bufptr = PL_bufend;
17cc9359 4782 CopLINE_inc(PL_curcop);
f0e67a1d 4783 if (!lex_next_chunk(fake_eof)) {
17cc9359 4784 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4785 s = PL_bufptr;
4786 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4787 }
17cc9359 4788 CopLINE_dec(PL_curcop);
5db06880 4789#ifdef PERL_MAD
f0e67a1d 4790 if (!PL_rsfp)
cd81e915 4791 PL_realtokenstart = -1;
5db06880 4792#endif
f0e67a1d 4793 s = PL_bufptr;
7aa207d6
JH
4794 /* If it looks like the start of a BOM or raw UTF-16,
4795 * check if it in fact is. */
580561a3 4796 if (bof && PL_rsfp &&
7aa207d6
JH
4797 (*s == 0 ||
4798 *(U8*)s == 0xEF ||
4799 *(U8*)s >= 0xFE ||
4800 s[1] == 0)) {
879bc93b
DM
4801 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4802 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
4803#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4804 /* offset may include swallowed CR */
4805 if (!bof)
879bc93b 4806 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 4807#endif
7e28d3af 4808 if (bof) {
3280af22 4809 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4810 s = swallow_bom((U8*)s);
e929a76b 4811 }
378cc40b 4812 }
737c24fc 4813 if (PL_parser->in_pod) {
a0d0e21e 4814 /* Incest with pod. */
5db06880
NC
4815#ifdef PERL_MAD
4816 if (PL_madskills)
cd81e915 4817 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4818#endif
01a57ef7 4819 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4820 sv_setpvs(PL_linestr, "");
3280af22
NIS
4821 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4822 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4823 PL_last_lop = PL_last_uni = NULL;
737c24fc 4824 PL_parser->in_pod = 0;
a0d0e21e 4825 }
4e553d73 4826 }
85613cab
Z
4827 if (PL_rsfp)
4828 incline(s);
737c24fc 4829 } while (PL_parser->in_pod);
3280af22 4830 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4831 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4832 PL_last_lop = PL_last_uni = NULL;
57843af0 4833 if (CopLINE(PL_curcop) == 1) {
3280af22 4834 while (s < PL_bufend && isSPACE(*s))
79072805 4835 s++;
a0d0e21e 4836 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4837 s++;
5db06880
NC
4838#ifdef PERL_MAD
4839 if (PL_madskills)
cd81e915 4840 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4841#endif
bd61b366 4842 d = NULL;
3280af22 4843 if (!PL_in_eval) {
44a8e56a 4844 if (*s == '#' && *(s+1) == '!')
4845 d = s + 2;
4846#ifdef ALTERNATE_SHEBANG
4847 else {
bfed75c6 4848 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4849 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4850 d = s + (sizeof(as) - 1);
4851 }
4852#endif /* ALTERNATE_SHEBANG */
4853 }
4854 if (d) {
b8378b72 4855 char *ipath;
774d564b 4856 char *ipathend;
b8378b72 4857
774d564b 4858 while (isSPACE(*d))
b8378b72
CS
4859 d++;
4860 ipath = d;
774d564b 4861 while (*d && !isSPACE(*d))
4862 d++;
4863 ipathend = d;
4864
4865#ifdef ARG_ZERO_IS_SCRIPT
4866 if (ipathend > ipath) {
4867 /*
4868 * HP-UX (at least) sets argv[0] to the script name,
4869 * which makes $^X incorrect. And Digital UNIX and Linux,
4870 * at least, set argv[0] to the basename of the Perl
4871 * interpreter. So, having found "#!", we'll set it right.
4872 */
fafc274c
NC
4873 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4874 SVt_PV)); /* $^X */
774d564b 4875 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4876 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4877 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4878 SvSETMAGIC(x);
4879 }
556c1dec
JH
4880 else {
4881 STRLEN blen;
4882 STRLEN llen;
cfd0369c 4883 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4884 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4885 if (llen < blen) {
4886 bstart += blen - llen;
4887 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4888 sv_setpvn(x, ipath, ipathend - ipath);
4889 SvSETMAGIC(x);
4890 }
4891 }
4892 }
774d564b 4893 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4894 }
774d564b 4895#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4896
4897 /*
4898 * Look for options.
4899 */
748a9306 4900 d = instr(s,"perl -");
84e30d1a 4901 if (!d) {
748a9306 4902 d = instr(s,"perl");
84e30d1a
GS
4903#if defined(DOSISH)
4904 /* avoid getting into infinite loops when shebang
4905 * line contains "Perl" rather than "perl" */
4906 if (!d) {
4907 for (d = ipathend-4; d >= ipath; --d) {
4908 if ((*d == 'p' || *d == 'P')
4909 && !ibcmp(d, "perl", 4))
4910 {
4911 break;
4912 }
4913 }
4914 if (d < ipath)
bd61b366 4915 d = NULL;
84e30d1a
GS
4916 }
4917#endif
4918 }
44a8e56a 4919#ifdef ALTERNATE_SHEBANG
4920 /*
4921 * If the ALTERNATE_SHEBANG on this system starts with a
4922 * character that can be part of a Perl expression, then if
4923 * we see it but not "perl", we're probably looking at the
4924 * start of Perl code, not a request to hand off to some
4925 * other interpreter. Similarly, if "perl" is there, but
4926 * not in the first 'word' of the line, we assume the line
4927 * contains the start of the Perl program.
44a8e56a 4928 */
4929 if (d && *s != '#') {
f54cb97a 4930 const char *c = ipath;
44a8e56a 4931 while (*c && !strchr("; \t\r\n\f\v#", *c))
4932 c++;
4933 if (c < d)
bd61b366 4934 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4935 else
4936 *s = '#'; /* Don't try to parse shebang line */
4937 }
774d564b 4938#endif /* ALTERNATE_SHEBANG */
748a9306 4939 if (!d &&
44a8e56a 4940 *s == '#' &&
774d564b 4941 ipathend > ipath &&
3280af22 4942 !PL_minus_c &&
748a9306 4943 !instr(s,"indir") &&
3280af22 4944 instr(PL_origargv[0],"perl"))
748a9306 4945 {
27da23d5 4946 dVAR;
9f68db38 4947 char **newargv;
9f68db38 4948
774d564b 4949 *ipathend = '\0';
4950 s = ipathend + 1;
3280af22 4951 while (s < PL_bufend && isSPACE(*s))
9f68db38 4952 s++;
3280af22 4953 if (s < PL_bufend) {
d85f917e 4954 Newx(newargv,PL_origargc+3,char*);
9f68db38 4955 newargv[1] = s;
3280af22 4956 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4957 s++;
4958 *s = '\0';
3280af22 4959 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4960 }
4961 else
3280af22 4962 newargv = PL_origargv;
774d564b 4963 newargv[0] = ipath;
b35112e7 4964 PERL_FPU_PRE_EXEC
b4748376 4965 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4966 PERL_FPU_POST_EXEC
cea2e8a9 4967 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4968 }
748a9306 4969 if (d) {
c35e046a
AL
4970 while (*d && !isSPACE(*d))
4971 d++;
4972 while (SPACE_OR_TAB(*d))
4973 d++;
748a9306
LW
4974
4975 if (*d++ == '-') {
f54cb97a 4976 const bool switches_done = PL_doswitches;
fb993905
GA
4977 const U32 oldpdb = PL_perldb;
4978 const bool oldn = PL_minus_n;
4979 const bool oldp = PL_minus_p;
c7030b81 4980 const char *d1 = d;
fb993905 4981
8cc95fdb 4982 do {
4ba71d51
FC
4983 bool baduni = FALSE;
4984 if (*d1 == 'C') {
bd0ab00d
NC
4985 const char *d2 = d1 + 1;
4986 if (parse_unicode_opts((const char **)&d2)
4987 != PL_unicode)
4988 baduni = TRUE;
4ba71d51
FC
4989 }
4990 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4991 const char * const m = d1;
4992 while (*d1 && !isSPACE(*d1))
4993 d1++;
cea2e8a9 4994 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4995 (int)(d1 - m), m);
8cc95fdb 4996 }
c7030b81
NC
4997 d1 = moreswitches(d1);
4998 } while (d1);
f0b2cf55
YST
4999 if (PL_doswitches && !switches_done) {
5000 int argc = PL_origargc;
5001 char **argv = PL_origargv;
5002 do {
5003 argc--,argv++;
5004 } while (argc && argv[0][0] == '-' && argv[0][1]);
5005 init_argv_symbols(argc,argv);
5006 }
65269a95 5007 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5008 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5009 /* if we have already added "LINE: while (<>) {",
5010 we must not do it again */
748a9306 5011 {
76f68e9b 5012 sv_setpvs(PL_linestr, "");
3280af22
NIS
5013 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5014 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5015 PL_last_lop = PL_last_uni = NULL;
3280af22 5016 PL_preambled = FALSE;
65269a95 5017 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5018 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5019 goto retry;
5020 }
a0d0e21e 5021 }
79072805 5022 }
9f68db38 5023 }
79072805 5024 }
3280af22
NIS
5025 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5026 PL_bufptr = s;
5027 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5028 return yylex();
ae986130 5029 }
378cc40b 5030 goto retry;
4fdae800 5031 case '\r':
6a27c188 5032#ifdef PERL_STRICT_CR
cea2e8a9 5033 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5034 Perl_croak(aTHX_
cc507455 5035 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5036#endif
4fdae800 5037 case ' ': case '\t': case '\f': case 013:
5db06880 5038#ifdef PERL_MAD
cd81e915 5039 PL_realtokenstart = -1;
ac372eb8
RD
5040 if (!PL_thiswhite)
5041 PL_thiswhite = newSVpvs("");
5042 sv_catpvn(PL_thiswhite, s, 1);
5db06880 5043#endif
ac372eb8 5044 s++;
378cc40b 5045 goto retry;
378cc40b 5046 case '#':
e929a76b 5047 case '\n':
5db06880 5048#ifdef PERL_MAD
cd81e915 5049 PL_realtokenstart = -1;
5db06880 5050 if (PL_madskills)
cd81e915 5051 PL_faketokens = 0;
5db06880 5052#endif
3280af22 5053 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
5054 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5055 /* handle eval qq[#line 1 "foo"\n ...] */
5056 CopLINE_dec(PL_curcop);
5057 incline(s);
5058 }
5db06880
NC
5059 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5060 s = SKIPSPACE0(s);
5061 if (!PL_in_eval || PL_rsfp)
5062 incline(s);
5063 }
5064 else {
5065 d = s;
5066 while (d < PL_bufend && *d != '\n')
5067 d++;
5068 if (d < PL_bufend)
5069 d++;
5070 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5071 Perl_croak(aTHX_ "panic: input overflow");
5072#ifdef PERL_MAD
5073 if (PL_madskills)
cd81e915 5074 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5075#endif
5076 s = d;
5077 incline(s);
5078 }
3280af22
NIS
5079 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5080 PL_bufptr = s;
5081 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5082 return yylex();
a687059c 5083 }
378cc40b 5084 }
a687059c 5085 else {
5db06880
NC
5086#ifdef PERL_MAD
5087 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5088 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5089 PL_faketokens = 0;
5db06880
NC
5090 s = SKIPSPACE0(s);
5091 TOKEN(PEG); /* make sure any #! line is accessible */
5092 }
5093 s = SKIPSPACE0(s);
5094 }
5095 else {
5096/* if (PL_madskills && PL_lex_formbrack) { */
5097 d = s;
5098 while (d < PL_bufend && *d != '\n')
5099 d++;
5100 if (d < PL_bufend)
5101 d++;
5102 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5103 Perl_croak(aTHX_ "panic: input overflow");
5104 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5105 if (!PL_thiswhite)
6b29d1f5 5106 PL_thiswhite = newSVpvs("");
5db06880 5107 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5108 sv_setpvs(PL_thiswhite, "");
cd81e915 5109 PL_faketokens = 0;
5db06880 5110 }
cd81e915 5111 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5112 }
5113 s = d;
5114/* }
5115 *s = '\0';
5116 PL_bufend = s; */
5117 }
5118#else
378cc40b 5119 *s = '\0';
3280af22 5120 PL_bufend = s;
5db06880 5121#endif
a687059c 5122 }
378cc40b
LW
5123 goto retry;
5124 case '-':
79072805 5125 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5126 I32 ftst = 0;
90771dc0 5127 char tmp;
e5edeb50 5128
378cc40b 5129 s++;
3280af22 5130 PL_bufptr = s;
748a9306
LW
5131 tmp = *s++;
5132
bf4acbe4 5133 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5134 s++;
5135
5136 if (strnEQ(s,"=>",2)) {
3280af22 5137 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5138 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5139 OPERATOR('-'); /* unary minus */
5140 }
3280af22 5141 PL_last_uni = PL_oldbufptr;
748a9306 5142 switch (tmp) {
e5edeb50
JH
5143 case 'r': ftst = OP_FTEREAD; break;
5144 case 'w': ftst = OP_FTEWRITE; break;
5145 case 'x': ftst = OP_FTEEXEC; break;
5146 case 'o': ftst = OP_FTEOWNED; break;
5147 case 'R': ftst = OP_FTRREAD; break;
5148 case 'W': ftst = OP_FTRWRITE; break;
5149 case 'X': ftst = OP_FTREXEC; break;
5150 case 'O': ftst = OP_FTROWNED; break;
5151 case 'e': ftst = OP_FTIS; break;
5152 case 'z': ftst = OP_FTZERO; break;
5153 case 's': ftst = OP_FTSIZE; break;
5154 case 'f': ftst = OP_FTFILE; break;
5155 case 'd': ftst = OP_FTDIR; break;
5156 case 'l': ftst = OP_FTLINK; break;
5157 case 'p': ftst = OP_FTPIPE; break;
5158 case 'S': ftst = OP_FTSOCK; break;
5159 case 'u': ftst = OP_FTSUID; break;
5160 case 'g': ftst = OP_FTSGID; break;
5161 case 'k': ftst = OP_FTSVTX; break;
5162 case 'b': ftst = OP_FTBLK; break;
5163 case 'c': ftst = OP_FTCHR; break;
5164 case 't': ftst = OP_FTTTY; break;
5165 case 'T': ftst = OP_FTTEXT; break;
5166 case 'B': ftst = OP_FTBINARY; break;
5167 case 'M': case 'A': case 'C':
fafc274c 5168 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5169 switch (tmp) {
5170 case 'M': ftst = OP_FTMTIME; break;
5171 case 'A': ftst = OP_FTATIME; break;
5172 case 'C': ftst = OP_FTCTIME; break;
5173 default: break;
5174 }
5175 break;
378cc40b 5176 default:
378cc40b
LW
5177 break;
5178 }
e5edeb50 5179 if (ftst) {
eb160463 5180 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5181 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5182 "### Saw file test %c\n", (int)tmp);
5f80b19c 5183 } );
e5edeb50
JH
5184 FTST(ftst);
5185 }
5186 else {
5187 /* Assume it was a minus followed by a one-letter named
5188 * subroutine call (or a -bareword), then. */
95c31fe3 5189 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5190 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5191 (int) tmp);
5f80b19c 5192 } );
3cf7b4c4 5193 s = --PL_bufptr;
e5edeb50 5194 }
378cc40b 5195 }
90771dc0
NC
5196 {
5197 const char tmp = *s++;
5198 if (*s == tmp) {
5199 s++;
5200 if (PL_expect == XOPERATOR)
5201 TERM(POSTDEC);
5202 else
5203 OPERATOR(PREDEC);
5204 }
5205 else if (*s == '>') {
5206 s++;
29595ff2 5207 s = SKIPSPACE1(s);
90771dc0
NC
5208 if (isIDFIRST_lazy_if(s,UTF)) {
5209 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5210 TOKEN(ARROW);
5211 }
5212 else if (*s == '$')
5213 OPERATOR(ARROW);
5214 else
5215 TERM(ARROW);
5216 }
78cdf107
Z
5217 if (PL_expect == XOPERATOR) {
5218 if (*s == '=' && !PL_lex_allbrackets &&
5219 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5220 s--;
5221 TOKEN(0);
5222 }
90771dc0 5223 Aop(OP_SUBTRACT);
78cdf107 5224 }
90771dc0
NC
5225 else {
5226 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5227 check_uni();
5228 OPERATOR('-'); /* unary minus */
79072805 5229 }
2f3197b3 5230 }
79072805 5231
378cc40b 5232 case '+':
90771dc0
NC
5233 {
5234 const char tmp = *s++;
5235 if (*s == tmp) {
5236 s++;
5237 if (PL_expect == XOPERATOR)
5238 TERM(POSTINC);
5239 else
5240 OPERATOR(PREINC);
5241 }
78cdf107
Z
5242 if (PL_expect == XOPERATOR) {
5243 if (*s == '=' && !PL_lex_allbrackets &&
5244 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5245 s--;
5246 TOKEN(0);
5247 }
90771dc0 5248 Aop(OP_ADD);
78cdf107 5249 }
90771dc0
NC
5250 else {
5251 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5252 check_uni();
5253 OPERATOR('+');
5254 }
2f3197b3 5255 }
a687059c 5256
378cc40b 5257 case '*':
3280af22
NIS
5258 if (PL_expect != XOPERATOR) {
5259 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5260 PL_expect = XOPERATOR;
5261 force_ident(PL_tokenbuf, '*');
5262 if (!*PL_tokenbuf)
a0d0e21e 5263 PREREF('*');
79072805 5264 TERM('*');
a687059c 5265 }
79072805
LW
5266 s++;
5267 if (*s == '*') {
a687059c 5268 s++;
78cdf107
Z
5269 if (*s == '=' && !PL_lex_allbrackets &&
5270 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5271 s -= 2;
5272 TOKEN(0);
5273 }
79072805 5274 PWop(OP_POW);
a687059c 5275 }
78cdf107
Z
5276 if (*s == '=' && !PL_lex_allbrackets &&
5277 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5278 s--;
5279 TOKEN(0);
5280 }
79072805
LW
5281 Mop(OP_MULTIPLY);
5282
378cc40b 5283 case '%':
3280af22 5284 if (PL_expect == XOPERATOR) {
78cdf107
Z
5285 if (s[1] == '=' && !PL_lex_allbrackets &&
5286 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5287 TOKEN(0);
bbce6d69 5288 ++s;
5289 Mop(OP_MODULO);
a687059c 5290 }
3280af22 5291 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5292 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5293 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5294 if (!PL_tokenbuf[1]) {
bbce6d69 5295 PREREF('%');
a687059c 5296 }
3280af22 5297 PL_pending_ident = '%';
bbce6d69 5298 TERM('%');
a687059c 5299
378cc40b 5300 case '^':
78cdf107
Z
5301 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5302 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5303 TOKEN(0);
79072805 5304 s++;
a0d0e21e 5305 BOop(OP_BIT_XOR);
79072805 5306 case '[':
a7aaec61
Z
5307 if (PL_lex_brackets > 100)
5308 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5309 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5310 PL_lex_allbrackets++;
df3467db
IG
5311 {
5312 const char tmp = *s++;
5313 OPERATOR(tmp);
5314 }
378cc40b 5315 case '~':
0d863452 5316 if (s[1] == '~'
3e7dd34d 5317 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5318 {
78cdf107
Z
5319 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5320 TOKEN(0);
0d863452
RH
5321 s += 2;
5322 Eop(OP_SMARTMATCH);
5323 }
78cdf107
Z
5324 s++;
5325 OPERATOR('~');
378cc40b 5326 case ',':
78cdf107
Z
5327 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5328 TOKEN(0);
5329 s++;
5330 OPERATOR(',');
a0d0e21e
LW
5331 case ':':
5332 if (s[1] == ':') {
5333 len = 0;
0bfa2a8a 5334 goto just_a_word_zero_gv;
a0d0e21e
LW
5335 }
5336 s++;
09bef843
SB
5337 switch (PL_expect) {
5338 OP *attrs;
5db06880
NC
5339#ifdef PERL_MAD
5340 I32 stuffstart;
5341#endif
09bef843
SB
5342 case XOPERATOR:
5343 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5344 break;
5345 PL_bufptr = s; /* update in case we back off */
d83f38d8 5346 if (*s == '=') {
2dc78664
NC
5347 Perl_croak(aTHX_
5348 "Use of := for an empty attribute list is not allowed");
d83f38d8 5349 }
09bef843
SB
5350 goto grabattrs;
5351 case XATTRBLOCK:
5352 PL_expect = XBLOCK;
5353 goto grabattrs;
5354 case XATTRTERM:
5355 PL_expect = XTERMBLOCK;
5356 grabattrs:
5db06880
NC
5357#ifdef PERL_MAD
5358 stuffstart = s - SvPVX(PL_linestr) - 1;
5359#endif
29595ff2 5360 s = PEEKSPACE(s);
5f66b61c 5361 attrs = NULL;
7e2040f0 5362 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5363 I32 tmp;
5cc237b8 5364 SV *sv;
09bef843 5365 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5366 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5367 if (tmp < 0) tmp = -tmp;
5368 switch (tmp) {
5369 case KEY_or:
5370 case KEY_and:
5371 case KEY_for:
11baf631 5372 case KEY_foreach:
f9829d6b
GS
5373 case KEY_unless:
5374 case KEY_if:
5375 case KEY_while:
5376 case KEY_until:
5377 goto got_attrs;
5378 default:
5379 break;
5380 }
5381 }
5cc237b8 5382 sv = newSVpvn(s, len);
09bef843
SB
5383 if (*d == '(') {
5384 d = scan_str(d,TRUE,TRUE);
5385 if (!d) {
09bef843
SB
5386 /* MUST advance bufptr here to avoid bogus
5387 "at end of line" context messages from yyerror().
5388 */
5389 PL_bufptr = s + len;
5390 yyerror("Unterminated attribute parameter in attribute list");
5391 if (attrs)
5392 op_free(attrs);
5cc237b8 5393 sv_free(sv);
bbf60fe6 5394 return REPORT(0); /* EOF indicator */
09bef843
SB
5395 }
5396 }
5397 if (PL_lex_stuff) {
09bef843 5398 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5399 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5400 newSVOP(OP_CONST, 0, sv));
5401 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5402 PL_lex_stuff = NULL;
09bef843
SB
5403 }
5404 else {
5cc237b8
BS
5405 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5406 sv_free(sv);
1108974d 5407 if (PL_in_my == KEY_our) {
df9a6019 5408 deprecate(":unique");
1108974d 5409 }
bfed75c6 5410 else
371fce9b
DM
5411 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5412 }
5413
d3cea301
SB
5414 /* NOTE: any CV attrs applied here need to be part of
5415 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5416 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5417 sv_free(sv);
78f9721b 5418 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5419 }
5420 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5421 sv_free(sv);
8e5dadda 5422 deprecate(":locked");
5cc237b8
BS
5423 }
5424 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5425 sv_free(sv);
78f9721b 5426 CvMETHOD_on(PL_compcv);
5cc237b8 5427 }
78f9721b
SM
5428 /* After we've set the flags, it could be argued that
5429 we don't need to do the attributes.pm-based setting
5430 process, and shouldn't bother appending recognized
d3cea301
SB
5431 flags. To experiment with that, uncomment the
5432 following "else". (Note that's already been
5433 uncommented. That keeps the above-applied built-in
5434 attributes from being intercepted (and possibly
5435 rejected) by a package's attribute routines, but is
5436 justified by the performance win for the common case
5437 of applying only built-in attributes.) */
0256094b 5438 else
2fcb4757 5439 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5440 newSVOP(OP_CONST, 0,
5cc237b8 5441 sv));
09bef843 5442 }
29595ff2 5443 s = PEEKSPACE(d);
0120eecf 5444 if (*s == ':' && s[1] != ':')
29595ff2 5445 s = PEEKSPACE(s+1);
0120eecf
GS
5446 else if (s == d)
5447 break; /* require real whitespace or :'s */
29595ff2 5448 /* XXX losing whitespace on sequential attributes here */
09bef843 5449 }
90771dc0
NC
5450 {
5451 const char tmp
5452 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5453 if (*s != ';' && *s != '}' && *s != tmp
5454 && (tmp != '=' || *s != ')')) {
5455 const char q = ((*s == '\'') ? '"' : '\'');
5456 /* If here for an expression, and parsed no attrs, back
5457 off. */
5458 if (tmp == '=' && !attrs) {
5459 s = PL_bufptr;
5460 break;
5461 }
5462 /* MUST advance bufptr here to avoid bogus "at end of line"
5463 context messages from yyerror().
5464 */
5465 PL_bufptr = s;
10edeb5d
JH
5466 yyerror( (const char *)
5467 (*s
5468 ? Perl_form(aTHX_ "Invalid separator character "
5469 "%c%c%c in attribute list", q, *s, q)
5470 : "Unterminated attribute list" ) );
90771dc0
NC
5471 if (attrs)
5472 op_free(attrs);
5473 OPERATOR(':');
09bef843 5474 }
09bef843 5475 }
f9829d6b 5476 got_attrs:
09bef843 5477 if (attrs) {
cd81e915 5478 start_force(PL_curforce);
9ded7720 5479 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5480 CURMAD('_', PL_nextwhite);
89122651 5481 force_next(THING);
5db06880
NC
5482 }
5483#ifdef PERL_MAD
5484 if (PL_madskills) {
cd81e915 5485 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5486 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5487 }
5db06880 5488#endif
09bef843
SB
5489 TOKEN(COLONATTR);
5490 }
78cdf107
Z
5491 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5492 s--;
5493 TOKEN(0);
5494 }
5495 PL_lex_allbrackets--;
a0d0e21e 5496 OPERATOR(':');
8990e307
LW
5497 case '(':
5498 s++;
3280af22
NIS
5499 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5500 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5501 else
3280af22 5502 PL_expect = XTERM;
29595ff2 5503 s = SKIPSPACE1(s);
78cdf107 5504 PL_lex_allbrackets++;
a0d0e21e 5505 TOKEN('(');
378cc40b 5506 case ';':
78cdf107
Z
5507 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5508 TOKEN(0);
f4dd75d9 5509 CLINE;
78cdf107
Z
5510 s++;
5511 OPERATOR(';');
378cc40b 5512 case ')':
78cdf107
Z
5513 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5514 TOKEN(0);
5515 s++;
5516 PL_lex_allbrackets--;
5517 s = SKIPSPACE1(s);
5518 if (*s == '{')
5519 PREBLOCK(')');
5520 TERM(')');
79072805 5521 case ']':
a7aaec61
Z
5522 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5523 TOKEN(0);
79072805 5524 s++;
3280af22 5525 if (PL_lex_brackets <= 0)
d98d5fff 5526 yyerror("Unmatched right square bracket");
463ee0b2 5527 else
3280af22 5528 --PL_lex_brackets;
78cdf107 5529 PL_lex_allbrackets--;
3280af22
NIS
5530 if (PL_lex_state == LEX_INTERPNORMAL) {
5531 if (PL_lex_brackets == 0) {
02255c60
FC
5532 if (*s == '-' && s[1] == '>')
5533 PL_lex_state = LEX_INTERPENDMAYBE;
5534 else if (*s != '[' && *s != '{')
3280af22 5535 PL_lex_state = LEX_INTERPEND;
79072805
LW
5536 }
5537 }
4633a7c4 5538 TERM(']');
79072805
LW
5539 case '{':
5540 leftbracket:
79072805 5541 s++;
3280af22 5542 if (PL_lex_brackets > 100) {
8edd5f42 5543 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5544 }
3280af22 5545 switch (PL_expect) {
a0d0e21e 5546 case XTERM:
3280af22 5547 if (PL_lex_formbrack) {
a0d0e21e
LW
5548 s--;
5549 PRETERMBLOCK(DO);
5550 }
3280af22
NIS
5551 if (PL_oldoldbufptr == PL_last_lop)
5552 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5553 else
3280af22 5554 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5555 PL_lex_allbrackets++;
79072805 5556 OPERATOR(HASHBRACK);
a0d0e21e 5557 case XOPERATOR:
bf4acbe4 5558 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5559 s++;
44a8e56a 5560 d = s;
3280af22
NIS
5561 PL_tokenbuf[0] = '\0';
5562 if (d < PL_bufend && *d == '-') {
5563 PL_tokenbuf[0] = '-';
44a8e56a 5564 d++;
bf4acbe4 5565 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5566 d++;
5567 }
7e2040f0 5568 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5569 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5570 FALSE, &len);
bf4acbe4 5571 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5572 d++;
5573 if (*d == '}') {
f54cb97a 5574 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5575 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5576 if (minus)
5577 force_next('-');
748a9306
LW
5578 }
5579 }
5580 /* FALL THROUGH */
09bef843 5581 case XATTRBLOCK:
748a9306 5582 case XBLOCK:
3280af22 5583 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5584 PL_lex_allbrackets++;
3280af22 5585 PL_expect = XSTATE;
a0d0e21e 5586 break;
09bef843 5587 case XATTRTERM:
a0d0e21e 5588 case XTERMBLOCK:
3280af22 5589 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5590 PL_lex_allbrackets++;
3280af22 5591 PL_expect = XSTATE;
a0d0e21e
LW
5592 break;
5593 default: {
f54cb97a 5594 const char *t;
3280af22
NIS
5595 if (PL_oldoldbufptr == PL_last_lop)
5596 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5597 else
3280af22 5598 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5599 PL_lex_allbrackets++;
29595ff2 5600 s = SKIPSPACE1(s);
8452ff4b
SB
5601 if (*s == '}') {
5602 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5603 PL_expect = XTERM;
5604 /* This hack is to get the ${} in the message. */
5605 PL_bufptr = s+1;
5606 yyerror("syntax error");
5607 break;
5608 }
a0d0e21e 5609 OPERATOR(HASHBRACK);
8452ff4b 5610 }
b8a4b1be
GS
5611 /* This hack serves to disambiguate a pair of curlies
5612 * as being a block or an anon hash. Normally, expectation
5613 * determines that, but in cases where we're not in a
5614 * position to expect anything in particular (like inside
5615 * eval"") we have to resolve the ambiguity. This code
5616 * covers the case where the first term in the curlies is a
5617 * quoted string. Most other cases need to be explicitly
a0288114 5618 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5619 * curly in order to force resolution as an anon hash.
5620 *
5621 * XXX should probably propagate the outer expectation
5622 * into eval"" to rely less on this hack, but that could
5623 * potentially break current behavior of eval"".
5624 * GSAR 97-07-21
5625 */
5626 t = s;
5627 if (*s == '\'' || *s == '"' || *s == '`') {
5628 /* common case: get past first string, handling escapes */
3280af22 5629 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5630 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5631 t++;
5632 t++;
a0d0e21e 5633 }
b8a4b1be 5634 else if (*s == 'q') {
3280af22 5635 if (++t < PL_bufend
b8a4b1be 5636 && (!isALNUM(*t)
3280af22 5637 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5638 && !isALNUM(*t))))
5639 {
abc667d1 5640 /* skip q//-like construct */
f54cb97a 5641 const char *tmps;
b8a4b1be
GS
5642 char open, close, term;
5643 I32 brackets = 1;
5644
3280af22 5645 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5646 t++;
abc667d1
DM
5647 /* check for q => */
5648 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5649 OPERATOR(HASHBRACK);
5650 }
b8a4b1be
GS
5651 term = *t;
5652 open = term;
5653 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5654 term = tmps[5];
5655 close = term;
5656 if (open == close)
3280af22
NIS
5657 for (t++; t < PL_bufend; t++) {
5658 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5659 t++;
6d07e5e9 5660 else if (*t == open)
b8a4b1be
GS
5661 break;
5662 }
abc667d1 5663 else {
3280af22
NIS
5664 for (t++; t < PL_bufend; t++) {
5665 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5666 t++;
6d07e5e9 5667 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5668 break;
5669 else if (*t == open)
5670 brackets++;
5671 }
abc667d1
DM
5672 }
5673 t++;
b8a4b1be 5674 }
abc667d1
DM
5675 else
5676 /* skip plain q word */
5677 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5678 t += UTF8SKIP(t);
a0d0e21e 5679 }
7e2040f0 5680 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5681 t += UTF8SKIP(t);
7e2040f0 5682 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5683 t += UTF8SKIP(t);
a0d0e21e 5684 }
3280af22 5685 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5686 t++;
b8a4b1be
GS
5687 /* if comma follows first term, call it an anon hash */
5688 /* XXX it could be a comma expression with loop modifiers */
3280af22 5689 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5690 || (*t == '=' && t[1] == '>')))
a0d0e21e 5691 OPERATOR(HASHBRACK);
3280af22 5692 if (PL_expect == XREF)
4e4e412b 5693 PL_expect = XTERM;
a0d0e21e 5694 else {
3280af22
NIS
5695 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5696 PL_expect = XSTATE;
a0d0e21e 5697 }
8990e307 5698 }
a0d0e21e 5699 break;
463ee0b2 5700 }
6154021b 5701 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5702 if (isSPACE(*s) || *s == '#')
3280af22 5703 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5704 TOKEN('{');
378cc40b 5705 case '}':
a7aaec61
Z
5706 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5707 TOKEN(0);
79072805
LW
5708 rightbracket:
5709 s++;
3280af22 5710 if (PL_lex_brackets <= 0)
d98d5fff 5711 yyerror("Unmatched right curly bracket");
463ee0b2 5712 else
3280af22 5713 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 5714 PL_lex_allbrackets--;
c2e66d9e 5715 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5716 PL_lex_formbrack = 0;
5717 if (PL_lex_state == LEX_INTERPNORMAL) {
5718 if (PL_lex_brackets == 0) {
9059aa12
LW
5719 if (PL_expect & XFAKEBRACK) {
5720 PL_expect &= XENUMMASK;
3280af22
NIS
5721 PL_lex_state = LEX_INTERPEND;
5722 PL_bufptr = s;
5db06880
NC
5723#if 0
5724 if (PL_madskills) {
cd81e915 5725 if (!PL_thiswhite)
6b29d1f5 5726 PL_thiswhite = newSVpvs("");
76f68e9b 5727 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5728 }
5729#endif
cea2e8a9 5730 return yylex(); /* ignore fake brackets */
79072805 5731 }
fa83b5b6 5732 if (*s == '-' && s[1] == '>')
3280af22 5733 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5734 else if (*s != '[' && *s != '{')
3280af22 5735 PL_lex_state = LEX_INTERPEND;
79072805
LW
5736 }
5737 }
9059aa12
LW
5738 if (PL_expect & XFAKEBRACK) {
5739 PL_expect &= XENUMMASK;
3280af22 5740 PL_bufptr = s;
cea2e8a9 5741 return yylex(); /* ignore fake brackets */
748a9306 5742 }
cd81e915 5743 start_force(PL_curforce);
5db06880
NC
5744 if (PL_madskills) {
5745 curmad('X', newSVpvn(s-1,1));
cd81e915 5746 CURMAD('_', PL_thiswhite);
5db06880 5747 }
79072805 5748 force_next('}');
5db06880 5749#ifdef PERL_MAD
cd81e915 5750 if (!PL_thistoken)
6b29d1f5 5751 PL_thistoken = newSVpvs("");
5db06880 5752#endif
79072805 5753 TOKEN(';');
378cc40b
LW
5754 case '&':
5755 s++;
78cdf107
Z
5756 if (*s++ == '&') {
5757 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5758 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5759 s -= 2;
5760 TOKEN(0);
5761 }
a0d0e21e 5762 AOPERATOR(ANDAND);
78cdf107 5763 }
378cc40b 5764 s--;
3280af22 5765 if (PL_expect == XOPERATOR) {
041457d9
DM
5766 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5767 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5768 {
57843af0 5769 CopLINE_dec(PL_curcop);
f1f66076 5770 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5771 CopLINE_inc(PL_curcop);
463ee0b2 5772 }
78cdf107
Z
5773 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5774 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5775 s--;
5776 TOKEN(0);
5777 }
79072805 5778 BAop(OP_BIT_AND);
463ee0b2 5779 }
79072805 5780
3280af22
NIS
5781 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5782 if (*PL_tokenbuf) {
5783 PL_expect = XOPERATOR;
5784 force_ident(PL_tokenbuf, '&');
463ee0b2 5785 }
79072805
LW
5786 else
5787 PREREF('&');
6154021b 5788 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5789 TERM('&');
5790
378cc40b
LW
5791 case '|':
5792 s++;
78cdf107
Z
5793 if (*s++ == '|') {
5794 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5795 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5796 s -= 2;
5797 TOKEN(0);
5798 }
a0d0e21e 5799 AOPERATOR(OROR);
78cdf107 5800 }
378cc40b 5801 s--;
78cdf107
Z
5802 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5803 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5804 s--;
5805 TOKEN(0);
5806 }
79072805 5807 BOop(OP_BIT_OR);
378cc40b
LW
5808 case '=':
5809 s++;
748a9306 5810 {
90771dc0 5811 const char tmp = *s++;
78cdf107
Z
5812 if (tmp == '=') {
5813 if (!PL_lex_allbrackets &&
5814 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5815 s -= 2;
5816 TOKEN(0);
5817 }
90771dc0 5818 Eop(OP_EQ);
78cdf107
Z
5819 }
5820 if (tmp == '>') {
5821 if (!PL_lex_allbrackets &&
5822 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5823 s -= 2;
5824 TOKEN(0);
5825 }
90771dc0 5826 OPERATOR(',');
78cdf107 5827 }
90771dc0
NC
5828 if (tmp == '~')
5829 PMop(OP_MATCH);
5830 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5831 && strchr("+-*/%.^&|<",tmp))
5832 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5833 "Reversed %c= operator",(int)tmp);
5834 s--;
5835 if (PL_expect == XSTATE && isALPHA(tmp) &&
5836 (s == PL_linestart+1 || s[-2] == '\n') )
5837 {
5838 if (PL_in_eval && !PL_rsfp) {
5839 d = PL_bufend;
5840 while (s < d) {
5841 if (*s++ == '\n') {
5842 incline(s);
5843 if (strnEQ(s,"=cut",4)) {
5844 s = strchr(s,'\n');
5845 if (s)
5846 s++;
5847 else
5848 s = d;
5849 incline(s);
5850 goto retry;
5851 }
5852 }
a5f75d66 5853 }
90771dc0 5854 goto retry;
a5f75d66 5855 }
5db06880
NC
5856#ifdef PERL_MAD
5857 if (PL_madskills) {
cd81e915 5858 if (!PL_thiswhite)
6b29d1f5 5859 PL_thiswhite = newSVpvs("");
cd81e915 5860 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5861 PL_bufend - PL_linestart);
5862 }
5863#endif
90771dc0 5864 s = PL_bufend;
737c24fc 5865 PL_parser->in_pod = 1;
90771dc0 5866 goto retry;
a5f75d66 5867 }
a0d0e21e 5868 }
3280af22 5869 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5870 const char *t = s;
51882d45 5871#ifdef PERL_STRICT_CR
c35e046a 5872 while (SPACE_OR_TAB(*t))
51882d45 5873#else
c35e046a 5874 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5875#endif
c35e046a 5876 t++;
a0d0e21e
LW
5877 if (*t == '\n' || *t == '#') {
5878 s--;
3280af22 5879 PL_expect = XBLOCK;
a0d0e21e
LW
5880 goto leftbracket;
5881 }
79072805 5882 }
78cdf107
Z
5883 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5884 s--;
5885 TOKEN(0);
5886 }
6154021b 5887 pl_yylval.ival = 0;
a0d0e21e 5888 OPERATOR(ASSIGNOP);
378cc40b
LW
5889 case '!':
5890 s++;
90771dc0
NC
5891 {
5892 const char tmp = *s++;
5893 if (tmp == '=') {
5894 /* was this !=~ where !~ was meant?
5895 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5896
5897 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5898 const char *t = s+1;
5899
5900 while (t < PL_bufend && isSPACE(*t))
5901 ++t;
5902
5903 if (*t == '/' || *t == '?' ||
5904 ((*t == 'm' || *t == 's' || *t == 'y')
5905 && !isALNUM(t[1])) ||
5906 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5907 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5908 "!=~ should be !~");
5909 }
78cdf107
Z
5910 if (!PL_lex_allbrackets &&
5911 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5912 s -= 2;
5913 TOKEN(0);
5914 }
90771dc0
NC
5915 Eop(OP_NE);
5916 }
5917 if (tmp == '~')
5918 PMop(OP_NOT);
5919 }
378cc40b
LW
5920 s--;
5921 OPERATOR('!');
5922 case '<':
3280af22 5923 if (PL_expect != XOPERATOR) {
93a17b20 5924 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5925 check_uni();
79072805
LW
5926 if (s[1] == '<')
5927 s = scan_heredoc(s);
5928 else
5929 s = scan_inputsymbol(s);
5930 TERM(sublex_start());
378cc40b
LW
5931 }
5932 s++;
90771dc0
NC
5933 {
5934 char tmp = *s++;
78cdf107
Z
5935 if (tmp == '<') {
5936 if (*s == '=' && !PL_lex_allbrackets &&
5937 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5938 s -= 2;
5939 TOKEN(0);
5940 }
90771dc0 5941 SHop(OP_LEFT_SHIFT);
78cdf107 5942 }
90771dc0
NC
5943 if (tmp == '=') {
5944 tmp = *s++;
78cdf107
Z
5945 if (tmp == '>') {
5946 if (!PL_lex_allbrackets &&
5947 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5948 s -= 3;
5949 TOKEN(0);
5950 }
90771dc0 5951 Eop(OP_NCMP);
78cdf107 5952 }
90771dc0 5953 s--;
78cdf107
Z
5954 if (!PL_lex_allbrackets &&
5955 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5956 s -= 2;
5957 TOKEN(0);
5958 }
90771dc0
NC
5959 Rop(OP_LE);
5960 }
395c3793 5961 }
378cc40b 5962 s--;
78cdf107
Z
5963 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5964 s--;
5965 TOKEN(0);
5966 }
79072805 5967 Rop(OP_LT);
378cc40b
LW
5968 case '>':
5969 s++;
90771dc0
NC
5970 {
5971 const char tmp = *s++;
78cdf107
Z
5972 if (tmp == '>') {
5973 if (*s == '=' && !PL_lex_allbrackets &&
5974 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5975 s -= 2;
5976 TOKEN(0);
5977 }
90771dc0 5978 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
5979 }
5980 else if (tmp == '=') {
5981 if (!PL_lex_allbrackets &&
5982 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5983 s -= 2;
5984 TOKEN(0);
5985 }
90771dc0 5986 Rop(OP_GE);
78cdf107 5987 }
90771dc0 5988 }
378cc40b 5989 s--;
78cdf107
Z
5990 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5991 s--;
5992 TOKEN(0);
5993 }
79072805 5994 Rop(OP_GT);
378cc40b
LW
5995
5996 case '$':
bbce6d69 5997 CLINE;
5998
3280af22
NIS
5999 if (PL_expect == XOPERATOR) {
6000 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6001 return deprecate_commaless_var_list();
a0d0e21e 6002 }
8990e307 6003 }
a0d0e21e 6004
c0b977fd 6005 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6006 PL_tokenbuf[0] = '@';
376b8730
SM
6007 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6008 sizeof PL_tokenbuf - 1, FALSE);
6009 if (PL_expect == XOPERATOR)
6010 no_op("Array length", s);
3280af22 6011 if (!PL_tokenbuf[1])
a0d0e21e 6012 PREREF(DOLSHARP);
3280af22
NIS
6013 PL_expect = XOPERATOR;
6014 PL_pending_ident = '#';
463ee0b2 6015 TOKEN(DOLSHARP);
79072805 6016 }
bbce6d69 6017
3280af22 6018 PL_tokenbuf[0] = '$';
376b8730
SM
6019 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6020 sizeof PL_tokenbuf - 1, FALSE);
6021 if (PL_expect == XOPERATOR)
6022 no_op("Scalar", s);
3280af22
NIS
6023 if (!PL_tokenbuf[1]) {
6024 if (s == PL_bufend)
bbce6d69 6025 yyerror("Final $ should be \\$ or $name");
6026 PREREF('$');
8990e307 6027 }
a0d0e21e 6028
ff68c719 6029 d = s;
90771dc0
NC
6030 {
6031 const char tmp = *s;
ae28bb2a 6032 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6033 s = SKIPSPACE1(s);
ff68c719 6034
90771dc0
NC
6035 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6036 && intuit_more(s)) {
6037 if (*s == '[') {
6038 PL_tokenbuf[0] = '@';
6039 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6040 char *t = s+1;
6041
6042 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6043 t++;
90771dc0 6044 if (*t++ == ',') {
29595ff2 6045 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6046 while (t < PL_bufend && *t != ']')
6047 t++;
9014280d 6048 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6049 "Multidimensional syntax %.*s not supported",
36c7798d 6050 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6051 }
748a9306 6052 }
93a17b20 6053 }
90771dc0
NC
6054 else if (*s == '{') {
6055 char *t;
6056 PL_tokenbuf[0] = '%';
6057 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6058 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6059 {
6060 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6061 do {
6062 t++;
6063 } while (isSPACE(*t));
90771dc0 6064 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6065 STRLEN len;
90771dc0 6066 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6067 &len);
c35e046a
AL
6068 while (isSPACE(*t))
6069 t++;
780a5241 6070 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
6071 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6072 "You need to quote \"%s\"",
6073 tmpbuf);
6074 }
6075 }
6076 }
93a17b20 6077 }
bbce6d69 6078
90771dc0
NC
6079 PL_expect = XOPERATOR;
6080 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6081 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6082 if (!islop || PL_last_lop_op == OP_GREPSTART)
6083 PL_expect = XOPERATOR;
6084 else if (strchr("$@\"'`q", *s))
6085 PL_expect = XTERM; /* e.g. print $fh "foo" */
6086 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6087 PL_expect = XTERM; /* e.g. print $fh &sub */
6088 else if (isIDFIRST_lazy_if(s,UTF)) {
6089 char tmpbuf[sizeof PL_tokenbuf];
6090 int t2;
6091 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6092 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6093 /* binary operators exclude handle interpretations */
6094 switch (t2) {
6095 case -KEY_x:
6096 case -KEY_eq:
6097 case -KEY_ne:
6098 case -KEY_gt:
6099 case -KEY_lt:
6100 case -KEY_ge:
6101 case -KEY_le:
6102 case -KEY_cmp:
6103 break;
6104 default:
6105 PL_expect = XTERM; /* e.g. print $fh length() */
6106 break;
6107 }
6108 }
6109 else {
6110 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6111 }
6112 }
90771dc0
NC
6113 else if (isDIGIT(*s))
6114 PL_expect = XTERM; /* e.g. print $fh 3 */
6115 else if (*s == '.' && isDIGIT(s[1]))
6116 PL_expect = XTERM; /* e.g. print $fh .3 */
6117 else if ((*s == '?' || *s == '-' || *s == '+')
6118 && !isSPACE(s[1]) && s[1] != '=')
6119 PL_expect = XTERM; /* e.g. print $fh -1 */
6120 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6121 && s[1] != '/')
6122 PL_expect = XTERM; /* e.g. print $fh /.../
6123 XXX except DORDOR operator
6124 */
6125 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6126 && s[2] != '=')
6127 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6128 }
bbce6d69 6129 }
3280af22 6130 PL_pending_ident = '$';
79072805 6131 TOKEN('$');
378cc40b
LW
6132
6133 case '@':
3280af22 6134 if (PL_expect == XOPERATOR)
bbce6d69 6135 no_op("Array", s);
3280af22
NIS
6136 PL_tokenbuf[0] = '@';
6137 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6138 if (!PL_tokenbuf[1]) {
bbce6d69 6139 PREREF('@');
6140 }
3280af22 6141 if (PL_lex_state == LEX_NORMAL)
29595ff2 6142 s = SKIPSPACE1(s);
3280af22 6143 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6144 if (*s == '{')
3280af22 6145 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6146
6147 /* Warn about @ where they meant $. */
041457d9
DM
6148 if (*s == '[' || *s == '{') {
6149 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6150 const char *t = s + 1;
7e2040f0 6151 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
6152 t++;
6153 if (*t == '}' || *t == ']') {
6154 t++;
29595ff2 6155 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 6156 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 6157 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
6158 (int)(t-PL_bufptr), PL_bufptr,
6159 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 6160 }
93a17b20
LW
6161 }
6162 }
463ee0b2 6163 }
3280af22 6164 PL_pending_ident = '@';
79072805 6165 TERM('@');
378cc40b 6166
c963b151 6167 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6168 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6169 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6170 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6171 TOKEN(0);
6f33ba73
RGS
6172 s += 2;
6173 AOPERATOR(DORDOR);
6174 }
c963b151 6175 case '?': /* may either be conditional or pattern */
be25f609 6176 if (PL_expect == XOPERATOR) {
90771dc0 6177 char tmp = *s++;
c963b151 6178 if(tmp == '?') {
78cdf107
Z
6179 if (!PL_lex_allbrackets &&
6180 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6181 s--;
6182 TOKEN(0);
6183 }
6184 PL_lex_allbrackets++;
be25f609 6185 OPERATOR('?');
c963b151
BD
6186 }
6187 else {
6188 tmp = *s++;
6189 if(tmp == '/') {
6190 /* A // operator. */
78cdf107
Z
6191 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6192 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6193 LEX_FAKEEOF_LOGIC)) {
6194 s -= 2;
6195 TOKEN(0);
6196 }
c963b151
BD
6197 AOPERATOR(DORDOR);
6198 }
6199 else {
6200 s--;
78cdf107
Z
6201 if (*s == '=' && !PL_lex_allbrackets &&
6202 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6203 s--;
6204 TOKEN(0);
6205 }
c963b151
BD
6206 Mop(OP_DIVIDE);
6207 }
6208 }
6209 }
6210 else {
6211 /* Disable warning on "study /blah/" */
6212 if (PL_oldoldbufptr == PL_last_uni
6213 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6214 || memNE(PL_last_uni, "study", 5)
6215 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6216 ))
6217 check_uni();
725a61d7
Z
6218 if (*s == '?')
6219 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6220 s = scan_pat(s,OP_MATCH);
6221 TERM(sublex_start());
6222 }
378cc40b
LW
6223
6224 case '.':
51882d45
GS
6225 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6226#ifdef PERL_STRICT_CR
6227 && s[1] == '\n'
6228#else
6229 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6230#endif
6231 && (s == PL_linestart || s[-1] == '\n') )
6232 {
3280af22
NIS
6233 PL_lex_formbrack = 0;
6234 PL_expect = XSTATE;
79072805
LW
6235 goto rightbracket;
6236 }
be25f609 6237 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6238 s += 3;
6239 OPERATOR(YADAYADA);
6240 }
3280af22 6241 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6242 char tmp = *s++;
a687059c 6243 if (*s == tmp) {
78cdf107
Z
6244 if (!PL_lex_allbrackets &&
6245 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6246 s--;
6247 TOKEN(0);
6248 }
a687059c 6249 s++;
2f3197b3
LW
6250 if (*s == tmp) {
6251 s++;
6154021b 6252 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6253 }
6254 else
6154021b 6255 pl_yylval.ival = 0;
378cc40b 6256 OPERATOR(DOTDOT);
a687059c 6257 }
78cdf107
Z
6258 if (*s == '=' && !PL_lex_allbrackets &&
6259 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6260 s--;
6261 TOKEN(0);
6262 }
79072805 6263 Aop(OP_CONCAT);
378cc40b
LW
6264 }
6265 /* FALL THROUGH */
6266 case '0': case '1': case '2': case '3': case '4':
6267 case '5': case '6': case '7': case '8': case '9':
6154021b 6268 s = scan_num(s, &pl_yylval);
931e0695 6269 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6270 if (PL_expect == XOPERATOR)
8990e307 6271 no_op("Number",s);
79072805
LW
6272 TERM(THING);
6273
6274 case '\'':
5db06880 6275 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6276 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6277 if (PL_expect == XOPERATOR) {
6278 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6279 return deprecate_commaless_var_list();
a0d0e21e 6280 }
463ee0b2 6281 else
8990e307 6282 no_op("String",s);
463ee0b2 6283 }
79072805 6284 if (!s)
d4c19fe8 6285 missingterm(NULL);
6154021b 6286 pl_yylval.ival = OP_CONST;
79072805
LW
6287 TERM(sublex_start());
6288
6289 case '"':
5db06880 6290 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6291 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6292 if (PL_expect == XOPERATOR) {
6293 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6294 return deprecate_commaless_var_list();
a0d0e21e 6295 }
463ee0b2 6296 else
8990e307 6297 no_op("String",s);
463ee0b2 6298 }
79072805 6299 if (!s)
d4c19fe8 6300 missingterm(NULL);
6154021b 6301 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6302 /* FIXME. I think that this can be const if char *d is replaced by
6303 more localised variables. */
3280af22 6304 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6305 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6306 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6307 break;
6308 }
6309 }
79072805
LW
6310 TERM(sublex_start());
6311
6312 case '`':
5db06880 6313 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6314 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6315 if (PL_expect == XOPERATOR)
8990e307 6316 no_op("Backticks",s);
79072805 6317 if (!s)
d4c19fe8 6318 missingterm(NULL);
9b201d7d 6319 readpipe_override();
79072805
LW
6320 TERM(sublex_start());
6321
6322 case '\\':
6323 s++;
a2a5de95
NC
6324 if (PL_lex_inwhat && isDIGIT(*s))
6325 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6326 *s, *s);
3280af22 6327 if (PL_expect == XOPERATOR)
8990e307 6328 no_op("Backslash",s);
79072805
LW
6329 OPERATOR(REFGEN);
6330
a7cb1f99 6331 case 'v':
e526c9e6 6332 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6333 char *start = s + 2;
dd629d5b 6334 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6335 start++;
6336 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6337 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6338 TERM(THING);
6339 }
e526c9e6 6340 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6341 else if (!isALPHA(*start) && (PL_expect == XTERM
6342 || PL_expect == XREF || PL_expect == XSTATE
6343 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6344 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6345 if (!gv) {
6154021b 6346 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6347 TERM(THING);
6348 }
6349 }
a7cb1f99
GS
6350 }
6351 goto keylookup;
79072805 6352 case 'x':
3280af22 6353 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6354 s++;
6355 Mop(OP_REPEAT);
2f3197b3 6356 }
79072805
LW
6357 goto keylookup;
6358
378cc40b 6359 case '_':
79072805
LW
6360 case 'a': case 'A':
6361 case 'b': case 'B':
6362 case 'c': case 'C':
6363 case 'd': case 'D':
6364 case 'e': case 'E':
6365 case 'f': case 'F':
6366 case 'g': case 'G':
6367 case 'h': case 'H':
6368 case 'i': case 'I':
6369 case 'j': case 'J':
6370 case 'k': case 'K':
6371 case 'l': case 'L':
6372 case 'm': case 'M':
6373 case 'n': case 'N':
6374 case 'o': case 'O':
6375 case 'p': case 'P':
6376 case 'q': case 'Q':
6377 case 'r': case 'R':
6378 case 's': case 'S':
6379 case 't': case 'T':
6380 case 'u': case 'U':
a7cb1f99 6381 case 'V':
79072805
LW
6382 case 'w': case 'W':
6383 case 'X':
6384 case 'y': case 'Y':
6385 case 'z': case 'Z':
6386
49dc05e3 6387 keylookup: {
88e1f1a2 6388 bool anydelim;
90771dc0 6389 I32 tmp;
10edeb5d
JH
6390
6391 orig_keyword = 0;
6392 gv = NULL;
6393 gvp = NULL;
49dc05e3 6394
3280af22
NIS
6395 PL_bufptr = s;
6396 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6397
6398 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6399 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6400
6401 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6402 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6403 goto just_a_word;
6404
3643fb5f 6405 d = s;
3280af22 6406 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6407 d++; /* no comments skipped here, or s### is misparsed */
6408
748a9306 6409 /* Is this a word before a => operator? */
1c3923b3 6410 if (*d == '=' && d[1] == '>') {
748a9306 6411 CLINE;
6154021b 6412 pl_yylval.opval
d0a148a6
NC
6413 = (OP*)newSVOP(OP_CONST, 0,
6414 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6415 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6416 TERM(WORD);
6417 }
6418
88e1f1a2
JV
6419 /* Check for plugged-in keyword */
6420 {
6421 OP *o;
6422 int result;
6423 char *saved_bufptr = PL_bufptr;
6424 PL_bufptr = s;
16c91539 6425 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6426 s = PL_bufptr;
6427 if (result == KEYWORD_PLUGIN_DECLINE) {
6428 /* not a plugged-in keyword */
6429 PL_bufptr = saved_bufptr;
6430 } else if (result == KEYWORD_PLUGIN_STMT) {
6431 pl_yylval.opval = o;
6432 CLINE;
6433 PL_expect = XSTATE;
6434 return REPORT(PLUGSTMT);
6435 } else if (result == KEYWORD_PLUGIN_EXPR) {
6436 pl_yylval.opval = o;
6437 CLINE;
6438 PL_expect = XOPERATOR;
6439 return REPORT(PLUGEXPR);
6440 } else {
6441 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6442 PL_tokenbuf);
6443 }
6444 }
6445
6446 /* Check for built-in keyword */
6447 tmp = keyword(PL_tokenbuf, len, 0);
6448
6449 /* Is this a label? */
6450 if (!anydelim && PL_expect == XSTATE
6451 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6452 s = d + 1;
6453 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6454 CLINE;
6455 TOKEN(LABEL);
6456 }
6457
a0d0e21e 6458 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6459 GV *ogv = NULL; /* override (winner) */
6460 GV *hgv = NULL; /* hidden (loser) */
3280af22 6461 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6462 CV *cv;
90e5519e 6463 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6464 (cv = GvCVu(gv)))
6465 {
6466 if (GvIMPORTED_CV(gv))
6467 ogv = gv;
6468 else if (! CvMETHOD(cv))
6469 hgv = gv;
6470 }
6471 if (!ogv &&
3280af22 6472 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6473 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6474 GvCVu(gv) && GvIMPORTED_CV(gv))
6475 {
6476 ogv = gv;
6477 }
6478 }
6479 if (ogv) {
30fe34ed 6480 orig_keyword = tmp;
56f7f34b 6481 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6482 }
6483 else if (gv && !gvp
6484 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6485 && GvCVu(gv))
6e7b2336
GS
6486 {
6487 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6488 }
56f7f34b
CS
6489 else { /* no override */
6490 tmp = -tmp;
a2a5de95
NC
6491 if (tmp == KEY_dump) {
6492 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6493 "dump() better written as CORE::dump()");
ac206dc8 6494 }
a0714e2c 6495 gv = NULL;
56f7f34b 6496 gvp = 0;
a2a5de95
NC
6497 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6498 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6499 "Ambiguous call resolved as CORE::%s(), "
6500 "qualify as such or use &",
6501 GvENAME(hgv));
49dc05e3 6502 }
a0d0e21e
LW
6503 }
6504
6505 reserved_word:
6506 switch (tmp) {
79072805
LW
6507
6508 default: /* not a keyword */
0bfa2a8a
NC
6509 /* Trade off - by using this evil construction we can pull the
6510 variable gv into the block labelled keylookup. If not, then
6511 we have to give it function scope so that the goto from the
6512 earlier ':' case doesn't bypass the initialisation. */
6513 if (0) {
6514 just_a_word_zero_gv:
6515 gv = NULL;
6516 gvp = NULL;
8bee0991 6517 orig_keyword = 0;
0bfa2a8a 6518 }
93a17b20 6519 just_a_word: {
96e4d5b1 6520 SV *sv;
ce29ac45 6521 int pkgname = 0;
f54cb97a 6522 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6523 OP *rv2cv_op;
5069cc75 6524 CV *cv;
5db06880 6525#ifdef PERL_MAD
cd81e915 6526 SV *nextPL_nextwhite = 0;
5db06880
NC
6527#endif
6528
8990e307
LW
6529
6530 /* Get the rest if it looks like a package qualifier */
6531
155aba94 6532 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6533 STRLEN morelen;
3280af22 6534 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6535 TRUE, &morelen);
6536 if (!morelen)
cea2e8a9 6537 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6538 *s == '\'' ? "'" : "::");
c3e0f903 6539 len += morelen;
ce29ac45 6540 pkgname = 1;
a0d0e21e 6541 }
8990e307 6542
3280af22
NIS
6543 if (PL_expect == XOPERATOR) {
6544 if (PL_bufptr == PL_linestart) {
57843af0 6545 CopLINE_dec(PL_curcop);
f1f66076 6546 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6547 CopLINE_inc(PL_curcop);
463ee0b2
LW
6548 }
6549 else
54310121 6550 no_op("Bareword",s);
463ee0b2 6551 }
8990e307 6552
c3e0f903 6553 /* Look for a subroutine with this name in current package,
486ec47a 6554 unless name is "Foo::", in which case Foo is a bareword
c3e0f903
GS
6555 (and a package name). */
6556
5db06880 6557 if (len > 2 && !PL_madskills &&
3280af22 6558 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6559 {
f776e3cd 6560 if (ckWARN(WARN_BAREWORD)
90e5519e 6561 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6562 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6563 "Bareword \"%s\" refers to nonexistent package",
3280af22 6564 PL_tokenbuf);
c3e0f903 6565 len -= 2;
3280af22 6566 PL_tokenbuf[len] = '\0';
a0714e2c 6567 gv = NULL;
c3e0f903
GS
6568 gvp = 0;
6569 }
6570 else {
62d55b22
NC
6571 if (!gv) {
6572 /* Mustn't actually add anything to a symbol table.
6573 But also don't want to "initialise" any placeholder
6574 constants that might already be there into full
6575 blown PVGVs with attached PVCV. */
90e5519e
NC
6576 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6577 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6578 }
b3d904f3 6579 len = 0;
c3e0f903
GS
6580 }
6581
6582 /* if we saw a global override before, get the right name */
8990e307 6583
37bb7629
EB
6584 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6585 len ? len : strlen(PL_tokenbuf));
49dc05e3 6586 if (gvp) {
37bb7629 6587 SV * const tmp_sv = sv;
396482e1 6588 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6589 sv_catsv(sv, tmp_sv);
6590 SvREFCNT_dec(tmp_sv);
8a7a129d 6591 }
37bb7629 6592
5db06880 6593#ifdef PERL_MAD
cd81e915
NC
6594 if (PL_madskills && !PL_thistoken) {
6595 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6596 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6597 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6598 }
6599#endif
8990e307 6600
a0d0e21e 6601 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6602 CLINE;
6154021b
RGS
6603 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6604 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6605
c3e0f903 6606 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6607 if (len)
6608 goto safe_bareword;
6609
f7461760 6610 {
d8ebba9f 6611 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
6612 const_op->op_private = OPpCONST_BARE;
6613 rv2cv_op = newCVREF(0, const_op);
6614 }
d9088386 6615 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6616
8990e307
LW
6617 /* See if it's the indirect object for a list operator. */
6618
3280af22
NIS
6619 if (PL_oldoldbufptr &&
6620 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6621 (PL_oldoldbufptr == PL_last_lop
6622 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6623 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6624 (PL_expect == XREF ||
6625 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6626 {
748a9306
LW
6627 bool immediate_paren = *s == '(';
6628
a0d0e21e 6629 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6630 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6631#ifdef PERL_MAD
cd81e915 6632 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6633#endif
a0d0e21e
LW
6634
6635 /* Two barewords in a row may indicate method call. */
6636
62d55b22 6637 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6638 (tmp = intuit_method(s, gv, cv))) {
6639 op_free(rv2cv_op);
78cdf107
Z
6640 if (tmp == METHOD && !PL_lex_allbrackets &&
6641 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6642 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6643 return REPORT(tmp);
f7461760 6644 }
a0d0e21e
LW
6645
6646 /* If not a declared subroutine, it's an indirect object. */
6647 /* (But it's an indir obj regardless for sort.) */
7294df96 6648 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6649
7294df96
RGS
6650 if (
6651 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6652 (!cv &&
a9ef352a 6653 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6654 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6655 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6656 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6657 )
a9ef352a 6658 {
3280af22 6659 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6660 goto bareword;
93a17b20
LW
6661 }
6662 }
8990e307 6663
3280af22 6664 PL_expect = XOPERATOR;
5db06880
NC
6665#ifdef PERL_MAD
6666 if (isSPACE(*s))
cd81e915
NC
6667 s = SKIPSPACE2(s,nextPL_nextwhite);
6668 PL_nextwhite = nextPL_nextwhite;
5db06880 6669#else
8990e307 6670 s = skipspace(s);
5db06880 6671#endif
1c3923b3
GS
6672
6673 /* Is this a word before a => operator? */
ce29ac45 6674 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6675 op_free(rv2cv_op);
1c3923b3 6676 CLINE;
6154021b 6677 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6678 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6679 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6680 TERM(WORD);
6681 }
6682
6683 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6684 if (*s == '(') {
79072805 6685 CLINE;
5069cc75 6686 if (cv) {
c35e046a
AL
6687 d = s + 1;
6688 while (SPACE_OR_TAB(*d))
6689 d++;
f7461760 6690 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6691 s = d + 1;
c631f32b 6692 goto its_constant;
96e4d5b1 6693 }
6694 }
5db06880
NC
6695#ifdef PERL_MAD
6696 if (PL_madskills) {
cd81e915
NC
6697 PL_nextwhite = PL_thiswhite;
6698 PL_thiswhite = 0;
5db06880 6699 }
cd81e915 6700 start_force(PL_curforce);
5db06880 6701#endif
6154021b 6702 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6703 PL_expect = XOPERATOR;
5db06880
NC
6704#ifdef PERL_MAD
6705 if (PL_madskills) {
cd81e915
NC
6706 PL_nextwhite = nextPL_nextwhite;
6707 curmad('X', PL_thistoken);
6b29d1f5 6708 PL_thistoken = newSVpvs("");
5db06880
NC
6709 }
6710#endif
f7461760 6711 op_free(rv2cv_op);
93a17b20 6712 force_next(WORD);
6154021b 6713 pl_yylval.ival = 0;
463ee0b2 6714 TOKEN('&');
79072805 6715 }
93a17b20 6716
a0d0e21e 6717 /* If followed by var or block, call it a method (unless sub) */
8990e307 6718
f7461760
Z
6719 if ((*s == '$' || *s == '{') && !cv) {
6720 op_free(rv2cv_op);
3280af22
NIS
6721 PL_last_lop = PL_oldbufptr;
6722 PL_last_lop_op = OP_METHOD;
78cdf107
Z
6723 if (!PL_lex_allbrackets &&
6724 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6725 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 6726 PREBLOCK(METHOD);
463ee0b2
LW
6727 }
6728
8990e307
LW
6729 /* If followed by a bareword, see if it looks like indir obj. */
6730
30fe34ed
RGS
6731 if (!orig_keyword
6732 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6733 && (tmp = intuit_method(s, gv, cv))) {
6734 op_free(rv2cv_op);
78cdf107
Z
6735 if (tmp == METHOD && !PL_lex_allbrackets &&
6736 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6737 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6738 return REPORT(tmp);
f7461760 6739 }
93a17b20 6740
8990e307
LW
6741 /* Not a method, so call it a subroutine (if defined) */
6742
5069cc75 6743 if (cv) {
9b387841
NC
6744 if (lastchar == '-')
6745 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6746 "Ambiguous use of -%s resolved as -&%s()",
6747 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6748 /* Check for a constant sub */
f7461760 6749 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6750 its_constant:
f7461760 6751 op_free(rv2cv_op);
6154021b
RGS
6752 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6753 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6754 pl_yylval.opval->op_private = 0;
6b7c6d95 6755 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 6756 TOKEN(WORD);
89bfa8cd 6757 }
6758
6154021b 6759 op_free(pl_yylval.opval);
f7461760 6760 pl_yylval.opval = rv2cv_op;
6154021b 6761 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6762 PL_last_lop = PL_oldbufptr;
bf848113 6763 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6764 /* Is there a prototype? */
5db06880
NC
6765 if (
6766#ifdef PERL_MAD
6767 cv &&
6768#endif
d9f2850e
RGS
6769 SvPOK(cv))
6770 {
5f66b61c 6771 STRLEN protolen;
daba3364 6772 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6773 if (!protolen)
4633a7c4 6774 TERM(FUNC0SUB);
0f5d0394
AE
6775 while (*proto == ';')
6776 proto++;
649d02de
FC
6777 if (
6778 (
6779 (
6780 *proto == '$' || *proto == '_'
c035a075 6781 || *proto == '*' || *proto == '+'
649d02de
FC
6782 )
6783 && proto[1] == '\0'
6784 )
6785 || (
6786 *proto == '\\' && proto[1] && proto[2] == '\0'
6787 )
6788 )
6789 OPERATOR(UNIOPSUB);
6790 if (*proto == '\\' && proto[1] == '[') {
6791 const char *p = proto + 2;
6792 while(*p && *p != ']')
6793 ++p;
6794 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6795 }
7a52d87a 6796 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6797 if (PL_curstash)
6798 sv_setpvs(PL_subname, "__ANON__");
6799 else
6800 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
6801 if (!PL_lex_allbrackets &&
6802 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6803 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
6804 PREBLOCK(LSTOPSUB);
6805 }
a9ef352a 6806 }
5db06880
NC
6807#ifdef PERL_MAD
6808 {
6809 if (PL_madskills) {
cd81e915
NC
6810 PL_nextwhite = PL_thiswhite;
6811 PL_thiswhite = 0;
5db06880 6812 }
cd81e915 6813 start_force(PL_curforce);
6154021b 6814 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6815 PL_expect = XTERM;
6816 if (PL_madskills) {
cd81e915
NC
6817 PL_nextwhite = nextPL_nextwhite;
6818 curmad('X', PL_thistoken);
6b29d1f5 6819 PL_thistoken = newSVpvs("");
5db06880
NC
6820 }
6821 force_next(WORD);
78cdf107
Z
6822 if (!PL_lex_allbrackets &&
6823 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6824 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6825 TOKEN(NOAMP);
6826 }
6827 }
6828
6829 /* Guess harder when madskills require "best effort". */
6830 if (PL_madskills && (!gv || !GvCVu(gv))) {
6831 int probable_sub = 0;
6832 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6833 probable_sub = 1;
6834 else if (isALPHA(*s)) {
6835 char tmpbuf[1024];
6836 STRLEN tmplen;
6837 d = s;
6838 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6839 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6840 probable_sub = 1;
6841 else {
6842 while (d < PL_bufend && isSPACE(*d))
6843 d++;
6844 if (*d == '=' && d[1] == '>')
6845 probable_sub = 1;
6846 }
6847 }
6848 if (probable_sub) {
7a6d04f4 6849 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6850 op_free(pl_yylval.opval);
f7461760 6851 pl_yylval.opval = rv2cv_op;
6154021b 6852 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6853 PL_last_lop = PL_oldbufptr;
6854 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6855 PL_nextwhite = PL_thiswhite;
6856 PL_thiswhite = 0;
6857 start_force(PL_curforce);
6154021b 6858 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6859 PL_expect = XTERM;
cd81e915
NC
6860 PL_nextwhite = nextPL_nextwhite;
6861 curmad('X', PL_thistoken);
6b29d1f5 6862 PL_thistoken = newSVpvs("");
5db06880 6863 force_next(WORD);
78cdf107
Z
6864 if (!PL_lex_allbrackets &&
6865 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6866 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6867 TOKEN(NOAMP);
6868 }
6869#else
6154021b 6870 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6871 PL_expect = XTERM;
8990e307 6872 force_next(WORD);
78cdf107
Z
6873 if (!PL_lex_allbrackets &&
6874 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6875 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 6876 TOKEN(NOAMP);
5db06880 6877#endif
8990e307 6878 }
748a9306 6879
8990e307
LW
6880 /* Call it a bare word */
6881
5603f27d 6882 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6883 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6884 else {
9a073a1d
RGS
6885 bareword:
6886 /* after "print" and similar functions (corresponding to
6887 * "F? L" in opcode.pl), whatever wasn't already parsed as
6888 * a filehandle should be subject to "strict subs".
6889 * Likewise for the optional indirect-object argument to system
6890 * or exec, which can't be a bareword */
6891 if ((PL_last_lop_op == OP_PRINT
6892 || PL_last_lop_op == OP_PRTF
6893 || PL_last_lop_op == OP_SAY
6894 || PL_last_lop_op == OP_SYSTEM
6895 || PL_last_lop_op == OP_EXEC)
6896 && (PL_hints & HINT_STRICT_SUBS))
6897 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6898 if (lastchar != '-') {
6899 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6900 d = PL_tokenbuf;
6901 while (isLOWER(*d))
6902 d++;
da51bb9b 6903 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6904 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6905 PL_tokenbuf);
6906 }
748a9306
LW
6907 }
6908 }
f7461760 6909 op_free(rv2cv_op);
c3e0f903
GS
6910
6911 safe_bareword:
9b387841
NC
6912 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6913 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6914 "Operator or semicolon missing before %c%s",
6915 lastchar, PL_tokenbuf);
6916 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6917 "Ambiguous use of %c resolved as operator %c",
6918 lastchar, lastchar);
748a9306 6919 }
93a17b20 6920 TOKEN(WORD);
79072805 6921 }
79072805 6922
68dc0745 6923 case KEY___FILE__:
7eb971ee 6924 FUN0OP(
14f0f125 6925 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7eb971ee 6926 );
46fc3d4c 6927
79072805 6928 case KEY___LINE__:
7eb971ee
FC
6929 FUN0OP(
6930 (OP*)newSVOP(OP_CONST, 0,
6931 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6932 );
68dc0745 6933
6934 case KEY___PACKAGE__:
7eb971ee
FC
6935 FUN0OP(
6936 (OP*)newSVOP(OP_CONST, 0,
3280af22 6937 (PL_curstash
5aaec2b4 6938 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
6939 : &PL_sv_undef))
6940 );
79072805 6941
e50aee73 6942 case KEY___DATA__:
79072805
LW
6943 case KEY___END__: {
6944 GV *gv;
3280af22 6945 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6946 const char *pname = "main";
3280af22 6947 if (PL_tokenbuf[2] == 'D')
bfcb3514 6948 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6949 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6950 SVt_PVIO);
a5f75d66 6951 GvMULTI_on(gv);
79072805 6952 if (!GvIO(gv))
a0d0e21e 6953 GvIOp(gv) = newIO();
3280af22 6954 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6955#if defined(HAS_FCNTL) && defined(F_SETFD)
6956 {
f54cb97a 6957 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6958 fcntl(fd,F_SETFD,fd >= 3);
6959 }
79072805 6960#endif
fd049845 6961 /* Mark this internal pseudo-handle as clean */
6962 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6963 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6964 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6965 else
50952442 6966 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6967#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6968 /* if the script was opened in binmode, we need to revert
53129d29 6969 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6970 * XXX this is a questionable hack at best. */
53129d29
GS
6971 if (PL_bufend-PL_bufptr > 2
6972 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6973 {
6974 Off_t loc = 0;
50952442 6975 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6976 loc = PerlIO_tell(PL_rsfp);
6977 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6978 }
2986a63f
JH
6979#ifdef NETWARE
6980 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6981#else
c39cd008 6982 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6983#endif /* NETWARE */
c39cd008
GS
6984 if (loc > 0)
6985 PerlIO_seek(PL_rsfp, loc, 0);
6986 }
6987 }
6988#endif
7948272d 6989#ifdef PERLIO_LAYERS
52d2e0f4
JH
6990 if (!IN_BYTES) {
6991 if (UTF)
6992 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6993 else if (PL_encoding) {
6994 SV *name;
6995 dSP;
6996 ENTER;
6997 SAVETMPS;
6998 PUSHMARK(sp);
6999 EXTEND(SP, 1);
7000 XPUSHs(PL_encoding);
7001 PUTBACK;
7002 call_method("name", G_SCALAR);
7003 SPAGAIN;
7004 name = POPs;
7005 PUTBACK;
bfed75c6 7006 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7007 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7008 SVfARG(name)));
52d2e0f4
JH
7009 FREETMPS;
7010 LEAVE;
7011 }
7012 }
7948272d 7013#endif
5db06880
NC
7014#ifdef PERL_MAD
7015 if (PL_madskills) {
cd81e915
NC
7016 if (PL_realtokenstart >= 0) {
7017 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7018 if (!PL_endwhite)
6b29d1f5 7019 PL_endwhite = newSVpvs("");
cd81e915
NC
7020 sv_catsv(PL_endwhite, PL_thiswhite);
7021 PL_thiswhite = 0;
7022 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7023 PL_realtokenstart = -1;
5db06880 7024 }
5cc814fd
NC
7025 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7026 != NULL) ;
5db06880
NC
7027 }
7028#endif
4608196e 7029 PL_rsfp = NULL;
79072805
LW
7030 }
7031 goto fake_eof;
e929a76b 7032 }
de3bb511 7033
8990e307 7034 case KEY_AUTOLOAD:
ed6116ce 7035 case KEY_DESTROY:
79072805 7036 case KEY_BEGIN:
3c10abe3 7037 case KEY_UNITCHECK:
7d30b5c4 7038 case KEY_CHECK:
7d07dbc2 7039 case KEY_INIT:
7d30b5c4 7040 case KEY_END:
3280af22
NIS
7041 if (PL_expect == XSTATE) {
7042 s = PL_bufptr;
93a17b20 7043 goto really_sub;
79072805
LW
7044 }
7045 goto just_a_word;
7046
a0d0e21e
LW
7047 case KEY_CORE:
7048 if (*s == ':' && s[1] == ':') {
7049 s += 2;
748a9306 7050 d = s;
3280af22 7051 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
9dcb8368 7052 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
6798c92b 7053 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
7054 if (tmp < 0)
7055 tmp = -tmp;
a5c70c4d 7056 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 7057 /* that's a way to remember we saw "CORE::" */
850e8516 7058 orig_keyword = tmp;
a0d0e21e
LW
7059 goto reserved_word;
7060 }
7061 goto just_a_word;
7062
463ee0b2
LW
7063 case KEY_abs:
7064 UNI(OP_ABS);
7065
79072805
LW
7066 case KEY_alarm:
7067 UNI(OP_ALARM);
7068
7069 case KEY_accept:
a0d0e21e 7070 LOP(OP_ACCEPT,XTERM);
79072805 7071
463ee0b2 7072 case KEY_and:
78cdf107
Z
7073 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7074 return REPORT(0);
463ee0b2
LW
7075 OPERATOR(ANDOP);
7076
79072805 7077 case KEY_atan2:
a0d0e21e 7078 LOP(OP_ATAN2,XTERM);
85e6fe83 7079
79072805 7080 case KEY_bind:
a0d0e21e 7081 LOP(OP_BIND,XTERM);
79072805
LW
7082
7083 case KEY_binmode:
1c1fc3ea 7084 LOP(OP_BINMODE,XTERM);
79072805
LW
7085
7086 case KEY_bless:
a0d0e21e 7087 LOP(OP_BLESS,XTERM);
79072805 7088
0d863452
RH
7089 case KEY_break:
7090 FUN0(OP_BREAK);
7091
79072805
LW
7092 case KEY_chop:
7093 UNI(OP_CHOP);
7094
7095 case KEY_continue:
0d863452
RH
7096 /* We have to disambiguate the two senses of
7097 "continue". If the next token is a '{' then
7098 treat it as the start of a continue block;
7099 otherwise treat it as a control operator.
7100 */
7101 s = skipspace(s);
7102 if (*s == '{')
79072805 7103 PREBLOCK(CONTINUE);
0d863452
RH
7104 else
7105 FUN0(OP_CONTINUE);
79072805
LW
7106
7107 case KEY_chdir:
fafc274c
NC
7108 /* may use HOME */
7109 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7110 UNI(OP_CHDIR);
7111
7112 case KEY_close:
7113 UNI(OP_CLOSE);
7114
7115 case KEY_closedir:
7116 UNI(OP_CLOSEDIR);
7117
7118 case KEY_cmp:
78cdf107
Z
7119 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7120 return REPORT(0);
79072805
LW
7121 Eop(OP_SCMP);
7122
7123 case KEY_caller:
7124 UNI(OP_CALLER);
7125
7126 case KEY_crypt:
7127#ifdef FCRYPT
f4c556ac
GS
7128 if (!PL_cryptseen) {
7129 PL_cryptseen = TRUE;
de3bb511 7130 init_des();
f4c556ac 7131 }
a687059c 7132#endif
a0d0e21e 7133 LOP(OP_CRYPT,XTERM);
79072805
LW
7134
7135 case KEY_chmod:
a0d0e21e 7136 LOP(OP_CHMOD,XTERM);
79072805
LW
7137
7138 case KEY_chown:
a0d0e21e 7139 LOP(OP_CHOWN,XTERM);
79072805
LW
7140
7141 case KEY_connect:
a0d0e21e 7142 LOP(OP_CONNECT,XTERM);
79072805 7143
463ee0b2
LW
7144 case KEY_chr:
7145 UNI(OP_CHR);
7146
79072805
LW
7147 case KEY_cos:
7148 UNI(OP_COS);
7149
7150 case KEY_chroot:
7151 UNI(OP_CHROOT);
7152
0d863452
RH
7153 case KEY_default:
7154 PREBLOCK(DEFAULT);
7155
79072805 7156 case KEY_do:
29595ff2 7157 s = SKIPSPACE1(s);
79072805 7158 if (*s == '{')
a0d0e21e 7159 PRETERMBLOCK(DO);
79072805 7160 if (*s != '\'')
89c5585f 7161 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
7162 if (orig_keyword == KEY_do) {
7163 orig_keyword = 0;
6154021b 7164 pl_yylval.ival = 1;
850e8516
RGS
7165 }
7166 else
6154021b 7167 pl_yylval.ival = 0;
378cc40b 7168 OPERATOR(DO);
79072805
LW
7169
7170 case KEY_die:
3280af22 7171 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7172 LOP(OP_DIE,XTERM);
79072805
LW
7173
7174 case KEY_defined:
7175 UNI(OP_DEFINED);
7176
7177 case KEY_delete:
a0d0e21e 7178 UNI(OP_DELETE);
79072805
LW
7179
7180 case KEY_dbmopen:
74e8ce34
NC
7181 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7182 STR_WITH_LEN("NDBM_File::"),
7183 STR_WITH_LEN("DB_File::"),
7184 STR_WITH_LEN("GDBM_File::"),
7185 STR_WITH_LEN("SDBM_File::"),
7186 STR_WITH_LEN("ODBM_File::"),
7187 NULL);
a0d0e21e 7188 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7189
7190 case KEY_dbmclose:
7191 UNI(OP_DBMCLOSE);
7192
7193 case KEY_dump:
a0d0e21e 7194 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7195 LOOPX(OP_DUMP);
7196
7197 case KEY_else:
7198 PREBLOCK(ELSE);
7199
7200 case KEY_elsif:
6154021b 7201 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7202 OPERATOR(ELSIF);
7203
7204 case KEY_eq:
78cdf107
Z
7205 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7206 return REPORT(0);
79072805
LW
7207 Eop(OP_SEQ);
7208
a0d0e21e
LW
7209 case KEY_exists:
7210 UNI(OP_EXISTS);
4e553d73 7211
79072805 7212 case KEY_exit:
5db06880
NC
7213 if (PL_madskills)
7214 UNI(OP_INT);
79072805
LW
7215 UNI(OP_EXIT);
7216
7217 case KEY_eval:
29595ff2 7218 s = SKIPSPACE1(s);
32e2a35d
RGS
7219 if (*s == '{') { /* block eval */
7220 PL_expect = XTERMBLOCK;
7221 UNIBRACK(OP_ENTERTRY);
7222 }
7223 else { /* string eval */
7224 PL_expect = XTERM;
7225 UNIBRACK(OP_ENTEREVAL);
7226 }
79072805
LW
7227
7228 case KEY_eof:
7229 UNI(OP_EOF);
7230
7231 case KEY_exp:
7232 UNI(OP_EXP);
7233
7234 case KEY_each:
7235 UNI(OP_EACH);
7236
7237 case KEY_exec:
a0d0e21e 7238 LOP(OP_EXEC,XREF);
79072805
LW
7239
7240 case KEY_endhostent:
7241 FUN0(OP_EHOSTENT);
7242
7243 case KEY_endnetent:
7244 FUN0(OP_ENETENT);
7245
7246 case KEY_endservent:
7247 FUN0(OP_ESERVENT);
7248
7249 case KEY_endprotoent:
7250 FUN0(OP_EPROTOENT);
7251
7252 case KEY_endpwent:
7253 FUN0(OP_EPWENT);
7254
7255 case KEY_endgrent:
7256 FUN0(OP_EGRENT);
7257
7258 case KEY_for:
7259 case KEY_foreach:
78cdf107
Z
7260 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7261 return REPORT(0);
6154021b 7262 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7263 s = SKIPSPACE1(s);
7e2040f0 7264 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7265 char *p = s;
5db06880
NC
7266#ifdef PERL_MAD
7267 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7268#endif
7269
3280af22 7270 if ((PL_bufend - p) >= 3 &&
55497cff 7271 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7272 p += 2;
77ca0c92
LW
7273 else if ((PL_bufend - p) >= 4 &&
7274 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7275 p += 3;
29595ff2 7276 p = PEEKSPACE(p);
7e2040f0 7277 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7278 p = scan_ident(p, PL_bufend,
7279 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7280 p = PEEKSPACE(p);
77ca0c92
LW
7281 }
7282 if (*p != '$')
cea2e8a9 7283 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7284#ifdef PERL_MAD
7285 s = SvPVX(PL_linestr) + soff;
7286#endif
55497cff 7287 }
79072805
LW
7288 OPERATOR(FOR);
7289
7290 case KEY_formline:
a0d0e21e 7291 LOP(OP_FORMLINE,XTERM);
79072805
LW
7292
7293 case KEY_fork:
7294 FUN0(OP_FORK);
7295
7296 case KEY_fcntl:
a0d0e21e 7297 LOP(OP_FCNTL,XTERM);
79072805
LW
7298
7299 case KEY_fileno:
7300 UNI(OP_FILENO);
7301
7302 case KEY_flock:
a0d0e21e 7303 LOP(OP_FLOCK,XTERM);
79072805
LW
7304
7305 case KEY_gt:
78cdf107
Z
7306 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7307 return REPORT(0);
79072805
LW
7308 Rop(OP_SGT);
7309
7310 case KEY_ge:
78cdf107
Z
7311 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7312 return REPORT(0);
79072805
LW
7313 Rop(OP_SGE);
7314
7315 case KEY_grep:
2c38e13d 7316 LOP(OP_GREPSTART, XREF);
79072805
LW
7317
7318 case KEY_goto:
a0d0e21e 7319 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7320 LOOPX(OP_GOTO);
7321
7322 case KEY_gmtime:
7323 UNI(OP_GMTIME);
7324
7325 case KEY_getc:
6f33ba73 7326 UNIDOR(OP_GETC);
79072805
LW
7327
7328 case KEY_getppid:
7329 FUN0(OP_GETPPID);
7330
7331 case KEY_getpgrp:
7332 UNI(OP_GETPGRP);
7333
7334 case KEY_getpriority:
a0d0e21e 7335 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7336
7337 case KEY_getprotobyname:
7338 UNI(OP_GPBYNAME);
7339
7340 case KEY_getprotobynumber:
a0d0e21e 7341 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7342
7343 case KEY_getprotoent:
7344 FUN0(OP_GPROTOENT);
7345
7346 case KEY_getpwent:
7347 FUN0(OP_GPWENT);
7348
7349 case KEY_getpwnam:
ff68c719 7350 UNI(OP_GPWNAM);
79072805
LW
7351
7352 case KEY_getpwuid:
ff68c719 7353 UNI(OP_GPWUID);
79072805
LW
7354
7355 case KEY_getpeername:
7356 UNI(OP_GETPEERNAME);
7357
7358 case KEY_gethostbyname:
7359 UNI(OP_GHBYNAME);
7360
7361 case KEY_gethostbyaddr:
a0d0e21e 7362 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7363
7364 case KEY_gethostent:
7365 FUN0(OP_GHOSTENT);
7366
7367 case KEY_getnetbyname:
7368 UNI(OP_GNBYNAME);
7369
7370 case KEY_getnetbyaddr:
a0d0e21e 7371 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7372
7373 case KEY_getnetent:
7374 FUN0(OP_GNETENT);
7375
7376 case KEY_getservbyname:
a0d0e21e 7377 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7378
7379 case KEY_getservbyport:
a0d0e21e 7380 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7381
7382 case KEY_getservent:
7383 FUN0(OP_GSERVENT);
7384
7385 case KEY_getsockname:
7386 UNI(OP_GETSOCKNAME);
7387
7388 case KEY_getsockopt:
a0d0e21e 7389 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7390
7391 case KEY_getgrent:
7392 FUN0(OP_GGRENT);
7393
7394 case KEY_getgrnam:
ff68c719 7395 UNI(OP_GGRNAM);
79072805
LW
7396
7397 case KEY_getgrgid:
ff68c719 7398 UNI(OP_GGRGID);
79072805
LW
7399
7400 case KEY_getlogin:
7401 FUN0(OP_GETLOGIN);
7402
0d863452 7403 case KEY_given:
6154021b 7404 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7405 OPERATOR(GIVEN);
7406
93a17b20 7407 case KEY_glob:
a0d0e21e 7408 LOP(OP_GLOB,XTERM);
93a17b20 7409
79072805
LW
7410 case KEY_hex:
7411 UNI(OP_HEX);
7412
7413 case KEY_if:
78cdf107
Z
7414 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7415 return REPORT(0);
6154021b 7416 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7417 OPERATOR(IF);
7418
7419 case KEY_index:
a0d0e21e 7420 LOP(OP_INDEX,XTERM);
79072805
LW
7421
7422 case KEY_int:
7423 UNI(OP_INT);
7424
7425 case KEY_ioctl:
a0d0e21e 7426 LOP(OP_IOCTL,XTERM);
79072805
LW
7427
7428 case KEY_join:
a0d0e21e 7429 LOP(OP_JOIN,XTERM);
79072805
LW
7430
7431 case KEY_keys:
7432 UNI(OP_KEYS);
7433
7434 case KEY_kill:
a0d0e21e 7435 LOP(OP_KILL,XTERM);
79072805
LW
7436
7437 case KEY_last:
a0d0e21e 7438 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7439 LOOPX(OP_LAST);
4e553d73 7440
79072805
LW
7441 case KEY_lc:
7442 UNI(OP_LC);
7443
7444 case KEY_lcfirst:
7445 UNI(OP_LCFIRST);
7446
7447 case KEY_local:
6154021b 7448 pl_yylval.ival = 0;
79072805
LW
7449 OPERATOR(LOCAL);
7450
7451 case KEY_length:
7452 UNI(OP_LENGTH);
7453
7454 case KEY_lt:
78cdf107
Z
7455 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7456 return REPORT(0);
79072805
LW
7457 Rop(OP_SLT);
7458
7459 case KEY_le:
78cdf107
Z
7460 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7461 return REPORT(0);
79072805
LW
7462 Rop(OP_SLE);
7463
7464 case KEY_localtime:
7465 UNI(OP_LOCALTIME);
7466
7467 case KEY_log:
7468 UNI(OP_LOG);
7469
7470 case KEY_link:
a0d0e21e 7471 LOP(OP_LINK,XTERM);
79072805
LW
7472
7473 case KEY_listen:
a0d0e21e 7474 LOP(OP_LISTEN,XTERM);
79072805 7475
c0329465
MB
7476 case KEY_lock:
7477 UNI(OP_LOCK);
7478
79072805
LW
7479 case KEY_lstat:
7480 UNI(OP_LSTAT);
7481
7482 case KEY_m:
8782bef2 7483 s = scan_pat(s,OP_MATCH);
79072805
LW
7484 TERM(sublex_start());
7485
a0d0e21e 7486 case KEY_map:
2c38e13d 7487 LOP(OP_MAPSTART, XREF);
4e4e412b 7488
79072805 7489 case KEY_mkdir:
a0d0e21e 7490 LOP(OP_MKDIR,XTERM);
79072805
LW
7491
7492 case KEY_msgctl:
a0d0e21e 7493 LOP(OP_MSGCTL,XTERM);
79072805
LW
7494
7495 case KEY_msgget:
a0d0e21e 7496 LOP(OP_MSGGET,XTERM);
79072805
LW
7497
7498 case KEY_msgrcv:
a0d0e21e 7499 LOP(OP_MSGRCV,XTERM);
79072805
LW
7500
7501 case KEY_msgsnd:
a0d0e21e 7502 LOP(OP_MSGSND,XTERM);
79072805 7503
77ca0c92 7504 case KEY_our:
93a17b20 7505 case KEY_my:
952306ac 7506 case KEY_state:
eac04b2e 7507 PL_in_my = (U16)tmp;
29595ff2 7508 s = SKIPSPACE1(s);
7e2040f0 7509 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7510#ifdef PERL_MAD
7511 char* start = s;
7512#endif
3280af22 7513 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7514 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7515 goto really_sub;
def3634b 7516 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7517 if (!PL_in_my_stash) {
c750a3ec 7518 char tmpbuf[1024];
3280af22 7519 PL_bufptr = s;
d9fad198 7520 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7521 yyerror(tmpbuf);
7522 }
5db06880
NC
7523#ifdef PERL_MAD
7524 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7525 sv_catsv(PL_thistoken, PL_nextwhite);
7526 PL_nextwhite = 0;
7527 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7528 }
7529#endif
c750a3ec 7530 }
6154021b 7531 pl_yylval.ival = 1;
55497cff 7532 OPERATOR(MY);
93a17b20 7533
79072805 7534 case KEY_next:
a0d0e21e 7535 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7536 LOOPX(OP_NEXT);
7537
7538 case KEY_ne:
78cdf107
Z
7539 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7540 return REPORT(0);
79072805
LW
7541 Eop(OP_SNE);
7542
a0d0e21e 7543 case KEY_no:
468aa647 7544 s = tokenize_use(0, s);
a0d0e21e
LW
7545 OPERATOR(USE);
7546
7547 case KEY_not:
29595ff2 7548 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 7549 FUN1(OP_NOT);
78cdf107
Z
7550 else {
7551 if (!PL_lex_allbrackets &&
7552 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7553 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 7554 OPERATOR(NOTOP);
78cdf107 7555 }
a0d0e21e 7556
79072805 7557 case KEY_open:
29595ff2 7558 s = SKIPSPACE1(s);
7e2040f0 7559 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7560 const char *t;
c35e046a
AL
7561 for (d = s; isALNUM_lazy_if(d,UTF);)
7562 d++;
7563 for (t=d; isSPACE(*t);)
7564 t++;
e2ab214b 7565 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7566 /* [perl #16184] */
7567 && !(t[0] == '=' && t[1] == '>')
7568 ) {
5f66b61c 7569 int parms_len = (int)(d-s);
9014280d 7570 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7571 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7572 parms_len, s, parms_len, s);
66fbe8fb 7573 }
93a17b20 7574 }
a0d0e21e 7575 LOP(OP_OPEN,XTERM);
79072805 7576
463ee0b2 7577 case KEY_or:
78cdf107
Z
7578 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7579 return REPORT(0);
6154021b 7580 pl_yylval.ival = OP_OR;
463ee0b2
LW
7581 OPERATOR(OROP);
7582
79072805
LW
7583 case KEY_ord:
7584 UNI(OP_ORD);
7585
7586 case KEY_oct:
7587 UNI(OP_OCT);
7588
7589 case KEY_opendir:
a0d0e21e 7590 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7591
7592 case KEY_print:
3280af22 7593 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7594 LOP(OP_PRINT,XREF);
79072805
LW
7595
7596 case KEY_printf:
3280af22 7597 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7598 LOP(OP_PRTF,XREF);
79072805 7599
c07a80fd 7600 case KEY_prototype:
7601 UNI(OP_PROTOTYPE);
7602
79072805 7603 case KEY_push:
a0d0e21e 7604 LOP(OP_PUSH,XTERM);
79072805
LW
7605
7606 case KEY_pop:
6f33ba73 7607 UNIDOR(OP_POP);
79072805 7608
a0d0e21e 7609 case KEY_pos:
6f33ba73 7610 UNIDOR(OP_POS);
4e553d73 7611
79072805 7612 case KEY_pack:
a0d0e21e 7613 LOP(OP_PACK,XTERM);
79072805
LW
7614
7615 case KEY_package:
a0d0e21e 7616 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7617 s = SKIPSPACE1(s);
91152fc1 7618 s = force_strict_version(s);
4e4da3ac 7619 PL_lex_expect = XBLOCK;
79072805
LW
7620 OPERATOR(PACKAGE);
7621
7622 case KEY_pipe:
a0d0e21e 7623 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7624
7625 case KEY_q:
5db06880 7626 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7627 if (!s)
d4c19fe8 7628 missingterm(NULL);
6154021b 7629 pl_yylval.ival = OP_CONST;
79072805
LW
7630 TERM(sublex_start());
7631
a0d0e21e
LW
7632 case KEY_quotemeta:
7633 UNI(OP_QUOTEMETA);
7634
ea25a9b2
Z
7635 case KEY_qw: {
7636 OP *words = NULL;
5db06880 7637 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7638 if (!s)
d4c19fe8 7639 missingterm(NULL);
3480a8d2 7640 PL_expect = XOPERATOR;
8127e0e3 7641 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
7642 int warned_comma = !ckWARN(WARN_QW);
7643 int warned_comment = warned_comma;
3280af22 7644 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7645 while (len) {
d4c19fe8
AL
7646 for (; isSPACE(*d) && len; --len, ++d)
7647 /**/;
8127e0e3 7648 if (len) {
d4c19fe8 7649 SV *sv;
f54cb97a 7650 const char *b = d;
7e03b518 7651 if (!warned_comma || !warned_comment) {
8127e0e3 7652 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 7653 if (!warned_comma && *d == ',') {
9014280d 7654 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7655 "Possible attempt to separate words with commas");
7e03b518 7656 ++warned_comma;
8127e0e3 7657 }
7e03b518 7658 else if (!warned_comment && *d == '#') {
9014280d 7659 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7660 "Possible attempt to put comments in qw() list");
7e03b518 7661 ++warned_comment;
8127e0e3
GS
7662 }
7663 }
7664 }
7665 else {
d4c19fe8
AL
7666 for (; !isSPACE(*d) && len; --len, ++d)
7667 /**/;
8127e0e3 7668 }
740cce10 7669 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7670 words = op_append_elem(OP_LIST, words,
7948272d 7671 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7672 }
7673 }
7674 }
ea25a9b2
Z
7675 if (!words)
7676 words = newNULLLIST();
37fd879b 7677 if (PL_lex_stuff) {
8127e0e3 7678 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7679 PL_lex_stuff = NULL;
37fd879b 7680 }
ea25a9b2
Z
7681 PL_expect = XOPERATOR;
7682 pl_yylval.opval = sawparens(words);
7683 TOKEN(QWLIST);
7684 }
8990e307 7685
79072805 7686 case KEY_qq:
5db06880 7687 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7688 if (!s)
d4c19fe8 7689 missingterm(NULL);
6154021b 7690 pl_yylval.ival = OP_STRINGIFY;
3280af22 7691 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 7692 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
7693 TERM(sublex_start());
7694
8782bef2
GB
7695 case KEY_qr:
7696 s = scan_pat(s,OP_QR);
7697 TERM(sublex_start());
7698
79072805 7699 case KEY_qx:
5db06880 7700 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7701 if (!s)
d4c19fe8 7702 missingterm(NULL);
9b201d7d 7703 readpipe_override();
79072805
LW
7704 TERM(sublex_start());
7705
7706 case KEY_return:
7707 OLDLOP(OP_RETURN);
7708
7709 case KEY_require:
29595ff2 7710 s = SKIPSPACE1(s);
e759cc13
RGS
7711 if (isDIGIT(*s)) {
7712 s = force_version(s, FALSE);
a7cb1f99 7713 }
e759cc13
RGS
7714 else if (*s != 'v' || !isDIGIT(s[1])
7715 || (s = force_version(s, TRUE), *s == 'v'))
7716 {
a7cb1f99
GS
7717 *PL_tokenbuf = '\0';
7718 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7719 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7720 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7721 else if (*s == '<')
7722 yyerror("<> should be quotes");
7723 }
a72a1c8b
RGS
7724 if (orig_keyword == KEY_require) {
7725 orig_keyword = 0;
6154021b 7726 pl_yylval.ival = 1;
a72a1c8b
RGS
7727 }
7728 else
6154021b 7729 pl_yylval.ival = 0;
a72a1c8b
RGS
7730 PL_expect = XTERM;
7731 PL_bufptr = s;
7732 PL_last_uni = PL_oldbufptr;
7733 PL_last_lop_op = OP_REQUIRE;
7734 s = skipspace(s);
7735 return REPORT( (int)REQUIRE );
79072805
LW
7736
7737 case KEY_reset:
7738 UNI(OP_RESET);
7739
7740 case KEY_redo:
a0d0e21e 7741 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7742 LOOPX(OP_REDO);
7743
7744 case KEY_rename:
a0d0e21e 7745 LOP(OP_RENAME,XTERM);
79072805
LW
7746
7747 case KEY_rand:
7748 UNI(OP_RAND);
7749
7750 case KEY_rmdir:
7751 UNI(OP_RMDIR);
7752
7753 case KEY_rindex:
a0d0e21e 7754 LOP(OP_RINDEX,XTERM);
79072805
LW
7755
7756 case KEY_read:
a0d0e21e 7757 LOP(OP_READ,XTERM);
79072805
LW
7758
7759 case KEY_readdir:
7760 UNI(OP_READDIR);
7761
93a17b20 7762 case KEY_readline:
6f33ba73 7763 UNIDOR(OP_READLINE);
93a17b20
LW
7764
7765 case KEY_readpipe:
0858480c 7766 UNIDOR(OP_BACKTICK);
93a17b20 7767
79072805
LW
7768 case KEY_rewinddir:
7769 UNI(OP_REWINDDIR);
7770
7771 case KEY_recv:
a0d0e21e 7772 LOP(OP_RECV,XTERM);
79072805
LW
7773
7774 case KEY_reverse:
a0d0e21e 7775 LOP(OP_REVERSE,XTERM);
79072805
LW
7776
7777 case KEY_readlink:
6f33ba73 7778 UNIDOR(OP_READLINK);
79072805
LW
7779
7780 case KEY_ref:
7781 UNI(OP_REF);
7782
7783 case KEY_s:
7784 s = scan_subst(s);
6154021b 7785 if (pl_yylval.opval)
79072805
LW
7786 TERM(sublex_start());
7787 else
7788 TOKEN(1); /* force error */
7789
0d863452
RH
7790 case KEY_say:
7791 checkcomma(s,PL_tokenbuf,"filehandle");
7792 LOP(OP_SAY,XREF);
7793
a0d0e21e
LW
7794 case KEY_chomp:
7795 UNI(OP_CHOMP);
4e553d73 7796
79072805
LW
7797 case KEY_scalar:
7798 UNI(OP_SCALAR);
7799
7800 case KEY_select:
a0d0e21e 7801 LOP(OP_SELECT,XTERM);
79072805
LW
7802
7803 case KEY_seek:
a0d0e21e 7804 LOP(OP_SEEK,XTERM);
79072805
LW
7805
7806 case KEY_semctl:
a0d0e21e 7807 LOP(OP_SEMCTL,XTERM);
79072805
LW
7808
7809 case KEY_semget:
a0d0e21e 7810 LOP(OP_SEMGET,XTERM);
79072805
LW
7811
7812 case KEY_semop:
a0d0e21e 7813 LOP(OP_SEMOP,XTERM);
79072805
LW
7814
7815 case KEY_send:
a0d0e21e 7816 LOP(OP_SEND,XTERM);
79072805
LW
7817
7818 case KEY_setpgrp:
a0d0e21e 7819 LOP(OP_SETPGRP,XTERM);
79072805
LW
7820
7821 case KEY_setpriority:
a0d0e21e 7822 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7823
7824 case KEY_sethostent:
ff68c719 7825 UNI(OP_SHOSTENT);
79072805
LW
7826
7827 case KEY_setnetent:
ff68c719 7828 UNI(OP_SNETENT);
79072805
LW
7829
7830 case KEY_setservent:
ff68c719 7831 UNI(OP_SSERVENT);
79072805
LW
7832
7833 case KEY_setprotoent:
ff68c719 7834 UNI(OP_SPROTOENT);
79072805
LW
7835
7836 case KEY_setpwent:
7837 FUN0(OP_SPWENT);
7838
7839 case KEY_setgrent:
7840 FUN0(OP_SGRENT);
7841
7842 case KEY_seekdir:
a0d0e21e 7843 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7844
7845 case KEY_setsockopt:
a0d0e21e 7846 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7847
7848 case KEY_shift:
6f33ba73 7849 UNIDOR(OP_SHIFT);
79072805
LW
7850
7851 case KEY_shmctl:
a0d0e21e 7852 LOP(OP_SHMCTL,XTERM);
79072805
LW
7853
7854 case KEY_shmget:
a0d0e21e 7855 LOP(OP_SHMGET,XTERM);
79072805
LW
7856
7857 case KEY_shmread:
a0d0e21e 7858 LOP(OP_SHMREAD,XTERM);
79072805
LW
7859
7860 case KEY_shmwrite:
a0d0e21e 7861 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7862
7863 case KEY_shutdown:
a0d0e21e 7864 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7865
7866 case KEY_sin:
7867 UNI(OP_SIN);
7868
7869 case KEY_sleep:
7870 UNI(OP_SLEEP);
7871
7872 case KEY_socket:
a0d0e21e 7873 LOP(OP_SOCKET,XTERM);
79072805
LW
7874
7875 case KEY_socketpair:
a0d0e21e 7876 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7877
7878 case KEY_sort:
3280af22 7879 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7880 s = SKIPSPACE1(s);
79072805 7881 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7882 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7883 PL_expect = XTERM;
15f0808c 7884 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7885 LOP(OP_SORT,XREF);
79072805
LW
7886
7887 case KEY_split:
a0d0e21e 7888 LOP(OP_SPLIT,XTERM);
79072805
LW
7889
7890 case KEY_sprintf:
a0d0e21e 7891 LOP(OP_SPRINTF,XTERM);
79072805
LW
7892
7893 case KEY_splice:
a0d0e21e 7894 LOP(OP_SPLICE,XTERM);
79072805
LW
7895
7896 case KEY_sqrt:
7897 UNI(OP_SQRT);
7898
7899 case KEY_srand:
7900 UNI(OP_SRAND);
7901
7902 case KEY_stat:
7903 UNI(OP_STAT);
7904
7905 case KEY_study:
79072805
LW
7906 UNI(OP_STUDY);
7907
7908 case KEY_substr:
a0d0e21e 7909 LOP(OP_SUBSTR,XTERM);
79072805
LW
7910
7911 case KEY_format:
7912 case KEY_sub:
93a17b20 7913 really_sub:
09bef843 7914 {
3280af22 7915 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7916 SSize_t tboffset = 0;
09bef843 7917 expectation attrful;
28cc6278 7918 bool have_name, have_proto;
f54cb97a 7919 const int key = tmp;
09bef843 7920
5db06880
NC
7921#ifdef PERL_MAD
7922 SV *tmpwhite = 0;
7923
cd81e915 7924 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7925 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7926 PL_thistoken = 0;
5db06880
NC
7927
7928 d = s;
7929 s = SKIPSPACE2(s,tmpwhite);
7930#else
09bef843 7931 s = skipspace(s);
5db06880 7932#endif
09bef843 7933
7e2040f0 7934 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7935 (*s == ':' && s[1] == ':'))
7936 {
5db06880 7937#ifdef PERL_MAD
4f61fd4b 7938 SV *nametoke = NULL;
5db06880
NC
7939#endif
7940
09bef843
SB
7941 PL_expect = XBLOCK;
7942 attrful = XATTRBLOCK;
b1b65b59
JH
7943 /* remember buffer pos'n for later force_word */
7944 tboffset = s - PL_oldbufptr;
09bef843 7945 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7946#ifdef PERL_MAD
7947 if (PL_madskills)
7948 nametoke = newSVpvn(s, d - s);
7949#endif
6502358f
NC
7950 if (memchr(tmpbuf, ':', len))
7951 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7952 else {
7953 sv_setsv(PL_subname,PL_curstname);
396482e1 7954 sv_catpvs(PL_subname,"::");
09bef843
SB
7955 sv_catpvn(PL_subname,tmpbuf,len);
7956 }
09bef843 7957 have_name = TRUE;
5db06880
NC
7958
7959#ifdef PERL_MAD
7960
7961 start_force(0);
7962 CURMAD('X', nametoke);
7963 CURMAD('_', tmpwhite);
7964 (void) force_word(PL_oldbufptr + tboffset, WORD,
7965 FALSE, TRUE, TRUE);
7966
7967 s = SKIPSPACE2(d,tmpwhite);
7968#else
7969 s = skipspace(d);
7970#endif
09bef843 7971 }
463ee0b2 7972 else {
09bef843
SB
7973 if (key == KEY_my)
7974 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7975 PL_expect = XTERMBLOCK;
7976 attrful = XATTRTERM;
76f68e9b 7977 sv_setpvs(PL_subname,"?");
09bef843 7978 have_name = FALSE;
463ee0b2 7979 }
4633a7c4 7980
09bef843
SB
7981 if (key == KEY_format) {
7982 if (*s == '=')
7983 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7984#ifdef PERL_MAD
cd81e915 7985 PL_thistoken = subtoken;
5db06880
NC
7986 s = d;
7987#else
09bef843 7988 if (have_name)
b1b65b59
JH
7989 (void) force_word(PL_oldbufptr + tboffset, WORD,
7990 FALSE, TRUE, TRUE);
5db06880 7991#endif
09bef843
SB
7992 OPERATOR(FORMAT);
7993 }
79072805 7994
09bef843
SB
7995 /* Look for a prototype */
7996 if (*s == '(') {
d9f2850e
RGS
7997 char *p;
7998 bool bad_proto = FALSE;
9e8d7757
RB
7999 bool in_brackets = FALSE;
8000 char greedy_proto = ' ';
8001 bool proto_after_greedy_proto = FALSE;
8002 bool must_be_last = FALSE;
8003 bool underscore = FALSE;
aef2a98a 8004 bool seen_underscore = FALSE;
197afce1 8005 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 8006
5db06880 8007 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 8008 if (!s)
09bef843 8009 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8010 /* strip spaces and check for bad characters */
09bef843
SB
8011 d = SvPVX(PL_lex_stuff);
8012 tmp = 0;
d9f2850e
RGS
8013 for (p = d; *p; ++p) {
8014 if (!isSPACE(*p)) {
8015 d[tmp++] = *p;
9e8d7757 8016
197afce1 8017 if (warnillegalproto) {
9e8d7757
RB
8018 if (must_be_last)
8019 proto_after_greedy_proto = TRUE;
c035a075 8020 if (!strchr("$@%*;[]&\\_+", *p)) {
9e8d7757
RB
8021 bad_proto = TRUE;
8022 }
8023 else {
8024 if ( underscore ) {
8025 if ( *p != ';' )
8026 bad_proto = TRUE;
8027 underscore = FALSE;
8028 }
8029 if ( *p == '[' ) {
8030 in_brackets = TRUE;
8031 }
8032 else if ( *p == ']' ) {
8033 in_brackets = FALSE;
8034 }
8035 else if ( (*p == '@' || *p == '%') &&
8036 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8037 !in_brackets ) {
8038 must_be_last = TRUE;
8039 greedy_proto = *p;
8040 }
8041 else if ( *p == '_' ) {
aef2a98a 8042 underscore = seen_underscore = TRUE;
9e8d7757
RB
8043 }
8044 }
8045 }
d37a9538 8046 }
09bef843 8047 }
d9f2850e 8048 d[tmp] = '\0';
9e8d7757 8049 if (proto_after_greedy_proto)
197afce1 8050 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8051 "Prototype after '%c' for %"SVf" : %s",
8052 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 8053 if (bad_proto)
197afce1 8054 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8055 "Illegal character %sin prototype for %"SVf" : %s",
8056 seen_underscore ? "after '_' " : "",
be2597df 8057 SVfARG(PL_subname), d);
b162af07 8058 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8059 have_proto = TRUE;
68dc0745 8060
5db06880
NC
8061#ifdef PERL_MAD
8062 start_force(0);
cd81e915 8063 CURMAD('q', PL_thisopen);
5db06880 8064 CURMAD('_', tmpwhite);
cd81e915
NC
8065 CURMAD('=', PL_thisstuff);
8066 CURMAD('Q', PL_thisclose);
5db06880
NC
8067 NEXTVAL_NEXTTOKE.opval =
8068 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8069 PL_lex_stuff = NULL;
5db06880
NC
8070 force_next(THING);
8071
8072 s = SKIPSPACE2(s,tmpwhite);
8073#else
09bef843 8074 s = skipspace(s);
5db06880 8075#endif
4633a7c4 8076 }
09bef843
SB
8077 else
8078 have_proto = FALSE;
8079
8080 if (*s == ':' && s[1] != ':')
8081 PL_expect = attrful;
8e742a20
MHM
8082 else if (*s != '{' && key == KEY_sub) {
8083 if (!have_name)
8084 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8085 else if (*s != ';' && *s != '}')
be2597df 8086 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8087 }
09bef843 8088
5db06880
NC
8089#ifdef PERL_MAD
8090 start_force(0);
8091 if (tmpwhite) {
8092 if (PL_madskills)
6b29d1f5 8093 curmad('^', newSVpvs(""));
5db06880
NC
8094 CURMAD('_', tmpwhite);
8095 }
8096 force_next(0);
8097
cd81e915 8098 PL_thistoken = subtoken;
5db06880 8099#else
09bef843 8100 if (have_proto) {
9ded7720 8101 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8102 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8103 PL_lex_stuff = NULL;
09bef843 8104 force_next(THING);
68dc0745 8105 }
5db06880 8106#endif
09bef843 8107 if (!have_name) {
49a54bbe
NC
8108 if (PL_curstash)
8109 sv_setpvs(PL_subname, "__ANON__");
8110 else
8111 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8112 TOKEN(ANONSUB);
4633a7c4 8113 }
5db06880 8114#ifndef PERL_MAD
b1b65b59
JH
8115 (void) force_word(PL_oldbufptr + tboffset, WORD,
8116 FALSE, TRUE, TRUE);
5db06880 8117#endif
09bef843
SB
8118 if (key == KEY_my)
8119 TOKEN(MYSUB);
8120 TOKEN(SUB);
4633a7c4 8121 }
79072805
LW
8122
8123 case KEY_system:
a0d0e21e 8124 LOP(OP_SYSTEM,XREF);
79072805
LW
8125
8126 case KEY_symlink:
a0d0e21e 8127 LOP(OP_SYMLINK,XTERM);
79072805
LW
8128
8129 case KEY_syscall:
a0d0e21e 8130 LOP(OP_SYSCALL,XTERM);
79072805 8131
c07a80fd 8132 case KEY_sysopen:
8133 LOP(OP_SYSOPEN,XTERM);
8134
137443ea 8135 case KEY_sysseek:
8136 LOP(OP_SYSSEEK,XTERM);
8137
79072805 8138 case KEY_sysread:
a0d0e21e 8139 LOP(OP_SYSREAD,XTERM);
79072805
LW
8140
8141 case KEY_syswrite:
a0d0e21e 8142 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8143
8144 case KEY_tr:
8145 s = scan_trans(s);
8146 TERM(sublex_start());
8147
8148 case KEY_tell:
8149 UNI(OP_TELL);
8150
8151 case KEY_telldir:
8152 UNI(OP_TELLDIR);
8153
463ee0b2 8154 case KEY_tie:
a0d0e21e 8155 LOP(OP_TIE,XTERM);
463ee0b2 8156
c07a80fd 8157 case KEY_tied:
8158 UNI(OP_TIED);
8159
79072805
LW
8160 case KEY_time:
8161 FUN0(OP_TIME);
8162
8163 case KEY_times:
8164 FUN0(OP_TMS);
8165
8166 case KEY_truncate:
a0d0e21e 8167 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8168
8169 case KEY_uc:
8170 UNI(OP_UC);
8171
8172 case KEY_ucfirst:
8173 UNI(OP_UCFIRST);
8174
463ee0b2
LW
8175 case KEY_untie:
8176 UNI(OP_UNTIE);
8177
79072805 8178 case KEY_until:
78cdf107
Z
8179 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8180 return REPORT(0);
6154021b 8181 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8182 OPERATOR(UNTIL);
8183
8184 case KEY_unless:
78cdf107
Z
8185 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8186 return REPORT(0);
6154021b 8187 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8188 OPERATOR(UNLESS);
8189
8190 case KEY_unlink:
a0d0e21e 8191 LOP(OP_UNLINK,XTERM);
79072805
LW
8192
8193 case KEY_undef:
6f33ba73 8194 UNIDOR(OP_UNDEF);
79072805
LW
8195
8196 case KEY_unpack:
a0d0e21e 8197 LOP(OP_UNPACK,XTERM);
79072805
LW
8198
8199 case KEY_utime:
a0d0e21e 8200 LOP(OP_UTIME,XTERM);
79072805
LW
8201
8202 case KEY_umask:
6f33ba73 8203 UNIDOR(OP_UMASK);
79072805
LW
8204
8205 case KEY_unshift:
a0d0e21e
LW
8206 LOP(OP_UNSHIFT,XTERM);
8207
8208 case KEY_use:
468aa647 8209 s = tokenize_use(1, s);
a0d0e21e 8210 OPERATOR(USE);
79072805
LW
8211
8212 case KEY_values:
8213 UNI(OP_VALUES);
8214
8215 case KEY_vec:
a0d0e21e 8216 LOP(OP_VEC,XTERM);
79072805 8217
0d863452 8218 case KEY_when:
78cdf107
Z
8219 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8220 return REPORT(0);
6154021b 8221 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8222 OPERATOR(WHEN);
8223
79072805 8224 case KEY_while:
78cdf107
Z
8225 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8226 return REPORT(0);
6154021b 8227 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8228 OPERATOR(WHILE);
8229
8230 case KEY_warn:
3280af22 8231 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8232 LOP(OP_WARN,XTERM);
79072805
LW
8233
8234 case KEY_wait:
8235 FUN0(OP_WAIT);
8236
8237 case KEY_waitpid:
a0d0e21e 8238 LOP(OP_WAITPID,XTERM);
79072805
LW
8239
8240 case KEY_wantarray:
8241 FUN0(OP_WANTARRAY);
8242
8243 case KEY_write:
9d116dd7
JH
8244#ifdef EBCDIC
8245 {
df3728a2
JH
8246 char ctl_l[2];
8247 ctl_l[0] = toCTRL('L');
8248 ctl_l[1] = '\0';
fafc274c 8249 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8250 }
8251#else
fafc274c
NC
8252 /* Make sure $^L is defined */
8253 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8254#endif
79072805
LW
8255 UNI(OP_ENTERWRITE);
8256
8257 case KEY_x:
78cdf107
Z
8258 if (PL_expect == XOPERATOR) {
8259 if (*s == '=' && !PL_lex_allbrackets &&
8260 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8261 return REPORT(0);
79072805 8262 Mop(OP_REPEAT);
78cdf107 8263 }
79072805
LW
8264 check_uni();
8265 goto just_a_word;
8266
a0d0e21e 8267 case KEY_xor:
78cdf107
Z
8268 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8269 return REPORT(0);
6154021b 8270 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
8271 OPERATOR(OROP);
8272
79072805
LW
8273 case KEY_y:
8274 s = scan_trans(s);
8275 TERM(sublex_start());
8276 }
49dc05e3 8277 }}
79072805 8278}
bf4acbe4
GS
8279#ifdef __SC__
8280#pragma segment Main
8281#endif
79072805 8282
e930465f
JH
8283static int
8284S_pending_ident(pTHX)
8eceec63 8285{
97aff369 8286 dVAR;
8eceec63 8287 register char *d;
bbd11bfc 8288 PADOFFSET tmp = 0;
8eceec63
SC
8289 /* pit holds the identifier we read and pending_ident is reset */
8290 char pit = PL_pending_ident;
9bde8eb0
NC
8291 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8292 /* All routes through this function want to know if there is a colon. */
c099d646 8293 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8294 PL_pending_ident = 0;
8295
cd81e915 8296 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8297 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8298 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8299
8300 /* if we're in a my(), we can't allow dynamics here.
8301 $foo'bar has already been turned into $foo::bar, so
8302 just check for colons.
8303
8304 if it's a legal name, the OP is a PADANY.
8305 */
8306 if (PL_in_my) {
8307 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8308 if (has_colon)
8eceec63
SC
8309 yyerror(Perl_form(aTHX_ "No package name allowed for "
8310 "variable %s in \"our\"",
8311 PL_tokenbuf));
bc9b26ca 8312 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8313 }
8314 else {
9bde8eb0 8315 if (has_colon)
952306ac
RGS
8316 yyerror(Perl_form(aTHX_ PL_no_myglob,
8317 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 8318
6154021b 8319 pl_yylval.opval = newOP(OP_PADANY, 0);
bc9b26ca
BF
8320 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8321 UTF ? SVf_UTF8 : 0);
8eceec63
SC
8322 return PRIVATEREF;
8323 }
8324 }
8325
8326 /*
8327 build the ops for accesses to a my() variable.
8328
8329 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8330 then used in a comparison. This catches most, but not
8331 all cases. For instance, it catches
8332 sort { my($a); $a <=> $b }
8333 but not
8334 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8335 (although why you'd do that is anyone's guess).
8336 */
8337
9bde8eb0 8338 if (!has_colon) {
8716503d 8339 if (!PL_in_my)
bc9b26ca
BF
8340 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8341 UTF ? SVf_UTF8 : 0);
8716503d 8342 if (tmp != NOT_IN_PAD) {
8eceec63 8343 /* might be an "our" variable" */
00b1698f 8344 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8345 /* build ops for a bareword */
b64e5050
AL
8346 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8347 HEK * const stashname = HvNAME_HEK(stash);
8348 SV * const sym = newSVhek(stashname);
396482e1 8349 sv_catpvs(sym, "::");
9bde8eb0 8350 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8351 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8352 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8353 gv_fetchsv(sym,
8eceec63
SC
8354 (PL_in_eval
8355 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8356 : GV_ADDMULTI
8eceec63
SC
8357 ),
8358 ((PL_tokenbuf[0] == '$') ? SVt_PV
8359 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8360 : SVt_PVHV));
8361 return WORD;
8362 }
8363
8364 /* if it's a sort block and they're naming $a or $b */
8365 if (PL_last_lop_op == OP_SORT &&
8366 PL_tokenbuf[0] == '$' &&
8367 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8368 && !PL_tokenbuf[2])
8369 {
8370 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8371 d < PL_bufend && *d != '\n';
8372 d++)
8373 {
8374 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8375 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8376 PL_tokenbuf);
8377 }
8378 }
8379 }
8380
6154021b
RGS
8381 pl_yylval.opval = newOP(OP_PADANY, 0);
8382 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8383 return PRIVATEREF;
8384 }
8385 }
8386
8387 /*
8388 Whine if they've said @foo in a doublequoted string,
8389 and @foo isn't a variable we can find in the symbol
8390 table.
8391 */
d824713b
NC
8392 if (ckWARN(WARN_AMBIGUOUS) &&
8393 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8394 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8395 SVt_PVAV);
8eceec63 8396 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8397 /* DO NOT warn for @- and @+ */
8398 && !( PL_tokenbuf[2] == '\0' &&
8399 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8400 )
8eceec63
SC
8401 {
8402 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8403 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8404 "Possible unintended interpolation of %s in string",
8405 PL_tokenbuf);
8eceec63
SC
8406 }
8407 }
8408
8409 /* build ops for a bareword */
6154021b 8410 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8411 tokenbuf_len - 1));
6154021b 8412 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8413 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8414 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8415 ((PL_tokenbuf[0] == '$') ? SVt_PV
8416 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8417 : SVt_PVHV));
8eceec63
SC
8418 return WORD;
8419}
8420
76e3520e 8421STATIC void
c94115d8 8422S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8423{
97aff369 8424 dVAR;
2f3197b3 8425
7918f24d
NC
8426 PERL_ARGS_ASSERT_CHECKCOMMA;
8427
d008e5eb 8428 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8429 if (ckWARN(WARN_SYNTAX)) {
8430 int level = 1;
26ff0806 8431 const char *w;
d008e5eb
GS
8432 for (w = s+2; *w && level; w++) {
8433 if (*w == '(')
8434 ++level;
8435 else if (*w == ')')
8436 --level;
8437 }
888fea98
NC
8438 while (isSPACE(*w))
8439 ++w;
b1439985
RGS
8440 /* the list of chars below is for end of statements or
8441 * block / parens, boolean operators (&&, ||, //) and branch
8442 * constructs (or, and, if, until, unless, while, err, for).
8443 * Not a very solid hack... */
8444 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8445 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8446 "%s (...) interpreted as function",name);
d008e5eb 8447 }
2f3197b3 8448 }
3280af22 8449 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8450 s++;
a687059c
LW
8451 if (*s == '(')
8452 s++;
3280af22 8453 while (s < PL_bufend && isSPACE(*s))
a687059c 8454 s++;
7e2040f0 8455 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 8456 const char * const w = s++;
7e2040f0 8457 while (isALNUM_lazy_if(s,UTF))
a687059c 8458 s++;
3280af22 8459 while (s < PL_bufend && isSPACE(*s))
a687059c 8460 s++;
e929a76b 8461 if (*s == ',') {
c94115d8 8462 GV* gv;
5458a98a 8463 if (keyword(w, s - w, 0))
e929a76b 8464 return;
c94115d8
NC
8465
8466 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
8467 if (gv && GvCVu(gv))
abbb3198 8468 return;
cea2e8a9 8469 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8470 }
8471 }
8472}
8473
423cee85
JH
8474/* Either returns sv, or mortalizes sv and returns a new SV*.
8475 Best used as sv=new_constant(..., sv, ...).
8476 If s, pv are NULL, calls subroutine with one argument,
8477 and type is used with error messages only. */
8478
b3ac6de7 8479STATIC SV *
eb0d8d16
NC
8480S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8481 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 8482{
27da23d5 8483 dVAR; dSP;
890ce7af 8484 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 8485 SV *res;
b3ac6de7
IZ
8486 SV **cvp;
8487 SV *cv, *typesv;
89e33a05 8488 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 8489
7918f24d
NC
8490 PERL_ARGS_ASSERT_NEW_CONSTANT;
8491
f0af216f 8492 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
8493 SV *msg;
8494
10edeb5d
JH
8495 why2 = (const char *)
8496 (strEQ(key,"charnames")
8497 ? "(possibly a missing \"use charnames ...\")"
8498 : "");
4e553d73 8499 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
8500 (type ? type: "undef"), why2);
8501
8502 /* This is convoluted and evil ("goto considered harmful")
8503 * but I do not understand the intricacies of all the different
8504 * failure modes of %^H in here. The goal here is to make
8505 * the most probable error message user-friendly. --jhi */
8506
8507 goto msgdone;
8508
423cee85 8509 report:
4e553d73 8510 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 8511 (type ? type: "undef"), why1, why2, why3);
41ab332f 8512 msgdone:
95a20fc0 8513 yyerror(SvPVX_const(msg));
423cee85
JH
8514 SvREFCNT_dec(msg);
8515 return sv;
8516 }
ff3f963a
KW
8517
8518 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
8519 if (PL_error_count > 0 && strEQ(key,"charnames"))
8520 return &PL_sv_undef;
ff3f963a 8521
eb0d8d16 8522 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 8523 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
8524 why1 = "$^H{";
8525 why2 = key;
f0af216f 8526 why3 = "} is not defined";
423cee85 8527 goto report;
b3ac6de7
IZ
8528 }
8529 sv_2mortal(sv); /* Parent created it permanently */
8530 cv = *cvp;
423cee85 8531 if (!pv && s)
59cd0e26 8532 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 8533 if (type && pv)
59cd0e26 8534 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 8535 else
423cee85 8536 typesv = &PL_sv_undef;
4e553d73 8537
e788e7d3 8538 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
8539 ENTER ;
8540 SAVETMPS;
4e553d73 8541
423cee85 8542 PUSHMARK(SP) ;
a5845cb7 8543 EXTEND(sp, 3);
423cee85
JH
8544 if (pv)
8545 PUSHs(pv);
b3ac6de7 8546 PUSHs(sv);
423cee85
JH
8547 if (pv)
8548 PUSHs(typesv);
b3ac6de7 8549 PUTBACK;
423cee85 8550 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 8551
423cee85 8552 SPAGAIN ;
4e553d73 8553
423cee85 8554 /* Check the eval first */
9b0e499b 8555 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 8556 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 8557 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 8558 (void)POPs;
b37c2d43 8559 res = SvREFCNT_inc_simple(sv);
423cee85
JH
8560 }
8561 else {
8562 res = POPs;
b37c2d43 8563 SvREFCNT_inc_simple_void(res);
423cee85 8564 }
4e553d73 8565
423cee85
JH
8566 PUTBACK ;
8567 FREETMPS ;
8568 LEAVE ;
b3ac6de7 8569 POPSTACK;
4e553d73 8570
b3ac6de7 8571 if (!SvOK(res)) {
423cee85
JH
8572 why1 = "Call to &{$^H{";
8573 why2 = key;
f0af216f 8574 why3 = "}} did not return a defined value";
423cee85
JH
8575 sv = res;
8576 goto report;
9b0e499b 8577 }
423cee85 8578
9b0e499b 8579 return res;
b3ac6de7 8580}
4e553d73 8581
d0a148a6
NC
8582/* Returns a NUL terminated string, with the length of the string written to
8583 *slp
8584 */
76e3520e 8585STATIC char *
cea2e8a9 8586S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 8587{
97aff369 8588 dVAR;
463ee0b2 8589 register char *d = dest;
890ce7af 8590 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
8591
8592 PERL_ARGS_ASSERT_SCAN_WORD;
8593
463ee0b2 8594 for (;;) {
8903cb82 8595 if (d >= e)
cea2e8a9 8596 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8597 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8598 *d++ = *s++;
c35e046a 8599 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8600 *d++ = ':';
8601 *d++ = ':';
8602 s++;
8603 }
c35e046a 8604 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
8605 *d++ = *s++;
8606 *d++ = *s++;
8607 }
fd400ab9 8608 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8609 char *t = s + UTF8SKIP(s);
c35e046a 8610 size_t len;
fd400ab9 8611 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 8612 t += UTF8SKIP(t);
c35e046a
AL
8613 len = t - s;
8614 if (d + len > e)
cea2e8a9 8615 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
8616 Copy(s, d, len, char);
8617 d += len;
a0ed51b3
LW
8618 s = t;
8619 }
463ee0b2
LW
8620 else {
8621 *d = '\0';
8622 *slp = d - dest;
8623 return s;
e929a76b 8624 }
378cc40b
LW
8625 }
8626}
8627
76e3520e 8628STATIC char *
f54cb97a 8629S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 8630{
97aff369 8631 dVAR;
6136c704 8632 char *bracket = NULL;
748a9306 8633 char funny = *s++;
6136c704 8634 register char *d = dest;
0b3da58d 8635 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 8636
7918f24d
NC
8637 PERL_ARGS_ASSERT_SCAN_IDENT;
8638
a0d0e21e 8639 if (isSPACE(*s))
29595ff2 8640 s = PEEKSPACE(s);
de3bb511 8641 if (isDIGIT(*s)) {
8903cb82 8642 while (isDIGIT(*s)) {
8643 if (d >= e)
cea2e8a9 8644 Perl_croak(aTHX_ ident_too_long);
378cc40b 8645 *d++ = *s++;
8903cb82 8646 }
378cc40b
LW
8647 }
8648 else {
463ee0b2 8649 for (;;) {
8903cb82 8650 if (d >= e)
cea2e8a9 8651 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8652 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8653 *d++ = *s++;
7e2040f0 8654 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8655 *d++ = ':';
8656 *d++ = ':';
8657 s++;
8658 }
a0d0e21e 8659 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
8660 *d++ = *s++;
8661 *d++ = *s++;
8662 }
fd400ab9 8663 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8664 char *t = s + UTF8SKIP(s);
fd400ab9 8665 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
8666 t += UTF8SKIP(t);
8667 if (d + (t - s) > e)
cea2e8a9 8668 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
8669 Copy(s, d, t - s, char);
8670 d += t - s;
8671 s = t;
8672 }
463ee0b2
LW
8673 else
8674 break;
8675 }
378cc40b
LW
8676 }
8677 *d = '\0';
8678 d = dest;
79072805 8679 if (*d) {
3280af22
NIS
8680 if (PL_lex_state != LEX_NORMAL)
8681 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 8682 return s;
378cc40b 8683 }
748a9306 8684 if (*s == '$' && s[1] &&
3792a11b 8685 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 8686 {
4810e5ec 8687 return s;
5cd24f17 8688 }
79072805
LW
8689 if (*s == '{') {
8690 bracket = s;
8691 s++;
8692 }
8693 else if (ck_uni)
8694 check_uni();
93a17b20 8695 if (s < send)
79072805
LW
8696 *d = *s++;
8697 d[1] = '\0';
2b92dfce 8698 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 8699 *d = toCTRL(*s);
8700 s++;
de3bb511 8701 }
79072805 8702 if (bracket) {
748a9306 8703 if (isSPACE(s[-1])) {
fa83b5b6 8704 while (s < send) {
f54cb97a 8705 const char ch = *s++;
bf4acbe4 8706 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 8707 *d = ch;
8708 break;
8709 }
8710 }
748a9306 8711 }
7e2040f0 8712 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 8713 d++;
a0ed51b3 8714 if (UTF) {
6136c704
AL
8715 char *end = s;
8716 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8717 end += UTF8SKIP(end);
8718 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8719 end += UTF8SKIP(end);
a0ed51b3 8720 }
6136c704
AL
8721 Copy(s, d, end - s, char);
8722 d += end - s;
8723 s = end;
a0ed51b3
LW
8724 }
8725 else {
2b92dfce 8726 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 8727 *d++ = *s++;
2b92dfce 8728 if (d >= e)
cea2e8a9 8729 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 8730 }
79072805 8731 *d = '\0';
c35e046a
AL
8732 while (s < send && SPACE_OR_TAB(*s))
8733 s++;
ff68c719 8734 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 8735 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
8736 const char * const brack =
8737 (const char *)
8738 ((*s == '[') ? "[...]" : "{...}");
e850844c 8739 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 8740 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 8741 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
8742 funny, dest, brack, funny, dest, brack);
8743 }
79072805 8744 bracket++;
a0be28da 8745 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 8746 PL_lex_allbrackets++;
79072805
LW
8747 return s;
8748 }
4e553d73
NIS
8749 }
8750 /* Handle extended ${^Foo} variables
2b92dfce
GS
8751 * 1999-02-27 mjd-perl-patch@plover.com */
8752 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8753 && isALNUM(*s))
8754 {
8755 d++;
8756 while (isALNUM(*s) && d < e) {
8757 *d++ = *s++;
8758 }
8759 if (d >= e)
cea2e8a9 8760 Perl_croak(aTHX_ ident_too_long);
2b92dfce 8761 *d = '\0';
79072805
LW
8762 }
8763 if (*s == '}') {
8764 s++;
7df0d042 8765 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 8766 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
8767 PL_expect = XREF;
8768 }
d008e5eb 8769 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 8770 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
8771 (keyword(dest, d - dest, 0)
8772 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 8773 {
c35e046a
AL
8774 if (funny == '#')
8775 funny = '@';
9014280d 8776 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
8777 "Ambiguous use of %c{%s} resolved to %c%s",
8778 funny, dest, funny, dest);
8779 }
8780 }
79072805
LW
8781 }
8782 else {
8783 s = bracket; /* let the parser handle it */
93a17b20 8784 *dest = '\0';
79072805
LW
8785 }
8786 }
3280af22
NIS
8787 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8788 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
8789 return s;
8790}
8791
858a358b 8792static bool
3955e1a9 8793S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
8794
8795 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8796 * the parse starting at 's', based on the subset that are valid in this
8797 * context input to this routine in 'valid_flags'. Advances s. Returns
8798 * TRUE if the input was a valid flag, so the next char may be as well;
3955e1a9
KW
8799 * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8800 * current regex. This routine will set it to any charset modifier found.
8801 * The caller shouldn't change it. This way, another charset modifier
8802 * encountered in the parse can be detected as an error, as we have decided
8803 * allow only one */
858a358b
KW
8804
8805 const char c = **s;
94b03d7d 8806
858a358b
KW
8807 if (! strchr(valid_flags, c)) {
8808 if (isALNUM(c)) {
94b03d7d 8809 goto deprecate;
858a358b
KW
8810 }
8811 return FALSE;
8812 }
8813
8814 switch (c) {
94b03d7d 8815
858a358b
KW
8816 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8817 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8818 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8819 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8820 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8821 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d
KW
8822 case LOCALE_PAT_MOD:
8823
8824 /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8825 * can't be regex modifiers.
8826 * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
8827 * much as easily done. s///lei, for example, has to mean regex
8828 * modifiers if it's not an error (as does any word character
8829 * following the 'e'). Otherwise, we resolve to the backwards-
8830 * compatible, but less likely 's/// le ...', i.e. as meaning
8831 * less-than-or-equal. The reason it's not likely is that s//
967ac236
KW
8832 * returns a number for code in the field (/r returns a string, but
8833 * that wasn't added until the 5.13 series), and so '<=' should be
8834 * used for comparing, not 'le'. */
94b03d7d
KW
8835 if (*((*s) + 1) == 't') {
8836 goto deprecate;
8837 }
563734a5 8838 else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
aeac89a5
KW
8839
8840 /* 'e' is valid only for substitutes, s///e. If it is not
8841 * valid in the current context, then 'm//le' must mean the
8842 * comparison operator, so use the regular deprecation message.
8843 */
8844 if (! strchr(valid_flags, 'e')) {
8845 goto deprecate;
8846 }
94b03d7d
KW
8847 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8848 "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
8849 return FALSE;
8850 }
3955e1a9
KW
8851 if (*charset) {
8852 goto multiple_charsets;
8853 }
94b03d7d 8854 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 8855 *charset = c;
94b03d7d
KW
8856 break;
8857 case UNICODE_PAT_MOD:
8858 /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8859 * 'n' means they can't be regex modifiers */
8860 if (*((*s) + 1) == 'n') {
8861 goto deprecate;
8862 }
3955e1a9
KW
8863 if (*charset) {
8864 goto multiple_charsets;
8865 }
94b03d7d 8866 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 8867 *charset = c;
94b03d7d
KW
8868 break;
8869 case ASCII_RESTRICT_PAT_MOD:
8870 /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8871 * can't be regex modifiers */
8872 if (*((*s) + 1) == 'n') {
8873 goto deprecate;
8874 }
ff3f26d2
KW
8875
8876 if (! *charset) {
94b03d7d
KW
8877 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8878 }
ff3f26d2
KW
8879 else {
8880
8881 /* Error if previous modifier wasn't an 'a', but if it was, see
8882 * if, and accept, a second occurrence (only) */
8883 if (*charset != 'a'
8884 || get_regex_charset(*pmfl)
8885 != REGEX_ASCII_RESTRICTED_CHARSET)
8886 {
8887 goto multiple_charsets;
8888 }
8889 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
8890 }
8891 *charset = c;
94b03d7d
KW
8892 break;
8893 case DEPENDS_PAT_MOD:
3955e1a9
KW
8894 if (*charset) {
8895 goto multiple_charsets;
8896 }
94b03d7d 8897 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 8898 *charset = c;
94b03d7d 8899 break;
879d0c72 8900 }
94b03d7d 8901
858a358b
KW
8902 (*s)++;
8903 return TRUE;
94b03d7d
KW
8904
8905 deprecate:
8906 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8907 "Having no space between pattern and following word is deprecated");
8908 return FALSE;
3955e1a9
KW
8909
8910 multiple_charsets:
8911 if (*charset != c) {
8912 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8913 }
ff3f26d2
KW
8914 else if (c == 'a') {
8915 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8916 }
3955e1a9
KW
8917 else {
8918 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8919 }
8920
8921 /* Pretend that it worked, so will continue processing before dieing */
8922 (*s)++;
8923 return TRUE;
879d0c72
NC
8924}
8925
76e3520e 8926STATIC char *
cea2e8a9 8927S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 8928{
97aff369 8929 dVAR;
79072805 8930 PMOP *pm;
5db06880 8931 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 8932 const char * const valid_flags =
a20207d7 8933 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 8934 char charset = '\0'; /* character set modifier */
5db06880
NC
8935#ifdef PERL_MAD
8936 char *modstart;
8937#endif
8938
7918f24d 8939 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 8940
25c09cbf 8941 if (!s) {
6136c704 8942 const char * const delimiter = skipspace(start);
10edeb5d
JH
8943 Perl_croak(aTHX_
8944 (const char *)
8945 (*delimiter == '?'
8946 ? "Search pattern not terminated or ternary operator parsed as search pattern"
8947 : "Search pattern not terminated" ));
25c09cbf 8948 }
bbce6d69 8949
8782bef2 8950 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
8951 if (PL_multi_open == '?') {
8952 /* This is the only point in the code that sets PMf_ONCE: */
79072805 8953 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
8954
8955 /* Hence it's safe to do this bit of PMOP book-keeping here, which
8956 allows us to restrict the list needed by reset to just the ??
8957 matches. */
8958 assert(type != OP_TRANS);
8959 if (PL_curstash) {
daba3364 8960 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
8961 U32 elements;
8962 if (!mg) {
daba3364 8963 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
8964 0);
8965 }
8966 elements = mg->mg_len / sizeof(PMOP**);
8967 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
8968 ((PMOP**)mg->mg_ptr) [elements++] = pm;
8969 mg->mg_len = elements * sizeof(PMOP**);
8970 PmopSTASH_set(pm,PL_curstash);
8971 }
8972 }
5db06880
NC
8973#ifdef PERL_MAD
8974 modstart = s;
8975#endif
3955e1a9 8976 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
8977#ifdef PERL_MAD
8978 if (PL_madskills && modstart != s) {
8979 SV* tmptoken = newSVpvn(modstart, s - modstart);
8980 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
8981 }
8982#endif
4ac733c9 8983 /* issue a warning if /c is specified,but /g is not */
a2a5de95 8984 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 8985 {
a2a5de95
NC
8986 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
8987 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
8988 }
8989
3280af22 8990 PL_lex_op = (OP*)pm;
6154021b 8991 pl_yylval.ival = OP_MATCH;
378cc40b
LW
8992 return s;
8993}
8994
76e3520e 8995STATIC char *
cea2e8a9 8996S_scan_subst(pTHX_ char *start)
79072805 8997{
27da23d5 8998 dVAR;
22594288 8999 char *s;
79072805 9000 register PMOP *pm;
4fdae800 9001 I32 first_start;
79072805 9002 I32 es = 0;
3955e1a9 9003 char charset = '\0'; /* character set modifier */
5db06880
NC
9004#ifdef PERL_MAD
9005 char *modstart;
9006#endif
79072805 9007
7918f24d
NC
9008 PERL_ARGS_ASSERT_SCAN_SUBST;
9009
6154021b 9010 pl_yylval.ival = OP_NULL;
79072805 9011
5db06880 9012 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9013
37fd879b 9014 if (!s)
cea2e8a9 9015 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9016
3280af22 9017 if (s[-1] == PL_multi_open)
79072805 9018 s--;
5db06880
NC
9019#ifdef PERL_MAD
9020 if (PL_madskills) {
cd81e915
NC
9021 CURMAD('q', PL_thisopen);
9022 CURMAD('_', PL_thiswhite);
9023 CURMAD('E', PL_thisstuff);
9024 CURMAD('Q', PL_thisclose);
9025 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9026 }
9027#endif
79072805 9028
3280af22 9029 first_start = PL_multi_start;
5db06880 9030 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9031 if (!s) {
37fd879b 9032 if (PL_lex_stuff) {
3280af22 9033 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9034 PL_lex_stuff = NULL;
37fd879b 9035 }
cea2e8a9 9036 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9037 }
3280af22 9038 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9039
79072805 9040 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9041
9042#ifdef PERL_MAD
9043 if (PL_madskills) {
cd81e915
NC
9044 CURMAD('z', PL_thisopen);
9045 CURMAD('R', PL_thisstuff);
9046 CURMAD('Z', PL_thisclose);
5db06880
NC
9047 }
9048 modstart = s;
9049#endif
9050
48c036b1 9051 while (*s) {
a20207d7 9052 if (*s == EXEC_PAT_MOD) {
a687059c 9053 s++;
2f3197b3 9054 es++;
a687059c 9055 }
3955e1a9
KW
9056 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9057 {
48c036b1 9058 break;
aa78b661 9059 }
378cc40b 9060 }
79072805 9061
5db06880
NC
9062#ifdef PERL_MAD
9063 if (PL_madskills) {
9064 if (modstart != s)
9065 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9066 append_madprops(PL_thismad, (OP*)pm, 0);
9067 PL_thismad = 0;
5db06880
NC
9068 }
9069#endif
a2a5de95
NC
9070 if ((pm->op_pmflags & PMf_CONTINUE)) {
9071 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9072 }
9073
79072805 9074 if (es) {
6136c704
AL
9075 SV * const repl = newSVpvs("");
9076
0244c3a4
GS
9077 PL_sublex_info.super_bufptr = s;
9078 PL_sublex_info.super_bufend = PL_bufend;
9079 PL_multi_end = 0;
79072805 9080 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9081 while (es-- > 0) {
9082 if (es)
9083 sv_catpvs(repl, "eval ");
9084 else
9085 sv_catpvs(repl, "do ");
9086 }
6f43d98f 9087 sv_catpvs(repl, "{");
3280af22 9088 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
9089 if (strchr(SvPVX(PL_lex_repl), '#'))
9090 sv_catpvs(repl, "\n");
9091 sv_catpvs(repl, "}");
25da4f38 9092 SvEVALED_on(repl);
3280af22
NIS
9093 SvREFCNT_dec(PL_lex_repl);
9094 PL_lex_repl = repl;
378cc40b 9095 }
79072805 9096
3280af22 9097 PL_lex_op = (OP*)pm;
6154021b 9098 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9099 return s;
9100}
9101
76e3520e 9102STATIC char *
cea2e8a9 9103S_scan_trans(pTHX_ char *start)
378cc40b 9104{
97aff369 9105 dVAR;
a0d0e21e 9106 register char* s;
11343788 9107 OP *o;
79072805 9108 short *tbl;
b84c11c8
NC
9109 U8 squash;
9110 U8 del;
9111 U8 complement;
bb16bae8 9112 bool nondestruct = 0;
5db06880
NC
9113#ifdef PERL_MAD
9114 char *modstart;
9115#endif
79072805 9116
7918f24d
NC
9117 PERL_ARGS_ASSERT_SCAN_TRANS;
9118
6154021b 9119 pl_yylval.ival = OP_NULL;
79072805 9120
5db06880 9121 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 9122 if (!s)
cea2e8a9 9123 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9124
3280af22 9125 if (s[-1] == PL_multi_open)
2f3197b3 9126 s--;
5db06880
NC
9127#ifdef PERL_MAD
9128 if (PL_madskills) {
cd81e915
NC
9129 CURMAD('q', PL_thisopen);
9130 CURMAD('_', PL_thiswhite);
9131 CURMAD('E', PL_thisstuff);
9132 CURMAD('Q', PL_thisclose);
9133 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9134 }
9135#endif
2f3197b3 9136
5db06880 9137 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9138 if (!s) {
37fd879b 9139 if (PL_lex_stuff) {
3280af22 9140 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9141 PL_lex_stuff = NULL;
37fd879b 9142 }
cea2e8a9 9143 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9144 }
5db06880 9145 if (PL_madskills) {
cd81e915
NC
9146 CURMAD('z', PL_thisopen);
9147 CURMAD('R', PL_thisstuff);
9148 CURMAD('Z', PL_thisclose);
5db06880 9149 }
79072805 9150
a0ed51b3 9151 complement = del = squash = 0;
5db06880
NC
9152#ifdef PERL_MAD
9153 modstart = s;
9154#endif
7a1e2023
NC
9155 while (1) {
9156 switch (*s) {
9157 case 'c':
79072805 9158 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9159 break;
9160 case 'd':
a0ed51b3 9161 del = OPpTRANS_DELETE;
7a1e2023
NC
9162 break;
9163 case 's':
79072805 9164 squash = OPpTRANS_SQUASH;
7a1e2023 9165 break;
bb16bae8
FC
9166 case 'r':
9167 nondestruct = 1;
9168 break;
7a1e2023
NC
9169 default:
9170 goto no_more;
9171 }
395c3793
LW
9172 s++;
9173 }
7a1e2023 9174 no_more:
8973db79 9175
aa1f7c5b 9176 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
bb16bae8 9177 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
59f00321
RGS
9178 o->op_private &= ~OPpTRANS_ALL;
9179 o->op_private |= del|squash|complement|
7948272d
NIS
9180 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9181 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 9182
3280af22 9183 PL_lex_op = o;
bb16bae8 9184 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9185
9186#ifdef PERL_MAD
9187 if (PL_madskills) {
9188 if (modstart != s)
9189 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9190 append_madprops(PL_thismad, o, 0);
9191 PL_thismad = 0;
5db06880
NC
9192 }
9193#endif
9194
79072805
LW
9195 return s;
9196}
9197
76e3520e 9198STATIC char *
cea2e8a9 9199S_scan_heredoc(pTHX_ register char *s)
79072805 9200{
97aff369 9201 dVAR;
79072805
LW
9202 SV *herewas;
9203 I32 op_type = OP_SCALAR;
9204 I32 len;
9205 SV *tmpstr;
9206 char term;
73d840c0 9207 const char *found_newline;
79072805 9208 register char *d;
fc36a67e 9209 register char *e;
4633a7c4 9210 char *peek;
f54cb97a 9211 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
9212#ifdef PERL_MAD
9213 I32 stuffstart = s - SvPVX(PL_linestr);
9214 char *tstart;
9215
cd81e915 9216 PL_realtokenstart = -1;
5db06880 9217#endif
79072805 9218
7918f24d
NC
9219 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9220
79072805 9221 s += 2;
3280af22
NIS
9222 d = PL_tokenbuf;
9223 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 9224 if (!outer)
79072805 9225 *d++ = '\n';
c35e046a
AL
9226 peek = s;
9227 while (SPACE_OR_TAB(*peek))
9228 peek++;
3792a11b 9229 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9230 s = peek;
79072805 9231 term = *s++;
3280af22 9232 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 9233 d += len;
3280af22 9234 if (s < PL_bufend)
79072805 9235 s++;
79072805
LW
9236 }
9237 else {
9238 if (*s == '\\')
9239 s++, term = '\'';
9240 else
9241 term = '"';
7e2040f0 9242 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9243 deprecate("bare << to mean <<\"\"");
7e2040f0 9244 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9245 if (d < e)
9246 *d++ = *s;
9247 }
9248 }
3280af22 9249 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9250 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9251 *d++ = '\n';
9252 *d = '\0';
3280af22 9253 len = d - PL_tokenbuf;
5db06880
NC
9254
9255#ifdef PERL_MAD
9256 if (PL_madskills) {
9257 tstart = PL_tokenbuf + !outer;
cd81e915 9258 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 9259 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9260 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9261 stuffstart = s - SvPVX(PL_linestr);
9262 }
9263#endif
6a27c188 9264#ifndef PERL_STRICT_CR
f63a84b2
LW
9265 d = strchr(s, '\r');
9266 if (d) {
b464bac0 9267 char * const olds = s;
f63a84b2 9268 s = d;
3280af22 9269 while (s < PL_bufend) {
f63a84b2
LW
9270 if (*s == '\r') {
9271 *d++ = '\n';
9272 if (*++s == '\n')
9273 s++;
9274 }
9275 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9276 *d++ = *s++;
9277 s++;
9278 }
9279 else
9280 *d++ = *s++;
9281 }
9282 *d = '\0';
3280af22 9283 PL_bufend = d;
95a20fc0 9284 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9285 s = olds;
9286 }
9287#endif
5db06880
NC
9288#ifdef PERL_MAD
9289 found_newline = 0;
9290#endif
10edeb5d 9291 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
9292 herewas = newSVpvn(s,PL_bufend-s);
9293 }
9294 else {
5db06880
NC
9295#ifdef PERL_MAD
9296 herewas = newSVpvn(s-1,found_newline-s+1);
9297#else
73d840c0
AL
9298 s--;
9299 herewas = newSVpvn(s,found_newline-s);
5db06880 9300#endif
73d840c0 9301 }
5db06880
NC
9302#ifdef PERL_MAD
9303 if (PL_madskills) {
9304 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9305 if (PL_thisstuff)
9306 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9307 else
cd81e915 9308 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
9309 }
9310#endif
79072805 9311 s += SvCUR(herewas);
748a9306 9312
5db06880
NC
9313#ifdef PERL_MAD
9314 stuffstart = s - SvPVX(PL_linestr);
9315
9316 if (found_newline)
9317 s--;
9318#endif
9319
7d0a29fe
NC
9320 tmpstr = newSV_type(SVt_PVIV);
9321 SvGROW(tmpstr, 80);
748a9306 9322 if (term == '\'') {
79072805 9323 op_type = OP_CONST;
45977657 9324 SvIV_set(tmpstr, -1);
748a9306
LW
9325 }
9326 else if (term == '`') {
79072805 9327 op_type = OP_BACKTICK;
45977657 9328 SvIV_set(tmpstr, '\\');
748a9306 9329 }
79072805
LW
9330
9331 CLINE;
57843af0 9332 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
9333 PL_multi_open = PL_multi_close = '<';
9334 term = *PL_tokenbuf;
0244c3a4 9335 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
9336 char * const bufptr = PL_sublex_info.super_bufptr;
9337 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 9338 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
9339 s = strchr(bufptr, '\n');
9340 if (!s)
9341 s = bufend;
9342 d = s;
9343 while (s < bufend &&
9344 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9345 if (*s++ == '\n')
57843af0 9346 CopLINE_inc(PL_curcop);
0244c3a4
GS
9347 }
9348 if (s >= bufend) {
eb160463 9349 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
9350 missingterm(PL_tokenbuf);
9351 }
9352 sv_setpvn(herewas,bufptr,d-bufptr+1);
9353 sv_setpvn(tmpstr,d+1,s-d);
9354 s += len - 1;
9355 sv_catpvn(herewas,s,bufend-s);
95a20fc0 9356 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
9357
9358 s = olds;
9359 goto retval;
9360 }
9361 else if (!outer) {
79072805 9362 d = s;
3280af22
NIS
9363 while (s < PL_bufend &&
9364 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 9365 if (*s++ == '\n')
57843af0 9366 CopLINE_inc(PL_curcop);
79072805 9367 }
3280af22 9368 if (s >= PL_bufend) {
eb160463 9369 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9370 missingterm(PL_tokenbuf);
79072805
LW
9371 }
9372 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9373#ifdef PERL_MAD
9374 if (PL_madskills) {
cd81e915
NC
9375 if (PL_thisstuff)
9376 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9377 else
cd81e915 9378 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9379 stuffstart = s - SvPVX(PL_linestr);
9380 }
9381#endif
79072805 9382 s += len - 1;
57843af0 9383 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 9384
3280af22
NIS
9385 sv_catpvn(herewas,s,PL_bufend-s);
9386 sv_setsv(PL_linestr,herewas);
9387 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9388 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9389 PL_last_lop = PL_last_uni = NULL;
79072805
LW
9390 }
9391 else
76f68e9b 9392 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 9393 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
9394#ifdef PERL_MAD
9395 if (PL_madskills) {
9396 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9397 if (PL_thisstuff)
9398 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9399 else
cd81e915 9400 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9401 }
9402#endif
f0e67a1d 9403 PL_bufptr = s;
17cc9359 9404 CopLINE_inc(PL_curcop);
f0e67a1d 9405 if (!outer || !lex_next_chunk(0)) {
eb160463 9406 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9407 missingterm(PL_tokenbuf);
79072805 9408 }
17cc9359 9409 CopLINE_dec(PL_curcop);
f0e67a1d 9410 s = PL_bufptr;
5db06880
NC
9411#ifdef PERL_MAD
9412 stuffstart = s - SvPVX(PL_linestr);
9413#endif
57843af0 9414 CopLINE_inc(PL_curcop);
3280af22 9415 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9416 PL_last_lop = PL_last_uni = NULL;
6a27c188 9417#ifndef PERL_STRICT_CR
3280af22 9418 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
9419 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9420 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 9421 {
3280af22
NIS
9422 PL_bufend[-2] = '\n';
9423 PL_bufend--;
95a20fc0 9424 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 9425 }
3280af22
NIS
9426 else if (PL_bufend[-1] == '\r')
9427 PL_bufend[-1] = '\n';
f63a84b2 9428 }
3280af22
NIS
9429 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9430 PL_bufend[-1] = '\n';
f63a84b2 9431#endif
3280af22 9432 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 9433 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 9434 *(SvPVX(PL_linestr) + off ) = ' ';
37c6a70c 9435 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
3280af22
NIS
9436 sv_catsv(PL_linestr,herewas);
9437 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 9438 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
9439 }
9440 else {
3280af22
NIS
9441 s = PL_bufend;
9442 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
9443 }
9444 }
79072805 9445 s++;
0244c3a4 9446retval:
57843af0 9447 PL_multi_end = CopLINE(PL_curcop);
79072805 9448 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 9449 SvPV_shrink_to_cur(tmpstr);
79072805 9450 }
8990e307 9451 SvREFCNT_dec(herewas);
2f31ce75 9452 if (!IN_BYTES) {
95a20fc0 9453 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
9454 SvUTF8_on(tmpstr);
9455 else if (PL_encoding)
9456 sv_recode_to_utf8(tmpstr, PL_encoding);
9457 }
3280af22 9458 PL_lex_stuff = tmpstr;
6154021b 9459 pl_yylval.ival = op_type;
79072805
LW
9460 return s;
9461}
9462
02aa26ce
NT
9463/* scan_inputsymbol
9464 takes: current position in input buffer
9465 returns: new position in input buffer
6154021b 9466 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
9467
9468 This code handles:
9469
9470 <> read from ARGV
9471 <FH> read from filehandle
9472 <pkg::FH> read from package qualified filehandle
9473 <pkg'FH> read from package qualified filehandle
9474 <$fh> read from filehandle in $fh
9475 <*.h> filename glob
9476
9477*/
9478
76e3520e 9479STATIC char *
cea2e8a9 9480S_scan_inputsymbol(pTHX_ char *start)
79072805 9481{
97aff369 9482 dVAR;
02aa26ce 9483 register char *s = start; /* current position in buffer */
1b420867 9484 char *end;
79072805 9485 I32 len;
6136c704
AL
9486 char *d = PL_tokenbuf; /* start of temp holding space */
9487 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9488
7918f24d
NC
9489 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9490
1b420867
GS
9491 end = strchr(s, '\n');
9492 if (!end)
9493 end = PL_bufend;
9494 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
9495
9496 /* die if we didn't have space for the contents of the <>,
1b420867 9497 or if it didn't end, or if we see a newline
02aa26ce
NT
9498 */
9499
bb7a0f54 9500 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 9501 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 9502 if (s >= end)
cea2e8a9 9503 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 9504
fc36a67e 9505 s++;
02aa26ce
NT
9506
9507 /* check for <$fh>
9508 Remember, only scalar variables are interpreted as filehandles by
9509 this code. Anything more complex (e.g., <$fh{$num}>) will be
9510 treated as a glob() call.
9511 This code makes use of the fact that except for the $ at the front,
9512 a scalar variable and a filehandle look the same.
9513 */
4633a7c4 9514 if (*d == '$' && d[1]) d++;
02aa26ce
NT
9515
9516 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 9517 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 9518 d++;
02aa26ce
NT
9519
9520 /* If we've tried to read what we allow filehandles to look like, and
9521 there's still text left, then it must be a glob() and not a getline.
9522 Use scan_str to pull out the stuff between the <> and treat it
9523 as nothing more than a string.
9524 */
9525
3280af22 9526 if (d - PL_tokenbuf != len) {
6154021b 9527 pl_yylval.ival = OP_GLOB;
5db06880 9528 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9529 if (!s)
cea2e8a9 9530 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
9531 return s;
9532 }
395c3793 9533 else {
9b3023bc 9534 bool readline_overriden = FALSE;
6136c704 9535 GV *gv_readline;
9b3023bc 9536 GV **gvp;
02aa26ce 9537 /* we're in a filehandle read situation */
3280af22 9538 d = PL_tokenbuf;
02aa26ce
NT
9539
9540 /* turn <> into <ARGV> */
79072805 9541 if (!len)
689badd5 9542 Copy("ARGV",d,5,char);
02aa26ce 9543
9b3023bc 9544 /* Check whether readline() is overriden */
fafc274c 9545 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 9546 if ((gv_readline
ba979b31 9547 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 9548 ||
017a3ce5 9549 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 9550 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 9551 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
9552 readline_overriden = TRUE;
9553
02aa26ce
NT
9554 /* if <$fh>, create the ops to turn the variable into a
9555 filehandle
9556 */
79072805 9557 if (*d == '$') {
02aa26ce
NT
9558 /* try to find it in the pad for this block, otherwise find
9559 add symbol table ops
9560 */
bc9b26ca 9561 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 9562 if (tmp != NOT_IN_PAD) {
00b1698f 9563 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
9564 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9565 HEK * const stashname = HvNAME_HEK(stash);
9566 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 9567 sv_catpvs(sym, "::");
f558d5af
JH
9568 sv_catpv(sym, d+1);
9569 d = SvPVX(sym);
9570 goto intro_sym;
9571 }
9572 else {
6136c704 9573 OP * const o = newOP(OP_PADSV, 0);
f558d5af 9574 o->op_targ = tmp;
9b3023bc
RGS
9575 PL_lex_op = readline_overriden
9576 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9577 op_append_elem(OP_LIST, o,
9b3023bc
RGS
9578 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9579 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 9580 }
a0d0e21e
LW
9581 }
9582 else {
f558d5af
JH
9583 GV *gv;
9584 ++d;
9585intro_sym:
9586 gv = gv_fetchpv(d,
9587 (PL_in_eval
9588 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 9589 : GV_ADDMULTI),
f558d5af 9590 SVt_PV);
9b3023bc
RGS
9591 PL_lex_op = readline_overriden
9592 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9593 op_append_elem(OP_LIST,
9b3023bc
RGS
9594 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9595 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9596 : (OP*)newUNOP(OP_READLINE, 0,
9597 newUNOP(OP_RV2SV, 0,
9598 newGVOP(OP_GV, 0, gv)));
a0d0e21e 9599 }
7c6fadd6
RGS
9600 if (!readline_overriden)
9601 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
9602 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9603 pl_yylval.ival = OP_NULL;
79072805 9604 }
02aa26ce
NT
9605
9606 /* If it's none of the above, it must be a literal filehandle
9607 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 9608 else {
6136c704 9609 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
9610 PL_lex_op = readline_overriden
9611 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9612 op_append_elem(OP_LIST,
9b3023bc
RGS
9613 newGVOP(OP_GV, 0, gv),
9614 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9615 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 9616 pl_yylval.ival = OP_NULL;
79072805
LW
9617 }
9618 }
02aa26ce 9619
79072805
LW
9620 return s;
9621}
9622
02aa26ce
NT
9623
9624/* scan_str
9625 takes: start position in buffer
09bef843
SB
9626 keep_quoted preserve \ on the embedded delimiter(s)
9627 keep_delims preserve the delimiters around the string
02aa26ce
NT
9628 returns: position to continue reading from buffer
9629 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9630 updates the read buffer.
9631
9632 This subroutine pulls a string out of the input. It is called for:
9633 q single quotes q(literal text)
9634 ' single quotes 'literal text'
9635 qq double quotes qq(interpolate $here please)
9636 " double quotes "interpolate $here please"
9637 qx backticks qx(/bin/ls -l)
9638 ` backticks `/bin/ls -l`
9639 qw quote words @EXPORT_OK = qw( func() $spam )
9640 m// regexp match m/this/
9641 s/// regexp substitute s/this/that/
9642 tr/// string transliterate tr/this/that/
9643 y/// string transliterate y/this/that/
9644 ($*@) sub prototypes sub foo ($)
09bef843 9645 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
9646 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9647
9648 In most of these cases (all but <>, patterns and transliterate)
9649 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9650 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9651 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9652 calls scan_str().
4e553d73 9653
02aa26ce
NT
9654 It skips whitespace before the string starts, and treats the first
9655 character as the delimiter. If the delimiter is one of ([{< then
9656 the corresponding "close" character )]}> is used as the closing
9657 delimiter. It allows quoting of delimiters, and if the string has
9658 balanced delimiters ([{<>}]) it allows nesting.
9659
37fd879b
HS
9660 On success, the SV with the resulting string is put into lex_stuff or,
9661 if that is already non-NULL, into lex_repl. The second case occurs only
9662 when parsing the RHS of the special constructs s/// and tr/// (y///).
9663 For convenience, the terminating delimiter character is stuffed into
9664 SvIVX of the SV.
02aa26ce
NT
9665*/
9666
76e3520e 9667STATIC char *
09bef843 9668S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 9669{
97aff369 9670 dVAR;
02aa26ce 9671 SV *sv; /* scalar value: string */
d3fcec1f 9672 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
9673 register char *s = start; /* current position in the buffer */
9674 register char term; /* terminating character */
9675 register char *to; /* current position in the sv's data */
9676 I32 brackets = 1; /* bracket nesting level */
89491803 9677 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 9678 I32 termcode; /* terminating char. code */
89ebb4a3 9679 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 9680 STRLEN termlen; /* length of terminating string */
0331ef07 9681 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
9682#ifdef PERL_MAD
9683 int stuffstart;
9684 char *tstart;
9685#endif
02aa26ce 9686
7918f24d
NC
9687 PERL_ARGS_ASSERT_SCAN_STR;
9688
02aa26ce 9689 /* skip space before the delimiter */
29595ff2
NC
9690 if (isSPACE(*s)) {
9691 s = PEEKSPACE(s);
9692 }
02aa26ce 9693
5db06880 9694#ifdef PERL_MAD
cd81e915
NC
9695 if (PL_realtokenstart >= 0) {
9696 stuffstart = PL_realtokenstart;
9697 PL_realtokenstart = -1;
5db06880
NC
9698 }
9699 else
9700 stuffstart = start - SvPVX(PL_linestr);
9701#endif
02aa26ce 9702 /* mark where we are, in case we need to report errors */
79072805 9703 CLINE;
02aa26ce
NT
9704
9705 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 9706 term = *s;
220e2d4e
IH
9707 if (!UTF) {
9708 termcode = termstr[0] = term;
9709 termlen = 1;
9710 }
9711 else {
f3b9ce0f 9712 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
9713 Copy(s, termstr, termlen, U8);
9714 if (!UTF8_IS_INVARIANT(term))
9715 has_utf8 = TRUE;
9716 }
b1c7b182 9717
02aa26ce 9718 /* mark where we are */
57843af0 9719 PL_multi_start = CopLINE(PL_curcop);
3280af22 9720 PL_multi_open = term;
02aa26ce
NT
9721
9722 /* find corresponding closing delimiter */
93a17b20 9723 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
9724 termcode = termstr[0] = term = tmps[5];
9725
3280af22 9726 PL_multi_close = term;
79072805 9727
561b68a9
SH
9728 /* create a new SV to hold the contents. 79 is the SV's initial length.
9729 What a random number. */
7d0a29fe
NC
9730 sv = newSV_type(SVt_PVIV);
9731 SvGROW(sv, 80);
45977657 9732 SvIV_set(sv, termcode);
a0d0e21e 9733 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
9734
9735 /* move past delimiter and try to read a complete string */
09bef843 9736 if (keep_delims)
220e2d4e
IH
9737 sv_catpvn(sv, s, termlen);
9738 s += termlen;
5db06880
NC
9739#ifdef PERL_MAD
9740 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9741 if (!PL_thisopen && !keep_delims) {
9742 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9743 stuffstart = s - SvPVX(PL_linestr);
9744 }
9745#endif
93a17b20 9746 for (;;) {
220e2d4e
IH
9747 if (PL_encoding && !UTF) {
9748 bool cont = TRUE;
9749
9750 while (cont) {
95a20fc0 9751 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 9752 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 9753 &offset, (char*)termstr, termlen);
6136c704
AL
9754 const char * const ns = SvPVX_const(PL_linestr) + offset;
9755 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
9756
9757 for (; s < ns; s++) {
9758 if (*s == '\n' && !PL_rsfp)
9759 CopLINE_inc(PL_curcop);
9760 }
9761 if (!found)
9762 goto read_more_line;
9763 else {
9764 /* handle quoted delimiters */
52327caf 9765 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 9766 const char *t;
95a20fc0 9767 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
9768 t--;
9769 if ((svlast-1 - t) % 2) {
9770 if (!keep_quoted) {
9771 *(svlast-1) = term;
9772 *svlast = '\0';
9773 SvCUR_set(sv, SvCUR(sv) - 1);
9774 }
9775 continue;
9776 }
9777 }
9778 if (PL_multi_open == PL_multi_close) {
9779 cont = FALSE;
9780 }
9781 else {
f54cb97a
AL
9782 const char *t;
9783 char *w;
0331ef07 9784 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
9785 /* At here, all closes are "was quoted" one,
9786 so we don't check PL_multi_close. */
9787 if (*t == '\\') {
9788 if (!keep_quoted && *(t+1) == PL_multi_open)
9789 t++;
9790 else
9791 *w++ = *t++;
9792 }
9793 else if (*t == PL_multi_open)
9794 brackets++;
9795
9796 *w = *t;
9797 }
9798 if (w < t) {
9799 *w++ = term;
9800 *w = '\0';
95a20fc0 9801 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 9802 }
0331ef07 9803 last_off = w - SvPVX(sv);
220e2d4e
IH
9804 if (--brackets <= 0)
9805 cont = FALSE;
9806 }
9807 }
9808 }
9809 if (!keep_delims) {
9810 SvCUR_set(sv, SvCUR(sv) - 1);
9811 *SvEND(sv) = '\0';
9812 }
9813 break;
9814 }
9815
02aa26ce 9816 /* extend sv if need be */
3280af22 9817 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 9818 /* set 'to' to the next character in the sv's string */
463ee0b2 9819 to = SvPVX(sv)+SvCUR(sv);
09bef843 9820
02aa26ce 9821 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
9822 if (PL_multi_open == PL_multi_close) {
9823 for (; s < PL_bufend; s++,to++) {
02aa26ce 9824 /* embedded newlines increment the current line number */
3280af22 9825 if (*s == '\n' && !PL_rsfp)
57843af0 9826 CopLINE_inc(PL_curcop);
02aa26ce 9827 /* handle quoted delimiters */
3280af22 9828 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 9829 if (!keep_quoted && s[1] == term)
a0d0e21e 9830 s++;
02aa26ce 9831 /* any other quotes are simply copied straight through */
a0d0e21e
LW
9832 else
9833 *to++ = *s++;
9834 }
02aa26ce
NT
9835 /* terminate when run out of buffer (the for() condition), or
9836 have found the terminator */
220e2d4e
IH
9837 else if (*s == term) {
9838 if (termlen == 1)
9839 break;
f3b9ce0f 9840 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
9841 break;
9842 }
63cd0674 9843 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9844 has_utf8 = TRUE;
93a17b20
LW
9845 *to = *s;
9846 }
9847 }
02aa26ce
NT
9848
9849 /* if the terminator isn't the same as the start character (e.g.,
9850 matched brackets), we have to allow more in the quoting, and
9851 be prepared for nested brackets.
9852 */
93a17b20 9853 else {
02aa26ce 9854 /* read until we run out of string, or we find the terminator */
3280af22 9855 for (; s < PL_bufend; s++,to++) {
02aa26ce 9856 /* embedded newlines increment the line count */
3280af22 9857 if (*s == '\n' && !PL_rsfp)
57843af0 9858 CopLINE_inc(PL_curcop);
02aa26ce 9859 /* backslashes can escape the open or closing characters */
3280af22 9860 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
9861 if (!keep_quoted &&
9862 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
9863 s++;
9864 else
9865 *to++ = *s++;
9866 }
02aa26ce 9867 /* allow nested opens and closes */
3280af22 9868 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 9869 break;
3280af22 9870 else if (*s == PL_multi_open)
93a17b20 9871 brackets++;
63cd0674 9872 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9873 has_utf8 = TRUE;
93a17b20
LW
9874 *to = *s;
9875 }
9876 }
02aa26ce 9877 /* terminate the copied string and update the sv's end-of-string */
93a17b20 9878 *to = '\0';
95a20fc0 9879 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 9880
02aa26ce
NT
9881 /*
9882 * this next chunk reads more into the buffer if we're not done yet
9883 */
9884
b1c7b182
GS
9885 if (s < PL_bufend)
9886 break; /* handle case where we are done yet :-) */
79072805 9887
6a27c188 9888#ifndef PERL_STRICT_CR
95a20fc0 9889 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
9890 if ((to[-2] == '\r' && to[-1] == '\n') ||
9891 (to[-2] == '\n' && to[-1] == '\r'))
9892 {
f63a84b2
LW
9893 to[-2] = '\n';
9894 to--;
95a20fc0 9895 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
9896 }
9897 else if (to[-1] == '\r')
9898 to[-1] = '\n';
9899 }
95a20fc0 9900 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
9901 to[-1] = '\n';
9902#endif
9903
220e2d4e 9904 read_more_line:
02aa26ce
NT
9905 /* if we're out of file, or a read fails, bail and reset the current
9906 line marker so we can report where the unterminated string began
9907 */
5db06880
NC
9908#ifdef PERL_MAD
9909 if (PL_madskills) {
c35e046a 9910 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9911 if (PL_thisstuff)
9912 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9913 else
cd81e915 9914 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9915 }
9916#endif
f0e67a1d
Z
9917 CopLINE_inc(PL_curcop);
9918 PL_bufptr = PL_bufend;
9919 if (!lex_next_chunk(0)) {
c07a80fd 9920 sv_free(sv);
eb160463 9921 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 9922 return NULL;
79072805 9923 }
f0e67a1d 9924 s = PL_bufptr;
5db06880
NC
9925#ifdef PERL_MAD
9926 stuffstart = 0;
9927#endif
378cc40b 9928 }
4e553d73 9929
02aa26ce
NT
9930 /* at this point, we have successfully read the delimited string */
9931
220e2d4e 9932 if (!PL_encoding || UTF) {
5db06880
NC
9933#ifdef PERL_MAD
9934 if (PL_madskills) {
c35e046a 9935 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 9936 const int len = s - tstart;
cd81e915 9937 if (PL_thisstuff)
c35e046a 9938 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9939 else
c35e046a 9940 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9941 if (!PL_thisclose && !keep_delims)
9942 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
9943 }
9944#endif
9945
220e2d4e
IH
9946 if (keep_delims)
9947 sv_catpvn(sv, s, termlen);
9948 s += termlen;
9949 }
5db06880
NC
9950#ifdef PERL_MAD
9951 else {
9952 if (PL_madskills) {
c35e046a
AL
9953 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9954 const int len = s - tstart - termlen;
cd81e915 9955 if (PL_thisstuff)
c35e046a 9956 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9957 else
c35e046a 9958 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9959 if (!PL_thisclose && !keep_delims)
9960 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
9961 }
9962 }
9963#endif
220e2d4e 9964 if (has_utf8 || PL_encoding)
b1c7b182 9965 SvUTF8_on(sv);
d0063567 9966
57843af0 9967 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
9968
9969 /* if we allocated too much space, give some back */
93a17b20
LW
9970 if (SvCUR(sv) + 5 < SvLEN(sv)) {
9971 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 9972 SvPV_renew(sv, SvLEN(sv));
79072805 9973 }
02aa26ce
NT
9974
9975 /* decide whether this is the first or second quoted string we've read
9976 for this op
9977 */
4e553d73 9978
3280af22
NIS
9979 if (PL_lex_stuff)
9980 PL_lex_repl = sv;
79072805 9981 else
3280af22 9982 PL_lex_stuff = sv;
378cc40b
LW
9983 return s;
9984}
9985
02aa26ce
NT
9986/*
9987 scan_num
9988 takes: pointer to position in buffer
9989 returns: pointer to new position in buffer
6154021b 9990 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
9991
9992 Read a number in any of the formats that Perl accepts:
9993
7fd134d9
JH
9994 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
9995 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
9996 0b[01](_?[01])*
9997 0[0-7](_?[0-7])*
9998 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 9999
3280af22 10000 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10001 thing it reads.
10002
10003 If it reads a number without a decimal point or an exponent, it will
10004 try converting the number to an integer and see if it can do so
10005 without loss of precision.
10006*/
4e553d73 10007
378cc40b 10008char *
bfed75c6 10009Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10010{
97aff369 10011 dVAR;
bfed75c6 10012 register const char *s = start; /* current position in buffer */
02aa26ce
NT
10013 register char *d; /* destination in temp buffer */
10014 register char *e; /* end of temp buffer */
86554af2 10015 NV nv; /* number read, as a double */
a0714e2c 10016 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10017 bool floatit; /* boolean: int or float? */
cbbf8932 10018 const char *lastub = NULL; /* position of last underbar */
bfed75c6 10019 static char const number_too_long[] = "Number too long";
378cc40b 10020
7918f24d
NC
10021 PERL_ARGS_ASSERT_SCAN_NUM;
10022
02aa26ce
NT
10023 /* We use the first character to decide what type of number this is */
10024
378cc40b 10025 switch (*s) {
79072805 10026 default:
cea2e8a9 10027 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 10028
02aa26ce 10029 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10030 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10031 case '0':
10032 {
02aa26ce
NT
10033 /* variables:
10034 u holds the "number so far"
4f19785b
WSI
10035 shift the power of 2 of the base
10036 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10037 overflowed was the number more than we can hold?
10038
10039 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10040 we in octal/hex/binary?" indicator to disallow hex characters
10041 when in octal mode.
02aa26ce 10042 */
9e24b6e2
JH
10043 NV n = 0.0;
10044 UV u = 0;
79072805 10045 I32 shift;
9e24b6e2 10046 bool overflowed = FALSE;
61f33854 10047 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10048 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10049 static const char* const bases[5] =
10050 { "", "binary", "", "octal", "hexadecimal" };
10051 static const char* const Bases[5] =
10052 { "", "Binary", "", "Octal", "Hexadecimal" };
10053 static const char* const maxima[5] =
10054 { "",
10055 "0b11111111111111111111111111111111",
10056 "",
10057 "037777777777",
10058 "0xffffffff" };
bfed75c6 10059 const char *base, *Base, *max;
378cc40b 10060
02aa26ce 10061 /* check for hex */
a674e8db 10062 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10063 shift = 4;
10064 s += 2;
61f33854 10065 just_zero = FALSE;
a674e8db 10066 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10067 shift = 1;
10068 s += 2;
61f33854 10069 just_zero = FALSE;
378cc40b 10070 }
02aa26ce 10071 /* check for a decimal in disguise */
b78218b7 10072 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10073 goto decimal;
02aa26ce 10074 /* so it must be octal */
928753ea 10075 else {
378cc40b 10076 shift = 3;
928753ea
JH
10077 s++;
10078 }
10079
10080 if (*s == '_') {
a2a5de95 10081 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10082 "Misplaced _ in number");
10083 lastub = s++;
10084 }
9e24b6e2
JH
10085
10086 base = bases[shift];
10087 Base = Bases[shift];
10088 max = maxima[shift];
02aa26ce 10089
4f19785b 10090 /* read the rest of the number */
378cc40b 10091 for (;;) {
9e24b6e2 10092 /* x is used in the overflow test,
893fe2c2 10093 b is the digit we're adding on. */
9e24b6e2 10094 UV x, b;
55497cff 10095
378cc40b 10096 switch (*s) {
02aa26ce
NT
10097
10098 /* if we don't mention it, we're done */
378cc40b
LW
10099 default:
10100 goto out;
02aa26ce 10101
928753ea 10102 /* _ are ignored -- but warned about if consecutive */
de3bb511 10103 case '_':
a2a5de95
NC
10104 if (lastub && s == lastub + 1)
10105 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10106 "Misplaced _ in number");
928753ea 10107 lastub = s++;
de3bb511 10108 break;
02aa26ce
NT
10109
10110 /* 8 and 9 are not octal */
378cc40b 10111 case '8': case '9':
4f19785b 10112 if (shift == 3)
cea2e8a9 10113 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10114 /* FALL THROUGH */
02aa26ce
NT
10115
10116 /* octal digits */
4f19785b 10117 case '2': case '3': case '4':
378cc40b 10118 case '5': case '6': case '7':
4f19785b 10119 if (shift == 1)
cea2e8a9 10120 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10121 /* FALL THROUGH */
10122
10123 case '0': case '1':
02aa26ce 10124 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10125 goto digit;
02aa26ce
NT
10126
10127 /* hex digits */
378cc40b
LW
10128 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10129 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10130 /* make sure they said 0x */
378cc40b
LW
10131 if (shift != 4)
10132 goto out;
55497cff 10133 b = (*s++ & 7) + 9;
02aa26ce
NT
10134
10135 /* Prepare to put the digit we have onto the end
10136 of the number so far. We check for overflows.
10137 */
10138
55497cff 10139 digit:
61f33854 10140 just_zero = FALSE;
9e24b6e2
JH
10141 if (!overflowed) {
10142 x = u << shift; /* make room for the digit */
10143
10144 if ((x >> shift) != u
10145 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10146 overflowed = TRUE;
10147 n = (NV) u;
9b387841
NC
10148 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10149 "Integer overflow in %s number",
10150 base);
9e24b6e2
JH
10151 } else
10152 u = x | b; /* add the digit to the end */
10153 }
10154 if (overflowed) {
10155 n *= nvshift[shift];
10156 /* If an NV has not enough bits in its
10157 * mantissa to represent an UV this summing of
10158 * small low-order numbers is a waste of time
10159 * (because the NV cannot preserve the
10160 * low-order bits anyway): we could just
10161 * remember when did we overflow and in the
10162 * end just multiply n by the right
10163 * amount. */
10164 n += (NV) b;
55497cff 10165 }
378cc40b
LW
10166 break;
10167 }
10168 }
02aa26ce
NT
10169
10170 /* if we get here, we had success: make a scalar value from
10171 the number.
10172 */
378cc40b 10173 out:
928753ea
JH
10174
10175 /* final misplaced underbar check */
10176 if (s[-1] == '_') {
a2a5de95 10177 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10178 }
10179
9e24b6e2 10180 if (overflowed) {
a2a5de95
NC
10181 if (n > 4294967295.0)
10182 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10183 "%s number > %s non-portable",
10184 Base, max);
b081dd7e 10185 sv = newSVnv(n);
9e24b6e2
JH
10186 }
10187 else {
15041a67 10188#if UVSIZE > 4
a2a5de95
NC
10189 if (u > 0xffffffff)
10190 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10191 "%s number > %s non-portable",
10192 Base, max);
2cc4c2dc 10193#endif
b081dd7e 10194 sv = newSVuv(u);
9e24b6e2 10195 }
61f33854 10196 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10197 sv = new_constant(start, s - start, "integer",
eb0d8d16 10198 sv, NULL, NULL, 0);
61f33854 10199 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10200 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10201 }
10202 break;
02aa26ce
NT
10203
10204 /*
10205 handle decimal numbers.
10206 we're also sent here when we read a 0 as the first digit
10207 */
378cc40b
LW
10208 case '1': case '2': case '3': case '4': case '5':
10209 case '6': case '7': case '8': case '9': case '.':
10210 decimal:
3280af22
NIS
10211 d = PL_tokenbuf;
10212 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10213 floatit = FALSE;
02aa26ce
NT
10214
10215 /* read next group of digits and _ and copy into d */
de3bb511 10216 while (isDIGIT(*s) || *s == '_') {
4e553d73 10217 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10218 if -w is on
10219 */
93a17b20 10220 if (*s == '_') {
a2a5de95
NC
10221 if (lastub && s == lastub + 1)
10222 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10223 "Misplaced _ in number");
928753ea 10224 lastub = s++;
93a17b20 10225 }
fc36a67e 10226 else {
02aa26ce 10227 /* check for end of fixed-length buffer */
fc36a67e 10228 if (d >= e)
cea2e8a9 10229 Perl_croak(aTHX_ number_too_long);
02aa26ce 10230 /* if we're ok, copy the character */
378cc40b 10231 *d++ = *s++;
fc36a67e 10232 }
378cc40b 10233 }
02aa26ce
NT
10234
10235 /* final misplaced underbar check */
928753ea 10236 if (lastub && s == lastub + 1) {
a2a5de95 10237 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10238 }
02aa26ce
NT
10239
10240 /* read a decimal portion if there is one. avoid
10241 3..5 being interpreted as the number 3. followed
10242 by .5
10243 */
2f3197b3 10244 if (*s == '.' && s[1] != '.') {
79072805 10245 floatit = TRUE;
378cc40b 10246 *d++ = *s++;
02aa26ce 10247
928753ea 10248 if (*s == '_') {
a2a5de95
NC
10249 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10250 "Misplaced _ in number");
928753ea
JH
10251 lastub = s;
10252 }
10253
10254 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10255 */
fc36a67e 10256 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10257 /* fixed length buffer check */
fc36a67e 10258 if (d >= e)
cea2e8a9 10259 Perl_croak(aTHX_ number_too_long);
928753ea 10260 if (*s == '_') {
a2a5de95
NC
10261 if (lastub && s == lastub + 1)
10262 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10263 "Misplaced _ in number");
928753ea
JH
10264 lastub = s;
10265 }
10266 else
fc36a67e 10267 *d++ = *s;
378cc40b 10268 }
928753ea
JH
10269 /* fractional part ending in underbar? */
10270 if (s[-1] == '_') {
a2a5de95
NC
10271 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10272 "Misplaced _ in number");
928753ea 10273 }
dd629d5b
GS
10274 if (*s == '.' && isDIGIT(s[1])) {
10275 /* oops, it's really a v-string, but without the "v" */
f4758303 10276 s = start;
dd629d5b
GS
10277 goto vstring;
10278 }
378cc40b 10279 }
02aa26ce
NT
10280
10281 /* read exponent part, if present */
3792a11b 10282 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10283 floatit = TRUE;
10284 s++;
02aa26ce
NT
10285
10286 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10287 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10288
7fd134d9
JH
10289 /* stray preinitial _ */
10290 if (*s == '_') {
a2a5de95
NC
10291 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10292 "Misplaced _ in number");
7fd134d9
JH
10293 lastub = s++;
10294 }
10295
02aa26ce 10296 /* allow positive or negative exponent */
378cc40b
LW
10297 if (*s == '+' || *s == '-')
10298 *d++ = *s++;
02aa26ce 10299
7fd134d9
JH
10300 /* stray initial _ */
10301 if (*s == '_') {
a2a5de95
NC
10302 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10303 "Misplaced _ in number");
7fd134d9
JH
10304 lastub = s++;
10305 }
10306
7fd134d9
JH
10307 /* read digits of exponent */
10308 while (isDIGIT(*s) || *s == '_') {
10309 if (isDIGIT(*s)) {
10310 if (d >= e)
10311 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10312 *d++ = *s++;
7fd134d9
JH
10313 }
10314 else {
041457d9 10315 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10316 (!isDIGIT(s[1]) && s[1] != '_')))
10317 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10318 "Misplaced _ in number");
b3b48e3e 10319 lastub = s++;
7fd134d9 10320 }
7fd134d9 10321 }
378cc40b 10322 }
02aa26ce 10323
02aa26ce 10324
0b7fceb9 10325 /*
58bb9ec3
NC
10326 We try to do an integer conversion first if no characters
10327 indicating "float" have been found.
0b7fceb9
MU
10328 */
10329
10330 if (!floatit) {
58bb9ec3 10331 UV uv;
6136c704 10332 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10333
10334 if (flags == IS_NUMBER_IN_UV) {
10335 if (uv <= IV_MAX)
b081dd7e 10336 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10337 else
b081dd7e 10338 sv = newSVuv(uv);
58bb9ec3
NC
10339 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10340 if (uv <= (UV) IV_MIN)
b081dd7e 10341 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10342 else
10343 floatit = TRUE;
10344 } else
10345 floatit = TRUE;
10346 }
0b7fceb9 10347 if (floatit) {
58bb9ec3
NC
10348 /* terminate the string */
10349 *d = '\0';
86554af2 10350 nv = Atof(PL_tokenbuf);
b081dd7e 10351 sv = newSVnv(nv);
86554af2 10352 }
86554af2 10353
eb0d8d16
NC
10354 if ( floatit
10355 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10356 const char *const key = floatit ? "float" : "integer";
10357 const STRLEN keylen = floatit ? 5 : 7;
10358 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10359 key, keylen, sv, NULL, NULL, 0);
10360 }
378cc40b 10361 break;
0b7fceb9 10362
e312add1 10363 /* if it starts with a v, it could be a v-string */
a7cb1f99 10364 case 'v':
dd629d5b 10365vstring:
561b68a9 10366 sv = newSV(5); /* preallocate storage space */
65b06e02 10367 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10368 break;
79072805 10369 }
a687059c 10370
02aa26ce
NT
10371 /* make the op for the constant and return */
10372
a86a20aa 10373 if (sv)
b73d6f50 10374 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10375 else
5f66b61c 10376 lvalp->opval = NULL;
a687059c 10377
73d840c0 10378 return (char *)s;
378cc40b
LW
10379}
10380
76e3520e 10381STATIC char *
cea2e8a9 10382S_scan_formline(pTHX_ register char *s)
378cc40b 10383{
97aff369 10384 dVAR;
79072805 10385 register char *eol;
378cc40b 10386 register char *t;
6136c704 10387 SV * const stuff = newSVpvs("");
79072805 10388 bool needargs = FALSE;
c5ee2135 10389 bool eofmt = FALSE;
5db06880
NC
10390#ifdef PERL_MAD
10391 char *tokenstart = s;
4f61fd4b
JC
10392 SV* savewhite = NULL;
10393
5db06880 10394 if (PL_madskills) {
cd81e915
NC
10395 savewhite = PL_thiswhite;
10396 PL_thiswhite = 0;
5db06880
NC
10397 }
10398#endif
378cc40b 10399
7918f24d
NC
10400 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10401
79072805 10402 while (!needargs) {
a1b95068 10403 if (*s == '.') {
c35e046a 10404 t = s+1;
51882d45 10405#ifdef PERL_STRICT_CR
c35e046a
AL
10406 while (SPACE_OR_TAB(*t))
10407 t++;
51882d45 10408#else
c35e046a
AL
10409 while (SPACE_OR_TAB(*t) || *t == '\r')
10410 t++;
51882d45 10411#endif
c5ee2135
WL
10412 if (*t == '\n' || t == PL_bufend) {
10413 eofmt = TRUE;
79072805 10414 break;
c5ee2135 10415 }
79072805 10416 }
3280af22 10417 if (PL_in_eval && !PL_rsfp) {
07409e01 10418 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 10419 if (!eol++)
3280af22 10420 eol = PL_bufend;
0f85fab0
LW
10421 }
10422 else
3280af22 10423 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 10424 if (*s != '#') {
a0d0e21e
LW
10425 for (t = s; t < eol; t++) {
10426 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10427 needargs = FALSE;
10428 goto enough; /* ~~ must be first line in formline */
378cc40b 10429 }
a0d0e21e
LW
10430 if (*t == '@' || *t == '^')
10431 needargs = TRUE;
378cc40b 10432 }
7121b347
MG
10433 if (eol > s) {
10434 sv_catpvn(stuff, s, eol-s);
2dc4c65b 10435#ifndef PERL_STRICT_CR
7121b347
MG
10436 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10437 char *end = SvPVX(stuff) + SvCUR(stuff);
10438 end[-2] = '\n';
10439 end[-1] = '\0';
b162af07 10440 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 10441 }
2dc4c65b 10442#endif
7121b347
MG
10443 }
10444 else
10445 break;
79072805 10446 }
95a20fc0 10447 s = (char*)eol;
3280af22 10448 if (PL_rsfp) {
f0e67a1d 10449 bool got_some;
5db06880
NC
10450#ifdef PERL_MAD
10451 if (PL_madskills) {
cd81e915
NC
10452 if (PL_thistoken)
10453 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 10454 else
cd81e915 10455 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
10456 }
10457#endif
f0e67a1d
Z
10458 PL_bufptr = PL_bufend;
10459 CopLINE_inc(PL_curcop);
10460 got_some = lex_next_chunk(0);
10461 CopLINE_dec(PL_curcop);
10462 s = PL_bufptr;
5db06880 10463#ifdef PERL_MAD
f0e67a1d 10464 tokenstart = PL_bufptr;
5db06880 10465#endif
f0e67a1d 10466 if (!got_some)
378cc40b 10467 break;
378cc40b 10468 }
463ee0b2 10469 incline(s);
79072805 10470 }
a0d0e21e
LW
10471 enough:
10472 if (SvCUR(stuff)) {
3280af22 10473 PL_expect = XTERM;
79072805 10474 if (needargs) {
3280af22 10475 PL_lex_state = LEX_NORMAL;
cd81e915 10476 start_force(PL_curforce);
9ded7720 10477 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
10478 force_next(',');
10479 }
a0d0e21e 10480 else
3280af22 10481 PL_lex_state = LEX_FORMLINE;
1bd51a4c 10482 if (!IN_BYTES) {
95a20fc0 10483 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
10484 SvUTF8_on(stuff);
10485 else if (PL_encoding)
10486 sv_recode_to_utf8(stuff, PL_encoding);
10487 }
cd81e915 10488 start_force(PL_curforce);
9ded7720 10489 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 10490 force_next(THING);
cd81e915 10491 start_force(PL_curforce);
9ded7720 10492 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 10493 force_next(LSTOP);
378cc40b 10494 }
79072805 10495 else {
8990e307 10496 SvREFCNT_dec(stuff);
c5ee2135
WL
10497 if (eofmt)
10498 PL_lex_formbrack = 0;
3280af22 10499 PL_bufptr = s;
79072805 10500 }
5db06880
NC
10501#ifdef PERL_MAD
10502 if (PL_madskills) {
cd81e915
NC
10503 if (PL_thistoken)
10504 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 10505 else
cd81e915
NC
10506 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10507 PL_thiswhite = savewhite;
5db06880
NC
10508 }
10509#endif
79072805 10510 return s;
378cc40b 10511}
a687059c 10512
ba6d6ac9 10513I32
864dbfa3 10514Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 10515{
97aff369 10516 dVAR;
a3b680e6 10517 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 10518 CV* const outsidecv = PL_compcv;
8990e307 10519
3280af22
NIS
10520 if (PL_compcv) {
10521 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 10522 }
7766f137 10523 SAVEI32(PL_subline);
3280af22 10524 save_item(PL_subname);
3280af22 10525 SAVESPTR(PL_compcv);
3280af22 10526
ea726b52 10527 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
10528 CvFLAGS(PL_compcv) |= flags;
10529
57843af0 10530 PL_subline = CopLINE(PL_curcop);
dd2155a4 10531 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 10532 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 10533 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 10534
8990e307
LW
10535 return oldsavestack_ix;
10536}
10537
084592ab
CN
10538#ifdef __SC__
10539#pragma segment Perl_yylex
10540#endif
af41e527
NC
10541static int
10542S_yywarn(pTHX_ const char *const s)
8990e307 10543{
97aff369 10544 dVAR;
7918f24d
NC
10545
10546 PERL_ARGS_ASSERT_YYWARN;
10547
faef0170 10548 PL_in_eval |= EVAL_WARNONLY;
748a9306 10549 yyerror(s);
faef0170 10550 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 10551 return 0;
8990e307
LW
10552}
10553
10554int
15f169a1 10555Perl_yyerror(pTHX_ const char *const s)
463ee0b2 10556{
97aff369 10557 dVAR;
bfed75c6
AL
10558 const char *where = NULL;
10559 const char *context = NULL;
68dc0745 10560 int contlen = -1;
46fc3d4c 10561 SV *msg;
5912531f 10562 int yychar = PL_parser->yychar;
463ee0b2 10563
7918f24d
NC
10564 PERL_ARGS_ASSERT_YYERROR;
10565
3280af22 10566 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 10567 where = "at EOF";
8bcfe651
TM
10568 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10569 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10570 PL_oldbufptr != PL_bufptr) {
f355267c
JH
10571 /*
10572 Only for NetWare:
10573 The code below is removed for NetWare because it abends/crashes on NetWare
10574 when the script has error such as not having the closing quotes like:
10575 if ($var eq "value)
10576 Checking of white spaces is anyway done in NetWare code.
10577 */
10578#ifndef NETWARE
3280af22
NIS
10579 while (isSPACE(*PL_oldoldbufptr))
10580 PL_oldoldbufptr++;
f355267c 10581#endif
3280af22
NIS
10582 context = PL_oldoldbufptr;
10583 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 10584 }
8bcfe651
TM
10585 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10586 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
10587 /*
10588 Only for NetWare:
10589 The code below is removed for NetWare because it abends/crashes on NetWare
10590 when the script has error such as not having the closing quotes like:
10591 if ($var eq "value)
10592 Checking of white spaces is anyway done in NetWare code.
10593 */
10594#ifndef NETWARE
3280af22
NIS
10595 while (isSPACE(*PL_oldbufptr))
10596 PL_oldbufptr++;
f355267c 10597#endif
3280af22
NIS
10598 context = PL_oldbufptr;
10599 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
10600 }
10601 else if (yychar > 255)
68dc0745 10602 where = "next token ???";
12fbd33b 10603 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
10604 if (PL_lex_state == LEX_NORMAL ||
10605 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 10606 where = "at end of line";
3280af22 10607 else if (PL_lex_inpat)
68dc0745 10608 where = "within pattern";
463ee0b2 10609 else
68dc0745 10610 where = "within string";
463ee0b2 10611 }
46fc3d4c 10612 else {
84bafc02 10613 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 10614 if (yychar < 32)
cea2e8a9 10615 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 10616 else if (isPRINT_LC(yychar)) {
88c9ea1e 10617 const char string = yychar;
5e7aa789
NC
10618 sv_catpvn(where_sv, &string, 1);
10619 }
463ee0b2 10620 else
cea2e8a9 10621 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 10622 where = SvPVX_const(where_sv);
463ee0b2 10623 }
46fc3d4c 10624 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 10625 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 10626 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 10627 if (context)
cea2e8a9 10628 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 10629 else
cea2e8a9 10630 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 10631 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 10632 Perl_sv_catpvf(aTHX_ msg,
57def98f 10633 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 10634 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 10635 PL_multi_end = 0;
a0d0e21e 10636 }
500960a6 10637 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 10638 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 10639 }
463ee0b2 10640 else
5a844595 10641 qerror(msg);
c7d6bfb2
GS
10642 if (PL_error_count >= 10) {
10643 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 10644 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 10645 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
10646 else
10647 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 10648 OutCopFILE(PL_curcop));
c7d6bfb2 10649 }
3280af22 10650 PL_in_my = 0;
5c284bb0 10651 PL_in_my_stash = NULL;
463ee0b2
LW
10652 return 0;
10653}
084592ab
CN
10654#ifdef __SC__
10655#pragma segment Main
10656#endif
4e35701f 10657
b250498f 10658STATIC char*
3ae08724 10659S_swallow_bom(pTHX_ U8 *s)
01ec43d0 10660{
97aff369 10661 dVAR;
f54cb97a 10662 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
10663
10664 PERL_ARGS_ASSERT_SWALLOW_BOM;
10665
7aa207d6 10666 switch (s[0]) {
4e553d73
NIS
10667 case 0xFF:
10668 if (s[1] == 0xFE) {
ee6ba15d 10669 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 10670 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 10671 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 10672#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 10673 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 10674 s += 2;
dea0fc0b 10675 if (PL_bufend > (char*)s) {
81a923f4 10676 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 10677 }
b250498f 10678#else
ee6ba15d 10679 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 10680#endif
01ec43d0
GS
10681 }
10682 break;
78ae23f5 10683 case 0xFE:
7aa207d6 10684 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 10685#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10686 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
10687 s += 2;
10688 if (PL_bufend > (char *)s) {
81a923f4 10689 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 10690 }
b250498f 10691#else
ee6ba15d 10692 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 10693#endif
01ec43d0
GS
10694 }
10695 break;
3ae08724
GS
10696 case 0xEF:
10697 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 10698 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
10699 s += 3; /* UTF-8 */
10700 }
10701 break;
10702 case 0:
7aa207d6
JH
10703 if (slen > 3) {
10704 if (s[1] == 0) {
10705 if (s[2] == 0xFE && s[3] == 0xFF) {
10706 /* UTF-32 big-endian */
ee6ba15d 10707 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
10708 }
10709 }
10710 else if (s[2] == 0 && s[3] != 0) {
10711 /* Leading bytes
10712 * 00 xx 00 xx
10713 * are a good indicator of UTF-16BE. */
ee6ba15d 10714#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10715 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
10716 s = add_utf16_textfilter(s, FALSE);
10717#else
10718 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10719#endif
7aa207d6 10720 }
01ec43d0 10721 }
e294cc5d
JH
10722#ifdef EBCDIC
10723 case 0xDD:
10724 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10725 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10726 s += 4; /* UTF-8 */
10727 }
10728 break;
10729#endif
10730
7aa207d6
JH
10731 default:
10732 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10733 /* Leading bytes
10734 * xx 00 xx 00
10735 * are a good indicator of UTF-16LE. */
ee6ba15d 10736#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10737 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 10738 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
10739#else
10740 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10741#endif
7aa207d6 10742 }
01ec43d0 10743 }
b8f84bb2 10744 return (char*)s;
b250498f 10745}
4755096e 10746
6e3aabd6
GS
10747
10748#ifndef PERL_NO_UTF16_FILTER
10749static I32
a28af015 10750S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 10751{
97aff369 10752 dVAR;
f3040f2c 10753 SV *const filter = FILTER_DATA(idx);
2a773401
NC
10754 /* We re-use this each time round, throwing the contents away before we
10755 return. */
2a773401 10756 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 10757 SV *const utf8_buffer = filter;
c28d6105 10758 IV status = IoPAGE(filter);
f2338a2e 10759 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 10760 I32 retval;
c8b0cbae 10761
c85ae797
NC
10762 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10763
c8b0cbae
NC
10764 /* As we're automatically added, at the lowest level, and hence only called
10765 from this file, we can be sure that we're not called in block mode. Hence
10766 don't bother writing code to deal with block mode. */
10767 if (maxlen) {
10768 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10769 }
c28d6105
NC
10770 if (status < 0) {
10771 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10772 }
1de9afcd 10773 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 10774 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 10775 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
10776 reverse ? 'l' : 'b', idx, maxlen, status,
10777 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10778
10779 while (1) {
10780 STRLEN chars;
10781 STRLEN have;
dea0fc0b 10782 I32 newlen;
2a773401 10783 U8 *end;
c28d6105
NC
10784 /* First, look in our buffer of existing UTF-8 data: */
10785 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10786
10787 if (nl) {
10788 ++nl;
10789 } else if (status == 0) {
10790 /* EOF */
10791 IoPAGE(filter) = 0;
10792 nl = SvEND(utf8_buffer);
10793 }
10794 if (nl) {
d2d1d4de
NC
10795 STRLEN got = nl - SvPVX(utf8_buffer);
10796 /* Did we have anything to append? */
10797 retval = got != 0;
10798 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
10799 /* Everything else in this code works just fine if SVp_POK isn't
10800 set. This, however, needs it, and we need it to work, else
10801 we loop infinitely because the buffer is never consumed. */
10802 sv_chop(utf8_buffer, nl);
10803 break;
10804 }
ba77e4cc 10805
c28d6105
NC
10806 /* OK, not a complete line there, so need to read some more UTF-16.
10807 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
10808 while (1) {
10809 if (status <= 0)
10810 break;
10811 if (SvCUR(utf16_buffer) >= 2) {
10812 /* Location of the high octet of the last complete code point.
10813 Gosh, UTF-16 is a pain. All the benefits of variable length,
10814 *coupled* with all the benefits of partial reads and
10815 endianness. */
10816 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10817 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10818
10819 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10820 break;
10821 }
10822
10823 /* We have the first half of a surrogate. Read more. */
10824 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10825 }
c28d6105 10826
c28d6105
NC
10827 status = FILTER_READ(idx + 1, utf16_buffer,
10828 160 + (SvCUR(utf16_buffer) & 1));
10829 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 10830 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
10831 if (status < 0) {
10832 /* Error */
10833 IoPAGE(filter) = status;
10834 return status;
10835 }
10836 }
10837
10838 chars = SvCUR(utf16_buffer) >> 1;
10839 have = SvCUR(utf8_buffer);
10840 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 10841
aa6dbd60 10842 if (reverse) {
c28d6105
NC
10843 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10844 (U8*)SvPVX_const(utf8_buffer) + have,
10845 chars * 2, &newlen);
aa6dbd60 10846 } else {
2a773401 10847 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
10848 (U8*)SvPVX_const(utf8_buffer) + have,
10849 chars * 2, &newlen);
2a773401 10850 }
c28d6105 10851 SvCUR_set(utf8_buffer, have + newlen);
2a773401 10852 *end = '\0';
c28d6105 10853
e07286ed
NC
10854 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10855 it's private to us, and utf16_to_utf8{,reversed} take a
10856 (pointer,length) pair, rather than a NUL-terminated string. */
10857 if(SvCUR(utf16_buffer) & 1) {
10858 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10859 SvCUR_set(utf16_buffer, 1);
10860 } else {
10861 SvCUR_set(utf16_buffer, 0);
10862 }
2a773401 10863 }
c28d6105
NC
10864 DEBUG_P(PerlIO_printf(Perl_debug_log,
10865 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10866 status,
10867 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10868 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 10869 return retval;
6e3aabd6 10870}
81a923f4
NC
10871
10872static U8 *
10873S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10874{
2a773401 10875 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 10876
c85ae797
NC
10877 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10878
c28d6105 10879 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 10880 sv_setpvs(filter, "");
2a773401 10881 IoLINES(filter) = reversed;
c28d6105
NC
10882 IoPAGE(filter) = 1; /* Not EOF */
10883
10884 /* Sadly, we have to return a valid pointer, come what may, so we have to
10885 ignore any error return from this. */
10886 SvCUR_set(PL_linestr, 0);
10887 if (FILTER_READ(0, PL_linestr, 0)) {
10888 SvUTF8_on(PL_linestr);
81a923f4 10889 } else {
c28d6105 10890 SvUTF8_on(PL_linestr);
81a923f4 10891 }
c28d6105 10892 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
10893 return (U8*)SvPVX(PL_linestr);
10894}
6e3aabd6 10895#endif
9f4817db 10896
f333445c
JP
10897/*
10898Returns a pointer to the next character after the parsed
10899vstring, as well as updating the passed in sv.
10900
10901Function must be called like
10902
561b68a9 10903 sv = newSV(5);
65b06e02 10904 s = scan_vstring(s,e,sv);
f333445c 10905
65b06e02 10906where s and e are the start and end of the string.
f333445c
JP
10907The sv should already be large enough to store the vstring
10908passed in, for performance reasons.
10909
10910*/
10911
10912char *
15f169a1 10913Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 10914{
97aff369 10915 dVAR;
bfed75c6
AL
10916 const char *pos = s;
10917 const char *start = s;
7918f24d
NC
10918
10919 PERL_ARGS_ASSERT_SCAN_VSTRING;
10920
f333445c 10921 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 10922 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 10923 pos++;
f333445c
JP
10924 if ( *pos != '.') {
10925 /* this may not be a v-string if followed by => */
bfed75c6 10926 const char *next = pos;
65b06e02 10927 while (next < e && isSPACE(*next))
8fc7bb1c 10928 ++next;
65b06e02 10929 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
10930 /* return string not v-string */
10931 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 10932 return (char *)pos;
f333445c
JP
10933 }
10934 }
10935
10936 if (!isALPHA(*pos)) {
89ebb4a3 10937 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 10938
d4c19fe8
AL
10939 if (*s == 'v')
10940 s++; /* get past 'v' */
f333445c 10941
76f68e9b 10942 sv_setpvs(sv, "");
f333445c
JP
10943
10944 for (;;) {
d4c19fe8 10945 /* this is atoi() that tolerates underscores */
0bd48802
AL
10946 U8 *tmpend;
10947 UV rev = 0;
d4c19fe8
AL
10948 const char *end = pos;
10949 UV mult = 1;
10950 while (--end >= s) {
10951 if (*end != '_') {
10952 const UV orev = rev;
f333445c
JP
10953 rev += (*end - '0') * mult;
10954 mult *= 10;
9b387841
NC
10955 if (orev > rev)
10956 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10957 "Integer overflow in decimal number");
f333445c
JP
10958 }
10959 }
10960#ifdef EBCDIC
10961 if (rev > 0x7FFFFFFF)
10962 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
10963#endif
10964 /* Append native character for the rev point */
10965 tmpend = uvchr_to_utf8(tmpbuf, rev);
10966 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
10967 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
10968 SvUTF8_on(sv);
65b06e02 10969 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
10970 s = ++pos;
10971 else {
10972 s = pos;
10973 break;
10974 }
65b06e02 10975 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
10976 pos++;
10977 }
10978 SvPOK_on(sv);
10979 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
10980 SvRMAGICAL_on(sv);
10981 }
73d840c0 10982 return (char *)s;
f333445c
JP
10983}
10984
88e1f1a2
JV
10985int
10986Perl_keyword_plugin_standard(pTHX_
10987 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
10988{
10989 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
10990 PERL_UNUSED_CONTEXT;
10991 PERL_UNUSED_ARG(keyword_ptr);
10992 PERL_UNUSED_ARG(keyword_len);
10993 PERL_UNUSED_ARG(op_ptr);
10994 return KEYWORD_PLUGIN_DECLINE;
10995}
10996
78cdf107 10997#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 10998static void
78cdf107 10999S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11000{
11001 SAVEI32(PL_lex_brackets);
11002 if (PL_lex_brackets > 100)
11003 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11004 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11005 SAVEI32(PL_lex_allbrackets);
11006 PL_lex_allbrackets = 0;
11007 SAVEI8(PL_lex_fakeeof);
2dcac756 11008 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11009 if(yyparse(gramtype) && !PL_parser->error_count)
11010 qerror(Perl_mess(aTHX_ "Parse error"));
11011}
11012
78cdf107 11013#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11014static OP *
78cdf107 11015S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11016{
11017 OP *o;
11018 ENTER;
11019 SAVEVPTR(PL_eval_root);
11020 PL_eval_root = NULL;
78cdf107 11021 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11022 o = PL_eval_root;
11023 LEAVE;
11024 return o;
11025}
11026
78cdf107
Z
11027#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11028static OP *
11029S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11030{
11031 OP *exprop;
11032 if (flags & ~PARSE_OPTIONAL)
11033 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11034 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11035 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11036 if (!PL_parser->error_count)
11037 qerror(Perl_mess(aTHX_ "Parse error"));
11038 exprop = newOP(OP_NULL, 0);
11039 }
11040 return exprop;
11041}
11042
11043/*
11044=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11045
11046Parse a Perl arithmetic expression. This may contain operators of precedence
11047down to the bit shift operators. The expression must be followed (and thus
11048terminated) either by a comparison or lower-precedence operator or by
11049something that would normally terminate an expression such as semicolon.
11050If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11051otherwise it is mandatory. It is up to the caller to ensure that the
11052dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11053the source of the code to be parsed and the lexical context for the
11054expression.
11055
11056The op tree representing the expression is returned. If an optional
11057expression is absent, a null pointer is returned, otherwise the pointer
11058will be non-null.
11059
11060If an error occurs in parsing or compilation, in most cases a valid op
11061tree is returned anyway. The error is reflected in the parser state,
11062normally resulting in a single exception at the top level of parsing
11063which covers all the compilation errors that occurred. Some compilation
11064errors, however, will throw an exception immediately.
11065
11066=cut
11067*/
11068
11069OP *
11070Perl_parse_arithexpr(pTHX_ U32 flags)
11071{
11072 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11073}
11074
11075/*
11076=for apidoc Amx|OP *|parse_termexpr|U32 flags
11077
11078Parse a Perl term expression. This may contain operators of precedence
11079down to the assignment operators. The expression must be followed (and thus
11080terminated) either by a comma or lower-precedence operator or by
11081something that would normally terminate an expression such as semicolon.
11082If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11083otherwise it is mandatory. It is up to the caller to ensure that the
11084dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11085the source of the code to be parsed and the lexical context for the
11086expression.
11087
11088The op tree representing the expression is returned. If an optional
11089expression is absent, a null pointer is returned, otherwise the pointer
11090will be non-null.
11091
11092If an error occurs in parsing or compilation, in most cases a valid op
11093tree is returned anyway. The error is reflected in the parser state,
11094normally resulting in a single exception at the top level of parsing
11095which covers all the compilation errors that occurred. Some compilation
11096errors, however, will throw an exception immediately.
11097
11098=cut
11099*/
11100
11101OP *
11102Perl_parse_termexpr(pTHX_ U32 flags)
11103{
11104 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11105}
11106
11107/*
11108=for apidoc Amx|OP *|parse_listexpr|U32 flags
11109
11110Parse a Perl list expression. This may contain operators of precedence
11111down to the comma operator. The expression must be followed (and thus
11112terminated) either by a low-precedence logic operator such as C<or> or by
11113something that would normally terminate an expression such as semicolon.
11114If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11115otherwise it is mandatory. It is up to the caller to ensure that the
11116dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11117the source of the code to be parsed and the lexical context for the
11118expression.
11119
11120The op tree representing the expression is returned. If an optional
11121expression is absent, a null pointer is returned, otherwise the pointer
11122will be non-null.
11123
11124If an error occurs in parsing or compilation, in most cases a valid op
11125tree is returned anyway. The error is reflected in the parser state,
11126normally resulting in a single exception at the top level of parsing
11127which covers all the compilation errors that occurred. Some compilation
11128errors, however, will throw an exception immediately.
11129
11130=cut
11131*/
11132
11133OP *
11134Perl_parse_listexpr(pTHX_ U32 flags)
11135{
11136 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11137}
11138
11139/*
11140=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11141
11142Parse a single complete Perl expression. This allows the full
11143expression grammar, including the lowest-precedence operators such
11144as C<or>. The expression must be followed (and thus terminated) by a
11145token that an expression would normally be terminated by: end-of-file,
11146closing bracketing punctuation, semicolon, or one of the keywords that
11147signals a postfix expression-statement modifier. If I<flags> includes
11148C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11149mandatory. It is up to the caller to ensure that the dynamic parser
11150state (L</PL_parser> et al) is correctly set to reflect the source of
11151the code to be parsed and the lexical context for the expression.
11152
11153The op tree representing the expression is returned. If an optional
11154expression is absent, a null pointer is returned, otherwise the pointer
11155will be non-null.
11156
11157If an error occurs in parsing or compilation, in most cases a valid op
11158tree is returned anyway. The error is reflected in the parser state,
11159normally resulting in a single exception at the top level of parsing
11160which covers all the compilation errors that occurred. Some compilation
11161errors, however, will throw an exception immediately.
11162
11163=cut
11164*/
11165
11166OP *
11167Perl_parse_fullexpr(pTHX_ U32 flags)
11168{
11169 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11170}
11171
e53d8f76
Z
11172/*
11173=for apidoc Amx|OP *|parse_block|U32 flags
11174
11175Parse a single complete Perl code block. This consists of an opening
11176brace, a sequence of statements, and a closing brace. The block
11177constitutes a lexical scope, so C<my> variables and various compile-time
11178effects can be contained within it. It is up to the caller to ensure
11179that the dynamic parser state (L</PL_parser> et al) is correctly set to
11180reflect the source of the code to be parsed and the lexical context for
11181the statement.
11182
11183The op tree representing the code block is returned. This is always a
11184real op, never a null pointer. It will normally be a C<lineseq> list,
11185including C<nextstate> or equivalent ops. No ops to construct any kind
11186of runtime scope are included by virtue of it being a block.
11187
11188If an error occurs in parsing or compilation, in most cases a valid op
11189tree (most likely null) is returned anyway. The error is reflected in
11190the parser state, normally resulting in a single exception at the top
11191level of parsing which covers all the compilation errors that occurred.
11192Some compilation errors, however, will throw an exception immediately.
11193
11194The I<flags> parameter is reserved for future use, and must always
11195be zero.
11196
11197=cut
11198*/
11199
11200OP *
11201Perl_parse_block(pTHX_ U32 flags)
11202{
11203 if (flags)
11204 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11205 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11206}
11207
1da4ca5f 11208/*
8359b381
Z
11209=for apidoc Amx|OP *|parse_barestmt|U32 flags
11210
11211Parse a single unadorned Perl statement. This may be a normal imperative
11212statement or a declaration that has compile-time effect. It does not
11213include any label or other affixture. It is up to the caller to ensure
11214that the dynamic parser state (L</PL_parser> et al) is correctly set to
11215reflect the source of the code to be parsed and the lexical context for
11216the statement.
11217
11218The op tree representing the statement is returned. This may be a
11219null pointer if the statement is null, for example if it was actually
11220a subroutine definition (which has compile-time side effects). If not
11221null, it will be ops directly implementing the statement, suitable to
11222pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11223equivalent op (except for those embedded in a scope contained entirely
11224within the statement).
11225
11226If an error occurs in parsing or compilation, in most cases a valid op
11227tree (most likely null) is returned anyway. The error is reflected in
11228the parser state, normally resulting in a single exception at the top
11229level of parsing which covers all the compilation errors that occurred.
11230Some compilation errors, however, will throw an exception immediately.
11231
11232The I<flags> parameter is reserved for future use, and must always
11233be zero.
11234
11235=cut
11236*/
11237
11238OP *
11239Perl_parse_barestmt(pTHX_ U32 flags)
11240{
11241 if (flags)
11242 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11243 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11244}
11245
11246/*
361d9b55
Z
11247=for apidoc Amx|SV *|parse_label|U32 flags
11248
11249Parse a single label, possibly optional, of the type that may prefix a
11250Perl statement. It is up to the caller to ensure that the dynamic parser
11251state (L</PL_parser> et al) is correctly set to reflect the source of
11252the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11253label is optional, otherwise it is mandatory.
11254
11255The name of the label is returned in the form of a fresh scalar. If an
11256optional label is absent, a null pointer is returned.
11257
11258If an error occurs in parsing, which can only occur if the label is
11259mandatory, a valid label is returned anyway. The error is reflected in
11260the parser state, normally resulting in a single exception at the top
11261level of parsing which covers all the compilation errors that occurred.
11262
11263=cut
11264*/
11265
11266SV *
11267Perl_parse_label(pTHX_ U32 flags)
11268{
11269 if (flags & ~PARSE_OPTIONAL)
11270 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11271 if (PL_lex_state == LEX_KNOWNEXT) {
11272 PL_parser->yychar = yylex();
11273 if (PL_parser->yychar == LABEL) {
11274 char *lpv = pl_yylval.pval;
11275 STRLEN llen = strlen(lpv);
11276 SV *lsv;
11277 PL_parser->yychar = YYEMPTY;
11278 lsv = newSV_type(SVt_PV);
11279 SvPV_set(lsv, lpv);
11280 SvCUR_set(lsv, llen);
11281 SvLEN_set(lsv, llen+1);
11282 SvPOK_on(lsv);
11283 return lsv;
11284 } else {
11285 yyunlex();
11286 goto no_label;
11287 }
11288 } else {
11289 char *s, *t;
11290 U8 c;
11291 STRLEN wlen, bufptr_pos;
11292 lex_read_space(0);
11293 t = s = PL_bufptr;
11294 c = (U8)*s;
11295 if (!isIDFIRST_A(c))
11296 goto no_label;
11297 do {
11298 c = (U8)*++t;
11299 } while(isWORDCHAR_A(c));
11300 wlen = t - s;
11301 if (word_takes_any_delimeter(s, wlen))
11302 goto no_label;
11303 bufptr_pos = s - SvPVX(PL_linestr);
11304 PL_bufptr = t;
11305 lex_read_space(LEX_KEEP_PREVIOUS);
11306 t = PL_bufptr;
11307 s = SvPVX(PL_linestr) + bufptr_pos;
11308 if (t[0] == ':' && t[1] != ':') {
11309 PL_oldoldbufptr = PL_oldbufptr;
11310 PL_oldbufptr = s;
11311 PL_bufptr = t+1;
11312 return newSVpvn(s, wlen);
11313 } else {
11314 PL_bufptr = s;
11315 no_label:
11316 if (flags & PARSE_OPTIONAL) {
11317 return NULL;
11318 } else {
11319 qerror(Perl_mess(aTHX_ "Parse error"));
11320 return newSVpvs("x");
11321 }
11322 }
11323 }
11324}
11325
11326/*
28ac2b49
Z
11327=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11328
11329Parse a single complete Perl statement. This may be a normal imperative
8359b381 11330statement or a declaration that has compile-time effect, and may include
8e720305 11331optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11332parser state (L</PL_parser> et al) is correctly set to reflect the source
11333of the code to be parsed and the lexical context for the statement.
11334
11335The op tree representing the statement is returned. This may be a
11336null pointer if the statement is null, for example if it was actually
11337a subroutine definition (which has compile-time side effects). If not
11338null, it will be the result of a L</newSTATEOP> call, normally including
11339a C<nextstate> or equivalent op.
11340
11341If an error occurs in parsing or compilation, in most cases a valid op
11342tree (most likely null) is returned anyway. The error is reflected in
11343the parser state, normally resulting in a single exception at the top
11344level of parsing which covers all the compilation errors that occurred.
11345Some compilation errors, however, will throw an exception immediately.
11346
11347The I<flags> parameter is reserved for future use, and must always
11348be zero.
11349
11350=cut
11351*/
11352
11353OP *
11354Perl_parse_fullstmt(pTHX_ U32 flags)
11355{
28ac2b49
Z
11356 if (flags)
11357 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11358 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11359}
11360
07ffcb73
Z
11361/*
11362=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11363
11364Parse a sequence of zero or more Perl statements. These may be normal
11365imperative statements, including optional labels, or declarations
11366that have compile-time effect, or any mixture thereof. The statement
11367sequence ends when a closing brace or end-of-file is encountered in a
11368place where a new statement could have validly started. It is up to
11369the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11370is correctly set to reflect the source of the code to be parsed and the
11371lexical context for the statements.
11372
11373The op tree representing the statement sequence is returned. This may
11374be a null pointer if the statements were all null, for example if there
11375were no statements or if there were only subroutine definitions (which
11376have compile-time side effects). If not null, it will be a C<lineseq>
11377list, normally including C<nextstate> or equivalent ops.
11378
11379If an error occurs in parsing or compilation, in most cases a valid op
11380tree is returned anyway. The error is reflected in the parser state,
11381normally resulting in a single exception at the top level of parsing
11382which covers all the compilation errors that occurred. Some compilation
11383errors, however, will throw an exception immediately.
11384
11385The I<flags> parameter is reserved for future use, and must always
11386be zero.
11387
11388=cut
11389*/
11390
11391OP *
11392Perl_parse_stmtseq(pTHX_ U32 flags)
11393{
11394 OP *stmtseqop;
e53d8f76 11395 I32 c;
07ffcb73 11396 if (flags)
78cdf107
Z
11397 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11398 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
11399 c = lex_peek_unichar(0);
11400 if (c != -1 && c != /*{*/'}')
07ffcb73 11401 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
11402 return stmtseqop;
11403}
11404
ea25a9b2 11405void
f7e3d326 11406Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 11407{
f7e3d326 11408 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2 11409 deprecate("qw(...) as parentheses");
78cdf107 11410 force_next((4<<24)|')');
ea25a9b2
Z
11411 if (qwlist->op_type == OP_STUB) {
11412 op_free(qwlist);
11413 }
11414 else {
3d8e05a0 11415 start_force(PL_curforce);
ea25a9b2
Z
11416 NEXTVAL_NEXTTOKE.opval = qwlist;
11417 force_next(THING);
11418 }
78cdf107 11419 force_next((2<<24)|'(');
ea25a9b2
Z
11420}
11421
28ac2b49 11422/*
1da4ca5f
NC
11423 * Local variables:
11424 * c-indentation-style: bsd
11425 * c-basic-offset: 4
11426 * indent-tabs-mode: t
11427 * End:
11428 *
37442d52
RGS
11429 * ex: set ts=8 sts=4 sw=4 noet:
11430 */