This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Correct skip counts for miniperl
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
78cdf107
Z
51#define PL_lex_allbrackets (PL_parser->lex_allbrackets)
52#define PL_lex_fakeeof (PL_parser->lex_fakeeof)
199e78b7
DM
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
a7aaec61
Z
129#define XENUMMASK 0x3f
130#define XFAKEEOF 0x40
131#define XFAKEBRACK 0x80
9059aa12 132
39e02b42
JH
133#ifdef USE_UTF8_SCRIPTS
134# define UTF (!IN_BYTES)
2b9d42f0 135#else
746b446a 136# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 137#endif
a0ed51b3 138
b1fc3636
CJ
139/* The maximum number of characters preceding the unrecognized one to display */
140#define UNRECOGNIZED_PRECEDE_COUNT 10
141
61f0cdd9 142/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
143 * 1999-02-27 mjd-perl-patch@plover.com */
144#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
145
bf4acbe4 146#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 147
ffb4593c
NT
148/* LEX_* are values for PL_lex_state, the state of the lexer.
149 * They are arranged oddly so that the guard on the switch statement
79072805
LW
150 * can get by with a single comparison (if the compiler is smart enough).
151 */
152
fb73857a 153/* #define LEX_NOTPARSING 11 is done in perl.h. */
154
b6007c36
DM
155#define LEX_NORMAL 10 /* normal code (ie not within "...") */
156#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
157#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
158#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
159#define LEX_INTERPSTART 6 /* expecting the start of a $var */
160
161 /* at end of code, eg "$x" followed by: */
162#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
163#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
164
165#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
166 string or after \E, $foo, etc */
167#define LEX_INTERPCONST 2 /* NOT USED */
168#define LEX_FORMLINE 1 /* expecting a format line */
169#define LEX_KNOWNEXT 0 /* next token known; just return it */
170
79072805 171
bbf60fe6 172#ifdef DEBUGGING
27da23d5 173static const char* const lex_state_names[] = {
bbf60fe6
DM
174 "KNOWNEXT",
175 "FORMLINE",
176 "INTERPCONST",
177 "INTERPCONCAT",
178 "INTERPENDMAYBE",
179 "INTERPEND",
180 "INTERPSTART",
181 "INTERPPUSH",
182 "INTERPCASEMOD",
183 "INTERPNORMAL",
184 "NORMAL"
185};
186#endif
187
79072805
LW
188#ifdef ff_next
189#undef ff_next
d48672a2
LW
190#endif
191
79072805 192#include "keywords.h"
fe14fcc3 193
ffb4593c
NT
194/* CLINE is a macro that ensures PL_copline has a sane value */
195
ae986130
LW
196#ifdef CLINE
197#undef CLINE
198#endif
57843af0 199#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 200
5db06880 201#ifdef PERL_MAD
29595ff2
NC
202# define SKIPSPACE0(s) skipspace0(s)
203# define SKIPSPACE1(s) skipspace1(s)
204# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
205# define PEEKSPACE(s) skipspace2(s,0)
206#else
207# define SKIPSPACE0(s) skipspace(s)
208# define SKIPSPACE1(s) skipspace(s)
209# define SKIPSPACE2(s,tsv) skipspace(s)
210# define PEEKSPACE(s) skipspace(s)
211#endif
212
ffb4593c
NT
213/*
214 * Convenience functions to return different tokens and prime the
9cbb5ea2 215 * lexer for the next token. They all take an argument.
ffb4593c
NT
216 *
217 * TOKEN : generic token (used for '(', DOLSHARP, etc)
218 * OPERATOR : generic operator
219 * AOPERATOR : assignment operator
220 * PREBLOCK : beginning the block after an if, while, foreach, ...
221 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
222 * PREREF : *EXPR where EXPR is not a simple identifier
223 * TERM : expression term
224 * LOOPX : loop exiting command (goto, last, dump, etc)
225 * FTST : file test operator
226 * FUN0 : zero-argument function
7eb971ee 227 * FUN0OP : zero-argument function, with its op created in this file
2d2e263d 228 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
229 * BOop : bitwise or or xor
230 * BAop : bitwise and
231 * SHop : shift operator
232 * PWop : power operator
9cbb5ea2 233 * PMop : pattern-matching operator
ffb4593c
NT
234 * Aop : addition-level operator
235 * Mop : multiplication-level operator
236 * Eop : equality-testing operator
e5edeb50 237 * Rop : relational operator <= != gt
ffb4593c
NT
238 *
239 * Also see LOP and lop() below.
240 */
241
998054bd 242#ifdef DEBUGGING /* Serve -DT. */
704d4215 243# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 244#else
bbf60fe6 245# define REPORT(retval) (retval)
998054bd
SC
246#endif
247
bbf60fe6
DM
248#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
249#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
250#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
251#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
252#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
253#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
254#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
255#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
256#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
257#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
7eb971ee 258#define FUN0OP(f) return (pl_yylval.opval=f, CLINE, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0OP))
6154021b
RGS
259#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
260#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
261#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
262#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
263#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
264#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
265#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
266#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
267#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
268#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 269
a687059c
LW
270/* This bit of chicanery makes a unary function followed by
271 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
272 * The UNIDOR macro is for unary functions that can be followed by the //
273 * operator (such as C<shift // 0>).
a687059c 274 */
376fcdbf 275#define UNI2(f,x) { \
6154021b 276 pl_yylval.ival = f; \
376fcdbf
AL
277 PL_expect = x; \
278 PL_bufptr = s; \
279 PL_last_uni = PL_oldbufptr; \
280 PL_last_lop_op = f; \
281 if (*s == '(') \
282 return REPORT( (int)FUNC1 ); \
29595ff2 283 s = PEEKSPACE(s); \
376fcdbf
AL
284 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
285 }
6f33ba73
RGS
286#define UNI(f) UNI2(f,XTERM)
287#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 288
376fcdbf 289#define UNIBRACK(f) { \
6154021b 290 pl_yylval.ival = f; \
376fcdbf
AL
291 PL_bufptr = s; \
292 PL_last_uni = PL_oldbufptr; \
293 if (*s == '(') \
294 return REPORT( (int)FUNC1 ); \
29595ff2 295 s = PEEKSPACE(s); \
376fcdbf
AL
296 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
297 }
79072805 298
9f68db38 299/* grandfather return to old style */
78cdf107
Z
300#define OLDLOP(f) \
301 do { \
302 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC) \
303 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC; \
304 pl_yylval.ival = (f); \
305 PL_expect = XTERM; \
306 PL_bufptr = s; \
307 return (int)LSTOP; \
308 } while(0)
79072805 309
8fa7f367
JH
310#ifdef DEBUGGING
311
6154021b 312/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
313enum token_type {
314 TOKENTYPE_NONE,
315 TOKENTYPE_IVAL,
6154021b 316 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
317 TOKENTYPE_PVAL,
318 TOKENTYPE_OPVAL,
319 TOKENTYPE_GVVAL
320};
321
6d4a66ac
NC
322static struct debug_tokens {
323 const int token;
324 enum token_type type;
325 const char *name;
326} const debug_tokens[] =
9041c2e3 327{
bbf60fe6
DM
328 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
329 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
330 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
331 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
332 { ARROW, TOKENTYPE_NONE, "ARROW" },
333 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
334 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
335 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
336 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
337 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 338 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
339 { DO, TOKENTYPE_NONE, "DO" },
340 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
341 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
342 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
343 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
344 { ELSE, TOKENTYPE_NONE, "ELSE" },
345 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
346 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
347 { FOR, TOKENTYPE_IVAL, "FOR" },
348 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
349 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
350 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
7eb971ee 351 { FUNC0OP, TOKENTYPE_OPVAL, "FUNC0OP" },
bbf60fe6
DM
352 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
353 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
354 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 355 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
356 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
357 { IF, TOKENTYPE_IVAL, "IF" },
358 { LABEL, TOKENTYPE_PVAL, "LABEL" },
359 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
360 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
361 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
362 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
363 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
364 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
365 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
366 { MY, TOKENTYPE_IVAL, "MY" },
367 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
368 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
369 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
370 { OROP, TOKENTYPE_IVAL, "OROP" },
371 { OROR, TOKENTYPE_NONE, "OROR" },
372 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
373 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
374 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
375 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
376 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
377 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
378 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
379 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
380 { PREINC, TOKENTYPE_NONE, "PREINC" },
381 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
382 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
383 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
384 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
385 { SUB, TOKENTYPE_NONE, "SUB" },
386 { THING, TOKENTYPE_OPVAL, "THING" },
387 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
388 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
389 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
390 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
391 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
392 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 393 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
394 { WHILE, TOKENTYPE_IVAL, "WHILE" },
395 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 396 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 397 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
398};
399
6154021b 400/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 401
bbf60fe6 402STATIC int
704d4215 403S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 404{
97aff369 405 dVAR;
7918f24d
NC
406
407 PERL_ARGS_ASSERT_TOKEREPORT;
408
bbf60fe6 409 if (DEBUG_T_TEST) {
bd61b366 410 const char *name = NULL;
bbf60fe6 411 enum token_type type = TOKENTYPE_NONE;
f54cb97a 412 const struct debug_tokens *p;
396482e1 413 SV* const report = newSVpvs("<== ");
bbf60fe6 414
f54cb97a 415 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
416 if (p->token == (int)rv) {
417 name = p->name;
418 type = p->type;
419 break;
420 }
421 }
422 if (name)
54667de8 423 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
424 else if ((char)rv > ' ' && (char)rv < '~')
425 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
426 else if (!rv)
396482e1 427 sv_catpvs(report, "EOF");
bbf60fe6
DM
428 else
429 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
430 switch (type) {
431 case TOKENTYPE_NONE:
432 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
433 break;
434 case TOKENTYPE_IVAL:
704d4215 435 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
436 break;
437 case TOKENTYPE_OPNUM:
438 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 439 PL_op_name[lvalp->ival]);
bbf60fe6
DM
440 break;
441 case TOKENTYPE_PVAL:
704d4215 442 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
443 break;
444 case TOKENTYPE_OPVAL:
704d4215 445 if (lvalp->opval) {
401441c0 446 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
447 PL_op_name[lvalp->opval->op_type]);
448 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 449 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 450 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
451 }
452
453 }
401441c0 454 else
396482e1 455 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
456 break;
457 }
b6007c36 458 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
459 };
460 return (int)rv;
998054bd
SC
461}
462
b6007c36
DM
463
464/* print the buffer with suitable escapes */
465
466STATIC void
15f169a1 467S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 468{
396482e1 469 SV* const tmp = newSVpvs("");
7918f24d
NC
470
471 PERL_ARGS_ASSERT_PRINTBUF;
472
b6007c36
DM
473 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
474 SvREFCNT_dec(tmp);
475}
476
8fa7f367
JH
477#endif
478
8290c323
NC
479static int
480S_deprecate_commaless_var_list(pTHX) {
481 PL_expect = XTERM;
482 deprecate("comma-less variable list");
483 return REPORT(','); /* grandfather non-comma-format format */
484}
485
ffb4593c
NT
486/*
487 * S_ao
488 *
c963b151
BD
489 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
490 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
491 */
492
76e3520e 493STATIC int
cea2e8a9 494S_ao(pTHX_ int toketype)
a0d0e21e 495{
97aff369 496 dVAR;
3280af22
NIS
497 if (*PL_bufptr == '=') {
498 PL_bufptr++;
a0d0e21e 499 if (toketype == ANDAND)
6154021b 500 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 501 else if (toketype == OROR)
6154021b 502 pl_yylval.ival = OP_ORASSIGN;
c963b151 503 else if (toketype == DORDOR)
6154021b 504 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
505 toketype = ASSIGNOP;
506 }
507 return toketype;
508}
509
ffb4593c
NT
510/*
511 * S_no_op
512 * When Perl expects an operator and finds something else, no_op
513 * prints the warning. It always prints "<something> found where
514 * operator expected. It prints "Missing semicolon on previous line?"
515 * if the surprise occurs at the start of the line. "do you need to
516 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
517 * where the compiler doesn't know if foo is a method call or a function.
518 * It prints "Missing operator before end of line" if there's nothing
519 * after the missing operator, or "... before <...>" if there is something
520 * after the missing operator.
521 */
522
76e3520e 523STATIC void
15f169a1 524S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 525{
97aff369 526 dVAR;
9d4ba2ae
AL
527 char * const oldbp = PL_bufptr;
528 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 529
7918f24d
NC
530 PERL_ARGS_ASSERT_NO_OP;
531
1189a94a
GS
532 if (!s)
533 s = oldbp;
07c798fb 534 else
1189a94a 535 PL_bufptr = s;
cea2e8a9 536 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
537 if (ckWARN_d(WARN_SYNTAX)) {
538 if (is_first)
539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
540 "\t(Missing semicolon on previous line?)\n");
541 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 542 const char *t;
c35e046a
AL
543 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
544 NOOP;
56da5a46
RGS
545 if (t < PL_bufptr && isSPACE(*t))
546 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
547 "\t(Do you need to predeclare %.*s?)\n",
551405c4 548 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
549 }
550 else {
551 assert(s >= oldbp);
552 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 553 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 554 }
07c798fb 555 }
3280af22 556 PL_bufptr = oldbp;
8990e307
LW
557}
558
ffb4593c
NT
559/*
560 * S_missingterm
561 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 562 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
563 * If we're in a delimited string and the delimiter is a control
564 * character, it's reformatted into a two-char sequence like ^C.
565 * This is fatal.
566 */
567
76e3520e 568STATIC void
cea2e8a9 569S_missingterm(pTHX_ char *s)
8990e307 570{
97aff369 571 dVAR;
8990e307
LW
572 char tmpbuf[3];
573 char q;
574 if (s) {
9d4ba2ae 575 char * const nl = strrchr(s,'\n');
d2719217 576 if (nl)
8990e307
LW
577 *nl = '\0';
578 }
463559e7 579 else if (isCNTRL(PL_multi_close)) {
8990e307 580 *tmpbuf = '^';
585ec06d 581 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
582 tmpbuf[2] = '\0';
583 s = tmpbuf;
584 }
585 else {
eb160463 586 *tmpbuf = (char)PL_multi_close;
8990e307
LW
587 tmpbuf[1] = '\0';
588 s = tmpbuf;
589 }
590 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 591 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 592}
79072805 593
0d863452 594/*
0d863452
RH
595 * Check whether the named feature is enabled.
596 */
26ea9e12
NC
597bool
598Perl_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 599{
97aff369 600 dVAR;
0d863452 601 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 602 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
603
604 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
605
26ea9e12
NC
606 if (namelen > MAX_FEATURE_LEN)
607 return FALSE;
4a731d7b 608 memcpy(&he_name[8], name, namelen);
d4c19fe8 609
7b9ef140 610 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
611}
612
ffb4593c 613/*
9cbb5ea2
GS
614 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
615 * utf16-to-utf8-reversed.
ffb4593c
NT
616 */
617
c39cd008
GS
618#ifdef PERL_CR_FILTER
619static void
620strip_return(SV *sv)
621{
95a20fc0 622 register const char *s = SvPVX_const(sv);
9d4ba2ae 623 register const char * const e = s + SvCUR(sv);
7918f24d
NC
624
625 PERL_ARGS_ASSERT_STRIP_RETURN;
626
c39cd008
GS
627 /* outer loop optimized to do nothing if there are no CR-LFs */
628 while (s < e) {
629 if (*s++ == '\r' && *s == '\n') {
630 /* hit a CR-LF, need to copy the rest */
631 register char *d = s - 1;
632 *d++ = *s++;
633 while (s < e) {
634 if (*s == '\r' && s[1] == '\n')
635 s++;
636 *d++ = *s++;
637 }
638 SvCUR(sv) -= s - d;
639 return;
640 }
641 }
642}
a868473f 643
76e3520e 644STATIC I32
c39cd008 645S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 646{
f54cb97a 647 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
648 if (count > 0 && !maxlen)
649 strip_return(sv);
650 return count;
a868473f
NIS
651}
652#endif
653
ffb4593c 654/*
8eaa0acf
Z
655=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
656
657Creates and initialises a new lexer/parser state object, supplying
658a context in which to lex and parse from a new source of Perl code.
659A pointer to the new state object is placed in L</PL_parser>. An entry
660is made on the save stack so that upon unwinding the new state object
661will be destroyed and the former value of L</PL_parser> will be restored.
662Nothing else need be done to clean up the parsing context.
663
664The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
665non-null, provides a string (in SV form) containing code to be parsed.
666A copy of the string is made, so subsequent modification of I<line>
667does not affect parsing. I<rsfp>, if non-null, provides an input stream
668from which code will be read to be parsed. If both are non-null, the
669code in I<line> comes first and must consist of complete lines of input,
670and I<rsfp> supplies the remainder of the source.
671
672The I<flags> parameter is reserved for future use, and must always
27fcb6ee
FC
673be zero, except for one flag that is currently reserved for perl's internal
674use.
8eaa0acf
Z
675
676=cut
677*/
ffb4593c 678
27fcb6ee
FC
679/* LEX_START_SAME_FILTER indicates that this is not a new file, so it
680 can share filters with the current parser. */
681
a0d0e21e 682void
8eaa0acf 683Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 684{
97aff369 685 dVAR;
6ef55633 686 const char *s = NULL;
8990e307 687 STRLEN len;
5486870f 688 yy_parser *parser, *oparser;
27fcb6ee 689 if (flags && flags != LEX_START_SAME_FILTER)
8eaa0acf 690 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
691
692 /* create and initialise a parser */
693
199e78b7 694 Newxz(parser, 1, yy_parser);
5486870f 695 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
696 PL_parser = parser;
697
28ac2b49
Z
698 parser->stack = NULL;
699 parser->ps = NULL;
700 parser->stack_size = 0;
acdf0a21 701
e3abe207
DM
702 /* on scope exit, free this parser and restore any outer one */
703 SAVEPARSER(parser);
7c4baf47 704 parser->saved_curcop = PL_curcop;
e3abe207 705
acdf0a21 706 /* initialise lexer state */
8990e307 707
fb205e7a
DM
708#ifdef PERL_MAD
709 parser->curforce = -1;
710#else
711 parser->nexttoke = 0;
712#endif
ca4cfd28 713 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 714 parser->copline = NOLINE;
5afb0a62 715 parser->lex_state = LEX_NORMAL;
c2598295 716 parser->expect = XSTATE;
2f9285f8 717 parser->rsfp = rsfp;
27fcb6ee
FC
718 parser->rsfp_filters =
719 !(flags & LEX_START_SAME_FILTER) || !oparser
720 ? newAV()
721 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 722
199e78b7
DM
723 Newx(parser->lex_brackstack, 120, char);
724 Newx(parser->lex_casestack, 12, char);
725 *parser->lex_casestack = '\0';
02b34bbe 726
10efb74f
NC
727 if (line) {
728 s = SvPV_const(line, len);
729 } else {
730 len = 0;
731 }
bdc0bf6f 732
10efb74f 733 if (!len) {
bdc0bf6f 734 parser->linestr = newSVpvs("\n;");
805700c1 735 } else {
719a9bb0 736 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 737 if (s[len-1] != ';')
bdc0bf6f 738 sv_catpvs(parser->linestr, "\n;");
8990e307 739 }
f06b5848
DM
740 parser->oldoldbufptr =
741 parser->oldbufptr =
742 parser->bufptr =
743 parser->linestart = SvPVX(parser->linestr);
744 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
745 parser->last_lop = parser->last_uni = NULL;
737c24fc
Z
746
747 parser->in_pod = 0;
79072805 748}
a687059c 749
e3abe207
DM
750
751/* delete a parser object */
752
753void
754Perl_parser_free(pTHX_ const yy_parser *parser)
755{
7918f24d
NC
756 PERL_ARGS_ASSERT_PARSER_FREE;
757
7c4baf47 758 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
759 SvREFCNT_dec(parser->linestr);
760
2f9285f8
DM
761 if (parser->rsfp == PerlIO_stdin())
762 PerlIO_clearerr(parser->rsfp);
799361c3
SH
763 else if (parser->rsfp && (!parser->old_parser ||
764 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 765 PerlIO_close(parser->rsfp);
5486870f 766 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 767
e3abe207
DM
768 Safefree(parser->lex_brackstack);
769 Safefree(parser->lex_casestack);
770 PL_parser = parser->old_parser;
771 Safefree(parser);
772}
773
774
ffb4593c 775/*
f0e67a1d
Z
776=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
777
778Buffer scalar containing the chunk currently under consideration of the
779text currently being lexed. This is always a plain string scalar (for
780which C<SvPOK> is true). It is not intended to be used as a scalar by
781normal scalar means; instead refer to the buffer directly by the pointer
782variables described below.
783
784The lexer maintains various C<char*> pointers to things in the
785C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
786reallocated, all of these pointers must be updated. Don't attempt to
787do this manually, but rather use L</lex_grow_linestr> if you need to
788reallocate the buffer.
789
790The content of the text chunk in the buffer is commonly exactly one
791complete line of input, up to and including a newline terminator,
792but there are situations where it is otherwise. The octets of the
793buffer may be intended to be interpreted as either UTF-8 or Latin-1.
794The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
795flag on this scalar, which may disagree with it.
796
797For direct examination of the buffer, the variable
798L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
799lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
800of these pointers is usually preferable to examination of the scalar
801through normal scalar means.
802
803=for apidoc AmxU|char *|PL_parser-E<gt>bufend
804
805Direct pointer to the end of the chunk of text currently being lexed, the
806end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
807+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
808always located at the end of the buffer, and does not count as part of
809the buffer's contents.
810
811=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
812
813Points to the current position of lexing inside the lexer buffer.
814Characters around this point may be freely examined, within
815the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
816L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
817interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
818
819Lexing code (whether in the Perl core or not) moves this pointer past
820the characters that it consumes. It is also expected to perform some
821bookkeeping whenever a newline character is consumed. This movement
822can be more conveniently performed by the function L</lex_read_to>,
823which handles newlines appropriately.
824
825Interpretation of the buffer's octets can be abstracted out by
826using the slightly higher-level functions L</lex_peek_unichar> and
827L</lex_read_unichar>.
828
829=for apidoc AmxU|char *|PL_parser-E<gt>linestart
830
831Points to the start of the current line inside the lexer buffer.
832This is useful for indicating at which column an error occurred, and
833not much else. This must be updated by any lexing code that consumes
834a newline; the function L</lex_read_to> handles this detail.
835
836=cut
837*/
838
839/*
840=for apidoc Amx|bool|lex_bufutf8
841
842Indicates whether the octets in the lexer buffer
843(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
844of Unicode characters. If not, they should be interpreted as Latin-1
845characters. This is analogous to the C<SvUTF8> flag for scalars.
846
847In UTF-8 mode, it is not guaranteed that the lexer buffer actually
848contains valid UTF-8. Lexing code must be robust in the face of invalid
849encoding.
850
851The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
852is significant, but not the whole story regarding the input character
853encoding. Normally, when a file is being read, the scalar contains octets
854and its C<SvUTF8> flag is off, but the octets should be interpreted as
855UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
856however, the scalar may have the C<SvUTF8> flag on, and in this case its
857octets should be interpreted as UTF-8 unless the C<use bytes> pragma
858is in effect. This logic may change in the future; use this function
859instead of implementing the logic yourself.
860
861=cut
862*/
863
864bool
865Perl_lex_bufutf8(pTHX)
866{
867 return UTF;
868}
869
870/*
871=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
872
873Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
874at least I<len> octets (including terminating NUL). Returns a
875pointer to the reallocated buffer. This is necessary before making
876any direct modification of the buffer that would increase its length.
877L</lex_stuff_pvn> provides a more convenient way to insert text into
878the buffer.
879
880Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
881this function updates all of the lexer's variables that point directly
882into the buffer.
883
884=cut
885*/
886
887char *
888Perl_lex_grow_linestr(pTHX_ STRLEN len)
889{
890 SV *linestr;
891 char *buf;
892 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
893 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
894 linestr = PL_parser->linestr;
895 buf = SvPVX(linestr);
896 if (len <= SvLEN(linestr))
897 return buf;
898 bufend_pos = PL_parser->bufend - buf;
899 bufptr_pos = PL_parser->bufptr - buf;
900 oldbufptr_pos = PL_parser->oldbufptr - buf;
901 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
902 linestart_pos = PL_parser->linestart - buf;
903 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
904 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
905 buf = sv_grow(linestr, len);
906 PL_parser->bufend = buf + bufend_pos;
907 PL_parser->bufptr = buf + bufptr_pos;
908 PL_parser->oldbufptr = buf + oldbufptr_pos;
909 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
910 PL_parser->linestart = buf + linestart_pos;
911 if (PL_parser->last_uni)
912 PL_parser->last_uni = buf + last_uni_pos;
913 if (PL_parser->last_lop)
914 PL_parser->last_lop = buf + last_lop_pos;
915 return buf;
916}
917
918/*
83aa740e 919=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
920
921Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
922immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
923reallocating the buffer if necessary. This means that lexing code that
924runs later will see the characters as if they had appeared in the input.
925It is not recommended to do this as part of normal parsing, and most
926uses of this facility run the risk of the inserted characters being
927interpreted in an unintended manner.
928
929The string to be inserted is represented by I<len> octets starting
930at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
931according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
932The characters are recoded for the lexer buffer, according to how the
933buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 934to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
935function is more convenient.
936
937=cut
938*/
939
940void
83aa740e 941Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 942{
749123ff 943 dVAR;
f0e67a1d
Z
944 char *bufptr;
945 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
946 if (flags & ~(LEX_STUFF_UTF8))
947 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
948 if (UTF) {
949 if (flags & LEX_STUFF_UTF8) {
950 goto plain_copy;
951 } else {
952 STRLEN highhalf = 0;
83aa740e 953 const char *p, *e = pv+len;
f0e67a1d
Z
954 for (p = pv; p != e; p++)
955 highhalf += !!(((U8)*p) & 0x80);
956 if (!highhalf)
957 goto plain_copy;
958 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
959 bufptr = PL_parser->bufptr;
960 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
961 SvCUR_set(PL_parser->linestr,
962 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
963 PL_parser->bufend += len+highhalf;
964 for (p = pv; p != e; p++) {
965 U8 c = (U8)*p;
966 if (c & 0x80) {
967 *bufptr++ = (char)(0xc0 | (c >> 6));
968 *bufptr++ = (char)(0x80 | (c & 0x3f));
969 } else {
970 *bufptr++ = (char)c;
971 }
972 }
973 }
974 } else {
975 if (flags & LEX_STUFF_UTF8) {
976 STRLEN highhalf = 0;
83aa740e 977 const char *p, *e = pv+len;
f0e67a1d
Z
978 for (p = pv; p != e; p++) {
979 U8 c = (U8)*p;
980 if (c >= 0xc4) {
981 Perl_croak(aTHX_ "Lexing code attempted to stuff "
982 "non-Latin-1 character into Latin-1 input");
983 } else if (c >= 0xc2 && p+1 != e &&
984 (((U8)p[1]) & 0xc0) == 0x80) {
985 p++;
986 highhalf++;
987 } else if (c >= 0x80) {
988 /* malformed UTF-8 */
989 ENTER;
990 SAVESPTR(PL_warnhook);
991 PL_warnhook = PERL_WARNHOOK_FATAL;
992 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
993 LEAVE;
994 }
995 }
996 if (!highhalf)
997 goto plain_copy;
998 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
999 bufptr = PL_parser->bufptr;
1000 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1001 SvCUR_set(PL_parser->linestr,
1002 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1003 PL_parser->bufend += len-highhalf;
1004 for (p = pv; p != e; p++) {
1005 U8 c = (U8)*p;
1006 if (c & 0x80) {
1007 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1008 p++;
1009 } else {
1010 *bufptr++ = (char)c;
1011 }
1012 }
1013 } else {
1014 plain_copy:
1015 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1016 bufptr = PL_parser->bufptr;
1017 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1018 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1019 PL_parser->bufend += len;
1020 Copy(pv, bufptr, len, char);
1021 }
1022 }
1023}
1024
1025/*
9dcc53ea
Z
1026=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1027
1028Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1029immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1030reallocating the buffer if necessary. This means that lexing code that
1031runs later will see the characters as if they had appeared in the input.
1032It is not recommended to do this as part of normal parsing, and most
1033uses of this facility run the risk of the inserted characters being
1034interpreted in an unintended manner.
1035
1036The string to be inserted is represented by octets starting at I<pv>
1037and continuing to the first nul. These octets are interpreted as either
1038UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1039in I<flags>. The characters are recoded for the lexer buffer, according
1040to how the buffer is currently being interpreted (L</lex_bufutf8>).
1041If it is not convenient to nul-terminate a string to be inserted, the
1042L</lex_stuff_pvn> function is more appropriate.
1043
1044=cut
1045*/
1046
1047void
1048Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1049{
1050 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1051 lex_stuff_pvn(pv, strlen(pv), flags);
1052}
1053
1054/*
f0e67a1d
Z
1055=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1056
1057Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1058immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1059reallocating the buffer if necessary. This means that lexing code that
1060runs later will see the characters as if they had appeared in the input.
1061It is not recommended to do this as part of normal parsing, and most
1062uses of this facility run the risk of the inserted characters being
1063interpreted in an unintended manner.
1064
1065The string to be inserted is the string value of I<sv>. The characters
1066are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1067being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1068not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1069need to construct a scalar.
1070
1071=cut
1072*/
1073
1074void
1075Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1076{
1077 char *pv;
1078 STRLEN len;
1079 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1080 if (flags)
1081 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1082 pv = SvPV(sv, len);
1083 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1084}
1085
1086/*
1087=for apidoc Amx|void|lex_unstuff|char *ptr
1088
1089Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1090I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1091This hides the discarded text from any lexing code that runs later,
1092as if the text had never appeared.
1093
1094This is not the normal way to consume lexed text. For that, use
1095L</lex_read_to>.
1096
1097=cut
1098*/
1099
1100void
1101Perl_lex_unstuff(pTHX_ char *ptr)
1102{
1103 char *buf, *bufend;
1104 STRLEN unstuff_len;
1105 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1106 buf = PL_parser->bufptr;
1107 if (ptr < buf)
1108 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1109 if (ptr == buf)
1110 return;
1111 bufend = PL_parser->bufend;
1112 if (ptr > bufend)
1113 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1114 unstuff_len = ptr - buf;
1115 Move(ptr, buf, bufend+1-ptr, char);
1116 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1117 PL_parser->bufend = bufend - unstuff_len;
1118}
1119
1120/*
1121=for apidoc Amx|void|lex_read_to|char *ptr
1122
1123Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1124to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1125performing the correct bookkeeping whenever a newline character is passed.
1126This is the normal way to consume lexed text.
1127
1128Interpretation of the buffer's octets can be abstracted out by
1129using the slightly higher-level functions L</lex_peek_unichar> and
1130L</lex_read_unichar>.
1131
1132=cut
1133*/
1134
1135void
1136Perl_lex_read_to(pTHX_ char *ptr)
1137{
1138 char *s;
1139 PERL_ARGS_ASSERT_LEX_READ_TO;
1140 s = PL_parser->bufptr;
1141 if (ptr < s || ptr > PL_parser->bufend)
1142 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1143 for (; s != ptr; s++)
1144 if (*s == '\n') {
1145 CopLINE_inc(PL_curcop);
1146 PL_parser->linestart = s+1;
1147 }
1148 PL_parser->bufptr = ptr;
1149}
1150
1151/*
1152=for apidoc Amx|void|lex_discard_to|char *ptr
1153
1154Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1155up to I<ptr>. The remaining content of the buffer will be moved, and
1156all pointers into the buffer updated appropriately. I<ptr> must not
1157be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1158it is not permitted to discard text that has yet to be lexed.
1159
1160Normally it is not necessarily to do this directly, because it suffices to
1161use the implicit discarding behaviour of L</lex_next_chunk> and things
1162based on it. However, if a token stretches across multiple lines,
1f317c95 1163and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1164that purpose, then after completion of the token it would be wise to
1165explicitly discard the now-unneeded earlier lines, to avoid future
1166multi-line tokens growing the buffer without bound.
1167
1168=cut
1169*/
1170
1171void
1172Perl_lex_discard_to(pTHX_ char *ptr)
1173{
1174 char *buf;
1175 STRLEN discard_len;
1176 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1177 buf = SvPVX(PL_parser->linestr);
1178 if (ptr < buf)
1179 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1180 if (ptr == buf)
1181 return;
1182 if (ptr > PL_parser->bufptr)
1183 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1184 discard_len = ptr - buf;
1185 if (PL_parser->oldbufptr < ptr)
1186 PL_parser->oldbufptr = ptr;
1187 if (PL_parser->oldoldbufptr < ptr)
1188 PL_parser->oldoldbufptr = ptr;
1189 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1190 PL_parser->last_uni = NULL;
1191 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1192 PL_parser->last_lop = NULL;
1193 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1194 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1195 PL_parser->bufend -= discard_len;
1196 PL_parser->bufptr -= discard_len;
1197 PL_parser->oldbufptr -= discard_len;
1198 PL_parser->oldoldbufptr -= discard_len;
1199 if (PL_parser->last_uni)
1200 PL_parser->last_uni -= discard_len;
1201 if (PL_parser->last_lop)
1202 PL_parser->last_lop -= discard_len;
1203}
1204
1205/*
1206=for apidoc Amx|bool|lex_next_chunk|U32 flags
1207
1208Reads in the next chunk of text to be lexed, appending it to
1209L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1210looked to the end of the current chunk and wants to know more. It is
1211usual, but not necessary, for lexing to have consumed the entirety of
1212the current chunk at this time.
1213
1214If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1215chunk (i.e., the current chunk has been entirely consumed), normally the
1216current chunk will be discarded at the same time that the new chunk is
1217read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1218will not be discarded. If the current chunk has not been entirely
1219consumed, then it will not be discarded regardless of the flag.
1220
1221Returns true if some new text was added to the buffer, or false if the
1222buffer has reached the end of the input text.
1223
1224=cut
1225*/
1226
1227#define LEX_FAKE_EOF 0x80000000
1228
1229bool
1230Perl_lex_next_chunk(pTHX_ U32 flags)
1231{
1232 SV *linestr;
1233 char *buf;
1234 STRLEN old_bufend_pos, new_bufend_pos;
1235 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1236 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1237 bool got_some_for_debugger = 0;
f0e67a1d
Z
1238 bool got_some;
1239 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1240 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1241 linestr = PL_parser->linestr;
1242 buf = SvPVX(linestr);
1243 if (!(flags & LEX_KEEP_PREVIOUS) &&
1244 PL_parser->bufptr == PL_parser->bufend) {
1245 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1246 linestart_pos = 0;
1247 if (PL_parser->last_uni != PL_parser->bufend)
1248 PL_parser->last_uni = NULL;
1249 if (PL_parser->last_lop != PL_parser->bufend)
1250 PL_parser->last_lop = NULL;
1251 last_uni_pos = last_lop_pos = 0;
1252 *buf = 0;
1253 SvCUR(linestr) = 0;
1254 } else {
1255 old_bufend_pos = PL_parser->bufend - buf;
1256 bufptr_pos = PL_parser->bufptr - buf;
1257 oldbufptr_pos = PL_parser->oldbufptr - buf;
1258 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1259 linestart_pos = PL_parser->linestart - buf;
1260 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1261 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1262 }
1263 if (flags & LEX_FAKE_EOF) {
1264 goto eof;
1265 } else if (!PL_parser->rsfp) {
1266 got_some = 0;
1267 } else if (filter_gets(linestr, old_bufend_pos)) {
1268 got_some = 1;
17cc9359 1269 got_some_for_debugger = 1;
f0e67a1d 1270 } else {
580561a3
Z
1271 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1272 sv_setpvs(linestr, "");
f0e67a1d
Z
1273 eof:
1274 /* End of real input. Close filehandle (unless it was STDIN),
1275 * then add implicit termination.
1276 */
1277 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1278 PerlIO_clearerr(PL_parser->rsfp);
1279 else if (PL_parser->rsfp)
1280 (void)PerlIO_close(PL_parser->rsfp);
1281 PL_parser->rsfp = NULL;
737c24fc 1282 PL_parser->in_pod = 0;
f0e67a1d
Z
1283#ifdef PERL_MAD
1284 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1285 PL_faketokens = 1;
1286#endif
1287 if (!PL_in_eval && PL_minus_p) {
1288 sv_catpvs(linestr,
1289 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1290 PL_minus_n = PL_minus_p = 0;
1291 } else if (!PL_in_eval && PL_minus_n) {
1292 sv_catpvs(linestr, /*{*/";}");
1293 PL_minus_n = 0;
1294 } else
1295 sv_catpvs(linestr, ";");
1296 got_some = 1;
1297 }
1298 buf = SvPVX(linestr);
1299 new_bufend_pos = SvCUR(linestr);
1300 PL_parser->bufend = buf + new_bufend_pos;
1301 PL_parser->bufptr = buf + bufptr_pos;
1302 PL_parser->oldbufptr = buf + oldbufptr_pos;
1303 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1304 PL_parser->linestart = buf + linestart_pos;
1305 if (PL_parser->last_uni)
1306 PL_parser->last_uni = buf + last_uni_pos;
1307 if (PL_parser->last_lop)
1308 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1309 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1310 PL_curstash != PL_debstash) {
1311 /* debugger active and we're not compiling the debugger code,
1312 * so store the line into the debugger's array of lines
1313 */
1314 update_debugger_info(NULL, buf+old_bufend_pos,
1315 new_bufend_pos-old_bufend_pos);
1316 }
1317 return got_some;
1318}
1319
1320/*
1321=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1322
1323Looks ahead one (Unicode) character in the text currently being lexed.
1324Returns the codepoint (unsigned integer value) of the next character,
1325or -1 if lexing has reached the end of the input text. To consume the
1326peeked character, use L</lex_read_unichar>.
1327
1328If the next character is in (or extends into) the next chunk of input
1329text, the next chunk will be read in. Normally the current chunk will be
1330discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1331then the current chunk will not be discarded.
1332
1333If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1334is encountered, an exception is generated.
1335
1336=cut
1337*/
1338
1339I32
1340Perl_lex_peek_unichar(pTHX_ U32 flags)
1341{
749123ff 1342 dVAR;
f0e67a1d
Z
1343 char *s, *bufend;
1344 if (flags & ~(LEX_KEEP_PREVIOUS))
1345 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1346 s = PL_parser->bufptr;
1347 bufend = PL_parser->bufend;
1348 if (UTF) {
1349 U8 head;
1350 I32 unichar;
1351 STRLEN len, retlen;
1352 if (s == bufend) {
1353 if (!lex_next_chunk(flags))
1354 return -1;
1355 s = PL_parser->bufptr;
1356 bufend = PL_parser->bufend;
1357 }
1358 head = (U8)*s;
1359 if (!(head & 0x80))
1360 return head;
1361 if (head & 0x40) {
1362 len = PL_utf8skip[head];
1363 while ((STRLEN)(bufend-s) < len) {
1364 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1365 break;
1366 s = PL_parser->bufptr;
1367 bufend = PL_parser->bufend;
1368 }
1369 }
1370 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1371 if (retlen == (STRLEN)-1) {
1372 /* malformed UTF-8 */
1373 ENTER;
1374 SAVESPTR(PL_warnhook);
1375 PL_warnhook = PERL_WARNHOOK_FATAL;
1376 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1377 LEAVE;
1378 }
1379 return unichar;
1380 } else {
1381 if (s == bufend) {
1382 if (!lex_next_chunk(flags))
1383 return -1;
1384 s = PL_parser->bufptr;
1385 }
1386 return (U8)*s;
1387 }
1388}
1389
1390/*
1391=for apidoc Amx|I32|lex_read_unichar|U32 flags
1392
1393Reads the next (Unicode) character in the text currently being lexed.
1394Returns the codepoint (unsigned integer value) of the character read,
1395and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1396if lexing has reached the end of the input text. To non-destructively
1397examine the next character, use L</lex_peek_unichar> instead.
1398
1399If the next character is in (or extends into) the next chunk of input
1400text, the next chunk will be read in. Normally the current chunk will be
1401discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1402then the current chunk will not be discarded.
1403
1404If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1405is encountered, an exception is generated.
1406
1407=cut
1408*/
1409
1410I32
1411Perl_lex_read_unichar(pTHX_ U32 flags)
1412{
1413 I32 c;
1414 if (flags & ~(LEX_KEEP_PREVIOUS))
1415 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1416 c = lex_peek_unichar(flags);
1417 if (c != -1) {
1418 if (c == '\n')
1419 CopLINE_inc(PL_curcop);
d9018cbe
EB
1420 if (UTF)
1421 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1422 else
1423 ++(PL_parser->bufptr);
f0e67a1d
Z
1424 }
1425 return c;
1426}
1427
1428/*
1429=for apidoc Amx|void|lex_read_space|U32 flags
1430
1431Reads optional spaces, in Perl style, in the text currently being
1432lexed. The spaces may include ordinary whitespace characters and
1433Perl-style comments. C<#line> directives are processed if encountered.
1434L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1435at a non-space character (or the end of the input text).
1436
1437If spaces extend into the next chunk of input text, the next chunk will
1438be read in. Normally the current chunk will be discarded at the same
1439time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1440chunk will not be discarded.
1441
1442=cut
1443*/
1444
f0998909
Z
1445#define LEX_NO_NEXT_CHUNK 0x80000000
1446
f0e67a1d
Z
1447void
1448Perl_lex_read_space(pTHX_ U32 flags)
1449{
1450 char *s, *bufend;
1451 bool need_incline = 0;
f0998909 1452 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1453 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1454#ifdef PERL_MAD
1455 if (PL_skipwhite) {
1456 sv_free(PL_skipwhite);
1457 PL_skipwhite = NULL;
1458 }
1459 if (PL_madskills)
1460 PL_skipwhite = newSVpvs("");
1461#endif /* PERL_MAD */
1462 s = PL_parser->bufptr;
1463 bufend = PL_parser->bufend;
1464 while (1) {
1465 char c = *s;
1466 if (c == '#') {
1467 do {
1468 c = *++s;
1469 } while (!(c == '\n' || (c == 0 && s == bufend)));
1470 } else if (c == '\n') {
1471 s++;
1472 PL_parser->linestart = s;
1473 if (s == bufend)
1474 need_incline = 1;
1475 else
1476 incline(s);
1477 } else if (isSPACE(c)) {
1478 s++;
1479 } else if (c == 0 && s == bufend) {
1480 bool got_more;
1481#ifdef PERL_MAD
1482 if (PL_madskills)
1483 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1484#endif /* PERL_MAD */
f0998909
Z
1485 if (flags & LEX_NO_NEXT_CHUNK)
1486 break;
f0e67a1d
Z
1487 PL_parser->bufptr = s;
1488 CopLINE_inc(PL_curcop);
1489 got_more = lex_next_chunk(flags);
1490 CopLINE_dec(PL_curcop);
1491 s = PL_parser->bufptr;
1492 bufend = PL_parser->bufend;
1493 if (!got_more)
1494 break;
1495 if (need_incline && PL_parser->rsfp) {
1496 incline(s);
1497 need_incline = 0;
1498 }
1499 } else {
1500 break;
1501 }
1502 }
1503#ifdef PERL_MAD
1504 if (PL_madskills)
1505 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1506#endif /* PERL_MAD */
1507 PL_parser->bufptr = s;
1508}
1509
1510/*
ffb4593c
NT
1511 * S_incline
1512 * This subroutine has nothing to do with tilting, whether at windmills
1513 * or pinball tables. Its name is short for "increment line". It
57843af0 1514 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1515 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1516 * # line 500 "foo.pm"
1517 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1518 */
1519
76e3520e 1520STATIC void
d9095cec 1521S_incline(pTHX_ const char *s)
463ee0b2 1522{
97aff369 1523 dVAR;
d9095cec
NC
1524 const char *t;
1525 const char *n;
1526 const char *e;
8818d409 1527 line_t line_num;
463ee0b2 1528
7918f24d
NC
1529 PERL_ARGS_ASSERT_INCLINE;
1530
57843af0 1531 CopLINE_inc(PL_curcop);
463ee0b2
LW
1532 if (*s++ != '#')
1533 return;
d4c19fe8
AL
1534 while (SPACE_OR_TAB(*s))
1535 s++;
73659bf1
GS
1536 if (strnEQ(s, "line", 4))
1537 s += 4;
1538 else
1539 return;
084592ab 1540 if (SPACE_OR_TAB(*s))
73659bf1 1541 s++;
4e553d73 1542 else
73659bf1 1543 return;
d4c19fe8
AL
1544 while (SPACE_OR_TAB(*s))
1545 s++;
463ee0b2
LW
1546 if (!isDIGIT(*s))
1547 return;
d4c19fe8 1548
463ee0b2
LW
1549 n = s;
1550 while (isDIGIT(*s))
1551 s++;
07714eb4 1552 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1553 return;
bf4acbe4 1554 while (SPACE_OR_TAB(*s))
463ee0b2 1555 s++;
73659bf1 1556 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1557 s++;
73659bf1
GS
1558 e = t + 1;
1559 }
463ee0b2 1560 else {
c35e046a
AL
1561 t = s;
1562 while (!isSPACE(*t))
1563 t++;
73659bf1 1564 e = t;
463ee0b2 1565 }
bf4acbe4 1566 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1567 e++;
1568 if (*e != '\n' && *e != '\0')
1569 return; /* false alarm */
1570
8818d409
FC
1571 line_num = atoi(n)-1;
1572
f4dd75d9 1573 if (t - s > 0) {
d9095cec 1574 const STRLEN len = t - s;
19bad673
NC
1575 SV *const temp_sv = CopFILESV(PL_curcop);
1576 const char *cf;
1577 STRLEN tmplen;
1578
1579 if (temp_sv) {
1580 cf = SvPVX(temp_sv);
1581 tmplen = SvCUR(temp_sv);
1582 } else {
1583 cf = NULL;
1584 tmplen = 0;
1585 }
1586
42d9b98d 1587 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1588 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1589 * to *{"::_<newfilename"} */
44867030
NC
1590 /* However, the long form of evals is only turned on by the
1591 debugger - usually they're "(eval %lu)" */
1592 char smallbuf[128];
1593 char *tmpbuf;
1594 GV **gvp;
d9095cec 1595 STRLEN tmplen2 = len;
798b63bc 1596 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1597 tmpbuf = smallbuf;
1598 else
2ae0db35 1599 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1600 tmpbuf[0] = '_';
1601 tmpbuf[1] = '<';
2ae0db35 1602 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1603 tmplen += 2;
8a5ee598
RGS
1604 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1605 if (gvp) {
44867030
NC
1606 char *tmpbuf2;
1607 GV *gv2;
1608
1609 if (tmplen2 + 2 <= sizeof smallbuf)
1610 tmpbuf2 = smallbuf;
1611 else
1612 Newx(tmpbuf2, tmplen2 + 2, char);
1613
1614 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1615 /* Either they malloc'd it, or we malloc'd it,
1616 so no prefix is present in ours. */
1617 tmpbuf2[0] = '_';
1618 tmpbuf2[1] = '<';
1619 }
1620
1621 memcpy(tmpbuf2 + 2, s, tmplen2);
1622 tmplen2 += 2;
1623
8a5ee598 1624 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1625 if (!isGV(gv2)) {
8a5ee598 1626 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1627 /* adjust ${"::_<newfilename"} to store the new file name */
1628 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
8818d409
FC
1629 /* The line number may differ. If that is the case,
1630 alias the saved lines that are in the array.
1631 Otherwise alias the whole array. */
1632 if (CopLINE(PL_curcop) == line_num) {
1633 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1634 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
1635 }
1636 else if (GvAV(*gvp)) {
1637 AV * const av = GvAV(*gvp);
1638 const I32 start = CopLINE(PL_curcop)+1;
1639 I32 items = AvFILLp(av) - start;
1640 if (items > 0) {
1641 AV * const av2 = GvAVn(gv2);
1642 SV **svp = AvARRAY(av) + start;
1643 I32 l = (I32)line_num+1;
1644 while (items--)
1645 av_store(av2, l++, SvREFCNT_inc(*svp++));
1646 }
1647 }
e5527e4b 1648 }
44867030
NC
1649
1650 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1651 }
e66cf94c 1652 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1653 }
05ec9bb3 1654 CopFILE_free(PL_curcop);
d9095cec 1655 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1656 }
8818d409 1657 CopLINE_set(PL_curcop, line_num);
463ee0b2
LW
1658}
1659
29595ff2 1660#ifdef PERL_MAD
cd81e915 1661/* skip space before PL_thistoken */
29595ff2
NC
1662
1663STATIC char *
1664S_skipspace0(pTHX_ register char *s)
1665{
7918f24d
NC
1666 PERL_ARGS_ASSERT_SKIPSPACE0;
1667
29595ff2
NC
1668 s = skipspace(s);
1669 if (!PL_madskills)
1670 return s;
cd81e915
NC
1671 if (PL_skipwhite) {
1672 if (!PL_thiswhite)
6b29d1f5 1673 PL_thiswhite = newSVpvs("");
cd81e915
NC
1674 sv_catsv(PL_thiswhite, PL_skipwhite);
1675 sv_free(PL_skipwhite);
1676 PL_skipwhite = 0;
1677 }
1678 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1679 return s;
1680}
1681
cd81e915 1682/* skip space after PL_thistoken */
29595ff2
NC
1683
1684STATIC char *
1685S_skipspace1(pTHX_ register char *s)
1686{
d4c19fe8 1687 const char *start = s;
29595ff2
NC
1688 I32 startoff = start - SvPVX(PL_linestr);
1689
7918f24d
NC
1690 PERL_ARGS_ASSERT_SKIPSPACE1;
1691
29595ff2
NC
1692 s = skipspace(s);
1693 if (!PL_madskills)
1694 return s;
1695 start = SvPVX(PL_linestr) + startoff;
cd81e915 1696 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1697 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1698 PL_thistoken = newSVpvn(tstart, start - tstart);
1699 }
1700 PL_realtokenstart = -1;
1701 if (PL_skipwhite) {
1702 if (!PL_nextwhite)
6b29d1f5 1703 PL_nextwhite = newSVpvs("");
cd81e915
NC
1704 sv_catsv(PL_nextwhite, PL_skipwhite);
1705 sv_free(PL_skipwhite);
1706 PL_skipwhite = 0;
29595ff2
NC
1707 }
1708 return s;
1709}
1710
1711STATIC char *
1712S_skipspace2(pTHX_ register char *s, SV **svp)
1713{
c35e046a
AL
1714 char *start;
1715 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1716 const I32 startoff = s - SvPVX(PL_linestr);
1717
7918f24d
NC
1718 PERL_ARGS_ASSERT_SKIPSPACE2;
1719
29595ff2
NC
1720 s = skipspace(s);
1721 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1722 if (!PL_madskills || !svp)
1723 return s;
1724 start = SvPVX(PL_linestr) + startoff;
cd81e915 1725 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1726 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1727 PL_thistoken = newSVpvn(tstart, start - tstart);
1728 PL_realtokenstart = -1;
29595ff2 1729 }
cd81e915 1730 if (PL_skipwhite) {
29595ff2 1731 if (!*svp)
6b29d1f5 1732 *svp = newSVpvs("");
cd81e915
NC
1733 sv_setsv(*svp, PL_skipwhite);
1734 sv_free(PL_skipwhite);
1735 PL_skipwhite = 0;
29595ff2
NC
1736 }
1737
1738 return s;
1739}
1740#endif
1741
80a702cd 1742STATIC void
15f169a1 1743S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1744{
1745 AV *av = CopFILEAVx(PL_curcop);
1746 if (av) {
b9f83d2f 1747 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1748 if (orig_sv)
1749 sv_setsv(sv, orig_sv);
1750 else
1751 sv_setpvn(sv, buf, len);
80a702cd
RGS
1752 (void)SvIOK_on(sv);
1753 SvIV_set(sv, 0);
1754 av_store(av, (I32)CopLINE(PL_curcop), sv);
1755 }
1756}
1757
ffb4593c
NT
1758/*
1759 * S_skipspace
1760 * Called to gobble the appropriate amount and type of whitespace.
1761 * Skips comments as well.
1762 */
1763
76e3520e 1764STATIC char *
cea2e8a9 1765S_skipspace(pTHX_ register char *s)
a687059c 1766{
5db06880 1767#ifdef PERL_MAD
f0e67a1d
Z
1768 char *start = s;
1769#endif /* PERL_MAD */
7918f24d 1770 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1771#ifdef PERL_MAD
cd81e915
NC
1772 if (PL_skipwhite) {
1773 sv_free(PL_skipwhite);
f0e67a1d 1774 PL_skipwhite = NULL;
5db06880 1775 }
f0e67a1d 1776#endif /* PERL_MAD */
3280af22 1777 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1778 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1779 s++;
f0e67a1d
Z
1780 } else {
1781 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1782 PL_bufptr = s;
f0998909
Z
1783 lex_read_space(LEX_KEEP_PREVIOUS |
1784 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1785 LEX_NO_NEXT_CHUNK : 0));
3280af22 1786 s = PL_bufptr;
f0e67a1d
Z
1787 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1788 if (PL_linestart > PL_bufptr)
1789 PL_bufptr = PL_linestart;
1790 return s;
463ee0b2 1791 }
5db06880 1792#ifdef PERL_MAD
f0e67a1d
Z
1793 if (PL_madskills)
1794 PL_skipwhite = newSVpvn(start, s-start);
1795#endif /* PERL_MAD */
5db06880 1796 return s;
a687059c 1797}
378cc40b 1798
ffb4593c
NT
1799/*
1800 * S_check_uni
1801 * Check the unary operators to ensure there's no ambiguity in how they're
1802 * used. An ambiguous piece of code would be:
1803 * rand + 5
1804 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1805 * the +5 is its argument.
1806 */
1807
76e3520e 1808STATIC void
cea2e8a9 1809S_check_uni(pTHX)
ba106d47 1810{
97aff369 1811 dVAR;
d4c19fe8
AL
1812 const char *s;
1813 const char *t;
2f3197b3 1814
3280af22 1815 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1816 return;
3280af22
NIS
1817 while (isSPACE(*PL_last_uni))
1818 PL_last_uni++;
c35e046a
AL
1819 s = PL_last_uni;
1820 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1821 s++;
3280af22 1822 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1823 return;
6136c704 1824
9b387841
NC
1825 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1826 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1827 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1828}
1829
ffb4593c
NT
1830/*
1831 * LOP : macro to build a list operator. Its behaviour has been replaced
1832 * with a subroutine, S_lop() for which LOP is just another name.
1833 */
1834
a0d0e21e
LW
1835#define LOP(f,x) return lop(f,x,s)
1836
ffb4593c
NT
1837/*
1838 * S_lop
1839 * Build a list operator (or something that might be one). The rules:
1840 * - if we have a next token, then it's a list operator [why?]
1841 * - if the next thing is an opening paren, then it's a function
1842 * - else it's a list operator
1843 */
1844
76e3520e 1845STATIC I32
a0be28da 1846S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1847{
97aff369 1848 dVAR;
7918f24d
NC
1849
1850 PERL_ARGS_ASSERT_LOP;
1851
6154021b 1852 pl_yylval.ival = f;
35c8bce7 1853 CLINE;
3280af22
NIS
1854 PL_expect = x;
1855 PL_bufptr = s;
1856 PL_last_lop = PL_oldbufptr;
eb160463 1857 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1858#ifdef PERL_MAD
1859 if (PL_lasttoke)
78cdf107 1860 goto lstop;
5db06880 1861#else
3280af22 1862 if (PL_nexttoke)
78cdf107 1863 goto lstop;
5db06880 1864#endif
79072805 1865 if (*s == '(')
bbf60fe6 1866 return REPORT(FUNC);
29595ff2 1867 s = PEEKSPACE(s);
79072805 1868 if (*s == '(')
bbf60fe6 1869 return REPORT(FUNC);
78cdf107
Z
1870 else {
1871 lstop:
1872 if (!PL_lex_allbrackets && PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
1873 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 1874 return REPORT(LSTOP);
78cdf107 1875 }
79072805
LW
1876}
1877
5db06880
NC
1878#ifdef PERL_MAD
1879 /*
1880 * S_start_force
1881 * Sets up for an eventual force_next(). start_force(0) basically does
1882 * an unshift, while start_force(-1) does a push. yylex removes items
1883 * on the "pop" end.
1884 */
1885
1886STATIC void
1887S_start_force(pTHX_ int where)
1888{
1889 int i;
1890
cd81e915 1891 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1892 where = PL_lasttoke;
cd81e915
NC
1893 assert(PL_curforce < 0 || PL_curforce == where);
1894 if (PL_curforce != where) {
5db06880
NC
1895 for (i = PL_lasttoke; i > where; --i) {
1896 PL_nexttoke[i] = PL_nexttoke[i-1];
1897 }
1898 PL_lasttoke++;
1899 }
cd81e915 1900 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1901 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1902 PL_curforce = where;
1903 if (PL_nextwhite) {
5db06880 1904 if (PL_madskills)
6b29d1f5 1905 curmad('^', newSVpvs(""));
cd81e915 1906 CURMAD('_', PL_nextwhite);
5db06880
NC
1907 }
1908}
1909
1910STATIC void
1911S_curmad(pTHX_ char slot, SV *sv)
1912{
1913 MADPROP **where;
1914
1915 if (!sv)
1916 return;
cd81e915
NC
1917 if (PL_curforce < 0)
1918 where = &PL_thismad;
5db06880 1919 else
cd81e915 1920 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1921
cd81e915 1922 if (PL_faketokens)
76f68e9b 1923 sv_setpvs(sv, "");
5db06880
NC
1924 else {
1925 if (!IN_BYTES) {
1926 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1927 SvUTF8_on(sv);
1928 else if (PL_encoding) {
1929 sv_recode_to_utf8(sv, PL_encoding);
1930 }
1931 }
1932 }
1933
1934 /* keep a slot open for the head of the list? */
1935 if (slot != '_' && *where && (*where)->mad_key == '^') {
1936 (*where)->mad_key = slot;
daba3364 1937 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1938 (*where)->mad_val = (void*)sv;
1939 }
1940 else
1941 addmad(newMADsv(slot, sv), where, 0);
1942}
1943#else
b3f24c00
MHM
1944# define start_force(where) NOOP
1945# define curmad(slot, sv) NOOP
5db06880
NC
1946#endif
1947
ffb4593c
NT
1948/*
1949 * S_force_next
9cbb5ea2 1950 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1951 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1952 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1953 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1954 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1955 */
1956
4e553d73 1957STATIC void
cea2e8a9 1958S_force_next(pTHX_ I32 type)
79072805 1959{
97aff369 1960 dVAR;
704d4215
GG
1961#ifdef DEBUGGING
1962 if (DEBUG_T_TEST) {
1963 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1964 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1965 }
1966#endif
5db06880 1967#ifdef PERL_MAD
cd81e915 1968 if (PL_curforce < 0)
5db06880 1969 start_force(PL_lasttoke);
cd81e915 1970 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1971 if (PL_lex_state != LEX_KNOWNEXT)
1972 PL_lex_defer = PL_lex_state;
1973 PL_lex_state = LEX_KNOWNEXT;
1974 PL_lex_expect = PL_expect;
cd81e915 1975 PL_curforce = -1;
5db06880 1976#else
3280af22
NIS
1977 PL_nexttype[PL_nexttoke] = type;
1978 PL_nexttoke++;
1979 if (PL_lex_state != LEX_KNOWNEXT) {
1980 PL_lex_defer = PL_lex_state;
1981 PL_lex_expect = PL_expect;
1982 PL_lex_state = LEX_KNOWNEXT;
79072805 1983 }
5db06880 1984#endif
79072805
LW
1985}
1986
28ac2b49
Z
1987void
1988Perl_yyunlex(pTHX)
1989{
a7aaec61
Z
1990 int yyc = PL_parser->yychar;
1991 if (yyc != YYEMPTY) {
1992 if (yyc) {
1993 start_force(-1);
1994 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1995 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
78cdf107 1996 PL_lex_allbrackets--;
a7aaec61 1997 PL_lex_brackets--;
78cdf107
Z
1998 yyc |= (3<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1999 } else if (yyc == '('/*)*/) {
2000 PL_lex_allbrackets--;
2001 yyc |= (2<<24);
a7aaec61
Z
2002 }
2003 force_next(yyc);
2004 }
28ac2b49
Z
2005 PL_parser->yychar = YYEMPTY;
2006 }
2007}
2008
d0a148a6 2009STATIC SV *
15f169a1 2010S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 2011{
97aff369 2012 dVAR;
740cce10 2013 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
2014 !IN_BYTES
2015 && UTF
2016 && !is_ascii_string((const U8*)start, len)
740cce10 2017 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
2018 return sv;
2019}
2020
ffb4593c
NT
2021/*
2022 * S_force_word
2023 * When the lexer knows the next thing is a word (for instance, it has
2024 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
2025 * it calls S_force_word to stick the next word into the PL_nexttoke/val
2026 * lookahead.
ffb4593c
NT
2027 *
2028 * Arguments:
b1b65b59 2029 * char *start : buffer position (must be within PL_linestr)
02b34bbe 2030 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
2031 * int check_keyword : if true, Perl checks to make sure the word isn't
2032 * a keyword (do this if the word is a label, e.g. goto FOO)
2033 * int allow_pack : if true, : characters will also be allowed (require,
2034 * use, etc. do this)
9cbb5ea2 2035 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
2036 */
2037
76e3520e 2038STATIC char *
cea2e8a9 2039S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 2040{
97aff369 2041 dVAR;
463ee0b2
LW
2042 register char *s;
2043 STRLEN len;
4e553d73 2044
7918f24d
NC
2045 PERL_ARGS_ASSERT_FORCE_WORD;
2046
29595ff2 2047 start = SKIPSPACE1(start);
463ee0b2 2048 s = start;
7e2040f0 2049 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2050 (allow_pack && *s == ':') ||
15f0808c 2051 (allow_initial_tick && *s == '\'') )
a0d0e21e 2052 {
3280af22 2053 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2054 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2055 return start;
cd81e915 2056 start_force(PL_curforce);
5db06880
NC
2057 if (PL_madskills)
2058 curmad('X', newSVpvn(start,s-start));
463ee0b2 2059 if (token == METHOD) {
29595ff2 2060 s = SKIPSPACE1(s);
463ee0b2 2061 if (*s == '(')
3280af22 2062 PL_expect = XTERM;
463ee0b2 2063 else {
3280af22 2064 PL_expect = XOPERATOR;
463ee0b2 2065 }
79072805 2066 }
e74e6b3d 2067 if (PL_madskills)
63575281 2068 curmad('g', newSVpvs( "forced" ));
9ded7720 2069 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2070 = (OP*)newSVOP(OP_CONST,0,
2071 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2072 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2073 force_next(token);
2074 }
2075 return s;
2076}
2077
ffb4593c
NT
2078/*
2079 * S_force_ident
9cbb5ea2 2080 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2081 * text only contains the "foo" portion. The first argument is a pointer
2082 * to the "foo", and the second argument is the type symbol to prefix.
2083 * Forces the next token to be a "WORD".
9cbb5ea2 2084 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2085 */
2086
76e3520e 2087STATIC void
bfed75c6 2088S_force_ident(pTHX_ register const char *s, int kind)
79072805 2089{
97aff369 2090 dVAR;
7918f24d
NC
2091
2092 PERL_ARGS_ASSERT_FORCE_IDENT;
2093
c35e046a 2094 if (*s) {
90e5519e 2095 const STRLEN len = strlen(s);
728847b1
BF
2096 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(s, len,
2097 UTF ? SVf_UTF8 : 0));
cd81e915 2098 start_force(PL_curforce);
9ded7720 2099 NEXTVAL_NEXTTOKE.opval = o;
79072805 2100 force_next(WORD);
748a9306 2101 if (kind) {
11343788 2102 o->op_private = OPpCONST_ENTERED;
55497cff 2103 /* XXX see note in pp_entereval() for why we forgo typo
2104 warnings if the symbol must be introduced in an eval.
2105 GSAR 96-10-12 */
90e5519e 2106 gv_fetchpvn_flags(s, len,
728847b1
BF
2107 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2108 : GV_ADD) | ( UTF ? SVf_UTF8 : 0 ),
90e5519e
NC
2109 kind == '$' ? SVt_PV :
2110 kind == '@' ? SVt_PVAV :
2111 kind == '%' ? SVt_PVHV :
a0d0e21e 2112 SVt_PVGV
90e5519e 2113 );
748a9306 2114 }
79072805
LW
2115 }
2116}
2117
1571675a
GS
2118NV
2119Perl_str_to_version(pTHX_ SV *sv)
2120{
2121 NV retval = 0.0;
2122 NV nshift = 1.0;
2123 STRLEN len;
cfd0369c 2124 const char *start = SvPV_const(sv,len);
9d4ba2ae 2125 const char * const end = start + len;
504618e9 2126 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2127
2128 PERL_ARGS_ASSERT_STR_TO_VERSION;
2129
1571675a 2130 while (start < end) {
ba210ebe 2131 STRLEN skip;
1571675a
GS
2132 UV n;
2133 if (utf)
9041c2e3 2134 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2135 else {
2136 n = *(U8*)start;
2137 skip = 1;
2138 }
2139 retval += ((NV)n)/nshift;
2140 start += skip;
2141 nshift *= 1000;
2142 }
2143 return retval;
2144}
2145
4e553d73 2146/*
ffb4593c
NT
2147 * S_force_version
2148 * Forces the next token to be a version number.
e759cc13
RGS
2149 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2150 * and if "guessing" is TRUE, then no new token is created (and the caller
2151 * must use an alternative parsing method).
ffb4593c
NT
2152 */
2153
76e3520e 2154STATIC char *
e759cc13 2155S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2156{
97aff369 2157 dVAR;
5f66b61c 2158 OP *version = NULL;
44dcb63b 2159 char *d;
5db06880
NC
2160#ifdef PERL_MAD
2161 I32 startoff = s - SvPVX(PL_linestr);
2162#endif
89bfa8cd 2163
7918f24d
NC
2164 PERL_ARGS_ASSERT_FORCE_VERSION;
2165
29595ff2 2166 s = SKIPSPACE1(s);
89bfa8cd 2167
44dcb63b 2168 d = s;
dd629d5b 2169 if (*d == 'v')
44dcb63b 2170 d++;
44dcb63b 2171 if (isDIGIT(*d)) {
e759cc13
RGS
2172 while (isDIGIT(*d) || *d == '_' || *d == '.')
2173 d++;
5db06880
NC
2174#ifdef PERL_MAD
2175 if (PL_madskills) {
cd81e915 2176 start_force(PL_curforce);
5db06880
NC
2177 curmad('X', newSVpvn(s,d-s));
2178 }
2179#endif
4e4da3ac 2180 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2181 SV *ver;
8d08d9ba
DG
2182#ifdef USE_LOCALE_NUMERIC
2183 char *loc = setlocale(LC_NUMERIC, "C");
2184#endif
6154021b 2185 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2186#ifdef USE_LOCALE_NUMERIC
2187 setlocale(LC_NUMERIC, loc);
2188#endif
6154021b 2189 version = pl_yylval.opval;
dd629d5b
GS
2190 ver = cSVOPx(version)->op_sv;
2191 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2192 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2193 SvNV_set(ver, str_to_version(ver));
1571675a 2194 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2195 }
89bfa8cd 2196 }
5db06880
NC
2197 else if (guessing) {
2198#ifdef PERL_MAD
2199 if (PL_madskills) {
cd81e915
NC
2200 sv_free(PL_nextwhite); /* let next token collect whitespace */
2201 PL_nextwhite = 0;
5db06880
NC
2202 s = SvPVX(PL_linestr) + startoff;
2203 }
2204#endif
e759cc13 2205 return s;
5db06880 2206 }
89bfa8cd 2207 }
2208
5db06880
NC
2209#ifdef PERL_MAD
2210 if (PL_madskills && !version) {
cd81e915
NC
2211 sv_free(PL_nextwhite); /* let next token collect whitespace */
2212 PL_nextwhite = 0;
5db06880
NC
2213 s = SvPVX(PL_linestr) + startoff;
2214 }
2215#endif
89bfa8cd 2216 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2217 start_force(PL_curforce);
9ded7720 2218 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2219 force_next(WORD);
89bfa8cd 2220
e759cc13 2221 return s;
89bfa8cd 2222}
2223
ffb4593c 2224/*
91152fc1
DG
2225 * S_force_strict_version
2226 * Forces the next token to be a version number using strict syntax rules.
2227 */
2228
2229STATIC char *
2230S_force_strict_version(pTHX_ char *s)
2231{
2232 dVAR;
2233 OP *version = NULL;
2234#ifdef PERL_MAD
2235 I32 startoff = s - SvPVX(PL_linestr);
2236#endif
2237 const char *errstr = NULL;
2238
2239 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2240
2241 while (isSPACE(*s)) /* leading whitespace */
2242 s++;
2243
2244 if (is_STRICT_VERSION(s,&errstr)) {
2245 SV *ver = newSV(0);
2246 s = (char *)scan_version(s, ver, 0);
2247 version = newSVOP(OP_CONST, 0, ver);
2248 }
4e4da3ac
Z
2249 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2250 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2251 {
91152fc1
DG
2252 PL_bufptr = s;
2253 if (errstr)
2254 yyerror(errstr); /* version required */
2255 return s;
2256 }
2257
2258#ifdef PERL_MAD
2259 if (PL_madskills && !version) {
2260 sv_free(PL_nextwhite); /* let next token collect whitespace */
2261 PL_nextwhite = 0;
2262 s = SvPVX(PL_linestr) + startoff;
2263 }
2264#endif
2265 /* NOTE: The parser sees the package name and the VERSION swapped */
2266 start_force(PL_curforce);
2267 NEXTVAL_NEXTTOKE.opval = version;
2268 force_next(WORD);
2269
2270 return s;
2271}
2272
2273/*
ffb4593c
NT
2274 * S_tokeq
2275 * Tokenize a quoted string passed in as an SV. It finds the next
2276 * chunk, up to end of string or a backslash. It may make a new
2277 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2278 * turns \\ into \.
2279 */
2280
76e3520e 2281STATIC SV *
cea2e8a9 2282S_tokeq(pTHX_ SV *sv)
79072805 2283{
97aff369 2284 dVAR;
79072805
LW
2285 register char *s;
2286 register char *send;
2287 register char *d;
b3ac6de7
IZ
2288 STRLEN len = 0;
2289 SV *pv = sv;
79072805 2290
7918f24d
NC
2291 PERL_ARGS_ASSERT_TOKEQ;
2292
79072805 2293 if (!SvLEN(sv))
b3ac6de7 2294 goto finish;
79072805 2295
a0d0e21e 2296 s = SvPV_force(sv, len);
21a311ee 2297 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2298 goto finish;
463ee0b2 2299 send = s + len;
dcb21ed6
NC
2300 /* This is relying on the SV being "well formed" with a trailing '\0' */
2301 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2302 s++;
2303 if (s == send)
b3ac6de7 2304 goto finish;
79072805 2305 d = s;
be4731d2 2306 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2307 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2308 }
79072805
LW
2309 while (s < send) {
2310 if (*s == '\\') {
a0d0e21e 2311 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2312 s++; /* all that, just for this */
2313 }
2314 *d++ = *s++;
2315 }
2316 *d = '\0';
95a20fc0 2317 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2318 finish:
3280af22 2319 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2320 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2321 return sv;
2322}
2323
ffb4593c
NT
2324/*
2325 * Now come three functions related to double-quote context,
2326 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2327 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2328 * interact with PL_lex_state, and create fake ( ... ) argument lists
2329 * to handle functions and concatenation.
2330 * They assume that whoever calls them will be setting up a fake
2331 * join call, because each subthing puts a ',' after it. This lets
2332 * "lower \luPpEr"
2333 * become
2334 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2335 *
2336 * (I'm not sure whether the spurious commas at the end of lcfirst's
2337 * arguments and join's arguments are created or not).
2338 */
2339
2340/*
2341 * S_sublex_start
6154021b 2342 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2343 *
2344 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2345 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2346 *
2347 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2348 *
2349 * Everything else becomes a FUNC.
2350 *
2351 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2352 * had an OP_CONST or OP_READLINE). This just sets us up for a
2353 * call to S_sublex_push().
2354 */
2355
76e3520e 2356STATIC I32
cea2e8a9 2357S_sublex_start(pTHX)
79072805 2358{
97aff369 2359 dVAR;
6154021b 2360 register const I32 op_type = pl_yylval.ival;
79072805
LW
2361
2362 if (op_type == OP_NULL) {
6154021b 2363 pl_yylval.opval = PL_lex_op;
5f66b61c 2364 PL_lex_op = NULL;
79072805
LW
2365 return THING;
2366 }
2367 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2368 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2369
2370 if (SvTYPE(sv) == SVt_PVIV) {
2371 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2372 STRLEN len;
96a5add6 2373 const char * const p = SvPV_const(sv, len);
740cce10 2374 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2375 SvREFCNT_dec(sv);
2376 sv = nsv;
4e553d73 2377 }
6154021b 2378 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2379 PL_lex_stuff = NULL;
6f33ba73
RGS
2380 /* Allow <FH> // "foo" */
2381 if (op_type == OP_READLINE)
2382 PL_expect = XTERMORDORDOR;
79072805
LW
2383 return THING;
2384 }
e3f73d4e
RGS
2385 else if (op_type == OP_BACKTICK && PL_lex_op) {
2386 /* readpipe() vas overriden */
2387 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2388 pl_yylval.opval = PL_lex_op;
9b201d7d 2389 PL_lex_op = NULL;
e3f73d4e
RGS
2390 PL_lex_stuff = NULL;
2391 return THING;
2392 }
79072805 2393
3280af22 2394 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2395 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2396 PL_sublex_info.sub_op = PL_lex_op;
2397 PL_lex_state = LEX_INTERPPUSH;
55497cff 2398
3280af22
NIS
2399 PL_expect = XTERM;
2400 if (PL_lex_op) {
6154021b 2401 pl_yylval.opval = PL_lex_op;
5f66b61c 2402 PL_lex_op = NULL;
55497cff 2403 return PMFUNC;
2404 }
2405 else
2406 return FUNC;
2407}
2408
ffb4593c
NT
2409/*
2410 * S_sublex_push
2411 * Create a new scope to save the lexing state. The scope will be
2412 * ended in S_sublex_done. Returns a '(', starting the function arguments
2413 * to the uc, lc, etc. found before.
2414 * Sets PL_lex_state to LEX_INTERPCONCAT.
2415 */
2416
76e3520e 2417STATIC I32
cea2e8a9 2418S_sublex_push(pTHX)
55497cff 2419{
27da23d5 2420 dVAR;
f46d017c 2421 ENTER;
55497cff 2422
3280af22 2423 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2424 SAVEBOOL(PL_lex_dojoin);
3280af22 2425 SAVEI32(PL_lex_brackets);
78cdf107
Z
2426 SAVEI32(PL_lex_allbrackets);
2427 SAVEI8(PL_lex_fakeeof);
3280af22
NIS
2428 SAVEI32(PL_lex_casemods);
2429 SAVEI32(PL_lex_starts);
651b5b28 2430 SAVEI8(PL_lex_state);
7766f137 2431 SAVEVPTR(PL_lex_inpat);
98246f1e 2432 SAVEI16(PL_lex_inwhat);
57843af0 2433 SAVECOPLINE(PL_curcop);
3280af22 2434 SAVEPPTR(PL_bufptr);
8452ff4b 2435 SAVEPPTR(PL_bufend);
3280af22
NIS
2436 SAVEPPTR(PL_oldbufptr);
2437 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2438 SAVEPPTR(PL_last_lop);
2439 SAVEPPTR(PL_last_uni);
3280af22
NIS
2440 SAVEPPTR(PL_linestart);
2441 SAVESPTR(PL_linestr);
8edd5f42
RGS
2442 SAVEGENERICPV(PL_lex_brackstack);
2443 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2444
2445 PL_linestr = PL_lex_stuff;
a0714e2c 2446 PL_lex_stuff = NULL;
3280af22 2447
9cbb5ea2
GS
2448 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2449 = SvPVX(PL_linestr);
3280af22 2450 PL_bufend += SvCUR(PL_linestr);
bd61b366 2451 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2452 SAVEFREESV(PL_linestr);
2453
2454 PL_lex_dojoin = FALSE;
2455 PL_lex_brackets = 0;
78cdf107
Z
2456 PL_lex_allbrackets = 0;
2457 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
a02a5408
JC
2458 Newx(PL_lex_brackstack, 120, char);
2459 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2460 PL_lex_casemods = 0;
2461 *PL_lex_casestack = '\0';
2462 PL_lex_starts = 0;
2463 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2464 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2465
2466 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2467 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2468 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2469 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2470 else
5f66b61c 2471 PL_lex_inpat = NULL;
79072805 2472
55497cff 2473 return '(';
79072805
LW
2474}
2475
ffb4593c
NT
2476/*
2477 * S_sublex_done
2478 * Restores lexer state after a S_sublex_push.
2479 */
2480
76e3520e 2481STATIC I32
cea2e8a9 2482S_sublex_done(pTHX)
79072805 2483{
27da23d5 2484 dVAR;
3280af22 2485 if (!PL_lex_starts++) {
396482e1 2486 SV * const sv = newSVpvs("");
9aa983d2
JH
2487 if (SvUTF8(PL_linestr))
2488 SvUTF8_on(sv);
3280af22 2489 PL_expect = XOPERATOR;
6154021b 2490 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2491 return THING;
2492 }
2493
3280af22
NIS
2494 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2495 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2496 return yylex();
79072805
LW
2497 }
2498
ffb4593c 2499 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2500 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2501 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2502 PL_linestr = PL_lex_repl;
2503 PL_lex_inpat = 0;
2504 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2505 PL_bufend += SvCUR(PL_linestr);
bd61b366 2506 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2507 SAVEFREESV(PL_linestr);
2508 PL_lex_dojoin = FALSE;
2509 PL_lex_brackets = 0;
78cdf107
Z
2510 PL_lex_allbrackets = 0;
2511 PL_lex_fakeeof = LEX_FAKEEOF_NEVER;
3280af22
NIS
2512 PL_lex_casemods = 0;
2513 *PL_lex_casestack = '\0';
2514 PL_lex_starts = 0;
25da4f38 2515 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2516 PL_lex_state = LEX_INTERPNORMAL;
2517 PL_lex_starts++;
e9fa98b2
HS
2518 /* we don't clear PL_lex_repl here, so that we can check later
2519 whether this is an evalled subst; that means we rely on the
2520 logic to ensure sublex_done() is called again only via the
2521 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2522 }
e9fa98b2 2523 else {
3280af22 2524 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2525 PL_lex_repl = NULL;
e9fa98b2 2526 }
79072805 2527 return ',';
ffed7fef
LW
2528 }
2529 else {
5db06880
NC
2530#ifdef PERL_MAD
2531 if (PL_madskills) {
cd81e915
NC
2532 if (PL_thiswhite) {
2533 if (!PL_endwhite)
6b29d1f5 2534 PL_endwhite = newSVpvs("");
cd81e915
NC
2535 sv_catsv(PL_endwhite, PL_thiswhite);
2536 PL_thiswhite = 0;
2537 }
2538 if (PL_thistoken)
76f68e9b 2539 sv_setpvs(PL_thistoken,"");
5db06880 2540 else
cd81e915 2541 PL_realtokenstart = -1;
5db06880
NC
2542 }
2543#endif
f46d017c 2544 LEAVE;
3280af22
NIS
2545 PL_bufend = SvPVX(PL_linestr);
2546 PL_bufend += SvCUR(PL_linestr);
2547 PL_expect = XOPERATOR;
09bef843 2548 PL_sublex_info.sub_inwhat = 0;
79072805 2549 return ')';
ffed7fef
LW
2550 }
2551}
2552
02aa26ce
NT
2553/*
2554 scan_const
2555
2556 Extracts a pattern, double-quoted string, or transliteration. This
2557 is terrifying code.
2558
94def140 2559 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2560 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2561 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2562
94def140
TS
2563 Returns a pointer to the character scanned up to. If this is
2564 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2565 successfully parsed), will leave an OP for the substring scanned
6154021b 2566 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2567 by looking at the next characters herself.
2568
02aa26ce
NT
2569 In patterns:
2570 backslashes:
ff3f963a 2571 constants: \N{NAME} only
02aa26ce
NT
2572 case and quoting: \U \Q \E
2573 stops on @ and $, but not for $ as tail anchor
2574
2575 In transliterations:
2576 characters are VERY literal, except for - not at the start or end
94def140
TS
2577 of the string, which indicates a range. If the range is in bytes,
2578 scan_const expands the range to the full set of intermediate
2579 characters. If the range is in utf8, the hyphen is replaced with
2580 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2581
2582 In double-quoted strings:
2583 backslashes:
2584 double-quoted style: \r and \n
ff3f963a 2585 constants: \x31, etc.
94def140 2586 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2587 case and quoting: \U \Q \E
2588 stops on @ and $
2589
2590 scan_const does *not* construct ops to handle interpolated strings.
2591 It stops processing as soon as it finds an embedded $ or @ variable
2592 and leaves it to the caller to work out what's going on.
2593
94def140
TS
2594 embedded arrays (whether in pattern or not) could be:
2595 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2596
2597 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2598
2599 $ in pattern could be $foo or could be tail anchor. Assumption:
2600 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2601 followed by one of "()| \r\n\t"
02aa26ce
NT
2602
2603 \1 (backreferences) are turned into $1
2604
2605 The structure of the code is
2606 while (there's a character to process) {
94def140
TS
2607 handle transliteration ranges
2608 skip regexp comments /(?#comment)/ and codes /(?{code})/
2609 skip #-initiated comments in //x patterns
2610 check for embedded arrays
02aa26ce
NT
2611 check for embedded scalars
2612 if (backslash) {
94def140 2613 deprecate \1 in substitution replacements
02aa26ce
NT
2614 handle string-changing backslashes \l \U \Q \E, etc.
2615 switch (what was escaped) {
94def140 2616 handle \- in a transliteration (becomes a literal -)
ff3f963a 2617 if a pattern and not \N{, go treat as regular character
94def140
TS
2618 handle \132 (octal characters)
2619 handle \x15 and \x{1234} (hex characters)
ff3f963a 2620 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2621 handle \cV (control characters)
2622 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2623 } (end switch)
77a135fe 2624 continue
02aa26ce 2625 } (end if backslash)
77a135fe 2626 handle regular character
02aa26ce 2627 } (end while character to read)
4e553d73 2628
02aa26ce
NT
2629*/
2630
76e3520e 2631STATIC char *
cea2e8a9 2632S_scan_const(pTHX_ char *start)
79072805 2633{
97aff369 2634 dVAR;
3280af22 2635 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2636 SV *sv = newSV(send - start); /* sv for the constant. See
2637 note below on sizing. */
02aa26ce
NT
2638 register char *s = start; /* start of the constant */
2639 register char *d = SvPVX(sv); /* destination for copies */
2640 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2641 bool didrange = FALSE; /* did we just finish a range? */
b953e60c
KW
2642 bool has_utf8 = FALSE; /* Output constant is UTF8 */
2643 bool this_utf8 = cBOOL(UTF); /* Is the source string assumed
77a135fe
KW
2644 to be UTF8? But, this can
2645 show as true when the source
2646 isn't utf8, as for example
2647 when it is entirely composed
2648 of hex constants */
2649
2650 /* Note on sizing: The scanned constant is placed into sv, which is
2651 * initialized by newSV() assuming one byte of output for every byte of
2652 * input. This routine expects newSV() to allocate an extra byte for a
2653 * trailing NUL, which this routine will append if it gets to the end of
2654 * the input. There may be more bytes of input than output (eg., \N{LATIN
2655 * CAPITAL LETTER A}), or more output than input if the constant ends up
2656 * recoded to utf8, but each time a construct is found that might increase
2657 * the needed size, SvGROW() is called. Its size parameter each time is
2658 * based on the best guess estimate at the time, namely the length used so
2659 * far, plus the length the current construct will occupy, plus room for
2660 * the trailing NUL, plus one byte for every input byte still unscanned */
2661
012bcf8d 2662 UV uv;
4c3a8340
TS
2663#ifdef EBCDIC
2664 UV literal_endpoint = 0;
e294cc5d 2665 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2666#endif
012bcf8d 2667
7918f24d
NC
2668 PERL_ARGS_ASSERT_SCAN_CONST;
2669
bb16bae8 2670 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2671 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2672 /* If we are doing a trans and we know we want UTF8 set expectation */
2673 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2674 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2675 }
2676
2677
79072805 2678 while (s < send || dorange) {
ff3f963a 2679
02aa26ce 2680 /* get transliterations out of the way (they're most literal) */
3280af22 2681 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2682 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2683 if (dorange) {
1ba5c669
JH
2684 I32 i; /* current expanded character */
2685 I32 min; /* first character in range */
2686 I32 max; /* last character in range */
02aa26ce 2687
e294cc5d
JH
2688#ifdef EBCDIC
2689 UV uvmax = 0;
2690#endif
2691
2692 if (has_utf8
2693#ifdef EBCDIC
2694 && !native_range
2695#endif
2696 ) {
9d4ba2ae 2697 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2698 char *e = d++;
2699 while (e-- > c)
2700 *(e + 1) = *e;
25716404 2701 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2702 /* mark the range as done, and continue */
2703 dorange = FALSE;
2704 didrange = TRUE;
2705 continue;
2706 }
2b9d42f0 2707
95a20fc0 2708 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2709#ifdef EBCDIC
2710 SvGROW(sv,
2711 SvLEN(sv) + (has_utf8 ?
2712 (512 - UTF_CONTINUATION_MARK +
2713 UNISKIP(0x100))
2714 : 256));
2715 /* How many two-byte within 0..255: 128 in UTF-8,
2716 * 96 in UTF-8-mod. */
2717#else
9cbb5ea2 2718 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2719#endif
9cbb5ea2 2720 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2721#ifdef EBCDIC
2722 if (has_utf8) {
2723 int j;
2724 for (j = 0; j <= 1; j++) {
2725 char * const c = (char*)utf8_hop((U8*)d, -1);
2726 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2727 if (j)
2728 min = (U8)uv;
2729 else if (uv < 256)
2730 max = (U8)uv;
2731 else {
2732 max = (U8)0xff; /* only to \xff */
2733 uvmax = uv; /* \x{100} to uvmax */
2734 }
2735 d = c; /* eat endpoint chars */
2736 }
2737 }
2738 else {
2739#endif
2740 d -= 2; /* eat the first char and the - */
2741 min = (U8)*d; /* first char in range */
2742 max = (U8)d[1]; /* last char in range */
2743#ifdef EBCDIC
2744 }
2745#endif
8ada0baa 2746
c2e66d9e 2747 if (min > max) {
01ec43d0 2748 Perl_croak(aTHX_
d1573ac7 2749 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2750 (char)min, (char)max);
c2e66d9e
GS
2751 }
2752
c7f1f016 2753#ifdef EBCDIC
4c3a8340
TS
2754 if (literal_endpoint == 2 &&
2755 ((isLOWER(min) && isLOWER(max)) ||
2756 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2757 if (isLOWER(min)) {
2758 for (i = min; i <= max; i++)
2759 if (isLOWER(i))
db42d148 2760 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2761 } else {
2762 for (i = min; i <= max; i++)
2763 if (isUPPER(i))
db42d148 2764 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2765 }
2766 }
2767 else
2768#endif
2769 for (i = min; i <= max; i++)
e294cc5d
JH
2770#ifdef EBCDIC
2771 if (has_utf8) {
2772 const U8 ch = (U8)NATIVE_TO_UTF(i);
2773 if (UNI_IS_INVARIANT(ch))
2774 *d++ = (U8)i;
2775 else {
2776 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2777 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2778 }
2779 }
2780 else
2781#endif
2782 *d++ = (char)i;
2783
2784#ifdef EBCDIC
2785 if (uvmax) {
2786 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2787 if (uvmax > 0x101)
2788 *d++ = (char)UTF_TO_NATIVE(0xff);
2789 if (uvmax > 0x100)
2790 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2791 }
2792#endif
02aa26ce
NT
2793
2794 /* mark the range as done, and continue */
79072805 2795 dorange = FALSE;
01ec43d0 2796 didrange = TRUE;
4c3a8340
TS
2797#ifdef EBCDIC
2798 literal_endpoint = 0;
2799#endif
79072805 2800 continue;
4e553d73 2801 }
02aa26ce
NT
2802
2803 /* range begins (ignore - as first or last char) */
79072805 2804 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2805 if (didrange) {
1fafa243 2806 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2807 }
e294cc5d
JH
2808 if (has_utf8
2809#ifdef EBCDIC
2810 && !native_range
2811#endif
2812 ) {
25716404 2813 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2814 s++;
2815 continue;
2816 }
79072805
LW
2817 dorange = TRUE;
2818 s++;
01ec43d0
GS
2819 }
2820 else {
2821 didrange = FALSE;
4c3a8340
TS
2822#ifdef EBCDIC
2823 literal_endpoint = 0;
e294cc5d 2824 native_range = TRUE;
4c3a8340 2825#endif
01ec43d0 2826 }
79072805 2827 }
02aa26ce
NT
2828
2829 /* if we get here, we're not doing a transliteration */
2830
0f5d15d6
IZ
2831 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2832 except for the last char, which will be done separately. */
3280af22 2833 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2834 if (s[2] == '#') {
e994fd66 2835 while (s+1 < send && *s != ')')
db42d148 2836 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2837 }
2838 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2839 || (s[2] == '?' && s[3] == '{'))
155aba94 2840 {
cc6b7395 2841 I32 count = 1;
0f5d15d6 2842 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2843 char c;
2844
d9f97599
GS
2845 while (count && (c = *regparse)) {
2846 if (c == '\\' && regparse[1])
2847 regparse++;
4e553d73 2848 else if (c == '{')
cc6b7395 2849 count++;
4e553d73 2850 else if (c == '}')
cc6b7395 2851 count--;
d9f97599 2852 regparse++;
cc6b7395 2853 }
e994fd66 2854 if (*regparse != ')')
5bdf89e7 2855 regparse--; /* Leave one char for continuation. */
0f5d15d6 2856 while (s < regparse)
db42d148 2857 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2858 }
748a9306 2859 }
02aa26ce
NT
2860
2861 /* likewise skip #-initiated comments in //x patterns */
3280af22 2862 else if (*s == '#' && PL_lex_inpat &&
73134a2e 2863 ((PMOP*)PL_lex_inpat)->op_pmflags & RXf_PMf_EXTENDED) {
748a9306 2864 while (s+1 < send && *s != '\n')
db42d148 2865 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2866 }
02aa26ce 2867
5d1d4326 2868 /* check for embedded arrays
da6eedaa 2869 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2870 */
1749ea0d
TS
2871 else if (*s == '@' && s[1]) {
2872 if (isALNUM_lazy_if(s+1,UTF))
2873 break;
2874 if (strchr(":'{$", s[1]))
2875 break;
2876 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2877 break; /* in regexp, neither @+ nor @- are interpolated */
2878 }
02aa26ce
NT
2879
2880 /* check for embedded scalars. only stop if we're sure it's a
2881 variable.
2882 */
79072805 2883 else if (*s == '$') {
3280af22 2884 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2885 break;
77772344 2886 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2887 if (s[1] == '\\') {
2888 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2889 "Possible unintended interpolation of $\\ in regex");
77772344 2890 }
79072805 2891 break; /* in regexp, $ might be tail anchor */
77772344 2892 }
79072805 2893 }
02aa26ce 2894
2b9d42f0
NIS
2895 /* End of else if chain - OP_TRANS rejoin rest */
2896
02aa26ce 2897 /* backslashes */
79072805 2898 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2899 char* e; /* Can be used for ending '}', etc. */
2900
79072805 2901 s++;
02aa26ce 2902
7d0fc23c
KW
2903 /* warn on \1 - \9 in substitution replacements, but note that \11
2904 * is an octal; and \19 is \1 followed by '9' */
3280af22 2905 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2906 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2907 {
a2a5de95 2908 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2909 *--s = '$';
2910 break;
2911 }
02aa26ce
NT
2912
2913 /* string-change backslash escapes */
3280af22 2914 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2915 --s;
2916 break;
2917 }
ff3f963a
KW
2918 /* In a pattern, process \N, but skip any other backslash escapes.
2919 * This is because we don't want to translate an escape sequence
2920 * into a meta symbol and have the regex compiler use the meta
2921 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2922 * in spite of this, we do have to process \N here while the proper
2923 * charnames handler is in scope. See bugs #56444 and #62056.
2924 * There is a complication because \N in a pattern may also stand
2925 * for 'match a non-nl', and not mean a charname, in which case its
2926 * processing should be deferred to the regex compiler. To be a
2927 * charname it must be followed immediately by a '{', and not look
2928 * like \N followed by a curly quantifier, i.e., not something like
2929 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2930 * quantifier */
2931 else if (PL_lex_inpat
2932 && (*s != 'N'
2933 || s[1] != '{'
2934 || regcurly(s + 1)))
2935 {
cc74c5bd
TS
2936 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2937 goto default_action;
2938 }
02aa26ce 2939
79072805 2940 switch (*s) {
02aa26ce
NT
2941
2942 /* quoted - in transliterations */
79072805 2943 case '-':
3280af22 2944 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2945 *d++ = *s++;
2946 continue;
2947 }
2948 /* FALL THROUGH */
2949 default:
11b8faa4 2950 {
a2a5de95
NC
2951 if ((isALPHA(*s) || isDIGIT(*s)))
2952 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2953 "Unrecognized escape \\%c passed through",
2954 *s);
11b8faa4 2955 /* default action is to copy the quoted character */
f9a63242 2956 goto default_action;
11b8faa4 2957 }
02aa26ce 2958
632403cc 2959 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2960 case '0': case '1': case '2': case '3':
2961 case '4': case '5': case '6': case '7':
ba210ebe 2962 {
53305cf1
NC
2963 I32 flags = 0;
2964 STRLEN len = 3;
77a135fe 2965 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2966 s += len;
2967 }
012bcf8d 2968 goto NUM_ESCAPE_INSERT;
02aa26ce 2969
f0a2b745
KW
2970 /* eg. \o{24} indicates the octal constant \024 */
2971 case 'o':
2972 {
2973 STRLEN len;
454155d9 2974 const char* error;
f0a2b745 2975
454155d9 2976 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2977 s += len;
454155d9 2978 if (! valid) {
f0a2b745
KW
2979 yyerror(error);
2980 continue;
2981 }
2982 goto NUM_ESCAPE_INSERT;
2983 }
2984
77a135fe 2985 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2986 case 'x':
a0ed51b3
LW
2987 ++s;
2988 if (*s == '{') {
9d4ba2ae 2989 char* const e = strchr(s, '}');
a4c04bdc
NC
2990 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2991 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2992 STRLEN len;
355860ce 2993
53305cf1 2994 ++s;
adaeee49 2995 if (!e) {
a0ed51b3 2996 yyerror("Missing right brace on \\x{}");
355860ce 2997 continue;
ba210ebe 2998 }
53305cf1 2999 len = e - s;
77a135fe 3000 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 3001 s = e + 1;
a0ed51b3
LW
3002 }
3003 else {
ba210ebe 3004 {
53305cf1 3005 STRLEN len = 2;
a4c04bdc 3006 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 3007 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
3008 s += len;
3009 }
012bcf8d
GS
3010 }
3011
3012 NUM_ESCAPE_INSERT:
ff3f963a
KW
3013 /* Insert oct or hex escaped character. There will always be
3014 * enough room in sv since such escapes will be longer than any
3015 * UTF-8 sequence they can end up as, except if they force us
3016 * to recode the rest of the string into utf8 */
ba7cea30 3017
77a135fe 3018 /* Here uv is the ordinal of the next character being added in
ff3f963a 3019 * unicode (converted from native). */
77a135fe 3020 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 3021 if (!has_utf8 && uv > 255) {
77a135fe
KW
3022 /* Might need to recode whatever we have accumulated so
3023 * far if it contains any chars variant in utf8 or
3024 * utf-ebcdic. */
3025
3026 SvCUR_set(sv, d - SvPVX_const(sv));
3027 SvPOK_on(sv);
3028 *d = '\0';
77a135fe 3029 /* See Note on sizing above. */
7bf79863
KW
3030 sv_utf8_upgrade_flags_grow(sv,
3031 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3032 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
3033 d = SvPVX(sv) + SvCUR(sv);
3034 has_utf8 = TRUE;
012bcf8d
GS
3035 }
3036
77a135fe
KW
3037 if (has_utf8) {
3038 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
3039 if (PL_lex_inwhat == OP_TRANS &&
3040 PL_sublex_info.sub_op) {
3041 PL_sublex_info.sub_op->op_private |=
3042 (PL_lex_repl ? OPpTRANS_FROM_UTF
3043 : OPpTRANS_TO_UTF);
f9a63242 3044 }
e294cc5d
JH
3045#ifdef EBCDIC
3046 if (uv > 255 && !dorange)
3047 native_range = FALSE;
3048#endif
012bcf8d 3049 }
a0ed51b3 3050 else {
012bcf8d 3051 *d++ = (char)uv;
a0ed51b3 3052 }
012bcf8d
GS
3053 }
3054 else {
c4d5f83a 3055 *d++ = (char) uv;
a0ed51b3 3056 }
79072805 3057 continue;
02aa26ce 3058
4a2d328f 3059 case 'N':
ff3f963a
KW
3060 /* In a non-pattern \N must be a named character, like \N{LATIN
3061 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3062 * mean to match a non-newline. For non-patterns, named
3063 * characters are converted to their string equivalents. In
3064 * patterns, named characters are not converted to their
3065 * ultimate forms for the same reasons that other escapes
3066 * aren't. Instead, they are converted to the \N{U+...} form
3067 * to get the value from the charnames that is in effect right
3068 * now, while preserving the fact that it was a named character
3069 * so that the regex compiler knows this */
3070
3071 /* This section of code doesn't generally use the
3072 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3073 * a close examination of this macro and determined it is a
3074 * no-op except on utfebcdic variant characters. Every
3075 * character generated by this that would normally need to be
3076 * enclosed by this macro is invariant, so the macro is not
7538f724
KW
3077 * needed, and would complicate use of copy(). XXX There are
3078 * other parts of this file where the macro is used
3079 * inconsistently, but are saved by it being a no-op */
ff3f963a
KW
3080
3081 /* The structure of this section of code (besides checking for
3082 * errors and upgrading to utf8) is:
3083 * Further disambiguate between the two meanings of \N, and if
3084 * not a charname, go process it elsewhere
0a96133f
KW
3085 * If of form \N{U+...}, pass it through if a pattern;
3086 * otherwise convert to utf8
3087 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3088 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3089
3090 /* Here, s points to the 'N'; the test below is guaranteed to
3091 * succeed if we are being called on a pattern as we already
3092 * know from a test above that the next character is a '{'.
3093 * On a non-pattern \N must mean 'named sequence, which
3094 * requires braces */
3095 s++;
3096 if (*s != '{') {
3097 yyerror("Missing braces on \\N{}");
3098 continue;
3099 }
3100 s++;
3101
0a96133f 3102 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3103 if (! (e = strchr(s, '}'))) {
3104 if (! PL_lex_inpat) {
5777a3f7 3105 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3106 } else {
3107 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3108 }
0a96133f 3109 continue;
ff3f963a 3110 }
cddc7ef4 3111
ff3f963a 3112 /* Here it looks like a named character */
cddc7ef4 3113
ff3f963a
KW
3114 if (PL_lex_inpat) {
3115
3116 /* XXX This block is temporary code. \N{} implies that the
3117 * pattern is to have Unicode semantics, and therefore
3118 * currently has to be encoded in utf8. By putting it in
3119 * utf8 now, we save a whole pass in the regular expression
3120 * compiler. Once that code is changed so Unicode
3121 * semantics doesn't necessarily have to be in utf8, this
da3a4baf
KW
3122 * block should be removed. However, the code that parses
3123 * the output of this would have to be changed to not
3124 * necessarily expect utf8 */
ff3f963a 3125 if (!has_utf8) {
77a135fe 3126 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3127 SvPOK_on(sv);
e4f3eed8 3128 *d = '\0';
77a135fe 3129 /* See Note on sizing above. */
7bf79863 3130 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3131 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3132 /* 5 = '\N{' + cur char + NUL */
3133 (STRLEN)(send - s) + 5);
f08d6ad9 3134 d = SvPVX(sv) + SvCUR(sv);
89491803 3135 has_utf8 = TRUE;
ff3f963a
KW
3136 }
3137 }
423cee85 3138
ff3f963a
KW
3139 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3140 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3141 | PERL_SCAN_DISALLOW_PREFIX;
3142 STRLEN len;
3143
3144 /* For \N{U+...}, the '...' is a unicode value even on
3145 * EBCDIC machines */
3146 s += 2; /* Skip to next char after the 'U+' */
3147 len = e - s;
3148 uv = grok_hex(s, &len, &flags, NULL);
3149 if (len == 0 || len != (STRLEN)(e - s)) {
3150 yyerror("Invalid hexadecimal number in \\N{U+...}");
3151 s = e + 1;
3152 continue;
3153 }
3154
3155 if (PL_lex_inpat) {
3156
e2a7e165
KW
3157 /* On non-EBCDIC platforms, pass through to the regex
3158 * compiler unchanged. The reason we evaluated the
3159 * number above is to make sure there wasn't a syntax
3160 * error. But on EBCDIC we convert to native so
3161 * downstream code can continue to assume it's native
3162 */
ff3f963a 3163 s -= 5; /* Include the '\N{U+' */
e2a7e165
KW
3164#ifdef EBCDIC
3165 d += my_snprintf(d, e - s + 1 + 1, /* includes the }
3166 and the \0 */
3167 "\\N{U+%X}",
3168 (unsigned int) UNI_TO_NATIVE(uv));
3169#else
ff3f963a
KW
3170 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3171 d += e - s + 1;
e2a7e165 3172#endif
ff3f963a
KW
3173 }
3174 else { /* Not a pattern: convert the hex to string */
3175
3176 /* If destination is not in utf8, unconditionally
3177 * recode it to be so. This is because \N{} implies
3178 * Unicode semantics, and scalars have to be in utf8
3179 * to guarantee those semantics */
3180 if (! has_utf8) {
3181 SvCUR_set(sv, d - SvPVX_const(sv));
3182 SvPOK_on(sv);
3183 *d = '\0';
3184 /* See Note on sizing above. */
3185 sv_utf8_upgrade_flags_grow(
3186 sv,
3187 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3188 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3189 d = SvPVX(sv) + SvCUR(sv);
3190 has_utf8 = TRUE;
3191 }
3192
3193 /* Add the string to the output */
3194 if (UNI_IS_INVARIANT(uv)) {
3195 *d++ = (char) uv;
3196 }
3197 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3198 }
3199 }
3200 else { /* Here is \N{NAME} but not \N{U+...}. */
3201
3202 SV *res; /* result from charnames */
3203 const char *str; /* the string in 'res' */
3204 STRLEN len; /* its length */
3205
3206 /* Get the value for NAME */
3207 res = newSVpvn(s, e - s);
3208 res = new_constant( NULL, 0, "charnames",
3209 /* includes all of: \N{...} */
3210 res, NULL, s - 3, e - s + 4 );
3211
3212 /* Most likely res will be in utf8 already since the
3213 * standard charnames uses pack U, but a custom translator
3214 * can leave it otherwise, so make sure. XXX This can be
3215 * revisited to not have charnames use utf8 for characters
3216 * that don't need it when regexes don't have to be in utf8
3217 * for Unicode semantics. If doing so, remember EBCDIC */
3218 sv_utf8_upgrade(res);
3219 str = SvPV_const(res, len);
3220
3221 /* Don't accept malformed input */
3222 if (! is_utf8_string((U8 *) str, len)) {
3223 yyerror("Malformed UTF-8 returned by \\N");
3224 }
3225 else if (PL_lex_inpat) {
3226
3227 if (! len) { /* The name resolved to an empty string */
3228 Copy("\\N{}", d, 4, char);
3229 d += 4;
3230 }
3231 else {
3232 /* In order to not lose information for the regex
3233 * compiler, pass the result in the specially made
3234 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3235 * the code points in hex of each character
3236 * returned by charnames */
3237
3238 const char *str_end = str + len;
3239 STRLEN char_length; /* cur char's byte length */
3240 STRLEN output_length; /* and the number of bytes
3241 after this is translated
3242 into hex digits */
3243 const STRLEN off = d - SvPVX_const(sv);
3244
3245 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3246 * max('U+', '.'); and 1 for NUL */
3247 char hex_string[2 * UTF8_MAXBYTES + 5];
3248
3249 /* Get the first character of the result. */
3250 U32 uv = utf8n_to_uvuni((U8 *) str,
3251 len,
3252 &char_length,
3253 UTF8_ALLOW_ANYUV);
3254
3255 /* The call to is_utf8_string() above hopefully
3256 * guarantees that there won't be an error. But
3257 * it's easy here to make sure. The function just
3258 * above warns and returns 0 if invalid utf8, but
3259 * it can also return 0 if the input is validly a
3260 * NUL. Disambiguate */
3261 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3262 uv = UNICODE_REPLACEMENT;
3263 }
3264
3265 /* Convert first code point to hex, including the
e2a7e165
KW
3266 * boiler plate before it. For all these, we
3267 * convert to native format so that downstream code
3268 * can continue to assume the input is native */
78c35590 3269 output_length =
3353de27 3270 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3271 "\\N{U+%X",
3272 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3273
3274 /* Make sure there is enough space to hold it */
3275 d = off + SvGROW(sv, off
3276 + output_length
3277 + (STRLEN)(send - e)
3278 + 2); /* '}' + NUL */
3279 /* And output it */
3280 Copy(hex_string, d, output_length, char);
3281 d += output_length;
3282
3283 /* For each subsequent character, append dot and
3284 * its ordinal in hex */
3285 while ((str += char_length) < str_end) {
3286 const STRLEN off = d - SvPVX_const(sv);
3287 U32 uv = utf8n_to_uvuni((U8 *) str,
3288 str_end - str,
3289 &char_length,
3290 UTF8_ALLOW_ANYUV);
3291 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3292 uv = UNICODE_REPLACEMENT;
3293 }
3294
78c35590 3295 output_length =
3353de27 3296 my_snprintf(hex_string, sizeof(hex_string),
e2a7e165
KW
3297 ".%X",
3298 (unsigned int) UNI_TO_NATIVE(uv));
ff3f963a
KW
3299
3300 d = off + SvGROW(sv, off
3301 + output_length
3302 + (STRLEN)(send - e)
3303 + 2); /* '}' + NUL */
3304 Copy(hex_string, d, output_length, char);
3305 d += output_length;
3306 }
3307
3308 *d++ = '}'; /* Done. Add the trailing brace */
3309 }
3310 }
3311 else { /* Here, not in a pattern. Convert the name to a
3312 * string. */
3313
3314 /* If destination is not in utf8, unconditionally
3315 * recode it to be so. This is because \N{} implies
3316 * Unicode semantics, and scalars have to be in utf8
3317 * to guarantee those semantics */
3318 if (! has_utf8) {
3319 SvCUR_set(sv, d - SvPVX_const(sv));
3320 SvPOK_on(sv);
3321 *d = '\0';
3322 /* See Note on sizing above. */
3323 sv_utf8_upgrade_flags_grow(sv,
3324 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3325 len + (STRLEN)(send - s) + 1);
3326 d = SvPVX(sv) + SvCUR(sv);
3327 has_utf8 = TRUE;
3328 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3329
3330 /* See Note on sizing above. (NOTE: SvCUR() is not
3331 * set correctly here). */
3332 const STRLEN off = d - SvPVX_const(sv);
3333 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3334 }
3335 Copy(str, d, len, char);
3336 d += len;
423cee85 3337 }
423cee85 3338 SvREFCNT_dec(res);
cb233ae3
KW
3339
3340 /* Deprecate non-approved name syntax */
3341 if (ckWARN_d(WARN_DEPRECATED)) {
3342 bool problematic = FALSE;
3343 char* i = s;
3344
3345 /* For non-ut8 input, look to see that the first
3346 * character is an alpha, then loop through the rest
3347 * checking that each is a continuation */
3348 if (! this_utf8) {
3349 if (! isALPHAU(*i)) problematic = TRUE;
3350 else for (i = s + 1; i < e; i++) {
3351 if (isCHARNAME_CONT(*i)) continue;
3352 problematic = TRUE;
3353 break;
3354 }
3355 }
3356 else {
3357 /* Similarly for utf8. For invariants can check
3358 * directly. We accept anything above the latin1
3359 * range because it is immaterial to Perl if it is
3360 * correct or not, and is expensive to check. But
3361 * it is fairly easy in the latin1 range to convert
3362 * the variants into a single character and check
3363 * those */
3364 if (UTF8_IS_INVARIANT(*i)) {
3365 if (! isALPHAU(*i)) problematic = TRUE;
3366 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
81c14aa2 3367 if (! isALPHAU(UNI_TO_NATIVE(TWO_BYTE_UTF8_TO_UNI(*i,
cb233ae3
KW
3368 *(i+1)))))
3369 {
3370 problematic = TRUE;
3371 }
3372 }
3373 if (! problematic) for (i = s + UTF8SKIP(s);
3374 i < e;
3375 i+= UTF8SKIP(i))
3376 {
3377 if (UTF8_IS_INVARIANT(*i)) {
3378 if (isCHARNAME_CONT(*i)) continue;
3379 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3380 continue;
3381 } else if (isCHARNAME_CONT(
3382 UNI_TO_NATIVE(
81c14aa2 3383 TWO_BYTE_UTF8_TO_UNI(*i, *(i+1)))))
cb233ae3
KW
3384 {
3385 continue;
3386 }
3387 problematic = TRUE;
3388 break;
3389 }
3390 }
3391 if (problematic) {
6e1bad6c
KW
3392 /* The e-i passed to the final %.*s makes sure that
3393 * should the trailing NUL be missing that this
3394 * print won't run off the end of the string */
cb233ae3 3395 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3396 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3397 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3398 }
3399 }
3400 } /* End \N{NAME} */
ff3f963a
KW
3401#ifdef EBCDIC
3402 if (!dorange)
3403 native_range = FALSE; /* \N{} is defined to be Unicode */
3404#endif
3405 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3406 continue;
3407
02aa26ce 3408 /* \c is a control character */
79072805
LW
3409 case 'c':
3410 s++;
961ce445 3411 if (s < send) {
17a3df4c 3412 *d++ = grok_bslash_c(*s++, has_utf8, 1);
ba210ebe 3413 }
961ce445
RGS
3414 else {
3415 yyerror("Missing control char name in \\c");
3416 }
79072805 3417 continue;
02aa26ce
NT
3418
3419 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3420 case 'b':
db42d148 3421 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3422 break;
3423 case 'n':
db42d148 3424 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3425 break;
3426 case 'r':
db42d148 3427 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3428 break;
3429 case 'f':
db42d148 3430 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3431 break;
3432 case 't':
db42d148 3433 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3434 break;
34a3fe2a 3435 case 'e':
db42d148 3436 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3437 break;
3438 case 'a':
db42d148 3439 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3440 break;
02aa26ce
NT
3441 } /* end switch */
3442
79072805
LW
3443 s++;
3444 continue;
02aa26ce 3445 } /* end if (backslash) */
4c3a8340
TS
3446#ifdef EBCDIC
3447 else
3448 literal_endpoint++;
3449#endif
02aa26ce 3450
f9a63242 3451 default_action:
77a135fe
KW
3452 /* If we started with encoded form, or already know we want it,
3453 then encode the next character */
3454 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3455 STRLEN len = 1;
77a135fe
KW
3456
3457
3458 /* One might think that it is wasted effort in the case of the
3459 * source being utf8 (this_utf8 == TRUE) to take the next character
3460 * in the source, convert it to an unsigned value, and then convert
3461 * it back again. But the source has not been validated here. The
3462 * routine that does the conversion checks for errors like
3463 * malformed utf8 */
3464
5f66b61c
AL
3465 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3466 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3467 if (!has_utf8) {
3468 SvCUR_set(sv, d - SvPVX_const(sv));
3469 SvPOK_on(sv);
3470 *d = '\0';
77a135fe 3471 /* See Note on sizing above. */
7bf79863
KW
3472 sv_utf8_upgrade_flags_grow(sv,
3473 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3474 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3475 d = SvPVX(sv) + SvCUR(sv);
3476 has_utf8 = TRUE;
3477 } else if (need > len) {
3478 /* encoded value larger than old, may need extra space (NOTE:
3479 * SvCUR() is not set correctly here). See Note on sizing
3480 * above. */
9d4ba2ae 3481 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3482 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3483 }
77a135fe
KW
3484 s += len;
3485
5f66b61c 3486 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3487#ifdef EBCDIC
3488 if (uv > 255 && !dorange)
3489 native_range = FALSE;
3490#endif
2b9d42f0
NIS
3491 }
3492 else {
3493 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3494 }
02aa26ce
NT
3495 } /* while loop to process each character */
3496
3497 /* terminate the string and set up the sv */
79072805 3498 *d = '\0';
95a20fc0 3499 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3500 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3501 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3502
79072805 3503 SvPOK_on(sv);
9f4817db 3504 if (PL_encoding && !has_utf8) {
d0063567
DK
3505 sv_recode_to_utf8(sv, PL_encoding);
3506 if (SvUTF8(sv))
3507 has_utf8 = TRUE;
9f4817db 3508 }
2b9d42f0 3509 if (has_utf8) {
7e2040f0 3510 SvUTF8_on(sv);
2b9d42f0 3511 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3512 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3513 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3514 }
3515 }
79072805 3516
02aa26ce 3517 /* shrink the sv if we allocated more than we used */
79072805 3518 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3519 SvPV_shrink_to_cur(sv);
79072805 3520 }
02aa26ce 3521
6154021b 3522 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3523 if (s > PL_bufptr) {
eb0d8d16
NC
3524 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3525 const char *const key = PL_lex_inpat ? "qr" : "q";
3526 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3527 const char *type;
3528 STRLEN typelen;
3529
3530 if (PL_lex_inwhat == OP_TRANS) {
3531 type = "tr";
3532 typelen = 2;
3533 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3534 type = "s";
3535 typelen = 1;
3536 } else {
3537 type = "qq";
3538 typelen = 2;
3539 }
3540
3541 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3542 type, typelen);
3543 }
6154021b 3544 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3545 } else
8990e307 3546 SvREFCNT_dec(sv);
79072805
LW
3547 return s;
3548}
3549
ffb4593c
NT
3550/* S_intuit_more
3551 * Returns TRUE if there's more to the expression (e.g., a subscript),
3552 * FALSE otherwise.
ffb4593c
NT
3553 *
3554 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3555 *
3556 * ->[ and ->{ return TRUE
3557 * { and [ outside a pattern are always subscripts, so return TRUE
3558 * if we're outside a pattern and it's not { or [, then return FALSE
3559 * if we're in a pattern and the first char is a {
3560 * {4,5} (any digits around the comma) returns FALSE
3561 * if we're in a pattern and the first char is a [
3562 * [] returns FALSE
3563 * [SOMETHING] has a funky algorithm to decide whether it's a
3564 * character class or not. It has to deal with things like
3565 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3566 * anything else returns TRUE
3567 */
3568
9cbb5ea2
GS
3569/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3570
76e3520e 3571STATIC int
cea2e8a9 3572S_intuit_more(pTHX_ register char *s)
79072805 3573{
97aff369 3574 dVAR;
7918f24d
NC
3575
3576 PERL_ARGS_ASSERT_INTUIT_MORE;
3577
3280af22 3578 if (PL_lex_brackets)
79072805
LW
3579 return TRUE;
3580 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3581 return TRUE;
3582 if (*s != '{' && *s != '[')
3583 return FALSE;
3280af22 3584 if (!PL_lex_inpat)
79072805
LW
3585 return TRUE;
3586
3587 /* In a pattern, so maybe we have {n,m}. */
3588 if (*s == '{') {
b3155d95 3589 if (regcurly(s)) {
79072805 3590 return FALSE;
b3155d95 3591 }
79072805 3592 return TRUE;
79072805
LW
3593 }
3594
3595 /* On the other hand, maybe we have a character class */
3596
3597 s++;
3598 if (*s == ']' || *s == '^')
3599 return FALSE;
3600 else {
ffb4593c 3601 /* this is terrifying, and it works */
79072805
LW
3602 int weight = 2; /* let's weigh the evidence */
3603 char seen[256];
f27ffc4a 3604 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3605 const char * const send = strchr(s,']');
3280af22 3606 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3607
3608 if (!send) /* has to be an expression */
3609 return TRUE;
3610
3611 Zero(seen,256,char);
3612 if (*s == '$')
3613 weight -= 3;
3614 else if (isDIGIT(*s)) {
3615 if (s[1] != ']') {
3616 if (isDIGIT(s[1]) && s[2] == ']')
3617 weight -= 10;
3618 }
3619 else
3620 weight -= 100;
3621 }
3622 for (; s < send; s++) {
3623 last_un_char = un_char;
3624 un_char = (unsigned char)*s;
3625 switch (*s) {
3626 case '@':
3627 case '&':
3628 case '$':
3629 weight -= seen[un_char] * 10;
7e2040f0 3630 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3631 int len;
8903cb82 3632 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e 3633 len = (int)strlen(tmpbuf);
6fbd0d97
BF
3634 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len,
3635 UTF ? SVf_UTF8 : 0, SVt_PV))
79072805
LW
3636 weight -= 100;
3637 else
3638 weight -= 10;
3639 }
3640 else if (*s == '$' && s[1] &&
93a17b20
LW
3641 strchr("[#!%*<>()-=",s[1])) {
3642 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3643 weight -= 10;
3644 else
3645 weight -= 1;
3646 }
3647 break;
3648 case '\\':
3649 un_char = 254;
3650 if (s[1]) {
93a17b20 3651 if (strchr("wds]",s[1]))
79072805 3652 weight += 100;
10edeb5d 3653 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3654 weight += 1;
93a17b20 3655 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3656 weight += 40;
3657 else if (isDIGIT(s[1])) {
3658 weight += 40;
3659 while (s[1] && isDIGIT(s[1]))
3660 s++;
3661 }
3662 }
3663 else
3664 weight += 100;
3665 break;
3666 case '-':
3667 if (s[1] == '\\')
3668 weight += 50;
93a17b20 3669 if (strchr("aA01! ",last_un_char))
79072805 3670 weight += 30;
93a17b20 3671 if (strchr("zZ79~",s[1]))
79072805 3672 weight += 30;
f27ffc4a
GS
3673 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3674 weight -= 5; /* cope with negative subscript */
79072805
LW
3675 break;
3676 default:
3792a11b
NC
3677 if (!isALNUM(last_un_char)
3678 && !(last_un_char == '$' || last_un_char == '@'
3679 || last_un_char == '&')
3680 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3681 char *d = tmpbuf;
3682 while (isALPHA(*s))
3683 *d++ = *s++;
3684 *d = '\0';
5458a98a 3685 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3686 weight -= 150;
3687 }
3688 if (un_char == last_un_char + 1)
3689 weight += 5;
3690 weight -= seen[un_char];
3691 break;
3692 }
3693 seen[un_char]++;
3694 }
3695 if (weight >= 0) /* probably a character class */
3696 return FALSE;
3697 }
3698
3699 return TRUE;
3700}
ffed7fef 3701
ffb4593c
NT
3702/*
3703 * S_intuit_method
3704 *
3705 * Does all the checking to disambiguate
3706 * foo bar
3707 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3708 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3709 *
3710 * First argument is the stuff after the first token, e.g. "bar".
3711 *
3712 * Not a method if bar is a filehandle.
3713 * Not a method if foo is a subroutine prototyped to take a filehandle.
3714 * Not a method if it's really "Foo $bar"
3715 * Method if it's "foo $bar"
3716 * Not a method if it's really "print foo $bar"
3717 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3718 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3719 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3720 * =>
3721 */
3722
76e3520e 3723STATIC int
62d55b22 3724S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3725{
97aff369 3726 dVAR;
a0d0e21e 3727 char *s = start + (*start == '$');
3280af22 3728 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3729 STRLEN len;
3730 GV* indirgv;
5db06880
NC
3731#ifdef PERL_MAD
3732 int soff;
3733#endif
a0d0e21e 3734
7918f24d
NC
3735 PERL_ARGS_ASSERT_INTUIT_METHOD;
3736
a0d0e21e 3737 if (gv) {
62d55b22 3738 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3739 return 0;
62d55b22
NC
3740 if (cv) {
3741 if (SvPOK(cv)) {
3742 const char *proto = SvPVX_const(cv);
3743 if (proto) {
3744 if (*proto == ';')
3745 proto++;
3746 if (*proto == '*')
3747 return 0;
3748 }
b6c543e3
IZ
3749 }
3750 } else
c35e046a 3751 gv = NULL;
a0d0e21e 3752 }
8903cb82 3753 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3754 /* start is the beginning of the possible filehandle/object,
3755 * and s is the end of it
3756 * tmpbuf is a copy of it
3757 */
3758
a0d0e21e 3759 if (*start == '$') {
3ef1310e
RGS
3760 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3761 isUPPER(*PL_tokenbuf))
a0d0e21e 3762 return 0;
5db06880
NC
3763#ifdef PERL_MAD
3764 len = start - SvPVX(PL_linestr);
3765#endif
29595ff2 3766 s = PEEKSPACE(s);
f0092767 3767#ifdef PERL_MAD
5db06880
NC
3768 start = SvPVX(PL_linestr) + len;
3769#endif
3280af22
NIS
3770 PL_bufptr = start;
3771 PL_expect = XREF;
a0d0e21e
LW
3772 return *s == '(' ? FUNCMETH : METHOD;
3773 }
5458a98a 3774 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3775 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3776 len -= 2;
3777 tmpbuf[len] = '\0';
5db06880
NC
3778#ifdef PERL_MAD
3779 soff = s - SvPVX(PL_linestr);
3780#endif
c3e0f903
GS
3781 goto bare_package;
3782 }
38d2cf30 3783 indirgv = gv_fetchpvn_flags(tmpbuf, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
8ebc5c01 3784 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3785 return 0;
3786 /* filehandle or package name makes it a method */
38d2cf30 3787 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, UTF ? SVf_UTF8 : 0)) {
5db06880
NC
3788#ifdef PERL_MAD
3789 soff = s - SvPVX(PL_linestr);
3790#endif
29595ff2 3791 s = PEEKSPACE(s);
3280af22 3792 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
486ec47a 3793 return 0; /* no assumptions -- "=>" quotes bareword */
c3e0f903 3794 bare_package:
cd81e915 3795 start_force(PL_curforce);
9ded7720 3796 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3797 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3798 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880 3799 if (PL_madskills)
38d2cf30
BF
3800 curmad('X', newSVpvn_flags(start,SvPVX(PL_linestr) + soff - start,
3801 ( UTF ? SVf_UTF8 : 0 )));
3280af22 3802 PL_expect = XTERM;
a0d0e21e 3803 force_next(WORD);
3280af22 3804 PL_bufptr = s;
5db06880
NC
3805#ifdef PERL_MAD
3806 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3807#endif
a0d0e21e
LW
3808 return *s == '(' ? FUNCMETH : METHOD;
3809 }
3810 }
3811 return 0;
3812}
3813
16d20bd9 3814/* Encoded script support. filter_add() effectively inserts a
4e553d73 3815 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3816 * Note that the filter function only applies to the current source file
3817 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3818 *
3819 * The datasv parameter (which may be NULL) can be used to pass
3820 * private data to this instance of the filter. The filter function
3821 * can recover the SV using the FILTER_DATA macro and use it to
3822 * store private buffers and state information.
3823 *
3824 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3825 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3826 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3827 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3828 * private use must be set using malloc'd pointers.
3829 */
16d20bd9
AD
3830
3831SV *
864dbfa3 3832Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3833{
97aff369 3834 dVAR;
f4c556ac 3835 if (!funcp)
a0714e2c 3836 return NULL;
f4c556ac 3837
5486870f
DM
3838 if (!PL_parser)
3839 return NULL;
3840
3280af22
NIS
3841 if (!PL_rsfp_filters)
3842 PL_rsfp_filters = newAV();
16d20bd9 3843 if (!datasv)
561b68a9 3844 datasv = newSV(0);
862a34c6 3845 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3846 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3847 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3848 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3849 FPTR2DPTR(void *, IoANY(datasv)),
3850 SvPV_nolen(datasv)));
3280af22
NIS
3851 av_unshift(PL_rsfp_filters, 1);
3852 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3853 return(datasv);
3854}
4e553d73 3855
16d20bd9
AD
3856
3857/* Delete most recently added instance of this filter function. */
a0d0e21e 3858void
864dbfa3 3859Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3860{
97aff369 3861 dVAR;
e0c19803 3862 SV *datasv;
24801a4b 3863
7918f24d
NC
3864 PERL_ARGS_ASSERT_FILTER_DEL;
3865
33073adb 3866#ifdef DEBUGGING
55662e27
JH
3867 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3868 FPTR2DPTR(void*, funcp)));
33073adb 3869#endif
5486870f 3870 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3871 return;
3872 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3873 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3874 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3875 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3876
16d20bd9
AD
3877 return;
3878 }
3879 /* we need to search for the correct entry and clear it */
cea2e8a9 3880 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3881}
3882
3883
1de9afcd
RGS
3884/* Invoke the idxth filter function for the current rsfp. */
3885/* maxlen 0 = read one text line */
16d20bd9 3886I32
864dbfa3 3887Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3888{
97aff369 3889 dVAR;
16d20bd9
AD
3890 filter_t funcp;
3891 SV *datasv = NULL;
f482118e
NC
3892 /* This API is bad. It should have been using unsigned int for maxlen.
3893 Not sure if we want to change the API, but if not we should sanity
3894 check the value here. */
39cd7a59
NC
3895 const unsigned int correct_length
3896 = maxlen < 0 ?
3897#ifdef PERL_MICRO
3898 0x7FFFFFFF
3899#else
3900 INT_MAX
3901#endif
3902 : maxlen;
e50aee73 3903
7918f24d
NC
3904 PERL_ARGS_ASSERT_FILTER_READ;
3905
5486870f 3906 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3907 return -1;
1de9afcd 3908 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3909 /* Provide a default input filter to make life easy. */
3910 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3911 DEBUG_P(PerlIO_printf(Perl_debug_log,
3912 "filter_read %d: from rsfp\n", idx));
f482118e 3913 if (correct_length) {
16d20bd9
AD
3914 /* Want a block */
3915 int len ;
f54cb97a 3916 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3917
3918 /* ensure buf_sv is large enough */
881d8f0a 3919 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3920 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3921 correct_length)) <= 0) {
3280af22 3922 if (PerlIO_error(PL_rsfp))
37120919
AD
3923 return -1; /* error */
3924 else
3925 return 0 ; /* end of file */
3926 }
16d20bd9 3927 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3928 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3929 } else {
3930 /* Want a line */
3280af22
NIS
3931 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3932 if (PerlIO_error(PL_rsfp))
37120919
AD
3933 return -1; /* error */
3934 else
3935 return 0 ; /* end of file */
3936 }
16d20bd9
AD
3937 }
3938 return SvCUR(buf_sv);
3939 }
3940 /* Skip this filter slot if filter has been deleted */
1de9afcd 3941 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3942 DEBUG_P(PerlIO_printf(Perl_debug_log,
3943 "filter_read %d: skipped (filter deleted)\n",
3944 idx));
f482118e 3945 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3946 }
3947 /* Get function pointer hidden within datasv */
8141890a 3948 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3949 DEBUG_P(PerlIO_printf(Perl_debug_log,
3950 "filter_read %d: via function %p (%s)\n",
ca0270c4 3951 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3952 /* Call function. The function is expected to */
3953 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3954 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3955 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3956}
3957
76e3520e 3958STATIC char *
5cc814fd 3959S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3960{
97aff369 3961 dVAR;
7918f24d
NC
3962
3963 PERL_ARGS_ASSERT_FILTER_GETS;
3964
c39cd008 3965#ifdef PERL_CR_FILTER
3280af22 3966 if (!PL_rsfp_filters) {
c39cd008 3967 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3968 }
3969#endif
3280af22 3970 if (PL_rsfp_filters) {
55497cff 3971 if (!append)
3972 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3973 if (FILTER_READ(0, sv, 0) > 0)
3974 return ( SvPVX(sv) ) ;
3975 else
bd61b366 3976 return NULL ;
16d20bd9 3977 }
9d116dd7 3978 else
5cc814fd 3979 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3980}
3981
01ec43d0 3982STATIC HV *
9bde8eb0 3983S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3984{
97aff369 3985 dVAR;
def3634b
GS
3986 GV *gv;
3987
7918f24d
NC
3988 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3989
01ec43d0 3990 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3991 return PL_curstash;
3992
3993 if (len > 2 &&
3994 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
acc6da14 3995 (gv = gv_fetchpvn_flags(pkgname, len, ( UTF ? SVf_UTF8 : 0 ), SVt_PVHV)))
01ec43d0
GS
3996 {
3997 return GvHV(gv); /* Foo:: */
def3634b
GS
3998 }
3999
4000 /* use constant CLASS => 'MyClass' */
acc6da14 4001 gv = gv_fetchpvn_flags(pkgname, len, UTF ? SVf_UTF8 : 0, SVt_PVCV);
c35e046a
AL
4002 if (gv && GvCV(gv)) {
4003 SV * const sv = cv_const_sv(GvCV(gv));
4004 if (sv)
9bde8eb0 4005 pkgname = SvPV_const(sv, len);
def3634b
GS
4006 }
4007
acc6da14 4008 return gv_stashpvn(pkgname, len, UTF ? SVf_UTF8 : 0);
def3634b 4009}
a0d0e21e 4010
e3f73d4e
RGS
4011/*
4012 * S_readpipe_override
486ec47a 4013 * Check whether readpipe() is overridden, and generates the appropriate
e3f73d4e
RGS
4014 * optree, provided sublex_start() is called afterwards.
4015 */
4016STATIC void
1d51329b 4017S_readpipe_override(pTHX)
e3f73d4e
RGS
4018{
4019 GV **gvp;
4020 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 4021 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
4022 if ((gv_readpipe
4023 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
4024 ||
4025 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 4026 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
4027 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
4028 {
4029 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 4030 op_append_elem(OP_LIST,
e3f73d4e
RGS
4031 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
4032 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
4033 }
e3f73d4e
RGS
4034}
4035
5db06880
NC
4036#ifdef PERL_MAD
4037 /*
4038 * Perl_madlex
4039 * The intent of this yylex wrapper is to minimize the changes to the
4040 * tokener when we aren't interested in collecting madprops. It remains
4041 * to be seen how successful this strategy will be...
4042 */
4043
4044int
4045Perl_madlex(pTHX)
4046{
4047 int optype;
4048 char *s = PL_bufptr;
4049
cd81e915
NC
4050 /* make sure PL_thiswhite is initialized */
4051 PL_thiswhite = 0;
4052 PL_thismad = 0;
5db06880 4053
cd81e915 4054 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 4055 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
4056 return S_pending_ident(aTHX);
4057
4058 /* previous token ate up our whitespace? */
cd81e915
NC
4059 if (!PL_lasttoke && PL_nextwhite) {
4060 PL_thiswhite = PL_nextwhite;
4061 PL_nextwhite = 0;
5db06880
NC
4062 }
4063
4064 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
4065 PL_realtokenstart = -1;
4066 PL_thistoken = 0;
5db06880
NC
4067 optype = yylex();
4068 s = PL_bufptr;
cd81e915 4069 assert(PL_curforce < 0);
5db06880 4070
cd81e915
NC
4071 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4072 if (!PL_thistoken) {
4073 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4074 PL_thistoken = newSVpvs("");
5db06880 4075 else {
c35e046a 4076 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4077 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4078 }
4079 }
cd81e915
NC
4080 if (PL_thismad) /* install head */
4081 CURMAD('X', PL_thistoken);
5db06880
NC
4082 }
4083
4084 /* last whitespace of a sublex? */
cd81e915
NC
4085 if (optype == ')' && PL_endwhite) {
4086 CURMAD('X', PL_endwhite);
5db06880
NC
4087 }
4088
cd81e915 4089 if (!PL_thismad) {
5db06880
NC
4090
4091 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4092 if (!PL_thiswhite && !PL_endwhite && !optype) {
4093 sv_free(PL_thistoken);
4094 PL_thistoken = 0;
5db06880
NC
4095 return 0;
4096 }
4097
4098 /* put off final whitespace till peg */
4099 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4100 PL_nextwhite = PL_thiswhite;
4101 PL_thiswhite = 0;
5db06880 4102 }
cd81e915
NC
4103 else if (PL_thisopen) {
4104 CURMAD('q', PL_thisopen);
4105 if (PL_thistoken)
4106 sv_free(PL_thistoken);
4107 PL_thistoken = 0;
5db06880
NC
4108 }
4109 else {
4110 /* Store actual token text as madprop X */
cd81e915 4111 CURMAD('X', PL_thistoken);
5db06880
NC
4112 }
4113
cd81e915 4114 if (PL_thiswhite) {
5db06880 4115 /* add preceding whitespace as madprop _ */
cd81e915 4116 CURMAD('_', PL_thiswhite);
5db06880
NC
4117 }
4118
cd81e915 4119 if (PL_thisstuff) {
5db06880 4120 /* add quoted material as madprop = */
cd81e915 4121 CURMAD('=', PL_thisstuff);
5db06880
NC
4122 }
4123
cd81e915 4124 if (PL_thisclose) {
5db06880 4125 /* add terminating quote as madprop Q */
cd81e915 4126 CURMAD('Q', PL_thisclose);
5db06880
NC
4127 }
4128 }
4129
4130 /* special processing based on optype */
4131
4132 switch (optype) {
4133
4134 /* opval doesn't need a TOKEN since it can already store mp */
4135 case WORD:
4136 case METHOD:
4137 case FUNCMETH:
4138 case THING:
4139 case PMFUNC:
4140 case PRIVATEREF:
4141 case FUNC0SUB:
4142 case UNIOPSUB:
4143 case LSTOPSUB:
6154021b
RGS
4144 if (pl_yylval.opval)
4145 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4146 PL_thismad = 0;
5db06880
NC
4147 return optype;
4148
4149 /* fake EOF */
4150 case 0:
4151 optype = PEG;
cd81e915
NC
4152 if (PL_endwhite) {
4153 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4154 PL_endwhite = 0;
5db06880
NC
4155 }
4156 break;
4157
4158 case ']':
4159 case '}':
cd81e915 4160 if (PL_faketokens)
5db06880
NC
4161 break;
4162 /* remember any fake bracket that lexer is about to discard */
4163 if (PL_lex_brackets == 1 &&
4164 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4165 {
4166 s = PL_bufptr;
4167 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4168 s++;
4169 if (*s == '}') {
cd81e915
NC
4170 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4171 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4172 PL_thiswhite = 0;
5db06880
NC
4173 PL_bufptr = s - 1;
4174 break; /* don't bother looking for trailing comment */
4175 }
4176 else
4177 s = PL_bufptr;
4178 }
4179 if (optype == ']')
4180 break;
4181 /* FALLTHROUGH */
4182
4183 /* attach a trailing comment to its statement instead of next token */
4184 case ';':
cd81e915 4185 if (PL_faketokens)
5db06880
NC
4186 break;
4187 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4188 s = PL_bufptr;
4189 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4190 s++;
4191 if (*s == '\n' || *s == '#') {
4192 while (s < PL_bufend && *s != '\n')
4193 s++;
4194 if (s < PL_bufend)
4195 s++;
cd81e915
NC
4196 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4197 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4198 PL_thiswhite = 0;
5db06880
NC
4199 PL_bufptr = s;
4200 }
4201 }
4202 break;
4203
4204 /* pval */
4205 case LABEL:
4206 break;
4207
4208 /* ival */
4209 default:
4210 break;
4211
4212 }
4213
4214 /* Create new token struct. Note: opvals return early above. */
6154021b 4215 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4216 PL_thismad = 0;
5db06880
NC
4217 return optype;
4218}
4219#endif
4220
468aa647 4221STATIC char *
cc6ed77d 4222S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4223 dVAR;
7918f24d
NC
4224
4225 PERL_ARGS_ASSERT_TOKENIZE_USE;
4226
468aa647
RGS
4227 if (PL_expect != XSTATE)
4228 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4229 is_use ? "use" : "no"));
29595ff2 4230 s = SKIPSPACE1(s);
468aa647
RGS
4231 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4232 s = force_version(s, TRUE);
17c59fdf
VP
4233 if (*s == ';' || *s == '}'
4234 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4235 start_force(PL_curforce);
9ded7720 4236 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4237 force_next(WORD);
4238 }
4239 else if (*s == 'v') {
4240 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4241 s = force_version(s, FALSE);
4242 }
4243 }
4244 else {
4245 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4246 s = force_version(s, FALSE);
4247 }
6154021b 4248 pl_yylval.ival = is_use;
468aa647
RGS
4249 return s;
4250}
748a9306 4251#ifdef DEBUGGING
27da23d5 4252 static const char* const exp_name[] =
09bef843 4253 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4254 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4255 };
748a9306 4256#endif
463ee0b2 4257
361d9b55
Z
4258#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4259STATIC bool
4260S_word_takes_any_delimeter(char *p, STRLEN len)
4261{
4262 return (len == 1 && strchr("msyq", p[0])) ||
4263 (len == 2 && (
4264 (p[0] == 't' && p[1] == 'r') ||
4265 (p[0] == 'q' && strchr("qwxr", p[1]))));
4266}
4267
02aa26ce
NT
4268/*
4269 yylex
4270
4271 Works out what to call the token just pulled out of the input
4272 stream. The yacc parser takes care of taking the ops we return and
4273 stitching them into a tree.
4274
4275 Returns:
4276 PRIVATEREF
4277
4278 Structure:
4279 if read an identifier
4280 if we're in a my declaration
4281 croak if they tried to say my($foo::bar)
4282 build the ops for a my() declaration
4283 if it's an access to a my() variable
4284 are we in a sort block?
4285 croak if my($a); $a <=> $b
4286 build ops for access to a my() variable
4287 if in a dq string, and they've said @foo and we can't find @foo
4288 croak
4289 build ops for a bareword
4290 if we already built the token before, use it.
4291*/
4292
20141f0e 4293
dba4d153
JH
4294#ifdef __SC__
4295#pragma segment Perl_yylex
4296#endif
dba4d153 4297int
dba4d153 4298Perl_yylex(pTHX)
20141f0e 4299{
97aff369 4300 dVAR;
3afc138a 4301 register char *s = PL_bufptr;
378cc40b 4302 register char *d;
463ee0b2 4303 STRLEN len;
aa7440fb 4304 bool bof = FALSE;
580561a3 4305 U32 fake_eof = 0;
a687059c 4306
10edeb5d
JH
4307 /* orig_keyword, gvp, and gv are initialized here because
4308 * jump to the label just_a_word_zero can bypass their
4309 * initialization later. */
4310 I32 orig_keyword = 0;
4311 GV *gv = NULL;
4312 GV **gvp = NULL;
4313
bbf60fe6 4314 DEBUG_T( {
396482e1 4315 SV* tmp = newSVpvs("");
b6007c36
DM
4316 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4317 (IV)CopLINE(PL_curcop),
4318 lex_state_names[PL_lex_state],
4319 exp_name[PL_expect],
4320 pv_display(tmp, s, strlen(s), 0, 60));
4321 SvREFCNT_dec(tmp);
bbf60fe6 4322 } );
02aa26ce 4323 /* check if there's an identifier for us to look at */
28ac2b49 4324 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4325 return REPORT(S_pending_ident(aTHX));
bbce6d69 4326
02aa26ce
NT
4327 /* no identifier pending identification */
4328
3280af22 4329 switch (PL_lex_state) {
79072805
LW
4330#ifdef COMMENTARY
4331 case LEX_NORMAL: /* Some compilers will produce faster */
4332 case LEX_INTERPNORMAL: /* code if we comment these out. */
4333 break;
4334#endif
4335
09bef843 4336 /* when we've already built the next token, just pull it out of the queue */
79072805 4337 case LEX_KNOWNEXT:
5db06880
NC
4338#ifdef PERL_MAD
4339 PL_lasttoke--;
6154021b 4340 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4341 if (PL_madskills) {
cd81e915 4342 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4343 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4344 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4345 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4346 PL_thismad->mad_val = 0;
4347 mad_free(PL_thismad);
4348 PL_thismad = 0;
5db06880
NC
4349 }
4350 }
4351 if (!PL_lasttoke) {
4352 PL_lex_state = PL_lex_defer;
4353 PL_expect = PL_lex_expect;
4354 PL_lex_defer = LEX_NORMAL;
4355 if (!PL_nexttoke[PL_lasttoke].next_type)
4356 return yylex();
4357 }
4358#else
3280af22 4359 PL_nexttoke--;
6154021b 4360 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4361 if (!PL_nexttoke) {
4362 PL_lex_state = PL_lex_defer;
4363 PL_expect = PL_lex_expect;
4364 PL_lex_defer = LEX_NORMAL;
463ee0b2 4365 }
5db06880 4366#endif
a7aaec61
Z
4367 {
4368 I32 next_type;
5db06880 4369#ifdef PERL_MAD
a7aaec61 4370 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4371#else
a7aaec61 4372 next_type = PL_nexttype[PL_nexttoke];
5db06880 4373#endif
78cdf107
Z
4374 if (next_type & (7<<24)) {
4375 if (next_type & (1<<24)) {
4376 if (PL_lex_brackets > 100)
4377 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4378 PL_lex_brackstack[PL_lex_brackets++] =
9d8a3661 4379 (char) ((next_type >> 16) & 0xff);
78cdf107
Z
4380 }
4381 if (next_type & (2<<24))
4382 PL_lex_allbrackets++;
4383 if (next_type & (4<<24))
4384 PL_lex_allbrackets--;
a7aaec61
Z
4385 next_type &= 0xffff;
4386 }
4387#ifdef PERL_MAD
4388 /* FIXME - can these be merged? */
4389 return next_type;
4390#else
4391 return REPORT(next_type);
4392#endif
4393 }
79072805 4394
02aa26ce 4395 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4396 when we get here, PL_bufptr is at the \
02aa26ce 4397 */
79072805
LW
4398 case LEX_INTERPCASEMOD:
4399#ifdef DEBUGGING
3280af22 4400 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4401 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4402#endif
02aa26ce 4403 /* handle \E or end of string */
3280af22 4404 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4405 /* if at a \E */
3280af22 4406 if (PL_lex_casemods) {
f54cb97a 4407 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4408 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4409
3792a11b
NC
4410 if (PL_bufptr != PL_bufend
4411 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4412 PL_bufptr += 2;
4413 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4414#ifdef PERL_MAD
4415 if (PL_madskills)
6b29d1f5 4416 PL_thistoken = newSVpvs("\\E");
5db06880 4417#endif
a0d0e21e 4418 }
78cdf107 4419 PL_lex_allbrackets--;
bbf60fe6 4420 return REPORT(')');
79072805 4421 }
5db06880
NC
4422#ifdef PERL_MAD
4423 while (PL_bufptr != PL_bufend &&
4424 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4425 if (!PL_thiswhite)
6b29d1f5 4426 PL_thiswhite = newSVpvs("");
cd81e915 4427 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4428 PL_bufptr += 2;
4429 }
4430#else
3280af22
NIS
4431 if (PL_bufptr != PL_bufend)
4432 PL_bufptr += 2;
5db06880 4433#endif
3280af22 4434 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4435 return yylex();
79072805
LW
4436 }
4437 else {
607df283 4438 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4439 "### Saw case modifier\n"); });
3280af22 4440 s = PL_bufptr + 1;
6e909404 4441 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4442#ifdef PERL_MAD
cd81e915 4443 if (!PL_thiswhite)
6b29d1f5 4444 PL_thiswhite = newSVpvs("");
cd81e915 4445 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4446#endif
89122651 4447 PL_bufptr = s + 3;
6e909404
JH
4448 PL_lex_state = LEX_INTERPCONCAT;
4449 return yylex();
a0d0e21e 4450 }
6e909404 4451 else {
90771dc0 4452 I32 tmp;
5db06880
NC
4453 if (!PL_madskills) /* when just compiling don't need correct */
4454 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4455 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4456 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4457 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4458 PL_lex_casestack[--PL_lex_casemods] = '\0';
78cdf107 4459 PL_lex_allbrackets--;
bbf60fe6 4460 return REPORT(')');
6e909404
JH
4461 }
4462 if (PL_lex_casemods > 10)
4463 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4464 PL_lex_casestack[PL_lex_casemods++] = *s;
4465 PL_lex_casestack[PL_lex_casemods] = '\0';
4466 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4467 start_force(PL_curforce);
9ded7720 4468 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4469 force_next((2<<24)|'(');
cd81e915 4470 start_force(PL_curforce);
6e909404 4471 if (*s == 'l')
9ded7720 4472 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4473 else if (*s == 'u')
9ded7720 4474 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4475 else if (*s == 'L')
9ded7720 4476 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4477 else if (*s == 'U')
9ded7720 4478 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4479 else if (*s == 'Q')
9ded7720 4480 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4481 else
4482 Perl_croak(aTHX_ "panic: yylex");
5db06880 4483 if (PL_madskills) {
a5849ce5
NC
4484 SV* const tmpsv = newSVpvs("\\ ");
4485 /* replace the space with the character we want to escape
4486 */
4487 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4488 curmad('_', tmpsv);
4489 }
6e909404 4490 PL_bufptr = s + 1;
a0d0e21e 4491 }
79072805 4492 force_next(FUNC);
3280af22
NIS
4493 if (PL_lex_starts) {
4494 s = PL_bufptr;
4495 PL_lex_starts = 0;
5db06880
NC
4496#ifdef PERL_MAD
4497 if (PL_madskills) {
cd81e915
NC
4498 if (PL_thistoken)
4499 sv_free(PL_thistoken);
6b29d1f5 4500 PL_thistoken = newSVpvs("");
5db06880
NC
4501 }
4502#endif
131b3ad0
DM
4503 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4504 if (PL_lex_casemods == 1 && PL_lex_inpat)
4505 OPERATOR(',');
4506 else
4507 Aop(OP_CONCAT);
79072805
LW
4508 }
4509 else
cea2e8a9 4510 return yylex();
79072805
LW
4511 }
4512
55497cff 4513 case LEX_INTERPPUSH:
bbf60fe6 4514 return REPORT(sublex_push());
55497cff 4515
79072805 4516 case LEX_INTERPSTART:
3280af22 4517 if (PL_bufptr == PL_bufend)
bbf60fe6 4518 return REPORT(sublex_done());
607df283 4519 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4520 "### Interpolated variable\n"); });
3280af22
NIS
4521 PL_expect = XTERM;
4522 PL_lex_dojoin = (*PL_bufptr == '@');
4523 PL_lex_state = LEX_INTERPNORMAL;
4524 if (PL_lex_dojoin) {
cd81e915 4525 start_force(PL_curforce);
9ded7720 4526 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4527 force_next(',');
cd81e915 4528 start_force(PL_curforce);
a0d0e21e 4529 force_ident("\"", '$');
cd81e915 4530 start_force(PL_curforce);
9ded7720 4531 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4532 force_next('$');
cd81e915 4533 start_force(PL_curforce);
9ded7720 4534 NEXTVAL_NEXTTOKE.ival = 0;
78cdf107 4535 force_next((2<<24)|'(');
cd81e915 4536 start_force(PL_curforce);
9ded7720 4537 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4538 force_next(FUNC);
4539 }
3280af22
NIS
4540 if (PL_lex_starts++) {
4541 s = PL_bufptr;
5db06880
NC
4542#ifdef PERL_MAD
4543 if (PL_madskills) {
cd81e915
NC
4544 if (PL_thistoken)
4545 sv_free(PL_thistoken);
6b29d1f5 4546 PL_thistoken = newSVpvs("");
5db06880
NC
4547 }
4548#endif
131b3ad0
DM
4549 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4550 if (!PL_lex_casemods && PL_lex_inpat)
4551 OPERATOR(',');
4552 else
4553 Aop(OP_CONCAT);
79072805 4554 }
cea2e8a9 4555 return yylex();
79072805
LW
4556
4557 case LEX_INTERPENDMAYBE:
3280af22
NIS
4558 if (intuit_more(PL_bufptr)) {
4559 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4560 break;
4561 }
4562 /* FALL THROUGH */
4563
4564 case LEX_INTERPEND:
3280af22
NIS
4565 if (PL_lex_dojoin) {
4566 PL_lex_dojoin = FALSE;
4567 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4568#ifdef PERL_MAD
4569 if (PL_madskills) {
cd81e915
NC
4570 if (PL_thistoken)
4571 sv_free(PL_thistoken);
6b29d1f5 4572 PL_thistoken = newSVpvs("");
5db06880
NC
4573 }
4574#endif
78cdf107 4575 PL_lex_allbrackets--;
bbf60fe6 4576 return REPORT(')');
79072805 4577 }
43a16006 4578 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4579 && SvEVALED(PL_lex_repl))
43a16006 4580 {
e9fa98b2 4581 if (PL_bufptr != PL_bufend)
cea2e8a9 4582 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4583 PL_lex_repl = NULL;
e9fa98b2 4584 }
79072805
LW
4585 /* FALLTHROUGH */
4586 case LEX_INTERPCONCAT:
4587#ifdef DEBUGGING
3280af22 4588 if (PL_lex_brackets)
cea2e8a9 4589 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4590#endif
3280af22 4591 if (PL_bufptr == PL_bufend)
bbf60fe6 4592 return REPORT(sublex_done());
79072805 4593
3280af22
NIS
4594 if (SvIVX(PL_linestr) == '\'') {
4595 SV *sv = newSVsv(PL_linestr);
4596 if (!PL_lex_inpat)
76e3520e 4597 sv = tokeq(sv);
3280af22 4598 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4599 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4600 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4601 s = PL_bufend;
79072805
LW
4602 }
4603 else {
3280af22 4604 s = scan_const(PL_bufptr);
79072805 4605 if (*s == '\\')
3280af22 4606 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4607 else
3280af22 4608 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4609 }
4610
3280af22 4611 if (s != PL_bufptr) {
cd81e915 4612 start_force(PL_curforce);
5db06880
NC
4613 if (PL_madskills) {
4614 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4615 }
6154021b 4616 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4617 PL_expect = XTERM;
79072805 4618 force_next(THING);
131b3ad0 4619 if (PL_lex_starts++) {
5db06880
NC
4620#ifdef PERL_MAD
4621 if (PL_madskills) {
cd81e915
NC
4622 if (PL_thistoken)
4623 sv_free(PL_thistoken);
6b29d1f5 4624 PL_thistoken = newSVpvs("");
5db06880
NC
4625 }
4626#endif
131b3ad0
DM
4627 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4628 if (!PL_lex_casemods && PL_lex_inpat)
4629 OPERATOR(',');
4630 else
4631 Aop(OP_CONCAT);
4632 }
79072805 4633 else {
3280af22 4634 PL_bufptr = s;
cea2e8a9 4635 return yylex();
79072805
LW
4636 }
4637 }
4638
cea2e8a9 4639 return yylex();
a0d0e21e 4640 case LEX_FORMLINE:
3280af22
NIS
4641 PL_lex_state = LEX_NORMAL;
4642 s = scan_formline(PL_bufptr);
4643 if (!PL_lex_formbrack)
a0d0e21e
LW
4644 goto rightbracket;
4645 OPERATOR(';');
79072805
LW
4646 }
4647
3280af22
NIS
4648 s = PL_bufptr;
4649 PL_oldoldbufptr = PL_oldbufptr;
4650 PL_oldbufptr = s;
463ee0b2
LW
4651
4652 retry:
5db06880 4653#ifdef PERL_MAD
cd81e915
NC
4654 if (PL_thistoken) {
4655 sv_free(PL_thistoken);
4656 PL_thistoken = 0;
5db06880 4657 }
cd81e915 4658 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4659#endif
378cc40b
LW
4660 switch (*s) {
4661 default:
7e2040f0 4662 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4663 goto keylookup;
b1fc3636
CJ
4664 {
4665 unsigned char c = *s;
4666 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4667 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4668 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4669 } else {
4670 d = PL_linestart;
4671 }
4672 *s = '\0';
4673 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4674 }
e929a76b
LW
4675 case 4:
4676 case 26:
4677 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4678 case 0:
5db06880
NC
4679#ifdef PERL_MAD
4680 if (PL_madskills)
cd81e915 4681 PL_faketokens = 0;
5db06880 4682#endif
3280af22
NIS
4683 if (!PL_rsfp) {
4684 PL_last_uni = 0;
4685 PL_last_lop = 0;
a7aaec61
Z
4686 if (PL_lex_brackets &&
4687 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4688 yyerror((const char *)
4689 (PL_lex_formbrack
4690 ? "Format not terminated"
4691 : "Missing right curly or square bracket"));
c5ee2135 4692 }
4e553d73 4693 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4694 "### Tokener got EOF\n");
5f80b19c 4695 } );
79072805 4696 TOKEN(0);
463ee0b2 4697 }
3280af22 4698 if (s++ < PL_bufend)
a687059c 4699 goto retry; /* ignore stray nulls */
3280af22
NIS
4700 PL_last_uni = 0;
4701 PL_last_lop = 0;
4702 if (!PL_in_eval && !PL_preambled) {
4703 PL_preambled = TRUE;
5db06880
NC
4704#ifdef PERL_MAD
4705 if (PL_madskills)
cd81e915 4706 PL_faketokens = 1;
5db06880 4707#endif
5ab7ff98
NC
4708 if (PL_perldb) {
4709 /* Generate a string of Perl code to load the debugger.
4710 * If PERL5DB is set, it will return the contents of that,
4711 * otherwise a compile-time require of perl5db.pl. */
4712
4713 const char * const pdb = PerlEnv_getenv("PERL5DB");
4714
4715 if (pdb) {
4716 sv_setpv(PL_linestr, pdb);
4717 sv_catpvs(PL_linestr,";");
4718 } else {
4719 SETERRNO(0,SS_NORMAL);
4720 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4721 }
4722 } else
4723 sv_setpvs(PL_linestr,"");
c62eb204
NC
4724 if (PL_preambleav) {
4725 SV **svp = AvARRAY(PL_preambleav);
4726 SV **const end = svp + AvFILLp(PL_preambleav);
4727 while(svp <= end) {
4728 sv_catsv(PL_linestr, *svp);
4729 ++svp;
396482e1 4730 sv_catpvs(PL_linestr, ";");
91b7def8 4731 }
daba3364 4732 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4733 PL_preambleav = NULL;
91b7def8 4734 }
9f639728
FR
4735 if (PL_minus_E)
4736 sv_catpvs(PL_linestr,
4737 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4738 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4739 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4740 if (PL_minus_l)
396482e1 4741 sv_catpvs(PL_linestr,"chomp;");
3280af22 4742 if (PL_minus_a) {
3280af22 4743 if (PL_minus_F) {
3792a11b
NC
4744 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4745 || *PL_splitstr == '"')
3280af22 4746 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4747 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4748 else {
c8ef6a4b
NC
4749 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4750 bytes can be used as quoting characters. :-) */
dd374669 4751 const char *splits = PL_splitstr;
91d456ae 4752 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4753 do {
4754 /* Need to \ \s */
dd374669
AL
4755 if (*splits == '\\')
4756 sv_catpvn(PL_linestr, splits, 1);
4757 sv_catpvn(PL_linestr, splits, 1);
4758 } while (*splits++);
48c4c863
NC
4759 /* This loop will embed the trailing NUL of
4760 PL_linestr as the last thing it does before
4761 terminating. */
396482e1 4762 sv_catpvs(PL_linestr, ");");
54310121 4763 }
2304df62
AD
4764 }
4765 else
396482e1 4766 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4767 }
79072805 4768 }
396482e1 4769 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4770 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4771 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4772 PL_last_lop = PL_last_uni = NULL;
65269a95 4773 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4774 update_debugger_info(PL_linestr, NULL, 0);
79072805 4775 goto retry;
a687059c 4776 }
e929a76b 4777 do {
580561a3
Z
4778 fake_eof = 0;
4779 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4780 if (0) {
7e28d3af 4781 fake_eof:
f0e67a1d
Z
4782 fake_eof = LEX_FAKE_EOF;
4783 }
4784 PL_bufptr = PL_bufend;
17cc9359 4785 CopLINE_inc(PL_curcop);
f0e67a1d 4786 if (!lex_next_chunk(fake_eof)) {
17cc9359 4787 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4788 s = PL_bufptr;
4789 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4790 }
17cc9359 4791 CopLINE_dec(PL_curcop);
5db06880 4792#ifdef PERL_MAD
f0e67a1d 4793 if (!PL_rsfp)
cd81e915 4794 PL_realtokenstart = -1;
5db06880 4795#endif
f0e67a1d 4796 s = PL_bufptr;
7aa207d6
JH
4797 /* If it looks like the start of a BOM or raw UTF-16,
4798 * check if it in fact is. */
580561a3 4799 if (bof && PL_rsfp &&
7aa207d6
JH
4800 (*s == 0 ||
4801 *(U8*)s == 0xEF ||
4802 *(U8*)s >= 0xFE ||
4803 s[1] == 0)) {
879bc93b
DM
4804 Off_t offset = (IV)PerlIO_tell(PL_rsfp);
4805 bof = (offset == (Off_t)SvCUR(PL_linestr));
6d510155
JD
4806#if defined(PERLIO_USING_CRLF) && defined(PERL_TEXTMODE_SCRIPTS)
4807 /* offset may include swallowed CR */
4808 if (!bof)
879bc93b 4809 bof = (offset == (Off_t)SvCUR(PL_linestr)+1);
6d510155 4810#endif
7e28d3af 4811 if (bof) {
3280af22 4812 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4813 s = swallow_bom((U8*)s);
e929a76b 4814 }
378cc40b 4815 }
737c24fc 4816 if (PL_parser->in_pod) {
a0d0e21e 4817 /* Incest with pod. */
5db06880
NC
4818#ifdef PERL_MAD
4819 if (PL_madskills)
cd81e915 4820 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4821#endif
01a57ef7 4822 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4823 sv_setpvs(PL_linestr, "");
3280af22
NIS
4824 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4825 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4826 PL_last_lop = PL_last_uni = NULL;
737c24fc 4827 PL_parser->in_pod = 0;
a0d0e21e 4828 }
4e553d73 4829 }
85613cab
Z
4830 if (PL_rsfp)
4831 incline(s);
737c24fc 4832 } while (PL_parser->in_pod);
3280af22 4833 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4834 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4835 PL_last_lop = PL_last_uni = NULL;
57843af0 4836 if (CopLINE(PL_curcop) == 1) {
3280af22 4837 while (s < PL_bufend && isSPACE(*s))
79072805 4838 s++;
a0d0e21e 4839 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4840 s++;
5db06880
NC
4841#ifdef PERL_MAD
4842 if (PL_madskills)
cd81e915 4843 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4844#endif
bd61b366 4845 d = NULL;
3280af22 4846 if (!PL_in_eval) {
44a8e56a 4847 if (*s == '#' && *(s+1) == '!')
4848 d = s + 2;
4849#ifdef ALTERNATE_SHEBANG
4850 else {
bfed75c6 4851 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4852 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4853 d = s + (sizeof(as) - 1);
4854 }
4855#endif /* ALTERNATE_SHEBANG */
4856 }
4857 if (d) {
b8378b72 4858 char *ipath;
774d564b 4859 char *ipathend;
b8378b72 4860
774d564b 4861 while (isSPACE(*d))
b8378b72
CS
4862 d++;
4863 ipath = d;
774d564b 4864 while (*d && !isSPACE(*d))
4865 d++;
4866 ipathend = d;
4867
4868#ifdef ARG_ZERO_IS_SCRIPT
4869 if (ipathend > ipath) {
4870 /*
4871 * HP-UX (at least) sets argv[0] to the script name,
4872 * which makes $^X incorrect. And Digital UNIX and Linux,
4873 * at least, set argv[0] to the basename of the Perl
4874 * interpreter. So, having found "#!", we'll set it right.
4875 */
fafc274c
NC
4876 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4877 SVt_PV)); /* $^X */
774d564b 4878 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4879 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4880 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4881 SvSETMAGIC(x);
4882 }
556c1dec
JH
4883 else {
4884 STRLEN blen;
4885 STRLEN llen;
cfd0369c 4886 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4887 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4888 if (llen < blen) {
4889 bstart += blen - llen;
4890 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4891 sv_setpvn(x, ipath, ipathend - ipath);
4892 SvSETMAGIC(x);
4893 }
4894 }
4895 }
774d564b 4896 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4897 }
774d564b 4898#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4899
4900 /*
4901 * Look for options.
4902 */
748a9306 4903 d = instr(s,"perl -");
84e30d1a 4904 if (!d) {
748a9306 4905 d = instr(s,"perl");
84e30d1a
GS
4906#if defined(DOSISH)
4907 /* avoid getting into infinite loops when shebang
4908 * line contains "Perl" rather than "perl" */
4909 if (!d) {
4910 for (d = ipathend-4; d >= ipath; --d) {
4911 if ((*d == 'p' || *d == 'P')
4912 && !ibcmp(d, "perl", 4))
4913 {
4914 break;
4915 }
4916 }
4917 if (d < ipath)
bd61b366 4918 d = NULL;
84e30d1a
GS
4919 }
4920#endif
4921 }
44a8e56a 4922#ifdef ALTERNATE_SHEBANG
4923 /*
4924 * If the ALTERNATE_SHEBANG on this system starts with a
4925 * character that can be part of a Perl expression, then if
4926 * we see it but not "perl", we're probably looking at the
4927 * start of Perl code, not a request to hand off to some
4928 * other interpreter. Similarly, if "perl" is there, but
4929 * not in the first 'word' of the line, we assume the line
4930 * contains the start of the Perl program.
44a8e56a 4931 */
4932 if (d && *s != '#') {
f54cb97a 4933 const char *c = ipath;
44a8e56a 4934 while (*c && !strchr("; \t\r\n\f\v#", *c))
4935 c++;
4936 if (c < d)
bd61b366 4937 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4938 else
4939 *s = '#'; /* Don't try to parse shebang line */
4940 }
774d564b 4941#endif /* ALTERNATE_SHEBANG */
748a9306 4942 if (!d &&
44a8e56a 4943 *s == '#' &&
774d564b 4944 ipathend > ipath &&
3280af22 4945 !PL_minus_c &&
748a9306 4946 !instr(s,"indir") &&
3280af22 4947 instr(PL_origargv[0],"perl"))
748a9306 4948 {
27da23d5 4949 dVAR;
9f68db38 4950 char **newargv;
9f68db38 4951
774d564b 4952 *ipathend = '\0';
4953 s = ipathend + 1;
3280af22 4954 while (s < PL_bufend && isSPACE(*s))
9f68db38 4955 s++;
3280af22 4956 if (s < PL_bufend) {
d85f917e 4957 Newx(newargv,PL_origargc+3,char*);
9f68db38 4958 newargv[1] = s;
3280af22 4959 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4960 s++;
4961 *s = '\0';
3280af22 4962 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4963 }
4964 else
3280af22 4965 newargv = PL_origargv;
774d564b 4966 newargv[0] = ipath;
b35112e7 4967 PERL_FPU_PRE_EXEC
b4748376 4968 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4969 PERL_FPU_POST_EXEC
cea2e8a9 4970 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4971 }
748a9306 4972 if (d) {
c35e046a
AL
4973 while (*d && !isSPACE(*d))
4974 d++;
4975 while (SPACE_OR_TAB(*d))
4976 d++;
748a9306
LW
4977
4978 if (*d++ == '-') {
f54cb97a 4979 const bool switches_done = PL_doswitches;
fb993905
GA
4980 const U32 oldpdb = PL_perldb;
4981 const bool oldn = PL_minus_n;
4982 const bool oldp = PL_minus_p;
c7030b81 4983 const char *d1 = d;
fb993905 4984
8cc95fdb 4985 do {
4ba71d51
FC
4986 bool baduni = FALSE;
4987 if (*d1 == 'C') {
bd0ab00d
NC
4988 const char *d2 = d1 + 1;
4989 if (parse_unicode_opts((const char **)&d2)
4990 != PL_unicode)
4991 baduni = TRUE;
4ba71d51
FC
4992 }
4993 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4994 const char * const m = d1;
4995 while (*d1 && !isSPACE(*d1))
4996 d1++;
cea2e8a9 4997 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4998 (int)(d1 - m), m);
8cc95fdb 4999 }
c7030b81
NC
5000 d1 = moreswitches(d1);
5001 } while (d1);
f0b2cf55
YST
5002 if (PL_doswitches && !switches_done) {
5003 int argc = PL_origargc;
5004 char **argv = PL_origargv;
5005 do {
5006 argc--,argv++;
5007 } while (argc && argv[0][0] == '-' && argv[0][1]);
5008 init_argv_symbols(argc,argv);
5009 }
65269a95 5010 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 5011 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 5012 /* if we have already added "LINE: while (<>) {",
5013 we must not do it again */
748a9306 5014 {
76f68e9b 5015 sv_setpvs(PL_linestr, "");
3280af22
NIS
5016 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
5017 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 5018 PL_last_lop = PL_last_uni = NULL;
3280af22 5019 PL_preambled = FALSE;
65269a95 5020 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 5021 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
5022 goto retry;
5023 }
a0d0e21e 5024 }
79072805 5025 }
9f68db38 5026 }
79072805 5027 }
3280af22
NIS
5028 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5029 PL_bufptr = s;
5030 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5031 return yylex();
ae986130 5032 }
378cc40b 5033 goto retry;
4fdae800 5034 case '\r':
6a27c188 5035#ifdef PERL_STRICT_CR
cea2e8a9 5036 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 5037 Perl_croak(aTHX_
cc507455 5038 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 5039#endif
4fdae800 5040 case ' ': case '\t': case '\f': case 013:
5db06880 5041#ifdef PERL_MAD
cd81e915 5042 PL_realtokenstart = -1;
ac372eb8
RD
5043 if (!PL_thiswhite)
5044 PL_thiswhite = newSVpvs("");
5045 sv_catpvn(PL_thiswhite, s, 1);
5db06880 5046#endif
ac372eb8 5047 s++;
378cc40b 5048 goto retry;
378cc40b 5049 case '#':
e929a76b 5050 case '\n':
5db06880 5051#ifdef PERL_MAD
cd81e915 5052 PL_realtokenstart = -1;
5db06880 5053 if (PL_madskills)
cd81e915 5054 PL_faketokens = 0;
5db06880 5055#endif
3280af22 5056 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
5057 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
5058 /* handle eval qq[#line 1 "foo"\n ...] */
5059 CopLINE_dec(PL_curcop);
5060 incline(s);
5061 }
5db06880
NC
5062 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
5063 s = SKIPSPACE0(s);
5064 if (!PL_in_eval || PL_rsfp)
5065 incline(s);
5066 }
5067 else {
5068 d = s;
5069 while (d < PL_bufend && *d != '\n')
5070 d++;
5071 if (d < PL_bufend)
5072 d++;
5073 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5074 Perl_croak(aTHX_ "panic: input overflow");
5075#ifdef PERL_MAD
5076 if (PL_madskills)
cd81e915 5077 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
5078#endif
5079 s = d;
5080 incline(s);
5081 }
3280af22
NIS
5082 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
5083 PL_bufptr = s;
5084 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5085 return yylex();
a687059c 5086 }
378cc40b 5087 }
a687059c 5088 else {
5db06880
NC
5089#ifdef PERL_MAD
5090 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5091 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5092 PL_faketokens = 0;
5db06880
NC
5093 s = SKIPSPACE0(s);
5094 TOKEN(PEG); /* make sure any #! line is accessible */
5095 }
5096 s = SKIPSPACE0(s);
5097 }
5098 else {
5099/* if (PL_madskills && PL_lex_formbrack) { */
5100 d = s;
5101 while (d < PL_bufend && *d != '\n')
5102 d++;
5103 if (d < PL_bufend)
5104 d++;
5105 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5106 Perl_croak(aTHX_ "panic: input overflow");
5107 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5108 if (!PL_thiswhite)
6b29d1f5 5109 PL_thiswhite = newSVpvs("");
5db06880 5110 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5111 sv_setpvs(PL_thiswhite, "");
cd81e915 5112 PL_faketokens = 0;
5db06880 5113 }
cd81e915 5114 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5115 }
5116 s = d;
5117/* }
5118 *s = '\0';
5119 PL_bufend = s; */
5120 }
5121#else
378cc40b 5122 *s = '\0';
3280af22 5123 PL_bufend = s;
5db06880 5124#endif
a687059c 5125 }
378cc40b
LW
5126 goto retry;
5127 case '-':
79072805 5128 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5129 I32 ftst = 0;
90771dc0 5130 char tmp;
e5edeb50 5131
378cc40b 5132 s++;
3280af22 5133 PL_bufptr = s;
748a9306
LW
5134 tmp = *s++;
5135
bf4acbe4 5136 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5137 s++;
5138
5139 if (strnEQ(s,"=>",2)) {
3280af22 5140 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5141 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5142 OPERATOR('-'); /* unary minus */
5143 }
3280af22 5144 PL_last_uni = PL_oldbufptr;
748a9306 5145 switch (tmp) {
e5edeb50
JH
5146 case 'r': ftst = OP_FTEREAD; break;
5147 case 'w': ftst = OP_FTEWRITE; break;
5148 case 'x': ftst = OP_FTEEXEC; break;
5149 case 'o': ftst = OP_FTEOWNED; break;
5150 case 'R': ftst = OP_FTRREAD; break;
5151 case 'W': ftst = OP_FTRWRITE; break;
5152 case 'X': ftst = OP_FTREXEC; break;
5153 case 'O': ftst = OP_FTROWNED; break;
5154 case 'e': ftst = OP_FTIS; break;
5155 case 'z': ftst = OP_FTZERO; break;
5156 case 's': ftst = OP_FTSIZE; break;
5157 case 'f': ftst = OP_FTFILE; break;
5158 case 'd': ftst = OP_FTDIR; break;
5159 case 'l': ftst = OP_FTLINK; break;
5160 case 'p': ftst = OP_FTPIPE; break;
5161 case 'S': ftst = OP_FTSOCK; break;
5162 case 'u': ftst = OP_FTSUID; break;
5163 case 'g': ftst = OP_FTSGID; break;
5164 case 'k': ftst = OP_FTSVTX; break;
5165 case 'b': ftst = OP_FTBLK; break;
5166 case 'c': ftst = OP_FTCHR; break;
5167 case 't': ftst = OP_FTTTY; break;
5168 case 'T': ftst = OP_FTTEXT; break;
5169 case 'B': ftst = OP_FTBINARY; break;
5170 case 'M': case 'A': case 'C':
fafc274c 5171 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5172 switch (tmp) {
5173 case 'M': ftst = OP_FTMTIME; break;
5174 case 'A': ftst = OP_FTATIME; break;
5175 case 'C': ftst = OP_FTCTIME; break;
5176 default: break;
5177 }
5178 break;
378cc40b 5179 default:
378cc40b
LW
5180 break;
5181 }
e5edeb50 5182 if (ftst) {
eb160463 5183 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5184 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5185 "### Saw file test %c\n", (int)tmp);
5f80b19c 5186 } );
e5edeb50
JH
5187 FTST(ftst);
5188 }
5189 else {
5190 /* Assume it was a minus followed by a one-letter named
5191 * subroutine call (or a -bareword), then. */
95c31fe3 5192 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5193 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5194 (int) tmp);
5f80b19c 5195 } );
3cf7b4c4 5196 s = --PL_bufptr;
e5edeb50 5197 }
378cc40b 5198 }
90771dc0
NC
5199 {
5200 const char tmp = *s++;
5201 if (*s == tmp) {
5202 s++;
5203 if (PL_expect == XOPERATOR)
5204 TERM(POSTDEC);
5205 else
5206 OPERATOR(PREDEC);
5207 }
5208 else if (*s == '>') {
5209 s++;
29595ff2 5210 s = SKIPSPACE1(s);
90771dc0
NC
5211 if (isIDFIRST_lazy_if(s,UTF)) {
5212 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5213 TOKEN(ARROW);
5214 }
5215 else if (*s == '$')
5216 OPERATOR(ARROW);
5217 else
5218 TERM(ARROW);
5219 }
78cdf107
Z
5220 if (PL_expect == XOPERATOR) {
5221 if (*s == '=' && !PL_lex_allbrackets &&
5222 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5223 s--;
5224 TOKEN(0);
5225 }
90771dc0 5226 Aop(OP_SUBTRACT);
78cdf107 5227 }
90771dc0
NC
5228 else {
5229 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5230 check_uni();
5231 OPERATOR('-'); /* unary minus */
79072805 5232 }
2f3197b3 5233 }
79072805 5234
378cc40b 5235 case '+':
90771dc0
NC
5236 {
5237 const char tmp = *s++;
5238 if (*s == tmp) {
5239 s++;
5240 if (PL_expect == XOPERATOR)
5241 TERM(POSTINC);
5242 else
5243 OPERATOR(PREINC);
5244 }
78cdf107
Z
5245 if (PL_expect == XOPERATOR) {
5246 if (*s == '=' && !PL_lex_allbrackets &&
5247 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5248 s--;
5249 TOKEN(0);
5250 }
90771dc0 5251 Aop(OP_ADD);
78cdf107 5252 }
90771dc0
NC
5253 else {
5254 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5255 check_uni();
5256 OPERATOR('+');
5257 }
2f3197b3 5258 }
a687059c 5259
378cc40b 5260 case '*':
3280af22
NIS
5261 if (PL_expect != XOPERATOR) {
5262 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5263 PL_expect = XOPERATOR;
5264 force_ident(PL_tokenbuf, '*');
5265 if (!*PL_tokenbuf)
a0d0e21e 5266 PREREF('*');
79072805 5267 TERM('*');
a687059c 5268 }
79072805
LW
5269 s++;
5270 if (*s == '*') {
a687059c 5271 s++;
78cdf107
Z
5272 if (*s == '=' && !PL_lex_allbrackets &&
5273 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5274 s -= 2;
5275 TOKEN(0);
5276 }
79072805 5277 PWop(OP_POW);
a687059c 5278 }
78cdf107
Z
5279 if (*s == '=' && !PL_lex_allbrackets &&
5280 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5281 s--;
5282 TOKEN(0);
5283 }
79072805
LW
5284 Mop(OP_MULTIPLY);
5285
378cc40b 5286 case '%':
3280af22 5287 if (PL_expect == XOPERATOR) {
78cdf107
Z
5288 if (s[1] == '=' && !PL_lex_allbrackets &&
5289 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
5290 TOKEN(0);
bbce6d69 5291 ++s;
5292 Mop(OP_MODULO);
a687059c 5293 }
3280af22 5294 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5295 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5296 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5297 if (!PL_tokenbuf[1]) {
bbce6d69 5298 PREREF('%');
a687059c 5299 }
3280af22 5300 PL_pending_ident = '%';
bbce6d69 5301 TERM('%');
a687059c 5302
378cc40b 5303 case '^':
78cdf107
Z
5304 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5305 (s[1] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE))
5306 TOKEN(0);
79072805 5307 s++;
a0d0e21e 5308 BOop(OP_BIT_XOR);
79072805 5309 case '[':
a7aaec61
Z
5310 if (PL_lex_brackets > 100)
5311 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5312 PL_lex_brackstack[PL_lex_brackets++] = 0;
78cdf107 5313 PL_lex_allbrackets++;
df3467db
IG
5314 {
5315 const char tmp = *s++;
5316 OPERATOR(tmp);
5317 }
378cc40b 5318 case '~':
0d863452 5319 if (s[1] == '~'
3e7dd34d 5320 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452 5321 {
78cdf107
Z
5322 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
5323 TOKEN(0);
0d863452
RH
5324 s += 2;
5325 Eop(OP_SMARTMATCH);
5326 }
78cdf107
Z
5327 s++;
5328 OPERATOR('~');
378cc40b 5329 case ',':
78cdf107
Z
5330 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMMA)
5331 TOKEN(0);
5332 s++;
5333 OPERATOR(',');
a0d0e21e
LW
5334 case ':':
5335 if (s[1] == ':') {
5336 len = 0;
0bfa2a8a 5337 goto just_a_word_zero_gv;
a0d0e21e
LW
5338 }
5339 s++;
09bef843
SB
5340 switch (PL_expect) {
5341 OP *attrs;
5db06880
NC
5342#ifdef PERL_MAD
5343 I32 stuffstart;
5344#endif
09bef843
SB
5345 case XOPERATOR:
5346 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5347 break;
5348 PL_bufptr = s; /* update in case we back off */
d83f38d8 5349 if (*s == '=') {
2dc78664
NC
5350 Perl_croak(aTHX_
5351 "Use of := for an empty attribute list is not allowed");
d83f38d8 5352 }
09bef843
SB
5353 goto grabattrs;
5354 case XATTRBLOCK:
5355 PL_expect = XBLOCK;
5356 goto grabattrs;
5357 case XATTRTERM:
5358 PL_expect = XTERMBLOCK;
5359 grabattrs:
5db06880
NC
5360#ifdef PERL_MAD
5361 stuffstart = s - SvPVX(PL_linestr) - 1;
5362#endif
29595ff2 5363 s = PEEKSPACE(s);
5f66b61c 5364 attrs = NULL;
7e2040f0 5365 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5366 I32 tmp;
5cc237b8 5367 SV *sv;
09bef843 5368 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5369 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5370 if (tmp < 0) tmp = -tmp;
5371 switch (tmp) {
5372 case KEY_or:
5373 case KEY_and:
5374 case KEY_for:
11baf631 5375 case KEY_foreach:
f9829d6b
GS
5376 case KEY_unless:
5377 case KEY_if:
5378 case KEY_while:
5379 case KEY_until:
5380 goto got_attrs;
5381 default:
5382 break;
5383 }
5384 }
89a5757c 5385 sv = newSVpvn_flags(s, len, UTF ? SVf_UTF8 : 0);
09bef843
SB
5386 if (*d == '(') {
5387 d = scan_str(d,TRUE,TRUE);
5388 if (!d) {
09bef843
SB
5389 /* MUST advance bufptr here to avoid bogus
5390 "at end of line" context messages from yyerror().
5391 */
5392 PL_bufptr = s + len;
5393 yyerror("Unterminated attribute parameter in attribute list");
5394 if (attrs)
5395 op_free(attrs);
5cc237b8 5396 sv_free(sv);
bbf60fe6 5397 return REPORT(0); /* EOF indicator */
09bef843
SB
5398 }
5399 }
5400 if (PL_lex_stuff) {
09bef843 5401 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5402 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5403 newSVOP(OP_CONST, 0, sv));
5404 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5405 PL_lex_stuff = NULL;
09bef843
SB
5406 }
5407 else {
5cc237b8
BS
5408 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5409 sv_free(sv);
1108974d 5410 if (PL_in_my == KEY_our) {
df9a6019 5411 deprecate(":unique");
1108974d 5412 }
bfed75c6 5413 else
371fce9b
DM
5414 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5415 }
5416
d3cea301
SB
5417 /* NOTE: any CV attrs applied here need to be part of
5418 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5419 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5420 sv_free(sv);
78f9721b 5421 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5422 }
5423 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5424 sv_free(sv);
8e5dadda 5425 deprecate(":locked");
5cc237b8
BS
5426 }
5427 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5428 sv_free(sv);
78f9721b 5429 CvMETHOD_on(PL_compcv);
5cc237b8 5430 }
78f9721b
SM
5431 /* After we've set the flags, it could be argued that
5432 we don't need to do the attributes.pm-based setting
5433 process, and shouldn't bother appending recognized
d3cea301
SB
5434 flags. To experiment with that, uncomment the
5435 following "else". (Note that's already been
5436 uncommented. That keeps the above-applied built-in
5437 attributes from being intercepted (and possibly
5438 rejected) by a package's attribute routines, but is
5439 justified by the performance win for the common case
5440 of applying only built-in attributes.) */
0256094b 5441 else
2fcb4757 5442 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5443 newSVOP(OP_CONST, 0,
5cc237b8 5444 sv));
09bef843 5445 }
29595ff2 5446 s = PEEKSPACE(d);
0120eecf 5447 if (*s == ':' && s[1] != ':')
29595ff2 5448 s = PEEKSPACE(s+1);
0120eecf
GS
5449 else if (s == d)
5450 break; /* require real whitespace or :'s */
29595ff2 5451 /* XXX losing whitespace on sequential attributes here */
09bef843 5452 }
90771dc0
NC
5453 {
5454 const char tmp
5455 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5456 if (*s != ';' && *s != '}' && *s != tmp
5457 && (tmp != '=' || *s != ')')) {
5458 const char q = ((*s == '\'') ? '"' : '\'');
5459 /* If here for an expression, and parsed no attrs, back
5460 off. */
5461 if (tmp == '=' && !attrs) {
5462 s = PL_bufptr;
5463 break;
5464 }
5465 /* MUST advance bufptr here to avoid bogus "at end of line"
5466 context messages from yyerror().
5467 */
5468 PL_bufptr = s;
10edeb5d
JH
5469 yyerror( (const char *)
5470 (*s
5471 ? Perl_form(aTHX_ "Invalid separator character "
5472 "%c%c%c in attribute list", q, *s, q)
5473 : "Unterminated attribute list" ) );
90771dc0
NC
5474 if (attrs)
5475 op_free(attrs);
5476 OPERATOR(':');
09bef843 5477 }
09bef843 5478 }
f9829d6b 5479 got_attrs:
09bef843 5480 if (attrs) {
cd81e915 5481 start_force(PL_curforce);
9ded7720 5482 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5483 CURMAD('_', PL_nextwhite);
89122651 5484 force_next(THING);
5db06880
NC
5485 }
5486#ifdef PERL_MAD
5487 if (PL_madskills) {
cd81e915 5488 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5489 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5490 }
5db06880 5491#endif
09bef843
SB
5492 TOKEN(COLONATTR);
5493 }
78cdf107
Z
5494 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING) {
5495 s--;
5496 TOKEN(0);
5497 }
5498 PL_lex_allbrackets--;
a0d0e21e 5499 OPERATOR(':');
8990e307
LW
5500 case '(':
5501 s++;
3280af22
NIS
5502 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5503 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5504 else
3280af22 5505 PL_expect = XTERM;
29595ff2 5506 s = SKIPSPACE1(s);
78cdf107 5507 PL_lex_allbrackets++;
a0d0e21e 5508 TOKEN('(');
378cc40b 5509 case ';':
78cdf107
Z
5510 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
5511 TOKEN(0);
f4dd75d9 5512 CLINE;
78cdf107
Z
5513 s++;
5514 OPERATOR(';');
378cc40b 5515 case ')':
78cdf107
Z
5516 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_CLOSING)
5517 TOKEN(0);
5518 s++;
5519 PL_lex_allbrackets--;
5520 s = SKIPSPACE1(s);
5521 if (*s == '{')
5522 PREBLOCK(')');
5523 TERM(')');
79072805 5524 case ']':
a7aaec61
Z
5525 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5526 TOKEN(0);
79072805 5527 s++;
3280af22 5528 if (PL_lex_brackets <= 0)
d98d5fff 5529 yyerror("Unmatched right square bracket");
463ee0b2 5530 else
3280af22 5531 --PL_lex_brackets;
78cdf107 5532 PL_lex_allbrackets--;
3280af22
NIS
5533 if (PL_lex_state == LEX_INTERPNORMAL) {
5534 if (PL_lex_brackets == 0) {
02255c60
FC
5535 if (*s == '-' && s[1] == '>')
5536 PL_lex_state = LEX_INTERPENDMAYBE;
5537 else if (*s != '[' && *s != '{')
3280af22 5538 PL_lex_state = LEX_INTERPEND;
79072805
LW
5539 }
5540 }
4633a7c4 5541 TERM(']');
79072805
LW
5542 case '{':
5543 leftbracket:
79072805 5544 s++;
3280af22 5545 if (PL_lex_brackets > 100) {
8edd5f42 5546 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5547 }
3280af22 5548 switch (PL_expect) {
a0d0e21e 5549 case XTERM:
3280af22 5550 if (PL_lex_formbrack) {
a0d0e21e
LW
5551 s--;
5552 PRETERMBLOCK(DO);
5553 }
3280af22
NIS
5554 if (PL_oldoldbufptr == PL_last_lop)
5555 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5556 else
3280af22 5557 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5558 PL_lex_allbrackets++;
79072805 5559 OPERATOR(HASHBRACK);
a0d0e21e 5560 case XOPERATOR:
bf4acbe4 5561 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5562 s++;
44a8e56a 5563 d = s;
3280af22
NIS
5564 PL_tokenbuf[0] = '\0';
5565 if (d < PL_bufend && *d == '-') {
5566 PL_tokenbuf[0] = '-';
44a8e56a 5567 d++;
bf4acbe4 5568 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5569 d++;
5570 }
7e2040f0 5571 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5572 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5573 FALSE, &len);
bf4acbe4 5574 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5575 d++;
5576 if (*d == '}') {
f54cb97a 5577 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5578 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5579 if (minus)
5580 force_next('-');
748a9306
LW
5581 }
5582 }
5583 /* FALL THROUGH */
09bef843 5584 case XATTRBLOCK:
748a9306 5585 case XBLOCK:
3280af22 5586 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
78cdf107 5587 PL_lex_allbrackets++;
3280af22 5588 PL_expect = XSTATE;
a0d0e21e 5589 break;
09bef843 5590 case XATTRTERM:
a0d0e21e 5591 case XTERMBLOCK:
3280af22 5592 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5593 PL_lex_allbrackets++;
3280af22 5594 PL_expect = XSTATE;
a0d0e21e
LW
5595 break;
5596 default: {
f54cb97a 5597 const char *t;
3280af22
NIS
5598 if (PL_oldoldbufptr == PL_last_lop)
5599 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5600 else
3280af22 5601 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
78cdf107 5602 PL_lex_allbrackets++;
29595ff2 5603 s = SKIPSPACE1(s);
8452ff4b
SB
5604 if (*s == '}') {
5605 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5606 PL_expect = XTERM;
5607 /* This hack is to get the ${} in the message. */
5608 PL_bufptr = s+1;
5609 yyerror("syntax error");
5610 break;
5611 }
a0d0e21e 5612 OPERATOR(HASHBRACK);
8452ff4b 5613 }
b8a4b1be
GS
5614 /* This hack serves to disambiguate a pair of curlies
5615 * as being a block or an anon hash. Normally, expectation
5616 * determines that, but in cases where we're not in a
5617 * position to expect anything in particular (like inside
5618 * eval"") we have to resolve the ambiguity. This code
5619 * covers the case where the first term in the curlies is a
5620 * quoted string. Most other cases need to be explicitly
a0288114 5621 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5622 * curly in order to force resolution as an anon hash.
5623 *
5624 * XXX should probably propagate the outer expectation
5625 * into eval"" to rely less on this hack, but that could
5626 * potentially break current behavior of eval"".
5627 * GSAR 97-07-21
5628 */
5629 t = s;
5630 if (*s == '\'' || *s == '"' || *s == '`') {
5631 /* common case: get past first string, handling escapes */
3280af22 5632 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5633 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5634 t++;
5635 t++;
a0d0e21e 5636 }
b8a4b1be 5637 else if (*s == 'q') {
3280af22 5638 if (++t < PL_bufend
b8a4b1be 5639 && (!isALNUM(*t)
3280af22 5640 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5641 && !isALNUM(*t))))
5642 {
abc667d1 5643 /* skip q//-like construct */
f54cb97a 5644 const char *tmps;
b8a4b1be
GS
5645 char open, close, term;
5646 I32 brackets = 1;
5647
3280af22 5648 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5649 t++;
abc667d1
DM
5650 /* check for q => */
5651 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5652 OPERATOR(HASHBRACK);
5653 }
b8a4b1be
GS
5654 term = *t;
5655 open = term;
5656 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5657 term = tmps[5];
5658 close = term;
5659 if (open == close)
3280af22
NIS
5660 for (t++; t < PL_bufend; t++) {
5661 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5662 t++;
6d07e5e9 5663 else if (*t == open)
b8a4b1be
GS
5664 break;
5665 }
abc667d1 5666 else {
3280af22
NIS
5667 for (t++; t < PL_bufend; t++) {
5668 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5669 t++;
6d07e5e9 5670 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5671 break;
5672 else if (*t == open)
5673 brackets++;
5674 }
abc667d1
DM
5675 }
5676 t++;
b8a4b1be 5677 }
abc667d1
DM
5678 else
5679 /* skip plain q word */
5680 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5681 t += UTF8SKIP(t);
a0d0e21e 5682 }
7e2040f0 5683 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5684 t += UTF8SKIP(t);
7e2040f0 5685 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5686 t += UTF8SKIP(t);
a0d0e21e 5687 }
3280af22 5688 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5689 t++;
b8a4b1be
GS
5690 /* if comma follows first term, call it an anon hash */
5691 /* XXX it could be a comma expression with loop modifiers */
3280af22 5692 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5693 || (*t == '=' && t[1] == '>')))
a0d0e21e 5694 OPERATOR(HASHBRACK);
3280af22 5695 if (PL_expect == XREF)
4e4e412b 5696 PL_expect = XTERM;
a0d0e21e 5697 else {
3280af22
NIS
5698 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5699 PL_expect = XSTATE;
a0d0e21e 5700 }
8990e307 5701 }
a0d0e21e 5702 break;
463ee0b2 5703 }
6154021b 5704 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5705 if (isSPACE(*s) || *s == '#')
3280af22 5706 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5707 TOKEN('{');
378cc40b 5708 case '}':
a7aaec61
Z
5709 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5710 TOKEN(0);
79072805
LW
5711 rightbracket:
5712 s++;
3280af22 5713 if (PL_lex_brackets <= 0)
d98d5fff 5714 yyerror("Unmatched right curly bracket");
463ee0b2 5715 else
3280af22 5716 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
78cdf107 5717 PL_lex_allbrackets--;
c2e66d9e 5718 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5719 PL_lex_formbrack = 0;
5720 if (PL_lex_state == LEX_INTERPNORMAL) {
5721 if (PL_lex_brackets == 0) {
9059aa12
LW
5722 if (PL_expect & XFAKEBRACK) {
5723 PL_expect &= XENUMMASK;
3280af22
NIS
5724 PL_lex_state = LEX_INTERPEND;
5725 PL_bufptr = s;
5db06880
NC
5726#if 0
5727 if (PL_madskills) {
cd81e915 5728 if (!PL_thiswhite)
6b29d1f5 5729 PL_thiswhite = newSVpvs("");
76f68e9b 5730 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5731 }
5732#endif
cea2e8a9 5733 return yylex(); /* ignore fake brackets */
79072805 5734 }
fa83b5b6 5735 if (*s == '-' && s[1] == '>')
3280af22 5736 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5737 else if (*s != '[' && *s != '{')
3280af22 5738 PL_lex_state = LEX_INTERPEND;
79072805
LW
5739 }
5740 }
9059aa12
LW
5741 if (PL_expect & XFAKEBRACK) {
5742 PL_expect &= XENUMMASK;
3280af22 5743 PL_bufptr = s;
cea2e8a9 5744 return yylex(); /* ignore fake brackets */
748a9306 5745 }
cd81e915 5746 start_force(PL_curforce);
5db06880
NC
5747 if (PL_madskills) {
5748 curmad('X', newSVpvn(s-1,1));
cd81e915 5749 CURMAD('_', PL_thiswhite);
5db06880 5750 }
79072805 5751 force_next('}');
5db06880 5752#ifdef PERL_MAD
cd81e915 5753 if (!PL_thistoken)
6b29d1f5 5754 PL_thistoken = newSVpvs("");
5db06880 5755#endif
79072805 5756 TOKEN(';');
378cc40b
LW
5757 case '&':
5758 s++;
78cdf107
Z
5759 if (*s++ == '&') {
5760 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5761 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5762 s -= 2;
5763 TOKEN(0);
5764 }
a0d0e21e 5765 AOPERATOR(ANDAND);
78cdf107 5766 }
378cc40b 5767 s--;
3280af22 5768 if (PL_expect == XOPERATOR) {
041457d9
DM
5769 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5770 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5771 {
57843af0 5772 CopLINE_dec(PL_curcop);
f1f66076 5773 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5774 CopLINE_inc(PL_curcop);
463ee0b2 5775 }
78cdf107
Z
5776 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5777 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5778 s--;
5779 TOKEN(0);
5780 }
79072805 5781 BAop(OP_BIT_AND);
463ee0b2 5782 }
79072805 5783
3280af22
NIS
5784 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5785 if (*PL_tokenbuf) {
5786 PL_expect = XOPERATOR;
5787 force_ident(PL_tokenbuf, '&');
463ee0b2 5788 }
79072805
LW
5789 else
5790 PREREF('&');
6154021b 5791 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5792 TERM('&');
5793
378cc40b
LW
5794 case '|':
5795 s++;
78cdf107
Z
5796 if (*s++ == '|') {
5797 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5798 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC)) {
5799 s -= 2;
5800 TOKEN(0);
5801 }
a0d0e21e 5802 AOPERATOR(OROR);
78cdf107 5803 }
378cc40b 5804 s--;
78cdf107
Z
5805 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
5806 (*s == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_BITWISE)) {
5807 s--;
5808 TOKEN(0);
5809 }
79072805 5810 BOop(OP_BIT_OR);
378cc40b
LW
5811 case '=':
5812 s++;
748a9306 5813 {
90771dc0 5814 const char tmp = *s++;
78cdf107
Z
5815 if (tmp == '=') {
5816 if (!PL_lex_allbrackets &&
5817 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5818 s -= 2;
5819 TOKEN(0);
5820 }
90771dc0 5821 Eop(OP_EQ);
78cdf107
Z
5822 }
5823 if (tmp == '>') {
5824 if (!PL_lex_allbrackets &&
5825 PL_lex_fakeeof >= LEX_FAKEEOF_COMMA) {
5826 s -= 2;
5827 TOKEN(0);
5828 }
90771dc0 5829 OPERATOR(',');
78cdf107 5830 }
90771dc0
NC
5831 if (tmp == '~')
5832 PMop(OP_MATCH);
5833 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5834 && strchr("+-*/%.^&|<",tmp))
5835 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5836 "Reversed %c= operator",(int)tmp);
5837 s--;
5838 if (PL_expect == XSTATE && isALPHA(tmp) &&
5839 (s == PL_linestart+1 || s[-2] == '\n') )
5840 {
5841 if (PL_in_eval && !PL_rsfp) {
5842 d = PL_bufend;
5843 while (s < d) {
5844 if (*s++ == '\n') {
5845 incline(s);
5846 if (strnEQ(s,"=cut",4)) {
5847 s = strchr(s,'\n');
5848 if (s)
5849 s++;
5850 else
5851 s = d;
5852 incline(s);
5853 goto retry;
5854 }
5855 }
a5f75d66 5856 }
90771dc0 5857 goto retry;
a5f75d66 5858 }
5db06880
NC
5859#ifdef PERL_MAD
5860 if (PL_madskills) {
cd81e915 5861 if (!PL_thiswhite)
6b29d1f5 5862 PL_thiswhite = newSVpvs("");
cd81e915 5863 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5864 PL_bufend - PL_linestart);
5865 }
5866#endif
90771dc0 5867 s = PL_bufend;
737c24fc 5868 PL_parser->in_pod = 1;
90771dc0 5869 goto retry;
a5f75d66 5870 }
a0d0e21e 5871 }
3280af22 5872 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5873 const char *t = s;
51882d45 5874#ifdef PERL_STRICT_CR
c35e046a 5875 while (SPACE_OR_TAB(*t))
51882d45 5876#else
c35e046a 5877 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5878#endif
c35e046a 5879 t++;
a0d0e21e
LW
5880 if (*t == '\n' || *t == '#') {
5881 s--;
3280af22 5882 PL_expect = XBLOCK;
a0d0e21e
LW
5883 goto leftbracket;
5884 }
79072805 5885 }
78cdf107
Z
5886 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5887 s--;
5888 TOKEN(0);
5889 }
6154021b 5890 pl_yylval.ival = 0;
a0d0e21e 5891 OPERATOR(ASSIGNOP);
378cc40b
LW
5892 case '!':
5893 s++;
90771dc0
NC
5894 {
5895 const char tmp = *s++;
5896 if (tmp == '=') {
5897 /* was this !=~ where !~ was meant?
5898 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5899
5900 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5901 const char *t = s+1;
5902
5903 while (t < PL_bufend && isSPACE(*t))
5904 ++t;
5905
5906 if (*t == '/' || *t == '?' ||
5907 ((*t == 'm' || *t == 's' || *t == 'y')
5908 && !isALNUM(t[1])) ||
5909 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5910 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5911 "!=~ should be !~");
5912 }
78cdf107
Z
5913 if (!PL_lex_allbrackets &&
5914 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5915 s -= 2;
5916 TOKEN(0);
5917 }
90771dc0
NC
5918 Eop(OP_NE);
5919 }
5920 if (tmp == '~')
5921 PMop(OP_NOT);
5922 }
378cc40b
LW
5923 s--;
5924 OPERATOR('!');
5925 case '<':
3280af22 5926 if (PL_expect != XOPERATOR) {
93a17b20 5927 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5928 check_uni();
79072805
LW
5929 if (s[1] == '<')
5930 s = scan_heredoc(s);
5931 else
5932 s = scan_inputsymbol(s);
5933 TERM(sublex_start());
378cc40b
LW
5934 }
5935 s++;
90771dc0
NC
5936 {
5937 char tmp = *s++;
78cdf107
Z
5938 if (tmp == '<') {
5939 if (*s == '=' && !PL_lex_allbrackets &&
5940 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5941 s -= 2;
5942 TOKEN(0);
5943 }
90771dc0 5944 SHop(OP_LEFT_SHIFT);
78cdf107 5945 }
90771dc0
NC
5946 if (tmp == '=') {
5947 tmp = *s++;
78cdf107
Z
5948 if (tmp == '>') {
5949 if (!PL_lex_allbrackets &&
5950 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5951 s -= 3;
5952 TOKEN(0);
5953 }
90771dc0 5954 Eop(OP_NCMP);
78cdf107 5955 }
90771dc0 5956 s--;
78cdf107
Z
5957 if (!PL_lex_allbrackets &&
5958 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5959 s -= 2;
5960 TOKEN(0);
5961 }
90771dc0
NC
5962 Rop(OP_LE);
5963 }
395c3793 5964 }
378cc40b 5965 s--;
78cdf107
Z
5966 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5967 s--;
5968 TOKEN(0);
5969 }
79072805 5970 Rop(OP_LT);
378cc40b
LW
5971 case '>':
5972 s++;
90771dc0
NC
5973 {
5974 const char tmp = *s++;
78cdf107
Z
5975 if (tmp == '>') {
5976 if (*s == '=' && !PL_lex_allbrackets &&
5977 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
5978 s -= 2;
5979 TOKEN(0);
5980 }
90771dc0 5981 SHop(OP_RIGHT_SHIFT);
78cdf107
Z
5982 }
5983 else if (tmp == '=') {
5984 if (!PL_lex_allbrackets &&
5985 PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5986 s -= 2;
5987 TOKEN(0);
5988 }
90771dc0 5989 Rop(OP_GE);
78cdf107 5990 }
90771dc0 5991 }
378cc40b 5992 s--;
78cdf107
Z
5993 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE) {
5994 s--;
5995 TOKEN(0);
5996 }
79072805 5997 Rop(OP_GT);
378cc40b
LW
5998
5999 case '$':
bbce6d69 6000 CLINE;
6001
3280af22
NIS
6002 if (PL_expect == XOPERATOR) {
6003 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6004 return deprecate_commaless_var_list();
a0d0e21e 6005 }
8990e307 6006 }
a0d0e21e 6007
c0b977fd 6008 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 6009 PL_tokenbuf[0] = '@';
376b8730
SM
6010 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
6011 sizeof PL_tokenbuf - 1, FALSE);
6012 if (PL_expect == XOPERATOR)
6013 no_op("Array length", s);
3280af22 6014 if (!PL_tokenbuf[1])
a0d0e21e 6015 PREREF(DOLSHARP);
3280af22
NIS
6016 PL_expect = XOPERATOR;
6017 PL_pending_ident = '#';
463ee0b2 6018 TOKEN(DOLSHARP);
79072805 6019 }
bbce6d69 6020
3280af22 6021 PL_tokenbuf[0] = '$';
376b8730
SM
6022 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
6023 sizeof PL_tokenbuf - 1, FALSE);
6024 if (PL_expect == XOPERATOR)
6025 no_op("Scalar", s);
3280af22
NIS
6026 if (!PL_tokenbuf[1]) {
6027 if (s == PL_bufend)
bbce6d69 6028 yyerror("Final $ should be \\$ or $name");
6029 PREREF('$');
8990e307 6030 }
a0d0e21e 6031
ff68c719 6032 d = s;
90771dc0
NC
6033 {
6034 const char tmp = *s;
ae28bb2a 6035 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 6036 s = SKIPSPACE1(s);
ff68c719 6037
90771dc0
NC
6038 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
6039 && intuit_more(s)) {
6040 if (*s == '[') {
6041 PL_tokenbuf[0] = '@';
6042 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
6043 char *t = s+1;
6044
6045 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
6046 t++;
90771dc0 6047 if (*t++ == ',') {
29595ff2 6048 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
6049 while (t < PL_bufend && *t != ']')
6050 t++;
9014280d 6051 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 6052 "Multidimensional syntax %.*s not supported",
36c7798d 6053 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 6054 }
748a9306 6055 }
93a17b20 6056 }
90771dc0
NC
6057 else if (*s == '{') {
6058 char *t;
6059 PL_tokenbuf[0] = '%';
6060 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
6061 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
6062 {
6063 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
6064 do {
6065 t++;
6066 } while (isSPACE(*t));
90771dc0 6067 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 6068 STRLEN len;
90771dc0 6069 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 6070 &len);
c35e046a
AL
6071 while (isSPACE(*t))
6072 t++;
780a5241 6073 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
6074 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
6075 "You need to quote \"%s\"",
6076 tmpbuf);
6077 }
6078 }
6079 }
93a17b20 6080 }
bbce6d69 6081
90771dc0
NC
6082 PL_expect = XOPERATOR;
6083 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
6084 const bool islop = (PL_last_lop == PL_oldoldbufptr);
6085 if (!islop || PL_last_lop_op == OP_GREPSTART)
6086 PL_expect = XOPERATOR;
6087 else if (strchr("$@\"'`q", *s))
6088 PL_expect = XTERM; /* e.g. print $fh "foo" */
6089 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
6090 PL_expect = XTERM; /* e.g. print $fh &sub */
6091 else if (isIDFIRST_lazy_if(s,UTF)) {
6092 char tmpbuf[sizeof PL_tokenbuf];
6093 int t2;
6094 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 6095 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
6096 /* binary operators exclude handle interpretations */
6097 switch (t2) {
6098 case -KEY_x:
6099 case -KEY_eq:
6100 case -KEY_ne:
6101 case -KEY_gt:
6102 case -KEY_lt:
6103 case -KEY_ge:
6104 case -KEY_le:
6105 case -KEY_cmp:
6106 break;
6107 default:
6108 PL_expect = XTERM; /* e.g. print $fh length() */
6109 break;
6110 }
6111 }
6112 else {
6113 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
6114 }
6115 }
90771dc0
NC
6116 else if (isDIGIT(*s))
6117 PL_expect = XTERM; /* e.g. print $fh 3 */
6118 else if (*s == '.' && isDIGIT(s[1]))
6119 PL_expect = XTERM; /* e.g. print $fh .3 */
6120 else if ((*s == '?' || *s == '-' || *s == '+')
6121 && !isSPACE(s[1]) && s[1] != '=')
6122 PL_expect = XTERM; /* e.g. print $fh -1 */
6123 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
6124 && s[1] != '/')
6125 PL_expect = XTERM; /* e.g. print $fh /.../
6126 XXX except DORDOR operator
6127 */
6128 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
6129 && s[2] != '=')
6130 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 6131 }
bbce6d69 6132 }
3280af22 6133 PL_pending_ident = '$';
79072805 6134 TOKEN('$');
378cc40b
LW
6135
6136 case '@':
3280af22 6137 if (PL_expect == XOPERATOR)
bbce6d69 6138 no_op("Array", s);
3280af22
NIS
6139 PL_tokenbuf[0] = '@';
6140 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
6141 if (!PL_tokenbuf[1]) {
bbce6d69 6142 PREREF('@');
6143 }
3280af22 6144 if (PL_lex_state == LEX_NORMAL)
29595ff2 6145 s = SKIPSPACE1(s);
3280af22 6146 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 6147 if (*s == '{')
3280af22 6148 PL_tokenbuf[0] = '%';
a0d0e21e
LW
6149
6150 /* Warn about @ where they meant $. */
041457d9
DM
6151 if (*s == '[' || *s == '{') {
6152 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 6153 const char *t = s + 1;
7e2040f0 6154 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
6155 t++;
6156 if (*t == '}' || *t == ']') {
6157 t++;
29595ff2 6158 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 6159 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 6160 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
6161 (int)(t-PL_bufptr), PL_bufptr,
6162 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 6163 }
93a17b20
LW
6164 }
6165 }
463ee0b2 6166 }
3280af22 6167 PL_pending_ident = '@';
79072805 6168 TERM('@');
378cc40b 6169
c963b151 6170 case '/': /* may be division, defined-or, or pattern */
6f33ba73 6171 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
78cdf107
Z
6172 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6173 (s[2] == '=' ? LEX_FAKEEOF_ASSIGN : LEX_FAKEEOF_LOGIC))
6174 TOKEN(0);
6f33ba73
RGS
6175 s += 2;
6176 AOPERATOR(DORDOR);
6177 }
c963b151 6178 case '?': /* may either be conditional or pattern */
be25f609 6179 if (PL_expect == XOPERATOR) {
90771dc0 6180 char tmp = *s++;
c963b151 6181 if(tmp == '?') {
78cdf107
Z
6182 if (!PL_lex_allbrackets &&
6183 PL_lex_fakeeof >= LEX_FAKEEOF_IFELSE) {
6184 s--;
6185 TOKEN(0);
6186 }
6187 PL_lex_allbrackets++;
be25f609 6188 OPERATOR('?');
c963b151
BD
6189 }
6190 else {
6191 tmp = *s++;
6192 if(tmp == '/') {
6193 /* A // operator. */
78cdf107
Z
6194 if (!PL_lex_allbrackets && PL_lex_fakeeof >=
6195 (*s == '=' ? LEX_FAKEEOF_ASSIGN :
6196 LEX_FAKEEOF_LOGIC)) {
6197 s -= 2;
6198 TOKEN(0);
6199 }
c963b151
BD
6200 AOPERATOR(DORDOR);
6201 }
6202 else {
6203 s--;
78cdf107
Z
6204 if (*s == '=' && !PL_lex_allbrackets &&
6205 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6206 s--;
6207 TOKEN(0);
6208 }
c963b151
BD
6209 Mop(OP_DIVIDE);
6210 }
6211 }
6212 }
6213 else {
6214 /* Disable warning on "study /blah/" */
6215 if (PL_oldoldbufptr == PL_last_uni
6216 && (*PL_last_uni != 's' || s - PL_last_uni < 5
6217 || memNE(PL_last_uni, "study", 5)
6218 || isALNUM_lazy_if(PL_last_uni+5,UTF)
6219 ))
6220 check_uni();
725a61d7
Z
6221 if (*s == '?')
6222 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6223 s = scan_pat(s,OP_MATCH);
6224 TERM(sublex_start());
6225 }
378cc40b
LW
6226
6227 case '.':
51882d45
GS
6228 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6229#ifdef PERL_STRICT_CR
6230 && s[1] == '\n'
6231#else
6232 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6233#endif
6234 && (s == PL_linestart || s[-1] == '\n') )
6235 {
3280af22
NIS
6236 PL_lex_formbrack = 0;
6237 PL_expect = XSTATE;
79072805
LW
6238 goto rightbracket;
6239 }
be25f609 6240 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6241 s += 3;
6242 OPERATOR(YADAYADA);
6243 }
3280af22 6244 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6245 char tmp = *s++;
a687059c 6246 if (*s == tmp) {
78cdf107
Z
6247 if (!PL_lex_allbrackets &&
6248 PL_lex_fakeeof >= LEX_FAKEEOF_RANGE) {
6249 s--;
6250 TOKEN(0);
6251 }
a687059c 6252 s++;
2f3197b3
LW
6253 if (*s == tmp) {
6254 s++;
6154021b 6255 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6256 }
6257 else
6154021b 6258 pl_yylval.ival = 0;
378cc40b 6259 OPERATOR(DOTDOT);
a687059c 6260 }
78cdf107
Z
6261 if (*s == '=' && !PL_lex_allbrackets &&
6262 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN) {
6263 s--;
6264 TOKEN(0);
6265 }
79072805 6266 Aop(OP_CONCAT);
378cc40b
LW
6267 }
6268 /* FALL THROUGH */
6269 case '0': case '1': case '2': case '3': case '4':
6270 case '5': case '6': case '7': case '8': case '9':
6154021b 6271 s = scan_num(s, &pl_yylval);
931e0695 6272 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6273 if (PL_expect == XOPERATOR)
8990e307 6274 no_op("Number",s);
79072805
LW
6275 TERM(THING);
6276
6277 case '\'':
5db06880 6278 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6279 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6280 if (PL_expect == XOPERATOR) {
6281 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6282 return deprecate_commaless_var_list();
a0d0e21e 6283 }
463ee0b2 6284 else
8990e307 6285 no_op("String",s);
463ee0b2 6286 }
79072805 6287 if (!s)
d4c19fe8 6288 missingterm(NULL);
6154021b 6289 pl_yylval.ival = OP_CONST;
79072805
LW
6290 TERM(sublex_start());
6291
6292 case '"':
5db06880 6293 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6294 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6295 if (PL_expect == XOPERATOR) {
6296 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6297 return deprecate_commaless_var_list();
a0d0e21e 6298 }
463ee0b2 6299 else
8990e307 6300 no_op("String",s);
463ee0b2 6301 }
79072805 6302 if (!s)
d4c19fe8 6303 missingterm(NULL);
6154021b 6304 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6305 /* FIXME. I think that this can be const if char *d is replaced by
6306 more localised variables. */
3280af22 6307 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6308 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6309 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6310 break;
6311 }
6312 }
79072805
LW
6313 TERM(sublex_start());
6314
6315 case '`':
5db06880 6316 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6317 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6318 if (PL_expect == XOPERATOR)
8990e307 6319 no_op("Backticks",s);
79072805 6320 if (!s)
d4c19fe8 6321 missingterm(NULL);
9b201d7d 6322 readpipe_override();
79072805
LW
6323 TERM(sublex_start());
6324
6325 case '\\':
6326 s++;
a2a5de95
NC
6327 if (PL_lex_inwhat && isDIGIT(*s))
6328 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6329 *s, *s);
3280af22 6330 if (PL_expect == XOPERATOR)
8990e307 6331 no_op("Backslash",s);
79072805
LW
6332 OPERATOR(REFGEN);
6333
a7cb1f99 6334 case 'v':
e526c9e6 6335 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6336 char *start = s + 2;
dd629d5b 6337 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6338 start++;
6339 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6340 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6341 TERM(THING);
6342 }
e526c9e6 6343 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6344 else if (!isALPHA(*start) && (PL_expect == XTERM
6345 || PL_expect == XREF || PL_expect == XSTATE
6346 || PL_expect == XTERMORDORDOR)) {
af9f5953
BF
6347 GV *const gv = gv_fetchpvn_flags(s, start - s,
6348 UTF ? SVf_UTF8 : 0, SVt_PVCV);
e526c9e6 6349 if (!gv) {
6154021b 6350 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6351 TERM(THING);
6352 }
6353 }
a7cb1f99
GS
6354 }
6355 goto keylookup;
79072805 6356 case 'x':
3280af22 6357 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6358 s++;
6359 Mop(OP_REPEAT);
2f3197b3 6360 }
79072805
LW
6361 goto keylookup;
6362
378cc40b 6363 case '_':
79072805
LW
6364 case 'a': case 'A':
6365 case 'b': case 'B':
6366 case 'c': case 'C':
6367 case 'd': case 'D':
6368 case 'e': case 'E':
6369 case 'f': case 'F':
6370 case 'g': case 'G':
6371 case 'h': case 'H':
6372 case 'i': case 'I':
6373 case 'j': case 'J':
6374 case 'k': case 'K':
6375 case 'l': case 'L':
6376 case 'm': case 'M':
6377 case 'n': case 'N':
6378 case 'o': case 'O':
6379 case 'p': case 'P':
6380 case 'q': case 'Q':
6381 case 'r': case 'R':
6382 case 's': case 'S':
6383 case 't': case 'T':
6384 case 'u': case 'U':
a7cb1f99 6385 case 'V':
79072805
LW
6386 case 'w': case 'W':
6387 case 'X':
6388 case 'y': case 'Y':
6389 case 'z': case 'Z':
6390
49dc05e3 6391 keylookup: {
88e1f1a2 6392 bool anydelim;
90771dc0 6393 I32 tmp;
10edeb5d
JH
6394
6395 orig_keyword = 0;
6396 gv = NULL;
6397 gvp = NULL;
49dc05e3 6398
3280af22
NIS
6399 PL_bufptr = s;
6400 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6401
6402 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6403 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6404
6405 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6406 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6407 goto just_a_word;
6408
3643fb5f 6409 d = s;
3280af22 6410 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6411 d++; /* no comments skipped here, or s### is misparsed */
6412
748a9306 6413 /* Is this a word before a => operator? */
1c3923b3 6414 if (*d == '=' && d[1] == '>') {
748a9306 6415 CLINE;
6154021b 6416 pl_yylval.opval
d0a148a6
NC
6417 = (OP*)newSVOP(OP_CONST, 0,
6418 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6419 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6420 TERM(WORD);
6421 }
6422
88e1f1a2
JV
6423 /* Check for plugged-in keyword */
6424 {
6425 OP *o;
6426 int result;
6427 char *saved_bufptr = PL_bufptr;
6428 PL_bufptr = s;
16c91539 6429 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6430 s = PL_bufptr;
6431 if (result == KEYWORD_PLUGIN_DECLINE) {
6432 /* not a plugged-in keyword */
6433 PL_bufptr = saved_bufptr;
6434 } else if (result == KEYWORD_PLUGIN_STMT) {
6435 pl_yylval.opval = o;
6436 CLINE;
6437 PL_expect = XSTATE;
6438 return REPORT(PLUGSTMT);
6439 } else if (result == KEYWORD_PLUGIN_EXPR) {
6440 pl_yylval.opval = o;
6441 CLINE;
6442 PL_expect = XOPERATOR;
6443 return REPORT(PLUGEXPR);
6444 } else {
6445 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6446 PL_tokenbuf);
6447 }
6448 }
6449
6450 /* Check for built-in keyword */
6451 tmp = keyword(PL_tokenbuf, len, 0);
6452
6453 /* Is this a label? */
6454 if (!anydelim && PL_expect == XSTATE
6455 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6456 s = d + 1;
6457 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6458 CLINE;
6459 TOKEN(LABEL);
6460 }
6461
a0d0e21e 6462 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6463 GV *ogv = NULL; /* override (winner) */
6464 GV *hgv = NULL; /* hidden (loser) */
3280af22 6465 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6466 CV *cv;
af9f5953
BF
6467 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6468 UTF ? SVf_UTF8 : 0, SVt_PVCV)) &&
56f7f34b
CS
6469 (cv = GvCVu(gv)))
6470 {
6471 if (GvIMPORTED_CV(gv))
6472 ogv = gv;
6473 else if (! CvMETHOD(cv))
6474 hgv = gv;
6475 }
6476 if (!ogv &&
af9f5953
BF
6477 (gvp = (GV**)hv_fetch(PL_globalstash, PL_tokenbuf,
6478 UTF ? -len : len, FALSE)) &&
9e0d86f8 6479 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6480 GvCVu(gv) && GvIMPORTED_CV(gv))
6481 {
6482 ogv = gv;
6483 }
6484 }
6485 if (ogv) {
30fe34ed 6486 orig_keyword = tmp;
56f7f34b 6487 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6488 }
6489 else if (gv && !gvp
6490 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6491 && GvCVu(gv))
6e7b2336
GS
6492 {
6493 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6494 }
56f7f34b
CS
6495 else { /* no override */
6496 tmp = -tmp;
a2a5de95
NC
6497 if (tmp == KEY_dump) {
6498 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6499 "dump() better written as CORE::dump()");
ac206dc8 6500 }
a0714e2c 6501 gv = NULL;
56f7f34b 6502 gvp = 0;
a2a5de95
NC
6503 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6504 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6505 "Ambiguous call resolved as CORE::%s(), "
6506 "qualify as such or use &",
6507 GvENAME(hgv));
49dc05e3 6508 }
a0d0e21e
LW
6509 }
6510
6511 reserved_word:
6512 switch (tmp) {
79072805
LW
6513
6514 default: /* not a keyword */
0bfa2a8a
NC
6515 /* Trade off - by using this evil construction we can pull the
6516 variable gv into the block labelled keylookup. If not, then
6517 we have to give it function scope so that the goto from the
6518 earlier ':' case doesn't bypass the initialisation. */
6519 if (0) {
6520 just_a_word_zero_gv:
6521 gv = NULL;
6522 gvp = NULL;
8bee0991 6523 orig_keyword = 0;
0bfa2a8a 6524 }
93a17b20 6525 just_a_word: {
96e4d5b1 6526 SV *sv;
ce29ac45 6527 int pkgname = 0;
f54cb97a 6528 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6529 OP *rv2cv_op;
5069cc75 6530 CV *cv;
5db06880 6531#ifdef PERL_MAD
cd81e915 6532 SV *nextPL_nextwhite = 0;
5db06880
NC
6533#endif
6534
8990e307
LW
6535
6536 /* Get the rest if it looks like a package qualifier */
6537
155aba94 6538 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6539 STRLEN morelen;
3280af22 6540 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6541 TRUE, &morelen);
6542 if (!morelen)
cea2e8a9 6543 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6544 *s == '\'' ? "'" : "::");
c3e0f903 6545 len += morelen;
ce29ac45 6546 pkgname = 1;
a0d0e21e 6547 }
8990e307 6548
3280af22
NIS
6549 if (PL_expect == XOPERATOR) {
6550 if (PL_bufptr == PL_linestart) {
57843af0 6551 CopLINE_dec(PL_curcop);
f1f66076 6552 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6553 CopLINE_inc(PL_curcop);
463ee0b2
LW
6554 }
6555 else
54310121 6556 no_op("Bareword",s);
463ee0b2 6557 }
8990e307 6558
c3e0f903 6559 /* Look for a subroutine with this name in current package,
486ec47a 6560 unless name is "Foo::", in which case Foo is a bareword
c3e0f903
GS
6561 (and a package name). */
6562
5db06880 6563 if (len > 2 && !PL_madskills &&
3280af22 6564 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6565 {
f776e3cd 6566 if (ckWARN(WARN_BAREWORD)
af9f5953 6567 && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
9014280d 6568 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6569 "Bareword \"%s\" refers to nonexistent package",
3280af22 6570 PL_tokenbuf);
c3e0f903 6571 len -= 2;
3280af22 6572 PL_tokenbuf[len] = '\0';
a0714e2c 6573 gv = NULL;
c3e0f903
GS
6574 gvp = 0;
6575 }
6576 else {
62d55b22
NC
6577 if (!gv) {
6578 /* Mustn't actually add anything to a symbol table.
6579 But also don't want to "initialise" any placeholder
6580 constants that might already be there into full
6581 blown PVGVs with attached PVCV. */
90e5519e 6582 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
af9f5953
BF
6583 GV_NOADD_NOINIT | ( UTF ? SVf_UTF8 : 0 ),
6584 SVt_PVCV);
62d55b22 6585 }
b3d904f3 6586 len = 0;
c3e0f903
GS
6587 }
6588
6589 /* if we saw a global override before, get the right name */
8990e307 6590
37bb7629
EB
6591 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6592 len ? len : strlen(PL_tokenbuf));
49dc05e3 6593 if (gvp) {
37bb7629 6594 SV * const tmp_sv = sv;
396482e1 6595 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6596 sv_catsv(sv, tmp_sv);
6597 SvREFCNT_dec(tmp_sv);
8a7a129d 6598 }
37bb7629 6599
5db06880 6600#ifdef PERL_MAD
cd81e915
NC
6601 if (PL_madskills && !PL_thistoken) {
6602 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6603 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6604 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6605 }
6606#endif
8990e307 6607
a0d0e21e 6608 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6609 CLINE;
6154021b
RGS
6610 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6611 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6612
c3e0f903 6613 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6614 if (len)
6615 goto safe_bareword;
6616
f7461760 6617 {
d8ebba9f 6618 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc_NN(sv));
f7461760
Z
6619 const_op->op_private = OPpCONST_BARE;
6620 rv2cv_op = newCVREF(0, const_op);
6621 }
d9088386 6622 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6623
8990e307
LW
6624 /* See if it's the indirect object for a list operator. */
6625
3280af22
NIS
6626 if (PL_oldoldbufptr &&
6627 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6628 (PL_oldoldbufptr == PL_last_lop
6629 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6630 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6631 (PL_expect == XREF ||
6632 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6633 {
748a9306
LW
6634 bool immediate_paren = *s == '(';
6635
a0d0e21e 6636 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6637 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6638#ifdef PERL_MAD
cd81e915 6639 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6640#endif
a0d0e21e
LW
6641
6642 /* Two barewords in a row may indicate method call. */
6643
62d55b22 6644 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6645 (tmp = intuit_method(s, gv, cv))) {
6646 op_free(rv2cv_op);
78cdf107
Z
6647 if (tmp == METHOD && !PL_lex_allbrackets &&
6648 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6649 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6650 return REPORT(tmp);
f7461760 6651 }
a0d0e21e
LW
6652
6653 /* If not a declared subroutine, it's an indirect object. */
6654 /* (But it's an indir obj regardless for sort.) */
7294df96 6655 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6656
7294df96
RGS
6657 if (
6658 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6659 (!cv &&
a9ef352a 6660 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6661 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6662 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6663 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6664 )
a9ef352a 6665 {
3280af22 6666 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6667 goto bareword;
93a17b20
LW
6668 }
6669 }
8990e307 6670
3280af22 6671 PL_expect = XOPERATOR;
5db06880
NC
6672#ifdef PERL_MAD
6673 if (isSPACE(*s))
cd81e915
NC
6674 s = SKIPSPACE2(s,nextPL_nextwhite);
6675 PL_nextwhite = nextPL_nextwhite;
5db06880 6676#else
8990e307 6677 s = skipspace(s);
5db06880 6678#endif
1c3923b3
GS
6679
6680 /* Is this a word before a => operator? */
ce29ac45 6681 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6682 op_free(rv2cv_op);
1c3923b3 6683 CLINE;
6154021b 6684 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6685 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6686 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6687 TERM(WORD);
6688 }
6689
6690 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6691 if (*s == '(') {
79072805 6692 CLINE;
5069cc75 6693 if (cv) {
c35e046a
AL
6694 d = s + 1;
6695 while (SPACE_OR_TAB(*d))
6696 d++;
f7461760 6697 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6698 s = d + 1;
c631f32b 6699 goto its_constant;
96e4d5b1 6700 }
6701 }
5db06880
NC
6702#ifdef PERL_MAD
6703 if (PL_madskills) {
cd81e915
NC
6704 PL_nextwhite = PL_thiswhite;
6705 PL_thiswhite = 0;
5db06880 6706 }
cd81e915 6707 start_force(PL_curforce);
5db06880 6708#endif
6154021b 6709 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6710 PL_expect = XOPERATOR;
5db06880
NC
6711#ifdef PERL_MAD
6712 if (PL_madskills) {
cd81e915
NC
6713 PL_nextwhite = nextPL_nextwhite;
6714 curmad('X', PL_thistoken);
6b29d1f5 6715 PL_thistoken = newSVpvs("");
5db06880
NC
6716 }
6717#endif
f7461760 6718 op_free(rv2cv_op);
93a17b20 6719 force_next(WORD);
6154021b 6720 pl_yylval.ival = 0;
463ee0b2 6721 TOKEN('&');
79072805 6722 }
93a17b20 6723
a0d0e21e 6724 /* If followed by var or block, call it a method (unless sub) */
8990e307 6725
f7461760
Z
6726 if ((*s == '$' || *s == '{') && !cv) {
6727 op_free(rv2cv_op);
3280af22
NIS
6728 PL_last_lop = PL_oldbufptr;
6729 PL_last_lop_op = OP_METHOD;
78cdf107
Z
6730 if (!PL_lex_allbrackets &&
6731 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6732 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
93a17b20 6733 PREBLOCK(METHOD);
463ee0b2
LW
6734 }
6735
8990e307
LW
6736 /* If followed by a bareword, see if it looks like indir obj. */
6737
30fe34ed
RGS
6738 if (!orig_keyword
6739 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6740 && (tmp = intuit_method(s, gv, cv))) {
6741 op_free(rv2cv_op);
78cdf107
Z
6742 if (tmp == METHOD && !PL_lex_allbrackets &&
6743 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6744 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
bbf60fe6 6745 return REPORT(tmp);
f7461760 6746 }
93a17b20 6747
8990e307
LW
6748 /* Not a method, so call it a subroutine (if defined) */
6749
5069cc75 6750 if (cv) {
9b387841
NC
6751 if (lastchar == '-')
6752 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6753 "Ambiguous use of -%s resolved as -&%s()",
6754 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6755 /* Check for a constant sub */
f7461760 6756 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6757 its_constant:
f7461760 6758 op_free(rv2cv_op);
6154021b
RGS
6759 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6760 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6761 pl_yylval.opval->op_private = 0;
6b7c6d95 6762 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 6763 TOKEN(WORD);
89bfa8cd 6764 }
6765
6154021b 6766 op_free(pl_yylval.opval);
f7461760 6767 pl_yylval.opval = rv2cv_op;
6154021b 6768 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6769 PL_last_lop = PL_oldbufptr;
bf848113 6770 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6771 /* Is there a prototype? */
5db06880
NC
6772 if (
6773#ifdef PERL_MAD
6774 cv &&
6775#endif
d9f2850e
RGS
6776 SvPOK(cv))
6777 {
5f66b61c 6778 STRLEN protolen;
daba3364 6779 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6780 if (!protolen)
4633a7c4 6781 TERM(FUNC0SUB);
0f5d0394
AE
6782 while (*proto == ';')
6783 proto++;
649d02de
FC
6784 if (
6785 (
6786 (
6787 *proto == '$' || *proto == '_'
c035a075 6788 || *proto == '*' || *proto == '+'
649d02de
FC
6789 )
6790 && proto[1] == '\0'
6791 )
6792 || (
6793 *proto == '\\' && proto[1] && proto[2] == '\0'
6794 )
6795 )
6796 OPERATOR(UNIOPSUB);
6797 if (*proto == '\\' && proto[1] == '[') {
6798 const char *p = proto + 2;
6799 while(*p && *p != ']')
6800 ++p;
6801 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6802 }
7a52d87a 6803 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6804 if (PL_curstash)
6805 sv_setpvs(PL_subname, "__ANON__");
6806 else
6807 sv_setpvs(PL_subname, "__ANON__::__ANON__");
78cdf107
Z
6808 if (!PL_lex_allbrackets &&
6809 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6810 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
4633a7c4
LW
6811 PREBLOCK(LSTOPSUB);
6812 }
a9ef352a 6813 }
5db06880
NC
6814#ifdef PERL_MAD
6815 {
6816 if (PL_madskills) {
cd81e915
NC
6817 PL_nextwhite = PL_thiswhite;
6818 PL_thiswhite = 0;
5db06880 6819 }
cd81e915 6820 start_force(PL_curforce);
6154021b 6821 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6822 PL_expect = XTERM;
6823 if (PL_madskills) {
cd81e915
NC
6824 PL_nextwhite = nextPL_nextwhite;
6825 curmad('X', PL_thistoken);
6b29d1f5 6826 PL_thistoken = newSVpvs("");
5db06880
NC
6827 }
6828 force_next(WORD);
78cdf107
Z
6829 if (!PL_lex_allbrackets &&
6830 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6831 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6832 TOKEN(NOAMP);
6833 }
6834 }
6835
6836 /* Guess harder when madskills require "best effort". */
6837 if (PL_madskills && (!gv || !GvCVu(gv))) {
6838 int probable_sub = 0;
6839 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6840 probable_sub = 1;
6841 else if (isALPHA(*s)) {
6842 char tmpbuf[1024];
6843 STRLEN tmplen;
6844 d = s;
6845 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6846 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6847 probable_sub = 1;
6848 else {
6849 while (d < PL_bufend && isSPACE(*d))
6850 d++;
6851 if (*d == '=' && d[1] == '>')
6852 probable_sub = 1;
6853 }
6854 }
6855 if (probable_sub) {
af9f5953
BF
6856 gv = gv_fetchpv(PL_tokenbuf, GV_ADD | ( UTF ? SVf_UTF8 : 0 ),
6857 SVt_PVCV);
6154021b 6858 op_free(pl_yylval.opval);
f7461760 6859 pl_yylval.opval = rv2cv_op;
6154021b 6860 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6861 PL_last_lop = PL_oldbufptr;
6862 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6863 PL_nextwhite = PL_thiswhite;
6864 PL_thiswhite = 0;
6865 start_force(PL_curforce);
6154021b 6866 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6867 PL_expect = XTERM;
cd81e915
NC
6868 PL_nextwhite = nextPL_nextwhite;
6869 curmad('X', PL_thistoken);
6b29d1f5 6870 PL_thistoken = newSVpvs("");
5db06880 6871 force_next(WORD);
78cdf107
Z
6872 if (!PL_lex_allbrackets &&
6873 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6874 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
5db06880
NC
6875 TOKEN(NOAMP);
6876 }
6877#else
6154021b 6878 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6879 PL_expect = XTERM;
8990e307 6880 force_next(WORD);
78cdf107
Z
6881 if (!PL_lex_allbrackets &&
6882 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
6883 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
8990e307 6884 TOKEN(NOAMP);
5db06880 6885#endif
8990e307 6886 }
748a9306 6887
8990e307
LW
6888 /* Call it a bare word */
6889
5603f27d 6890 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6891 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6892 else {
9a073a1d
RGS
6893 bareword:
6894 /* after "print" and similar functions (corresponding to
6895 * "F? L" in opcode.pl), whatever wasn't already parsed as
6896 * a filehandle should be subject to "strict subs".
6897 * Likewise for the optional indirect-object argument to system
6898 * or exec, which can't be a bareword */
6899 if ((PL_last_lop_op == OP_PRINT
6900 || PL_last_lop_op == OP_PRTF
6901 || PL_last_lop_op == OP_SAY
6902 || PL_last_lop_op == OP_SYSTEM
6903 || PL_last_lop_op == OP_EXEC)
6904 && (PL_hints & HINT_STRICT_SUBS))
6905 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6906 if (lastchar != '-') {
6907 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6908 d = PL_tokenbuf;
6909 while (isLOWER(*d))
6910 d++;
af9f5953 6911 if (!*d && !gv_stashpv(PL_tokenbuf, UTF ? SVf_UTF8 : 0))
9014280d 6912 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6913 PL_tokenbuf);
6914 }
748a9306
LW
6915 }
6916 }
f7461760 6917 op_free(rv2cv_op);
c3e0f903
GS
6918
6919 safe_bareword:
9b387841
NC
6920 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6921 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6922 "Operator or semicolon missing before %c%s",
6923 lastchar, PL_tokenbuf);
6924 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6925 "Ambiguous use of %c resolved as operator %c",
6926 lastchar, lastchar);
748a9306 6927 }
93a17b20 6928 TOKEN(WORD);
79072805 6929 }
79072805 6930
68dc0745 6931 case KEY___FILE__:
7eb971ee 6932 FUN0OP(
14f0f125 6933 (OP*)newSVOP(OP_CONST, 0, newSVpv(CopFILE(PL_curcop),0))
7eb971ee 6934 );
46fc3d4c 6935
79072805 6936 case KEY___LINE__:
7eb971ee
FC
6937 FUN0OP(
6938 (OP*)newSVOP(OP_CONST, 0,
6939 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)))
6940 );
68dc0745 6941
6942 case KEY___PACKAGE__:
7eb971ee
FC
6943 FUN0OP(
6944 (OP*)newSVOP(OP_CONST, 0,
3280af22 6945 (PL_curstash
5aaec2b4 6946 ? newSVhek(HvNAME_HEK(PL_curstash))
7eb971ee
FC
6947 : &PL_sv_undef))
6948 );
79072805 6949
e50aee73 6950 case KEY___DATA__:
79072805
LW
6951 case KEY___END__: {
6952 GV *gv;
3280af22 6953 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6954 const char *pname = "main";
affc13fc
FC
6955 STRLEN plen = 4;
6956 U32 putf8 = 0;
3280af22 6957 if (PL_tokenbuf[2] == 'D')
affc13fc
FC
6958 {
6959 HV * const stash =
6960 PL_curstash ? PL_curstash : PL_defstash;
6961 pname = HvNAME_get(stash);
6962 plen = HvNAMELEN (stash);
6963 if(HvNAMEUTF8(stash)) putf8 = SVf_UTF8;
6964 }
6965 gv = gv_fetchpvn_flags(
6966 Perl_form(aTHX_ "%*s::DATA", (int)plen, pname),
6967 plen+6, GV_ADD|putf8, SVt_PVIO
6968 );
a5f75d66 6969 GvMULTI_on(gv);
79072805 6970 if (!GvIO(gv))
a0d0e21e 6971 GvIOp(gv) = newIO();
3280af22 6972 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6973#if defined(HAS_FCNTL) && defined(F_SETFD)
6974 {
f54cb97a 6975 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6976 fcntl(fd,F_SETFD,fd >= 3);
6977 }
79072805 6978#endif
fd049845 6979 /* Mark this internal pseudo-handle as clean */
6980 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6981 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6982 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6983 else
50952442 6984 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6985#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6986 /* if the script was opened in binmode, we need to revert
53129d29 6987 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6988 * XXX this is a questionable hack at best. */
53129d29
GS
6989 if (PL_bufend-PL_bufptr > 2
6990 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6991 {
6992 Off_t loc = 0;
50952442 6993 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6994 loc = PerlIO_tell(PL_rsfp);
6995 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6996 }
2986a63f
JH
6997#ifdef NETWARE
6998 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6999#else
c39cd008 7000 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 7001#endif /* NETWARE */
c39cd008
GS
7002 if (loc > 0)
7003 PerlIO_seek(PL_rsfp, loc, 0);
7004 }
7005 }
7006#endif
7948272d 7007#ifdef PERLIO_LAYERS
52d2e0f4
JH
7008 if (!IN_BYTES) {
7009 if (UTF)
7010 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
7011 else if (PL_encoding) {
7012 SV *name;
7013 dSP;
7014 ENTER;
7015 SAVETMPS;
7016 PUSHMARK(sp);
7017 EXTEND(SP, 1);
7018 XPUSHs(PL_encoding);
7019 PUTBACK;
7020 call_method("name", G_SCALAR);
7021 SPAGAIN;
7022 name = POPs;
7023 PUTBACK;
bfed75c6 7024 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 7025 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 7026 SVfARG(name)));
52d2e0f4
JH
7027 FREETMPS;
7028 LEAVE;
7029 }
7030 }
7948272d 7031#endif
5db06880
NC
7032#ifdef PERL_MAD
7033 if (PL_madskills) {
cd81e915
NC
7034 if (PL_realtokenstart >= 0) {
7035 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
7036 if (!PL_endwhite)
6b29d1f5 7037 PL_endwhite = newSVpvs("");
cd81e915
NC
7038 sv_catsv(PL_endwhite, PL_thiswhite);
7039 PL_thiswhite = 0;
7040 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
7041 PL_realtokenstart = -1;
5db06880 7042 }
5cc814fd
NC
7043 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
7044 != NULL) ;
5db06880
NC
7045 }
7046#endif
4608196e 7047 PL_rsfp = NULL;
79072805
LW
7048 }
7049 goto fake_eof;
e929a76b 7050 }
de3bb511 7051
8990e307 7052 case KEY_AUTOLOAD:
ed6116ce 7053 case KEY_DESTROY:
79072805 7054 case KEY_BEGIN:
3c10abe3 7055 case KEY_UNITCHECK:
7d30b5c4 7056 case KEY_CHECK:
7d07dbc2 7057 case KEY_INIT:
7d30b5c4 7058 case KEY_END:
3280af22
NIS
7059 if (PL_expect == XSTATE) {
7060 s = PL_bufptr;
93a17b20 7061 goto really_sub;
79072805
LW
7062 }
7063 goto just_a_word;
7064
a0d0e21e
LW
7065 case KEY_CORE:
7066 if (*s == ':' && s[1] == ':') {
7067 s += 2;
748a9306 7068 d = s;
3280af22 7069 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
9dcb8368 7070 if (!(tmp = keyword(PL_tokenbuf, len, 1)))
6798c92b 7071 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
7072 if (tmp < 0)
7073 tmp = -tmp;
a5c70c4d 7074 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 7075 /* that's a way to remember we saw "CORE::" */
850e8516 7076 orig_keyword = tmp;
a0d0e21e
LW
7077 goto reserved_word;
7078 }
7079 goto just_a_word;
7080
463ee0b2
LW
7081 case KEY_abs:
7082 UNI(OP_ABS);
7083
79072805
LW
7084 case KEY_alarm:
7085 UNI(OP_ALARM);
7086
7087 case KEY_accept:
a0d0e21e 7088 LOP(OP_ACCEPT,XTERM);
79072805 7089
463ee0b2 7090 case KEY_and:
78cdf107
Z
7091 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7092 return REPORT(0);
463ee0b2
LW
7093 OPERATOR(ANDOP);
7094
79072805 7095 case KEY_atan2:
a0d0e21e 7096 LOP(OP_ATAN2,XTERM);
85e6fe83 7097
79072805 7098 case KEY_bind:
a0d0e21e 7099 LOP(OP_BIND,XTERM);
79072805
LW
7100
7101 case KEY_binmode:
1c1fc3ea 7102 LOP(OP_BINMODE,XTERM);
79072805
LW
7103
7104 case KEY_bless:
a0d0e21e 7105 LOP(OP_BLESS,XTERM);
79072805 7106
0d863452
RH
7107 case KEY_break:
7108 FUN0(OP_BREAK);
7109
79072805
LW
7110 case KEY_chop:
7111 UNI(OP_CHOP);
7112
7113 case KEY_continue:
0d863452
RH
7114 /* We have to disambiguate the two senses of
7115 "continue". If the next token is a '{' then
7116 treat it as the start of a continue block;
7117 otherwise treat it as a control operator.
7118 */
7119 s = skipspace(s);
7120 if (*s == '{')
79072805 7121 PREBLOCK(CONTINUE);
0d863452
RH
7122 else
7123 FUN0(OP_CONTINUE);
79072805
LW
7124
7125 case KEY_chdir:
fafc274c
NC
7126 /* may use HOME */
7127 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
7128 UNI(OP_CHDIR);
7129
7130 case KEY_close:
7131 UNI(OP_CLOSE);
7132
7133 case KEY_closedir:
7134 UNI(OP_CLOSEDIR);
7135
7136 case KEY_cmp:
78cdf107
Z
7137 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7138 return REPORT(0);
79072805
LW
7139 Eop(OP_SCMP);
7140
7141 case KEY_caller:
7142 UNI(OP_CALLER);
7143
7144 case KEY_crypt:
7145#ifdef FCRYPT
f4c556ac
GS
7146 if (!PL_cryptseen) {
7147 PL_cryptseen = TRUE;
de3bb511 7148 init_des();
f4c556ac 7149 }
a687059c 7150#endif
a0d0e21e 7151 LOP(OP_CRYPT,XTERM);
79072805
LW
7152
7153 case KEY_chmod:
a0d0e21e 7154 LOP(OP_CHMOD,XTERM);
79072805
LW
7155
7156 case KEY_chown:
a0d0e21e 7157 LOP(OP_CHOWN,XTERM);
79072805
LW
7158
7159 case KEY_connect:
a0d0e21e 7160 LOP(OP_CONNECT,XTERM);
79072805 7161
463ee0b2
LW
7162 case KEY_chr:
7163 UNI(OP_CHR);
7164
79072805
LW
7165 case KEY_cos:
7166 UNI(OP_COS);
7167
7168 case KEY_chroot:
7169 UNI(OP_CHROOT);
7170
0d863452
RH
7171 case KEY_default:
7172 PREBLOCK(DEFAULT);
7173
79072805 7174 case KEY_do:
29595ff2 7175 s = SKIPSPACE1(s);
79072805 7176 if (*s == '{')
a0d0e21e 7177 PRETERMBLOCK(DO);
79072805 7178 if (*s != '\'')
89c5585f 7179 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
7180 if (orig_keyword == KEY_do) {
7181 orig_keyword = 0;
6154021b 7182 pl_yylval.ival = 1;
850e8516
RGS
7183 }
7184 else
6154021b 7185 pl_yylval.ival = 0;
378cc40b 7186 OPERATOR(DO);
79072805
LW
7187
7188 case KEY_die:
3280af22 7189 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7190 LOP(OP_DIE,XTERM);
79072805
LW
7191
7192 case KEY_defined:
7193 UNI(OP_DEFINED);
7194
7195 case KEY_delete:
a0d0e21e 7196 UNI(OP_DELETE);
79072805
LW
7197
7198 case KEY_dbmopen:
74e8ce34
NC
7199 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
7200 STR_WITH_LEN("NDBM_File::"),
7201 STR_WITH_LEN("DB_File::"),
7202 STR_WITH_LEN("GDBM_File::"),
7203 STR_WITH_LEN("SDBM_File::"),
7204 STR_WITH_LEN("ODBM_File::"),
7205 NULL);
a0d0e21e 7206 LOP(OP_DBMOPEN,XTERM);
79072805
LW
7207
7208 case KEY_dbmclose:
7209 UNI(OP_DBMCLOSE);
7210
7211 case KEY_dump:
a0d0e21e 7212 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7213 LOOPX(OP_DUMP);
7214
7215 case KEY_else:
7216 PREBLOCK(ELSE);
7217
7218 case KEY_elsif:
6154021b 7219 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7220 OPERATOR(ELSIF);
7221
7222 case KEY_eq:
78cdf107
Z
7223 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7224 return REPORT(0);
79072805
LW
7225 Eop(OP_SEQ);
7226
a0d0e21e
LW
7227 case KEY_exists:
7228 UNI(OP_EXISTS);
4e553d73 7229
79072805 7230 case KEY_exit:
5db06880
NC
7231 if (PL_madskills)
7232 UNI(OP_INT);
79072805
LW
7233 UNI(OP_EXIT);
7234
7235 case KEY_eval:
29595ff2 7236 s = SKIPSPACE1(s);
32e2a35d
RGS
7237 if (*s == '{') { /* block eval */
7238 PL_expect = XTERMBLOCK;
7239 UNIBRACK(OP_ENTERTRY);
7240 }
7241 else { /* string eval */
7242 PL_expect = XTERM;
7243 UNIBRACK(OP_ENTEREVAL);
7244 }
79072805
LW
7245
7246 case KEY_eof:
7247 UNI(OP_EOF);
7248
7249 case KEY_exp:
7250 UNI(OP_EXP);
7251
7252 case KEY_each:
7253 UNI(OP_EACH);
7254
7255 case KEY_exec:
a0d0e21e 7256 LOP(OP_EXEC,XREF);
79072805
LW
7257
7258 case KEY_endhostent:
7259 FUN0(OP_EHOSTENT);
7260
7261 case KEY_endnetent:
7262 FUN0(OP_ENETENT);
7263
7264 case KEY_endservent:
7265 FUN0(OP_ESERVENT);
7266
7267 case KEY_endprotoent:
7268 FUN0(OP_EPROTOENT);
7269
7270 case KEY_endpwent:
7271 FUN0(OP_EPWENT);
7272
7273 case KEY_endgrent:
7274 FUN0(OP_EGRENT);
7275
7276 case KEY_for:
7277 case KEY_foreach:
78cdf107
Z
7278 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7279 return REPORT(0);
6154021b 7280 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7281 s = SKIPSPACE1(s);
7e2040f0 7282 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7283 char *p = s;
5db06880
NC
7284#ifdef PERL_MAD
7285 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7286#endif
7287
3280af22 7288 if ((PL_bufend - p) >= 3 &&
55497cff 7289 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7290 p += 2;
77ca0c92
LW
7291 else if ((PL_bufend - p) >= 4 &&
7292 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7293 p += 3;
29595ff2 7294 p = PEEKSPACE(p);
7e2040f0 7295 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7296 p = scan_ident(p, PL_bufend,
7297 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7298 p = PEEKSPACE(p);
77ca0c92
LW
7299 }
7300 if (*p != '$')
cea2e8a9 7301 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7302#ifdef PERL_MAD
7303 s = SvPVX(PL_linestr) + soff;
7304#endif
55497cff 7305 }
79072805
LW
7306 OPERATOR(FOR);
7307
7308 case KEY_formline:
a0d0e21e 7309 LOP(OP_FORMLINE,XTERM);
79072805
LW
7310
7311 case KEY_fork:
7312 FUN0(OP_FORK);
7313
7314 case KEY_fcntl:
a0d0e21e 7315 LOP(OP_FCNTL,XTERM);
79072805
LW
7316
7317 case KEY_fileno:
7318 UNI(OP_FILENO);
7319
7320 case KEY_flock:
a0d0e21e 7321 LOP(OP_FLOCK,XTERM);
79072805
LW
7322
7323 case KEY_gt:
78cdf107
Z
7324 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7325 return REPORT(0);
79072805
LW
7326 Rop(OP_SGT);
7327
7328 case KEY_ge:
78cdf107
Z
7329 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7330 return REPORT(0);
79072805
LW
7331 Rop(OP_SGE);
7332
7333 case KEY_grep:
2c38e13d 7334 LOP(OP_GREPSTART, XREF);
79072805
LW
7335
7336 case KEY_goto:
a0d0e21e 7337 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7338 LOOPX(OP_GOTO);
7339
7340 case KEY_gmtime:
7341 UNI(OP_GMTIME);
7342
7343 case KEY_getc:
6f33ba73 7344 UNIDOR(OP_GETC);
79072805
LW
7345
7346 case KEY_getppid:
7347 FUN0(OP_GETPPID);
7348
7349 case KEY_getpgrp:
7350 UNI(OP_GETPGRP);
7351
7352 case KEY_getpriority:
a0d0e21e 7353 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7354
7355 case KEY_getprotobyname:
7356 UNI(OP_GPBYNAME);
7357
7358 case KEY_getprotobynumber:
a0d0e21e 7359 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7360
7361 case KEY_getprotoent:
7362 FUN0(OP_GPROTOENT);
7363
7364 case KEY_getpwent:
7365 FUN0(OP_GPWENT);
7366
7367 case KEY_getpwnam:
ff68c719 7368 UNI(OP_GPWNAM);
79072805
LW
7369
7370 case KEY_getpwuid:
ff68c719 7371 UNI(OP_GPWUID);
79072805
LW
7372
7373 case KEY_getpeername:
7374 UNI(OP_GETPEERNAME);
7375
7376 case KEY_gethostbyname:
7377 UNI(OP_GHBYNAME);
7378
7379 case KEY_gethostbyaddr:
a0d0e21e 7380 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7381
7382 case KEY_gethostent:
7383 FUN0(OP_GHOSTENT);
7384
7385 case KEY_getnetbyname:
7386 UNI(OP_GNBYNAME);
7387
7388 case KEY_getnetbyaddr:
a0d0e21e 7389 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7390
7391 case KEY_getnetent:
7392 FUN0(OP_GNETENT);
7393
7394 case KEY_getservbyname:
a0d0e21e 7395 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7396
7397 case KEY_getservbyport:
a0d0e21e 7398 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7399
7400 case KEY_getservent:
7401 FUN0(OP_GSERVENT);
7402
7403 case KEY_getsockname:
7404 UNI(OP_GETSOCKNAME);
7405
7406 case KEY_getsockopt:
a0d0e21e 7407 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7408
7409 case KEY_getgrent:
7410 FUN0(OP_GGRENT);
7411
7412 case KEY_getgrnam:
ff68c719 7413 UNI(OP_GGRNAM);
79072805
LW
7414
7415 case KEY_getgrgid:
ff68c719 7416 UNI(OP_GGRGID);
79072805
LW
7417
7418 case KEY_getlogin:
7419 FUN0(OP_GETLOGIN);
7420
0d863452 7421 case KEY_given:
6154021b 7422 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7423 OPERATOR(GIVEN);
7424
93a17b20 7425 case KEY_glob:
a0d0e21e 7426 LOP(OP_GLOB,XTERM);
93a17b20 7427
79072805
LW
7428 case KEY_hex:
7429 UNI(OP_HEX);
7430
7431 case KEY_if:
78cdf107
Z
7432 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
7433 return REPORT(0);
6154021b 7434 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7435 OPERATOR(IF);
7436
7437 case KEY_index:
a0d0e21e 7438 LOP(OP_INDEX,XTERM);
79072805
LW
7439
7440 case KEY_int:
7441 UNI(OP_INT);
7442
7443 case KEY_ioctl:
a0d0e21e 7444 LOP(OP_IOCTL,XTERM);
79072805
LW
7445
7446 case KEY_join:
a0d0e21e 7447 LOP(OP_JOIN,XTERM);
79072805
LW
7448
7449 case KEY_keys:
7450 UNI(OP_KEYS);
7451
7452 case KEY_kill:
a0d0e21e 7453 LOP(OP_KILL,XTERM);
79072805
LW
7454
7455 case KEY_last:
a0d0e21e 7456 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7457 LOOPX(OP_LAST);
4e553d73 7458
79072805
LW
7459 case KEY_lc:
7460 UNI(OP_LC);
7461
7462 case KEY_lcfirst:
7463 UNI(OP_LCFIRST);
7464
7465 case KEY_local:
6154021b 7466 pl_yylval.ival = 0;
79072805
LW
7467 OPERATOR(LOCAL);
7468
7469 case KEY_length:
7470 UNI(OP_LENGTH);
7471
7472 case KEY_lt:
78cdf107
Z
7473 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7474 return REPORT(0);
79072805
LW
7475 Rop(OP_SLT);
7476
7477 case KEY_le:
78cdf107
Z
7478 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7479 return REPORT(0);
79072805
LW
7480 Rop(OP_SLE);
7481
7482 case KEY_localtime:
7483 UNI(OP_LOCALTIME);
7484
7485 case KEY_log:
7486 UNI(OP_LOG);
7487
7488 case KEY_link:
a0d0e21e 7489 LOP(OP_LINK,XTERM);
79072805
LW
7490
7491 case KEY_listen:
a0d0e21e 7492 LOP(OP_LISTEN,XTERM);
79072805 7493
c0329465
MB
7494 case KEY_lock:
7495 UNI(OP_LOCK);
7496
79072805
LW
7497 case KEY_lstat:
7498 UNI(OP_LSTAT);
7499
7500 case KEY_m:
8782bef2 7501 s = scan_pat(s,OP_MATCH);
79072805
LW
7502 TERM(sublex_start());
7503
a0d0e21e 7504 case KEY_map:
2c38e13d 7505 LOP(OP_MAPSTART, XREF);
4e4e412b 7506
79072805 7507 case KEY_mkdir:
a0d0e21e 7508 LOP(OP_MKDIR,XTERM);
79072805
LW
7509
7510 case KEY_msgctl:
a0d0e21e 7511 LOP(OP_MSGCTL,XTERM);
79072805
LW
7512
7513 case KEY_msgget:
a0d0e21e 7514 LOP(OP_MSGGET,XTERM);
79072805
LW
7515
7516 case KEY_msgrcv:
a0d0e21e 7517 LOP(OP_MSGRCV,XTERM);
79072805
LW
7518
7519 case KEY_msgsnd:
a0d0e21e 7520 LOP(OP_MSGSND,XTERM);
79072805 7521
77ca0c92 7522 case KEY_our:
93a17b20 7523 case KEY_my:
952306ac 7524 case KEY_state:
eac04b2e 7525 PL_in_my = (U16)tmp;
29595ff2 7526 s = SKIPSPACE1(s);
7e2040f0 7527 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7528#ifdef PERL_MAD
7529 char* start = s;
7530#endif
3280af22 7531 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7532 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7533 goto really_sub;
def3634b 7534 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7535 if (!PL_in_my_stash) {
c750a3ec 7536 char tmpbuf[1024];
3280af22 7537 PL_bufptr = s;
d9fad198 7538 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7539 yyerror(tmpbuf);
7540 }
5db06880
NC
7541#ifdef PERL_MAD
7542 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7543 sv_catsv(PL_thistoken, PL_nextwhite);
7544 PL_nextwhite = 0;
7545 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7546 }
7547#endif
c750a3ec 7548 }
6154021b 7549 pl_yylval.ival = 1;
55497cff 7550 OPERATOR(MY);
93a17b20 7551
79072805 7552 case KEY_next:
a0d0e21e 7553 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7554 LOOPX(OP_NEXT);
7555
7556 case KEY_ne:
78cdf107
Z
7557 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_COMPARE)
7558 return REPORT(0);
79072805
LW
7559 Eop(OP_SNE);
7560
a0d0e21e 7561 case KEY_no:
468aa647 7562 s = tokenize_use(0, s);
a0d0e21e
LW
7563 OPERATOR(USE);
7564
7565 case KEY_not:
29595ff2 7566 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d 7567 FUN1(OP_NOT);
78cdf107
Z
7568 else {
7569 if (!PL_lex_allbrackets &&
7570 PL_lex_fakeeof > LEX_FAKEEOF_LOWLOGIC)
7571 PL_lex_fakeeof = LEX_FAKEEOF_LOWLOGIC;
2d2e263d 7572 OPERATOR(NOTOP);
78cdf107 7573 }
a0d0e21e 7574
79072805 7575 case KEY_open:
29595ff2 7576 s = SKIPSPACE1(s);
7e2040f0 7577 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7578 const char *t;
c35e046a
AL
7579 for (d = s; isALNUM_lazy_if(d,UTF);)
7580 d++;
7581 for (t=d; isSPACE(*t);)
7582 t++;
e2ab214b 7583 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7584 /* [perl #16184] */
7585 && !(t[0] == '=' && t[1] == '>')
7586 ) {
5f66b61c 7587 int parms_len = (int)(d-s);
9014280d 7588 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7589 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7590 parms_len, s, parms_len, s);
66fbe8fb 7591 }
93a17b20 7592 }
a0d0e21e 7593 LOP(OP_OPEN,XTERM);
79072805 7594
463ee0b2 7595 case KEY_or:
78cdf107
Z
7596 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
7597 return REPORT(0);
6154021b 7598 pl_yylval.ival = OP_OR;
463ee0b2
LW
7599 OPERATOR(OROP);
7600
79072805
LW
7601 case KEY_ord:
7602 UNI(OP_ORD);
7603
7604 case KEY_oct:
7605 UNI(OP_OCT);
7606
7607 case KEY_opendir:
a0d0e21e 7608 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7609
7610 case KEY_print:
3280af22 7611 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7612 LOP(OP_PRINT,XREF);
79072805
LW
7613
7614 case KEY_printf:
3280af22 7615 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7616 LOP(OP_PRTF,XREF);
79072805 7617
c07a80fd 7618 case KEY_prototype:
7619 UNI(OP_PROTOTYPE);
7620
79072805 7621 case KEY_push:
a0d0e21e 7622 LOP(OP_PUSH,XTERM);
79072805
LW
7623
7624 case KEY_pop:
6f33ba73 7625 UNIDOR(OP_POP);
79072805 7626
a0d0e21e 7627 case KEY_pos:
6f33ba73 7628 UNIDOR(OP_POS);
4e553d73 7629
79072805 7630 case KEY_pack:
a0d0e21e 7631 LOP(OP_PACK,XTERM);
79072805
LW
7632
7633 case KEY_package:
a0d0e21e 7634 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7635 s = SKIPSPACE1(s);
91152fc1 7636 s = force_strict_version(s);
4e4da3ac 7637 PL_lex_expect = XBLOCK;
79072805
LW
7638 OPERATOR(PACKAGE);
7639
7640 case KEY_pipe:
a0d0e21e 7641 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7642
7643 case KEY_q:
5db06880 7644 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7645 if (!s)
d4c19fe8 7646 missingterm(NULL);
6154021b 7647 pl_yylval.ival = OP_CONST;
79072805
LW
7648 TERM(sublex_start());
7649
a0d0e21e
LW
7650 case KEY_quotemeta:
7651 UNI(OP_QUOTEMETA);
7652
ea25a9b2
Z
7653 case KEY_qw: {
7654 OP *words = NULL;
5db06880 7655 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7656 if (!s)
d4c19fe8 7657 missingterm(NULL);
3480a8d2 7658 PL_expect = XOPERATOR;
8127e0e3 7659 if (SvCUR(PL_lex_stuff)) {
7e03b518
EB
7660 int warned_comma = !ckWARN(WARN_QW);
7661 int warned_comment = warned_comma;
3280af22 7662 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7663 while (len) {
d4c19fe8
AL
7664 for (; isSPACE(*d) && len; --len, ++d)
7665 /**/;
8127e0e3 7666 if (len) {
d4c19fe8 7667 SV *sv;
f54cb97a 7668 const char *b = d;
7e03b518 7669 if (!warned_comma || !warned_comment) {
8127e0e3 7670 for (; !isSPACE(*d) && len; --len, ++d) {
7e03b518 7671 if (!warned_comma && *d == ',') {
9014280d 7672 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7673 "Possible attempt to separate words with commas");
7e03b518 7674 ++warned_comma;
8127e0e3 7675 }
7e03b518 7676 else if (!warned_comment && *d == '#') {
9014280d 7677 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3 7678 "Possible attempt to put comments in qw() list");
7e03b518 7679 ++warned_comment;
8127e0e3
GS
7680 }
7681 }
7682 }
7683 else {
d4c19fe8
AL
7684 for (; !isSPACE(*d) && len; --len, ++d)
7685 /**/;
8127e0e3 7686 }
740cce10 7687 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7688 words = op_append_elem(OP_LIST, words,
7948272d 7689 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7690 }
7691 }
7692 }
ea25a9b2
Z
7693 if (!words)
7694 words = newNULLLIST();
37fd879b 7695 if (PL_lex_stuff) {
8127e0e3 7696 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7697 PL_lex_stuff = NULL;
37fd879b 7698 }
ea25a9b2
Z
7699 PL_expect = XOPERATOR;
7700 pl_yylval.opval = sawparens(words);
7701 TOKEN(QWLIST);
7702 }
8990e307 7703
79072805 7704 case KEY_qq:
5db06880 7705 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7706 if (!s)
d4c19fe8 7707 missingterm(NULL);
6154021b 7708 pl_yylval.ival = OP_STRINGIFY;
3280af22 7709 if (SvIVX(PL_lex_stuff) == '\'')
486ec47a 7710 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should interpolate */
79072805
LW
7711 TERM(sublex_start());
7712
8782bef2
GB
7713 case KEY_qr:
7714 s = scan_pat(s,OP_QR);
7715 TERM(sublex_start());
7716
79072805 7717 case KEY_qx:
5db06880 7718 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7719 if (!s)
d4c19fe8 7720 missingterm(NULL);
9b201d7d 7721 readpipe_override();
79072805
LW
7722 TERM(sublex_start());
7723
7724 case KEY_return:
7725 OLDLOP(OP_RETURN);
7726
7727 case KEY_require:
29595ff2 7728 s = SKIPSPACE1(s);
e759cc13
RGS
7729 if (isDIGIT(*s)) {
7730 s = force_version(s, FALSE);
a7cb1f99 7731 }
e759cc13
RGS
7732 else if (*s != 'v' || !isDIGIT(s[1])
7733 || (s = force_version(s, TRUE), *s == 'v'))
7734 {
a7cb1f99
GS
7735 *PL_tokenbuf = '\0';
7736 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7737 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
af9f5953
BF
7738 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf),
7739 GV_ADD | (UTF ? SVf_UTF8 : 0));
a7cb1f99
GS
7740 else if (*s == '<')
7741 yyerror("<> should be quotes");
7742 }
a72a1c8b
RGS
7743 if (orig_keyword == KEY_require) {
7744 orig_keyword = 0;
6154021b 7745 pl_yylval.ival = 1;
a72a1c8b
RGS
7746 }
7747 else
6154021b 7748 pl_yylval.ival = 0;
a72a1c8b
RGS
7749 PL_expect = XTERM;
7750 PL_bufptr = s;
7751 PL_last_uni = PL_oldbufptr;
7752 PL_last_lop_op = OP_REQUIRE;
7753 s = skipspace(s);
7754 return REPORT( (int)REQUIRE );
79072805
LW
7755
7756 case KEY_reset:
7757 UNI(OP_RESET);
7758
7759 case KEY_redo:
a0d0e21e 7760 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7761 LOOPX(OP_REDO);
7762
7763 case KEY_rename:
a0d0e21e 7764 LOP(OP_RENAME,XTERM);
79072805
LW
7765
7766 case KEY_rand:
7767 UNI(OP_RAND);
7768
7769 case KEY_rmdir:
7770 UNI(OP_RMDIR);
7771
7772 case KEY_rindex:
a0d0e21e 7773 LOP(OP_RINDEX,XTERM);
79072805
LW
7774
7775 case KEY_read:
a0d0e21e 7776 LOP(OP_READ,XTERM);
79072805
LW
7777
7778 case KEY_readdir:
7779 UNI(OP_READDIR);
7780
93a17b20 7781 case KEY_readline:
6f33ba73 7782 UNIDOR(OP_READLINE);
93a17b20
LW
7783
7784 case KEY_readpipe:
0858480c 7785 UNIDOR(OP_BACKTICK);
93a17b20 7786
79072805
LW
7787 case KEY_rewinddir:
7788 UNI(OP_REWINDDIR);
7789
7790 case KEY_recv:
a0d0e21e 7791 LOP(OP_RECV,XTERM);
79072805
LW
7792
7793 case KEY_reverse:
a0d0e21e 7794 LOP(OP_REVERSE,XTERM);
79072805
LW
7795
7796 case KEY_readlink:
6f33ba73 7797 UNIDOR(OP_READLINK);
79072805
LW
7798
7799 case KEY_ref:
7800 UNI(OP_REF);
7801
7802 case KEY_s:
7803 s = scan_subst(s);
6154021b 7804 if (pl_yylval.opval)
79072805
LW
7805 TERM(sublex_start());
7806 else
7807 TOKEN(1); /* force error */
7808
0d863452
RH
7809 case KEY_say:
7810 checkcomma(s,PL_tokenbuf,"filehandle");
7811 LOP(OP_SAY,XREF);
7812
a0d0e21e
LW
7813 case KEY_chomp:
7814 UNI(OP_CHOMP);
4e553d73 7815
79072805
LW
7816 case KEY_scalar:
7817 UNI(OP_SCALAR);
7818
7819 case KEY_select:
a0d0e21e 7820 LOP(OP_SELECT,XTERM);
79072805
LW
7821
7822 case KEY_seek:
a0d0e21e 7823 LOP(OP_SEEK,XTERM);
79072805
LW
7824
7825 case KEY_semctl:
a0d0e21e 7826 LOP(OP_SEMCTL,XTERM);
79072805
LW
7827
7828 case KEY_semget:
a0d0e21e 7829 LOP(OP_SEMGET,XTERM);
79072805
LW
7830
7831 case KEY_semop:
a0d0e21e 7832 LOP(OP_SEMOP,XTERM);
79072805
LW
7833
7834 case KEY_send:
a0d0e21e 7835 LOP(OP_SEND,XTERM);
79072805
LW
7836
7837 case KEY_setpgrp:
a0d0e21e 7838 LOP(OP_SETPGRP,XTERM);
79072805
LW
7839
7840 case KEY_setpriority:
a0d0e21e 7841 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7842
7843 case KEY_sethostent:
ff68c719 7844 UNI(OP_SHOSTENT);
79072805
LW
7845
7846 case KEY_setnetent:
ff68c719 7847 UNI(OP_SNETENT);
79072805
LW
7848
7849 case KEY_setservent:
ff68c719 7850 UNI(OP_SSERVENT);
79072805
LW
7851
7852 case KEY_setprotoent:
ff68c719 7853 UNI(OP_SPROTOENT);
79072805
LW
7854
7855 case KEY_setpwent:
7856 FUN0(OP_SPWENT);
7857
7858 case KEY_setgrent:
7859 FUN0(OP_SGRENT);
7860
7861 case KEY_seekdir:
a0d0e21e 7862 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7863
7864 case KEY_setsockopt:
a0d0e21e 7865 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7866
7867 case KEY_shift:
6f33ba73 7868 UNIDOR(OP_SHIFT);
79072805
LW
7869
7870 case KEY_shmctl:
a0d0e21e 7871 LOP(OP_SHMCTL,XTERM);
79072805
LW
7872
7873 case KEY_shmget:
a0d0e21e 7874 LOP(OP_SHMGET,XTERM);
79072805
LW
7875
7876 case KEY_shmread:
a0d0e21e 7877 LOP(OP_SHMREAD,XTERM);
79072805
LW
7878
7879 case KEY_shmwrite:
a0d0e21e 7880 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7881
7882 case KEY_shutdown:
a0d0e21e 7883 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7884
7885 case KEY_sin:
7886 UNI(OP_SIN);
7887
7888 case KEY_sleep:
7889 UNI(OP_SLEEP);
7890
7891 case KEY_socket:
a0d0e21e 7892 LOP(OP_SOCKET,XTERM);
79072805
LW
7893
7894 case KEY_socketpair:
a0d0e21e 7895 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7896
7897 case KEY_sort:
3280af22 7898 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7899 s = SKIPSPACE1(s);
79072805 7900 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7901 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7902 PL_expect = XTERM;
15f0808c 7903 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7904 LOP(OP_SORT,XREF);
79072805
LW
7905
7906 case KEY_split:
a0d0e21e 7907 LOP(OP_SPLIT,XTERM);
79072805
LW
7908
7909 case KEY_sprintf:
a0d0e21e 7910 LOP(OP_SPRINTF,XTERM);
79072805
LW
7911
7912 case KEY_splice:
a0d0e21e 7913 LOP(OP_SPLICE,XTERM);
79072805
LW
7914
7915 case KEY_sqrt:
7916 UNI(OP_SQRT);
7917
7918 case KEY_srand:
7919 UNI(OP_SRAND);
7920
7921 case KEY_stat:
7922 UNI(OP_STAT);
7923
7924 case KEY_study:
79072805
LW
7925 UNI(OP_STUDY);
7926
7927 case KEY_substr:
a0d0e21e 7928 LOP(OP_SUBSTR,XTERM);
79072805
LW
7929
7930 case KEY_format:
7931 case KEY_sub:
93a17b20 7932 really_sub:
09bef843 7933 {
3280af22 7934 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7935 SSize_t tboffset = 0;
09bef843 7936 expectation attrful;
28cc6278 7937 bool have_name, have_proto;
f54cb97a 7938 const int key = tmp;
09bef843 7939
5db06880
NC
7940#ifdef PERL_MAD
7941 SV *tmpwhite = 0;
7942
cd81e915 7943 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
af9f5953 7944 SV *subtoken = newSVpvn_flags(tstart, s - tstart, SvUTF8(PL_linestr));
cd81e915 7945 PL_thistoken = 0;
5db06880
NC
7946
7947 d = s;
7948 s = SKIPSPACE2(s,tmpwhite);
7949#else
09bef843 7950 s = skipspace(s);
5db06880 7951#endif
09bef843 7952
7e2040f0 7953 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7954 (*s == ':' && s[1] == ':'))
7955 {
5db06880 7956#ifdef PERL_MAD
4f61fd4b 7957 SV *nametoke = NULL;
5db06880
NC
7958#endif
7959
09bef843
SB
7960 PL_expect = XBLOCK;
7961 attrful = XATTRBLOCK;
b1b65b59
JH
7962 /* remember buffer pos'n for later force_word */
7963 tboffset = s - PL_oldbufptr;
09bef843 7964 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7965#ifdef PERL_MAD
7966 if (PL_madskills)
af9f5953 7967 nametoke = newSVpvn_flags(s, d - s, SvUTF8(PL_linestr));
5db06880 7968#endif
6502358f
NC
7969 if (memchr(tmpbuf, ':', len))
7970 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7971 else {
7972 sv_setsv(PL_subname,PL_curstname);
396482e1 7973 sv_catpvs(PL_subname,"::");
09bef843
SB
7974 sv_catpvn(PL_subname,tmpbuf,len);
7975 }
af9f5953
BF
7976 if (SvUTF8(PL_linestr))
7977 SvUTF8_on(PL_subname);
09bef843 7978 have_name = TRUE;
5db06880
NC
7979
7980#ifdef PERL_MAD
7981
7982 start_force(0);
7983 CURMAD('X', nametoke);
7984 CURMAD('_', tmpwhite);
7985 (void) force_word(PL_oldbufptr + tboffset, WORD,
7986 FALSE, TRUE, TRUE);
7987
7988 s = SKIPSPACE2(d,tmpwhite);
7989#else
7990 s = skipspace(d);
7991#endif
09bef843 7992 }
463ee0b2 7993 else {
09bef843
SB
7994 if (key == KEY_my)
7995 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7996 PL_expect = XTERMBLOCK;
7997 attrful = XATTRTERM;
76f68e9b 7998 sv_setpvs(PL_subname,"?");
09bef843 7999 have_name = FALSE;
463ee0b2 8000 }
4633a7c4 8001
09bef843
SB
8002 if (key == KEY_format) {
8003 if (*s == '=')
8004 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 8005#ifdef PERL_MAD
cd81e915 8006 PL_thistoken = subtoken;
5db06880
NC
8007 s = d;
8008#else
09bef843 8009 if (have_name)
b1b65b59
JH
8010 (void) force_word(PL_oldbufptr + tboffset, WORD,
8011 FALSE, TRUE, TRUE);
5db06880 8012#endif
09bef843
SB
8013 OPERATOR(FORMAT);
8014 }
79072805 8015
09bef843
SB
8016 /* Look for a prototype */
8017 if (*s == '(') {
d9f2850e
RGS
8018 char *p;
8019 bool bad_proto = FALSE;
9e8d7757
RB
8020 bool in_brackets = FALSE;
8021 char greedy_proto = ' ';
8022 bool proto_after_greedy_proto = FALSE;
8023 bool must_be_last = FALSE;
8024 bool underscore = FALSE;
aef2a98a 8025 bool seen_underscore = FALSE;
197afce1 8026 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
dab1c735 8027 STRLEN tmplen;
09bef843 8028
5db06880 8029 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 8030 if (!s)
09bef843 8031 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 8032 /* strip spaces and check for bad characters */
dab1c735 8033 d = SvPV(PL_lex_stuff, tmplen);
09bef843 8034 tmp = 0;
dab1c735 8035 for (p = d; tmplen; tmplen--, ++p) {
d9f2850e 8036 if (!isSPACE(*p)) {
dab1c735 8037 d[tmp++] = *p;
9e8d7757 8038
197afce1 8039 if (warnillegalproto) {
9e8d7757
RB
8040 if (must_be_last)
8041 proto_after_greedy_proto = TRUE;
dab1c735 8042 if (!strchr("$@%*;[]&\\_+", *p) || *p == '\0') {
9e8d7757
RB
8043 bad_proto = TRUE;
8044 }
8045 else {
8046 if ( underscore ) {
8047 if ( *p != ';' )
8048 bad_proto = TRUE;
8049 underscore = FALSE;
8050 }
8051 if ( *p == '[' ) {
8052 in_brackets = TRUE;
8053 }
8054 else if ( *p == ']' ) {
8055 in_brackets = FALSE;
8056 }
8057 else if ( (*p == '@' || *p == '%') &&
8058 ( tmp < 2 || d[tmp-2] != '\\' ) &&
8059 !in_brackets ) {
8060 must_be_last = TRUE;
8061 greedy_proto = *p;
8062 }
8063 else if ( *p == '_' ) {
aef2a98a 8064 underscore = seen_underscore = TRUE;
9e8d7757
RB
8065 }
8066 }
8067 }
d37a9538 8068 }
09bef843 8069 }
dab1c735 8070 d[tmp] = '\0';
9e8d7757 8071 if (proto_after_greedy_proto)
197afce1 8072 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
8073 "Prototype after '%c' for %"SVf" : %s",
8074 greedy_proto, SVfARG(PL_subname), d);
dab1c735
BF
8075 if (bad_proto) {
8076 SV *dsv = newSVpvs_flags("", SVs_TEMP);
197afce1 8077 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
8078 "Illegal character %sin prototype for %"SVf" : %s",
8079 seen_underscore ? "after '_' " : "",
dab1c735
BF
8080 SVfARG(PL_subname),
8081 sv_uni_display(dsv,
8082 newSVpvn_flags(d, tmp, SVs_TEMP | SvUTF8(PL_lex_stuff)),
8083 tmp, UNI_DISPLAY_ISPRINT));
8084 }
8085 SvCUR_set(PL_lex_stuff, tmp);
09bef843 8086 have_proto = TRUE;
68dc0745 8087
5db06880
NC
8088#ifdef PERL_MAD
8089 start_force(0);
cd81e915 8090 CURMAD('q', PL_thisopen);
5db06880 8091 CURMAD('_', tmpwhite);
cd81e915
NC
8092 CURMAD('=', PL_thisstuff);
8093 CURMAD('Q', PL_thisclose);
5db06880
NC
8094 NEXTVAL_NEXTTOKE.opval =
8095 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 8096 PL_lex_stuff = NULL;
5db06880
NC
8097 force_next(THING);
8098
8099 s = SKIPSPACE2(s,tmpwhite);
8100#else
09bef843 8101 s = skipspace(s);
5db06880 8102#endif
4633a7c4 8103 }
09bef843
SB
8104 else
8105 have_proto = FALSE;
8106
8107 if (*s == ':' && s[1] != ':')
8108 PL_expect = attrful;
8e742a20
MHM
8109 else if (*s != '{' && key == KEY_sub) {
8110 if (!have_name)
8111 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 8112 else if (*s != ';' && *s != '}')
be2597df 8113 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 8114 }
09bef843 8115
5db06880
NC
8116#ifdef PERL_MAD
8117 start_force(0);
8118 if (tmpwhite) {
8119 if (PL_madskills)
6b29d1f5 8120 curmad('^', newSVpvs(""));
5db06880
NC
8121 CURMAD('_', tmpwhite);
8122 }
8123 force_next(0);
8124
cd81e915 8125 PL_thistoken = subtoken;
5db06880 8126#else
09bef843 8127 if (have_proto) {
9ded7720 8128 NEXTVAL_NEXTTOKE.opval =
b1b65b59 8129 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 8130 PL_lex_stuff = NULL;
09bef843 8131 force_next(THING);
68dc0745 8132 }
5db06880 8133#endif
09bef843 8134 if (!have_name) {
49a54bbe
NC
8135 if (PL_curstash)
8136 sv_setpvs(PL_subname, "__ANON__");
8137 else
8138 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 8139 TOKEN(ANONSUB);
4633a7c4 8140 }
5db06880 8141#ifndef PERL_MAD
b1b65b59
JH
8142 (void) force_word(PL_oldbufptr + tboffset, WORD,
8143 FALSE, TRUE, TRUE);
5db06880 8144#endif
09bef843
SB
8145 if (key == KEY_my)
8146 TOKEN(MYSUB);
8147 TOKEN(SUB);
4633a7c4 8148 }
79072805
LW
8149
8150 case KEY_system:
a0d0e21e 8151 LOP(OP_SYSTEM,XREF);
79072805
LW
8152
8153 case KEY_symlink:
a0d0e21e 8154 LOP(OP_SYMLINK,XTERM);
79072805
LW
8155
8156 case KEY_syscall:
a0d0e21e 8157 LOP(OP_SYSCALL,XTERM);
79072805 8158
c07a80fd 8159 case KEY_sysopen:
8160 LOP(OP_SYSOPEN,XTERM);
8161
137443ea 8162 case KEY_sysseek:
8163 LOP(OP_SYSSEEK,XTERM);
8164
79072805 8165 case KEY_sysread:
a0d0e21e 8166 LOP(OP_SYSREAD,XTERM);
79072805
LW
8167
8168 case KEY_syswrite:
a0d0e21e 8169 LOP(OP_SYSWRITE,XTERM);
79072805
LW
8170
8171 case KEY_tr:
8172 s = scan_trans(s);
8173 TERM(sublex_start());
8174
8175 case KEY_tell:
8176 UNI(OP_TELL);
8177
8178 case KEY_telldir:
8179 UNI(OP_TELLDIR);
8180
463ee0b2 8181 case KEY_tie:
a0d0e21e 8182 LOP(OP_TIE,XTERM);
463ee0b2 8183
c07a80fd 8184 case KEY_tied:
8185 UNI(OP_TIED);
8186
79072805
LW
8187 case KEY_time:
8188 FUN0(OP_TIME);
8189
8190 case KEY_times:
8191 FUN0(OP_TMS);
8192
8193 case KEY_truncate:
a0d0e21e 8194 LOP(OP_TRUNCATE,XTERM);
79072805
LW
8195
8196 case KEY_uc:
8197 UNI(OP_UC);
8198
8199 case KEY_ucfirst:
8200 UNI(OP_UCFIRST);
8201
463ee0b2
LW
8202 case KEY_untie:
8203 UNI(OP_UNTIE);
8204
79072805 8205 case KEY_until:
78cdf107
Z
8206 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8207 return REPORT(0);
6154021b 8208 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8209 OPERATOR(UNTIL);
8210
8211 case KEY_unless:
78cdf107
Z
8212 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8213 return REPORT(0);
6154021b 8214 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8215 OPERATOR(UNLESS);
8216
8217 case KEY_unlink:
a0d0e21e 8218 LOP(OP_UNLINK,XTERM);
79072805
LW
8219
8220 case KEY_undef:
6f33ba73 8221 UNIDOR(OP_UNDEF);
79072805
LW
8222
8223 case KEY_unpack:
a0d0e21e 8224 LOP(OP_UNPACK,XTERM);
79072805
LW
8225
8226 case KEY_utime:
a0d0e21e 8227 LOP(OP_UTIME,XTERM);
79072805
LW
8228
8229 case KEY_umask:
6f33ba73 8230 UNIDOR(OP_UMASK);
79072805
LW
8231
8232 case KEY_unshift:
a0d0e21e
LW
8233 LOP(OP_UNSHIFT,XTERM);
8234
8235 case KEY_use:
468aa647 8236 s = tokenize_use(1, s);
a0d0e21e 8237 OPERATOR(USE);
79072805
LW
8238
8239 case KEY_values:
8240 UNI(OP_VALUES);
8241
8242 case KEY_vec:
a0d0e21e 8243 LOP(OP_VEC,XTERM);
79072805 8244
0d863452 8245 case KEY_when:
78cdf107
Z
8246 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8247 return REPORT(0);
6154021b 8248 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
8249 OPERATOR(WHEN);
8250
79072805 8251 case KEY_while:
78cdf107
Z
8252 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_NONEXPR)
8253 return REPORT(0);
6154021b 8254 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
8255 OPERATOR(WHILE);
8256
8257 case KEY_warn:
3280af22 8258 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 8259 LOP(OP_WARN,XTERM);
79072805
LW
8260
8261 case KEY_wait:
8262 FUN0(OP_WAIT);
8263
8264 case KEY_waitpid:
a0d0e21e 8265 LOP(OP_WAITPID,XTERM);
79072805
LW
8266
8267 case KEY_wantarray:
8268 FUN0(OP_WANTARRAY);
8269
8270 case KEY_write:
9d116dd7
JH
8271#ifdef EBCDIC
8272 {
df3728a2
JH
8273 char ctl_l[2];
8274 ctl_l[0] = toCTRL('L');
8275 ctl_l[1] = '\0';
fafc274c 8276 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
8277 }
8278#else
fafc274c
NC
8279 /* Make sure $^L is defined */
8280 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 8281#endif
79072805
LW
8282 UNI(OP_ENTERWRITE);
8283
8284 case KEY_x:
78cdf107
Z
8285 if (PL_expect == XOPERATOR) {
8286 if (*s == '=' && !PL_lex_allbrackets &&
8287 PL_lex_fakeeof >= LEX_FAKEEOF_ASSIGN)
8288 return REPORT(0);
79072805 8289 Mop(OP_REPEAT);
78cdf107 8290 }
79072805
LW
8291 check_uni();
8292 goto just_a_word;
8293
a0d0e21e 8294 case KEY_xor:
78cdf107
Z
8295 if (!PL_lex_allbrackets && PL_lex_fakeeof >= LEX_FAKEEOF_LOWLOGIC)
8296 return REPORT(0);
6154021b 8297 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
8298 OPERATOR(OROP);
8299
79072805
LW
8300 case KEY_y:
8301 s = scan_trans(s);
8302 TERM(sublex_start());
8303 }
49dc05e3 8304 }}
79072805 8305}
bf4acbe4
GS
8306#ifdef __SC__
8307#pragma segment Main
8308#endif
79072805 8309
e930465f
JH
8310static int
8311S_pending_ident(pTHX)
8eceec63 8312{
97aff369 8313 dVAR;
8eceec63 8314 register char *d;
bbd11bfc 8315 PADOFFSET tmp = 0;
8eceec63
SC
8316 /* pit holds the identifier we read and pending_ident is reset */
8317 char pit = PL_pending_ident;
9bde8eb0
NC
8318 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8319 /* All routes through this function want to know if there is a colon. */
c099d646 8320 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8321 PL_pending_ident = 0;
8322
cd81e915 8323 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8324 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8325 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8326
8327 /* if we're in a my(), we can't allow dynamics here.
8328 $foo'bar has already been turned into $foo::bar, so
8329 just check for colons.
8330
8331 if it's a legal name, the OP is a PADANY.
8332 */
8333 if (PL_in_my) {
8334 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8335 if (has_colon)
8eceec63
SC
8336 yyerror(Perl_form(aTHX_ "No package name allowed for "
8337 "variable %s in \"our\"",
8338 PL_tokenbuf));
bc9b26ca 8339 tmp = allocmy(PL_tokenbuf, tokenbuf_len, UTF ? SVf_UTF8 : 0);
8eceec63
SC
8340 }
8341 else {
9bde8eb0 8342 if (has_colon)
952306ac
RGS
8343 yyerror(Perl_form(aTHX_ PL_no_myglob,
8344 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 8345
6154021b 8346 pl_yylval.opval = newOP(OP_PADANY, 0);
bc9b26ca
BF
8347 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len,
8348 UTF ? SVf_UTF8 : 0);
8eceec63
SC
8349 return PRIVATEREF;
8350 }
8351 }
8352
8353 /*
8354 build the ops for accesses to a my() variable.
8355
8356 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8357 then used in a comparison. This catches most, but not
8358 all cases. For instance, it catches
8359 sort { my($a); $a <=> $b }
8360 but not
8361 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8362 (although why you'd do that is anyone's guess).
8363 */
8364
9bde8eb0 8365 if (!has_colon) {
8716503d 8366 if (!PL_in_my)
bc9b26ca
BF
8367 tmp = pad_findmy_pvn(PL_tokenbuf, tokenbuf_len,
8368 UTF ? SVf_UTF8 : 0);
8716503d 8369 if (tmp != NOT_IN_PAD) {
8eceec63 8370 /* might be an "our" variable" */
00b1698f 8371 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8372 /* build ops for a bareword */
b64e5050
AL
8373 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8374 HEK * const stashname = HvNAME_HEK(stash);
8375 SV * const sym = newSVhek(stashname);
396482e1 8376 sv_catpvs(sym, "::");
2a33114a 8377 sv_catpvn_flags(sym, PL_tokenbuf+1, tokenbuf_len - 1, (UTF ? SV_CATUTF8 : SV_CATBYTES ));
6154021b
RGS
8378 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8379 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8380 gv_fetchsv(sym,
8eceec63
SC
8381 (PL_in_eval
8382 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8383 : GV_ADDMULTI
8eceec63
SC
8384 ),
8385 ((PL_tokenbuf[0] == '$') ? SVt_PV
8386 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8387 : SVt_PVHV));
8388 return WORD;
8389 }
8390
8391 /* if it's a sort block and they're naming $a or $b */
8392 if (PL_last_lop_op == OP_SORT &&
8393 PL_tokenbuf[0] == '$' &&
8394 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8395 && !PL_tokenbuf[2])
8396 {
8397 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8398 d < PL_bufend && *d != '\n';
8399 d++)
8400 {
8401 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8402 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8403 PL_tokenbuf);
8404 }
8405 }
8406 }
8407
6154021b
RGS
8408 pl_yylval.opval = newOP(OP_PADANY, 0);
8409 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8410 return PRIVATEREF;
8411 }
8412 }
8413
8414 /*
8415 Whine if they've said @foo in a doublequoted string,
8416 and @foo isn't a variable we can find in the symbol
8417 table.
8418 */
d824713b
NC
8419 if (ckWARN(WARN_AMBIGUOUS) &&
8420 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
0be4d16f
BF
8421 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1,
8422 ( UTF ? SVf_UTF8 : 0 ), SVt_PVAV);
8eceec63 8423 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8424 /* DO NOT warn for @- and @+ */
8425 && !( PL_tokenbuf[2] == '\0' &&
8426 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8427 )
8eceec63
SC
8428 {
8429 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8430 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8431 "Possible unintended interpolation of %s in string",
8432 PL_tokenbuf);
8eceec63
SC
8433 }
8434 }
8435
8436 /* build ops for a bareword */
0be4d16f
BF
8437 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn_flags(PL_tokenbuf + 1,
8438 tokenbuf_len - 1,
8439 UTF ? SVf_UTF8 : 0 ));
6154021b 8440 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7 8441 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
0be4d16f
BF
8442 (PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD)
8443 | ( UTF ? SVf_UTF8 : 0 ),
223f0fb7
NC
8444 ((PL_tokenbuf[0] == '$') ? SVt_PV
8445 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8446 : SVt_PVHV));
8eceec63
SC
8447 return WORD;
8448}
8449
76e3520e 8450STATIC void
c94115d8 8451S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 8452{
97aff369 8453 dVAR;
2f3197b3 8454
7918f24d
NC
8455 PERL_ARGS_ASSERT_CHECKCOMMA;
8456
d008e5eb 8457 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
8458 if (ckWARN(WARN_SYNTAX)) {
8459 int level = 1;
26ff0806 8460 const char *w;
d008e5eb
GS
8461 for (w = s+2; *w && level; w++) {
8462 if (*w == '(')
8463 ++level;
8464 else if (*w == ')')
8465 --level;
8466 }
888fea98
NC
8467 while (isSPACE(*w))
8468 ++w;
b1439985
RGS
8469 /* the list of chars below is for end of statements or
8470 * block / parens, boolean operators (&&, ||, //) and branch
8471 * constructs (or, and, if, until, unless, while, err, for).
8472 * Not a very solid hack... */
8473 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 8474 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 8475 "%s (...) interpreted as function",name);
d008e5eb 8476 }
2f3197b3 8477 }
3280af22 8478 while (s < PL_bufend && isSPACE(*s))
2f3197b3 8479 s++;
a687059c
LW
8480 if (*s == '(')
8481 s++;
3280af22 8482 while (s < PL_bufend && isSPACE(*s))
a687059c 8483 s++;
7e2040f0 8484 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 8485 const char * const w = s++;
7e2040f0 8486 while (isALNUM_lazy_if(s,UTF))
a687059c 8487 s++;
3280af22 8488 while (s < PL_bufend && isSPACE(*s))
a687059c 8489 s++;
e929a76b 8490 if (*s == ',') {
c94115d8 8491 GV* gv;
5458a98a 8492 if (keyword(w, s - w, 0))
e929a76b 8493 return;
c94115d8 8494
2e38bce1 8495 gv = gv_fetchpvn_flags(w, s - w, ( UTF ? SVf_UTF8 : 0 ), SVt_PVCV);
c94115d8 8496 if (gv && GvCVu(gv))
abbb3198 8497 return;
cea2e8a9 8498 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
8499 }
8500 }
8501}
8502
423cee85
JH
8503/* Either returns sv, or mortalizes sv and returns a new SV*.
8504 Best used as sv=new_constant(..., sv, ...).
8505 If s, pv are NULL, calls subroutine with one argument,
8506 and type is used with error messages only. */
8507
b3ac6de7 8508STATIC SV *
eb0d8d16
NC
8509S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
8510 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 8511{
27da23d5 8512 dVAR; dSP;
890ce7af 8513 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 8514 SV *res;
b3ac6de7
IZ
8515 SV **cvp;
8516 SV *cv, *typesv;
89e33a05 8517 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 8518
7918f24d
NC
8519 PERL_ARGS_ASSERT_NEW_CONSTANT;
8520
f0af216f 8521 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
8522 SV *msg;
8523
10edeb5d
JH
8524 why2 = (const char *)
8525 (strEQ(key,"charnames")
8526 ? "(possibly a missing \"use charnames ...\")"
8527 : "");
4e553d73 8528 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
8529 (type ? type: "undef"), why2);
8530
8531 /* This is convoluted and evil ("goto considered harmful")
8532 * but I do not understand the intricacies of all the different
8533 * failure modes of %^H in here. The goal here is to make
8534 * the most probable error message user-friendly. --jhi */
8535
8536 goto msgdone;
8537
423cee85 8538 report:
4e553d73 8539 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 8540 (type ? type: "undef"), why1, why2, why3);
41ab332f 8541 msgdone:
95a20fc0 8542 yyerror(SvPVX_const(msg));
423cee85
JH
8543 SvREFCNT_dec(msg);
8544 return sv;
8545 }
ff3f963a
KW
8546
8547 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
8548 if (PL_error_count > 0 && strEQ(key,"charnames"))
8549 return &PL_sv_undef;
ff3f963a 8550
eb0d8d16 8551 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 8552 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
8553 why1 = "$^H{";
8554 why2 = key;
f0af216f 8555 why3 = "} is not defined";
423cee85 8556 goto report;
b3ac6de7
IZ
8557 }
8558 sv_2mortal(sv); /* Parent created it permanently */
8559 cv = *cvp;
423cee85 8560 if (!pv && s)
59cd0e26 8561 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 8562 if (type && pv)
59cd0e26 8563 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 8564 else
423cee85 8565 typesv = &PL_sv_undef;
4e553d73 8566
e788e7d3 8567 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
8568 ENTER ;
8569 SAVETMPS;
4e553d73 8570
423cee85 8571 PUSHMARK(SP) ;
a5845cb7 8572 EXTEND(sp, 3);
423cee85
JH
8573 if (pv)
8574 PUSHs(pv);
b3ac6de7 8575 PUSHs(sv);
423cee85
JH
8576 if (pv)
8577 PUSHs(typesv);
b3ac6de7 8578 PUTBACK;
423cee85 8579 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 8580
423cee85 8581 SPAGAIN ;
4e553d73 8582
423cee85 8583 /* Check the eval first */
9b0e499b 8584 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 8585 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 8586 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 8587 (void)POPs;
b37c2d43 8588 res = SvREFCNT_inc_simple(sv);
423cee85
JH
8589 }
8590 else {
8591 res = POPs;
b37c2d43 8592 SvREFCNT_inc_simple_void(res);
423cee85 8593 }
4e553d73 8594
423cee85
JH
8595 PUTBACK ;
8596 FREETMPS ;
8597 LEAVE ;
b3ac6de7 8598 POPSTACK;
4e553d73 8599
b3ac6de7 8600 if (!SvOK(res)) {
423cee85
JH
8601 why1 = "Call to &{$^H{";
8602 why2 = key;
f0af216f 8603 why3 = "}} did not return a defined value";
423cee85
JH
8604 sv = res;
8605 goto report;
9b0e499b 8606 }
423cee85 8607
9b0e499b 8608 return res;
b3ac6de7 8609}
4e553d73 8610
d0a148a6
NC
8611/* Returns a NUL terminated string, with the length of the string written to
8612 *slp
8613 */
76e3520e 8614STATIC char *
cea2e8a9 8615S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 8616{
97aff369 8617 dVAR;
463ee0b2 8618 register char *d = dest;
890ce7af 8619 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
8620
8621 PERL_ARGS_ASSERT_SCAN_WORD;
8622
463ee0b2 8623 for (;;) {
8903cb82 8624 if (d >= e)
cea2e8a9 8625 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8626 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8627 *d++ = *s++;
c35e046a 8628 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8629 *d++ = ':';
8630 *d++ = ':';
8631 s++;
8632 }
c35e046a 8633 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
8634 *d++ = *s++;
8635 *d++ = *s++;
8636 }
fd400ab9 8637 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8638 char *t = s + UTF8SKIP(s);
c35e046a 8639 size_t len;
fd400ab9 8640 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 8641 t += UTF8SKIP(t);
c35e046a
AL
8642 len = t - s;
8643 if (d + len > e)
cea2e8a9 8644 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
8645 Copy(s, d, len, char);
8646 d += len;
a0ed51b3
LW
8647 s = t;
8648 }
463ee0b2
LW
8649 else {
8650 *d = '\0';
8651 *slp = d - dest;
8652 return s;
e929a76b 8653 }
378cc40b
LW
8654 }
8655}
8656
76e3520e 8657STATIC char *
f54cb97a 8658S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 8659{
97aff369 8660 dVAR;
6136c704 8661 char *bracket = NULL;
748a9306 8662 char funny = *s++;
6136c704 8663 register char *d = dest;
0b3da58d 8664 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 8665
7918f24d
NC
8666 PERL_ARGS_ASSERT_SCAN_IDENT;
8667
a0d0e21e 8668 if (isSPACE(*s))
29595ff2 8669 s = PEEKSPACE(s);
de3bb511 8670 if (isDIGIT(*s)) {
8903cb82 8671 while (isDIGIT(*s)) {
8672 if (d >= e)
cea2e8a9 8673 Perl_croak(aTHX_ ident_too_long);
378cc40b 8674 *d++ = *s++;
8903cb82 8675 }
378cc40b
LW
8676 }
8677 else {
463ee0b2 8678 for (;;) {
8903cb82 8679 if (d >= e)
cea2e8a9 8680 Perl_croak(aTHX_ ident_too_long);
834a4ddd 8681 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 8682 *d++ = *s++;
7e2040f0 8683 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
8684 *d++ = ':';
8685 *d++ = ':';
8686 s++;
8687 }
a0d0e21e 8688 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
8689 *d++ = *s++;
8690 *d++ = *s++;
8691 }
fd400ab9 8692 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 8693 char *t = s + UTF8SKIP(s);
fd400ab9 8694 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
8695 t += UTF8SKIP(t);
8696 if (d + (t - s) > e)
cea2e8a9 8697 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
8698 Copy(s, d, t - s, char);
8699 d += t - s;
8700 s = t;
8701 }
463ee0b2
LW
8702 else
8703 break;
8704 }
378cc40b
LW
8705 }
8706 *d = '\0';
8707 d = dest;
79072805 8708 if (*d) {
3280af22
NIS
8709 if (PL_lex_state != LEX_NORMAL)
8710 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 8711 return s;
378cc40b 8712 }
748a9306 8713 if (*s == '$' && s[1] &&
3792a11b 8714 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 8715 {
4810e5ec 8716 return s;
5cd24f17 8717 }
79072805
LW
8718 if (*s == '{') {
8719 bracket = s;
8720 s++;
8721 }
8722 else if (ck_uni)
8723 check_uni();
204e6232
BF
8724 if (s < send) {
8725 if (UTF) {
8726 const STRLEN skip = UTF8SKIP(s);
8727 STRLEN i;
8728 d[skip] = '\0';
8729 for ( i = 0; i < skip; i++ )
8730 d[i] = *s++;
8731 }
8732 else {
8733 *d = *s++;
8734 d[1] = '\0';
8735 }
8736 }
2b92dfce 8737 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 8738 *d = toCTRL(*s);
8739 s++;
de3bb511 8740 }
79072805 8741 if (bracket) {
748a9306 8742 if (isSPACE(s[-1])) {
fa83b5b6 8743 while (s < send) {
f54cb97a 8744 const char ch = *s++;
bf4acbe4 8745 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 8746 *d = ch;
8747 break;
8748 }
8749 }
748a9306 8750 }
7e2040f0 8751 if (isIDFIRST_lazy_if(d,UTF)) {
204e6232 8752 d += UTF8SKIP(d);
a0ed51b3 8753 if (UTF) {
6136c704
AL
8754 char *end = s;
8755 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
8756 end += UTF8SKIP(end);
8757 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
8758 end += UTF8SKIP(end);
a0ed51b3 8759 }
6136c704
AL
8760 Copy(s, d, end - s, char);
8761 d += end - s;
8762 s = end;
a0ed51b3
LW
8763 }
8764 else {
2b92dfce 8765 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 8766 *d++ = *s++;
2b92dfce 8767 if (d >= e)
cea2e8a9 8768 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 8769 }
79072805 8770 *d = '\0';
c35e046a
AL
8771 while (s < send && SPACE_OR_TAB(*s))
8772 s++;
ff68c719 8773 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 8774 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
8775 const char * const brack =
8776 (const char *)
8777 ((*s == '[') ? "[...]" : "{...}");
e850844c 8778 /* diag_listed_as: Ambiguous use of %c{%s[...]} resolved to %c%s[...] */
9014280d 8779 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 8780 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
8781 funny, dest, brack, funny, dest, brack);
8782 }
79072805 8783 bracket++;
a0be28da 8784 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
78cdf107 8785 PL_lex_allbrackets++;
79072805
LW
8786 return s;
8787 }
4e553d73
NIS
8788 }
8789 /* Handle extended ${^Foo} variables
2b92dfce
GS
8790 * 1999-02-27 mjd-perl-patch@plover.com */
8791 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
8792 && isALNUM(*s))
8793 {
8794 d++;
8795 while (isALNUM(*s) && d < e) {
8796 *d++ = *s++;
8797 }
8798 if (d >= e)
cea2e8a9 8799 Perl_croak(aTHX_ ident_too_long);
2b92dfce 8800 *d = '\0';
79072805
LW
8801 }
8802 if (*s == '}') {
8803 s++;
7df0d042 8804 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 8805 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
8806 PL_expect = XREF;
8807 }
d008e5eb 8808 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 8809 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
8810 (keyword(dest, d - dest, 0)
8811 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 8812 {
c35e046a
AL
8813 if (funny == '#')
8814 funny = '@';
9014280d 8815 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
8816 "Ambiguous use of %c{%s} resolved to %c%s",
8817 funny, dest, funny, dest);
8818 }
8819 }
79072805
LW
8820 }
8821 else {
8822 s = bracket; /* let the parser handle it */
93a17b20 8823 *dest = '\0';
79072805
LW
8824 }
8825 }
3280af22
NIS
8826 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
8827 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
8828 return s;
8829}
8830
858a358b 8831static bool
3955e1a9 8832S_pmflag(pTHX_ const char* const valid_flags, U32 * pmfl, char** s, char* charset) {
858a358b
KW
8833
8834 /* Adds, subtracts to/from 'pmfl' based on regex modifier flags found in
8835 * the parse starting at 's', based on the subset that are valid in this
8836 * context input to this routine in 'valid_flags'. Advances s. Returns
8837 * TRUE if the input was a valid flag, so the next char may be as well;
3955e1a9
KW
8838 * otherwise FALSE. 'charset' should point to a NUL upon first call on the
8839 * current regex. This routine will set it to any charset modifier found.
8840 * The caller shouldn't change it. This way, another charset modifier
8841 * encountered in the parse can be detected as an error, as we have decided
8842 * allow only one */
858a358b
KW
8843
8844 const char c = **s;
94b03d7d 8845
858a358b
KW
8846 if (! strchr(valid_flags, c)) {
8847 if (isALNUM(c)) {
94b03d7d 8848 goto deprecate;
858a358b
KW
8849 }
8850 return FALSE;
8851 }
8852
8853 switch (c) {
94b03d7d 8854
858a358b
KW
8855 CASE_STD_PMMOD_FLAGS_PARSE_SET(pmfl);
8856 case GLOBAL_PAT_MOD: *pmfl |= PMf_GLOBAL; break;
8857 case CONTINUE_PAT_MOD: *pmfl |= PMf_CONTINUE; break;
8858 case ONCE_PAT_MOD: *pmfl |= PMf_KEEP; break;
8859 case KEEPCOPY_PAT_MOD: *pmfl |= RXf_PMf_KEEPCOPY; break;
8860 case NONDESTRUCT_PAT_MOD: *pmfl |= PMf_NONDESTRUCT; break;
94b03d7d
KW
8861 case LOCALE_PAT_MOD:
8862
8863 /* In 5.14, qr//lt is legal but deprecated; the 't' means they
8864 * can't be regex modifiers.
8865 * In 5.14, s///le is legal and ambiguous. Try to disambiguate as
8866 * much as easily done. s///lei, for example, has to mean regex
8867 * modifiers if it's not an error (as does any word character
8868 * following the 'e'). Otherwise, we resolve to the backwards-
8869 * compatible, but less likely 's/// le ...', i.e. as meaning
8870 * less-than-or-equal. The reason it's not likely is that s//
967ac236
KW
8871 * returns a number for code in the field (/r returns a string, but
8872 * that wasn't added until the 5.13 series), and so '<=' should be
8873 * used for comparing, not 'le'. */
94b03d7d
KW
8874 if (*((*s) + 1) == 't') {
8875 goto deprecate;
8876 }
563734a5 8877 else if (*((*s) + 1) == 'e' && ! isALNUM(*((*s) + 2))) {
aeac89a5
KW
8878
8879 /* 'e' is valid only for substitutes, s///e. If it is not
8880 * valid in the current context, then 'm//le' must mean the
8881 * comparison operator, so use the regular deprecation message.
8882 */
8883 if (! strchr(valid_flags, 'e')) {
8884 goto deprecate;
8885 }
94b03d7d
KW
8886 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
8887 "Ambiguous use of 's//le...' resolved as 's// le...'; Rewrite as 's//el' if you meant 'use locale rules and evaluate rhs as an expression'. In Perl 5.16, it will be resolved the other way");
8888 return FALSE;
8889 }
3955e1a9
KW
8890 if (*charset) {
8891 goto multiple_charsets;
8892 }
94b03d7d 8893 set_regex_charset(pmfl, REGEX_LOCALE_CHARSET);
3955e1a9 8894 *charset = c;
94b03d7d
KW
8895 break;
8896 case UNICODE_PAT_MOD:
8897 /* In 5.14, qr//unless and qr//until are legal but deprecated; the
8898 * 'n' means they can't be regex modifiers */
8899 if (*((*s) + 1) == 'n') {
8900 goto deprecate;
8901 }
3955e1a9
KW
8902 if (*charset) {
8903 goto multiple_charsets;
8904 }
94b03d7d 8905 set_regex_charset(pmfl, REGEX_UNICODE_CHARSET);
3955e1a9 8906 *charset = c;
94b03d7d
KW
8907 break;
8908 case ASCII_RESTRICT_PAT_MOD:
8909 /* In 5.14, qr//and is legal but deprecated; the 'n' means they
8910 * can't be regex modifiers */
8911 if (*((*s) + 1) == 'n') {
8912 goto deprecate;
8913 }
ff3f26d2
KW
8914
8915 if (! *charset) {
94b03d7d
KW
8916 set_regex_charset(pmfl, REGEX_ASCII_RESTRICTED_CHARSET);
8917 }
ff3f26d2
KW
8918 else {
8919
8920 /* Error if previous modifier wasn't an 'a', but if it was, see
8921 * if, and accept, a second occurrence (only) */
8922 if (*charset != 'a'
8923 || get_regex_charset(*pmfl)
8924 != REGEX_ASCII_RESTRICTED_CHARSET)
8925 {
8926 goto multiple_charsets;
8927 }
8928 set_regex_charset(pmfl, REGEX_ASCII_MORE_RESTRICTED_CHARSET);
3955e1a9
KW
8929 }
8930 *charset = c;
94b03d7d
KW
8931 break;
8932 case DEPENDS_PAT_MOD:
3955e1a9
KW
8933 if (*charset) {
8934 goto multiple_charsets;
8935 }
94b03d7d 8936 set_regex_charset(pmfl, REGEX_DEPENDS_CHARSET);
3955e1a9 8937 *charset = c;
94b03d7d 8938 break;
879d0c72 8939 }
94b03d7d 8940
858a358b
KW
8941 (*s)++;
8942 return TRUE;
94b03d7d
KW
8943
8944 deprecate:
8945 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
8946 "Having no space between pattern and following word is deprecated");
8947 return FALSE;
3955e1a9
KW
8948
8949 multiple_charsets:
8950 if (*charset != c) {
8951 yyerror(Perl_form(aTHX_ "Regexp modifiers \"/%c\" and \"/%c\" are mutually exclusive", *charset, c));
8952 }
ff3f26d2
KW
8953 else if (c == 'a') {
8954 yyerror("Regexp modifier \"/a\" may appear a maximum of twice");
8955 }
3955e1a9
KW
8956 else {
8957 yyerror(Perl_form(aTHX_ "Regexp modifier \"/%c\" may not appear twice", c));
8958 }
8959
8960 /* Pretend that it worked, so will continue processing before dieing */
8961 (*s)++;
8962 return TRUE;
879d0c72
NC
8963}
8964
76e3520e 8965STATIC char *
cea2e8a9 8966S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 8967{
97aff369 8968 dVAR;
79072805 8969 PMOP *pm;
5db06880 8970 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 8971 const char * const valid_flags =
a20207d7 8972 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
3955e1a9 8973 char charset = '\0'; /* character set modifier */
5db06880
NC
8974#ifdef PERL_MAD
8975 char *modstart;
8976#endif
8977
7918f24d 8978 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 8979
25c09cbf 8980 if (!s) {
6136c704 8981 const char * const delimiter = skipspace(start);
10edeb5d
JH
8982 Perl_croak(aTHX_
8983 (const char *)
8984 (*delimiter == '?'
8985 ? "Search pattern not terminated or ternary operator parsed as search pattern"
8986 : "Search pattern not terminated" ));
25c09cbf 8987 }
bbce6d69 8988
8782bef2 8989 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
8990 if (PL_multi_open == '?') {
8991 /* This is the only point in the code that sets PMf_ONCE: */
79072805 8992 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
8993
8994 /* Hence it's safe to do this bit of PMOP book-keeping here, which
8995 allows us to restrict the list needed by reset to just the ??
8996 matches. */
8997 assert(type != OP_TRANS);
8998 if (PL_curstash) {
daba3364 8999 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
9000 U32 elements;
9001 if (!mg) {
daba3364 9002 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
9003 0);
9004 }
9005 elements = mg->mg_len / sizeof(PMOP**);
9006 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
9007 ((PMOP**)mg->mg_ptr) [elements++] = pm;
9008 mg->mg_len = elements * sizeof(PMOP**);
9009 PmopSTASH_set(pm,PL_curstash);
9010 }
9011 }
5db06880
NC
9012#ifdef PERL_MAD
9013 modstart = s;
9014#endif
3955e1a9 9015 while (*s && S_pmflag(aTHX_ valid_flags, &(pm->op_pmflags), &s, &charset)) {};
5db06880
NC
9016#ifdef PERL_MAD
9017 if (PL_madskills && modstart != s) {
9018 SV* tmptoken = newSVpvn(modstart, s - modstart);
9019 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
9020 }
9021#endif
4ac733c9 9022 /* issue a warning if /c is specified,but /g is not */
a2a5de95 9023 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 9024 {
a2a5de95
NC
9025 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
9026 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
9027 }
9028
3280af22 9029 PL_lex_op = (OP*)pm;
6154021b 9030 pl_yylval.ival = OP_MATCH;
378cc40b
LW
9031 return s;
9032}
9033
76e3520e 9034STATIC char *
cea2e8a9 9035S_scan_subst(pTHX_ char *start)
79072805 9036{
27da23d5 9037 dVAR;
22594288 9038 char *s;
79072805 9039 register PMOP *pm;
4fdae800 9040 I32 first_start;
79072805 9041 I32 es = 0;
3955e1a9 9042 char charset = '\0'; /* character set modifier */
5db06880
NC
9043#ifdef PERL_MAD
9044 char *modstart;
9045#endif
79072805 9046
7918f24d
NC
9047 PERL_ARGS_ASSERT_SCAN_SUBST;
9048
6154021b 9049 pl_yylval.ival = OP_NULL;
79072805 9050
5db06880 9051 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9052
37fd879b 9053 if (!s)
cea2e8a9 9054 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 9055
3280af22 9056 if (s[-1] == PL_multi_open)
79072805 9057 s--;
5db06880
NC
9058#ifdef PERL_MAD
9059 if (PL_madskills) {
cd81e915
NC
9060 CURMAD('q', PL_thisopen);
9061 CURMAD('_', PL_thiswhite);
9062 CURMAD('E', PL_thisstuff);
9063 CURMAD('Q', PL_thisclose);
9064 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9065 }
9066#endif
79072805 9067
3280af22 9068 first_start = PL_multi_start;
5db06880 9069 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9070 if (!s) {
37fd879b 9071 if (PL_lex_stuff) {
3280af22 9072 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9073 PL_lex_stuff = NULL;
37fd879b 9074 }
cea2e8a9 9075 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 9076 }
3280af22 9077 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 9078
79072805 9079 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
9080
9081#ifdef PERL_MAD
9082 if (PL_madskills) {
cd81e915
NC
9083 CURMAD('z', PL_thisopen);
9084 CURMAD('R', PL_thisstuff);
9085 CURMAD('Z', PL_thisclose);
5db06880
NC
9086 }
9087 modstart = s;
9088#endif
9089
48c036b1 9090 while (*s) {
a20207d7 9091 if (*s == EXEC_PAT_MOD) {
a687059c 9092 s++;
2f3197b3 9093 es++;
a687059c 9094 }
3955e1a9
KW
9095 else if (! S_pmflag(aTHX_ S_PAT_MODS, &(pm->op_pmflags), &s, &charset))
9096 {
48c036b1 9097 break;
aa78b661 9098 }
378cc40b 9099 }
79072805 9100
5db06880
NC
9101#ifdef PERL_MAD
9102 if (PL_madskills) {
9103 if (modstart != s)
9104 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9105 append_madprops(PL_thismad, (OP*)pm, 0);
9106 PL_thismad = 0;
5db06880
NC
9107 }
9108#endif
a2a5de95
NC
9109 if ((pm->op_pmflags & PMf_CONTINUE)) {
9110 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
9111 }
9112
79072805 9113 if (es) {
6136c704
AL
9114 SV * const repl = newSVpvs("");
9115
0244c3a4
GS
9116 PL_sublex_info.super_bufptr = s;
9117 PL_sublex_info.super_bufend = PL_bufend;
9118 PL_multi_end = 0;
79072805 9119 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
9120 while (es-- > 0) {
9121 if (es)
9122 sv_catpvs(repl, "eval ");
9123 else
9124 sv_catpvs(repl, "do ");
9125 }
6f43d98f 9126 sv_catpvs(repl, "{");
3280af22 9127 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
9128 if (strchr(SvPVX(PL_lex_repl), '#'))
9129 sv_catpvs(repl, "\n");
9130 sv_catpvs(repl, "}");
25da4f38 9131 SvEVALED_on(repl);
3280af22
NIS
9132 SvREFCNT_dec(PL_lex_repl);
9133 PL_lex_repl = repl;
378cc40b 9134 }
79072805 9135
3280af22 9136 PL_lex_op = (OP*)pm;
6154021b 9137 pl_yylval.ival = OP_SUBST;
378cc40b
LW
9138 return s;
9139}
9140
76e3520e 9141STATIC char *
cea2e8a9 9142S_scan_trans(pTHX_ char *start)
378cc40b 9143{
97aff369 9144 dVAR;
a0d0e21e 9145 register char* s;
11343788 9146 OP *o;
79072805 9147 short *tbl;
b84c11c8
NC
9148 U8 squash;
9149 U8 del;
9150 U8 complement;
bb16bae8 9151 bool nondestruct = 0;
5db06880
NC
9152#ifdef PERL_MAD
9153 char *modstart;
9154#endif
79072805 9155
7918f24d
NC
9156 PERL_ARGS_ASSERT_SCAN_TRANS;
9157
6154021b 9158 pl_yylval.ival = OP_NULL;
79072805 9159
5db06880 9160 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 9161 if (!s)
cea2e8a9 9162 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 9163
3280af22 9164 if (s[-1] == PL_multi_open)
2f3197b3 9165 s--;
5db06880
NC
9166#ifdef PERL_MAD
9167 if (PL_madskills) {
cd81e915
NC
9168 CURMAD('q', PL_thisopen);
9169 CURMAD('_', PL_thiswhite);
9170 CURMAD('E', PL_thisstuff);
9171 CURMAD('Q', PL_thisclose);
9172 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
9173 }
9174#endif
2f3197b3 9175
5db06880 9176 s = scan_str(s,!!PL_madskills,FALSE);
79072805 9177 if (!s) {
37fd879b 9178 if (PL_lex_stuff) {
3280af22 9179 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 9180 PL_lex_stuff = NULL;
37fd879b 9181 }
cea2e8a9 9182 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 9183 }
5db06880 9184 if (PL_madskills) {
cd81e915
NC
9185 CURMAD('z', PL_thisopen);
9186 CURMAD('R', PL_thisstuff);
9187 CURMAD('Z', PL_thisclose);
5db06880 9188 }
79072805 9189
a0ed51b3 9190 complement = del = squash = 0;
5db06880
NC
9191#ifdef PERL_MAD
9192 modstart = s;
9193#endif
7a1e2023
NC
9194 while (1) {
9195 switch (*s) {
9196 case 'c':
79072805 9197 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
9198 break;
9199 case 'd':
a0ed51b3 9200 del = OPpTRANS_DELETE;
7a1e2023
NC
9201 break;
9202 case 's':
79072805 9203 squash = OPpTRANS_SQUASH;
7a1e2023 9204 break;
bb16bae8
FC
9205 case 'r':
9206 nondestruct = 1;
9207 break;
7a1e2023
NC
9208 default:
9209 goto no_more;
9210 }
395c3793
LW
9211 s++;
9212 }
7a1e2023 9213 no_more:
8973db79 9214
aa1f7c5b 9215 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
bb16bae8 9216 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
59f00321
RGS
9217 o->op_private &= ~OPpTRANS_ALL;
9218 o->op_private |= del|squash|complement|
7948272d
NIS
9219 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
9220 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 9221
3280af22 9222 PL_lex_op = o;
bb16bae8 9223 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
9224
9225#ifdef PERL_MAD
9226 if (PL_madskills) {
9227 if (modstart != s)
9228 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
9229 append_madprops(PL_thismad, o, 0);
9230 PL_thismad = 0;
5db06880
NC
9231 }
9232#endif
9233
79072805
LW
9234 return s;
9235}
9236
76e3520e 9237STATIC char *
cea2e8a9 9238S_scan_heredoc(pTHX_ register char *s)
79072805 9239{
97aff369 9240 dVAR;
79072805
LW
9241 SV *herewas;
9242 I32 op_type = OP_SCALAR;
9243 I32 len;
9244 SV *tmpstr;
9245 char term;
73d840c0 9246 const char *found_newline;
79072805 9247 register char *d;
fc36a67e 9248 register char *e;
4633a7c4 9249 char *peek;
f54cb97a 9250 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
9251#ifdef PERL_MAD
9252 I32 stuffstart = s - SvPVX(PL_linestr);
9253 char *tstart;
9254
cd81e915 9255 PL_realtokenstart = -1;
5db06880 9256#endif
79072805 9257
7918f24d
NC
9258 PERL_ARGS_ASSERT_SCAN_HEREDOC;
9259
79072805 9260 s += 2;
3280af22
NIS
9261 d = PL_tokenbuf;
9262 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 9263 if (!outer)
79072805 9264 *d++ = '\n';
c35e046a
AL
9265 peek = s;
9266 while (SPACE_OR_TAB(*peek))
9267 peek++;
3792a11b 9268 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 9269 s = peek;
79072805 9270 term = *s++;
3280af22 9271 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 9272 d += len;
3280af22 9273 if (s < PL_bufend)
79072805 9274 s++;
79072805
LW
9275 }
9276 else {
9277 if (*s == '\\')
9278 s++, term = '\'';
9279 else
9280 term = '"';
7e2040f0 9281 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 9282 deprecate("bare << to mean <<\"\"");
7e2040f0 9283 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 9284 if (d < e)
9285 *d++ = *s;
9286 }
9287 }
3280af22 9288 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 9289 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
9290 *d++ = '\n';
9291 *d = '\0';
3280af22 9292 len = d - PL_tokenbuf;
5db06880
NC
9293
9294#ifdef PERL_MAD
9295 if (PL_madskills) {
9296 tstart = PL_tokenbuf + !outer;
cd81e915 9297 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 9298 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 9299 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9300 stuffstart = s - SvPVX(PL_linestr);
9301 }
9302#endif
6a27c188 9303#ifndef PERL_STRICT_CR
f63a84b2
LW
9304 d = strchr(s, '\r');
9305 if (d) {
b464bac0 9306 char * const olds = s;
f63a84b2 9307 s = d;
3280af22 9308 while (s < PL_bufend) {
f63a84b2
LW
9309 if (*s == '\r') {
9310 *d++ = '\n';
9311 if (*++s == '\n')
9312 s++;
9313 }
9314 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
9315 *d++ = *s++;
9316 s++;
9317 }
9318 else
9319 *d++ = *s++;
9320 }
9321 *d = '\0';
3280af22 9322 PL_bufend = d;
95a20fc0 9323 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
9324 s = olds;
9325 }
9326#endif
5db06880
NC
9327#ifdef PERL_MAD
9328 found_newline = 0;
9329#endif
10edeb5d 9330 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
9331 herewas = newSVpvn(s,PL_bufend-s);
9332 }
9333 else {
5db06880
NC
9334#ifdef PERL_MAD
9335 herewas = newSVpvn(s-1,found_newline-s+1);
9336#else
73d840c0
AL
9337 s--;
9338 herewas = newSVpvn(s,found_newline-s);
5db06880 9339#endif
73d840c0 9340 }
5db06880
NC
9341#ifdef PERL_MAD
9342 if (PL_madskills) {
9343 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9344 if (PL_thisstuff)
9345 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 9346 else
cd81e915 9347 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
9348 }
9349#endif
79072805 9350 s += SvCUR(herewas);
748a9306 9351
5db06880
NC
9352#ifdef PERL_MAD
9353 stuffstart = s - SvPVX(PL_linestr);
9354
9355 if (found_newline)
9356 s--;
9357#endif
9358
7d0a29fe
NC
9359 tmpstr = newSV_type(SVt_PVIV);
9360 SvGROW(tmpstr, 80);
748a9306 9361 if (term == '\'') {
79072805 9362 op_type = OP_CONST;
45977657 9363 SvIV_set(tmpstr, -1);
748a9306
LW
9364 }
9365 else if (term == '`') {
79072805 9366 op_type = OP_BACKTICK;
45977657 9367 SvIV_set(tmpstr, '\\');
748a9306 9368 }
79072805
LW
9369
9370 CLINE;
57843af0 9371 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
9372 PL_multi_open = PL_multi_close = '<';
9373 term = *PL_tokenbuf;
0244c3a4 9374 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
9375 char * const bufptr = PL_sublex_info.super_bufptr;
9376 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 9377 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
9378 s = strchr(bufptr, '\n');
9379 if (!s)
9380 s = bufend;
9381 d = s;
9382 while (s < bufend &&
9383 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
9384 if (*s++ == '\n')
57843af0 9385 CopLINE_inc(PL_curcop);
0244c3a4
GS
9386 }
9387 if (s >= bufend) {
eb160463 9388 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
9389 missingterm(PL_tokenbuf);
9390 }
9391 sv_setpvn(herewas,bufptr,d-bufptr+1);
9392 sv_setpvn(tmpstr,d+1,s-d);
9393 s += len - 1;
9394 sv_catpvn(herewas,s,bufend-s);
95a20fc0 9395 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
9396
9397 s = olds;
9398 goto retval;
9399 }
9400 else if (!outer) {
79072805 9401 d = s;
3280af22
NIS
9402 while (s < PL_bufend &&
9403 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 9404 if (*s++ == '\n')
57843af0 9405 CopLINE_inc(PL_curcop);
79072805 9406 }
3280af22 9407 if (s >= PL_bufend) {
eb160463 9408 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9409 missingterm(PL_tokenbuf);
79072805
LW
9410 }
9411 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
9412#ifdef PERL_MAD
9413 if (PL_madskills) {
cd81e915
NC
9414 if (PL_thisstuff)
9415 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 9416 else
cd81e915 9417 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
9418 stuffstart = s - SvPVX(PL_linestr);
9419 }
9420#endif
79072805 9421 s += len - 1;
57843af0 9422 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 9423
3280af22
NIS
9424 sv_catpvn(herewas,s,PL_bufend-s);
9425 sv_setsv(PL_linestr,herewas);
9426 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
9427 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9428 PL_last_lop = PL_last_uni = NULL;
79072805
LW
9429 }
9430 else
76f68e9b 9431 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 9432 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
9433#ifdef PERL_MAD
9434 if (PL_madskills) {
9435 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9436 if (PL_thisstuff)
9437 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9438 else
cd81e915 9439 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9440 }
9441#endif
f0e67a1d 9442 PL_bufptr = s;
17cc9359 9443 CopLINE_inc(PL_curcop);
f0e67a1d 9444 if (!outer || !lex_next_chunk(0)) {
eb160463 9445 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 9446 missingterm(PL_tokenbuf);
79072805 9447 }
17cc9359 9448 CopLINE_dec(PL_curcop);
f0e67a1d 9449 s = PL_bufptr;
5db06880
NC
9450#ifdef PERL_MAD
9451 stuffstart = s - SvPVX(PL_linestr);
9452#endif
57843af0 9453 CopLINE_inc(PL_curcop);
3280af22 9454 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 9455 PL_last_lop = PL_last_uni = NULL;
6a27c188 9456#ifndef PERL_STRICT_CR
3280af22 9457 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
9458 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
9459 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 9460 {
3280af22
NIS
9461 PL_bufend[-2] = '\n';
9462 PL_bufend--;
95a20fc0 9463 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 9464 }
3280af22
NIS
9465 else if (PL_bufend[-1] == '\r')
9466 PL_bufend[-1] = '\n';
f63a84b2 9467 }
3280af22
NIS
9468 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
9469 PL_bufend[-1] = '\n';
f63a84b2 9470#endif
3280af22 9471 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 9472 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 9473 *(SvPVX(PL_linestr) + off ) = ' ';
37c6a70c 9474 lex_grow_linestr(SvCUR(PL_linestr) + SvCUR(herewas) + 1);
3280af22
NIS
9475 sv_catsv(PL_linestr,herewas);
9476 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 9477 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
9478 }
9479 else {
3280af22
NIS
9480 s = PL_bufend;
9481 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
9482 }
9483 }
79072805 9484 s++;
0244c3a4 9485retval:
57843af0 9486 PL_multi_end = CopLINE(PL_curcop);
79072805 9487 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 9488 SvPV_shrink_to_cur(tmpstr);
79072805 9489 }
8990e307 9490 SvREFCNT_dec(herewas);
2f31ce75 9491 if (!IN_BYTES) {
95a20fc0 9492 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
9493 SvUTF8_on(tmpstr);
9494 else if (PL_encoding)
9495 sv_recode_to_utf8(tmpstr, PL_encoding);
9496 }
3280af22 9497 PL_lex_stuff = tmpstr;
6154021b 9498 pl_yylval.ival = op_type;
79072805
LW
9499 return s;
9500}
9501
02aa26ce
NT
9502/* scan_inputsymbol
9503 takes: current position in input buffer
9504 returns: new position in input buffer
6154021b 9505 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
9506
9507 This code handles:
9508
9509 <> read from ARGV
9510 <FH> read from filehandle
9511 <pkg::FH> read from package qualified filehandle
9512 <pkg'FH> read from package qualified filehandle
9513 <$fh> read from filehandle in $fh
9514 <*.h> filename glob
9515
9516*/
9517
76e3520e 9518STATIC char *
cea2e8a9 9519S_scan_inputsymbol(pTHX_ char *start)
79072805 9520{
97aff369 9521 dVAR;
02aa26ce 9522 register char *s = start; /* current position in buffer */
1b420867 9523 char *end;
79072805 9524 I32 len;
6136c704
AL
9525 char *d = PL_tokenbuf; /* start of temp holding space */
9526 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
9527
7918f24d
NC
9528 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
9529
1b420867
GS
9530 end = strchr(s, '\n');
9531 if (!end)
9532 end = PL_bufend;
9533 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
9534
9535 /* die if we didn't have space for the contents of the <>,
1b420867 9536 or if it didn't end, or if we see a newline
02aa26ce
NT
9537 */
9538
bb7a0f54 9539 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 9540 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 9541 if (s >= end)
cea2e8a9 9542 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 9543
fc36a67e 9544 s++;
02aa26ce
NT
9545
9546 /* check for <$fh>
9547 Remember, only scalar variables are interpreted as filehandles by
9548 this code. Anything more complex (e.g., <$fh{$num}>) will be
9549 treated as a glob() call.
9550 This code makes use of the fact that except for the $ at the front,
9551 a scalar variable and a filehandle look the same.
9552 */
4633a7c4 9553 if (*d == '$' && d[1]) d++;
02aa26ce
NT
9554
9555 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 9556 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
2a507800 9557 d += UTF ? UTF8SKIP(d) : 1;
02aa26ce
NT
9558
9559 /* If we've tried to read what we allow filehandles to look like, and
9560 there's still text left, then it must be a glob() and not a getline.
9561 Use scan_str to pull out the stuff between the <> and treat it
9562 as nothing more than a string.
9563 */
9564
3280af22 9565 if (d - PL_tokenbuf != len) {
6154021b 9566 pl_yylval.ival = OP_GLOB;
5db06880 9567 s = scan_str(start,!!PL_madskills,FALSE);
79072805 9568 if (!s)
cea2e8a9 9569 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
9570 return s;
9571 }
395c3793 9572 else {
9b3023bc 9573 bool readline_overriden = FALSE;
6136c704 9574 GV *gv_readline;
9b3023bc 9575 GV **gvp;
02aa26ce 9576 /* we're in a filehandle read situation */
3280af22 9577 d = PL_tokenbuf;
02aa26ce
NT
9578
9579 /* turn <> into <ARGV> */
79072805 9580 if (!len)
689badd5 9581 Copy("ARGV",d,5,char);
02aa26ce 9582
9b3023bc 9583 /* Check whether readline() is overriden */
fafc274c 9584 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 9585 if ((gv_readline
ba979b31 9586 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 9587 ||
017a3ce5 9588 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 9589 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 9590 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
9591 readline_overriden = TRUE;
9592
02aa26ce
NT
9593 /* if <$fh>, create the ops to turn the variable into a
9594 filehandle
9595 */
79072805 9596 if (*d == '$') {
02aa26ce
NT
9597 /* try to find it in the pad for this block, otherwise find
9598 add symbol table ops
9599 */
bc9b26ca 9600 const PADOFFSET tmp = pad_findmy_pvn(d, len, UTF ? SVf_UTF8 : 0);
bbd11bfc 9601 if (tmp != NOT_IN_PAD) {
00b1698f 9602 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
9603 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
9604 HEK * const stashname = HvNAME_HEK(stash);
9605 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 9606 sv_catpvs(sym, "::");
f558d5af
JH
9607 sv_catpv(sym, d+1);
9608 d = SvPVX(sym);
9609 goto intro_sym;
9610 }
9611 else {
6136c704 9612 OP * const o = newOP(OP_PADSV, 0);
f558d5af 9613 o->op_targ = tmp;
9b3023bc
RGS
9614 PL_lex_op = readline_overriden
9615 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9616 op_append_elem(OP_LIST, o,
9b3023bc
RGS
9617 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
9618 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 9619 }
a0d0e21e
LW
9620 }
9621 else {
f558d5af
JH
9622 GV *gv;
9623 ++d;
9624intro_sym:
9625 gv = gv_fetchpv(d,
9626 (PL_in_eval
9627 ? (GV_ADDMULTI | GV_ADDINEVAL)
25db2ea6 9628 : GV_ADDMULTI) | ( UTF ? SVf_UTF8 : 0 ),
f558d5af 9629 SVt_PV);
9b3023bc
RGS
9630 PL_lex_op = readline_overriden
9631 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9632 op_append_elem(OP_LIST,
9b3023bc
RGS
9633 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
9634 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9635 : (OP*)newUNOP(OP_READLINE, 0,
9636 newUNOP(OP_RV2SV, 0,
9637 newGVOP(OP_GV, 0, gv)));
a0d0e21e 9638 }
7c6fadd6
RGS
9639 if (!readline_overriden)
9640 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
9641 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
9642 pl_yylval.ival = OP_NULL;
79072805 9643 }
02aa26ce
NT
9644
9645 /* If it's none of the above, it must be a literal filehandle
9646 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 9647 else {
25db2ea6 9648 GV * const gv = gv_fetchpv(d, GV_ADD | ( UTF ? SVf_UTF8 : 0 ), SVt_PVIO);
9b3023bc
RGS
9649 PL_lex_op = readline_overriden
9650 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 9651 op_append_elem(OP_LIST,
9b3023bc
RGS
9652 newGVOP(OP_GV, 0, gv),
9653 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
9654 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 9655 pl_yylval.ival = OP_NULL;
79072805
LW
9656 }
9657 }
02aa26ce 9658
79072805
LW
9659 return s;
9660}
9661
02aa26ce
NT
9662
9663/* scan_str
9664 takes: start position in buffer
09bef843
SB
9665 keep_quoted preserve \ on the embedded delimiter(s)
9666 keep_delims preserve the delimiters around the string
02aa26ce
NT
9667 returns: position to continue reading from buffer
9668 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
9669 updates the read buffer.
9670
9671 This subroutine pulls a string out of the input. It is called for:
9672 q single quotes q(literal text)
9673 ' single quotes 'literal text'
9674 qq double quotes qq(interpolate $here please)
9675 " double quotes "interpolate $here please"
9676 qx backticks qx(/bin/ls -l)
9677 ` backticks `/bin/ls -l`
9678 qw quote words @EXPORT_OK = qw( func() $spam )
9679 m// regexp match m/this/
9680 s/// regexp substitute s/this/that/
9681 tr/// string transliterate tr/this/that/
9682 y/// string transliterate y/this/that/
9683 ($*@) sub prototypes sub foo ($)
09bef843 9684 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
9685 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
9686
9687 In most of these cases (all but <>, patterns and transliterate)
9688 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
9689 calls scan_str(). s/// makes yylex() call scan_subst() which calls
9690 scan_str(). tr/// and y/// make yylex() call scan_trans() which
9691 calls scan_str().
4e553d73 9692
02aa26ce
NT
9693 It skips whitespace before the string starts, and treats the first
9694 character as the delimiter. If the delimiter is one of ([{< then
9695 the corresponding "close" character )]}> is used as the closing
9696 delimiter. It allows quoting of delimiters, and if the string has
9697 balanced delimiters ([{<>}]) it allows nesting.
9698
37fd879b
HS
9699 On success, the SV with the resulting string is put into lex_stuff or,
9700 if that is already non-NULL, into lex_repl. The second case occurs only
9701 when parsing the RHS of the special constructs s/// and tr/// (y///).
9702 For convenience, the terminating delimiter character is stuffed into
9703 SvIVX of the SV.
02aa26ce
NT
9704*/
9705
76e3520e 9706STATIC char *
09bef843 9707S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 9708{
97aff369 9709 dVAR;
02aa26ce 9710 SV *sv; /* scalar value: string */
d3fcec1f 9711 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
9712 register char *s = start; /* current position in the buffer */
9713 register char term; /* terminating character */
9714 register char *to; /* current position in the sv's data */
9715 I32 brackets = 1; /* bracket nesting level */
89491803 9716 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 9717 I32 termcode; /* terminating char. code */
89ebb4a3 9718 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 9719 STRLEN termlen; /* length of terminating string */
0331ef07 9720 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
9721#ifdef PERL_MAD
9722 int stuffstart;
9723 char *tstart;
9724#endif
02aa26ce 9725
7918f24d
NC
9726 PERL_ARGS_ASSERT_SCAN_STR;
9727
02aa26ce 9728 /* skip space before the delimiter */
29595ff2
NC
9729 if (isSPACE(*s)) {
9730 s = PEEKSPACE(s);
9731 }
02aa26ce 9732
5db06880 9733#ifdef PERL_MAD
cd81e915
NC
9734 if (PL_realtokenstart >= 0) {
9735 stuffstart = PL_realtokenstart;
9736 PL_realtokenstart = -1;
5db06880
NC
9737 }
9738 else
9739 stuffstart = start - SvPVX(PL_linestr);
9740#endif
02aa26ce 9741 /* mark where we are, in case we need to report errors */
79072805 9742 CLINE;
02aa26ce
NT
9743
9744 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 9745 term = *s;
220e2d4e
IH
9746 if (!UTF) {
9747 termcode = termstr[0] = term;
9748 termlen = 1;
9749 }
9750 else {
f3b9ce0f 9751 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
9752 Copy(s, termstr, termlen, U8);
9753 if (!UTF8_IS_INVARIANT(term))
9754 has_utf8 = TRUE;
9755 }
b1c7b182 9756
02aa26ce 9757 /* mark where we are */
57843af0 9758 PL_multi_start = CopLINE(PL_curcop);
3280af22 9759 PL_multi_open = term;
02aa26ce
NT
9760
9761 /* find corresponding closing delimiter */
93a17b20 9762 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
9763 termcode = termstr[0] = term = tmps[5];
9764
3280af22 9765 PL_multi_close = term;
79072805 9766
561b68a9
SH
9767 /* create a new SV to hold the contents. 79 is the SV's initial length.
9768 What a random number. */
7d0a29fe
NC
9769 sv = newSV_type(SVt_PVIV);
9770 SvGROW(sv, 80);
45977657 9771 SvIV_set(sv, termcode);
a0d0e21e 9772 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
9773
9774 /* move past delimiter and try to read a complete string */
09bef843 9775 if (keep_delims)
220e2d4e
IH
9776 sv_catpvn(sv, s, termlen);
9777 s += termlen;
5db06880
NC
9778#ifdef PERL_MAD
9779 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9780 if (!PL_thisopen && !keep_delims) {
9781 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
9782 stuffstart = s - SvPVX(PL_linestr);
9783 }
9784#endif
93a17b20 9785 for (;;) {
220e2d4e
IH
9786 if (PL_encoding && !UTF) {
9787 bool cont = TRUE;
9788
9789 while (cont) {
95a20fc0 9790 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 9791 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 9792 &offset, (char*)termstr, termlen);
6136c704
AL
9793 const char * const ns = SvPVX_const(PL_linestr) + offset;
9794 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
9795
9796 for (; s < ns; s++) {
9797 if (*s == '\n' && !PL_rsfp)
9798 CopLINE_inc(PL_curcop);
9799 }
9800 if (!found)
9801 goto read_more_line;
9802 else {
9803 /* handle quoted delimiters */
52327caf 9804 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 9805 const char *t;
95a20fc0 9806 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
9807 t--;
9808 if ((svlast-1 - t) % 2) {
9809 if (!keep_quoted) {
9810 *(svlast-1) = term;
9811 *svlast = '\0';
9812 SvCUR_set(sv, SvCUR(sv) - 1);
9813 }
9814 continue;
9815 }
9816 }
9817 if (PL_multi_open == PL_multi_close) {
9818 cont = FALSE;
9819 }
9820 else {
f54cb97a
AL
9821 const char *t;
9822 char *w;
0331ef07 9823 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
9824 /* At here, all closes are "was quoted" one,
9825 so we don't check PL_multi_close. */
9826 if (*t == '\\') {
9827 if (!keep_quoted && *(t+1) == PL_multi_open)
9828 t++;
9829 else
9830 *w++ = *t++;
9831 }
9832 else if (*t == PL_multi_open)
9833 brackets++;
9834
9835 *w = *t;
9836 }
9837 if (w < t) {
9838 *w++ = term;
9839 *w = '\0';
95a20fc0 9840 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 9841 }
0331ef07 9842 last_off = w - SvPVX(sv);
220e2d4e
IH
9843 if (--brackets <= 0)
9844 cont = FALSE;
9845 }
9846 }
9847 }
9848 if (!keep_delims) {
9849 SvCUR_set(sv, SvCUR(sv) - 1);
9850 *SvEND(sv) = '\0';
9851 }
9852 break;
9853 }
9854
02aa26ce 9855 /* extend sv if need be */
3280af22 9856 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 9857 /* set 'to' to the next character in the sv's string */
463ee0b2 9858 to = SvPVX(sv)+SvCUR(sv);
09bef843 9859
02aa26ce 9860 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
9861 if (PL_multi_open == PL_multi_close) {
9862 for (; s < PL_bufend; s++,to++) {
02aa26ce 9863 /* embedded newlines increment the current line number */
3280af22 9864 if (*s == '\n' && !PL_rsfp)
57843af0 9865 CopLINE_inc(PL_curcop);
02aa26ce 9866 /* handle quoted delimiters */
3280af22 9867 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 9868 if (!keep_quoted && s[1] == term)
a0d0e21e 9869 s++;
02aa26ce 9870 /* any other quotes are simply copied straight through */
a0d0e21e
LW
9871 else
9872 *to++ = *s++;
9873 }
02aa26ce
NT
9874 /* terminate when run out of buffer (the for() condition), or
9875 have found the terminator */
220e2d4e
IH
9876 else if (*s == term) {
9877 if (termlen == 1)
9878 break;
f3b9ce0f 9879 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
9880 break;
9881 }
63cd0674 9882 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9883 has_utf8 = TRUE;
93a17b20
LW
9884 *to = *s;
9885 }
9886 }
02aa26ce
NT
9887
9888 /* if the terminator isn't the same as the start character (e.g.,
9889 matched brackets), we have to allow more in the quoting, and
9890 be prepared for nested brackets.
9891 */
93a17b20 9892 else {
02aa26ce 9893 /* read until we run out of string, or we find the terminator */
3280af22 9894 for (; s < PL_bufend; s++,to++) {
02aa26ce 9895 /* embedded newlines increment the line count */
3280af22 9896 if (*s == '\n' && !PL_rsfp)
57843af0 9897 CopLINE_inc(PL_curcop);
02aa26ce 9898 /* backslashes can escape the open or closing characters */
3280af22 9899 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
9900 if (!keep_quoted &&
9901 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
9902 s++;
9903 else
9904 *to++ = *s++;
9905 }
02aa26ce 9906 /* allow nested opens and closes */
3280af22 9907 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 9908 break;
3280af22 9909 else if (*s == PL_multi_open)
93a17b20 9910 brackets++;
63cd0674 9911 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 9912 has_utf8 = TRUE;
93a17b20
LW
9913 *to = *s;
9914 }
9915 }
02aa26ce 9916 /* terminate the copied string and update the sv's end-of-string */
93a17b20 9917 *to = '\0';
95a20fc0 9918 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 9919
02aa26ce
NT
9920 /*
9921 * this next chunk reads more into the buffer if we're not done yet
9922 */
9923
b1c7b182
GS
9924 if (s < PL_bufend)
9925 break; /* handle case where we are done yet :-) */
79072805 9926
6a27c188 9927#ifndef PERL_STRICT_CR
95a20fc0 9928 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
9929 if ((to[-2] == '\r' && to[-1] == '\n') ||
9930 (to[-2] == '\n' && to[-1] == '\r'))
9931 {
f63a84b2
LW
9932 to[-2] = '\n';
9933 to--;
95a20fc0 9934 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
9935 }
9936 else if (to[-1] == '\r')
9937 to[-1] = '\n';
9938 }
95a20fc0 9939 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
9940 to[-1] = '\n';
9941#endif
9942
220e2d4e 9943 read_more_line:
02aa26ce
NT
9944 /* if we're out of file, or a read fails, bail and reset the current
9945 line marker so we can report where the unterminated string began
9946 */
5db06880
NC
9947#ifdef PERL_MAD
9948 if (PL_madskills) {
c35e046a 9949 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
9950 if (PL_thisstuff)
9951 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 9952 else
cd81e915 9953 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
9954 }
9955#endif
f0e67a1d
Z
9956 CopLINE_inc(PL_curcop);
9957 PL_bufptr = PL_bufend;
9958 if (!lex_next_chunk(0)) {
c07a80fd 9959 sv_free(sv);
eb160463 9960 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 9961 return NULL;
79072805 9962 }
f0e67a1d 9963 s = PL_bufptr;
5db06880
NC
9964#ifdef PERL_MAD
9965 stuffstart = 0;
9966#endif
378cc40b 9967 }
4e553d73 9968
02aa26ce
NT
9969 /* at this point, we have successfully read the delimited string */
9970
220e2d4e 9971 if (!PL_encoding || UTF) {
5db06880
NC
9972#ifdef PERL_MAD
9973 if (PL_madskills) {
c35e046a 9974 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 9975 const int len = s - tstart;
cd81e915 9976 if (PL_thisstuff)
c35e046a 9977 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9978 else
c35e046a 9979 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9980 if (!PL_thisclose && !keep_delims)
9981 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
9982 }
9983#endif
9984
220e2d4e
IH
9985 if (keep_delims)
9986 sv_catpvn(sv, s, termlen);
9987 s += termlen;
9988 }
5db06880
NC
9989#ifdef PERL_MAD
9990 else {
9991 if (PL_madskills) {
c35e046a
AL
9992 char * const tstart = SvPVX(PL_linestr) + stuffstart;
9993 const int len = s - tstart - termlen;
cd81e915 9994 if (PL_thisstuff)
c35e046a 9995 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 9996 else
c35e046a 9997 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
9998 if (!PL_thisclose && !keep_delims)
9999 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
10000 }
10001 }
10002#endif
220e2d4e 10003 if (has_utf8 || PL_encoding)
b1c7b182 10004 SvUTF8_on(sv);
d0063567 10005
57843af0 10006 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
10007
10008 /* if we allocated too much space, give some back */
93a17b20
LW
10009 if (SvCUR(sv) + 5 < SvLEN(sv)) {
10010 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 10011 SvPV_renew(sv, SvLEN(sv));
79072805 10012 }
02aa26ce
NT
10013
10014 /* decide whether this is the first or second quoted string we've read
10015 for this op
10016 */
4e553d73 10017
3280af22
NIS
10018 if (PL_lex_stuff)
10019 PL_lex_repl = sv;
79072805 10020 else
3280af22 10021 PL_lex_stuff = sv;
378cc40b
LW
10022 return s;
10023}
10024
02aa26ce
NT
10025/*
10026 scan_num
10027 takes: pointer to position in buffer
10028 returns: pointer to new position in buffer
6154021b 10029 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
10030
10031 Read a number in any of the formats that Perl accepts:
10032
7fd134d9
JH
10033 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
10034 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
10035 0b[01](_?[01])*
10036 0[0-7](_?[0-7])*
10037 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 10038
3280af22 10039 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
10040 thing it reads.
10041
10042 If it reads a number without a decimal point or an exponent, it will
10043 try converting the number to an integer and see if it can do so
10044 without loss of precision.
10045*/
4e553d73 10046
378cc40b 10047char *
bfed75c6 10048Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 10049{
97aff369 10050 dVAR;
bfed75c6 10051 register const char *s = start; /* current position in buffer */
02aa26ce
NT
10052 register char *d; /* destination in temp buffer */
10053 register char *e; /* end of temp buffer */
86554af2 10054 NV nv; /* number read, as a double */
a0714e2c 10055 SV *sv = NULL; /* place to put the converted number */
a86a20aa 10056 bool floatit; /* boolean: int or float? */
cbbf8932 10057 const char *lastub = NULL; /* position of last underbar */
bfed75c6 10058 static char const number_too_long[] = "Number too long";
378cc40b 10059
7918f24d
NC
10060 PERL_ARGS_ASSERT_SCAN_NUM;
10061
02aa26ce
NT
10062 /* We use the first character to decide what type of number this is */
10063
378cc40b 10064 switch (*s) {
79072805 10065 default:
cea2e8a9 10066 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 10067
02aa26ce 10068 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 10069 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
10070 case '0':
10071 {
02aa26ce
NT
10072 /* variables:
10073 u holds the "number so far"
4f19785b
WSI
10074 shift the power of 2 of the base
10075 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
10076 overflowed was the number more than we can hold?
10077
10078 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
10079 we in octal/hex/binary?" indicator to disallow hex characters
10080 when in octal mode.
02aa26ce 10081 */
9e24b6e2
JH
10082 NV n = 0.0;
10083 UV u = 0;
79072805 10084 I32 shift;
9e24b6e2 10085 bool overflowed = FALSE;
61f33854 10086 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
10087 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
10088 static const char* const bases[5] =
10089 { "", "binary", "", "octal", "hexadecimal" };
10090 static const char* const Bases[5] =
10091 { "", "Binary", "", "Octal", "Hexadecimal" };
10092 static const char* const maxima[5] =
10093 { "",
10094 "0b11111111111111111111111111111111",
10095 "",
10096 "037777777777",
10097 "0xffffffff" };
bfed75c6 10098 const char *base, *Base, *max;
378cc40b 10099
02aa26ce 10100 /* check for hex */
a674e8db 10101 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
10102 shift = 4;
10103 s += 2;
61f33854 10104 just_zero = FALSE;
a674e8db 10105 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
10106 shift = 1;
10107 s += 2;
61f33854 10108 just_zero = FALSE;
378cc40b 10109 }
02aa26ce 10110 /* check for a decimal in disguise */
b78218b7 10111 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 10112 goto decimal;
02aa26ce 10113 /* so it must be octal */
928753ea 10114 else {
378cc40b 10115 shift = 3;
928753ea
JH
10116 s++;
10117 }
10118
10119 if (*s == '_') {
a2a5de95 10120 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
10121 "Misplaced _ in number");
10122 lastub = s++;
10123 }
9e24b6e2
JH
10124
10125 base = bases[shift];
10126 Base = Bases[shift];
10127 max = maxima[shift];
02aa26ce 10128
4f19785b 10129 /* read the rest of the number */
378cc40b 10130 for (;;) {
9e24b6e2 10131 /* x is used in the overflow test,
893fe2c2 10132 b is the digit we're adding on. */
9e24b6e2 10133 UV x, b;
55497cff 10134
378cc40b 10135 switch (*s) {
02aa26ce
NT
10136
10137 /* if we don't mention it, we're done */
378cc40b
LW
10138 default:
10139 goto out;
02aa26ce 10140
928753ea 10141 /* _ are ignored -- but warned about if consecutive */
de3bb511 10142 case '_':
a2a5de95
NC
10143 if (lastub && s == lastub + 1)
10144 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10145 "Misplaced _ in number");
928753ea 10146 lastub = s++;
de3bb511 10147 break;
02aa26ce
NT
10148
10149 /* 8 and 9 are not octal */
378cc40b 10150 case '8': case '9':
4f19785b 10151 if (shift == 3)
cea2e8a9 10152 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 10153 /* FALL THROUGH */
02aa26ce
NT
10154
10155 /* octal digits */
4f19785b 10156 case '2': case '3': case '4':
378cc40b 10157 case '5': case '6': case '7':
4f19785b 10158 if (shift == 1)
cea2e8a9 10159 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
10160 /* FALL THROUGH */
10161
10162 case '0': case '1':
02aa26ce 10163 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 10164 goto digit;
02aa26ce
NT
10165
10166 /* hex digits */
378cc40b
LW
10167 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
10168 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 10169 /* make sure they said 0x */
378cc40b
LW
10170 if (shift != 4)
10171 goto out;
55497cff 10172 b = (*s++ & 7) + 9;
02aa26ce
NT
10173
10174 /* Prepare to put the digit we have onto the end
10175 of the number so far. We check for overflows.
10176 */
10177
55497cff 10178 digit:
61f33854 10179 just_zero = FALSE;
9e24b6e2
JH
10180 if (!overflowed) {
10181 x = u << shift; /* make room for the digit */
10182
10183 if ((x >> shift) != u
10184 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
10185 overflowed = TRUE;
10186 n = (NV) u;
9b387841
NC
10187 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10188 "Integer overflow in %s number",
10189 base);
9e24b6e2
JH
10190 } else
10191 u = x | b; /* add the digit to the end */
10192 }
10193 if (overflowed) {
10194 n *= nvshift[shift];
10195 /* If an NV has not enough bits in its
10196 * mantissa to represent an UV this summing of
10197 * small low-order numbers is a waste of time
10198 * (because the NV cannot preserve the
10199 * low-order bits anyway): we could just
10200 * remember when did we overflow and in the
10201 * end just multiply n by the right
10202 * amount. */
10203 n += (NV) b;
55497cff 10204 }
378cc40b
LW
10205 break;
10206 }
10207 }
02aa26ce
NT
10208
10209 /* if we get here, we had success: make a scalar value from
10210 the number.
10211 */
378cc40b 10212 out:
928753ea
JH
10213
10214 /* final misplaced underbar check */
10215 if (s[-1] == '_') {
a2a5de95 10216 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
10217 }
10218
9e24b6e2 10219 if (overflowed) {
a2a5de95
NC
10220 if (n > 4294967295.0)
10221 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10222 "%s number > %s non-portable",
10223 Base, max);
b081dd7e 10224 sv = newSVnv(n);
9e24b6e2
JH
10225 }
10226 else {
15041a67 10227#if UVSIZE > 4
a2a5de95
NC
10228 if (u > 0xffffffff)
10229 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
10230 "%s number > %s non-portable",
10231 Base, max);
2cc4c2dc 10232#endif
b081dd7e 10233 sv = newSVuv(u);
9e24b6e2 10234 }
61f33854 10235 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 10236 sv = new_constant(start, s - start, "integer",
eb0d8d16 10237 sv, NULL, NULL, 0);
61f33854 10238 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 10239 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
10240 }
10241 break;
02aa26ce
NT
10242
10243 /*
10244 handle decimal numbers.
10245 we're also sent here when we read a 0 as the first digit
10246 */
378cc40b
LW
10247 case '1': case '2': case '3': case '4': case '5':
10248 case '6': case '7': case '8': case '9': case '.':
10249 decimal:
3280af22
NIS
10250 d = PL_tokenbuf;
10251 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 10252 floatit = FALSE;
02aa26ce
NT
10253
10254 /* read next group of digits and _ and copy into d */
de3bb511 10255 while (isDIGIT(*s) || *s == '_') {
4e553d73 10256 /* skip underscores, checking for misplaced ones
02aa26ce
NT
10257 if -w is on
10258 */
93a17b20 10259 if (*s == '_') {
a2a5de95
NC
10260 if (lastub && s == lastub + 1)
10261 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10262 "Misplaced _ in number");
928753ea 10263 lastub = s++;
93a17b20 10264 }
fc36a67e 10265 else {
02aa26ce 10266 /* check for end of fixed-length buffer */
fc36a67e 10267 if (d >= e)
cea2e8a9 10268 Perl_croak(aTHX_ number_too_long);
02aa26ce 10269 /* if we're ok, copy the character */
378cc40b 10270 *d++ = *s++;
fc36a67e 10271 }
378cc40b 10272 }
02aa26ce
NT
10273
10274 /* final misplaced underbar check */
928753ea 10275 if (lastub && s == lastub + 1) {
a2a5de95 10276 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 10277 }
02aa26ce
NT
10278
10279 /* read a decimal portion if there is one. avoid
10280 3..5 being interpreted as the number 3. followed
10281 by .5
10282 */
2f3197b3 10283 if (*s == '.' && s[1] != '.') {
79072805 10284 floatit = TRUE;
378cc40b 10285 *d++ = *s++;
02aa26ce 10286
928753ea 10287 if (*s == '_') {
a2a5de95
NC
10288 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10289 "Misplaced _ in number");
928753ea
JH
10290 lastub = s;
10291 }
10292
10293 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 10294 */
fc36a67e 10295 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 10296 /* fixed length buffer check */
fc36a67e 10297 if (d >= e)
cea2e8a9 10298 Perl_croak(aTHX_ number_too_long);
928753ea 10299 if (*s == '_') {
a2a5de95
NC
10300 if (lastub && s == lastub + 1)
10301 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10302 "Misplaced _ in number");
928753ea
JH
10303 lastub = s;
10304 }
10305 else
fc36a67e 10306 *d++ = *s;
378cc40b 10307 }
928753ea
JH
10308 /* fractional part ending in underbar? */
10309 if (s[-1] == '_') {
a2a5de95
NC
10310 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10311 "Misplaced _ in number");
928753ea 10312 }
dd629d5b
GS
10313 if (*s == '.' && isDIGIT(s[1])) {
10314 /* oops, it's really a v-string, but without the "v" */
f4758303 10315 s = start;
dd629d5b
GS
10316 goto vstring;
10317 }
378cc40b 10318 }
02aa26ce
NT
10319
10320 /* read exponent part, if present */
3792a11b 10321 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
10322 floatit = TRUE;
10323 s++;
02aa26ce
NT
10324
10325 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 10326 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 10327
7fd134d9
JH
10328 /* stray preinitial _ */
10329 if (*s == '_') {
a2a5de95
NC
10330 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10331 "Misplaced _ in number");
7fd134d9
JH
10332 lastub = s++;
10333 }
10334
02aa26ce 10335 /* allow positive or negative exponent */
378cc40b
LW
10336 if (*s == '+' || *s == '-')
10337 *d++ = *s++;
02aa26ce 10338
7fd134d9
JH
10339 /* stray initial _ */
10340 if (*s == '_') {
a2a5de95
NC
10341 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10342 "Misplaced _ in number");
7fd134d9
JH
10343 lastub = s++;
10344 }
10345
7fd134d9
JH
10346 /* read digits of exponent */
10347 while (isDIGIT(*s) || *s == '_') {
10348 if (isDIGIT(*s)) {
10349 if (d >= e)
10350 Perl_croak(aTHX_ number_too_long);
b3b48e3e 10351 *d++ = *s++;
7fd134d9
JH
10352 }
10353 else {
041457d9 10354 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
10355 (!isDIGIT(s[1]) && s[1] != '_')))
10356 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
10357 "Misplaced _ in number");
b3b48e3e 10358 lastub = s++;
7fd134d9 10359 }
7fd134d9 10360 }
378cc40b 10361 }
02aa26ce 10362
02aa26ce 10363
0b7fceb9 10364 /*
58bb9ec3
NC
10365 We try to do an integer conversion first if no characters
10366 indicating "float" have been found.
0b7fceb9
MU
10367 */
10368
10369 if (!floatit) {
58bb9ec3 10370 UV uv;
6136c704 10371 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
10372
10373 if (flags == IS_NUMBER_IN_UV) {
10374 if (uv <= IV_MAX)
b081dd7e 10375 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 10376 else
b081dd7e 10377 sv = newSVuv(uv);
58bb9ec3
NC
10378 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
10379 if (uv <= (UV) IV_MIN)
b081dd7e 10380 sv = newSViv(-(IV)uv);
58bb9ec3
NC
10381 else
10382 floatit = TRUE;
10383 } else
10384 floatit = TRUE;
10385 }
0b7fceb9 10386 if (floatit) {
58bb9ec3
NC
10387 /* terminate the string */
10388 *d = '\0';
86554af2 10389 nv = Atof(PL_tokenbuf);
b081dd7e 10390 sv = newSVnv(nv);
86554af2 10391 }
86554af2 10392
eb0d8d16
NC
10393 if ( floatit
10394 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
10395 const char *const key = floatit ? "float" : "integer";
10396 const STRLEN keylen = floatit ? 5 : 7;
10397 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
10398 key, keylen, sv, NULL, NULL, 0);
10399 }
378cc40b 10400 break;
0b7fceb9 10401
e312add1 10402 /* if it starts with a v, it could be a v-string */
a7cb1f99 10403 case 'v':
dd629d5b 10404vstring:
561b68a9 10405 sv = newSV(5); /* preallocate storage space */
65b06e02 10406 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 10407 break;
79072805 10408 }
a687059c 10409
02aa26ce
NT
10410 /* make the op for the constant and return */
10411
a86a20aa 10412 if (sv)
b73d6f50 10413 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 10414 else
5f66b61c 10415 lvalp->opval = NULL;
a687059c 10416
73d840c0 10417 return (char *)s;
378cc40b
LW
10418}
10419
76e3520e 10420STATIC char *
cea2e8a9 10421S_scan_formline(pTHX_ register char *s)
378cc40b 10422{
97aff369 10423 dVAR;
79072805 10424 register char *eol;
378cc40b 10425 register char *t;
6136c704 10426 SV * const stuff = newSVpvs("");
79072805 10427 bool needargs = FALSE;
c5ee2135 10428 bool eofmt = FALSE;
5db06880
NC
10429#ifdef PERL_MAD
10430 char *tokenstart = s;
4f61fd4b
JC
10431 SV* savewhite = NULL;
10432
5db06880 10433 if (PL_madskills) {
cd81e915
NC
10434 savewhite = PL_thiswhite;
10435 PL_thiswhite = 0;
5db06880
NC
10436 }
10437#endif
378cc40b 10438
7918f24d
NC
10439 PERL_ARGS_ASSERT_SCAN_FORMLINE;
10440
79072805 10441 while (!needargs) {
a1b95068 10442 if (*s == '.') {
c35e046a 10443 t = s+1;
51882d45 10444#ifdef PERL_STRICT_CR
c35e046a
AL
10445 while (SPACE_OR_TAB(*t))
10446 t++;
51882d45 10447#else
c35e046a
AL
10448 while (SPACE_OR_TAB(*t) || *t == '\r')
10449 t++;
51882d45 10450#endif
c5ee2135
WL
10451 if (*t == '\n' || t == PL_bufend) {
10452 eofmt = TRUE;
79072805 10453 break;
c5ee2135 10454 }
79072805 10455 }
3280af22 10456 if (PL_in_eval && !PL_rsfp) {
07409e01 10457 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 10458 if (!eol++)
3280af22 10459 eol = PL_bufend;
0f85fab0
LW
10460 }
10461 else
3280af22 10462 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 10463 if (*s != '#') {
a0d0e21e
LW
10464 for (t = s; t < eol; t++) {
10465 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
10466 needargs = FALSE;
10467 goto enough; /* ~~ must be first line in formline */
378cc40b 10468 }
a0d0e21e
LW
10469 if (*t == '@' || *t == '^')
10470 needargs = TRUE;
378cc40b 10471 }
7121b347
MG
10472 if (eol > s) {
10473 sv_catpvn(stuff, s, eol-s);
2dc4c65b 10474#ifndef PERL_STRICT_CR
7121b347
MG
10475 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
10476 char *end = SvPVX(stuff) + SvCUR(stuff);
10477 end[-2] = '\n';
10478 end[-1] = '\0';
b162af07 10479 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 10480 }
2dc4c65b 10481#endif
7121b347
MG
10482 }
10483 else
10484 break;
79072805 10485 }
95a20fc0 10486 s = (char*)eol;
3280af22 10487 if (PL_rsfp) {
f0e67a1d 10488 bool got_some;
5db06880
NC
10489#ifdef PERL_MAD
10490 if (PL_madskills) {
cd81e915
NC
10491 if (PL_thistoken)
10492 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 10493 else
cd81e915 10494 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
10495 }
10496#endif
f0e67a1d
Z
10497 PL_bufptr = PL_bufend;
10498 CopLINE_inc(PL_curcop);
10499 got_some = lex_next_chunk(0);
10500 CopLINE_dec(PL_curcop);
10501 s = PL_bufptr;
5db06880 10502#ifdef PERL_MAD
f0e67a1d 10503 tokenstart = PL_bufptr;
5db06880 10504#endif
f0e67a1d 10505 if (!got_some)
378cc40b 10506 break;
378cc40b 10507 }
463ee0b2 10508 incline(s);
79072805 10509 }
a0d0e21e
LW
10510 enough:
10511 if (SvCUR(stuff)) {
3280af22 10512 PL_expect = XTERM;
79072805 10513 if (needargs) {
3280af22 10514 PL_lex_state = LEX_NORMAL;
cd81e915 10515 start_force(PL_curforce);
9ded7720 10516 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
10517 force_next(',');
10518 }
a0d0e21e 10519 else
3280af22 10520 PL_lex_state = LEX_FORMLINE;
1bd51a4c 10521 if (!IN_BYTES) {
95a20fc0 10522 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
10523 SvUTF8_on(stuff);
10524 else if (PL_encoding)
10525 sv_recode_to_utf8(stuff, PL_encoding);
10526 }
cd81e915 10527 start_force(PL_curforce);
9ded7720 10528 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 10529 force_next(THING);
cd81e915 10530 start_force(PL_curforce);
9ded7720 10531 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 10532 force_next(LSTOP);
378cc40b 10533 }
79072805 10534 else {
8990e307 10535 SvREFCNT_dec(stuff);
c5ee2135
WL
10536 if (eofmt)
10537 PL_lex_formbrack = 0;
3280af22 10538 PL_bufptr = s;
79072805 10539 }
5db06880
NC
10540#ifdef PERL_MAD
10541 if (PL_madskills) {
cd81e915
NC
10542 if (PL_thistoken)
10543 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 10544 else
cd81e915
NC
10545 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
10546 PL_thiswhite = savewhite;
5db06880
NC
10547 }
10548#endif
79072805 10549 return s;
378cc40b 10550}
a687059c 10551
ba6d6ac9 10552I32
864dbfa3 10553Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 10554{
97aff369 10555 dVAR;
a3b680e6 10556 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 10557 CV* const outsidecv = PL_compcv;
8990e307 10558
3280af22
NIS
10559 if (PL_compcv) {
10560 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 10561 }
7766f137 10562 SAVEI32(PL_subline);
3280af22 10563 save_item(PL_subname);
3280af22 10564 SAVESPTR(PL_compcv);
3280af22 10565
ea726b52 10566 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
10567 CvFLAGS(PL_compcv) |= flags;
10568
57843af0 10569 PL_subline = CopLINE(PL_curcop);
dd2155a4 10570 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 10571 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 10572 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 10573
8990e307
LW
10574 return oldsavestack_ix;
10575}
10576
084592ab
CN
10577#ifdef __SC__
10578#pragma segment Perl_yylex
10579#endif
af41e527
NC
10580static int
10581S_yywarn(pTHX_ const char *const s)
8990e307 10582{
97aff369 10583 dVAR;
7918f24d
NC
10584
10585 PERL_ARGS_ASSERT_YYWARN;
10586
faef0170 10587 PL_in_eval |= EVAL_WARNONLY;
748a9306 10588 yyerror(s);
faef0170 10589 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 10590 return 0;
8990e307
LW
10591}
10592
10593int
15f169a1 10594Perl_yyerror(pTHX_ const char *const s)
463ee0b2 10595{
97aff369 10596 dVAR;
bfed75c6
AL
10597 const char *where = NULL;
10598 const char *context = NULL;
68dc0745 10599 int contlen = -1;
46fc3d4c 10600 SV *msg;
5912531f 10601 int yychar = PL_parser->yychar;
463ee0b2 10602
7918f24d
NC
10603 PERL_ARGS_ASSERT_YYERROR;
10604
3280af22 10605 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 10606 where = "at EOF";
8bcfe651
TM
10607 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
10608 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
10609 PL_oldbufptr != PL_bufptr) {
f355267c
JH
10610 /*
10611 Only for NetWare:
10612 The code below is removed for NetWare because it abends/crashes on NetWare
10613 when the script has error such as not having the closing quotes like:
10614 if ($var eq "value)
10615 Checking of white spaces is anyway done in NetWare code.
10616 */
10617#ifndef NETWARE
3280af22
NIS
10618 while (isSPACE(*PL_oldoldbufptr))
10619 PL_oldoldbufptr++;
f355267c 10620#endif
3280af22
NIS
10621 context = PL_oldoldbufptr;
10622 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 10623 }
8bcfe651
TM
10624 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
10625 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
10626 /*
10627 Only for NetWare:
10628 The code below is removed for NetWare because it abends/crashes on NetWare
10629 when the script has error such as not having the closing quotes like:
10630 if ($var eq "value)
10631 Checking of white spaces is anyway done in NetWare code.
10632 */
10633#ifndef NETWARE
3280af22
NIS
10634 while (isSPACE(*PL_oldbufptr))
10635 PL_oldbufptr++;
f355267c 10636#endif
3280af22
NIS
10637 context = PL_oldbufptr;
10638 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
10639 }
10640 else if (yychar > 255)
68dc0745 10641 where = "next token ???";
12fbd33b 10642 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
10643 if (PL_lex_state == LEX_NORMAL ||
10644 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 10645 where = "at end of line";
3280af22 10646 else if (PL_lex_inpat)
68dc0745 10647 where = "within pattern";
463ee0b2 10648 else
68dc0745 10649 where = "within string";
463ee0b2 10650 }
46fc3d4c 10651 else {
84bafc02 10652 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 10653 if (yychar < 32)
cea2e8a9 10654 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 10655 else if (isPRINT_LC(yychar)) {
88c9ea1e 10656 const char string = yychar;
5e7aa789
NC
10657 sv_catpvn(where_sv, &string, 1);
10658 }
463ee0b2 10659 else
cea2e8a9 10660 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 10661 where = SvPVX_const(where_sv);
463ee0b2 10662 }
46fc3d4c 10663 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 10664 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 10665 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 10666 if (context)
cea2e8a9 10667 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 10668 else
cea2e8a9 10669 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 10670 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 10671 Perl_sv_catpvf(aTHX_ msg,
57def98f 10672 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 10673 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 10674 PL_multi_end = 0;
a0d0e21e 10675 }
500960a6 10676 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 10677 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 10678 }
463ee0b2 10679 else
5a844595 10680 qerror(msg);
c7d6bfb2
GS
10681 if (PL_error_count >= 10) {
10682 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 10683 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 10684 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
10685 else
10686 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 10687 OutCopFILE(PL_curcop));
c7d6bfb2 10688 }
3280af22 10689 PL_in_my = 0;
5c284bb0 10690 PL_in_my_stash = NULL;
463ee0b2
LW
10691 return 0;
10692}
084592ab
CN
10693#ifdef __SC__
10694#pragma segment Main
10695#endif
4e35701f 10696
b250498f 10697STATIC char*
3ae08724 10698S_swallow_bom(pTHX_ U8 *s)
01ec43d0 10699{
97aff369 10700 dVAR;
f54cb97a 10701 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
10702
10703 PERL_ARGS_ASSERT_SWALLOW_BOM;
10704
7aa207d6 10705 switch (s[0]) {
4e553d73
NIS
10706 case 0xFF:
10707 if (s[1] == 0xFE) {
ee6ba15d 10708 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 10709 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 10710 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 10711#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 10712 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 10713 s += 2;
dea0fc0b 10714 if (PL_bufend > (char*)s) {
81a923f4 10715 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 10716 }
b250498f 10717#else
ee6ba15d 10718 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 10719#endif
01ec43d0
GS
10720 }
10721 break;
78ae23f5 10722 case 0xFE:
7aa207d6 10723 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 10724#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10725 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
10726 s += 2;
10727 if (PL_bufend > (char *)s) {
81a923f4 10728 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 10729 }
b250498f 10730#else
ee6ba15d 10731 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 10732#endif
01ec43d0
GS
10733 }
10734 break;
3ae08724
GS
10735 case 0xEF:
10736 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 10737 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
10738 s += 3; /* UTF-8 */
10739 }
10740 break;
10741 case 0:
7aa207d6
JH
10742 if (slen > 3) {
10743 if (s[1] == 0) {
10744 if (s[2] == 0xFE && s[3] == 0xFF) {
10745 /* UTF-32 big-endian */
ee6ba15d 10746 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
10747 }
10748 }
10749 else if (s[2] == 0 && s[3] != 0) {
10750 /* Leading bytes
10751 * 00 xx 00 xx
10752 * are a good indicator of UTF-16BE. */
ee6ba15d 10753#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10754 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
10755 s = add_utf16_textfilter(s, FALSE);
10756#else
10757 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
10758#endif
7aa207d6 10759 }
01ec43d0 10760 }
e294cc5d
JH
10761#ifdef EBCDIC
10762 case 0xDD:
10763 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
10764 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
10765 s += 4; /* UTF-8 */
10766 }
10767 break;
10768#endif
10769
7aa207d6
JH
10770 default:
10771 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
10772 /* Leading bytes
10773 * xx 00 xx 00
10774 * are a good indicator of UTF-16LE. */
ee6ba15d 10775#ifndef PERL_NO_UTF16_FILTER
7aa207d6 10776 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 10777 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
10778#else
10779 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
10780#endif
7aa207d6 10781 }
01ec43d0 10782 }
b8f84bb2 10783 return (char*)s;
b250498f 10784}
4755096e 10785
6e3aabd6
GS
10786
10787#ifndef PERL_NO_UTF16_FILTER
10788static I32
a28af015 10789S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 10790{
97aff369 10791 dVAR;
f3040f2c 10792 SV *const filter = FILTER_DATA(idx);
2a773401
NC
10793 /* We re-use this each time round, throwing the contents away before we
10794 return. */
2a773401 10795 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 10796 SV *const utf8_buffer = filter;
c28d6105 10797 IV status = IoPAGE(filter);
f2338a2e 10798 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 10799 I32 retval;
c8b0cbae 10800
c85ae797
NC
10801 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
10802
c8b0cbae
NC
10803 /* As we're automatically added, at the lowest level, and hence only called
10804 from this file, we can be sure that we're not called in block mode. Hence
10805 don't bother writing code to deal with block mode. */
10806 if (maxlen) {
10807 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
10808 }
c28d6105
NC
10809 if (status < 0) {
10810 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
10811 }
1de9afcd 10812 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 10813 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 10814 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
10815 reverse ? 'l' : 'b', idx, maxlen, status,
10816 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10817
10818 while (1) {
10819 STRLEN chars;
10820 STRLEN have;
dea0fc0b 10821 I32 newlen;
2a773401 10822 U8 *end;
c28d6105
NC
10823 /* First, look in our buffer of existing UTF-8 data: */
10824 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
10825
10826 if (nl) {
10827 ++nl;
10828 } else if (status == 0) {
10829 /* EOF */
10830 IoPAGE(filter) = 0;
10831 nl = SvEND(utf8_buffer);
10832 }
10833 if (nl) {
d2d1d4de
NC
10834 STRLEN got = nl - SvPVX(utf8_buffer);
10835 /* Did we have anything to append? */
10836 retval = got != 0;
10837 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
10838 /* Everything else in this code works just fine if SVp_POK isn't
10839 set. This, however, needs it, and we need it to work, else
10840 we loop infinitely because the buffer is never consumed. */
10841 sv_chop(utf8_buffer, nl);
10842 break;
10843 }
ba77e4cc 10844
c28d6105
NC
10845 /* OK, not a complete line there, so need to read some more UTF-16.
10846 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
10847 while (1) {
10848 if (status <= 0)
10849 break;
10850 if (SvCUR(utf16_buffer) >= 2) {
10851 /* Location of the high octet of the last complete code point.
10852 Gosh, UTF-16 is a pain. All the benefits of variable length,
10853 *coupled* with all the benefits of partial reads and
10854 endianness. */
10855 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
10856 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
10857
10858 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
10859 break;
10860 }
10861
10862 /* We have the first half of a surrogate. Read more. */
10863 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
10864 }
c28d6105 10865
c28d6105
NC
10866 status = FILTER_READ(idx + 1, utf16_buffer,
10867 160 + (SvCUR(utf16_buffer) & 1));
10868 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 10869 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
10870 if (status < 0) {
10871 /* Error */
10872 IoPAGE(filter) = status;
10873 return status;
10874 }
10875 }
10876
10877 chars = SvCUR(utf16_buffer) >> 1;
10878 have = SvCUR(utf8_buffer);
10879 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 10880
aa6dbd60 10881 if (reverse) {
c28d6105
NC
10882 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
10883 (U8*)SvPVX_const(utf8_buffer) + have,
10884 chars * 2, &newlen);
aa6dbd60 10885 } else {
2a773401 10886 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
10887 (U8*)SvPVX_const(utf8_buffer) + have,
10888 chars * 2, &newlen);
2a773401 10889 }
c28d6105 10890 SvCUR_set(utf8_buffer, have + newlen);
2a773401 10891 *end = '\0';
c28d6105 10892
e07286ed
NC
10893 /* No need to keep this SV "well-formed" with a '\0' after the end, as
10894 it's private to us, and utf16_to_utf8{,reversed} take a
10895 (pointer,length) pair, rather than a NUL-terminated string. */
10896 if(SvCUR(utf16_buffer) & 1) {
10897 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
10898 SvCUR_set(utf16_buffer, 1);
10899 } else {
10900 SvCUR_set(utf16_buffer, 0);
10901 }
2a773401 10902 }
c28d6105
NC
10903 DEBUG_P(PerlIO_printf(Perl_debug_log,
10904 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
10905 status,
10906 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
10907 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 10908 return retval;
6e3aabd6 10909}
81a923f4
NC
10910
10911static U8 *
10912S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
10913{
2a773401 10914 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 10915
c85ae797
NC
10916 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
10917
c28d6105 10918 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 10919 sv_setpvs(filter, "");
2a773401 10920 IoLINES(filter) = reversed;
c28d6105
NC
10921 IoPAGE(filter) = 1; /* Not EOF */
10922
10923 /* Sadly, we have to return a valid pointer, come what may, so we have to
10924 ignore any error return from this. */
10925 SvCUR_set(PL_linestr, 0);
10926 if (FILTER_READ(0, PL_linestr, 0)) {
10927 SvUTF8_on(PL_linestr);
81a923f4 10928 } else {
c28d6105 10929 SvUTF8_on(PL_linestr);
81a923f4 10930 }
c28d6105 10931 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
10932 return (U8*)SvPVX(PL_linestr);
10933}
6e3aabd6 10934#endif
9f4817db 10935
f333445c
JP
10936/*
10937Returns a pointer to the next character after the parsed
10938vstring, as well as updating the passed in sv.
10939
10940Function must be called like
10941
561b68a9 10942 sv = newSV(5);
65b06e02 10943 s = scan_vstring(s,e,sv);
f333445c 10944
65b06e02 10945where s and e are the start and end of the string.
f333445c
JP
10946The sv should already be large enough to store the vstring
10947passed in, for performance reasons.
10948
10949*/
10950
10951char *
15f169a1 10952Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 10953{
97aff369 10954 dVAR;
bfed75c6
AL
10955 const char *pos = s;
10956 const char *start = s;
7918f24d
NC
10957
10958 PERL_ARGS_ASSERT_SCAN_VSTRING;
10959
f333445c 10960 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 10961 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 10962 pos++;
f333445c
JP
10963 if ( *pos != '.') {
10964 /* this may not be a v-string if followed by => */
bfed75c6 10965 const char *next = pos;
65b06e02 10966 while (next < e && isSPACE(*next))
8fc7bb1c 10967 ++next;
65b06e02 10968 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
10969 /* return string not v-string */
10970 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 10971 return (char *)pos;
f333445c
JP
10972 }
10973 }
10974
10975 if (!isALPHA(*pos)) {
89ebb4a3 10976 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 10977
d4c19fe8
AL
10978 if (*s == 'v')
10979 s++; /* get past 'v' */
f333445c 10980
76f68e9b 10981 sv_setpvs(sv, "");
f333445c
JP
10982
10983 for (;;) {
d4c19fe8 10984 /* this is atoi() that tolerates underscores */
0bd48802
AL
10985 U8 *tmpend;
10986 UV rev = 0;
d4c19fe8
AL
10987 const char *end = pos;
10988 UV mult = 1;
10989 while (--end >= s) {
10990 if (*end != '_') {
10991 const UV orev = rev;
f333445c
JP
10992 rev += (*end - '0') * mult;
10993 mult *= 10;
9b387841
NC
10994 if (orev > rev)
10995 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
10996 "Integer overflow in decimal number");
f333445c
JP
10997 }
10998 }
10999#ifdef EBCDIC
11000 if (rev > 0x7FFFFFFF)
11001 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
11002#endif
11003 /* Append native character for the rev point */
11004 tmpend = uvchr_to_utf8(tmpbuf, rev);
11005 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
11006 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
11007 SvUTF8_on(sv);
65b06e02 11008 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
11009 s = ++pos;
11010 else {
11011 s = pos;
11012 break;
11013 }
65b06e02 11014 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
11015 pos++;
11016 }
11017 SvPOK_on(sv);
11018 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
11019 SvRMAGICAL_on(sv);
11020 }
73d840c0 11021 return (char *)s;
f333445c
JP
11022}
11023
88e1f1a2
JV
11024int
11025Perl_keyword_plugin_standard(pTHX_
11026 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
11027{
11028 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
11029 PERL_UNUSED_CONTEXT;
11030 PERL_UNUSED_ARG(keyword_ptr);
11031 PERL_UNUSED_ARG(keyword_len);
11032 PERL_UNUSED_ARG(op_ptr);
11033 return KEYWORD_PLUGIN_DECLINE;
11034}
11035
78cdf107 11036#define parse_recdescent(g,p) S_parse_recdescent(aTHX_ g,p)
e53d8f76 11037static void
78cdf107 11038S_parse_recdescent(pTHX_ int gramtype, I32 fakeeof)
a7aaec61
Z
11039{
11040 SAVEI32(PL_lex_brackets);
11041 if (PL_lex_brackets > 100)
11042 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
11043 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
78cdf107
Z
11044 SAVEI32(PL_lex_allbrackets);
11045 PL_lex_allbrackets = 0;
11046 SAVEI8(PL_lex_fakeeof);
2dcac756 11047 PL_lex_fakeeof = (U8)fakeeof;
a7aaec61
Z
11048 if(yyparse(gramtype) && !PL_parser->error_count)
11049 qerror(Perl_mess(aTHX_ "Parse error"));
11050}
11051
78cdf107 11052#define parse_recdescent_for_op(g,p) S_parse_recdescent_for_op(aTHX_ g,p)
e53d8f76 11053static OP *
78cdf107 11054S_parse_recdescent_for_op(pTHX_ int gramtype, I32 fakeeof)
e53d8f76
Z
11055{
11056 OP *o;
11057 ENTER;
11058 SAVEVPTR(PL_eval_root);
11059 PL_eval_root = NULL;
78cdf107 11060 parse_recdescent(gramtype, fakeeof);
e53d8f76
Z
11061 o = PL_eval_root;
11062 LEAVE;
11063 return o;
11064}
11065
78cdf107
Z
11066#define parse_expr(p,f) S_parse_expr(aTHX_ p,f)
11067static OP *
11068S_parse_expr(pTHX_ I32 fakeeof, U32 flags)
11069{
11070 OP *exprop;
11071 if (flags & ~PARSE_OPTIONAL)
11072 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_expr");
11073 exprop = parse_recdescent_for_op(GRAMEXPR, fakeeof);
11074 if (!exprop && !(flags & PARSE_OPTIONAL)) {
11075 if (!PL_parser->error_count)
11076 qerror(Perl_mess(aTHX_ "Parse error"));
11077 exprop = newOP(OP_NULL, 0);
11078 }
11079 return exprop;
11080}
11081
11082/*
11083=for apidoc Amx|OP *|parse_arithexpr|U32 flags
11084
11085Parse a Perl arithmetic expression. This may contain operators of precedence
11086down to the bit shift operators. The expression must be followed (and thus
11087terminated) either by a comparison or lower-precedence operator or by
11088something that would normally terminate an expression such as semicolon.
11089If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11090otherwise it is mandatory. It is up to the caller to ensure that the
11091dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11092the source of the code to be parsed and the lexical context for the
11093expression.
11094
11095The op tree representing the expression is returned. If an optional
11096expression is absent, a null pointer is returned, otherwise the pointer
11097will be non-null.
11098
11099If an error occurs in parsing or compilation, in most cases a valid op
11100tree is returned anyway. The error is reflected in the parser state,
11101normally resulting in a single exception at the top level of parsing
11102which covers all the compilation errors that occurred. Some compilation
11103errors, however, will throw an exception immediately.
11104
11105=cut
11106*/
11107
11108OP *
11109Perl_parse_arithexpr(pTHX_ U32 flags)
11110{
11111 return parse_expr(LEX_FAKEEOF_COMPARE, flags);
11112}
11113
11114/*
11115=for apidoc Amx|OP *|parse_termexpr|U32 flags
11116
11117Parse a Perl term expression. This may contain operators of precedence
11118down to the assignment operators. The expression must be followed (and thus
11119terminated) either by a comma or lower-precedence operator or by
11120something that would normally terminate an expression such as semicolon.
11121If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11122otherwise it is mandatory. It is up to the caller to ensure that the
11123dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11124the source of the code to be parsed and the lexical context for the
11125expression.
11126
11127The op tree representing the expression is returned. If an optional
11128expression is absent, a null pointer is returned, otherwise the pointer
11129will be non-null.
11130
11131If an error occurs in parsing or compilation, in most cases a valid op
11132tree is returned anyway. The error is reflected in the parser state,
11133normally resulting in a single exception at the top level of parsing
11134which covers all the compilation errors that occurred. Some compilation
11135errors, however, will throw an exception immediately.
11136
11137=cut
11138*/
11139
11140OP *
11141Perl_parse_termexpr(pTHX_ U32 flags)
11142{
11143 return parse_expr(LEX_FAKEEOF_COMMA, flags);
11144}
11145
11146/*
11147=for apidoc Amx|OP *|parse_listexpr|U32 flags
11148
11149Parse a Perl list expression. This may contain operators of precedence
11150down to the comma operator. The expression must be followed (and thus
11151terminated) either by a low-precedence logic operator such as C<or> or by
11152something that would normally terminate an expression such as semicolon.
11153If I<flags> includes C<PARSE_OPTIONAL> then the expression is optional,
11154otherwise it is mandatory. It is up to the caller to ensure that the
11155dynamic parser state (L</PL_parser> et al) is correctly set to reflect
11156the source of the code to be parsed and the lexical context for the
11157expression.
11158
11159The op tree representing the expression is returned. If an optional
11160expression is absent, a null pointer is returned, otherwise the pointer
11161will be non-null.
11162
11163If an error occurs in parsing or compilation, in most cases a valid op
11164tree is returned anyway. The error is reflected in the parser state,
11165normally resulting in a single exception at the top level of parsing
11166which covers all the compilation errors that occurred. Some compilation
11167errors, however, will throw an exception immediately.
11168
11169=cut
11170*/
11171
11172OP *
11173Perl_parse_listexpr(pTHX_ U32 flags)
11174{
11175 return parse_expr(LEX_FAKEEOF_LOWLOGIC, flags);
11176}
11177
11178/*
11179=for apidoc Amx|OP *|parse_fullexpr|U32 flags
11180
11181Parse a single complete Perl expression. This allows the full
11182expression grammar, including the lowest-precedence operators such
11183as C<or>. The expression must be followed (and thus terminated) by a
11184token that an expression would normally be terminated by: end-of-file,
11185closing bracketing punctuation, semicolon, or one of the keywords that
11186signals a postfix expression-statement modifier. If I<flags> includes
11187C<PARSE_OPTIONAL> then the expression is optional, otherwise it is
11188mandatory. It is up to the caller to ensure that the dynamic parser
11189state (L</PL_parser> et al) is correctly set to reflect the source of
11190the code to be parsed and the lexical context for the expression.
11191
11192The op tree representing the expression is returned. If an optional
11193expression is absent, a null pointer is returned, otherwise the pointer
11194will be non-null.
11195
11196If an error occurs in parsing or compilation, in most cases a valid op
11197tree is returned anyway. The error is reflected in the parser state,
11198normally resulting in a single exception at the top level of parsing
11199which covers all the compilation errors that occurred. Some compilation
11200errors, however, will throw an exception immediately.
11201
11202=cut
11203*/
11204
11205OP *
11206Perl_parse_fullexpr(pTHX_ U32 flags)
11207{
11208 return parse_expr(LEX_FAKEEOF_NONEXPR, flags);
11209}
11210
e53d8f76
Z
11211/*
11212=for apidoc Amx|OP *|parse_block|U32 flags
11213
11214Parse a single complete Perl code block. This consists of an opening
11215brace, a sequence of statements, and a closing brace. The block
11216constitutes a lexical scope, so C<my> variables and various compile-time
11217effects can be contained within it. It is up to the caller to ensure
11218that the dynamic parser state (L</PL_parser> et al) is correctly set to
11219reflect the source of the code to be parsed and the lexical context for
11220the statement.
11221
11222The op tree representing the code block is returned. This is always a
11223real op, never a null pointer. It will normally be a C<lineseq> list,
11224including C<nextstate> or equivalent ops. No ops to construct any kind
11225of runtime scope are included by virtue of it being a block.
11226
11227If an error occurs in parsing or compilation, in most cases a valid op
11228tree (most likely null) is returned anyway. The error is reflected in
11229the parser state, normally resulting in a single exception at the top
11230level of parsing which covers all the compilation errors that occurred.
11231Some compilation errors, however, will throw an exception immediately.
11232
11233The I<flags> parameter is reserved for future use, and must always
11234be zero.
11235
11236=cut
11237*/
11238
11239OP *
11240Perl_parse_block(pTHX_ U32 flags)
11241{
11242 if (flags)
11243 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
78cdf107 11244 return parse_recdescent_for_op(GRAMBLOCK, LEX_FAKEEOF_NEVER);
e53d8f76
Z
11245}
11246
1da4ca5f 11247/*
8359b381
Z
11248=for apidoc Amx|OP *|parse_barestmt|U32 flags
11249
11250Parse a single unadorned Perl statement. This may be a normal imperative
11251statement or a declaration that has compile-time effect. It does not
11252include any label or other affixture. It is up to the caller to ensure
11253that the dynamic parser state (L</PL_parser> et al) is correctly set to
11254reflect the source of the code to be parsed and the lexical context for
11255the statement.
11256
11257The op tree representing the statement is returned. This may be a
11258null pointer if the statement is null, for example if it was actually
11259a subroutine definition (which has compile-time side effects). If not
11260null, it will be ops directly implementing the statement, suitable to
11261pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
11262equivalent op (except for those embedded in a scope contained entirely
11263within the statement).
11264
11265If an error occurs in parsing or compilation, in most cases a valid op
11266tree (most likely null) is returned anyway. The error is reflected in
11267the parser state, normally resulting in a single exception at the top
11268level of parsing which covers all the compilation errors that occurred.
11269Some compilation errors, however, will throw an exception immediately.
11270
11271The I<flags> parameter is reserved for future use, and must always
11272be zero.
11273
11274=cut
11275*/
11276
11277OP *
11278Perl_parse_barestmt(pTHX_ U32 flags)
11279{
11280 if (flags)
11281 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
78cdf107 11282 return parse_recdescent_for_op(GRAMBARESTMT, LEX_FAKEEOF_NEVER);
8359b381
Z
11283}
11284
11285/*
361d9b55
Z
11286=for apidoc Amx|SV *|parse_label|U32 flags
11287
11288Parse a single label, possibly optional, of the type that may prefix a
11289Perl statement. It is up to the caller to ensure that the dynamic parser
11290state (L</PL_parser> et al) is correctly set to reflect the source of
11291the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
11292label is optional, otherwise it is mandatory.
11293
11294The name of the label is returned in the form of a fresh scalar. If an
11295optional label is absent, a null pointer is returned.
11296
11297If an error occurs in parsing, which can only occur if the label is
11298mandatory, a valid label is returned anyway. The error is reflected in
11299the parser state, normally resulting in a single exception at the top
11300level of parsing which covers all the compilation errors that occurred.
11301
11302=cut
11303*/
11304
11305SV *
11306Perl_parse_label(pTHX_ U32 flags)
11307{
11308 if (flags & ~PARSE_OPTIONAL)
11309 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
11310 if (PL_lex_state == LEX_KNOWNEXT) {
11311 PL_parser->yychar = yylex();
11312 if (PL_parser->yychar == LABEL) {
11313 char *lpv = pl_yylval.pval;
11314 STRLEN llen = strlen(lpv);
11315 SV *lsv;
11316 PL_parser->yychar = YYEMPTY;
11317 lsv = newSV_type(SVt_PV);
11318 SvPV_set(lsv, lpv);
11319 SvCUR_set(lsv, llen);
11320 SvLEN_set(lsv, llen+1);
11321 SvPOK_on(lsv);
11322 return lsv;
11323 } else {
11324 yyunlex();
11325 goto no_label;
11326 }
11327 } else {
11328 char *s, *t;
11329 U8 c;
11330 STRLEN wlen, bufptr_pos;
11331 lex_read_space(0);
11332 t = s = PL_bufptr;
11333 c = (U8)*s;
11334 if (!isIDFIRST_A(c))
11335 goto no_label;
11336 do {
11337 c = (U8)*++t;
11338 } while(isWORDCHAR_A(c));
11339 wlen = t - s;
11340 if (word_takes_any_delimeter(s, wlen))
11341 goto no_label;
11342 bufptr_pos = s - SvPVX(PL_linestr);
11343 PL_bufptr = t;
11344 lex_read_space(LEX_KEEP_PREVIOUS);
11345 t = PL_bufptr;
11346 s = SvPVX(PL_linestr) + bufptr_pos;
11347 if (t[0] == ':' && t[1] != ':') {
11348 PL_oldoldbufptr = PL_oldbufptr;
11349 PL_oldbufptr = s;
11350 PL_bufptr = t+1;
11351 return newSVpvn(s, wlen);
11352 } else {
11353 PL_bufptr = s;
11354 no_label:
11355 if (flags & PARSE_OPTIONAL) {
11356 return NULL;
11357 } else {
11358 qerror(Perl_mess(aTHX_ "Parse error"));
11359 return newSVpvs("x");
11360 }
11361 }
11362 }
11363}
11364
11365/*
28ac2b49
Z
11366=for apidoc Amx|OP *|parse_fullstmt|U32 flags
11367
11368Parse a single complete Perl statement. This may be a normal imperative
8359b381 11369statement or a declaration that has compile-time effect, and may include
8e720305 11370optional labels. It is up to the caller to ensure that the dynamic
28ac2b49
Z
11371parser state (L</PL_parser> et al) is correctly set to reflect the source
11372of the code to be parsed and the lexical context for the statement.
11373
11374The op tree representing the statement is returned. This may be a
11375null pointer if the statement is null, for example if it was actually
11376a subroutine definition (which has compile-time side effects). If not
11377null, it will be the result of a L</newSTATEOP> call, normally including
11378a C<nextstate> or equivalent op.
11379
11380If an error occurs in parsing or compilation, in most cases a valid op
11381tree (most likely null) is returned anyway. The error is reflected in
11382the parser state, normally resulting in a single exception at the top
11383level of parsing which covers all the compilation errors that occurred.
11384Some compilation errors, however, will throw an exception immediately.
11385
11386The I<flags> parameter is reserved for future use, and must always
11387be zero.
11388
11389=cut
11390*/
11391
11392OP *
11393Perl_parse_fullstmt(pTHX_ U32 flags)
11394{
28ac2b49
Z
11395 if (flags)
11396 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
78cdf107 11397 return parse_recdescent_for_op(GRAMFULLSTMT, LEX_FAKEEOF_NEVER);
28ac2b49
Z
11398}
11399
07ffcb73
Z
11400/*
11401=for apidoc Amx|OP *|parse_stmtseq|U32 flags
11402
11403Parse a sequence of zero or more Perl statements. These may be normal
11404imperative statements, including optional labels, or declarations
11405that have compile-time effect, or any mixture thereof. The statement
11406sequence ends when a closing brace or end-of-file is encountered in a
11407place where a new statement could have validly started. It is up to
11408the caller to ensure that the dynamic parser state (L</PL_parser> et al)
11409is correctly set to reflect the source of the code to be parsed and the
11410lexical context for the statements.
11411
11412The op tree representing the statement sequence is returned. This may
11413be a null pointer if the statements were all null, for example if there
11414were no statements or if there were only subroutine definitions (which
11415have compile-time side effects). If not null, it will be a C<lineseq>
11416list, normally including C<nextstate> or equivalent ops.
11417
11418If an error occurs in parsing or compilation, in most cases a valid op
11419tree is returned anyway. The error is reflected in the parser state,
11420normally resulting in a single exception at the top level of parsing
11421which covers all the compilation errors that occurred. Some compilation
11422errors, however, will throw an exception immediately.
11423
11424The I<flags> parameter is reserved for future use, and must always
11425be zero.
11426
11427=cut
11428*/
11429
11430OP *
11431Perl_parse_stmtseq(pTHX_ U32 flags)
11432{
11433 OP *stmtseqop;
e53d8f76 11434 I32 c;
07ffcb73 11435 if (flags)
78cdf107
Z
11436 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_stmtseq");
11437 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ, LEX_FAKEEOF_CLOSING);
e53d8f76
Z
11438 c = lex_peek_unichar(0);
11439 if (c != -1 && c != /*{*/'}')
07ffcb73 11440 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
11441 return stmtseqop;
11442}
11443
ea25a9b2 11444void
f7e3d326 11445Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 11446{
f7e3d326 11447 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2 11448 deprecate("qw(...) as parentheses");
78cdf107 11449 force_next((4<<24)|')');
ea25a9b2
Z
11450 if (qwlist->op_type == OP_STUB) {
11451 op_free(qwlist);
11452 }
11453 else {
3d8e05a0 11454 start_force(PL_curforce);
ea25a9b2
Z
11455 NEXTVAL_NEXTTOKE.opval = qwlist;
11456 force_next(THING);
11457 }
78cdf107 11458 force_next((2<<24)|'(');
ea25a9b2
Z
11459}
11460
28ac2b49 11461/*
1da4ca5f
NC
11462 * Local variables:
11463 * c-indentation-style: bsd
11464 * c-basic-offset: 4
11465 * indent-tabs-mode: t
11466 * End:
11467 *
37442d52
RGS
11468 * ex: set ts=8 sts=4 sw=4 noet:
11469 */