This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Increase PerlIO::scalar’s version
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
51#define PL_lex_brackstack (PL_parser->lex_brackstack)
52#define PL_lex_casemods (PL_parser->lex_casemods)
53#define PL_lex_casestack (PL_parser->lex_casestack)
54#define PL_lex_defer (PL_parser->lex_defer)
55#define PL_lex_dojoin (PL_parser->lex_dojoin)
56#define PL_lex_expect (PL_parser->lex_expect)
57#define PL_lex_formbrack (PL_parser->lex_formbrack)
58#define PL_lex_inpat (PL_parser->lex_inpat)
59#define PL_lex_inwhat (PL_parser->lex_inwhat)
60#define PL_lex_op (PL_parser->lex_op)
61#define PL_lex_repl (PL_parser->lex_repl)
62#define PL_lex_starts (PL_parser->lex_starts)
63#define PL_lex_stuff (PL_parser->lex_stuff)
64#define PL_multi_start (PL_parser->multi_start)
65#define PL_multi_open (PL_parser->multi_open)
66#define PL_multi_close (PL_parser->multi_close)
67#define PL_pending_ident (PL_parser->pending_ident)
68#define PL_preambled (PL_parser->preambled)
69#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 70#define PL_linestr (PL_parser->linestr)
c2598295
DM
71#define PL_expect (PL_parser->expect)
72#define PL_copline (PL_parser->copline)
f06b5848
DM
73#define PL_bufptr (PL_parser->bufptr)
74#define PL_oldbufptr (PL_parser->oldbufptr)
75#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
76#define PL_linestart (PL_parser->linestart)
77#define PL_bufend (PL_parser->bufend)
78#define PL_last_uni (PL_parser->last_uni)
79#define PL_last_lop (PL_parser->last_lop)
80#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 81#define PL_lex_state (PL_parser->lex_state)
2f9285f8 82#define PL_rsfp (PL_parser->rsfp)
5486870f 83#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
84#define PL_in_my (PL_parser->in_my)
85#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 86#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 87#define PL_multi_end (PL_parser->multi_end)
13765c85 88#define PL_error_count (PL_parser->error_count)
199e78b7
DM
89
90#ifdef PERL_MAD
91# define PL_endwhite (PL_parser->endwhite)
92# define PL_faketokens (PL_parser->faketokens)
93# define PL_lasttoke (PL_parser->lasttoke)
94# define PL_nextwhite (PL_parser->nextwhite)
95# define PL_realtokenstart (PL_parser->realtokenstart)
96# define PL_skipwhite (PL_parser->skipwhite)
97# define PL_thisclose (PL_parser->thisclose)
98# define PL_thismad (PL_parser->thismad)
99# define PL_thisopen (PL_parser->thisopen)
100# define PL_thisstuff (PL_parser->thisstuff)
101# define PL_thistoken (PL_parser->thistoken)
102# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
103# define PL_thiswhite (PL_parser->thiswhite)
104# define PL_nexttoke (PL_parser->nexttoke)
105# define PL_curforce (PL_parser->curforce)
106#else
107# define PL_nexttoke (PL_parser->nexttoke)
108# define PL_nexttype (PL_parser->nexttype)
109# define PL_nextval (PL_parser->nextval)
199e78b7
DM
110#endif
111
16173588
NC
112/* This can't be done with embed.fnc, because struct yy_parser contains a
113 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
114static int
115S_pending_ident(pTHX);
199e78b7 116
0bd48802 117static const char ident_too_long[] = "Identifier too long";
8903cb82 118
29595ff2 119#ifdef PERL_MAD
29595ff2 120# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 121# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 122#else
5db06880 123# define CURMAD(slot,sv)
9ded7720 124# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
125#endif
126
a7aaec61
Z
127#define XENUMMASK 0x3f
128#define XFAKEEOF 0x40
129#define XFAKEBRACK 0x80
9059aa12 130
39e02b42
JH
131#ifdef USE_UTF8_SCRIPTS
132# define UTF (!IN_BYTES)
2b9d42f0 133#else
746b446a 134# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 135#endif
a0ed51b3 136
b1fc3636
CJ
137/* The maximum number of characters preceding the unrecognized one to display */
138#define UNRECOGNIZED_PRECEDE_COUNT 10
139
61f0cdd9 140/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
141 * 1999-02-27 mjd-perl-patch@plover.com */
142#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
143
bf4acbe4 144#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 145
ffb4593c
NT
146/* LEX_* are values for PL_lex_state, the state of the lexer.
147 * They are arranged oddly so that the guard on the switch statement
79072805
LW
148 * can get by with a single comparison (if the compiler is smart enough).
149 */
150
fb73857a 151/* #define LEX_NOTPARSING 11 is done in perl.h. */
152
b6007c36
DM
153#define LEX_NORMAL 10 /* normal code (ie not within "...") */
154#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
155#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
156#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
157#define LEX_INTERPSTART 6 /* expecting the start of a $var */
158
159 /* at end of code, eg "$x" followed by: */
160#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
161#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162
163#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
164 string or after \E, $foo, etc */
165#define LEX_INTERPCONST 2 /* NOT USED */
166#define LEX_FORMLINE 1 /* expecting a format line */
167#define LEX_KNOWNEXT 0 /* next token known; just return it */
168
79072805 169
bbf60fe6 170#ifdef DEBUGGING
27da23d5 171static const char* const lex_state_names[] = {
bbf60fe6
DM
172 "KNOWNEXT",
173 "FORMLINE",
174 "INTERPCONST",
175 "INTERPCONCAT",
176 "INTERPENDMAYBE",
177 "INTERPEND",
178 "INTERPSTART",
179 "INTERPPUSH",
180 "INTERPCASEMOD",
181 "INTERPNORMAL",
182 "NORMAL"
183};
184#endif
185
79072805
LW
186#ifdef ff_next
187#undef ff_next
d48672a2
LW
188#endif
189
79072805 190#include "keywords.h"
fe14fcc3 191
ffb4593c
NT
192/* CLINE is a macro that ensures PL_copline has a sane value */
193
ae986130
LW
194#ifdef CLINE
195#undef CLINE
196#endif
57843af0 197#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 198
5db06880 199#ifdef PERL_MAD
29595ff2
NC
200# define SKIPSPACE0(s) skipspace0(s)
201# define SKIPSPACE1(s) skipspace1(s)
202# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
203# define PEEKSPACE(s) skipspace2(s,0)
204#else
205# define SKIPSPACE0(s) skipspace(s)
206# define SKIPSPACE1(s) skipspace(s)
207# define SKIPSPACE2(s,tsv) skipspace(s)
208# define PEEKSPACE(s) skipspace(s)
209#endif
210
ffb4593c
NT
211/*
212 * Convenience functions to return different tokens and prime the
9cbb5ea2 213 * lexer for the next token. They all take an argument.
ffb4593c
NT
214 *
215 * TOKEN : generic token (used for '(', DOLSHARP, etc)
216 * OPERATOR : generic operator
217 * AOPERATOR : assignment operator
218 * PREBLOCK : beginning the block after an if, while, foreach, ...
219 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
220 * PREREF : *EXPR where EXPR is not a simple identifier
221 * TERM : expression term
222 * LOOPX : loop exiting command (goto, last, dump, etc)
223 * FTST : file test operator
224 * FUN0 : zero-argument function
2d2e263d 225 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
226 * BOop : bitwise or or xor
227 * BAop : bitwise and
228 * SHop : shift operator
229 * PWop : power operator
9cbb5ea2 230 * PMop : pattern-matching operator
ffb4593c
NT
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
e5edeb50 234 * Rop : relational operator <= != gt
ffb4593c
NT
235 *
236 * Also see LOP and lop() below.
237 */
238
998054bd 239#ifdef DEBUGGING /* Serve -DT. */
704d4215 240# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 241#else
bbf60fe6 242# define REPORT(retval) (retval)
998054bd
SC
243#endif
244
bbf60fe6
DM
245#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
252#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
255#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
256#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
257#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
258#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
259#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
260#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
261#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
262#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
263#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
264#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 265
a687059c
LW
266/* This bit of chicanery makes a unary function followed by
267 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
268 * The UNIDOR macro is for unary functions that can be followed by the //
269 * operator (such as C<shift // 0>).
a687059c 270 */
376fcdbf 271#define UNI2(f,x) { \
6154021b 272 pl_yylval.ival = f; \
376fcdbf
AL
273 PL_expect = x; \
274 PL_bufptr = s; \
275 PL_last_uni = PL_oldbufptr; \
276 PL_last_lop_op = f; \
277 if (*s == '(') \
278 return REPORT( (int)FUNC1 ); \
29595ff2 279 s = PEEKSPACE(s); \
376fcdbf
AL
280 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
281 }
6f33ba73
RGS
282#define UNI(f) UNI2(f,XTERM)
283#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 284
376fcdbf 285#define UNIBRACK(f) { \
6154021b 286 pl_yylval.ival = f; \
376fcdbf
AL
287 PL_bufptr = s; \
288 PL_last_uni = PL_oldbufptr; \
289 if (*s == '(') \
290 return REPORT( (int)FUNC1 ); \
29595ff2 291 s = PEEKSPACE(s); \
376fcdbf
AL
292 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
293 }
79072805 294
9f68db38 295/* grandfather return to old style */
6154021b 296#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 297
8fa7f367
JH
298#ifdef DEBUGGING
299
6154021b 300/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
301enum token_type {
302 TOKENTYPE_NONE,
303 TOKENTYPE_IVAL,
6154021b 304 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
305 TOKENTYPE_PVAL,
306 TOKENTYPE_OPVAL,
307 TOKENTYPE_GVVAL
308};
309
6d4a66ac
NC
310static struct debug_tokens {
311 const int token;
312 enum token_type type;
313 const char *name;
314} const debug_tokens[] =
9041c2e3 315{
bbf60fe6
DM
316 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
317 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
318 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
319 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
320 { ARROW, TOKENTYPE_NONE, "ARROW" },
321 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
322 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
323 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
324 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
325 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 326 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
327 { DO, TOKENTYPE_NONE, "DO" },
328 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
329 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
330 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
331 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
332 { ELSE, TOKENTYPE_NONE, "ELSE" },
333 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
334 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
335 { FOR, TOKENTYPE_IVAL, "FOR" },
336 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
337 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
338 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
339 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
340 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
341 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 342 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
343 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
344 { IF, TOKENTYPE_IVAL, "IF" },
345 { LABEL, TOKENTYPE_PVAL, "LABEL" },
346 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
347 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
348 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
349 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
350 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
351 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
352 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
353 { MY, TOKENTYPE_IVAL, "MY" },
354 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
355 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
356 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
357 { OROP, TOKENTYPE_IVAL, "OROP" },
358 { OROR, TOKENTYPE_NONE, "OROR" },
359 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
360 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
361 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
362 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
363 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
364 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
365 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
366 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
367 { PREINC, TOKENTYPE_NONE, "PREINC" },
368 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
369 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
370 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
371 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
372 { SUB, TOKENTYPE_NONE, "SUB" },
373 { THING, TOKENTYPE_OPVAL, "THING" },
374 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
375 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
376 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
377 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
378 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
379 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 380 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
381 { WHILE, TOKENTYPE_IVAL, "WHILE" },
382 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 383 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 384 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
385};
386
6154021b 387/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 388
bbf60fe6 389STATIC int
704d4215 390S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 391{
97aff369 392 dVAR;
7918f24d
NC
393
394 PERL_ARGS_ASSERT_TOKEREPORT;
395
bbf60fe6 396 if (DEBUG_T_TEST) {
bd61b366 397 const char *name = NULL;
bbf60fe6 398 enum token_type type = TOKENTYPE_NONE;
f54cb97a 399 const struct debug_tokens *p;
396482e1 400 SV* const report = newSVpvs("<== ");
bbf60fe6 401
f54cb97a 402 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
403 if (p->token == (int)rv) {
404 name = p->name;
405 type = p->type;
406 break;
407 }
408 }
409 if (name)
54667de8 410 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
411 else if ((char)rv > ' ' && (char)rv < '~')
412 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
413 else if (!rv)
396482e1 414 sv_catpvs(report, "EOF");
bbf60fe6
DM
415 else
416 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
417 switch (type) {
418 case TOKENTYPE_NONE:
419 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
420 break;
421 case TOKENTYPE_IVAL:
704d4215 422 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
423 break;
424 case TOKENTYPE_OPNUM:
425 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 426 PL_op_name[lvalp->ival]);
bbf60fe6
DM
427 break;
428 case TOKENTYPE_PVAL:
704d4215 429 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
430 break;
431 case TOKENTYPE_OPVAL:
704d4215 432 if (lvalp->opval) {
401441c0 433 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
434 PL_op_name[lvalp->opval->op_type]);
435 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 436 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 437 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
438 }
439
440 }
401441c0 441 else
396482e1 442 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
443 break;
444 }
b6007c36 445 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
446 };
447 return (int)rv;
998054bd
SC
448}
449
b6007c36
DM
450
451/* print the buffer with suitable escapes */
452
453STATIC void
15f169a1 454S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 455{
396482e1 456 SV* const tmp = newSVpvs("");
7918f24d
NC
457
458 PERL_ARGS_ASSERT_PRINTBUF;
459
b6007c36
DM
460 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
461 SvREFCNT_dec(tmp);
462}
463
8fa7f367
JH
464#endif
465
8290c323
NC
466static int
467S_deprecate_commaless_var_list(pTHX) {
468 PL_expect = XTERM;
469 deprecate("comma-less variable list");
470 return REPORT(','); /* grandfather non-comma-format format */
471}
472
ffb4593c
NT
473/*
474 * S_ao
475 *
c963b151
BD
476 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
477 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
478 */
479
76e3520e 480STATIC int
cea2e8a9 481S_ao(pTHX_ int toketype)
a0d0e21e 482{
97aff369 483 dVAR;
3280af22
NIS
484 if (*PL_bufptr == '=') {
485 PL_bufptr++;
a0d0e21e 486 if (toketype == ANDAND)
6154021b 487 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 488 else if (toketype == OROR)
6154021b 489 pl_yylval.ival = OP_ORASSIGN;
c963b151 490 else if (toketype == DORDOR)
6154021b 491 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
492 toketype = ASSIGNOP;
493 }
494 return toketype;
495}
496
ffb4593c
NT
497/*
498 * S_no_op
499 * When Perl expects an operator and finds something else, no_op
500 * prints the warning. It always prints "<something> found where
501 * operator expected. It prints "Missing semicolon on previous line?"
502 * if the surprise occurs at the start of the line. "do you need to
503 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
504 * where the compiler doesn't know if foo is a method call or a function.
505 * It prints "Missing operator before end of line" if there's nothing
506 * after the missing operator, or "... before <...>" if there is something
507 * after the missing operator.
508 */
509
76e3520e 510STATIC void
15f169a1 511S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 512{
97aff369 513 dVAR;
9d4ba2ae
AL
514 char * const oldbp = PL_bufptr;
515 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 516
7918f24d
NC
517 PERL_ARGS_ASSERT_NO_OP;
518
1189a94a
GS
519 if (!s)
520 s = oldbp;
07c798fb 521 else
1189a94a 522 PL_bufptr = s;
cea2e8a9 523 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
524 if (ckWARN_d(WARN_SYNTAX)) {
525 if (is_first)
526 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
527 "\t(Missing semicolon on previous line?)\n");
528 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 529 const char *t;
c35e046a
AL
530 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
531 NOOP;
56da5a46
RGS
532 if (t < PL_bufptr && isSPACE(*t))
533 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
534 "\t(Do you need to predeclare %.*s?)\n",
551405c4 535 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
536 }
537 else {
538 assert(s >= oldbp);
539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 540 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 541 }
07c798fb 542 }
3280af22 543 PL_bufptr = oldbp;
8990e307
LW
544}
545
ffb4593c
NT
546/*
547 * S_missingterm
548 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 549 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
550 * If we're in a delimited string and the delimiter is a control
551 * character, it's reformatted into a two-char sequence like ^C.
552 * This is fatal.
553 */
554
76e3520e 555STATIC void
cea2e8a9 556S_missingterm(pTHX_ char *s)
8990e307 557{
97aff369 558 dVAR;
8990e307
LW
559 char tmpbuf[3];
560 char q;
561 if (s) {
9d4ba2ae 562 char * const nl = strrchr(s,'\n');
d2719217 563 if (nl)
8990e307
LW
564 *nl = '\0';
565 }
463559e7 566 else if (isCNTRL(PL_multi_close)) {
8990e307 567 *tmpbuf = '^';
585ec06d 568 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
569 tmpbuf[2] = '\0';
570 s = tmpbuf;
571 }
572 else {
eb160463 573 *tmpbuf = (char)PL_multi_close;
8990e307
LW
574 tmpbuf[1] = '\0';
575 s = tmpbuf;
576 }
577 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 578 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 579}
79072805 580
ef89dcc3 581#define FEATURE_IS_ENABLED(name) \
0d863452 582 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 583 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 584/* The longest string we pass in. */
1863b879 585#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 586
0d863452
RH
587/*
588 * S_feature_is_enabled
589 * Check whether the named feature is enabled.
590 */
591STATIC bool
15f169a1 592S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 593{
97aff369 594 dVAR;
0d863452 595 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 596 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
597
598 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
599
4a731d7b
NC
600 assert(namelen <= MAX_FEATURE_LEN);
601 memcpy(&he_name[8], name, namelen);
d4c19fe8 602
7b9ef140 603 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
604}
605
ffb4593c 606/*
9cbb5ea2
GS
607 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
608 * utf16-to-utf8-reversed.
ffb4593c
NT
609 */
610
c39cd008
GS
611#ifdef PERL_CR_FILTER
612static void
613strip_return(SV *sv)
614{
95a20fc0 615 register const char *s = SvPVX_const(sv);
9d4ba2ae 616 register const char * const e = s + SvCUR(sv);
7918f24d
NC
617
618 PERL_ARGS_ASSERT_STRIP_RETURN;
619
c39cd008
GS
620 /* outer loop optimized to do nothing if there are no CR-LFs */
621 while (s < e) {
622 if (*s++ == '\r' && *s == '\n') {
623 /* hit a CR-LF, need to copy the rest */
624 register char *d = s - 1;
625 *d++ = *s++;
626 while (s < e) {
627 if (*s == '\r' && s[1] == '\n')
628 s++;
629 *d++ = *s++;
630 }
631 SvCUR(sv) -= s - d;
632 return;
633 }
634 }
635}
a868473f 636
76e3520e 637STATIC I32
c39cd008 638S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 639{
f54cb97a 640 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
641 if (count > 0 && !maxlen)
642 strip_return(sv);
643 return count;
a868473f
NIS
644}
645#endif
646
ffb4593c 647/*
8eaa0acf
Z
648=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
649
650Creates and initialises a new lexer/parser state object, supplying
651a context in which to lex and parse from a new source of Perl code.
652A pointer to the new state object is placed in L</PL_parser>. An entry
653is made on the save stack so that upon unwinding the new state object
654will be destroyed and the former value of L</PL_parser> will be restored.
655Nothing else need be done to clean up the parsing context.
656
657The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
658non-null, provides a string (in SV form) containing code to be parsed.
659A copy of the string is made, so subsequent modification of I<line>
660does not affect parsing. I<rsfp>, if non-null, provides an input stream
661from which code will be read to be parsed. If both are non-null, the
662code in I<line> comes first and must consist of complete lines of input,
663and I<rsfp> supplies the remainder of the source.
664
665The I<flags> parameter is reserved for future use, and must always
666be zero.
667
668=cut
669*/
ffb4593c 670
a0d0e21e 671void
8eaa0acf 672Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 673{
97aff369 674 dVAR;
6ef55633 675 const char *s = NULL;
8990e307 676 STRLEN len;
5486870f 677 yy_parser *parser, *oparser;
8eaa0acf
Z
678 if (flags)
679 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
680
681 /* create and initialise a parser */
682
199e78b7 683 Newxz(parser, 1, yy_parser);
5486870f 684 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
685 PL_parser = parser;
686
28ac2b49
Z
687 parser->stack = NULL;
688 parser->ps = NULL;
689 parser->stack_size = 0;
acdf0a21 690
e3abe207
DM
691 /* on scope exit, free this parser and restore any outer one */
692 SAVEPARSER(parser);
7c4baf47 693 parser->saved_curcop = PL_curcop;
e3abe207 694
acdf0a21 695 /* initialise lexer state */
8990e307 696
fb205e7a
DM
697#ifdef PERL_MAD
698 parser->curforce = -1;
699#else
700 parser->nexttoke = 0;
701#endif
ca4cfd28 702 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 703 parser->copline = NOLINE;
5afb0a62 704 parser->lex_state = LEX_NORMAL;
c2598295 705 parser->expect = XSTATE;
2f9285f8 706 parser->rsfp = rsfp;
f07ec6dd 707 parser->rsfp_filters = newAV();
2f9285f8 708
199e78b7
DM
709 Newx(parser->lex_brackstack, 120, char);
710 Newx(parser->lex_casestack, 12, char);
711 *parser->lex_casestack = '\0';
02b34bbe 712
10efb74f
NC
713 if (line) {
714 s = SvPV_const(line, len);
715 } else {
716 len = 0;
717 }
bdc0bf6f 718
10efb74f 719 if (!len) {
bdc0bf6f 720 parser->linestr = newSVpvs("\n;");
805700c1 721 } else {
719a9bb0 722 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 723 if (s[len-1] != ';')
bdc0bf6f 724 sv_catpvs(parser->linestr, "\n;");
8990e307 725 }
f06b5848
DM
726 parser->oldoldbufptr =
727 parser->oldbufptr =
728 parser->bufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
737c24fc
Z
732
733 parser->in_pod = 0;
79072805 734}
a687059c 735
e3abe207
DM
736
737/* delete a parser object */
738
739void
740Perl_parser_free(pTHX_ const yy_parser *parser)
741{
7918f24d
NC
742 PERL_ARGS_ASSERT_PARSER_FREE;
743
7c4baf47 744 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
745 SvREFCNT_dec(parser->linestr);
746
2f9285f8
DM
747 if (parser->rsfp == PerlIO_stdin())
748 PerlIO_clearerr(parser->rsfp);
799361c3
SH
749 else if (parser->rsfp && (!parser->old_parser ||
750 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 751 PerlIO_close(parser->rsfp);
5486870f 752 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 753
e3abe207
DM
754 Safefree(parser->lex_brackstack);
755 Safefree(parser->lex_casestack);
756 PL_parser = parser->old_parser;
757 Safefree(parser);
758}
759
760
ffb4593c 761/*
f0e67a1d
Z
762=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
763
764Buffer scalar containing the chunk currently under consideration of the
765text currently being lexed. This is always a plain string scalar (for
766which C<SvPOK> is true). It is not intended to be used as a scalar by
767normal scalar means; instead refer to the buffer directly by the pointer
768variables described below.
769
770The lexer maintains various C<char*> pointers to things in the
771C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
772reallocated, all of these pointers must be updated. Don't attempt to
773do this manually, but rather use L</lex_grow_linestr> if you need to
774reallocate the buffer.
775
776The content of the text chunk in the buffer is commonly exactly one
777complete line of input, up to and including a newline terminator,
778but there are situations where it is otherwise. The octets of the
779buffer may be intended to be interpreted as either UTF-8 or Latin-1.
780The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
781flag on this scalar, which may disagree with it.
782
783For direct examination of the buffer, the variable
784L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
785lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
786of these pointers is usually preferable to examination of the scalar
787through normal scalar means.
788
789=for apidoc AmxU|char *|PL_parser-E<gt>bufend
790
791Direct pointer to the end of the chunk of text currently being lexed, the
792end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
793+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
794always located at the end of the buffer, and does not count as part of
795the buffer's contents.
796
797=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
798
799Points to the current position of lexing inside the lexer buffer.
800Characters around this point may be freely examined, within
801the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
802L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
803interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
804
805Lexing code (whether in the Perl core or not) moves this pointer past
806the characters that it consumes. It is also expected to perform some
807bookkeeping whenever a newline character is consumed. This movement
808can be more conveniently performed by the function L</lex_read_to>,
809which handles newlines appropriately.
810
811Interpretation of the buffer's octets can be abstracted out by
812using the slightly higher-level functions L</lex_peek_unichar> and
813L</lex_read_unichar>.
814
815=for apidoc AmxU|char *|PL_parser-E<gt>linestart
816
817Points to the start of the current line inside the lexer buffer.
818This is useful for indicating at which column an error occurred, and
819not much else. This must be updated by any lexing code that consumes
820a newline; the function L</lex_read_to> handles this detail.
821
822=cut
823*/
824
825/*
826=for apidoc Amx|bool|lex_bufutf8
827
828Indicates whether the octets in the lexer buffer
829(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
830of Unicode characters. If not, they should be interpreted as Latin-1
831characters. This is analogous to the C<SvUTF8> flag for scalars.
832
833In UTF-8 mode, it is not guaranteed that the lexer buffer actually
834contains valid UTF-8. Lexing code must be robust in the face of invalid
835encoding.
836
837The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
838is significant, but not the whole story regarding the input character
839encoding. Normally, when a file is being read, the scalar contains octets
840and its C<SvUTF8> flag is off, but the octets should be interpreted as
841UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
842however, the scalar may have the C<SvUTF8> flag on, and in this case its
843octets should be interpreted as UTF-8 unless the C<use bytes> pragma
844is in effect. This logic may change in the future; use this function
845instead of implementing the logic yourself.
846
847=cut
848*/
849
850bool
851Perl_lex_bufutf8(pTHX)
852{
853 return UTF;
854}
855
856/*
857=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
858
859Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
860at least I<len> octets (including terminating NUL). Returns a
861pointer to the reallocated buffer. This is necessary before making
862any direct modification of the buffer that would increase its length.
863L</lex_stuff_pvn> provides a more convenient way to insert text into
864the buffer.
865
866Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
867this function updates all of the lexer's variables that point directly
868into the buffer.
869
870=cut
871*/
872
873char *
874Perl_lex_grow_linestr(pTHX_ STRLEN len)
875{
876 SV *linestr;
877 char *buf;
878 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
879 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
880 linestr = PL_parser->linestr;
881 buf = SvPVX(linestr);
882 if (len <= SvLEN(linestr))
883 return buf;
884 bufend_pos = PL_parser->bufend - buf;
885 bufptr_pos = PL_parser->bufptr - buf;
886 oldbufptr_pos = PL_parser->oldbufptr - buf;
887 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
888 linestart_pos = PL_parser->linestart - buf;
889 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
890 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
891 buf = sv_grow(linestr, len);
892 PL_parser->bufend = buf + bufend_pos;
893 PL_parser->bufptr = buf + bufptr_pos;
894 PL_parser->oldbufptr = buf + oldbufptr_pos;
895 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
896 PL_parser->linestart = buf + linestart_pos;
897 if (PL_parser->last_uni)
898 PL_parser->last_uni = buf + last_uni_pos;
899 if (PL_parser->last_lop)
900 PL_parser->last_lop = buf + last_lop_pos;
901 return buf;
902}
903
904/*
83aa740e 905=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
906
907Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
908immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
909reallocating the buffer if necessary. This means that lexing code that
910runs later will see the characters as if they had appeared in the input.
911It is not recommended to do this as part of normal parsing, and most
912uses of this facility run the risk of the inserted characters being
913interpreted in an unintended manner.
914
915The string to be inserted is represented by I<len> octets starting
916at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
917according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
918The characters are recoded for the lexer buffer, according to how the
919buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 920to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
921function is more convenient.
922
923=cut
924*/
925
926void
83aa740e 927Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 928{
749123ff 929 dVAR;
f0e67a1d
Z
930 char *bufptr;
931 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
932 if (flags & ~(LEX_STUFF_UTF8))
933 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
934 if (UTF) {
935 if (flags & LEX_STUFF_UTF8) {
936 goto plain_copy;
937 } else {
938 STRLEN highhalf = 0;
83aa740e 939 const char *p, *e = pv+len;
f0e67a1d
Z
940 for (p = pv; p != e; p++)
941 highhalf += !!(((U8)*p) & 0x80);
942 if (!highhalf)
943 goto plain_copy;
944 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
945 bufptr = PL_parser->bufptr;
946 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
947 SvCUR_set(PL_parser->linestr,
948 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
949 PL_parser->bufend += len+highhalf;
950 for (p = pv; p != e; p++) {
951 U8 c = (U8)*p;
952 if (c & 0x80) {
953 *bufptr++ = (char)(0xc0 | (c >> 6));
954 *bufptr++ = (char)(0x80 | (c & 0x3f));
955 } else {
956 *bufptr++ = (char)c;
957 }
958 }
959 }
960 } else {
961 if (flags & LEX_STUFF_UTF8) {
962 STRLEN highhalf = 0;
83aa740e 963 const char *p, *e = pv+len;
f0e67a1d
Z
964 for (p = pv; p != e; p++) {
965 U8 c = (U8)*p;
966 if (c >= 0xc4) {
967 Perl_croak(aTHX_ "Lexing code attempted to stuff "
968 "non-Latin-1 character into Latin-1 input");
969 } else if (c >= 0xc2 && p+1 != e &&
970 (((U8)p[1]) & 0xc0) == 0x80) {
971 p++;
972 highhalf++;
973 } else if (c >= 0x80) {
974 /* malformed UTF-8 */
975 ENTER;
976 SAVESPTR(PL_warnhook);
977 PL_warnhook = PERL_WARNHOOK_FATAL;
978 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
979 LEAVE;
980 }
981 }
982 if (!highhalf)
983 goto plain_copy;
984 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
985 bufptr = PL_parser->bufptr;
986 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
987 SvCUR_set(PL_parser->linestr,
988 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
989 PL_parser->bufend += len-highhalf;
990 for (p = pv; p != e; p++) {
991 U8 c = (U8)*p;
992 if (c & 0x80) {
993 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
994 p++;
995 } else {
996 *bufptr++ = (char)c;
997 }
998 }
999 } else {
1000 plain_copy:
1001 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1002 bufptr = PL_parser->bufptr;
1003 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1004 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1005 PL_parser->bufend += len;
1006 Copy(pv, bufptr, len, char);
1007 }
1008 }
1009}
1010
1011/*
9dcc53ea
Z
1012=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1013
1014Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1015immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1016reallocating the buffer if necessary. This means that lexing code that
1017runs later will see the characters as if they had appeared in the input.
1018It is not recommended to do this as part of normal parsing, and most
1019uses of this facility run the risk of the inserted characters being
1020interpreted in an unintended manner.
1021
1022The string to be inserted is represented by octets starting at I<pv>
1023and continuing to the first nul. These octets are interpreted as either
1024UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1025in I<flags>. The characters are recoded for the lexer buffer, according
1026to how the buffer is currently being interpreted (L</lex_bufutf8>).
1027If it is not convenient to nul-terminate a string to be inserted, the
1028L</lex_stuff_pvn> function is more appropriate.
1029
1030=cut
1031*/
1032
1033void
1034Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1035{
1036 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1037 lex_stuff_pvn(pv, strlen(pv), flags);
1038}
1039
1040/*
f0e67a1d
Z
1041=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1042
1043Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1044immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1045reallocating the buffer if necessary. This means that lexing code that
1046runs later will see the characters as if they had appeared in the input.
1047It is not recommended to do this as part of normal parsing, and most
1048uses of this facility run the risk of the inserted characters being
1049interpreted in an unintended manner.
1050
1051The string to be inserted is the string value of I<sv>. The characters
1052are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1053being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1054not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1055need to construct a scalar.
1056
1057=cut
1058*/
1059
1060void
1061Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1062{
1063 char *pv;
1064 STRLEN len;
1065 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1066 if (flags)
1067 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1068 pv = SvPV(sv, len);
1069 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1070}
1071
1072/*
1073=for apidoc Amx|void|lex_unstuff|char *ptr
1074
1075Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1076I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1077This hides the discarded text from any lexing code that runs later,
1078as if the text had never appeared.
1079
1080This is not the normal way to consume lexed text. For that, use
1081L</lex_read_to>.
1082
1083=cut
1084*/
1085
1086void
1087Perl_lex_unstuff(pTHX_ char *ptr)
1088{
1089 char *buf, *bufend;
1090 STRLEN unstuff_len;
1091 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1092 buf = PL_parser->bufptr;
1093 if (ptr < buf)
1094 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1095 if (ptr == buf)
1096 return;
1097 bufend = PL_parser->bufend;
1098 if (ptr > bufend)
1099 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1100 unstuff_len = ptr - buf;
1101 Move(ptr, buf, bufend+1-ptr, char);
1102 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1103 PL_parser->bufend = bufend - unstuff_len;
1104}
1105
1106/*
1107=for apidoc Amx|void|lex_read_to|char *ptr
1108
1109Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1110to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1111performing the correct bookkeeping whenever a newline character is passed.
1112This is the normal way to consume lexed text.
1113
1114Interpretation of the buffer's octets can be abstracted out by
1115using the slightly higher-level functions L</lex_peek_unichar> and
1116L</lex_read_unichar>.
1117
1118=cut
1119*/
1120
1121void
1122Perl_lex_read_to(pTHX_ char *ptr)
1123{
1124 char *s;
1125 PERL_ARGS_ASSERT_LEX_READ_TO;
1126 s = PL_parser->bufptr;
1127 if (ptr < s || ptr > PL_parser->bufend)
1128 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1129 for (; s != ptr; s++)
1130 if (*s == '\n') {
1131 CopLINE_inc(PL_curcop);
1132 PL_parser->linestart = s+1;
1133 }
1134 PL_parser->bufptr = ptr;
1135}
1136
1137/*
1138=for apidoc Amx|void|lex_discard_to|char *ptr
1139
1140Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1141up to I<ptr>. The remaining content of the buffer will be moved, and
1142all pointers into the buffer updated appropriately. I<ptr> must not
1143be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1144it is not permitted to discard text that has yet to be lexed.
1145
1146Normally it is not necessarily to do this directly, because it suffices to
1147use the implicit discarding behaviour of L</lex_next_chunk> and things
1148based on it. However, if a token stretches across multiple lines,
1f317c95 1149and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1150that purpose, then after completion of the token it would be wise to
1151explicitly discard the now-unneeded earlier lines, to avoid future
1152multi-line tokens growing the buffer without bound.
1153
1154=cut
1155*/
1156
1157void
1158Perl_lex_discard_to(pTHX_ char *ptr)
1159{
1160 char *buf;
1161 STRLEN discard_len;
1162 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1163 buf = SvPVX(PL_parser->linestr);
1164 if (ptr < buf)
1165 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1166 if (ptr == buf)
1167 return;
1168 if (ptr > PL_parser->bufptr)
1169 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1170 discard_len = ptr - buf;
1171 if (PL_parser->oldbufptr < ptr)
1172 PL_parser->oldbufptr = ptr;
1173 if (PL_parser->oldoldbufptr < ptr)
1174 PL_parser->oldoldbufptr = ptr;
1175 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1176 PL_parser->last_uni = NULL;
1177 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1178 PL_parser->last_lop = NULL;
1179 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1180 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1181 PL_parser->bufend -= discard_len;
1182 PL_parser->bufptr -= discard_len;
1183 PL_parser->oldbufptr -= discard_len;
1184 PL_parser->oldoldbufptr -= discard_len;
1185 if (PL_parser->last_uni)
1186 PL_parser->last_uni -= discard_len;
1187 if (PL_parser->last_lop)
1188 PL_parser->last_lop -= discard_len;
1189}
1190
1191/*
1192=for apidoc Amx|bool|lex_next_chunk|U32 flags
1193
1194Reads in the next chunk of text to be lexed, appending it to
1195L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1196looked to the end of the current chunk and wants to know more. It is
1197usual, but not necessary, for lexing to have consumed the entirety of
1198the current chunk at this time.
1199
1200If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1201chunk (i.e., the current chunk has been entirely consumed), normally the
1202current chunk will be discarded at the same time that the new chunk is
1203read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1204will not be discarded. If the current chunk has not been entirely
1205consumed, then it will not be discarded regardless of the flag.
1206
1207Returns true if some new text was added to the buffer, or false if the
1208buffer has reached the end of the input text.
1209
1210=cut
1211*/
1212
1213#define LEX_FAKE_EOF 0x80000000
1214
1215bool
1216Perl_lex_next_chunk(pTHX_ U32 flags)
1217{
1218 SV *linestr;
1219 char *buf;
1220 STRLEN old_bufend_pos, new_bufend_pos;
1221 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1222 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1223 bool got_some_for_debugger = 0;
f0e67a1d
Z
1224 bool got_some;
1225 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1226 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1227 linestr = PL_parser->linestr;
1228 buf = SvPVX(linestr);
1229 if (!(flags & LEX_KEEP_PREVIOUS) &&
1230 PL_parser->bufptr == PL_parser->bufend) {
1231 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1232 linestart_pos = 0;
1233 if (PL_parser->last_uni != PL_parser->bufend)
1234 PL_parser->last_uni = NULL;
1235 if (PL_parser->last_lop != PL_parser->bufend)
1236 PL_parser->last_lop = NULL;
1237 last_uni_pos = last_lop_pos = 0;
1238 *buf = 0;
1239 SvCUR(linestr) = 0;
1240 } else {
1241 old_bufend_pos = PL_parser->bufend - buf;
1242 bufptr_pos = PL_parser->bufptr - buf;
1243 oldbufptr_pos = PL_parser->oldbufptr - buf;
1244 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1245 linestart_pos = PL_parser->linestart - buf;
1246 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1247 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1248 }
1249 if (flags & LEX_FAKE_EOF) {
1250 goto eof;
1251 } else if (!PL_parser->rsfp) {
1252 got_some = 0;
1253 } else if (filter_gets(linestr, old_bufend_pos)) {
1254 got_some = 1;
17cc9359 1255 got_some_for_debugger = 1;
f0e67a1d 1256 } else {
580561a3
Z
1257 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1258 sv_setpvs(linestr, "");
f0e67a1d
Z
1259 eof:
1260 /* End of real input. Close filehandle (unless it was STDIN),
1261 * then add implicit termination.
1262 */
1263 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1264 PerlIO_clearerr(PL_parser->rsfp);
1265 else if (PL_parser->rsfp)
1266 (void)PerlIO_close(PL_parser->rsfp);
1267 PL_parser->rsfp = NULL;
737c24fc 1268 PL_parser->in_pod = 0;
f0e67a1d
Z
1269#ifdef PERL_MAD
1270 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1271 PL_faketokens = 1;
1272#endif
1273 if (!PL_in_eval && PL_minus_p) {
1274 sv_catpvs(linestr,
1275 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1276 PL_minus_n = PL_minus_p = 0;
1277 } else if (!PL_in_eval && PL_minus_n) {
1278 sv_catpvs(linestr, /*{*/";}");
1279 PL_minus_n = 0;
1280 } else
1281 sv_catpvs(linestr, ";");
1282 got_some = 1;
1283 }
1284 buf = SvPVX(linestr);
1285 new_bufend_pos = SvCUR(linestr);
1286 PL_parser->bufend = buf + new_bufend_pos;
1287 PL_parser->bufptr = buf + bufptr_pos;
1288 PL_parser->oldbufptr = buf + oldbufptr_pos;
1289 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1290 PL_parser->linestart = buf + linestart_pos;
1291 if (PL_parser->last_uni)
1292 PL_parser->last_uni = buf + last_uni_pos;
1293 if (PL_parser->last_lop)
1294 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1295 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1296 PL_curstash != PL_debstash) {
1297 /* debugger active and we're not compiling the debugger code,
1298 * so store the line into the debugger's array of lines
1299 */
1300 update_debugger_info(NULL, buf+old_bufend_pos,
1301 new_bufend_pos-old_bufend_pos);
1302 }
1303 return got_some;
1304}
1305
1306/*
1307=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1308
1309Looks ahead one (Unicode) character in the text currently being lexed.
1310Returns the codepoint (unsigned integer value) of the next character,
1311or -1 if lexing has reached the end of the input text. To consume the
1312peeked character, use L</lex_read_unichar>.
1313
1314If the next character is in (or extends into) the next chunk of input
1315text, the next chunk will be read in. Normally the current chunk will be
1316discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1317then the current chunk will not be discarded.
1318
1319If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1320is encountered, an exception is generated.
1321
1322=cut
1323*/
1324
1325I32
1326Perl_lex_peek_unichar(pTHX_ U32 flags)
1327{
749123ff 1328 dVAR;
f0e67a1d
Z
1329 char *s, *bufend;
1330 if (flags & ~(LEX_KEEP_PREVIOUS))
1331 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1332 s = PL_parser->bufptr;
1333 bufend = PL_parser->bufend;
1334 if (UTF) {
1335 U8 head;
1336 I32 unichar;
1337 STRLEN len, retlen;
1338 if (s == bufend) {
1339 if (!lex_next_chunk(flags))
1340 return -1;
1341 s = PL_parser->bufptr;
1342 bufend = PL_parser->bufend;
1343 }
1344 head = (U8)*s;
1345 if (!(head & 0x80))
1346 return head;
1347 if (head & 0x40) {
1348 len = PL_utf8skip[head];
1349 while ((STRLEN)(bufend-s) < len) {
1350 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1351 break;
1352 s = PL_parser->bufptr;
1353 bufend = PL_parser->bufend;
1354 }
1355 }
1356 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1357 if (retlen == (STRLEN)-1) {
1358 /* malformed UTF-8 */
1359 ENTER;
1360 SAVESPTR(PL_warnhook);
1361 PL_warnhook = PERL_WARNHOOK_FATAL;
1362 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1363 LEAVE;
1364 }
1365 return unichar;
1366 } else {
1367 if (s == bufend) {
1368 if (!lex_next_chunk(flags))
1369 return -1;
1370 s = PL_parser->bufptr;
1371 }
1372 return (U8)*s;
1373 }
1374}
1375
1376/*
1377=for apidoc Amx|I32|lex_read_unichar|U32 flags
1378
1379Reads the next (Unicode) character in the text currently being lexed.
1380Returns the codepoint (unsigned integer value) of the character read,
1381and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1382if lexing has reached the end of the input text. To non-destructively
1383examine the next character, use L</lex_peek_unichar> instead.
1384
1385If the next character is in (or extends into) the next chunk of input
1386text, the next chunk will be read in. Normally the current chunk will be
1387discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1388then the current chunk will not be discarded.
1389
1390If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1391is encountered, an exception is generated.
1392
1393=cut
1394*/
1395
1396I32
1397Perl_lex_read_unichar(pTHX_ U32 flags)
1398{
1399 I32 c;
1400 if (flags & ~(LEX_KEEP_PREVIOUS))
1401 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1402 c = lex_peek_unichar(flags);
1403 if (c != -1) {
1404 if (c == '\n')
1405 CopLINE_inc(PL_curcop);
1406 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1407 }
1408 return c;
1409}
1410
1411/*
1412=for apidoc Amx|void|lex_read_space|U32 flags
1413
1414Reads optional spaces, in Perl style, in the text currently being
1415lexed. The spaces may include ordinary whitespace characters and
1416Perl-style comments. C<#line> directives are processed if encountered.
1417L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1418at a non-space character (or the end of the input text).
1419
1420If spaces extend into the next chunk of input text, the next chunk will
1421be read in. Normally the current chunk will be discarded at the same
1422time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1423chunk will not be discarded.
1424
1425=cut
1426*/
1427
f0998909
Z
1428#define LEX_NO_NEXT_CHUNK 0x80000000
1429
f0e67a1d
Z
1430void
1431Perl_lex_read_space(pTHX_ U32 flags)
1432{
1433 char *s, *bufend;
1434 bool need_incline = 0;
f0998909 1435 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1436 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1437#ifdef PERL_MAD
1438 if (PL_skipwhite) {
1439 sv_free(PL_skipwhite);
1440 PL_skipwhite = NULL;
1441 }
1442 if (PL_madskills)
1443 PL_skipwhite = newSVpvs("");
1444#endif /* PERL_MAD */
1445 s = PL_parser->bufptr;
1446 bufend = PL_parser->bufend;
1447 while (1) {
1448 char c = *s;
1449 if (c == '#') {
1450 do {
1451 c = *++s;
1452 } while (!(c == '\n' || (c == 0 && s == bufend)));
1453 } else if (c == '\n') {
1454 s++;
1455 PL_parser->linestart = s;
1456 if (s == bufend)
1457 need_incline = 1;
1458 else
1459 incline(s);
1460 } else if (isSPACE(c)) {
1461 s++;
1462 } else if (c == 0 && s == bufend) {
1463 bool got_more;
1464#ifdef PERL_MAD
1465 if (PL_madskills)
1466 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1467#endif /* PERL_MAD */
f0998909
Z
1468 if (flags & LEX_NO_NEXT_CHUNK)
1469 break;
f0e67a1d
Z
1470 PL_parser->bufptr = s;
1471 CopLINE_inc(PL_curcop);
1472 got_more = lex_next_chunk(flags);
1473 CopLINE_dec(PL_curcop);
1474 s = PL_parser->bufptr;
1475 bufend = PL_parser->bufend;
1476 if (!got_more)
1477 break;
1478 if (need_incline && PL_parser->rsfp) {
1479 incline(s);
1480 need_incline = 0;
1481 }
1482 } else {
1483 break;
1484 }
1485 }
1486#ifdef PERL_MAD
1487 if (PL_madskills)
1488 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1489#endif /* PERL_MAD */
1490 PL_parser->bufptr = s;
1491}
1492
1493/*
ffb4593c
NT
1494 * S_incline
1495 * This subroutine has nothing to do with tilting, whether at windmills
1496 * or pinball tables. Its name is short for "increment line". It
57843af0 1497 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1498 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1499 * # line 500 "foo.pm"
1500 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1501 */
1502
76e3520e 1503STATIC void
d9095cec 1504S_incline(pTHX_ const char *s)
463ee0b2 1505{
97aff369 1506 dVAR;
d9095cec
NC
1507 const char *t;
1508 const char *n;
1509 const char *e;
463ee0b2 1510
7918f24d
NC
1511 PERL_ARGS_ASSERT_INCLINE;
1512
57843af0 1513 CopLINE_inc(PL_curcop);
463ee0b2
LW
1514 if (*s++ != '#')
1515 return;
d4c19fe8
AL
1516 while (SPACE_OR_TAB(*s))
1517 s++;
73659bf1
GS
1518 if (strnEQ(s, "line", 4))
1519 s += 4;
1520 else
1521 return;
084592ab 1522 if (SPACE_OR_TAB(*s))
73659bf1 1523 s++;
4e553d73 1524 else
73659bf1 1525 return;
d4c19fe8
AL
1526 while (SPACE_OR_TAB(*s))
1527 s++;
463ee0b2
LW
1528 if (!isDIGIT(*s))
1529 return;
d4c19fe8 1530
463ee0b2
LW
1531 n = s;
1532 while (isDIGIT(*s))
1533 s++;
07714eb4 1534 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1535 return;
bf4acbe4 1536 while (SPACE_OR_TAB(*s))
463ee0b2 1537 s++;
73659bf1 1538 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1539 s++;
73659bf1
GS
1540 e = t + 1;
1541 }
463ee0b2 1542 else {
c35e046a
AL
1543 t = s;
1544 while (!isSPACE(*t))
1545 t++;
73659bf1 1546 e = t;
463ee0b2 1547 }
bf4acbe4 1548 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1549 e++;
1550 if (*e != '\n' && *e != '\0')
1551 return; /* false alarm */
1552
f4dd75d9 1553 if (t - s > 0) {
d9095cec 1554 const STRLEN len = t - s;
8a5ee598 1555#ifndef USE_ITHREADS
19bad673
NC
1556 SV *const temp_sv = CopFILESV(PL_curcop);
1557 const char *cf;
1558 STRLEN tmplen;
1559
1560 if (temp_sv) {
1561 cf = SvPVX(temp_sv);
1562 tmplen = SvCUR(temp_sv);
1563 } else {
1564 cf = NULL;
1565 tmplen = 0;
1566 }
1567
42d9b98d 1568 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1569 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1570 * to *{"::_<newfilename"} */
44867030
NC
1571 /* However, the long form of evals is only turned on by the
1572 debugger - usually they're "(eval %lu)" */
1573 char smallbuf[128];
1574 char *tmpbuf;
1575 GV **gvp;
d9095cec 1576 STRLEN tmplen2 = len;
798b63bc 1577 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1578 tmpbuf = smallbuf;
1579 else
2ae0db35 1580 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1581 tmpbuf[0] = '_';
1582 tmpbuf[1] = '<';
2ae0db35 1583 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1584 tmplen += 2;
8a5ee598
RGS
1585 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1586 if (gvp) {
44867030
NC
1587 char *tmpbuf2;
1588 GV *gv2;
1589
1590 if (tmplen2 + 2 <= sizeof smallbuf)
1591 tmpbuf2 = smallbuf;
1592 else
1593 Newx(tmpbuf2, tmplen2 + 2, char);
1594
1595 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1596 /* Either they malloc'd it, or we malloc'd it,
1597 so no prefix is present in ours. */
1598 tmpbuf2[0] = '_';
1599 tmpbuf2[1] = '<';
1600 }
1601
1602 memcpy(tmpbuf2 + 2, s, tmplen2);
1603 tmplen2 += 2;
1604
8a5ee598 1605 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1606 if (!isGV(gv2)) {
8a5ee598 1607 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1608 /* adjust ${"::_<newfilename"} to store the new file name */
1609 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1610 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1611 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1612 }
44867030
NC
1613
1614 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1615 }
e66cf94c 1616 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1617 }
8a5ee598 1618#endif
05ec9bb3 1619 CopFILE_free(PL_curcop);
d9095cec 1620 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1621 }
57843af0 1622 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1623}
1624
29595ff2 1625#ifdef PERL_MAD
cd81e915 1626/* skip space before PL_thistoken */
29595ff2
NC
1627
1628STATIC char *
1629S_skipspace0(pTHX_ register char *s)
1630{
7918f24d
NC
1631 PERL_ARGS_ASSERT_SKIPSPACE0;
1632
29595ff2
NC
1633 s = skipspace(s);
1634 if (!PL_madskills)
1635 return s;
cd81e915
NC
1636 if (PL_skipwhite) {
1637 if (!PL_thiswhite)
6b29d1f5 1638 PL_thiswhite = newSVpvs("");
cd81e915
NC
1639 sv_catsv(PL_thiswhite, PL_skipwhite);
1640 sv_free(PL_skipwhite);
1641 PL_skipwhite = 0;
1642 }
1643 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1644 return s;
1645}
1646
cd81e915 1647/* skip space after PL_thistoken */
29595ff2
NC
1648
1649STATIC char *
1650S_skipspace1(pTHX_ register char *s)
1651{
d4c19fe8 1652 const char *start = s;
29595ff2
NC
1653 I32 startoff = start - SvPVX(PL_linestr);
1654
7918f24d
NC
1655 PERL_ARGS_ASSERT_SKIPSPACE1;
1656
29595ff2
NC
1657 s = skipspace(s);
1658 if (!PL_madskills)
1659 return s;
1660 start = SvPVX(PL_linestr) + startoff;
cd81e915 1661 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1662 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1663 PL_thistoken = newSVpvn(tstart, start - tstart);
1664 }
1665 PL_realtokenstart = -1;
1666 if (PL_skipwhite) {
1667 if (!PL_nextwhite)
6b29d1f5 1668 PL_nextwhite = newSVpvs("");
cd81e915
NC
1669 sv_catsv(PL_nextwhite, PL_skipwhite);
1670 sv_free(PL_skipwhite);
1671 PL_skipwhite = 0;
29595ff2
NC
1672 }
1673 return s;
1674}
1675
1676STATIC char *
1677S_skipspace2(pTHX_ register char *s, SV **svp)
1678{
c35e046a
AL
1679 char *start;
1680 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1681 const I32 startoff = s - SvPVX(PL_linestr);
1682
7918f24d
NC
1683 PERL_ARGS_ASSERT_SKIPSPACE2;
1684
29595ff2
NC
1685 s = skipspace(s);
1686 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1687 if (!PL_madskills || !svp)
1688 return s;
1689 start = SvPVX(PL_linestr) + startoff;
cd81e915 1690 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1691 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1692 PL_thistoken = newSVpvn(tstart, start - tstart);
1693 PL_realtokenstart = -1;
29595ff2 1694 }
cd81e915 1695 if (PL_skipwhite) {
29595ff2 1696 if (!*svp)
6b29d1f5 1697 *svp = newSVpvs("");
cd81e915
NC
1698 sv_setsv(*svp, PL_skipwhite);
1699 sv_free(PL_skipwhite);
1700 PL_skipwhite = 0;
29595ff2
NC
1701 }
1702
1703 return s;
1704}
1705#endif
1706
80a702cd 1707STATIC void
15f169a1 1708S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1709{
1710 AV *av = CopFILEAVx(PL_curcop);
1711 if (av) {
b9f83d2f 1712 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1713 if (orig_sv)
1714 sv_setsv(sv, orig_sv);
1715 else
1716 sv_setpvn(sv, buf, len);
80a702cd
RGS
1717 (void)SvIOK_on(sv);
1718 SvIV_set(sv, 0);
1719 av_store(av, (I32)CopLINE(PL_curcop), sv);
1720 }
1721}
1722
ffb4593c
NT
1723/*
1724 * S_skipspace
1725 * Called to gobble the appropriate amount and type of whitespace.
1726 * Skips comments as well.
1727 */
1728
76e3520e 1729STATIC char *
cea2e8a9 1730S_skipspace(pTHX_ register char *s)
a687059c 1731{
5db06880 1732#ifdef PERL_MAD
f0e67a1d
Z
1733 char *start = s;
1734#endif /* PERL_MAD */
7918f24d 1735 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1736#ifdef PERL_MAD
cd81e915
NC
1737 if (PL_skipwhite) {
1738 sv_free(PL_skipwhite);
f0e67a1d 1739 PL_skipwhite = NULL;
5db06880 1740 }
f0e67a1d 1741#endif /* PERL_MAD */
3280af22 1742 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1743 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1744 s++;
f0e67a1d
Z
1745 } else {
1746 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1747 PL_bufptr = s;
f0998909
Z
1748 lex_read_space(LEX_KEEP_PREVIOUS |
1749 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1750 LEX_NO_NEXT_CHUNK : 0));
3280af22 1751 s = PL_bufptr;
f0e67a1d
Z
1752 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1753 if (PL_linestart > PL_bufptr)
1754 PL_bufptr = PL_linestart;
1755 return s;
463ee0b2 1756 }
5db06880 1757#ifdef PERL_MAD
f0e67a1d
Z
1758 if (PL_madskills)
1759 PL_skipwhite = newSVpvn(start, s-start);
1760#endif /* PERL_MAD */
5db06880 1761 return s;
a687059c 1762}
378cc40b 1763
ffb4593c
NT
1764/*
1765 * S_check_uni
1766 * Check the unary operators to ensure there's no ambiguity in how they're
1767 * used. An ambiguous piece of code would be:
1768 * rand + 5
1769 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1770 * the +5 is its argument.
1771 */
1772
76e3520e 1773STATIC void
cea2e8a9 1774S_check_uni(pTHX)
ba106d47 1775{
97aff369 1776 dVAR;
d4c19fe8
AL
1777 const char *s;
1778 const char *t;
2f3197b3 1779
3280af22 1780 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1781 return;
3280af22
NIS
1782 while (isSPACE(*PL_last_uni))
1783 PL_last_uni++;
c35e046a
AL
1784 s = PL_last_uni;
1785 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1786 s++;
3280af22 1787 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1788 return;
6136c704 1789
9b387841
NC
1790 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1791 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1792 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1793}
1794
ffb4593c
NT
1795/*
1796 * LOP : macro to build a list operator. Its behaviour has been replaced
1797 * with a subroutine, S_lop() for which LOP is just another name.
1798 */
1799
a0d0e21e
LW
1800#define LOP(f,x) return lop(f,x,s)
1801
ffb4593c
NT
1802/*
1803 * S_lop
1804 * Build a list operator (or something that might be one). The rules:
1805 * - if we have a next token, then it's a list operator [why?]
1806 * - if the next thing is an opening paren, then it's a function
1807 * - else it's a list operator
1808 */
1809
76e3520e 1810STATIC I32
a0be28da 1811S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1812{
97aff369 1813 dVAR;
7918f24d
NC
1814
1815 PERL_ARGS_ASSERT_LOP;
1816
6154021b 1817 pl_yylval.ival = f;
35c8bce7 1818 CLINE;
3280af22
NIS
1819 PL_expect = x;
1820 PL_bufptr = s;
1821 PL_last_lop = PL_oldbufptr;
eb160463 1822 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1823#ifdef PERL_MAD
1824 if (PL_lasttoke)
1825 return REPORT(LSTOP);
1826#else
3280af22 1827 if (PL_nexttoke)
bbf60fe6 1828 return REPORT(LSTOP);
5db06880 1829#endif
79072805 1830 if (*s == '(')
bbf60fe6 1831 return REPORT(FUNC);
29595ff2 1832 s = PEEKSPACE(s);
79072805 1833 if (*s == '(')
bbf60fe6 1834 return REPORT(FUNC);
79072805 1835 else
bbf60fe6 1836 return REPORT(LSTOP);
79072805
LW
1837}
1838
5db06880
NC
1839#ifdef PERL_MAD
1840 /*
1841 * S_start_force
1842 * Sets up for an eventual force_next(). start_force(0) basically does
1843 * an unshift, while start_force(-1) does a push. yylex removes items
1844 * on the "pop" end.
1845 */
1846
1847STATIC void
1848S_start_force(pTHX_ int where)
1849{
1850 int i;
1851
cd81e915 1852 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1853 where = PL_lasttoke;
cd81e915
NC
1854 assert(PL_curforce < 0 || PL_curforce == where);
1855 if (PL_curforce != where) {
5db06880
NC
1856 for (i = PL_lasttoke; i > where; --i) {
1857 PL_nexttoke[i] = PL_nexttoke[i-1];
1858 }
1859 PL_lasttoke++;
1860 }
cd81e915 1861 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1862 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1863 PL_curforce = where;
1864 if (PL_nextwhite) {
5db06880 1865 if (PL_madskills)
6b29d1f5 1866 curmad('^', newSVpvs(""));
cd81e915 1867 CURMAD('_', PL_nextwhite);
5db06880
NC
1868 }
1869}
1870
1871STATIC void
1872S_curmad(pTHX_ char slot, SV *sv)
1873{
1874 MADPROP **where;
1875
1876 if (!sv)
1877 return;
cd81e915
NC
1878 if (PL_curforce < 0)
1879 where = &PL_thismad;
5db06880 1880 else
cd81e915 1881 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1882
cd81e915 1883 if (PL_faketokens)
76f68e9b 1884 sv_setpvs(sv, "");
5db06880
NC
1885 else {
1886 if (!IN_BYTES) {
1887 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1888 SvUTF8_on(sv);
1889 else if (PL_encoding) {
1890 sv_recode_to_utf8(sv, PL_encoding);
1891 }
1892 }
1893 }
1894
1895 /* keep a slot open for the head of the list? */
1896 if (slot != '_' && *where && (*where)->mad_key == '^') {
1897 (*where)->mad_key = slot;
daba3364 1898 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1899 (*where)->mad_val = (void*)sv;
1900 }
1901 else
1902 addmad(newMADsv(slot, sv), where, 0);
1903}
1904#else
b3f24c00
MHM
1905# define start_force(where) NOOP
1906# define curmad(slot, sv) NOOP
5db06880
NC
1907#endif
1908
ffb4593c
NT
1909/*
1910 * S_force_next
9cbb5ea2 1911 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1912 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1913 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1914 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1915 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1916 */
1917
4e553d73 1918STATIC void
cea2e8a9 1919S_force_next(pTHX_ I32 type)
79072805 1920{
97aff369 1921 dVAR;
704d4215
GG
1922#ifdef DEBUGGING
1923 if (DEBUG_T_TEST) {
1924 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1925 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1926 }
1927#endif
5db06880 1928#ifdef PERL_MAD
cd81e915 1929 if (PL_curforce < 0)
5db06880 1930 start_force(PL_lasttoke);
cd81e915 1931 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1932 if (PL_lex_state != LEX_KNOWNEXT)
1933 PL_lex_defer = PL_lex_state;
1934 PL_lex_state = LEX_KNOWNEXT;
1935 PL_lex_expect = PL_expect;
cd81e915 1936 PL_curforce = -1;
5db06880 1937#else
3280af22
NIS
1938 PL_nexttype[PL_nexttoke] = type;
1939 PL_nexttoke++;
1940 if (PL_lex_state != LEX_KNOWNEXT) {
1941 PL_lex_defer = PL_lex_state;
1942 PL_lex_expect = PL_expect;
1943 PL_lex_state = LEX_KNOWNEXT;
79072805 1944 }
5db06880 1945#endif
79072805
LW
1946}
1947
28ac2b49
Z
1948void
1949Perl_yyunlex(pTHX)
1950{
a7aaec61
Z
1951 int yyc = PL_parser->yychar;
1952 if (yyc != YYEMPTY) {
1953 if (yyc) {
1954 start_force(-1);
1955 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1956 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1957 PL_lex_brackets--;
1958 yyc |= (1<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1959 }
1960 force_next(yyc);
1961 }
28ac2b49
Z
1962 PL_parser->yychar = YYEMPTY;
1963 }
1964}
1965
d0a148a6 1966STATIC SV *
15f169a1 1967S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1968{
97aff369 1969 dVAR;
740cce10 1970 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1971 !IN_BYTES
1972 && UTF
1973 && !is_ascii_string((const U8*)start, len)
740cce10 1974 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1975 return sv;
1976}
1977
ffb4593c
NT
1978/*
1979 * S_force_word
1980 * When the lexer knows the next thing is a word (for instance, it has
1981 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1982 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1983 * lookahead.
ffb4593c
NT
1984 *
1985 * Arguments:
b1b65b59 1986 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1987 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1988 * int check_keyword : if true, Perl checks to make sure the word isn't
1989 * a keyword (do this if the word is a label, e.g. goto FOO)
1990 * int allow_pack : if true, : characters will also be allowed (require,
1991 * use, etc. do this)
9cbb5ea2 1992 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1993 */
1994
76e3520e 1995STATIC char *
cea2e8a9 1996S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1997{
97aff369 1998 dVAR;
463ee0b2
LW
1999 register char *s;
2000 STRLEN len;
4e553d73 2001
7918f24d
NC
2002 PERL_ARGS_ASSERT_FORCE_WORD;
2003
29595ff2 2004 start = SKIPSPACE1(start);
463ee0b2 2005 s = start;
7e2040f0 2006 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2007 (allow_pack && *s == ':') ||
15f0808c 2008 (allow_initial_tick && *s == '\'') )
a0d0e21e 2009 {
3280af22 2010 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2011 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2012 return start;
cd81e915 2013 start_force(PL_curforce);
5db06880
NC
2014 if (PL_madskills)
2015 curmad('X', newSVpvn(start,s-start));
463ee0b2 2016 if (token == METHOD) {
29595ff2 2017 s = SKIPSPACE1(s);
463ee0b2 2018 if (*s == '(')
3280af22 2019 PL_expect = XTERM;
463ee0b2 2020 else {
3280af22 2021 PL_expect = XOPERATOR;
463ee0b2 2022 }
79072805 2023 }
e74e6b3d 2024 if (PL_madskills)
63575281 2025 curmad('g', newSVpvs( "forced" ));
9ded7720 2026 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2027 = (OP*)newSVOP(OP_CONST,0,
2028 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2029 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2030 force_next(token);
2031 }
2032 return s;
2033}
2034
ffb4593c
NT
2035/*
2036 * S_force_ident
9cbb5ea2 2037 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2038 * text only contains the "foo" portion. The first argument is a pointer
2039 * to the "foo", and the second argument is the type symbol to prefix.
2040 * Forces the next token to be a "WORD".
9cbb5ea2 2041 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2042 */
2043
76e3520e 2044STATIC void
bfed75c6 2045S_force_ident(pTHX_ register const char *s, int kind)
79072805 2046{
97aff369 2047 dVAR;
7918f24d
NC
2048
2049 PERL_ARGS_ASSERT_FORCE_IDENT;
2050
c35e046a 2051 if (*s) {
90e5519e
NC
2052 const STRLEN len = strlen(s);
2053 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2054 start_force(PL_curforce);
9ded7720 2055 NEXTVAL_NEXTTOKE.opval = o;
79072805 2056 force_next(WORD);
748a9306 2057 if (kind) {
11343788 2058 o->op_private = OPpCONST_ENTERED;
55497cff 2059 /* XXX see note in pp_entereval() for why we forgo typo
2060 warnings if the symbol must be introduced in an eval.
2061 GSAR 96-10-12 */
90e5519e
NC
2062 gv_fetchpvn_flags(s, len,
2063 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2064 : GV_ADD,
2065 kind == '$' ? SVt_PV :
2066 kind == '@' ? SVt_PVAV :
2067 kind == '%' ? SVt_PVHV :
a0d0e21e 2068 SVt_PVGV
90e5519e 2069 );
748a9306 2070 }
79072805
LW
2071 }
2072}
2073
1571675a
GS
2074NV
2075Perl_str_to_version(pTHX_ SV *sv)
2076{
2077 NV retval = 0.0;
2078 NV nshift = 1.0;
2079 STRLEN len;
cfd0369c 2080 const char *start = SvPV_const(sv,len);
9d4ba2ae 2081 const char * const end = start + len;
504618e9 2082 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2083
2084 PERL_ARGS_ASSERT_STR_TO_VERSION;
2085
1571675a 2086 while (start < end) {
ba210ebe 2087 STRLEN skip;
1571675a
GS
2088 UV n;
2089 if (utf)
9041c2e3 2090 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2091 else {
2092 n = *(U8*)start;
2093 skip = 1;
2094 }
2095 retval += ((NV)n)/nshift;
2096 start += skip;
2097 nshift *= 1000;
2098 }
2099 return retval;
2100}
2101
4e553d73 2102/*
ffb4593c
NT
2103 * S_force_version
2104 * Forces the next token to be a version number.
e759cc13
RGS
2105 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2106 * and if "guessing" is TRUE, then no new token is created (and the caller
2107 * must use an alternative parsing method).
ffb4593c
NT
2108 */
2109
76e3520e 2110STATIC char *
e759cc13 2111S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2112{
97aff369 2113 dVAR;
5f66b61c 2114 OP *version = NULL;
44dcb63b 2115 char *d;
5db06880
NC
2116#ifdef PERL_MAD
2117 I32 startoff = s - SvPVX(PL_linestr);
2118#endif
89bfa8cd 2119
7918f24d
NC
2120 PERL_ARGS_ASSERT_FORCE_VERSION;
2121
29595ff2 2122 s = SKIPSPACE1(s);
89bfa8cd 2123
44dcb63b 2124 d = s;
dd629d5b 2125 if (*d == 'v')
44dcb63b 2126 d++;
44dcb63b 2127 if (isDIGIT(*d)) {
e759cc13
RGS
2128 while (isDIGIT(*d) || *d == '_' || *d == '.')
2129 d++;
5db06880
NC
2130#ifdef PERL_MAD
2131 if (PL_madskills) {
cd81e915 2132 start_force(PL_curforce);
5db06880
NC
2133 curmad('X', newSVpvn(s,d-s));
2134 }
2135#endif
4e4da3ac 2136 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2137 SV *ver;
8d08d9ba
DG
2138#ifdef USE_LOCALE_NUMERIC
2139 char *loc = setlocale(LC_NUMERIC, "C");
2140#endif
6154021b 2141 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2142#ifdef USE_LOCALE_NUMERIC
2143 setlocale(LC_NUMERIC, loc);
2144#endif
6154021b 2145 version = pl_yylval.opval;
dd629d5b
GS
2146 ver = cSVOPx(version)->op_sv;
2147 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2148 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2149 SvNV_set(ver, str_to_version(ver));
1571675a 2150 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2151 }
89bfa8cd 2152 }
5db06880
NC
2153 else if (guessing) {
2154#ifdef PERL_MAD
2155 if (PL_madskills) {
cd81e915
NC
2156 sv_free(PL_nextwhite); /* let next token collect whitespace */
2157 PL_nextwhite = 0;
5db06880
NC
2158 s = SvPVX(PL_linestr) + startoff;
2159 }
2160#endif
e759cc13 2161 return s;
5db06880 2162 }
89bfa8cd 2163 }
2164
5db06880
NC
2165#ifdef PERL_MAD
2166 if (PL_madskills && !version) {
cd81e915
NC
2167 sv_free(PL_nextwhite); /* let next token collect whitespace */
2168 PL_nextwhite = 0;
5db06880
NC
2169 s = SvPVX(PL_linestr) + startoff;
2170 }
2171#endif
89bfa8cd 2172 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2173 start_force(PL_curforce);
9ded7720 2174 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2175 force_next(WORD);
89bfa8cd 2176
e759cc13 2177 return s;
89bfa8cd 2178}
2179
ffb4593c 2180/*
91152fc1
DG
2181 * S_force_strict_version
2182 * Forces the next token to be a version number using strict syntax rules.
2183 */
2184
2185STATIC char *
2186S_force_strict_version(pTHX_ char *s)
2187{
2188 dVAR;
2189 OP *version = NULL;
2190#ifdef PERL_MAD
2191 I32 startoff = s - SvPVX(PL_linestr);
2192#endif
2193 const char *errstr = NULL;
2194
2195 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2196
2197 while (isSPACE(*s)) /* leading whitespace */
2198 s++;
2199
2200 if (is_STRICT_VERSION(s,&errstr)) {
2201 SV *ver = newSV(0);
2202 s = (char *)scan_version(s, ver, 0);
2203 version = newSVOP(OP_CONST, 0, ver);
2204 }
4e4da3ac
Z
2205 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2206 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2207 {
91152fc1
DG
2208 PL_bufptr = s;
2209 if (errstr)
2210 yyerror(errstr); /* version required */
2211 return s;
2212 }
2213
2214#ifdef PERL_MAD
2215 if (PL_madskills && !version) {
2216 sv_free(PL_nextwhite); /* let next token collect whitespace */
2217 PL_nextwhite = 0;
2218 s = SvPVX(PL_linestr) + startoff;
2219 }
2220#endif
2221 /* NOTE: The parser sees the package name and the VERSION swapped */
2222 start_force(PL_curforce);
2223 NEXTVAL_NEXTTOKE.opval = version;
2224 force_next(WORD);
2225
2226 return s;
2227}
2228
2229/*
ffb4593c
NT
2230 * S_tokeq
2231 * Tokenize a quoted string passed in as an SV. It finds the next
2232 * chunk, up to end of string or a backslash. It may make a new
2233 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2234 * turns \\ into \.
2235 */
2236
76e3520e 2237STATIC SV *
cea2e8a9 2238S_tokeq(pTHX_ SV *sv)
79072805 2239{
97aff369 2240 dVAR;
79072805
LW
2241 register char *s;
2242 register char *send;
2243 register char *d;
b3ac6de7
IZ
2244 STRLEN len = 0;
2245 SV *pv = sv;
79072805 2246
7918f24d
NC
2247 PERL_ARGS_ASSERT_TOKEQ;
2248
79072805 2249 if (!SvLEN(sv))
b3ac6de7 2250 goto finish;
79072805 2251
a0d0e21e 2252 s = SvPV_force(sv, len);
21a311ee 2253 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2254 goto finish;
463ee0b2 2255 send = s + len;
dcb21ed6
NC
2256 /* This is relying on the SV being "well formed" with a trailing '\0' */
2257 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2258 s++;
2259 if (s == send)
b3ac6de7 2260 goto finish;
79072805 2261 d = s;
be4731d2 2262 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2263 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2264 }
79072805
LW
2265 while (s < send) {
2266 if (*s == '\\') {
a0d0e21e 2267 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2268 s++; /* all that, just for this */
2269 }
2270 *d++ = *s++;
2271 }
2272 *d = '\0';
95a20fc0 2273 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2274 finish:
3280af22 2275 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2276 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2277 return sv;
2278}
2279
ffb4593c
NT
2280/*
2281 * Now come three functions related to double-quote context,
2282 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2283 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2284 * interact with PL_lex_state, and create fake ( ... ) argument lists
2285 * to handle functions and concatenation.
2286 * They assume that whoever calls them will be setting up a fake
2287 * join call, because each subthing puts a ',' after it. This lets
2288 * "lower \luPpEr"
2289 * become
2290 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2291 *
2292 * (I'm not sure whether the spurious commas at the end of lcfirst's
2293 * arguments and join's arguments are created or not).
2294 */
2295
2296/*
2297 * S_sublex_start
6154021b 2298 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2299 *
2300 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2301 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2302 *
2303 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2304 *
2305 * Everything else becomes a FUNC.
2306 *
2307 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2308 * had an OP_CONST or OP_READLINE). This just sets us up for a
2309 * call to S_sublex_push().
2310 */
2311
76e3520e 2312STATIC I32
cea2e8a9 2313S_sublex_start(pTHX)
79072805 2314{
97aff369 2315 dVAR;
6154021b 2316 register const I32 op_type = pl_yylval.ival;
79072805
LW
2317
2318 if (op_type == OP_NULL) {
6154021b 2319 pl_yylval.opval = PL_lex_op;
5f66b61c 2320 PL_lex_op = NULL;
79072805
LW
2321 return THING;
2322 }
2323 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2324 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2325
2326 if (SvTYPE(sv) == SVt_PVIV) {
2327 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2328 STRLEN len;
96a5add6 2329 const char * const p = SvPV_const(sv, len);
740cce10 2330 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2331 SvREFCNT_dec(sv);
2332 sv = nsv;
4e553d73 2333 }
6154021b 2334 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2335 PL_lex_stuff = NULL;
6f33ba73
RGS
2336 /* Allow <FH> // "foo" */
2337 if (op_type == OP_READLINE)
2338 PL_expect = XTERMORDORDOR;
79072805
LW
2339 return THING;
2340 }
e3f73d4e
RGS
2341 else if (op_type == OP_BACKTICK && PL_lex_op) {
2342 /* readpipe() vas overriden */
2343 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2344 pl_yylval.opval = PL_lex_op;
9b201d7d 2345 PL_lex_op = NULL;
e3f73d4e
RGS
2346 PL_lex_stuff = NULL;
2347 return THING;
2348 }
79072805 2349
3280af22 2350 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2351 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2352 PL_sublex_info.sub_op = PL_lex_op;
2353 PL_lex_state = LEX_INTERPPUSH;
55497cff 2354
3280af22
NIS
2355 PL_expect = XTERM;
2356 if (PL_lex_op) {
6154021b 2357 pl_yylval.opval = PL_lex_op;
5f66b61c 2358 PL_lex_op = NULL;
55497cff 2359 return PMFUNC;
2360 }
2361 else
2362 return FUNC;
2363}
2364
ffb4593c
NT
2365/*
2366 * S_sublex_push
2367 * Create a new scope to save the lexing state. The scope will be
2368 * ended in S_sublex_done. Returns a '(', starting the function arguments
2369 * to the uc, lc, etc. found before.
2370 * Sets PL_lex_state to LEX_INTERPCONCAT.
2371 */
2372
76e3520e 2373STATIC I32
cea2e8a9 2374S_sublex_push(pTHX)
55497cff 2375{
27da23d5 2376 dVAR;
f46d017c 2377 ENTER;
55497cff 2378
3280af22 2379 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2380 SAVEBOOL(PL_lex_dojoin);
3280af22 2381 SAVEI32(PL_lex_brackets);
3280af22
NIS
2382 SAVEI32(PL_lex_casemods);
2383 SAVEI32(PL_lex_starts);
651b5b28 2384 SAVEI8(PL_lex_state);
7766f137 2385 SAVEVPTR(PL_lex_inpat);
98246f1e 2386 SAVEI16(PL_lex_inwhat);
57843af0 2387 SAVECOPLINE(PL_curcop);
3280af22 2388 SAVEPPTR(PL_bufptr);
8452ff4b 2389 SAVEPPTR(PL_bufend);
3280af22
NIS
2390 SAVEPPTR(PL_oldbufptr);
2391 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2392 SAVEPPTR(PL_last_lop);
2393 SAVEPPTR(PL_last_uni);
3280af22
NIS
2394 SAVEPPTR(PL_linestart);
2395 SAVESPTR(PL_linestr);
8edd5f42
RGS
2396 SAVEGENERICPV(PL_lex_brackstack);
2397 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2398
2399 PL_linestr = PL_lex_stuff;
a0714e2c 2400 PL_lex_stuff = NULL;
3280af22 2401
9cbb5ea2
GS
2402 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2403 = SvPVX(PL_linestr);
3280af22 2404 PL_bufend += SvCUR(PL_linestr);
bd61b366 2405 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2406 SAVEFREESV(PL_linestr);
2407
2408 PL_lex_dojoin = FALSE;
2409 PL_lex_brackets = 0;
a02a5408
JC
2410 Newx(PL_lex_brackstack, 120, char);
2411 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2412 PL_lex_casemods = 0;
2413 *PL_lex_casestack = '\0';
2414 PL_lex_starts = 0;
2415 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2416 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2417
2418 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2419 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2420 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2421 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2422 else
5f66b61c 2423 PL_lex_inpat = NULL;
79072805 2424
55497cff 2425 return '(';
79072805
LW
2426}
2427
ffb4593c
NT
2428/*
2429 * S_sublex_done
2430 * Restores lexer state after a S_sublex_push.
2431 */
2432
76e3520e 2433STATIC I32
cea2e8a9 2434S_sublex_done(pTHX)
79072805 2435{
27da23d5 2436 dVAR;
3280af22 2437 if (!PL_lex_starts++) {
396482e1 2438 SV * const sv = newSVpvs("");
9aa983d2
JH
2439 if (SvUTF8(PL_linestr))
2440 SvUTF8_on(sv);
3280af22 2441 PL_expect = XOPERATOR;
6154021b 2442 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2443 return THING;
2444 }
2445
3280af22
NIS
2446 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2447 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2448 return yylex();
79072805
LW
2449 }
2450
ffb4593c 2451 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2452 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2453 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2454 PL_linestr = PL_lex_repl;
2455 PL_lex_inpat = 0;
2456 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2457 PL_bufend += SvCUR(PL_linestr);
bd61b366 2458 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2459 SAVEFREESV(PL_linestr);
2460 PL_lex_dojoin = FALSE;
2461 PL_lex_brackets = 0;
3280af22
NIS
2462 PL_lex_casemods = 0;
2463 *PL_lex_casestack = '\0';
2464 PL_lex_starts = 0;
25da4f38 2465 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2466 PL_lex_state = LEX_INTERPNORMAL;
2467 PL_lex_starts++;
e9fa98b2
HS
2468 /* we don't clear PL_lex_repl here, so that we can check later
2469 whether this is an evalled subst; that means we rely on the
2470 logic to ensure sublex_done() is called again only via the
2471 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2472 }
e9fa98b2 2473 else {
3280af22 2474 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2475 PL_lex_repl = NULL;
e9fa98b2 2476 }
79072805 2477 return ',';
ffed7fef
LW
2478 }
2479 else {
5db06880
NC
2480#ifdef PERL_MAD
2481 if (PL_madskills) {
cd81e915
NC
2482 if (PL_thiswhite) {
2483 if (!PL_endwhite)
6b29d1f5 2484 PL_endwhite = newSVpvs("");
cd81e915
NC
2485 sv_catsv(PL_endwhite, PL_thiswhite);
2486 PL_thiswhite = 0;
2487 }
2488 if (PL_thistoken)
76f68e9b 2489 sv_setpvs(PL_thistoken,"");
5db06880 2490 else
cd81e915 2491 PL_realtokenstart = -1;
5db06880
NC
2492 }
2493#endif
f46d017c 2494 LEAVE;
3280af22
NIS
2495 PL_bufend = SvPVX(PL_linestr);
2496 PL_bufend += SvCUR(PL_linestr);
2497 PL_expect = XOPERATOR;
09bef843 2498 PL_sublex_info.sub_inwhat = 0;
79072805 2499 return ')';
ffed7fef
LW
2500 }
2501}
2502
02aa26ce
NT
2503/*
2504 scan_const
2505
2506 Extracts a pattern, double-quoted string, or transliteration. This
2507 is terrifying code.
2508
94def140 2509 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2510 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2511 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2512
94def140
TS
2513 Returns a pointer to the character scanned up to. If this is
2514 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2515 successfully parsed), will leave an OP for the substring scanned
6154021b 2516 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2517 by looking at the next characters herself.
2518
02aa26ce
NT
2519 In patterns:
2520 backslashes:
ff3f963a 2521 constants: \N{NAME} only
02aa26ce
NT
2522 case and quoting: \U \Q \E
2523 stops on @ and $, but not for $ as tail anchor
2524
2525 In transliterations:
2526 characters are VERY literal, except for - not at the start or end
94def140
TS
2527 of the string, which indicates a range. If the range is in bytes,
2528 scan_const expands the range to the full set of intermediate
2529 characters. If the range is in utf8, the hyphen is replaced with
2530 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2531
2532 In double-quoted strings:
2533 backslashes:
2534 double-quoted style: \r and \n
ff3f963a 2535 constants: \x31, etc.
94def140 2536 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2537 case and quoting: \U \Q \E
2538 stops on @ and $
2539
2540 scan_const does *not* construct ops to handle interpolated strings.
2541 It stops processing as soon as it finds an embedded $ or @ variable
2542 and leaves it to the caller to work out what's going on.
2543
94def140
TS
2544 embedded arrays (whether in pattern or not) could be:
2545 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2546
2547 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2548
2549 $ in pattern could be $foo or could be tail anchor. Assumption:
2550 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2551 followed by one of "()| \r\n\t"
02aa26ce
NT
2552
2553 \1 (backreferences) are turned into $1
2554
2555 The structure of the code is
2556 while (there's a character to process) {
94def140
TS
2557 handle transliteration ranges
2558 skip regexp comments /(?#comment)/ and codes /(?{code})/
2559 skip #-initiated comments in //x patterns
2560 check for embedded arrays
02aa26ce
NT
2561 check for embedded scalars
2562 if (backslash) {
94def140 2563 deprecate \1 in substitution replacements
02aa26ce
NT
2564 handle string-changing backslashes \l \U \Q \E, etc.
2565 switch (what was escaped) {
94def140 2566 handle \- in a transliteration (becomes a literal -)
ff3f963a 2567 if a pattern and not \N{, go treat as regular character
94def140
TS
2568 handle \132 (octal characters)
2569 handle \x15 and \x{1234} (hex characters)
ff3f963a 2570 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2571 handle \cV (control characters)
2572 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2573 } (end switch)
77a135fe 2574 continue
02aa26ce 2575 } (end if backslash)
77a135fe 2576 handle regular character
02aa26ce 2577 } (end while character to read)
4e553d73 2578
02aa26ce
NT
2579*/
2580
76e3520e 2581STATIC char *
cea2e8a9 2582S_scan_const(pTHX_ char *start)
79072805 2583{
97aff369 2584 dVAR;
3280af22 2585 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2586 SV *sv = newSV(send - start); /* sv for the constant. See
2587 note below on sizing. */
02aa26ce
NT
2588 register char *s = start; /* start of the constant */
2589 register char *d = SvPVX(sv); /* destination for copies */
2590 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2591 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2592 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2593 I32 this_utf8 = UTF; /* Is the source string assumed
2594 to be UTF8? But, this can
2595 show as true when the source
2596 isn't utf8, as for example
2597 when it is entirely composed
2598 of hex constants */
2599
2600 /* Note on sizing: The scanned constant is placed into sv, which is
2601 * initialized by newSV() assuming one byte of output for every byte of
2602 * input. This routine expects newSV() to allocate an extra byte for a
2603 * trailing NUL, which this routine will append if it gets to the end of
2604 * the input. There may be more bytes of input than output (eg., \N{LATIN
2605 * CAPITAL LETTER A}), or more output than input if the constant ends up
2606 * recoded to utf8, but each time a construct is found that might increase
2607 * the needed size, SvGROW() is called. Its size parameter each time is
2608 * based on the best guess estimate at the time, namely the length used so
2609 * far, plus the length the current construct will occupy, plus room for
2610 * the trailing NUL, plus one byte for every input byte still unscanned */
2611
012bcf8d 2612 UV uv;
4c3a8340
TS
2613#ifdef EBCDIC
2614 UV literal_endpoint = 0;
e294cc5d 2615 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2616#endif
012bcf8d 2617
7918f24d
NC
2618 PERL_ARGS_ASSERT_SCAN_CONST;
2619
bb16bae8 2620 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2621 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2622 /* If we are doing a trans and we know we want UTF8 set expectation */
2623 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2624 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2625 }
2626
2627
79072805 2628 while (s < send || dorange) {
ff3f963a 2629
02aa26ce 2630 /* get transliterations out of the way (they're most literal) */
3280af22 2631 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2632 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2633 if (dorange) {
1ba5c669
JH
2634 I32 i; /* current expanded character */
2635 I32 min; /* first character in range */
2636 I32 max; /* last character in range */
02aa26ce 2637
e294cc5d
JH
2638#ifdef EBCDIC
2639 UV uvmax = 0;
2640#endif
2641
2642 if (has_utf8
2643#ifdef EBCDIC
2644 && !native_range
2645#endif
2646 ) {
9d4ba2ae 2647 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2648 char *e = d++;
2649 while (e-- > c)
2650 *(e + 1) = *e;
25716404 2651 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2652 /* mark the range as done, and continue */
2653 dorange = FALSE;
2654 didrange = TRUE;
2655 continue;
2656 }
2b9d42f0 2657
95a20fc0 2658 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2659#ifdef EBCDIC
2660 SvGROW(sv,
2661 SvLEN(sv) + (has_utf8 ?
2662 (512 - UTF_CONTINUATION_MARK +
2663 UNISKIP(0x100))
2664 : 256));
2665 /* How many two-byte within 0..255: 128 in UTF-8,
2666 * 96 in UTF-8-mod. */
2667#else
9cbb5ea2 2668 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2669#endif
9cbb5ea2 2670 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2671#ifdef EBCDIC
2672 if (has_utf8) {
2673 int j;
2674 for (j = 0; j <= 1; j++) {
2675 char * const c = (char*)utf8_hop((U8*)d, -1);
2676 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2677 if (j)
2678 min = (U8)uv;
2679 else if (uv < 256)
2680 max = (U8)uv;
2681 else {
2682 max = (U8)0xff; /* only to \xff */
2683 uvmax = uv; /* \x{100} to uvmax */
2684 }
2685 d = c; /* eat endpoint chars */
2686 }
2687 }
2688 else {
2689#endif
2690 d -= 2; /* eat the first char and the - */
2691 min = (U8)*d; /* first char in range */
2692 max = (U8)d[1]; /* last char in range */
2693#ifdef EBCDIC
2694 }
2695#endif
8ada0baa 2696
c2e66d9e 2697 if (min > max) {
01ec43d0 2698 Perl_croak(aTHX_
d1573ac7 2699 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2700 (char)min, (char)max);
c2e66d9e
GS
2701 }
2702
c7f1f016 2703#ifdef EBCDIC
4c3a8340
TS
2704 if (literal_endpoint == 2 &&
2705 ((isLOWER(min) && isLOWER(max)) ||
2706 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2707 if (isLOWER(min)) {
2708 for (i = min; i <= max; i++)
2709 if (isLOWER(i))
db42d148 2710 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2711 } else {
2712 for (i = min; i <= max; i++)
2713 if (isUPPER(i))
db42d148 2714 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2715 }
2716 }
2717 else
2718#endif
2719 for (i = min; i <= max; i++)
e294cc5d
JH
2720#ifdef EBCDIC
2721 if (has_utf8) {
2722 const U8 ch = (U8)NATIVE_TO_UTF(i);
2723 if (UNI_IS_INVARIANT(ch))
2724 *d++ = (U8)i;
2725 else {
2726 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2727 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2728 }
2729 }
2730 else
2731#endif
2732 *d++ = (char)i;
2733
2734#ifdef EBCDIC
2735 if (uvmax) {
2736 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2737 if (uvmax > 0x101)
2738 *d++ = (char)UTF_TO_NATIVE(0xff);
2739 if (uvmax > 0x100)
2740 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2741 }
2742#endif
02aa26ce
NT
2743
2744 /* mark the range as done, and continue */
79072805 2745 dorange = FALSE;
01ec43d0 2746 didrange = TRUE;
4c3a8340
TS
2747#ifdef EBCDIC
2748 literal_endpoint = 0;
2749#endif
79072805 2750 continue;
4e553d73 2751 }
02aa26ce
NT
2752
2753 /* range begins (ignore - as first or last char) */
79072805 2754 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2755 if (didrange) {
1fafa243 2756 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2757 }
e294cc5d
JH
2758 if (has_utf8
2759#ifdef EBCDIC
2760 && !native_range
2761#endif
2762 ) {
25716404 2763 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2764 s++;
2765 continue;
2766 }
79072805
LW
2767 dorange = TRUE;
2768 s++;
01ec43d0
GS
2769 }
2770 else {
2771 didrange = FALSE;
4c3a8340
TS
2772#ifdef EBCDIC
2773 literal_endpoint = 0;
e294cc5d 2774 native_range = TRUE;
4c3a8340 2775#endif
01ec43d0 2776 }
79072805 2777 }
02aa26ce
NT
2778
2779 /* if we get here, we're not doing a transliteration */
2780
0f5d15d6
IZ
2781 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2782 except for the last char, which will be done separately. */
3280af22 2783 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2784 if (s[2] == '#') {
e994fd66 2785 while (s+1 < send && *s != ')')
db42d148 2786 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2787 }
2788 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2789 || (s[2] == '?' && s[3] == '{'))
155aba94 2790 {
cc6b7395 2791 I32 count = 1;
0f5d15d6 2792 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2793 char c;
2794
d9f97599
GS
2795 while (count && (c = *regparse)) {
2796 if (c == '\\' && regparse[1])
2797 regparse++;
4e553d73 2798 else if (c == '{')
cc6b7395 2799 count++;
4e553d73 2800 else if (c == '}')
cc6b7395 2801 count--;
d9f97599 2802 regparse++;
cc6b7395 2803 }
e994fd66 2804 if (*regparse != ')')
5bdf89e7 2805 regparse--; /* Leave one char for continuation. */
0f5d15d6 2806 while (s < regparse)
db42d148 2807 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2808 }
748a9306 2809 }
02aa26ce
NT
2810
2811 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2812 else if (*s == '#' && PL_lex_inpat &&
2813 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2814 while (s+1 < send && *s != '\n')
db42d148 2815 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2816 }
02aa26ce 2817
5d1d4326 2818 /* check for embedded arrays
da6eedaa 2819 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2820 */
1749ea0d
TS
2821 else if (*s == '@' && s[1]) {
2822 if (isALNUM_lazy_if(s+1,UTF))
2823 break;
2824 if (strchr(":'{$", s[1]))
2825 break;
2826 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2827 break; /* in regexp, neither @+ nor @- are interpolated */
2828 }
02aa26ce
NT
2829
2830 /* check for embedded scalars. only stop if we're sure it's a
2831 variable.
2832 */
79072805 2833 else if (*s == '$') {
3280af22 2834 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2835 break;
77772344 2836 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2837 if (s[1] == '\\') {
2838 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2839 "Possible unintended interpolation of $\\ in regex");
77772344 2840 }
79072805 2841 break; /* in regexp, $ might be tail anchor */
77772344 2842 }
79072805 2843 }
02aa26ce 2844
2b9d42f0
NIS
2845 /* End of else if chain - OP_TRANS rejoin rest */
2846
02aa26ce 2847 /* backslashes */
79072805 2848 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2849 char* e; /* Can be used for ending '}', etc. */
2850
79072805 2851 s++;
02aa26ce 2852
7d0fc23c
KW
2853 /* warn on \1 - \9 in substitution replacements, but note that \11
2854 * is an octal; and \19 is \1 followed by '9' */
3280af22 2855 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2856 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2857 {
a2a5de95 2858 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2859 *--s = '$';
2860 break;
2861 }
02aa26ce
NT
2862
2863 /* string-change backslash escapes */
3280af22 2864 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2865 --s;
2866 break;
2867 }
ff3f963a
KW
2868 /* In a pattern, process \N, but skip any other backslash escapes.
2869 * This is because we don't want to translate an escape sequence
2870 * into a meta symbol and have the regex compiler use the meta
2871 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2872 * in spite of this, we do have to process \N here while the proper
2873 * charnames handler is in scope. See bugs #56444 and #62056.
2874 * There is a complication because \N in a pattern may also stand
2875 * for 'match a non-nl', and not mean a charname, in which case its
2876 * processing should be deferred to the regex compiler. To be a
2877 * charname it must be followed immediately by a '{', and not look
2878 * like \N followed by a curly quantifier, i.e., not something like
2879 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2880 * quantifier */
2881 else if (PL_lex_inpat
2882 && (*s != 'N'
2883 || s[1] != '{'
2884 || regcurly(s + 1)))
2885 {
cc74c5bd
TS
2886 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2887 goto default_action;
2888 }
02aa26ce 2889
79072805 2890 switch (*s) {
02aa26ce
NT
2891
2892 /* quoted - in transliterations */
79072805 2893 case '-':
3280af22 2894 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2895 *d++ = *s++;
2896 continue;
2897 }
2898 /* FALL THROUGH */
2899 default:
11b8faa4 2900 {
a2a5de95
NC
2901 if ((isALPHA(*s) || isDIGIT(*s)))
2902 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2903 "Unrecognized escape \\%c passed through",
2904 *s);
11b8faa4 2905 /* default action is to copy the quoted character */
f9a63242 2906 goto default_action;
11b8faa4 2907 }
02aa26ce 2908
632403cc 2909 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2910 case '0': case '1': case '2': case '3':
2911 case '4': case '5': case '6': case '7':
ba210ebe 2912 {
53305cf1
NC
2913 I32 flags = 0;
2914 STRLEN len = 3;
77a135fe 2915 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2916 s += len;
2917 }
012bcf8d 2918 goto NUM_ESCAPE_INSERT;
02aa26ce 2919
f0a2b745
KW
2920 /* eg. \o{24} indicates the octal constant \024 */
2921 case 'o':
2922 {
2923 STRLEN len;
454155d9 2924 const char* error;
f0a2b745 2925
454155d9 2926 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2927 s += len;
454155d9 2928 if (! valid) {
f0a2b745
KW
2929 yyerror(error);
2930 continue;
2931 }
2932 goto NUM_ESCAPE_INSERT;
2933 }
2934
77a135fe 2935 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2936 case 'x':
a0ed51b3
LW
2937 ++s;
2938 if (*s == '{') {
9d4ba2ae 2939 char* const e = strchr(s, '}');
a4c04bdc
NC
2940 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2941 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2942 STRLEN len;
355860ce 2943
53305cf1 2944 ++s;
adaeee49 2945 if (!e) {
a0ed51b3 2946 yyerror("Missing right brace on \\x{}");
355860ce 2947 continue;
ba210ebe 2948 }
53305cf1 2949 len = e - s;
77a135fe 2950 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2951 s = e + 1;
a0ed51b3
LW
2952 }
2953 else {
ba210ebe 2954 {
53305cf1 2955 STRLEN len = 2;
a4c04bdc 2956 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2957 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2958 s += len;
2959 }
012bcf8d
GS
2960 }
2961
2962 NUM_ESCAPE_INSERT:
ff3f963a
KW
2963 /* Insert oct or hex escaped character. There will always be
2964 * enough room in sv since such escapes will be longer than any
2965 * UTF-8 sequence they can end up as, except if they force us
2966 * to recode the rest of the string into utf8 */
ba7cea30 2967
77a135fe 2968 /* Here uv is the ordinal of the next character being added in
ff3f963a 2969 * unicode (converted from native). */
77a135fe 2970 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2971 if (!has_utf8 && uv > 255) {
77a135fe
KW
2972 /* Might need to recode whatever we have accumulated so
2973 * far if it contains any chars variant in utf8 or
2974 * utf-ebcdic. */
2975
2976 SvCUR_set(sv, d - SvPVX_const(sv));
2977 SvPOK_on(sv);
2978 *d = '\0';
77a135fe 2979 /* See Note on sizing above. */
7bf79863
KW
2980 sv_utf8_upgrade_flags_grow(sv,
2981 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2982 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2983 d = SvPVX(sv) + SvCUR(sv);
2984 has_utf8 = TRUE;
012bcf8d
GS
2985 }
2986
77a135fe
KW
2987 if (has_utf8) {
2988 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2989 if (PL_lex_inwhat == OP_TRANS &&
2990 PL_sublex_info.sub_op) {
2991 PL_sublex_info.sub_op->op_private |=
2992 (PL_lex_repl ? OPpTRANS_FROM_UTF
2993 : OPpTRANS_TO_UTF);
f9a63242 2994 }
e294cc5d
JH
2995#ifdef EBCDIC
2996 if (uv > 255 && !dorange)
2997 native_range = FALSE;
2998#endif
012bcf8d 2999 }
a0ed51b3 3000 else {
012bcf8d 3001 *d++ = (char)uv;
a0ed51b3 3002 }
012bcf8d
GS
3003 }
3004 else {
c4d5f83a 3005 *d++ = (char) uv;
a0ed51b3 3006 }
79072805 3007 continue;
02aa26ce 3008
4a2d328f 3009 case 'N':
ff3f963a
KW
3010 /* In a non-pattern \N must be a named character, like \N{LATIN
3011 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3012 * mean to match a non-newline. For non-patterns, named
3013 * characters are converted to their string equivalents. In
3014 * patterns, named characters are not converted to their
3015 * ultimate forms for the same reasons that other escapes
3016 * aren't. Instead, they are converted to the \N{U+...} form
3017 * to get the value from the charnames that is in effect right
3018 * now, while preserving the fact that it was a named character
3019 * so that the regex compiler knows this */
3020
3021 /* This section of code doesn't generally use the
3022 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3023 * a close examination of this macro and determined it is a
3024 * no-op except on utfebcdic variant characters. Every
3025 * character generated by this that would normally need to be
3026 * enclosed by this macro is invariant, so the macro is not
3027 * needed, and would complicate use of copy(). There are other
3028 * parts of this file where the macro is used inconsistently,
3029 * but are saved by it being a no-op */
3030
3031 /* The structure of this section of code (besides checking for
3032 * errors and upgrading to utf8) is:
3033 * Further disambiguate between the two meanings of \N, and if
3034 * not a charname, go process it elsewhere
0a96133f
KW
3035 * If of form \N{U+...}, pass it through if a pattern;
3036 * otherwise convert to utf8
3037 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3038 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3039
3040 /* Here, s points to the 'N'; the test below is guaranteed to
3041 * succeed if we are being called on a pattern as we already
3042 * know from a test above that the next character is a '{'.
3043 * On a non-pattern \N must mean 'named sequence, which
3044 * requires braces */
3045 s++;
3046 if (*s != '{') {
3047 yyerror("Missing braces on \\N{}");
3048 continue;
3049 }
3050 s++;
3051
0a96133f 3052 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3053 if (! (e = strchr(s, '}'))) {
3054 if (! PL_lex_inpat) {
5777a3f7 3055 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3056 } else {
3057 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3058 }
0a96133f 3059 continue;
ff3f963a 3060 }
cddc7ef4 3061
ff3f963a 3062 /* Here it looks like a named character */
cddc7ef4 3063
ff3f963a
KW
3064 if (PL_lex_inpat) {
3065
3066 /* XXX This block is temporary code. \N{} implies that the
3067 * pattern is to have Unicode semantics, and therefore
3068 * currently has to be encoded in utf8. By putting it in
3069 * utf8 now, we save a whole pass in the regular expression
3070 * compiler. Once that code is changed so Unicode
3071 * semantics doesn't necessarily have to be in utf8, this
3072 * block should be removed */
3073 if (!has_utf8) {
77a135fe 3074 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3075 SvPOK_on(sv);
e4f3eed8 3076 *d = '\0';
77a135fe 3077 /* See Note on sizing above. */
7bf79863 3078 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3079 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3080 /* 5 = '\N{' + cur char + NUL */
3081 (STRLEN)(send - s) + 5);
f08d6ad9 3082 d = SvPVX(sv) + SvCUR(sv);
89491803 3083 has_utf8 = TRUE;
ff3f963a
KW
3084 }
3085 }
423cee85 3086
ff3f963a
KW
3087 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3088 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3089 | PERL_SCAN_DISALLOW_PREFIX;
3090 STRLEN len;
3091
3092 /* For \N{U+...}, the '...' is a unicode value even on
3093 * EBCDIC machines */
3094 s += 2; /* Skip to next char after the 'U+' */
3095 len = e - s;
3096 uv = grok_hex(s, &len, &flags, NULL);
3097 if (len == 0 || len != (STRLEN)(e - s)) {
3098 yyerror("Invalid hexadecimal number in \\N{U+...}");
3099 s = e + 1;
3100 continue;
3101 }
3102
3103 if (PL_lex_inpat) {
3104
3105 /* Pass through to the regex compiler unchanged. The
3106 * reason we evaluated the number above is to make sure
0a96133f 3107 * there wasn't a syntax error. */
ff3f963a
KW
3108 s -= 5; /* Include the '\N{U+' */
3109 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3110 d += e - s + 1;
3111 }
3112 else { /* Not a pattern: convert the hex to string */
3113
3114 /* If destination is not in utf8, unconditionally
3115 * recode it to be so. This is because \N{} implies
3116 * Unicode semantics, and scalars have to be in utf8
3117 * to guarantee those semantics */
3118 if (! has_utf8) {
3119 SvCUR_set(sv, d - SvPVX_const(sv));
3120 SvPOK_on(sv);
3121 *d = '\0';
3122 /* See Note on sizing above. */
3123 sv_utf8_upgrade_flags_grow(
3124 sv,
3125 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3126 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3127 d = SvPVX(sv) + SvCUR(sv);
3128 has_utf8 = TRUE;
3129 }
3130
3131 /* Add the string to the output */
3132 if (UNI_IS_INVARIANT(uv)) {
3133 *d++ = (char) uv;
3134 }
3135 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3136 }
3137 }
3138 else { /* Here is \N{NAME} but not \N{U+...}. */
3139
3140 SV *res; /* result from charnames */
3141 const char *str; /* the string in 'res' */
3142 STRLEN len; /* its length */
3143
3144 /* Get the value for NAME */
3145 res = newSVpvn(s, e - s);
3146 res = new_constant( NULL, 0, "charnames",
3147 /* includes all of: \N{...} */
3148 res, NULL, s - 3, e - s + 4 );
3149
3150 /* Most likely res will be in utf8 already since the
3151 * standard charnames uses pack U, but a custom translator
3152 * can leave it otherwise, so make sure. XXX This can be
3153 * revisited to not have charnames use utf8 for characters
3154 * that don't need it when regexes don't have to be in utf8
3155 * for Unicode semantics. If doing so, remember EBCDIC */
3156 sv_utf8_upgrade(res);
3157 str = SvPV_const(res, len);
3158
3159 /* Don't accept malformed input */
3160 if (! is_utf8_string((U8 *) str, len)) {
3161 yyerror("Malformed UTF-8 returned by \\N");
3162 }
3163 else if (PL_lex_inpat) {
3164
3165 if (! len) { /* The name resolved to an empty string */
3166 Copy("\\N{}", d, 4, char);
3167 d += 4;
3168 }
3169 else {
3170 /* In order to not lose information for the regex
3171 * compiler, pass the result in the specially made
3172 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3173 * the code points in hex of each character
3174 * returned by charnames */
3175
3176 const char *str_end = str + len;
3177 STRLEN char_length; /* cur char's byte length */
3178 STRLEN output_length; /* and the number of bytes
3179 after this is translated
3180 into hex digits */
3181 const STRLEN off = d - SvPVX_const(sv);
3182
3183 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3184 * max('U+', '.'); and 1 for NUL */
3185 char hex_string[2 * UTF8_MAXBYTES + 5];
3186
3187 /* Get the first character of the result. */
3188 U32 uv = utf8n_to_uvuni((U8 *) str,
3189 len,
3190 &char_length,
3191 UTF8_ALLOW_ANYUV);
3192
3193 /* The call to is_utf8_string() above hopefully
3194 * guarantees that there won't be an error. But
3195 * it's easy here to make sure. The function just
3196 * above warns and returns 0 if invalid utf8, but
3197 * it can also return 0 if the input is validly a
3198 * NUL. Disambiguate */
3199 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3200 uv = UNICODE_REPLACEMENT;
3201 }
3202
3203 /* Convert first code point to hex, including the
3204 * boiler plate before it */
78c35590
NC
3205 output_length =
3206 my_sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
ff3f963a
KW
3207
3208 /* Make sure there is enough space to hold it */
3209 d = off + SvGROW(sv, off
3210 + output_length
3211 + (STRLEN)(send - e)
3212 + 2); /* '}' + NUL */
3213 /* And output it */
3214 Copy(hex_string, d, output_length, char);
3215 d += output_length;
3216
3217 /* For each subsequent character, append dot and
3218 * its ordinal in hex */
3219 while ((str += char_length) < str_end) {
3220 const STRLEN off = d - SvPVX_const(sv);
3221 U32 uv = utf8n_to_uvuni((U8 *) str,
3222 str_end - str,
3223 &char_length,
3224 UTF8_ALLOW_ANYUV);
3225 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3226 uv = UNICODE_REPLACEMENT;
3227 }
3228
78c35590
NC
3229 output_length =
3230 my_sprintf(hex_string, ".%X", (unsigned int) uv);
ff3f963a
KW
3231
3232 d = off + SvGROW(sv, off
3233 + output_length
3234 + (STRLEN)(send - e)
3235 + 2); /* '}' + NUL */
3236 Copy(hex_string, d, output_length, char);
3237 d += output_length;
3238 }
3239
3240 *d++ = '}'; /* Done. Add the trailing brace */
3241 }
3242 }
3243 else { /* Here, not in a pattern. Convert the name to a
3244 * string. */
3245
3246 /* If destination is not in utf8, unconditionally
3247 * recode it to be so. This is because \N{} implies
3248 * Unicode semantics, and scalars have to be in utf8
3249 * to guarantee those semantics */
3250 if (! has_utf8) {
3251 SvCUR_set(sv, d - SvPVX_const(sv));
3252 SvPOK_on(sv);
3253 *d = '\0';
3254 /* See Note on sizing above. */
3255 sv_utf8_upgrade_flags_grow(sv,
3256 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3257 len + (STRLEN)(send - s) + 1);
3258 d = SvPVX(sv) + SvCUR(sv);
3259 has_utf8 = TRUE;
3260 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3261
3262 /* See Note on sizing above. (NOTE: SvCUR() is not
3263 * set correctly here). */
3264 const STRLEN off = d - SvPVX_const(sv);
3265 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3266 }
3267 Copy(str, d, len, char);
3268 d += len;
423cee85 3269 }
423cee85 3270 SvREFCNT_dec(res);
cb233ae3
KW
3271
3272 /* Deprecate non-approved name syntax */
3273 if (ckWARN_d(WARN_DEPRECATED)) {
3274 bool problematic = FALSE;
3275 char* i = s;
3276
3277 /* For non-ut8 input, look to see that the first
3278 * character is an alpha, then loop through the rest
3279 * checking that each is a continuation */
3280 if (! this_utf8) {
3281 if (! isALPHAU(*i)) problematic = TRUE;
3282 else for (i = s + 1; i < e; i++) {
3283 if (isCHARNAME_CONT(*i)) continue;
3284 problematic = TRUE;
3285 break;
3286 }
3287 }
3288 else {
3289 /* Similarly for utf8. For invariants can check
3290 * directly. We accept anything above the latin1
3291 * range because it is immaterial to Perl if it is
3292 * correct or not, and is expensive to check. But
3293 * it is fairly easy in the latin1 range to convert
3294 * the variants into a single character and check
3295 * those */
3296 if (UTF8_IS_INVARIANT(*i)) {
3297 if (! isALPHAU(*i)) problematic = TRUE;
3298 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3299 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3300 *(i+1)))))
3301 {
3302 problematic = TRUE;
3303 }
3304 }
3305 if (! problematic) for (i = s + UTF8SKIP(s);
3306 i < e;
3307 i+= UTF8SKIP(i))
3308 {
3309 if (UTF8_IS_INVARIANT(*i)) {
3310 if (isCHARNAME_CONT(*i)) continue;
3311 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3312 continue;
3313 } else if (isCHARNAME_CONT(
3314 UNI_TO_NATIVE(
3315 UTF8_ACCUMULATE(*i, *(i+1)))))
3316 {
3317 continue;
3318 }
3319 problematic = TRUE;
3320 break;
3321 }
3322 }
3323 if (problematic) {
6e1bad6c
KW
3324 /* The e-i passed to the final %.*s makes sure that
3325 * should the trailing NUL be missing that this
3326 * print won't run off the end of the string */
cb233ae3 3327 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3328 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3329 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3330 }
3331 }
3332 } /* End \N{NAME} */
ff3f963a
KW
3333#ifdef EBCDIC
3334 if (!dorange)
3335 native_range = FALSE; /* \N{} is defined to be Unicode */
3336#endif
3337 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3338 continue;
3339
02aa26ce 3340 /* \c is a control character */
79072805
LW
3341 case 'c':
3342 s++;
961ce445 3343 if (s < send) {
f9d13529 3344 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3345 }
961ce445
RGS
3346 else {
3347 yyerror("Missing control char name in \\c");
3348 }
79072805 3349 continue;
02aa26ce
NT
3350
3351 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3352 case 'b':
db42d148 3353 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3354 break;
3355 case 'n':
db42d148 3356 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3357 break;
3358 case 'r':
db42d148 3359 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3360 break;
3361 case 'f':
db42d148 3362 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3363 break;
3364 case 't':
db42d148 3365 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3366 break;
34a3fe2a 3367 case 'e':
db42d148 3368 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3369 break;
3370 case 'a':
db42d148 3371 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3372 break;
02aa26ce
NT
3373 } /* end switch */
3374
79072805
LW
3375 s++;
3376 continue;
02aa26ce 3377 } /* end if (backslash) */
4c3a8340
TS
3378#ifdef EBCDIC
3379 else
3380 literal_endpoint++;
3381#endif
02aa26ce 3382
f9a63242 3383 default_action:
77a135fe
KW
3384 /* If we started with encoded form, or already know we want it,
3385 then encode the next character */
3386 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3387 STRLEN len = 1;
77a135fe
KW
3388
3389
3390 /* One might think that it is wasted effort in the case of the
3391 * source being utf8 (this_utf8 == TRUE) to take the next character
3392 * in the source, convert it to an unsigned value, and then convert
3393 * it back again. But the source has not been validated here. The
3394 * routine that does the conversion checks for errors like
3395 * malformed utf8 */
3396
5f66b61c
AL
3397 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3398 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3399 if (!has_utf8) {
3400 SvCUR_set(sv, d - SvPVX_const(sv));
3401 SvPOK_on(sv);
3402 *d = '\0';
77a135fe 3403 /* See Note on sizing above. */
7bf79863
KW
3404 sv_utf8_upgrade_flags_grow(sv,
3405 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3406 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3407 d = SvPVX(sv) + SvCUR(sv);
3408 has_utf8 = TRUE;
3409 } else if (need > len) {
3410 /* encoded value larger than old, may need extra space (NOTE:
3411 * SvCUR() is not set correctly here). See Note on sizing
3412 * above. */
9d4ba2ae 3413 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3414 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3415 }
77a135fe
KW
3416 s += len;
3417
5f66b61c 3418 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3419#ifdef EBCDIC
3420 if (uv > 255 && !dorange)
3421 native_range = FALSE;
3422#endif
2b9d42f0
NIS
3423 }
3424 else {
3425 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3426 }
02aa26ce
NT
3427 } /* while loop to process each character */
3428
3429 /* terminate the string and set up the sv */
79072805 3430 *d = '\0';
95a20fc0 3431 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3432 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3433 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3434
79072805 3435 SvPOK_on(sv);
9f4817db 3436 if (PL_encoding && !has_utf8) {
d0063567
DK
3437 sv_recode_to_utf8(sv, PL_encoding);
3438 if (SvUTF8(sv))
3439 has_utf8 = TRUE;
9f4817db 3440 }
2b9d42f0 3441 if (has_utf8) {
7e2040f0 3442 SvUTF8_on(sv);
2b9d42f0 3443 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3444 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3445 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3446 }
3447 }
79072805 3448
02aa26ce 3449 /* shrink the sv if we allocated more than we used */
79072805 3450 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3451 SvPV_shrink_to_cur(sv);
79072805 3452 }
02aa26ce 3453
6154021b 3454 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3455 if (s > PL_bufptr) {
eb0d8d16
NC
3456 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3457 const char *const key = PL_lex_inpat ? "qr" : "q";
3458 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3459 const char *type;
3460 STRLEN typelen;
3461
3462 if (PL_lex_inwhat == OP_TRANS) {
3463 type = "tr";
3464 typelen = 2;
3465 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3466 type = "s";
3467 typelen = 1;
3468 } else {
3469 type = "qq";
3470 typelen = 2;
3471 }
3472
3473 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3474 type, typelen);
3475 }
6154021b 3476 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3477 } else
8990e307 3478 SvREFCNT_dec(sv);
79072805
LW
3479 return s;
3480}
3481
ffb4593c
NT
3482/* S_intuit_more
3483 * Returns TRUE if there's more to the expression (e.g., a subscript),
3484 * FALSE otherwise.
ffb4593c
NT
3485 *
3486 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3487 *
3488 * ->[ and ->{ return TRUE
3489 * { and [ outside a pattern are always subscripts, so return TRUE
3490 * if we're outside a pattern and it's not { or [, then return FALSE
3491 * if we're in a pattern and the first char is a {
3492 * {4,5} (any digits around the comma) returns FALSE
3493 * if we're in a pattern and the first char is a [
3494 * [] returns FALSE
3495 * [SOMETHING] has a funky algorithm to decide whether it's a
3496 * character class or not. It has to deal with things like
3497 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3498 * anything else returns TRUE
3499 */
3500
9cbb5ea2
GS
3501/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3502
76e3520e 3503STATIC int
cea2e8a9 3504S_intuit_more(pTHX_ register char *s)
79072805 3505{
97aff369 3506 dVAR;
7918f24d
NC
3507
3508 PERL_ARGS_ASSERT_INTUIT_MORE;
3509
3280af22 3510 if (PL_lex_brackets)
79072805
LW
3511 return TRUE;
3512 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3513 return TRUE;
3514 if (*s != '{' && *s != '[')
3515 return FALSE;
3280af22 3516 if (!PL_lex_inpat)
79072805
LW
3517 return TRUE;
3518
3519 /* In a pattern, so maybe we have {n,m}. */
3520 if (*s == '{') {
b3155d95 3521 if (regcurly(s)) {
79072805 3522 return FALSE;
b3155d95 3523 }
79072805 3524 return TRUE;
79072805
LW
3525 }
3526
3527 /* On the other hand, maybe we have a character class */
3528
3529 s++;
3530 if (*s == ']' || *s == '^')
3531 return FALSE;
3532 else {
ffb4593c 3533 /* this is terrifying, and it works */
79072805
LW
3534 int weight = 2; /* let's weigh the evidence */
3535 char seen[256];
f27ffc4a 3536 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3537 const char * const send = strchr(s,']');
3280af22 3538 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3539
3540 if (!send) /* has to be an expression */
3541 return TRUE;
3542
3543 Zero(seen,256,char);
3544 if (*s == '$')
3545 weight -= 3;
3546 else if (isDIGIT(*s)) {
3547 if (s[1] != ']') {
3548 if (isDIGIT(s[1]) && s[2] == ']')
3549 weight -= 10;
3550 }
3551 else
3552 weight -= 100;
3553 }
3554 for (; s < send; s++) {
3555 last_un_char = un_char;
3556 un_char = (unsigned char)*s;
3557 switch (*s) {
3558 case '@':
3559 case '&':
3560 case '$':
3561 weight -= seen[un_char] * 10;
7e2040f0 3562 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3563 int len;
8903cb82 3564 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3565 len = (int)strlen(tmpbuf);
3566 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3567 weight -= 100;
3568 else
3569 weight -= 10;
3570 }
3571 else if (*s == '$' && s[1] &&
93a17b20
LW
3572 strchr("[#!%*<>()-=",s[1])) {
3573 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3574 weight -= 10;
3575 else
3576 weight -= 1;
3577 }
3578 break;
3579 case '\\':
3580 un_char = 254;
3581 if (s[1]) {
93a17b20 3582 if (strchr("wds]",s[1]))
79072805 3583 weight += 100;
10edeb5d 3584 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3585 weight += 1;
93a17b20 3586 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3587 weight += 40;
3588 else if (isDIGIT(s[1])) {
3589 weight += 40;
3590 while (s[1] && isDIGIT(s[1]))
3591 s++;
3592 }
3593 }
3594 else
3595 weight += 100;
3596 break;
3597 case '-':
3598 if (s[1] == '\\')
3599 weight += 50;
93a17b20 3600 if (strchr("aA01! ",last_un_char))
79072805 3601 weight += 30;
93a17b20 3602 if (strchr("zZ79~",s[1]))
79072805 3603 weight += 30;
f27ffc4a
GS
3604 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3605 weight -= 5; /* cope with negative subscript */
79072805
LW
3606 break;
3607 default:
3792a11b
NC
3608 if (!isALNUM(last_un_char)
3609 && !(last_un_char == '$' || last_un_char == '@'
3610 || last_un_char == '&')
3611 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3612 char *d = tmpbuf;
3613 while (isALPHA(*s))
3614 *d++ = *s++;
3615 *d = '\0';
5458a98a 3616 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3617 weight -= 150;
3618 }
3619 if (un_char == last_un_char + 1)
3620 weight += 5;
3621 weight -= seen[un_char];
3622 break;
3623 }
3624 seen[un_char]++;
3625 }
3626 if (weight >= 0) /* probably a character class */
3627 return FALSE;
3628 }
3629
3630 return TRUE;
3631}
ffed7fef 3632
ffb4593c
NT
3633/*
3634 * S_intuit_method
3635 *
3636 * Does all the checking to disambiguate
3637 * foo bar
3638 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3639 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3640 *
3641 * First argument is the stuff after the first token, e.g. "bar".
3642 *
3643 * Not a method if bar is a filehandle.
3644 * Not a method if foo is a subroutine prototyped to take a filehandle.
3645 * Not a method if it's really "Foo $bar"
3646 * Method if it's "foo $bar"
3647 * Not a method if it's really "print foo $bar"
3648 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3649 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3650 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3651 * =>
3652 */
3653
76e3520e 3654STATIC int
62d55b22 3655S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3656{
97aff369 3657 dVAR;
a0d0e21e 3658 char *s = start + (*start == '$');
3280af22 3659 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3660 STRLEN len;
3661 GV* indirgv;
5db06880
NC
3662#ifdef PERL_MAD
3663 int soff;
3664#endif
a0d0e21e 3665
7918f24d
NC
3666 PERL_ARGS_ASSERT_INTUIT_METHOD;
3667
a0d0e21e 3668 if (gv) {
62d55b22 3669 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3670 return 0;
62d55b22
NC
3671 if (cv) {
3672 if (SvPOK(cv)) {
3673 const char *proto = SvPVX_const(cv);
3674 if (proto) {
3675 if (*proto == ';')
3676 proto++;
3677 if (*proto == '*')
3678 return 0;
3679 }
b6c543e3
IZ
3680 }
3681 } else
c35e046a 3682 gv = NULL;
a0d0e21e 3683 }
8903cb82 3684 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3685 /* start is the beginning of the possible filehandle/object,
3686 * and s is the end of it
3687 * tmpbuf is a copy of it
3688 */
3689
a0d0e21e 3690 if (*start == '$') {
3ef1310e
RGS
3691 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3692 isUPPER(*PL_tokenbuf))
a0d0e21e 3693 return 0;
5db06880
NC
3694#ifdef PERL_MAD
3695 len = start - SvPVX(PL_linestr);
3696#endif
29595ff2 3697 s = PEEKSPACE(s);
f0092767 3698#ifdef PERL_MAD
5db06880
NC
3699 start = SvPVX(PL_linestr) + len;
3700#endif
3280af22
NIS
3701 PL_bufptr = start;
3702 PL_expect = XREF;
a0d0e21e
LW
3703 return *s == '(' ? FUNCMETH : METHOD;
3704 }
5458a98a 3705 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3706 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3707 len -= 2;
3708 tmpbuf[len] = '\0';
5db06880
NC
3709#ifdef PERL_MAD
3710 soff = s - SvPVX(PL_linestr);
3711#endif
c3e0f903
GS
3712 goto bare_package;
3713 }
90e5519e 3714 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3715 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3716 return 0;
3717 /* filehandle or package name makes it a method */
da51bb9b 3718 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3719#ifdef PERL_MAD
3720 soff = s - SvPVX(PL_linestr);
3721#endif
29595ff2 3722 s = PEEKSPACE(s);
3280af22 3723 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3724 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3725 bare_package:
cd81e915 3726 start_force(PL_curforce);
9ded7720 3727 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3728 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3729 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3730 if (PL_madskills)
3731 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3732 PL_expect = XTERM;
a0d0e21e 3733 force_next(WORD);
3280af22 3734 PL_bufptr = s;
5db06880
NC
3735#ifdef PERL_MAD
3736 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3737#endif
a0d0e21e
LW
3738 return *s == '(' ? FUNCMETH : METHOD;
3739 }
3740 }
3741 return 0;
3742}
3743
16d20bd9 3744/* Encoded script support. filter_add() effectively inserts a
4e553d73 3745 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3746 * Note that the filter function only applies to the current source file
3747 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3748 *
3749 * The datasv parameter (which may be NULL) can be used to pass
3750 * private data to this instance of the filter. The filter function
3751 * can recover the SV using the FILTER_DATA macro and use it to
3752 * store private buffers and state information.
3753 *
3754 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3755 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3756 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3757 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3758 * private use must be set using malloc'd pointers.
3759 */
16d20bd9
AD
3760
3761SV *
864dbfa3 3762Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3763{
97aff369 3764 dVAR;
f4c556ac 3765 if (!funcp)
a0714e2c 3766 return NULL;
f4c556ac 3767
5486870f
DM
3768 if (!PL_parser)
3769 return NULL;
3770
3280af22
NIS
3771 if (!PL_rsfp_filters)
3772 PL_rsfp_filters = newAV();
16d20bd9 3773 if (!datasv)
561b68a9 3774 datasv = newSV(0);
862a34c6 3775 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3776 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3777 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3778 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3779 FPTR2DPTR(void *, IoANY(datasv)),
3780 SvPV_nolen(datasv)));
3280af22
NIS
3781 av_unshift(PL_rsfp_filters, 1);
3782 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3783 return(datasv);
3784}
4e553d73 3785
16d20bd9
AD
3786
3787/* Delete most recently added instance of this filter function. */
a0d0e21e 3788void
864dbfa3 3789Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3790{
97aff369 3791 dVAR;
e0c19803 3792 SV *datasv;
24801a4b 3793
7918f24d
NC
3794 PERL_ARGS_ASSERT_FILTER_DEL;
3795
33073adb 3796#ifdef DEBUGGING
55662e27
JH
3797 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3798 FPTR2DPTR(void*, funcp)));
33073adb 3799#endif
5486870f 3800 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3801 return;
3802 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3803 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3804 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3805 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3806
16d20bd9
AD
3807 return;
3808 }
3809 /* we need to search for the correct entry and clear it */
cea2e8a9 3810 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3811}
3812
3813
1de9afcd
RGS
3814/* Invoke the idxth filter function for the current rsfp. */
3815/* maxlen 0 = read one text line */
16d20bd9 3816I32
864dbfa3 3817Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3818{
97aff369 3819 dVAR;
16d20bd9
AD
3820 filter_t funcp;
3821 SV *datasv = NULL;
f482118e
NC
3822 /* This API is bad. It should have been using unsigned int for maxlen.
3823 Not sure if we want to change the API, but if not we should sanity
3824 check the value here. */
39cd7a59
NC
3825 const unsigned int correct_length
3826 = maxlen < 0 ?
3827#ifdef PERL_MICRO
3828 0x7FFFFFFF
3829#else
3830 INT_MAX
3831#endif
3832 : maxlen;
e50aee73 3833
7918f24d
NC
3834 PERL_ARGS_ASSERT_FILTER_READ;
3835
5486870f 3836 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3837 return -1;
1de9afcd 3838 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3839 /* Provide a default input filter to make life easy. */
3840 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3841 DEBUG_P(PerlIO_printf(Perl_debug_log,
3842 "filter_read %d: from rsfp\n", idx));
f482118e 3843 if (correct_length) {
16d20bd9
AD
3844 /* Want a block */
3845 int len ;
f54cb97a 3846 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3847
3848 /* ensure buf_sv is large enough */
881d8f0a 3849 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3850 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3851 correct_length)) <= 0) {
3280af22 3852 if (PerlIO_error(PL_rsfp))
37120919
AD
3853 return -1; /* error */
3854 else
3855 return 0 ; /* end of file */
3856 }
16d20bd9 3857 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3858 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3859 } else {
3860 /* Want a line */
3280af22
NIS
3861 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3862 if (PerlIO_error(PL_rsfp))
37120919
AD
3863 return -1; /* error */
3864 else
3865 return 0 ; /* end of file */
3866 }
16d20bd9
AD
3867 }
3868 return SvCUR(buf_sv);
3869 }
3870 /* Skip this filter slot if filter has been deleted */
1de9afcd 3871 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3872 DEBUG_P(PerlIO_printf(Perl_debug_log,
3873 "filter_read %d: skipped (filter deleted)\n",
3874 idx));
f482118e 3875 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3876 }
3877 /* Get function pointer hidden within datasv */
8141890a 3878 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3879 DEBUG_P(PerlIO_printf(Perl_debug_log,
3880 "filter_read %d: via function %p (%s)\n",
ca0270c4 3881 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3882 /* Call function. The function is expected to */
3883 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3884 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3885 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3886}
3887
76e3520e 3888STATIC char *
5cc814fd 3889S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3890{
97aff369 3891 dVAR;
7918f24d
NC
3892
3893 PERL_ARGS_ASSERT_FILTER_GETS;
3894
c39cd008 3895#ifdef PERL_CR_FILTER
3280af22 3896 if (!PL_rsfp_filters) {
c39cd008 3897 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3898 }
3899#endif
3280af22 3900 if (PL_rsfp_filters) {
55497cff 3901 if (!append)
3902 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3903 if (FILTER_READ(0, sv, 0) > 0)
3904 return ( SvPVX(sv) ) ;
3905 else
bd61b366 3906 return NULL ;
16d20bd9 3907 }
9d116dd7 3908 else
5cc814fd 3909 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3910}
3911
01ec43d0 3912STATIC HV *
9bde8eb0 3913S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3914{
97aff369 3915 dVAR;
def3634b
GS
3916 GV *gv;
3917
7918f24d
NC
3918 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3919
01ec43d0 3920 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3921 return PL_curstash;
3922
3923 if (len > 2 &&
3924 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3925 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3926 {
3927 return GvHV(gv); /* Foo:: */
def3634b
GS
3928 }
3929
3930 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3931 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3932 if (gv && GvCV(gv)) {
3933 SV * const sv = cv_const_sv(GvCV(gv));
3934 if (sv)
9bde8eb0 3935 pkgname = SvPV_const(sv, len);
def3634b
GS
3936 }
3937
9bde8eb0 3938 return gv_stashpvn(pkgname, len, 0);
def3634b 3939}
a0d0e21e 3940
e3f73d4e
RGS
3941/*
3942 * S_readpipe_override
3943 * Check whether readpipe() is overriden, and generates the appropriate
3944 * optree, provided sublex_start() is called afterwards.
3945 */
3946STATIC void
1d51329b 3947S_readpipe_override(pTHX)
e3f73d4e
RGS
3948{
3949 GV **gvp;
3950 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3951 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3952 if ((gv_readpipe
3953 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3954 ||
3955 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3956 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3957 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3958 {
3959 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 3960 op_append_elem(OP_LIST,
e3f73d4e
RGS
3961 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3962 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3963 }
e3f73d4e
RGS
3964}
3965
5db06880
NC
3966#ifdef PERL_MAD
3967 /*
3968 * Perl_madlex
3969 * The intent of this yylex wrapper is to minimize the changes to the
3970 * tokener when we aren't interested in collecting madprops. It remains
3971 * to be seen how successful this strategy will be...
3972 */
3973
3974int
3975Perl_madlex(pTHX)
3976{
3977 int optype;
3978 char *s = PL_bufptr;
3979
cd81e915
NC
3980 /* make sure PL_thiswhite is initialized */
3981 PL_thiswhite = 0;
3982 PL_thismad = 0;
5db06880 3983
cd81e915 3984 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 3985 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
3986 return S_pending_ident(aTHX);
3987
3988 /* previous token ate up our whitespace? */
cd81e915
NC
3989 if (!PL_lasttoke && PL_nextwhite) {
3990 PL_thiswhite = PL_nextwhite;
3991 PL_nextwhite = 0;
5db06880
NC
3992 }
3993
3994 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3995 PL_realtokenstart = -1;
3996 PL_thistoken = 0;
5db06880
NC
3997 optype = yylex();
3998 s = PL_bufptr;
cd81e915 3999 assert(PL_curforce < 0);
5db06880 4000
cd81e915
NC
4001 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4002 if (!PL_thistoken) {
4003 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4004 PL_thistoken = newSVpvs("");
5db06880 4005 else {
c35e046a 4006 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4007 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4008 }
4009 }
cd81e915
NC
4010 if (PL_thismad) /* install head */
4011 CURMAD('X', PL_thistoken);
5db06880
NC
4012 }
4013
4014 /* last whitespace of a sublex? */
cd81e915
NC
4015 if (optype == ')' && PL_endwhite) {
4016 CURMAD('X', PL_endwhite);
5db06880
NC
4017 }
4018
cd81e915 4019 if (!PL_thismad) {
5db06880
NC
4020
4021 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4022 if (!PL_thiswhite && !PL_endwhite && !optype) {
4023 sv_free(PL_thistoken);
4024 PL_thistoken = 0;
5db06880
NC
4025 return 0;
4026 }
4027
4028 /* put off final whitespace till peg */
4029 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4030 PL_nextwhite = PL_thiswhite;
4031 PL_thiswhite = 0;
5db06880 4032 }
cd81e915
NC
4033 else if (PL_thisopen) {
4034 CURMAD('q', PL_thisopen);
4035 if (PL_thistoken)
4036 sv_free(PL_thistoken);
4037 PL_thistoken = 0;
5db06880
NC
4038 }
4039 else {
4040 /* Store actual token text as madprop X */
cd81e915 4041 CURMAD('X', PL_thistoken);
5db06880
NC
4042 }
4043
cd81e915 4044 if (PL_thiswhite) {
5db06880 4045 /* add preceding whitespace as madprop _ */
cd81e915 4046 CURMAD('_', PL_thiswhite);
5db06880
NC
4047 }
4048
cd81e915 4049 if (PL_thisstuff) {
5db06880 4050 /* add quoted material as madprop = */
cd81e915 4051 CURMAD('=', PL_thisstuff);
5db06880
NC
4052 }
4053
cd81e915 4054 if (PL_thisclose) {
5db06880 4055 /* add terminating quote as madprop Q */
cd81e915 4056 CURMAD('Q', PL_thisclose);
5db06880
NC
4057 }
4058 }
4059
4060 /* special processing based on optype */
4061
4062 switch (optype) {
4063
4064 /* opval doesn't need a TOKEN since it can already store mp */
4065 case WORD:
4066 case METHOD:
4067 case FUNCMETH:
4068 case THING:
4069 case PMFUNC:
4070 case PRIVATEREF:
4071 case FUNC0SUB:
4072 case UNIOPSUB:
4073 case LSTOPSUB:
6154021b
RGS
4074 if (pl_yylval.opval)
4075 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4076 PL_thismad = 0;
5db06880
NC
4077 return optype;
4078
4079 /* fake EOF */
4080 case 0:
4081 optype = PEG;
cd81e915
NC
4082 if (PL_endwhite) {
4083 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4084 PL_endwhite = 0;
5db06880
NC
4085 }
4086 break;
4087
4088 case ']':
4089 case '}':
cd81e915 4090 if (PL_faketokens)
5db06880
NC
4091 break;
4092 /* remember any fake bracket that lexer is about to discard */
4093 if (PL_lex_brackets == 1 &&
4094 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4095 {
4096 s = PL_bufptr;
4097 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4098 s++;
4099 if (*s == '}') {
cd81e915
NC
4100 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4101 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4102 PL_thiswhite = 0;
5db06880
NC
4103 PL_bufptr = s - 1;
4104 break; /* don't bother looking for trailing comment */
4105 }
4106 else
4107 s = PL_bufptr;
4108 }
4109 if (optype == ']')
4110 break;
4111 /* FALLTHROUGH */
4112
4113 /* attach a trailing comment to its statement instead of next token */
4114 case ';':
cd81e915 4115 if (PL_faketokens)
5db06880
NC
4116 break;
4117 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4118 s = PL_bufptr;
4119 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4120 s++;
4121 if (*s == '\n' || *s == '#') {
4122 while (s < PL_bufend && *s != '\n')
4123 s++;
4124 if (s < PL_bufend)
4125 s++;
cd81e915
NC
4126 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4127 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4128 PL_thiswhite = 0;
5db06880
NC
4129 PL_bufptr = s;
4130 }
4131 }
4132 break;
4133
4134 /* pval */
4135 case LABEL:
4136 break;
4137
4138 /* ival */
4139 default:
4140 break;
4141
4142 }
4143
4144 /* Create new token struct. Note: opvals return early above. */
6154021b 4145 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4146 PL_thismad = 0;
5db06880
NC
4147 return optype;
4148}
4149#endif
4150
468aa647 4151STATIC char *
cc6ed77d 4152S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4153 dVAR;
7918f24d
NC
4154
4155 PERL_ARGS_ASSERT_TOKENIZE_USE;
4156
468aa647
RGS
4157 if (PL_expect != XSTATE)
4158 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4159 is_use ? "use" : "no"));
29595ff2 4160 s = SKIPSPACE1(s);
468aa647
RGS
4161 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4162 s = force_version(s, TRUE);
17c59fdf
VP
4163 if (*s == ';' || *s == '}'
4164 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4165 start_force(PL_curforce);
9ded7720 4166 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4167 force_next(WORD);
4168 }
4169 else if (*s == 'v') {
4170 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4171 s = force_version(s, FALSE);
4172 }
4173 }
4174 else {
4175 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4176 s = force_version(s, FALSE);
4177 }
6154021b 4178 pl_yylval.ival = is_use;
468aa647
RGS
4179 return s;
4180}
748a9306 4181#ifdef DEBUGGING
27da23d5 4182 static const char* const exp_name[] =
09bef843 4183 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4184 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4185 };
748a9306 4186#endif
463ee0b2 4187
361d9b55
Z
4188#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4189STATIC bool
4190S_word_takes_any_delimeter(char *p, STRLEN len)
4191{
4192 return (len == 1 && strchr("msyq", p[0])) ||
4193 (len == 2 && (
4194 (p[0] == 't' && p[1] == 'r') ||
4195 (p[0] == 'q' && strchr("qwxr", p[1]))));
4196}
4197
02aa26ce
NT
4198/*
4199 yylex
4200
4201 Works out what to call the token just pulled out of the input
4202 stream. The yacc parser takes care of taking the ops we return and
4203 stitching them into a tree.
4204
4205 Returns:
4206 PRIVATEREF
4207
4208 Structure:
4209 if read an identifier
4210 if we're in a my declaration
4211 croak if they tried to say my($foo::bar)
4212 build the ops for a my() declaration
4213 if it's an access to a my() variable
4214 are we in a sort block?
4215 croak if my($a); $a <=> $b
4216 build ops for access to a my() variable
4217 if in a dq string, and they've said @foo and we can't find @foo
4218 croak
4219 build ops for a bareword
4220 if we already built the token before, use it.
4221*/
4222
20141f0e 4223
dba4d153
JH
4224#ifdef __SC__
4225#pragma segment Perl_yylex
4226#endif
dba4d153 4227int
dba4d153 4228Perl_yylex(pTHX)
20141f0e 4229{
97aff369 4230 dVAR;
3afc138a 4231 register char *s = PL_bufptr;
378cc40b 4232 register char *d;
463ee0b2 4233 STRLEN len;
aa7440fb 4234 bool bof = FALSE;
580561a3 4235 U32 fake_eof = 0;
a687059c 4236
10edeb5d
JH
4237 /* orig_keyword, gvp, and gv are initialized here because
4238 * jump to the label just_a_word_zero can bypass their
4239 * initialization later. */
4240 I32 orig_keyword = 0;
4241 GV *gv = NULL;
4242 GV **gvp = NULL;
4243
bbf60fe6 4244 DEBUG_T( {
396482e1 4245 SV* tmp = newSVpvs("");
b6007c36
DM
4246 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4247 (IV)CopLINE(PL_curcop),
4248 lex_state_names[PL_lex_state],
4249 exp_name[PL_expect],
4250 pv_display(tmp, s, strlen(s), 0, 60));
4251 SvREFCNT_dec(tmp);
bbf60fe6 4252 } );
02aa26ce 4253 /* check if there's an identifier for us to look at */
28ac2b49 4254 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4255 return REPORT(S_pending_ident(aTHX));
bbce6d69 4256
02aa26ce
NT
4257 /* no identifier pending identification */
4258
3280af22 4259 switch (PL_lex_state) {
79072805
LW
4260#ifdef COMMENTARY
4261 case LEX_NORMAL: /* Some compilers will produce faster */
4262 case LEX_INTERPNORMAL: /* code if we comment these out. */
4263 break;
4264#endif
4265
09bef843 4266 /* when we've already built the next token, just pull it out of the queue */
79072805 4267 case LEX_KNOWNEXT:
5db06880
NC
4268#ifdef PERL_MAD
4269 PL_lasttoke--;
6154021b 4270 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4271 if (PL_madskills) {
cd81e915 4272 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4273 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4274 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4275 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4276 PL_thismad->mad_val = 0;
4277 mad_free(PL_thismad);
4278 PL_thismad = 0;
5db06880
NC
4279 }
4280 }
4281 if (!PL_lasttoke) {
4282 PL_lex_state = PL_lex_defer;
4283 PL_expect = PL_lex_expect;
4284 PL_lex_defer = LEX_NORMAL;
4285 if (!PL_nexttoke[PL_lasttoke].next_type)
4286 return yylex();
4287 }
4288#else
3280af22 4289 PL_nexttoke--;
6154021b 4290 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4291 if (!PL_nexttoke) {
4292 PL_lex_state = PL_lex_defer;
4293 PL_expect = PL_lex_expect;
4294 PL_lex_defer = LEX_NORMAL;
463ee0b2 4295 }
5db06880 4296#endif
a7aaec61
Z
4297 {
4298 I32 next_type;
5db06880 4299#ifdef PERL_MAD
a7aaec61 4300 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4301#else
a7aaec61 4302 next_type = PL_nexttype[PL_nexttoke];
5db06880 4303#endif
a7aaec61
Z
4304 if (next_type & (1<<24)) {
4305 if (PL_lex_brackets > 100)
4306 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4307 PL_lex_brackstack[PL_lex_brackets++] = (next_type >> 16) & 0xff;
4308 next_type &= 0xffff;
4309 }
4310#ifdef PERL_MAD
4311 /* FIXME - can these be merged? */
4312 return next_type;
4313#else
4314 return REPORT(next_type);
4315#endif
4316 }
79072805 4317
02aa26ce 4318 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4319 when we get here, PL_bufptr is at the \
02aa26ce 4320 */
79072805
LW
4321 case LEX_INTERPCASEMOD:
4322#ifdef DEBUGGING
3280af22 4323 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4324 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4325#endif
02aa26ce 4326 /* handle \E or end of string */
3280af22 4327 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4328 /* if at a \E */
3280af22 4329 if (PL_lex_casemods) {
f54cb97a 4330 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4331 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4332
3792a11b
NC
4333 if (PL_bufptr != PL_bufend
4334 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4335 PL_bufptr += 2;
4336 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4337#ifdef PERL_MAD
4338 if (PL_madskills)
6b29d1f5 4339 PL_thistoken = newSVpvs("\\E");
5db06880 4340#endif
a0d0e21e 4341 }
bbf60fe6 4342 return REPORT(')');
79072805 4343 }
5db06880
NC
4344#ifdef PERL_MAD
4345 while (PL_bufptr != PL_bufend &&
4346 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4347 if (!PL_thiswhite)
6b29d1f5 4348 PL_thiswhite = newSVpvs("");
cd81e915 4349 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4350 PL_bufptr += 2;
4351 }
4352#else
3280af22
NIS
4353 if (PL_bufptr != PL_bufend)
4354 PL_bufptr += 2;
5db06880 4355#endif
3280af22 4356 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4357 return yylex();
79072805
LW
4358 }
4359 else {
607df283 4360 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4361 "### Saw case modifier\n"); });
3280af22 4362 s = PL_bufptr + 1;
6e909404 4363 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4364#ifdef PERL_MAD
cd81e915 4365 if (!PL_thiswhite)
6b29d1f5 4366 PL_thiswhite = newSVpvs("");
cd81e915 4367 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4368#endif
89122651 4369 PL_bufptr = s + 3;
6e909404
JH
4370 PL_lex_state = LEX_INTERPCONCAT;
4371 return yylex();
a0d0e21e 4372 }
6e909404 4373 else {
90771dc0 4374 I32 tmp;
5db06880
NC
4375 if (!PL_madskills) /* when just compiling don't need correct */
4376 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4377 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4378 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4379 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4380 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4381 return REPORT(')');
6e909404
JH
4382 }
4383 if (PL_lex_casemods > 10)
4384 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4385 PL_lex_casestack[PL_lex_casemods++] = *s;
4386 PL_lex_casestack[PL_lex_casemods] = '\0';
4387 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4388 start_force(PL_curforce);
9ded7720 4389 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4390 force_next('(');
cd81e915 4391 start_force(PL_curforce);
6e909404 4392 if (*s == 'l')
9ded7720 4393 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4394 else if (*s == 'u')
9ded7720 4395 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4396 else if (*s == 'L')
9ded7720 4397 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4398 else if (*s == 'U')
9ded7720 4399 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4400 else if (*s == 'Q')
9ded7720 4401 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4402 else
4403 Perl_croak(aTHX_ "panic: yylex");
5db06880 4404 if (PL_madskills) {
a5849ce5
NC
4405 SV* const tmpsv = newSVpvs("\\ ");
4406 /* replace the space with the character we want to escape
4407 */
4408 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4409 curmad('_', tmpsv);
4410 }
6e909404 4411 PL_bufptr = s + 1;
a0d0e21e 4412 }
79072805 4413 force_next(FUNC);
3280af22
NIS
4414 if (PL_lex_starts) {
4415 s = PL_bufptr;
4416 PL_lex_starts = 0;
5db06880
NC
4417#ifdef PERL_MAD
4418 if (PL_madskills) {
cd81e915
NC
4419 if (PL_thistoken)
4420 sv_free(PL_thistoken);
6b29d1f5 4421 PL_thistoken = newSVpvs("");
5db06880
NC
4422 }
4423#endif
131b3ad0
DM
4424 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4425 if (PL_lex_casemods == 1 && PL_lex_inpat)
4426 OPERATOR(',');
4427 else
4428 Aop(OP_CONCAT);
79072805
LW
4429 }
4430 else
cea2e8a9 4431 return yylex();
79072805
LW
4432 }
4433
55497cff 4434 case LEX_INTERPPUSH:
bbf60fe6 4435 return REPORT(sublex_push());
55497cff 4436
79072805 4437 case LEX_INTERPSTART:
3280af22 4438 if (PL_bufptr == PL_bufend)
bbf60fe6 4439 return REPORT(sublex_done());
607df283 4440 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4441 "### Interpolated variable\n"); });
3280af22
NIS
4442 PL_expect = XTERM;
4443 PL_lex_dojoin = (*PL_bufptr == '@');
4444 PL_lex_state = LEX_INTERPNORMAL;
4445 if (PL_lex_dojoin) {
cd81e915 4446 start_force(PL_curforce);
9ded7720 4447 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4448 force_next(',');
cd81e915 4449 start_force(PL_curforce);
a0d0e21e 4450 force_ident("\"", '$');
cd81e915 4451 start_force(PL_curforce);
9ded7720 4452 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4453 force_next('$');
cd81e915 4454 start_force(PL_curforce);
9ded7720 4455 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4456 force_next('(');
cd81e915 4457 start_force(PL_curforce);
9ded7720 4458 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4459 force_next(FUNC);
4460 }
3280af22
NIS
4461 if (PL_lex_starts++) {
4462 s = PL_bufptr;
5db06880
NC
4463#ifdef PERL_MAD
4464 if (PL_madskills) {
cd81e915
NC
4465 if (PL_thistoken)
4466 sv_free(PL_thistoken);
6b29d1f5 4467 PL_thistoken = newSVpvs("");
5db06880
NC
4468 }
4469#endif
131b3ad0
DM
4470 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4471 if (!PL_lex_casemods && PL_lex_inpat)
4472 OPERATOR(',');
4473 else
4474 Aop(OP_CONCAT);
79072805 4475 }
cea2e8a9 4476 return yylex();
79072805
LW
4477
4478 case LEX_INTERPENDMAYBE:
3280af22
NIS
4479 if (intuit_more(PL_bufptr)) {
4480 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4481 break;
4482 }
4483 /* FALL THROUGH */
4484
4485 case LEX_INTERPEND:
3280af22
NIS
4486 if (PL_lex_dojoin) {
4487 PL_lex_dojoin = FALSE;
4488 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4489#ifdef PERL_MAD
4490 if (PL_madskills) {
cd81e915
NC
4491 if (PL_thistoken)
4492 sv_free(PL_thistoken);
6b29d1f5 4493 PL_thistoken = newSVpvs("");
5db06880
NC
4494 }
4495#endif
bbf60fe6 4496 return REPORT(')');
79072805 4497 }
43a16006 4498 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4499 && SvEVALED(PL_lex_repl))
43a16006 4500 {
e9fa98b2 4501 if (PL_bufptr != PL_bufend)
cea2e8a9 4502 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4503 PL_lex_repl = NULL;
e9fa98b2 4504 }
79072805
LW
4505 /* FALLTHROUGH */
4506 case LEX_INTERPCONCAT:
4507#ifdef DEBUGGING
3280af22 4508 if (PL_lex_brackets)
cea2e8a9 4509 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4510#endif
3280af22 4511 if (PL_bufptr == PL_bufend)
bbf60fe6 4512 return REPORT(sublex_done());
79072805 4513
3280af22
NIS
4514 if (SvIVX(PL_linestr) == '\'') {
4515 SV *sv = newSVsv(PL_linestr);
4516 if (!PL_lex_inpat)
76e3520e 4517 sv = tokeq(sv);
3280af22 4518 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4519 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4520 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4521 s = PL_bufend;
79072805
LW
4522 }
4523 else {
3280af22 4524 s = scan_const(PL_bufptr);
79072805 4525 if (*s == '\\')
3280af22 4526 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4527 else
3280af22 4528 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4529 }
4530
3280af22 4531 if (s != PL_bufptr) {
cd81e915 4532 start_force(PL_curforce);
5db06880
NC
4533 if (PL_madskills) {
4534 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4535 }
6154021b 4536 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4537 PL_expect = XTERM;
79072805 4538 force_next(THING);
131b3ad0 4539 if (PL_lex_starts++) {
5db06880
NC
4540#ifdef PERL_MAD
4541 if (PL_madskills) {
cd81e915
NC
4542 if (PL_thistoken)
4543 sv_free(PL_thistoken);
6b29d1f5 4544 PL_thistoken = newSVpvs("");
5db06880
NC
4545 }
4546#endif
131b3ad0
DM
4547 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4548 if (!PL_lex_casemods && PL_lex_inpat)
4549 OPERATOR(',');
4550 else
4551 Aop(OP_CONCAT);
4552 }
79072805 4553 else {
3280af22 4554 PL_bufptr = s;
cea2e8a9 4555 return yylex();
79072805
LW
4556 }
4557 }
4558
cea2e8a9 4559 return yylex();
a0d0e21e 4560 case LEX_FORMLINE:
3280af22
NIS
4561 PL_lex_state = LEX_NORMAL;
4562 s = scan_formline(PL_bufptr);
4563 if (!PL_lex_formbrack)
a0d0e21e
LW
4564 goto rightbracket;
4565 OPERATOR(';');
79072805
LW
4566 }
4567
3280af22
NIS
4568 s = PL_bufptr;
4569 PL_oldoldbufptr = PL_oldbufptr;
4570 PL_oldbufptr = s;
463ee0b2
LW
4571
4572 retry:
5db06880 4573#ifdef PERL_MAD
cd81e915
NC
4574 if (PL_thistoken) {
4575 sv_free(PL_thistoken);
4576 PL_thistoken = 0;
5db06880 4577 }
cd81e915 4578 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4579#endif
378cc40b
LW
4580 switch (*s) {
4581 default:
7e2040f0 4582 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4583 goto keylookup;
b1fc3636
CJ
4584 {
4585 unsigned char c = *s;
4586 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4587 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4588 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4589 } else {
4590 d = PL_linestart;
4591 }
4592 *s = '\0';
4593 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4594 }
e929a76b
LW
4595 case 4:
4596 case 26:
4597 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4598 case 0:
5db06880
NC
4599#ifdef PERL_MAD
4600 if (PL_madskills)
cd81e915 4601 PL_faketokens = 0;
5db06880 4602#endif
3280af22
NIS
4603 if (!PL_rsfp) {
4604 PL_last_uni = 0;
4605 PL_last_lop = 0;
a7aaec61
Z
4606 if (PL_lex_brackets &&
4607 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4608 yyerror((const char *)
4609 (PL_lex_formbrack
4610 ? "Format not terminated"
4611 : "Missing right curly or square bracket"));
c5ee2135 4612 }
4e553d73 4613 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4614 "### Tokener got EOF\n");
5f80b19c 4615 } );
79072805 4616 TOKEN(0);
463ee0b2 4617 }
3280af22 4618 if (s++ < PL_bufend)
a687059c 4619 goto retry; /* ignore stray nulls */
3280af22
NIS
4620 PL_last_uni = 0;
4621 PL_last_lop = 0;
4622 if (!PL_in_eval && !PL_preambled) {
4623 PL_preambled = TRUE;
5db06880
NC
4624#ifdef PERL_MAD
4625 if (PL_madskills)
cd81e915 4626 PL_faketokens = 1;
5db06880 4627#endif
5ab7ff98
NC
4628 if (PL_perldb) {
4629 /* Generate a string of Perl code to load the debugger.
4630 * If PERL5DB is set, it will return the contents of that,
4631 * otherwise a compile-time require of perl5db.pl. */
4632
4633 const char * const pdb = PerlEnv_getenv("PERL5DB");
4634
4635 if (pdb) {
4636 sv_setpv(PL_linestr, pdb);
4637 sv_catpvs(PL_linestr,";");
4638 } else {
4639 SETERRNO(0,SS_NORMAL);
4640 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4641 }
4642 } else
4643 sv_setpvs(PL_linestr,"");
c62eb204
NC
4644 if (PL_preambleav) {
4645 SV **svp = AvARRAY(PL_preambleav);
4646 SV **const end = svp + AvFILLp(PL_preambleav);
4647 while(svp <= end) {
4648 sv_catsv(PL_linestr, *svp);
4649 ++svp;
396482e1 4650 sv_catpvs(PL_linestr, ";");
91b7def8 4651 }
daba3364 4652 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4653 PL_preambleav = NULL;
91b7def8 4654 }
9f639728
FR
4655 if (PL_minus_E)
4656 sv_catpvs(PL_linestr,
4657 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4658 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4659 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4660 if (PL_minus_l)
396482e1 4661 sv_catpvs(PL_linestr,"chomp;");
3280af22 4662 if (PL_minus_a) {
3280af22 4663 if (PL_minus_F) {
3792a11b
NC
4664 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4665 || *PL_splitstr == '"')
3280af22 4666 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4667 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4668 else {
c8ef6a4b
NC
4669 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4670 bytes can be used as quoting characters. :-) */
dd374669 4671 const char *splits = PL_splitstr;
91d456ae 4672 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4673 do {
4674 /* Need to \ \s */
dd374669
AL
4675 if (*splits == '\\')
4676 sv_catpvn(PL_linestr, splits, 1);
4677 sv_catpvn(PL_linestr, splits, 1);
4678 } while (*splits++);
48c4c863
NC
4679 /* This loop will embed the trailing NUL of
4680 PL_linestr as the last thing it does before
4681 terminating. */
396482e1 4682 sv_catpvs(PL_linestr, ");");
54310121 4683 }
2304df62
AD
4684 }
4685 else
396482e1 4686 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4687 }
79072805 4688 }
396482e1 4689 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4690 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4691 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4692 PL_last_lop = PL_last_uni = NULL;
65269a95 4693 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4694 update_debugger_info(PL_linestr, NULL, 0);
79072805 4695 goto retry;
a687059c 4696 }
e929a76b 4697 do {
580561a3
Z
4698 fake_eof = 0;
4699 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4700 if (0) {
7e28d3af 4701 fake_eof:
f0e67a1d
Z
4702 fake_eof = LEX_FAKE_EOF;
4703 }
4704 PL_bufptr = PL_bufend;
17cc9359 4705 CopLINE_inc(PL_curcop);
f0e67a1d 4706 if (!lex_next_chunk(fake_eof)) {
17cc9359 4707 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4708 s = PL_bufptr;
4709 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4710 }
17cc9359 4711 CopLINE_dec(PL_curcop);
5db06880 4712#ifdef PERL_MAD
f0e67a1d 4713 if (!PL_rsfp)
cd81e915 4714 PL_realtokenstart = -1;
5db06880 4715#endif
f0e67a1d 4716 s = PL_bufptr;
7aa207d6
JH
4717 /* If it looks like the start of a BOM or raw UTF-16,
4718 * check if it in fact is. */
580561a3 4719 if (bof && PL_rsfp &&
7aa207d6
JH
4720 (*s == 0 ||
4721 *(U8*)s == 0xEF ||
4722 *(U8*)s >= 0xFE ||
4723 s[1] == 0)) {
eb160463 4724 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4725 if (bof) {
3280af22 4726 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4727 s = swallow_bom((U8*)s);
e929a76b 4728 }
378cc40b 4729 }
737c24fc 4730 if (PL_parser->in_pod) {
a0d0e21e 4731 /* Incest with pod. */
5db06880
NC
4732#ifdef PERL_MAD
4733 if (PL_madskills)
cd81e915 4734 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4735#endif
01a57ef7 4736 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4737 sv_setpvs(PL_linestr, "");
3280af22
NIS
4738 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4739 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4740 PL_last_lop = PL_last_uni = NULL;
737c24fc 4741 PL_parser->in_pod = 0;
a0d0e21e 4742 }
4e553d73 4743 }
85613cab
Z
4744 if (PL_rsfp)
4745 incline(s);
737c24fc 4746 } while (PL_parser->in_pod);
3280af22 4747 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4748 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4749 PL_last_lop = PL_last_uni = NULL;
57843af0 4750 if (CopLINE(PL_curcop) == 1) {
3280af22 4751 while (s < PL_bufend && isSPACE(*s))
79072805 4752 s++;
a0d0e21e 4753 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4754 s++;
5db06880
NC
4755#ifdef PERL_MAD
4756 if (PL_madskills)
cd81e915 4757 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4758#endif
bd61b366 4759 d = NULL;
3280af22 4760 if (!PL_in_eval) {
44a8e56a 4761 if (*s == '#' && *(s+1) == '!')
4762 d = s + 2;
4763#ifdef ALTERNATE_SHEBANG
4764 else {
bfed75c6 4765 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4766 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4767 d = s + (sizeof(as) - 1);
4768 }
4769#endif /* ALTERNATE_SHEBANG */
4770 }
4771 if (d) {
b8378b72 4772 char *ipath;
774d564b 4773 char *ipathend;
b8378b72 4774
774d564b 4775 while (isSPACE(*d))
b8378b72
CS
4776 d++;
4777 ipath = d;
774d564b 4778 while (*d && !isSPACE(*d))
4779 d++;
4780 ipathend = d;
4781
4782#ifdef ARG_ZERO_IS_SCRIPT
4783 if (ipathend > ipath) {
4784 /*
4785 * HP-UX (at least) sets argv[0] to the script name,
4786 * which makes $^X incorrect. And Digital UNIX and Linux,
4787 * at least, set argv[0] to the basename of the Perl
4788 * interpreter. So, having found "#!", we'll set it right.
4789 */
fafc274c
NC
4790 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4791 SVt_PV)); /* $^X */
774d564b 4792 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4793 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4794 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4795 SvSETMAGIC(x);
4796 }
556c1dec
JH
4797 else {
4798 STRLEN blen;
4799 STRLEN llen;
cfd0369c 4800 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4801 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4802 if (llen < blen) {
4803 bstart += blen - llen;
4804 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4805 sv_setpvn(x, ipath, ipathend - ipath);
4806 SvSETMAGIC(x);
4807 }
4808 }
4809 }
774d564b 4810 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4811 }
774d564b 4812#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4813
4814 /*
4815 * Look for options.
4816 */
748a9306 4817 d = instr(s,"perl -");
84e30d1a 4818 if (!d) {
748a9306 4819 d = instr(s,"perl");
84e30d1a
GS
4820#if defined(DOSISH)
4821 /* avoid getting into infinite loops when shebang
4822 * line contains "Perl" rather than "perl" */
4823 if (!d) {
4824 for (d = ipathend-4; d >= ipath; --d) {
4825 if ((*d == 'p' || *d == 'P')
4826 && !ibcmp(d, "perl", 4))
4827 {
4828 break;
4829 }
4830 }
4831 if (d < ipath)
bd61b366 4832 d = NULL;
84e30d1a
GS
4833 }
4834#endif
4835 }
44a8e56a 4836#ifdef ALTERNATE_SHEBANG
4837 /*
4838 * If the ALTERNATE_SHEBANG on this system starts with a
4839 * character that can be part of a Perl expression, then if
4840 * we see it but not "perl", we're probably looking at the
4841 * start of Perl code, not a request to hand off to some
4842 * other interpreter. Similarly, if "perl" is there, but
4843 * not in the first 'word' of the line, we assume the line
4844 * contains the start of the Perl program.
44a8e56a 4845 */
4846 if (d && *s != '#') {
f54cb97a 4847 const char *c = ipath;
44a8e56a 4848 while (*c && !strchr("; \t\r\n\f\v#", *c))
4849 c++;
4850 if (c < d)
bd61b366 4851 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4852 else
4853 *s = '#'; /* Don't try to parse shebang line */
4854 }
774d564b 4855#endif /* ALTERNATE_SHEBANG */
748a9306 4856 if (!d &&
44a8e56a 4857 *s == '#' &&
774d564b 4858 ipathend > ipath &&
3280af22 4859 !PL_minus_c &&
748a9306 4860 !instr(s,"indir") &&
3280af22 4861 instr(PL_origargv[0],"perl"))
748a9306 4862 {
27da23d5 4863 dVAR;
9f68db38 4864 char **newargv;
9f68db38 4865
774d564b 4866 *ipathend = '\0';
4867 s = ipathend + 1;
3280af22 4868 while (s < PL_bufend && isSPACE(*s))
9f68db38 4869 s++;
3280af22 4870 if (s < PL_bufend) {
d85f917e 4871 Newx(newargv,PL_origargc+3,char*);
9f68db38 4872 newargv[1] = s;
3280af22 4873 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4874 s++;
4875 *s = '\0';
3280af22 4876 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4877 }
4878 else
3280af22 4879 newargv = PL_origargv;
774d564b 4880 newargv[0] = ipath;
b35112e7 4881 PERL_FPU_PRE_EXEC
b4748376 4882 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4883 PERL_FPU_POST_EXEC
cea2e8a9 4884 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4885 }
748a9306 4886 if (d) {
c35e046a
AL
4887 while (*d && !isSPACE(*d))
4888 d++;
4889 while (SPACE_OR_TAB(*d))
4890 d++;
748a9306
LW
4891
4892 if (*d++ == '-') {
f54cb97a 4893 const bool switches_done = PL_doswitches;
fb993905
GA
4894 const U32 oldpdb = PL_perldb;
4895 const bool oldn = PL_minus_n;
4896 const bool oldp = PL_minus_p;
c7030b81 4897 const char *d1 = d;
fb993905 4898
8cc95fdb 4899 do {
4ba71d51
FC
4900 bool baduni = FALSE;
4901 if (*d1 == 'C') {
bd0ab00d
NC
4902 const char *d2 = d1 + 1;
4903 if (parse_unicode_opts((const char **)&d2)
4904 != PL_unicode)
4905 baduni = TRUE;
4ba71d51
FC
4906 }
4907 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4908 const char * const m = d1;
4909 while (*d1 && !isSPACE(*d1))
4910 d1++;
cea2e8a9 4911 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4912 (int)(d1 - m), m);
8cc95fdb 4913 }
c7030b81
NC
4914 d1 = moreswitches(d1);
4915 } while (d1);
f0b2cf55
YST
4916 if (PL_doswitches && !switches_done) {
4917 int argc = PL_origargc;
4918 char **argv = PL_origargv;
4919 do {
4920 argc--,argv++;
4921 } while (argc && argv[0][0] == '-' && argv[0][1]);
4922 init_argv_symbols(argc,argv);
4923 }
65269a95 4924 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4925 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4926 /* if we have already added "LINE: while (<>) {",
4927 we must not do it again */
748a9306 4928 {
76f68e9b 4929 sv_setpvs(PL_linestr, "");
3280af22
NIS
4930 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4931 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4932 PL_last_lop = PL_last_uni = NULL;
3280af22 4933 PL_preambled = FALSE;
65269a95 4934 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4935 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4936 goto retry;
4937 }
a0d0e21e 4938 }
79072805 4939 }
9f68db38 4940 }
79072805 4941 }
3280af22
NIS
4942 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4943 PL_bufptr = s;
4944 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4945 return yylex();
ae986130 4946 }
378cc40b 4947 goto retry;
4fdae800 4948 case '\r':
6a27c188 4949#ifdef PERL_STRICT_CR
cea2e8a9 4950 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4951 Perl_croak(aTHX_
cc507455 4952 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4953#endif
4fdae800 4954 case ' ': case '\t': case '\f': case 013:
5db06880 4955#ifdef PERL_MAD
cd81e915 4956 PL_realtokenstart = -1;
ac372eb8
RD
4957 if (!PL_thiswhite)
4958 PL_thiswhite = newSVpvs("");
4959 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4960#endif
ac372eb8 4961 s++;
378cc40b 4962 goto retry;
378cc40b 4963 case '#':
e929a76b 4964 case '\n':
5db06880 4965#ifdef PERL_MAD
cd81e915 4966 PL_realtokenstart = -1;
5db06880 4967 if (PL_madskills)
cd81e915 4968 PL_faketokens = 0;
5db06880 4969#endif
3280af22 4970 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4971 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4972 /* handle eval qq[#line 1 "foo"\n ...] */
4973 CopLINE_dec(PL_curcop);
4974 incline(s);
4975 }
5db06880
NC
4976 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4977 s = SKIPSPACE0(s);
4978 if (!PL_in_eval || PL_rsfp)
4979 incline(s);
4980 }
4981 else {
4982 d = s;
4983 while (d < PL_bufend && *d != '\n')
4984 d++;
4985 if (d < PL_bufend)
4986 d++;
4987 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4988 Perl_croak(aTHX_ "panic: input overflow");
4989#ifdef PERL_MAD
4990 if (PL_madskills)
cd81e915 4991 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4992#endif
4993 s = d;
4994 incline(s);
4995 }
3280af22
NIS
4996 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4997 PL_bufptr = s;
4998 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4999 return yylex();
a687059c 5000 }
378cc40b 5001 }
a687059c 5002 else {
5db06880
NC
5003#ifdef PERL_MAD
5004 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5005 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5006 PL_faketokens = 0;
5db06880
NC
5007 s = SKIPSPACE0(s);
5008 TOKEN(PEG); /* make sure any #! line is accessible */
5009 }
5010 s = SKIPSPACE0(s);
5011 }
5012 else {
5013/* if (PL_madskills && PL_lex_formbrack) { */
5014 d = s;
5015 while (d < PL_bufend && *d != '\n')
5016 d++;
5017 if (d < PL_bufend)
5018 d++;
5019 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5020 Perl_croak(aTHX_ "panic: input overflow");
5021 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5022 if (!PL_thiswhite)
6b29d1f5 5023 PL_thiswhite = newSVpvs("");
5db06880 5024 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5025 sv_setpvs(PL_thiswhite, "");
cd81e915 5026 PL_faketokens = 0;
5db06880 5027 }
cd81e915 5028 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5029 }
5030 s = d;
5031/* }
5032 *s = '\0';
5033 PL_bufend = s; */
5034 }
5035#else
378cc40b 5036 *s = '\0';
3280af22 5037 PL_bufend = s;
5db06880 5038#endif
a687059c 5039 }
378cc40b
LW
5040 goto retry;
5041 case '-':
79072805 5042 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5043 I32 ftst = 0;
90771dc0 5044 char tmp;
e5edeb50 5045
378cc40b 5046 s++;
3280af22 5047 PL_bufptr = s;
748a9306
LW
5048 tmp = *s++;
5049
bf4acbe4 5050 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5051 s++;
5052
5053 if (strnEQ(s,"=>",2)) {
3280af22 5054 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5055 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5056 OPERATOR('-'); /* unary minus */
5057 }
3280af22 5058 PL_last_uni = PL_oldbufptr;
748a9306 5059 switch (tmp) {
e5edeb50
JH
5060 case 'r': ftst = OP_FTEREAD; break;
5061 case 'w': ftst = OP_FTEWRITE; break;
5062 case 'x': ftst = OP_FTEEXEC; break;
5063 case 'o': ftst = OP_FTEOWNED; break;
5064 case 'R': ftst = OP_FTRREAD; break;
5065 case 'W': ftst = OP_FTRWRITE; break;
5066 case 'X': ftst = OP_FTREXEC; break;
5067 case 'O': ftst = OP_FTROWNED; break;
5068 case 'e': ftst = OP_FTIS; break;
5069 case 'z': ftst = OP_FTZERO; break;
5070 case 's': ftst = OP_FTSIZE; break;
5071 case 'f': ftst = OP_FTFILE; break;
5072 case 'd': ftst = OP_FTDIR; break;
5073 case 'l': ftst = OP_FTLINK; break;
5074 case 'p': ftst = OP_FTPIPE; break;
5075 case 'S': ftst = OP_FTSOCK; break;
5076 case 'u': ftst = OP_FTSUID; break;
5077 case 'g': ftst = OP_FTSGID; break;
5078 case 'k': ftst = OP_FTSVTX; break;
5079 case 'b': ftst = OP_FTBLK; break;
5080 case 'c': ftst = OP_FTCHR; break;
5081 case 't': ftst = OP_FTTTY; break;
5082 case 'T': ftst = OP_FTTEXT; break;
5083 case 'B': ftst = OP_FTBINARY; break;
5084 case 'M': case 'A': case 'C':
fafc274c 5085 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5086 switch (tmp) {
5087 case 'M': ftst = OP_FTMTIME; break;
5088 case 'A': ftst = OP_FTATIME; break;
5089 case 'C': ftst = OP_FTCTIME; break;
5090 default: break;
5091 }
5092 break;
378cc40b 5093 default:
378cc40b
LW
5094 break;
5095 }
e5edeb50 5096 if (ftst) {
eb160463 5097 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5098 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5099 "### Saw file test %c\n", (int)tmp);
5f80b19c 5100 } );
e5edeb50
JH
5101 FTST(ftst);
5102 }
5103 else {
5104 /* Assume it was a minus followed by a one-letter named
5105 * subroutine call (or a -bareword), then. */
95c31fe3 5106 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5107 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5108 (int) tmp);
5f80b19c 5109 } );
3cf7b4c4 5110 s = --PL_bufptr;
e5edeb50 5111 }
378cc40b 5112 }
90771dc0
NC
5113 {
5114 const char tmp = *s++;
5115 if (*s == tmp) {
5116 s++;
5117 if (PL_expect == XOPERATOR)
5118 TERM(POSTDEC);
5119 else
5120 OPERATOR(PREDEC);
5121 }
5122 else if (*s == '>') {
5123 s++;
29595ff2 5124 s = SKIPSPACE1(s);
90771dc0
NC
5125 if (isIDFIRST_lazy_if(s,UTF)) {
5126 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5127 TOKEN(ARROW);
5128 }
5129 else if (*s == '$')
5130 OPERATOR(ARROW);
5131 else
5132 TERM(ARROW);
5133 }
3280af22 5134 if (PL_expect == XOPERATOR)
90771dc0
NC
5135 Aop(OP_SUBTRACT);
5136 else {
5137 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5138 check_uni();
5139 OPERATOR('-'); /* unary minus */
79072805 5140 }
2f3197b3 5141 }
79072805 5142
378cc40b 5143 case '+':
90771dc0
NC
5144 {
5145 const char tmp = *s++;
5146 if (*s == tmp) {
5147 s++;
5148 if (PL_expect == XOPERATOR)
5149 TERM(POSTINC);
5150 else
5151 OPERATOR(PREINC);
5152 }
3280af22 5153 if (PL_expect == XOPERATOR)
90771dc0
NC
5154 Aop(OP_ADD);
5155 else {
5156 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5157 check_uni();
5158 OPERATOR('+');
5159 }
2f3197b3 5160 }
a687059c 5161
378cc40b 5162 case '*':
3280af22
NIS
5163 if (PL_expect != XOPERATOR) {
5164 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5165 PL_expect = XOPERATOR;
5166 force_ident(PL_tokenbuf, '*');
5167 if (!*PL_tokenbuf)
a0d0e21e 5168 PREREF('*');
79072805 5169 TERM('*');
a687059c 5170 }
79072805
LW
5171 s++;
5172 if (*s == '*') {
a687059c 5173 s++;
79072805 5174 PWop(OP_POW);
a687059c 5175 }
79072805
LW
5176 Mop(OP_MULTIPLY);
5177
378cc40b 5178 case '%':
3280af22 5179 if (PL_expect == XOPERATOR) {
bbce6d69 5180 ++s;
5181 Mop(OP_MODULO);
a687059c 5182 }
3280af22 5183 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5184 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5185 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5186 if (!PL_tokenbuf[1]) {
bbce6d69 5187 PREREF('%');
a687059c 5188 }
3280af22 5189 PL_pending_ident = '%';
bbce6d69 5190 TERM('%');
a687059c 5191
378cc40b 5192 case '^':
79072805 5193 s++;
a0d0e21e 5194 BOop(OP_BIT_XOR);
79072805 5195 case '[':
a7aaec61
Z
5196 if (PL_lex_brackets > 100)
5197 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5198 PL_lex_brackstack[PL_lex_brackets++] = 0;
df3467db
IG
5199 {
5200 const char tmp = *s++;
5201 OPERATOR(tmp);
5202 }
378cc40b 5203 case '~':
0d863452 5204 if (s[1] == '~'
3e7dd34d 5205 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
5206 {
5207 s += 2;
5208 Eop(OP_SMARTMATCH);
5209 }
378cc40b 5210 case ',':
90771dc0
NC
5211 {
5212 const char tmp = *s++;
5213 OPERATOR(tmp);
5214 }
a0d0e21e
LW
5215 case ':':
5216 if (s[1] == ':') {
5217 len = 0;
0bfa2a8a 5218 goto just_a_word_zero_gv;
a0d0e21e
LW
5219 }
5220 s++;
09bef843
SB
5221 switch (PL_expect) {
5222 OP *attrs;
5db06880
NC
5223#ifdef PERL_MAD
5224 I32 stuffstart;
5225#endif
09bef843
SB
5226 case XOPERATOR:
5227 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5228 break;
5229 PL_bufptr = s; /* update in case we back off */
d83f38d8
NC
5230 if (*s == '=') {
5231 deprecate(":= for an empty attribute list");
5232 }
09bef843
SB
5233 goto grabattrs;
5234 case XATTRBLOCK:
5235 PL_expect = XBLOCK;
5236 goto grabattrs;
5237 case XATTRTERM:
5238 PL_expect = XTERMBLOCK;
5239 grabattrs:
5db06880
NC
5240#ifdef PERL_MAD
5241 stuffstart = s - SvPVX(PL_linestr) - 1;
5242#endif
29595ff2 5243 s = PEEKSPACE(s);
5f66b61c 5244 attrs = NULL;
7e2040f0 5245 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5246 I32 tmp;
5cc237b8 5247 SV *sv;
09bef843 5248 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5249 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5250 if (tmp < 0) tmp = -tmp;
5251 switch (tmp) {
5252 case KEY_or:
5253 case KEY_and:
5254 case KEY_for:
11baf631 5255 case KEY_foreach:
f9829d6b
GS
5256 case KEY_unless:
5257 case KEY_if:
5258 case KEY_while:
5259 case KEY_until:
5260 goto got_attrs;
5261 default:
5262 break;
5263 }
5264 }
5cc237b8 5265 sv = newSVpvn(s, len);
09bef843
SB
5266 if (*d == '(') {
5267 d = scan_str(d,TRUE,TRUE);
5268 if (!d) {
09bef843
SB
5269 /* MUST advance bufptr here to avoid bogus
5270 "at end of line" context messages from yyerror().
5271 */
5272 PL_bufptr = s + len;
5273 yyerror("Unterminated attribute parameter in attribute list");
5274 if (attrs)
5275 op_free(attrs);
5cc237b8 5276 sv_free(sv);
bbf60fe6 5277 return REPORT(0); /* EOF indicator */
09bef843
SB
5278 }
5279 }
5280 if (PL_lex_stuff) {
09bef843 5281 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5282 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5283 newSVOP(OP_CONST, 0, sv));
5284 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5285 PL_lex_stuff = NULL;
09bef843
SB
5286 }
5287 else {
5cc237b8
BS
5288 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5289 sv_free(sv);
1108974d 5290 if (PL_in_my == KEY_our) {
df9a6019 5291 deprecate(":unique");
1108974d 5292 }
bfed75c6 5293 else
371fce9b
DM
5294 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5295 }
5296
d3cea301
SB
5297 /* NOTE: any CV attrs applied here need to be part of
5298 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5299 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5300 sv_free(sv);
78f9721b 5301 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5302 }
5303 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5304 sv_free(sv);
8e5dadda 5305 deprecate(":locked");
5cc237b8
BS
5306 }
5307 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5308 sv_free(sv);
78f9721b 5309 CvMETHOD_on(PL_compcv);
5cc237b8 5310 }
78f9721b
SM
5311 /* After we've set the flags, it could be argued that
5312 we don't need to do the attributes.pm-based setting
5313 process, and shouldn't bother appending recognized
d3cea301
SB
5314 flags. To experiment with that, uncomment the
5315 following "else". (Note that's already been
5316 uncommented. That keeps the above-applied built-in
5317 attributes from being intercepted (and possibly
5318 rejected) by a package's attribute routines, but is
5319 justified by the performance win for the common case
5320 of applying only built-in attributes.) */
0256094b 5321 else
2fcb4757 5322 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5323 newSVOP(OP_CONST, 0,
5cc237b8 5324 sv));
09bef843 5325 }
29595ff2 5326 s = PEEKSPACE(d);
0120eecf 5327 if (*s == ':' && s[1] != ':')
29595ff2 5328 s = PEEKSPACE(s+1);
0120eecf
GS
5329 else if (s == d)
5330 break; /* require real whitespace or :'s */
29595ff2 5331 /* XXX losing whitespace on sequential attributes here */
09bef843 5332 }
90771dc0
NC
5333 {
5334 const char tmp
5335 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5336 if (*s != ';' && *s != '}' && *s != tmp
5337 && (tmp != '=' || *s != ')')) {
5338 const char q = ((*s == '\'') ? '"' : '\'');
5339 /* If here for an expression, and parsed no attrs, back
5340 off. */
5341 if (tmp == '=' && !attrs) {
5342 s = PL_bufptr;
5343 break;
5344 }
5345 /* MUST advance bufptr here to avoid bogus "at end of line"
5346 context messages from yyerror().
5347 */
5348 PL_bufptr = s;
10edeb5d
JH
5349 yyerror( (const char *)
5350 (*s
5351 ? Perl_form(aTHX_ "Invalid separator character "
5352 "%c%c%c in attribute list", q, *s, q)
5353 : "Unterminated attribute list" ) );
90771dc0
NC
5354 if (attrs)
5355 op_free(attrs);
5356 OPERATOR(':');
09bef843 5357 }
09bef843 5358 }
f9829d6b 5359 got_attrs:
09bef843 5360 if (attrs) {
cd81e915 5361 start_force(PL_curforce);
9ded7720 5362 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5363 CURMAD('_', PL_nextwhite);
89122651 5364 force_next(THING);
5db06880
NC
5365 }
5366#ifdef PERL_MAD
5367 if (PL_madskills) {
cd81e915 5368 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5369 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5370 }
5db06880 5371#endif
09bef843
SB
5372 TOKEN(COLONATTR);
5373 }
a0d0e21e 5374 OPERATOR(':');
8990e307
LW
5375 case '(':
5376 s++;
3280af22
NIS
5377 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5378 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5379 else
3280af22 5380 PL_expect = XTERM;
29595ff2 5381 s = SKIPSPACE1(s);
a0d0e21e 5382 TOKEN('(');
378cc40b 5383 case ';':
f4dd75d9 5384 CLINE;
90771dc0
NC
5385 {
5386 const char tmp = *s++;
5387 OPERATOR(tmp);
5388 }
378cc40b 5389 case ')':
90771dc0
NC
5390 {
5391 const char tmp = *s++;
29595ff2 5392 s = SKIPSPACE1(s);
90771dc0
NC
5393 if (*s == '{')
5394 PREBLOCK(tmp);
5395 TERM(tmp);
5396 }
79072805 5397 case ']':
a7aaec61
Z
5398 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5399 TOKEN(0);
79072805 5400 s++;
3280af22 5401 if (PL_lex_brackets <= 0)
d98d5fff 5402 yyerror("Unmatched right square bracket");
463ee0b2 5403 else
3280af22
NIS
5404 --PL_lex_brackets;
5405 if (PL_lex_state == LEX_INTERPNORMAL) {
5406 if (PL_lex_brackets == 0) {
02255c60
FC
5407 if (*s == '-' && s[1] == '>')
5408 PL_lex_state = LEX_INTERPENDMAYBE;
5409 else if (*s != '[' && *s != '{')
3280af22 5410 PL_lex_state = LEX_INTERPEND;
79072805
LW
5411 }
5412 }
4633a7c4 5413 TERM(']');
79072805
LW
5414 case '{':
5415 leftbracket:
79072805 5416 s++;
3280af22 5417 if (PL_lex_brackets > 100) {
8edd5f42 5418 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5419 }
3280af22 5420 switch (PL_expect) {
a0d0e21e 5421 case XTERM:
3280af22 5422 if (PL_lex_formbrack) {
a0d0e21e
LW
5423 s--;
5424 PRETERMBLOCK(DO);
5425 }
3280af22
NIS
5426 if (PL_oldoldbufptr == PL_last_lop)
5427 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5428 else
3280af22 5429 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5430 OPERATOR(HASHBRACK);
a0d0e21e 5431 case XOPERATOR:
bf4acbe4 5432 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5433 s++;
44a8e56a 5434 d = s;
3280af22
NIS
5435 PL_tokenbuf[0] = '\0';
5436 if (d < PL_bufend && *d == '-') {
5437 PL_tokenbuf[0] = '-';
44a8e56a 5438 d++;
bf4acbe4 5439 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5440 d++;
5441 }
7e2040f0 5442 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5443 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5444 FALSE, &len);
bf4acbe4 5445 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5446 d++;
5447 if (*d == '}') {
f54cb97a 5448 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5449 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5450 if (minus)
5451 force_next('-');
748a9306
LW
5452 }
5453 }
5454 /* FALL THROUGH */
09bef843 5455 case XATTRBLOCK:
748a9306 5456 case XBLOCK:
3280af22
NIS
5457 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5458 PL_expect = XSTATE;
a0d0e21e 5459 break;
09bef843 5460 case XATTRTERM:
a0d0e21e 5461 case XTERMBLOCK:
3280af22
NIS
5462 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5463 PL_expect = XSTATE;
a0d0e21e
LW
5464 break;
5465 default: {
f54cb97a 5466 const char *t;
3280af22
NIS
5467 if (PL_oldoldbufptr == PL_last_lop)
5468 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5469 else
3280af22 5470 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5471 s = SKIPSPACE1(s);
8452ff4b
SB
5472 if (*s == '}') {
5473 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5474 PL_expect = XTERM;
5475 /* This hack is to get the ${} in the message. */
5476 PL_bufptr = s+1;
5477 yyerror("syntax error");
5478 break;
5479 }
a0d0e21e 5480 OPERATOR(HASHBRACK);
8452ff4b 5481 }
b8a4b1be
GS
5482 /* This hack serves to disambiguate a pair of curlies
5483 * as being a block or an anon hash. Normally, expectation
5484 * determines that, but in cases where we're not in a
5485 * position to expect anything in particular (like inside
5486 * eval"") we have to resolve the ambiguity. This code
5487 * covers the case where the first term in the curlies is a
5488 * quoted string. Most other cases need to be explicitly
a0288114 5489 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5490 * curly in order to force resolution as an anon hash.
5491 *
5492 * XXX should probably propagate the outer expectation
5493 * into eval"" to rely less on this hack, but that could
5494 * potentially break current behavior of eval"".
5495 * GSAR 97-07-21
5496 */
5497 t = s;
5498 if (*s == '\'' || *s == '"' || *s == '`') {
5499 /* common case: get past first string, handling escapes */
3280af22 5500 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5501 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5502 t++;
5503 t++;
a0d0e21e 5504 }
b8a4b1be 5505 else if (*s == 'q') {
3280af22 5506 if (++t < PL_bufend
b8a4b1be 5507 && (!isALNUM(*t)
3280af22 5508 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5509 && !isALNUM(*t))))
5510 {
abc667d1 5511 /* skip q//-like construct */
f54cb97a 5512 const char *tmps;
b8a4b1be
GS
5513 char open, close, term;
5514 I32 brackets = 1;
5515
3280af22 5516 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5517 t++;
abc667d1
DM
5518 /* check for q => */
5519 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5520 OPERATOR(HASHBRACK);
5521 }
b8a4b1be
GS
5522 term = *t;
5523 open = term;
5524 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5525 term = tmps[5];
5526 close = term;
5527 if (open == close)
3280af22
NIS
5528 for (t++; t < PL_bufend; t++) {
5529 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5530 t++;
6d07e5e9 5531 else if (*t == open)
b8a4b1be
GS
5532 break;
5533 }
abc667d1 5534 else {
3280af22
NIS
5535 for (t++; t < PL_bufend; t++) {
5536 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5537 t++;
6d07e5e9 5538 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5539 break;
5540 else if (*t == open)
5541 brackets++;
5542 }
abc667d1
DM
5543 }
5544 t++;
b8a4b1be 5545 }
abc667d1
DM
5546 else
5547 /* skip plain q word */
5548 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5549 t += UTF8SKIP(t);
a0d0e21e 5550 }
7e2040f0 5551 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5552 t += UTF8SKIP(t);
7e2040f0 5553 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5554 t += UTF8SKIP(t);
a0d0e21e 5555 }
3280af22 5556 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5557 t++;
b8a4b1be
GS
5558 /* if comma follows first term, call it an anon hash */
5559 /* XXX it could be a comma expression with loop modifiers */
3280af22 5560 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5561 || (*t == '=' && t[1] == '>')))
a0d0e21e 5562 OPERATOR(HASHBRACK);
3280af22 5563 if (PL_expect == XREF)
4e4e412b 5564 PL_expect = XTERM;
a0d0e21e 5565 else {
3280af22
NIS
5566 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5567 PL_expect = XSTATE;
a0d0e21e 5568 }
8990e307 5569 }
a0d0e21e 5570 break;
463ee0b2 5571 }
6154021b 5572 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5573 if (isSPACE(*s) || *s == '#')
3280af22 5574 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5575 TOKEN('{');
378cc40b 5576 case '}':
a7aaec61
Z
5577 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5578 TOKEN(0);
79072805
LW
5579 rightbracket:
5580 s++;
3280af22 5581 if (PL_lex_brackets <= 0)
d98d5fff 5582 yyerror("Unmatched right curly bracket");
463ee0b2 5583 else
3280af22 5584 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5585 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5586 PL_lex_formbrack = 0;
5587 if (PL_lex_state == LEX_INTERPNORMAL) {
5588 if (PL_lex_brackets == 0) {
9059aa12
LW
5589 if (PL_expect & XFAKEBRACK) {
5590 PL_expect &= XENUMMASK;
3280af22
NIS
5591 PL_lex_state = LEX_INTERPEND;
5592 PL_bufptr = s;
5db06880
NC
5593#if 0
5594 if (PL_madskills) {
cd81e915 5595 if (!PL_thiswhite)
6b29d1f5 5596 PL_thiswhite = newSVpvs("");
76f68e9b 5597 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5598 }
5599#endif
cea2e8a9 5600 return yylex(); /* ignore fake brackets */
79072805 5601 }
fa83b5b6 5602 if (*s == '-' && s[1] == '>')
3280af22 5603 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5604 else if (*s != '[' && *s != '{')
3280af22 5605 PL_lex_state = LEX_INTERPEND;
79072805
LW
5606 }
5607 }
9059aa12
LW
5608 if (PL_expect & XFAKEBRACK) {
5609 PL_expect &= XENUMMASK;
3280af22 5610 PL_bufptr = s;
cea2e8a9 5611 return yylex(); /* ignore fake brackets */
748a9306 5612 }
cd81e915 5613 start_force(PL_curforce);
5db06880
NC
5614 if (PL_madskills) {
5615 curmad('X', newSVpvn(s-1,1));
cd81e915 5616 CURMAD('_', PL_thiswhite);
5db06880 5617 }
79072805 5618 force_next('}');
5db06880 5619#ifdef PERL_MAD
cd81e915 5620 if (!PL_thistoken)
6b29d1f5 5621 PL_thistoken = newSVpvs("");
5db06880 5622#endif
79072805 5623 TOKEN(';');
378cc40b
LW
5624 case '&':
5625 s++;
90771dc0 5626 if (*s++ == '&')
a0d0e21e 5627 AOPERATOR(ANDAND);
378cc40b 5628 s--;
3280af22 5629 if (PL_expect == XOPERATOR) {
041457d9
DM
5630 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5631 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5632 {
57843af0 5633 CopLINE_dec(PL_curcop);
f1f66076 5634 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5635 CopLINE_inc(PL_curcop);
463ee0b2 5636 }
79072805 5637 BAop(OP_BIT_AND);
463ee0b2 5638 }
79072805 5639
3280af22
NIS
5640 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5641 if (*PL_tokenbuf) {
5642 PL_expect = XOPERATOR;
5643 force_ident(PL_tokenbuf, '&');
463ee0b2 5644 }
79072805
LW
5645 else
5646 PREREF('&');
6154021b 5647 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5648 TERM('&');
5649
378cc40b
LW
5650 case '|':
5651 s++;
90771dc0 5652 if (*s++ == '|')
a0d0e21e 5653 AOPERATOR(OROR);
378cc40b 5654 s--;
79072805 5655 BOop(OP_BIT_OR);
378cc40b
LW
5656 case '=':
5657 s++;
748a9306 5658 {
90771dc0
NC
5659 const char tmp = *s++;
5660 if (tmp == '=')
5661 Eop(OP_EQ);
5662 if (tmp == '>')
5663 OPERATOR(',');
5664 if (tmp == '~')
5665 PMop(OP_MATCH);
5666 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5667 && strchr("+-*/%.^&|<",tmp))
5668 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5669 "Reversed %c= operator",(int)tmp);
5670 s--;
5671 if (PL_expect == XSTATE && isALPHA(tmp) &&
5672 (s == PL_linestart+1 || s[-2] == '\n') )
5673 {
5674 if (PL_in_eval && !PL_rsfp) {
5675 d = PL_bufend;
5676 while (s < d) {
5677 if (*s++ == '\n') {
5678 incline(s);
5679 if (strnEQ(s,"=cut",4)) {
5680 s = strchr(s,'\n');
5681 if (s)
5682 s++;
5683 else
5684 s = d;
5685 incline(s);
5686 goto retry;
5687 }
5688 }
a5f75d66 5689 }
90771dc0 5690 goto retry;
a5f75d66 5691 }
5db06880
NC
5692#ifdef PERL_MAD
5693 if (PL_madskills) {
cd81e915 5694 if (!PL_thiswhite)
6b29d1f5 5695 PL_thiswhite = newSVpvs("");
cd81e915 5696 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5697 PL_bufend - PL_linestart);
5698 }
5699#endif
90771dc0 5700 s = PL_bufend;
737c24fc 5701 PL_parser->in_pod = 1;
90771dc0 5702 goto retry;
a5f75d66 5703 }
a0d0e21e 5704 }
3280af22 5705 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5706 const char *t = s;
51882d45 5707#ifdef PERL_STRICT_CR
c35e046a 5708 while (SPACE_OR_TAB(*t))
51882d45 5709#else
c35e046a 5710 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5711#endif
c35e046a 5712 t++;
a0d0e21e
LW
5713 if (*t == '\n' || *t == '#') {
5714 s--;
3280af22 5715 PL_expect = XBLOCK;
a0d0e21e
LW
5716 goto leftbracket;
5717 }
79072805 5718 }
6154021b 5719 pl_yylval.ival = 0;
a0d0e21e 5720 OPERATOR(ASSIGNOP);
378cc40b
LW
5721 case '!':
5722 s++;
90771dc0
NC
5723 {
5724 const char tmp = *s++;
5725 if (tmp == '=') {
5726 /* was this !=~ where !~ was meant?
5727 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5728
5729 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5730 const char *t = s+1;
5731
5732 while (t < PL_bufend && isSPACE(*t))
5733 ++t;
5734
5735 if (*t == '/' || *t == '?' ||
5736 ((*t == 'm' || *t == 's' || *t == 'y')
5737 && !isALNUM(t[1])) ||
5738 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5739 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5740 "!=~ should be !~");
5741 }
5742 Eop(OP_NE);
5743 }
5744 if (tmp == '~')
5745 PMop(OP_NOT);
5746 }
378cc40b
LW
5747 s--;
5748 OPERATOR('!');
5749 case '<':
3280af22 5750 if (PL_expect != XOPERATOR) {
93a17b20 5751 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5752 check_uni();
79072805
LW
5753 if (s[1] == '<')
5754 s = scan_heredoc(s);
5755 else
5756 s = scan_inputsymbol(s);
5757 TERM(sublex_start());
378cc40b
LW
5758 }
5759 s++;
90771dc0
NC
5760 {
5761 char tmp = *s++;
5762 if (tmp == '<')
5763 SHop(OP_LEFT_SHIFT);
5764 if (tmp == '=') {
5765 tmp = *s++;
5766 if (tmp == '>')
5767 Eop(OP_NCMP);
5768 s--;
5769 Rop(OP_LE);
5770 }
395c3793 5771 }
378cc40b 5772 s--;
79072805 5773 Rop(OP_LT);
378cc40b
LW
5774 case '>':
5775 s++;
90771dc0
NC
5776 {
5777 const char tmp = *s++;
5778 if (tmp == '>')
5779 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5780 else if (tmp == '=')
90771dc0
NC
5781 Rop(OP_GE);
5782 }
378cc40b 5783 s--;
79072805 5784 Rop(OP_GT);
378cc40b
LW
5785
5786 case '$':
bbce6d69 5787 CLINE;
5788
3280af22
NIS
5789 if (PL_expect == XOPERATOR) {
5790 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5791 return deprecate_commaless_var_list();
a0d0e21e 5792 }
8990e307 5793 }
a0d0e21e 5794
c0b977fd 5795 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5796 PL_tokenbuf[0] = '@';
376b8730
SM
5797 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5798 sizeof PL_tokenbuf - 1, FALSE);
5799 if (PL_expect == XOPERATOR)
5800 no_op("Array length", s);
3280af22 5801 if (!PL_tokenbuf[1])
a0d0e21e 5802 PREREF(DOLSHARP);
3280af22
NIS
5803 PL_expect = XOPERATOR;
5804 PL_pending_ident = '#';
463ee0b2 5805 TOKEN(DOLSHARP);
79072805 5806 }
bbce6d69 5807
3280af22 5808 PL_tokenbuf[0] = '$';
376b8730
SM
5809 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5810 sizeof PL_tokenbuf - 1, FALSE);
5811 if (PL_expect == XOPERATOR)
5812 no_op("Scalar", s);
3280af22
NIS
5813 if (!PL_tokenbuf[1]) {
5814 if (s == PL_bufend)
bbce6d69 5815 yyerror("Final $ should be \\$ or $name");
5816 PREREF('$');
8990e307 5817 }
a0d0e21e 5818
bbce6d69 5819 /* This kludge not intended to be bulletproof. */
3280af22 5820 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5821 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5822 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5823 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5824 TERM(THING);
5825 }
5826
ff68c719 5827 d = s;
90771dc0
NC
5828 {
5829 const char tmp = *s;
ae28bb2a 5830 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5831 s = SKIPSPACE1(s);
ff68c719 5832
90771dc0
NC
5833 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5834 && intuit_more(s)) {
5835 if (*s == '[') {
5836 PL_tokenbuf[0] = '@';
5837 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5838 char *t = s+1;
5839
5840 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5841 t++;
90771dc0 5842 if (*t++ == ',') {
29595ff2 5843 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5844 while (t < PL_bufend && *t != ']')
5845 t++;
9014280d 5846 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5847 "Multidimensional syntax %.*s not supported",
36c7798d 5848 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5849 }
748a9306 5850 }
93a17b20 5851 }
90771dc0
NC
5852 else if (*s == '{') {
5853 char *t;
5854 PL_tokenbuf[0] = '%';
5855 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5856 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5857 {
5858 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5859 do {
5860 t++;
5861 } while (isSPACE(*t));
90771dc0 5862 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5863 STRLEN len;
90771dc0 5864 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5865 &len);
c35e046a
AL
5866 while (isSPACE(*t))
5867 t++;
780a5241 5868 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5869 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5870 "You need to quote \"%s\"",
5871 tmpbuf);
5872 }
5873 }
5874 }
93a17b20 5875 }
bbce6d69 5876
90771dc0
NC
5877 PL_expect = XOPERATOR;
5878 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5879 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5880 if (!islop || PL_last_lop_op == OP_GREPSTART)
5881 PL_expect = XOPERATOR;
5882 else if (strchr("$@\"'`q", *s))
5883 PL_expect = XTERM; /* e.g. print $fh "foo" */
5884 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5885 PL_expect = XTERM; /* e.g. print $fh &sub */
5886 else if (isIDFIRST_lazy_if(s,UTF)) {
5887 char tmpbuf[sizeof PL_tokenbuf];
5888 int t2;
5889 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5890 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5891 /* binary operators exclude handle interpretations */
5892 switch (t2) {
5893 case -KEY_x:
5894 case -KEY_eq:
5895 case -KEY_ne:
5896 case -KEY_gt:
5897 case -KEY_lt:
5898 case -KEY_ge:
5899 case -KEY_le:
5900 case -KEY_cmp:
5901 break;
5902 default:
5903 PL_expect = XTERM; /* e.g. print $fh length() */
5904 break;
5905 }
5906 }
5907 else {
5908 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5909 }
5910 }
90771dc0
NC
5911 else if (isDIGIT(*s))
5912 PL_expect = XTERM; /* e.g. print $fh 3 */
5913 else if (*s == '.' && isDIGIT(s[1]))
5914 PL_expect = XTERM; /* e.g. print $fh .3 */
5915 else if ((*s == '?' || *s == '-' || *s == '+')
5916 && !isSPACE(s[1]) && s[1] != '=')
5917 PL_expect = XTERM; /* e.g. print $fh -1 */
5918 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5919 && s[1] != '/')
5920 PL_expect = XTERM; /* e.g. print $fh /.../
5921 XXX except DORDOR operator
5922 */
5923 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5924 && s[2] != '=')
5925 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5926 }
bbce6d69 5927 }
3280af22 5928 PL_pending_ident = '$';
79072805 5929 TOKEN('$');
378cc40b
LW
5930
5931 case '@':
3280af22 5932 if (PL_expect == XOPERATOR)
bbce6d69 5933 no_op("Array", s);
3280af22
NIS
5934 PL_tokenbuf[0] = '@';
5935 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5936 if (!PL_tokenbuf[1]) {
bbce6d69 5937 PREREF('@');
5938 }
3280af22 5939 if (PL_lex_state == LEX_NORMAL)
29595ff2 5940 s = SKIPSPACE1(s);
3280af22 5941 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5942 if (*s == '{')
3280af22 5943 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5944
5945 /* Warn about @ where they meant $. */
041457d9
DM
5946 if (*s == '[' || *s == '{') {
5947 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5948 const char *t = s + 1;
7e2040f0 5949 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5950 t++;
5951 if (*t == '}' || *t == ']') {
5952 t++;
29595ff2 5953 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5954 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5955 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5956 (int)(t-PL_bufptr), PL_bufptr,
5957 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5958 }
93a17b20
LW
5959 }
5960 }
463ee0b2 5961 }
3280af22 5962 PL_pending_ident = '@';
79072805 5963 TERM('@');
378cc40b 5964
c963b151 5965 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5966 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5967 s += 2;
5968 AOPERATOR(DORDOR);
5969 }
c963b151 5970 case '?': /* may either be conditional or pattern */
be25f609 5971 if (PL_expect == XOPERATOR) {
90771dc0 5972 char tmp = *s++;
c963b151 5973 if(tmp == '?') {
be25f609 5974 OPERATOR('?');
c963b151
BD
5975 }
5976 else {
5977 tmp = *s++;
5978 if(tmp == '/') {
5979 /* A // operator. */
5980 AOPERATOR(DORDOR);
5981 }
5982 else {
5983 s--;
5984 Mop(OP_DIVIDE);
5985 }
5986 }
5987 }
5988 else {
5989 /* Disable warning on "study /blah/" */
5990 if (PL_oldoldbufptr == PL_last_uni
5991 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5992 || memNE(PL_last_uni, "study", 5)
5993 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5994 ))
5995 check_uni();
725a61d7
Z
5996 if (*s == '?')
5997 deprecate("?PATTERN? without explicit operator");
c963b151
BD
5998 s = scan_pat(s,OP_MATCH);
5999 TERM(sublex_start());
6000 }
378cc40b
LW
6001
6002 case '.':
51882d45
GS
6003 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6004#ifdef PERL_STRICT_CR
6005 && s[1] == '\n'
6006#else
6007 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6008#endif
6009 && (s == PL_linestart || s[-1] == '\n') )
6010 {
3280af22
NIS
6011 PL_lex_formbrack = 0;
6012 PL_expect = XSTATE;
79072805
LW
6013 goto rightbracket;
6014 }
be25f609 6015 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6016 s += 3;
6017 OPERATOR(YADAYADA);
6018 }
3280af22 6019 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6020 char tmp = *s++;
a687059c
LW
6021 if (*s == tmp) {
6022 s++;
2f3197b3
LW
6023 if (*s == tmp) {
6024 s++;
6154021b 6025 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6026 }
6027 else
6154021b 6028 pl_yylval.ival = 0;
378cc40b 6029 OPERATOR(DOTDOT);
a687059c 6030 }
79072805 6031 Aop(OP_CONCAT);
378cc40b
LW
6032 }
6033 /* FALL THROUGH */
6034 case '0': case '1': case '2': case '3': case '4':
6035 case '5': case '6': case '7': case '8': case '9':
6154021b 6036 s = scan_num(s, &pl_yylval);
931e0695 6037 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6038 if (PL_expect == XOPERATOR)
8990e307 6039 no_op("Number",s);
79072805
LW
6040 TERM(THING);
6041
6042 case '\'':
5db06880 6043 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6044 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6045 if (PL_expect == XOPERATOR) {
6046 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6047 return deprecate_commaless_var_list();
a0d0e21e 6048 }
463ee0b2 6049 else
8990e307 6050 no_op("String",s);
463ee0b2 6051 }
79072805 6052 if (!s)
d4c19fe8 6053 missingterm(NULL);
6154021b 6054 pl_yylval.ival = OP_CONST;
79072805
LW
6055 TERM(sublex_start());
6056
6057 case '"':
5db06880 6058 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6059 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6060 if (PL_expect == XOPERATOR) {
6061 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6062 return deprecate_commaless_var_list();
a0d0e21e 6063 }
463ee0b2 6064 else
8990e307 6065 no_op("String",s);
463ee0b2 6066 }
79072805 6067 if (!s)
d4c19fe8 6068 missingterm(NULL);
6154021b 6069 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6070 /* FIXME. I think that this can be const if char *d is replaced by
6071 more localised variables. */
3280af22 6072 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6073 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6074 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6075 break;
6076 }
6077 }
79072805
LW
6078 TERM(sublex_start());
6079
6080 case '`':
5db06880 6081 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6082 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6083 if (PL_expect == XOPERATOR)
8990e307 6084 no_op("Backticks",s);
79072805 6085 if (!s)
d4c19fe8 6086 missingterm(NULL);
9b201d7d 6087 readpipe_override();
79072805
LW
6088 TERM(sublex_start());
6089
6090 case '\\':
6091 s++;
a2a5de95
NC
6092 if (PL_lex_inwhat && isDIGIT(*s))
6093 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6094 *s, *s);
3280af22 6095 if (PL_expect == XOPERATOR)
8990e307 6096 no_op("Backslash",s);
79072805
LW
6097 OPERATOR(REFGEN);
6098
a7cb1f99 6099 case 'v':
e526c9e6 6100 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6101 char *start = s + 2;
dd629d5b 6102 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6103 start++;
6104 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6105 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6106 TERM(THING);
6107 }
e526c9e6 6108 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6109 else if (!isALPHA(*start) && (PL_expect == XTERM
6110 || PL_expect == XREF || PL_expect == XSTATE
6111 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6112 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6113 if (!gv) {
6154021b 6114 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6115 TERM(THING);
6116 }
6117 }
a7cb1f99
GS
6118 }
6119 goto keylookup;
79072805 6120 case 'x':
3280af22 6121 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6122 s++;
6123 Mop(OP_REPEAT);
2f3197b3 6124 }
79072805
LW
6125 goto keylookup;
6126
378cc40b 6127 case '_':
79072805
LW
6128 case 'a': case 'A':
6129 case 'b': case 'B':
6130 case 'c': case 'C':
6131 case 'd': case 'D':
6132 case 'e': case 'E':
6133 case 'f': case 'F':
6134 case 'g': case 'G':
6135 case 'h': case 'H':
6136 case 'i': case 'I':
6137 case 'j': case 'J':
6138 case 'k': case 'K':
6139 case 'l': case 'L':
6140 case 'm': case 'M':
6141 case 'n': case 'N':
6142 case 'o': case 'O':
6143 case 'p': case 'P':
6144 case 'q': case 'Q':
6145 case 'r': case 'R':
6146 case 's': case 'S':
6147 case 't': case 'T':
6148 case 'u': case 'U':
a7cb1f99 6149 case 'V':
79072805
LW
6150 case 'w': case 'W':
6151 case 'X':
6152 case 'y': case 'Y':
6153 case 'z': case 'Z':
6154
49dc05e3 6155 keylookup: {
88e1f1a2 6156 bool anydelim;
90771dc0 6157 I32 tmp;
10edeb5d
JH
6158
6159 orig_keyword = 0;
6160 gv = NULL;
6161 gvp = NULL;
49dc05e3 6162
3280af22
NIS
6163 PL_bufptr = s;
6164 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6165
6166 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6167 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6168
6169 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6170 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6171 goto just_a_word;
6172
3643fb5f 6173 d = s;
3280af22 6174 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6175 d++; /* no comments skipped here, or s### is misparsed */
6176
748a9306 6177 /* Is this a word before a => operator? */
1c3923b3 6178 if (*d == '=' && d[1] == '>') {
748a9306 6179 CLINE;
6154021b 6180 pl_yylval.opval
d0a148a6
NC
6181 = (OP*)newSVOP(OP_CONST, 0,
6182 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6183 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6184 TERM(WORD);
6185 }
6186
88e1f1a2
JV
6187 /* Check for plugged-in keyword */
6188 {
6189 OP *o;
6190 int result;
6191 char *saved_bufptr = PL_bufptr;
6192 PL_bufptr = s;
16c91539 6193 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6194 s = PL_bufptr;
6195 if (result == KEYWORD_PLUGIN_DECLINE) {
6196 /* not a plugged-in keyword */
6197 PL_bufptr = saved_bufptr;
6198 } else if (result == KEYWORD_PLUGIN_STMT) {
6199 pl_yylval.opval = o;
6200 CLINE;
6201 PL_expect = XSTATE;
6202 return REPORT(PLUGSTMT);
6203 } else if (result == KEYWORD_PLUGIN_EXPR) {
6204 pl_yylval.opval = o;
6205 CLINE;
6206 PL_expect = XOPERATOR;
6207 return REPORT(PLUGEXPR);
6208 } else {
6209 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6210 PL_tokenbuf);
6211 }
6212 }
6213
6214 /* Check for built-in keyword */
6215 tmp = keyword(PL_tokenbuf, len, 0);
6216
6217 /* Is this a label? */
6218 if (!anydelim && PL_expect == XSTATE
6219 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6220 s = d + 1;
6221 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6222 CLINE;
6223 TOKEN(LABEL);
6224 }
6225
a0d0e21e 6226 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6227 GV *ogv = NULL; /* override (winner) */
6228 GV *hgv = NULL; /* hidden (loser) */
3280af22 6229 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6230 CV *cv;
90e5519e 6231 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6232 (cv = GvCVu(gv)))
6233 {
6234 if (GvIMPORTED_CV(gv))
6235 ogv = gv;
6236 else if (! CvMETHOD(cv))
6237 hgv = gv;
6238 }
6239 if (!ogv &&
3280af22 6240 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6241 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6242 GvCVu(gv) && GvIMPORTED_CV(gv))
6243 {
6244 ogv = gv;
6245 }
6246 }
6247 if (ogv) {
30fe34ed 6248 orig_keyword = tmp;
56f7f34b 6249 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6250 }
6251 else if (gv && !gvp
6252 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6253 && GvCVu(gv))
6e7b2336
GS
6254 {
6255 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6256 }
56f7f34b
CS
6257 else { /* no override */
6258 tmp = -tmp;
a2a5de95
NC
6259 if (tmp == KEY_dump) {
6260 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6261 "dump() better written as CORE::dump()");
ac206dc8 6262 }
a0714e2c 6263 gv = NULL;
56f7f34b 6264 gvp = 0;
a2a5de95
NC
6265 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6266 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6267 "Ambiguous call resolved as CORE::%s(), "
6268 "qualify as such or use &",
6269 GvENAME(hgv));
49dc05e3 6270 }
a0d0e21e
LW
6271 }
6272
6273 reserved_word:
6274 switch (tmp) {
79072805
LW
6275
6276 default: /* not a keyword */
0bfa2a8a
NC
6277 /* Trade off - by using this evil construction we can pull the
6278 variable gv into the block labelled keylookup. If not, then
6279 we have to give it function scope so that the goto from the
6280 earlier ':' case doesn't bypass the initialisation. */
6281 if (0) {
6282 just_a_word_zero_gv:
6283 gv = NULL;
6284 gvp = NULL;
8bee0991 6285 orig_keyword = 0;
0bfa2a8a 6286 }
93a17b20 6287 just_a_word: {
96e4d5b1 6288 SV *sv;
ce29ac45 6289 int pkgname = 0;
f54cb97a 6290 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6291 OP *rv2cv_op;
5069cc75 6292 CV *cv;
5db06880 6293#ifdef PERL_MAD
cd81e915 6294 SV *nextPL_nextwhite = 0;
5db06880
NC
6295#endif
6296
8990e307
LW
6297
6298 /* Get the rest if it looks like a package qualifier */
6299
155aba94 6300 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6301 STRLEN morelen;
3280af22 6302 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6303 TRUE, &morelen);
6304 if (!morelen)
cea2e8a9 6305 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6306 *s == '\'' ? "'" : "::");
c3e0f903 6307 len += morelen;
ce29ac45 6308 pkgname = 1;
a0d0e21e 6309 }
8990e307 6310
3280af22
NIS
6311 if (PL_expect == XOPERATOR) {
6312 if (PL_bufptr == PL_linestart) {
57843af0 6313 CopLINE_dec(PL_curcop);
f1f66076 6314 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6315 CopLINE_inc(PL_curcop);
463ee0b2
LW
6316 }
6317 else
54310121 6318 no_op("Bareword",s);
463ee0b2 6319 }
8990e307 6320
c3e0f903
GS
6321 /* Look for a subroutine with this name in current package,
6322 unless name is "Foo::", in which case Foo is a bearword
6323 (and a package name). */
6324
5db06880 6325 if (len > 2 && !PL_madskills &&
3280af22 6326 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6327 {
f776e3cd 6328 if (ckWARN(WARN_BAREWORD)
90e5519e 6329 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6330 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6331 "Bareword \"%s\" refers to nonexistent package",
3280af22 6332 PL_tokenbuf);
c3e0f903 6333 len -= 2;
3280af22 6334 PL_tokenbuf[len] = '\0';
a0714e2c 6335 gv = NULL;
c3e0f903
GS
6336 gvp = 0;
6337 }
6338 else {
62d55b22
NC
6339 if (!gv) {
6340 /* Mustn't actually add anything to a symbol table.
6341 But also don't want to "initialise" any placeholder
6342 constants that might already be there into full
6343 blown PVGVs with attached PVCV. */
90e5519e
NC
6344 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6345 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6346 }
b3d904f3 6347 len = 0;
c3e0f903
GS
6348 }
6349
6350 /* if we saw a global override before, get the right name */
8990e307 6351
37bb7629
EB
6352 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6353 len ? len : strlen(PL_tokenbuf));
49dc05e3 6354 if (gvp) {
37bb7629 6355 SV * const tmp_sv = sv;
396482e1 6356 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6357 sv_catsv(sv, tmp_sv);
6358 SvREFCNT_dec(tmp_sv);
8a7a129d 6359 }
37bb7629 6360
5db06880 6361#ifdef PERL_MAD
cd81e915
NC
6362 if (PL_madskills && !PL_thistoken) {
6363 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6364 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6365 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6366 }
6367#endif
8990e307 6368
a0d0e21e 6369 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6370 CLINE;
6154021b
RGS
6371 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6372 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6373
c3e0f903 6374 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6375 if (len)
6376 goto safe_bareword;
6377
f7461760
Z
6378 {
6379 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6380 const_op->op_private = OPpCONST_BARE;
6381 rv2cv_op = newCVREF(0, const_op);
6382 }
d9088386 6383 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6384
8990e307
LW
6385 /* See if it's the indirect object for a list operator. */
6386
3280af22
NIS
6387 if (PL_oldoldbufptr &&
6388 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6389 (PL_oldoldbufptr == PL_last_lop
6390 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6391 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6392 (PL_expect == XREF ||
6393 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6394 {
748a9306
LW
6395 bool immediate_paren = *s == '(';
6396
a0d0e21e 6397 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6398 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6399#ifdef PERL_MAD
cd81e915 6400 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6401#endif
a0d0e21e
LW
6402
6403 /* Two barewords in a row may indicate method call. */
6404
62d55b22 6405 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6406 (tmp = intuit_method(s, gv, cv))) {
6407 op_free(rv2cv_op);
bbf60fe6 6408 return REPORT(tmp);
f7461760 6409 }
a0d0e21e
LW
6410
6411 /* If not a declared subroutine, it's an indirect object. */
6412 /* (But it's an indir obj regardless for sort.) */
7294df96 6413 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6414
7294df96
RGS
6415 if (
6416 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6417 (!cv &&
a9ef352a 6418 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6419 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6420 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6421 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6422 )
a9ef352a 6423 {
3280af22 6424 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6425 goto bareword;
93a17b20
LW
6426 }
6427 }
8990e307 6428
3280af22 6429 PL_expect = XOPERATOR;
5db06880
NC
6430#ifdef PERL_MAD
6431 if (isSPACE(*s))
cd81e915
NC
6432 s = SKIPSPACE2(s,nextPL_nextwhite);
6433 PL_nextwhite = nextPL_nextwhite;
5db06880 6434#else
8990e307 6435 s = skipspace(s);
5db06880 6436#endif
1c3923b3
GS
6437
6438 /* Is this a word before a => operator? */
ce29ac45 6439 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6440 op_free(rv2cv_op);
1c3923b3 6441 CLINE;
6154021b 6442 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6443 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6444 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6445 TERM(WORD);
6446 }
6447
6448 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6449 if (*s == '(') {
79072805 6450 CLINE;
5069cc75 6451 if (cv) {
c35e046a
AL
6452 d = s + 1;
6453 while (SPACE_OR_TAB(*d))
6454 d++;
f7461760 6455 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6456 s = d + 1;
c631f32b 6457 goto its_constant;
96e4d5b1 6458 }
6459 }
5db06880
NC
6460#ifdef PERL_MAD
6461 if (PL_madskills) {
cd81e915
NC
6462 PL_nextwhite = PL_thiswhite;
6463 PL_thiswhite = 0;
5db06880 6464 }
cd81e915 6465 start_force(PL_curforce);
5db06880 6466#endif
6154021b 6467 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6468 PL_expect = XOPERATOR;
5db06880
NC
6469#ifdef PERL_MAD
6470 if (PL_madskills) {
cd81e915
NC
6471 PL_nextwhite = nextPL_nextwhite;
6472 curmad('X', PL_thistoken);
6b29d1f5 6473 PL_thistoken = newSVpvs("");
5db06880
NC
6474 }
6475#endif
f7461760 6476 op_free(rv2cv_op);
93a17b20 6477 force_next(WORD);
6154021b 6478 pl_yylval.ival = 0;
463ee0b2 6479 TOKEN('&');
79072805 6480 }
93a17b20 6481
a0d0e21e 6482 /* If followed by var or block, call it a method (unless sub) */
8990e307 6483
f7461760
Z
6484 if ((*s == '$' || *s == '{') && !cv) {
6485 op_free(rv2cv_op);
3280af22
NIS
6486 PL_last_lop = PL_oldbufptr;
6487 PL_last_lop_op = OP_METHOD;
93a17b20 6488 PREBLOCK(METHOD);
463ee0b2
LW
6489 }
6490
8990e307
LW
6491 /* If followed by a bareword, see if it looks like indir obj. */
6492
30fe34ed
RGS
6493 if (!orig_keyword
6494 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6495 && (tmp = intuit_method(s, gv, cv))) {
6496 op_free(rv2cv_op);
bbf60fe6 6497 return REPORT(tmp);
f7461760 6498 }
93a17b20 6499
8990e307
LW
6500 /* Not a method, so call it a subroutine (if defined) */
6501
5069cc75 6502 if (cv) {
9b387841
NC
6503 if (lastchar == '-')
6504 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6505 "Ambiguous use of -%s resolved as -&%s()",
6506 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6507 /* Check for a constant sub */
f7461760 6508 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6509 its_constant:
f7461760 6510 op_free(rv2cv_op);
6154021b
RGS
6511 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6512 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6513 pl_yylval.opval->op_private = 0;
96e4d5b1 6514 TOKEN(WORD);
89bfa8cd 6515 }
6516
6154021b 6517 op_free(pl_yylval.opval);
f7461760 6518 pl_yylval.opval = rv2cv_op;
6154021b 6519 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6520 PL_last_lop = PL_oldbufptr;
bf848113 6521 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6522 /* Is there a prototype? */
5db06880
NC
6523 if (
6524#ifdef PERL_MAD
6525 cv &&
6526#endif
d9f2850e
RGS
6527 SvPOK(cv))
6528 {
5f66b61c 6529 STRLEN protolen;
daba3364 6530 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6531 if (!protolen)
4633a7c4 6532 TERM(FUNC0SUB);
0f5d0394
AE
6533 while (*proto == ';')
6534 proto++;
649d02de
FC
6535 if (
6536 (
6537 (
6538 *proto == '$' || *proto == '_'
c035a075 6539 || *proto == '*' || *proto == '+'
649d02de
FC
6540 )
6541 && proto[1] == '\0'
6542 )
6543 || (
6544 *proto == '\\' && proto[1] && proto[2] == '\0'
6545 )
6546 )
6547 OPERATOR(UNIOPSUB);
6548 if (*proto == '\\' && proto[1] == '[') {
6549 const char *p = proto + 2;
6550 while(*p && *p != ']')
6551 ++p;
6552 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6553 }
7a52d87a 6554 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6555 if (PL_curstash)
6556 sv_setpvs(PL_subname, "__ANON__");
6557 else
6558 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6559 PREBLOCK(LSTOPSUB);
6560 }
a9ef352a 6561 }
5db06880
NC
6562#ifdef PERL_MAD
6563 {
6564 if (PL_madskills) {
cd81e915
NC
6565 PL_nextwhite = PL_thiswhite;
6566 PL_thiswhite = 0;
5db06880 6567 }
cd81e915 6568 start_force(PL_curforce);
6154021b 6569 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6570 PL_expect = XTERM;
6571 if (PL_madskills) {
cd81e915
NC
6572 PL_nextwhite = nextPL_nextwhite;
6573 curmad('X', PL_thistoken);
6b29d1f5 6574 PL_thistoken = newSVpvs("");
5db06880
NC
6575 }
6576 force_next(WORD);
6577 TOKEN(NOAMP);
6578 }
6579 }
6580
6581 /* Guess harder when madskills require "best effort". */
6582 if (PL_madskills && (!gv || !GvCVu(gv))) {
6583 int probable_sub = 0;
6584 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6585 probable_sub = 1;
6586 else if (isALPHA(*s)) {
6587 char tmpbuf[1024];
6588 STRLEN tmplen;
6589 d = s;
6590 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6591 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6592 probable_sub = 1;
6593 else {
6594 while (d < PL_bufend && isSPACE(*d))
6595 d++;
6596 if (*d == '=' && d[1] == '>')
6597 probable_sub = 1;
6598 }
6599 }
6600 if (probable_sub) {
7a6d04f4 6601 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6602 op_free(pl_yylval.opval);
f7461760 6603 pl_yylval.opval = rv2cv_op;
6154021b 6604 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6605 PL_last_lop = PL_oldbufptr;
6606 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6607 PL_nextwhite = PL_thiswhite;
6608 PL_thiswhite = 0;
6609 start_force(PL_curforce);
6154021b 6610 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6611 PL_expect = XTERM;
cd81e915
NC
6612 PL_nextwhite = nextPL_nextwhite;
6613 curmad('X', PL_thistoken);
6b29d1f5 6614 PL_thistoken = newSVpvs("");
5db06880
NC
6615 force_next(WORD);
6616 TOKEN(NOAMP);
6617 }
6618#else
6154021b 6619 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6620 PL_expect = XTERM;
8990e307
LW
6621 force_next(WORD);
6622 TOKEN(NOAMP);
5db06880 6623#endif
8990e307 6624 }
748a9306 6625
8990e307
LW
6626 /* Call it a bare word */
6627
5603f27d 6628 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6629 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6630 else {
9a073a1d
RGS
6631 bareword:
6632 /* after "print" and similar functions (corresponding to
6633 * "F? L" in opcode.pl), whatever wasn't already parsed as
6634 * a filehandle should be subject to "strict subs".
6635 * Likewise for the optional indirect-object argument to system
6636 * or exec, which can't be a bareword */
6637 if ((PL_last_lop_op == OP_PRINT
6638 || PL_last_lop_op == OP_PRTF
6639 || PL_last_lop_op == OP_SAY
6640 || PL_last_lop_op == OP_SYSTEM
6641 || PL_last_lop_op == OP_EXEC)
6642 && (PL_hints & HINT_STRICT_SUBS))
6643 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6644 if (lastchar != '-') {
6645 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6646 d = PL_tokenbuf;
6647 while (isLOWER(*d))
6648 d++;
da51bb9b 6649 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6650 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6651 PL_tokenbuf);
6652 }
748a9306
LW
6653 }
6654 }
f7461760 6655 op_free(rv2cv_op);
c3e0f903
GS
6656
6657 safe_bareword:
9b387841
NC
6658 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6659 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6660 "Operator or semicolon missing before %c%s",
6661 lastchar, PL_tokenbuf);
6662 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6663 "Ambiguous use of %c resolved as operator %c",
6664 lastchar, lastchar);
748a9306 6665 }
93a17b20 6666 TOKEN(WORD);
79072805 6667 }
79072805 6668
68dc0745 6669 case KEY___FILE__:
6154021b 6670 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6671 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6672 TERM(THING);
6673
79072805 6674 case KEY___LINE__:
6154021b 6675 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6676 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6677 TERM(THING);
68dc0745 6678
6679 case KEY___PACKAGE__:
6154021b 6680 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6681 (PL_curstash
5aaec2b4 6682 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6683 : &PL_sv_undef));
79072805 6684 TERM(THING);
79072805 6685
e50aee73 6686 case KEY___DATA__:
79072805
LW
6687 case KEY___END__: {
6688 GV *gv;
3280af22 6689 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6690 const char *pname = "main";
3280af22 6691 if (PL_tokenbuf[2] == 'D')
bfcb3514 6692 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6693 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6694 SVt_PVIO);
a5f75d66 6695 GvMULTI_on(gv);
79072805 6696 if (!GvIO(gv))
a0d0e21e 6697 GvIOp(gv) = newIO();
3280af22 6698 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6699#if defined(HAS_FCNTL) && defined(F_SETFD)
6700 {
f54cb97a 6701 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6702 fcntl(fd,F_SETFD,fd >= 3);
6703 }
79072805 6704#endif
fd049845 6705 /* Mark this internal pseudo-handle as clean */
6706 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6707 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6708 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6709 else
50952442 6710 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6711#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6712 /* if the script was opened in binmode, we need to revert
53129d29 6713 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6714 * XXX this is a questionable hack at best. */
53129d29
GS
6715 if (PL_bufend-PL_bufptr > 2
6716 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6717 {
6718 Off_t loc = 0;
50952442 6719 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6720 loc = PerlIO_tell(PL_rsfp);
6721 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6722 }
2986a63f
JH
6723#ifdef NETWARE
6724 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6725#else
c39cd008 6726 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6727#endif /* NETWARE */
1143fce0
JH
6728#ifdef PERLIO_IS_STDIO /* really? */
6729# if defined(__BORLANDC__)
cb359b41
JH
6730 /* XXX see note in do_binmode() */
6731 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6732# endif
6733#endif
c39cd008
GS
6734 if (loc > 0)
6735 PerlIO_seek(PL_rsfp, loc, 0);
6736 }
6737 }
6738#endif
7948272d 6739#ifdef PERLIO_LAYERS
52d2e0f4
JH
6740 if (!IN_BYTES) {
6741 if (UTF)
6742 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6743 else if (PL_encoding) {
6744 SV *name;
6745 dSP;
6746 ENTER;
6747 SAVETMPS;
6748 PUSHMARK(sp);
6749 EXTEND(SP, 1);
6750 XPUSHs(PL_encoding);
6751 PUTBACK;
6752 call_method("name", G_SCALAR);
6753 SPAGAIN;
6754 name = POPs;
6755 PUTBACK;
bfed75c6 6756 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6757 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6758 SVfARG(name)));
52d2e0f4
JH
6759 FREETMPS;
6760 LEAVE;
6761 }
6762 }
7948272d 6763#endif
5db06880
NC
6764#ifdef PERL_MAD
6765 if (PL_madskills) {
cd81e915
NC
6766 if (PL_realtokenstart >= 0) {
6767 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6768 if (!PL_endwhite)
6b29d1f5 6769 PL_endwhite = newSVpvs("");
cd81e915
NC
6770 sv_catsv(PL_endwhite, PL_thiswhite);
6771 PL_thiswhite = 0;
6772 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6773 PL_realtokenstart = -1;
5db06880 6774 }
5cc814fd
NC
6775 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6776 != NULL) ;
5db06880
NC
6777 }
6778#endif
4608196e 6779 PL_rsfp = NULL;
79072805
LW
6780 }
6781 goto fake_eof;
e929a76b 6782 }
de3bb511 6783
8990e307 6784 case KEY_AUTOLOAD:
ed6116ce 6785 case KEY_DESTROY:
79072805 6786 case KEY_BEGIN:
3c10abe3 6787 case KEY_UNITCHECK:
7d30b5c4 6788 case KEY_CHECK:
7d07dbc2 6789 case KEY_INIT:
7d30b5c4 6790 case KEY_END:
3280af22
NIS
6791 if (PL_expect == XSTATE) {
6792 s = PL_bufptr;
93a17b20 6793 goto really_sub;
79072805
LW
6794 }
6795 goto just_a_word;
6796
a0d0e21e
LW
6797 case KEY_CORE:
6798 if (*s == ':' && s[1] == ':') {
6799 s += 2;
748a9306 6800 d = s;
3280af22 6801 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6802 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6803 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6804 if (tmp < 0)
6805 tmp = -tmp;
850e8516 6806 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6807 /* that's a way to remember we saw "CORE::" */
850e8516 6808 orig_keyword = tmp;
a0d0e21e
LW
6809 goto reserved_word;
6810 }
6811 goto just_a_word;
6812
463ee0b2
LW
6813 case KEY_abs:
6814 UNI(OP_ABS);
6815
79072805
LW
6816 case KEY_alarm:
6817 UNI(OP_ALARM);
6818
6819 case KEY_accept:
a0d0e21e 6820 LOP(OP_ACCEPT,XTERM);
79072805 6821
463ee0b2
LW
6822 case KEY_and:
6823 OPERATOR(ANDOP);
6824
79072805 6825 case KEY_atan2:
a0d0e21e 6826 LOP(OP_ATAN2,XTERM);
85e6fe83 6827
79072805 6828 case KEY_bind:
a0d0e21e 6829 LOP(OP_BIND,XTERM);
79072805
LW
6830
6831 case KEY_binmode:
1c1fc3ea 6832 LOP(OP_BINMODE,XTERM);
79072805
LW
6833
6834 case KEY_bless:
a0d0e21e 6835 LOP(OP_BLESS,XTERM);
79072805 6836
0d863452
RH
6837 case KEY_break:
6838 FUN0(OP_BREAK);
6839
79072805
LW
6840 case KEY_chop:
6841 UNI(OP_CHOP);
6842
6843 case KEY_continue:
0d863452
RH
6844 /* When 'use switch' is in effect, continue has a dual
6845 life as a control operator. */
6846 {
ef89dcc3 6847 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6848 PREBLOCK(CONTINUE);
6849 else {
6850 /* We have to disambiguate the two senses of
6851 "continue". If the next token is a '{' then
6852 treat it as the start of a continue block;
6853 otherwise treat it as a control operator.
6854 */
6855 s = skipspace(s);
6856 if (*s == '{')
79072805 6857 PREBLOCK(CONTINUE);
0d863452
RH
6858 else
6859 FUN0(OP_CONTINUE);
6860 }
6861 }
79072805
LW
6862
6863 case KEY_chdir:
fafc274c
NC
6864 /* may use HOME */
6865 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6866 UNI(OP_CHDIR);
6867
6868 case KEY_close:
6869 UNI(OP_CLOSE);
6870
6871 case KEY_closedir:
6872 UNI(OP_CLOSEDIR);
6873
6874 case KEY_cmp:
6875 Eop(OP_SCMP);
6876
6877 case KEY_caller:
6878 UNI(OP_CALLER);
6879
6880 case KEY_crypt:
6881#ifdef FCRYPT
f4c556ac
GS
6882 if (!PL_cryptseen) {
6883 PL_cryptseen = TRUE;
de3bb511 6884 init_des();
f4c556ac 6885 }
a687059c 6886#endif
a0d0e21e 6887 LOP(OP_CRYPT,XTERM);
79072805
LW
6888
6889 case KEY_chmod:
a0d0e21e 6890 LOP(OP_CHMOD,XTERM);
79072805
LW
6891
6892 case KEY_chown:
a0d0e21e 6893 LOP(OP_CHOWN,XTERM);
79072805
LW
6894
6895 case KEY_connect:
a0d0e21e 6896 LOP(OP_CONNECT,XTERM);
79072805 6897
463ee0b2
LW
6898 case KEY_chr:
6899 UNI(OP_CHR);
6900
79072805
LW
6901 case KEY_cos:
6902 UNI(OP_COS);
6903
6904 case KEY_chroot:
6905 UNI(OP_CHROOT);
6906
0d863452
RH
6907 case KEY_default:
6908 PREBLOCK(DEFAULT);
6909
79072805 6910 case KEY_do:
29595ff2 6911 s = SKIPSPACE1(s);
79072805 6912 if (*s == '{')
a0d0e21e 6913 PRETERMBLOCK(DO);
79072805 6914 if (*s != '\'')
89c5585f 6915 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6916 if (orig_keyword == KEY_do) {
6917 orig_keyword = 0;
6154021b 6918 pl_yylval.ival = 1;
850e8516
RGS
6919 }
6920 else
6154021b 6921 pl_yylval.ival = 0;
378cc40b 6922 OPERATOR(DO);
79072805
LW
6923
6924 case KEY_die:
3280af22 6925 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6926 LOP(OP_DIE,XTERM);
79072805
LW
6927
6928 case KEY_defined:
6929 UNI(OP_DEFINED);
6930
6931 case KEY_delete:
a0d0e21e 6932 UNI(OP_DELETE);
79072805
LW
6933
6934 case KEY_dbmopen:
74e8ce34
NC
6935 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
6936 STR_WITH_LEN("NDBM_File::"),
6937 STR_WITH_LEN("DB_File::"),
6938 STR_WITH_LEN("GDBM_File::"),
6939 STR_WITH_LEN("SDBM_File::"),
6940 STR_WITH_LEN("ODBM_File::"),
6941 NULL);
a0d0e21e 6942 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6943
6944 case KEY_dbmclose:
6945 UNI(OP_DBMCLOSE);
6946
6947 case KEY_dump:
a0d0e21e 6948 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6949 LOOPX(OP_DUMP);
6950
6951 case KEY_else:
6952 PREBLOCK(ELSE);
6953
6954 case KEY_elsif:
6154021b 6955 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6956 OPERATOR(ELSIF);
6957
6958 case KEY_eq:
6959 Eop(OP_SEQ);
6960
a0d0e21e
LW
6961 case KEY_exists:
6962 UNI(OP_EXISTS);
4e553d73 6963
79072805 6964 case KEY_exit:
5db06880
NC
6965 if (PL_madskills)
6966 UNI(OP_INT);
79072805
LW
6967 UNI(OP_EXIT);
6968
6969 case KEY_eval:
29595ff2 6970 s = SKIPSPACE1(s);
32e2a35d
RGS
6971 if (*s == '{') { /* block eval */
6972 PL_expect = XTERMBLOCK;
6973 UNIBRACK(OP_ENTERTRY);
6974 }
6975 else { /* string eval */
6976 PL_expect = XTERM;
6977 UNIBRACK(OP_ENTEREVAL);
6978 }
79072805
LW
6979
6980 case KEY_eof:
6981 UNI(OP_EOF);
6982
6983 case KEY_exp:
6984 UNI(OP_EXP);
6985
6986 case KEY_each:
6987 UNI(OP_EACH);
6988
6989 case KEY_exec:
a0d0e21e 6990 LOP(OP_EXEC,XREF);
79072805
LW
6991
6992 case KEY_endhostent:
6993 FUN0(OP_EHOSTENT);
6994
6995 case KEY_endnetent:
6996 FUN0(OP_ENETENT);
6997
6998 case KEY_endservent:
6999 FUN0(OP_ESERVENT);
7000
7001 case KEY_endprotoent:
7002 FUN0(OP_EPROTOENT);
7003
7004 case KEY_endpwent:
7005 FUN0(OP_EPWENT);
7006
7007 case KEY_endgrent:
7008 FUN0(OP_EGRENT);
7009
7010 case KEY_for:
7011 case KEY_foreach:
6154021b 7012 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7013 s = SKIPSPACE1(s);
7e2040f0 7014 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7015 char *p = s;
5db06880
NC
7016#ifdef PERL_MAD
7017 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7018#endif
7019
3280af22 7020 if ((PL_bufend - p) >= 3 &&
55497cff 7021 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7022 p += 2;
77ca0c92
LW
7023 else if ((PL_bufend - p) >= 4 &&
7024 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7025 p += 3;
29595ff2 7026 p = PEEKSPACE(p);
7e2040f0 7027 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7028 p = scan_ident(p, PL_bufend,
7029 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7030 p = PEEKSPACE(p);
77ca0c92
LW
7031 }
7032 if (*p != '$')
cea2e8a9 7033 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7034#ifdef PERL_MAD
7035 s = SvPVX(PL_linestr) + soff;
7036#endif
55497cff 7037 }
79072805
LW
7038 OPERATOR(FOR);
7039
7040 case KEY_formline:
a0d0e21e 7041 LOP(OP_FORMLINE,XTERM);
79072805
LW
7042
7043 case KEY_fork:
7044 FUN0(OP_FORK);
7045
7046 case KEY_fcntl:
a0d0e21e 7047 LOP(OP_FCNTL,XTERM);
79072805
LW
7048
7049 case KEY_fileno:
7050 UNI(OP_FILENO);
7051
7052 case KEY_flock:
a0d0e21e 7053 LOP(OP_FLOCK,XTERM);
79072805
LW
7054
7055 case KEY_gt:
7056 Rop(OP_SGT);
7057
7058 case KEY_ge:
7059 Rop(OP_SGE);
7060
7061 case KEY_grep:
2c38e13d 7062 LOP(OP_GREPSTART, XREF);
79072805
LW
7063
7064 case KEY_goto:
a0d0e21e 7065 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7066 LOOPX(OP_GOTO);
7067
7068 case KEY_gmtime:
7069 UNI(OP_GMTIME);
7070
7071 case KEY_getc:
6f33ba73 7072 UNIDOR(OP_GETC);
79072805
LW
7073
7074 case KEY_getppid:
7075 FUN0(OP_GETPPID);
7076
7077 case KEY_getpgrp:
7078 UNI(OP_GETPGRP);
7079
7080 case KEY_getpriority:
a0d0e21e 7081 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7082
7083 case KEY_getprotobyname:
7084 UNI(OP_GPBYNAME);
7085
7086 case KEY_getprotobynumber:
a0d0e21e 7087 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7088
7089 case KEY_getprotoent:
7090 FUN0(OP_GPROTOENT);
7091
7092 case KEY_getpwent:
7093 FUN0(OP_GPWENT);
7094
7095 case KEY_getpwnam:
ff68c719 7096 UNI(OP_GPWNAM);
79072805
LW
7097
7098 case KEY_getpwuid:
ff68c719 7099 UNI(OP_GPWUID);
79072805
LW
7100
7101 case KEY_getpeername:
7102 UNI(OP_GETPEERNAME);
7103
7104 case KEY_gethostbyname:
7105 UNI(OP_GHBYNAME);
7106
7107 case KEY_gethostbyaddr:
a0d0e21e 7108 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7109
7110 case KEY_gethostent:
7111 FUN0(OP_GHOSTENT);
7112
7113 case KEY_getnetbyname:
7114 UNI(OP_GNBYNAME);
7115
7116 case KEY_getnetbyaddr:
a0d0e21e 7117 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7118
7119 case KEY_getnetent:
7120 FUN0(OP_GNETENT);
7121
7122 case KEY_getservbyname:
a0d0e21e 7123 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7124
7125 case KEY_getservbyport:
a0d0e21e 7126 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7127
7128 case KEY_getservent:
7129 FUN0(OP_GSERVENT);
7130
7131 case KEY_getsockname:
7132 UNI(OP_GETSOCKNAME);
7133
7134 case KEY_getsockopt:
a0d0e21e 7135 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7136
7137 case KEY_getgrent:
7138 FUN0(OP_GGRENT);
7139
7140 case KEY_getgrnam:
ff68c719 7141 UNI(OP_GGRNAM);
79072805
LW
7142
7143 case KEY_getgrgid:
ff68c719 7144 UNI(OP_GGRGID);
79072805
LW
7145
7146 case KEY_getlogin:
7147 FUN0(OP_GETLOGIN);
7148
0d863452 7149 case KEY_given:
6154021b 7150 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7151 OPERATOR(GIVEN);
7152
93a17b20 7153 case KEY_glob:
a0d0e21e 7154 LOP(OP_GLOB,XTERM);
93a17b20 7155
79072805
LW
7156 case KEY_hex:
7157 UNI(OP_HEX);
7158
7159 case KEY_if:
6154021b 7160 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7161 OPERATOR(IF);
7162
7163 case KEY_index:
a0d0e21e 7164 LOP(OP_INDEX,XTERM);
79072805
LW
7165
7166 case KEY_int:
7167 UNI(OP_INT);
7168
7169 case KEY_ioctl:
a0d0e21e 7170 LOP(OP_IOCTL,XTERM);
79072805
LW
7171
7172 case KEY_join:
a0d0e21e 7173 LOP(OP_JOIN,XTERM);
79072805
LW
7174
7175 case KEY_keys:
7176 UNI(OP_KEYS);
7177
7178 case KEY_kill:
a0d0e21e 7179 LOP(OP_KILL,XTERM);
79072805
LW
7180
7181 case KEY_last:
a0d0e21e 7182 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7183 LOOPX(OP_LAST);
4e553d73 7184
79072805
LW
7185 case KEY_lc:
7186 UNI(OP_LC);
7187
7188 case KEY_lcfirst:
7189 UNI(OP_LCFIRST);
7190
7191 case KEY_local:
6154021b 7192 pl_yylval.ival = 0;
79072805
LW
7193 OPERATOR(LOCAL);
7194
7195 case KEY_length:
7196 UNI(OP_LENGTH);
7197
7198 case KEY_lt:
7199 Rop(OP_SLT);
7200
7201 case KEY_le:
7202 Rop(OP_SLE);
7203
7204 case KEY_localtime:
7205 UNI(OP_LOCALTIME);
7206
7207 case KEY_log:
7208 UNI(OP_LOG);
7209
7210 case KEY_link:
a0d0e21e 7211 LOP(OP_LINK,XTERM);
79072805
LW
7212
7213 case KEY_listen:
a0d0e21e 7214 LOP(OP_LISTEN,XTERM);
79072805 7215
c0329465
MB
7216 case KEY_lock:
7217 UNI(OP_LOCK);
7218
79072805
LW
7219 case KEY_lstat:
7220 UNI(OP_LSTAT);
7221
7222 case KEY_m:
8782bef2 7223 s = scan_pat(s,OP_MATCH);
79072805
LW
7224 TERM(sublex_start());
7225
a0d0e21e 7226 case KEY_map:
2c38e13d 7227 LOP(OP_MAPSTART, XREF);
4e4e412b 7228
79072805 7229 case KEY_mkdir:
a0d0e21e 7230 LOP(OP_MKDIR,XTERM);
79072805
LW
7231
7232 case KEY_msgctl:
a0d0e21e 7233 LOP(OP_MSGCTL,XTERM);
79072805
LW
7234
7235 case KEY_msgget:
a0d0e21e 7236 LOP(OP_MSGGET,XTERM);
79072805
LW
7237
7238 case KEY_msgrcv:
a0d0e21e 7239 LOP(OP_MSGRCV,XTERM);
79072805
LW
7240
7241 case KEY_msgsnd:
a0d0e21e 7242 LOP(OP_MSGSND,XTERM);
79072805 7243
77ca0c92 7244 case KEY_our:
93a17b20 7245 case KEY_my:
952306ac 7246 case KEY_state:
eac04b2e 7247 PL_in_my = (U16)tmp;
29595ff2 7248 s = SKIPSPACE1(s);
7e2040f0 7249 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7250#ifdef PERL_MAD
7251 char* start = s;
7252#endif
3280af22 7253 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7254 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7255 goto really_sub;
def3634b 7256 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7257 if (!PL_in_my_stash) {
c750a3ec 7258 char tmpbuf[1024];
3280af22 7259 PL_bufptr = s;
d9fad198 7260 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7261 yyerror(tmpbuf);
7262 }
5db06880
NC
7263#ifdef PERL_MAD
7264 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7265 sv_catsv(PL_thistoken, PL_nextwhite);
7266 PL_nextwhite = 0;
7267 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7268 }
7269#endif
c750a3ec 7270 }
6154021b 7271 pl_yylval.ival = 1;
55497cff 7272 OPERATOR(MY);
93a17b20 7273
79072805 7274 case KEY_next:
a0d0e21e 7275 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7276 LOOPX(OP_NEXT);
7277
7278 case KEY_ne:
7279 Eop(OP_SNE);
7280
a0d0e21e 7281 case KEY_no:
468aa647 7282 s = tokenize_use(0, s);
a0d0e21e
LW
7283 OPERATOR(USE);
7284
7285 case KEY_not:
29595ff2 7286 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
7287 FUN1(OP_NOT);
7288 else
7289 OPERATOR(NOTOP);
a0d0e21e 7290
79072805 7291 case KEY_open:
29595ff2 7292 s = SKIPSPACE1(s);
7e2040f0 7293 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7294 const char *t;
c35e046a
AL
7295 for (d = s; isALNUM_lazy_if(d,UTF);)
7296 d++;
7297 for (t=d; isSPACE(*t);)
7298 t++;
e2ab214b 7299 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7300 /* [perl #16184] */
7301 && !(t[0] == '=' && t[1] == '>')
7302 ) {
5f66b61c 7303 int parms_len = (int)(d-s);
9014280d 7304 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7305 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7306 parms_len, s, parms_len, s);
66fbe8fb 7307 }
93a17b20 7308 }
a0d0e21e 7309 LOP(OP_OPEN,XTERM);
79072805 7310
463ee0b2 7311 case KEY_or:
6154021b 7312 pl_yylval.ival = OP_OR;
463ee0b2
LW
7313 OPERATOR(OROP);
7314
79072805
LW
7315 case KEY_ord:
7316 UNI(OP_ORD);
7317
7318 case KEY_oct:
7319 UNI(OP_OCT);
7320
7321 case KEY_opendir:
a0d0e21e 7322 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7323
7324 case KEY_print:
3280af22 7325 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7326 LOP(OP_PRINT,XREF);
79072805
LW
7327
7328 case KEY_printf:
3280af22 7329 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7330 LOP(OP_PRTF,XREF);
79072805 7331
c07a80fd 7332 case KEY_prototype:
7333 UNI(OP_PROTOTYPE);
7334
79072805 7335 case KEY_push:
a0d0e21e 7336 LOP(OP_PUSH,XTERM);
79072805
LW
7337
7338 case KEY_pop:
6f33ba73 7339 UNIDOR(OP_POP);
79072805 7340
a0d0e21e 7341 case KEY_pos:
6f33ba73 7342 UNIDOR(OP_POS);
4e553d73 7343
79072805 7344 case KEY_pack:
a0d0e21e 7345 LOP(OP_PACK,XTERM);
79072805
LW
7346
7347 case KEY_package:
a0d0e21e 7348 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7349 s = SKIPSPACE1(s);
91152fc1 7350 s = force_strict_version(s);
4e4da3ac 7351 PL_lex_expect = XBLOCK;
79072805
LW
7352 OPERATOR(PACKAGE);
7353
7354 case KEY_pipe:
a0d0e21e 7355 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7356
7357 case KEY_q:
5db06880 7358 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7359 if (!s)
d4c19fe8 7360 missingterm(NULL);
6154021b 7361 pl_yylval.ival = OP_CONST;
79072805
LW
7362 TERM(sublex_start());
7363
a0d0e21e
LW
7364 case KEY_quotemeta:
7365 UNI(OP_QUOTEMETA);
7366
ea25a9b2
Z
7367 case KEY_qw: {
7368 OP *words = NULL;
5db06880 7369 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7370 if (!s)
d4c19fe8 7371 missingterm(NULL);
3480a8d2 7372 PL_expect = XOPERATOR;
8127e0e3 7373 if (SvCUR(PL_lex_stuff)) {
8127e0e3 7374 int warned = 0;
3280af22 7375 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7376 while (len) {
d4c19fe8
AL
7377 for (; isSPACE(*d) && len; --len, ++d)
7378 /**/;
8127e0e3 7379 if (len) {
d4c19fe8 7380 SV *sv;
f54cb97a 7381 const char *b = d;
e476b1b5 7382 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7383 for (; !isSPACE(*d) && len; --len, ++d) {
7384 if (*d == ',') {
9014280d 7385 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7386 "Possible attempt to separate words with commas");
7387 ++warned;
7388 }
7389 else if (*d == '#') {
9014280d 7390 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7391 "Possible attempt to put comments in qw() list");
7392 ++warned;
7393 }
7394 }
7395 }
7396 else {
d4c19fe8
AL
7397 for (; !isSPACE(*d) && len; --len, ++d)
7398 /**/;
8127e0e3 7399 }
740cce10 7400 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7401 words = op_append_elem(OP_LIST, words,
7948272d 7402 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7403 }
7404 }
7405 }
ea25a9b2
Z
7406 if (!words)
7407 words = newNULLLIST();
37fd879b 7408 if (PL_lex_stuff) {
8127e0e3 7409 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7410 PL_lex_stuff = NULL;
37fd879b 7411 }
ea25a9b2
Z
7412 PL_expect = XOPERATOR;
7413 pl_yylval.opval = sawparens(words);
7414 TOKEN(QWLIST);
7415 }
8990e307 7416
79072805 7417 case KEY_qq:
5db06880 7418 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7419 if (!s)
d4c19fe8 7420 missingterm(NULL);
6154021b 7421 pl_yylval.ival = OP_STRINGIFY;
3280af22 7422 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7423 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7424 TERM(sublex_start());
7425
8782bef2
GB
7426 case KEY_qr:
7427 s = scan_pat(s,OP_QR);
7428 TERM(sublex_start());
7429
79072805 7430 case KEY_qx:
5db06880 7431 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7432 if (!s)
d4c19fe8 7433 missingterm(NULL);
9b201d7d 7434 readpipe_override();
79072805
LW
7435 TERM(sublex_start());
7436
7437 case KEY_return:
7438 OLDLOP(OP_RETURN);
7439
7440 case KEY_require:
29595ff2 7441 s = SKIPSPACE1(s);
e759cc13
RGS
7442 if (isDIGIT(*s)) {
7443 s = force_version(s, FALSE);
a7cb1f99 7444 }
e759cc13
RGS
7445 else if (*s != 'v' || !isDIGIT(s[1])
7446 || (s = force_version(s, TRUE), *s == 'v'))
7447 {
a7cb1f99
GS
7448 *PL_tokenbuf = '\0';
7449 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7450 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7451 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7452 else if (*s == '<')
7453 yyerror("<> should be quotes");
7454 }
a72a1c8b
RGS
7455 if (orig_keyword == KEY_require) {
7456 orig_keyword = 0;
6154021b 7457 pl_yylval.ival = 1;
a72a1c8b
RGS
7458 }
7459 else
6154021b 7460 pl_yylval.ival = 0;
a72a1c8b
RGS
7461 PL_expect = XTERM;
7462 PL_bufptr = s;
7463 PL_last_uni = PL_oldbufptr;
7464 PL_last_lop_op = OP_REQUIRE;
7465 s = skipspace(s);
7466 return REPORT( (int)REQUIRE );
79072805
LW
7467
7468 case KEY_reset:
7469 UNI(OP_RESET);
7470
7471 case KEY_redo:
a0d0e21e 7472 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7473 LOOPX(OP_REDO);
7474
7475 case KEY_rename:
a0d0e21e 7476 LOP(OP_RENAME,XTERM);
79072805
LW
7477
7478 case KEY_rand:
7479 UNI(OP_RAND);
7480
7481 case KEY_rmdir:
7482 UNI(OP_RMDIR);
7483
7484 case KEY_rindex:
a0d0e21e 7485 LOP(OP_RINDEX,XTERM);
79072805
LW
7486
7487 case KEY_read:
a0d0e21e 7488 LOP(OP_READ,XTERM);
79072805
LW
7489
7490 case KEY_readdir:
7491 UNI(OP_READDIR);
7492
93a17b20 7493 case KEY_readline:
6f33ba73 7494 UNIDOR(OP_READLINE);
93a17b20
LW
7495
7496 case KEY_readpipe:
0858480c 7497 UNIDOR(OP_BACKTICK);
93a17b20 7498
79072805
LW
7499 case KEY_rewinddir:
7500 UNI(OP_REWINDDIR);
7501
7502 case KEY_recv:
a0d0e21e 7503 LOP(OP_RECV,XTERM);
79072805
LW
7504
7505 case KEY_reverse:
a0d0e21e 7506 LOP(OP_REVERSE,XTERM);
79072805
LW
7507
7508 case KEY_readlink:
6f33ba73 7509 UNIDOR(OP_READLINK);
79072805
LW
7510
7511 case KEY_ref:
7512 UNI(OP_REF);
7513
7514 case KEY_s:
7515 s = scan_subst(s);
6154021b 7516 if (pl_yylval.opval)
79072805
LW
7517 TERM(sublex_start());
7518 else
7519 TOKEN(1); /* force error */
7520
0d863452
RH
7521 case KEY_say:
7522 checkcomma(s,PL_tokenbuf,"filehandle");
7523 LOP(OP_SAY,XREF);
7524
a0d0e21e
LW
7525 case KEY_chomp:
7526 UNI(OP_CHOMP);
4e553d73 7527
79072805
LW
7528 case KEY_scalar:
7529 UNI(OP_SCALAR);
7530
7531 case KEY_select:
a0d0e21e 7532 LOP(OP_SELECT,XTERM);
79072805
LW
7533
7534 case KEY_seek:
a0d0e21e 7535 LOP(OP_SEEK,XTERM);
79072805
LW
7536
7537 case KEY_semctl:
a0d0e21e 7538 LOP(OP_SEMCTL,XTERM);
79072805
LW
7539
7540 case KEY_semget:
a0d0e21e 7541 LOP(OP_SEMGET,XTERM);
79072805
LW
7542
7543 case KEY_semop:
a0d0e21e 7544 LOP(OP_SEMOP,XTERM);
79072805
LW
7545
7546 case KEY_send:
a0d0e21e 7547 LOP(OP_SEND,XTERM);
79072805
LW
7548
7549 case KEY_setpgrp:
a0d0e21e 7550 LOP(OP_SETPGRP,XTERM);
79072805
LW
7551
7552 case KEY_setpriority:
a0d0e21e 7553 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7554
7555 case KEY_sethostent:
ff68c719 7556 UNI(OP_SHOSTENT);
79072805
LW
7557
7558 case KEY_setnetent:
ff68c719 7559 UNI(OP_SNETENT);
79072805
LW
7560
7561 case KEY_setservent:
ff68c719 7562 UNI(OP_SSERVENT);
79072805
LW
7563
7564 case KEY_setprotoent:
ff68c719 7565 UNI(OP_SPROTOENT);
79072805
LW
7566
7567 case KEY_setpwent:
7568 FUN0(OP_SPWENT);
7569
7570 case KEY_setgrent:
7571 FUN0(OP_SGRENT);
7572
7573 case KEY_seekdir:
a0d0e21e 7574 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7575
7576 case KEY_setsockopt:
a0d0e21e 7577 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7578
7579 case KEY_shift:
6f33ba73 7580 UNIDOR(OP_SHIFT);
79072805
LW
7581
7582 case KEY_shmctl:
a0d0e21e 7583 LOP(OP_SHMCTL,XTERM);
79072805
LW
7584
7585 case KEY_shmget:
a0d0e21e 7586 LOP(OP_SHMGET,XTERM);
79072805
LW
7587
7588 case KEY_shmread:
a0d0e21e 7589 LOP(OP_SHMREAD,XTERM);
79072805
LW
7590
7591 case KEY_shmwrite:
a0d0e21e 7592 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7593
7594 case KEY_shutdown:
a0d0e21e 7595 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7596
7597 case KEY_sin:
7598 UNI(OP_SIN);
7599
7600 case KEY_sleep:
7601 UNI(OP_SLEEP);
7602
7603 case KEY_socket:
a0d0e21e 7604 LOP(OP_SOCKET,XTERM);
79072805
LW
7605
7606 case KEY_socketpair:
a0d0e21e 7607 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7608
7609 case KEY_sort:
3280af22 7610 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7611 s = SKIPSPACE1(s);
79072805 7612 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7613 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7614 PL_expect = XTERM;
15f0808c 7615 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7616 LOP(OP_SORT,XREF);
79072805
LW
7617
7618 case KEY_split:
a0d0e21e 7619 LOP(OP_SPLIT,XTERM);
79072805
LW
7620
7621 case KEY_sprintf:
a0d0e21e 7622 LOP(OP_SPRINTF,XTERM);
79072805
LW
7623
7624 case KEY_splice:
a0d0e21e 7625 LOP(OP_SPLICE,XTERM);
79072805
LW
7626
7627 case KEY_sqrt:
7628 UNI(OP_SQRT);
7629
7630 case KEY_srand:
7631 UNI(OP_SRAND);
7632
7633 case KEY_stat:
7634 UNI(OP_STAT);
7635
7636 case KEY_study:
79072805
LW
7637 UNI(OP_STUDY);
7638
7639 case KEY_substr:
a0d0e21e 7640 LOP(OP_SUBSTR,XTERM);
79072805
LW
7641
7642 case KEY_format:
7643 case KEY_sub:
93a17b20 7644 really_sub:
09bef843 7645 {
3280af22 7646 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7647 SSize_t tboffset = 0;
09bef843 7648 expectation attrful;
28cc6278 7649 bool have_name, have_proto;
f54cb97a 7650 const int key = tmp;
09bef843 7651
5db06880
NC
7652#ifdef PERL_MAD
7653 SV *tmpwhite = 0;
7654
cd81e915 7655 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7656 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7657 PL_thistoken = 0;
5db06880
NC
7658
7659 d = s;
7660 s = SKIPSPACE2(s,tmpwhite);
7661#else
09bef843 7662 s = skipspace(s);
5db06880 7663#endif
09bef843 7664
7e2040f0 7665 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7666 (*s == ':' && s[1] == ':'))
7667 {
5db06880 7668#ifdef PERL_MAD
4f61fd4b 7669 SV *nametoke = NULL;
5db06880
NC
7670#endif
7671
09bef843
SB
7672 PL_expect = XBLOCK;
7673 attrful = XATTRBLOCK;
b1b65b59
JH
7674 /* remember buffer pos'n for later force_word */
7675 tboffset = s - PL_oldbufptr;
09bef843 7676 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7677#ifdef PERL_MAD
7678 if (PL_madskills)
7679 nametoke = newSVpvn(s, d - s);
7680#endif
6502358f
NC
7681 if (memchr(tmpbuf, ':', len))
7682 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7683 else {
7684 sv_setsv(PL_subname,PL_curstname);
396482e1 7685 sv_catpvs(PL_subname,"::");
09bef843
SB
7686 sv_catpvn(PL_subname,tmpbuf,len);
7687 }
09bef843 7688 have_name = TRUE;
5db06880
NC
7689
7690#ifdef PERL_MAD
7691
7692 start_force(0);
7693 CURMAD('X', nametoke);
7694 CURMAD('_', tmpwhite);
7695 (void) force_word(PL_oldbufptr + tboffset, WORD,
7696 FALSE, TRUE, TRUE);
7697
7698 s = SKIPSPACE2(d,tmpwhite);
7699#else
7700 s = skipspace(d);
7701#endif
09bef843 7702 }
463ee0b2 7703 else {
09bef843
SB
7704 if (key == KEY_my)
7705 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7706 PL_expect = XTERMBLOCK;
7707 attrful = XATTRTERM;
76f68e9b 7708 sv_setpvs(PL_subname,"?");
09bef843 7709 have_name = FALSE;
463ee0b2 7710 }
4633a7c4 7711
09bef843
SB
7712 if (key == KEY_format) {
7713 if (*s == '=')
7714 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7715#ifdef PERL_MAD
cd81e915 7716 PL_thistoken = subtoken;
5db06880
NC
7717 s = d;
7718#else
09bef843 7719 if (have_name)
b1b65b59
JH
7720 (void) force_word(PL_oldbufptr + tboffset, WORD,
7721 FALSE, TRUE, TRUE);
5db06880 7722#endif
09bef843
SB
7723 OPERATOR(FORMAT);
7724 }
79072805 7725
09bef843
SB
7726 /* Look for a prototype */
7727 if (*s == '(') {
d9f2850e
RGS
7728 char *p;
7729 bool bad_proto = FALSE;
9e8d7757
RB
7730 bool in_brackets = FALSE;
7731 char greedy_proto = ' ';
7732 bool proto_after_greedy_proto = FALSE;
7733 bool must_be_last = FALSE;
7734 bool underscore = FALSE;
aef2a98a 7735 bool seen_underscore = FALSE;
197afce1 7736 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7737
5db06880 7738 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7739 if (!s)
09bef843 7740 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7741 /* strip spaces and check for bad characters */
09bef843
SB
7742 d = SvPVX(PL_lex_stuff);
7743 tmp = 0;
d9f2850e
RGS
7744 for (p = d; *p; ++p) {
7745 if (!isSPACE(*p)) {
7746 d[tmp++] = *p;
9e8d7757 7747
197afce1 7748 if (warnillegalproto) {
9e8d7757
RB
7749 if (must_be_last)
7750 proto_after_greedy_proto = TRUE;
c035a075 7751 if (!strchr("$@%*;[]&\\_+", *p)) {
9e8d7757
RB
7752 bad_proto = TRUE;
7753 }
7754 else {
7755 if ( underscore ) {
7756 if ( *p != ';' )
7757 bad_proto = TRUE;
7758 underscore = FALSE;
7759 }
7760 if ( *p == '[' ) {
7761 in_brackets = TRUE;
7762 }
7763 else if ( *p == ']' ) {
7764 in_brackets = FALSE;
7765 }
7766 else if ( (*p == '@' || *p == '%') &&
7767 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7768 !in_brackets ) {
7769 must_be_last = TRUE;
7770 greedy_proto = *p;
7771 }
7772 else if ( *p == '_' ) {
aef2a98a 7773 underscore = seen_underscore = TRUE;
9e8d7757
RB
7774 }
7775 }
7776 }
d37a9538 7777 }
09bef843 7778 }
d9f2850e 7779 d[tmp] = '\0';
9e8d7757 7780 if (proto_after_greedy_proto)
197afce1 7781 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
7782 "Prototype after '%c' for %"SVf" : %s",
7783 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 7784 if (bad_proto)
197afce1 7785 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
7786 "Illegal character %sin prototype for %"SVf" : %s",
7787 seen_underscore ? "after '_' " : "",
be2597df 7788 SVfARG(PL_subname), d);
b162af07 7789 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7790 have_proto = TRUE;
68dc0745 7791
5db06880
NC
7792#ifdef PERL_MAD
7793 start_force(0);
cd81e915 7794 CURMAD('q', PL_thisopen);
5db06880 7795 CURMAD('_', tmpwhite);
cd81e915
NC
7796 CURMAD('=', PL_thisstuff);
7797 CURMAD('Q', PL_thisclose);
5db06880
NC
7798 NEXTVAL_NEXTTOKE.opval =
7799 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7800 PL_lex_stuff = NULL;
5db06880
NC
7801 force_next(THING);
7802
7803 s = SKIPSPACE2(s,tmpwhite);
7804#else
09bef843 7805 s = skipspace(s);
5db06880 7806#endif
4633a7c4 7807 }
09bef843
SB
7808 else
7809 have_proto = FALSE;
7810
7811 if (*s == ':' && s[1] != ':')
7812 PL_expect = attrful;
8e742a20
MHM
7813 else if (*s != '{' && key == KEY_sub) {
7814 if (!have_name)
7815 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7816 else if (*s != ';' && *s != '}')
be2597df 7817 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7818 }
09bef843 7819
5db06880
NC
7820#ifdef PERL_MAD
7821 start_force(0);
7822 if (tmpwhite) {
7823 if (PL_madskills)
6b29d1f5 7824 curmad('^', newSVpvs(""));
5db06880
NC
7825 CURMAD('_', tmpwhite);
7826 }
7827 force_next(0);
7828
cd81e915 7829 PL_thistoken = subtoken;
5db06880 7830#else
09bef843 7831 if (have_proto) {
9ded7720 7832 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7833 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7834 PL_lex_stuff = NULL;
09bef843 7835 force_next(THING);
68dc0745 7836 }
5db06880 7837#endif
09bef843 7838 if (!have_name) {
49a54bbe
NC
7839 if (PL_curstash)
7840 sv_setpvs(PL_subname, "__ANON__");
7841 else
7842 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7843 TOKEN(ANONSUB);
4633a7c4 7844 }
5db06880 7845#ifndef PERL_MAD
b1b65b59
JH
7846 (void) force_word(PL_oldbufptr + tboffset, WORD,
7847 FALSE, TRUE, TRUE);
5db06880 7848#endif
09bef843
SB
7849 if (key == KEY_my)
7850 TOKEN(MYSUB);
7851 TOKEN(SUB);
4633a7c4 7852 }
79072805
LW
7853
7854 case KEY_system:
a0d0e21e 7855 LOP(OP_SYSTEM,XREF);
79072805
LW
7856
7857 case KEY_symlink:
a0d0e21e 7858 LOP(OP_SYMLINK,XTERM);
79072805
LW
7859
7860 case KEY_syscall:
a0d0e21e 7861 LOP(OP_SYSCALL,XTERM);
79072805 7862
c07a80fd 7863 case KEY_sysopen:
7864 LOP(OP_SYSOPEN,XTERM);
7865
137443ea 7866 case KEY_sysseek:
7867 LOP(OP_SYSSEEK,XTERM);
7868
79072805 7869 case KEY_sysread:
a0d0e21e 7870 LOP(OP_SYSREAD,XTERM);
79072805
LW
7871
7872 case KEY_syswrite:
a0d0e21e 7873 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7874
7875 case KEY_tr:
7876 s = scan_trans(s);
7877 TERM(sublex_start());
7878
7879 case KEY_tell:
7880 UNI(OP_TELL);
7881
7882 case KEY_telldir:
7883 UNI(OP_TELLDIR);
7884
463ee0b2 7885 case KEY_tie:
a0d0e21e 7886 LOP(OP_TIE,XTERM);
463ee0b2 7887
c07a80fd 7888 case KEY_tied:
7889 UNI(OP_TIED);
7890
79072805
LW
7891 case KEY_time:
7892 FUN0(OP_TIME);
7893
7894 case KEY_times:
7895 FUN0(OP_TMS);
7896
7897 case KEY_truncate:
a0d0e21e 7898 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7899
7900 case KEY_uc:
7901 UNI(OP_UC);
7902
7903 case KEY_ucfirst:
7904 UNI(OP_UCFIRST);
7905
463ee0b2
LW
7906 case KEY_untie:
7907 UNI(OP_UNTIE);
7908
79072805 7909 case KEY_until:
6154021b 7910 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7911 OPERATOR(UNTIL);
7912
7913 case KEY_unless:
6154021b 7914 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7915 OPERATOR(UNLESS);
7916
7917 case KEY_unlink:
a0d0e21e 7918 LOP(OP_UNLINK,XTERM);
79072805
LW
7919
7920 case KEY_undef:
6f33ba73 7921 UNIDOR(OP_UNDEF);
79072805
LW
7922
7923 case KEY_unpack:
a0d0e21e 7924 LOP(OP_UNPACK,XTERM);
79072805
LW
7925
7926 case KEY_utime:
a0d0e21e 7927 LOP(OP_UTIME,XTERM);
79072805
LW
7928
7929 case KEY_umask:
6f33ba73 7930 UNIDOR(OP_UMASK);
79072805
LW
7931
7932 case KEY_unshift:
a0d0e21e
LW
7933 LOP(OP_UNSHIFT,XTERM);
7934
7935 case KEY_use:
468aa647 7936 s = tokenize_use(1, s);
a0d0e21e 7937 OPERATOR(USE);
79072805
LW
7938
7939 case KEY_values:
7940 UNI(OP_VALUES);
7941
7942 case KEY_vec:
a0d0e21e 7943 LOP(OP_VEC,XTERM);
79072805 7944
0d863452 7945 case KEY_when:
6154021b 7946 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7947 OPERATOR(WHEN);
7948
79072805 7949 case KEY_while:
6154021b 7950 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7951 OPERATOR(WHILE);
7952
7953 case KEY_warn:
3280af22 7954 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7955 LOP(OP_WARN,XTERM);
79072805
LW
7956
7957 case KEY_wait:
7958 FUN0(OP_WAIT);
7959
7960 case KEY_waitpid:
a0d0e21e 7961 LOP(OP_WAITPID,XTERM);
79072805
LW
7962
7963 case KEY_wantarray:
7964 FUN0(OP_WANTARRAY);
7965
7966 case KEY_write:
9d116dd7
JH
7967#ifdef EBCDIC
7968 {
df3728a2
JH
7969 char ctl_l[2];
7970 ctl_l[0] = toCTRL('L');
7971 ctl_l[1] = '\0';
fafc274c 7972 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7973 }
7974#else
fafc274c
NC
7975 /* Make sure $^L is defined */
7976 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7977#endif
79072805
LW
7978 UNI(OP_ENTERWRITE);
7979
7980 case KEY_x:
3280af22 7981 if (PL_expect == XOPERATOR)
79072805
LW
7982 Mop(OP_REPEAT);
7983 check_uni();
7984 goto just_a_word;
7985
a0d0e21e 7986 case KEY_xor:
6154021b 7987 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7988 OPERATOR(OROP);
7989
79072805
LW
7990 case KEY_y:
7991 s = scan_trans(s);
7992 TERM(sublex_start());
7993 }
49dc05e3 7994 }}
79072805 7995}
bf4acbe4
GS
7996#ifdef __SC__
7997#pragma segment Main
7998#endif
79072805 7999
e930465f
JH
8000static int
8001S_pending_ident(pTHX)
8eceec63 8002{
97aff369 8003 dVAR;
8eceec63 8004 register char *d;
bbd11bfc 8005 PADOFFSET tmp = 0;
8eceec63
SC
8006 /* pit holds the identifier we read and pending_ident is reset */
8007 char pit = PL_pending_ident;
9bde8eb0
NC
8008 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8009 /* All routes through this function want to know if there is a colon. */
c099d646 8010 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8011 PL_pending_ident = 0;
8012
cd81e915 8013 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8014 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8015 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8016
8017 /* if we're in a my(), we can't allow dynamics here.
8018 $foo'bar has already been turned into $foo::bar, so
8019 just check for colons.
8020
8021 if it's a legal name, the OP is a PADANY.
8022 */
8023 if (PL_in_my) {
8024 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8025 if (has_colon)
8eceec63
SC
8026 yyerror(Perl_form(aTHX_ "No package name allowed for "
8027 "variable %s in \"our\"",
8028 PL_tokenbuf));
d6447115 8029 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8030 }
8031 else {
9bde8eb0 8032 if (has_colon)
952306ac
RGS
8033 yyerror(Perl_form(aTHX_ PL_no_myglob,
8034 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 8035
6154021b 8036 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 8037 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8038 return PRIVATEREF;
8039 }
8040 }
8041
8042 /*
8043 build the ops for accesses to a my() variable.
8044
8045 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8046 then used in a comparison. This catches most, but not
8047 all cases. For instance, it catches
8048 sort { my($a); $a <=> $b }
8049 but not
8050 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8051 (although why you'd do that is anyone's guess).
8052 */
8053
9bde8eb0 8054 if (!has_colon) {
8716503d 8055 if (!PL_in_my)
f8f98e0a 8056 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 8057 if (tmp != NOT_IN_PAD) {
8eceec63 8058 /* might be an "our" variable" */
00b1698f 8059 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8060 /* build ops for a bareword */
b64e5050
AL
8061 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8062 HEK * const stashname = HvNAME_HEK(stash);
8063 SV * const sym = newSVhek(stashname);
396482e1 8064 sv_catpvs(sym, "::");
9bde8eb0 8065 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8066 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8067 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8068 gv_fetchsv(sym,
8eceec63
SC
8069 (PL_in_eval
8070 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8071 : GV_ADDMULTI
8eceec63
SC
8072 ),
8073 ((PL_tokenbuf[0] == '$') ? SVt_PV
8074 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8075 : SVt_PVHV));
8076 return WORD;
8077 }
8078
8079 /* if it's a sort block and they're naming $a or $b */
8080 if (PL_last_lop_op == OP_SORT &&
8081 PL_tokenbuf[0] == '$' &&
8082 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8083 && !PL_tokenbuf[2])
8084 {
8085 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8086 d < PL_bufend && *d != '\n';
8087 d++)
8088 {
8089 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8090 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8091 PL_tokenbuf);
8092 }
8093 }
8094 }
8095
6154021b
RGS
8096 pl_yylval.opval = newOP(OP_PADANY, 0);
8097 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8098 return PRIVATEREF;
8099 }
8100 }
8101
8102 /*
8103 Whine if they've said @foo in a doublequoted string,
8104 and @foo isn't a variable we can find in the symbol
8105 table.
8106 */
d824713b
NC
8107 if (ckWARN(WARN_AMBIGUOUS) &&
8108 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8109 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8110 SVt_PVAV);
8eceec63 8111 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8112 /* DO NOT warn for @- and @+ */
8113 && !( PL_tokenbuf[2] == '\0' &&
8114 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8115 )
8eceec63
SC
8116 {
8117 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8118 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8119 "Possible unintended interpolation of %s in string",
8120 PL_tokenbuf);
8eceec63
SC
8121 }
8122 }
8123
8124 /* build ops for a bareword */
6154021b 8125 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8126 tokenbuf_len - 1));
6154021b 8127 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8128 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8129 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8130 ((PL_tokenbuf[0] == '$') ? SVt_PV
8131 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8132 : SVt_PVHV));
8eceec63
SC
8133 return WORD;
8134}
8135
4c3bbe0f
MHM
8136/*
8137 * The following code was generated by perl_keyword.pl.
8138 */
e2e1dd5a 8139
79072805 8140I32
5458a98a 8141Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 8142{
952306ac 8143 dVAR;
7918f24d
NC
8144
8145 PERL_ARGS_ASSERT_KEYWORD;
8146
4c3bbe0f
MHM
8147 switch (len)
8148 {
8149 case 1: /* 5 tokens of length 1 */
8150 switch (name[0])
e2e1dd5a 8151 {
4c3bbe0f
MHM
8152 case 'm':
8153 { /* m */
8154 return KEY_m;
8155 }
8156
4c3bbe0f
MHM
8157 case 'q':
8158 { /* q */
8159 return KEY_q;
8160 }
8161
4c3bbe0f
MHM
8162 case 's':
8163 { /* s */
8164 return KEY_s;
8165 }
8166
4c3bbe0f
MHM
8167 case 'x':
8168 { /* x */
8169 return -KEY_x;
8170 }
8171
4c3bbe0f
MHM
8172 case 'y':
8173 { /* y */
8174 return KEY_y;
8175 }
8176
4c3bbe0f
MHM
8177 default:
8178 goto unknown;
e2e1dd5a 8179 }
4c3bbe0f
MHM
8180
8181 case 2: /* 18 tokens of length 2 */
8182 switch (name[0])
e2e1dd5a 8183 {
4c3bbe0f
MHM
8184 case 'd':
8185 if (name[1] == 'o')
8186 { /* do */
8187 return KEY_do;
8188 }
8189
8190 goto unknown;
8191
8192 case 'e':
8193 if (name[1] == 'q')
8194 { /* eq */
8195 return -KEY_eq;
8196 }
8197
8198 goto unknown;
8199
8200 case 'g':
8201 switch (name[1])
8202 {
8203 case 'e':
8204 { /* ge */
8205 return -KEY_ge;
8206 }
8207
4c3bbe0f
MHM
8208 case 't':
8209 { /* gt */
8210 return -KEY_gt;
8211 }
8212
4c3bbe0f
MHM
8213 default:
8214 goto unknown;
8215 }
8216
8217 case 'i':
8218 if (name[1] == 'f')
8219 { /* if */
8220 return KEY_if;
8221 }
8222
8223 goto unknown;
8224
8225 case 'l':
8226 switch (name[1])
8227 {
8228 case 'c':
8229 { /* lc */
8230 return -KEY_lc;
8231 }
8232
4c3bbe0f
MHM
8233 case 'e':
8234 { /* le */
8235 return -KEY_le;
8236 }
8237
4c3bbe0f
MHM
8238 case 't':
8239 { /* lt */
8240 return -KEY_lt;
8241 }
8242
4c3bbe0f
MHM
8243 default:
8244 goto unknown;
8245 }
8246
8247 case 'm':
8248 if (name[1] == 'y')
8249 { /* my */
8250 return KEY_my;
8251 }
8252
8253 goto unknown;
8254
8255 case 'n':
8256 switch (name[1])
8257 {
8258 case 'e':
8259 { /* ne */
8260 return -KEY_ne;
8261 }
8262
4c3bbe0f
MHM
8263 case 'o':
8264 { /* no */
8265 return KEY_no;
8266 }
8267
4c3bbe0f
MHM
8268 default:
8269 goto unknown;
8270 }
8271
8272 case 'o':
8273 if (name[1] == 'r')
8274 { /* or */
8275 return -KEY_or;
8276 }
8277
8278 goto unknown;
8279
8280 case 'q':
8281 switch (name[1])
8282 {
8283 case 'q':
8284 { /* qq */
8285 return KEY_qq;
8286 }
8287
4c3bbe0f
MHM
8288 case 'r':
8289 { /* qr */
8290 return KEY_qr;
8291 }
8292
4c3bbe0f
MHM
8293 case 'w':
8294 { /* qw */
8295 return KEY_qw;
8296 }
8297
4c3bbe0f
MHM
8298 case 'x':
8299 { /* qx */
8300 return KEY_qx;
8301 }
8302
4c3bbe0f
MHM
8303 default:
8304 goto unknown;
8305 }
8306
8307 case 't':
8308 if (name[1] == 'r')
8309 { /* tr */
8310 return KEY_tr;
8311 }
8312
8313 goto unknown;
8314
8315 case 'u':
8316 if (name[1] == 'c')
8317 { /* uc */
8318 return -KEY_uc;
8319 }
8320
8321 goto unknown;
8322
8323 default:
8324 goto unknown;
e2e1dd5a 8325 }
4c3bbe0f 8326
0d863452 8327 case 3: /* 29 tokens of length 3 */
4c3bbe0f 8328 switch (name[0])
e2e1dd5a 8329 {
4c3bbe0f
MHM
8330 case 'E':
8331 if (name[1] == 'N' &&
8332 name[2] == 'D')
8333 { /* END */
8334 return KEY_END;
8335 }
8336
8337 goto unknown;
8338
8339 case 'a':
8340 switch (name[1])
8341 {
8342 case 'b':
8343 if (name[2] == 's')
8344 { /* abs */
8345 return -KEY_abs;
8346 }
8347
8348 goto unknown;
8349
8350 case 'n':
8351 if (name[2] == 'd')
8352 { /* and */
8353 return -KEY_and;
8354 }
8355
8356 goto unknown;
8357
8358 default:
8359 goto unknown;
8360 }
8361
8362 case 'c':
8363 switch (name[1])
8364 {
8365 case 'h':
8366 if (name[2] == 'r')
8367 { /* chr */
8368 return -KEY_chr;
8369 }
8370
8371 goto unknown;
8372
8373 case 'm':
8374 if (name[2] == 'p')
8375 { /* cmp */
8376 return -KEY_cmp;
8377 }
8378
8379 goto unknown;
8380
8381 case 'o':
8382 if (name[2] == 's')
8383 { /* cos */
8384 return -KEY_cos;
8385 }
8386
8387 goto unknown;
8388
8389 default:
8390 goto unknown;
8391 }
8392
8393 case 'd':
8394 if (name[1] == 'i' &&
8395 name[2] == 'e')
8396 { /* die */
8397 return -KEY_die;
8398 }
8399
8400 goto unknown;
8401
8402 case 'e':
8403 switch (name[1])
8404 {
8405 case 'o':
8406 if (name[2] == 'f')
8407 { /* eof */
8408 return -KEY_eof;
8409 }
8410
8411 goto unknown;
8412
4c3bbe0f
MHM
8413 case 'x':
8414 if (name[2] == 'p')
8415 { /* exp */
8416 return -KEY_exp;
8417 }
8418
8419 goto unknown;
8420
8421 default:
8422 goto unknown;
8423 }
8424
8425 case 'f':
8426 if (name[1] == 'o' &&
8427 name[2] == 'r')
8428 { /* for */
8429 return KEY_for;
8430 }
8431
8432 goto unknown;
8433
8434 case 'h':
8435 if (name[1] == 'e' &&
8436 name[2] == 'x')
8437 { /* hex */
8438 return -KEY_hex;
8439 }
8440
8441 goto unknown;
8442
8443 case 'i':
8444 if (name[1] == 'n' &&
8445 name[2] == 't')
8446 { /* int */
8447 return -KEY_int;
8448 }
8449
8450 goto unknown;
8451
8452 case 'l':
8453 if (name[1] == 'o' &&
8454 name[2] == 'g')
8455 { /* log */
8456 return -KEY_log;
8457 }
8458
8459 goto unknown;
8460
8461 case 'm':
8462 if (name[1] == 'a' &&
8463 name[2] == 'p')
8464 { /* map */
8465 return KEY_map;
8466 }
8467
8468 goto unknown;
8469
8470 case 'n':
8471 if (name[1] == 'o' &&
8472 name[2] == 't')
8473 { /* not */
8474 return -KEY_not;
8475 }
8476
8477 goto unknown;
8478
8479 case 'o':
8480 switch (name[1])
8481 {
8482 case 'c':
8483 if (name[2] == 't')
8484 { /* oct */
8485 return -KEY_oct;
8486 }
8487
8488 goto unknown;
8489
8490 case 'r':
8491 if (name[2] == 'd')
8492 { /* ord */
8493 return -KEY_ord;
8494 }
8495
8496 goto unknown;
8497
8498 case 'u':
8499 if (name[2] == 'r')
8500 { /* our */
8501 return KEY_our;
8502 }
8503
8504 goto unknown;
8505
8506 default:
8507 goto unknown;
8508 }
8509
8510 case 'p':
8511 if (name[1] == 'o')
8512 {
8513 switch (name[2])
8514 {
8515 case 'p':
8516 { /* pop */
8517 return -KEY_pop;
8518 }
8519
4c3bbe0f
MHM
8520 case 's':
8521 { /* pos */
8522 return KEY_pos;
8523 }
8524
4c3bbe0f
MHM
8525 default:
8526 goto unknown;
8527 }
8528 }
8529
8530 goto unknown;
8531
8532 case 'r':
8533 if (name[1] == 'e' &&
8534 name[2] == 'f')
8535 { /* ref */
8536 return -KEY_ref;
8537 }
8538
8539 goto unknown;
8540
8541 case 's':
8542 switch (name[1])
8543 {
0d863452
RH
8544 case 'a':
8545 if (name[2] == 'y')
8546 { /* say */
e3e804c9 8547 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8548 }
8549
8550 goto unknown;
8551
4c3bbe0f
MHM
8552 case 'i':
8553 if (name[2] == 'n')
8554 { /* sin */
8555 return -KEY_sin;
8556 }
8557
8558 goto unknown;
8559
8560 case 'u':
8561 if (name[2] == 'b')
8562 { /* sub */
8563 return KEY_sub;
8564 }
8565
8566 goto unknown;
8567
8568 default:
8569 goto unknown;
8570 }
8571
8572 case 't':
8573 if (name[1] == 'i' &&
8574 name[2] == 'e')
8575 { /* tie */
1db4d195 8576 return -KEY_tie;
4c3bbe0f
MHM
8577 }
8578
8579 goto unknown;
8580
8581 case 'u':
8582 if (name[1] == 's' &&
8583 name[2] == 'e')
8584 { /* use */
8585 return KEY_use;
8586 }
8587
8588 goto unknown;
8589
8590 case 'v':
8591 if (name[1] == 'e' &&
8592 name[2] == 'c')
8593 { /* vec */
8594 return -KEY_vec;
8595 }
8596
8597 goto unknown;
8598
8599 case 'x':
8600 if (name[1] == 'o' &&
8601 name[2] == 'r')
8602 { /* xor */
8603 return -KEY_xor;
8604 }
8605
8606 goto unknown;
8607
8608 default:
8609 goto unknown;
e2e1dd5a 8610 }
4c3bbe0f 8611
0d863452 8612 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8613 switch (name[0])
e2e1dd5a 8614 {
4c3bbe0f
MHM
8615 case 'C':
8616 if (name[1] == 'O' &&
8617 name[2] == 'R' &&
8618 name[3] == 'E')
8619 { /* CORE */
8620 return -KEY_CORE;
8621 }
8622
8623 goto unknown;
8624
8625 case 'I':
8626 if (name[1] == 'N' &&
8627 name[2] == 'I' &&
8628 name[3] == 'T')
8629 { /* INIT */
8630 return KEY_INIT;
8631 }
8632
8633 goto unknown;
8634
8635 case 'b':
8636 if (name[1] == 'i' &&
8637 name[2] == 'n' &&
8638 name[3] == 'd')
8639 { /* bind */
8640 return -KEY_bind;
8641 }
8642
8643 goto unknown;
8644
8645 case 'c':
8646 if (name[1] == 'h' &&
8647 name[2] == 'o' &&
8648 name[3] == 'p')
8649 { /* chop */
8650 return -KEY_chop;
8651 }
8652
8653 goto unknown;
8654
8655 case 'd':
8656 if (name[1] == 'u' &&
8657 name[2] == 'm' &&
8658 name[3] == 'p')
8659 { /* dump */
8660 return -KEY_dump;
8661 }
8662
8663 goto unknown;
8664
8665 case 'e':
8666 switch (name[1])
8667 {
8668 case 'a':
8669 if (name[2] == 'c' &&
8670 name[3] == 'h')
8671 { /* each */
8672 return -KEY_each;
8673 }
8674
8675 goto unknown;
8676
8677 case 'l':
8678 if (name[2] == 's' &&
8679 name[3] == 'e')
8680 { /* else */
8681 return KEY_else;
8682 }
8683
8684 goto unknown;
8685
8686 case 'v':
8687 if (name[2] == 'a' &&
8688 name[3] == 'l')
8689 { /* eval */
8690 return KEY_eval;
8691 }
8692
8693 goto unknown;
8694
8695 case 'x':
8696 switch (name[2])
8697 {
8698 case 'e':
8699 if (name[3] == 'c')
8700 { /* exec */
8701 return -KEY_exec;
8702 }
8703
8704 goto unknown;
8705
8706 case 'i':
8707 if (name[3] == 't')
8708 { /* exit */
8709 return -KEY_exit;
8710 }
8711
8712 goto unknown;
8713
8714 default:
8715 goto unknown;
8716 }
8717
8718 default:
8719 goto unknown;
8720 }
8721
8722 case 'f':
8723 if (name[1] == 'o' &&
8724 name[2] == 'r' &&
8725 name[3] == 'k')
8726 { /* fork */
8727 return -KEY_fork;
8728 }
8729
8730 goto unknown;
8731
8732 case 'g':
8733 switch (name[1])
8734 {
8735 case 'e':
8736 if (name[2] == 't' &&
8737 name[3] == 'c')
8738 { /* getc */
8739 return -KEY_getc;
8740 }
8741
8742 goto unknown;
8743
8744 case 'l':
8745 if (name[2] == 'o' &&
8746 name[3] == 'b')
8747 { /* glob */
8748 return KEY_glob;
8749 }
8750
8751 goto unknown;
8752
8753 case 'o':
8754 if (name[2] == 't' &&
8755 name[3] == 'o')
8756 { /* goto */
8757 return KEY_goto;
8758 }
8759
8760 goto unknown;
8761
8762 case 'r':
8763 if (name[2] == 'e' &&
8764 name[3] == 'p')
8765 { /* grep */
8766 return KEY_grep;
8767 }
8768
8769 goto unknown;
8770
8771 default:
8772 goto unknown;
8773 }
8774
8775 case 'j':
8776 if (name[1] == 'o' &&
8777 name[2] == 'i' &&
8778 name[3] == 'n')
8779 { /* join */
8780 return -KEY_join;
8781 }
8782
8783 goto unknown;
8784
8785 case 'k':
8786 switch (name[1])
8787 {
8788 case 'e':
8789 if (name[2] == 'y' &&
8790 name[3] == 's')
8791 { /* keys */
8792 return -KEY_keys;
8793 }
8794
8795 goto unknown;
8796
8797 case 'i':
8798 if (name[2] == 'l' &&
8799 name[3] == 'l')
8800 { /* kill */
8801 return -KEY_kill;
8802 }
8803
8804 goto unknown;
8805
8806 default:
8807 goto unknown;
8808 }
8809
8810 case 'l':
8811 switch (name[1])
8812 {
8813 case 'a':
8814 if (name[2] == 's' &&
8815 name[3] == 't')
8816 { /* last */
8817 return KEY_last;
8818 }
8819
8820 goto unknown;
8821
8822 case 'i':
8823 if (name[2] == 'n' &&
8824 name[3] == 'k')
8825 { /* link */
8826 return -KEY_link;
8827 }
8828
8829 goto unknown;
8830
8831 case 'o':
8832 if (name[2] == 'c' &&
8833 name[3] == 'k')
8834 { /* lock */
8835 return -KEY_lock;
8836 }
8837
8838 goto unknown;
8839
8840 default:
8841 goto unknown;
8842 }
8843
8844 case 'n':
8845 if (name[1] == 'e' &&
8846 name[2] == 'x' &&
8847 name[3] == 't')
8848 { /* next */
8849 return KEY_next;
8850 }
8851
8852 goto unknown;
8853
8854 case 'o':
8855 if (name[1] == 'p' &&
8856 name[2] == 'e' &&
8857 name[3] == 'n')
8858 { /* open */
8859 return -KEY_open;
8860 }
8861
8862 goto unknown;
8863
8864 case 'p':
8865 switch (name[1])
8866 {
8867 case 'a':
8868 if (name[2] == 'c' &&
8869 name[3] == 'k')
8870 { /* pack */
8871 return -KEY_pack;
8872 }
8873
8874 goto unknown;
8875
8876 case 'i':
8877 if (name[2] == 'p' &&
8878 name[3] == 'e')
8879 { /* pipe */
8880 return -KEY_pipe;
8881 }
8882
8883 goto unknown;
8884
8885 case 'u':
8886 if (name[2] == 's' &&
8887 name[3] == 'h')
8888 { /* push */
8889 return -KEY_push;
8890 }
8891
8892 goto unknown;
8893
8894 default:
8895 goto unknown;
8896 }
8897
8898 case 'r':
8899 switch (name[1])
8900 {
8901 case 'a':
8902 if (name[2] == 'n' &&
8903 name[3] == 'd')
8904 { /* rand */
8905 return -KEY_rand;
8906 }
8907
8908 goto unknown;
8909
8910 case 'e':
8911 switch (name[2])
8912 {
8913 case 'a':
8914 if (name[3] == 'd')
8915 { /* read */
8916 return -KEY_read;
8917 }
8918
8919 goto unknown;
8920
8921 case 'c':
8922 if (name[3] == 'v')
8923 { /* recv */
8924 return -KEY_recv;
8925 }
8926
8927 goto unknown;
8928
8929 case 'd':
8930 if (name[3] == 'o')
8931 { /* redo */
8932 return KEY_redo;
8933 }
8934
8935 goto unknown;
8936
8937 default:
8938 goto unknown;
8939 }
8940
8941 default:
8942 goto unknown;
8943 }
8944
8945 case 's':
8946 switch (name[1])
8947 {
8948 case 'e':
8949 switch (name[2])
8950 {
8951 case 'e':
8952 if (name[3] == 'k')
8953 { /* seek */
8954 return -KEY_seek;
8955 }
8956
8957 goto unknown;
8958
8959 case 'n':
8960 if (name[3] == 'd')
8961 { /* send */
8962 return -KEY_send;
8963 }
8964
8965 goto unknown;
8966
8967 default:
8968 goto unknown;
8969 }
8970
8971 case 'o':
8972 if (name[2] == 'r' &&
8973 name[3] == 't')
8974 { /* sort */
8975 return KEY_sort;
8976 }
8977
8978 goto unknown;
8979
8980 case 'q':
8981 if (name[2] == 'r' &&
8982 name[3] == 't')
8983 { /* sqrt */
8984 return -KEY_sqrt;
8985 }
8986
8987 goto unknown;
8988
8989 case 't':
8990 if (name[2] == 'a' &&
8991 name[3] == 't')
8992 { /* stat */
8993 return -KEY_stat;
8994 }
8995
8996 goto unknown;
8997
8998 default:
8999 goto unknown;
9000 }
9001
9002 case 't':
9003 switch (name[1])
9004 {
9005 case 'e':
9006 if (name[2] == 'l' &&
9007 name[3] == 'l')
9008 { /* tell */
9009 return -KEY_tell;
9010 }
9011
9012 goto unknown;
9013
9014 case 'i':
9015 switch (name[2])
9016 {
9017 case 'e':
9018 if (name[3] == 'd')
9019 { /* tied */
1db4d195 9020 return -KEY_tied;
4c3bbe0f
MHM
9021 }
9022
9023 goto unknown;
9024
9025 case 'm':
9026 if (name[3] == 'e')
9027 { /* time */
9028 return -KEY_time;
9029 }
9030
9031 goto unknown;
9032
9033 default:
9034 goto unknown;
9035 }
9036
9037 default:
9038 goto unknown;
9039 }
9040
9041 case 'w':
0d863452 9042 switch (name[1])
4c3bbe0f 9043 {
0d863452 9044 case 'a':
952306ac
RGS
9045 switch (name[2])
9046 {
9047 case 'i':
9048 if (name[3] == 't')
9049 { /* wait */
9050 return -KEY_wait;
9051 }
4c3bbe0f 9052
952306ac 9053 goto unknown;
4c3bbe0f 9054
952306ac
RGS
9055 case 'r':
9056 if (name[3] == 'n')
9057 { /* warn */
9058 return -KEY_warn;
9059 }
4c3bbe0f 9060
952306ac 9061 goto unknown;
4c3bbe0f 9062
952306ac
RGS
9063 default:
9064 goto unknown;
9065 }
0d863452
RH
9066
9067 case 'h':
9068 if (name[2] == 'e' &&
9069 name[3] == 'n')
9070 { /* when */
5458a98a 9071 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 9072 }
4c3bbe0f 9073
952306ac 9074 goto unknown;
4c3bbe0f 9075
952306ac
RGS
9076 default:
9077 goto unknown;
9078 }
4c3bbe0f 9079
0d863452
RH
9080 default:
9081 goto unknown;
9082 }
9083
952306ac 9084 case 5: /* 39 tokens of length 5 */
4c3bbe0f 9085 switch (name[0])
e2e1dd5a 9086 {
4c3bbe0f
MHM
9087 case 'B':
9088 if (name[1] == 'E' &&
9089 name[2] == 'G' &&
9090 name[3] == 'I' &&
9091 name[4] == 'N')
9092 { /* BEGIN */
9093 return KEY_BEGIN;
9094 }
9095
9096 goto unknown;
9097
9098 case 'C':
9099 if (name[1] == 'H' &&
9100 name[2] == 'E' &&
9101 name[3] == 'C' &&
9102 name[4] == 'K')
9103 { /* CHECK */
9104 return KEY_CHECK;
9105 }
9106
9107 goto unknown;
9108
9109 case 'a':
9110 switch (name[1])
9111 {
9112 case 'l':
9113 if (name[2] == 'a' &&
9114 name[3] == 'r' &&
9115 name[4] == 'm')
9116 { /* alarm */
9117 return -KEY_alarm;
9118 }
9119
9120 goto unknown;
9121
9122 case 't':
9123 if (name[2] == 'a' &&
9124 name[3] == 'n' &&
9125 name[4] == '2')
9126 { /* atan2 */
9127 return -KEY_atan2;
9128 }
9129
9130 goto unknown;
9131
9132 default:
9133 goto unknown;
9134 }
9135
9136 case 'b':
0d863452
RH
9137 switch (name[1])
9138 {
9139 case 'l':
9140 if (name[2] == 'e' &&
952306ac
RGS
9141 name[3] == 's' &&
9142 name[4] == 's')
9143 { /* bless */
9144 return -KEY_bless;
9145 }
4c3bbe0f 9146
952306ac 9147 goto unknown;
4c3bbe0f 9148
0d863452
RH
9149 case 'r':
9150 if (name[2] == 'e' &&
9151 name[3] == 'a' &&
9152 name[4] == 'k')
9153 { /* break */
5458a98a 9154 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
9155 }
9156
9157 goto unknown;
9158
9159 default:
9160 goto unknown;
9161 }
9162
4c3bbe0f
MHM
9163 case 'c':
9164 switch (name[1])
9165 {
9166 case 'h':
9167 switch (name[2])
9168 {
9169 case 'd':
9170 if (name[3] == 'i' &&
9171 name[4] == 'r')
9172 { /* chdir */
9173 return -KEY_chdir;
9174 }
9175
9176 goto unknown;
9177
9178 case 'm':
9179 if (name[3] == 'o' &&
9180 name[4] == 'd')
9181 { /* chmod */
9182 return -KEY_chmod;
9183 }
9184
9185 goto unknown;
9186
9187 case 'o':
9188 switch (name[3])
9189 {
9190 case 'm':
9191 if (name[4] == 'p')
9192 { /* chomp */
9193 return -KEY_chomp;
9194 }
9195
9196 goto unknown;
9197
9198 case 'w':
9199 if (name[4] == 'n')
9200 { /* chown */
9201 return -KEY_chown;
9202 }
9203
9204 goto unknown;
9205
9206 default:
9207 goto unknown;
9208 }
9209
9210 default:
9211 goto unknown;
9212 }
9213
9214 case 'l':
9215 if (name[2] == 'o' &&
9216 name[3] == 's' &&
9217 name[4] == 'e')
9218 { /* close */
9219 return -KEY_close;
9220 }
9221
9222 goto unknown;
9223
9224 case 'r':
9225 if (name[2] == 'y' &&
9226 name[3] == 'p' &&
9227 name[4] == 't')
9228 { /* crypt */
9229 return -KEY_crypt;
9230 }
9231
9232 goto unknown;
9233
9234 default:
9235 goto unknown;
9236 }
9237
9238 case 'e':
9239 if (name[1] == 'l' &&
9240 name[2] == 's' &&
9241 name[3] == 'i' &&
9242 name[4] == 'f')
9243 { /* elsif */
9244 return KEY_elsif;
9245 }
9246
9247 goto unknown;
9248
9249 case 'f':
9250 switch (name[1])
9251 {
9252 case 'c':
9253 if (name[2] == 'n' &&
9254 name[3] == 't' &&
9255 name[4] == 'l')
9256 { /* fcntl */
9257 return -KEY_fcntl;
9258 }
9259
9260 goto unknown;
9261
9262 case 'l':
9263 if (name[2] == 'o' &&
9264 name[3] == 'c' &&
9265 name[4] == 'k')
9266 { /* flock */
9267 return -KEY_flock;
9268 }
9269
9270 goto unknown;
9271
9272 default:
9273 goto unknown;
9274 }
9275
0d863452
RH
9276 case 'g':
9277 if (name[1] == 'i' &&
9278 name[2] == 'v' &&
9279 name[3] == 'e' &&
9280 name[4] == 'n')
9281 { /* given */
5458a98a 9282 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
9283 }
9284
9285 goto unknown;
9286
4c3bbe0f
MHM
9287 case 'i':
9288 switch (name[1])
9289 {
9290 case 'n':
9291 if (name[2] == 'd' &&
9292 name[3] == 'e' &&
9293 name[4] == 'x')
9294 { /* index */
9295 return -KEY_index;
9296 }
9297
9298 goto unknown;
9299
9300 case 'o':
9301 if (name[2] == 'c' &&
9302 name[3] == 't' &&
9303 name[4] == 'l')
9304 { /* ioctl */
9305 return -KEY_ioctl;
9306 }
9307
9308 goto unknown;
9309
9310 default:
9311 goto unknown;
9312 }
9313
9314 case 'l':
9315 switch (name[1])
9316 {
9317 case 'o':
9318 if (name[2] == 'c' &&
9319 name[3] == 'a' &&
9320 name[4] == 'l')
9321 { /* local */
9322 return KEY_local;
9323 }
9324
9325 goto unknown;
9326
9327 case 's':
9328 if (name[2] == 't' &&
9329 name[3] == 'a' &&
9330 name[4] == 't')
9331 { /* lstat */
9332 return -KEY_lstat;
9333 }
9334
9335 goto unknown;
9336
9337 default:
9338 goto unknown;
9339 }
9340
9341 case 'm':
9342 if (name[1] == 'k' &&
9343 name[2] == 'd' &&
9344 name[3] == 'i' &&
9345 name[4] == 'r')
9346 { /* mkdir */
9347 return -KEY_mkdir;
9348 }
9349
9350 goto unknown;
9351
9352 case 'p':
9353 if (name[1] == 'r' &&
9354 name[2] == 'i' &&
9355 name[3] == 'n' &&
9356 name[4] == 't')
9357 { /* print */
9358 return KEY_print;
9359 }
9360
9361 goto unknown;
9362
9363 case 'r':
9364 switch (name[1])
9365 {
9366 case 'e':
9367 if (name[2] == 's' &&
9368 name[3] == 'e' &&
9369 name[4] == 't')
9370 { /* reset */
9371 return -KEY_reset;
9372 }
9373
9374 goto unknown;
9375
9376 case 'm':
9377 if (name[2] == 'd' &&
9378 name[3] == 'i' &&
9379 name[4] == 'r')
9380 { /* rmdir */
9381 return -KEY_rmdir;
9382 }
9383
9384 goto unknown;
9385
9386 default:
9387 goto unknown;
9388 }
9389
9390 case 's':
9391 switch (name[1])
9392 {
9393 case 'e':
9394 if (name[2] == 'm' &&
9395 name[3] == 'o' &&
9396 name[4] == 'p')
9397 { /* semop */
9398 return -KEY_semop;
9399 }
9400
9401 goto unknown;
9402
9403 case 'h':
9404 if (name[2] == 'i' &&
9405 name[3] == 'f' &&
9406 name[4] == 't')
9407 { /* shift */
9408 return -KEY_shift;
9409 }
9410
9411 goto unknown;
9412
9413 case 'l':
9414 if (name[2] == 'e' &&
9415 name[3] == 'e' &&
9416 name[4] == 'p')
9417 { /* sleep */
9418 return -KEY_sleep;
9419 }
9420
9421 goto unknown;
9422
9423 case 'p':
9424 if (name[2] == 'l' &&
9425 name[3] == 'i' &&
9426 name[4] == 't')
9427 { /* split */
9428 return KEY_split;
9429 }
9430
9431 goto unknown;
9432
9433 case 'r':
9434 if (name[2] == 'a' &&
9435 name[3] == 'n' &&
9436 name[4] == 'd')
9437 { /* srand */
9438 return -KEY_srand;
9439 }
9440
9441 goto unknown;
9442
9443 case 't':
952306ac
RGS
9444 switch (name[2])
9445 {
9446 case 'a':
9447 if (name[3] == 't' &&
9448 name[4] == 'e')
9449 { /* state */
5458a98a 9450 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9451 }
4c3bbe0f 9452
952306ac
RGS
9453 goto unknown;
9454
9455 case 'u':
9456 if (name[3] == 'd' &&
9457 name[4] == 'y')
9458 { /* study */
9459 return KEY_study;
9460 }
9461
9462 goto unknown;
9463
9464 default:
9465 goto unknown;
9466 }
4c3bbe0f
MHM
9467
9468 default:
9469 goto unknown;
9470 }
9471
9472 case 't':
9473 if (name[1] == 'i' &&
9474 name[2] == 'm' &&
9475 name[3] == 'e' &&
9476 name[4] == 's')
9477 { /* times */
9478 return -KEY_times;
9479 }
9480
9481 goto unknown;
9482
9483 case 'u':
9484 switch (name[1])
9485 {
9486 case 'm':
9487 if (name[2] == 'a' &&
9488 name[3] == 's' &&
9489 name[4] == 'k')
9490 { /* umask */
9491 return -KEY_umask;
9492 }
9493
9494 goto unknown;
9495
9496 case 'n':
9497 switch (name[2])
9498 {
9499 case 'd':
9500 if (name[3] == 'e' &&
9501 name[4] == 'f')
9502 { /* undef */
9503 return KEY_undef;
9504 }
9505
9506 goto unknown;
9507
9508 case 't':
9509 if (name[3] == 'i')
9510 {
9511 switch (name[4])
9512 {
9513 case 'e':
9514 { /* untie */
1db4d195 9515 return -KEY_untie;
4c3bbe0f
MHM
9516 }
9517
4c3bbe0f
MHM
9518 case 'l':
9519 { /* until */
9520 return KEY_until;
9521 }
9522
4c3bbe0f
MHM
9523 default:
9524 goto unknown;
9525 }
9526 }
9527
9528 goto unknown;
9529
9530 default:
9531 goto unknown;
9532 }
9533
9534 case 't':
9535 if (name[2] == 'i' &&
9536 name[3] == 'm' &&
9537 name[4] == 'e')
9538 { /* utime */
9539 return -KEY_utime;
9540 }
9541
9542 goto unknown;
9543
9544 default:
9545 goto unknown;
9546 }
9547
9548 case 'w':
9549 switch (name[1])
9550 {
9551 case 'h':
9552 if (name[2] == 'i' &&
9553 name[3] == 'l' &&
9554 name[4] == 'e')
9555 { /* while */
9556 return KEY_while;
9557 }
9558
9559 goto unknown;
9560
9561 case 'r':
9562 if (name[2] == 'i' &&
9563 name[3] == 't' &&
9564 name[4] == 'e')
9565 { /* write */
9566 return -KEY_write;
9567 }
9568
9569 goto unknown;
9570
9571 default:
9572 goto unknown;
9573 }
9574
9575 default:
9576 goto unknown;
e2e1dd5a 9577 }
4c3bbe0f
MHM
9578
9579 case 6: /* 33 tokens of length 6 */
9580 switch (name[0])
9581 {
9582 case 'a':
9583 if (name[1] == 'c' &&
9584 name[2] == 'c' &&
9585 name[3] == 'e' &&
9586 name[4] == 'p' &&
9587 name[5] == 't')
9588 { /* accept */
9589 return -KEY_accept;
9590 }
9591
9592 goto unknown;
9593
9594 case 'c':
9595 switch (name[1])
9596 {
9597 case 'a':
9598 if (name[2] == 'l' &&
9599 name[3] == 'l' &&
9600 name[4] == 'e' &&
9601 name[5] == 'r')
9602 { /* caller */
9603 return -KEY_caller;
9604 }
9605
9606 goto unknown;
9607
9608 case 'h':
9609 if (name[2] == 'r' &&
9610 name[3] == 'o' &&
9611 name[4] == 'o' &&
9612 name[5] == 't')
9613 { /* chroot */
9614 return -KEY_chroot;
9615 }
9616
9617 goto unknown;
9618
9619 default:
9620 goto unknown;
9621 }
9622
9623 case 'd':
9624 if (name[1] == 'e' &&
9625 name[2] == 'l' &&
9626 name[3] == 'e' &&
9627 name[4] == 't' &&
9628 name[5] == 'e')
9629 { /* delete */
9630 return KEY_delete;
9631 }
9632
9633 goto unknown;
9634
9635 case 'e':
9636 switch (name[1])
9637 {
9638 case 'l':
9639 if (name[2] == 's' &&
9640 name[3] == 'e' &&
9641 name[4] == 'i' &&
9642 name[5] == 'f')
9643 { /* elseif */
9b387841 9644 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9645 }
9646
9647 goto unknown;
9648
9649 case 'x':
9650 if (name[2] == 'i' &&
9651 name[3] == 's' &&
9652 name[4] == 't' &&
9653 name[5] == 's')
9654 { /* exists */
9655 return KEY_exists;
9656 }
9657
9658 goto unknown;
9659
9660 default:
9661 goto unknown;
9662 }
9663
9664 case 'f':
9665 switch (name[1])
9666 {
9667 case 'i':
9668 if (name[2] == 'l' &&
9669 name[3] == 'e' &&
9670 name[4] == 'n' &&
9671 name[5] == 'o')
9672 { /* fileno */
9673 return -KEY_fileno;
9674 }
9675
9676 goto unknown;
9677
9678 case 'o':
9679 if (name[2] == 'r' &&
9680 name[3] == 'm' &&
9681 name[4] == 'a' &&
9682 name[5] == 't')
9683 { /* format */
9684 return KEY_format;
9685 }
9686
9687 goto unknown;
9688
9689 default:
9690 goto unknown;
9691 }
9692
9693 case 'g':
9694 if (name[1] == 'm' &&
9695 name[2] == 't' &&
9696 name[3] == 'i' &&
9697 name[4] == 'm' &&
9698 name[5] == 'e')
9699 { /* gmtime */
9700 return -KEY_gmtime;
9701 }
9702
9703 goto unknown;
9704
9705 case 'l':
9706 switch (name[1])
9707 {
9708 case 'e':
9709 if (name[2] == 'n' &&
9710 name[3] == 'g' &&
9711 name[4] == 't' &&
9712 name[5] == 'h')
9713 { /* length */
9714 return -KEY_length;
9715 }
9716
9717 goto unknown;
9718
9719 case 'i':
9720 if (name[2] == 's' &&
9721 name[3] == 't' &&
9722 name[4] == 'e' &&
9723 name[5] == 'n')
9724 { /* listen */
9725 return -KEY_listen;
9726 }
9727
9728 goto unknown;
9729
9730 default:
9731 goto unknown;
9732 }
9733
9734 case 'm':
9735 if (name[1] == 's' &&
9736 name[2] == 'g')
9737 {
9738 switch (name[3])
9739 {
9740 case 'c':
9741 if (name[4] == 't' &&
9742 name[5] == 'l')
9743 { /* msgctl */
9744 return -KEY_msgctl;
9745 }
9746
9747 goto unknown;
9748
9749 case 'g':
9750 if (name[4] == 'e' &&
9751 name[5] == 't')
9752 { /* msgget */
9753 return -KEY_msgget;
9754 }
9755
9756 goto unknown;
9757
9758 case 'r':
9759 if (name[4] == 'c' &&
9760 name[5] == 'v')
9761 { /* msgrcv */
9762 return -KEY_msgrcv;
9763 }
9764
9765 goto unknown;
9766
9767 case 's':
9768 if (name[4] == 'n' &&
9769 name[5] == 'd')
9770 { /* msgsnd */
9771 return -KEY_msgsnd;
9772 }
9773
9774 goto unknown;
9775
9776 default:
9777 goto unknown;
9778 }
9779 }
9780
9781 goto unknown;
9782
9783 case 'p':
9784 if (name[1] == 'r' &&
9785 name[2] == 'i' &&
9786 name[3] == 'n' &&
9787 name[4] == 't' &&
9788 name[5] == 'f')
9789 { /* printf */
9790 return KEY_printf;
9791 }
9792
9793 goto unknown;
9794
9795 case 'r':
9796 switch (name[1])
9797 {
9798 case 'e':
9799 switch (name[2])
9800 {
9801 case 'n':
9802 if (name[3] == 'a' &&
9803 name[4] == 'm' &&
9804 name[5] == 'e')
9805 { /* rename */
9806 return -KEY_rename;
9807 }
9808
9809 goto unknown;
9810
9811 case 't':
9812 if (name[3] == 'u' &&
9813 name[4] == 'r' &&
9814 name[5] == 'n')
9815 { /* return */
9816 return KEY_return;
9817 }
9818
9819 goto unknown;
9820
9821 default:
9822 goto unknown;
9823 }
9824
9825 case 'i':
9826 if (name[2] == 'n' &&
9827 name[3] == 'd' &&
9828 name[4] == 'e' &&
9829 name[5] == 'x')
9830 { /* rindex */
9831 return -KEY_rindex;
9832 }
9833
9834 goto unknown;
9835
9836 default:
9837 goto unknown;
9838 }
9839
9840 case 's':
9841 switch (name[1])
9842 {
9843 case 'c':
9844 if (name[2] == 'a' &&
9845 name[3] == 'l' &&
9846 name[4] == 'a' &&
9847 name[5] == 'r')
9848 { /* scalar */
9849 return KEY_scalar;
9850 }
9851
9852 goto unknown;
9853
9854 case 'e':
9855 switch (name[2])
9856 {
9857 case 'l':
9858 if (name[3] == 'e' &&
9859 name[4] == 'c' &&
9860 name[5] == 't')
9861 { /* select */
9862 return -KEY_select;
9863 }
9864
9865 goto unknown;
9866
9867 case 'm':
9868 switch (name[3])
9869 {
9870 case 'c':
9871 if (name[4] == 't' &&
9872 name[5] == 'l')
9873 { /* semctl */
9874 return -KEY_semctl;
9875 }
9876
9877 goto unknown;
9878
9879 case 'g':
9880 if (name[4] == 'e' &&
9881 name[5] == 't')
9882 { /* semget */
9883 return -KEY_semget;
9884 }
9885
9886 goto unknown;
9887
9888 default:
9889 goto unknown;
9890 }
9891
9892 default:
9893 goto unknown;
9894 }
9895
9896 case 'h':
9897 if (name[2] == 'm')
9898 {
9899 switch (name[3])
9900 {
9901 case 'c':
9902 if (name[4] == 't' &&
9903 name[5] == 'l')
9904 { /* shmctl */
9905 return -KEY_shmctl;
9906 }
9907
9908 goto unknown;
9909
9910 case 'g':
9911 if (name[4] == 'e' &&
9912 name[5] == 't')
9913 { /* shmget */
9914 return -KEY_shmget;
9915 }
9916
9917 goto unknown;
9918
9919 default:
9920 goto unknown;
9921 }
9922 }
9923
9924 goto unknown;
9925
9926 case 'o':
9927 if (name[2] == 'c' &&
9928 name[3] == 'k' &&
9929 name[4] == 'e' &&
9930 name[5] == 't')
9931 { /* socket */
9932 return -KEY_socket;
9933 }
9934
9935 goto unknown;
9936
9937 case 'p':
9938 if (name[2] == 'l' &&
9939 name[3] == 'i' &&
9940 name[4] == 'c' &&
9941 name[5] == 'e')
9942 { /* splice */
9943 return -KEY_splice;
9944 }
9945
9946 goto unknown;
9947
9948 case 'u':
9949 if (name[2] == 'b' &&
9950 name[3] == 's' &&
9951 name[4] == 't' &&
9952 name[5] == 'r')
9953 { /* substr */
9954 return -KEY_substr;
9955 }
9956
9957 goto unknown;
9958
9959 case 'y':
9960 if (name[2] == 's' &&
9961 name[3] == 't' &&
9962 name[4] == 'e' &&
9963 name[5] == 'm')
9964 { /* system */
9965 return -KEY_system;
9966 }
9967
9968 goto unknown;
9969
9970 default:
9971 goto unknown;
9972 }
9973
9974 case 'u':
9975 if (name[1] == 'n')
9976 {
9977 switch (name[2])
9978 {
9979 case 'l':
9980 switch (name[3])
9981 {
9982 case 'e':
9983 if (name[4] == 's' &&
9984 name[5] == 's')
9985 { /* unless */
9986 return KEY_unless;
9987 }
9988
9989 goto unknown;
9990
9991 case 'i':
9992 if (name[4] == 'n' &&
9993 name[5] == 'k')
9994 { /* unlink */
9995 return -KEY_unlink;
9996 }
9997
9998 goto unknown;
9999
10000 default:
10001 goto unknown;
10002 }
10003
10004 case 'p':
10005 if (name[3] == 'a' &&
10006 name[4] == 'c' &&
10007 name[5] == 'k')
10008 { /* unpack */
10009 return -KEY_unpack;
10010 }
10011
10012 goto unknown;
10013
10014 default:
10015 goto unknown;
10016 }
10017 }
10018
10019 goto unknown;
10020
10021 case 'v':
10022 if (name[1] == 'a' &&
10023 name[2] == 'l' &&
10024 name[3] == 'u' &&
10025 name[4] == 'e' &&
10026 name[5] == 's')
10027 { /* values */
10028 return -KEY_values;
10029 }
10030
10031 goto unknown;
10032
10033 default:
10034 goto unknown;
e2e1dd5a 10035 }
4c3bbe0f 10036
0d863452 10037 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
10038 switch (name[0])
10039 {
10040 case 'D':
10041 if (name[1] == 'E' &&
10042 name[2] == 'S' &&
10043 name[3] == 'T' &&
10044 name[4] == 'R' &&
10045 name[5] == 'O' &&
10046 name[6] == 'Y')
10047 { /* DESTROY */
10048 return KEY_DESTROY;
10049 }
10050
10051 goto unknown;
10052
10053 case '_':
10054 if (name[1] == '_' &&
10055 name[2] == 'E' &&
10056 name[3] == 'N' &&
10057 name[4] == 'D' &&
10058 name[5] == '_' &&
10059 name[6] == '_')
10060 { /* __END__ */
10061 return KEY___END__;
10062 }
10063
10064 goto unknown;
10065
10066 case 'b':
10067 if (name[1] == 'i' &&
10068 name[2] == 'n' &&
10069 name[3] == 'm' &&
10070 name[4] == 'o' &&
10071 name[5] == 'd' &&
10072 name[6] == 'e')
10073 { /* binmode */
10074 return -KEY_binmode;
10075 }
10076
10077 goto unknown;
10078
10079 case 'c':
10080 if (name[1] == 'o' &&
10081 name[2] == 'n' &&
10082 name[3] == 'n' &&
10083 name[4] == 'e' &&
10084 name[5] == 'c' &&
10085 name[6] == 't')
10086 { /* connect */
10087 return -KEY_connect;
10088 }
10089
10090 goto unknown;
10091
10092 case 'd':
10093 switch (name[1])
10094 {
10095 case 'b':
10096 if (name[2] == 'm' &&
10097 name[3] == 'o' &&
10098 name[4] == 'p' &&
10099 name[5] == 'e' &&
10100 name[6] == 'n')
10101 { /* dbmopen */
10102 return -KEY_dbmopen;
10103 }
10104
10105 goto unknown;
10106
10107 case 'e':
0d863452
RH
10108 if (name[2] == 'f')
10109 {
10110 switch (name[3])
10111 {
10112 case 'a':
10113 if (name[4] == 'u' &&
10114 name[5] == 'l' &&
10115 name[6] == 't')
10116 { /* default */
5458a98a 10117 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
10118 }
10119
10120 goto unknown;
10121
10122 case 'i':
10123 if (name[4] == 'n' &&
952306ac
RGS
10124 name[5] == 'e' &&
10125 name[6] == 'd')
10126 { /* defined */
10127 return KEY_defined;
10128 }
4c3bbe0f 10129
952306ac 10130 goto unknown;
4c3bbe0f 10131
952306ac
RGS
10132 default:
10133 goto unknown;
10134 }
0d863452
RH
10135 }
10136
10137 goto unknown;
10138
10139 default:
10140 goto unknown;
10141 }
4c3bbe0f
MHM
10142
10143 case 'f':
10144 if (name[1] == 'o' &&
10145 name[2] == 'r' &&
10146 name[3] == 'e' &&
10147 name[4] == 'a' &&
10148 name[5] == 'c' &&
10149 name[6] == 'h')
10150 { /* foreach */
10151 return KEY_foreach;
10152 }
10153
10154 goto unknown;
10155
10156 case 'g':
10157 if (name[1] == 'e' &&
10158 name[2] == 't' &&
10159 name[3] == 'p')
10160 {
10161 switch (name[4])
10162 {
10163 case 'g':
10164 if (name[5] == 'r' &&
10165 name[6] == 'p')
10166 { /* getpgrp */
10167 return -KEY_getpgrp;
10168 }
10169
10170 goto unknown;
10171
10172 case 'p':
10173 if (name[5] == 'i' &&
10174 name[6] == 'd')
10175 { /* getppid */
10176 return -KEY_getppid;
10177 }
10178
10179 goto unknown;
10180
10181 default:
10182 goto unknown;
10183 }
10184 }
10185
10186 goto unknown;
10187
10188 case 'l':
10189 if (name[1] == 'c' &&
10190 name[2] == 'f' &&
10191 name[3] == 'i' &&
10192 name[4] == 'r' &&
10193 name[5] == 's' &&
10194 name[6] == 't')
10195 { /* lcfirst */
10196 return -KEY_lcfirst;
10197 }
10198
10199 goto unknown;
10200
10201 case 'o':
10202 if (name[1] == 'p' &&
10203 name[2] == 'e' &&
10204 name[3] == 'n' &&
10205 name[4] == 'd' &&
10206 name[5] == 'i' &&
10207 name[6] == 'r')
10208 { /* opendir */
10209 return -KEY_opendir;
10210 }
10211
10212 goto unknown;
10213
10214 case 'p':
10215 if (name[1] == 'a' &&
10216 name[2] == 'c' &&
10217 name[3] == 'k' &&
10218 name[4] == 'a' &&
10219 name[5] == 'g' &&
10220 name[6] == 'e')
10221 { /* package */
10222 return KEY_package;
10223 }
10224
10225 goto unknown;
10226
10227 case 'r':
10228 if (name[1] == 'e')
10229 {
10230 switch (name[2])
10231 {
10232 case 'a':
10233 if (name[3] == 'd' &&
10234 name[4] == 'd' &&
10235 name[5] == 'i' &&
10236 name[6] == 'r')
10237 { /* readdir */
10238 return -KEY_readdir;
10239 }
10240
10241 goto unknown;
10242
10243 case 'q':
10244 if (name[3] == 'u' &&
10245 name[4] == 'i' &&
10246 name[5] == 'r' &&
10247 name[6] == 'e')
10248 { /* require */
10249 return KEY_require;
10250 }
10251
10252 goto unknown;
10253
10254 case 'v':
10255 if (name[3] == 'e' &&
10256 name[4] == 'r' &&
10257 name[5] == 's' &&
10258 name[6] == 'e')
10259 { /* reverse */
10260 return -KEY_reverse;
10261 }
10262
10263 goto unknown;
10264
10265 default:
10266 goto unknown;
10267 }
10268 }
10269
10270 goto unknown;
10271
10272 case 's':
10273 switch (name[1])
10274 {
10275 case 'e':
10276 switch (name[2])
10277 {
10278 case 'e':
10279 if (name[3] == 'k' &&
10280 name[4] == 'd' &&
10281 name[5] == 'i' &&
10282 name[6] == 'r')
10283 { /* seekdir */
10284 return -KEY_seekdir;
10285 }
10286
10287 goto unknown;
10288
10289 case 't':
10290 if (name[3] == 'p' &&
10291 name[4] == 'g' &&
10292 name[5] == 'r' &&
10293 name[6] == 'p')
10294 { /* setpgrp */
10295 return -KEY_setpgrp;
10296 }
10297
10298 goto unknown;
10299
10300 default:
10301 goto unknown;
10302 }
10303
10304 case 'h':
10305 if (name[2] == 'm' &&
10306 name[3] == 'r' &&
10307 name[4] == 'e' &&
10308 name[5] == 'a' &&
10309 name[6] == 'd')
10310 { /* shmread */
10311 return -KEY_shmread;
10312 }
10313
10314 goto unknown;
10315
10316 case 'p':
10317 if (name[2] == 'r' &&
10318 name[3] == 'i' &&
10319 name[4] == 'n' &&
10320 name[5] == 't' &&
10321 name[6] == 'f')
10322 { /* sprintf */
10323 return -KEY_sprintf;
10324 }
10325
10326 goto unknown;
10327
10328 case 'y':
10329 switch (name[2])
10330 {
10331 case 'm':
10332 if (name[3] == 'l' &&
10333 name[4] == 'i' &&
10334 name[5] == 'n' &&
10335 name[6] == 'k')
10336 { /* symlink */
10337 return -KEY_symlink;
10338 }
10339
10340 goto unknown;
10341
10342 case 's':
10343 switch (name[3])
10344 {
10345 case 'c':
10346 if (name[4] == 'a' &&
10347 name[5] == 'l' &&
10348 name[6] == 'l')
10349 { /* syscall */
10350 return -KEY_syscall;
10351 }
10352
10353 goto unknown;
10354
10355 case 'o':
10356 if (name[4] == 'p' &&
10357 name[5] == 'e' &&
10358 name[6] == 'n')
10359 { /* sysopen */
10360 return -KEY_sysopen;
10361 }
10362
10363 goto unknown;
10364
10365 case 'r':
10366 if (name[4] == 'e' &&
10367 name[5] == 'a' &&
10368 name[6] == 'd')
10369 { /* sysread */
10370 return -KEY_sysread;
10371 }
10372
10373 goto unknown;
10374
10375 case 's':
10376 if (name[4] == 'e' &&
10377 name[5] == 'e' &&
10378 name[6] == 'k')
10379 { /* sysseek */
10380 return -KEY_sysseek;
10381 }
10382
10383 goto unknown;
10384
10385 default:
10386 goto unknown;
10387 }
10388
10389 default:
10390 goto unknown;
10391 }
10392
10393 default:
10394 goto unknown;
10395 }
10396
10397 case 't':
10398 if (name[1] == 'e' &&
10399 name[2] == 'l' &&
10400 name[3] == 'l' &&
10401 name[4] == 'd' &&
10402 name[5] == 'i' &&
10403 name[6] == 'r')
10404 { /* telldir */
10405 return -KEY_telldir;
10406 }
10407
10408 goto unknown;
10409
10410 case 'u':
10411 switch (name[1])
10412 {
10413 case 'c':
10414 if (name[2] == 'f' &&
10415 name[3] == 'i' &&
10416 name[4] == 'r' &&
10417 name[5] == 's' &&
10418 name[6] == 't')
10419 { /* ucfirst */
10420 return -KEY_ucfirst;
10421 }
10422
10423 goto unknown;
10424
10425 case 'n':
10426 if (name[2] == 's' &&
10427 name[3] == 'h' &&
10428 name[4] == 'i' &&
10429 name[5] == 'f' &&
10430 name[6] == 't')
10431 { /* unshift */
10432 return -KEY_unshift;
10433 }
10434
10435 goto unknown;
10436
10437 default:
10438 goto unknown;
10439 }
10440
10441 case 'w':
10442 if (name[1] == 'a' &&
10443 name[2] == 'i' &&
10444 name[3] == 't' &&
10445 name[4] == 'p' &&
10446 name[5] == 'i' &&
10447 name[6] == 'd')
10448 { /* waitpid */
10449 return -KEY_waitpid;
10450 }
10451
10452 goto unknown;
10453
10454 default:
10455 goto unknown;
10456 }
10457
10458 case 8: /* 26 tokens of length 8 */
10459 switch (name[0])
10460 {
10461 case 'A':
10462 if (name[1] == 'U' &&
10463 name[2] == 'T' &&
10464 name[3] == 'O' &&
10465 name[4] == 'L' &&
10466 name[5] == 'O' &&
10467 name[6] == 'A' &&
10468 name[7] == 'D')
10469 { /* AUTOLOAD */
10470 return KEY_AUTOLOAD;
10471 }
10472
10473 goto unknown;
10474
10475 case '_':
10476 if (name[1] == '_')
10477 {
10478 switch (name[2])
10479 {
10480 case 'D':
10481 if (name[3] == 'A' &&
10482 name[4] == 'T' &&
10483 name[5] == 'A' &&
10484 name[6] == '_' &&
10485 name[7] == '_')
10486 { /* __DATA__ */
10487 return KEY___DATA__;
10488 }
10489
10490 goto unknown;
10491
10492 case 'F':
10493 if (name[3] == 'I' &&
10494 name[4] == 'L' &&
10495 name[5] == 'E' &&
10496 name[6] == '_' &&
10497 name[7] == '_')
10498 { /* __FILE__ */
10499 return -KEY___FILE__;
10500 }
10501
10502 goto unknown;
10503
10504 case 'L':
10505 if (name[3] == 'I' &&
10506 name[4] == 'N' &&
10507 name[5] == 'E' &&
10508 name[6] == '_' &&
10509 name[7] == '_')
10510 { /* __LINE__ */
10511 return -KEY___LINE__;
10512 }
10513
10514 goto unknown;
10515
10516 default:
10517 goto unknown;
10518 }
10519 }
10520
10521 goto unknown;
10522
10523 case 'c':
10524 switch (name[1])
10525 {
10526 case 'l':
10527 if (name[2] == 'o' &&
10528 name[3] == 's' &&
10529 name[4] == 'e' &&
10530 name[5] == 'd' &&
10531 name[6] == 'i' &&
10532 name[7] == 'r')
10533 { /* closedir */
10534 return -KEY_closedir;
10535 }
10536
10537 goto unknown;
10538
10539 case 'o':
10540 if (name[2] == 'n' &&
10541 name[3] == 't' &&
10542 name[4] == 'i' &&
10543 name[5] == 'n' &&
10544 name[6] == 'u' &&
10545 name[7] == 'e')
10546 { /* continue */
10547 return -KEY_continue;
10548 }
10549
10550 goto unknown;
10551
10552 default:
10553 goto unknown;
10554 }
10555
10556 case 'd':
10557 if (name[1] == 'b' &&
10558 name[2] == 'm' &&
10559 name[3] == 'c' &&
10560 name[4] == 'l' &&
10561 name[5] == 'o' &&
10562 name[6] == 's' &&
10563 name[7] == 'e')
10564 { /* dbmclose */
10565 return -KEY_dbmclose;
10566 }
10567
10568 goto unknown;
10569
10570 case 'e':
10571 if (name[1] == 'n' &&
10572 name[2] == 'd')
10573 {
10574 switch (name[3])
10575 {
10576 case 'g':
10577 if (name[4] == 'r' &&
10578 name[5] == 'e' &&
10579 name[6] == 'n' &&
10580 name[7] == 't')
10581 { /* endgrent */
10582 return -KEY_endgrent;
10583 }
10584
10585 goto unknown;
10586
10587 case 'p':
10588 if (name[4] == 'w' &&
10589 name[5] == 'e' &&
10590 name[6] == 'n' &&
10591 name[7] == 't')
10592 { /* endpwent */
10593 return -KEY_endpwent;
10594 }
10595
10596 goto unknown;
10597
10598 default:
10599 goto unknown;
10600 }
10601 }
10602
10603 goto unknown;
10604
10605 case 'f':
10606 if (name[1] == 'o' &&
10607 name[2] == 'r' &&
10608 name[3] == 'm' &&
10609 name[4] == 'l' &&
10610 name[5] == 'i' &&
10611 name[6] == 'n' &&
10612 name[7] == 'e')
10613 { /* formline */
10614 return -KEY_formline;
10615 }
10616
10617 goto unknown;
10618
10619 case 'g':
10620 if (name[1] == 'e' &&
10621 name[2] == 't')
10622 {
10623 switch (name[3])
10624 {
10625 case 'g':
10626 if (name[4] == 'r')
10627 {
10628 switch (name[5])
10629 {
10630 case 'e':
10631 if (name[6] == 'n' &&
10632 name[7] == 't')
10633 { /* getgrent */
10634 return -KEY_getgrent;
10635 }
10636
10637 goto unknown;
10638
10639 case 'g':
10640 if (name[6] == 'i' &&
10641 name[7] == 'd')
10642 { /* getgrgid */
10643 return -KEY_getgrgid;
10644 }
10645
10646 goto unknown;
10647
10648 case 'n':
10649 if (name[6] == 'a' &&
10650 name[7] == 'm')
10651 { /* getgrnam */
10652 return -KEY_getgrnam;
10653 }
10654
10655 goto unknown;
10656
10657 default:
10658 goto unknown;
10659 }
10660 }
10661
10662 goto unknown;
10663
10664 case 'l':
10665 if (name[4] == 'o' &&
10666 name[5] == 'g' &&
10667 name[6] == 'i' &&
10668 name[7] == 'n')
10669 { /* getlogin */
10670 return -KEY_getlogin;
10671 }
10672
10673 goto unknown;
10674
10675 case 'p':
10676 if (name[4] == 'w')
10677 {
10678 switch (name[5])
10679 {
10680 case 'e':
10681 if (name[6] == 'n' &&
10682 name[7] == 't')
10683 { /* getpwent */
10684 return -KEY_getpwent;
10685 }
10686
10687 goto unknown;
10688
10689 case 'n':
10690 if (name[6] == 'a' &&
10691 name[7] == 'm')
10692 { /* getpwnam */
10693 return -KEY_getpwnam;
10694 }
10695
10696 goto unknown;
10697
10698 case 'u':
10699 if (name[6] == 'i' &&
10700 name[7] == 'd')
10701 { /* getpwuid */
10702 return -KEY_getpwuid;
10703 }
10704
10705 goto unknown;
10706
10707 default:
10708 goto unknown;
10709 }
10710 }
10711
10712 goto unknown;
10713
10714 default:
10715 goto unknown;
10716 }
10717 }
10718
10719 goto unknown;
10720
10721 case 'r':
10722 if (name[1] == 'e' &&
10723 name[2] == 'a' &&
10724 name[3] == 'd')
10725 {
10726 switch (name[4])
10727 {
10728 case 'l':
10729 if (name[5] == 'i' &&
10730 name[6] == 'n')
10731 {
10732 switch (name[7])
10733 {
10734 case 'e':
10735 { /* readline */
10736 return -KEY_readline;
10737 }
10738
4c3bbe0f
MHM
10739 case 'k':
10740 { /* readlink */
10741 return -KEY_readlink;
10742 }
10743
4c3bbe0f
MHM
10744 default:
10745 goto unknown;
10746 }
10747 }
10748
10749 goto unknown;
10750
10751 case 'p':
10752 if (name[5] == 'i' &&
10753 name[6] == 'p' &&
10754 name[7] == 'e')
10755 { /* readpipe */
10756 return -KEY_readpipe;
10757 }
10758
10759 goto unknown;
10760
10761 default:
10762 goto unknown;
10763 }
10764 }
10765
10766 goto unknown;
10767
10768 case 's':
10769 switch (name[1])
10770 {
10771 case 'e':
10772 if (name[2] == 't')
10773 {
10774 switch (name[3])
10775 {
10776 case 'g':
10777 if (name[4] == 'r' &&
10778 name[5] == 'e' &&
10779 name[6] == 'n' &&
10780 name[7] == 't')
10781 { /* setgrent */
10782 return -KEY_setgrent;
10783 }
10784
10785 goto unknown;
10786
10787 case 'p':
10788 if (name[4] == 'w' &&
10789 name[5] == 'e' &&
10790 name[6] == 'n' &&
10791 name[7] == 't')
10792 { /* setpwent */
10793 return -KEY_setpwent;
10794 }
10795
10796 goto unknown;
10797
10798 default:
10799 goto unknown;
10800 }
10801 }
10802
10803 goto unknown;
10804
10805 case 'h':
10806 switch (name[2])
10807 {
10808 case 'm':
10809 if (name[3] == 'w' &&
10810 name[4] == 'r' &&
10811 name[5] == 'i' &&
10812 name[6] == 't' &&
10813 name[7] == 'e')
10814 { /* shmwrite */
10815 return -KEY_shmwrite;
10816 }
10817
10818 goto unknown;
10819
10820 case 'u':
10821 if (name[3] == 't' &&
10822 name[4] == 'd' &&
10823 name[5] == 'o' &&
10824 name[6] == 'w' &&
10825 name[7] == 'n')
10826 { /* shutdown */
10827 return -KEY_shutdown;
10828 }
10829
10830 goto unknown;
10831
10832 default:
10833 goto unknown;
10834 }
10835
10836 case 'y':
10837 if (name[2] == 's' &&
10838 name[3] == 'w' &&
10839 name[4] == 'r' &&
10840 name[5] == 'i' &&
10841 name[6] == 't' &&
10842 name[7] == 'e')
10843 { /* syswrite */
10844 return -KEY_syswrite;
10845 }
10846
10847 goto unknown;
10848
10849 default:
10850 goto unknown;
10851 }
10852
10853 case 't':
10854 if (name[1] == 'r' &&
10855 name[2] == 'u' &&
10856 name[3] == 'n' &&
10857 name[4] == 'c' &&
10858 name[5] == 'a' &&
10859 name[6] == 't' &&
10860 name[7] == 'e')
10861 { /* truncate */
10862 return -KEY_truncate;
10863 }
10864
10865 goto unknown;
10866
10867 default:
10868 goto unknown;
10869 }
10870
3c10abe3 10871 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10872 switch (name[0])
10873 {
3c10abe3
AG
10874 case 'U':
10875 if (name[1] == 'N' &&
10876 name[2] == 'I' &&
10877 name[3] == 'T' &&
10878 name[4] == 'C' &&
10879 name[5] == 'H' &&
10880 name[6] == 'E' &&
10881 name[7] == 'C' &&
10882 name[8] == 'K')
10883 { /* UNITCHECK */
10884 return KEY_UNITCHECK;
10885 }
10886
10887 goto unknown;
10888
4c3bbe0f
MHM
10889 case 'e':
10890 if (name[1] == 'n' &&
10891 name[2] == 'd' &&
10892 name[3] == 'n' &&
10893 name[4] == 'e' &&
10894 name[5] == 't' &&
10895 name[6] == 'e' &&
10896 name[7] == 'n' &&
10897 name[8] == 't')
10898 { /* endnetent */
10899 return -KEY_endnetent;
10900 }
10901
10902 goto unknown;
10903
10904 case 'g':
10905 if (name[1] == 'e' &&
10906 name[2] == 't' &&
10907 name[3] == 'n' &&
10908 name[4] == 'e' &&
10909 name[5] == 't' &&
10910 name[6] == 'e' &&
10911 name[7] == 'n' &&
10912 name[8] == 't')
10913 { /* getnetent */
10914 return -KEY_getnetent;
10915 }
10916
10917 goto unknown;
10918
10919 case 'l':
10920 if (name[1] == 'o' &&
10921 name[2] == 'c' &&
10922 name[3] == 'a' &&
10923 name[4] == 'l' &&
10924 name[5] == 't' &&
10925 name[6] == 'i' &&
10926 name[7] == 'm' &&
10927 name[8] == 'e')
10928 { /* localtime */
10929 return -KEY_localtime;
10930 }
10931
10932 goto unknown;
10933
10934 case 'p':
10935 if (name[1] == 'r' &&
10936 name[2] == 'o' &&
10937 name[3] == 't' &&
10938 name[4] == 'o' &&
10939 name[5] == 't' &&
10940 name[6] == 'y' &&
10941 name[7] == 'p' &&
10942 name[8] == 'e')
10943 { /* prototype */
10944 return KEY_prototype;
10945 }
10946
10947 goto unknown;
10948
10949 case 'q':
10950 if (name[1] == 'u' &&
10951 name[2] == 'o' &&
10952 name[3] == 't' &&
10953 name[4] == 'e' &&
10954 name[5] == 'm' &&
10955 name[6] == 'e' &&
10956 name[7] == 't' &&
10957 name[8] == 'a')
10958 { /* quotemeta */
10959 return -KEY_quotemeta;
10960 }
10961
10962 goto unknown;
10963
10964 case 'r':
10965 if (name[1] == 'e' &&
10966 name[2] == 'w' &&
10967 name[3] == 'i' &&
10968 name[4] == 'n' &&
10969 name[5] == 'd' &&
10970 name[6] == 'd' &&
10971 name[7] == 'i' &&
10972 name[8] == 'r')
10973 { /* rewinddir */
10974 return -KEY_rewinddir;
10975 }
10976
10977 goto unknown;
10978
10979 case 's':
10980 if (name[1] == 'e' &&
10981 name[2] == 't' &&
10982 name[3] == 'n' &&
10983 name[4] == 'e' &&
10984 name[5] == 't' &&
10985 name[6] == 'e' &&
10986 name[7] == 'n' &&
10987 name[8] == 't')
10988 { /* setnetent */
10989 return -KEY_setnetent;
10990 }
10991
10992 goto unknown;
10993
10994 case 'w':
10995 if (name[1] == 'a' &&
10996 name[2] == 'n' &&
10997 name[3] == 't' &&
10998 name[4] == 'a' &&
10999 name[5] == 'r' &&
11000 name[6] == 'r' &&
11001 name[7] == 'a' &&
11002 name[8] == 'y')
11003 { /* wantarray */
11004 return -KEY_wantarray;
11005 }
11006
11007 goto unknown;
11008
11009 default:
11010 goto unknown;
11011 }
11012
11013 case 10: /* 9 tokens of length 10 */
11014 switch (name[0])
11015 {
11016 case 'e':
11017 if (name[1] == 'n' &&
11018 name[2] == 'd')
11019 {
11020 switch (name[3])
11021 {
11022 case 'h':
11023 if (name[4] == 'o' &&
11024 name[5] == 's' &&
11025 name[6] == 't' &&
11026 name[7] == 'e' &&
11027 name[8] == 'n' &&
11028 name[9] == 't')
11029 { /* endhostent */
11030 return -KEY_endhostent;
11031 }
11032
11033 goto unknown;
11034
11035 case 's':
11036 if (name[4] == 'e' &&
11037 name[5] == 'r' &&
11038 name[6] == 'v' &&
11039 name[7] == 'e' &&
11040 name[8] == 'n' &&
11041 name[9] == 't')
11042 { /* endservent */
11043 return -KEY_endservent;
11044 }
11045
11046 goto unknown;
11047
11048 default:
11049 goto unknown;
11050 }
11051 }
11052
11053 goto unknown;
11054
11055 case 'g':
11056 if (name[1] == 'e' &&
11057 name[2] == 't')
11058 {
11059 switch (name[3])
11060 {
11061 case 'h':
11062 if (name[4] == 'o' &&
11063 name[5] == 's' &&
11064 name[6] == 't' &&
11065 name[7] == 'e' &&
11066 name[8] == 'n' &&
11067 name[9] == 't')
11068 { /* gethostent */
11069 return -KEY_gethostent;
11070 }
11071
11072 goto unknown;
11073
11074 case 's':
11075 switch (name[4])
11076 {
11077 case 'e':
11078 if (name[5] == 'r' &&
11079 name[6] == 'v' &&
11080 name[7] == 'e' &&
11081 name[8] == 'n' &&
11082 name[9] == 't')
11083 { /* getservent */
11084 return -KEY_getservent;
11085 }
11086
11087 goto unknown;
11088
11089 case 'o':
11090 if (name[5] == 'c' &&
11091 name[6] == 'k' &&
11092 name[7] == 'o' &&
11093 name[8] == 'p' &&
11094 name[9] == 't')
11095 { /* getsockopt */
11096 return -KEY_getsockopt;
11097 }
11098
11099 goto unknown;
11100
11101 default:
11102 goto unknown;
11103 }
11104
11105 default:
11106 goto unknown;
11107 }
11108 }
11109
11110 goto unknown;
11111
11112 case 's':
11113 switch (name[1])
11114 {
11115 case 'e':
11116 if (name[2] == 't')
11117 {
11118 switch (name[3])
11119 {
11120 case 'h':
11121 if (name[4] == 'o' &&
11122 name[5] == 's' &&
11123 name[6] == 't' &&
11124 name[7] == 'e' &&
11125 name[8] == 'n' &&
11126 name[9] == 't')
11127 { /* sethostent */
11128 return -KEY_sethostent;
11129 }
11130
11131 goto unknown;
11132
11133 case 's':
11134 switch (name[4])
11135 {
11136 case 'e':
11137 if (name[5] == 'r' &&
11138 name[6] == 'v' &&
11139 name[7] == 'e' &&
11140 name[8] == 'n' &&
11141 name[9] == 't')
11142 { /* setservent */
11143 return -KEY_setservent;
11144 }
11145
11146 goto unknown;
11147
11148 case 'o':
11149 if (name[5] == 'c' &&
11150 name[6] == 'k' &&
11151 name[7] == 'o' &&
11152 name[8] == 'p' &&
11153 name[9] == 't')
11154 { /* setsockopt */
11155 return -KEY_setsockopt;
11156 }
11157
11158 goto unknown;
11159
11160 default:
11161 goto unknown;
11162 }
11163
11164 default:
11165 goto unknown;
11166 }
11167 }
11168
11169 goto unknown;
11170
11171 case 'o':
11172 if (name[2] == 'c' &&
11173 name[3] == 'k' &&
11174 name[4] == 'e' &&
11175 name[5] == 't' &&
11176 name[6] == 'p' &&
11177 name[7] == 'a' &&
11178 name[8] == 'i' &&
11179 name[9] == 'r')
11180 { /* socketpair */
11181 return -KEY_socketpair;
11182 }
11183
11184 goto unknown;
11185
11186 default:
11187 goto unknown;
11188 }
11189
11190 default:
11191 goto unknown;
e2e1dd5a 11192 }
4c3bbe0f
MHM
11193
11194 case 11: /* 8 tokens of length 11 */
11195 switch (name[0])
11196 {
11197 case '_':
11198 if (name[1] == '_' &&
11199 name[2] == 'P' &&
11200 name[3] == 'A' &&
11201 name[4] == 'C' &&
11202 name[5] == 'K' &&
11203 name[6] == 'A' &&
11204 name[7] == 'G' &&
11205 name[8] == 'E' &&
11206 name[9] == '_' &&
11207 name[10] == '_')
11208 { /* __PACKAGE__ */
11209 return -KEY___PACKAGE__;
11210 }
11211
11212 goto unknown;
11213
11214 case 'e':
11215 if (name[1] == 'n' &&
11216 name[2] == 'd' &&
11217 name[3] == 'p' &&
11218 name[4] == 'r' &&
11219 name[5] == 'o' &&
11220 name[6] == 't' &&
11221 name[7] == 'o' &&
11222 name[8] == 'e' &&
11223 name[9] == 'n' &&
11224 name[10] == 't')
11225 { /* endprotoent */
11226 return -KEY_endprotoent;
11227 }
11228
11229 goto unknown;
11230
11231 case 'g':
11232 if (name[1] == 'e' &&
11233 name[2] == 't')
11234 {
11235 switch (name[3])
11236 {
11237 case 'p':
11238 switch (name[4])
11239 {
11240 case 'e':
11241 if (name[5] == 'e' &&
11242 name[6] == 'r' &&
11243 name[7] == 'n' &&
11244 name[8] == 'a' &&
11245 name[9] == 'm' &&
11246 name[10] == 'e')
11247 { /* getpeername */
11248 return -KEY_getpeername;
11249 }
11250
11251 goto unknown;
11252
11253 case 'r':
11254 switch (name[5])
11255 {
11256 case 'i':
11257 if (name[6] == 'o' &&
11258 name[7] == 'r' &&
11259 name[8] == 'i' &&
11260 name[9] == 't' &&
11261 name[10] == 'y')
11262 { /* getpriority */
11263 return -KEY_getpriority;
11264 }
11265
11266 goto unknown;
11267
11268 case 'o':
11269 if (name[6] == 't' &&
11270 name[7] == 'o' &&
11271 name[8] == 'e' &&
11272 name[9] == 'n' &&
11273 name[10] == 't')
11274 { /* getprotoent */
11275 return -KEY_getprotoent;
11276 }
11277
11278 goto unknown;
11279
11280 default:
11281 goto unknown;
11282 }
11283
11284 default:
11285 goto unknown;
11286 }
11287
11288 case 's':
11289 if (name[4] == 'o' &&
11290 name[5] == 'c' &&
11291 name[6] == 'k' &&
11292 name[7] == 'n' &&
11293 name[8] == 'a' &&
11294 name[9] == 'm' &&
11295 name[10] == 'e')
11296 { /* getsockname */
11297 return -KEY_getsockname;
11298 }
11299
11300 goto unknown;
11301
11302 default:
11303 goto unknown;
11304 }
11305 }
11306
11307 goto unknown;
11308
11309 case 's':
11310 if (name[1] == 'e' &&
11311 name[2] == 't' &&
11312 name[3] == 'p' &&
11313 name[4] == 'r')
11314 {
11315 switch (name[5])
11316 {
11317 case 'i':
11318 if (name[6] == 'o' &&
11319 name[7] == 'r' &&
11320 name[8] == 'i' &&
11321 name[9] == 't' &&
11322 name[10] == 'y')
11323 { /* setpriority */
11324 return -KEY_setpriority;
11325 }
11326
11327 goto unknown;
11328
11329 case 'o':
11330 if (name[6] == 't' &&
11331 name[7] == 'o' &&
11332 name[8] == 'e' &&
11333 name[9] == 'n' &&
11334 name[10] == 't')
11335 { /* setprotoent */
11336 return -KEY_setprotoent;
11337 }
11338
11339 goto unknown;
11340
11341 default:
11342 goto unknown;
11343 }
11344 }
11345
11346 goto unknown;
11347
11348 default:
11349 goto unknown;
e2e1dd5a 11350 }
4c3bbe0f
MHM
11351
11352 case 12: /* 2 tokens of length 12 */
11353 if (name[0] == 'g' &&
11354 name[1] == 'e' &&
11355 name[2] == 't' &&
11356 name[3] == 'n' &&
11357 name[4] == 'e' &&
11358 name[5] == 't' &&
11359 name[6] == 'b' &&
11360 name[7] == 'y')
11361 {
11362 switch (name[8])
11363 {
11364 case 'a':
11365 if (name[9] == 'd' &&
11366 name[10] == 'd' &&
11367 name[11] == 'r')
11368 { /* getnetbyaddr */
11369 return -KEY_getnetbyaddr;
11370 }
11371
11372 goto unknown;
11373
11374 case 'n':
11375 if (name[9] == 'a' &&
11376 name[10] == 'm' &&
11377 name[11] == 'e')
11378 { /* getnetbyname */
11379 return -KEY_getnetbyname;
11380 }
11381
11382 goto unknown;
11383
11384 default:
11385 goto unknown;
11386 }
e2e1dd5a 11387 }
4c3bbe0f
MHM
11388
11389 goto unknown;
11390
11391 case 13: /* 4 tokens of length 13 */
11392 if (name[0] == 'g' &&
11393 name[1] == 'e' &&
11394 name[2] == 't')
11395 {
11396 switch (name[3])
11397 {
11398 case 'h':
11399 if (name[4] == 'o' &&
11400 name[5] == 's' &&
11401 name[6] == 't' &&
11402 name[7] == 'b' &&
11403 name[8] == 'y')
11404 {
11405 switch (name[9])
11406 {
11407 case 'a':
11408 if (name[10] == 'd' &&
11409 name[11] == 'd' &&
11410 name[12] == 'r')
11411 { /* gethostbyaddr */
11412 return -KEY_gethostbyaddr;
11413 }
11414
11415 goto unknown;
11416
11417 case 'n':
11418 if (name[10] == 'a' &&
11419 name[11] == 'm' &&
11420 name[12] == 'e')
11421 { /* gethostbyname */
11422 return -KEY_gethostbyname;
11423 }
11424
11425 goto unknown;
11426
11427 default:
11428 goto unknown;
11429 }
11430 }
11431
11432 goto unknown;
11433
11434 case 's':
11435 if (name[4] == 'e' &&
11436 name[5] == 'r' &&
11437 name[6] == 'v' &&
11438 name[7] == 'b' &&
11439 name[8] == 'y')
11440 {
11441 switch (name[9])
11442 {
11443 case 'n':
11444 if (name[10] == 'a' &&
11445 name[11] == 'm' &&
11446 name[12] == 'e')
11447 { /* getservbyname */
11448 return -KEY_getservbyname;
11449 }
11450
11451 goto unknown;
11452
11453 case 'p':
11454 if (name[10] == 'o' &&
11455 name[11] == 'r' &&
11456 name[12] == 't')
11457 { /* getservbyport */
11458 return -KEY_getservbyport;
11459 }
11460
11461 goto unknown;
11462
11463 default:
11464 goto unknown;
11465 }
11466 }
11467
11468 goto unknown;
11469
11470 default:
11471 goto unknown;
11472 }
e2e1dd5a 11473 }
4c3bbe0f
MHM
11474
11475 goto unknown;
11476
11477 case 14: /* 1 tokens of length 14 */
11478 if (name[0] == 'g' &&
11479 name[1] == 'e' &&
11480 name[2] == 't' &&
11481 name[3] == 'p' &&
11482 name[4] == 'r' &&
11483 name[5] == 'o' &&
11484 name[6] == 't' &&
11485 name[7] == 'o' &&
11486 name[8] == 'b' &&
11487 name[9] == 'y' &&
11488 name[10] == 'n' &&
11489 name[11] == 'a' &&
11490 name[12] == 'm' &&
11491 name[13] == 'e')
11492 { /* getprotobyname */
11493 return -KEY_getprotobyname;
11494 }
11495
11496 goto unknown;
11497
11498 case 16: /* 1 tokens of length 16 */
11499 if (name[0] == 'g' &&
11500 name[1] == 'e' &&
11501 name[2] == 't' &&
11502 name[3] == 'p' &&
11503 name[4] == 'r' &&
11504 name[5] == 'o' &&
11505 name[6] == 't' &&
11506 name[7] == 'o' &&
11507 name[8] == 'b' &&
11508 name[9] == 'y' &&
11509 name[10] == 'n' &&
11510 name[11] == 'u' &&
11511 name[12] == 'm' &&
11512 name[13] == 'b' &&
11513 name[14] == 'e' &&
11514 name[15] == 'r')
11515 { /* getprotobynumber */
11516 return -KEY_getprotobynumber;
11517 }
11518
11519 goto unknown;
11520
11521 default:
11522 goto unknown;
e2e1dd5a 11523 }
4c3bbe0f
MHM
11524
11525unknown:
e2e1dd5a 11526 return 0;
a687059c
LW
11527}
11528
76e3520e 11529STATIC void
c94115d8 11530S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11531{
97aff369 11532 dVAR;
2f3197b3 11533
7918f24d
NC
11534 PERL_ARGS_ASSERT_CHECKCOMMA;
11535
d008e5eb 11536 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11537 if (ckWARN(WARN_SYNTAX)) {
11538 int level = 1;
26ff0806 11539 const char *w;
d008e5eb
GS
11540 for (w = s+2; *w && level; w++) {
11541 if (*w == '(')
11542 ++level;
11543 else if (*w == ')')
11544 --level;
11545 }
888fea98
NC
11546 while (isSPACE(*w))
11547 ++w;
b1439985
RGS
11548 /* the list of chars below is for end of statements or
11549 * block / parens, boolean operators (&&, ||, //) and branch
11550 * constructs (or, and, if, until, unless, while, err, for).
11551 * Not a very solid hack... */
11552 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11553 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11554 "%s (...) interpreted as function",name);
d008e5eb 11555 }
2f3197b3 11556 }
3280af22 11557 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11558 s++;
a687059c
LW
11559 if (*s == '(')
11560 s++;
3280af22 11561 while (s < PL_bufend && isSPACE(*s))
a687059c 11562 s++;
7e2040f0 11563 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11564 const char * const w = s++;
7e2040f0 11565 while (isALNUM_lazy_if(s,UTF))
a687059c 11566 s++;
3280af22 11567 while (s < PL_bufend && isSPACE(*s))
a687059c 11568 s++;
e929a76b 11569 if (*s == ',') {
c94115d8 11570 GV* gv;
5458a98a 11571 if (keyword(w, s - w, 0))
e929a76b 11572 return;
c94115d8
NC
11573
11574 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11575 if (gv && GvCVu(gv))
abbb3198 11576 return;
cea2e8a9 11577 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11578 }
11579 }
11580}
11581
423cee85
JH
11582/* Either returns sv, or mortalizes sv and returns a new SV*.
11583 Best used as sv=new_constant(..., sv, ...).
11584 If s, pv are NULL, calls subroutine with one argument,
11585 and type is used with error messages only. */
11586
b3ac6de7 11587STATIC SV *
eb0d8d16
NC
11588S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11589 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11590{
27da23d5 11591 dVAR; dSP;
890ce7af 11592 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11593 SV *res;
b3ac6de7
IZ
11594 SV **cvp;
11595 SV *cv, *typesv;
89e33a05 11596 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11597
7918f24d
NC
11598 PERL_ARGS_ASSERT_NEW_CONSTANT;
11599
f0af216f 11600 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11601 SV *msg;
11602
10edeb5d
JH
11603 why2 = (const char *)
11604 (strEQ(key,"charnames")
11605 ? "(possibly a missing \"use charnames ...\")"
11606 : "");
4e553d73 11607 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11608 (type ? type: "undef"), why2);
11609
11610 /* This is convoluted and evil ("goto considered harmful")
11611 * but I do not understand the intricacies of all the different
11612 * failure modes of %^H in here. The goal here is to make
11613 * the most probable error message user-friendly. --jhi */
11614
11615 goto msgdone;
11616
423cee85 11617 report:
4e553d73 11618 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11619 (type ? type: "undef"), why1, why2, why3);
41ab332f 11620 msgdone:
95a20fc0 11621 yyerror(SvPVX_const(msg));
423cee85
JH
11622 SvREFCNT_dec(msg);
11623 return sv;
11624 }
ff3f963a
KW
11625
11626 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
11627 if (PL_error_count > 0 && strEQ(key,"charnames"))
11628 return &PL_sv_undef;
ff3f963a 11629
eb0d8d16 11630 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11631 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11632 why1 = "$^H{";
11633 why2 = key;
f0af216f 11634 why3 = "} is not defined";
423cee85 11635 goto report;
b3ac6de7
IZ
11636 }
11637 sv_2mortal(sv); /* Parent created it permanently */
11638 cv = *cvp;
423cee85 11639 if (!pv && s)
59cd0e26 11640 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11641 if (type && pv)
59cd0e26 11642 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11643 else
423cee85 11644 typesv = &PL_sv_undef;
4e553d73 11645
e788e7d3 11646 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11647 ENTER ;
11648 SAVETMPS;
4e553d73 11649
423cee85 11650 PUSHMARK(SP) ;
a5845cb7 11651 EXTEND(sp, 3);
423cee85
JH
11652 if (pv)
11653 PUSHs(pv);
b3ac6de7 11654 PUSHs(sv);
423cee85
JH
11655 if (pv)
11656 PUSHs(typesv);
b3ac6de7 11657 PUTBACK;
423cee85 11658 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11659
423cee85 11660 SPAGAIN ;
4e553d73 11661
423cee85 11662 /* Check the eval first */
9b0e499b 11663 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11664 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11665 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11666 (void)POPs;
b37c2d43 11667 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11668 }
11669 else {
11670 res = POPs;
b37c2d43 11671 SvREFCNT_inc_simple_void(res);
423cee85 11672 }
4e553d73 11673
423cee85
JH
11674 PUTBACK ;
11675 FREETMPS ;
11676 LEAVE ;
b3ac6de7 11677 POPSTACK;
4e553d73 11678
b3ac6de7 11679 if (!SvOK(res)) {
423cee85
JH
11680 why1 = "Call to &{$^H{";
11681 why2 = key;
f0af216f 11682 why3 = "}} did not return a defined value";
423cee85
JH
11683 sv = res;
11684 goto report;
9b0e499b 11685 }
423cee85 11686
9b0e499b 11687 return res;
b3ac6de7 11688}
4e553d73 11689
d0a148a6
NC
11690/* Returns a NUL terminated string, with the length of the string written to
11691 *slp
11692 */
76e3520e 11693STATIC char *
cea2e8a9 11694S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11695{
97aff369 11696 dVAR;
463ee0b2 11697 register char *d = dest;
890ce7af 11698 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11699
11700 PERL_ARGS_ASSERT_SCAN_WORD;
11701
463ee0b2 11702 for (;;) {
8903cb82 11703 if (d >= e)
cea2e8a9 11704 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11705 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11706 *d++ = *s++;
c35e046a 11707 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11708 *d++ = ':';
11709 *d++ = ':';
11710 s++;
11711 }
c35e046a 11712 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11713 *d++ = *s++;
11714 *d++ = *s++;
11715 }
fd400ab9 11716 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11717 char *t = s + UTF8SKIP(s);
c35e046a 11718 size_t len;
fd400ab9 11719 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11720 t += UTF8SKIP(t);
c35e046a
AL
11721 len = t - s;
11722 if (d + len > e)
cea2e8a9 11723 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11724 Copy(s, d, len, char);
11725 d += len;
a0ed51b3
LW
11726 s = t;
11727 }
463ee0b2
LW
11728 else {
11729 *d = '\0';
11730 *slp = d - dest;
11731 return s;
e929a76b 11732 }
378cc40b
LW
11733 }
11734}
11735
76e3520e 11736STATIC char *
f54cb97a 11737S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11738{
97aff369 11739 dVAR;
6136c704 11740 char *bracket = NULL;
748a9306 11741 char funny = *s++;
6136c704 11742 register char *d = dest;
0b3da58d 11743 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11744
7918f24d
NC
11745 PERL_ARGS_ASSERT_SCAN_IDENT;
11746
a0d0e21e 11747 if (isSPACE(*s))
29595ff2 11748 s = PEEKSPACE(s);
de3bb511 11749 if (isDIGIT(*s)) {
8903cb82 11750 while (isDIGIT(*s)) {
11751 if (d >= e)
cea2e8a9 11752 Perl_croak(aTHX_ ident_too_long);
378cc40b 11753 *d++ = *s++;
8903cb82 11754 }
378cc40b
LW
11755 }
11756 else {
463ee0b2 11757 for (;;) {
8903cb82 11758 if (d >= e)
cea2e8a9 11759 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11760 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11761 *d++ = *s++;
7e2040f0 11762 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11763 *d++ = ':';
11764 *d++ = ':';
11765 s++;
11766 }
a0d0e21e 11767 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11768 *d++ = *s++;
11769 *d++ = *s++;
11770 }
fd400ab9 11771 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11772 char *t = s + UTF8SKIP(s);
fd400ab9 11773 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11774 t += UTF8SKIP(t);
11775 if (d + (t - s) > e)
cea2e8a9 11776 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11777 Copy(s, d, t - s, char);
11778 d += t - s;
11779 s = t;
11780 }
463ee0b2
LW
11781 else
11782 break;
11783 }
378cc40b
LW
11784 }
11785 *d = '\0';
11786 d = dest;
79072805 11787 if (*d) {
3280af22
NIS
11788 if (PL_lex_state != LEX_NORMAL)
11789 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11790 return s;
378cc40b 11791 }
748a9306 11792 if (*s == '$' && s[1] &&
3792a11b 11793 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11794 {
4810e5ec 11795 return s;
5cd24f17 11796 }
79072805
LW
11797 if (*s == '{') {
11798 bracket = s;
11799 s++;
11800 }
11801 else if (ck_uni)
11802 check_uni();
93a17b20 11803 if (s < send)
79072805
LW
11804 *d = *s++;
11805 d[1] = '\0';
2b92dfce 11806 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11807 *d = toCTRL(*s);
11808 s++;
de3bb511 11809 }
79072805 11810 if (bracket) {
748a9306 11811 if (isSPACE(s[-1])) {
fa83b5b6 11812 while (s < send) {
f54cb97a 11813 const char ch = *s++;
bf4acbe4 11814 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11815 *d = ch;
11816 break;
11817 }
11818 }
748a9306 11819 }
7e2040f0 11820 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11821 d++;
a0ed51b3 11822 if (UTF) {
6136c704
AL
11823 char *end = s;
11824 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11825 end += UTF8SKIP(end);
11826 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11827 end += UTF8SKIP(end);
a0ed51b3 11828 }
6136c704
AL
11829 Copy(s, d, end - s, char);
11830 d += end - s;
11831 s = end;
a0ed51b3
LW
11832 }
11833 else {
2b92dfce 11834 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11835 *d++ = *s++;
2b92dfce 11836 if (d >= e)
cea2e8a9 11837 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11838 }
79072805 11839 *d = '\0';
c35e046a
AL
11840 while (s < send && SPACE_OR_TAB(*s))
11841 s++;
ff68c719 11842 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11843 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11844 const char * const brack =
11845 (const char *)
11846 ((*s == '[') ? "[...]" : "{...}");
9014280d 11847 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11848 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11849 funny, dest, brack, funny, dest, brack);
11850 }
79072805 11851 bracket++;
a0be28da 11852 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11853 return s;
11854 }
4e553d73
NIS
11855 }
11856 /* Handle extended ${^Foo} variables
2b92dfce
GS
11857 * 1999-02-27 mjd-perl-patch@plover.com */
11858 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11859 && isALNUM(*s))
11860 {
11861 d++;
11862 while (isALNUM(*s) && d < e) {
11863 *d++ = *s++;
11864 }
11865 if (d >= e)
cea2e8a9 11866 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11867 *d = '\0';
79072805
LW
11868 }
11869 if (*s == '}') {
11870 s++;
7df0d042 11871 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11872 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11873 PL_expect = XREF;
11874 }
d008e5eb 11875 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11876 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11877 (keyword(dest, d - dest, 0)
11878 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11879 {
c35e046a
AL
11880 if (funny == '#')
11881 funny = '@';
9014280d 11882 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11883 "Ambiguous use of %c{%s} resolved to %c%s",
11884 funny, dest, funny, dest);
11885 }
11886 }
79072805
LW
11887 }
11888 else {
11889 s = bracket; /* let the parser handle it */
93a17b20 11890 *dest = '\0';
79072805
LW
11891 }
11892 }
3280af22
NIS
11893 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11894 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11895 return s;
11896}
11897
879d0c72
NC
11898static U32
11899S_pmflag(U32 pmfl, const char ch) {
11900 switch (ch) {
11901 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
4f4d7508
DC
11902 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11903 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11904 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11905 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11906 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
879d0c72
NC
11907 }
11908 return pmfl;
11909}
11910
76e3520e 11911STATIC char *
cea2e8a9 11912S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11913{
97aff369 11914 dVAR;
79072805 11915 PMOP *pm;
5db06880 11916 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11917 const char * const valid_flags =
a20207d7 11918 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11919#ifdef PERL_MAD
11920 char *modstart;
11921#endif
11922
7918f24d 11923 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11924
25c09cbf 11925 if (!s) {
6136c704 11926 const char * const delimiter = skipspace(start);
10edeb5d
JH
11927 Perl_croak(aTHX_
11928 (const char *)
11929 (*delimiter == '?'
11930 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11931 : "Search pattern not terminated" ));
25c09cbf 11932 }
bbce6d69 11933
8782bef2 11934 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11935 if (PL_multi_open == '?') {
11936 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11937 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11938
11939 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11940 allows us to restrict the list needed by reset to just the ??
11941 matches. */
11942 assert(type != OP_TRANS);
11943 if (PL_curstash) {
daba3364 11944 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11945 U32 elements;
11946 if (!mg) {
daba3364 11947 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11948 0);
11949 }
11950 elements = mg->mg_len / sizeof(PMOP**);
11951 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11952 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11953 mg->mg_len = elements * sizeof(PMOP**);
11954 PmopSTASH_set(pm,PL_curstash);
11955 }
11956 }
5db06880
NC
11957#ifdef PERL_MAD
11958 modstart = s;
11959#endif
6136c704 11960 while (*s && strchr(valid_flags, *s))
879d0c72 11961 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
e6897b1a
KW
11962
11963 if (isALNUM(*s)) {
11964 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11965 "Having no space between pattern and following word is deprecated");
11966
11967 }
5db06880
NC
11968#ifdef PERL_MAD
11969 if (PL_madskills && modstart != s) {
11970 SV* tmptoken = newSVpvn(modstart, s - modstart);
11971 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11972 }
11973#endif
4ac733c9 11974 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11975 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11976 {
a2a5de95
NC
11977 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11978 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11979 }
11980
3280af22 11981 PL_lex_op = (OP*)pm;
6154021b 11982 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11983 return s;
11984}
11985
76e3520e 11986STATIC char *
cea2e8a9 11987S_scan_subst(pTHX_ char *start)
79072805 11988{
27da23d5 11989 dVAR;
a0d0e21e 11990 register char *s;
79072805 11991 register PMOP *pm;
4fdae800 11992 I32 first_start;
79072805 11993 I32 es = 0;
5db06880
NC
11994#ifdef PERL_MAD
11995 char *modstart;
11996#endif
79072805 11997
7918f24d
NC
11998 PERL_ARGS_ASSERT_SCAN_SUBST;
11999
6154021b 12000 pl_yylval.ival = OP_NULL;
79072805 12001
5db06880 12002 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12003
37fd879b 12004 if (!s)
cea2e8a9 12005 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 12006
3280af22 12007 if (s[-1] == PL_multi_open)
79072805 12008 s--;
5db06880
NC
12009#ifdef PERL_MAD
12010 if (PL_madskills) {
cd81e915
NC
12011 CURMAD('q', PL_thisopen);
12012 CURMAD('_', PL_thiswhite);
12013 CURMAD('E', PL_thisstuff);
12014 CURMAD('Q', PL_thisclose);
12015 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12016 }
12017#endif
79072805 12018
3280af22 12019 first_start = PL_multi_start;
5db06880 12020 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12021 if (!s) {
37fd879b 12022 if (PL_lex_stuff) {
3280af22 12023 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12024 PL_lex_stuff = NULL;
37fd879b 12025 }
cea2e8a9 12026 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 12027 }
3280af22 12028 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 12029
79072805 12030 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
12031
12032#ifdef PERL_MAD
12033 if (PL_madskills) {
cd81e915
NC
12034 CURMAD('z', PL_thisopen);
12035 CURMAD('R', PL_thisstuff);
12036 CURMAD('Z', PL_thisclose);
5db06880
NC
12037 }
12038 modstart = s;
12039#endif
12040
48c036b1 12041 while (*s) {
a20207d7 12042 if (*s == EXEC_PAT_MOD) {
a687059c 12043 s++;
2f3197b3 12044 es++;
a687059c 12045 }
a20207d7 12046 else if (strchr(S_PAT_MODS, *s))
879d0c72 12047 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
aa78b661
KW
12048 else {
12049 if (isALNUM(*s)) {
12050 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12051 "Having no space between pattern and following word is deprecated");
12052
12053 }
48c036b1 12054 break;
aa78b661 12055 }
378cc40b 12056 }
79072805 12057
5db06880
NC
12058#ifdef PERL_MAD
12059 if (PL_madskills) {
12060 if (modstart != s)
12061 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12062 append_madprops(PL_thismad, (OP*)pm, 0);
12063 PL_thismad = 0;
5db06880
NC
12064 }
12065#endif
a2a5de95
NC
12066 if ((pm->op_pmflags & PMf_CONTINUE)) {
12067 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
12068 }
12069
79072805 12070 if (es) {
6136c704
AL
12071 SV * const repl = newSVpvs("");
12072
0244c3a4
GS
12073 PL_sublex_info.super_bufptr = s;
12074 PL_sublex_info.super_bufend = PL_bufend;
12075 PL_multi_end = 0;
79072805 12076 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
12077 while (es-- > 0) {
12078 if (es)
12079 sv_catpvs(repl, "eval ");
12080 else
12081 sv_catpvs(repl, "do ");
12082 }
6f43d98f 12083 sv_catpvs(repl, "{");
3280af22 12084 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
12085 if (strchr(SvPVX(PL_lex_repl), '#'))
12086 sv_catpvs(repl, "\n");
12087 sv_catpvs(repl, "}");
25da4f38 12088 SvEVALED_on(repl);
3280af22
NIS
12089 SvREFCNT_dec(PL_lex_repl);
12090 PL_lex_repl = repl;
378cc40b 12091 }
79072805 12092
3280af22 12093 PL_lex_op = (OP*)pm;
6154021b 12094 pl_yylval.ival = OP_SUBST;
378cc40b
LW
12095 return s;
12096}
12097
76e3520e 12098STATIC char *
cea2e8a9 12099S_scan_trans(pTHX_ char *start)
378cc40b 12100{
97aff369 12101 dVAR;
a0d0e21e 12102 register char* s;
11343788 12103 OP *o;
79072805 12104 short *tbl;
b84c11c8
NC
12105 U8 squash;
12106 U8 del;
12107 U8 complement;
bb16bae8 12108 bool nondestruct = 0;
5db06880
NC
12109#ifdef PERL_MAD
12110 char *modstart;
12111#endif
79072805 12112
7918f24d
NC
12113 PERL_ARGS_ASSERT_SCAN_TRANS;
12114
6154021b 12115 pl_yylval.ival = OP_NULL;
79072805 12116
5db06880 12117 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 12118 if (!s)
cea2e8a9 12119 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 12120
3280af22 12121 if (s[-1] == PL_multi_open)
2f3197b3 12122 s--;
5db06880
NC
12123#ifdef PERL_MAD
12124 if (PL_madskills) {
cd81e915
NC
12125 CURMAD('q', PL_thisopen);
12126 CURMAD('_', PL_thiswhite);
12127 CURMAD('E', PL_thisstuff);
12128 CURMAD('Q', PL_thisclose);
12129 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12130 }
12131#endif
2f3197b3 12132
5db06880 12133 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12134 if (!s) {
37fd879b 12135 if (PL_lex_stuff) {
3280af22 12136 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12137 PL_lex_stuff = NULL;
37fd879b 12138 }
cea2e8a9 12139 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 12140 }
5db06880 12141 if (PL_madskills) {
cd81e915
NC
12142 CURMAD('z', PL_thisopen);
12143 CURMAD('R', PL_thisstuff);
12144 CURMAD('Z', PL_thisclose);
5db06880 12145 }
79072805 12146
a0ed51b3 12147 complement = del = squash = 0;
5db06880
NC
12148#ifdef PERL_MAD
12149 modstart = s;
12150#endif
7a1e2023
NC
12151 while (1) {
12152 switch (*s) {
12153 case 'c':
79072805 12154 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
12155 break;
12156 case 'd':
a0ed51b3 12157 del = OPpTRANS_DELETE;
7a1e2023
NC
12158 break;
12159 case 's':
79072805 12160 squash = OPpTRANS_SQUASH;
7a1e2023 12161 break;
bb16bae8
FC
12162 case 'r':
12163 nondestruct = 1;
12164 break;
7a1e2023
NC
12165 default:
12166 goto no_more;
12167 }
395c3793
LW
12168 s++;
12169 }
7a1e2023 12170 no_more:
8973db79 12171
aa1f7c5b 12172 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
bb16bae8 12173 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
59f00321
RGS
12174 o->op_private &= ~OPpTRANS_ALL;
12175 o->op_private |= del|squash|complement|
7948272d
NIS
12176 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12177 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 12178
3280af22 12179 PL_lex_op = o;
bb16bae8 12180 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
12181
12182#ifdef PERL_MAD
12183 if (PL_madskills) {
12184 if (modstart != s)
12185 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12186 append_madprops(PL_thismad, o, 0);
12187 PL_thismad = 0;
5db06880
NC
12188 }
12189#endif
12190
79072805
LW
12191 return s;
12192}
12193
76e3520e 12194STATIC char *
cea2e8a9 12195S_scan_heredoc(pTHX_ register char *s)
79072805 12196{
97aff369 12197 dVAR;
79072805
LW
12198 SV *herewas;
12199 I32 op_type = OP_SCALAR;
12200 I32 len;
12201 SV *tmpstr;
12202 char term;
73d840c0 12203 const char *found_newline;
79072805 12204 register char *d;
fc36a67e 12205 register char *e;
4633a7c4 12206 char *peek;
f54cb97a 12207 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
12208#ifdef PERL_MAD
12209 I32 stuffstart = s - SvPVX(PL_linestr);
12210 char *tstart;
12211
cd81e915 12212 PL_realtokenstart = -1;
5db06880 12213#endif
79072805 12214
7918f24d
NC
12215 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12216
79072805 12217 s += 2;
3280af22
NIS
12218 d = PL_tokenbuf;
12219 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 12220 if (!outer)
79072805 12221 *d++ = '\n';
c35e046a
AL
12222 peek = s;
12223 while (SPACE_OR_TAB(*peek))
12224 peek++;
3792a11b 12225 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 12226 s = peek;
79072805 12227 term = *s++;
3280af22 12228 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 12229 d += len;
3280af22 12230 if (s < PL_bufend)
79072805 12231 s++;
79072805
LW
12232 }
12233 else {
12234 if (*s == '\\')
12235 s++, term = '\'';
12236 else
12237 term = '"';
7e2040f0 12238 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 12239 deprecate("bare << to mean <<\"\"");
7e2040f0 12240 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 12241 if (d < e)
12242 *d++ = *s;
12243 }
12244 }
3280af22 12245 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 12246 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
12247 *d++ = '\n';
12248 *d = '\0';
3280af22 12249 len = d - PL_tokenbuf;
5db06880
NC
12250
12251#ifdef PERL_MAD
12252 if (PL_madskills) {
12253 tstart = PL_tokenbuf + !outer;
cd81e915 12254 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 12255 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 12256 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12257 stuffstart = s - SvPVX(PL_linestr);
12258 }
12259#endif
6a27c188 12260#ifndef PERL_STRICT_CR
f63a84b2
LW
12261 d = strchr(s, '\r');
12262 if (d) {
b464bac0 12263 char * const olds = s;
f63a84b2 12264 s = d;
3280af22 12265 while (s < PL_bufend) {
f63a84b2
LW
12266 if (*s == '\r') {
12267 *d++ = '\n';
12268 if (*++s == '\n')
12269 s++;
12270 }
12271 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12272 *d++ = *s++;
12273 s++;
12274 }
12275 else
12276 *d++ = *s++;
12277 }
12278 *d = '\0';
3280af22 12279 PL_bufend = d;
95a20fc0 12280 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
12281 s = olds;
12282 }
12283#endif
5db06880
NC
12284#ifdef PERL_MAD
12285 found_newline = 0;
12286#endif
10edeb5d 12287 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
12288 herewas = newSVpvn(s,PL_bufend-s);
12289 }
12290 else {
5db06880
NC
12291#ifdef PERL_MAD
12292 herewas = newSVpvn(s-1,found_newline-s+1);
12293#else
73d840c0
AL
12294 s--;
12295 herewas = newSVpvn(s,found_newline-s);
5db06880 12296#endif
73d840c0 12297 }
5db06880
NC
12298#ifdef PERL_MAD
12299 if (PL_madskills) {
12300 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12301 if (PL_thisstuff)
12302 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 12303 else
cd81e915 12304 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
12305 }
12306#endif
79072805 12307 s += SvCUR(herewas);
748a9306 12308
5db06880
NC
12309#ifdef PERL_MAD
12310 stuffstart = s - SvPVX(PL_linestr);
12311
12312 if (found_newline)
12313 s--;
12314#endif
12315
7d0a29fe
NC
12316 tmpstr = newSV_type(SVt_PVIV);
12317 SvGROW(tmpstr, 80);
748a9306 12318 if (term == '\'') {
79072805 12319 op_type = OP_CONST;
45977657 12320 SvIV_set(tmpstr, -1);
748a9306
LW
12321 }
12322 else if (term == '`') {
79072805 12323 op_type = OP_BACKTICK;
45977657 12324 SvIV_set(tmpstr, '\\');
748a9306 12325 }
79072805
LW
12326
12327 CLINE;
57843af0 12328 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
12329 PL_multi_open = PL_multi_close = '<';
12330 term = *PL_tokenbuf;
0244c3a4 12331 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
12332 char * const bufptr = PL_sublex_info.super_bufptr;
12333 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 12334 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
12335 s = strchr(bufptr, '\n');
12336 if (!s)
12337 s = bufend;
12338 d = s;
12339 while (s < bufend &&
12340 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12341 if (*s++ == '\n')
57843af0 12342 CopLINE_inc(PL_curcop);
0244c3a4
GS
12343 }
12344 if (s >= bufend) {
eb160463 12345 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
12346 missingterm(PL_tokenbuf);
12347 }
12348 sv_setpvn(herewas,bufptr,d-bufptr+1);
12349 sv_setpvn(tmpstr,d+1,s-d);
12350 s += len - 1;
12351 sv_catpvn(herewas,s,bufend-s);
95a20fc0 12352 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
12353
12354 s = olds;
12355 goto retval;
12356 }
12357 else if (!outer) {
79072805 12358 d = s;
3280af22
NIS
12359 while (s < PL_bufend &&
12360 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 12361 if (*s++ == '\n')
57843af0 12362 CopLINE_inc(PL_curcop);
79072805 12363 }
3280af22 12364 if (s >= PL_bufend) {
eb160463 12365 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12366 missingterm(PL_tokenbuf);
79072805
LW
12367 }
12368 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
12369#ifdef PERL_MAD
12370 if (PL_madskills) {
cd81e915
NC
12371 if (PL_thisstuff)
12372 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12373 else
cd81e915 12374 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12375 stuffstart = s - SvPVX(PL_linestr);
12376 }
12377#endif
79072805 12378 s += len - 1;
57843af0 12379 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12380
3280af22
NIS
12381 sv_catpvn(herewas,s,PL_bufend-s);
12382 sv_setsv(PL_linestr,herewas);
12383 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12384 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12385 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12386 }
12387 else
76f68e9b 12388 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12389 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12390#ifdef PERL_MAD
12391 if (PL_madskills) {
12392 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12393 if (PL_thisstuff)
12394 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12395 else
cd81e915 12396 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12397 }
12398#endif
f0e67a1d 12399 PL_bufptr = s;
17cc9359 12400 CopLINE_inc(PL_curcop);
f0e67a1d 12401 if (!outer || !lex_next_chunk(0)) {
eb160463 12402 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12403 missingterm(PL_tokenbuf);
79072805 12404 }
17cc9359 12405 CopLINE_dec(PL_curcop);
f0e67a1d 12406 s = PL_bufptr;
5db06880
NC
12407#ifdef PERL_MAD
12408 stuffstart = s - SvPVX(PL_linestr);
12409#endif
57843af0 12410 CopLINE_inc(PL_curcop);
3280af22 12411 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12412 PL_last_lop = PL_last_uni = NULL;
6a27c188 12413#ifndef PERL_STRICT_CR
3280af22 12414 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12415 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12416 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12417 {
3280af22
NIS
12418 PL_bufend[-2] = '\n';
12419 PL_bufend--;
95a20fc0 12420 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12421 }
3280af22
NIS
12422 else if (PL_bufend[-1] == '\r')
12423 PL_bufend[-1] = '\n';
f63a84b2 12424 }
3280af22
NIS
12425 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12426 PL_bufend[-1] = '\n';
f63a84b2 12427#endif
3280af22 12428 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12429 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12430 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12431 sv_catsv(PL_linestr,herewas);
12432 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12433 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12434 }
12435 else {
3280af22
NIS
12436 s = PL_bufend;
12437 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12438 }
12439 }
79072805 12440 s++;
0244c3a4 12441retval:
57843af0 12442 PL_multi_end = CopLINE(PL_curcop);
79072805 12443 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12444 SvPV_shrink_to_cur(tmpstr);
79072805 12445 }
8990e307 12446 SvREFCNT_dec(herewas);
2f31ce75 12447 if (!IN_BYTES) {
95a20fc0 12448 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12449 SvUTF8_on(tmpstr);
12450 else if (PL_encoding)
12451 sv_recode_to_utf8(tmpstr, PL_encoding);
12452 }
3280af22 12453 PL_lex_stuff = tmpstr;
6154021b 12454 pl_yylval.ival = op_type;
79072805
LW
12455 return s;
12456}
12457
02aa26ce
NT
12458/* scan_inputsymbol
12459 takes: current position in input buffer
12460 returns: new position in input buffer
6154021b 12461 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12462
12463 This code handles:
12464
12465 <> read from ARGV
12466 <FH> read from filehandle
12467 <pkg::FH> read from package qualified filehandle
12468 <pkg'FH> read from package qualified filehandle
12469 <$fh> read from filehandle in $fh
12470 <*.h> filename glob
12471
12472*/
12473
76e3520e 12474STATIC char *
cea2e8a9 12475S_scan_inputsymbol(pTHX_ char *start)
79072805 12476{
97aff369 12477 dVAR;
02aa26ce 12478 register char *s = start; /* current position in buffer */
1b420867 12479 char *end;
79072805 12480 I32 len;
6136c704
AL
12481 char *d = PL_tokenbuf; /* start of temp holding space */
12482 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12483
7918f24d
NC
12484 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12485
1b420867
GS
12486 end = strchr(s, '\n');
12487 if (!end)
12488 end = PL_bufend;
12489 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12490
12491 /* die if we didn't have space for the contents of the <>,
1b420867 12492 or if it didn't end, or if we see a newline
02aa26ce
NT
12493 */
12494
bb7a0f54 12495 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12496 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12497 if (s >= end)
cea2e8a9 12498 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12499
fc36a67e 12500 s++;
02aa26ce
NT
12501
12502 /* check for <$fh>
12503 Remember, only scalar variables are interpreted as filehandles by
12504 this code. Anything more complex (e.g., <$fh{$num}>) will be
12505 treated as a glob() call.
12506 This code makes use of the fact that except for the $ at the front,
12507 a scalar variable and a filehandle look the same.
12508 */
4633a7c4 12509 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12510
12511 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12512 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12513 d++;
02aa26ce
NT
12514
12515 /* If we've tried to read what we allow filehandles to look like, and
12516 there's still text left, then it must be a glob() and not a getline.
12517 Use scan_str to pull out the stuff between the <> and treat it
12518 as nothing more than a string.
12519 */
12520
3280af22 12521 if (d - PL_tokenbuf != len) {
6154021b 12522 pl_yylval.ival = OP_GLOB;
5db06880 12523 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12524 if (!s)
cea2e8a9 12525 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12526 return s;
12527 }
395c3793 12528 else {
9b3023bc 12529 bool readline_overriden = FALSE;
6136c704 12530 GV *gv_readline;
9b3023bc 12531 GV **gvp;
02aa26ce 12532 /* we're in a filehandle read situation */
3280af22 12533 d = PL_tokenbuf;
02aa26ce
NT
12534
12535 /* turn <> into <ARGV> */
79072805 12536 if (!len)
689badd5 12537 Copy("ARGV",d,5,char);
02aa26ce 12538
9b3023bc 12539 /* Check whether readline() is overriden */
fafc274c 12540 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12541 if ((gv_readline
ba979b31 12542 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12543 ||
017a3ce5 12544 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12545 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12546 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12547 readline_overriden = TRUE;
12548
02aa26ce
NT
12549 /* if <$fh>, create the ops to turn the variable into a
12550 filehandle
12551 */
79072805 12552 if (*d == '$') {
02aa26ce
NT
12553 /* try to find it in the pad for this block, otherwise find
12554 add symbol table ops
12555 */
f8f98e0a 12556 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12557 if (tmp != NOT_IN_PAD) {
00b1698f 12558 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12559 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12560 HEK * const stashname = HvNAME_HEK(stash);
12561 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12562 sv_catpvs(sym, "::");
f558d5af
JH
12563 sv_catpv(sym, d+1);
12564 d = SvPVX(sym);
12565 goto intro_sym;
12566 }
12567 else {
6136c704 12568 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12569 o->op_targ = tmp;
9b3023bc
RGS
12570 PL_lex_op = readline_overriden
12571 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 12572 op_append_elem(OP_LIST, o,
9b3023bc
RGS
12573 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12574 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12575 }
a0d0e21e
LW
12576 }
12577 else {
f558d5af
JH
12578 GV *gv;
12579 ++d;
12580intro_sym:
12581 gv = gv_fetchpv(d,
12582 (PL_in_eval
12583 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12584 : GV_ADDMULTI),
f558d5af 12585 SVt_PV);
9b3023bc
RGS
12586 PL_lex_op = readline_overriden
12587 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 12588 op_append_elem(OP_LIST,
9b3023bc
RGS
12589 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12590 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12591 : (OP*)newUNOP(OP_READLINE, 0,
12592 newUNOP(OP_RV2SV, 0,
12593 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12594 }
7c6fadd6
RGS
12595 if (!readline_overriden)
12596 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12597 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12598 pl_yylval.ival = OP_NULL;
79072805 12599 }
02aa26ce
NT
12600
12601 /* If it's none of the above, it must be a literal filehandle
12602 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12603 else {
6136c704 12604 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12605 PL_lex_op = readline_overriden
12606 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 12607 op_append_elem(OP_LIST,
9b3023bc
RGS
12608 newGVOP(OP_GV, 0, gv),
12609 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12610 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12611 pl_yylval.ival = OP_NULL;
79072805
LW
12612 }
12613 }
02aa26ce 12614
79072805
LW
12615 return s;
12616}
12617
02aa26ce
NT
12618
12619/* scan_str
12620 takes: start position in buffer
09bef843
SB
12621 keep_quoted preserve \ on the embedded delimiter(s)
12622 keep_delims preserve the delimiters around the string
02aa26ce
NT
12623 returns: position to continue reading from buffer
12624 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12625 updates the read buffer.
12626
12627 This subroutine pulls a string out of the input. It is called for:
12628 q single quotes q(literal text)
12629 ' single quotes 'literal text'
12630 qq double quotes qq(interpolate $here please)
12631 " double quotes "interpolate $here please"
12632 qx backticks qx(/bin/ls -l)
12633 ` backticks `/bin/ls -l`
12634 qw quote words @EXPORT_OK = qw( func() $spam )
12635 m// regexp match m/this/
12636 s/// regexp substitute s/this/that/
12637 tr/// string transliterate tr/this/that/
12638 y/// string transliterate y/this/that/
12639 ($*@) sub prototypes sub foo ($)
09bef843 12640 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12641 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12642
12643 In most of these cases (all but <>, patterns and transliterate)
12644 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12645 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12646 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12647 calls scan_str().
4e553d73 12648
02aa26ce
NT
12649 It skips whitespace before the string starts, and treats the first
12650 character as the delimiter. If the delimiter is one of ([{< then
12651 the corresponding "close" character )]}> is used as the closing
12652 delimiter. It allows quoting of delimiters, and if the string has
12653 balanced delimiters ([{<>}]) it allows nesting.
12654
37fd879b
HS
12655 On success, the SV with the resulting string is put into lex_stuff or,
12656 if that is already non-NULL, into lex_repl. The second case occurs only
12657 when parsing the RHS of the special constructs s/// and tr/// (y///).
12658 For convenience, the terminating delimiter character is stuffed into
12659 SvIVX of the SV.
02aa26ce
NT
12660*/
12661
76e3520e 12662STATIC char *
09bef843 12663S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12664{
97aff369 12665 dVAR;
02aa26ce 12666 SV *sv; /* scalar value: string */
d3fcec1f 12667 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12668 register char *s = start; /* current position in the buffer */
12669 register char term; /* terminating character */
12670 register char *to; /* current position in the sv's data */
12671 I32 brackets = 1; /* bracket nesting level */
89491803 12672 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12673 I32 termcode; /* terminating char. code */
89ebb4a3 12674 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12675 STRLEN termlen; /* length of terminating string */
0331ef07 12676 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12677#ifdef PERL_MAD
12678 int stuffstart;
12679 char *tstart;
12680#endif
02aa26ce 12681
7918f24d
NC
12682 PERL_ARGS_ASSERT_SCAN_STR;
12683
02aa26ce 12684 /* skip space before the delimiter */
29595ff2
NC
12685 if (isSPACE(*s)) {
12686 s = PEEKSPACE(s);
12687 }
02aa26ce 12688
5db06880 12689#ifdef PERL_MAD
cd81e915
NC
12690 if (PL_realtokenstart >= 0) {
12691 stuffstart = PL_realtokenstart;
12692 PL_realtokenstart = -1;
5db06880
NC
12693 }
12694 else
12695 stuffstart = start - SvPVX(PL_linestr);
12696#endif
02aa26ce 12697 /* mark where we are, in case we need to report errors */
79072805 12698 CLINE;
02aa26ce
NT
12699
12700 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12701 term = *s;
220e2d4e
IH
12702 if (!UTF) {
12703 termcode = termstr[0] = term;
12704 termlen = 1;
12705 }
12706 else {
f3b9ce0f 12707 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12708 Copy(s, termstr, termlen, U8);
12709 if (!UTF8_IS_INVARIANT(term))
12710 has_utf8 = TRUE;
12711 }
b1c7b182 12712
02aa26ce 12713 /* mark where we are */
57843af0 12714 PL_multi_start = CopLINE(PL_curcop);
3280af22 12715 PL_multi_open = term;
02aa26ce
NT
12716
12717 /* find corresponding closing delimiter */
93a17b20 12718 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12719 termcode = termstr[0] = term = tmps[5];
12720
3280af22 12721 PL_multi_close = term;
79072805 12722
561b68a9
SH
12723 /* create a new SV to hold the contents. 79 is the SV's initial length.
12724 What a random number. */
7d0a29fe
NC
12725 sv = newSV_type(SVt_PVIV);
12726 SvGROW(sv, 80);
45977657 12727 SvIV_set(sv, termcode);
a0d0e21e 12728 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12729
12730 /* move past delimiter and try to read a complete string */
09bef843 12731 if (keep_delims)
220e2d4e
IH
12732 sv_catpvn(sv, s, termlen);
12733 s += termlen;
5db06880
NC
12734#ifdef PERL_MAD
12735 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12736 if (!PL_thisopen && !keep_delims) {
12737 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12738 stuffstart = s - SvPVX(PL_linestr);
12739 }
12740#endif
93a17b20 12741 for (;;) {
220e2d4e
IH
12742 if (PL_encoding && !UTF) {
12743 bool cont = TRUE;
12744
12745 while (cont) {
95a20fc0 12746 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12747 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12748 &offset, (char*)termstr, termlen);
6136c704
AL
12749 const char * const ns = SvPVX_const(PL_linestr) + offset;
12750 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12751
12752 for (; s < ns; s++) {
12753 if (*s == '\n' && !PL_rsfp)
12754 CopLINE_inc(PL_curcop);
12755 }
12756 if (!found)
12757 goto read_more_line;
12758 else {
12759 /* handle quoted delimiters */
52327caf 12760 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12761 const char *t;
95a20fc0 12762 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12763 t--;
12764 if ((svlast-1 - t) % 2) {
12765 if (!keep_quoted) {
12766 *(svlast-1) = term;
12767 *svlast = '\0';
12768 SvCUR_set(sv, SvCUR(sv) - 1);
12769 }
12770 continue;
12771 }
12772 }
12773 if (PL_multi_open == PL_multi_close) {
12774 cont = FALSE;
12775 }
12776 else {
f54cb97a
AL
12777 const char *t;
12778 char *w;
0331ef07 12779 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12780 /* At here, all closes are "was quoted" one,
12781 so we don't check PL_multi_close. */
12782 if (*t == '\\') {
12783 if (!keep_quoted && *(t+1) == PL_multi_open)
12784 t++;
12785 else
12786 *w++ = *t++;
12787 }
12788 else if (*t == PL_multi_open)
12789 brackets++;
12790
12791 *w = *t;
12792 }
12793 if (w < t) {
12794 *w++ = term;
12795 *w = '\0';
95a20fc0 12796 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12797 }
0331ef07 12798 last_off = w - SvPVX(sv);
220e2d4e
IH
12799 if (--brackets <= 0)
12800 cont = FALSE;
12801 }
12802 }
12803 }
12804 if (!keep_delims) {
12805 SvCUR_set(sv, SvCUR(sv) - 1);
12806 *SvEND(sv) = '\0';
12807 }
12808 break;
12809 }
12810
02aa26ce 12811 /* extend sv if need be */
3280af22 12812 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12813 /* set 'to' to the next character in the sv's string */
463ee0b2 12814 to = SvPVX(sv)+SvCUR(sv);
09bef843 12815
02aa26ce 12816 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12817 if (PL_multi_open == PL_multi_close) {
12818 for (; s < PL_bufend; s++,to++) {
02aa26ce 12819 /* embedded newlines increment the current line number */
3280af22 12820 if (*s == '\n' && !PL_rsfp)
57843af0 12821 CopLINE_inc(PL_curcop);
02aa26ce 12822 /* handle quoted delimiters */
3280af22 12823 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12824 if (!keep_quoted && s[1] == term)
a0d0e21e 12825 s++;
02aa26ce 12826 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12827 else
12828 *to++ = *s++;
12829 }
02aa26ce
NT
12830 /* terminate when run out of buffer (the for() condition), or
12831 have found the terminator */
220e2d4e
IH
12832 else if (*s == term) {
12833 if (termlen == 1)
12834 break;
f3b9ce0f 12835 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12836 break;
12837 }
63cd0674 12838 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12839 has_utf8 = TRUE;
93a17b20
LW
12840 *to = *s;
12841 }
12842 }
02aa26ce
NT
12843
12844 /* if the terminator isn't the same as the start character (e.g.,
12845 matched brackets), we have to allow more in the quoting, and
12846 be prepared for nested brackets.
12847 */
93a17b20 12848 else {
02aa26ce 12849 /* read until we run out of string, or we find the terminator */
3280af22 12850 for (; s < PL_bufend; s++,to++) {
02aa26ce 12851 /* embedded newlines increment the line count */
3280af22 12852 if (*s == '\n' && !PL_rsfp)
57843af0 12853 CopLINE_inc(PL_curcop);
02aa26ce 12854 /* backslashes can escape the open or closing characters */
3280af22 12855 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12856 if (!keep_quoted &&
12857 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12858 s++;
12859 else
12860 *to++ = *s++;
12861 }
02aa26ce 12862 /* allow nested opens and closes */
3280af22 12863 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12864 break;
3280af22 12865 else if (*s == PL_multi_open)
93a17b20 12866 brackets++;
63cd0674 12867 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12868 has_utf8 = TRUE;
93a17b20
LW
12869 *to = *s;
12870 }
12871 }
02aa26ce 12872 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12873 *to = '\0';
95a20fc0 12874 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12875
02aa26ce
NT
12876 /*
12877 * this next chunk reads more into the buffer if we're not done yet
12878 */
12879
b1c7b182
GS
12880 if (s < PL_bufend)
12881 break; /* handle case where we are done yet :-) */
79072805 12882
6a27c188 12883#ifndef PERL_STRICT_CR
95a20fc0 12884 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12885 if ((to[-2] == '\r' && to[-1] == '\n') ||
12886 (to[-2] == '\n' && to[-1] == '\r'))
12887 {
f63a84b2
LW
12888 to[-2] = '\n';
12889 to--;
95a20fc0 12890 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12891 }
12892 else if (to[-1] == '\r')
12893 to[-1] = '\n';
12894 }
95a20fc0 12895 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12896 to[-1] = '\n';
12897#endif
12898
220e2d4e 12899 read_more_line:
02aa26ce
NT
12900 /* if we're out of file, or a read fails, bail and reset the current
12901 line marker so we can report where the unterminated string began
12902 */
5db06880
NC
12903#ifdef PERL_MAD
12904 if (PL_madskills) {
c35e046a 12905 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12906 if (PL_thisstuff)
12907 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12908 else
cd81e915 12909 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12910 }
12911#endif
f0e67a1d
Z
12912 CopLINE_inc(PL_curcop);
12913 PL_bufptr = PL_bufend;
12914 if (!lex_next_chunk(0)) {
c07a80fd 12915 sv_free(sv);
eb160463 12916 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12917 return NULL;
79072805 12918 }
f0e67a1d 12919 s = PL_bufptr;
5db06880
NC
12920#ifdef PERL_MAD
12921 stuffstart = 0;
12922#endif
378cc40b 12923 }
4e553d73 12924
02aa26ce
NT
12925 /* at this point, we have successfully read the delimited string */
12926
220e2d4e 12927 if (!PL_encoding || UTF) {
5db06880
NC
12928#ifdef PERL_MAD
12929 if (PL_madskills) {
c35e046a 12930 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12931 const int len = s - tstart;
cd81e915 12932 if (PL_thisstuff)
c35e046a 12933 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12934 else
c35e046a 12935 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12936 if (!PL_thisclose && !keep_delims)
12937 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12938 }
12939#endif
12940
220e2d4e
IH
12941 if (keep_delims)
12942 sv_catpvn(sv, s, termlen);
12943 s += termlen;
12944 }
5db06880
NC
12945#ifdef PERL_MAD
12946 else {
12947 if (PL_madskills) {
c35e046a
AL
12948 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12949 const int len = s - tstart - termlen;
cd81e915 12950 if (PL_thisstuff)
c35e046a 12951 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12952 else
c35e046a 12953 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12954 if (!PL_thisclose && !keep_delims)
12955 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12956 }
12957 }
12958#endif
220e2d4e 12959 if (has_utf8 || PL_encoding)
b1c7b182 12960 SvUTF8_on(sv);
d0063567 12961
57843af0 12962 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12963
12964 /* if we allocated too much space, give some back */
93a17b20
LW
12965 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12966 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12967 SvPV_renew(sv, SvLEN(sv));
79072805 12968 }
02aa26ce
NT
12969
12970 /* decide whether this is the first or second quoted string we've read
12971 for this op
12972 */
4e553d73 12973
3280af22
NIS
12974 if (PL_lex_stuff)
12975 PL_lex_repl = sv;
79072805 12976 else
3280af22 12977 PL_lex_stuff = sv;
378cc40b
LW
12978 return s;
12979}
12980
02aa26ce
NT
12981/*
12982 scan_num
12983 takes: pointer to position in buffer
12984 returns: pointer to new position in buffer
6154021b 12985 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12986
12987 Read a number in any of the formats that Perl accepts:
12988
7fd134d9
JH
12989 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12990 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12991 0b[01](_?[01])*
12992 0[0-7](_?[0-7])*
12993 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12994
3280af22 12995 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
12996 thing it reads.
12997
12998 If it reads a number without a decimal point or an exponent, it will
12999 try converting the number to an integer and see if it can do so
13000 without loss of precision.
13001*/
4e553d73 13002
378cc40b 13003char *
bfed75c6 13004Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 13005{
97aff369 13006 dVAR;
bfed75c6 13007 register const char *s = start; /* current position in buffer */
02aa26ce
NT
13008 register char *d; /* destination in temp buffer */
13009 register char *e; /* end of temp buffer */
86554af2 13010 NV nv; /* number read, as a double */
a0714e2c 13011 SV *sv = NULL; /* place to put the converted number */
a86a20aa 13012 bool floatit; /* boolean: int or float? */
cbbf8932 13013 const char *lastub = NULL; /* position of last underbar */
bfed75c6 13014 static char const number_too_long[] = "Number too long";
378cc40b 13015
7918f24d
NC
13016 PERL_ARGS_ASSERT_SCAN_NUM;
13017
02aa26ce
NT
13018 /* We use the first character to decide what type of number this is */
13019
378cc40b 13020 switch (*s) {
79072805 13021 default:
cea2e8a9 13022 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 13023
02aa26ce 13024 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 13025 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
13026 case '0':
13027 {
02aa26ce
NT
13028 /* variables:
13029 u holds the "number so far"
4f19785b
WSI
13030 shift the power of 2 of the base
13031 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
13032 overflowed was the number more than we can hold?
13033
13034 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
13035 we in octal/hex/binary?" indicator to disallow hex characters
13036 when in octal mode.
02aa26ce 13037 */
9e24b6e2
JH
13038 NV n = 0.0;
13039 UV u = 0;
79072805 13040 I32 shift;
9e24b6e2 13041 bool overflowed = FALSE;
61f33854 13042 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
13043 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
13044 static const char* const bases[5] =
13045 { "", "binary", "", "octal", "hexadecimal" };
13046 static const char* const Bases[5] =
13047 { "", "Binary", "", "Octal", "Hexadecimal" };
13048 static const char* const maxima[5] =
13049 { "",
13050 "0b11111111111111111111111111111111",
13051 "",
13052 "037777777777",
13053 "0xffffffff" };
bfed75c6 13054 const char *base, *Base, *max;
378cc40b 13055
02aa26ce 13056 /* check for hex */
a674e8db 13057 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
13058 shift = 4;
13059 s += 2;
61f33854 13060 just_zero = FALSE;
a674e8db 13061 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
13062 shift = 1;
13063 s += 2;
61f33854 13064 just_zero = FALSE;
378cc40b 13065 }
02aa26ce 13066 /* check for a decimal in disguise */
b78218b7 13067 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 13068 goto decimal;
02aa26ce 13069 /* so it must be octal */
928753ea 13070 else {
378cc40b 13071 shift = 3;
928753ea
JH
13072 s++;
13073 }
13074
13075 if (*s == '_') {
a2a5de95 13076 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
13077 "Misplaced _ in number");
13078 lastub = s++;
13079 }
9e24b6e2
JH
13080
13081 base = bases[shift];
13082 Base = Bases[shift];
13083 max = maxima[shift];
02aa26ce 13084
4f19785b 13085 /* read the rest of the number */
378cc40b 13086 for (;;) {
9e24b6e2 13087 /* x is used in the overflow test,
893fe2c2 13088 b is the digit we're adding on. */
9e24b6e2 13089 UV x, b;
55497cff 13090
378cc40b 13091 switch (*s) {
02aa26ce
NT
13092
13093 /* if we don't mention it, we're done */
378cc40b
LW
13094 default:
13095 goto out;
02aa26ce 13096
928753ea 13097 /* _ are ignored -- but warned about if consecutive */
de3bb511 13098 case '_':
a2a5de95
NC
13099 if (lastub && s == lastub + 1)
13100 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13101 "Misplaced _ in number");
928753ea 13102 lastub = s++;
de3bb511 13103 break;
02aa26ce
NT
13104
13105 /* 8 and 9 are not octal */
378cc40b 13106 case '8': case '9':
4f19785b 13107 if (shift == 3)
cea2e8a9 13108 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 13109 /* FALL THROUGH */
02aa26ce
NT
13110
13111 /* octal digits */
4f19785b 13112 case '2': case '3': case '4':
378cc40b 13113 case '5': case '6': case '7':
4f19785b 13114 if (shift == 1)
cea2e8a9 13115 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
13116 /* FALL THROUGH */
13117
13118 case '0': case '1':
02aa26ce 13119 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 13120 goto digit;
02aa26ce
NT
13121
13122 /* hex digits */
378cc40b
LW
13123 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13124 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 13125 /* make sure they said 0x */
378cc40b
LW
13126 if (shift != 4)
13127 goto out;
55497cff 13128 b = (*s++ & 7) + 9;
02aa26ce
NT
13129
13130 /* Prepare to put the digit we have onto the end
13131 of the number so far. We check for overflows.
13132 */
13133
55497cff 13134 digit:
61f33854 13135 just_zero = FALSE;
9e24b6e2
JH
13136 if (!overflowed) {
13137 x = u << shift; /* make room for the digit */
13138
13139 if ((x >> shift) != u
13140 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
13141 overflowed = TRUE;
13142 n = (NV) u;
9b387841
NC
13143 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13144 "Integer overflow in %s number",
13145 base);
9e24b6e2
JH
13146 } else
13147 u = x | b; /* add the digit to the end */
13148 }
13149 if (overflowed) {
13150 n *= nvshift[shift];
13151 /* If an NV has not enough bits in its
13152 * mantissa to represent an UV this summing of
13153 * small low-order numbers is a waste of time
13154 * (because the NV cannot preserve the
13155 * low-order bits anyway): we could just
13156 * remember when did we overflow and in the
13157 * end just multiply n by the right
13158 * amount. */
13159 n += (NV) b;
55497cff 13160 }
378cc40b
LW
13161 break;
13162 }
13163 }
02aa26ce
NT
13164
13165 /* if we get here, we had success: make a scalar value from
13166 the number.
13167 */
378cc40b 13168 out:
928753ea
JH
13169
13170 /* final misplaced underbar check */
13171 if (s[-1] == '_') {
a2a5de95 13172 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
13173 }
13174
9e24b6e2 13175 if (overflowed) {
a2a5de95
NC
13176 if (n > 4294967295.0)
13177 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13178 "%s number > %s non-portable",
13179 Base, max);
b081dd7e 13180 sv = newSVnv(n);
9e24b6e2
JH
13181 }
13182 else {
15041a67 13183#if UVSIZE > 4
a2a5de95
NC
13184 if (u > 0xffffffff)
13185 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13186 "%s number > %s non-portable",
13187 Base, max);
2cc4c2dc 13188#endif
b081dd7e 13189 sv = newSVuv(u);
9e24b6e2 13190 }
61f33854 13191 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 13192 sv = new_constant(start, s - start, "integer",
eb0d8d16 13193 sv, NULL, NULL, 0);
61f33854 13194 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 13195 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
13196 }
13197 break;
02aa26ce
NT
13198
13199 /*
13200 handle decimal numbers.
13201 we're also sent here when we read a 0 as the first digit
13202 */
378cc40b
LW
13203 case '1': case '2': case '3': case '4': case '5':
13204 case '6': case '7': case '8': case '9': case '.':
13205 decimal:
3280af22
NIS
13206 d = PL_tokenbuf;
13207 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 13208 floatit = FALSE;
02aa26ce
NT
13209
13210 /* read next group of digits and _ and copy into d */
de3bb511 13211 while (isDIGIT(*s) || *s == '_') {
4e553d73 13212 /* skip underscores, checking for misplaced ones
02aa26ce
NT
13213 if -w is on
13214 */
93a17b20 13215 if (*s == '_') {
a2a5de95
NC
13216 if (lastub && s == lastub + 1)
13217 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13218 "Misplaced _ in number");
928753ea 13219 lastub = s++;
93a17b20 13220 }
fc36a67e 13221 else {
02aa26ce 13222 /* check for end of fixed-length buffer */
fc36a67e 13223 if (d >= e)
cea2e8a9 13224 Perl_croak(aTHX_ number_too_long);
02aa26ce 13225 /* if we're ok, copy the character */
378cc40b 13226 *d++ = *s++;
fc36a67e 13227 }
378cc40b 13228 }
02aa26ce
NT
13229
13230 /* final misplaced underbar check */
928753ea 13231 if (lastub && s == lastub + 1) {
a2a5de95 13232 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 13233 }
02aa26ce
NT
13234
13235 /* read a decimal portion if there is one. avoid
13236 3..5 being interpreted as the number 3. followed
13237 by .5
13238 */
2f3197b3 13239 if (*s == '.' && s[1] != '.') {
79072805 13240 floatit = TRUE;
378cc40b 13241 *d++ = *s++;
02aa26ce 13242
928753ea 13243 if (*s == '_') {
a2a5de95
NC
13244 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13245 "Misplaced _ in number");
928753ea
JH
13246 lastub = s;
13247 }
13248
13249 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 13250 */
fc36a67e 13251 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 13252 /* fixed length buffer check */
fc36a67e 13253 if (d >= e)
cea2e8a9 13254 Perl_croak(aTHX_ number_too_long);
928753ea 13255 if (*s == '_') {
a2a5de95
NC
13256 if (lastub && s == lastub + 1)
13257 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13258 "Misplaced _ in number");
928753ea
JH
13259 lastub = s;
13260 }
13261 else
fc36a67e 13262 *d++ = *s;
378cc40b 13263 }
928753ea
JH
13264 /* fractional part ending in underbar? */
13265 if (s[-1] == '_') {
a2a5de95
NC
13266 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13267 "Misplaced _ in number");
928753ea 13268 }
dd629d5b
GS
13269 if (*s == '.' && isDIGIT(s[1])) {
13270 /* oops, it's really a v-string, but without the "v" */
f4758303 13271 s = start;
dd629d5b
GS
13272 goto vstring;
13273 }
378cc40b 13274 }
02aa26ce
NT
13275
13276 /* read exponent part, if present */
3792a11b 13277 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
13278 floatit = TRUE;
13279 s++;
02aa26ce
NT
13280
13281 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 13282 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 13283
7fd134d9
JH
13284 /* stray preinitial _ */
13285 if (*s == '_') {
a2a5de95
NC
13286 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13287 "Misplaced _ in number");
7fd134d9
JH
13288 lastub = s++;
13289 }
13290
02aa26ce 13291 /* allow positive or negative exponent */
378cc40b
LW
13292 if (*s == '+' || *s == '-')
13293 *d++ = *s++;
02aa26ce 13294
7fd134d9
JH
13295 /* stray initial _ */
13296 if (*s == '_') {
a2a5de95
NC
13297 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13298 "Misplaced _ in number");
7fd134d9
JH
13299 lastub = s++;
13300 }
13301
7fd134d9
JH
13302 /* read digits of exponent */
13303 while (isDIGIT(*s) || *s == '_') {
13304 if (isDIGIT(*s)) {
13305 if (d >= e)
13306 Perl_croak(aTHX_ number_too_long);
b3b48e3e 13307 *d++ = *s++;
7fd134d9
JH
13308 }
13309 else {
041457d9 13310 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
13311 (!isDIGIT(s[1]) && s[1] != '_')))
13312 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13313 "Misplaced _ in number");
b3b48e3e 13314 lastub = s++;
7fd134d9 13315 }
7fd134d9 13316 }
378cc40b 13317 }
02aa26ce 13318
02aa26ce 13319
0b7fceb9 13320 /*
58bb9ec3
NC
13321 We try to do an integer conversion first if no characters
13322 indicating "float" have been found.
0b7fceb9
MU
13323 */
13324
13325 if (!floatit) {
58bb9ec3 13326 UV uv;
6136c704 13327 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
13328
13329 if (flags == IS_NUMBER_IN_UV) {
13330 if (uv <= IV_MAX)
b081dd7e 13331 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 13332 else
b081dd7e 13333 sv = newSVuv(uv);
58bb9ec3
NC
13334 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13335 if (uv <= (UV) IV_MIN)
b081dd7e 13336 sv = newSViv(-(IV)uv);
58bb9ec3
NC
13337 else
13338 floatit = TRUE;
13339 } else
13340 floatit = TRUE;
13341 }
0b7fceb9 13342 if (floatit) {
58bb9ec3
NC
13343 /* terminate the string */
13344 *d = '\0';
86554af2 13345 nv = Atof(PL_tokenbuf);
b081dd7e 13346 sv = newSVnv(nv);
86554af2 13347 }
86554af2 13348
eb0d8d16
NC
13349 if ( floatit
13350 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13351 const char *const key = floatit ? "float" : "integer";
13352 const STRLEN keylen = floatit ? 5 : 7;
13353 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13354 key, keylen, sv, NULL, NULL, 0);
13355 }
378cc40b 13356 break;
0b7fceb9 13357
e312add1 13358 /* if it starts with a v, it could be a v-string */
a7cb1f99 13359 case 'v':
dd629d5b 13360vstring:
561b68a9 13361 sv = newSV(5); /* preallocate storage space */
65b06e02 13362 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 13363 break;
79072805 13364 }
a687059c 13365
02aa26ce
NT
13366 /* make the op for the constant and return */
13367
a86a20aa 13368 if (sv)
b73d6f50 13369 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13370 else
5f66b61c 13371 lvalp->opval = NULL;
a687059c 13372
73d840c0 13373 return (char *)s;
378cc40b
LW
13374}
13375
76e3520e 13376STATIC char *
cea2e8a9 13377S_scan_formline(pTHX_ register char *s)
378cc40b 13378{
97aff369 13379 dVAR;
79072805 13380 register char *eol;
378cc40b 13381 register char *t;
6136c704 13382 SV * const stuff = newSVpvs("");
79072805 13383 bool needargs = FALSE;
c5ee2135 13384 bool eofmt = FALSE;
5db06880
NC
13385#ifdef PERL_MAD
13386 char *tokenstart = s;
4f61fd4b
JC
13387 SV* savewhite = NULL;
13388
5db06880 13389 if (PL_madskills) {
cd81e915
NC
13390 savewhite = PL_thiswhite;
13391 PL_thiswhite = 0;
5db06880
NC
13392 }
13393#endif
378cc40b 13394
7918f24d
NC
13395 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13396
79072805 13397 while (!needargs) {
a1b95068 13398 if (*s == '.') {
c35e046a 13399 t = s+1;
51882d45 13400#ifdef PERL_STRICT_CR
c35e046a
AL
13401 while (SPACE_OR_TAB(*t))
13402 t++;
51882d45 13403#else
c35e046a
AL
13404 while (SPACE_OR_TAB(*t) || *t == '\r')
13405 t++;
51882d45 13406#endif
c5ee2135
WL
13407 if (*t == '\n' || t == PL_bufend) {
13408 eofmt = TRUE;
79072805 13409 break;
c5ee2135 13410 }
79072805 13411 }
3280af22 13412 if (PL_in_eval && !PL_rsfp) {
07409e01 13413 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13414 if (!eol++)
3280af22 13415 eol = PL_bufend;
0f85fab0
LW
13416 }
13417 else
3280af22 13418 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13419 if (*s != '#') {
a0d0e21e
LW
13420 for (t = s; t < eol; t++) {
13421 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13422 needargs = FALSE;
13423 goto enough; /* ~~ must be first line in formline */
378cc40b 13424 }
a0d0e21e
LW
13425 if (*t == '@' || *t == '^')
13426 needargs = TRUE;
378cc40b 13427 }
7121b347
MG
13428 if (eol > s) {
13429 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13430#ifndef PERL_STRICT_CR
7121b347
MG
13431 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13432 char *end = SvPVX(stuff) + SvCUR(stuff);
13433 end[-2] = '\n';
13434 end[-1] = '\0';
b162af07 13435 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13436 }
2dc4c65b 13437#endif
7121b347
MG
13438 }
13439 else
13440 break;
79072805 13441 }
95a20fc0 13442 s = (char*)eol;
3280af22 13443 if (PL_rsfp) {
f0e67a1d 13444 bool got_some;
5db06880
NC
13445#ifdef PERL_MAD
13446 if (PL_madskills) {
cd81e915
NC
13447 if (PL_thistoken)
13448 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13449 else
cd81e915 13450 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13451 }
13452#endif
f0e67a1d
Z
13453 PL_bufptr = PL_bufend;
13454 CopLINE_inc(PL_curcop);
13455 got_some = lex_next_chunk(0);
13456 CopLINE_dec(PL_curcop);
13457 s = PL_bufptr;
5db06880 13458#ifdef PERL_MAD
f0e67a1d 13459 tokenstart = PL_bufptr;
5db06880 13460#endif
f0e67a1d 13461 if (!got_some)
378cc40b 13462 break;
378cc40b 13463 }
463ee0b2 13464 incline(s);
79072805 13465 }
a0d0e21e
LW
13466 enough:
13467 if (SvCUR(stuff)) {
3280af22 13468 PL_expect = XTERM;
79072805 13469 if (needargs) {
3280af22 13470 PL_lex_state = LEX_NORMAL;
cd81e915 13471 start_force(PL_curforce);
9ded7720 13472 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13473 force_next(',');
13474 }
a0d0e21e 13475 else
3280af22 13476 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13477 if (!IN_BYTES) {
95a20fc0 13478 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13479 SvUTF8_on(stuff);
13480 else if (PL_encoding)
13481 sv_recode_to_utf8(stuff, PL_encoding);
13482 }
cd81e915 13483 start_force(PL_curforce);
9ded7720 13484 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13485 force_next(THING);
cd81e915 13486 start_force(PL_curforce);
9ded7720 13487 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13488 force_next(LSTOP);
378cc40b 13489 }
79072805 13490 else {
8990e307 13491 SvREFCNT_dec(stuff);
c5ee2135
WL
13492 if (eofmt)
13493 PL_lex_formbrack = 0;
3280af22 13494 PL_bufptr = s;
79072805 13495 }
5db06880
NC
13496#ifdef PERL_MAD
13497 if (PL_madskills) {
cd81e915
NC
13498 if (PL_thistoken)
13499 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13500 else
cd81e915
NC
13501 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13502 PL_thiswhite = savewhite;
5db06880
NC
13503 }
13504#endif
79072805 13505 return s;
378cc40b 13506}
a687059c 13507
ba6d6ac9 13508I32
864dbfa3 13509Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13510{
97aff369 13511 dVAR;
a3b680e6 13512 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13513 CV* const outsidecv = PL_compcv;
8990e307 13514
3280af22
NIS
13515 if (PL_compcv) {
13516 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13517 }
7766f137 13518 SAVEI32(PL_subline);
3280af22 13519 save_item(PL_subname);
3280af22 13520 SAVESPTR(PL_compcv);
3280af22 13521
ea726b52 13522 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13523 CvFLAGS(PL_compcv) |= flags;
13524
57843af0 13525 PL_subline = CopLINE(PL_curcop);
dd2155a4 13526 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13527 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13528 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13529
8990e307
LW
13530 return oldsavestack_ix;
13531}
13532
084592ab
CN
13533#ifdef __SC__
13534#pragma segment Perl_yylex
13535#endif
af41e527
NC
13536static int
13537S_yywarn(pTHX_ const char *const s)
8990e307 13538{
97aff369 13539 dVAR;
7918f24d
NC
13540
13541 PERL_ARGS_ASSERT_YYWARN;
13542
faef0170 13543 PL_in_eval |= EVAL_WARNONLY;
748a9306 13544 yyerror(s);
faef0170 13545 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13546 return 0;
8990e307
LW
13547}
13548
13549int
15f169a1 13550Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13551{
97aff369 13552 dVAR;
bfed75c6
AL
13553 const char *where = NULL;
13554 const char *context = NULL;
68dc0745 13555 int contlen = -1;
46fc3d4c 13556 SV *msg;
5912531f 13557 int yychar = PL_parser->yychar;
463ee0b2 13558
7918f24d
NC
13559 PERL_ARGS_ASSERT_YYERROR;
13560
3280af22 13561 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13562 where = "at EOF";
8bcfe651
TM
13563 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13564 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13565 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13566 /*
13567 Only for NetWare:
13568 The code below is removed for NetWare because it abends/crashes on NetWare
13569 when the script has error such as not having the closing quotes like:
13570 if ($var eq "value)
13571 Checking of white spaces is anyway done in NetWare code.
13572 */
13573#ifndef NETWARE
3280af22
NIS
13574 while (isSPACE(*PL_oldoldbufptr))
13575 PL_oldoldbufptr++;
f355267c 13576#endif
3280af22
NIS
13577 context = PL_oldoldbufptr;
13578 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13579 }
8bcfe651
TM
13580 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13581 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13582 /*
13583 Only for NetWare:
13584 The code below is removed for NetWare because it abends/crashes on NetWare
13585 when the script has error such as not having the closing quotes like:
13586 if ($var eq "value)
13587 Checking of white spaces is anyway done in NetWare code.
13588 */
13589#ifndef NETWARE
3280af22
NIS
13590 while (isSPACE(*PL_oldbufptr))
13591 PL_oldbufptr++;
f355267c 13592#endif
3280af22
NIS
13593 context = PL_oldbufptr;
13594 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13595 }
13596 else if (yychar > 255)
68dc0745 13597 where = "next token ???";
12fbd33b 13598 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13599 if (PL_lex_state == LEX_NORMAL ||
13600 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13601 where = "at end of line";
3280af22 13602 else if (PL_lex_inpat)
68dc0745 13603 where = "within pattern";
463ee0b2 13604 else
68dc0745 13605 where = "within string";
463ee0b2 13606 }
46fc3d4c 13607 else {
84bafc02 13608 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13609 if (yychar < 32)
cea2e8a9 13610 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13611 else if (isPRINT_LC(yychar)) {
88c9ea1e 13612 const char string = yychar;
5e7aa789
NC
13613 sv_catpvn(where_sv, &string, 1);
13614 }
463ee0b2 13615 else
cea2e8a9 13616 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13617 where = SvPVX_const(where_sv);
463ee0b2 13618 }
46fc3d4c 13619 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13620 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13621 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13622 if (context)
cea2e8a9 13623 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13624 else
cea2e8a9 13625 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13626 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13627 Perl_sv_catpvf(aTHX_ msg,
57def98f 13628 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13629 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13630 PL_multi_end = 0;
a0d0e21e 13631 }
500960a6 13632 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13633 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13634 }
463ee0b2 13635 else
5a844595 13636 qerror(msg);
c7d6bfb2
GS
13637 if (PL_error_count >= 10) {
13638 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13639 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13640 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13641 else
13642 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13643 OutCopFILE(PL_curcop));
c7d6bfb2 13644 }
3280af22 13645 PL_in_my = 0;
5c284bb0 13646 PL_in_my_stash = NULL;
463ee0b2
LW
13647 return 0;
13648}
084592ab
CN
13649#ifdef __SC__
13650#pragma segment Main
13651#endif
4e35701f 13652
b250498f 13653STATIC char*
3ae08724 13654S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13655{
97aff369 13656 dVAR;
f54cb97a 13657 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13658
13659 PERL_ARGS_ASSERT_SWALLOW_BOM;
13660
7aa207d6 13661 switch (s[0]) {
4e553d73
NIS
13662 case 0xFF:
13663 if (s[1] == 0xFE) {
ee6ba15d 13664 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13665 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13666 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13667#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13668 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13669 s += 2;
dea0fc0b 13670 if (PL_bufend > (char*)s) {
81a923f4 13671 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13672 }
b250498f 13673#else
ee6ba15d 13674 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13675#endif
01ec43d0
GS
13676 }
13677 break;
78ae23f5 13678 case 0xFE:
7aa207d6 13679 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13680#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13681 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13682 s += 2;
13683 if (PL_bufend > (char *)s) {
81a923f4 13684 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13685 }
b250498f 13686#else
ee6ba15d 13687 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13688#endif
01ec43d0
GS
13689 }
13690 break;
3ae08724
GS
13691 case 0xEF:
13692 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13693 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13694 s += 3; /* UTF-8 */
13695 }
13696 break;
13697 case 0:
7aa207d6
JH
13698 if (slen > 3) {
13699 if (s[1] == 0) {
13700 if (s[2] == 0xFE && s[3] == 0xFF) {
13701 /* UTF-32 big-endian */
ee6ba15d 13702 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13703 }
13704 }
13705 else if (s[2] == 0 && s[3] != 0) {
13706 /* Leading bytes
13707 * 00 xx 00 xx
13708 * are a good indicator of UTF-16BE. */
ee6ba15d 13709#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13710 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13711 s = add_utf16_textfilter(s, FALSE);
13712#else
13713 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13714#endif
7aa207d6 13715 }
01ec43d0 13716 }
e294cc5d
JH
13717#ifdef EBCDIC
13718 case 0xDD:
13719 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13720 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13721 s += 4; /* UTF-8 */
13722 }
13723 break;
13724#endif
13725
7aa207d6
JH
13726 default:
13727 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13728 /* Leading bytes
13729 * xx 00 xx 00
13730 * are a good indicator of UTF-16LE. */
ee6ba15d 13731#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13732 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13733 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13734#else
13735 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13736#endif
7aa207d6 13737 }
01ec43d0 13738 }
b8f84bb2 13739 return (char*)s;
b250498f 13740}
4755096e 13741
6e3aabd6
GS
13742
13743#ifndef PERL_NO_UTF16_FILTER
13744static I32
a28af015 13745S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13746{
97aff369 13747 dVAR;
f3040f2c 13748 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13749 /* We re-use this each time round, throwing the contents away before we
13750 return. */
2a773401 13751 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13752 SV *const utf8_buffer = filter;
c28d6105 13753 IV status = IoPAGE(filter);
f2338a2e 13754 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 13755 I32 retval;
c8b0cbae 13756
c85ae797
NC
13757 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13758
c8b0cbae
NC
13759 /* As we're automatically added, at the lowest level, and hence only called
13760 from this file, we can be sure that we're not called in block mode. Hence
13761 don't bother writing code to deal with block mode. */
13762 if (maxlen) {
13763 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13764 }
c28d6105
NC
13765 if (status < 0) {
13766 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13767 }
1de9afcd 13768 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13769 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13770 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13771 reverse ? 'l' : 'b', idx, maxlen, status,
13772 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13773
13774 while (1) {
13775 STRLEN chars;
13776 STRLEN have;
dea0fc0b 13777 I32 newlen;
2a773401 13778 U8 *end;
c28d6105
NC
13779 /* First, look in our buffer of existing UTF-8 data: */
13780 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13781
13782 if (nl) {
13783 ++nl;
13784 } else if (status == 0) {
13785 /* EOF */
13786 IoPAGE(filter) = 0;
13787 nl = SvEND(utf8_buffer);
13788 }
13789 if (nl) {
d2d1d4de
NC
13790 STRLEN got = nl - SvPVX(utf8_buffer);
13791 /* Did we have anything to append? */
13792 retval = got != 0;
13793 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13794 /* Everything else in this code works just fine if SVp_POK isn't
13795 set. This, however, needs it, and we need it to work, else
13796 we loop infinitely because the buffer is never consumed. */
13797 sv_chop(utf8_buffer, nl);
13798 break;
13799 }
ba77e4cc 13800
c28d6105
NC
13801 /* OK, not a complete line there, so need to read some more UTF-16.
13802 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13803 while (1) {
13804 if (status <= 0)
13805 break;
13806 if (SvCUR(utf16_buffer) >= 2) {
13807 /* Location of the high octet of the last complete code point.
13808 Gosh, UTF-16 is a pain. All the benefits of variable length,
13809 *coupled* with all the benefits of partial reads and
13810 endianness. */
13811 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13812 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13813
13814 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13815 break;
13816 }
13817
13818 /* We have the first half of a surrogate. Read more. */
13819 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13820 }
c28d6105 13821
c28d6105
NC
13822 status = FILTER_READ(idx + 1, utf16_buffer,
13823 160 + (SvCUR(utf16_buffer) & 1));
13824 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13825 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13826 if (status < 0) {
13827 /* Error */
13828 IoPAGE(filter) = status;
13829 return status;
13830 }
13831 }
13832
13833 chars = SvCUR(utf16_buffer) >> 1;
13834 have = SvCUR(utf8_buffer);
13835 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13836
aa6dbd60 13837 if (reverse) {
c28d6105
NC
13838 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13839 (U8*)SvPVX_const(utf8_buffer) + have,
13840 chars * 2, &newlen);
aa6dbd60 13841 } else {
2a773401 13842 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13843 (U8*)SvPVX_const(utf8_buffer) + have,
13844 chars * 2, &newlen);
2a773401 13845 }
c28d6105 13846 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13847 *end = '\0';
c28d6105 13848
e07286ed
NC
13849 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13850 it's private to us, and utf16_to_utf8{,reversed} take a
13851 (pointer,length) pair, rather than a NUL-terminated string. */
13852 if(SvCUR(utf16_buffer) & 1) {
13853 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13854 SvCUR_set(utf16_buffer, 1);
13855 } else {
13856 SvCUR_set(utf16_buffer, 0);
13857 }
2a773401 13858 }
c28d6105
NC
13859 DEBUG_P(PerlIO_printf(Perl_debug_log,
13860 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13861 status,
13862 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13863 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13864 return retval;
6e3aabd6 13865}
81a923f4
NC
13866
13867static U8 *
13868S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13869{
2a773401 13870 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13871
c85ae797
NC
13872 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13873
c28d6105 13874 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13875 sv_setpvs(filter, "");
2a773401 13876 IoLINES(filter) = reversed;
c28d6105
NC
13877 IoPAGE(filter) = 1; /* Not EOF */
13878
13879 /* Sadly, we have to return a valid pointer, come what may, so we have to
13880 ignore any error return from this. */
13881 SvCUR_set(PL_linestr, 0);
13882 if (FILTER_READ(0, PL_linestr, 0)) {
13883 SvUTF8_on(PL_linestr);
81a923f4 13884 } else {
c28d6105 13885 SvUTF8_on(PL_linestr);
81a923f4 13886 }
c28d6105 13887 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13888 return (U8*)SvPVX(PL_linestr);
13889}
6e3aabd6 13890#endif
9f4817db 13891
f333445c
JP
13892/*
13893Returns a pointer to the next character after the parsed
13894vstring, as well as updating the passed in sv.
13895
13896Function must be called like
13897
561b68a9 13898 sv = newSV(5);
65b06e02 13899 s = scan_vstring(s,e,sv);
f333445c 13900
65b06e02 13901where s and e are the start and end of the string.
f333445c
JP
13902The sv should already be large enough to store the vstring
13903passed in, for performance reasons.
13904
13905*/
13906
13907char *
15f169a1 13908Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13909{
97aff369 13910 dVAR;
bfed75c6
AL
13911 const char *pos = s;
13912 const char *start = s;
7918f24d
NC
13913
13914 PERL_ARGS_ASSERT_SCAN_VSTRING;
13915
f333445c 13916 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13917 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13918 pos++;
f333445c
JP
13919 if ( *pos != '.') {
13920 /* this may not be a v-string if followed by => */
bfed75c6 13921 const char *next = pos;
65b06e02 13922 while (next < e && isSPACE(*next))
8fc7bb1c 13923 ++next;
65b06e02 13924 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13925 /* return string not v-string */
13926 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13927 return (char *)pos;
f333445c
JP
13928 }
13929 }
13930
13931 if (!isALPHA(*pos)) {
89ebb4a3 13932 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13933
d4c19fe8
AL
13934 if (*s == 'v')
13935 s++; /* get past 'v' */
f333445c 13936
76f68e9b 13937 sv_setpvs(sv, "");
f333445c
JP
13938
13939 for (;;) {
d4c19fe8 13940 /* this is atoi() that tolerates underscores */
0bd48802
AL
13941 U8 *tmpend;
13942 UV rev = 0;
d4c19fe8
AL
13943 const char *end = pos;
13944 UV mult = 1;
13945 while (--end >= s) {
13946 if (*end != '_') {
13947 const UV orev = rev;
f333445c
JP
13948 rev += (*end - '0') * mult;
13949 mult *= 10;
9b387841
NC
13950 if (orev > rev)
13951 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13952 "Integer overflow in decimal number");
f333445c
JP
13953 }
13954 }
13955#ifdef EBCDIC
13956 if (rev > 0x7FFFFFFF)
13957 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13958#endif
13959 /* Append native character for the rev point */
13960 tmpend = uvchr_to_utf8(tmpbuf, rev);
13961 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13962 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13963 SvUTF8_on(sv);
65b06e02 13964 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13965 s = ++pos;
13966 else {
13967 s = pos;
13968 break;
13969 }
65b06e02 13970 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13971 pos++;
13972 }
13973 SvPOK_on(sv);
13974 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13975 SvRMAGICAL_on(sv);
13976 }
73d840c0 13977 return (char *)s;
f333445c
JP
13978}
13979
88e1f1a2
JV
13980int
13981Perl_keyword_plugin_standard(pTHX_
13982 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13983{
13984 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13985 PERL_UNUSED_CONTEXT;
13986 PERL_UNUSED_ARG(keyword_ptr);
13987 PERL_UNUSED_ARG(keyword_len);
13988 PERL_UNUSED_ARG(op_ptr);
13989 return KEYWORD_PLUGIN_DECLINE;
13990}
13991
a7aaec61 13992#define parse_recdescent(g) S_parse_recdescent(aTHX_ g)
e53d8f76
Z
13993static void
13994S_parse_recdescent(pTHX_ int gramtype)
a7aaec61
Z
13995{
13996 SAVEI32(PL_lex_brackets);
13997 if (PL_lex_brackets > 100)
13998 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13999 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
14000 if(yyparse(gramtype) && !PL_parser->error_count)
14001 qerror(Perl_mess(aTHX_ "Parse error"));
14002}
14003
e53d8f76
Z
14004#define parse_recdescent_for_op(g) S_parse_recdescent_for_op(aTHX_ g)
14005static OP *
14006S_parse_recdescent_for_op(pTHX_ int gramtype)
14007{
14008 OP *o;
14009 ENTER;
14010 SAVEVPTR(PL_eval_root);
14011 PL_eval_root = NULL;
14012 parse_recdescent(gramtype);
14013 o = PL_eval_root;
14014 LEAVE;
14015 return o;
14016}
14017
14018/*
14019=for apidoc Amx|OP *|parse_block|U32 flags
14020
14021Parse a single complete Perl code block. This consists of an opening
14022brace, a sequence of statements, and a closing brace. The block
14023constitutes a lexical scope, so C<my> variables and various compile-time
14024effects can be contained within it. It is up to the caller to ensure
14025that the dynamic parser state (L</PL_parser> et al) is correctly set to
14026reflect the source of the code to be parsed and the lexical context for
14027the statement.
14028
14029The op tree representing the code block is returned. This is always a
14030real op, never a null pointer. It will normally be a C<lineseq> list,
14031including C<nextstate> or equivalent ops. No ops to construct any kind
14032of runtime scope are included by virtue of it being a block.
14033
14034If an error occurs in parsing or compilation, in most cases a valid op
14035tree (most likely null) is returned anyway. The error is reflected in
14036the parser state, normally resulting in a single exception at the top
14037level of parsing which covers all the compilation errors that occurred.
14038Some compilation errors, however, will throw an exception immediately.
14039
14040The I<flags> parameter is reserved for future use, and must always
14041be zero.
14042
14043=cut
14044*/
14045
14046OP *
14047Perl_parse_block(pTHX_ U32 flags)
14048{
14049 if (flags)
14050 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
14051 return parse_recdescent_for_op(GRAMBLOCK);
14052}
14053
1da4ca5f 14054/*
8359b381
Z
14055=for apidoc Amx|OP *|parse_barestmt|U32 flags
14056
14057Parse a single unadorned Perl statement. This may be a normal imperative
14058statement or a declaration that has compile-time effect. It does not
14059include any label or other affixture. It is up to the caller to ensure
14060that the dynamic parser state (L</PL_parser> et al) is correctly set to
14061reflect the source of the code to be parsed and the lexical context for
14062the statement.
14063
14064The op tree representing the statement is returned. This may be a
14065null pointer if the statement is null, for example if it was actually
14066a subroutine definition (which has compile-time side effects). If not
14067null, it will be ops directly implementing the statement, suitable to
14068pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
14069equivalent op (except for those embedded in a scope contained entirely
14070within the statement).
14071
14072If an error occurs in parsing or compilation, in most cases a valid op
14073tree (most likely null) is returned anyway. The error is reflected in
14074the parser state, normally resulting in a single exception at the top
14075level of parsing which covers all the compilation errors that occurred.
14076Some compilation errors, however, will throw an exception immediately.
14077
14078The I<flags> parameter is reserved for future use, and must always
14079be zero.
14080
14081=cut
14082*/
14083
14084OP *
14085Perl_parse_barestmt(pTHX_ U32 flags)
14086{
14087 if (flags)
14088 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
14089 return parse_recdescent_for_op(GRAMBARESTMT);
14090}
14091
14092/*
361d9b55
Z
14093=for apidoc Amx|SV *|parse_label|U32 flags
14094
14095Parse a single label, possibly optional, of the type that may prefix a
14096Perl statement. It is up to the caller to ensure that the dynamic parser
14097state (L</PL_parser> et al) is correctly set to reflect the source of
14098the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
14099label is optional, otherwise it is mandatory.
14100
14101The name of the label is returned in the form of a fresh scalar. If an
14102optional label is absent, a null pointer is returned.
14103
14104If an error occurs in parsing, which can only occur if the label is
14105mandatory, a valid label is returned anyway. The error is reflected in
14106the parser state, normally resulting in a single exception at the top
14107level of parsing which covers all the compilation errors that occurred.
14108
14109=cut
14110*/
14111
14112SV *
14113Perl_parse_label(pTHX_ U32 flags)
14114{
14115 if (flags & ~PARSE_OPTIONAL)
14116 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
14117 if (PL_lex_state == LEX_KNOWNEXT) {
14118 PL_parser->yychar = yylex();
14119 if (PL_parser->yychar == LABEL) {
14120 char *lpv = pl_yylval.pval;
14121 STRLEN llen = strlen(lpv);
14122 SV *lsv;
14123 PL_parser->yychar = YYEMPTY;
14124 lsv = newSV_type(SVt_PV);
14125 SvPV_set(lsv, lpv);
14126 SvCUR_set(lsv, llen);
14127 SvLEN_set(lsv, llen+1);
14128 SvPOK_on(lsv);
14129 return lsv;
14130 } else {
14131 yyunlex();
14132 goto no_label;
14133 }
14134 } else {
14135 char *s, *t;
14136 U8 c;
14137 STRLEN wlen, bufptr_pos;
14138 lex_read_space(0);
14139 t = s = PL_bufptr;
14140 c = (U8)*s;
14141 if (!isIDFIRST_A(c))
14142 goto no_label;
14143 do {
14144 c = (U8)*++t;
14145 } while(isWORDCHAR_A(c));
14146 wlen = t - s;
14147 if (word_takes_any_delimeter(s, wlen))
14148 goto no_label;
14149 bufptr_pos = s - SvPVX(PL_linestr);
14150 PL_bufptr = t;
14151 lex_read_space(LEX_KEEP_PREVIOUS);
14152 t = PL_bufptr;
14153 s = SvPVX(PL_linestr) + bufptr_pos;
14154 if (t[0] == ':' && t[1] != ':') {
14155 PL_oldoldbufptr = PL_oldbufptr;
14156 PL_oldbufptr = s;
14157 PL_bufptr = t+1;
14158 return newSVpvn(s, wlen);
14159 } else {
14160 PL_bufptr = s;
14161 no_label:
14162 if (flags & PARSE_OPTIONAL) {
14163 return NULL;
14164 } else {
14165 qerror(Perl_mess(aTHX_ "Parse error"));
14166 return newSVpvs("x");
14167 }
14168 }
14169 }
14170}
14171
14172/*
28ac2b49
Z
14173=for apidoc Amx|OP *|parse_fullstmt|U32 flags
14174
14175Parse a single complete Perl statement. This may be a normal imperative
8359b381
Z
14176statement or a declaration that has compile-time effect, and may include
14177an optional label. It is up to the caller to ensure that the dynamic
28ac2b49
Z
14178parser state (L</PL_parser> et al) is correctly set to reflect the source
14179of the code to be parsed and the lexical context for the statement.
14180
14181The op tree representing the statement is returned. This may be a
14182null pointer if the statement is null, for example if it was actually
14183a subroutine definition (which has compile-time side effects). If not
14184null, it will be the result of a L</newSTATEOP> call, normally including
14185a C<nextstate> or equivalent op.
14186
14187If an error occurs in parsing or compilation, in most cases a valid op
14188tree (most likely null) is returned anyway. The error is reflected in
14189the parser state, normally resulting in a single exception at the top
14190level of parsing which covers all the compilation errors that occurred.
14191Some compilation errors, however, will throw an exception immediately.
14192
14193The I<flags> parameter is reserved for future use, and must always
14194be zero.
14195
14196=cut
14197*/
14198
14199OP *
14200Perl_parse_fullstmt(pTHX_ U32 flags)
14201{
28ac2b49
Z
14202 if (flags)
14203 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
e53d8f76 14204 return parse_recdescent_for_op(GRAMFULLSTMT);
28ac2b49
Z
14205}
14206
07ffcb73
Z
14207/*
14208=for apidoc Amx|OP *|parse_stmtseq|U32 flags
14209
14210Parse a sequence of zero or more Perl statements. These may be normal
14211imperative statements, including optional labels, or declarations
14212that have compile-time effect, or any mixture thereof. The statement
14213sequence ends when a closing brace or end-of-file is encountered in a
14214place where a new statement could have validly started. It is up to
14215the caller to ensure that the dynamic parser state (L</PL_parser> et al)
14216is correctly set to reflect the source of the code to be parsed and the
14217lexical context for the statements.
14218
14219The op tree representing the statement sequence is returned. This may
14220be a null pointer if the statements were all null, for example if there
14221were no statements or if there were only subroutine definitions (which
14222have compile-time side effects). If not null, it will be a C<lineseq>
14223list, normally including C<nextstate> or equivalent ops.
14224
14225If an error occurs in parsing or compilation, in most cases a valid op
14226tree is returned anyway. The error is reflected in the parser state,
14227normally resulting in a single exception at the top level of parsing
14228which covers all the compilation errors that occurred. Some compilation
14229errors, however, will throw an exception immediately.
14230
14231The I<flags> parameter is reserved for future use, and must always
14232be zero.
14233
14234=cut
14235*/
14236
14237OP *
14238Perl_parse_stmtseq(pTHX_ U32 flags)
14239{
14240 OP *stmtseqop;
e53d8f76 14241 I32 c;
07ffcb73
Z
14242 if (flags)
14243 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
e53d8f76
Z
14244 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ);
14245 c = lex_peek_unichar(0);
14246 if (c != -1 && c != /*{*/'}')
07ffcb73 14247 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
14248 return stmtseqop;
14249}
14250
ea25a9b2 14251void
f7e3d326 14252Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 14253{
f7e3d326 14254 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2
Z
14255 deprecate("qw(...) as parentheses");
14256 force_next(')');
14257 if (qwlist->op_type == OP_STUB) {
14258 op_free(qwlist);
14259 }
14260 else {
3d8e05a0 14261 start_force(PL_curforce);
ea25a9b2
Z
14262 NEXTVAL_NEXTTOKE.opval = qwlist;
14263 force_next(THING);
14264 }
14265 force_next('(');
14266}
14267
28ac2b49 14268/*
1da4ca5f
NC
14269 * Local variables:
14270 * c-indentation-style: bsd
14271 * c-basic-offset: 4
14272 * indent-tabs-mode: t
14273 * End:
14274 *
37442d52
RGS
14275 * ex: set ts=8 sts=4 sw=4 noet:
14276 */