This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
[perl #77238] Aliased @ISA does not work
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
51#define PL_lex_brackstack (PL_parser->lex_brackstack)
52#define PL_lex_casemods (PL_parser->lex_casemods)
53#define PL_lex_casestack (PL_parser->lex_casestack)
54#define PL_lex_defer (PL_parser->lex_defer)
55#define PL_lex_dojoin (PL_parser->lex_dojoin)
56#define PL_lex_expect (PL_parser->lex_expect)
57#define PL_lex_formbrack (PL_parser->lex_formbrack)
58#define PL_lex_inpat (PL_parser->lex_inpat)
59#define PL_lex_inwhat (PL_parser->lex_inwhat)
60#define PL_lex_op (PL_parser->lex_op)
61#define PL_lex_repl (PL_parser->lex_repl)
62#define PL_lex_starts (PL_parser->lex_starts)
63#define PL_lex_stuff (PL_parser->lex_stuff)
64#define PL_multi_start (PL_parser->multi_start)
65#define PL_multi_open (PL_parser->multi_open)
66#define PL_multi_close (PL_parser->multi_close)
67#define PL_pending_ident (PL_parser->pending_ident)
68#define PL_preambled (PL_parser->preambled)
69#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 70#define PL_linestr (PL_parser->linestr)
c2598295
DM
71#define PL_expect (PL_parser->expect)
72#define PL_copline (PL_parser->copline)
f06b5848
DM
73#define PL_bufptr (PL_parser->bufptr)
74#define PL_oldbufptr (PL_parser->oldbufptr)
75#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
76#define PL_linestart (PL_parser->linestart)
77#define PL_bufend (PL_parser->bufend)
78#define PL_last_uni (PL_parser->last_uni)
79#define PL_last_lop (PL_parser->last_lop)
80#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 81#define PL_lex_state (PL_parser->lex_state)
2f9285f8 82#define PL_rsfp (PL_parser->rsfp)
5486870f 83#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
84#define PL_in_my (PL_parser->in_my)
85#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 86#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 87#define PL_multi_end (PL_parser->multi_end)
13765c85 88#define PL_error_count (PL_parser->error_count)
199e78b7
DM
89
90#ifdef PERL_MAD
91# define PL_endwhite (PL_parser->endwhite)
92# define PL_faketokens (PL_parser->faketokens)
93# define PL_lasttoke (PL_parser->lasttoke)
94# define PL_nextwhite (PL_parser->nextwhite)
95# define PL_realtokenstart (PL_parser->realtokenstart)
96# define PL_skipwhite (PL_parser->skipwhite)
97# define PL_thisclose (PL_parser->thisclose)
98# define PL_thismad (PL_parser->thismad)
99# define PL_thisopen (PL_parser->thisopen)
100# define PL_thisstuff (PL_parser->thisstuff)
101# define PL_thistoken (PL_parser->thistoken)
102# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
103# define PL_thiswhite (PL_parser->thiswhite)
104# define PL_nexttoke (PL_parser->nexttoke)
105# define PL_curforce (PL_parser->curforce)
106#else
107# define PL_nexttoke (PL_parser->nexttoke)
108# define PL_nexttype (PL_parser->nexttype)
109# define PL_nextval (PL_parser->nextval)
199e78b7
DM
110#endif
111
16173588
NC
112/* This can't be done with embed.fnc, because struct yy_parser contains a
113 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
114static int
115S_pending_ident(pTHX);
199e78b7 116
0bd48802 117static const char ident_too_long[] = "Identifier too long";
8903cb82 118
29595ff2 119#ifdef PERL_MAD
29595ff2 120# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 121# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 122#else
5db06880 123# define CURMAD(slot,sv)
9ded7720 124# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
125#endif
126
a7aaec61
Z
127#define XENUMMASK 0x3f
128#define XFAKEEOF 0x40
129#define XFAKEBRACK 0x80
9059aa12 130
39e02b42
JH
131#ifdef USE_UTF8_SCRIPTS
132# define UTF (!IN_BYTES)
2b9d42f0 133#else
746b446a 134# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 135#endif
a0ed51b3 136
b1fc3636
CJ
137/* The maximum number of characters preceding the unrecognized one to display */
138#define UNRECOGNIZED_PRECEDE_COUNT 10
139
61f0cdd9 140/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
141 * 1999-02-27 mjd-perl-patch@plover.com */
142#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
143
bf4acbe4 144#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 145
ffb4593c
NT
146/* LEX_* are values for PL_lex_state, the state of the lexer.
147 * They are arranged oddly so that the guard on the switch statement
79072805
LW
148 * can get by with a single comparison (if the compiler is smart enough).
149 */
150
fb73857a 151/* #define LEX_NOTPARSING 11 is done in perl.h. */
152
b6007c36
DM
153#define LEX_NORMAL 10 /* normal code (ie not within "...") */
154#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
155#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
156#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
157#define LEX_INTERPSTART 6 /* expecting the start of a $var */
158
159 /* at end of code, eg "$x" followed by: */
160#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
161#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162
163#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
164 string or after \E, $foo, etc */
165#define LEX_INTERPCONST 2 /* NOT USED */
166#define LEX_FORMLINE 1 /* expecting a format line */
167#define LEX_KNOWNEXT 0 /* next token known; just return it */
168
79072805 169
bbf60fe6 170#ifdef DEBUGGING
27da23d5 171static const char* const lex_state_names[] = {
bbf60fe6
DM
172 "KNOWNEXT",
173 "FORMLINE",
174 "INTERPCONST",
175 "INTERPCONCAT",
176 "INTERPENDMAYBE",
177 "INTERPEND",
178 "INTERPSTART",
179 "INTERPPUSH",
180 "INTERPCASEMOD",
181 "INTERPNORMAL",
182 "NORMAL"
183};
184#endif
185
79072805
LW
186#ifdef ff_next
187#undef ff_next
d48672a2
LW
188#endif
189
79072805 190#include "keywords.h"
fe14fcc3 191
ffb4593c
NT
192/* CLINE is a macro that ensures PL_copline has a sane value */
193
ae986130
LW
194#ifdef CLINE
195#undef CLINE
196#endif
57843af0 197#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 198
5db06880 199#ifdef PERL_MAD
29595ff2
NC
200# define SKIPSPACE0(s) skipspace0(s)
201# define SKIPSPACE1(s) skipspace1(s)
202# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
203# define PEEKSPACE(s) skipspace2(s,0)
204#else
205# define SKIPSPACE0(s) skipspace(s)
206# define SKIPSPACE1(s) skipspace(s)
207# define SKIPSPACE2(s,tsv) skipspace(s)
208# define PEEKSPACE(s) skipspace(s)
209#endif
210
ffb4593c
NT
211/*
212 * Convenience functions to return different tokens and prime the
9cbb5ea2 213 * lexer for the next token. They all take an argument.
ffb4593c
NT
214 *
215 * TOKEN : generic token (used for '(', DOLSHARP, etc)
216 * OPERATOR : generic operator
217 * AOPERATOR : assignment operator
218 * PREBLOCK : beginning the block after an if, while, foreach, ...
219 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
220 * PREREF : *EXPR where EXPR is not a simple identifier
221 * TERM : expression term
222 * LOOPX : loop exiting command (goto, last, dump, etc)
223 * FTST : file test operator
224 * FUN0 : zero-argument function
2d2e263d 225 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
226 * BOop : bitwise or or xor
227 * BAop : bitwise and
228 * SHop : shift operator
229 * PWop : power operator
9cbb5ea2 230 * PMop : pattern-matching operator
ffb4593c
NT
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
e5edeb50 234 * Rop : relational operator <= != gt
ffb4593c
NT
235 *
236 * Also see LOP and lop() below.
237 */
238
998054bd 239#ifdef DEBUGGING /* Serve -DT. */
704d4215 240# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 241#else
bbf60fe6 242# define REPORT(retval) (retval)
998054bd
SC
243#endif
244
bbf60fe6
DM
245#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
252#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
255#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
256#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
257#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
258#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
259#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
260#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
261#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
262#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
263#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
264#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 265
a687059c
LW
266/* This bit of chicanery makes a unary function followed by
267 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
268 * The UNIDOR macro is for unary functions that can be followed by the //
269 * operator (such as C<shift // 0>).
a687059c 270 */
376fcdbf 271#define UNI2(f,x) { \
6154021b 272 pl_yylval.ival = f; \
376fcdbf
AL
273 PL_expect = x; \
274 PL_bufptr = s; \
275 PL_last_uni = PL_oldbufptr; \
276 PL_last_lop_op = f; \
277 if (*s == '(') \
278 return REPORT( (int)FUNC1 ); \
29595ff2 279 s = PEEKSPACE(s); \
376fcdbf
AL
280 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
281 }
6f33ba73
RGS
282#define UNI(f) UNI2(f,XTERM)
283#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 284
376fcdbf 285#define UNIBRACK(f) { \
6154021b 286 pl_yylval.ival = f; \
376fcdbf
AL
287 PL_bufptr = s; \
288 PL_last_uni = PL_oldbufptr; \
289 if (*s == '(') \
290 return REPORT( (int)FUNC1 ); \
29595ff2 291 s = PEEKSPACE(s); \
376fcdbf
AL
292 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
293 }
79072805 294
9f68db38 295/* grandfather return to old style */
6154021b 296#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 297
8fa7f367
JH
298#ifdef DEBUGGING
299
6154021b 300/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
301enum token_type {
302 TOKENTYPE_NONE,
303 TOKENTYPE_IVAL,
6154021b 304 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
305 TOKENTYPE_PVAL,
306 TOKENTYPE_OPVAL,
307 TOKENTYPE_GVVAL
308};
309
6d4a66ac
NC
310static struct debug_tokens {
311 const int token;
312 enum token_type type;
313 const char *name;
314} const debug_tokens[] =
9041c2e3 315{
bbf60fe6
DM
316 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
317 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
318 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
319 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
320 { ARROW, TOKENTYPE_NONE, "ARROW" },
321 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
322 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
323 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
324 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
325 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 326 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
327 { DO, TOKENTYPE_NONE, "DO" },
328 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
329 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
330 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
331 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
332 { ELSE, TOKENTYPE_NONE, "ELSE" },
333 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
334 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
335 { FOR, TOKENTYPE_IVAL, "FOR" },
336 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
337 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
338 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
339 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
340 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
341 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 342 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
343 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
344 { IF, TOKENTYPE_IVAL, "IF" },
345 { LABEL, TOKENTYPE_PVAL, "LABEL" },
346 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
347 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
348 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
349 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
350 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
351 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
352 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
353 { MY, TOKENTYPE_IVAL, "MY" },
354 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
355 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
356 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
357 { OROP, TOKENTYPE_IVAL, "OROP" },
358 { OROR, TOKENTYPE_NONE, "OROR" },
359 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
360 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
361 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
362 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
363 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
364 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
365 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
366 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
367 { PREINC, TOKENTYPE_NONE, "PREINC" },
368 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
369 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
370 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
371 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
372 { SUB, TOKENTYPE_NONE, "SUB" },
373 { THING, TOKENTYPE_OPVAL, "THING" },
374 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
375 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
376 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
377 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
378 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
379 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 380 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
381 { WHILE, TOKENTYPE_IVAL, "WHILE" },
382 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 383 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 384 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
385};
386
6154021b 387/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 388
bbf60fe6 389STATIC int
704d4215 390S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 391{
97aff369 392 dVAR;
7918f24d
NC
393
394 PERL_ARGS_ASSERT_TOKEREPORT;
395
bbf60fe6 396 if (DEBUG_T_TEST) {
bd61b366 397 const char *name = NULL;
bbf60fe6 398 enum token_type type = TOKENTYPE_NONE;
f54cb97a 399 const struct debug_tokens *p;
396482e1 400 SV* const report = newSVpvs("<== ");
bbf60fe6 401
f54cb97a 402 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
403 if (p->token == (int)rv) {
404 name = p->name;
405 type = p->type;
406 break;
407 }
408 }
409 if (name)
54667de8 410 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
411 else if ((char)rv > ' ' && (char)rv < '~')
412 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
413 else if (!rv)
396482e1 414 sv_catpvs(report, "EOF");
bbf60fe6
DM
415 else
416 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
417 switch (type) {
418 case TOKENTYPE_NONE:
419 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
420 break;
421 case TOKENTYPE_IVAL:
704d4215 422 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
423 break;
424 case TOKENTYPE_OPNUM:
425 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 426 PL_op_name[lvalp->ival]);
bbf60fe6
DM
427 break;
428 case TOKENTYPE_PVAL:
704d4215 429 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
430 break;
431 case TOKENTYPE_OPVAL:
704d4215 432 if (lvalp->opval) {
401441c0 433 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
434 PL_op_name[lvalp->opval->op_type]);
435 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 436 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 437 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
438 }
439
440 }
401441c0 441 else
396482e1 442 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
443 break;
444 }
b6007c36 445 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
446 };
447 return (int)rv;
998054bd
SC
448}
449
b6007c36
DM
450
451/* print the buffer with suitable escapes */
452
453STATIC void
15f169a1 454S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 455{
396482e1 456 SV* const tmp = newSVpvs("");
7918f24d
NC
457
458 PERL_ARGS_ASSERT_PRINTBUF;
459
b6007c36
DM
460 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
461 SvREFCNT_dec(tmp);
462}
463
8fa7f367
JH
464#endif
465
8290c323
NC
466static int
467S_deprecate_commaless_var_list(pTHX) {
468 PL_expect = XTERM;
469 deprecate("comma-less variable list");
470 return REPORT(','); /* grandfather non-comma-format format */
471}
472
ffb4593c
NT
473/*
474 * S_ao
475 *
c963b151
BD
476 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
477 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
478 */
479
76e3520e 480STATIC int
cea2e8a9 481S_ao(pTHX_ int toketype)
a0d0e21e 482{
97aff369 483 dVAR;
3280af22
NIS
484 if (*PL_bufptr == '=') {
485 PL_bufptr++;
a0d0e21e 486 if (toketype == ANDAND)
6154021b 487 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 488 else if (toketype == OROR)
6154021b 489 pl_yylval.ival = OP_ORASSIGN;
c963b151 490 else if (toketype == DORDOR)
6154021b 491 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
492 toketype = ASSIGNOP;
493 }
494 return toketype;
495}
496
ffb4593c
NT
497/*
498 * S_no_op
499 * When Perl expects an operator and finds something else, no_op
500 * prints the warning. It always prints "<something> found where
501 * operator expected. It prints "Missing semicolon on previous line?"
502 * if the surprise occurs at the start of the line. "do you need to
503 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
504 * where the compiler doesn't know if foo is a method call or a function.
505 * It prints "Missing operator before end of line" if there's nothing
506 * after the missing operator, or "... before <...>" if there is something
507 * after the missing operator.
508 */
509
76e3520e 510STATIC void
15f169a1 511S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 512{
97aff369 513 dVAR;
9d4ba2ae
AL
514 char * const oldbp = PL_bufptr;
515 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 516
7918f24d
NC
517 PERL_ARGS_ASSERT_NO_OP;
518
1189a94a
GS
519 if (!s)
520 s = oldbp;
07c798fb 521 else
1189a94a 522 PL_bufptr = s;
cea2e8a9 523 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
524 if (ckWARN_d(WARN_SYNTAX)) {
525 if (is_first)
526 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
527 "\t(Missing semicolon on previous line?)\n");
528 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 529 const char *t;
c35e046a
AL
530 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
531 NOOP;
56da5a46
RGS
532 if (t < PL_bufptr && isSPACE(*t))
533 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
534 "\t(Do you need to predeclare %.*s?)\n",
551405c4 535 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
536 }
537 else {
538 assert(s >= oldbp);
539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 540 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 541 }
07c798fb 542 }
3280af22 543 PL_bufptr = oldbp;
8990e307
LW
544}
545
ffb4593c
NT
546/*
547 * S_missingterm
548 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 549 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
550 * If we're in a delimited string and the delimiter is a control
551 * character, it's reformatted into a two-char sequence like ^C.
552 * This is fatal.
553 */
554
76e3520e 555STATIC void
cea2e8a9 556S_missingterm(pTHX_ char *s)
8990e307 557{
97aff369 558 dVAR;
8990e307
LW
559 char tmpbuf[3];
560 char q;
561 if (s) {
9d4ba2ae 562 char * const nl = strrchr(s,'\n');
d2719217 563 if (nl)
8990e307
LW
564 *nl = '\0';
565 }
463559e7 566 else if (isCNTRL(PL_multi_close)) {
8990e307 567 *tmpbuf = '^';
585ec06d 568 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
569 tmpbuf[2] = '\0';
570 s = tmpbuf;
571 }
572 else {
eb160463 573 *tmpbuf = (char)PL_multi_close;
8990e307
LW
574 tmpbuf[1] = '\0';
575 s = tmpbuf;
576 }
577 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 578 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 579}
79072805 580
ef89dcc3 581#define FEATURE_IS_ENABLED(name) \
0d863452 582 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 583 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 584/* The longest string we pass in. */
1863b879 585#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 586
0d863452
RH
587/*
588 * S_feature_is_enabled
589 * Check whether the named feature is enabled.
590 */
591STATIC bool
15f169a1 592S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 593{
97aff369 594 dVAR;
0d863452 595 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 596 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
597
598 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
599
4a731d7b
NC
600 assert(namelen <= MAX_FEATURE_LEN);
601 memcpy(&he_name[8], name, namelen);
d4c19fe8 602
7b9ef140 603 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
604}
605
ffb4593c 606/*
9cbb5ea2
GS
607 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
608 * utf16-to-utf8-reversed.
ffb4593c
NT
609 */
610
c39cd008
GS
611#ifdef PERL_CR_FILTER
612static void
613strip_return(SV *sv)
614{
95a20fc0 615 register const char *s = SvPVX_const(sv);
9d4ba2ae 616 register const char * const e = s + SvCUR(sv);
7918f24d
NC
617
618 PERL_ARGS_ASSERT_STRIP_RETURN;
619
c39cd008
GS
620 /* outer loop optimized to do nothing if there are no CR-LFs */
621 while (s < e) {
622 if (*s++ == '\r' && *s == '\n') {
623 /* hit a CR-LF, need to copy the rest */
624 register char *d = s - 1;
625 *d++ = *s++;
626 while (s < e) {
627 if (*s == '\r' && s[1] == '\n')
628 s++;
629 *d++ = *s++;
630 }
631 SvCUR(sv) -= s - d;
632 return;
633 }
634 }
635}
a868473f 636
76e3520e 637STATIC I32
c39cd008 638S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 639{
f54cb97a 640 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
641 if (count > 0 && !maxlen)
642 strip_return(sv);
643 return count;
a868473f
NIS
644}
645#endif
646
ffb4593c 647/*
8eaa0acf
Z
648=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
649
650Creates and initialises a new lexer/parser state object, supplying
651a context in which to lex and parse from a new source of Perl code.
652A pointer to the new state object is placed in L</PL_parser>. An entry
653is made on the save stack so that upon unwinding the new state object
654will be destroyed and the former value of L</PL_parser> will be restored.
655Nothing else need be done to clean up the parsing context.
656
657The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
658non-null, provides a string (in SV form) containing code to be parsed.
659A copy of the string is made, so subsequent modification of I<line>
660does not affect parsing. I<rsfp>, if non-null, provides an input stream
661from which code will be read to be parsed. If both are non-null, the
662code in I<line> comes first and must consist of complete lines of input,
663and I<rsfp> supplies the remainder of the source.
664
665The I<flags> parameter is reserved for future use, and must always
666be zero.
667
668=cut
669*/
ffb4593c 670
a0d0e21e 671void
8eaa0acf 672Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 673{
97aff369 674 dVAR;
6ef55633 675 const char *s = NULL;
8990e307 676 STRLEN len;
5486870f 677 yy_parser *parser, *oparser;
8eaa0acf
Z
678 if (flags)
679 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
680
681 /* create and initialise a parser */
682
199e78b7 683 Newxz(parser, 1, yy_parser);
5486870f 684 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
685 PL_parser = parser;
686
28ac2b49
Z
687 parser->stack = NULL;
688 parser->ps = NULL;
689 parser->stack_size = 0;
acdf0a21 690
e3abe207
DM
691 /* on scope exit, free this parser and restore any outer one */
692 SAVEPARSER(parser);
7c4baf47 693 parser->saved_curcop = PL_curcop;
e3abe207 694
acdf0a21 695 /* initialise lexer state */
8990e307 696
fb205e7a
DM
697#ifdef PERL_MAD
698 parser->curforce = -1;
699#else
700 parser->nexttoke = 0;
701#endif
ca4cfd28 702 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 703 parser->copline = NOLINE;
5afb0a62 704 parser->lex_state = LEX_NORMAL;
c2598295 705 parser->expect = XSTATE;
2f9285f8 706 parser->rsfp = rsfp;
f07ec6dd 707 parser->rsfp_filters = newAV();
2f9285f8 708
199e78b7
DM
709 Newx(parser->lex_brackstack, 120, char);
710 Newx(parser->lex_casestack, 12, char);
711 *parser->lex_casestack = '\0';
02b34bbe 712
10efb74f
NC
713 if (line) {
714 s = SvPV_const(line, len);
715 } else {
716 len = 0;
717 }
bdc0bf6f 718
10efb74f 719 if (!len) {
bdc0bf6f 720 parser->linestr = newSVpvs("\n;");
805700c1 721 } else {
719a9bb0 722 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 723 if (s[len-1] != ';')
bdc0bf6f 724 sv_catpvs(parser->linestr, "\n;");
8990e307 725 }
f06b5848
DM
726 parser->oldoldbufptr =
727 parser->oldbufptr =
728 parser->bufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
737c24fc
Z
732
733 parser->in_pod = 0;
79072805 734}
a687059c 735
e3abe207
DM
736
737/* delete a parser object */
738
739void
740Perl_parser_free(pTHX_ const yy_parser *parser)
741{
7918f24d
NC
742 PERL_ARGS_ASSERT_PARSER_FREE;
743
7c4baf47 744 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
745 SvREFCNT_dec(parser->linestr);
746
2f9285f8
DM
747 if (parser->rsfp == PerlIO_stdin())
748 PerlIO_clearerr(parser->rsfp);
799361c3
SH
749 else if (parser->rsfp && (!parser->old_parser ||
750 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 751 PerlIO_close(parser->rsfp);
5486870f 752 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 753
e3abe207
DM
754 Safefree(parser->lex_brackstack);
755 Safefree(parser->lex_casestack);
756 PL_parser = parser->old_parser;
757 Safefree(parser);
758}
759
760
ffb4593c 761/*
f0e67a1d
Z
762=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
763
764Buffer scalar containing the chunk currently under consideration of the
765text currently being lexed. This is always a plain string scalar (for
766which C<SvPOK> is true). It is not intended to be used as a scalar by
767normal scalar means; instead refer to the buffer directly by the pointer
768variables described below.
769
770The lexer maintains various C<char*> pointers to things in the
771C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
772reallocated, all of these pointers must be updated. Don't attempt to
773do this manually, but rather use L</lex_grow_linestr> if you need to
774reallocate the buffer.
775
776The content of the text chunk in the buffer is commonly exactly one
777complete line of input, up to and including a newline terminator,
778but there are situations where it is otherwise. The octets of the
779buffer may be intended to be interpreted as either UTF-8 or Latin-1.
780The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
781flag on this scalar, which may disagree with it.
782
783For direct examination of the buffer, the variable
784L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
785lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
786of these pointers is usually preferable to examination of the scalar
787through normal scalar means.
788
789=for apidoc AmxU|char *|PL_parser-E<gt>bufend
790
791Direct pointer to the end of the chunk of text currently being lexed, the
792end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
793+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
794always located at the end of the buffer, and does not count as part of
795the buffer's contents.
796
797=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
798
799Points to the current position of lexing inside the lexer buffer.
800Characters around this point may be freely examined, within
801the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
802L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
803interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
804
805Lexing code (whether in the Perl core or not) moves this pointer past
806the characters that it consumes. It is also expected to perform some
807bookkeeping whenever a newline character is consumed. This movement
808can be more conveniently performed by the function L</lex_read_to>,
809which handles newlines appropriately.
810
811Interpretation of the buffer's octets can be abstracted out by
812using the slightly higher-level functions L</lex_peek_unichar> and
813L</lex_read_unichar>.
814
815=for apidoc AmxU|char *|PL_parser-E<gt>linestart
816
817Points to the start of the current line inside the lexer buffer.
818This is useful for indicating at which column an error occurred, and
819not much else. This must be updated by any lexing code that consumes
820a newline; the function L</lex_read_to> handles this detail.
821
822=cut
823*/
824
825/*
826=for apidoc Amx|bool|lex_bufutf8
827
828Indicates whether the octets in the lexer buffer
829(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
830of Unicode characters. If not, they should be interpreted as Latin-1
831characters. This is analogous to the C<SvUTF8> flag for scalars.
832
833In UTF-8 mode, it is not guaranteed that the lexer buffer actually
834contains valid UTF-8. Lexing code must be robust in the face of invalid
835encoding.
836
837The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
838is significant, but not the whole story regarding the input character
839encoding. Normally, when a file is being read, the scalar contains octets
840and its C<SvUTF8> flag is off, but the octets should be interpreted as
841UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
842however, the scalar may have the C<SvUTF8> flag on, and in this case its
843octets should be interpreted as UTF-8 unless the C<use bytes> pragma
844is in effect. This logic may change in the future; use this function
845instead of implementing the logic yourself.
846
847=cut
848*/
849
850bool
851Perl_lex_bufutf8(pTHX)
852{
853 return UTF;
854}
855
856/*
857=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
858
859Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
860at least I<len> octets (including terminating NUL). Returns a
861pointer to the reallocated buffer. This is necessary before making
862any direct modification of the buffer that would increase its length.
863L</lex_stuff_pvn> provides a more convenient way to insert text into
864the buffer.
865
866Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
867this function updates all of the lexer's variables that point directly
868into the buffer.
869
870=cut
871*/
872
873char *
874Perl_lex_grow_linestr(pTHX_ STRLEN len)
875{
876 SV *linestr;
877 char *buf;
878 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
879 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
880 linestr = PL_parser->linestr;
881 buf = SvPVX(linestr);
882 if (len <= SvLEN(linestr))
883 return buf;
884 bufend_pos = PL_parser->bufend - buf;
885 bufptr_pos = PL_parser->bufptr - buf;
886 oldbufptr_pos = PL_parser->oldbufptr - buf;
887 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
888 linestart_pos = PL_parser->linestart - buf;
889 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
890 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
891 buf = sv_grow(linestr, len);
892 PL_parser->bufend = buf + bufend_pos;
893 PL_parser->bufptr = buf + bufptr_pos;
894 PL_parser->oldbufptr = buf + oldbufptr_pos;
895 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
896 PL_parser->linestart = buf + linestart_pos;
897 if (PL_parser->last_uni)
898 PL_parser->last_uni = buf + last_uni_pos;
899 if (PL_parser->last_lop)
900 PL_parser->last_lop = buf + last_lop_pos;
901 return buf;
902}
903
904/*
83aa740e 905=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
906
907Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
908immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
909reallocating the buffer if necessary. This means that lexing code that
910runs later will see the characters as if they had appeared in the input.
911It is not recommended to do this as part of normal parsing, and most
912uses of this facility run the risk of the inserted characters being
913interpreted in an unintended manner.
914
915The string to be inserted is represented by I<len> octets starting
916at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
917according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
918The characters are recoded for the lexer buffer, according to how the
919buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 920to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
921function is more convenient.
922
923=cut
924*/
925
926void
83aa740e 927Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 928{
749123ff 929 dVAR;
f0e67a1d
Z
930 char *bufptr;
931 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
932 if (flags & ~(LEX_STUFF_UTF8))
933 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
934 if (UTF) {
935 if (flags & LEX_STUFF_UTF8) {
936 goto plain_copy;
937 } else {
938 STRLEN highhalf = 0;
83aa740e 939 const char *p, *e = pv+len;
f0e67a1d
Z
940 for (p = pv; p != e; p++)
941 highhalf += !!(((U8)*p) & 0x80);
942 if (!highhalf)
943 goto plain_copy;
944 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
945 bufptr = PL_parser->bufptr;
946 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
947 SvCUR_set(PL_parser->linestr,
948 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
949 PL_parser->bufend += len+highhalf;
950 for (p = pv; p != e; p++) {
951 U8 c = (U8)*p;
952 if (c & 0x80) {
953 *bufptr++ = (char)(0xc0 | (c >> 6));
954 *bufptr++ = (char)(0x80 | (c & 0x3f));
955 } else {
956 *bufptr++ = (char)c;
957 }
958 }
959 }
960 } else {
961 if (flags & LEX_STUFF_UTF8) {
962 STRLEN highhalf = 0;
83aa740e 963 const char *p, *e = pv+len;
f0e67a1d
Z
964 for (p = pv; p != e; p++) {
965 U8 c = (U8)*p;
966 if (c >= 0xc4) {
967 Perl_croak(aTHX_ "Lexing code attempted to stuff "
968 "non-Latin-1 character into Latin-1 input");
969 } else if (c >= 0xc2 && p+1 != e &&
970 (((U8)p[1]) & 0xc0) == 0x80) {
971 p++;
972 highhalf++;
973 } else if (c >= 0x80) {
974 /* malformed UTF-8 */
975 ENTER;
976 SAVESPTR(PL_warnhook);
977 PL_warnhook = PERL_WARNHOOK_FATAL;
978 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
979 LEAVE;
980 }
981 }
982 if (!highhalf)
983 goto plain_copy;
984 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
985 bufptr = PL_parser->bufptr;
986 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
987 SvCUR_set(PL_parser->linestr,
988 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
989 PL_parser->bufend += len-highhalf;
990 for (p = pv; p != e; p++) {
991 U8 c = (U8)*p;
992 if (c & 0x80) {
993 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
994 p++;
995 } else {
996 *bufptr++ = (char)c;
997 }
998 }
999 } else {
1000 plain_copy:
1001 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1002 bufptr = PL_parser->bufptr;
1003 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1004 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1005 PL_parser->bufend += len;
1006 Copy(pv, bufptr, len, char);
1007 }
1008 }
1009}
1010
1011/*
9dcc53ea
Z
1012=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1013
1014Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1015immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1016reallocating the buffer if necessary. This means that lexing code that
1017runs later will see the characters as if they had appeared in the input.
1018It is not recommended to do this as part of normal parsing, and most
1019uses of this facility run the risk of the inserted characters being
1020interpreted in an unintended manner.
1021
1022The string to be inserted is represented by octets starting at I<pv>
1023and continuing to the first nul. These octets are interpreted as either
1024UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1025in I<flags>. The characters are recoded for the lexer buffer, according
1026to how the buffer is currently being interpreted (L</lex_bufutf8>).
1027If it is not convenient to nul-terminate a string to be inserted, the
1028L</lex_stuff_pvn> function is more appropriate.
1029
1030=cut
1031*/
1032
1033void
1034Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1035{
1036 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1037 lex_stuff_pvn(pv, strlen(pv), flags);
1038}
1039
1040/*
f0e67a1d
Z
1041=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1042
1043Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1044immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1045reallocating the buffer if necessary. This means that lexing code that
1046runs later will see the characters as if they had appeared in the input.
1047It is not recommended to do this as part of normal parsing, and most
1048uses of this facility run the risk of the inserted characters being
1049interpreted in an unintended manner.
1050
1051The string to be inserted is the string value of I<sv>. The characters
1052are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1053being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1054not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1055need to construct a scalar.
1056
1057=cut
1058*/
1059
1060void
1061Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1062{
1063 char *pv;
1064 STRLEN len;
1065 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1066 if (flags)
1067 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1068 pv = SvPV(sv, len);
1069 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1070}
1071
1072/*
1073=for apidoc Amx|void|lex_unstuff|char *ptr
1074
1075Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1076I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1077This hides the discarded text from any lexing code that runs later,
1078as if the text had never appeared.
1079
1080This is not the normal way to consume lexed text. For that, use
1081L</lex_read_to>.
1082
1083=cut
1084*/
1085
1086void
1087Perl_lex_unstuff(pTHX_ char *ptr)
1088{
1089 char *buf, *bufend;
1090 STRLEN unstuff_len;
1091 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1092 buf = PL_parser->bufptr;
1093 if (ptr < buf)
1094 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1095 if (ptr == buf)
1096 return;
1097 bufend = PL_parser->bufend;
1098 if (ptr > bufend)
1099 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1100 unstuff_len = ptr - buf;
1101 Move(ptr, buf, bufend+1-ptr, char);
1102 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1103 PL_parser->bufend = bufend - unstuff_len;
1104}
1105
1106/*
1107=for apidoc Amx|void|lex_read_to|char *ptr
1108
1109Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1110to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1111performing the correct bookkeeping whenever a newline character is passed.
1112This is the normal way to consume lexed text.
1113
1114Interpretation of the buffer's octets can be abstracted out by
1115using the slightly higher-level functions L</lex_peek_unichar> and
1116L</lex_read_unichar>.
1117
1118=cut
1119*/
1120
1121void
1122Perl_lex_read_to(pTHX_ char *ptr)
1123{
1124 char *s;
1125 PERL_ARGS_ASSERT_LEX_READ_TO;
1126 s = PL_parser->bufptr;
1127 if (ptr < s || ptr > PL_parser->bufend)
1128 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1129 for (; s != ptr; s++)
1130 if (*s == '\n') {
1131 CopLINE_inc(PL_curcop);
1132 PL_parser->linestart = s+1;
1133 }
1134 PL_parser->bufptr = ptr;
1135}
1136
1137/*
1138=for apidoc Amx|void|lex_discard_to|char *ptr
1139
1140Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1141up to I<ptr>. The remaining content of the buffer will be moved, and
1142all pointers into the buffer updated appropriately. I<ptr> must not
1143be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1144it is not permitted to discard text that has yet to be lexed.
1145
1146Normally it is not necessarily to do this directly, because it suffices to
1147use the implicit discarding behaviour of L</lex_next_chunk> and things
1148based on it. However, if a token stretches across multiple lines,
1f317c95 1149and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1150that purpose, then after completion of the token it would be wise to
1151explicitly discard the now-unneeded earlier lines, to avoid future
1152multi-line tokens growing the buffer without bound.
1153
1154=cut
1155*/
1156
1157void
1158Perl_lex_discard_to(pTHX_ char *ptr)
1159{
1160 char *buf;
1161 STRLEN discard_len;
1162 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1163 buf = SvPVX(PL_parser->linestr);
1164 if (ptr < buf)
1165 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1166 if (ptr == buf)
1167 return;
1168 if (ptr > PL_parser->bufptr)
1169 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1170 discard_len = ptr - buf;
1171 if (PL_parser->oldbufptr < ptr)
1172 PL_parser->oldbufptr = ptr;
1173 if (PL_parser->oldoldbufptr < ptr)
1174 PL_parser->oldoldbufptr = ptr;
1175 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1176 PL_parser->last_uni = NULL;
1177 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1178 PL_parser->last_lop = NULL;
1179 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1180 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1181 PL_parser->bufend -= discard_len;
1182 PL_parser->bufptr -= discard_len;
1183 PL_parser->oldbufptr -= discard_len;
1184 PL_parser->oldoldbufptr -= discard_len;
1185 if (PL_parser->last_uni)
1186 PL_parser->last_uni -= discard_len;
1187 if (PL_parser->last_lop)
1188 PL_parser->last_lop -= discard_len;
1189}
1190
1191/*
1192=for apidoc Amx|bool|lex_next_chunk|U32 flags
1193
1194Reads in the next chunk of text to be lexed, appending it to
1195L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1196looked to the end of the current chunk and wants to know more. It is
1197usual, but not necessary, for lexing to have consumed the entirety of
1198the current chunk at this time.
1199
1200If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1201chunk (i.e., the current chunk has been entirely consumed), normally the
1202current chunk will be discarded at the same time that the new chunk is
1203read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1204will not be discarded. If the current chunk has not been entirely
1205consumed, then it will not be discarded regardless of the flag.
1206
1207Returns true if some new text was added to the buffer, or false if the
1208buffer has reached the end of the input text.
1209
1210=cut
1211*/
1212
1213#define LEX_FAKE_EOF 0x80000000
1214
1215bool
1216Perl_lex_next_chunk(pTHX_ U32 flags)
1217{
1218 SV *linestr;
1219 char *buf;
1220 STRLEN old_bufend_pos, new_bufend_pos;
1221 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1222 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1223 bool got_some_for_debugger = 0;
f0e67a1d
Z
1224 bool got_some;
1225 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1226 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1227 linestr = PL_parser->linestr;
1228 buf = SvPVX(linestr);
1229 if (!(flags & LEX_KEEP_PREVIOUS) &&
1230 PL_parser->bufptr == PL_parser->bufend) {
1231 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1232 linestart_pos = 0;
1233 if (PL_parser->last_uni != PL_parser->bufend)
1234 PL_parser->last_uni = NULL;
1235 if (PL_parser->last_lop != PL_parser->bufend)
1236 PL_parser->last_lop = NULL;
1237 last_uni_pos = last_lop_pos = 0;
1238 *buf = 0;
1239 SvCUR(linestr) = 0;
1240 } else {
1241 old_bufend_pos = PL_parser->bufend - buf;
1242 bufptr_pos = PL_parser->bufptr - buf;
1243 oldbufptr_pos = PL_parser->oldbufptr - buf;
1244 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1245 linestart_pos = PL_parser->linestart - buf;
1246 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1247 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1248 }
1249 if (flags & LEX_FAKE_EOF) {
1250 goto eof;
1251 } else if (!PL_parser->rsfp) {
1252 got_some = 0;
1253 } else if (filter_gets(linestr, old_bufend_pos)) {
1254 got_some = 1;
17cc9359 1255 got_some_for_debugger = 1;
f0e67a1d 1256 } else {
580561a3
Z
1257 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1258 sv_setpvs(linestr, "");
f0e67a1d
Z
1259 eof:
1260 /* End of real input. Close filehandle (unless it was STDIN),
1261 * then add implicit termination.
1262 */
1263 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1264 PerlIO_clearerr(PL_parser->rsfp);
1265 else if (PL_parser->rsfp)
1266 (void)PerlIO_close(PL_parser->rsfp);
1267 PL_parser->rsfp = NULL;
737c24fc 1268 PL_parser->in_pod = 0;
f0e67a1d
Z
1269#ifdef PERL_MAD
1270 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1271 PL_faketokens = 1;
1272#endif
1273 if (!PL_in_eval && PL_minus_p) {
1274 sv_catpvs(linestr,
1275 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1276 PL_minus_n = PL_minus_p = 0;
1277 } else if (!PL_in_eval && PL_minus_n) {
1278 sv_catpvs(linestr, /*{*/";}");
1279 PL_minus_n = 0;
1280 } else
1281 sv_catpvs(linestr, ";");
1282 got_some = 1;
1283 }
1284 buf = SvPVX(linestr);
1285 new_bufend_pos = SvCUR(linestr);
1286 PL_parser->bufend = buf + new_bufend_pos;
1287 PL_parser->bufptr = buf + bufptr_pos;
1288 PL_parser->oldbufptr = buf + oldbufptr_pos;
1289 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1290 PL_parser->linestart = buf + linestart_pos;
1291 if (PL_parser->last_uni)
1292 PL_parser->last_uni = buf + last_uni_pos;
1293 if (PL_parser->last_lop)
1294 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1295 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1296 PL_curstash != PL_debstash) {
1297 /* debugger active and we're not compiling the debugger code,
1298 * so store the line into the debugger's array of lines
1299 */
1300 update_debugger_info(NULL, buf+old_bufend_pos,
1301 new_bufend_pos-old_bufend_pos);
1302 }
1303 return got_some;
1304}
1305
1306/*
1307=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1308
1309Looks ahead one (Unicode) character in the text currently being lexed.
1310Returns the codepoint (unsigned integer value) of the next character,
1311or -1 if lexing has reached the end of the input text. To consume the
1312peeked character, use L</lex_read_unichar>.
1313
1314If the next character is in (or extends into) the next chunk of input
1315text, the next chunk will be read in. Normally the current chunk will be
1316discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1317then the current chunk will not be discarded.
1318
1319If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1320is encountered, an exception is generated.
1321
1322=cut
1323*/
1324
1325I32
1326Perl_lex_peek_unichar(pTHX_ U32 flags)
1327{
749123ff 1328 dVAR;
f0e67a1d
Z
1329 char *s, *bufend;
1330 if (flags & ~(LEX_KEEP_PREVIOUS))
1331 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1332 s = PL_parser->bufptr;
1333 bufend = PL_parser->bufend;
1334 if (UTF) {
1335 U8 head;
1336 I32 unichar;
1337 STRLEN len, retlen;
1338 if (s == bufend) {
1339 if (!lex_next_chunk(flags))
1340 return -1;
1341 s = PL_parser->bufptr;
1342 bufend = PL_parser->bufend;
1343 }
1344 head = (U8)*s;
1345 if (!(head & 0x80))
1346 return head;
1347 if (head & 0x40) {
1348 len = PL_utf8skip[head];
1349 while ((STRLEN)(bufend-s) < len) {
1350 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1351 break;
1352 s = PL_parser->bufptr;
1353 bufend = PL_parser->bufend;
1354 }
1355 }
1356 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1357 if (retlen == (STRLEN)-1) {
1358 /* malformed UTF-8 */
1359 ENTER;
1360 SAVESPTR(PL_warnhook);
1361 PL_warnhook = PERL_WARNHOOK_FATAL;
1362 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1363 LEAVE;
1364 }
1365 return unichar;
1366 } else {
1367 if (s == bufend) {
1368 if (!lex_next_chunk(flags))
1369 return -1;
1370 s = PL_parser->bufptr;
1371 }
1372 return (U8)*s;
1373 }
1374}
1375
1376/*
1377=for apidoc Amx|I32|lex_read_unichar|U32 flags
1378
1379Reads the next (Unicode) character in the text currently being lexed.
1380Returns the codepoint (unsigned integer value) of the character read,
1381and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1382if lexing has reached the end of the input text. To non-destructively
1383examine the next character, use L</lex_peek_unichar> instead.
1384
1385If the next character is in (or extends into) the next chunk of input
1386text, the next chunk will be read in. Normally the current chunk will be
1387discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1388then the current chunk will not be discarded.
1389
1390If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1391is encountered, an exception is generated.
1392
1393=cut
1394*/
1395
1396I32
1397Perl_lex_read_unichar(pTHX_ U32 flags)
1398{
1399 I32 c;
1400 if (flags & ~(LEX_KEEP_PREVIOUS))
1401 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1402 c = lex_peek_unichar(flags);
1403 if (c != -1) {
1404 if (c == '\n')
1405 CopLINE_inc(PL_curcop);
1406 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1407 }
1408 return c;
1409}
1410
1411/*
1412=for apidoc Amx|void|lex_read_space|U32 flags
1413
1414Reads optional spaces, in Perl style, in the text currently being
1415lexed. The spaces may include ordinary whitespace characters and
1416Perl-style comments. C<#line> directives are processed if encountered.
1417L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1418at a non-space character (or the end of the input text).
1419
1420If spaces extend into the next chunk of input text, the next chunk will
1421be read in. Normally the current chunk will be discarded at the same
1422time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1423chunk will not be discarded.
1424
1425=cut
1426*/
1427
f0998909
Z
1428#define LEX_NO_NEXT_CHUNK 0x80000000
1429
f0e67a1d
Z
1430void
1431Perl_lex_read_space(pTHX_ U32 flags)
1432{
1433 char *s, *bufend;
1434 bool need_incline = 0;
f0998909 1435 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1436 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1437#ifdef PERL_MAD
1438 if (PL_skipwhite) {
1439 sv_free(PL_skipwhite);
1440 PL_skipwhite = NULL;
1441 }
1442 if (PL_madskills)
1443 PL_skipwhite = newSVpvs("");
1444#endif /* PERL_MAD */
1445 s = PL_parser->bufptr;
1446 bufend = PL_parser->bufend;
1447 while (1) {
1448 char c = *s;
1449 if (c == '#') {
1450 do {
1451 c = *++s;
1452 } while (!(c == '\n' || (c == 0 && s == bufend)));
1453 } else if (c == '\n') {
1454 s++;
1455 PL_parser->linestart = s;
1456 if (s == bufend)
1457 need_incline = 1;
1458 else
1459 incline(s);
1460 } else if (isSPACE(c)) {
1461 s++;
1462 } else if (c == 0 && s == bufend) {
1463 bool got_more;
1464#ifdef PERL_MAD
1465 if (PL_madskills)
1466 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1467#endif /* PERL_MAD */
f0998909
Z
1468 if (flags & LEX_NO_NEXT_CHUNK)
1469 break;
f0e67a1d
Z
1470 PL_parser->bufptr = s;
1471 CopLINE_inc(PL_curcop);
1472 got_more = lex_next_chunk(flags);
1473 CopLINE_dec(PL_curcop);
1474 s = PL_parser->bufptr;
1475 bufend = PL_parser->bufend;
1476 if (!got_more)
1477 break;
1478 if (need_incline && PL_parser->rsfp) {
1479 incline(s);
1480 need_incline = 0;
1481 }
1482 } else {
1483 break;
1484 }
1485 }
1486#ifdef PERL_MAD
1487 if (PL_madskills)
1488 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1489#endif /* PERL_MAD */
1490 PL_parser->bufptr = s;
1491}
1492
1493/*
ffb4593c
NT
1494 * S_incline
1495 * This subroutine has nothing to do with tilting, whether at windmills
1496 * or pinball tables. Its name is short for "increment line". It
57843af0 1497 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1498 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1499 * # line 500 "foo.pm"
1500 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1501 */
1502
76e3520e 1503STATIC void
d9095cec 1504S_incline(pTHX_ const char *s)
463ee0b2 1505{
97aff369 1506 dVAR;
d9095cec
NC
1507 const char *t;
1508 const char *n;
1509 const char *e;
463ee0b2 1510
7918f24d
NC
1511 PERL_ARGS_ASSERT_INCLINE;
1512
57843af0 1513 CopLINE_inc(PL_curcop);
463ee0b2
LW
1514 if (*s++ != '#')
1515 return;
d4c19fe8
AL
1516 while (SPACE_OR_TAB(*s))
1517 s++;
73659bf1
GS
1518 if (strnEQ(s, "line", 4))
1519 s += 4;
1520 else
1521 return;
084592ab 1522 if (SPACE_OR_TAB(*s))
73659bf1 1523 s++;
4e553d73 1524 else
73659bf1 1525 return;
d4c19fe8
AL
1526 while (SPACE_OR_TAB(*s))
1527 s++;
463ee0b2
LW
1528 if (!isDIGIT(*s))
1529 return;
d4c19fe8 1530
463ee0b2
LW
1531 n = s;
1532 while (isDIGIT(*s))
1533 s++;
07714eb4 1534 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1535 return;
bf4acbe4 1536 while (SPACE_OR_TAB(*s))
463ee0b2 1537 s++;
73659bf1 1538 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1539 s++;
73659bf1
GS
1540 e = t + 1;
1541 }
463ee0b2 1542 else {
c35e046a
AL
1543 t = s;
1544 while (!isSPACE(*t))
1545 t++;
73659bf1 1546 e = t;
463ee0b2 1547 }
bf4acbe4 1548 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1549 e++;
1550 if (*e != '\n' && *e != '\0')
1551 return; /* false alarm */
1552
f4dd75d9 1553 if (t - s > 0) {
d9095cec 1554 const STRLEN len = t - s;
8a5ee598 1555#ifndef USE_ITHREADS
19bad673
NC
1556 SV *const temp_sv = CopFILESV(PL_curcop);
1557 const char *cf;
1558 STRLEN tmplen;
1559
1560 if (temp_sv) {
1561 cf = SvPVX(temp_sv);
1562 tmplen = SvCUR(temp_sv);
1563 } else {
1564 cf = NULL;
1565 tmplen = 0;
1566 }
1567
42d9b98d 1568 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1569 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1570 * to *{"::_<newfilename"} */
44867030
NC
1571 /* However, the long form of evals is only turned on by the
1572 debugger - usually they're "(eval %lu)" */
1573 char smallbuf[128];
1574 char *tmpbuf;
1575 GV **gvp;
d9095cec 1576 STRLEN tmplen2 = len;
798b63bc 1577 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1578 tmpbuf = smallbuf;
1579 else
2ae0db35 1580 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1581 tmpbuf[0] = '_';
1582 tmpbuf[1] = '<';
2ae0db35 1583 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1584 tmplen += 2;
8a5ee598
RGS
1585 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1586 if (gvp) {
44867030
NC
1587 char *tmpbuf2;
1588 GV *gv2;
1589
1590 if (tmplen2 + 2 <= sizeof smallbuf)
1591 tmpbuf2 = smallbuf;
1592 else
1593 Newx(tmpbuf2, tmplen2 + 2, char);
1594
1595 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1596 /* Either they malloc'd it, or we malloc'd it,
1597 so no prefix is present in ours. */
1598 tmpbuf2[0] = '_';
1599 tmpbuf2[1] = '<';
1600 }
1601
1602 memcpy(tmpbuf2 + 2, s, tmplen2);
1603 tmplen2 += 2;
1604
8a5ee598 1605 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1606 if (!isGV(gv2)) {
8a5ee598 1607 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1608 /* adjust ${"::_<newfilename"} to store the new file name */
1609 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1610 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1611 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1612 }
44867030
NC
1613
1614 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1615 }
e66cf94c 1616 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1617 }
8a5ee598 1618#endif
05ec9bb3 1619 CopFILE_free(PL_curcop);
d9095cec 1620 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1621 }
57843af0 1622 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1623}
1624
29595ff2 1625#ifdef PERL_MAD
cd81e915 1626/* skip space before PL_thistoken */
29595ff2
NC
1627
1628STATIC char *
1629S_skipspace0(pTHX_ register char *s)
1630{
7918f24d
NC
1631 PERL_ARGS_ASSERT_SKIPSPACE0;
1632
29595ff2
NC
1633 s = skipspace(s);
1634 if (!PL_madskills)
1635 return s;
cd81e915
NC
1636 if (PL_skipwhite) {
1637 if (!PL_thiswhite)
6b29d1f5 1638 PL_thiswhite = newSVpvs("");
cd81e915
NC
1639 sv_catsv(PL_thiswhite, PL_skipwhite);
1640 sv_free(PL_skipwhite);
1641 PL_skipwhite = 0;
1642 }
1643 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1644 return s;
1645}
1646
cd81e915 1647/* skip space after PL_thistoken */
29595ff2
NC
1648
1649STATIC char *
1650S_skipspace1(pTHX_ register char *s)
1651{
d4c19fe8 1652 const char *start = s;
29595ff2
NC
1653 I32 startoff = start - SvPVX(PL_linestr);
1654
7918f24d
NC
1655 PERL_ARGS_ASSERT_SKIPSPACE1;
1656
29595ff2
NC
1657 s = skipspace(s);
1658 if (!PL_madskills)
1659 return s;
1660 start = SvPVX(PL_linestr) + startoff;
cd81e915 1661 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1662 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1663 PL_thistoken = newSVpvn(tstart, start - tstart);
1664 }
1665 PL_realtokenstart = -1;
1666 if (PL_skipwhite) {
1667 if (!PL_nextwhite)
6b29d1f5 1668 PL_nextwhite = newSVpvs("");
cd81e915
NC
1669 sv_catsv(PL_nextwhite, PL_skipwhite);
1670 sv_free(PL_skipwhite);
1671 PL_skipwhite = 0;
29595ff2
NC
1672 }
1673 return s;
1674}
1675
1676STATIC char *
1677S_skipspace2(pTHX_ register char *s, SV **svp)
1678{
c35e046a
AL
1679 char *start;
1680 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1681 const I32 startoff = s - SvPVX(PL_linestr);
1682
7918f24d
NC
1683 PERL_ARGS_ASSERT_SKIPSPACE2;
1684
29595ff2
NC
1685 s = skipspace(s);
1686 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1687 if (!PL_madskills || !svp)
1688 return s;
1689 start = SvPVX(PL_linestr) + startoff;
cd81e915 1690 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1691 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1692 PL_thistoken = newSVpvn(tstart, start - tstart);
1693 PL_realtokenstart = -1;
29595ff2 1694 }
cd81e915 1695 if (PL_skipwhite) {
29595ff2 1696 if (!*svp)
6b29d1f5 1697 *svp = newSVpvs("");
cd81e915
NC
1698 sv_setsv(*svp, PL_skipwhite);
1699 sv_free(PL_skipwhite);
1700 PL_skipwhite = 0;
29595ff2
NC
1701 }
1702
1703 return s;
1704}
1705#endif
1706
80a702cd 1707STATIC void
15f169a1 1708S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1709{
1710 AV *av = CopFILEAVx(PL_curcop);
1711 if (av) {
b9f83d2f 1712 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1713 if (orig_sv)
1714 sv_setsv(sv, orig_sv);
1715 else
1716 sv_setpvn(sv, buf, len);
80a702cd
RGS
1717 (void)SvIOK_on(sv);
1718 SvIV_set(sv, 0);
1719 av_store(av, (I32)CopLINE(PL_curcop), sv);
1720 }
1721}
1722
ffb4593c
NT
1723/*
1724 * S_skipspace
1725 * Called to gobble the appropriate amount and type of whitespace.
1726 * Skips comments as well.
1727 */
1728
76e3520e 1729STATIC char *
cea2e8a9 1730S_skipspace(pTHX_ register char *s)
a687059c 1731{
5db06880 1732#ifdef PERL_MAD
f0e67a1d
Z
1733 char *start = s;
1734#endif /* PERL_MAD */
7918f24d 1735 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1736#ifdef PERL_MAD
cd81e915
NC
1737 if (PL_skipwhite) {
1738 sv_free(PL_skipwhite);
f0e67a1d 1739 PL_skipwhite = NULL;
5db06880 1740 }
f0e67a1d 1741#endif /* PERL_MAD */
3280af22 1742 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1743 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1744 s++;
f0e67a1d
Z
1745 } else {
1746 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1747 PL_bufptr = s;
f0998909
Z
1748 lex_read_space(LEX_KEEP_PREVIOUS |
1749 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1750 LEX_NO_NEXT_CHUNK : 0));
3280af22 1751 s = PL_bufptr;
f0e67a1d
Z
1752 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1753 if (PL_linestart > PL_bufptr)
1754 PL_bufptr = PL_linestart;
1755 return s;
463ee0b2 1756 }
5db06880 1757#ifdef PERL_MAD
f0e67a1d
Z
1758 if (PL_madskills)
1759 PL_skipwhite = newSVpvn(start, s-start);
1760#endif /* PERL_MAD */
5db06880 1761 return s;
a687059c 1762}
378cc40b 1763
ffb4593c
NT
1764/*
1765 * S_check_uni
1766 * Check the unary operators to ensure there's no ambiguity in how they're
1767 * used. An ambiguous piece of code would be:
1768 * rand + 5
1769 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1770 * the +5 is its argument.
1771 */
1772
76e3520e 1773STATIC void
cea2e8a9 1774S_check_uni(pTHX)
ba106d47 1775{
97aff369 1776 dVAR;
d4c19fe8
AL
1777 const char *s;
1778 const char *t;
2f3197b3 1779
3280af22 1780 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1781 return;
3280af22
NIS
1782 while (isSPACE(*PL_last_uni))
1783 PL_last_uni++;
c35e046a
AL
1784 s = PL_last_uni;
1785 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1786 s++;
3280af22 1787 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1788 return;
6136c704 1789
9b387841
NC
1790 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1791 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1792 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1793}
1794
ffb4593c
NT
1795/*
1796 * LOP : macro to build a list operator. Its behaviour has been replaced
1797 * with a subroutine, S_lop() for which LOP is just another name.
1798 */
1799
a0d0e21e
LW
1800#define LOP(f,x) return lop(f,x,s)
1801
ffb4593c
NT
1802/*
1803 * S_lop
1804 * Build a list operator (or something that might be one). The rules:
1805 * - if we have a next token, then it's a list operator [why?]
1806 * - if the next thing is an opening paren, then it's a function
1807 * - else it's a list operator
1808 */
1809
76e3520e 1810STATIC I32
a0be28da 1811S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1812{
97aff369 1813 dVAR;
7918f24d
NC
1814
1815 PERL_ARGS_ASSERT_LOP;
1816
6154021b 1817 pl_yylval.ival = f;
35c8bce7 1818 CLINE;
3280af22
NIS
1819 PL_expect = x;
1820 PL_bufptr = s;
1821 PL_last_lop = PL_oldbufptr;
eb160463 1822 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1823#ifdef PERL_MAD
1824 if (PL_lasttoke)
1825 return REPORT(LSTOP);
1826#else
3280af22 1827 if (PL_nexttoke)
bbf60fe6 1828 return REPORT(LSTOP);
5db06880 1829#endif
79072805 1830 if (*s == '(')
bbf60fe6 1831 return REPORT(FUNC);
29595ff2 1832 s = PEEKSPACE(s);
79072805 1833 if (*s == '(')
bbf60fe6 1834 return REPORT(FUNC);
79072805 1835 else
bbf60fe6 1836 return REPORT(LSTOP);
79072805
LW
1837}
1838
5db06880
NC
1839#ifdef PERL_MAD
1840 /*
1841 * S_start_force
1842 * Sets up for an eventual force_next(). start_force(0) basically does
1843 * an unshift, while start_force(-1) does a push. yylex removes items
1844 * on the "pop" end.
1845 */
1846
1847STATIC void
1848S_start_force(pTHX_ int where)
1849{
1850 int i;
1851
cd81e915 1852 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1853 where = PL_lasttoke;
cd81e915
NC
1854 assert(PL_curforce < 0 || PL_curforce == where);
1855 if (PL_curforce != where) {
5db06880
NC
1856 for (i = PL_lasttoke; i > where; --i) {
1857 PL_nexttoke[i] = PL_nexttoke[i-1];
1858 }
1859 PL_lasttoke++;
1860 }
cd81e915 1861 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1862 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1863 PL_curforce = where;
1864 if (PL_nextwhite) {
5db06880 1865 if (PL_madskills)
6b29d1f5 1866 curmad('^', newSVpvs(""));
cd81e915 1867 CURMAD('_', PL_nextwhite);
5db06880
NC
1868 }
1869}
1870
1871STATIC void
1872S_curmad(pTHX_ char slot, SV *sv)
1873{
1874 MADPROP **where;
1875
1876 if (!sv)
1877 return;
cd81e915
NC
1878 if (PL_curforce < 0)
1879 where = &PL_thismad;
5db06880 1880 else
cd81e915 1881 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1882
cd81e915 1883 if (PL_faketokens)
76f68e9b 1884 sv_setpvs(sv, "");
5db06880
NC
1885 else {
1886 if (!IN_BYTES) {
1887 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1888 SvUTF8_on(sv);
1889 else if (PL_encoding) {
1890 sv_recode_to_utf8(sv, PL_encoding);
1891 }
1892 }
1893 }
1894
1895 /* keep a slot open for the head of the list? */
1896 if (slot != '_' && *where && (*where)->mad_key == '^') {
1897 (*where)->mad_key = slot;
daba3364 1898 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1899 (*where)->mad_val = (void*)sv;
1900 }
1901 else
1902 addmad(newMADsv(slot, sv), where, 0);
1903}
1904#else
b3f24c00
MHM
1905# define start_force(where) NOOP
1906# define curmad(slot, sv) NOOP
5db06880
NC
1907#endif
1908
ffb4593c
NT
1909/*
1910 * S_force_next
9cbb5ea2 1911 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1912 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1913 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1914 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1915 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1916 */
1917
4e553d73 1918STATIC void
cea2e8a9 1919S_force_next(pTHX_ I32 type)
79072805 1920{
97aff369 1921 dVAR;
704d4215
GG
1922#ifdef DEBUGGING
1923 if (DEBUG_T_TEST) {
1924 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1925 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1926 }
1927#endif
5db06880 1928#ifdef PERL_MAD
cd81e915 1929 if (PL_curforce < 0)
5db06880 1930 start_force(PL_lasttoke);
cd81e915 1931 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1932 if (PL_lex_state != LEX_KNOWNEXT)
1933 PL_lex_defer = PL_lex_state;
1934 PL_lex_state = LEX_KNOWNEXT;
1935 PL_lex_expect = PL_expect;
cd81e915 1936 PL_curforce = -1;
5db06880 1937#else
3280af22
NIS
1938 PL_nexttype[PL_nexttoke] = type;
1939 PL_nexttoke++;
1940 if (PL_lex_state != LEX_KNOWNEXT) {
1941 PL_lex_defer = PL_lex_state;
1942 PL_lex_expect = PL_expect;
1943 PL_lex_state = LEX_KNOWNEXT;
79072805 1944 }
5db06880 1945#endif
79072805
LW
1946}
1947
28ac2b49
Z
1948void
1949Perl_yyunlex(pTHX)
1950{
a7aaec61
Z
1951 int yyc = PL_parser->yychar;
1952 if (yyc != YYEMPTY) {
1953 if (yyc) {
1954 start_force(-1);
1955 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1956 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1957 PL_lex_brackets--;
1958 yyc |= (1<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1959 }
1960 force_next(yyc);
1961 }
28ac2b49
Z
1962 PL_parser->yychar = YYEMPTY;
1963 }
1964}
1965
d0a148a6 1966STATIC SV *
15f169a1 1967S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1968{
97aff369 1969 dVAR;
740cce10 1970 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1971 !IN_BYTES
1972 && UTF
1973 && !is_ascii_string((const U8*)start, len)
740cce10 1974 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1975 return sv;
1976}
1977
ffb4593c
NT
1978/*
1979 * S_force_word
1980 * When the lexer knows the next thing is a word (for instance, it has
1981 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1982 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1983 * lookahead.
ffb4593c
NT
1984 *
1985 * Arguments:
b1b65b59 1986 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1987 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1988 * int check_keyword : if true, Perl checks to make sure the word isn't
1989 * a keyword (do this if the word is a label, e.g. goto FOO)
1990 * int allow_pack : if true, : characters will also be allowed (require,
1991 * use, etc. do this)
9cbb5ea2 1992 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1993 */
1994
76e3520e 1995STATIC char *
cea2e8a9 1996S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1997{
97aff369 1998 dVAR;
463ee0b2
LW
1999 register char *s;
2000 STRLEN len;
4e553d73 2001
7918f24d
NC
2002 PERL_ARGS_ASSERT_FORCE_WORD;
2003
29595ff2 2004 start = SKIPSPACE1(start);
463ee0b2 2005 s = start;
7e2040f0 2006 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2007 (allow_pack && *s == ':') ||
15f0808c 2008 (allow_initial_tick && *s == '\'') )
a0d0e21e 2009 {
3280af22 2010 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2011 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2012 return start;
cd81e915 2013 start_force(PL_curforce);
5db06880
NC
2014 if (PL_madskills)
2015 curmad('X', newSVpvn(start,s-start));
463ee0b2 2016 if (token == METHOD) {
29595ff2 2017 s = SKIPSPACE1(s);
463ee0b2 2018 if (*s == '(')
3280af22 2019 PL_expect = XTERM;
463ee0b2 2020 else {
3280af22 2021 PL_expect = XOPERATOR;
463ee0b2 2022 }
79072805 2023 }
e74e6b3d 2024 if (PL_madskills)
63575281 2025 curmad('g', newSVpvs( "forced" ));
9ded7720 2026 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2027 = (OP*)newSVOP(OP_CONST,0,
2028 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2029 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2030 force_next(token);
2031 }
2032 return s;
2033}
2034
ffb4593c
NT
2035/*
2036 * S_force_ident
9cbb5ea2 2037 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2038 * text only contains the "foo" portion. The first argument is a pointer
2039 * to the "foo", and the second argument is the type symbol to prefix.
2040 * Forces the next token to be a "WORD".
9cbb5ea2 2041 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2042 */
2043
76e3520e 2044STATIC void
bfed75c6 2045S_force_ident(pTHX_ register const char *s, int kind)
79072805 2046{
97aff369 2047 dVAR;
7918f24d
NC
2048
2049 PERL_ARGS_ASSERT_FORCE_IDENT;
2050
c35e046a 2051 if (*s) {
90e5519e
NC
2052 const STRLEN len = strlen(s);
2053 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2054 start_force(PL_curforce);
9ded7720 2055 NEXTVAL_NEXTTOKE.opval = o;
79072805 2056 force_next(WORD);
748a9306 2057 if (kind) {
11343788 2058 o->op_private = OPpCONST_ENTERED;
55497cff 2059 /* XXX see note in pp_entereval() for why we forgo typo
2060 warnings if the symbol must be introduced in an eval.
2061 GSAR 96-10-12 */
90e5519e
NC
2062 gv_fetchpvn_flags(s, len,
2063 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2064 : GV_ADD,
2065 kind == '$' ? SVt_PV :
2066 kind == '@' ? SVt_PVAV :
2067 kind == '%' ? SVt_PVHV :
a0d0e21e 2068 SVt_PVGV
90e5519e 2069 );
748a9306 2070 }
79072805
LW
2071 }
2072}
2073
1571675a
GS
2074NV
2075Perl_str_to_version(pTHX_ SV *sv)
2076{
2077 NV retval = 0.0;
2078 NV nshift = 1.0;
2079 STRLEN len;
cfd0369c 2080 const char *start = SvPV_const(sv,len);
9d4ba2ae 2081 const char * const end = start + len;
504618e9 2082 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2083
2084 PERL_ARGS_ASSERT_STR_TO_VERSION;
2085
1571675a 2086 while (start < end) {
ba210ebe 2087 STRLEN skip;
1571675a
GS
2088 UV n;
2089 if (utf)
9041c2e3 2090 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2091 else {
2092 n = *(U8*)start;
2093 skip = 1;
2094 }
2095 retval += ((NV)n)/nshift;
2096 start += skip;
2097 nshift *= 1000;
2098 }
2099 return retval;
2100}
2101
4e553d73 2102/*
ffb4593c
NT
2103 * S_force_version
2104 * Forces the next token to be a version number.
e759cc13
RGS
2105 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2106 * and if "guessing" is TRUE, then no new token is created (and the caller
2107 * must use an alternative parsing method).
ffb4593c
NT
2108 */
2109
76e3520e 2110STATIC char *
e759cc13 2111S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2112{
97aff369 2113 dVAR;
5f66b61c 2114 OP *version = NULL;
44dcb63b 2115 char *d;
5db06880
NC
2116#ifdef PERL_MAD
2117 I32 startoff = s - SvPVX(PL_linestr);
2118#endif
89bfa8cd 2119
7918f24d
NC
2120 PERL_ARGS_ASSERT_FORCE_VERSION;
2121
29595ff2 2122 s = SKIPSPACE1(s);
89bfa8cd 2123
44dcb63b 2124 d = s;
dd629d5b 2125 if (*d == 'v')
44dcb63b 2126 d++;
44dcb63b 2127 if (isDIGIT(*d)) {
e759cc13
RGS
2128 while (isDIGIT(*d) || *d == '_' || *d == '.')
2129 d++;
5db06880
NC
2130#ifdef PERL_MAD
2131 if (PL_madskills) {
cd81e915 2132 start_force(PL_curforce);
5db06880
NC
2133 curmad('X', newSVpvn(s,d-s));
2134 }
2135#endif
4e4da3ac 2136 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2137 SV *ver;
8d08d9ba
DG
2138#ifdef USE_LOCALE_NUMERIC
2139 char *loc = setlocale(LC_NUMERIC, "C");
2140#endif
6154021b 2141 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2142#ifdef USE_LOCALE_NUMERIC
2143 setlocale(LC_NUMERIC, loc);
2144#endif
6154021b 2145 version = pl_yylval.opval;
dd629d5b
GS
2146 ver = cSVOPx(version)->op_sv;
2147 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2148 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2149 SvNV_set(ver, str_to_version(ver));
1571675a 2150 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2151 }
89bfa8cd 2152 }
5db06880
NC
2153 else if (guessing) {
2154#ifdef PERL_MAD
2155 if (PL_madskills) {
cd81e915
NC
2156 sv_free(PL_nextwhite); /* let next token collect whitespace */
2157 PL_nextwhite = 0;
5db06880
NC
2158 s = SvPVX(PL_linestr) + startoff;
2159 }
2160#endif
e759cc13 2161 return s;
5db06880 2162 }
89bfa8cd 2163 }
2164
5db06880
NC
2165#ifdef PERL_MAD
2166 if (PL_madskills && !version) {
cd81e915
NC
2167 sv_free(PL_nextwhite); /* let next token collect whitespace */
2168 PL_nextwhite = 0;
5db06880
NC
2169 s = SvPVX(PL_linestr) + startoff;
2170 }
2171#endif
89bfa8cd 2172 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2173 start_force(PL_curforce);
9ded7720 2174 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2175 force_next(WORD);
89bfa8cd 2176
e759cc13 2177 return s;
89bfa8cd 2178}
2179
ffb4593c 2180/*
91152fc1
DG
2181 * S_force_strict_version
2182 * Forces the next token to be a version number using strict syntax rules.
2183 */
2184
2185STATIC char *
2186S_force_strict_version(pTHX_ char *s)
2187{
2188 dVAR;
2189 OP *version = NULL;
2190#ifdef PERL_MAD
2191 I32 startoff = s - SvPVX(PL_linestr);
2192#endif
2193 const char *errstr = NULL;
2194
2195 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2196
2197 while (isSPACE(*s)) /* leading whitespace */
2198 s++;
2199
2200 if (is_STRICT_VERSION(s,&errstr)) {
2201 SV *ver = newSV(0);
2202 s = (char *)scan_version(s, ver, 0);
2203 version = newSVOP(OP_CONST, 0, ver);
2204 }
4e4da3ac
Z
2205 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2206 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2207 {
91152fc1
DG
2208 PL_bufptr = s;
2209 if (errstr)
2210 yyerror(errstr); /* version required */
2211 return s;
2212 }
2213
2214#ifdef PERL_MAD
2215 if (PL_madskills && !version) {
2216 sv_free(PL_nextwhite); /* let next token collect whitespace */
2217 PL_nextwhite = 0;
2218 s = SvPVX(PL_linestr) + startoff;
2219 }
2220#endif
2221 /* NOTE: The parser sees the package name and the VERSION swapped */
2222 start_force(PL_curforce);
2223 NEXTVAL_NEXTTOKE.opval = version;
2224 force_next(WORD);
2225
2226 return s;
2227}
2228
2229/*
ffb4593c
NT
2230 * S_tokeq
2231 * Tokenize a quoted string passed in as an SV. It finds the next
2232 * chunk, up to end of string or a backslash. It may make a new
2233 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2234 * turns \\ into \.
2235 */
2236
76e3520e 2237STATIC SV *
cea2e8a9 2238S_tokeq(pTHX_ SV *sv)
79072805 2239{
97aff369 2240 dVAR;
79072805
LW
2241 register char *s;
2242 register char *send;
2243 register char *d;
b3ac6de7
IZ
2244 STRLEN len = 0;
2245 SV *pv = sv;
79072805 2246
7918f24d
NC
2247 PERL_ARGS_ASSERT_TOKEQ;
2248
79072805 2249 if (!SvLEN(sv))
b3ac6de7 2250 goto finish;
79072805 2251
a0d0e21e 2252 s = SvPV_force(sv, len);
21a311ee 2253 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2254 goto finish;
463ee0b2 2255 send = s + len;
dcb21ed6
NC
2256 /* This is relying on the SV being "well formed" with a trailing '\0' */
2257 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2258 s++;
2259 if (s == send)
b3ac6de7 2260 goto finish;
79072805 2261 d = s;
be4731d2 2262 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2263 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2264 }
79072805
LW
2265 while (s < send) {
2266 if (*s == '\\') {
a0d0e21e 2267 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2268 s++; /* all that, just for this */
2269 }
2270 *d++ = *s++;
2271 }
2272 *d = '\0';
95a20fc0 2273 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2274 finish:
3280af22 2275 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2276 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2277 return sv;
2278}
2279
ffb4593c
NT
2280/*
2281 * Now come three functions related to double-quote context,
2282 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2283 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2284 * interact with PL_lex_state, and create fake ( ... ) argument lists
2285 * to handle functions and concatenation.
2286 * They assume that whoever calls them will be setting up a fake
2287 * join call, because each subthing puts a ',' after it. This lets
2288 * "lower \luPpEr"
2289 * become
2290 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2291 *
2292 * (I'm not sure whether the spurious commas at the end of lcfirst's
2293 * arguments and join's arguments are created or not).
2294 */
2295
2296/*
2297 * S_sublex_start
6154021b 2298 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2299 *
2300 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2301 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2302 *
2303 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2304 *
2305 * Everything else becomes a FUNC.
2306 *
2307 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2308 * had an OP_CONST or OP_READLINE). This just sets us up for a
2309 * call to S_sublex_push().
2310 */
2311
76e3520e 2312STATIC I32
cea2e8a9 2313S_sublex_start(pTHX)
79072805 2314{
97aff369 2315 dVAR;
6154021b 2316 register const I32 op_type = pl_yylval.ival;
79072805
LW
2317
2318 if (op_type == OP_NULL) {
6154021b 2319 pl_yylval.opval = PL_lex_op;
5f66b61c 2320 PL_lex_op = NULL;
79072805
LW
2321 return THING;
2322 }
2323 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2324 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2325
2326 if (SvTYPE(sv) == SVt_PVIV) {
2327 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2328 STRLEN len;
96a5add6 2329 const char * const p = SvPV_const(sv, len);
740cce10 2330 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2331 SvREFCNT_dec(sv);
2332 sv = nsv;
4e553d73 2333 }
6154021b 2334 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2335 PL_lex_stuff = NULL;
6f33ba73
RGS
2336 /* Allow <FH> // "foo" */
2337 if (op_type == OP_READLINE)
2338 PL_expect = XTERMORDORDOR;
79072805
LW
2339 return THING;
2340 }
e3f73d4e
RGS
2341 else if (op_type == OP_BACKTICK && PL_lex_op) {
2342 /* readpipe() vas overriden */
2343 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2344 pl_yylval.opval = PL_lex_op;
9b201d7d 2345 PL_lex_op = NULL;
e3f73d4e
RGS
2346 PL_lex_stuff = NULL;
2347 return THING;
2348 }
79072805 2349
3280af22 2350 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2351 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2352 PL_sublex_info.sub_op = PL_lex_op;
2353 PL_lex_state = LEX_INTERPPUSH;
55497cff 2354
3280af22
NIS
2355 PL_expect = XTERM;
2356 if (PL_lex_op) {
6154021b 2357 pl_yylval.opval = PL_lex_op;
5f66b61c 2358 PL_lex_op = NULL;
55497cff 2359 return PMFUNC;
2360 }
2361 else
2362 return FUNC;
2363}
2364
ffb4593c
NT
2365/*
2366 * S_sublex_push
2367 * Create a new scope to save the lexing state. The scope will be
2368 * ended in S_sublex_done. Returns a '(', starting the function arguments
2369 * to the uc, lc, etc. found before.
2370 * Sets PL_lex_state to LEX_INTERPCONCAT.
2371 */
2372
76e3520e 2373STATIC I32
cea2e8a9 2374S_sublex_push(pTHX)
55497cff 2375{
27da23d5 2376 dVAR;
f46d017c 2377 ENTER;
55497cff 2378
3280af22 2379 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2380 SAVEBOOL(PL_lex_dojoin);
3280af22 2381 SAVEI32(PL_lex_brackets);
3280af22
NIS
2382 SAVEI32(PL_lex_casemods);
2383 SAVEI32(PL_lex_starts);
651b5b28 2384 SAVEI8(PL_lex_state);
7766f137 2385 SAVEVPTR(PL_lex_inpat);
98246f1e 2386 SAVEI16(PL_lex_inwhat);
57843af0 2387 SAVECOPLINE(PL_curcop);
3280af22 2388 SAVEPPTR(PL_bufptr);
8452ff4b 2389 SAVEPPTR(PL_bufend);
3280af22
NIS
2390 SAVEPPTR(PL_oldbufptr);
2391 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2392 SAVEPPTR(PL_last_lop);
2393 SAVEPPTR(PL_last_uni);
3280af22
NIS
2394 SAVEPPTR(PL_linestart);
2395 SAVESPTR(PL_linestr);
8edd5f42
RGS
2396 SAVEGENERICPV(PL_lex_brackstack);
2397 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2398
2399 PL_linestr = PL_lex_stuff;
a0714e2c 2400 PL_lex_stuff = NULL;
3280af22 2401
9cbb5ea2
GS
2402 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2403 = SvPVX(PL_linestr);
3280af22 2404 PL_bufend += SvCUR(PL_linestr);
bd61b366 2405 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2406 SAVEFREESV(PL_linestr);
2407
2408 PL_lex_dojoin = FALSE;
2409 PL_lex_brackets = 0;
a02a5408
JC
2410 Newx(PL_lex_brackstack, 120, char);
2411 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2412 PL_lex_casemods = 0;
2413 *PL_lex_casestack = '\0';
2414 PL_lex_starts = 0;
2415 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2416 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2417
2418 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2419 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2420 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2421 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2422 else
5f66b61c 2423 PL_lex_inpat = NULL;
79072805 2424
55497cff 2425 return '(';
79072805
LW
2426}
2427
ffb4593c
NT
2428/*
2429 * S_sublex_done
2430 * Restores lexer state after a S_sublex_push.
2431 */
2432
76e3520e 2433STATIC I32
cea2e8a9 2434S_sublex_done(pTHX)
79072805 2435{
27da23d5 2436 dVAR;
3280af22 2437 if (!PL_lex_starts++) {
396482e1 2438 SV * const sv = newSVpvs("");
9aa983d2
JH
2439 if (SvUTF8(PL_linestr))
2440 SvUTF8_on(sv);
3280af22 2441 PL_expect = XOPERATOR;
6154021b 2442 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2443 return THING;
2444 }
2445
3280af22
NIS
2446 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2447 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2448 return yylex();
79072805
LW
2449 }
2450
ffb4593c 2451 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2452 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2453 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2454 PL_linestr = PL_lex_repl;
2455 PL_lex_inpat = 0;
2456 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2457 PL_bufend += SvCUR(PL_linestr);
bd61b366 2458 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2459 SAVEFREESV(PL_linestr);
2460 PL_lex_dojoin = FALSE;
2461 PL_lex_brackets = 0;
3280af22
NIS
2462 PL_lex_casemods = 0;
2463 *PL_lex_casestack = '\0';
2464 PL_lex_starts = 0;
25da4f38 2465 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2466 PL_lex_state = LEX_INTERPNORMAL;
2467 PL_lex_starts++;
e9fa98b2
HS
2468 /* we don't clear PL_lex_repl here, so that we can check later
2469 whether this is an evalled subst; that means we rely on the
2470 logic to ensure sublex_done() is called again only via the
2471 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2472 }
e9fa98b2 2473 else {
3280af22 2474 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2475 PL_lex_repl = NULL;
e9fa98b2 2476 }
79072805 2477 return ',';
ffed7fef
LW
2478 }
2479 else {
5db06880
NC
2480#ifdef PERL_MAD
2481 if (PL_madskills) {
cd81e915
NC
2482 if (PL_thiswhite) {
2483 if (!PL_endwhite)
6b29d1f5 2484 PL_endwhite = newSVpvs("");
cd81e915
NC
2485 sv_catsv(PL_endwhite, PL_thiswhite);
2486 PL_thiswhite = 0;
2487 }
2488 if (PL_thistoken)
76f68e9b 2489 sv_setpvs(PL_thistoken,"");
5db06880 2490 else
cd81e915 2491 PL_realtokenstart = -1;
5db06880
NC
2492 }
2493#endif
f46d017c 2494 LEAVE;
3280af22
NIS
2495 PL_bufend = SvPVX(PL_linestr);
2496 PL_bufend += SvCUR(PL_linestr);
2497 PL_expect = XOPERATOR;
09bef843 2498 PL_sublex_info.sub_inwhat = 0;
79072805 2499 return ')';
ffed7fef
LW
2500 }
2501}
2502
02aa26ce
NT
2503/*
2504 scan_const
2505
2506 Extracts a pattern, double-quoted string, or transliteration. This
2507 is terrifying code.
2508
94def140 2509 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2510 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2511 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2512
94def140
TS
2513 Returns a pointer to the character scanned up to. If this is
2514 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2515 successfully parsed), will leave an OP for the substring scanned
6154021b 2516 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2517 by looking at the next characters herself.
2518
02aa26ce
NT
2519 In patterns:
2520 backslashes:
ff3f963a 2521 constants: \N{NAME} only
02aa26ce
NT
2522 case and quoting: \U \Q \E
2523 stops on @ and $, but not for $ as tail anchor
2524
2525 In transliterations:
2526 characters are VERY literal, except for - not at the start or end
94def140
TS
2527 of the string, which indicates a range. If the range is in bytes,
2528 scan_const expands the range to the full set of intermediate
2529 characters. If the range is in utf8, the hyphen is replaced with
2530 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2531
2532 In double-quoted strings:
2533 backslashes:
2534 double-quoted style: \r and \n
ff3f963a 2535 constants: \x31, etc.
94def140 2536 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2537 case and quoting: \U \Q \E
2538 stops on @ and $
2539
2540 scan_const does *not* construct ops to handle interpolated strings.
2541 It stops processing as soon as it finds an embedded $ or @ variable
2542 and leaves it to the caller to work out what's going on.
2543
94def140
TS
2544 embedded arrays (whether in pattern or not) could be:
2545 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2546
2547 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2548
2549 $ in pattern could be $foo or could be tail anchor. Assumption:
2550 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2551 followed by one of "()| \r\n\t"
02aa26ce
NT
2552
2553 \1 (backreferences) are turned into $1
2554
2555 The structure of the code is
2556 while (there's a character to process) {
94def140
TS
2557 handle transliteration ranges
2558 skip regexp comments /(?#comment)/ and codes /(?{code})/
2559 skip #-initiated comments in //x patterns
2560 check for embedded arrays
02aa26ce
NT
2561 check for embedded scalars
2562 if (backslash) {
94def140 2563 deprecate \1 in substitution replacements
02aa26ce
NT
2564 handle string-changing backslashes \l \U \Q \E, etc.
2565 switch (what was escaped) {
94def140 2566 handle \- in a transliteration (becomes a literal -)
ff3f963a 2567 if a pattern and not \N{, go treat as regular character
94def140
TS
2568 handle \132 (octal characters)
2569 handle \x15 and \x{1234} (hex characters)
ff3f963a 2570 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2571 handle \cV (control characters)
2572 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2573 } (end switch)
77a135fe 2574 continue
02aa26ce 2575 } (end if backslash)
77a135fe 2576 handle regular character
02aa26ce 2577 } (end while character to read)
4e553d73 2578
02aa26ce
NT
2579*/
2580
76e3520e 2581STATIC char *
cea2e8a9 2582S_scan_const(pTHX_ char *start)
79072805 2583{
97aff369 2584 dVAR;
3280af22 2585 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2586 SV *sv = newSV(send - start); /* sv for the constant. See
2587 note below on sizing. */
02aa26ce
NT
2588 register char *s = start; /* start of the constant */
2589 register char *d = SvPVX(sv); /* destination for copies */
2590 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2591 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2592 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2593 I32 this_utf8 = UTF; /* Is the source string assumed
2594 to be UTF8? But, this can
2595 show as true when the source
2596 isn't utf8, as for example
2597 when it is entirely composed
2598 of hex constants */
2599
2600 /* Note on sizing: The scanned constant is placed into sv, which is
2601 * initialized by newSV() assuming one byte of output for every byte of
2602 * input. This routine expects newSV() to allocate an extra byte for a
2603 * trailing NUL, which this routine will append if it gets to the end of
2604 * the input. There may be more bytes of input than output (eg., \N{LATIN
2605 * CAPITAL LETTER A}), or more output than input if the constant ends up
2606 * recoded to utf8, but each time a construct is found that might increase
2607 * the needed size, SvGROW() is called. Its size parameter each time is
2608 * based on the best guess estimate at the time, namely the length used so
2609 * far, plus the length the current construct will occupy, plus room for
2610 * the trailing NUL, plus one byte for every input byte still unscanned */
2611
012bcf8d 2612 UV uv;
4c3a8340
TS
2613#ifdef EBCDIC
2614 UV literal_endpoint = 0;
e294cc5d 2615 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2616#endif
012bcf8d 2617
7918f24d
NC
2618 PERL_ARGS_ASSERT_SCAN_CONST;
2619
bb16bae8 2620 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2621 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2622 /* If we are doing a trans and we know we want UTF8 set expectation */
2623 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2624 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2625 }
2626
2627
79072805 2628 while (s < send || dorange) {
ff3f963a 2629
02aa26ce 2630 /* get transliterations out of the way (they're most literal) */
3280af22 2631 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2632 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2633 if (dorange) {
1ba5c669
JH
2634 I32 i; /* current expanded character */
2635 I32 min; /* first character in range */
2636 I32 max; /* last character in range */
02aa26ce 2637
e294cc5d
JH
2638#ifdef EBCDIC
2639 UV uvmax = 0;
2640#endif
2641
2642 if (has_utf8
2643#ifdef EBCDIC
2644 && !native_range
2645#endif
2646 ) {
9d4ba2ae 2647 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2648 char *e = d++;
2649 while (e-- > c)
2650 *(e + 1) = *e;
25716404 2651 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2652 /* mark the range as done, and continue */
2653 dorange = FALSE;
2654 didrange = TRUE;
2655 continue;
2656 }
2b9d42f0 2657
95a20fc0 2658 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2659#ifdef EBCDIC
2660 SvGROW(sv,
2661 SvLEN(sv) + (has_utf8 ?
2662 (512 - UTF_CONTINUATION_MARK +
2663 UNISKIP(0x100))
2664 : 256));
2665 /* How many two-byte within 0..255: 128 in UTF-8,
2666 * 96 in UTF-8-mod. */
2667#else
9cbb5ea2 2668 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2669#endif
9cbb5ea2 2670 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2671#ifdef EBCDIC
2672 if (has_utf8) {
2673 int j;
2674 for (j = 0; j <= 1; j++) {
2675 char * const c = (char*)utf8_hop((U8*)d, -1);
2676 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2677 if (j)
2678 min = (U8)uv;
2679 else if (uv < 256)
2680 max = (U8)uv;
2681 else {
2682 max = (U8)0xff; /* only to \xff */
2683 uvmax = uv; /* \x{100} to uvmax */
2684 }
2685 d = c; /* eat endpoint chars */
2686 }
2687 }
2688 else {
2689#endif
2690 d -= 2; /* eat the first char and the - */
2691 min = (U8)*d; /* first char in range */
2692 max = (U8)d[1]; /* last char in range */
2693#ifdef EBCDIC
2694 }
2695#endif
8ada0baa 2696
c2e66d9e 2697 if (min > max) {
01ec43d0 2698 Perl_croak(aTHX_
d1573ac7 2699 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2700 (char)min, (char)max);
c2e66d9e
GS
2701 }
2702
c7f1f016 2703#ifdef EBCDIC
4c3a8340
TS
2704 if (literal_endpoint == 2 &&
2705 ((isLOWER(min) && isLOWER(max)) ||
2706 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2707 if (isLOWER(min)) {
2708 for (i = min; i <= max; i++)
2709 if (isLOWER(i))
db42d148 2710 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2711 } else {
2712 for (i = min; i <= max; i++)
2713 if (isUPPER(i))
db42d148 2714 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2715 }
2716 }
2717 else
2718#endif
2719 for (i = min; i <= max; i++)
e294cc5d
JH
2720#ifdef EBCDIC
2721 if (has_utf8) {
2722 const U8 ch = (U8)NATIVE_TO_UTF(i);
2723 if (UNI_IS_INVARIANT(ch))
2724 *d++ = (U8)i;
2725 else {
2726 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2727 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2728 }
2729 }
2730 else
2731#endif
2732 *d++ = (char)i;
2733
2734#ifdef EBCDIC
2735 if (uvmax) {
2736 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2737 if (uvmax > 0x101)
2738 *d++ = (char)UTF_TO_NATIVE(0xff);
2739 if (uvmax > 0x100)
2740 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2741 }
2742#endif
02aa26ce
NT
2743
2744 /* mark the range as done, and continue */
79072805 2745 dorange = FALSE;
01ec43d0 2746 didrange = TRUE;
4c3a8340
TS
2747#ifdef EBCDIC
2748 literal_endpoint = 0;
2749#endif
79072805 2750 continue;
4e553d73 2751 }
02aa26ce
NT
2752
2753 /* range begins (ignore - as first or last char) */
79072805 2754 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2755 if (didrange) {
1fafa243 2756 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2757 }
e294cc5d
JH
2758 if (has_utf8
2759#ifdef EBCDIC
2760 && !native_range
2761#endif
2762 ) {
25716404 2763 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2764 s++;
2765 continue;
2766 }
79072805
LW
2767 dorange = TRUE;
2768 s++;
01ec43d0
GS
2769 }
2770 else {
2771 didrange = FALSE;
4c3a8340
TS
2772#ifdef EBCDIC
2773 literal_endpoint = 0;
e294cc5d 2774 native_range = TRUE;
4c3a8340 2775#endif
01ec43d0 2776 }
79072805 2777 }
02aa26ce
NT
2778
2779 /* if we get here, we're not doing a transliteration */
2780
0f5d15d6
IZ
2781 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2782 except for the last char, which will be done separately. */
3280af22 2783 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2784 if (s[2] == '#') {
e994fd66 2785 while (s+1 < send && *s != ')')
db42d148 2786 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2787 }
2788 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2789 || (s[2] == '?' && s[3] == '{'))
155aba94 2790 {
cc6b7395 2791 I32 count = 1;
0f5d15d6 2792 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2793 char c;
2794
d9f97599
GS
2795 while (count && (c = *regparse)) {
2796 if (c == '\\' && regparse[1])
2797 regparse++;
4e553d73 2798 else if (c == '{')
cc6b7395 2799 count++;
4e553d73 2800 else if (c == '}')
cc6b7395 2801 count--;
d9f97599 2802 regparse++;
cc6b7395 2803 }
e994fd66 2804 if (*regparse != ')')
5bdf89e7 2805 regparse--; /* Leave one char for continuation. */
0f5d15d6 2806 while (s < regparse)
db42d148 2807 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2808 }
748a9306 2809 }
02aa26ce
NT
2810
2811 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2812 else if (*s == '#' && PL_lex_inpat &&
2813 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2814 while (s+1 < send && *s != '\n')
db42d148 2815 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2816 }
02aa26ce 2817
5d1d4326 2818 /* check for embedded arrays
da6eedaa 2819 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2820 */
1749ea0d
TS
2821 else if (*s == '@' && s[1]) {
2822 if (isALNUM_lazy_if(s+1,UTF))
2823 break;
2824 if (strchr(":'{$", s[1]))
2825 break;
2826 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2827 break; /* in regexp, neither @+ nor @- are interpolated */
2828 }
02aa26ce
NT
2829
2830 /* check for embedded scalars. only stop if we're sure it's a
2831 variable.
2832 */
79072805 2833 else if (*s == '$') {
3280af22 2834 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2835 break;
77772344 2836 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2837 if (s[1] == '\\') {
2838 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2839 "Possible unintended interpolation of $\\ in regex");
77772344 2840 }
79072805 2841 break; /* in regexp, $ might be tail anchor */
77772344 2842 }
79072805 2843 }
02aa26ce 2844
2b9d42f0
NIS
2845 /* End of else if chain - OP_TRANS rejoin rest */
2846
02aa26ce 2847 /* backslashes */
79072805 2848 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2849 char* e; /* Can be used for ending '}', etc. */
2850
79072805 2851 s++;
02aa26ce 2852
7d0fc23c
KW
2853 /* warn on \1 - \9 in substitution replacements, but note that \11
2854 * is an octal; and \19 is \1 followed by '9' */
3280af22 2855 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2856 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2857 {
a2a5de95 2858 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2859 *--s = '$';
2860 break;
2861 }
02aa26ce
NT
2862
2863 /* string-change backslash escapes */
3280af22 2864 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2865 --s;
2866 break;
2867 }
ff3f963a
KW
2868 /* In a pattern, process \N, but skip any other backslash escapes.
2869 * This is because we don't want to translate an escape sequence
2870 * into a meta symbol and have the regex compiler use the meta
2871 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2872 * in spite of this, we do have to process \N here while the proper
2873 * charnames handler is in scope. See bugs #56444 and #62056.
2874 * There is a complication because \N in a pattern may also stand
2875 * for 'match a non-nl', and not mean a charname, in which case its
2876 * processing should be deferred to the regex compiler. To be a
2877 * charname it must be followed immediately by a '{', and not look
2878 * like \N followed by a curly quantifier, i.e., not something like
2879 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2880 * quantifier */
2881 else if (PL_lex_inpat
2882 && (*s != 'N'
2883 || s[1] != '{'
2884 || regcurly(s + 1)))
2885 {
cc74c5bd
TS
2886 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2887 goto default_action;
2888 }
02aa26ce 2889
79072805 2890 switch (*s) {
02aa26ce
NT
2891
2892 /* quoted - in transliterations */
79072805 2893 case '-':
3280af22 2894 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2895 *d++ = *s++;
2896 continue;
2897 }
2898 /* FALL THROUGH */
2899 default:
11b8faa4 2900 {
a2a5de95
NC
2901 if ((isALPHA(*s) || isDIGIT(*s)))
2902 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2903 "Unrecognized escape \\%c passed through",
2904 *s);
11b8faa4 2905 /* default action is to copy the quoted character */
f9a63242 2906 goto default_action;
11b8faa4 2907 }
02aa26ce 2908
632403cc 2909 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2910 case '0': case '1': case '2': case '3':
2911 case '4': case '5': case '6': case '7':
ba210ebe 2912 {
53305cf1
NC
2913 I32 flags = 0;
2914 STRLEN len = 3;
77a135fe 2915 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2916 s += len;
2917 }
012bcf8d 2918 goto NUM_ESCAPE_INSERT;
02aa26ce 2919
f0a2b745
KW
2920 /* eg. \o{24} indicates the octal constant \024 */
2921 case 'o':
2922 {
2923 STRLEN len;
454155d9 2924 const char* error;
f0a2b745 2925
454155d9 2926 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2927 s += len;
454155d9 2928 if (! valid) {
f0a2b745
KW
2929 yyerror(error);
2930 continue;
2931 }
2932 goto NUM_ESCAPE_INSERT;
2933 }
2934
77a135fe 2935 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2936 case 'x':
a0ed51b3
LW
2937 ++s;
2938 if (*s == '{') {
9d4ba2ae 2939 char* const e = strchr(s, '}');
a4c04bdc
NC
2940 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2941 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2942 STRLEN len;
355860ce 2943
53305cf1 2944 ++s;
adaeee49 2945 if (!e) {
a0ed51b3 2946 yyerror("Missing right brace on \\x{}");
355860ce 2947 continue;
ba210ebe 2948 }
53305cf1 2949 len = e - s;
77a135fe 2950 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2951 s = e + 1;
a0ed51b3
LW
2952 }
2953 else {
ba210ebe 2954 {
53305cf1 2955 STRLEN len = 2;
a4c04bdc 2956 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2957 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2958 s += len;
2959 }
012bcf8d
GS
2960 }
2961
2962 NUM_ESCAPE_INSERT:
ff3f963a
KW
2963 /* Insert oct or hex escaped character. There will always be
2964 * enough room in sv since such escapes will be longer than any
2965 * UTF-8 sequence they can end up as, except if they force us
2966 * to recode the rest of the string into utf8 */
ba7cea30 2967
77a135fe 2968 /* Here uv is the ordinal of the next character being added in
ff3f963a 2969 * unicode (converted from native). */
77a135fe 2970 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2971 if (!has_utf8 && uv > 255) {
77a135fe
KW
2972 /* Might need to recode whatever we have accumulated so
2973 * far if it contains any chars variant in utf8 or
2974 * utf-ebcdic. */
2975
2976 SvCUR_set(sv, d - SvPVX_const(sv));
2977 SvPOK_on(sv);
2978 *d = '\0';
77a135fe 2979 /* See Note on sizing above. */
7bf79863
KW
2980 sv_utf8_upgrade_flags_grow(sv,
2981 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2982 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2983 d = SvPVX(sv) + SvCUR(sv);
2984 has_utf8 = TRUE;
012bcf8d
GS
2985 }
2986
77a135fe
KW
2987 if (has_utf8) {
2988 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2989 if (PL_lex_inwhat == OP_TRANS &&
2990 PL_sublex_info.sub_op) {
2991 PL_sublex_info.sub_op->op_private |=
2992 (PL_lex_repl ? OPpTRANS_FROM_UTF
2993 : OPpTRANS_TO_UTF);
f9a63242 2994 }
e294cc5d
JH
2995#ifdef EBCDIC
2996 if (uv > 255 && !dorange)
2997 native_range = FALSE;
2998#endif
012bcf8d 2999 }
a0ed51b3 3000 else {
012bcf8d 3001 *d++ = (char)uv;
a0ed51b3 3002 }
012bcf8d
GS
3003 }
3004 else {
c4d5f83a 3005 *d++ = (char) uv;
a0ed51b3 3006 }
79072805 3007 continue;
02aa26ce 3008
4a2d328f 3009 case 'N':
ff3f963a
KW
3010 /* In a non-pattern \N must be a named character, like \N{LATIN
3011 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3012 * mean to match a non-newline. For non-patterns, named
3013 * characters are converted to their string equivalents. In
3014 * patterns, named characters are not converted to their
3015 * ultimate forms for the same reasons that other escapes
3016 * aren't. Instead, they are converted to the \N{U+...} form
3017 * to get the value from the charnames that is in effect right
3018 * now, while preserving the fact that it was a named character
3019 * so that the regex compiler knows this */
3020
3021 /* This section of code doesn't generally use the
3022 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3023 * a close examination of this macro and determined it is a
3024 * no-op except on utfebcdic variant characters. Every
3025 * character generated by this that would normally need to be
3026 * enclosed by this macro is invariant, so the macro is not
3027 * needed, and would complicate use of copy(). There are other
3028 * parts of this file where the macro is used inconsistently,
3029 * but are saved by it being a no-op */
3030
3031 /* The structure of this section of code (besides checking for
3032 * errors and upgrading to utf8) is:
3033 * Further disambiguate between the two meanings of \N, and if
3034 * not a charname, go process it elsewhere
0a96133f
KW
3035 * If of form \N{U+...}, pass it through if a pattern;
3036 * otherwise convert to utf8
3037 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3038 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3039
3040 /* Here, s points to the 'N'; the test below is guaranteed to
3041 * succeed if we are being called on a pattern as we already
3042 * know from a test above that the next character is a '{'.
3043 * On a non-pattern \N must mean 'named sequence, which
3044 * requires braces */
3045 s++;
3046 if (*s != '{') {
3047 yyerror("Missing braces on \\N{}");
3048 continue;
3049 }
3050 s++;
3051
0a96133f 3052 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3053 if (! (e = strchr(s, '}'))) {
3054 if (! PL_lex_inpat) {
5777a3f7 3055 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3056 } else {
3057 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3058 }
0a96133f 3059 continue;
ff3f963a 3060 }
cddc7ef4 3061
ff3f963a 3062 /* Here it looks like a named character */
cddc7ef4 3063
ff3f963a
KW
3064 if (PL_lex_inpat) {
3065
3066 /* XXX This block is temporary code. \N{} implies that the
3067 * pattern is to have Unicode semantics, and therefore
3068 * currently has to be encoded in utf8. By putting it in
3069 * utf8 now, we save a whole pass in the regular expression
3070 * compiler. Once that code is changed so Unicode
3071 * semantics doesn't necessarily have to be in utf8, this
3072 * block should be removed */
3073 if (!has_utf8) {
77a135fe 3074 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3075 SvPOK_on(sv);
e4f3eed8 3076 *d = '\0';
77a135fe 3077 /* See Note on sizing above. */
7bf79863 3078 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3079 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3080 /* 5 = '\N{' + cur char + NUL */
3081 (STRLEN)(send - s) + 5);
f08d6ad9 3082 d = SvPVX(sv) + SvCUR(sv);
89491803 3083 has_utf8 = TRUE;
ff3f963a
KW
3084 }
3085 }
423cee85 3086
ff3f963a
KW
3087 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3088 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3089 | PERL_SCAN_DISALLOW_PREFIX;
3090 STRLEN len;
3091
3092 /* For \N{U+...}, the '...' is a unicode value even on
3093 * EBCDIC machines */
3094 s += 2; /* Skip to next char after the 'U+' */
3095 len = e - s;
3096 uv = grok_hex(s, &len, &flags, NULL);
3097 if (len == 0 || len != (STRLEN)(e - s)) {
3098 yyerror("Invalid hexadecimal number in \\N{U+...}");
3099 s = e + 1;
3100 continue;
3101 }
3102
3103 if (PL_lex_inpat) {
3104
3105 /* Pass through to the regex compiler unchanged. The
3106 * reason we evaluated the number above is to make sure
0a96133f 3107 * there wasn't a syntax error. */
ff3f963a
KW
3108 s -= 5; /* Include the '\N{U+' */
3109 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3110 d += e - s + 1;
3111 }
3112 else { /* Not a pattern: convert the hex to string */
3113
3114 /* If destination is not in utf8, unconditionally
3115 * recode it to be so. This is because \N{} implies
3116 * Unicode semantics, and scalars have to be in utf8
3117 * to guarantee those semantics */
3118 if (! has_utf8) {
3119 SvCUR_set(sv, d - SvPVX_const(sv));
3120 SvPOK_on(sv);
3121 *d = '\0';
3122 /* See Note on sizing above. */
3123 sv_utf8_upgrade_flags_grow(
3124 sv,
3125 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3126 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3127 d = SvPVX(sv) + SvCUR(sv);
3128 has_utf8 = TRUE;
3129 }
3130
3131 /* Add the string to the output */
3132 if (UNI_IS_INVARIANT(uv)) {
3133 *d++ = (char) uv;
3134 }
3135 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3136 }
3137 }
3138 else { /* Here is \N{NAME} but not \N{U+...}. */
3139
3140 SV *res; /* result from charnames */
3141 const char *str; /* the string in 'res' */
3142 STRLEN len; /* its length */
3143
3144 /* Get the value for NAME */
3145 res = newSVpvn(s, e - s);
3146 res = new_constant( NULL, 0, "charnames",
3147 /* includes all of: \N{...} */
3148 res, NULL, s - 3, e - s + 4 );
3149
3150 /* Most likely res will be in utf8 already since the
3151 * standard charnames uses pack U, but a custom translator
3152 * can leave it otherwise, so make sure. XXX This can be
3153 * revisited to not have charnames use utf8 for characters
3154 * that don't need it when regexes don't have to be in utf8
3155 * for Unicode semantics. If doing so, remember EBCDIC */
3156 sv_utf8_upgrade(res);
3157 str = SvPV_const(res, len);
3158
3159 /* Don't accept malformed input */
3160 if (! is_utf8_string((U8 *) str, len)) {
3161 yyerror("Malformed UTF-8 returned by \\N");
3162 }
3163 else if (PL_lex_inpat) {
3164
3165 if (! len) { /* The name resolved to an empty string */
3166 Copy("\\N{}", d, 4, char);
3167 d += 4;
3168 }
3169 else {
3170 /* In order to not lose information for the regex
3171 * compiler, pass the result in the specially made
3172 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3173 * the code points in hex of each character
3174 * returned by charnames */
3175
3176 const char *str_end = str + len;
3177 STRLEN char_length; /* cur char's byte length */
3178 STRLEN output_length; /* and the number of bytes
3179 after this is translated
3180 into hex digits */
3181 const STRLEN off = d - SvPVX_const(sv);
3182
3183 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3184 * max('U+', '.'); and 1 for NUL */
3185 char hex_string[2 * UTF8_MAXBYTES + 5];
3186
3187 /* Get the first character of the result. */
3188 U32 uv = utf8n_to_uvuni((U8 *) str,
3189 len,
3190 &char_length,
3191 UTF8_ALLOW_ANYUV);
3192
3193 /* The call to is_utf8_string() above hopefully
3194 * guarantees that there won't be an error. But
3195 * it's easy here to make sure. The function just
3196 * above warns and returns 0 if invalid utf8, but
3197 * it can also return 0 if the input is validly a
3198 * NUL. Disambiguate */
3199 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3200 uv = UNICODE_REPLACEMENT;
3201 }
3202
3203 /* Convert first code point to hex, including the
3204 * boiler plate before it */
78c35590
NC
3205 output_length =
3206 my_sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
ff3f963a
KW
3207
3208 /* Make sure there is enough space to hold it */
3209 d = off + SvGROW(sv, off
3210 + output_length
3211 + (STRLEN)(send - e)
3212 + 2); /* '}' + NUL */
3213 /* And output it */
3214 Copy(hex_string, d, output_length, char);
3215 d += output_length;
3216
3217 /* For each subsequent character, append dot and
3218 * its ordinal in hex */
3219 while ((str += char_length) < str_end) {
3220 const STRLEN off = d - SvPVX_const(sv);
3221 U32 uv = utf8n_to_uvuni((U8 *) str,
3222 str_end - str,
3223 &char_length,
3224 UTF8_ALLOW_ANYUV);
3225 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3226 uv = UNICODE_REPLACEMENT;
3227 }
3228
78c35590
NC
3229 output_length =
3230 my_sprintf(hex_string, ".%X", (unsigned int) uv);
ff3f963a
KW
3231
3232 d = off + SvGROW(sv, off
3233 + output_length
3234 + (STRLEN)(send - e)
3235 + 2); /* '}' + NUL */
3236 Copy(hex_string, d, output_length, char);
3237 d += output_length;
3238 }
3239
3240 *d++ = '}'; /* Done. Add the trailing brace */
3241 }
3242 }
3243 else { /* Here, not in a pattern. Convert the name to a
3244 * string. */
3245
3246 /* If destination is not in utf8, unconditionally
3247 * recode it to be so. This is because \N{} implies
3248 * Unicode semantics, and scalars have to be in utf8
3249 * to guarantee those semantics */
3250 if (! has_utf8) {
3251 SvCUR_set(sv, d - SvPVX_const(sv));
3252 SvPOK_on(sv);
3253 *d = '\0';
3254 /* See Note on sizing above. */
3255 sv_utf8_upgrade_flags_grow(sv,
3256 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3257 len + (STRLEN)(send - s) + 1);
3258 d = SvPVX(sv) + SvCUR(sv);
3259 has_utf8 = TRUE;
3260 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3261
3262 /* See Note on sizing above. (NOTE: SvCUR() is not
3263 * set correctly here). */
3264 const STRLEN off = d - SvPVX_const(sv);
3265 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3266 }
3267 Copy(str, d, len, char);
3268 d += len;
423cee85 3269 }
423cee85 3270 SvREFCNT_dec(res);
cb233ae3
KW
3271
3272 /* Deprecate non-approved name syntax */
3273 if (ckWARN_d(WARN_DEPRECATED)) {
3274 bool problematic = FALSE;
3275 char* i = s;
3276
3277 /* For non-ut8 input, look to see that the first
3278 * character is an alpha, then loop through the rest
3279 * checking that each is a continuation */
3280 if (! this_utf8) {
3281 if (! isALPHAU(*i)) problematic = TRUE;
3282 else for (i = s + 1; i < e; i++) {
3283 if (isCHARNAME_CONT(*i)) continue;
3284 problematic = TRUE;
3285 break;
3286 }
3287 }
3288 else {
3289 /* Similarly for utf8. For invariants can check
3290 * directly. We accept anything above the latin1
3291 * range because it is immaterial to Perl if it is
3292 * correct or not, and is expensive to check. But
3293 * it is fairly easy in the latin1 range to convert
3294 * the variants into a single character and check
3295 * those */
3296 if (UTF8_IS_INVARIANT(*i)) {
3297 if (! isALPHAU(*i)) problematic = TRUE;
3298 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3299 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3300 *(i+1)))))
3301 {
3302 problematic = TRUE;
3303 }
3304 }
3305 if (! problematic) for (i = s + UTF8SKIP(s);
3306 i < e;
3307 i+= UTF8SKIP(i))
3308 {
3309 if (UTF8_IS_INVARIANT(*i)) {
3310 if (isCHARNAME_CONT(*i)) continue;
3311 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3312 continue;
3313 } else if (isCHARNAME_CONT(
3314 UNI_TO_NATIVE(
3315 UTF8_ACCUMULATE(*i, *(i+1)))))
3316 {
3317 continue;
3318 }
3319 problematic = TRUE;
3320 break;
3321 }
3322 }
3323 if (problematic) {
6e1bad6c
KW
3324 /* The e-i passed to the final %.*s makes sure that
3325 * should the trailing NUL be missing that this
3326 * print won't run off the end of the string */
cb233ae3 3327 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3328 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3329 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3330 }
3331 }
3332 } /* End \N{NAME} */
ff3f963a
KW
3333#ifdef EBCDIC
3334 if (!dorange)
3335 native_range = FALSE; /* \N{} is defined to be Unicode */
3336#endif
3337 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3338 continue;
3339
02aa26ce 3340 /* \c is a control character */
79072805
LW
3341 case 'c':
3342 s++;
961ce445 3343 if (s < send) {
f9d13529 3344 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3345 }
961ce445
RGS
3346 else {
3347 yyerror("Missing control char name in \\c");
3348 }
79072805 3349 continue;
02aa26ce
NT
3350
3351 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3352 case 'b':
db42d148 3353 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3354 break;
3355 case 'n':
db42d148 3356 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3357 break;
3358 case 'r':
db42d148 3359 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3360 break;
3361 case 'f':
db42d148 3362 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3363 break;
3364 case 't':
db42d148 3365 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3366 break;
34a3fe2a 3367 case 'e':
db42d148 3368 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3369 break;
3370 case 'a':
db42d148 3371 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3372 break;
02aa26ce
NT
3373 } /* end switch */
3374
79072805
LW
3375 s++;
3376 continue;
02aa26ce 3377 } /* end if (backslash) */
4c3a8340
TS
3378#ifdef EBCDIC
3379 else
3380 literal_endpoint++;
3381#endif
02aa26ce 3382
f9a63242 3383 default_action:
77a135fe
KW
3384 /* If we started with encoded form, or already know we want it,
3385 then encode the next character */
3386 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3387 STRLEN len = 1;
77a135fe
KW
3388
3389
3390 /* One might think that it is wasted effort in the case of the
3391 * source being utf8 (this_utf8 == TRUE) to take the next character
3392 * in the source, convert it to an unsigned value, and then convert
3393 * it back again. But the source has not been validated here. The
3394 * routine that does the conversion checks for errors like
3395 * malformed utf8 */
3396
5f66b61c
AL
3397 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3398 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3399 if (!has_utf8) {
3400 SvCUR_set(sv, d - SvPVX_const(sv));
3401 SvPOK_on(sv);
3402 *d = '\0';
77a135fe 3403 /* See Note on sizing above. */
7bf79863
KW
3404 sv_utf8_upgrade_flags_grow(sv,
3405 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3406 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3407 d = SvPVX(sv) + SvCUR(sv);
3408 has_utf8 = TRUE;
3409 } else if (need > len) {
3410 /* encoded value larger than old, may need extra space (NOTE:
3411 * SvCUR() is not set correctly here). See Note on sizing
3412 * above. */
9d4ba2ae 3413 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3414 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3415 }
77a135fe
KW
3416 s += len;
3417
5f66b61c 3418 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3419#ifdef EBCDIC
3420 if (uv > 255 && !dorange)
3421 native_range = FALSE;
3422#endif
2b9d42f0
NIS
3423 }
3424 else {
3425 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3426 }
02aa26ce
NT
3427 } /* while loop to process each character */
3428
3429 /* terminate the string and set up the sv */
79072805 3430 *d = '\0';
95a20fc0 3431 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3432 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3433 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3434
79072805 3435 SvPOK_on(sv);
9f4817db 3436 if (PL_encoding && !has_utf8) {
d0063567
DK
3437 sv_recode_to_utf8(sv, PL_encoding);
3438 if (SvUTF8(sv))
3439 has_utf8 = TRUE;
9f4817db 3440 }
2b9d42f0 3441 if (has_utf8) {
7e2040f0 3442 SvUTF8_on(sv);
2b9d42f0 3443 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3444 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3445 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3446 }
3447 }
79072805 3448
02aa26ce 3449 /* shrink the sv if we allocated more than we used */
79072805 3450 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3451 SvPV_shrink_to_cur(sv);
79072805 3452 }
02aa26ce 3453
6154021b 3454 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3455 if (s > PL_bufptr) {
eb0d8d16
NC
3456 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3457 const char *const key = PL_lex_inpat ? "qr" : "q";
3458 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3459 const char *type;
3460 STRLEN typelen;
3461
3462 if (PL_lex_inwhat == OP_TRANS) {
3463 type = "tr";
3464 typelen = 2;
3465 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3466 type = "s";
3467 typelen = 1;
3468 } else {
3469 type = "qq";
3470 typelen = 2;
3471 }
3472
3473 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3474 type, typelen);
3475 }
6154021b 3476 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3477 } else
8990e307 3478 SvREFCNT_dec(sv);
79072805
LW
3479 return s;
3480}
3481
ffb4593c
NT
3482/* S_intuit_more
3483 * Returns TRUE if there's more to the expression (e.g., a subscript),
3484 * FALSE otherwise.
ffb4593c
NT
3485 *
3486 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3487 *
3488 * ->[ and ->{ return TRUE
3489 * { and [ outside a pattern are always subscripts, so return TRUE
3490 * if we're outside a pattern and it's not { or [, then return FALSE
3491 * if we're in a pattern and the first char is a {
3492 * {4,5} (any digits around the comma) returns FALSE
3493 * if we're in a pattern and the first char is a [
3494 * [] returns FALSE
3495 * [SOMETHING] has a funky algorithm to decide whether it's a
3496 * character class or not. It has to deal with things like
3497 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3498 * anything else returns TRUE
3499 */
3500
9cbb5ea2
GS
3501/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3502
76e3520e 3503STATIC int
cea2e8a9 3504S_intuit_more(pTHX_ register char *s)
79072805 3505{
97aff369 3506 dVAR;
7918f24d
NC
3507
3508 PERL_ARGS_ASSERT_INTUIT_MORE;
3509
3280af22 3510 if (PL_lex_brackets)
79072805
LW
3511 return TRUE;
3512 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3513 return TRUE;
3514 if (*s != '{' && *s != '[')
3515 return FALSE;
3280af22 3516 if (!PL_lex_inpat)
79072805
LW
3517 return TRUE;
3518
3519 /* In a pattern, so maybe we have {n,m}. */
3520 if (*s == '{') {
b3155d95 3521 if (regcurly(s)) {
79072805 3522 return FALSE;
b3155d95 3523 }
79072805 3524 return TRUE;
79072805
LW
3525 }
3526
3527 /* On the other hand, maybe we have a character class */
3528
3529 s++;
3530 if (*s == ']' || *s == '^')
3531 return FALSE;
3532 else {
ffb4593c 3533 /* this is terrifying, and it works */
79072805
LW
3534 int weight = 2; /* let's weigh the evidence */
3535 char seen[256];
f27ffc4a 3536 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3537 const char * const send = strchr(s,']');
3280af22 3538 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3539
3540 if (!send) /* has to be an expression */
3541 return TRUE;
3542
3543 Zero(seen,256,char);
3544 if (*s == '$')
3545 weight -= 3;
3546 else if (isDIGIT(*s)) {
3547 if (s[1] != ']') {
3548 if (isDIGIT(s[1]) && s[2] == ']')
3549 weight -= 10;
3550 }
3551 else
3552 weight -= 100;
3553 }
3554 for (; s < send; s++) {
3555 last_un_char = un_char;
3556 un_char = (unsigned char)*s;
3557 switch (*s) {
3558 case '@':
3559 case '&':
3560 case '$':
3561 weight -= seen[un_char] * 10;
7e2040f0 3562 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3563 int len;
8903cb82 3564 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3565 len = (int)strlen(tmpbuf);
3566 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3567 weight -= 100;
3568 else
3569 weight -= 10;
3570 }
3571 else if (*s == '$' && s[1] &&
93a17b20
LW
3572 strchr("[#!%*<>()-=",s[1])) {
3573 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3574 weight -= 10;
3575 else
3576 weight -= 1;
3577 }
3578 break;
3579 case '\\':
3580 un_char = 254;
3581 if (s[1]) {
93a17b20 3582 if (strchr("wds]",s[1]))
79072805 3583 weight += 100;
10edeb5d 3584 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3585 weight += 1;
93a17b20 3586 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3587 weight += 40;
3588 else if (isDIGIT(s[1])) {
3589 weight += 40;
3590 while (s[1] && isDIGIT(s[1]))
3591 s++;
3592 }
3593 }
3594 else
3595 weight += 100;
3596 break;
3597 case '-':
3598 if (s[1] == '\\')
3599 weight += 50;
93a17b20 3600 if (strchr("aA01! ",last_un_char))
79072805 3601 weight += 30;
93a17b20 3602 if (strchr("zZ79~",s[1]))
79072805 3603 weight += 30;
f27ffc4a
GS
3604 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3605 weight -= 5; /* cope with negative subscript */
79072805
LW
3606 break;
3607 default:
3792a11b
NC
3608 if (!isALNUM(last_un_char)
3609 && !(last_un_char == '$' || last_un_char == '@'
3610 || last_un_char == '&')
3611 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3612 char *d = tmpbuf;
3613 while (isALPHA(*s))
3614 *d++ = *s++;
3615 *d = '\0';
5458a98a 3616 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3617 weight -= 150;
3618 }
3619 if (un_char == last_un_char + 1)
3620 weight += 5;
3621 weight -= seen[un_char];
3622 break;
3623 }
3624 seen[un_char]++;
3625 }
3626 if (weight >= 0) /* probably a character class */
3627 return FALSE;
3628 }
3629
3630 return TRUE;
3631}
ffed7fef 3632
ffb4593c
NT
3633/*
3634 * S_intuit_method
3635 *
3636 * Does all the checking to disambiguate
3637 * foo bar
3638 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3639 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3640 *
3641 * First argument is the stuff after the first token, e.g. "bar".
3642 *
3643 * Not a method if bar is a filehandle.
3644 * Not a method if foo is a subroutine prototyped to take a filehandle.
3645 * Not a method if it's really "Foo $bar"
3646 * Method if it's "foo $bar"
3647 * Not a method if it's really "print foo $bar"
3648 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3649 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3650 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3651 * =>
3652 */
3653
76e3520e 3654STATIC int
62d55b22 3655S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3656{
97aff369 3657 dVAR;
a0d0e21e 3658 char *s = start + (*start == '$');
3280af22 3659 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3660 STRLEN len;
3661 GV* indirgv;
5db06880
NC
3662#ifdef PERL_MAD
3663 int soff;
3664#endif
a0d0e21e 3665
7918f24d
NC
3666 PERL_ARGS_ASSERT_INTUIT_METHOD;
3667
a0d0e21e 3668 if (gv) {
62d55b22 3669 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3670 return 0;
62d55b22
NC
3671 if (cv) {
3672 if (SvPOK(cv)) {
3673 const char *proto = SvPVX_const(cv);
3674 if (proto) {
3675 if (*proto == ';')
3676 proto++;
3677 if (*proto == '*')
3678 return 0;
3679 }
b6c543e3
IZ
3680 }
3681 } else
c35e046a 3682 gv = NULL;
a0d0e21e 3683 }
8903cb82 3684 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3685 /* start is the beginning of the possible filehandle/object,
3686 * and s is the end of it
3687 * tmpbuf is a copy of it
3688 */
3689
a0d0e21e 3690 if (*start == '$') {
3ef1310e
RGS
3691 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3692 isUPPER(*PL_tokenbuf))
a0d0e21e 3693 return 0;
5db06880
NC
3694#ifdef PERL_MAD
3695 len = start - SvPVX(PL_linestr);
3696#endif
29595ff2 3697 s = PEEKSPACE(s);
f0092767 3698#ifdef PERL_MAD
5db06880
NC
3699 start = SvPVX(PL_linestr) + len;
3700#endif
3280af22
NIS
3701 PL_bufptr = start;
3702 PL_expect = XREF;
a0d0e21e
LW
3703 return *s == '(' ? FUNCMETH : METHOD;
3704 }
5458a98a 3705 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3706 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3707 len -= 2;
3708 tmpbuf[len] = '\0';
5db06880
NC
3709#ifdef PERL_MAD
3710 soff = s - SvPVX(PL_linestr);
3711#endif
c3e0f903
GS
3712 goto bare_package;
3713 }
90e5519e 3714 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3715 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3716 return 0;
3717 /* filehandle or package name makes it a method */
da51bb9b 3718 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3719#ifdef PERL_MAD
3720 soff = s - SvPVX(PL_linestr);
3721#endif
29595ff2 3722 s = PEEKSPACE(s);
3280af22 3723 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3724 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3725 bare_package:
cd81e915 3726 start_force(PL_curforce);
9ded7720 3727 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3728 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3729 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3730 if (PL_madskills)
3731 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3732 PL_expect = XTERM;
a0d0e21e 3733 force_next(WORD);
3280af22 3734 PL_bufptr = s;
5db06880
NC
3735#ifdef PERL_MAD
3736 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3737#endif
a0d0e21e
LW
3738 return *s == '(' ? FUNCMETH : METHOD;
3739 }
3740 }
3741 return 0;
3742}
3743
16d20bd9 3744/* Encoded script support. filter_add() effectively inserts a
4e553d73 3745 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3746 * Note that the filter function only applies to the current source file
3747 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3748 *
3749 * The datasv parameter (which may be NULL) can be used to pass
3750 * private data to this instance of the filter. The filter function
3751 * can recover the SV using the FILTER_DATA macro and use it to
3752 * store private buffers and state information.
3753 *
3754 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3755 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3756 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3757 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3758 * private use must be set using malloc'd pointers.
3759 */
16d20bd9
AD
3760
3761SV *
864dbfa3 3762Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3763{
97aff369 3764 dVAR;
f4c556ac 3765 if (!funcp)
a0714e2c 3766 return NULL;
f4c556ac 3767
5486870f
DM
3768 if (!PL_parser)
3769 return NULL;
3770
3280af22
NIS
3771 if (!PL_rsfp_filters)
3772 PL_rsfp_filters = newAV();
16d20bd9 3773 if (!datasv)
561b68a9 3774 datasv = newSV(0);
862a34c6 3775 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3776 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3777 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3778 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3779 FPTR2DPTR(void *, IoANY(datasv)),
3780 SvPV_nolen(datasv)));
3280af22
NIS
3781 av_unshift(PL_rsfp_filters, 1);
3782 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3783 return(datasv);
3784}
4e553d73 3785
16d20bd9
AD
3786
3787/* Delete most recently added instance of this filter function. */
a0d0e21e 3788void
864dbfa3 3789Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3790{
97aff369 3791 dVAR;
e0c19803 3792 SV *datasv;
24801a4b 3793
7918f24d
NC
3794 PERL_ARGS_ASSERT_FILTER_DEL;
3795
33073adb 3796#ifdef DEBUGGING
55662e27
JH
3797 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3798 FPTR2DPTR(void*, funcp)));
33073adb 3799#endif
5486870f 3800 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3801 return;
3802 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3803 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3804 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3805 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3806
16d20bd9
AD
3807 return;
3808 }
3809 /* we need to search for the correct entry and clear it */
cea2e8a9 3810 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3811}
3812
3813
1de9afcd
RGS
3814/* Invoke the idxth filter function for the current rsfp. */
3815/* maxlen 0 = read one text line */
16d20bd9 3816I32
864dbfa3 3817Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3818{
97aff369 3819 dVAR;
16d20bd9
AD
3820 filter_t funcp;
3821 SV *datasv = NULL;
f482118e
NC
3822 /* This API is bad. It should have been using unsigned int for maxlen.
3823 Not sure if we want to change the API, but if not we should sanity
3824 check the value here. */
39cd7a59
NC
3825 const unsigned int correct_length
3826 = maxlen < 0 ?
3827#ifdef PERL_MICRO
3828 0x7FFFFFFF
3829#else
3830 INT_MAX
3831#endif
3832 : maxlen;
e50aee73 3833
7918f24d
NC
3834 PERL_ARGS_ASSERT_FILTER_READ;
3835
5486870f 3836 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3837 return -1;
1de9afcd 3838 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3839 /* Provide a default input filter to make life easy. */
3840 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3841 DEBUG_P(PerlIO_printf(Perl_debug_log,
3842 "filter_read %d: from rsfp\n", idx));
f482118e 3843 if (correct_length) {
16d20bd9
AD
3844 /* Want a block */
3845 int len ;
f54cb97a 3846 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3847
3848 /* ensure buf_sv is large enough */
881d8f0a 3849 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3850 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3851 correct_length)) <= 0) {
3280af22 3852 if (PerlIO_error(PL_rsfp))
37120919
AD
3853 return -1; /* error */
3854 else
3855 return 0 ; /* end of file */
3856 }
16d20bd9 3857 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3858 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3859 } else {
3860 /* Want a line */
3280af22
NIS
3861 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3862 if (PerlIO_error(PL_rsfp))
37120919
AD
3863 return -1; /* error */
3864 else
3865 return 0 ; /* end of file */
3866 }
16d20bd9
AD
3867 }
3868 return SvCUR(buf_sv);
3869 }
3870 /* Skip this filter slot if filter has been deleted */
1de9afcd 3871 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3872 DEBUG_P(PerlIO_printf(Perl_debug_log,
3873 "filter_read %d: skipped (filter deleted)\n",
3874 idx));
f482118e 3875 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3876 }
3877 /* Get function pointer hidden within datasv */
8141890a 3878 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3879 DEBUG_P(PerlIO_printf(Perl_debug_log,
3880 "filter_read %d: via function %p (%s)\n",
ca0270c4 3881 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3882 /* Call function. The function is expected to */
3883 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3884 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3885 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3886}
3887
76e3520e 3888STATIC char *
5cc814fd 3889S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3890{
97aff369 3891 dVAR;
7918f24d
NC
3892
3893 PERL_ARGS_ASSERT_FILTER_GETS;
3894
c39cd008 3895#ifdef PERL_CR_FILTER
3280af22 3896 if (!PL_rsfp_filters) {
c39cd008 3897 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3898 }
3899#endif
3280af22 3900 if (PL_rsfp_filters) {
55497cff 3901 if (!append)
3902 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3903 if (FILTER_READ(0, sv, 0) > 0)
3904 return ( SvPVX(sv) ) ;
3905 else
bd61b366 3906 return NULL ;
16d20bd9 3907 }
9d116dd7 3908 else
5cc814fd 3909 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3910}
3911
01ec43d0 3912STATIC HV *
9bde8eb0 3913S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3914{
97aff369 3915 dVAR;
def3634b
GS
3916 GV *gv;
3917
7918f24d
NC
3918 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3919
01ec43d0 3920 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3921 return PL_curstash;
3922
3923 if (len > 2 &&
3924 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3925 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3926 {
3927 return GvHV(gv); /* Foo:: */
def3634b
GS
3928 }
3929
3930 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3931 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3932 if (gv && GvCV(gv)) {
3933 SV * const sv = cv_const_sv(GvCV(gv));
3934 if (sv)
9bde8eb0 3935 pkgname = SvPV_const(sv, len);
def3634b
GS
3936 }
3937
9bde8eb0 3938 return gv_stashpvn(pkgname, len, 0);
def3634b 3939}
a0d0e21e 3940
e3f73d4e
RGS
3941/*
3942 * S_readpipe_override
3943 * Check whether readpipe() is overriden, and generates the appropriate
3944 * optree, provided sublex_start() is called afterwards.
3945 */
3946STATIC void
1d51329b 3947S_readpipe_override(pTHX)
e3f73d4e
RGS
3948{
3949 GV **gvp;
3950 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3951 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3952 if ((gv_readpipe
3953 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3954 ||
3955 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3956 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3957 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3958 {
3959 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 3960 op_append_elem(OP_LIST,
e3f73d4e
RGS
3961 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3962 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3963 }
e3f73d4e
RGS
3964}
3965
5db06880
NC
3966#ifdef PERL_MAD
3967 /*
3968 * Perl_madlex
3969 * The intent of this yylex wrapper is to minimize the changes to the
3970 * tokener when we aren't interested in collecting madprops. It remains
3971 * to be seen how successful this strategy will be...
3972 */
3973
3974int
3975Perl_madlex(pTHX)
3976{
3977 int optype;
3978 char *s = PL_bufptr;
3979
cd81e915
NC
3980 /* make sure PL_thiswhite is initialized */
3981 PL_thiswhite = 0;
3982 PL_thismad = 0;
5db06880 3983
cd81e915 3984 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 3985 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
3986 return S_pending_ident(aTHX);
3987
3988 /* previous token ate up our whitespace? */
cd81e915
NC
3989 if (!PL_lasttoke && PL_nextwhite) {
3990 PL_thiswhite = PL_nextwhite;
3991 PL_nextwhite = 0;
5db06880
NC
3992 }
3993
3994 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3995 PL_realtokenstart = -1;
3996 PL_thistoken = 0;
5db06880
NC
3997 optype = yylex();
3998 s = PL_bufptr;
cd81e915 3999 assert(PL_curforce < 0);
5db06880 4000
cd81e915
NC
4001 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4002 if (!PL_thistoken) {
4003 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4004 PL_thistoken = newSVpvs("");
5db06880 4005 else {
c35e046a 4006 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4007 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4008 }
4009 }
cd81e915
NC
4010 if (PL_thismad) /* install head */
4011 CURMAD('X', PL_thistoken);
5db06880
NC
4012 }
4013
4014 /* last whitespace of a sublex? */
cd81e915
NC
4015 if (optype == ')' && PL_endwhite) {
4016 CURMAD('X', PL_endwhite);
5db06880
NC
4017 }
4018
cd81e915 4019 if (!PL_thismad) {
5db06880
NC
4020
4021 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4022 if (!PL_thiswhite && !PL_endwhite && !optype) {
4023 sv_free(PL_thistoken);
4024 PL_thistoken = 0;
5db06880
NC
4025 return 0;
4026 }
4027
4028 /* put off final whitespace till peg */
4029 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4030 PL_nextwhite = PL_thiswhite;
4031 PL_thiswhite = 0;
5db06880 4032 }
cd81e915
NC
4033 else if (PL_thisopen) {
4034 CURMAD('q', PL_thisopen);
4035 if (PL_thistoken)
4036 sv_free(PL_thistoken);
4037 PL_thistoken = 0;
5db06880
NC
4038 }
4039 else {
4040 /* Store actual token text as madprop X */
cd81e915 4041 CURMAD('X', PL_thistoken);
5db06880
NC
4042 }
4043
cd81e915 4044 if (PL_thiswhite) {
5db06880 4045 /* add preceding whitespace as madprop _ */
cd81e915 4046 CURMAD('_', PL_thiswhite);
5db06880
NC
4047 }
4048
cd81e915 4049 if (PL_thisstuff) {
5db06880 4050 /* add quoted material as madprop = */
cd81e915 4051 CURMAD('=', PL_thisstuff);
5db06880
NC
4052 }
4053
cd81e915 4054 if (PL_thisclose) {
5db06880 4055 /* add terminating quote as madprop Q */
cd81e915 4056 CURMAD('Q', PL_thisclose);
5db06880
NC
4057 }
4058 }
4059
4060 /* special processing based on optype */
4061
4062 switch (optype) {
4063
4064 /* opval doesn't need a TOKEN since it can already store mp */
4065 case WORD:
4066 case METHOD:
4067 case FUNCMETH:
4068 case THING:
4069 case PMFUNC:
4070 case PRIVATEREF:
4071 case FUNC0SUB:
4072 case UNIOPSUB:
4073 case LSTOPSUB:
6154021b
RGS
4074 if (pl_yylval.opval)
4075 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4076 PL_thismad = 0;
5db06880
NC
4077 return optype;
4078
4079 /* fake EOF */
4080 case 0:
4081 optype = PEG;
cd81e915
NC
4082 if (PL_endwhite) {
4083 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4084 PL_endwhite = 0;
5db06880
NC
4085 }
4086 break;
4087
4088 case ']':
4089 case '}':
cd81e915 4090 if (PL_faketokens)
5db06880
NC
4091 break;
4092 /* remember any fake bracket that lexer is about to discard */
4093 if (PL_lex_brackets == 1 &&
4094 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4095 {
4096 s = PL_bufptr;
4097 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4098 s++;
4099 if (*s == '}') {
cd81e915
NC
4100 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4101 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4102 PL_thiswhite = 0;
5db06880
NC
4103 PL_bufptr = s - 1;
4104 break; /* don't bother looking for trailing comment */
4105 }
4106 else
4107 s = PL_bufptr;
4108 }
4109 if (optype == ']')
4110 break;
4111 /* FALLTHROUGH */
4112
4113 /* attach a trailing comment to its statement instead of next token */
4114 case ';':
cd81e915 4115 if (PL_faketokens)
5db06880
NC
4116 break;
4117 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4118 s = PL_bufptr;
4119 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4120 s++;
4121 if (*s == '\n' || *s == '#') {
4122 while (s < PL_bufend && *s != '\n')
4123 s++;
4124 if (s < PL_bufend)
4125 s++;
cd81e915
NC
4126 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4127 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4128 PL_thiswhite = 0;
5db06880
NC
4129 PL_bufptr = s;
4130 }
4131 }
4132 break;
4133
4134 /* pval */
4135 case LABEL:
4136 break;
4137
4138 /* ival */
4139 default:
4140 break;
4141
4142 }
4143
4144 /* Create new token struct. Note: opvals return early above. */
6154021b 4145 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4146 PL_thismad = 0;
5db06880
NC
4147 return optype;
4148}
4149#endif
4150
468aa647 4151STATIC char *
cc6ed77d 4152S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4153 dVAR;
7918f24d
NC
4154
4155 PERL_ARGS_ASSERT_TOKENIZE_USE;
4156
468aa647
RGS
4157 if (PL_expect != XSTATE)
4158 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4159 is_use ? "use" : "no"));
29595ff2 4160 s = SKIPSPACE1(s);
468aa647
RGS
4161 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4162 s = force_version(s, TRUE);
17c59fdf
VP
4163 if (*s == ';' || *s == '}'
4164 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4165 start_force(PL_curforce);
9ded7720 4166 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4167 force_next(WORD);
4168 }
4169 else if (*s == 'v') {
4170 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4171 s = force_version(s, FALSE);
4172 }
4173 }
4174 else {
4175 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4176 s = force_version(s, FALSE);
4177 }
6154021b 4178 pl_yylval.ival = is_use;
468aa647
RGS
4179 return s;
4180}
748a9306 4181#ifdef DEBUGGING
27da23d5 4182 static const char* const exp_name[] =
09bef843 4183 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4184 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4185 };
748a9306 4186#endif
463ee0b2 4187
361d9b55
Z
4188#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4189STATIC bool
4190S_word_takes_any_delimeter(char *p, STRLEN len)
4191{
4192 return (len == 1 && strchr("msyq", p[0])) ||
4193 (len == 2 && (
4194 (p[0] == 't' && p[1] == 'r') ||
4195 (p[0] == 'q' && strchr("qwxr", p[1]))));
4196}
4197
02aa26ce
NT
4198/*
4199 yylex
4200
4201 Works out what to call the token just pulled out of the input
4202 stream. The yacc parser takes care of taking the ops we return and
4203 stitching them into a tree.
4204
4205 Returns:
4206 PRIVATEREF
4207
4208 Structure:
4209 if read an identifier
4210 if we're in a my declaration
4211 croak if they tried to say my($foo::bar)
4212 build the ops for a my() declaration
4213 if it's an access to a my() variable
4214 are we in a sort block?
4215 croak if my($a); $a <=> $b
4216 build ops for access to a my() variable
4217 if in a dq string, and they've said @foo and we can't find @foo
4218 croak
4219 build ops for a bareword
4220 if we already built the token before, use it.
4221*/
4222
20141f0e 4223
dba4d153
JH
4224#ifdef __SC__
4225#pragma segment Perl_yylex
4226#endif
dba4d153 4227int
dba4d153 4228Perl_yylex(pTHX)
20141f0e 4229{
97aff369 4230 dVAR;
3afc138a 4231 register char *s = PL_bufptr;
378cc40b 4232 register char *d;
463ee0b2 4233 STRLEN len;
aa7440fb 4234 bool bof = FALSE;
580561a3 4235 U32 fake_eof = 0;
a687059c 4236
10edeb5d
JH
4237 /* orig_keyword, gvp, and gv are initialized here because
4238 * jump to the label just_a_word_zero can bypass their
4239 * initialization later. */
4240 I32 orig_keyword = 0;
4241 GV *gv = NULL;
4242 GV **gvp = NULL;
4243
bbf60fe6 4244 DEBUG_T( {
396482e1 4245 SV* tmp = newSVpvs("");
b6007c36
DM
4246 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4247 (IV)CopLINE(PL_curcop),
4248 lex_state_names[PL_lex_state],
4249 exp_name[PL_expect],
4250 pv_display(tmp, s, strlen(s), 0, 60));
4251 SvREFCNT_dec(tmp);
bbf60fe6 4252 } );
02aa26ce 4253 /* check if there's an identifier for us to look at */
28ac2b49 4254 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4255 return REPORT(S_pending_ident(aTHX));
bbce6d69 4256
02aa26ce
NT
4257 /* no identifier pending identification */
4258
3280af22 4259 switch (PL_lex_state) {
79072805
LW
4260#ifdef COMMENTARY
4261 case LEX_NORMAL: /* Some compilers will produce faster */
4262 case LEX_INTERPNORMAL: /* code if we comment these out. */
4263 break;
4264#endif
4265
09bef843 4266 /* when we've already built the next token, just pull it out of the queue */
79072805 4267 case LEX_KNOWNEXT:
5db06880
NC
4268#ifdef PERL_MAD
4269 PL_lasttoke--;
6154021b 4270 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4271 if (PL_madskills) {
cd81e915 4272 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4273 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4274 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4275 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4276 PL_thismad->mad_val = 0;
4277 mad_free(PL_thismad);
4278 PL_thismad = 0;
5db06880
NC
4279 }
4280 }
4281 if (!PL_lasttoke) {
4282 PL_lex_state = PL_lex_defer;
4283 PL_expect = PL_lex_expect;
4284 PL_lex_defer = LEX_NORMAL;
4285 if (!PL_nexttoke[PL_lasttoke].next_type)
4286 return yylex();
4287 }
4288#else
3280af22 4289 PL_nexttoke--;
6154021b 4290 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4291 if (!PL_nexttoke) {
4292 PL_lex_state = PL_lex_defer;
4293 PL_expect = PL_lex_expect;
4294 PL_lex_defer = LEX_NORMAL;
463ee0b2 4295 }
5db06880 4296#endif
a7aaec61
Z
4297 {
4298 I32 next_type;
5db06880 4299#ifdef PERL_MAD
a7aaec61 4300 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4301#else
a7aaec61 4302 next_type = PL_nexttype[PL_nexttoke];
5db06880 4303#endif
a7aaec61
Z
4304 if (next_type & (1<<24)) {
4305 if (PL_lex_brackets > 100)
4306 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4307 PL_lex_brackstack[PL_lex_brackets++] = (next_type >> 16) & 0xff;
4308 next_type &= 0xffff;
4309 }
4310#ifdef PERL_MAD
4311 /* FIXME - can these be merged? */
4312 return next_type;
4313#else
4314 return REPORT(next_type);
4315#endif
4316 }
79072805 4317
02aa26ce 4318 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4319 when we get here, PL_bufptr is at the \
02aa26ce 4320 */
79072805
LW
4321 case LEX_INTERPCASEMOD:
4322#ifdef DEBUGGING
3280af22 4323 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4324 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4325#endif
02aa26ce 4326 /* handle \E or end of string */
3280af22 4327 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4328 /* if at a \E */
3280af22 4329 if (PL_lex_casemods) {
f54cb97a 4330 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4331 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4332
3792a11b
NC
4333 if (PL_bufptr != PL_bufend
4334 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4335 PL_bufptr += 2;
4336 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4337#ifdef PERL_MAD
4338 if (PL_madskills)
6b29d1f5 4339 PL_thistoken = newSVpvs("\\E");
5db06880 4340#endif
a0d0e21e 4341 }
bbf60fe6 4342 return REPORT(')');
79072805 4343 }
5db06880
NC
4344#ifdef PERL_MAD
4345 while (PL_bufptr != PL_bufend &&
4346 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4347 if (!PL_thiswhite)
6b29d1f5 4348 PL_thiswhite = newSVpvs("");
cd81e915 4349 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4350 PL_bufptr += 2;
4351 }
4352#else
3280af22
NIS
4353 if (PL_bufptr != PL_bufend)
4354 PL_bufptr += 2;
5db06880 4355#endif
3280af22 4356 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4357 return yylex();
79072805
LW
4358 }
4359 else {
607df283 4360 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4361 "### Saw case modifier\n"); });
3280af22 4362 s = PL_bufptr + 1;
6e909404 4363 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4364#ifdef PERL_MAD
cd81e915 4365 if (!PL_thiswhite)
6b29d1f5 4366 PL_thiswhite = newSVpvs("");
cd81e915 4367 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4368#endif
89122651 4369 PL_bufptr = s + 3;
6e909404
JH
4370 PL_lex_state = LEX_INTERPCONCAT;
4371 return yylex();
a0d0e21e 4372 }
6e909404 4373 else {
90771dc0 4374 I32 tmp;
5db06880
NC
4375 if (!PL_madskills) /* when just compiling don't need correct */
4376 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4377 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4378 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4379 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4380 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4381 return REPORT(')');
6e909404
JH
4382 }
4383 if (PL_lex_casemods > 10)
4384 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4385 PL_lex_casestack[PL_lex_casemods++] = *s;
4386 PL_lex_casestack[PL_lex_casemods] = '\0';
4387 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4388 start_force(PL_curforce);
9ded7720 4389 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4390 force_next('(');
cd81e915 4391 start_force(PL_curforce);
6e909404 4392 if (*s == 'l')
9ded7720 4393 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4394 else if (*s == 'u')
9ded7720 4395 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4396 else if (*s == 'L')
9ded7720 4397 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4398 else if (*s == 'U')
9ded7720 4399 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4400 else if (*s == 'Q')
9ded7720 4401 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4402 else
4403 Perl_croak(aTHX_ "panic: yylex");
5db06880 4404 if (PL_madskills) {
a5849ce5
NC
4405 SV* const tmpsv = newSVpvs("\\ ");
4406 /* replace the space with the character we want to escape
4407 */
4408 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4409 curmad('_', tmpsv);
4410 }
6e909404 4411 PL_bufptr = s + 1;
a0d0e21e 4412 }
79072805 4413 force_next(FUNC);
3280af22
NIS
4414 if (PL_lex_starts) {
4415 s = PL_bufptr;
4416 PL_lex_starts = 0;
5db06880
NC
4417#ifdef PERL_MAD
4418 if (PL_madskills) {
cd81e915
NC
4419 if (PL_thistoken)
4420 sv_free(PL_thistoken);
6b29d1f5 4421 PL_thistoken = newSVpvs("");
5db06880
NC
4422 }
4423#endif
131b3ad0
DM
4424 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4425 if (PL_lex_casemods == 1 && PL_lex_inpat)
4426 OPERATOR(',');
4427 else
4428 Aop(OP_CONCAT);
79072805
LW
4429 }
4430 else
cea2e8a9 4431 return yylex();
79072805
LW
4432 }
4433
55497cff 4434 case LEX_INTERPPUSH:
bbf60fe6 4435 return REPORT(sublex_push());
55497cff 4436
79072805 4437 case LEX_INTERPSTART:
3280af22 4438 if (PL_bufptr == PL_bufend)
bbf60fe6 4439 return REPORT(sublex_done());
607df283 4440 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4441 "### Interpolated variable\n"); });
3280af22
NIS
4442 PL_expect = XTERM;
4443 PL_lex_dojoin = (*PL_bufptr == '@');
4444 PL_lex_state = LEX_INTERPNORMAL;
4445 if (PL_lex_dojoin) {
cd81e915 4446 start_force(PL_curforce);
9ded7720 4447 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4448 force_next(',');
cd81e915 4449 start_force(PL_curforce);
a0d0e21e 4450 force_ident("\"", '$');
cd81e915 4451 start_force(PL_curforce);
9ded7720 4452 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4453 force_next('$');
cd81e915 4454 start_force(PL_curforce);
9ded7720 4455 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4456 force_next('(');
cd81e915 4457 start_force(PL_curforce);
9ded7720 4458 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4459 force_next(FUNC);
4460 }
3280af22
NIS
4461 if (PL_lex_starts++) {
4462 s = PL_bufptr;
5db06880
NC
4463#ifdef PERL_MAD
4464 if (PL_madskills) {
cd81e915
NC
4465 if (PL_thistoken)
4466 sv_free(PL_thistoken);
6b29d1f5 4467 PL_thistoken = newSVpvs("");
5db06880
NC
4468 }
4469#endif
131b3ad0
DM
4470 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4471 if (!PL_lex_casemods && PL_lex_inpat)
4472 OPERATOR(',');
4473 else
4474 Aop(OP_CONCAT);
79072805 4475 }
cea2e8a9 4476 return yylex();
79072805
LW
4477
4478 case LEX_INTERPENDMAYBE:
3280af22
NIS
4479 if (intuit_more(PL_bufptr)) {
4480 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4481 break;
4482 }
4483 /* FALL THROUGH */
4484
4485 case LEX_INTERPEND:
3280af22
NIS
4486 if (PL_lex_dojoin) {
4487 PL_lex_dojoin = FALSE;
4488 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4489#ifdef PERL_MAD
4490 if (PL_madskills) {
cd81e915
NC
4491 if (PL_thistoken)
4492 sv_free(PL_thistoken);
6b29d1f5 4493 PL_thistoken = newSVpvs("");
5db06880
NC
4494 }
4495#endif
bbf60fe6 4496 return REPORT(')');
79072805 4497 }
43a16006 4498 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4499 && SvEVALED(PL_lex_repl))
43a16006 4500 {
e9fa98b2 4501 if (PL_bufptr != PL_bufend)
cea2e8a9 4502 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4503 PL_lex_repl = NULL;
e9fa98b2 4504 }
79072805
LW
4505 /* FALLTHROUGH */
4506 case LEX_INTERPCONCAT:
4507#ifdef DEBUGGING
3280af22 4508 if (PL_lex_brackets)
cea2e8a9 4509 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4510#endif
3280af22 4511 if (PL_bufptr == PL_bufend)
bbf60fe6 4512 return REPORT(sublex_done());
79072805 4513
3280af22
NIS
4514 if (SvIVX(PL_linestr) == '\'') {
4515 SV *sv = newSVsv(PL_linestr);
4516 if (!PL_lex_inpat)
76e3520e 4517 sv = tokeq(sv);
3280af22 4518 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4519 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4520 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4521 s = PL_bufend;
79072805
LW
4522 }
4523 else {
3280af22 4524 s = scan_const(PL_bufptr);
79072805 4525 if (*s == '\\')
3280af22 4526 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4527 else
3280af22 4528 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4529 }
4530
3280af22 4531 if (s != PL_bufptr) {
cd81e915 4532 start_force(PL_curforce);
5db06880
NC
4533 if (PL_madskills) {
4534 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4535 }
6154021b 4536 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4537 PL_expect = XTERM;
79072805 4538 force_next(THING);
131b3ad0 4539 if (PL_lex_starts++) {
5db06880
NC
4540#ifdef PERL_MAD
4541 if (PL_madskills) {
cd81e915
NC
4542 if (PL_thistoken)
4543 sv_free(PL_thistoken);
6b29d1f5 4544 PL_thistoken = newSVpvs("");
5db06880
NC
4545 }
4546#endif
131b3ad0
DM
4547 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4548 if (!PL_lex_casemods && PL_lex_inpat)
4549 OPERATOR(',');
4550 else
4551 Aop(OP_CONCAT);
4552 }
79072805 4553 else {
3280af22 4554 PL_bufptr = s;
cea2e8a9 4555 return yylex();
79072805
LW
4556 }
4557 }
4558
cea2e8a9 4559 return yylex();
a0d0e21e 4560 case LEX_FORMLINE:
3280af22
NIS
4561 PL_lex_state = LEX_NORMAL;
4562 s = scan_formline(PL_bufptr);
4563 if (!PL_lex_formbrack)
a0d0e21e
LW
4564 goto rightbracket;
4565 OPERATOR(';');
79072805
LW
4566 }
4567
3280af22
NIS
4568 s = PL_bufptr;
4569 PL_oldoldbufptr = PL_oldbufptr;
4570 PL_oldbufptr = s;
463ee0b2
LW
4571
4572 retry:
5db06880 4573#ifdef PERL_MAD
cd81e915
NC
4574 if (PL_thistoken) {
4575 sv_free(PL_thistoken);
4576 PL_thistoken = 0;
5db06880 4577 }
cd81e915 4578 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4579#endif
378cc40b
LW
4580 switch (*s) {
4581 default:
7e2040f0 4582 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4583 goto keylookup;
b1fc3636
CJ
4584 {
4585 unsigned char c = *s;
4586 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4587 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4588 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4589 } else {
4590 d = PL_linestart;
4591 }
4592 *s = '\0';
4593 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4594 }
e929a76b
LW
4595 case 4:
4596 case 26:
4597 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4598 case 0:
5db06880
NC
4599#ifdef PERL_MAD
4600 if (PL_madskills)
cd81e915 4601 PL_faketokens = 0;
5db06880 4602#endif
3280af22
NIS
4603 if (!PL_rsfp) {
4604 PL_last_uni = 0;
4605 PL_last_lop = 0;
a7aaec61
Z
4606 if (PL_lex_brackets &&
4607 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4608 yyerror((const char *)
4609 (PL_lex_formbrack
4610 ? "Format not terminated"
4611 : "Missing right curly or square bracket"));
c5ee2135 4612 }
4e553d73 4613 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4614 "### Tokener got EOF\n");
5f80b19c 4615 } );
79072805 4616 TOKEN(0);
463ee0b2 4617 }
3280af22 4618 if (s++ < PL_bufend)
a687059c 4619 goto retry; /* ignore stray nulls */
3280af22
NIS
4620 PL_last_uni = 0;
4621 PL_last_lop = 0;
4622 if (!PL_in_eval && !PL_preambled) {
4623 PL_preambled = TRUE;
5db06880
NC
4624#ifdef PERL_MAD
4625 if (PL_madskills)
cd81e915 4626 PL_faketokens = 1;
5db06880 4627#endif
5ab7ff98
NC
4628 if (PL_perldb) {
4629 /* Generate a string of Perl code to load the debugger.
4630 * If PERL5DB is set, it will return the contents of that,
4631 * otherwise a compile-time require of perl5db.pl. */
4632
4633 const char * const pdb = PerlEnv_getenv("PERL5DB");
4634
4635 if (pdb) {
4636 sv_setpv(PL_linestr, pdb);
4637 sv_catpvs(PL_linestr,";");
4638 } else {
4639 SETERRNO(0,SS_NORMAL);
4640 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4641 }
4642 } else
4643 sv_setpvs(PL_linestr,"");
c62eb204
NC
4644 if (PL_preambleav) {
4645 SV **svp = AvARRAY(PL_preambleav);
4646 SV **const end = svp + AvFILLp(PL_preambleav);
4647 while(svp <= end) {
4648 sv_catsv(PL_linestr, *svp);
4649 ++svp;
396482e1 4650 sv_catpvs(PL_linestr, ";");
91b7def8 4651 }
daba3364 4652 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4653 PL_preambleav = NULL;
91b7def8 4654 }
9f639728
FR
4655 if (PL_minus_E)
4656 sv_catpvs(PL_linestr,
4657 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4658 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4659 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4660 if (PL_minus_l)
396482e1 4661 sv_catpvs(PL_linestr,"chomp;");
3280af22 4662 if (PL_minus_a) {
3280af22 4663 if (PL_minus_F) {
3792a11b
NC
4664 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4665 || *PL_splitstr == '"')
3280af22 4666 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4667 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4668 else {
c8ef6a4b
NC
4669 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4670 bytes can be used as quoting characters. :-) */
dd374669 4671 const char *splits = PL_splitstr;
91d456ae 4672 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4673 do {
4674 /* Need to \ \s */
dd374669
AL
4675 if (*splits == '\\')
4676 sv_catpvn(PL_linestr, splits, 1);
4677 sv_catpvn(PL_linestr, splits, 1);
4678 } while (*splits++);
48c4c863
NC
4679 /* This loop will embed the trailing NUL of
4680 PL_linestr as the last thing it does before
4681 terminating. */
396482e1 4682 sv_catpvs(PL_linestr, ");");
54310121 4683 }
2304df62
AD
4684 }
4685 else
396482e1 4686 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4687 }
79072805 4688 }
396482e1 4689 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4690 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4691 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4692 PL_last_lop = PL_last_uni = NULL;
65269a95 4693 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4694 update_debugger_info(PL_linestr, NULL, 0);
79072805 4695 goto retry;
a687059c 4696 }
e929a76b 4697 do {
580561a3
Z
4698 fake_eof = 0;
4699 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4700 if (0) {
7e28d3af 4701 fake_eof:
f0e67a1d
Z
4702 fake_eof = LEX_FAKE_EOF;
4703 }
4704 PL_bufptr = PL_bufend;
17cc9359 4705 CopLINE_inc(PL_curcop);
f0e67a1d 4706 if (!lex_next_chunk(fake_eof)) {
17cc9359 4707 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4708 s = PL_bufptr;
4709 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4710 }
17cc9359 4711 CopLINE_dec(PL_curcop);
5db06880 4712#ifdef PERL_MAD
f0e67a1d 4713 if (!PL_rsfp)
cd81e915 4714 PL_realtokenstart = -1;
5db06880 4715#endif
f0e67a1d 4716 s = PL_bufptr;
7aa207d6
JH
4717 /* If it looks like the start of a BOM or raw UTF-16,
4718 * check if it in fact is. */
580561a3 4719 if (bof && PL_rsfp &&
7aa207d6
JH
4720 (*s == 0 ||
4721 *(U8*)s == 0xEF ||
4722 *(U8*)s >= 0xFE ||
4723 s[1] == 0)) {
eb160463 4724 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4725 if (bof) {
3280af22 4726 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4727 s = swallow_bom((U8*)s);
e929a76b 4728 }
378cc40b 4729 }
737c24fc 4730 if (PL_parser->in_pod) {
a0d0e21e 4731 /* Incest with pod. */
5db06880
NC
4732#ifdef PERL_MAD
4733 if (PL_madskills)
cd81e915 4734 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4735#endif
01a57ef7 4736 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4737 sv_setpvs(PL_linestr, "");
3280af22
NIS
4738 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4739 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4740 PL_last_lop = PL_last_uni = NULL;
737c24fc 4741 PL_parser->in_pod = 0;
a0d0e21e 4742 }
4e553d73 4743 }
85613cab
Z
4744 if (PL_rsfp)
4745 incline(s);
737c24fc 4746 } while (PL_parser->in_pod);
3280af22 4747 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4748 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4749 PL_last_lop = PL_last_uni = NULL;
57843af0 4750 if (CopLINE(PL_curcop) == 1) {
3280af22 4751 while (s < PL_bufend && isSPACE(*s))
79072805 4752 s++;
a0d0e21e 4753 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4754 s++;
5db06880
NC
4755#ifdef PERL_MAD
4756 if (PL_madskills)
cd81e915 4757 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4758#endif
bd61b366 4759 d = NULL;
3280af22 4760 if (!PL_in_eval) {
44a8e56a 4761 if (*s == '#' && *(s+1) == '!')
4762 d = s + 2;
4763#ifdef ALTERNATE_SHEBANG
4764 else {
bfed75c6 4765 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4766 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4767 d = s + (sizeof(as) - 1);
4768 }
4769#endif /* ALTERNATE_SHEBANG */
4770 }
4771 if (d) {
b8378b72 4772 char *ipath;
774d564b 4773 char *ipathend;
b8378b72 4774
774d564b 4775 while (isSPACE(*d))
b8378b72
CS
4776 d++;
4777 ipath = d;
774d564b 4778 while (*d && !isSPACE(*d))
4779 d++;
4780 ipathend = d;
4781
4782#ifdef ARG_ZERO_IS_SCRIPT
4783 if (ipathend > ipath) {
4784 /*
4785 * HP-UX (at least) sets argv[0] to the script name,
4786 * which makes $^X incorrect. And Digital UNIX and Linux,
4787 * at least, set argv[0] to the basename of the Perl
4788 * interpreter. So, having found "#!", we'll set it right.
4789 */
fafc274c
NC
4790 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4791 SVt_PV)); /* $^X */
774d564b 4792 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4793 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4794 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4795 SvSETMAGIC(x);
4796 }
556c1dec
JH
4797 else {
4798 STRLEN blen;
4799 STRLEN llen;
cfd0369c 4800 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4801 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4802 if (llen < blen) {
4803 bstart += blen - llen;
4804 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4805 sv_setpvn(x, ipath, ipathend - ipath);
4806 SvSETMAGIC(x);
4807 }
4808 }
4809 }
774d564b 4810 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4811 }
774d564b 4812#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4813
4814 /*
4815 * Look for options.
4816 */
748a9306 4817 d = instr(s,"perl -");
84e30d1a 4818 if (!d) {
748a9306 4819 d = instr(s,"perl");
84e30d1a
GS
4820#if defined(DOSISH)
4821 /* avoid getting into infinite loops when shebang
4822 * line contains "Perl" rather than "perl" */
4823 if (!d) {
4824 for (d = ipathend-4; d >= ipath; --d) {
4825 if ((*d == 'p' || *d == 'P')
4826 && !ibcmp(d, "perl", 4))
4827 {
4828 break;
4829 }
4830 }
4831 if (d < ipath)
bd61b366 4832 d = NULL;
84e30d1a
GS
4833 }
4834#endif
4835 }
44a8e56a 4836#ifdef ALTERNATE_SHEBANG
4837 /*
4838 * If the ALTERNATE_SHEBANG on this system starts with a
4839 * character that can be part of a Perl expression, then if
4840 * we see it but not "perl", we're probably looking at the
4841 * start of Perl code, not a request to hand off to some
4842 * other interpreter. Similarly, if "perl" is there, but
4843 * not in the first 'word' of the line, we assume the line
4844 * contains the start of the Perl program.
44a8e56a 4845 */
4846 if (d && *s != '#') {
f54cb97a 4847 const char *c = ipath;
44a8e56a 4848 while (*c && !strchr("; \t\r\n\f\v#", *c))
4849 c++;
4850 if (c < d)
bd61b366 4851 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4852 else
4853 *s = '#'; /* Don't try to parse shebang line */
4854 }
774d564b 4855#endif /* ALTERNATE_SHEBANG */
748a9306 4856 if (!d &&
44a8e56a 4857 *s == '#' &&
774d564b 4858 ipathend > ipath &&
3280af22 4859 !PL_minus_c &&
748a9306 4860 !instr(s,"indir") &&
3280af22 4861 instr(PL_origargv[0],"perl"))
748a9306 4862 {
27da23d5 4863 dVAR;
9f68db38 4864 char **newargv;
9f68db38 4865
774d564b 4866 *ipathend = '\0';
4867 s = ipathend + 1;
3280af22 4868 while (s < PL_bufend && isSPACE(*s))
9f68db38 4869 s++;
3280af22 4870 if (s < PL_bufend) {
d85f917e 4871 Newx(newargv,PL_origargc+3,char*);
9f68db38 4872 newargv[1] = s;
3280af22 4873 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4874 s++;
4875 *s = '\0';
3280af22 4876 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4877 }
4878 else
3280af22 4879 newargv = PL_origargv;
774d564b 4880 newargv[0] = ipath;
b35112e7 4881 PERL_FPU_PRE_EXEC
b4748376 4882 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4883 PERL_FPU_POST_EXEC
cea2e8a9 4884 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4885 }
748a9306 4886 if (d) {
c35e046a
AL
4887 while (*d && !isSPACE(*d))
4888 d++;
4889 while (SPACE_OR_TAB(*d))
4890 d++;
748a9306
LW
4891
4892 if (*d++ == '-') {
f54cb97a 4893 const bool switches_done = PL_doswitches;
fb993905
GA
4894 const U32 oldpdb = PL_perldb;
4895 const bool oldn = PL_minus_n;
4896 const bool oldp = PL_minus_p;
c7030b81 4897 const char *d1 = d;
fb993905 4898
8cc95fdb 4899 do {
4ba71d51
FC
4900 bool baduni = FALSE;
4901 if (*d1 == 'C') {
bd0ab00d
NC
4902 const char *d2 = d1 + 1;
4903 if (parse_unicode_opts((const char **)&d2)
4904 != PL_unicode)
4905 baduni = TRUE;
4ba71d51
FC
4906 }
4907 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4908 const char * const m = d1;
4909 while (*d1 && !isSPACE(*d1))
4910 d1++;
cea2e8a9 4911 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4912 (int)(d1 - m), m);
8cc95fdb 4913 }
c7030b81
NC
4914 d1 = moreswitches(d1);
4915 } while (d1);
f0b2cf55
YST
4916 if (PL_doswitches && !switches_done) {
4917 int argc = PL_origargc;
4918 char **argv = PL_origargv;
4919 do {
4920 argc--,argv++;
4921 } while (argc && argv[0][0] == '-' && argv[0][1]);
4922 init_argv_symbols(argc,argv);
4923 }
65269a95 4924 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4925 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4926 /* if we have already added "LINE: while (<>) {",
4927 we must not do it again */
748a9306 4928 {
76f68e9b 4929 sv_setpvs(PL_linestr, "");
3280af22
NIS
4930 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4931 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4932 PL_last_lop = PL_last_uni = NULL;
3280af22 4933 PL_preambled = FALSE;
65269a95 4934 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4935 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4936 goto retry;
4937 }
a0d0e21e 4938 }
79072805 4939 }
9f68db38 4940 }
79072805 4941 }
3280af22
NIS
4942 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4943 PL_bufptr = s;
4944 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4945 return yylex();
ae986130 4946 }
378cc40b 4947 goto retry;
4fdae800 4948 case '\r':
6a27c188 4949#ifdef PERL_STRICT_CR
cea2e8a9 4950 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4951 Perl_croak(aTHX_
cc507455 4952 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4953#endif
4fdae800 4954 case ' ': case '\t': case '\f': case 013:
5db06880 4955#ifdef PERL_MAD
cd81e915 4956 PL_realtokenstart = -1;
ac372eb8
RD
4957 if (!PL_thiswhite)
4958 PL_thiswhite = newSVpvs("");
4959 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4960#endif
ac372eb8 4961 s++;
378cc40b 4962 goto retry;
378cc40b 4963 case '#':
e929a76b 4964 case '\n':
5db06880 4965#ifdef PERL_MAD
cd81e915 4966 PL_realtokenstart = -1;
5db06880 4967 if (PL_madskills)
cd81e915 4968 PL_faketokens = 0;
5db06880 4969#endif
3280af22 4970 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4971 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4972 /* handle eval qq[#line 1 "foo"\n ...] */
4973 CopLINE_dec(PL_curcop);
4974 incline(s);
4975 }
5db06880
NC
4976 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4977 s = SKIPSPACE0(s);
4978 if (!PL_in_eval || PL_rsfp)
4979 incline(s);
4980 }
4981 else {
4982 d = s;
4983 while (d < PL_bufend && *d != '\n')
4984 d++;
4985 if (d < PL_bufend)
4986 d++;
4987 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4988 Perl_croak(aTHX_ "panic: input overflow");
4989#ifdef PERL_MAD
4990 if (PL_madskills)
cd81e915 4991 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4992#endif
4993 s = d;
4994 incline(s);
4995 }
3280af22
NIS
4996 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4997 PL_bufptr = s;
4998 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4999 return yylex();
a687059c 5000 }
378cc40b 5001 }
a687059c 5002 else {
5db06880
NC
5003#ifdef PERL_MAD
5004 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5005 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5006 PL_faketokens = 0;
5db06880
NC
5007 s = SKIPSPACE0(s);
5008 TOKEN(PEG); /* make sure any #! line is accessible */
5009 }
5010 s = SKIPSPACE0(s);
5011 }
5012 else {
5013/* if (PL_madskills && PL_lex_formbrack) { */
5014 d = s;
5015 while (d < PL_bufend && *d != '\n')
5016 d++;
5017 if (d < PL_bufend)
5018 d++;
5019 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5020 Perl_croak(aTHX_ "panic: input overflow");
5021 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5022 if (!PL_thiswhite)
6b29d1f5 5023 PL_thiswhite = newSVpvs("");
5db06880 5024 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5025 sv_setpvs(PL_thiswhite, "");
cd81e915 5026 PL_faketokens = 0;
5db06880 5027 }
cd81e915 5028 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5029 }
5030 s = d;
5031/* }
5032 *s = '\0';
5033 PL_bufend = s; */
5034 }
5035#else
378cc40b 5036 *s = '\0';
3280af22 5037 PL_bufend = s;
5db06880 5038#endif
a687059c 5039 }
378cc40b
LW
5040 goto retry;
5041 case '-':
79072805 5042 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5043 I32 ftst = 0;
90771dc0 5044 char tmp;
e5edeb50 5045
378cc40b 5046 s++;
3280af22 5047 PL_bufptr = s;
748a9306
LW
5048 tmp = *s++;
5049
bf4acbe4 5050 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5051 s++;
5052
5053 if (strnEQ(s,"=>",2)) {
3280af22 5054 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5055 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5056 OPERATOR('-'); /* unary minus */
5057 }
3280af22 5058 PL_last_uni = PL_oldbufptr;
748a9306 5059 switch (tmp) {
e5edeb50
JH
5060 case 'r': ftst = OP_FTEREAD; break;
5061 case 'w': ftst = OP_FTEWRITE; break;
5062 case 'x': ftst = OP_FTEEXEC; break;
5063 case 'o': ftst = OP_FTEOWNED; break;
5064 case 'R': ftst = OP_FTRREAD; break;
5065 case 'W': ftst = OP_FTRWRITE; break;
5066 case 'X': ftst = OP_FTREXEC; break;
5067 case 'O': ftst = OP_FTROWNED; break;
5068 case 'e': ftst = OP_FTIS; break;
5069 case 'z': ftst = OP_FTZERO; break;
5070 case 's': ftst = OP_FTSIZE; break;
5071 case 'f': ftst = OP_FTFILE; break;
5072 case 'd': ftst = OP_FTDIR; break;
5073 case 'l': ftst = OP_FTLINK; break;
5074 case 'p': ftst = OP_FTPIPE; break;
5075 case 'S': ftst = OP_FTSOCK; break;
5076 case 'u': ftst = OP_FTSUID; break;
5077 case 'g': ftst = OP_FTSGID; break;
5078 case 'k': ftst = OP_FTSVTX; break;
5079 case 'b': ftst = OP_FTBLK; break;
5080 case 'c': ftst = OP_FTCHR; break;
5081 case 't': ftst = OP_FTTTY; break;
5082 case 'T': ftst = OP_FTTEXT; break;
5083 case 'B': ftst = OP_FTBINARY; break;
5084 case 'M': case 'A': case 'C':
fafc274c 5085 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5086 switch (tmp) {
5087 case 'M': ftst = OP_FTMTIME; break;
5088 case 'A': ftst = OP_FTATIME; break;
5089 case 'C': ftst = OP_FTCTIME; break;
5090 default: break;
5091 }
5092 break;
378cc40b 5093 default:
378cc40b
LW
5094 break;
5095 }
e5edeb50 5096 if (ftst) {
eb160463 5097 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5098 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5099 "### Saw file test %c\n", (int)tmp);
5f80b19c 5100 } );
e5edeb50
JH
5101 FTST(ftst);
5102 }
5103 else {
5104 /* Assume it was a minus followed by a one-letter named
5105 * subroutine call (or a -bareword), then. */
95c31fe3 5106 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5107 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5108 (int) tmp);
5f80b19c 5109 } );
3cf7b4c4 5110 s = --PL_bufptr;
e5edeb50 5111 }
378cc40b 5112 }
90771dc0
NC
5113 {
5114 const char tmp = *s++;
5115 if (*s == tmp) {
5116 s++;
5117 if (PL_expect == XOPERATOR)
5118 TERM(POSTDEC);
5119 else
5120 OPERATOR(PREDEC);
5121 }
5122 else if (*s == '>') {
5123 s++;
29595ff2 5124 s = SKIPSPACE1(s);
90771dc0
NC
5125 if (isIDFIRST_lazy_if(s,UTF)) {
5126 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5127 TOKEN(ARROW);
5128 }
5129 else if (*s == '$')
5130 OPERATOR(ARROW);
5131 else
5132 TERM(ARROW);
5133 }
3280af22 5134 if (PL_expect == XOPERATOR)
90771dc0
NC
5135 Aop(OP_SUBTRACT);
5136 else {
5137 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5138 check_uni();
5139 OPERATOR('-'); /* unary minus */
79072805 5140 }
2f3197b3 5141 }
79072805 5142
378cc40b 5143 case '+':
90771dc0
NC
5144 {
5145 const char tmp = *s++;
5146 if (*s == tmp) {
5147 s++;
5148 if (PL_expect == XOPERATOR)
5149 TERM(POSTINC);
5150 else
5151 OPERATOR(PREINC);
5152 }
3280af22 5153 if (PL_expect == XOPERATOR)
90771dc0
NC
5154 Aop(OP_ADD);
5155 else {
5156 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5157 check_uni();
5158 OPERATOR('+');
5159 }
2f3197b3 5160 }
a687059c 5161
378cc40b 5162 case '*':
3280af22
NIS
5163 if (PL_expect != XOPERATOR) {
5164 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5165 PL_expect = XOPERATOR;
5166 force_ident(PL_tokenbuf, '*');
5167 if (!*PL_tokenbuf)
a0d0e21e 5168 PREREF('*');
79072805 5169 TERM('*');
a687059c 5170 }
79072805
LW
5171 s++;
5172 if (*s == '*') {
a687059c 5173 s++;
79072805 5174 PWop(OP_POW);
a687059c 5175 }
79072805
LW
5176 Mop(OP_MULTIPLY);
5177
378cc40b 5178 case '%':
3280af22 5179 if (PL_expect == XOPERATOR) {
bbce6d69 5180 ++s;
5181 Mop(OP_MODULO);
a687059c 5182 }
3280af22 5183 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5184 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5185 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5186 if (!PL_tokenbuf[1]) {
bbce6d69 5187 PREREF('%');
a687059c 5188 }
3280af22 5189 PL_pending_ident = '%';
bbce6d69 5190 TERM('%');
a687059c 5191
378cc40b 5192 case '^':
79072805 5193 s++;
a0d0e21e 5194 BOop(OP_BIT_XOR);
79072805 5195 case '[':
a7aaec61
Z
5196 if (PL_lex_brackets > 100)
5197 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5198 PL_lex_brackstack[PL_lex_brackets++] = 0;
df3467db
IG
5199 {
5200 const char tmp = *s++;
5201 OPERATOR(tmp);
5202 }
378cc40b 5203 case '~':
0d863452 5204 if (s[1] == '~'
3e7dd34d 5205 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
5206 {
5207 s += 2;
5208 Eop(OP_SMARTMATCH);
5209 }
378cc40b 5210 case ',':
90771dc0
NC
5211 {
5212 const char tmp = *s++;
5213 OPERATOR(tmp);
5214 }
a0d0e21e
LW
5215 case ':':
5216 if (s[1] == ':') {
5217 len = 0;
0bfa2a8a 5218 goto just_a_word_zero_gv;
a0d0e21e
LW
5219 }
5220 s++;
09bef843
SB
5221 switch (PL_expect) {
5222 OP *attrs;
5db06880
NC
5223#ifdef PERL_MAD
5224 I32 stuffstart;
5225#endif
09bef843
SB
5226 case XOPERATOR:
5227 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5228 break;
5229 PL_bufptr = s; /* update in case we back off */
d83f38d8
NC
5230 if (*s == '=') {
5231 deprecate(":= for an empty attribute list");
5232 }
09bef843
SB
5233 goto grabattrs;
5234 case XATTRBLOCK:
5235 PL_expect = XBLOCK;
5236 goto grabattrs;
5237 case XATTRTERM:
5238 PL_expect = XTERMBLOCK;
5239 grabattrs:
5db06880
NC
5240#ifdef PERL_MAD
5241 stuffstart = s - SvPVX(PL_linestr) - 1;
5242#endif
29595ff2 5243 s = PEEKSPACE(s);
5f66b61c 5244 attrs = NULL;
7e2040f0 5245 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5246 I32 tmp;
5cc237b8 5247 SV *sv;
09bef843 5248 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5249 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5250 if (tmp < 0) tmp = -tmp;
5251 switch (tmp) {
5252 case KEY_or:
5253 case KEY_and:
5254 case KEY_for:
11baf631 5255 case KEY_foreach:
f9829d6b
GS
5256 case KEY_unless:
5257 case KEY_if:
5258 case KEY_while:
5259 case KEY_until:
5260 goto got_attrs;
5261 default:
5262 break;
5263 }
5264 }
5cc237b8 5265 sv = newSVpvn(s, len);
09bef843
SB
5266 if (*d == '(') {
5267 d = scan_str(d,TRUE,TRUE);
5268 if (!d) {
09bef843
SB
5269 /* MUST advance bufptr here to avoid bogus
5270 "at end of line" context messages from yyerror().
5271 */
5272 PL_bufptr = s + len;
5273 yyerror("Unterminated attribute parameter in attribute list");
5274 if (attrs)
5275 op_free(attrs);
5cc237b8 5276 sv_free(sv);
bbf60fe6 5277 return REPORT(0); /* EOF indicator */
09bef843
SB
5278 }
5279 }
5280 if (PL_lex_stuff) {
09bef843 5281 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5282 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5283 newSVOP(OP_CONST, 0, sv));
5284 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5285 PL_lex_stuff = NULL;
09bef843
SB
5286 }
5287 else {
5cc237b8
BS
5288 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5289 sv_free(sv);
1108974d 5290 if (PL_in_my == KEY_our) {
df9a6019 5291 deprecate(":unique");
1108974d 5292 }
bfed75c6 5293 else
371fce9b
DM
5294 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5295 }
5296
d3cea301
SB
5297 /* NOTE: any CV attrs applied here need to be part of
5298 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5299 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5300 sv_free(sv);
78f9721b 5301 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5302 }
5303 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5304 sv_free(sv);
8e5dadda 5305 deprecate(":locked");
5cc237b8
BS
5306 }
5307 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5308 sv_free(sv);
78f9721b 5309 CvMETHOD_on(PL_compcv);
5cc237b8 5310 }
78f9721b
SM
5311 /* After we've set the flags, it could be argued that
5312 we don't need to do the attributes.pm-based setting
5313 process, and shouldn't bother appending recognized
d3cea301
SB
5314 flags. To experiment with that, uncomment the
5315 following "else". (Note that's already been
5316 uncommented. That keeps the above-applied built-in
5317 attributes from being intercepted (and possibly
5318 rejected) by a package's attribute routines, but is
5319 justified by the performance win for the common case
5320 of applying only built-in attributes.) */
0256094b 5321 else
2fcb4757 5322 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5323 newSVOP(OP_CONST, 0,
5cc237b8 5324 sv));
09bef843 5325 }
29595ff2 5326 s = PEEKSPACE(d);
0120eecf 5327 if (*s == ':' && s[1] != ':')
29595ff2 5328 s = PEEKSPACE(s+1);
0120eecf
GS
5329 else if (s == d)
5330 break; /* require real whitespace or :'s */
29595ff2 5331 /* XXX losing whitespace on sequential attributes here */
09bef843 5332 }
90771dc0
NC
5333 {
5334 const char tmp
5335 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5336 if (*s != ';' && *s != '}' && *s != tmp
5337 && (tmp != '=' || *s != ')')) {
5338 const char q = ((*s == '\'') ? '"' : '\'');
5339 /* If here for an expression, and parsed no attrs, back
5340 off. */
5341 if (tmp == '=' && !attrs) {
5342 s = PL_bufptr;
5343 break;
5344 }
5345 /* MUST advance bufptr here to avoid bogus "at end of line"
5346 context messages from yyerror().
5347 */
5348 PL_bufptr = s;
10edeb5d
JH
5349 yyerror( (const char *)
5350 (*s
5351 ? Perl_form(aTHX_ "Invalid separator character "
5352 "%c%c%c in attribute list", q, *s, q)
5353 : "Unterminated attribute list" ) );
90771dc0
NC
5354 if (attrs)
5355 op_free(attrs);
5356 OPERATOR(':');
09bef843 5357 }
09bef843 5358 }
f9829d6b 5359 got_attrs:
09bef843 5360 if (attrs) {
cd81e915 5361 start_force(PL_curforce);
9ded7720 5362 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5363 CURMAD('_', PL_nextwhite);
89122651 5364 force_next(THING);
5db06880
NC
5365 }
5366#ifdef PERL_MAD
5367 if (PL_madskills) {
cd81e915 5368 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5369 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5370 }
5db06880 5371#endif
09bef843
SB
5372 TOKEN(COLONATTR);
5373 }
a0d0e21e 5374 OPERATOR(':');
8990e307
LW
5375 case '(':
5376 s++;
3280af22
NIS
5377 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5378 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5379 else
3280af22 5380 PL_expect = XTERM;
29595ff2 5381 s = SKIPSPACE1(s);
a0d0e21e 5382 TOKEN('(');
378cc40b 5383 case ';':
f4dd75d9 5384 CLINE;
90771dc0
NC
5385 {
5386 const char tmp = *s++;
5387 OPERATOR(tmp);
5388 }
378cc40b 5389 case ')':
90771dc0
NC
5390 {
5391 const char tmp = *s++;
29595ff2 5392 s = SKIPSPACE1(s);
90771dc0
NC
5393 if (*s == '{')
5394 PREBLOCK(tmp);
5395 TERM(tmp);
5396 }
79072805 5397 case ']':
a7aaec61
Z
5398 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5399 TOKEN(0);
79072805 5400 s++;
3280af22 5401 if (PL_lex_brackets <= 0)
d98d5fff 5402 yyerror("Unmatched right square bracket");
463ee0b2 5403 else
3280af22
NIS
5404 --PL_lex_brackets;
5405 if (PL_lex_state == LEX_INTERPNORMAL) {
5406 if (PL_lex_brackets == 0) {
02255c60
FC
5407 if (*s == '-' && s[1] == '>')
5408 PL_lex_state = LEX_INTERPENDMAYBE;
5409 else if (*s != '[' && *s != '{')
3280af22 5410 PL_lex_state = LEX_INTERPEND;
79072805
LW
5411 }
5412 }
4633a7c4 5413 TERM(']');
79072805
LW
5414 case '{':
5415 leftbracket:
79072805 5416 s++;
3280af22 5417 if (PL_lex_brackets > 100) {
8edd5f42 5418 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5419 }
3280af22 5420 switch (PL_expect) {
a0d0e21e 5421 case XTERM:
3280af22 5422 if (PL_lex_formbrack) {
a0d0e21e
LW
5423 s--;
5424 PRETERMBLOCK(DO);
5425 }
3280af22
NIS
5426 if (PL_oldoldbufptr == PL_last_lop)
5427 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5428 else
3280af22 5429 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5430 OPERATOR(HASHBRACK);
a0d0e21e 5431 case XOPERATOR:
bf4acbe4 5432 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5433 s++;
44a8e56a 5434 d = s;
3280af22
NIS
5435 PL_tokenbuf[0] = '\0';
5436 if (d < PL_bufend && *d == '-') {
5437 PL_tokenbuf[0] = '-';
44a8e56a 5438 d++;
bf4acbe4 5439 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5440 d++;
5441 }
7e2040f0 5442 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5443 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5444 FALSE, &len);
bf4acbe4 5445 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5446 d++;
5447 if (*d == '}') {
f54cb97a 5448 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5449 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5450 if (minus)
5451 force_next('-');
748a9306
LW
5452 }
5453 }
5454 /* FALL THROUGH */
09bef843 5455 case XATTRBLOCK:
748a9306 5456 case XBLOCK:
3280af22
NIS
5457 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5458 PL_expect = XSTATE;
a0d0e21e 5459 break;
09bef843 5460 case XATTRTERM:
a0d0e21e 5461 case XTERMBLOCK:
3280af22
NIS
5462 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5463 PL_expect = XSTATE;
a0d0e21e
LW
5464 break;
5465 default: {
f54cb97a 5466 const char *t;
3280af22
NIS
5467 if (PL_oldoldbufptr == PL_last_lop)
5468 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5469 else
3280af22 5470 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5471 s = SKIPSPACE1(s);
8452ff4b
SB
5472 if (*s == '}') {
5473 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5474 PL_expect = XTERM;
5475 /* This hack is to get the ${} in the message. */
5476 PL_bufptr = s+1;
5477 yyerror("syntax error");
5478 break;
5479 }
a0d0e21e 5480 OPERATOR(HASHBRACK);
8452ff4b 5481 }
b8a4b1be
GS
5482 /* This hack serves to disambiguate a pair of curlies
5483 * as being a block or an anon hash. Normally, expectation
5484 * determines that, but in cases where we're not in a
5485 * position to expect anything in particular (like inside
5486 * eval"") we have to resolve the ambiguity. This code
5487 * covers the case where the first term in the curlies is a
5488 * quoted string. Most other cases need to be explicitly
a0288114 5489 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5490 * curly in order to force resolution as an anon hash.
5491 *
5492 * XXX should probably propagate the outer expectation
5493 * into eval"" to rely less on this hack, but that could
5494 * potentially break current behavior of eval"".
5495 * GSAR 97-07-21
5496 */
5497 t = s;
5498 if (*s == '\'' || *s == '"' || *s == '`') {
5499 /* common case: get past first string, handling escapes */
3280af22 5500 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5501 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5502 t++;
5503 t++;
a0d0e21e 5504 }
b8a4b1be 5505 else if (*s == 'q') {
3280af22 5506 if (++t < PL_bufend
b8a4b1be 5507 && (!isALNUM(*t)
3280af22 5508 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5509 && !isALNUM(*t))))
5510 {
abc667d1 5511 /* skip q//-like construct */
f54cb97a 5512 const char *tmps;
b8a4b1be
GS
5513 char open, close, term;
5514 I32 brackets = 1;
5515
3280af22 5516 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5517 t++;
abc667d1
DM
5518 /* check for q => */
5519 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5520 OPERATOR(HASHBRACK);
5521 }
b8a4b1be
GS
5522 term = *t;
5523 open = term;
5524 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5525 term = tmps[5];
5526 close = term;
5527 if (open == close)
3280af22
NIS
5528 for (t++; t < PL_bufend; t++) {
5529 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5530 t++;
6d07e5e9 5531 else if (*t == open)
b8a4b1be
GS
5532 break;
5533 }
abc667d1 5534 else {
3280af22
NIS
5535 for (t++; t < PL_bufend; t++) {
5536 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5537 t++;
6d07e5e9 5538 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5539 break;
5540 else if (*t == open)
5541 brackets++;
5542 }
abc667d1
DM
5543 }
5544 t++;
b8a4b1be 5545 }
abc667d1
DM
5546 else
5547 /* skip plain q word */
5548 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5549 t += UTF8SKIP(t);
a0d0e21e 5550 }
7e2040f0 5551 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5552 t += UTF8SKIP(t);
7e2040f0 5553 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5554 t += UTF8SKIP(t);
a0d0e21e 5555 }
3280af22 5556 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5557 t++;
b8a4b1be
GS
5558 /* if comma follows first term, call it an anon hash */
5559 /* XXX it could be a comma expression with loop modifiers */
3280af22 5560 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5561 || (*t == '=' && t[1] == '>')))
a0d0e21e 5562 OPERATOR(HASHBRACK);
3280af22 5563 if (PL_expect == XREF)
4e4e412b 5564 PL_expect = XTERM;
a0d0e21e 5565 else {
3280af22
NIS
5566 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5567 PL_expect = XSTATE;
a0d0e21e 5568 }
8990e307 5569 }
a0d0e21e 5570 break;
463ee0b2 5571 }
6154021b 5572 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5573 if (isSPACE(*s) || *s == '#')
3280af22 5574 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5575 TOKEN('{');
378cc40b 5576 case '}':
a7aaec61
Z
5577 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5578 TOKEN(0);
79072805
LW
5579 rightbracket:
5580 s++;
3280af22 5581 if (PL_lex_brackets <= 0)
d98d5fff 5582 yyerror("Unmatched right curly bracket");
463ee0b2 5583 else
3280af22 5584 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5585 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5586 PL_lex_formbrack = 0;
5587 if (PL_lex_state == LEX_INTERPNORMAL) {
5588 if (PL_lex_brackets == 0) {
9059aa12
LW
5589 if (PL_expect & XFAKEBRACK) {
5590 PL_expect &= XENUMMASK;
3280af22
NIS
5591 PL_lex_state = LEX_INTERPEND;
5592 PL_bufptr = s;
5db06880
NC
5593#if 0
5594 if (PL_madskills) {
cd81e915 5595 if (!PL_thiswhite)
6b29d1f5 5596 PL_thiswhite = newSVpvs("");
76f68e9b 5597 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5598 }
5599#endif
cea2e8a9 5600 return yylex(); /* ignore fake brackets */
79072805 5601 }
fa83b5b6 5602 if (*s == '-' && s[1] == '>')
3280af22 5603 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5604 else if (*s != '[' && *s != '{')
3280af22 5605 PL_lex_state = LEX_INTERPEND;
79072805
LW
5606 }
5607 }
9059aa12
LW
5608 if (PL_expect & XFAKEBRACK) {
5609 PL_expect &= XENUMMASK;
3280af22 5610 PL_bufptr = s;
cea2e8a9 5611 return yylex(); /* ignore fake brackets */
748a9306 5612 }
cd81e915 5613 start_force(PL_curforce);
5db06880
NC
5614 if (PL_madskills) {
5615 curmad('X', newSVpvn(s-1,1));
cd81e915 5616 CURMAD('_', PL_thiswhite);
5db06880 5617 }
79072805 5618 force_next('}');
5db06880 5619#ifdef PERL_MAD
cd81e915 5620 if (!PL_thistoken)
6b29d1f5 5621 PL_thistoken = newSVpvs("");
5db06880 5622#endif
79072805 5623 TOKEN(';');
378cc40b
LW
5624 case '&':
5625 s++;
90771dc0 5626 if (*s++ == '&')
a0d0e21e 5627 AOPERATOR(ANDAND);
378cc40b 5628 s--;
3280af22 5629 if (PL_expect == XOPERATOR) {
041457d9
DM
5630 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5631 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5632 {
57843af0 5633 CopLINE_dec(PL_curcop);
f1f66076 5634 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5635 CopLINE_inc(PL_curcop);
463ee0b2 5636 }
79072805 5637 BAop(OP_BIT_AND);
463ee0b2 5638 }
79072805 5639
3280af22
NIS
5640 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5641 if (*PL_tokenbuf) {
5642 PL_expect = XOPERATOR;
5643 force_ident(PL_tokenbuf, '&');
463ee0b2 5644 }
79072805
LW
5645 else
5646 PREREF('&');
6154021b 5647 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5648 TERM('&');
5649
378cc40b
LW
5650 case '|':
5651 s++;
90771dc0 5652 if (*s++ == '|')
a0d0e21e 5653 AOPERATOR(OROR);
378cc40b 5654 s--;
79072805 5655 BOop(OP_BIT_OR);
378cc40b
LW
5656 case '=':
5657 s++;
748a9306 5658 {
90771dc0
NC
5659 const char tmp = *s++;
5660 if (tmp == '=')
5661 Eop(OP_EQ);
5662 if (tmp == '>')
5663 OPERATOR(',');
5664 if (tmp == '~')
5665 PMop(OP_MATCH);
5666 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5667 && strchr("+-*/%.^&|<",tmp))
5668 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5669 "Reversed %c= operator",(int)tmp);
5670 s--;
5671 if (PL_expect == XSTATE && isALPHA(tmp) &&
5672 (s == PL_linestart+1 || s[-2] == '\n') )
5673 {
5674 if (PL_in_eval && !PL_rsfp) {
5675 d = PL_bufend;
5676 while (s < d) {
5677 if (*s++ == '\n') {
5678 incline(s);
5679 if (strnEQ(s,"=cut",4)) {
5680 s = strchr(s,'\n');
5681 if (s)
5682 s++;
5683 else
5684 s = d;
5685 incline(s);
5686 goto retry;
5687 }
5688 }
a5f75d66 5689 }
90771dc0 5690 goto retry;
a5f75d66 5691 }
5db06880
NC
5692#ifdef PERL_MAD
5693 if (PL_madskills) {
cd81e915 5694 if (!PL_thiswhite)
6b29d1f5 5695 PL_thiswhite = newSVpvs("");
cd81e915 5696 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5697 PL_bufend - PL_linestart);
5698 }
5699#endif
90771dc0 5700 s = PL_bufend;
737c24fc 5701 PL_parser->in_pod = 1;
90771dc0 5702 goto retry;
a5f75d66 5703 }
a0d0e21e 5704 }
3280af22 5705 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5706 const char *t = s;
51882d45 5707#ifdef PERL_STRICT_CR
c35e046a 5708 while (SPACE_OR_TAB(*t))
51882d45 5709#else
c35e046a 5710 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5711#endif
c35e046a 5712 t++;
a0d0e21e
LW
5713 if (*t == '\n' || *t == '#') {
5714 s--;
3280af22 5715 PL_expect = XBLOCK;
a0d0e21e
LW
5716 goto leftbracket;
5717 }
79072805 5718 }
6154021b 5719 pl_yylval.ival = 0;
a0d0e21e 5720 OPERATOR(ASSIGNOP);
378cc40b
LW
5721 case '!':
5722 s++;
90771dc0
NC
5723 {
5724 const char tmp = *s++;
5725 if (tmp == '=') {
5726 /* was this !=~ where !~ was meant?
5727 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5728
5729 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5730 const char *t = s+1;
5731
5732 while (t < PL_bufend && isSPACE(*t))
5733 ++t;
5734
5735 if (*t == '/' || *t == '?' ||
5736 ((*t == 'm' || *t == 's' || *t == 'y')
5737 && !isALNUM(t[1])) ||
5738 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5739 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5740 "!=~ should be !~");
5741 }
5742 Eop(OP_NE);
5743 }
5744 if (tmp == '~')
5745 PMop(OP_NOT);
5746 }
378cc40b
LW
5747 s--;
5748 OPERATOR('!');
5749 case '<':
3280af22 5750 if (PL_expect != XOPERATOR) {
93a17b20 5751 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5752 check_uni();
79072805
LW
5753 if (s[1] == '<')
5754 s = scan_heredoc(s);
5755 else
5756 s = scan_inputsymbol(s);
5757 TERM(sublex_start());
378cc40b
LW
5758 }
5759 s++;
90771dc0
NC
5760 {
5761 char tmp = *s++;
5762 if (tmp == '<')
5763 SHop(OP_LEFT_SHIFT);
5764 if (tmp == '=') {
5765 tmp = *s++;
5766 if (tmp == '>')
5767 Eop(OP_NCMP);
5768 s--;
5769 Rop(OP_LE);
5770 }
395c3793 5771 }
378cc40b 5772 s--;
79072805 5773 Rop(OP_LT);
378cc40b
LW
5774 case '>':
5775 s++;
90771dc0
NC
5776 {
5777 const char tmp = *s++;
5778 if (tmp == '>')
5779 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5780 else if (tmp == '=')
90771dc0
NC
5781 Rop(OP_GE);
5782 }
378cc40b 5783 s--;
79072805 5784 Rop(OP_GT);
378cc40b
LW
5785
5786 case '$':
bbce6d69 5787 CLINE;
5788
3280af22
NIS
5789 if (PL_expect == XOPERATOR) {
5790 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5791 return deprecate_commaless_var_list();
a0d0e21e 5792 }
8990e307 5793 }
a0d0e21e 5794
c0b977fd 5795 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5796 PL_tokenbuf[0] = '@';
376b8730
SM
5797 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5798 sizeof PL_tokenbuf - 1, FALSE);
5799 if (PL_expect == XOPERATOR)
5800 no_op("Array length", s);
3280af22 5801 if (!PL_tokenbuf[1])
a0d0e21e 5802 PREREF(DOLSHARP);
3280af22
NIS
5803 PL_expect = XOPERATOR;
5804 PL_pending_ident = '#';
463ee0b2 5805 TOKEN(DOLSHARP);
79072805 5806 }
bbce6d69 5807
3280af22 5808 PL_tokenbuf[0] = '$';
376b8730
SM
5809 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5810 sizeof PL_tokenbuf - 1, FALSE);
5811 if (PL_expect == XOPERATOR)
5812 no_op("Scalar", s);
3280af22
NIS
5813 if (!PL_tokenbuf[1]) {
5814 if (s == PL_bufend)
bbce6d69 5815 yyerror("Final $ should be \\$ or $name");
5816 PREREF('$');
8990e307 5817 }
a0d0e21e 5818
bbce6d69 5819 /* This kludge not intended to be bulletproof. */
3280af22 5820 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5821 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5822 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5823 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5824 TERM(THING);
5825 }
5826
ff68c719 5827 d = s;
90771dc0
NC
5828 {
5829 const char tmp = *s;
ae28bb2a 5830 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5831 s = SKIPSPACE1(s);
ff68c719 5832
90771dc0
NC
5833 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5834 && intuit_more(s)) {
5835 if (*s == '[') {
5836 PL_tokenbuf[0] = '@';
5837 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5838 char *t = s+1;
5839
5840 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5841 t++;
90771dc0 5842 if (*t++ == ',') {
29595ff2 5843 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5844 while (t < PL_bufend && *t != ']')
5845 t++;
9014280d 5846 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5847 "Multidimensional syntax %.*s not supported",
36c7798d 5848 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5849 }
748a9306 5850 }
93a17b20 5851 }
90771dc0
NC
5852 else if (*s == '{') {
5853 char *t;
5854 PL_tokenbuf[0] = '%';
5855 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5856 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5857 {
5858 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5859 do {
5860 t++;
5861 } while (isSPACE(*t));
90771dc0 5862 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5863 STRLEN len;
90771dc0 5864 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5865 &len);
c35e046a
AL
5866 while (isSPACE(*t))
5867 t++;
780a5241 5868 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5869 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5870 "You need to quote \"%s\"",
5871 tmpbuf);
5872 }
5873 }
5874 }
93a17b20 5875 }
bbce6d69 5876
90771dc0
NC
5877 PL_expect = XOPERATOR;
5878 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5879 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5880 if (!islop || PL_last_lop_op == OP_GREPSTART)
5881 PL_expect = XOPERATOR;
5882 else if (strchr("$@\"'`q", *s))
5883 PL_expect = XTERM; /* e.g. print $fh "foo" */
5884 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5885 PL_expect = XTERM; /* e.g. print $fh &sub */
5886 else if (isIDFIRST_lazy_if(s,UTF)) {
5887 char tmpbuf[sizeof PL_tokenbuf];
5888 int t2;
5889 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5890 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5891 /* binary operators exclude handle interpretations */
5892 switch (t2) {
5893 case -KEY_x:
5894 case -KEY_eq:
5895 case -KEY_ne:
5896 case -KEY_gt:
5897 case -KEY_lt:
5898 case -KEY_ge:
5899 case -KEY_le:
5900 case -KEY_cmp:
5901 break;
5902 default:
5903 PL_expect = XTERM; /* e.g. print $fh length() */
5904 break;
5905 }
5906 }
5907 else {
5908 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5909 }
5910 }
90771dc0
NC
5911 else if (isDIGIT(*s))
5912 PL_expect = XTERM; /* e.g. print $fh 3 */
5913 else if (*s == '.' && isDIGIT(s[1]))
5914 PL_expect = XTERM; /* e.g. print $fh .3 */
5915 else if ((*s == '?' || *s == '-' || *s == '+')
5916 && !isSPACE(s[1]) && s[1] != '=')
5917 PL_expect = XTERM; /* e.g. print $fh -1 */
5918 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5919 && s[1] != '/')
5920 PL_expect = XTERM; /* e.g. print $fh /.../
5921 XXX except DORDOR operator
5922 */
5923 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5924 && s[2] != '=')
5925 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5926 }
bbce6d69 5927 }
3280af22 5928 PL_pending_ident = '$';
79072805 5929 TOKEN('$');
378cc40b
LW
5930
5931 case '@':
3280af22 5932 if (PL_expect == XOPERATOR)
bbce6d69 5933 no_op("Array", s);
3280af22
NIS
5934 PL_tokenbuf[0] = '@';
5935 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5936 if (!PL_tokenbuf[1]) {
bbce6d69 5937 PREREF('@');
5938 }
3280af22 5939 if (PL_lex_state == LEX_NORMAL)
29595ff2 5940 s = SKIPSPACE1(s);
3280af22 5941 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5942 if (*s == '{')
3280af22 5943 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5944
5945 /* Warn about @ where they meant $. */
041457d9
DM
5946 if (*s == '[' || *s == '{') {
5947 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5948 const char *t = s + 1;
7e2040f0 5949 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5950 t++;
5951 if (*t == '}' || *t == ']') {
5952 t++;
29595ff2 5953 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5954 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5955 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5956 (int)(t-PL_bufptr), PL_bufptr,
5957 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5958 }
93a17b20
LW
5959 }
5960 }
463ee0b2 5961 }
3280af22 5962 PL_pending_ident = '@';
79072805 5963 TERM('@');
378cc40b 5964
c963b151 5965 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5966 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5967 s += 2;
5968 AOPERATOR(DORDOR);
5969 }
c963b151 5970 case '?': /* may either be conditional or pattern */
be25f609 5971 if (PL_expect == XOPERATOR) {
90771dc0 5972 char tmp = *s++;
c963b151 5973 if(tmp == '?') {
be25f609 5974 OPERATOR('?');
c963b151
BD
5975 }
5976 else {
5977 tmp = *s++;
5978 if(tmp == '/') {
5979 /* A // operator. */
5980 AOPERATOR(DORDOR);
5981 }
5982 else {
5983 s--;
5984 Mop(OP_DIVIDE);
5985 }
5986 }
5987 }
5988 else {
5989 /* Disable warning on "study /blah/" */
5990 if (PL_oldoldbufptr == PL_last_uni
5991 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5992 || memNE(PL_last_uni, "study", 5)
5993 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5994 ))
5995 check_uni();
5996 s = scan_pat(s,OP_MATCH);
5997 TERM(sublex_start());
5998 }
378cc40b
LW
5999
6000 case '.':
51882d45
GS
6001 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6002#ifdef PERL_STRICT_CR
6003 && s[1] == '\n'
6004#else
6005 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6006#endif
6007 && (s == PL_linestart || s[-1] == '\n') )
6008 {
3280af22
NIS
6009 PL_lex_formbrack = 0;
6010 PL_expect = XSTATE;
79072805
LW
6011 goto rightbracket;
6012 }
be25f609 6013 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6014 s += 3;
6015 OPERATOR(YADAYADA);
6016 }
3280af22 6017 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6018 char tmp = *s++;
a687059c
LW
6019 if (*s == tmp) {
6020 s++;
2f3197b3
LW
6021 if (*s == tmp) {
6022 s++;
6154021b 6023 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6024 }
6025 else
6154021b 6026 pl_yylval.ival = 0;
378cc40b 6027 OPERATOR(DOTDOT);
a687059c 6028 }
79072805 6029 Aop(OP_CONCAT);
378cc40b
LW
6030 }
6031 /* FALL THROUGH */
6032 case '0': case '1': case '2': case '3': case '4':
6033 case '5': case '6': case '7': case '8': case '9':
6154021b 6034 s = scan_num(s, &pl_yylval);
931e0695 6035 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6036 if (PL_expect == XOPERATOR)
8990e307 6037 no_op("Number",s);
79072805
LW
6038 TERM(THING);
6039
6040 case '\'':
5db06880 6041 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6042 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6043 if (PL_expect == XOPERATOR) {
6044 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6045 return deprecate_commaless_var_list();
a0d0e21e 6046 }
463ee0b2 6047 else
8990e307 6048 no_op("String",s);
463ee0b2 6049 }
79072805 6050 if (!s)
d4c19fe8 6051 missingterm(NULL);
6154021b 6052 pl_yylval.ival = OP_CONST;
79072805
LW
6053 TERM(sublex_start());
6054
6055 case '"':
5db06880 6056 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6057 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6058 if (PL_expect == XOPERATOR) {
6059 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6060 return deprecate_commaless_var_list();
a0d0e21e 6061 }
463ee0b2 6062 else
8990e307 6063 no_op("String",s);
463ee0b2 6064 }
79072805 6065 if (!s)
d4c19fe8 6066 missingterm(NULL);
6154021b 6067 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6068 /* FIXME. I think that this can be const if char *d is replaced by
6069 more localised variables. */
3280af22 6070 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6071 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6072 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6073 break;
6074 }
6075 }
79072805
LW
6076 TERM(sublex_start());
6077
6078 case '`':
5db06880 6079 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6080 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6081 if (PL_expect == XOPERATOR)
8990e307 6082 no_op("Backticks",s);
79072805 6083 if (!s)
d4c19fe8 6084 missingterm(NULL);
9b201d7d 6085 readpipe_override();
79072805
LW
6086 TERM(sublex_start());
6087
6088 case '\\':
6089 s++;
a2a5de95
NC
6090 if (PL_lex_inwhat && isDIGIT(*s))
6091 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6092 *s, *s);
3280af22 6093 if (PL_expect == XOPERATOR)
8990e307 6094 no_op("Backslash",s);
79072805
LW
6095 OPERATOR(REFGEN);
6096
a7cb1f99 6097 case 'v':
e526c9e6 6098 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6099 char *start = s + 2;
dd629d5b 6100 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6101 start++;
6102 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6103 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6104 TERM(THING);
6105 }
e526c9e6 6106 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6107 else if (!isALPHA(*start) && (PL_expect == XTERM
6108 || PL_expect == XREF || PL_expect == XSTATE
6109 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6110 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6111 if (!gv) {
6154021b 6112 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6113 TERM(THING);
6114 }
6115 }
a7cb1f99
GS
6116 }
6117 goto keylookup;
79072805 6118 case 'x':
3280af22 6119 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6120 s++;
6121 Mop(OP_REPEAT);
2f3197b3 6122 }
79072805
LW
6123 goto keylookup;
6124
378cc40b 6125 case '_':
79072805
LW
6126 case 'a': case 'A':
6127 case 'b': case 'B':
6128 case 'c': case 'C':
6129 case 'd': case 'D':
6130 case 'e': case 'E':
6131 case 'f': case 'F':
6132 case 'g': case 'G':
6133 case 'h': case 'H':
6134 case 'i': case 'I':
6135 case 'j': case 'J':
6136 case 'k': case 'K':
6137 case 'l': case 'L':
6138 case 'm': case 'M':
6139 case 'n': case 'N':
6140 case 'o': case 'O':
6141 case 'p': case 'P':
6142 case 'q': case 'Q':
6143 case 'r': case 'R':
6144 case 's': case 'S':
6145 case 't': case 'T':
6146 case 'u': case 'U':
a7cb1f99 6147 case 'V':
79072805
LW
6148 case 'w': case 'W':
6149 case 'X':
6150 case 'y': case 'Y':
6151 case 'z': case 'Z':
6152
49dc05e3 6153 keylookup: {
88e1f1a2 6154 bool anydelim;
90771dc0 6155 I32 tmp;
10edeb5d
JH
6156
6157 orig_keyword = 0;
6158 gv = NULL;
6159 gvp = NULL;
49dc05e3 6160
3280af22
NIS
6161 PL_bufptr = s;
6162 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6163
6164 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6165 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6166
6167 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6168 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6169 goto just_a_word;
6170
3643fb5f 6171 d = s;
3280af22 6172 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6173 d++; /* no comments skipped here, or s### is misparsed */
6174
748a9306 6175 /* Is this a word before a => operator? */
1c3923b3 6176 if (*d == '=' && d[1] == '>') {
748a9306 6177 CLINE;
6154021b 6178 pl_yylval.opval
d0a148a6
NC
6179 = (OP*)newSVOP(OP_CONST, 0,
6180 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6181 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6182 TERM(WORD);
6183 }
6184
88e1f1a2
JV
6185 /* Check for plugged-in keyword */
6186 {
6187 OP *o;
6188 int result;
6189 char *saved_bufptr = PL_bufptr;
6190 PL_bufptr = s;
16c91539 6191 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6192 s = PL_bufptr;
6193 if (result == KEYWORD_PLUGIN_DECLINE) {
6194 /* not a plugged-in keyword */
6195 PL_bufptr = saved_bufptr;
6196 } else if (result == KEYWORD_PLUGIN_STMT) {
6197 pl_yylval.opval = o;
6198 CLINE;
6199 PL_expect = XSTATE;
6200 return REPORT(PLUGSTMT);
6201 } else if (result == KEYWORD_PLUGIN_EXPR) {
6202 pl_yylval.opval = o;
6203 CLINE;
6204 PL_expect = XOPERATOR;
6205 return REPORT(PLUGEXPR);
6206 } else {
6207 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6208 PL_tokenbuf);
6209 }
6210 }
6211
6212 /* Check for built-in keyword */
6213 tmp = keyword(PL_tokenbuf, len, 0);
6214
6215 /* Is this a label? */
6216 if (!anydelim && PL_expect == XSTATE
6217 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6218 s = d + 1;
6219 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6220 CLINE;
6221 TOKEN(LABEL);
6222 }
6223
a0d0e21e 6224 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6225 GV *ogv = NULL; /* override (winner) */
6226 GV *hgv = NULL; /* hidden (loser) */
3280af22 6227 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6228 CV *cv;
90e5519e 6229 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6230 (cv = GvCVu(gv)))
6231 {
6232 if (GvIMPORTED_CV(gv))
6233 ogv = gv;
6234 else if (! CvMETHOD(cv))
6235 hgv = gv;
6236 }
6237 if (!ogv &&
3280af22 6238 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6239 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6240 GvCVu(gv) && GvIMPORTED_CV(gv))
6241 {
6242 ogv = gv;
6243 }
6244 }
6245 if (ogv) {
30fe34ed 6246 orig_keyword = tmp;
56f7f34b 6247 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6248 }
6249 else if (gv && !gvp
6250 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6251 && GvCVu(gv))
6e7b2336
GS
6252 {
6253 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6254 }
56f7f34b
CS
6255 else { /* no override */
6256 tmp = -tmp;
a2a5de95
NC
6257 if (tmp == KEY_dump) {
6258 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6259 "dump() better written as CORE::dump()");
ac206dc8 6260 }
a0714e2c 6261 gv = NULL;
56f7f34b 6262 gvp = 0;
a2a5de95
NC
6263 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6264 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6265 "Ambiguous call resolved as CORE::%s(), "
6266 "qualify as such or use &",
6267 GvENAME(hgv));
49dc05e3 6268 }
a0d0e21e
LW
6269 }
6270
6271 reserved_word:
6272 switch (tmp) {
79072805
LW
6273
6274 default: /* not a keyword */
0bfa2a8a
NC
6275 /* Trade off - by using this evil construction we can pull the
6276 variable gv into the block labelled keylookup. If not, then
6277 we have to give it function scope so that the goto from the
6278 earlier ':' case doesn't bypass the initialisation. */
6279 if (0) {
6280 just_a_word_zero_gv:
6281 gv = NULL;
6282 gvp = NULL;
8bee0991 6283 orig_keyword = 0;
0bfa2a8a 6284 }
93a17b20 6285 just_a_word: {
96e4d5b1 6286 SV *sv;
ce29ac45 6287 int pkgname = 0;
f54cb97a 6288 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6289 OP *rv2cv_op;
5069cc75 6290 CV *cv;
5db06880 6291#ifdef PERL_MAD
cd81e915 6292 SV *nextPL_nextwhite = 0;
5db06880
NC
6293#endif
6294
8990e307
LW
6295
6296 /* Get the rest if it looks like a package qualifier */
6297
155aba94 6298 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6299 STRLEN morelen;
3280af22 6300 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6301 TRUE, &morelen);
6302 if (!morelen)
cea2e8a9 6303 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6304 *s == '\'' ? "'" : "::");
c3e0f903 6305 len += morelen;
ce29ac45 6306 pkgname = 1;
a0d0e21e 6307 }
8990e307 6308
3280af22
NIS
6309 if (PL_expect == XOPERATOR) {
6310 if (PL_bufptr == PL_linestart) {
57843af0 6311 CopLINE_dec(PL_curcop);
f1f66076 6312 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6313 CopLINE_inc(PL_curcop);
463ee0b2
LW
6314 }
6315 else
54310121 6316 no_op("Bareword",s);
463ee0b2 6317 }
8990e307 6318
c3e0f903
GS
6319 /* Look for a subroutine with this name in current package,
6320 unless name is "Foo::", in which case Foo is a bearword
6321 (and a package name). */
6322
5db06880 6323 if (len > 2 && !PL_madskills &&
3280af22 6324 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6325 {
f776e3cd 6326 if (ckWARN(WARN_BAREWORD)
90e5519e 6327 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6328 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6329 "Bareword \"%s\" refers to nonexistent package",
3280af22 6330 PL_tokenbuf);
c3e0f903 6331 len -= 2;
3280af22 6332 PL_tokenbuf[len] = '\0';
a0714e2c 6333 gv = NULL;
c3e0f903
GS
6334 gvp = 0;
6335 }
6336 else {
62d55b22
NC
6337 if (!gv) {
6338 /* Mustn't actually add anything to a symbol table.
6339 But also don't want to "initialise" any placeholder
6340 constants that might already be there into full
6341 blown PVGVs with attached PVCV. */
90e5519e
NC
6342 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6343 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6344 }
b3d904f3 6345 len = 0;
c3e0f903
GS
6346 }
6347
6348 /* if we saw a global override before, get the right name */
8990e307 6349
37bb7629
EB
6350 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6351 len ? len : strlen(PL_tokenbuf));
49dc05e3 6352 if (gvp) {
37bb7629 6353 SV * const tmp_sv = sv;
396482e1 6354 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6355 sv_catsv(sv, tmp_sv);
6356 SvREFCNT_dec(tmp_sv);
8a7a129d 6357 }
37bb7629 6358
5db06880 6359#ifdef PERL_MAD
cd81e915
NC
6360 if (PL_madskills && !PL_thistoken) {
6361 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6362 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6363 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6364 }
6365#endif
8990e307 6366
a0d0e21e 6367 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6368 CLINE;
6154021b
RGS
6369 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6370 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6371
c3e0f903 6372 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6373 if (len)
6374 goto safe_bareword;
6375
f7461760
Z
6376 {
6377 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6378 const_op->op_private = OPpCONST_BARE;
6379 rv2cv_op = newCVREF(0, const_op);
6380 }
d9088386 6381 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6382
8990e307
LW
6383 /* See if it's the indirect object for a list operator. */
6384
3280af22
NIS
6385 if (PL_oldoldbufptr &&
6386 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6387 (PL_oldoldbufptr == PL_last_lop
6388 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6389 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6390 (PL_expect == XREF ||
6391 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6392 {
748a9306
LW
6393 bool immediate_paren = *s == '(';
6394
a0d0e21e 6395 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6396 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6397#ifdef PERL_MAD
cd81e915 6398 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6399#endif
a0d0e21e
LW
6400
6401 /* Two barewords in a row may indicate method call. */
6402
62d55b22 6403 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6404 (tmp = intuit_method(s, gv, cv))) {
6405 op_free(rv2cv_op);
bbf60fe6 6406 return REPORT(tmp);
f7461760 6407 }
a0d0e21e
LW
6408
6409 /* If not a declared subroutine, it's an indirect object. */
6410 /* (But it's an indir obj regardless for sort.) */
7294df96 6411 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6412
7294df96
RGS
6413 if (
6414 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6415 (!cv &&
a9ef352a 6416 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6417 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6418 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6419 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6420 )
a9ef352a 6421 {
3280af22 6422 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6423 goto bareword;
93a17b20
LW
6424 }
6425 }
8990e307 6426
3280af22 6427 PL_expect = XOPERATOR;
5db06880
NC
6428#ifdef PERL_MAD
6429 if (isSPACE(*s))
cd81e915
NC
6430 s = SKIPSPACE2(s,nextPL_nextwhite);
6431 PL_nextwhite = nextPL_nextwhite;
5db06880 6432#else
8990e307 6433 s = skipspace(s);
5db06880 6434#endif
1c3923b3
GS
6435
6436 /* Is this a word before a => operator? */
ce29ac45 6437 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6438 op_free(rv2cv_op);
1c3923b3 6439 CLINE;
6154021b 6440 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6441 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6442 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6443 TERM(WORD);
6444 }
6445
6446 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6447 if (*s == '(') {
79072805 6448 CLINE;
5069cc75 6449 if (cv) {
c35e046a
AL
6450 d = s + 1;
6451 while (SPACE_OR_TAB(*d))
6452 d++;
f7461760 6453 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6454 s = d + 1;
c631f32b 6455 goto its_constant;
96e4d5b1 6456 }
6457 }
5db06880
NC
6458#ifdef PERL_MAD
6459 if (PL_madskills) {
cd81e915
NC
6460 PL_nextwhite = PL_thiswhite;
6461 PL_thiswhite = 0;
5db06880 6462 }
cd81e915 6463 start_force(PL_curforce);
5db06880 6464#endif
6154021b 6465 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6466 PL_expect = XOPERATOR;
5db06880
NC
6467#ifdef PERL_MAD
6468 if (PL_madskills) {
cd81e915
NC
6469 PL_nextwhite = nextPL_nextwhite;
6470 curmad('X', PL_thistoken);
6b29d1f5 6471 PL_thistoken = newSVpvs("");
5db06880
NC
6472 }
6473#endif
f7461760 6474 op_free(rv2cv_op);
93a17b20 6475 force_next(WORD);
6154021b 6476 pl_yylval.ival = 0;
463ee0b2 6477 TOKEN('&');
79072805 6478 }
93a17b20 6479
a0d0e21e 6480 /* If followed by var or block, call it a method (unless sub) */
8990e307 6481
f7461760
Z
6482 if ((*s == '$' || *s == '{') && !cv) {
6483 op_free(rv2cv_op);
3280af22
NIS
6484 PL_last_lop = PL_oldbufptr;
6485 PL_last_lop_op = OP_METHOD;
93a17b20 6486 PREBLOCK(METHOD);
463ee0b2
LW
6487 }
6488
8990e307
LW
6489 /* If followed by a bareword, see if it looks like indir obj. */
6490
30fe34ed
RGS
6491 if (!orig_keyword
6492 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6493 && (tmp = intuit_method(s, gv, cv))) {
6494 op_free(rv2cv_op);
bbf60fe6 6495 return REPORT(tmp);
f7461760 6496 }
93a17b20 6497
8990e307
LW
6498 /* Not a method, so call it a subroutine (if defined) */
6499
5069cc75 6500 if (cv) {
9b387841
NC
6501 if (lastchar == '-')
6502 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6503 "Ambiguous use of -%s resolved as -&%s()",
6504 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6505 /* Check for a constant sub */
f7461760 6506 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6507 its_constant:
f7461760 6508 op_free(rv2cv_op);
6154021b
RGS
6509 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6510 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6511 pl_yylval.opval->op_private = 0;
96e4d5b1 6512 TOKEN(WORD);
89bfa8cd 6513 }
6514
6154021b 6515 op_free(pl_yylval.opval);
f7461760 6516 pl_yylval.opval = rv2cv_op;
6154021b 6517 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6518 PL_last_lop = PL_oldbufptr;
bf848113 6519 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6520 /* Is there a prototype? */
5db06880
NC
6521 if (
6522#ifdef PERL_MAD
6523 cv &&
6524#endif
d9f2850e
RGS
6525 SvPOK(cv))
6526 {
5f66b61c 6527 STRLEN protolen;
daba3364 6528 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6529 if (!protolen)
4633a7c4 6530 TERM(FUNC0SUB);
0f5d0394
AE
6531 while (*proto == ';')
6532 proto++;
649d02de
FC
6533 if (
6534 (
6535 (
6536 *proto == '$' || *proto == '_'
c035a075 6537 || *proto == '*' || *proto == '+'
649d02de
FC
6538 )
6539 && proto[1] == '\0'
6540 )
6541 || (
6542 *proto == '\\' && proto[1] && proto[2] == '\0'
6543 )
6544 )
6545 OPERATOR(UNIOPSUB);
6546 if (*proto == '\\' && proto[1] == '[') {
6547 const char *p = proto + 2;
6548 while(*p && *p != ']')
6549 ++p;
6550 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6551 }
7a52d87a 6552 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6553 if (PL_curstash)
6554 sv_setpvs(PL_subname, "__ANON__");
6555 else
6556 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6557 PREBLOCK(LSTOPSUB);
6558 }
a9ef352a 6559 }
5db06880
NC
6560#ifdef PERL_MAD
6561 {
6562 if (PL_madskills) {
cd81e915
NC
6563 PL_nextwhite = PL_thiswhite;
6564 PL_thiswhite = 0;
5db06880 6565 }
cd81e915 6566 start_force(PL_curforce);
6154021b 6567 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6568 PL_expect = XTERM;
6569 if (PL_madskills) {
cd81e915
NC
6570 PL_nextwhite = nextPL_nextwhite;
6571 curmad('X', PL_thistoken);
6b29d1f5 6572 PL_thistoken = newSVpvs("");
5db06880
NC
6573 }
6574 force_next(WORD);
6575 TOKEN(NOAMP);
6576 }
6577 }
6578
6579 /* Guess harder when madskills require "best effort". */
6580 if (PL_madskills && (!gv || !GvCVu(gv))) {
6581 int probable_sub = 0;
6582 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6583 probable_sub = 1;
6584 else if (isALPHA(*s)) {
6585 char tmpbuf[1024];
6586 STRLEN tmplen;
6587 d = s;
6588 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6589 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6590 probable_sub = 1;
6591 else {
6592 while (d < PL_bufend && isSPACE(*d))
6593 d++;
6594 if (*d == '=' && d[1] == '>')
6595 probable_sub = 1;
6596 }
6597 }
6598 if (probable_sub) {
7a6d04f4 6599 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6600 op_free(pl_yylval.opval);
f7461760 6601 pl_yylval.opval = rv2cv_op;
6154021b 6602 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6603 PL_last_lop = PL_oldbufptr;
6604 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6605 PL_nextwhite = PL_thiswhite;
6606 PL_thiswhite = 0;
6607 start_force(PL_curforce);
6154021b 6608 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6609 PL_expect = XTERM;
cd81e915
NC
6610 PL_nextwhite = nextPL_nextwhite;
6611 curmad('X', PL_thistoken);
6b29d1f5 6612 PL_thistoken = newSVpvs("");
5db06880
NC
6613 force_next(WORD);
6614 TOKEN(NOAMP);
6615 }
6616#else
6154021b 6617 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6618 PL_expect = XTERM;
8990e307
LW
6619 force_next(WORD);
6620 TOKEN(NOAMP);
5db06880 6621#endif
8990e307 6622 }
748a9306 6623
8990e307
LW
6624 /* Call it a bare word */
6625
5603f27d 6626 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6627 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6628 else {
9a073a1d
RGS
6629 bareword:
6630 /* after "print" and similar functions (corresponding to
6631 * "F? L" in opcode.pl), whatever wasn't already parsed as
6632 * a filehandle should be subject to "strict subs".
6633 * Likewise for the optional indirect-object argument to system
6634 * or exec, which can't be a bareword */
6635 if ((PL_last_lop_op == OP_PRINT
6636 || PL_last_lop_op == OP_PRTF
6637 || PL_last_lop_op == OP_SAY
6638 || PL_last_lop_op == OP_SYSTEM
6639 || PL_last_lop_op == OP_EXEC)
6640 && (PL_hints & HINT_STRICT_SUBS))
6641 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6642 if (lastchar != '-') {
6643 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6644 d = PL_tokenbuf;
6645 while (isLOWER(*d))
6646 d++;
da51bb9b 6647 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6648 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6649 PL_tokenbuf);
6650 }
748a9306
LW
6651 }
6652 }
f7461760 6653 op_free(rv2cv_op);
c3e0f903
GS
6654
6655 safe_bareword:
9b387841
NC
6656 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6657 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6658 "Operator or semicolon missing before %c%s",
6659 lastchar, PL_tokenbuf);
6660 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6661 "Ambiguous use of %c resolved as operator %c",
6662 lastchar, lastchar);
748a9306 6663 }
93a17b20 6664 TOKEN(WORD);
79072805 6665 }
79072805 6666
68dc0745 6667 case KEY___FILE__:
6154021b 6668 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6669 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6670 TERM(THING);
6671
79072805 6672 case KEY___LINE__:
6154021b 6673 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6674 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6675 TERM(THING);
68dc0745 6676
6677 case KEY___PACKAGE__:
6154021b 6678 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6679 (PL_curstash
5aaec2b4 6680 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6681 : &PL_sv_undef));
79072805 6682 TERM(THING);
79072805 6683
e50aee73 6684 case KEY___DATA__:
79072805
LW
6685 case KEY___END__: {
6686 GV *gv;
3280af22 6687 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6688 const char *pname = "main";
3280af22 6689 if (PL_tokenbuf[2] == 'D')
bfcb3514 6690 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6691 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6692 SVt_PVIO);
a5f75d66 6693 GvMULTI_on(gv);
79072805 6694 if (!GvIO(gv))
a0d0e21e 6695 GvIOp(gv) = newIO();
3280af22 6696 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6697#if defined(HAS_FCNTL) && defined(F_SETFD)
6698 {
f54cb97a 6699 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6700 fcntl(fd,F_SETFD,fd >= 3);
6701 }
79072805 6702#endif
fd049845 6703 /* Mark this internal pseudo-handle as clean */
6704 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6705 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6706 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6707 else
50952442 6708 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6709#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6710 /* if the script was opened in binmode, we need to revert
53129d29 6711 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6712 * XXX this is a questionable hack at best. */
53129d29
GS
6713 if (PL_bufend-PL_bufptr > 2
6714 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6715 {
6716 Off_t loc = 0;
50952442 6717 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6718 loc = PerlIO_tell(PL_rsfp);
6719 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6720 }
2986a63f
JH
6721#ifdef NETWARE
6722 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6723#else
c39cd008 6724 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6725#endif /* NETWARE */
1143fce0
JH
6726#ifdef PERLIO_IS_STDIO /* really? */
6727# if defined(__BORLANDC__)
cb359b41
JH
6728 /* XXX see note in do_binmode() */
6729 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6730# endif
6731#endif
c39cd008
GS
6732 if (loc > 0)
6733 PerlIO_seek(PL_rsfp, loc, 0);
6734 }
6735 }
6736#endif
7948272d 6737#ifdef PERLIO_LAYERS
52d2e0f4
JH
6738 if (!IN_BYTES) {
6739 if (UTF)
6740 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6741 else if (PL_encoding) {
6742 SV *name;
6743 dSP;
6744 ENTER;
6745 SAVETMPS;
6746 PUSHMARK(sp);
6747 EXTEND(SP, 1);
6748 XPUSHs(PL_encoding);
6749 PUTBACK;
6750 call_method("name", G_SCALAR);
6751 SPAGAIN;
6752 name = POPs;
6753 PUTBACK;
bfed75c6 6754 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6755 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6756 SVfARG(name)));
52d2e0f4
JH
6757 FREETMPS;
6758 LEAVE;
6759 }
6760 }
7948272d 6761#endif
5db06880
NC
6762#ifdef PERL_MAD
6763 if (PL_madskills) {
cd81e915
NC
6764 if (PL_realtokenstart >= 0) {
6765 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6766 if (!PL_endwhite)
6b29d1f5 6767 PL_endwhite = newSVpvs("");
cd81e915
NC
6768 sv_catsv(PL_endwhite, PL_thiswhite);
6769 PL_thiswhite = 0;
6770 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6771 PL_realtokenstart = -1;
5db06880 6772 }
5cc814fd
NC
6773 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6774 != NULL) ;
5db06880
NC
6775 }
6776#endif
4608196e 6777 PL_rsfp = NULL;
79072805
LW
6778 }
6779 goto fake_eof;
e929a76b 6780 }
de3bb511 6781
8990e307 6782 case KEY_AUTOLOAD:
ed6116ce 6783 case KEY_DESTROY:
79072805 6784 case KEY_BEGIN:
3c10abe3 6785 case KEY_UNITCHECK:
7d30b5c4 6786 case KEY_CHECK:
7d07dbc2 6787 case KEY_INIT:
7d30b5c4 6788 case KEY_END:
3280af22
NIS
6789 if (PL_expect == XSTATE) {
6790 s = PL_bufptr;
93a17b20 6791 goto really_sub;
79072805
LW
6792 }
6793 goto just_a_word;
6794
a0d0e21e
LW
6795 case KEY_CORE:
6796 if (*s == ':' && s[1] == ':') {
6797 s += 2;
748a9306 6798 d = s;
3280af22 6799 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6800 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6801 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6802 if (tmp < 0)
6803 tmp = -tmp;
850e8516 6804 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6805 /* that's a way to remember we saw "CORE::" */
850e8516 6806 orig_keyword = tmp;
a0d0e21e
LW
6807 goto reserved_word;
6808 }
6809 goto just_a_word;
6810
463ee0b2
LW
6811 case KEY_abs:
6812 UNI(OP_ABS);
6813
79072805
LW
6814 case KEY_alarm:
6815 UNI(OP_ALARM);
6816
6817 case KEY_accept:
a0d0e21e 6818 LOP(OP_ACCEPT,XTERM);
79072805 6819
463ee0b2
LW
6820 case KEY_and:
6821 OPERATOR(ANDOP);
6822
79072805 6823 case KEY_atan2:
a0d0e21e 6824 LOP(OP_ATAN2,XTERM);
85e6fe83 6825
79072805 6826 case KEY_bind:
a0d0e21e 6827 LOP(OP_BIND,XTERM);
79072805
LW
6828
6829 case KEY_binmode:
1c1fc3ea 6830 LOP(OP_BINMODE,XTERM);
79072805
LW
6831
6832 case KEY_bless:
a0d0e21e 6833 LOP(OP_BLESS,XTERM);
79072805 6834
0d863452
RH
6835 case KEY_break:
6836 FUN0(OP_BREAK);
6837
79072805
LW
6838 case KEY_chop:
6839 UNI(OP_CHOP);
6840
6841 case KEY_continue:
0d863452
RH
6842 /* When 'use switch' is in effect, continue has a dual
6843 life as a control operator. */
6844 {
ef89dcc3 6845 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6846 PREBLOCK(CONTINUE);
6847 else {
6848 /* We have to disambiguate the two senses of
6849 "continue". If the next token is a '{' then
6850 treat it as the start of a continue block;
6851 otherwise treat it as a control operator.
6852 */
6853 s = skipspace(s);
6854 if (*s == '{')
79072805 6855 PREBLOCK(CONTINUE);
0d863452
RH
6856 else
6857 FUN0(OP_CONTINUE);
6858 }
6859 }
79072805
LW
6860
6861 case KEY_chdir:
fafc274c
NC
6862 /* may use HOME */
6863 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6864 UNI(OP_CHDIR);
6865
6866 case KEY_close:
6867 UNI(OP_CLOSE);
6868
6869 case KEY_closedir:
6870 UNI(OP_CLOSEDIR);
6871
6872 case KEY_cmp:
6873 Eop(OP_SCMP);
6874
6875 case KEY_caller:
6876 UNI(OP_CALLER);
6877
6878 case KEY_crypt:
6879#ifdef FCRYPT
f4c556ac
GS
6880 if (!PL_cryptseen) {
6881 PL_cryptseen = TRUE;
de3bb511 6882 init_des();
f4c556ac 6883 }
a687059c 6884#endif
a0d0e21e 6885 LOP(OP_CRYPT,XTERM);
79072805
LW
6886
6887 case KEY_chmod:
a0d0e21e 6888 LOP(OP_CHMOD,XTERM);
79072805
LW
6889
6890 case KEY_chown:
a0d0e21e 6891 LOP(OP_CHOWN,XTERM);
79072805
LW
6892
6893 case KEY_connect:
a0d0e21e 6894 LOP(OP_CONNECT,XTERM);
79072805 6895
463ee0b2
LW
6896 case KEY_chr:
6897 UNI(OP_CHR);
6898
79072805
LW
6899 case KEY_cos:
6900 UNI(OP_COS);
6901
6902 case KEY_chroot:
6903 UNI(OP_CHROOT);
6904
0d863452
RH
6905 case KEY_default:
6906 PREBLOCK(DEFAULT);
6907
79072805 6908 case KEY_do:
29595ff2 6909 s = SKIPSPACE1(s);
79072805 6910 if (*s == '{')
a0d0e21e 6911 PRETERMBLOCK(DO);
79072805 6912 if (*s != '\'')
89c5585f 6913 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6914 if (orig_keyword == KEY_do) {
6915 orig_keyword = 0;
6154021b 6916 pl_yylval.ival = 1;
850e8516
RGS
6917 }
6918 else
6154021b 6919 pl_yylval.ival = 0;
378cc40b 6920 OPERATOR(DO);
79072805
LW
6921
6922 case KEY_die:
3280af22 6923 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6924 LOP(OP_DIE,XTERM);
79072805
LW
6925
6926 case KEY_defined:
6927 UNI(OP_DEFINED);
6928
6929 case KEY_delete:
a0d0e21e 6930 UNI(OP_DELETE);
79072805
LW
6931
6932 case KEY_dbmopen:
74e8ce34
NC
6933 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
6934 STR_WITH_LEN("NDBM_File::"),
6935 STR_WITH_LEN("DB_File::"),
6936 STR_WITH_LEN("GDBM_File::"),
6937 STR_WITH_LEN("SDBM_File::"),
6938 STR_WITH_LEN("ODBM_File::"),
6939 NULL);
a0d0e21e 6940 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6941
6942 case KEY_dbmclose:
6943 UNI(OP_DBMCLOSE);
6944
6945 case KEY_dump:
a0d0e21e 6946 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6947 LOOPX(OP_DUMP);
6948
6949 case KEY_else:
6950 PREBLOCK(ELSE);
6951
6952 case KEY_elsif:
6154021b 6953 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6954 OPERATOR(ELSIF);
6955
6956 case KEY_eq:
6957 Eop(OP_SEQ);
6958
a0d0e21e
LW
6959 case KEY_exists:
6960 UNI(OP_EXISTS);
4e553d73 6961
79072805 6962 case KEY_exit:
5db06880
NC
6963 if (PL_madskills)
6964 UNI(OP_INT);
79072805
LW
6965 UNI(OP_EXIT);
6966
6967 case KEY_eval:
29595ff2 6968 s = SKIPSPACE1(s);
32e2a35d
RGS
6969 if (*s == '{') { /* block eval */
6970 PL_expect = XTERMBLOCK;
6971 UNIBRACK(OP_ENTERTRY);
6972 }
6973 else { /* string eval */
6974 PL_expect = XTERM;
6975 UNIBRACK(OP_ENTEREVAL);
6976 }
79072805
LW
6977
6978 case KEY_eof:
6979 UNI(OP_EOF);
6980
6981 case KEY_exp:
6982 UNI(OP_EXP);
6983
6984 case KEY_each:
6985 UNI(OP_EACH);
6986
6987 case KEY_exec:
a0d0e21e 6988 LOP(OP_EXEC,XREF);
79072805
LW
6989
6990 case KEY_endhostent:
6991 FUN0(OP_EHOSTENT);
6992
6993 case KEY_endnetent:
6994 FUN0(OP_ENETENT);
6995
6996 case KEY_endservent:
6997 FUN0(OP_ESERVENT);
6998
6999 case KEY_endprotoent:
7000 FUN0(OP_EPROTOENT);
7001
7002 case KEY_endpwent:
7003 FUN0(OP_EPWENT);
7004
7005 case KEY_endgrent:
7006 FUN0(OP_EGRENT);
7007
7008 case KEY_for:
7009 case KEY_foreach:
6154021b 7010 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7011 s = SKIPSPACE1(s);
7e2040f0 7012 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7013 char *p = s;
5db06880
NC
7014#ifdef PERL_MAD
7015 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7016#endif
7017
3280af22 7018 if ((PL_bufend - p) >= 3 &&
55497cff 7019 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7020 p += 2;
77ca0c92
LW
7021 else if ((PL_bufend - p) >= 4 &&
7022 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7023 p += 3;
29595ff2 7024 p = PEEKSPACE(p);
7e2040f0 7025 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7026 p = scan_ident(p, PL_bufend,
7027 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7028 p = PEEKSPACE(p);
77ca0c92
LW
7029 }
7030 if (*p != '$')
cea2e8a9 7031 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7032#ifdef PERL_MAD
7033 s = SvPVX(PL_linestr) + soff;
7034#endif
55497cff 7035 }
79072805
LW
7036 OPERATOR(FOR);
7037
7038 case KEY_formline:
a0d0e21e 7039 LOP(OP_FORMLINE,XTERM);
79072805
LW
7040
7041 case KEY_fork:
7042 FUN0(OP_FORK);
7043
7044 case KEY_fcntl:
a0d0e21e 7045 LOP(OP_FCNTL,XTERM);
79072805
LW
7046
7047 case KEY_fileno:
7048 UNI(OP_FILENO);
7049
7050 case KEY_flock:
a0d0e21e 7051 LOP(OP_FLOCK,XTERM);
79072805
LW
7052
7053 case KEY_gt:
7054 Rop(OP_SGT);
7055
7056 case KEY_ge:
7057 Rop(OP_SGE);
7058
7059 case KEY_grep:
2c38e13d 7060 LOP(OP_GREPSTART, XREF);
79072805
LW
7061
7062 case KEY_goto:
a0d0e21e 7063 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7064 LOOPX(OP_GOTO);
7065
7066 case KEY_gmtime:
7067 UNI(OP_GMTIME);
7068
7069 case KEY_getc:
6f33ba73 7070 UNIDOR(OP_GETC);
79072805
LW
7071
7072 case KEY_getppid:
7073 FUN0(OP_GETPPID);
7074
7075 case KEY_getpgrp:
7076 UNI(OP_GETPGRP);
7077
7078 case KEY_getpriority:
a0d0e21e 7079 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7080
7081 case KEY_getprotobyname:
7082 UNI(OP_GPBYNAME);
7083
7084 case KEY_getprotobynumber:
a0d0e21e 7085 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7086
7087 case KEY_getprotoent:
7088 FUN0(OP_GPROTOENT);
7089
7090 case KEY_getpwent:
7091 FUN0(OP_GPWENT);
7092
7093 case KEY_getpwnam:
ff68c719 7094 UNI(OP_GPWNAM);
79072805
LW
7095
7096 case KEY_getpwuid:
ff68c719 7097 UNI(OP_GPWUID);
79072805
LW
7098
7099 case KEY_getpeername:
7100 UNI(OP_GETPEERNAME);
7101
7102 case KEY_gethostbyname:
7103 UNI(OP_GHBYNAME);
7104
7105 case KEY_gethostbyaddr:
a0d0e21e 7106 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7107
7108 case KEY_gethostent:
7109 FUN0(OP_GHOSTENT);
7110
7111 case KEY_getnetbyname:
7112 UNI(OP_GNBYNAME);
7113
7114 case KEY_getnetbyaddr:
a0d0e21e 7115 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7116
7117 case KEY_getnetent:
7118 FUN0(OP_GNETENT);
7119
7120 case KEY_getservbyname:
a0d0e21e 7121 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7122
7123 case KEY_getservbyport:
a0d0e21e 7124 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7125
7126 case KEY_getservent:
7127 FUN0(OP_GSERVENT);
7128
7129 case KEY_getsockname:
7130 UNI(OP_GETSOCKNAME);
7131
7132 case KEY_getsockopt:
a0d0e21e 7133 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7134
7135 case KEY_getgrent:
7136 FUN0(OP_GGRENT);
7137
7138 case KEY_getgrnam:
ff68c719 7139 UNI(OP_GGRNAM);
79072805
LW
7140
7141 case KEY_getgrgid:
ff68c719 7142 UNI(OP_GGRGID);
79072805
LW
7143
7144 case KEY_getlogin:
7145 FUN0(OP_GETLOGIN);
7146
0d863452 7147 case KEY_given:
6154021b 7148 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7149 OPERATOR(GIVEN);
7150
93a17b20 7151 case KEY_glob:
a0d0e21e 7152 LOP(OP_GLOB,XTERM);
93a17b20 7153
79072805
LW
7154 case KEY_hex:
7155 UNI(OP_HEX);
7156
7157 case KEY_if:
6154021b 7158 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7159 OPERATOR(IF);
7160
7161 case KEY_index:
a0d0e21e 7162 LOP(OP_INDEX,XTERM);
79072805
LW
7163
7164 case KEY_int:
7165 UNI(OP_INT);
7166
7167 case KEY_ioctl:
a0d0e21e 7168 LOP(OP_IOCTL,XTERM);
79072805
LW
7169
7170 case KEY_join:
a0d0e21e 7171 LOP(OP_JOIN,XTERM);
79072805
LW
7172
7173 case KEY_keys:
7174 UNI(OP_KEYS);
7175
7176 case KEY_kill:
a0d0e21e 7177 LOP(OP_KILL,XTERM);
79072805
LW
7178
7179 case KEY_last:
a0d0e21e 7180 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7181 LOOPX(OP_LAST);
4e553d73 7182
79072805
LW
7183 case KEY_lc:
7184 UNI(OP_LC);
7185
7186 case KEY_lcfirst:
7187 UNI(OP_LCFIRST);
7188
7189 case KEY_local:
6154021b 7190 pl_yylval.ival = 0;
79072805
LW
7191 OPERATOR(LOCAL);
7192
7193 case KEY_length:
7194 UNI(OP_LENGTH);
7195
7196 case KEY_lt:
7197 Rop(OP_SLT);
7198
7199 case KEY_le:
7200 Rop(OP_SLE);
7201
7202 case KEY_localtime:
7203 UNI(OP_LOCALTIME);
7204
7205 case KEY_log:
7206 UNI(OP_LOG);
7207
7208 case KEY_link:
a0d0e21e 7209 LOP(OP_LINK,XTERM);
79072805
LW
7210
7211 case KEY_listen:
a0d0e21e 7212 LOP(OP_LISTEN,XTERM);
79072805 7213
c0329465
MB
7214 case KEY_lock:
7215 UNI(OP_LOCK);
7216
79072805
LW
7217 case KEY_lstat:
7218 UNI(OP_LSTAT);
7219
7220 case KEY_m:
8782bef2 7221 s = scan_pat(s,OP_MATCH);
79072805
LW
7222 TERM(sublex_start());
7223
a0d0e21e 7224 case KEY_map:
2c38e13d 7225 LOP(OP_MAPSTART, XREF);
4e4e412b 7226
79072805 7227 case KEY_mkdir:
a0d0e21e 7228 LOP(OP_MKDIR,XTERM);
79072805
LW
7229
7230 case KEY_msgctl:
a0d0e21e 7231 LOP(OP_MSGCTL,XTERM);
79072805
LW
7232
7233 case KEY_msgget:
a0d0e21e 7234 LOP(OP_MSGGET,XTERM);
79072805
LW
7235
7236 case KEY_msgrcv:
a0d0e21e 7237 LOP(OP_MSGRCV,XTERM);
79072805
LW
7238
7239 case KEY_msgsnd:
a0d0e21e 7240 LOP(OP_MSGSND,XTERM);
79072805 7241
77ca0c92 7242 case KEY_our:
93a17b20 7243 case KEY_my:
952306ac 7244 case KEY_state:
eac04b2e 7245 PL_in_my = (U16)tmp;
29595ff2 7246 s = SKIPSPACE1(s);
7e2040f0 7247 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7248#ifdef PERL_MAD
7249 char* start = s;
7250#endif
3280af22 7251 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7252 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7253 goto really_sub;
def3634b 7254 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7255 if (!PL_in_my_stash) {
c750a3ec 7256 char tmpbuf[1024];
3280af22 7257 PL_bufptr = s;
d9fad198 7258 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7259 yyerror(tmpbuf);
7260 }
5db06880
NC
7261#ifdef PERL_MAD
7262 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7263 sv_catsv(PL_thistoken, PL_nextwhite);
7264 PL_nextwhite = 0;
7265 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7266 }
7267#endif
c750a3ec 7268 }
6154021b 7269 pl_yylval.ival = 1;
55497cff 7270 OPERATOR(MY);
93a17b20 7271
79072805 7272 case KEY_next:
a0d0e21e 7273 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7274 LOOPX(OP_NEXT);
7275
7276 case KEY_ne:
7277 Eop(OP_SNE);
7278
a0d0e21e 7279 case KEY_no:
468aa647 7280 s = tokenize_use(0, s);
a0d0e21e
LW
7281 OPERATOR(USE);
7282
7283 case KEY_not:
29595ff2 7284 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
7285 FUN1(OP_NOT);
7286 else
7287 OPERATOR(NOTOP);
a0d0e21e 7288
79072805 7289 case KEY_open:
29595ff2 7290 s = SKIPSPACE1(s);
7e2040f0 7291 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7292 const char *t;
c35e046a
AL
7293 for (d = s; isALNUM_lazy_if(d,UTF);)
7294 d++;
7295 for (t=d; isSPACE(*t);)
7296 t++;
e2ab214b 7297 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7298 /* [perl #16184] */
7299 && !(t[0] == '=' && t[1] == '>')
7300 ) {
5f66b61c 7301 int parms_len = (int)(d-s);
9014280d 7302 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7303 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7304 parms_len, s, parms_len, s);
66fbe8fb 7305 }
93a17b20 7306 }
a0d0e21e 7307 LOP(OP_OPEN,XTERM);
79072805 7308
463ee0b2 7309 case KEY_or:
6154021b 7310 pl_yylval.ival = OP_OR;
463ee0b2
LW
7311 OPERATOR(OROP);
7312
79072805
LW
7313 case KEY_ord:
7314 UNI(OP_ORD);
7315
7316 case KEY_oct:
7317 UNI(OP_OCT);
7318
7319 case KEY_opendir:
a0d0e21e 7320 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7321
7322 case KEY_print:
3280af22 7323 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7324 LOP(OP_PRINT,XREF);
79072805
LW
7325
7326 case KEY_printf:
3280af22 7327 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7328 LOP(OP_PRTF,XREF);
79072805 7329
c07a80fd 7330 case KEY_prototype:
7331 UNI(OP_PROTOTYPE);
7332
79072805 7333 case KEY_push:
a0d0e21e 7334 LOP(OP_PUSH,XTERM);
79072805
LW
7335
7336 case KEY_pop:
6f33ba73 7337 UNIDOR(OP_POP);
79072805 7338
a0d0e21e 7339 case KEY_pos:
6f33ba73 7340 UNIDOR(OP_POS);
4e553d73 7341
79072805 7342 case KEY_pack:
a0d0e21e 7343 LOP(OP_PACK,XTERM);
79072805
LW
7344
7345 case KEY_package:
a0d0e21e 7346 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7347 s = SKIPSPACE1(s);
91152fc1 7348 s = force_strict_version(s);
4e4da3ac 7349 PL_lex_expect = XBLOCK;
79072805
LW
7350 OPERATOR(PACKAGE);
7351
7352 case KEY_pipe:
a0d0e21e 7353 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7354
7355 case KEY_q:
5db06880 7356 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7357 if (!s)
d4c19fe8 7358 missingterm(NULL);
6154021b 7359 pl_yylval.ival = OP_CONST;
79072805
LW
7360 TERM(sublex_start());
7361
a0d0e21e
LW
7362 case KEY_quotemeta:
7363 UNI(OP_QUOTEMETA);
7364
ea25a9b2
Z
7365 case KEY_qw: {
7366 OP *words = NULL;
5db06880 7367 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7368 if (!s)
d4c19fe8 7369 missingterm(NULL);
3480a8d2 7370 PL_expect = XOPERATOR;
8127e0e3 7371 if (SvCUR(PL_lex_stuff)) {
8127e0e3 7372 int warned = 0;
3280af22 7373 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7374 while (len) {
d4c19fe8
AL
7375 for (; isSPACE(*d) && len; --len, ++d)
7376 /**/;
8127e0e3 7377 if (len) {
d4c19fe8 7378 SV *sv;
f54cb97a 7379 const char *b = d;
e476b1b5 7380 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7381 for (; !isSPACE(*d) && len; --len, ++d) {
7382 if (*d == ',') {
9014280d 7383 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7384 "Possible attempt to separate words with commas");
7385 ++warned;
7386 }
7387 else if (*d == '#') {
9014280d 7388 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7389 "Possible attempt to put comments in qw() list");
7390 ++warned;
7391 }
7392 }
7393 }
7394 else {
d4c19fe8
AL
7395 for (; !isSPACE(*d) && len; --len, ++d)
7396 /**/;
8127e0e3 7397 }
740cce10 7398 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7399 words = op_append_elem(OP_LIST, words,
7948272d 7400 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7401 }
7402 }
7403 }
ea25a9b2
Z
7404 if (!words)
7405 words = newNULLLIST();
37fd879b 7406 if (PL_lex_stuff) {
8127e0e3 7407 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7408 PL_lex_stuff = NULL;
37fd879b 7409 }
ea25a9b2
Z
7410 PL_expect = XOPERATOR;
7411 pl_yylval.opval = sawparens(words);
7412 TOKEN(QWLIST);
7413 }
8990e307 7414
79072805 7415 case KEY_qq:
5db06880 7416 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7417 if (!s)
d4c19fe8 7418 missingterm(NULL);
6154021b 7419 pl_yylval.ival = OP_STRINGIFY;
3280af22 7420 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7421 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7422 TERM(sublex_start());
7423
8782bef2
GB
7424 case KEY_qr:
7425 s = scan_pat(s,OP_QR);
7426 TERM(sublex_start());
7427
79072805 7428 case KEY_qx:
5db06880 7429 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7430 if (!s)
d4c19fe8 7431 missingterm(NULL);
9b201d7d 7432 readpipe_override();
79072805
LW
7433 TERM(sublex_start());
7434
7435 case KEY_return:
7436 OLDLOP(OP_RETURN);
7437
7438 case KEY_require:
29595ff2 7439 s = SKIPSPACE1(s);
e759cc13
RGS
7440 if (isDIGIT(*s)) {
7441 s = force_version(s, FALSE);
a7cb1f99 7442 }
e759cc13
RGS
7443 else if (*s != 'v' || !isDIGIT(s[1])
7444 || (s = force_version(s, TRUE), *s == 'v'))
7445 {
a7cb1f99
GS
7446 *PL_tokenbuf = '\0';
7447 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7448 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7449 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7450 else if (*s == '<')
7451 yyerror("<> should be quotes");
7452 }
a72a1c8b
RGS
7453 if (orig_keyword == KEY_require) {
7454 orig_keyword = 0;
6154021b 7455 pl_yylval.ival = 1;
a72a1c8b
RGS
7456 }
7457 else
6154021b 7458 pl_yylval.ival = 0;
a72a1c8b
RGS
7459 PL_expect = XTERM;
7460 PL_bufptr = s;
7461 PL_last_uni = PL_oldbufptr;
7462 PL_last_lop_op = OP_REQUIRE;
7463 s = skipspace(s);
7464 return REPORT( (int)REQUIRE );
79072805
LW
7465
7466 case KEY_reset:
7467 UNI(OP_RESET);
7468
7469 case KEY_redo:
a0d0e21e 7470 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7471 LOOPX(OP_REDO);
7472
7473 case KEY_rename:
a0d0e21e 7474 LOP(OP_RENAME,XTERM);
79072805
LW
7475
7476 case KEY_rand:
7477 UNI(OP_RAND);
7478
7479 case KEY_rmdir:
7480 UNI(OP_RMDIR);
7481
7482 case KEY_rindex:
a0d0e21e 7483 LOP(OP_RINDEX,XTERM);
79072805
LW
7484
7485 case KEY_read:
a0d0e21e 7486 LOP(OP_READ,XTERM);
79072805
LW
7487
7488 case KEY_readdir:
7489 UNI(OP_READDIR);
7490
93a17b20 7491 case KEY_readline:
6f33ba73 7492 UNIDOR(OP_READLINE);
93a17b20
LW
7493
7494 case KEY_readpipe:
0858480c 7495 UNIDOR(OP_BACKTICK);
93a17b20 7496
79072805
LW
7497 case KEY_rewinddir:
7498 UNI(OP_REWINDDIR);
7499
7500 case KEY_recv:
a0d0e21e 7501 LOP(OP_RECV,XTERM);
79072805
LW
7502
7503 case KEY_reverse:
a0d0e21e 7504 LOP(OP_REVERSE,XTERM);
79072805
LW
7505
7506 case KEY_readlink:
6f33ba73 7507 UNIDOR(OP_READLINK);
79072805
LW
7508
7509 case KEY_ref:
7510 UNI(OP_REF);
7511
7512 case KEY_s:
7513 s = scan_subst(s);
6154021b 7514 if (pl_yylval.opval)
79072805
LW
7515 TERM(sublex_start());
7516 else
7517 TOKEN(1); /* force error */
7518
0d863452
RH
7519 case KEY_say:
7520 checkcomma(s,PL_tokenbuf,"filehandle");
7521 LOP(OP_SAY,XREF);
7522
a0d0e21e
LW
7523 case KEY_chomp:
7524 UNI(OP_CHOMP);
4e553d73 7525
79072805
LW
7526 case KEY_scalar:
7527 UNI(OP_SCALAR);
7528
7529 case KEY_select:
a0d0e21e 7530 LOP(OP_SELECT,XTERM);
79072805
LW
7531
7532 case KEY_seek:
a0d0e21e 7533 LOP(OP_SEEK,XTERM);
79072805
LW
7534
7535 case KEY_semctl:
a0d0e21e 7536 LOP(OP_SEMCTL,XTERM);
79072805
LW
7537
7538 case KEY_semget:
a0d0e21e 7539 LOP(OP_SEMGET,XTERM);
79072805
LW
7540
7541 case KEY_semop:
a0d0e21e 7542 LOP(OP_SEMOP,XTERM);
79072805
LW
7543
7544 case KEY_send:
a0d0e21e 7545 LOP(OP_SEND,XTERM);
79072805
LW
7546
7547 case KEY_setpgrp:
a0d0e21e 7548 LOP(OP_SETPGRP,XTERM);
79072805
LW
7549
7550 case KEY_setpriority:
a0d0e21e 7551 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7552
7553 case KEY_sethostent:
ff68c719 7554 UNI(OP_SHOSTENT);
79072805
LW
7555
7556 case KEY_setnetent:
ff68c719 7557 UNI(OP_SNETENT);
79072805
LW
7558
7559 case KEY_setservent:
ff68c719 7560 UNI(OP_SSERVENT);
79072805
LW
7561
7562 case KEY_setprotoent:
ff68c719 7563 UNI(OP_SPROTOENT);
79072805
LW
7564
7565 case KEY_setpwent:
7566 FUN0(OP_SPWENT);
7567
7568 case KEY_setgrent:
7569 FUN0(OP_SGRENT);
7570
7571 case KEY_seekdir:
a0d0e21e 7572 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7573
7574 case KEY_setsockopt:
a0d0e21e 7575 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7576
7577 case KEY_shift:
6f33ba73 7578 UNIDOR(OP_SHIFT);
79072805
LW
7579
7580 case KEY_shmctl:
a0d0e21e 7581 LOP(OP_SHMCTL,XTERM);
79072805
LW
7582
7583 case KEY_shmget:
a0d0e21e 7584 LOP(OP_SHMGET,XTERM);
79072805
LW
7585
7586 case KEY_shmread:
a0d0e21e 7587 LOP(OP_SHMREAD,XTERM);
79072805
LW
7588
7589 case KEY_shmwrite:
a0d0e21e 7590 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7591
7592 case KEY_shutdown:
a0d0e21e 7593 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7594
7595 case KEY_sin:
7596 UNI(OP_SIN);
7597
7598 case KEY_sleep:
7599 UNI(OP_SLEEP);
7600
7601 case KEY_socket:
a0d0e21e 7602 LOP(OP_SOCKET,XTERM);
79072805
LW
7603
7604 case KEY_socketpair:
a0d0e21e 7605 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7606
7607 case KEY_sort:
3280af22 7608 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7609 s = SKIPSPACE1(s);
79072805 7610 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7611 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7612 PL_expect = XTERM;
15f0808c 7613 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7614 LOP(OP_SORT,XREF);
79072805
LW
7615
7616 case KEY_split:
a0d0e21e 7617 LOP(OP_SPLIT,XTERM);
79072805
LW
7618
7619 case KEY_sprintf:
a0d0e21e 7620 LOP(OP_SPRINTF,XTERM);
79072805
LW
7621
7622 case KEY_splice:
a0d0e21e 7623 LOP(OP_SPLICE,XTERM);
79072805
LW
7624
7625 case KEY_sqrt:
7626 UNI(OP_SQRT);
7627
7628 case KEY_srand:
7629 UNI(OP_SRAND);
7630
7631 case KEY_stat:
7632 UNI(OP_STAT);
7633
7634 case KEY_study:
79072805
LW
7635 UNI(OP_STUDY);
7636
7637 case KEY_substr:
a0d0e21e 7638 LOP(OP_SUBSTR,XTERM);
79072805
LW
7639
7640 case KEY_format:
7641 case KEY_sub:
93a17b20 7642 really_sub:
09bef843 7643 {
3280af22 7644 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7645 SSize_t tboffset = 0;
09bef843 7646 expectation attrful;
28cc6278 7647 bool have_name, have_proto;
f54cb97a 7648 const int key = tmp;
09bef843 7649
5db06880
NC
7650#ifdef PERL_MAD
7651 SV *tmpwhite = 0;
7652
cd81e915 7653 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7654 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7655 PL_thistoken = 0;
5db06880
NC
7656
7657 d = s;
7658 s = SKIPSPACE2(s,tmpwhite);
7659#else
09bef843 7660 s = skipspace(s);
5db06880 7661#endif
09bef843 7662
7e2040f0 7663 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7664 (*s == ':' && s[1] == ':'))
7665 {
5db06880 7666#ifdef PERL_MAD
4f61fd4b 7667 SV *nametoke = NULL;
5db06880
NC
7668#endif
7669
09bef843
SB
7670 PL_expect = XBLOCK;
7671 attrful = XATTRBLOCK;
b1b65b59
JH
7672 /* remember buffer pos'n for later force_word */
7673 tboffset = s - PL_oldbufptr;
09bef843 7674 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7675#ifdef PERL_MAD
7676 if (PL_madskills)
7677 nametoke = newSVpvn(s, d - s);
7678#endif
6502358f
NC
7679 if (memchr(tmpbuf, ':', len))
7680 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7681 else {
7682 sv_setsv(PL_subname,PL_curstname);
396482e1 7683 sv_catpvs(PL_subname,"::");
09bef843
SB
7684 sv_catpvn(PL_subname,tmpbuf,len);
7685 }
09bef843 7686 have_name = TRUE;
5db06880
NC
7687
7688#ifdef PERL_MAD
7689
7690 start_force(0);
7691 CURMAD('X', nametoke);
7692 CURMAD('_', tmpwhite);
7693 (void) force_word(PL_oldbufptr + tboffset, WORD,
7694 FALSE, TRUE, TRUE);
7695
7696 s = SKIPSPACE2(d,tmpwhite);
7697#else
7698 s = skipspace(d);
7699#endif
09bef843 7700 }
463ee0b2 7701 else {
09bef843
SB
7702 if (key == KEY_my)
7703 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7704 PL_expect = XTERMBLOCK;
7705 attrful = XATTRTERM;
76f68e9b 7706 sv_setpvs(PL_subname,"?");
09bef843 7707 have_name = FALSE;
463ee0b2 7708 }
4633a7c4 7709
09bef843
SB
7710 if (key == KEY_format) {
7711 if (*s == '=')
7712 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7713#ifdef PERL_MAD
cd81e915 7714 PL_thistoken = subtoken;
5db06880
NC
7715 s = d;
7716#else
09bef843 7717 if (have_name)
b1b65b59
JH
7718 (void) force_word(PL_oldbufptr + tboffset, WORD,
7719 FALSE, TRUE, TRUE);
5db06880 7720#endif
09bef843
SB
7721 OPERATOR(FORMAT);
7722 }
79072805 7723
09bef843
SB
7724 /* Look for a prototype */
7725 if (*s == '(') {
d9f2850e
RGS
7726 char *p;
7727 bool bad_proto = FALSE;
9e8d7757
RB
7728 bool in_brackets = FALSE;
7729 char greedy_proto = ' ';
7730 bool proto_after_greedy_proto = FALSE;
7731 bool must_be_last = FALSE;
7732 bool underscore = FALSE;
aef2a98a 7733 bool seen_underscore = FALSE;
197afce1 7734 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7735
5db06880 7736 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7737 if (!s)
09bef843 7738 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7739 /* strip spaces and check for bad characters */
09bef843
SB
7740 d = SvPVX(PL_lex_stuff);
7741 tmp = 0;
d9f2850e
RGS
7742 for (p = d; *p; ++p) {
7743 if (!isSPACE(*p)) {
7744 d[tmp++] = *p;
9e8d7757 7745
197afce1 7746 if (warnillegalproto) {
9e8d7757
RB
7747 if (must_be_last)
7748 proto_after_greedy_proto = TRUE;
c035a075 7749 if (!strchr("$@%*;[]&\\_+", *p)) {
9e8d7757
RB
7750 bad_proto = TRUE;
7751 }
7752 else {
7753 if ( underscore ) {
7754 if ( *p != ';' )
7755 bad_proto = TRUE;
7756 underscore = FALSE;
7757 }
7758 if ( *p == '[' ) {
7759 in_brackets = TRUE;
7760 }
7761 else if ( *p == ']' ) {
7762 in_brackets = FALSE;
7763 }
7764 else if ( (*p == '@' || *p == '%') &&
7765 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7766 !in_brackets ) {
7767 must_be_last = TRUE;
7768 greedy_proto = *p;
7769 }
7770 else if ( *p == '_' ) {
aef2a98a 7771 underscore = seen_underscore = TRUE;
9e8d7757
RB
7772 }
7773 }
7774 }
d37a9538 7775 }
09bef843 7776 }
d9f2850e 7777 d[tmp] = '\0';
9e8d7757 7778 if (proto_after_greedy_proto)
197afce1 7779 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
7780 "Prototype after '%c' for %"SVf" : %s",
7781 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 7782 if (bad_proto)
197afce1 7783 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
7784 "Illegal character %sin prototype for %"SVf" : %s",
7785 seen_underscore ? "after '_' " : "",
be2597df 7786 SVfARG(PL_subname), d);
b162af07 7787 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7788 have_proto = TRUE;
68dc0745 7789
5db06880
NC
7790#ifdef PERL_MAD
7791 start_force(0);
cd81e915 7792 CURMAD('q', PL_thisopen);
5db06880 7793 CURMAD('_', tmpwhite);
cd81e915
NC
7794 CURMAD('=', PL_thisstuff);
7795 CURMAD('Q', PL_thisclose);
5db06880
NC
7796 NEXTVAL_NEXTTOKE.opval =
7797 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7798 PL_lex_stuff = NULL;
5db06880
NC
7799 force_next(THING);
7800
7801 s = SKIPSPACE2(s,tmpwhite);
7802#else
09bef843 7803 s = skipspace(s);
5db06880 7804#endif
4633a7c4 7805 }
09bef843
SB
7806 else
7807 have_proto = FALSE;
7808
7809 if (*s == ':' && s[1] != ':')
7810 PL_expect = attrful;
8e742a20
MHM
7811 else if (*s != '{' && key == KEY_sub) {
7812 if (!have_name)
7813 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7814 else if (*s != ';' && *s != '}')
be2597df 7815 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7816 }
09bef843 7817
5db06880
NC
7818#ifdef PERL_MAD
7819 start_force(0);
7820 if (tmpwhite) {
7821 if (PL_madskills)
6b29d1f5 7822 curmad('^', newSVpvs(""));
5db06880
NC
7823 CURMAD('_', tmpwhite);
7824 }
7825 force_next(0);
7826
cd81e915 7827 PL_thistoken = subtoken;
5db06880 7828#else
09bef843 7829 if (have_proto) {
9ded7720 7830 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7831 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7832 PL_lex_stuff = NULL;
09bef843 7833 force_next(THING);
68dc0745 7834 }
5db06880 7835#endif
09bef843 7836 if (!have_name) {
49a54bbe
NC
7837 if (PL_curstash)
7838 sv_setpvs(PL_subname, "__ANON__");
7839 else
7840 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7841 TOKEN(ANONSUB);
4633a7c4 7842 }
5db06880 7843#ifndef PERL_MAD
b1b65b59
JH
7844 (void) force_word(PL_oldbufptr + tboffset, WORD,
7845 FALSE, TRUE, TRUE);
5db06880 7846#endif
09bef843
SB
7847 if (key == KEY_my)
7848 TOKEN(MYSUB);
7849 TOKEN(SUB);
4633a7c4 7850 }
79072805
LW
7851
7852 case KEY_system:
a0d0e21e 7853 LOP(OP_SYSTEM,XREF);
79072805
LW
7854
7855 case KEY_symlink:
a0d0e21e 7856 LOP(OP_SYMLINK,XTERM);
79072805
LW
7857
7858 case KEY_syscall:
a0d0e21e 7859 LOP(OP_SYSCALL,XTERM);
79072805 7860
c07a80fd 7861 case KEY_sysopen:
7862 LOP(OP_SYSOPEN,XTERM);
7863
137443ea 7864 case KEY_sysseek:
7865 LOP(OP_SYSSEEK,XTERM);
7866
79072805 7867 case KEY_sysread:
a0d0e21e 7868 LOP(OP_SYSREAD,XTERM);
79072805
LW
7869
7870 case KEY_syswrite:
a0d0e21e 7871 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7872
7873 case KEY_tr:
7874 s = scan_trans(s);
7875 TERM(sublex_start());
7876
7877 case KEY_tell:
7878 UNI(OP_TELL);
7879
7880 case KEY_telldir:
7881 UNI(OP_TELLDIR);
7882
463ee0b2 7883 case KEY_tie:
a0d0e21e 7884 LOP(OP_TIE,XTERM);
463ee0b2 7885
c07a80fd 7886 case KEY_tied:
7887 UNI(OP_TIED);
7888
79072805
LW
7889 case KEY_time:
7890 FUN0(OP_TIME);
7891
7892 case KEY_times:
7893 FUN0(OP_TMS);
7894
7895 case KEY_truncate:
a0d0e21e 7896 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7897
7898 case KEY_uc:
7899 UNI(OP_UC);
7900
7901 case KEY_ucfirst:
7902 UNI(OP_UCFIRST);
7903
463ee0b2
LW
7904 case KEY_untie:
7905 UNI(OP_UNTIE);
7906
79072805 7907 case KEY_until:
6154021b 7908 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7909 OPERATOR(UNTIL);
7910
7911 case KEY_unless:
6154021b 7912 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7913 OPERATOR(UNLESS);
7914
7915 case KEY_unlink:
a0d0e21e 7916 LOP(OP_UNLINK,XTERM);
79072805
LW
7917
7918 case KEY_undef:
6f33ba73 7919 UNIDOR(OP_UNDEF);
79072805
LW
7920
7921 case KEY_unpack:
a0d0e21e 7922 LOP(OP_UNPACK,XTERM);
79072805
LW
7923
7924 case KEY_utime:
a0d0e21e 7925 LOP(OP_UTIME,XTERM);
79072805
LW
7926
7927 case KEY_umask:
6f33ba73 7928 UNIDOR(OP_UMASK);
79072805
LW
7929
7930 case KEY_unshift:
a0d0e21e
LW
7931 LOP(OP_UNSHIFT,XTERM);
7932
7933 case KEY_use:
468aa647 7934 s = tokenize_use(1, s);
a0d0e21e 7935 OPERATOR(USE);
79072805
LW
7936
7937 case KEY_values:
7938 UNI(OP_VALUES);
7939
7940 case KEY_vec:
a0d0e21e 7941 LOP(OP_VEC,XTERM);
79072805 7942
0d863452 7943 case KEY_when:
6154021b 7944 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7945 OPERATOR(WHEN);
7946
79072805 7947 case KEY_while:
6154021b 7948 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7949 OPERATOR(WHILE);
7950
7951 case KEY_warn:
3280af22 7952 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7953 LOP(OP_WARN,XTERM);
79072805
LW
7954
7955 case KEY_wait:
7956 FUN0(OP_WAIT);
7957
7958 case KEY_waitpid:
a0d0e21e 7959 LOP(OP_WAITPID,XTERM);
79072805
LW
7960
7961 case KEY_wantarray:
7962 FUN0(OP_WANTARRAY);
7963
7964 case KEY_write:
9d116dd7
JH
7965#ifdef EBCDIC
7966 {
df3728a2
JH
7967 char ctl_l[2];
7968 ctl_l[0] = toCTRL('L');
7969 ctl_l[1] = '\0';
fafc274c 7970 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7971 }
7972#else
fafc274c
NC
7973 /* Make sure $^L is defined */
7974 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7975#endif
79072805
LW
7976 UNI(OP_ENTERWRITE);
7977
7978 case KEY_x:
3280af22 7979 if (PL_expect == XOPERATOR)
79072805
LW
7980 Mop(OP_REPEAT);
7981 check_uni();
7982 goto just_a_word;
7983
a0d0e21e 7984 case KEY_xor:
6154021b 7985 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7986 OPERATOR(OROP);
7987
79072805
LW
7988 case KEY_y:
7989 s = scan_trans(s);
7990 TERM(sublex_start());
7991 }
49dc05e3 7992 }}
79072805 7993}
bf4acbe4
GS
7994#ifdef __SC__
7995#pragma segment Main
7996#endif
79072805 7997
e930465f
JH
7998static int
7999S_pending_ident(pTHX)
8eceec63 8000{
97aff369 8001 dVAR;
8eceec63 8002 register char *d;
bbd11bfc 8003 PADOFFSET tmp = 0;
8eceec63
SC
8004 /* pit holds the identifier we read and pending_ident is reset */
8005 char pit = PL_pending_ident;
9bde8eb0
NC
8006 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8007 /* All routes through this function want to know if there is a colon. */
c099d646 8008 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8009 PL_pending_ident = 0;
8010
cd81e915 8011 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8012 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8013 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8014
8015 /* if we're in a my(), we can't allow dynamics here.
8016 $foo'bar has already been turned into $foo::bar, so
8017 just check for colons.
8018
8019 if it's a legal name, the OP is a PADANY.
8020 */
8021 if (PL_in_my) {
8022 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8023 if (has_colon)
8eceec63
SC
8024 yyerror(Perl_form(aTHX_ "No package name allowed for "
8025 "variable %s in \"our\"",
8026 PL_tokenbuf));
d6447115 8027 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8028 }
8029 else {
9bde8eb0 8030 if (has_colon)
952306ac
RGS
8031 yyerror(Perl_form(aTHX_ PL_no_myglob,
8032 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 8033
6154021b 8034 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 8035 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8036 return PRIVATEREF;
8037 }
8038 }
8039
8040 /*
8041 build the ops for accesses to a my() variable.
8042
8043 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8044 then used in a comparison. This catches most, but not
8045 all cases. For instance, it catches
8046 sort { my($a); $a <=> $b }
8047 but not
8048 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8049 (although why you'd do that is anyone's guess).
8050 */
8051
9bde8eb0 8052 if (!has_colon) {
8716503d 8053 if (!PL_in_my)
f8f98e0a 8054 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 8055 if (tmp != NOT_IN_PAD) {
8eceec63 8056 /* might be an "our" variable" */
00b1698f 8057 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8058 /* build ops for a bareword */
b64e5050
AL
8059 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8060 HEK * const stashname = HvNAME_HEK(stash);
8061 SV * const sym = newSVhek(stashname);
396482e1 8062 sv_catpvs(sym, "::");
9bde8eb0 8063 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8064 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8065 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8066 gv_fetchsv(sym,
8eceec63
SC
8067 (PL_in_eval
8068 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8069 : GV_ADDMULTI
8eceec63
SC
8070 ),
8071 ((PL_tokenbuf[0] == '$') ? SVt_PV
8072 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8073 : SVt_PVHV));
8074 return WORD;
8075 }
8076
8077 /* if it's a sort block and they're naming $a or $b */
8078 if (PL_last_lop_op == OP_SORT &&
8079 PL_tokenbuf[0] == '$' &&
8080 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8081 && !PL_tokenbuf[2])
8082 {
8083 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8084 d < PL_bufend && *d != '\n';
8085 d++)
8086 {
8087 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8088 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8089 PL_tokenbuf);
8090 }
8091 }
8092 }
8093
6154021b
RGS
8094 pl_yylval.opval = newOP(OP_PADANY, 0);
8095 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8096 return PRIVATEREF;
8097 }
8098 }
8099
8100 /*
8101 Whine if they've said @foo in a doublequoted string,
8102 and @foo isn't a variable we can find in the symbol
8103 table.
8104 */
d824713b
NC
8105 if (ckWARN(WARN_AMBIGUOUS) &&
8106 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8107 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8108 SVt_PVAV);
8eceec63 8109 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8110 /* DO NOT warn for @- and @+ */
8111 && !( PL_tokenbuf[2] == '\0' &&
8112 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8113 )
8eceec63
SC
8114 {
8115 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8116 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8117 "Possible unintended interpolation of %s in string",
8118 PL_tokenbuf);
8eceec63
SC
8119 }
8120 }
8121
8122 /* build ops for a bareword */
6154021b 8123 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8124 tokenbuf_len - 1));
6154021b 8125 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8126 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8127 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8128 ((PL_tokenbuf[0] == '$') ? SVt_PV
8129 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8130 : SVt_PVHV));
8eceec63
SC
8131 return WORD;
8132}
8133
4c3bbe0f
MHM
8134/*
8135 * The following code was generated by perl_keyword.pl.
8136 */
e2e1dd5a 8137
79072805 8138I32
5458a98a 8139Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 8140{
952306ac 8141 dVAR;
7918f24d
NC
8142
8143 PERL_ARGS_ASSERT_KEYWORD;
8144
4c3bbe0f
MHM
8145 switch (len)
8146 {
8147 case 1: /* 5 tokens of length 1 */
8148 switch (name[0])
e2e1dd5a 8149 {
4c3bbe0f
MHM
8150 case 'm':
8151 { /* m */
8152 return KEY_m;
8153 }
8154
4c3bbe0f
MHM
8155 case 'q':
8156 { /* q */
8157 return KEY_q;
8158 }
8159
4c3bbe0f
MHM
8160 case 's':
8161 { /* s */
8162 return KEY_s;
8163 }
8164
4c3bbe0f
MHM
8165 case 'x':
8166 { /* x */
8167 return -KEY_x;
8168 }
8169
4c3bbe0f
MHM
8170 case 'y':
8171 { /* y */
8172 return KEY_y;
8173 }
8174
4c3bbe0f
MHM
8175 default:
8176 goto unknown;
e2e1dd5a 8177 }
4c3bbe0f
MHM
8178
8179 case 2: /* 18 tokens of length 2 */
8180 switch (name[0])
e2e1dd5a 8181 {
4c3bbe0f
MHM
8182 case 'd':
8183 if (name[1] == 'o')
8184 { /* do */
8185 return KEY_do;
8186 }
8187
8188 goto unknown;
8189
8190 case 'e':
8191 if (name[1] == 'q')
8192 { /* eq */
8193 return -KEY_eq;
8194 }
8195
8196 goto unknown;
8197
8198 case 'g':
8199 switch (name[1])
8200 {
8201 case 'e':
8202 { /* ge */
8203 return -KEY_ge;
8204 }
8205
4c3bbe0f
MHM
8206 case 't':
8207 { /* gt */
8208 return -KEY_gt;
8209 }
8210
4c3bbe0f
MHM
8211 default:
8212 goto unknown;
8213 }
8214
8215 case 'i':
8216 if (name[1] == 'f')
8217 { /* if */
8218 return KEY_if;
8219 }
8220
8221 goto unknown;
8222
8223 case 'l':
8224 switch (name[1])
8225 {
8226 case 'c':
8227 { /* lc */
8228 return -KEY_lc;
8229 }
8230
4c3bbe0f
MHM
8231 case 'e':
8232 { /* le */
8233 return -KEY_le;
8234 }
8235
4c3bbe0f
MHM
8236 case 't':
8237 { /* lt */
8238 return -KEY_lt;
8239 }
8240
4c3bbe0f
MHM
8241 default:
8242 goto unknown;
8243 }
8244
8245 case 'm':
8246 if (name[1] == 'y')
8247 { /* my */
8248 return KEY_my;
8249 }
8250
8251 goto unknown;
8252
8253 case 'n':
8254 switch (name[1])
8255 {
8256 case 'e':
8257 { /* ne */
8258 return -KEY_ne;
8259 }
8260
4c3bbe0f
MHM
8261 case 'o':
8262 { /* no */
8263 return KEY_no;
8264 }
8265
4c3bbe0f
MHM
8266 default:
8267 goto unknown;
8268 }
8269
8270 case 'o':
8271 if (name[1] == 'r')
8272 { /* or */
8273 return -KEY_or;
8274 }
8275
8276 goto unknown;
8277
8278 case 'q':
8279 switch (name[1])
8280 {
8281 case 'q':
8282 { /* qq */
8283 return KEY_qq;
8284 }
8285
4c3bbe0f
MHM
8286 case 'r':
8287 { /* qr */
8288 return KEY_qr;
8289 }
8290
4c3bbe0f
MHM
8291 case 'w':
8292 { /* qw */
8293 return KEY_qw;
8294 }
8295
4c3bbe0f
MHM
8296 case 'x':
8297 { /* qx */
8298 return KEY_qx;
8299 }
8300
4c3bbe0f
MHM
8301 default:
8302 goto unknown;
8303 }
8304
8305 case 't':
8306 if (name[1] == 'r')
8307 { /* tr */
8308 return KEY_tr;
8309 }
8310
8311 goto unknown;
8312
8313 case 'u':
8314 if (name[1] == 'c')
8315 { /* uc */
8316 return -KEY_uc;
8317 }
8318
8319 goto unknown;
8320
8321 default:
8322 goto unknown;
e2e1dd5a 8323 }
4c3bbe0f 8324
0d863452 8325 case 3: /* 29 tokens of length 3 */
4c3bbe0f 8326 switch (name[0])
e2e1dd5a 8327 {
4c3bbe0f
MHM
8328 case 'E':
8329 if (name[1] == 'N' &&
8330 name[2] == 'D')
8331 { /* END */
8332 return KEY_END;
8333 }
8334
8335 goto unknown;
8336
8337 case 'a':
8338 switch (name[1])
8339 {
8340 case 'b':
8341 if (name[2] == 's')
8342 { /* abs */
8343 return -KEY_abs;
8344 }
8345
8346 goto unknown;
8347
8348 case 'n':
8349 if (name[2] == 'd')
8350 { /* and */
8351 return -KEY_and;
8352 }
8353
8354 goto unknown;
8355
8356 default:
8357 goto unknown;
8358 }
8359
8360 case 'c':
8361 switch (name[1])
8362 {
8363 case 'h':
8364 if (name[2] == 'r')
8365 { /* chr */
8366 return -KEY_chr;
8367 }
8368
8369 goto unknown;
8370
8371 case 'm':
8372 if (name[2] == 'p')
8373 { /* cmp */
8374 return -KEY_cmp;
8375 }
8376
8377 goto unknown;
8378
8379 case 'o':
8380 if (name[2] == 's')
8381 { /* cos */
8382 return -KEY_cos;
8383 }
8384
8385 goto unknown;
8386
8387 default:
8388 goto unknown;
8389 }
8390
8391 case 'd':
8392 if (name[1] == 'i' &&
8393 name[2] == 'e')
8394 { /* die */
8395 return -KEY_die;
8396 }
8397
8398 goto unknown;
8399
8400 case 'e':
8401 switch (name[1])
8402 {
8403 case 'o':
8404 if (name[2] == 'f')
8405 { /* eof */
8406 return -KEY_eof;
8407 }
8408
8409 goto unknown;
8410
4c3bbe0f
MHM
8411 case 'x':
8412 if (name[2] == 'p')
8413 { /* exp */
8414 return -KEY_exp;
8415 }
8416
8417 goto unknown;
8418
8419 default:
8420 goto unknown;
8421 }
8422
8423 case 'f':
8424 if (name[1] == 'o' &&
8425 name[2] == 'r')
8426 { /* for */
8427 return KEY_for;
8428 }
8429
8430 goto unknown;
8431
8432 case 'h':
8433 if (name[1] == 'e' &&
8434 name[2] == 'x')
8435 { /* hex */
8436 return -KEY_hex;
8437 }
8438
8439 goto unknown;
8440
8441 case 'i':
8442 if (name[1] == 'n' &&
8443 name[2] == 't')
8444 { /* int */
8445 return -KEY_int;
8446 }
8447
8448 goto unknown;
8449
8450 case 'l':
8451 if (name[1] == 'o' &&
8452 name[2] == 'g')
8453 { /* log */
8454 return -KEY_log;
8455 }
8456
8457 goto unknown;
8458
8459 case 'm':
8460 if (name[1] == 'a' &&
8461 name[2] == 'p')
8462 { /* map */
8463 return KEY_map;
8464 }
8465
8466 goto unknown;
8467
8468 case 'n':
8469 if (name[1] == 'o' &&
8470 name[2] == 't')
8471 { /* not */
8472 return -KEY_not;
8473 }
8474
8475 goto unknown;
8476
8477 case 'o':
8478 switch (name[1])
8479 {
8480 case 'c':
8481 if (name[2] == 't')
8482 { /* oct */
8483 return -KEY_oct;
8484 }
8485
8486 goto unknown;
8487
8488 case 'r':
8489 if (name[2] == 'd')
8490 { /* ord */
8491 return -KEY_ord;
8492 }
8493
8494 goto unknown;
8495
8496 case 'u':
8497 if (name[2] == 'r')
8498 { /* our */
8499 return KEY_our;
8500 }
8501
8502 goto unknown;
8503
8504 default:
8505 goto unknown;
8506 }
8507
8508 case 'p':
8509 if (name[1] == 'o')
8510 {
8511 switch (name[2])
8512 {
8513 case 'p':
8514 { /* pop */
8515 return -KEY_pop;
8516 }
8517
4c3bbe0f
MHM
8518 case 's':
8519 { /* pos */
8520 return KEY_pos;
8521 }
8522
4c3bbe0f
MHM
8523 default:
8524 goto unknown;
8525 }
8526 }
8527
8528 goto unknown;
8529
8530 case 'r':
8531 if (name[1] == 'e' &&
8532 name[2] == 'f')
8533 { /* ref */
8534 return -KEY_ref;
8535 }
8536
8537 goto unknown;
8538
8539 case 's':
8540 switch (name[1])
8541 {
0d863452
RH
8542 case 'a':
8543 if (name[2] == 'y')
8544 { /* say */
e3e804c9 8545 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8546 }
8547
8548 goto unknown;
8549
4c3bbe0f
MHM
8550 case 'i':
8551 if (name[2] == 'n')
8552 { /* sin */
8553 return -KEY_sin;
8554 }
8555
8556 goto unknown;
8557
8558 case 'u':
8559 if (name[2] == 'b')
8560 { /* sub */
8561 return KEY_sub;
8562 }
8563
8564 goto unknown;
8565
8566 default:
8567 goto unknown;
8568 }
8569
8570 case 't':
8571 if (name[1] == 'i' &&
8572 name[2] == 'e')
8573 { /* tie */
1db4d195 8574 return -KEY_tie;
4c3bbe0f
MHM
8575 }
8576
8577 goto unknown;
8578
8579 case 'u':
8580 if (name[1] == 's' &&
8581 name[2] == 'e')
8582 { /* use */
8583 return KEY_use;
8584 }
8585
8586 goto unknown;
8587
8588 case 'v':
8589 if (name[1] == 'e' &&
8590 name[2] == 'c')
8591 { /* vec */
8592 return -KEY_vec;
8593 }
8594
8595 goto unknown;
8596
8597 case 'x':
8598 if (name[1] == 'o' &&
8599 name[2] == 'r')
8600 { /* xor */
8601 return -KEY_xor;
8602 }
8603
8604 goto unknown;
8605
8606 default:
8607 goto unknown;
e2e1dd5a 8608 }
4c3bbe0f 8609
0d863452 8610 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8611 switch (name[0])
e2e1dd5a 8612 {
4c3bbe0f
MHM
8613 case 'C':
8614 if (name[1] == 'O' &&
8615 name[2] == 'R' &&
8616 name[3] == 'E')
8617 { /* CORE */
8618 return -KEY_CORE;
8619 }
8620
8621 goto unknown;
8622
8623 case 'I':
8624 if (name[1] == 'N' &&
8625 name[2] == 'I' &&
8626 name[3] == 'T')
8627 { /* INIT */
8628 return KEY_INIT;
8629 }
8630
8631 goto unknown;
8632
8633 case 'b':
8634 if (name[1] == 'i' &&
8635 name[2] == 'n' &&
8636 name[3] == 'd')
8637 { /* bind */
8638 return -KEY_bind;
8639 }
8640
8641 goto unknown;
8642
8643 case 'c':
8644 if (name[1] == 'h' &&
8645 name[2] == 'o' &&
8646 name[3] == 'p')
8647 { /* chop */
8648 return -KEY_chop;
8649 }
8650
8651 goto unknown;
8652
8653 case 'd':
8654 if (name[1] == 'u' &&
8655 name[2] == 'm' &&
8656 name[3] == 'p')
8657 { /* dump */
8658 return -KEY_dump;
8659 }
8660
8661 goto unknown;
8662
8663 case 'e':
8664 switch (name[1])
8665 {
8666 case 'a':
8667 if (name[2] == 'c' &&
8668 name[3] == 'h')
8669 { /* each */
8670 return -KEY_each;
8671 }
8672
8673 goto unknown;
8674
8675 case 'l':
8676 if (name[2] == 's' &&
8677 name[3] == 'e')
8678 { /* else */
8679 return KEY_else;
8680 }
8681
8682 goto unknown;
8683
8684 case 'v':
8685 if (name[2] == 'a' &&
8686 name[3] == 'l')
8687 { /* eval */
8688 return KEY_eval;
8689 }
8690
8691 goto unknown;
8692
8693 case 'x':
8694 switch (name[2])
8695 {
8696 case 'e':
8697 if (name[3] == 'c')
8698 { /* exec */
8699 return -KEY_exec;
8700 }
8701
8702 goto unknown;
8703
8704 case 'i':
8705 if (name[3] == 't')
8706 { /* exit */
8707 return -KEY_exit;
8708 }
8709
8710 goto unknown;
8711
8712 default:
8713 goto unknown;
8714 }
8715
8716 default:
8717 goto unknown;
8718 }
8719
8720 case 'f':
8721 if (name[1] == 'o' &&
8722 name[2] == 'r' &&
8723 name[3] == 'k')
8724 { /* fork */
8725 return -KEY_fork;
8726 }
8727
8728 goto unknown;
8729
8730 case 'g':
8731 switch (name[1])
8732 {
8733 case 'e':
8734 if (name[2] == 't' &&
8735 name[3] == 'c')
8736 { /* getc */
8737 return -KEY_getc;
8738 }
8739
8740 goto unknown;
8741
8742 case 'l':
8743 if (name[2] == 'o' &&
8744 name[3] == 'b')
8745 { /* glob */
8746 return KEY_glob;
8747 }
8748
8749 goto unknown;
8750
8751 case 'o':
8752 if (name[2] == 't' &&
8753 name[3] == 'o')
8754 { /* goto */
8755 return KEY_goto;
8756 }
8757
8758 goto unknown;
8759
8760 case 'r':
8761 if (name[2] == 'e' &&
8762 name[3] == 'p')
8763 { /* grep */
8764 return KEY_grep;
8765 }
8766
8767 goto unknown;
8768
8769 default:
8770 goto unknown;
8771 }
8772
8773 case 'j':
8774 if (name[1] == 'o' &&
8775 name[2] == 'i' &&
8776 name[3] == 'n')
8777 { /* join */
8778 return -KEY_join;
8779 }
8780
8781 goto unknown;
8782
8783 case 'k':
8784 switch (name[1])
8785 {
8786 case 'e':
8787 if (name[2] == 'y' &&
8788 name[3] == 's')
8789 { /* keys */
8790 return -KEY_keys;
8791 }
8792
8793 goto unknown;
8794
8795 case 'i':
8796 if (name[2] == 'l' &&
8797 name[3] == 'l')
8798 { /* kill */
8799 return -KEY_kill;
8800 }
8801
8802 goto unknown;
8803
8804 default:
8805 goto unknown;
8806 }
8807
8808 case 'l':
8809 switch (name[1])
8810 {
8811 case 'a':
8812 if (name[2] == 's' &&
8813 name[3] == 't')
8814 { /* last */
8815 return KEY_last;
8816 }
8817
8818 goto unknown;
8819
8820 case 'i':
8821 if (name[2] == 'n' &&
8822 name[3] == 'k')
8823 { /* link */
8824 return -KEY_link;
8825 }
8826
8827 goto unknown;
8828
8829 case 'o':
8830 if (name[2] == 'c' &&
8831 name[3] == 'k')
8832 { /* lock */
8833 return -KEY_lock;
8834 }
8835
8836 goto unknown;
8837
8838 default:
8839 goto unknown;
8840 }
8841
8842 case 'n':
8843 if (name[1] == 'e' &&
8844 name[2] == 'x' &&
8845 name[3] == 't')
8846 { /* next */
8847 return KEY_next;
8848 }
8849
8850 goto unknown;
8851
8852 case 'o':
8853 if (name[1] == 'p' &&
8854 name[2] == 'e' &&
8855 name[3] == 'n')
8856 { /* open */
8857 return -KEY_open;
8858 }
8859
8860 goto unknown;
8861
8862 case 'p':
8863 switch (name[1])
8864 {
8865 case 'a':
8866 if (name[2] == 'c' &&
8867 name[3] == 'k')
8868 { /* pack */
8869 return -KEY_pack;
8870 }
8871
8872 goto unknown;
8873
8874 case 'i':
8875 if (name[2] == 'p' &&
8876 name[3] == 'e')
8877 { /* pipe */
8878 return -KEY_pipe;
8879 }
8880
8881 goto unknown;
8882
8883 case 'u':
8884 if (name[2] == 's' &&
8885 name[3] == 'h')
8886 { /* push */
8887 return -KEY_push;
8888 }
8889
8890 goto unknown;
8891
8892 default:
8893 goto unknown;
8894 }
8895
8896 case 'r':
8897 switch (name[1])
8898 {
8899 case 'a':
8900 if (name[2] == 'n' &&
8901 name[3] == 'd')
8902 { /* rand */
8903 return -KEY_rand;
8904 }
8905
8906 goto unknown;
8907
8908 case 'e':
8909 switch (name[2])
8910 {
8911 case 'a':
8912 if (name[3] == 'd')
8913 { /* read */
8914 return -KEY_read;
8915 }
8916
8917 goto unknown;
8918
8919 case 'c':
8920 if (name[3] == 'v')
8921 { /* recv */
8922 return -KEY_recv;
8923 }
8924
8925 goto unknown;
8926
8927 case 'd':
8928 if (name[3] == 'o')
8929 { /* redo */
8930 return KEY_redo;
8931 }
8932
8933 goto unknown;
8934
8935 default:
8936 goto unknown;
8937 }
8938
8939 default:
8940 goto unknown;
8941 }
8942
8943 case 's':
8944 switch (name[1])
8945 {
8946 case 'e':
8947 switch (name[2])
8948 {
8949 case 'e':
8950 if (name[3] == 'k')
8951 { /* seek */
8952 return -KEY_seek;
8953 }
8954
8955 goto unknown;
8956
8957 case 'n':
8958 if (name[3] == 'd')
8959 { /* send */
8960 return -KEY_send;
8961 }
8962
8963 goto unknown;
8964
8965 default:
8966 goto unknown;
8967 }
8968
8969 case 'o':
8970 if (name[2] == 'r' &&
8971 name[3] == 't')
8972 { /* sort */
8973 return KEY_sort;
8974 }
8975
8976 goto unknown;
8977
8978 case 'q':
8979 if (name[2] == 'r' &&
8980 name[3] == 't')
8981 { /* sqrt */
8982 return -KEY_sqrt;
8983 }
8984
8985 goto unknown;
8986
8987 case 't':
8988 if (name[2] == 'a' &&
8989 name[3] == 't')
8990 { /* stat */
8991 return -KEY_stat;
8992 }
8993
8994 goto unknown;
8995
8996 default:
8997 goto unknown;
8998 }
8999
9000 case 't':
9001 switch (name[1])
9002 {
9003 case 'e':
9004 if (name[2] == 'l' &&
9005 name[3] == 'l')
9006 { /* tell */
9007 return -KEY_tell;
9008 }
9009
9010 goto unknown;
9011
9012 case 'i':
9013 switch (name[2])
9014 {
9015 case 'e':
9016 if (name[3] == 'd')
9017 { /* tied */
1db4d195 9018 return -KEY_tied;
4c3bbe0f
MHM
9019 }
9020
9021 goto unknown;
9022
9023 case 'm':
9024 if (name[3] == 'e')
9025 { /* time */
9026 return -KEY_time;
9027 }
9028
9029 goto unknown;
9030
9031 default:
9032 goto unknown;
9033 }
9034
9035 default:
9036 goto unknown;
9037 }
9038
9039 case 'w':
0d863452 9040 switch (name[1])
4c3bbe0f 9041 {
0d863452 9042 case 'a':
952306ac
RGS
9043 switch (name[2])
9044 {
9045 case 'i':
9046 if (name[3] == 't')
9047 { /* wait */
9048 return -KEY_wait;
9049 }
4c3bbe0f 9050
952306ac 9051 goto unknown;
4c3bbe0f 9052
952306ac
RGS
9053 case 'r':
9054 if (name[3] == 'n')
9055 { /* warn */
9056 return -KEY_warn;
9057 }
4c3bbe0f 9058
952306ac 9059 goto unknown;
4c3bbe0f 9060
952306ac
RGS
9061 default:
9062 goto unknown;
9063 }
0d863452
RH
9064
9065 case 'h':
9066 if (name[2] == 'e' &&
9067 name[3] == 'n')
9068 { /* when */
5458a98a 9069 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 9070 }
4c3bbe0f 9071
952306ac 9072 goto unknown;
4c3bbe0f 9073
952306ac
RGS
9074 default:
9075 goto unknown;
9076 }
4c3bbe0f 9077
0d863452
RH
9078 default:
9079 goto unknown;
9080 }
9081
952306ac 9082 case 5: /* 39 tokens of length 5 */
4c3bbe0f 9083 switch (name[0])
e2e1dd5a 9084 {
4c3bbe0f
MHM
9085 case 'B':
9086 if (name[1] == 'E' &&
9087 name[2] == 'G' &&
9088 name[3] == 'I' &&
9089 name[4] == 'N')
9090 { /* BEGIN */
9091 return KEY_BEGIN;
9092 }
9093
9094 goto unknown;
9095
9096 case 'C':
9097 if (name[1] == 'H' &&
9098 name[2] == 'E' &&
9099 name[3] == 'C' &&
9100 name[4] == 'K')
9101 { /* CHECK */
9102 return KEY_CHECK;
9103 }
9104
9105 goto unknown;
9106
9107 case 'a':
9108 switch (name[1])
9109 {
9110 case 'l':
9111 if (name[2] == 'a' &&
9112 name[3] == 'r' &&
9113 name[4] == 'm')
9114 { /* alarm */
9115 return -KEY_alarm;
9116 }
9117
9118 goto unknown;
9119
9120 case 't':
9121 if (name[2] == 'a' &&
9122 name[3] == 'n' &&
9123 name[4] == '2')
9124 { /* atan2 */
9125 return -KEY_atan2;
9126 }
9127
9128 goto unknown;
9129
9130 default:
9131 goto unknown;
9132 }
9133
9134 case 'b':
0d863452
RH
9135 switch (name[1])
9136 {
9137 case 'l':
9138 if (name[2] == 'e' &&
952306ac
RGS
9139 name[3] == 's' &&
9140 name[4] == 's')
9141 { /* bless */
9142 return -KEY_bless;
9143 }
4c3bbe0f 9144
952306ac 9145 goto unknown;
4c3bbe0f 9146
0d863452
RH
9147 case 'r':
9148 if (name[2] == 'e' &&
9149 name[3] == 'a' &&
9150 name[4] == 'k')
9151 { /* break */
5458a98a 9152 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
9153 }
9154
9155 goto unknown;
9156
9157 default:
9158 goto unknown;
9159 }
9160
4c3bbe0f
MHM
9161 case 'c':
9162 switch (name[1])
9163 {
9164 case 'h':
9165 switch (name[2])
9166 {
9167 case 'd':
9168 if (name[3] == 'i' &&
9169 name[4] == 'r')
9170 { /* chdir */
9171 return -KEY_chdir;
9172 }
9173
9174 goto unknown;
9175
9176 case 'm':
9177 if (name[3] == 'o' &&
9178 name[4] == 'd')
9179 { /* chmod */
9180 return -KEY_chmod;
9181 }
9182
9183 goto unknown;
9184
9185 case 'o':
9186 switch (name[3])
9187 {
9188 case 'm':
9189 if (name[4] == 'p')
9190 { /* chomp */
9191 return -KEY_chomp;
9192 }
9193
9194 goto unknown;
9195
9196 case 'w':
9197 if (name[4] == 'n')
9198 { /* chown */
9199 return -KEY_chown;
9200 }
9201
9202 goto unknown;
9203
9204 default:
9205 goto unknown;
9206 }
9207
9208 default:
9209 goto unknown;
9210 }
9211
9212 case 'l':
9213 if (name[2] == 'o' &&
9214 name[3] == 's' &&
9215 name[4] == 'e')
9216 { /* close */
9217 return -KEY_close;
9218 }
9219
9220 goto unknown;
9221
9222 case 'r':
9223 if (name[2] == 'y' &&
9224 name[3] == 'p' &&
9225 name[4] == 't')
9226 { /* crypt */
9227 return -KEY_crypt;
9228 }
9229
9230 goto unknown;
9231
9232 default:
9233 goto unknown;
9234 }
9235
9236 case 'e':
9237 if (name[1] == 'l' &&
9238 name[2] == 's' &&
9239 name[3] == 'i' &&
9240 name[4] == 'f')
9241 { /* elsif */
9242 return KEY_elsif;
9243 }
9244
9245 goto unknown;
9246
9247 case 'f':
9248 switch (name[1])
9249 {
9250 case 'c':
9251 if (name[2] == 'n' &&
9252 name[3] == 't' &&
9253 name[4] == 'l')
9254 { /* fcntl */
9255 return -KEY_fcntl;
9256 }
9257
9258 goto unknown;
9259
9260 case 'l':
9261 if (name[2] == 'o' &&
9262 name[3] == 'c' &&
9263 name[4] == 'k')
9264 { /* flock */
9265 return -KEY_flock;
9266 }
9267
9268 goto unknown;
9269
9270 default:
9271 goto unknown;
9272 }
9273
0d863452
RH
9274 case 'g':
9275 if (name[1] == 'i' &&
9276 name[2] == 'v' &&
9277 name[3] == 'e' &&
9278 name[4] == 'n')
9279 { /* given */
5458a98a 9280 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
9281 }
9282
9283 goto unknown;
9284
4c3bbe0f
MHM
9285 case 'i':
9286 switch (name[1])
9287 {
9288 case 'n':
9289 if (name[2] == 'd' &&
9290 name[3] == 'e' &&
9291 name[4] == 'x')
9292 { /* index */
9293 return -KEY_index;
9294 }
9295
9296 goto unknown;
9297
9298 case 'o':
9299 if (name[2] == 'c' &&
9300 name[3] == 't' &&
9301 name[4] == 'l')
9302 { /* ioctl */
9303 return -KEY_ioctl;
9304 }
9305
9306 goto unknown;
9307
9308 default:
9309 goto unknown;
9310 }
9311
9312 case 'l':
9313 switch (name[1])
9314 {
9315 case 'o':
9316 if (name[2] == 'c' &&
9317 name[3] == 'a' &&
9318 name[4] == 'l')
9319 { /* local */
9320 return KEY_local;
9321 }
9322
9323 goto unknown;
9324
9325 case 's':
9326 if (name[2] == 't' &&
9327 name[3] == 'a' &&
9328 name[4] == 't')
9329 { /* lstat */
9330 return -KEY_lstat;
9331 }
9332
9333 goto unknown;
9334
9335 default:
9336 goto unknown;
9337 }
9338
9339 case 'm':
9340 if (name[1] == 'k' &&
9341 name[2] == 'd' &&
9342 name[3] == 'i' &&
9343 name[4] == 'r')
9344 { /* mkdir */
9345 return -KEY_mkdir;
9346 }
9347
9348 goto unknown;
9349
9350 case 'p':
9351 if (name[1] == 'r' &&
9352 name[2] == 'i' &&
9353 name[3] == 'n' &&
9354 name[4] == 't')
9355 { /* print */
9356 return KEY_print;
9357 }
9358
9359 goto unknown;
9360
9361 case 'r':
9362 switch (name[1])
9363 {
9364 case 'e':
9365 if (name[2] == 's' &&
9366 name[3] == 'e' &&
9367 name[4] == 't')
9368 { /* reset */
9369 return -KEY_reset;
9370 }
9371
9372 goto unknown;
9373
9374 case 'm':
9375 if (name[2] == 'd' &&
9376 name[3] == 'i' &&
9377 name[4] == 'r')
9378 { /* rmdir */
9379 return -KEY_rmdir;
9380 }
9381
9382 goto unknown;
9383
9384 default:
9385 goto unknown;
9386 }
9387
9388 case 's':
9389 switch (name[1])
9390 {
9391 case 'e':
9392 if (name[2] == 'm' &&
9393 name[3] == 'o' &&
9394 name[4] == 'p')
9395 { /* semop */
9396 return -KEY_semop;
9397 }
9398
9399 goto unknown;
9400
9401 case 'h':
9402 if (name[2] == 'i' &&
9403 name[3] == 'f' &&
9404 name[4] == 't')
9405 { /* shift */
9406 return -KEY_shift;
9407 }
9408
9409 goto unknown;
9410
9411 case 'l':
9412 if (name[2] == 'e' &&
9413 name[3] == 'e' &&
9414 name[4] == 'p')
9415 { /* sleep */
9416 return -KEY_sleep;
9417 }
9418
9419 goto unknown;
9420
9421 case 'p':
9422 if (name[2] == 'l' &&
9423 name[3] == 'i' &&
9424 name[4] == 't')
9425 { /* split */
9426 return KEY_split;
9427 }
9428
9429 goto unknown;
9430
9431 case 'r':
9432 if (name[2] == 'a' &&
9433 name[3] == 'n' &&
9434 name[4] == 'd')
9435 { /* srand */
9436 return -KEY_srand;
9437 }
9438
9439 goto unknown;
9440
9441 case 't':
952306ac
RGS
9442 switch (name[2])
9443 {
9444 case 'a':
9445 if (name[3] == 't' &&
9446 name[4] == 'e')
9447 { /* state */
5458a98a 9448 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9449 }
4c3bbe0f 9450
952306ac
RGS
9451 goto unknown;
9452
9453 case 'u':
9454 if (name[3] == 'd' &&
9455 name[4] == 'y')
9456 { /* study */
9457 return KEY_study;
9458 }
9459
9460 goto unknown;
9461
9462 default:
9463 goto unknown;
9464 }
4c3bbe0f
MHM
9465
9466 default:
9467 goto unknown;
9468 }
9469
9470 case 't':
9471 if (name[1] == 'i' &&
9472 name[2] == 'm' &&
9473 name[3] == 'e' &&
9474 name[4] == 's')
9475 { /* times */
9476 return -KEY_times;
9477 }
9478
9479 goto unknown;
9480
9481 case 'u':
9482 switch (name[1])
9483 {
9484 case 'm':
9485 if (name[2] == 'a' &&
9486 name[3] == 's' &&
9487 name[4] == 'k')
9488 { /* umask */
9489 return -KEY_umask;
9490 }
9491
9492 goto unknown;
9493
9494 case 'n':
9495 switch (name[2])
9496 {
9497 case 'd':
9498 if (name[3] == 'e' &&
9499 name[4] == 'f')
9500 { /* undef */
9501 return KEY_undef;
9502 }
9503
9504 goto unknown;
9505
9506 case 't':
9507 if (name[3] == 'i')
9508 {
9509 switch (name[4])
9510 {
9511 case 'e':
9512 { /* untie */
1db4d195 9513 return -KEY_untie;
4c3bbe0f
MHM
9514 }
9515
4c3bbe0f
MHM
9516 case 'l':
9517 { /* until */
9518 return KEY_until;
9519 }
9520
4c3bbe0f
MHM
9521 default:
9522 goto unknown;
9523 }
9524 }
9525
9526 goto unknown;
9527
9528 default:
9529 goto unknown;
9530 }
9531
9532 case 't':
9533 if (name[2] == 'i' &&
9534 name[3] == 'm' &&
9535 name[4] == 'e')
9536 { /* utime */
9537 return -KEY_utime;
9538 }
9539
9540 goto unknown;
9541
9542 default:
9543 goto unknown;
9544 }
9545
9546 case 'w':
9547 switch (name[1])
9548 {
9549 case 'h':
9550 if (name[2] == 'i' &&
9551 name[3] == 'l' &&
9552 name[4] == 'e')
9553 { /* while */
9554 return KEY_while;
9555 }
9556
9557 goto unknown;
9558
9559 case 'r':
9560 if (name[2] == 'i' &&
9561 name[3] == 't' &&
9562 name[4] == 'e')
9563 { /* write */
9564 return -KEY_write;
9565 }
9566
9567 goto unknown;
9568
9569 default:
9570 goto unknown;
9571 }
9572
9573 default:
9574 goto unknown;
e2e1dd5a 9575 }
4c3bbe0f
MHM
9576
9577 case 6: /* 33 tokens of length 6 */
9578 switch (name[0])
9579 {
9580 case 'a':
9581 if (name[1] == 'c' &&
9582 name[2] == 'c' &&
9583 name[3] == 'e' &&
9584 name[4] == 'p' &&
9585 name[5] == 't')
9586 { /* accept */
9587 return -KEY_accept;
9588 }
9589
9590 goto unknown;
9591
9592 case 'c':
9593 switch (name[1])
9594 {
9595 case 'a':
9596 if (name[2] == 'l' &&
9597 name[3] == 'l' &&
9598 name[4] == 'e' &&
9599 name[5] == 'r')
9600 { /* caller */
9601 return -KEY_caller;
9602 }
9603
9604 goto unknown;
9605
9606 case 'h':
9607 if (name[2] == 'r' &&
9608 name[3] == 'o' &&
9609 name[4] == 'o' &&
9610 name[5] == 't')
9611 { /* chroot */
9612 return -KEY_chroot;
9613 }
9614
9615 goto unknown;
9616
9617 default:
9618 goto unknown;
9619 }
9620
9621 case 'd':
9622 if (name[1] == 'e' &&
9623 name[2] == 'l' &&
9624 name[3] == 'e' &&
9625 name[4] == 't' &&
9626 name[5] == 'e')
9627 { /* delete */
9628 return KEY_delete;
9629 }
9630
9631 goto unknown;
9632
9633 case 'e':
9634 switch (name[1])
9635 {
9636 case 'l':
9637 if (name[2] == 's' &&
9638 name[3] == 'e' &&
9639 name[4] == 'i' &&
9640 name[5] == 'f')
9641 { /* elseif */
9b387841 9642 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9643 }
9644
9645 goto unknown;
9646
9647 case 'x':
9648 if (name[2] == 'i' &&
9649 name[3] == 's' &&
9650 name[4] == 't' &&
9651 name[5] == 's')
9652 { /* exists */
9653 return KEY_exists;
9654 }
9655
9656 goto unknown;
9657
9658 default:
9659 goto unknown;
9660 }
9661
9662 case 'f':
9663 switch (name[1])
9664 {
9665 case 'i':
9666 if (name[2] == 'l' &&
9667 name[3] == 'e' &&
9668 name[4] == 'n' &&
9669 name[5] == 'o')
9670 { /* fileno */
9671 return -KEY_fileno;
9672 }
9673
9674 goto unknown;
9675
9676 case 'o':
9677 if (name[2] == 'r' &&
9678 name[3] == 'm' &&
9679 name[4] == 'a' &&
9680 name[5] == 't')
9681 { /* format */
9682 return KEY_format;
9683 }
9684
9685 goto unknown;
9686
9687 default:
9688 goto unknown;
9689 }
9690
9691 case 'g':
9692 if (name[1] == 'm' &&
9693 name[2] == 't' &&
9694 name[3] == 'i' &&
9695 name[4] == 'm' &&
9696 name[5] == 'e')
9697 { /* gmtime */
9698 return -KEY_gmtime;
9699 }
9700
9701 goto unknown;
9702
9703 case 'l':
9704 switch (name[1])
9705 {
9706 case 'e':
9707 if (name[2] == 'n' &&
9708 name[3] == 'g' &&
9709 name[4] == 't' &&
9710 name[5] == 'h')
9711 { /* length */
9712 return -KEY_length;
9713 }
9714
9715 goto unknown;
9716
9717 case 'i':
9718 if (name[2] == 's' &&
9719 name[3] == 't' &&
9720 name[4] == 'e' &&
9721 name[5] == 'n')
9722 { /* listen */
9723 return -KEY_listen;
9724 }
9725
9726 goto unknown;
9727
9728 default:
9729 goto unknown;
9730 }
9731
9732 case 'm':
9733 if (name[1] == 's' &&
9734 name[2] == 'g')
9735 {
9736 switch (name[3])
9737 {
9738 case 'c':
9739 if (name[4] == 't' &&
9740 name[5] == 'l')
9741 { /* msgctl */
9742 return -KEY_msgctl;
9743 }
9744
9745 goto unknown;
9746
9747 case 'g':
9748 if (name[4] == 'e' &&
9749 name[5] == 't')
9750 { /* msgget */
9751 return -KEY_msgget;
9752 }
9753
9754 goto unknown;
9755
9756 case 'r':
9757 if (name[4] == 'c' &&
9758 name[5] == 'v')
9759 { /* msgrcv */
9760 return -KEY_msgrcv;
9761 }
9762
9763 goto unknown;
9764
9765 case 's':
9766 if (name[4] == 'n' &&
9767 name[5] == 'd')
9768 { /* msgsnd */
9769 return -KEY_msgsnd;
9770 }
9771
9772 goto unknown;
9773
9774 default:
9775 goto unknown;
9776 }
9777 }
9778
9779 goto unknown;
9780
9781 case 'p':
9782 if (name[1] == 'r' &&
9783 name[2] == 'i' &&
9784 name[3] == 'n' &&
9785 name[4] == 't' &&
9786 name[5] == 'f')
9787 { /* printf */
9788 return KEY_printf;
9789 }
9790
9791 goto unknown;
9792
9793 case 'r':
9794 switch (name[1])
9795 {
9796 case 'e':
9797 switch (name[2])
9798 {
9799 case 'n':
9800 if (name[3] == 'a' &&
9801 name[4] == 'm' &&
9802 name[5] == 'e')
9803 { /* rename */
9804 return -KEY_rename;
9805 }
9806
9807 goto unknown;
9808
9809 case 't':
9810 if (name[3] == 'u' &&
9811 name[4] == 'r' &&
9812 name[5] == 'n')
9813 { /* return */
9814 return KEY_return;
9815 }
9816
9817 goto unknown;
9818
9819 default:
9820 goto unknown;
9821 }
9822
9823 case 'i':
9824 if (name[2] == 'n' &&
9825 name[3] == 'd' &&
9826 name[4] == 'e' &&
9827 name[5] == 'x')
9828 { /* rindex */
9829 return -KEY_rindex;
9830 }
9831
9832 goto unknown;
9833
9834 default:
9835 goto unknown;
9836 }
9837
9838 case 's':
9839 switch (name[1])
9840 {
9841 case 'c':
9842 if (name[2] == 'a' &&
9843 name[3] == 'l' &&
9844 name[4] == 'a' &&
9845 name[5] == 'r')
9846 { /* scalar */
9847 return KEY_scalar;
9848 }
9849
9850 goto unknown;
9851
9852 case 'e':
9853 switch (name[2])
9854 {
9855 case 'l':
9856 if (name[3] == 'e' &&
9857 name[4] == 'c' &&
9858 name[5] == 't')
9859 { /* select */
9860 return -KEY_select;
9861 }
9862
9863 goto unknown;
9864
9865 case 'm':
9866 switch (name[3])
9867 {
9868 case 'c':
9869 if (name[4] == 't' &&
9870 name[5] == 'l')
9871 { /* semctl */
9872 return -KEY_semctl;
9873 }
9874
9875 goto unknown;
9876
9877 case 'g':
9878 if (name[4] == 'e' &&
9879 name[5] == 't')
9880 { /* semget */
9881 return -KEY_semget;
9882 }
9883
9884 goto unknown;
9885
9886 default:
9887 goto unknown;
9888 }
9889
9890 default:
9891 goto unknown;
9892 }
9893
9894 case 'h':
9895 if (name[2] == 'm')
9896 {
9897 switch (name[3])
9898 {
9899 case 'c':
9900 if (name[4] == 't' &&
9901 name[5] == 'l')
9902 { /* shmctl */
9903 return -KEY_shmctl;
9904 }
9905
9906 goto unknown;
9907
9908 case 'g':
9909 if (name[4] == 'e' &&
9910 name[5] == 't')
9911 { /* shmget */
9912 return -KEY_shmget;
9913 }
9914
9915 goto unknown;
9916
9917 default:
9918 goto unknown;
9919 }
9920 }
9921
9922 goto unknown;
9923
9924 case 'o':
9925 if (name[2] == 'c' &&
9926 name[3] == 'k' &&
9927 name[4] == 'e' &&
9928 name[5] == 't')
9929 { /* socket */
9930 return -KEY_socket;
9931 }
9932
9933 goto unknown;
9934
9935 case 'p':
9936 if (name[2] == 'l' &&
9937 name[3] == 'i' &&
9938 name[4] == 'c' &&
9939 name[5] == 'e')
9940 { /* splice */
9941 return -KEY_splice;
9942 }
9943
9944 goto unknown;
9945
9946 case 'u':
9947 if (name[2] == 'b' &&
9948 name[3] == 's' &&
9949 name[4] == 't' &&
9950 name[5] == 'r')
9951 { /* substr */
9952 return -KEY_substr;
9953 }
9954
9955 goto unknown;
9956
9957 case 'y':
9958 if (name[2] == 's' &&
9959 name[3] == 't' &&
9960 name[4] == 'e' &&
9961 name[5] == 'm')
9962 { /* system */
9963 return -KEY_system;
9964 }
9965
9966 goto unknown;
9967
9968 default:
9969 goto unknown;
9970 }
9971
9972 case 'u':
9973 if (name[1] == 'n')
9974 {
9975 switch (name[2])
9976 {
9977 case 'l':
9978 switch (name[3])
9979 {
9980 case 'e':
9981 if (name[4] == 's' &&
9982 name[5] == 's')
9983 { /* unless */
9984 return KEY_unless;
9985 }
9986
9987 goto unknown;
9988
9989 case 'i':
9990 if (name[4] == 'n' &&
9991 name[5] == 'k')
9992 { /* unlink */
9993 return -KEY_unlink;
9994 }
9995
9996 goto unknown;
9997
9998 default:
9999 goto unknown;
10000 }
10001
10002 case 'p':
10003 if (name[3] == 'a' &&
10004 name[4] == 'c' &&
10005 name[5] == 'k')
10006 { /* unpack */
10007 return -KEY_unpack;
10008 }
10009
10010 goto unknown;
10011
10012 default:
10013 goto unknown;
10014 }
10015 }
10016
10017 goto unknown;
10018
10019 case 'v':
10020 if (name[1] == 'a' &&
10021 name[2] == 'l' &&
10022 name[3] == 'u' &&
10023 name[4] == 'e' &&
10024 name[5] == 's')
10025 { /* values */
10026 return -KEY_values;
10027 }
10028
10029 goto unknown;
10030
10031 default:
10032 goto unknown;
e2e1dd5a 10033 }
4c3bbe0f 10034
0d863452 10035 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
10036 switch (name[0])
10037 {
10038 case 'D':
10039 if (name[1] == 'E' &&
10040 name[2] == 'S' &&
10041 name[3] == 'T' &&
10042 name[4] == 'R' &&
10043 name[5] == 'O' &&
10044 name[6] == 'Y')
10045 { /* DESTROY */
10046 return KEY_DESTROY;
10047 }
10048
10049 goto unknown;
10050
10051 case '_':
10052 if (name[1] == '_' &&
10053 name[2] == 'E' &&
10054 name[3] == 'N' &&
10055 name[4] == 'D' &&
10056 name[5] == '_' &&
10057 name[6] == '_')
10058 { /* __END__ */
10059 return KEY___END__;
10060 }
10061
10062 goto unknown;
10063
10064 case 'b':
10065 if (name[1] == 'i' &&
10066 name[2] == 'n' &&
10067 name[3] == 'm' &&
10068 name[4] == 'o' &&
10069 name[5] == 'd' &&
10070 name[6] == 'e')
10071 { /* binmode */
10072 return -KEY_binmode;
10073 }
10074
10075 goto unknown;
10076
10077 case 'c':
10078 if (name[1] == 'o' &&
10079 name[2] == 'n' &&
10080 name[3] == 'n' &&
10081 name[4] == 'e' &&
10082 name[5] == 'c' &&
10083 name[6] == 't')
10084 { /* connect */
10085 return -KEY_connect;
10086 }
10087
10088 goto unknown;
10089
10090 case 'd':
10091 switch (name[1])
10092 {
10093 case 'b':
10094 if (name[2] == 'm' &&
10095 name[3] == 'o' &&
10096 name[4] == 'p' &&
10097 name[5] == 'e' &&
10098 name[6] == 'n')
10099 { /* dbmopen */
10100 return -KEY_dbmopen;
10101 }
10102
10103 goto unknown;
10104
10105 case 'e':
0d863452
RH
10106 if (name[2] == 'f')
10107 {
10108 switch (name[3])
10109 {
10110 case 'a':
10111 if (name[4] == 'u' &&
10112 name[5] == 'l' &&
10113 name[6] == 't')
10114 { /* default */
5458a98a 10115 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
10116 }
10117
10118 goto unknown;
10119
10120 case 'i':
10121 if (name[4] == 'n' &&
952306ac
RGS
10122 name[5] == 'e' &&
10123 name[6] == 'd')
10124 { /* defined */
10125 return KEY_defined;
10126 }
4c3bbe0f 10127
952306ac 10128 goto unknown;
4c3bbe0f 10129
952306ac
RGS
10130 default:
10131 goto unknown;
10132 }
0d863452
RH
10133 }
10134
10135 goto unknown;
10136
10137 default:
10138 goto unknown;
10139 }
4c3bbe0f
MHM
10140
10141 case 'f':
10142 if (name[1] == 'o' &&
10143 name[2] == 'r' &&
10144 name[3] == 'e' &&
10145 name[4] == 'a' &&
10146 name[5] == 'c' &&
10147 name[6] == 'h')
10148 { /* foreach */
10149 return KEY_foreach;
10150 }
10151
10152 goto unknown;
10153
10154 case 'g':
10155 if (name[1] == 'e' &&
10156 name[2] == 't' &&
10157 name[3] == 'p')
10158 {
10159 switch (name[4])
10160 {
10161 case 'g':
10162 if (name[5] == 'r' &&
10163 name[6] == 'p')
10164 { /* getpgrp */
10165 return -KEY_getpgrp;
10166 }
10167
10168 goto unknown;
10169
10170 case 'p':
10171 if (name[5] == 'i' &&
10172 name[6] == 'd')
10173 { /* getppid */
10174 return -KEY_getppid;
10175 }
10176
10177 goto unknown;
10178
10179 default:
10180 goto unknown;
10181 }
10182 }
10183
10184 goto unknown;
10185
10186 case 'l':
10187 if (name[1] == 'c' &&
10188 name[2] == 'f' &&
10189 name[3] == 'i' &&
10190 name[4] == 'r' &&
10191 name[5] == 's' &&
10192 name[6] == 't')
10193 { /* lcfirst */
10194 return -KEY_lcfirst;
10195 }
10196
10197 goto unknown;
10198
10199 case 'o':
10200 if (name[1] == 'p' &&
10201 name[2] == 'e' &&
10202 name[3] == 'n' &&
10203 name[4] == 'd' &&
10204 name[5] == 'i' &&
10205 name[6] == 'r')
10206 { /* opendir */
10207 return -KEY_opendir;
10208 }
10209
10210 goto unknown;
10211
10212 case 'p':
10213 if (name[1] == 'a' &&
10214 name[2] == 'c' &&
10215 name[3] == 'k' &&
10216 name[4] == 'a' &&
10217 name[5] == 'g' &&
10218 name[6] == 'e')
10219 { /* package */
10220 return KEY_package;
10221 }
10222
10223 goto unknown;
10224
10225 case 'r':
10226 if (name[1] == 'e')
10227 {
10228 switch (name[2])
10229 {
10230 case 'a':
10231 if (name[3] == 'd' &&
10232 name[4] == 'd' &&
10233 name[5] == 'i' &&
10234 name[6] == 'r')
10235 { /* readdir */
10236 return -KEY_readdir;
10237 }
10238
10239 goto unknown;
10240
10241 case 'q':
10242 if (name[3] == 'u' &&
10243 name[4] == 'i' &&
10244 name[5] == 'r' &&
10245 name[6] == 'e')
10246 { /* require */
10247 return KEY_require;
10248 }
10249
10250 goto unknown;
10251
10252 case 'v':
10253 if (name[3] == 'e' &&
10254 name[4] == 'r' &&
10255 name[5] == 's' &&
10256 name[6] == 'e')
10257 { /* reverse */
10258 return -KEY_reverse;
10259 }
10260
10261 goto unknown;
10262
10263 default:
10264 goto unknown;
10265 }
10266 }
10267
10268 goto unknown;
10269
10270 case 's':
10271 switch (name[1])
10272 {
10273 case 'e':
10274 switch (name[2])
10275 {
10276 case 'e':
10277 if (name[3] == 'k' &&
10278 name[4] == 'd' &&
10279 name[5] == 'i' &&
10280 name[6] == 'r')
10281 { /* seekdir */
10282 return -KEY_seekdir;
10283 }
10284
10285 goto unknown;
10286
10287 case 't':
10288 if (name[3] == 'p' &&
10289 name[4] == 'g' &&
10290 name[5] == 'r' &&
10291 name[6] == 'p')
10292 { /* setpgrp */
10293 return -KEY_setpgrp;
10294 }
10295
10296 goto unknown;
10297
10298 default:
10299 goto unknown;
10300 }
10301
10302 case 'h':
10303 if (name[2] == 'm' &&
10304 name[3] == 'r' &&
10305 name[4] == 'e' &&
10306 name[5] == 'a' &&
10307 name[6] == 'd')
10308 { /* shmread */
10309 return -KEY_shmread;
10310 }
10311
10312 goto unknown;
10313
10314 case 'p':
10315 if (name[2] == 'r' &&
10316 name[3] == 'i' &&
10317 name[4] == 'n' &&
10318 name[5] == 't' &&
10319 name[6] == 'f')
10320 { /* sprintf */
10321 return -KEY_sprintf;
10322 }
10323
10324 goto unknown;
10325
10326 case 'y':
10327 switch (name[2])
10328 {
10329 case 'm':
10330 if (name[3] == 'l' &&
10331 name[4] == 'i' &&
10332 name[5] == 'n' &&
10333 name[6] == 'k')
10334 { /* symlink */
10335 return -KEY_symlink;
10336 }
10337
10338 goto unknown;
10339
10340 case 's':
10341 switch (name[3])
10342 {
10343 case 'c':
10344 if (name[4] == 'a' &&
10345 name[5] == 'l' &&
10346 name[6] == 'l')
10347 { /* syscall */
10348 return -KEY_syscall;
10349 }
10350
10351 goto unknown;
10352
10353 case 'o':
10354 if (name[4] == 'p' &&
10355 name[5] == 'e' &&
10356 name[6] == 'n')
10357 { /* sysopen */
10358 return -KEY_sysopen;
10359 }
10360
10361 goto unknown;
10362
10363 case 'r':
10364 if (name[4] == 'e' &&
10365 name[5] == 'a' &&
10366 name[6] == 'd')
10367 { /* sysread */
10368 return -KEY_sysread;
10369 }
10370
10371 goto unknown;
10372
10373 case 's':
10374 if (name[4] == 'e' &&
10375 name[5] == 'e' &&
10376 name[6] == 'k')
10377 { /* sysseek */
10378 return -KEY_sysseek;
10379 }
10380
10381 goto unknown;
10382
10383 default:
10384 goto unknown;
10385 }
10386
10387 default:
10388 goto unknown;
10389 }
10390
10391 default:
10392 goto unknown;
10393 }
10394
10395 case 't':
10396 if (name[1] == 'e' &&
10397 name[2] == 'l' &&
10398 name[3] == 'l' &&
10399 name[4] == 'd' &&
10400 name[5] == 'i' &&
10401 name[6] == 'r')
10402 { /* telldir */
10403 return -KEY_telldir;
10404 }
10405
10406 goto unknown;
10407
10408 case 'u':
10409 switch (name[1])
10410 {
10411 case 'c':
10412 if (name[2] == 'f' &&
10413 name[3] == 'i' &&
10414 name[4] == 'r' &&
10415 name[5] == 's' &&
10416 name[6] == 't')
10417 { /* ucfirst */
10418 return -KEY_ucfirst;
10419 }
10420
10421 goto unknown;
10422
10423 case 'n':
10424 if (name[2] == 's' &&
10425 name[3] == 'h' &&
10426 name[4] == 'i' &&
10427 name[5] == 'f' &&
10428 name[6] == 't')
10429 { /* unshift */
10430 return -KEY_unshift;
10431 }
10432
10433 goto unknown;
10434
10435 default:
10436 goto unknown;
10437 }
10438
10439 case 'w':
10440 if (name[1] == 'a' &&
10441 name[2] == 'i' &&
10442 name[3] == 't' &&
10443 name[4] == 'p' &&
10444 name[5] == 'i' &&
10445 name[6] == 'd')
10446 { /* waitpid */
10447 return -KEY_waitpid;
10448 }
10449
10450 goto unknown;
10451
10452 default:
10453 goto unknown;
10454 }
10455
10456 case 8: /* 26 tokens of length 8 */
10457 switch (name[0])
10458 {
10459 case 'A':
10460 if (name[1] == 'U' &&
10461 name[2] == 'T' &&
10462 name[3] == 'O' &&
10463 name[4] == 'L' &&
10464 name[5] == 'O' &&
10465 name[6] == 'A' &&
10466 name[7] == 'D')
10467 { /* AUTOLOAD */
10468 return KEY_AUTOLOAD;
10469 }
10470
10471 goto unknown;
10472
10473 case '_':
10474 if (name[1] == '_')
10475 {
10476 switch (name[2])
10477 {
10478 case 'D':
10479 if (name[3] == 'A' &&
10480 name[4] == 'T' &&
10481 name[5] == 'A' &&
10482 name[6] == '_' &&
10483 name[7] == '_')
10484 { /* __DATA__ */
10485 return KEY___DATA__;
10486 }
10487
10488 goto unknown;
10489
10490 case 'F':
10491 if (name[3] == 'I' &&
10492 name[4] == 'L' &&
10493 name[5] == 'E' &&
10494 name[6] == '_' &&
10495 name[7] == '_')
10496 { /* __FILE__ */
10497 return -KEY___FILE__;
10498 }
10499
10500 goto unknown;
10501
10502 case 'L':
10503 if (name[3] == 'I' &&
10504 name[4] == 'N' &&
10505 name[5] == 'E' &&
10506 name[6] == '_' &&
10507 name[7] == '_')
10508 { /* __LINE__ */
10509 return -KEY___LINE__;
10510 }
10511
10512 goto unknown;
10513
10514 default:
10515 goto unknown;
10516 }
10517 }
10518
10519 goto unknown;
10520
10521 case 'c':
10522 switch (name[1])
10523 {
10524 case 'l':
10525 if (name[2] == 'o' &&
10526 name[3] == 's' &&
10527 name[4] == 'e' &&
10528 name[5] == 'd' &&
10529 name[6] == 'i' &&
10530 name[7] == 'r')
10531 { /* closedir */
10532 return -KEY_closedir;
10533 }
10534
10535 goto unknown;
10536
10537 case 'o':
10538 if (name[2] == 'n' &&
10539 name[3] == 't' &&
10540 name[4] == 'i' &&
10541 name[5] == 'n' &&
10542 name[6] == 'u' &&
10543 name[7] == 'e')
10544 { /* continue */
10545 return -KEY_continue;
10546 }
10547
10548 goto unknown;
10549
10550 default:
10551 goto unknown;
10552 }
10553
10554 case 'd':
10555 if (name[1] == 'b' &&
10556 name[2] == 'm' &&
10557 name[3] == 'c' &&
10558 name[4] == 'l' &&
10559 name[5] == 'o' &&
10560 name[6] == 's' &&
10561 name[7] == 'e')
10562 { /* dbmclose */
10563 return -KEY_dbmclose;
10564 }
10565
10566 goto unknown;
10567
10568 case 'e':
10569 if (name[1] == 'n' &&
10570 name[2] == 'd')
10571 {
10572 switch (name[3])
10573 {
10574 case 'g':
10575 if (name[4] == 'r' &&
10576 name[5] == 'e' &&
10577 name[6] == 'n' &&
10578 name[7] == 't')
10579 { /* endgrent */
10580 return -KEY_endgrent;
10581 }
10582
10583 goto unknown;
10584
10585 case 'p':
10586 if (name[4] == 'w' &&
10587 name[5] == 'e' &&
10588 name[6] == 'n' &&
10589 name[7] == 't')
10590 { /* endpwent */
10591 return -KEY_endpwent;
10592 }
10593
10594 goto unknown;
10595
10596 default:
10597 goto unknown;
10598 }
10599 }
10600
10601 goto unknown;
10602
10603 case 'f':
10604 if (name[1] == 'o' &&
10605 name[2] == 'r' &&
10606 name[3] == 'm' &&
10607 name[4] == 'l' &&
10608 name[5] == 'i' &&
10609 name[6] == 'n' &&
10610 name[7] == 'e')
10611 { /* formline */
10612 return -KEY_formline;
10613 }
10614
10615 goto unknown;
10616
10617 case 'g':
10618 if (name[1] == 'e' &&
10619 name[2] == 't')
10620 {
10621 switch (name[3])
10622 {
10623 case 'g':
10624 if (name[4] == 'r')
10625 {
10626 switch (name[5])
10627 {
10628 case 'e':
10629 if (name[6] == 'n' &&
10630 name[7] == 't')
10631 { /* getgrent */
10632 return -KEY_getgrent;
10633 }
10634
10635 goto unknown;
10636
10637 case 'g':
10638 if (name[6] == 'i' &&
10639 name[7] == 'd')
10640 { /* getgrgid */
10641 return -KEY_getgrgid;
10642 }
10643
10644 goto unknown;
10645
10646 case 'n':
10647 if (name[6] == 'a' &&
10648 name[7] == 'm')
10649 { /* getgrnam */
10650 return -KEY_getgrnam;
10651 }
10652
10653 goto unknown;
10654
10655 default:
10656 goto unknown;
10657 }
10658 }
10659
10660 goto unknown;
10661
10662 case 'l':
10663 if (name[4] == 'o' &&
10664 name[5] == 'g' &&
10665 name[6] == 'i' &&
10666 name[7] == 'n')
10667 { /* getlogin */
10668 return -KEY_getlogin;
10669 }
10670
10671 goto unknown;
10672
10673 case 'p':
10674 if (name[4] == 'w')
10675 {
10676 switch (name[5])
10677 {
10678 case 'e':
10679 if (name[6] == 'n' &&
10680 name[7] == 't')
10681 { /* getpwent */
10682 return -KEY_getpwent;
10683 }
10684
10685 goto unknown;
10686
10687 case 'n':
10688 if (name[6] == 'a' &&
10689 name[7] == 'm')
10690 { /* getpwnam */
10691 return -KEY_getpwnam;
10692 }
10693
10694 goto unknown;
10695
10696 case 'u':
10697 if (name[6] == 'i' &&
10698 name[7] == 'd')
10699 { /* getpwuid */
10700 return -KEY_getpwuid;
10701 }
10702
10703 goto unknown;
10704
10705 default:
10706 goto unknown;
10707 }
10708 }
10709
10710 goto unknown;
10711
10712 default:
10713 goto unknown;
10714 }
10715 }
10716
10717 goto unknown;
10718
10719 case 'r':
10720 if (name[1] == 'e' &&
10721 name[2] == 'a' &&
10722 name[3] == 'd')
10723 {
10724 switch (name[4])
10725 {
10726 case 'l':
10727 if (name[5] == 'i' &&
10728 name[6] == 'n')
10729 {
10730 switch (name[7])
10731 {
10732 case 'e':
10733 { /* readline */
10734 return -KEY_readline;
10735 }
10736
4c3bbe0f
MHM
10737 case 'k':
10738 { /* readlink */
10739 return -KEY_readlink;
10740 }
10741
4c3bbe0f
MHM
10742 default:
10743 goto unknown;
10744 }
10745 }
10746
10747 goto unknown;
10748
10749 case 'p':
10750 if (name[5] == 'i' &&
10751 name[6] == 'p' &&
10752 name[7] == 'e')
10753 { /* readpipe */
10754 return -KEY_readpipe;
10755 }
10756
10757 goto unknown;
10758
10759 default:
10760 goto unknown;
10761 }
10762 }
10763
10764 goto unknown;
10765
10766 case 's':
10767 switch (name[1])
10768 {
10769 case 'e':
10770 if (name[2] == 't')
10771 {
10772 switch (name[3])
10773 {
10774 case 'g':
10775 if (name[4] == 'r' &&
10776 name[5] == 'e' &&
10777 name[6] == 'n' &&
10778 name[7] == 't')
10779 { /* setgrent */
10780 return -KEY_setgrent;
10781 }
10782
10783 goto unknown;
10784
10785 case 'p':
10786 if (name[4] == 'w' &&
10787 name[5] == 'e' &&
10788 name[6] == 'n' &&
10789 name[7] == 't')
10790 { /* setpwent */
10791 return -KEY_setpwent;
10792 }
10793
10794 goto unknown;
10795
10796 default:
10797 goto unknown;
10798 }
10799 }
10800
10801 goto unknown;
10802
10803 case 'h':
10804 switch (name[2])
10805 {
10806 case 'm':
10807 if (name[3] == 'w' &&
10808 name[4] == 'r' &&
10809 name[5] == 'i' &&
10810 name[6] == 't' &&
10811 name[7] == 'e')
10812 { /* shmwrite */
10813 return -KEY_shmwrite;
10814 }
10815
10816 goto unknown;
10817
10818 case 'u':
10819 if (name[3] == 't' &&
10820 name[4] == 'd' &&
10821 name[5] == 'o' &&
10822 name[6] == 'w' &&
10823 name[7] == 'n')
10824 { /* shutdown */
10825 return -KEY_shutdown;
10826 }
10827
10828 goto unknown;
10829
10830 default:
10831 goto unknown;
10832 }
10833
10834 case 'y':
10835 if (name[2] == 's' &&
10836 name[3] == 'w' &&
10837 name[4] == 'r' &&
10838 name[5] == 'i' &&
10839 name[6] == 't' &&
10840 name[7] == 'e')
10841 { /* syswrite */
10842 return -KEY_syswrite;
10843 }
10844
10845 goto unknown;
10846
10847 default:
10848 goto unknown;
10849 }
10850
10851 case 't':
10852 if (name[1] == 'r' &&
10853 name[2] == 'u' &&
10854 name[3] == 'n' &&
10855 name[4] == 'c' &&
10856 name[5] == 'a' &&
10857 name[6] == 't' &&
10858 name[7] == 'e')
10859 { /* truncate */
10860 return -KEY_truncate;
10861 }
10862
10863 goto unknown;
10864
10865 default:
10866 goto unknown;
10867 }
10868
3c10abe3 10869 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10870 switch (name[0])
10871 {
3c10abe3
AG
10872 case 'U':
10873 if (name[1] == 'N' &&
10874 name[2] == 'I' &&
10875 name[3] == 'T' &&
10876 name[4] == 'C' &&
10877 name[5] == 'H' &&
10878 name[6] == 'E' &&
10879 name[7] == 'C' &&
10880 name[8] == 'K')
10881 { /* UNITCHECK */
10882 return KEY_UNITCHECK;
10883 }
10884
10885 goto unknown;
10886
4c3bbe0f
MHM
10887 case 'e':
10888 if (name[1] == 'n' &&
10889 name[2] == 'd' &&
10890 name[3] == 'n' &&
10891 name[4] == 'e' &&
10892 name[5] == 't' &&
10893 name[6] == 'e' &&
10894 name[7] == 'n' &&
10895 name[8] == 't')
10896 { /* endnetent */
10897 return -KEY_endnetent;
10898 }
10899
10900 goto unknown;
10901
10902 case 'g':
10903 if (name[1] == 'e' &&
10904 name[2] == 't' &&
10905 name[3] == 'n' &&
10906 name[4] == 'e' &&
10907 name[5] == 't' &&
10908 name[6] == 'e' &&
10909 name[7] == 'n' &&
10910 name[8] == 't')
10911 { /* getnetent */
10912 return -KEY_getnetent;
10913 }
10914
10915 goto unknown;
10916
10917 case 'l':
10918 if (name[1] == 'o' &&
10919 name[2] == 'c' &&
10920 name[3] == 'a' &&
10921 name[4] == 'l' &&
10922 name[5] == 't' &&
10923 name[6] == 'i' &&
10924 name[7] == 'm' &&
10925 name[8] == 'e')
10926 { /* localtime */
10927 return -KEY_localtime;
10928 }
10929
10930 goto unknown;
10931
10932 case 'p':
10933 if (name[1] == 'r' &&
10934 name[2] == 'o' &&
10935 name[3] == 't' &&
10936 name[4] == 'o' &&
10937 name[5] == 't' &&
10938 name[6] == 'y' &&
10939 name[7] == 'p' &&
10940 name[8] == 'e')
10941 { /* prototype */
10942 return KEY_prototype;
10943 }
10944
10945 goto unknown;
10946
10947 case 'q':
10948 if (name[1] == 'u' &&
10949 name[2] == 'o' &&
10950 name[3] == 't' &&
10951 name[4] == 'e' &&
10952 name[5] == 'm' &&
10953 name[6] == 'e' &&
10954 name[7] == 't' &&
10955 name[8] == 'a')
10956 { /* quotemeta */
10957 return -KEY_quotemeta;
10958 }
10959
10960 goto unknown;
10961
10962 case 'r':
10963 if (name[1] == 'e' &&
10964 name[2] == 'w' &&
10965 name[3] == 'i' &&
10966 name[4] == 'n' &&
10967 name[5] == 'd' &&
10968 name[6] == 'd' &&
10969 name[7] == 'i' &&
10970 name[8] == 'r')
10971 { /* rewinddir */
10972 return -KEY_rewinddir;
10973 }
10974
10975 goto unknown;
10976
10977 case 's':
10978 if (name[1] == 'e' &&
10979 name[2] == 't' &&
10980 name[3] == 'n' &&
10981 name[4] == 'e' &&
10982 name[5] == 't' &&
10983 name[6] == 'e' &&
10984 name[7] == 'n' &&
10985 name[8] == 't')
10986 { /* setnetent */
10987 return -KEY_setnetent;
10988 }
10989
10990 goto unknown;
10991
10992 case 'w':
10993 if (name[1] == 'a' &&
10994 name[2] == 'n' &&
10995 name[3] == 't' &&
10996 name[4] == 'a' &&
10997 name[5] == 'r' &&
10998 name[6] == 'r' &&
10999 name[7] == 'a' &&
11000 name[8] == 'y')
11001 { /* wantarray */
11002 return -KEY_wantarray;
11003 }
11004
11005 goto unknown;
11006
11007 default:
11008 goto unknown;
11009 }
11010
11011 case 10: /* 9 tokens of length 10 */
11012 switch (name[0])
11013 {
11014 case 'e':
11015 if (name[1] == 'n' &&
11016 name[2] == 'd')
11017 {
11018 switch (name[3])
11019 {
11020 case 'h':
11021 if (name[4] == 'o' &&
11022 name[5] == 's' &&
11023 name[6] == 't' &&
11024 name[7] == 'e' &&
11025 name[8] == 'n' &&
11026 name[9] == 't')
11027 { /* endhostent */
11028 return -KEY_endhostent;
11029 }
11030
11031 goto unknown;
11032
11033 case 's':
11034 if (name[4] == 'e' &&
11035 name[5] == 'r' &&
11036 name[6] == 'v' &&
11037 name[7] == 'e' &&
11038 name[8] == 'n' &&
11039 name[9] == 't')
11040 { /* endservent */
11041 return -KEY_endservent;
11042 }
11043
11044 goto unknown;
11045
11046 default:
11047 goto unknown;
11048 }
11049 }
11050
11051 goto unknown;
11052
11053 case 'g':
11054 if (name[1] == 'e' &&
11055 name[2] == 't')
11056 {
11057 switch (name[3])
11058 {
11059 case 'h':
11060 if (name[4] == 'o' &&
11061 name[5] == 's' &&
11062 name[6] == 't' &&
11063 name[7] == 'e' &&
11064 name[8] == 'n' &&
11065 name[9] == 't')
11066 { /* gethostent */
11067 return -KEY_gethostent;
11068 }
11069
11070 goto unknown;
11071
11072 case 's':
11073 switch (name[4])
11074 {
11075 case 'e':
11076 if (name[5] == 'r' &&
11077 name[6] == 'v' &&
11078 name[7] == 'e' &&
11079 name[8] == 'n' &&
11080 name[9] == 't')
11081 { /* getservent */
11082 return -KEY_getservent;
11083 }
11084
11085 goto unknown;
11086
11087 case 'o':
11088 if (name[5] == 'c' &&
11089 name[6] == 'k' &&
11090 name[7] == 'o' &&
11091 name[8] == 'p' &&
11092 name[9] == 't')
11093 { /* getsockopt */
11094 return -KEY_getsockopt;
11095 }
11096
11097 goto unknown;
11098
11099 default:
11100 goto unknown;
11101 }
11102
11103 default:
11104 goto unknown;
11105 }
11106 }
11107
11108 goto unknown;
11109
11110 case 's':
11111 switch (name[1])
11112 {
11113 case 'e':
11114 if (name[2] == 't')
11115 {
11116 switch (name[3])
11117 {
11118 case 'h':
11119 if (name[4] == 'o' &&
11120 name[5] == 's' &&
11121 name[6] == 't' &&
11122 name[7] == 'e' &&
11123 name[8] == 'n' &&
11124 name[9] == 't')
11125 { /* sethostent */
11126 return -KEY_sethostent;
11127 }
11128
11129 goto unknown;
11130
11131 case 's':
11132 switch (name[4])
11133 {
11134 case 'e':
11135 if (name[5] == 'r' &&
11136 name[6] == 'v' &&
11137 name[7] == 'e' &&
11138 name[8] == 'n' &&
11139 name[9] == 't')
11140 { /* setservent */
11141 return -KEY_setservent;
11142 }
11143
11144 goto unknown;
11145
11146 case 'o':
11147 if (name[5] == 'c' &&
11148 name[6] == 'k' &&
11149 name[7] == 'o' &&
11150 name[8] == 'p' &&
11151 name[9] == 't')
11152 { /* setsockopt */
11153 return -KEY_setsockopt;
11154 }
11155
11156 goto unknown;
11157
11158 default:
11159 goto unknown;
11160 }
11161
11162 default:
11163 goto unknown;
11164 }
11165 }
11166
11167 goto unknown;
11168
11169 case 'o':
11170 if (name[2] == 'c' &&
11171 name[3] == 'k' &&
11172 name[4] == 'e' &&
11173 name[5] == 't' &&
11174 name[6] == 'p' &&
11175 name[7] == 'a' &&
11176 name[8] == 'i' &&
11177 name[9] == 'r')
11178 { /* socketpair */
11179 return -KEY_socketpair;
11180 }
11181
11182 goto unknown;
11183
11184 default:
11185 goto unknown;
11186 }
11187
11188 default:
11189 goto unknown;
e2e1dd5a 11190 }
4c3bbe0f
MHM
11191
11192 case 11: /* 8 tokens of length 11 */
11193 switch (name[0])
11194 {
11195 case '_':
11196 if (name[1] == '_' &&
11197 name[2] == 'P' &&
11198 name[3] == 'A' &&
11199 name[4] == 'C' &&
11200 name[5] == 'K' &&
11201 name[6] == 'A' &&
11202 name[7] == 'G' &&
11203 name[8] == 'E' &&
11204 name[9] == '_' &&
11205 name[10] == '_')
11206 { /* __PACKAGE__ */
11207 return -KEY___PACKAGE__;
11208 }
11209
11210 goto unknown;
11211
11212 case 'e':
11213 if (name[1] == 'n' &&
11214 name[2] == 'd' &&
11215 name[3] == 'p' &&
11216 name[4] == 'r' &&
11217 name[5] == 'o' &&
11218 name[6] == 't' &&
11219 name[7] == 'o' &&
11220 name[8] == 'e' &&
11221 name[9] == 'n' &&
11222 name[10] == 't')
11223 { /* endprotoent */
11224 return -KEY_endprotoent;
11225 }
11226
11227 goto unknown;
11228
11229 case 'g':
11230 if (name[1] == 'e' &&
11231 name[2] == 't')
11232 {
11233 switch (name[3])
11234 {
11235 case 'p':
11236 switch (name[4])
11237 {
11238 case 'e':
11239 if (name[5] == 'e' &&
11240 name[6] == 'r' &&
11241 name[7] == 'n' &&
11242 name[8] == 'a' &&
11243 name[9] == 'm' &&
11244 name[10] == 'e')
11245 { /* getpeername */
11246 return -KEY_getpeername;
11247 }
11248
11249 goto unknown;
11250
11251 case 'r':
11252 switch (name[5])
11253 {
11254 case 'i':
11255 if (name[6] == 'o' &&
11256 name[7] == 'r' &&
11257 name[8] == 'i' &&
11258 name[9] == 't' &&
11259 name[10] == 'y')
11260 { /* getpriority */
11261 return -KEY_getpriority;
11262 }
11263
11264 goto unknown;
11265
11266 case 'o':
11267 if (name[6] == 't' &&
11268 name[7] == 'o' &&
11269 name[8] == 'e' &&
11270 name[9] == 'n' &&
11271 name[10] == 't')
11272 { /* getprotoent */
11273 return -KEY_getprotoent;
11274 }
11275
11276 goto unknown;
11277
11278 default:
11279 goto unknown;
11280 }
11281
11282 default:
11283 goto unknown;
11284 }
11285
11286 case 's':
11287 if (name[4] == 'o' &&
11288 name[5] == 'c' &&
11289 name[6] == 'k' &&
11290 name[7] == 'n' &&
11291 name[8] == 'a' &&
11292 name[9] == 'm' &&
11293 name[10] == 'e')
11294 { /* getsockname */
11295 return -KEY_getsockname;
11296 }
11297
11298 goto unknown;
11299
11300 default:
11301 goto unknown;
11302 }
11303 }
11304
11305 goto unknown;
11306
11307 case 's':
11308 if (name[1] == 'e' &&
11309 name[2] == 't' &&
11310 name[3] == 'p' &&
11311 name[4] == 'r')
11312 {
11313 switch (name[5])
11314 {
11315 case 'i':
11316 if (name[6] == 'o' &&
11317 name[7] == 'r' &&
11318 name[8] == 'i' &&
11319 name[9] == 't' &&
11320 name[10] == 'y')
11321 { /* setpriority */
11322 return -KEY_setpriority;
11323 }
11324
11325 goto unknown;
11326
11327 case 'o':
11328 if (name[6] == 't' &&
11329 name[7] == 'o' &&
11330 name[8] == 'e' &&
11331 name[9] == 'n' &&
11332 name[10] == 't')
11333 { /* setprotoent */
11334 return -KEY_setprotoent;
11335 }
11336
11337 goto unknown;
11338
11339 default:
11340 goto unknown;
11341 }
11342 }
11343
11344 goto unknown;
11345
11346 default:
11347 goto unknown;
e2e1dd5a 11348 }
4c3bbe0f
MHM
11349
11350 case 12: /* 2 tokens of length 12 */
11351 if (name[0] == 'g' &&
11352 name[1] == 'e' &&
11353 name[2] == 't' &&
11354 name[3] == 'n' &&
11355 name[4] == 'e' &&
11356 name[5] == 't' &&
11357 name[6] == 'b' &&
11358 name[7] == 'y')
11359 {
11360 switch (name[8])
11361 {
11362 case 'a':
11363 if (name[9] == 'd' &&
11364 name[10] == 'd' &&
11365 name[11] == 'r')
11366 { /* getnetbyaddr */
11367 return -KEY_getnetbyaddr;
11368 }
11369
11370 goto unknown;
11371
11372 case 'n':
11373 if (name[9] == 'a' &&
11374 name[10] == 'm' &&
11375 name[11] == 'e')
11376 { /* getnetbyname */
11377 return -KEY_getnetbyname;
11378 }
11379
11380 goto unknown;
11381
11382 default:
11383 goto unknown;
11384 }
e2e1dd5a 11385 }
4c3bbe0f
MHM
11386
11387 goto unknown;
11388
11389 case 13: /* 4 tokens of length 13 */
11390 if (name[0] == 'g' &&
11391 name[1] == 'e' &&
11392 name[2] == 't')
11393 {
11394 switch (name[3])
11395 {
11396 case 'h':
11397 if (name[4] == 'o' &&
11398 name[5] == 's' &&
11399 name[6] == 't' &&
11400 name[7] == 'b' &&
11401 name[8] == 'y')
11402 {
11403 switch (name[9])
11404 {
11405 case 'a':
11406 if (name[10] == 'd' &&
11407 name[11] == 'd' &&
11408 name[12] == 'r')
11409 { /* gethostbyaddr */
11410 return -KEY_gethostbyaddr;
11411 }
11412
11413 goto unknown;
11414
11415 case 'n':
11416 if (name[10] == 'a' &&
11417 name[11] == 'm' &&
11418 name[12] == 'e')
11419 { /* gethostbyname */
11420 return -KEY_gethostbyname;
11421 }
11422
11423 goto unknown;
11424
11425 default:
11426 goto unknown;
11427 }
11428 }
11429
11430 goto unknown;
11431
11432 case 's':
11433 if (name[4] == 'e' &&
11434 name[5] == 'r' &&
11435 name[6] == 'v' &&
11436 name[7] == 'b' &&
11437 name[8] == 'y')
11438 {
11439 switch (name[9])
11440 {
11441 case 'n':
11442 if (name[10] == 'a' &&
11443 name[11] == 'm' &&
11444 name[12] == 'e')
11445 { /* getservbyname */
11446 return -KEY_getservbyname;
11447 }
11448
11449 goto unknown;
11450
11451 case 'p':
11452 if (name[10] == 'o' &&
11453 name[11] == 'r' &&
11454 name[12] == 't')
11455 { /* getservbyport */
11456 return -KEY_getservbyport;
11457 }
11458
11459 goto unknown;
11460
11461 default:
11462 goto unknown;
11463 }
11464 }
11465
11466 goto unknown;
11467
11468 default:
11469 goto unknown;
11470 }
e2e1dd5a 11471 }
4c3bbe0f
MHM
11472
11473 goto unknown;
11474
11475 case 14: /* 1 tokens of length 14 */
11476 if (name[0] == 'g' &&
11477 name[1] == 'e' &&
11478 name[2] == 't' &&
11479 name[3] == 'p' &&
11480 name[4] == 'r' &&
11481 name[5] == 'o' &&
11482 name[6] == 't' &&
11483 name[7] == 'o' &&
11484 name[8] == 'b' &&
11485 name[9] == 'y' &&
11486 name[10] == 'n' &&
11487 name[11] == 'a' &&
11488 name[12] == 'm' &&
11489 name[13] == 'e')
11490 { /* getprotobyname */
11491 return -KEY_getprotobyname;
11492 }
11493
11494 goto unknown;
11495
11496 case 16: /* 1 tokens of length 16 */
11497 if (name[0] == 'g' &&
11498 name[1] == 'e' &&
11499 name[2] == 't' &&
11500 name[3] == 'p' &&
11501 name[4] == 'r' &&
11502 name[5] == 'o' &&
11503 name[6] == 't' &&
11504 name[7] == 'o' &&
11505 name[8] == 'b' &&
11506 name[9] == 'y' &&
11507 name[10] == 'n' &&
11508 name[11] == 'u' &&
11509 name[12] == 'm' &&
11510 name[13] == 'b' &&
11511 name[14] == 'e' &&
11512 name[15] == 'r')
11513 { /* getprotobynumber */
11514 return -KEY_getprotobynumber;
11515 }
11516
11517 goto unknown;
11518
11519 default:
11520 goto unknown;
e2e1dd5a 11521 }
4c3bbe0f
MHM
11522
11523unknown:
e2e1dd5a 11524 return 0;
a687059c
LW
11525}
11526
76e3520e 11527STATIC void
c94115d8 11528S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11529{
97aff369 11530 dVAR;
2f3197b3 11531
7918f24d
NC
11532 PERL_ARGS_ASSERT_CHECKCOMMA;
11533
d008e5eb 11534 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11535 if (ckWARN(WARN_SYNTAX)) {
11536 int level = 1;
26ff0806 11537 const char *w;
d008e5eb
GS
11538 for (w = s+2; *w && level; w++) {
11539 if (*w == '(')
11540 ++level;
11541 else if (*w == ')')
11542 --level;
11543 }
888fea98
NC
11544 while (isSPACE(*w))
11545 ++w;
b1439985
RGS
11546 /* the list of chars below is for end of statements or
11547 * block / parens, boolean operators (&&, ||, //) and branch
11548 * constructs (or, and, if, until, unless, while, err, for).
11549 * Not a very solid hack... */
11550 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11551 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11552 "%s (...) interpreted as function",name);
d008e5eb 11553 }
2f3197b3 11554 }
3280af22 11555 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11556 s++;
a687059c
LW
11557 if (*s == '(')
11558 s++;
3280af22 11559 while (s < PL_bufend && isSPACE(*s))
a687059c 11560 s++;
7e2040f0 11561 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11562 const char * const w = s++;
7e2040f0 11563 while (isALNUM_lazy_if(s,UTF))
a687059c 11564 s++;
3280af22 11565 while (s < PL_bufend && isSPACE(*s))
a687059c 11566 s++;
e929a76b 11567 if (*s == ',') {
c94115d8 11568 GV* gv;
5458a98a 11569 if (keyword(w, s - w, 0))
e929a76b 11570 return;
c94115d8
NC
11571
11572 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11573 if (gv && GvCVu(gv))
abbb3198 11574 return;
cea2e8a9 11575 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11576 }
11577 }
11578}
11579
423cee85
JH
11580/* Either returns sv, or mortalizes sv and returns a new SV*.
11581 Best used as sv=new_constant(..., sv, ...).
11582 If s, pv are NULL, calls subroutine with one argument,
11583 and type is used with error messages only. */
11584
b3ac6de7 11585STATIC SV *
eb0d8d16
NC
11586S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11587 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11588{
27da23d5 11589 dVAR; dSP;
890ce7af 11590 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11591 SV *res;
b3ac6de7
IZ
11592 SV **cvp;
11593 SV *cv, *typesv;
89e33a05 11594 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11595
7918f24d
NC
11596 PERL_ARGS_ASSERT_NEW_CONSTANT;
11597
f0af216f 11598 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11599 SV *msg;
11600
10edeb5d
JH
11601 why2 = (const char *)
11602 (strEQ(key,"charnames")
11603 ? "(possibly a missing \"use charnames ...\")"
11604 : "");
4e553d73 11605 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11606 (type ? type: "undef"), why2);
11607
11608 /* This is convoluted and evil ("goto considered harmful")
11609 * but I do not understand the intricacies of all the different
11610 * failure modes of %^H in here. The goal here is to make
11611 * the most probable error message user-friendly. --jhi */
11612
11613 goto msgdone;
11614
423cee85 11615 report:
4e553d73 11616 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11617 (type ? type: "undef"), why1, why2, why3);
41ab332f 11618 msgdone:
95a20fc0 11619 yyerror(SvPVX_const(msg));
423cee85
JH
11620 SvREFCNT_dec(msg);
11621 return sv;
11622 }
ff3f963a
KW
11623
11624 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
11625 if (PL_error_count > 0 && strEQ(key,"charnames"))
11626 return &PL_sv_undef;
ff3f963a 11627
eb0d8d16 11628 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11629 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11630 why1 = "$^H{";
11631 why2 = key;
f0af216f 11632 why3 = "} is not defined";
423cee85 11633 goto report;
b3ac6de7
IZ
11634 }
11635 sv_2mortal(sv); /* Parent created it permanently */
11636 cv = *cvp;
423cee85 11637 if (!pv && s)
59cd0e26 11638 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11639 if (type && pv)
59cd0e26 11640 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11641 else
423cee85 11642 typesv = &PL_sv_undef;
4e553d73 11643
e788e7d3 11644 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11645 ENTER ;
11646 SAVETMPS;
4e553d73 11647
423cee85 11648 PUSHMARK(SP) ;
a5845cb7 11649 EXTEND(sp, 3);
423cee85
JH
11650 if (pv)
11651 PUSHs(pv);
b3ac6de7 11652 PUSHs(sv);
423cee85
JH
11653 if (pv)
11654 PUSHs(typesv);
b3ac6de7 11655 PUTBACK;
423cee85 11656 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11657
423cee85 11658 SPAGAIN ;
4e553d73 11659
423cee85 11660 /* Check the eval first */
9b0e499b 11661 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11662 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11663 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11664 (void)POPs;
b37c2d43 11665 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11666 }
11667 else {
11668 res = POPs;
b37c2d43 11669 SvREFCNT_inc_simple_void(res);
423cee85 11670 }
4e553d73 11671
423cee85
JH
11672 PUTBACK ;
11673 FREETMPS ;
11674 LEAVE ;
b3ac6de7 11675 POPSTACK;
4e553d73 11676
b3ac6de7 11677 if (!SvOK(res)) {
423cee85
JH
11678 why1 = "Call to &{$^H{";
11679 why2 = key;
f0af216f 11680 why3 = "}} did not return a defined value";
423cee85
JH
11681 sv = res;
11682 goto report;
9b0e499b 11683 }
423cee85 11684
9b0e499b 11685 return res;
b3ac6de7 11686}
4e553d73 11687
d0a148a6
NC
11688/* Returns a NUL terminated string, with the length of the string written to
11689 *slp
11690 */
76e3520e 11691STATIC char *
cea2e8a9 11692S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11693{
97aff369 11694 dVAR;
463ee0b2 11695 register char *d = dest;
890ce7af 11696 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11697
11698 PERL_ARGS_ASSERT_SCAN_WORD;
11699
463ee0b2 11700 for (;;) {
8903cb82 11701 if (d >= e)
cea2e8a9 11702 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11703 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11704 *d++ = *s++;
c35e046a 11705 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11706 *d++ = ':';
11707 *d++ = ':';
11708 s++;
11709 }
c35e046a 11710 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11711 *d++ = *s++;
11712 *d++ = *s++;
11713 }
fd400ab9 11714 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11715 char *t = s + UTF8SKIP(s);
c35e046a 11716 size_t len;
fd400ab9 11717 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11718 t += UTF8SKIP(t);
c35e046a
AL
11719 len = t - s;
11720 if (d + len > e)
cea2e8a9 11721 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11722 Copy(s, d, len, char);
11723 d += len;
a0ed51b3
LW
11724 s = t;
11725 }
463ee0b2
LW
11726 else {
11727 *d = '\0';
11728 *slp = d - dest;
11729 return s;
e929a76b 11730 }
378cc40b
LW
11731 }
11732}
11733
76e3520e 11734STATIC char *
f54cb97a 11735S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11736{
97aff369 11737 dVAR;
6136c704 11738 char *bracket = NULL;
748a9306 11739 char funny = *s++;
6136c704 11740 register char *d = dest;
0b3da58d 11741 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11742
7918f24d
NC
11743 PERL_ARGS_ASSERT_SCAN_IDENT;
11744
a0d0e21e 11745 if (isSPACE(*s))
29595ff2 11746 s = PEEKSPACE(s);
de3bb511 11747 if (isDIGIT(*s)) {
8903cb82 11748 while (isDIGIT(*s)) {
11749 if (d >= e)
cea2e8a9 11750 Perl_croak(aTHX_ ident_too_long);
378cc40b 11751 *d++ = *s++;
8903cb82 11752 }
378cc40b
LW
11753 }
11754 else {
463ee0b2 11755 for (;;) {
8903cb82 11756 if (d >= e)
cea2e8a9 11757 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11758 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11759 *d++ = *s++;
7e2040f0 11760 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11761 *d++ = ':';
11762 *d++ = ':';
11763 s++;
11764 }
a0d0e21e 11765 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11766 *d++ = *s++;
11767 *d++ = *s++;
11768 }
fd400ab9 11769 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11770 char *t = s + UTF8SKIP(s);
fd400ab9 11771 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11772 t += UTF8SKIP(t);
11773 if (d + (t - s) > e)
cea2e8a9 11774 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11775 Copy(s, d, t - s, char);
11776 d += t - s;
11777 s = t;
11778 }
463ee0b2
LW
11779 else
11780 break;
11781 }
378cc40b
LW
11782 }
11783 *d = '\0';
11784 d = dest;
79072805 11785 if (*d) {
3280af22
NIS
11786 if (PL_lex_state != LEX_NORMAL)
11787 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11788 return s;
378cc40b 11789 }
748a9306 11790 if (*s == '$' && s[1] &&
3792a11b 11791 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11792 {
4810e5ec 11793 return s;
5cd24f17 11794 }
79072805
LW
11795 if (*s == '{') {
11796 bracket = s;
11797 s++;
11798 }
11799 else if (ck_uni)
11800 check_uni();
93a17b20 11801 if (s < send)
79072805
LW
11802 *d = *s++;
11803 d[1] = '\0';
2b92dfce 11804 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11805 *d = toCTRL(*s);
11806 s++;
de3bb511 11807 }
79072805 11808 if (bracket) {
748a9306 11809 if (isSPACE(s[-1])) {
fa83b5b6 11810 while (s < send) {
f54cb97a 11811 const char ch = *s++;
bf4acbe4 11812 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11813 *d = ch;
11814 break;
11815 }
11816 }
748a9306 11817 }
7e2040f0 11818 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11819 d++;
a0ed51b3 11820 if (UTF) {
6136c704
AL
11821 char *end = s;
11822 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11823 end += UTF8SKIP(end);
11824 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11825 end += UTF8SKIP(end);
a0ed51b3 11826 }
6136c704
AL
11827 Copy(s, d, end - s, char);
11828 d += end - s;
11829 s = end;
a0ed51b3
LW
11830 }
11831 else {
2b92dfce 11832 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11833 *d++ = *s++;
2b92dfce 11834 if (d >= e)
cea2e8a9 11835 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11836 }
79072805 11837 *d = '\0';
c35e046a
AL
11838 while (s < send && SPACE_OR_TAB(*s))
11839 s++;
ff68c719 11840 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11841 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11842 const char * const brack =
11843 (const char *)
11844 ((*s == '[') ? "[...]" : "{...}");
9014280d 11845 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11846 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11847 funny, dest, brack, funny, dest, brack);
11848 }
79072805 11849 bracket++;
a0be28da 11850 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11851 return s;
11852 }
4e553d73
NIS
11853 }
11854 /* Handle extended ${^Foo} variables
2b92dfce
GS
11855 * 1999-02-27 mjd-perl-patch@plover.com */
11856 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11857 && isALNUM(*s))
11858 {
11859 d++;
11860 while (isALNUM(*s) && d < e) {
11861 *d++ = *s++;
11862 }
11863 if (d >= e)
cea2e8a9 11864 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11865 *d = '\0';
79072805
LW
11866 }
11867 if (*s == '}') {
11868 s++;
7df0d042 11869 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11870 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11871 PL_expect = XREF;
11872 }
d008e5eb 11873 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11874 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11875 (keyword(dest, d - dest, 0)
11876 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11877 {
c35e046a
AL
11878 if (funny == '#')
11879 funny = '@';
9014280d 11880 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11881 "Ambiguous use of %c{%s} resolved to %c%s",
11882 funny, dest, funny, dest);
11883 }
11884 }
79072805
LW
11885 }
11886 else {
11887 s = bracket; /* let the parser handle it */
93a17b20 11888 *dest = '\0';
79072805
LW
11889 }
11890 }
3280af22
NIS
11891 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11892 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11893 return s;
11894}
11895
879d0c72
NC
11896static U32
11897S_pmflag(U32 pmfl, const char ch) {
11898 switch (ch) {
11899 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
4f4d7508
DC
11900 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11901 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11902 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11903 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11904 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
879d0c72
NC
11905 }
11906 return pmfl;
11907}
11908
76e3520e 11909STATIC char *
cea2e8a9 11910S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11911{
97aff369 11912 dVAR;
79072805 11913 PMOP *pm;
5db06880 11914 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11915 const char * const valid_flags =
a20207d7 11916 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11917#ifdef PERL_MAD
11918 char *modstart;
11919#endif
11920
7918f24d 11921 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11922
25c09cbf 11923 if (!s) {
6136c704 11924 const char * const delimiter = skipspace(start);
10edeb5d
JH
11925 Perl_croak(aTHX_
11926 (const char *)
11927 (*delimiter == '?'
11928 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11929 : "Search pattern not terminated" ));
25c09cbf 11930 }
bbce6d69 11931
8782bef2 11932 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11933 if (PL_multi_open == '?') {
11934 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11935 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11936
11937 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11938 allows us to restrict the list needed by reset to just the ??
11939 matches. */
11940 assert(type != OP_TRANS);
11941 if (PL_curstash) {
daba3364 11942 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11943 U32 elements;
11944 if (!mg) {
daba3364 11945 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11946 0);
11947 }
11948 elements = mg->mg_len / sizeof(PMOP**);
11949 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11950 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11951 mg->mg_len = elements * sizeof(PMOP**);
11952 PmopSTASH_set(pm,PL_curstash);
11953 }
11954 }
5db06880
NC
11955#ifdef PERL_MAD
11956 modstart = s;
11957#endif
6136c704 11958 while (*s && strchr(valid_flags, *s))
879d0c72 11959 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
e6897b1a
KW
11960
11961 if (isALNUM(*s)) {
11962 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11963 "Having no space between pattern and following word is deprecated");
11964
11965 }
5db06880
NC
11966#ifdef PERL_MAD
11967 if (PL_madskills && modstart != s) {
11968 SV* tmptoken = newSVpvn(modstart, s - modstart);
11969 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11970 }
11971#endif
4ac733c9 11972 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11973 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11974 {
a2a5de95
NC
11975 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11976 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11977 }
11978
3280af22 11979 PL_lex_op = (OP*)pm;
6154021b 11980 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11981 return s;
11982}
11983
76e3520e 11984STATIC char *
cea2e8a9 11985S_scan_subst(pTHX_ char *start)
79072805 11986{
27da23d5 11987 dVAR;
a0d0e21e 11988 register char *s;
79072805 11989 register PMOP *pm;
4fdae800 11990 I32 first_start;
79072805 11991 I32 es = 0;
5db06880
NC
11992#ifdef PERL_MAD
11993 char *modstart;
11994#endif
79072805 11995
7918f24d
NC
11996 PERL_ARGS_ASSERT_SCAN_SUBST;
11997
6154021b 11998 pl_yylval.ival = OP_NULL;
79072805 11999
5db06880 12000 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12001
37fd879b 12002 if (!s)
cea2e8a9 12003 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 12004
3280af22 12005 if (s[-1] == PL_multi_open)
79072805 12006 s--;
5db06880
NC
12007#ifdef PERL_MAD
12008 if (PL_madskills) {
cd81e915
NC
12009 CURMAD('q', PL_thisopen);
12010 CURMAD('_', PL_thiswhite);
12011 CURMAD('E', PL_thisstuff);
12012 CURMAD('Q', PL_thisclose);
12013 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12014 }
12015#endif
79072805 12016
3280af22 12017 first_start = PL_multi_start;
5db06880 12018 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12019 if (!s) {
37fd879b 12020 if (PL_lex_stuff) {
3280af22 12021 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12022 PL_lex_stuff = NULL;
37fd879b 12023 }
cea2e8a9 12024 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 12025 }
3280af22 12026 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 12027
79072805 12028 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
12029
12030#ifdef PERL_MAD
12031 if (PL_madskills) {
cd81e915
NC
12032 CURMAD('z', PL_thisopen);
12033 CURMAD('R', PL_thisstuff);
12034 CURMAD('Z', PL_thisclose);
5db06880
NC
12035 }
12036 modstart = s;
12037#endif
12038
48c036b1 12039 while (*s) {
a20207d7 12040 if (*s == EXEC_PAT_MOD) {
a687059c 12041 s++;
2f3197b3 12042 es++;
a687059c 12043 }
a20207d7 12044 else if (strchr(S_PAT_MODS, *s))
879d0c72 12045 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
aa78b661
KW
12046 else {
12047 if (isALNUM(*s)) {
12048 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12049 "Having no space between pattern and following word is deprecated");
12050
12051 }
48c036b1 12052 break;
aa78b661 12053 }
378cc40b 12054 }
79072805 12055
5db06880
NC
12056#ifdef PERL_MAD
12057 if (PL_madskills) {
12058 if (modstart != s)
12059 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12060 append_madprops(PL_thismad, (OP*)pm, 0);
12061 PL_thismad = 0;
5db06880
NC
12062 }
12063#endif
a2a5de95
NC
12064 if ((pm->op_pmflags & PMf_CONTINUE)) {
12065 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
12066 }
12067
79072805 12068 if (es) {
6136c704
AL
12069 SV * const repl = newSVpvs("");
12070
0244c3a4
GS
12071 PL_sublex_info.super_bufptr = s;
12072 PL_sublex_info.super_bufend = PL_bufend;
12073 PL_multi_end = 0;
79072805 12074 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
12075 while (es-- > 0) {
12076 if (es)
12077 sv_catpvs(repl, "eval ");
12078 else
12079 sv_catpvs(repl, "do ");
12080 }
6f43d98f 12081 sv_catpvs(repl, "{");
3280af22 12082 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
12083 if (strchr(SvPVX(PL_lex_repl), '#'))
12084 sv_catpvs(repl, "\n");
12085 sv_catpvs(repl, "}");
25da4f38 12086 SvEVALED_on(repl);
3280af22
NIS
12087 SvREFCNT_dec(PL_lex_repl);
12088 PL_lex_repl = repl;
378cc40b 12089 }
79072805 12090
3280af22 12091 PL_lex_op = (OP*)pm;
6154021b 12092 pl_yylval.ival = OP_SUBST;
378cc40b
LW
12093 return s;
12094}
12095
76e3520e 12096STATIC char *
cea2e8a9 12097S_scan_trans(pTHX_ char *start)
378cc40b 12098{
97aff369 12099 dVAR;
a0d0e21e 12100 register char* s;
11343788 12101 OP *o;
79072805 12102 short *tbl;
b84c11c8
NC
12103 U8 squash;
12104 U8 del;
12105 U8 complement;
bb16bae8 12106 bool nondestruct = 0;
5db06880
NC
12107#ifdef PERL_MAD
12108 char *modstart;
12109#endif
79072805 12110
7918f24d
NC
12111 PERL_ARGS_ASSERT_SCAN_TRANS;
12112
6154021b 12113 pl_yylval.ival = OP_NULL;
79072805 12114
5db06880 12115 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 12116 if (!s)
cea2e8a9 12117 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 12118
3280af22 12119 if (s[-1] == PL_multi_open)
2f3197b3 12120 s--;
5db06880
NC
12121#ifdef PERL_MAD
12122 if (PL_madskills) {
cd81e915
NC
12123 CURMAD('q', PL_thisopen);
12124 CURMAD('_', PL_thiswhite);
12125 CURMAD('E', PL_thisstuff);
12126 CURMAD('Q', PL_thisclose);
12127 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12128 }
12129#endif
2f3197b3 12130
5db06880 12131 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12132 if (!s) {
37fd879b 12133 if (PL_lex_stuff) {
3280af22 12134 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12135 PL_lex_stuff = NULL;
37fd879b 12136 }
cea2e8a9 12137 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 12138 }
5db06880 12139 if (PL_madskills) {
cd81e915
NC
12140 CURMAD('z', PL_thisopen);
12141 CURMAD('R', PL_thisstuff);
12142 CURMAD('Z', PL_thisclose);
5db06880 12143 }
79072805 12144
a0ed51b3 12145 complement = del = squash = 0;
5db06880
NC
12146#ifdef PERL_MAD
12147 modstart = s;
12148#endif
7a1e2023
NC
12149 while (1) {
12150 switch (*s) {
12151 case 'c':
79072805 12152 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
12153 break;
12154 case 'd':
a0ed51b3 12155 del = OPpTRANS_DELETE;
7a1e2023
NC
12156 break;
12157 case 's':
79072805 12158 squash = OPpTRANS_SQUASH;
7a1e2023 12159 break;
bb16bae8
FC
12160 case 'r':
12161 nondestruct = 1;
12162 break;
7a1e2023
NC
12163 default:
12164 goto no_more;
12165 }
395c3793
LW
12166 s++;
12167 }
7a1e2023 12168 no_more:
8973db79 12169
aa1f7c5b 12170 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
bb16bae8 12171 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
59f00321
RGS
12172 o->op_private &= ~OPpTRANS_ALL;
12173 o->op_private |= del|squash|complement|
7948272d
NIS
12174 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12175 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 12176
3280af22 12177 PL_lex_op = o;
bb16bae8 12178 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
12179
12180#ifdef PERL_MAD
12181 if (PL_madskills) {
12182 if (modstart != s)
12183 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12184 append_madprops(PL_thismad, o, 0);
12185 PL_thismad = 0;
5db06880
NC
12186 }
12187#endif
12188
79072805
LW
12189 return s;
12190}
12191
76e3520e 12192STATIC char *
cea2e8a9 12193S_scan_heredoc(pTHX_ register char *s)
79072805 12194{
97aff369 12195 dVAR;
79072805
LW
12196 SV *herewas;
12197 I32 op_type = OP_SCALAR;
12198 I32 len;
12199 SV *tmpstr;
12200 char term;
73d840c0 12201 const char *found_newline;
79072805 12202 register char *d;
fc36a67e 12203 register char *e;
4633a7c4 12204 char *peek;
f54cb97a 12205 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
12206#ifdef PERL_MAD
12207 I32 stuffstart = s - SvPVX(PL_linestr);
12208 char *tstart;
12209
cd81e915 12210 PL_realtokenstart = -1;
5db06880 12211#endif
79072805 12212
7918f24d
NC
12213 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12214
79072805 12215 s += 2;
3280af22
NIS
12216 d = PL_tokenbuf;
12217 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 12218 if (!outer)
79072805 12219 *d++ = '\n';
c35e046a
AL
12220 peek = s;
12221 while (SPACE_OR_TAB(*peek))
12222 peek++;
3792a11b 12223 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 12224 s = peek;
79072805 12225 term = *s++;
3280af22 12226 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 12227 d += len;
3280af22 12228 if (s < PL_bufend)
79072805 12229 s++;
79072805
LW
12230 }
12231 else {
12232 if (*s == '\\')
12233 s++, term = '\'';
12234 else
12235 term = '"';
7e2040f0 12236 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 12237 deprecate("bare << to mean <<\"\"");
7e2040f0 12238 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 12239 if (d < e)
12240 *d++ = *s;
12241 }
12242 }
3280af22 12243 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 12244 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
12245 *d++ = '\n';
12246 *d = '\0';
3280af22 12247 len = d - PL_tokenbuf;
5db06880
NC
12248
12249#ifdef PERL_MAD
12250 if (PL_madskills) {
12251 tstart = PL_tokenbuf + !outer;
cd81e915 12252 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 12253 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 12254 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12255 stuffstart = s - SvPVX(PL_linestr);
12256 }
12257#endif
6a27c188 12258#ifndef PERL_STRICT_CR
f63a84b2
LW
12259 d = strchr(s, '\r');
12260 if (d) {
b464bac0 12261 char * const olds = s;
f63a84b2 12262 s = d;
3280af22 12263 while (s < PL_bufend) {
f63a84b2
LW
12264 if (*s == '\r') {
12265 *d++ = '\n';
12266 if (*++s == '\n')
12267 s++;
12268 }
12269 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12270 *d++ = *s++;
12271 s++;
12272 }
12273 else
12274 *d++ = *s++;
12275 }
12276 *d = '\0';
3280af22 12277 PL_bufend = d;
95a20fc0 12278 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
12279 s = olds;
12280 }
12281#endif
5db06880
NC
12282#ifdef PERL_MAD
12283 found_newline = 0;
12284#endif
10edeb5d 12285 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
12286 herewas = newSVpvn(s,PL_bufend-s);
12287 }
12288 else {
5db06880
NC
12289#ifdef PERL_MAD
12290 herewas = newSVpvn(s-1,found_newline-s+1);
12291#else
73d840c0
AL
12292 s--;
12293 herewas = newSVpvn(s,found_newline-s);
5db06880 12294#endif
73d840c0 12295 }
5db06880
NC
12296#ifdef PERL_MAD
12297 if (PL_madskills) {
12298 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12299 if (PL_thisstuff)
12300 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 12301 else
cd81e915 12302 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
12303 }
12304#endif
79072805 12305 s += SvCUR(herewas);
748a9306 12306
5db06880
NC
12307#ifdef PERL_MAD
12308 stuffstart = s - SvPVX(PL_linestr);
12309
12310 if (found_newline)
12311 s--;
12312#endif
12313
7d0a29fe
NC
12314 tmpstr = newSV_type(SVt_PVIV);
12315 SvGROW(tmpstr, 80);
748a9306 12316 if (term == '\'') {
79072805 12317 op_type = OP_CONST;
45977657 12318 SvIV_set(tmpstr, -1);
748a9306
LW
12319 }
12320 else if (term == '`') {
79072805 12321 op_type = OP_BACKTICK;
45977657 12322 SvIV_set(tmpstr, '\\');
748a9306 12323 }
79072805
LW
12324
12325 CLINE;
57843af0 12326 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
12327 PL_multi_open = PL_multi_close = '<';
12328 term = *PL_tokenbuf;
0244c3a4 12329 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
12330 char * const bufptr = PL_sublex_info.super_bufptr;
12331 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 12332 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
12333 s = strchr(bufptr, '\n');
12334 if (!s)
12335 s = bufend;
12336 d = s;
12337 while (s < bufend &&
12338 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12339 if (*s++ == '\n')
57843af0 12340 CopLINE_inc(PL_curcop);
0244c3a4
GS
12341 }
12342 if (s >= bufend) {
eb160463 12343 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
12344 missingterm(PL_tokenbuf);
12345 }
12346 sv_setpvn(herewas,bufptr,d-bufptr+1);
12347 sv_setpvn(tmpstr,d+1,s-d);
12348 s += len - 1;
12349 sv_catpvn(herewas,s,bufend-s);
95a20fc0 12350 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
12351
12352 s = olds;
12353 goto retval;
12354 }
12355 else if (!outer) {
79072805 12356 d = s;
3280af22
NIS
12357 while (s < PL_bufend &&
12358 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 12359 if (*s++ == '\n')
57843af0 12360 CopLINE_inc(PL_curcop);
79072805 12361 }
3280af22 12362 if (s >= PL_bufend) {
eb160463 12363 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12364 missingterm(PL_tokenbuf);
79072805
LW
12365 }
12366 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
12367#ifdef PERL_MAD
12368 if (PL_madskills) {
cd81e915
NC
12369 if (PL_thisstuff)
12370 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12371 else
cd81e915 12372 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12373 stuffstart = s - SvPVX(PL_linestr);
12374 }
12375#endif
79072805 12376 s += len - 1;
57843af0 12377 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12378
3280af22
NIS
12379 sv_catpvn(herewas,s,PL_bufend-s);
12380 sv_setsv(PL_linestr,herewas);
12381 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12382 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12383 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12384 }
12385 else
76f68e9b 12386 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12387 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12388#ifdef PERL_MAD
12389 if (PL_madskills) {
12390 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12391 if (PL_thisstuff)
12392 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12393 else
cd81e915 12394 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12395 }
12396#endif
f0e67a1d 12397 PL_bufptr = s;
17cc9359 12398 CopLINE_inc(PL_curcop);
f0e67a1d 12399 if (!outer || !lex_next_chunk(0)) {
eb160463 12400 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12401 missingterm(PL_tokenbuf);
79072805 12402 }
17cc9359 12403 CopLINE_dec(PL_curcop);
f0e67a1d 12404 s = PL_bufptr;
5db06880
NC
12405#ifdef PERL_MAD
12406 stuffstart = s - SvPVX(PL_linestr);
12407#endif
57843af0 12408 CopLINE_inc(PL_curcop);
3280af22 12409 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12410 PL_last_lop = PL_last_uni = NULL;
6a27c188 12411#ifndef PERL_STRICT_CR
3280af22 12412 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12413 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12414 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12415 {
3280af22
NIS
12416 PL_bufend[-2] = '\n';
12417 PL_bufend--;
95a20fc0 12418 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12419 }
3280af22
NIS
12420 else if (PL_bufend[-1] == '\r')
12421 PL_bufend[-1] = '\n';
f63a84b2 12422 }
3280af22
NIS
12423 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12424 PL_bufend[-1] = '\n';
f63a84b2 12425#endif
3280af22 12426 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12427 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12428 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12429 sv_catsv(PL_linestr,herewas);
12430 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12431 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12432 }
12433 else {
3280af22
NIS
12434 s = PL_bufend;
12435 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12436 }
12437 }
79072805 12438 s++;
0244c3a4 12439retval:
57843af0 12440 PL_multi_end = CopLINE(PL_curcop);
79072805 12441 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12442 SvPV_shrink_to_cur(tmpstr);
79072805 12443 }
8990e307 12444 SvREFCNT_dec(herewas);
2f31ce75 12445 if (!IN_BYTES) {
95a20fc0 12446 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12447 SvUTF8_on(tmpstr);
12448 else if (PL_encoding)
12449 sv_recode_to_utf8(tmpstr, PL_encoding);
12450 }
3280af22 12451 PL_lex_stuff = tmpstr;
6154021b 12452 pl_yylval.ival = op_type;
79072805
LW
12453 return s;
12454}
12455
02aa26ce
NT
12456/* scan_inputsymbol
12457 takes: current position in input buffer
12458 returns: new position in input buffer
6154021b 12459 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12460
12461 This code handles:
12462
12463 <> read from ARGV
12464 <FH> read from filehandle
12465 <pkg::FH> read from package qualified filehandle
12466 <pkg'FH> read from package qualified filehandle
12467 <$fh> read from filehandle in $fh
12468 <*.h> filename glob
12469
12470*/
12471
76e3520e 12472STATIC char *
cea2e8a9 12473S_scan_inputsymbol(pTHX_ char *start)
79072805 12474{
97aff369 12475 dVAR;
02aa26ce 12476 register char *s = start; /* current position in buffer */
1b420867 12477 char *end;
79072805 12478 I32 len;
6136c704
AL
12479 char *d = PL_tokenbuf; /* start of temp holding space */
12480 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12481
7918f24d
NC
12482 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12483
1b420867
GS
12484 end = strchr(s, '\n');
12485 if (!end)
12486 end = PL_bufend;
12487 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12488
12489 /* die if we didn't have space for the contents of the <>,
1b420867 12490 or if it didn't end, or if we see a newline
02aa26ce
NT
12491 */
12492
bb7a0f54 12493 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12494 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12495 if (s >= end)
cea2e8a9 12496 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12497
fc36a67e 12498 s++;
02aa26ce
NT
12499
12500 /* check for <$fh>
12501 Remember, only scalar variables are interpreted as filehandles by
12502 this code. Anything more complex (e.g., <$fh{$num}>) will be
12503 treated as a glob() call.
12504 This code makes use of the fact that except for the $ at the front,
12505 a scalar variable and a filehandle look the same.
12506 */
4633a7c4 12507 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12508
12509 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12510 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12511 d++;
02aa26ce
NT
12512
12513 /* If we've tried to read what we allow filehandles to look like, and
12514 there's still text left, then it must be a glob() and not a getline.
12515 Use scan_str to pull out the stuff between the <> and treat it
12516 as nothing more than a string.
12517 */
12518
3280af22 12519 if (d - PL_tokenbuf != len) {
6154021b 12520 pl_yylval.ival = OP_GLOB;
5db06880 12521 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12522 if (!s)
cea2e8a9 12523 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12524 return s;
12525 }
395c3793 12526 else {
9b3023bc 12527 bool readline_overriden = FALSE;
6136c704 12528 GV *gv_readline;
9b3023bc 12529 GV **gvp;
02aa26ce 12530 /* we're in a filehandle read situation */
3280af22 12531 d = PL_tokenbuf;
02aa26ce
NT
12532
12533 /* turn <> into <ARGV> */
79072805 12534 if (!len)
689badd5 12535 Copy("ARGV",d,5,char);
02aa26ce 12536
9b3023bc 12537 /* Check whether readline() is overriden */
fafc274c 12538 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12539 if ((gv_readline
ba979b31 12540 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12541 ||
017a3ce5 12542 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12543 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12544 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12545 readline_overriden = TRUE;
12546
02aa26ce
NT
12547 /* if <$fh>, create the ops to turn the variable into a
12548 filehandle
12549 */
79072805 12550 if (*d == '$') {
02aa26ce
NT
12551 /* try to find it in the pad for this block, otherwise find
12552 add symbol table ops
12553 */
f8f98e0a 12554 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12555 if (tmp != NOT_IN_PAD) {
00b1698f 12556 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12557 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12558 HEK * const stashname = HvNAME_HEK(stash);
12559 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12560 sv_catpvs(sym, "::");
f558d5af
JH
12561 sv_catpv(sym, d+1);
12562 d = SvPVX(sym);
12563 goto intro_sym;
12564 }
12565 else {
6136c704 12566 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12567 o->op_targ = tmp;
9b3023bc
RGS
12568 PL_lex_op = readline_overriden
12569 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 12570 op_append_elem(OP_LIST, o,
9b3023bc
RGS
12571 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12572 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12573 }
a0d0e21e
LW
12574 }
12575 else {
f558d5af
JH
12576 GV *gv;
12577 ++d;
12578intro_sym:
12579 gv = gv_fetchpv(d,
12580 (PL_in_eval
12581 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12582 : GV_ADDMULTI),
f558d5af 12583 SVt_PV);
9b3023bc
RGS
12584 PL_lex_op = readline_overriden
12585 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 12586 op_append_elem(OP_LIST,
9b3023bc
RGS
12587 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12588 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12589 : (OP*)newUNOP(OP_READLINE, 0,
12590 newUNOP(OP_RV2SV, 0,
12591 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12592 }
7c6fadd6
RGS
12593 if (!readline_overriden)
12594 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12595 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12596 pl_yylval.ival = OP_NULL;
79072805 12597 }
02aa26ce
NT
12598
12599 /* If it's none of the above, it must be a literal filehandle
12600 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12601 else {
6136c704 12602 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12603 PL_lex_op = readline_overriden
12604 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 12605 op_append_elem(OP_LIST,
9b3023bc
RGS
12606 newGVOP(OP_GV, 0, gv),
12607 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12608 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12609 pl_yylval.ival = OP_NULL;
79072805
LW
12610 }
12611 }
02aa26ce 12612
79072805
LW
12613 return s;
12614}
12615
02aa26ce
NT
12616
12617/* scan_str
12618 takes: start position in buffer
09bef843
SB
12619 keep_quoted preserve \ on the embedded delimiter(s)
12620 keep_delims preserve the delimiters around the string
02aa26ce
NT
12621 returns: position to continue reading from buffer
12622 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12623 updates the read buffer.
12624
12625 This subroutine pulls a string out of the input. It is called for:
12626 q single quotes q(literal text)
12627 ' single quotes 'literal text'
12628 qq double quotes qq(interpolate $here please)
12629 " double quotes "interpolate $here please"
12630 qx backticks qx(/bin/ls -l)
12631 ` backticks `/bin/ls -l`
12632 qw quote words @EXPORT_OK = qw( func() $spam )
12633 m// regexp match m/this/
12634 s/// regexp substitute s/this/that/
12635 tr/// string transliterate tr/this/that/
12636 y/// string transliterate y/this/that/
12637 ($*@) sub prototypes sub foo ($)
09bef843 12638 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12639 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12640
12641 In most of these cases (all but <>, patterns and transliterate)
12642 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12643 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12644 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12645 calls scan_str().
4e553d73 12646
02aa26ce
NT
12647 It skips whitespace before the string starts, and treats the first
12648 character as the delimiter. If the delimiter is one of ([{< then
12649 the corresponding "close" character )]}> is used as the closing
12650 delimiter. It allows quoting of delimiters, and if the string has
12651 balanced delimiters ([{<>}]) it allows nesting.
12652
37fd879b
HS
12653 On success, the SV with the resulting string is put into lex_stuff or,
12654 if that is already non-NULL, into lex_repl. The second case occurs only
12655 when parsing the RHS of the special constructs s/// and tr/// (y///).
12656 For convenience, the terminating delimiter character is stuffed into
12657 SvIVX of the SV.
02aa26ce
NT
12658*/
12659
76e3520e 12660STATIC char *
09bef843 12661S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12662{
97aff369 12663 dVAR;
02aa26ce 12664 SV *sv; /* scalar value: string */
d3fcec1f 12665 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12666 register char *s = start; /* current position in the buffer */
12667 register char term; /* terminating character */
12668 register char *to; /* current position in the sv's data */
12669 I32 brackets = 1; /* bracket nesting level */
89491803 12670 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12671 I32 termcode; /* terminating char. code */
89ebb4a3 12672 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12673 STRLEN termlen; /* length of terminating string */
0331ef07 12674 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12675#ifdef PERL_MAD
12676 int stuffstart;
12677 char *tstart;
12678#endif
02aa26ce 12679
7918f24d
NC
12680 PERL_ARGS_ASSERT_SCAN_STR;
12681
02aa26ce 12682 /* skip space before the delimiter */
29595ff2
NC
12683 if (isSPACE(*s)) {
12684 s = PEEKSPACE(s);
12685 }
02aa26ce 12686
5db06880 12687#ifdef PERL_MAD
cd81e915
NC
12688 if (PL_realtokenstart >= 0) {
12689 stuffstart = PL_realtokenstart;
12690 PL_realtokenstart = -1;
5db06880
NC
12691 }
12692 else
12693 stuffstart = start - SvPVX(PL_linestr);
12694#endif
02aa26ce 12695 /* mark where we are, in case we need to report errors */
79072805 12696 CLINE;
02aa26ce
NT
12697
12698 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12699 term = *s;
220e2d4e
IH
12700 if (!UTF) {
12701 termcode = termstr[0] = term;
12702 termlen = 1;
12703 }
12704 else {
f3b9ce0f 12705 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12706 Copy(s, termstr, termlen, U8);
12707 if (!UTF8_IS_INVARIANT(term))
12708 has_utf8 = TRUE;
12709 }
b1c7b182 12710
02aa26ce 12711 /* mark where we are */
57843af0 12712 PL_multi_start = CopLINE(PL_curcop);
3280af22 12713 PL_multi_open = term;
02aa26ce
NT
12714
12715 /* find corresponding closing delimiter */
93a17b20 12716 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12717 termcode = termstr[0] = term = tmps[5];
12718
3280af22 12719 PL_multi_close = term;
79072805 12720
561b68a9
SH
12721 /* create a new SV to hold the contents. 79 is the SV's initial length.
12722 What a random number. */
7d0a29fe
NC
12723 sv = newSV_type(SVt_PVIV);
12724 SvGROW(sv, 80);
45977657 12725 SvIV_set(sv, termcode);
a0d0e21e 12726 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12727
12728 /* move past delimiter and try to read a complete string */
09bef843 12729 if (keep_delims)
220e2d4e
IH
12730 sv_catpvn(sv, s, termlen);
12731 s += termlen;
5db06880
NC
12732#ifdef PERL_MAD
12733 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12734 if (!PL_thisopen && !keep_delims) {
12735 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12736 stuffstart = s - SvPVX(PL_linestr);
12737 }
12738#endif
93a17b20 12739 for (;;) {
220e2d4e
IH
12740 if (PL_encoding && !UTF) {
12741 bool cont = TRUE;
12742
12743 while (cont) {
95a20fc0 12744 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12745 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12746 &offset, (char*)termstr, termlen);
6136c704
AL
12747 const char * const ns = SvPVX_const(PL_linestr) + offset;
12748 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12749
12750 for (; s < ns; s++) {
12751 if (*s == '\n' && !PL_rsfp)
12752 CopLINE_inc(PL_curcop);
12753 }
12754 if (!found)
12755 goto read_more_line;
12756 else {
12757 /* handle quoted delimiters */
52327caf 12758 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12759 const char *t;
95a20fc0 12760 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12761 t--;
12762 if ((svlast-1 - t) % 2) {
12763 if (!keep_quoted) {
12764 *(svlast-1) = term;
12765 *svlast = '\0';
12766 SvCUR_set(sv, SvCUR(sv) - 1);
12767 }
12768 continue;
12769 }
12770 }
12771 if (PL_multi_open == PL_multi_close) {
12772 cont = FALSE;
12773 }
12774 else {
f54cb97a
AL
12775 const char *t;
12776 char *w;
0331ef07 12777 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12778 /* At here, all closes are "was quoted" one,
12779 so we don't check PL_multi_close. */
12780 if (*t == '\\') {
12781 if (!keep_quoted && *(t+1) == PL_multi_open)
12782 t++;
12783 else
12784 *w++ = *t++;
12785 }
12786 else if (*t == PL_multi_open)
12787 brackets++;
12788
12789 *w = *t;
12790 }
12791 if (w < t) {
12792 *w++ = term;
12793 *w = '\0';
95a20fc0 12794 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12795 }
0331ef07 12796 last_off = w - SvPVX(sv);
220e2d4e
IH
12797 if (--brackets <= 0)
12798 cont = FALSE;
12799 }
12800 }
12801 }
12802 if (!keep_delims) {
12803 SvCUR_set(sv, SvCUR(sv) - 1);
12804 *SvEND(sv) = '\0';
12805 }
12806 break;
12807 }
12808
02aa26ce 12809 /* extend sv if need be */
3280af22 12810 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12811 /* set 'to' to the next character in the sv's string */
463ee0b2 12812 to = SvPVX(sv)+SvCUR(sv);
09bef843 12813
02aa26ce 12814 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12815 if (PL_multi_open == PL_multi_close) {
12816 for (; s < PL_bufend; s++,to++) {
02aa26ce 12817 /* embedded newlines increment the current line number */
3280af22 12818 if (*s == '\n' && !PL_rsfp)
57843af0 12819 CopLINE_inc(PL_curcop);
02aa26ce 12820 /* handle quoted delimiters */
3280af22 12821 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12822 if (!keep_quoted && s[1] == term)
a0d0e21e 12823 s++;
02aa26ce 12824 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12825 else
12826 *to++ = *s++;
12827 }
02aa26ce
NT
12828 /* terminate when run out of buffer (the for() condition), or
12829 have found the terminator */
220e2d4e
IH
12830 else if (*s == term) {
12831 if (termlen == 1)
12832 break;
f3b9ce0f 12833 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12834 break;
12835 }
63cd0674 12836 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12837 has_utf8 = TRUE;
93a17b20
LW
12838 *to = *s;
12839 }
12840 }
02aa26ce
NT
12841
12842 /* if the terminator isn't the same as the start character (e.g.,
12843 matched brackets), we have to allow more in the quoting, and
12844 be prepared for nested brackets.
12845 */
93a17b20 12846 else {
02aa26ce 12847 /* read until we run out of string, or we find the terminator */
3280af22 12848 for (; s < PL_bufend; s++,to++) {
02aa26ce 12849 /* embedded newlines increment the line count */
3280af22 12850 if (*s == '\n' && !PL_rsfp)
57843af0 12851 CopLINE_inc(PL_curcop);
02aa26ce 12852 /* backslashes can escape the open or closing characters */
3280af22 12853 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12854 if (!keep_quoted &&
12855 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12856 s++;
12857 else
12858 *to++ = *s++;
12859 }
02aa26ce 12860 /* allow nested opens and closes */
3280af22 12861 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12862 break;
3280af22 12863 else if (*s == PL_multi_open)
93a17b20 12864 brackets++;
63cd0674 12865 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12866 has_utf8 = TRUE;
93a17b20
LW
12867 *to = *s;
12868 }
12869 }
02aa26ce 12870 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12871 *to = '\0';
95a20fc0 12872 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12873
02aa26ce
NT
12874 /*
12875 * this next chunk reads more into the buffer if we're not done yet
12876 */
12877
b1c7b182
GS
12878 if (s < PL_bufend)
12879 break; /* handle case where we are done yet :-) */
79072805 12880
6a27c188 12881#ifndef PERL_STRICT_CR
95a20fc0 12882 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12883 if ((to[-2] == '\r' && to[-1] == '\n') ||
12884 (to[-2] == '\n' && to[-1] == '\r'))
12885 {
f63a84b2
LW
12886 to[-2] = '\n';
12887 to--;
95a20fc0 12888 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12889 }
12890 else if (to[-1] == '\r')
12891 to[-1] = '\n';
12892 }
95a20fc0 12893 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12894 to[-1] = '\n';
12895#endif
12896
220e2d4e 12897 read_more_line:
02aa26ce
NT
12898 /* if we're out of file, or a read fails, bail and reset the current
12899 line marker so we can report where the unterminated string began
12900 */
5db06880
NC
12901#ifdef PERL_MAD
12902 if (PL_madskills) {
c35e046a 12903 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12904 if (PL_thisstuff)
12905 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12906 else
cd81e915 12907 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12908 }
12909#endif
f0e67a1d
Z
12910 CopLINE_inc(PL_curcop);
12911 PL_bufptr = PL_bufend;
12912 if (!lex_next_chunk(0)) {
c07a80fd 12913 sv_free(sv);
eb160463 12914 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12915 return NULL;
79072805 12916 }
f0e67a1d 12917 s = PL_bufptr;
5db06880
NC
12918#ifdef PERL_MAD
12919 stuffstart = 0;
12920#endif
378cc40b 12921 }
4e553d73 12922
02aa26ce
NT
12923 /* at this point, we have successfully read the delimited string */
12924
220e2d4e 12925 if (!PL_encoding || UTF) {
5db06880
NC
12926#ifdef PERL_MAD
12927 if (PL_madskills) {
c35e046a 12928 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12929 const int len = s - tstart;
cd81e915 12930 if (PL_thisstuff)
c35e046a 12931 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12932 else
c35e046a 12933 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12934 if (!PL_thisclose && !keep_delims)
12935 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12936 }
12937#endif
12938
220e2d4e
IH
12939 if (keep_delims)
12940 sv_catpvn(sv, s, termlen);
12941 s += termlen;
12942 }
5db06880
NC
12943#ifdef PERL_MAD
12944 else {
12945 if (PL_madskills) {
c35e046a
AL
12946 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12947 const int len = s - tstart - termlen;
cd81e915 12948 if (PL_thisstuff)
c35e046a 12949 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12950 else
c35e046a 12951 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12952 if (!PL_thisclose && !keep_delims)
12953 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12954 }
12955 }
12956#endif
220e2d4e 12957 if (has_utf8 || PL_encoding)
b1c7b182 12958 SvUTF8_on(sv);
d0063567 12959
57843af0 12960 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12961
12962 /* if we allocated too much space, give some back */
93a17b20
LW
12963 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12964 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12965 SvPV_renew(sv, SvLEN(sv));
79072805 12966 }
02aa26ce
NT
12967
12968 /* decide whether this is the first or second quoted string we've read
12969 for this op
12970 */
4e553d73 12971
3280af22
NIS
12972 if (PL_lex_stuff)
12973 PL_lex_repl = sv;
79072805 12974 else
3280af22 12975 PL_lex_stuff = sv;
378cc40b
LW
12976 return s;
12977}
12978
02aa26ce
NT
12979/*
12980 scan_num
12981 takes: pointer to position in buffer
12982 returns: pointer to new position in buffer
6154021b 12983 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12984
12985 Read a number in any of the formats that Perl accepts:
12986
7fd134d9
JH
12987 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12988 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12989 0b[01](_?[01])*
12990 0[0-7](_?[0-7])*
12991 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12992
3280af22 12993 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
12994 thing it reads.
12995
12996 If it reads a number without a decimal point or an exponent, it will
12997 try converting the number to an integer and see if it can do so
12998 without loss of precision.
12999*/
4e553d73 13000
378cc40b 13001char *
bfed75c6 13002Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 13003{
97aff369 13004 dVAR;
bfed75c6 13005 register const char *s = start; /* current position in buffer */
02aa26ce
NT
13006 register char *d; /* destination in temp buffer */
13007 register char *e; /* end of temp buffer */
86554af2 13008 NV nv; /* number read, as a double */
a0714e2c 13009 SV *sv = NULL; /* place to put the converted number */
a86a20aa 13010 bool floatit; /* boolean: int or float? */
cbbf8932 13011 const char *lastub = NULL; /* position of last underbar */
bfed75c6 13012 static char const number_too_long[] = "Number too long";
378cc40b 13013
7918f24d
NC
13014 PERL_ARGS_ASSERT_SCAN_NUM;
13015
02aa26ce
NT
13016 /* We use the first character to decide what type of number this is */
13017
378cc40b 13018 switch (*s) {
79072805 13019 default:
cea2e8a9 13020 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 13021
02aa26ce 13022 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 13023 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
13024 case '0':
13025 {
02aa26ce
NT
13026 /* variables:
13027 u holds the "number so far"
4f19785b
WSI
13028 shift the power of 2 of the base
13029 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
13030 overflowed was the number more than we can hold?
13031
13032 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
13033 we in octal/hex/binary?" indicator to disallow hex characters
13034 when in octal mode.
02aa26ce 13035 */
9e24b6e2
JH
13036 NV n = 0.0;
13037 UV u = 0;
79072805 13038 I32 shift;
9e24b6e2 13039 bool overflowed = FALSE;
61f33854 13040 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
13041 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
13042 static const char* const bases[5] =
13043 { "", "binary", "", "octal", "hexadecimal" };
13044 static const char* const Bases[5] =
13045 { "", "Binary", "", "Octal", "Hexadecimal" };
13046 static const char* const maxima[5] =
13047 { "",
13048 "0b11111111111111111111111111111111",
13049 "",
13050 "037777777777",
13051 "0xffffffff" };
bfed75c6 13052 const char *base, *Base, *max;
378cc40b 13053
02aa26ce 13054 /* check for hex */
a674e8db 13055 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
13056 shift = 4;
13057 s += 2;
61f33854 13058 just_zero = FALSE;
a674e8db 13059 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
13060 shift = 1;
13061 s += 2;
61f33854 13062 just_zero = FALSE;
378cc40b 13063 }
02aa26ce 13064 /* check for a decimal in disguise */
b78218b7 13065 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 13066 goto decimal;
02aa26ce 13067 /* so it must be octal */
928753ea 13068 else {
378cc40b 13069 shift = 3;
928753ea
JH
13070 s++;
13071 }
13072
13073 if (*s == '_') {
a2a5de95 13074 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
13075 "Misplaced _ in number");
13076 lastub = s++;
13077 }
9e24b6e2
JH
13078
13079 base = bases[shift];
13080 Base = Bases[shift];
13081 max = maxima[shift];
02aa26ce 13082
4f19785b 13083 /* read the rest of the number */
378cc40b 13084 for (;;) {
9e24b6e2 13085 /* x is used in the overflow test,
893fe2c2 13086 b is the digit we're adding on. */
9e24b6e2 13087 UV x, b;
55497cff 13088
378cc40b 13089 switch (*s) {
02aa26ce
NT
13090
13091 /* if we don't mention it, we're done */
378cc40b
LW
13092 default:
13093 goto out;
02aa26ce 13094
928753ea 13095 /* _ are ignored -- but warned about if consecutive */
de3bb511 13096 case '_':
a2a5de95
NC
13097 if (lastub && s == lastub + 1)
13098 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13099 "Misplaced _ in number");
928753ea 13100 lastub = s++;
de3bb511 13101 break;
02aa26ce
NT
13102
13103 /* 8 and 9 are not octal */
378cc40b 13104 case '8': case '9':
4f19785b 13105 if (shift == 3)
cea2e8a9 13106 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 13107 /* FALL THROUGH */
02aa26ce
NT
13108
13109 /* octal digits */
4f19785b 13110 case '2': case '3': case '4':
378cc40b 13111 case '5': case '6': case '7':
4f19785b 13112 if (shift == 1)
cea2e8a9 13113 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
13114 /* FALL THROUGH */
13115
13116 case '0': case '1':
02aa26ce 13117 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 13118 goto digit;
02aa26ce
NT
13119
13120 /* hex digits */
378cc40b
LW
13121 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13122 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 13123 /* make sure they said 0x */
378cc40b
LW
13124 if (shift != 4)
13125 goto out;
55497cff 13126 b = (*s++ & 7) + 9;
02aa26ce
NT
13127
13128 /* Prepare to put the digit we have onto the end
13129 of the number so far. We check for overflows.
13130 */
13131
55497cff 13132 digit:
61f33854 13133 just_zero = FALSE;
9e24b6e2
JH
13134 if (!overflowed) {
13135 x = u << shift; /* make room for the digit */
13136
13137 if ((x >> shift) != u
13138 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
13139 overflowed = TRUE;
13140 n = (NV) u;
9b387841
NC
13141 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13142 "Integer overflow in %s number",
13143 base);
9e24b6e2
JH
13144 } else
13145 u = x | b; /* add the digit to the end */
13146 }
13147 if (overflowed) {
13148 n *= nvshift[shift];
13149 /* If an NV has not enough bits in its
13150 * mantissa to represent an UV this summing of
13151 * small low-order numbers is a waste of time
13152 * (because the NV cannot preserve the
13153 * low-order bits anyway): we could just
13154 * remember when did we overflow and in the
13155 * end just multiply n by the right
13156 * amount. */
13157 n += (NV) b;
55497cff 13158 }
378cc40b
LW
13159 break;
13160 }
13161 }
02aa26ce
NT
13162
13163 /* if we get here, we had success: make a scalar value from
13164 the number.
13165 */
378cc40b 13166 out:
928753ea
JH
13167
13168 /* final misplaced underbar check */
13169 if (s[-1] == '_') {
a2a5de95 13170 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
13171 }
13172
9e24b6e2 13173 if (overflowed) {
a2a5de95
NC
13174 if (n > 4294967295.0)
13175 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13176 "%s number > %s non-portable",
13177 Base, max);
b081dd7e 13178 sv = newSVnv(n);
9e24b6e2
JH
13179 }
13180 else {
15041a67 13181#if UVSIZE > 4
a2a5de95
NC
13182 if (u > 0xffffffff)
13183 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13184 "%s number > %s non-portable",
13185 Base, max);
2cc4c2dc 13186#endif
b081dd7e 13187 sv = newSVuv(u);
9e24b6e2 13188 }
61f33854 13189 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 13190 sv = new_constant(start, s - start, "integer",
eb0d8d16 13191 sv, NULL, NULL, 0);
61f33854 13192 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 13193 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
13194 }
13195 break;
02aa26ce
NT
13196
13197 /*
13198 handle decimal numbers.
13199 we're also sent here when we read a 0 as the first digit
13200 */
378cc40b
LW
13201 case '1': case '2': case '3': case '4': case '5':
13202 case '6': case '7': case '8': case '9': case '.':
13203 decimal:
3280af22
NIS
13204 d = PL_tokenbuf;
13205 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 13206 floatit = FALSE;
02aa26ce
NT
13207
13208 /* read next group of digits and _ and copy into d */
de3bb511 13209 while (isDIGIT(*s) || *s == '_') {
4e553d73 13210 /* skip underscores, checking for misplaced ones
02aa26ce
NT
13211 if -w is on
13212 */
93a17b20 13213 if (*s == '_') {
a2a5de95
NC
13214 if (lastub && s == lastub + 1)
13215 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13216 "Misplaced _ in number");
928753ea 13217 lastub = s++;
93a17b20 13218 }
fc36a67e 13219 else {
02aa26ce 13220 /* check for end of fixed-length buffer */
fc36a67e 13221 if (d >= e)
cea2e8a9 13222 Perl_croak(aTHX_ number_too_long);
02aa26ce 13223 /* if we're ok, copy the character */
378cc40b 13224 *d++ = *s++;
fc36a67e 13225 }
378cc40b 13226 }
02aa26ce
NT
13227
13228 /* final misplaced underbar check */
928753ea 13229 if (lastub && s == lastub + 1) {
a2a5de95 13230 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 13231 }
02aa26ce
NT
13232
13233 /* read a decimal portion if there is one. avoid
13234 3..5 being interpreted as the number 3. followed
13235 by .5
13236 */
2f3197b3 13237 if (*s == '.' && s[1] != '.') {
79072805 13238 floatit = TRUE;
378cc40b 13239 *d++ = *s++;
02aa26ce 13240
928753ea 13241 if (*s == '_') {
a2a5de95
NC
13242 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13243 "Misplaced _ in number");
928753ea
JH
13244 lastub = s;
13245 }
13246
13247 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 13248 */
fc36a67e 13249 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 13250 /* fixed length buffer check */
fc36a67e 13251 if (d >= e)
cea2e8a9 13252 Perl_croak(aTHX_ number_too_long);
928753ea 13253 if (*s == '_') {
a2a5de95
NC
13254 if (lastub && s == lastub + 1)
13255 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13256 "Misplaced _ in number");
928753ea
JH
13257 lastub = s;
13258 }
13259 else
fc36a67e 13260 *d++ = *s;
378cc40b 13261 }
928753ea
JH
13262 /* fractional part ending in underbar? */
13263 if (s[-1] == '_') {
a2a5de95
NC
13264 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13265 "Misplaced _ in number");
928753ea 13266 }
dd629d5b
GS
13267 if (*s == '.' && isDIGIT(s[1])) {
13268 /* oops, it's really a v-string, but without the "v" */
f4758303 13269 s = start;
dd629d5b
GS
13270 goto vstring;
13271 }
378cc40b 13272 }
02aa26ce
NT
13273
13274 /* read exponent part, if present */
3792a11b 13275 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
13276 floatit = TRUE;
13277 s++;
02aa26ce
NT
13278
13279 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 13280 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 13281
7fd134d9
JH
13282 /* stray preinitial _ */
13283 if (*s == '_') {
a2a5de95
NC
13284 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13285 "Misplaced _ in number");
7fd134d9
JH
13286 lastub = s++;
13287 }
13288
02aa26ce 13289 /* allow positive or negative exponent */
378cc40b
LW
13290 if (*s == '+' || *s == '-')
13291 *d++ = *s++;
02aa26ce 13292
7fd134d9
JH
13293 /* stray initial _ */
13294 if (*s == '_') {
a2a5de95
NC
13295 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13296 "Misplaced _ in number");
7fd134d9
JH
13297 lastub = s++;
13298 }
13299
7fd134d9
JH
13300 /* read digits of exponent */
13301 while (isDIGIT(*s) || *s == '_') {
13302 if (isDIGIT(*s)) {
13303 if (d >= e)
13304 Perl_croak(aTHX_ number_too_long);
b3b48e3e 13305 *d++ = *s++;
7fd134d9
JH
13306 }
13307 else {
041457d9 13308 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
13309 (!isDIGIT(s[1]) && s[1] != '_')))
13310 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13311 "Misplaced _ in number");
b3b48e3e 13312 lastub = s++;
7fd134d9 13313 }
7fd134d9 13314 }
378cc40b 13315 }
02aa26ce 13316
02aa26ce 13317
0b7fceb9 13318 /*
58bb9ec3
NC
13319 We try to do an integer conversion first if no characters
13320 indicating "float" have been found.
0b7fceb9
MU
13321 */
13322
13323 if (!floatit) {
58bb9ec3 13324 UV uv;
6136c704 13325 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
13326
13327 if (flags == IS_NUMBER_IN_UV) {
13328 if (uv <= IV_MAX)
b081dd7e 13329 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 13330 else
b081dd7e 13331 sv = newSVuv(uv);
58bb9ec3
NC
13332 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13333 if (uv <= (UV) IV_MIN)
b081dd7e 13334 sv = newSViv(-(IV)uv);
58bb9ec3
NC
13335 else
13336 floatit = TRUE;
13337 } else
13338 floatit = TRUE;
13339 }
0b7fceb9 13340 if (floatit) {
58bb9ec3
NC
13341 /* terminate the string */
13342 *d = '\0';
86554af2 13343 nv = Atof(PL_tokenbuf);
b081dd7e 13344 sv = newSVnv(nv);
86554af2 13345 }
86554af2 13346
eb0d8d16
NC
13347 if ( floatit
13348 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13349 const char *const key = floatit ? "float" : "integer";
13350 const STRLEN keylen = floatit ? 5 : 7;
13351 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13352 key, keylen, sv, NULL, NULL, 0);
13353 }
378cc40b 13354 break;
0b7fceb9 13355
e312add1 13356 /* if it starts with a v, it could be a v-string */
a7cb1f99 13357 case 'v':
dd629d5b 13358vstring:
561b68a9 13359 sv = newSV(5); /* preallocate storage space */
65b06e02 13360 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 13361 break;
79072805 13362 }
a687059c 13363
02aa26ce
NT
13364 /* make the op for the constant and return */
13365
a86a20aa 13366 if (sv)
b73d6f50 13367 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13368 else
5f66b61c 13369 lvalp->opval = NULL;
a687059c 13370
73d840c0 13371 return (char *)s;
378cc40b
LW
13372}
13373
76e3520e 13374STATIC char *
cea2e8a9 13375S_scan_formline(pTHX_ register char *s)
378cc40b 13376{
97aff369 13377 dVAR;
79072805 13378 register char *eol;
378cc40b 13379 register char *t;
6136c704 13380 SV * const stuff = newSVpvs("");
79072805 13381 bool needargs = FALSE;
c5ee2135 13382 bool eofmt = FALSE;
5db06880
NC
13383#ifdef PERL_MAD
13384 char *tokenstart = s;
4f61fd4b
JC
13385 SV* savewhite = NULL;
13386
5db06880 13387 if (PL_madskills) {
cd81e915
NC
13388 savewhite = PL_thiswhite;
13389 PL_thiswhite = 0;
5db06880
NC
13390 }
13391#endif
378cc40b 13392
7918f24d
NC
13393 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13394
79072805 13395 while (!needargs) {
a1b95068 13396 if (*s == '.') {
c35e046a 13397 t = s+1;
51882d45 13398#ifdef PERL_STRICT_CR
c35e046a
AL
13399 while (SPACE_OR_TAB(*t))
13400 t++;
51882d45 13401#else
c35e046a
AL
13402 while (SPACE_OR_TAB(*t) || *t == '\r')
13403 t++;
51882d45 13404#endif
c5ee2135
WL
13405 if (*t == '\n' || t == PL_bufend) {
13406 eofmt = TRUE;
79072805 13407 break;
c5ee2135 13408 }
79072805 13409 }
3280af22 13410 if (PL_in_eval && !PL_rsfp) {
07409e01 13411 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13412 if (!eol++)
3280af22 13413 eol = PL_bufend;
0f85fab0
LW
13414 }
13415 else
3280af22 13416 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13417 if (*s != '#') {
a0d0e21e
LW
13418 for (t = s; t < eol; t++) {
13419 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13420 needargs = FALSE;
13421 goto enough; /* ~~ must be first line in formline */
378cc40b 13422 }
a0d0e21e
LW
13423 if (*t == '@' || *t == '^')
13424 needargs = TRUE;
378cc40b 13425 }
7121b347
MG
13426 if (eol > s) {
13427 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13428#ifndef PERL_STRICT_CR
7121b347
MG
13429 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13430 char *end = SvPVX(stuff) + SvCUR(stuff);
13431 end[-2] = '\n';
13432 end[-1] = '\0';
b162af07 13433 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13434 }
2dc4c65b 13435#endif
7121b347
MG
13436 }
13437 else
13438 break;
79072805 13439 }
95a20fc0 13440 s = (char*)eol;
3280af22 13441 if (PL_rsfp) {
f0e67a1d 13442 bool got_some;
5db06880
NC
13443#ifdef PERL_MAD
13444 if (PL_madskills) {
cd81e915
NC
13445 if (PL_thistoken)
13446 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13447 else
cd81e915 13448 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13449 }
13450#endif
f0e67a1d
Z
13451 PL_bufptr = PL_bufend;
13452 CopLINE_inc(PL_curcop);
13453 got_some = lex_next_chunk(0);
13454 CopLINE_dec(PL_curcop);
13455 s = PL_bufptr;
5db06880 13456#ifdef PERL_MAD
f0e67a1d 13457 tokenstart = PL_bufptr;
5db06880 13458#endif
f0e67a1d 13459 if (!got_some)
378cc40b 13460 break;
378cc40b 13461 }
463ee0b2 13462 incline(s);
79072805 13463 }
a0d0e21e
LW
13464 enough:
13465 if (SvCUR(stuff)) {
3280af22 13466 PL_expect = XTERM;
79072805 13467 if (needargs) {
3280af22 13468 PL_lex_state = LEX_NORMAL;
cd81e915 13469 start_force(PL_curforce);
9ded7720 13470 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13471 force_next(',');
13472 }
a0d0e21e 13473 else
3280af22 13474 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13475 if (!IN_BYTES) {
95a20fc0 13476 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13477 SvUTF8_on(stuff);
13478 else if (PL_encoding)
13479 sv_recode_to_utf8(stuff, PL_encoding);
13480 }
cd81e915 13481 start_force(PL_curforce);
9ded7720 13482 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13483 force_next(THING);
cd81e915 13484 start_force(PL_curforce);
9ded7720 13485 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13486 force_next(LSTOP);
378cc40b 13487 }
79072805 13488 else {
8990e307 13489 SvREFCNT_dec(stuff);
c5ee2135
WL
13490 if (eofmt)
13491 PL_lex_formbrack = 0;
3280af22 13492 PL_bufptr = s;
79072805 13493 }
5db06880
NC
13494#ifdef PERL_MAD
13495 if (PL_madskills) {
cd81e915
NC
13496 if (PL_thistoken)
13497 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13498 else
cd81e915
NC
13499 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13500 PL_thiswhite = savewhite;
5db06880
NC
13501 }
13502#endif
79072805 13503 return s;
378cc40b 13504}
a687059c 13505
ba6d6ac9 13506I32
864dbfa3 13507Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13508{
97aff369 13509 dVAR;
a3b680e6 13510 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13511 CV* const outsidecv = PL_compcv;
8990e307 13512
3280af22
NIS
13513 if (PL_compcv) {
13514 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13515 }
7766f137 13516 SAVEI32(PL_subline);
3280af22 13517 save_item(PL_subname);
3280af22 13518 SAVESPTR(PL_compcv);
3280af22 13519
ea726b52 13520 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13521 CvFLAGS(PL_compcv) |= flags;
13522
57843af0 13523 PL_subline = CopLINE(PL_curcop);
dd2155a4 13524 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13525 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13526 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13527
8990e307
LW
13528 return oldsavestack_ix;
13529}
13530
084592ab
CN
13531#ifdef __SC__
13532#pragma segment Perl_yylex
13533#endif
af41e527
NC
13534static int
13535S_yywarn(pTHX_ const char *const s)
8990e307 13536{
97aff369 13537 dVAR;
7918f24d
NC
13538
13539 PERL_ARGS_ASSERT_YYWARN;
13540
faef0170 13541 PL_in_eval |= EVAL_WARNONLY;
748a9306 13542 yyerror(s);
faef0170 13543 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13544 return 0;
8990e307
LW
13545}
13546
13547int
15f169a1 13548Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13549{
97aff369 13550 dVAR;
bfed75c6
AL
13551 const char *where = NULL;
13552 const char *context = NULL;
68dc0745 13553 int contlen = -1;
46fc3d4c 13554 SV *msg;
5912531f 13555 int yychar = PL_parser->yychar;
463ee0b2 13556
7918f24d
NC
13557 PERL_ARGS_ASSERT_YYERROR;
13558
3280af22 13559 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13560 where = "at EOF";
8bcfe651
TM
13561 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13562 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13563 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13564 /*
13565 Only for NetWare:
13566 The code below is removed for NetWare because it abends/crashes on NetWare
13567 when the script has error such as not having the closing quotes like:
13568 if ($var eq "value)
13569 Checking of white spaces is anyway done in NetWare code.
13570 */
13571#ifndef NETWARE
3280af22
NIS
13572 while (isSPACE(*PL_oldoldbufptr))
13573 PL_oldoldbufptr++;
f355267c 13574#endif
3280af22
NIS
13575 context = PL_oldoldbufptr;
13576 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13577 }
8bcfe651
TM
13578 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13579 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13580 /*
13581 Only for NetWare:
13582 The code below is removed for NetWare because it abends/crashes on NetWare
13583 when the script has error such as not having the closing quotes like:
13584 if ($var eq "value)
13585 Checking of white spaces is anyway done in NetWare code.
13586 */
13587#ifndef NETWARE
3280af22
NIS
13588 while (isSPACE(*PL_oldbufptr))
13589 PL_oldbufptr++;
f355267c 13590#endif
3280af22
NIS
13591 context = PL_oldbufptr;
13592 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13593 }
13594 else if (yychar > 255)
68dc0745 13595 where = "next token ???";
12fbd33b 13596 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13597 if (PL_lex_state == LEX_NORMAL ||
13598 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13599 where = "at end of line";
3280af22 13600 else if (PL_lex_inpat)
68dc0745 13601 where = "within pattern";
463ee0b2 13602 else
68dc0745 13603 where = "within string";
463ee0b2 13604 }
46fc3d4c 13605 else {
84bafc02 13606 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13607 if (yychar < 32)
cea2e8a9 13608 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13609 else if (isPRINT_LC(yychar)) {
88c9ea1e 13610 const char string = yychar;
5e7aa789
NC
13611 sv_catpvn(where_sv, &string, 1);
13612 }
463ee0b2 13613 else
cea2e8a9 13614 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13615 where = SvPVX_const(where_sv);
463ee0b2 13616 }
46fc3d4c 13617 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13618 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13619 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13620 if (context)
cea2e8a9 13621 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13622 else
cea2e8a9 13623 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13624 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13625 Perl_sv_catpvf(aTHX_ msg,
57def98f 13626 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13627 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13628 PL_multi_end = 0;
a0d0e21e 13629 }
500960a6 13630 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13631 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13632 }
463ee0b2 13633 else
5a844595 13634 qerror(msg);
c7d6bfb2
GS
13635 if (PL_error_count >= 10) {
13636 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13637 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13638 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13639 else
13640 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13641 OutCopFILE(PL_curcop));
c7d6bfb2 13642 }
3280af22 13643 PL_in_my = 0;
5c284bb0 13644 PL_in_my_stash = NULL;
463ee0b2
LW
13645 return 0;
13646}
084592ab
CN
13647#ifdef __SC__
13648#pragma segment Main
13649#endif
4e35701f 13650
b250498f 13651STATIC char*
3ae08724 13652S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13653{
97aff369 13654 dVAR;
f54cb97a 13655 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13656
13657 PERL_ARGS_ASSERT_SWALLOW_BOM;
13658
7aa207d6 13659 switch (s[0]) {
4e553d73
NIS
13660 case 0xFF:
13661 if (s[1] == 0xFE) {
ee6ba15d 13662 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13663 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13664 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13665#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13666 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13667 s += 2;
dea0fc0b 13668 if (PL_bufend > (char*)s) {
81a923f4 13669 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13670 }
b250498f 13671#else
ee6ba15d 13672 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13673#endif
01ec43d0
GS
13674 }
13675 break;
78ae23f5 13676 case 0xFE:
7aa207d6 13677 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13678#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13679 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13680 s += 2;
13681 if (PL_bufend > (char *)s) {
81a923f4 13682 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13683 }
b250498f 13684#else
ee6ba15d 13685 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13686#endif
01ec43d0
GS
13687 }
13688 break;
3ae08724
GS
13689 case 0xEF:
13690 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13691 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13692 s += 3; /* UTF-8 */
13693 }
13694 break;
13695 case 0:
7aa207d6
JH
13696 if (slen > 3) {
13697 if (s[1] == 0) {
13698 if (s[2] == 0xFE && s[3] == 0xFF) {
13699 /* UTF-32 big-endian */
ee6ba15d 13700 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13701 }
13702 }
13703 else if (s[2] == 0 && s[3] != 0) {
13704 /* Leading bytes
13705 * 00 xx 00 xx
13706 * are a good indicator of UTF-16BE. */
ee6ba15d 13707#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13708 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13709 s = add_utf16_textfilter(s, FALSE);
13710#else
13711 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13712#endif
7aa207d6 13713 }
01ec43d0 13714 }
e294cc5d
JH
13715#ifdef EBCDIC
13716 case 0xDD:
13717 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13718 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13719 s += 4; /* UTF-8 */
13720 }
13721 break;
13722#endif
13723
7aa207d6
JH
13724 default:
13725 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13726 /* Leading bytes
13727 * xx 00 xx 00
13728 * are a good indicator of UTF-16LE. */
ee6ba15d 13729#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13730 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13731 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13732#else
13733 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13734#endif
7aa207d6 13735 }
01ec43d0 13736 }
b8f84bb2 13737 return (char*)s;
b250498f 13738}
4755096e 13739
6e3aabd6
GS
13740
13741#ifndef PERL_NO_UTF16_FILTER
13742static I32
a28af015 13743S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13744{
97aff369 13745 dVAR;
f3040f2c 13746 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13747 /* We re-use this each time round, throwing the contents away before we
13748 return. */
2a773401 13749 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13750 SV *const utf8_buffer = filter;
c28d6105 13751 IV status = IoPAGE(filter);
f2338a2e 13752 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 13753 I32 retval;
c8b0cbae 13754
c85ae797
NC
13755 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13756
c8b0cbae
NC
13757 /* As we're automatically added, at the lowest level, and hence only called
13758 from this file, we can be sure that we're not called in block mode. Hence
13759 don't bother writing code to deal with block mode. */
13760 if (maxlen) {
13761 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13762 }
c28d6105
NC
13763 if (status < 0) {
13764 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13765 }
1de9afcd 13766 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13767 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13768 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13769 reverse ? 'l' : 'b', idx, maxlen, status,
13770 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13771
13772 while (1) {
13773 STRLEN chars;
13774 STRLEN have;
dea0fc0b 13775 I32 newlen;
2a773401 13776 U8 *end;
c28d6105
NC
13777 /* First, look in our buffer of existing UTF-8 data: */
13778 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13779
13780 if (nl) {
13781 ++nl;
13782 } else if (status == 0) {
13783 /* EOF */
13784 IoPAGE(filter) = 0;
13785 nl = SvEND(utf8_buffer);
13786 }
13787 if (nl) {
d2d1d4de
NC
13788 STRLEN got = nl - SvPVX(utf8_buffer);
13789 /* Did we have anything to append? */
13790 retval = got != 0;
13791 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13792 /* Everything else in this code works just fine if SVp_POK isn't
13793 set. This, however, needs it, and we need it to work, else
13794 we loop infinitely because the buffer is never consumed. */
13795 sv_chop(utf8_buffer, nl);
13796 break;
13797 }
ba77e4cc 13798
c28d6105
NC
13799 /* OK, not a complete line there, so need to read some more UTF-16.
13800 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13801 while (1) {
13802 if (status <= 0)
13803 break;
13804 if (SvCUR(utf16_buffer) >= 2) {
13805 /* Location of the high octet of the last complete code point.
13806 Gosh, UTF-16 is a pain. All the benefits of variable length,
13807 *coupled* with all the benefits of partial reads and
13808 endianness. */
13809 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13810 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13811
13812 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13813 break;
13814 }
13815
13816 /* We have the first half of a surrogate. Read more. */
13817 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13818 }
c28d6105 13819
c28d6105
NC
13820 status = FILTER_READ(idx + 1, utf16_buffer,
13821 160 + (SvCUR(utf16_buffer) & 1));
13822 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13823 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13824 if (status < 0) {
13825 /* Error */
13826 IoPAGE(filter) = status;
13827 return status;
13828 }
13829 }
13830
13831 chars = SvCUR(utf16_buffer) >> 1;
13832 have = SvCUR(utf8_buffer);
13833 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13834
aa6dbd60 13835 if (reverse) {
c28d6105
NC
13836 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13837 (U8*)SvPVX_const(utf8_buffer) + have,
13838 chars * 2, &newlen);
aa6dbd60 13839 } else {
2a773401 13840 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13841 (U8*)SvPVX_const(utf8_buffer) + have,
13842 chars * 2, &newlen);
2a773401 13843 }
c28d6105 13844 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13845 *end = '\0';
c28d6105 13846
e07286ed
NC
13847 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13848 it's private to us, and utf16_to_utf8{,reversed} take a
13849 (pointer,length) pair, rather than a NUL-terminated string. */
13850 if(SvCUR(utf16_buffer) & 1) {
13851 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13852 SvCUR_set(utf16_buffer, 1);
13853 } else {
13854 SvCUR_set(utf16_buffer, 0);
13855 }
2a773401 13856 }
c28d6105
NC
13857 DEBUG_P(PerlIO_printf(Perl_debug_log,
13858 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13859 status,
13860 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13861 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13862 return retval;
6e3aabd6 13863}
81a923f4
NC
13864
13865static U8 *
13866S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13867{
2a773401 13868 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13869
c85ae797
NC
13870 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13871
c28d6105 13872 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13873 sv_setpvs(filter, "");
2a773401 13874 IoLINES(filter) = reversed;
c28d6105
NC
13875 IoPAGE(filter) = 1; /* Not EOF */
13876
13877 /* Sadly, we have to return a valid pointer, come what may, so we have to
13878 ignore any error return from this. */
13879 SvCUR_set(PL_linestr, 0);
13880 if (FILTER_READ(0, PL_linestr, 0)) {
13881 SvUTF8_on(PL_linestr);
81a923f4 13882 } else {
c28d6105 13883 SvUTF8_on(PL_linestr);
81a923f4 13884 }
c28d6105 13885 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13886 return (U8*)SvPVX(PL_linestr);
13887}
6e3aabd6 13888#endif
9f4817db 13889
f333445c
JP
13890/*
13891Returns a pointer to the next character after the parsed
13892vstring, as well as updating the passed in sv.
13893
13894Function must be called like
13895
561b68a9 13896 sv = newSV(5);
65b06e02 13897 s = scan_vstring(s,e,sv);
f333445c 13898
65b06e02 13899where s and e are the start and end of the string.
f333445c
JP
13900The sv should already be large enough to store the vstring
13901passed in, for performance reasons.
13902
13903*/
13904
13905char *
15f169a1 13906Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13907{
97aff369 13908 dVAR;
bfed75c6
AL
13909 const char *pos = s;
13910 const char *start = s;
7918f24d
NC
13911
13912 PERL_ARGS_ASSERT_SCAN_VSTRING;
13913
f333445c 13914 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13915 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13916 pos++;
f333445c
JP
13917 if ( *pos != '.') {
13918 /* this may not be a v-string if followed by => */
bfed75c6 13919 const char *next = pos;
65b06e02 13920 while (next < e && isSPACE(*next))
8fc7bb1c 13921 ++next;
65b06e02 13922 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13923 /* return string not v-string */
13924 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13925 return (char *)pos;
f333445c
JP
13926 }
13927 }
13928
13929 if (!isALPHA(*pos)) {
89ebb4a3 13930 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13931
d4c19fe8
AL
13932 if (*s == 'v')
13933 s++; /* get past 'v' */
f333445c 13934
76f68e9b 13935 sv_setpvs(sv, "");
f333445c
JP
13936
13937 for (;;) {
d4c19fe8 13938 /* this is atoi() that tolerates underscores */
0bd48802
AL
13939 U8 *tmpend;
13940 UV rev = 0;
d4c19fe8
AL
13941 const char *end = pos;
13942 UV mult = 1;
13943 while (--end >= s) {
13944 if (*end != '_') {
13945 const UV orev = rev;
f333445c
JP
13946 rev += (*end - '0') * mult;
13947 mult *= 10;
9b387841
NC
13948 if (orev > rev)
13949 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13950 "Integer overflow in decimal number");
f333445c
JP
13951 }
13952 }
13953#ifdef EBCDIC
13954 if (rev > 0x7FFFFFFF)
13955 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13956#endif
13957 /* Append native character for the rev point */
13958 tmpend = uvchr_to_utf8(tmpbuf, rev);
13959 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13960 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13961 SvUTF8_on(sv);
65b06e02 13962 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13963 s = ++pos;
13964 else {
13965 s = pos;
13966 break;
13967 }
65b06e02 13968 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13969 pos++;
13970 }
13971 SvPOK_on(sv);
13972 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13973 SvRMAGICAL_on(sv);
13974 }
73d840c0 13975 return (char *)s;
f333445c
JP
13976}
13977
88e1f1a2
JV
13978int
13979Perl_keyword_plugin_standard(pTHX_
13980 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13981{
13982 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13983 PERL_UNUSED_CONTEXT;
13984 PERL_UNUSED_ARG(keyword_ptr);
13985 PERL_UNUSED_ARG(keyword_len);
13986 PERL_UNUSED_ARG(op_ptr);
13987 return KEYWORD_PLUGIN_DECLINE;
13988}
13989
a7aaec61 13990#define parse_recdescent(g) S_parse_recdescent(aTHX_ g)
e53d8f76
Z
13991static void
13992S_parse_recdescent(pTHX_ int gramtype)
a7aaec61
Z
13993{
13994 SAVEI32(PL_lex_brackets);
13995 if (PL_lex_brackets > 100)
13996 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
13997 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
13998 if(yyparse(gramtype) && !PL_parser->error_count)
13999 qerror(Perl_mess(aTHX_ "Parse error"));
14000}
14001
e53d8f76
Z
14002#define parse_recdescent_for_op(g) S_parse_recdescent_for_op(aTHX_ g)
14003static OP *
14004S_parse_recdescent_for_op(pTHX_ int gramtype)
14005{
14006 OP *o;
14007 ENTER;
14008 SAVEVPTR(PL_eval_root);
14009 PL_eval_root = NULL;
14010 parse_recdescent(gramtype);
14011 o = PL_eval_root;
14012 LEAVE;
14013 return o;
14014}
14015
14016/*
14017=for apidoc Amx|OP *|parse_block|U32 flags
14018
14019Parse a single complete Perl code block. This consists of an opening
14020brace, a sequence of statements, and a closing brace. The block
14021constitutes a lexical scope, so C<my> variables and various compile-time
14022effects can be contained within it. It is up to the caller to ensure
14023that the dynamic parser state (L</PL_parser> et al) is correctly set to
14024reflect the source of the code to be parsed and the lexical context for
14025the statement.
14026
14027The op tree representing the code block is returned. This is always a
14028real op, never a null pointer. It will normally be a C<lineseq> list,
14029including C<nextstate> or equivalent ops. No ops to construct any kind
14030of runtime scope are included by virtue of it being a block.
14031
14032If an error occurs in parsing or compilation, in most cases a valid op
14033tree (most likely null) is returned anyway. The error is reflected in
14034the parser state, normally resulting in a single exception at the top
14035level of parsing which covers all the compilation errors that occurred.
14036Some compilation errors, however, will throw an exception immediately.
14037
14038The I<flags> parameter is reserved for future use, and must always
14039be zero.
14040
14041=cut
14042*/
14043
14044OP *
14045Perl_parse_block(pTHX_ U32 flags)
14046{
14047 if (flags)
14048 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
14049 return parse_recdescent_for_op(GRAMBLOCK);
14050}
14051
1da4ca5f 14052/*
8359b381
Z
14053=for apidoc Amx|OP *|parse_barestmt|U32 flags
14054
14055Parse a single unadorned Perl statement. This may be a normal imperative
14056statement or a declaration that has compile-time effect. It does not
14057include any label or other affixture. It is up to the caller to ensure
14058that the dynamic parser state (L</PL_parser> et al) is correctly set to
14059reflect the source of the code to be parsed and the lexical context for
14060the statement.
14061
14062The op tree representing the statement is returned. This may be a
14063null pointer if the statement is null, for example if it was actually
14064a subroutine definition (which has compile-time side effects). If not
14065null, it will be ops directly implementing the statement, suitable to
14066pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
14067equivalent op (except for those embedded in a scope contained entirely
14068within the statement).
14069
14070If an error occurs in parsing or compilation, in most cases a valid op
14071tree (most likely null) is returned anyway. The error is reflected in
14072the parser state, normally resulting in a single exception at the top
14073level of parsing which covers all the compilation errors that occurred.
14074Some compilation errors, however, will throw an exception immediately.
14075
14076The I<flags> parameter is reserved for future use, and must always
14077be zero.
14078
14079=cut
14080*/
14081
14082OP *
14083Perl_parse_barestmt(pTHX_ U32 flags)
14084{
14085 if (flags)
14086 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
14087 return parse_recdescent_for_op(GRAMBARESTMT);
14088}
14089
14090/*
361d9b55
Z
14091=for apidoc Amx|SV *|parse_label|U32 flags
14092
14093Parse a single label, possibly optional, of the type that may prefix a
14094Perl statement. It is up to the caller to ensure that the dynamic parser
14095state (L</PL_parser> et al) is correctly set to reflect the source of
14096the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
14097label is optional, otherwise it is mandatory.
14098
14099The name of the label is returned in the form of a fresh scalar. If an
14100optional label is absent, a null pointer is returned.
14101
14102If an error occurs in parsing, which can only occur if the label is
14103mandatory, a valid label is returned anyway. The error is reflected in
14104the parser state, normally resulting in a single exception at the top
14105level of parsing which covers all the compilation errors that occurred.
14106
14107=cut
14108*/
14109
14110SV *
14111Perl_parse_label(pTHX_ U32 flags)
14112{
14113 if (flags & ~PARSE_OPTIONAL)
14114 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
14115 if (PL_lex_state == LEX_KNOWNEXT) {
14116 PL_parser->yychar = yylex();
14117 if (PL_parser->yychar == LABEL) {
14118 char *lpv = pl_yylval.pval;
14119 STRLEN llen = strlen(lpv);
14120 SV *lsv;
14121 PL_parser->yychar = YYEMPTY;
14122 lsv = newSV_type(SVt_PV);
14123 SvPV_set(lsv, lpv);
14124 SvCUR_set(lsv, llen);
14125 SvLEN_set(lsv, llen+1);
14126 SvPOK_on(lsv);
14127 return lsv;
14128 } else {
14129 yyunlex();
14130 goto no_label;
14131 }
14132 } else {
14133 char *s, *t;
14134 U8 c;
14135 STRLEN wlen, bufptr_pos;
14136 lex_read_space(0);
14137 t = s = PL_bufptr;
14138 c = (U8)*s;
14139 if (!isIDFIRST_A(c))
14140 goto no_label;
14141 do {
14142 c = (U8)*++t;
14143 } while(isWORDCHAR_A(c));
14144 wlen = t - s;
14145 if (word_takes_any_delimeter(s, wlen))
14146 goto no_label;
14147 bufptr_pos = s - SvPVX(PL_linestr);
14148 PL_bufptr = t;
14149 lex_read_space(LEX_KEEP_PREVIOUS);
14150 t = PL_bufptr;
14151 s = SvPVX(PL_linestr) + bufptr_pos;
14152 if (t[0] == ':' && t[1] != ':') {
14153 PL_oldoldbufptr = PL_oldbufptr;
14154 PL_oldbufptr = s;
14155 PL_bufptr = t+1;
14156 return newSVpvn(s, wlen);
14157 } else {
14158 PL_bufptr = s;
14159 no_label:
14160 if (flags & PARSE_OPTIONAL) {
14161 return NULL;
14162 } else {
14163 qerror(Perl_mess(aTHX_ "Parse error"));
14164 return newSVpvs("x");
14165 }
14166 }
14167 }
14168}
14169
14170/*
28ac2b49
Z
14171=for apidoc Amx|OP *|parse_fullstmt|U32 flags
14172
14173Parse a single complete Perl statement. This may be a normal imperative
8359b381
Z
14174statement or a declaration that has compile-time effect, and may include
14175an optional label. It is up to the caller to ensure that the dynamic
28ac2b49
Z
14176parser state (L</PL_parser> et al) is correctly set to reflect the source
14177of the code to be parsed and the lexical context for the statement.
14178
14179The op tree representing the statement is returned. This may be a
14180null pointer if the statement is null, for example if it was actually
14181a subroutine definition (which has compile-time side effects). If not
14182null, it will be the result of a L</newSTATEOP> call, normally including
14183a C<nextstate> or equivalent op.
14184
14185If an error occurs in parsing or compilation, in most cases a valid op
14186tree (most likely null) is returned anyway. The error is reflected in
14187the parser state, normally resulting in a single exception at the top
14188level of parsing which covers all the compilation errors that occurred.
14189Some compilation errors, however, will throw an exception immediately.
14190
14191The I<flags> parameter is reserved for future use, and must always
14192be zero.
14193
14194=cut
14195*/
14196
14197OP *
14198Perl_parse_fullstmt(pTHX_ U32 flags)
14199{
28ac2b49
Z
14200 if (flags)
14201 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
e53d8f76 14202 return parse_recdescent_for_op(GRAMFULLSTMT);
28ac2b49
Z
14203}
14204
07ffcb73
Z
14205/*
14206=for apidoc Amx|OP *|parse_stmtseq|U32 flags
14207
14208Parse a sequence of zero or more Perl statements. These may be normal
14209imperative statements, including optional labels, or declarations
14210that have compile-time effect, or any mixture thereof. The statement
14211sequence ends when a closing brace or end-of-file is encountered in a
14212place where a new statement could have validly started. It is up to
14213the caller to ensure that the dynamic parser state (L</PL_parser> et al)
14214is correctly set to reflect the source of the code to be parsed and the
14215lexical context for the statements.
14216
14217The op tree representing the statement sequence is returned. This may
14218be a null pointer if the statements were all null, for example if there
14219were no statements or if there were only subroutine definitions (which
14220have compile-time side effects). If not null, it will be a C<lineseq>
14221list, normally including C<nextstate> or equivalent ops.
14222
14223If an error occurs in parsing or compilation, in most cases a valid op
14224tree is returned anyway. The error is reflected in the parser state,
14225normally resulting in a single exception at the top level of parsing
14226which covers all the compilation errors that occurred. Some compilation
14227errors, however, will throw an exception immediately.
14228
14229The I<flags> parameter is reserved for future use, and must always
14230be zero.
14231
14232=cut
14233*/
14234
14235OP *
14236Perl_parse_stmtseq(pTHX_ U32 flags)
14237{
14238 OP *stmtseqop;
e53d8f76 14239 I32 c;
07ffcb73
Z
14240 if (flags)
14241 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
e53d8f76
Z
14242 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ);
14243 c = lex_peek_unichar(0);
14244 if (c != -1 && c != /*{*/'}')
07ffcb73 14245 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
14246 return stmtseqop;
14247}
14248
ea25a9b2 14249void
f7e3d326 14250Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 14251{
f7e3d326 14252 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2
Z
14253 deprecate("qw(...) as parentheses");
14254 force_next(')');
14255 if (qwlist->op_type == OP_STUB) {
14256 op_free(qwlist);
14257 }
14258 else {
3d8e05a0 14259 start_force(PL_curforce);
ea25a9b2
Z
14260 NEXTVAL_NEXTTOKE.opval = qwlist;
14261 force_next(THING);
14262 }
14263 force_next('(');
14264}
14265
28ac2b49 14266/*
1da4ca5f
NC
14267 * Local variables:
14268 * c-indentation-style: bsd
14269 * c-basic-offset: 4
14270 * indent-tabs-mode: t
14271 * End:
14272 *
37442d52
RGS
14273 * ex: set ts=8 sts=4 sw=4 noet:
14274 */