This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
regcomp.c: teach tries about EXACTFU
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
04e98a4d 42#include "dquote_static.c"
378cc40b 43
eb0d8d16
NC
44#define new_constant(a,b,c,d,e,f,g) \
45 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
46
6154021b 47#define pl_yylval (PL_parser->yylval)
d3b6f988 48
199e78b7
DM
49/* XXX temporary backwards compatibility */
50#define PL_lex_brackets (PL_parser->lex_brackets)
51#define PL_lex_brackstack (PL_parser->lex_brackstack)
52#define PL_lex_casemods (PL_parser->lex_casemods)
53#define PL_lex_casestack (PL_parser->lex_casestack)
54#define PL_lex_defer (PL_parser->lex_defer)
55#define PL_lex_dojoin (PL_parser->lex_dojoin)
56#define PL_lex_expect (PL_parser->lex_expect)
57#define PL_lex_formbrack (PL_parser->lex_formbrack)
58#define PL_lex_inpat (PL_parser->lex_inpat)
59#define PL_lex_inwhat (PL_parser->lex_inwhat)
60#define PL_lex_op (PL_parser->lex_op)
61#define PL_lex_repl (PL_parser->lex_repl)
62#define PL_lex_starts (PL_parser->lex_starts)
63#define PL_lex_stuff (PL_parser->lex_stuff)
64#define PL_multi_start (PL_parser->multi_start)
65#define PL_multi_open (PL_parser->multi_open)
66#define PL_multi_close (PL_parser->multi_close)
67#define PL_pending_ident (PL_parser->pending_ident)
68#define PL_preambled (PL_parser->preambled)
69#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 70#define PL_linestr (PL_parser->linestr)
c2598295
DM
71#define PL_expect (PL_parser->expect)
72#define PL_copline (PL_parser->copline)
f06b5848
DM
73#define PL_bufptr (PL_parser->bufptr)
74#define PL_oldbufptr (PL_parser->oldbufptr)
75#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
76#define PL_linestart (PL_parser->linestart)
77#define PL_bufend (PL_parser->bufend)
78#define PL_last_uni (PL_parser->last_uni)
79#define PL_last_lop (PL_parser->last_lop)
80#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 81#define PL_lex_state (PL_parser->lex_state)
2f9285f8 82#define PL_rsfp (PL_parser->rsfp)
5486870f 83#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
84#define PL_in_my (PL_parser->in_my)
85#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 86#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 87#define PL_multi_end (PL_parser->multi_end)
13765c85 88#define PL_error_count (PL_parser->error_count)
199e78b7
DM
89
90#ifdef PERL_MAD
91# define PL_endwhite (PL_parser->endwhite)
92# define PL_faketokens (PL_parser->faketokens)
93# define PL_lasttoke (PL_parser->lasttoke)
94# define PL_nextwhite (PL_parser->nextwhite)
95# define PL_realtokenstart (PL_parser->realtokenstart)
96# define PL_skipwhite (PL_parser->skipwhite)
97# define PL_thisclose (PL_parser->thisclose)
98# define PL_thismad (PL_parser->thismad)
99# define PL_thisopen (PL_parser->thisopen)
100# define PL_thisstuff (PL_parser->thisstuff)
101# define PL_thistoken (PL_parser->thistoken)
102# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
103# define PL_thiswhite (PL_parser->thiswhite)
104# define PL_nexttoke (PL_parser->nexttoke)
105# define PL_curforce (PL_parser->curforce)
106#else
107# define PL_nexttoke (PL_parser->nexttoke)
108# define PL_nexttype (PL_parser->nexttype)
109# define PL_nextval (PL_parser->nextval)
199e78b7
DM
110#endif
111
16173588
NC
112/* This can't be done with embed.fnc, because struct yy_parser contains a
113 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
114static int
115S_pending_ident(pTHX);
199e78b7 116
0bd48802 117static const char ident_too_long[] = "Identifier too long";
8903cb82 118
29595ff2 119#ifdef PERL_MAD
29595ff2 120# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 121# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 122#else
5db06880 123# define CURMAD(slot,sv)
9ded7720 124# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
125#endif
126
a7aaec61
Z
127#define XENUMMASK 0x3f
128#define XFAKEEOF 0x40
129#define XFAKEBRACK 0x80
9059aa12 130
39e02b42
JH
131#ifdef USE_UTF8_SCRIPTS
132# define UTF (!IN_BYTES)
2b9d42f0 133#else
746b446a 134# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 135#endif
a0ed51b3 136
b1fc3636
CJ
137/* The maximum number of characters preceding the unrecognized one to display */
138#define UNRECOGNIZED_PRECEDE_COUNT 10
139
61f0cdd9 140/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
141 * 1999-02-27 mjd-perl-patch@plover.com */
142#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
143
bf4acbe4 144#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 145
ffb4593c
NT
146/* LEX_* are values for PL_lex_state, the state of the lexer.
147 * They are arranged oddly so that the guard on the switch statement
79072805
LW
148 * can get by with a single comparison (if the compiler is smart enough).
149 */
150
fb73857a 151/* #define LEX_NOTPARSING 11 is done in perl.h. */
152
b6007c36
DM
153#define LEX_NORMAL 10 /* normal code (ie not within "...") */
154#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
155#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
156#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
157#define LEX_INTERPSTART 6 /* expecting the start of a $var */
158
159 /* at end of code, eg "$x" followed by: */
160#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
161#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
162
163#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
164 string or after \E, $foo, etc */
165#define LEX_INTERPCONST 2 /* NOT USED */
166#define LEX_FORMLINE 1 /* expecting a format line */
167#define LEX_KNOWNEXT 0 /* next token known; just return it */
168
79072805 169
bbf60fe6 170#ifdef DEBUGGING
27da23d5 171static const char* const lex_state_names[] = {
bbf60fe6
DM
172 "KNOWNEXT",
173 "FORMLINE",
174 "INTERPCONST",
175 "INTERPCONCAT",
176 "INTERPENDMAYBE",
177 "INTERPEND",
178 "INTERPSTART",
179 "INTERPPUSH",
180 "INTERPCASEMOD",
181 "INTERPNORMAL",
182 "NORMAL"
183};
184#endif
185
79072805
LW
186#ifdef ff_next
187#undef ff_next
d48672a2
LW
188#endif
189
79072805 190#include "keywords.h"
fe14fcc3 191
ffb4593c
NT
192/* CLINE is a macro that ensures PL_copline has a sane value */
193
ae986130
LW
194#ifdef CLINE
195#undef CLINE
196#endif
57843af0 197#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 198
5db06880 199#ifdef PERL_MAD
29595ff2
NC
200# define SKIPSPACE0(s) skipspace0(s)
201# define SKIPSPACE1(s) skipspace1(s)
202# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
203# define PEEKSPACE(s) skipspace2(s,0)
204#else
205# define SKIPSPACE0(s) skipspace(s)
206# define SKIPSPACE1(s) skipspace(s)
207# define SKIPSPACE2(s,tsv) skipspace(s)
208# define PEEKSPACE(s) skipspace(s)
209#endif
210
ffb4593c
NT
211/*
212 * Convenience functions to return different tokens and prime the
9cbb5ea2 213 * lexer for the next token. They all take an argument.
ffb4593c
NT
214 *
215 * TOKEN : generic token (used for '(', DOLSHARP, etc)
216 * OPERATOR : generic operator
217 * AOPERATOR : assignment operator
218 * PREBLOCK : beginning the block after an if, while, foreach, ...
219 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
220 * PREREF : *EXPR where EXPR is not a simple identifier
221 * TERM : expression term
222 * LOOPX : loop exiting command (goto, last, dump, etc)
223 * FTST : file test operator
224 * FUN0 : zero-argument function
2d2e263d 225 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
226 * BOop : bitwise or or xor
227 * BAop : bitwise and
228 * SHop : shift operator
229 * PWop : power operator
9cbb5ea2 230 * PMop : pattern-matching operator
ffb4593c
NT
231 * Aop : addition-level operator
232 * Mop : multiplication-level operator
233 * Eop : equality-testing operator
e5edeb50 234 * Rop : relational operator <= != gt
ffb4593c
NT
235 *
236 * Also see LOP and lop() below.
237 */
238
998054bd 239#ifdef DEBUGGING /* Serve -DT. */
704d4215 240# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 241#else
bbf60fe6 242# define REPORT(retval) (retval)
998054bd
SC
243#endif
244
bbf60fe6
DM
245#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
246#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
247#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
248#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
249#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
250#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
251#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
252#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
253#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
254#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
255#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
256#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
257#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
258#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
259#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
260#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
261#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
262#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
263#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
264#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 265
a687059c
LW
266/* This bit of chicanery makes a unary function followed by
267 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
268 * The UNIDOR macro is for unary functions that can be followed by the //
269 * operator (such as C<shift // 0>).
a687059c 270 */
376fcdbf 271#define UNI2(f,x) { \
6154021b 272 pl_yylval.ival = f; \
376fcdbf
AL
273 PL_expect = x; \
274 PL_bufptr = s; \
275 PL_last_uni = PL_oldbufptr; \
276 PL_last_lop_op = f; \
277 if (*s == '(') \
278 return REPORT( (int)FUNC1 ); \
29595ff2 279 s = PEEKSPACE(s); \
376fcdbf
AL
280 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
281 }
6f33ba73
RGS
282#define UNI(f) UNI2(f,XTERM)
283#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 284
376fcdbf 285#define UNIBRACK(f) { \
6154021b 286 pl_yylval.ival = f; \
376fcdbf
AL
287 PL_bufptr = s; \
288 PL_last_uni = PL_oldbufptr; \
289 if (*s == '(') \
290 return REPORT( (int)FUNC1 ); \
29595ff2 291 s = PEEKSPACE(s); \
376fcdbf
AL
292 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
293 }
79072805 294
9f68db38 295/* grandfather return to old style */
6154021b 296#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 297
8fa7f367
JH
298#ifdef DEBUGGING
299
6154021b 300/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
301enum token_type {
302 TOKENTYPE_NONE,
303 TOKENTYPE_IVAL,
6154021b 304 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
305 TOKENTYPE_PVAL,
306 TOKENTYPE_OPVAL,
307 TOKENTYPE_GVVAL
308};
309
6d4a66ac
NC
310static struct debug_tokens {
311 const int token;
312 enum token_type type;
313 const char *name;
314} const debug_tokens[] =
9041c2e3 315{
bbf60fe6
DM
316 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
317 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
318 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
319 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
320 { ARROW, TOKENTYPE_NONE, "ARROW" },
321 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
322 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
323 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
324 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
325 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 326 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
327 { DO, TOKENTYPE_NONE, "DO" },
328 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
329 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
330 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
331 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
332 { ELSE, TOKENTYPE_NONE, "ELSE" },
333 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
334 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
335 { FOR, TOKENTYPE_IVAL, "FOR" },
336 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
337 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
338 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
339 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
340 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
341 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 342 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
343 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
344 { IF, TOKENTYPE_IVAL, "IF" },
345 { LABEL, TOKENTYPE_PVAL, "LABEL" },
346 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
347 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
348 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
349 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
350 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
351 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
352 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
353 { MY, TOKENTYPE_IVAL, "MY" },
354 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
355 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
356 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
357 { OROP, TOKENTYPE_IVAL, "OROP" },
358 { OROR, TOKENTYPE_NONE, "OROR" },
359 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
360 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
361 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
362 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
363 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
364 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
365 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
366 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
367 { PREINC, TOKENTYPE_NONE, "PREINC" },
368 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
369 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
370 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
371 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
372 { SUB, TOKENTYPE_NONE, "SUB" },
373 { THING, TOKENTYPE_OPVAL, "THING" },
374 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
375 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
376 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
377 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
378 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
379 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 380 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
381 { WHILE, TOKENTYPE_IVAL, "WHILE" },
382 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 383 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 384 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
385};
386
6154021b 387/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 388
bbf60fe6 389STATIC int
704d4215 390S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 391{
97aff369 392 dVAR;
7918f24d
NC
393
394 PERL_ARGS_ASSERT_TOKEREPORT;
395
bbf60fe6 396 if (DEBUG_T_TEST) {
bd61b366 397 const char *name = NULL;
bbf60fe6 398 enum token_type type = TOKENTYPE_NONE;
f54cb97a 399 const struct debug_tokens *p;
396482e1 400 SV* const report = newSVpvs("<== ");
bbf60fe6 401
f54cb97a 402 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
403 if (p->token == (int)rv) {
404 name = p->name;
405 type = p->type;
406 break;
407 }
408 }
409 if (name)
54667de8 410 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
411 else if ((char)rv > ' ' && (char)rv < '~')
412 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
413 else if (!rv)
396482e1 414 sv_catpvs(report, "EOF");
bbf60fe6
DM
415 else
416 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
417 switch (type) {
418 case TOKENTYPE_NONE:
419 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
420 break;
421 case TOKENTYPE_IVAL:
704d4215 422 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
423 break;
424 case TOKENTYPE_OPNUM:
425 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 426 PL_op_name[lvalp->ival]);
bbf60fe6
DM
427 break;
428 case TOKENTYPE_PVAL:
704d4215 429 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
430 break;
431 case TOKENTYPE_OPVAL:
704d4215 432 if (lvalp->opval) {
401441c0 433 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
434 PL_op_name[lvalp->opval->op_type]);
435 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 436 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 437 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
438 }
439
440 }
401441c0 441 else
396482e1 442 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
443 break;
444 }
b6007c36 445 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
446 };
447 return (int)rv;
998054bd
SC
448}
449
b6007c36
DM
450
451/* print the buffer with suitable escapes */
452
453STATIC void
15f169a1 454S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 455{
396482e1 456 SV* const tmp = newSVpvs("");
7918f24d
NC
457
458 PERL_ARGS_ASSERT_PRINTBUF;
459
b6007c36
DM
460 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
461 SvREFCNT_dec(tmp);
462}
463
8fa7f367
JH
464#endif
465
8290c323
NC
466static int
467S_deprecate_commaless_var_list(pTHX) {
468 PL_expect = XTERM;
469 deprecate("comma-less variable list");
470 return REPORT(','); /* grandfather non-comma-format format */
471}
472
ffb4593c
NT
473/*
474 * S_ao
475 *
c963b151
BD
476 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
477 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
478 */
479
76e3520e 480STATIC int
cea2e8a9 481S_ao(pTHX_ int toketype)
a0d0e21e 482{
97aff369 483 dVAR;
3280af22
NIS
484 if (*PL_bufptr == '=') {
485 PL_bufptr++;
a0d0e21e 486 if (toketype == ANDAND)
6154021b 487 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 488 else if (toketype == OROR)
6154021b 489 pl_yylval.ival = OP_ORASSIGN;
c963b151 490 else if (toketype == DORDOR)
6154021b 491 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
492 toketype = ASSIGNOP;
493 }
494 return toketype;
495}
496
ffb4593c
NT
497/*
498 * S_no_op
499 * When Perl expects an operator and finds something else, no_op
500 * prints the warning. It always prints "<something> found where
501 * operator expected. It prints "Missing semicolon on previous line?"
502 * if the surprise occurs at the start of the line. "do you need to
503 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
504 * where the compiler doesn't know if foo is a method call or a function.
505 * It prints "Missing operator before end of line" if there's nothing
506 * after the missing operator, or "... before <...>" if there is something
507 * after the missing operator.
508 */
509
76e3520e 510STATIC void
15f169a1 511S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 512{
97aff369 513 dVAR;
9d4ba2ae
AL
514 char * const oldbp = PL_bufptr;
515 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 516
7918f24d
NC
517 PERL_ARGS_ASSERT_NO_OP;
518
1189a94a
GS
519 if (!s)
520 s = oldbp;
07c798fb 521 else
1189a94a 522 PL_bufptr = s;
cea2e8a9 523 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
524 if (ckWARN_d(WARN_SYNTAX)) {
525 if (is_first)
526 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
527 "\t(Missing semicolon on previous line?)\n");
528 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 529 const char *t;
c35e046a
AL
530 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
531 NOOP;
56da5a46
RGS
532 if (t < PL_bufptr && isSPACE(*t))
533 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
534 "\t(Do you need to predeclare %.*s?)\n",
551405c4 535 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
536 }
537 else {
538 assert(s >= oldbp);
539 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 540 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 541 }
07c798fb 542 }
3280af22 543 PL_bufptr = oldbp;
8990e307
LW
544}
545
ffb4593c
NT
546/*
547 * S_missingterm
548 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 549 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
550 * If we're in a delimited string and the delimiter is a control
551 * character, it's reformatted into a two-char sequence like ^C.
552 * This is fatal.
553 */
554
76e3520e 555STATIC void
cea2e8a9 556S_missingterm(pTHX_ char *s)
8990e307 557{
97aff369 558 dVAR;
8990e307
LW
559 char tmpbuf[3];
560 char q;
561 if (s) {
9d4ba2ae 562 char * const nl = strrchr(s,'\n');
d2719217 563 if (nl)
8990e307
LW
564 *nl = '\0';
565 }
463559e7 566 else if (isCNTRL(PL_multi_close)) {
8990e307 567 *tmpbuf = '^';
585ec06d 568 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
569 tmpbuf[2] = '\0';
570 s = tmpbuf;
571 }
572 else {
eb160463 573 *tmpbuf = (char)PL_multi_close;
8990e307
LW
574 tmpbuf[1] = '\0';
575 s = tmpbuf;
576 }
577 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 578 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 579}
79072805 580
ef89dcc3 581#define FEATURE_IS_ENABLED(name) \
0d863452 582 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 583 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 584/* The longest string we pass in. */
1863b879 585#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 586
0d863452
RH
587/*
588 * S_feature_is_enabled
589 * Check whether the named feature is enabled.
590 */
591STATIC bool
15f169a1 592S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 593{
97aff369 594 dVAR;
0d863452 595 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 596 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
597
598 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
599
4a731d7b
NC
600 assert(namelen <= MAX_FEATURE_LEN);
601 memcpy(&he_name[8], name, namelen);
d4c19fe8 602
7b9ef140 603 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
604}
605
ffb4593c 606/*
9cbb5ea2
GS
607 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
608 * utf16-to-utf8-reversed.
ffb4593c
NT
609 */
610
c39cd008
GS
611#ifdef PERL_CR_FILTER
612static void
613strip_return(SV *sv)
614{
95a20fc0 615 register const char *s = SvPVX_const(sv);
9d4ba2ae 616 register const char * const e = s + SvCUR(sv);
7918f24d
NC
617
618 PERL_ARGS_ASSERT_STRIP_RETURN;
619
c39cd008
GS
620 /* outer loop optimized to do nothing if there are no CR-LFs */
621 while (s < e) {
622 if (*s++ == '\r' && *s == '\n') {
623 /* hit a CR-LF, need to copy the rest */
624 register char *d = s - 1;
625 *d++ = *s++;
626 while (s < e) {
627 if (*s == '\r' && s[1] == '\n')
628 s++;
629 *d++ = *s++;
630 }
631 SvCUR(sv) -= s - d;
632 return;
633 }
634 }
635}
a868473f 636
76e3520e 637STATIC I32
c39cd008 638S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 639{
f54cb97a 640 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
641 if (count > 0 && !maxlen)
642 strip_return(sv);
643 return count;
a868473f
NIS
644}
645#endif
646
ffb4593c 647/*
8eaa0acf
Z
648=for apidoc Amx|void|lex_start|SV *line|PerlIO *rsfp|U32 flags
649
650Creates and initialises a new lexer/parser state object, supplying
651a context in which to lex and parse from a new source of Perl code.
652A pointer to the new state object is placed in L</PL_parser>. An entry
653is made on the save stack so that upon unwinding the new state object
654will be destroyed and the former value of L</PL_parser> will be restored.
655Nothing else need be done to clean up the parsing context.
656
657The code to be parsed comes from I<line> and I<rsfp>. I<line>, if
658non-null, provides a string (in SV form) containing code to be parsed.
659A copy of the string is made, so subsequent modification of I<line>
660does not affect parsing. I<rsfp>, if non-null, provides an input stream
661from which code will be read to be parsed. If both are non-null, the
662code in I<line> comes first and must consist of complete lines of input,
663and I<rsfp> supplies the remainder of the source.
664
665The I<flags> parameter is reserved for future use, and must always
666be zero.
667
668=cut
669*/
ffb4593c 670
a0d0e21e 671void
8eaa0acf 672Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, U32 flags)
79072805 673{
97aff369 674 dVAR;
6ef55633 675 const char *s = NULL;
8990e307 676 STRLEN len;
5486870f 677 yy_parser *parser, *oparser;
8eaa0acf
Z
678 if (flags)
679 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_start");
acdf0a21
DM
680
681 /* create and initialise a parser */
682
199e78b7 683 Newxz(parser, 1, yy_parser);
5486870f 684 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
685 PL_parser = parser;
686
28ac2b49
Z
687 parser->stack = NULL;
688 parser->ps = NULL;
689 parser->stack_size = 0;
acdf0a21 690
e3abe207
DM
691 /* on scope exit, free this parser and restore any outer one */
692 SAVEPARSER(parser);
7c4baf47 693 parser->saved_curcop = PL_curcop;
e3abe207 694
acdf0a21 695 /* initialise lexer state */
8990e307 696
fb205e7a
DM
697#ifdef PERL_MAD
698 parser->curforce = -1;
699#else
700 parser->nexttoke = 0;
701#endif
ca4cfd28 702 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 703 parser->copline = NOLINE;
5afb0a62 704 parser->lex_state = LEX_NORMAL;
c2598295 705 parser->expect = XSTATE;
2f9285f8 706 parser->rsfp = rsfp;
f07ec6dd 707 parser->rsfp_filters = newAV();
2f9285f8 708
199e78b7
DM
709 Newx(parser->lex_brackstack, 120, char);
710 Newx(parser->lex_casestack, 12, char);
711 *parser->lex_casestack = '\0';
02b34bbe 712
10efb74f
NC
713 if (line) {
714 s = SvPV_const(line, len);
715 } else {
716 len = 0;
717 }
bdc0bf6f 718
10efb74f 719 if (!len) {
bdc0bf6f 720 parser->linestr = newSVpvs("\n;");
805700c1 721 } else {
719a9bb0 722 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 723 if (s[len-1] != ';')
bdc0bf6f 724 sv_catpvs(parser->linestr, "\n;");
8990e307 725 }
f06b5848
DM
726 parser->oldoldbufptr =
727 parser->oldbufptr =
728 parser->bufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
737c24fc
Z
732
733 parser->in_pod = 0;
79072805 734}
a687059c 735
e3abe207
DM
736
737/* delete a parser object */
738
739void
740Perl_parser_free(pTHX_ const yy_parser *parser)
741{
7918f24d
NC
742 PERL_ARGS_ASSERT_PARSER_FREE;
743
7c4baf47 744 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
745 SvREFCNT_dec(parser->linestr);
746
2f9285f8
DM
747 if (parser->rsfp == PerlIO_stdin())
748 PerlIO_clearerr(parser->rsfp);
799361c3
SH
749 else if (parser->rsfp && (!parser->old_parser ||
750 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 751 PerlIO_close(parser->rsfp);
5486870f 752 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 753
e3abe207
DM
754 Safefree(parser->lex_brackstack);
755 Safefree(parser->lex_casestack);
756 PL_parser = parser->old_parser;
757 Safefree(parser);
758}
759
760
ffb4593c 761/*
f0e67a1d
Z
762=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
763
764Buffer scalar containing the chunk currently under consideration of the
765text currently being lexed. This is always a plain string scalar (for
766which C<SvPOK> is true). It is not intended to be used as a scalar by
767normal scalar means; instead refer to the buffer directly by the pointer
768variables described below.
769
770The lexer maintains various C<char*> pointers to things in the
771C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
772reallocated, all of these pointers must be updated. Don't attempt to
773do this manually, but rather use L</lex_grow_linestr> if you need to
774reallocate the buffer.
775
776The content of the text chunk in the buffer is commonly exactly one
777complete line of input, up to and including a newline terminator,
778but there are situations where it is otherwise. The octets of the
779buffer may be intended to be interpreted as either UTF-8 or Latin-1.
780The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
781flag on this scalar, which may disagree with it.
782
783For direct examination of the buffer, the variable
784L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
785lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
786of these pointers is usually preferable to examination of the scalar
787through normal scalar means.
788
789=for apidoc AmxU|char *|PL_parser-E<gt>bufend
790
791Direct pointer to the end of the chunk of text currently being lexed, the
792end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
793+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
794always located at the end of the buffer, and does not count as part of
795the buffer's contents.
796
797=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
798
799Points to the current position of lexing inside the lexer buffer.
800Characters around this point may be freely examined, within
801the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
802L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
803interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
804
805Lexing code (whether in the Perl core or not) moves this pointer past
806the characters that it consumes. It is also expected to perform some
807bookkeeping whenever a newline character is consumed. This movement
808can be more conveniently performed by the function L</lex_read_to>,
809which handles newlines appropriately.
810
811Interpretation of the buffer's octets can be abstracted out by
812using the slightly higher-level functions L</lex_peek_unichar> and
813L</lex_read_unichar>.
814
815=for apidoc AmxU|char *|PL_parser-E<gt>linestart
816
817Points to the start of the current line inside the lexer buffer.
818This is useful for indicating at which column an error occurred, and
819not much else. This must be updated by any lexing code that consumes
820a newline; the function L</lex_read_to> handles this detail.
821
822=cut
823*/
824
825/*
826=for apidoc Amx|bool|lex_bufutf8
827
828Indicates whether the octets in the lexer buffer
829(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
830of Unicode characters. If not, they should be interpreted as Latin-1
831characters. This is analogous to the C<SvUTF8> flag for scalars.
832
833In UTF-8 mode, it is not guaranteed that the lexer buffer actually
834contains valid UTF-8. Lexing code must be robust in the face of invalid
835encoding.
836
837The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
838is significant, but not the whole story regarding the input character
839encoding. Normally, when a file is being read, the scalar contains octets
840and its C<SvUTF8> flag is off, but the octets should be interpreted as
841UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
842however, the scalar may have the C<SvUTF8> flag on, and in this case its
843octets should be interpreted as UTF-8 unless the C<use bytes> pragma
844is in effect. This logic may change in the future; use this function
845instead of implementing the logic yourself.
846
847=cut
848*/
849
850bool
851Perl_lex_bufutf8(pTHX)
852{
853 return UTF;
854}
855
856/*
857=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
858
859Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
860at least I<len> octets (including terminating NUL). Returns a
861pointer to the reallocated buffer. This is necessary before making
862any direct modification of the buffer that would increase its length.
863L</lex_stuff_pvn> provides a more convenient way to insert text into
864the buffer.
865
866Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
867this function updates all of the lexer's variables that point directly
868into the buffer.
869
870=cut
871*/
872
873char *
874Perl_lex_grow_linestr(pTHX_ STRLEN len)
875{
876 SV *linestr;
877 char *buf;
878 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
879 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
880 linestr = PL_parser->linestr;
881 buf = SvPVX(linestr);
882 if (len <= SvLEN(linestr))
883 return buf;
884 bufend_pos = PL_parser->bufend - buf;
885 bufptr_pos = PL_parser->bufptr - buf;
886 oldbufptr_pos = PL_parser->oldbufptr - buf;
887 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
888 linestart_pos = PL_parser->linestart - buf;
889 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
890 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
891 buf = sv_grow(linestr, len);
892 PL_parser->bufend = buf + bufend_pos;
893 PL_parser->bufptr = buf + bufptr_pos;
894 PL_parser->oldbufptr = buf + oldbufptr_pos;
895 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
896 PL_parser->linestart = buf + linestart_pos;
897 if (PL_parser->last_uni)
898 PL_parser->last_uni = buf + last_uni_pos;
899 if (PL_parser->last_lop)
900 PL_parser->last_lop = buf + last_lop_pos;
901 return buf;
902}
903
904/*
83aa740e 905=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
906
907Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
908immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
909reallocating the buffer if necessary. This means that lexing code that
910runs later will see the characters as if they had appeared in the input.
911It is not recommended to do this as part of normal parsing, and most
912uses of this facility run the risk of the inserted characters being
913interpreted in an unintended manner.
914
915The string to be inserted is represented by I<len> octets starting
916at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
917according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
918The characters are recoded for the lexer buffer, according to how the
919buffer is currently being interpreted (L</lex_bufutf8>). If a string
9dcc53ea 920to be inserted is available as a Perl scalar, the L</lex_stuff_sv>
f0e67a1d
Z
921function is more convenient.
922
923=cut
924*/
925
926void
83aa740e 927Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 928{
749123ff 929 dVAR;
f0e67a1d
Z
930 char *bufptr;
931 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
932 if (flags & ~(LEX_STUFF_UTF8))
933 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
934 if (UTF) {
935 if (flags & LEX_STUFF_UTF8) {
936 goto plain_copy;
937 } else {
938 STRLEN highhalf = 0;
83aa740e 939 const char *p, *e = pv+len;
f0e67a1d
Z
940 for (p = pv; p != e; p++)
941 highhalf += !!(((U8)*p) & 0x80);
942 if (!highhalf)
943 goto plain_copy;
944 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
945 bufptr = PL_parser->bufptr;
946 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
947 SvCUR_set(PL_parser->linestr,
948 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
949 PL_parser->bufend += len+highhalf;
950 for (p = pv; p != e; p++) {
951 U8 c = (U8)*p;
952 if (c & 0x80) {
953 *bufptr++ = (char)(0xc0 | (c >> 6));
954 *bufptr++ = (char)(0x80 | (c & 0x3f));
955 } else {
956 *bufptr++ = (char)c;
957 }
958 }
959 }
960 } else {
961 if (flags & LEX_STUFF_UTF8) {
962 STRLEN highhalf = 0;
83aa740e 963 const char *p, *e = pv+len;
f0e67a1d
Z
964 for (p = pv; p != e; p++) {
965 U8 c = (U8)*p;
966 if (c >= 0xc4) {
967 Perl_croak(aTHX_ "Lexing code attempted to stuff "
968 "non-Latin-1 character into Latin-1 input");
969 } else if (c >= 0xc2 && p+1 != e &&
970 (((U8)p[1]) & 0xc0) == 0x80) {
971 p++;
972 highhalf++;
973 } else if (c >= 0x80) {
974 /* malformed UTF-8 */
975 ENTER;
976 SAVESPTR(PL_warnhook);
977 PL_warnhook = PERL_WARNHOOK_FATAL;
978 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
979 LEAVE;
980 }
981 }
982 if (!highhalf)
983 goto plain_copy;
984 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
985 bufptr = PL_parser->bufptr;
986 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
987 SvCUR_set(PL_parser->linestr,
988 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
989 PL_parser->bufend += len-highhalf;
990 for (p = pv; p != e; p++) {
991 U8 c = (U8)*p;
992 if (c & 0x80) {
993 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
994 p++;
995 } else {
996 *bufptr++ = (char)c;
997 }
998 }
999 } else {
1000 plain_copy:
1001 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1002 bufptr = PL_parser->bufptr;
1003 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1004 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1005 PL_parser->bufend += len;
1006 Copy(pv, bufptr, len, char);
1007 }
1008 }
1009}
1010
1011/*
9dcc53ea
Z
1012=for apidoc Amx|void|lex_stuff_pv|const char *pv|U32 flags
1013
1014Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1015immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1016reallocating the buffer if necessary. This means that lexing code that
1017runs later will see the characters as if they had appeared in the input.
1018It is not recommended to do this as part of normal parsing, and most
1019uses of this facility run the risk of the inserted characters being
1020interpreted in an unintended manner.
1021
1022The string to be inserted is represented by octets starting at I<pv>
1023and continuing to the first nul. These octets are interpreted as either
1024UTF-8 or Latin-1, according to whether the C<LEX_STUFF_UTF8> flag is set
1025in I<flags>. The characters are recoded for the lexer buffer, according
1026to how the buffer is currently being interpreted (L</lex_bufutf8>).
1027If it is not convenient to nul-terminate a string to be inserted, the
1028L</lex_stuff_pvn> function is more appropriate.
1029
1030=cut
1031*/
1032
1033void
1034Perl_lex_stuff_pv(pTHX_ const char *pv, U32 flags)
1035{
1036 PERL_ARGS_ASSERT_LEX_STUFF_PV;
1037 lex_stuff_pvn(pv, strlen(pv), flags);
1038}
1039
1040/*
f0e67a1d
Z
1041=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1042
1043Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1044immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1045reallocating the buffer if necessary. This means that lexing code that
1046runs later will see the characters as if they had appeared in the input.
1047It is not recommended to do this as part of normal parsing, and most
1048uses of this facility run the risk of the inserted characters being
1049interpreted in an unintended manner.
1050
1051The string to be inserted is the string value of I<sv>. The characters
1052are recoded for the lexer buffer, according to how the buffer is currently
9dcc53ea 1053being interpreted (L</lex_bufutf8>). If a string to be inserted is
f0e67a1d
Z
1054not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1055need to construct a scalar.
1056
1057=cut
1058*/
1059
1060void
1061Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1062{
1063 char *pv;
1064 STRLEN len;
1065 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1066 if (flags)
1067 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1068 pv = SvPV(sv, len);
1069 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1070}
1071
1072/*
1073=for apidoc Amx|void|lex_unstuff|char *ptr
1074
1075Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1076I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1077This hides the discarded text from any lexing code that runs later,
1078as if the text had never appeared.
1079
1080This is not the normal way to consume lexed text. For that, use
1081L</lex_read_to>.
1082
1083=cut
1084*/
1085
1086void
1087Perl_lex_unstuff(pTHX_ char *ptr)
1088{
1089 char *buf, *bufend;
1090 STRLEN unstuff_len;
1091 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1092 buf = PL_parser->bufptr;
1093 if (ptr < buf)
1094 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1095 if (ptr == buf)
1096 return;
1097 bufend = PL_parser->bufend;
1098 if (ptr > bufend)
1099 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1100 unstuff_len = ptr - buf;
1101 Move(ptr, buf, bufend+1-ptr, char);
1102 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1103 PL_parser->bufend = bufend - unstuff_len;
1104}
1105
1106/*
1107=for apidoc Amx|void|lex_read_to|char *ptr
1108
1109Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1110to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1111performing the correct bookkeeping whenever a newline character is passed.
1112This is the normal way to consume lexed text.
1113
1114Interpretation of the buffer's octets can be abstracted out by
1115using the slightly higher-level functions L</lex_peek_unichar> and
1116L</lex_read_unichar>.
1117
1118=cut
1119*/
1120
1121void
1122Perl_lex_read_to(pTHX_ char *ptr)
1123{
1124 char *s;
1125 PERL_ARGS_ASSERT_LEX_READ_TO;
1126 s = PL_parser->bufptr;
1127 if (ptr < s || ptr > PL_parser->bufend)
1128 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1129 for (; s != ptr; s++)
1130 if (*s == '\n') {
1131 CopLINE_inc(PL_curcop);
1132 PL_parser->linestart = s+1;
1133 }
1134 PL_parser->bufptr = ptr;
1135}
1136
1137/*
1138=for apidoc Amx|void|lex_discard_to|char *ptr
1139
1140Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1141up to I<ptr>. The remaining content of the buffer will be moved, and
1142all pointers into the buffer updated appropriately. I<ptr> must not
1143be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1144it is not permitted to discard text that has yet to be lexed.
1145
1146Normally it is not necessarily to do this directly, because it suffices to
1147use the implicit discarding behaviour of L</lex_next_chunk> and things
1148based on it. However, if a token stretches across multiple lines,
1f317c95 1149and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1150that purpose, then after completion of the token it would be wise to
1151explicitly discard the now-unneeded earlier lines, to avoid future
1152multi-line tokens growing the buffer without bound.
1153
1154=cut
1155*/
1156
1157void
1158Perl_lex_discard_to(pTHX_ char *ptr)
1159{
1160 char *buf;
1161 STRLEN discard_len;
1162 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1163 buf = SvPVX(PL_parser->linestr);
1164 if (ptr < buf)
1165 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1166 if (ptr == buf)
1167 return;
1168 if (ptr > PL_parser->bufptr)
1169 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1170 discard_len = ptr - buf;
1171 if (PL_parser->oldbufptr < ptr)
1172 PL_parser->oldbufptr = ptr;
1173 if (PL_parser->oldoldbufptr < ptr)
1174 PL_parser->oldoldbufptr = ptr;
1175 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1176 PL_parser->last_uni = NULL;
1177 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1178 PL_parser->last_lop = NULL;
1179 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1180 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1181 PL_parser->bufend -= discard_len;
1182 PL_parser->bufptr -= discard_len;
1183 PL_parser->oldbufptr -= discard_len;
1184 PL_parser->oldoldbufptr -= discard_len;
1185 if (PL_parser->last_uni)
1186 PL_parser->last_uni -= discard_len;
1187 if (PL_parser->last_lop)
1188 PL_parser->last_lop -= discard_len;
1189}
1190
1191/*
1192=for apidoc Amx|bool|lex_next_chunk|U32 flags
1193
1194Reads in the next chunk of text to be lexed, appending it to
1195L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1196looked to the end of the current chunk and wants to know more. It is
1197usual, but not necessary, for lexing to have consumed the entirety of
1198the current chunk at this time.
1199
1200If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1201chunk (i.e., the current chunk has been entirely consumed), normally the
1202current chunk will be discarded at the same time that the new chunk is
1203read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1204will not be discarded. If the current chunk has not been entirely
1205consumed, then it will not be discarded regardless of the flag.
1206
1207Returns true if some new text was added to the buffer, or false if the
1208buffer has reached the end of the input text.
1209
1210=cut
1211*/
1212
1213#define LEX_FAKE_EOF 0x80000000
1214
1215bool
1216Perl_lex_next_chunk(pTHX_ U32 flags)
1217{
1218 SV *linestr;
1219 char *buf;
1220 STRLEN old_bufend_pos, new_bufend_pos;
1221 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1222 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1223 bool got_some_for_debugger = 0;
f0e67a1d
Z
1224 bool got_some;
1225 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1226 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1227 linestr = PL_parser->linestr;
1228 buf = SvPVX(linestr);
1229 if (!(flags & LEX_KEEP_PREVIOUS) &&
1230 PL_parser->bufptr == PL_parser->bufend) {
1231 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1232 linestart_pos = 0;
1233 if (PL_parser->last_uni != PL_parser->bufend)
1234 PL_parser->last_uni = NULL;
1235 if (PL_parser->last_lop != PL_parser->bufend)
1236 PL_parser->last_lop = NULL;
1237 last_uni_pos = last_lop_pos = 0;
1238 *buf = 0;
1239 SvCUR(linestr) = 0;
1240 } else {
1241 old_bufend_pos = PL_parser->bufend - buf;
1242 bufptr_pos = PL_parser->bufptr - buf;
1243 oldbufptr_pos = PL_parser->oldbufptr - buf;
1244 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1245 linestart_pos = PL_parser->linestart - buf;
1246 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1247 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1248 }
1249 if (flags & LEX_FAKE_EOF) {
1250 goto eof;
1251 } else if (!PL_parser->rsfp) {
1252 got_some = 0;
1253 } else if (filter_gets(linestr, old_bufend_pos)) {
1254 got_some = 1;
17cc9359 1255 got_some_for_debugger = 1;
f0e67a1d 1256 } else {
580561a3
Z
1257 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1258 sv_setpvs(linestr, "");
f0e67a1d
Z
1259 eof:
1260 /* End of real input. Close filehandle (unless it was STDIN),
1261 * then add implicit termination.
1262 */
1263 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1264 PerlIO_clearerr(PL_parser->rsfp);
1265 else if (PL_parser->rsfp)
1266 (void)PerlIO_close(PL_parser->rsfp);
1267 PL_parser->rsfp = NULL;
737c24fc 1268 PL_parser->in_pod = 0;
f0e67a1d
Z
1269#ifdef PERL_MAD
1270 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1271 PL_faketokens = 1;
1272#endif
1273 if (!PL_in_eval && PL_minus_p) {
1274 sv_catpvs(linestr,
1275 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1276 PL_minus_n = PL_minus_p = 0;
1277 } else if (!PL_in_eval && PL_minus_n) {
1278 sv_catpvs(linestr, /*{*/";}");
1279 PL_minus_n = 0;
1280 } else
1281 sv_catpvs(linestr, ";");
1282 got_some = 1;
1283 }
1284 buf = SvPVX(linestr);
1285 new_bufend_pos = SvCUR(linestr);
1286 PL_parser->bufend = buf + new_bufend_pos;
1287 PL_parser->bufptr = buf + bufptr_pos;
1288 PL_parser->oldbufptr = buf + oldbufptr_pos;
1289 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1290 PL_parser->linestart = buf + linestart_pos;
1291 if (PL_parser->last_uni)
1292 PL_parser->last_uni = buf + last_uni_pos;
1293 if (PL_parser->last_lop)
1294 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1295 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1296 PL_curstash != PL_debstash) {
1297 /* debugger active and we're not compiling the debugger code,
1298 * so store the line into the debugger's array of lines
1299 */
1300 update_debugger_info(NULL, buf+old_bufend_pos,
1301 new_bufend_pos-old_bufend_pos);
1302 }
1303 return got_some;
1304}
1305
1306/*
1307=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1308
1309Looks ahead one (Unicode) character in the text currently being lexed.
1310Returns the codepoint (unsigned integer value) of the next character,
1311or -1 if lexing has reached the end of the input text. To consume the
1312peeked character, use L</lex_read_unichar>.
1313
1314If the next character is in (or extends into) the next chunk of input
1315text, the next chunk will be read in. Normally the current chunk will be
1316discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1317then the current chunk will not be discarded.
1318
1319If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1320is encountered, an exception is generated.
1321
1322=cut
1323*/
1324
1325I32
1326Perl_lex_peek_unichar(pTHX_ U32 flags)
1327{
749123ff 1328 dVAR;
f0e67a1d
Z
1329 char *s, *bufend;
1330 if (flags & ~(LEX_KEEP_PREVIOUS))
1331 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1332 s = PL_parser->bufptr;
1333 bufend = PL_parser->bufend;
1334 if (UTF) {
1335 U8 head;
1336 I32 unichar;
1337 STRLEN len, retlen;
1338 if (s == bufend) {
1339 if (!lex_next_chunk(flags))
1340 return -1;
1341 s = PL_parser->bufptr;
1342 bufend = PL_parser->bufend;
1343 }
1344 head = (U8)*s;
1345 if (!(head & 0x80))
1346 return head;
1347 if (head & 0x40) {
1348 len = PL_utf8skip[head];
1349 while ((STRLEN)(bufend-s) < len) {
1350 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1351 break;
1352 s = PL_parser->bufptr;
1353 bufend = PL_parser->bufend;
1354 }
1355 }
1356 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1357 if (retlen == (STRLEN)-1) {
1358 /* malformed UTF-8 */
1359 ENTER;
1360 SAVESPTR(PL_warnhook);
1361 PL_warnhook = PERL_WARNHOOK_FATAL;
1362 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1363 LEAVE;
1364 }
1365 return unichar;
1366 } else {
1367 if (s == bufend) {
1368 if (!lex_next_chunk(flags))
1369 return -1;
1370 s = PL_parser->bufptr;
1371 }
1372 return (U8)*s;
1373 }
1374}
1375
1376/*
1377=for apidoc Amx|I32|lex_read_unichar|U32 flags
1378
1379Reads the next (Unicode) character in the text currently being lexed.
1380Returns the codepoint (unsigned integer value) of the character read,
1381and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1382if lexing has reached the end of the input text. To non-destructively
1383examine the next character, use L</lex_peek_unichar> instead.
1384
1385If the next character is in (or extends into) the next chunk of input
1386text, the next chunk will be read in. Normally the current chunk will be
1387discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1388then the current chunk will not be discarded.
1389
1390If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1391is encountered, an exception is generated.
1392
1393=cut
1394*/
1395
1396I32
1397Perl_lex_read_unichar(pTHX_ U32 flags)
1398{
1399 I32 c;
1400 if (flags & ~(LEX_KEEP_PREVIOUS))
1401 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1402 c = lex_peek_unichar(flags);
1403 if (c != -1) {
1404 if (c == '\n')
1405 CopLINE_inc(PL_curcop);
1406 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1407 }
1408 return c;
1409}
1410
1411/*
1412=for apidoc Amx|void|lex_read_space|U32 flags
1413
1414Reads optional spaces, in Perl style, in the text currently being
1415lexed. The spaces may include ordinary whitespace characters and
1416Perl-style comments. C<#line> directives are processed if encountered.
1417L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1418at a non-space character (or the end of the input text).
1419
1420If spaces extend into the next chunk of input text, the next chunk will
1421be read in. Normally the current chunk will be discarded at the same
1422time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1423chunk will not be discarded.
1424
1425=cut
1426*/
1427
f0998909
Z
1428#define LEX_NO_NEXT_CHUNK 0x80000000
1429
f0e67a1d
Z
1430void
1431Perl_lex_read_space(pTHX_ U32 flags)
1432{
1433 char *s, *bufend;
1434 bool need_incline = 0;
f0998909 1435 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1436 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1437#ifdef PERL_MAD
1438 if (PL_skipwhite) {
1439 sv_free(PL_skipwhite);
1440 PL_skipwhite = NULL;
1441 }
1442 if (PL_madskills)
1443 PL_skipwhite = newSVpvs("");
1444#endif /* PERL_MAD */
1445 s = PL_parser->bufptr;
1446 bufend = PL_parser->bufend;
1447 while (1) {
1448 char c = *s;
1449 if (c == '#') {
1450 do {
1451 c = *++s;
1452 } while (!(c == '\n' || (c == 0 && s == bufend)));
1453 } else if (c == '\n') {
1454 s++;
1455 PL_parser->linestart = s;
1456 if (s == bufend)
1457 need_incline = 1;
1458 else
1459 incline(s);
1460 } else if (isSPACE(c)) {
1461 s++;
1462 } else if (c == 0 && s == bufend) {
1463 bool got_more;
1464#ifdef PERL_MAD
1465 if (PL_madskills)
1466 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1467#endif /* PERL_MAD */
f0998909
Z
1468 if (flags & LEX_NO_NEXT_CHUNK)
1469 break;
f0e67a1d
Z
1470 PL_parser->bufptr = s;
1471 CopLINE_inc(PL_curcop);
1472 got_more = lex_next_chunk(flags);
1473 CopLINE_dec(PL_curcop);
1474 s = PL_parser->bufptr;
1475 bufend = PL_parser->bufend;
1476 if (!got_more)
1477 break;
1478 if (need_incline && PL_parser->rsfp) {
1479 incline(s);
1480 need_incline = 0;
1481 }
1482 } else {
1483 break;
1484 }
1485 }
1486#ifdef PERL_MAD
1487 if (PL_madskills)
1488 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1489#endif /* PERL_MAD */
1490 PL_parser->bufptr = s;
1491}
1492
1493/*
ffb4593c
NT
1494 * S_incline
1495 * This subroutine has nothing to do with tilting, whether at windmills
1496 * or pinball tables. Its name is short for "increment line". It
57843af0 1497 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1498 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1499 * # line 500 "foo.pm"
1500 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1501 */
1502
76e3520e 1503STATIC void
d9095cec 1504S_incline(pTHX_ const char *s)
463ee0b2 1505{
97aff369 1506 dVAR;
d9095cec
NC
1507 const char *t;
1508 const char *n;
1509 const char *e;
463ee0b2 1510
7918f24d
NC
1511 PERL_ARGS_ASSERT_INCLINE;
1512
57843af0 1513 CopLINE_inc(PL_curcop);
463ee0b2
LW
1514 if (*s++ != '#')
1515 return;
d4c19fe8
AL
1516 while (SPACE_OR_TAB(*s))
1517 s++;
73659bf1
GS
1518 if (strnEQ(s, "line", 4))
1519 s += 4;
1520 else
1521 return;
084592ab 1522 if (SPACE_OR_TAB(*s))
73659bf1 1523 s++;
4e553d73 1524 else
73659bf1 1525 return;
d4c19fe8
AL
1526 while (SPACE_OR_TAB(*s))
1527 s++;
463ee0b2
LW
1528 if (!isDIGIT(*s))
1529 return;
d4c19fe8 1530
463ee0b2
LW
1531 n = s;
1532 while (isDIGIT(*s))
1533 s++;
07714eb4 1534 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1535 return;
bf4acbe4 1536 while (SPACE_OR_TAB(*s))
463ee0b2 1537 s++;
73659bf1 1538 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1539 s++;
73659bf1
GS
1540 e = t + 1;
1541 }
463ee0b2 1542 else {
c35e046a
AL
1543 t = s;
1544 while (!isSPACE(*t))
1545 t++;
73659bf1 1546 e = t;
463ee0b2 1547 }
bf4acbe4 1548 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1549 e++;
1550 if (*e != '\n' && *e != '\0')
1551 return; /* false alarm */
1552
f4dd75d9 1553 if (t - s > 0) {
d9095cec 1554 const STRLEN len = t - s;
8a5ee598 1555#ifndef USE_ITHREADS
19bad673
NC
1556 SV *const temp_sv = CopFILESV(PL_curcop);
1557 const char *cf;
1558 STRLEN tmplen;
1559
1560 if (temp_sv) {
1561 cf = SvPVX(temp_sv);
1562 tmplen = SvCUR(temp_sv);
1563 } else {
1564 cf = NULL;
1565 tmplen = 0;
1566 }
1567
42d9b98d 1568 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1569 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1570 * to *{"::_<newfilename"} */
44867030
NC
1571 /* However, the long form of evals is only turned on by the
1572 debugger - usually they're "(eval %lu)" */
1573 char smallbuf[128];
1574 char *tmpbuf;
1575 GV **gvp;
d9095cec 1576 STRLEN tmplen2 = len;
798b63bc 1577 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1578 tmpbuf = smallbuf;
1579 else
2ae0db35 1580 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1581 tmpbuf[0] = '_';
1582 tmpbuf[1] = '<';
2ae0db35 1583 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1584 tmplen += 2;
8a5ee598
RGS
1585 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1586 if (gvp) {
44867030
NC
1587 char *tmpbuf2;
1588 GV *gv2;
1589
1590 if (tmplen2 + 2 <= sizeof smallbuf)
1591 tmpbuf2 = smallbuf;
1592 else
1593 Newx(tmpbuf2, tmplen2 + 2, char);
1594
1595 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1596 /* Either they malloc'd it, or we malloc'd it,
1597 so no prefix is present in ours. */
1598 tmpbuf2[0] = '_';
1599 tmpbuf2[1] = '<';
1600 }
1601
1602 memcpy(tmpbuf2 + 2, s, tmplen2);
1603 tmplen2 += 2;
1604
8a5ee598 1605 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1606 if (!isGV(gv2)) {
8a5ee598 1607 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1608 /* adjust ${"::_<newfilename"} to store the new file name */
1609 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1610 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1611 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1612 }
44867030
NC
1613
1614 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1615 }
e66cf94c 1616 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1617 }
8a5ee598 1618#endif
05ec9bb3 1619 CopFILE_free(PL_curcop);
d9095cec 1620 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1621 }
57843af0 1622 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1623}
1624
29595ff2 1625#ifdef PERL_MAD
cd81e915 1626/* skip space before PL_thistoken */
29595ff2
NC
1627
1628STATIC char *
1629S_skipspace0(pTHX_ register char *s)
1630{
7918f24d
NC
1631 PERL_ARGS_ASSERT_SKIPSPACE0;
1632
29595ff2
NC
1633 s = skipspace(s);
1634 if (!PL_madskills)
1635 return s;
cd81e915
NC
1636 if (PL_skipwhite) {
1637 if (!PL_thiswhite)
6b29d1f5 1638 PL_thiswhite = newSVpvs("");
cd81e915
NC
1639 sv_catsv(PL_thiswhite, PL_skipwhite);
1640 sv_free(PL_skipwhite);
1641 PL_skipwhite = 0;
1642 }
1643 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1644 return s;
1645}
1646
cd81e915 1647/* skip space after PL_thistoken */
29595ff2
NC
1648
1649STATIC char *
1650S_skipspace1(pTHX_ register char *s)
1651{
d4c19fe8 1652 const char *start = s;
29595ff2
NC
1653 I32 startoff = start - SvPVX(PL_linestr);
1654
7918f24d
NC
1655 PERL_ARGS_ASSERT_SKIPSPACE1;
1656
29595ff2
NC
1657 s = skipspace(s);
1658 if (!PL_madskills)
1659 return s;
1660 start = SvPVX(PL_linestr) + startoff;
cd81e915 1661 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1662 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1663 PL_thistoken = newSVpvn(tstart, start - tstart);
1664 }
1665 PL_realtokenstart = -1;
1666 if (PL_skipwhite) {
1667 if (!PL_nextwhite)
6b29d1f5 1668 PL_nextwhite = newSVpvs("");
cd81e915
NC
1669 sv_catsv(PL_nextwhite, PL_skipwhite);
1670 sv_free(PL_skipwhite);
1671 PL_skipwhite = 0;
29595ff2
NC
1672 }
1673 return s;
1674}
1675
1676STATIC char *
1677S_skipspace2(pTHX_ register char *s, SV **svp)
1678{
c35e046a
AL
1679 char *start;
1680 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1681 const I32 startoff = s - SvPVX(PL_linestr);
1682
7918f24d
NC
1683 PERL_ARGS_ASSERT_SKIPSPACE2;
1684
29595ff2
NC
1685 s = skipspace(s);
1686 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1687 if (!PL_madskills || !svp)
1688 return s;
1689 start = SvPVX(PL_linestr) + startoff;
cd81e915 1690 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1691 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1692 PL_thistoken = newSVpvn(tstart, start - tstart);
1693 PL_realtokenstart = -1;
29595ff2 1694 }
cd81e915 1695 if (PL_skipwhite) {
29595ff2 1696 if (!*svp)
6b29d1f5 1697 *svp = newSVpvs("");
cd81e915
NC
1698 sv_setsv(*svp, PL_skipwhite);
1699 sv_free(PL_skipwhite);
1700 PL_skipwhite = 0;
29595ff2
NC
1701 }
1702
1703 return s;
1704}
1705#endif
1706
80a702cd 1707STATIC void
15f169a1 1708S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1709{
1710 AV *av = CopFILEAVx(PL_curcop);
1711 if (av) {
b9f83d2f 1712 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1713 if (orig_sv)
1714 sv_setsv(sv, orig_sv);
1715 else
1716 sv_setpvn(sv, buf, len);
80a702cd
RGS
1717 (void)SvIOK_on(sv);
1718 SvIV_set(sv, 0);
1719 av_store(av, (I32)CopLINE(PL_curcop), sv);
1720 }
1721}
1722
ffb4593c
NT
1723/*
1724 * S_skipspace
1725 * Called to gobble the appropriate amount and type of whitespace.
1726 * Skips comments as well.
1727 */
1728
76e3520e 1729STATIC char *
cea2e8a9 1730S_skipspace(pTHX_ register char *s)
a687059c 1731{
5db06880 1732#ifdef PERL_MAD
f0e67a1d
Z
1733 char *start = s;
1734#endif /* PERL_MAD */
7918f24d 1735 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1736#ifdef PERL_MAD
cd81e915
NC
1737 if (PL_skipwhite) {
1738 sv_free(PL_skipwhite);
f0e67a1d 1739 PL_skipwhite = NULL;
5db06880 1740 }
f0e67a1d 1741#endif /* PERL_MAD */
3280af22 1742 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1743 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1744 s++;
f0e67a1d
Z
1745 } else {
1746 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1747 PL_bufptr = s;
f0998909
Z
1748 lex_read_space(LEX_KEEP_PREVIOUS |
1749 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1750 LEX_NO_NEXT_CHUNK : 0));
3280af22 1751 s = PL_bufptr;
f0e67a1d
Z
1752 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1753 if (PL_linestart > PL_bufptr)
1754 PL_bufptr = PL_linestart;
1755 return s;
463ee0b2 1756 }
5db06880 1757#ifdef PERL_MAD
f0e67a1d
Z
1758 if (PL_madskills)
1759 PL_skipwhite = newSVpvn(start, s-start);
1760#endif /* PERL_MAD */
5db06880 1761 return s;
a687059c 1762}
378cc40b 1763
ffb4593c
NT
1764/*
1765 * S_check_uni
1766 * Check the unary operators to ensure there's no ambiguity in how they're
1767 * used. An ambiguous piece of code would be:
1768 * rand + 5
1769 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1770 * the +5 is its argument.
1771 */
1772
76e3520e 1773STATIC void
cea2e8a9 1774S_check_uni(pTHX)
ba106d47 1775{
97aff369 1776 dVAR;
d4c19fe8
AL
1777 const char *s;
1778 const char *t;
2f3197b3 1779
3280af22 1780 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1781 return;
3280af22
NIS
1782 while (isSPACE(*PL_last_uni))
1783 PL_last_uni++;
c35e046a
AL
1784 s = PL_last_uni;
1785 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1786 s++;
3280af22 1787 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1788 return;
6136c704 1789
9b387841
NC
1790 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1791 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1792 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1793}
1794
ffb4593c
NT
1795/*
1796 * LOP : macro to build a list operator. Its behaviour has been replaced
1797 * with a subroutine, S_lop() for which LOP is just another name.
1798 */
1799
a0d0e21e
LW
1800#define LOP(f,x) return lop(f,x,s)
1801
ffb4593c
NT
1802/*
1803 * S_lop
1804 * Build a list operator (or something that might be one). The rules:
1805 * - if we have a next token, then it's a list operator [why?]
1806 * - if the next thing is an opening paren, then it's a function
1807 * - else it's a list operator
1808 */
1809
76e3520e 1810STATIC I32
a0be28da 1811S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1812{
97aff369 1813 dVAR;
7918f24d
NC
1814
1815 PERL_ARGS_ASSERT_LOP;
1816
6154021b 1817 pl_yylval.ival = f;
35c8bce7 1818 CLINE;
3280af22
NIS
1819 PL_expect = x;
1820 PL_bufptr = s;
1821 PL_last_lop = PL_oldbufptr;
eb160463 1822 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1823#ifdef PERL_MAD
1824 if (PL_lasttoke)
1825 return REPORT(LSTOP);
1826#else
3280af22 1827 if (PL_nexttoke)
bbf60fe6 1828 return REPORT(LSTOP);
5db06880 1829#endif
79072805 1830 if (*s == '(')
bbf60fe6 1831 return REPORT(FUNC);
29595ff2 1832 s = PEEKSPACE(s);
79072805 1833 if (*s == '(')
bbf60fe6 1834 return REPORT(FUNC);
79072805 1835 else
bbf60fe6 1836 return REPORT(LSTOP);
79072805
LW
1837}
1838
5db06880
NC
1839#ifdef PERL_MAD
1840 /*
1841 * S_start_force
1842 * Sets up for an eventual force_next(). start_force(0) basically does
1843 * an unshift, while start_force(-1) does a push. yylex removes items
1844 * on the "pop" end.
1845 */
1846
1847STATIC void
1848S_start_force(pTHX_ int where)
1849{
1850 int i;
1851
cd81e915 1852 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1853 where = PL_lasttoke;
cd81e915
NC
1854 assert(PL_curforce < 0 || PL_curforce == where);
1855 if (PL_curforce != where) {
5db06880
NC
1856 for (i = PL_lasttoke; i > where; --i) {
1857 PL_nexttoke[i] = PL_nexttoke[i-1];
1858 }
1859 PL_lasttoke++;
1860 }
cd81e915 1861 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1862 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1863 PL_curforce = where;
1864 if (PL_nextwhite) {
5db06880 1865 if (PL_madskills)
6b29d1f5 1866 curmad('^', newSVpvs(""));
cd81e915 1867 CURMAD('_', PL_nextwhite);
5db06880
NC
1868 }
1869}
1870
1871STATIC void
1872S_curmad(pTHX_ char slot, SV *sv)
1873{
1874 MADPROP **where;
1875
1876 if (!sv)
1877 return;
cd81e915
NC
1878 if (PL_curforce < 0)
1879 where = &PL_thismad;
5db06880 1880 else
cd81e915 1881 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1882
cd81e915 1883 if (PL_faketokens)
76f68e9b 1884 sv_setpvs(sv, "");
5db06880
NC
1885 else {
1886 if (!IN_BYTES) {
1887 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1888 SvUTF8_on(sv);
1889 else if (PL_encoding) {
1890 sv_recode_to_utf8(sv, PL_encoding);
1891 }
1892 }
1893 }
1894
1895 /* keep a slot open for the head of the list? */
1896 if (slot != '_' && *where && (*where)->mad_key == '^') {
1897 (*where)->mad_key = slot;
daba3364 1898 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1899 (*where)->mad_val = (void*)sv;
1900 }
1901 else
1902 addmad(newMADsv(slot, sv), where, 0);
1903}
1904#else
b3f24c00
MHM
1905# define start_force(where) NOOP
1906# define curmad(slot, sv) NOOP
5db06880
NC
1907#endif
1908
ffb4593c
NT
1909/*
1910 * S_force_next
9cbb5ea2 1911 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1912 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1913 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1914 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1915 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1916 */
1917
4e553d73 1918STATIC void
cea2e8a9 1919S_force_next(pTHX_ I32 type)
79072805 1920{
97aff369 1921 dVAR;
704d4215
GG
1922#ifdef DEBUGGING
1923 if (DEBUG_T_TEST) {
1924 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1925 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1926 }
1927#endif
5db06880 1928#ifdef PERL_MAD
cd81e915 1929 if (PL_curforce < 0)
5db06880 1930 start_force(PL_lasttoke);
cd81e915 1931 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1932 if (PL_lex_state != LEX_KNOWNEXT)
1933 PL_lex_defer = PL_lex_state;
1934 PL_lex_state = LEX_KNOWNEXT;
1935 PL_lex_expect = PL_expect;
cd81e915 1936 PL_curforce = -1;
5db06880 1937#else
3280af22
NIS
1938 PL_nexttype[PL_nexttoke] = type;
1939 PL_nexttoke++;
1940 if (PL_lex_state != LEX_KNOWNEXT) {
1941 PL_lex_defer = PL_lex_state;
1942 PL_lex_expect = PL_expect;
1943 PL_lex_state = LEX_KNOWNEXT;
79072805 1944 }
5db06880 1945#endif
79072805
LW
1946}
1947
28ac2b49
Z
1948void
1949Perl_yyunlex(pTHX)
1950{
a7aaec61
Z
1951 int yyc = PL_parser->yychar;
1952 if (yyc != YYEMPTY) {
1953 if (yyc) {
1954 start_force(-1);
1955 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1956 if (yyc == '{'/*}*/ || yyc == HASHBRACK || yyc == '['/*]*/) {
1957 PL_lex_brackets--;
1958 yyc |= (1<<24) | (PL_lex_brackstack[PL_lex_brackets] << 16);
1959 }
1960 force_next(yyc);
1961 }
28ac2b49
Z
1962 PL_parser->yychar = YYEMPTY;
1963 }
1964}
1965
d0a148a6 1966STATIC SV *
15f169a1 1967S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1968{
97aff369 1969 dVAR;
740cce10 1970 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1971 !IN_BYTES
1972 && UTF
1973 && !is_ascii_string((const U8*)start, len)
740cce10 1974 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1975 return sv;
1976}
1977
ffb4593c
NT
1978/*
1979 * S_force_word
1980 * When the lexer knows the next thing is a word (for instance, it has
1981 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1982 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1983 * lookahead.
ffb4593c
NT
1984 *
1985 * Arguments:
b1b65b59 1986 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1987 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1988 * int check_keyword : if true, Perl checks to make sure the word isn't
1989 * a keyword (do this if the word is a label, e.g. goto FOO)
1990 * int allow_pack : if true, : characters will also be allowed (require,
1991 * use, etc. do this)
9cbb5ea2 1992 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1993 */
1994
76e3520e 1995STATIC char *
cea2e8a9 1996S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1997{
97aff369 1998 dVAR;
463ee0b2
LW
1999 register char *s;
2000 STRLEN len;
4e553d73 2001
7918f24d
NC
2002 PERL_ARGS_ASSERT_FORCE_WORD;
2003
29595ff2 2004 start = SKIPSPACE1(start);
463ee0b2 2005 s = start;
7e2040f0 2006 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 2007 (allow_pack && *s == ':') ||
15f0808c 2008 (allow_initial_tick && *s == '\'') )
a0d0e21e 2009 {
3280af22 2010 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 2011 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 2012 return start;
cd81e915 2013 start_force(PL_curforce);
5db06880
NC
2014 if (PL_madskills)
2015 curmad('X', newSVpvn(start,s-start));
463ee0b2 2016 if (token == METHOD) {
29595ff2 2017 s = SKIPSPACE1(s);
463ee0b2 2018 if (*s == '(')
3280af22 2019 PL_expect = XTERM;
463ee0b2 2020 else {
3280af22 2021 PL_expect = XOPERATOR;
463ee0b2 2022 }
79072805 2023 }
e74e6b3d 2024 if (PL_madskills)
63575281 2025 curmad('g', newSVpvs( "forced" ));
9ded7720 2026 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
2027 = (OP*)newSVOP(OP_CONST,0,
2028 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 2029 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
2030 force_next(token);
2031 }
2032 return s;
2033}
2034
ffb4593c
NT
2035/*
2036 * S_force_ident
9cbb5ea2 2037 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2038 * text only contains the "foo" portion. The first argument is a pointer
2039 * to the "foo", and the second argument is the type symbol to prefix.
2040 * Forces the next token to be a "WORD".
9cbb5ea2 2041 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2042 */
2043
76e3520e 2044STATIC void
bfed75c6 2045S_force_ident(pTHX_ register const char *s, int kind)
79072805 2046{
97aff369 2047 dVAR;
7918f24d
NC
2048
2049 PERL_ARGS_ASSERT_FORCE_IDENT;
2050
c35e046a 2051 if (*s) {
90e5519e
NC
2052 const STRLEN len = strlen(s);
2053 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2054 start_force(PL_curforce);
9ded7720 2055 NEXTVAL_NEXTTOKE.opval = o;
79072805 2056 force_next(WORD);
748a9306 2057 if (kind) {
11343788 2058 o->op_private = OPpCONST_ENTERED;
55497cff 2059 /* XXX see note in pp_entereval() for why we forgo typo
2060 warnings if the symbol must be introduced in an eval.
2061 GSAR 96-10-12 */
90e5519e
NC
2062 gv_fetchpvn_flags(s, len,
2063 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2064 : GV_ADD,
2065 kind == '$' ? SVt_PV :
2066 kind == '@' ? SVt_PVAV :
2067 kind == '%' ? SVt_PVHV :
a0d0e21e 2068 SVt_PVGV
90e5519e 2069 );
748a9306 2070 }
79072805
LW
2071 }
2072}
2073
1571675a
GS
2074NV
2075Perl_str_to_version(pTHX_ SV *sv)
2076{
2077 NV retval = 0.0;
2078 NV nshift = 1.0;
2079 STRLEN len;
cfd0369c 2080 const char *start = SvPV_const(sv,len);
9d4ba2ae 2081 const char * const end = start + len;
504618e9 2082 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2083
2084 PERL_ARGS_ASSERT_STR_TO_VERSION;
2085
1571675a 2086 while (start < end) {
ba210ebe 2087 STRLEN skip;
1571675a
GS
2088 UV n;
2089 if (utf)
9041c2e3 2090 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2091 else {
2092 n = *(U8*)start;
2093 skip = 1;
2094 }
2095 retval += ((NV)n)/nshift;
2096 start += skip;
2097 nshift *= 1000;
2098 }
2099 return retval;
2100}
2101
4e553d73 2102/*
ffb4593c
NT
2103 * S_force_version
2104 * Forces the next token to be a version number.
e759cc13
RGS
2105 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2106 * and if "guessing" is TRUE, then no new token is created (and the caller
2107 * must use an alternative parsing method).
ffb4593c
NT
2108 */
2109
76e3520e 2110STATIC char *
e759cc13 2111S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2112{
97aff369 2113 dVAR;
5f66b61c 2114 OP *version = NULL;
44dcb63b 2115 char *d;
5db06880
NC
2116#ifdef PERL_MAD
2117 I32 startoff = s - SvPVX(PL_linestr);
2118#endif
89bfa8cd 2119
7918f24d
NC
2120 PERL_ARGS_ASSERT_FORCE_VERSION;
2121
29595ff2 2122 s = SKIPSPACE1(s);
89bfa8cd 2123
44dcb63b 2124 d = s;
dd629d5b 2125 if (*d == 'v')
44dcb63b 2126 d++;
44dcb63b 2127 if (isDIGIT(*d)) {
e759cc13
RGS
2128 while (isDIGIT(*d) || *d == '_' || *d == '.')
2129 d++;
5db06880
NC
2130#ifdef PERL_MAD
2131 if (PL_madskills) {
cd81e915 2132 start_force(PL_curforce);
5db06880
NC
2133 curmad('X', newSVpvn(s,d-s));
2134 }
2135#endif
4e4da3ac 2136 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2137 SV *ver;
8d08d9ba
DG
2138#ifdef USE_LOCALE_NUMERIC
2139 char *loc = setlocale(LC_NUMERIC, "C");
2140#endif
6154021b 2141 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2142#ifdef USE_LOCALE_NUMERIC
2143 setlocale(LC_NUMERIC, loc);
2144#endif
6154021b 2145 version = pl_yylval.opval;
dd629d5b
GS
2146 ver = cSVOPx(version)->op_sv;
2147 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2148 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2149 SvNV_set(ver, str_to_version(ver));
1571675a 2150 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2151 }
89bfa8cd 2152 }
5db06880
NC
2153 else if (guessing) {
2154#ifdef PERL_MAD
2155 if (PL_madskills) {
cd81e915
NC
2156 sv_free(PL_nextwhite); /* let next token collect whitespace */
2157 PL_nextwhite = 0;
5db06880
NC
2158 s = SvPVX(PL_linestr) + startoff;
2159 }
2160#endif
e759cc13 2161 return s;
5db06880 2162 }
89bfa8cd 2163 }
2164
5db06880
NC
2165#ifdef PERL_MAD
2166 if (PL_madskills && !version) {
cd81e915
NC
2167 sv_free(PL_nextwhite); /* let next token collect whitespace */
2168 PL_nextwhite = 0;
5db06880
NC
2169 s = SvPVX(PL_linestr) + startoff;
2170 }
2171#endif
89bfa8cd 2172 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2173 start_force(PL_curforce);
9ded7720 2174 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2175 force_next(WORD);
89bfa8cd 2176
e759cc13 2177 return s;
89bfa8cd 2178}
2179
ffb4593c 2180/*
91152fc1
DG
2181 * S_force_strict_version
2182 * Forces the next token to be a version number using strict syntax rules.
2183 */
2184
2185STATIC char *
2186S_force_strict_version(pTHX_ char *s)
2187{
2188 dVAR;
2189 OP *version = NULL;
2190#ifdef PERL_MAD
2191 I32 startoff = s - SvPVX(PL_linestr);
2192#endif
2193 const char *errstr = NULL;
2194
2195 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2196
2197 while (isSPACE(*s)) /* leading whitespace */
2198 s++;
2199
2200 if (is_STRICT_VERSION(s,&errstr)) {
2201 SV *ver = newSV(0);
2202 s = (char *)scan_version(s, ver, 0);
2203 version = newSVOP(OP_CONST, 0, ver);
2204 }
4e4da3ac
Z
2205 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2206 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2207 {
91152fc1
DG
2208 PL_bufptr = s;
2209 if (errstr)
2210 yyerror(errstr); /* version required */
2211 return s;
2212 }
2213
2214#ifdef PERL_MAD
2215 if (PL_madskills && !version) {
2216 sv_free(PL_nextwhite); /* let next token collect whitespace */
2217 PL_nextwhite = 0;
2218 s = SvPVX(PL_linestr) + startoff;
2219 }
2220#endif
2221 /* NOTE: The parser sees the package name and the VERSION swapped */
2222 start_force(PL_curforce);
2223 NEXTVAL_NEXTTOKE.opval = version;
2224 force_next(WORD);
2225
2226 return s;
2227}
2228
2229/*
ffb4593c
NT
2230 * S_tokeq
2231 * Tokenize a quoted string passed in as an SV. It finds the next
2232 * chunk, up to end of string or a backslash. It may make a new
2233 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2234 * turns \\ into \.
2235 */
2236
76e3520e 2237STATIC SV *
cea2e8a9 2238S_tokeq(pTHX_ SV *sv)
79072805 2239{
97aff369 2240 dVAR;
79072805
LW
2241 register char *s;
2242 register char *send;
2243 register char *d;
b3ac6de7
IZ
2244 STRLEN len = 0;
2245 SV *pv = sv;
79072805 2246
7918f24d
NC
2247 PERL_ARGS_ASSERT_TOKEQ;
2248
79072805 2249 if (!SvLEN(sv))
b3ac6de7 2250 goto finish;
79072805 2251
a0d0e21e 2252 s = SvPV_force(sv, len);
21a311ee 2253 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2254 goto finish;
463ee0b2 2255 send = s + len;
dcb21ed6
NC
2256 /* This is relying on the SV being "well formed" with a trailing '\0' */
2257 while (s < send && !(*s == '\\' && s[1] == '\\'))
79072805
LW
2258 s++;
2259 if (s == send)
b3ac6de7 2260 goto finish;
79072805 2261 d = s;
be4731d2 2262 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2263 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2264 }
79072805
LW
2265 while (s < send) {
2266 if (*s == '\\') {
a0d0e21e 2267 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2268 s++; /* all that, just for this */
2269 }
2270 *d++ = *s++;
2271 }
2272 *d = '\0';
95a20fc0 2273 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2274 finish:
3280af22 2275 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2276 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2277 return sv;
2278}
2279
ffb4593c
NT
2280/*
2281 * Now come three functions related to double-quote context,
2282 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2283 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2284 * interact with PL_lex_state, and create fake ( ... ) argument lists
2285 * to handle functions and concatenation.
2286 * They assume that whoever calls them will be setting up a fake
2287 * join call, because each subthing puts a ',' after it. This lets
2288 * "lower \luPpEr"
2289 * become
2290 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2291 *
2292 * (I'm not sure whether the spurious commas at the end of lcfirst's
2293 * arguments and join's arguments are created or not).
2294 */
2295
2296/*
2297 * S_sublex_start
6154021b 2298 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2299 *
2300 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2301 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2302 *
2303 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2304 *
2305 * Everything else becomes a FUNC.
2306 *
2307 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2308 * had an OP_CONST or OP_READLINE). This just sets us up for a
2309 * call to S_sublex_push().
2310 */
2311
76e3520e 2312STATIC I32
cea2e8a9 2313S_sublex_start(pTHX)
79072805 2314{
97aff369 2315 dVAR;
6154021b 2316 register const I32 op_type = pl_yylval.ival;
79072805
LW
2317
2318 if (op_type == OP_NULL) {
6154021b 2319 pl_yylval.opval = PL_lex_op;
5f66b61c 2320 PL_lex_op = NULL;
79072805
LW
2321 return THING;
2322 }
2323 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2324 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2325
2326 if (SvTYPE(sv) == SVt_PVIV) {
2327 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2328 STRLEN len;
96a5add6 2329 const char * const p = SvPV_const(sv, len);
740cce10 2330 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2331 SvREFCNT_dec(sv);
2332 sv = nsv;
4e553d73 2333 }
6154021b 2334 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2335 PL_lex_stuff = NULL;
6f33ba73
RGS
2336 /* Allow <FH> // "foo" */
2337 if (op_type == OP_READLINE)
2338 PL_expect = XTERMORDORDOR;
79072805
LW
2339 return THING;
2340 }
e3f73d4e
RGS
2341 else if (op_type == OP_BACKTICK && PL_lex_op) {
2342 /* readpipe() vas overriden */
2343 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2344 pl_yylval.opval = PL_lex_op;
9b201d7d 2345 PL_lex_op = NULL;
e3f73d4e
RGS
2346 PL_lex_stuff = NULL;
2347 return THING;
2348 }
79072805 2349
3280af22 2350 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2351 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2352 PL_sublex_info.sub_op = PL_lex_op;
2353 PL_lex_state = LEX_INTERPPUSH;
55497cff 2354
3280af22
NIS
2355 PL_expect = XTERM;
2356 if (PL_lex_op) {
6154021b 2357 pl_yylval.opval = PL_lex_op;
5f66b61c 2358 PL_lex_op = NULL;
55497cff 2359 return PMFUNC;
2360 }
2361 else
2362 return FUNC;
2363}
2364
ffb4593c
NT
2365/*
2366 * S_sublex_push
2367 * Create a new scope to save the lexing state. The scope will be
2368 * ended in S_sublex_done. Returns a '(', starting the function arguments
2369 * to the uc, lc, etc. found before.
2370 * Sets PL_lex_state to LEX_INTERPCONCAT.
2371 */
2372
76e3520e 2373STATIC I32
cea2e8a9 2374S_sublex_push(pTHX)
55497cff 2375{
27da23d5 2376 dVAR;
f46d017c 2377 ENTER;
55497cff 2378
3280af22 2379 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2380 SAVEBOOL(PL_lex_dojoin);
3280af22 2381 SAVEI32(PL_lex_brackets);
3280af22
NIS
2382 SAVEI32(PL_lex_casemods);
2383 SAVEI32(PL_lex_starts);
651b5b28 2384 SAVEI8(PL_lex_state);
7766f137 2385 SAVEVPTR(PL_lex_inpat);
98246f1e 2386 SAVEI16(PL_lex_inwhat);
57843af0 2387 SAVECOPLINE(PL_curcop);
3280af22 2388 SAVEPPTR(PL_bufptr);
8452ff4b 2389 SAVEPPTR(PL_bufend);
3280af22
NIS
2390 SAVEPPTR(PL_oldbufptr);
2391 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2392 SAVEPPTR(PL_last_lop);
2393 SAVEPPTR(PL_last_uni);
3280af22
NIS
2394 SAVEPPTR(PL_linestart);
2395 SAVESPTR(PL_linestr);
8edd5f42
RGS
2396 SAVEGENERICPV(PL_lex_brackstack);
2397 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2398
2399 PL_linestr = PL_lex_stuff;
a0714e2c 2400 PL_lex_stuff = NULL;
3280af22 2401
9cbb5ea2
GS
2402 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2403 = SvPVX(PL_linestr);
3280af22 2404 PL_bufend += SvCUR(PL_linestr);
bd61b366 2405 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2406 SAVEFREESV(PL_linestr);
2407
2408 PL_lex_dojoin = FALSE;
2409 PL_lex_brackets = 0;
a02a5408
JC
2410 Newx(PL_lex_brackstack, 120, char);
2411 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2412 PL_lex_casemods = 0;
2413 *PL_lex_casestack = '\0';
2414 PL_lex_starts = 0;
2415 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2416 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2417
2418 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
bb16bae8 2419 if (PL_lex_inwhat == OP_TRANSR) PL_lex_inwhat = OP_TRANS;
3280af22
NIS
2420 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2421 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2422 else
5f66b61c 2423 PL_lex_inpat = NULL;
79072805 2424
55497cff 2425 return '(';
79072805
LW
2426}
2427
ffb4593c
NT
2428/*
2429 * S_sublex_done
2430 * Restores lexer state after a S_sublex_push.
2431 */
2432
76e3520e 2433STATIC I32
cea2e8a9 2434S_sublex_done(pTHX)
79072805 2435{
27da23d5 2436 dVAR;
3280af22 2437 if (!PL_lex_starts++) {
396482e1 2438 SV * const sv = newSVpvs("");
9aa983d2
JH
2439 if (SvUTF8(PL_linestr))
2440 SvUTF8_on(sv);
3280af22 2441 PL_expect = XOPERATOR;
6154021b 2442 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2443 return THING;
2444 }
2445
3280af22
NIS
2446 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2447 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2448 return yylex();
79072805
LW
2449 }
2450
ffb4593c 2451 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
bb16bae8 2452 assert(PL_lex_inwhat != OP_TRANSR);
3280af22
NIS
2453 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2454 PL_linestr = PL_lex_repl;
2455 PL_lex_inpat = 0;
2456 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2457 PL_bufend += SvCUR(PL_linestr);
bd61b366 2458 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2459 SAVEFREESV(PL_linestr);
2460 PL_lex_dojoin = FALSE;
2461 PL_lex_brackets = 0;
3280af22
NIS
2462 PL_lex_casemods = 0;
2463 *PL_lex_casestack = '\0';
2464 PL_lex_starts = 0;
25da4f38 2465 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2466 PL_lex_state = LEX_INTERPNORMAL;
2467 PL_lex_starts++;
e9fa98b2
HS
2468 /* we don't clear PL_lex_repl here, so that we can check later
2469 whether this is an evalled subst; that means we rely on the
2470 logic to ensure sublex_done() is called again only via the
2471 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2472 }
e9fa98b2 2473 else {
3280af22 2474 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2475 PL_lex_repl = NULL;
e9fa98b2 2476 }
79072805 2477 return ',';
ffed7fef
LW
2478 }
2479 else {
5db06880
NC
2480#ifdef PERL_MAD
2481 if (PL_madskills) {
cd81e915
NC
2482 if (PL_thiswhite) {
2483 if (!PL_endwhite)
6b29d1f5 2484 PL_endwhite = newSVpvs("");
cd81e915
NC
2485 sv_catsv(PL_endwhite, PL_thiswhite);
2486 PL_thiswhite = 0;
2487 }
2488 if (PL_thistoken)
76f68e9b 2489 sv_setpvs(PL_thistoken,"");
5db06880 2490 else
cd81e915 2491 PL_realtokenstart = -1;
5db06880
NC
2492 }
2493#endif
f46d017c 2494 LEAVE;
3280af22
NIS
2495 PL_bufend = SvPVX(PL_linestr);
2496 PL_bufend += SvCUR(PL_linestr);
2497 PL_expect = XOPERATOR;
09bef843 2498 PL_sublex_info.sub_inwhat = 0;
79072805 2499 return ')';
ffed7fef
LW
2500 }
2501}
2502
02aa26ce
NT
2503/*
2504 scan_const
2505
2506 Extracts a pattern, double-quoted string, or transliteration. This
2507 is terrifying code.
2508
94def140 2509 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2510 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2511 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2512
94def140
TS
2513 Returns a pointer to the character scanned up to. If this is
2514 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2515 successfully parsed), will leave an OP for the substring scanned
6154021b 2516 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2517 by looking at the next characters herself.
2518
02aa26ce
NT
2519 In patterns:
2520 backslashes:
ff3f963a 2521 constants: \N{NAME} only
02aa26ce
NT
2522 case and quoting: \U \Q \E
2523 stops on @ and $, but not for $ as tail anchor
2524
2525 In transliterations:
2526 characters are VERY literal, except for - not at the start or end
94def140
TS
2527 of the string, which indicates a range. If the range is in bytes,
2528 scan_const expands the range to the full set of intermediate
2529 characters. If the range is in utf8, the hyphen is replaced with
2530 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2531
2532 In double-quoted strings:
2533 backslashes:
2534 double-quoted style: \r and \n
ff3f963a 2535 constants: \x31, etc.
94def140 2536 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2537 case and quoting: \U \Q \E
2538 stops on @ and $
2539
2540 scan_const does *not* construct ops to handle interpolated strings.
2541 It stops processing as soon as it finds an embedded $ or @ variable
2542 and leaves it to the caller to work out what's going on.
2543
94def140
TS
2544 embedded arrays (whether in pattern or not) could be:
2545 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2546
2547 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2548
2549 $ in pattern could be $foo or could be tail anchor. Assumption:
2550 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2551 followed by one of "()| \r\n\t"
02aa26ce
NT
2552
2553 \1 (backreferences) are turned into $1
2554
2555 The structure of the code is
2556 while (there's a character to process) {
94def140
TS
2557 handle transliteration ranges
2558 skip regexp comments /(?#comment)/ and codes /(?{code})/
2559 skip #-initiated comments in //x patterns
2560 check for embedded arrays
02aa26ce
NT
2561 check for embedded scalars
2562 if (backslash) {
94def140 2563 deprecate \1 in substitution replacements
02aa26ce
NT
2564 handle string-changing backslashes \l \U \Q \E, etc.
2565 switch (what was escaped) {
94def140 2566 handle \- in a transliteration (becomes a literal -)
ff3f963a 2567 if a pattern and not \N{, go treat as regular character
94def140
TS
2568 handle \132 (octal characters)
2569 handle \x15 and \x{1234} (hex characters)
ff3f963a 2570 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2571 handle \cV (control characters)
2572 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2573 } (end switch)
77a135fe 2574 continue
02aa26ce 2575 } (end if backslash)
77a135fe 2576 handle regular character
02aa26ce 2577 } (end while character to read)
4e553d73 2578
02aa26ce
NT
2579*/
2580
76e3520e 2581STATIC char *
cea2e8a9 2582S_scan_const(pTHX_ char *start)
79072805 2583{
97aff369 2584 dVAR;
3280af22 2585 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2586 SV *sv = newSV(send - start); /* sv for the constant. See
2587 note below on sizing. */
02aa26ce
NT
2588 register char *s = start; /* start of the constant */
2589 register char *d = SvPVX(sv); /* destination for copies */
2590 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2591 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2592 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2593 I32 this_utf8 = UTF; /* Is the source string assumed
2594 to be UTF8? But, this can
2595 show as true when the source
2596 isn't utf8, as for example
2597 when it is entirely composed
2598 of hex constants */
2599
2600 /* Note on sizing: The scanned constant is placed into sv, which is
2601 * initialized by newSV() assuming one byte of output for every byte of
2602 * input. This routine expects newSV() to allocate an extra byte for a
2603 * trailing NUL, which this routine will append if it gets to the end of
2604 * the input. There may be more bytes of input than output (eg., \N{LATIN
2605 * CAPITAL LETTER A}), or more output than input if the constant ends up
2606 * recoded to utf8, but each time a construct is found that might increase
2607 * the needed size, SvGROW() is called. Its size parameter each time is
2608 * based on the best guess estimate at the time, namely the length used so
2609 * far, plus the length the current construct will occupy, plus room for
2610 * the trailing NUL, plus one byte for every input byte still unscanned */
2611
012bcf8d 2612 UV uv;
4c3a8340
TS
2613#ifdef EBCDIC
2614 UV literal_endpoint = 0;
e294cc5d 2615 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2616#endif
012bcf8d 2617
7918f24d
NC
2618 PERL_ARGS_ASSERT_SCAN_CONST;
2619
bb16bae8 2620 assert(PL_lex_inwhat != OP_TRANSR);
2b9d42f0
NIS
2621 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2622 /* If we are doing a trans and we know we want UTF8 set expectation */
2623 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2624 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2625 }
2626
2627
79072805 2628 while (s < send || dorange) {
ff3f963a 2629
02aa26ce 2630 /* get transliterations out of the way (they're most literal) */
3280af22 2631 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2632 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2633 if (dorange) {
1ba5c669
JH
2634 I32 i; /* current expanded character */
2635 I32 min; /* first character in range */
2636 I32 max; /* last character in range */
02aa26ce 2637
e294cc5d
JH
2638#ifdef EBCDIC
2639 UV uvmax = 0;
2640#endif
2641
2642 if (has_utf8
2643#ifdef EBCDIC
2644 && !native_range
2645#endif
2646 ) {
9d4ba2ae 2647 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2648 char *e = d++;
2649 while (e-- > c)
2650 *(e + 1) = *e;
25716404 2651 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2652 /* mark the range as done, and continue */
2653 dorange = FALSE;
2654 didrange = TRUE;
2655 continue;
2656 }
2b9d42f0 2657
95a20fc0 2658 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2659#ifdef EBCDIC
2660 SvGROW(sv,
2661 SvLEN(sv) + (has_utf8 ?
2662 (512 - UTF_CONTINUATION_MARK +
2663 UNISKIP(0x100))
2664 : 256));
2665 /* How many two-byte within 0..255: 128 in UTF-8,
2666 * 96 in UTF-8-mod. */
2667#else
9cbb5ea2 2668 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2669#endif
9cbb5ea2 2670 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2671#ifdef EBCDIC
2672 if (has_utf8) {
2673 int j;
2674 for (j = 0; j <= 1; j++) {
2675 char * const c = (char*)utf8_hop((U8*)d, -1);
2676 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2677 if (j)
2678 min = (U8)uv;
2679 else if (uv < 256)
2680 max = (U8)uv;
2681 else {
2682 max = (U8)0xff; /* only to \xff */
2683 uvmax = uv; /* \x{100} to uvmax */
2684 }
2685 d = c; /* eat endpoint chars */
2686 }
2687 }
2688 else {
2689#endif
2690 d -= 2; /* eat the first char and the - */
2691 min = (U8)*d; /* first char in range */
2692 max = (U8)d[1]; /* last char in range */
2693#ifdef EBCDIC
2694 }
2695#endif
8ada0baa 2696
c2e66d9e 2697 if (min > max) {
01ec43d0 2698 Perl_croak(aTHX_
d1573ac7 2699 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2700 (char)min, (char)max);
c2e66d9e
GS
2701 }
2702
c7f1f016 2703#ifdef EBCDIC
4c3a8340
TS
2704 if (literal_endpoint == 2 &&
2705 ((isLOWER(min) && isLOWER(max)) ||
2706 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2707 if (isLOWER(min)) {
2708 for (i = min; i <= max; i++)
2709 if (isLOWER(i))
db42d148 2710 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2711 } else {
2712 for (i = min; i <= max; i++)
2713 if (isUPPER(i))
db42d148 2714 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2715 }
2716 }
2717 else
2718#endif
2719 for (i = min; i <= max; i++)
e294cc5d
JH
2720#ifdef EBCDIC
2721 if (has_utf8) {
2722 const U8 ch = (U8)NATIVE_TO_UTF(i);
2723 if (UNI_IS_INVARIANT(ch))
2724 *d++ = (U8)i;
2725 else {
2726 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2727 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2728 }
2729 }
2730 else
2731#endif
2732 *d++ = (char)i;
2733
2734#ifdef EBCDIC
2735 if (uvmax) {
2736 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2737 if (uvmax > 0x101)
2738 *d++ = (char)UTF_TO_NATIVE(0xff);
2739 if (uvmax > 0x100)
2740 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2741 }
2742#endif
02aa26ce
NT
2743
2744 /* mark the range as done, and continue */
79072805 2745 dorange = FALSE;
01ec43d0 2746 didrange = TRUE;
4c3a8340
TS
2747#ifdef EBCDIC
2748 literal_endpoint = 0;
2749#endif
79072805 2750 continue;
4e553d73 2751 }
02aa26ce
NT
2752
2753 /* range begins (ignore - as first or last char) */
79072805 2754 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2755 if (didrange) {
1fafa243 2756 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2757 }
e294cc5d
JH
2758 if (has_utf8
2759#ifdef EBCDIC
2760 && !native_range
2761#endif
2762 ) {
25716404 2763 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2764 s++;
2765 continue;
2766 }
79072805
LW
2767 dorange = TRUE;
2768 s++;
01ec43d0
GS
2769 }
2770 else {
2771 didrange = FALSE;
4c3a8340
TS
2772#ifdef EBCDIC
2773 literal_endpoint = 0;
e294cc5d 2774 native_range = TRUE;
4c3a8340 2775#endif
01ec43d0 2776 }
79072805 2777 }
02aa26ce
NT
2778
2779 /* if we get here, we're not doing a transliteration */
2780
0f5d15d6
IZ
2781 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2782 except for the last char, which will be done separately. */
3280af22 2783 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2784 if (s[2] == '#') {
e994fd66 2785 while (s+1 < send && *s != ')')
db42d148 2786 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2787 }
2788 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2789 || (s[2] == '?' && s[3] == '{'))
155aba94 2790 {
cc6b7395 2791 I32 count = 1;
0f5d15d6 2792 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2793 char c;
2794
d9f97599
GS
2795 while (count && (c = *regparse)) {
2796 if (c == '\\' && regparse[1])
2797 regparse++;
4e553d73 2798 else if (c == '{')
cc6b7395 2799 count++;
4e553d73 2800 else if (c == '}')
cc6b7395 2801 count--;
d9f97599 2802 regparse++;
cc6b7395 2803 }
e994fd66 2804 if (*regparse != ')')
5bdf89e7 2805 regparse--; /* Leave one char for continuation. */
0f5d15d6 2806 while (s < regparse)
db42d148 2807 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2808 }
748a9306 2809 }
02aa26ce
NT
2810
2811 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2812 else if (*s == '#' && PL_lex_inpat &&
2813 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2814 while (s+1 < send && *s != '\n')
db42d148 2815 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2816 }
02aa26ce 2817
5d1d4326 2818 /* check for embedded arrays
da6eedaa 2819 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2820 */
1749ea0d
TS
2821 else if (*s == '@' && s[1]) {
2822 if (isALNUM_lazy_if(s+1,UTF))
2823 break;
2824 if (strchr(":'{$", s[1]))
2825 break;
2826 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2827 break; /* in regexp, neither @+ nor @- are interpolated */
2828 }
02aa26ce
NT
2829
2830 /* check for embedded scalars. only stop if we're sure it's a
2831 variable.
2832 */
79072805 2833 else if (*s == '$') {
3280af22 2834 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2835 break;
77772344 2836 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2837 if (s[1] == '\\') {
2838 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2839 "Possible unintended interpolation of $\\ in regex");
77772344 2840 }
79072805 2841 break; /* in regexp, $ might be tail anchor */
77772344 2842 }
79072805 2843 }
02aa26ce 2844
2b9d42f0
NIS
2845 /* End of else if chain - OP_TRANS rejoin rest */
2846
02aa26ce 2847 /* backslashes */
79072805 2848 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2849 char* e; /* Can be used for ending '}', etc. */
2850
79072805 2851 s++;
02aa26ce 2852
7d0fc23c
KW
2853 /* warn on \1 - \9 in substitution replacements, but note that \11
2854 * is an octal; and \19 is \1 followed by '9' */
3280af22 2855 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2856 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2857 {
a2a5de95 2858 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2859 *--s = '$';
2860 break;
2861 }
02aa26ce
NT
2862
2863 /* string-change backslash escapes */
3280af22 2864 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2865 --s;
2866 break;
2867 }
ff3f963a
KW
2868 /* In a pattern, process \N, but skip any other backslash escapes.
2869 * This is because we don't want to translate an escape sequence
2870 * into a meta symbol and have the regex compiler use the meta
2871 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2872 * in spite of this, we do have to process \N here while the proper
2873 * charnames handler is in scope. See bugs #56444 and #62056.
2874 * There is a complication because \N in a pattern may also stand
2875 * for 'match a non-nl', and not mean a charname, in which case its
2876 * processing should be deferred to the regex compiler. To be a
2877 * charname it must be followed immediately by a '{', and not look
2878 * like \N followed by a curly quantifier, i.e., not something like
2879 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2880 * quantifier */
2881 else if (PL_lex_inpat
2882 && (*s != 'N'
2883 || s[1] != '{'
2884 || regcurly(s + 1)))
2885 {
cc74c5bd
TS
2886 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2887 goto default_action;
2888 }
02aa26ce 2889
79072805 2890 switch (*s) {
02aa26ce
NT
2891
2892 /* quoted - in transliterations */
79072805 2893 case '-':
3280af22 2894 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2895 *d++ = *s++;
2896 continue;
2897 }
2898 /* FALL THROUGH */
2899 default:
11b8faa4 2900 {
a2a5de95
NC
2901 if ((isALPHA(*s) || isDIGIT(*s)))
2902 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2903 "Unrecognized escape \\%c passed through",
2904 *s);
11b8faa4 2905 /* default action is to copy the quoted character */
f9a63242 2906 goto default_action;
11b8faa4 2907 }
02aa26ce 2908
632403cc 2909 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2910 case '0': case '1': case '2': case '3':
2911 case '4': case '5': case '6': case '7':
ba210ebe 2912 {
53305cf1
NC
2913 I32 flags = 0;
2914 STRLEN len = 3;
77a135fe 2915 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2916 s += len;
2917 }
012bcf8d 2918 goto NUM_ESCAPE_INSERT;
02aa26ce 2919
f0a2b745
KW
2920 /* eg. \o{24} indicates the octal constant \024 */
2921 case 'o':
2922 {
2923 STRLEN len;
454155d9 2924 const char* error;
f0a2b745 2925
454155d9 2926 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2927 s += len;
454155d9 2928 if (! valid) {
f0a2b745
KW
2929 yyerror(error);
2930 continue;
2931 }
2932 goto NUM_ESCAPE_INSERT;
2933 }
2934
77a135fe 2935 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2936 case 'x':
a0ed51b3
LW
2937 ++s;
2938 if (*s == '{') {
9d4ba2ae 2939 char* const e = strchr(s, '}');
a4c04bdc
NC
2940 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2941 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2942 STRLEN len;
355860ce 2943
53305cf1 2944 ++s;
adaeee49 2945 if (!e) {
a0ed51b3 2946 yyerror("Missing right brace on \\x{}");
355860ce 2947 continue;
ba210ebe 2948 }
53305cf1 2949 len = e - s;
77a135fe 2950 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2951 s = e + 1;
a0ed51b3
LW
2952 }
2953 else {
ba210ebe 2954 {
53305cf1 2955 STRLEN len = 2;
a4c04bdc 2956 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2957 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2958 s += len;
2959 }
012bcf8d
GS
2960 }
2961
2962 NUM_ESCAPE_INSERT:
ff3f963a
KW
2963 /* Insert oct or hex escaped character. There will always be
2964 * enough room in sv since such escapes will be longer than any
2965 * UTF-8 sequence they can end up as, except if they force us
2966 * to recode the rest of the string into utf8 */
ba7cea30 2967
77a135fe 2968 /* Here uv is the ordinal of the next character being added in
ff3f963a 2969 * unicode (converted from native). */
77a135fe 2970 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2971 if (!has_utf8 && uv > 255) {
77a135fe
KW
2972 /* Might need to recode whatever we have accumulated so
2973 * far if it contains any chars variant in utf8 or
2974 * utf-ebcdic. */
2975
2976 SvCUR_set(sv, d - SvPVX_const(sv));
2977 SvPOK_on(sv);
2978 *d = '\0';
77a135fe 2979 /* See Note on sizing above. */
7bf79863
KW
2980 sv_utf8_upgrade_flags_grow(sv,
2981 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2982 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2983 d = SvPVX(sv) + SvCUR(sv);
2984 has_utf8 = TRUE;
012bcf8d
GS
2985 }
2986
77a135fe
KW
2987 if (has_utf8) {
2988 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2989 if (PL_lex_inwhat == OP_TRANS &&
2990 PL_sublex_info.sub_op) {
2991 PL_sublex_info.sub_op->op_private |=
2992 (PL_lex_repl ? OPpTRANS_FROM_UTF
2993 : OPpTRANS_TO_UTF);
f9a63242 2994 }
e294cc5d
JH
2995#ifdef EBCDIC
2996 if (uv > 255 && !dorange)
2997 native_range = FALSE;
2998#endif
012bcf8d 2999 }
a0ed51b3 3000 else {
012bcf8d 3001 *d++ = (char)uv;
a0ed51b3 3002 }
012bcf8d
GS
3003 }
3004 else {
c4d5f83a 3005 *d++ = (char) uv;
a0ed51b3 3006 }
79072805 3007 continue;
02aa26ce 3008
4a2d328f 3009 case 'N':
ff3f963a
KW
3010 /* In a non-pattern \N must be a named character, like \N{LATIN
3011 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
3012 * mean to match a non-newline. For non-patterns, named
3013 * characters are converted to their string equivalents. In
3014 * patterns, named characters are not converted to their
3015 * ultimate forms for the same reasons that other escapes
3016 * aren't. Instead, they are converted to the \N{U+...} form
3017 * to get the value from the charnames that is in effect right
3018 * now, while preserving the fact that it was a named character
3019 * so that the regex compiler knows this */
3020
3021 /* This section of code doesn't generally use the
3022 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
3023 * a close examination of this macro and determined it is a
3024 * no-op except on utfebcdic variant characters. Every
3025 * character generated by this that would normally need to be
3026 * enclosed by this macro is invariant, so the macro is not
3027 * needed, and would complicate use of copy(). There are other
3028 * parts of this file where the macro is used inconsistently,
3029 * but are saved by it being a no-op */
3030
3031 /* The structure of this section of code (besides checking for
3032 * errors and upgrading to utf8) is:
3033 * Further disambiguate between the two meanings of \N, and if
3034 * not a charname, go process it elsewhere
0a96133f
KW
3035 * If of form \N{U+...}, pass it through if a pattern;
3036 * otherwise convert to utf8
3037 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3038 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3039
3040 /* Here, s points to the 'N'; the test below is guaranteed to
3041 * succeed if we are being called on a pattern as we already
3042 * know from a test above that the next character is a '{'.
3043 * On a non-pattern \N must mean 'named sequence, which
3044 * requires braces */
3045 s++;
3046 if (*s != '{') {
3047 yyerror("Missing braces on \\N{}");
3048 continue;
3049 }
3050 s++;
3051
0a96133f 3052 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3053 if (! (e = strchr(s, '}'))) {
3054 if (! PL_lex_inpat) {
5777a3f7 3055 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3056 } else {
3057 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3058 }
0a96133f 3059 continue;
ff3f963a 3060 }
cddc7ef4 3061
ff3f963a 3062 /* Here it looks like a named character */
cddc7ef4 3063
ff3f963a
KW
3064 if (PL_lex_inpat) {
3065
3066 /* XXX This block is temporary code. \N{} implies that the
3067 * pattern is to have Unicode semantics, and therefore
3068 * currently has to be encoded in utf8. By putting it in
3069 * utf8 now, we save a whole pass in the regular expression
3070 * compiler. Once that code is changed so Unicode
3071 * semantics doesn't necessarily have to be in utf8, this
3072 * block should be removed */
3073 if (!has_utf8) {
77a135fe 3074 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3075 SvPOK_on(sv);
e4f3eed8 3076 *d = '\0';
77a135fe 3077 /* See Note on sizing above. */
7bf79863 3078 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3079 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3080 /* 5 = '\N{' + cur char + NUL */
3081 (STRLEN)(send - s) + 5);
f08d6ad9 3082 d = SvPVX(sv) + SvCUR(sv);
89491803 3083 has_utf8 = TRUE;
ff3f963a
KW
3084 }
3085 }
423cee85 3086
ff3f963a
KW
3087 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3088 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3089 | PERL_SCAN_DISALLOW_PREFIX;
3090 STRLEN len;
3091
3092 /* For \N{U+...}, the '...' is a unicode value even on
3093 * EBCDIC machines */
3094 s += 2; /* Skip to next char after the 'U+' */
3095 len = e - s;
3096 uv = grok_hex(s, &len, &flags, NULL);
3097 if (len == 0 || len != (STRLEN)(e - s)) {
3098 yyerror("Invalid hexadecimal number in \\N{U+...}");
3099 s = e + 1;
3100 continue;
3101 }
3102
3103 if (PL_lex_inpat) {
3104
3105 /* Pass through to the regex compiler unchanged. The
3106 * reason we evaluated the number above is to make sure
0a96133f 3107 * there wasn't a syntax error. */
ff3f963a
KW
3108 s -= 5; /* Include the '\N{U+' */
3109 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3110 d += e - s + 1;
3111 }
3112 else { /* Not a pattern: convert the hex to string */
3113
3114 /* If destination is not in utf8, unconditionally
3115 * recode it to be so. This is because \N{} implies
3116 * Unicode semantics, and scalars have to be in utf8
3117 * to guarantee those semantics */
3118 if (! has_utf8) {
3119 SvCUR_set(sv, d - SvPVX_const(sv));
3120 SvPOK_on(sv);
3121 *d = '\0';
3122 /* See Note on sizing above. */
3123 sv_utf8_upgrade_flags_grow(
3124 sv,
3125 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3126 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3127 d = SvPVX(sv) + SvCUR(sv);
3128 has_utf8 = TRUE;
3129 }
3130
3131 /* Add the string to the output */
3132 if (UNI_IS_INVARIANT(uv)) {
3133 *d++ = (char) uv;
3134 }
3135 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3136 }
3137 }
3138 else { /* Here is \N{NAME} but not \N{U+...}. */
3139
3140 SV *res; /* result from charnames */
3141 const char *str; /* the string in 'res' */
3142 STRLEN len; /* its length */
3143
3144 /* Get the value for NAME */
3145 res = newSVpvn(s, e - s);
3146 res = new_constant( NULL, 0, "charnames",
3147 /* includes all of: \N{...} */
3148 res, NULL, s - 3, e - s + 4 );
3149
3150 /* Most likely res will be in utf8 already since the
3151 * standard charnames uses pack U, but a custom translator
3152 * can leave it otherwise, so make sure. XXX This can be
3153 * revisited to not have charnames use utf8 for characters
3154 * that don't need it when regexes don't have to be in utf8
3155 * for Unicode semantics. If doing so, remember EBCDIC */
3156 sv_utf8_upgrade(res);
3157 str = SvPV_const(res, len);
3158
3159 /* Don't accept malformed input */
3160 if (! is_utf8_string((U8 *) str, len)) {
3161 yyerror("Malformed UTF-8 returned by \\N");
3162 }
3163 else if (PL_lex_inpat) {
3164
3165 if (! len) { /* The name resolved to an empty string */
3166 Copy("\\N{}", d, 4, char);
3167 d += 4;
3168 }
3169 else {
3170 /* In order to not lose information for the regex
3171 * compiler, pass the result in the specially made
3172 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3173 * the code points in hex of each character
3174 * returned by charnames */
3175
3176 const char *str_end = str + len;
3177 STRLEN char_length; /* cur char's byte length */
3178 STRLEN output_length; /* and the number of bytes
3179 after this is translated
3180 into hex digits */
3181 const STRLEN off = d - SvPVX_const(sv);
3182
3183 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3184 * max('U+', '.'); and 1 for NUL */
3185 char hex_string[2 * UTF8_MAXBYTES + 5];
3186
3187 /* Get the first character of the result. */
3188 U32 uv = utf8n_to_uvuni((U8 *) str,
3189 len,
3190 &char_length,
3191 UTF8_ALLOW_ANYUV);
3192
3193 /* The call to is_utf8_string() above hopefully
3194 * guarantees that there won't be an error. But
3195 * it's easy here to make sure. The function just
3196 * above warns and returns 0 if invalid utf8, but
3197 * it can also return 0 if the input is validly a
3198 * NUL. Disambiguate */
3199 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3200 uv = UNICODE_REPLACEMENT;
3201 }
3202
3203 /* Convert first code point to hex, including the
3204 * boiler plate before it */
78c35590 3205 output_length =
3353de27
NC
3206 my_snprintf(hex_string, sizeof(hex_string),
3207 "\\N{U+%X", (unsigned int) uv);
ff3f963a
KW
3208
3209 /* Make sure there is enough space to hold it */
3210 d = off + SvGROW(sv, off
3211 + output_length
3212 + (STRLEN)(send - e)
3213 + 2); /* '}' + NUL */
3214 /* And output it */
3215 Copy(hex_string, d, output_length, char);
3216 d += output_length;
3217
3218 /* For each subsequent character, append dot and
3219 * its ordinal in hex */
3220 while ((str += char_length) < str_end) {
3221 const STRLEN off = d - SvPVX_const(sv);
3222 U32 uv = utf8n_to_uvuni((U8 *) str,
3223 str_end - str,
3224 &char_length,
3225 UTF8_ALLOW_ANYUV);
3226 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3227 uv = UNICODE_REPLACEMENT;
3228 }
3229
78c35590 3230 output_length =
3353de27
NC
3231 my_snprintf(hex_string, sizeof(hex_string),
3232 ".%X", (unsigned int) uv);
ff3f963a
KW
3233
3234 d = off + SvGROW(sv, off
3235 + output_length
3236 + (STRLEN)(send - e)
3237 + 2); /* '}' + NUL */
3238 Copy(hex_string, d, output_length, char);
3239 d += output_length;
3240 }
3241
3242 *d++ = '}'; /* Done. Add the trailing brace */
3243 }
3244 }
3245 else { /* Here, not in a pattern. Convert the name to a
3246 * string. */
3247
3248 /* If destination is not in utf8, unconditionally
3249 * recode it to be so. This is because \N{} implies
3250 * Unicode semantics, and scalars have to be in utf8
3251 * to guarantee those semantics */
3252 if (! has_utf8) {
3253 SvCUR_set(sv, d - SvPVX_const(sv));
3254 SvPOK_on(sv);
3255 *d = '\0';
3256 /* See Note on sizing above. */
3257 sv_utf8_upgrade_flags_grow(sv,
3258 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3259 len + (STRLEN)(send - s) + 1);
3260 d = SvPVX(sv) + SvCUR(sv);
3261 has_utf8 = TRUE;
3262 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3263
3264 /* See Note on sizing above. (NOTE: SvCUR() is not
3265 * set correctly here). */
3266 const STRLEN off = d - SvPVX_const(sv);
3267 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3268 }
3269 Copy(str, d, len, char);
3270 d += len;
423cee85 3271 }
423cee85 3272 SvREFCNT_dec(res);
cb233ae3
KW
3273
3274 /* Deprecate non-approved name syntax */
3275 if (ckWARN_d(WARN_DEPRECATED)) {
3276 bool problematic = FALSE;
3277 char* i = s;
3278
3279 /* For non-ut8 input, look to see that the first
3280 * character is an alpha, then loop through the rest
3281 * checking that each is a continuation */
3282 if (! this_utf8) {
3283 if (! isALPHAU(*i)) problematic = TRUE;
3284 else for (i = s + 1; i < e; i++) {
3285 if (isCHARNAME_CONT(*i)) continue;
3286 problematic = TRUE;
3287 break;
3288 }
3289 }
3290 else {
3291 /* Similarly for utf8. For invariants can check
3292 * directly. We accept anything above the latin1
3293 * range because it is immaterial to Perl if it is
3294 * correct or not, and is expensive to check. But
3295 * it is fairly easy in the latin1 range to convert
3296 * the variants into a single character and check
3297 * those */
3298 if (UTF8_IS_INVARIANT(*i)) {
3299 if (! isALPHAU(*i)) problematic = TRUE;
3300 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3301 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3302 *(i+1)))))
3303 {
3304 problematic = TRUE;
3305 }
3306 }
3307 if (! problematic) for (i = s + UTF8SKIP(s);
3308 i < e;
3309 i+= UTF8SKIP(i))
3310 {
3311 if (UTF8_IS_INVARIANT(*i)) {
3312 if (isCHARNAME_CONT(*i)) continue;
3313 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3314 continue;
3315 } else if (isCHARNAME_CONT(
3316 UNI_TO_NATIVE(
3317 UTF8_ACCUMULATE(*i, *(i+1)))))
3318 {
3319 continue;
3320 }
3321 problematic = TRUE;
3322 break;
3323 }
3324 }
3325 if (problematic) {
6e1bad6c
KW
3326 /* The e-i passed to the final %.*s makes sure that
3327 * should the trailing NUL be missing that this
3328 * print won't run off the end of the string */
cb233ae3 3329 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3330 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3331 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3332 }
3333 }
3334 } /* End \N{NAME} */
ff3f963a
KW
3335#ifdef EBCDIC
3336 if (!dorange)
3337 native_range = FALSE; /* \N{} is defined to be Unicode */
3338#endif
3339 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3340 continue;
3341
02aa26ce 3342 /* \c is a control character */
79072805
LW
3343 case 'c':
3344 s++;
961ce445 3345 if (s < send) {
f9d13529 3346 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3347 }
961ce445
RGS
3348 else {
3349 yyerror("Missing control char name in \\c");
3350 }
79072805 3351 continue;
02aa26ce
NT
3352
3353 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3354 case 'b':
db42d148 3355 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3356 break;
3357 case 'n':
db42d148 3358 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3359 break;
3360 case 'r':
db42d148 3361 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3362 break;
3363 case 'f':
db42d148 3364 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3365 break;
3366 case 't':
db42d148 3367 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3368 break;
34a3fe2a 3369 case 'e':
db42d148 3370 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3371 break;
3372 case 'a':
db42d148 3373 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3374 break;
02aa26ce
NT
3375 } /* end switch */
3376
79072805
LW
3377 s++;
3378 continue;
02aa26ce 3379 } /* end if (backslash) */
4c3a8340
TS
3380#ifdef EBCDIC
3381 else
3382 literal_endpoint++;
3383#endif
02aa26ce 3384
f9a63242 3385 default_action:
77a135fe
KW
3386 /* If we started with encoded form, or already know we want it,
3387 then encode the next character */
3388 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3389 STRLEN len = 1;
77a135fe
KW
3390
3391
3392 /* One might think that it is wasted effort in the case of the
3393 * source being utf8 (this_utf8 == TRUE) to take the next character
3394 * in the source, convert it to an unsigned value, and then convert
3395 * it back again. But the source has not been validated here. The
3396 * routine that does the conversion checks for errors like
3397 * malformed utf8 */
3398
5f66b61c
AL
3399 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3400 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3401 if (!has_utf8) {
3402 SvCUR_set(sv, d - SvPVX_const(sv));
3403 SvPOK_on(sv);
3404 *d = '\0';
77a135fe 3405 /* See Note on sizing above. */
7bf79863
KW
3406 sv_utf8_upgrade_flags_grow(sv,
3407 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3408 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3409 d = SvPVX(sv) + SvCUR(sv);
3410 has_utf8 = TRUE;
3411 } else if (need > len) {
3412 /* encoded value larger than old, may need extra space (NOTE:
3413 * SvCUR() is not set correctly here). See Note on sizing
3414 * above. */
9d4ba2ae 3415 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3416 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3417 }
77a135fe
KW
3418 s += len;
3419
5f66b61c 3420 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3421#ifdef EBCDIC
3422 if (uv > 255 && !dorange)
3423 native_range = FALSE;
3424#endif
2b9d42f0
NIS
3425 }
3426 else {
3427 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3428 }
02aa26ce
NT
3429 } /* while loop to process each character */
3430
3431 /* terminate the string and set up the sv */
79072805 3432 *d = '\0';
95a20fc0 3433 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3434 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3435 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3436
79072805 3437 SvPOK_on(sv);
9f4817db 3438 if (PL_encoding && !has_utf8) {
d0063567
DK
3439 sv_recode_to_utf8(sv, PL_encoding);
3440 if (SvUTF8(sv))
3441 has_utf8 = TRUE;
9f4817db 3442 }
2b9d42f0 3443 if (has_utf8) {
7e2040f0 3444 SvUTF8_on(sv);
2b9d42f0 3445 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3446 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3447 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3448 }
3449 }
79072805 3450
02aa26ce 3451 /* shrink the sv if we allocated more than we used */
79072805 3452 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3453 SvPV_shrink_to_cur(sv);
79072805 3454 }
02aa26ce 3455
6154021b 3456 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3457 if (s > PL_bufptr) {
eb0d8d16
NC
3458 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3459 const char *const key = PL_lex_inpat ? "qr" : "q";
3460 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3461 const char *type;
3462 STRLEN typelen;
3463
3464 if (PL_lex_inwhat == OP_TRANS) {
3465 type = "tr";
3466 typelen = 2;
3467 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3468 type = "s";
3469 typelen = 1;
3470 } else {
3471 type = "qq";
3472 typelen = 2;
3473 }
3474
3475 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3476 type, typelen);
3477 }
6154021b 3478 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3479 } else
8990e307 3480 SvREFCNT_dec(sv);
79072805
LW
3481 return s;
3482}
3483
ffb4593c
NT
3484/* S_intuit_more
3485 * Returns TRUE if there's more to the expression (e.g., a subscript),
3486 * FALSE otherwise.
ffb4593c
NT
3487 *
3488 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3489 *
3490 * ->[ and ->{ return TRUE
3491 * { and [ outside a pattern are always subscripts, so return TRUE
3492 * if we're outside a pattern and it's not { or [, then return FALSE
3493 * if we're in a pattern and the first char is a {
3494 * {4,5} (any digits around the comma) returns FALSE
3495 * if we're in a pattern and the first char is a [
3496 * [] returns FALSE
3497 * [SOMETHING] has a funky algorithm to decide whether it's a
3498 * character class or not. It has to deal with things like
3499 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3500 * anything else returns TRUE
3501 */
3502
9cbb5ea2
GS
3503/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3504
76e3520e 3505STATIC int
cea2e8a9 3506S_intuit_more(pTHX_ register char *s)
79072805 3507{
97aff369 3508 dVAR;
7918f24d
NC
3509
3510 PERL_ARGS_ASSERT_INTUIT_MORE;
3511
3280af22 3512 if (PL_lex_brackets)
79072805
LW
3513 return TRUE;
3514 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3515 return TRUE;
3516 if (*s != '{' && *s != '[')
3517 return FALSE;
3280af22 3518 if (!PL_lex_inpat)
79072805
LW
3519 return TRUE;
3520
3521 /* In a pattern, so maybe we have {n,m}. */
3522 if (*s == '{') {
b3155d95 3523 if (regcurly(s)) {
79072805 3524 return FALSE;
b3155d95 3525 }
79072805 3526 return TRUE;
79072805
LW
3527 }
3528
3529 /* On the other hand, maybe we have a character class */
3530
3531 s++;
3532 if (*s == ']' || *s == '^')
3533 return FALSE;
3534 else {
ffb4593c 3535 /* this is terrifying, and it works */
79072805
LW
3536 int weight = 2; /* let's weigh the evidence */
3537 char seen[256];
f27ffc4a 3538 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3539 const char * const send = strchr(s,']');
3280af22 3540 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3541
3542 if (!send) /* has to be an expression */
3543 return TRUE;
3544
3545 Zero(seen,256,char);
3546 if (*s == '$')
3547 weight -= 3;
3548 else if (isDIGIT(*s)) {
3549 if (s[1] != ']') {
3550 if (isDIGIT(s[1]) && s[2] == ']')
3551 weight -= 10;
3552 }
3553 else
3554 weight -= 100;
3555 }
3556 for (; s < send; s++) {
3557 last_un_char = un_char;
3558 un_char = (unsigned char)*s;
3559 switch (*s) {
3560 case '@':
3561 case '&':
3562 case '$':
3563 weight -= seen[un_char] * 10;
7e2040f0 3564 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3565 int len;
8903cb82 3566 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3567 len = (int)strlen(tmpbuf);
3568 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3569 weight -= 100;
3570 else
3571 weight -= 10;
3572 }
3573 else if (*s == '$' && s[1] &&
93a17b20
LW
3574 strchr("[#!%*<>()-=",s[1])) {
3575 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3576 weight -= 10;
3577 else
3578 weight -= 1;
3579 }
3580 break;
3581 case '\\':
3582 un_char = 254;
3583 if (s[1]) {
93a17b20 3584 if (strchr("wds]",s[1]))
79072805 3585 weight += 100;
10edeb5d 3586 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3587 weight += 1;
93a17b20 3588 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3589 weight += 40;
3590 else if (isDIGIT(s[1])) {
3591 weight += 40;
3592 while (s[1] && isDIGIT(s[1]))
3593 s++;
3594 }
3595 }
3596 else
3597 weight += 100;
3598 break;
3599 case '-':
3600 if (s[1] == '\\')
3601 weight += 50;
93a17b20 3602 if (strchr("aA01! ",last_un_char))
79072805 3603 weight += 30;
93a17b20 3604 if (strchr("zZ79~",s[1]))
79072805 3605 weight += 30;
f27ffc4a
GS
3606 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3607 weight -= 5; /* cope with negative subscript */
79072805
LW
3608 break;
3609 default:
3792a11b
NC
3610 if (!isALNUM(last_un_char)
3611 && !(last_un_char == '$' || last_un_char == '@'
3612 || last_un_char == '&')
3613 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3614 char *d = tmpbuf;
3615 while (isALPHA(*s))
3616 *d++ = *s++;
3617 *d = '\0';
5458a98a 3618 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3619 weight -= 150;
3620 }
3621 if (un_char == last_un_char + 1)
3622 weight += 5;
3623 weight -= seen[un_char];
3624 break;
3625 }
3626 seen[un_char]++;
3627 }
3628 if (weight >= 0) /* probably a character class */
3629 return FALSE;
3630 }
3631
3632 return TRUE;
3633}
ffed7fef 3634
ffb4593c
NT
3635/*
3636 * S_intuit_method
3637 *
3638 * Does all the checking to disambiguate
3639 * foo bar
3640 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3641 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3642 *
3643 * First argument is the stuff after the first token, e.g. "bar".
3644 *
3645 * Not a method if bar is a filehandle.
3646 * Not a method if foo is a subroutine prototyped to take a filehandle.
3647 * Not a method if it's really "Foo $bar"
3648 * Method if it's "foo $bar"
3649 * Not a method if it's really "print foo $bar"
3650 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3651 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3652 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3653 * =>
3654 */
3655
76e3520e 3656STATIC int
62d55b22 3657S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3658{
97aff369 3659 dVAR;
a0d0e21e 3660 char *s = start + (*start == '$');
3280af22 3661 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3662 STRLEN len;
3663 GV* indirgv;
5db06880
NC
3664#ifdef PERL_MAD
3665 int soff;
3666#endif
a0d0e21e 3667
7918f24d
NC
3668 PERL_ARGS_ASSERT_INTUIT_METHOD;
3669
a0d0e21e 3670 if (gv) {
62d55b22 3671 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3672 return 0;
62d55b22
NC
3673 if (cv) {
3674 if (SvPOK(cv)) {
3675 const char *proto = SvPVX_const(cv);
3676 if (proto) {
3677 if (*proto == ';')
3678 proto++;
3679 if (*proto == '*')
3680 return 0;
3681 }
b6c543e3
IZ
3682 }
3683 } else
c35e046a 3684 gv = NULL;
a0d0e21e 3685 }
8903cb82 3686 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3687 /* start is the beginning of the possible filehandle/object,
3688 * and s is the end of it
3689 * tmpbuf is a copy of it
3690 */
3691
a0d0e21e 3692 if (*start == '$') {
3ef1310e
RGS
3693 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3694 isUPPER(*PL_tokenbuf))
a0d0e21e 3695 return 0;
5db06880
NC
3696#ifdef PERL_MAD
3697 len = start - SvPVX(PL_linestr);
3698#endif
29595ff2 3699 s = PEEKSPACE(s);
f0092767 3700#ifdef PERL_MAD
5db06880
NC
3701 start = SvPVX(PL_linestr) + len;
3702#endif
3280af22
NIS
3703 PL_bufptr = start;
3704 PL_expect = XREF;
a0d0e21e
LW
3705 return *s == '(' ? FUNCMETH : METHOD;
3706 }
5458a98a 3707 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3708 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3709 len -= 2;
3710 tmpbuf[len] = '\0';
5db06880
NC
3711#ifdef PERL_MAD
3712 soff = s - SvPVX(PL_linestr);
3713#endif
c3e0f903
GS
3714 goto bare_package;
3715 }
90e5519e 3716 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3717 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3718 return 0;
3719 /* filehandle or package name makes it a method */
da51bb9b 3720 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3721#ifdef PERL_MAD
3722 soff = s - SvPVX(PL_linestr);
3723#endif
29595ff2 3724 s = PEEKSPACE(s);
3280af22 3725 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3726 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3727 bare_package:
cd81e915 3728 start_force(PL_curforce);
9ded7720 3729 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3730 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3731 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3732 if (PL_madskills)
3733 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3734 PL_expect = XTERM;
a0d0e21e 3735 force_next(WORD);
3280af22 3736 PL_bufptr = s;
5db06880
NC
3737#ifdef PERL_MAD
3738 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3739#endif
a0d0e21e
LW
3740 return *s == '(' ? FUNCMETH : METHOD;
3741 }
3742 }
3743 return 0;
3744}
3745
16d20bd9 3746/* Encoded script support. filter_add() effectively inserts a
4e553d73 3747 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3748 * Note that the filter function only applies to the current source file
3749 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3750 *
3751 * The datasv parameter (which may be NULL) can be used to pass
3752 * private data to this instance of the filter. The filter function
3753 * can recover the SV using the FILTER_DATA macro and use it to
3754 * store private buffers and state information.
3755 *
3756 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3757 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3758 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3759 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3760 * private use must be set using malloc'd pointers.
3761 */
16d20bd9
AD
3762
3763SV *
864dbfa3 3764Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3765{
97aff369 3766 dVAR;
f4c556ac 3767 if (!funcp)
a0714e2c 3768 return NULL;
f4c556ac 3769
5486870f
DM
3770 if (!PL_parser)
3771 return NULL;
3772
3280af22
NIS
3773 if (!PL_rsfp_filters)
3774 PL_rsfp_filters = newAV();
16d20bd9 3775 if (!datasv)
561b68a9 3776 datasv = newSV(0);
862a34c6 3777 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3778 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3779 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3780 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3781 FPTR2DPTR(void *, IoANY(datasv)),
3782 SvPV_nolen(datasv)));
3280af22
NIS
3783 av_unshift(PL_rsfp_filters, 1);
3784 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3785 return(datasv);
3786}
4e553d73 3787
16d20bd9
AD
3788
3789/* Delete most recently added instance of this filter function. */
a0d0e21e 3790void
864dbfa3 3791Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3792{
97aff369 3793 dVAR;
e0c19803 3794 SV *datasv;
24801a4b 3795
7918f24d
NC
3796 PERL_ARGS_ASSERT_FILTER_DEL;
3797
33073adb 3798#ifdef DEBUGGING
55662e27
JH
3799 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3800 FPTR2DPTR(void*, funcp)));
33073adb 3801#endif
5486870f 3802 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3803 return;
3804 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3805 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3806 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3807 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3808
16d20bd9
AD
3809 return;
3810 }
3811 /* we need to search for the correct entry and clear it */
cea2e8a9 3812 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3813}
3814
3815
1de9afcd
RGS
3816/* Invoke the idxth filter function for the current rsfp. */
3817/* maxlen 0 = read one text line */
16d20bd9 3818I32
864dbfa3 3819Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3820{
97aff369 3821 dVAR;
16d20bd9
AD
3822 filter_t funcp;
3823 SV *datasv = NULL;
f482118e
NC
3824 /* This API is bad. It should have been using unsigned int for maxlen.
3825 Not sure if we want to change the API, but if not we should sanity
3826 check the value here. */
39cd7a59
NC
3827 const unsigned int correct_length
3828 = maxlen < 0 ?
3829#ifdef PERL_MICRO
3830 0x7FFFFFFF
3831#else
3832 INT_MAX
3833#endif
3834 : maxlen;
e50aee73 3835
7918f24d
NC
3836 PERL_ARGS_ASSERT_FILTER_READ;
3837
5486870f 3838 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3839 return -1;
1de9afcd 3840 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3841 /* Provide a default input filter to make life easy. */
3842 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3843 DEBUG_P(PerlIO_printf(Perl_debug_log,
3844 "filter_read %d: from rsfp\n", idx));
f482118e 3845 if (correct_length) {
16d20bd9
AD
3846 /* Want a block */
3847 int len ;
f54cb97a 3848 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3849
3850 /* ensure buf_sv is large enough */
881d8f0a 3851 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3852 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3853 correct_length)) <= 0) {
3280af22 3854 if (PerlIO_error(PL_rsfp))
37120919
AD
3855 return -1; /* error */
3856 else
3857 return 0 ; /* end of file */
3858 }
16d20bd9 3859 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3860 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3861 } else {
3862 /* Want a line */
3280af22
NIS
3863 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3864 if (PerlIO_error(PL_rsfp))
37120919
AD
3865 return -1; /* error */
3866 else
3867 return 0 ; /* end of file */
3868 }
16d20bd9
AD
3869 }
3870 return SvCUR(buf_sv);
3871 }
3872 /* Skip this filter slot if filter has been deleted */
1de9afcd 3873 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3874 DEBUG_P(PerlIO_printf(Perl_debug_log,
3875 "filter_read %d: skipped (filter deleted)\n",
3876 idx));
f482118e 3877 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3878 }
3879 /* Get function pointer hidden within datasv */
8141890a 3880 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3881 DEBUG_P(PerlIO_printf(Perl_debug_log,
3882 "filter_read %d: via function %p (%s)\n",
ca0270c4 3883 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3884 /* Call function. The function is expected to */
3885 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3886 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3887 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3888}
3889
76e3520e 3890STATIC char *
5cc814fd 3891S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3892{
97aff369 3893 dVAR;
7918f24d
NC
3894
3895 PERL_ARGS_ASSERT_FILTER_GETS;
3896
c39cd008 3897#ifdef PERL_CR_FILTER
3280af22 3898 if (!PL_rsfp_filters) {
c39cd008 3899 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3900 }
3901#endif
3280af22 3902 if (PL_rsfp_filters) {
55497cff 3903 if (!append)
3904 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3905 if (FILTER_READ(0, sv, 0) > 0)
3906 return ( SvPVX(sv) ) ;
3907 else
bd61b366 3908 return NULL ;
16d20bd9 3909 }
9d116dd7 3910 else
5cc814fd 3911 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3912}
3913
01ec43d0 3914STATIC HV *
9bde8eb0 3915S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3916{
97aff369 3917 dVAR;
def3634b
GS
3918 GV *gv;
3919
7918f24d
NC
3920 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3921
01ec43d0 3922 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3923 return PL_curstash;
3924
3925 if (len > 2 &&
3926 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3927 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3928 {
3929 return GvHV(gv); /* Foo:: */
def3634b
GS
3930 }
3931
3932 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3933 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3934 if (gv && GvCV(gv)) {
3935 SV * const sv = cv_const_sv(GvCV(gv));
3936 if (sv)
9bde8eb0 3937 pkgname = SvPV_const(sv, len);
def3634b
GS
3938 }
3939
9bde8eb0 3940 return gv_stashpvn(pkgname, len, 0);
def3634b 3941}
a0d0e21e 3942
e3f73d4e
RGS
3943/*
3944 * S_readpipe_override
3945 * Check whether readpipe() is overriden, and generates the appropriate
3946 * optree, provided sublex_start() is called afterwards.
3947 */
3948STATIC void
1d51329b 3949S_readpipe_override(pTHX)
e3f73d4e
RGS
3950{
3951 GV **gvp;
3952 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3953 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3954 if ((gv_readpipe
3955 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3956 ||
3957 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3958 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3959 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3960 {
3961 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 3962 op_append_elem(OP_LIST,
e3f73d4e
RGS
3963 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3964 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3965 }
e3f73d4e
RGS
3966}
3967
5db06880
NC
3968#ifdef PERL_MAD
3969 /*
3970 * Perl_madlex
3971 * The intent of this yylex wrapper is to minimize the changes to the
3972 * tokener when we aren't interested in collecting madprops. It remains
3973 * to be seen how successful this strategy will be...
3974 */
3975
3976int
3977Perl_madlex(pTHX)
3978{
3979 int optype;
3980 char *s = PL_bufptr;
3981
cd81e915
NC
3982 /* make sure PL_thiswhite is initialized */
3983 PL_thiswhite = 0;
3984 PL_thismad = 0;
5db06880 3985
cd81e915 3986 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 3987 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
3988 return S_pending_ident(aTHX);
3989
3990 /* previous token ate up our whitespace? */
cd81e915
NC
3991 if (!PL_lasttoke && PL_nextwhite) {
3992 PL_thiswhite = PL_nextwhite;
3993 PL_nextwhite = 0;
5db06880
NC
3994 }
3995
3996 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3997 PL_realtokenstart = -1;
3998 PL_thistoken = 0;
5db06880
NC
3999 optype = yylex();
4000 s = PL_bufptr;
cd81e915 4001 assert(PL_curforce < 0);
5db06880 4002
cd81e915
NC
4003 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
4004 if (!PL_thistoken) {
4005 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 4006 PL_thistoken = newSVpvs("");
5db06880 4007 else {
c35e046a 4008 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 4009 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
4010 }
4011 }
cd81e915
NC
4012 if (PL_thismad) /* install head */
4013 CURMAD('X', PL_thistoken);
5db06880
NC
4014 }
4015
4016 /* last whitespace of a sublex? */
cd81e915
NC
4017 if (optype == ')' && PL_endwhite) {
4018 CURMAD('X', PL_endwhite);
5db06880
NC
4019 }
4020
cd81e915 4021 if (!PL_thismad) {
5db06880
NC
4022
4023 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
4024 if (!PL_thiswhite && !PL_endwhite && !optype) {
4025 sv_free(PL_thistoken);
4026 PL_thistoken = 0;
5db06880
NC
4027 return 0;
4028 }
4029
4030 /* put off final whitespace till peg */
4031 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4032 PL_nextwhite = PL_thiswhite;
4033 PL_thiswhite = 0;
5db06880 4034 }
cd81e915
NC
4035 else if (PL_thisopen) {
4036 CURMAD('q', PL_thisopen);
4037 if (PL_thistoken)
4038 sv_free(PL_thistoken);
4039 PL_thistoken = 0;
5db06880
NC
4040 }
4041 else {
4042 /* Store actual token text as madprop X */
cd81e915 4043 CURMAD('X', PL_thistoken);
5db06880
NC
4044 }
4045
cd81e915 4046 if (PL_thiswhite) {
5db06880 4047 /* add preceding whitespace as madprop _ */
cd81e915 4048 CURMAD('_', PL_thiswhite);
5db06880
NC
4049 }
4050
cd81e915 4051 if (PL_thisstuff) {
5db06880 4052 /* add quoted material as madprop = */
cd81e915 4053 CURMAD('=', PL_thisstuff);
5db06880
NC
4054 }
4055
cd81e915 4056 if (PL_thisclose) {
5db06880 4057 /* add terminating quote as madprop Q */
cd81e915 4058 CURMAD('Q', PL_thisclose);
5db06880
NC
4059 }
4060 }
4061
4062 /* special processing based on optype */
4063
4064 switch (optype) {
4065
4066 /* opval doesn't need a TOKEN since it can already store mp */
4067 case WORD:
4068 case METHOD:
4069 case FUNCMETH:
4070 case THING:
4071 case PMFUNC:
4072 case PRIVATEREF:
4073 case FUNC0SUB:
4074 case UNIOPSUB:
4075 case LSTOPSUB:
6154021b
RGS
4076 if (pl_yylval.opval)
4077 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4078 PL_thismad = 0;
5db06880
NC
4079 return optype;
4080
4081 /* fake EOF */
4082 case 0:
4083 optype = PEG;
cd81e915
NC
4084 if (PL_endwhite) {
4085 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4086 PL_endwhite = 0;
5db06880
NC
4087 }
4088 break;
4089
4090 case ']':
4091 case '}':
cd81e915 4092 if (PL_faketokens)
5db06880
NC
4093 break;
4094 /* remember any fake bracket that lexer is about to discard */
4095 if (PL_lex_brackets == 1 &&
4096 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4097 {
4098 s = PL_bufptr;
4099 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4100 s++;
4101 if (*s == '}') {
cd81e915
NC
4102 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4103 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4104 PL_thiswhite = 0;
5db06880
NC
4105 PL_bufptr = s - 1;
4106 break; /* don't bother looking for trailing comment */
4107 }
4108 else
4109 s = PL_bufptr;
4110 }
4111 if (optype == ']')
4112 break;
4113 /* FALLTHROUGH */
4114
4115 /* attach a trailing comment to its statement instead of next token */
4116 case ';':
cd81e915 4117 if (PL_faketokens)
5db06880
NC
4118 break;
4119 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4120 s = PL_bufptr;
4121 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4122 s++;
4123 if (*s == '\n' || *s == '#') {
4124 while (s < PL_bufend && *s != '\n')
4125 s++;
4126 if (s < PL_bufend)
4127 s++;
cd81e915
NC
4128 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4129 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4130 PL_thiswhite = 0;
5db06880
NC
4131 PL_bufptr = s;
4132 }
4133 }
4134 break;
4135
4136 /* pval */
4137 case LABEL:
4138 break;
4139
4140 /* ival */
4141 default:
4142 break;
4143
4144 }
4145
4146 /* Create new token struct. Note: opvals return early above. */
6154021b 4147 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4148 PL_thismad = 0;
5db06880
NC
4149 return optype;
4150}
4151#endif
4152
468aa647 4153STATIC char *
cc6ed77d 4154S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4155 dVAR;
7918f24d
NC
4156
4157 PERL_ARGS_ASSERT_TOKENIZE_USE;
4158
468aa647
RGS
4159 if (PL_expect != XSTATE)
4160 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4161 is_use ? "use" : "no"));
29595ff2 4162 s = SKIPSPACE1(s);
468aa647
RGS
4163 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4164 s = force_version(s, TRUE);
17c59fdf
VP
4165 if (*s == ';' || *s == '}'
4166 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4167 start_force(PL_curforce);
9ded7720 4168 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4169 force_next(WORD);
4170 }
4171 else if (*s == 'v') {
4172 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4173 s = force_version(s, FALSE);
4174 }
4175 }
4176 else {
4177 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4178 s = force_version(s, FALSE);
4179 }
6154021b 4180 pl_yylval.ival = is_use;
468aa647
RGS
4181 return s;
4182}
748a9306 4183#ifdef DEBUGGING
27da23d5 4184 static const char* const exp_name[] =
09bef843 4185 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4186 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4187 };
748a9306 4188#endif
463ee0b2 4189
361d9b55
Z
4190#define word_takes_any_delimeter(p,l) S_word_takes_any_delimeter(p,l)
4191STATIC bool
4192S_word_takes_any_delimeter(char *p, STRLEN len)
4193{
4194 return (len == 1 && strchr("msyq", p[0])) ||
4195 (len == 2 && (
4196 (p[0] == 't' && p[1] == 'r') ||
4197 (p[0] == 'q' && strchr("qwxr", p[1]))));
4198}
4199
02aa26ce
NT
4200/*
4201 yylex
4202
4203 Works out what to call the token just pulled out of the input
4204 stream. The yacc parser takes care of taking the ops we return and
4205 stitching them into a tree.
4206
4207 Returns:
4208 PRIVATEREF
4209
4210 Structure:
4211 if read an identifier
4212 if we're in a my declaration
4213 croak if they tried to say my($foo::bar)
4214 build the ops for a my() declaration
4215 if it's an access to a my() variable
4216 are we in a sort block?
4217 croak if my($a); $a <=> $b
4218 build ops for access to a my() variable
4219 if in a dq string, and they've said @foo and we can't find @foo
4220 croak
4221 build ops for a bareword
4222 if we already built the token before, use it.
4223*/
4224
20141f0e 4225
dba4d153
JH
4226#ifdef __SC__
4227#pragma segment Perl_yylex
4228#endif
dba4d153 4229int
dba4d153 4230Perl_yylex(pTHX)
20141f0e 4231{
97aff369 4232 dVAR;
3afc138a 4233 register char *s = PL_bufptr;
378cc40b 4234 register char *d;
463ee0b2 4235 STRLEN len;
aa7440fb 4236 bool bof = FALSE;
580561a3 4237 U32 fake_eof = 0;
a687059c 4238
10edeb5d
JH
4239 /* orig_keyword, gvp, and gv are initialized here because
4240 * jump to the label just_a_word_zero can bypass their
4241 * initialization later. */
4242 I32 orig_keyword = 0;
4243 GV *gv = NULL;
4244 GV **gvp = NULL;
4245
bbf60fe6 4246 DEBUG_T( {
396482e1 4247 SV* tmp = newSVpvs("");
b6007c36
DM
4248 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4249 (IV)CopLINE(PL_curcop),
4250 lex_state_names[PL_lex_state],
4251 exp_name[PL_expect],
4252 pv_display(tmp, s, strlen(s), 0, 60));
4253 SvREFCNT_dec(tmp);
bbf60fe6 4254 } );
02aa26ce 4255 /* check if there's an identifier for us to look at */
28ac2b49 4256 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4257 return REPORT(S_pending_ident(aTHX));
bbce6d69 4258
02aa26ce
NT
4259 /* no identifier pending identification */
4260
3280af22 4261 switch (PL_lex_state) {
79072805
LW
4262#ifdef COMMENTARY
4263 case LEX_NORMAL: /* Some compilers will produce faster */
4264 case LEX_INTERPNORMAL: /* code if we comment these out. */
4265 break;
4266#endif
4267
09bef843 4268 /* when we've already built the next token, just pull it out of the queue */
79072805 4269 case LEX_KNOWNEXT:
5db06880
NC
4270#ifdef PERL_MAD
4271 PL_lasttoke--;
6154021b 4272 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4273 if (PL_madskills) {
cd81e915 4274 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4275 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4276 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4277 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4278 PL_thismad->mad_val = 0;
4279 mad_free(PL_thismad);
4280 PL_thismad = 0;
5db06880
NC
4281 }
4282 }
4283 if (!PL_lasttoke) {
4284 PL_lex_state = PL_lex_defer;
4285 PL_expect = PL_lex_expect;
4286 PL_lex_defer = LEX_NORMAL;
4287 if (!PL_nexttoke[PL_lasttoke].next_type)
4288 return yylex();
4289 }
4290#else
3280af22 4291 PL_nexttoke--;
6154021b 4292 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4293 if (!PL_nexttoke) {
4294 PL_lex_state = PL_lex_defer;
4295 PL_expect = PL_lex_expect;
4296 PL_lex_defer = LEX_NORMAL;
463ee0b2 4297 }
5db06880 4298#endif
a7aaec61
Z
4299 {
4300 I32 next_type;
5db06880 4301#ifdef PERL_MAD
a7aaec61 4302 next_type = PL_nexttoke[PL_lasttoke].next_type;
5db06880 4303#else
a7aaec61 4304 next_type = PL_nexttype[PL_nexttoke];
5db06880 4305#endif
a7aaec61
Z
4306 if (next_type & (1<<24)) {
4307 if (PL_lex_brackets > 100)
4308 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
4309 PL_lex_brackstack[PL_lex_brackets++] = (next_type >> 16) & 0xff;
4310 next_type &= 0xffff;
4311 }
4312#ifdef PERL_MAD
4313 /* FIXME - can these be merged? */
4314 return next_type;
4315#else
4316 return REPORT(next_type);
4317#endif
4318 }
79072805 4319
02aa26ce 4320 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4321 when we get here, PL_bufptr is at the \
02aa26ce 4322 */
79072805
LW
4323 case LEX_INTERPCASEMOD:
4324#ifdef DEBUGGING
3280af22 4325 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4326 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4327#endif
02aa26ce 4328 /* handle \E or end of string */
3280af22 4329 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4330 /* if at a \E */
3280af22 4331 if (PL_lex_casemods) {
f54cb97a 4332 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4333 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4334
3792a11b
NC
4335 if (PL_bufptr != PL_bufend
4336 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4337 PL_bufptr += 2;
4338 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4339#ifdef PERL_MAD
4340 if (PL_madskills)
6b29d1f5 4341 PL_thistoken = newSVpvs("\\E");
5db06880 4342#endif
a0d0e21e 4343 }
bbf60fe6 4344 return REPORT(')');
79072805 4345 }
5db06880
NC
4346#ifdef PERL_MAD
4347 while (PL_bufptr != PL_bufend &&
4348 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4349 if (!PL_thiswhite)
6b29d1f5 4350 PL_thiswhite = newSVpvs("");
cd81e915 4351 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4352 PL_bufptr += 2;
4353 }
4354#else
3280af22
NIS
4355 if (PL_bufptr != PL_bufend)
4356 PL_bufptr += 2;
5db06880 4357#endif
3280af22 4358 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4359 return yylex();
79072805
LW
4360 }
4361 else {
607df283 4362 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4363 "### Saw case modifier\n"); });
3280af22 4364 s = PL_bufptr + 1;
6e909404 4365 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4366#ifdef PERL_MAD
cd81e915 4367 if (!PL_thiswhite)
6b29d1f5 4368 PL_thiswhite = newSVpvs("");
cd81e915 4369 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4370#endif
89122651 4371 PL_bufptr = s + 3;
6e909404
JH
4372 PL_lex_state = LEX_INTERPCONCAT;
4373 return yylex();
a0d0e21e 4374 }
6e909404 4375 else {
90771dc0 4376 I32 tmp;
5db06880
NC
4377 if (!PL_madskills) /* when just compiling don't need correct */
4378 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4379 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4380 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4381 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4382 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4383 return REPORT(')');
6e909404
JH
4384 }
4385 if (PL_lex_casemods > 10)
4386 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4387 PL_lex_casestack[PL_lex_casemods++] = *s;
4388 PL_lex_casestack[PL_lex_casemods] = '\0';
4389 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4390 start_force(PL_curforce);
9ded7720 4391 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4392 force_next('(');
cd81e915 4393 start_force(PL_curforce);
6e909404 4394 if (*s == 'l')
9ded7720 4395 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4396 else if (*s == 'u')
9ded7720 4397 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4398 else if (*s == 'L')
9ded7720 4399 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4400 else if (*s == 'U')
9ded7720 4401 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4402 else if (*s == 'Q')
9ded7720 4403 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4404 else
4405 Perl_croak(aTHX_ "panic: yylex");
5db06880 4406 if (PL_madskills) {
a5849ce5
NC
4407 SV* const tmpsv = newSVpvs("\\ ");
4408 /* replace the space with the character we want to escape
4409 */
4410 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4411 curmad('_', tmpsv);
4412 }
6e909404 4413 PL_bufptr = s + 1;
a0d0e21e 4414 }
79072805 4415 force_next(FUNC);
3280af22
NIS
4416 if (PL_lex_starts) {
4417 s = PL_bufptr;
4418 PL_lex_starts = 0;
5db06880
NC
4419#ifdef PERL_MAD
4420 if (PL_madskills) {
cd81e915
NC
4421 if (PL_thistoken)
4422 sv_free(PL_thistoken);
6b29d1f5 4423 PL_thistoken = newSVpvs("");
5db06880
NC
4424 }
4425#endif
131b3ad0
DM
4426 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4427 if (PL_lex_casemods == 1 && PL_lex_inpat)
4428 OPERATOR(',');
4429 else
4430 Aop(OP_CONCAT);
79072805
LW
4431 }
4432 else
cea2e8a9 4433 return yylex();
79072805
LW
4434 }
4435
55497cff 4436 case LEX_INTERPPUSH:
bbf60fe6 4437 return REPORT(sublex_push());
55497cff 4438
79072805 4439 case LEX_INTERPSTART:
3280af22 4440 if (PL_bufptr == PL_bufend)
bbf60fe6 4441 return REPORT(sublex_done());
607df283 4442 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4443 "### Interpolated variable\n"); });
3280af22
NIS
4444 PL_expect = XTERM;
4445 PL_lex_dojoin = (*PL_bufptr == '@');
4446 PL_lex_state = LEX_INTERPNORMAL;
4447 if (PL_lex_dojoin) {
cd81e915 4448 start_force(PL_curforce);
9ded7720 4449 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4450 force_next(',');
cd81e915 4451 start_force(PL_curforce);
a0d0e21e 4452 force_ident("\"", '$');
cd81e915 4453 start_force(PL_curforce);
9ded7720 4454 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4455 force_next('$');
cd81e915 4456 start_force(PL_curforce);
9ded7720 4457 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4458 force_next('(');
cd81e915 4459 start_force(PL_curforce);
9ded7720 4460 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4461 force_next(FUNC);
4462 }
3280af22
NIS
4463 if (PL_lex_starts++) {
4464 s = PL_bufptr;
5db06880
NC
4465#ifdef PERL_MAD
4466 if (PL_madskills) {
cd81e915
NC
4467 if (PL_thistoken)
4468 sv_free(PL_thistoken);
6b29d1f5 4469 PL_thistoken = newSVpvs("");
5db06880
NC
4470 }
4471#endif
131b3ad0
DM
4472 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4473 if (!PL_lex_casemods && PL_lex_inpat)
4474 OPERATOR(',');
4475 else
4476 Aop(OP_CONCAT);
79072805 4477 }
cea2e8a9 4478 return yylex();
79072805
LW
4479
4480 case LEX_INTERPENDMAYBE:
3280af22
NIS
4481 if (intuit_more(PL_bufptr)) {
4482 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4483 break;
4484 }
4485 /* FALL THROUGH */
4486
4487 case LEX_INTERPEND:
3280af22
NIS
4488 if (PL_lex_dojoin) {
4489 PL_lex_dojoin = FALSE;
4490 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4491#ifdef PERL_MAD
4492 if (PL_madskills) {
cd81e915
NC
4493 if (PL_thistoken)
4494 sv_free(PL_thistoken);
6b29d1f5 4495 PL_thistoken = newSVpvs("");
5db06880
NC
4496 }
4497#endif
bbf60fe6 4498 return REPORT(')');
79072805 4499 }
43a16006 4500 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4501 && SvEVALED(PL_lex_repl))
43a16006 4502 {
e9fa98b2 4503 if (PL_bufptr != PL_bufend)
cea2e8a9 4504 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4505 PL_lex_repl = NULL;
e9fa98b2 4506 }
79072805
LW
4507 /* FALLTHROUGH */
4508 case LEX_INTERPCONCAT:
4509#ifdef DEBUGGING
3280af22 4510 if (PL_lex_brackets)
cea2e8a9 4511 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4512#endif
3280af22 4513 if (PL_bufptr == PL_bufend)
bbf60fe6 4514 return REPORT(sublex_done());
79072805 4515
3280af22
NIS
4516 if (SvIVX(PL_linestr) == '\'') {
4517 SV *sv = newSVsv(PL_linestr);
4518 if (!PL_lex_inpat)
76e3520e 4519 sv = tokeq(sv);
3280af22 4520 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4521 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4522 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4523 s = PL_bufend;
79072805
LW
4524 }
4525 else {
3280af22 4526 s = scan_const(PL_bufptr);
79072805 4527 if (*s == '\\')
3280af22 4528 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4529 else
3280af22 4530 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4531 }
4532
3280af22 4533 if (s != PL_bufptr) {
cd81e915 4534 start_force(PL_curforce);
5db06880
NC
4535 if (PL_madskills) {
4536 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4537 }
6154021b 4538 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4539 PL_expect = XTERM;
79072805 4540 force_next(THING);
131b3ad0 4541 if (PL_lex_starts++) {
5db06880
NC
4542#ifdef PERL_MAD
4543 if (PL_madskills) {
cd81e915
NC
4544 if (PL_thistoken)
4545 sv_free(PL_thistoken);
6b29d1f5 4546 PL_thistoken = newSVpvs("");
5db06880
NC
4547 }
4548#endif
131b3ad0
DM
4549 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4550 if (!PL_lex_casemods && PL_lex_inpat)
4551 OPERATOR(',');
4552 else
4553 Aop(OP_CONCAT);
4554 }
79072805 4555 else {
3280af22 4556 PL_bufptr = s;
cea2e8a9 4557 return yylex();
79072805
LW
4558 }
4559 }
4560
cea2e8a9 4561 return yylex();
a0d0e21e 4562 case LEX_FORMLINE:
3280af22
NIS
4563 PL_lex_state = LEX_NORMAL;
4564 s = scan_formline(PL_bufptr);
4565 if (!PL_lex_formbrack)
a0d0e21e
LW
4566 goto rightbracket;
4567 OPERATOR(';');
79072805
LW
4568 }
4569
3280af22
NIS
4570 s = PL_bufptr;
4571 PL_oldoldbufptr = PL_oldbufptr;
4572 PL_oldbufptr = s;
463ee0b2
LW
4573
4574 retry:
5db06880 4575#ifdef PERL_MAD
cd81e915
NC
4576 if (PL_thistoken) {
4577 sv_free(PL_thistoken);
4578 PL_thistoken = 0;
5db06880 4579 }
cd81e915 4580 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4581#endif
378cc40b
LW
4582 switch (*s) {
4583 default:
7e2040f0 4584 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4585 goto keylookup;
b1fc3636
CJ
4586 {
4587 unsigned char c = *s;
4588 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4589 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4590 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4591 } else {
4592 d = PL_linestart;
4593 }
4594 *s = '\0';
4595 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4596 }
e929a76b
LW
4597 case 4:
4598 case 26:
4599 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4600 case 0:
5db06880
NC
4601#ifdef PERL_MAD
4602 if (PL_madskills)
cd81e915 4603 PL_faketokens = 0;
5db06880 4604#endif
3280af22
NIS
4605 if (!PL_rsfp) {
4606 PL_last_uni = 0;
4607 PL_last_lop = 0;
a7aaec61
Z
4608 if (PL_lex_brackets &&
4609 PL_lex_brackstack[PL_lex_brackets-1] != XFAKEEOF) {
10edeb5d
JH
4610 yyerror((const char *)
4611 (PL_lex_formbrack
4612 ? "Format not terminated"
4613 : "Missing right curly or square bracket"));
c5ee2135 4614 }
4e553d73 4615 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4616 "### Tokener got EOF\n");
5f80b19c 4617 } );
79072805 4618 TOKEN(0);
463ee0b2 4619 }
3280af22 4620 if (s++ < PL_bufend)
a687059c 4621 goto retry; /* ignore stray nulls */
3280af22
NIS
4622 PL_last_uni = 0;
4623 PL_last_lop = 0;
4624 if (!PL_in_eval && !PL_preambled) {
4625 PL_preambled = TRUE;
5db06880
NC
4626#ifdef PERL_MAD
4627 if (PL_madskills)
cd81e915 4628 PL_faketokens = 1;
5db06880 4629#endif
5ab7ff98
NC
4630 if (PL_perldb) {
4631 /* Generate a string of Perl code to load the debugger.
4632 * If PERL5DB is set, it will return the contents of that,
4633 * otherwise a compile-time require of perl5db.pl. */
4634
4635 const char * const pdb = PerlEnv_getenv("PERL5DB");
4636
4637 if (pdb) {
4638 sv_setpv(PL_linestr, pdb);
4639 sv_catpvs(PL_linestr,";");
4640 } else {
4641 SETERRNO(0,SS_NORMAL);
4642 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4643 }
4644 } else
4645 sv_setpvs(PL_linestr,"");
c62eb204
NC
4646 if (PL_preambleav) {
4647 SV **svp = AvARRAY(PL_preambleav);
4648 SV **const end = svp + AvFILLp(PL_preambleav);
4649 while(svp <= end) {
4650 sv_catsv(PL_linestr, *svp);
4651 ++svp;
396482e1 4652 sv_catpvs(PL_linestr, ";");
91b7def8 4653 }
daba3364 4654 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4655 PL_preambleav = NULL;
91b7def8 4656 }
9f639728
FR
4657 if (PL_minus_E)
4658 sv_catpvs(PL_linestr,
4659 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4660 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4661 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4662 if (PL_minus_l)
396482e1 4663 sv_catpvs(PL_linestr,"chomp;");
3280af22 4664 if (PL_minus_a) {
3280af22 4665 if (PL_minus_F) {
3792a11b
NC
4666 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4667 || *PL_splitstr == '"')
3280af22 4668 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4669 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4670 else {
c8ef6a4b
NC
4671 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4672 bytes can be used as quoting characters. :-) */
dd374669 4673 const char *splits = PL_splitstr;
91d456ae 4674 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4675 do {
4676 /* Need to \ \s */
dd374669
AL
4677 if (*splits == '\\')
4678 sv_catpvn(PL_linestr, splits, 1);
4679 sv_catpvn(PL_linestr, splits, 1);
4680 } while (*splits++);
48c4c863
NC
4681 /* This loop will embed the trailing NUL of
4682 PL_linestr as the last thing it does before
4683 terminating. */
396482e1 4684 sv_catpvs(PL_linestr, ");");
54310121 4685 }
2304df62
AD
4686 }
4687 else
396482e1 4688 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4689 }
79072805 4690 }
396482e1 4691 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4692 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4693 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4694 PL_last_lop = PL_last_uni = NULL;
65269a95 4695 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4696 update_debugger_info(PL_linestr, NULL, 0);
79072805 4697 goto retry;
a687059c 4698 }
e929a76b 4699 do {
580561a3
Z
4700 fake_eof = 0;
4701 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4702 if (0) {
7e28d3af 4703 fake_eof:
f0e67a1d
Z
4704 fake_eof = LEX_FAKE_EOF;
4705 }
4706 PL_bufptr = PL_bufend;
17cc9359 4707 CopLINE_inc(PL_curcop);
f0e67a1d 4708 if (!lex_next_chunk(fake_eof)) {
17cc9359 4709 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4710 s = PL_bufptr;
4711 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4712 }
17cc9359 4713 CopLINE_dec(PL_curcop);
5db06880 4714#ifdef PERL_MAD
f0e67a1d 4715 if (!PL_rsfp)
cd81e915 4716 PL_realtokenstart = -1;
5db06880 4717#endif
f0e67a1d 4718 s = PL_bufptr;
7aa207d6
JH
4719 /* If it looks like the start of a BOM or raw UTF-16,
4720 * check if it in fact is. */
580561a3 4721 if (bof && PL_rsfp &&
7aa207d6
JH
4722 (*s == 0 ||
4723 *(U8*)s == 0xEF ||
4724 *(U8*)s >= 0xFE ||
4725 s[1] == 0)) {
eb160463 4726 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4727 if (bof) {
3280af22 4728 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4729 s = swallow_bom((U8*)s);
e929a76b 4730 }
378cc40b 4731 }
737c24fc 4732 if (PL_parser->in_pod) {
a0d0e21e 4733 /* Incest with pod. */
5db06880
NC
4734#ifdef PERL_MAD
4735 if (PL_madskills)
cd81e915 4736 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4737#endif
01a57ef7 4738 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4739 sv_setpvs(PL_linestr, "");
3280af22
NIS
4740 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4741 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4742 PL_last_lop = PL_last_uni = NULL;
737c24fc 4743 PL_parser->in_pod = 0;
a0d0e21e 4744 }
4e553d73 4745 }
85613cab
Z
4746 if (PL_rsfp)
4747 incline(s);
737c24fc 4748 } while (PL_parser->in_pod);
3280af22 4749 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4750 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4751 PL_last_lop = PL_last_uni = NULL;
57843af0 4752 if (CopLINE(PL_curcop) == 1) {
3280af22 4753 while (s < PL_bufend && isSPACE(*s))
79072805 4754 s++;
a0d0e21e 4755 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4756 s++;
5db06880
NC
4757#ifdef PERL_MAD
4758 if (PL_madskills)
cd81e915 4759 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4760#endif
bd61b366 4761 d = NULL;
3280af22 4762 if (!PL_in_eval) {
44a8e56a 4763 if (*s == '#' && *(s+1) == '!')
4764 d = s + 2;
4765#ifdef ALTERNATE_SHEBANG
4766 else {
bfed75c6 4767 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4768 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4769 d = s + (sizeof(as) - 1);
4770 }
4771#endif /* ALTERNATE_SHEBANG */
4772 }
4773 if (d) {
b8378b72 4774 char *ipath;
774d564b 4775 char *ipathend;
b8378b72 4776
774d564b 4777 while (isSPACE(*d))
b8378b72
CS
4778 d++;
4779 ipath = d;
774d564b 4780 while (*d && !isSPACE(*d))
4781 d++;
4782 ipathend = d;
4783
4784#ifdef ARG_ZERO_IS_SCRIPT
4785 if (ipathend > ipath) {
4786 /*
4787 * HP-UX (at least) sets argv[0] to the script name,
4788 * which makes $^X incorrect. And Digital UNIX and Linux,
4789 * at least, set argv[0] to the basename of the Perl
4790 * interpreter. So, having found "#!", we'll set it right.
4791 */
fafc274c
NC
4792 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4793 SVt_PV)); /* $^X */
774d564b 4794 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4795 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4796 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4797 SvSETMAGIC(x);
4798 }
556c1dec
JH
4799 else {
4800 STRLEN blen;
4801 STRLEN llen;
cfd0369c 4802 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4803 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4804 if (llen < blen) {
4805 bstart += blen - llen;
4806 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4807 sv_setpvn(x, ipath, ipathend - ipath);
4808 SvSETMAGIC(x);
4809 }
4810 }
4811 }
774d564b 4812 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4813 }
774d564b 4814#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4815
4816 /*
4817 * Look for options.
4818 */
748a9306 4819 d = instr(s,"perl -");
84e30d1a 4820 if (!d) {
748a9306 4821 d = instr(s,"perl");
84e30d1a
GS
4822#if defined(DOSISH)
4823 /* avoid getting into infinite loops when shebang
4824 * line contains "Perl" rather than "perl" */
4825 if (!d) {
4826 for (d = ipathend-4; d >= ipath; --d) {
4827 if ((*d == 'p' || *d == 'P')
4828 && !ibcmp(d, "perl", 4))
4829 {
4830 break;
4831 }
4832 }
4833 if (d < ipath)
bd61b366 4834 d = NULL;
84e30d1a
GS
4835 }
4836#endif
4837 }
44a8e56a 4838#ifdef ALTERNATE_SHEBANG
4839 /*
4840 * If the ALTERNATE_SHEBANG on this system starts with a
4841 * character that can be part of a Perl expression, then if
4842 * we see it but not "perl", we're probably looking at the
4843 * start of Perl code, not a request to hand off to some
4844 * other interpreter. Similarly, if "perl" is there, but
4845 * not in the first 'word' of the line, we assume the line
4846 * contains the start of the Perl program.
44a8e56a 4847 */
4848 if (d && *s != '#') {
f54cb97a 4849 const char *c = ipath;
44a8e56a 4850 while (*c && !strchr("; \t\r\n\f\v#", *c))
4851 c++;
4852 if (c < d)
bd61b366 4853 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4854 else
4855 *s = '#'; /* Don't try to parse shebang line */
4856 }
774d564b 4857#endif /* ALTERNATE_SHEBANG */
748a9306 4858 if (!d &&
44a8e56a 4859 *s == '#' &&
774d564b 4860 ipathend > ipath &&
3280af22 4861 !PL_minus_c &&
748a9306 4862 !instr(s,"indir") &&
3280af22 4863 instr(PL_origargv[0],"perl"))
748a9306 4864 {
27da23d5 4865 dVAR;
9f68db38 4866 char **newargv;
9f68db38 4867
774d564b 4868 *ipathend = '\0';
4869 s = ipathend + 1;
3280af22 4870 while (s < PL_bufend && isSPACE(*s))
9f68db38 4871 s++;
3280af22 4872 if (s < PL_bufend) {
d85f917e 4873 Newx(newargv,PL_origargc+3,char*);
9f68db38 4874 newargv[1] = s;
3280af22 4875 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4876 s++;
4877 *s = '\0';
3280af22 4878 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4879 }
4880 else
3280af22 4881 newargv = PL_origargv;
774d564b 4882 newargv[0] = ipath;
b35112e7 4883 PERL_FPU_PRE_EXEC
b4748376 4884 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4885 PERL_FPU_POST_EXEC
cea2e8a9 4886 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4887 }
748a9306 4888 if (d) {
c35e046a
AL
4889 while (*d && !isSPACE(*d))
4890 d++;
4891 while (SPACE_OR_TAB(*d))
4892 d++;
748a9306
LW
4893
4894 if (*d++ == '-') {
f54cb97a 4895 const bool switches_done = PL_doswitches;
fb993905
GA
4896 const U32 oldpdb = PL_perldb;
4897 const bool oldn = PL_minus_n;
4898 const bool oldp = PL_minus_p;
c7030b81 4899 const char *d1 = d;
fb993905 4900
8cc95fdb 4901 do {
4ba71d51
FC
4902 bool baduni = FALSE;
4903 if (*d1 == 'C') {
bd0ab00d
NC
4904 const char *d2 = d1 + 1;
4905 if (parse_unicode_opts((const char **)&d2)
4906 != PL_unicode)
4907 baduni = TRUE;
4ba71d51
FC
4908 }
4909 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4910 const char * const m = d1;
4911 while (*d1 && !isSPACE(*d1))
4912 d1++;
cea2e8a9 4913 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4914 (int)(d1 - m), m);
8cc95fdb 4915 }
c7030b81
NC
4916 d1 = moreswitches(d1);
4917 } while (d1);
f0b2cf55
YST
4918 if (PL_doswitches && !switches_done) {
4919 int argc = PL_origargc;
4920 char **argv = PL_origargv;
4921 do {
4922 argc--,argv++;
4923 } while (argc && argv[0][0] == '-' && argv[0][1]);
4924 init_argv_symbols(argc,argv);
4925 }
65269a95 4926 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4927 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4928 /* if we have already added "LINE: while (<>) {",
4929 we must not do it again */
748a9306 4930 {
76f68e9b 4931 sv_setpvs(PL_linestr, "");
3280af22
NIS
4932 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4933 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4934 PL_last_lop = PL_last_uni = NULL;
3280af22 4935 PL_preambled = FALSE;
65269a95 4936 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4937 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4938 goto retry;
4939 }
a0d0e21e 4940 }
79072805 4941 }
9f68db38 4942 }
79072805 4943 }
3280af22
NIS
4944 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4945 PL_bufptr = s;
4946 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4947 return yylex();
ae986130 4948 }
378cc40b 4949 goto retry;
4fdae800 4950 case '\r':
6a27c188 4951#ifdef PERL_STRICT_CR
cea2e8a9 4952 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4953 Perl_croak(aTHX_
cc507455 4954 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4955#endif
4fdae800 4956 case ' ': case '\t': case '\f': case 013:
5db06880 4957#ifdef PERL_MAD
cd81e915 4958 PL_realtokenstart = -1;
ac372eb8
RD
4959 if (!PL_thiswhite)
4960 PL_thiswhite = newSVpvs("");
4961 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4962#endif
ac372eb8 4963 s++;
378cc40b 4964 goto retry;
378cc40b 4965 case '#':
e929a76b 4966 case '\n':
5db06880 4967#ifdef PERL_MAD
cd81e915 4968 PL_realtokenstart = -1;
5db06880 4969 if (PL_madskills)
cd81e915 4970 PL_faketokens = 0;
5db06880 4971#endif
3280af22 4972 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4973 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4974 /* handle eval qq[#line 1 "foo"\n ...] */
4975 CopLINE_dec(PL_curcop);
4976 incline(s);
4977 }
5db06880
NC
4978 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4979 s = SKIPSPACE0(s);
4980 if (!PL_in_eval || PL_rsfp)
4981 incline(s);
4982 }
4983 else {
4984 d = s;
4985 while (d < PL_bufend && *d != '\n')
4986 d++;
4987 if (d < PL_bufend)
4988 d++;
4989 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4990 Perl_croak(aTHX_ "panic: input overflow");
4991#ifdef PERL_MAD
4992 if (PL_madskills)
cd81e915 4993 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4994#endif
4995 s = d;
4996 incline(s);
4997 }
3280af22
NIS
4998 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4999 PL_bufptr = s;
5000 PL_lex_state = LEX_FORMLINE;
cea2e8a9 5001 return yylex();
a687059c 5002 }
378cc40b 5003 }
a687059c 5004 else {
5db06880
NC
5005#ifdef PERL_MAD
5006 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
5007 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 5008 PL_faketokens = 0;
5db06880
NC
5009 s = SKIPSPACE0(s);
5010 TOKEN(PEG); /* make sure any #! line is accessible */
5011 }
5012 s = SKIPSPACE0(s);
5013 }
5014 else {
5015/* if (PL_madskills && PL_lex_formbrack) { */
5016 d = s;
5017 while (d < PL_bufend && *d != '\n')
5018 d++;
5019 if (d < PL_bufend)
5020 d++;
5021 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
5022 Perl_croak(aTHX_ "panic: input overflow");
5023 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 5024 if (!PL_thiswhite)
6b29d1f5 5025 PL_thiswhite = newSVpvs("");
5db06880 5026 if (CopLINE(PL_curcop) == 1) {
76f68e9b 5027 sv_setpvs(PL_thiswhite, "");
cd81e915 5028 PL_faketokens = 0;
5db06880 5029 }
cd81e915 5030 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
5031 }
5032 s = d;
5033/* }
5034 *s = '\0';
5035 PL_bufend = s; */
5036 }
5037#else
378cc40b 5038 *s = '\0';
3280af22 5039 PL_bufend = s;
5db06880 5040#endif
a687059c 5041 }
378cc40b
LW
5042 goto retry;
5043 case '-':
79072805 5044 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 5045 I32 ftst = 0;
90771dc0 5046 char tmp;
e5edeb50 5047
378cc40b 5048 s++;
3280af22 5049 PL_bufptr = s;
748a9306
LW
5050 tmp = *s++;
5051
bf4acbe4 5052 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5053 s++;
5054
5055 if (strnEQ(s,"=>",2)) {
3280af22 5056 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5057 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5058 OPERATOR('-'); /* unary minus */
5059 }
3280af22 5060 PL_last_uni = PL_oldbufptr;
748a9306 5061 switch (tmp) {
e5edeb50
JH
5062 case 'r': ftst = OP_FTEREAD; break;
5063 case 'w': ftst = OP_FTEWRITE; break;
5064 case 'x': ftst = OP_FTEEXEC; break;
5065 case 'o': ftst = OP_FTEOWNED; break;
5066 case 'R': ftst = OP_FTRREAD; break;
5067 case 'W': ftst = OP_FTRWRITE; break;
5068 case 'X': ftst = OP_FTREXEC; break;
5069 case 'O': ftst = OP_FTROWNED; break;
5070 case 'e': ftst = OP_FTIS; break;
5071 case 'z': ftst = OP_FTZERO; break;
5072 case 's': ftst = OP_FTSIZE; break;
5073 case 'f': ftst = OP_FTFILE; break;
5074 case 'd': ftst = OP_FTDIR; break;
5075 case 'l': ftst = OP_FTLINK; break;
5076 case 'p': ftst = OP_FTPIPE; break;
5077 case 'S': ftst = OP_FTSOCK; break;
5078 case 'u': ftst = OP_FTSUID; break;
5079 case 'g': ftst = OP_FTSGID; break;
5080 case 'k': ftst = OP_FTSVTX; break;
5081 case 'b': ftst = OP_FTBLK; break;
5082 case 'c': ftst = OP_FTCHR; break;
5083 case 't': ftst = OP_FTTTY; break;
5084 case 'T': ftst = OP_FTTEXT; break;
5085 case 'B': ftst = OP_FTBINARY; break;
5086 case 'M': case 'A': case 'C':
fafc274c 5087 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5088 switch (tmp) {
5089 case 'M': ftst = OP_FTMTIME; break;
5090 case 'A': ftst = OP_FTATIME; break;
5091 case 'C': ftst = OP_FTCTIME; break;
5092 default: break;
5093 }
5094 break;
378cc40b 5095 default:
378cc40b
LW
5096 break;
5097 }
e5edeb50 5098 if (ftst) {
eb160463 5099 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5100 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5101 "### Saw file test %c\n", (int)tmp);
5f80b19c 5102 } );
e5edeb50
JH
5103 FTST(ftst);
5104 }
5105 else {
5106 /* Assume it was a minus followed by a one-letter named
5107 * subroutine call (or a -bareword), then. */
95c31fe3 5108 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5109 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5110 (int) tmp);
5f80b19c 5111 } );
3cf7b4c4 5112 s = --PL_bufptr;
e5edeb50 5113 }
378cc40b 5114 }
90771dc0
NC
5115 {
5116 const char tmp = *s++;
5117 if (*s == tmp) {
5118 s++;
5119 if (PL_expect == XOPERATOR)
5120 TERM(POSTDEC);
5121 else
5122 OPERATOR(PREDEC);
5123 }
5124 else if (*s == '>') {
5125 s++;
29595ff2 5126 s = SKIPSPACE1(s);
90771dc0
NC
5127 if (isIDFIRST_lazy_if(s,UTF)) {
5128 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5129 TOKEN(ARROW);
5130 }
5131 else if (*s == '$')
5132 OPERATOR(ARROW);
5133 else
5134 TERM(ARROW);
5135 }
3280af22 5136 if (PL_expect == XOPERATOR)
90771dc0
NC
5137 Aop(OP_SUBTRACT);
5138 else {
5139 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5140 check_uni();
5141 OPERATOR('-'); /* unary minus */
79072805 5142 }
2f3197b3 5143 }
79072805 5144
378cc40b 5145 case '+':
90771dc0
NC
5146 {
5147 const char tmp = *s++;
5148 if (*s == tmp) {
5149 s++;
5150 if (PL_expect == XOPERATOR)
5151 TERM(POSTINC);
5152 else
5153 OPERATOR(PREINC);
5154 }
3280af22 5155 if (PL_expect == XOPERATOR)
90771dc0
NC
5156 Aop(OP_ADD);
5157 else {
5158 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5159 check_uni();
5160 OPERATOR('+');
5161 }
2f3197b3 5162 }
a687059c 5163
378cc40b 5164 case '*':
3280af22
NIS
5165 if (PL_expect != XOPERATOR) {
5166 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5167 PL_expect = XOPERATOR;
5168 force_ident(PL_tokenbuf, '*');
5169 if (!*PL_tokenbuf)
a0d0e21e 5170 PREREF('*');
79072805 5171 TERM('*');
a687059c 5172 }
79072805
LW
5173 s++;
5174 if (*s == '*') {
a687059c 5175 s++;
79072805 5176 PWop(OP_POW);
a687059c 5177 }
79072805
LW
5178 Mop(OP_MULTIPLY);
5179
378cc40b 5180 case '%':
3280af22 5181 if (PL_expect == XOPERATOR) {
bbce6d69 5182 ++s;
5183 Mop(OP_MODULO);
a687059c 5184 }
3280af22 5185 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5186 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5187 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5188 if (!PL_tokenbuf[1]) {
bbce6d69 5189 PREREF('%');
a687059c 5190 }
3280af22 5191 PL_pending_ident = '%';
bbce6d69 5192 TERM('%');
a687059c 5193
378cc40b 5194 case '^':
79072805 5195 s++;
a0d0e21e 5196 BOop(OP_BIT_XOR);
79072805 5197 case '[':
a7aaec61
Z
5198 if (PL_lex_brackets > 100)
5199 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
5200 PL_lex_brackstack[PL_lex_brackets++] = 0;
df3467db
IG
5201 {
5202 const char tmp = *s++;
5203 OPERATOR(tmp);
5204 }
378cc40b 5205 case '~':
0d863452 5206 if (s[1] == '~'
3e7dd34d 5207 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
5208 {
5209 s += 2;
5210 Eop(OP_SMARTMATCH);
5211 }
378cc40b 5212 case ',':
90771dc0
NC
5213 {
5214 const char tmp = *s++;
5215 OPERATOR(tmp);
5216 }
a0d0e21e
LW
5217 case ':':
5218 if (s[1] == ':') {
5219 len = 0;
0bfa2a8a 5220 goto just_a_word_zero_gv;
a0d0e21e
LW
5221 }
5222 s++;
09bef843
SB
5223 switch (PL_expect) {
5224 OP *attrs;
5db06880
NC
5225#ifdef PERL_MAD
5226 I32 stuffstart;
5227#endif
09bef843
SB
5228 case XOPERATOR:
5229 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5230 break;
5231 PL_bufptr = s; /* update in case we back off */
d83f38d8 5232 if (*s == '=') {
2dc78664
NC
5233 Perl_croak(aTHX_
5234 "Use of := for an empty attribute list is not allowed");
d83f38d8 5235 }
09bef843
SB
5236 goto grabattrs;
5237 case XATTRBLOCK:
5238 PL_expect = XBLOCK;
5239 goto grabattrs;
5240 case XATTRTERM:
5241 PL_expect = XTERMBLOCK;
5242 grabattrs:
5db06880
NC
5243#ifdef PERL_MAD
5244 stuffstart = s - SvPVX(PL_linestr) - 1;
5245#endif
29595ff2 5246 s = PEEKSPACE(s);
5f66b61c 5247 attrs = NULL;
7e2040f0 5248 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5249 I32 tmp;
5cc237b8 5250 SV *sv;
09bef843 5251 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5252 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5253 if (tmp < 0) tmp = -tmp;
5254 switch (tmp) {
5255 case KEY_or:
5256 case KEY_and:
5257 case KEY_for:
11baf631 5258 case KEY_foreach:
f9829d6b
GS
5259 case KEY_unless:
5260 case KEY_if:
5261 case KEY_while:
5262 case KEY_until:
5263 goto got_attrs;
5264 default:
5265 break;
5266 }
5267 }
5cc237b8 5268 sv = newSVpvn(s, len);
09bef843
SB
5269 if (*d == '(') {
5270 d = scan_str(d,TRUE,TRUE);
5271 if (!d) {
09bef843
SB
5272 /* MUST advance bufptr here to avoid bogus
5273 "at end of line" context messages from yyerror().
5274 */
5275 PL_bufptr = s + len;
5276 yyerror("Unterminated attribute parameter in attribute list");
5277 if (attrs)
5278 op_free(attrs);
5cc237b8 5279 sv_free(sv);
bbf60fe6 5280 return REPORT(0); /* EOF indicator */
09bef843
SB
5281 }
5282 }
5283 if (PL_lex_stuff) {
09bef843 5284 sv_catsv(sv, PL_lex_stuff);
2fcb4757 5285 attrs = op_append_elem(OP_LIST, attrs,
09bef843
SB
5286 newSVOP(OP_CONST, 0, sv));
5287 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5288 PL_lex_stuff = NULL;
09bef843
SB
5289 }
5290 else {
5cc237b8
BS
5291 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5292 sv_free(sv);
1108974d 5293 if (PL_in_my == KEY_our) {
df9a6019 5294 deprecate(":unique");
1108974d 5295 }
bfed75c6 5296 else
371fce9b
DM
5297 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5298 }
5299
d3cea301
SB
5300 /* NOTE: any CV attrs applied here need to be part of
5301 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5302 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5303 sv_free(sv);
78f9721b 5304 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5305 }
5306 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5307 sv_free(sv);
8e5dadda 5308 deprecate(":locked");
5cc237b8
BS
5309 }
5310 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5311 sv_free(sv);
78f9721b 5312 CvMETHOD_on(PL_compcv);
5cc237b8 5313 }
78f9721b
SM
5314 /* After we've set the flags, it could be argued that
5315 we don't need to do the attributes.pm-based setting
5316 process, and shouldn't bother appending recognized
d3cea301
SB
5317 flags. To experiment with that, uncomment the
5318 following "else". (Note that's already been
5319 uncommented. That keeps the above-applied built-in
5320 attributes from being intercepted (and possibly
5321 rejected) by a package's attribute routines, but is
5322 justified by the performance win for the common case
5323 of applying only built-in attributes.) */
0256094b 5324 else
2fcb4757 5325 attrs = op_append_elem(OP_LIST, attrs,
78f9721b 5326 newSVOP(OP_CONST, 0,
5cc237b8 5327 sv));
09bef843 5328 }
29595ff2 5329 s = PEEKSPACE(d);
0120eecf 5330 if (*s == ':' && s[1] != ':')
29595ff2 5331 s = PEEKSPACE(s+1);
0120eecf
GS
5332 else if (s == d)
5333 break; /* require real whitespace or :'s */
29595ff2 5334 /* XXX losing whitespace on sequential attributes here */
09bef843 5335 }
90771dc0
NC
5336 {
5337 const char tmp
5338 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5339 if (*s != ';' && *s != '}' && *s != tmp
5340 && (tmp != '=' || *s != ')')) {
5341 const char q = ((*s == '\'') ? '"' : '\'');
5342 /* If here for an expression, and parsed no attrs, back
5343 off. */
5344 if (tmp == '=' && !attrs) {
5345 s = PL_bufptr;
5346 break;
5347 }
5348 /* MUST advance bufptr here to avoid bogus "at end of line"
5349 context messages from yyerror().
5350 */
5351 PL_bufptr = s;
10edeb5d
JH
5352 yyerror( (const char *)
5353 (*s
5354 ? Perl_form(aTHX_ "Invalid separator character "
5355 "%c%c%c in attribute list", q, *s, q)
5356 : "Unterminated attribute list" ) );
90771dc0
NC
5357 if (attrs)
5358 op_free(attrs);
5359 OPERATOR(':');
09bef843 5360 }
09bef843 5361 }
f9829d6b 5362 got_attrs:
09bef843 5363 if (attrs) {
cd81e915 5364 start_force(PL_curforce);
9ded7720 5365 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5366 CURMAD('_', PL_nextwhite);
89122651 5367 force_next(THING);
5db06880
NC
5368 }
5369#ifdef PERL_MAD
5370 if (PL_madskills) {
cd81e915 5371 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5372 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5373 }
5db06880 5374#endif
09bef843
SB
5375 TOKEN(COLONATTR);
5376 }
a0d0e21e 5377 OPERATOR(':');
8990e307
LW
5378 case '(':
5379 s++;
3280af22
NIS
5380 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5381 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5382 else
3280af22 5383 PL_expect = XTERM;
29595ff2 5384 s = SKIPSPACE1(s);
a0d0e21e 5385 TOKEN('(');
378cc40b 5386 case ';':
f4dd75d9 5387 CLINE;
90771dc0
NC
5388 {
5389 const char tmp = *s++;
5390 OPERATOR(tmp);
5391 }
378cc40b 5392 case ')':
90771dc0
NC
5393 {
5394 const char tmp = *s++;
29595ff2 5395 s = SKIPSPACE1(s);
90771dc0
NC
5396 if (*s == '{')
5397 PREBLOCK(tmp);
5398 TERM(tmp);
5399 }
79072805 5400 case ']':
a7aaec61
Z
5401 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5402 TOKEN(0);
79072805 5403 s++;
3280af22 5404 if (PL_lex_brackets <= 0)
d98d5fff 5405 yyerror("Unmatched right square bracket");
463ee0b2 5406 else
3280af22
NIS
5407 --PL_lex_brackets;
5408 if (PL_lex_state == LEX_INTERPNORMAL) {
5409 if (PL_lex_brackets == 0) {
02255c60
FC
5410 if (*s == '-' && s[1] == '>')
5411 PL_lex_state = LEX_INTERPENDMAYBE;
5412 else if (*s != '[' && *s != '{')
3280af22 5413 PL_lex_state = LEX_INTERPEND;
79072805
LW
5414 }
5415 }
4633a7c4 5416 TERM(']');
79072805
LW
5417 case '{':
5418 leftbracket:
79072805 5419 s++;
3280af22 5420 if (PL_lex_brackets > 100) {
8edd5f42 5421 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5422 }
3280af22 5423 switch (PL_expect) {
a0d0e21e 5424 case XTERM:
3280af22 5425 if (PL_lex_formbrack) {
a0d0e21e
LW
5426 s--;
5427 PRETERMBLOCK(DO);
5428 }
3280af22
NIS
5429 if (PL_oldoldbufptr == PL_last_lop)
5430 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5431 else
3280af22 5432 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5433 OPERATOR(HASHBRACK);
a0d0e21e 5434 case XOPERATOR:
bf4acbe4 5435 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5436 s++;
44a8e56a 5437 d = s;
3280af22
NIS
5438 PL_tokenbuf[0] = '\0';
5439 if (d < PL_bufend && *d == '-') {
5440 PL_tokenbuf[0] = '-';
44a8e56a 5441 d++;
bf4acbe4 5442 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5443 d++;
5444 }
7e2040f0 5445 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5446 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5447 FALSE, &len);
bf4acbe4 5448 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5449 d++;
5450 if (*d == '}') {
f54cb97a 5451 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5452 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5453 if (minus)
5454 force_next('-');
748a9306
LW
5455 }
5456 }
5457 /* FALL THROUGH */
09bef843 5458 case XATTRBLOCK:
748a9306 5459 case XBLOCK:
3280af22
NIS
5460 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5461 PL_expect = XSTATE;
a0d0e21e 5462 break;
09bef843 5463 case XATTRTERM:
a0d0e21e 5464 case XTERMBLOCK:
3280af22
NIS
5465 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5466 PL_expect = XSTATE;
a0d0e21e
LW
5467 break;
5468 default: {
f54cb97a 5469 const char *t;
3280af22
NIS
5470 if (PL_oldoldbufptr == PL_last_lop)
5471 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5472 else
3280af22 5473 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5474 s = SKIPSPACE1(s);
8452ff4b
SB
5475 if (*s == '}') {
5476 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5477 PL_expect = XTERM;
5478 /* This hack is to get the ${} in the message. */
5479 PL_bufptr = s+1;
5480 yyerror("syntax error");
5481 break;
5482 }
a0d0e21e 5483 OPERATOR(HASHBRACK);
8452ff4b 5484 }
b8a4b1be
GS
5485 /* This hack serves to disambiguate a pair of curlies
5486 * as being a block or an anon hash. Normally, expectation
5487 * determines that, but in cases where we're not in a
5488 * position to expect anything in particular (like inside
5489 * eval"") we have to resolve the ambiguity. This code
5490 * covers the case where the first term in the curlies is a
5491 * quoted string. Most other cases need to be explicitly
a0288114 5492 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5493 * curly in order to force resolution as an anon hash.
5494 *
5495 * XXX should probably propagate the outer expectation
5496 * into eval"" to rely less on this hack, but that could
5497 * potentially break current behavior of eval"".
5498 * GSAR 97-07-21
5499 */
5500 t = s;
5501 if (*s == '\'' || *s == '"' || *s == '`') {
5502 /* common case: get past first string, handling escapes */
3280af22 5503 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5504 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5505 t++;
5506 t++;
a0d0e21e 5507 }
b8a4b1be 5508 else if (*s == 'q') {
3280af22 5509 if (++t < PL_bufend
b8a4b1be 5510 && (!isALNUM(*t)
3280af22 5511 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5512 && !isALNUM(*t))))
5513 {
abc667d1 5514 /* skip q//-like construct */
f54cb97a 5515 const char *tmps;
b8a4b1be
GS
5516 char open, close, term;
5517 I32 brackets = 1;
5518
3280af22 5519 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5520 t++;
abc667d1
DM
5521 /* check for q => */
5522 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5523 OPERATOR(HASHBRACK);
5524 }
b8a4b1be
GS
5525 term = *t;
5526 open = term;
5527 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5528 term = tmps[5];
5529 close = term;
5530 if (open == close)
3280af22
NIS
5531 for (t++; t < PL_bufend; t++) {
5532 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5533 t++;
6d07e5e9 5534 else if (*t == open)
b8a4b1be
GS
5535 break;
5536 }
abc667d1 5537 else {
3280af22
NIS
5538 for (t++; t < PL_bufend; t++) {
5539 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5540 t++;
6d07e5e9 5541 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5542 break;
5543 else if (*t == open)
5544 brackets++;
5545 }
abc667d1
DM
5546 }
5547 t++;
b8a4b1be 5548 }
abc667d1
DM
5549 else
5550 /* skip plain q word */
5551 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5552 t += UTF8SKIP(t);
a0d0e21e 5553 }
7e2040f0 5554 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5555 t += UTF8SKIP(t);
7e2040f0 5556 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5557 t += UTF8SKIP(t);
a0d0e21e 5558 }
3280af22 5559 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5560 t++;
b8a4b1be
GS
5561 /* if comma follows first term, call it an anon hash */
5562 /* XXX it could be a comma expression with loop modifiers */
3280af22 5563 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5564 || (*t == '=' && t[1] == '>')))
a0d0e21e 5565 OPERATOR(HASHBRACK);
3280af22 5566 if (PL_expect == XREF)
4e4e412b 5567 PL_expect = XTERM;
a0d0e21e 5568 else {
3280af22
NIS
5569 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5570 PL_expect = XSTATE;
a0d0e21e 5571 }
8990e307 5572 }
a0d0e21e 5573 break;
463ee0b2 5574 }
6154021b 5575 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5576 if (isSPACE(*s) || *s == '#')
3280af22 5577 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5578 TOKEN('{');
378cc40b 5579 case '}':
a7aaec61
Z
5580 if (PL_lex_brackets && PL_lex_brackstack[PL_lex_brackets-1] == XFAKEEOF)
5581 TOKEN(0);
79072805
LW
5582 rightbracket:
5583 s++;
3280af22 5584 if (PL_lex_brackets <= 0)
d98d5fff 5585 yyerror("Unmatched right curly bracket");
463ee0b2 5586 else
3280af22 5587 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5588 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5589 PL_lex_formbrack = 0;
5590 if (PL_lex_state == LEX_INTERPNORMAL) {
5591 if (PL_lex_brackets == 0) {
9059aa12
LW
5592 if (PL_expect & XFAKEBRACK) {
5593 PL_expect &= XENUMMASK;
3280af22
NIS
5594 PL_lex_state = LEX_INTERPEND;
5595 PL_bufptr = s;
5db06880
NC
5596#if 0
5597 if (PL_madskills) {
cd81e915 5598 if (!PL_thiswhite)
6b29d1f5 5599 PL_thiswhite = newSVpvs("");
76f68e9b 5600 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5601 }
5602#endif
cea2e8a9 5603 return yylex(); /* ignore fake brackets */
79072805 5604 }
fa83b5b6 5605 if (*s == '-' && s[1] == '>')
3280af22 5606 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5607 else if (*s != '[' && *s != '{')
3280af22 5608 PL_lex_state = LEX_INTERPEND;
79072805
LW
5609 }
5610 }
9059aa12
LW
5611 if (PL_expect & XFAKEBRACK) {
5612 PL_expect &= XENUMMASK;
3280af22 5613 PL_bufptr = s;
cea2e8a9 5614 return yylex(); /* ignore fake brackets */
748a9306 5615 }
cd81e915 5616 start_force(PL_curforce);
5db06880
NC
5617 if (PL_madskills) {
5618 curmad('X', newSVpvn(s-1,1));
cd81e915 5619 CURMAD('_', PL_thiswhite);
5db06880 5620 }
79072805 5621 force_next('}');
5db06880 5622#ifdef PERL_MAD
cd81e915 5623 if (!PL_thistoken)
6b29d1f5 5624 PL_thistoken = newSVpvs("");
5db06880 5625#endif
79072805 5626 TOKEN(';');
378cc40b
LW
5627 case '&':
5628 s++;
90771dc0 5629 if (*s++ == '&')
a0d0e21e 5630 AOPERATOR(ANDAND);
378cc40b 5631 s--;
3280af22 5632 if (PL_expect == XOPERATOR) {
041457d9
DM
5633 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5634 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5635 {
57843af0 5636 CopLINE_dec(PL_curcop);
f1f66076 5637 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5638 CopLINE_inc(PL_curcop);
463ee0b2 5639 }
79072805 5640 BAop(OP_BIT_AND);
463ee0b2 5641 }
79072805 5642
3280af22
NIS
5643 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5644 if (*PL_tokenbuf) {
5645 PL_expect = XOPERATOR;
5646 force_ident(PL_tokenbuf, '&');
463ee0b2 5647 }
79072805
LW
5648 else
5649 PREREF('&');
6154021b 5650 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5651 TERM('&');
5652
378cc40b
LW
5653 case '|':
5654 s++;
90771dc0 5655 if (*s++ == '|')
a0d0e21e 5656 AOPERATOR(OROR);
378cc40b 5657 s--;
79072805 5658 BOop(OP_BIT_OR);
378cc40b
LW
5659 case '=':
5660 s++;
748a9306 5661 {
90771dc0
NC
5662 const char tmp = *s++;
5663 if (tmp == '=')
5664 Eop(OP_EQ);
5665 if (tmp == '>')
5666 OPERATOR(',');
5667 if (tmp == '~')
5668 PMop(OP_MATCH);
5669 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5670 && strchr("+-*/%.^&|<",tmp))
5671 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5672 "Reversed %c= operator",(int)tmp);
5673 s--;
5674 if (PL_expect == XSTATE && isALPHA(tmp) &&
5675 (s == PL_linestart+1 || s[-2] == '\n') )
5676 {
5677 if (PL_in_eval && !PL_rsfp) {
5678 d = PL_bufend;
5679 while (s < d) {
5680 if (*s++ == '\n') {
5681 incline(s);
5682 if (strnEQ(s,"=cut",4)) {
5683 s = strchr(s,'\n');
5684 if (s)
5685 s++;
5686 else
5687 s = d;
5688 incline(s);
5689 goto retry;
5690 }
5691 }
a5f75d66 5692 }
90771dc0 5693 goto retry;
a5f75d66 5694 }
5db06880
NC
5695#ifdef PERL_MAD
5696 if (PL_madskills) {
cd81e915 5697 if (!PL_thiswhite)
6b29d1f5 5698 PL_thiswhite = newSVpvs("");
cd81e915 5699 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5700 PL_bufend - PL_linestart);
5701 }
5702#endif
90771dc0 5703 s = PL_bufend;
737c24fc 5704 PL_parser->in_pod = 1;
90771dc0 5705 goto retry;
a5f75d66 5706 }
a0d0e21e 5707 }
3280af22 5708 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5709 const char *t = s;
51882d45 5710#ifdef PERL_STRICT_CR
c35e046a 5711 while (SPACE_OR_TAB(*t))
51882d45 5712#else
c35e046a 5713 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5714#endif
c35e046a 5715 t++;
a0d0e21e
LW
5716 if (*t == '\n' || *t == '#') {
5717 s--;
3280af22 5718 PL_expect = XBLOCK;
a0d0e21e
LW
5719 goto leftbracket;
5720 }
79072805 5721 }
6154021b 5722 pl_yylval.ival = 0;
a0d0e21e 5723 OPERATOR(ASSIGNOP);
378cc40b
LW
5724 case '!':
5725 s++;
90771dc0
NC
5726 {
5727 const char tmp = *s++;
5728 if (tmp == '=') {
5729 /* was this !=~ where !~ was meant?
5730 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5731
5732 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5733 const char *t = s+1;
5734
5735 while (t < PL_bufend && isSPACE(*t))
5736 ++t;
5737
5738 if (*t == '/' || *t == '?' ||
5739 ((*t == 'm' || *t == 's' || *t == 'y')
5740 && !isALNUM(t[1])) ||
5741 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5742 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5743 "!=~ should be !~");
5744 }
5745 Eop(OP_NE);
5746 }
5747 if (tmp == '~')
5748 PMop(OP_NOT);
5749 }
378cc40b
LW
5750 s--;
5751 OPERATOR('!');
5752 case '<':
3280af22 5753 if (PL_expect != XOPERATOR) {
93a17b20 5754 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5755 check_uni();
79072805
LW
5756 if (s[1] == '<')
5757 s = scan_heredoc(s);
5758 else
5759 s = scan_inputsymbol(s);
5760 TERM(sublex_start());
378cc40b
LW
5761 }
5762 s++;
90771dc0
NC
5763 {
5764 char tmp = *s++;
5765 if (tmp == '<')
5766 SHop(OP_LEFT_SHIFT);
5767 if (tmp == '=') {
5768 tmp = *s++;
5769 if (tmp == '>')
5770 Eop(OP_NCMP);
5771 s--;
5772 Rop(OP_LE);
5773 }
395c3793 5774 }
378cc40b 5775 s--;
79072805 5776 Rop(OP_LT);
378cc40b
LW
5777 case '>':
5778 s++;
90771dc0
NC
5779 {
5780 const char tmp = *s++;
5781 if (tmp == '>')
5782 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5783 else if (tmp == '=')
90771dc0
NC
5784 Rop(OP_GE);
5785 }
378cc40b 5786 s--;
79072805 5787 Rop(OP_GT);
378cc40b
LW
5788
5789 case '$':
bbce6d69 5790 CLINE;
5791
3280af22
NIS
5792 if (PL_expect == XOPERATOR) {
5793 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5794 return deprecate_commaless_var_list();
a0d0e21e 5795 }
8990e307 5796 }
a0d0e21e 5797
c0b977fd 5798 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5799 PL_tokenbuf[0] = '@';
376b8730
SM
5800 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5801 sizeof PL_tokenbuf - 1, FALSE);
5802 if (PL_expect == XOPERATOR)
5803 no_op("Array length", s);
3280af22 5804 if (!PL_tokenbuf[1])
a0d0e21e 5805 PREREF(DOLSHARP);
3280af22
NIS
5806 PL_expect = XOPERATOR;
5807 PL_pending_ident = '#';
463ee0b2 5808 TOKEN(DOLSHARP);
79072805 5809 }
bbce6d69 5810
3280af22 5811 PL_tokenbuf[0] = '$';
376b8730
SM
5812 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5813 sizeof PL_tokenbuf - 1, FALSE);
5814 if (PL_expect == XOPERATOR)
5815 no_op("Scalar", s);
3280af22
NIS
5816 if (!PL_tokenbuf[1]) {
5817 if (s == PL_bufend)
bbce6d69 5818 yyerror("Final $ should be \\$ or $name");
5819 PREREF('$');
8990e307 5820 }
a0d0e21e 5821
bbce6d69 5822 /* This kludge not intended to be bulletproof. */
3280af22 5823 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5824 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5825 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5826 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5827 TERM(THING);
5828 }
5829
ff68c719 5830 d = s;
90771dc0
NC
5831 {
5832 const char tmp = *s;
ae28bb2a 5833 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5834 s = SKIPSPACE1(s);
ff68c719 5835
90771dc0
NC
5836 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5837 && intuit_more(s)) {
5838 if (*s == '[') {
5839 PL_tokenbuf[0] = '@';
5840 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5841 char *t = s+1;
5842
5843 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5844 t++;
90771dc0 5845 if (*t++ == ',') {
29595ff2 5846 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5847 while (t < PL_bufend && *t != ']')
5848 t++;
9014280d 5849 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5850 "Multidimensional syntax %.*s not supported",
36c7798d 5851 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5852 }
748a9306 5853 }
93a17b20 5854 }
90771dc0
NC
5855 else if (*s == '{') {
5856 char *t;
5857 PL_tokenbuf[0] = '%';
5858 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5859 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5860 {
5861 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5862 do {
5863 t++;
5864 } while (isSPACE(*t));
90771dc0 5865 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5866 STRLEN len;
90771dc0 5867 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5868 &len);
c35e046a
AL
5869 while (isSPACE(*t))
5870 t++;
780a5241 5871 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5872 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5873 "You need to quote \"%s\"",
5874 tmpbuf);
5875 }
5876 }
5877 }
93a17b20 5878 }
bbce6d69 5879
90771dc0
NC
5880 PL_expect = XOPERATOR;
5881 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5882 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5883 if (!islop || PL_last_lop_op == OP_GREPSTART)
5884 PL_expect = XOPERATOR;
5885 else if (strchr("$@\"'`q", *s))
5886 PL_expect = XTERM; /* e.g. print $fh "foo" */
5887 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5888 PL_expect = XTERM; /* e.g. print $fh &sub */
5889 else if (isIDFIRST_lazy_if(s,UTF)) {
5890 char tmpbuf[sizeof PL_tokenbuf];
5891 int t2;
5892 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5893 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5894 /* binary operators exclude handle interpretations */
5895 switch (t2) {
5896 case -KEY_x:
5897 case -KEY_eq:
5898 case -KEY_ne:
5899 case -KEY_gt:
5900 case -KEY_lt:
5901 case -KEY_ge:
5902 case -KEY_le:
5903 case -KEY_cmp:
5904 break;
5905 default:
5906 PL_expect = XTERM; /* e.g. print $fh length() */
5907 break;
5908 }
5909 }
5910 else {
5911 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5912 }
5913 }
90771dc0
NC
5914 else if (isDIGIT(*s))
5915 PL_expect = XTERM; /* e.g. print $fh 3 */
5916 else if (*s == '.' && isDIGIT(s[1]))
5917 PL_expect = XTERM; /* e.g. print $fh .3 */
5918 else if ((*s == '?' || *s == '-' || *s == '+')
5919 && !isSPACE(s[1]) && s[1] != '=')
5920 PL_expect = XTERM; /* e.g. print $fh -1 */
5921 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5922 && s[1] != '/')
5923 PL_expect = XTERM; /* e.g. print $fh /.../
5924 XXX except DORDOR operator
5925 */
5926 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5927 && s[2] != '=')
5928 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5929 }
bbce6d69 5930 }
3280af22 5931 PL_pending_ident = '$';
79072805 5932 TOKEN('$');
378cc40b
LW
5933
5934 case '@':
3280af22 5935 if (PL_expect == XOPERATOR)
bbce6d69 5936 no_op("Array", s);
3280af22
NIS
5937 PL_tokenbuf[0] = '@';
5938 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5939 if (!PL_tokenbuf[1]) {
bbce6d69 5940 PREREF('@');
5941 }
3280af22 5942 if (PL_lex_state == LEX_NORMAL)
29595ff2 5943 s = SKIPSPACE1(s);
3280af22 5944 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5945 if (*s == '{')
3280af22 5946 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5947
5948 /* Warn about @ where they meant $. */
041457d9
DM
5949 if (*s == '[' || *s == '{') {
5950 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5951 const char *t = s + 1;
7e2040f0 5952 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5953 t++;
5954 if (*t == '}' || *t == ']') {
5955 t++;
29595ff2 5956 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5957 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5958 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5959 (int)(t-PL_bufptr), PL_bufptr,
5960 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5961 }
93a17b20
LW
5962 }
5963 }
463ee0b2 5964 }
3280af22 5965 PL_pending_ident = '@';
79072805 5966 TERM('@');
378cc40b 5967
c963b151 5968 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5969 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5970 s += 2;
5971 AOPERATOR(DORDOR);
5972 }
c963b151 5973 case '?': /* may either be conditional or pattern */
be25f609 5974 if (PL_expect == XOPERATOR) {
90771dc0 5975 char tmp = *s++;
c963b151 5976 if(tmp == '?') {
be25f609 5977 OPERATOR('?');
c963b151
BD
5978 }
5979 else {
5980 tmp = *s++;
5981 if(tmp == '/') {
5982 /* A // operator. */
5983 AOPERATOR(DORDOR);
5984 }
5985 else {
5986 s--;
5987 Mop(OP_DIVIDE);
5988 }
5989 }
5990 }
5991 else {
5992 /* Disable warning on "study /blah/" */
5993 if (PL_oldoldbufptr == PL_last_uni
5994 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5995 || memNE(PL_last_uni, "study", 5)
5996 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5997 ))
5998 check_uni();
725a61d7
Z
5999 if (*s == '?')
6000 deprecate("?PATTERN? without explicit operator");
c963b151
BD
6001 s = scan_pat(s,OP_MATCH);
6002 TERM(sublex_start());
6003 }
378cc40b
LW
6004
6005 case '.':
51882d45
GS
6006 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
6007#ifdef PERL_STRICT_CR
6008 && s[1] == '\n'
6009#else
6010 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
6011#endif
6012 && (s == PL_linestart || s[-1] == '\n') )
6013 {
3280af22
NIS
6014 PL_lex_formbrack = 0;
6015 PL_expect = XSTATE;
79072805
LW
6016 goto rightbracket;
6017 }
be25f609 6018 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
6019 s += 3;
6020 OPERATOR(YADAYADA);
6021 }
3280af22 6022 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 6023 char tmp = *s++;
a687059c
LW
6024 if (*s == tmp) {
6025 s++;
2f3197b3
LW
6026 if (*s == tmp) {
6027 s++;
6154021b 6028 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
6029 }
6030 else
6154021b 6031 pl_yylval.ival = 0;
378cc40b 6032 OPERATOR(DOTDOT);
a687059c 6033 }
79072805 6034 Aop(OP_CONCAT);
378cc40b
LW
6035 }
6036 /* FALL THROUGH */
6037 case '0': case '1': case '2': case '3': case '4':
6038 case '5': case '6': case '7': case '8': case '9':
6154021b 6039 s = scan_num(s, &pl_yylval);
931e0695 6040 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 6041 if (PL_expect == XOPERATOR)
8990e307 6042 no_op("Number",s);
79072805
LW
6043 TERM(THING);
6044
6045 case '\'':
5db06880 6046 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6047 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6048 if (PL_expect == XOPERATOR) {
6049 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6050 return deprecate_commaless_var_list();
a0d0e21e 6051 }
463ee0b2 6052 else
8990e307 6053 no_op("String",s);
463ee0b2 6054 }
79072805 6055 if (!s)
d4c19fe8 6056 missingterm(NULL);
6154021b 6057 pl_yylval.ival = OP_CONST;
79072805
LW
6058 TERM(sublex_start());
6059
6060 case '"':
5db06880 6061 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6062 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6063 if (PL_expect == XOPERATOR) {
6064 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6065 return deprecate_commaless_var_list();
a0d0e21e 6066 }
463ee0b2 6067 else
8990e307 6068 no_op("String",s);
463ee0b2 6069 }
79072805 6070 if (!s)
d4c19fe8 6071 missingterm(NULL);
6154021b 6072 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6073 /* FIXME. I think that this can be const if char *d is replaced by
6074 more localised variables. */
3280af22 6075 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6076 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6077 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6078 break;
6079 }
6080 }
79072805
LW
6081 TERM(sublex_start());
6082
6083 case '`':
5db06880 6084 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6085 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6086 if (PL_expect == XOPERATOR)
8990e307 6087 no_op("Backticks",s);
79072805 6088 if (!s)
d4c19fe8 6089 missingterm(NULL);
9b201d7d 6090 readpipe_override();
79072805
LW
6091 TERM(sublex_start());
6092
6093 case '\\':
6094 s++;
a2a5de95
NC
6095 if (PL_lex_inwhat && isDIGIT(*s))
6096 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6097 *s, *s);
3280af22 6098 if (PL_expect == XOPERATOR)
8990e307 6099 no_op("Backslash",s);
79072805
LW
6100 OPERATOR(REFGEN);
6101
a7cb1f99 6102 case 'v':
e526c9e6 6103 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6104 char *start = s + 2;
dd629d5b 6105 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6106 start++;
6107 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6108 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6109 TERM(THING);
6110 }
e526c9e6 6111 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6112 else if (!isALPHA(*start) && (PL_expect == XTERM
6113 || PL_expect == XREF || PL_expect == XSTATE
6114 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6115 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6116 if (!gv) {
6154021b 6117 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6118 TERM(THING);
6119 }
6120 }
a7cb1f99
GS
6121 }
6122 goto keylookup;
79072805 6123 case 'x':
3280af22 6124 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6125 s++;
6126 Mop(OP_REPEAT);
2f3197b3 6127 }
79072805
LW
6128 goto keylookup;
6129
378cc40b 6130 case '_':
79072805
LW
6131 case 'a': case 'A':
6132 case 'b': case 'B':
6133 case 'c': case 'C':
6134 case 'd': case 'D':
6135 case 'e': case 'E':
6136 case 'f': case 'F':
6137 case 'g': case 'G':
6138 case 'h': case 'H':
6139 case 'i': case 'I':
6140 case 'j': case 'J':
6141 case 'k': case 'K':
6142 case 'l': case 'L':
6143 case 'm': case 'M':
6144 case 'n': case 'N':
6145 case 'o': case 'O':
6146 case 'p': case 'P':
6147 case 'q': case 'Q':
6148 case 'r': case 'R':
6149 case 's': case 'S':
6150 case 't': case 'T':
6151 case 'u': case 'U':
a7cb1f99 6152 case 'V':
79072805
LW
6153 case 'w': case 'W':
6154 case 'X':
6155 case 'y': case 'Y':
6156 case 'z': case 'Z':
6157
49dc05e3 6158 keylookup: {
88e1f1a2 6159 bool anydelim;
90771dc0 6160 I32 tmp;
10edeb5d
JH
6161
6162 orig_keyword = 0;
6163 gv = NULL;
6164 gvp = NULL;
49dc05e3 6165
3280af22
NIS
6166 PL_bufptr = s;
6167 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6168
6169 /* Some keywords can be followed by any delimiter, including ':' */
361d9b55 6170 anydelim = word_takes_any_delimeter(PL_tokenbuf, len);
8ebc5c01 6171
6172 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6173 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6174 goto just_a_word;
6175
3643fb5f 6176 d = s;
3280af22 6177 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6178 d++; /* no comments skipped here, or s### is misparsed */
6179
748a9306 6180 /* Is this a word before a => operator? */
1c3923b3 6181 if (*d == '=' && d[1] == '>') {
748a9306 6182 CLINE;
6154021b 6183 pl_yylval.opval
d0a148a6
NC
6184 = (OP*)newSVOP(OP_CONST, 0,
6185 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6186 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6187 TERM(WORD);
6188 }
6189
88e1f1a2
JV
6190 /* Check for plugged-in keyword */
6191 {
6192 OP *o;
6193 int result;
6194 char *saved_bufptr = PL_bufptr;
6195 PL_bufptr = s;
16c91539 6196 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6197 s = PL_bufptr;
6198 if (result == KEYWORD_PLUGIN_DECLINE) {
6199 /* not a plugged-in keyword */
6200 PL_bufptr = saved_bufptr;
6201 } else if (result == KEYWORD_PLUGIN_STMT) {
6202 pl_yylval.opval = o;
6203 CLINE;
6204 PL_expect = XSTATE;
6205 return REPORT(PLUGSTMT);
6206 } else if (result == KEYWORD_PLUGIN_EXPR) {
6207 pl_yylval.opval = o;
6208 CLINE;
6209 PL_expect = XOPERATOR;
6210 return REPORT(PLUGEXPR);
6211 } else {
6212 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6213 PL_tokenbuf);
6214 }
6215 }
6216
6217 /* Check for built-in keyword */
6218 tmp = keyword(PL_tokenbuf, len, 0);
6219
6220 /* Is this a label? */
6221 if (!anydelim && PL_expect == XSTATE
6222 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6223 s = d + 1;
6224 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6225 CLINE;
6226 TOKEN(LABEL);
6227 }
6228
a0d0e21e 6229 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6230 GV *ogv = NULL; /* override (winner) */
6231 GV *hgv = NULL; /* hidden (loser) */
3280af22 6232 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6233 CV *cv;
90e5519e 6234 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6235 (cv = GvCVu(gv)))
6236 {
6237 if (GvIMPORTED_CV(gv))
6238 ogv = gv;
6239 else if (! CvMETHOD(cv))
6240 hgv = gv;
6241 }
6242 if (!ogv &&
3280af22 6243 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6244 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6245 GvCVu(gv) && GvIMPORTED_CV(gv))
6246 {
6247 ogv = gv;
6248 }
6249 }
6250 if (ogv) {
30fe34ed 6251 orig_keyword = tmp;
56f7f34b 6252 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6253 }
6254 else if (gv && !gvp
6255 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6256 && GvCVu(gv))
6e7b2336
GS
6257 {
6258 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6259 }
56f7f34b
CS
6260 else { /* no override */
6261 tmp = -tmp;
a2a5de95
NC
6262 if (tmp == KEY_dump) {
6263 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6264 "dump() better written as CORE::dump()");
ac206dc8 6265 }
a0714e2c 6266 gv = NULL;
56f7f34b 6267 gvp = 0;
a2a5de95
NC
6268 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6269 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6270 "Ambiguous call resolved as CORE::%s(), "
6271 "qualify as such or use &",
6272 GvENAME(hgv));
49dc05e3 6273 }
a0d0e21e
LW
6274 }
6275
6276 reserved_word:
6277 switch (tmp) {
79072805
LW
6278
6279 default: /* not a keyword */
0bfa2a8a
NC
6280 /* Trade off - by using this evil construction we can pull the
6281 variable gv into the block labelled keylookup. If not, then
6282 we have to give it function scope so that the goto from the
6283 earlier ':' case doesn't bypass the initialisation. */
6284 if (0) {
6285 just_a_word_zero_gv:
6286 gv = NULL;
6287 gvp = NULL;
8bee0991 6288 orig_keyword = 0;
0bfa2a8a 6289 }
93a17b20 6290 just_a_word: {
96e4d5b1 6291 SV *sv;
ce29ac45 6292 int pkgname = 0;
f54cb97a 6293 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6294 OP *rv2cv_op;
5069cc75 6295 CV *cv;
5db06880 6296#ifdef PERL_MAD
cd81e915 6297 SV *nextPL_nextwhite = 0;
5db06880
NC
6298#endif
6299
8990e307
LW
6300
6301 /* Get the rest if it looks like a package qualifier */
6302
155aba94 6303 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6304 STRLEN morelen;
3280af22 6305 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6306 TRUE, &morelen);
6307 if (!morelen)
cea2e8a9 6308 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6309 *s == '\'' ? "'" : "::");
c3e0f903 6310 len += morelen;
ce29ac45 6311 pkgname = 1;
a0d0e21e 6312 }
8990e307 6313
3280af22
NIS
6314 if (PL_expect == XOPERATOR) {
6315 if (PL_bufptr == PL_linestart) {
57843af0 6316 CopLINE_dec(PL_curcop);
f1f66076 6317 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6318 CopLINE_inc(PL_curcop);
463ee0b2
LW
6319 }
6320 else
54310121 6321 no_op("Bareword",s);
463ee0b2 6322 }
8990e307 6323
c3e0f903
GS
6324 /* Look for a subroutine with this name in current package,
6325 unless name is "Foo::", in which case Foo is a bearword
6326 (and a package name). */
6327
5db06880 6328 if (len > 2 && !PL_madskills &&
3280af22 6329 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6330 {
f776e3cd 6331 if (ckWARN(WARN_BAREWORD)
90e5519e 6332 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6333 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6334 "Bareword \"%s\" refers to nonexistent package",
3280af22 6335 PL_tokenbuf);
c3e0f903 6336 len -= 2;
3280af22 6337 PL_tokenbuf[len] = '\0';
a0714e2c 6338 gv = NULL;
c3e0f903
GS
6339 gvp = 0;
6340 }
6341 else {
62d55b22
NC
6342 if (!gv) {
6343 /* Mustn't actually add anything to a symbol table.
6344 But also don't want to "initialise" any placeholder
6345 constants that might already be there into full
6346 blown PVGVs with attached PVCV. */
90e5519e
NC
6347 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6348 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6349 }
b3d904f3 6350 len = 0;
c3e0f903
GS
6351 }
6352
6353 /* if we saw a global override before, get the right name */
8990e307 6354
37bb7629
EB
6355 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6356 len ? len : strlen(PL_tokenbuf));
49dc05e3 6357 if (gvp) {
37bb7629 6358 SV * const tmp_sv = sv;
396482e1 6359 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6360 sv_catsv(sv, tmp_sv);
6361 SvREFCNT_dec(tmp_sv);
8a7a129d 6362 }
37bb7629 6363
5db06880 6364#ifdef PERL_MAD
cd81e915
NC
6365 if (PL_madskills && !PL_thistoken) {
6366 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6367 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6368 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6369 }
6370#endif
8990e307 6371
a0d0e21e 6372 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6373 CLINE;
6154021b
RGS
6374 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6375 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6376
c3e0f903 6377 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6378 if (len)
6379 goto safe_bareword;
6380
f7461760
Z
6381 {
6382 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6383 const_op->op_private = OPpCONST_BARE;
6384 rv2cv_op = newCVREF(0, const_op);
6385 }
d9088386 6386 cv = rv2cv_op_cv(rv2cv_op, 0);
5069cc75 6387
8990e307
LW
6388 /* See if it's the indirect object for a list operator. */
6389
3280af22
NIS
6390 if (PL_oldoldbufptr &&
6391 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6392 (PL_oldoldbufptr == PL_last_lop
6393 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6394 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6395 (PL_expect == XREF ||
6396 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6397 {
748a9306
LW
6398 bool immediate_paren = *s == '(';
6399
a0d0e21e 6400 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6401 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6402#ifdef PERL_MAD
cd81e915 6403 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6404#endif
a0d0e21e
LW
6405
6406 /* Two barewords in a row may indicate method call. */
6407
62d55b22 6408 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6409 (tmp = intuit_method(s, gv, cv))) {
6410 op_free(rv2cv_op);
bbf60fe6 6411 return REPORT(tmp);
f7461760 6412 }
a0d0e21e
LW
6413
6414 /* If not a declared subroutine, it's an indirect object. */
6415 /* (But it's an indir obj regardless for sort.) */
7294df96 6416 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6417
7294df96
RGS
6418 if (
6419 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6420 (!cv &&
a9ef352a 6421 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6422 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6423 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6424 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6425 )
a9ef352a 6426 {
3280af22 6427 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6428 goto bareword;
93a17b20
LW
6429 }
6430 }
8990e307 6431
3280af22 6432 PL_expect = XOPERATOR;
5db06880
NC
6433#ifdef PERL_MAD
6434 if (isSPACE(*s))
cd81e915
NC
6435 s = SKIPSPACE2(s,nextPL_nextwhite);
6436 PL_nextwhite = nextPL_nextwhite;
5db06880 6437#else
8990e307 6438 s = skipspace(s);
5db06880 6439#endif
1c3923b3
GS
6440
6441 /* Is this a word before a => operator? */
ce29ac45 6442 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6443 op_free(rv2cv_op);
1c3923b3 6444 CLINE;
6154021b 6445 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6446 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6447 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6448 TERM(WORD);
6449 }
6450
6451 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6452 if (*s == '(') {
79072805 6453 CLINE;
5069cc75 6454 if (cv) {
c35e046a
AL
6455 d = s + 1;
6456 while (SPACE_OR_TAB(*d))
6457 d++;
f7461760 6458 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6459 s = d + 1;
c631f32b 6460 goto its_constant;
96e4d5b1 6461 }
6462 }
5db06880
NC
6463#ifdef PERL_MAD
6464 if (PL_madskills) {
cd81e915
NC
6465 PL_nextwhite = PL_thiswhite;
6466 PL_thiswhite = 0;
5db06880 6467 }
cd81e915 6468 start_force(PL_curforce);
5db06880 6469#endif
6154021b 6470 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6471 PL_expect = XOPERATOR;
5db06880
NC
6472#ifdef PERL_MAD
6473 if (PL_madskills) {
cd81e915
NC
6474 PL_nextwhite = nextPL_nextwhite;
6475 curmad('X', PL_thistoken);
6b29d1f5 6476 PL_thistoken = newSVpvs("");
5db06880
NC
6477 }
6478#endif
f7461760 6479 op_free(rv2cv_op);
93a17b20 6480 force_next(WORD);
6154021b 6481 pl_yylval.ival = 0;
463ee0b2 6482 TOKEN('&');
79072805 6483 }
93a17b20 6484
a0d0e21e 6485 /* If followed by var or block, call it a method (unless sub) */
8990e307 6486
f7461760
Z
6487 if ((*s == '$' || *s == '{') && !cv) {
6488 op_free(rv2cv_op);
3280af22
NIS
6489 PL_last_lop = PL_oldbufptr;
6490 PL_last_lop_op = OP_METHOD;
93a17b20 6491 PREBLOCK(METHOD);
463ee0b2
LW
6492 }
6493
8990e307
LW
6494 /* If followed by a bareword, see if it looks like indir obj. */
6495
30fe34ed
RGS
6496 if (!orig_keyword
6497 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6498 && (tmp = intuit_method(s, gv, cv))) {
6499 op_free(rv2cv_op);
bbf60fe6 6500 return REPORT(tmp);
f7461760 6501 }
93a17b20 6502
8990e307
LW
6503 /* Not a method, so call it a subroutine (if defined) */
6504
5069cc75 6505 if (cv) {
9b387841
NC
6506 if (lastchar == '-')
6507 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6508 "Ambiguous use of -%s resolved as -&%s()",
6509 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6510 /* Check for a constant sub */
f7461760 6511 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6512 its_constant:
f7461760 6513 op_free(rv2cv_op);
6154021b
RGS
6514 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6515 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6516 pl_yylval.opval->op_private = 0;
6b7c6d95 6517 pl_yylval.opval->op_flags |= OPf_SPECIAL;
96e4d5b1 6518 TOKEN(WORD);
89bfa8cd 6519 }
6520
6154021b 6521 op_free(pl_yylval.opval);
f7461760 6522 pl_yylval.opval = rv2cv_op;
6154021b 6523 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6524 PL_last_lop = PL_oldbufptr;
bf848113 6525 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6526 /* Is there a prototype? */
5db06880
NC
6527 if (
6528#ifdef PERL_MAD
6529 cv &&
6530#endif
d9f2850e
RGS
6531 SvPOK(cv))
6532 {
5f66b61c 6533 STRLEN protolen;
daba3364 6534 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6535 if (!protolen)
4633a7c4 6536 TERM(FUNC0SUB);
0f5d0394
AE
6537 while (*proto == ';')
6538 proto++;
649d02de
FC
6539 if (
6540 (
6541 (
6542 *proto == '$' || *proto == '_'
c035a075 6543 || *proto == '*' || *proto == '+'
649d02de
FC
6544 )
6545 && proto[1] == '\0'
6546 )
6547 || (
6548 *proto == '\\' && proto[1] && proto[2] == '\0'
6549 )
6550 )
6551 OPERATOR(UNIOPSUB);
6552 if (*proto == '\\' && proto[1] == '[') {
6553 const char *p = proto + 2;
6554 while(*p && *p != ']')
6555 ++p;
6556 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6557 }
7a52d87a 6558 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6559 if (PL_curstash)
6560 sv_setpvs(PL_subname, "__ANON__");
6561 else
6562 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6563 PREBLOCK(LSTOPSUB);
6564 }
a9ef352a 6565 }
5db06880
NC
6566#ifdef PERL_MAD
6567 {
6568 if (PL_madskills) {
cd81e915
NC
6569 PL_nextwhite = PL_thiswhite;
6570 PL_thiswhite = 0;
5db06880 6571 }
cd81e915 6572 start_force(PL_curforce);
6154021b 6573 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6574 PL_expect = XTERM;
6575 if (PL_madskills) {
cd81e915
NC
6576 PL_nextwhite = nextPL_nextwhite;
6577 curmad('X', PL_thistoken);
6b29d1f5 6578 PL_thistoken = newSVpvs("");
5db06880
NC
6579 }
6580 force_next(WORD);
6581 TOKEN(NOAMP);
6582 }
6583 }
6584
6585 /* Guess harder when madskills require "best effort". */
6586 if (PL_madskills && (!gv || !GvCVu(gv))) {
6587 int probable_sub = 0;
6588 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6589 probable_sub = 1;
6590 else if (isALPHA(*s)) {
6591 char tmpbuf[1024];
6592 STRLEN tmplen;
6593 d = s;
6594 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6595 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6596 probable_sub = 1;
6597 else {
6598 while (d < PL_bufend && isSPACE(*d))
6599 d++;
6600 if (*d == '=' && d[1] == '>')
6601 probable_sub = 1;
6602 }
6603 }
6604 if (probable_sub) {
7a6d04f4 6605 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6606 op_free(pl_yylval.opval);
f7461760 6607 pl_yylval.opval = rv2cv_op;
6154021b 6608 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6609 PL_last_lop = PL_oldbufptr;
6610 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6611 PL_nextwhite = PL_thiswhite;
6612 PL_thiswhite = 0;
6613 start_force(PL_curforce);
6154021b 6614 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6615 PL_expect = XTERM;
cd81e915
NC
6616 PL_nextwhite = nextPL_nextwhite;
6617 curmad('X', PL_thistoken);
6b29d1f5 6618 PL_thistoken = newSVpvs("");
5db06880
NC
6619 force_next(WORD);
6620 TOKEN(NOAMP);
6621 }
6622#else
6154021b 6623 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6624 PL_expect = XTERM;
8990e307
LW
6625 force_next(WORD);
6626 TOKEN(NOAMP);
5db06880 6627#endif
8990e307 6628 }
748a9306 6629
8990e307
LW
6630 /* Call it a bare word */
6631
5603f27d 6632 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6633 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6634 else {
9a073a1d
RGS
6635 bareword:
6636 /* after "print" and similar functions (corresponding to
6637 * "F? L" in opcode.pl), whatever wasn't already parsed as
6638 * a filehandle should be subject to "strict subs".
6639 * Likewise for the optional indirect-object argument to system
6640 * or exec, which can't be a bareword */
6641 if ((PL_last_lop_op == OP_PRINT
6642 || PL_last_lop_op == OP_PRTF
6643 || PL_last_lop_op == OP_SAY
6644 || PL_last_lop_op == OP_SYSTEM
6645 || PL_last_lop_op == OP_EXEC)
6646 && (PL_hints & HINT_STRICT_SUBS))
6647 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6648 if (lastchar != '-') {
6649 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6650 d = PL_tokenbuf;
6651 while (isLOWER(*d))
6652 d++;
da51bb9b 6653 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6654 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6655 PL_tokenbuf);
6656 }
748a9306
LW
6657 }
6658 }
f7461760 6659 op_free(rv2cv_op);
c3e0f903
GS
6660
6661 safe_bareword:
9b387841
NC
6662 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6663 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6664 "Operator or semicolon missing before %c%s",
6665 lastchar, PL_tokenbuf);
6666 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6667 "Ambiguous use of %c resolved as operator %c",
6668 lastchar, lastchar);
748a9306 6669 }
93a17b20 6670 TOKEN(WORD);
79072805 6671 }
79072805 6672
68dc0745 6673 case KEY___FILE__:
6154021b 6674 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6675 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6676 TERM(THING);
6677
79072805 6678 case KEY___LINE__:
6154021b 6679 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6680 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6681 TERM(THING);
68dc0745 6682
6683 case KEY___PACKAGE__:
6154021b 6684 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6685 (PL_curstash
5aaec2b4 6686 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6687 : &PL_sv_undef));
79072805 6688 TERM(THING);
79072805 6689
e50aee73 6690 case KEY___DATA__:
79072805
LW
6691 case KEY___END__: {
6692 GV *gv;
3280af22 6693 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6694 const char *pname = "main";
3280af22 6695 if (PL_tokenbuf[2] == 'D')
bfcb3514 6696 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6697 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6698 SVt_PVIO);
a5f75d66 6699 GvMULTI_on(gv);
79072805 6700 if (!GvIO(gv))
a0d0e21e 6701 GvIOp(gv) = newIO();
3280af22 6702 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6703#if defined(HAS_FCNTL) && defined(F_SETFD)
6704 {
f54cb97a 6705 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6706 fcntl(fd,F_SETFD,fd >= 3);
6707 }
79072805 6708#endif
fd049845 6709 /* Mark this internal pseudo-handle as clean */
6710 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6711 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6712 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6713 else
50952442 6714 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6715#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6716 /* if the script was opened in binmode, we need to revert
53129d29 6717 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6718 * XXX this is a questionable hack at best. */
53129d29
GS
6719 if (PL_bufend-PL_bufptr > 2
6720 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6721 {
6722 Off_t loc = 0;
50952442 6723 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6724 loc = PerlIO_tell(PL_rsfp);
6725 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6726 }
2986a63f
JH
6727#ifdef NETWARE
6728 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6729#else
c39cd008 6730 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6731#endif /* NETWARE */
1143fce0
JH
6732#ifdef PERLIO_IS_STDIO /* really? */
6733# if defined(__BORLANDC__)
cb359b41
JH
6734 /* XXX see note in do_binmode() */
6735 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6736# endif
6737#endif
c39cd008
GS
6738 if (loc > 0)
6739 PerlIO_seek(PL_rsfp, loc, 0);
6740 }
6741 }
6742#endif
7948272d 6743#ifdef PERLIO_LAYERS
52d2e0f4
JH
6744 if (!IN_BYTES) {
6745 if (UTF)
6746 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6747 else if (PL_encoding) {
6748 SV *name;
6749 dSP;
6750 ENTER;
6751 SAVETMPS;
6752 PUSHMARK(sp);
6753 EXTEND(SP, 1);
6754 XPUSHs(PL_encoding);
6755 PUTBACK;
6756 call_method("name", G_SCALAR);
6757 SPAGAIN;
6758 name = POPs;
6759 PUTBACK;
bfed75c6 6760 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6761 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6762 SVfARG(name)));
52d2e0f4
JH
6763 FREETMPS;
6764 LEAVE;
6765 }
6766 }
7948272d 6767#endif
5db06880
NC
6768#ifdef PERL_MAD
6769 if (PL_madskills) {
cd81e915
NC
6770 if (PL_realtokenstart >= 0) {
6771 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6772 if (!PL_endwhite)
6b29d1f5 6773 PL_endwhite = newSVpvs("");
cd81e915
NC
6774 sv_catsv(PL_endwhite, PL_thiswhite);
6775 PL_thiswhite = 0;
6776 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6777 PL_realtokenstart = -1;
5db06880 6778 }
5cc814fd
NC
6779 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6780 != NULL) ;
5db06880
NC
6781 }
6782#endif
4608196e 6783 PL_rsfp = NULL;
79072805
LW
6784 }
6785 goto fake_eof;
e929a76b 6786 }
de3bb511 6787
8990e307 6788 case KEY_AUTOLOAD:
ed6116ce 6789 case KEY_DESTROY:
79072805 6790 case KEY_BEGIN:
3c10abe3 6791 case KEY_UNITCHECK:
7d30b5c4 6792 case KEY_CHECK:
7d07dbc2 6793 case KEY_INIT:
7d30b5c4 6794 case KEY_END:
3280af22
NIS
6795 if (PL_expect == XSTATE) {
6796 s = PL_bufptr;
93a17b20 6797 goto really_sub;
79072805
LW
6798 }
6799 goto just_a_word;
6800
a0d0e21e
LW
6801 case KEY_CORE:
6802 if (*s == ':' && s[1] == ':') {
6803 s += 2;
748a9306 6804 d = s;
3280af22 6805 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6806 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6807 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6808 if (tmp < 0)
6809 tmp = -tmp;
850e8516 6810 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6811 /* that's a way to remember we saw "CORE::" */
850e8516 6812 orig_keyword = tmp;
a0d0e21e
LW
6813 goto reserved_word;
6814 }
6815 goto just_a_word;
6816
463ee0b2
LW
6817 case KEY_abs:
6818 UNI(OP_ABS);
6819
79072805
LW
6820 case KEY_alarm:
6821 UNI(OP_ALARM);
6822
6823 case KEY_accept:
a0d0e21e 6824 LOP(OP_ACCEPT,XTERM);
79072805 6825
463ee0b2
LW
6826 case KEY_and:
6827 OPERATOR(ANDOP);
6828
79072805 6829 case KEY_atan2:
a0d0e21e 6830 LOP(OP_ATAN2,XTERM);
85e6fe83 6831
79072805 6832 case KEY_bind:
a0d0e21e 6833 LOP(OP_BIND,XTERM);
79072805
LW
6834
6835 case KEY_binmode:
1c1fc3ea 6836 LOP(OP_BINMODE,XTERM);
79072805
LW
6837
6838 case KEY_bless:
a0d0e21e 6839 LOP(OP_BLESS,XTERM);
79072805 6840
0d863452
RH
6841 case KEY_break:
6842 FUN0(OP_BREAK);
6843
79072805
LW
6844 case KEY_chop:
6845 UNI(OP_CHOP);
6846
6847 case KEY_continue:
0d863452
RH
6848 /* When 'use switch' is in effect, continue has a dual
6849 life as a control operator. */
6850 {
ef89dcc3 6851 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6852 PREBLOCK(CONTINUE);
6853 else {
6854 /* We have to disambiguate the two senses of
6855 "continue". If the next token is a '{' then
6856 treat it as the start of a continue block;
6857 otherwise treat it as a control operator.
6858 */
6859 s = skipspace(s);
6860 if (*s == '{')
79072805 6861 PREBLOCK(CONTINUE);
0d863452
RH
6862 else
6863 FUN0(OP_CONTINUE);
6864 }
6865 }
79072805
LW
6866
6867 case KEY_chdir:
fafc274c
NC
6868 /* may use HOME */
6869 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6870 UNI(OP_CHDIR);
6871
6872 case KEY_close:
6873 UNI(OP_CLOSE);
6874
6875 case KEY_closedir:
6876 UNI(OP_CLOSEDIR);
6877
6878 case KEY_cmp:
6879 Eop(OP_SCMP);
6880
6881 case KEY_caller:
6882 UNI(OP_CALLER);
6883
6884 case KEY_crypt:
6885#ifdef FCRYPT
f4c556ac
GS
6886 if (!PL_cryptseen) {
6887 PL_cryptseen = TRUE;
de3bb511 6888 init_des();
f4c556ac 6889 }
a687059c 6890#endif
a0d0e21e 6891 LOP(OP_CRYPT,XTERM);
79072805
LW
6892
6893 case KEY_chmod:
a0d0e21e 6894 LOP(OP_CHMOD,XTERM);
79072805
LW
6895
6896 case KEY_chown:
a0d0e21e 6897 LOP(OP_CHOWN,XTERM);
79072805
LW
6898
6899 case KEY_connect:
a0d0e21e 6900 LOP(OP_CONNECT,XTERM);
79072805 6901
463ee0b2
LW
6902 case KEY_chr:
6903 UNI(OP_CHR);
6904
79072805
LW
6905 case KEY_cos:
6906 UNI(OP_COS);
6907
6908 case KEY_chroot:
6909 UNI(OP_CHROOT);
6910
0d863452
RH
6911 case KEY_default:
6912 PREBLOCK(DEFAULT);
6913
79072805 6914 case KEY_do:
29595ff2 6915 s = SKIPSPACE1(s);
79072805 6916 if (*s == '{')
a0d0e21e 6917 PRETERMBLOCK(DO);
79072805 6918 if (*s != '\'')
89c5585f 6919 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6920 if (orig_keyword == KEY_do) {
6921 orig_keyword = 0;
6154021b 6922 pl_yylval.ival = 1;
850e8516
RGS
6923 }
6924 else
6154021b 6925 pl_yylval.ival = 0;
378cc40b 6926 OPERATOR(DO);
79072805
LW
6927
6928 case KEY_die:
3280af22 6929 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6930 LOP(OP_DIE,XTERM);
79072805
LW
6931
6932 case KEY_defined:
6933 UNI(OP_DEFINED);
6934
6935 case KEY_delete:
a0d0e21e 6936 UNI(OP_DELETE);
79072805
LW
6937
6938 case KEY_dbmopen:
74e8ce34
NC
6939 Perl_populate_isa(aTHX_ STR_WITH_LEN("AnyDBM_File::ISA"),
6940 STR_WITH_LEN("NDBM_File::"),
6941 STR_WITH_LEN("DB_File::"),
6942 STR_WITH_LEN("GDBM_File::"),
6943 STR_WITH_LEN("SDBM_File::"),
6944 STR_WITH_LEN("ODBM_File::"),
6945 NULL);
a0d0e21e 6946 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6947
6948 case KEY_dbmclose:
6949 UNI(OP_DBMCLOSE);
6950
6951 case KEY_dump:
a0d0e21e 6952 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6953 LOOPX(OP_DUMP);
6954
6955 case KEY_else:
6956 PREBLOCK(ELSE);
6957
6958 case KEY_elsif:
6154021b 6959 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6960 OPERATOR(ELSIF);
6961
6962 case KEY_eq:
6963 Eop(OP_SEQ);
6964
a0d0e21e
LW
6965 case KEY_exists:
6966 UNI(OP_EXISTS);
4e553d73 6967
79072805 6968 case KEY_exit:
5db06880
NC
6969 if (PL_madskills)
6970 UNI(OP_INT);
79072805
LW
6971 UNI(OP_EXIT);
6972
6973 case KEY_eval:
29595ff2 6974 s = SKIPSPACE1(s);
32e2a35d
RGS
6975 if (*s == '{') { /* block eval */
6976 PL_expect = XTERMBLOCK;
6977 UNIBRACK(OP_ENTERTRY);
6978 }
6979 else { /* string eval */
6980 PL_expect = XTERM;
6981 UNIBRACK(OP_ENTEREVAL);
6982 }
79072805
LW
6983
6984 case KEY_eof:
6985 UNI(OP_EOF);
6986
6987 case KEY_exp:
6988 UNI(OP_EXP);
6989
6990 case KEY_each:
6991 UNI(OP_EACH);
6992
6993 case KEY_exec:
a0d0e21e 6994 LOP(OP_EXEC,XREF);
79072805
LW
6995
6996 case KEY_endhostent:
6997 FUN0(OP_EHOSTENT);
6998
6999 case KEY_endnetent:
7000 FUN0(OP_ENETENT);
7001
7002 case KEY_endservent:
7003 FUN0(OP_ESERVENT);
7004
7005 case KEY_endprotoent:
7006 FUN0(OP_EPROTOENT);
7007
7008 case KEY_endpwent:
7009 FUN0(OP_EPWENT);
7010
7011 case KEY_endgrent:
7012 FUN0(OP_EGRENT);
7013
7014 case KEY_for:
7015 case KEY_foreach:
6154021b 7016 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 7017 s = SKIPSPACE1(s);
7e2040f0 7018 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 7019 char *p = s;
5db06880
NC
7020#ifdef PERL_MAD
7021 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
7022#endif
7023
3280af22 7024 if ((PL_bufend - p) >= 3 &&
55497cff 7025 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
7026 p += 2;
77ca0c92
LW
7027 else if ((PL_bufend - p) >= 4 &&
7028 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
7029 p += 3;
29595ff2 7030 p = PEEKSPACE(p);
7e2040f0 7031 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
7032 p = scan_ident(p, PL_bufend,
7033 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 7034 p = PEEKSPACE(p);
77ca0c92
LW
7035 }
7036 if (*p != '$')
cea2e8a9 7037 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
7038#ifdef PERL_MAD
7039 s = SvPVX(PL_linestr) + soff;
7040#endif
55497cff 7041 }
79072805
LW
7042 OPERATOR(FOR);
7043
7044 case KEY_formline:
a0d0e21e 7045 LOP(OP_FORMLINE,XTERM);
79072805
LW
7046
7047 case KEY_fork:
7048 FUN0(OP_FORK);
7049
7050 case KEY_fcntl:
a0d0e21e 7051 LOP(OP_FCNTL,XTERM);
79072805
LW
7052
7053 case KEY_fileno:
7054 UNI(OP_FILENO);
7055
7056 case KEY_flock:
a0d0e21e 7057 LOP(OP_FLOCK,XTERM);
79072805
LW
7058
7059 case KEY_gt:
7060 Rop(OP_SGT);
7061
7062 case KEY_ge:
7063 Rop(OP_SGE);
7064
7065 case KEY_grep:
2c38e13d 7066 LOP(OP_GREPSTART, XREF);
79072805
LW
7067
7068 case KEY_goto:
a0d0e21e 7069 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7070 LOOPX(OP_GOTO);
7071
7072 case KEY_gmtime:
7073 UNI(OP_GMTIME);
7074
7075 case KEY_getc:
6f33ba73 7076 UNIDOR(OP_GETC);
79072805
LW
7077
7078 case KEY_getppid:
7079 FUN0(OP_GETPPID);
7080
7081 case KEY_getpgrp:
7082 UNI(OP_GETPGRP);
7083
7084 case KEY_getpriority:
a0d0e21e 7085 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7086
7087 case KEY_getprotobyname:
7088 UNI(OP_GPBYNAME);
7089
7090 case KEY_getprotobynumber:
a0d0e21e 7091 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7092
7093 case KEY_getprotoent:
7094 FUN0(OP_GPROTOENT);
7095
7096 case KEY_getpwent:
7097 FUN0(OP_GPWENT);
7098
7099 case KEY_getpwnam:
ff68c719 7100 UNI(OP_GPWNAM);
79072805
LW
7101
7102 case KEY_getpwuid:
ff68c719 7103 UNI(OP_GPWUID);
79072805
LW
7104
7105 case KEY_getpeername:
7106 UNI(OP_GETPEERNAME);
7107
7108 case KEY_gethostbyname:
7109 UNI(OP_GHBYNAME);
7110
7111 case KEY_gethostbyaddr:
a0d0e21e 7112 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7113
7114 case KEY_gethostent:
7115 FUN0(OP_GHOSTENT);
7116
7117 case KEY_getnetbyname:
7118 UNI(OP_GNBYNAME);
7119
7120 case KEY_getnetbyaddr:
a0d0e21e 7121 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7122
7123 case KEY_getnetent:
7124 FUN0(OP_GNETENT);
7125
7126 case KEY_getservbyname:
a0d0e21e 7127 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7128
7129 case KEY_getservbyport:
a0d0e21e 7130 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7131
7132 case KEY_getservent:
7133 FUN0(OP_GSERVENT);
7134
7135 case KEY_getsockname:
7136 UNI(OP_GETSOCKNAME);
7137
7138 case KEY_getsockopt:
a0d0e21e 7139 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7140
7141 case KEY_getgrent:
7142 FUN0(OP_GGRENT);
7143
7144 case KEY_getgrnam:
ff68c719 7145 UNI(OP_GGRNAM);
79072805
LW
7146
7147 case KEY_getgrgid:
ff68c719 7148 UNI(OP_GGRGID);
79072805
LW
7149
7150 case KEY_getlogin:
7151 FUN0(OP_GETLOGIN);
7152
0d863452 7153 case KEY_given:
6154021b 7154 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7155 OPERATOR(GIVEN);
7156
93a17b20 7157 case KEY_glob:
a0d0e21e 7158 LOP(OP_GLOB,XTERM);
93a17b20 7159
79072805
LW
7160 case KEY_hex:
7161 UNI(OP_HEX);
7162
7163 case KEY_if:
6154021b 7164 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7165 OPERATOR(IF);
7166
7167 case KEY_index:
a0d0e21e 7168 LOP(OP_INDEX,XTERM);
79072805
LW
7169
7170 case KEY_int:
7171 UNI(OP_INT);
7172
7173 case KEY_ioctl:
a0d0e21e 7174 LOP(OP_IOCTL,XTERM);
79072805
LW
7175
7176 case KEY_join:
a0d0e21e 7177 LOP(OP_JOIN,XTERM);
79072805
LW
7178
7179 case KEY_keys:
7180 UNI(OP_KEYS);
7181
7182 case KEY_kill:
a0d0e21e 7183 LOP(OP_KILL,XTERM);
79072805
LW
7184
7185 case KEY_last:
a0d0e21e 7186 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7187 LOOPX(OP_LAST);
4e553d73 7188
79072805
LW
7189 case KEY_lc:
7190 UNI(OP_LC);
7191
7192 case KEY_lcfirst:
7193 UNI(OP_LCFIRST);
7194
7195 case KEY_local:
6154021b 7196 pl_yylval.ival = 0;
79072805
LW
7197 OPERATOR(LOCAL);
7198
7199 case KEY_length:
7200 UNI(OP_LENGTH);
7201
7202 case KEY_lt:
7203 Rop(OP_SLT);
7204
7205 case KEY_le:
7206 Rop(OP_SLE);
7207
7208 case KEY_localtime:
7209 UNI(OP_LOCALTIME);
7210
7211 case KEY_log:
7212 UNI(OP_LOG);
7213
7214 case KEY_link:
a0d0e21e 7215 LOP(OP_LINK,XTERM);
79072805
LW
7216
7217 case KEY_listen:
a0d0e21e 7218 LOP(OP_LISTEN,XTERM);
79072805 7219
c0329465
MB
7220 case KEY_lock:
7221 UNI(OP_LOCK);
7222
79072805
LW
7223 case KEY_lstat:
7224 UNI(OP_LSTAT);
7225
7226 case KEY_m:
8782bef2 7227 s = scan_pat(s,OP_MATCH);
79072805
LW
7228 TERM(sublex_start());
7229
a0d0e21e 7230 case KEY_map:
2c38e13d 7231 LOP(OP_MAPSTART, XREF);
4e4e412b 7232
79072805 7233 case KEY_mkdir:
a0d0e21e 7234 LOP(OP_MKDIR,XTERM);
79072805
LW
7235
7236 case KEY_msgctl:
a0d0e21e 7237 LOP(OP_MSGCTL,XTERM);
79072805
LW
7238
7239 case KEY_msgget:
a0d0e21e 7240 LOP(OP_MSGGET,XTERM);
79072805
LW
7241
7242 case KEY_msgrcv:
a0d0e21e 7243 LOP(OP_MSGRCV,XTERM);
79072805
LW
7244
7245 case KEY_msgsnd:
a0d0e21e 7246 LOP(OP_MSGSND,XTERM);
79072805 7247
77ca0c92 7248 case KEY_our:
93a17b20 7249 case KEY_my:
952306ac 7250 case KEY_state:
eac04b2e 7251 PL_in_my = (U16)tmp;
29595ff2 7252 s = SKIPSPACE1(s);
7e2040f0 7253 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7254#ifdef PERL_MAD
7255 char* start = s;
7256#endif
3280af22 7257 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7258 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7259 goto really_sub;
def3634b 7260 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7261 if (!PL_in_my_stash) {
c750a3ec 7262 char tmpbuf[1024];
3280af22 7263 PL_bufptr = s;
d9fad198 7264 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7265 yyerror(tmpbuf);
7266 }
5db06880
NC
7267#ifdef PERL_MAD
7268 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7269 sv_catsv(PL_thistoken, PL_nextwhite);
7270 PL_nextwhite = 0;
7271 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7272 }
7273#endif
c750a3ec 7274 }
6154021b 7275 pl_yylval.ival = 1;
55497cff 7276 OPERATOR(MY);
93a17b20 7277
79072805 7278 case KEY_next:
a0d0e21e 7279 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7280 LOOPX(OP_NEXT);
7281
7282 case KEY_ne:
7283 Eop(OP_SNE);
7284
a0d0e21e 7285 case KEY_no:
468aa647 7286 s = tokenize_use(0, s);
a0d0e21e
LW
7287 OPERATOR(USE);
7288
7289 case KEY_not:
29595ff2 7290 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
7291 FUN1(OP_NOT);
7292 else
7293 OPERATOR(NOTOP);
a0d0e21e 7294
79072805 7295 case KEY_open:
29595ff2 7296 s = SKIPSPACE1(s);
7e2040f0 7297 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7298 const char *t;
c35e046a
AL
7299 for (d = s; isALNUM_lazy_if(d,UTF);)
7300 d++;
7301 for (t=d; isSPACE(*t);)
7302 t++;
e2ab214b 7303 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7304 /* [perl #16184] */
7305 && !(t[0] == '=' && t[1] == '>')
7306 ) {
5f66b61c 7307 int parms_len = (int)(d-s);
9014280d 7308 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7309 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7310 parms_len, s, parms_len, s);
66fbe8fb 7311 }
93a17b20 7312 }
a0d0e21e 7313 LOP(OP_OPEN,XTERM);
79072805 7314
463ee0b2 7315 case KEY_or:
6154021b 7316 pl_yylval.ival = OP_OR;
463ee0b2
LW
7317 OPERATOR(OROP);
7318
79072805
LW
7319 case KEY_ord:
7320 UNI(OP_ORD);
7321
7322 case KEY_oct:
7323 UNI(OP_OCT);
7324
7325 case KEY_opendir:
a0d0e21e 7326 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7327
7328 case KEY_print:
3280af22 7329 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7330 LOP(OP_PRINT,XREF);
79072805
LW
7331
7332 case KEY_printf:
3280af22 7333 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7334 LOP(OP_PRTF,XREF);
79072805 7335
c07a80fd 7336 case KEY_prototype:
7337 UNI(OP_PROTOTYPE);
7338
79072805 7339 case KEY_push:
a0d0e21e 7340 LOP(OP_PUSH,XTERM);
79072805
LW
7341
7342 case KEY_pop:
6f33ba73 7343 UNIDOR(OP_POP);
79072805 7344
a0d0e21e 7345 case KEY_pos:
6f33ba73 7346 UNIDOR(OP_POS);
4e553d73 7347
79072805 7348 case KEY_pack:
a0d0e21e 7349 LOP(OP_PACK,XTERM);
79072805
LW
7350
7351 case KEY_package:
a0d0e21e 7352 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7353 s = SKIPSPACE1(s);
91152fc1 7354 s = force_strict_version(s);
4e4da3ac 7355 PL_lex_expect = XBLOCK;
79072805
LW
7356 OPERATOR(PACKAGE);
7357
7358 case KEY_pipe:
a0d0e21e 7359 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7360
7361 case KEY_q:
5db06880 7362 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7363 if (!s)
d4c19fe8 7364 missingterm(NULL);
6154021b 7365 pl_yylval.ival = OP_CONST;
79072805
LW
7366 TERM(sublex_start());
7367
a0d0e21e
LW
7368 case KEY_quotemeta:
7369 UNI(OP_QUOTEMETA);
7370
ea25a9b2
Z
7371 case KEY_qw: {
7372 OP *words = NULL;
5db06880 7373 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7374 if (!s)
d4c19fe8 7375 missingterm(NULL);
3480a8d2 7376 PL_expect = XOPERATOR;
8127e0e3 7377 if (SvCUR(PL_lex_stuff)) {
8127e0e3 7378 int warned = 0;
3280af22 7379 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7380 while (len) {
d4c19fe8
AL
7381 for (; isSPACE(*d) && len; --len, ++d)
7382 /**/;
8127e0e3 7383 if (len) {
d4c19fe8 7384 SV *sv;
f54cb97a 7385 const char *b = d;
e476b1b5 7386 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7387 for (; !isSPACE(*d) && len; --len, ++d) {
7388 if (*d == ',') {
9014280d 7389 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7390 "Possible attempt to separate words with commas");
7391 ++warned;
7392 }
7393 else if (*d == '#') {
9014280d 7394 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7395 "Possible attempt to put comments in qw() list");
7396 ++warned;
7397 }
7398 }
7399 }
7400 else {
d4c19fe8
AL
7401 for (; !isSPACE(*d) && len; --len, ++d)
7402 /**/;
8127e0e3 7403 }
740cce10 7404 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
2fcb4757 7405 words = op_append_elem(OP_LIST, words,
7948272d 7406 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7407 }
7408 }
7409 }
ea25a9b2
Z
7410 if (!words)
7411 words = newNULLLIST();
37fd879b 7412 if (PL_lex_stuff) {
8127e0e3 7413 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7414 PL_lex_stuff = NULL;
37fd879b 7415 }
ea25a9b2
Z
7416 PL_expect = XOPERATOR;
7417 pl_yylval.opval = sawparens(words);
7418 TOKEN(QWLIST);
7419 }
8990e307 7420
79072805 7421 case KEY_qq:
5db06880 7422 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7423 if (!s)
d4c19fe8 7424 missingterm(NULL);
6154021b 7425 pl_yylval.ival = OP_STRINGIFY;
3280af22 7426 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7427 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7428 TERM(sublex_start());
7429
8782bef2
GB
7430 case KEY_qr:
7431 s = scan_pat(s,OP_QR);
7432 TERM(sublex_start());
7433
79072805 7434 case KEY_qx:
5db06880 7435 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7436 if (!s)
d4c19fe8 7437 missingterm(NULL);
9b201d7d 7438 readpipe_override();
79072805
LW
7439 TERM(sublex_start());
7440
7441 case KEY_return:
7442 OLDLOP(OP_RETURN);
7443
7444 case KEY_require:
29595ff2 7445 s = SKIPSPACE1(s);
e759cc13
RGS
7446 if (isDIGIT(*s)) {
7447 s = force_version(s, FALSE);
a7cb1f99 7448 }
e759cc13
RGS
7449 else if (*s != 'v' || !isDIGIT(s[1])
7450 || (s = force_version(s, TRUE), *s == 'v'))
7451 {
a7cb1f99
GS
7452 *PL_tokenbuf = '\0';
7453 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7454 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7455 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7456 else if (*s == '<')
7457 yyerror("<> should be quotes");
7458 }
a72a1c8b
RGS
7459 if (orig_keyword == KEY_require) {
7460 orig_keyword = 0;
6154021b 7461 pl_yylval.ival = 1;
a72a1c8b
RGS
7462 }
7463 else
6154021b 7464 pl_yylval.ival = 0;
a72a1c8b
RGS
7465 PL_expect = XTERM;
7466 PL_bufptr = s;
7467 PL_last_uni = PL_oldbufptr;
7468 PL_last_lop_op = OP_REQUIRE;
7469 s = skipspace(s);
7470 return REPORT( (int)REQUIRE );
79072805
LW
7471
7472 case KEY_reset:
7473 UNI(OP_RESET);
7474
7475 case KEY_redo:
a0d0e21e 7476 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7477 LOOPX(OP_REDO);
7478
7479 case KEY_rename:
a0d0e21e 7480 LOP(OP_RENAME,XTERM);
79072805
LW
7481
7482 case KEY_rand:
7483 UNI(OP_RAND);
7484
7485 case KEY_rmdir:
7486 UNI(OP_RMDIR);
7487
7488 case KEY_rindex:
a0d0e21e 7489 LOP(OP_RINDEX,XTERM);
79072805
LW
7490
7491 case KEY_read:
a0d0e21e 7492 LOP(OP_READ,XTERM);
79072805
LW
7493
7494 case KEY_readdir:
7495 UNI(OP_READDIR);
7496
93a17b20 7497 case KEY_readline:
6f33ba73 7498 UNIDOR(OP_READLINE);
93a17b20
LW
7499
7500 case KEY_readpipe:
0858480c 7501 UNIDOR(OP_BACKTICK);
93a17b20 7502
79072805
LW
7503 case KEY_rewinddir:
7504 UNI(OP_REWINDDIR);
7505
7506 case KEY_recv:
a0d0e21e 7507 LOP(OP_RECV,XTERM);
79072805
LW
7508
7509 case KEY_reverse:
a0d0e21e 7510 LOP(OP_REVERSE,XTERM);
79072805
LW
7511
7512 case KEY_readlink:
6f33ba73 7513 UNIDOR(OP_READLINK);
79072805
LW
7514
7515 case KEY_ref:
7516 UNI(OP_REF);
7517
7518 case KEY_s:
7519 s = scan_subst(s);
6154021b 7520 if (pl_yylval.opval)
79072805
LW
7521 TERM(sublex_start());
7522 else
7523 TOKEN(1); /* force error */
7524
0d863452
RH
7525 case KEY_say:
7526 checkcomma(s,PL_tokenbuf,"filehandle");
7527 LOP(OP_SAY,XREF);
7528
a0d0e21e
LW
7529 case KEY_chomp:
7530 UNI(OP_CHOMP);
4e553d73 7531
79072805
LW
7532 case KEY_scalar:
7533 UNI(OP_SCALAR);
7534
7535 case KEY_select:
a0d0e21e 7536 LOP(OP_SELECT,XTERM);
79072805
LW
7537
7538 case KEY_seek:
a0d0e21e 7539 LOP(OP_SEEK,XTERM);
79072805
LW
7540
7541 case KEY_semctl:
a0d0e21e 7542 LOP(OP_SEMCTL,XTERM);
79072805
LW
7543
7544 case KEY_semget:
a0d0e21e 7545 LOP(OP_SEMGET,XTERM);
79072805
LW
7546
7547 case KEY_semop:
a0d0e21e 7548 LOP(OP_SEMOP,XTERM);
79072805
LW
7549
7550 case KEY_send:
a0d0e21e 7551 LOP(OP_SEND,XTERM);
79072805
LW
7552
7553 case KEY_setpgrp:
a0d0e21e 7554 LOP(OP_SETPGRP,XTERM);
79072805
LW
7555
7556 case KEY_setpriority:
a0d0e21e 7557 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7558
7559 case KEY_sethostent:
ff68c719 7560 UNI(OP_SHOSTENT);
79072805
LW
7561
7562 case KEY_setnetent:
ff68c719 7563 UNI(OP_SNETENT);
79072805
LW
7564
7565 case KEY_setservent:
ff68c719 7566 UNI(OP_SSERVENT);
79072805
LW
7567
7568 case KEY_setprotoent:
ff68c719 7569 UNI(OP_SPROTOENT);
79072805
LW
7570
7571 case KEY_setpwent:
7572 FUN0(OP_SPWENT);
7573
7574 case KEY_setgrent:
7575 FUN0(OP_SGRENT);
7576
7577 case KEY_seekdir:
a0d0e21e 7578 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7579
7580 case KEY_setsockopt:
a0d0e21e 7581 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7582
7583 case KEY_shift:
6f33ba73 7584 UNIDOR(OP_SHIFT);
79072805
LW
7585
7586 case KEY_shmctl:
a0d0e21e 7587 LOP(OP_SHMCTL,XTERM);
79072805
LW
7588
7589 case KEY_shmget:
a0d0e21e 7590 LOP(OP_SHMGET,XTERM);
79072805
LW
7591
7592 case KEY_shmread:
a0d0e21e 7593 LOP(OP_SHMREAD,XTERM);
79072805
LW
7594
7595 case KEY_shmwrite:
a0d0e21e 7596 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7597
7598 case KEY_shutdown:
a0d0e21e 7599 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7600
7601 case KEY_sin:
7602 UNI(OP_SIN);
7603
7604 case KEY_sleep:
7605 UNI(OP_SLEEP);
7606
7607 case KEY_socket:
a0d0e21e 7608 LOP(OP_SOCKET,XTERM);
79072805
LW
7609
7610 case KEY_socketpair:
a0d0e21e 7611 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7612
7613 case KEY_sort:
3280af22 7614 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7615 s = SKIPSPACE1(s);
79072805 7616 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7617 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7618 PL_expect = XTERM;
15f0808c 7619 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7620 LOP(OP_SORT,XREF);
79072805
LW
7621
7622 case KEY_split:
a0d0e21e 7623 LOP(OP_SPLIT,XTERM);
79072805
LW
7624
7625 case KEY_sprintf:
a0d0e21e 7626 LOP(OP_SPRINTF,XTERM);
79072805
LW
7627
7628 case KEY_splice:
a0d0e21e 7629 LOP(OP_SPLICE,XTERM);
79072805
LW
7630
7631 case KEY_sqrt:
7632 UNI(OP_SQRT);
7633
7634 case KEY_srand:
7635 UNI(OP_SRAND);
7636
7637 case KEY_stat:
7638 UNI(OP_STAT);
7639
7640 case KEY_study:
79072805
LW
7641 UNI(OP_STUDY);
7642
7643 case KEY_substr:
a0d0e21e 7644 LOP(OP_SUBSTR,XTERM);
79072805
LW
7645
7646 case KEY_format:
7647 case KEY_sub:
93a17b20 7648 really_sub:
09bef843 7649 {
3280af22 7650 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7651 SSize_t tboffset = 0;
09bef843 7652 expectation attrful;
28cc6278 7653 bool have_name, have_proto;
f54cb97a 7654 const int key = tmp;
09bef843 7655
5db06880
NC
7656#ifdef PERL_MAD
7657 SV *tmpwhite = 0;
7658
cd81e915 7659 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7660 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7661 PL_thistoken = 0;
5db06880
NC
7662
7663 d = s;
7664 s = SKIPSPACE2(s,tmpwhite);
7665#else
09bef843 7666 s = skipspace(s);
5db06880 7667#endif
09bef843 7668
7e2040f0 7669 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7670 (*s == ':' && s[1] == ':'))
7671 {
5db06880 7672#ifdef PERL_MAD
4f61fd4b 7673 SV *nametoke = NULL;
5db06880
NC
7674#endif
7675
09bef843
SB
7676 PL_expect = XBLOCK;
7677 attrful = XATTRBLOCK;
b1b65b59
JH
7678 /* remember buffer pos'n for later force_word */
7679 tboffset = s - PL_oldbufptr;
09bef843 7680 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7681#ifdef PERL_MAD
7682 if (PL_madskills)
7683 nametoke = newSVpvn(s, d - s);
7684#endif
6502358f
NC
7685 if (memchr(tmpbuf, ':', len))
7686 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7687 else {
7688 sv_setsv(PL_subname,PL_curstname);
396482e1 7689 sv_catpvs(PL_subname,"::");
09bef843
SB
7690 sv_catpvn(PL_subname,tmpbuf,len);
7691 }
09bef843 7692 have_name = TRUE;
5db06880
NC
7693
7694#ifdef PERL_MAD
7695
7696 start_force(0);
7697 CURMAD('X', nametoke);
7698 CURMAD('_', tmpwhite);
7699 (void) force_word(PL_oldbufptr + tboffset, WORD,
7700 FALSE, TRUE, TRUE);
7701
7702 s = SKIPSPACE2(d,tmpwhite);
7703#else
7704 s = skipspace(d);
7705#endif
09bef843 7706 }
463ee0b2 7707 else {
09bef843
SB
7708 if (key == KEY_my)
7709 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7710 PL_expect = XTERMBLOCK;
7711 attrful = XATTRTERM;
76f68e9b 7712 sv_setpvs(PL_subname,"?");
09bef843 7713 have_name = FALSE;
463ee0b2 7714 }
4633a7c4 7715
09bef843
SB
7716 if (key == KEY_format) {
7717 if (*s == '=')
7718 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7719#ifdef PERL_MAD
cd81e915 7720 PL_thistoken = subtoken;
5db06880
NC
7721 s = d;
7722#else
09bef843 7723 if (have_name)
b1b65b59
JH
7724 (void) force_word(PL_oldbufptr + tboffset, WORD,
7725 FALSE, TRUE, TRUE);
5db06880 7726#endif
09bef843
SB
7727 OPERATOR(FORMAT);
7728 }
79072805 7729
09bef843
SB
7730 /* Look for a prototype */
7731 if (*s == '(') {
d9f2850e
RGS
7732 char *p;
7733 bool bad_proto = FALSE;
9e8d7757
RB
7734 bool in_brackets = FALSE;
7735 char greedy_proto = ' ';
7736 bool proto_after_greedy_proto = FALSE;
7737 bool must_be_last = FALSE;
7738 bool underscore = FALSE;
aef2a98a 7739 bool seen_underscore = FALSE;
197afce1 7740 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7741
5db06880 7742 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7743 if (!s)
09bef843 7744 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7745 /* strip spaces and check for bad characters */
09bef843
SB
7746 d = SvPVX(PL_lex_stuff);
7747 tmp = 0;
d9f2850e
RGS
7748 for (p = d; *p; ++p) {
7749 if (!isSPACE(*p)) {
7750 d[tmp++] = *p;
9e8d7757 7751
197afce1 7752 if (warnillegalproto) {
9e8d7757
RB
7753 if (must_be_last)
7754 proto_after_greedy_proto = TRUE;
c035a075 7755 if (!strchr("$@%*;[]&\\_+", *p)) {
9e8d7757
RB
7756 bad_proto = TRUE;
7757 }
7758 else {
7759 if ( underscore ) {
7760 if ( *p != ';' )
7761 bad_proto = TRUE;
7762 underscore = FALSE;
7763 }
7764 if ( *p == '[' ) {
7765 in_brackets = TRUE;
7766 }
7767 else if ( *p == ']' ) {
7768 in_brackets = FALSE;
7769 }
7770 else if ( (*p == '@' || *p == '%') &&
7771 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7772 !in_brackets ) {
7773 must_be_last = TRUE;
7774 greedy_proto = *p;
7775 }
7776 else if ( *p == '_' ) {
aef2a98a 7777 underscore = seen_underscore = TRUE;
9e8d7757
RB
7778 }
7779 }
7780 }
d37a9538 7781 }
09bef843 7782 }
d9f2850e 7783 d[tmp] = '\0';
9e8d7757 7784 if (proto_after_greedy_proto)
197afce1 7785 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
7786 "Prototype after '%c' for %"SVf" : %s",
7787 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 7788 if (bad_proto)
197afce1 7789 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
7790 "Illegal character %sin prototype for %"SVf" : %s",
7791 seen_underscore ? "after '_' " : "",
be2597df 7792 SVfARG(PL_subname), d);
b162af07 7793 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7794 have_proto = TRUE;
68dc0745 7795
5db06880
NC
7796#ifdef PERL_MAD
7797 start_force(0);
cd81e915 7798 CURMAD('q', PL_thisopen);
5db06880 7799 CURMAD('_', tmpwhite);
cd81e915
NC
7800 CURMAD('=', PL_thisstuff);
7801 CURMAD('Q', PL_thisclose);
5db06880
NC
7802 NEXTVAL_NEXTTOKE.opval =
7803 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7804 PL_lex_stuff = NULL;
5db06880
NC
7805 force_next(THING);
7806
7807 s = SKIPSPACE2(s,tmpwhite);
7808#else
09bef843 7809 s = skipspace(s);
5db06880 7810#endif
4633a7c4 7811 }
09bef843
SB
7812 else
7813 have_proto = FALSE;
7814
7815 if (*s == ':' && s[1] != ':')
7816 PL_expect = attrful;
8e742a20
MHM
7817 else if (*s != '{' && key == KEY_sub) {
7818 if (!have_name)
7819 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7820 else if (*s != ';' && *s != '}')
be2597df 7821 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7822 }
09bef843 7823
5db06880
NC
7824#ifdef PERL_MAD
7825 start_force(0);
7826 if (tmpwhite) {
7827 if (PL_madskills)
6b29d1f5 7828 curmad('^', newSVpvs(""));
5db06880
NC
7829 CURMAD('_', tmpwhite);
7830 }
7831 force_next(0);
7832
cd81e915 7833 PL_thistoken = subtoken;
5db06880 7834#else
09bef843 7835 if (have_proto) {
9ded7720 7836 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7837 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7838 PL_lex_stuff = NULL;
09bef843 7839 force_next(THING);
68dc0745 7840 }
5db06880 7841#endif
09bef843 7842 if (!have_name) {
49a54bbe
NC
7843 if (PL_curstash)
7844 sv_setpvs(PL_subname, "__ANON__");
7845 else
7846 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7847 TOKEN(ANONSUB);
4633a7c4 7848 }
5db06880 7849#ifndef PERL_MAD
b1b65b59
JH
7850 (void) force_word(PL_oldbufptr + tboffset, WORD,
7851 FALSE, TRUE, TRUE);
5db06880 7852#endif
09bef843
SB
7853 if (key == KEY_my)
7854 TOKEN(MYSUB);
7855 TOKEN(SUB);
4633a7c4 7856 }
79072805
LW
7857
7858 case KEY_system:
a0d0e21e 7859 LOP(OP_SYSTEM,XREF);
79072805
LW
7860
7861 case KEY_symlink:
a0d0e21e 7862 LOP(OP_SYMLINK,XTERM);
79072805
LW
7863
7864 case KEY_syscall:
a0d0e21e 7865 LOP(OP_SYSCALL,XTERM);
79072805 7866
c07a80fd 7867 case KEY_sysopen:
7868 LOP(OP_SYSOPEN,XTERM);
7869
137443ea 7870 case KEY_sysseek:
7871 LOP(OP_SYSSEEK,XTERM);
7872
79072805 7873 case KEY_sysread:
a0d0e21e 7874 LOP(OP_SYSREAD,XTERM);
79072805
LW
7875
7876 case KEY_syswrite:
a0d0e21e 7877 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7878
7879 case KEY_tr:
7880 s = scan_trans(s);
7881 TERM(sublex_start());
7882
7883 case KEY_tell:
7884 UNI(OP_TELL);
7885
7886 case KEY_telldir:
7887 UNI(OP_TELLDIR);
7888
463ee0b2 7889 case KEY_tie:
a0d0e21e 7890 LOP(OP_TIE,XTERM);
463ee0b2 7891
c07a80fd 7892 case KEY_tied:
7893 UNI(OP_TIED);
7894
79072805
LW
7895 case KEY_time:
7896 FUN0(OP_TIME);
7897
7898 case KEY_times:
7899 FUN0(OP_TMS);
7900
7901 case KEY_truncate:
a0d0e21e 7902 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7903
7904 case KEY_uc:
7905 UNI(OP_UC);
7906
7907 case KEY_ucfirst:
7908 UNI(OP_UCFIRST);
7909
463ee0b2
LW
7910 case KEY_untie:
7911 UNI(OP_UNTIE);
7912
79072805 7913 case KEY_until:
6154021b 7914 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7915 OPERATOR(UNTIL);
7916
7917 case KEY_unless:
6154021b 7918 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7919 OPERATOR(UNLESS);
7920
7921 case KEY_unlink:
a0d0e21e 7922 LOP(OP_UNLINK,XTERM);
79072805
LW
7923
7924 case KEY_undef:
6f33ba73 7925 UNIDOR(OP_UNDEF);
79072805
LW
7926
7927 case KEY_unpack:
a0d0e21e 7928 LOP(OP_UNPACK,XTERM);
79072805
LW
7929
7930 case KEY_utime:
a0d0e21e 7931 LOP(OP_UTIME,XTERM);
79072805
LW
7932
7933 case KEY_umask:
6f33ba73 7934 UNIDOR(OP_UMASK);
79072805
LW
7935
7936 case KEY_unshift:
a0d0e21e
LW
7937 LOP(OP_UNSHIFT,XTERM);
7938
7939 case KEY_use:
468aa647 7940 s = tokenize_use(1, s);
a0d0e21e 7941 OPERATOR(USE);
79072805
LW
7942
7943 case KEY_values:
7944 UNI(OP_VALUES);
7945
7946 case KEY_vec:
a0d0e21e 7947 LOP(OP_VEC,XTERM);
79072805 7948
0d863452 7949 case KEY_when:
6154021b 7950 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7951 OPERATOR(WHEN);
7952
79072805 7953 case KEY_while:
6154021b 7954 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7955 OPERATOR(WHILE);
7956
7957 case KEY_warn:
3280af22 7958 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7959 LOP(OP_WARN,XTERM);
79072805
LW
7960
7961 case KEY_wait:
7962 FUN0(OP_WAIT);
7963
7964 case KEY_waitpid:
a0d0e21e 7965 LOP(OP_WAITPID,XTERM);
79072805
LW
7966
7967 case KEY_wantarray:
7968 FUN0(OP_WANTARRAY);
7969
7970 case KEY_write:
9d116dd7
JH
7971#ifdef EBCDIC
7972 {
df3728a2
JH
7973 char ctl_l[2];
7974 ctl_l[0] = toCTRL('L');
7975 ctl_l[1] = '\0';
fafc274c 7976 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7977 }
7978#else
fafc274c
NC
7979 /* Make sure $^L is defined */
7980 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7981#endif
79072805
LW
7982 UNI(OP_ENTERWRITE);
7983
7984 case KEY_x:
3280af22 7985 if (PL_expect == XOPERATOR)
79072805
LW
7986 Mop(OP_REPEAT);
7987 check_uni();
7988 goto just_a_word;
7989
a0d0e21e 7990 case KEY_xor:
6154021b 7991 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7992 OPERATOR(OROP);
7993
79072805
LW
7994 case KEY_y:
7995 s = scan_trans(s);
7996 TERM(sublex_start());
7997 }
49dc05e3 7998 }}
79072805 7999}
bf4acbe4
GS
8000#ifdef __SC__
8001#pragma segment Main
8002#endif
79072805 8003
e930465f
JH
8004static int
8005S_pending_ident(pTHX)
8eceec63 8006{
97aff369 8007 dVAR;
8eceec63 8008 register char *d;
bbd11bfc 8009 PADOFFSET tmp = 0;
8eceec63
SC
8010 /* pit holds the identifier we read and pending_ident is reset */
8011 char pit = PL_pending_ident;
9bde8eb0
NC
8012 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
8013 /* All routes through this function want to know if there is a colon. */
c099d646 8014 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
8015 PL_pending_ident = 0;
8016
cd81e915 8017 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 8018 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 8019 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
8020
8021 /* if we're in a my(), we can't allow dynamics here.
8022 $foo'bar has already been turned into $foo::bar, so
8023 just check for colons.
8024
8025 if it's a legal name, the OP is a PADANY.
8026 */
8027 if (PL_in_my) {
8028 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 8029 if (has_colon)
8eceec63
SC
8030 yyerror(Perl_form(aTHX_ "No package name allowed for "
8031 "variable %s in \"our\"",
8032 PL_tokenbuf));
d6447115 8033 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8034 }
8035 else {
9bde8eb0 8036 if (has_colon)
952306ac
RGS
8037 yyerror(Perl_form(aTHX_ PL_no_myglob,
8038 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 8039
6154021b 8040 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 8041 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
8042 return PRIVATEREF;
8043 }
8044 }
8045
8046 /*
8047 build the ops for accesses to a my() variable.
8048
8049 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8050 then used in a comparison. This catches most, but not
8051 all cases. For instance, it catches
8052 sort { my($a); $a <=> $b }
8053 but not
8054 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8055 (although why you'd do that is anyone's guess).
8056 */
8057
9bde8eb0 8058 if (!has_colon) {
8716503d 8059 if (!PL_in_my)
f8f98e0a 8060 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 8061 if (tmp != NOT_IN_PAD) {
8eceec63 8062 /* might be an "our" variable" */
00b1698f 8063 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8064 /* build ops for a bareword */
b64e5050
AL
8065 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8066 HEK * const stashname = HvNAME_HEK(stash);
8067 SV * const sym = newSVhek(stashname);
396482e1 8068 sv_catpvs(sym, "::");
9bde8eb0 8069 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8070 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8071 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8072 gv_fetchsv(sym,
8eceec63
SC
8073 (PL_in_eval
8074 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8075 : GV_ADDMULTI
8eceec63
SC
8076 ),
8077 ((PL_tokenbuf[0] == '$') ? SVt_PV
8078 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8079 : SVt_PVHV));
8080 return WORD;
8081 }
8082
8083 /* if it's a sort block and they're naming $a or $b */
8084 if (PL_last_lop_op == OP_SORT &&
8085 PL_tokenbuf[0] == '$' &&
8086 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8087 && !PL_tokenbuf[2])
8088 {
8089 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8090 d < PL_bufend && *d != '\n';
8091 d++)
8092 {
8093 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8094 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8095 PL_tokenbuf);
8096 }
8097 }
8098 }
8099
6154021b
RGS
8100 pl_yylval.opval = newOP(OP_PADANY, 0);
8101 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8102 return PRIVATEREF;
8103 }
8104 }
8105
8106 /*
8107 Whine if they've said @foo in a doublequoted string,
8108 and @foo isn't a variable we can find in the symbol
8109 table.
8110 */
d824713b
NC
8111 if (ckWARN(WARN_AMBIGUOUS) &&
8112 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8113 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8114 SVt_PVAV);
8eceec63 8115 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8116 /* DO NOT warn for @- and @+ */
8117 && !( PL_tokenbuf[2] == '\0' &&
8118 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8119 )
8eceec63
SC
8120 {
8121 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8122 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8123 "Possible unintended interpolation of %s in string",
8124 PL_tokenbuf);
8eceec63
SC
8125 }
8126 }
8127
8128 /* build ops for a bareword */
6154021b 8129 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8130 tokenbuf_len - 1));
6154021b 8131 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8132 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8133 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8134 ((PL_tokenbuf[0] == '$') ? SVt_PV
8135 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8136 : SVt_PVHV));
8eceec63
SC
8137 return WORD;
8138}
8139
4c3bbe0f
MHM
8140/*
8141 * The following code was generated by perl_keyword.pl.
8142 */
e2e1dd5a 8143
79072805 8144I32
5458a98a 8145Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 8146{
952306ac 8147 dVAR;
7918f24d
NC
8148
8149 PERL_ARGS_ASSERT_KEYWORD;
8150
4c3bbe0f
MHM
8151 switch (len)
8152 {
8153 case 1: /* 5 tokens of length 1 */
8154 switch (name[0])
e2e1dd5a 8155 {
4c3bbe0f
MHM
8156 case 'm':
8157 { /* m */
8158 return KEY_m;
8159 }
8160
4c3bbe0f
MHM
8161 case 'q':
8162 { /* q */
8163 return KEY_q;
8164 }
8165
4c3bbe0f
MHM
8166 case 's':
8167 { /* s */
8168 return KEY_s;
8169 }
8170
4c3bbe0f
MHM
8171 case 'x':
8172 { /* x */
8173 return -KEY_x;
8174 }
8175
4c3bbe0f
MHM
8176 case 'y':
8177 { /* y */
8178 return KEY_y;
8179 }
8180
4c3bbe0f
MHM
8181 default:
8182 goto unknown;
e2e1dd5a 8183 }
4c3bbe0f
MHM
8184
8185 case 2: /* 18 tokens of length 2 */
8186 switch (name[0])
e2e1dd5a 8187 {
4c3bbe0f
MHM
8188 case 'd':
8189 if (name[1] == 'o')
8190 { /* do */
8191 return KEY_do;
8192 }
8193
8194 goto unknown;
8195
8196 case 'e':
8197 if (name[1] == 'q')
8198 { /* eq */
8199 return -KEY_eq;
8200 }
8201
8202 goto unknown;
8203
8204 case 'g':
8205 switch (name[1])
8206 {
8207 case 'e':
8208 { /* ge */
8209 return -KEY_ge;
8210 }
8211
4c3bbe0f
MHM
8212 case 't':
8213 { /* gt */
8214 return -KEY_gt;
8215 }
8216
4c3bbe0f
MHM
8217 default:
8218 goto unknown;
8219 }
8220
8221 case 'i':
8222 if (name[1] == 'f')
8223 { /* if */
8224 return KEY_if;
8225 }
8226
8227 goto unknown;
8228
8229 case 'l':
8230 switch (name[1])
8231 {
8232 case 'c':
8233 { /* lc */
8234 return -KEY_lc;
8235 }
8236
4c3bbe0f
MHM
8237 case 'e':
8238 { /* le */
8239 return -KEY_le;
8240 }
8241
4c3bbe0f
MHM
8242 case 't':
8243 { /* lt */
8244 return -KEY_lt;
8245 }
8246
4c3bbe0f
MHM
8247 default:
8248 goto unknown;
8249 }
8250
8251 case 'm':
8252 if (name[1] == 'y')
8253 { /* my */
8254 return KEY_my;
8255 }
8256
8257 goto unknown;
8258
8259 case 'n':
8260 switch (name[1])
8261 {
8262 case 'e':
8263 { /* ne */
8264 return -KEY_ne;
8265 }
8266
4c3bbe0f
MHM
8267 case 'o':
8268 { /* no */
8269 return KEY_no;
8270 }
8271
4c3bbe0f
MHM
8272 default:
8273 goto unknown;
8274 }
8275
8276 case 'o':
8277 if (name[1] == 'r')
8278 { /* or */
8279 return -KEY_or;
8280 }
8281
8282 goto unknown;
8283
8284 case 'q':
8285 switch (name[1])
8286 {
8287 case 'q':
8288 { /* qq */
8289 return KEY_qq;
8290 }
8291
4c3bbe0f
MHM
8292 case 'r':
8293 { /* qr */
8294 return KEY_qr;
8295 }
8296
4c3bbe0f
MHM
8297 case 'w':
8298 { /* qw */
8299 return KEY_qw;
8300 }
8301
4c3bbe0f
MHM
8302 case 'x':
8303 { /* qx */
8304 return KEY_qx;
8305 }
8306
4c3bbe0f
MHM
8307 default:
8308 goto unknown;
8309 }
8310
8311 case 't':
8312 if (name[1] == 'r')
8313 { /* tr */
8314 return KEY_tr;
8315 }
8316
8317 goto unknown;
8318
8319 case 'u':
8320 if (name[1] == 'c')
8321 { /* uc */
8322 return -KEY_uc;
8323 }
8324
8325 goto unknown;
8326
8327 default:
8328 goto unknown;
e2e1dd5a 8329 }
4c3bbe0f 8330
0d863452 8331 case 3: /* 29 tokens of length 3 */
4c3bbe0f 8332 switch (name[0])
e2e1dd5a 8333 {
4c3bbe0f
MHM
8334 case 'E':
8335 if (name[1] == 'N' &&
8336 name[2] == 'D')
8337 { /* END */
8338 return KEY_END;
8339 }
8340
8341 goto unknown;
8342
8343 case 'a':
8344 switch (name[1])
8345 {
8346 case 'b':
8347 if (name[2] == 's')
8348 { /* abs */
8349 return -KEY_abs;
8350 }
8351
8352 goto unknown;
8353
8354 case 'n':
8355 if (name[2] == 'd')
8356 { /* and */
8357 return -KEY_and;
8358 }
8359
8360 goto unknown;
8361
8362 default:
8363 goto unknown;
8364 }
8365
8366 case 'c':
8367 switch (name[1])
8368 {
8369 case 'h':
8370 if (name[2] == 'r')
8371 { /* chr */
8372 return -KEY_chr;
8373 }
8374
8375 goto unknown;
8376
8377 case 'm':
8378 if (name[2] == 'p')
8379 { /* cmp */
8380 return -KEY_cmp;
8381 }
8382
8383 goto unknown;
8384
8385 case 'o':
8386 if (name[2] == 's')
8387 { /* cos */
8388 return -KEY_cos;
8389 }
8390
8391 goto unknown;
8392
8393 default:
8394 goto unknown;
8395 }
8396
8397 case 'd':
8398 if (name[1] == 'i' &&
8399 name[2] == 'e')
8400 { /* die */
8401 return -KEY_die;
8402 }
8403
8404 goto unknown;
8405
8406 case 'e':
8407 switch (name[1])
8408 {
8409 case 'o':
8410 if (name[2] == 'f')
8411 { /* eof */
8412 return -KEY_eof;
8413 }
8414
8415 goto unknown;
8416
4c3bbe0f
MHM
8417 case 'x':
8418 if (name[2] == 'p')
8419 { /* exp */
8420 return -KEY_exp;
8421 }
8422
8423 goto unknown;
8424
8425 default:
8426 goto unknown;
8427 }
8428
8429 case 'f':
8430 if (name[1] == 'o' &&
8431 name[2] == 'r')
8432 { /* for */
8433 return KEY_for;
8434 }
8435
8436 goto unknown;
8437
8438 case 'h':
8439 if (name[1] == 'e' &&
8440 name[2] == 'x')
8441 { /* hex */
8442 return -KEY_hex;
8443 }
8444
8445 goto unknown;
8446
8447 case 'i':
8448 if (name[1] == 'n' &&
8449 name[2] == 't')
8450 { /* int */
8451 return -KEY_int;
8452 }
8453
8454 goto unknown;
8455
8456 case 'l':
8457 if (name[1] == 'o' &&
8458 name[2] == 'g')
8459 { /* log */
8460 return -KEY_log;
8461 }
8462
8463 goto unknown;
8464
8465 case 'm':
8466 if (name[1] == 'a' &&
8467 name[2] == 'p')
8468 { /* map */
8469 return KEY_map;
8470 }
8471
8472 goto unknown;
8473
8474 case 'n':
8475 if (name[1] == 'o' &&
8476 name[2] == 't')
8477 { /* not */
8478 return -KEY_not;
8479 }
8480
8481 goto unknown;
8482
8483 case 'o':
8484 switch (name[1])
8485 {
8486 case 'c':
8487 if (name[2] == 't')
8488 { /* oct */
8489 return -KEY_oct;
8490 }
8491
8492 goto unknown;
8493
8494 case 'r':
8495 if (name[2] == 'd')
8496 { /* ord */
8497 return -KEY_ord;
8498 }
8499
8500 goto unknown;
8501
8502 case 'u':
8503 if (name[2] == 'r')
8504 { /* our */
8505 return KEY_our;
8506 }
8507
8508 goto unknown;
8509
8510 default:
8511 goto unknown;
8512 }
8513
8514 case 'p':
8515 if (name[1] == 'o')
8516 {
8517 switch (name[2])
8518 {
8519 case 'p':
8520 { /* pop */
8521 return -KEY_pop;
8522 }
8523
4c3bbe0f
MHM
8524 case 's':
8525 { /* pos */
8526 return KEY_pos;
8527 }
8528
4c3bbe0f
MHM
8529 default:
8530 goto unknown;
8531 }
8532 }
8533
8534 goto unknown;
8535
8536 case 'r':
8537 if (name[1] == 'e' &&
8538 name[2] == 'f')
8539 { /* ref */
8540 return -KEY_ref;
8541 }
8542
8543 goto unknown;
8544
8545 case 's':
8546 switch (name[1])
8547 {
0d863452
RH
8548 case 'a':
8549 if (name[2] == 'y')
8550 { /* say */
e3e804c9 8551 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8552 }
8553
8554 goto unknown;
8555
4c3bbe0f
MHM
8556 case 'i':
8557 if (name[2] == 'n')
8558 { /* sin */
8559 return -KEY_sin;
8560 }
8561
8562 goto unknown;
8563
8564 case 'u':
8565 if (name[2] == 'b')
8566 { /* sub */
8567 return KEY_sub;
8568 }
8569
8570 goto unknown;
8571
8572 default:
8573 goto unknown;
8574 }
8575
8576 case 't':
8577 if (name[1] == 'i' &&
8578 name[2] == 'e')
8579 { /* tie */
1db4d195 8580 return -KEY_tie;
4c3bbe0f
MHM
8581 }
8582
8583 goto unknown;
8584
8585 case 'u':
8586 if (name[1] == 's' &&
8587 name[2] == 'e')
8588 { /* use */
8589 return KEY_use;
8590 }
8591
8592 goto unknown;
8593
8594 case 'v':
8595 if (name[1] == 'e' &&
8596 name[2] == 'c')
8597 { /* vec */
8598 return -KEY_vec;
8599 }
8600
8601 goto unknown;
8602
8603 case 'x':
8604 if (name[1] == 'o' &&
8605 name[2] == 'r')
8606 { /* xor */
8607 return -KEY_xor;
8608 }
8609
8610 goto unknown;
8611
8612 default:
8613 goto unknown;
e2e1dd5a 8614 }
4c3bbe0f 8615
0d863452 8616 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8617 switch (name[0])
e2e1dd5a 8618 {
4c3bbe0f
MHM
8619 case 'C':
8620 if (name[1] == 'O' &&
8621 name[2] == 'R' &&
8622 name[3] == 'E')
8623 { /* CORE */
8624 return -KEY_CORE;
8625 }
8626
8627 goto unknown;
8628
8629 case 'I':
8630 if (name[1] == 'N' &&
8631 name[2] == 'I' &&
8632 name[3] == 'T')
8633 { /* INIT */
8634 return KEY_INIT;
8635 }
8636
8637 goto unknown;
8638
8639 case 'b':
8640 if (name[1] == 'i' &&
8641 name[2] == 'n' &&
8642 name[3] == 'd')
8643 { /* bind */
8644 return -KEY_bind;
8645 }
8646
8647 goto unknown;
8648
8649 case 'c':
8650 if (name[1] == 'h' &&
8651 name[2] == 'o' &&
8652 name[3] == 'p')
8653 { /* chop */
8654 return -KEY_chop;
8655 }
8656
8657 goto unknown;
8658
8659 case 'd':
8660 if (name[1] == 'u' &&
8661 name[2] == 'm' &&
8662 name[3] == 'p')
8663 { /* dump */
8664 return -KEY_dump;
8665 }
8666
8667 goto unknown;
8668
8669 case 'e':
8670 switch (name[1])
8671 {
8672 case 'a':
8673 if (name[2] == 'c' &&
8674 name[3] == 'h')
8675 { /* each */
8676 return -KEY_each;
8677 }
8678
8679 goto unknown;
8680
8681 case 'l':
8682 if (name[2] == 's' &&
8683 name[3] == 'e')
8684 { /* else */
8685 return KEY_else;
8686 }
8687
8688 goto unknown;
8689
8690 case 'v':
8691 if (name[2] == 'a' &&
8692 name[3] == 'l')
8693 { /* eval */
8694 return KEY_eval;
8695 }
8696
8697 goto unknown;
8698
8699 case 'x':
8700 switch (name[2])
8701 {
8702 case 'e':
8703 if (name[3] == 'c')
8704 { /* exec */
8705 return -KEY_exec;
8706 }
8707
8708 goto unknown;
8709
8710 case 'i':
8711 if (name[3] == 't')
8712 { /* exit */
8713 return -KEY_exit;
8714 }
8715
8716 goto unknown;
8717
8718 default:
8719 goto unknown;
8720 }
8721
8722 default:
8723 goto unknown;
8724 }
8725
8726 case 'f':
8727 if (name[1] == 'o' &&
8728 name[2] == 'r' &&
8729 name[3] == 'k')
8730 { /* fork */
8731 return -KEY_fork;
8732 }
8733
8734 goto unknown;
8735
8736 case 'g':
8737 switch (name[1])
8738 {
8739 case 'e':
8740 if (name[2] == 't' &&
8741 name[3] == 'c')
8742 { /* getc */
8743 return -KEY_getc;
8744 }
8745
8746 goto unknown;
8747
8748 case 'l':
8749 if (name[2] == 'o' &&
8750 name[3] == 'b')
8751 { /* glob */
8752 return KEY_glob;
8753 }
8754
8755 goto unknown;
8756
8757 case 'o':
8758 if (name[2] == 't' &&
8759 name[3] == 'o')
8760 { /* goto */
8761 return KEY_goto;
8762 }
8763
8764 goto unknown;
8765
8766 case 'r':
8767 if (name[2] == 'e' &&
8768 name[3] == 'p')
8769 { /* grep */
8770 return KEY_grep;
8771 }
8772
8773 goto unknown;
8774
8775 default:
8776 goto unknown;
8777 }
8778
8779 case 'j':
8780 if (name[1] == 'o' &&
8781 name[2] == 'i' &&
8782 name[3] == 'n')
8783 { /* join */
8784 return -KEY_join;
8785 }
8786
8787 goto unknown;
8788
8789 case 'k':
8790 switch (name[1])
8791 {
8792 case 'e':
8793 if (name[2] == 'y' &&
8794 name[3] == 's')
8795 { /* keys */
8796 return -KEY_keys;
8797 }
8798
8799 goto unknown;
8800
8801 case 'i':
8802 if (name[2] == 'l' &&
8803 name[3] == 'l')
8804 { /* kill */
8805 return -KEY_kill;
8806 }
8807
8808 goto unknown;
8809
8810 default:
8811 goto unknown;
8812 }
8813
8814 case 'l':
8815 switch (name[1])
8816 {
8817 case 'a':
8818 if (name[2] == 's' &&
8819 name[3] == 't')
8820 { /* last */
8821 return KEY_last;
8822 }
8823
8824 goto unknown;
8825
8826 case 'i':
8827 if (name[2] == 'n' &&
8828 name[3] == 'k')
8829 { /* link */
8830 return -KEY_link;
8831 }
8832
8833 goto unknown;
8834
8835 case 'o':
8836 if (name[2] == 'c' &&
8837 name[3] == 'k')
8838 { /* lock */
8839 return -KEY_lock;
8840 }
8841
8842 goto unknown;
8843
8844 default:
8845 goto unknown;
8846 }
8847
8848 case 'n':
8849 if (name[1] == 'e' &&
8850 name[2] == 'x' &&
8851 name[3] == 't')
8852 { /* next */
8853 return KEY_next;
8854 }
8855
8856 goto unknown;
8857
8858 case 'o':
8859 if (name[1] == 'p' &&
8860 name[2] == 'e' &&
8861 name[3] == 'n')
8862 { /* open */
8863 return -KEY_open;
8864 }
8865
8866 goto unknown;
8867
8868 case 'p':
8869 switch (name[1])
8870 {
8871 case 'a':
8872 if (name[2] == 'c' &&
8873 name[3] == 'k')
8874 { /* pack */
8875 return -KEY_pack;
8876 }
8877
8878 goto unknown;
8879
8880 case 'i':
8881 if (name[2] == 'p' &&
8882 name[3] == 'e')
8883 { /* pipe */
8884 return -KEY_pipe;
8885 }
8886
8887 goto unknown;
8888
8889 case 'u':
8890 if (name[2] == 's' &&
8891 name[3] == 'h')
8892 { /* push */
8893 return -KEY_push;
8894 }
8895
8896 goto unknown;
8897
8898 default:
8899 goto unknown;
8900 }
8901
8902 case 'r':
8903 switch (name[1])
8904 {
8905 case 'a':
8906 if (name[2] == 'n' &&
8907 name[3] == 'd')
8908 { /* rand */
8909 return -KEY_rand;
8910 }
8911
8912 goto unknown;
8913
8914 case 'e':
8915 switch (name[2])
8916 {
8917 case 'a':
8918 if (name[3] == 'd')
8919 { /* read */
8920 return -KEY_read;
8921 }
8922
8923 goto unknown;
8924
8925 case 'c':
8926 if (name[3] == 'v')
8927 { /* recv */
8928 return -KEY_recv;
8929 }
8930
8931 goto unknown;
8932
8933 case 'd':
8934 if (name[3] == 'o')
8935 { /* redo */
8936 return KEY_redo;
8937 }
8938
8939 goto unknown;
8940
8941 default:
8942 goto unknown;
8943 }
8944
8945 default:
8946 goto unknown;
8947 }
8948
8949 case 's':
8950 switch (name[1])
8951 {
8952 case 'e':
8953 switch (name[2])
8954 {
8955 case 'e':
8956 if (name[3] == 'k')
8957 { /* seek */
8958 return -KEY_seek;
8959 }
8960
8961 goto unknown;
8962
8963 case 'n':
8964 if (name[3] == 'd')
8965 { /* send */
8966 return -KEY_send;
8967 }
8968
8969 goto unknown;
8970
8971 default:
8972 goto unknown;
8973 }
8974
8975 case 'o':
8976 if (name[2] == 'r' &&
8977 name[3] == 't')
8978 { /* sort */
8979 return KEY_sort;
8980 }
8981
8982 goto unknown;
8983
8984 case 'q':
8985 if (name[2] == 'r' &&
8986 name[3] == 't')
8987 { /* sqrt */
8988 return -KEY_sqrt;
8989 }
8990
8991 goto unknown;
8992
8993 case 't':
8994 if (name[2] == 'a' &&
8995 name[3] == 't')
8996 { /* stat */
8997 return -KEY_stat;
8998 }
8999
9000 goto unknown;
9001
9002 default:
9003 goto unknown;
9004 }
9005
9006 case 't':
9007 switch (name[1])
9008 {
9009 case 'e':
9010 if (name[2] == 'l' &&
9011 name[3] == 'l')
9012 { /* tell */
9013 return -KEY_tell;
9014 }
9015
9016 goto unknown;
9017
9018 case 'i':
9019 switch (name[2])
9020 {
9021 case 'e':
9022 if (name[3] == 'd')
9023 { /* tied */
1db4d195 9024 return -KEY_tied;
4c3bbe0f
MHM
9025 }
9026
9027 goto unknown;
9028
9029 case 'm':
9030 if (name[3] == 'e')
9031 { /* time */
9032 return -KEY_time;
9033 }
9034
9035 goto unknown;
9036
9037 default:
9038 goto unknown;
9039 }
9040
9041 default:
9042 goto unknown;
9043 }
9044
9045 case 'w':
0d863452 9046 switch (name[1])
4c3bbe0f 9047 {
0d863452 9048 case 'a':
952306ac
RGS
9049 switch (name[2])
9050 {
9051 case 'i':
9052 if (name[3] == 't')
9053 { /* wait */
9054 return -KEY_wait;
9055 }
4c3bbe0f 9056
952306ac 9057 goto unknown;
4c3bbe0f 9058
952306ac
RGS
9059 case 'r':
9060 if (name[3] == 'n')
9061 { /* warn */
9062 return -KEY_warn;
9063 }
4c3bbe0f 9064
952306ac 9065 goto unknown;
4c3bbe0f 9066
952306ac
RGS
9067 default:
9068 goto unknown;
9069 }
0d863452
RH
9070
9071 case 'h':
9072 if (name[2] == 'e' &&
9073 name[3] == 'n')
9074 { /* when */
5458a98a 9075 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 9076 }
4c3bbe0f 9077
952306ac 9078 goto unknown;
4c3bbe0f 9079
952306ac
RGS
9080 default:
9081 goto unknown;
9082 }
4c3bbe0f 9083
0d863452
RH
9084 default:
9085 goto unknown;
9086 }
9087
952306ac 9088 case 5: /* 39 tokens of length 5 */
4c3bbe0f 9089 switch (name[0])
e2e1dd5a 9090 {
4c3bbe0f
MHM
9091 case 'B':
9092 if (name[1] == 'E' &&
9093 name[2] == 'G' &&
9094 name[3] == 'I' &&
9095 name[4] == 'N')
9096 { /* BEGIN */
9097 return KEY_BEGIN;
9098 }
9099
9100 goto unknown;
9101
9102 case 'C':
9103 if (name[1] == 'H' &&
9104 name[2] == 'E' &&
9105 name[3] == 'C' &&
9106 name[4] == 'K')
9107 { /* CHECK */
9108 return KEY_CHECK;
9109 }
9110
9111 goto unknown;
9112
9113 case 'a':
9114 switch (name[1])
9115 {
9116 case 'l':
9117 if (name[2] == 'a' &&
9118 name[3] == 'r' &&
9119 name[4] == 'm')
9120 { /* alarm */
9121 return -KEY_alarm;
9122 }
9123
9124 goto unknown;
9125
9126 case 't':
9127 if (name[2] == 'a' &&
9128 name[3] == 'n' &&
9129 name[4] == '2')
9130 { /* atan2 */
9131 return -KEY_atan2;
9132 }
9133
9134 goto unknown;
9135
9136 default:
9137 goto unknown;
9138 }
9139
9140 case 'b':
0d863452
RH
9141 switch (name[1])
9142 {
9143 case 'l':
9144 if (name[2] == 'e' &&
952306ac
RGS
9145 name[3] == 's' &&
9146 name[4] == 's')
9147 { /* bless */
9148 return -KEY_bless;
9149 }
4c3bbe0f 9150
952306ac 9151 goto unknown;
4c3bbe0f 9152
0d863452
RH
9153 case 'r':
9154 if (name[2] == 'e' &&
9155 name[3] == 'a' &&
9156 name[4] == 'k')
9157 { /* break */
5458a98a 9158 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
9159 }
9160
9161 goto unknown;
9162
9163 default:
9164 goto unknown;
9165 }
9166
4c3bbe0f
MHM
9167 case 'c':
9168 switch (name[1])
9169 {
9170 case 'h':
9171 switch (name[2])
9172 {
9173 case 'd':
9174 if (name[3] == 'i' &&
9175 name[4] == 'r')
9176 { /* chdir */
9177 return -KEY_chdir;
9178 }
9179
9180 goto unknown;
9181
9182 case 'm':
9183 if (name[3] == 'o' &&
9184 name[4] == 'd')
9185 { /* chmod */
9186 return -KEY_chmod;
9187 }
9188
9189 goto unknown;
9190
9191 case 'o':
9192 switch (name[3])
9193 {
9194 case 'm':
9195 if (name[4] == 'p')
9196 { /* chomp */
9197 return -KEY_chomp;
9198 }
9199
9200 goto unknown;
9201
9202 case 'w':
9203 if (name[4] == 'n')
9204 { /* chown */
9205 return -KEY_chown;
9206 }
9207
9208 goto unknown;
9209
9210 default:
9211 goto unknown;
9212 }
9213
9214 default:
9215 goto unknown;
9216 }
9217
9218 case 'l':
9219 if (name[2] == 'o' &&
9220 name[3] == 's' &&
9221 name[4] == 'e')
9222 { /* close */
9223 return -KEY_close;
9224 }
9225
9226 goto unknown;
9227
9228 case 'r':
9229 if (name[2] == 'y' &&
9230 name[3] == 'p' &&
9231 name[4] == 't')
9232 { /* crypt */
9233 return -KEY_crypt;
9234 }
9235
9236 goto unknown;
9237
9238 default:
9239 goto unknown;
9240 }
9241
9242 case 'e':
9243 if (name[1] == 'l' &&
9244 name[2] == 's' &&
9245 name[3] == 'i' &&
9246 name[4] == 'f')
9247 { /* elsif */
9248 return KEY_elsif;
9249 }
9250
9251 goto unknown;
9252
9253 case 'f':
9254 switch (name[1])
9255 {
9256 case 'c':
9257 if (name[2] == 'n' &&
9258 name[3] == 't' &&
9259 name[4] == 'l')
9260 { /* fcntl */
9261 return -KEY_fcntl;
9262 }
9263
9264 goto unknown;
9265
9266 case 'l':
9267 if (name[2] == 'o' &&
9268 name[3] == 'c' &&
9269 name[4] == 'k')
9270 { /* flock */
9271 return -KEY_flock;
9272 }
9273
9274 goto unknown;
9275
9276 default:
9277 goto unknown;
9278 }
9279
0d863452
RH
9280 case 'g':
9281 if (name[1] == 'i' &&
9282 name[2] == 'v' &&
9283 name[3] == 'e' &&
9284 name[4] == 'n')
9285 { /* given */
5458a98a 9286 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
9287 }
9288
9289 goto unknown;
9290
4c3bbe0f
MHM
9291 case 'i':
9292 switch (name[1])
9293 {
9294 case 'n':
9295 if (name[2] == 'd' &&
9296 name[3] == 'e' &&
9297 name[4] == 'x')
9298 { /* index */
9299 return -KEY_index;
9300 }
9301
9302 goto unknown;
9303
9304 case 'o':
9305 if (name[2] == 'c' &&
9306 name[3] == 't' &&
9307 name[4] == 'l')
9308 { /* ioctl */
9309 return -KEY_ioctl;
9310 }
9311
9312 goto unknown;
9313
9314 default:
9315 goto unknown;
9316 }
9317
9318 case 'l':
9319 switch (name[1])
9320 {
9321 case 'o':
9322 if (name[2] == 'c' &&
9323 name[3] == 'a' &&
9324 name[4] == 'l')
9325 { /* local */
9326 return KEY_local;
9327 }
9328
9329 goto unknown;
9330
9331 case 's':
9332 if (name[2] == 't' &&
9333 name[3] == 'a' &&
9334 name[4] == 't')
9335 { /* lstat */
9336 return -KEY_lstat;
9337 }
9338
9339 goto unknown;
9340
9341 default:
9342 goto unknown;
9343 }
9344
9345 case 'm':
9346 if (name[1] == 'k' &&
9347 name[2] == 'd' &&
9348 name[3] == 'i' &&
9349 name[4] == 'r')
9350 { /* mkdir */
9351 return -KEY_mkdir;
9352 }
9353
9354 goto unknown;
9355
9356 case 'p':
9357 if (name[1] == 'r' &&
9358 name[2] == 'i' &&
9359 name[3] == 'n' &&
9360 name[4] == 't')
9361 { /* print */
9362 return KEY_print;
9363 }
9364
9365 goto unknown;
9366
9367 case 'r':
9368 switch (name[1])
9369 {
9370 case 'e':
9371 if (name[2] == 's' &&
9372 name[3] == 'e' &&
9373 name[4] == 't')
9374 { /* reset */
9375 return -KEY_reset;
9376 }
9377
9378 goto unknown;
9379
9380 case 'm':
9381 if (name[2] == 'd' &&
9382 name[3] == 'i' &&
9383 name[4] == 'r')
9384 { /* rmdir */
9385 return -KEY_rmdir;
9386 }
9387
9388 goto unknown;
9389
9390 default:
9391 goto unknown;
9392 }
9393
9394 case 's':
9395 switch (name[1])
9396 {
9397 case 'e':
9398 if (name[2] == 'm' &&
9399 name[3] == 'o' &&
9400 name[4] == 'p')
9401 { /* semop */
9402 return -KEY_semop;
9403 }
9404
9405 goto unknown;
9406
9407 case 'h':
9408 if (name[2] == 'i' &&
9409 name[3] == 'f' &&
9410 name[4] == 't')
9411 { /* shift */
9412 return -KEY_shift;
9413 }
9414
9415 goto unknown;
9416
9417 case 'l':
9418 if (name[2] == 'e' &&
9419 name[3] == 'e' &&
9420 name[4] == 'p')
9421 { /* sleep */
9422 return -KEY_sleep;
9423 }
9424
9425 goto unknown;
9426
9427 case 'p':
9428 if (name[2] == 'l' &&
9429 name[3] == 'i' &&
9430 name[4] == 't')
9431 { /* split */
9432 return KEY_split;
9433 }
9434
9435 goto unknown;
9436
9437 case 'r':
9438 if (name[2] == 'a' &&
9439 name[3] == 'n' &&
9440 name[4] == 'd')
9441 { /* srand */
9442 return -KEY_srand;
9443 }
9444
9445 goto unknown;
9446
9447 case 't':
952306ac
RGS
9448 switch (name[2])
9449 {
9450 case 'a':
9451 if (name[3] == 't' &&
9452 name[4] == 'e')
9453 { /* state */
5458a98a 9454 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9455 }
4c3bbe0f 9456
952306ac
RGS
9457 goto unknown;
9458
9459 case 'u':
9460 if (name[3] == 'd' &&
9461 name[4] == 'y')
9462 { /* study */
9463 return KEY_study;
9464 }
9465
9466 goto unknown;
9467
9468 default:
9469 goto unknown;
9470 }
4c3bbe0f
MHM
9471
9472 default:
9473 goto unknown;
9474 }
9475
9476 case 't':
9477 if (name[1] == 'i' &&
9478 name[2] == 'm' &&
9479 name[3] == 'e' &&
9480 name[4] == 's')
9481 { /* times */
9482 return -KEY_times;
9483 }
9484
9485 goto unknown;
9486
9487 case 'u':
9488 switch (name[1])
9489 {
9490 case 'm':
9491 if (name[2] == 'a' &&
9492 name[3] == 's' &&
9493 name[4] == 'k')
9494 { /* umask */
9495 return -KEY_umask;
9496 }
9497
9498 goto unknown;
9499
9500 case 'n':
9501 switch (name[2])
9502 {
9503 case 'd':
9504 if (name[3] == 'e' &&
9505 name[4] == 'f')
9506 { /* undef */
9507 return KEY_undef;
9508 }
9509
9510 goto unknown;
9511
9512 case 't':
9513 if (name[3] == 'i')
9514 {
9515 switch (name[4])
9516 {
9517 case 'e':
9518 { /* untie */
1db4d195 9519 return -KEY_untie;
4c3bbe0f
MHM
9520 }
9521
4c3bbe0f
MHM
9522 case 'l':
9523 { /* until */
9524 return KEY_until;
9525 }
9526
4c3bbe0f
MHM
9527 default:
9528 goto unknown;
9529 }
9530 }
9531
9532 goto unknown;
9533
9534 default:
9535 goto unknown;
9536 }
9537
9538 case 't':
9539 if (name[2] == 'i' &&
9540 name[3] == 'm' &&
9541 name[4] == 'e')
9542 { /* utime */
9543 return -KEY_utime;
9544 }
9545
9546 goto unknown;
9547
9548 default:
9549 goto unknown;
9550 }
9551
9552 case 'w':
9553 switch (name[1])
9554 {
9555 case 'h':
9556 if (name[2] == 'i' &&
9557 name[3] == 'l' &&
9558 name[4] == 'e')
9559 { /* while */
9560 return KEY_while;
9561 }
9562
9563 goto unknown;
9564
9565 case 'r':
9566 if (name[2] == 'i' &&
9567 name[3] == 't' &&
9568 name[4] == 'e')
9569 { /* write */
9570 return -KEY_write;
9571 }
9572
9573 goto unknown;
9574
9575 default:
9576 goto unknown;
9577 }
9578
9579 default:
9580 goto unknown;
e2e1dd5a 9581 }
4c3bbe0f
MHM
9582
9583 case 6: /* 33 tokens of length 6 */
9584 switch (name[0])
9585 {
9586 case 'a':
9587 if (name[1] == 'c' &&
9588 name[2] == 'c' &&
9589 name[3] == 'e' &&
9590 name[4] == 'p' &&
9591 name[5] == 't')
9592 { /* accept */
9593 return -KEY_accept;
9594 }
9595
9596 goto unknown;
9597
9598 case 'c':
9599 switch (name[1])
9600 {
9601 case 'a':
9602 if (name[2] == 'l' &&
9603 name[3] == 'l' &&
9604 name[4] == 'e' &&
9605 name[5] == 'r')
9606 { /* caller */
9607 return -KEY_caller;
9608 }
9609
9610 goto unknown;
9611
9612 case 'h':
9613 if (name[2] == 'r' &&
9614 name[3] == 'o' &&
9615 name[4] == 'o' &&
9616 name[5] == 't')
9617 { /* chroot */
9618 return -KEY_chroot;
9619 }
9620
9621 goto unknown;
9622
9623 default:
9624 goto unknown;
9625 }
9626
9627 case 'd':
9628 if (name[1] == 'e' &&
9629 name[2] == 'l' &&
9630 name[3] == 'e' &&
9631 name[4] == 't' &&
9632 name[5] == 'e')
9633 { /* delete */
9634 return KEY_delete;
9635 }
9636
9637 goto unknown;
9638
9639 case 'e':
9640 switch (name[1])
9641 {
9642 case 'l':
9643 if (name[2] == 's' &&
9644 name[3] == 'e' &&
9645 name[4] == 'i' &&
9646 name[5] == 'f')
9647 { /* elseif */
9b387841 9648 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9649 }
9650
9651 goto unknown;
9652
9653 case 'x':
9654 if (name[2] == 'i' &&
9655 name[3] == 's' &&
9656 name[4] == 't' &&
9657 name[5] == 's')
9658 { /* exists */
9659 return KEY_exists;
9660 }
9661
9662 goto unknown;
9663
9664 default:
9665 goto unknown;
9666 }
9667
9668 case 'f':
9669 switch (name[1])
9670 {
9671 case 'i':
9672 if (name[2] == 'l' &&
9673 name[3] == 'e' &&
9674 name[4] == 'n' &&
9675 name[5] == 'o')
9676 { /* fileno */
9677 return -KEY_fileno;
9678 }
9679
9680 goto unknown;
9681
9682 case 'o':
9683 if (name[2] == 'r' &&
9684 name[3] == 'm' &&
9685 name[4] == 'a' &&
9686 name[5] == 't')
9687 { /* format */
9688 return KEY_format;
9689 }
9690
9691 goto unknown;
9692
9693 default:
9694 goto unknown;
9695 }
9696
9697 case 'g':
9698 if (name[1] == 'm' &&
9699 name[2] == 't' &&
9700 name[3] == 'i' &&
9701 name[4] == 'm' &&
9702 name[5] == 'e')
9703 { /* gmtime */
9704 return -KEY_gmtime;
9705 }
9706
9707 goto unknown;
9708
9709 case 'l':
9710 switch (name[1])
9711 {
9712 case 'e':
9713 if (name[2] == 'n' &&
9714 name[3] == 'g' &&
9715 name[4] == 't' &&
9716 name[5] == 'h')
9717 { /* length */
9718 return -KEY_length;
9719 }
9720
9721 goto unknown;
9722
9723 case 'i':
9724 if (name[2] == 's' &&
9725 name[3] == 't' &&
9726 name[4] == 'e' &&
9727 name[5] == 'n')
9728 { /* listen */
9729 return -KEY_listen;
9730 }
9731
9732 goto unknown;
9733
9734 default:
9735 goto unknown;
9736 }
9737
9738 case 'm':
9739 if (name[1] == 's' &&
9740 name[2] == 'g')
9741 {
9742 switch (name[3])
9743 {
9744 case 'c':
9745 if (name[4] == 't' &&
9746 name[5] == 'l')
9747 { /* msgctl */
9748 return -KEY_msgctl;
9749 }
9750
9751 goto unknown;
9752
9753 case 'g':
9754 if (name[4] == 'e' &&
9755 name[5] == 't')
9756 { /* msgget */
9757 return -KEY_msgget;
9758 }
9759
9760 goto unknown;
9761
9762 case 'r':
9763 if (name[4] == 'c' &&
9764 name[5] == 'v')
9765 { /* msgrcv */
9766 return -KEY_msgrcv;
9767 }
9768
9769 goto unknown;
9770
9771 case 's':
9772 if (name[4] == 'n' &&
9773 name[5] == 'd')
9774 { /* msgsnd */
9775 return -KEY_msgsnd;
9776 }
9777
9778 goto unknown;
9779
9780 default:
9781 goto unknown;
9782 }
9783 }
9784
9785 goto unknown;
9786
9787 case 'p':
9788 if (name[1] == 'r' &&
9789 name[2] == 'i' &&
9790 name[3] == 'n' &&
9791 name[4] == 't' &&
9792 name[5] == 'f')
9793 { /* printf */
9794 return KEY_printf;
9795 }
9796
9797 goto unknown;
9798
9799 case 'r':
9800 switch (name[1])
9801 {
9802 case 'e':
9803 switch (name[2])
9804 {
9805 case 'n':
9806 if (name[3] == 'a' &&
9807 name[4] == 'm' &&
9808 name[5] == 'e')
9809 { /* rename */
9810 return -KEY_rename;
9811 }
9812
9813 goto unknown;
9814
9815 case 't':
9816 if (name[3] == 'u' &&
9817 name[4] == 'r' &&
9818 name[5] == 'n')
9819 { /* return */
9820 return KEY_return;
9821 }
9822
9823 goto unknown;
9824
9825 default:
9826 goto unknown;
9827 }
9828
9829 case 'i':
9830 if (name[2] == 'n' &&
9831 name[3] == 'd' &&
9832 name[4] == 'e' &&
9833 name[5] == 'x')
9834 { /* rindex */
9835 return -KEY_rindex;
9836 }
9837
9838 goto unknown;
9839
9840 default:
9841 goto unknown;
9842 }
9843
9844 case 's':
9845 switch (name[1])
9846 {
9847 case 'c':
9848 if (name[2] == 'a' &&
9849 name[3] == 'l' &&
9850 name[4] == 'a' &&
9851 name[5] == 'r')
9852 { /* scalar */
9853 return KEY_scalar;
9854 }
9855
9856 goto unknown;
9857
9858 case 'e':
9859 switch (name[2])
9860 {
9861 case 'l':
9862 if (name[3] == 'e' &&
9863 name[4] == 'c' &&
9864 name[5] == 't')
9865 { /* select */
9866 return -KEY_select;
9867 }
9868
9869 goto unknown;
9870
9871 case 'm':
9872 switch (name[3])
9873 {
9874 case 'c':
9875 if (name[4] == 't' &&
9876 name[5] == 'l')
9877 { /* semctl */
9878 return -KEY_semctl;
9879 }
9880
9881 goto unknown;
9882
9883 case 'g':
9884 if (name[4] == 'e' &&
9885 name[5] == 't')
9886 { /* semget */
9887 return -KEY_semget;
9888 }
9889
9890 goto unknown;
9891
9892 default:
9893 goto unknown;
9894 }
9895
9896 default:
9897 goto unknown;
9898 }
9899
9900 case 'h':
9901 if (name[2] == 'm')
9902 {
9903 switch (name[3])
9904 {
9905 case 'c':
9906 if (name[4] == 't' &&
9907 name[5] == 'l')
9908 { /* shmctl */
9909 return -KEY_shmctl;
9910 }
9911
9912 goto unknown;
9913
9914 case 'g':
9915 if (name[4] == 'e' &&
9916 name[5] == 't')
9917 { /* shmget */
9918 return -KEY_shmget;
9919 }
9920
9921 goto unknown;
9922
9923 default:
9924 goto unknown;
9925 }
9926 }
9927
9928 goto unknown;
9929
9930 case 'o':
9931 if (name[2] == 'c' &&
9932 name[3] == 'k' &&
9933 name[4] == 'e' &&
9934 name[5] == 't')
9935 { /* socket */
9936 return -KEY_socket;
9937 }
9938
9939 goto unknown;
9940
9941 case 'p':
9942 if (name[2] == 'l' &&
9943 name[3] == 'i' &&
9944 name[4] == 'c' &&
9945 name[5] == 'e')
9946 { /* splice */
9947 return -KEY_splice;
9948 }
9949
9950 goto unknown;
9951
9952 case 'u':
9953 if (name[2] == 'b' &&
9954 name[3] == 's' &&
9955 name[4] == 't' &&
9956 name[5] == 'r')
9957 { /* substr */
9958 return -KEY_substr;
9959 }
9960
9961 goto unknown;
9962
9963 case 'y':
9964 if (name[2] == 's' &&
9965 name[3] == 't' &&
9966 name[4] == 'e' &&
9967 name[5] == 'm')
9968 { /* system */
9969 return -KEY_system;
9970 }
9971
9972 goto unknown;
9973
9974 default:
9975 goto unknown;
9976 }
9977
9978 case 'u':
9979 if (name[1] == 'n')
9980 {
9981 switch (name[2])
9982 {
9983 case 'l':
9984 switch (name[3])
9985 {
9986 case 'e':
9987 if (name[4] == 's' &&
9988 name[5] == 's')
9989 { /* unless */
9990 return KEY_unless;
9991 }
9992
9993 goto unknown;
9994
9995 case 'i':
9996 if (name[4] == 'n' &&
9997 name[5] == 'k')
9998 { /* unlink */
9999 return -KEY_unlink;
10000 }
10001
10002 goto unknown;
10003
10004 default:
10005 goto unknown;
10006 }
10007
10008 case 'p':
10009 if (name[3] == 'a' &&
10010 name[4] == 'c' &&
10011 name[5] == 'k')
10012 { /* unpack */
10013 return -KEY_unpack;
10014 }
10015
10016 goto unknown;
10017
10018 default:
10019 goto unknown;
10020 }
10021 }
10022
10023 goto unknown;
10024
10025 case 'v':
10026 if (name[1] == 'a' &&
10027 name[2] == 'l' &&
10028 name[3] == 'u' &&
10029 name[4] == 'e' &&
10030 name[5] == 's')
10031 { /* values */
10032 return -KEY_values;
10033 }
10034
10035 goto unknown;
10036
10037 default:
10038 goto unknown;
e2e1dd5a 10039 }
4c3bbe0f 10040
0d863452 10041 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
10042 switch (name[0])
10043 {
10044 case 'D':
10045 if (name[1] == 'E' &&
10046 name[2] == 'S' &&
10047 name[3] == 'T' &&
10048 name[4] == 'R' &&
10049 name[5] == 'O' &&
10050 name[6] == 'Y')
10051 { /* DESTROY */
10052 return KEY_DESTROY;
10053 }
10054
10055 goto unknown;
10056
10057 case '_':
10058 if (name[1] == '_' &&
10059 name[2] == 'E' &&
10060 name[3] == 'N' &&
10061 name[4] == 'D' &&
10062 name[5] == '_' &&
10063 name[6] == '_')
10064 { /* __END__ */
10065 return KEY___END__;
10066 }
10067
10068 goto unknown;
10069
10070 case 'b':
10071 if (name[1] == 'i' &&
10072 name[2] == 'n' &&
10073 name[3] == 'm' &&
10074 name[4] == 'o' &&
10075 name[5] == 'd' &&
10076 name[6] == 'e')
10077 { /* binmode */
10078 return -KEY_binmode;
10079 }
10080
10081 goto unknown;
10082
10083 case 'c':
10084 if (name[1] == 'o' &&
10085 name[2] == 'n' &&
10086 name[3] == 'n' &&
10087 name[4] == 'e' &&
10088 name[5] == 'c' &&
10089 name[6] == 't')
10090 { /* connect */
10091 return -KEY_connect;
10092 }
10093
10094 goto unknown;
10095
10096 case 'd':
10097 switch (name[1])
10098 {
10099 case 'b':
10100 if (name[2] == 'm' &&
10101 name[3] == 'o' &&
10102 name[4] == 'p' &&
10103 name[5] == 'e' &&
10104 name[6] == 'n')
10105 { /* dbmopen */
10106 return -KEY_dbmopen;
10107 }
10108
10109 goto unknown;
10110
10111 case 'e':
0d863452
RH
10112 if (name[2] == 'f')
10113 {
10114 switch (name[3])
10115 {
10116 case 'a':
10117 if (name[4] == 'u' &&
10118 name[5] == 'l' &&
10119 name[6] == 't')
10120 { /* default */
5458a98a 10121 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
10122 }
10123
10124 goto unknown;
10125
10126 case 'i':
10127 if (name[4] == 'n' &&
952306ac
RGS
10128 name[5] == 'e' &&
10129 name[6] == 'd')
10130 { /* defined */
10131 return KEY_defined;
10132 }
4c3bbe0f 10133
952306ac 10134 goto unknown;
4c3bbe0f 10135
952306ac
RGS
10136 default:
10137 goto unknown;
10138 }
0d863452
RH
10139 }
10140
10141 goto unknown;
10142
10143 default:
10144 goto unknown;
10145 }
4c3bbe0f
MHM
10146
10147 case 'f':
10148 if (name[1] == 'o' &&
10149 name[2] == 'r' &&
10150 name[3] == 'e' &&
10151 name[4] == 'a' &&
10152 name[5] == 'c' &&
10153 name[6] == 'h')
10154 { /* foreach */
10155 return KEY_foreach;
10156 }
10157
10158 goto unknown;
10159
10160 case 'g':
10161 if (name[1] == 'e' &&
10162 name[2] == 't' &&
10163 name[3] == 'p')
10164 {
10165 switch (name[4])
10166 {
10167 case 'g':
10168 if (name[5] == 'r' &&
10169 name[6] == 'p')
10170 { /* getpgrp */
10171 return -KEY_getpgrp;
10172 }
10173
10174 goto unknown;
10175
10176 case 'p':
10177 if (name[5] == 'i' &&
10178 name[6] == 'd')
10179 { /* getppid */
10180 return -KEY_getppid;
10181 }
10182
10183 goto unknown;
10184
10185 default:
10186 goto unknown;
10187 }
10188 }
10189
10190 goto unknown;
10191
10192 case 'l':
10193 if (name[1] == 'c' &&
10194 name[2] == 'f' &&
10195 name[3] == 'i' &&
10196 name[4] == 'r' &&
10197 name[5] == 's' &&
10198 name[6] == 't')
10199 { /* lcfirst */
10200 return -KEY_lcfirst;
10201 }
10202
10203 goto unknown;
10204
10205 case 'o':
10206 if (name[1] == 'p' &&
10207 name[2] == 'e' &&
10208 name[3] == 'n' &&
10209 name[4] == 'd' &&
10210 name[5] == 'i' &&
10211 name[6] == 'r')
10212 { /* opendir */
10213 return -KEY_opendir;
10214 }
10215
10216 goto unknown;
10217
10218 case 'p':
10219 if (name[1] == 'a' &&
10220 name[2] == 'c' &&
10221 name[3] == 'k' &&
10222 name[4] == 'a' &&
10223 name[5] == 'g' &&
10224 name[6] == 'e')
10225 { /* package */
10226 return KEY_package;
10227 }
10228
10229 goto unknown;
10230
10231 case 'r':
10232 if (name[1] == 'e')
10233 {
10234 switch (name[2])
10235 {
10236 case 'a':
10237 if (name[3] == 'd' &&
10238 name[4] == 'd' &&
10239 name[5] == 'i' &&
10240 name[6] == 'r')
10241 { /* readdir */
10242 return -KEY_readdir;
10243 }
10244
10245 goto unknown;
10246
10247 case 'q':
10248 if (name[3] == 'u' &&
10249 name[4] == 'i' &&
10250 name[5] == 'r' &&
10251 name[6] == 'e')
10252 { /* require */
10253 return KEY_require;
10254 }
10255
10256 goto unknown;
10257
10258 case 'v':
10259 if (name[3] == 'e' &&
10260 name[4] == 'r' &&
10261 name[5] == 's' &&
10262 name[6] == 'e')
10263 { /* reverse */
10264 return -KEY_reverse;
10265 }
10266
10267 goto unknown;
10268
10269 default:
10270 goto unknown;
10271 }
10272 }
10273
10274 goto unknown;
10275
10276 case 's':
10277 switch (name[1])
10278 {
10279 case 'e':
10280 switch (name[2])
10281 {
10282 case 'e':
10283 if (name[3] == 'k' &&
10284 name[4] == 'd' &&
10285 name[5] == 'i' &&
10286 name[6] == 'r')
10287 { /* seekdir */
10288 return -KEY_seekdir;
10289 }
10290
10291 goto unknown;
10292
10293 case 't':
10294 if (name[3] == 'p' &&
10295 name[4] == 'g' &&
10296 name[5] == 'r' &&
10297 name[6] == 'p')
10298 { /* setpgrp */
10299 return -KEY_setpgrp;
10300 }
10301
10302 goto unknown;
10303
10304 default:
10305 goto unknown;
10306 }
10307
10308 case 'h':
10309 if (name[2] == 'm' &&
10310 name[3] == 'r' &&
10311 name[4] == 'e' &&
10312 name[5] == 'a' &&
10313 name[6] == 'd')
10314 { /* shmread */
10315 return -KEY_shmread;
10316 }
10317
10318 goto unknown;
10319
10320 case 'p':
10321 if (name[2] == 'r' &&
10322 name[3] == 'i' &&
10323 name[4] == 'n' &&
10324 name[5] == 't' &&
10325 name[6] == 'f')
10326 { /* sprintf */
10327 return -KEY_sprintf;
10328 }
10329
10330 goto unknown;
10331
10332 case 'y':
10333 switch (name[2])
10334 {
10335 case 'm':
10336 if (name[3] == 'l' &&
10337 name[4] == 'i' &&
10338 name[5] == 'n' &&
10339 name[6] == 'k')
10340 { /* symlink */
10341 return -KEY_symlink;
10342 }
10343
10344 goto unknown;
10345
10346 case 's':
10347 switch (name[3])
10348 {
10349 case 'c':
10350 if (name[4] == 'a' &&
10351 name[5] == 'l' &&
10352 name[6] == 'l')
10353 { /* syscall */
10354 return -KEY_syscall;
10355 }
10356
10357 goto unknown;
10358
10359 case 'o':
10360 if (name[4] == 'p' &&
10361 name[5] == 'e' &&
10362 name[6] == 'n')
10363 { /* sysopen */
10364 return -KEY_sysopen;
10365 }
10366
10367 goto unknown;
10368
10369 case 'r':
10370 if (name[4] == 'e' &&
10371 name[5] == 'a' &&
10372 name[6] == 'd')
10373 { /* sysread */
10374 return -KEY_sysread;
10375 }
10376
10377 goto unknown;
10378
10379 case 's':
10380 if (name[4] == 'e' &&
10381 name[5] == 'e' &&
10382 name[6] == 'k')
10383 { /* sysseek */
10384 return -KEY_sysseek;
10385 }
10386
10387 goto unknown;
10388
10389 default:
10390 goto unknown;
10391 }
10392
10393 default:
10394 goto unknown;
10395 }
10396
10397 default:
10398 goto unknown;
10399 }
10400
10401 case 't':
10402 if (name[1] == 'e' &&
10403 name[2] == 'l' &&
10404 name[3] == 'l' &&
10405 name[4] == 'd' &&
10406 name[5] == 'i' &&
10407 name[6] == 'r')
10408 { /* telldir */
10409 return -KEY_telldir;
10410 }
10411
10412 goto unknown;
10413
10414 case 'u':
10415 switch (name[1])
10416 {
10417 case 'c':
10418 if (name[2] == 'f' &&
10419 name[3] == 'i' &&
10420 name[4] == 'r' &&
10421 name[5] == 's' &&
10422 name[6] == 't')
10423 { /* ucfirst */
10424 return -KEY_ucfirst;
10425 }
10426
10427 goto unknown;
10428
10429 case 'n':
10430 if (name[2] == 's' &&
10431 name[3] == 'h' &&
10432 name[4] == 'i' &&
10433 name[5] == 'f' &&
10434 name[6] == 't')
10435 { /* unshift */
10436 return -KEY_unshift;
10437 }
10438
10439 goto unknown;
10440
10441 default:
10442 goto unknown;
10443 }
10444
10445 case 'w':
10446 if (name[1] == 'a' &&
10447 name[2] == 'i' &&
10448 name[3] == 't' &&
10449 name[4] == 'p' &&
10450 name[5] == 'i' &&
10451 name[6] == 'd')
10452 { /* waitpid */
10453 return -KEY_waitpid;
10454 }
10455
10456 goto unknown;
10457
10458 default:
10459 goto unknown;
10460 }
10461
10462 case 8: /* 26 tokens of length 8 */
10463 switch (name[0])
10464 {
10465 case 'A':
10466 if (name[1] == 'U' &&
10467 name[2] == 'T' &&
10468 name[3] == 'O' &&
10469 name[4] == 'L' &&
10470 name[5] == 'O' &&
10471 name[6] == 'A' &&
10472 name[7] == 'D')
10473 { /* AUTOLOAD */
10474 return KEY_AUTOLOAD;
10475 }
10476
10477 goto unknown;
10478
10479 case '_':
10480 if (name[1] == '_')
10481 {
10482 switch (name[2])
10483 {
10484 case 'D':
10485 if (name[3] == 'A' &&
10486 name[4] == 'T' &&
10487 name[5] == 'A' &&
10488 name[6] == '_' &&
10489 name[7] == '_')
10490 { /* __DATA__ */
10491 return KEY___DATA__;
10492 }
10493
10494 goto unknown;
10495
10496 case 'F':
10497 if (name[3] == 'I' &&
10498 name[4] == 'L' &&
10499 name[5] == 'E' &&
10500 name[6] == '_' &&
10501 name[7] == '_')
10502 { /* __FILE__ */
10503 return -KEY___FILE__;
10504 }
10505
10506 goto unknown;
10507
10508 case 'L':
10509 if (name[3] == 'I' &&
10510 name[4] == 'N' &&
10511 name[5] == 'E' &&
10512 name[6] == '_' &&
10513 name[7] == '_')
10514 { /* __LINE__ */
10515 return -KEY___LINE__;
10516 }
10517
10518 goto unknown;
10519
10520 default:
10521 goto unknown;
10522 }
10523 }
10524
10525 goto unknown;
10526
10527 case 'c':
10528 switch (name[1])
10529 {
10530 case 'l':
10531 if (name[2] == 'o' &&
10532 name[3] == 's' &&
10533 name[4] == 'e' &&
10534 name[5] == 'd' &&
10535 name[6] == 'i' &&
10536 name[7] == 'r')
10537 { /* closedir */
10538 return -KEY_closedir;
10539 }
10540
10541 goto unknown;
10542
10543 case 'o':
10544 if (name[2] == 'n' &&
10545 name[3] == 't' &&
10546 name[4] == 'i' &&
10547 name[5] == 'n' &&
10548 name[6] == 'u' &&
10549 name[7] == 'e')
10550 { /* continue */
10551 return -KEY_continue;
10552 }
10553
10554 goto unknown;
10555
10556 default:
10557 goto unknown;
10558 }
10559
10560 case 'd':
10561 if (name[1] == 'b' &&
10562 name[2] == 'm' &&
10563 name[3] == 'c' &&
10564 name[4] == 'l' &&
10565 name[5] == 'o' &&
10566 name[6] == 's' &&
10567 name[7] == 'e')
10568 { /* dbmclose */
10569 return -KEY_dbmclose;
10570 }
10571
10572 goto unknown;
10573
10574 case 'e':
10575 if (name[1] == 'n' &&
10576 name[2] == 'd')
10577 {
10578 switch (name[3])
10579 {
10580 case 'g':
10581 if (name[4] == 'r' &&
10582 name[5] == 'e' &&
10583 name[6] == 'n' &&
10584 name[7] == 't')
10585 { /* endgrent */
10586 return -KEY_endgrent;
10587 }
10588
10589 goto unknown;
10590
10591 case 'p':
10592 if (name[4] == 'w' &&
10593 name[5] == 'e' &&
10594 name[6] == 'n' &&
10595 name[7] == 't')
10596 { /* endpwent */
10597 return -KEY_endpwent;
10598 }
10599
10600 goto unknown;
10601
10602 default:
10603 goto unknown;
10604 }
10605 }
10606
10607 goto unknown;
10608
10609 case 'f':
10610 if (name[1] == 'o' &&
10611 name[2] == 'r' &&
10612 name[3] == 'm' &&
10613 name[4] == 'l' &&
10614 name[5] == 'i' &&
10615 name[6] == 'n' &&
10616 name[7] == 'e')
10617 { /* formline */
10618 return -KEY_formline;
10619 }
10620
10621 goto unknown;
10622
10623 case 'g':
10624 if (name[1] == 'e' &&
10625 name[2] == 't')
10626 {
10627 switch (name[3])
10628 {
10629 case 'g':
10630 if (name[4] == 'r')
10631 {
10632 switch (name[5])
10633 {
10634 case 'e':
10635 if (name[6] == 'n' &&
10636 name[7] == 't')
10637 { /* getgrent */
10638 return -KEY_getgrent;
10639 }
10640
10641 goto unknown;
10642
10643 case 'g':
10644 if (name[6] == 'i' &&
10645 name[7] == 'd')
10646 { /* getgrgid */
10647 return -KEY_getgrgid;
10648 }
10649
10650 goto unknown;
10651
10652 case 'n':
10653 if (name[6] == 'a' &&
10654 name[7] == 'm')
10655 { /* getgrnam */
10656 return -KEY_getgrnam;
10657 }
10658
10659 goto unknown;
10660
10661 default:
10662 goto unknown;
10663 }
10664 }
10665
10666 goto unknown;
10667
10668 case 'l':
10669 if (name[4] == 'o' &&
10670 name[5] == 'g' &&
10671 name[6] == 'i' &&
10672 name[7] == 'n')
10673 { /* getlogin */
10674 return -KEY_getlogin;
10675 }
10676
10677 goto unknown;
10678
10679 case 'p':
10680 if (name[4] == 'w')
10681 {
10682 switch (name[5])
10683 {
10684 case 'e':
10685 if (name[6] == 'n' &&
10686 name[7] == 't')
10687 { /* getpwent */
10688 return -KEY_getpwent;
10689 }
10690
10691 goto unknown;
10692
10693 case 'n':
10694 if (name[6] == 'a' &&
10695 name[7] == 'm')
10696 { /* getpwnam */
10697 return -KEY_getpwnam;
10698 }
10699
10700 goto unknown;
10701
10702 case 'u':
10703 if (name[6] == 'i' &&
10704 name[7] == 'd')
10705 { /* getpwuid */
10706 return -KEY_getpwuid;
10707 }
10708
10709 goto unknown;
10710
10711 default:
10712 goto unknown;
10713 }
10714 }
10715
10716 goto unknown;
10717
10718 default:
10719 goto unknown;
10720 }
10721 }
10722
10723 goto unknown;
10724
10725 case 'r':
10726 if (name[1] == 'e' &&
10727 name[2] == 'a' &&
10728 name[3] == 'd')
10729 {
10730 switch (name[4])
10731 {
10732 case 'l':
10733 if (name[5] == 'i' &&
10734 name[6] == 'n')
10735 {
10736 switch (name[7])
10737 {
10738 case 'e':
10739 { /* readline */
10740 return -KEY_readline;
10741 }
10742
4c3bbe0f
MHM
10743 case 'k':
10744 { /* readlink */
10745 return -KEY_readlink;
10746 }
10747
4c3bbe0f
MHM
10748 default:
10749 goto unknown;
10750 }
10751 }
10752
10753 goto unknown;
10754
10755 case 'p':
10756 if (name[5] == 'i' &&
10757 name[6] == 'p' &&
10758 name[7] == 'e')
10759 { /* readpipe */
10760 return -KEY_readpipe;
10761 }
10762
10763 goto unknown;
10764
10765 default:
10766 goto unknown;
10767 }
10768 }
10769
10770 goto unknown;
10771
10772 case 's':
10773 switch (name[1])
10774 {
10775 case 'e':
10776 if (name[2] == 't')
10777 {
10778 switch (name[3])
10779 {
10780 case 'g':
10781 if (name[4] == 'r' &&
10782 name[5] == 'e' &&
10783 name[6] == 'n' &&
10784 name[7] == 't')
10785 { /* setgrent */
10786 return -KEY_setgrent;
10787 }
10788
10789 goto unknown;
10790
10791 case 'p':
10792 if (name[4] == 'w' &&
10793 name[5] == 'e' &&
10794 name[6] == 'n' &&
10795 name[7] == 't')
10796 { /* setpwent */
10797 return -KEY_setpwent;
10798 }
10799
10800 goto unknown;
10801
10802 default:
10803 goto unknown;
10804 }
10805 }
10806
10807 goto unknown;
10808
10809 case 'h':
10810 switch (name[2])
10811 {
10812 case 'm':
10813 if (name[3] == 'w' &&
10814 name[4] == 'r' &&
10815 name[5] == 'i' &&
10816 name[6] == 't' &&
10817 name[7] == 'e')
10818 { /* shmwrite */
10819 return -KEY_shmwrite;
10820 }
10821
10822 goto unknown;
10823
10824 case 'u':
10825 if (name[3] == 't' &&
10826 name[4] == 'd' &&
10827 name[5] == 'o' &&
10828 name[6] == 'w' &&
10829 name[7] == 'n')
10830 { /* shutdown */
10831 return -KEY_shutdown;
10832 }
10833
10834 goto unknown;
10835
10836 default:
10837 goto unknown;
10838 }
10839
10840 case 'y':
10841 if (name[2] == 's' &&
10842 name[3] == 'w' &&
10843 name[4] == 'r' &&
10844 name[5] == 'i' &&
10845 name[6] == 't' &&
10846 name[7] == 'e')
10847 { /* syswrite */
10848 return -KEY_syswrite;
10849 }
10850
10851 goto unknown;
10852
10853 default:
10854 goto unknown;
10855 }
10856
10857 case 't':
10858 if (name[1] == 'r' &&
10859 name[2] == 'u' &&
10860 name[3] == 'n' &&
10861 name[4] == 'c' &&
10862 name[5] == 'a' &&
10863 name[6] == 't' &&
10864 name[7] == 'e')
10865 { /* truncate */
10866 return -KEY_truncate;
10867 }
10868
10869 goto unknown;
10870
10871 default:
10872 goto unknown;
10873 }
10874
3c10abe3 10875 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10876 switch (name[0])
10877 {
3c10abe3
AG
10878 case 'U':
10879 if (name[1] == 'N' &&
10880 name[2] == 'I' &&
10881 name[3] == 'T' &&
10882 name[4] == 'C' &&
10883 name[5] == 'H' &&
10884 name[6] == 'E' &&
10885 name[7] == 'C' &&
10886 name[8] == 'K')
10887 { /* UNITCHECK */
10888 return KEY_UNITCHECK;
10889 }
10890
10891 goto unknown;
10892
4c3bbe0f
MHM
10893 case 'e':
10894 if (name[1] == 'n' &&
10895 name[2] == 'd' &&
10896 name[3] == 'n' &&
10897 name[4] == 'e' &&
10898 name[5] == 't' &&
10899 name[6] == 'e' &&
10900 name[7] == 'n' &&
10901 name[8] == 't')
10902 { /* endnetent */
10903 return -KEY_endnetent;
10904 }
10905
10906 goto unknown;
10907
10908 case 'g':
10909 if (name[1] == 'e' &&
10910 name[2] == 't' &&
10911 name[3] == 'n' &&
10912 name[4] == 'e' &&
10913 name[5] == 't' &&
10914 name[6] == 'e' &&
10915 name[7] == 'n' &&
10916 name[8] == 't')
10917 { /* getnetent */
10918 return -KEY_getnetent;
10919 }
10920
10921 goto unknown;
10922
10923 case 'l':
10924 if (name[1] == 'o' &&
10925 name[2] == 'c' &&
10926 name[3] == 'a' &&
10927 name[4] == 'l' &&
10928 name[5] == 't' &&
10929 name[6] == 'i' &&
10930 name[7] == 'm' &&
10931 name[8] == 'e')
10932 { /* localtime */
10933 return -KEY_localtime;
10934 }
10935
10936 goto unknown;
10937
10938 case 'p':
10939 if (name[1] == 'r' &&
10940 name[2] == 'o' &&
10941 name[3] == 't' &&
10942 name[4] == 'o' &&
10943 name[5] == 't' &&
10944 name[6] == 'y' &&
10945 name[7] == 'p' &&
10946 name[8] == 'e')
10947 { /* prototype */
10948 return KEY_prototype;
10949 }
10950
10951 goto unknown;
10952
10953 case 'q':
10954 if (name[1] == 'u' &&
10955 name[2] == 'o' &&
10956 name[3] == 't' &&
10957 name[4] == 'e' &&
10958 name[5] == 'm' &&
10959 name[6] == 'e' &&
10960 name[7] == 't' &&
10961 name[8] == 'a')
10962 { /* quotemeta */
10963 return -KEY_quotemeta;
10964 }
10965
10966 goto unknown;
10967
10968 case 'r':
10969 if (name[1] == 'e' &&
10970 name[2] == 'w' &&
10971 name[3] == 'i' &&
10972 name[4] == 'n' &&
10973 name[5] == 'd' &&
10974 name[6] == 'd' &&
10975 name[7] == 'i' &&
10976 name[8] == 'r')
10977 { /* rewinddir */
10978 return -KEY_rewinddir;
10979 }
10980
10981 goto unknown;
10982
10983 case 's':
10984 if (name[1] == 'e' &&
10985 name[2] == 't' &&
10986 name[3] == 'n' &&
10987 name[4] == 'e' &&
10988 name[5] == 't' &&
10989 name[6] == 'e' &&
10990 name[7] == 'n' &&
10991 name[8] == 't')
10992 { /* setnetent */
10993 return -KEY_setnetent;
10994 }
10995
10996 goto unknown;
10997
10998 case 'w':
10999 if (name[1] == 'a' &&
11000 name[2] == 'n' &&
11001 name[3] == 't' &&
11002 name[4] == 'a' &&
11003 name[5] == 'r' &&
11004 name[6] == 'r' &&
11005 name[7] == 'a' &&
11006 name[8] == 'y')
11007 { /* wantarray */
11008 return -KEY_wantarray;
11009 }
11010
11011 goto unknown;
11012
11013 default:
11014 goto unknown;
11015 }
11016
11017 case 10: /* 9 tokens of length 10 */
11018 switch (name[0])
11019 {
11020 case 'e':
11021 if (name[1] == 'n' &&
11022 name[2] == 'd')
11023 {
11024 switch (name[3])
11025 {
11026 case 'h':
11027 if (name[4] == 'o' &&
11028 name[5] == 's' &&
11029 name[6] == 't' &&
11030 name[7] == 'e' &&
11031 name[8] == 'n' &&
11032 name[9] == 't')
11033 { /* endhostent */
11034 return -KEY_endhostent;
11035 }
11036
11037 goto unknown;
11038
11039 case 's':
11040 if (name[4] == 'e' &&
11041 name[5] == 'r' &&
11042 name[6] == 'v' &&
11043 name[7] == 'e' &&
11044 name[8] == 'n' &&
11045 name[9] == 't')
11046 { /* endservent */
11047 return -KEY_endservent;
11048 }
11049
11050 goto unknown;
11051
11052 default:
11053 goto unknown;
11054 }
11055 }
11056
11057 goto unknown;
11058
11059 case 'g':
11060 if (name[1] == 'e' &&
11061 name[2] == 't')
11062 {
11063 switch (name[3])
11064 {
11065 case 'h':
11066 if (name[4] == 'o' &&
11067 name[5] == 's' &&
11068 name[6] == 't' &&
11069 name[7] == 'e' &&
11070 name[8] == 'n' &&
11071 name[9] == 't')
11072 { /* gethostent */
11073 return -KEY_gethostent;
11074 }
11075
11076 goto unknown;
11077
11078 case 's':
11079 switch (name[4])
11080 {
11081 case 'e':
11082 if (name[5] == 'r' &&
11083 name[6] == 'v' &&
11084 name[7] == 'e' &&
11085 name[8] == 'n' &&
11086 name[9] == 't')
11087 { /* getservent */
11088 return -KEY_getservent;
11089 }
11090
11091 goto unknown;
11092
11093 case 'o':
11094 if (name[5] == 'c' &&
11095 name[6] == 'k' &&
11096 name[7] == 'o' &&
11097 name[8] == 'p' &&
11098 name[9] == 't')
11099 { /* getsockopt */
11100 return -KEY_getsockopt;
11101 }
11102
11103 goto unknown;
11104
11105 default:
11106 goto unknown;
11107 }
11108
11109 default:
11110 goto unknown;
11111 }
11112 }
11113
11114 goto unknown;
11115
11116 case 's':
11117 switch (name[1])
11118 {
11119 case 'e':
11120 if (name[2] == 't')
11121 {
11122 switch (name[3])
11123 {
11124 case 'h':
11125 if (name[4] == 'o' &&
11126 name[5] == 's' &&
11127 name[6] == 't' &&
11128 name[7] == 'e' &&
11129 name[8] == 'n' &&
11130 name[9] == 't')
11131 { /* sethostent */
11132 return -KEY_sethostent;
11133 }
11134
11135 goto unknown;
11136
11137 case 's':
11138 switch (name[4])
11139 {
11140 case 'e':
11141 if (name[5] == 'r' &&
11142 name[6] == 'v' &&
11143 name[7] == 'e' &&
11144 name[8] == 'n' &&
11145 name[9] == 't')
11146 { /* setservent */
11147 return -KEY_setservent;
11148 }
11149
11150 goto unknown;
11151
11152 case 'o':
11153 if (name[5] == 'c' &&
11154 name[6] == 'k' &&
11155 name[7] == 'o' &&
11156 name[8] == 'p' &&
11157 name[9] == 't')
11158 { /* setsockopt */
11159 return -KEY_setsockopt;
11160 }
11161
11162 goto unknown;
11163
11164 default:
11165 goto unknown;
11166 }
11167
11168 default:
11169 goto unknown;
11170 }
11171 }
11172
11173 goto unknown;
11174
11175 case 'o':
11176 if (name[2] == 'c' &&
11177 name[3] == 'k' &&
11178 name[4] == 'e' &&
11179 name[5] == 't' &&
11180 name[6] == 'p' &&
11181 name[7] == 'a' &&
11182 name[8] == 'i' &&
11183 name[9] == 'r')
11184 { /* socketpair */
11185 return -KEY_socketpair;
11186 }
11187
11188 goto unknown;
11189
11190 default:
11191 goto unknown;
11192 }
11193
11194 default:
11195 goto unknown;
e2e1dd5a 11196 }
4c3bbe0f
MHM
11197
11198 case 11: /* 8 tokens of length 11 */
11199 switch (name[0])
11200 {
11201 case '_':
11202 if (name[1] == '_' &&
11203 name[2] == 'P' &&
11204 name[3] == 'A' &&
11205 name[4] == 'C' &&
11206 name[5] == 'K' &&
11207 name[6] == 'A' &&
11208 name[7] == 'G' &&
11209 name[8] == 'E' &&
11210 name[9] == '_' &&
11211 name[10] == '_')
11212 { /* __PACKAGE__ */
11213 return -KEY___PACKAGE__;
11214 }
11215
11216 goto unknown;
11217
11218 case 'e':
11219 if (name[1] == 'n' &&
11220 name[2] == 'd' &&
11221 name[3] == 'p' &&
11222 name[4] == 'r' &&
11223 name[5] == 'o' &&
11224 name[6] == 't' &&
11225 name[7] == 'o' &&
11226 name[8] == 'e' &&
11227 name[9] == 'n' &&
11228 name[10] == 't')
11229 { /* endprotoent */
11230 return -KEY_endprotoent;
11231 }
11232
11233 goto unknown;
11234
11235 case 'g':
11236 if (name[1] == 'e' &&
11237 name[2] == 't')
11238 {
11239 switch (name[3])
11240 {
11241 case 'p':
11242 switch (name[4])
11243 {
11244 case 'e':
11245 if (name[5] == 'e' &&
11246 name[6] == 'r' &&
11247 name[7] == 'n' &&
11248 name[8] == 'a' &&
11249 name[9] == 'm' &&
11250 name[10] == 'e')
11251 { /* getpeername */
11252 return -KEY_getpeername;
11253 }
11254
11255 goto unknown;
11256
11257 case 'r':
11258 switch (name[5])
11259 {
11260 case 'i':
11261 if (name[6] == 'o' &&
11262 name[7] == 'r' &&
11263 name[8] == 'i' &&
11264 name[9] == 't' &&
11265 name[10] == 'y')
11266 { /* getpriority */
11267 return -KEY_getpriority;
11268 }
11269
11270 goto unknown;
11271
11272 case 'o':
11273 if (name[6] == 't' &&
11274 name[7] == 'o' &&
11275 name[8] == 'e' &&
11276 name[9] == 'n' &&
11277 name[10] == 't')
11278 { /* getprotoent */
11279 return -KEY_getprotoent;
11280 }
11281
11282 goto unknown;
11283
11284 default:
11285 goto unknown;
11286 }
11287
11288 default:
11289 goto unknown;
11290 }
11291
11292 case 's':
11293 if (name[4] == 'o' &&
11294 name[5] == 'c' &&
11295 name[6] == 'k' &&
11296 name[7] == 'n' &&
11297 name[8] == 'a' &&
11298 name[9] == 'm' &&
11299 name[10] == 'e')
11300 { /* getsockname */
11301 return -KEY_getsockname;
11302 }
11303
11304 goto unknown;
11305
11306 default:
11307 goto unknown;
11308 }
11309 }
11310
11311 goto unknown;
11312
11313 case 's':
11314 if (name[1] == 'e' &&
11315 name[2] == 't' &&
11316 name[3] == 'p' &&
11317 name[4] == 'r')
11318 {
11319 switch (name[5])
11320 {
11321 case 'i':
11322 if (name[6] == 'o' &&
11323 name[7] == 'r' &&
11324 name[8] == 'i' &&
11325 name[9] == 't' &&
11326 name[10] == 'y')
11327 { /* setpriority */
11328 return -KEY_setpriority;
11329 }
11330
11331 goto unknown;
11332
11333 case 'o':
11334 if (name[6] == 't' &&
11335 name[7] == 'o' &&
11336 name[8] == 'e' &&
11337 name[9] == 'n' &&
11338 name[10] == 't')
11339 { /* setprotoent */
11340 return -KEY_setprotoent;
11341 }
11342
11343 goto unknown;
11344
11345 default:
11346 goto unknown;
11347 }
11348 }
11349
11350 goto unknown;
11351
11352 default:
11353 goto unknown;
e2e1dd5a 11354 }
4c3bbe0f
MHM
11355
11356 case 12: /* 2 tokens of length 12 */
11357 if (name[0] == 'g' &&
11358 name[1] == 'e' &&
11359 name[2] == 't' &&
11360 name[3] == 'n' &&
11361 name[4] == 'e' &&
11362 name[5] == 't' &&
11363 name[6] == 'b' &&
11364 name[7] == 'y')
11365 {
11366 switch (name[8])
11367 {
11368 case 'a':
11369 if (name[9] == 'd' &&
11370 name[10] == 'd' &&
11371 name[11] == 'r')
11372 { /* getnetbyaddr */
11373 return -KEY_getnetbyaddr;
11374 }
11375
11376 goto unknown;
11377
11378 case 'n':
11379 if (name[9] == 'a' &&
11380 name[10] == 'm' &&
11381 name[11] == 'e')
11382 { /* getnetbyname */
11383 return -KEY_getnetbyname;
11384 }
11385
11386 goto unknown;
11387
11388 default:
11389 goto unknown;
11390 }
e2e1dd5a 11391 }
4c3bbe0f
MHM
11392
11393 goto unknown;
11394
11395 case 13: /* 4 tokens of length 13 */
11396 if (name[0] == 'g' &&
11397 name[1] == 'e' &&
11398 name[2] == 't')
11399 {
11400 switch (name[3])
11401 {
11402 case 'h':
11403 if (name[4] == 'o' &&
11404 name[5] == 's' &&
11405 name[6] == 't' &&
11406 name[7] == 'b' &&
11407 name[8] == 'y')
11408 {
11409 switch (name[9])
11410 {
11411 case 'a':
11412 if (name[10] == 'd' &&
11413 name[11] == 'd' &&
11414 name[12] == 'r')
11415 { /* gethostbyaddr */
11416 return -KEY_gethostbyaddr;
11417 }
11418
11419 goto unknown;
11420
11421 case 'n':
11422 if (name[10] == 'a' &&
11423 name[11] == 'm' &&
11424 name[12] == 'e')
11425 { /* gethostbyname */
11426 return -KEY_gethostbyname;
11427 }
11428
11429 goto unknown;
11430
11431 default:
11432 goto unknown;
11433 }
11434 }
11435
11436 goto unknown;
11437
11438 case 's':
11439 if (name[4] == 'e' &&
11440 name[5] == 'r' &&
11441 name[6] == 'v' &&
11442 name[7] == 'b' &&
11443 name[8] == 'y')
11444 {
11445 switch (name[9])
11446 {
11447 case 'n':
11448 if (name[10] == 'a' &&
11449 name[11] == 'm' &&
11450 name[12] == 'e')
11451 { /* getservbyname */
11452 return -KEY_getservbyname;
11453 }
11454
11455 goto unknown;
11456
11457 case 'p':
11458 if (name[10] == 'o' &&
11459 name[11] == 'r' &&
11460 name[12] == 't')
11461 { /* getservbyport */
11462 return -KEY_getservbyport;
11463 }
11464
11465 goto unknown;
11466
11467 default:
11468 goto unknown;
11469 }
11470 }
11471
11472 goto unknown;
11473
11474 default:
11475 goto unknown;
11476 }
e2e1dd5a 11477 }
4c3bbe0f
MHM
11478
11479 goto unknown;
11480
11481 case 14: /* 1 tokens of length 14 */
11482 if (name[0] == 'g' &&
11483 name[1] == 'e' &&
11484 name[2] == 't' &&
11485 name[3] == 'p' &&
11486 name[4] == 'r' &&
11487 name[5] == 'o' &&
11488 name[6] == 't' &&
11489 name[7] == 'o' &&
11490 name[8] == 'b' &&
11491 name[9] == 'y' &&
11492 name[10] == 'n' &&
11493 name[11] == 'a' &&
11494 name[12] == 'm' &&
11495 name[13] == 'e')
11496 { /* getprotobyname */
11497 return -KEY_getprotobyname;
11498 }
11499
11500 goto unknown;
11501
11502 case 16: /* 1 tokens of length 16 */
11503 if (name[0] == 'g' &&
11504 name[1] == 'e' &&
11505 name[2] == 't' &&
11506 name[3] == 'p' &&
11507 name[4] == 'r' &&
11508 name[5] == 'o' &&
11509 name[6] == 't' &&
11510 name[7] == 'o' &&
11511 name[8] == 'b' &&
11512 name[9] == 'y' &&
11513 name[10] == 'n' &&
11514 name[11] == 'u' &&
11515 name[12] == 'm' &&
11516 name[13] == 'b' &&
11517 name[14] == 'e' &&
11518 name[15] == 'r')
11519 { /* getprotobynumber */
11520 return -KEY_getprotobynumber;
11521 }
11522
11523 goto unknown;
11524
11525 default:
11526 goto unknown;
e2e1dd5a 11527 }
4c3bbe0f
MHM
11528
11529unknown:
e2e1dd5a 11530 return 0;
a687059c
LW
11531}
11532
76e3520e 11533STATIC void
c94115d8 11534S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11535{
97aff369 11536 dVAR;
2f3197b3 11537
7918f24d
NC
11538 PERL_ARGS_ASSERT_CHECKCOMMA;
11539
d008e5eb 11540 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11541 if (ckWARN(WARN_SYNTAX)) {
11542 int level = 1;
26ff0806 11543 const char *w;
d008e5eb
GS
11544 for (w = s+2; *w && level; w++) {
11545 if (*w == '(')
11546 ++level;
11547 else if (*w == ')')
11548 --level;
11549 }
888fea98
NC
11550 while (isSPACE(*w))
11551 ++w;
b1439985
RGS
11552 /* the list of chars below is for end of statements or
11553 * block / parens, boolean operators (&&, ||, //) and branch
11554 * constructs (or, and, if, until, unless, while, err, for).
11555 * Not a very solid hack... */
11556 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11557 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11558 "%s (...) interpreted as function",name);
d008e5eb 11559 }
2f3197b3 11560 }
3280af22 11561 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11562 s++;
a687059c
LW
11563 if (*s == '(')
11564 s++;
3280af22 11565 while (s < PL_bufend && isSPACE(*s))
a687059c 11566 s++;
7e2040f0 11567 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11568 const char * const w = s++;
7e2040f0 11569 while (isALNUM_lazy_if(s,UTF))
a687059c 11570 s++;
3280af22 11571 while (s < PL_bufend && isSPACE(*s))
a687059c 11572 s++;
e929a76b 11573 if (*s == ',') {
c94115d8 11574 GV* gv;
5458a98a 11575 if (keyword(w, s - w, 0))
e929a76b 11576 return;
c94115d8
NC
11577
11578 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11579 if (gv && GvCVu(gv))
abbb3198 11580 return;
cea2e8a9 11581 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11582 }
11583 }
11584}
11585
423cee85
JH
11586/* Either returns sv, or mortalizes sv and returns a new SV*.
11587 Best used as sv=new_constant(..., sv, ...).
11588 If s, pv are NULL, calls subroutine with one argument,
11589 and type is used with error messages only. */
11590
b3ac6de7 11591STATIC SV *
eb0d8d16
NC
11592S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11593 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11594{
27da23d5 11595 dVAR; dSP;
890ce7af 11596 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11597 SV *res;
b3ac6de7
IZ
11598 SV **cvp;
11599 SV *cv, *typesv;
89e33a05 11600 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11601
7918f24d
NC
11602 PERL_ARGS_ASSERT_NEW_CONSTANT;
11603
f0af216f 11604 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11605 SV *msg;
11606
10edeb5d
JH
11607 why2 = (const char *)
11608 (strEQ(key,"charnames")
11609 ? "(possibly a missing \"use charnames ...\")"
11610 : "");
4e553d73 11611 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11612 (type ? type: "undef"), why2);
11613
11614 /* This is convoluted and evil ("goto considered harmful")
11615 * but I do not understand the intricacies of all the different
11616 * failure modes of %^H in here. The goal here is to make
11617 * the most probable error message user-friendly. --jhi */
11618
11619 goto msgdone;
11620
423cee85 11621 report:
4e553d73 11622 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11623 (type ? type: "undef"), why1, why2, why3);
41ab332f 11624 msgdone:
95a20fc0 11625 yyerror(SvPVX_const(msg));
423cee85
JH
11626 SvREFCNT_dec(msg);
11627 return sv;
11628 }
ff3f963a
KW
11629
11630 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
11631 if (PL_error_count > 0 && strEQ(key,"charnames"))
11632 return &PL_sv_undef;
ff3f963a 11633
eb0d8d16 11634 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11635 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11636 why1 = "$^H{";
11637 why2 = key;
f0af216f 11638 why3 = "} is not defined";
423cee85 11639 goto report;
b3ac6de7
IZ
11640 }
11641 sv_2mortal(sv); /* Parent created it permanently */
11642 cv = *cvp;
423cee85 11643 if (!pv && s)
59cd0e26 11644 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11645 if (type && pv)
59cd0e26 11646 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11647 else
423cee85 11648 typesv = &PL_sv_undef;
4e553d73 11649
e788e7d3 11650 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11651 ENTER ;
11652 SAVETMPS;
4e553d73 11653
423cee85 11654 PUSHMARK(SP) ;
a5845cb7 11655 EXTEND(sp, 3);
423cee85
JH
11656 if (pv)
11657 PUSHs(pv);
b3ac6de7 11658 PUSHs(sv);
423cee85
JH
11659 if (pv)
11660 PUSHs(typesv);
b3ac6de7 11661 PUTBACK;
423cee85 11662 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11663
423cee85 11664 SPAGAIN ;
4e553d73 11665
423cee85 11666 /* Check the eval first */
9b0e499b 11667 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11668 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11669 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11670 (void)POPs;
b37c2d43 11671 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11672 }
11673 else {
11674 res = POPs;
b37c2d43 11675 SvREFCNT_inc_simple_void(res);
423cee85 11676 }
4e553d73 11677
423cee85
JH
11678 PUTBACK ;
11679 FREETMPS ;
11680 LEAVE ;
b3ac6de7 11681 POPSTACK;
4e553d73 11682
b3ac6de7 11683 if (!SvOK(res)) {
423cee85
JH
11684 why1 = "Call to &{$^H{";
11685 why2 = key;
f0af216f 11686 why3 = "}} did not return a defined value";
423cee85
JH
11687 sv = res;
11688 goto report;
9b0e499b 11689 }
423cee85 11690
9b0e499b 11691 return res;
b3ac6de7 11692}
4e553d73 11693
d0a148a6
NC
11694/* Returns a NUL terminated string, with the length of the string written to
11695 *slp
11696 */
76e3520e 11697STATIC char *
cea2e8a9 11698S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11699{
97aff369 11700 dVAR;
463ee0b2 11701 register char *d = dest;
890ce7af 11702 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11703
11704 PERL_ARGS_ASSERT_SCAN_WORD;
11705
463ee0b2 11706 for (;;) {
8903cb82 11707 if (d >= e)
cea2e8a9 11708 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11709 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11710 *d++ = *s++;
c35e046a 11711 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11712 *d++ = ':';
11713 *d++ = ':';
11714 s++;
11715 }
c35e046a 11716 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11717 *d++ = *s++;
11718 *d++ = *s++;
11719 }
fd400ab9 11720 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11721 char *t = s + UTF8SKIP(s);
c35e046a 11722 size_t len;
fd400ab9 11723 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11724 t += UTF8SKIP(t);
c35e046a
AL
11725 len = t - s;
11726 if (d + len > e)
cea2e8a9 11727 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11728 Copy(s, d, len, char);
11729 d += len;
a0ed51b3
LW
11730 s = t;
11731 }
463ee0b2
LW
11732 else {
11733 *d = '\0';
11734 *slp = d - dest;
11735 return s;
e929a76b 11736 }
378cc40b
LW
11737 }
11738}
11739
76e3520e 11740STATIC char *
f54cb97a 11741S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11742{
97aff369 11743 dVAR;
6136c704 11744 char *bracket = NULL;
748a9306 11745 char funny = *s++;
6136c704 11746 register char *d = dest;
0b3da58d 11747 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11748
7918f24d
NC
11749 PERL_ARGS_ASSERT_SCAN_IDENT;
11750
a0d0e21e 11751 if (isSPACE(*s))
29595ff2 11752 s = PEEKSPACE(s);
de3bb511 11753 if (isDIGIT(*s)) {
8903cb82 11754 while (isDIGIT(*s)) {
11755 if (d >= e)
cea2e8a9 11756 Perl_croak(aTHX_ ident_too_long);
378cc40b 11757 *d++ = *s++;
8903cb82 11758 }
378cc40b
LW
11759 }
11760 else {
463ee0b2 11761 for (;;) {
8903cb82 11762 if (d >= e)
cea2e8a9 11763 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11764 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11765 *d++ = *s++;
7e2040f0 11766 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11767 *d++ = ':';
11768 *d++ = ':';
11769 s++;
11770 }
a0d0e21e 11771 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11772 *d++ = *s++;
11773 *d++ = *s++;
11774 }
fd400ab9 11775 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11776 char *t = s + UTF8SKIP(s);
fd400ab9 11777 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11778 t += UTF8SKIP(t);
11779 if (d + (t - s) > e)
cea2e8a9 11780 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11781 Copy(s, d, t - s, char);
11782 d += t - s;
11783 s = t;
11784 }
463ee0b2
LW
11785 else
11786 break;
11787 }
378cc40b
LW
11788 }
11789 *d = '\0';
11790 d = dest;
79072805 11791 if (*d) {
3280af22
NIS
11792 if (PL_lex_state != LEX_NORMAL)
11793 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11794 return s;
378cc40b 11795 }
748a9306 11796 if (*s == '$' && s[1] &&
3792a11b 11797 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11798 {
4810e5ec 11799 return s;
5cd24f17 11800 }
79072805
LW
11801 if (*s == '{') {
11802 bracket = s;
11803 s++;
11804 }
11805 else if (ck_uni)
11806 check_uni();
93a17b20 11807 if (s < send)
79072805
LW
11808 *d = *s++;
11809 d[1] = '\0';
2b92dfce 11810 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11811 *d = toCTRL(*s);
11812 s++;
de3bb511 11813 }
79072805 11814 if (bracket) {
748a9306 11815 if (isSPACE(s[-1])) {
fa83b5b6 11816 while (s < send) {
f54cb97a 11817 const char ch = *s++;
bf4acbe4 11818 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11819 *d = ch;
11820 break;
11821 }
11822 }
748a9306 11823 }
7e2040f0 11824 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11825 d++;
a0ed51b3 11826 if (UTF) {
6136c704
AL
11827 char *end = s;
11828 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11829 end += UTF8SKIP(end);
11830 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11831 end += UTF8SKIP(end);
a0ed51b3 11832 }
6136c704
AL
11833 Copy(s, d, end - s, char);
11834 d += end - s;
11835 s = end;
a0ed51b3
LW
11836 }
11837 else {
2b92dfce 11838 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11839 *d++ = *s++;
2b92dfce 11840 if (d >= e)
cea2e8a9 11841 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11842 }
79072805 11843 *d = '\0';
c35e046a
AL
11844 while (s < send && SPACE_OR_TAB(*s))
11845 s++;
ff68c719 11846 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11847 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11848 const char * const brack =
11849 (const char *)
11850 ((*s == '[') ? "[...]" : "{...}");
9014280d 11851 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11852 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11853 funny, dest, brack, funny, dest, brack);
11854 }
79072805 11855 bracket++;
a0be28da 11856 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11857 return s;
11858 }
4e553d73
NIS
11859 }
11860 /* Handle extended ${^Foo} variables
2b92dfce
GS
11861 * 1999-02-27 mjd-perl-patch@plover.com */
11862 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11863 && isALNUM(*s))
11864 {
11865 d++;
11866 while (isALNUM(*s) && d < e) {
11867 *d++ = *s++;
11868 }
11869 if (d >= e)
cea2e8a9 11870 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11871 *d = '\0';
79072805
LW
11872 }
11873 if (*s == '}') {
11874 s++;
7df0d042 11875 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11876 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11877 PL_expect = XREF;
11878 }
d008e5eb 11879 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11880 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11881 (keyword(dest, d - dest, 0)
11882 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11883 {
c35e046a
AL
11884 if (funny == '#')
11885 funny = '@';
9014280d 11886 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11887 "Ambiguous use of %c{%s} resolved to %c%s",
11888 funny, dest, funny, dest);
11889 }
11890 }
79072805
LW
11891 }
11892 else {
11893 s = bracket; /* let the parser handle it */
93a17b20 11894 *dest = '\0';
79072805
LW
11895 }
11896 }
3280af22
NIS
11897 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11898 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11899 return s;
11900}
11901
879d0c72
NC
11902static U32
11903S_pmflag(U32 pmfl, const char ch) {
11904 switch (ch) {
11905 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
4f4d7508
DC
11906 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11907 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11908 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11909 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11910 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
879d0c72
NC
11911 }
11912 return pmfl;
11913}
11914
76e3520e 11915STATIC char *
cea2e8a9 11916S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11917{
97aff369 11918 dVAR;
79072805 11919 PMOP *pm;
5db06880 11920 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11921 const char * const valid_flags =
a20207d7 11922 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11923#ifdef PERL_MAD
11924 char *modstart;
11925#endif
11926
7918f24d 11927 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11928
25c09cbf 11929 if (!s) {
6136c704 11930 const char * const delimiter = skipspace(start);
10edeb5d
JH
11931 Perl_croak(aTHX_
11932 (const char *)
11933 (*delimiter == '?'
11934 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11935 : "Search pattern not terminated" ));
25c09cbf 11936 }
bbce6d69 11937
8782bef2 11938 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11939 if (PL_multi_open == '?') {
11940 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11941 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11942
11943 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11944 allows us to restrict the list needed by reset to just the ??
11945 matches. */
11946 assert(type != OP_TRANS);
11947 if (PL_curstash) {
daba3364 11948 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11949 U32 elements;
11950 if (!mg) {
daba3364 11951 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11952 0);
11953 }
11954 elements = mg->mg_len / sizeof(PMOP**);
11955 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11956 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11957 mg->mg_len = elements * sizeof(PMOP**);
11958 PmopSTASH_set(pm,PL_curstash);
11959 }
11960 }
5db06880
NC
11961#ifdef PERL_MAD
11962 modstart = s;
11963#endif
6136c704 11964 while (*s && strchr(valid_flags, *s))
879d0c72 11965 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
e6897b1a
KW
11966
11967 if (isALNUM(*s)) {
11968 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11969 "Having no space between pattern and following word is deprecated");
11970
11971 }
5db06880
NC
11972#ifdef PERL_MAD
11973 if (PL_madskills && modstart != s) {
11974 SV* tmptoken = newSVpvn(modstart, s - modstart);
11975 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11976 }
11977#endif
4ac733c9 11978 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11979 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11980 {
a2a5de95
NC
11981 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11982 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11983 }
11984
3280af22 11985 PL_lex_op = (OP*)pm;
6154021b 11986 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11987 return s;
11988}
11989
76e3520e 11990STATIC char *
cea2e8a9 11991S_scan_subst(pTHX_ char *start)
79072805 11992{
27da23d5 11993 dVAR;
a0d0e21e 11994 register char *s;
79072805 11995 register PMOP *pm;
4fdae800 11996 I32 first_start;
79072805 11997 I32 es = 0;
5db06880
NC
11998#ifdef PERL_MAD
11999 char *modstart;
12000#endif
79072805 12001
7918f24d
NC
12002 PERL_ARGS_ASSERT_SCAN_SUBST;
12003
6154021b 12004 pl_yylval.ival = OP_NULL;
79072805 12005
5db06880 12006 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12007
37fd879b 12008 if (!s)
cea2e8a9 12009 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 12010
3280af22 12011 if (s[-1] == PL_multi_open)
79072805 12012 s--;
5db06880
NC
12013#ifdef PERL_MAD
12014 if (PL_madskills) {
cd81e915
NC
12015 CURMAD('q', PL_thisopen);
12016 CURMAD('_', PL_thiswhite);
12017 CURMAD('E', PL_thisstuff);
12018 CURMAD('Q', PL_thisclose);
12019 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12020 }
12021#endif
79072805 12022
3280af22 12023 first_start = PL_multi_start;
5db06880 12024 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12025 if (!s) {
37fd879b 12026 if (PL_lex_stuff) {
3280af22 12027 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12028 PL_lex_stuff = NULL;
37fd879b 12029 }
cea2e8a9 12030 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 12031 }
3280af22 12032 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 12033
79072805 12034 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
12035
12036#ifdef PERL_MAD
12037 if (PL_madskills) {
cd81e915
NC
12038 CURMAD('z', PL_thisopen);
12039 CURMAD('R', PL_thisstuff);
12040 CURMAD('Z', PL_thisclose);
5db06880
NC
12041 }
12042 modstart = s;
12043#endif
12044
48c036b1 12045 while (*s) {
a20207d7 12046 if (*s == EXEC_PAT_MOD) {
a687059c 12047 s++;
2f3197b3 12048 es++;
a687059c 12049 }
a20207d7 12050 else if (strchr(S_PAT_MODS, *s))
879d0c72 12051 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
aa78b661
KW
12052 else {
12053 if (isALNUM(*s)) {
12054 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12055 "Having no space between pattern and following word is deprecated");
12056
12057 }
48c036b1 12058 break;
aa78b661 12059 }
378cc40b 12060 }
79072805 12061
5db06880
NC
12062#ifdef PERL_MAD
12063 if (PL_madskills) {
12064 if (modstart != s)
12065 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12066 append_madprops(PL_thismad, (OP*)pm, 0);
12067 PL_thismad = 0;
5db06880
NC
12068 }
12069#endif
a2a5de95
NC
12070 if ((pm->op_pmflags & PMf_CONTINUE)) {
12071 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
12072 }
12073
79072805 12074 if (es) {
6136c704
AL
12075 SV * const repl = newSVpvs("");
12076
0244c3a4
GS
12077 PL_sublex_info.super_bufptr = s;
12078 PL_sublex_info.super_bufend = PL_bufend;
12079 PL_multi_end = 0;
79072805 12080 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
12081 while (es-- > 0) {
12082 if (es)
12083 sv_catpvs(repl, "eval ");
12084 else
12085 sv_catpvs(repl, "do ");
12086 }
6f43d98f 12087 sv_catpvs(repl, "{");
3280af22 12088 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
12089 if (strchr(SvPVX(PL_lex_repl), '#'))
12090 sv_catpvs(repl, "\n");
12091 sv_catpvs(repl, "}");
25da4f38 12092 SvEVALED_on(repl);
3280af22
NIS
12093 SvREFCNT_dec(PL_lex_repl);
12094 PL_lex_repl = repl;
378cc40b 12095 }
79072805 12096
3280af22 12097 PL_lex_op = (OP*)pm;
6154021b 12098 pl_yylval.ival = OP_SUBST;
378cc40b
LW
12099 return s;
12100}
12101
76e3520e 12102STATIC char *
cea2e8a9 12103S_scan_trans(pTHX_ char *start)
378cc40b 12104{
97aff369 12105 dVAR;
a0d0e21e 12106 register char* s;
11343788 12107 OP *o;
79072805 12108 short *tbl;
b84c11c8
NC
12109 U8 squash;
12110 U8 del;
12111 U8 complement;
bb16bae8 12112 bool nondestruct = 0;
5db06880
NC
12113#ifdef PERL_MAD
12114 char *modstart;
12115#endif
79072805 12116
7918f24d
NC
12117 PERL_ARGS_ASSERT_SCAN_TRANS;
12118
6154021b 12119 pl_yylval.ival = OP_NULL;
79072805 12120
5db06880 12121 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 12122 if (!s)
cea2e8a9 12123 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 12124
3280af22 12125 if (s[-1] == PL_multi_open)
2f3197b3 12126 s--;
5db06880
NC
12127#ifdef PERL_MAD
12128 if (PL_madskills) {
cd81e915
NC
12129 CURMAD('q', PL_thisopen);
12130 CURMAD('_', PL_thiswhite);
12131 CURMAD('E', PL_thisstuff);
12132 CURMAD('Q', PL_thisclose);
12133 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12134 }
12135#endif
2f3197b3 12136
5db06880 12137 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12138 if (!s) {
37fd879b 12139 if (PL_lex_stuff) {
3280af22 12140 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12141 PL_lex_stuff = NULL;
37fd879b 12142 }
cea2e8a9 12143 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 12144 }
5db06880 12145 if (PL_madskills) {
cd81e915
NC
12146 CURMAD('z', PL_thisopen);
12147 CURMAD('R', PL_thisstuff);
12148 CURMAD('Z', PL_thisclose);
5db06880 12149 }
79072805 12150
a0ed51b3 12151 complement = del = squash = 0;
5db06880
NC
12152#ifdef PERL_MAD
12153 modstart = s;
12154#endif
7a1e2023
NC
12155 while (1) {
12156 switch (*s) {
12157 case 'c':
79072805 12158 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
12159 break;
12160 case 'd':
a0ed51b3 12161 del = OPpTRANS_DELETE;
7a1e2023
NC
12162 break;
12163 case 's':
79072805 12164 squash = OPpTRANS_SQUASH;
7a1e2023 12165 break;
bb16bae8
FC
12166 case 'r':
12167 nondestruct = 1;
12168 break;
7a1e2023
NC
12169 default:
12170 goto no_more;
12171 }
395c3793
LW
12172 s++;
12173 }
7a1e2023 12174 no_more:
8973db79 12175
aa1f7c5b 12176 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
bb16bae8 12177 o = newPVOP(nondestruct ? OP_TRANSR : OP_TRANS, 0, (char*)tbl);
59f00321
RGS
12178 o->op_private &= ~OPpTRANS_ALL;
12179 o->op_private |= del|squash|complement|
7948272d
NIS
12180 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12181 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 12182
3280af22 12183 PL_lex_op = o;
bb16bae8 12184 pl_yylval.ival = nondestruct ? OP_TRANSR : OP_TRANS;
5db06880
NC
12185
12186#ifdef PERL_MAD
12187 if (PL_madskills) {
12188 if (modstart != s)
12189 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12190 append_madprops(PL_thismad, o, 0);
12191 PL_thismad = 0;
5db06880
NC
12192 }
12193#endif
12194
79072805
LW
12195 return s;
12196}
12197
76e3520e 12198STATIC char *
cea2e8a9 12199S_scan_heredoc(pTHX_ register char *s)
79072805 12200{
97aff369 12201 dVAR;
79072805
LW
12202 SV *herewas;
12203 I32 op_type = OP_SCALAR;
12204 I32 len;
12205 SV *tmpstr;
12206 char term;
73d840c0 12207 const char *found_newline;
79072805 12208 register char *d;
fc36a67e 12209 register char *e;
4633a7c4 12210 char *peek;
f54cb97a 12211 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
12212#ifdef PERL_MAD
12213 I32 stuffstart = s - SvPVX(PL_linestr);
12214 char *tstart;
12215
cd81e915 12216 PL_realtokenstart = -1;
5db06880 12217#endif
79072805 12218
7918f24d
NC
12219 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12220
79072805 12221 s += 2;
3280af22
NIS
12222 d = PL_tokenbuf;
12223 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 12224 if (!outer)
79072805 12225 *d++ = '\n';
c35e046a
AL
12226 peek = s;
12227 while (SPACE_OR_TAB(*peek))
12228 peek++;
3792a11b 12229 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 12230 s = peek;
79072805 12231 term = *s++;
3280af22 12232 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 12233 d += len;
3280af22 12234 if (s < PL_bufend)
79072805 12235 s++;
79072805
LW
12236 }
12237 else {
12238 if (*s == '\\')
12239 s++, term = '\'';
12240 else
12241 term = '"';
7e2040f0 12242 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 12243 deprecate("bare << to mean <<\"\"");
7e2040f0 12244 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 12245 if (d < e)
12246 *d++ = *s;
12247 }
12248 }
3280af22 12249 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 12250 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
12251 *d++ = '\n';
12252 *d = '\0';
3280af22 12253 len = d - PL_tokenbuf;
5db06880
NC
12254
12255#ifdef PERL_MAD
12256 if (PL_madskills) {
12257 tstart = PL_tokenbuf + !outer;
cd81e915 12258 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 12259 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 12260 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12261 stuffstart = s - SvPVX(PL_linestr);
12262 }
12263#endif
6a27c188 12264#ifndef PERL_STRICT_CR
f63a84b2
LW
12265 d = strchr(s, '\r');
12266 if (d) {
b464bac0 12267 char * const olds = s;
f63a84b2 12268 s = d;
3280af22 12269 while (s < PL_bufend) {
f63a84b2
LW
12270 if (*s == '\r') {
12271 *d++ = '\n';
12272 if (*++s == '\n')
12273 s++;
12274 }
12275 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12276 *d++ = *s++;
12277 s++;
12278 }
12279 else
12280 *d++ = *s++;
12281 }
12282 *d = '\0';
3280af22 12283 PL_bufend = d;
95a20fc0 12284 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
12285 s = olds;
12286 }
12287#endif
5db06880
NC
12288#ifdef PERL_MAD
12289 found_newline = 0;
12290#endif
10edeb5d 12291 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
12292 herewas = newSVpvn(s,PL_bufend-s);
12293 }
12294 else {
5db06880
NC
12295#ifdef PERL_MAD
12296 herewas = newSVpvn(s-1,found_newline-s+1);
12297#else
73d840c0
AL
12298 s--;
12299 herewas = newSVpvn(s,found_newline-s);
5db06880 12300#endif
73d840c0 12301 }
5db06880
NC
12302#ifdef PERL_MAD
12303 if (PL_madskills) {
12304 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12305 if (PL_thisstuff)
12306 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 12307 else
cd81e915 12308 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
12309 }
12310#endif
79072805 12311 s += SvCUR(herewas);
748a9306 12312
5db06880
NC
12313#ifdef PERL_MAD
12314 stuffstart = s - SvPVX(PL_linestr);
12315
12316 if (found_newline)
12317 s--;
12318#endif
12319
7d0a29fe
NC
12320 tmpstr = newSV_type(SVt_PVIV);
12321 SvGROW(tmpstr, 80);
748a9306 12322 if (term == '\'') {
79072805 12323 op_type = OP_CONST;
45977657 12324 SvIV_set(tmpstr, -1);
748a9306
LW
12325 }
12326 else if (term == '`') {
79072805 12327 op_type = OP_BACKTICK;
45977657 12328 SvIV_set(tmpstr, '\\');
748a9306 12329 }
79072805
LW
12330
12331 CLINE;
57843af0 12332 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
12333 PL_multi_open = PL_multi_close = '<';
12334 term = *PL_tokenbuf;
0244c3a4 12335 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
12336 char * const bufptr = PL_sublex_info.super_bufptr;
12337 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 12338 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
12339 s = strchr(bufptr, '\n');
12340 if (!s)
12341 s = bufend;
12342 d = s;
12343 while (s < bufend &&
12344 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12345 if (*s++ == '\n')
57843af0 12346 CopLINE_inc(PL_curcop);
0244c3a4
GS
12347 }
12348 if (s >= bufend) {
eb160463 12349 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
12350 missingterm(PL_tokenbuf);
12351 }
12352 sv_setpvn(herewas,bufptr,d-bufptr+1);
12353 sv_setpvn(tmpstr,d+1,s-d);
12354 s += len - 1;
12355 sv_catpvn(herewas,s,bufend-s);
95a20fc0 12356 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
12357
12358 s = olds;
12359 goto retval;
12360 }
12361 else if (!outer) {
79072805 12362 d = s;
3280af22
NIS
12363 while (s < PL_bufend &&
12364 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 12365 if (*s++ == '\n')
57843af0 12366 CopLINE_inc(PL_curcop);
79072805 12367 }
3280af22 12368 if (s >= PL_bufend) {
eb160463 12369 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12370 missingterm(PL_tokenbuf);
79072805
LW
12371 }
12372 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
12373#ifdef PERL_MAD
12374 if (PL_madskills) {
cd81e915
NC
12375 if (PL_thisstuff)
12376 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12377 else
cd81e915 12378 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12379 stuffstart = s - SvPVX(PL_linestr);
12380 }
12381#endif
79072805 12382 s += len - 1;
57843af0 12383 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12384
3280af22
NIS
12385 sv_catpvn(herewas,s,PL_bufend-s);
12386 sv_setsv(PL_linestr,herewas);
12387 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12388 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12389 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12390 }
12391 else
76f68e9b 12392 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12393 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12394#ifdef PERL_MAD
12395 if (PL_madskills) {
12396 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12397 if (PL_thisstuff)
12398 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12399 else
cd81e915 12400 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12401 }
12402#endif
f0e67a1d 12403 PL_bufptr = s;
17cc9359 12404 CopLINE_inc(PL_curcop);
f0e67a1d 12405 if (!outer || !lex_next_chunk(0)) {
eb160463 12406 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12407 missingterm(PL_tokenbuf);
79072805 12408 }
17cc9359 12409 CopLINE_dec(PL_curcop);
f0e67a1d 12410 s = PL_bufptr;
5db06880
NC
12411#ifdef PERL_MAD
12412 stuffstart = s - SvPVX(PL_linestr);
12413#endif
57843af0 12414 CopLINE_inc(PL_curcop);
3280af22 12415 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12416 PL_last_lop = PL_last_uni = NULL;
6a27c188 12417#ifndef PERL_STRICT_CR
3280af22 12418 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12419 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12420 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12421 {
3280af22
NIS
12422 PL_bufend[-2] = '\n';
12423 PL_bufend--;
95a20fc0 12424 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12425 }
3280af22
NIS
12426 else if (PL_bufend[-1] == '\r')
12427 PL_bufend[-1] = '\n';
f63a84b2 12428 }
3280af22
NIS
12429 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12430 PL_bufend[-1] = '\n';
f63a84b2 12431#endif
3280af22 12432 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12433 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12434 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12435 sv_catsv(PL_linestr,herewas);
12436 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12437 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12438 }
12439 else {
3280af22
NIS
12440 s = PL_bufend;
12441 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12442 }
12443 }
79072805 12444 s++;
0244c3a4 12445retval:
57843af0 12446 PL_multi_end = CopLINE(PL_curcop);
79072805 12447 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12448 SvPV_shrink_to_cur(tmpstr);
79072805 12449 }
8990e307 12450 SvREFCNT_dec(herewas);
2f31ce75 12451 if (!IN_BYTES) {
95a20fc0 12452 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12453 SvUTF8_on(tmpstr);
12454 else if (PL_encoding)
12455 sv_recode_to_utf8(tmpstr, PL_encoding);
12456 }
3280af22 12457 PL_lex_stuff = tmpstr;
6154021b 12458 pl_yylval.ival = op_type;
79072805
LW
12459 return s;
12460}
12461
02aa26ce
NT
12462/* scan_inputsymbol
12463 takes: current position in input buffer
12464 returns: new position in input buffer
6154021b 12465 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12466
12467 This code handles:
12468
12469 <> read from ARGV
12470 <FH> read from filehandle
12471 <pkg::FH> read from package qualified filehandle
12472 <pkg'FH> read from package qualified filehandle
12473 <$fh> read from filehandle in $fh
12474 <*.h> filename glob
12475
12476*/
12477
76e3520e 12478STATIC char *
cea2e8a9 12479S_scan_inputsymbol(pTHX_ char *start)
79072805 12480{
97aff369 12481 dVAR;
02aa26ce 12482 register char *s = start; /* current position in buffer */
1b420867 12483 char *end;
79072805 12484 I32 len;
6136c704
AL
12485 char *d = PL_tokenbuf; /* start of temp holding space */
12486 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12487
7918f24d
NC
12488 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12489
1b420867
GS
12490 end = strchr(s, '\n');
12491 if (!end)
12492 end = PL_bufend;
12493 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12494
12495 /* die if we didn't have space for the contents of the <>,
1b420867 12496 or if it didn't end, or if we see a newline
02aa26ce
NT
12497 */
12498
bb7a0f54 12499 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12500 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12501 if (s >= end)
cea2e8a9 12502 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12503
fc36a67e 12504 s++;
02aa26ce
NT
12505
12506 /* check for <$fh>
12507 Remember, only scalar variables are interpreted as filehandles by
12508 this code. Anything more complex (e.g., <$fh{$num}>) will be
12509 treated as a glob() call.
12510 This code makes use of the fact that except for the $ at the front,
12511 a scalar variable and a filehandle look the same.
12512 */
4633a7c4 12513 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12514
12515 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12516 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12517 d++;
02aa26ce
NT
12518
12519 /* If we've tried to read what we allow filehandles to look like, and
12520 there's still text left, then it must be a glob() and not a getline.
12521 Use scan_str to pull out the stuff between the <> and treat it
12522 as nothing more than a string.
12523 */
12524
3280af22 12525 if (d - PL_tokenbuf != len) {
6154021b 12526 pl_yylval.ival = OP_GLOB;
5db06880 12527 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12528 if (!s)
cea2e8a9 12529 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12530 return s;
12531 }
395c3793 12532 else {
9b3023bc 12533 bool readline_overriden = FALSE;
6136c704 12534 GV *gv_readline;
9b3023bc 12535 GV **gvp;
02aa26ce 12536 /* we're in a filehandle read situation */
3280af22 12537 d = PL_tokenbuf;
02aa26ce
NT
12538
12539 /* turn <> into <ARGV> */
79072805 12540 if (!len)
689badd5 12541 Copy("ARGV",d,5,char);
02aa26ce 12542
9b3023bc 12543 /* Check whether readline() is overriden */
fafc274c 12544 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12545 if ((gv_readline
ba979b31 12546 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12547 ||
017a3ce5 12548 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12549 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12550 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12551 readline_overriden = TRUE;
12552
02aa26ce
NT
12553 /* if <$fh>, create the ops to turn the variable into a
12554 filehandle
12555 */
79072805 12556 if (*d == '$') {
02aa26ce
NT
12557 /* try to find it in the pad for this block, otherwise find
12558 add symbol table ops
12559 */
f8f98e0a 12560 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12561 if (tmp != NOT_IN_PAD) {
00b1698f 12562 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12563 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12564 HEK * const stashname = HvNAME_HEK(stash);
12565 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12566 sv_catpvs(sym, "::");
f558d5af
JH
12567 sv_catpv(sym, d+1);
12568 d = SvPVX(sym);
12569 goto intro_sym;
12570 }
12571 else {
6136c704 12572 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12573 o->op_targ = tmp;
9b3023bc
RGS
12574 PL_lex_op = readline_overriden
12575 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 12576 op_append_elem(OP_LIST, o,
9b3023bc
RGS
12577 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12578 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12579 }
a0d0e21e
LW
12580 }
12581 else {
f558d5af
JH
12582 GV *gv;
12583 ++d;
12584intro_sym:
12585 gv = gv_fetchpv(d,
12586 (PL_in_eval
12587 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12588 : GV_ADDMULTI),
f558d5af 12589 SVt_PV);
9b3023bc
RGS
12590 PL_lex_op = readline_overriden
12591 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 12592 op_append_elem(OP_LIST,
9b3023bc
RGS
12593 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12594 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12595 : (OP*)newUNOP(OP_READLINE, 0,
12596 newUNOP(OP_RV2SV, 0,
12597 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12598 }
7c6fadd6
RGS
12599 if (!readline_overriden)
12600 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12601 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12602 pl_yylval.ival = OP_NULL;
79072805 12603 }
02aa26ce
NT
12604
12605 /* If it's none of the above, it must be a literal filehandle
12606 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12607 else {
6136c704 12608 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12609 PL_lex_op = readline_overriden
12610 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
2fcb4757 12611 op_append_elem(OP_LIST,
9b3023bc
RGS
12612 newGVOP(OP_GV, 0, gv),
12613 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12614 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12615 pl_yylval.ival = OP_NULL;
79072805
LW
12616 }
12617 }
02aa26ce 12618
79072805
LW
12619 return s;
12620}
12621
02aa26ce
NT
12622
12623/* scan_str
12624 takes: start position in buffer
09bef843
SB
12625 keep_quoted preserve \ on the embedded delimiter(s)
12626 keep_delims preserve the delimiters around the string
02aa26ce
NT
12627 returns: position to continue reading from buffer
12628 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12629 updates the read buffer.
12630
12631 This subroutine pulls a string out of the input. It is called for:
12632 q single quotes q(literal text)
12633 ' single quotes 'literal text'
12634 qq double quotes qq(interpolate $here please)
12635 " double quotes "interpolate $here please"
12636 qx backticks qx(/bin/ls -l)
12637 ` backticks `/bin/ls -l`
12638 qw quote words @EXPORT_OK = qw( func() $spam )
12639 m// regexp match m/this/
12640 s/// regexp substitute s/this/that/
12641 tr/// string transliterate tr/this/that/
12642 y/// string transliterate y/this/that/
12643 ($*@) sub prototypes sub foo ($)
09bef843 12644 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12645 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12646
12647 In most of these cases (all but <>, patterns and transliterate)
12648 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12649 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12650 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12651 calls scan_str().
4e553d73 12652
02aa26ce
NT
12653 It skips whitespace before the string starts, and treats the first
12654 character as the delimiter. If the delimiter is one of ([{< then
12655 the corresponding "close" character )]}> is used as the closing
12656 delimiter. It allows quoting of delimiters, and if the string has
12657 balanced delimiters ([{<>}]) it allows nesting.
12658
37fd879b
HS
12659 On success, the SV with the resulting string is put into lex_stuff or,
12660 if that is already non-NULL, into lex_repl. The second case occurs only
12661 when parsing the RHS of the special constructs s/// and tr/// (y///).
12662 For convenience, the terminating delimiter character is stuffed into
12663 SvIVX of the SV.
02aa26ce
NT
12664*/
12665
76e3520e 12666STATIC char *
09bef843 12667S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12668{
97aff369 12669 dVAR;
02aa26ce 12670 SV *sv; /* scalar value: string */
d3fcec1f 12671 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12672 register char *s = start; /* current position in the buffer */
12673 register char term; /* terminating character */
12674 register char *to; /* current position in the sv's data */
12675 I32 brackets = 1; /* bracket nesting level */
89491803 12676 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12677 I32 termcode; /* terminating char. code */
89ebb4a3 12678 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12679 STRLEN termlen; /* length of terminating string */
0331ef07 12680 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12681#ifdef PERL_MAD
12682 int stuffstart;
12683 char *tstart;
12684#endif
02aa26ce 12685
7918f24d
NC
12686 PERL_ARGS_ASSERT_SCAN_STR;
12687
02aa26ce 12688 /* skip space before the delimiter */
29595ff2
NC
12689 if (isSPACE(*s)) {
12690 s = PEEKSPACE(s);
12691 }
02aa26ce 12692
5db06880 12693#ifdef PERL_MAD
cd81e915
NC
12694 if (PL_realtokenstart >= 0) {
12695 stuffstart = PL_realtokenstart;
12696 PL_realtokenstart = -1;
5db06880
NC
12697 }
12698 else
12699 stuffstart = start - SvPVX(PL_linestr);
12700#endif
02aa26ce 12701 /* mark where we are, in case we need to report errors */
79072805 12702 CLINE;
02aa26ce
NT
12703
12704 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12705 term = *s;
220e2d4e
IH
12706 if (!UTF) {
12707 termcode = termstr[0] = term;
12708 termlen = 1;
12709 }
12710 else {
f3b9ce0f 12711 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12712 Copy(s, termstr, termlen, U8);
12713 if (!UTF8_IS_INVARIANT(term))
12714 has_utf8 = TRUE;
12715 }
b1c7b182 12716
02aa26ce 12717 /* mark where we are */
57843af0 12718 PL_multi_start = CopLINE(PL_curcop);
3280af22 12719 PL_multi_open = term;
02aa26ce
NT
12720
12721 /* find corresponding closing delimiter */
93a17b20 12722 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12723 termcode = termstr[0] = term = tmps[5];
12724
3280af22 12725 PL_multi_close = term;
79072805 12726
561b68a9
SH
12727 /* create a new SV to hold the contents. 79 is the SV's initial length.
12728 What a random number. */
7d0a29fe
NC
12729 sv = newSV_type(SVt_PVIV);
12730 SvGROW(sv, 80);
45977657 12731 SvIV_set(sv, termcode);
a0d0e21e 12732 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12733
12734 /* move past delimiter and try to read a complete string */
09bef843 12735 if (keep_delims)
220e2d4e
IH
12736 sv_catpvn(sv, s, termlen);
12737 s += termlen;
5db06880
NC
12738#ifdef PERL_MAD
12739 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12740 if (!PL_thisopen && !keep_delims) {
12741 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12742 stuffstart = s - SvPVX(PL_linestr);
12743 }
12744#endif
93a17b20 12745 for (;;) {
220e2d4e
IH
12746 if (PL_encoding && !UTF) {
12747 bool cont = TRUE;
12748
12749 while (cont) {
95a20fc0 12750 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12751 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12752 &offset, (char*)termstr, termlen);
6136c704
AL
12753 const char * const ns = SvPVX_const(PL_linestr) + offset;
12754 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12755
12756 for (; s < ns; s++) {
12757 if (*s == '\n' && !PL_rsfp)
12758 CopLINE_inc(PL_curcop);
12759 }
12760 if (!found)
12761 goto read_more_line;
12762 else {
12763 /* handle quoted delimiters */
52327caf 12764 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12765 const char *t;
95a20fc0 12766 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12767 t--;
12768 if ((svlast-1 - t) % 2) {
12769 if (!keep_quoted) {
12770 *(svlast-1) = term;
12771 *svlast = '\0';
12772 SvCUR_set(sv, SvCUR(sv) - 1);
12773 }
12774 continue;
12775 }
12776 }
12777 if (PL_multi_open == PL_multi_close) {
12778 cont = FALSE;
12779 }
12780 else {
f54cb97a
AL
12781 const char *t;
12782 char *w;
0331ef07 12783 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12784 /* At here, all closes are "was quoted" one,
12785 so we don't check PL_multi_close. */
12786 if (*t == '\\') {
12787 if (!keep_quoted && *(t+1) == PL_multi_open)
12788 t++;
12789 else
12790 *w++ = *t++;
12791 }
12792 else if (*t == PL_multi_open)
12793 brackets++;
12794
12795 *w = *t;
12796 }
12797 if (w < t) {
12798 *w++ = term;
12799 *w = '\0';
95a20fc0 12800 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12801 }
0331ef07 12802 last_off = w - SvPVX(sv);
220e2d4e
IH
12803 if (--brackets <= 0)
12804 cont = FALSE;
12805 }
12806 }
12807 }
12808 if (!keep_delims) {
12809 SvCUR_set(sv, SvCUR(sv) - 1);
12810 *SvEND(sv) = '\0';
12811 }
12812 break;
12813 }
12814
02aa26ce 12815 /* extend sv if need be */
3280af22 12816 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12817 /* set 'to' to the next character in the sv's string */
463ee0b2 12818 to = SvPVX(sv)+SvCUR(sv);
09bef843 12819
02aa26ce 12820 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12821 if (PL_multi_open == PL_multi_close) {
12822 for (; s < PL_bufend; s++,to++) {
02aa26ce 12823 /* embedded newlines increment the current line number */
3280af22 12824 if (*s == '\n' && !PL_rsfp)
57843af0 12825 CopLINE_inc(PL_curcop);
02aa26ce 12826 /* handle quoted delimiters */
3280af22 12827 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12828 if (!keep_quoted && s[1] == term)
a0d0e21e 12829 s++;
02aa26ce 12830 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12831 else
12832 *to++ = *s++;
12833 }
02aa26ce
NT
12834 /* terminate when run out of buffer (the for() condition), or
12835 have found the terminator */
220e2d4e
IH
12836 else if (*s == term) {
12837 if (termlen == 1)
12838 break;
f3b9ce0f 12839 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12840 break;
12841 }
63cd0674 12842 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12843 has_utf8 = TRUE;
93a17b20
LW
12844 *to = *s;
12845 }
12846 }
02aa26ce
NT
12847
12848 /* if the terminator isn't the same as the start character (e.g.,
12849 matched brackets), we have to allow more in the quoting, and
12850 be prepared for nested brackets.
12851 */
93a17b20 12852 else {
02aa26ce 12853 /* read until we run out of string, or we find the terminator */
3280af22 12854 for (; s < PL_bufend; s++,to++) {
02aa26ce 12855 /* embedded newlines increment the line count */
3280af22 12856 if (*s == '\n' && !PL_rsfp)
57843af0 12857 CopLINE_inc(PL_curcop);
02aa26ce 12858 /* backslashes can escape the open or closing characters */
3280af22 12859 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12860 if (!keep_quoted &&
12861 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12862 s++;
12863 else
12864 *to++ = *s++;
12865 }
02aa26ce 12866 /* allow nested opens and closes */
3280af22 12867 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12868 break;
3280af22 12869 else if (*s == PL_multi_open)
93a17b20 12870 brackets++;
63cd0674 12871 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12872 has_utf8 = TRUE;
93a17b20
LW
12873 *to = *s;
12874 }
12875 }
02aa26ce 12876 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12877 *to = '\0';
95a20fc0 12878 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12879
02aa26ce
NT
12880 /*
12881 * this next chunk reads more into the buffer if we're not done yet
12882 */
12883
b1c7b182
GS
12884 if (s < PL_bufend)
12885 break; /* handle case where we are done yet :-) */
79072805 12886
6a27c188 12887#ifndef PERL_STRICT_CR
95a20fc0 12888 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12889 if ((to[-2] == '\r' && to[-1] == '\n') ||
12890 (to[-2] == '\n' && to[-1] == '\r'))
12891 {
f63a84b2
LW
12892 to[-2] = '\n';
12893 to--;
95a20fc0 12894 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12895 }
12896 else if (to[-1] == '\r')
12897 to[-1] = '\n';
12898 }
95a20fc0 12899 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12900 to[-1] = '\n';
12901#endif
12902
220e2d4e 12903 read_more_line:
02aa26ce
NT
12904 /* if we're out of file, or a read fails, bail and reset the current
12905 line marker so we can report where the unterminated string began
12906 */
5db06880
NC
12907#ifdef PERL_MAD
12908 if (PL_madskills) {
c35e046a 12909 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12910 if (PL_thisstuff)
12911 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12912 else
cd81e915 12913 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12914 }
12915#endif
f0e67a1d
Z
12916 CopLINE_inc(PL_curcop);
12917 PL_bufptr = PL_bufend;
12918 if (!lex_next_chunk(0)) {
c07a80fd 12919 sv_free(sv);
eb160463 12920 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12921 return NULL;
79072805 12922 }
f0e67a1d 12923 s = PL_bufptr;
5db06880
NC
12924#ifdef PERL_MAD
12925 stuffstart = 0;
12926#endif
378cc40b 12927 }
4e553d73 12928
02aa26ce
NT
12929 /* at this point, we have successfully read the delimited string */
12930
220e2d4e 12931 if (!PL_encoding || UTF) {
5db06880
NC
12932#ifdef PERL_MAD
12933 if (PL_madskills) {
c35e046a 12934 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12935 const int len = s - tstart;
cd81e915 12936 if (PL_thisstuff)
c35e046a 12937 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12938 else
c35e046a 12939 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12940 if (!PL_thisclose && !keep_delims)
12941 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12942 }
12943#endif
12944
220e2d4e
IH
12945 if (keep_delims)
12946 sv_catpvn(sv, s, termlen);
12947 s += termlen;
12948 }
5db06880
NC
12949#ifdef PERL_MAD
12950 else {
12951 if (PL_madskills) {
c35e046a
AL
12952 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12953 const int len = s - tstart - termlen;
cd81e915 12954 if (PL_thisstuff)
c35e046a 12955 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12956 else
c35e046a 12957 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12958 if (!PL_thisclose && !keep_delims)
12959 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12960 }
12961 }
12962#endif
220e2d4e 12963 if (has_utf8 || PL_encoding)
b1c7b182 12964 SvUTF8_on(sv);
d0063567 12965
57843af0 12966 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12967
12968 /* if we allocated too much space, give some back */
93a17b20
LW
12969 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12970 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12971 SvPV_renew(sv, SvLEN(sv));
79072805 12972 }
02aa26ce
NT
12973
12974 /* decide whether this is the first or second quoted string we've read
12975 for this op
12976 */
4e553d73 12977
3280af22
NIS
12978 if (PL_lex_stuff)
12979 PL_lex_repl = sv;
79072805 12980 else
3280af22 12981 PL_lex_stuff = sv;
378cc40b
LW
12982 return s;
12983}
12984
02aa26ce
NT
12985/*
12986 scan_num
12987 takes: pointer to position in buffer
12988 returns: pointer to new position in buffer
6154021b 12989 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12990
12991 Read a number in any of the formats that Perl accepts:
12992
7fd134d9
JH
12993 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12994 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12995 0b[01](_?[01])*
12996 0[0-7](_?[0-7])*
12997 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12998
3280af22 12999 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
13000 thing it reads.
13001
13002 If it reads a number without a decimal point or an exponent, it will
13003 try converting the number to an integer and see if it can do so
13004 without loss of precision.
13005*/
4e553d73 13006
378cc40b 13007char *
bfed75c6 13008Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 13009{
97aff369 13010 dVAR;
bfed75c6 13011 register const char *s = start; /* current position in buffer */
02aa26ce
NT
13012 register char *d; /* destination in temp buffer */
13013 register char *e; /* end of temp buffer */
86554af2 13014 NV nv; /* number read, as a double */
a0714e2c 13015 SV *sv = NULL; /* place to put the converted number */
a86a20aa 13016 bool floatit; /* boolean: int or float? */
cbbf8932 13017 const char *lastub = NULL; /* position of last underbar */
bfed75c6 13018 static char const number_too_long[] = "Number too long";
378cc40b 13019
7918f24d
NC
13020 PERL_ARGS_ASSERT_SCAN_NUM;
13021
02aa26ce
NT
13022 /* We use the first character to decide what type of number this is */
13023
378cc40b 13024 switch (*s) {
79072805 13025 default:
cea2e8a9 13026 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 13027
02aa26ce 13028 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 13029 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
13030 case '0':
13031 {
02aa26ce
NT
13032 /* variables:
13033 u holds the "number so far"
4f19785b
WSI
13034 shift the power of 2 of the base
13035 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
13036 overflowed was the number more than we can hold?
13037
13038 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
13039 we in octal/hex/binary?" indicator to disallow hex characters
13040 when in octal mode.
02aa26ce 13041 */
9e24b6e2
JH
13042 NV n = 0.0;
13043 UV u = 0;
79072805 13044 I32 shift;
9e24b6e2 13045 bool overflowed = FALSE;
61f33854 13046 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
13047 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
13048 static const char* const bases[5] =
13049 { "", "binary", "", "octal", "hexadecimal" };
13050 static const char* const Bases[5] =
13051 { "", "Binary", "", "Octal", "Hexadecimal" };
13052 static const char* const maxima[5] =
13053 { "",
13054 "0b11111111111111111111111111111111",
13055 "",
13056 "037777777777",
13057 "0xffffffff" };
bfed75c6 13058 const char *base, *Base, *max;
378cc40b 13059
02aa26ce 13060 /* check for hex */
a674e8db 13061 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
13062 shift = 4;
13063 s += 2;
61f33854 13064 just_zero = FALSE;
a674e8db 13065 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
13066 shift = 1;
13067 s += 2;
61f33854 13068 just_zero = FALSE;
378cc40b 13069 }
02aa26ce 13070 /* check for a decimal in disguise */
b78218b7 13071 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 13072 goto decimal;
02aa26ce 13073 /* so it must be octal */
928753ea 13074 else {
378cc40b 13075 shift = 3;
928753ea
JH
13076 s++;
13077 }
13078
13079 if (*s == '_') {
a2a5de95 13080 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
13081 "Misplaced _ in number");
13082 lastub = s++;
13083 }
9e24b6e2
JH
13084
13085 base = bases[shift];
13086 Base = Bases[shift];
13087 max = maxima[shift];
02aa26ce 13088
4f19785b 13089 /* read the rest of the number */
378cc40b 13090 for (;;) {
9e24b6e2 13091 /* x is used in the overflow test,
893fe2c2 13092 b is the digit we're adding on. */
9e24b6e2 13093 UV x, b;
55497cff 13094
378cc40b 13095 switch (*s) {
02aa26ce
NT
13096
13097 /* if we don't mention it, we're done */
378cc40b
LW
13098 default:
13099 goto out;
02aa26ce 13100
928753ea 13101 /* _ are ignored -- but warned about if consecutive */
de3bb511 13102 case '_':
a2a5de95
NC
13103 if (lastub && s == lastub + 1)
13104 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13105 "Misplaced _ in number");
928753ea 13106 lastub = s++;
de3bb511 13107 break;
02aa26ce
NT
13108
13109 /* 8 and 9 are not octal */
378cc40b 13110 case '8': case '9':
4f19785b 13111 if (shift == 3)
cea2e8a9 13112 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 13113 /* FALL THROUGH */
02aa26ce
NT
13114
13115 /* octal digits */
4f19785b 13116 case '2': case '3': case '4':
378cc40b 13117 case '5': case '6': case '7':
4f19785b 13118 if (shift == 1)
cea2e8a9 13119 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
13120 /* FALL THROUGH */
13121
13122 case '0': case '1':
02aa26ce 13123 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 13124 goto digit;
02aa26ce
NT
13125
13126 /* hex digits */
378cc40b
LW
13127 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13128 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 13129 /* make sure they said 0x */
378cc40b
LW
13130 if (shift != 4)
13131 goto out;
55497cff 13132 b = (*s++ & 7) + 9;
02aa26ce
NT
13133
13134 /* Prepare to put the digit we have onto the end
13135 of the number so far. We check for overflows.
13136 */
13137
55497cff 13138 digit:
61f33854 13139 just_zero = FALSE;
9e24b6e2
JH
13140 if (!overflowed) {
13141 x = u << shift; /* make room for the digit */
13142
13143 if ((x >> shift) != u
13144 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
13145 overflowed = TRUE;
13146 n = (NV) u;
9b387841
NC
13147 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13148 "Integer overflow in %s number",
13149 base);
9e24b6e2
JH
13150 } else
13151 u = x | b; /* add the digit to the end */
13152 }
13153 if (overflowed) {
13154 n *= nvshift[shift];
13155 /* If an NV has not enough bits in its
13156 * mantissa to represent an UV this summing of
13157 * small low-order numbers is a waste of time
13158 * (because the NV cannot preserve the
13159 * low-order bits anyway): we could just
13160 * remember when did we overflow and in the
13161 * end just multiply n by the right
13162 * amount. */
13163 n += (NV) b;
55497cff 13164 }
378cc40b
LW
13165 break;
13166 }
13167 }
02aa26ce
NT
13168
13169 /* if we get here, we had success: make a scalar value from
13170 the number.
13171 */
378cc40b 13172 out:
928753ea
JH
13173
13174 /* final misplaced underbar check */
13175 if (s[-1] == '_') {
a2a5de95 13176 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
13177 }
13178
9e24b6e2 13179 if (overflowed) {
a2a5de95
NC
13180 if (n > 4294967295.0)
13181 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13182 "%s number > %s non-portable",
13183 Base, max);
b081dd7e 13184 sv = newSVnv(n);
9e24b6e2
JH
13185 }
13186 else {
15041a67 13187#if UVSIZE > 4
a2a5de95
NC
13188 if (u > 0xffffffff)
13189 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13190 "%s number > %s non-portable",
13191 Base, max);
2cc4c2dc 13192#endif
b081dd7e 13193 sv = newSVuv(u);
9e24b6e2 13194 }
61f33854 13195 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 13196 sv = new_constant(start, s - start, "integer",
eb0d8d16 13197 sv, NULL, NULL, 0);
61f33854 13198 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 13199 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
13200 }
13201 break;
02aa26ce
NT
13202
13203 /*
13204 handle decimal numbers.
13205 we're also sent here when we read a 0 as the first digit
13206 */
378cc40b
LW
13207 case '1': case '2': case '3': case '4': case '5':
13208 case '6': case '7': case '8': case '9': case '.':
13209 decimal:
3280af22
NIS
13210 d = PL_tokenbuf;
13211 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 13212 floatit = FALSE;
02aa26ce
NT
13213
13214 /* read next group of digits and _ and copy into d */
de3bb511 13215 while (isDIGIT(*s) || *s == '_') {
4e553d73 13216 /* skip underscores, checking for misplaced ones
02aa26ce
NT
13217 if -w is on
13218 */
93a17b20 13219 if (*s == '_') {
a2a5de95
NC
13220 if (lastub && s == lastub + 1)
13221 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13222 "Misplaced _ in number");
928753ea 13223 lastub = s++;
93a17b20 13224 }
fc36a67e 13225 else {
02aa26ce 13226 /* check for end of fixed-length buffer */
fc36a67e 13227 if (d >= e)
cea2e8a9 13228 Perl_croak(aTHX_ number_too_long);
02aa26ce 13229 /* if we're ok, copy the character */
378cc40b 13230 *d++ = *s++;
fc36a67e 13231 }
378cc40b 13232 }
02aa26ce
NT
13233
13234 /* final misplaced underbar check */
928753ea 13235 if (lastub && s == lastub + 1) {
a2a5de95 13236 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 13237 }
02aa26ce
NT
13238
13239 /* read a decimal portion if there is one. avoid
13240 3..5 being interpreted as the number 3. followed
13241 by .5
13242 */
2f3197b3 13243 if (*s == '.' && s[1] != '.') {
79072805 13244 floatit = TRUE;
378cc40b 13245 *d++ = *s++;
02aa26ce 13246
928753ea 13247 if (*s == '_') {
a2a5de95
NC
13248 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13249 "Misplaced _ in number");
928753ea
JH
13250 lastub = s;
13251 }
13252
13253 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 13254 */
fc36a67e 13255 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 13256 /* fixed length buffer check */
fc36a67e 13257 if (d >= e)
cea2e8a9 13258 Perl_croak(aTHX_ number_too_long);
928753ea 13259 if (*s == '_') {
a2a5de95
NC
13260 if (lastub && s == lastub + 1)
13261 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13262 "Misplaced _ in number");
928753ea
JH
13263 lastub = s;
13264 }
13265 else
fc36a67e 13266 *d++ = *s;
378cc40b 13267 }
928753ea
JH
13268 /* fractional part ending in underbar? */
13269 if (s[-1] == '_') {
a2a5de95
NC
13270 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13271 "Misplaced _ in number");
928753ea 13272 }
dd629d5b
GS
13273 if (*s == '.' && isDIGIT(s[1])) {
13274 /* oops, it's really a v-string, but without the "v" */
f4758303 13275 s = start;
dd629d5b
GS
13276 goto vstring;
13277 }
378cc40b 13278 }
02aa26ce
NT
13279
13280 /* read exponent part, if present */
3792a11b 13281 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
13282 floatit = TRUE;
13283 s++;
02aa26ce
NT
13284
13285 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 13286 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 13287
7fd134d9
JH
13288 /* stray preinitial _ */
13289 if (*s == '_') {
a2a5de95
NC
13290 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13291 "Misplaced _ in number");
7fd134d9
JH
13292 lastub = s++;
13293 }
13294
02aa26ce 13295 /* allow positive or negative exponent */
378cc40b
LW
13296 if (*s == '+' || *s == '-')
13297 *d++ = *s++;
02aa26ce 13298
7fd134d9
JH
13299 /* stray initial _ */
13300 if (*s == '_') {
a2a5de95
NC
13301 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13302 "Misplaced _ in number");
7fd134d9
JH
13303 lastub = s++;
13304 }
13305
7fd134d9
JH
13306 /* read digits of exponent */
13307 while (isDIGIT(*s) || *s == '_') {
13308 if (isDIGIT(*s)) {
13309 if (d >= e)
13310 Perl_croak(aTHX_ number_too_long);
b3b48e3e 13311 *d++ = *s++;
7fd134d9
JH
13312 }
13313 else {
041457d9 13314 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
13315 (!isDIGIT(s[1]) && s[1] != '_')))
13316 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13317 "Misplaced _ in number");
b3b48e3e 13318 lastub = s++;
7fd134d9 13319 }
7fd134d9 13320 }
378cc40b 13321 }
02aa26ce 13322
02aa26ce 13323
0b7fceb9 13324 /*
58bb9ec3
NC
13325 We try to do an integer conversion first if no characters
13326 indicating "float" have been found.
0b7fceb9
MU
13327 */
13328
13329 if (!floatit) {
58bb9ec3 13330 UV uv;
6136c704 13331 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
13332
13333 if (flags == IS_NUMBER_IN_UV) {
13334 if (uv <= IV_MAX)
b081dd7e 13335 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 13336 else
b081dd7e 13337 sv = newSVuv(uv);
58bb9ec3
NC
13338 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13339 if (uv <= (UV) IV_MIN)
b081dd7e 13340 sv = newSViv(-(IV)uv);
58bb9ec3
NC
13341 else
13342 floatit = TRUE;
13343 } else
13344 floatit = TRUE;
13345 }
0b7fceb9 13346 if (floatit) {
58bb9ec3
NC
13347 /* terminate the string */
13348 *d = '\0';
86554af2 13349 nv = Atof(PL_tokenbuf);
b081dd7e 13350 sv = newSVnv(nv);
86554af2 13351 }
86554af2 13352
eb0d8d16
NC
13353 if ( floatit
13354 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13355 const char *const key = floatit ? "float" : "integer";
13356 const STRLEN keylen = floatit ? 5 : 7;
13357 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13358 key, keylen, sv, NULL, NULL, 0);
13359 }
378cc40b 13360 break;
0b7fceb9 13361
e312add1 13362 /* if it starts with a v, it could be a v-string */
a7cb1f99 13363 case 'v':
dd629d5b 13364vstring:
561b68a9 13365 sv = newSV(5); /* preallocate storage space */
65b06e02 13366 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 13367 break;
79072805 13368 }
a687059c 13369
02aa26ce
NT
13370 /* make the op for the constant and return */
13371
a86a20aa 13372 if (sv)
b73d6f50 13373 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13374 else
5f66b61c 13375 lvalp->opval = NULL;
a687059c 13376
73d840c0 13377 return (char *)s;
378cc40b
LW
13378}
13379
76e3520e 13380STATIC char *
cea2e8a9 13381S_scan_formline(pTHX_ register char *s)
378cc40b 13382{
97aff369 13383 dVAR;
79072805 13384 register char *eol;
378cc40b 13385 register char *t;
6136c704 13386 SV * const stuff = newSVpvs("");
79072805 13387 bool needargs = FALSE;
c5ee2135 13388 bool eofmt = FALSE;
5db06880
NC
13389#ifdef PERL_MAD
13390 char *tokenstart = s;
4f61fd4b
JC
13391 SV* savewhite = NULL;
13392
5db06880 13393 if (PL_madskills) {
cd81e915
NC
13394 savewhite = PL_thiswhite;
13395 PL_thiswhite = 0;
5db06880
NC
13396 }
13397#endif
378cc40b 13398
7918f24d
NC
13399 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13400
79072805 13401 while (!needargs) {
a1b95068 13402 if (*s == '.') {
c35e046a 13403 t = s+1;
51882d45 13404#ifdef PERL_STRICT_CR
c35e046a
AL
13405 while (SPACE_OR_TAB(*t))
13406 t++;
51882d45 13407#else
c35e046a
AL
13408 while (SPACE_OR_TAB(*t) || *t == '\r')
13409 t++;
51882d45 13410#endif
c5ee2135
WL
13411 if (*t == '\n' || t == PL_bufend) {
13412 eofmt = TRUE;
79072805 13413 break;
c5ee2135 13414 }
79072805 13415 }
3280af22 13416 if (PL_in_eval && !PL_rsfp) {
07409e01 13417 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13418 if (!eol++)
3280af22 13419 eol = PL_bufend;
0f85fab0
LW
13420 }
13421 else
3280af22 13422 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13423 if (*s != '#') {
a0d0e21e
LW
13424 for (t = s; t < eol; t++) {
13425 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13426 needargs = FALSE;
13427 goto enough; /* ~~ must be first line in formline */
378cc40b 13428 }
a0d0e21e
LW
13429 if (*t == '@' || *t == '^')
13430 needargs = TRUE;
378cc40b 13431 }
7121b347
MG
13432 if (eol > s) {
13433 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13434#ifndef PERL_STRICT_CR
7121b347
MG
13435 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13436 char *end = SvPVX(stuff) + SvCUR(stuff);
13437 end[-2] = '\n';
13438 end[-1] = '\0';
b162af07 13439 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13440 }
2dc4c65b 13441#endif
7121b347
MG
13442 }
13443 else
13444 break;
79072805 13445 }
95a20fc0 13446 s = (char*)eol;
3280af22 13447 if (PL_rsfp) {
f0e67a1d 13448 bool got_some;
5db06880
NC
13449#ifdef PERL_MAD
13450 if (PL_madskills) {
cd81e915
NC
13451 if (PL_thistoken)
13452 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13453 else
cd81e915 13454 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13455 }
13456#endif
f0e67a1d
Z
13457 PL_bufptr = PL_bufend;
13458 CopLINE_inc(PL_curcop);
13459 got_some = lex_next_chunk(0);
13460 CopLINE_dec(PL_curcop);
13461 s = PL_bufptr;
5db06880 13462#ifdef PERL_MAD
f0e67a1d 13463 tokenstart = PL_bufptr;
5db06880 13464#endif
f0e67a1d 13465 if (!got_some)
378cc40b 13466 break;
378cc40b 13467 }
463ee0b2 13468 incline(s);
79072805 13469 }
a0d0e21e
LW
13470 enough:
13471 if (SvCUR(stuff)) {
3280af22 13472 PL_expect = XTERM;
79072805 13473 if (needargs) {
3280af22 13474 PL_lex_state = LEX_NORMAL;
cd81e915 13475 start_force(PL_curforce);
9ded7720 13476 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13477 force_next(',');
13478 }
a0d0e21e 13479 else
3280af22 13480 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13481 if (!IN_BYTES) {
95a20fc0 13482 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13483 SvUTF8_on(stuff);
13484 else if (PL_encoding)
13485 sv_recode_to_utf8(stuff, PL_encoding);
13486 }
cd81e915 13487 start_force(PL_curforce);
9ded7720 13488 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13489 force_next(THING);
cd81e915 13490 start_force(PL_curforce);
9ded7720 13491 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13492 force_next(LSTOP);
378cc40b 13493 }
79072805 13494 else {
8990e307 13495 SvREFCNT_dec(stuff);
c5ee2135
WL
13496 if (eofmt)
13497 PL_lex_formbrack = 0;
3280af22 13498 PL_bufptr = s;
79072805 13499 }
5db06880
NC
13500#ifdef PERL_MAD
13501 if (PL_madskills) {
cd81e915
NC
13502 if (PL_thistoken)
13503 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13504 else
cd81e915
NC
13505 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13506 PL_thiswhite = savewhite;
5db06880
NC
13507 }
13508#endif
79072805 13509 return s;
378cc40b 13510}
a687059c 13511
ba6d6ac9 13512I32
864dbfa3 13513Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13514{
97aff369 13515 dVAR;
a3b680e6 13516 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13517 CV* const outsidecv = PL_compcv;
8990e307 13518
3280af22
NIS
13519 if (PL_compcv) {
13520 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13521 }
7766f137 13522 SAVEI32(PL_subline);
3280af22 13523 save_item(PL_subname);
3280af22 13524 SAVESPTR(PL_compcv);
3280af22 13525
ea726b52 13526 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13527 CvFLAGS(PL_compcv) |= flags;
13528
57843af0 13529 PL_subline = CopLINE(PL_curcop);
dd2155a4 13530 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13531 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13532 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13533
8990e307
LW
13534 return oldsavestack_ix;
13535}
13536
084592ab
CN
13537#ifdef __SC__
13538#pragma segment Perl_yylex
13539#endif
af41e527
NC
13540static int
13541S_yywarn(pTHX_ const char *const s)
8990e307 13542{
97aff369 13543 dVAR;
7918f24d
NC
13544
13545 PERL_ARGS_ASSERT_YYWARN;
13546
faef0170 13547 PL_in_eval |= EVAL_WARNONLY;
748a9306 13548 yyerror(s);
faef0170 13549 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13550 return 0;
8990e307
LW
13551}
13552
13553int
15f169a1 13554Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13555{
97aff369 13556 dVAR;
bfed75c6
AL
13557 const char *where = NULL;
13558 const char *context = NULL;
68dc0745 13559 int contlen = -1;
46fc3d4c 13560 SV *msg;
5912531f 13561 int yychar = PL_parser->yychar;
463ee0b2 13562
7918f24d
NC
13563 PERL_ARGS_ASSERT_YYERROR;
13564
3280af22 13565 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13566 where = "at EOF";
8bcfe651
TM
13567 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13568 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13569 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13570 /*
13571 Only for NetWare:
13572 The code below is removed for NetWare because it abends/crashes on NetWare
13573 when the script has error such as not having the closing quotes like:
13574 if ($var eq "value)
13575 Checking of white spaces is anyway done in NetWare code.
13576 */
13577#ifndef NETWARE
3280af22
NIS
13578 while (isSPACE(*PL_oldoldbufptr))
13579 PL_oldoldbufptr++;
f355267c 13580#endif
3280af22
NIS
13581 context = PL_oldoldbufptr;
13582 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13583 }
8bcfe651
TM
13584 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13585 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13586 /*
13587 Only for NetWare:
13588 The code below is removed for NetWare because it abends/crashes on NetWare
13589 when the script has error such as not having the closing quotes like:
13590 if ($var eq "value)
13591 Checking of white spaces is anyway done in NetWare code.
13592 */
13593#ifndef NETWARE
3280af22
NIS
13594 while (isSPACE(*PL_oldbufptr))
13595 PL_oldbufptr++;
f355267c 13596#endif
3280af22
NIS
13597 context = PL_oldbufptr;
13598 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13599 }
13600 else if (yychar > 255)
68dc0745 13601 where = "next token ???";
12fbd33b 13602 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13603 if (PL_lex_state == LEX_NORMAL ||
13604 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13605 where = "at end of line";
3280af22 13606 else if (PL_lex_inpat)
68dc0745 13607 where = "within pattern";
463ee0b2 13608 else
68dc0745 13609 where = "within string";
463ee0b2 13610 }
46fc3d4c 13611 else {
84bafc02 13612 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13613 if (yychar < 32)
cea2e8a9 13614 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13615 else if (isPRINT_LC(yychar)) {
88c9ea1e 13616 const char string = yychar;
5e7aa789
NC
13617 sv_catpvn(where_sv, &string, 1);
13618 }
463ee0b2 13619 else
cea2e8a9 13620 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13621 where = SvPVX_const(where_sv);
463ee0b2 13622 }
46fc3d4c 13623 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13624 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13625 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13626 if (context)
cea2e8a9 13627 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13628 else
cea2e8a9 13629 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13630 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13631 Perl_sv_catpvf(aTHX_ msg,
57def98f 13632 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13633 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13634 PL_multi_end = 0;
a0d0e21e 13635 }
500960a6 13636 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13637 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13638 }
463ee0b2 13639 else
5a844595 13640 qerror(msg);
c7d6bfb2
GS
13641 if (PL_error_count >= 10) {
13642 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13643 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13644 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13645 else
13646 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13647 OutCopFILE(PL_curcop));
c7d6bfb2 13648 }
3280af22 13649 PL_in_my = 0;
5c284bb0 13650 PL_in_my_stash = NULL;
463ee0b2
LW
13651 return 0;
13652}
084592ab
CN
13653#ifdef __SC__
13654#pragma segment Main
13655#endif
4e35701f 13656
b250498f 13657STATIC char*
3ae08724 13658S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13659{
97aff369 13660 dVAR;
f54cb97a 13661 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13662
13663 PERL_ARGS_ASSERT_SWALLOW_BOM;
13664
7aa207d6 13665 switch (s[0]) {
4e553d73
NIS
13666 case 0xFF:
13667 if (s[1] == 0xFE) {
ee6ba15d 13668 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13669 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13670 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13671#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13672 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13673 s += 2;
dea0fc0b 13674 if (PL_bufend > (char*)s) {
81a923f4 13675 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13676 }
b250498f 13677#else
ee6ba15d 13678 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13679#endif
01ec43d0
GS
13680 }
13681 break;
78ae23f5 13682 case 0xFE:
7aa207d6 13683 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13684#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13685 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13686 s += 2;
13687 if (PL_bufend > (char *)s) {
81a923f4 13688 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13689 }
b250498f 13690#else
ee6ba15d 13691 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13692#endif
01ec43d0
GS
13693 }
13694 break;
3ae08724
GS
13695 case 0xEF:
13696 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13697 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13698 s += 3; /* UTF-8 */
13699 }
13700 break;
13701 case 0:
7aa207d6
JH
13702 if (slen > 3) {
13703 if (s[1] == 0) {
13704 if (s[2] == 0xFE && s[3] == 0xFF) {
13705 /* UTF-32 big-endian */
ee6ba15d 13706 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13707 }
13708 }
13709 else if (s[2] == 0 && s[3] != 0) {
13710 /* Leading bytes
13711 * 00 xx 00 xx
13712 * are a good indicator of UTF-16BE. */
ee6ba15d 13713#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13714 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13715 s = add_utf16_textfilter(s, FALSE);
13716#else
13717 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13718#endif
7aa207d6 13719 }
01ec43d0 13720 }
e294cc5d
JH
13721#ifdef EBCDIC
13722 case 0xDD:
13723 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13724 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13725 s += 4; /* UTF-8 */
13726 }
13727 break;
13728#endif
13729
7aa207d6
JH
13730 default:
13731 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13732 /* Leading bytes
13733 * xx 00 xx 00
13734 * are a good indicator of UTF-16LE. */
ee6ba15d 13735#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13736 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13737 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13738#else
13739 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13740#endif
7aa207d6 13741 }
01ec43d0 13742 }
b8f84bb2 13743 return (char*)s;
b250498f 13744}
4755096e 13745
6e3aabd6
GS
13746
13747#ifndef PERL_NO_UTF16_FILTER
13748static I32
a28af015 13749S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13750{
97aff369 13751 dVAR;
f3040f2c 13752 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13753 /* We re-use this each time round, throwing the contents away before we
13754 return. */
2a773401 13755 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13756 SV *const utf8_buffer = filter;
c28d6105 13757 IV status = IoPAGE(filter);
f2338a2e 13758 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 13759 I32 retval;
c8b0cbae 13760
c85ae797
NC
13761 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13762
c8b0cbae
NC
13763 /* As we're automatically added, at the lowest level, and hence only called
13764 from this file, we can be sure that we're not called in block mode. Hence
13765 don't bother writing code to deal with block mode. */
13766 if (maxlen) {
13767 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13768 }
c28d6105
NC
13769 if (status < 0) {
13770 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13771 }
1de9afcd 13772 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13773 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13774 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13775 reverse ? 'l' : 'b', idx, maxlen, status,
13776 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13777
13778 while (1) {
13779 STRLEN chars;
13780 STRLEN have;
dea0fc0b 13781 I32 newlen;
2a773401 13782 U8 *end;
c28d6105
NC
13783 /* First, look in our buffer of existing UTF-8 data: */
13784 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13785
13786 if (nl) {
13787 ++nl;
13788 } else if (status == 0) {
13789 /* EOF */
13790 IoPAGE(filter) = 0;
13791 nl = SvEND(utf8_buffer);
13792 }
13793 if (nl) {
d2d1d4de
NC
13794 STRLEN got = nl - SvPVX(utf8_buffer);
13795 /* Did we have anything to append? */
13796 retval = got != 0;
13797 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13798 /* Everything else in this code works just fine if SVp_POK isn't
13799 set. This, however, needs it, and we need it to work, else
13800 we loop infinitely because the buffer is never consumed. */
13801 sv_chop(utf8_buffer, nl);
13802 break;
13803 }
ba77e4cc 13804
c28d6105
NC
13805 /* OK, not a complete line there, so need to read some more UTF-16.
13806 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13807 while (1) {
13808 if (status <= 0)
13809 break;
13810 if (SvCUR(utf16_buffer) >= 2) {
13811 /* Location of the high octet of the last complete code point.
13812 Gosh, UTF-16 is a pain. All the benefits of variable length,
13813 *coupled* with all the benefits of partial reads and
13814 endianness. */
13815 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13816 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13817
13818 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13819 break;
13820 }
13821
13822 /* We have the first half of a surrogate. Read more. */
13823 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13824 }
c28d6105 13825
c28d6105
NC
13826 status = FILTER_READ(idx + 1, utf16_buffer,
13827 160 + (SvCUR(utf16_buffer) & 1));
13828 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13829 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13830 if (status < 0) {
13831 /* Error */
13832 IoPAGE(filter) = status;
13833 return status;
13834 }
13835 }
13836
13837 chars = SvCUR(utf16_buffer) >> 1;
13838 have = SvCUR(utf8_buffer);
13839 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13840
aa6dbd60 13841 if (reverse) {
c28d6105
NC
13842 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13843 (U8*)SvPVX_const(utf8_buffer) + have,
13844 chars * 2, &newlen);
aa6dbd60 13845 } else {
2a773401 13846 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13847 (U8*)SvPVX_const(utf8_buffer) + have,
13848 chars * 2, &newlen);
2a773401 13849 }
c28d6105 13850 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13851 *end = '\0';
c28d6105 13852
e07286ed
NC
13853 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13854 it's private to us, and utf16_to_utf8{,reversed} take a
13855 (pointer,length) pair, rather than a NUL-terminated string. */
13856 if(SvCUR(utf16_buffer) & 1) {
13857 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13858 SvCUR_set(utf16_buffer, 1);
13859 } else {
13860 SvCUR_set(utf16_buffer, 0);
13861 }
2a773401 13862 }
c28d6105
NC
13863 DEBUG_P(PerlIO_printf(Perl_debug_log,
13864 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13865 status,
13866 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13867 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13868 return retval;
6e3aabd6 13869}
81a923f4
NC
13870
13871static U8 *
13872S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13873{
2a773401 13874 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13875
c85ae797
NC
13876 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13877
c28d6105 13878 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13879 sv_setpvs(filter, "");
2a773401 13880 IoLINES(filter) = reversed;
c28d6105
NC
13881 IoPAGE(filter) = 1; /* Not EOF */
13882
13883 /* Sadly, we have to return a valid pointer, come what may, so we have to
13884 ignore any error return from this. */
13885 SvCUR_set(PL_linestr, 0);
13886 if (FILTER_READ(0, PL_linestr, 0)) {
13887 SvUTF8_on(PL_linestr);
81a923f4 13888 } else {
c28d6105 13889 SvUTF8_on(PL_linestr);
81a923f4 13890 }
c28d6105 13891 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13892 return (U8*)SvPVX(PL_linestr);
13893}
6e3aabd6 13894#endif
9f4817db 13895
f333445c
JP
13896/*
13897Returns a pointer to the next character after the parsed
13898vstring, as well as updating the passed in sv.
13899
13900Function must be called like
13901
561b68a9 13902 sv = newSV(5);
65b06e02 13903 s = scan_vstring(s,e,sv);
f333445c 13904
65b06e02 13905where s and e are the start and end of the string.
f333445c
JP
13906The sv should already be large enough to store the vstring
13907passed in, for performance reasons.
13908
13909*/
13910
13911char *
15f169a1 13912Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13913{
97aff369 13914 dVAR;
bfed75c6
AL
13915 const char *pos = s;
13916 const char *start = s;
7918f24d
NC
13917
13918 PERL_ARGS_ASSERT_SCAN_VSTRING;
13919
f333445c 13920 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13921 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13922 pos++;
f333445c
JP
13923 if ( *pos != '.') {
13924 /* this may not be a v-string if followed by => */
bfed75c6 13925 const char *next = pos;
65b06e02 13926 while (next < e && isSPACE(*next))
8fc7bb1c 13927 ++next;
65b06e02 13928 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13929 /* return string not v-string */
13930 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13931 return (char *)pos;
f333445c
JP
13932 }
13933 }
13934
13935 if (!isALPHA(*pos)) {
89ebb4a3 13936 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13937
d4c19fe8
AL
13938 if (*s == 'v')
13939 s++; /* get past 'v' */
f333445c 13940
76f68e9b 13941 sv_setpvs(sv, "");
f333445c
JP
13942
13943 for (;;) {
d4c19fe8 13944 /* this is atoi() that tolerates underscores */
0bd48802
AL
13945 U8 *tmpend;
13946 UV rev = 0;
d4c19fe8
AL
13947 const char *end = pos;
13948 UV mult = 1;
13949 while (--end >= s) {
13950 if (*end != '_') {
13951 const UV orev = rev;
f333445c
JP
13952 rev += (*end - '0') * mult;
13953 mult *= 10;
9b387841
NC
13954 if (orev > rev)
13955 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13956 "Integer overflow in decimal number");
f333445c
JP
13957 }
13958 }
13959#ifdef EBCDIC
13960 if (rev > 0x7FFFFFFF)
13961 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13962#endif
13963 /* Append native character for the rev point */
13964 tmpend = uvchr_to_utf8(tmpbuf, rev);
13965 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13966 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13967 SvUTF8_on(sv);
65b06e02 13968 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13969 s = ++pos;
13970 else {
13971 s = pos;
13972 break;
13973 }
65b06e02 13974 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13975 pos++;
13976 }
13977 SvPOK_on(sv);
13978 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13979 SvRMAGICAL_on(sv);
13980 }
73d840c0 13981 return (char *)s;
f333445c
JP
13982}
13983
88e1f1a2
JV
13984int
13985Perl_keyword_plugin_standard(pTHX_
13986 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13987{
13988 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13989 PERL_UNUSED_CONTEXT;
13990 PERL_UNUSED_ARG(keyword_ptr);
13991 PERL_UNUSED_ARG(keyword_len);
13992 PERL_UNUSED_ARG(op_ptr);
13993 return KEYWORD_PLUGIN_DECLINE;
13994}
13995
a7aaec61 13996#define parse_recdescent(g) S_parse_recdescent(aTHX_ g)
e53d8f76
Z
13997static void
13998S_parse_recdescent(pTHX_ int gramtype)
a7aaec61
Z
13999{
14000 SAVEI32(PL_lex_brackets);
14001 if (PL_lex_brackets > 100)
14002 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
14003 PL_lex_brackstack[PL_lex_brackets++] = XFAKEEOF;
14004 if(yyparse(gramtype) && !PL_parser->error_count)
14005 qerror(Perl_mess(aTHX_ "Parse error"));
14006}
14007
e53d8f76
Z
14008#define parse_recdescent_for_op(g) S_parse_recdescent_for_op(aTHX_ g)
14009static OP *
14010S_parse_recdescent_for_op(pTHX_ int gramtype)
14011{
14012 OP *o;
14013 ENTER;
14014 SAVEVPTR(PL_eval_root);
14015 PL_eval_root = NULL;
14016 parse_recdescent(gramtype);
14017 o = PL_eval_root;
14018 LEAVE;
14019 return o;
14020}
14021
14022/*
14023=for apidoc Amx|OP *|parse_block|U32 flags
14024
14025Parse a single complete Perl code block. This consists of an opening
14026brace, a sequence of statements, and a closing brace. The block
14027constitutes a lexical scope, so C<my> variables and various compile-time
14028effects can be contained within it. It is up to the caller to ensure
14029that the dynamic parser state (L</PL_parser> et al) is correctly set to
14030reflect the source of the code to be parsed and the lexical context for
14031the statement.
14032
14033The op tree representing the code block is returned. This is always a
14034real op, never a null pointer. It will normally be a C<lineseq> list,
14035including C<nextstate> or equivalent ops. No ops to construct any kind
14036of runtime scope are included by virtue of it being a block.
14037
14038If an error occurs in parsing or compilation, in most cases a valid op
14039tree (most likely null) is returned anyway. The error is reflected in
14040the parser state, normally resulting in a single exception at the top
14041level of parsing which covers all the compilation errors that occurred.
14042Some compilation errors, however, will throw an exception immediately.
14043
14044The I<flags> parameter is reserved for future use, and must always
14045be zero.
14046
14047=cut
14048*/
14049
14050OP *
14051Perl_parse_block(pTHX_ U32 flags)
14052{
14053 if (flags)
14054 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_block");
14055 return parse_recdescent_for_op(GRAMBLOCK);
14056}
14057
1da4ca5f 14058/*
8359b381
Z
14059=for apidoc Amx|OP *|parse_barestmt|U32 flags
14060
14061Parse a single unadorned Perl statement. This may be a normal imperative
14062statement or a declaration that has compile-time effect. It does not
14063include any label or other affixture. It is up to the caller to ensure
14064that the dynamic parser state (L</PL_parser> et al) is correctly set to
14065reflect the source of the code to be parsed and the lexical context for
14066the statement.
14067
14068The op tree representing the statement is returned. This may be a
14069null pointer if the statement is null, for example if it was actually
14070a subroutine definition (which has compile-time side effects). If not
14071null, it will be ops directly implementing the statement, suitable to
14072pass to L</newSTATEOP>. It will not normally include a C<nextstate> or
14073equivalent op (except for those embedded in a scope contained entirely
14074within the statement).
14075
14076If an error occurs in parsing or compilation, in most cases a valid op
14077tree (most likely null) is returned anyway. The error is reflected in
14078the parser state, normally resulting in a single exception at the top
14079level of parsing which covers all the compilation errors that occurred.
14080Some compilation errors, however, will throw an exception immediately.
14081
14082The I<flags> parameter is reserved for future use, and must always
14083be zero.
14084
14085=cut
14086*/
14087
14088OP *
14089Perl_parse_barestmt(pTHX_ U32 flags)
14090{
14091 if (flags)
14092 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_barestmt");
14093 return parse_recdescent_for_op(GRAMBARESTMT);
14094}
14095
14096/*
361d9b55
Z
14097=for apidoc Amx|SV *|parse_label|U32 flags
14098
14099Parse a single label, possibly optional, of the type that may prefix a
14100Perl statement. It is up to the caller to ensure that the dynamic parser
14101state (L</PL_parser> et al) is correctly set to reflect the source of
14102the code to be parsed. If I<flags> includes C<PARSE_OPTIONAL> then the
14103label is optional, otherwise it is mandatory.
14104
14105The name of the label is returned in the form of a fresh scalar. If an
14106optional label is absent, a null pointer is returned.
14107
14108If an error occurs in parsing, which can only occur if the label is
14109mandatory, a valid label is returned anyway. The error is reflected in
14110the parser state, normally resulting in a single exception at the top
14111level of parsing which covers all the compilation errors that occurred.
14112
14113=cut
14114*/
14115
14116SV *
14117Perl_parse_label(pTHX_ U32 flags)
14118{
14119 if (flags & ~PARSE_OPTIONAL)
14120 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_label");
14121 if (PL_lex_state == LEX_KNOWNEXT) {
14122 PL_parser->yychar = yylex();
14123 if (PL_parser->yychar == LABEL) {
14124 char *lpv = pl_yylval.pval;
14125 STRLEN llen = strlen(lpv);
14126 SV *lsv;
14127 PL_parser->yychar = YYEMPTY;
14128 lsv = newSV_type(SVt_PV);
14129 SvPV_set(lsv, lpv);
14130 SvCUR_set(lsv, llen);
14131 SvLEN_set(lsv, llen+1);
14132 SvPOK_on(lsv);
14133 return lsv;
14134 } else {
14135 yyunlex();
14136 goto no_label;
14137 }
14138 } else {
14139 char *s, *t;
14140 U8 c;
14141 STRLEN wlen, bufptr_pos;
14142 lex_read_space(0);
14143 t = s = PL_bufptr;
14144 c = (U8)*s;
14145 if (!isIDFIRST_A(c))
14146 goto no_label;
14147 do {
14148 c = (U8)*++t;
14149 } while(isWORDCHAR_A(c));
14150 wlen = t - s;
14151 if (word_takes_any_delimeter(s, wlen))
14152 goto no_label;
14153 bufptr_pos = s - SvPVX(PL_linestr);
14154 PL_bufptr = t;
14155 lex_read_space(LEX_KEEP_PREVIOUS);
14156 t = PL_bufptr;
14157 s = SvPVX(PL_linestr) + bufptr_pos;
14158 if (t[0] == ':' && t[1] != ':') {
14159 PL_oldoldbufptr = PL_oldbufptr;
14160 PL_oldbufptr = s;
14161 PL_bufptr = t+1;
14162 return newSVpvn(s, wlen);
14163 } else {
14164 PL_bufptr = s;
14165 no_label:
14166 if (flags & PARSE_OPTIONAL) {
14167 return NULL;
14168 } else {
14169 qerror(Perl_mess(aTHX_ "Parse error"));
14170 return newSVpvs("x");
14171 }
14172 }
14173 }
14174}
14175
14176/*
28ac2b49
Z
14177=for apidoc Amx|OP *|parse_fullstmt|U32 flags
14178
14179Parse a single complete Perl statement. This may be a normal imperative
8359b381
Z
14180statement or a declaration that has compile-time effect, and may include
14181an optional label. It is up to the caller to ensure that the dynamic
28ac2b49
Z
14182parser state (L</PL_parser> et al) is correctly set to reflect the source
14183of the code to be parsed and the lexical context for the statement.
14184
14185The op tree representing the statement is returned. This may be a
14186null pointer if the statement is null, for example if it was actually
14187a subroutine definition (which has compile-time side effects). If not
14188null, it will be the result of a L</newSTATEOP> call, normally including
14189a C<nextstate> or equivalent op.
14190
14191If an error occurs in parsing or compilation, in most cases a valid op
14192tree (most likely null) is returned anyway. The error is reflected in
14193the parser state, normally resulting in a single exception at the top
14194level of parsing which covers all the compilation errors that occurred.
14195Some compilation errors, however, will throw an exception immediately.
14196
14197The I<flags> parameter is reserved for future use, and must always
14198be zero.
14199
14200=cut
14201*/
14202
14203OP *
14204Perl_parse_fullstmt(pTHX_ U32 flags)
14205{
28ac2b49
Z
14206 if (flags)
14207 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
e53d8f76 14208 return parse_recdescent_for_op(GRAMFULLSTMT);
28ac2b49
Z
14209}
14210
07ffcb73
Z
14211/*
14212=for apidoc Amx|OP *|parse_stmtseq|U32 flags
14213
14214Parse a sequence of zero or more Perl statements. These may be normal
14215imperative statements, including optional labels, or declarations
14216that have compile-time effect, or any mixture thereof. The statement
14217sequence ends when a closing brace or end-of-file is encountered in a
14218place where a new statement could have validly started. It is up to
14219the caller to ensure that the dynamic parser state (L</PL_parser> et al)
14220is correctly set to reflect the source of the code to be parsed and the
14221lexical context for the statements.
14222
14223The op tree representing the statement sequence is returned. This may
14224be a null pointer if the statements were all null, for example if there
14225were no statements or if there were only subroutine definitions (which
14226have compile-time side effects). If not null, it will be a C<lineseq>
14227list, normally including C<nextstate> or equivalent ops.
14228
14229If an error occurs in parsing or compilation, in most cases a valid op
14230tree is returned anyway. The error is reflected in the parser state,
14231normally resulting in a single exception at the top level of parsing
14232which covers all the compilation errors that occurred. Some compilation
14233errors, however, will throw an exception immediately.
14234
14235The I<flags> parameter is reserved for future use, and must always
14236be zero.
14237
14238=cut
14239*/
14240
14241OP *
14242Perl_parse_stmtseq(pTHX_ U32 flags)
14243{
14244 OP *stmtseqop;
e53d8f76 14245 I32 c;
07ffcb73
Z
14246 if (flags)
14247 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
e53d8f76
Z
14248 stmtseqop = parse_recdescent_for_op(GRAMSTMTSEQ);
14249 c = lex_peek_unichar(0);
14250 if (c != -1 && c != /*{*/'}')
07ffcb73 14251 qerror(Perl_mess(aTHX_ "Parse error"));
07ffcb73
Z
14252 return stmtseqop;
14253}
14254
ea25a9b2 14255void
f7e3d326 14256Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 14257{
f7e3d326 14258 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2
Z
14259 deprecate("qw(...) as parentheses");
14260 force_next(')');
14261 if (qwlist->op_type == OP_STUB) {
14262 op_free(qwlist);
14263 }
14264 else {
3d8e05a0 14265 start_force(PL_curforce);
ea25a9b2
Z
14266 NEXTVAL_NEXTTOKE.opval = qwlist;
14267 force_next(THING);
14268 }
14269 force_next('(');
14270}
14271
28ac2b49 14272/*
1da4ca5f
NC
14273 * Local variables:
14274 * c-indentation-style: bsd
14275 * c-basic-offset: 4
14276 * indent-tabs-mode: t
14277 * End:
14278 *
37442d52
RGS
14279 * ex: set ts=8 sts=4 sw=4 noet:
14280 */