This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Document that interpolating a '(??{ code })' construct in a regular
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
acdf0a21
DM
48/* YYINITDEPTH -- initial size of the parser's stacks. */
49#define YYINITDEPTH 200
50
199e78b7
DM
51/* XXX temporary backwards compatibility */
52#define PL_lex_brackets (PL_parser->lex_brackets)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
9059aa12
LW
129#define XFAKEBRACK 128
130#define XENUMMASK 127
131
39e02b42
JH
132#ifdef USE_UTF8_SCRIPTS
133# define UTF (!IN_BYTES)
2b9d42f0 134#else
746b446a 135# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 136#endif
a0ed51b3 137
b1fc3636
CJ
138/* The maximum number of characters preceding the unrecognized one to display */
139#define UNRECOGNIZED_PRECEDE_COUNT 10
140
61f0cdd9 141/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
142 * 1999-02-27 mjd-perl-patch@plover.com */
143#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
144
bf4acbe4 145#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 146
ffb4593c
NT
147/* LEX_* are values for PL_lex_state, the state of the lexer.
148 * They are arranged oddly so that the guard on the switch statement
79072805
LW
149 * can get by with a single comparison (if the compiler is smart enough).
150 */
151
fb73857a 152/* #define LEX_NOTPARSING 11 is done in perl.h. */
153
b6007c36
DM
154#define LEX_NORMAL 10 /* normal code (ie not within "...") */
155#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
156#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
157#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
158#define LEX_INTERPSTART 6 /* expecting the start of a $var */
159
160 /* at end of code, eg "$x" followed by: */
161#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
162#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
163
164#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
165 string or after \E, $foo, etc */
166#define LEX_INTERPCONST 2 /* NOT USED */
167#define LEX_FORMLINE 1 /* expecting a format line */
168#define LEX_KNOWNEXT 0 /* next token known; just return it */
169
79072805 170
bbf60fe6 171#ifdef DEBUGGING
27da23d5 172static const char* const lex_state_names[] = {
bbf60fe6
DM
173 "KNOWNEXT",
174 "FORMLINE",
175 "INTERPCONST",
176 "INTERPCONCAT",
177 "INTERPENDMAYBE",
178 "INTERPEND",
179 "INTERPSTART",
180 "INTERPPUSH",
181 "INTERPCASEMOD",
182 "INTERPNORMAL",
183 "NORMAL"
184};
185#endif
186
79072805
LW
187#ifdef ff_next
188#undef ff_next
d48672a2
LW
189#endif
190
79072805 191#include "keywords.h"
fe14fcc3 192
ffb4593c
NT
193/* CLINE is a macro that ensures PL_copline has a sane value */
194
ae986130
LW
195#ifdef CLINE
196#undef CLINE
197#endif
57843af0 198#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 199
5db06880 200#ifdef PERL_MAD
29595ff2
NC
201# define SKIPSPACE0(s) skipspace0(s)
202# define SKIPSPACE1(s) skipspace1(s)
203# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
204# define PEEKSPACE(s) skipspace2(s,0)
205#else
206# define SKIPSPACE0(s) skipspace(s)
207# define SKIPSPACE1(s) skipspace(s)
208# define SKIPSPACE2(s,tsv) skipspace(s)
209# define PEEKSPACE(s) skipspace(s)
210#endif
211
ffb4593c
NT
212/*
213 * Convenience functions to return different tokens and prime the
9cbb5ea2 214 * lexer for the next token. They all take an argument.
ffb4593c
NT
215 *
216 * TOKEN : generic token (used for '(', DOLSHARP, etc)
217 * OPERATOR : generic operator
218 * AOPERATOR : assignment operator
219 * PREBLOCK : beginning the block after an if, while, foreach, ...
220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
221 * PREREF : *EXPR where EXPR is not a simple identifier
222 * TERM : expression term
223 * LOOPX : loop exiting command (goto, last, dump, etc)
224 * FTST : file test operator
225 * FUN0 : zero-argument function
2d2e263d 226 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
227 * BOop : bitwise or or xor
228 * BAop : bitwise and
229 * SHop : shift operator
230 * PWop : power operator
9cbb5ea2 231 * PMop : pattern-matching operator
ffb4593c
NT
232 * Aop : addition-level operator
233 * Mop : multiplication-level operator
234 * Eop : equality-testing operator
e5edeb50 235 * Rop : relational operator <= != gt
ffb4593c
NT
236 *
237 * Also see LOP and lop() below.
238 */
239
998054bd 240#ifdef DEBUGGING /* Serve -DT. */
704d4215 241# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 242#else
bbf60fe6 243# define REPORT(retval) (retval)
998054bd
SC
244#endif
245
bbf60fe6
DM
246#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
247#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
248#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
249#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
250#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
251#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
252#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
253#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
254#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
255#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
376fcdbf 272#define UNI2(f,x) { \
6154021b 273 pl_yylval.ival = f; \
376fcdbf
AL
274 PL_expect = x; \
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
6f33ba73
RGS
283#define UNI(f) UNI2(f,XTERM)
284#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 285
376fcdbf 286#define UNIBRACK(f) { \
6154021b 287 pl_yylval.ival = f; \
376fcdbf
AL
288 PL_bufptr = s; \
289 PL_last_uni = PL_oldbufptr; \
290 if (*s == '(') \
291 return REPORT( (int)FUNC1 ); \
29595ff2 292 s = PEEKSPACE(s); \
376fcdbf
AL
293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 }
79072805 295
9f68db38 296/* grandfather return to old style */
6154021b 297#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 298
8fa7f367
JH
299#ifdef DEBUGGING
300
6154021b 301/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
302enum token_type {
303 TOKENTYPE_NONE,
304 TOKENTYPE_IVAL,
6154021b 305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
306 TOKENTYPE_PVAL,
307 TOKENTYPE_OPVAL,
308 TOKENTYPE_GVVAL
309};
310
6d4a66ac
NC
311static struct debug_tokens {
312 const int token;
313 enum token_type type;
314 const char *name;
315} const debug_tokens[] =
9041c2e3 316{
bbf60fe6
DM
317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
318 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
319 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
321 { ARROW, TOKENTYPE_NONE, "ARROW" },
322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
328 { DO, TOKENTYPE_NONE, "DO" },
329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
330 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
331 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
333 { ELSE, TOKENTYPE_NONE, "ELSE" },
334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
335 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
336 { FOR, TOKENTYPE_IVAL, "FOR" },
337 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
338 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
345 { IF, TOKENTYPE_IVAL, "IF" },
346 { LABEL, TOKENTYPE_PVAL, "LABEL" },
347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
352 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
353 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
354 { MY, TOKENTYPE_IVAL, "MY" },
355 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
356 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
357 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
358 { OROP, TOKENTYPE_IVAL, "OROP" },
359 { OROR, TOKENTYPE_NONE, "OROR" },
360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
365 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
366 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
367 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
368 { PREINC, TOKENTYPE_NONE, "PREINC" },
369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
370 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
371 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 385 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
386};
387
6154021b 388/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 389
bbf60fe6 390STATIC int
704d4215 391S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 392{
97aff369 393 dVAR;
7918f24d
NC
394
395 PERL_ARGS_ASSERT_TOKEREPORT;
396
bbf60fe6 397 if (DEBUG_T_TEST) {
bd61b366 398 const char *name = NULL;
bbf60fe6 399 enum token_type type = TOKENTYPE_NONE;
f54cb97a 400 const struct debug_tokens *p;
396482e1 401 SV* const report = newSVpvs("<== ");
bbf60fe6 402
f54cb97a 403 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
404 if (p->token == (int)rv) {
405 name = p->name;
406 type = p->type;
407 break;
408 }
409 }
410 if (name)
54667de8 411 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
412 else if ((char)rv > ' ' && (char)rv < '~')
413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
414 else if (!rv)
396482e1 415 sv_catpvs(report, "EOF");
bbf60fe6
DM
416 else
417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 switch (type) {
419 case TOKENTYPE_NONE:
420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 break;
422 case TOKENTYPE_IVAL:
704d4215 423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
424 break;
425 case TOKENTYPE_OPNUM:
426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 427 PL_op_name[lvalp->ival]);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_PVAL:
704d4215 430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
431 break;
432 case TOKENTYPE_OPVAL:
704d4215 433 if (lvalp->opval) {
401441c0 434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
435 PL_op_name[lvalp->opval->op_type]);
436 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 437 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 438 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
439 }
440
441 }
401441c0 442 else
396482e1 443 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
444 break;
445 }
b6007c36 446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
447 };
448 return (int)rv;
998054bd
SC
449}
450
b6007c36
DM
451
452/* print the buffer with suitable escapes */
453
454STATIC void
15f169a1 455S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 456{
396482e1 457 SV* const tmp = newSVpvs("");
7918f24d
NC
458
459 PERL_ARGS_ASSERT_PRINTBUF;
460
b6007c36
DM
461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
462 SvREFCNT_dec(tmp);
463}
464
8fa7f367
JH
465#endif
466
8290c323
NC
467static int
468S_deprecate_commaless_var_list(pTHX) {
469 PL_expect = XTERM;
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
472}
473
ffb4593c
NT
474/*
475 * S_ao
476 *
c963b151
BD
477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
479 */
480
76e3520e 481STATIC int
cea2e8a9 482S_ao(pTHX_ int toketype)
a0d0e21e 483{
97aff369 484 dVAR;
3280af22
NIS
485 if (*PL_bufptr == '=') {
486 PL_bufptr++;
a0d0e21e 487 if (toketype == ANDAND)
6154021b 488 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 489 else if (toketype == OROR)
6154021b 490 pl_yylval.ival = OP_ORASSIGN;
c963b151 491 else if (toketype == DORDOR)
6154021b 492 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
493 toketype = ASSIGNOP;
494 }
495 return toketype;
496}
497
ffb4593c
NT
498/*
499 * S_no_op
500 * When Perl expects an operator and finds something else, no_op
501 * prints the warning. It always prints "<something> found where
502 * operator expected. It prints "Missing semicolon on previous line?"
503 * if the surprise occurs at the start of the line. "do you need to
504 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
505 * where the compiler doesn't know if foo is a method call or a function.
506 * It prints "Missing operator before end of line" if there's nothing
507 * after the missing operator, or "... before <...>" if there is something
508 * after the missing operator.
509 */
510
76e3520e 511STATIC void
15f169a1 512S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 513{
97aff369 514 dVAR;
9d4ba2ae
AL
515 char * const oldbp = PL_bufptr;
516 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 517
7918f24d
NC
518 PERL_ARGS_ASSERT_NO_OP;
519
1189a94a
GS
520 if (!s)
521 s = oldbp;
07c798fb 522 else
1189a94a 523 PL_bufptr = s;
cea2e8a9 524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
525 if (ckWARN_d(WARN_SYNTAX)) {
526 if (is_first)
527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
528 "\t(Missing semicolon on previous line?)\n");
529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 530 const char *t;
c35e046a
AL
531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
532 NOOP;
56da5a46
RGS
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %.*s?)\n",
551405c4 536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
537 }
538 else {
539 assert(s >= oldbp);
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 542 }
07c798fb 543 }
3280af22 544 PL_bufptr = oldbp;
8990e307
LW
545}
546
ffb4593c
NT
547/*
548 * S_missingterm
549 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 550 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
551 * If we're in a delimited string and the delimiter is a control
552 * character, it's reformatted into a two-char sequence like ^C.
553 * This is fatal.
554 */
555
76e3520e 556STATIC void
cea2e8a9 557S_missingterm(pTHX_ char *s)
8990e307 558{
97aff369 559 dVAR;
8990e307
LW
560 char tmpbuf[3];
561 char q;
562 if (s) {
9d4ba2ae 563 char * const nl = strrchr(s,'\n');
d2719217 564 if (nl)
8990e307
LW
565 *nl = '\0';
566 }
463559e7 567 else if (isCNTRL(PL_multi_close)) {
8990e307 568 *tmpbuf = '^';
585ec06d 569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
570 tmpbuf[2] = '\0';
571 s = tmpbuf;
572 }
573 else {
eb160463 574 *tmpbuf = (char)PL_multi_close;
8990e307
LW
575 tmpbuf[1] = '\0';
576 s = tmpbuf;
577 }
578 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 580}
79072805 581
ef89dcc3 582#define FEATURE_IS_ENABLED(name) \
0d863452 583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 585/* The longest string we pass in. */
1863b879 586#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 587
0d863452
RH
588/*
589 * S_feature_is_enabled
590 * Check whether the named feature is enabled.
591 */
592STATIC bool
15f169a1 593S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 594{
97aff369 595 dVAR;
0d863452 596 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 597 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
598
599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
600
4a731d7b
NC
601 assert(namelen <= MAX_FEATURE_LEN);
602 memcpy(&he_name[8], name, namelen);
d4c19fe8 603
7b9ef140 604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
605}
606
ffb4593c 607/*
9cbb5ea2
GS
608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
609 * utf16-to-utf8-reversed.
ffb4593c
NT
610 */
611
c39cd008
GS
612#ifdef PERL_CR_FILTER
613static void
614strip_return(SV *sv)
615{
95a20fc0 616 register const char *s = SvPVX_const(sv);
9d4ba2ae 617 register const char * const e = s + SvCUR(sv);
7918f24d
NC
618
619 PERL_ARGS_ASSERT_STRIP_RETURN;
620
c39cd008
GS
621 /* outer loop optimized to do nothing if there are no CR-LFs */
622 while (s < e) {
623 if (*s++ == '\r' && *s == '\n') {
624 /* hit a CR-LF, need to copy the rest */
625 register char *d = s - 1;
626 *d++ = *s++;
627 while (s < e) {
628 if (*s == '\r' && s[1] == '\n')
629 s++;
630 *d++ = *s++;
631 }
632 SvCUR(sv) -= s - d;
633 return;
634 }
635 }
636}
a868473f 637
76e3520e 638STATIC I32
c39cd008 639S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 640{
f54cb97a 641 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
642 if (count > 0 && !maxlen)
643 strip_return(sv);
644 return count;
a868473f
NIS
645}
646#endif
647
199e78b7
DM
648
649
ffb4593c
NT
650/*
651 * Perl_lex_start
5486870f 652 *
e3abe207 653 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
654 *
655 * rsfp is the opened file handle to read from (if any),
656 *
657 * line holds any initial content already read from the file (or in
658 * the case of no file, such as an eval, the whole contents);
659 *
660 * new_filter indicates that this is a new file and it shouldn't inherit
661 * the filters from the current parser (ie require).
ffb4593c
NT
662 */
663
a0d0e21e 664void
5486870f 665Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 666{
97aff369 667 dVAR;
6ef55633 668 const char *s = NULL;
8990e307 669 STRLEN len;
5486870f 670 yy_parser *parser, *oparser;
acdf0a21
DM
671
672 /* create and initialise a parser */
673
199e78b7 674 Newxz(parser, 1, yy_parser);
5486870f 675 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
676 PL_parser = parser;
677
678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
679 parser->ps = parser->stack;
680 parser->stack_size = YYINITDEPTH;
681
682 parser->stack->state = 0;
683 parser->yyerrstatus = 0;
684 parser->yychar = YYEMPTY; /* Cause a token to be read. */
685
e3abe207
DM
686 /* on scope exit, free this parser and restore any outer one */
687 SAVEPARSER(parser);
7c4baf47 688 parser->saved_curcop = PL_curcop;
e3abe207 689
acdf0a21 690 /* initialise lexer state */
8990e307 691
fb205e7a
DM
692#ifdef PERL_MAD
693 parser->curforce = -1;
694#else
695 parser->nexttoke = 0;
696#endif
ca4cfd28 697 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 698 parser->copline = NOLINE;
5afb0a62 699 parser->lex_state = LEX_NORMAL;
c2598295 700 parser->expect = XSTATE;
2f9285f8 701 parser->rsfp = rsfp;
56b27c9a 702 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 704
199e78b7
DM
705 Newx(parser->lex_brackstack, 120, char);
706 Newx(parser->lex_casestack, 12, char);
707 *parser->lex_casestack = '\0';
02b34bbe 708
10efb74f
NC
709 if (line) {
710 s = SvPV_const(line, len);
711 } else {
712 len = 0;
713 }
bdc0bf6f 714
10efb74f 715 if (!len) {
bdc0bf6f 716 parser->linestr = newSVpvs("\n;");
10efb74f 717 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 718 parser->linestr = newSVsv(line);
10efb74f 719 if (s[len-1] != ';')
bdc0bf6f 720 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
721 } else {
722 SvTEMP_off(line);
723 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 724 parser->linestr = line;
8990e307 725 }
f06b5848
DM
726 parser->oldoldbufptr =
727 parser->oldbufptr =
728 parser->bufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
79072805 732}
a687059c 733
e3abe207
DM
734
735/* delete a parser object */
736
737void
738Perl_parser_free(pTHX_ const yy_parser *parser)
739{
7918f24d
NC
740 PERL_ARGS_ASSERT_PARSER_FREE;
741
7c4baf47 742 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
743 SvREFCNT_dec(parser->linestr);
744
2f9285f8
DM
745 if (parser->rsfp == PerlIO_stdin())
746 PerlIO_clearerr(parser->rsfp);
799361c3
SH
747 else if (parser->rsfp && (!parser->old_parser ||
748 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 749 PerlIO_close(parser->rsfp);
5486870f 750 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 751
e3abe207
DM
752 Safefree(parser->stack);
753 Safefree(parser->lex_brackstack);
754 Safefree(parser->lex_casestack);
755 PL_parser = parser->old_parser;
756 Safefree(parser);
757}
758
759
ffb4593c
NT
760/*
761 * Perl_lex_end
9cbb5ea2
GS
762 * Finalizer for lexing operations. Must be called when the parser is
763 * done with the lexer.
ffb4593c
NT
764 */
765
463ee0b2 766void
864dbfa3 767Perl_lex_end(pTHX)
463ee0b2 768{
97aff369 769 dVAR;
3280af22 770 PL_doextract = FALSE;
463ee0b2
LW
771}
772
ffb4593c 773/*
f0e67a1d
Z
774=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
775
776Buffer scalar containing the chunk currently under consideration of the
777text currently being lexed. This is always a plain string scalar (for
778which C<SvPOK> is true). It is not intended to be used as a scalar by
779normal scalar means; instead refer to the buffer directly by the pointer
780variables described below.
781
782The lexer maintains various C<char*> pointers to things in the
783C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
784reallocated, all of these pointers must be updated. Don't attempt to
785do this manually, but rather use L</lex_grow_linestr> if you need to
786reallocate the buffer.
787
788The content of the text chunk in the buffer is commonly exactly one
789complete line of input, up to and including a newline terminator,
790but there are situations where it is otherwise. The octets of the
791buffer may be intended to be interpreted as either UTF-8 or Latin-1.
792The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
793flag on this scalar, which may disagree with it.
794
795For direct examination of the buffer, the variable
796L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
797lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
798of these pointers is usually preferable to examination of the scalar
799through normal scalar means.
800
801=for apidoc AmxU|char *|PL_parser-E<gt>bufend
802
803Direct pointer to the end of the chunk of text currently being lexed, the
804end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
805+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
806always located at the end of the buffer, and does not count as part of
807the buffer's contents.
808
809=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
810
811Points to the current position of lexing inside the lexer buffer.
812Characters around this point may be freely examined, within
813the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
814L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
815interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
816
817Lexing code (whether in the Perl core or not) moves this pointer past
818the characters that it consumes. It is also expected to perform some
819bookkeeping whenever a newline character is consumed. This movement
820can be more conveniently performed by the function L</lex_read_to>,
821which handles newlines appropriately.
822
823Interpretation of the buffer's octets can be abstracted out by
824using the slightly higher-level functions L</lex_peek_unichar> and
825L</lex_read_unichar>.
826
827=for apidoc AmxU|char *|PL_parser-E<gt>linestart
828
829Points to the start of the current line inside the lexer buffer.
830This is useful for indicating at which column an error occurred, and
831not much else. This must be updated by any lexing code that consumes
832a newline; the function L</lex_read_to> handles this detail.
833
834=cut
835*/
836
837/*
838=for apidoc Amx|bool|lex_bufutf8
839
840Indicates whether the octets in the lexer buffer
841(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
842of Unicode characters. If not, they should be interpreted as Latin-1
843characters. This is analogous to the C<SvUTF8> flag for scalars.
844
845In UTF-8 mode, it is not guaranteed that the lexer buffer actually
846contains valid UTF-8. Lexing code must be robust in the face of invalid
847encoding.
848
849The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
850is significant, but not the whole story regarding the input character
851encoding. Normally, when a file is being read, the scalar contains octets
852and its C<SvUTF8> flag is off, but the octets should be interpreted as
853UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
854however, the scalar may have the C<SvUTF8> flag on, and in this case its
855octets should be interpreted as UTF-8 unless the C<use bytes> pragma
856is in effect. This logic may change in the future; use this function
857instead of implementing the logic yourself.
858
859=cut
860*/
861
862bool
863Perl_lex_bufutf8(pTHX)
864{
865 return UTF;
866}
867
868/*
869=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
870
871Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
872at least I<len> octets (including terminating NUL). Returns a
873pointer to the reallocated buffer. This is necessary before making
874any direct modification of the buffer that would increase its length.
875L</lex_stuff_pvn> provides a more convenient way to insert text into
876the buffer.
877
878Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
879this function updates all of the lexer's variables that point directly
880into the buffer.
881
882=cut
883*/
884
885char *
886Perl_lex_grow_linestr(pTHX_ STRLEN len)
887{
888 SV *linestr;
889 char *buf;
890 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
891 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
892 linestr = PL_parser->linestr;
893 buf = SvPVX(linestr);
894 if (len <= SvLEN(linestr))
895 return buf;
896 bufend_pos = PL_parser->bufend - buf;
897 bufptr_pos = PL_parser->bufptr - buf;
898 oldbufptr_pos = PL_parser->oldbufptr - buf;
899 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
900 linestart_pos = PL_parser->linestart - buf;
901 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
902 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
903 buf = sv_grow(linestr, len);
904 PL_parser->bufend = buf + bufend_pos;
905 PL_parser->bufptr = buf + bufptr_pos;
906 PL_parser->oldbufptr = buf + oldbufptr_pos;
907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
908 PL_parser->linestart = buf + linestart_pos;
909 if (PL_parser->last_uni)
910 PL_parser->last_uni = buf + last_uni_pos;
911 if (PL_parser->last_lop)
912 PL_parser->last_lop = buf + last_lop_pos;
913 return buf;
914}
915
916/*
917=for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
918
919Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
920immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
921reallocating the buffer if necessary. This means that lexing code that
922runs later will see the characters as if they had appeared in the input.
923It is not recommended to do this as part of normal parsing, and most
924uses of this facility run the risk of the inserted characters being
925interpreted in an unintended manner.
926
927The string to be inserted is represented by I<len> octets starting
928at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
929according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
930The characters are recoded for the lexer buffer, according to how the
931buffer is currently being interpreted (L</lex_bufutf8>). If a string
932to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
933function is more convenient.
934
935=cut
936*/
937
938void
939Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
940{
941 char *bufptr;
942 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
943 if (flags & ~(LEX_STUFF_UTF8))
944 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
945 if (UTF) {
946 if (flags & LEX_STUFF_UTF8) {
947 goto plain_copy;
948 } else {
949 STRLEN highhalf = 0;
950 char *p, *e = pv+len;
951 for (p = pv; p != e; p++)
952 highhalf += !!(((U8)*p) & 0x80);
953 if (!highhalf)
954 goto plain_copy;
955 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
956 bufptr = PL_parser->bufptr;
957 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
958 PL_parser->bufend += len+highhalf;
959 for (p = pv; p != e; p++) {
960 U8 c = (U8)*p;
961 if (c & 0x80) {
962 *bufptr++ = (char)(0xc0 | (c >> 6));
963 *bufptr++ = (char)(0x80 | (c & 0x3f));
964 } else {
965 *bufptr++ = (char)c;
966 }
967 }
968 }
969 } else {
970 if (flags & LEX_STUFF_UTF8) {
971 STRLEN highhalf = 0;
972 char *p, *e = pv+len;
973 for (p = pv; p != e; p++) {
974 U8 c = (U8)*p;
975 if (c >= 0xc4) {
976 Perl_croak(aTHX_ "Lexing code attempted to stuff "
977 "non-Latin-1 character into Latin-1 input");
978 } else if (c >= 0xc2 && p+1 != e &&
979 (((U8)p[1]) & 0xc0) == 0x80) {
980 p++;
981 highhalf++;
982 } else if (c >= 0x80) {
983 /* malformed UTF-8 */
984 ENTER;
985 SAVESPTR(PL_warnhook);
986 PL_warnhook = PERL_WARNHOOK_FATAL;
987 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
988 LEAVE;
989 }
990 }
991 if (!highhalf)
992 goto plain_copy;
993 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
994 bufptr = PL_parser->bufptr;
995 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
996 PL_parser->bufend += len-highhalf;
997 for (p = pv; p != e; p++) {
998 U8 c = (U8)*p;
999 if (c & 0x80) {
1000 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1001 p++;
1002 } else {
1003 *bufptr++ = (char)c;
1004 }
1005 }
1006 } else {
1007 plain_copy:
1008 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1009 bufptr = PL_parser->bufptr;
1010 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1011 PL_parser->bufend += len;
1012 Copy(pv, bufptr, len, char);
1013 }
1014 }
1015}
1016
1017/*
1018=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1019
1020Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1021immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1022reallocating the buffer if necessary. This means that lexing code that
1023runs later will see the characters as if they had appeared in the input.
1024It is not recommended to do this as part of normal parsing, and most
1025uses of this facility run the risk of the inserted characters being
1026interpreted in an unintended manner.
1027
1028The string to be inserted is the string value of I<sv>. The characters
1029are recoded for the lexer buffer, according to how the buffer is currently
1030being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1031not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1032need to construct a scalar.
1033
1034=cut
1035*/
1036
1037void
1038Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1039{
1040 char *pv;
1041 STRLEN len;
1042 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1043 if (flags)
1044 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1045 pv = SvPV(sv, len);
1046 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1047}
1048
1049/*
1050=for apidoc Amx|void|lex_unstuff|char *ptr
1051
1052Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1053I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1054This hides the discarded text from any lexing code that runs later,
1055as if the text had never appeared.
1056
1057This is not the normal way to consume lexed text. For that, use
1058L</lex_read_to>.
1059
1060=cut
1061*/
1062
1063void
1064Perl_lex_unstuff(pTHX_ char *ptr)
1065{
1066 char *buf, *bufend;
1067 STRLEN unstuff_len;
1068 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1069 buf = PL_parser->bufptr;
1070 if (ptr < buf)
1071 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1072 if (ptr == buf)
1073 return;
1074 bufend = PL_parser->bufend;
1075 if (ptr > bufend)
1076 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1077 unstuff_len = ptr - buf;
1078 Move(ptr, buf, bufend+1-ptr, char);
1079 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1080 PL_parser->bufend = bufend - unstuff_len;
1081}
1082
1083/*
1084=for apidoc Amx|void|lex_read_to|char *ptr
1085
1086Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1087to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1088performing the correct bookkeeping whenever a newline character is passed.
1089This is the normal way to consume lexed text.
1090
1091Interpretation of the buffer's octets can be abstracted out by
1092using the slightly higher-level functions L</lex_peek_unichar> and
1093L</lex_read_unichar>.
1094
1095=cut
1096*/
1097
1098void
1099Perl_lex_read_to(pTHX_ char *ptr)
1100{
1101 char *s;
1102 PERL_ARGS_ASSERT_LEX_READ_TO;
1103 s = PL_parser->bufptr;
1104 if (ptr < s || ptr > PL_parser->bufend)
1105 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1106 for (; s != ptr; s++)
1107 if (*s == '\n') {
1108 CopLINE_inc(PL_curcop);
1109 PL_parser->linestart = s+1;
1110 }
1111 PL_parser->bufptr = ptr;
1112}
1113
1114/*
1115=for apidoc Amx|void|lex_discard_to|char *ptr
1116
1117Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1118up to I<ptr>. The remaining content of the buffer will be moved, and
1119all pointers into the buffer updated appropriately. I<ptr> must not
1120be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1121it is not permitted to discard text that has yet to be lexed.
1122
1123Normally it is not necessarily to do this directly, because it suffices to
1124use the implicit discarding behaviour of L</lex_next_chunk> and things
1125based on it. However, if a token stretches across multiple lines,
1126and the lexing code has kept multiple lines of text in the buffer fof
1127that purpose, then after completion of the token it would be wise to
1128explicitly discard the now-unneeded earlier lines, to avoid future
1129multi-line tokens growing the buffer without bound.
1130
1131=cut
1132*/
1133
1134void
1135Perl_lex_discard_to(pTHX_ char *ptr)
1136{
1137 char *buf;
1138 STRLEN discard_len;
1139 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1140 buf = SvPVX(PL_parser->linestr);
1141 if (ptr < buf)
1142 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1143 if (ptr == buf)
1144 return;
1145 if (ptr > PL_parser->bufptr)
1146 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1147 discard_len = ptr - buf;
1148 if (PL_parser->oldbufptr < ptr)
1149 PL_parser->oldbufptr = ptr;
1150 if (PL_parser->oldoldbufptr < ptr)
1151 PL_parser->oldoldbufptr = ptr;
1152 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1153 PL_parser->last_uni = NULL;
1154 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1155 PL_parser->last_lop = NULL;
1156 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1157 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1158 PL_parser->bufend -= discard_len;
1159 PL_parser->bufptr -= discard_len;
1160 PL_parser->oldbufptr -= discard_len;
1161 PL_parser->oldoldbufptr -= discard_len;
1162 if (PL_parser->last_uni)
1163 PL_parser->last_uni -= discard_len;
1164 if (PL_parser->last_lop)
1165 PL_parser->last_lop -= discard_len;
1166}
1167
1168/*
1169=for apidoc Amx|bool|lex_next_chunk|U32 flags
1170
1171Reads in the next chunk of text to be lexed, appending it to
1172L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1173looked to the end of the current chunk and wants to know more. It is
1174usual, but not necessary, for lexing to have consumed the entirety of
1175the current chunk at this time.
1176
1177If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1178chunk (i.e., the current chunk has been entirely consumed), normally the
1179current chunk will be discarded at the same time that the new chunk is
1180read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1181will not be discarded. If the current chunk has not been entirely
1182consumed, then it will not be discarded regardless of the flag.
1183
1184Returns true if some new text was added to the buffer, or false if the
1185buffer has reached the end of the input text.
1186
1187=cut
1188*/
1189
1190#define LEX_FAKE_EOF 0x80000000
1191
1192bool
1193Perl_lex_next_chunk(pTHX_ U32 flags)
1194{
1195 SV *linestr;
1196 char *buf;
1197 STRLEN old_bufend_pos, new_bufend_pos;
1198 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1199 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1200 bool got_some_for_debugger = 0;
f0e67a1d
Z
1201 bool got_some;
1202 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1203 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1204 linestr = PL_parser->linestr;
1205 buf = SvPVX(linestr);
1206 if (!(flags & LEX_KEEP_PREVIOUS) &&
1207 PL_parser->bufptr == PL_parser->bufend) {
1208 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1209 linestart_pos = 0;
1210 if (PL_parser->last_uni != PL_parser->bufend)
1211 PL_parser->last_uni = NULL;
1212 if (PL_parser->last_lop != PL_parser->bufend)
1213 PL_parser->last_lop = NULL;
1214 last_uni_pos = last_lop_pos = 0;
1215 *buf = 0;
1216 SvCUR(linestr) = 0;
1217 } else {
1218 old_bufend_pos = PL_parser->bufend - buf;
1219 bufptr_pos = PL_parser->bufptr - buf;
1220 oldbufptr_pos = PL_parser->oldbufptr - buf;
1221 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1222 linestart_pos = PL_parser->linestart - buf;
1223 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1224 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1225 }
1226 if (flags & LEX_FAKE_EOF) {
1227 goto eof;
1228 } else if (!PL_parser->rsfp) {
1229 got_some = 0;
1230 } else if (filter_gets(linestr, old_bufend_pos)) {
1231 got_some = 1;
17cc9359 1232 got_some_for_debugger = 1;
f0e67a1d 1233 } else {
580561a3
Z
1234 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1235 sv_setpvs(linestr, "");
f0e67a1d
Z
1236 eof:
1237 /* End of real input. Close filehandle (unless it was STDIN),
1238 * then add implicit termination.
1239 */
1240 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1241 PerlIO_clearerr(PL_parser->rsfp);
1242 else if (PL_parser->rsfp)
1243 (void)PerlIO_close(PL_parser->rsfp);
1244 PL_parser->rsfp = NULL;
1245 PL_doextract = FALSE;
1246#ifdef PERL_MAD
1247 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1248 PL_faketokens = 1;
1249#endif
1250 if (!PL_in_eval && PL_minus_p) {
1251 sv_catpvs(linestr,
1252 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1253 PL_minus_n = PL_minus_p = 0;
1254 } else if (!PL_in_eval && PL_minus_n) {
1255 sv_catpvs(linestr, /*{*/";}");
1256 PL_minus_n = 0;
1257 } else
1258 sv_catpvs(linestr, ";");
1259 got_some = 1;
1260 }
1261 buf = SvPVX(linestr);
1262 new_bufend_pos = SvCUR(linestr);
1263 PL_parser->bufend = buf + new_bufend_pos;
1264 PL_parser->bufptr = buf + bufptr_pos;
1265 PL_parser->oldbufptr = buf + oldbufptr_pos;
1266 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1267 PL_parser->linestart = buf + linestart_pos;
1268 if (PL_parser->last_uni)
1269 PL_parser->last_uni = buf + last_uni_pos;
1270 if (PL_parser->last_lop)
1271 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1272 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1273 PL_curstash != PL_debstash) {
1274 /* debugger active and we're not compiling the debugger code,
1275 * so store the line into the debugger's array of lines
1276 */
1277 update_debugger_info(NULL, buf+old_bufend_pos,
1278 new_bufend_pos-old_bufend_pos);
1279 }
1280 return got_some;
1281}
1282
1283/*
1284=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1285
1286Looks ahead one (Unicode) character in the text currently being lexed.
1287Returns the codepoint (unsigned integer value) of the next character,
1288or -1 if lexing has reached the end of the input text. To consume the
1289peeked character, use L</lex_read_unichar>.
1290
1291If the next character is in (or extends into) the next chunk of input
1292text, the next chunk will be read in. Normally the current chunk will be
1293discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1294then the current chunk will not be discarded.
1295
1296If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1297is encountered, an exception is generated.
1298
1299=cut
1300*/
1301
1302I32
1303Perl_lex_peek_unichar(pTHX_ U32 flags)
1304{
1305 char *s, *bufend;
1306 if (flags & ~(LEX_KEEP_PREVIOUS))
1307 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1308 s = PL_parser->bufptr;
1309 bufend = PL_parser->bufend;
1310 if (UTF) {
1311 U8 head;
1312 I32 unichar;
1313 STRLEN len, retlen;
1314 if (s == bufend) {
1315 if (!lex_next_chunk(flags))
1316 return -1;
1317 s = PL_parser->bufptr;
1318 bufend = PL_parser->bufend;
1319 }
1320 head = (U8)*s;
1321 if (!(head & 0x80))
1322 return head;
1323 if (head & 0x40) {
1324 len = PL_utf8skip[head];
1325 while ((STRLEN)(bufend-s) < len) {
1326 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1327 break;
1328 s = PL_parser->bufptr;
1329 bufend = PL_parser->bufend;
1330 }
1331 }
1332 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1333 if (retlen == (STRLEN)-1) {
1334 /* malformed UTF-8 */
1335 ENTER;
1336 SAVESPTR(PL_warnhook);
1337 PL_warnhook = PERL_WARNHOOK_FATAL;
1338 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1339 LEAVE;
1340 }
1341 return unichar;
1342 } else {
1343 if (s == bufend) {
1344 if (!lex_next_chunk(flags))
1345 return -1;
1346 s = PL_parser->bufptr;
1347 }
1348 return (U8)*s;
1349 }
1350}
1351
1352/*
1353=for apidoc Amx|I32|lex_read_unichar|U32 flags
1354
1355Reads the next (Unicode) character in the text currently being lexed.
1356Returns the codepoint (unsigned integer value) of the character read,
1357and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1358if lexing has reached the end of the input text. To non-destructively
1359examine the next character, use L</lex_peek_unichar> instead.
1360
1361If the next character is in (or extends into) the next chunk of input
1362text, the next chunk will be read in. Normally the current chunk will be
1363discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1364then the current chunk will not be discarded.
1365
1366If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1367is encountered, an exception is generated.
1368
1369=cut
1370*/
1371
1372I32
1373Perl_lex_read_unichar(pTHX_ U32 flags)
1374{
1375 I32 c;
1376 if (flags & ~(LEX_KEEP_PREVIOUS))
1377 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1378 c = lex_peek_unichar(flags);
1379 if (c != -1) {
1380 if (c == '\n')
1381 CopLINE_inc(PL_curcop);
1382 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1383 }
1384 return c;
1385}
1386
1387/*
1388=for apidoc Amx|void|lex_read_space|U32 flags
1389
1390Reads optional spaces, in Perl style, in the text currently being
1391lexed. The spaces may include ordinary whitespace characters and
1392Perl-style comments. C<#line> directives are processed if encountered.
1393L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1394at a non-space character (or the end of the input text).
1395
1396If spaces extend into the next chunk of input text, the next chunk will
1397be read in. Normally the current chunk will be discarded at the same
1398time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1399chunk will not be discarded.
1400
1401=cut
1402*/
1403
1404void
1405Perl_lex_read_space(pTHX_ U32 flags)
1406{
1407 char *s, *bufend;
1408 bool need_incline = 0;
1409 if (flags & ~(LEX_KEEP_PREVIOUS))
1410 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1411#ifdef PERL_MAD
1412 if (PL_skipwhite) {
1413 sv_free(PL_skipwhite);
1414 PL_skipwhite = NULL;
1415 }
1416 if (PL_madskills)
1417 PL_skipwhite = newSVpvs("");
1418#endif /* PERL_MAD */
1419 s = PL_parser->bufptr;
1420 bufend = PL_parser->bufend;
1421 while (1) {
1422 char c = *s;
1423 if (c == '#') {
1424 do {
1425 c = *++s;
1426 } while (!(c == '\n' || (c == 0 && s == bufend)));
1427 } else if (c == '\n') {
1428 s++;
1429 PL_parser->linestart = s;
1430 if (s == bufend)
1431 need_incline = 1;
1432 else
1433 incline(s);
1434 } else if (isSPACE(c)) {
1435 s++;
1436 } else if (c == 0 && s == bufend) {
1437 bool got_more;
1438#ifdef PERL_MAD
1439 if (PL_madskills)
1440 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1441#endif /* PERL_MAD */
1442 PL_parser->bufptr = s;
1443 CopLINE_inc(PL_curcop);
1444 got_more = lex_next_chunk(flags);
1445 CopLINE_dec(PL_curcop);
1446 s = PL_parser->bufptr;
1447 bufend = PL_parser->bufend;
1448 if (!got_more)
1449 break;
1450 if (need_incline && PL_parser->rsfp) {
1451 incline(s);
1452 need_incline = 0;
1453 }
1454 } else {
1455 break;
1456 }
1457 }
1458#ifdef PERL_MAD
1459 if (PL_madskills)
1460 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1461#endif /* PERL_MAD */
1462 PL_parser->bufptr = s;
1463}
1464
1465/*
ffb4593c
NT
1466 * S_incline
1467 * This subroutine has nothing to do with tilting, whether at windmills
1468 * or pinball tables. Its name is short for "increment line". It
57843af0 1469 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1470 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1471 * # line 500 "foo.pm"
1472 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1473 */
1474
76e3520e 1475STATIC void
d9095cec 1476S_incline(pTHX_ const char *s)
463ee0b2 1477{
97aff369 1478 dVAR;
d9095cec
NC
1479 const char *t;
1480 const char *n;
1481 const char *e;
463ee0b2 1482
7918f24d
NC
1483 PERL_ARGS_ASSERT_INCLINE;
1484
57843af0 1485 CopLINE_inc(PL_curcop);
463ee0b2
LW
1486 if (*s++ != '#')
1487 return;
d4c19fe8
AL
1488 while (SPACE_OR_TAB(*s))
1489 s++;
73659bf1
GS
1490 if (strnEQ(s, "line", 4))
1491 s += 4;
1492 else
1493 return;
084592ab 1494 if (SPACE_OR_TAB(*s))
73659bf1 1495 s++;
4e553d73 1496 else
73659bf1 1497 return;
d4c19fe8
AL
1498 while (SPACE_OR_TAB(*s))
1499 s++;
463ee0b2
LW
1500 if (!isDIGIT(*s))
1501 return;
d4c19fe8 1502
463ee0b2
LW
1503 n = s;
1504 while (isDIGIT(*s))
1505 s++;
07714eb4 1506 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1507 return;
bf4acbe4 1508 while (SPACE_OR_TAB(*s))
463ee0b2 1509 s++;
73659bf1 1510 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1511 s++;
73659bf1
GS
1512 e = t + 1;
1513 }
463ee0b2 1514 else {
c35e046a
AL
1515 t = s;
1516 while (!isSPACE(*t))
1517 t++;
73659bf1 1518 e = t;
463ee0b2 1519 }
bf4acbe4 1520 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1521 e++;
1522 if (*e != '\n' && *e != '\0')
1523 return; /* false alarm */
1524
f4dd75d9 1525 if (t - s > 0) {
d9095cec 1526 const STRLEN len = t - s;
8a5ee598 1527#ifndef USE_ITHREADS
19bad673
NC
1528 SV *const temp_sv = CopFILESV(PL_curcop);
1529 const char *cf;
1530 STRLEN tmplen;
1531
1532 if (temp_sv) {
1533 cf = SvPVX(temp_sv);
1534 tmplen = SvCUR(temp_sv);
1535 } else {
1536 cf = NULL;
1537 tmplen = 0;
1538 }
1539
42d9b98d 1540 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1541 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1542 * to *{"::_<newfilename"} */
44867030
NC
1543 /* However, the long form of evals is only turned on by the
1544 debugger - usually they're "(eval %lu)" */
1545 char smallbuf[128];
1546 char *tmpbuf;
1547 GV **gvp;
d9095cec 1548 STRLEN tmplen2 = len;
798b63bc 1549 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1550 tmpbuf = smallbuf;
1551 else
2ae0db35 1552 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1553 tmpbuf[0] = '_';
1554 tmpbuf[1] = '<';
2ae0db35 1555 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1556 tmplen += 2;
8a5ee598
RGS
1557 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1558 if (gvp) {
44867030
NC
1559 char *tmpbuf2;
1560 GV *gv2;
1561
1562 if (tmplen2 + 2 <= sizeof smallbuf)
1563 tmpbuf2 = smallbuf;
1564 else
1565 Newx(tmpbuf2, tmplen2 + 2, char);
1566
1567 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1568 /* Either they malloc'd it, or we malloc'd it,
1569 so no prefix is present in ours. */
1570 tmpbuf2[0] = '_';
1571 tmpbuf2[1] = '<';
1572 }
1573
1574 memcpy(tmpbuf2 + 2, s, tmplen2);
1575 tmplen2 += 2;
1576
8a5ee598 1577 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1578 if (!isGV(gv2)) {
8a5ee598 1579 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1580 /* adjust ${"::_<newfilename"} to store the new file name */
1581 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1582 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1583 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1584 }
44867030
NC
1585
1586 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1587 }
e66cf94c 1588 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1589 }
8a5ee598 1590#endif
05ec9bb3 1591 CopFILE_free(PL_curcop);
d9095cec 1592 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1593 }
57843af0 1594 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1595}
1596
29595ff2 1597#ifdef PERL_MAD
cd81e915 1598/* skip space before PL_thistoken */
29595ff2
NC
1599
1600STATIC char *
1601S_skipspace0(pTHX_ register char *s)
1602{
7918f24d
NC
1603 PERL_ARGS_ASSERT_SKIPSPACE0;
1604
29595ff2
NC
1605 s = skipspace(s);
1606 if (!PL_madskills)
1607 return s;
cd81e915
NC
1608 if (PL_skipwhite) {
1609 if (!PL_thiswhite)
6b29d1f5 1610 PL_thiswhite = newSVpvs("");
cd81e915
NC
1611 sv_catsv(PL_thiswhite, PL_skipwhite);
1612 sv_free(PL_skipwhite);
1613 PL_skipwhite = 0;
1614 }
1615 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1616 return s;
1617}
1618
cd81e915 1619/* skip space after PL_thistoken */
29595ff2
NC
1620
1621STATIC char *
1622S_skipspace1(pTHX_ register char *s)
1623{
d4c19fe8 1624 const char *start = s;
29595ff2
NC
1625 I32 startoff = start - SvPVX(PL_linestr);
1626
7918f24d
NC
1627 PERL_ARGS_ASSERT_SKIPSPACE1;
1628
29595ff2
NC
1629 s = skipspace(s);
1630 if (!PL_madskills)
1631 return s;
1632 start = SvPVX(PL_linestr) + startoff;
cd81e915 1633 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1634 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1635 PL_thistoken = newSVpvn(tstart, start - tstart);
1636 }
1637 PL_realtokenstart = -1;
1638 if (PL_skipwhite) {
1639 if (!PL_nextwhite)
6b29d1f5 1640 PL_nextwhite = newSVpvs("");
cd81e915
NC
1641 sv_catsv(PL_nextwhite, PL_skipwhite);
1642 sv_free(PL_skipwhite);
1643 PL_skipwhite = 0;
29595ff2
NC
1644 }
1645 return s;
1646}
1647
1648STATIC char *
1649S_skipspace2(pTHX_ register char *s, SV **svp)
1650{
c35e046a
AL
1651 char *start;
1652 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1653 const I32 startoff = s - SvPVX(PL_linestr);
1654
7918f24d
NC
1655 PERL_ARGS_ASSERT_SKIPSPACE2;
1656
29595ff2
NC
1657 s = skipspace(s);
1658 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1659 if (!PL_madskills || !svp)
1660 return s;
1661 start = SvPVX(PL_linestr) + startoff;
cd81e915 1662 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1663 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1664 PL_thistoken = newSVpvn(tstart, start - tstart);
1665 PL_realtokenstart = -1;
29595ff2 1666 }
cd81e915 1667 if (PL_skipwhite) {
29595ff2 1668 if (!*svp)
6b29d1f5 1669 *svp = newSVpvs("");
cd81e915
NC
1670 sv_setsv(*svp, PL_skipwhite);
1671 sv_free(PL_skipwhite);
1672 PL_skipwhite = 0;
29595ff2
NC
1673 }
1674
1675 return s;
1676}
1677#endif
1678
80a702cd 1679STATIC void
15f169a1 1680S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1681{
1682 AV *av = CopFILEAVx(PL_curcop);
1683 if (av) {
b9f83d2f 1684 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1685 if (orig_sv)
1686 sv_setsv(sv, orig_sv);
1687 else
1688 sv_setpvn(sv, buf, len);
80a702cd
RGS
1689 (void)SvIOK_on(sv);
1690 SvIV_set(sv, 0);
1691 av_store(av, (I32)CopLINE(PL_curcop), sv);
1692 }
1693}
1694
ffb4593c
NT
1695/*
1696 * S_skipspace
1697 * Called to gobble the appropriate amount and type of whitespace.
1698 * Skips comments as well.
1699 */
1700
76e3520e 1701STATIC char *
cea2e8a9 1702S_skipspace(pTHX_ register char *s)
a687059c 1703{
5db06880 1704#ifdef PERL_MAD
f0e67a1d
Z
1705 char *start = s;
1706#endif /* PERL_MAD */
7918f24d 1707 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1708#ifdef PERL_MAD
cd81e915
NC
1709 if (PL_skipwhite) {
1710 sv_free(PL_skipwhite);
f0e67a1d 1711 PL_skipwhite = NULL;
5db06880 1712 }
f0e67a1d 1713#endif /* PERL_MAD */
3280af22 1714 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1715 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1716 s++;
f0e67a1d
Z
1717 } else if (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE) {
1718 while (isSPACE(*s) && *s != '\n')
1719 s++;
1720 if (*s == '#') {
1721 do {
463ee0b2 1722 s++;
f0e67a1d 1723 } while (s != PL_bufend && *s != '\n');
463ee0b2 1724 }
f0e67a1d
Z
1725 if (*s == '\n')
1726 s++;
1727 } else {
1728 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1729 PL_bufptr = s;
1730 lex_read_space(LEX_KEEP_PREVIOUS);
3280af22 1731 s = PL_bufptr;
f0e67a1d
Z
1732 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1733 if (PL_linestart > PL_bufptr)
1734 PL_bufptr = PL_linestart;
1735 return s;
463ee0b2 1736 }
5db06880 1737#ifdef PERL_MAD
f0e67a1d
Z
1738 if (PL_madskills)
1739 PL_skipwhite = newSVpvn(start, s-start);
1740#endif /* PERL_MAD */
5db06880 1741 return s;
a687059c 1742}
378cc40b 1743
ffb4593c
NT
1744/*
1745 * S_check_uni
1746 * Check the unary operators to ensure there's no ambiguity in how they're
1747 * used. An ambiguous piece of code would be:
1748 * rand + 5
1749 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1750 * the +5 is its argument.
1751 */
1752
76e3520e 1753STATIC void
cea2e8a9 1754S_check_uni(pTHX)
ba106d47 1755{
97aff369 1756 dVAR;
d4c19fe8
AL
1757 const char *s;
1758 const char *t;
2f3197b3 1759
3280af22 1760 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1761 return;
3280af22
NIS
1762 while (isSPACE(*PL_last_uni))
1763 PL_last_uni++;
c35e046a
AL
1764 s = PL_last_uni;
1765 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1766 s++;
3280af22 1767 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1768 return;
6136c704 1769
9b387841
NC
1770 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1771 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1772 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1773}
1774
ffb4593c
NT
1775/*
1776 * LOP : macro to build a list operator. Its behaviour has been replaced
1777 * with a subroutine, S_lop() for which LOP is just another name.
1778 */
1779
a0d0e21e
LW
1780#define LOP(f,x) return lop(f,x,s)
1781
ffb4593c
NT
1782/*
1783 * S_lop
1784 * Build a list operator (or something that might be one). The rules:
1785 * - if we have a next token, then it's a list operator [why?]
1786 * - if the next thing is an opening paren, then it's a function
1787 * - else it's a list operator
1788 */
1789
76e3520e 1790STATIC I32
a0be28da 1791S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1792{
97aff369 1793 dVAR;
7918f24d
NC
1794
1795 PERL_ARGS_ASSERT_LOP;
1796
6154021b 1797 pl_yylval.ival = f;
35c8bce7 1798 CLINE;
3280af22
NIS
1799 PL_expect = x;
1800 PL_bufptr = s;
1801 PL_last_lop = PL_oldbufptr;
eb160463 1802 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1803#ifdef PERL_MAD
1804 if (PL_lasttoke)
1805 return REPORT(LSTOP);
1806#else
3280af22 1807 if (PL_nexttoke)
bbf60fe6 1808 return REPORT(LSTOP);
5db06880 1809#endif
79072805 1810 if (*s == '(')
bbf60fe6 1811 return REPORT(FUNC);
29595ff2 1812 s = PEEKSPACE(s);
79072805 1813 if (*s == '(')
bbf60fe6 1814 return REPORT(FUNC);
79072805 1815 else
bbf60fe6 1816 return REPORT(LSTOP);
79072805
LW
1817}
1818
5db06880
NC
1819#ifdef PERL_MAD
1820 /*
1821 * S_start_force
1822 * Sets up for an eventual force_next(). start_force(0) basically does
1823 * an unshift, while start_force(-1) does a push. yylex removes items
1824 * on the "pop" end.
1825 */
1826
1827STATIC void
1828S_start_force(pTHX_ int where)
1829{
1830 int i;
1831
cd81e915 1832 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1833 where = PL_lasttoke;
cd81e915
NC
1834 assert(PL_curforce < 0 || PL_curforce == where);
1835 if (PL_curforce != where) {
5db06880
NC
1836 for (i = PL_lasttoke; i > where; --i) {
1837 PL_nexttoke[i] = PL_nexttoke[i-1];
1838 }
1839 PL_lasttoke++;
1840 }
cd81e915 1841 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1842 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1843 PL_curforce = where;
1844 if (PL_nextwhite) {
5db06880 1845 if (PL_madskills)
6b29d1f5 1846 curmad('^', newSVpvs(""));
cd81e915 1847 CURMAD('_', PL_nextwhite);
5db06880
NC
1848 }
1849}
1850
1851STATIC void
1852S_curmad(pTHX_ char slot, SV *sv)
1853{
1854 MADPROP **where;
1855
1856 if (!sv)
1857 return;
cd81e915
NC
1858 if (PL_curforce < 0)
1859 where = &PL_thismad;
5db06880 1860 else
cd81e915 1861 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1862
cd81e915 1863 if (PL_faketokens)
76f68e9b 1864 sv_setpvs(sv, "");
5db06880
NC
1865 else {
1866 if (!IN_BYTES) {
1867 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1868 SvUTF8_on(sv);
1869 else if (PL_encoding) {
1870 sv_recode_to_utf8(sv, PL_encoding);
1871 }
1872 }
1873 }
1874
1875 /* keep a slot open for the head of the list? */
1876 if (slot != '_' && *where && (*where)->mad_key == '^') {
1877 (*where)->mad_key = slot;
daba3364 1878 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1879 (*where)->mad_val = (void*)sv;
1880 }
1881 else
1882 addmad(newMADsv(slot, sv), where, 0);
1883}
1884#else
b3f24c00
MHM
1885# define start_force(where) NOOP
1886# define curmad(slot, sv) NOOP
5db06880
NC
1887#endif
1888
ffb4593c
NT
1889/*
1890 * S_force_next
9cbb5ea2 1891 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1892 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1893 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1894 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1895 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1896 */
1897
4e553d73 1898STATIC void
cea2e8a9 1899S_force_next(pTHX_ I32 type)
79072805 1900{
97aff369 1901 dVAR;
704d4215
GG
1902#ifdef DEBUGGING
1903 if (DEBUG_T_TEST) {
1904 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1905 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1906 }
1907#endif
5db06880 1908#ifdef PERL_MAD
cd81e915 1909 if (PL_curforce < 0)
5db06880 1910 start_force(PL_lasttoke);
cd81e915 1911 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1912 if (PL_lex_state != LEX_KNOWNEXT)
1913 PL_lex_defer = PL_lex_state;
1914 PL_lex_state = LEX_KNOWNEXT;
1915 PL_lex_expect = PL_expect;
cd81e915 1916 PL_curforce = -1;
5db06880 1917#else
3280af22
NIS
1918 PL_nexttype[PL_nexttoke] = type;
1919 PL_nexttoke++;
1920 if (PL_lex_state != LEX_KNOWNEXT) {
1921 PL_lex_defer = PL_lex_state;
1922 PL_lex_expect = PL_expect;
1923 PL_lex_state = LEX_KNOWNEXT;
79072805 1924 }
5db06880 1925#endif
79072805
LW
1926}
1927
d0a148a6 1928STATIC SV *
15f169a1 1929S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1930{
97aff369 1931 dVAR;
740cce10 1932 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1933 !IN_BYTES
1934 && UTF
1935 && !is_ascii_string((const U8*)start, len)
740cce10 1936 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1937 return sv;
1938}
1939
ffb4593c
NT
1940/*
1941 * S_force_word
1942 * When the lexer knows the next thing is a word (for instance, it has
1943 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1944 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1945 * lookahead.
ffb4593c
NT
1946 *
1947 * Arguments:
b1b65b59 1948 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1949 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1950 * int check_keyword : if true, Perl checks to make sure the word isn't
1951 * a keyword (do this if the word is a label, e.g. goto FOO)
1952 * int allow_pack : if true, : characters will also be allowed (require,
1953 * use, etc. do this)
9cbb5ea2 1954 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1955 */
1956
76e3520e 1957STATIC char *
cea2e8a9 1958S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1959{
97aff369 1960 dVAR;
463ee0b2
LW
1961 register char *s;
1962 STRLEN len;
4e553d73 1963
7918f24d
NC
1964 PERL_ARGS_ASSERT_FORCE_WORD;
1965
29595ff2 1966 start = SKIPSPACE1(start);
463ee0b2 1967 s = start;
7e2040f0 1968 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1969 (allow_pack && *s == ':') ||
15f0808c 1970 (allow_initial_tick && *s == '\'') )
a0d0e21e 1971 {
3280af22 1972 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1973 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1974 return start;
cd81e915 1975 start_force(PL_curforce);
5db06880
NC
1976 if (PL_madskills)
1977 curmad('X', newSVpvn(start,s-start));
463ee0b2 1978 if (token == METHOD) {
29595ff2 1979 s = SKIPSPACE1(s);
463ee0b2 1980 if (*s == '(')
3280af22 1981 PL_expect = XTERM;
463ee0b2 1982 else {
3280af22 1983 PL_expect = XOPERATOR;
463ee0b2 1984 }
79072805 1985 }
e74e6b3d 1986 if (PL_madskills)
63575281 1987 curmad('g', newSVpvs( "forced" ));
9ded7720 1988 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1989 = (OP*)newSVOP(OP_CONST,0,
1990 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1991 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1992 force_next(token);
1993 }
1994 return s;
1995}
1996
ffb4593c
NT
1997/*
1998 * S_force_ident
9cbb5ea2 1999 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2000 * text only contains the "foo" portion. The first argument is a pointer
2001 * to the "foo", and the second argument is the type symbol to prefix.
2002 * Forces the next token to be a "WORD".
9cbb5ea2 2003 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2004 */
2005
76e3520e 2006STATIC void
bfed75c6 2007S_force_ident(pTHX_ register const char *s, int kind)
79072805 2008{
97aff369 2009 dVAR;
7918f24d
NC
2010
2011 PERL_ARGS_ASSERT_FORCE_IDENT;
2012
c35e046a 2013 if (*s) {
90e5519e
NC
2014 const STRLEN len = strlen(s);
2015 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2016 start_force(PL_curforce);
9ded7720 2017 NEXTVAL_NEXTTOKE.opval = o;
79072805 2018 force_next(WORD);
748a9306 2019 if (kind) {
11343788 2020 o->op_private = OPpCONST_ENTERED;
55497cff 2021 /* XXX see note in pp_entereval() for why we forgo typo
2022 warnings if the symbol must be introduced in an eval.
2023 GSAR 96-10-12 */
90e5519e
NC
2024 gv_fetchpvn_flags(s, len,
2025 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2026 : GV_ADD,
2027 kind == '$' ? SVt_PV :
2028 kind == '@' ? SVt_PVAV :
2029 kind == '%' ? SVt_PVHV :
a0d0e21e 2030 SVt_PVGV
90e5519e 2031 );
748a9306 2032 }
79072805
LW
2033 }
2034}
2035
1571675a
GS
2036NV
2037Perl_str_to_version(pTHX_ SV *sv)
2038{
2039 NV retval = 0.0;
2040 NV nshift = 1.0;
2041 STRLEN len;
cfd0369c 2042 const char *start = SvPV_const(sv,len);
9d4ba2ae 2043 const char * const end = start + len;
504618e9 2044 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2045
2046 PERL_ARGS_ASSERT_STR_TO_VERSION;
2047
1571675a 2048 while (start < end) {
ba210ebe 2049 STRLEN skip;
1571675a
GS
2050 UV n;
2051 if (utf)
9041c2e3 2052 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2053 else {
2054 n = *(U8*)start;
2055 skip = 1;
2056 }
2057 retval += ((NV)n)/nshift;
2058 start += skip;
2059 nshift *= 1000;
2060 }
2061 return retval;
2062}
2063
4e553d73 2064/*
ffb4593c
NT
2065 * S_force_version
2066 * Forces the next token to be a version number.
e759cc13
RGS
2067 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2068 * and if "guessing" is TRUE, then no new token is created (and the caller
2069 * must use an alternative parsing method).
ffb4593c
NT
2070 */
2071
76e3520e 2072STATIC char *
e759cc13 2073S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2074{
97aff369 2075 dVAR;
5f66b61c 2076 OP *version = NULL;
44dcb63b 2077 char *d;
5db06880
NC
2078#ifdef PERL_MAD
2079 I32 startoff = s - SvPVX(PL_linestr);
2080#endif
89bfa8cd 2081
7918f24d
NC
2082 PERL_ARGS_ASSERT_FORCE_VERSION;
2083
29595ff2 2084 s = SKIPSPACE1(s);
89bfa8cd 2085
44dcb63b 2086 d = s;
dd629d5b 2087 if (*d == 'v')
44dcb63b 2088 d++;
44dcb63b 2089 if (isDIGIT(*d)) {
e759cc13
RGS
2090 while (isDIGIT(*d) || *d == '_' || *d == '.')
2091 d++;
5db06880
NC
2092#ifdef PERL_MAD
2093 if (PL_madskills) {
cd81e915 2094 start_force(PL_curforce);
5db06880
NC
2095 curmad('X', newSVpvn(s,d-s));
2096 }
2097#endif
9f3d182e 2098 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 2099 SV *ver;
6154021b
RGS
2100 s = scan_num(s, &pl_yylval);
2101 version = pl_yylval.opval;
dd629d5b
GS
2102 ver = cSVOPx(version)->op_sv;
2103 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2104 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2105 SvNV_set(ver, str_to_version(ver));
1571675a 2106 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2107 }
89bfa8cd 2108 }
5db06880
NC
2109 else if (guessing) {
2110#ifdef PERL_MAD
2111 if (PL_madskills) {
cd81e915
NC
2112 sv_free(PL_nextwhite); /* let next token collect whitespace */
2113 PL_nextwhite = 0;
5db06880
NC
2114 s = SvPVX(PL_linestr) + startoff;
2115 }
2116#endif
e759cc13 2117 return s;
5db06880 2118 }
89bfa8cd 2119 }
2120
5db06880
NC
2121#ifdef PERL_MAD
2122 if (PL_madskills && !version) {
cd81e915
NC
2123 sv_free(PL_nextwhite); /* let next token collect whitespace */
2124 PL_nextwhite = 0;
5db06880
NC
2125 s = SvPVX(PL_linestr) + startoff;
2126 }
2127#endif
89bfa8cd 2128 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2129 start_force(PL_curforce);
9ded7720 2130 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2131 force_next(WORD);
89bfa8cd 2132
e759cc13 2133 return s;
89bfa8cd 2134}
2135
ffb4593c
NT
2136/*
2137 * S_tokeq
2138 * Tokenize a quoted string passed in as an SV. It finds the next
2139 * chunk, up to end of string or a backslash. It may make a new
2140 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2141 * turns \\ into \.
2142 */
2143
76e3520e 2144STATIC SV *
cea2e8a9 2145S_tokeq(pTHX_ SV *sv)
79072805 2146{
97aff369 2147 dVAR;
79072805
LW
2148 register char *s;
2149 register char *send;
2150 register char *d;
b3ac6de7
IZ
2151 STRLEN len = 0;
2152 SV *pv = sv;
79072805 2153
7918f24d
NC
2154 PERL_ARGS_ASSERT_TOKEQ;
2155
79072805 2156 if (!SvLEN(sv))
b3ac6de7 2157 goto finish;
79072805 2158
a0d0e21e 2159 s = SvPV_force(sv, len);
21a311ee 2160 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2161 goto finish;
463ee0b2 2162 send = s + len;
79072805
LW
2163 while (s < send && *s != '\\')
2164 s++;
2165 if (s == send)
b3ac6de7 2166 goto finish;
79072805 2167 d = s;
be4731d2 2168 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2169 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2170 }
79072805
LW
2171 while (s < send) {
2172 if (*s == '\\') {
a0d0e21e 2173 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2174 s++; /* all that, just for this */
2175 }
2176 *d++ = *s++;
2177 }
2178 *d = '\0';
95a20fc0 2179 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2180 finish:
3280af22 2181 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2182 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2183 return sv;
2184}
2185
ffb4593c
NT
2186/*
2187 * Now come three functions related to double-quote context,
2188 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2189 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2190 * interact with PL_lex_state, and create fake ( ... ) argument lists
2191 * to handle functions and concatenation.
2192 * They assume that whoever calls them will be setting up a fake
2193 * join call, because each subthing puts a ',' after it. This lets
2194 * "lower \luPpEr"
2195 * become
2196 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2197 *
2198 * (I'm not sure whether the spurious commas at the end of lcfirst's
2199 * arguments and join's arguments are created or not).
2200 */
2201
2202/*
2203 * S_sublex_start
6154021b 2204 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2205 *
2206 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2207 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2208 *
2209 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2210 *
2211 * Everything else becomes a FUNC.
2212 *
2213 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2214 * had an OP_CONST or OP_READLINE). This just sets us up for a
2215 * call to S_sublex_push().
2216 */
2217
76e3520e 2218STATIC I32
cea2e8a9 2219S_sublex_start(pTHX)
79072805 2220{
97aff369 2221 dVAR;
6154021b 2222 register const I32 op_type = pl_yylval.ival;
79072805
LW
2223
2224 if (op_type == OP_NULL) {
6154021b 2225 pl_yylval.opval = PL_lex_op;
5f66b61c 2226 PL_lex_op = NULL;
79072805
LW
2227 return THING;
2228 }
2229 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2230 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2231
2232 if (SvTYPE(sv) == SVt_PVIV) {
2233 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2234 STRLEN len;
96a5add6 2235 const char * const p = SvPV_const(sv, len);
740cce10 2236 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2237 SvREFCNT_dec(sv);
2238 sv = nsv;
4e553d73 2239 }
6154021b 2240 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2241 PL_lex_stuff = NULL;
6f33ba73
RGS
2242 /* Allow <FH> // "foo" */
2243 if (op_type == OP_READLINE)
2244 PL_expect = XTERMORDORDOR;
79072805
LW
2245 return THING;
2246 }
e3f73d4e
RGS
2247 else if (op_type == OP_BACKTICK && PL_lex_op) {
2248 /* readpipe() vas overriden */
2249 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2250 pl_yylval.opval = PL_lex_op;
9b201d7d 2251 PL_lex_op = NULL;
e3f73d4e
RGS
2252 PL_lex_stuff = NULL;
2253 return THING;
2254 }
79072805 2255
3280af22 2256 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2257 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2258 PL_sublex_info.sub_op = PL_lex_op;
2259 PL_lex_state = LEX_INTERPPUSH;
55497cff 2260
3280af22
NIS
2261 PL_expect = XTERM;
2262 if (PL_lex_op) {
6154021b 2263 pl_yylval.opval = PL_lex_op;
5f66b61c 2264 PL_lex_op = NULL;
55497cff 2265 return PMFUNC;
2266 }
2267 else
2268 return FUNC;
2269}
2270
ffb4593c
NT
2271/*
2272 * S_sublex_push
2273 * Create a new scope to save the lexing state. The scope will be
2274 * ended in S_sublex_done. Returns a '(', starting the function arguments
2275 * to the uc, lc, etc. found before.
2276 * Sets PL_lex_state to LEX_INTERPCONCAT.
2277 */
2278
76e3520e 2279STATIC I32
cea2e8a9 2280S_sublex_push(pTHX)
55497cff 2281{
27da23d5 2282 dVAR;
f46d017c 2283 ENTER;
55497cff 2284
3280af22 2285 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2286 SAVEBOOL(PL_lex_dojoin);
3280af22 2287 SAVEI32(PL_lex_brackets);
3280af22
NIS
2288 SAVEI32(PL_lex_casemods);
2289 SAVEI32(PL_lex_starts);
651b5b28 2290 SAVEI8(PL_lex_state);
7766f137 2291 SAVEVPTR(PL_lex_inpat);
98246f1e 2292 SAVEI16(PL_lex_inwhat);
57843af0 2293 SAVECOPLINE(PL_curcop);
3280af22 2294 SAVEPPTR(PL_bufptr);
8452ff4b 2295 SAVEPPTR(PL_bufend);
3280af22
NIS
2296 SAVEPPTR(PL_oldbufptr);
2297 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2298 SAVEPPTR(PL_last_lop);
2299 SAVEPPTR(PL_last_uni);
3280af22
NIS
2300 SAVEPPTR(PL_linestart);
2301 SAVESPTR(PL_linestr);
8edd5f42
RGS
2302 SAVEGENERICPV(PL_lex_brackstack);
2303 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2304
2305 PL_linestr = PL_lex_stuff;
a0714e2c 2306 PL_lex_stuff = NULL;
3280af22 2307
9cbb5ea2
GS
2308 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2309 = SvPVX(PL_linestr);
3280af22 2310 PL_bufend += SvCUR(PL_linestr);
bd61b366 2311 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2312 SAVEFREESV(PL_linestr);
2313
2314 PL_lex_dojoin = FALSE;
2315 PL_lex_brackets = 0;
a02a5408
JC
2316 Newx(PL_lex_brackstack, 120, char);
2317 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2318 PL_lex_casemods = 0;
2319 *PL_lex_casestack = '\0';
2320 PL_lex_starts = 0;
2321 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2322 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2323
2324 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2325 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2326 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2327 else
5f66b61c 2328 PL_lex_inpat = NULL;
79072805 2329
55497cff 2330 return '(';
79072805
LW
2331}
2332
ffb4593c
NT
2333/*
2334 * S_sublex_done
2335 * Restores lexer state after a S_sublex_push.
2336 */
2337
76e3520e 2338STATIC I32
cea2e8a9 2339S_sublex_done(pTHX)
79072805 2340{
27da23d5 2341 dVAR;
3280af22 2342 if (!PL_lex_starts++) {
396482e1 2343 SV * const sv = newSVpvs("");
9aa983d2
JH
2344 if (SvUTF8(PL_linestr))
2345 SvUTF8_on(sv);
3280af22 2346 PL_expect = XOPERATOR;
6154021b 2347 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2348 return THING;
2349 }
2350
3280af22
NIS
2351 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2352 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2353 return yylex();
79072805
LW
2354 }
2355
ffb4593c 2356 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2357 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2358 PL_linestr = PL_lex_repl;
2359 PL_lex_inpat = 0;
2360 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2361 PL_bufend += SvCUR(PL_linestr);
bd61b366 2362 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2363 SAVEFREESV(PL_linestr);
2364 PL_lex_dojoin = FALSE;
2365 PL_lex_brackets = 0;
3280af22
NIS
2366 PL_lex_casemods = 0;
2367 *PL_lex_casestack = '\0';
2368 PL_lex_starts = 0;
25da4f38 2369 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2370 PL_lex_state = LEX_INTERPNORMAL;
2371 PL_lex_starts++;
e9fa98b2
HS
2372 /* we don't clear PL_lex_repl here, so that we can check later
2373 whether this is an evalled subst; that means we rely on the
2374 logic to ensure sublex_done() is called again only via the
2375 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2376 }
e9fa98b2 2377 else {
3280af22 2378 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2379 PL_lex_repl = NULL;
e9fa98b2 2380 }
79072805 2381 return ',';
ffed7fef
LW
2382 }
2383 else {
5db06880
NC
2384#ifdef PERL_MAD
2385 if (PL_madskills) {
cd81e915
NC
2386 if (PL_thiswhite) {
2387 if (!PL_endwhite)
6b29d1f5 2388 PL_endwhite = newSVpvs("");
cd81e915
NC
2389 sv_catsv(PL_endwhite, PL_thiswhite);
2390 PL_thiswhite = 0;
2391 }
2392 if (PL_thistoken)
76f68e9b 2393 sv_setpvs(PL_thistoken,"");
5db06880 2394 else
cd81e915 2395 PL_realtokenstart = -1;
5db06880
NC
2396 }
2397#endif
f46d017c 2398 LEAVE;
3280af22
NIS
2399 PL_bufend = SvPVX(PL_linestr);
2400 PL_bufend += SvCUR(PL_linestr);
2401 PL_expect = XOPERATOR;
09bef843 2402 PL_sublex_info.sub_inwhat = 0;
79072805 2403 return ')';
ffed7fef
LW
2404 }
2405}
2406
02aa26ce
NT
2407/*
2408 scan_const
2409
2410 Extracts a pattern, double-quoted string, or transliteration. This
2411 is terrifying code.
2412
94def140 2413 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2414 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2415 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2416
94def140
TS
2417 Returns a pointer to the character scanned up to. If this is
2418 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2419 successfully parsed), will leave an OP for the substring scanned
6154021b 2420 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2421 by looking at the next characters herself.
2422
02aa26ce
NT
2423 In patterns:
2424 backslashes:
2425 double-quoted style: \r and \n
2426 regexp special ones: \D \s
94def140
TS
2427 constants: \x31
2428 backrefs: \1
02aa26ce
NT
2429 case and quoting: \U \Q \E
2430 stops on @ and $, but not for $ as tail anchor
2431
2432 In transliterations:
2433 characters are VERY literal, except for - not at the start or end
94def140
TS
2434 of the string, which indicates a range. If the range is in bytes,
2435 scan_const expands the range to the full set of intermediate
2436 characters. If the range is in utf8, the hyphen is replaced with
2437 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2438
2439 In double-quoted strings:
2440 backslashes:
2441 double-quoted style: \r and \n
94def140
TS
2442 constants: \x31
2443 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2444 case and quoting: \U \Q \E
2445 stops on @ and $
2446
2447 scan_const does *not* construct ops to handle interpolated strings.
2448 It stops processing as soon as it finds an embedded $ or @ variable
2449 and leaves it to the caller to work out what's going on.
2450
94def140
TS
2451 embedded arrays (whether in pattern or not) could be:
2452 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2453
2454 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2455
2456 $ in pattern could be $foo or could be tail anchor. Assumption:
2457 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2458 followed by one of "()| \r\n\t"
02aa26ce
NT
2459
2460 \1 (backreferences) are turned into $1
2461
2462 The structure of the code is
2463 while (there's a character to process) {
94def140
TS
2464 handle transliteration ranges
2465 skip regexp comments /(?#comment)/ and codes /(?{code})/
2466 skip #-initiated comments in //x patterns
2467 check for embedded arrays
02aa26ce
NT
2468 check for embedded scalars
2469 if (backslash) {
94def140
TS
2470 leave intact backslashes from leaveit (below)
2471 deprecate \1 in substitution replacements
02aa26ce
NT
2472 handle string-changing backslashes \l \U \Q \E, etc.
2473 switch (what was escaped) {
94def140
TS
2474 handle \- in a transliteration (becomes a literal -)
2475 handle \132 (octal characters)
2476 handle \x15 and \x{1234} (hex characters)
2477 handle \N{name} (named characters)
2478 handle \cV (control characters)
2479 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2480 } (end switch)
77a135fe 2481 continue
02aa26ce 2482 } (end if backslash)
77a135fe 2483 handle regular character
02aa26ce 2484 } (end while character to read)
4e553d73 2485
02aa26ce
NT
2486*/
2487
76e3520e 2488STATIC char *
cea2e8a9 2489S_scan_const(pTHX_ char *start)
79072805 2490{
97aff369 2491 dVAR;
3280af22 2492 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2493 SV *sv = newSV(send - start); /* sv for the constant. See
2494 note below on sizing. */
02aa26ce
NT
2495 register char *s = start; /* start of the constant */
2496 register char *d = SvPVX(sv); /* destination for copies */
2497 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2498 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2499 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2500 I32 this_utf8 = UTF; /* Is the source string assumed
2501 to be UTF8? But, this can
2502 show as true when the source
2503 isn't utf8, as for example
2504 when it is entirely composed
2505 of hex constants */
2506
2507 /* Note on sizing: The scanned constant is placed into sv, which is
2508 * initialized by newSV() assuming one byte of output for every byte of
2509 * input. This routine expects newSV() to allocate an extra byte for a
2510 * trailing NUL, which this routine will append if it gets to the end of
2511 * the input. There may be more bytes of input than output (eg., \N{LATIN
2512 * CAPITAL LETTER A}), or more output than input if the constant ends up
2513 * recoded to utf8, but each time a construct is found that might increase
2514 * the needed size, SvGROW() is called. Its size parameter each time is
2515 * based on the best guess estimate at the time, namely the length used so
2516 * far, plus the length the current construct will occupy, plus room for
2517 * the trailing NUL, plus one byte for every input byte still unscanned */
2518
012bcf8d 2519 UV uv;
4c3a8340
TS
2520#ifdef EBCDIC
2521 UV literal_endpoint = 0;
e294cc5d 2522 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2523#endif
012bcf8d 2524
7918f24d
NC
2525 PERL_ARGS_ASSERT_SCAN_CONST;
2526
2b9d42f0
NIS
2527 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2528 /* If we are doing a trans and we know we want UTF8 set expectation */
2529 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2530 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2531 }
2532
2533
79072805 2534 while (s < send || dorange) {
02aa26ce 2535 /* get transliterations out of the way (they're most literal) */
3280af22 2536 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2537 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2538 if (dorange) {
1ba5c669
JH
2539 I32 i; /* current expanded character */
2540 I32 min; /* first character in range */
2541 I32 max; /* last character in range */
02aa26ce 2542
e294cc5d
JH
2543#ifdef EBCDIC
2544 UV uvmax = 0;
2545#endif
2546
2547 if (has_utf8
2548#ifdef EBCDIC
2549 && !native_range
2550#endif
2551 ) {
9d4ba2ae 2552 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2553 char *e = d++;
2554 while (e-- > c)
2555 *(e + 1) = *e;
25716404 2556 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2557 /* mark the range as done, and continue */
2558 dorange = FALSE;
2559 didrange = TRUE;
2560 continue;
2561 }
2b9d42f0 2562
95a20fc0 2563 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2564#ifdef EBCDIC
2565 SvGROW(sv,
2566 SvLEN(sv) + (has_utf8 ?
2567 (512 - UTF_CONTINUATION_MARK +
2568 UNISKIP(0x100))
2569 : 256));
2570 /* How many two-byte within 0..255: 128 in UTF-8,
2571 * 96 in UTF-8-mod. */
2572#else
9cbb5ea2 2573 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2574#endif
9cbb5ea2 2575 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2576#ifdef EBCDIC
2577 if (has_utf8) {
2578 int j;
2579 for (j = 0; j <= 1; j++) {
2580 char * const c = (char*)utf8_hop((U8*)d, -1);
2581 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2582 if (j)
2583 min = (U8)uv;
2584 else if (uv < 256)
2585 max = (U8)uv;
2586 else {
2587 max = (U8)0xff; /* only to \xff */
2588 uvmax = uv; /* \x{100} to uvmax */
2589 }
2590 d = c; /* eat endpoint chars */
2591 }
2592 }
2593 else {
2594#endif
2595 d -= 2; /* eat the first char and the - */
2596 min = (U8)*d; /* first char in range */
2597 max = (U8)d[1]; /* last char in range */
2598#ifdef EBCDIC
2599 }
2600#endif
8ada0baa 2601
c2e66d9e 2602 if (min > max) {
01ec43d0 2603 Perl_croak(aTHX_
d1573ac7 2604 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2605 (char)min, (char)max);
c2e66d9e
GS
2606 }
2607
c7f1f016 2608#ifdef EBCDIC
4c3a8340
TS
2609 if (literal_endpoint == 2 &&
2610 ((isLOWER(min) && isLOWER(max)) ||
2611 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2612 if (isLOWER(min)) {
2613 for (i = min; i <= max; i++)
2614 if (isLOWER(i))
db42d148 2615 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2616 } else {
2617 for (i = min; i <= max; i++)
2618 if (isUPPER(i))
db42d148 2619 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2620 }
2621 }
2622 else
2623#endif
2624 for (i = min; i <= max; i++)
e294cc5d
JH
2625#ifdef EBCDIC
2626 if (has_utf8) {
2627 const U8 ch = (U8)NATIVE_TO_UTF(i);
2628 if (UNI_IS_INVARIANT(ch))
2629 *d++ = (U8)i;
2630 else {
2631 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2632 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2633 }
2634 }
2635 else
2636#endif
2637 *d++ = (char)i;
2638
2639#ifdef EBCDIC
2640 if (uvmax) {
2641 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2642 if (uvmax > 0x101)
2643 *d++ = (char)UTF_TO_NATIVE(0xff);
2644 if (uvmax > 0x100)
2645 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2646 }
2647#endif
02aa26ce
NT
2648
2649 /* mark the range as done, and continue */
79072805 2650 dorange = FALSE;
01ec43d0 2651 didrange = TRUE;
4c3a8340
TS
2652#ifdef EBCDIC
2653 literal_endpoint = 0;
2654#endif
79072805 2655 continue;
4e553d73 2656 }
02aa26ce
NT
2657
2658 /* range begins (ignore - as first or last char) */
79072805 2659 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2660 if (didrange) {
1fafa243 2661 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2662 }
e294cc5d
JH
2663 if (has_utf8
2664#ifdef EBCDIC
2665 && !native_range
2666#endif
2667 ) {
25716404 2668 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2669 s++;
2670 continue;
2671 }
79072805
LW
2672 dorange = TRUE;
2673 s++;
01ec43d0
GS
2674 }
2675 else {
2676 didrange = FALSE;
4c3a8340
TS
2677#ifdef EBCDIC
2678 literal_endpoint = 0;
e294cc5d 2679 native_range = TRUE;
4c3a8340 2680#endif
01ec43d0 2681 }
79072805 2682 }
02aa26ce
NT
2683
2684 /* if we get here, we're not doing a transliteration */
2685
0f5d15d6
IZ
2686 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2687 except for the last char, which will be done separately. */
3280af22 2688 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2689 if (s[2] == '#') {
e994fd66 2690 while (s+1 < send && *s != ')')
db42d148 2691 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2692 }
2693 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2694 || (s[2] == '?' && s[3] == '{'))
155aba94 2695 {
cc6b7395 2696 I32 count = 1;
0f5d15d6 2697 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2698 char c;
2699
d9f97599
GS
2700 while (count && (c = *regparse)) {
2701 if (c == '\\' && regparse[1])
2702 regparse++;
4e553d73 2703 else if (c == '{')
cc6b7395 2704 count++;
4e553d73 2705 else if (c == '}')
cc6b7395 2706 count--;
d9f97599 2707 regparse++;
cc6b7395 2708 }
e994fd66 2709 if (*regparse != ')')
5bdf89e7 2710 regparse--; /* Leave one char for continuation. */
0f5d15d6 2711 while (s < regparse)
db42d148 2712 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2713 }
748a9306 2714 }
02aa26ce
NT
2715
2716 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2717 else if (*s == '#' && PL_lex_inpat &&
2718 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2719 while (s+1 < send && *s != '\n')
db42d148 2720 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2721 }
02aa26ce 2722
5d1d4326 2723 /* check for embedded arrays
da6eedaa 2724 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2725 */
1749ea0d
TS
2726 else if (*s == '@' && s[1]) {
2727 if (isALNUM_lazy_if(s+1,UTF))
2728 break;
2729 if (strchr(":'{$", s[1]))
2730 break;
2731 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2732 break; /* in regexp, neither @+ nor @- are interpolated */
2733 }
02aa26ce
NT
2734
2735 /* check for embedded scalars. only stop if we're sure it's a
2736 variable.
2737 */
79072805 2738 else if (*s == '$') {
3280af22 2739 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2740 break;
77772344 2741 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2742 if (s[1] == '\\') {
2743 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2744 "Possible unintended interpolation of $\\ in regex");
77772344 2745 }
79072805 2746 break; /* in regexp, $ might be tail anchor */
77772344 2747 }
79072805 2748 }
02aa26ce 2749
2b9d42f0
NIS
2750 /* End of else if chain - OP_TRANS rejoin rest */
2751
02aa26ce 2752 /* backslashes */
79072805
LW
2753 if (*s == '\\' && s+1 < send) {
2754 s++;
02aa26ce 2755
02aa26ce 2756 /* deprecate \1 in strings and substitution replacements */
3280af22 2757 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2758 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2759 {
a2a5de95 2760 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2761 *--s = '$';
2762 break;
2763 }
02aa26ce
NT
2764
2765 /* string-change backslash escapes */
3280af22 2766 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2767 --s;
2768 break;
2769 }
cc74c5bd
TS
2770 /* skip any other backslash escapes in a pattern */
2771 else if (PL_lex_inpat) {
2772 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2773 goto default_action;
2774 }
02aa26ce
NT
2775
2776 /* if we get here, it's either a quoted -, or a digit */
79072805 2777 switch (*s) {
02aa26ce
NT
2778
2779 /* quoted - in transliterations */
79072805 2780 case '-':
3280af22 2781 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2782 *d++ = *s++;
2783 continue;
2784 }
2785 /* FALL THROUGH */
2786 default:
11b8faa4 2787 {
a2a5de95
NC
2788 if ((isALPHA(*s) || isDIGIT(*s)))
2789 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2790 "Unrecognized escape \\%c passed through",
2791 *s);
11b8faa4 2792 /* default action is to copy the quoted character */
f9a63242 2793 goto default_action;
11b8faa4 2794 }
02aa26ce 2795
77a135fe 2796 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2797 case '0': case '1': case '2': case '3':
2798 case '4': case '5': case '6': case '7':
ba210ebe 2799 {
53305cf1
NC
2800 I32 flags = 0;
2801 STRLEN len = 3;
77a135fe 2802 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2803 s += len;
2804 }
012bcf8d 2805 goto NUM_ESCAPE_INSERT;
02aa26ce 2806
77a135fe 2807 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2808 case 'x':
a0ed51b3
LW
2809 ++s;
2810 if (*s == '{') {
9d4ba2ae 2811 char* const e = strchr(s, '}');
a4c04bdc
NC
2812 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2813 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2814 STRLEN len;
355860ce 2815
53305cf1 2816 ++s;
adaeee49 2817 if (!e) {
a0ed51b3 2818 yyerror("Missing right brace on \\x{}");
355860ce 2819 continue;
ba210ebe 2820 }
53305cf1 2821 len = e - s;
77a135fe 2822 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2823 s = e + 1;
a0ed51b3
LW
2824 }
2825 else {
ba210ebe 2826 {
53305cf1 2827 STRLEN len = 2;
a4c04bdc 2828 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2829 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2830 s += len;
2831 }
012bcf8d
GS
2832 }
2833
2834 NUM_ESCAPE_INSERT:
77a135fe
KW
2835 /* Insert oct, hex, or \N{U+...} escaped character. There will
2836 * always be enough room in sv since such escapes will be
2837 * longer than any UTF-8 sequence they can end up as, except if
2838 * they force us to recode the rest of the string into utf8 */
ba7cea30 2839
77a135fe
KW
2840 /* Here uv is the ordinal of the next character being added in
2841 * unicode (converted from native). (It has to be done before
2842 * here because \N is interpreted as unicode, and oct and hex
2843 * as native.) */
2844 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2845 if (!has_utf8 && uv > 255) {
77a135fe
KW
2846 /* Might need to recode whatever we have accumulated so
2847 * far if it contains any chars variant in utf8 or
2848 * utf-ebcdic. */
2849
2850 SvCUR_set(sv, d - SvPVX_const(sv));
2851 SvPOK_on(sv);
2852 *d = '\0';
77a135fe 2853 /* See Note on sizing above. */
7bf79863
KW
2854 sv_utf8_upgrade_flags_grow(sv,
2855 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2856 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2857 d = SvPVX(sv) + SvCUR(sv);
2858 has_utf8 = TRUE;
012bcf8d
GS
2859 }
2860
77a135fe
KW
2861 if (has_utf8) {
2862 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2863 if (PL_lex_inwhat == OP_TRANS &&
2864 PL_sublex_info.sub_op) {
2865 PL_sublex_info.sub_op->op_private |=
2866 (PL_lex_repl ? OPpTRANS_FROM_UTF
2867 : OPpTRANS_TO_UTF);
f9a63242 2868 }
e294cc5d
JH
2869#ifdef EBCDIC
2870 if (uv > 255 && !dorange)
2871 native_range = FALSE;
2872#endif
012bcf8d 2873 }
a0ed51b3 2874 else {
012bcf8d 2875 *d++ = (char)uv;
a0ed51b3 2876 }
012bcf8d
GS
2877 }
2878 else {
c4d5f83a 2879 *d++ = (char) uv;
a0ed51b3 2880 }
79072805 2881 continue;
02aa26ce 2882
77a135fe
KW
2883 /* \N{LATIN SMALL LETTER A} is a named character, and so is
2884 * \N{U+0041} */
4a2d328f 2885 case 'N':
55eda711 2886 ++s;
423cee85
JH
2887 if (*s == '{') {
2888 char* e = strchr(s, '}');
155aba94 2889 SV *res;
423cee85 2890 STRLEN len;
cfd0369c 2891 const char *str;
4e553d73 2892
423cee85 2893 if (!e) {
5777a3f7 2894 yyerror("Missing right brace on \\N{}");
423cee85
JH
2895 e = s - 1;
2896 goto cont_scan;
2897 }
dbc0d4f2 2898 if (e > s + 2 && s[1] == 'U' && s[2] == '+') {
77a135fe
KW
2899 /* \N{U+...} The ... is a unicode value even on EBCDIC
2900 * machines */
dbc0d4f2
JH
2901 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2902 PERL_SCAN_DISALLOW_PREFIX;
2903 s += 3;
2904 len = e - s;
2905 uv = grok_hex(s, &len, &flags, NULL);
b57a0404
JH
2906 if ( e > s && len != (STRLEN)(e - s) ) {
2907 uv = 0xFFFD;
fc8cd66c 2908 }
dbc0d4f2
JH
2909 s = e + 1;
2910 goto NUM_ESCAPE_INSERT;
2911 }
55eda711 2912 res = newSVpvn(s + 1, e - s - 1);
bd61b366 2913 res = new_constant( NULL, 0, "charnames",
eb0d8d16 2914 res, NULL, s - 2, e - s + 3 );
f9a63242
JH
2915 if (has_utf8)
2916 sv_utf8_upgrade(res);
cfd0369c 2917 str = SvPV_const(res,len);
1c47067b
JH
2918#ifdef EBCDIC_NEVER_MIND
2919 /* charnames uses pack U and that has been
2920 * recently changed to do the below uni->native
2921 * mapping, so this would be redundant (and wrong,
2922 * the code point would be doubly converted).
2923 * But leave this in just in case the pack U change
2924 * gets revoked, but the semantics is still
2925 * desireable for charnames. --jhi */
cddc7ef4 2926 {
cfd0369c 2927 UV uv = utf8_to_uvchr((const U8*)str, 0);
cddc7ef4
JH
2928
2929 if (uv < 0x100) {
89ebb4a3 2930 U8 tmpbuf[UTF8_MAXBYTES+1], *d;
cddc7ef4
JH
2931
2932 d = uvchr_to_utf8(tmpbuf, UNI_TO_NATIVE(uv));
2933 sv_setpvn(res, (char *)tmpbuf, d - tmpbuf);
cfd0369c 2934 str = SvPV_const(res, len);
cddc7ef4
JH
2935 }
2936 }
2937#endif
77a135fe
KW
2938 /* If destination is not in utf8 but this new character is,
2939 * recode the dest to utf8 */
89491803 2940 if (!has_utf8 && SvUTF8(res)) {
77a135fe 2941 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 2942 SvPOK_on(sv);
e4f3eed8 2943 *d = '\0';
77a135fe 2944 /* See Note on sizing above. */
7bf79863
KW
2945 sv_utf8_upgrade_flags_grow(sv,
2946 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2947 len + (STRLEN)(send - s) + 1);
f08d6ad9 2948 d = SvPVX(sv) + SvCUR(sv);
89491803 2949 has_utf8 = TRUE;
77a135fe 2950 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
423cee85 2951
77a135fe
KW
2952 /* See Note on sizing above. (NOTE: SvCUR() is not set
2953 * correctly here). */
2954 const STRLEN off = d - SvPVX_const(sv);
2955 d = SvGROW(sv, off + len + (STRLEN)(send - s) + 1) + off;
423cee85 2956 }
e294cc5d
JH
2957#ifdef EBCDIC
2958 if (!dorange)
2959 native_range = FALSE; /* \N{} is guessed to be Unicode */
2960#endif
423cee85
JH
2961 Copy(str, d, len, char);
2962 d += len;
2963 SvREFCNT_dec(res);
2964 cont_scan:
2965 s = e + 1;
2966 }
2967 else
5777a3f7 2968 yyerror("Missing braces on \\N{}");
423cee85
JH
2969 continue;
2970
02aa26ce 2971 /* \c is a control character */
79072805
LW
2972 case 'c':
2973 s++;
961ce445 2974 if (s < send) {
ba210ebe 2975 U8 c = *s++;
c7f1f016
NIS
2976#ifdef EBCDIC
2977 if (isLOWER(c))
2978 c = toUPPER(c);
2979#endif
db42d148 2980 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 2981 }
961ce445
RGS
2982 else {
2983 yyerror("Missing control char name in \\c");
2984 }
79072805 2985 continue;
02aa26ce
NT
2986
2987 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 2988 case 'b':
db42d148 2989 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
2990 break;
2991 case 'n':
db42d148 2992 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
2993 break;
2994 case 'r':
db42d148 2995 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
2996 break;
2997 case 'f':
db42d148 2998 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
2999 break;
3000 case 't':
db42d148 3001 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3002 break;
34a3fe2a 3003 case 'e':
db42d148 3004 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3005 break;
3006 case 'a':
db42d148 3007 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3008 break;
02aa26ce
NT
3009 } /* end switch */
3010
79072805
LW
3011 s++;
3012 continue;
02aa26ce 3013 } /* end if (backslash) */
4c3a8340
TS
3014#ifdef EBCDIC
3015 else
3016 literal_endpoint++;
3017#endif
02aa26ce 3018
f9a63242 3019 default_action:
77a135fe
KW
3020 /* If we started with encoded form, or already know we want it,
3021 then encode the next character */
3022 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3023 STRLEN len = 1;
77a135fe
KW
3024
3025
3026 /* One might think that it is wasted effort in the case of the
3027 * source being utf8 (this_utf8 == TRUE) to take the next character
3028 * in the source, convert it to an unsigned value, and then convert
3029 * it back again. But the source has not been validated here. The
3030 * routine that does the conversion checks for errors like
3031 * malformed utf8 */
3032
5f66b61c
AL
3033 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3034 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3035 if (!has_utf8) {
3036 SvCUR_set(sv, d - SvPVX_const(sv));
3037 SvPOK_on(sv);
3038 *d = '\0';
77a135fe 3039 /* See Note on sizing above. */
7bf79863
KW
3040 sv_utf8_upgrade_flags_grow(sv,
3041 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3042 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3043 d = SvPVX(sv) + SvCUR(sv);
3044 has_utf8 = TRUE;
3045 } else if (need > len) {
3046 /* encoded value larger than old, may need extra space (NOTE:
3047 * SvCUR() is not set correctly here). See Note on sizing
3048 * above. */
9d4ba2ae 3049 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3050 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3051 }
77a135fe
KW
3052 s += len;
3053
5f66b61c 3054 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3055#ifdef EBCDIC
3056 if (uv > 255 && !dorange)
3057 native_range = FALSE;
3058#endif
2b9d42f0
NIS
3059 }
3060 else {
3061 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3062 }
02aa26ce
NT
3063 } /* while loop to process each character */
3064
3065 /* terminate the string and set up the sv */
79072805 3066 *d = '\0';
95a20fc0 3067 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3068 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3069 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3070
79072805 3071 SvPOK_on(sv);
9f4817db 3072 if (PL_encoding && !has_utf8) {
d0063567
DK
3073 sv_recode_to_utf8(sv, PL_encoding);
3074 if (SvUTF8(sv))
3075 has_utf8 = TRUE;
9f4817db 3076 }
2b9d42f0 3077 if (has_utf8) {
7e2040f0 3078 SvUTF8_on(sv);
2b9d42f0 3079 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3080 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3081 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3082 }
3083 }
79072805 3084
02aa26ce 3085 /* shrink the sv if we allocated more than we used */
79072805 3086 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3087 SvPV_shrink_to_cur(sv);
79072805 3088 }
02aa26ce 3089
6154021b 3090 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3091 if (s > PL_bufptr) {
eb0d8d16
NC
3092 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3093 const char *const key = PL_lex_inpat ? "qr" : "q";
3094 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3095 const char *type;
3096 STRLEN typelen;
3097
3098 if (PL_lex_inwhat == OP_TRANS) {
3099 type = "tr";
3100 typelen = 2;
3101 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3102 type = "s";
3103 typelen = 1;
3104 } else {
3105 type = "qq";
3106 typelen = 2;
3107 }
3108
3109 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3110 type, typelen);
3111 }
6154021b 3112 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3113 } else
8990e307 3114 SvREFCNT_dec(sv);
79072805
LW
3115 return s;
3116}
3117
ffb4593c
NT
3118/* S_intuit_more
3119 * Returns TRUE if there's more to the expression (e.g., a subscript),
3120 * FALSE otherwise.
ffb4593c
NT
3121 *
3122 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3123 *
3124 * ->[ and ->{ return TRUE
3125 * { and [ outside a pattern are always subscripts, so return TRUE
3126 * if we're outside a pattern and it's not { or [, then return FALSE
3127 * if we're in a pattern and the first char is a {
3128 * {4,5} (any digits around the comma) returns FALSE
3129 * if we're in a pattern and the first char is a [
3130 * [] returns FALSE
3131 * [SOMETHING] has a funky algorithm to decide whether it's a
3132 * character class or not. It has to deal with things like
3133 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3134 * anything else returns TRUE
3135 */
3136
9cbb5ea2
GS
3137/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3138
76e3520e 3139STATIC int
cea2e8a9 3140S_intuit_more(pTHX_ register char *s)
79072805 3141{
97aff369 3142 dVAR;
7918f24d
NC
3143
3144 PERL_ARGS_ASSERT_INTUIT_MORE;
3145
3280af22 3146 if (PL_lex_brackets)
79072805
LW
3147 return TRUE;
3148 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3149 return TRUE;
3150 if (*s != '{' && *s != '[')
3151 return FALSE;
3280af22 3152 if (!PL_lex_inpat)
79072805
LW
3153 return TRUE;
3154
3155 /* In a pattern, so maybe we have {n,m}. */
3156 if (*s == '{') {
3157 s++;
3158 if (!isDIGIT(*s))
3159 return TRUE;
3160 while (isDIGIT(*s))
3161 s++;
3162 if (*s == ',')
3163 s++;
3164 while (isDIGIT(*s))
3165 s++;
3166 if (*s == '}')
3167 return FALSE;
3168 return TRUE;
3169
3170 }
3171
3172 /* On the other hand, maybe we have a character class */
3173
3174 s++;
3175 if (*s == ']' || *s == '^')
3176 return FALSE;
3177 else {
ffb4593c 3178 /* this is terrifying, and it works */
79072805
LW
3179 int weight = 2; /* let's weigh the evidence */
3180 char seen[256];
f27ffc4a 3181 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3182 const char * const send = strchr(s,']');
3280af22 3183 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3184
3185 if (!send) /* has to be an expression */
3186 return TRUE;
3187
3188 Zero(seen,256,char);
3189 if (*s == '$')
3190 weight -= 3;
3191 else if (isDIGIT(*s)) {
3192 if (s[1] != ']') {
3193 if (isDIGIT(s[1]) && s[2] == ']')
3194 weight -= 10;
3195 }
3196 else
3197 weight -= 100;
3198 }
3199 for (; s < send; s++) {
3200 last_un_char = un_char;
3201 un_char = (unsigned char)*s;
3202 switch (*s) {
3203 case '@':
3204 case '&':
3205 case '$':
3206 weight -= seen[un_char] * 10;
7e2040f0 3207 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3208 int len;
8903cb82 3209 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3210 len = (int)strlen(tmpbuf);
3211 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3212 weight -= 100;
3213 else
3214 weight -= 10;
3215 }
3216 else if (*s == '$' && s[1] &&
93a17b20
LW
3217 strchr("[#!%*<>()-=",s[1])) {
3218 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3219 weight -= 10;
3220 else
3221 weight -= 1;
3222 }
3223 break;
3224 case '\\':
3225 un_char = 254;
3226 if (s[1]) {
93a17b20 3227 if (strchr("wds]",s[1]))
79072805 3228 weight += 100;
10edeb5d 3229 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3230 weight += 1;
93a17b20 3231 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3232 weight += 40;
3233 else if (isDIGIT(s[1])) {
3234 weight += 40;
3235 while (s[1] && isDIGIT(s[1]))
3236 s++;
3237 }
3238 }
3239 else
3240 weight += 100;
3241 break;
3242 case '-':
3243 if (s[1] == '\\')
3244 weight += 50;
93a17b20 3245 if (strchr("aA01! ",last_un_char))
79072805 3246 weight += 30;
93a17b20 3247 if (strchr("zZ79~",s[1]))
79072805 3248 weight += 30;
f27ffc4a
GS
3249 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3250 weight -= 5; /* cope with negative subscript */
79072805
LW
3251 break;
3252 default:
3792a11b
NC
3253 if (!isALNUM(last_un_char)
3254 && !(last_un_char == '$' || last_un_char == '@'
3255 || last_un_char == '&')
3256 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3257 char *d = tmpbuf;
3258 while (isALPHA(*s))
3259 *d++ = *s++;
3260 *d = '\0';
5458a98a 3261 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3262 weight -= 150;
3263 }
3264 if (un_char == last_un_char + 1)
3265 weight += 5;
3266 weight -= seen[un_char];
3267 break;
3268 }
3269 seen[un_char]++;
3270 }
3271 if (weight >= 0) /* probably a character class */
3272 return FALSE;
3273 }
3274
3275 return TRUE;
3276}
ffed7fef 3277
ffb4593c
NT
3278/*
3279 * S_intuit_method
3280 *
3281 * Does all the checking to disambiguate
3282 * foo bar
3283 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3284 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3285 *
3286 * First argument is the stuff after the first token, e.g. "bar".
3287 *
3288 * Not a method if bar is a filehandle.
3289 * Not a method if foo is a subroutine prototyped to take a filehandle.
3290 * Not a method if it's really "Foo $bar"
3291 * Method if it's "foo $bar"
3292 * Not a method if it's really "print foo $bar"
3293 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3294 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3295 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3296 * =>
3297 */
3298
76e3520e 3299STATIC int
62d55b22 3300S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3301{
97aff369 3302 dVAR;
a0d0e21e 3303 char *s = start + (*start == '$');
3280af22 3304 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3305 STRLEN len;
3306 GV* indirgv;
5db06880
NC
3307#ifdef PERL_MAD
3308 int soff;
3309#endif
a0d0e21e 3310
7918f24d
NC
3311 PERL_ARGS_ASSERT_INTUIT_METHOD;
3312
a0d0e21e 3313 if (gv) {
62d55b22 3314 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3315 return 0;
62d55b22
NC
3316 if (cv) {
3317 if (SvPOK(cv)) {
3318 const char *proto = SvPVX_const(cv);
3319 if (proto) {
3320 if (*proto == ';')
3321 proto++;
3322 if (*proto == '*')
3323 return 0;
3324 }
b6c543e3
IZ
3325 }
3326 } else
c35e046a 3327 gv = NULL;
a0d0e21e 3328 }
8903cb82 3329 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3330 /* start is the beginning of the possible filehandle/object,
3331 * and s is the end of it
3332 * tmpbuf is a copy of it
3333 */
3334
a0d0e21e 3335 if (*start == '$') {
3ef1310e
RGS
3336 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3337 isUPPER(*PL_tokenbuf))
a0d0e21e 3338 return 0;
5db06880
NC
3339#ifdef PERL_MAD
3340 len = start - SvPVX(PL_linestr);
3341#endif
29595ff2 3342 s = PEEKSPACE(s);
f0092767 3343#ifdef PERL_MAD
5db06880
NC
3344 start = SvPVX(PL_linestr) + len;
3345#endif
3280af22
NIS
3346 PL_bufptr = start;
3347 PL_expect = XREF;
a0d0e21e
LW
3348 return *s == '(' ? FUNCMETH : METHOD;
3349 }
5458a98a 3350 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3351 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3352 len -= 2;
3353 tmpbuf[len] = '\0';
5db06880
NC
3354#ifdef PERL_MAD
3355 soff = s - SvPVX(PL_linestr);
3356#endif
c3e0f903
GS
3357 goto bare_package;
3358 }
90e5519e 3359 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3360 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3361 return 0;
3362 /* filehandle or package name makes it a method */
da51bb9b 3363 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3364#ifdef PERL_MAD
3365 soff = s - SvPVX(PL_linestr);
3366#endif
29595ff2 3367 s = PEEKSPACE(s);
3280af22 3368 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3369 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3370 bare_package:
cd81e915 3371 start_force(PL_curforce);
9ded7720 3372 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3373 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3374 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3375 if (PL_madskills)
3376 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3377 PL_expect = XTERM;
a0d0e21e 3378 force_next(WORD);
3280af22 3379 PL_bufptr = s;
5db06880
NC
3380#ifdef PERL_MAD
3381 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3382#endif
a0d0e21e
LW
3383 return *s == '(' ? FUNCMETH : METHOD;
3384 }
3385 }
3386 return 0;
3387}
3388
16d20bd9 3389/* Encoded script support. filter_add() effectively inserts a
4e553d73 3390 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3391 * Note that the filter function only applies to the current source file
3392 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3393 *
3394 * The datasv parameter (which may be NULL) can be used to pass
3395 * private data to this instance of the filter. The filter function
3396 * can recover the SV using the FILTER_DATA macro and use it to
3397 * store private buffers and state information.
3398 *
3399 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3400 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3401 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3402 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3403 * private use must be set using malloc'd pointers.
3404 */
16d20bd9
AD
3405
3406SV *
864dbfa3 3407Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3408{
97aff369 3409 dVAR;
f4c556ac 3410 if (!funcp)
a0714e2c 3411 return NULL;
f4c556ac 3412
5486870f
DM
3413 if (!PL_parser)
3414 return NULL;
3415
3280af22
NIS
3416 if (!PL_rsfp_filters)
3417 PL_rsfp_filters = newAV();
16d20bd9 3418 if (!datasv)
561b68a9 3419 datasv = newSV(0);
862a34c6 3420 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3421 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3422 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3423 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3424 FPTR2DPTR(void *, IoANY(datasv)),
3425 SvPV_nolen(datasv)));
3280af22
NIS
3426 av_unshift(PL_rsfp_filters, 1);
3427 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3428 return(datasv);
3429}
4e553d73 3430
16d20bd9
AD
3431
3432/* Delete most recently added instance of this filter function. */
a0d0e21e 3433void
864dbfa3 3434Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3435{
97aff369 3436 dVAR;
e0c19803 3437 SV *datasv;
24801a4b 3438
7918f24d
NC
3439 PERL_ARGS_ASSERT_FILTER_DEL;
3440
33073adb 3441#ifdef DEBUGGING
55662e27
JH
3442 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3443 FPTR2DPTR(void*, funcp)));
33073adb 3444#endif
5486870f 3445 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3446 return;
3447 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3448 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3449 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 3450 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 3451 IoANY(datasv) = (void *)NULL;
3280af22 3452 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3453
16d20bd9
AD
3454 return;
3455 }
3456 /* we need to search for the correct entry and clear it */
cea2e8a9 3457 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3458}
3459
3460
1de9afcd
RGS
3461/* Invoke the idxth filter function for the current rsfp. */
3462/* maxlen 0 = read one text line */
16d20bd9 3463I32
864dbfa3 3464Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3465{
97aff369 3466 dVAR;
16d20bd9
AD
3467 filter_t funcp;
3468 SV *datasv = NULL;
f482118e
NC
3469 /* This API is bad. It should have been using unsigned int for maxlen.
3470 Not sure if we want to change the API, but if not we should sanity
3471 check the value here. */
39cd7a59
NC
3472 const unsigned int correct_length
3473 = maxlen < 0 ?
3474#ifdef PERL_MICRO
3475 0x7FFFFFFF
3476#else
3477 INT_MAX
3478#endif
3479 : maxlen;
e50aee73 3480
7918f24d
NC
3481 PERL_ARGS_ASSERT_FILTER_READ;
3482
5486870f 3483 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3484 return -1;
1de9afcd 3485 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3486 /* Provide a default input filter to make life easy. */
3487 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3488 DEBUG_P(PerlIO_printf(Perl_debug_log,
3489 "filter_read %d: from rsfp\n", idx));
f482118e 3490 if (correct_length) {
16d20bd9
AD
3491 /* Want a block */
3492 int len ;
f54cb97a 3493 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3494
3495 /* ensure buf_sv is large enough */
881d8f0a 3496 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3497 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3498 correct_length)) <= 0) {
3280af22 3499 if (PerlIO_error(PL_rsfp))
37120919
AD
3500 return -1; /* error */
3501 else
3502 return 0 ; /* end of file */
3503 }
16d20bd9 3504 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3505 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3506 } else {
3507 /* Want a line */
3280af22
NIS
3508 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3509 if (PerlIO_error(PL_rsfp))
37120919
AD
3510 return -1; /* error */
3511 else
3512 return 0 ; /* end of file */
3513 }
16d20bd9
AD
3514 }
3515 return SvCUR(buf_sv);
3516 }
3517 /* Skip this filter slot if filter has been deleted */
1de9afcd 3518 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3519 DEBUG_P(PerlIO_printf(Perl_debug_log,
3520 "filter_read %d: skipped (filter deleted)\n",
3521 idx));
f482118e 3522 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3523 }
3524 /* Get function pointer hidden within datasv */
8141890a 3525 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3526 DEBUG_P(PerlIO_printf(Perl_debug_log,
3527 "filter_read %d: via function %p (%s)\n",
ca0270c4 3528 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3529 /* Call function. The function is expected to */
3530 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3531 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3532 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3533}
3534
76e3520e 3535STATIC char *
5cc814fd 3536S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3537{
97aff369 3538 dVAR;
7918f24d
NC
3539
3540 PERL_ARGS_ASSERT_FILTER_GETS;
3541
c39cd008 3542#ifdef PERL_CR_FILTER
3280af22 3543 if (!PL_rsfp_filters) {
c39cd008 3544 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3545 }
3546#endif
3280af22 3547 if (PL_rsfp_filters) {
55497cff 3548 if (!append)
3549 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3550 if (FILTER_READ(0, sv, 0) > 0)
3551 return ( SvPVX(sv) ) ;
3552 else
bd61b366 3553 return NULL ;
16d20bd9 3554 }
9d116dd7 3555 else
5cc814fd 3556 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3557}
3558
01ec43d0 3559STATIC HV *
9bde8eb0 3560S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3561{
97aff369 3562 dVAR;
def3634b
GS
3563 GV *gv;
3564
7918f24d
NC
3565 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3566
01ec43d0 3567 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3568 return PL_curstash;
3569
3570 if (len > 2 &&
3571 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3572 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3573 {
3574 return GvHV(gv); /* Foo:: */
def3634b
GS
3575 }
3576
3577 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3578 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3579 if (gv && GvCV(gv)) {
3580 SV * const sv = cv_const_sv(GvCV(gv));
3581 if (sv)
9bde8eb0 3582 pkgname = SvPV_const(sv, len);
def3634b
GS
3583 }
3584
9bde8eb0 3585 return gv_stashpvn(pkgname, len, 0);
def3634b 3586}
a0d0e21e 3587
e3f73d4e
RGS
3588/*
3589 * S_readpipe_override
3590 * Check whether readpipe() is overriden, and generates the appropriate
3591 * optree, provided sublex_start() is called afterwards.
3592 */
3593STATIC void
1d51329b 3594S_readpipe_override(pTHX)
e3f73d4e
RGS
3595{
3596 GV **gvp;
3597 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3598 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3599 if ((gv_readpipe
3600 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3601 ||
3602 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3603 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3604 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3605 {
3606 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3607 append_elem(OP_LIST,
3608 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3609 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3610 }
e3f73d4e
RGS
3611}
3612
5db06880
NC
3613#ifdef PERL_MAD
3614 /*
3615 * Perl_madlex
3616 * The intent of this yylex wrapper is to minimize the changes to the
3617 * tokener when we aren't interested in collecting madprops. It remains
3618 * to be seen how successful this strategy will be...
3619 */
3620
3621int
3622Perl_madlex(pTHX)
3623{
3624 int optype;
3625 char *s = PL_bufptr;
3626
cd81e915
NC
3627 /* make sure PL_thiswhite is initialized */
3628 PL_thiswhite = 0;
3629 PL_thismad = 0;
5db06880 3630
cd81e915 3631 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3632 if (PL_pending_ident)
3633 return S_pending_ident(aTHX);
3634
3635 /* previous token ate up our whitespace? */
cd81e915
NC
3636 if (!PL_lasttoke && PL_nextwhite) {
3637 PL_thiswhite = PL_nextwhite;
3638 PL_nextwhite = 0;
5db06880
NC
3639 }
3640
3641 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3642 PL_realtokenstart = -1;
3643 PL_thistoken = 0;
5db06880
NC
3644 optype = yylex();
3645 s = PL_bufptr;
cd81e915 3646 assert(PL_curforce < 0);
5db06880 3647
cd81e915
NC
3648 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3649 if (!PL_thistoken) {
3650 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3651 PL_thistoken = newSVpvs("");
5db06880 3652 else {
c35e046a 3653 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3654 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3655 }
3656 }
cd81e915
NC
3657 if (PL_thismad) /* install head */
3658 CURMAD('X', PL_thistoken);
5db06880
NC
3659 }
3660
3661 /* last whitespace of a sublex? */
cd81e915
NC
3662 if (optype == ')' && PL_endwhite) {
3663 CURMAD('X', PL_endwhite);
5db06880
NC
3664 }
3665
cd81e915 3666 if (!PL_thismad) {
5db06880
NC
3667
3668 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3669 if (!PL_thiswhite && !PL_endwhite && !optype) {
3670 sv_free(PL_thistoken);
3671 PL_thistoken = 0;
5db06880
NC
3672 return 0;
3673 }
3674
3675 /* put off final whitespace till peg */
3676 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3677 PL_nextwhite = PL_thiswhite;
3678 PL_thiswhite = 0;
5db06880 3679 }
cd81e915
NC
3680 else if (PL_thisopen) {
3681 CURMAD('q', PL_thisopen);
3682 if (PL_thistoken)
3683 sv_free(PL_thistoken);
3684 PL_thistoken = 0;
5db06880
NC
3685 }
3686 else {
3687 /* Store actual token text as madprop X */
cd81e915 3688 CURMAD('X', PL_thistoken);
5db06880
NC
3689 }
3690
cd81e915 3691 if (PL_thiswhite) {
5db06880 3692 /* add preceding whitespace as madprop _ */
cd81e915 3693 CURMAD('_', PL_thiswhite);
5db06880
NC
3694 }
3695
cd81e915 3696 if (PL_thisstuff) {
5db06880 3697 /* add quoted material as madprop = */
cd81e915 3698 CURMAD('=', PL_thisstuff);
5db06880
NC
3699 }
3700
cd81e915 3701 if (PL_thisclose) {
5db06880 3702 /* add terminating quote as madprop Q */
cd81e915 3703 CURMAD('Q', PL_thisclose);
5db06880
NC
3704 }
3705 }
3706
3707 /* special processing based on optype */
3708
3709 switch (optype) {
3710
3711 /* opval doesn't need a TOKEN since it can already store mp */
3712 case WORD:
3713 case METHOD:
3714 case FUNCMETH:
3715 case THING:
3716 case PMFUNC:
3717 case PRIVATEREF:
3718 case FUNC0SUB:
3719 case UNIOPSUB:
3720 case LSTOPSUB:
6154021b
RGS
3721 if (pl_yylval.opval)
3722 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 3723 PL_thismad = 0;
5db06880
NC
3724 return optype;
3725
3726 /* fake EOF */
3727 case 0:
3728 optype = PEG;
cd81e915
NC
3729 if (PL_endwhite) {
3730 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
3731 PL_endwhite = 0;
5db06880
NC
3732 }
3733 break;
3734
3735 case ']':
3736 case '}':
cd81e915 3737 if (PL_faketokens)
5db06880
NC
3738 break;
3739 /* remember any fake bracket that lexer is about to discard */
3740 if (PL_lex_brackets == 1 &&
3741 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
3742 {
3743 s = PL_bufptr;
3744 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3745 s++;
3746 if (*s == '}') {
cd81e915
NC
3747 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
3748 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3749 PL_thiswhite = 0;
5db06880
NC
3750 PL_bufptr = s - 1;
3751 break; /* don't bother looking for trailing comment */
3752 }
3753 else
3754 s = PL_bufptr;
3755 }
3756 if (optype == ']')
3757 break;
3758 /* FALLTHROUGH */
3759
3760 /* attach a trailing comment to its statement instead of next token */
3761 case ';':
cd81e915 3762 if (PL_faketokens)
5db06880
NC
3763 break;
3764 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
3765 s = PL_bufptr;
3766 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
3767 s++;
3768 if (*s == '\n' || *s == '#') {
3769 while (s < PL_bufend && *s != '\n')
3770 s++;
3771 if (s < PL_bufend)
3772 s++;
cd81e915
NC
3773 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
3774 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
3775 PL_thiswhite = 0;
5db06880
NC
3776 PL_bufptr = s;
3777 }
3778 }
3779 break;
3780
3781 /* pval */
3782 case LABEL:
3783 break;
3784
3785 /* ival */
3786 default:
3787 break;
3788
3789 }
3790
3791 /* Create new token struct. Note: opvals return early above. */
6154021b 3792 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 3793 PL_thismad = 0;
5db06880
NC
3794 return optype;
3795}
3796#endif
3797
468aa647 3798STATIC char *
cc6ed77d 3799S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 3800 dVAR;
7918f24d
NC
3801
3802 PERL_ARGS_ASSERT_TOKENIZE_USE;
3803
468aa647
RGS
3804 if (PL_expect != XSTATE)
3805 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
3806 is_use ? "use" : "no"));
29595ff2 3807 s = SKIPSPACE1(s);
468aa647
RGS
3808 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
3809 s = force_version(s, TRUE);
17c59fdf
VP
3810 if (*s == ';' || *s == '}'
3811 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 3812 start_force(PL_curforce);
9ded7720 3813 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
3814 force_next(WORD);
3815 }
3816 else if (*s == 'v') {
3817 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3818 s = force_version(s, FALSE);
3819 }
3820 }
3821 else {
3822 s = force_word(s,WORD,FALSE,TRUE,FALSE);
3823 s = force_version(s, FALSE);
3824 }
6154021b 3825 pl_yylval.ival = is_use;
468aa647
RGS
3826 return s;
3827}
748a9306 3828#ifdef DEBUGGING
27da23d5 3829 static const char* const exp_name[] =
09bef843 3830 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 3831 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 3832 };
748a9306 3833#endif
463ee0b2 3834
02aa26ce
NT
3835/*
3836 yylex
3837
3838 Works out what to call the token just pulled out of the input
3839 stream. The yacc parser takes care of taking the ops we return and
3840 stitching them into a tree.
3841
3842 Returns:
3843 PRIVATEREF
3844
3845 Structure:
3846 if read an identifier
3847 if we're in a my declaration
3848 croak if they tried to say my($foo::bar)
3849 build the ops for a my() declaration
3850 if it's an access to a my() variable
3851 are we in a sort block?
3852 croak if my($a); $a <=> $b
3853 build ops for access to a my() variable
3854 if in a dq string, and they've said @foo and we can't find @foo
3855 croak
3856 build ops for a bareword
3857 if we already built the token before, use it.
3858*/
3859
20141f0e 3860
dba4d153
JH
3861#ifdef __SC__
3862#pragma segment Perl_yylex
3863#endif
dba4d153 3864int
dba4d153 3865Perl_yylex(pTHX)
20141f0e 3866{
97aff369 3867 dVAR;
3afc138a 3868 register char *s = PL_bufptr;
378cc40b 3869 register char *d;
463ee0b2 3870 STRLEN len;
aa7440fb 3871 bool bof = FALSE;
580561a3 3872 U32 fake_eof = 0;
a687059c 3873
10edeb5d
JH
3874 /* orig_keyword, gvp, and gv are initialized here because
3875 * jump to the label just_a_word_zero can bypass their
3876 * initialization later. */
3877 I32 orig_keyword = 0;
3878 GV *gv = NULL;
3879 GV **gvp = NULL;
3880
bbf60fe6 3881 DEBUG_T( {
396482e1 3882 SV* tmp = newSVpvs("");
b6007c36
DM
3883 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
3884 (IV)CopLINE(PL_curcop),
3885 lex_state_names[PL_lex_state],
3886 exp_name[PL_expect],
3887 pv_display(tmp, s, strlen(s), 0, 60));
3888 SvREFCNT_dec(tmp);
bbf60fe6 3889 } );
02aa26ce 3890 /* check if there's an identifier for us to look at */
ba979b31 3891 if (PL_pending_ident)
bbf60fe6 3892 return REPORT(S_pending_ident(aTHX));
bbce6d69 3893
02aa26ce
NT
3894 /* no identifier pending identification */
3895
3280af22 3896 switch (PL_lex_state) {
79072805
LW
3897#ifdef COMMENTARY
3898 case LEX_NORMAL: /* Some compilers will produce faster */
3899 case LEX_INTERPNORMAL: /* code if we comment these out. */
3900 break;
3901#endif
3902
09bef843 3903 /* when we've already built the next token, just pull it out of the queue */
79072805 3904 case LEX_KNOWNEXT:
5db06880
NC
3905#ifdef PERL_MAD
3906 PL_lasttoke--;
6154021b 3907 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 3908 if (PL_madskills) {
cd81e915 3909 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 3910 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 3911 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 3912 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
3913 PL_thismad->mad_val = 0;
3914 mad_free(PL_thismad);
3915 PL_thismad = 0;
5db06880
NC
3916 }
3917 }
3918 if (!PL_lasttoke) {
3919 PL_lex_state = PL_lex_defer;
3920 PL_expect = PL_lex_expect;
3921 PL_lex_defer = LEX_NORMAL;
3922 if (!PL_nexttoke[PL_lasttoke].next_type)
3923 return yylex();
3924 }
3925#else
3280af22 3926 PL_nexttoke--;
6154021b 3927 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
3928 if (!PL_nexttoke) {
3929 PL_lex_state = PL_lex_defer;
3930 PL_expect = PL_lex_expect;
3931 PL_lex_defer = LEX_NORMAL;
463ee0b2 3932 }
5db06880
NC
3933#endif
3934#ifdef PERL_MAD
3935 /* FIXME - can these be merged? */
3936 return(PL_nexttoke[PL_lasttoke].next_type);
3937#else
bbf60fe6 3938 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 3939#endif
79072805 3940
02aa26ce 3941 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 3942 when we get here, PL_bufptr is at the \
02aa26ce 3943 */
79072805
LW
3944 case LEX_INTERPCASEMOD:
3945#ifdef DEBUGGING
3280af22 3946 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 3947 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 3948#endif
02aa26ce 3949 /* handle \E or end of string */
3280af22 3950 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 3951 /* if at a \E */
3280af22 3952 if (PL_lex_casemods) {
f54cb97a 3953 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 3954 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 3955
3792a11b
NC
3956 if (PL_bufptr != PL_bufend
3957 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
3958 PL_bufptr += 2;
3959 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
3960#ifdef PERL_MAD
3961 if (PL_madskills)
6b29d1f5 3962 PL_thistoken = newSVpvs("\\E");
5db06880 3963#endif
a0d0e21e 3964 }
bbf60fe6 3965 return REPORT(')');
79072805 3966 }
5db06880
NC
3967#ifdef PERL_MAD
3968 while (PL_bufptr != PL_bufend &&
3969 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 3970 if (!PL_thiswhite)
6b29d1f5 3971 PL_thiswhite = newSVpvs("");
cd81e915 3972 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
3973 PL_bufptr += 2;
3974 }
3975#else
3280af22
NIS
3976 if (PL_bufptr != PL_bufend)
3977 PL_bufptr += 2;
5db06880 3978#endif
3280af22 3979 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 3980 return yylex();
79072805
LW
3981 }
3982 else {
607df283 3983 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 3984 "### Saw case modifier\n"); });
3280af22 3985 s = PL_bufptr + 1;
6e909404 3986 if (s[1] == '\\' && s[2] == 'E') {
5db06880 3987#ifdef PERL_MAD
cd81e915 3988 if (!PL_thiswhite)
6b29d1f5 3989 PL_thiswhite = newSVpvs("");
cd81e915 3990 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 3991#endif
89122651 3992 PL_bufptr = s + 3;
6e909404
JH
3993 PL_lex_state = LEX_INTERPCONCAT;
3994 return yylex();
a0d0e21e 3995 }
6e909404 3996 else {
90771dc0 3997 I32 tmp;
5db06880
NC
3998 if (!PL_madskills) /* when just compiling don't need correct */
3999 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4000 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4001 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4002 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4003 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4004 return REPORT(')');
6e909404
JH
4005 }
4006 if (PL_lex_casemods > 10)
4007 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4008 PL_lex_casestack[PL_lex_casemods++] = *s;
4009 PL_lex_casestack[PL_lex_casemods] = '\0';
4010 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4011 start_force(PL_curforce);
9ded7720 4012 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4013 force_next('(');
cd81e915 4014 start_force(PL_curforce);
6e909404 4015 if (*s == 'l')
9ded7720 4016 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4017 else if (*s == 'u')
9ded7720 4018 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4019 else if (*s == 'L')
9ded7720 4020 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4021 else if (*s == 'U')
9ded7720 4022 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4023 else if (*s == 'Q')
9ded7720 4024 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4025 else
4026 Perl_croak(aTHX_ "panic: yylex");
5db06880 4027 if (PL_madskills) {
a5849ce5
NC
4028 SV* const tmpsv = newSVpvs("\\ ");
4029 /* replace the space with the character we want to escape
4030 */
4031 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4032 curmad('_', tmpsv);
4033 }
6e909404 4034 PL_bufptr = s + 1;
a0d0e21e 4035 }
79072805 4036 force_next(FUNC);
3280af22
NIS
4037 if (PL_lex_starts) {
4038 s = PL_bufptr;
4039 PL_lex_starts = 0;
5db06880
NC
4040#ifdef PERL_MAD
4041 if (PL_madskills) {
cd81e915
NC
4042 if (PL_thistoken)
4043 sv_free(PL_thistoken);
6b29d1f5 4044 PL_thistoken = newSVpvs("");
5db06880
NC
4045 }
4046#endif
131b3ad0
DM
4047 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4048 if (PL_lex_casemods == 1 && PL_lex_inpat)
4049 OPERATOR(',');
4050 else
4051 Aop(OP_CONCAT);
79072805
LW
4052 }
4053 else
cea2e8a9 4054 return yylex();
79072805
LW
4055 }
4056
55497cff 4057 case LEX_INTERPPUSH:
bbf60fe6 4058 return REPORT(sublex_push());
55497cff 4059
79072805 4060 case LEX_INTERPSTART:
3280af22 4061 if (PL_bufptr == PL_bufend)
bbf60fe6 4062 return REPORT(sublex_done());
607df283 4063 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4064 "### Interpolated variable\n"); });
3280af22
NIS
4065 PL_expect = XTERM;
4066 PL_lex_dojoin = (*PL_bufptr == '@');
4067 PL_lex_state = LEX_INTERPNORMAL;
4068 if (PL_lex_dojoin) {
cd81e915 4069 start_force(PL_curforce);
9ded7720 4070 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4071 force_next(',');
cd81e915 4072 start_force(PL_curforce);
a0d0e21e 4073 force_ident("\"", '$');
cd81e915 4074 start_force(PL_curforce);
9ded7720 4075 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4076 force_next('$');
cd81e915 4077 start_force(PL_curforce);
9ded7720 4078 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4079 force_next('(');
cd81e915 4080 start_force(PL_curforce);
9ded7720 4081 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4082 force_next(FUNC);
4083 }
3280af22
NIS
4084 if (PL_lex_starts++) {
4085 s = PL_bufptr;
5db06880
NC
4086#ifdef PERL_MAD
4087 if (PL_madskills) {
cd81e915
NC
4088 if (PL_thistoken)
4089 sv_free(PL_thistoken);
6b29d1f5 4090 PL_thistoken = newSVpvs("");
5db06880
NC
4091 }
4092#endif
131b3ad0
DM
4093 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4094 if (!PL_lex_casemods && PL_lex_inpat)
4095 OPERATOR(',');
4096 else
4097 Aop(OP_CONCAT);
79072805 4098 }
cea2e8a9 4099 return yylex();
79072805
LW
4100
4101 case LEX_INTERPENDMAYBE:
3280af22
NIS
4102 if (intuit_more(PL_bufptr)) {
4103 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4104 break;
4105 }
4106 /* FALL THROUGH */
4107
4108 case LEX_INTERPEND:
3280af22
NIS
4109 if (PL_lex_dojoin) {
4110 PL_lex_dojoin = FALSE;
4111 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4112#ifdef PERL_MAD
4113 if (PL_madskills) {
cd81e915
NC
4114 if (PL_thistoken)
4115 sv_free(PL_thistoken);
6b29d1f5 4116 PL_thistoken = newSVpvs("");
5db06880
NC
4117 }
4118#endif
bbf60fe6 4119 return REPORT(')');
79072805 4120 }
43a16006 4121 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4122 && SvEVALED(PL_lex_repl))
43a16006 4123 {
e9fa98b2 4124 if (PL_bufptr != PL_bufend)
cea2e8a9 4125 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4126 PL_lex_repl = NULL;
e9fa98b2 4127 }
79072805
LW
4128 /* FALLTHROUGH */
4129 case LEX_INTERPCONCAT:
4130#ifdef DEBUGGING
3280af22 4131 if (PL_lex_brackets)
cea2e8a9 4132 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4133#endif
3280af22 4134 if (PL_bufptr == PL_bufend)
bbf60fe6 4135 return REPORT(sublex_done());
79072805 4136
3280af22
NIS
4137 if (SvIVX(PL_linestr) == '\'') {
4138 SV *sv = newSVsv(PL_linestr);
4139 if (!PL_lex_inpat)
76e3520e 4140 sv = tokeq(sv);
3280af22 4141 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4142 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4143 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4144 s = PL_bufend;
79072805
LW
4145 }
4146 else {
3280af22 4147 s = scan_const(PL_bufptr);
79072805 4148 if (*s == '\\')
3280af22 4149 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4150 else
3280af22 4151 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4152 }
4153
3280af22 4154 if (s != PL_bufptr) {
cd81e915 4155 start_force(PL_curforce);
5db06880
NC
4156 if (PL_madskills) {
4157 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4158 }
6154021b 4159 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4160 PL_expect = XTERM;
79072805 4161 force_next(THING);
131b3ad0 4162 if (PL_lex_starts++) {
5db06880
NC
4163#ifdef PERL_MAD
4164 if (PL_madskills) {
cd81e915
NC
4165 if (PL_thistoken)
4166 sv_free(PL_thistoken);
6b29d1f5 4167 PL_thistoken = newSVpvs("");
5db06880
NC
4168 }
4169#endif
131b3ad0
DM
4170 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4171 if (!PL_lex_casemods && PL_lex_inpat)
4172 OPERATOR(',');
4173 else
4174 Aop(OP_CONCAT);
4175 }
79072805 4176 else {
3280af22 4177 PL_bufptr = s;
cea2e8a9 4178 return yylex();
79072805
LW
4179 }
4180 }
4181
cea2e8a9 4182 return yylex();
a0d0e21e 4183 case LEX_FORMLINE:
3280af22
NIS
4184 PL_lex_state = LEX_NORMAL;
4185 s = scan_formline(PL_bufptr);
4186 if (!PL_lex_formbrack)
a0d0e21e
LW
4187 goto rightbracket;
4188 OPERATOR(';');
79072805
LW
4189 }
4190
3280af22
NIS
4191 s = PL_bufptr;
4192 PL_oldoldbufptr = PL_oldbufptr;
4193 PL_oldbufptr = s;
463ee0b2
LW
4194
4195 retry:
5db06880 4196#ifdef PERL_MAD
cd81e915
NC
4197 if (PL_thistoken) {
4198 sv_free(PL_thistoken);
4199 PL_thistoken = 0;
5db06880 4200 }
cd81e915 4201 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4202#endif
378cc40b
LW
4203 switch (*s) {
4204 default:
7e2040f0 4205 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4206 goto keylookup;
b1fc3636
CJ
4207 {
4208 unsigned char c = *s;
4209 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4210 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4211 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4212 } else {
4213 d = PL_linestart;
4214 }
4215 *s = '\0';
4216 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4217 }
e929a76b
LW
4218 case 4:
4219 case 26:
4220 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4221 case 0:
5db06880
NC
4222#ifdef PERL_MAD
4223 if (PL_madskills)
cd81e915 4224 PL_faketokens = 0;
5db06880 4225#endif
3280af22
NIS
4226 if (!PL_rsfp) {
4227 PL_last_uni = 0;
4228 PL_last_lop = 0;
c5ee2135 4229 if (PL_lex_brackets) {
10edeb5d
JH
4230 yyerror((const char *)
4231 (PL_lex_formbrack
4232 ? "Format not terminated"
4233 : "Missing right curly or square bracket"));
c5ee2135 4234 }
4e553d73 4235 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4236 "### Tokener got EOF\n");
5f80b19c 4237 } );
79072805 4238 TOKEN(0);
463ee0b2 4239 }
3280af22 4240 if (s++ < PL_bufend)
a687059c 4241 goto retry; /* ignore stray nulls */
3280af22
NIS
4242 PL_last_uni = 0;
4243 PL_last_lop = 0;
4244 if (!PL_in_eval && !PL_preambled) {
4245 PL_preambled = TRUE;
5db06880
NC
4246#ifdef PERL_MAD
4247 if (PL_madskills)
cd81e915 4248 PL_faketokens = 1;
5db06880 4249#endif
5ab7ff98
NC
4250 if (PL_perldb) {
4251 /* Generate a string of Perl code to load the debugger.
4252 * If PERL5DB is set, it will return the contents of that,
4253 * otherwise a compile-time require of perl5db.pl. */
4254
4255 const char * const pdb = PerlEnv_getenv("PERL5DB");
4256
4257 if (pdb) {
4258 sv_setpv(PL_linestr, pdb);
4259 sv_catpvs(PL_linestr,";");
4260 } else {
4261 SETERRNO(0,SS_NORMAL);
4262 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4263 }
4264 } else
4265 sv_setpvs(PL_linestr,"");
c62eb204
NC
4266 if (PL_preambleav) {
4267 SV **svp = AvARRAY(PL_preambleav);
4268 SV **const end = svp + AvFILLp(PL_preambleav);
4269 while(svp <= end) {
4270 sv_catsv(PL_linestr, *svp);
4271 ++svp;
396482e1 4272 sv_catpvs(PL_linestr, ";");
91b7def8 4273 }
daba3364 4274 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4275 PL_preambleav = NULL;
91b7def8 4276 }
9f639728
FR
4277 if (PL_minus_E)
4278 sv_catpvs(PL_linestr,
4279 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4280 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4281 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4282 if (PL_minus_l)
396482e1 4283 sv_catpvs(PL_linestr,"chomp;");
3280af22 4284 if (PL_minus_a) {
3280af22 4285 if (PL_minus_F) {
3792a11b
NC
4286 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4287 || *PL_splitstr == '"')
3280af22 4288 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4289 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4290 else {
c8ef6a4b
NC
4291 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4292 bytes can be used as quoting characters. :-) */
dd374669 4293 const char *splits = PL_splitstr;
91d456ae 4294 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4295 do {
4296 /* Need to \ \s */
dd374669
AL
4297 if (*splits == '\\')
4298 sv_catpvn(PL_linestr, splits, 1);
4299 sv_catpvn(PL_linestr, splits, 1);
4300 } while (*splits++);
48c4c863
NC
4301 /* This loop will embed the trailing NUL of
4302 PL_linestr as the last thing it does before
4303 terminating. */
396482e1 4304 sv_catpvs(PL_linestr, ");");
54310121 4305 }
2304df62
AD
4306 }
4307 else
396482e1 4308 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4309 }
79072805 4310 }
396482e1 4311 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4312 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4313 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4314 PL_last_lop = PL_last_uni = NULL;
65269a95 4315 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4316 update_debugger_info(PL_linestr, NULL, 0);
79072805 4317 goto retry;
a687059c 4318 }
e929a76b 4319 do {
580561a3
Z
4320 fake_eof = 0;
4321 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4322 if (0) {
7e28d3af 4323 fake_eof:
f0e67a1d
Z
4324 fake_eof = LEX_FAKE_EOF;
4325 }
4326 PL_bufptr = PL_bufend;
17cc9359 4327 CopLINE_inc(PL_curcop);
f0e67a1d 4328 if (!lex_next_chunk(fake_eof)) {
17cc9359 4329 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4330 s = PL_bufptr;
4331 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4332 }
17cc9359 4333 CopLINE_dec(PL_curcop);
5db06880 4334#ifdef PERL_MAD
f0e67a1d 4335 if (!PL_rsfp)
cd81e915 4336 PL_realtokenstart = -1;
5db06880 4337#endif
f0e67a1d 4338 s = PL_bufptr;
7aa207d6
JH
4339 /* If it looks like the start of a BOM or raw UTF-16,
4340 * check if it in fact is. */
580561a3 4341 if (bof && PL_rsfp &&
7aa207d6
JH
4342 (*s == 0 ||
4343 *(U8*)s == 0xEF ||
4344 *(U8*)s >= 0xFE ||
4345 s[1] == 0)) {
eb160463 4346 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4347 if (bof) {
3280af22 4348 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4349 s = swallow_bom((U8*)s);
e929a76b 4350 }
378cc40b 4351 }
3280af22 4352 if (PL_doextract) {
a0d0e21e 4353 /* Incest with pod. */
5db06880
NC
4354#ifdef PERL_MAD
4355 if (PL_madskills)
cd81e915 4356 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4357#endif
01a57ef7 4358 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4359 sv_setpvs(PL_linestr, "");
3280af22
NIS
4360 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4361 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4362 PL_last_lop = PL_last_uni = NULL;
3280af22 4363 PL_doextract = FALSE;
a0d0e21e 4364 }
4e553d73 4365 }
85613cab
Z
4366 if (PL_rsfp)
4367 incline(s);
3280af22
NIS
4368 } while (PL_doextract);
4369 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4370 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4371 PL_last_lop = PL_last_uni = NULL;
57843af0 4372 if (CopLINE(PL_curcop) == 1) {
3280af22 4373 while (s < PL_bufend && isSPACE(*s))
79072805 4374 s++;
a0d0e21e 4375 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4376 s++;
5db06880
NC
4377#ifdef PERL_MAD
4378 if (PL_madskills)
cd81e915 4379 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4380#endif
bd61b366 4381 d = NULL;
3280af22 4382 if (!PL_in_eval) {
44a8e56a 4383 if (*s == '#' && *(s+1) == '!')
4384 d = s + 2;
4385#ifdef ALTERNATE_SHEBANG
4386 else {
bfed75c6 4387 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4388 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4389 d = s + (sizeof(as) - 1);
4390 }
4391#endif /* ALTERNATE_SHEBANG */
4392 }
4393 if (d) {
b8378b72 4394 char *ipath;
774d564b 4395 char *ipathend;
b8378b72 4396
774d564b 4397 while (isSPACE(*d))
b8378b72
CS
4398 d++;
4399 ipath = d;
774d564b 4400 while (*d && !isSPACE(*d))
4401 d++;
4402 ipathend = d;
4403
4404#ifdef ARG_ZERO_IS_SCRIPT
4405 if (ipathend > ipath) {
4406 /*
4407 * HP-UX (at least) sets argv[0] to the script name,
4408 * which makes $^X incorrect. And Digital UNIX and Linux,
4409 * at least, set argv[0] to the basename of the Perl
4410 * interpreter. So, having found "#!", we'll set it right.
4411 */
fafc274c
NC
4412 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4413 SVt_PV)); /* $^X */
774d564b 4414 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4415 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4416 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4417 SvSETMAGIC(x);
4418 }
556c1dec
JH
4419 else {
4420 STRLEN blen;
4421 STRLEN llen;
cfd0369c 4422 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4423 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4424 if (llen < blen) {
4425 bstart += blen - llen;
4426 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4427 sv_setpvn(x, ipath, ipathend - ipath);
4428 SvSETMAGIC(x);
4429 }
4430 }
4431 }
774d564b 4432 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4433 }
774d564b 4434#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4435
4436 /*
4437 * Look for options.
4438 */
748a9306 4439 d = instr(s,"perl -");
84e30d1a 4440 if (!d) {
748a9306 4441 d = instr(s,"perl");
84e30d1a
GS
4442#if defined(DOSISH)
4443 /* avoid getting into infinite loops when shebang
4444 * line contains "Perl" rather than "perl" */
4445 if (!d) {
4446 for (d = ipathend-4; d >= ipath; --d) {
4447 if ((*d == 'p' || *d == 'P')
4448 && !ibcmp(d, "perl", 4))
4449 {
4450 break;
4451 }
4452 }
4453 if (d < ipath)
bd61b366 4454 d = NULL;
84e30d1a
GS
4455 }
4456#endif
4457 }
44a8e56a 4458#ifdef ALTERNATE_SHEBANG
4459 /*
4460 * If the ALTERNATE_SHEBANG on this system starts with a
4461 * character that can be part of a Perl expression, then if
4462 * we see it but not "perl", we're probably looking at the
4463 * start of Perl code, not a request to hand off to some
4464 * other interpreter. Similarly, if "perl" is there, but
4465 * not in the first 'word' of the line, we assume the line
4466 * contains the start of the Perl program.
44a8e56a 4467 */
4468 if (d && *s != '#') {
f54cb97a 4469 const char *c = ipath;
44a8e56a 4470 while (*c && !strchr("; \t\r\n\f\v#", *c))
4471 c++;
4472 if (c < d)
bd61b366 4473 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4474 else
4475 *s = '#'; /* Don't try to parse shebang line */
4476 }
774d564b 4477#endif /* ALTERNATE_SHEBANG */
748a9306 4478 if (!d &&
44a8e56a 4479 *s == '#' &&
774d564b 4480 ipathend > ipath &&
3280af22 4481 !PL_minus_c &&
748a9306 4482 !instr(s,"indir") &&
3280af22 4483 instr(PL_origargv[0],"perl"))
748a9306 4484 {
27da23d5 4485 dVAR;
9f68db38 4486 char **newargv;
9f68db38 4487
774d564b 4488 *ipathend = '\0';
4489 s = ipathend + 1;
3280af22 4490 while (s < PL_bufend && isSPACE(*s))
9f68db38 4491 s++;
3280af22 4492 if (s < PL_bufend) {
d85f917e 4493 Newx(newargv,PL_origargc+3,char*);
9f68db38 4494 newargv[1] = s;
3280af22 4495 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4496 s++;
4497 *s = '\0';
3280af22 4498 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4499 }
4500 else
3280af22 4501 newargv = PL_origargv;
774d564b 4502 newargv[0] = ipath;
b35112e7 4503 PERL_FPU_PRE_EXEC
b4748376 4504 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4505 PERL_FPU_POST_EXEC
cea2e8a9 4506 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4507 }
748a9306 4508 if (d) {
c35e046a
AL
4509 while (*d && !isSPACE(*d))
4510 d++;
4511 while (SPACE_OR_TAB(*d))
4512 d++;
748a9306
LW
4513
4514 if (*d++ == '-') {
f54cb97a 4515 const bool switches_done = PL_doswitches;
fb993905
GA
4516 const U32 oldpdb = PL_perldb;
4517 const bool oldn = PL_minus_n;
4518 const bool oldp = PL_minus_p;
c7030b81 4519 const char *d1 = d;
fb993905 4520
8cc95fdb 4521 do {
4ba71d51
FC
4522 bool baduni = FALSE;
4523 if (*d1 == 'C') {
bd0ab00d
NC
4524 const char *d2 = d1 + 1;
4525 if (parse_unicode_opts((const char **)&d2)
4526 != PL_unicode)
4527 baduni = TRUE;
4ba71d51
FC
4528 }
4529 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4530 const char * const m = d1;
4531 while (*d1 && !isSPACE(*d1))
4532 d1++;
cea2e8a9 4533 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4534 (int)(d1 - m), m);
8cc95fdb 4535 }
c7030b81
NC
4536 d1 = moreswitches(d1);
4537 } while (d1);
f0b2cf55
YST
4538 if (PL_doswitches && !switches_done) {
4539 int argc = PL_origargc;
4540 char **argv = PL_origargv;
4541 do {
4542 argc--,argv++;
4543 } while (argc && argv[0][0] == '-' && argv[0][1]);
4544 init_argv_symbols(argc,argv);
4545 }
65269a95 4546 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4547 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4548 /* if we have already added "LINE: while (<>) {",
4549 we must not do it again */
748a9306 4550 {
76f68e9b 4551 sv_setpvs(PL_linestr, "");
3280af22
NIS
4552 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4553 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4554 PL_last_lop = PL_last_uni = NULL;
3280af22 4555 PL_preambled = FALSE;
65269a95 4556 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4557 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4558 goto retry;
4559 }
a0d0e21e 4560 }
79072805 4561 }
9f68db38 4562 }
79072805 4563 }
3280af22
NIS
4564 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4565 PL_bufptr = s;
4566 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4567 return yylex();
ae986130 4568 }
378cc40b 4569 goto retry;
4fdae800 4570 case '\r':
6a27c188 4571#ifdef PERL_STRICT_CR
cea2e8a9 4572 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4573 Perl_croak(aTHX_
cc507455 4574 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4575#endif
4fdae800 4576 case ' ': case '\t': case '\f': case 013:
5db06880 4577#ifdef PERL_MAD
cd81e915 4578 PL_realtokenstart = -1;
ac372eb8
RD
4579 if (!PL_thiswhite)
4580 PL_thiswhite = newSVpvs("");
4581 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4582#endif
ac372eb8 4583 s++;
378cc40b 4584 goto retry;
378cc40b 4585 case '#':
e929a76b 4586 case '\n':
5db06880 4587#ifdef PERL_MAD
cd81e915 4588 PL_realtokenstart = -1;
5db06880 4589 if (PL_madskills)
cd81e915 4590 PL_faketokens = 0;
5db06880 4591#endif
3280af22 4592 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4593 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4594 /* handle eval qq[#line 1 "foo"\n ...] */
4595 CopLINE_dec(PL_curcop);
4596 incline(s);
4597 }
5db06880
NC
4598 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4599 s = SKIPSPACE0(s);
4600 if (!PL_in_eval || PL_rsfp)
4601 incline(s);
4602 }
4603 else {
4604 d = s;
4605 while (d < PL_bufend && *d != '\n')
4606 d++;
4607 if (d < PL_bufend)
4608 d++;
4609 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4610 Perl_croak(aTHX_ "panic: input overflow");
4611#ifdef PERL_MAD
4612 if (PL_madskills)
cd81e915 4613 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4614#endif
4615 s = d;
4616 incline(s);
4617 }
3280af22
NIS
4618 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4619 PL_bufptr = s;
4620 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4621 return yylex();
a687059c 4622 }
378cc40b 4623 }
a687059c 4624 else {
5db06880
NC
4625#ifdef PERL_MAD
4626 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4627 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 4628 PL_faketokens = 0;
5db06880
NC
4629 s = SKIPSPACE0(s);
4630 TOKEN(PEG); /* make sure any #! line is accessible */
4631 }
4632 s = SKIPSPACE0(s);
4633 }
4634 else {
4635/* if (PL_madskills && PL_lex_formbrack) { */
4636 d = s;
4637 while (d < PL_bufend && *d != '\n')
4638 d++;
4639 if (d < PL_bufend)
4640 d++;
4641 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4642 Perl_croak(aTHX_ "panic: input overflow");
4643 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 4644 if (!PL_thiswhite)
6b29d1f5 4645 PL_thiswhite = newSVpvs("");
5db06880 4646 if (CopLINE(PL_curcop) == 1) {
76f68e9b 4647 sv_setpvs(PL_thiswhite, "");
cd81e915 4648 PL_faketokens = 0;
5db06880 4649 }
cd81e915 4650 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
4651 }
4652 s = d;
4653/* }
4654 *s = '\0';
4655 PL_bufend = s; */
4656 }
4657#else
378cc40b 4658 *s = '\0';
3280af22 4659 PL_bufend = s;
5db06880 4660#endif
a687059c 4661 }
378cc40b
LW
4662 goto retry;
4663 case '-':
79072805 4664 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 4665 I32 ftst = 0;
90771dc0 4666 char tmp;
e5edeb50 4667
378cc40b 4668 s++;
3280af22 4669 PL_bufptr = s;
748a9306
LW
4670 tmp = *s++;
4671
bf4acbe4 4672 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
4673 s++;
4674
4675 if (strnEQ(s,"=>",2)) {
3280af22 4676 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 4677 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
4678 OPERATOR('-'); /* unary minus */
4679 }
3280af22 4680 PL_last_uni = PL_oldbufptr;
748a9306 4681 switch (tmp) {
e5edeb50
JH
4682 case 'r': ftst = OP_FTEREAD; break;
4683 case 'w': ftst = OP_FTEWRITE; break;
4684 case 'x': ftst = OP_FTEEXEC; break;
4685 case 'o': ftst = OP_FTEOWNED; break;
4686 case 'R': ftst = OP_FTRREAD; break;
4687 case 'W': ftst = OP_FTRWRITE; break;
4688 case 'X': ftst = OP_FTREXEC; break;
4689 case 'O': ftst = OP_FTROWNED; break;
4690 case 'e': ftst = OP_FTIS; break;
4691 case 'z': ftst = OP_FTZERO; break;
4692 case 's': ftst = OP_FTSIZE; break;
4693 case 'f': ftst = OP_FTFILE; break;
4694 case 'd': ftst = OP_FTDIR; break;
4695 case 'l': ftst = OP_FTLINK; break;
4696 case 'p': ftst = OP_FTPIPE; break;
4697 case 'S': ftst = OP_FTSOCK; break;
4698 case 'u': ftst = OP_FTSUID; break;
4699 case 'g': ftst = OP_FTSGID; break;
4700 case 'k': ftst = OP_FTSVTX; break;
4701 case 'b': ftst = OP_FTBLK; break;
4702 case 'c': ftst = OP_FTCHR; break;
4703 case 't': ftst = OP_FTTTY; break;
4704 case 'T': ftst = OP_FTTEXT; break;
4705 case 'B': ftst = OP_FTBINARY; break;
4706 case 'M': case 'A': case 'C':
fafc274c 4707 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
4708 switch (tmp) {
4709 case 'M': ftst = OP_FTMTIME; break;
4710 case 'A': ftst = OP_FTATIME; break;
4711 case 'C': ftst = OP_FTCTIME; break;
4712 default: break;
4713 }
4714 break;
378cc40b 4715 default:
378cc40b
LW
4716 break;
4717 }
e5edeb50 4718 if (ftst) {
eb160463 4719 PL_last_lop_op = (OPCODE)ftst;
4e553d73 4720 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 4721 "### Saw file test %c\n", (int)tmp);
5f80b19c 4722 } );
e5edeb50
JH
4723 FTST(ftst);
4724 }
4725 else {
4726 /* Assume it was a minus followed by a one-letter named
4727 * subroutine call (or a -bareword), then. */
95c31fe3 4728 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 4729 "### '-%c' looked like a file test but was not\n",
4fccd7c6 4730 (int) tmp);
5f80b19c 4731 } );
3cf7b4c4 4732 s = --PL_bufptr;
e5edeb50 4733 }
378cc40b 4734 }
90771dc0
NC
4735 {
4736 const char tmp = *s++;
4737 if (*s == tmp) {
4738 s++;
4739 if (PL_expect == XOPERATOR)
4740 TERM(POSTDEC);
4741 else
4742 OPERATOR(PREDEC);
4743 }
4744 else if (*s == '>') {
4745 s++;
29595ff2 4746 s = SKIPSPACE1(s);
90771dc0
NC
4747 if (isIDFIRST_lazy_if(s,UTF)) {
4748 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
4749 TOKEN(ARROW);
4750 }
4751 else if (*s == '$')
4752 OPERATOR(ARROW);
4753 else
4754 TERM(ARROW);
4755 }
3280af22 4756 if (PL_expect == XOPERATOR)
90771dc0
NC
4757 Aop(OP_SUBTRACT);
4758 else {
4759 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4760 check_uni();
4761 OPERATOR('-'); /* unary minus */
79072805 4762 }
2f3197b3 4763 }
79072805 4764
378cc40b 4765 case '+':
90771dc0
NC
4766 {
4767 const char tmp = *s++;
4768 if (*s == tmp) {
4769 s++;
4770 if (PL_expect == XOPERATOR)
4771 TERM(POSTINC);
4772 else
4773 OPERATOR(PREINC);
4774 }
3280af22 4775 if (PL_expect == XOPERATOR)
90771dc0
NC
4776 Aop(OP_ADD);
4777 else {
4778 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
4779 check_uni();
4780 OPERATOR('+');
4781 }
2f3197b3 4782 }
a687059c 4783
378cc40b 4784 case '*':
3280af22
NIS
4785 if (PL_expect != XOPERATOR) {
4786 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
4787 PL_expect = XOPERATOR;
4788 force_ident(PL_tokenbuf, '*');
4789 if (!*PL_tokenbuf)
a0d0e21e 4790 PREREF('*');
79072805 4791 TERM('*');
a687059c 4792 }
79072805
LW
4793 s++;
4794 if (*s == '*') {
a687059c 4795 s++;
79072805 4796 PWop(OP_POW);
a687059c 4797 }
79072805
LW
4798 Mop(OP_MULTIPLY);
4799
378cc40b 4800 case '%':
3280af22 4801 if (PL_expect == XOPERATOR) {
bbce6d69 4802 ++s;
4803 Mop(OP_MODULO);
a687059c 4804 }
3280af22 4805 PL_tokenbuf[0] = '%';
e8ae98db
RGS
4806 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
4807 sizeof PL_tokenbuf - 1, FALSE);
3280af22 4808 if (!PL_tokenbuf[1]) {
bbce6d69 4809 PREREF('%');
a687059c 4810 }
3280af22 4811 PL_pending_ident = '%';
bbce6d69 4812 TERM('%');
a687059c 4813
378cc40b 4814 case '^':
79072805 4815 s++;
a0d0e21e 4816 BOop(OP_BIT_XOR);
79072805 4817 case '[':
3280af22 4818 PL_lex_brackets++;
df3467db
IG
4819 {
4820 const char tmp = *s++;
4821 OPERATOR(tmp);
4822 }
378cc40b 4823 case '~':
0d863452 4824 if (s[1] == '~'
3e7dd34d 4825 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
4826 {
4827 s += 2;
4828 Eop(OP_SMARTMATCH);
4829 }
378cc40b 4830 case ',':
90771dc0
NC
4831 {
4832 const char tmp = *s++;
4833 OPERATOR(tmp);
4834 }
a0d0e21e
LW
4835 case ':':
4836 if (s[1] == ':') {
4837 len = 0;
0bfa2a8a 4838 goto just_a_word_zero_gv;
a0d0e21e
LW
4839 }
4840 s++;
09bef843
SB
4841 switch (PL_expect) {
4842 OP *attrs;
5db06880
NC
4843#ifdef PERL_MAD
4844 I32 stuffstart;
4845#endif
09bef843
SB
4846 case XOPERATOR:
4847 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
4848 break;
4849 PL_bufptr = s; /* update in case we back off */
d83f38d8
NC
4850 if (*s == '=') {
4851 deprecate(":= for an empty attribute list");
4852 }
09bef843
SB
4853 goto grabattrs;
4854 case XATTRBLOCK:
4855 PL_expect = XBLOCK;
4856 goto grabattrs;
4857 case XATTRTERM:
4858 PL_expect = XTERMBLOCK;
4859 grabattrs:
5db06880
NC
4860#ifdef PERL_MAD
4861 stuffstart = s - SvPVX(PL_linestr) - 1;
4862#endif
29595ff2 4863 s = PEEKSPACE(s);
5f66b61c 4864 attrs = NULL;
7e2040f0 4865 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 4866 I32 tmp;
5cc237b8 4867 SV *sv;
09bef843 4868 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 4869 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
4870 if (tmp < 0) tmp = -tmp;
4871 switch (tmp) {
4872 case KEY_or:
4873 case KEY_and:
4874 case KEY_for:
11baf631 4875 case KEY_foreach:
f9829d6b
GS
4876 case KEY_unless:
4877 case KEY_if:
4878 case KEY_while:
4879 case KEY_until:
4880 goto got_attrs;
4881 default:
4882 break;
4883 }
4884 }
5cc237b8 4885 sv = newSVpvn(s, len);
09bef843
SB
4886 if (*d == '(') {
4887 d = scan_str(d,TRUE,TRUE);
4888 if (!d) {
09bef843
SB
4889 /* MUST advance bufptr here to avoid bogus
4890 "at end of line" context messages from yyerror().
4891 */
4892 PL_bufptr = s + len;
4893 yyerror("Unterminated attribute parameter in attribute list");
4894 if (attrs)
4895 op_free(attrs);
5cc237b8 4896 sv_free(sv);
bbf60fe6 4897 return REPORT(0); /* EOF indicator */
09bef843
SB
4898 }
4899 }
4900 if (PL_lex_stuff) {
09bef843
SB
4901 sv_catsv(sv, PL_lex_stuff);
4902 attrs = append_elem(OP_LIST, attrs,
4903 newSVOP(OP_CONST, 0, sv));
4904 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 4905 PL_lex_stuff = NULL;
09bef843
SB
4906 }
4907 else {
5cc237b8
BS
4908 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
4909 sv_free(sv);
1108974d 4910 if (PL_in_my == KEY_our) {
df9a6019 4911 deprecate(":unique");
1108974d 4912 }
bfed75c6 4913 else
371fce9b
DM
4914 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
4915 }
4916
d3cea301
SB
4917 /* NOTE: any CV attrs applied here need to be part of
4918 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
4919 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
4920 sv_free(sv);
78f9721b 4921 CvLVALUE_on(PL_compcv);
5cc237b8
BS
4922 }
4923 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
4924 sv_free(sv);
8e5dadda 4925 deprecate(":locked");
5cc237b8
BS
4926 }
4927 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
4928 sv_free(sv);
78f9721b 4929 CvMETHOD_on(PL_compcv);
5cc237b8 4930 }
78f9721b
SM
4931 /* After we've set the flags, it could be argued that
4932 we don't need to do the attributes.pm-based setting
4933 process, and shouldn't bother appending recognized
d3cea301
SB
4934 flags. To experiment with that, uncomment the
4935 following "else". (Note that's already been
4936 uncommented. That keeps the above-applied built-in
4937 attributes from being intercepted (and possibly
4938 rejected) by a package's attribute routines, but is
4939 justified by the performance win for the common case
4940 of applying only built-in attributes.) */
0256094b 4941 else
78f9721b
SM
4942 attrs = append_elem(OP_LIST, attrs,
4943 newSVOP(OP_CONST, 0,
5cc237b8 4944 sv));
09bef843 4945 }
29595ff2 4946 s = PEEKSPACE(d);
0120eecf 4947 if (*s == ':' && s[1] != ':')
29595ff2 4948 s = PEEKSPACE(s+1);
0120eecf
GS
4949 else if (s == d)
4950 break; /* require real whitespace or :'s */
29595ff2 4951 /* XXX losing whitespace on sequential attributes here */
09bef843 4952 }
90771dc0
NC
4953 {
4954 const char tmp
4955 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
4956 if (*s != ';' && *s != '}' && *s != tmp
4957 && (tmp != '=' || *s != ')')) {
4958 const char q = ((*s == '\'') ? '"' : '\'');
4959 /* If here for an expression, and parsed no attrs, back
4960 off. */
4961 if (tmp == '=' && !attrs) {
4962 s = PL_bufptr;
4963 break;
4964 }
4965 /* MUST advance bufptr here to avoid bogus "at end of line"
4966 context messages from yyerror().
4967 */
4968 PL_bufptr = s;
10edeb5d
JH
4969 yyerror( (const char *)
4970 (*s
4971 ? Perl_form(aTHX_ "Invalid separator character "
4972 "%c%c%c in attribute list", q, *s, q)
4973 : "Unterminated attribute list" ) );
90771dc0
NC
4974 if (attrs)
4975 op_free(attrs);
4976 OPERATOR(':');
09bef843 4977 }
09bef843 4978 }
f9829d6b 4979 got_attrs:
09bef843 4980 if (attrs) {
cd81e915 4981 start_force(PL_curforce);
9ded7720 4982 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 4983 CURMAD('_', PL_nextwhite);
89122651 4984 force_next(THING);
5db06880
NC
4985 }
4986#ifdef PERL_MAD
4987 if (PL_madskills) {
cd81e915 4988 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 4989 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 4990 }
5db06880 4991#endif
09bef843
SB
4992 TOKEN(COLONATTR);
4993 }
a0d0e21e 4994 OPERATOR(':');
8990e307
LW
4995 case '(':
4996 s++;
3280af22
NIS
4997 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
4998 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 4999 else
3280af22 5000 PL_expect = XTERM;
29595ff2 5001 s = SKIPSPACE1(s);
a0d0e21e 5002 TOKEN('(');
378cc40b 5003 case ';':
f4dd75d9 5004 CLINE;
90771dc0
NC
5005 {
5006 const char tmp = *s++;
5007 OPERATOR(tmp);
5008 }
378cc40b 5009 case ')':
90771dc0
NC
5010 {
5011 const char tmp = *s++;
29595ff2 5012 s = SKIPSPACE1(s);
90771dc0
NC
5013 if (*s == '{')
5014 PREBLOCK(tmp);
5015 TERM(tmp);
5016 }
79072805
LW
5017 case ']':
5018 s++;
3280af22 5019 if (PL_lex_brackets <= 0)
d98d5fff 5020 yyerror("Unmatched right square bracket");
463ee0b2 5021 else
3280af22
NIS
5022 --PL_lex_brackets;
5023 if (PL_lex_state == LEX_INTERPNORMAL) {
5024 if (PL_lex_brackets == 0) {
02255c60
FC
5025 if (*s == '-' && s[1] == '>')
5026 PL_lex_state = LEX_INTERPENDMAYBE;
5027 else if (*s != '[' && *s != '{')
3280af22 5028 PL_lex_state = LEX_INTERPEND;
79072805
LW
5029 }
5030 }
4633a7c4 5031 TERM(']');
79072805
LW
5032 case '{':
5033 leftbracket:
79072805 5034 s++;
3280af22 5035 if (PL_lex_brackets > 100) {
8edd5f42 5036 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5037 }
3280af22 5038 switch (PL_expect) {
a0d0e21e 5039 case XTERM:
3280af22 5040 if (PL_lex_formbrack) {
a0d0e21e
LW
5041 s--;
5042 PRETERMBLOCK(DO);
5043 }
3280af22
NIS
5044 if (PL_oldoldbufptr == PL_last_lop)
5045 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5046 else
3280af22 5047 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5048 OPERATOR(HASHBRACK);
a0d0e21e 5049 case XOPERATOR:
bf4acbe4 5050 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5051 s++;
44a8e56a 5052 d = s;
3280af22
NIS
5053 PL_tokenbuf[0] = '\0';
5054 if (d < PL_bufend && *d == '-') {
5055 PL_tokenbuf[0] = '-';
44a8e56a 5056 d++;
bf4acbe4 5057 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5058 d++;
5059 }
7e2040f0 5060 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5061 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5062 FALSE, &len);
bf4acbe4 5063 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5064 d++;
5065 if (*d == '}') {
f54cb97a 5066 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5067 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5068 if (minus)
5069 force_next('-');
748a9306
LW
5070 }
5071 }
5072 /* FALL THROUGH */
09bef843 5073 case XATTRBLOCK:
748a9306 5074 case XBLOCK:
3280af22
NIS
5075 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5076 PL_expect = XSTATE;
a0d0e21e 5077 break;
09bef843 5078 case XATTRTERM:
a0d0e21e 5079 case XTERMBLOCK:
3280af22
NIS
5080 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5081 PL_expect = XSTATE;
a0d0e21e
LW
5082 break;
5083 default: {
f54cb97a 5084 const char *t;
3280af22
NIS
5085 if (PL_oldoldbufptr == PL_last_lop)
5086 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5087 else
3280af22 5088 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5089 s = SKIPSPACE1(s);
8452ff4b
SB
5090 if (*s == '}') {
5091 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5092 PL_expect = XTERM;
5093 /* This hack is to get the ${} in the message. */
5094 PL_bufptr = s+1;
5095 yyerror("syntax error");
5096 break;
5097 }
a0d0e21e 5098 OPERATOR(HASHBRACK);
8452ff4b 5099 }
b8a4b1be
GS
5100 /* This hack serves to disambiguate a pair of curlies
5101 * as being a block or an anon hash. Normally, expectation
5102 * determines that, but in cases where we're not in a
5103 * position to expect anything in particular (like inside
5104 * eval"") we have to resolve the ambiguity. This code
5105 * covers the case where the first term in the curlies is a
5106 * quoted string. Most other cases need to be explicitly
a0288114 5107 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5108 * curly in order to force resolution as an anon hash.
5109 *
5110 * XXX should probably propagate the outer expectation
5111 * into eval"" to rely less on this hack, but that could
5112 * potentially break current behavior of eval"".
5113 * GSAR 97-07-21
5114 */
5115 t = s;
5116 if (*s == '\'' || *s == '"' || *s == '`') {
5117 /* common case: get past first string, handling escapes */
3280af22 5118 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5119 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5120 t++;
5121 t++;
a0d0e21e 5122 }
b8a4b1be 5123 else if (*s == 'q') {
3280af22 5124 if (++t < PL_bufend
b8a4b1be 5125 && (!isALNUM(*t)
3280af22 5126 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5127 && !isALNUM(*t))))
5128 {
abc667d1 5129 /* skip q//-like construct */
f54cb97a 5130 const char *tmps;
b8a4b1be
GS
5131 char open, close, term;
5132 I32 brackets = 1;
5133
3280af22 5134 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5135 t++;
abc667d1
DM
5136 /* check for q => */
5137 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5138 OPERATOR(HASHBRACK);
5139 }
b8a4b1be
GS
5140 term = *t;
5141 open = term;
5142 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5143 term = tmps[5];
5144 close = term;
5145 if (open == close)
3280af22
NIS
5146 for (t++; t < PL_bufend; t++) {
5147 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5148 t++;
6d07e5e9 5149 else if (*t == open)
b8a4b1be
GS
5150 break;
5151 }
abc667d1 5152 else {
3280af22
NIS
5153 for (t++; t < PL_bufend; t++) {
5154 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5155 t++;
6d07e5e9 5156 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5157 break;
5158 else if (*t == open)
5159 brackets++;
5160 }
abc667d1
DM
5161 }
5162 t++;
b8a4b1be 5163 }
abc667d1
DM
5164 else
5165 /* skip plain q word */
5166 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5167 t += UTF8SKIP(t);
a0d0e21e 5168 }
7e2040f0 5169 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5170 t += UTF8SKIP(t);
7e2040f0 5171 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5172 t += UTF8SKIP(t);
a0d0e21e 5173 }
3280af22 5174 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5175 t++;
b8a4b1be
GS
5176 /* if comma follows first term, call it an anon hash */
5177 /* XXX it could be a comma expression with loop modifiers */
3280af22 5178 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5179 || (*t == '=' && t[1] == '>')))
a0d0e21e 5180 OPERATOR(HASHBRACK);
3280af22 5181 if (PL_expect == XREF)
4e4e412b 5182 PL_expect = XTERM;
a0d0e21e 5183 else {
3280af22
NIS
5184 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5185 PL_expect = XSTATE;
a0d0e21e 5186 }
8990e307 5187 }
a0d0e21e 5188 break;
463ee0b2 5189 }
6154021b 5190 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5191 if (isSPACE(*s) || *s == '#')
3280af22 5192 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5193 TOKEN('{');
378cc40b 5194 case '}':
79072805
LW
5195 rightbracket:
5196 s++;
3280af22 5197 if (PL_lex_brackets <= 0)
d98d5fff 5198 yyerror("Unmatched right curly bracket");
463ee0b2 5199 else
3280af22 5200 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5201 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5202 PL_lex_formbrack = 0;
5203 if (PL_lex_state == LEX_INTERPNORMAL) {
5204 if (PL_lex_brackets == 0) {
9059aa12
LW
5205 if (PL_expect & XFAKEBRACK) {
5206 PL_expect &= XENUMMASK;
3280af22
NIS
5207 PL_lex_state = LEX_INTERPEND;
5208 PL_bufptr = s;
5db06880
NC
5209#if 0
5210 if (PL_madskills) {
cd81e915 5211 if (!PL_thiswhite)
6b29d1f5 5212 PL_thiswhite = newSVpvs("");
76f68e9b 5213 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5214 }
5215#endif
cea2e8a9 5216 return yylex(); /* ignore fake brackets */
79072805 5217 }
fa83b5b6 5218 if (*s == '-' && s[1] == '>')
3280af22 5219 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5220 else if (*s != '[' && *s != '{')
3280af22 5221 PL_lex_state = LEX_INTERPEND;
79072805
LW
5222 }
5223 }
9059aa12
LW
5224 if (PL_expect & XFAKEBRACK) {
5225 PL_expect &= XENUMMASK;
3280af22 5226 PL_bufptr = s;
cea2e8a9 5227 return yylex(); /* ignore fake brackets */
748a9306 5228 }
cd81e915 5229 start_force(PL_curforce);
5db06880
NC
5230 if (PL_madskills) {
5231 curmad('X', newSVpvn(s-1,1));
cd81e915 5232 CURMAD('_', PL_thiswhite);
5db06880 5233 }
79072805 5234 force_next('}');
5db06880 5235#ifdef PERL_MAD
cd81e915 5236 if (!PL_thistoken)
6b29d1f5 5237 PL_thistoken = newSVpvs("");
5db06880 5238#endif
79072805 5239 TOKEN(';');
378cc40b
LW
5240 case '&':
5241 s++;
90771dc0 5242 if (*s++ == '&')
a0d0e21e 5243 AOPERATOR(ANDAND);
378cc40b 5244 s--;
3280af22 5245 if (PL_expect == XOPERATOR) {
041457d9
DM
5246 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5247 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5248 {
57843af0 5249 CopLINE_dec(PL_curcop);
f1f66076 5250 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5251 CopLINE_inc(PL_curcop);
463ee0b2 5252 }
79072805 5253 BAop(OP_BIT_AND);
463ee0b2 5254 }
79072805 5255
3280af22
NIS
5256 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5257 if (*PL_tokenbuf) {
5258 PL_expect = XOPERATOR;
5259 force_ident(PL_tokenbuf, '&');
463ee0b2 5260 }
79072805
LW
5261 else
5262 PREREF('&');
6154021b 5263 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5264 TERM('&');
5265
378cc40b
LW
5266 case '|':
5267 s++;
90771dc0 5268 if (*s++ == '|')
a0d0e21e 5269 AOPERATOR(OROR);
378cc40b 5270 s--;
79072805 5271 BOop(OP_BIT_OR);
378cc40b
LW
5272 case '=':
5273 s++;
748a9306 5274 {
90771dc0
NC
5275 const char tmp = *s++;
5276 if (tmp == '=')
5277 Eop(OP_EQ);
5278 if (tmp == '>')
5279 OPERATOR(',');
5280 if (tmp == '~')
5281 PMop(OP_MATCH);
5282 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5283 && strchr("+-*/%.^&|<",tmp))
5284 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5285 "Reversed %c= operator",(int)tmp);
5286 s--;
5287 if (PL_expect == XSTATE && isALPHA(tmp) &&
5288 (s == PL_linestart+1 || s[-2] == '\n') )
5289 {
5290 if (PL_in_eval && !PL_rsfp) {
5291 d = PL_bufend;
5292 while (s < d) {
5293 if (*s++ == '\n') {
5294 incline(s);
5295 if (strnEQ(s,"=cut",4)) {
5296 s = strchr(s,'\n');
5297 if (s)
5298 s++;
5299 else
5300 s = d;
5301 incline(s);
5302 goto retry;
5303 }
5304 }
a5f75d66 5305 }
90771dc0 5306 goto retry;
a5f75d66 5307 }
5db06880
NC
5308#ifdef PERL_MAD
5309 if (PL_madskills) {
cd81e915 5310 if (!PL_thiswhite)
6b29d1f5 5311 PL_thiswhite = newSVpvs("");
cd81e915 5312 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5313 PL_bufend - PL_linestart);
5314 }
5315#endif
90771dc0
NC
5316 s = PL_bufend;
5317 PL_doextract = TRUE;
5318 goto retry;
a5f75d66 5319 }
a0d0e21e 5320 }
3280af22 5321 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5322 const char *t = s;
51882d45 5323#ifdef PERL_STRICT_CR
c35e046a 5324 while (SPACE_OR_TAB(*t))
51882d45 5325#else
c35e046a 5326 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5327#endif
c35e046a 5328 t++;
a0d0e21e
LW
5329 if (*t == '\n' || *t == '#') {
5330 s--;
3280af22 5331 PL_expect = XBLOCK;
a0d0e21e
LW
5332 goto leftbracket;
5333 }
79072805 5334 }
6154021b 5335 pl_yylval.ival = 0;
a0d0e21e 5336 OPERATOR(ASSIGNOP);
378cc40b
LW
5337 case '!':
5338 s++;
90771dc0
NC
5339 {
5340 const char tmp = *s++;
5341 if (tmp == '=') {
5342 /* was this !=~ where !~ was meant?
5343 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5344
5345 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5346 const char *t = s+1;
5347
5348 while (t < PL_bufend && isSPACE(*t))
5349 ++t;
5350
5351 if (*t == '/' || *t == '?' ||
5352 ((*t == 'm' || *t == 's' || *t == 'y')
5353 && !isALNUM(t[1])) ||
5354 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5355 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5356 "!=~ should be !~");
5357 }
5358 Eop(OP_NE);
5359 }
5360 if (tmp == '~')
5361 PMop(OP_NOT);
5362 }
378cc40b
LW
5363 s--;
5364 OPERATOR('!');
5365 case '<':
3280af22 5366 if (PL_expect != XOPERATOR) {
93a17b20 5367 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5368 check_uni();
79072805
LW
5369 if (s[1] == '<')
5370 s = scan_heredoc(s);
5371 else
5372 s = scan_inputsymbol(s);
5373 TERM(sublex_start());
378cc40b
LW
5374 }
5375 s++;
90771dc0
NC
5376 {
5377 char tmp = *s++;
5378 if (tmp == '<')
5379 SHop(OP_LEFT_SHIFT);
5380 if (tmp == '=') {
5381 tmp = *s++;
5382 if (tmp == '>')
5383 Eop(OP_NCMP);
5384 s--;
5385 Rop(OP_LE);
5386 }
395c3793 5387 }
378cc40b 5388 s--;
79072805 5389 Rop(OP_LT);
378cc40b
LW
5390 case '>':
5391 s++;
90771dc0
NC
5392 {
5393 const char tmp = *s++;
5394 if (tmp == '>')
5395 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5396 else if (tmp == '=')
90771dc0
NC
5397 Rop(OP_GE);
5398 }
378cc40b 5399 s--;
79072805 5400 Rop(OP_GT);
378cc40b
LW
5401
5402 case '$':
bbce6d69 5403 CLINE;
5404
3280af22
NIS
5405 if (PL_expect == XOPERATOR) {
5406 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5407 return deprecate_commaless_var_list();
a0d0e21e 5408 }
8990e307 5409 }
a0d0e21e 5410
7e2040f0 5411 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 5412 PL_tokenbuf[0] = '@';
376b8730
SM
5413 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5414 sizeof PL_tokenbuf - 1, FALSE);
5415 if (PL_expect == XOPERATOR)
5416 no_op("Array length", s);
3280af22 5417 if (!PL_tokenbuf[1])
a0d0e21e 5418 PREREF(DOLSHARP);
3280af22
NIS
5419 PL_expect = XOPERATOR;
5420 PL_pending_ident = '#';
463ee0b2 5421 TOKEN(DOLSHARP);
79072805 5422 }
bbce6d69 5423
3280af22 5424 PL_tokenbuf[0] = '$';
376b8730
SM
5425 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5426 sizeof PL_tokenbuf - 1, FALSE);
5427 if (PL_expect == XOPERATOR)
5428 no_op("Scalar", s);
3280af22
NIS
5429 if (!PL_tokenbuf[1]) {
5430 if (s == PL_bufend)
bbce6d69 5431 yyerror("Final $ should be \\$ or $name");
5432 PREREF('$');
8990e307 5433 }
a0d0e21e 5434
bbce6d69 5435 /* This kludge not intended to be bulletproof. */
3280af22 5436 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5437 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5438 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5439 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5440 TERM(THING);
5441 }
5442
ff68c719 5443 d = s;
90771dc0
NC
5444 {
5445 const char tmp = *s;
ae28bb2a 5446 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5447 s = SKIPSPACE1(s);
ff68c719 5448
90771dc0
NC
5449 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5450 && intuit_more(s)) {
5451 if (*s == '[') {
5452 PL_tokenbuf[0] = '@';
5453 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5454 char *t = s+1;
5455
5456 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5457 t++;
90771dc0 5458 if (*t++ == ',') {
29595ff2 5459 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5460 while (t < PL_bufend && *t != ']')
5461 t++;
9014280d 5462 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5463 "Multidimensional syntax %.*s not supported",
36c7798d 5464 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5465 }
748a9306 5466 }
93a17b20 5467 }
90771dc0
NC
5468 else if (*s == '{') {
5469 char *t;
5470 PL_tokenbuf[0] = '%';
5471 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5472 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5473 {
5474 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5475 do {
5476 t++;
5477 } while (isSPACE(*t));
90771dc0 5478 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5479 STRLEN len;
90771dc0 5480 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5481 &len);
c35e046a
AL
5482 while (isSPACE(*t))
5483 t++;
780a5241 5484 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5485 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5486 "You need to quote \"%s\"",
5487 tmpbuf);
5488 }
5489 }
5490 }
93a17b20 5491 }
bbce6d69 5492
90771dc0
NC
5493 PL_expect = XOPERATOR;
5494 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5495 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5496 if (!islop || PL_last_lop_op == OP_GREPSTART)
5497 PL_expect = XOPERATOR;
5498 else if (strchr("$@\"'`q", *s))
5499 PL_expect = XTERM; /* e.g. print $fh "foo" */
5500 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5501 PL_expect = XTERM; /* e.g. print $fh &sub */
5502 else if (isIDFIRST_lazy_if(s,UTF)) {
5503 char tmpbuf[sizeof PL_tokenbuf];
5504 int t2;
5505 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5506 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5507 /* binary operators exclude handle interpretations */
5508 switch (t2) {
5509 case -KEY_x:
5510 case -KEY_eq:
5511 case -KEY_ne:
5512 case -KEY_gt:
5513 case -KEY_lt:
5514 case -KEY_ge:
5515 case -KEY_le:
5516 case -KEY_cmp:
5517 break;
5518 default:
5519 PL_expect = XTERM; /* e.g. print $fh length() */
5520 break;
5521 }
5522 }
5523 else {
5524 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5525 }
5526 }
90771dc0
NC
5527 else if (isDIGIT(*s))
5528 PL_expect = XTERM; /* e.g. print $fh 3 */
5529 else if (*s == '.' && isDIGIT(s[1]))
5530 PL_expect = XTERM; /* e.g. print $fh .3 */
5531 else if ((*s == '?' || *s == '-' || *s == '+')
5532 && !isSPACE(s[1]) && s[1] != '=')
5533 PL_expect = XTERM; /* e.g. print $fh -1 */
5534 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5535 && s[1] != '/')
5536 PL_expect = XTERM; /* e.g. print $fh /.../
5537 XXX except DORDOR operator
5538 */
5539 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5540 && s[2] != '=')
5541 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5542 }
bbce6d69 5543 }
3280af22 5544 PL_pending_ident = '$';
79072805 5545 TOKEN('$');
378cc40b
LW
5546
5547 case '@':
3280af22 5548 if (PL_expect == XOPERATOR)
bbce6d69 5549 no_op("Array", s);
3280af22
NIS
5550 PL_tokenbuf[0] = '@';
5551 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5552 if (!PL_tokenbuf[1]) {
bbce6d69 5553 PREREF('@');
5554 }
3280af22 5555 if (PL_lex_state == LEX_NORMAL)
29595ff2 5556 s = SKIPSPACE1(s);
3280af22 5557 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5558 if (*s == '{')
3280af22 5559 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5560
5561 /* Warn about @ where they meant $. */
041457d9
DM
5562 if (*s == '[' || *s == '{') {
5563 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5564 const char *t = s + 1;
7e2040f0 5565 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5566 t++;
5567 if (*t == '}' || *t == ']') {
5568 t++;
29595ff2 5569 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5570 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5571 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5572 (int)(t-PL_bufptr), PL_bufptr,
5573 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5574 }
93a17b20
LW
5575 }
5576 }
463ee0b2 5577 }
3280af22 5578 PL_pending_ident = '@';
79072805 5579 TERM('@');
378cc40b 5580
c963b151 5581 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5582 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5583 s += 2;
5584 AOPERATOR(DORDOR);
5585 }
c963b151 5586 case '?': /* may either be conditional or pattern */
be25f609 5587 if (PL_expect == XOPERATOR) {
90771dc0 5588 char tmp = *s++;
c963b151 5589 if(tmp == '?') {
be25f609 5590 OPERATOR('?');
c963b151
BD
5591 }
5592 else {
5593 tmp = *s++;
5594 if(tmp == '/') {
5595 /* A // operator. */
5596 AOPERATOR(DORDOR);
5597 }
5598 else {
5599 s--;
5600 Mop(OP_DIVIDE);
5601 }
5602 }
5603 }
5604 else {
5605 /* Disable warning on "study /blah/" */
5606 if (PL_oldoldbufptr == PL_last_uni
5607 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5608 || memNE(PL_last_uni, "study", 5)
5609 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5610 ))
5611 check_uni();
5612 s = scan_pat(s,OP_MATCH);
5613 TERM(sublex_start());
5614 }
378cc40b
LW
5615
5616 case '.':
51882d45
GS
5617 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5618#ifdef PERL_STRICT_CR
5619 && s[1] == '\n'
5620#else
5621 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5622#endif
5623 && (s == PL_linestart || s[-1] == '\n') )
5624 {
3280af22
NIS
5625 PL_lex_formbrack = 0;
5626 PL_expect = XSTATE;
79072805
LW
5627 goto rightbracket;
5628 }
be25f609 5629 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5630 s += 3;
5631 OPERATOR(YADAYADA);
5632 }
3280af22 5633 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 5634 char tmp = *s++;
a687059c
LW
5635 if (*s == tmp) {
5636 s++;
2f3197b3
LW
5637 if (*s == tmp) {
5638 s++;
6154021b 5639 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
5640 }
5641 else
6154021b 5642 pl_yylval.ival = 0;
378cc40b 5643 OPERATOR(DOTDOT);
a687059c 5644 }
79072805 5645 Aop(OP_CONCAT);
378cc40b
LW
5646 }
5647 /* FALL THROUGH */
5648 case '0': case '1': case '2': case '3': case '4':
5649 case '5': case '6': case '7': case '8': case '9':
6154021b 5650 s = scan_num(s, &pl_yylval);
931e0695 5651 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 5652 if (PL_expect == XOPERATOR)
8990e307 5653 no_op("Number",s);
79072805
LW
5654 TERM(THING);
5655
5656 case '\'':
5db06880 5657 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5658 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5659 if (PL_expect == XOPERATOR) {
5660 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5661 return deprecate_commaless_var_list();
a0d0e21e 5662 }
463ee0b2 5663 else
8990e307 5664 no_op("String",s);
463ee0b2 5665 }
79072805 5666 if (!s)
d4c19fe8 5667 missingterm(NULL);
6154021b 5668 pl_yylval.ival = OP_CONST;
79072805
LW
5669 TERM(sublex_start());
5670
5671 case '"':
5db06880 5672 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5673 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5674 if (PL_expect == XOPERATOR) {
5675 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5676 return deprecate_commaless_var_list();
a0d0e21e 5677 }
463ee0b2 5678 else
8990e307 5679 no_op("String",s);
463ee0b2 5680 }
79072805 5681 if (!s)
d4c19fe8 5682 missingterm(NULL);
6154021b 5683 pl_yylval.ival = OP_CONST;
cfd0369c
NC
5684 /* FIXME. I think that this can be const if char *d is replaced by
5685 more localised variables. */
3280af22 5686 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 5687 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 5688 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
5689 break;
5690 }
5691 }
79072805
LW
5692 TERM(sublex_start());
5693
5694 case '`':
5db06880 5695 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5696 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 5697 if (PL_expect == XOPERATOR)
8990e307 5698 no_op("Backticks",s);
79072805 5699 if (!s)
d4c19fe8 5700 missingterm(NULL);
9b201d7d 5701 readpipe_override();
79072805
LW
5702 TERM(sublex_start());
5703
5704 case '\\':
5705 s++;
a2a5de95
NC
5706 if (PL_lex_inwhat && isDIGIT(*s))
5707 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
5708 *s, *s);
3280af22 5709 if (PL_expect == XOPERATOR)
8990e307 5710 no_op("Backslash",s);
79072805
LW
5711 OPERATOR(REFGEN);
5712
a7cb1f99 5713 case 'v':
e526c9e6 5714 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 5715 char *start = s + 2;
dd629d5b 5716 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
5717 start++;
5718 if (*start == '.' && isDIGIT(start[1])) {
6154021b 5719 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
5720 TERM(THING);
5721 }
e526c9e6 5722 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
5723 else if (!isALPHA(*start) && (PL_expect == XTERM
5724 || PL_expect == XREF || PL_expect == XSTATE
5725 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 5726 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 5727 if (!gv) {
6154021b 5728 s = scan_num(s, &pl_yylval);
e526c9e6
GS
5729 TERM(THING);
5730 }
5731 }
a7cb1f99
GS
5732 }
5733 goto keylookup;
79072805 5734 case 'x':
3280af22 5735 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
5736 s++;
5737 Mop(OP_REPEAT);
2f3197b3 5738 }
79072805
LW
5739 goto keylookup;
5740
378cc40b 5741 case '_':
79072805
LW
5742 case 'a': case 'A':
5743 case 'b': case 'B':
5744 case 'c': case 'C':
5745 case 'd': case 'D':
5746 case 'e': case 'E':
5747 case 'f': case 'F':
5748 case 'g': case 'G':
5749 case 'h': case 'H':
5750 case 'i': case 'I':
5751 case 'j': case 'J':
5752 case 'k': case 'K':
5753 case 'l': case 'L':
5754 case 'm': case 'M':
5755 case 'n': case 'N':
5756 case 'o': case 'O':
5757 case 'p': case 'P':
5758 case 'q': case 'Q':
5759 case 'r': case 'R':
5760 case 's': case 'S':
5761 case 't': case 'T':
5762 case 'u': case 'U':
a7cb1f99 5763 case 'V':
79072805
LW
5764 case 'w': case 'W':
5765 case 'X':
5766 case 'y': case 'Y':
5767 case 'z': case 'Z':
5768
49dc05e3 5769 keylookup: {
88e1f1a2 5770 bool anydelim;
90771dc0 5771 I32 tmp;
10edeb5d
JH
5772
5773 orig_keyword = 0;
5774 gv = NULL;
5775 gvp = NULL;
49dc05e3 5776
3280af22
NIS
5777 PL_bufptr = s;
5778 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 5779
5780 /* Some keywords can be followed by any delimiter, including ':' */
88e1f1a2 5781 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
155aba94
GS
5782 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
5783 (PL_tokenbuf[0] == 'q' &&
5784 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 5785
5786 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 5787 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
5788 goto just_a_word;
5789
3643fb5f 5790 d = s;
3280af22 5791 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
5792 d++; /* no comments skipped here, or s### is misparsed */
5793
748a9306 5794 /* Is this a word before a => operator? */
1c3923b3 5795 if (*d == '=' && d[1] == '>') {
748a9306 5796 CLINE;
6154021b 5797 pl_yylval.opval
d0a148a6
NC
5798 = (OP*)newSVOP(OP_CONST, 0,
5799 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 5800 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
5801 TERM(WORD);
5802 }
5803
88e1f1a2
JV
5804 /* Check for plugged-in keyword */
5805 {
5806 OP *o;
5807 int result;
5808 char *saved_bufptr = PL_bufptr;
5809 PL_bufptr = s;
5810 result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
5811 s = PL_bufptr;
5812 if (result == KEYWORD_PLUGIN_DECLINE) {
5813 /* not a plugged-in keyword */
5814 PL_bufptr = saved_bufptr;
5815 } else if (result == KEYWORD_PLUGIN_STMT) {
5816 pl_yylval.opval = o;
5817 CLINE;
5818 PL_expect = XSTATE;
5819 return REPORT(PLUGSTMT);
5820 } else if (result == KEYWORD_PLUGIN_EXPR) {
5821 pl_yylval.opval = o;
5822 CLINE;
5823 PL_expect = XOPERATOR;
5824 return REPORT(PLUGEXPR);
5825 } else {
5826 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
5827 PL_tokenbuf);
5828 }
5829 }
5830
5831 /* Check for built-in keyword */
5832 tmp = keyword(PL_tokenbuf, len, 0);
5833
5834 /* Is this a label? */
5835 if (!anydelim && PL_expect == XSTATE
5836 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
5837 if (tmp)
5838 Perl_croak(aTHX_ "Can't use keyword '%s' as a label", PL_tokenbuf);
5839 s = d + 1;
5840 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
5841 CLINE;
5842 TOKEN(LABEL);
5843 }
5844
a0d0e21e 5845 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
5846 GV *ogv = NULL; /* override (winner) */
5847 GV *hgv = NULL; /* hidden (loser) */
3280af22 5848 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 5849 CV *cv;
90e5519e 5850 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
5851 (cv = GvCVu(gv)))
5852 {
5853 if (GvIMPORTED_CV(gv))
5854 ogv = gv;
5855 else if (! CvMETHOD(cv))
5856 hgv = gv;
5857 }
5858 if (!ogv &&
3280af22 5859 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 5860 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
5861 GvCVu(gv) && GvIMPORTED_CV(gv))
5862 {
5863 ogv = gv;
5864 }
5865 }
5866 if (ogv) {
30fe34ed 5867 orig_keyword = tmp;
56f7f34b 5868 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
5869 }
5870 else if (gv && !gvp
5871 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 5872 && GvCVu(gv))
6e7b2336
GS
5873 {
5874 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 5875 }
56f7f34b
CS
5876 else { /* no override */
5877 tmp = -tmp;
a2a5de95
NC
5878 if (tmp == KEY_dump) {
5879 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
5880 "dump() better written as CORE::dump()");
ac206dc8 5881 }
a0714e2c 5882 gv = NULL;
56f7f34b 5883 gvp = 0;
a2a5de95
NC
5884 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
5885 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
5886 "Ambiguous call resolved as CORE::%s(), %s",
5887 GvENAME(hgv), "qualify as such or use &");
49dc05e3 5888 }
a0d0e21e
LW
5889 }
5890
5891 reserved_word:
5892 switch (tmp) {
79072805
LW
5893
5894 default: /* not a keyword */
0bfa2a8a
NC
5895 /* Trade off - by using this evil construction we can pull the
5896 variable gv into the block labelled keylookup. If not, then
5897 we have to give it function scope so that the goto from the
5898 earlier ':' case doesn't bypass the initialisation. */
5899 if (0) {
5900 just_a_word_zero_gv:
5901 gv = NULL;
5902 gvp = NULL;
8bee0991 5903 orig_keyword = 0;
0bfa2a8a 5904 }
93a17b20 5905 just_a_word: {
96e4d5b1 5906 SV *sv;
ce29ac45 5907 int pkgname = 0;
f54cb97a 5908 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 5909 OP *rv2cv_op;
5069cc75 5910 CV *cv;
5db06880 5911#ifdef PERL_MAD
cd81e915 5912 SV *nextPL_nextwhite = 0;
5db06880
NC
5913#endif
5914
8990e307
LW
5915
5916 /* Get the rest if it looks like a package qualifier */
5917
155aba94 5918 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 5919 STRLEN morelen;
3280af22 5920 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
5921 TRUE, &morelen);
5922 if (!morelen)
cea2e8a9 5923 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 5924 *s == '\'' ? "'" : "::");
c3e0f903 5925 len += morelen;
ce29ac45 5926 pkgname = 1;
a0d0e21e 5927 }
8990e307 5928
3280af22
NIS
5929 if (PL_expect == XOPERATOR) {
5930 if (PL_bufptr == PL_linestart) {
57843af0 5931 CopLINE_dec(PL_curcop);
f1f66076 5932 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5933 CopLINE_inc(PL_curcop);
463ee0b2
LW
5934 }
5935 else
54310121 5936 no_op("Bareword",s);
463ee0b2 5937 }
8990e307 5938
c3e0f903
GS
5939 /* Look for a subroutine with this name in current package,
5940 unless name is "Foo::", in which case Foo is a bearword
5941 (and a package name). */
5942
5db06880 5943 if (len > 2 && !PL_madskills &&
3280af22 5944 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 5945 {
f776e3cd 5946 if (ckWARN(WARN_BAREWORD)
90e5519e 5947 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 5948 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 5949 "Bareword \"%s\" refers to nonexistent package",
3280af22 5950 PL_tokenbuf);
c3e0f903 5951 len -= 2;
3280af22 5952 PL_tokenbuf[len] = '\0';
a0714e2c 5953 gv = NULL;
c3e0f903
GS
5954 gvp = 0;
5955 }
5956 else {
62d55b22
NC
5957 if (!gv) {
5958 /* Mustn't actually add anything to a symbol table.
5959 But also don't want to "initialise" any placeholder
5960 constants that might already be there into full
5961 blown PVGVs with attached PVCV. */
90e5519e
NC
5962 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
5963 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 5964 }
b3d904f3 5965 len = 0;
c3e0f903
GS
5966 }
5967
5968 /* if we saw a global override before, get the right name */
8990e307 5969
49dc05e3 5970 if (gvp) {
396482e1 5971 sv = newSVpvs("CORE::GLOBAL::");
3280af22 5972 sv_catpv(sv,PL_tokenbuf);
49dc05e3 5973 }
8a7a129d
NC
5974 else {
5975 /* If len is 0, newSVpv does strlen(), which is correct.
5976 If len is non-zero, then it will be the true length,
5977 and so the scalar will be created correctly. */
5978 sv = newSVpv(PL_tokenbuf,len);
5979 }
5db06880 5980#ifdef PERL_MAD
cd81e915
NC
5981 if (PL_madskills && !PL_thistoken) {
5982 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 5983 PL_thistoken = newSVpvn(start,s - start);
cd81e915 5984 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
5985 }
5986#endif
8990e307 5987
a0d0e21e
LW
5988 /* Presume this is going to be a bareword of some sort. */
5989
5990 CLINE;
6154021b
RGS
5991 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
5992 pl_yylval.opval->op_private = OPpCONST_BARE;
8f8cf39c
JH
5993 /* UTF-8 package name? */
5994 if (UTF && !IN_BYTES &&
95a20fc0 5995 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
8f8cf39c 5996 SvUTF8_on(sv);
a0d0e21e 5997
c3e0f903
GS
5998 /* And if "Foo::", then that's what it certainly is. */
5999
6000 if (len)
6001 goto safe_bareword;
6002
f7461760
Z
6003 cv = NULL;
6004 {
6005 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6006 const_op->op_private = OPpCONST_BARE;
6007 rv2cv_op = newCVREF(0, const_op);
6008 }
6009 if (rv2cv_op->op_type == OP_RV2CV &&
6010 (rv2cv_op->op_flags & OPf_KIDS)) {
6011 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6012 switch (rv_op->op_type) {
6013 case OP_CONST: {
6014 SV *sv = cSVOPx_sv(rv_op);
6015 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6016 cv = (CV*)SvRV(sv);
6017 } break;
6018 case OP_GV: {
6019 GV *gv = cGVOPx_gv(rv_op);
6020 CV *maybe_cv = GvCVu(gv);
6021 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6022 cv = maybe_cv;
6023 } break;
6024 }
6025 }
5069cc75 6026
8990e307
LW
6027 /* See if it's the indirect object for a list operator. */
6028
3280af22
NIS
6029 if (PL_oldoldbufptr &&
6030 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6031 (PL_oldoldbufptr == PL_last_lop
6032 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6033 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6034 (PL_expect == XREF ||
6035 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6036 {
748a9306
LW
6037 bool immediate_paren = *s == '(';
6038
a0d0e21e 6039 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6040 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6041#ifdef PERL_MAD
cd81e915 6042 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6043#endif
a0d0e21e
LW
6044
6045 /* Two barewords in a row may indicate method call. */
6046
62d55b22 6047 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6048 (tmp = intuit_method(s, gv, cv))) {
6049 op_free(rv2cv_op);
bbf60fe6 6050 return REPORT(tmp);
f7461760 6051 }
a0d0e21e
LW
6052
6053 /* If not a declared subroutine, it's an indirect object. */
6054 /* (But it's an indir obj regardless for sort.) */
7294df96 6055 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6056
7294df96
RGS
6057 if (
6058 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6059 (!cv &&
a9ef352a 6060 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6061 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6062 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6063 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6064 )
a9ef352a 6065 {
3280af22 6066 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6067 goto bareword;
93a17b20
LW
6068 }
6069 }
8990e307 6070
3280af22 6071 PL_expect = XOPERATOR;
5db06880
NC
6072#ifdef PERL_MAD
6073 if (isSPACE(*s))
cd81e915
NC
6074 s = SKIPSPACE2(s,nextPL_nextwhite);
6075 PL_nextwhite = nextPL_nextwhite;
5db06880 6076#else
8990e307 6077 s = skipspace(s);
5db06880 6078#endif
1c3923b3
GS
6079
6080 /* Is this a word before a => operator? */
ce29ac45 6081 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6082 op_free(rv2cv_op);
1c3923b3 6083 CLINE;
6154021b 6084 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6085 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6086 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6087 TERM(WORD);
6088 }
6089
6090 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6091 if (*s == '(') {
79072805 6092 CLINE;
5069cc75 6093 if (cv) {
c35e046a
AL
6094 d = s + 1;
6095 while (SPACE_OR_TAB(*d))
6096 d++;
f7461760 6097 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6098 s = d + 1;
c631f32b 6099 goto its_constant;
96e4d5b1 6100 }
6101 }
5db06880
NC
6102#ifdef PERL_MAD
6103 if (PL_madskills) {
cd81e915
NC
6104 PL_nextwhite = PL_thiswhite;
6105 PL_thiswhite = 0;
5db06880 6106 }
cd81e915 6107 start_force(PL_curforce);
5db06880 6108#endif
6154021b 6109 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6110 PL_expect = XOPERATOR;
5db06880
NC
6111#ifdef PERL_MAD
6112 if (PL_madskills) {
cd81e915
NC
6113 PL_nextwhite = nextPL_nextwhite;
6114 curmad('X', PL_thistoken);
6b29d1f5 6115 PL_thistoken = newSVpvs("");
5db06880
NC
6116 }
6117#endif
f7461760 6118 op_free(rv2cv_op);
93a17b20 6119 force_next(WORD);
6154021b 6120 pl_yylval.ival = 0;
463ee0b2 6121 TOKEN('&');
79072805 6122 }
93a17b20 6123
a0d0e21e 6124 /* If followed by var or block, call it a method (unless sub) */
8990e307 6125
f7461760
Z
6126 if ((*s == '$' || *s == '{') && !cv) {
6127 op_free(rv2cv_op);
3280af22
NIS
6128 PL_last_lop = PL_oldbufptr;
6129 PL_last_lop_op = OP_METHOD;
93a17b20 6130 PREBLOCK(METHOD);
463ee0b2
LW
6131 }
6132
8990e307
LW
6133 /* If followed by a bareword, see if it looks like indir obj. */
6134
30fe34ed
RGS
6135 if (!orig_keyword
6136 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6137 && (tmp = intuit_method(s, gv, cv))) {
6138 op_free(rv2cv_op);
bbf60fe6 6139 return REPORT(tmp);
f7461760 6140 }
93a17b20 6141
8990e307
LW
6142 /* Not a method, so call it a subroutine (if defined) */
6143
5069cc75 6144 if (cv) {
9b387841
NC
6145 if (lastchar == '-')
6146 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6147 "Ambiguous use of -%s resolved as -&%s()",
6148 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6149 /* Check for a constant sub */
f7461760 6150 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6151 its_constant:
f7461760 6152 op_free(rv2cv_op);
6154021b
RGS
6153 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6154 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6155 pl_yylval.opval->op_private = 0;
96e4d5b1 6156 TOKEN(WORD);
89bfa8cd 6157 }
6158
6154021b 6159 op_free(pl_yylval.opval);
f7461760 6160 pl_yylval.opval = rv2cv_op;
6154021b 6161 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6162 PL_last_lop = PL_oldbufptr;
bf848113 6163 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6164 /* Is there a prototype? */
5db06880
NC
6165 if (
6166#ifdef PERL_MAD
6167 cv &&
6168#endif
d9f2850e
RGS
6169 SvPOK(cv))
6170 {
5f66b61c 6171 STRLEN protolen;
daba3364 6172 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6173 if (!protolen)
4633a7c4 6174 TERM(FUNC0SUB);
8c28b960 6175 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
4633a7c4 6176 OPERATOR(UNIOPSUB);
0f5d0394
AE
6177 while (*proto == ';')
6178 proto++;
7a52d87a 6179 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6180 if (PL_curstash)
6181 sv_setpvs(PL_subname, "__ANON__");
6182 else
6183 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6184 PREBLOCK(LSTOPSUB);
6185 }
a9ef352a 6186 }
5db06880
NC
6187#ifdef PERL_MAD
6188 {
6189 if (PL_madskills) {
cd81e915
NC
6190 PL_nextwhite = PL_thiswhite;
6191 PL_thiswhite = 0;
5db06880 6192 }
cd81e915 6193 start_force(PL_curforce);
6154021b 6194 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6195 PL_expect = XTERM;
6196 if (PL_madskills) {
cd81e915
NC
6197 PL_nextwhite = nextPL_nextwhite;
6198 curmad('X', PL_thistoken);
6b29d1f5 6199 PL_thistoken = newSVpvs("");
5db06880
NC
6200 }
6201 force_next(WORD);
6202 TOKEN(NOAMP);
6203 }
6204 }
6205
6206 /* Guess harder when madskills require "best effort". */
6207 if (PL_madskills && (!gv || !GvCVu(gv))) {
6208 int probable_sub = 0;
6209 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6210 probable_sub = 1;
6211 else if (isALPHA(*s)) {
6212 char tmpbuf[1024];
6213 STRLEN tmplen;
6214 d = s;
6215 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6216 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6217 probable_sub = 1;
6218 else {
6219 while (d < PL_bufend && isSPACE(*d))
6220 d++;
6221 if (*d == '=' && d[1] == '>')
6222 probable_sub = 1;
6223 }
6224 }
6225 if (probable_sub) {
7a6d04f4 6226 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6227 op_free(pl_yylval.opval);
f7461760 6228 pl_yylval.opval = rv2cv_op;
6154021b 6229 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6230 PL_last_lop = PL_oldbufptr;
6231 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6232 PL_nextwhite = PL_thiswhite;
6233 PL_thiswhite = 0;
6234 start_force(PL_curforce);
6154021b 6235 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6236 PL_expect = XTERM;
cd81e915
NC
6237 PL_nextwhite = nextPL_nextwhite;
6238 curmad('X', PL_thistoken);
6b29d1f5 6239 PL_thistoken = newSVpvs("");
5db06880
NC
6240 force_next(WORD);
6241 TOKEN(NOAMP);
6242 }
6243#else
6154021b 6244 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6245 PL_expect = XTERM;
8990e307
LW
6246 force_next(WORD);
6247 TOKEN(NOAMP);
5db06880 6248#endif
8990e307 6249 }
748a9306 6250
8990e307
LW
6251 /* Call it a bare word */
6252
5603f27d 6253 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6254 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6255 else {
9a073a1d
RGS
6256 bareword:
6257 /* after "print" and similar functions (corresponding to
6258 * "F? L" in opcode.pl), whatever wasn't already parsed as
6259 * a filehandle should be subject to "strict subs".
6260 * Likewise for the optional indirect-object argument to system
6261 * or exec, which can't be a bareword */
6262 if ((PL_last_lop_op == OP_PRINT
6263 || PL_last_lop_op == OP_PRTF
6264 || PL_last_lop_op == OP_SAY
6265 || PL_last_lop_op == OP_SYSTEM
6266 || PL_last_lop_op == OP_EXEC)
6267 && (PL_hints & HINT_STRICT_SUBS))
6268 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6269 if (lastchar != '-') {
6270 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6271 d = PL_tokenbuf;
6272 while (isLOWER(*d))
6273 d++;
da51bb9b 6274 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6275 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6276 PL_tokenbuf);
6277 }
748a9306
LW
6278 }
6279 }
f7461760 6280 op_free(rv2cv_op);
c3e0f903
GS
6281
6282 safe_bareword:
9b387841
NC
6283 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6284 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6285 "Operator or semicolon missing before %c%s",
6286 lastchar, PL_tokenbuf);
6287 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6288 "Ambiguous use of %c resolved as operator %c",
6289 lastchar, lastchar);
748a9306 6290 }
93a17b20 6291 TOKEN(WORD);
79072805 6292 }
79072805 6293
68dc0745 6294 case KEY___FILE__:
6154021b 6295 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6296 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6297 TERM(THING);
6298
79072805 6299 case KEY___LINE__:
6154021b 6300 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6301 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6302 TERM(THING);
68dc0745 6303
6304 case KEY___PACKAGE__:
6154021b 6305 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6306 (PL_curstash
5aaec2b4 6307 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6308 : &PL_sv_undef));
79072805 6309 TERM(THING);
79072805 6310
e50aee73 6311 case KEY___DATA__:
79072805
LW
6312 case KEY___END__: {
6313 GV *gv;
3280af22 6314 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6315 const char *pname = "main";
3280af22 6316 if (PL_tokenbuf[2] == 'D')
bfcb3514 6317 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6318 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6319 SVt_PVIO);
a5f75d66 6320 GvMULTI_on(gv);
79072805 6321 if (!GvIO(gv))
a0d0e21e 6322 GvIOp(gv) = newIO();
3280af22 6323 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6324#if defined(HAS_FCNTL) && defined(F_SETFD)
6325 {
f54cb97a 6326 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6327 fcntl(fd,F_SETFD,fd >= 3);
6328 }
79072805 6329#endif
fd049845 6330 /* Mark this internal pseudo-handle as clean */
6331 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6332 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6333 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6334 else
50952442 6335 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6336#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6337 /* if the script was opened in binmode, we need to revert
53129d29 6338 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6339 * XXX this is a questionable hack at best. */
53129d29
GS
6340 if (PL_bufend-PL_bufptr > 2
6341 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6342 {
6343 Off_t loc = 0;
50952442 6344 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6345 loc = PerlIO_tell(PL_rsfp);
6346 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6347 }
2986a63f
JH
6348#ifdef NETWARE
6349 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6350#else
c39cd008 6351 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6352#endif /* NETWARE */
1143fce0
JH
6353#ifdef PERLIO_IS_STDIO /* really? */
6354# if defined(__BORLANDC__)
cb359b41
JH
6355 /* XXX see note in do_binmode() */
6356 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6357# endif
6358#endif
c39cd008
GS
6359 if (loc > 0)
6360 PerlIO_seek(PL_rsfp, loc, 0);
6361 }
6362 }
6363#endif
7948272d 6364#ifdef PERLIO_LAYERS
52d2e0f4
JH
6365 if (!IN_BYTES) {
6366 if (UTF)
6367 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6368 else if (PL_encoding) {
6369 SV *name;
6370 dSP;
6371 ENTER;
6372 SAVETMPS;
6373 PUSHMARK(sp);
6374 EXTEND(SP, 1);
6375 XPUSHs(PL_encoding);
6376 PUTBACK;
6377 call_method("name", G_SCALAR);
6378 SPAGAIN;
6379 name = POPs;
6380 PUTBACK;
bfed75c6 6381 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6382 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6383 SVfARG(name)));
52d2e0f4
JH
6384 FREETMPS;
6385 LEAVE;
6386 }
6387 }
7948272d 6388#endif
5db06880
NC
6389#ifdef PERL_MAD
6390 if (PL_madskills) {
cd81e915
NC
6391 if (PL_realtokenstart >= 0) {
6392 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6393 if (!PL_endwhite)
6b29d1f5 6394 PL_endwhite = newSVpvs("");
cd81e915
NC
6395 sv_catsv(PL_endwhite, PL_thiswhite);
6396 PL_thiswhite = 0;
6397 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6398 PL_realtokenstart = -1;
5db06880 6399 }
5cc814fd
NC
6400 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6401 != NULL) ;
5db06880
NC
6402 }
6403#endif
4608196e 6404 PL_rsfp = NULL;
79072805
LW
6405 }
6406 goto fake_eof;
e929a76b 6407 }
de3bb511 6408
8990e307 6409 case KEY_AUTOLOAD:
ed6116ce 6410 case KEY_DESTROY:
79072805 6411 case KEY_BEGIN:
3c10abe3 6412 case KEY_UNITCHECK:
7d30b5c4 6413 case KEY_CHECK:
7d07dbc2 6414 case KEY_INIT:
7d30b5c4 6415 case KEY_END:
3280af22
NIS
6416 if (PL_expect == XSTATE) {
6417 s = PL_bufptr;
93a17b20 6418 goto really_sub;
79072805
LW
6419 }
6420 goto just_a_word;
6421
a0d0e21e
LW
6422 case KEY_CORE:
6423 if (*s == ':' && s[1] == ':') {
6424 s += 2;
748a9306 6425 d = s;
3280af22 6426 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6427 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6428 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6429 if (tmp < 0)
6430 tmp = -tmp;
850e8516 6431 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6432 /* that's a way to remember we saw "CORE::" */
850e8516 6433 orig_keyword = tmp;
a0d0e21e
LW
6434 goto reserved_word;
6435 }
6436 goto just_a_word;
6437
463ee0b2
LW
6438 case KEY_abs:
6439 UNI(OP_ABS);
6440
79072805
LW
6441 case KEY_alarm:
6442 UNI(OP_ALARM);
6443
6444 case KEY_accept:
a0d0e21e 6445 LOP(OP_ACCEPT,XTERM);
79072805 6446
463ee0b2
LW
6447 case KEY_and:
6448 OPERATOR(ANDOP);
6449
79072805 6450 case KEY_atan2:
a0d0e21e 6451 LOP(OP_ATAN2,XTERM);
85e6fe83 6452
79072805 6453 case KEY_bind:
a0d0e21e 6454 LOP(OP_BIND,XTERM);
79072805
LW
6455
6456 case KEY_binmode:
1c1fc3ea 6457 LOP(OP_BINMODE,XTERM);
79072805
LW
6458
6459 case KEY_bless:
a0d0e21e 6460 LOP(OP_BLESS,XTERM);
79072805 6461
0d863452
RH
6462 case KEY_break:
6463 FUN0(OP_BREAK);
6464
79072805
LW
6465 case KEY_chop:
6466 UNI(OP_CHOP);
6467
6468 case KEY_continue:
0d863452
RH
6469 /* When 'use switch' is in effect, continue has a dual
6470 life as a control operator. */
6471 {
ef89dcc3 6472 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6473 PREBLOCK(CONTINUE);
6474 else {
6475 /* We have to disambiguate the two senses of
6476 "continue". If the next token is a '{' then
6477 treat it as the start of a continue block;
6478 otherwise treat it as a control operator.
6479 */
6480 s = skipspace(s);
6481 if (*s == '{')
79072805 6482 PREBLOCK(CONTINUE);
0d863452
RH
6483 else
6484 FUN0(OP_CONTINUE);
6485 }
6486 }
79072805
LW
6487
6488 case KEY_chdir:
fafc274c
NC
6489 /* may use HOME */
6490 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6491 UNI(OP_CHDIR);
6492
6493 case KEY_close:
6494 UNI(OP_CLOSE);
6495
6496 case KEY_closedir:
6497 UNI(OP_CLOSEDIR);
6498
6499 case KEY_cmp:
6500 Eop(OP_SCMP);
6501
6502 case KEY_caller:
6503 UNI(OP_CALLER);
6504
6505 case KEY_crypt:
6506#ifdef FCRYPT
f4c556ac
GS
6507 if (!PL_cryptseen) {
6508 PL_cryptseen = TRUE;
de3bb511 6509 init_des();
f4c556ac 6510 }
a687059c 6511#endif
a0d0e21e 6512 LOP(OP_CRYPT,XTERM);
79072805
LW
6513
6514 case KEY_chmod:
a0d0e21e 6515 LOP(OP_CHMOD,XTERM);
79072805
LW
6516
6517 case KEY_chown:
a0d0e21e 6518 LOP(OP_CHOWN,XTERM);
79072805
LW
6519
6520 case KEY_connect:
a0d0e21e 6521 LOP(OP_CONNECT,XTERM);
79072805 6522
463ee0b2
LW
6523 case KEY_chr:
6524 UNI(OP_CHR);
6525
79072805
LW
6526 case KEY_cos:
6527 UNI(OP_COS);
6528
6529 case KEY_chroot:
6530 UNI(OP_CHROOT);
6531
0d863452
RH
6532 case KEY_default:
6533 PREBLOCK(DEFAULT);
6534
79072805 6535 case KEY_do:
29595ff2 6536 s = SKIPSPACE1(s);
79072805 6537 if (*s == '{')
a0d0e21e 6538 PRETERMBLOCK(DO);
79072805 6539 if (*s != '\'')
89c5585f 6540 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6541 if (orig_keyword == KEY_do) {
6542 orig_keyword = 0;
6154021b 6543 pl_yylval.ival = 1;
850e8516
RGS
6544 }
6545 else
6154021b 6546 pl_yylval.ival = 0;
378cc40b 6547 OPERATOR(DO);
79072805
LW
6548
6549 case KEY_die:
3280af22 6550 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6551 LOP(OP_DIE,XTERM);
79072805
LW
6552
6553 case KEY_defined:
6554 UNI(OP_DEFINED);
6555
6556 case KEY_delete:
a0d0e21e 6557 UNI(OP_DELETE);
79072805
LW
6558
6559 case KEY_dbmopen:
5c1737d1 6560 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
a0d0e21e 6561 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6562
6563 case KEY_dbmclose:
6564 UNI(OP_DBMCLOSE);
6565
6566 case KEY_dump:
a0d0e21e 6567 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6568 LOOPX(OP_DUMP);
6569
6570 case KEY_else:
6571 PREBLOCK(ELSE);
6572
6573 case KEY_elsif:
6154021b 6574 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6575 OPERATOR(ELSIF);
6576
6577 case KEY_eq:
6578 Eop(OP_SEQ);
6579
a0d0e21e
LW
6580 case KEY_exists:
6581 UNI(OP_EXISTS);
4e553d73 6582
79072805 6583 case KEY_exit:
5db06880
NC
6584 if (PL_madskills)
6585 UNI(OP_INT);
79072805
LW
6586 UNI(OP_EXIT);
6587
6588 case KEY_eval:
29595ff2 6589 s = SKIPSPACE1(s);
32e2a35d
RGS
6590 if (*s == '{') { /* block eval */
6591 PL_expect = XTERMBLOCK;
6592 UNIBRACK(OP_ENTERTRY);
6593 }
6594 else { /* string eval */
6595 PL_expect = XTERM;
6596 UNIBRACK(OP_ENTEREVAL);
6597 }
79072805
LW
6598
6599 case KEY_eof:
6600 UNI(OP_EOF);
6601
6602 case KEY_exp:
6603 UNI(OP_EXP);
6604
6605 case KEY_each:
6606 UNI(OP_EACH);
6607
6608 case KEY_exec:
a0d0e21e 6609 LOP(OP_EXEC,XREF);
79072805
LW
6610
6611 case KEY_endhostent:
6612 FUN0(OP_EHOSTENT);
6613
6614 case KEY_endnetent:
6615 FUN0(OP_ENETENT);
6616
6617 case KEY_endservent:
6618 FUN0(OP_ESERVENT);
6619
6620 case KEY_endprotoent:
6621 FUN0(OP_EPROTOENT);
6622
6623 case KEY_endpwent:
6624 FUN0(OP_EPWENT);
6625
6626 case KEY_endgrent:
6627 FUN0(OP_EGRENT);
6628
6629 case KEY_for:
6630 case KEY_foreach:
6154021b 6631 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 6632 s = SKIPSPACE1(s);
7e2040f0 6633 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 6634 char *p = s;
5db06880
NC
6635#ifdef PERL_MAD
6636 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6637#endif
6638
3280af22 6639 if ((PL_bufend - p) >= 3 &&
55497cff 6640 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6641 p += 2;
77ca0c92
LW
6642 else if ((PL_bufend - p) >= 4 &&
6643 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6644 p += 3;
29595ff2 6645 p = PEEKSPACE(p);
7e2040f0 6646 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
6647 p = scan_ident(p, PL_bufend,
6648 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 6649 p = PEEKSPACE(p);
77ca0c92
LW
6650 }
6651 if (*p != '$')
cea2e8a9 6652 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
6653#ifdef PERL_MAD
6654 s = SvPVX(PL_linestr) + soff;
6655#endif
55497cff 6656 }
79072805
LW
6657 OPERATOR(FOR);
6658
6659 case KEY_formline:
a0d0e21e 6660 LOP(OP_FORMLINE,XTERM);
79072805
LW
6661
6662 case KEY_fork:
6663 FUN0(OP_FORK);
6664
6665 case KEY_fcntl:
a0d0e21e 6666 LOP(OP_FCNTL,XTERM);
79072805
LW
6667
6668 case KEY_fileno:
6669 UNI(OP_FILENO);
6670
6671 case KEY_flock:
a0d0e21e 6672 LOP(OP_FLOCK,XTERM);
79072805
LW
6673
6674 case KEY_gt:
6675 Rop(OP_SGT);
6676
6677 case KEY_ge:
6678 Rop(OP_SGE);
6679
6680 case KEY_grep:
2c38e13d 6681 LOP(OP_GREPSTART, XREF);
79072805
LW
6682
6683 case KEY_goto:
a0d0e21e 6684 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6685 LOOPX(OP_GOTO);
6686
6687 case KEY_gmtime:
6688 UNI(OP_GMTIME);
6689
6690 case KEY_getc:
6f33ba73 6691 UNIDOR(OP_GETC);
79072805
LW
6692
6693 case KEY_getppid:
6694 FUN0(OP_GETPPID);
6695
6696 case KEY_getpgrp:
6697 UNI(OP_GETPGRP);
6698
6699 case KEY_getpriority:
a0d0e21e 6700 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
6701
6702 case KEY_getprotobyname:
6703 UNI(OP_GPBYNAME);
6704
6705 case KEY_getprotobynumber:
a0d0e21e 6706 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
6707
6708 case KEY_getprotoent:
6709 FUN0(OP_GPROTOENT);
6710
6711 case KEY_getpwent:
6712 FUN0(OP_GPWENT);
6713
6714 case KEY_getpwnam:
ff68c719 6715 UNI(OP_GPWNAM);
79072805
LW
6716
6717 case KEY_getpwuid:
ff68c719 6718 UNI(OP_GPWUID);
79072805
LW
6719
6720 case KEY_getpeername:
6721 UNI(OP_GETPEERNAME);
6722
6723 case KEY_gethostbyname:
6724 UNI(OP_GHBYNAME);
6725
6726 case KEY_gethostbyaddr:
a0d0e21e 6727 LOP(OP_GHBYADDR,XTERM);
79072805
LW
6728
6729 case KEY_gethostent:
6730 FUN0(OP_GHOSTENT);
6731
6732 case KEY_getnetbyname:
6733 UNI(OP_GNBYNAME);
6734
6735 case KEY_getnetbyaddr:
a0d0e21e 6736 LOP(OP_GNBYADDR,XTERM);
79072805
LW
6737
6738 case KEY_getnetent:
6739 FUN0(OP_GNETENT);
6740
6741 case KEY_getservbyname:
a0d0e21e 6742 LOP(OP_GSBYNAME,XTERM);
79072805
LW
6743
6744 case KEY_getservbyport:
a0d0e21e 6745 LOP(OP_GSBYPORT,XTERM);
79072805
LW
6746
6747 case KEY_getservent:
6748 FUN0(OP_GSERVENT);
6749
6750 case KEY_getsockname:
6751 UNI(OP_GETSOCKNAME);
6752
6753 case KEY_getsockopt:
a0d0e21e 6754 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
6755
6756 case KEY_getgrent:
6757 FUN0(OP_GGRENT);
6758
6759 case KEY_getgrnam:
ff68c719 6760 UNI(OP_GGRNAM);
79072805
LW
6761
6762 case KEY_getgrgid:
ff68c719 6763 UNI(OP_GGRGID);
79072805
LW
6764
6765 case KEY_getlogin:
6766 FUN0(OP_GETLOGIN);
6767
0d863452 6768 case KEY_given:
6154021b 6769 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
6770 OPERATOR(GIVEN);
6771
93a17b20 6772 case KEY_glob:
a0d0e21e 6773 LOP(OP_GLOB,XTERM);
93a17b20 6774
79072805
LW
6775 case KEY_hex:
6776 UNI(OP_HEX);
6777
6778 case KEY_if:
6154021b 6779 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6780 OPERATOR(IF);
6781
6782 case KEY_index:
a0d0e21e 6783 LOP(OP_INDEX,XTERM);
79072805
LW
6784
6785 case KEY_int:
6786 UNI(OP_INT);
6787
6788 case KEY_ioctl:
a0d0e21e 6789 LOP(OP_IOCTL,XTERM);
79072805
LW
6790
6791 case KEY_join:
a0d0e21e 6792 LOP(OP_JOIN,XTERM);
79072805
LW
6793
6794 case KEY_keys:
6795 UNI(OP_KEYS);
6796
6797 case KEY_kill:
a0d0e21e 6798 LOP(OP_KILL,XTERM);
79072805
LW
6799
6800 case KEY_last:
a0d0e21e 6801 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 6802 LOOPX(OP_LAST);
4e553d73 6803
79072805
LW
6804 case KEY_lc:
6805 UNI(OP_LC);
6806
6807 case KEY_lcfirst:
6808 UNI(OP_LCFIRST);
6809
6810 case KEY_local:
6154021b 6811 pl_yylval.ival = 0;
79072805
LW
6812 OPERATOR(LOCAL);
6813
6814 case KEY_length:
6815 UNI(OP_LENGTH);
6816
6817 case KEY_lt:
6818 Rop(OP_SLT);
6819
6820 case KEY_le:
6821 Rop(OP_SLE);
6822
6823 case KEY_localtime:
6824 UNI(OP_LOCALTIME);
6825
6826 case KEY_log:
6827 UNI(OP_LOG);
6828
6829 case KEY_link:
a0d0e21e 6830 LOP(OP_LINK,XTERM);
79072805
LW
6831
6832 case KEY_listen:
a0d0e21e 6833 LOP(OP_LISTEN,XTERM);
79072805 6834
c0329465
MB
6835 case KEY_lock:
6836 UNI(OP_LOCK);
6837
79072805
LW
6838 case KEY_lstat:
6839 UNI(OP_LSTAT);
6840
6841 case KEY_m:
8782bef2 6842 s = scan_pat(s,OP_MATCH);
79072805
LW
6843 TERM(sublex_start());
6844
a0d0e21e 6845 case KEY_map:
2c38e13d 6846 LOP(OP_MAPSTART, XREF);
4e4e412b 6847
79072805 6848 case KEY_mkdir:
a0d0e21e 6849 LOP(OP_MKDIR,XTERM);
79072805
LW
6850
6851 case KEY_msgctl:
a0d0e21e 6852 LOP(OP_MSGCTL,XTERM);
79072805
LW
6853
6854 case KEY_msgget:
a0d0e21e 6855 LOP(OP_MSGGET,XTERM);
79072805
LW
6856
6857 case KEY_msgrcv:
a0d0e21e 6858 LOP(OP_MSGRCV,XTERM);
79072805
LW
6859
6860 case KEY_msgsnd:
a0d0e21e 6861 LOP(OP_MSGSND,XTERM);
79072805 6862
77ca0c92 6863 case KEY_our:
93a17b20 6864 case KEY_my:
952306ac 6865 case KEY_state:
eac04b2e 6866 PL_in_my = (U16)tmp;
29595ff2 6867 s = SKIPSPACE1(s);
7e2040f0 6868 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
6869#ifdef PERL_MAD
6870 char* start = s;
6871#endif
3280af22 6872 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
6873 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
6874 goto really_sub;
def3634b 6875 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 6876 if (!PL_in_my_stash) {
c750a3ec 6877 char tmpbuf[1024];
3280af22 6878 PL_bufptr = s;
d9fad198 6879 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
6880 yyerror(tmpbuf);
6881 }
5db06880
NC
6882#ifdef PERL_MAD
6883 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
6884 sv_catsv(PL_thistoken, PL_nextwhite);
6885 PL_nextwhite = 0;
6886 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
6887 }
6888#endif
c750a3ec 6889 }
6154021b 6890 pl_yylval.ival = 1;
55497cff 6891 OPERATOR(MY);
93a17b20 6892
79072805 6893 case KEY_next:
a0d0e21e 6894 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6895 LOOPX(OP_NEXT);
6896
6897 case KEY_ne:
6898 Eop(OP_SNE);
6899
a0d0e21e 6900 case KEY_no:
468aa647 6901 s = tokenize_use(0, s);
a0d0e21e
LW
6902 OPERATOR(USE);
6903
6904 case KEY_not:
29595ff2 6905 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
6906 FUN1(OP_NOT);
6907 else
6908 OPERATOR(NOTOP);
a0d0e21e 6909
79072805 6910 case KEY_open:
29595ff2 6911 s = SKIPSPACE1(s);
7e2040f0 6912 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 6913 const char *t;
c35e046a
AL
6914 for (d = s; isALNUM_lazy_if(d,UTF);)
6915 d++;
6916 for (t=d; isSPACE(*t);)
6917 t++;
e2ab214b 6918 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
6919 /* [perl #16184] */
6920 && !(t[0] == '=' && t[1] == '>')
6921 ) {
5f66b61c 6922 int parms_len = (int)(d-s);
9014280d 6923 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 6924 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 6925 parms_len, s, parms_len, s);
66fbe8fb 6926 }
93a17b20 6927 }
a0d0e21e 6928 LOP(OP_OPEN,XTERM);
79072805 6929
463ee0b2 6930 case KEY_or:
6154021b 6931 pl_yylval.ival = OP_OR;
463ee0b2
LW
6932 OPERATOR(OROP);
6933
79072805
LW
6934 case KEY_ord:
6935 UNI(OP_ORD);
6936
6937 case KEY_oct:
6938 UNI(OP_OCT);
6939
6940 case KEY_opendir:
a0d0e21e 6941 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
6942
6943 case KEY_print:
3280af22 6944 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 6945 LOP(OP_PRINT,XREF);
79072805
LW
6946
6947 case KEY_printf:
3280af22 6948 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 6949 LOP(OP_PRTF,XREF);
79072805 6950
c07a80fd 6951 case KEY_prototype:
6952 UNI(OP_PROTOTYPE);
6953
79072805 6954 case KEY_push:
a0d0e21e 6955 LOP(OP_PUSH,XTERM);
79072805
LW
6956
6957 case KEY_pop:
6f33ba73 6958 UNIDOR(OP_POP);
79072805 6959
a0d0e21e 6960 case KEY_pos:
6f33ba73 6961 UNIDOR(OP_POS);
4e553d73 6962
79072805 6963 case KEY_pack:
a0d0e21e 6964 LOP(OP_PACK,XTERM);
79072805
LW
6965
6966 case KEY_package:
a0d0e21e 6967 s = force_word(s,WORD,FALSE,TRUE,FALSE);
6fa4d285 6968 s = force_version(s, FALSE);
79072805
LW
6969 OPERATOR(PACKAGE);
6970
6971 case KEY_pipe:
a0d0e21e 6972 LOP(OP_PIPE_OP,XTERM);
79072805
LW
6973
6974 case KEY_q:
5db06880 6975 s = scan_str(s,!!PL_madskills,FALSE);
79072805 6976 if (!s)
d4c19fe8 6977 missingterm(NULL);
6154021b 6978 pl_yylval.ival = OP_CONST;
79072805
LW
6979 TERM(sublex_start());
6980
a0d0e21e
LW
6981 case KEY_quotemeta:
6982 UNI(OP_QUOTEMETA);
6983
8990e307 6984 case KEY_qw:
5db06880 6985 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 6986 if (!s)
d4c19fe8 6987 missingterm(NULL);
3480a8d2 6988 PL_expect = XOPERATOR;
8127e0e3
GS
6989 force_next(')');
6990 if (SvCUR(PL_lex_stuff)) {
5f66b61c 6991 OP *words = NULL;
8127e0e3 6992 int warned = 0;
3280af22 6993 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 6994 while (len) {
d4c19fe8
AL
6995 for (; isSPACE(*d) && len; --len, ++d)
6996 /**/;
8127e0e3 6997 if (len) {
d4c19fe8 6998 SV *sv;
f54cb97a 6999 const char *b = d;
e476b1b5 7000 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7001 for (; !isSPACE(*d) && len; --len, ++d) {
7002 if (*d == ',') {
9014280d 7003 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7004 "Possible attempt to separate words with commas");
7005 ++warned;
7006 }
7007 else if (*d == '#') {
9014280d 7008 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7009 "Possible attempt to put comments in qw() list");
7010 ++warned;
7011 }
7012 }
7013 }
7014 else {
d4c19fe8
AL
7015 for (; !isSPACE(*d) && len; --len, ++d)
7016 /**/;
8127e0e3 7017 }
740cce10 7018 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8127e0e3 7019 words = append_elem(OP_LIST, words,
7948272d 7020 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7021 }
7022 }
8127e0e3 7023 if (words) {
cd81e915 7024 start_force(PL_curforce);
9ded7720 7025 NEXTVAL_NEXTTOKE.opval = words;
8127e0e3
GS
7026 force_next(THING);
7027 }
55497cff 7028 }
37fd879b 7029 if (PL_lex_stuff) {
8127e0e3 7030 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7031 PL_lex_stuff = NULL;
37fd879b 7032 }
3280af22 7033 PL_expect = XTERM;
8127e0e3 7034 TOKEN('(');
8990e307 7035
79072805 7036 case KEY_qq:
5db06880 7037 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7038 if (!s)
d4c19fe8 7039 missingterm(NULL);
6154021b 7040 pl_yylval.ival = OP_STRINGIFY;
3280af22 7041 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7042 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7043 TERM(sublex_start());
7044
8782bef2
GB
7045 case KEY_qr:
7046 s = scan_pat(s,OP_QR);
7047 TERM(sublex_start());
7048
79072805 7049 case KEY_qx:
5db06880 7050 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7051 if (!s)
d4c19fe8 7052 missingterm(NULL);
9b201d7d 7053 readpipe_override();
79072805
LW
7054 TERM(sublex_start());
7055
7056 case KEY_return:
7057 OLDLOP(OP_RETURN);
7058
7059 case KEY_require:
29595ff2 7060 s = SKIPSPACE1(s);
e759cc13
RGS
7061 if (isDIGIT(*s)) {
7062 s = force_version(s, FALSE);
a7cb1f99 7063 }
e759cc13
RGS
7064 else if (*s != 'v' || !isDIGIT(s[1])
7065 || (s = force_version(s, TRUE), *s == 'v'))
7066 {
a7cb1f99
GS
7067 *PL_tokenbuf = '\0';
7068 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7069 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7070 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7071 else if (*s == '<')
7072 yyerror("<> should be quotes");
7073 }
a72a1c8b
RGS
7074 if (orig_keyword == KEY_require) {
7075 orig_keyword = 0;
6154021b 7076 pl_yylval.ival = 1;
a72a1c8b
RGS
7077 }
7078 else
6154021b 7079 pl_yylval.ival = 0;
a72a1c8b
RGS
7080 PL_expect = XTERM;
7081 PL_bufptr = s;
7082 PL_last_uni = PL_oldbufptr;
7083 PL_last_lop_op = OP_REQUIRE;
7084 s = skipspace(s);
7085 return REPORT( (int)REQUIRE );
79072805
LW
7086
7087 case KEY_reset:
7088 UNI(OP_RESET);
7089
7090 case KEY_redo:
a0d0e21e 7091 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7092 LOOPX(OP_REDO);
7093
7094 case KEY_rename:
a0d0e21e 7095 LOP(OP_RENAME,XTERM);
79072805
LW
7096
7097 case KEY_rand:
7098 UNI(OP_RAND);
7099
7100 case KEY_rmdir:
7101 UNI(OP_RMDIR);
7102
7103 case KEY_rindex:
a0d0e21e 7104 LOP(OP_RINDEX,XTERM);
79072805
LW
7105
7106 case KEY_read:
a0d0e21e 7107 LOP(OP_READ,XTERM);
79072805
LW
7108
7109 case KEY_readdir:
7110 UNI(OP_READDIR);
7111
93a17b20 7112 case KEY_readline:
6f33ba73 7113 UNIDOR(OP_READLINE);
93a17b20
LW
7114
7115 case KEY_readpipe:
0858480c 7116 UNIDOR(OP_BACKTICK);
93a17b20 7117
79072805
LW
7118 case KEY_rewinddir:
7119 UNI(OP_REWINDDIR);
7120
7121 case KEY_recv:
a0d0e21e 7122 LOP(OP_RECV,XTERM);
79072805
LW
7123
7124 case KEY_reverse:
a0d0e21e 7125 LOP(OP_REVERSE,XTERM);
79072805
LW
7126
7127 case KEY_readlink:
6f33ba73 7128 UNIDOR(OP_READLINK);
79072805
LW
7129
7130 case KEY_ref:
7131 UNI(OP_REF);
7132
7133 case KEY_s:
7134 s = scan_subst(s);
6154021b 7135 if (pl_yylval.opval)
79072805
LW
7136 TERM(sublex_start());
7137 else
7138 TOKEN(1); /* force error */
7139
0d863452
RH
7140 case KEY_say:
7141 checkcomma(s,PL_tokenbuf,"filehandle");
7142 LOP(OP_SAY,XREF);
7143
a0d0e21e
LW
7144 case KEY_chomp:
7145 UNI(OP_CHOMP);
4e553d73 7146
79072805
LW
7147 case KEY_scalar:
7148 UNI(OP_SCALAR);
7149
7150 case KEY_select:
a0d0e21e 7151 LOP(OP_SELECT,XTERM);
79072805
LW
7152
7153 case KEY_seek:
a0d0e21e 7154 LOP(OP_SEEK,XTERM);
79072805
LW
7155
7156 case KEY_semctl:
a0d0e21e 7157 LOP(OP_SEMCTL,XTERM);
79072805
LW
7158
7159 case KEY_semget:
a0d0e21e 7160 LOP(OP_SEMGET,XTERM);
79072805
LW
7161
7162 case KEY_semop:
a0d0e21e 7163 LOP(OP_SEMOP,XTERM);
79072805
LW
7164
7165 case KEY_send:
a0d0e21e 7166 LOP(OP_SEND,XTERM);
79072805
LW
7167
7168 case KEY_setpgrp:
a0d0e21e 7169 LOP(OP_SETPGRP,XTERM);
79072805
LW
7170
7171 case KEY_setpriority:
a0d0e21e 7172 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7173
7174 case KEY_sethostent:
ff68c719 7175 UNI(OP_SHOSTENT);
79072805
LW
7176
7177 case KEY_setnetent:
ff68c719 7178 UNI(OP_SNETENT);
79072805
LW
7179
7180 case KEY_setservent:
ff68c719 7181 UNI(OP_SSERVENT);
79072805
LW
7182
7183 case KEY_setprotoent:
ff68c719 7184 UNI(OP_SPROTOENT);
79072805
LW
7185
7186 case KEY_setpwent:
7187 FUN0(OP_SPWENT);
7188
7189 case KEY_setgrent:
7190 FUN0(OP_SGRENT);
7191
7192 case KEY_seekdir:
a0d0e21e 7193 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7194
7195 case KEY_setsockopt:
a0d0e21e 7196 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7197
7198 case KEY_shift:
6f33ba73 7199 UNIDOR(OP_SHIFT);
79072805
LW
7200
7201 case KEY_shmctl:
a0d0e21e 7202 LOP(OP_SHMCTL,XTERM);
79072805
LW
7203
7204 case KEY_shmget:
a0d0e21e 7205 LOP(OP_SHMGET,XTERM);
79072805
LW
7206
7207 case KEY_shmread:
a0d0e21e 7208 LOP(OP_SHMREAD,XTERM);
79072805
LW
7209
7210 case KEY_shmwrite:
a0d0e21e 7211 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7212
7213 case KEY_shutdown:
a0d0e21e 7214 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7215
7216 case KEY_sin:
7217 UNI(OP_SIN);
7218
7219 case KEY_sleep:
7220 UNI(OP_SLEEP);
7221
7222 case KEY_socket:
a0d0e21e 7223 LOP(OP_SOCKET,XTERM);
79072805
LW
7224
7225 case KEY_socketpair:
a0d0e21e 7226 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7227
7228 case KEY_sort:
3280af22 7229 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7230 s = SKIPSPACE1(s);
79072805 7231 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7232 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7233 PL_expect = XTERM;
15f0808c 7234 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7235 LOP(OP_SORT,XREF);
79072805
LW
7236
7237 case KEY_split:
a0d0e21e 7238 LOP(OP_SPLIT,XTERM);
79072805
LW
7239
7240 case KEY_sprintf:
a0d0e21e 7241 LOP(OP_SPRINTF,XTERM);
79072805
LW
7242
7243 case KEY_splice:
a0d0e21e 7244 LOP(OP_SPLICE,XTERM);
79072805
LW
7245
7246 case KEY_sqrt:
7247 UNI(OP_SQRT);
7248
7249 case KEY_srand:
7250 UNI(OP_SRAND);
7251
7252 case KEY_stat:
7253 UNI(OP_STAT);
7254
7255 case KEY_study:
79072805
LW
7256 UNI(OP_STUDY);
7257
7258 case KEY_substr:
a0d0e21e 7259 LOP(OP_SUBSTR,XTERM);
79072805
LW
7260
7261 case KEY_format:
7262 case KEY_sub:
93a17b20 7263 really_sub:
09bef843 7264 {
3280af22 7265 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7266 SSize_t tboffset = 0;
09bef843 7267 expectation attrful;
28cc6278 7268 bool have_name, have_proto;
f54cb97a 7269 const int key = tmp;
09bef843 7270
5db06880
NC
7271#ifdef PERL_MAD
7272 SV *tmpwhite = 0;
7273
cd81e915 7274 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7275 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7276 PL_thistoken = 0;
5db06880
NC
7277
7278 d = s;
7279 s = SKIPSPACE2(s,tmpwhite);
7280#else
09bef843 7281 s = skipspace(s);
5db06880 7282#endif
09bef843 7283
7e2040f0 7284 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7285 (*s == ':' && s[1] == ':'))
7286 {
5db06880 7287#ifdef PERL_MAD
4f61fd4b 7288 SV *nametoke = NULL;
5db06880
NC
7289#endif
7290
09bef843
SB
7291 PL_expect = XBLOCK;
7292 attrful = XATTRBLOCK;
b1b65b59
JH
7293 /* remember buffer pos'n for later force_word */
7294 tboffset = s - PL_oldbufptr;
09bef843 7295 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7296#ifdef PERL_MAD
7297 if (PL_madskills)
7298 nametoke = newSVpvn(s, d - s);
7299#endif
6502358f
NC
7300 if (memchr(tmpbuf, ':', len))
7301 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7302 else {
7303 sv_setsv(PL_subname,PL_curstname);
396482e1 7304 sv_catpvs(PL_subname,"::");
09bef843
SB
7305 sv_catpvn(PL_subname,tmpbuf,len);
7306 }
09bef843 7307 have_name = TRUE;
5db06880
NC
7308
7309#ifdef PERL_MAD
7310
7311 start_force(0);
7312 CURMAD('X', nametoke);
7313 CURMAD('_', tmpwhite);
7314 (void) force_word(PL_oldbufptr + tboffset, WORD,
7315 FALSE, TRUE, TRUE);
7316
7317 s = SKIPSPACE2(d,tmpwhite);
7318#else
7319 s = skipspace(d);
7320#endif
09bef843 7321 }
463ee0b2 7322 else {
09bef843
SB
7323 if (key == KEY_my)
7324 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7325 PL_expect = XTERMBLOCK;
7326 attrful = XATTRTERM;
76f68e9b 7327 sv_setpvs(PL_subname,"?");
09bef843 7328 have_name = FALSE;
463ee0b2 7329 }
4633a7c4 7330
09bef843
SB
7331 if (key == KEY_format) {
7332 if (*s == '=')
7333 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7334#ifdef PERL_MAD
cd81e915 7335 PL_thistoken = subtoken;
5db06880
NC
7336 s = d;
7337#else
09bef843 7338 if (have_name)
b1b65b59
JH
7339 (void) force_word(PL_oldbufptr + tboffset, WORD,
7340 FALSE, TRUE, TRUE);
5db06880 7341#endif
09bef843
SB
7342 OPERATOR(FORMAT);
7343 }
79072805 7344
09bef843
SB
7345 /* Look for a prototype */
7346 if (*s == '(') {
d9f2850e
RGS
7347 char *p;
7348 bool bad_proto = FALSE;
9e8d7757
RB
7349 bool in_brackets = FALSE;
7350 char greedy_proto = ' ';
7351 bool proto_after_greedy_proto = FALSE;
7352 bool must_be_last = FALSE;
7353 bool underscore = FALSE;
aef2a98a 7354 bool seen_underscore = FALSE;
d9f2850e 7355 const bool warnsyntax = ckWARN(WARN_SYNTAX);
09bef843 7356
5db06880 7357 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7358 if (!s)
09bef843 7359 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7360 /* strip spaces and check for bad characters */
09bef843
SB
7361 d = SvPVX(PL_lex_stuff);
7362 tmp = 0;
d9f2850e
RGS
7363 for (p = d; *p; ++p) {
7364 if (!isSPACE(*p)) {
7365 d[tmp++] = *p;
9e8d7757
RB
7366
7367 if (warnsyntax) {
7368 if (must_be_last)
7369 proto_after_greedy_proto = TRUE;
7370 if (!strchr("$@%*;[]&\\_", *p)) {
7371 bad_proto = TRUE;
7372 }
7373 else {
7374 if ( underscore ) {
7375 if ( *p != ';' )
7376 bad_proto = TRUE;
7377 underscore = FALSE;
7378 }
7379 if ( *p == '[' ) {
7380 in_brackets = TRUE;
7381 }
7382 else if ( *p == ']' ) {
7383 in_brackets = FALSE;
7384 }
7385 else if ( (*p == '@' || *p == '%') &&
7386 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7387 !in_brackets ) {
7388 must_be_last = TRUE;
7389 greedy_proto = *p;
7390 }
7391 else if ( *p == '_' ) {
aef2a98a 7392 underscore = seen_underscore = TRUE;
9e8d7757
RB
7393 }
7394 }
7395 }
d37a9538 7396 }
09bef843 7397 }
d9f2850e 7398 d[tmp] = '\0';
9e8d7757
RB
7399 if (proto_after_greedy_proto)
7400 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
7401 "Prototype after '%c' for %"SVf" : %s",
7402 greedy_proto, SVfARG(PL_subname), d);
d9f2850e
RGS
7403 if (bad_proto)
7404 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
aef2a98a
RGS
7405 "Illegal character %sin prototype for %"SVf" : %s",
7406 seen_underscore ? "after '_' " : "",
be2597df 7407 SVfARG(PL_subname), d);
b162af07 7408 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7409 have_proto = TRUE;
68dc0745 7410
5db06880
NC
7411#ifdef PERL_MAD
7412 start_force(0);
cd81e915 7413 CURMAD('q', PL_thisopen);
5db06880 7414 CURMAD('_', tmpwhite);
cd81e915
NC
7415 CURMAD('=', PL_thisstuff);
7416 CURMAD('Q', PL_thisclose);
5db06880
NC
7417 NEXTVAL_NEXTTOKE.opval =
7418 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7419 PL_lex_stuff = NULL;
5db06880
NC
7420 force_next(THING);
7421
7422 s = SKIPSPACE2(s,tmpwhite);
7423#else
09bef843 7424 s = skipspace(s);
5db06880 7425#endif
4633a7c4 7426 }
09bef843
SB
7427 else
7428 have_proto = FALSE;
7429
7430 if (*s == ':' && s[1] != ':')
7431 PL_expect = attrful;
8e742a20
MHM
7432 else if (*s != '{' && key == KEY_sub) {
7433 if (!have_name)
7434 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7435 else if (*s != ';' && *s != '}')
be2597df 7436 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7437 }
09bef843 7438
5db06880
NC
7439#ifdef PERL_MAD
7440 start_force(0);
7441 if (tmpwhite) {
7442 if (PL_madskills)
6b29d1f5 7443 curmad('^', newSVpvs(""));
5db06880
NC
7444 CURMAD('_', tmpwhite);
7445 }
7446 force_next(0);
7447
cd81e915 7448 PL_thistoken = subtoken;
5db06880 7449#else
09bef843 7450 if (have_proto) {
9ded7720 7451 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7452 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7453 PL_lex_stuff = NULL;
09bef843 7454 force_next(THING);
68dc0745 7455 }
5db06880 7456#endif
09bef843 7457 if (!have_name) {
49a54bbe
NC
7458 if (PL_curstash)
7459 sv_setpvs(PL_subname, "__ANON__");
7460 else
7461 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7462 TOKEN(ANONSUB);
4633a7c4 7463 }
5db06880 7464#ifndef PERL_MAD
b1b65b59
JH
7465 (void) force_word(PL_oldbufptr + tboffset, WORD,
7466 FALSE, TRUE, TRUE);
5db06880 7467#endif
09bef843
SB
7468 if (key == KEY_my)
7469 TOKEN(MYSUB);
7470 TOKEN(SUB);
4633a7c4 7471 }
79072805
LW
7472
7473 case KEY_system:
a0d0e21e 7474 LOP(OP_SYSTEM,XREF);
79072805
LW
7475
7476 case KEY_symlink:
a0d0e21e 7477 LOP(OP_SYMLINK,XTERM);
79072805
LW
7478
7479 case KEY_syscall:
a0d0e21e 7480 LOP(OP_SYSCALL,XTERM);
79072805 7481
c07a80fd 7482 case KEY_sysopen:
7483 LOP(OP_SYSOPEN,XTERM);
7484
137443ea 7485 case KEY_sysseek:
7486 LOP(OP_SYSSEEK,XTERM);
7487
79072805 7488 case KEY_sysread:
a0d0e21e 7489 LOP(OP_SYSREAD,XTERM);
79072805
LW
7490
7491 case KEY_syswrite:
a0d0e21e 7492 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7493
7494 case KEY_tr:
7495 s = scan_trans(s);
7496 TERM(sublex_start());
7497
7498 case KEY_tell:
7499 UNI(OP_TELL);
7500
7501 case KEY_telldir:
7502 UNI(OP_TELLDIR);
7503
463ee0b2 7504 case KEY_tie:
a0d0e21e 7505 LOP(OP_TIE,XTERM);
463ee0b2 7506
c07a80fd 7507 case KEY_tied:
7508 UNI(OP_TIED);
7509
79072805
LW
7510 case KEY_time:
7511 FUN0(OP_TIME);
7512
7513 case KEY_times:
7514 FUN0(OP_TMS);
7515
7516 case KEY_truncate:
a0d0e21e 7517 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7518
7519 case KEY_uc:
7520 UNI(OP_UC);
7521
7522 case KEY_ucfirst:
7523 UNI(OP_UCFIRST);
7524
463ee0b2
LW
7525 case KEY_untie:
7526 UNI(OP_UNTIE);
7527
79072805 7528 case KEY_until:
6154021b 7529 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7530 OPERATOR(UNTIL);
7531
7532 case KEY_unless:
6154021b 7533 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7534 OPERATOR(UNLESS);
7535
7536 case KEY_unlink:
a0d0e21e 7537 LOP(OP_UNLINK,XTERM);
79072805
LW
7538
7539 case KEY_undef:
6f33ba73 7540 UNIDOR(OP_UNDEF);
79072805
LW
7541
7542 case KEY_unpack:
a0d0e21e 7543 LOP(OP_UNPACK,XTERM);
79072805
LW
7544
7545 case KEY_utime:
a0d0e21e 7546 LOP(OP_UTIME,XTERM);
79072805
LW
7547
7548 case KEY_umask:
6f33ba73 7549 UNIDOR(OP_UMASK);
79072805
LW
7550
7551 case KEY_unshift:
a0d0e21e
LW
7552 LOP(OP_UNSHIFT,XTERM);
7553
7554 case KEY_use:
468aa647 7555 s = tokenize_use(1, s);
a0d0e21e 7556 OPERATOR(USE);
79072805
LW
7557
7558 case KEY_values:
7559 UNI(OP_VALUES);
7560
7561 case KEY_vec:
a0d0e21e 7562 LOP(OP_VEC,XTERM);
79072805 7563
0d863452 7564 case KEY_when:
6154021b 7565 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7566 OPERATOR(WHEN);
7567
79072805 7568 case KEY_while:
6154021b 7569 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7570 OPERATOR(WHILE);
7571
7572 case KEY_warn:
3280af22 7573 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7574 LOP(OP_WARN,XTERM);
79072805
LW
7575
7576 case KEY_wait:
7577 FUN0(OP_WAIT);
7578
7579 case KEY_waitpid:
a0d0e21e 7580 LOP(OP_WAITPID,XTERM);
79072805
LW
7581
7582 case KEY_wantarray:
7583 FUN0(OP_WANTARRAY);
7584
7585 case KEY_write:
9d116dd7
JH
7586#ifdef EBCDIC
7587 {
df3728a2
JH
7588 char ctl_l[2];
7589 ctl_l[0] = toCTRL('L');
7590 ctl_l[1] = '\0';
fafc274c 7591 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7592 }
7593#else
fafc274c
NC
7594 /* Make sure $^L is defined */
7595 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7596#endif
79072805
LW
7597 UNI(OP_ENTERWRITE);
7598
7599 case KEY_x:
3280af22 7600 if (PL_expect == XOPERATOR)
79072805
LW
7601 Mop(OP_REPEAT);
7602 check_uni();
7603 goto just_a_word;
7604
a0d0e21e 7605 case KEY_xor:
6154021b 7606 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7607 OPERATOR(OROP);
7608
79072805
LW
7609 case KEY_y:
7610 s = scan_trans(s);
7611 TERM(sublex_start());
7612 }
49dc05e3 7613 }}
79072805 7614}
bf4acbe4
GS
7615#ifdef __SC__
7616#pragma segment Main
7617#endif
79072805 7618
e930465f
JH
7619static int
7620S_pending_ident(pTHX)
8eceec63 7621{
97aff369 7622 dVAR;
8eceec63 7623 register char *d;
bbd11bfc 7624 PADOFFSET tmp = 0;
8eceec63
SC
7625 /* pit holds the identifier we read and pending_ident is reset */
7626 char pit = PL_pending_ident;
9bde8eb0
NC
7627 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7628 /* All routes through this function want to know if there is a colon. */
c099d646 7629 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
7630 PL_pending_ident = 0;
7631
cd81e915 7632 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 7633 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 7634 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
7635
7636 /* if we're in a my(), we can't allow dynamics here.
7637 $foo'bar has already been turned into $foo::bar, so
7638 just check for colons.
7639
7640 if it's a legal name, the OP is a PADANY.
7641 */
7642 if (PL_in_my) {
7643 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 7644 if (has_colon)
8eceec63
SC
7645 yyerror(Perl_form(aTHX_ "No package name allowed for "
7646 "variable %s in \"our\"",
7647 PL_tokenbuf));
d6447115 7648 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7649 }
7650 else {
9bde8eb0 7651 if (has_colon)
952306ac
RGS
7652 yyerror(Perl_form(aTHX_ PL_no_myglob,
7653 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 7654
6154021b 7655 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 7656 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7657 return PRIVATEREF;
7658 }
7659 }
7660
7661 /*
7662 build the ops for accesses to a my() variable.
7663
7664 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7665 then used in a comparison. This catches most, but not
7666 all cases. For instance, it catches
7667 sort { my($a); $a <=> $b }
7668 but not
7669 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7670 (although why you'd do that is anyone's guess).
7671 */
7672
9bde8eb0 7673 if (!has_colon) {
8716503d 7674 if (!PL_in_my)
f8f98e0a 7675 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 7676 if (tmp != NOT_IN_PAD) {
8eceec63 7677 /* might be an "our" variable" */
00b1698f 7678 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 7679 /* build ops for a bareword */
b64e5050
AL
7680 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
7681 HEK * const stashname = HvNAME_HEK(stash);
7682 SV * const sym = newSVhek(stashname);
396482e1 7683 sv_catpvs(sym, "::");
9bde8eb0 7684 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
7685 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7686 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 7687 gv_fetchsv(sym,
8eceec63
SC
7688 (PL_in_eval
7689 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 7690 : GV_ADDMULTI
8eceec63
SC
7691 ),
7692 ((PL_tokenbuf[0] == '$') ? SVt_PV
7693 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7694 : SVt_PVHV));
7695 return WORD;
7696 }
7697
7698 /* if it's a sort block and they're naming $a or $b */
7699 if (PL_last_lop_op == OP_SORT &&
7700 PL_tokenbuf[0] == '$' &&
7701 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
7702 && !PL_tokenbuf[2])
7703 {
7704 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
7705 d < PL_bufend && *d != '\n';
7706 d++)
7707 {
7708 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
7709 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
7710 PL_tokenbuf);
7711 }
7712 }
7713 }
7714
6154021b
RGS
7715 pl_yylval.opval = newOP(OP_PADANY, 0);
7716 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
7717 return PRIVATEREF;
7718 }
7719 }
7720
7721 /*
7722 Whine if they've said @foo in a doublequoted string,
7723 and @foo isn't a variable we can find in the symbol
7724 table.
7725 */
d824713b
NC
7726 if (ckWARN(WARN_AMBIGUOUS) &&
7727 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
7728 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
7729 SVt_PVAV);
8eceec63 7730 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
7731 /* DO NOT warn for @- and @+ */
7732 && !( PL_tokenbuf[2] == '\0' &&
7733 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
7734 )
8eceec63
SC
7735 {
7736 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
7737 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
7738 "Possible unintended interpolation of %s in string",
7739 PL_tokenbuf);
8eceec63
SC
7740 }
7741 }
7742
7743 /* build ops for a bareword */
6154021b 7744 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 7745 tokenbuf_len - 1));
6154021b 7746 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9bde8eb0
NC
7747 gv_fetchpvn_flags(
7748 PL_tokenbuf + 1, tokenbuf_len - 1,
d6069db2
RGS
7749 /* If the identifier refers to a stash, don't autovivify it.
7750 * Change 24660 had the side effect of causing symbol table
7751 * hashes to always be defined, even if they were freshly
7752 * created and the only reference in the entire program was
7753 * the single statement with the defined %foo::bar:: test.
7754 * It appears that all code in the wild doing this actually
7755 * wants to know whether sub-packages have been loaded, so
7756 * by avoiding auto-vivifying symbol tables, we ensure that
7757 * defined %foo::bar:: continues to be false, and the existing
7758 * tests still give the expected answers, even though what
7759 * they're actually testing has now changed subtly.
7760 */
9bde8eb0
NC
7761 (*PL_tokenbuf == '%'
7762 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
7763 && d[-1] == ':'
d6069db2
RGS
7764 ? 0
7765 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
adc51b97
RGS
7766 ((PL_tokenbuf[0] == '$') ? SVt_PV
7767 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7768 : SVt_PVHV));
8eceec63
SC
7769 return WORD;
7770}
7771
4c3bbe0f
MHM
7772/*
7773 * The following code was generated by perl_keyword.pl.
7774 */
e2e1dd5a 7775
79072805 7776I32
5458a98a 7777Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 7778{
952306ac 7779 dVAR;
7918f24d
NC
7780
7781 PERL_ARGS_ASSERT_KEYWORD;
7782
4c3bbe0f
MHM
7783 switch (len)
7784 {
7785 case 1: /* 5 tokens of length 1 */
7786 switch (name[0])
e2e1dd5a 7787 {
4c3bbe0f
MHM
7788 case 'm':
7789 { /* m */
7790 return KEY_m;
7791 }
7792
4c3bbe0f
MHM
7793 case 'q':
7794 { /* q */
7795 return KEY_q;
7796 }
7797
4c3bbe0f
MHM
7798 case 's':
7799 { /* s */
7800 return KEY_s;
7801 }
7802
4c3bbe0f
MHM
7803 case 'x':
7804 { /* x */
7805 return -KEY_x;
7806 }
7807
4c3bbe0f
MHM
7808 case 'y':
7809 { /* y */
7810 return KEY_y;
7811 }
7812
4c3bbe0f
MHM
7813 default:
7814 goto unknown;
e2e1dd5a 7815 }
4c3bbe0f
MHM
7816
7817 case 2: /* 18 tokens of length 2 */
7818 switch (name[0])
e2e1dd5a 7819 {
4c3bbe0f
MHM
7820 case 'd':
7821 if (name[1] == 'o')
7822 { /* do */
7823 return KEY_do;
7824 }
7825
7826 goto unknown;
7827
7828 case 'e':
7829 if (name[1] == 'q')
7830 { /* eq */
7831 return -KEY_eq;
7832 }
7833
7834 goto unknown;
7835
7836 case 'g':
7837 switch (name[1])
7838 {
7839 case 'e':
7840 { /* ge */
7841 return -KEY_ge;
7842 }
7843
4c3bbe0f
MHM
7844 case 't':
7845 { /* gt */
7846 return -KEY_gt;
7847 }
7848
4c3bbe0f
MHM
7849 default:
7850 goto unknown;
7851 }
7852
7853 case 'i':
7854 if (name[1] == 'f')
7855 { /* if */
7856 return KEY_if;
7857 }
7858
7859 goto unknown;
7860
7861 case 'l':
7862 switch (name[1])
7863 {
7864 case 'c':
7865 { /* lc */
7866 return -KEY_lc;
7867 }
7868
4c3bbe0f
MHM
7869 case 'e':
7870 { /* le */
7871 return -KEY_le;
7872 }
7873
4c3bbe0f
MHM
7874 case 't':
7875 { /* lt */
7876 return -KEY_lt;
7877 }
7878
4c3bbe0f
MHM
7879 default:
7880 goto unknown;
7881 }
7882
7883 case 'm':
7884 if (name[1] == 'y')
7885 { /* my */
7886 return KEY_my;
7887 }
7888
7889 goto unknown;
7890
7891 case 'n':
7892 switch (name[1])
7893 {
7894 case 'e':
7895 { /* ne */
7896 return -KEY_ne;
7897 }
7898
4c3bbe0f
MHM
7899 case 'o':
7900 { /* no */
7901 return KEY_no;
7902 }
7903
4c3bbe0f
MHM
7904 default:
7905 goto unknown;
7906 }
7907
7908 case 'o':
7909 if (name[1] == 'r')
7910 { /* or */
7911 return -KEY_or;
7912 }
7913
7914 goto unknown;
7915
7916 case 'q':
7917 switch (name[1])
7918 {
7919 case 'q':
7920 { /* qq */
7921 return KEY_qq;
7922 }
7923
4c3bbe0f
MHM
7924 case 'r':
7925 { /* qr */
7926 return KEY_qr;
7927 }
7928
4c3bbe0f
MHM
7929 case 'w':
7930 { /* qw */
7931 return KEY_qw;
7932 }
7933
4c3bbe0f
MHM
7934 case 'x':
7935 { /* qx */
7936 return KEY_qx;
7937 }
7938
4c3bbe0f
MHM
7939 default:
7940 goto unknown;
7941 }
7942
7943 case 't':
7944 if (name[1] == 'r')
7945 { /* tr */
7946 return KEY_tr;
7947 }
7948
7949 goto unknown;
7950
7951 case 'u':
7952 if (name[1] == 'c')
7953 { /* uc */
7954 return -KEY_uc;
7955 }
7956
7957 goto unknown;
7958
7959 default:
7960 goto unknown;
e2e1dd5a 7961 }
4c3bbe0f 7962
0d863452 7963 case 3: /* 29 tokens of length 3 */
4c3bbe0f 7964 switch (name[0])
e2e1dd5a 7965 {
4c3bbe0f
MHM
7966 case 'E':
7967 if (name[1] == 'N' &&
7968 name[2] == 'D')
7969 { /* END */
7970 return KEY_END;
7971 }
7972
7973 goto unknown;
7974
7975 case 'a':
7976 switch (name[1])
7977 {
7978 case 'b':
7979 if (name[2] == 's')
7980 { /* abs */
7981 return -KEY_abs;
7982 }
7983
7984 goto unknown;
7985
7986 case 'n':
7987 if (name[2] == 'd')
7988 { /* and */
7989 return -KEY_and;
7990 }
7991
7992 goto unknown;
7993
7994 default:
7995 goto unknown;
7996 }
7997
7998 case 'c':
7999 switch (name[1])
8000 {
8001 case 'h':
8002 if (name[2] == 'r')
8003 { /* chr */
8004 return -KEY_chr;
8005 }
8006
8007 goto unknown;
8008
8009 case 'm':
8010 if (name[2] == 'p')
8011 { /* cmp */
8012 return -KEY_cmp;
8013 }
8014
8015 goto unknown;
8016
8017 case 'o':
8018 if (name[2] == 's')
8019 { /* cos */
8020 return -KEY_cos;
8021 }
8022
8023 goto unknown;
8024
8025 default:
8026 goto unknown;
8027 }
8028
8029 case 'd':
8030 if (name[1] == 'i' &&
8031 name[2] == 'e')
8032 { /* die */
8033 return -KEY_die;
8034 }
8035
8036 goto unknown;
8037
8038 case 'e':
8039 switch (name[1])
8040 {
8041 case 'o':
8042 if (name[2] == 'f')
8043 { /* eof */
8044 return -KEY_eof;
8045 }
8046
8047 goto unknown;
8048
4c3bbe0f
MHM
8049 case 'x':
8050 if (name[2] == 'p')
8051 { /* exp */
8052 return -KEY_exp;
8053 }
8054
8055 goto unknown;
8056
8057 default:
8058 goto unknown;
8059 }
8060
8061 case 'f':
8062 if (name[1] == 'o' &&
8063 name[2] == 'r')
8064 { /* for */
8065 return KEY_for;
8066 }
8067
8068 goto unknown;
8069
8070 case 'h':
8071 if (name[1] == 'e' &&
8072 name[2] == 'x')
8073 { /* hex */
8074 return -KEY_hex;
8075 }
8076
8077 goto unknown;
8078
8079 case 'i':
8080 if (name[1] == 'n' &&
8081 name[2] == 't')
8082 { /* int */
8083 return -KEY_int;
8084 }
8085
8086 goto unknown;
8087
8088 case 'l':
8089 if (name[1] == 'o' &&
8090 name[2] == 'g')
8091 { /* log */
8092 return -KEY_log;
8093 }
8094
8095 goto unknown;
8096
8097 case 'm':
8098 if (name[1] == 'a' &&
8099 name[2] == 'p')
8100 { /* map */
8101 return KEY_map;
8102 }
8103
8104 goto unknown;
8105
8106 case 'n':
8107 if (name[1] == 'o' &&
8108 name[2] == 't')
8109 { /* not */
8110 return -KEY_not;
8111 }
8112
8113 goto unknown;
8114
8115 case 'o':
8116 switch (name[1])
8117 {
8118 case 'c':
8119 if (name[2] == 't')
8120 { /* oct */
8121 return -KEY_oct;
8122 }
8123
8124 goto unknown;
8125
8126 case 'r':
8127 if (name[2] == 'd')
8128 { /* ord */
8129 return -KEY_ord;
8130 }
8131
8132 goto unknown;
8133
8134 case 'u':
8135 if (name[2] == 'r')
8136 { /* our */
8137 return KEY_our;
8138 }
8139
8140 goto unknown;
8141
8142 default:
8143 goto unknown;
8144 }
8145
8146 case 'p':
8147 if (name[1] == 'o')
8148 {
8149 switch (name[2])
8150 {
8151 case 'p':
8152 { /* pop */
8153 return -KEY_pop;
8154 }
8155
4c3bbe0f
MHM
8156 case 's':
8157 { /* pos */
8158 return KEY_pos;
8159 }
8160
4c3bbe0f
MHM
8161 default:
8162 goto unknown;
8163 }
8164 }
8165
8166 goto unknown;
8167
8168 case 'r':
8169 if (name[1] == 'e' &&
8170 name[2] == 'f')
8171 { /* ref */
8172 return -KEY_ref;
8173 }
8174
8175 goto unknown;
8176
8177 case 's':
8178 switch (name[1])
8179 {
0d863452
RH
8180 case 'a':
8181 if (name[2] == 'y')
8182 { /* say */
e3e804c9 8183 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8184 }
8185
8186 goto unknown;
8187
4c3bbe0f
MHM
8188 case 'i':
8189 if (name[2] == 'n')
8190 { /* sin */
8191 return -KEY_sin;
8192 }
8193
8194 goto unknown;
8195
8196 case 'u':
8197 if (name[2] == 'b')
8198 { /* sub */
8199 return KEY_sub;
8200 }
8201
8202 goto unknown;
8203
8204 default:
8205 goto unknown;
8206 }
8207
8208 case 't':
8209 if (name[1] == 'i' &&
8210 name[2] == 'e')
8211 { /* tie */
8212 return KEY_tie;
8213 }
8214
8215 goto unknown;
8216
8217 case 'u':
8218 if (name[1] == 's' &&
8219 name[2] == 'e')
8220 { /* use */
8221 return KEY_use;
8222 }
8223
8224 goto unknown;
8225
8226 case 'v':
8227 if (name[1] == 'e' &&
8228 name[2] == 'c')
8229 { /* vec */
8230 return -KEY_vec;
8231 }
8232
8233 goto unknown;
8234
8235 case 'x':
8236 if (name[1] == 'o' &&
8237 name[2] == 'r')
8238 { /* xor */
8239 return -KEY_xor;
8240 }
8241
8242 goto unknown;
8243
8244 default:
8245 goto unknown;
e2e1dd5a 8246 }
4c3bbe0f 8247
0d863452 8248 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8249 switch (name[0])
e2e1dd5a 8250 {
4c3bbe0f
MHM
8251 case 'C':
8252 if (name[1] == 'O' &&
8253 name[2] == 'R' &&
8254 name[3] == 'E')
8255 { /* CORE */
8256 return -KEY_CORE;
8257 }
8258
8259 goto unknown;
8260
8261 case 'I':
8262 if (name[1] == 'N' &&
8263 name[2] == 'I' &&
8264 name[3] == 'T')
8265 { /* INIT */
8266 return KEY_INIT;
8267 }
8268
8269 goto unknown;
8270
8271 case 'b':
8272 if (name[1] == 'i' &&
8273 name[2] == 'n' &&
8274 name[3] == 'd')
8275 { /* bind */
8276 return -KEY_bind;
8277 }
8278
8279 goto unknown;
8280
8281 case 'c':
8282 if (name[1] == 'h' &&
8283 name[2] == 'o' &&
8284 name[3] == 'p')
8285 { /* chop */
8286 return -KEY_chop;
8287 }
8288
8289 goto unknown;
8290
8291 case 'd':
8292 if (name[1] == 'u' &&
8293 name[2] == 'm' &&
8294 name[3] == 'p')
8295 { /* dump */
8296 return -KEY_dump;
8297 }
8298
8299 goto unknown;
8300
8301 case 'e':
8302 switch (name[1])
8303 {
8304 case 'a':
8305 if (name[2] == 'c' &&
8306 name[3] == 'h')
8307 { /* each */
8308 return -KEY_each;
8309 }
8310
8311 goto unknown;
8312
8313 case 'l':
8314 if (name[2] == 's' &&
8315 name[3] == 'e')
8316 { /* else */
8317 return KEY_else;
8318 }
8319
8320 goto unknown;
8321
8322 case 'v':
8323 if (name[2] == 'a' &&
8324 name[3] == 'l')
8325 { /* eval */
8326 return KEY_eval;
8327 }
8328
8329 goto unknown;
8330
8331 case 'x':
8332 switch (name[2])
8333 {
8334 case 'e':
8335 if (name[3] == 'c')
8336 { /* exec */
8337 return -KEY_exec;
8338 }
8339
8340 goto unknown;
8341
8342 case 'i':
8343 if (name[3] == 't')
8344 { /* exit */
8345 return -KEY_exit;
8346 }
8347
8348 goto unknown;
8349
8350 default:
8351 goto unknown;
8352 }
8353
8354 default:
8355 goto unknown;
8356 }
8357
8358 case 'f':
8359 if (name[1] == 'o' &&
8360 name[2] == 'r' &&
8361 name[3] == 'k')
8362 { /* fork */
8363 return -KEY_fork;
8364 }
8365
8366 goto unknown;
8367
8368 case 'g':
8369 switch (name[1])
8370 {
8371 case 'e':
8372 if (name[2] == 't' &&
8373 name[3] == 'c')
8374 { /* getc */
8375 return -KEY_getc;
8376 }
8377
8378 goto unknown;
8379
8380 case 'l':
8381 if (name[2] == 'o' &&
8382 name[3] == 'b')
8383 { /* glob */
8384 return KEY_glob;
8385 }
8386
8387 goto unknown;
8388
8389 case 'o':
8390 if (name[2] == 't' &&
8391 name[3] == 'o')
8392 { /* goto */
8393 return KEY_goto;
8394 }
8395
8396 goto unknown;
8397
8398 case 'r':
8399 if (name[2] == 'e' &&
8400 name[3] == 'p')
8401 { /* grep */
8402 return KEY_grep;
8403 }
8404
8405 goto unknown;
8406
8407 default:
8408 goto unknown;
8409 }
8410
8411 case 'j':
8412 if (name[1] == 'o' &&
8413 name[2] == 'i' &&
8414 name[3] == 'n')
8415 { /* join */
8416 return -KEY_join;
8417 }
8418
8419 goto unknown;
8420
8421 case 'k':
8422 switch (name[1])
8423 {
8424 case 'e':
8425 if (name[2] == 'y' &&
8426 name[3] == 's')
8427 { /* keys */
8428 return -KEY_keys;
8429 }
8430
8431 goto unknown;
8432
8433 case 'i':
8434 if (name[2] == 'l' &&
8435 name[3] == 'l')
8436 { /* kill */
8437 return -KEY_kill;
8438 }
8439
8440 goto unknown;
8441
8442 default:
8443 goto unknown;
8444 }
8445
8446 case 'l':
8447 switch (name[1])
8448 {
8449 case 'a':
8450 if (name[2] == 's' &&
8451 name[3] == 't')
8452 { /* last */
8453 return KEY_last;
8454 }
8455
8456 goto unknown;
8457
8458 case 'i':
8459 if (name[2] == 'n' &&
8460 name[3] == 'k')
8461 { /* link */
8462 return -KEY_link;
8463 }
8464
8465 goto unknown;
8466
8467 case 'o':
8468 if (name[2] == 'c' &&
8469 name[3] == 'k')
8470 { /* lock */
8471 return -KEY_lock;
8472 }
8473
8474 goto unknown;
8475
8476 default:
8477 goto unknown;
8478 }
8479
8480 case 'n':
8481 if (name[1] == 'e' &&
8482 name[2] == 'x' &&
8483 name[3] == 't')
8484 { /* next */
8485 return KEY_next;
8486 }
8487
8488 goto unknown;
8489
8490 case 'o':
8491 if (name[1] == 'p' &&
8492 name[2] == 'e' &&
8493 name[3] == 'n')
8494 { /* open */
8495 return -KEY_open;
8496 }
8497
8498 goto unknown;
8499
8500 case 'p':
8501 switch (name[1])
8502 {
8503 case 'a':
8504 if (name[2] == 'c' &&
8505 name[3] == 'k')
8506 { /* pack */
8507 return -KEY_pack;
8508 }
8509
8510 goto unknown;
8511
8512 case 'i':
8513 if (name[2] == 'p' &&
8514 name[3] == 'e')
8515 { /* pipe */
8516 return -KEY_pipe;
8517 }
8518
8519 goto unknown;
8520
8521 case 'u':
8522 if (name[2] == 's' &&
8523 name[3] == 'h')
8524 { /* push */
8525 return -KEY_push;
8526 }
8527
8528 goto unknown;
8529
8530 default:
8531 goto unknown;
8532 }
8533
8534 case 'r':
8535 switch (name[1])
8536 {
8537 case 'a':
8538 if (name[2] == 'n' &&
8539 name[3] == 'd')
8540 { /* rand */
8541 return -KEY_rand;
8542 }
8543
8544 goto unknown;
8545
8546 case 'e':
8547 switch (name[2])
8548 {
8549 case 'a':
8550 if (name[3] == 'd')
8551 { /* read */
8552 return -KEY_read;
8553 }
8554
8555 goto unknown;
8556
8557 case 'c':
8558 if (name[3] == 'v')
8559 { /* recv */
8560 return -KEY_recv;
8561 }
8562
8563 goto unknown;
8564
8565 case 'd':
8566 if (name[3] == 'o')
8567 { /* redo */
8568 return KEY_redo;
8569 }
8570
8571 goto unknown;
8572
8573 default:
8574 goto unknown;
8575 }
8576
8577 default:
8578 goto unknown;
8579 }
8580
8581 case 's':
8582 switch (name[1])
8583 {
8584 case 'e':
8585 switch (name[2])
8586 {
8587 case 'e':
8588 if (name[3] == 'k')
8589 { /* seek */
8590 return -KEY_seek;
8591 }
8592
8593 goto unknown;
8594
8595 case 'n':
8596 if (name[3] == 'd')
8597 { /* send */
8598 return -KEY_send;
8599 }
8600
8601 goto unknown;
8602
8603 default:
8604 goto unknown;
8605 }
8606
8607 case 'o':
8608 if (name[2] == 'r' &&
8609 name[3] == 't')
8610 { /* sort */
8611 return KEY_sort;
8612 }
8613
8614 goto unknown;
8615
8616 case 'q':
8617 if (name[2] == 'r' &&
8618 name[3] == 't')
8619 { /* sqrt */
8620 return -KEY_sqrt;
8621 }
8622
8623 goto unknown;
8624
8625 case 't':
8626 if (name[2] == 'a' &&
8627 name[3] == 't')
8628 { /* stat */
8629 return -KEY_stat;
8630 }
8631
8632 goto unknown;
8633
8634 default:
8635 goto unknown;
8636 }
8637
8638 case 't':
8639 switch (name[1])
8640 {
8641 case 'e':
8642 if (name[2] == 'l' &&
8643 name[3] == 'l')
8644 { /* tell */
8645 return -KEY_tell;
8646 }
8647
8648 goto unknown;
8649
8650 case 'i':
8651 switch (name[2])
8652 {
8653 case 'e':
8654 if (name[3] == 'd')
8655 { /* tied */
8656 return KEY_tied;
8657 }
8658
8659 goto unknown;
8660
8661 case 'm':
8662 if (name[3] == 'e')
8663 { /* time */
8664 return -KEY_time;
8665 }
8666
8667 goto unknown;
8668
8669 default:
8670 goto unknown;
8671 }
8672
8673 default:
8674 goto unknown;
8675 }
8676
8677 case 'w':
0d863452 8678 switch (name[1])
4c3bbe0f 8679 {
0d863452 8680 case 'a':
952306ac
RGS
8681 switch (name[2])
8682 {
8683 case 'i':
8684 if (name[3] == 't')
8685 { /* wait */
8686 return -KEY_wait;
8687 }
4c3bbe0f 8688
952306ac 8689 goto unknown;
4c3bbe0f 8690
952306ac
RGS
8691 case 'r':
8692 if (name[3] == 'n')
8693 { /* warn */
8694 return -KEY_warn;
8695 }
4c3bbe0f 8696
952306ac 8697 goto unknown;
4c3bbe0f 8698
952306ac
RGS
8699 default:
8700 goto unknown;
8701 }
0d863452
RH
8702
8703 case 'h':
8704 if (name[2] == 'e' &&
8705 name[3] == 'n')
8706 { /* when */
5458a98a 8707 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 8708 }
4c3bbe0f 8709
952306ac 8710 goto unknown;
4c3bbe0f 8711
952306ac
RGS
8712 default:
8713 goto unknown;
8714 }
4c3bbe0f 8715
0d863452
RH
8716 default:
8717 goto unknown;
8718 }
8719
952306ac 8720 case 5: /* 39 tokens of length 5 */
4c3bbe0f 8721 switch (name[0])
e2e1dd5a 8722 {
4c3bbe0f
MHM
8723 case 'B':
8724 if (name[1] == 'E' &&
8725 name[2] == 'G' &&
8726 name[3] == 'I' &&
8727 name[4] == 'N')
8728 { /* BEGIN */
8729 return KEY_BEGIN;
8730 }
8731
8732 goto unknown;
8733
8734 case 'C':
8735 if (name[1] == 'H' &&
8736 name[2] == 'E' &&
8737 name[3] == 'C' &&
8738 name[4] == 'K')
8739 { /* CHECK */
8740 return KEY_CHECK;
8741 }
8742
8743 goto unknown;
8744
8745 case 'a':
8746 switch (name[1])
8747 {
8748 case 'l':
8749 if (name[2] == 'a' &&
8750 name[3] == 'r' &&
8751 name[4] == 'm')
8752 { /* alarm */
8753 return -KEY_alarm;
8754 }
8755
8756 goto unknown;
8757
8758 case 't':
8759 if (name[2] == 'a' &&
8760 name[3] == 'n' &&
8761 name[4] == '2')
8762 { /* atan2 */
8763 return -KEY_atan2;
8764 }
8765
8766 goto unknown;
8767
8768 default:
8769 goto unknown;
8770 }
8771
8772 case 'b':
0d863452
RH
8773 switch (name[1])
8774 {
8775 case 'l':
8776 if (name[2] == 'e' &&
952306ac
RGS
8777 name[3] == 's' &&
8778 name[4] == 's')
8779 { /* bless */
8780 return -KEY_bless;
8781 }
4c3bbe0f 8782
952306ac 8783 goto unknown;
4c3bbe0f 8784
0d863452
RH
8785 case 'r':
8786 if (name[2] == 'e' &&
8787 name[3] == 'a' &&
8788 name[4] == 'k')
8789 { /* break */
5458a98a 8790 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
8791 }
8792
8793 goto unknown;
8794
8795 default:
8796 goto unknown;
8797 }
8798
4c3bbe0f
MHM
8799 case 'c':
8800 switch (name[1])
8801 {
8802 case 'h':
8803 switch (name[2])
8804 {
8805 case 'd':
8806 if (name[3] == 'i' &&
8807 name[4] == 'r')
8808 { /* chdir */
8809 return -KEY_chdir;
8810 }
8811
8812 goto unknown;
8813
8814 case 'm':
8815 if (name[3] == 'o' &&
8816 name[4] == 'd')
8817 { /* chmod */
8818 return -KEY_chmod;
8819 }
8820
8821 goto unknown;
8822
8823 case 'o':
8824 switch (name[3])
8825 {
8826 case 'm':
8827 if (name[4] == 'p')
8828 { /* chomp */
8829 return -KEY_chomp;
8830 }
8831
8832 goto unknown;
8833
8834 case 'w':
8835 if (name[4] == 'n')
8836 { /* chown */
8837 return -KEY_chown;
8838 }
8839
8840 goto unknown;
8841
8842 default:
8843 goto unknown;
8844 }
8845
8846 default:
8847 goto unknown;
8848 }
8849
8850 case 'l':
8851 if (name[2] == 'o' &&
8852 name[3] == 's' &&
8853 name[4] == 'e')
8854 { /* close */
8855 return -KEY_close;
8856 }
8857
8858 goto unknown;
8859
8860 case 'r':
8861 if (name[2] == 'y' &&
8862 name[3] == 'p' &&
8863 name[4] == 't')
8864 { /* crypt */
8865 return -KEY_crypt;
8866 }
8867
8868 goto unknown;
8869
8870 default:
8871 goto unknown;
8872 }
8873
8874 case 'e':
8875 if (name[1] == 'l' &&
8876 name[2] == 's' &&
8877 name[3] == 'i' &&
8878 name[4] == 'f')
8879 { /* elsif */
8880 return KEY_elsif;
8881 }
8882
8883 goto unknown;
8884
8885 case 'f':
8886 switch (name[1])
8887 {
8888 case 'c':
8889 if (name[2] == 'n' &&
8890 name[3] == 't' &&
8891 name[4] == 'l')
8892 { /* fcntl */
8893 return -KEY_fcntl;
8894 }
8895
8896 goto unknown;
8897
8898 case 'l':
8899 if (name[2] == 'o' &&
8900 name[3] == 'c' &&
8901 name[4] == 'k')
8902 { /* flock */
8903 return -KEY_flock;
8904 }
8905
8906 goto unknown;
8907
8908 default:
8909 goto unknown;
8910 }
8911
0d863452
RH
8912 case 'g':
8913 if (name[1] == 'i' &&
8914 name[2] == 'v' &&
8915 name[3] == 'e' &&
8916 name[4] == 'n')
8917 { /* given */
5458a98a 8918 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
8919 }
8920
8921 goto unknown;
8922
4c3bbe0f
MHM
8923 case 'i':
8924 switch (name[1])
8925 {
8926 case 'n':
8927 if (name[2] == 'd' &&
8928 name[3] == 'e' &&
8929 name[4] == 'x')
8930 { /* index */
8931 return -KEY_index;
8932 }
8933
8934 goto unknown;
8935
8936 case 'o':
8937 if (name[2] == 'c' &&
8938 name[3] == 't' &&
8939 name[4] == 'l')
8940 { /* ioctl */
8941 return -KEY_ioctl;
8942 }
8943
8944 goto unknown;
8945
8946 default:
8947 goto unknown;
8948 }
8949
8950 case 'l':
8951 switch (name[1])
8952 {
8953 case 'o':
8954 if (name[2] == 'c' &&
8955 name[3] == 'a' &&
8956 name[4] == 'l')
8957 { /* local */
8958 return KEY_local;
8959 }
8960
8961 goto unknown;
8962
8963 case 's':
8964 if (name[2] == 't' &&
8965 name[3] == 'a' &&
8966 name[4] == 't')
8967 { /* lstat */
8968 return -KEY_lstat;
8969 }
8970
8971 goto unknown;
8972
8973 default:
8974 goto unknown;
8975 }
8976
8977 case 'm':
8978 if (name[1] == 'k' &&
8979 name[2] == 'd' &&
8980 name[3] == 'i' &&
8981 name[4] == 'r')
8982 { /* mkdir */
8983 return -KEY_mkdir;
8984 }
8985
8986 goto unknown;
8987
8988 case 'p':
8989 if (name[1] == 'r' &&
8990 name[2] == 'i' &&
8991 name[3] == 'n' &&
8992 name[4] == 't')
8993 { /* print */
8994 return KEY_print;
8995 }
8996
8997 goto unknown;
8998
8999 case 'r':
9000 switch (name[1])
9001 {
9002 case 'e':
9003 if (name[2] == 's' &&
9004 name[3] == 'e' &&
9005 name[4] == 't')
9006 { /* reset */
9007 return -KEY_reset;
9008 }
9009
9010 goto unknown;
9011
9012 case 'm':
9013 if (name[2] == 'd' &&
9014 name[3] == 'i' &&
9015 name[4] == 'r')
9016 { /* rmdir */
9017 return -KEY_rmdir;
9018 }
9019
9020 goto unknown;
9021
9022 default:
9023 goto unknown;
9024 }
9025
9026 case 's':
9027 switch (name[1])
9028 {
9029 case 'e':
9030 if (name[2] == 'm' &&
9031 name[3] == 'o' &&
9032 name[4] == 'p')
9033 { /* semop */
9034 return -KEY_semop;
9035 }
9036
9037 goto unknown;
9038
9039 case 'h':
9040 if (name[2] == 'i' &&
9041 name[3] == 'f' &&
9042 name[4] == 't')
9043 { /* shift */
9044 return -KEY_shift;
9045 }
9046
9047 goto unknown;
9048
9049 case 'l':
9050 if (name[2] == 'e' &&
9051 name[3] == 'e' &&
9052 name[4] == 'p')
9053 { /* sleep */
9054 return -KEY_sleep;
9055 }
9056
9057 goto unknown;
9058
9059 case 'p':
9060 if (name[2] == 'l' &&
9061 name[3] == 'i' &&
9062 name[4] == 't')
9063 { /* split */
9064 return KEY_split;
9065 }
9066
9067 goto unknown;
9068
9069 case 'r':
9070 if (name[2] == 'a' &&
9071 name[3] == 'n' &&
9072 name[4] == 'd')
9073 { /* srand */
9074 return -KEY_srand;
9075 }
9076
9077 goto unknown;
9078
9079 case 't':
952306ac
RGS
9080 switch (name[2])
9081 {
9082 case 'a':
9083 if (name[3] == 't' &&
9084 name[4] == 'e')
9085 { /* state */
5458a98a 9086 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9087 }
4c3bbe0f 9088
952306ac
RGS
9089 goto unknown;
9090
9091 case 'u':
9092 if (name[3] == 'd' &&
9093 name[4] == 'y')
9094 { /* study */
9095 return KEY_study;
9096 }
9097
9098 goto unknown;
9099
9100 default:
9101 goto unknown;
9102 }
4c3bbe0f
MHM
9103
9104 default:
9105 goto unknown;
9106 }
9107
9108 case 't':
9109 if (name[1] == 'i' &&
9110 name[2] == 'm' &&
9111 name[3] == 'e' &&
9112 name[4] == 's')
9113 { /* times */
9114 return -KEY_times;
9115 }
9116
9117 goto unknown;
9118
9119 case 'u':
9120 switch (name[1])
9121 {
9122 case 'm':
9123 if (name[2] == 'a' &&
9124 name[3] == 's' &&
9125 name[4] == 'k')
9126 { /* umask */
9127 return -KEY_umask;
9128 }
9129
9130 goto unknown;
9131
9132 case 'n':
9133 switch (name[2])
9134 {
9135 case 'd':
9136 if (name[3] == 'e' &&
9137 name[4] == 'f')
9138 { /* undef */
9139 return KEY_undef;
9140 }
9141
9142 goto unknown;
9143
9144 case 't':
9145 if (name[3] == 'i')
9146 {
9147 switch (name[4])
9148 {
9149 case 'e':
9150 { /* untie */
9151 return KEY_untie;
9152 }
9153
4c3bbe0f
MHM
9154 case 'l':
9155 { /* until */
9156 return KEY_until;
9157 }
9158
4c3bbe0f
MHM
9159 default:
9160 goto unknown;
9161 }
9162 }
9163
9164 goto unknown;
9165
9166 default:
9167 goto unknown;
9168 }
9169
9170 case 't':
9171 if (name[2] == 'i' &&
9172 name[3] == 'm' &&
9173 name[4] == 'e')
9174 { /* utime */
9175 return -KEY_utime;
9176 }
9177
9178 goto unknown;
9179
9180 default:
9181 goto unknown;
9182 }
9183
9184 case 'w':
9185 switch (name[1])
9186 {
9187 case 'h':
9188 if (name[2] == 'i' &&
9189 name[3] == 'l' &&
9190 name[4] == 'e')
9191 { /* while */
9192 return KEY_while;
9193 }
9194
9195 goto unknown;
9196
9197 case 'r':
9198 if (name[2] == 'i' &&
9199 name[3] == 't' &&
9200 name[4] == 'e')
9201 { /* write */
9202 return -KEY_write;
9203 }
9204
9205 goto unknown;
9206
9207 default:
9208 goto unknown;
9209 }
9210
9211 default:
9212 goto unknown;
e2e1dd5a 9213 }
4c3bbe0f
MHM
9214
9215 case 6: /* 33 tokens of length 6 */
9216 switch (name[0])
9217 {
9218 case 'a':
9219 if (name[1] == 'c' &&
9220 name[2] == 'c' &&
9221 name[3] == 'e' &&
9222 name[4] == 'p' &&
9223 name[5] == 't')
9224 { /* accept */
9225 return -KEY_accept;
9226 }
9227
9228 goto unknown;
9229
9230 case 'c':
9231 switch (name[1])
9232 {
9233 case 'a':
9234 if (name[2] == 'l' &&
9235 name[3] == 'l' &&
9236 name[4] == 'e' &&
9237 name[5] == 'r')
9238 { /* caller */
9239 return -KEY_caller;
9240 }
9241
9242 goto unknown;
9243
9244 case 'h':
9245 if (name[2] == 'r' &&
9246 name[3] == 'o' &&
9247 name[4] == 'o' &&
9248 name[5] == 't')
9249 { /* chroot */
9250 return -KEY_chroot;
9251 }
9252
9253 goto unknown;
9254
9255 default:
9256 goto unknown;
9257 }
9258
9259 case 'd':
9260 if (name[1] == 'e' &&
9261 name[2] == 'l' &&
9262 name[3] == 'e' &&
9263 name[4] == 't' &&
9264 name[5] == 'e')
9265 { /* delete */
9266 return KEY_delete;
9267 }
9268
9269 goto unknown;
9270
9271 case 'e':
9272 switch (name[1])
9273 {
9274 case 'l':
9275 if (name[2] == 's' &&
9276 name[3] == 'e' &&
9277 name[4] == 'i' &&
9278 name[5] == 'f')
9279 { /* elseif */
9b387841 9280 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9281 }
9282
9283 goto unknown;
9284
9285 case 'x':
9286 if (name[2] == 'i' &&
9287 name[3] == 's' &&
9288 name[4] == 't' &&
9289 name[5] == 's')
9290 { /* exists */
9291 return KEY_exists;
9292 }
9293
9294 goto unknown;
9295
9296 default:
9297 goto unknown;
9298 }
9299
9300 case 'f':
9301 switch (name[1])
9302 {
9303 case 'i':
9304 if (name[2] == 'l' &&
9305 name[3] == 'e' &&
9306 name[4] == 'n' &&
9307 name[5] == 'o')
9308 { /* fileno */
9309 return -KEY_fileno;
9310 }
9311
9312 goto unknown;
9313
9314 case 'o':
9315 if (name[2] == 'r' &&
9316 name[3] == 'm' &&
9317 name[4] == 'a' &&
9318 name[5] == 't')
9319 { /* format */
9320 return KEY_format;
9321 }
9322
9323 goto unknown;
9324
9325 default:
9326 goto unknown;
9327 }
9328
9329 case 'g':
9330 if (name[1] == 'm' &&
9331 name[2] == 't' &&
9332 name[3] == 'i' &&
9333 name[4] == 'm' &&
9334 name[5] == 'e')
9335 { /* gmtime */
9336 return -KEY_gmtime;
9337 }
9338
9339 goto unknown;
9340
9341 case 'l':
9342 switch (name[1])
9343 {
9344 case 'e':
9345 if (name[2] == 'n' &&
9346 name[3] == 'g' &&
9347 name[4] == 't' &&
9348 name[5] == 'h')
9349 { /* length */
9350 return -KEY_length;
9351 }
9352
9353 goto unknown;
9354
9355 case 'i':
9356 if (name[2] == 's' &&
9357 name[3] == 't' &&
9358 name[4] == 'e' &&
9359 name[5] == 'n')
9360 { /* listen */
9361 return -KEY_listen;
9362 }
9363
9364 goto unknown;
9365
9366 default:
9367 goto unknown;
9368 }
9369
9370 case 'm':
9371 if (name[1] == 's' &&
9372 name[2] == 'g')
9373 {
9374 switch (name[3])
9375 {
9376 case 'c':
9377 if (name[4] == 't' &&
9378 name[5] == 'l')
9379 { /* msgctl */
9380 return -KEY_msgctl;
9381 }
9382
9383 goto unknown;
9384
9385 case 'g':
9386 if (name[4] == 'e' &&
9387 name[5] == 't')
9388 { /* msgget */
9389 return -KEY_msgget;
9390 }
9391
9392 goto unknown;
9393
9394 case 'r':
9395 if (name[4] == 'c' &&
9396 name[5] == 'v')
9397 { /* msgrcv */
9398 return -KEY_msgrcv;
9399 }
9400
9401 goto unknown;
9402
9403 case 's':
9404 if (name[4] == 'n' &&
9405 name[5] == 'd')
9406 { /* msgsnd */
9407 return -KEY_msgsnd;
9408 }
9409
9410 goto unknown;
9411
9412 default:
9413 goto unknown;
9414 }
9415 }
9416
9417 goto unknown;
9418
9419 case 'p':
9420 if (name[1] == 'r' &&
9421 name[2] == 'i' &&
9422 name[3] == 'n' &&
9423 name[4] == 't' &&
9424 name[5] == 'f')
9425 { /* printf */
9426 return KEY_printf;
9427 }
9428
9429 goto unknown;
9430
9431 case 'r':
9432 switch (name[1])
9433 {
9434 case 'e':
9435 switch (name[2])
9436 {
9437 case 'n':
9438 if (name[3] == 'a' &&
9439 name[4] == 'm' &&
9440 name[5] == 'e')
9441 { /* rename */
9442 return -KEY_rename;
9443 }
9444
9445 goto unknown;
9446
9447 case 't':
9448 if (name[3] == 'u' &&
9449 name[4] == 'r' &&
9450 name[5] == 'n')
9451 { /* return */
9452 return KEY_return;
9453 }
9454
9455 goto unknown;
9456
9457 default:
9458 goto unknown;
9459 }
9460
9461 case 'i':
9462 if (name[2] == 'n' &&
9463 name[3] == 'd' &&
9464 name[4] == 'e' &&
9465 name[5] == 'x')
9466 { /* rindex */
9467 return -KEY_rindex;
9468 }
9469
9470 goto unknown;
9471
9472 default:
9473 goto unknown;
9474 }
9475
9476 case 's':
9477 switch (name[1])
9478 {
9479 case 'c':
9480 if (name[2] == 'a' &&
9481 name[3] == 'l' &&
9482 name[4] == 'a' &&
9483 name[5] == 'r')
9484 { /* scalar */
9485 return KEY_scalar;
9486 }
9487
9488 goto unknown;
9489
9490 case 'e':
9491 switch (name[2])
9492 {
9493 case 'l':
9494 if (name[3] == 'e' &&
9495 name[4] == 'c' &&
9496 name[5] == 't')
9497 { /* select */
9498 return -KEY_select;
9499 }
9500
9501 goto unknown;
9502
9503 case 'm':
9504 switch (name[3])
9505 {
9506 case 'c':
9507 if (name[4] == 't' &&
9508 name[5] == 'l')
9509 { /* semctl */
9510 return -KEY_semctl;
9511 }
9512
9513 goto unknown;
9514
9515 case 'g':
9516 if (name[4] == 'e' &&
9517 name[5] == 't')
9518 { /* semget */
9519 return -KEY_semget;
9520 }
9521
9522 goto unknown;
9523
9524 default:
9525 goto unknown;
9526 }
9527
9528 default:
9529 goto unknown;
9530 }
9531
9532 case 'h':
9533 if (name[2] == 'm')
9534 {
9535 switch (name[3])
9536 {
9537 case 'c':
9538 if (name[4] == 't' &&
9539 name[5] == 'l')
9540 { /* shmctl */
9541 return -KEY_shmctl;
9542 }
9543
9544 goto unknown;
9545
9546 case 'g':
9547 if (name[4] == 'e' &&
9548 name[5] == 't')
9549 { /* shmget */
9550 return -KEY_shmget;
9551 }
9552
9553 goto unknown;
9554
9555 default:
9556 goto unknown;
9557 }
9558 }
9559
9560 goto unknown;
9561
9562 case 'o':
9563 if (name[2] == 'c' &&
9564 name[3] == 'k' &&
9565 name[4] == 'e' &&
9566 name[5] == 't')
9567 { /* socket */
9568 return -KEY_socket;
9569 }
9570
9571 goto unknown;
9572
9573 case 'p':
9574 if (name[2] == 'l' &&
9575 name[3] == 'i' &&
9576 name[4] == 'c' &&
9577 name[5] == 'e')
9578 { /* splice */
9579 return -KEY_splice;
9580 }
9581
9582 goto unknown;
9583
9584 case 'u':
9585 if (name[2] == 'b' &&
9586 name[3] == 's' &&
9587 name[4] == 't' &&
9588 name[5] == 'r')
9589 { /* substr */
9590 return -KEY_substr;
9591 }
9592
9593 goto unknown;
9594
9595 case 'y':
9596 if (name[2] == 's' &&
9597 name[3] == 't' &&
9598 name[4] == 'e' &&
9599 name[5] == 'm')
9600 { /* system */
9601 return -KEY_system;
9602 }
9603
9604 goto unknown;
9605
9606 default:
9607 goto unknown;
9608 }
9609
9610 case 'u':
9611 if (name[1] == 'n')
9612 {
9613 switch (name[2])
9614 {
9615 case 'l':
9616 switch (name[3])
9617 {
9618 case 'e':
9619 if (name[4] == 's' &&
9620 name[5] == 's')
9621 { /* unless */
9622 return KEY_unless;
9623 }
9624
9625 goto unknown;
9626
9627 case 'i':
9628 if (name[4] == 'n' &&
9629 name[5] == 'k')
9630 { /* unlink */
9631 return -KEY_unlink;
9632 }
9633
9634 goto unknown;
9635
9636 default:
9637 goto unknown;
9638 }
9639
9640 case 'p':
9641 if (name[3] == 'a' &&
9642 name[4] == 'c' &&
9643 name[5] == 'k')
9644 { /* unpack */
9645 return -KEY_unpack;
9646 }
9647
9648 goto unknown;
9649
9650 default:
9651 goto unknown;
9652 }
9653 }
9654
9655 goto unknown;
9656
9657 case 'v':
9658 if (name[1] == 'a' &&
9659 name[2] == 'l' &&
9660 name[3] == 'u' &&
9661 name[4] == 'e' &&
9662 name[5] == 's')
9663 { /* values */
9664 return -KEY_values;
9665 }
9666
9667 goto unknown;
9668
9669 default:
9670 goto unknown;
e2e1dd5a 9671 }
4c3bbe0f 9672
0d863452 9673 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
9674 switch (name[0])
9675 {
9676 case 'D':
9677 if (name[1] == 'E' &&
9678 name[2] == 'S' &&
9679 name[3] == 'T' &&
9680 name[4] == 'R' &&
9681 name[5] == 'O' &&
9682 name[6] == 'Y')
9683 { /* DESTROY */
9684 return KEY_DESTROY;
9685 }
9686
9687 goto unknown;
9688
9689 case '_':
9690 if (name[1] == '_' &&
9691 name[2] == 'E' &&
9692 name[3] == 'N' &&
9693 name[4] == 'D' &&
9694 name[5] == '_' &&
9695 name[6] == '_')
9696 { /* __END__ */
9697 return KEY___END__;
9698 }
9699
9700 goto unknown;
9701
9702 case 'b':
9703 if (name[1] == 'i' &&
9704 name[2] == 'n' &&
9705 name[3] == 'm' &&
9706 name[4] == 'o' &&
9707 name[5] == 'd' &&
9708 name[6] == 'e')
9709 { /* binmode */
9710 return -KEY_binmode;
9711 }
9712
9713 goto unknown;
9714
9715 case 'c':
9716 if (name[1] == 'o' &&
9717 name[2] == 'n' &&
9718 name[3] == 'n' &&
9719 name[4] == 'e' &&
9720 name[5] == 'c' &&
9721 name[6] == 't')
9722 { /* connect */
9723 return -KEY_connect;
9724 }
9725
9726 goto unknown;
9727
9728 case 'd':
9729 switch (name[1])
9730 {
9731 case 'b':
9732 if (name[2] == 'm' &&
9733 name[3] == 'o' &&
9734 name[4] == 'p' &&
9735 name[5] == 'e' &&
9736 name[6] == 'n')
9737 { /* dbmopen */
9738 return -KEY_dbmopen;
9739 }
9740
9741 goto unknown;
9742
9743 case 'e':
0d863452
RH
9744 if (name[2] == 'f')
9745 {
9746 switch (name[3])
9747 {
9748 case 'a':
9749 if (name[4] == 'u' &&
9750 name[5] == 'l' &&
9751 name[6] == 't')
9752 { /* default */
5458a98a 9753 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
9754 }
9755
9756 goto unknown;
9757
9758 case 'i':
9759 if (name[4] == 'n' &&
952306ac
RGS
9760 name[5] == 'e' &&
9761 name[6] == 'd')
9762 { /* defined */
9763 return KEY_defined;
9764 }
4c3bbe0f 9765
952306ac 9766 goto unknown;
4c3bbe0f 9767
952306ac
RGS
9768 default:
9769 goto unknown;
9770 }
0d863452
RH
9771 }
9772
9773 goto unknown;
9774
9775 default:
9776 goto unknown;
9777 }
4c3bbe0f
MHM
9778
9779 case 'f':
9780 if (name[1] == 'o' &&
9781 name[2] == 'r' &&
9782 name[3] == 'e' &&
9783 name[4] == 'a' &&
9784 name[5] == 'c' &&
9785 name[6] == 'h')
9786 { /* foreach */
9787 return KEY_foreach;
9788 }
9789
9790 goto unknown;
9791
9792 case 'g':
9793 if (name[1] == 'e' &&
9794 name[2] == 't' &&
9795 name[3] == 'p')
9796 {
9797 switch (name[4])
9798 {
9799 case 'g':
9800 if (name[5] == 'r' &&
9801 name[6] == 'p')
9802 { /* getpgrp */
9803 return -KEY_getpgrp;
9804 }
9805
9806 goto unknown;
9807
9808 case 'p':
9809 if (name[5] == 'i' &&
9810 name[6] == 'd')
9811 { /* getppid */
9812 return -KEY_getppid;
9813 }
9814
9815 goto unknown;
9816
9817 default:
9818 goto unknown;
9819 }
9820 }
9821
9822 goto unknown;
9823
9824 case 'l':
9825 if (name[1] == 'c' &&
9826 name[2] == 'f' &&
9827 name[3] == 'i' &&
9828 name[4] == 'r' &&
9829 name[5] == 's' &&
9830 name[6] == 't')
9831 { /* lcfirst */
9832 return -KEY_lcfirst;
9833 }
9834
9835 goto unknown;
9836
9837 case 'o':
9838 if (name[1] == 'p' &&
9839 name[2] == 'e' &&
9840 name[3] == 'n' &&
9841 name[4] == 'd' &&
9842 name[5] == 'i' &&
9843 name[6] == 'r')
9844 { /* opendir */
9845 return -KEY_opendir;
9846 }
9847
9848 goto unknown;
9849
9850 case 'p':
9851 if (name[1] == 'a' &&
9852 name[2] == 'c' &&
9853 name[3] == 'k' &&
9854 name[4] == 'a' &&
9855 name[5] == 'g' &&
9856 name[6] == 'e')
9857 { /* package */
9858 return KEY_package;
9859 }
9860
9861 goto unknown;
9862
9863 case 'r':
9864 if (name[1] == 'e')
9865 {
9866 switch (name[2])
9867 {
9868 case 'a':
9869 if (name[3] == 'd' &&
9870 name[4] == 'd' &&
9871 name[5] == 'i' &&
9872 name[6] == 'r')
9873 { /* readdir */
9874 return -KEY_readdir;
9875 }
9876
9877 goto unknown;
9878
9879 case 'q':
9880 if (name[3] == 'u' &&
9881 name[4] == 'i' &&
9882 name[5] == 'r' &&
9883 name[6] == 'e')
9884 { /* require */
9885 return KEY_require;
9886 }
9887
9888 goto unknown;
9889
9890 case 'v':
9891 if (name[3] == 'e' &&
9892 name[4] == 'r' &&
9893 name[5] == 's' &&
9894 name[6] == 'e')
9895 { /* reverse */
9896 return -KEY_reverse;
9897 }
9898
9899 goto unknown;
9900
9901 default:
9902 goto unknown;
9903 }
9904 }
9905
9906 goto unknown;
9907
9908 case 's':
9909 switch (name[1])
9910 {
9911 case 'e':
9912 switch (name[2])
9913 {
9914 case 'e':
9915 if (name[3] == 'k' &&
9916 name[4] == 'd' &&
9917 name[5] == 'i' &&
9918 name[6] == 'r')
9919 { /* seekdir */
9920 return -KEY_seekdir;
9921 }
9922
9923 goto unknown;
9924
9925 case 't':
9926 if (name[3] == 'p' &&
9927 name[4] == 'g' &&
9928 name[5] == 'r' &&
9929 name[6] == 'p')
9930 { /* setpgrp */
9931 return -KEY_setpgrp;
9932 }
9933
9934 goto unknown;
9935
9936 default:
9937 goto unknown;
9938 }
9939
9940 case 'h':
9941 if (name[2] == 'm' &&
9942 name[3] == 'r' &&
9943 name[4] == 'e' &&
9944 name[5] == 'a' &&
9945 name[6] == 'd')
9946 { /* shmread */
9947 return -KEY_shmread;
9948 }
9949
9950 goto unknown;
9951
9952 case 'p':
9953 if (name[2] == 'r' &&
9954 name[3] == 'i' &&
9955 name[4] == 'n' &&
9956 name[5] == 't' &&
9957 name[6] == 'f')
9958 { /* sprintf */
9959 return -KEY_sprintf;
9960 }
9961
9962 goto unknown;
9963
9964 case 'y':
9965 switch (name[2])
9966 {
9967 case 'm':
9968 if (name[3] == 'l' &&
9969 name[4] == 'i' &&
9970 name[5] == 'n' &&
9971 name[6] == 'k')
9972 { /* symlink */
9973 return -KEY_symlink;
9974 }
9975
9976 goto unknown;
9977
9978 case 's':
9979 switch (name[3])
9980 {
9981 case 'c':
9982 if (name[4] == 'a' &&
9983 name[5] == 'l' &&
9984 name[6] == 'l')
9985 { /* syscall */
9986 return -KEY_syscall;
9987 }
9988
9989 goto unknown;
9990
9991 case 'o':
9992 if (name[4] == 'p' &&
9993 name[5] == 'e' &&
9994 name[6] == 'n')
9995 { /* sysopen */
9996 return -KEY_sysopen;
9997 }
9998
9999 goto unknown;
10000
10001 case 'r':
10002 if (name[4] == 'e' &&
10003 name[5] == 'a' &&
10004 name[6] == 'd')
10005 { /* sysread */
10006 return -KEY_sysread;
10007 }
10008
10009 goto unknown;
10010
10011 case 's':
10012 if (name[4] == 'e' &&
10013 name[5] == 'e' &&
10014 name[6] == 'k')
10015 { /* sysseek */
10016 return -KEY_sysseek;
10017 }
10018
10019 goto unknown;
10020
10021 default:
10022 goto unknown;
10023 }
10024
10025 default:
10026 goto unknown;
10027 }
10028
10029 default:
10030 goto unknown;
10031 }
10032
10033 case 't':
10034 if (name[1] == 'e' &&
10035 name[2] == 'l' &&
10036 name[3] == 'l' &&
10037 name[4] == 'd' &&
10038 name[5] == 'i' &&
10039 name[6] == 'r')
10040 { /* telldir */
10041 return -KEY_telldir;
10042 }
10043
10044 goto unknown;
10045
10046 case 'u':
10047 switch (name[1])
10048 {
10049 case 'c':
10050 if (name[2] == 'f' &&
10051 name[3] == 'i' &&
10052 name[4] == 'r' &&
10053 name[5] == 's' &&
10054 name[6] == 't')
10055 { /* ucfirst */
10056 return -KEY_ucfirst;
10057 }
10058
10059 goto unknown;
10060
10061 case 'n':
10062 if (name[2] == 's' &&
10063 name[3] == 'h' &&
10064 name[4] == 'i' &&
10065 name[5] == 'f' &&
10066 name[6] == 't')
10067 { /* unshift */
10068 return -KEY_unshift;
10069 }
10070
10071 goto unknown;
10072
10073 default:
10074 goto unknown;
10075 }
10076
10077 case 'w':
10078 if (name[1] == 'a' &&
10079 name[2] == 'i' &&
10080 name[3] == 't' &&
10081 name[4] == 'p' &&
10082 name[5] == 'i' &&
10083 name[6] == 'd')
10084 { /* waitpid */
10085 return -KEY_waitpid;
10086 }
10087
10088 goto unknown;
10089
10090 default:
10091 goto unknown;
10092 }
10093
10094 case 8: /* 26 tokens of length 8 */
10095 switch (name[0])
10096 {
10097 case 'A':
10098 if (name[1] == 'U' &&
10099 name[2] == 'T' &&
10100 name[3] == 'O' &&
10101 name[4] == 'L' &&
10102 name[5] == 'O' &&
10103 name[6] == 'A' &&
10104 name[7] == 'D')
10105 { /* AUTOLOAD */
10106 return KEY_AUTOLOAD;
10107 }
10108
10109 goto unknown;
10110
10111 case '_':
10112 if (name[1] == '_')
10113 {
10114 switch (name[2])
10115 {
10116 case 'D':
10117 if (name[3] == 'A' &&
10118 name[4] == 'T' &&
10119 name[5] == 'A' &&
10120 name[6] == '_' &&
10121 name[7] == '_')
10122 { /* __DATA__ */
10123 return KEY___DATA__;
10124 }
10125
10126 goto unknown;
10127
10128 case 'F':
10129 if (name[3] == 'I' &&
10130 name[4] == 'L' &&
10131 name[5] == 'E' &&
10132 name[6] == '_' &&
10133 name[7] == '_')
10134 { /* __FILE__ */
10135 return -KEY___FILE__;
10136 }
10137
10138 goto unknown;
10139
10140 case 'L':
10141 if (name[3] == 'I' &&
10142 name[4] == 'N' &&
10143 name[5] == 'E' &&
10144 name[6] == '_' &&
10145 name[7] == '_')
10146 { /* __LINE__ */
10147 return -KEY___LINE__;
10148 }
10149
10150 goto unknown;
10151
10152 default:
10153 goto unknown;
10154 }
10155 }
10156
10157 goto unknown;
10158
10159 case 'c':
10160 switch (name[1])
10161 {
10162 case 'l':
10163 if (name[2] == 'o' &&
10164 name[3] == 's' &&
10165 name[4] == 'e' &&
10166 name[5] == 'd' &&
10167 name[6] == 'i' &&
10168 name[7] == 'r')
10169 { /* closedir */
10170 return -KEY_closedir;
10171 }
10172
10173 goto unknown;
10174
10175 case 'o':
10176 if (name[2] == 'n' &&
10177 name[3] == 't' &&
10178 name[4] == 'i' &&
10179 name[5] == 'n' &&
10180 name[6] == 'u' &&
10181 name[7] == 'e')
10182 { /* continue */
10183 return -KEY_continue;
10184 }
10185
10186 goto unknown;
10187
10188 default:
10189 goto unknown;
10190 }
10191
10192 case 'd':
10193 if (name[1] == 'b' &&
10194 name[2] == 'm' &&
10195 name[3] == 'c' &&
10196 name[4] == 'l' &&
10197 name[5] == 'o' &&
10198 name[6] == 's' &&
10199 name[7] == 'e')
10200 { /* dbmclose */
10201 return -KEY_dbmclose;
10202 }
10203
10204 goto unknown;
10205
10206 case 'e':
10207 if (name[1] == 'n' &&
10208 name[2] == 'd')
10209 {
10210 switch (name[3])
10211 {
10212 case 'g':
10213 if (name[4] == 'r' &&
10214 name[5] == 'e' &&
10215 name[6] == 'n' &&
10216 name[7] == 't')
10217 { /* endgrent */
10218 return -KEY_endgrent;
10219 }
10220
10221 goto unknown;
10222
10223 case 'p':
10224 if (name[4] == 'w' &&
10225 name[5] == 'e' &&
10226 name[6] == 'n' &&
10227 name[7] == 't')
10228 { /* endpwent */
10229 return -KEY_endpwent;
10230 }
10231
10232 goto unknown;
10233
10234 default:
10235 goto unknown;
10236 }
10237 }
10238
10239 goto unknown;
10240
10241 case 'f':
10242 if (name[1] == 'o' &&
10243 name[2] == 'r' &&
10244 name[3] == 'm' &&
10245 name[4] == 'l' &&
10246 name[5] == 'i' &&
10247 name[6] == 'n' &&
10248 name[7] == 'e')
10249 { /* formline */
10250 return -KEY_formline;
10251 }
10252
10253 goto unknown;
10254
10255 case 'g':
10256 if (name[1] == 'e' &&
10257 name[2] == 't')
10258 {
10259 switch (name[3])
10260 {
10261 case 'g':
10262 if (name[4] == 'r')
10263 {
10264 switch (name[5])
10265 {
10266 case 'e':
10267 if (name[6] == 'n' &&
10268 name[7] == 't')
10269 { /* getgrent */
10270 return -KEY_getgrent;
10271 }
10272
10273 goto unknown;
10274
10275 case 'g':
10276 if (name[6] == 'i' &&
10277 name[7] == 'd')
10278 { /* getgrgid */
10279 return -KEY_getgrgid;
10280 }
10281
10282 goto unknown;
10283
10284 case 'n':
10285 if (name[6] == 'a' &&
10286 name[7] == 'm')
10287 { /* getgrnam */
10288 return -KEY_getgrnam;
10289 }
10290
10291 goto unknown;
10292
10293 default:
10294 goto unknown;
10295 }
10296 }
10297
10298 goto unknown;
10299
10300 case 'l':
10301 if (name[4] == 'o' &&
10302 name[5] == 'g' &&
10303 name[6] == 'i' &&
10304 name[7] == 'n')
10305 { /* getlogin */
10306 return -KEY_getlogin;
10307 }
10308
10309 goto unknown;
10310
10311 case 'p':
10312 if (name[4] == 'w')
10313 {
10314 switch (name[5])
10315 {
10316 case 'e':
10317 if (name[6] == 'n' &&
10318 name[7] == 't')
10319 { /* getpwent */
10320 return -KEY_getpwent;
10321 }
10322
10323 goto unknown;
10324
10325 case 'n':
10326 if (name[6] == 'a' &&
10327 name[7] == 'm')
10328 { /* getpwnam */
10329 return -KEY_getpwnam;
10330 }
10331
10332 goto unknown;
10333
10334 case 'u':
10335 if (name[6] == 'i' &&
10336 name[7] == 'd')
10337 { /* getpwuid */
10338 return -KEY_getpwuid;
10339 }
10340
10341 goto unknown;
10342
10343 default:
10344 goto unknown;
10345 }
10346 }
10347
10348 goto unknown;
10349
10350 default:
10351 goto unknown;
10352 }
10353 }
10354
10355 goto unknown;
10356
10357 case 'r':
10358 if (name[1] == 'e' &&
10359 name[2] == 'a' &&
10360 name[3] == 'd')
10361 {
10362 switch (name[4])
10363 {
10364 case 'l':
10365 if (name[5] == 'i' &&
10366 name[6] == 'n')
10367 {
10368 switch (name[7])
10369 {
10370 case 'e':
10371 { /* readline */
10372 return -KEY_readline;
10373 }
10374
4c3bbe0f
MHM
10375 case 'k':
10376 { /* readlink */
10377 return -KEY_readlink;
10378 }
10379
4c3bbe0f
MHM
10380 default:
10381 goto unknown;
10382 }
10383 }
10384
10385 goto unknown;
10386
10387 case 'p':
10388 if (name[5] == 'i' &&
10389 name[6] == 'p' &&
10390 name[7] == 'e')
10391 { /* readpipe */
10392 return -KEY_readpipe;
10393 }
10394
10395 goto unknown;
10396
10397 default:
10398 goto unknown;
10399 }
10400 }
10401
10402 goto unknown;
10403
10404 case 's':
10405 switch (name[1])
10406 {
10407 case 'e':
10408 if (name[2] == 't')
10409 {
10410 switch (name[3])
10411 {
10412 case 'g':
10413 if (name[4] == 'r' &&
10414 name[5] == 'e' &&
10415 name[6] == 'n' &&
10416 name[7] == 't')
10417 { /* setgrent */
10418 return -KEY_setgrent;
10419 }
10420
10421 goto unknown;
10422
10423 case 'p':
10424 if (name[4] == 'w' &&
10425 name[5] == 'e' &&
10426 name[6] == 'n' &&
10427 name[7] == 't')
10428 { /* setpwent */
10429 return -KEY_setpwent;
10430 }
10431
10432 goto unknown;
10433
10434 default:
10435 goto unknown;
10436 }
10437 }
10438
10439 goto unknown;
10440
10441 case 'h':
10442 switch (name[2])
10443 {
10444 case 'm':
10445 if (name[3] == 'w' &&
10446 name[4] == 'r' &&
10447 name[5] == 'i' &&
10448 name[6] == 't' &&
10449 name[7] == 'e')
10450 { /* shmwrite */
10451 return -KEY_shmwrite;
10452 }
10453
10454 goto unknown;
10455
10456 case 'u':
10457 if (name[3] == 't' &&
10458 name[4] == 'd' &&
10459 name[5] == 'o' &&
10460 name[6] == 'w' &&
10461 name[7] == 'n')
10462 { /* shutdown */
10463 return -KEY_shutdown;
10464 }
10465
10466 goto unknown;
10467
10468 default:
10469 goto unknown;
10470 }
10471
10472 case 'y':
10473 if (name[2] == 's' &&
10474 name[3] == 'w' &&
10475 name[4] == 'r' &&
10476 name[5] == 'i' &&
10477 name[6] == 't' &&
10478 name[7] == 'e')
10479 { /* syswrite */
10480 return -KEY_syswrite;
10481 }
10482
10483 goto unknown;
10484
10485 default:
10486 goto unknown;
10487 }
10488
10489 case 't':
10490 if (name[1] == 'r' &&
10491 name[2] == 'u' &&
10492 name[3] == 'n' &&
10493 name[4] == 'c' &&
10494 name[5] == 'a' &&
10495 name[6] == 't' &&
10496 name[7] == 'e')
10497 { /* truncate */
10498 return -KEY_truncate;
10499 }
10500
10501 goto unknown;
10502
10503 default:
10504 goto unknown;
10505 }
10506
3c10abe3 10507 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10508 switch (name[0])
10509 {
3c10abe3
AG
10510 case 'U':
10511 if (name[1] == 'N' &&
10512 name[2] == 'I' &&
10513 name[3] == 'T' &&
10514 name[4] == 'C' &&
10515 name[5] == 'H' &&
10516 name[6] == 'E' &&
10517 name[7] == 'C' &&
10518 name[8] == 'K')
10519 { /* UNITCHECK */
10520 return KEY_UNITCHECK;
10521 }
10522
10523 goto unknown;
10524
4c3bbe0f
MHM
10525 case 'e':
10526 if (name[1] == 'n' &&
10527 name[2] == 'd' &&
10528 name[3] == 'n' &&
10529 name[4] == 'e' &&
10530 name[5] == 't' &&
10531 name[6] == 'e' &&
10532 name[7] == 'n' &&
10533 name[8] == 't')
10534 { /* endnetent */
10535 return -KEY_endnetent;
10536 }
10537
10538 goto unknown;
10539
10540 case 'g':
10541 if (name[1] == 'e' &&
10542 name[2] == 't' &&
10543 name[3] == 'n' &&
10544 name[4] == 'e' &&
10545 name[5] == 't' &&
10546 name[6] == 'e' &&
10547 name[7] == 'n' &&
10548 name[8] == 't')
10549 { /* getnetent */
10550 return -KEY_getnetent;
10551 }
10552
10553 goto unknown;
10554
10555 case 'l':
10556 if (name[1] == 'o' &&
10557 name[2] == 'c' &&
10558 name[3] == 'a' &&
10559 name[4] == 'l' &&
10560 name[5] == 't' &&
10561 name[6] == 'i' &&
10562 name[7] == 'm' &&
10563 name[8] == 'e')
10564 { /* localtime */
10565 return -KEY_localtime;
10566 }
10567
10568 goto unknown;
10569
10570 case 'p':
10571 if (name[1] == 'r' &&
10572 name[2] == 'o' &&
10573 name[3] == 't' &&
10574 name[4] == 'o' &&
10575 name[5] == 't' &&
10576 name[6] == 'y' &&
10577 name[7] == 'p' &&
10578 name[8] == 'e')
10579 { /* prototype */
10580 return KEY_prototype;
10581 }
10582
10583 goto unknown;
10584
10585 case 'q':
10586 if (name[1] == 'u' &&
10587 name[2] == 'o' &&
10588 name[3] == 't' &&
10589 name[4] == 'e' &&
10590 name[5] == 'm' &&
10591 name[6] == 'e' &&
10592 name[7] == 't' &&
10593 name[8] == 'a')
10594 { /* quotemeta */
10595 return -KEY_quotemeta;
10596 }
10597
10598 goto unknown;
10599
10600 case 'r':
10601 if (name[1] == 'e' &&
10602 name[2] == 'w' &&
10603 name[3] == 'i' &&
10604 name[4] == 'n' &&
10605 name[5] == 'd' &&
10606 name[6] == 'd' &&
10607 name[7] == 'i' &&
10608 name[8] == 'r')
10609 { /* rewinddir */
10610 return -KEY_rewinddir;
10611 }
10612
10613 goto unknown;
10614
10615 case 's':
10616 if (name[1] == 'e' &&
10617 name[2] == 't' &&
10618 name[3] == 'n' &&
10619 name[4] == 'e' &&
10620 name[5] == 't' &&
10621 name[6] == 'e' &&
10622 name[7] == 'n' &&
10623 name[8] == 't')
10624 { /* setnetent */
10625 return -KEY_setnetent;
10626 }
10627
10628 goto unknown;
10629
10630 case 'w':
10631 if (name[1] == 'a' &&
10632 name[2] == 'n' &&
10633 name[3] == 't' &&
10634 name[4] == 'a' &&
10635 name[5] == 'r' &&
10636 name[6] == 'r' &&
10637 name[7] == 'a' &&
10638 name[8] == 'y')
10639 { /* wantarray */
10640 return -KEY_wantarray;
10641 }
10642
10643 goto unknown;
10644
10645 default:
10646 goto unknown;
10647 }
10648
10649 case 10: /* 9 tokens of length 10 */
10650 switch (name[0])
10651 {
10652 case 'e':
10653 if (name[1] == 'n' &&
10654 name[2] == 'd')
10655 {
10656 switch (name[3])
10657 {
10658 case 'h':
10659 if (name[4] == 'o' &&
10660 name[5] == 's' &&
10661 name[6] == 't' &&
10662 name[7] == 'e' &&
10663 name[8] == 'n' &&
10664 name[9] == 't')
10665 { /* endhostent */
10666 return -KEY_endhostent;
10667 }
10668
10669 goto unknown;
10670
10671 case 's':
10672 if (name[4] == 'e' &&
10673 name[5] == 'r' &&
10674 name[6] == 'v' &&
10675 name[7] == 'e' &&
10676 name[8] == 'n' &&
10677 name[9] == 't')
10678 { /* endservent */
10679 return -KEY_endservent;
10680 }
10681
10682 goto unknown;
10683
10684 default:
10685 goto unknown;
10686 }
10687 }
10688
10689 goto unknown;
10690
10691 case 'g':
10692 if (name[1] == 'e' &&
10693 name[2] == 't')
10694 {
10695 switch (name[3])
10696 {
10697 case 'h':
10698 if (name[4] == 'o' &&
10699 name[5] == 's' &&
10700 name[6] == 't' &&
10701 name[7] == 'e' &&
10702 name[8] == 'n' &&
10703 name[9] == 't')
10704 { /* gethostent */
10705 return -KEY_gethostent;
10706 }
10707
10708 goto unknown;
10709
10710 case 's':
10711 switch (name[4])
10712 {
10713 case 'e':
10714 if (name[5] == 'r' &&
10715 name[6] == 'v' &&
10716 name[7] == 'e' &&
10717 name[8] == 'n' &&
10718 name[9] == 't')
10719 { /* getservent */
10720 return -KEY_getservent;
10721 }
10722
10723 goto unknown;
10724
10725 case 'o':
10726 if (name[5] == 'c' &&
10727 name[6] == 'k' &&
10728 name[7] == 'o' &&
10729 name[8] == 'p' &&
10730 name[9] == 't')
10731 { /* getsockopt */
10732 return -KEY_getsockopt;
10733 }
10734
10735 goto unknown;
10736
10737 default:
10738 goto unknown;
10739 }
10740
10741 default:
10742 goto unknown;
10743 }
10744 }
10745
10746 goto unknown;
10747
10748 case 's':
10749 switch (name[1])
10750 {
10751 case 'e':
10752 if (name[2] == 't')
10753 {
10754 switch (name[3])
10755 {
10756 case 'h':
10757 if (name[4] == 'o' &&
10758 name[5] == 's' &&
10759 name[6] == 't' &&
10760 name[7] == 'e' &&
10761 name[8] == 'n' &&
10762 name[9] == 't')
10763 { /* sethostent */
10764 return -KEY_sethostent;
10765 }
10766
10767 goto unknown;
10768
10769 case 's':
10770 switch (name[4])
10771 {
10772 case 'e':
10773 if (name[5] == 'r' &&
10774 name[6] == 'v' &&
10775 name[7] == 'e' &&
10776 name[8] == 'n' &&
10777 name[9] == 't')
10778 { /* setservent */
10779 return -KEY_setservent;
10780 }
10781
10782 goto unknown;
10783
10784 case 'o':
10785 if (name[5] == 'c' &&
10786 name[6] == 'k' &&
10787 name[7] == 'o' &&
10788 name[8] == 'p' &&
10789 name[9] == 't')
10790 { /* setsockopt */
10791 return -KEY_setsockopt;
10792 }
10793
10794 goto unknown;
10795
10796 default:
10797 goto unknown;
10798 }
10799
10800 default:
10801 goto unknown;
10802 }
10803 }
10804
10805 goto unknown;
10806
10807 case 'o':
10808 if (name[2] == 'c' &&
10809 name[3] == 'k' &&
10810 name[4] == 'e' &&
10811 name[5] == 't' &&
10812 name[6] == 'p' &&
10813 name[7] == 'a' &&
10814 name[8] == 'i' &&
10815 name[9] == 'r')
10816 { /* socketpair */
10817 return -KEY_socketpair;
10818 }
10819
10820 goto unknown;
10821
10822 default:
10823 goto unknown;
10824 }
10825
10826 default:
10827 goto unknown;
e2e1dd5a 10828 }
4c3bbe0f
MHM
10829
10830 case 11: /* 8 tokens of length 11 */
10831 switch (name[0])
10832 {
10833 case '_':
10834 if (name[1] == '_' &&
10835 name[2] == 'P' &&
10836 name[3] == 'A' &&
10837 name[4] == 'C' &&
10838 name[5] == 'K' &&
10839 name[6] == 'A' &&
10840 name[7] == 'G' &&
10841 name[8] == 'E' &&
10842 name[9] == '_' &&
10843 name[10] == '_')
10844 { /* __PACKAGE__ */
10845 return -KEY___PACKAGE__;
10846 }
10847
10848 goto unknown;
10849
10850 case 'e':
10851 if (name[1] == 'n' &&
10852 name[2] == 'd' &&
10853 name[3] == 'p' &&
10854 name[4] == 'r' &&
10855 name[5] == 'o' &&
10856 name[6] == 't' &&
10857 name[7] == 'o' &&
10858 name[8] == 'e' &&
10859 name[9] == 'n' &&
10860 name[10] == 't')
10861 { /* endprotoent */
10862 return -KEY_endprotoent;
10863 }
10864
10865 goto unknown;
10866
10867 case 'g':
10868 if (name[1] == 'e' &&
10869 name[2] == 't')
10870 {
10871 switch (name[3])
10872 {
10873 case 'p':
10874 switch (name[4])
10875 {
10876 case 'e':
10877 if (name[5] == 'e' &&
10878 name[6] == 'r' &&
10879 name[7] == 'n' &&
10880 name[8] == 'a' &&
10881 name[9] == 'm' &&
10882 name[10] == 'e')
10883 { /* getpeername */
10884 return -KEY_getpeername;
10885 }
10886
10887 goto unknown;
10888
10889 case 'r':
10890 switch (name[5])
10891 {
10892 case 'i':
10893 if (name[6] == 'o' &&
10894 name[7] == 'r' &&
10895 name[8] == 'i' &&
10896 name[9] == 't' &&
10897 name[10] == 'y')
10898 { /* getpriority */
10899 return -KEY_getpriority;
10900 }
10901
10902 goto unknown;
10903
10904 case 'o':
10905 if (name[6] == 't' &&
10906 name[7] == 'o' &&
10907 name[8] == 'e' &&
10908 name[9] == 'n' &&
10909 name[10] == 't')
10910 { /* getprotoent */
10911 return -KEY_getprotoent;
10912 }
10913
10914 goto unknown;
10915
10916 default:
10917 goto unknown;
10918 }
10919
10920 default:
10921 goto unknown;
10922 }
10923
10924 case 's':
10925 if (name[4] == 'o' &&
10926 name[5] == 'c' &&
10927 name[6] == 'k' &&
10928 name[7] == 'n' &&
10929 name[8] == 'a' &&
10930 name[9] == 'm' &&
10931 name[10] == 'e')
10932 { /* getsockname */
10933 return -KEY_getsockname;
10934 }
10935
10936 goto unknown;
10937
10938 default:
10939 goto unknown;
10940 }
10941 }
10942
10943 goto unknown;
10944
10945 case 's':
10946 if (name[1] == 'e' &&
10947 name[2] == 't' &&
10948 name[3] == 'p' &&
10949 name[4] == 'r')
10950 {
10951 switch (name[5])
10952 {
10953 case 'i':
10954 if (name[6] == 'o' &&
10955 name[7] == 'r' &&
10956 name[8] == 'i' &&
10957 name[9] == 't' &&
10958 name[10] == 'y')
10959 { /* setpriority */
10960 return -KEY_setpriority;
10961 }
10962
10963 goto unknown;
10964
10965 case 'o':
10966 if (name[6] == 't' &&
10967 name[7] == 'o' &&
10968 name[8] == 'e' &&
10969 name[9] == 'n' &&
10970 name[10] == 't')
10971 { /* setprotoent */
10972 return -KEY_setprotoent;
10973 }
10974
10975 goto unknown;
10976
10977 default:
10978 goto unknown;
10979 }
10980 }
10981
10982 goto unknown;
10983
10984 default:
10985 goto unknown;
e2e1dd5a 10986 }
4c3bbe0f
MHM
10987
10988 case 12: /* 2 tokens of length 12 */
10989 if (name[0] == 'g' &&
10990 name[1] == 'e' &&
10991 name[2] == 't' &&
10992 name[3] == 'n' &&
10993 name[4] == 'e' &&
10994 name[5] == 't' &&
10995 name[6] == 'b' &&
10996 name[7] == 'y')
10997 {
10998 switch (name[8])
10999 {
11000 case 'a':
11001 if (name[9] == 'd' &&
11002 name[10] == 'd' &&
11003 name[11] == 'r')
11004 { /* getnetbyaddr */
11005 return -KEY_getnetbyaddr;
11006 }
11007
11008 goto unknown;
11009
11010 case 'n':
11011 if (name[9] == 'a' &&
11012 name[10] == 'm' &&
11013 name[11] == 'e')
11014 { /* getnetbyname */
11015 return -KEY_getnetbyname;
11016 }
11017
11018 goto unknown;
11019
11020 default:
11021 goto unknown;
11022 }
e2e1dd5a 11023 }
4c3bbe0f
MHM
11024
11025 goto unknown;
11026
11027 case 13: /* 4 tokens of length 13 */
11028 if (name[0] == 'g' &&
11029 name[1] == 'e' &&
11030 name[2] == 't')
11031 {
11032 switch (name[3])
11033 {
11034 case 'h':
11035 if (name[4] == 'o' &&
11036 name[5] == 's' &&
11037 name[6] == 't' &&
11038 name[7] == 'b' &&
11039 name[8] == 'y')
11040 {
11041 switch (name[9])
11042 {
11043 case 'a':
11044 if (name[10] == 'd' &&
11045 name[11] == 'd' &&
11046 name[12] == 'r')
11047 { /* gethostbyaddr */
11048 return -KEY_gethostbyaddr;
11049 }
11050
11051 goto unknown;
11052
11053 case 'n':
11054 if (name[10] == 'a' &&
11055 name[11] == 'm' &&
11056 name[12] == 'e')
11057 { /* gethostbyname */
11058 return -KEY_gethostbyname;
11059 }
11060
11061 goto unknown;
11062
11063 default:
11064 goto unknown;
11065 }
11066 }
11067
11068 goto unknown;
11069
11070 case 's':
11071 if (name[4] == 'e' &&
11072 name[5] == 'r' &&
11073 name[6] == 'v' &&
11074 name[7] == 'b' &&
11075 name[8] == 'y')
11076 {
11077 switch (name[9])
11078 {
11079 case 'n':
11080 if (name[10] == 'a' &&
11081 name[11] == 'm' &&
11082 name[12] == 'e')
11083 { /* getservbyname */
11084 return -KEY_getservbyname;
11085 }
11086
11087 goto unknown;
11088
11089 case 'p':
11090 if (name[10] == 'o' &&
11091 name[11] == 'r' &&
11092 name[12] == 't')
11093 { /* getservbyport */
11094 return -KEY_getservbyport;
11095 }
11096
11097 goto unknown;
11098
11099 default:
11100 goto unknown;
11101 }
11102 }
11103
11104 goto unknown;
11105
11106 default:
11107 goto unknown;
11108 }
e2e1dd5a 11109 }
4c3bbe0f
MHM
11110
11111 goto unknown;
11112
11113 case 14: /* 1 tokens of length 14 */
11114 if (name[0] == 'g' &&
11115 name[1] == 'e' &&
11116 name[2] == 't' &&
11117 name[3] == 'p' &&
11118 name[4] == 'r' &&
11119 name[5] == 'o' &&
11120 name[6] == 't' &&
11121 name[7] == 'o' &&
11122 name[8] == 'b' &&
11123 name[9] == 'y' &&
11124 name[10] == 'n' &&
11125 name[11] == 'a' &&
11126 name[12] == 'm' &&
11127 name[13] == 'e')
11128 { /* getprotobyname */
11129 return -KEY_getprotobyname;
11130 }
11131
11132 goto unknown;
11133
11134 case 16: /* 1 tokens of length 16 */
11135 if (name[0] == 'g' &&
11136 name[1] == 'e' &&
11137 name[2] == 't' &&
11138 name[3] == 'p' &&
11139 name[4] == 'r' &&
11140 name[5] == 'o' &&
11141 name[6] == 't' &&
11142 name[7] == 'o' &&
11143 name[8] == 'b' &&
11144 name[9] == 'y' &&
11145 name[10] == 'n' &&
11146 name[11] == 'u' &&
11147 name[12] == 'm' &&
11148 name[13] == 'b' &&
11149 name[14] == 'e' &&
11150 name[15] == 'r')
11151 { /* getprotobynumber */
11152 return -KEY_getprotobynumber;
11153 }
11154
11155 goto unknown;
11156
11157 default:
11158 goto unknown;
e2e1dd5a 11159 }
4c3bbe0f
MHM
11160
11161unknown:
e2e1dd5a 11162 return 0;
a687059c
LW
11163}
11164
76e3520e 11165STATIC void
c94115d8 11166S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11167{
97aff369 11168 dVAR;
2f3197b3 11169
7918f24d
NC
11170 PERL_ARGS_ASSERT_CHECKCOMMA;
11171
d008e5eb 11172 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11173 if (ckWARN(WARN_SYNTAX)) {
11174 int level = 1;
26ff0806 11175 const char *w;
d008e5eb
GS
11176 for (w = s+2; *w && level; w++) {
11177 if (*w == '(')
11178 ++level;
11179 else if (*w == ')')
11180 --level;
11181 }
888fea98
NC
11182 while (isSPACE(*w))
11183 ++w;
b1439985
RGS
11184 /* the list of chars below is for end of statements or
11185 * block / parens, boolean operators (&&, ||, //) and branch
11186 * constructs (or, and, if, until, unless, while, err, for).
11187 * Not a very solid hack... */
11188 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11189 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11190 "%s (...) interpreted as function",name);
d008e5eb 11191 }
2f3197b3 11192 }
3280af22 11193 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11194 s++;
a687059c
LW
11195 if (*s == '(')
11196 s++;
3280af22 11197 while (s < PL_bufend && isSPACE(*s))
a687059c 11198 s++;
7e2040f0 11199 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11200 const char * const w = s++;
7e2040f0 11201 while (isALNUM_lazy_if(s,UTF))
a687059c 11202 s++;
3280af22 11203 while (s < PL_bufend && isSPACE(*s))
a687059c 11204 s++;
e929a76b 11205 if (*s == ',') {
c94115d8 11206 GV* gv;
5458a98a 11207 if (keyword(w, s - w, 0))
e929a76b 11208 return;
c94115d8
NC
11209
11210 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11211 if (gv && GvCVu(gv))
abbb3198 11212 return;
cea2e8a9 11213 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11214 }
11215 }
11216}
11217
423cee85
JH
11218/* Either returns sv, or mortalizes sv and returns a new SV*.
11219 Best used as sv=new_constant(..., sv, ...).
11220 If s, pv are NULL, calls subroutine with one argument,
11221 and type is used with error messages only. */
11222
b3ac6de7 11223STATIC SV *
eb0d8d16
NC
11224S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11225 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11226{
27da23d5 11227 dVAR; dSP;
890ce7af 11228 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11229 SV *res;
b3ac6de7
IZ
11230 SV **cvp;
11231 SV *cv, *typesv;
89e33a05 11232 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11233
7918f24d
NC
11234 PERL_ARGS_ASSERT_NEW_CONSTANT;
11235
f0af216f 11236 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11237 SV *msg;
11238
10edeb5d
JH
11239 why2 = (const char *)
11240 (strEQ(key,"charnames")
11241 ? "(possibly a missing \"use charnames ...\")"
11242 : "");
4e553d73 11243 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11244 (type ? type: "undef"), why2);
11245
11246 /* This is convoluted and evil ("goto considered harmful")
11247 * but I do not understand the intricacies of all the different
11248 * failure modes of %^H in here. The goal here is to make
11249 * the most probable error message user-friendly. --jhi */
11250
11251 goto msgdone;
11252
423cee85 11253 report:
4e553d73 11254 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11255 (type ? type: "undef"), why1, why2, why3);
41ab332f 11256 msgdone:
95a20fc0 11257 yyerror(SvPVX_const(msg));
423cee85
JH
11258 SvREFCNT_dec(msg);
11259 return sv;
11260 }
eb0d8d16 11261 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11262 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11263 why1 = "$^H{";
11264 why2 = key;
f0af216f 11265 why3 = "} is not defined";
423cee85 11266 goto report;
b3ac6de7
IZ
11267 }
11268 sv_2mortal(sv); /* Parent created it permanently */
11269 cv = *cvp;
423cee85 11270 if (!pv && s)
59cd0e26 11271 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11272 if (type && pv)
59cd0e26 11273 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11274 else
423cee85 11275 typesv = &PL_sv_undef;
4e553d73 11276
e788e7d3 11277 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11278 ENTER ;
11279 SAVETMPS;
4e553d73 11280
423cee85 11281 PUSHMARK(SP) ;
a5845cb7 11282 EXTEND(sp, 3);
423cee85
JH
11283 if (pv)
11284 PUSHs(pv);
b3ac6de7 11285 PUSHs(sv);
423cee85
JH
11286 if (pv)
11287 PUSHs(typesv);
b3ac6de7 11288 PUTBACK;
423cee85 11289 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11290
423cee85 11291 SPAGAIN ;
4e553d73 11292
423cee85 11293 /* Check the eval first */
9b0e499b 11294 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11295 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11296 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11297 (void)POPs;
b37c2d43 11298 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11299 }
11300 else {
11301 res = POPs;
b37c2d43 11302 SvREFCNT_inc_simple_void(res);
423cee85 11303 }
4e553d73 11304
423cee85
JH
11305 PUTBACK ;
11306 FREETMPS ;
11307 LEAVE ;
b3ac6de7 11308 POPSTACK;
4e553d73 11309
b3ac6de7 11310 if (!SvOK(res)) {
423cee85
JH
11311 why1 = "Call to &{$^H{";
11312 why2 = key;
f0af216f 11313 why3 = "}} did not return a defined value";
423cee85
JH
11314 sv = res;
11315 goto report;
9b0e499b 11316 }
423cee85 11317
9b0e499b 11318 return res;
b3ac6de7 11319}
4e553d73 11320
d0a148a6
NC
11321/* Returns a NUL terminated string, with the length of the string written to
11322 *slp
11323 */
76e3520e 11324STATIC char *
cea2e8a9 11325S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11326{
97aff369 11327 dVAR;
463ee0b2 11328 register char *d = dest;
890ce7af 11329 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11330
11331 PERL_ARGS_ASSERT_SCAN_WORD;
11332
463ee0b2 11333 for (;;) {
8903cb82 11334 if (d >= e)
cea2e8a9 11335 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11336 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11337 *d++ = *s++;
c35e046a 11338 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11339 *d++ = ':';
11340 *d++ = ':';
11341 s++;
11342 }
c35e046a 11343 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11344 *d++ = *s++;
11345 *d++ = *s++;
11346 }
fd400ab9 11347 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11348 char *t = s + UTF8SKIP(s);
c35e046a 11349 size_t len;
fd400ab9 11350 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11351 t += UTF8SKIP(t);
c35e046a
AL
11352 len = t - s;
11353 if (d + len > e)
cea2e8a9 11354 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11355 Copy(s, d, len, char);
11356 d += len;
a0ed51b3
LW
11357 s = t;
11358 }
463ee0b2
LW
11359 else {
11360 *d = '\0';
11361 *slp = d - dest;
11362 return s;
e929a76b 11363 }
378cc40b
LW
11364 }
11365}
11366
76e3520e 11367STATIC char *
f54cb97a 11368S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11369{
97aff369 11370 dVAR;
6136c704 11371 char *bracket = NULL;
748a9306 11372 char funny = *s++;
6136c704 11373 register char *d = dest;
0b3da58d 11374 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11375
7918f24d
NC
11376 PERL_ARGS_ASSERT_SCAN_IDENT;
11377
a0d0e21e 11378 if (isSPACE(*s))
29595ff2 11379 s = PEEKSPACE(s);
de3bb511 11380 if (isDIGIT(*s)) {
8903cb82 11381 while (isDIGIT(*s)) {
11382 if (d >= e)
cea2e8a9 11383 Perl_croak(aTHX_ ident_too_long);
378cc40b 11384 *d++ = *s++;
8903cb82 11385 }
378cc40b
LW
11386 }
11387 else {
463ee0b2 11388 for (;;) {
8903cb82 11389 if (d >= e)
cea2e8a9 11390 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11391 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11392 *d++ = *s++;
7e2040f0 11393 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11394 *d++ = ':';
11395 *d++ = ':';
11396 s++;
11397 }
a0d0e21e 11398 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11399 *d++ = *s++;
11400 *d++ = *s++;
11401 }
fd400ab9 11402 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11403 char *t = s + UTF8SKIP(s);
fd400ab9 11404 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11405 t += UTF8SKIP(t);
11406 if (d + (t - s) > e)
cea2e8a9 11407 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11408 Copy(s, d, t - s, char);
11409 d += t - s;
11410 s = t;
11411 }
463ee0b2
LW
11412 else
11413 break;
11414 }
378cc40b
LW
11415 }
11416 *d = '\0';
11417 d = dest;
79072805 11418 if (*d) {
3280af22
NIS
11419 if (PL_lex_state != LEX_NORMAL)
11420 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11421 return s;
378cc40b 11422 }
748a9306 11423 if (*s == '$' && s[1] &&
3792a11b 11424 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11425 {
4810e5ec 11426 return s;
5cd24f17 11427 }
79072805
LW
11428 if (*s == '{') {
11429 bracket = s;
11430 s++;
11431 }
11432 else if (ck_uni)
11433 check_uni();
93a17b20 11434 if (s < send)
79072805
LW
11435 *d = *s++;
11436 d[1] = '\0';
2b92dfce 11437 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11438 *d = toCTRL(*s);
11439 s++;
de3bb511 11440 }
79072805 11441 if (bracket) {
748a9306 11442 if (isSPACE(s[-1])) {
fa83b5b6 11443 while (s < send) {
f54cb97a 11444 const char ch = *s++;
bf4acbe4 11445 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11446 *d = ch;
11447 break;
11448 }
11449 }
748a9306 11450 }
7e2040f0 11451 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11452 d++;
a0ed51b3 11453 if (UTF) {
6136c704
AL
11454 char *end = s;
11455 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11456 end += UTF8SKIP(end);
11457 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11458 end += UTF8SKIP(end);
a0ed51b3 11459 }
6136c704
AL
11460 Copy(s, d, end - s, char);
11461 d += end - s;
11462 s = end;
a0ed51b3
LW
11463 }
11464 else {
2b92dfce 11465 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11466 *d++ = *s++;
2b92dfce 11467 if (d >= e)
cea2e8a9 11468 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11469 }
79072805 11470 *d = '\0';
c35e046a
AL
11471 while (s < send && SPACE_OR_TAB(*s))
11472 s++;
ff68c719 11473 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11474 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11475 const char * const brack =
11476 (const char *)
11477 ((*s == '[') ? "[...]" : "{...}");
9014280d 11478 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11479 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11480 funny, dest, brack, funny, dest, brack);
11481 }
79072805 11482 bracket++;
a0be28da 11483 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11484 return s;
11485 }
4e553d73
NIS
11486 }
11487 /* Handle extended ${^Foo} variables
2b92dfce
GS
11488 * 1999-02-27 mjd-perl-patch@plover.com */
11489 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11490 && isALNUM(*s))
11491 {
11492 d++;
11493 while (isALNUM(*s) && d < e) {
11494 *d++ = *s++;
11495 }
11496 if (d >= e)
cea2e8a9 11497 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11498 *d = '\0';
79072805
LW
11499 }
11500 if (*s == '}') {
11501 s++;
7df0d042 11502 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11503 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11504 PL_expect = XREF;
11505 }
d008e5eb 11506 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11507 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11508 (keyword(dest, d - dest, 0)
11509 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11510 {
c35e046a
AL
11511 if (funny == '#')
11512 funny = '@';
9014280d 11513 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11514 "Ambiguous use of %c{%s} resolved to %c%s",
11515 funny, dest, funny, dest);
11516 }
11517 }
79072805
LW
11518 }
11519 else {
11520 s = bracket; /* let the parser handle it */
93a17b20 11521 *dest = '\0';
79072805
LW
11522 }
11523 }
3280af22
NIS
11524 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11525 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11526 return s;
11527}
11528
879d0c72
NC
11529static U32
11530S_pmflag(U32 pmfl, const char ch) {
11531 switch (ch) {
11532 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11533 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11534 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11535 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11536 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11537 }
11538 return pmfl;
11539}
11540
cea2e8a9 11541void
2b36a5a0 11542Perl_pmflag(pTHX_ U32* pmfl, int ch)
a0d0e21e 11543{
7918f24d
NC
11544 PERL_ARGS_ASSERT_PMFLAG;
11545
879d0c72
NC
11546 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11547 "Perl_pmflag() is deprecated, and will be removed from the XS API");
11548
cde0cee5 11549 if (ch<256) {
879d0c72 11550 *pmfl = S_pmflag(*pmfl, (char)ch);
cde0cee5 11551 }
a0d0e21e 11552}
378cc40b 11553
76e3520e 11554STATIC char *
cea2e8a9 11555S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11556{
97aff369 11557 dVAR;
79072805 11558 PMOP *pm;
5db06880 11559 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11560 const char * const valid_flags =
a20207d7 11561 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11562#ifdef PERL_MAD
11563 char *modstart;
11564#endif
11565
7918f24d 11566 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11567
25c09cbf 11568 if (!s) {
6136c704 11569 const char * const delimiter = skipspace(start);
10edeb5d
JH
11570 Perl_croak(aTHX_
11571 (const char *)
11572 (*delimiter == '?'
11573 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11574 : "Search pattern not terminated" ));
25c09cbf 11575 }
bbce6d69 11576
8782bef2 11577 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11578 if (PL_multi_open == '?') {
11579 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11580 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11581
11582 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11583 allows us to restrict the list needed by reset to just the ??
11584 matches. */
11585 assert(type != OP_TRANS);
11586 if (PL_curstash) {
daba3364 11587 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11588 U32 elements;
11589 if (!mg) {
daba3364 11590 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11591 0);
11592 }
11593 elements = mg->mg_len / sizeof(PMOP**);
11594 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11595 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11596 mg->mg_len = elements * sizeof(PMOP**);
11597 PmopSTASH_set(pm,PL_curstash);
11598 }
11599 }
5db06880
NC
11600#ifdef PERL_MAD
11601 modstart = s;
11602#endif
6136c704 11603 while (*s && strchr(valid_flags, *s))
879d0c72 11604 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
5db06880
NC
11605#ifdef PERL_MAD
11606 if (PL_madskills && modstart != s) {
11607 SV* tmptoken = newSVpvn(modstart, s - modstart);
11608 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11609 }
11610#endif
4ac733c9 11611 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11612 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11613 {
a2a5de95
NC
11614 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11615 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11616 }
11617
3280af22 11618 PL_lex_op = (OP*)pm;
6154021b 11619 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11620 return s;
11621}
11622
76e3520e 11623STATIC char *
cea2e8a9 11624S_scan_subst(pTHX_ char *start)
79072805 11625{
27da23d5 11626 dVAR;
a0d0e21e 11627 register char *s;
79072805 11628 register PMOP *pm;
4fdae800 11629 I32 first_start;
79072805 11630 I32 es = 0;
5db06880
NC
11631#ifdef PERL_MAD
11632 char *modstart;
11633#endif
79072805 11634
7918f24d
NC
11635 PERL_ARGS_ASSERT_SCAN_SUBST;
11636
6154021b 11637 pl_yylval.ival = OP_NULL;
79072805 11638
5db06880 11639 s = scan_str(start,!!PL_madskills,FALSE);
79072805 11640
37fd879b 11641 if (!s)
cea2e8a9 11642 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 11643
3280af22 11644 if (s[-1] == PL_multi_open)
79072805 11645 s--;
5db06880
NC
11646#ifdef PERL_MAD
11647 if (PL_madskills) {
cd81e915
NC
11648 CURMAD('q', PL_thisopen);
11649 CURMAD('_', PL_thiswhite);
11650 CURMAD('E', PL_thisstuff);
11651 CURMAD('Q', PL_thisclose);
11652 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
11653 }
11654#endif
79072805 11655
3280af22 11656 first_start = PL_multi_start;
5db06880 11657 s = scan_str(s,!!PL_madskills,FALSE);
79072805 11658 if (!s) {
37fd879b 11659 if (PL_lex_stuff) {
3280af22 11660 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 11661 PL_lex_stuff = NULL;
37fd879b 11662 }
cea2e8a9 11663 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 11664 }
3280af22 11665 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 11666
79072805 11667 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
11668
11669#ifdef PERL_MAD
11670 if (PL_madskills) {
cd81e915
NC
11671 CURMAD('z', PL_thisopen);
11672 CURMAD('R', PL_thisstuff);
11673 CURMAD('Z', PL_thisclose);
5db06880
NC
11674 }
11675 modstart = s;
11676#endif
11677
48c036b1 11678 while (*s) {
a20207d7 11679 if (*s == EXEC_PAT_MOD) {
a687059c 11680 s++;
2f3197b3 11681 es++;
a687059c 11682 }
a20207d7 11683 else if (strchr(S_PAT_MODS, *s))
879d0c72 11684 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
48c036b1
GS
11685 else
11686 break;
378cc40b 11687 }
79072805 11688
5db06880
NC
11689#ifdef PERL_MAD
11690 if (PL_madskills) {
11691 if (modstart != s)
11692 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
11693 append_madprops(PL_thismad, (OP*)pm, 0);
11694 PL_thismad = 0;
5db06880
NC
11695 }
11696#endif
a2a5de95
NC
11697 if ((pm->op_pmflags & PMf_CONTINUE)) {
11698 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
11699 }
11700
79072805 11701 if (es) {
6136c704
AL
11702 SV * const repl = newSVpvs("");
11703
0244c3a4
GS
11704 PL_sublex_info.super_bufptr = s;
11705 PL_sublex_info.super_bufend = PL_bufend;
11706 PL_multi_end = 0;
79072805 11707 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
11708 while (es-- > 0) {
11709 if (es)
11710 sv_catpvs(repl, "eval ");
11711 else
11712 sv_catpvs(repl, "do ");
11713 }
6f43d98f 11714 sv_catpvs(repl, "{");
3280af22 11715 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
11716 if (strchr(SvPVX(PL_lex_repl), '#'))
11717 sv_catpvs(repl, "\n");
11718 sv_catpvs(repl, "}");
25da4f38 11719 SvEVALED_on(repl);
3280af22
NIS
11720 SvREFCNT_dec(PL_lex_repl);
11721 PL_lex_repl = repl;
378cc40b 11722 }
79072805 11723
3280af22 11724 PL_lex_op = (OP*)pm;
6154021b 11725 pl_yylval.ival = OP_SUBST;
378cc40b
LW
11726 return s;
11727}
11728
76e3520e 11729STATIC char *
cea2e8a9 11730S_scan_trans(pTHX_ char *start)
378cc40b 11731{
97aff369 11732 dVAR;
a0d0e21e 11733 register char* s;
11343788 11734 OP *o;
79072805 11735 short *tbl;
b84c11c8
NC
11736 U8 squash;
11737 U8 del;
11738 U8 complement;
5db06880
NC
11739#ifdef PERL_MAD
11740 char *modstart;
11741#endif
79072805 11742
7918f24d
NC
11743 PERL_ARGS_ASSERT_SCAN_TRANS;
11744
6154021b 11745 pl_yylval.ival = OP_NULL;
79072805 11746
5db06880 11747 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 11748 if (!s)
cea2e8a9 11749 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 11750
3280af22 11751 if (s[-1] == PL_multi_open)
2f3197b3 11752 s--;
5db06880
NC
11753#ifdef PERL_MAD
11754 if (PL_madskills) {
cd81e915
NC
11755 CURMAD('q', PL_thisopen);
11756 CURMAD('_', PL_thiswhite);
11757 CURMAD('E', PL_thisstuff);
11758 CURMAD('Q', PL_thisclose);
11759 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
11760 }
11761#endif
2f3197b3 11762
5db06880 11763 s = scan_str(s,!!PL_madskills,FALSE);
79072805 11764 if (!s) {
37fd879b 11765 if (PL_lex_stuff) {
3280af22 11766 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 11767 PL_lex_stuff = NULL;
37fd879b 11768 }
cea2e8a9 11769 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 11770 }
5db06880 11771 if (PL_madskills) {
cd81e915
NC
11772 CURMAD('z', PL_thisopen);
11773 CURMAD('R', PL_thisstuff);
11774 CURMAD('Z', PL_thisclose);
5db06880 11775 }
79072805 11776
a0ed51b3 11777 complement = del = squash = 0;
5db06880
NC
11778#ifdef PERL_MAD
11779 modstart = s;
11780#endif
7a1e2023
NC
11781 while (1) {
11782 switch (*s) {
11783 case 'c':
79072805 11784 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
11785 break;
11786 case 'd':
a0ed51b3 11787 del = OPpTRANS_DELETE;
7a1e2023
NC
11788 break;
11789 case 's':
79072805 11790 squash = OPpTRANS_SQUASH;
7a1e2023
NC
11791 break;
11792 default:
11793 goto no_more;
11794 }
395c3793
LW
11795 s++;
11796 }
7a1e2023 11797 no_more:
8973db79 11798
aa1f7c5b 11799 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
8973db79 11800 o = newPVOP(OP_TRANS, 0, (char*)tbl);
59f00321
RGS
11801 o->op_private &= ~OPpTRANS_ALL;
11802 o->op_private |= del|squash|complement|
7948272d
NIS
11803 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
11804 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 11805
3280af22 11806 PL_lex_op = o;
6154021b 11807 pl_yylval.ival = OP_TRANS;
5db06880
NC
11808
11809#ifdef PERL_MAD
11810 if (PL_madskills) {
11811 if (modstart != s)
11812 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
11813 append_madprops(PL_thismad, o, 0);
11814 PL_thismad = 0;
5db06880
NC
11815 }
11816#endif
11817
79072805
LW
11818 return s;
11819}
11820
76e3520e 11821STATIC char *
cea2e8a9 11822S_scan_heredoc(pTHX_ register char *s)
79072805 11823{
97aff369 11824 dVAR;
79072805
LW
11825 SV *herewas;
11826 I32 op_type = OP_SCALAR;
11827 I32 len;
11828 SV *tmpstr;
11829 char term;
73d840c0 11830 const char *found_newline;
79072805 11831 register char *d;
fc36a67e 11832 register char *e;
4633a7c4 11833 char *peek;
f54cb97a 11834 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
11835#ifdef PERL_MAD
11836 I32 stuffstart = s - SvPVX(PL_linestr);
11837 char *tstart;
11838
cd81e915 11839 PL_realtokenstart = -1;
5db06880 11840#endif
79072805 11841
7918f24d
NC
11842 PERL_ARGS_ASSERT_SCAN_HEREDOC;
11843
79072805 11844 s += 2;
3280af22
NIS
11845 d = PL_tokenbuf;
11846 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 11847 if (!outer)
79072805 11848 *d++ = '\n';
c35e046a
AL
11849 peek = s;
11850 while (SPACE_OR_TAB(*peek))
11851 peek++;
3792a11b 11852 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 11853 s = peek;
79072805 11854 term = *s++;
3280af22 11855 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 11856 d += len;
3280af22 11857 if (s < PL_bufend)
79072805 11858 s++;
79072805
LW
11859 }
11860 else {
11861 if (*s == '\\')
11862 s++, term = '\'';
11863 else
11864 term = '"';
7e2040f0 11865 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 11866 deprecate("bare << to mean <<\"\"");
7e2040f0 11867 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 11868 if (d < e)
11869 *d++ = *s;
11870 }
11871 }
3280af22 11872 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 11873 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
11874 *d++ = '\n';
11875 *d = '\0';
3280af22 11876 len = d - PL_tokenbuf;
5db06880
NC
11877
11878#ifdef PERL_MAD
11879 if (PL_madskills) {
11880 tstart = PL_tokenbuf + !outer;
cd81e915 11881 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 11882 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 11883 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
11884 stuffstart = s - SvPVX(PL_linestr);
11885 }
11886#endif
6a27c188 11887#ifndef PERL_STRICT_CR
f63a84b2
LW
11888 d = strchr(s, '\r');
11889 if (d) {
b464bac0 11890 char * const olds = s;
f63a84b2 11891 s = d;
3280af22 11892 while (s < PL_bufend) {
f63a84b2
LW
11893 if (*s == '\r') {
11894 *d++ = '\n';
11895 if (*++s == '\n')
11896 s++;
11897 }
11898 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
11899 *d++ = *s++;
11900 s++;
11901 }
11902 else
11903 *d++ = *s++;
11904 }
11905 *d = '\0';
3280af22 11906 PL_bufend = d;
95a20fc0 11907 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
11908 s = olds;
11909 }
11910#endif
5db06880
NC
11911#ifdef PERL_MAD
11912 found_newline = 0;
11913#endif
10edeb5d 11914 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
11915 herewas = newSVpvn(s,PL_bufend-s);
11916 }
11917 else {
5db06880
NC
11918#ifdef PERL_MAD
11919 herewas = newSVpvn(s-1,found_newline-s+1);
11920#else
73d840c0
AL
11921 s--;
11922 herewas = newSVpvn(s,found_newline-s);
5db06880 11923#endif
73d840c0 11924 }
5db06880
NC
11925#ifdef PERL_MAD
11926 if (PL_madskills) {
11927 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
11928 if (PL_thisstuff)
11929 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 11930 else
cd81e915 11931 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
11932 }
11933#endif
79072805 11934 s += SvCUR(herewas);
748a9306 11935
5db06880
NC
11936#ifdef PERL_MAD
11937 stuffstart = s - SvPVX(PL_linestr);
11938
11939 if (found_newline)
11940 s--;
11941#endif
11942
7d0a29fe
NC
11943 tmpstr = newSV_type(SVt_PVIV);
11944 SvGROW(tmpstr, 80);
748a9306 11945 if (term == '\'') {
79072805 11946 op_type = OP_CONST;
45977657 11947 SvIV_set(tmpstr, -1);
748a9306
LW
11948 }
11949 else if (term == '`') {
79072805 11950 op_type = OP_BACKTICK;
45977657 11951 SvIV_set(tmpstr, '\\');
748a9306 11952 }
79072805
LW
11953
11954 CLINE;
57843af0 11955 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
11956 PL_multi_open = PL_multi_close = '<';
11957 term = *PL_tokenbuf;
0244c3a4 11958 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
11959 char * const bufptr = PL_sublex_info.super_bufptr;
11960 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 11961 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
11962 s = strchr(bufptr, '\n');
11963 if (!s)
11964 s = bufend;
11965 d = s;
11966 while (s < bufend &&
11967 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
11968 if (*s++ == '\n')
57843af0 11969 CopLINE_inc(PL_curcop);
0244c3a4
GS
11970 }
11971 if (s >= bufend) {
eb160463 11972 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
11973 missingterm(PL_tokenbuf);
11974 }
11975 sv_setpvn(herewas,bufptr,d-bufptr+1);
11976 sv_setpvn(tmpstr,d+1,s-d);
11977 s += len - 1;
11978 sv_catpvn(herewas,s,bufend-s);
95a20fc0 11979 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
11980
11981 s = olds;
11982 goto retval;
11983 }
11984 else if (!outer) {
79072805 11985 d = s;
3280af22
NIS
11986 while (s < PL_bufend &&
11987 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 11988 if (*s++ == '\n')
57843af0 11989 CopLINE_inc(PL_curcop);
79072805 11990 }
3280af22 11991 if (s >= PL_bufend) {
eb160463 11992 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 11993 missingterm(PL_tokenbuf);
79072805
LW
11994 }
11995 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
11996#ifdef PERL_MAD
11997 if (PL_madskills) {
cd81e915
NC
11998 if (PL_thisstuff)
11999 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12000 else
cd81e915 12001 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12002 stuffstart = s - SvPVX(PL_linestr);
12003 }
12004#endif
79072805 12005 s += len - 1;
57843af0 12006 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12007
3280af22
NIS
12008 sv_catpvn(herewas,s,PL_bufend-s);
12009 sv_setsv(PL_linestr,herewas);
12010 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12011 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12012 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12013 }
12014 else
76f68e9b 12015 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12016 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12017#ifdef PERL_MAD
12018 if (PL_madskills) {
12019 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12020 if (PL_thisstuff)
12021 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12022 else
cd81e915 12023 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12024 }
12025#endif
f0e67a1d 12026 PL_bufptr = s;
17cc9359 12027 CopLINE_inc(PL_curcop);
f0e67a1d 12028 if (!outer || !lex_next_chunk(0)) {
eb160463 12029 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12030 missingterm(PL_tokenbuf);
79072805 12031 }
17cc9359 12032 CopLINE_dec(PL_curcop);
f0e67a1d 12033 s = PL_bufptr;
5db06880
NC
12034#ifdef PERL_MAD
12035 stuffstart = s - SvPVX(PL_linestr);
12036#endif
57843af0 12037 CopLINE_inc(PL_curcop);
3280af22 12038 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12039 PL_last_lop = PL_last_uni = NULL;
6a27c188 12040#ifndef PERL_STRICT_CR
3280af22 12041 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12042 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12043 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12044 {
3280af22
NIS
12045 PL_bufend[-2] = '\n';
12046 PL_bufend--;
95a20fc0 12047 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12048 }
3280af22
NIS
12049 else if (PL_bufend[-1] == '\r')
12050 PL_bufend[-1] = '\n';
f63a84b2 12051 }
3280af22
NIS
12052 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12053 PL_bufend[-1] = '\n';
f63a84b2 12054#endif
3280af22 12055 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12056 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12057 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12058 sv_catsv(PL_linestr,herewas);
12059 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12060 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12061 }
12062 else {
3280af22
NIS
12063 s = PL_bufend;
12064 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12065 }
12066 }
79072805 12067 s++;
0244c3a4 12068retval:
57843af0 12069 PL_multi_end = CopLINE(PL_curcop);
79072805 12070 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12071 SvPV_shrink_to_cur(tmpstr);
79072805 12072 }
8990e307 12073 SvREFCNT_dec(herewas);
2f31ce75 12074 if (!IN_BYTES) {
95a20fc0 12075 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12076 SvUTF8_on(tmpstr);
12077 else if (PL_encoding)
12078 sv_recode_to_utf8(tmpstr, PL_encoding);
12079 }
3280af22 12080 PL_lex_stuff = tmpstr;
6154021b 12081 pl_yylval.ival = op_type;
79072805
LW
12082 return s;
12083}
12084
02aa26ce
NT
12085/* scan_inputsymbol
12086 takes: current position in input buffer
12087 returns: new position in input buffer
6154021b 12088 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12089
12090 This code handles:
12091
12092 <> read from ARGV
12093 <FH> read from filehandle
12094 <pkg::FH> read from package qualified filehandle
12095 <pkg'FH> read from package qualified filehandle
12096 <$fh> read from filehandle in $fh
12097 <*.h> filename glob
12098
12099*/
12100
76e3520e 12101STATIC char *
cea2e8a9 12102S_scan_inputsymbol(pTHX_ char *start)
79072805 12103{
97aff369 12104 dVAR;
02aa26ce 12105 register char *s = start; /* current position in buffer */
1b420867 12106 char *end;
79072805 12107 I32 len;
6136c704
AL
12108 char *d = PL_tokenbuf; /* start of temp holding space */
12109 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12110
7918f24d
NC
12111 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12112
1b420867
GS
12113 end = strchr(s, '\n');
12114 if (!end)
12115 end = PL_bufend;
12116 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12117
12118 /* die if we didn't have space for the contents of the <>,
1b420867 12119 or if it didn't end, or if we see a newline
02aa26ce
NT
12120 */
12121
bb7a0f54 12122 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12123 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12124 if (s >= end)
cea2e8a9 12125 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12126
fc36a67e 12127 s++;
02aa26ce
NT
12128
12129 /* check for <$fh>
12130 Remember, only scalar variables are interpreted as filehandles by
12131 this code. Anything more complex (e.g., <$fh{$num}>) will be
12132 treated as a glob() call.
12133 This code makes use of the fact that except for the $ at the front,
12134 a scalar variable and a filehandle look the same.
12135 */
4633a7c4 12136 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12137
12138 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12139 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12140 d++;
02aa26ce
NT
12141
12142 /* If we've tried to read what we allow filehandles to look like, and
12143 there's still text left, then it must be a glob() and not a getline.
12144 Use scan_str to pull out the stuff between the <> and treat it
12145 as nothing more than a string.
12146 */
12147
3280af22 12148 if (d - PL_tokenbuf != len) {
6154021b 12149 pl_yylval.ival = OP_GLOB;
5db06880 12150 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12151 if (!s)
cea2e8a9 12152 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12153 return s;
12154 }
395c3793 12155 else {
9b3023bc 12156 bool readline_overriden = FALSE;
6136c704 12157 GV *gv_readline;
9b3023bc 12158 GV **gvp;
02aa26ce 12159 /* we're in a filehandle read situation */
3280af22 12160 d = PL_tokenbuf;
02aa26ce
NT
12161
12162 /* turn <> into <ARGV> */
79072805 12163 if (!len)
689badd5 12164 Copy("ARGV",d,5,char);
02aa26ce 12165
9b3023bc 12166 /* Check whether readline() is overriden */
fafc274c 12167 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12168 if ((gv_readline
ba979b31 12169 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12170 ||
017a3ce5 12171 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12172 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12173 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12174 readline_overriden = TRUE;
12175
02aa26ce
NT
12176 /* if <$fh>, create the ops to turn the variable into a
12177 filehandle
12178 */
79072805 12179 if (*d == '$') {
02aa26ce
NT
12180 /* try to find it in the pad for this block, otherwise find
12181 add symbol table ops
12182 */
f8f98e0a 12183 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12184 if (tmp != NOT_IN_PAD) {
00b1698f 12185 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12186 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12187 HEK * const stashname = HvNAME_HEK(stash);
12188 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12189 sv_catpvs(sym, "::");
f558d5af
JH
12190 sv_catpv(sym, d+1);
12191 d = SvPVX(sym);
12192 goto intro_sym;
12193 }
12194 else {
6136c704 12195 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12196 o->op_targ = tmp;
9b3023bc
RGS
12197 PL_lex_op = readline_overriden
12198 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12199 append_elem(OP_LIST, o,
12200 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12201 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12202 }
a0d0e21e
LW
12203 }
12204 else {
f558d5af
JH
12205 GV *gv;
12206 ++d;
12207intro_sym:
12208 gv = gv_fetchpv(d,
12209 (PL_in_eval
12210 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12211 : GV_ADDMULTI),
f558d5af 12212 SVt_PV);
9b3023bc
RGS
12213 PL_lex_op = readline_overriden
12214 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12215 append_elem(OP_LIST,
12216 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12217 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12218 : (OP*)newUNOP(OP_READLINE, 0,
12219 newUNOP(OP_RV2SV, 0,
12220 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12221 }
7c6fadd6
RGS
12222 if (!readline_overriden)
12223 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12224 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12225 pl_yylval.ival = OP_NULL;
79072805 12226 }
02aa26ce
NT
12227
12228 /* If it's none of the above, it must be a literal filehandle
12229 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12230 else {
6136c704 12231 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12232 PL_lex_op = readline_overriden
12233 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12234 append_elem(OP_LIST,
12235 newGVOP(OP_GV, 0, gv),
12236 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12237 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12238 pl_yylval.ival = OP_NULL;
79072805
LW
12239 }
12240 }
02aa26ce 12241
79072805
LW
12242 return s;
12243}
12244
02aa26ce
NT
12245
12246/* scan_str
12247 takes: start position in buffer
09bef843
SB
12248 keep_quoted preserve \ on the embedded delimiter(s)
12249 keep_delims preserve the delimiters around the string
02aa26ce
NT
12250 returns: position to continue reading from buffer
12251 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12252 updates the read buffer.
12253
12254 This subroutine pulls a string out of the input. It is called for:
12255 q single quotes q(literal text)
12256 ' single quotes 'literal text'
12257 qq double quotes qq(interpolate $here please)
12258 " double quotes "interpolate $here please"
12259 qx backticks qx(/bin/ls -l)
12260 ` backticks `/bin/ls -l`
12261 qw quote words @EXPORT_OK = qw( func() $spam )
12262 m// regexp match m/this/
12263 s/// regexp substitute s/this/that/
12264 tr/// string transliterate tr/this/that/
12265 y/// string transliterate y/this/that/
12266 ($*@) sub prototypes sub foo ($)
09bef843 12267 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12268 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12269
12270 In most of these cases (all but <>, patterns and transliterate)
12271 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12272 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12273 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12274 calls scan_str().
4e553d73 12275
02aa26ce
NT
12276 It skips whitespace before the string starts, and treats the first
12277 character as the delimiter. If the delimiter is one of ([{< then
12278 the corresponding "close" character )]}> is used as the closing
12279 delimiter. It allows quoting of delimiters, and if the string has
12280 balanced delimiters ([{<>}]) it allows nesting.
12281
37fd879b
HS
12282 On success, the SV with the resulting string is put into lex_stuff or,
12283 if that is already non-NULL, into lex_repl. The second case occurs only
12284 when parsing the RHS of the special constructs s/// and tr/// (y///).
12285 For convenience, the terminating delimiter character is stuffed into
12286 SvIVX of the SV.
02aa26ce
NT
12287*/
12288
76e3520e 12289STATIC char *
09bef843 12290S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12291{
97aff369 12292 dVAR;
02aa26ce 12293 SV *sv; /* scalar value: string */
d3fcec1f 12294 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12295 register char *s = start; /* current position in the buffer */
12296 register char term; /* terminating character */
12297 register char *to; /* current position in the sv's data */
12298 I32 brackets = 1; /* bracket nesting level */
89491803 12299 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12300 I32 termcode; /* terminating char. code */
89ebb4a3 12301 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12302 STRLEN termlen; /* length of terminating string */
0331ef07 12303 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12304#ifdef PERL_MAD
12305 int stuffstart;
12306 char *tstart;
12307#endif
02aa26ce 12308
7918f24d
NC
12309 PERL_ARGS_ASSERT_SCAN_STR;
12310
02aa26ce 12311 /* skip space before the delimiter */
29595ff2
NC
12312 if (isSPACE(*s)) {
12313 s = PEEKSPACE(s);
12314 }
02aa26ce 12315
5db06880 12316#ifdef PERL_MAD
cd81e915
NC
12317 if (PL_realtokenstart >= 0) {
12318 stuffstart = PL_realtokenstart;
12319 PL_realtokenstart = -1;
5db06880
NC
12320 }
12321 else
12322 stuffstart = start - SvPVX(PL_linestr);
12323#endif
02aa26ce 12324 /* mark where we are, in case we need to report errors */
79072805 12325 CLINE;
02aa26ce
NT
12326
12327 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12328 term = *s;
220e2d4e
IH
12329 if (!UTF) {
12330 termcode = termstr[0] = term;
12331 termlen = 1;
12332 }
12333 else {
f3b9ce0f 12334 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12335 Copy(s, termstr, termlen, U8);
12336 if (!UTF8_IS_INVARIANT(term))
12337 has_utf8 = TRUE;
12338 }
b1c7b182 12339
02aa26ce 12340 /* mark where we are */
57843af0 12341 PL_multi_start = CopLINE(PL_curcop);
3280af22 12342 PL_multi_open = term;
02aa26ce
NT
12343
12344 /* find corresponding closing delimiter */
93a17b20 12345 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12346 termcode = termstr[0] = term = tmps[5];
12347
3280af22 12348 PL_multi_close = term;
79072805 12349
561b68a9
SH
12350 /* create a new SV to hold the contents. 79 is the SV's initial length.
12351 What a random number. */
7d0a29fe
NC
12352 sv = newSV_type(SVt_PVIV);
12353 SvGROW(sv, 80);
45977657 12354 SvIV_set(sv, termcode);
a0d0e21e 12355 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12356
12357 /* move past delimiter and try to read a complete string */
09bef843 12358 if (keep_delims)
220e2d4e
IH
12359 sv_catpvn(sv, s, termlen);
12360 s += termlen;
5db06880
NC
12361#ifdef PERL_MAD
12362 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12363 if (!PL_thisopen && !keep_delims) {
12364 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12365 stuffstart = s - SvPVX(PL_linestr);
12366 }
12367#endif
93a17b20 12368 for (;;) {
220e2d4e
IH
12369 if (PL_encoding && !UTF) {
12370 bool cont = TRUE;
12371
12372 while (cont) {
95a20fc0 12373 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12374 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12375 &offset, (char*)termstr, termlen);
6136c704
AL
12376 const char * const ns = SvPVX_const(PL_linestr) + offset;
12377 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12378
12379 for (; s < ns; s++) {
12380 if (*s == '\n' && !PL_rsfp)
12381 CopLINE_inc(PL_curcop);
12382 }
12383 if (!found)
12384 goto read_more_line;
12385 else {
12386 /* handle quoted delimiters */
52327caf 12387 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12388 const char *t;
95a20fc0 12389 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12390 t--;
12391 if ((svlast-1 - t) % 2) {
12392 if (!keep_quoted) {
12393 *(svlast-1) = term;
12394 *svlast = '\0';
12395 SvCUR_set(sv, SvCUR(sv) - 1);
12396 }
12397 continue;
12398 }
12399 }
12400 if (PL_multi_open == PL_multi_close) {
12401 cont = FALSE;
12402 }
12403 else {
f54cb97a
AL
12404 const char *t;
12405 char *w;
0331ef07 12406 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12407 /* At here, all closes are "was quoted" one,
12408 so we don't check PL_multi_close. */
12409 if (*t == '\\') {
12410 if (!keep_quoted && *(t+1) == PL_multi_open)
12411 t++;
12412 else
12413 *w++ = *t++;
12414 }
12415 else if (*t == PL_multi_open)
12416 brackets++;
12417
12418 *w = *t;
12419 }
12420 if (w < t) {
12421 *w++ = term;
12422 *w = '\0';
95a20fc0 12423 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12424 }
0331ef07 12425 last_off = w - SvPVX(sv);
220e2d4e
IH
12426 if (--brackets <= 0)
12427 cont = FALSE;
12428 }
12429 }
12430 }
12431 if (!keep_delims) {
12432 SvCUR_set(sv, SvCUR(sv) - 1);
12433 *SvEND(sv) = '\0';
12434 }
12435 break;
12436 }
12437
02aa26ce 12438 /* extend sv if need be */
3280af22 12439 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12440 /* set 'to' to the next character in the sv's string */
463ee0b2 12441 to = SvPVX(sv)+SvCUR(sv);
09bef843 12442
02aa26ce 12443 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12444 if (PL_multi_open == PL_multi_close) {
12445 for (; s < PL_bufend; s++,to++) {
02aa26ce 12446 /* embedded newlines increment the current line number */
3280af22 12447 if (*s == '\n' && !PL_rsfp)
57843af0 12448 CopLINE_inc(PL_curcop);
02aa26ce 12449 /* handle quoted delimiters */
3280af22 12450 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12451 if (!keep_quoted && s[1] == term)
a0d0e21e 12452 s++;
02aa26ce 12453 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12454 else
12455 *to++ = *s++;
12456 }
02aa26ce
NT
12457 /* terminate when run out of buffer (the for() condition), or
12458 have found the terminator */
220e2d4e
IH
12459 else if (*s == term) {
12460 if (termlen == 1)
12461 break;
f3b9ce0f 12462 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12463 break;
12464 }
63cd0674 12465 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12466 has_utf8 = TRUE;
93a17b20
LW
12467 *to = *s;
12468 }
12469 }
02aa26ce
NT
12470
12471 /* if the terminator isn't the same as the start character (e.g.,
12472 matched brackets), we have to allow more in the quoting, and
12473 be prepared for nested brackets.
12474 */
93a17b20 12475 else {
02aa26ce 12476 /* read until we run out of string, or we find the terminator */
3280af22 12477 for (; s < PL_bufend; s++,to++) {
02aa26ce 12478 /* embedded newlines increment the line count */
3280af22 12479 if (*s == '\n' && !PL_rsfp)
57843af0 12480 CopLINE_inc(PL_curcop);
02aa26ce 12481 /* backslashes can escape the open or closing characters */
3280af22 12482 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12483 if (!keep_quoted &&
12484 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12485 s++;
12486 else
12487 *to++ = *s++;
12488 }
02aa26ce 12489 /* allow nested opens and closes */
3280af22 12490 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12491 break;
3280af22 12492 else if (*s == PL_multi_open)
93a17b20 12493 brackets++;
63cd0674 12494 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12495 has_utf8 = TRUE;
93a17b20
LW
12496 *to = *s;
12497 }
12498 }
02aa26ce 12499 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12500 *to = '\0';
95a20fc0 12501 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12502
02aa26ce
NT
12503 /*
12504 * this next chunk reads more into the buffer if we're not done yet
12505 */
12506
b1c7b182
GS
12507 if (s < PL_bufend)
12508 break; /* handle case where we are done yet :-) */
79072805 12509
6a27c188 12510#ifndef PERL_STRICT_CR
95a20fc0 12511 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12512 if ((to[-2] == '\r' && to[-1] == '\n') ||
12513 (to[-2] == '\n' && to[-1] == '\r'))
12514 {
f63a84b2
LW
12515 to[-2] = '\n';
12516 to--;
95a20fc0 12517 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12518 }
12519 else if (to[-1] == '\r')
12520 to[-1] = '\n';
12521 }
95a20fc0 12522 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12523 to[-1] = '\n';
12524#endif
12525
220e2d4e 12526 read_more_line:
02aa26ce
NT
12527 /* if we're out of file, or a read fails, bail and reset the current
12528 line marker so we can report where the unterminated string began
12529 */
5db06880
NC
12530#ifdef PERL_MAD
12531 if (PL_madskills) {
c35e046a 12532 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12533 if (PL_thisstuff)
12534 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12535 else
cd81e915 12536 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12537 }
12538#endif
f0e67a1d
Z
12539 CopLINE_inc(PL_curcop);
12540 PL_bufptr = PL_bufend;
12541 if (!lex_next_chunk(0)) {
c07a80fd 12542 sv_free(sv);
eb160463 12543 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12544 return NULL;
79072805 12545 }
f0e67a1d 12546 s = PL_bufptr;
5db06880
NC
12547#ifdef PERL_MAD
12548 stuffstart = 0;
12549#endif
378cc40b 12550 }
4e553d73 12551
02aa26ce
NT
12552 /* at this point, we have successfully read the delimited string */
12553
220e2d4e 12554 if (!PL_encoding || UTF) {
5db06880
NC
12555#ifdef PERL_MAD
12556 if (PL_madskills) {
c35e046a 12557 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12558 const int len = s - tstart;
cd81e915 12559 if (PL_thisstuff)
c35e046a 12560 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12561 else
c35e046a 12562 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12563 if (!PL_thisclose && !keep_delims)
12564 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12565 }
12566#endif
12567
220e2d4e
IH
12568 if (keep_delims)
12569 sv_catpvn(sv, s, termlen);
12570 s += termlen;
12571 }
5db06880
NC
12572#ifdef PERL_MAD
12573 else {
12574 if (PL_madskills) {
c35e046a
AL
12575 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12576 const int len = s - tstart - termlen;
cd81e915 12577 if (PL_thisstuff)
c35e046a 12578 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12579 else
c35e046a 12580 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12581 if (!PL_thisclose && !keep_delims)
12582 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12583 }
12584 }
12585#endif
220e2d4e 12586 if (has_utf8 || PL_encoding)
b1c7b182 12587 SvUTF8_on(sv);
d0063567 12588
57843af0 12589 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12590
12591 /* if we allocated too much space, give some back */
93a17b20
LW
12592 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12593 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12594 SvPV_renew(sv, SvLEN(sv));
79072805 12595 }
02aa26ce
NT
12596
12597 /* decide whether this is the first or second quoted string we've read
12598 for this op
12599 */
4e553d73 12600
3280af22
NIS
12601 if (PL_lex_stuff)
12602 PL_lex_repl = sv;
79072805 12603 else
3280af22 12604 PL_lex_stuff = sv;
378cc40b
LW
12605 return s;
12606}
12607
02aa26ce
NT
12608/*
12609 scan_num
12610 takes: pointer to position in buffer
12611 returns: pointer to new position in buffer
6154021b 12612 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12613
12614 Read a number in any of the formats that Perl accepts:
12615
7fd134d9
JH
12616 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12617 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12618 0b[01](_?[01])*
12619 0[0-7](_?[0-7])*
12620 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12621
3280af22 12622 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
12623 thing it reads.
12624
12625 If it reads a number without a decimal point or an exponent, it will
12626 try converting the number to an integer and see if it can do so
12627 without loss of precision.
12628*/
4e553d73 12629
378cc40b 12630char *
bfed75c6 12631Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 12632{
97aff369 12633 dVAR;
bfed75c6 12634 register const char *s = start; /* current position in buffer */
02aa26ce
NT
12635 register char *d; /* destination in temp buffer */
12636 register char *e; /* end of temp buffer */
86554af2 12637 NV nv; /* number read, as a double */
a0714e2c 12638 SV *sv = NULL; /* place to put the converted number */
a86a20aa 12639 bool floatit; /* boolean: int or float? */
cbbf8932 12640 const char *lastub = NULL; /* position of last underbar */
bfed75c6 12641 static char const number_too_long[] = "Number too long";
378cc40b 12642
7918f24d
NC
12643 PERL_ARGS_ASSERT_SCAN_NUM;
12644
02aa26ce
NT
12645 /* We use the first character to decide what type of number this is */
12646
378cc40b 12647 switch (*s) {
79072805 12648 default:
cea2e8a9 12649 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 12650
02aa26ce 12651 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 12652 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
12653 case '0':
12654 {
02aa26ce
NT
12655 /* variables:
12656 u holds the "number so far"
4f19785b
WSI
12657 shift the power of 2 of the base
12658 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
12659 overflowed was the number more than we can hold?
12660
12661 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
12662 we in octal/hex/binary?" indicator to disallow hex characters
12663 when in octal mode.
02aa26ce 12664 */
9e24b6e2
JH
12665 NV n = 0.0;
12666 UV u = 0;
79072805 12667 I32 shift;
9e24b6e2 12668 bool overflowed = FALSE;
61f33854 12669 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
12670 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12671 static const char* const bases[5] =
12672 { "", "binary", "", "octal", "hexadecimal" };
12673 static const char* const Bases[5] =
12674 { "", "Binary", "", "Octal", "Hexadecimal" };
12675 static const char* const maxima[5] =
12676 { "",
12677 "0b11111111111111111111111111111111",
12678 "",
12679 "037777777777",
12680 "0xffffffff" };
bfed75c6 12681 const char *base, *Base, *max;
378cc40b 12682
02aa26ce 12683 /* check for hex */
378cc40b
LW
12684 if (s[1] == 'x') {
12685 shift = 4;
12686 s += 2;
61f33854 12687 just_zero = FALSE;
4f19785b
WSI
12688 } else if (s[1] == 'b') {
12689 shift = 1;
12690 s += 2;
61f33854 12691 just_zero = FALSE;
378cc40b 12692 }
02aa26ce 12693 /* check for a decimal in disguise */
b78218b7 12694 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 12695 goto decimal;
02aa26ce 12696 /* so it must be octal */
928753ea 12697 else {
378cc40b 12698 shift = 3;
928753ea
JH
12699 s++;
12700 }
12701
12702 if (*s == '_') {
a2a5de95 12703 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
12704 "Misplaced _ in number");
12705 lastub = s++;
12706 }
9e24b6e2
JH
12707
12708 base = bases[shift];
12709 Base = Bases[shift];
12710 max = maxima[shift];
02aa26ce 12711
4f19785b 12712 /* read the rest of the number */
378cc40b 12713 for (;;) {
9e24b6e2 12714 /* x is used in the overflow test,
893fe2c2 12715 b is the digit we're adding on. */
9e24b6e2 12716 UV x, b;
55497cff 12717
378cc40b 12718 switch (*s) {
02aa26ce
NT
12719
12720 /* if we don't mention it, we're done */
378cc40b
LW
12721 default:
12722 goto out;
02aa26ce 12723
928753ea 12724 /* _ are ignored -- but warned about if consecutive */
de3bb511 12725 case '_':
a2a5de95
NC
12726 if (lastub && s == lastub + 1)
12727 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12728 "Misplaced _ in number");
928753ea 12729 lastub = s++;
de3bb511 12730 break;
02aa26ce
NT
12731
12732 /* 8 and 9 are not octal */
378cc40b 12733 case '8': case '9':
4f19785b 12734 if (shift == 3)
cea2e8a9 12735 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 12736 /* FALL THROUGH */
02aa26ce
NT
12737
12738 /* octal digits */
4f19785b 12739 case '2': case '3': case '4':
378cc40b 12740 case '5': case '6': case '7':
4f19785b 12741 if (shift == 1)
cea2e8a9 12742 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
12743 /* FALL THROUGH */
12744
12745 case '0': case '1':
02aa26ce 12746 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 12747 goto digit;
02aa26ce
NT
12748
12749 /* hex digits */
378cc40b
LW
12750 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
12751 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 12752 /* make sure they said 0x */
378cc40b
LW
12753 if (shift != 4)
12754 goto out;
55497cff 12755 b = (*s++ & 7) + 9;
02aa26ce
NT
12756
12757 /* Prepare to put the digit we have onto the end
12758 of the number so far. We check for overflows.
12759 */
12760
55497cff 12761 digit:
61f33854 12762 just_zero = FALSE;
9e24b6e2
JH
12763 if (!overflowed) {
12764 x = u << shift; /* make room for the digit */
12765
12766 if ((x >> shift) != u
12767 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
12768 overflowed = TRUE;
12769 n = (NV) u;
9b387841
NC
12770 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
12771 "Integer overflow in %s number",
12772 base);
9e24b6e2
JH
12773 } else
12774 u = x | b; /* add the digit to the end */
12775 }
12776 if (overflowed) {
12777 n *= nvshift[shift];
12778 /* If an NV has not enough bits in its
12779 * mantissa to represent an UV this summing of
12780 * small low-order numbers is a waste of time
12781 * (because the NV cannot preserve the
12782 * low-order bits anyway): we could just
12783 * remember when did we overflow and in the
12784 * end just multiply n by the right
12785 * amount. */
12786 n += (NV) b;
55497cff 12787 }
378cc40b
LW
12788 break;
12789 }
12790 }
02aa26ce
NT
12791
12792 /* if we get here, we had success: make a scalar value from
12793 the number.
12794 */
378cc40b 12795 out:
928753ea
JH
12796
12797 /* final misplaced underbar check */
12798 if (s[-1] == '_') {
a2a5de95 12799 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
12800 }
12801
561b68a9 12802 sv = newSV(0);
9e24b6e2 12803 if (overflowed) {
a2a5de95
NC
12804 if (n > 4294967295.0)
12805 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12806 "%s number > %s non-portable",
12807 Base, max);
9e24b6e2
JH
12808 sv_setnv(sv, n);
12809 }
12810 else {
15041a67 12811#if UVSIZE > 4
a2a5de95
NC
12812 if (u > 0xffffffff)
12813 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
12814 "%s number > %s non-portable",
12815 Base, max);
2cc4c2dc 12816#endif
9e24b6e2
JH
12817 sv_setuv(sv, u);
12818 }
61f33854 12819 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 12820 sv = new_constant(start, s - start, "integer",
eb0d8d16 12821 sv, NULL, NULL, 0);
61f33854 12822 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 12823 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
12824 }
12825 break;
02aa26ce
NT
12826
12827 /*
12828 handle decimal numbers.
12829 we're also sent here when we read a 0 as the first digit
12830 */
378cc40b
LW
12831 case '1': case '2': case '3': case '4': case '5':
12832 case '6': case '7': case '8': case '9': case '.':
12833 decimal:
3280af22
NIS
12834 d = PL_tokenbuf;
12835 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 12836 floatit = FALSE;
02aa26ce
NT
12837
12838 /* read next group of digits and _ and copy into d */
de3bb511 12839 while (isDIGIT(*s) || *s == '_') {
4e553d73 12840 /* skip underscores, checking for misplaced ones
02aa26ce
NT
12841 if -w is on
12842 */
93a17b20 12843 if (*s == '_') {
a2a5de95
NC
12844 if (lastub && s == lastub + 1)
12845 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12846 "Misplaced _ in number");
928753ea 12847 lastub = s++;
93a17b20 12848 }
fc36a67e 12849 else {
02aa26ce 12850 /* check for end of fixed-length buffer */
fc36a67e 12851 if (d >= e)
cea2e8a9 12852 Perl_croak(aTHX_ number_too_long);
02aa26ce 12853 /* if we're ok, copy the character */
378cc40b 12854 *d++ = *s++;
fc36a67e 12855 }
378cc40b 12856 }
02aa26ce
NT
12857
12858 /* final misplaced underbar check */
928753ea 12859 if (lastub && s == lastub + 1) {
a2a5de95 12860 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 12861 }
02aa26ce
NT
12862
12863 /* read a decimal portion if there is one. avoid
12864 3..5 being interpreted as the number 3. followed
12865 by .5
12866 */
2f3197b3 12867 if (*s == '.' && s[1] != '.') {
79072805 12868 floatit = TRUE;
378cc40b 12869 *d++ = *s++;
02aa26ce 12870
928753ea 12871 if (*s == '_') {
a2a5de95
NC
12872 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12873 "Misplaced _ in number");
928753ea
JH
12874 lastub = s;
12875 }
12876
12877 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 12878 */
fc36a67e 12879 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 12880 /* fixed length buffer check */
fc36a67e 12881 if (d >= e)
cea2e8a9 12882 Perl_croak(aTHX_ number_too_long);
928753ea 12883 if (*s == '_') {
a2a5de95
NC
12884 if (lastub && s == lastub + 1)
12885 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12886 "Misplaced _ in number");
928753ea
JH
12887 lastub = s;
12888 }
12889 else
fc36a67e 12890 *d++ = *s;
378cc40b 12891 }
928753ea
JH
12892 /* fractional part ending in underbar? */
12893 if (s[-1] == '_') {
a2a5de95
NC
12894 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12895 "Misplaced _ in number");
928753ea 12896 }
dd629d5b
GS
12897 if (*s == '.' && isDIGIT(s[1])) {
12898 /* oops, it's really a v-string, but without the "v" */
f4758303 12899 s = start;
dd629d5b
GS
12900 goto vstring;
12901 }
378cc40b 12902 }
02aa26ce
NT
12903
12904 /* read exponent part, if present */
3792a11b 12905 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
12906 floatit = TRUE;
12907 s++;
02aa26ce
NT
12908
12909 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 12910 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 12911
7fd134d9
JH
12912 /* stray preinitial _ */
12913 if (*s == '_') {
a2a5de95
NC
12914 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12915 "Misplaced _ in number");
7fd134d9
JH
12916 lastub = s++;
12917 }
12918
02aa26ce 12919 /* allow positive or negative exponent */
378cc40b
LW
12920 if (*s == '+' || *s == '-')
12921 *d++ = *s++;
02aa26ce 12922
7fd134d9
JH
12923 /* stray initial _ */
12924 if (*s == '_') {
a2a5de95
NC
12925 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12926 "Misplaced _ in number");
7fd134d9
JH
12927 lastub = s++;
12928 }
12929
7fd134d9
JH
12930 /* read digits of exponent */
12931 while (isDIGIT(*s) || *s == '_') {
12932 if (isDIGIT(*s)) {
12933 if (d >= e)
12934 Perl_croak(aTHX_ number_too_long);
b3b48e3e 12935 *d++ = *s++;
7fd134d9
JH
12936 }
12937 else {
041457d9 12938 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
12939 (!isDIGIT(s[1]) && s[1] != '_')))
12940 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
12941 "Misplaced _ in number");
b3b48e3e 12942 lastub = s++;
7fd134d9 12943 }
7fd134d9 12944 }
378cc40b 12945 }
02aa26ce 12946
02aa26ce
NT
12947
12948 /* make an sv from the string */
561b68a9 12949 sv = newSV(0);
097ee67d 12950
0b7fceb9 12951 /*
58bb9ec3
NC
12952 We try to do an integer conversion first if no characters
12953 indicating "float" have been found.
0b7fceb9
MU
12954 */
12955
12956 if (!floatit) {
58bb9ec3 12957 UV uv;
6136c704 12958 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
12959
12960 if (flags == IS_NUMBER_IN_UV) {
12961 if (uv <= IV_MAX)
86554af2 12962 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
58bb9ec3 12963 else
c239479b 12964 sv_setuv(sv, uv);
58bb9ec3
NC
12965 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
12966 if (uv <= (UV) IV_MIN)
12967 sv_setiv(sv, -(IV)uv);
12968 else
12969 floatit = TRUE;
12970 } else
12971 floatit = TRUE;
12972 }
0b7fceb9 12973 if (floatit) {
58bb9ec3
NC
12974 /* terminate the string */
12975 *d = '\0';
86554af2
JH
12976 nv = Atof(PL_tokenbuf);
12977 sv_setnv(sv, nv);
12978 }
86554af2 12979
eb0d8d16
NC
12980 if ( floatit
12981 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
12982 const char *const key = floatit ? "float" : "integer";
12983 const STRLEN keylen = floatit ? 5 : 7;
12984 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
12985 key, keylen, sv, NULL, NULL, 0);
12986 }
378cc40b 12987 break;
0b7fceb9 12988
e312add1 12989 /* if it starts with a v, it could be a v-string */
a7cb1f99 12990 case 'v':
dd629d5b 12991vstring:
561b68a9 12992 sv = newSV(5); /* preallocate storage space */
65b06e02 12993 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 12994 break;
79072805 12995 }
a687059c 12996
02aa26ce
NT
12997 /* make the op for the constant and return */
12998
a86a20aa 12999 if (sv)
b73d6f50 13000 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13001 else
5f66b61c 13002 lvalp->opval = NULL;
a687059c 13003
73d840c0 13004 return (char *)s;
378cc40b
LW
13005}
13006
76e3520e 13007STATIC char *
cea2e8a9 13008S_scan_formline(pTHX_ register char *s)
378cc40b 13009{
97aff369 13010 dVAR;
79072805 13011 register char *eol;
378cc40b 13012 register char *t;
6136c704 13013 SV * const stuff = newSVpvs("");
79072805 13014 bool needargs = FALSE;
c5ee2135 13015 bool eofmt = FALSE;
5db06880
NC
13016#ifdef PERL_MAD
13017 char *tokenstart = s;
4f61fd4b
JC
13018 SV* savewhite = NULL;
13019
5db06880 13020 if (PL_madskills) {
cd81e915
NC
13021 savewhite = PL_thiswhite;
13022 PL_thiswhite = 0;
5db06880
NC
13023 }
13024#endif
378cc40b 13025
7918f24d
NC
13026 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13027
79072805 13028 while (!needargs) {
a1b95068 13029 if (*s == '.') {
c35e046a 13030 t = s+1;
51882d45 13031#ifdef PERL_STRICT_CR
c35e046a
AL
13032 while (SPACE_OR_TAB(*t))
13033 t++;
51882d45 13034#else
c35e046a
AL
13035 while (SPACE_OR_TAB(*t) || *t == '\r')
13036 t++;
51882d45 13037#endif
c5ee2135
WL
13038 if (*t == '\n' || t == PL_bufend) {
13039 eofmt = TRUE;
79072805 13040 break;
c5ee2135 13041 }
79072805 13042 }
3280af22 13043 if (PL_in_eval && !PL_rsfp) {
07409e01 13044 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13045 if (!eol++)
3280af22 13046 eol = PL_bufend;
0f85fab0
LW
13047 }
13048 else
3280af22 13049 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13050 if (*s != '#') {
a0d0e21e
LW
13051 for (t = s; t < eol; t++) {
13052 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13053 needargs = FALSE;
13054 goto enough; /* ~~ must be first line in formline */
378cc40b 13055 }
a0d0e21e
LW
13056 if (*t == '@' || *t == '^')
13057 needargs = TRUE;
378cc40b 13058 }
7121b347
MG
13059 if (eol > s) {
13060 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13061#ifndef PERL_STRICT_CR
7121b347
MG
13062 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13063 char *end = SvPVX(stuff) + SvCUR(stuff);
13064 end[-2] = '\n';
13065 end[-1] = '\0';
b162af07 13066 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13067 }
2dc4c65b 13068#endif
7121b347
MG
13069 }
13070 else
13071 break;
79072805 13072 }
95a20fc0 13073 s = (char*)eol;
3280af22 13074 if (PL_rsfp) {
f0e67a1d 13075 bool got_some;
5db06880
NC
13076#ifdef PERL_MAD
13077 if (PL_madskills) {
cd81e915
NC
13078 if (PL_thistoken)
13079 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13080 else
cd81e915 13081 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13082 }
13083#endif
f0e67a1d
Z
13084 PL_bufptr = PL_bufend;
13085 CopLINE_inc(PL_curcop);
13086 got_some = lex_next_chunk(0);
13087 CopLINE_dec(PL_curcop);
13088 s = PL_bufptr;
5db06880 13089#ifdef PERL_MAD
f0e67a1d 13090 tokenstart = PL_bufptr;
5db06880 13091#endif
f0e67a1d 13092 if (!got_some)
378cc40b 13093 break;
378cc40b 13094 }
463ee0b2 13095 incline(s);
79072805 13096 }
a0d0e21e
LW
13097 enough:
13098 if (SvCUR(stuff)) {
3280af22 13099 PL_expect = XTERM;
79072805 13100 if (needargs) {
3280af22 13101 PL_lex_state = LEX_NORMAL;
cd81e915 13102 start_force(PL_curforce);
9ded7720 13103 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13104 force_next(',');
13105 }
a0d0e21e 13106 else
3280af22 13107 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13108 if (!IN_BYTES) {
95a20fc0 13109 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13110 SvUTF8_on(stuff);
13111 else if (PL_encoding)
13112 sv_recode_to_utf8(stuff, PL_encoding);
13113 }
cd81e915 13114 start_force(PL_curforce);
9ded7720 13115 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13116 force_next(THING);
cd81e915 13117 start_force(PL_curforce);
9ded7720 13118 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13119 force_next(LSTOP);
378cc40b 13120 }
79072805 13121 else {
8990e307 13122 SvREFCNT_dec(stuff);
c5ee2135
WL
13123 if (eofmt)
13124 PL_lex_formbrack = 0;
3280af22 13125 PL_bufptr = s;
79072805 13126 }
5db06880
NC
13127#ifdef PERL_MAD
13128 if (PL_madskills) {
cd81e915
NC
13129 if (PL_thistoken)
13130 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13131 else
cd81e915
NC
13132 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13133 PL_thiswhite = savewhite;
5db06880
NC
13134 }
13135#endif
79072805 13136 return s;
378cc40b 13137}
a687059c 13138
ba6d6ac9 13139I32
864dbfa3 13140Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13141{
97aff369 13142 dVAR;
a3b680e6 13143 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13144 CV* const outsidecv = PL_compcv;
8990e307 13145
3280af22
NIS
13146 if (PL_compcv) {
13147 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13148 }
7766f137 13149 SAVEI32(PL_subline);
3280af22 13150 save_item(PL_subname);
3280af22 13151 SAVESPTR(PL_compcv);
3280af22 13152
ea726b52 13153 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13154 CvFLAGS(PL_compcv) |= flags;
13155
57843af0 13156 PL_subline = CopLINE(PL_curcop);
dd2155a4 13157 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13158 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13159 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13160
8990e307
LW
13161 return oldsavestack_ix;
13162}
13163
084592ab
CN
13164#ifdef __SC__
13165#pragma segment Perl_yylex
13166#endif
af41e527
NC
13167static int
13168S_yywarn(pTHX_ const char *const s)
8990e307 13169{
97aff369 13170 dVAR;
7918f24d
NC
13171
13172 PERL_ARGS_ASSERT_YYWARN;
13173
faef0170 13174 PL_in_eval |= EVAL_WARNONLY;
748a9306 13175 yyerror(s);
faef0170 13176 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13177 return 0;
8990e307
LW
13178}
13179
13180int
15f169a1 13181Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13182{
97aff369 13183 dVAR;
bfed75c6
AL
13184 const char *where = NULL;
13185 const char *context = NULL;
68dc0745 13186 int contlen = -1;
46fc3d4c 13187 SV *msg;
5912531f 13188 int yychar = PL_parser->yychar;
463ee0b2 13189
7918f24d
NC
13190 PERL_ARGS_ASSERT_YYERROR;
13191
3280af22 13192 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13193 where = "at EOF";
8bcfe651
TM
13194 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13195 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13196 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13197 /*
13198 Only for NetWare:
13199 The code below is removed for NetWare because it abends/crashes on NetWare
13200 when the script has error such as not having the closing quotes like:
13201 if ($var eq "value)
13202 Checking of white spaces is anyway done in NetWare code.
13203 */
13204#ifndef NETWARE
3280af22
NIS
13205 while (isSPACE(*PL_oldoldbufptr))
13206 PL_oldoldbufptr++;
f355267c 13207#endif
3280af22
NIS
13208 context = PL_oldoldbufptr;
13209 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13210 }
8bcfe651
TM
13211 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13212 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13213 /*
13214 Only for NetWare:
13215 The code below is removed for NetWare because it abends/crashes on NetWare
13216 when the script has error such as not having the closing quotes like:
13217 if ($var eq "value)
13218 Checking of white spaces is anyway done in NetWare code.
13219 */
13220#ifndef NETWARE
3280af22
NIS
13221 while (isSPACE(*PL_oldbufptr))
13222 PL_oldbufptr++;
f355267c 13223#endif
3280af22
NIS
13224 context = PL_oldbufptr;
13225 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13226 }
13227 else if (yychar > 255)
68dc0745 13228 where = "next token ???";
12fbd33b 13229 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13230 if (PL_lex_state == LEX_NORMAL ||
13231 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13232 where = "at end of line";
3280af22 13233 else if (PL_lex_inpat)
68dc0745 13234 where = "within pattern";
463ee0b2 13235 else
68dc0745 13236 where = "within string";
463ee0b2 13237 }
46fc3d4c 13238 else {
84bafc02 13239 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13240 if (yychar < 32)
cea2e8a9 13241 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13242 else if (isPRINT_LC(yychar)) {
88c9ea1e 13243 const char string = yychar;
5e7aa789
NC
13244 sv_catpvn(where_sv, &string, 1);
13245 }
463ee0b2 13246 else
cea2e8a9 13247 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13248 where = SvPVX_const(where_sv);
463ee0b2 13249 }
46fc3d4c 13250 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13251 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13252 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13253 if (context)
cea2e8a9 13254 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13255 else
cea2e8a9 13256 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13257 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13258 Perl_sv_catpvf(aTHX_ msg,
57def98f 13259 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13260 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13261 PL_multi_end = 0;
a0d0e21e 13262 }
500960a6 13263 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13264 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13265 }
463ee0b2 13266 else
5a844595 13267 qerror(msg);
c7d6bfb2
GS
13268 if (PL_error_count >= 10) {
13269 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13270 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13271 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13272 else
13273 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13274 OutCopFILE(PL_curcop));
c7d6bfb2 13275 }
3280af22 13276 PL_in_my = 0;
5c284bb0 13277 PL_in_my_stash = NULL;
463ee0b2
LW
13278 return 0;
13279}
084592ab
CN
13280#ifdef __SC__
13281#pragma segment Main
13282#endif
4e35701f 13283
b250498f 13284STATIC char*
3ae08724 13285S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13286{
97aff369 13287 dVAR;
f54cb97a 13288 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13289
13290 PERL_ARGS_ASSERT_SWALLOW_BOM;
13291
7aa207d6 13292 switch (s[0]) {
4e553d73
NIS
13293 case 0xFF:
13294 if (s[1] == 0xFE) {
ee6ba15d 13295 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13296 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13297 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13298#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13299 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13300 s += 2;
dea0fc0b 13301 if (PL_bufend > (char*)s) {
81a923f4 13302 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13303 }
b250498f 13304#else
ee6ba15d 13305 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13306#endif
01ec43d0
GS
13307 }
13308 break;
78ae23f5 13309 case 0xFE:
7aa207d6 13310 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13311#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13312 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13313 s += 2;
13314 if (PL_bufend > (char *)s) {
81a923f4 13315 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13316 }
b250498f 13317#else
ee6ba15d 13318 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13319#endif
01ec43d0
GS
13320 }
13321 break;
3ae08724
GS
13322 case 0xEF:
13323 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13324 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13325 s += 3; /* UTF-8 */
13326 }
13327 break;
13328 case 0:
7aa207d6
JH
13329 if (slen > 3) {
13330 if (s[1] == 0) {
13331 if (s[2] == 0xFE && s[3] == 0xFF) {
13332 /* UTF-32 big-endian */
ee6ba15d 13333 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13334 }
13335 }
13336 else if (s[2] == 0 && s[3] != 0) {
13337 /* Leading bytes
13338 * 00 xx 00 xx
13339 * are a good indicator of UTF-16BE. */
ee6ba15d 13340#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13341 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13342 s = add_utf16_textfilter(s, FALSE);
13343#else
13344 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13345#endif
7aa207d6 13346 }
01ec43d0 13347 }
e294cc5d
JH
13348#ifdef EBCDIC
13349 case 0xDD:
13350 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13351 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13352 s += 4; /* UTF-8 */
13353 }
13354 break;
13355#endif
13356
7aa207d6
JH
13357 default:
13358 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13359 /* Leading bytes
13360 * xx 00 xx 00
13361 * are a good indicator of UTF-16LE. */
ee6ba15d 13362#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13363 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13364 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13365#else
13366 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13367#endif
7aa207d6 13368 }
01ec43d0 13369 }
b8f84bb2 13370 return (char*)s;
b250498f 13371}
4755096e 13372
6e3aabd6
GS
13373
13374#ifndef PERL_NO_UTF16_FILTER
13375static I32
a28af015 13376S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13377{
97aff369 13378 dVAR;
f3040f2c 13379 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13380 /* We re-use this each time round, throwing the contents away before we
13381 return. */
2a773401 13382 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13383 SV *const utf8_buffer = filter;
c28d6105 13384 IV status = IoPAGE(filter);
eda4663d 13385 const bool reverse = (bool) IoLINES(filter);
d2d1d4de 13386 I32 retval;
c8b0cbae
NC
13387
13388 /* As we're automatically added, at the lowest level, and hence only called
13389 from this file, we can be sure that we're not called in block mode. Hence
13390 don't bother writing code to deal with block mode. */
13391 if (maxlen) {
13392 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13393 }
c28d6105
NC
13394 if (status < 0) {
13395 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13396 }
1de9afcd 13397 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13398 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13399 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13400 reverse ? 'l' : 'b', idx, maxlen, status,
13401 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13402
13403 while (1) {
13404 STRLEN chars;
13405 STRLEN have;
dea0fc0b 13406 I32 newlen;
2a773401 13407 U8 *end;
c28d6105
NC
13408 /* First, look in our buffer of existing UTF-8 data: */
13409 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13410
13411 if (nl) {
13412 ++nl;
13413 } else if (status == 0) {
13414 /* EOF */
13415 IoPAGE(filter) = 0;
13416 nl = SvEND(utf8_buffer);
13417 }
13418 if (nl) {
d2d1d4de
NC
13419 STRLEN got = nl - SvPVX(utf8_buffer);
13420 /* Did we have anything to append? */
13421 retval = got != 0;
13422 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13423 /* Everything else in this code works just fine if SVp_POK isn't
13424 set. This, however, needs it, and we need it to work, else
13425 we loop infinitely because the buffer is never consumed. */
13426 sv_chop(utf8_buffer, nl);
13427 break;
13428 }
ba77e4cc 13429
c28d6105
NC
13430 /* OK, not a complete line there, so need to read some more UTF-16.
13431 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13432 while (1) {
13433 if (status <= 0)
13434 break;
13435 if (SvCUR(utf16_buffer) >= 2) {
13436 /* Location of the high octet of the last complete code point.
13437 Gosh, UTF-16 is a pain. All the benefits of variable length,
13438 *coupled* with all the benefits of partial reads and
13439 endianness. */
13440 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13441 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13442
13443 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13444 break;
13445 }
13446
13447 /* We have the first half of a surrogate. Read more. */
13448 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13449 }
c28d6105 13450
c28d6105
NC
13451 status = FILTER_READ(idx + 1, utf16_buffer,
13452 160 + (SvCUR(utf16_buffer) & 1));
13453 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13454 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13455 if (status < 0) {
13456 /* Error */
13457 IoPAGE(filter) = status;
13458 return status;
13459 }
13460 }
13461
13462 chars = SvCUR(utf16_buffer) >> 1;
13463 have = SvCUR(utf8_buffer);
13464 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13465
aa6dbd60 13466 if (reverse) {
c28d6105
NC
13467 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13468 (U8*)SvPVX_const(utf8_buffer) + have,
13469 chars * 2, &newlen);
aa6dbd60 13470 } else {
2a773401 13471 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13472 (U8*)SvPVX_const(utf8_buffer) + have,
13473 chars * 2, &newlen);
2a773401 13474 }
c28d6105 13475 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13476 *end = '\0';
c28d6105 13477
e07286ed
NC
13478 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13479 it's private to us, and utf16_to_utf8{,reversed} take a
13480 (pointer,length) pair, rather than a NUL-terminated string. */
13481 if(SvCUR(utf16_buffer) & 1) {
13482 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13483 SvCUR_set(utf16_buffer, 1);
13484 } else {
13485 SvCUR_set(utf16_buffer, 0);
13486 }
2a773401 13487 }
c28d6105
NC
13488 DEBUG_P(PerlIO_printf(Perl_debug_log,
13489 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13490 status,
13491 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13492 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13493 return retval;
6e3aabd6 13494}
81a923f4
NC
13495
13496static U8 *
13497S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13498{
2a773401 13499 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13500
c28d6105 13501 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13502 sv_setpvs(filter, "");
2a773401 13503 IoLINES(filter) = reversed;
c28d6105
NC
13504 IoPAGE(filter) = 1; /* Not EOF */
13505
13506 /* Sadly, we have to return a valid pointer, come what may, so we have to
13507 ignore any error return from this. */
13508 SvCUR_set(PL_linestr, 0);
13509 if (FILTER_READ(0, PL_linestr, 0)) {
13510 SvUTF8_on(PL_linestr);
81a923f4 13511 } else {
c28d6105 13512 SvUTF8_on(PL_linestr);
81a923f4 13513 }
c28d6105 13514 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13515 return (U8*)SvPVX(PL_linestr);
13516}
6e3aabd6 13517#endif
9f4817db 13518
f333445c
JP
13519/*
13520Returns a pointer to the next character after the parsed
13521vstring, as well as updating the passed in sv.
13522
13523Function must be called like
13524
561b68a9 13525 sv = newSV(5);
65b06e02 13526 s = scan_vstring(s,e,sv);
f333445c 13527
65b06e02 13528where s and e are the start and end of the string.
f333445c
JP
13529The sv should already be large enough to store the vstring
13530passed in, for performance reasons.
13531
13532*/
13533
13534char *
15f169a1 13535Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13536{
97aff369 13537 dVAR;
bfed75c6
AL
13538 const char *pos = s;
13539 const char *start = s;
7918f24d
NC
13540
13541 PERL_ARGS_ASSERT_SCAN_VSTRING;
13542
f333445c 13543 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13544 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13545 pos++;
f333445c
JP
13546 if ( *pos != '.') {
13547 /* this may not be a v-string if followed by => */
bfed75c6 13548 const char *next = pos;
65b06e02 13549 while (next < e && isSPACE(*next))
8fc7bb1c 13550 ++next;
65b06e02 13551 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13552 /* return string not v-string */
13553 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13554 return (char *)pos;
f333445c
JP
13555 }
13556 }
13557
13558 if (!isALPHA(*pos)) {
89ebb4a3 13559 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13560
d4c19fe8
AL
13561 if (*s == 'v')
13562 s++; /* get past 'v' */
f333445c 13563
76f68e9b 13564 sv_setpvs(sv, "");
f333445c
JP
13565
13566 for (;;) {
d4c19fe8 13567 /* this is atoi() that tolerates underscores */
0bd48802
AL
13568 U8 *tmpend;
13569 UV rev = 0;
d4c19fe8
AL
13570 const char *end = pos;
13571 UV mult = 1;
13572 while (--end >= s) {
13573 if (*end != '_') {
13574 const UV orev = rev;
f333445c
JP
13575 rev += (*end - '0') * mult;
13576 mult *= 10;
9b387841
NC
13577 if (orev > rev)
13578 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13579 "Integer overflow in decimal number");
f333445c
JP
13580 }
13581 }
13582#ifdef EBCDIC
13583 if (rev > 0x7FFFFFFF)
13584 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13585#endif
13586 /* Append native character for the rev point */
13587 tmpend = uvchr_to_utf8(tmpbuf, rev);
13588 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13589 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13590 SvUTF8_on(sv);
65b06e02 13591 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13592 s = ++pos;
13593 else {
13594 s = pos;
13595 break;
13596 }
65b06e02 13597 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13598 pos++;
13599 }
13600 SvPOK_on(sv);
13601 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13602 SvRMAGICAL_on(sv);
13603 }
73d840c0 13604 return (char *)s;
f333445c
JP
13605}
13606
88e1f1a2
JV
13607int
13608Perl_keyword_plugin_standard(pTHX_
13609 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13610{
13611 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13612 PERL_UNUSED_CONTEXT;
13613 PERL_UNUSED_ARG(keyword_ptr);
13614 PERL_UNUSED_ARG(keyword_len);
13615 PERL_UNUSED_ARG(op_ptr);
13616 return KEYWORD_PLUGIN_DECLINE;
13617}
13618
1da4ca5f
NC
13619/*
13620 * Local variables:
13621 * c-indentation-style: bsd
13622 * c-basic-offset: 4
13623 * indent-tabs-mode: t
13624 * End:
13625 *
37442d52
RGS
13626 * ex: set ts=8 sts=4 sw=4 noet:
13627 */