This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
run named IO destructors later
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
199e78b7
DM
48/* XXX temporary backwards compatibility */
49#define PL_lex_brackets (PL_parser->lex_brackets)
50#define PL_lex_brackstack (PL_parser->lex_brackstack)
51#define PL_lex_casemods (PL_parser->lex_casemods)
52#define PL_lex_casestack (PL_parser->lex_casestack)
53#define PL_lex_defer (PL_parser->lex_defer)
54#define PL_lex_dojoin (PL_parser->lex_dojoin)
55#define PL_lex_expect (PL_parser->lex_expect)
56#define PL_lex_formbrack (PL_parser->lex_formbrack)
57#define PL_lex_inpat (PL_parser->lex_inpat)
58#define PL_lex_inwhat (PL_parser->lex_inwhat)
59#define PL_lex_op (PL_parser->lex_op)
60#define PL_lex_repl (PL_parser->lex_repl)
61#define PL_lex_starts (PL_parser->lex_starts)
62#define PL_lex_stuff (PL_parser->lex_stuff)
63#define PL_multi_start (PL_parser->multi_start)
64#define PL_multi_open (PL_parser->multi_open)
65#define PL_multi_close (PL_parser->multi_close)
66#define PL_pending_ident (PL_parser->pending_ident)
67#define PL_preambled (PL_parser->preambled)
68#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 69#define PL_linestr (PL_parser->linestr)
c2598295
DM
70#define PL_expect (PL_parser->expect)
71#define PL_copline (PL_parser->copline)
f06b5848
DM
72#define PL_bufptr (PL_parser->bufptr)
73#define PL_oldbufptr (PL_parser->oldbufptr)
74#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
75#define PL_linestart (PL_parser->linestart)
76#define PL_bufend (PL_parser->bufend)
77#define PL_last_uni (PL_parser->last_uni)
78#define PL_last_lop (PL_parser->last_lop)
79#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 80#define PL_lex_state (PL_parser->lex_state)
2f9285f8 81#define PL_rsfp (PL_parser->rsfp)
5486870f 82#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
83#define PL_in_my (PL_parser->in_my)
84#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 85#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 86#define PL_multi_end (PL_parser->multi_end)
13765c85 87#define PL_error_count (PL_parser->error_count)
199e78b7
DM
88
89#ifdef PERL_MAD
90# define PL_endwhite (PL_parser->endwhite)
91# define PL_faketokens (PL_parser->faketokens)
92# define PL_lasttoke (PL_parser->lasttoke)
93# define PL_nextwhite (PL_parser->nextwhite)
94# define PL_realtokenstart (PL_parser->realtokenstart)
95# define PL_skipwhite (PL_parser->skipwhite)
96# define PL_thisclose (PL_parser->thisclose)
97# define PL_thismad (PL_parser->thismad)
98# define PL_thisopen (PL_parser->thisopen)
99# define PL_thisstuff (PL_parser->thisstuff)
100# define PL_thistoken (PL_parser->thistoken)
101# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
102# define PL_thiswhite (PL_parser->thiswhite)
103# define PL_nexttoke (PL_parser->nexttoke)
104# define PL_curforce (PL_parser->curforce)
105#else
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_nexttype (PL_parser->nexttype)
108# define PL_nextval (PL_parser->nextval)
199e78b7
DM
109#endif
110
16173588
NC
111/* This can't be done with embed.fnc, because struct yy_parser contains a
112 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
113static int
114S_pending_ident(pTHX);
199e78b7 115
0bd48802 116static const char ident_too_long[] = "Identifier too long";
8903cb82 117
29595ff2 118#ifdef PERL_MAD
29595ff2 119# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 120# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 121#else
5db06880 122# define CURMAD(slot,sv)
9ded7720 123# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
124#endif
125
9059aa12
LW
126#define XFAKEBRACK 128
127#define XENUMMASK 127
128
39e02b42
JH
129#ifdef USE_UTF8_SCRIPTS
130# define UTF (!IN_BYTES)
2b9d42f0 131#else
746b446a 132# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 133#endif
a0ed51b3 134
b1fc3636
CJ
135/* The maximum number of characters preceding the unrecognized one to display */
136#define UNRECOGNIZED_PRECEDE_COUNT 10
137
61f0cdd9 138/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
139 * 1999-02-27 mjd-perl-patch@plover.com */
140#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
141
bf4acbe4 142#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 143
ffb4593c
NT
144/* LEX_* are values for PL_lex_state, the state of the lexer.
145 * They are arranged oddly so that the guard on the switch statement
79072805
LW
146 * can get by with a single comparison (if the compiler is smart enough).
147 */
148
fb73857a 149/* #define LEX_NOTPARSING 11 is done in perl.h. */
150
b6007c36
DM
151#define LEX_NORMAL 10 /* normal code (ie not within "...") */
152#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
153#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
154#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
155#define LEX_INTERPSTART 6 /* expecting the start of a $var */
156
157 /* at end of code, eg "$x" followed by: */
158#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
159#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
160
161#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
162 string or after \E, $foo, etc */
163#define LEX_INTERPCONST 2 /* NOT USED */
164#define LEX_FORMLINE 1 /* expecting a format line */
165#define LEX_KNOWNEXT 0 /* next token known; just return it */
166
79072805 167
bbf60fe6 168#ifdef DEBUGGING
27da23d5 169static const char* const lex_state_names[] = {
bbf60fe6
DM
170 "KNOWNEXT",
171 "FORMLINE",
172 "INTERPCONST",
173 "INTERPCONCAT",
174 "INTERPENDMAYBE",
175 "INTERPEND",
176 "INTERPSTART",
177 "INTERPPUSH",
178 "INTERPCASEMOD",
179 "INTERPNORMAL",
180 "NORMAL"
181};
182#endif
183
79072805
LW
184#ifdef ff_next
185#undef ff_next
d48672a2
LW
186#endif
187
79072805 188#include "keywords.h"
fe14fcc3 189
ffb4593c
NT
190/* CLINE is a macro that ensures PL_copline has a sane value */
191
ae986130
LW
192#ifdef CLINE
193#undef CLINE
194#endif
57843af0 195#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 196
5db06880 197#ifdef PERL_MAD
29595ff2
NC
198# define SKIPSPACE0(s) skipspace0(s)
199# define SKIPSPACE1(s) skipspace1(s)
200# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
201# define PEEKSPACE(s) skipspace2(s,0)
202#else
203# define SKIPSPACE0(s) skipspace(s)
204# define SKIPSPACE1(s) skipspace(s)
205# define SKIPSPACE2(s,tsv) skipspace(s)
206# define PEEKSPACE(s) skipspace(s)
207#endif
208
ffb4593c
NT
209/*
210 * Convenience functions to return different tokens and prime the
9cbb5ea2 211 * lexer for the next token. They all take an argument.
ffb4593c
NT
212 *
213 * TOKEN : generic token (used for '(', DOLSHARP, etc)
214 * OPERATOR : generic operator
215 * AOPERATOR : assignment operator
216 * PREBLOCK : beginning the block after an if, while, foreach, ...
217 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
218 * PREREF : *EXPR where EXPR is not a simple identifier
219 * TERM : expression term
220 * LOOPX : loop exiting command (goto, last, dump, etc)
221 * FTST : file test operator
222 * FUN0 : zero-argument function
2d2e263d 223 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
224 * BOop : bitwise or or xor
225 * BAop : bitwise and
226 * SHop : shift operator
227 * PWop : power operator
9cbb5ea2 228 * PMop : pattern-matching operator
ffb4593c
NT
229 * Aop : addition-level operator
230 * Mop : multiplication-level operator
231 * Eop : equality-testing operator
e5edeb50 232 * Rop : relational operator <= != gt
ffb4593c
NT
233 *
234 * Also see LOP and lop() below.
235 */
236
998054bd 237#ifdef DEBUGGING /* Serve -DT. */
704d4215 238# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 239#else
bbf60fe6 240# define REPORT(retval) (retval)
998054bd
SC
241#endif
242
bbf60fe6
DM
243#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
244#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
245#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
246#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
247#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
248#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
249#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
250#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
251#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
252#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
253#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
254#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
255#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
256#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
257#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
258#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
259#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
260#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
261#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
262#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 263
a687059c
LW
264/* This bit of chicanery makes a unary function followed by
265 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
266 * The UNIDOR macro is for unary functions that can be followed by the //
267 * operator (such as C<shift // 0>).
a687059c 268 */
376fcdbf 269#define UNI2(f,x) { \
6154021b 270 pl_yylval.ival = f; \
376fcdbf
AL
271 PL_expect = x; \
272 PL_bufptr = s; \
273 PL_last_uni = PL_oldbufptr; \
274 PL_last_lop_op = f; \
275 if (*s == '(') \
276 return REPORT( (int)FUNC1 ); \
29595ff2 277 s = PEEKSPACE(s); \
376fcdbf
AL
278 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
279 }
6f33ba73
RGS
280#define UNI(f) UNI2(f,XTERM)
281#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 282
376fcdbf 283#define UNIBRACK(f) { \
6154021b 284 pl_yylval.ival = f; \
376fcdbf
AL
285 PL_bufptr = s; \
286 PL_last_uni = PL_oldbufptr; \
287 if (*s == '(') \
288 return REPORT( (int)FUNC1 ); \
29595ff2 289 s = PEEKSPACE(s); \
376fcdbf
AL
290 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
291 }
79072805 292
9f68db38 293/* grandfather return to old style */
6154021b 294#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 295
8fa7f367
JH
296#ifdef DEBUGGING
297
6154021b 298/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
299enum token_type {
300 TOKENTYPE_NONE,
301 TOKENTYPE_IVAL,
6154021b 302 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
303 TOKENTYPE_PVAL,
304 TOKENTYPE_OPVAL,
305 TOKENTYPE_GVVAL
306};
307
6d4a66ac
NC
308static struct debug_tokens {
309 const int token;
310 enum token_type type;
311 const char *name;
312} const debug_tokens[] =
9041c2e3 313{
bbf60fe6
DM
314 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
315 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
316 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
317 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
318 { ARROW, TOKENTYPE_NONE, "ARROW" },
319 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
320 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
321 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
322 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
323 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 324 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
325 { DO, TOKENTYPE_NONE, "DO" },
326 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
327 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
328 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
329 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
330 { ELSE, TOKENTYPE_NONE, "ELSE" },
331 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
332 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
333 { FOR, TOKENTYPE_IVAL, "FOR" },
334 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
335 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
336 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
337 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
338 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
339 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 340 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
341 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
342 { IF, TOKENTYPE_IVAL, "IF" },
343 { LABEL, TOKENTYPE_PVAL, "LABEL" },
344 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
345 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
346 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
347 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
348 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
349 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
350 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
351 { MY, TOKENTYPE_IVAL, "MY" },
352 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
353 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
354 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
355 { OROP, TOKENTYPE_IVAL, "OROP" },
356 { OROR, TOKENTYPE_NONE, "OROR" },
357 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
358 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
359 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
360 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
361 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
362 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
363 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
364 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
365 { PREINC, TOKENTYPE_NONE, "PREINC" },
366 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
367 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
368 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
369 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
370 { SUB, TOKENTYPE_NONE, "SUB" },
371 { THING, TOKENTYPE_OPVAL, "THING" },
372 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
373 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
374 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
375 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
376 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
377 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 378 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
379 { WHILE, TOKENTYPE_IVAL, "WHILE" },
380 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 381 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 382 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
383};
384
6154021b 385/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 386
bbf60fe6 387STATIC int
704d4215 388S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 389{
97aff369 390 dVAR;
7918f24d
NC
391
392 PERL_ARGS_ASSERT_TOKEREPORT;
393
bbf60fe6 394 if (DEBUG_T_TEST) {
bd61b366 395 const char *name = NULL;
bbf60fe6 396 enum token_type type = TOKENTYPE_NONE;
f54cb97a 397 const struct debug_tokens *p;
396482e1 398 SV* const report = newSVpvs("<== ");
bbf60fe6 399
f54cb97a 400 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
401 if (p->token == (int)rv) {
402 name = p->name;
403 type = p->type;
404 break;
405 }
406 }
407 if (name)
54667de8 408 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
409 else if ((char)rv > ' ' && (char)rv < '~')
410 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
411 else if (!rv)
396482e1 412 sv_catpvs(report, "EOF");
bbf60fe6
DM
413 else
414 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
415 switch (type) {
416 case TOKENTYPE_NONE:
417 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
418 break;
419 case TOKENTYPE_IVAL:
704d4215 420 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
421 break;
422 case TOKENTYPE_OPNUM:
423 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 424 PL_op_name[lvalp->ival]);
bbf60fe6
DM
425 break;
426 case TOKENTYPE_PVAL:
704d4215 427 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_OPVAL:
704d4215 430 if (lvalp->opval) {
401441c0 431 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
432 PL_op_name[lvalp->opval->op_type]);
433 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 434 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 435 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
436 }
437
438 }
401441c0 439 else
396482e1 440 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
441 break;
442 }
b6007c36 443 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
444 };
445 return (int)rv;
998054bd
SC
446}
447
b6007c36
DM
448
449/* print the buffer with suitable escapes */
450
451STATIC void
15f169a1 452S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 453{
396482e1 454 SV* const tmp = newSVpvs("");
7918f24d
NC
455
456 PERL_ARGS_ASSERT_PRINTBUF;
457
b6007c36
DM
458 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
459 SvREFCNT_dec(tmp);
460}
461
8fa7f367
JH
462#endif
463
8290c323
NC
464static int
465S_deprecate_commaless_var_list(pTHX) {
466 PL_expect = XTERM;
467 deprecate("comma-less variable list");
468 return REPORT(','); /* grandfather non-comma-format format */
469}
470
ffb4593c
NT
471/*
472 * S_ao
473 *
c963b151
BD
474 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
475 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
476 */
477
76e3520e 478STATIC int
cea2e8a9 479S_ao(pTHX_ int toketype)
a0d0e21e 480{
97aff369 481 dVAR;
3280af22
NIS
482 if (*PL_bufptr == '=') {
483 PL_bufptr++;
a0d0e21e 484 if (toketype == ANDAND)
6154021b 485 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 486 else if (toketype == OROR)
6154021b 487 pl_yylval.ival = OP_ORASSIGN;
c963b151 488 else if (toketype == DORDOR)
6154021b 489 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
490 toketype = ASSIGNOP;
491 }
492 return toketype;
493}
494
ffb4593c
NT
495/*
496 * S_no_op
497 * When Perl expects an operator and finds something else, no_op
498 * prints the warning. It always prints "<something> found where
499 * operator expected. It prints "Missing semicolon on previous line?"
500 * if the surprise occurs at the start of the line. "do you need to
501 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
502 * where the compiler doesn't know if foo is a method call or a function.
503 * It prints "Missing operator before end of line" if there's nothing
504 * after the missing operator, or "... before <...>" if there is something
505 * after the missing operator.
506 */
507
76e3520e 508STATIC void
15f169a1 509S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 510{
97aff369 511 dVAR;
9d4ba2ae
AL
512 char * const oldbp = PL_bufptr;
513 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 514
7918f24d
NC
515 PERL_ARGS_ASSERT_NO_OP;
516
1189a94a
GS
517 if (!s)
518 s = oldbp;
07c798fb 519 else
1189a94a 520 PL_bufptr = s;
cea2e8a9 521 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
522 if (ckWARN_d(WARN_SYNTAX)) {
523 if (is_first)
524 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
525 "\t(Missing semicolon on previous line?)\n");
526 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 527 const char *t;
c35e046a
AL
528 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
529 NOOP;
56da5a46
RGS
530 if (t < PL_bufptr && isSPACE(*t))
531 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
532 "\t(Do you need to predeclare %.*s?)\n",
551405c4 533 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
534 }
535 else {
536 assert(s >= oldbp);
537 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 538 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 539 }
07c798fb 540 }
3280af22 541 PL_bufptr = oldbp;
8990e307
LW
542}
543
ffb4593c
NT
544/*
545 * S_missingterm
546 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 547 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
548 * If we're in a delimited string and the delimiter is a control
549 * character, it's reformatted into a two-char sequence like ^C.
550 * This is fatal.
551 */
552
76e3520e 553STATIC void
cea2e8a9 554S_missingterm(pTHX_ char *s)
8990e307 555{
97aff369 556 dVAR;
8990e307
LW
557 char tmpbuf[3];
558 char q;
559 if (s) {
9d4ba2ae 560 char * const nl = strrchr(s,'\n');
d2719217 561 if (nl)
8990e307
LW
562 *nl = '\0';
563 }
463559e7 564 else if (isCNTRL(PL_multi_close)) {
8990e307 565 *tmpbuf = '^';
585ec06d 566 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
567 tmpbuf[2] = '\0';
568 s = tmpbuf;
569 }
570 else {
eb160463 571 *tmpbuf = (char)PL_multi_close;
8990e307
LW
572 tmpbuf[1] = '\0';
573 s = tmpbuf;
574 }
575 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 576 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 577}
79072805 578
ef89dcc3 579#define FEATURE_IS_ENABLED(name) \
0d863452 580 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 581 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 582/* The longest string we pass in. */
1863b879 583#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 584
0d863452
RH
585/*
586 * S_feature_is_enabled
587 * Check whether the named feature is enabled.
588 */
589STATIC bool
15f169a1 590S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 591{
97aff369 592 dVAR;
0d863452 593 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 594 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
595
596 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
597
4a731d7b
NC
598 assert(namelen <= MAX_FEATURE_LEN);
599 memcpy(&he_name[8], name, namelen);
d4c19fe8 600
7b9ef140 601 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
602}
603
ffb4593c 604/*
9cbb5ea2
GS
605 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
606 * utf16-to-utf8-reversed.
ffb4593c
NT
607 */
608
c39cd008
GS
609#ifdef PERL_CR_FILTER
610static void
611strip_return(SV *sv)
612{
95a20fc0 613 register const char *s = SvPVX_const(sv);
9d4ba2ae 614 register const char * const e = s + SvCUR(sv);
7918f24d
NC
615
616 PERL_ARGS_ASSERT_STRIP_RETURN;
617
c39cd008
GS
618 /* outer loop optimized to do nothing if there are no CR-LFs */
619 while (s < e) {
620 if (*s++ == '\r' && *s == '\n') {
621 /* hit a CR-LF, need to copy the rest */
622 register char *d = s - 1;
623 *d++ = *s++;
624 while (s < e) {
625 if (*s == '\r' && s[1] == '\n')
626 s++;
627 *d++ = *s++;
628 }
629 SvCUR(sv) -= s - d;
630 return;
631 }
632 }
633}
a868473f 634
76e3520e 635STATIC I32
c39cd008 636S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 637{
f54cb97a 638 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
639 if (count > 0 && !maxlen)
640 strip_return(sv);
641 return count;
a868473f
NIS
642}
643#endif
644
199e78b7
DM
645
646
ffb4593c
NT
647/*
648 * Perl_lex_start
5486870f 649 *
e3abe207 650 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
651 *
652 * rsfp is the opened file handle to read from (if any),
653 *
654 * line holds any initial content already read from the file (or in
655 * the case of no file, such as an eval, the whole contents);
656 *
657 * new_filter indicates that this is a new file and it shouldn't inherit
658 * the filters from the current parser (ie require).
ffb4593c
NT
659 */
660
a0d0e21e 661void
5486870f 662Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 663{
97aff369 664 dVAR;
6ef55633 665 const char *s = NULL;
8990e307 666 STRLEN len;
5486870f 667 yy_parser *parser, *oparser;
acdf0a21
DM
668
669 /* create and initialise a parser */
670
199e78b7 671 Newxz(parser, 1, yy_parser);
5486870f 672 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
673 PL_parser = parser;
674
28ac2b49
Z
675 parser->stack = NULL;
676 parser->ps = NULL;
677 parser->stack_size = 0;
acdf0a21 678
e3abe207
DM
679 /* on scope exit, free this parser and restore any outer one */
680 SAVEPARSER(parser);
7c4baf47 681 parser->saved_curcop = PL_curcop;
e3abe207 682
acdf0a21 683 /* initialise lexer state */
8990e307 684
fb205e7a
DM
685#ifdef PERL_MAD
686 parser->curforce = -1;
687#else
688 parser->nexttoke = 0;
689#endif
ca4cfd28 690 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 691 parser->copline = NOLINE;
5afb0a62 692 parser->lex_state = LEX_NORMAL;
c2598295 693 parser->expect = XSTATE;
2f9285f8 694 parser->rsfp = rsfp;
56b27c9a 695 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 696 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 697
199e78b7
DM
698 Newx(parser->lex_brackstack, 120, char);
699 Newx(parser->lex_casestack, 12, char);
700 *parser->lex_casestack = '\0';
02b34bbe 701
10efb74f
NC
702 if (line) {
703 s = SvPV_const(line, len);
704 } else {
705 len = 0;
706 }
bdc0bf6f 707
10efb74f 708 if (!len) {
bdc0bf6f 709 parser->linestr = newSVpvs("\n;");
3e5c0189 710 } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
719a9bb0
NC
711 /* avoid tie/overload weirdness */
712 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 713 if (s[len-1] != ';')
bdc0bf6f 714 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
715 } else {
716 SvTEMP_off(line);
717 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 718 parser->linestr = line;
8990e307 719 }
f06b5848
DM
720 parser->oldoldbufptr =
721 parser->oldbufptr =
722 parser->bufptr =
723 parser->linestart = SvPVX(parser->linestr);
724 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
725 parser->last_lop = parser->last_uni = NULL;
79072805 726}
a687059c 727
e3abe207
DM
728
729/* delete a parser object */
730
731void
732Perl_parser_free(pTHX_ const yy_parser *parser)
733{
7918f24d
NC
734 PERL_ARGS_ASSERT_PARSER_FREE;
735
7c4baf47 736 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
737 SvREFCNT_dec(parser->linestr);
738
2f9285f8
DM
739 if (parser->rsfp == PerlIO_stdin())
740 PerlIO_clearerr(parser->rsfp);
799361c3
SH
741 else if (parser->rsfp && (!parser->old_parser ||
742 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 743 PerlIO_close(parser->rsfp);
5486870f 744 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 745
e3abe207
DM
746 Safefree(parser->lex_brackstack);
747 Safefree(parser->lex_casestack);
748 PL_parser = parser->old_parser;
749 Safefree(parser);
750}
751
752
ffb4593c
NT
753/*
754 * Perl_lex_end
9cbb5ea2
GS
755 * Finalizer for lexing operations. Must be called when the parser is
756 * done with the lexer.
ffb4593c
NT
757 */
758
463ee0b2 759void
864dbfa3 760Perl_lex_end(pTHX)
463ee0b2 761{
97aff369 762 dVAR;
3280af22 763 PL_doextract = FALSE;
463ee0b2
LW
764}
765
ffb4593c 766/*
f0e67a1d
Z
767=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
768
769Buffer scalar containing the chunk currently under consideration of the
770text currently being lexed. This is always a plain string scalar (for
771which C<SvPOK> is true). It is not intended to be used as a scalar by
772normal scalar means; instead refer to the buffer directly by the pointer
773variables described below.
774
775The lexer maintains various C<char*> pointers to things in the
776C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
777reallocated, all of these pointers must be updated. Don't attempt to
778do this manually, but rather use L</lex_grow_linestr> if you need to
779reallocate the buffer.
780
781The content of the text chunk in the buffer is commonly exactly one
782complete line of input, up to and including a newline terminator,
783but there are situations where it is otherwise. The octets of the
784buffer may be intended to be interpreted as either UTF-8 or Latin-1.
785The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
786flag on this scalar, which may disagree with it.
787
788For direct examination of the buffer, the variable
789L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
790lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
791of these pointers is usually preferable to examination of the scalar
792through normal scalar means.
793
794=for apidoc AmxU|char *|PL_parser-E<gt>bufend
795
796Direct pointer to the end of the chunk of text currently being lexed, the
797end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
798+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
799always located at the end of the buffer, and does not count as part of
800the buffer's contents.
801
802=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
803
804Points to the current position of lexing inside the lexer buffer.
805Characters around this point may be freely examined, within
806the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
807L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
808interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
809
810Lexing code (whether in the Perl core or not) moves this pointer past
811the characters that it consumes. It is also expected to perform some
812bookkeeping whenever a newline character is consumed. This movement
813can be more conveniently performed by the function L</lex_read_to>,
814which handles newlines appropriately.
815
816Interpretation of the buffer's octets can be abstracted out by
817using the slightly higher-level functions L</lex_peek_unichar> and
818L</lex_read_unichar>.
819
820=for apidoc AmxU|char *|PL_parser-E<gt>linestart
821
822Points to the start of the current line inside the lexer buffer.
823This is useful for indicating at which column an error occurred, and
824not much else. This must be updated by any lexing code that consumes
825a newline; the function L</lex_read_to> handles this detail.
826
827=cut
828*/
829
830/*
831=for apidoc Amx|bool|lex_bufutf8
832
833Indicates whether the octets in the lexer buffer
834(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
835of Unicode characters. If not, they should be interpreted as Latin-1
836characters. This is analogous to the C<SvUTF8> flag for scalars.
837
838In UTF-8 mode, it is not guaranteed that the lexer buffer actually
839contains valid UTF-8. Lexing code must be robust in the face of invalid
840encoding.
841
842The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
843is significant, but not the whole story regarding the input character
844encoding. Normally, when a file is being read, the scalar contains octets
845and its C<SvUTF8> flag is off, but the octets should be interpreted as
846UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
847however, the scalar may have the C<SvUTF8> flag on, and in this case its
848octets should be interpreted as UTF-8 unless the C<use bytes> pragma
849is in effect. This logic may change in the future; use this function
850instead of implementing the logic yourself.
851
852=cut
853*/
854
855bool
856Perl_lex_bufutf8(pTHX)
857{
858 return UTF;
859}
860
861/*
862=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
863
864Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
865at least I<len> octets (including terminating NUL). Returns a
866pointer to the reallocated buffer. This is necessary before making
867any direct modification of the buffer that would increase its length.
868L</lex_stuff_pvn> provides a more convenient way to insert text into
869the buffer.
870
871Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
872this function updates all of the lexer's variables that point directly
873into the buffer.
874
875=cut
876*/
877
878char *
879Perl_lex_grow_linestr(pTHX_ STRLEN len)
880{
881 SV *linestr;
882 char *buf;
883 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
884 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
885 linestr = PL_parser->linestr;
886 buf = SvPVX(linestr);
887 if (len <= SvLEN(linestr))
888 return buf;
889 bufend_pos = PL_parser->bufend - buf;
890 bufptr_pos = PL_parser->bufptr - buf;
891 oldbufptr_pos = PL_parser->oldbufptr - buf;
892 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
893 linestart_pos = PL_parser->linestart - buf;
894 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
895 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
896 buf = sv_grow(linestr, len);
897 PL_parser->bufend = buf + bufend_pos;
898 PL_parser->bufptr = buf + bufptr_pos;
899 PL_parser->oldbufptr = buf + oldbufptr_pos;
900 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
901 PL_parser->linestart = buf + linestart_pos;
902 if (PL_parser->last_uni)
903 PL_parser->last_uni = buf + last_uni_pos;
904 if (PL_parser->last_lop)
905 PL_parser->last_lop = buf + last_lop_pos;
906 return buf;
907}
908
909/*
83aa740e 910=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
911
912Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
913immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
914reallocating the buffer if necessary. This means that lexing code that
915runs later will see the characters as if they had appeared in the input.
916It is not recommended to do this as part of normal parsing, and most
917uses of this facility run the risk of the inserted characters being
918interpreted in an unintended manner.
919
920The string to be inserted is represented by I<len> octets starting
921at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
922according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
923The characters are recoded for the lexer buffer, according to how the
924buffer is currently being interpreted (L</lex_bufutf8>). If a string
925to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
926function is more convenient.
927
928=cut
929*/
930
931void
83aa740e 932Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 933{
749123ff 934 dVAR;
f0e67a1d
Z
935 char *bufptr;
936 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
937 if (flags & ~(LEX_STUFF_UTF8))
938 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
939 if (UTF) {
940 if (flags & LEX_STUFF_UTF8) {
941 goto plain_copy;
942 } else {
943 STRLEN highhalf = 0;
83aa740e 944 const char *p, *e = pv+len;
f0e67a1d
Z
945 for (p = pv; p != e; p++)
946 highhalf += !!(((U8)*p) & 0x80);
947 if (!highhalf)
948 goto plain_copy;
949 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
950 bufptr = PL_parser->bufptr;
951 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
952 SvCUR_set(PL_parser->linestr,
953 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
954 PL_parser->bufend += len+highhalf;
955 for (p = pv; p != e; p++) {
956 U8 c = (U8)*p;
957 if (c & 0x80) {
958 *bufptr++ = (char)(0xc0 | (c >> 6));
959 *bufptr++ = (char)(0x80 | (c & 0x3f));
960 } else {
961 *bufptr++ = (char)c;
962 }
963 }
964 }
965 } else {
966 if (flags & LEX_STUFF_UTF8) {
967 STRLEN highhalf = 0;
83aa740e 968 const char *p, *e = pv+len;
f0e67a1d
Z
969 for (p = pv; p != e; p++) {
970 U8 c = (U8)*p;
971 if (c >= 0xc4) {
972 Perl_croak(aTHX_ "Lexing code attempted to stuff "
973 "non-Latin-1 character into Latin-1 input");
974 } else if (c >= 0xc2 && p+1 != e &&
975 (((U8)p[1]) & 0xc0) == 0x80) {
976 p++;
977 highhalf++;
978 } else if (c >= 0x80) {
979 /* malformed UTF-8 */
980 ENTER;
981 SAVESPTR(PL_warnhook);
982 PL_warnhook = PERL_WARNHOOK_FATAL;
983 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
984 LEAVE;
985 }
986 }
987 if (!highhalf)
988 goto plain_copy;
989 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
990 bufptr = PL_parser->bufptr;
991 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
992 SvCUR_set(PL_parser->linestr,
993 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
994 PL_parser->bufend += len-highhalf;
995 for (p = pv; p != e; p++) {
996 U8 c = (U8)*p;
997 if (c & 0x80) {
998 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
999 p++;
1000 } else {
1001 *bufptr++ = (char)c;
1002 }
1003 }
1004 } else {
1005 plain_copy:
1006 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1007 bufptr = PL_parser->bufptr;
1008 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1009 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1010 PL_parser->bufend += len;
1011 Copy(pv, bufptr, len, char);
1012 }
1013 }
1014}
1015
1016/*
1017=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1018
1019Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1020immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1021reallocating the buffer if necessary. This means that lexing code that
1022runs later will see the characters as if they had appeared in the input.
1023It is not recommended to do this as part of normal parsing, and most
1024uses of this facility run the risk of the inserted characters being
1025interpreted in an unintended manner.
1026
1027The string to be inserted is the string value of I<sv>. The characters
1028are recoded for the lexer buffer, according to how the buffer is currently
1029being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1030not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1031need to construct a scalar.
1032
1033=cut
1034*/
1035
1036void
1037Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1038{
1039 char *pv;
1040 STRLEN len;
1041 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1042 if (flags)
1043 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1044 pv = SvPV(sv, len);
1045 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1046}
1047
1048/*
1049=for apidoc Amx|void|lex_unstuff|char *ptr
1050
1051Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1052I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1053This hides the discarded text from any lexing code that runs later,
1054as if the text had never appeared.
1055
1056This is not the normal way to consume lexed text. For that, use
1057L</lex_read_to>.
1058
1059=cut
1060*/
1061
1062void
1063Perl_lex_unstuff(pTHX_ char *ptr)
1064{
1065 char *buf, *bufend;
1066 STRLEN unstuff_len;
1067 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1068 buf = PL_parser->bufptr;
1069 if (ptr < buf)
1070 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1071 if (ptr == buf)
1072 return;
1073 bufend = PL_parser->bufend;
1074 if (ptr > bufend)
1075 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1076 unstuff_len = ptr - buf;
1077 Move(ptr, buf, bufend+1-ptr, char);
1078 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1079 PL_parser->bufend = bufend - unstuff_len;
1080}
1081
1082/*
1083=for apidoc Amx|void|lex_read_to|char *ptr
1084
1085Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1086to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1087performing the correct bookkeeping whenever a newline character is passed.
1088This is the normal way to consume lexed text.
1089
1090Interpretation of the buffer's octets can be abstracted out by
1091using the slightly higher-level functions L</lex_peek_unichar> and
1092L</lex_read_unichar>.
1093
1094=cut
1095*/
1096
1097void
1098Perl_lex_read_to(pTHX_ char *ptr)
1099{
1100 char *s;
1101 PERL_ARGS_ASSERT_LEX_READ_TO;
1102 s = PL_parser->bufptr;
1103 if (ptr < s || ptr > PL_parser->bufend)
1104 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1105 for (; s != ptr; s++)
1106 if (*s == '\n') {
1107 CopLINE_inc(PL_curcop);
1108 PL_parser->linestart = s+1;
1109 }
1110 PL_parser->bufptr = ptr;
1111}
1112
1113/*
1114=for apidoc Amx|void|lex_discard_to|char *ptr
1115
1116Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1117up to I<ptr>. The remaining content of the buffer will be moved, and
1118all pointers into the buffer updated appropriately. I<ptr> must not
1119be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1120it is not permitted to discard text that has yet to be lexed.
1121
1122Normally it is not necessarily to do this directly, because it suffices to
1123use the implicit discarding behaviour of L</lex_next_chunk> and things
1124based on it. However, if a token stretches across multiple lines,
1f317c95 1125and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1126that purpose, then after completion of the token it would be wise to
1127explicitly discard the now-unneeded earlier lines, to avoid future
1128multi-line tokens growing the buffer without bound.
1129
1130=cut
1131*/
1132
1133void
1134Perl_lex_discard_to(pTHX_ char *ptr)
1135{
1136 char *buf;
1137 STRLEN discard_len;
1138 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1139 buf = SvPVX(PL_parser->linestr);
1140 if (ptr < buf)
1141 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1142 if (ptr == buf)
1143 return;
1144 if (ptr > PL_parser->bufptr)
1145 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1146 discard_len = ptr - buf;
1147 if (PL_parser->oldbufptr < ptr)
1148 PL_parser->oldbufptr = ptr;
1149 if (PL_parser->oldoldbufptr < ptr)
1150 PL_parser->oldoldbufptr = ptr;
1151 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1152 PL_parser->last_uni = NULL;
1153 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1154 PL_parser->last_lop = NULL;
1155 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1156 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1157 PL_parser->bufend -= discard_len;
1158 PL_parser->bufptr -= discard_len;
1159 PL_parser->oldbufptr -= discard_len;
1160 PL_parser->oldoldbufptr -= discard_len;
1161 if (PL_parser->last_uni)
1162 PL_parser->last_uni -= discard_len;
1163 if (PL_parser->last_lop)
1164 PL_parser->last_lop -= discard_len;
1165}
1166
1167/*
1168=for apidoc Amx|bool|lex_next_chunk|U32 flags
1169
1170Reads in the next chunk of text to be lexed, appending it to
1171L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1172looked to the end of the current chunk and wants to know more. It is
1173usual, but not necessary, for lexing to have consumed the entirety of
1174the current chunk at this time.
1175
1176If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1177chunk (i.e., the current chunk has been entirely consumed), normally the
1178current chunk will be discarded at the same time that the new chunk is
1179read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1180will not be discarded. If the current chunk has not been entirely
1181consumed, then it will not be discarded regardless of the flag.
1182
1183Returns true if some new text was added to the buffer, or false if the
1184buffer has reached the end of the input text.
1185
1186=cut
1187*/
1188
1189#define LEX_FAKE_EOF 0x80000000
1190
1191bool
1192Perl_lex_next_chunk(pTHX_ U32 flags)
1193{
1194 SV *linestr;
1195 char *buf;
1196 STRLEN old_bufend_pos, new_bufend_pos;
1197 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1198 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1199 bool got_some_for_debugger = 0;
f0e67a1d
Z
1200 bool got_some;
1201 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1202 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1203 linestr = PL_parser->linestr;
1204 buf = SvPVX(linestr);
1205 if (!(flags & LEX_KEEP_PREVIOUS) &&
1206 PL_parser->bufptr == PL_parser->bufend) {
1207 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1208 linestart_pos = 0;
1209 if (PL_parser->last_uni != PL_parser->bufend)
1210 PL_parser->last_uni = NULL;
1211 if (PL_parser->last_lop != PL_parser->bufend)
1212 PL_parser->last_lop = NULL;
1213 last_uni_pos = last_lop_pos = 0;
1214 *buf = 0;
1215 SvCUR(linestr) = 0;
1216 } else {
1217 old_bufend_pos = PL_parser->bufend - buf;
1218 bufptr_pos = PL_parser->bufptr - buf;
1219 oldbufptr_pos = PL_parser->oldbufptr - buf;
1220 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1221 linestart_pos = PL_parser->linestart - buf;
1222 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1223 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1224 }
1225 if (flags & LEX_FAKE_EOF) {
1226 goto eof;
1227 } else if (!PL_parser->rsfp) {
1228 got_some = 0;
1229 } else if (filter_gets(linestr, old_bufend_pos)) {
1230 got_some = 1;
17cc9359 1231 got_some_for_debugger = 1;
f0e67a1d 1232 } else {
580561a3
Z
1233 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1234 sv_setpvs(linestr, "");
f0e67a1d
Z
1235 eof:
1236 /* End of real input. Close filehandle (unless it was STDIN),
1237 * then add implicit termination.
1238 */
1239 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1240 PerlIO_clearerr(PL_parser->rsfp);
1241 else if (PL_parser->rsfp)
1242 (void)PerlIO_close(PL_parser->rsfp);
1243 PL_parser->rsfp = NULL;
1244 PL_doextract = FALSE;
1245#ifdef PERL_MAD
1246 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1247 PL_faketokens = 1;
1248#endif
1249 if (!PL_in_eval && PL_minus_p) {
1250 sv_catpvs(linestr,
1251 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1252 PL_minus_n = PL_minus_p = 0;
1253 } else if (!PL_in_eval && PL_minus_n) {
1254 sv_catpvs(linestr, /*{*/";}");
1255 PL_minus_n = 0;
1256 } else
1257 sv_catpvs(linestr, ";");
1258 got_some = 1;
1259 }
1260 buf = SvPVX(linestr);
1261 new_bufend_pos = SvCUR(linestr);
1262 PL_parser->bufend = buf + new_bufend_pos;
1263 PL_parser->bufptr = buf + bufptr_pos;
1264 PL_parser->oldbufptr = buf + oldbufptr_pos;
1265 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1266 PL_parser->linestart = buf + linestart_pos;
1267 if (PL_parser->last_uni)
1268 PL_parser->last_uni = buf + last_uni_pos;
1269 if (PL_parser->last_lop)
1270 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1271 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1272 PL_curstash != PL_debstash) {
1273 /* debugger active and we're not compiling the debugger code,
1274 * so store the line into the debugger's array of lines
1275 */
1276 update_debugger_info(NULL, buf+old_bufend_pos,
1277 new_bufend_pos-old_bufend_pos);
1278 }
1279 return got_some;
1280}
1281
1282/*
1283=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1284
1285Looks ahead one (Unicode) character in the text currently being lexed.
1286Returns the codepoint (unsigned integer value) of the next character,
1287or -1 if lexing has reached the end of the input text. To consume the
1288peeked character, use L</lex_read_unichar>.
1289
1290If the next character is in (or extends into) the next chunk of input
1291text, the next chunk will be read in. Normally the current chunk will be
1292discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1293then the current chunk will not be discarded.
1294
1295If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1296is encountered, an exception is generated.
1297
1298=cut
1299*/
1300
1301I32
1302Perl_lex_peek_unichar(pTHX_ U32 flags)
1303{
749123ff 1304 dVAR;
f0e67a1d
Z
1305 char *s, *bufend;
1306 if (flags & ~(LEX_KEEP_PREVIOUS))
1307 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1308 s = PL_parser->bufptr;
1309 bufend = PL_parser->bufend;
1310 if (UTF) {
1311 U8 head;
1312 I32 unichar;
1313 STRLEN len, retlen;
1314 if (s == bufend) {
1315 if (!lex_next_chunk(flags))
1316 return -1;
1317 s = PL_parser->bufptr;
1318 bufend = PL_parser->bufend;
1319 }
1320 head = (U8)*s;
1321 if (!(head & 0x80))
1322 return head;
1323 if (head & 0x40) {
1324 len = PL_utf8skip[head];
1325 while ((STRLEN)(bufend-s) < len) {
1326 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1327 break;
1328 s = PL_parser->bufptr;
1329 bufend = PL_parser->bufend;
1330 }
1331 }
1332 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1333 if (retlen == (STRLEN)-1) {
1334 /* malformed UTF-8 */
1335 ENTER;
1336 SAVESPTR(PL_warnhook);
1337 PL_warnhook = PERL_WARNHOOK_FATAL;
1338 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1339 LEAVE;
1340 }
1341 return unichar;
1342 } else {
1343 if (s == bufend) {
1344 if (!lex_next_chunk(flags))
1345 return -1;
1346 s = PL_parser->bufptr;
1347 }
1348 return (U8)*s;
1349 }
1350}
1351
1352/*
1353=for apidoc Amx|I32|lex_read_unichar|U32 flags
1354
1355Reads the next (Unicode) character in the text currently being lexed.
1356Returns the codepoint (unsigned integer value) of the character read,
1357and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1358if lexing has reached the end of the input text. To non-destructively
1359examine the next character, use L</lex_peek_unichar> instead.
1360
1361If the next character is in (or extends into) the next chunk of input
1362text, the next chunk will be read in. Normally the current chunk will be
1363discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1364then the current chunk will not be discarded.
1365
1366If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1367is encountered, an exception is generated.
1368
1369=cut
1370*/
1371
1372I32
1373Perl_lex_read_unichar(pTHX_ U32 flags)
1374{
1375 I32 c;
1376 if (flags & ~(LEX_KEEP_PREVIOUS))
1377 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1378 c = lex_peek_unichar(flags);
1379 if (c != -1) {
1380 if (c == '\n')
1381 CopLINE_inc(PL_curcop);
1382 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1383 }
1384 return c;
1385}
1386
1387/*
1388=for apidoc Amx|void|lex_read_space|U32 flags
1389
1390Reads optional spaces, in Perl style, in the text currently being
1391lexed. The spaces may include ordinary whitespace characters and
1392Perl-style comments. C<#line> directives are processed if encountered.
1393L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1394at a non-space character (or the end of the input text).
1395
1396If spaces extend into the next chunk of input text, the next chunk will
1397be read in. Normally the current chunk will be discarded at the same
1398time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1399chunk will not be discarded.
1400
1401=cut
1402*/
1403
f0998909
Z
1404#define LEX_NO_NEXT_CHUNK 0x80000000
1405
f0e67a1d
Z
1406void
1407Perl_lex_read_space(pTHX_ U32 flags)
1408{
1409 char *s, *bufend;
1410 bool need_incline = 0;
f0998909 1411 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1412 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1413#ifdef PERL_MAD
1414 if (PL_skipwhite) {
1415 sv_free(PL_skipwhite);
1416 PL_skipwhite = NULL;
1417 }
1418 if (PL_madskills)
1419 PL_skipwhite = newSVpvs("");
1420#endif /* PERL_MAD */
1421 s = PL_parser->bufptr;
1422 bufend = PL_parser->bufend;
1423 while (1) {
1424 char c = *s;
1425 if (c == '#') {
1426 do {
1427 c = *++s;
1428 } while (!(c == '\n' || (c == 0 && s == bufend)));
1429 } else if (c == '\n') {
1430 s++;
1431 PL_parser->linestart = s;
1432 if (s == bufend)
1433 need_incline = 1;
1434 else
1435 incline(s);
1436 } else if (isSPACE(c)) {
1437 s++;
1438 } else if (c == 0 && s == bufend) {
1439 bool got_more;
1440#ifdef PERL_MAD
1441 if (PL_madskills)
1442 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1443#endif /* PERL_MAD */
f0998909
Z
1444 if (flags & LEX_NO_NEXT_CHUNK)
1445 break;
f0e67a1d
Z
1446 PL_parser->bufptr = s;
1447 CopLINE_inc(PL_curcop);
1448 got_more = lex_next_chunk(flags);
1449 CopLINE_dec(PL_curcop);
1450 s = PL_parser->bufptr;
1451 bufend = PL_parser->bufend;
1452 if (!got_more)
1453 break;
1454 if (need_incline && PL_parser->rsfp) {
1455 incline(s);
1456 need_incline = 0;
1457 }
1458 } else {
1459 break;
1460 }
1461 }
1462#ifdef PERL_MAD
1463 if (PL_madskills)
1464 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1465#endif /* PERL_MAD */
1466 PL_parser->bufptr = s;
1467}
1468
1469/*
ffb4593c
NT
1470 * S_incline
1471 * This subroutine has nothing to do with tilting, whether at windmills
1472 * or pinball tables. Its name is short for "increment line". It
57843af0 1473 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1474 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1475 * # line 500 "foo.pm"
1476 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1477 */
1478
76e3520e 1479STATIC void
d9095cec 1480S_incline(pTHX_ const char *s)
463ee0b2 1481{
97aff369 1482 dVAR;
d9095cec
NC
1483 const char *t;
1484 const char *n;
1485 const char *e;
463ee0b2 1486
7918f24d
NC
1487 PERL_ARGS_ASSERT_INCLINE;
1488
57843af0 1489 CopLINE_inc(PL_curcop);
463ee0b2
LW
1490 if (*s++ != '#')
1491 return;
d4c19fe8
AL
1492 while (SPACE_OR_TAB(*s))
1493 s++;
73659bf1
GS
1494 if (strnEQ(s, "line", 4))
1495 s += 4;
1496 else
1497 return;
084592ab 1498 if (SPACE_OR_TAB(*s))
73659bf1 1499 s++;
4e553d73 1500 else
73659bf1 1501 return;
d4c19fe8
AL
1502 while (SPACE_OR_TAB(*s))
1503 s++;
463ee0b2
LW
1504 if (!isDIGIT(*s))
1505 return;
d4c19fe8 1506
463ee0b2
LW
1507 n = s;
1508 while (isDIGIT(*s))
1509 s++;
07714eb4 1510 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1511 return;
bf4acbe4 1512 while (SPACE_OR_TAB(*s))
463ee0b2 1513 s++;
73659bf1 1514 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1515 s++;
73659bf1
GS
1516 e = t + 1;
1517 }
463ee0b2 1518 else {
c35e046a
AL
1519 t = s;
1520 while (!isSPACE(*t))
1521 t++;
73659bf1 1522 e = t;
463ee0b2 1523 }
bf4acbe4 1524 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1525 e++;
1526 if (*e != '\n' && *e != '\0')
1527 return; /* false alarm */
1528
f4dd75d9 1529 if (t - s > 0) {
d9095cec 1530 const STRLEN len = t - s;
8a5ee598 1531#ifndef USE_ITHREADS
19bad673
NC
1532 SV *const temp_sv = CopFILESV(PL_curcop);
1533 const char *cf;
1534 STRLEN tmplen;
1535
1536 if (temp_sv) {
1537 cf = SvPVX(temp_sv);
1538 tmplen = SvCUR(temp_sv);
1539 } else {
1540 cf = NULL;
1541 tmplen = 0;
1542 }
1543
42d9b98d 1544 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1545 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1546 * to *{"::_<newfilename"} */
44867030
NC
1547 /* However, the long form of evals is only turned on by the
1548 debugger - usually they're "(eval %lu)" */
1549 char smallbuf[128];
1550 char *tmpbuf;
1551 GV **gvp;
d9095cec 1552 STRLEN tmplen2 = len;
798b63bc 1553 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1554 tmpbuf = smallbuf;
1555 else
2ae0db35 1556 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1557 tmpbuf[0] = '_';
1558 tmpbuf[1] = '<';
2ae0db35 1559 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1560 tmplen += 2;
8a5ee598
RGS
1561 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1562 if (gvp) {
44867030
NC
1563 char *tmpbuf2;
1564 GV *gv2;
1565
1566 if (tmplen2 + 2 <= sizeof smallbuf)
1567 tmpbuf2 = smallbuf;
1568 else
1569 Newx(tmpbuf2, tmplen2 + 2, char);
1570
1571 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1572 /* Either they malloc'd it, or we malloc'd it,
1573 so no prefix is present in ours. */
1574 tmpbuf2[0] = '_';
1575 tmpbuf2[1] = '<';
1576 }
1577
1578 memcpy(tmpbuf2 + 2, s, tmplen2);
1579 tmplen2 += 2;
1580
8a5ee598 1581 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1582 if (!isGV(gv2)) {
8a5ee598 1583 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1584 /* adjust ${"::_<newfilename"} to store the new file name */
1585 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1586 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1587 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1588 }
44867030
NC
1589
1590 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1591 }
e66cf94c 1592 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1593 }
8a5ee598 1594#endif
05ec9bb3 1595 CopFILE_free(PL_curcop);
d9095cec 1596 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1597 }
57843af0 1598 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1599}
1600
29595ff2 1601#ifdef PERL_MAD
cd81e915 1602/* skip space before PL_thistoken */
29595ff2
NC
1603
1604STATIC char *
1605S_skipspace0(pTHX_ register char *s)
1606{
7918f24d
NC
1607 PERL_ARGS_ASSERT_SKIPSPACE0;
1608
29595ff2
NC
1609 s = skipspace(s);
1610 if (!PL_madskills)
1611 return s;
cd81e915
NC
1612 if (PL_skipwhite) {
1613 if (!PL_thiswhite)
6b29d1f5 1614 PL_thiswhite = newSVpvs("");
cd81e915
NC
1615 sv_catsv(PL_thiswhite, PL_skipwhite);
1616 sv_free(PL_skipwhite);
1617 PL_skipwhite = 0;
1618 }
1619 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1620 return s;
1621}
1622
cd81e915 1623/* skip space after PL_thistoken */
29595ff2
NC
1624
1625STATIC char *
1626S_skipspace1(pTHX_ register char *s)
1627{
d4c19fe8 1628 const char *start = s;
29595ff2
NC
1629 I32 startoff = start - SvPVX(PL_linestr);
1630
7918f24d
NC
1631 PERL_ARGS_ASSERT_SKIPSPACE1;
1632
29595ff2
NC
1633 s = skipspace(s);
1634 if (!PL_madskills)
1635 return s;
1636 start = SvPVX(PL_linestr) + startoff;
cd81e915 1637 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1638 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1639 PL_thistoken = newSVpvn(tstart, start - tstart);
1640 }
1641 PL_realtokenstart = -1;
1642 if (PL_skipwhite) {
1643 if (!PL_nextwhite)
6b29d1f5 1644 PL_nextwhite = newSVpvs("");
cd81e915
NC
1645 sv_catsv(PL_nextwhite, PL_skipwhite);
1646 sv_free(PL_skipwhite);
1647 PL_skipwhite = 0;
29595ff2
NC
1648 }
1649 return s;
1650}
1651
1652STATIC char *
1653S_skipspace2(pTHX_ register char *s, SV **svp)
1654{
c35e046a
AL
1655 char *start;
1656 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1657 const I32 startoff = s - SvPVX(PL_linestr);
1658
7918f24d
NC
1659 PERL_ARGS_ASSERT_SKIPSPACE2;
1660
29595ff2
NC
1661 s = skipspace(s);
1662 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1663 if (!PL_madskills || !svp)
1664 return s;
1665 start = SvPVX(PL_linestr) + startoff;
cd81e915 1666 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1667 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1668 PL_thistoken = newSVpvn(tstart, start - tstart);
1669 PL_realtokenstart = -1;
29595ff2 1670 }
cd81e915 1671 if (PL_skipwhite) {
29595ff2 1672 if (!*svp)
6b29d1f5 1673 *svp = newSVpvs("");
cd81e915
NC
1674 sv_setsv(*svp, PL_skipwhite);
1675 sv_free(PL_skipwhite);
1676 PL_skipwhite = 0;
29595ff2
NC
1677 }
1678
1679 return s;
1680}
1681#endif
1682
80a702cd 1683STATIC void
15f169a1 1684S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1685{
1686 AV *av = CopFILEAVx(PL_curcop);
1687 if (av) {
b9f83d2f 1688 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1689 if (orig_sv)
1690 sv_setsv(sv, orig_sv);
1691 else
1692 sv_setpvn(sv, buf, len);
80a702cd
RGS
1693 (void)SvIOK_on(sv);
1694 SvIV_set(sv, 0);
1695 av_store(av, (I32)CopLINE(PL_curcop), sv);
1696 }
1697}
1698
ffb4593c
NT
1699/*
1700 * S_skipspace
1701 * Called to gobble the appropriate amount and type of whitespace.
1702 * Skips comments as well.
1703 */
1704
76e3520e 1705STATIC char *
cea2e8a9 1706S_skipspace(pTHX_ register char *s)
a687059c 1707{
5db06880 1708#ifdef PERL_MAD
f0e67a1d
Z
1709 char *start = s;
1710#endif /* PERL_MAD */
7918f24d 1711 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1712#ifdef PERL_MAD
cd81e915
NC
1713 if (PL_skipwhite) {
1714 sv_free(PL_skipwhite);
f0e67a1d 1715 PL_skipwhite = NULL;
5db06880 1716 }
f0e67a1d 1717#endif /* PERL_MAD */
3280af22 1718 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1719 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1720 s++;
f0e67a1d
Z
1721 } else {
1722 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1723 PL_bufptr = s;
f0998909
Z
1724 lex_read_space(LEX_KEEP_PREVIOUS |
1725 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1726 LEX_NO_NEXT_CHUNK : 0));
3280af22 1727 s = PL_bufptr;
f0e67a1d
Z
1728 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1729 if (PL_linestart > PL_bufptr)
1730 PL_bufptr = PL_linestart;
1731 return s;
463ee0b2 1732 }
5db06880 1733#ifdef PERL_MAD
f0e67a1d
Z
1734 if (PL_madskills)
1735 PL_skipwhite = newSVpvn(start, s-start);
1736#endif /* PERL_MAD */
5db06880 1737 return s;
a687059c 1738}
378cc40b 1739
ffb4593c
NT
1740/*
1741 * S_check_uni
1742 * Check the unary operators to ensure there's no ambiguity in how they're
1743 * used. An ambiguous piece of code would be:
1744 * rand + 5
1745 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1746 * the +5 is its argument.
1747 */
1748
76e3520e 1749STATIC void
cea2e8a9 1750S_check_uni(pTHX)
ba106d47 1751{
97aff369 1752 dVAR;
d4c19fe8
AL
1753 const char *s;
1754 const char *t;
2f3197b3 1755
3280af22 1756 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1757 return;
3280af22
NIS
1758 while (isSPACE(*PL_last_uni))
1759 PL_last_uni++;
c35e046a
AL
1760 s = PL_last_uni;
1761 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1762 s++;
3280af22 1763 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1764 return;
6136c704 1765
9b387841
NC
1766 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1767 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1768 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1769}
1770
ffb4593c
NT
1771/*
1772 * LOP : macro to build a list operator. Its behaviour has been replaced
1773 * with a subroutine, S_lop() for which LOP is just another name.
1774 */
1775
a0d0e21e
LW
1776#define LOP(f,x) return lop(f,x,s)
1777
ffb4593c
NT
1778/*
1779 * S_lop
1780 * Build a list operator (or something that might be one). The rules:
1781 * - if we have a next token, then it's a list operator [why?]
1782 * - if the next thing is an opening paren, then it's a function
1783 * - else it's a list operator
1784 */
1785
76e3520e 1786STATIC I32
a0be28da 1787S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1788{
97aff369 1789 dVAR;
7918f24d
NC
1790
1791 PERL_ARGS_ASSERT_LOP;
1792
6154021b 1793 pl_yylval.ival = f;
35c8bce7 1794 CLINE;
3280af22
NIS
1795 PL_expect = x;
1796 PL_bufptr = s;
1797 PL_last_lop = PL_oldbufptr;
eb160463 1798 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1799#ifdef PERL_MAD
1800 if (PL_lasttoke)
1801 return REPORT(LSTOP);
1802#else
3280af22 1803 if (PL_nexttoke)
bbf60fe6 1804 return REPORT(LSTOP);
5db06880 1805#endif
79072805 1806 if (*s == '(')
bbf60fe6 1807 return REPORT(FUNC);
29595ff2 1808 s = PEEKSPACE(s);
79072805 1809 if (*s == '(')
bbf60fe6 1810 return REPORT(FUNC);
79072805 1811 else
bbf60fe6 1812 return REPORT(LSTOP);
79072805
LW
1813}
1814
5db06880
NC
1815#ifdef PERL_MAD
1816 /*
1817 * S_start_force
1818 * Sets up for an eventual force_next(). start_force(0) basically does
1819 * an unshift, while start_force(-1) does a push. yylex removes items
1820 * on the "pop" end.
1821 */
1822
1823STATIC void
1824S_start_force(pTHX_ int where)
1825{
1826 int i;
1827
cd81e915 1828 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1829 where = PL_lasttoke;
cd81e915
NC
1830 assert(PL_curforce < 0 || PL_curforce == where);
1831 if (PL_curforce != where) {
5db06880
NC
1832 for (i = PL_lasttoke; i > where; --i) {
1833 PL_nexttoke[i] = PL_nexttoke[i-1];
1834 }
1835 PL_lasttoke++;
1836 }
cd81e915 1837 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1838 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1839 PL_curforce = where;
1840 if (PL_nextwhite) {
5db06880 1841 if (PL_madskills)
6b29d1f5 1842 curmad('^', newSVpvs(""));
cd81e915 1843 CURMAD('_', PL_nextwhite);
5db06880
NC
1844 }
1845}
1846
1847STATIC void
1848S_curmad(pTHX_ char slot, SV *sv)
1849{
1850 MADPROP **where;
1851
1852 if (!sv)
1853 return;
cd81e915
NC
1854 if (PL_curforce < 0)
1855 where = &PL_thismad;
5db06880 1856 else
cd81e915 1857 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1858
cd81e915 1859 if (PL_faketokens)
76f68e9b 1860 sv_setpvs(sv, "");
5db06880
NC
1861 else {
1862 if (!IN_BYTES) {
1863 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1864 SvUTF8_on(sv);
1865 else if (PL_encoding) {
1866 sv_recode_to_utf8(sv, PL_encoding);
1867 }
1868 }
1869 }
1870
1871 /* keep a slot open for the head of the list? */
1872 if (slot != '_' && *where && (*where)->mad_key == '^') {
1873 (*where)->mad_key = slot;
daba3364 1874 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1875 (*where)->mad_val = (void*)sv;
1876 }
1877 else
1878 addmad(newMADsv(slot, sv), where, 0);
1879}
1880#else
b3f24c00
MHM
1881# define start_force(where) NOOP
1882# define curmad(slot, sv) NOOP
5db06880
NC
1883#endif
1884
ffb4593c
NT
1885/*
1886 * S_force_next
9cbb5ea2 1887 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1888 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1889 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1890 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1891 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1892 */
1893
4e553d73 1894STATIC void
cea2e8a9 1895S_force_next(pTHX_ I32 type)
79072805 1896{
97aff369 1897 dVAR;
704d4215
GG
1898#ifdef DEBUGGING
1899 if (DEBUG_T_TEST) {
1900 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1901 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1902 }
1903#endif
5db06880 1904#ifdef PERL_MAD
cd81e915 1905 if (PL_curforce < 0)
5db06880 1906 start_force(PL_lasttoke);
cd81e915 1907 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1908 if (PL_lex_state != LEX_KNOWNEXT)
1909 PL_lex_defer = PL_lex_state;
1910 PL_lex_state = LEX_KNOWNEXT;
1911 PL_lex_expect = PL_expect;
cd81e915 1912 PL_curforce = -1;
5db06880 1913#else
3280af22
NIS
1914 PL_nexttype[PL_nexttoke] = type;
1915 PL_nexttoke++;
1916 if (PL_lex_state != LEX_KNOWNEXT) {
1917 PL_lex_defer = PL_lex_state;
1918 PL_lex_expect = PL_expect;
1919 PL_lex_state = LEX_KNOWNEXT;
79072805 1920 }
5db06880 1921#endif
79072805
LW
1922}
1923
28ac2b49
Z
1924void
1925Perl_yyunlex(pTHX)
1926{
1927 if (PL_parser->yychar != YYEMPTY) {
1928 start_force(-1);
1929 NEXTVAL_NEXTTOKE = PL_parser->yylval;
1930 force_next(PL_parser->yychar);
1931 PL_parser->yychar = YYEMPTY;
1932 }
1933}
1934
d0a148a6 1935STATIC SV *
15f169a1 1936S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1937{
97aff369 1938 dVAR;
740cce10 1939 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1940 !IN_BYTES
1941 && UTF
1942 && !is_ascii_string((const U8*)start, len)
740cce10 1943 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1944 return sv;
1945}
1946
ffb4593c
NT
1947/*
1948 * S_force_word
1949 * When the lexer knows the next thing is a word (for instance, it has
1950 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1951 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1952 * lookahead.
ffb4593c
NT
1953 *
1954 * Arguments:
b1b65b59 1955 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1956 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1957 * int check_keyword : if true, Perl checks to make sure the word isn't
1958 * a keyword (do this if the word is a label, e.g. goto FOO)
1959 * int allow_pack : if true, : characters will also be allowed (require,
1960 * use, etc. do this)
9cbb5ea2 1961 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1962 */
1963
76e3520e 1964STATIC char *
cea2e8a9 1965S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1966{
97aff369 1967 dVAR;
463ee0b2
LW
1968 register char *s;
1969 STRLEN len;
4e553d73 1970
7918f24d
NC
1971 PERL_ARGS_ASSERT_FORCE_WORD;
1972
29595ff2 1973 start = SKIPSPACE1(start);
463ee0b2 1974 s = start;
7e2040f0 1975 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1976 (allow_pack && *s == ':') ||
15f0808c 1977 (allow_initial_tick && *s == '\'') )
a0d0e21e 1978 {
3280af22 1979 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1980 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1981 return start;
cd81e915 1982 start_force(PL_curforce);
5db06880
NC
1983 if (PL_madskills)
1984 curmad('X', newSVpvn(start,s-start));
463ee0b2 1985 if (token == METHOD) {
29595ff2 1986 s = SKIPSPACE1(s);
463ee0b2 1987 if (*s == '(')
3280af22 1988 PL_expect = XTERM;
463ee0b2 1989 else {
3280af22 1990 PL_expect = XOPERATOR;
463ee0b2 1991 }
79072805 1992 }
e74e6b3d 1993 if (PL_madskills)
63575281 1994 curmad('g', newSVpvs( "forced" ));
9ded7720 1995 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1996 = (OP*)newSVOP(OP_CONST,0,
1997 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1998 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1999 force_next(token);
2000 }
2001 return s;
2002}
2003
ffb4593c
NT
2004/*
2005 * S_force_ident
9cbb5ea2 2006 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2007 * text only contains the "foo" portion. The first argument is a pointer
2008 * to the "foo", and the second argument is the type symbol to prefix.
2009 * Forces the next token to be a "WORD".
9cbb5ea2 2010 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2011 */
2012
76e3520e 2013STATIC void
bfed75c6 2014S_force_ident(pTHX_ register const char *s, int kind)
79072805 2015{
97aff369 2016 dVAR;
7918f24d
NC
2017
2018 PERL_ARGS_ASSERT_FORCE_IDENT;
2019
c35e046a 2020 if (*s) {
90e5519e
NC
2021 const STRLEN len = strlen(s);
2022 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2023 start_force(PL_curforce);
9ded7720 2024 NEXTVAL_NEXTTOKE.opval = o;
79072805 2025 force_next(WORD);
748a9306 2026 if (kind) {
11343788 2027 o->op_private = OPpCONST_ENTERED;
55497cff 2028 /* XXX see note in pp_entereval() for why we forgo typo
2029 warnings if the symbol must be introduced in an eval.
2030 GSAR 96-10-12 */
90e5519e
NC
2031 gv_fetchpvn_flags(s, len,
2032 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2033 : GV_ADD,
2034 kind == '$' ? SVt_PV :
2035 kind == '@' ? SVt_PVAV :
2036 kind == '%' ? SVt_PVHV :
a0d0e21e 2037 SVt_PVGV
90e5519e 2038 );
748a9306 2039 }
79072805
LW
2040 }
2041}
2042
1571675a
GS
2043NV
2044Perl_str_to_version(pTHX_ SV *sv)
2045{
2046 NV retval = 0.0;
2047 NV nshift = 1.0;
2048 STRLEN len;
cfd0369c 2049 const char *start = SvPV_const(sv,len);
9d4ba2ae 2050 const char * const end = start + len;
504618e9 2051 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2052
2053 PERL_ARGS_ASSERT_STR_TO_VERSION;
2054
1571675a 2055 while (start < end) {
ba210ebe 2056 STRLEN skip;
1571675a
GS
2057 UV n;
2058 if (utf)
9041c2e3 2059 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2060 else {
2061 n = *(U8*)start;
2062 skip = 1;
2063 }
2064 retval += ((NV)n)/nshift;
2065 start += skip;
2066 nshift *= 1000;
2067 }
2068 return retval;
2069}
2070
4e553d73 2071/*
ffb4593c
NT
2072 * S_force_version
2073 * Forces the next token to be a version number.
e759cc13
RGS
2074 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2075 * and if "guessing" is TRUE, then no new token is created (and the caller
2076 * must use an alternative parsing method).
ffb4593c
NT
2077 */
2078
76e3520e 2079STATIC char *
e759cc13 2080S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2081{
97aff369 2082 dVAR;
5f66b61c 2083 OP *version = NULL;
44dcb63b 2084 char *d;
5db06880
NC
2085#ifdef PERL_MAD
2086 I32 startoff = s - SvPVX(PL_linestr);
2087#endif
89bfa8cd 2088
7918f24d
NC
2089 PERL_ARGS_ASSERT_FORCE_VERSION;
2090
29595ff2 2091 s = SKIPSPACE1(s);
89bfa8cd 2092
44dcb63b 2093 d = s;
dd629d5b 2094 if (*d == 'v')
44dcb63b 2095 d++;
44dcb63b 2096 if (isDIGIT(*d)) {
e759cc13
RGS
2097 while (isDIGIT(*d) || *d == '_' || *d == '.')
2098 d++;
5db06880
NC
2099#ifdef PERL_MAD
2100 if (PL_madskills) {
cd81e915 2101 start_force(PL_curforce);
5db06880
NC
2102 curmad('X', newSVpvn(s,d-s));
2103 }
2104#endif
4e4da3ac 2105 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2106 SV *ver;
8d08d9ba
DG
2107#ifdef USE_LOCALE_NUMERIC
2108 char *loc = setlocale(LC_NUMERIC, "C");
2109#endif
6154021b 2110 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2111#ifdef USE_LOCALE_NUMERIC
2112 setlocale(LC_NUMERIC, loc);
2113#endif
6154021b 2114 version = pl_yylval.opval;
dd629d5b
GS
2115 ver = cSVOPx(version)->op_sv;
2116 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2117 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2118 SvNV_set(ver, str_to_version(ver));
1571675a 2119 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2120 }
89bfa8cd 2121 }
5db06880
NC
2122 else if (guessing) {
2123#ifdef PERL_MAD
2124 if (PL_madskills) {
cd81e915
NC
2125 sv_free(PL_nextwhite); /* let next token collect whitespace */
2126 PL_nextwhite = 0;
5db06880
NC
2127 s = SvPVX(PL_linestr) + startoff;
2128 }
2129#endif
e759cc13 2130 return s;
5db06880 2131 }
89bfa8cd 2132 }
2133
5db06880
NC
2134#ifdef PERL_MAD
2135 if (PL_madskills && !version) {
cd81e915
NC
2136 sv_free(PL_nextwhite); /* let next token collect whitespace */
2137 PL_nextwhite = 0;
5db06880
NC
2138 s = SvPVX(PL_linestr) + startoff;
2139 }
2140#endif
89bfa8cd 2141 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2142 start_force(PL_curforce);
9ded7720 2143 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2144 force_next(WORD);
89bfa8cd 2145
e759cc13 2146 return s;
89bfa8cd 2147}
2148
ffb4593c 2149/*
91152fc1
DG
2150 * S_force_strict_version
2151 * Forces the next token to be a version number using strict syntax rules.
2152 */
2153
2154STATIC char *
2155S_force_strict_version(pTHX_ char *s)
2156{
2157 dVAR;
2158 OP *version = NULL;
2159#ifdef PERL_MAD
2160 I32 startoff = s - SvPVX(PL_linestr);
2161#endif
2162 const char *errstr = NULL;
2163
2164 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2165
2166 while (isSPACE(*s)) /* leading whitespace */
2167 s++;
2168
2169 if (is_STRICT_VERSION(s,&errstr)) {
2170 SV *ver = newSV(0);
2171 s = (char *)scan_version(s, ver, 0);
2172 version = newSVOP(OP_CONST, 0, ver);
2173 }
4e4da3ac
Z
2174 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2175 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2176 {
91152fc1
DG
2177 PL_bufptr = s;
2178 if (errstr)
2179 yyerror(errstr); /* version required */
2180 return s;
2181 }
2182
2183#ifdef PERL_MAD
2184 if (PL_madskills && !version) {
2185 sv_free(PL_nextwhite); /* let next token collect whitespace */
2186 PL_nextwhite = 0;
2187 s = SvPVX(PL_linestr) + startoff;
2188 }
2189#endif
2190 /* NOTE: The parser sees the package name and the VERSION swapped */
2191 start_force(PL_curforce);
2192 NEXTVAL_NEXTTOKE.opval = version;
2193 force_next(WORD);
2194
2195 return s;
2196}
2197
2198/*
ffb4593c
NT
2199 * S_tokeq
2200 * Tokenize a quoted string passed in as an SV. It finds the next
2201 * chunk, up to end of string or a backslash. It may make a new
2202 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2203 * turns \\ into \.
2204 */
2205
76e3520e 2206STATIC SV *
cea2e8a9 2207S_tokeq(pTHX_ SV *sv)
79072805 2208{
97aff369 2209 dVAR;
79072805
LW
2210 register char *s;
2211 register char *send;
2212 register char *d;
b3ac6de7
IZ
2213 STRLEN len = 0;
2214 SV *pv = sv;
79072805 2215
7918f24d
NC
2216 PERL_ARGS_ASSERT_TOKEQ;
2217
79072805 2218 if (!SvLEN(sv))
b3ac6de7 2219 goto finish;
79072805 2220
a0d0e21e 2221 s = SvPV_force(sv, len);
21a311ee 2222 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2223 goto finish;
463ee0b2 2224 send = s + len;
79072805
LW
2225 while (s < send && *s != '\\')
2226 s++;
2227 if (s == send)
b3ac6de7 2228 goto finish;
79072805 2229 d = s;
be4731d2 2230 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2231 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2232 }
79072805
LW
2233 while (s < send) {
2234 if (*s == '\\') {
a0d0e21e 2235 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2236 s++; /* all that, just for this */
2237 }
2238 *d++ = *s++;
2239 }
2240 *d = '\0';
95a20fc0 2241 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2242 finish:
3280af22 2243 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2244 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2245 return sv;
2246}
2247
ffb4593c
NT
2248/*
2249 * Now come three functions related to double-quote context,
2250 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2251 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2252 * interact with PL_lex_state, and create fake ( ... ) argument lists
2253 * to handle functions and concatenation.
2254 * They assume that whoever calls them will be setting up a fake
2255 * join call, because each subthing puts a ',' after it. This lets
2256 * "lower \luPpEr"
2257 * become
2258 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2259 *
2260 * (I'm not sure whether the spurious commas at the end of lcfirst's
2261 * arguments and join's arguments are created or not).
2262 */
2263
2264/*
2265 * S_sublex_start
6154021b 2266 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2267 *
2268 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2269 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2270 *
2271 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2272 *
2273 * Everything else becomes a FUNC.
2274 *
2275 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2276 * had an OP_CONST or OP_READLINE). This just sets us up for a
2277 * call to S_sublex_push().
2278 */
2279
76e3520e 2280STATIC I32
cea2e8a9 2281S_sublex_start(pTHX)
79072805 2282{
97aff369 2283 dVAR;
6154021b 2284 register const I32 op_type = pl_yylval.ival;
79072805
LW
2285
2286 if (op_type == OP_NULL) {
6154021b 2287 pl_yylval.opval = PL_lex_op;
5f66b61c 2288 PL_lex_op = NULL;
79072805
LW
2289 return THING;
2290 }
2291 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2292 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2293
2294 if (SvTYPE(sv) == SVt_PVIV) {
2295 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2296 STRLEN len;
96a5add6 2297 const char * const p = SvPV_const(sv, len);
740cce10 2298 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2299 SvREFCNT_dec(sv);
2300 sv = nsv;
4e553d73 2301 }
6154021b 2302 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2303 PL_lex_stuff = NULL;
6f33ba73
RGS
2304 /* Allow <FH> // "foo" */
2305 if (op_type == OP_READLINE)
2306 PL_expect = XTERMORDORDOR;
79072805
LW
2307 return THING;
2308 }
e3f73d4e
RGS
2309 else if (op_type == OP_BACKTICK && PL_lex_op) {
2310 /* readpipe() vas overriden */
2311 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2312 pl_yylval.opval = PL_lex_op;
9b201d7d 2313 PL_lex_op = NULL;
e3f73d4e
RGS
2314 PL_lex_stuff = NULL;
2315 return THING;
2316 }
79072805 2317
3280af22 2318 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2319 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2320 PL_sublex_info.sub_op = PL_lex_op;
2321 PL_lex_state = LEX_INTERPPUSH;
55497cff 2322
3280af22
NIS
2323 PL_expect = XTERM;
2324 if (PL_lex_op) {
6154021b 2325 pl_yylval.opval = PL_lex_op;
5f66b61c 2326 PL_lex_op = NULL;
55497cff 2327 return PMFUNC;
2328 }
2329 else
2330 return FUNC;
2331}
2332
ffb4593c
NT
2333/*
2334 * S_sublex_push
2335 * Create a new scope to save the lexing state. The scope will be
2336 * ended in S_sublex_done. Returns a '(', starting the function arguments
2337 * to the uc, lc, etc. found before.
2338 * Sets PL_lex_state to LEX_INTERPCONCAT.
2339 */
2340
76e3520e 2341STATIC I32
cea2e8a9 2342S_sublex_push(pTHX)
55497cff 2343{
27da23d5 2344 dVAR;
f46d017c 2345 ENTER;
55497cff 2346
3280af22 2347 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2348 SAVEBOOL(PL_lex_dojoin);
3280af22 2349 SAVEI32(PL_lex_brackets);
3280af22
NIS
2350 SAVEI32(PL_lex_casemods);
2351 SAVEI32(PL_lex_starts);
651b5b28 2352 SAVEI8(PL_lex_state);
7766f137 2353 SAVEVPTR(PL_lex_inpat);
98246f1e 2354 SAVEI16(PL_lex_inwhat);
57843af0 2355 SAVECOPLINE(PL_curcop);
3280af22 2356 SAVEPPTR(PL_bufptr);
8452ff4b 2357 SAVEPPTR(PL_bufend);
3280af22
NIS
2358 SAVEPPTR(PL_oldbufptr);
2359 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2360 SAVEPPTR(PL_last_lop);
2361 SAVEPPTR(PL_last_uni);
3280af22
NIS
2362 SAVEPPTR(PL_linestart);
2363 SAVESPTR(PL_linestr);
8edd5f42
RGS
2364 SAVEGENERICPV(PL_lex_brackstack);
2365 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2366
2367 PL_linestr = PL_lex_stuff;
a0714e2c 2368 PL_lex_stuff = NULL;
3280af22 2369
9cbb5ea2
GS
2370 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2371 = SvPVX(PL_linestr);
3280af22 2372 PL_bufend += SvCUR(PL_linestr);
bd61b366 2373 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2374 SAVEFREESV(PL_linestr);
2375
2376 PL_lex_dojoin = FALSE;
2377 PL_lex_brackets = 0;
a02a5408
JC
2378 Newx(PL_lex_brackstack, 120, char);
2379 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2380 PL_lex_casemods = 0;
2381 *PL_lex_casestack = '\0';
2382 PL_lex_starts = 0;
2383 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2384 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2385
2386 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2387 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2388 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2389 else
5f66b61c 2390 PL_lex_inpat = NULL;
79072805 2391
55497cff 2392 return '(';
79072805
LW
2393}
2394
ffb4593c
NT
2395/*
2396 * S_sublex_done
2397 * Restores lexer state after a S_sublex_push.
2398 */
2399
76e3520e 2400STATIC I32
cea2e8a9 2401S_sublex_done(pTHX)
79072805 2402{
27da23d5 2403 dVAR;
3280af22 2404 if (!PL_lex_starts++) {
396482e1 2405 SV * const sv = newSVpvs("");
9aa983d2
JH
2406 if (SvUTF8(PL_linestr))
2407 SvUTF8_on(sv);
3280af22 2408 PL_expect = XOPERATOR;
6154021b 2409 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2410 return THING;
2411 }
2412
3280af22
NIS
2413 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2414 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2415 return yylex();
79072805
LW
2416 }
2417
ffb4593c 2418 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2419 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2420 PL_linestr = PL_lex_repl;
2421 PL_lex_inpat = 0;
2422 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2423 PL_bufend += SvCUR(PL_linestr);
bd61b366 2424 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2425 SAVEFREESV(PL_linestr);
2426 PL_lex_dojoin = FALSE;
2427 PL_lex_brackets = 0;
3280af22
NIS
2428 PL_lex_casemods = 0;
2429 *PL_lex_casestack = '\0';
2430 PL_lex_starts = 0;
25da4f38 2431 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2432 PL_lex_state = LEX_INTERPNORMAL;
2433 PL_lex_starts++;
e9fa98b2
HS
2434 /* we don't clear PL_lex_repl here, so that we can check later
2435 whether this is an evalled subst; that means we rely on the
2436 logic to ensure sublex_done() is called again only via the
2437 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2438 }
e9fa98b2 2439 else {
3280af22 2440 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2441 PL_lex_repl = NULL;
e9fa98b2 2442 }
79072805 2443 return ',';
ffed7fef
LW
2444 }
2445 else {
5db06880
NC
2446#ifdef PERL_MAD
2447 if (PL_madskills) {
cd81e915
NC
2448 if (PL_thiswhite) {
2449 if (!PL_endwhite)
6b29d1f5 2450 PL_endwhite = newSVpvs("");
cd81e915
NC
2451 sv_catsv(PL_endwhite, PL_thiswhite);
2452 PL_thiswhite = 0;
2453 }
2454 if (PL_thistoken)
76f68e9b 2455 sv_setpvs(PL_thistoken,"");
5db06880 2456 else
cd81e915 2457 PL_realtokenstart = -1;
5db06880
NC
2458 }
2459#endif
f46d017c 2460 LEAVE;
3280af22
NIS
2461 PL_bufend = SvPVX(PL_linestr);
2462 PL_bufend += SvCUR(PL_linestr);
2463 PL_expect = XOPERATOR;
09bef843 2464 PL_sublex_info.sub_inwhat = 0;
79072805 2465 return ')';
ffed7fef
LW
2466 }
2467}
2468
02aa26ce
NT
2469/*
2470 scan_const
2471
2472 Extracts a pattern, double-quoted string, or transliteration. This
2473 is terrifying code.
2474
94def140 2475 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2476 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2477 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2478
94def140
TS
2479 Returns a pointer to the character scanned up to. If this is
2480 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2481 successfully parsed), will leave an OP for the substring scanned
6154021b 2482 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2483 by looking at the next characters herself.
2484
02aa26ce
NT
2485 In patterns:
2486 backslashes:
ff3f963a 2487 constants: \N{NAME} only
02aa26ce
NT
2488 case and quoting: \U \Q \E
2489 stops on @ and $, but not for $ as tail anchor
2490
2491 In transliterations:
2492 characters are VERY literal, except for - not at the start or end
94def140
TS
2493 of the string, which indicates a range. If the range is in bytes,
2494 scan_const expands the range to the full set of intermediate
2495 characters. If the range is in utf8, the hyphen is replaced with
2496 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2497
2498 In double-quoted strings:
2499 backslashes:
2500 double-quoted style: \r and \n
ff3f963a 2501 constants: \x31, etc.
94def140 2502 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2503 case and quoting: \U \Q \E
2504 stops on @ and $
2505
2506 scan_const does *not* construct ops to handle interpolated strings.
2507 It stops processing as soon as it finds an embedded $ or @ variable
2508 and leaves it to the caller to work out what's going on.
2509
94def140
TS
2510 embedded arrays (whether in pattern or not) could be:
2511 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2512
2513 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2514
2515 $ in pattern could be $foo or could be tail anchor. Assumption:
2516 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2517 followed by one of "()| \r\n\t"
02aa26ce
NT
2518
2519 \1 (backreferences) are turned into $1
2520
2521 The structure of the code is
2522 while (there's a character to process) {
94def140
TS
2523 handle transliteration ranges
2524 skip regexp comments /(?#comment)/ and codes /(?{code})/
2525 skip #-initiated comments in //x patterns
2526 check for embedded arrays
02aa26ce
NT
2527 check for embedded scalars
2528 if (backslash) {
94def140 2529 deprecate \1 in substitution replacements
02aa26ce
NT
2530 handle string-changing backslashes \l \U \Q \E, etc.
2531 switch (what was escaped) {
94def140 2532 handle \- in a transliteration (becomes a literal -)
ff3f963a 2533 if a pattern and not \N{, go treat as regular character
94def140
TS
2534 handle \132 (octal characters)
2535 handle \x15 and \x{1234} (hex characters)
ff3f963a 2536 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2537 handle \cV (control characters)
2538 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2539 } (end switch)
77a135fe 2540 continue
02aa26ce 2541 } (end if backslash)
77a135fe 2542 handle regular character
02aa26ce 2543 } (end while character to read)
4e553d73 2544
02aa26ce
NT
2545*/
2546
76e3520e 2547STATIC char *
cea2e8a9 2548S_scan_const(pTHX_ char *start)
79072805 2549{
97aff369 2550 dVAR;
3280af22 2551 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2552 SV *sv = newSV(send - start); /* sv for the constant. See
2553 note below on sizing. */
02aa26ce
NT
2554 register char *s = start; /* start of the constant */
2555 register char *d = SvPVX(sv); /* destination for copies */
2556 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2557 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2558 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2559 I32 this_utf8 = UTF; /* Is the source string assumed
2560 to be UTF8? But, this can
2561 show as true when the source
2562 isn't utf8, as for example
2563 when it is entirely composed
2564 of hex constants */
2565
2566 /* Note on sizing: The scanned constant is placed into sv, which is
2567 * initialized by newSV() assuming one byte of output for every byte of
2568 * input. This routine expects newSV() to allocate an extra byte for a
2569 * trailing NUL, which this routine will append if it gets to the end of
2570 * the input. There may be more bytes of input than output (eg., \N{LATIN
2571 * CAPITAL LETTER A}), or more output than input if the constant ends up
2572 * recoded to utf8, but each time a construct is found that might increase
2573 * the needed size, SvGROW() is called. Its size parameter each time is
2574 * based on the best guess estimate at the time, namely the length used so
2575 * far, plus the length the current construct will occupy, plus room for
2576 * the trailing NUL, plus one byte for every input byte still unscanned */
2577
012bcf8d 2578 UV uv;
4c3a8340
TS
2579#ifdef EBCDIC
2580 UV literal_endpoint = 0;
e294cc5d 2581 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2582#endif
012bcf8d 2583
7918f24d
NC
2584 PERL_ARGS_ASSERT_SCAN_CONST;
2585
2b9d42f0
NIS
2586 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2587 /* If we are doing a trans and we know we want UTF8 set expectation */
2588 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2589 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2590 }
2591
2592
79072805 2593 while (s < send || dorange) {
ff3f963a 2594
02aa26ce 2595 /* get transliterations out of the way (they're most literal) */
3280af22 2596 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2597 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2598 if (dorange) {
1ba5c669
JH
2599 I32 i; /* current expanded character */
2600 I32 min; /* first character in range */
2601 I32 max; /* last character in range */
02aa26ce 2602
e294cc5d
JH
2603#ifdef EBCDIC
2604 UV uvmax = 0;
2605#endif
2606
2607 if (has_utf8
2608#ifdef EBCDIC
2609 && !native_range
2610#endif
2611 ) {
9d4ba2ae 2612 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2613 char *e = d++;
2614 while (e-- > c)
2615 *(e + 1) = *e;
25716404 2616 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2617 /* mark the range as done, and continue */
2618 dorange = FALSE;
2619 didrange = TRUE;
2620 continue;
2621 }
2b9d42f0 2622
95a20fc0 2623 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2624#ifdef EBCDIC
2625 SvGROW(sv,
2626 SvLEN(sv) + (has_utf8 ?
2627 (512 - UTF_CONTINUATION_MARK +
2628 UNISKIP(0x100))
2629 : 256));
2630 /* How many two-byte within 0..255: 128 in UTF-8,
2631 * 96 in UTF-8-mod. */
2632#else
9cbb5ea2 2633 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2634#endif
9cbb5ea2 2635 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2636#ifdef EBCDIC
2637 if (has_utf8) {
2638 int j;
2639 for (j = 0; j <= 1; j++) {
2640 char * const c = (char*)utf8_hop((U8*)d, -1);
2641 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2642 if (j)
2643 min = (U8)uv;
2644 else if (uv < 256)
2645 max = (U8)uv;
2646 else {
2647 max = (U8)0xff; /* only to \xff */
2648 uvmax = uv; /* \x{100} to uvmax */
2649 }
2650 d = c; /* eat endpoint chars */
2651 }
2652 }
2653 else {
2654#endif
2655 d -= 2; /* eat the first char and the - */
2656 min = (U8)*d; /* first char in range */
2657 max = (U8)d[1]; /* last char in range */
2658#ifdef EBCDIC
2659 }
2660#endif
8ada0baa 2661
c2e66d9e 2662 if (min > max) {
01ec43d0 2663 Perl_croak(aTHX_
d1573ac7 2664 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2665 (char)min, (char)max);
c2e66d9e
GS
2666 }
2667
c7f1f016 2668#ifdef EBCDIC
4c3a8340
TS
2669 if (literal_endpoint == 2 &&
2670 ((isLOWER(min) && isLOWER(max)) ||
2671 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2672 if (isLOWER(min)) {
2673 for (i = min; i <= max; i++)
2674 if (isLOWER(i))
db42d148 2675 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2676 } else {
2677 for (i = min; i <= max; i++)
2678 if (isUPPER(i))
db42d148 2679 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2680 }
2681 }
2682 else
2683#endif
2684 for (i = min; i <= max; i++)
e294cc5d
JH
2685#ifdef EBCDIC
2686 if (has_utf8) {
2687 const U8 ch = (U8)NATIVE_TO_UTF(i);
2688 if (UNI_IS_INVARIANT(ch))
2689 *d++ = (U8)i;
2690 else {
2691 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2692 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2693 }
2694 }
2695 else
2696#endif
2697 *d++ = (char)i;
2698
2699#ifdef EBCDIC
2700 if (uvmax) {
2701 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2702 if (uvmax > 0x101)
2703 *d++ = (char)UTF_TO_NATIVE(0xff);
2704 if (uvmax > 0x100)
2705 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2706 }
2707#endif
02aa26ce
NT
2708
2709 /* mark the range as done, and continue */
79072805 2710 dorange = FALSE;
01ec43d0 2711 didrange = TRUE;
4c3a8340
TS
2712#ifdef EBCDIC
2713 literal_endpoint = 0;
2714#endif
79072805 2715 continue;
4e553d73 2716 }
02aa26ce
NT
2717
2718 /* range begins (ignore - as first or last char) */
79072805 2719 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2720 if (didrange) {
1fafa243 2721 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2722 }
e294cc5d
JH
2723 if (has_utf8
2724#ifdef EBCDIC
2725 && !native_range
2726#endif
2727 ) {
25716404 2728 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2729 s++;
2730 continue;
2731 }
79072805
LW
2732 dorange = TRUE;
2733 s++;
01ec43d0
GS
2734 }
2735 else {
2736 didrange = FALSE;
4c3a8340
TS
2737#ifdef EBCDIC
2738 literal_endpoint = 0;
e294cc5d 2739 native_range = TRUE;
4c3a8340 2740#endif
01ec43d0 2741 }
79072805 2742 }
02aa26ce
NT
2743
2744 /* if we get here, we're not doing a transliteration */
2745
0f5d15d6
IZ
2746 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2747 except for the last char, which will be done separately. */
3280af22 2748 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2749 if (s[2] == '#') {
e994fd66 2750 while (s+1 < send && *s != ')')
db42d148 2751 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2752 }
2753 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2754 || (s[2] == '?' && s[3] == '{'))
155aba94 2755 {
cc6b7395 2756 I32 count = 1;
0f5d15d6 2757 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2758 char c;
2759
d9f97599
GS
2760 while (count && (c = *regparse)) {
2761 if (c == '\\' && regparse[1])
2762 regparse++;
4e553d73 2763 else if (c == '{')
cc6b7395 2764 count++;
4e553d73 2765 else if (c == '}')
cc6b7395 2766 count--;
d9f97599 2767 regparse++;
cc6b7395 2768 }
e994fd66 2769 if (*regparse != ')')
5bdf89e7 2770 regparse--; /* Leave one char for continuation. */
0f5d15d6 2771 while (s < regparse)
db42d148 2772 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2773 }
748a9306 2774 }
02aa26ce
NT
2775
2776 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2777 else if (*s == '#' && PL_lex_inpat &&
2778 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2779 while (s+1 < send && *s != '\n')
db42d148 2780 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2781 }
02aa26ce 2782
5d1d4326 2783 /* check for embedded arrays
da6eedaa 2784 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2785 */
1749ea0d
TS
2786 else if (*s == '@' && s[1]) {
2787 if (isALNUM_lazy_if(s+1,UTF))
2788 break;
2789 if (strchr(":'{$", s[1]))
2790 break;
2791 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2792 break; /* in regexp, neither @+ nor @- are interpolated */
2793 }
02aa26ce
NT
2794
2795 /* check for embedded scalars. only stop if we're sure it's a
2796 variable.
2797 */
79072805 2798 else if (*s == '$') {
3280af22 2799 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2800 break;
77772344 2801 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2802 if (s[1] == '\\') {
2803 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2804 "Possible unintended interpolation of $\\ in regex");
77772344 2805 }
79072805 2806 break; /* in regexp, $ might be tail anchor */
77772344 2807 }
79072805 2808 }
02aa26ce 2809
2b9d42f0
NIS
2810 /* End of else if chain - OP_TRANS rejoin rest */
2811
02aa26ce 2812 /* backslashes */
79072805 2813 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2814 char* e; /* Can be used for ending '}', etc. */
2815
79072805 2816 s++;
02aa26ce 2817
7d0fc23c
KW
2818 /* warn on \1 - \9 in substitution replacements, but note that \11
2819 * is an octal; and \19 is \1 followed by '9' */
3280af22 2820 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2821 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2822 {
a2a5de95 2823 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2824 *--s = '$';
2825 break;
2826 }
02aa26ce
NT
2827
2828 /* string-change backslash escapes */
3280af22 2829 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2830 --s;
2831 break;
2832 }
ff3f963a
KW
2833 /* In a pattern, process \N, but skip any other backslash escapes.
2834 * This is because we don't want to translate an escape sequence
2835 * into a meta symbol and have the regex compiler use the meta
2836 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2837 * in spite of this, we do have to process \N here while the proper
2838 * charnames handler is in scope. See bugs #56444 and #62056.
2839 * There is a complication because \N in a pattern may also stand
2840 * for 'match a non-nl', and not mean a charname, in which case its
2841 * processing should be deferred to the regex compiler. To be a
2842 * charname it must be followed immediately by a '{', and not look
2843 * like \N followed by a curly quantifier, i.e., not something like
2844 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2845 * quantifier */
2846 else if (PL_lex_inpat
2847 && (*s != 'N'
2848 || s[1] != '{'
2849 || regcurly(s + 1)))
2850 {
cc74c5bd
TS
2851 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2852 goto default_action;
2853 }
02aa26ce 2854
79072805 2855 switch (*s) {
02aa26ce
NT
2856
2857 /* quoted - in transliterations */
79072805 2858 case '-':
3280af22 2859 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2860 *d++ = *s++;
2861 continue;
2862 }
2863 /* FALL THROUGH */
2864 default:
11b8faa4 2865 {
a2a5de95
NC
2866 if ((isALPHA(*s) || isDIGIT(*s)))
2867 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2868 "Unrecognized escape \\%c passed through",
2869 *s);
11b8faa4 2870 /* default action is to copy the quoted character */
f9a63242 2871 goto default_action;
11b8faa4 2872 }
02aa26ce 2873
632403cc 2874 /* eg. \132 indicates the octal constant 0132 */
79072805
LW
2875 case '0': case '1': case '2': case '3':
2876 case '4': case '5': case '6': case '7':
ba210ebe 2877 {
53305cf1
NC
2878 I32 flags = 0;
2879 STRLEN len = 3;
77a135fe 2880 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2881 s += len;
2882 }
012bcf8d 2883 goto NUM_ESCAPE_INSERT;
02aa26ce 2884
f0a2b745
KW
2885 /* eg. \o{24} indicates the octal constant \024 */
2886 case 'o':
2887 {
2888 STRLEN len;
454155d9 2889 const char* error;
f0a2b745 2890
454155d9 2891 bool valid = grok_bslash_o(s, &uv, &len, &error, 1);
f0a2b745 2892 s += len;
454155d9 2893 if (! valid) {
f0a2b745
KW
2894 yyerror(error);
2895 continue;
2896 }
2897 goto NUM_ESCAPE_INSERT;
2898 }
2899
77a135fe 2900 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2901 case 'x':
a0ed51b3
LW
2902 ++s;
2903 if (*s == '{') {
9d4ba2ae 2904 char* const e = strchr(s, '}');
a4c04bdc
NC
2905 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2906 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2907 STRLEN len;
355860ce 2908
53305cf1 2909 ++s;
adaeee49 2910 if (!e) {
a0ed51b3 2911 yyerror("Missing right brace on \\x{}");
355860ce 2912 continue;
ba210ebe 2913 }
53305cf1 2914 len = e - s;
77a135fe 2915 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2916 s = e + 1;
a0ed51b3
LW
2917 }
2918 else {
ba210ebe 2919 {
53305cf1 2920 STRLEN len = 2;
a4c04bdc 2921 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2922 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2923 s += len;
2924 }
012bcf8d
GS
2925 }
2926
2927 NUM_ESCAPE_INSERT:
ff3f963a
KW
2928 /* Insert oct or hex escaped character. There will always be
2929 * enough room in sv since such escapes will be longer than any
2930 * UTF-8 sequence they can end up as, except if they force us
2931 * to recode the rest of the string into utf8 */
ba7cea30 2932
77a135fe 2933 /* Here uv is the ordinal of the next character being added in
ff3f963a 2934 * unicode (converted from native). */
77a135fe 2935 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2936 if (!has_utf8 && uv > 255) {
77a135fe
KW
2937 /* Might need to recode whatever we have accumulated so
2938 * far if it contains any chars variant in utf8 or
2939 * utf-ebcdic. */
2940
2941 SvCUR_set(sv, d - SvPVX_const(sv));
2942 SvPOK_on(sv);
2943 *d = '\0';
77a135fe 2944 /* See Note on sizing above. */
7bf79863
KW
2945 sv_utf8_upgrade_flags_grow(sv,
2946 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2947 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2948 d = SvPVX(sv) + SvCUR(sv);
2949 has_utf8 = TRUE;
012bcf8d
GS
2950 }
2951
77a135fe
KW
2952 if (has_utf8) {
2953 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2954 if (PL_lex_inwhat == OP_TRANS &&
2955 PL_sublex_info.sub_op) {
2956 PL_sublex_info.sub_op->op_private |=
2957 (PL_lex_repl ? OPpTRANS_FROM_UTF
2958 : OPpTRANS_TO_UTF);
f9a63242 2959 }
e294cc5d
JH
2960#ifdef EBCDIC
2961 if (uv > 255 && !dorange)
2962 native_range = FALSE;
2963#endif
012bcf8d 2964 }
a0ed51b3 2965 else {
012bcf8d 2966 *d++ = (char)uv;
a0ed51b3 2967 }
012bcf8d
GS
2968 }
2969 else {
c4d5f83a 2970 *d++ = (char) uv;
a0ed51b3 2971 }
79072805 2972 continue;
02aa26ce 2973
4a2d328f 2974 case 'N':
ff3f963a
KW
2975 /* In a non-pattern \N must be a named character, like \N{LATIN
2976 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
2977 * mean to match a non-newline. For non-patterns, named
2978 * characters are converted to their string equivalents. In
2979 * patterns, named characters are not converted to their
2980 * ultimate forms for the same reasons that other escapes
2981 * aren't. Instead, they are converted to the \N{U+...} form
2982 * to get the value from the charnames that is in effect right
2983 * now, while preserving the fact that it was a named character
2984 * so that the regex compiler knows this */
2985
2986 /* This section of code doesn't generally use the
2987 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
2988 * a close examination of this macro and determined it is a
2989 * no-op except on utfebcdic variant characters. Every
2990 * character generated by this that would normally need to be
2991 * enclosed by this macro is invariant, so the macro is not
2992 * needed, and would complicate use of copy(). There are other
2993 * parts of this file where the macro is used inconsistently,
2994 * but are saved by it being a no-op */
2995
2996 /* The structure of this section of code (besides checking for
2997 * errors and upgrading to utf8) is:
2998 * Further disambiguate between the two meanings of \N, and if
2999 * not a charname, go process it elsewhere
0a96133f
KW
3000 * If of form \N{U+...}, pass it through if a pattern;
3001 * otherwise convert to utf8
3002 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
3003 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3004
3005 /* Here, s points to the 'N'; the test below is guaranteed to
3006 * succeed if we are being called on a pattern as we already
3007 * know from a test above that the next character is a '{'.
3008 * On a non-pattern \N must mean 'named sequence, which
3009 * requires braces */
3010 s++;
3011 if (*s != '{') {
3012 yyerror("Missing braces on \\N{}");
3013 continue;
3014 }
3015 s++;
3016
0a96133f 3017 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3018 if (! (e = strchr(s, '}'))) {
3019 if (! PL_lex_inpat) {
5777a3f7 3020 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3021 } else {
3022 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3023 }
0a96133f 3024 continue;
ff3f963a 3025 }
cddc7ef4 3026
ff3f963a 3027 /* Here it looks like a named character */
cddc7ef4 3028
ff3f963a
KW
3029 if (PL_lex_inpat) {
3030
3031 /* XXX This block is temporary code. \N{} implies that the
3032 * pattern is to have Unicode semantics, and therefore
3033 * currently has to be encoded in utf8. By putting it in
3034 * utf8 now, we save a whole pass in the regular expression
3035 * compiler. Once that code is changed so Unicode
3036 * semantics doesn't necessarily have to be in utf8, this
3037 * block should be removed */
3038 if (!has_utf8) {
77a135fe 3039 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3040 SvPOK_on(sv);
e4f3eed8 3041 *d = '\0';
77a135fe 3042 /* See Note on sizing above. */
7bf79863 3043 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3044 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3045 /* 5 = '\N{' + cur char + NUL */
3046 (STRLEN)(send - s) + 5);
f08d6ad9 3047 d = SvPVX(sv) + SvCUR(sv);
89491803 3048 has_utf8 = TRUE;
ff3f963a
KW
3049 }
3050 }
423cee85 3051
ff3f963a
KW
3052 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3053 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3054 | PERL_SCAN_DISALLOW_PREFIX;
3055 STRLEN len;
3056
3057 /* For \N{U+...}, the '...' is a unicode value even on
3058 * EBCDIC machines */
3059 s += 2; /* Skip to next char after the 'U+' */
3060 len = e - s;
3061 uv = grok_hex(s, &len, &flags, NULL);
3062 if (len == 0 || len != (STRLEN)(e - s)) {
3063 yyerror("Invalid hexadecimal number in \\N{U+...}");
3064 s = e + 1;
3065 continue;
3066 }
3067
3068 if (PL_lex_inpat) {
3069
3070 /* Pass through to the regex compiler unchanged. The
3071 * reason we evaluated the number above is to make sure
0a96133f 3072 * there wasn't a syntax error. */
ff3f963a
KW
3073 s -= 5; /* Include the '\N{U+' */
3074 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3075 d += e - s + 1;
3076 }
3077 else { /* Not a pattern: convert the hex to string */
3078
3079 /* If destination is not in utf8, unconditionally
3080 * recode it to be so. This is because \N{} implies
3081 * Unicode semantics, and scalars have to be in utf8
3082 * to guarantee those semantics */
3083 if (! has_utf8) {
3084 SvCUR_set(sv, d - SvPVX_const(sv));
3085 SvPOK_on(sv);
3086 *d = '\0';
3087 /* See Note on sizing above. */
3088 sv_utf8_upgrade_flags_grow(
3089 sv,
3090 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3091 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3092 d = SvPVX(sv) + SvCUR(sv);
3093 has_utf8 = TRUE;
3094 }
3095
3096 /* Add the string to the output */
3097 if (UNI_IS_INVARIANT(uv)) {
3098 *d++ = (char) uv;
3099 }
3100 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3101 }
3102 }
3103 else { /* Here is \N{NAME} but not \N{U+...}. */
3104
3105 SV *res; /* result from charnames */
3106 const char *str; /* the string in 'res' */
3107 STRLEN len; /* its length */
3108
3109 /* Get the value for NAME */
3110 res = newSVpvn(s, e - s);
3111 res = new_constant( NULL, 0, "charnames",
3112 /* includes all of: \N{...} */
3113 res, NULL, s - 3, e - s + 4 );
3114
3115 /* Most likely res will be in utf8 already since the
3116 * standard charnames uses pack U, but a custom translator
3117 * can leave it otherwise, so make sure. XXX This can be
3118 * revisited to not have charnames use utf8 for characters
3119 * that don't need it when regexes don't have to be in utf8
3120 * for Unicode semantics. If doing so, remember EBCDIC */
3121 sv_utf8_upgrade(res);
3122 str = SvPV_const(res, len);
3123
3124 /* Don't accept malformed input */
3125 if (! is_utf8_string((U8 *) str, len)) {
3126 yyerror("Malformed UTF-8 returned by \\N");
3127 }
3128 else if (PL_lex_inpat) {
3129
3130 if (! len) { /* The name resolved to an empty string */
3131 Copy("\\N{}", d, 4, char);
3132 d += 4;
3133 }
3134 else {
3135 /* In order to not lose information for the regex
3136 * compiler, pass the result in the specially made
3137 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3138 * the code points in hex of each character
3139 * returned by charnames */
3140
3141 const char *str_end = str + len;
3142 STRLEN char_length; /* cur char's byte length */
3143 STRLEN output_length; /* and the number of bytes
3144 after this is translated
3145 into hex digits */
3146 const STRLEN off = d - SvPVX_const(sv);
3147
3148 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3149 * max('U+', '.'); and 1 for NUL */
3150 char hex_string[2 * UTF8_MAXBYTES + 5];
3151
3152 /* Get the first character of the result. */
3153 U32 uv = utf8n_to_uvuni((U8 *) str,
3154 len,
3155 &char_length,
3156 UTF8_ALLOW_ANYUV);
3157
3158 /* The call to is_utf8_string() above hopefully
3159 * guarantees that there won't be an error. But
3160 * it's easy here to make sure. The function just
3161 * above warns and returns 0 if invalid utf8, but
3162 * it can also return 0 if the input is validly a
3163 * NUL. Disambiguate */
3164 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3165 uv = UNICODE_REPLACEMENT;
3166 }
3167
3168 /* Convert first code point to hex, including the
3169 * boiler plate before it */
3170 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3171 output_length = strlen(hex_string);
3172
3173 /* Make sure there is enough space to hold it */
3174 d = off + SvGROW(sv, off
3175 + output_length
3176 + (STRLEN)(send - e)
3177 + 2); /* '}' + NUL */
3178 /* And output it */
3179 Copy(hex_string, d, output_length, char);
3180 d += output_length;
3181
3182 /* For each subsequent character, append dot and
3183 * its ordinal in hex */
3184 while ((str += char_length) < str_end) {
3185 const STRLEN off = d - SvPVX_const(sv);
3186 U32 uv = utf8n_to_uvuni((U8 *) str,
3187 str_end - str,
3188 &char_length,
3189 UTF8_ALLOW_ANYUV);
3190 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3191 uv = UNICODE_REPLACEMENT;
3192 }
3193
3194 sprintf(hex_string, ".%X", (unsigned int) uv);
3195 output_length = strlen(hex_string);
3196
3197 d = off + SvGROW(sv, off
3198 + output_length
3199 + (STRLEN)(send - e)
3200 + 2); /* '}' + NUL */
3201 Copy(hex_string, d, output_length, char);
3202 d += output_length;
3203 }
3204
3205 *d++ = '}'; /* Done. Add the trailing brace */
3206 }
3207 }
3208 else { /* Here, not in a pattern. Convert the name to a
3209 * string. */
3210
3211 /* If destination is not in utf8, unconditionally
3212 * recode it to be so. This is because \N{} implies
3213 * Unicode semantics, and scalars have to be in utf8
3214 * to guarantee those semantics */
3215 if (! has_utf8) {
3216 SvCUR_set(sv, d - SvPVX_const(sv));
3217 SvPOK_on(sv);
3218 *d = '\0';
3219 /* See Note on sizing above. */
3220 sv_utf8_upgrade_flags_grow(sv,
3221 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3222 len + (STRLEN)(send - s) + 1);
3223 d = SvPVX(sv) + SvCUR(sv);
3224 has_utf8 = TRUE;
3225 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3226
3227 /* See Note on sizing above. (NOTE: SvCUR() is not
3228 * set correctly here). */
3229 const STRLEN off = d - SvPVX_const(sv);
3230 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3231 }
3232 Copy(str, d, len, char);
3233 d += len;
423cee85 3234 }
423cee85 3235 SvREFCNT_dec(res);
cb233ae3
KW
3236
3237 /* Deprecate non-approved name syntax */
3238 if (ckWARN_d(WARN_DEPRECATED)) {
3239 bool problematic = FALSE;
3240 char* i = s;
3241
3242 /* For non-ut8 input, look to see that the first
3243 * character is an alpha, then loop through the rest
3244 * checking that each is a continuation */
3245 if (! this_utf8) {
3246 if (! isALPHAU(*i)) problematic = TRUE;
3247 else for (i = s + 1; i < e; i++) {
3248 if (isCHARNAME_CONT(*i)) continue;
3249 problematic = TRUE;
3250 break;
3251 }
3252 }
3253 else {
3254 /* Similarly for utf8. For invariants can check
3255 * directly. We accept anything above the latin1
3256 * range because it is immaterial to Perl if it is
3257 * correct or not, and is expensive to check. But
3258 * it is fairly easy in the latin1 range to convert
3259 * the variants into a single character and check
3260 * those */
3261 if (UTF8_IS_INVARIANT(*i)) {
3262 if (! isALPHAU(*i)) problematic = TRUE;
3263 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3264 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3265 *(i+1)))))
3266 {
3267 problematic = TRUE;
3268 }
3269 }
3270 if (! problematic) for (i = s + UTF8SKIP(s);
3271 i < e;
3272 i+= UTF8SKIP(i))
3273 {
3274 if (UTF8_IS_INVARIANT(*i)) {
3275 if (isCHARNAME_CONT(*i)) continue;
3276 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3277 continue;
3278 } else if (isCHARNAME_CONT(
3279 UNI_TO_NATIVE(
3280 UTF8_ACCUMULATE(*i, *(i+1)))))
3281 {
3282 continue;
3283 }
3284 problematic = TRUE;
3285 break;
3286 }
3287 }
3288 if (problematic) {
6e1bad6c
KW
3289 /* The e-i passed to the final %.*s makes sure that
3290 * should the trailing NUL be missing that this
3291 * print won't run off the end of the string */
cb233ae3 3292 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3293 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3294 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3295 }
3296 }
3297 } /* End \N{NAME} */
ff3f963a
KW
3298#ifdef EBCDIC
3299 if (!dorange)
3300 native_range = FALSE; /* \N{} is defined to be Unicode */
3301#endif
3302 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3303 continue;
3304
02aa26ce 3305 /* \c is a control character */
79072805
LW
3306 case 'c':
3307 s++;
961ce445 3308 if (s < send) {
f9d13529 3309 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3310 }
961ce445
RGS
3311 else {
3312 yyerror("Missing control char name in \\c");
3313 }
79072805 3314 continue;
02aa26ce
NT
3315
3316 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3317 case 'b':
db42d148 3318 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3319 break;
3320 case 'n':
db42d148 3321 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3322 break;
3323 case 'r':
db42d148 3324 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3325 break;
3326 case 'f':
db42d148 3327 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3328 break;
3329 case 't':
db42d148 3330 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3331 break;
34a3fe2a 3332 case 'e':
db42d148 3333 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3334 break;
3335 case 'a':
db42d148 3336 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3337 break;
02aa26ce
NT
3338 } /* end switch */
3339
79072805
LW
3340 s++;
3341 continue;
02aa26ce 3342 } /* end if (backslash) */
4c3a8340
TS
3343#ifdef EBCDIC
3344 else
3345 literal_endpoint++;
3346#endif
02aa26ce 3347
f9a63242 3348 default_action:
77a135fe
KW
3349 /* If we started with encoded form, or already know we want it,
3350 then encode the next character */
3351 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3352 STRLEN len = 1;
77a135fe
KW
3353
3354
3355 /* One might think that it is wasted effort in the case of the
3356 * source being utf8 (this_utf8 == TRUE) to take the next character
3357 * in the source, convert it to an unsigned value, and then convert
3358 * it back again. But the source has not been validated here. The
3359 * routine that does the conversion checks for errors like
3360 * malformed utf8 */
3361
5f66b61c
AL
3362 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3363 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3364 if (!has_utf8) {
3365 SvCUR_set(sv, d - SvPVX_const(sv));
3366 SvPOK_on(sv);
3367 *d = '\0';
77a135fe 3368 /* See Note on sizing above. */
7bf79863
KW
3369 sv_utf8_upgrade_flags_grow(sv,
3370 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3371 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3372 d = SvPVX(sv) + SvCUR(sv);
3373 has_utf8 = TRUE;
3374 } else if (need > len) {
3375 /* encoded value larger than old, may need extra space (NOTE:
3376 * SvCUR() is not set correctly here). See Note on sizing
3377 * above. */
9d4ba2ae 3378 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3379 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3380 }
77a135fe
KW
3381 s += len;
3382
5f66b61c 3383 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3384#ifdef EBCDIC
3385 if (uv > 255 && !dorange)
3386 native_range = FALSE;
3387#endif
2b9d42f0
NIS
3388 }
3389 else {
3390 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3391 }
02aa26ce
NT
3392 } /* while loop to process each character */
3393
3394 /* terminate the string and set up the sv */
79072805 3395 *d = '\0';
95a20fc0 3396 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3397 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3398 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3399
79072805 3400 SvPOK_on(sv);
9f4817db 3401 if (PL_encoding && !has_utf8) {
d0063567
DK
3402 sv_recode_to_utf8(sv, PL_encoding);
3403 if (SvUTF8(sv))
3404 has_utf8 = TRUE;
9f4817db 3405 }
2b9d42f0 3406 if (has_utf8) {
7e2040f0 3407 SvUTF8_on(sv);
2b9d42f0 3408 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3409 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3410 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3411 }
3412 }
79072805 3413
02aa26ce 3414 /* shrink the sv if we allocated more than we used */
79072805 3415 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3416 SvPV_shrink_to_cur(sv);
79072805 3417 }
02aa26ce 3418
6154021b 3419 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3420 if (s > PL_bufptr) {
eb0d8d16
NC
3421 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3422 const char *const key = PL_lex_inpat ? "qr" : "q";
3423 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3424 const char *type;
3425 STRLEN typelen;
3426
3427 if (PL_lex_inwhat == OP_TRANS) {
3428 type = "tr";
3429 typelen = 2;
3430 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3431 type = "s";
3432 typelen = 1;
3433 } else {
3434 type = "qq";
3435 typelen = 2;
3436 }
3437
3438 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3439 type, typelen);
3440 }
6154021b 3441 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3442 } else
8990e307 3443 SvREFCNT_dec(sv);
79072805
LW
3444 return s;
3445}
3446
ffb4593c
NT
3447/* S_intuit_more
3448 * Returns TRUE if there's more to the expression (e.g., a subscript),
3449 * FALSE otherwise.
ffb4593c
NT
3450 *
3451 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3452 *
3453 * ->[ and ->{ return TRUE
3454 * { and [ outside a pattern are always subscripts, so return TRUE
3455 * if we're outside a pattern and it's not { or [, then return FALSE
3456 * if we're in a pattern and the first char is a {
3457 * {4,5} (any digits around the comma) returns FALSE
3458 * if we're in a pattern and the first char is a [
3459 * [] returns FALSE
3460 * [SOMETHING] has a funky algorithm to decide whether it's a
3461 * character class or not. It has to deal with things like
3462 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3463 * anything else returns TRUE
3464 */
3465
9cbb5ea2
GS
3466/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3467
76e3520e 3468STATIC int
cea2e8a9 3469S_intuit_more(pTHX_ register char *s)
79072805 3470{
97aff369 3471 dVAR;
7918f24d
NC
3472
3473 PERL_ARGS_ASSERT_INTUIT_MORE;
3474
3280af22 3475 if (PL_lex_brackets)
79072805
LW
3476 return TRUE;
3477 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3478 return TRUE;
3479 if (*s != '{' && *s != '[')
3480 return FALSE;
3280af22 3481 if (!PL_lex_inpat)
79072805
LW
3482 return TRUE;
3483
3484 /* In a pattern, so maybe we have {n,m}. */
3485 if (*s == '{') {
3486 s++;
3487 if (!isDIGIT(*s))
3488 return TRUE;
3489 while (isDIGIT(*s))
3490 s++;
3491 if (*s == ',')
3492 s++;
3493 while (isDIGIT(*s))
3494 s++;
3495 if (*s == '}')
3496 return FALSE;
3497 return TRUE;
3498
3499 }
3500
3501 /* On the other hand, maybe we have a character class */
3502
3503 s++;
3504 if (*s == ']' || *s == '^')
3505 return FALSE;
3506 else {
ffb4593c 3507 /* this is terrifying, and it works */
79072805
LW
3508 int weight = 2; /* let's weigh the evidence */
3509 char seen[256];
f27ffc4a 3510 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3511 const char * const send = strchr(s,']');
3280af22 3512 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3513
3514 if (!send) /* has to be an expression */
3515 return TRUE;
3516
3517 Zero(seen,256,char);
3518 if (*s == '$')
3519 weight -= 3;
3520 else if (isDIGIT(*s)) {
3521 if (s[1] != ']') {
3522 if (isDIGIT(s[1]) && s[2] == ']')
3523 weight -= 10;
3524 }
3525 else
3526 weight -= 100;
3527 }
3528 for (; s < send; s++) {
3529 last_un_char = un_char;
3530 un_char = (unsigned char)*s;
3531 switch (*s) {
3532 case '@':
3533 case '&':
3534 case '$':
3535 weight -= seen[un_char] * 10;
7e2040f0 3536 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3537 int len;
8903cb82 3538 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3539 len = (int)strlen(tmpbuf);
3540 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3541 weight -= 100;
3542 else
3543 weight -= 10;
3544 }
3545 else if (*s == '$' && s[1] &&
93a17b20
LW
3546 strchr("[#!%*<>()-=",s[1])) {
3547 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3548 weight -= 10;
3549 else
3550 weight -= 1;
3551 }
3552 break;
3553 case '\\':
3554 un_char = 254;
3555 if (s[1]) {
93a17b20 3556 if (strchr("wds]",s[1]))
79072805 3557 weight += 100;
10edeb5d 3558 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3559 weight += 1;
93a17b20 3560 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3561 weight += 40;
3562 else if (isDIGIT(s[1])) {
3563 weight += 40;
3564 while (s[1] && isDIGIT(s[1]))
3565 s++;
3566 }
3567 }
3568 else
3569 weight += 100;
3570 break;
3571 case '-':
3572 if (s[1] == '\\')
3573 weight += 50;
93a17b20 3574 if (strchr("aA01! ",last_un_char))
79072805 3575 weight += 30;
93a17b20 3576 if (strchr("zZ79~",s[1]))
79072805 3577 weight += 30;
f27ffc4a
GS
3578 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3579 weight -= 5; /* cope with negative subscript */
79072805
LW
3580 break;
3581 default:
3792a11b
NC
3582 if (!isALNUM(last_un_char)
3583 && !(last_un_char == '$' || last_un_char == '@'
3584 || last_un_char == '&')
3585 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3586 char *d = tmpbuf;
3587 while (isALPHA(*s))
3588 *d++ = *s++;
3589 *d = '\0';
5458a98a 3590 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3591 weight -= 150;
3592 }
3593 if (un_char == last_un_char + 1)
3594 weight += 5;
3595 weight -= seen[un_char];
3596 break;
3597 }
3598 seen[un_char]++;
3599 }
3600 if (weight >= 0) /* probably a character class */
3601 return FALSE;
3602 }
3603
3604 return TRUE;
3605}
ffed7fef 3606
ffb4593c
NT
3607/*
3608 * S_intuit_method
3609 *
3610 * Does all the checking to disambiguate
3611 * foo bar
3612 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3613 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3614 *
3615 * First argument is the stuff after the first token, e.g. "bar".
3616 *
3617 * Not a method if bar is a filehandle.
3618 * Not a method if foo is a subroutine prototyped to take a filehandle.
3619 * Not a method if it's really "Foo $bar"
3620 * Method if it's "foo $bar"
3621 * Not a method if it's really "print foo $bar"
3622 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3623 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3624 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3625 * =>
3626 */
3627
76e3520e 3628STATIC int
62d55b22 3629S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3630{
97aff369 3631 dVAR;
a0d0e21e 3632 char *s = start + (*start == '$');
3280af22 3633 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3634 STRLEN len;
3635 GV* indirgv;
5db06880
NC
3636#ifdef PERL_MAD
3637 int soff;
3638#endif
a0d0e21e 3639
7918f24d
NC
3640 PERL_ARGS_ASSERT_INTUIT_METHOD;
3641
a0d0e21e 3642 if (gv) {
62d55b22 3643 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3644 return 0;
62d55b22
NC
3645 if (cv) {
3646 if (SvPOK(cv)) {
3647 const char *proto = SvPVX_const(cv);
3648 if (proto) {
3649 if (*proto == ';')
3650 proto++;
3651 if (*proto == '*')
3652 return 0;
3653 }
b6c543e3
IZ
3654 }
3655 } else
c35e046a 3656 gv = NULL;
a0d0e21e 3657 }
8903cb82 3658 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3659 /* start is the beginning of the possible filehandle/object,
3660 * and s is the end of it
3661 * tmpbuf is a copy of it
3662 */
3663
a0d0e21e 3664 if (*start == '$') {
3ef1310e
RGS
3665 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3666 isUPPER(*PL_tokenbuf))
a0d0e21e 3667 return 0;
5db06880
NC
3668#ifdef PERL_MAD
3669 len = start - SvPVX(PL_linestr);
3670#endif
29595ff2 3671 s = PEEKSPACE(s);
f0092767 3672#ifdef PERL_MAD
5db06880
NC
3673 start = SvPVX(PL_linestr) + len;
3674#endif
3280af22
NIS
3675 PL_bufptr = start;
3676 PL_expect = XREF;
a0d0e21e
LW
3677 return *s == '(' ? FUNCMETH : METHOD;
3678 }
5458a98a 3679 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3680 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3681 len -= 2;
3682 tmpbuf[len] = '\0';
5db06880
NC
3683#ifdef PERL_MAD
3684 soff = s - SvPVX(PL_linestr);
3685#endif
c3e0f903
GS
3686 goto bare_package;
3687 }
90e5519e 3688 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3689 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3690 return 0;
3691 /* filehandle or package name makes it a method */
da51bb9b 3692 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3693#ifdef PERL_MAD
3694 soff = s - SvPVX(PL_linestr);
3695#endif
29595ff2 3696 s = PEEKSPACE(s);
3280af22 3697 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3698 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3699 bare_package:
cd81e915 3700 start_force(PL_curforce);
9ded7720 3701 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3702 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3703 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3704 if (PL_madskills)
3705 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3706 PL_expect = XTERM;
a0d0e21e 3707 force_next(WORD);
3280af22 3708 PL_bufptr = s;
5db06880
NC
3709#ifdef PERL_MAD
3710 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3711#endif
a0d0e21e
LW
3712 return *s == '(' ? FUNCMETH : METHOD;
3713 }
3714 }
3715 return 0;
3716}
3717
16d20bd9 3718/* Encoded script support. filter_add() effectively inserts a
4e553d73 3719 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3720 * Note that the filter function only applies to the current source file
3721 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3722 *
3723 * The datasv parameter (which may be NULL) can be used to pass
3724 * private data to this instance of the filter. The filter function
3725 * can recover the SV using the FILTER_DATA macro and use it to
3726 * store private buffers and state information.
3727 *
3728 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3729 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3730 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3731 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3732 * private use must be set using malloc'd pointers.
3733 */
16d20bd9
AD
3734
3735SV *
864dbfa3 3736Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3737{
97aff369 3738 dVAR;
f4c556ac 3739 if (!funcp)
a0714e2c 3740 return NULL;
f4c556ac 3741
5486870f
DM
3742 if (!PL_parser)
3743 return NULL;
3744
3280af22
NIS
3745 if (!PL_rsfp_filters)
3746 PL_rsfp_filters = newAV();
16d20bd9 3747 if (!datasv)
561b68a9 3748 datasv = newSV(0);
862a34c6 3749 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3750 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3751 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3752 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3753 FPTR2DPTR(void *, IoANY(datasv)),
3754 SvPV_nolen(datasv)));
3280af22
NIS
3755 av_unshift(PL_rsfp_filters, 1);
3756 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3757 return(datasv);
3758}
4e553d73 3759
16d20bd9
AD
3760
3761/* Delete most recently added instance of this filter function. */
a0d0e21e 3762void
864dbfa3 3763Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3764{
97aff369 3765 dVAR;
e0c19803 3766 SV *datasv;
24801a4b 3767
7918f24d
NC
3768 PERL_ARGS_ASSERT_FILTER_DEL;
3769
33073adb 3770#ifdef DEBUGGING
55662e27
JH
3771 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3772 FPTR2DPTR(void*, funcp)));
33073adb 3773#endif
5486870f 3774 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3775 return;
3776 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3777 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3778 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3779 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3780
16d20bd9
AD
3781 return;
3782 }
3783 /* we need to search for the correct entry and clear it */
cea2e8a9 3784 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3785}
3786
3787
1de9afcd
RGS
3788/* Invoke the idxth filter function for the current rsfp. */
3789/* maxlen 0 = read one text line */
16d20bd9 3790I32
864dbfa3 3791Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3792{
97aff369 3793 dVAR;
16d20bd9
AD
3794 filter_t funcp;
3795 SV *datasv = NULL;
f482118e
NC
3796 /* This API is bad. It should have been using unsigned int for maxlen.
3797 Not sure if we want to change the API, but if not we should sanity
3798 check the value here. */
39cd7a59
NC
3799 const unsigned int correct_length
3800 = maxlen < 0 ?
3801#ifdef PERL_MICRO
3802 0x7FFFFFFF
3803#else
3804 INT_MAX
3805#endif
3806 : maxlen;
e50aee73 3807
7918f24d
NC
3808 PERL_ARGS_ASSERT_FILTER_READ;
3809
5486870f 3810 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3811 return -1;
1de9afcd 3812 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3813 /* Provide a default input filter to make life easy. */
3814 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3815 DEBUG_P(PerlIO_printf(Perl_debug_log,
3816 "filter_read %d: from rsfp\n", idx));
f482118e 3817 if (correct_length) {
16d20bd9
AD
3818 /* Want a block */
3819 int len ;
f54cb97a 3820 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3821
3822 /* ensure buf_sv is large enough */
881d8f0a 3823 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3824 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3825 correct_length)) <= 0) {
3280af22 3826 if (PerlIO_error(PL_rsfp))
37120919
AD
3827 return -1; /* error */
3828 else
3829 return 0 ; /* end of file */
3830 }
16d20bd9 3831 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3832 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3833 } else {
3834 /* Want a line */
3280af22
NIS
3835 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3836 if (PerlIO_error(PL_rsfp))
37120919
AD
3837 return -1; /* error */
3838 else
3839 return 0 ; /* end of file */
3840 }
16d20bd9
AD
3841 }
3842 return SvCUR(buf_sv);
3843 }
3844 /* Skip this filter slot if filter has been deleted */
1de9afcd 3845 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3846 DEBUG_P(PerlIO_printf(Perl_debug_log,
3847 "filter_read %d: skipped (filter deleted)\n",
3848 idx));
f482118e 3849 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3850 }
3851 /* Get function pointer hidden within datasv */
8141890a 3852 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3853 DEBUG_P(PerlIO_printf(Perl_debug_log,
3854 "filter_read %d: via function %p (%s)\n",
ca0270c4 3855 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3856 /* Call function. The function is expected to */
3857 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3858 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3859 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3860}
3861
76e3520e 3862STATIC char *
5cc814fd 3863S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3864{
97aff369 3865 dVAR;
7918f24d
NC
3866
3867 PERL_ARGS_ASSERT_FILTER_GETS;
3868
c39cd008 3869#ifdef PERL_CR_FILTER
3280af22 3870 if (!PL_rsfp_filters) {
c39cd008 3871 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3872 }
3873#endif
3280af22 3874 if (PL_rsfp_filters) {
55497cff 3875 if (!append)
3876 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3877 if (FILTER_READ(0, sv, 0) > 0)
3878 return ( SvPVX(sv) ) ;
3879 else
bd61b366 3880 return NULL ;
16d20bd9 3881 }
9d116dd7 3882 else
5cc814fd 3883 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3884}
3885
01ec43d0 3886STATIC HV *
9bde8eb0 3887S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3888{
97aff369 3889 dVAR;
def3634b
GS
3890 GV *gv;
3891
7918f24d
NC
3892 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3893
01ec43d0 3894 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3895 return PL_curstash;
3896
3897 if (len > 2 &&
3898 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3899 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3900 {
3901 return GvHV(gv); /* Foo:: */
def3634b
GS
3902 }
3903
3904 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3905 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3906 if (gv && GvCV(gv)) {
3907 SV * const sv = cv_const_sv(GvCV(gv));
3908 if (sv)
9bde8eb0 3909 pkgname = SvPV_const(sv, len);
def3634b
GS
3910 }
3911
9bde8eb0 3912 return gv_stashpvn(pkgname, len, 0);
def3634b 3913}
a0d0e21e 3914
e3f73d4e
RGS
3915/*
3916 * S_readpipe_override
3917 * Check whether readpipe() is overriden, and generates the appropriate
3918 * optree, provided sublex_start() is called afterwards.
3919 */
3920STATIC void
1d51329b 3921S_readpipe_override(pTHX)
e3f73d4e
RGS
3922{
3923 GV **gvp;
3924 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3925 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3926 if ((gv_readpipe
3927 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3928 ||
3929 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3930 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3931 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3932 {
3933 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3934 append_elem(OP_LIST,
3935 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3936 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3937 }
e3f73d4e
RGS
3938}
3939
5db06880
NC
3940#ifdef PERL_MAD
3941 /*
3942 * Perl_madlex
3943 * The intent of this yylex wrapper is to minimize the changes to the
3944 * tokener when we aren't interested in collecting madprops. It remains
3945 * to be seen how successful this strategy will be...
3946 */
3947
3948int
3949Perl_madlex(pTHX)
3950{
3951 int optype;
3952 char *s = PL_bufptr;
3953
cd81e915
NC
3954 /* make sure PL_thiswhite is initialized */
3955 PL_thiswhite = 0;
3956 PL_thismad = 0;
5db06880 3957
cd81e915 3958 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
28ac2b49 3959 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
5db06880
NC
3960 return S_pending_ident(aTHX);
3961
3962 /* previous token ate up our whitespace? */
cd81e915
NC
3963 if (!PL_lasttoke && PL_nextwhite) {
3964 PL_thiswhite = PL_nextwhite;
3965 PL_nextwhite = 0;
5db06880
NC
3966 }
3967
3968 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3969 PL_realtokenstart = -1;
3970 PL_thistoken = 0;
5db06880
NC
3971 optype = yylex();
3972 s = PL_bufptr;
cd81e915 3973 assert(PL_curforce < 0);
5db06880 3974
cd81e915
NC
3975 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3976 if (!PL_thistoken) {
3977 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3978 PL_thistoken = newSVpvs("");
5db06880 3979 else {
c35e046a 3980 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3981 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3982 }
3983 }
cd81e915
NC
3984 if (PL_thismad) /* install head */
3985 CURMAD('X', PL_thistoken);
5db06880
NC
3986 }
3987
3988 /* last whitespace of a sublex? */
cd81e915
NC
3989 if (optype == ')' && PL_endwhite) {
3990 CURMAD('X', PL_endwhite);
5db06880
NC
3991 }
3992
cd81e915 3993 if (!PL_thismad) {
5db06880
NC
3994
3995 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3996 if (!PL_thiswhite && !PL_endwhite && !optype) {
3997 sv_free(PL_thistoken);
3998 PL_thistoken = 0;
5db06880
NC
3999 return 0;
4000 }
4001
4002 /* put off final whitespace till peg */
4003 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4004 PL_nextwhite = PL_thiswhite;
4005 PL_thiswhite = 0;
5db06880 4006 }
cd81e915
NC
4007 else if (PL_thisopen) {
4008 CURMAD('q', PL_thisopen);
4009 if (PL_thistoken)
4010 sv_free(PL_thistoken);
4011 PL_thistoken = 0;
5db06880
NC
4012 }
4013 else {
4014 /* Store actual token text as madprop X */
cd81e915 4015 CURMAD('X', PL_thistoken);
5db06880
NC
4016 }
4017
cd81e915 4018 if (PL_thiswhite) {
5db06880 4019 /* add preceding whitespace as madprop _ */
cd81e915 4020 CURMAD('_', PL_thiswhite);
5db06880
NC
4021 }
4022
cd81e915 4023 if (PL_thisstuff) {
5db06880 4024 /* add quoted material as madprop = */
cd81e915 4025 CURMAD('=', PL_thisstuff);
5db06880
NC
4026 }
4027
cd81e915 4028 if (PL_thisclose) {
5db06880 4029 /* add terminating quote as madprop Q */
cd81e915 4030 CURMAD('Q', PL_thisclose);
5db06880
NC
4031 }
4032 }
4033
4034 /* special processing based on optype */
4035
4036 switch (optype) {
4037
4038 /* opval doesn't need a TOKEN since it can already store mp */
4039 case WORD:
4040 case METHOD:
4041 case FUNCMETH:
4042 case THING:
4043 case PMFUNC:
4044 case PRIVATEREF:
4045 case FUNC0SUB:
4046 case UNIOPSUB:
4047 case LSTOPSUB:
6154021b
RGS
4048 if (pl_yylval.opval)
4049 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4050 PL_thismad = 0;
5db06880
NC
4051 return optype;
4052
4053 /* fake EOF */
4054 case 0:
4055 optype = PEG;
cd81e915
NC
4056 if (PL_endwhite) {
4057 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4058 PL_endwhite = 0;
5db06880
NC
4059 }
4060 break;
4061
4062 case ']':
4063 case '}':
cd81e915 4064 if (PL_faketokens)
5db06880
NC
4065 break;
4066 /* remember any fake bracket that lexer is about to discard */
4067 if (PL_lex_brackets == 1 &&
4068 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4069 {
4070 s = PL_bufptr;
4071 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4072 s++;
4073 if (*s == '}') {
cd81e915
NC
4074 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4075 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4076 PL_thiswhite = 0;
5db06880
NC
4077 PL_bufptr = s - 1;
4078 break; /* don't bother looking for trailing comment */
4079 }
4080 else
4081 s = PL_bufptr;
4082 }
4083 if (optype == ']')
4084 break;
4085 /* FALLTHROUGH */
4086
4087 /* attach a trailing comment to its statement instead of next token */
4088 case ';':
cd81e915 4089 if (PL_faketokens)
5db06880
NC
4090 break;
4091 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4092 s = PL_bufptr;
4093 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4094 s++;
4095 if (*s == '\n' || *s == '#') {
4096 while (s < PL_bufend && *s != '\n')
4097 s++;
4098 if (s < PL_bufend)
4099 s++;
cd81e915
NC
4100 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4101 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4102 PL_thiswhite = 0;
5db06880
NC
4103 PL_bufptr = s;
4104 }
4105 }
4106 break;
4107
4108 /* pval */
4109 case LABEL:
4110 break;
4111
4112 /* ival */
4113 default:
4114 break;
4115
4116 }
4117
4118 /* Create new token struct. Note: opvals return early above. */
6154021b 4119 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4120 PL_thismad = 0;
5db06880
NC
4121 return optype;
4122}
4123#endif
4124
468aa647 4125STATIC char *
cc6ed77d 4126S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4127 dVAR;
7918f24d
NC
4128
4129 PERL_ARGS_ASSERT_TOKENIZE_USE;
4130
468aa647
RGS
4131 if (PL_expect != XSTATE)
4132 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4133 is_use ? "use" : "no"));
29595ff2 4134 s = SKIPSPACE1(s);
468aa647
RGS
4135 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4136 s = force_version(s, TRUE);
17c59fdf
VP
4137 if (*s == ';' || *s == '}'
4138 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4139 start_force(PL_curforce);
9ded7720 4140 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4141 force_next(WORD);
4142 }
4143 else if (*s == 'v') {
4144 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4145 s = force_version(s, FALSE);
4146 }
4147 }
4148 else {
4149 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4150 s = force_version(s, FALSE);
4151 }
6154021b 4152 pl_yylval.ival = is_use;
468aa647
RGS
4153 return s;
4154}
748a9306 4155#ifdef DEBUGGING
27da23d5 4156 static const char* const exp_name[] =
09bef843 4157 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4158 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4159 };
748a9306 4160#endif
463ee0b2 4161
02aa26ce
NT
4162/*
4163 yylex
4164
4165 Works out what to call the token just pulled out of the input
4166 stream. The yacc parser takes care of taking the ops we return and
4167 stitching them into a tree.
4168
4169 Returns:
4170 PRIVATEREF
4171
4172 Structure:
4173 if read an identifier
4174 if we're in a my declaration
4175 croak if they tried to say my($foo::bar)
4176 build the ops for a my() declaration
4177 if it's an access to a my() variable
4178 are we in a sort block?
4179 croak if my($a); $a <=> $b
4180 build ops for access to a my() variable
4181 if in a dq string, and they've said @foo and we can't find @foo
4182 croak
4183 build ops for a bareword
4184 if we already built the token before, use it.
4185*/
4186
20141f0e 4187
dba4d153
JH
4188#ifdef __SC__
4189#pragma segment Perl_yylex
4190#endif
dba4d153 4191int
dba4d153 4192Perl_yylex(pTHX)
20141f0e 4193{
97aff369 4194 dVAR;
3afc138a 4195 register char *s = PL_bufptr;
378cc40b 4196 register char *d;
463ee0b2 4197 STRLEN len;
aa7440fb 4198 bool bof = FALSE;
580561a3 4199 U32 fake_eof = 0;
a687059c 4200
10edeb5d
JH
4201 /* orig_keyword, gvp, and gv are initialized here because
4202 * jump to the label just_a_word_zero can bypass their
4203 * initialization later. */
4204 I32 orig_keyword = 0;
4205 GV *gv = NULL;
4206 GV **gvp = NULL;
4207
bbf60fe6 4208 DEBUG_T( {
396482e1 4209 SV* tmp = newSVpvs("");
b6007c36
DM
4210 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4211 (IV)CopLINE(PL_curcop),
4212 lex_state_names[PL_lex_state],
4213 exp_name[PL_expect],
4214 pv_display(tmp, s, strlen(s), 0, 60));
4215 SvREFCNT_dec(tmp);
bbf60fe6 4216 } );
02aa26ce 4217 /* check if there's an identifier for us to look at */
28ac2b49 4218 if (PL_lex_state != LEX_KNOWNEXT && PL_pending_ident)
bbf60fe6 4219 return REPORT(S_pending_ident(aTHX));
bbce6d69 4220
02aa26ce
NT
4221 /* no identifier pending identification */
4222
3280af22 4223 switch (PL_lex_state) {
79072805
LW
4224#ifdef COMMENTARY
4225 case LEX_NORMAL: /* Some compilers will produce faster */
4226 case LEX_INTERPNORMAL: /* code if we comment these out. */
4227 break;
4228#endif
4229
09bef843 4230 /* when we've already built the next token, just pull it out of the queue */
79072805 4231 case LEX_KNOWNEXT:
5db06880
NC
4232#ifdef PERL_MAD
4233 PL_lasttoke--;
6154021b 4234 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4235 if (PL_madskills) {
cd81e915 4236 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4237 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4238 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4239 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4240 PL_thismad->mad_val = 0;
4241 mad_free(PL_thismad);
4242 PL_thismad = 0;
5db06880
NC
4243 }
4244 }
4245 if (!PL_lasttoke) {
4246 PL_lex_state = PL_lex_defer;
4247 PL_expect = PL_lex_expect;
4248 PL_lex_defer = LEX_NORMAL;
4249 if (!PL_nexttoke[PL_lasttoke].next_type)
4250 return yylex();
4251 }
4252#else
3280af22 4253 PL_nexttoke--;
6154021b 4254 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4255 if (!PL_nexttoke) {
4256 PL_lex_state = PL_lex_defer;
4257 PL_expect = PL_lex_expect;
4258 PL_lex_defer = LEX_NORMAL;
463ee0b2 4259 }
5db06880
NC
4260#endif
4261#ifdef PERL_MAD
4262 /* FIXME - can these be merged? */
4263 return(PL_nexttoke[PL_lasttoke].next_type);
4264#else
bbf60fe6 4265 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 4266#endif
79072805 4267
02aa26ce 4268 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4269 when we get here, PL_bufptr is at the \
02aa26ce 4270 */
79072805
LW
4271 case LEX_INTERPCASEMOD:
4272#ifdef DEBUGGING
3280af22 4273 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4274 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4275#endif
02aa26ce 4276 /* handle \E or end of string */
3280af22 4277 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4278 /* if at a \E */
3280af22 4279 if (PL_lex_casemods) {
f54cb97a 4280 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4281 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4282
3792a11b
NC
4283 if (PL_bufptr != PL_bufend
4284 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4285 PL_bufptr += 2;
4286 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4287#ifdef PERL_MAD
4288 if (PL_madskills)
6b29d1f5 4289 PL_thistoken = newSVpvs("\\E");
5db06880 4290#endif
a0d0e21e 4291 }
bbf60fe6 4292 return REPORT(')');
79072805 4293 }
5db06880
NC
4294#ifdef PERL_MAD
4295 while (PL_bufptr != PL_bufend &&
4296 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4297 if (!PL_thiswhite)
6b29d1f5 4298 PL_thiswhite = newSVpvs("");
cd81e915 4299 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4300 PL_bufptr += 2;
4301 }
4302#else
3280af22
NIS
4303 if (PL_bufptr != PL_bufend)
4304 PL_bufptr += 2;
5db06880 4305#endif
3280af22 4306 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4307 return yylex();
79072805
LW
4308 }
4309 else {
607df283 4310 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4311 "### Saw case modifier\n"); });
3280af22 4312 s = PL_bufptr + 1;
6e909404 4313 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4314#ifdef PERL_MAD
cd81e915 4315 if (!PL_thiswhite)
6b29d1f5 4316 PL_thiswhite = newSVpvs("");
cd81e915 4317 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4318#endif
89122651 4319 PL_bufptr = s + 3;
6e909404
JH
4320 PL_lex_state = LEX_INTERPCONCAT;
4321 return yylex();
a0d0e21e 4322 }
6e909404 4323 else {
90771dc0 4324 I32 tmp;
5db06880
NC
4325 if (!PL_madskills) /* when just compiling don't need correct */
4326 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4327 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4328 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4329 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4330 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4331 return REPORT(')');
6e909404
JH
4332 }
4333 if (PL_lex_casemods > 10)
4334 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4335 PL_lex_casestack[PL_lex_casemods++] = *s;
4336 PL_lex_casestack[PL_lex_casemods] = '\0';
4337 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4338 start_force(PL_curforce);
9ded7720 4339 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4340 force_next('(');
cd81e915 4341 start_force(PL_curforce);
6e909404 4342 if (*s == 'l')
9ded7720 4343 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4344 else if (*s == 'u')
9ded7720 4345 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4346 else if (*s == 'L')
9ded7720 4347 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4348 else if (*s == 'U')
9ded7720 4349 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4350 else if (*s == 'Q')
9ded7720 4351 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4352 else
4353 Perl_croak(aTHX_ "panic: yylex");
5db06880 4354 if (PL_madskills) {
a5849ce5
NC
4355 SV* const tmpsv = newSVpvs("\\ ");
4356 /* replace the space with the character we want to escape
4357 */
4358 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4359 curmad('_', tmpsv);
4360 }
6e909404 4361 PL_bufptr = s + 1;
a0d0e21e 4362 }
79072805 4363 force_next(FUNC);
3280af22
NIS
4364 if (PL_lex_starts) {
4365 s = PL_bufptr;
4366 PL_lex_starts = 0;
5db06880
NC
4367#ifdef PERL_MAD
4368 if (PL_madskills) {
cd81e915
NC
4369 if (PL_thistoken)
4370 sv_free(PL_thistoken);
6b29d1f5 4371 PL_thistoken = newSVpvs("");
5db06880
NC
4372 }
4373#endif
131b3ad0
DM
4374 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4375 if (PL_lex_casemods == 1 && PL_lex_inpat)
4376 OPERATOR(',');
4377 else
4378 Aop(OP_CONCAT);
79072805
LW
4379 }
4380 else
cea2e8a9 4381 return yylex();
79072805
LW
4382 }
4383
55497cff 4384 case LEX_INTERPPUSH:
bbf60fe6 4385 return REPORT(sublex_push());
55497cff 4386
79072805 4387 case LEX_INTERPSTART:
3280af22 4388 if (PL_bufptr == PL_bufend)
bbf60fe6 4389 return REPORT(sublex_done());
607df283 4390 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4391 "### Interpolated variable\n"); });
3280af22
NIS
4392 PL_expect = XTERM;
4393 PL_lex_dojoin = (*PL_bufptr == '@');
4394 PL_lex_state = LEX_INTERPNORMAL;
4395 if (PL_lex_dojoin) {
cd81e915 4396 start_force(PL_curforce);
9ded7720 4397 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4398 force_next(',');
cd81e915 4399 start_force(PL_curforce);
a0d0e21e 4400 force_ident("\"", '$');
cd81e915 4401 start_force(PL_curforce);
9ded7720 4402 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4403 force_next('$');
cd81e915 4404 start_force(PL_curforce);
9ded7720 4405 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4406 force_next('(');
cd81e915 4407 start_force(PL_curforce);
9ded7720 4408 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4409 force_next(FUNC);
4410 }
3280af22
NIS
4411 if (PL_lex_starts++) {
4412 s = PL_bufptr;
5db06880
NC
4413#ifdef PERL_MAD
4414 if (PL_madskills) {
cd81e915
NC
4415 if (PL_thistoken)
4416 sv_free(PL_thistoken);
6b29d1f5 4417 PL_thistoken = newSVpvs("");
5db06880
NC
4418 }
4419#endif
131b3ad0
DM
4420 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4421 if (!PL_lex_casemods && PL_lex_inpat)
4422 OPERATOR(',');
4423 else
4424 Aop(OP_CONCAT);
79072805 4425 }
cea2e8a9 4426 return yylex();
79072805
LW
4427
4428 case LEX_INTERPENDMAYBE:
3280af22
NIS
4429 if (intuit_more(PL_bufptr)) {
4430 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4431 break;
4432 }
4433 /* FALL THROUGH */
4434
4435 case LEX_INTERPEND:
3280af22
NIS
4436 if (PL_lex_dojoin) {
4437 PL_lex_dojoin = FALSE;
4438 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4439#ifdef PERL_MAD
4440 if (PL_madskills) {
cd81e915
NC
4441 if (PL_thistoken)
4442 sv_free(PL_thistoken);
6b29d1f5 4443 PL_thistoken = newSVpvs("");
5db06880
NC
4444 }
4445#endif
bbf60fe6 4446 return REPORT(')');
79072805 4447 }
43a16006 4448 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4449 && SvEVALED(PL_lex_repl))
43a16006 4450 {
e9fa98b2 4451 if (PL_bufptr != PL_bufend)
cea2e8a9 4452 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4453 PL_lex_repl = NULL;
e9fa98b2 4454 }
79072805
LW
4455 /* FALLTHROUGH */
4456 case LEX_INTERPCONCAT:
4457#ifdef DEBUGGING
3280af22 4458 if (PL_lex_brackets)
cea2e8a9 4459 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4460#endif
3280af22 4461 if (PL_bufptr == PL_bufend)
bbf60fe6 4462 return REPORT(sublex_done());
79072805 4463
3280af22
NIS
4464 if (SvIVX(PL_linestr) == '\'') {
4465 SV *sv = newSVsv(PL_linestr);
4466 if (!PL_lex_inpat)
76e3520e 4467 sv = tokeq(sv);
3280af22 4468 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4469 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4470 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4471 s = PL_bufend;
79072805
LW
4472 }
4473 else {
3280af22 4474 s = scan_const(PL_bufptr);
79072805 4475 if (*s == '\\')
3280af22 4476 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4477 else
3280af22 4478 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4479 }
4480
3280af22 4481 if (s != PL_bufptr) {
cd81e915 4482 start_force(PL_curforce);
5db06880
NC
4483 if (PL_madskills) {
4484 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4485 }
6154021b 4486 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4487 PL_expect = XTERM;
79072805 4488 force_next(THING);
131b3ad0 4489 if (PL_lex_starts++) {
5db06880
NC
4490#ifdef PERL_MAD
4491 if (PL_madskills) {
cd81e915
NC
4492 if (PL_thistoken)
4493 sv_free(PL_thistoken);
6b29d1f5 4494 PL_thistoken = newSVpvs("");
5db06880
NC
4495 }
4496#endif
131b3ad0
DM
4497 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4498 if (!PL_lex_casemods && PL_lex_inpat)
4499 OPERATOR(',');
4500 else
4501 Aop(OP_CONCAT);
4502 }
79072805 4503 else {
3280af22 4504 PL_bufptr = s;
cea2e8a9 4505 return yylex();
79072805
LW
4506 }
4507 }
4508
cea2e8a9 4509 return yylex();
a0d0e21e 4510 case LEX_FORMLINE:
3280af22
NIS
4511 PL_lex_state = LEX_NORMAL;
4512 s = scan_formline(PL_bufptr);
4513 if (!PL_lex_formbrack)
a0d0e21e
LW
4514 goto rightbracket;
4515 OPERATOR(';');
79072805
LW
4516 }
4517
3280af22
NIS
4518 s = PL_bufptr;
4519 PL_oldoldbufptr = PL_oldbufptr;
4520 PL_oldbufptr = s;
463ee0b2
LW
4521
4522 retry:
5db06880 4523#ifdef PERL_MAD
cd81e915
NC
4524 if (PL_thistoken) {
4525 sv_free(PL_thistoken);
4526 PL_thistoken = 0;
5db06880 4527 }
cd81e915 4528 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4529#endif
378cc40b
LW
4530 switch (*s) {
4531 default:
7e2040f0 4532 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4533 goto keylookup;
b1fc3636
CJ
4534 {
4535 unsigned char c = *s;
4536 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4537 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4538 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4539 } else {
4540 d = PL_linestart;
4541 }
4542 *s = '\0';
4543 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4544 }
e929a76b
LW
4545 case 4:
4546 case 26:
4547 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4548 case 0:
5db06880
NC
4549#ifdef PERL_MAD
4550 if (PL_madskills)
cd81e915 4551 PL_faketokens = 0;
5db06880 4552#endif
3280af22
NIS
4553 if (!PL_rsfp) {
4554 PL_last_uni = 0;
4555 PL_last_lop = 0;
c5ee2135 4556 if (PL_lex_brackets) {
10edeb5d
JH
4557 yyerror((const char *)
4558 (PL_lex_formbrack
4559 ? "Format not terminated"
4560 : "Missing right curly or square bracket"));
c5ee2135 4561 }
4e553d73 4562 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4563 "### Tokener got EOF\n");
5f80b19c 4564 } );
79072805 4565 TOKEN(0);
463ee0b2 4566 }
3280af22 4567 if (s++ < PL_bufend)
a687059c 4568 goto retry; /* ignore stray nulls */
3280af22
NIS
4569 PL_last_uni = 0;
4570 PL_last_lop = 0;
4571 if (!PL_in_eval && !PL_preambled) {
4572 PL_preambled = TRUE;
5db06880
NC
4573#ifdef PERL_MAD
4574 if (PL_madskills)
cd81e915 4575 PL_faketokens = 1;
5db06880 4576#endif
5ab7ff98
NC
4577 if (PL_perldb) {
4578 /* Generate a string of Perl code to load the debugger.
4579 * If PERL5DB is set, it will return the contents of that,
4580 * otherwise a compile-time require of perl5db.pl. */
4581
4582 const char * const pdb = PerlEnv_getenv("PERL5DB");
4583
4584 if (pdb) {
4585 sv_setpv(PL_linestr, pdb);
4586 sv_catpvs(PL_linestr,";");
4587 } else {
4588 SETERRNO(0,SS_NORMAL);
4589 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4590 }
4591 } else
4592 sv_setpvs(PL_linestr,"");
c62eb204
NC
4593 if (PL_preambleav) {
4594 SV **svp = AvARRAY(PL_preambleav);
4595 SV **const end = svp + AvFILLp(PL_preambleav);
4596 while(svp <= end) {
4597 sv_catsv(PL_linestr, *svp);
4598 ++svp;
396482e1 4599 sv_catpvs(PL_linestr, ";");
91b7def8 4600 }
daba3364 4601 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4602 PL_preambleav = NULL;
91b7def8 4603 }
9f639728
FR
4604 if (PL_minus_E)
4605 sv_catpvs(PL_linestr,
4606 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4607 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4608 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4609 if (PL_minus_l)
396482e1 4610 sv_catpvs(PL_linestr,"chomp;");
3280af22 4611 if (PL_minus_a) {
3280af22 4612 if (PL_minus_F) {
3792a11b
NC
4613 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4614 || *PL_splitstr == '"')
3280af22 4615 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4616 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4617 else {
c8ef6a4b
NC
4618 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4619 bytes can be used as quoting characters. :-) */
dd374669 4620 const char *splits = PL_splitstr;
91d456ae 4621 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4622 do {
4623 /* Need to \ \s */
dd374669
AL
4624 if (*splits == '\\')
4625 sv_catpvn(PL_linestr, splits, 1);
4626 sv_catpvn(PL_linestr, splits, 1);
4627 } while (*splits++);
48c4c863
NC
4628 /* This loop will embed the trailing NUL of
4629 PL_linestr as the last thing it does before
4630 terminating. */
396482e1 4631 sv_catpvs(PL_linestr, ");");
54310121 4632 }
2304df62
AD
4633 }
4634 else
396482e1 4635 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4636 }
79072805 4637 }
396482e1 4638 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4639 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4640 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4641 PL_last_lop = PL_last_uni = NULL;
65269a95 4642 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4643 update_debugger_info(PL_linestr, NULL, 0);
79072805 4644 goto retry;
a687059c 4645 }
e929a76b 4646 do {
580561a3
Z
4647 fake_eof = 0;
4648 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4649 if (0) {
7e28d3af 4650 fake_eof:
f0e67a1d
Z
4651 fake_eof = LEX_FAKE_EOF;
4652 }
4653 PL_bufptr = PL_bufend;
17cc9359 4654 CopLINE_inc(PL_curcop);
f0e67a1d 4655 if (!lex_next_chunk(fake_eof)) {
17cc9359 4656 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4657 s = PL_bufptr;
4658 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4659 }
17cc9359 4660 CopLINE_dec(PL_curcop);
5db06880 4661#ifdef PERL_MAD
f0e67a1d 4662 if (!PL_rsfp)
cd81e915 4663 PL_realtokenstart = -1;
5db06880 4664#endif
f0e67a1d 4665 s = PL_bufptr;
7aa207d6
JH
4666 /* If it looks like the start of a BOM or raw UTF-16,
4667 * check if it in fact is. */
580561a3 4668 if (bof && PL_rsfp &&
7aa207d6
JH
4669 (*s == 0 ||
4670 *(U8*)s == 0xEF ||
4671 *(U8*)s >= 0xFE ||
4672 s[1] == 0)) {
eb160463 4673 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4674 if (bof) {
3280af22 4675 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4676 s = swallow_bom((U8*)s);
e929a76b 4677 }
378cc40b 4678 }
3280af22 4679 if (PL_doextract) {
a0d0e21e 4680 /* Incest with pod. */
5db06880
NC
4681#ifdef PERL_MAD
4682 if (PL_madskills)
cd81e915 4683 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4684#endif
01a57ef7 4685 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4686 sv_setpvs(PL_linestr, "");
3280af22
NIS
4687 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4688 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4689 PL_last_lop = PL_last_uni = NULL;
3280af22 4690 PL_doextract = FALSE;
a0d0e21e 4691 }
4e553d73 4692 }
85613cab
Z
4693 if (PL_rsfp)
4694 incline(s);
3280af22
NIS
4695 } while (PL_doextract);
4696 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4697 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4698 PL_last_lop = PL_last_uni = NULL;
57843af0 4699 if (CopLINE(PL_curcop) == 1) {
3280af22 4700 while (s < PL_bufend && isSPACE(*s))
79072805 4701 s++;
a0d0e21e 4702 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4703 s++;
5db06880
NC
4704#ifdef PERL_MAD
4705 if (PL_madskills)
cd81e915 4706 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4707#endif
bd61b366 4708 d = NULL;
3280af22 4709 if (!PL_in_eval) {
44a8e56a 4710 if (*s == '#' && *(s+1) == '!')
4711 d = s + 2;
4712#ifdef ALTERNATE_SHEBANG
4713 else {
bfed75c6 4714 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4715 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4716 d = s + (sizeof(as) - 1);
4717 }
4718#endif /* ALTERNATE_SHEBANG */
4719 }
4720 if (d) {
b8378b72 4721 char *ipath;
774d564b 4722 char *ipathend;
b8378b72 4723
774d564b 4724 while (isSPACE(*d))
b8378b72
CS
4725 d++;
4726 ipath = d;
774d564b 4727 while (*d && !isSPACE(*d))
4728 d++;
4729 ipathend = d;
4730
4731#ifdef ARG_ZERO_IS_SCRIPT
4732 if (ipathend > ipath) {
4733 /*
4734 * HP-UX (at least) sets argv[0] to the script name,
4735 * which makes $^X incorrect. And Digital UNIX and Linux,
4736 * at least, set argv[0] to the basename of the Perl
4737 * interpreter. So, having found "#!", we'll set it right.
4738 */
fafc274c
NC
4739 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4740 SVt_PV)); /* $^X */
774d564b 4741 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4742 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4743 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4744 SvSETMAGIC(x);
4745 }
556c1dec
JH
4746 else {
4747 STRLEN blen;
4748 STRLEN llen;
cfd0369c 4749 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4750 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4751 if (llen < blen) {
4752 bstart += blen - llen;
4753 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4754 sv_setpvn(x, ipath, ipathend - ipath);
4755 SvSETMAGIC(x);
4756 }
4757 }
4758 }
774d564b 4759 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4760 }
774d564b 4761#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4762
4763 /*
4764 * Look for options.
4765 */
748a9306 4766 d = instr(s,"perl -");
84e30d1a 4767 if (!d) {
748a9306 4768 d = instr(s,"perl");
84e30d1a
GS
4769#if defined(DOSISH)
4770 /* avoid getting into infinite loops when shebang
4771 * line contains "Perl" rather than "perl" */
4772 if (!d) {
4773 for (d = ipathend-4; d >= ipath; --d) {
4774 if ((*d == 'p' || *d == 'P')
4775 && !ibcmp(d, "perl", 4))
4776 {
4777 break;
4778 }
4779 }
4780 if (d < ipath)
bd61b366 4781 d = NULL;
84e30d1a
GS
4782 }
4783#endif
4784 }
44a8e56a 4785#ifdef ALTERNATE_SHEBANG
4786 /*
4787 * If the ALTERNATE_SHEBANG on this system starts with a
4788 * character that can be part of a Perl expression, then if
4789 * we see it but not "perl", we're probably looking at the
4790 * start of Perl code, not a request to hand off to some
4791 * other interpreter. Similarly, if "perl" is there, but
4792 * not in the first 'word' of the line, we assume the line
4793 * contains the start of the Perl program.
44a8e56a 4794 */
4795 if (d && *s != '#') {
f54cb97a 4796 const char *c = ipath;
44a8e56a 4797 while (*c && !strchr("; \t\r\n\f\v#", *c))
4798 c++;
4799 if (c < d)
bd61b366 4800 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4801 else
4802 *s = '#'; /* Don't try to parse shebang line */
4803 }
774d564b 4804#endif /* ALTERNATE_SHEBANG */
748a9306 4805 if (!d &&
44a8e56a 4806 *s == '#' &&
774d564b 4807 ipathend > ipath &&
3280af22 4808 !PL_minus_c &&
748a9306 4809 !instr(s,"indir") &&
3280af22 4810 instr(PL_origargv[0],"perl"))
748a9306 4811 {
27da23d5 4812 dVAR;
9f68db38 4813 char **newargv;
9f68db38 4814
774d564b 4815 *ipathend = '\0';
4816 s = ipathend + 1;
3280af22 4817 while (s < PL_bufend && isSPACE(*s))
9f68db38 4818 s++;
3280af22 4819 if (s < PL_bufend) {
d85f917e 4820 Newx(newargv,PL_origargc+3,char*);
9f68db38 4821 newargv[1] = s;
3280af22 4822 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4823 s++;
4824 *s = '\0';
3280af22 4825 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4826 }
4827 else
3280af22 4828 newargv = PL_origargv;
774d564b 4829 newargv[0] = ipath;
b35112e7 4830 PERL_FPU_PRE_EXEC
b4748376 4831 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4832 PERL_FPU_POST_EXEC
cea2e8a9 4833 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4834 }
748a9306 4835 if (d) {
c35e046a
AL
4836 while (*d && !isSPACE(*d))
4837 d++;
4838 while (SPACE_OR_TAB(*d))
4839 d++;
748a9306
LW
4840
4841 if (*d++ == '-') {
f54cb97a 4842 const bool switches_done = PL_doswitches;
fb993905
GA
4843 const U32 oldpdb = PL_perldb;
4844 const bool oldn = PL_minus_n;
4845 const bool oldp = PL_minus_p;
c7030b81 4846 const char *d1 = d;
fb993905 4847
8cc95fdb 4848 do {
4ba71d51
FC
4849 bool baduni = FALSE;
4850 if (*d1 == 'C') {
bd0ab00d
NC
4851 const char *d2 = d1 + 1;
4852 if (parse_unicode_opts((const char **)&d2)
4853 != PL_unicode)
4854 baduni = TRUE;
4ba71d51
FC
4855 }
4856 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4857 const char * const m = d1;
4858 while (*d1 && !isSPACE(*d1))
4859 d1++;
cea2e8a9 4860 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4861 (int)(d1 - m), m);
8cc95fdb 4862 }
c7030b81
NC
4863 d1 = moreswitches(d1);
4864 } while (d1);
f0b2cf55
YST
4865 if (PL_doswitches && !switches_done) {
4866 int argc = PL_origargc;
4867 char **argv = PL_origargv;
4868 do {
4869 argc--,argv++;
4870 } while (argc && argv[0][0] == '-' && argv[0][1]);
4871 init_argv_symbols(argc,argv);
4872 }
65269a95 4873 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4874 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4875 /* if we have already added "LINE: while (<>) {",
4876 we must not do it again */
748a9306 4877 {
76f68e9b 4878 sv_setpvs(PL_linestr, "");
3280af22
NIS
4879 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4880 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4881 PL_last_lop = PL_last_uni = NULL;
3280af22 4882 PL_preambled = FALSE;
65269a95 4883 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4884 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4885 goto retry;
4886 }
a0d0e21e 4887 }
79072805 4888 }
9f68db38 4889 }
79072805 4890 }
3280af22
NIS
4891 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4892 PL_bufptr = s;
4893 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4894 return yylex();
ae986130 4895 }
378cc40b 4896 goto retry;
4fdae800 4897 case '\r':
6a27c188 4898#ifdef PERL_STRICT_CR
cea2e8a9 4899 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4900 Perl_croak(aTHX_
cc507455 4901 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4902#endif
4fdae800 4903 case ' ': case '\t': case '\f': case 013:
5db06880 4904#ifdef PERL_MAD
cd81e915 4905 PL_realtokenstart = -1;
ac372eb8
RD
4906 if (!PL_thiswhite)
4907 PL_thiswhite = newSVpvs("");
4908 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4909#endif
ac372eb8 4910 s++;
378cc40b 4911 goto retry;
378cc40b 4912 case '#':
e929a76b 4913 case '\n':
5db06880 4914#ifdef PERL_MAD
cd81e915 4915 PL_realtokenstart = -1;
5db06880 4916 if (PL_madskills)
cd81e915 4917 PL_faketokens = 0;
5db06880 4918#endif
3280af22 4919 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4920 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4921 /* handle eval qq[#line 1 "foo"\n ...] */
4922 CopLINE_dec(PL_curcop);
4923 incline(s);
4924 }
5db06880
NC
4925 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4926 s = SKIPSPACE0(s);
4927 if (!PL_in_eval || PL_rsfp)
4928 incline(s);
4929 }
4930 else {
4931 d = s;
4932 while (d < PL_bufend && *d != '\n')
4933 d++;
4934 if (d < PL_bufend)
4935 d++;
4936 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4937 Perl_croak(aTHX_ "panic: input overflow");
4938#ifdef PERL_MAD
4939 if (PL_madskills)
cd81e915 4940 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4941#endif
4942 s = d;
4943 incline(s);
4944 }
3280af22
NIS
4945 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4946 PL_bufptr = s;
4947 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4948 return yylex();
a687059c 4949 }
378cc40b 4950 }
a687059c 4951 else {
5db06880
NC
4952#ifdef PERL_MAD
4953 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4954 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 4955 PL_faketokens = 0;
5db06880
NC
4956 s = SKIPSPACE0(s);
4957 TOKEN(PEG); /* make sure any #! line is accessible */
4958 }
4959 s = SKIPSPACE0(s);
4960 }
4961 else {
4962/* if (PL_madskills && PL_lex_formbrack) { */
4963 d = s;
4964 while (d < PL_bufend && *d != '\n')
4965 d++;
4966 if (d < PL_bufend)
4967 d++;
4968 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4969 Perl_croak(aTHX_ "panic: input overflow");
4970 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 4971 if (!PL_thiswhite)
6b29d1f5 4972 PL_thiswhite = newSVpvs("");
5db06880 4973 if (CopLINE(PL_curcop) == 1) {
76f68e9b 4974 sv_setpvs(PL_thiswhite, "");
cd81e915 4975 PL_faketokens = 0;
5db06880 4976 }
cd81e915 4977 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
4978 }
4979 s = d;
4980/* }
4981 *s = '\0';
4982 PL_bufend = s; */
4983 }
4984#else
378cc40b 4985 *s = '\0';
3280af22 4986 PL_bufend = s;
5db06880 4987#endif
a687059c 4988 }
378cc40b
LW
4989 goto retry;
4990 case '-':
79072805 4991 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 4992 I32 ftst = 0;
90771dc0 4993 char tmp;
e5edeb50 4994
378cc40b 4995 s++;
3280af22 4996 PL_bufptr = s;
748a9306
LW
4997 tmp = *s++;
4998
bf4acbe4 4999 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
5000 s++;
5001
5002 if (strnEQ(s,"=>",2)) {
3280af22 5003 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5004 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5005 OPERATOR('-'); /* unary minus */
5006 }
3280af22 5007 PL_last_uni = PL_oldbufptr;
748a9306 5008 switch (tmp) {
e5edeb50
JH
5009 case 'r': ftst = OP_FTEREAD; break;
5010 case 'w': ftst = OP_FTEWRITE; break;
5011 case 'x': ftst = OP_FTEEXEC; break;
5012 case 'o': ftst = OP_FTEOWNED; break;
5013 case 'R': ftst = OP_FTRREAD; break;
5014 case 'W': ftst = OP_FTRWRITE; break;
5015 case 'X': ftst = OP_FTREXEC; break;
5016 case 'O': ftst = OP_FTROWNED; break;
5017 case 'e': ftst = OP_FTIS; break;
5018 case 'z': ftst = OP_FTZERO; break;
5019 case 's': ftst = OP_FTSIZE; break;
5020 case 'f': ftst = OP_FTFILE; break;
5021 case 'd': ftst = OP_FTDIR; break;
5022 case 'l': ftst = OP_FTLINK; break;
5023 case 'p': ftst = OP_FTPIPE; break;
5024 case 'S': ftst = OP_FTSOCK; break;
5025 case 'u': ftst = OP_FTSUID; break;
5026 case 'g': ftst = OP_FTSGID; break;
5027 case 'k': ftst = OP_FTSVTX; break;
5028 case 'b': ftst = OP_FTBLK; break;
5029 case 'c': ftst = OP_FTCHR; break;
5030 case 't': ftst = OP_FTTTY; break;
5031 case 'T': ftst = OP_FTTEXT; break;
5032 case 'B': ftst = OP_FTBINARY; break;
5033 case 'M': case 'A': case 'C':
fafc274c 5034 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5035 switch (tmp) {
5036 case 'M': ftst = OP_FTMTIME; break;
5037 case 'A': ftst = OP_FTATIME; break;
5038 case 'C': ftst = OP_FTCTIME; break;
5039 default: break;
5040 }
5041 break;
378cc40b 5042 default:
378cc40b
LW
5043 break;
5044 }
e5edeb50 5045 if (ftst) {
eb160463 5046 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5047 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5048 "### Saw file test %c\n", (int)tmp);
5f80b19c 5049 } );
e5edeb50
JH
5050 FTST(ftst);
5051 }
5052 else {
5053 /* Assume it was a minus followed by a one-letter named
5054 * subroutine call (or a -bareword), then. */
95c31fe3 5055 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5056 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5057 (int) tmp);
5f80b19c 5058 } );
3cf7b4c4 5059 s = --PL_bufptr;
e5edeb50 5060 }
378cc40b 5061 }
90771dc0
NC
5062 {
5063 const char tmp = *s++;
5064 if (*s == tmp) {
5065 s++;
5066 if (PL_expect == XOPERATOR)
5067 TERM(POSTDEC);
5068 else
5069 OPERATOR(PREDEC);
5070 }
5071 else if (*s == '>') {
5072 s++;
29595ff2 5073 s = SKIPSPACE1(s);
90771dc0
NC
5074 if (isIDFIRST_lazy_if(s,UTF)) {
5075 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5076 TOKEN(ARROW);
5077 }
5078 else if (*s == '$')
5079 OPERATOR(ARROW);
5080 else
5081 TERM(ARROW);
5082 }
3280af22 5083 if (PL_expect == XOPERATOR)
90771dc0
NC
5084 Aop(OP_SUBTRACT);
5085 else {
5086 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5087 check_uni();
5088 OPERATOR('-'); /* unary minus */
79072805 5089 }
2f3197b3 5090 }
79072805 5091
378cc40b 5092 case '+':
90771dc0
NC
5093 {
5094 const char tmp = *s++;
5095 if (*s == tmp) {
5096 s++;
5097 if (PL_expect == XOPERATOR)
5098 TERM(POSTINC);
5099 else
5100 OPERATOR(PREINC);
5101 }
3280af22 5102 if (PL_expect == XOPERATOR)
90771dc0
NC
5103 Aop(OP_ADD);
5104 else {
5105 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5106 check_uni();
5107 OPERATOR('+');
5108 }
2f3197b3 5109 }
a687059c 5110
378cc40b 5111 case '*':
3280af22
NIS
5112 if (PL_expect != XOPERATOR) {
5113 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5114 PL_expect = XOPERATOR;
5115 force_ident(PL_tokenbuf, '*');
5116 if (!*PL_tokenbuf)
a0d0e21e 5117 PREREF('*');
79072805 5118 TERM('*');
a687059c 5119 }
79072805
LW
5120 s++;
5121 if (*s == '*') {
a687059c 5122 s++;
79072805 5123 PWop(OP_POW);
a687059c 5124 }
79072805
LW
5125 Mop(OP_MULTIPLY);
5126
378cc40b 5127 case '%':
3280af22 5128 if (PL_expect == XOPERATOR) {
bbce6d69 5129 ++s;
5130 Mop(OP_MODULO);
a687059c 5131 }
3280af22 5132 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5133 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5134 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5135 if (!PL_tokenbuf[1]) {
bbce6d69 5136 PREREF('%');
a687059c 5137 }
3280af22 5138 PL_pending_ident = '%';
bbce6d69 5139 TERM('%');
a687059c 5140
378cc40b 5141 case '^':
79072805 5142 s++;
a0d0e21e 5143 BOop(OP_BIT_XOR);
79072805 5144 case '[':
3280af22 5145 PL_lex_brackets++;
df3467db
IG
5146 {
5147 const char tmp = *s++;
5148 OPERATOR(tmp);
5149 }
378cc40b 5150 case '~':
0d863452 5151 if (s[1] == '~'
3e7dd34d 5152 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
5153 {
5154 s += 2;
5155 Eop(OP_SMARTMATCH);
5156 }
378cc40b 5157 case ',':
90771dc0
NC
5158 {
5159 const char tmp = *s++;
5160 OPERATOR(tmp);
5161 }
a0d0e21e
LW
5162 case ':':
5163 if (s[1] == ':') {
5164 len = 0;
0bfa2a8a 5165 goto just_a_word_zero_gv;
a0d0e21e
LW
5166 }
5167 s++;
09bef843
SB
5168 switch (PL_expect) {
5169 OP *attrs;
5db06880
NC
5170#ifdef PERL_MAD
5171 I32 stuffstart;
5172#endif
09bef843
SB
5173 case XOPERATOR:
5174 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5175 break;
5176 PL_bufptr = s; /* update in case we back off */
d83f38d8
NC
5177 if (*s == '=') {
5178 deprecate(":= for an empty attribute list");
5179 }
09bef843
SB
5180 goto grabattrs;
5181 case XATTRBLOCK:
5182 PL_expect = XBLOCK;
5183 goto grabattrs;
5184 case XATTRTERM:
5185 PL_expect = XTERMBLOCK;
5186 grabattrs:
5db06880
NC
5187#ifdef PERL_MAD
5188 stuffstart = s - SvPVX(PL_linestr) - 1;
5189#endif
29595ff2 5190 s = PEEKSPACE(s);
5f66b61c 5191 attrs = NULL;
7e2040f0 5192 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5193 I32 tmp;
5cc237b8 5194 SV *sv;
09bef843 5195 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5196 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5197 if (tmp < 0) tmp = -tmp;
5198 switch (tmp) {
5199 case KEY_or:
5200 case KEY_and:
5201 case KEY_for:
11baf631 5202 case KEY_foreach:
f9829d6b
GS
5203 case KEY_unless:
5204 case KEY_if:
5205 case KEY_while:
5206 case KEY_until:
5207 goto got_attrs;
5208 default:
5209 break;
5210 }
5211 }
5cc237b8 5212 sv = newSVpvn(s, len);
09bef843
SB
5213 if (*d == '(') {
5214 d = scan_str(d,TRUE,TRUE);
5215 if (!d) {
09bef843
SB
5216 /* MUST advance bufptr here to avoid bogus
5217 "at end of line" context messages from yyerror().
5218 */
5219 PL_bufptr = s + len;
5220 yyerror("Unterminated attribute parameter in attribute list");
5221 if (attrs)
5222 op_free(attrs);
5cc237b8 5223 sv_free(sv);
bbf60fe6 5224 return REPORT(0); /* EOF indicator */
09bef843
SB
5225 }
5226 }
5227 if (PL_lex_stuff) {
09bef843
SB
5228 sv_catsv(sv, PL_lex_stuff);
5229 attrs = append_elem(OP_LIST, attrs,
5230 newSVOP(OP_CONST, 0, sv));
5231 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5232 PL_lex_stuff = NULL;
09bef843
SB
5233 }
5234 else {
5cc237b8
BS
5235 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5236 sv_free(sv);
1108974d 5237 if (PL_in_my == KEY_our) {
df9a6019 5238 deprecate(":unique");
1108974d 5239 }
bfed75c6 5240 else
371fce9b
DM
5241 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5242 }
5243
d3cea301
SB
5244 /* NOTE: any CV attrs applied here need to be part of
5245 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5246 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5247 sv_free(sv);
78f9721b 5248 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5249 }
5250 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5251 sv_free(sv);
8e5dadda 5252 deprecate(":locked");
5cc237b8
BS
5253 }
5254 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5255 sv_free(sv);
78f9721b 5256 CvMETHOD_on(PL_compcv);
5cc237b8 5257 }
78f9721b
SM
5258 /* After we've set the flags, it could be argued that
5259 we don't need to do the attributes.pm-based setting
5260 process, and shouldn't bother appending recognized
d3cea301
SB
5261 flags. To experiment with that, uncomment the
5262 following "else". (Note that's already been
5263 uncommented. That keeps the above-applied built-in
5264 attributes from being intercepted (and possibly
5265 rejected) by a package's attribute routines, but is
5266 justified by the performance win for the common case
5267 of applying only built-in attributes.) */
0256094b 5268 else
78f9721b
SM
5269 attrs = append_elem(OP_LIST, attrs,
5270 newSVOP(OP_CONST, 0,
5cc237b8 5271 sv));
09bef843 5272 }
29595ff2 5273 s = PEEKSPACE(d);
0120eecf 5274 if (*s == ':' && s[1] != ':')
29595ff2 5275 s = PEEKSPACE(s+1);
0120eecf
GS
5276 else if (s == d)
5277 break; /* require real whitespace or :'s */
29595ff2 5278 /* XXX losing whitespace on sequential attributes here */
09bef843 5279 }
90771dc0
NC
5280 {
5281 const char tmp
5282 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5283 if (*s != ';' && *s != '}' && *s != tmp
5284 && (tmp != '=' || *s != ')')) {
5285 const char q = ((*s == '\'') ? '"' : '\'');
5286 /* If here for an expression, and parsed no attrs, back
5287 off. */
5288 if (tmp == '=' && !attrs) {
5289 s = PL_bufptr;
5290 break;
5291 }
5292 /* MUST advance bufptr here to avoid bogus "at end of line"
5293 context messages from yyerror().
5294 */
5295 PL_bufptr = s;
10edeb5d
JH
5296 yyerror( (const char *)
5297 (*s
5298 ? Perl_form(aTHX_ "Invalid separator character "
5299 "%c%c%c in attribute list", q, *s, q)
5300 : "Unterminated attribute list" ) );
90771dc0
NC
5301 if (attrs)
5302 op_free(attrs);
5303 OPERATOR(':');
09bef843 5304 }
09bef843 5305 }
f9829d6b 5306 got_attrs:
09bef843 5307 if (attrs) {
cd81e915 5308 start_force(PL_curforce);
9ded7720 5309 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5310 CURMAD('_', PL_nextwhite);
89122651 5311 force_next(THING);
5db06880
NC
5312 }
5313#ifdef PERL_MAD
5314 if (PL_madskills) {
cd81e915 5315 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5316 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5317 }
5db06880 5318#endif
09bef843
SB
5319 TOKEN(COLONATTR);
5320 }
a0d0e21e 5321 OPERATOR(':');
8990e307
LW
5322 case '(':
5323 s++;
3280af22
NIS
5324 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5325 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5326 else
3280af22 5327 PL_expect = XTERM;
29595ff2 5328 s = SKIPSPACE1(s);
a0d0e21e 5329 TOKEN('(');
378cc40b 5330 case ';':
f4dd75d9 5331 CLINE;
90771dc0
NC
5332 {
5333 const char tmp = *s++;
5334 OPERATOR(tmp);
5335 }
378cc40b 5336 case ')':
90771dc0
NC
5337 {
5338 const char tmp = *s++;
29595ff2 5339 s = SKIPSPACE1(s);
90771dc0
NC
5340 if (*s == '{')
5341 PREBLOCK(tmp);
5342 TERM(tmp);
5343 }
79072805
LW
5344 case ']':
5345 s++;
3280af22 5346 if (PL_lex_brackets <= 0)
d98d5fff 5347 yyerror("Unmatched right square bracket");
463ee0b2 5348 else
3280af22
NIS
5349 --PL_lex_brackets;
5350 if (PL_lex_state == LEX_INTERPNORMAL) {
5351 if (PL_lex_brackets == 0) {
02255c60
FC
5352 if (*s == '-' && s[1] == '>')
5353 PL_lex_state = LEX_INTERPENDMAYBE;
5354 else if (*s != '[' && *s != '{')
3280af22 5355 PL_lex_state = LEX_INTERPEND;
79072805
LW
5356 }
5357 }
4633a7c4 5358 TERM(']');
79072805
LW
5359 case '{':
5360 leftbracket:
79072805 5361 s++;
3280af22 5362 if (PL_lex_brackets > 100) {
8edd5f42 5363 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5364 }
3280af22 5365 switch (PL_expect) {
a0d0e21e 5366 case XTERM:
3280af22 5367 if (PL_lex_formbrack) {
a0d0e21e
LW
5368 s--;
5369 PRETERMBLOCK(DO);
5370 }
3280af22
NIS
5371 if (PL_oldoldbufptr == PL_last_lop)
5372 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5373 else
3280af22 5374 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5375 OPERATOR(HASHBRACK);
a0d0e21e 5376 case XOPERATOR:
bf4acbe4 5377 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5378 s++;
44a8e56a 5379 d = s;
3280af22
NIS
5380 PL_tokenbuf[0] = '\0';
5381 if (d < PL_bufend && *d == '-') {
5382 PL_tokenbuf[0] = '-';
44a8e56a 5383 d++;
bf4acbe4 5384 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5385 d++;
5386 }
7e2040f0 5387 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5388 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5389 FALSE, &len);
bf4acbe4 5390 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5391 d++;
5392 if (*d == '}') {
f54cb97a 5393 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5394 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5395 if (minus)
5396 force_next('-');
748a9306
LW
5397 }
5398 }
5399 /* FALL THROUGH */
09bef843 5400 case XATTRBLOCK:
748a9306 5401 case XBLOCK:
3280af22
NIS
5402 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5403 PL_expect = XSTATE;
a0d0e21e 5404 break;
09bef843 5405 case XATTRTERM:
a0d0e21e 5406 case XTERMBLOCK:
3280af22
NIS
5407 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5408 PL_expect = XSTATE;
a0d0e21e
LW
5409 break;
5410 default: {
f54cb97a 5411 const char *t;
3280af22
NIS
5412 if (PL_oldoldbufptr == PL_last_lop)
5413 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5414 else
3280af22 5415 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5416 s = SKIPSPACE1(s);
8452ff4b
SB
5417 if (*s == '}') {
5418 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5419 PL_expect = XTERM;
5420 /* This hack is to get the ${} in the message. */
5421 PL_bufptr = s+1;
5422 yyerror("syntax error");
5423 break;
5424 }
a0d0e21e 5425 OPERATOR(HASHBRACK);
8452ff4b 5426 }
b8a4b1be
GS
5427 /* This hack serves to disambiguate a pair of curlies
5428 * as being a block or an anon hash. Normally, expectation
5429 * determines that, but in cases where we're not in a
5430 * position to expect anything in particular (like inside
5431 * eval"") we have to resolve the ambiguity. This code
5432 * covers the case where the first term in the curlies is a
5433 * quoted string. Most other cases need to be explicitly
a0288114 5434 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5435 * curly in order to force resolution as an anon hash.
5436 *
5437 * XXX should probably propagate the outer expectation
5438 * into eval"" to rely less on this hack, but that could
5439 * potentially break current behavior of eval"".
5440 * GSAR 97-07-21
5441 */
5442 t = s;
5443 if (*s == '\'' || *s == '"' || *s == '`') {
5444 /* common case: get past first string, handling escapes */
3280af22 5445 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5446 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5447 t++;
5448 t++;
a0d0e21e 5449 }
b8a4b1be 5450 else if (*s == 'q') {
3280af22 5451 if (++t < PL_bufend
b8a4b1be 5452 && (!isALNUM(*t)
3280af22 5453 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5454 && !isALNUM(*t))))
5455 {
abc667d1 5456 /* skip q//-like construct */
f54cb97a 5457 const char *tmps;
b8a4b1be
GS
5458 char open, close, term;
5459 I32 brackets = 1;
5460
3280af22 5461 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5462 t++;
abc667d1
DM
5463 /* check for q => */
5464 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5465 OPERATOR(HASHBRACK);
5466 }
b8a4b1be
GS
5467 term = *t;
5468 open = term;
5469 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5470 term = tmps[5];
5471 close = term;
5472 if (open == close)
3280af22
NIS
5473 for (t++; t < PL_bufend; t++) {
5474 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5475 t++;
6d07e5e9 5476 else if (*t == open)
b8a4b1be
GS
5477 break;
5478 }
abc667d1 5479 else {
3280af22
NIS
5480 for (t++; t < PL_bufend; t++) {
5481 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5482 t++;
6d07e5e9 5483 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5484 break;
5485 else if (*t == open)
5486 brackets++;
5487 }
abc667d1
DM
5488 }
5489 t++;
b8a4b1be 5490 }
abc667d1
DM
5491 else
5492 /* skip plain q word */
5493 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5494 t += UTF8SKIP(t);
a0d0e21e 5495 }
7e2040f0 5496 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5497 t += UTF8SKIP(t);
7e2040f0 5498 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5499 t += UTF8SKIP(t);
a0d0e21e 5500 }
3280af22 5501 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5502 t++;
b8a4b1be
GS
5503 /* if comma follows first term, call it an anon hash */
5504 /* XXX it could be a comma expression with loop modifiers */
3280af22 5505 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5506 || (*t == '=' && t[1] == '>')))
a0d0e21e 5507 OPERATOR(HASHBRACK);
3280af22 5508 if (PL_expect == XREF)
4e4e412b 5509 PL_expect = XTERM;
a0d0e21e 5510 else {
3280af22
NIS
5511 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5512 PL_expect = XSTATE;
a0d0e21e 5513 }
8990e307 5514 }
a0d0e21e 5515 break;
463ee0b2 5516 }
6154021b 5517 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5518 if (isSPACE(*s) || *s == '#')
3280af22 5519 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5520 TOKEN('{');
378cc40b 5521 case '}':
79072805
LW
5522 rightbracket:
5523 s++;
3280af22 5524 if (PL_lex_brackets <= 0)
d98d5fff 5525 yyerror("Unmatched right curly bracket");
463ee0b2 5526 else
3280af22 5527 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5528 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5529 PL_lex_formbrack = 0;
5530 if (PL_lex_state == LEX_INTERPNORMAL) {
5531 if (PL_lex_brackets == 0) {
9059aa12
LW
5532 if (PL_expect & XFAKEBRACK) {
5533 PL_expect &= XENUMMASK;
3280af22
NIS
5534 PL_lex_state = LEX_INTERPEND;
5535 PL_bufptr = s;
5db06880
NC
5536#if 0
5537 if (PL_madskills) {
cd81e915 5538 if (!PL_thiswhite)
6b29d1f5 5539 PL_thiswhite = newSVpvs("");
76f68e9b 5540 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5541 }
5542#endif
cea2e8a9 5543 return yylex(); /* ignore fake brackets */
79072805 5544 }
fa83b5b6 5545 if (*s == '-' && s[1] == '>')
3280af22 5546 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5547 else if (*s != '[' && *s != '{')
3280af22 5548 PL_lex_state = LEX_INTERPEND;
79072805
LW
5549 }
5550 }
9059aa12
LW
5551 if (PL_expect & XFAKEBRACK) {
5552 PL_expect &= XENUMMASK;
3280af22 5553 PL_bufptr = s;
cea2e8a9 5554 return yylex(); /* ignore fake brackets */
748a9306 5555 }
cd81e915 5556 start_force(PL_curforce);
5db06880
NC
5557 if (PL_madskills) {
5558 curmad('X', newSVpvn(s-1,1));
cd81e915 5559 CURMAD('_', PL_thiswhite);
5db06880 5560 }
79072805 5561 force_next('}');
5db06880 5562#ifdef PERL_MAD
cd81e915 5563 if (!PL_thistoken)
6b29d1f5 5564 PL_thistoken = newSVpvs("");
5db06880 5565#endif
79072805 5566 TOKEN(';');
378cc40b
LW
5567 case '&':
5568 s++;
90771dc0 5569 if (*s++ == '&')
a0d0e21e 5570 AOPERATOR(ANDAND);
378cc40b 5571 s--;
3280af22 5572 if (PL_expect == XOPERATOR) {
041457d9
DM
5573 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5574 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5575 {
57843af0 5576 CopLINE_dec(PL_curcop);
f1f66076 5577 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5578 CopLINE_inc(PL_curcop);
463ee0b2 5579 }
79072805 5580 BAop(OP_BIT_AND);
463ee0b2 5581 }
79072805 5582
3280af22
NIS
5583 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5584 if (*PL_tokenbuf) {
5585 PL_expect = XOPERATOR;
5586 force_ident(PL_tokenbuf, '&');
463ee0b2 5587 }
79072805
LW
5588 else
5589 PREREF('&');
6154021b 5590 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5591 TERM('&');
5592
378cc40b
LW
5593 case '|':
5594 s++;
90771dc0 5595 if (*s++ == '|')
a0d0e21e 5596 AOPERATOR(OROR);
378cc40b 5597 s--;
79072805 5598 BOop(OP_BIT_OR);
378cc40b
LW
5599 case '=':
5600 s++;
748a9306 5601 {
90771dc0
NC
5602 const char tmp = *s++;
5603 if (tmp == '=')
5604 Eop(OP_EQ);
5605 if (tmp == '>')
5606 OPERATOR(',');
5607 if (tmp == '~')
5608 PMop(OP_MATCH);
5609 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5610 && strchr("+-*/%.^&|<",tmp))
5611 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5612 "Reversed %c= operator",(int)tmp);
5613 s--;
5614 if (PL_expect == XSTATE && isALPHA(tmp) &&
5615 (s == PL_linestart+1 || s[-2] == '\n') )
5616 {
5617 if (PL_in_eval && !PL_rsfp) {
5618 d = PL_bufend;
5619 while (s < d) {
5620 if (*s++ == '\n') {
5621 incline(s);
5622 if (strnEQ(s,"=cut",4)) {
5623 s = strchr(s,'\n');
5624 if (s)
5625 s++;
5626 else
5627 s = d;
5628 incline(s);
5629 goto retry;
5630 }
5631 }
a5f75d66 5632 }
90771dc0 5633 goto retry;
a5f75d66 5634 }
5db06880
NC
5635#ifdef PERL_MAD
5636 if (PL_madskills) {
cd81e915 5637 if (!PL_thiswhite)
6b29d1f5 5638 PL_thiswhite = newSVpvs("");
cd81e915 5639 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5640 PL_bufend - PL_linestart);
5641 }
5642#endif
90771dc0
NC
5643 s = PL_bufend;
5644 PL_doextract = TRUE;
5645 goto retry;
a5f75d66 5646 }
a0d0e21e 5647 }
3280af22 5648 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5649 const char *t = s;
51882d45 5650#ifdef PERL_STRICT_CR
c35e046a 5651 while (SPACE_OR_TAB(*t))
51882d45 5652#else
c35e046a 5653 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5654#endif
c35e046a 5655 t++;
a0d0e21e
LW
5656 if (*t == '\n' || *t == '#') {
5657 s--;
3280af22 5658 PL_expect = XBLOCK;
a0d0e21e
LW
5659 goto leftbracket;
5660 }
79072805 5661 }
6154021b 5662 pl_yylval.ival = 0;
a0d0e21e 5663 OPERATOR(ASSIGNOP);
378cc40b
LW
5664 case '!':
5665 s++;
90771dc0
NC
5666 {
5667 const char tmp = *s++;
5668 if (tmp == '=') {
5669 /* was this !=~ where !~ was meant?
5670 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5671
5672 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5673 const char *t = s+1;
5674
5675 while (t < PL_bufend && isSPACE(*t))
5676 ++t;
5677
5678 if (*t == '/' || *t == '?' ||
5679 ((*t == 'm' || *t == 's' || *t == 'y')
5680 && !isALNUM(t[1])) ||
5681 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5682 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5683 "!=~ should be !~");
5684 }
5685 Eop(OP_NE);
5686 }
5687 if (tmp == '~')
5688 PMop(OP_NOT);
5689 }
378cc40b
LW
5690 s--;
5691 OPERATOR('!');
5692 case '<':
3280af22 5693 if (PL_expect != XOPERATOR) {
93a17b20 5694 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5695 check_uni();
79072805
LW
5696 if (s[1] == '<')
5697 s = scan_heredoc(s);
5698 else
5699 s = scan_inputsymbol(s);
5700 TERM(sublex_start());
378cc40b
LW
5701 }
5702 s++;
90771dc0
NC
5703 {
5704 char tmp = *s++;
5705 if (tmp == '<')
5706 SHop(OP_LEFT_SHIFT);
5707 if (tmp == '=') {
5708 tmp = *s++;
5709 if (tmp == '>')
5710 Eop(OP_NCMP);
5711 s--;
5712 Rop(OP_LE);
5713 }
395c3793 5714 }
378cc40b 5715 s--;
79072805 5716 Rop(OP_LT);
378cc40b
LW
5717 case '>':
5718 s++;
90771dc0
NC
5719 {
5720 const char tmp = *s++;
5721 if (tmp == '>')
5722 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5723 else if (tmp == '=')
90771dc0
NC
5724 Rop(OP_GE);
5725 }
378cc40b 5726 s--;
79072805 5727 Rop(OP_GT);
378cc40b
LW
5728
5729 case '$':
bbce6d69 5730 CLINE;
5731
3280af22
NIS
5732 if (PL_expect == XOPERATOR) {
5733 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5734 return deprecate_commaless_var_list();
a0d0e21e 5735 }
8990e307 5736 }
a0d0e21e 5737
c0b977fd 5738 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5739 PL_tokenbuf[0] = '@';
376b8730
SM
5740 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5741 sizeof PL_tokenbuf - 1, FALSE);
5742 if (PL_expect == XOPERATOR)
5743 no_op("Array length", s);
3280af22 5744 if (!PL_tokenbuf[1])
a0d0e21e 5745 PREREF(DOLSHARP);
3280af22
NIS
5746 PL_expect = XOPERATOR;
5747 PL_pending_ident = '#';
463ee0b2 5748 TOKEN(DOLSHARP);
79072805 5749 }
bbce6d69 5750
3280af22 5751 PL_tokenbuf[0] = '$';
376b8730
SM
5752 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5753 sizeof PL_tokenbuf - 1, FALSE);
5754 if (PL_expect == XOPERATOR)
5755 no_op("Scalar", s);
3280af22
NIS
5756 if (!PL_tokenbuf[1]) {
5757 if (s == PL_bufend)
bbce6d69 5758 yyerror("Final $ should be \\$ or $name");
5759 PREREF('$');
8990e307 5760 }
a0d0e21e 5761
bbce6d69 5762 /* This kludge not intended to be bulletproof. */
3280af22 5763 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5764 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5765 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5766 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5767 TERM(THING);
5768 }
5769
ff68c719 5770 d = s;
90771dc0
NC
5771 {
5772 const char tmp = *s;
ae28bb2a 5773 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5774 s = SKIPSPACE1(s);
ff68c719 5775
90771dc0
NC
5776 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5777 && intuit_more(s)) {
5778 if (*s == '[') {
5779 PL_tokenbuf[0] = '@';
5780 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5781 char *t = s+1;
5782
5783 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5784 t++;
90771dc0 5785 if (*t++ == ',') {
29595ff2 5786 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5787 while (t < PL_bufend && *t != ']')
5788 t++;
9014280d 5789 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5790 "Multidimensional syntax %.*s not supported",
36c7798d 5791 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5792 }
748a9306 5793 }
93a17b20 5794 }
90771dc0
NC
5795 else if (*s == '{') {
5796 char *t;
5797 PL_tokenbuf[0] = '%';
5798 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5799 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5800 {
5801 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5802 do {
5803 t++;
5804 } while (isSPACE(*t));
90771dc0 5805 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5806 STRLEN len;
90771dc0 5807 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5808 &len);
c35e046a
AL
5809 while (isSPACE(*t))
5810 t++;
780a5241 5811 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5812 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5813 "You need to quote \"%s\"",
5814 tmpbuf);
5815 }
5816 }
5817 }
93a17b20 5818 }
bbce6d69 5819
90771dc0
NC
5820 PL_expect = XOPERATOR;
5821 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5822 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5823 if (!islop || PL_last_lop_op == OP_GREPSTART)
5824 PL_expect = XOPERATOR;
5825 else if (strchr("$@\"'`q", *s))
5826 PL_expect = XTERM; /* e.g. print $fh "foo" */
5827 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5828 PL_expect = XTERM; /* e.g. print $fh &sub */
5829 else if (isIDFIRST_lazy_if(s,UTF)) {
5830 char tmpbuf[sizeof PL_tokenbuf];
5831 int t2;
5832 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5833 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5834 /* binary operators exclude handle interpretations */
5835 switch (t2) {
5836 case -KEY_x:
5837 case -KEY_eq:
5838 case -KEY_ne:
5839 case -KEY_gt:
5840 case -KEY_lt:
5841 case -KEY_ge:
5842 case -KEY_le:
5843 case -KEY_cmp:
5844 break;
5845 default:
5846 PL_expect = XTERM; /* e.g. print $fh length() */
5847 break;
5848 }
5849 }
5850 else {
5851 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5852 }
5853 }
90771dc0
NC
5854 else if (isDIGIT(*s))
5855 PL_expect = XTERM; /* e.g. print $fh 3 */
5856 else if (*s == '.' && isDIGIT(s[1]))
5857 PL_expect = XTERM; /* e.g. print $fh .3 */
5858 else if ((*s == '?' || *s == '-' || *s == '+')
5859 && !isSPACE(s[1]) && s[1] != '=')
5860 PL_expect = XTERM; /* e.g. print $fh -1 */
5861 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5862 && s[1] != '/')
5863 PL_expect = XTERM; /* e.g. print $fh /.../
5864 XXX except DORDOR operator
5865 */
5866 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5867 && s[2] != '=')
5868 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5869 }
bbce6d69 5870 }
3280af22 5871 PL_pending_ident = '$';
79072805 5872 TOKEN('$');
378cc40b
LW
5873
5874 case '@':
3280af22 5875 if (PL_expect == XOPERATOR)
bbce6d69 5876 no_op("Array", s);
3280af22
NIS
5877 PL_tokenbuf[0] = '@';
5878 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5879 if (!PL_tokenbuf[1]) {
bbce6d69 5880 PREREF('@');
5881 }
3280af22 5882 if (PL_lex_state == LEX_NORMAL)
29595ff2 5883 s = SKIPSPACE1(s);
3280af22 5884 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5885 if (*s == '{')
3280af22 5886 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5887
5888 /* Warn about @ where they meant $. */
041457d9
DM
5889 if (*s == '[' || *s == '{') {
5890 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5891 const char *t = s + 1;
7e2040f0 5892 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5893 t++;
5894 if (*t == '}' || *t == ']') {
5895 t++;
29595ff2 5896 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5897 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5898 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5899 (int)(t-PL_bufptr), PL_bufptr,
5900 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5901 }
93a17b20
LW
5902 }
5903 }
463ee0b2 5904 }
3280af22 5905 PL_pending_ident = '@';
79072805 5906 TERM('@');
378cc40b 5907
c963b151 5908 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5909 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5910 s += 2;
5911 AOPERATOR(DORDOR);
5912 }
c963b151 5913 case '?': /* may either be conditional or pattern */
be25f609 5914 if (PL_expect == XOPERATOR) {
90771dc0 5915 char tmp = *s++;
c963b151 5916 if(tmp == '?') {
be25f609 5917 OPERATOR('?');
c963b151
BD
5918 }
5919 else {
5920 tmp = *s++;
5921 if(tmp == '/') {
5922 /* A // operator. */
5923 AOPERATOR(DORDOR);
5924 }
5925 else {
5926 s--;
5927 Mop(OP_DIVIDE);
5928 }
5929 }
5930 }
5931 else {
5932 /* Disable warning on "study /blah/" */
5933 if (PL_oldoldbufptr == PL_last_uni
5934 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5935 || memNE(PL_last_uni, "study", 5)
5936 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5937 ))
5938 check_uni();
5939 s = scan_pat(s,OP_MATCH);
5940 TERM(sublex_start());
5941 }
378cc40b
LW
5942
5943 case '.':
51882d45
GS
5944 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5945#ifdef PERL_STRICT_CR
5946 && s[1] == '\n'
5947#else
5948 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5949#endif
5950 && (s == PL_linestart || s[-1] == '\n') )
5951 {
3280af22
NIS
5952 PL_lex_formbrack = 0;
5953 PL_expect = XSTATE;
79072805
LW
5954 goto rightbracket;
5955 }
be25f609 5956 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5957 s += 3;
5958 OPERATOR(YADAYADA);
5959 }
3280af22 5960 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 5961 char tmp = *s++;
a687059c
LW
5962 if (*s == tmp) {
5963 s++;
2f3197b3
LW
5964 if (*s == tmp) {
5965 s++;
6154021b 5966 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
5967 }
5968 else
6154021b 5969 pl_yylval.ival = 0;
378cc40b 5970 OPERATOR(DOTDOT);
a687059c 5971 }
79072805 5972 Aop(OP_CONCAT);
378cc40b
LW
5973 }
5974 /* FALL THROUGH */
5975 case '0': case '1': case '2': case '3': case '4':
5976 case '5': case '6': case '7': case '8': case '9':
6154021b 5977 s = scan_num(s, &pl_yylval);
931e0695 5978 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 5979 if (PL_expect == XOPERATOR)
8990e307 5980 no_op("Number",s);
79072805
LW
5981 TERM(THING);
5982
5983 case '\'':
5db06880 5984 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5985 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5986 if (PL_expect == XOPERATOR) {
5987 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5988 return deprecate_commaless_var_list();
a0d0e21e 5989 }
463ee0b2 5990 else
8990e307 5991 no_op("String",s);
463ee0b2 5992 }
79072805 5993 if (!s)
d4c19fe8 5994 missingterm(NULL);
6154021b 5995 pl_yylval.ival = OP_CONST;
79072805
LW
5996 TERM(sublex_start());
5997
5998 case '"':
5db06880 5999 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6000 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
6001 if (PL_expect == XOPERATOR) {
6002 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 6003 return deprecate_commaless_var_list();
a0d0e21e 6004 }
463ee0b2 6005 else
8990e307 6006 no_op("String",s);
463ee0b2 6007 }
79072805 6008 if (!s)
d4c19fe8 6009 missingterm(NULL);
6154021b 6010 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6011 /* FIXME. I think that this can be const if char *d is replaced by
6012 more localised variables. */
3280af22 6013 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6014 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6015 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6016 break;
6017 }
6018 }
79072805
LW
6019 TERM(sublex_start());
6020
6021 case '`':
5db06880 6022 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6023 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6024 if (PL_expect == XOPERATOR)
8990e307 6025 no_op("Backticks",s);
79072805 6026 if (!s)
d4c19fe8 6027 missingterm(NULL);
9b201d7d 6028 readpipe_override();
79072805
LW
6029 TERM(sublex_start());
6030
6031 case '\\':
6032 s++;
a2a5de95
NC
6033 if (PL_lex_inwhat && isDIGIT(*s))
6034 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6035 *s, *s);
3280af22 6036 if (PL_expect == XOPERATOR)
8990e307 6037 no_op("Backslash",s);
79072805
LW
6038 OPERATOR(REFGEN);
6039
a7cb1f99 6040 case 'v':
e526c9e6 6041 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6042 char *start = s + 2;
dd629d5b 6043 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6044 start++;
6045 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6046 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6047 TERM(THING);
6048 }
e526c9e6 6049 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6050 else if (!isALPHA(*start) && (PL_expect == XTERM
6051 || PL_expect == XREF || PL_expect == XSTATE
6052 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6053 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6054 if (!gv) {
6154021b 6055 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6056 TERM(THING);
6057 }
6058 }
a7cb1f99
GS
6059 }
6060 goto keylookup;
79072805 6061 case 'x':
3280af22 6062 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6063 s++;
6064 Mop(OP_REPEAT);
2f3197b3 6065 }
79072805
LW
6066 goto keylookup;
6067
378cc40b 6068 case '_':
79072805
LW
6069 case 'a': case 'A':
6070 case 'b': case 'B':
6071 case 'c': case 'C':
6072 case 'd': case 'D':
6073 case 'e': case 'E':
6074 case 'f': case 'F':
6075 case 'g': case 'G':
6076 case 'h': case 'H':
6077 case 'i': case 'I':
6078 case 'j': case 'J':
6079 case 'k': case 'K':
6080 case 'l': case 'L':
6081 case 'm': case 'M':
6082 case 'n': case 'N':
6083 case 'o': case 'O':
6084 case 'p': case 'P':
6085 case 'q': case 'Q':
6086 case 'r': case 'R':
6087 case 's': case 'S':
6088 case 't': case 'T':
6089 case 'u': case 'U':
a7cb1f99 6090 case 'V':
79072805
LW
6091 case 'w': case 'W':
6092 case 'X':
6093 case 'y': case 'Y':
6094 case 'z': case 'Z':
6095
49dc05e3 6096 keylookup: {
88e1f1a2 6097 bool anydelim;
90771dc0 6098 I32 tmp;
10edeb5d
JH
6099
6100 orig_keyword = 0;
6101 gv = NULL;
6102 gvp = NULL;
49dc05e3 6103
3280af22
NIS
6104 PL_bufptr = s;
6105 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6106
6107 /* Some keywords can be followed by any delimiter, including ':' */
88e1f1a2 6108 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
155aba94
GS
6109 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6110 (PL_tokenbuf[0] == 'q' &&
6111 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 6112
6113 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6114 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6115 goto just_a_word;
6116
3643fb5f 6117 d = s;
3280af22 6118 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6119 d++; /* no comments skipped here, or s### is misparsed */
6120
748a9306 6121 /* Is this a word before a => operator? */
1c3923b3 6122 if (*d == '=' && d[1] == '>') {
748a9306 6123 CLINE;
6154021b 6124 pl_yylval.opval
d0a148a6
NC
6125 = (OP*)newSVOP(OP_CONST, 0,
6126 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6127 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6128 TERM(WORD);
6129 }
6130
88e1f1a2
JV
6131 /* Check for plugged-in keyword */
6132 {
6133 OP *o;
6134 int result;
6135 char *saved_bufptr = PL_bufptr;
6136 PL_bufptr = s;
16c91539 6137 result = PL_keyword_plugin(aTHX_ PL_tokenbuf, len, &o);
88e1f1a2
JV
6138 s = PL_bufptr;
6139 if (result == KEYWORD_PLUGIN_DECLINE) {
6140 /* not a plugged-in keyword */
6141 PL_bufptr = saved_bufptr;
6142 } else if (result == KEYWORD_PLUGIN_STMT) {
6143 pl_yylval.opval = o;
6144 CLINE;
6145 PL_expect = XSTATE;
6146 return REPORT(PLUGSTMT);
6147 } else if (result == KEYWORD_PLUGIN_EXPR) {
6148 pl_yylval.opval = o;
6149 CLINE;
6150 PL_expect = XOPERATOR;
6151 return REPORT(PLUGEXPR);
6152 } else {
6153 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6154 PL_tokenbuf);
6155 }
6156 }
6157
6158 /* Check for built-in keyword */
6159 tmp = keyword(PL_tokenbuf, len, 0);
6160
6161 /* Is this a label? */
6162 if (!anydelim && PL_expect == XSTATE
6163 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6164 s = d + 1;
6165 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6166 CLINE;
6167 TOKEN(LABEL);
6168 }
6169
a0d0e21e 6170 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6171 GV *ogv = NULL; /* override (winner) */
6172 GV *hgv = NULL; /* hidden (loser) */
3280af22 6173 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6174 CV *cv;
90e5519e 6175 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6176 (cv = GvCVu(gv)))
6177 {
6178 if (GvIMPORTED_CV(gv))
6179 ogv = gv;
6180 else if (! CvMETHOD(cv))
6181 hgv = gv;
6182 }
6183 if (!ogv &&
3280af22 6184 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6185 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6186 GvCVu(gv) && GvIMPORTED_CV(gv))
6187 {
6188 ogv = gv;
6189 }
6190 }
6191 if (ogv) {
30fe34ed 6192 orig_keyword = tmp;
56f7f34b 6193 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6194 }
6195 else if (gv && !gvp
6196 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6197 && GvCVu(gv))
6e7b2336
GS
6198 {
6199 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6200 }
56f7f34b
CS
6201 else { /* no override */
6202 tmp = -tmp;
a2a5de95
NC
6203 if (tmp == KEY_dump) {
6204 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6205 "dump() better written as CORE::dump()");
ac206dc8 6206 }
a0714e2c 6207 gv = NULL;
56f7f34b 6208 gvp = 0;
a2a5de95
NC
6209 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6210 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
de2b151d
JM
6211 "Ambiguous call resolved as CORE::%s(), "
6212 "qualify as such or use &",
6213 GvENAME(hgv));
49dc05e3 6214 }
a0d0e21e
LW
6215 }
6216
6217 reserved_word:
6218 switch (tmp) {
79072805
LW
6219
6220 default: /* not a keyword */
0bfa2a8a
NC
6221 /* Trade off - by using this evil construction we can pull the
6222 variable gv into the block labelled keylookup. If not, then
6223 we have to give it function scope so that the goto from the
6224 earlier ':' case doesn't bypass the initialisation. */
6225 if (0) {
6226 just_a_word_zero_gv:
6227 gv = NULL;
6228 gvp = NULL;
8bee0991 6229 orig_keyword = 0;
0bfa2a8a 6230 }
93a17b20 6231 just_a_word: {
96e4d5b1 6232 SV *sv;
ce29ac45 6233 int pkgname = 0;
f54cb97a 6234 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6235 OP *rv2cv_op;
5069cc75 6236 CV *cv;
5db06880 6237#ifdef PERL_MAD
cd81e915 6238 SV *nextPL_nextwhite = 0;
5db06880
NC
6239#endif
6240
8990e307
LW
6241
6242 /* Get the rest if it looks like a package qualifier */
6243
155aba94 6244 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6245 STRLEN morelen;
3280af22 6246 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6247 TRUE, &morelen);
6248 if (!morelen)
cea2e8a9 6249 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6250 *s == '\'' ? "'" : "::");
c3e0f903 6251 len += morelen;
ce29ac45 6252 pkgname = 1;
a0d0e21e 6253 }
8990e307 6254
3280af22
NIS
6255 if (PL_expect == XOPERATOR) {
6256 if (PL_bufptr == PL_linestart) {
57843af0 6257 CopLINE_dec(PL_curcop);
f1f66076 6258 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6259 CopLINE_inc(PL_curcop);
463ee0b2
LW
6260 }
6261 else
54310121 6262 no_op("Bareword",s);
463ee0b2 6263 }
8990e307 6264
c3e0f903
GS
6265 /* Look for a subroutine with this name in current package,
6266 unless name is "Foo::", in which case Foo is a bearword
6267 (and a package name). */
6268
5db06880 6269 if (len > 2 && !PL_madskills &&
3280af22 6270 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6271 {
f776e3cd 6272 if (ckWARN(WARN_BAREWORD)
90e5519e 6273 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6274 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6275 "Bareword \"%s\" refers to nonexistent package",
3280af22 6276 PL_tokenbuf);
c3e0f903 6277 len -= 2;
3280af22 6278 PL_tokenbuf[len] = '\0';
a0714e2c 6279 gv = NULL;
c3e0f903
GS
6280 gvp = 0;
6281 }
6282 else {
62d55b22
NC
6283 if (!gv) {
6284 /* Mustn't actually add anything to a symbol table.
6285 But also don't want to "initialise" any placeholder
6286 constants that might already be there into full
6287 blown PVGVs with attached PVCV. */
90e5519e
NC
6288 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6289 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6290 }
b3d904f3 6291 len = 0;
c3e0f903
GS
6292 }
6293
6294 /* if we saw a global override before, get the right name */
8990e307 6295
37bb7629
EB
6296 sv = S_newSV_maybe_utf8(aTHX_ PL_tokenbuf,
6297 len ? len : strlen(PL_tokenbuf));
49dc05e3 6298 if (gvp) {
37bb7629 6299 SV * const tmp_sv = sv;
396482e1 6300 sv = newSVpvs("CORE::GLOBAL::");
37bb7629
EB
6301 sv_catsv(sv, tmp_sv);
6302 SvREFCNT_dec(tmp_sv);
8a7a129d 6303 }
37bb7629 6304
5db06880 6305#ifdef PERL_MAD
cd81e915
NC
6306 if (PL_madskills && !PL_thistoken) {
6307 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6308 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6309 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6310 }
6311#endif
8990e307 6312
a0d0e21e 6313 /* Presume this is going to be a bareword of some sort. */
a0d0e21e 6314 CLINE;
6154021b
RGS
6315 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6316 pl_yylval.opval->op_private = OPpCONST_BARE;
a0d0e21e 6317
c3e0f903 6318 /* And if "Foo::", then that's what it certainly is. */
c3e0f903
GS
6319 if (len)
6320 goto safe_bareword;
6321
f7461760
Z
6322 cv = NULL;
6323 {
6324 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6325 const_op->op_private = OPpCONST_BARE;
6326 rv2cv_op = newCVREF(0, const_op);
6327 }
6328 if (rv2cv_op->op_type == OP_RV2CV &&
6329 (rv2cv_op->op_flags & OPf_KIDS)) {
6330 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6331 switch (rv_op->op_type) {
6332 case OP_CONST: {
6333 SV *sv = cSVOPx_sv(rv_op);
6334 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6335 cv = (CV*)SvRV(sv);
6336 } break;
6337 case OP_GV: {
6338 GV *gv = cGVOPx_gv(rv_op);
6339 CV *maybe_cv = GvCVu(gv);
6340 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6341 cv = maybe_cv;
6342 } break;
6343 }
6344 }
5069cc75 6345
8990e307
LW
6346 /* See if it's the indirect object for a list operator. */
6347
3280af22
NIS
6348 if (PL_oldoldbufptr &&
6349 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6350 (PL_oldoldbufptr == PL_last_lop
6351 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6352 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6353 (PL_expect == XREF ||
6354 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6355 {
748a9306
LW
6356 bool immediate_paren = *s == '(';
6357
a0d0e21e 6358 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6359 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6360#ifdef PERL_MAD
cd81e915 6361 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6362#endif
a0d0e21e
LW
6363
6364 /* Two barewords in a row may indicate method call. */
6365
62d55b22 6366 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6367 (tmp = intuit_method(s, gv, cv))) {
6368 op_free(rv2cv_op);
bbf60fe6 6369 return REPORT(tmp);
f7461760 6370 }
a0d0e21e
LW
6371
6372 /* If not a declared subroutine, it's an indirect object. */
6373 /* (But it's an indir obj regardless for sort.) */
7294df96 6374 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6375
7294df96
RGS
6376 if (
6377 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6378 (!cv &&
a9ef352a 6379 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6380 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6381 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6382 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6383 )
a9ef352a 6384 {
3280af22 6385 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6386 goto bareword;
93a17b20
LW
6387 }
6388 }
8990e307 6389
3280af22 6390 PL_expect = XOPERATOR;
5db06880
NC
6391#ifdef PERL_MAD
6392 if (isSPACE(*s))
cd81e915
NC
6393 s = SKIPSPACE2(s,nextPL_nextwhite);
6394 PL_nextwhite = nextPL_nextwhite;
5db06880 6395#else
8990e307 6396 s = skipspace(s);
5db06880 6397#endif
1c3923b3
GS
6398
6399 /* Is this a word before a => operator? */
ce29ac45 6400 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6401 op_free(rv2cv_op);
1c3923b3 6402 CLINE;
6154021b 6403 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6404 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6405 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6406 TERM(WORD);
6407 }
6408
6409 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6410 if (*s == '(') {
79072805 6411 CLINE;
5069cc75 6412 if (cv) {
c35e046a
AL
6413 d = s + 1;
6414 while (SPACE_OR_TAB(*d))
6415 d++;
f7461760 6416 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6417 s = d + 1;
c631f32b 6418 goto its_constant;
96e4d5b1 6419 }
6420 }
5db06880
NC
6421#ifdef PERL_MAD
6422 if (PL_madskills) {
cd81e915
NC
6423 PL_nextwhite = PL_thiswhite;
6424 PL_thiswhite = 0;
5db06880 6425 }
cd81e915 6426 start_force(PL_curforce);
5db06880 6427#endif
6154021b 6428 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6429 PL_expect = XOPERATOR;
5db06880
NC
6430#ifdef PERL_MAD
6431 if (PL_madskills) {
cd81e915
NC
6432 PL_nextwhite = nextPL_nextwhite;
6433 curmad('X', PL_thistoken);
6b29d1f5 6434 PL_thistoken = newSVpvs("");
5db06880
NC
6435 }
6436#endif
f7461760 6437 op_free(rv2cv_op);
93a17b20 6438 force_next(WORD);
6154021b 6439 pl_yylval.ival = 0;
463ee0b2 6440 TOKEN('&');
79072805 6441 }
93a17b20 6442
a0d0e21e 6443 /* If followed by var or block, call it a method (unless sub) */
8990e307 6444
f7461760
Z
6445 if ((*s == '$' || *s == '{') && !cv) {
6446 op_free(rv2cv_op);
3280af22
NIS
6447 PL_last_lop = PL_oldbufptr;
6448 PL_last_lop_op = OP_METHOD;
93a17b20 6449 PREBLOCK(METHOD);
463ee0b2
LW
6450 }
6451
8990e307
LW
6452 /* If followed by a bareword, see if it looks like indir obj. */
6453
30fe34ed
RGS
6454 if (!orig_keyword
6455 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6456 && (tmp = intuit_method(s, gv, cv))) {
6457 op_free(rv2cv_op);
bbf60fe6 6458 return REPORT(tmp);
f7461760 6459 }
93a17b20 6460
8990e307
LW
6461 /* Not a method, so call it a subroutine (if defined) */
6462
5069cc75 6463 if (cv) {
9b387841
NC
6464 if (lastchar == '-')
6465 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6466 "Ambiguous use of -%s resolved as -&%s()",
6467 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6468 /* Check for a constant sub */
f7461760 6469 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6470 its_constant:
f7461760 6471 op_free(rv2cv_op);
6154021b
RGS
6472 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6473 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6474 pl_yylval.opval->op_private = 0;
96e4d5b1 6475 TOKEN(WORD);
89bfa8cd 6476 }
6477
6154021b 6478 op_free(pl_yylval.opval);
f7461760 6479 pl_yylval.opval = rv2cv_op;
6154021b 6480 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6481 PL_last_lop = PL_oldbufptr;
bf848113 6482 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6483 /* Is there a prototype? */
5db06880
NC
6484 if (
6485#ifdef PERL_MAD
6486 cv &&
6487#endif
d9f2850e
RGS
6488 SvPOK(cv))
6489 {
5f66b61c 6490 STRLEN protolen;
daba3364 6491 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6492 if (!protolen)
4633a7c4 6493 TERM(FUNC0SUB);
0f5d0394
AE
6494 while (*proto == ';')
6495 proto++;
649d02de
FC
6496 if (
6497 (
6498 (
6499 *proto == '$' || *proto == '_'
6500 || *proto == '*'
6501 )
6502 && proto[1] == '\0'
6503 )
6504 || (
6505 *proto == '\\' && proto[1] && proto[2] == '\0'
6506 )
6507 )
6508 OPERATOR(UNIOPSUB);
6509 if (*proto == '\\' && proto[1] == '[') {
6510 const char *p = proto + 2;
6511 while(*p && *p != ']')
6512 ++p;
6513 if(*p == ']' && !p[1]) OPERATOR(UNIOPSUB);
6514 }
7a52d87a 6515 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6516 if (PL_curstash)
6517 sv_setpvs(PL_subname, "__ANON__");
6518 else
6519 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6520 PREBLOCK(LSTOPSUB);
6521 }
a9ef352a 6522 }
5db06880
NC
6523#ifdef PERL_MAD
6524 {
6525 if (PL_madskills) {
cd81e915
NC
6526 PL_nextwhite = PL_thiswhite;
6527 PL_thiswhite = 0;
5db06880 6528 }
cd81e915 6529 start_force(PL_curforce);
6154021b 6530 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6531 PL_expect = XTERM;
6532 if (PL_madskills) {
cd81e915
NC
6533 PL_nextwhite = nextPL_nextwhite;
6534 curmad('X', PL_thistoken);
6b29d1f5 6535 PL_thistoken = newSVpvs("");
5db06880
NC
6536 }
6537 force_next(WORD);
6538 TOKEN(NOAMP);
6539 }
6540 }
6541
6542 /* Guess harder when madskills require "best effort". */
6543 if (PL_madskills && (!gv || !GvCVu(gv))) {
6544 int probable_sub = 0;
6545 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6546 probable_sub = 1;
6547 else if (isALPHA(*s)) {
6548 char tmpbuf[1024];
6549 STRLEN tmplen;
6550 d = s;
6551 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6552 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6553 probable_sub = 1;
6554 else {
6555 while (d < PL_bufend && isSPACE(*d))
6556 d++;
6557 if (*d == '=' && d[1] == '>')
6558 probable_sub = 1;
6559 }
6560 }
6561 if (probable_sub) {
7a6d04f4 6562 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6563 op_free(pl_yylval.opval);
f7461760 6564 pl_yylval.opval = rv2cv_op;
6154021b 6565 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6566 PL_last_lop = PL_oldbufptr;
6567 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6568 PL_nextwhite = PL_thiswhite;
6569 PL_thiswhite = 0;
6570 start_force(PL_curforce);
6154021b 6571 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6572 PL_expect = XTERM;
cd81e915
NC
6573 PL_nextwhite = nextPL_nextwhite;
6574 curmad('X', PL_thistoken);
6b29d1f5 6575 PL_thistoken = newSVpvs("");
5db06880
NC
6576 force_next(WORD);
6577 TOKEN(NOAMP);
6578 }
6579#else
6154021b 6580 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6581 PL_expect = XTERM;
8990e307
LW
6582 force_next(WORD);
6583 TOKEN(NOAMP);
5db06880 6584#endif
8990e307 6585 }
748a9306 6586
8990e307
LW
6587 /* Call it a bare word */
6588
5603f27d 6589 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6590 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6591 else {
9a073a1d
RGS
6592 bareword:
6593 /* after "print" and similar functions (corresponding to
6594 * "F? L" in opcode.pl), whatever wasn't already parsed as
6595 * a filehandle should be subject to "strict subs".
6596 * Likewise for the optional indirect-object argument to system
6597 * or exec, which can't be a bareword */
6598 if ((PL_last_lop_op == OP_PRINT
6599 || PL_last_lop_op == OP_PRTF
6600 || PL_last_lop_op == OP_SAY
6601 || PL_last_lop_op == OP_SYSTEM
6602 || PL_last_lop_op == OP_EXEC)
6603 && (PL_hints & HINT_STRICT_SUBS))
6604 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6605 if (lastchar != '-') {
6606 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6607 d = PL_tokenbuf;
6608 while (isLOWER(*d))
6609 d++;
da51bb9b 6610 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6611 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6612 PL_tokenbuf);
6613 }
748a9306
LW
6614 }
6615 }
f7461760 6616 op_free(rv2cv_op);
c3e0f903
GS
6617
6618 safe_bareword:
9b387841
NC
6619 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6620 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6621 "Operator or semicolon missing before %c%s",
6622 lastchar, PL_tokenbuf);
6623 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6624 "Ambiguous use of %c resolved as operator %c",
6625 lastchar, lastchar);
748a9306 6626 }
93a17b20 6627 TOKEN(WORD);
79072805 6628 }
79072805 6629
68dc0745 6630 case KEY___FILE__:
6154021b 6631 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6632 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6633 TERM(THING);
6634
79072805 6635 case KEY___LINE__:
6154021b 6636 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6637 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6638 TERM(THING);
68dc0745 6639
6640 case KEY___PACKAGE__:
6154021b 6641 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6642 (PL_curstash
5aaec2b4 6643 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6644 : &PL_sv_undef));
79072805 6645 TERM(THING);
79072805 6646
e50aee73 6647 case KEY___DATA__:
79072805
LW
6648 case KEY___END__: {
6649 GV *gv;
3280af22 6650 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6651 const char *pname = "main";
3280af22 6652 if (PL_tokenbuf[2] == 'D')
bfcb3514 6653 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6654 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6655 SVt_PVIO);
a5f75d66 6656 GvMULTI_on(gv);
79072805 6657 if (!GvIO(gv))
a0d0e21e 6658 GvIOp(gv) = newIO();
3280af22 6659 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6660#if defined(HAS_FCNTL) && defined(F_SETFD)
6661 {
f54cb97a 6662 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6663 fcntl(fd,F_SETFD,fd >= 3);
6664 }
79072805 6665#endif
fd049845 6666 /* Mark this internal pseudo-handle as clean */
6667 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6668 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6669 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6670 else
50952442 6671 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6672#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6673 /* if the script was opened in binmode, we need to revert
53129d29 6674 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6675 * XXX this is a questionable hack at best. */
53129d29
GS
6676 if (PL_bufend-PL_bufptr > 2
6677 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6678 {
6679 Off_t loc = 0;
50952442 6680 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6681 loc = PerlIO_tell(PL_rsfp);
6682 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6683 }
2986a63f
JH
6684#ifdef NETWARE
6685 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6686#else
c39cd008 6687 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6688#endif /* NETWARE */
1143fce0
JH
6689#ifdef PERLIO_IS_STDIO /* really? */
6690# if defined(__BORLANDC__)
cb359b41
JH
6691 /* XXX see note in do_binmode() */
6692 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6693# endif
6694#endif
c39cd008
GS
6695 if (loc > 0)
6696 PerlIO_seek(PL_rsfp, loc, 0);
6697 }
6698 }
6699#endif
7948272d 6700#ifdef PERLIO_LAYERS
52d2e0f4
JH
6701 if (!IN_BYTES) {
6702 if (UTF)
6703 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6704 else if (PL_encoding) {
6705 SV *name;
6706 dSP;
6707 ENTER;
6708 SAVETMPS;
6709 PUSHMARK(sp);
6710 EXTEND(SP, 1);
6711 XPUSHs(PL_encoding);
6712 PUTBACK;
6713 call_method("name", G_SCALAR);
6714 SPAGAIN;
6715 name = POPs;
6716 PUTBACK;
bfed75c6 6717 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6718 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6719 SVfARG(name)));
52d2e0f4
JH
6720 FREETMPS;
6721 LEAVE;
6722 }
6723 }
7948272d 6724#endif
5db06880
NC
6725#ifdef PERL_MAD
6726 if (PL_madskills) {
cd81e915
NC
6727 if (PL_realtokenstart >= 0) {
6728 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6729 if (!PL_endwhite)
6b29d1f5 6730 PL_endwhite = newSVpvs("");
cd81e915
NC
6731 sv_catsv(PL_endwhite, PL_thiswhite);
6732 PL_thiswhite = 0;
6733 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6734 PL_realtokenstart = -1;
5db06880 6735 }
5cc814fd
NC
6736 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6737 != NULL) ;
5db06880
NC
6738 }
6739#endif
4608196e 6740 PL_rsfp = NULL;
79072805
LW
6741 }
6742 goto fake_eof;
e929a76b 6743 }
de3bb511 6744
8990e307 6745 case KEY_AUTOLOAD:
ed6116ce 6746 case KEY_DESTROY:
79072805 6747 case KEY_BEGIN:
3c10abe3 6748 case KEY_UNITCHECK:
7d30b5c4 6749 case KEY_CHECK:
7d07dbc2 6750 case KEY_INIT:
7d30b5c4 6751 case KEY_END:
3280af22
NIS
6752 if (PL_expect == XSTATE) {
6753 s = PL_bufptr;
93a17b20 6754 goto really_sub;
79072805
LW
6755 }
6756 goto just_a_word;
6757
a0d0e21e
LW
6758 case KEY_CORE:
6759 if (*s == ':' && s[1] == ':') {
6760 s += 2;
748a9306 6761 d = s;
3280af22 6762 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6763 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6764 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6765 if (tmp < 0)
6766 tmp = -tmp;
850e8516 6767 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6768 /* that's a way to remember we saw "CORE::" */
850e8516 6769 orig_keyword = tmp;
a0d0e21e
LW
6770 goto reserved_word;
6771 }
6772 goto just_a_word;
6773
463ee0b2
LW
6774 case KEY_abs:
6775 UNI(OP_ABS);
6776
79072805
LW
6777 case KEY_alarm:
6778 UNI(OP_ALARM);
6779
6780 case KEY_accept:
a0d0e21e 6781 LOP(OP_ACCEPT,XTERM);
79072805 6782
463ee0b2
LW
6783 case KEY_and:
6784 OPERATOR(ANDOP);
6785
79072805 6786 case KEY_atan2:
a0d0e21e 6787 LOP(OP_ATAN2,XTERM);
85e6fe83 6788
79072805 6789 case KEY_bind:
a0d0e21e 6790 LOP(OP_BIND,XTERM);
79072805
LW
6791
6792 case KEY_binmode:
1c1fc3ea 6793 LOP(OP_BINMODE,XTERM);
79072805
LW
6794
6795 case KEY_bless:
a0d0e21e 6796 LOP(OP_BLESS,XTERM);
79072805 6797
0d863452
RH
6798 case KEY_break:
6799 FUN0(OP_BREAK);
6800
79072805
LW
6801 case KEY_chop:
6802 UNI(OP_CHOP);
6803
6804 case KEY_continue:
0d863452
RH
6805 /* When 'use switch' is in effect, continue has a dual
6806 life as a control operator. */
6807 {
ef89dcc3 6808 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6809 PREBLOCK(CONTINUE);
6810 else {
6811 /* We have to disambiguate the two senses of
6812 "continue". If the next token is a '{' then
6813 treat it as the start of a continue block;
6814 otherwise treat it as a control operator.
6815 */
6816 s = skipspace(s);
6817 if (*s == '{')
79072805 6818 PREBLOCK(CONTINUE);
0d863452
RH
6819 else
6820 FUN0(OP_CONTINUE);
6821 }
6822 }
79072805
LW
6823
6824 case KEY_chdir:
fafc274c
NC
6825 /* may use HOME */
6826 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6827 UNI(OP_CHDIR);
6828
6829 case KEY_close:
6830 UNI(OP_CLOSE);
6831
6832 case KEY_closedir:
6833 UNI(OP_CLOSEDIR);
6834
6835 case KEY_cmp:
6836 Eop(OP_SCMP);
6837
6838 case KEY_caller:
6839 UNI(OP_CALLER);
6840
6841 case KEY_crypt:
6842#ifdef FCRYPT
f4c556ac
GS
6843 if (!PL_cryptseen) {
6844 PL_cryptseen = TRUE;
de3bb511 6845 init_des();
f4c556ac 6846 }
a687059c 6847#endif
a0d0e21e 6848 LOP(OP_CRYPT,XTERM);
79072805
LW
6849
6850 case KEY_chmod:
a0d0e21e 6851 LOP(OP_CHMOD,XTERM);
79072805
LW
6852
6853 case KEY_chown:
a0d0e21e 6854 LOP(OP_CHOWN,XTERM);
79072805
LW
6855
6856 case KEY_connect:
a0d0e21e 6857 LOP(OP_CONNECT,XTERM);
79072805 6858
463ee0b2
LW
6859 case KEY_chr:
6860 UNI(OP_CHR);
6861
79072805
LW
6862 case KEY_cos:
6863 UNI(OP_COS);
6864
6865 case KEY_chroot:
6866 UNI(OP_CHROOT);
6867
0d863452
RH
6868 case KEY_default:
6869 PREBLOCK(DEFAULT);
6870
79072805 6871 case KEY_do:
29595ff2 6872 s = SKIPSPACE1(s);
79072805 6873 if (*s == '{')
a0d0e21e 6874 PRETERMBLOCK(DO);
79072805 6875 if (*s != '\'')
89c5585f 6876 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6877 if (orig_keyword == KEY_do) {
6878 orig_keyword = 0;
6154021b 6879 pl_yylval.ival = 1;
850e8516
RGS
6880 }
6881 else
6154021b 6882 pl_yylval.ival = 0;
378cc40b 6883 OPERATOR(DO);
79072805
LW
6884
6885 case KEY_die:
3280af22 6886 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6887 LOP(OP_DIE,XTERM);
79072805
LW
6888
6889 case KEY_defined:
6890 UNI(OP_DEFINED);
6891
6892 case KEY_delete:
a0d0e21e 6893 UNI(OP_DELETE);
79072805
LW
6894
6895 case KEY_dbmopen:
5c1737d1 6896 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
a0d0e21e 6897 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6898
6899 case KEY_dbmclose:
6900 UNI(OP_DBMCLOSE);
6901
6902 case KEY_dump:
a0d0e21e 6903 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6904 LOOPX(OP_DUMP);
6905
6906 case KEY_else:
6907 PREBLOCK(ELSE);
6908
6909 case KEY_elsif:
6154021b 6910 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6911 OPERATOR(ELSIF);
6912
6913 case KEY_eq:
6914 Eop(OP_SEQ);
6915
a0d0e21e
LW
6916 case KEY_exists:
6917 UNI(OP_EXISTS);
4e553d73 6918
79072805 6919 case KEY_exit:
5db06880
NC
6920 if (PL_madskills)
6921 UNI(OP_INT);
79072805
LW
6922 UNI(OP_EXIT);
6923
6924 case KEY_eval:
29595ff2 6925 s = SKIPSPACE1(s);
32e2a35d
RGS
6926 if (*s == '{') { /* block eval */
6927 PL_expect = XTERMBLOCK;
6928 UNIBRACK(OP_ENTERTRY);
6929 }
6930 else { /* string eval */
6931 PL_expect = XTERM;
6932 UNIBRACK(OP_ENTEREVAL);
6933 }
79072805
LW
6934
6935 case KEY_eof:
6936 UNI(OP_EOF);
6937
6938 case KEY_exp:
6939 UNI(OP_EXP);
6940
6941 case KEY_each:
6942 UNI(OP_EACH);
6943
6944 case KEY_exec:
a0d0e21e 6945 LOP(OP_EXEC,XREF);
79072805
LW
6946
6947 case KEY_endhostent:
6948 FUN0(OP_EHOSTENT);
6949
6950 case KEY_endnetent:
6951 FUN0(OP_ENETENT);
6952
6953 case KEY_endservent:
6954 FUN0(OP_ESERVENT);
6955
6956 case KEY_endprotoent:
6957 FUN0(OP_EPROTOENT);
6958
6959 case KEY_endpwent:
6960 FUN0(OP_EPWENT);
6961
6962 case KEY_endgrent:
6963 FUN0(OP_EGRENT);
6964
6965 case KEY_for:
6966 case KEY_foreach:
6154021b 6967 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 6968 s = SKIPSPACE1(s);
7e2040f0 6969 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 6970 char *p = s;
5db06880
NC
6971#ifdef PERL_MAD
6972 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6973#endif
6974
3280af22 6975 if ((PL_bufend - p) >= 3 &&
55497cff 6976 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6977 p += 2;
77ca0c92
LW
6978 else if ((PL_bufend - p) >= 4 &&
6979 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6980 p += 3;
29595ff2 6981 p = PEEKSPACE(p);
7e2040f0 6982 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
6983 p = scan_ident(p, PL_bufend,
6984 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 6985 p = PEEKSPACE(p);
77ca0c92
LW
6986 }
6987 if (*p != '$')
cea2e8a9 6988 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
6989#ifdef PERL_MAD
6990 s = SvPVX(PL_linestr) + soff;
6991#endif
55497cff 6992 }
79072805
LW
6993 OPERATOR(FOR);
6994
6995 case KEY_formline:
a0d0e21e 6996 LOP(OP_FORMLINE,XTERM);
79072805
LW
6997
6998 case KEY_fork:
6999 FUN0(OP_FORK);
7000
7001 case KEY_fcntl:
a0d0e21e 7002 LOP(OP_FCNTL,XTERM);
79072805
LW
7003
7004 case KEY_fileno:
7005 UNI(OP_FILENO);
7006
7007 case KEY_flock:
a0d0e21e 7008 LOP(OP_FLOCK,XTERM);
79072805
LW
7009
7010 case KEY_gt:
7011 Rop(OP_SGT);
7012
7013 case KEY_ge:
7014 Rop(OP_SGE);
7015
7016 case KEY_grep:
2c38e13d 7017 LOP(OP_GREPSTART, XREF);
79072805
LW
7018
7019 case KEY_goto:
a0d0e21e 7020 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7021 LOOPX(OP_GOTO);
7022
7023 case KEY_gmtime:
7024 UNI(OP_GMTIME);
7025
7026 case KEY_getc:
6f33ba73 7027 UNIDOR(OP_GETC);
79072805
LW
7028
7029 case KEY_getppid:
7030 FUN0(OP_GETPPID);
7031
7032 case KEY_getpgrp:
7033 UNI(OP_GETPGRP);
7034
7035 case KEY_getpriority:
a0d0e21e 7036 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7037
7038 case KEY_getprotobyname:
7039 UNI(OP_GPBYNAME);
7040
7041 case KEY_getprotobynumber:
a0d0e21e 7042 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7043
7044 case KEY_getprotoent:
7045 FUN0(OP_GPROTOENT);
7046
7047 case KEY_getpwent:
7048 FUN0(OP_GPWENT);
7049
7050 case KEY_getpwnam:
ff68c719 7051 UNI(OP_GPWNAM);
79072805
LW
7052
7053 case KEY_getpwuid:
ff68c719 7054 UNI(OP_GPWUID);
79072805
LW
7055
7056 case KEY_getpeername:
7057 UNI(OP_GETPEERNAME);
7058
7059 case KEY_gethostbyname:
7060 UNI(OP_GHBYNAME);
7061
7062 case KEY_gethostbyaddr:
a0d0e21e 7063 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7064
7065 case KEY_gethostent:
7066 FUN0(OP_GHOSTENT);
7067
7068 case KEY_getnetbyname:
7069 UNI(OP_GNBYNAME);
7070
7071 case KEY_getnetbyaddr:
a0d0e21e 7072 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7073
7074 case KEY_getnetent:
7075 FUN0(OP_GNETENT);
7076
7077 case KEY_getservbyname:
a0d0e21e 7078 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7079
7080 case KEY_getservbyport:
a0d0e21e 7081 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7082
7083 case KEY_getservent:
7084 FUN0(OP_GSERVENT);
7085
7086 case KEY_getsockname:
7087 UNI(OP_GETSOCKNAME);
7088
7089 case KEY_getsockopt:
a0d0e21e 7090 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7091
7092 case KEY_getgrent:
7093 FUN0(OP_GGRENT);
7094
7095 case KEY_getgrnam:
ff68c719 7096 UNI(OP_GGRNAM);
79072805
LW
7097
7098 case KEY_getgrgid:
ff68c719 7099 UNI(OP_GGRGID);
79072805
LW
7100
7101 case KEY_getlogin:
7102 FUN0(OP_GETLOGIN);
7103
0d863452 7104 case KEY_given:
6154021b 7105 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7106 OPERATOR(GIVEN);
7107
93a17b20 7108 case KEY_glob:
a0d0e21e 7109 LOP(OP_GLOB,XTERM);
93a17b20 7110
79072805
LW
7111 case KEY_hex:
7112 UNI(OP_HEX);
7113
7114 case KEY_if:
6154021b 7115 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7116 OPERATOR(IF);
7117
7118 case KEY_index:
a0d0e21e 7119 LOP(OP_INDEX,XTERM);
79072805
LW
7120
7121 case KEY_int:
7122 UNI(OP_INT);
7123
7124 case KEY_ioctl:
a0d0e21e 7125 LOP(OP_IOCTL,XTERM);
79072805
LW
7126
7127 case KEY_join:
a0d0e21e 7128 LOP(OP_JOIN,XTERM);
79072805
LW
7129
7130 case KEY_keys:
7131 UNI(OP_KEYS);
7132
7133 case KEY_kill:
a0d0e21e 7134 LOP(OP_KILL,XTERM);
79072805
LW
7135
7136 case KEY_last:
a0d0e21e 7137 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7138 LOOPX(OP_LAST);
4e553d73 7139
79072805
LW
7140 case KEY_lc:
7141 UNI(OP_LC);
7142
7143 case KEY_lcfirst:
7144 UNI(OP_LCFIRST);
7145
7146 case KEY_local:
6154021b 7147 pl_yylval.ival = 0;
79072805
LW
7148 OPERATOR(LOCAL);
7149
7150 case KEY_length:
7151 UNI(OP_LENGTH);
7152
7153 case KEY_lt:
7154 Rop(OP_SLT);
7155
7156 case KEY_le:
7157 Rop(OP_SLE);
7158
7159 case KEY_localtime:
7160 UNI(OP_LOCALTIME);
7161
7162 case KEY_log:
7163 UNI(OP_LOG);
7164
7165 case KEY_link:
a0d0e21e 7166 LOP(OP_LINK,XTERM);
79072805
LW
7167
7168 case KEY_listen:
a0d0e21e 7169 LOP(OP_LISTEN,XTERM);
79072805 7170
c0329465
MB
7171 case KEY_lock:
7172 UNI(OP_LOCK);
7173
79072805
LW
7174 case KEY_lstat:
7175 UNI(OP_LSTAT);
7176
7177 case KEY_m:
8782bef2 7178 s = scan_pat(s,OP_MATCH);
79072805
LW
7179 TERM(sublex_start());
7180
a0d0e21e 7181 case KEY_map:
2c38e13d 7182 LOP(OP_MAPSTART, XREF);
4e4e412b 7183
79072805 7184 case KEY_mkdir:
a0d0e21e 7185 LOP(OP_MKDIR,XTERM);
79072805
LW
7186
7187 case KEY_msgctl:
a0d0e21e 7188 LOP(OP_MSGCTL,XTERM);
79072805
LW
7189
7190 case KEY_msgget:
a0d0e21e 7191 LOP(OP_MSGGET,XTERM);
79072805
LW
7192
7193 case KEY_msgrcv:
a0d0e21e 7194 LOP(OP_MSGRCV,XTERM);
79072805
LW
7195
7196 case KEY_msgsnd:
a0d0e21e 7197 LOP(OP_MSGSND,XTERM);
79072805 7198
77ca0c92 7199 case KEY_our:
93a17b20 7200 case KEY_my:
952306ac 7201 case KEY_state:
eac04b2e 7202 PL_in_my = (U16)tmp;
29595ff2 7203 s = SKIPSPACE1(s);
7e2040f0 7204 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7205#ifdef PERL_MAD
7206 char* start = s;
7207#endif
3280af22 7208 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7209 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7210 goto really_sub;
def3634b 7211 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7212 if (!PL_in_my_stash) {
c750a3ec 7213 char tmpbuf[1024];
3280af22 7214 PL_bufptr = s;
d9fad198 7215 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7216 yyerror(tmpbuf);
7217 }
5db06880
NC
7218#ifdef PERL_MAD
7219 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7220 sv_catsv(PL_thistoken, PL_nextwhite);
7221 PL_nextwhite = 0;
7222 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7223 }
7224#endif
c750a3ec 7225 }
6154021b 7226 pl_yylval.ival = 1;
55497cff 7227 OPERATOR(MY);
93a17b20 7228
79072805 7229 case KEY_next:
a0d0e21e 7230 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7231 LOOPX(OP_NEXT);
7232
7233 case KEY_ne:
7234 Eop(OP_SNE);
7235
a0d0e21e 7236 case KEY_no:
468aa647 7237 s = tokenize_use(0, s);
a0d0e21e
LW
7238 OPERATOR(USE);
7239
7240 case KEY_not:
29595ff2 7241 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
7242 FUN1(OP_NOT);
7243 else
7244 OPERATOR(NOTOP);
a0d0e21e 7245
79072805 7246 case KEY_open:
29595ff2 7247 s = SKIPSPACE1(s);
7e2040f0 7248 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7249 const char *t;
c35e046a
AL
7250 for (d = s; isALNUM_lazy_if(d,UTF);)
7251 d++;
7252 for (t=d; isSPACE(*t);)
7253 t++;
e2ab214b 7254 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7255 /* [perl #16184] */
7256 && !(t[0] == '=' && t[1] == '>')
7257 ) {
5f66b61c 7258 int parms_len = (int)(d-s);
9014280d 7259 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7260 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7261 parms_len, s, parms_len, s);
66fbe8fb 7262 }
93a17b20 7263 }
a0d0e21e 7264 LOP(OP_OPEN,XTERM);
79072805 7265
463ee0b2 7266 case KEY_or:
6154021b 7267 pl_yylval.ival = OP_OR;
463ee0b2
LW
7268 OPERATOR(OROP);
7269
79072805
LW
7270 case KEY_ord:
7271 UNI(OP_ORD);
7272
7273 case KEY_oct:
7274 UNI(OP_OCT);
7275
7276 case KEY_opendir:
a0d0e21e 7277 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7278
7279 case KEY_print:
3280af22 7280 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7281 LOP(OP_PRINT,XREF);
79072805
LW
7282
7283 case KEY_printf:
3280af22 7284 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7285 LOP(OP_PRTF,XREF);
79072805 7286
c07a80fd 7287 case KEY_prototype:
7288 UNI(OP_PROTOTYPE);
7289
79072805 7290 case KEY_push:
a0d0e21e 7291 LOP(OP_PUSH,XTERM);
79072805
LW
7292
7293 case KEY_pop:
6f33ba73 7294 UNIDOR(OP_POP);
79072805 7295
a0d0e21e 7296 case KEY_pos:
6f33ba73 7297 UNIDOR(OP_POS);
4e553d73 7298
79072805 7299 case KEY_pack:
a0d0e21e 7300 LOP(OP_PACK,XTERM);
79072805
LW
7301
7302 case KEY_package:
a0d0e21e 7303 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7304 s = SKIPSPACE1(s);
91152fc1 7305 s = force_strict_version(s);
4e4da3ac 7306 PL_lex_expect = XBLOCK;
79072805
LW
7307 OPERATOR(PACKAGE);
7308
7309 case KEY_pipe:
a0d0e21e 7310 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7311
7312 case KEY_q:
5db06880 7313 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7314 if (!s)
d4c19fe8 7315 missingterm(NULL);
6154021b 7316 pl_yylval.ival = OP_CONST;
79072805
LW
7317 TERM(sublex_start());
7318
a0d0e21e
LW
7319 case KEY_quotemeta:
7320 UNI(OP_QUOTEMETA);
7321
ea25a9b2
Z
7322 case KEY_qw: {
7323 OP *words = NULL;
5db06880 7324 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7325 if (!s)
d4c19fe8 7326 missingterm(NULL);
3480a8d2 7327 PL_expect = XOPERATOR;
8127e0e3 7328 if (SvCUR(PL_lex_stuff)) {
8127e0e3 7329 int warned = 0;
3280af22 7330 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7331 while (len) {
d4c19fe8
AL
7332 for (; isSPACE(*d) && len; --len, ++d)
7333 /**/;
8127e0e3 7334 if (len) {
d4c19fe8 7335 SV *sv;
f54cb97a 7336 const char *b = d;
e476b1b5 7337 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7338 for (; !isSPACE(*d) && len; --len, ++d) {
7339 if (*d == ',') {
9014280d 7340 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7341 "Possible attempt to separate words with commas");
7342 ++warned;
7343 }
7344 else if (*d == '#') {
9014280d 7345 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7346 "Possible attempt to put comments in qw() list");
7347 ++warned;
7348 }
7349 }
7350 }
7351 else {
d4c19fe8
AL
7352 for (; !isSPACE(*d) && len; --len, ++d)
7353 /**/;
8127e0e3 7354 }
740cce10 7355 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8127e0e3 7356 words = append_elem(OP_LIST, words,
7948272d 7357 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7358 }
7359 }
7360 }
ea25a9b2
Z
7361 if (!words)
7362 words = newNULLLIST();
37fd879b 7363 if (PL_lex_stuff) {
8127e0e3 7364 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7365 PL_lex_stuff = NULL;
37fd879b 7366 }
ea25a9b2
Z
7367 PL_expect = XOPERATOR;
7368 pl_yylval.opval = sawparens(words);
7369 TOKEN(QWLIST);
7370 }
8990e307 7371
79072805 7372 case KEY_qq:
5db06880 7373 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7374 if (!s)
d4c19fe8 7375 missingterm(NULL);
6154021b 7376 pl_yylval.ival = OP_STRINGIFY;
3280af22 7377 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7378 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7379 TERM(sublex_start());
7380
8782bef2
GB
7381 case KEY_qr:
7382 s = scan_pat(s,OP_QR);
7383 TERM(sublex_start());
7384
79072805 7385 case KEY_qx:
5db06880 7386 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7387 if (!s)
d4c19fe8 7388 missingterm(NULL);
9b201d7d 7389 readpipe_override();
79072805
LW
7390 TERM(sublex_start());
7391
7392 case KEY_return:
7393 OLDLOP(OP_RETURN);
7394
7395 case KEY_require:
29595ff2 7396 s = SKIPSPACE1(s);
e759cc13
RGS
7397 if (isDIGIT(*s)) {
7398 s = force_version(s, FALSE);
a7cb1f99 7399 }
e759cc13
RGS
7400 else if (*s != 'v' || !isDIGIT(s[1])
7401 || (s = force_version(s, TRUE), *s == 'v'))
7402 {
a7cb1f99
GS
7403 *PL_tokenbuf = '\0';
7404 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7405 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7406 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7407 else if (*s == '<')
7408 yyerror("<> should be quotes");
7409 }
a72a1c8b
RGS
7410 if (orig_keyword == KEY_require) {
7411 orig_keyword = 0;
6154021b 7412 pl_yylval.ival = 1;
a72a1c8b
RGS
7413 }
7414 else
6154021b 7415 pl_yylval.ival = 0;
a72a1c8b
RGS
7416 PL_expect = XTERM;
7417 PL_bufptr = s;
7418 PL_last_uni = PL_oldbufptr;
7419 PL_last_lop_op = OP_REQUIRE;
7420 s = skipspace(s);
7421 return REPORT( (int)REQUIRE );
79072805
LW
7422
7423 case KEY_reset:
7424 UNI(OP_RESET);
7425
7426 case KEY_redo:
a0d0e21e 7427 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7428 LOOPX(OP_REDO);
7429
7430 case KEY_rename:
a0d0e21e 7431 LOP(OP_RENAME,XTERM);
79072805
LW
7432
7433 case KEY_rand:
7434 UNI(OP_RAND);
7435
7436 case KEY_rmdir:
7437 UNI(OP_RMDIR);
7438
7439 case KEY_rindex:
a0d0e21e 7440 LOP(OP_RINDEX,XTERM);
79072805
LW
7441
7442 case KEY_read:
a0d0e21e 7443 LOP(OP_READ,XTERM);
79072805
LW
7444
7445 case KEY_readdir:
7446 UNI(OP_READDIR);
7447
93a17b20 7448 case KEY_readline:
6f33ba73 7449 UNIDOR(OP_READLINE);
93a17b20
LW
7450
7451 case KEY_readpipe:
0858480c 7452 UNIDOR(OP_BACKTICK);
93a17b20 7453
79072805
LW
7454 case KEY_rewinddir:
7455 UNI(OP_REWINDDIR);
7456
7457 case KEY_recv:
a0d0e21e 7458 LOP(OP_RECV,XTERM);
79072805
LW
7459
7460 case KEY_reverse:
a0d0e21e 7461 LOP(OP_REVERSE,XTERM);
79072805
LW
7462
7463 case KEY_readlink:
6f33ba73 7464 UNIDOR(OP_READLINK);
79072805
LW
7465
7466 case KEY_ref:
7467 UNI(OP_REF);
7468
7469 case KEY_s:
7470 s = scan_subst(s);
6154021b 7471 if (pl_yylval.opval)
79072805
LW
7472 TERM(sublex_start());
7473 else
7474 TOKEN(1); /* force error */
7475
0d863452
RH
7476 case KEY_say:
7477 checkcomma(s,PL_tokenbuf,"filehandle");
7478 LOP(OP_SAY,XREF);
7479
a0d0e21e
LW
7480 case KEY_chomp:
7481 UNI(OP_CHOMP);
4e553d73 7482
79072805
LW
7483 case KEY_scalar:
7484 UNI(OP_SCALAR);
7485
7486 case KEY_select:
a0d0e21e 7487 LOP(OP_SELECT,XTERM);
79072805
LW
7488
7489 case KEY_seek:
a0d0e21e 7490 LOP(OP_SEEK,XTERM);
79072805
LW
7491
7492 case KEY_semctl:
a0d0e21e 7493 LOP(OP_SEMCTL,XTERM);
79072805
LW
7494
7495 case KEY_semget:
a0d0e21e 7496 LOP(OP_SEMGET,XTERM);
79072805
LW
7497
7498 case KEY_semop:
a0d0e21e 7499 LOP(OP_SEMOP,XTERM);
79072805
LW
7500
7501 case KEY_send:
a0d0e21e 7502 LOP(OP_SEND,XTERM);
79072805
LW
7503
7504 case KEY_setpgrp:
a0d0e21e 7505 LOP(OP_SETPGRP,XTERM);
79072805
LW
7506
7507 case KEY_setpriority:
a0d0e21e 7508 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7509
7510 case KEY_sethostent:
ff68c719 7511 UNI(OP_SHOSTENT);
79072805
LW
7512
7513 case KEY_setnetent:
ff68c719 7514 UNI(OP_SNETENT);
79072805
LW
7515
7516 case KEY_setservent:
ff68c719 7517 UNI(OP_SSERVENT);
79072805
LW
7518
7519 case KEY_setprotoent:
ff68c719 7520 UNI(OP_SPROTOENT);
79072805
LW
7521
7522 case KEY_setpwent:
7523 FUN0(OP_SPWENT);
7524
7525 case KEY_setgrent:
7526 FUN0(OP_SGRENT);
7527
7528 case KEY_seekdir:
a0d0e21e 7529 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7530
7531 case KEY_setsockopt:
a0d0e21e 7532 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7533
7534 case KEY_shift:
6f33ba73 7535 UNIDOR(OP_SHIFT);
79072805
LW
7536
7537 case KEY_shmctl:
a0d0e21e 7538 LOP(OP_SHMCTL,XTERM);
79072805
LW
7539
7540 case KEY_shmget:
a0d0e21e 7541 LOP(OP_SHMGET,XTERM);
79072805
LW
7542
7543 case KEY_shmread:
a0d0e21e 7544 LOP(OP_SHMREAD,XTERM);
79072805
LW
7545
7546 case KEY_shmwrite:
a0d0e21e 7547 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7548
7549 case KEY_shutdown:
a0d0e21e 7550 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7551
7552 case KEY_sin:
7553 UNI(OP_SIN);
7554
7555 case KEY_sleep:
7556 UNI(OP_SLEEP);
7557
7558 case KEY_socket:
a0d0e21e 7559 LOP(OP_SOCKET,XTERM);
79072805
LW
7560
7561 case KEY_socketpair:
a0d0e21e 7562 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7563
7564 case KEY_sort:
3280af22 7565 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7566 s = SKIPSPACE1(s);
79072805 7567 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7568 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7569 PL_expect = XTERM;
15f0808c 7570 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7571 LOP(OP_SORT,XREF);
79072805
LW
7572
7573 case KEY_split:
a0d0e21e 7574 LOP(OP_SPLIT,XTERM);
79072805
LW
7575
7576 case KEY_sprintf:
a0d0e21e 7577 LOP(OP_SPRINTF,XTERM);
79072805
LW
7578
7579 case KEY_splice:
a0d0e21e 7580 LOP(OP_SPLICE,XTERM);
79072805
LW
7581
7582 case KEY_sqrt:
7583 UNI(OP_SQRT);
7584
7585 case KEY_srand:
7586 UNI(OP_SRAND);
7587
7588 case KEY_stat:
7589 UNI(OP_STAT);
7590
7591 case KEY_study:
79072805
LW
7592 UNI(OP_STUDY);
7593
7594 case KEY_substr:
a0d0e21e 7595 LOP(OP_SUBSTR,XTERM);
79072805
LW
7596
7597 case KEY_format:
7598 case KEY_sub:
93a17b20 7599 really_sub:
09bef843 7600 {
3280af22 7601 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7602 SSize_t tboffset = 0;
09bef843 7603 expectation attrful;
28cc6278 7604 bool have_name, have_proto;
f54cb97a 7605 const int key = tmp;
09bef843 7606
5db06880
NC
7607#ifdef PERL_MAD
7608 SV *tmpwhite = 0;
7609
cd81e915 7610 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7611 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7612 PL_thistoken = 0;
5db06880
NC
7613
7614 d = s;
7615 s = SKIPSPACE2(s,tmpwhite);
7616#else
09bef843 7617 s = skipspace(s);
5db06880 7618#endif
09bef843 7619
7e2040f0 7620 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7621 (*s == ':' && s[1] == ':'))
7622 {
5db06880 7623#ifdef PERL_MAD
4f61fd4b 7624 SV *nametoke = NULL;
5db06880
NC
7625#endif
7626
09bef843
SB
7627 PL_expect = XBLOCK;
7628 attrful = XATTRBLOCK;
b1b65b59
JH
7629 /* remember buffer pos'n for later force_word */
7630 tboffset = s - PL_oldbufptr;
09bef843 7631 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7632#ifdef PERL_MAD
7633 if (PL_madskills)
7634 nametoke = newSVpvn(s, d - s);
7635#endif
6502358f
NC
7636 if (memchr(tmpbuf, ':', len))
7637 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7638 else {
7639 sv_setsv(PL_subname,PL_curstname);
396482e1 7640 sv_catpvs(PL_subname,"::");
09bef843
SB
7641 sv_catpvn(PL_subname,tmpbuf,len);
7642 }
09bef843 7643 have_name = TRUE;
5db06880
NC
7644
7645#ifdef PERL_MAD
7646
7647 start_force(0);
7648 CURMAD('X', nametoke);
7649 CURMAD('_', tmpwhite);
7650 (void) force_word(PL_oldbufptr + tboffset, WORD,
7651 FALSE, TRUE, TRUE);
7652
7653 s = SKIPSPACE2(d,tmpwhite);
7654#else
7655 s = skipspace(d);
7656#endif
09bef843 7657 }
463ee0b2 7658 else {
09bef843
SB
7659 if (key == KEY_my)
7660 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7661 PL_expect = XTERMBLOCK;
7662 attrful = XATTRTERM;
76f68e9b 7663 sv_setpvs(PL_subname,"?");
09bef843 7664 have_name = FALSE;
463ee0b2 7665 }
4633a7c4 7666
09bef843
SB
7667 if (key == KEY_format) {
7668 if (*s == '=')
7669 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7670#ifdef PERL_MAD
cd81e915 7671 PL_thistoken = subtoken;
5db06880
NC
7672 s = d;
7673#else
09bef843 7674 if (have_name)
b1b65b59
JH
7675 (void) force_word(PL_oldbufptr + tboffset, WORD,
7676 FALSE, TRUE, TRUE);
5db06880 7677#endif
09bef843
SB
7678 OPERATOR(FORMAT);
7679 }
79072805 7680
09bef843
SB
7681 /* Look for a prototype */
7682 if (*s == '(') {
d9f2850e
RGS
7683 char *p;
7684 bool bad_proto = FALSE;
9e8d7757
RB
7685 bool in_brackets = FALSE;
7686 char greedy_proto = ' ';
7687 bool proto_after_greedy_proto = FALSE;
7688 bool must_be_last = FALSE;
7689 bool underscore = FALSE;
aef2a98a 7690 bool seen_underscore = FALSE;
197afce1 7691 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7692
5db06880 7693 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7694 if (!s)
09bef843 7695 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7696 /* strip spaces and check for bad characters */
09bef843
SB
7697 d = SvPVX(PL_lex_stuff);
7698 tmp = 0;
d9f2850e
RGS
7699 for (p = d; *p; ++p) {
7700 if (!isSPACE(*p)) {
7701 d[tmp++] = *p;
9e8d7757 7702
197afce1 7703 if (warnillegalproto) {
9e8d7757
RB
7704 if (must_be_last)
7705 proto_after_greedy_proto = TRUE;
7706 if (!strchr("$@%*;[]&\\_", *p)) {
7707 bad_proto = TRUE;
7708 }
7709 else {
7710 if ( underscore ) {
7711 if ( *p != ';' )
7712 bad_proto = TRUE;
7713 underscore = FALSE;
7714 }
7715 if ( *p == '[' ) {
7716 in_brackets = TRUE;
7717 }
7718 else if ( *p == ']' ) {
7719 in_brackets = FALSE;
7720 }
7721 else if ( (*p == '@' || *p == '%') &&
7722 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7723 !in_brackets ) {
7724 must_be_last = TRUE;
7725 greedy_proto = *p;
7726 }
7727 else if ( *p == '_' ) {
aef2a98a 7728 underscore = seen_underscore = TRUE;
9e8d7757
RB
7729 }
7730 }
7731 }
d37a9538 7732 }
09bef843 7733 }
d9f2850e 7734 d[tmp] = '\0';
9e8d7757 7735 if (proto_after_greedy_proto)
197afce1 7736 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
7737 "Prototype after '%c' for %"SVf" : %s",
7738 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 7739 if (bad_proto)
197afce1 7740 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
7741 "Illegal character %sin prototype for %"SVf" : %s",
7742 seen_underscore ? "after '_' " : "",
be2597df 7743 SVfARG(PL_subname), d);
b162af07 7744 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7745 have_proto = TRUE;
68dc0745 7746
5db06880
NC
7747#ifdef PERL_MAD
7748 start_force(0);
cd81e915 7749 CURMAD('q', PL_thisopen);
5db06880 7750 CURMAD('_', tmpwhite);
cd81e915
NC
7751 CURMAD('=', PL_thisstuff);
7752 CURMAD('Q', PL_thisclose);
5db06880
NC
7753 NEXTVAL_NEXTTOKE.opval =
7754 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7755 PL_lex_stuff = NULL;
5db06880
NC
7756 force_next(THING);
7757
7758 s = SKIPSPACE2(s,tmpwhite);
7759#else
09bef843 7760 s = skipspace(s);
5db06880 7761#endif
4633a7c4 7762 }
09bef843
SB
7763 else
7764 have_proto = FALSE;
7765
7766 if (*s == ':' && s[1] != ':')
7767 PL_expect = attrful;
8e742a20
MHM
7768 else if (*s != '{' && key == KEY_sub) {
7769 if (!have_name)
7770 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7771 else if (*s != ';' && *s != '}')
be2597df 7772 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7773 }
09bef843 7774
5db06880
NC
7775#ifdef PERL_MAD
7776 start_force(0);
7777 if (tmpwhite) {
7778 if (PL_madskills)
6b29d1f5 7779 curmad('^', newSVpvs(""));
5db06880
NC
7780 CURMAD('_', tmpwhite);
7781 }
7782 force_next(0);
7783
cd81e915 7784 PL_thistoken = subtoken;
5db06880 7785#else
09bef843 7786 if (have_proto) {
9ded7720 7787 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7788 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7789 PL_lex_stuff = NULL;
09bef843 7790 force_next(THING);
68dc0745 7791 }
5db06880 7792#endif
09bef843 7793 if (!have_name) {
49a54bbe
NC
7794 if (PL_curstash)
7795 sv_setpvs(PL_subname, "__ANON__");
7796 else
7797 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7798 TOKEN(ANONSUB);
4633a7c4 7799 }
5db06880 7800#ifndef PERL_MAD
b1b65b59
JH
7801 (void) force_word(PL_oldbufptr + tboffset, WORD,
7802 FALSE, TRUE, TRUE);
5db06880 7803#endif
09bef843
SB
7804 if (key == KEY_my)
7805 TOKEN(MYSUB);
7806 TOKEN(SUB);
4633a7c4 7807 }
79072805
LW
7808
7809 case KEY_system:
a0d0e21e 7810 LOP(OP_SYSTEM,XREF);
79072805
LW
7811
7812 case KEY_symlink:
a0d0e21e 7813 LOP(OP_SYMLINK,XTERM);
79072805
LW
7814
7815 case KEY_syscall:
a0d0e21e 7816 LOP(OP_SYSCALL,XTERM);
79072805 7817
c07a80fd 7818 case KEY_sysopen:
7819 LOP(OP_SYSOPEN,XTERM);
7820
137443ea 7821 case KEY_sysseek:
7822 LOP(OP_SYSSEEK,XTERM);
7823
79072805 7824 case KEY_sysread:
a0d0e21e 7825 LOP(OP_SYSREAD,XTERM);
79072805
LW
7826
7827 case KEY_syswrite:
a0d0e21e 7828 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7829
7830 case KEY_tr:
7831 s = scan_trans(s);
7832 TERM(sublex_start());
7833
7834 case KEY_tell:
7835 UNI(OP_TELL);
7836
7837 case KEY_telldir:
7838 UNI(OP_TELLDIR);
7839
463ee0b2 7840 case KEY_tie:
a0d0e21e 7841 LOP(OP_TIE,XTERM);
463ee0b2 7842
c07a80fd 7843 case KEY_tied:
7844 UNI(OP_TIED);
7845
79072805
LW
7846 case KEY_time:
7847 FUN0(OP_TIME);
7848
7849 case KEY_times:
7850 FUN0(OP_TMS);
7851
7852 case KEY_truncate:
a0d0e21e 7853 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7854
7855 case KEY_uc:
7856 UNI(OP_UC);
7857
7858 case KEY_ucfirst:
7859 UNI(OP_UCFIRST);
7860
463ee0b2
LW
7861 case KEY_untie:
7862 UNI(OP_UNTIE);
7863
79072805 7864 case KEY_until:
6154021b 7865 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7866 OPERATOR(UNTIL);
7867
7868 case KEY_unless:
6154021b 7869 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7870 OPERATOR(UNLESS);
7871
7872 case KEY_unlink:
a0d0e21e 7873 LOP(OP_UNLINK,XTERM);
79072805
LW
7874
7875 case KEY_undef:
6f33ba73 7876 UNIDOR(OP_UNDEF);
79072805
LW
7877
7878 case KEY_unpack:
a0d0e21e 7879 LOP(OP_UNPACK,XTERM);
79072805
LW
7880
7881 case KEY_utime:
a0d0e21e 7882 LOP(OP_UTIME,XTERM);
79072805
LW
7883
7884 case KEY_umask:
6f33ba73 7885 UNIDOR(OP_UMASK);
79072805
LW
7886
7887 case KEY_unshift:
a0d0e21e
LW
7888 LOP(OP_UNSHIFT,XTERM);
7889
7890 case KEY_use:
468aa647 7891 s = tokenize_use(1, s);
a0d0e21e 7892 OPERATOR(USE);
79072805
LW
7893
7894 case KEY_values:
7895 UNI(OP_VALUES);
7896
7897 case KEY_vec:
a0d0e21e 7898 LOP(OP_VEC,XTERM);
79072805 7899
0d863452 7900 case KEY_when:
6154021b 7901 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7902 OPERATOR(WHEN);
7903
79072805 7904 case KEY_while:
6154021b 7905 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7906 OPERATOR(WHILE);
7907
7908 case KEY_warn:
3280af22 7909 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7910 LOP(OP_WARN,XTERM);
79072805
LW
7911
7912 case KEY_wait:
7913 FUN0(OP_WAIT);
7914
7915 case KEY_waitpid:
a0d0e21e 7916 LOP(OP_WAITPID,XTERM);
79072805
LW
7917
7918 case KEY_wantarray:
7919 FUN0(OP_WANTARRAY);
7920
7921 case KEY_write:
9d116dd7
JH
7922#ifdef EBCDIC
7923 {
df3728a2
JH
7924 char ctl_l[2];
7925 ctl_l[0] = toCTRL('L');
7926 ctl_l[1] = '\0';
fafc274c 7927 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7928 }
7929#else
fafc274c
NC
7930 /* Make sure $^L is defined */
7931 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7932#endif
79072805
LW
7933 UNI(OP_ENTERWRITE);
7934
7935 case KEY_x:
3280af22 7936 if (PL_expect == XOPERATOR)
79072805
LW
7937 Mop(OP_REPEAT);
7938 check_uni();
7939 goto just_a_word;
7940
a0d0e21e 7941 case KEY_xor:
6154021b 7942 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7943 OPERATOR(OROP);
7944
79072805
LW
7945 case KEY_y:
7946 s = scan_trans(s);
7947 TERM(sublex_start());
7948 }
49dc05e3 7949 }}
79072805 7950}
bf4acbe4
GS
7951#ifdef __SC__
7952#pragma segment Main
7953#endif
79072805 7954
e930465f
JH
7955static int
7956S_pending_ident(pTHX)
8eceec63 7957{
97aff369 7958 dVAR;
8eceec63 7959 register char *d;
bbd11bfc 7960 PADOFFSET tmp = 0;
8eceec63
SC
7961 /* pit holds the identifier we read and pending_ident is reset */
7962 char pit = PL_pending_ident;
9bde8eb0
NC
7963 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7964 /* All routes through this function want to know if there is a colon. */
c099d646 7965 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
7966 PL_pending_ident = 0;
7967
cd81e915 7968 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 7969 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 7970 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
7971
7972 /* if we're in a my(), we can't allow dynamics here.
7973 $foo'bar has already been turned into $foo::bar, so
7974 just check for colons.
7975
7976 if it's a legal name, the OP is a PADANY.
7977 */
7978 if (PL_in_my) {
7979 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 7980 if (has_colon)
8eceec63
SC
7981 yyerror(Perl_form(aTHX_ "No package name allowed for "
7982 "variable %s in \"our\"",
7983 PL_tokenbuf));
d6447115 7984 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7985 }
7986 else {
9bde8eb0 7987 if (has_colon)
952306ac
RGS
7988 yyerror(Perl_form(aTHX_ PL_no_myglob,
7989 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 7990
6154021b 7991 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 7992 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7993 return PRIVATEREF;
7994 }
7995 }
7996
7997 /*
7998 build the ops for accesses to a my() variable.
7999
8000 Deny my($a) or my($b) in a sort block, *if* $a or $b is
8001 then used in a comparison. This catches most, but not
8002 all cases. For instance, it catches
8003 sort { my($a); $a <=> $b }
8004 but not
8005 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
8006 (although why you'd do that is anyone's guess).
8007 */
8008
9bde8eb0 8009 if (!has_colon) {
8716503d 8010 if (!PL_in_my)
f8f98e0a 8011 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 8012 if (tmp != NOT_IN_PAD) {
8eceec63 8013 /* might be an "our" variable" */
00b1698f 8014 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8015 /* build ops for a bareword */
b64e5050
AL
8016 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8017 HEK * const stashname = HvNAME_HEK(stash);
8018 SV * const sym = newSVhek(stashname);
396482e1 8019 sv_catpvs(sym, "::");
9bde8eb0 8020 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8021 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8022 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8023 gv_fetchsv(sym,
8eceec63
SC
8024 (PL_in_eval
8025 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8026 : GV_ADDMULTI
8eceec63
SC
8027 ),
8028 ((PL_tokenbuf[0] == '$') ? SVt_PV
8029 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8030 : SVt_PVHV));
8031 return WORD;
8032 }
8033
8034 /* if it's a sort block and they're naming $a or $b */
8035 if (PL_last_lop_op == OP_SORT &&
8036 PL_tokenbuf[0] == '$' &&
8037 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8038 && !PL_tokenbuf[2])
8039 {
8040 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8041 d < PL_bufend && *d != '\n';
8042 d++)
8043 {
8044 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8045 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8046 PL_tokenbuf);
8047 }
8048 }
8049 }
8050
6154021b
RGS
8051 pl_yylval.opval = newOP(OP_PADANY, 0);
8052 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8053 return PRIVATEREF;
8054 }
8055 }
8056
8057 /*
8058 Whine if they've said @foo in a doublequoted string,
8059 and @foo isn't a variable we can find in the symbol
8060 table.
8061 */
d824713b
NC
8062 if (ckWARN(WARN_AMBIGUOUS) &&
8063 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8064 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8065 SVt_PVAV);
8eceec63 8066 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8067 /* DO NOT warn for @- and @+ */
8068 && !( PL_tokenbuf[2] == '\0' &&
8069 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8070 )
8eceec63
SC
8071 {
8072 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8073 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8074 "Possible unintended interpolation of %s in string",
8075 PL_tokenbuf);
8eceec63
SC
8076 }
8077 }
8078
8079 /* build ops for a bareword */
6154021b 8080 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8081 tokenbuf_len - 1));
6154021b 8082 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8083 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8084 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8085 ((PL_tokenbuf[0] == '$') ? SVt_PV
8086 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8087 : SVt_PVHV));
8eceec63
SC
8088 return WORD;
8089}
8090
4c3bbe0f
MHM
8091/*
8092 * The following code was generated by perl_keyword.pl.
8093 */
e2e1dd5a 8094
79072805 8095I32
5458a98a 8096Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 8097{
952306ac 8098 dVAR;
7918f24d
NC
8099
8100 PERL_ARGS_ASSERT_KEYWORD;
8101
4c3bbe0f
MHM
8102 switch (len)
8103 {
8104 case 1: /* 5 tokens of length 1 */
8105 switch (name[0])
e2e1dd5a 8106 {
4c3bbe0f
MHM
8107 case 'm':
8108 { /* m */
8109 return KEY_m;
8110 }
8111
4c3bbe0f
MHM
8112 case 'q':
8113 { /* q */
8114 return KEY_q;
8115 }
8116
4c3bbe0f
MHM
8117 case 's':
8118 { /* s */
8119 return KEY_s;
8120 }
8121
4c3bbe0f
MHM
8122 case 'x':
8123 { /* x */
8124 return -KEY_x;
8125 }
8126
4c3bbe0f
MHM
8127 case 'y':
8128 { /* y */
8129 return KEY_y;
8130 }
8131
4c3bbe0f
MHM
8132 default:
8133 goto unknown;
e2e1dd5a 8134 }
4c3bbe0f
MHM
8135
8136 case 2: /* 18 tokens of length 2 */
8137 switch (name[0])
e2e1dd5a 8138 {
4c3bbe0f
MHM
8139 case 'd':
8140 if (name[1] == 'o')
8141 { /* do */
8142 return KEY_do;
8143 }
8144
8145 goto unknown;
8146
8147 case 'e':
8148 if (name[1] == 'q')
8149 { /* eq */
8150 return -KEY_eq;
8151 }
8152
8153 goto unknown;
8154
8155 case 'g':
8156 switch (name[1])
8157 {
8158 case 'e':
8159 { /* ge */
8160 return -KEY_ge;
8161 }
8162
4c3bbe0f
MHM
8163 case 't':
8164 { /* gt */
8165 return -KEY_gt;
8166 }
8167
4c3bbe0f
MHM
8168 default:
8169 goto unknown;
8170 }
8171
8172 case 'i':
8173 if (name[1] == 'f')
8174 { /* if */
8175 return KEY_if;
8176 }
8177
8178 goto unknown;
8179
8180 case 'l':
8181 switch (name[1])
8182 {
8183 case 'c':
8184 { /* lc */
8185 return -KEY_lc;
8186 }
8187
4c3bbe0f
MHM
8188 case 'e':
8189 { /* le */
8190 return -KEY_le;
8191 }
8192
4c3bbe0f
MHM
8193 case 't':
8194 { /* lt */
8195 return -KEY_lt;
8196 }
8197
4c3bbe0f
MHM
8198 default:
8199 goto unknown;
8200 }
8201
8202 case 'm':
8203 if (name[1] == 'y')
8204 { /* my */
8205 return KEY_my;
8206 }
8207
8208 goto unknown;
8209
8210 case 'n':
8211 switch (name[1])
8212 {
8213 case 'e':
8214 { /* ne */
8215 return -KEY_ne;
8216 }
8217
4c3bbe0f
MHM
8218 case 'o':
8219 { /* no */
8220 return KEY_no;
8221 }
8222
4c3bbe0f
MHM
8223 default:
8224 goto unknown;
8225 }
8226
8227 case 'o':
8228 if (name[1] == 'r')
8229 { /* or */
8230 return -KEY_or;
8231 }
8232
8233 goto unknown;
8234
8235 case 'q':
8236 switch (name[1])
8237 {
8238 case 'q':
8239 { /* qq */
8240 return KEY_qq;
8241 }
8242
4c3bbe0f
MHM
8243 case 'r':
8244 { /* qr */
8245 return KEY_qr;
8246 }
8247
4c3bbe0f
MHM
8248 case 'w':
8249 { /* qw */
8250 return KEY_qw;
8251 }
8252
4c3bbe0f
MHM
8253 case 'x':
8254 { /* qx */
8255 return KEY_qx;
8256 }
8257
4c3bbe0f
MHM
8258 default:
8259 goto unknown;
8260 }
8261
8262 case 't':
8263 if (name[1] == 'r')
8264 { /* tr */
8265 return KEY_tr;
8266 }
8267
8268 goto unknown;
8269
8270 case 'u':
8271 if (name[1] == 'c')
8272 { /* uc */
8273 return -KEY_uc;
8274 }
8275
8276 goto unknown;
8277
8278 default:
8279 goto unknown;
e2e1dd5a 8280 }
4c3bbe0f 8281
0d863452 8282 case 3: /* 29 tokens of length 3 */
4c3bbe0f 8283 switch (name[0])
e2e1dd5a 8284 {
4c3bbe0f
MHM
8285 case 'E':
8286 if (name[1] == 'N' &&
8287 name[2] == 'D')
8288 { /* END */
8289 return KEY_END;
8290 }
8291
8292 goto unknown;
8293
8294 case 'a':
8295 switch (name[1])
8296 {
8297 case 'b':
8298 if (name[2] == 's')
8299 { /* abs */
8300 return -KEY_abs;
8301 }
8302
8303 goto unknown;
8304
8305 case 'n':
8306 if (name[2] == 'd')
8307 { /* and */
8308 return -KEY_and;
8309 }
8310
8311 goto unknown;
8312
8313 default:
8314 goto unknown;
8315 }
8316
8317 case 'c':
8318 switch (name[1])
8319 {
8320 case 'h':
8321 if (name[2] == 'r')
8322 { /* chr */
8323 return -KEY_chr;
8324 }
8325
8326 goto unknown;
8327
8328 case 'm':
8329 if (name[2] == 'p')
8330 { /* cmp */
8331 return -KEY_cmp;
8332 }
8333
8334 goto unknown;
8335
8336 case 'o':
8337 if (name[2] == 's')
8338 { /* cos */
8339 return -KEY_cos;
8340 }
8341
8342 goto unknown;
8343
8344 default:
8345 goto unknown;
8346 }
8347
8348 case 'd':
8349 if (name[1] == 'i' &&
8350 name[2] == 'e')
8351 { /* die */
8352 return -KEY_die;
8353 }
8354
8355 goto unknown;
8356
8357 case 'e':
8358 switch (name[1])
8359 {
8360 case 'o':
8361 if (name[2] == 'f')
8362 { /* eof */
8363 return -KEY_eof;
8364 }
8365
8366 goto unknown;
8367
4c3bbe0f
MHM
8368 case 'x':
8369 if (name[2] == 'p')
8370 { /* exp */
8371 return -KEY_exp;
8372 }
8373
8374 goto unknown;
8375
8376 default:
8377 goto unknown;
8378 }
8379
8380 case 'f':
8381 if (name[1] == 'o' &&
8382 name[2] == 'r')
8383 { /* for */
8384 return KEY_for;
8385 }
8386
8387 goto unknown;
8388
8389 case 'h':
8390 if (name[1] == 'e' &&
8391 name[2] == 'x')
8392 { /* hex */
8393 return -KEY_hex;
8394 }
8395
8396 goto unknown;
8397
8398 case 'i':
8399 if (name[1] == 'n' &&
8400 name[2] == 't')
8401 { /* int */
8402 return -KEY_int;
8403 }
8404
8405 goto unknown;
8406
8407 case 'l':
8408 if (name[1] == 'o' &&
8409 name[2] == 'g')
8410 { /* log */
8411 return -KEY_log;
8412 }
8413
8414 goto unknown;
8415
8416 case 'm':
8417 if (name[1] == 'a' &&
8418 name[2] == 'p')
8419 { /* map */
8420 return KEY_map;
8421 }
8422
8423 goto unknown;
8424
8425 case 'n':
8426 if (name[1] == 'o' &&
8427 name[2] == 't')
8428 { /* not */
8429 return -KEY_not;
8430 }
8431
8432 goto unknown;
8433
8434 case 'o':
8435 switch (name[1])
8436 {
8437 case 'c':
8438 if (name[2] == 't')
8439 { /* oct */
8440 return -KEY_oct;
8441 }
8442
8443 goto unknown;
8444
8445 case 'r':
8446 if (name[2] == 'd')
8447 { /* ord */
8448 return -KEY_ord;
8449 }
8450
8451 goto unknown;
8452
8453 case 'u':
8454 if (name[2] == 'r')
8455 { /* our */
8456 return KEY_our;
8457 }
8458
8459 goto unknown;
8460
8461 default:
8462 goto unknown;
8463 }
8464
8465 case 'p':
8466 if (name[1] == 'o')
8467 {
8468 switch (name[2])
8469 {
8470 case 'p':
8471 { /* pop */
8472 return -KEY_pop;
8473 }
8474
4c3bbe0f
MHM
8475 case 's':
8476 { /* pos */
8477 return KEY_pos;
8478 }
8479
4c3bbe0f
MHM
8480 default:
8481 goto unknown;
8482 }
8483 }
8484
8485 goto unknown;
8486
8487 case 'r':
8488 if (name[1] == 'e' &&
8489 name[2] == 'f')
8490 { /* ref */
8491 return -KEY_ref;
8492 }
8493
8494 goto unknown;
8495
8496 case 's':
8497 switch (name[1])
8498 {
0d863452
RH
8499 case 'a':
8500 if (name[2] == 'y')
8501 { /* say */
e3e804c9 8502 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8503 }
8504
8505 goto unknown;
8506
4c3bbe0f
MHM
8507 case 'i':
8508 if (name[2] == 'n')
8509 { /* sin */
8510 return -KEY_sin;
8511 }
8512
8513 goto unknown;
8514
8515 case 'u':
8516 if (name[2] == 'b')
8517 { /* sub */
8518 return KEY_sub;
8519 }
8520
8521 goto unknown;
8522
8523 default:
8524 goto unknown;
8525 }
8526
8527 case 't':
8528 if (name[1] == 'i' &&
8529 name[2] == 'e')
8530 { /* tie */
1db4d195 8531 return -KEY_tie;
4c3bbe0f
MHM
8532 }
8533
8534 goto unknown;
8535
8536 case 'u':
8537 if (name[1] == 's' &&
8538 name[2] == 'e')
8539 { /* use */
8540 return KEY_use;
8541 }
8542
8543 goto unknown;
8544
8545 case 'v':
8546 if (name[1] == 'e' &&
8547 name[2] == 'c')
8548 { /* vec */
8549 return -KEY_vec;
8550 }
8551
8552 goto unknown;
8553
8554 case 'x':
8555 if (name[1] == 'o' &&
8556 name[2] == 'r')
8557 { /* xor */
8558 return -KEY_xor;
8559 }
8560
8561 goto unknown;
8562
8563 default:
8564 goto unknown;
e2e1dd5a 8565 }
4c3bbe0f 8566
0d863452 8567 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8568 switch (name[0])
e2e1dd5a 8569 {
4c3bbe0f
MHM
8570 case 'C':
8571 if (name[1] == 'O' &&
8572 name[2] == 'R' &&
8573 name[3] == 'E')
8574 { /* CORE */
8575 return -KEY_CORE;
8576 }
8577
8578 goto unknown;
8579
8580 case 'I':
8581 if (name[1] == 'N' &&
8582 name[2] == 'I' &&
8583 name[3] == 'T')
8584 { /* INIT */
8585 return KEY_INIT;
8586 }
8587
8588 goto unknown;
8589
8590 case 'b':
8591 if (name[1] == 'i' &&
8592 name[2] == 'n' &&
8593 name[3] == 'd')
8594 { /* bind */
8595 return -KEY_bind;
8596 }
8597
8598 goto unknown;
8599
8600 case 'c':
8601 if (name[1] == 'h' &&
8602 name[2] == 'o' &&
8603 name[3] == 'p')
8604 { /* chop */
8605 return -KEY_chop;
8606 }
8607
8608 goto unknown;
8609
8610 case 'd':
8611 if (name[1] == 'u' &&
8612 name[2] == 'm' &&
8613 name[3] == 'p')
8614 { /* dump */
8615 return -KEY_dump;
8616 }
8617
8618 goto unknown;
8619
8620 case 'e':
8621 switch (name[1])
8622 {
8623 case 'a':
8624 if (name[2] == 'c' &&
8625 name[3] == 'h')
8626 { /* each */
8627 return -KEY_each;
8628 }
8629
8630 goto unknown;
8631
8632 case 'l':
8633 if (name[2] == 's' &&
8634 name[3] == 'e')
8635 { /* else */
8636 return KEY_else;
8637 }
8638
8639 goto unknown;
8640
8641 case 'v':
8642 if (name[2] == 'a' &&
8643 name[3] == 'l')
8644 { /* eval */
8645 return KEY_eval;
8646 }
8647
8648 goto unknown;
8649
8650 case 'x':
8651 switch (name[2])
8652 {
8653 case 'e':
8654 if (name[3] == 'c')
8655 { /* exec */
8656 return -KEY_exec;
8657 }
8658
8659 goto unknown;
8660
8661 case 'i':
8662 if (name[3] == 't')
8663 { /* exit */
8664 return -KEY_exit;
8665 }
8666
8667 goto unknown;
8668
8669 default:
8670 goto unknown;
8671 }
8672
8673 default:
8674 goto unknown;
8675 }
8676
8677 case 'f':
8678 if (name[1] == 'o' &&
8679 name[2] == 'r' &&
8680 name[3] == 'k')
8681 { /* fork */
8682 return -KEY_fork;
8683 }
8684
8685 goto unknown;
8686
8687 case 'g':
8688 switch (name[1])
8689 {
8690 case 'e':
8691 if (name[2] == 't' &&
8692 name[3] == 'c')
8693 { /* getc */
8694 return -KEY_getc;
8695 }
8696
8697 goto unknown;
8698
8699 case 'l':
8700 if (name[2] == 'o' &&
8701 name[3] == 'b')
8702 { /* glob */
8703 return KEY_glob;
8704 }
8705
8706 goto unknown;
8707
8708 case 'o':
8709 if (name[2] == 't' &&
8710 name[3] == 'o')
8711 { /* goto */
8712 return KEY_goto;
8713 }
8714
8715 goto unknown;
8716
8717 case 'r':
8718 if (name[2] == 'e' &&
8719 name[3] == 'p')
8720 { /* grep */
8721 return KEY_grep;
8722 }
8723
8724 goto unknown;
8725
8726 default:
8727 goto unknown;
8728 }
8729
8730 case 'j':
8731 if (name[1] == 'o' &&
8732 name[2] == 'i' &&
8733 name[3] == 'n')
8734 { /* join */
8735 return -KEY_join;
8736 }
8737
8738 goto unknown;
8739
8740 case 'k':
8741 switch (name[1])
8742 {
8743 case 'e':
8744 if (name[2] == 'y' &&
8745 name[3] == 's')
8746 { /* keys */
8747 return -KEY_keys;
8748 }
8749
8750 goto unknown;
8751
8752 case 'i':
8753 if (name[2] == 'l' &&
8754 name[3] == 'l')
8755 { /* kill */
8756 return -KEY_kill;
8757 }
8758
8759 goto unknown;
8760
8761 default:
8762 goto unknown;
8763 }
8764
8765 case 'l':
8766 switch (name[1])
8767 {
8768 case 'a':
8769 if (name[2] == 's' &&
8770 name[3] == 't')
8771 { /* last */
8772 return KEY_last;
8773 }
8774
8775 goto unknown;
8776
8777 case 'i':
8778 if (name[2] == 'n' &&
8779 name[3] == 'k')
8780 { /* link */
8781 return -KEY_link;
8782 }
8783
8784 goto unknown;
8785
8786 case 'o':
8787 if (name[2] == 'c' &&
8788 name[3] == 'k')
8789 { /* lock */
8790 return -KEY_lock;
8791 }
8792
8793 goto unknown;
8794
8795 default:
8796 goto unknown;
8797 }
8798
8799 case 'n':
8800 if (name[1] == 'e' &&
8801 name[2] == 'x' &&
8802 name[3] == 't')
8803 { /* next */
8804 return KEY_next;
8805 }
8806
8807 goto unknown;
8808
8809 case 'o':
8810 if (name[1] == 'p' &&
8811 name[2] == 'e' &&
8812 name[3] == 'n')
8813 { /* open */
8814 return -KEY_open;
8815 }
8816
8817 goto unknown;
8818
8819 case 'p':
8820 switch (name[1])
8821 {
8822 case 'a':
8823 if (name[2] == 'c' &&
8824 name[3] == 'k')
8825 { /* pack */
8826 return -KEY_pack;
8827 }
8828
8829 goto unknown;
8830
8831 case 'i':
8832 if (name[2] == 'p' &&
8833 name[3] == 'e')
8834 { /* pipe */
8835 return -KEY_pipe;
8836 }
8837
8838 goto unknown;
8839
8840 case 'u':
8841 if (name[2] == 's' &&
8842 name[3] == 'h')
8843 { /* push */
8844 return -KEY_push;
8845 }
8846
8847 goto unknown;
8848
8849 default:
8850 goto unknown;
8851 }
8852
8853 case 'r':
8854 switch (name[1])
8855 {
8856 case 'a':
8857 if (name[2] == 'n' &&
8858 name[3] == 'd')
8859 { /* rand */
8860 return -KEY_rand;
8861 }
8862
8863 goto unknown;
8864
8865 case 'e':
8866 switch (name[2])
8867 {
8868 case 'a':
8869 if (name[3] == 'd')
8870 { /* read */
8871 return -KEY_read;
8872 }
8873
8874 goto unknown;
8875
8876 case 'c':
8877 if (name[3] == 'v')
8878 { /* recv */
8879 return -KEY_recv;
8880 }
8881
8882 goto unknown;
8883
8884 case 'd':
8885 if (name[3] == 'o')
8886 { /* redo */
8887 return KEY_redo;
8888 }
8889
8890 goto unknown;
8891
8892 default:
8893 goto unknown;
8894 }
8895
8896 default:
8897 goto unknown;
8898 }
8899
8900 case 's':
8901 switch (name[1])
8902 {
8903 case 'e':
8904 switch (name[2])
8905 {
8906 case 'e':
8907 if (name[3] == 'k')
8908 { /* seek */
8909 return -KEY_seek;
8910 }
8911
8912 goto unknown;
8913
8914 case 'n':
8915 if (name[3] == 'd')
8916 { /* send */
8917 return -KEY_send;
8918 }
8919
8920 goto unknown;
8921
8922 default:
8923 goto unknown;
8924 }
8925
8926 case 'o':
8927 if (name[2] == 'r' &&
8928 name[3] == 't')
8929 { /* sort */
8930 return KEY_sort;
8931 }
8932
8933 goto unknown;
8934
8935 case 'q':
8936 if (name[2] == 'r' &&
8937 name[3] == 't')
8938 { /* sqrt */
8939 return -KEY_sqrt;
8940 }
8941
8942 goto unknown;
8943
8944 case 't':
8945 if (name[2] == 'a' &&
8946 name[3] == 't')
8947 { /* stat */
8948 return -KEY_stat;
8949 }
8950
8951 goto unknown;
8952
8953 default:
8954 goto unknown;
8955 }
8956
8957 case 't':
8958 switch (name[1])
8959 {
8960 case 'e':
8961 if (name[2] == 'l' &&
8962 name[3] == 'l')
8963 { /* tell */
8964 return -KEY_tell;
8965 }
8966
8967 goto unknown;
8968
8969 case 'i':
8970 switch (name[2])
8971 {
8972 case 'e':
8973 if (name[3] == 'd')
8974 { /* tied */
1db4d195 8975 return -KEY_tied;
4c3bbe0f
MHM
8976 }
8977
8978 goto unknown;
8979
8980 case 'm':
8981 if (name[3] == 'e')
8982 { /* time */
8983 return -KEY_time;
8984 }
8985
8986 goto unknown;
8987
8988 default:
8989 goto unknown;
8990 }
8991
8992 default:
8993 goto unknown;
8994 }
8995
8996 case 'w':
0d863452 8997 switch (name[1])
4c3bbe0f 8998 {
0d863452 8999 case 'a':
952306ac
RGS
9000 switch (name[2])
9001 {
9002 case 'i':
9003 if (name[3] == 't')
9004 { /* wait */
9005 return -KEY_wait;
9006 }
4c3bbe0f 9007
952306ac 9008 goto unknown;
4c3bbe0f 9009
952306ac
RGS
9010 case 'r':
9011 if (name[3] == 'n')
9012 { /* warn */
9013 return -KEY_warn;
9014 }
4c3bbe0f 9015
952306ac 9016 goto unknown;
4c3bbe0f 9017
952306ac
RGS
9018 default:
9019 goto unknown;
9020 }
0d863452
RH
9021
9022 case 'h':
9023 if (name[2] == 'e' &&
9024 name[3] == 'n')
9025 { /* when */
5458a98a 9026 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 9027 }
4c3bbe0f 9028
952306ac 9029 goto unknown;
4c3bbe0f 9030
952306ac
RGS
9031 default:
9032 goto unknown;
9033 }
4c3bbe0f 9034
0d863452
RH
9035 default:
9036 goto unknown;
9037 }
9038
952306ac 9039 case 5: /* 39 tokens of length 5 */
4c3bbe0f 9040 switch (name[0])
e2e1dd5a 9041 {
4c3bbe0f
MHM
9042 case 'B':
9043 if (name[1] == 'E' &&
9044 name[2] == 'G' &&
9045 name[3] == 'I' &&
9046 name[4] == 'N')
9047 { /* BEGIN */
9048 return KEY_BEGIN;
9049 }
9050
9051 goto unknown;
9052
9053 case 'C':
9054 if (name[1] == 'H' &&
9055 name[2] == 'E' &&
9056 name[3] == 'C' &&
9057 name[4] == 'K')
9058 { /* CHECK */
9059 return KEY_CHECK;
9060 }
9061
9062 goto unknown;
9063
9064 case 'a':
9065 switch (name[1])
9066 {
9067 case 'l':
9068 if (name[2] == 'a' &&
9069 name[3] == 'r' &&
9070 name[4] == 'm')
9071 { /* alarm */
9072 return -KEY_alarm;
9073 }
9074
9075 goto unknown;
9076
9077 case 't':
9078 if (name[2] == 'a' &&
9079 name[3] == 'n' &&
9080 name[4] == '2')
9081 { /* atan2 */
9082 return -KEY_atan2;
9083 }
9084
9085 goto unknown;
9086
9087 default:
9088 goto unknown;
9089 }
9090
9091 case 'b':
0d863452
RH
9092 switch (name[1])
9093 {
9094 case 'l':
9095 if (name[2] == 'e' &&
952306ac
RGS
9096 name[3] == 's' &&
9097 name[4] == 's')
9098 { /* bless */
9099 return -KEY_bless;
9100 }
4c3bbe0f 9101
952306ac 9102 goto unknown;
4c3bbe0f 9103
0d863452
RH
9104 case 'r':
9105 if (name[2] == 'e' &&
9106 name[3] == 'a' &&
9107 name[4] == 'k')
9108 { /* break */
5458a98a 9109 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
9110 }
9111
9112 goto unknown;
9113
9114 default:
9115 goto unknown;
9116 }
9117
4c3bbe0f
MHM
9118 case 'c':
9119 switch (name[1])
9120 {
9121 case 'h':
9122 switch (name[2])
9123 {
9124 case 'd':
9125 if (name[3] == 'i' &&
9126 name[4] == 'r')
9127 { /* chdir */
9128 return -KEY_chdir;
9129 }
9130
9131 goto unknown;
9132
9133 case 'm':
9134 if (name[3] == 'o' &&
9135 name[4] == 'd')
9136 { /* chmod */
9137 return -KEY_chmod;
9138 }
9139
9140 goto unknown;
9141
9142 case 'o':
9143 switch (name[3])
9144 {
9145 case 'm':
9146 if (name[4] == 'p')
9147 { /* chomp */
9148 return -KEY_chomp;
9149 }
9150
9151 goto unknown;
9152
9153 case 'w':
9154 if (name[4] == 'n')
9155 { /* chown */
9156 return -KEY_chown;
9157 }
9158
9159 goto unknown;
9160
9161 default:
9162 goto unknown;
9163 }
9164
9165 default:
9166 goto unknown;
9167 }
9168
9169 case 'l':
9170 if (name[2] == 'o' &&
9171 name[3] == 's' &&
9172 name[4] == 'e')
9173 { /* close */
9174 return -KEY_close;
9175 }
9176
9177 goto unknown;
9178
9179 case 'r':
9180 if (name[2] == 'y' &&
9181 name[3] == 'p' &&
9182 name[4] == 't')
9183 { /* crypt */
9184 return -KEY_crypt;
9185 }
9186
9187 goto unknown;
9188
9189 default:
9190 goto unknown;
9191 }
9192
9193 case 'e':
9194 if (name[1] == 'l' &&
9195 name[2] == 's' &&
9196 name[3] == 'i' &&
9197 name[4] == 'f')
9198 { /* elsif */
9199 return KEY_elsif;
9200 }
9201
9202 goto unknown;
9203
9204 case 'f':
9205 switch (name[1])
9206 {
9207 case 'c':
9208 if (name[2] == 'n' &&
9209 name[3] == 't' &&
9210 name[4] == 'l')
9211 { /* fcntl */
9212 return -KEY_fcntl;
9213 }
9214
9215 goto unknown;
9216
9217 case 'l':
9218 if (name[2] == 'o' &&
9219 name[3] == 'c' &&
9220 name[4] == 'k')
9221 { /* flock */
9222 return -KEY_flock;
9223 }
9224
9225 goto unknown;
9226
9227 default:
9228 goto unknown;
9229 }
9230
0d863452
RH
9231 case 'g':
9232 if (name[1] == 'i' &&
9233 name[2] == 'v' &&
9234 name[3] == 'e' &&
9235 name[4] == 'n')
9236 { /* given */
5458a98a 9237 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
9238 }
9239
9240 goto unknown;
9241
4c3bbe0f
MHM
9242 case 'i':
9243 switch (name[1])
9244 {
9245 case 'n':
9246 if (name[2] == 'd' &&
9247 name[3] == 'e' &&
9248 name[4] == 'x')
9249 { /* index */
9250 return -KEY_index;
9251 }
9252
9253 goto unknown;
9254
9255 case 'o':
9256 if (name[2] == 'c' &&
9257 name[3] == 't' &&
9258 name[4] == 'l')
9259 { /* ioctl */
9260 return -KEY_ioctl;
9261 }
9262
9263 goto unknown;
9264
9265 default:
9266 goto unknown;
9267 }
9268
9269 case 'l':
9270 switch (name[1])
9271 {
9272 case 'o':
9273 if (name[2] == 'c' &&
9274 name[3] == 'a' &&
9275 name[4] == 'l')
9276 { /* local */
9277 return KEY_local;
9278 }
9279
9280 goto unknown;
9281
9282 case 's':
9283 if (name[2] == 't' &&
9284 name[3] == 'a' &&
9285 name[4] == 't')
9286 { /* lstat */
9287 return -KEY_lstat;
9288 }
9289
9290 goto unknown;
9291
9292 default:
9293 goto unknown;
9294 }
9295
9296 case 'm':
9297 if (name[1] == 'k' &&
9298 name[2] == 'd' &&
9299 name[3] == 'i' &&
9300 name[4] == 'r')
9301 { /* mkdir */
9302 return -KEY_mkdir;
9303 }
9304
9305 goto unknown;
9306
9307 case 'p':
9308 if (name[1] == 'r' &&
9309 name[2] == 'i' &&
9310 name[3] == 'n' &&
9311 name[4] == 't')
9312 { /* print */
9313 return KEY_print;
9314 }
9315
9316 goto unknown;
9317
9318 case 'r':
9319 switch (name[1])
9320 {
9321 case 'e':
9322 if (name[2] == 's' &&
9323 name[3] == 'e' &&
9324 name[4] == 't')
9325 { /* reset */
9326 return -KEY_reset;
9327 }
9328
9329 goto unknown;
9330
9331 case 'm':
9332 if (name[2] == 'd' &&
9333 name[3] == 'i' &&
9334 name[4] == 'r')
9335 { /* rmdir */
9336 return -KEY_rmdir;
9337 }
9338
9339 goto unknown;
9340
9341 default:
9342 goto unknown;
9343 }
9344
9345 case 's':
9346 switch (name[1])
9347 {
9348 case 'e':
9349 if (name[2] == 'm' &&
9350 name[3] == 'o' &&
9351 name[4] == 'p')
9352 { /* semop */
9353 return -KEY_semop;
9354 }
9355
9356 goto unknown;
9357
9358 case 'h':
9359 if (name[2] == 'i' &&
9360 name[3] == 'f' &&
9361 name[4] == 't')
9362 { /* shift */
9363 return -KEY_shift;
9364 }
9365
9366 goto unknown;
9367
9368 case 'l':
9369 if (name[2] == 'e' &&
9370 name[3] == 'e' &&
9371 name[4] == 'p')
9372 { /* sleep */
9373 return -KEY_sleep;
9374 }
9375
9376 goto unknown;
9377
9378 case 'p':
9379 if (name[2] == 'l' &&
9380 name[3] == 'i' &&
9381 name[4] == 't')
9382 { /* split */
9383 return KEY_split;
9384 }
9385
9386 goto unknown;
9387
9388 case 'r':
9389 if (name[2] == 'a' &&
9390 name[3] == 'n' &&
9391 name[4] == 'd')
9392 { /* srand */
9393 return -KEY_srand;
9394 }
9395
9396 goto unknown;
9397
9398 case 't':
952306ac
RGS
9399 switch (name[2])
9400 {
9401 case 'a':
9402 if (name[3] == 't' &&
9403 name[4] == 'e')
9404 { /* state */
5458a98a 9405 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9406 }
4c3bbe0f 9407
952306ac
RGS
9408 goto unknown;
9409
9410 case 'u':
9411 if (name[3] == 'd' &&
9412 name[4] == 'y')
9413 { /* study */
9414 return KEY_study;
9415 }
9416
9417 goto unknown;
9418
9419 default:
9420 goto unknown;
9421 }
4c3bbe0f
MHM
9422
9423 default:
9424 goto unknown;
9425 }
9426
9427 case 't':
9428 if (name[1] == 'i' &&
9429 name[2] == 'm' &&
9430 name[3] == 'e' &&
9431 name[4] == 's')
9432 { /* times */
9433 return -KEY_times;
9434 }
9435
9436 goto unknown;
9437
9438 case 'u':
9439 switch (name[1])
9440 {
9441 case 'm':
9442 if (name[2] == 'a' &&
9443 name[3] == 's' &&
9444 name[4] == 'k')
9445 { /* umask */
9446 return -KEY_umask;
9447 }
9448
9449 goto unknown;
9450
9451 case 'n':
9452 switch (name[2])
9453 {
9454 case 'd':
9455 if (name[3] == 'e' &&
9456 name[4] == 'f')
9457 { /* undef */
9458 return KEY_undef;
9459 }
9460
9461 goto unknown;
9462
9463 case 't':
9464 if (name[3] == 'i')
9465 {
9466 switch (name[4])
9467 {
9468 case 'e':
9469 { /* untie */
1db4d195 9470 return -KEY_untie;
4c3bbe0f
MHM
9471 }
9472
4c3bbe0f
MHM
9473 case 'l':
9474 { /* until */
9475 return KEY_until;
9476 }
9477
4c3bbe0f
MHM
9478 default:
9479 goto unknown;
9480 }
9481 }
9482
9483 goto unknown;
9484
9485 default:
9486 goto unknown;
9487 }
9488
9489 case 't':
9490 if (name[2] == 'i' &&
9491 name[3] == 'm' &&
9492 name[4] == 'e')
9493 { /* utime */
9494 return -KEY_utime;
9495 }
9496
9497 goto unknown;
9498
9499 default:
9500 goto unknown;
9501 }
9502
9503 case 'w':
9504 switch (name[1])
9505 {
9506 case 'h':
9507 if (name[2] == 'i' &&
9508 name[3] == 'l' &&
9509 name[4] == 'e')
9510 { /* while */
9511 return KEY_while;
9512 }
9513
9514 goto unknown;
9515
9516 case 'r':
9517 if (name[2] == 'i' &&
9518 name[3] == 't' &&
9519 name[4] == 'e')
9520 { /* write */
9521 return -KEY_write;
9522 }
9523
9524 goto unknown;
9525
9526 default:
9527 goto unknown;
9528 }
9529
9530 default:
9531 goto unknown;
e2e1dd5a 9532 }
4c3bbe0f
MHM
9533
9534 case 6: /* 33 tokens of length 6 */
9535 switch (name[0])
9536 {
9537 case 'a':
9538 if (name[1] == 'c' &&
9539 name[2] == 'c' &&
9540 name[3] == 'e' &&
9541 name[4] == 'p' &&
9542 name[5] == 't')
9543 { /* accept */
9544 return -KEY_accept;
9545 }
9546
9547 goto unknown;
9548
9549 case 'c':
9550 switch (name[1])
9551 {
9552 case 'a':
9553 if (name[2] == 'l' &&
9554 name[3] == 'l' &&
9555 name[4] == 'e' &&
9556 name[5] == 'r')
9557 { /* caller */
9558 return -KEY_caller;
9559 }
9560
9561 goto unknown;
9562
9563 case 'h':
9564 if (name[2] == 'r' &&
9565 name[3] == 'o' &&
9566 name[4] == 'o' &&
9567 name[5] == 't')
9568 { /* chroot */
9569 return -KEY_chroot;
9570 }
9571
9572 goto unknown;
9573
9574 default:
9575 goto unknown;
9576 }
9577
9578 case 'd':
9579 if (name[1] == 'e' &&
9580 name[2] == 'l' &&
9581 name[3] == 'e' &&
9582 name[4] == 't' &&
9583 name[5] == 'e')
9584 { /* delete */
9585 return KEY_delete;
9586 }
9587
9588 goto unknown;
9589
9590 case 'e':
9591 switch (name[1])
9592 {
9593 case 'l':
9594 if (name[2] == 's' &&
9595 name[3] == 'e' &&
9596 name[4] == 'i' &&
9597 name[5] == 'f')
9598 { /* elseif */
9b387841 9599 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9600 }
9601
9602 goto unknown;
9603
9604 case 'x':
9605 if (name[2] == 'i' &&
9606 name[3] == 's' &&
9607 name[4] == 't' &&
9608 name[5] == 's')
9609 { /* exists */
9610 return KEY_exists;
9611 }
9612
9613 goto unknown;
9614
9615 default:
9616 goto unknown;
9617 }
9618
9619 case 'f':
9620 switch (name[1])
9621 {
9622 case 'i':
9623 if (name[2] == 'l' &&
9624 name[3] == 'e' &&
9625 name[4] == 'n' &&
9626 name[5] == 'o')
9627 { /* fileno */
9628 return -KEY_fileno;
9629 }
9630
9631 goto unknown;
9632
9633 case 'o':
9634 if (name[2] == 'r' &&
9635 name[3] == 'm' &&
9636 name[4] == 'a' &&
9637 name[5] == 't')
9638 { /* format */
9639 return KEY_format;
9640 }
9641
9642 goto unknown;
9643
9644 default:
9645 goto unknown;
9646 }
9647
9648 case 'g':
9649 if (name[1] == 'm' &&
9650 name[2] == 't' &&
9651 name[3] == 'i' &&
9652 name[4] == 'm' &&
9653 name[5] == 'e')
9654 { /* gmtime */
9655 return -KEY_gmtime;
9656 }
9657
9658 goto unknown;
9659
9660 case 'l':
9661 switch (name[1])
9662 {
9663 case 'e':
9664 if (name[2] == 'n' &&
9665 name[3] == 'g' &&
9666 name[4] == 't' &&
9667 name[5] == 'h')
9668 { /* length */
9669 return -KEY_length;
9670 }
9671
9672 goto unknown;
9673
9674 case 'i':
9675 if (name[2] == 's' &&
9676 name[3] == 't' &&
9677 name[4] == 'e' &&
9678 name[5] == 'n')
9679 { /* listen */
9680 return -KEY_listen;
9681 }
9682
9683 goto unknown;
9684
9685 default:
9686 goto unknown;
9687 }
9688
9689 case 'm':
9690 if (name[1] == 's' &&
9691 name[2] == 'g')
9692 {
9693 switch (name[3])
9694 {
9695 case 'c':
9696 if (name[4] == 't' &&
9697 name[5] == 'l')
9698 { /* msgctl */
9699 return -KEY_msgctl;
9700 }
9701
9702 goto unknown;
9703
9704 case 'g':
9705 if (name[4] == 'e' &&
9706 name[5] == 't')
9707 { /* msgget */
9708 return -KEY_msgget;
9709 }
9710
9711 goto unknown;
9712
9713 case 'r':
9714 if (name[4] == 'c' &&
9715 name[5] == 'v')
9716 { /* msgrcv */
9717 return -KEY_msgrcv;
9718 }
9719
9720 goto unknown;
9721
9722 case 's':
9723 if (name[4] == 'n' &&
9724 name[5] == 'd')
9725 { /* msgsnd */
9726 return -KEY_msgsnd;
9727 }
9728
9729 goto unknown;
9730
9731 default:
9732 goto unknown;
9733 }
9734 }
9735
9736 goto unknown;
9737
9738 case 'p':
9739 if (name[1] == 'r' &&
9740 name[2] == 'i' &&
9741 name[3] == 'n' &&
9742 name[4] == 't' &&
9743 name[5] == 'f')
9744 { /* printf */
9745 return KEY_printf;
9746 }
9747
9748 goto unknown;
9749
9750 case 'r':
9751 switch (name[1])
9752 {
9753 case 'e':
9754 switch (name[2])
9755 {
9756 case 'n':
9757 if (name[3] == 'a' &&
9758 name[4] == 'm' &&
9759 name[5] == 'e')
9760 { /* rename */
9761 return -KEY_rename;
9762 }
9763
9764 goto unknown;
9765
9766 case 't':
9767 if (name[3] == 'u' &&
9768 name[4] == 'r' &&
9769 name[5] == 'n')
9770 { /* return */
9771 return KEY_return;
9772 }
9773
9774 goto unknown;
9775
9776 default:
9777 goto unknown;
9778 }
9779
9780 case 'i':
9781 if (name[2] == 'n' &&
9782 name[3] == 'd' &&
9783 name[4] == 'e' &&
9784 name[5] == 'x')
9785 { /* rindex */
9786 return -KEY_rindex;
9787 }
9788
9789 goto unknown;
9790
9791 default:
9792 goto unknown;
9793 }
9794
9795 case 's':
9796 switch (name[1])
9797 {
9798 case 'c':
9799 if (name[2] == 'a' &&
9800 name[3] == 'l' &&
9801 name[4] == 'a' &&
9802 name[5] == 'r')
9803 { /* scalar */
9804 return KEY_scalar;
9805 }
9806
9807 goto unknown;
9808
9809 case 'e':
9810 switch (name[2])
9811 {
9812 case 'l':
9813 if (name[3] == 'e' &&
9814 name[4] == 'c' &&
9815 name[5] == 't')
9816 { /* select */
9817 return -KEY_select;
9818 }
9819
9820 goto unknown;
9821
9822 case 'm':
9823 switch (name[3])
9824 {
9825 case 'c':
9826 if (name[4] == 't' &&
9827 name[5] == 'l')
9828 { /* semctl */
9829 return -KEY_semctl;
9830 }
9831
9832 goto unknown;
9833
9834 case 'g':
9835 if (name[4] == 'e' &&
9836 name[5] == 't')
9837 { /* semget */
9838 return -KEY_semget;
9839 }
9840
9841 goto unknown;
9842
9843 default:
9844 goto unknown;
9845 }
9846
9847 default:
9848 goto unknown;
9849 }
9850
9851 case 'h':
9852 if (name[2] == 'm')
9853 {
9854 switch (name[3])
9855 {
9856 case 'c':
9857 if (name[4] == 't' &&
9858 name[5] == 'l')
9859 { /* shmctl */
9860 return -KEY_shmctl;
9861 }
9862
9863 goto unknown;
9864
9865 case 'g':
9866 if (name[4] == 'e' &&
9867 name[5] == 't')
9868 { /* shmget */
9869 return -KEY_shmget;
9870 }
9871
9872 goto unknown;
9873
9874 default:
9875 goto unknown;
9876 }
9877 }
9878
9879 goto unknown;
9880
9881 case 'o':
9882 if (name[2] == 'c' &&
9883 name[3] == 'k' &&
9884 name[4] == 'e' &&
9885 name[5] == 't')
9886 { /* socket */
9887 return -KEY_socket;
9888 }
9889
9890 goto unknown;
9891
9892 case 'p':
9893 if (name[2] == 'l' &&
9894 name[3] == 'i' &&
9895 name[4] == 'c' &&
9896 name[5] == 'e')
9897 { /* splice */
9898 return -KEY_splice;
9899 }
9900
9901 goto unknown;
9902
9903 case 'u':
9904 if (name[2] == 'b' &&
9905 name[3] == 's' &&
9906 name[4] == 't' &&
9907 name[5] == 'r')
9908 { /* substr */
9909 return -KEY_substr;
9910 }
9911
9912 goto unknown;
9913
9914 case 'y':
9915 if (name[2] == 's' &&
9916 name[3] == 't' &&
9917 name[4] == 'e' &&
9918 name[5] == 'm')
9919 { /* system */
9920 return -KEY_system;
9921 }
9922
9923 goto unknown;
9924
9925 default:
9926 goto unknown;
9927 }
9928
9929 case 'u':
9930 if (name[1] == 'n')
9931 {
9932 switch (name[2])
9933 {
9934 case 'l':
9935 switch (name[3])
9936 {
9937 case 'e':
9938 if (name[4] == 's' &&
9939 name[5] == 's')
9940 { /* unless */
9941 return KEY_unless;
9942 }
9943
9944 goto unknown;
9945
9946 case 'i':
9947 if (name[4] == 'n' &&
9948 name[5] == 'k')
9949 { /* unlink */
9950 return -KEY_unlink;
9951 }
9952
9953 goto unknown;
9954
9955 default:
9956 goto unknown;
9957 }
9958
9959 case 'p':
9960 if (name[3] == 'a' &&
9961 name[4] == 'c' &&
9962 name[5] == 'k')
9963 { /* unpack */
9964 return -KEY_unpack;
9965 }
9966
9967 goto unknown;
9968
9969 default:
9970 goto unknown;
9971 }
9972 }
9973
9974 goto unknown;
9975
9976 case 'v':
9977 if (name[1] == 'a' &&
9978 name[2] == 'l' &&
9979 name[3] == 'u' &&
9980 name[4] == 'e' &&
9981 name[5] == 's')
9982 { /* values */
9983 return -KEY_values;
9984 }
9985
9986 goto unknown;
9987
9988 default:
9989 goto unknown;
e2e1dd5a 9990 }
4c3bbe0f 9991
0d863452 9992 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
9993 switch (name[0])
9994 {
9995 case 'D':
9996 if (name[1] == 'E' &&
9997 name[2] == 'S' &&
9998 name[3] == 'T' &&
9999 name[4] == 'R' &&
10000 name[5] == 'O' &&
10001 name[6] == 'Y')
10002 { /* DESTROY */
10003 return KEY_DESTROY;
10004 }
10005
10006 goto unknown;
10007
10008 case '_':
10009 if (name[1] == '_' &&
10010 name[2] == 'E' &&
10011 name[3] == 'N' &&
10012 name[4] == 'D' &&
10013 name[5] == '_' &&
10014 name[6] == '_')
10015 { /* __END__ */
10016 return KEY___END__;
10017 }
10018
10019 goto unknown;
10020
10021 case 'b':
10022 if (name[1] == 'i' &&
10023 name[2] == 'n' &&
10024 name[3] == 'm' &&
10025 name[4] == 'o' &&
10026 name[5] == 'd' &&
10027 name[6] == 'e')
10028 { /* binmode */
10029 return -KEY_binmode;
10030 }
10031
10032 goto unknown;
10033
10034 case 'c':
10035 if (name[1] == 'o' &&
10036 name[2] == 'n' &&
10037 name[3] == 'n' &&
10038 name[4] == 'e' &&
10039 name[5] == 'c' &&
10040 name[6] == 't')
10041 { /* connect */
10042 return -KEY_connect;
10043 }
10044
10045 goto unknown;
10046
10047 case 'd':
10048 switch (name[1])
10049 {
10050 case 'b':
10051 if (name[2] == 'm' &&
10052 name[3] == 'o' &&
10053 name[4] == 'p' &&
10054 name[5] == 'e' &&
10055 name[6] == 'n')
10056 { /* dbmopen */
10057 return -KEY_dbmopen;
10058 }
10059
10060 goto unknown;
10061
10062 case 'e':
0d863452
RH
10063 if (name[2] == 'f')
10064 {
10065 switch (name[3])
10066 {
10067 case 'a':
10068 if (name[4] == 'u' &&
10069 name[5] == 'l' &&
10070 name[6] == 't')
10071 { /* default */
5458a98a 10072 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
10073 }
10074
10075 goto unknown;
10076
10077 case 'i':
10078 if (name[4] == 'n' &&
952306ac
RGS
10079 name[5] == 'e' &&
10080 name[6] == 'd')
10081 { /* defined */
10082 return KEY_defined;
10083 }
4c3bbe0f 10084
952306ac 10085 goto unknown;
4c3bbe0f 10086
952306ac
RGS
10087 default:
10088 goto unknown;
10089 }
0d863452
RH
10090 }
10091
10092 goto unknown;
10093
10094 default:
10095 goto unknown;
10096 }
4c3bbe0f
MHM
10097
10098 case 'f':
10099 if (name[1] == 'o' &&
10100 name[2] == 'r' &&
10101 name[3] == 'e' &&
10102 name[4] == 'a' &&
10103 name[5] == 'c' &&
10104 name[6] == 'h')
10105 { /* foreach */
10106 return KEY_foreach;
10107 }
10108
10109 goto unknown;
10110
10111 case 'g':
10112 if (name[1] == 'e' &&
10113 name[2] == 't' &&
10114 name[3] == 'p')
10115 {
10116 switch (name[4])
10117 {
10118 case 'g':
10119 if (name[5] == 'r' &&
10120 name[6] == 'p')
10121 { /* getpgrp */
10122 return -KEY_getpgrp;
10123 }
10124
10125 goto unknown;
10126
10127 case 'p':
10128 if (name[5] == 'i' &&
10129 name[6] == 'd')
10130 { /* getppid */
10131 return -KEY_getppid;
10132 }
10133
10134 goto unknown;
10135
10136 default:
10137 goto unknown;
10138 }
10139 }
10140
10141 goto unknown;
10142
10143 case 'l':
10144 if (name[1] == 'c' &&
10145 name[2] == 'f' &&
10146 name[3] == 'i' &&
10147 name[4] == 'r' &&
10148 name[5] == 's' &&
10149 name[6] == 't')
10150 { /* lcfirst */
10151 return -KEY_lcfirst;
10152 }
10153
10154 goto unknown;
10155
10156 case 'o':
10157 if (name[1] == 'p' &&
10158 name[2] == 'e' &&
10159 name[3] == 'n' &&
10160 name[4] == 'd' &&
10161 name[5] == 'i' &&
10162 name[6] == 'r')
10163 { /* opendir */
10164 return -KEY_opendir;
10165 }
10166
10167 goto unknown;
10168
10169 case 'p':
10170 if (name[1] == 'a' &&
10171 name[2] == 'c' &&
10172 name[3] == 'k' &&
10173 name[4] == 'a' &&
10174 name[5] == 'g' &&
10175 name[6] == 'e')
10176 { /* package */
10177 return KEY_package;
10178 }
10179
10180 goto unknown;
10181
10182 case 'r':
10183 if (name[1] == 'e')
10184 {
10185 switch (name[2])
10186 {
10187 case 'a':
10188 if (name[3] == 'd' &&
10189 name[4] == 'd' &&
10190 name[5] == 'i' &&
10191 name[6] == 'r')
10192 { /* readdir */
10193 return -KEY_readdir;
10194 }
10195
10196 goto unknown;
10197
10198 case 'q':
10199 if (name[3] == 'u' &&
10200 name[4] == 'i' &&
10201 name[5] == 'r' &&
10202 name[6] == 'e')
10203 { /* require */
10204 return KEY_require;
10205 }
10206
10207 goto unknown;
10208
10209 case 'v':
10210 if (name[3] == 'e' &&
10211 name[4] == 'r' &&
10212 name[5] == 's' &&
10213 name[6] == 'e')
10214 { /* reverse */
10215 return -KEY_reverse;
10216 }
10217
10218 goto unknown;
10219
10220 default:
10221 goto unknown;
10222 }
10223 }
10224
10225 goto unknown;
10226
10227 case 's':
10228 switch (name[1])
10229 {
10230 case 'e':
10231 switch (name[2])
10232 {
10233 case 'e':
10234 if (name[3] == 'k' &&
10235 name[4] == 'd' &&
10236 name[5] == 'i' &&
10237 name[6] == 'r')
10238 { /* seekdir */
10239 return -KEY_seekdir;
10240 }
10241
10242 goto unknown;
10243
10244 case 't':
10245 if (name[3] == 'p' &&
10246 name[4] == 'g' &&
10247 name[5] == 'r' &&
10248 name[6] == 'p')
10249 { /* setpgrp */
10250 return -KEY_setpgrp;
10251 }
10252
10253 goto unknown;
10254
10255 default:
10256 goto unknown;
10257 }
10258
10259 case 'h':
10260 if (name[2] == 'm' &&
10261 name[3] == 'r' &&
10262 name[4] == 'e' &&
10263 name[5] == 'a' &&
10264 name[6] == 'd')
10265 { /* shmread */
10266 return -KEY_shmread;
10267 }
10268
10269 goto unknown;
10270
10271 case 'p':
10272 if (name[2] == 'r' &&
10273 name[3] == 'i' &&
10274 name[4] == 'n' &&
10275 name[5] == 't' &&
10276 name[6] == 'f')
10277 { /* sprintf */
10278 return -KEY_sprintf;
10279 }
10280
10281 goto unknown;
10282
10283 case 'y':
10284 switch (name[2])
10285 {
10286 case 'm':
10287 if (name[3] == 'l' &&
10288 name[4] == 'i' &&
10289 name[5] == 'n' &&
10290 name[6] == 'k')
10291 { /* symlink */
10292 return -KEY_symlink;
10293 }
10294
10295 goto unknown;
10296
10297 case 's':
10298 switch (name[3])
10299 {
10300 case 'c':
10301 if (name[4] == 'a' &&
10302 name[5] == 'l' &&
10303 name[6] == 'l')
10304 { /* syscall */
10305 return -KEY_syscall;
10306 }
10307
10308 goto unknown;
10309
10310 case 'o':
10311 if (name[4] == 'p' &&
10312 name[5] == 'e' &&
10313 name[6] == 'n')
10314 { /* sysopen */
10315 return -KEY_sysopen;
10316 }
10317
10318 goto unknown;
10319
10320 case 'r':
10321 if (name[4] == 'e' &&
10322 name[5] == 'a' &&
10323 name[6] == 'd')
10324 { /* sysread */
10325 return -KEY_sysread;
10326 }
10327
10328 goto unknown;
10329
10330 case 's':
10331 if (name[4] == 'e' &&
10332 name[5] == 'e' &&
10333 name[6] == 'k')
10334 { /* sysseek */
10335 return -KEY_sysseek;
10336 }
10337
10338 goto unknown;
10339
10340 default:
10341 goto unknown;
10342 }
10343
10344 default:
10345 goto unknown;
10346 }
10347
10348 default:
10349 goto unknown;
10350 }
10351
10352 case 't':
10353 if (name[1] == 'e' &&
10354 name[2] == 'l' &&
10355 name[3] == 'l' &&
10356 name[4] == 'd' &&
10357 name[5] == 'i' &&
10358 name[6] == 'r')
10359 { /* telldir */
10360 return -KEY_telldir;
10361 }
10362
10363 goto unknown;
10364
10365 case 'u':
10366 switch (name[1])
10367 {
10368 case 'c':
10369 if (name[2] == 'f' &&
10370 name[3] == 'i' &&
10371 name[4] == 'r' &&
10372 name[5] == 's' &&
10373 name[6] == 't')
10374 { /* ucfirst */
10375 return -KEY_ucfirst;
10376 }
10377
10378 goto unknown;
10379
10380 case 'n':
10381 if (name[2] == 's' &&
10382 name[3] == 'h' &&
10383 name[4] == 'i' &&
10384 name[5] == 'f' &&
10385 name[6] == 't')
10386 { /* unshift */
10387 return -KEY_unshift;
10388 }
10389
10390 goto unknown;
10391
10392 default:
10393 goto unknown;
10394 }
10395
10396 case 'w':
10397 if (name[1] == 'a' &&
10398 name[2] == 'i' &&
10399 name[3] == 't' &&
10400 name[4] == 'p' &&
10401 name[5] == 'i' &&
10402 name[6] == 'd')
10403 { /* waitpid */
10404 return -KEY_waitpid;
10405 }
10406
10407 goto unknown;
10408
10409 default:
10410 goto unknown;
10411 }
10412
10413 case 8: /* 26 tokens of length 8 */
10414 switch (name[0])
10415 {
10416 case 'A':
10417 if (name[1] == 'U' &&
10418 name[2] == 'T' &&
10419 name[3] == 'O' &&
10420 name[4] == 'L' &&
10421 name[5] == 'O' &&
10422 name[6] == 'A' &&
10423 name[7] == 'D')
10424 { /* AUTOLOAD */
10425 return KEY_AUTOLOAD;
10426 }
10427
10428 goto unknown;
10429
10430 case '_':
10431 if (name[1] == '_')
10432 {
10433 switch (name[2])
10434 {
10435 case 'D':
10436 if (name[3] == 'A' &&
10437 name[4] == 'T' &&
10438 name[5] == 'A' &&
10439 name[6] == '_' &&
10440 name[7] == '_')
10441 { /* __DATA__ */
10442 return KEY___DATA__;
10443 }
10444
10445 goto unknown;
10446
10447 case 'F':
10448 if (name[3] == 'I' &&
10449 name[4] == 'L' &&
10450 name[5] == 'E' &&
10451 name[6] == '_' &&
10452 name[7] == '_')
10453 { /* __FILE__ */
10454 return -KEY___FILE__;
10455 }
10456
10457 goto unknown;
10458
10459 case 'L':
10460 if (name[3] == 'I' &&
10461 name[4] == 'N' &&
10462 name[5] == 'E' &&
10463 name[6] == '_' &&
10464 name[7] == '_')
10465 { /* __LINE__ */
10466 return -KEY___LINE__;
10467 }
10468
10469 goto unknown;
10470
10471 default:
10472 goto unknown;
10473 }
10474 }
10475
10476 goto unknown;
10477
10478 case 'c':
10479 switch (name[1])
10480 {
10481 case 'l':
10482 if (name[2] == 'o' &&
10483 name[3] == 's' &&
10484 name[4] == 'e' &&
10485 name[5] == 'd' &&
10486 name[6] == 'i' &&
10487 name[7] == 'r')
10488 { /* closedir */
10489 return -KEY_closedir;
10490 }
10491
10492 goto unknown;
10493
10494 case 'o':
10495 if (name[2] == 'n' &&
10496 name[3] == 't' &&
10497 name[4] == 'i' &&
10498 name[5] == 'n' &&
10499 name[6] == 'u' &&
10500 name[7] == 'e')
10501 { /* continue */
10502 return -KEY_continue;
10503 }
10504
10505 goto unknown;
10506
10507 default:
10508 goto unknown;
10509 }
10510
10511 case 'd':
10512 if (name[1] == 'b' &&
10513 name[2] == 'm' &&
10514 name[3] == 'c' &&
10515 name[4] == 'l' &&
10516 name[5] == 'o' &&
10517 name[6] == 's' &&
10518 name[7] == 'e')
10519 { /* dbmclose */
10520 return -KEY_dbmclose;
10521 }
10522
10523 goto unknown;
10524
10525 case 'e':
10526 if (name[1] == 'n' &&
10527 name[2] == 'd')
10528 {
10529 switch (name[3])
10530 {
10531 case 'g':
10532 if (name[4] == 'r' &&
10533 name[5] == 'e' &&
10534 name[6] == 'n' &&
10535 name[7] == 't')
10536 { /* endgrent */
10537 return -KEY_endgrent;
10538 }
10539
10540 goto unknown;
10541
10542 case 'p':
10543 if (name[4] == 'w' &&
10544 name[5] == 'e' &&
10545 name[6] == 'n' &&
10546 name[7] == 't')
10547 { /* endpwent */
10548 return -KEY_endpwent;
10549 }
10550
10551 goto unknown;
10552
10553 default:
10554 goto unknown;
10555 }
10556 }
10557
10558 goto unknown;
10559
10560 case 'f':
10561 if (name[1] == 'o' &&
10562 name[2] == 'r' &&
10563 name[3] == 'm' &&
10564 name[4] == 'l' &&
10565 name[5] == 'i' &&
10566 name[6] == 'n' &&
10567 name[7] == 'e')
10568 { /* formline */
10569 return -KEY_formline;
10570 }
10571
10572 goto unknown;
10573
10574 case 'g':
10575 if (name[1] == 'e' &&
10576 name[2] == 't')
10577 {
10578 switch (name[3])
10579 {
10580 case 'g':
10581 if (name[4] == 'r')
10582 {
10583 switch (name[5])
10584 {
10585 case 'e':
10586 if (name[6] == 'n' &&
10587 name[7] == 't')
10588 { /* getgrent */
10589 return -KEY_getgrent;
10590 }
10591
10592 goto unknown;
10593
10594 case 'g':
10595 if (name[6] == 'i' &&
10596 name[7] == 'd')
10597 { /* getgrgid */
10598 return -KEY_getgrgid;
10599 }
10600
10601 goto unknown;
10602
10603 case 'n':
10604 if (name[6] == 'a' &&
10605 name[7] == 'm')
10606 { /* getgrnam */
10607 return -KEY_getgrnam;
10608 }
10609
10610 goto unknown;
10611
10612 default:
10613 goto unknown;
10614 }
10615 }
10616
10617 goto unknown;
10618
10619 case 'l':
10620 if (name[4] == 'o' &&
10621 name[5] == 'g' &&
10622 name[6] == 'i' &&
10623 name[7] == 'n')
10624 { /* getlogin */
10625 return -KEY_getlogin;
10626 }
10627
10628 goto unknown;
10629
10630 case 'p':
10631 if (name[4] == 'w')
10632 {
10633 switch (name[5])
10634 {
10635 case 'e':
10636 if (name[6] == 'n' &&
10637 name[7] == 't')
10638 { /* getpwent */
10639 return -KEY_getpwent;
10640 }
10641
10642 goto unknown;
10643
10644 case 'n':
10645 if (name[6] == 'a' &&
10646 name[7] == 'm')
10647 { /* getpwnam */
10648 return -KEY_getpwnam;
10649 }
10650
10651 goto unknown;
10652
10653 case 'u':
10654 if (name[6] == 'i' &&
10655 name[7] == 'd')
10656 { /* getpwuid */
10657 return -KEY_getpwuid;
10658 }
10659
10660 goto unknown;
10661
10662 default:
10663 goto unknown;
10664 }
10665 }
10666
10667 goto unknown;
10668
10669 default:
10670 goto unknown;
10671 }
10672 }
10673
10674 goto unknown;
10675
10676 case 'r':
10677 if (name[1] == 'e' &&
10678 name[2] == 'a' &&
10679 name[3] == 'd')
10680 {
10681 switch (name[4])
10682 {
10683 case 'l':
10684 if (name[5] == 'i' &&
10685 name[6] == 'n')
10686 {
10687 switch (name[7])
10688 {
10689 case 'e':
10690 { /* readline */
10691 return -KEY_readline;
10692 }
10693
4c3bbe0f
MHM
10694 case 'k':
10695 { /* readlink */
10696 return -KEY_readlink;
10697 }
10698
4c3bbe0f
MHM
10699 default:
10700 goto unknown;
10701 }
10702 }
10703
10704 goto unknown;
10705
10706 case 'p':
10707 if (name[5] == 'i' &&
10708 name[6] == 'p' &&
10709 name[7] == 'e')
10710 { /* readpipe */
10711 return -KEY_readpipe;
10712 }
10713
10714 goto unknown;
10715
10716 default:
10717 goto unknown;
10718 }
10719 }
10720
10721 goto unknown;
10722
10723 case 's':
10724 switch (name[1])
10725 {
10726 case 'e':
10727 if (name[2] == 't')
10728 {
10729 switch (name[3])
10730 {
10731 case 'g':
10732 if (name[4] == 'r' &&
10733 name[5] == 'e' &&
10734 name[6] == 'n' &&
10735 name[7] == 't')
10736 { /* setgrent */
10737 return -KEY_setgrent;
10738 }
10739
10740 goto unknown;
10741
10742 case 'p':
10743 if (name[4] == 'w' &&
10744 name[5] == 'e' &&
10745 name[6] == 'n' &&
10746 name[7] == 't')
10747 { /* setpwent */
10748 return -KEY_setpwent;
10749 }
10750
10751 goto unknown;
10752
10753 default:
10754 goto unknown;
10755 }
10756 }
10757
10758 goto unknown;
10759
10760 case 'h':
10761 switch (name[2])
10762 {
10763 case 'm':
10764 if (name[3] == 'w' &&
10765 name[4] == 'r' &&
10766 name[5] == 'i' &&
10767 name[6] == 't' &&
10768 name[7] == 'e')
10769 { /* shmwrite */
10770 return -KEY_shmwrite;
10771 }
10772
10773 goto unknown;
10774
10775 case 'u':
10776 if (name[3] == 't' &&
10777 name[4] == 'd' &&
10778 name[5] == 'o' &&
10779 name[6] == 'w' &&
10780 name[7] == 'n')
10781 { /* shutdown */
10782 return -KEY_shutdown;
10783 }
10784
10785 goto unknown;
10786
10787 default:
10788 goto unknown;
10789 }
10790
10791 case 'y':
10792 if (name[2] == 's' &&
10793 name[3] == 'w' &&
10794 name[4] == 'r' &&
10795 name[5] == 'i' &&
10796 name[6] == 't' &&
10797 name[7] == 'e')
10798 { /* syswrite */
10799 return -KEY_syswrite;
10800 }
10801
10802 goto unknown;
10803
10804 default:
10805 goto unknown;
10806 }
10807
10808 case 't':
10809 if (name[1] == 'r' &&
10810 name[2] == 'u' &&
10811 name[3] == 'n' &&
10812 name[4] == 'c' &&
10813 name[5] == 'a' &&
10814 name[6] == 't' &&
10815 name[7] == 'e')
10816 { /* truncate */
10817 return -KEY_truncate;
10818 }
10819
10820 goto unknown;
10821
10822 default:
10823 goto unknown;
10824 }
10825
3c10abe3 10826 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10827 switch (name[0])
10828 {
3c10abe3
AG
10829 case 'U':
10830 if (name[1] == 'N' &&
10831 name[2] == 'I' &&
10832 name[3] == 'T' &&
10833 name[4] == 'C' &&
10834 name[5] == 'H' &&
10835 name[6] == 'E' &&
10836 name[7] == 'C' &&
10837 name[8] == 'K')
10838 { /* UNITCHECK */
10839 return KEY_UNITCHECK;
10840 }
10841
10842 goto unknown;
10843
4c3bbe0f
MHM
10844 case 'e':
10845 if (name[1] == 'n' &&
10846 name[2] == 'd' &&
10847 name[3] == 'n' &&
10848 name[4] == 'e' &&
10849 name[5] == 't' &&
10850 name[6] == 'e' &&
10851 name[7] == 'n' &&
10852 name[8] == 't')
10853 { /* endnetent */
10854 return -KEY_endnetent;
10855 }
10856
10857 goto unknown;
10858
10859 case 'g':
10860 if (name[1] == 'e' &&
10861 name[2] == 't' &&
10862 name[3] == 'n' &&
10863 name[4] == 'e' &&
10864 name[5] == 't' &&
10865 name[6] == 'e' &&
10866 name[7] == 'n' &&
10867 name[8] == 't')
10868 { /* getnetent */
10869 return -KEY_getnetent;
10870 }
10871
10872 goto unknown;
10873
10874 case 'l':
10875 if (name[1] == 'o' &&
10876 name[2] == 'c' &&
10877 name[3] == 'a' &&
10878 name[4] == 'l' &&
10879 name[5] == 't' &&
10880 name[6] == 'i' &&
10881 name[7] == 'm' &&
10882 name[8] == 'e')
10883 { /* localtime */
10884 return -KEY_localtime;
10885 }
10886
10887 goto unknown;
10888
10889 case 'p':
10890 if (name[1] == 'r' &&
10891 name[2] == 'o' &&
10892 name[3] == 't' &&
10893 name[4] == 'o' &&
10894 name[5] == 't' &&
10895 name[6] == 'y' &&
10896 name[7] == 'p' &&
10897 name[8] == 'e')
10898 { /* prototype */
10899 return KEY_prototype;
10900 }
10901
10902 goto unknown;
10903
10904 case 'q':
10905 if (name[1] == 'u' &&
10906 name[2] == 'o' &&
10907 name[3] == 't' &&
10908 name[4] == 'e' &&
10909 name[5] == 'm' &&
10910 name[6] == 'e' &&
10911 name[7] == 't' &&
10912 name[8] == 'a')
10913 { /* quotemeta */
10914 return -KEY_quotemeta;
10915 }
10916
10917 goto unknown;
10918
10919 case 'r':
10920 if (name[1] == 'e' &&
10921 name[2] == 'w' &&
10922 name[3] == 'i' &&
10923 name[4] == 'n' &&
10924 name[5] == 'd' &&
10925 name[6] == 'd' &&
10926 name[7] == 'i' &&
10927 name[8] == 'r')
10928 { /* rewinddir */
10929 return -KEY_rewinddir;
10930 }
10931
10932 goto unknown;
10933
10934 case 's':
10935 if (name[1] == 'e' &&
10936 name[2] == 't' &&
10937 name[3] == 'n' &&
10938 name[4] == 'e' &&
10939 name[5] == 't' &&
10940 name[6] == 'e' &&
10941 name[7] == 'n' &&
10942 name[8] == 't')
10943 { /* setnetent */
10944 return -KEY_setnetent;
10945 }
10946
10947 goto unknown;
10948
10949 case 'w':
10950 if (name[1] == 'a' &&
10951 name[2] == 'n' &&
10952 name[3] == 't' &&
10953 name[4] == 'a' &&
10954 name[5] == 'r' &&
10955 name[6] == 'r' &&
10956 name[7] == 'a' &&
10957 name[8] == 'y')
10958 { /* wantarray */
10959 return -KEY_wantarray;
10960 }
10961
10962 goto unknown;
10963
10964 default:
10965 goto unknown;
10966 }
10967
10968 case 10: /* 9 tokens of length 10 */
10969 switch (name[0])
10970 {
10971 case 'e':
10972 if (name[1] == 'n' &&
10973 name[2] == 'd')
10974 {
10975 switch (name[3])
10976 {
10977 case 'h':
10978 if (name[4] == 'o' &&
10979 name[5] == 's' &&
10980 name[6] == 't' &&
10981 name[7] == 'e' &&
10982 name[8] == 'n' &&
10983 name[9] == 't')
10984 { /* endhostent */
10985 return -KEY_endhostent;
10986 }
10987
10988 goto unknown;
10989
10990 case 's':
10991 if (name[4] == 'e' &&
10992 name[5] == 'r' &&
10993 name[6] == 'v' &&
10994 name[7] == 'e' &&
10995 name[8] == 'n' &&
10996 name[9] == 't')
10997 { /* endservent */
10998 return -KEY_endservent;
10999 }
11000
11001 goto unknown;
11002
11003 default:
11004 goto unknown;
11005 }
11006 }
11007
11008 goto unknown;
11009
11010 case 'g':
11011 if (name[1] == 'e' &&
11012 name[2] == 't')
11013 {
11014 switch (name[3])
11015 {
11016 case 'h':
11017 if (name[4] == 'o' &&
11018 name[5] == 's' &&
11019 name[6] == 't' &&
11020 name[7] == 'e' &&
11021 name[8] == 'n' &&
11022 name[9] == 't')
11023 { /* gethostent */
11024 return -KEY_gethostent;
11025 }
11026
11027 goto unknown;
11028
11029 case 's':
11030 switch (name[4])
11031 {
11032 case 'e':
11033 if (name[5] == 'r' &&
11034 name[6] == 'v' &&
11035 name[7] == 'e' &&
11036 name[8] == 'n' &&
11037 name[9] == 't')
11038 { /* getservent */
11039 return -KEY_getservent;
11040 }
11041
11042 goto unknown;
11043
11044 case 'o':
11045 if (name[5] == 'c' &&
11046 name[6] == 'k' &&
11047 name[7] == 'o' &&
11048 name[8] == 'p' &&
11049 name[9] == 't')
11050 { /* getsockopt */
11051 return -KEY_getsockopt;
11052 }
11053
11054 goto unknown;
11055
11056 default:
11057 goto unknown;
11058 }
11059
11060 default:
11061 goto unknown;
11062 }
11063 }
11064
11065 goto unknown;
11066
11067 case 's':
11068 switch (name[1])
11069 {
11070 case 'e':
11071 if (name[2] == 't')
11072 {
11073 switch (name[3])
11074 {
11075 case 'h':
11076 if (name[4] == 'o' &&
11077 name[5] == 's' &&
11078 name[6] == 't' &&
11079 name[7] == 'e' &&
11080 name[8] == 'n' &&
11081 name[9] == 't')
11082 { /* sethostent */
11083 return -KEY_sethostent;
11084 }
11085
11086 goto unknown;
11087
11088 case 's':
11089 switch (name[4])
11090 {
11091 case 'e':
11092 if (name[5] == 'r' &&
11093 name[6] == 'v' &&
11094 name[7] == 'e' &&
11095 name[8] == 'n' &&
11096 name[9] == 't')
11097 { /* setservent */
11098 return -KEY_setservent;
11099 }
11100
11101 goto unknown;
11102
11103 case 'o':
11104 if (name[5] == 'c' &&
11105 name[6] == 'k' &&
11106 name[7] == 'o' &&
11107 name[8] == 'p' &&
11108 name[9] == 't')
11109 { /* setsockopt */
11110 return -KEY_setsockopt;
11111 }
11112
11113 goto unknown;
11114
11115 default:
11116 goto unknown;
11117 }
11118
11119 default:
11120 goto unknown;
11121 }
11122 }
11123
11124 goto unknown;
11125
11126 case 'o':
11127 if (name[2] == 'c' &&
11128 name[3] == 'k' &&
11129 name[4] == 'e' &&
11130 name[5] == 't' &&
11131 name[6] == 'p' &&
11132 name[7] == 'a' &&
11133 name[8] == 'i' &&
11134 name[9] == 'r')
11135 { /* socketpair */
11136 return -KEY_socketpair;
11137 }
11138
11139 goto unknown;
11140
11141 default:
11142 goto unknown;
11143 }
11144
11145 default:
11146 goto unknown;
e2e1dd5a 11147 }
4c3bbe0f
MHM
11148
11149 case 11: /* 8 tokens of length 11 */
11150 switch (name[0])
11151 {
11152 case '_':
11153 if (name[1] == '_' &&
11154 name[2] == 'P' &&
11155 name[3] == 'A' &&
11156 name[4] == 'C' &&
11157 name[5] == 'K' &&
11158 name[6] == 'A' &&
11159 name[7] == 'G' &&
11160 name[8] == 'E' &&
11161 name[9] == '_' &&
11162 name[10] == '_')
11163 { /* __PACKAGE__ */
11164 return -KEY___PACKAGE__;
11165 }
11166
11167 goto unknown;
11168
11169 case 'e':
11170 if (name[1] == 'n' &&
11171 name[2] == 'd' &&
11172 name[3] == 'p' &&
11173 name[4] == 'r' &&
11174 name[5] == 'o' &&
11175 name[6] == 't' &&
11176 name[7] == 'o' &&
11177 name[8] == 'e' &&
11178 name[9] == 'n' &&
11179 name[10] == 't')
11180 { /* endprotoent */
11181 return -KEY_endprotoent;
11182 }
11183
11184 goto unknown;
11185
11186 case 'g':
11187 if (name[1] == 'e' &&
11188 name[2] == 't')
11189 {
11190 switch (name[3])
11191 {
11192 case 'p':
11193 switch (name[4])
11194 {
11195 case 'e':
11196 if (name[5] == 'e' &&
11197 name[6] == 'r' &&
11198 name[7] == 'n' &&
11199 name[8] == 'a' &&
11200 name[9] == 'm' &&
11201 name[10] == 'e')
11202 { /* getpeername */
11203 return -KEY_getpeername;
11204 }
11205
11206 goto unknown;
11207
11208 case 'r':
11209 switch (name[5])
11210 {
11211 case 'i':
11212 if (name[6] == 'o' &&
11213 name[7] == 'r' &&
11214 name[8] == 'i' &&
11215 name[9] == 't' &&
11216 name[10] == 'y')
11217 { /* getpriority */
11218 return -KEY_getpriority;
11219 }
11220
11221 goto unknown;
11222
11223 case 'o':
11224 if (name[6] == 't' &&
11225 name[7] == 'o' &&
11226 name[8] == 'e' &&
11227 name[9] == 'n' &&
11228 name[10] == 't')
11229 { /* getprotoent */
11230 return -KEY_getprotoent;
11231 }
11232
11233 goto unknown;
11234
11235 default:
11236 goto unknown;
11237 }
11238
11239 default:
11240 goto unknown;
11241 }
11242
11243 case 's':
11244 if (name[4] == 'o' &&
11245 name[5] == 'c' &&
11246 name[6] == 'k' &&
11247 name[7] == 'n' &&
11248 name[8] == 'a' &&
11249 name[9] == 'm' &&
11250 name[10] == 'e')
11251 { /* getsockname */
11252 return -KEY_getsockname;
11253 }
11254
11255 goto unknown;
11256
11257 default:
11258 goto unknown;
11259 }
11260 }
11261
11262 goto unknown;
11263
11264 case 's':
11265 if (name[1] == 'e' &&
11266 name[2] == 't' &&
11267 name[3] == 'p' &&
11268 name[4] == 'r')
11269 {
11270 switch (name[5])
11271 {
11272 case 'i':
11273 if (name[6] == 'o' &&
11274 name[7] == 'r' &&
11275 name[8] == 'i' &&
11276 name[9] == 't' &&
11277 name[10] == 'y')
11278 { /* setpriority */
11279 return -KEY_setpriority;
11280 }
11281
11282 goto unknown;
11283
11284 case 'o':
11285 if (name[6] == 't' &&
11286 name[7] == 'o' &&
11287 name[8] == 'e' &&
11288 name[9] == 'n' &&
11289 name[10] == 't')
11290 { /* setprotoent */
11291 return -KEY_setprotoent;
11292 }
11293
11294 goto unknown;
11295
11296 default:
11297 goto unknown;
11298 }
11299 }
11300
11301 goto unknown;
11302
11303 default:
11304 goto unknown;
e2e1dd5a 11305 }
4c3bbe0f
MHM
11306
11307 case 12: /* 2 tokens of length 12 */
11308 if (name[0] == 'g' &&
11309 name[1] == 'e' &&
11310 name[2] == 't' &&
11311 name[3] == 'n' &&
11312 name[4] == 'e' &&
11313 name[5] == 't' &&
11314 name[6] == 'b' &&
11315 name[7] == 'y')
11316 {
11317 switch (name[8])
11318 {
11319 case 'a':
11320 if (name[9] == 'd' &&
11321 name[10] == 'd' &&
11322 name[11] == 'r')
11323 { /* getnetbyaddr */
11324 return -KEY_getnetbyaddr;
11325 }
11326
11327 goto unknown;
11328
11329 case 'n':
11330 if (name[9] == 'a' &&
11331 name[10] == 'm' &&
11332 name[11] == 'e')
11333 { /* getnetbyname */
11334 return -KEY_getnetbyname;
11335 }
11336
11337 goto unknown;
11338
11339 default:
11340 goto unknown;
11341 }
e2e1dd5a 11342 }
4c3bbe0f
MHM
11343
11344 goto unknown;
11345
11346 case 13: /* 4 tokens of length 13 */
11347 if (name[0] == 'g' &&
11348 name[1] == 'e' &&
11349 name[2] == 't')
11350 {
11351 switch (name[3])
11352 {
11353 case 'h':
11354 if (name[4] == 'o' &&
11355 name[5] == 's' &&
11356 name[6] == 't' &&
11357 name[7] == 'b' &&
11358 name[8] == 'y')
11359 {
11360 switch (name[9])
11361 {
11362 case 'a':
11363 if (name[10] == 'd' &&
11364 name[11] == 'd' &&
11365 name[12] == 'r')
11366 { /* gethostbyaddr */
11367 return -KEY_gethostbyaddr;
11368 }
11369
11370 goto unknown;
11371
11372 case 'n':
11373 if (name[10] == 'a' &&
11374 name[11] == 'm' &&
11375 name[12] == 'e')
11376 { /* gethostbyname */
11377 return -KEY_gethostbyname;
11378 }
11379
11380 goto unknown;
11381
11382 default:
11383 goto unknown;
11384 }
11385 }
11386
11387 goto unknown;
11388
11389 case 's':
11390 if (name[4] == 'e' &&
11391 name[5] == 'r' &&
11392 name[6] == 'v' &&
11393 name[7] == 'b' &&
11394 name[8] == 'y')
11395 {
11396 switch (name[9])
11397 {
11398 case 'n':
11399 if (name[10] == 'a' &&
11400 name[11] == 'm' &&
11401 name[12] == 'e')
11402 { /* getservbyname */
11403 return -KEY_getservbyname;
11404 }
11405
11406 goto unknown;
11407
11408 case 'p':
11409 if (name[10] == 'o' &&
11410 name[11] == 'r' &&
11411 name[12] == 't')
11412 { /* getservbyport */
11413 return -KEY_getservbyport;
11414 }
11415
11416 goto unknown;
11417
11418 default:
11419 goto unknown;
11420 }
11421 }
11422
11423 goto unknown;
11424
11425 default:
11426 goto unknown;
11427 }
e2e1dd5a 11428 }
4c3bbe0f
MHM
11429
11430 goto unknown;
11431
11432 case 14: /* 1 tokens of length 14 */
11433 if (name[0] == 'g' &&
11434 name[1] == 'e' &&
11435 name[2] == 't' &&
11436 name[3] == 'p' &&
11437 name[4] == 'r' &&
11438 name[5] == 'o' &&
11439 name[6] == 't' &&
11440 name[7] == 'o' &&
11441 name[8] == 'b' &&
11442 name[9] == 'y' &&
11443 name[10] == 'n' &&
11444 name[11] == 'a' &&
11445 name[12] == 'm' &&
11446 name[13] == 'e')
11447 { /* getprotobyname */
11448 return -KEY_getprotobyname;
11449 }
11450
11451 goto unknown;
11452
11453 case 16: /* 1 tokens of length 16 */
11454 if (name[0] == 'g' &&
11455 name[1] == 'e' &&
11456 name[2] == 't' &&
11457 name[3] == 'p' &&
11458 name[4] == 'r' &&
11459 name[5] == 'o' &&
11460 name[6] == 't' &&
11461 name[7] == 'o' &&
11462 name[8] == 'b' &&
11463 name[9] == 'y' &&
11464 name[10] == 'n' &&
11465 name[11] == 'u' &&
11466 name[12] == 'm' &&
11467 name[13] == 'b' &&
11468 name[14] == 'e' &&
11469 name[15] == 'r')
11470 { /* getprotobynumber */
11471 return -KEY_getprotobynumber;
11472 }
11473
11474 goto unknown;
11475
11476 default:
11477 goto unknown;
e2e1dd5a 11478 }
4c3bbe0f
MHM
11479
11480unknown:
e2e1dd5a 11481 return 0;
a687059c
LW
11482}
11483
76e3520e 11484STATIC void
c94115d8 11485S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11486{
97aff369 11487 dVAR;
2f3197b3 11488
7918f24d
NC
11489 PERL_ARGS_ASSERT_CHECKCOMMA;
11490
d008e5eb 11491 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11492 if (ckWARN(WARN_SYNTAX)) {
11493 int level = 1;
26ff0806 11494 const char *w;
d008e5eb
GS
11495 for (w = s+2; *w && level; w++) {
11496 if (*w == '(')
11497 ++level;
11498 else if (*w == ')')
11499 --level;
11500 }
888fea98
NC
11501 while (isSPACE(*w))
11502 ++w;
b1439985
RGS
11503 /* the list of chars below is for end of statements or
11504 * block / parens, boolean operators (&&, ||, //) and branch
11505 * constructs (or, and, if, until, unless, while, err, for).
11506 * Not a very solid hack... */
11507 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11508 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11509 "%s (...) interpreted as function",name);
d008e5eb 11510 }
2f3197b3 11511 }
3280af22 11512 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11513 s++;
a687059c
LW
11514 if (*s == '(')
11515 s++;
3280af22 11516 while (s < PL_bufend && isSPACE(*s))
a687059c 11517 s++;
7e2040f0 11518 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11519 const char * const w = s++;
7e2040f0 11520 while (isALNUM_lazy_if(s,UTF))
a687059c 11521 s++;
3280af22 11522 while (s < PL_bufend && isSPACE(*s))
a687059c 11523 s++;
e929a76b 11524 if (*s == ',') {
c94115d8 11525 GV* gv;
5458a98a 11526 if (keyword(w, s - w, 0))
e929a76b 11527 return;
c94115d8
NC
11528
11529 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11530 if (gv && GvCVu(gv))
abbb3198 11531 return;
cea2e8a9 11532 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11533 }
11534 }
11535}
11536
423cee85
JH
11537/* Either returns sv, or mortalizes sv and returns a new SV*.
11538 Best used as sv=new_constant(..., sv, ...).
11539 If s, pv are NULL, calls subroutine with one argument,
11540 and type is used with error messages only. */
11541
b3ac6de7 11542STATIC SV *
eb0d8d16
NC
11543S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11544 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11545{
27da23d5 11546 dVAR; dSP;
890ce7af 11547 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11548 SV *res;
b3ac6de7
IZ
11549 SV **cvp;
11550 SV *cv, *typesv;
89e33a05 11551 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11552
7918f24d
NC
11553 PERL_ARGS_ASSERT_NEW_CONSTANT;
11554
f0af216f 11555 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11556 SV *msg;
11557
10edeb5d
JH
11558 why2 = (const char *)
11559 (strEQ(key,"charnames")
11560 ? "(possibly a missing \"use charnames ...\")"
11561 : "");
4e553d73 11562 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11563 (type ? type: "undef"), why2);
11564
11565 /* This is convoluted and evil ("goto considered harmful")
11566 * but I do not understand the intricacies of all the different
11567 * failure modes of %^H in here. The goal here is to make
11568 * the most probable error message user-friendly. --jhi */
11569
11570 goto msgdone;
11571
423cee85 11572 report:
4e553d73 11573 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11574 (type ? type: "undef"), why1, why2, why3);
41ab332f 11575 msgdone:
95a20fc0 11576 yyerror(SvPVX_const(msg));
423cee85
JH
11577 SvREFCNT_dec(msg);
11578 return sv;
11579 }
ff3f963a
KW
11580
11581 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
11582 if (PL_error_count > 0 && strEQ(key,"charnames"))
11583 return &PL_sv_undef;
ff3f963a 11584
eb0d8d16 11585 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11586 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11587 why1 = "$^H{";
11588 why2 = key;
f0af216f 11589 why3 = "} is not defined";
423cee85 11590 goto report;
b3ac6de7
IZ
11591 }
11592 sv_2mortal(sv); /* Parent created it permanently */
11593 cv = *cvp;
423cee85 11594 if (!pv && s)
59cd0e26 11595 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11596 if (type && pv)
59cd0e26 11597 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11598 else
423cee85 11599 typesv = &PL_sv_undef;
4e553d73 11600
e788e7d3 11601 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11602 ENTER ;
11603 SAVETMPS;
4e553d73 11604
423cee85 11605 PUSHMARK(SP) ;
a5845cb7 11606 EXTEND(sp, 3);
423cee85
JH
11607 if (pv)
11608 PUSHs(pv);
b3ac6de7 11609 PUSHs(sv);
423cee85
JH
11610 if (pv)
11611 PUSHs(typesv);
b3ac6de7 11612 PUTBACK;
423cee85 11613 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11614
423cee85 11615 SPAGAIN ;
4e553d73 11616
423cee85 11617 /* Check the eval first */
9b0e499b 11618 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11619 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11620 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11621 (void)POPs;
b37c2d43 11622 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11623 }
11624 else {
11625 res = POPs;
b37c2d43 11626 SvREFCNT_inc_simple_void(res);
423cee85 11627 }
4e553d73 11628
423cee85
JH
11629 PUTBACK ;
11630 FREETMPS ;
11631 LEAVE ;
b3ac6de7 11632 POPSTACK;
4e553d73 11633
b3ac6de7 11634 if (!SvOK(res)) {
423cee85
JH
11635 why1 = "Call to &{$^H{";
11636 why2 = key;
f0af216f 11637 why3 = "}} did not return a defined value";
423cee85
JH
11638 sv = res;
11639 goto report;
9b0e499b 11640 }
423cee85 11641
9b0e499b 11642 return res;
b3ac6de7 11643}
4e553d73 11644
d0a148a6
NC
11645/* Returns a NUL terminated string, with the length of the string written to
11646 *slp
11647 */
76e3520e 11648STATIC char *
cea2e8a9 11649S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11650{
97aff369 11651 dVAR;
463ee0b2 11652 register char *d = dest;
890ce7af 11653 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11654
11655 PERL_ARGS_ASSERT_SCAN_WORD;
11656
463ee0b2 11657 for (;;) {
8903cb82 11658 if (d >= e)
cea2e8a9 11659 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11660 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11661 *d++ = *s++;
c35e046a 11662 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11663 *d++ = ':';
11664 *d++ = ':';
11665 s++;
11666 }
c35e046a 11667 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11668 *d++ = *s++;
11669 *d++ = *s++;
11670 }
fd400ab9 11671 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11672 char *t = s + UTF8SKIP(s);
c35e046a 11673 size_t len;
fd400ab9 11674 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11675 t += UTF8SKIP(t);
c35e046a
AL
11676 len = t - s;
11677 if (d + len > e)
cea2e8a9 11678 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11679 Copy(s, d, len, char);
11680 d += len;
a0ed51b3
LW
11681 s = t;
11682 }
463ee0b2
LW
11683 else {
11684 *d = '\0';
11685 *slp = d - dest;
11686 return s;
e929a76b 11687 }
378cc40b
LW
11688 }
11689}
11690
76e3520e 11691STATIC char *
f54cb97a 11692S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11693{
97aff369 11694 dVAR;
6136c704 11695 char *bracket = NULL;
748a9306 11696 char funny = *s++;
6136c704 11697 register char *d = dest;
0b3da58d 11698 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11699
7918f24d
NC
11700 PERL_ARGS_ASSERT_SCAN_IDENT;
11701
a0d0e21e 11702 if (isSPACE(*s))
29595ff2 11703 s = PEEKSPACE(s);
de3bb511 11704 if (isDIGIT(*s)) {
8903cb82 11705 while (isDIGIT(*s)) {
11706 if (d >= e)
cea2e8a9 11707 Perl_croak(aTHX_ ident_too_long);
378cc40b 11708 *d++ = *s++;
8903cb82 11709 }
378cc40b
LW
11710 }
11711 else {
463ee0b2 11712 for (;;) {
8903cb82 11713 if (d >= e)
cea2e8a9 11714 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11715 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11716 *d++ = *s++;
7e2040f0 11717 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11718 *d++ = ':';
11719 *d++ = ':';
11720 s++;
11721 }
a0d0e21e 11722 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11723 *d++ = *s++;
11724 *d++ = *s++;
11725 }
fd400ab9 11726 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11727 char *t = s + UTF8SKIP(s);
fd400ab9 11728 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11729 t += UTF8SKIP(t);
11730 if (d + (t - s) > e)
cea2e8a9 11731 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11732 Copy(s, d, t - s, char);
11733 d += t - s;
11734 s = t;
11735 }
463ee0b2
LW
11736 else
11737 break;
11738 }
378cc40b
LW
11739 }
11740 *d = '\0';
11741 d = dest;
79072805 11742 if (*d) {
3280af22
NIS
11743 if (PL_lex_state != LEX_NORMAL)
11744 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11745 return s;
378cc40b 11746 }
748a9306 11747 if (*s == '$' && s[1] &&
3792a11b 11748 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11749 {
4810e5ec 11750 return s;
5cd24f17 11751 }
79072805
LW
11752 if (*s == '{') {
11753 bracket = s;
11754 s++;
11755 }
11756 else if (ck_uni)
11757 check_uni();
93a17b20 11758 if (s < send)
79072805
LW
11759 *d = *s++;
11760 d[1] = '\0';
2b92dfce 11761 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11762 *d = toCTRL(*s);
11763 s++;
de3bb511 11764 }
79072805 11765 if (bracket) {
748a9306 11766 if (isSPACE(s[-1])) {
fa83b5b6 11767 while (s < send) {
f54cb97a 11768 const char ch = *s++;
bf4acbe4 11769 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11770 *d = ch;
11771 break;
11772 }
11773 }
748a9306 11774 }
7e2040f0 11775 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11776 d++;
a0ed51b3 11777 if (UTF) {
6136c704
AL
11778 char *end = s;
11779 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11780 end += UTF8SKIP(end);
11781 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11782 end += UTF8SKIP(end);
a0ed51b3 11783 }
6136c704
AL
11784 Copy(s, d, end - s, char);
11785 d += end - s;
11786 s = end;
a0ed51b3
LW
11787 }
11788 else {
2b92dfce 11789 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11790 *d++ = *s++;
2b92dfce 11791 if (d >= e)
cea2e8a9 11792 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11793 }
79072805 11794 *d = '\0';
c35e046a
AL
11795 while (s < send && SPACE_OR_TAB(*s))
11796 s++;
ff68c719 11797 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11798 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11799 const char * const brack =
11800 (const char *)
11801 ((*s == '[') ? "[...]" : "{...}");
9014280d 11802 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11803 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11804 funny, dest, brack, funny, dest, brack);
11805 }
79072805 11806 bracket++;
a0be28da 11807 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11808 return s;
11809 }
4e553d73
NIS
11810 }
11811 /* Handle extended ${^Foo} variables
2b92dfce
GS
11812 * 1999-02-27 mjd-perl-patch@plover.com */
11813 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11814 && isALNUM(*s))
11815 {
11816 d++;
11817 while (isALNUM(*s) && d < e) {
11818 *d++ = *s++;
11819 }
11820 if (d >= e)
cea2e8a9 11821 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11822 *d = '\0';
79072805
LW
11823 }
11824 if (*s == '}') {
11825 s++;
7df0d042 11826 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11827 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11828 PL_expect = XREF;
11829 }
d008e5eb 11830 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11831 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11832 (keyword(dest, d - dest, 0)
11833 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11834 {
c35e046a
AL
11835 if (funny == '#')
11836 funny = '@';
9014280d 11837 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11838 "Ambiguous use of %c{%s} resolved to %c%s",
11839 funny, dest, funny, dest);
11840 }
11841 }
79072805
LW
11842 }
11843 else {
11844 s = bracket; /* let the parser handle it */
93a17b20 11845 *dest = '\0';
79072805
LW
11846 }
11847 }
3280af22
NIS
11848 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11849 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11850 return s;
11851}
11852
879d0c72
NC
11853static U32
11854S_pmflag(U32 pmfl, const char ch) {
11855 switch (ch) {
11856 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
4f4d7508
DC
11857 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11858 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11859 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11860 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11861 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
879d0c72
NC
11862 }
11863 return pmfl;
11864}
11865
76e3520e 11866STATIC char *
cea2e8a9 11867S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11868{
97aff369 11869 dVAR;
79072805 11870 PMOP *pm;
5db06880 11871 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11872 const char * const valid_flags =
a20207d7 11873 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11874#ifdef PERL_MAD
11875 char *modstart;
11876#endif
11877
7918f24d 11878 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11879
25c09cbf 11880 if (!s) {
6136c704 11881 const char * const delimiter = skipspace(start);
10edeb5d
JH
11882 Perl_croak(aTHX_
11883 (const char *)
11884 (*delimiter == '?'
11885 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11886 : "Search pattern not terminated" ));
25c09cbf 11887 }
bbce6d69 11888
8782bef2 11889 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11890 if (PL_multi_open == '?') {
11891 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11892 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11893
11894 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11895 allows us to restrict the list needed by reset to just the ??
11896 matches. */
11897 assert(type != OP_TRANS);
11898 if (PL_curstash) {
daba3364 11899 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11900 U32 elements;
11901 if (!mg) {
daba3364 11902 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11903 0);
11904 }
11905 elements = mg->mg_len / sizeof(PMOP**);
11906 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11907 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11908 mg->mg_len = elements * sizeof(PMOP**);
11909 PmopSTASH_set(pm,PL_curstash);
11910 }
11911 }
5db06880
NC
11912#ifdef PERL_MAD
11913 modstart = s;
11914#endif
6136c704 11915 while (*s && strchr(valid_flags, *s))
879d0c72 11916 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
e6897b1a
KW
11917
11918 if (isALNUM(*s)) {
11919 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11920 "Having no space between pattern and following word is deprecated");
11921
11922 }
5db06880
NC
11923#ifdef PERL_MAD
11924 if (PL_madskills && modstart != s) {
11925 SV* tmptoken = newSVpvn(modstart, s - modstart);
11926 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11927 }
11928#endif
4ac733c9 11929 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11930 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11931 {
a2a5de95
NC
11932 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11933 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11934 }
11935
3280af22 11936 PL_lex_op = (OP*)pm;
6154021b 11937 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11938 return s;
11939}
11940
76e3520e 11941STATIC char *
cea2e8a9 11942S_scan_subst(pTHX_ char *start)
79072805 11943{
27da23d5 11944 dVAR;
a0d0e21e 11945 register char *s;
79072805 11946 register PMOP *pm;
4fdae800 11947 I32 first_start;
79072805 11948 I32 es = 0;
5db06880
NC
11949#ifdef PERL_MAD
11950 char *modstart;
11951#endif
79072805 11952
7918f24d
NC
11953 PERL_ARGS_ASSERT_SCAN_SUBST;
11954
6154021b 11955 pl_yylval.ival = OP_NULL;
79072805 11956
5db06880 11957 s = scan_str(start,!!PL_madskills,FALSE);
79072805 11958
37fd879b 11959 if (!s)
cea2e8a9 11960 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 11961
3280af22 11962 if (s[-1] == PL_multi_open)
79072805 11963 s--;
5db06880
NC
11964#ifdef PERL_MAD
11965 if (PL_madskills) {
cd81e915
NC
11966 CURMAD('q', PL_thisopen);
11967 CURMAD('_', PL_thiswhite);
11968 CURMAD('E', PL_thisstuff);
11969 CURMAD('Q', PL_thisclose);
11970 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
11971 }
11972#endif
79072805 11973
3280af22 11974 first_start = PL_multi_start;
5db06880 11975 s = scan_str(s,!!PL_madskills,FALSE);
79072805 11976 if (!s) {
37fd879b 11977 if (PL_lex_stuff) {
3280af22 11978 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 11979 PL_lex_stuff = NULL;
37fd879b 11980 }
cea2e8a9 11981 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 11982 }
3280af22 11983 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 11984
79072805 11985 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
11986
11987#ifdef PERL_MAD
11988 if (PL_madskills) {
cd81e915
NC
11989 CURMAD('z', PL_thisopen);
11990 CURMAD('R', PL_thisstuff);
11991 CURMAD('Z', PL_thisclose);
5db06880
NC
11992 }
11993 modstart = s;
11994#endif
11995
48c036b1 11996 while (*s) {
a20207d7 11997 if (*s == EXEC_PAT_MOD) {
a687059c 11998 s++;
2f3197b3 11999 es++;
a687059c 12000 }
a20207d7 12001 else if (strchr(S_PAT_MODS, *s))
879d0c72 12002 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
aa78b661
KW
12003 else {
12004 if (isALNUM(*s)) {
12005 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
12006 "Having no space between pattern and following word is deprecated");
12007
12008 }
48c036b1 12009 break;
aa78b661 12010 }
378cc40b 12011 }
79072805 12012
5db06880
NC
12013#ifdef PERL_MAD
12014 if (PL_madskills) {
12015 if (modstart != s)
12016 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12017 append_madprops(PL_thismad, (OP*)pm, 0);
12018 PL_thismad = 0;
5db06880
NC
12019 }
12020#endif
a2a5de95
NC
12021 if ((pm->op_pmflags & PMf_CONTINUE)) {
12022 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
12023 }
12024
79072805 12025 if (es) {
6136c704
AL
12026 SV * const repl = newSVpvs("");
12027
0244c3a4
GS
12028 PL_sublex_info.super_bufptr = s;
12029 PL_sublex_info.super_bufend = PL_bufend;
12030 PL_multi_end = 0;
79072805 12031 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
12032 while (es-- > 0) {
12033 if (es)
12034 sv_catpvs(repl, "eval ");
12035 else
12036 sv_catpvs(repl, "do ");
12037 }
6f43d98f 12038 sv_catpvs(repl, "{");
3280af22 12039 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
12040 if (strchr(SvPVX(PL_lex_repl), '#'))
12041 sv_catpvs(repl, "\n");
12042 sv_catpvs(repl, "}");
25da4f38 12043 SvEVALED_on(repl);
3280af22
NIS
12044 SvREFCNT_dec(PL_lex_repl);
12045 PL_lex_repl = repl;
378cc40b 12046 }
79072805 12047
3280af22 12048 PL_lex_op = (OP*)pm;
6154021b 12049 pl_yylval.ival = OP_SUBST;
378cc40b
LW
12050 return s;
12051}
12052
76e3520e 12053STATIC char *
cea2e8a9 12054S_scan_trans(pTHX_ char *start)
378cc40b 12055{
97aff369 12056 dVAR;
a0d0e21e 12057 register char* s;
11343788 12058 OP *o;
79072805 12059 short *tbl;
b84c11c8
NC
12060 U8 squash;
12061 U8 del;
12062 U8 complement;
5db06880
NC
12063#ifdef PERL_MAD
12064 char *modstart;
12065#endif
79072805 12066
7918f24d
NC
12067 PERL_ARGS_ASSERT_SCAN_TRANS;
12068
6154021b 12069 pl_yylval.ival = OP_NULL;
79072805 12070
5db06880 12071 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 12072 if (!s)
cea2e8a9 12073 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 12074
3280af22 12075 if (s[-1] == PL_multi_open)
2f3197b3 12076 s--;
5db06880
NC
12077#ifdef PERL_MAD
12078 if (PL_madskills) {
cd81e915
NC
12079 CURMAD('q', PL_thisopen);
12080 CURMAD('_', PL_thiswhite);
12081 CURMAD('E', PL_thisstuff);
12082 CURMAD('Q', PL_thisclose);
12083 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12084 }
12085#endif
2f3197b3 12086
5db06880 12087 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12088 if (!s) {
37fd879b 12089 if (PL_lex_stuff) {
3280af22 12090 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12091 PL_lex_stuff = NULL;
37fd879b 12092 }
cea2e8a9 12093 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 12094 }
5db06880 12095 if (PL_madskills) {
cd81e915
NC
12096 CURMAD('z', PL_thisopen);
12097 CURMAD('R', PL_thisstuff);
12098 CURMAD('Z', PL_thisclose);
5db06880 12099 }
79072805 12100
a0ed51b3 12101 complement = del = squash = 0;
5db06880
NC
12102#ifdef PERL_MAD
12103 modstart = s;
12104#endif
7a1e2023
NC
12105 while (1) {
12106 switch (*s) {
12107 case 'c':
79072805 12108 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
12109 break;
12110 case 'd':
a0ed51b3 12111 del = OPpTRANS_DELETE;
7a1e2023
NC
12112 break;
12113 case 's':
79072805 12114 squash = OPpTRANS_SQUASH;
7a1e2023
NC
12115 break;
12116 default:
12117 goto no_more;
12118 }
395c3793
LW
12119 s++;
12120 }
7a1e2023 12121 no_more:
8973db79 12122
aa1f7c5b 12123 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
8973db79 12124 o = newPVOP(OP_TRANS, 0, (char*)tbl);
59f00321
RGS
12125 o->op_private &= ~OPpTRANS_ALL;
12126 o->op_private |= del|squash|complement|
7948272d
NIS
12127 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12128 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 12129
3280af22 12130 PL_lex_op = o;
6154021b 12131 pl_yylval.ival = OP_TRANS;
5db06880
NC
12132
12133#ifdef PERL_MAD
12134 if (PL_madskills) {
12135 if (modstart != s)
12136 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12137 append_madprops(PL_thismad, o, 0);
12138 PL_thismad = 0;
5db06880
NC
12139 }
12140#endif
12141
79072805
LW
12142 return s;
12143}
12144
76e3520e 12145STATIC char *
cea2e8a9 12146S_scan_heredoc(pTHX_ register char *s)
79072805 12147{
97aff369 12148 dVAR;
79072805
LW
12149 SV *herewas;
12150 I32 op_type = OP_SCALAR;
12151 I32 len;
12152 SV *tmpstr;
12153 char term;
73d840c0 12154 const char *found_newline;
79072805 12155 register char *d;
fc36a67e 12156 register char *e;
4633a7c4 12157 char *peek;
f54cb97a 12158 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
12159#ifdef PERL_MAD
12160 I32 stuffstart = s - SvPVX(PL_linestr);
12161 char *tstart;
12162
cd81e915 12163 PL_realtokenstart = -1;
5db06880 12164#endif
79072805 12165
7918f24d
NC
12166 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12167
79072805 12168 s += 2;
3280af22
NIS
12169 d = PL_tokenbuf;
12170 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 12171 if (!outer)
79072805 12172 *d++ = '\n';
c35e046a
AL
12173 peek = s;
12174 while (SPACE_OR_TAB(*peek))
12175 peek++;
3792a11b 12176 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 12177 s = peek;
79072805 12178 term = *s++;
3280af22 12179 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 12180 d += len;
3280af22 12181 if (s < PL_bufend)
79072805 12182 s++;
79072805
LW
12183 }
12184 else {
12185 if (*s == '\\')
12186 s++, term = '\'';
12187 else
12188 term = '"';
7e2040f0 12189 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 12190 deprecate("bare << to mean <<\"\"");
7e2040f0 12191 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 12192 if (d < e)
12193 *d++ = *s;
12194 }
12195 }
3280af22 12196 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 12197 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
12198 *d++ = '\n';
12199 *d = '\0';
3280af22 12200 len = d - PL_tokenbuf;
5db06880
NC
12201
12202#ifdef PERL_MAD
12203 if (PL_madskills) {
12204 tstart = PL_tokenbuf + !outer;
cd81e915 12205 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 12206 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 12207 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12208 stuffstart = s - SvPVX(PL_linestr);
12209 }
12210#endif
6a27c188 12211#ifndef PERL_STRICT_CR
f63a84b2
LW
12212 d = strchr(s, '\r');
12213 if (d) {
b464bac0 12214 char * const olds = s;
f63a84b2 12215 s = d;
3280af22 12216 while (s < PL_bufend) {
f63a84b2
LW
12217 if (*s == '\r') {
12218 *d++ = '\n';
12219 if (*++s == '\n')
12220 s++;
12221 }
12222 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12223 *d++ = *s++;
12224 s++;
12225 }
12226 else
12227 *d++ = *s++;
12228 }
12229 *d = '\0';
3280af22 12230 PL_bufend = d;
95a20fc0 12231 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
12232 s = olds;
12233 }
12234#endif
5db06880
NC
12235#ifdef PERL_MAD
12236 found_newline = 0;
12237#endif
10edeb5d 12238 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
12239 herewas = newSVpvn(s,PL_bufend-s);
12240 }
12241 else {
5db06880
NC
12242#ifdef PERL_MAD
12243 herewas = newSVpvn(s-1,found_newline-s+1);
12244#else
73d840c0
AL
12245 s--;
12246 herewas = newSVpvn(s,found_newline-s);
5db06880 12247#endif
73d840c0 12248 }
5db06880
NC
12249#ifdef PERL_MAD
12250 if (PL_madskills) {
12251 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12252 if (PL_thisstuff)
12253 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 12254 else
cd81e915 12255 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
12256 }
12257#endif
79072805 12258 s += SvCUR(herewas);
748a9306 12259
5db06880
NC
12260#ifdef PERL_MAD
12261 stuffstart = s - SvPVX(PL_linestr);
12262
12263 if (found_newline)
12264 s--;
12265#endif
12266
7d0a29fe
NC
12267 tmpstr = newSV_type(SVt_PVIV);
12268 SvGROW(tmpstr, 80);
748a9306 12269 if (term == '\'') {
79072805 12270 op_type = OP_CONST;
45977657 12271 SvIV_set(tmpstr, -1);
748a9306
LW
12272 }
12273 else if (term == '`') {
79072805 12274 op_type = OP_BACKTICK;
45977657 12275 SvIV_set(tmpstr, '\\');
748a9306 12276 }
79072805
LW
12277
12278 CLINE;
57843af0 12279 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
12280 PL_multi_open = PL_multi_close = '<';
12281 term = *PL_tokenbuf;
0244c3a4 12282 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
12283 char * const bufptr = PL_sublex_info.super_bufptr;
12284 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 12285 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
12286 s = strchr(bufptr, '\n');
12287 if (!s)
12288 s = bufend;
12289 d = s;
12290 while (s < bufend &&
12291 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12292 if (*s++ == '\n')
57843af0 12293 CopLINE_inc(PL_curcop);
0244c3a4
GS
12294 }
12295 if (s >= bufend) {
eb160463 12296 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
12297 missingterm(PL_tokenbuf);
12298 }
12299 sv_setpvn(herewas,bufptr,d-bufptr+1);
12300 sv_setpvn(tmpstr,d+1,s-d);
12301 s += len - 1;
12302 sv_catpvn(herewas,s,bufend-s);
95a20fc0 12303 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
12304
12305 s = olds;
12306 goto retval;
12307 }
12308 else if (!outer) {
79072805 12309 d = s;
3280af22
NIS
12310 while (s < PL_bufend &&
12311 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 12312 if (*s++ == '\n')
57843af0 12313 CopLINE_inc(PL_curcop);
79072805 12314 }
3280af22 12315 if (s >= PL_bufend) {
eb160463 12316 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12317 missingterm(PL_tokenbuf);
79072805
LW
12318 }
12319 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
12320#ifdef PERL_MAD
12321 if (PL_madskills) {
cd81e915
NC
12322 if (PL_thisstuff)
12323 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12324 else
cd81e915 12325 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12326 stuffstart = s - SvPVX(PL_linestr);
12327 }
12328#endif
79072805 12329 s += len - 1;
57843af0 12330 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12331
3280af22
NIS
12332 sv_catpvn(herewas,s,PL_bufend-s);
12333 sv_setsv(PL_linestr,herewas);
12334 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12335 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12336 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12337 }
12338 else
76f68e9b 12339 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12340 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12341#ifdef PERL_MAD
12342 if (PL_madskills) {
12343 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12344 if (PL_thisstuff)
12345 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12346 else
cd81e915 12347 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12348 }
12349#endif
f0e67a1d 12350 PL_bufptr = s;
17cc9359 12351 CopLINE_inc(PL_curcop);
f0e67a1d 12352 if (!outer || !lex_next_chunk(0)) {
eb160463 12353 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12354 missingterm(PL_tokenbuf);
79072805 12355 }
17cc9359 12356 CopLINE_dec(PL_curcop);
f0e67a1d 12357 s = PL_bufptr;
5db06880
NC
12358#ifdef PERL_MAD
12359 stuffstart = s - SvPVX(PL_linestr);
12360#endif
57843af0 12361 CopLINE_inc(PL_curcop);
3280af22 12362 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12363 PL_last_lop = PL_last_uni = NULL;
6a27c188 12364#ifndef PERL_STRICT_CR
3280af22 12365 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12366 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12367 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12368 {
3280af22
NIS
12369 PL_bufend[-2] = '\n';
12370 PL_bufend--;
95a20fc0 12371 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12372 }
3280af22
NIS
12373 else if (PL_bufend[-1] == '\r')
12374 PL_bufend[-1] = '\n';
f63a84b2 12375 }
3280af22
NIS
12376 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12377 PL_bufend[-1] = '\n';
f63a84b2 12378#endif
3280af22 12379 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12380 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12381 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12382 sv_catsv(PL_linestr,herewas);
12383 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12384 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12385 }
12386 else {
3280af22
NIS
12387 s = PL_bufend;
12388 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12389 }
12390 }
79072805 12391 s++;
0244c3a4 12392retval:
57843af0 12393 PL_multi_end = CopLINE(PL_curcop);
79072805 12394 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12395 SvPV_shrink_to_cur(tmpstr);
79072805 12396 }
8990e307 12397 SvREFCNT_dec(herewas);
2f31ce75 12398 if (!IN_BYTES) {
95a20fc0 12399 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12400 SvUTF8_on(tmpstr);
12401 else if (PL_encoding)
12402 sv_recode_to_utf8(tmpstr, PL_encoding);
12403 }
3280af22 12404 PL_lex_stuff = tmpstr;
6154021b 12405 pl_yylval.ival = op_type;
79072805
LW
12406 return s;
12407}
12408
02aa26ce
NT
12409/* scan_inputsymbol
12410 takes: current position in input buffer
12411 returns: new position in input buffer
6154021b 12412 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12413
12414 This code handles:
12415
12416 <> read from ARGV
12417 <FH> read from filehandle
12418 <pkg::FH> read from package qualified filehandle
12419 <pkg'FH> read from package qualified filehandle
12420 <$fh> read from filehandle in $fh
12421 <*.h> filename glob
12422
12423*/
12424
76e3520e 12425STATIC char *
cea2e8a9 12426S_scan_inputsymbol(pTHX_ char *start)
79072805 12427{
97aff369 12428 dVAR;
02aa26ce 12429 register char *s = start; /* current position in buffer */
1b420867 12430 char *end;
79072805 12431 I32 len;
6136c704
AL
12432 char *d = PL_tokenbuf; /* start of temp holding space */
12433 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12434
7918f24d
NC
12435 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12436
1b420867
GS
12437 end = strchr(s, '\n');
12438 if (!end)
12439 end = PL_bufend;
12440 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12441
12442 /* die if we didn't have space for the contents of the <>,
1b420867 12443 or if it didn't end, or if we see a newline
02aa26ce
NT
12444 */
12445
bb7a0f54 12446 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12447 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12448 if (s >= end)
cea2e8a9 12449 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12450
fc36a67e 12451 s++;
02aa26ce
NT
12452
12453 /* check for <$fh>
12454 Remember, only scalar variables are interpreted as filehandles by
12455 this code. Anything more complex (e.g., <$fh{$num}>) will be
12456 treated as a glob() call.
12457 This code makes use of the fact that except for the $ at the front,
12458 a scalar variable and a filehandle look the same.
12459 */
4633a7c4 12460 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12461
12462 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12463 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12464 d++;
02aa26ce
NT
12465
12466 /* If we've tried to read what we allow filehandles to look like, and
12467 there's still text left, then it must be a glob() and not a getline.
12468 Use scan_str to pull out the stuff between the <> and treat it
12469 as nothing more than a string.
12470 */
12471
3280af22 12472 if (d - PL_tokenbuf != len) {
6154021b 12473 pl_yylval.ival = OP_GLOB;
5db06880 12474 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12475 if (!s)
cea2e8a9 12476 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12477 return s;
12478 }
395c3793 12479 else {
9b3023bc 12480 bool readline_overriden = FALSE;
6136c704 12481 GV *gv_readline;
9b3023bc 12482 GV **gvp;
02aa26ce 12483 /* we're in a filehandle read situation */
3280af22 12484 d = PL_tokenbuf;
02aa26ce
NT
12485
12486 /* turn <> into <ARGV> */
79072805 12487 if (!len)
689badd5 12488 Copy("ARGV",d,5,char);
02aa26ce 12489
9b3023bc 12490 /* Check whether readline() is overriden */
fafc274c 12491 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12492 if ((gv_readline
ba979b31 12493 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12494 ||
017a3ce5 12495 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12496 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12497 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12498 readline_overriden = TRUE;
12499
02aa26ce
NT
12500 /* if <$fh>, create the ops to turn the variable into a
12501 filehandle
12502 */
79072805 12503 if (*d == '$') {
02aa26ce
NT
12504 /* try to find it in the pad for this block, otherwise find
12505 add symbol table ops
12506 */
f8f98e0a 12507 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12508 if (tmp != NOT_IN_PAD) {
00b1698f 12509 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12510 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12511 HEK * const stashname = HvNAME_HEK(stash);
12512 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12513 sv_catpvs(sym, "::");
f558d5af
JH
12514 sv_catpv(sym, d+1);
12515 d = SvPVX(sym);
12516 goto intro_sym;
12517 }
12518 else {
6136c704 12519 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12520 o->op_targ = tmp;
9b3023bc
RGS
12521 PL_lex_op = readline_overriden
12522 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12523 append_elem(OP_LIST, o,
12524 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12525 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12526 }
a0d0e21e
LW
12527 }
12528 else {
f558d5af
JH
12529 GV *gv;
12530 ++d;
12531intro_sym:
12532 gv = gv_fetchpv(d,
12533 (PL_in_eval
12534 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12535 : GV_ADDMULTI),
f558d5af 12536 SVt_PV);
9b3023bc
RGS
12537 PL_lex_op = readline_overriden
12538 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12539 append_elem(OP_LIST,
12540 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12541 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12542 : (OP*)newUNOP(OP_READLINE, 0,
12543 newUNOP(OP_RV2SV, 0,
12544 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12545 }
7c6fadd6
RGS
12546 if (!readline_overriden)
12547 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12548 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12549 pl_yylval.ival = OP_NULL;
79072805 12550 }
02aa26ce
NT
12551
12552 /* If it's none of the above, it must be a literal filehandle
12553 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12554 else {
6136c704 12555 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12556 PL_lex_op = readline_overriden
12557 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12558 append_elem(OP_LIST,
12559 newGVOP(OP_GV, 0, gv),
12560 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12561 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12562 pl_yylval.ival = OP_NULL;
79072805
LW
12563 }
12564 }
02aa26ce 12565
79072805
LW
12566 return s;
12567}
12568
02aa26ce
NT
12569
12570/* scan_str
12571 takes: start position in buffer
09bef843
SB
12572 keep_quoted preserve \ on the embedded delimiter(s)
12573 keep_delims preserve the delimiters around the string
02aa26ce
NT
12574 returns: position to continue reading from buffer
12575 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12576 updates the read buffer.
12577
12578 This subroutine pulls a string out of the input. It is called for:
12579 q single quotes q(literal text)
12580 ' single quotes 'literal text'
12581 qq double quotes qq(interpolate $here please)
12582 " double quotes "interpolate $here please"
12583 qx backticks qx(/bin/ls -l)
12584 ` backticks `/bin/ls -l`
12585 qw quote words @EXPORT_OK = qw( func() $spam )
12586 m// regexp match m/this/
12587 s/// regexp substitute s/this/that/
12588 tr/// string transliterate tr/this/that/
12589 y/// string transliterate y/this/that/
12590 ($*@) sub prototypes sub foo ($)
09bef843 12591 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12592 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12593
12594 In most of these cases (all but <>, patterns and transliterate)
12595 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12596 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12597 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12598 calls scan_str().
4e553d73 12599
02aa26ce
NT
12600 It skips whitespace before the string starts, and treats the first
12601 character as the delimiter. If the delimiter is one of ([{< then
12602 the corresponding "close" character )]}> is used as the closing
12603 delimiter. It allows quoting of delimiters, and if the string has
12604 balanced delimiters ([{<>}]) it allows nesting.
12605
37fd879b
HS
12606 On success, the SV with the resulting string is put into lex_stuff or,
12607 if that is already non-NULL, into lex_repl. The second case occurs only
12608 when parsing the RHS of the special constructs s/// and tr/// (y///).
12609 For convenience, the terminating delimiter character is stuffed into
12610 SvIVX of the SV.
02aa26ce
NT
12611*/
12612
76e3520e 12613STATIC char *
09bef843 12614S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12615{
97aff369 12616 dVAR;
02aa26ce 12617 SV *sv; /* scalar value: string */
d3fcec1f 12618 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12619 register char *s = start; /* current position in the buffer */
12620 register char term; /* terminating character */
12621 register char *to; /* current position in the sv's data */
12622 I32 brackets = 1; /* bracket nesting level */
89491803 12623 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12624 I32 termcode; /* terminating char. code */
89ebb4a3 12625 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12626 STRLEN termlen; /* length of terminating string */
0331ef07 12627 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12628#ifdef PERL_MAD
12629 int stuffstart;
12630 char *tstart;
12631#endif
02aa26ce 12632
7918f24d
NC
12633 PERL_ARGS_ASSERT_SCAN_STR;
12634
02aa26ce 12635 /* skip space before the delimiter */
29595ff2
NC
12636 if (isSPACE(*s)) {
12637 s = PEEKSPACE(s);
12638 }
02aa26ce 12639
5db06880 12640#ifdef PERL_MAD
cd81e915
NC
12641 if (PL_realtokenstart >= 0) {
12642 stuffstart = PL_realtokenstart;
12643 PL_realtokenstart = -1;
5db06880
NC
12644 }
12645 else
12646 stuffstart = start - SvPVX(PL_linestr);
12647#endif
02aa26ce 12648 /* mark where we are, in case we need to report errors */
79072805 12649 CLINE;
02aa26ce
NT
12650
12651 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12652 term = *s;
220e2d4e
IH
12653 if (!UTF) {
12654 termcode = termstr[0] = term;
12655 termlen = 1;
12656 }
12657 else {
f3b9ce0f 12658 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12659 Copy(s, termstr, termlen, U8);
12660 if (!UTF8_IS_INVARIANT(term))
12661 has_utf8 = TRUE;
12662 }
b1c7b182 12663
02aa26ce 12664 /* mark where we are */
57843af0 12665 PL_multi_start = CopLINE(PL_curcop);
3280af22 12666 PL_multi_open = term;
02aa26ce
NT
12667
12668 /* find corresponding closing delimiter */
93a17b20 12669 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12670 termcode = termstr[0] = term = tmps[5];
12671
3280af22 12672 PL_multi_close = term;
79072805 12673
561b68a9
SH
12674 /* create a new SV to hold the contents. 79 is the SV's initial length.
12675 What a random number. */
7d0a29fe
NC
12676 sv = newSV_type(SVt_PVIV);
12677 SvGROW(sv, 80);
45977657 12678 SvIV_set(sv, termcode);
a0d0e21e 12679 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12680
12681 /* move past delimiter and try to read a complete string */
09bef843 12682 if (keep_delims)
220e2d4e
IH
12683 sv_catpvn(sv, s, termlen);
12684 s += termlen;
5db06880
NC
12685#ifdef PERL_MAD
12686 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12687 if (!PL_thisopen && !keep_delims) {
12688 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12689 stuffstart = s - SvPVX(PL_linestr);
12690 }
12691#endif
93a17b20 12692 for (;;) {
220e2d4e
IH
12693 if (PL_encoding && !UTF) {
12694 bool cont = TRUE;
12695
12696 while (cont) {
95a20fc0 12697 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12698 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12699 &offset, (char*)termstr, termlen);
6136c704
AL
12700 const char * const ns = SvPVX_const(PL_linestr) + offset;
12701 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12702
12703 for (; s < ns; s++) {
12704 if (*s == '\n' && !PL_rsfp)
12705 CopLINE_inc(PL_curcop);
12706 }
12707 if (!found)
12708 goto read_more_line;
12709 else {
12710 /* handle quoted delimiters */
52327caf 12711 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12712 const char *t;
95a20fc0 12713 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12714 t--;
12715 if ((svlast-1 - t) % 2) {
12716 if (!keep_quoted) {
12717 *(svlast-1) = term;
12718 *svlast = '\0';
12719 SvCUR_set(sv, SvCUR(sv) - 1);
12720 }
12721 continue;
12722 }
12723 }
12724 if (PL_multi_open == PL_multi_close) {
12725 cont = FALSE;
12726 }
12727 else {
f54cb97a
AL
12728 const char *t;
12729 char *w;
0331ef07 12730 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12731 /* At here, all closes are "was quoted" one,
12732 so we don't check PL_multi_close. */
12733 if (*t == '\\') {
12734 if (!keep_quoted && *(t+1) == PL_multi_open)
12735 t++;
12736 else
12737 *w++ = *t++;
12738 }
12739 else if (*t == PL_multi_open)
12740 brackets++;
12741
12742 *w = *t;
12743 }
12744 if (w < t) {
12745 *w++ = term;
12746 *w = '\0';
95a20fc0 12747 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12748 }
0331ef07 12749 last_off = w - SvPVX(sv);
220e2d4e
IH
12750 if (--brackets <= 0)
12751 cont = FALSE;
12752 }
12753 }
12754 }
12755 if (!keep_delims) {
12756 SvCUR_set(sv, SvCUR(sv) - 1);
12757 *SvEND(sv) = '\0';
12758 }
12759 break;
12760 }
12761
02aa26ce 12762 /* extend sv if need be */
3280af22 12763 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12764 /* set 'to' to the next character in the sv's string */
463ee0b2 12765 to = SvPVX(sv)+SvCUR(sv);
09bef843 12766
02aa26ce 12767 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12768 if (PL_multi_open == PL_multi_close) {
12769 for (; s < PL_bufend; s++,to++) {
02aa26ce 12770 /* embedded newlines increment the current line number */
3280af22 12771 if (*s == '\n' && !PL_rsfp)
57843af0 12772 CopLINE_inc(PL_curcop);
02aa26ce 12773 /* handle quoted delimiters */
3280af22 12774 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12775 if (!keep_quoted && s[1] == term)
a0d0e21e 12776 s++;
02aa26ce 12777 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12778 else
12779 *to++ = *s++;
12780 }
02aa26ce
NT
12781 /* terminate when run out of buffer (the for() condition), or
12782 have found the terminator */
220e2d4e
IH
12783 else if (*s == term) {
12784 if (termlen == 1)
12785 break;
f3b9ce0f 12786 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12787 break;
12788 }
63cd0674 12789 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12790 has_utf8 = TRUE;
93a17b20
LW
12791 *to = *s;
12792 }
12793 }
02aa26ce
NT
12794
12795 /* if the terminator isn't the same as the start character (e.g.,
12796 matched brackets), we have to allow more in the quoting, and
12797 be prepared for nested brackets.
12798 */
93a17b20 12799 else {
02aa26ce 12800 /* read until we run out of string, or we find the terminator */
3280af22 12801 for (; s < PL_bufend; s++,to++) {
02aa26ce 12802 /* embedded newlines increment the line count */
3280af22 12803 if (*s == '\n' && !PL_rsfp)
57843af0 12804 CopLINE_inc(PL_curcop);
02aa26ce 12805 /* backslashes can escape the open or closing characters */
3280af22 12806 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12807 if (!keep_quoted &&
12808 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12809 s++;
12810 else
12811 *to++ = *s++;
12812 }
02aa26ce 12813 /* allow nested opens and closes */
3280af22 12814 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12815 break;
3280af22 12816 else if (*s == PL_multi_open)
93a17b20 12817 brackets++;
63cd0674 12818 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12819 has_utf8 = TRUE;
93a17b20
LW
12820 *to = *s;
12821 }
12822 }
02aa26ce 12823 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12824 *to = '\0';
95a20fc0 12825 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12826
02aa26ce
NT
12827 /*
12828 * this next chunk reads more into the buffer if we're not done yet
12829 */
12830
b1c7b182
GS
12831 if (s < PL_bufend)
12832 break; /* handle case where we are done yet :-) */
79072805 12833
6a27c188 12834#ifndef PERL_STRICT_CR
95a20fc0 12835 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12836 if ((to[-2] == '\r' && to[-1] == '\n') ||
12837 (to[-2] == '\n' && to[-1] == '\r'))
12838 {
f63a84b2
LW
12839 to[-2] = '\n';
12840 to--;
95a20fc0 12841 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12842 }
12843 else if (to[-1] == '\r')
12844 to[-1] = '\n';
12845 }
95a20fc0 12846 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12847 to[-1] = '\n';
12848#endif
12849
220e2d4e 12850 read_more_line:
02aa26ce
NT
12851 /* if we're out of file, or a read fails, bail and reset the current
12852 line marker so we can report where the unterminated string began
12853 */
5db06880
NC
12854#ifdef PERL_MAD
12855 if (PL_madskills) {
c35e046a 12856 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12857 if (PL_thisstuff)
12858 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12859 else
cd81e915 12860 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12861 }
12862#endif
f0e67a1d
Z
12863 CopLINE_inc(PL_curcop);
12864 PL_bufptr = PL_bufend;
12865 if (!lex_next_chunk(0)) {
c07a80fd 12866 sv_free(sv);
eb160463 12867 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12868 return NULL;
79072805 12869 }
f0e67a1d 12870 s = PL_bufptr;
5db06880
NC
12871#ifdef PERL_MAD
12872 stuffstart = 0;
12873#endif
378cc40b 12874 }
4e553d73 12875
02aa26ce
NT
12876 /* at this point, we have successfully read the delimited string */
12877
220e2d4e 12878 if (!PL_encoding || UTF) {
5db06880
NC
12879#ifdef PERL_MAD
12880 if (PL_madskills) {
c35e046a 12881 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12882 const int len = s - tstart;
cd81e915 12883 if (PL_thisstuff)
c35e046a 12884 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12885 else
c35e046a 12886 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12887 if (!PL_thisclose && !keep_delims)
12888 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12889 }
12890#endif
12891
220e2d4e
IH
12892 if (keep_delims)
12893 sv_catpvn(sv, s, termlen);
12894 s += termlen;
12895 }
5db06880
NC
12896#ifdef PERL_MAD
12897 else {
12898 if (PL_madskills) {
c35e046a
AL
12899 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12900 const int len = s - tstart - termlen;
cd81e915 12901 if (PL_thisstuff)
c35e046a 12902 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12903 else
c35e046a 12904 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12905 if (!PL_thisclose && !keep_delims)
12906 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12907 }
12908 }
12909#endif
220e2d4e 12910 if (has_utf8 || PL_encoding)
b1c7b182 12911 SvUTF8_on(sv);
d0063567 12912
57843af0 12913 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12914
12915 /* if we allocated too much space, give some back */
93a17b20
LW
12916 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12917 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12918 SvPV_renew(sv, SvLEN(sv));
79072805 12919 }
02aa26ce
NT
12920
12921 /* decide whether this is the first or second quoted string we've read
12922 for this op
12923 */
4e553d73 12924
3280af22
NIS
12925 if (PL_lex_stuff)
12926 PL_lex_repl = sv;
79072805 12927 else
3280af22 12928 PL_lex_stuff = sv;
378cc40b
LW
12929 return s;
12930}
12931
02aa26ce
NT
12932/*
12933 scan_num
12934 takes: pointer to position in buffer
12935 returns: pointer to new position in buffer
6154021b 12936 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12937
12938 Read a number in any of the formats that Perl accepts:
12939
7fd134d9
JH
12940 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12941 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12942 0b[01](_?[01])*
12943 0[0-7](_?[0-7])*
12944 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12945
3280af22 12946 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
12947 thing it reads.
12948
12949 If it reads a number without a decimal point or an exponent, it will
12950 try converting the number to an integer and see if it can do so
12951 without loss of precision.
12952*/
4e553d73 12953
378cc40b 12954char *
bfed75c6 12955Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 12956{
97aff369 12957 dVAR;
bfed75c6 12958 register const char *s = start; /* current position in buffer */
02aa26ce
NT
12959 register char *d; /* destination in temp buffer */
12960 register char *e; /* end of temp buffer */
86554af2 12961 NV nv; /* number read, as a double */
a0714e2c 12962 SV *sv = NULL; /* place to put the converted number */
a86a20aa 12963 bool floatit; /* boolean: int or float? */
cbbf8932 12964 const char *lastub = NULL; /* position of last underbar */
bfed75c6 12965 static char const number_too_long[] = "Number too long";
378cc40b 12966
7918f24d
NC
12967 PERL_ARGS_ASSERT_SCAN_NUM;
12968
02aa26ce
NT
12969 /* We use the first character to decide what type of number this is */
12970
378cc40b 12971 switch (*s) {
79072805 12972 default:
cea2e8a9 12973 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 12974
02aa26ce 12975 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 12976 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
12977 case '0':
12978 {
02aa26ce
NT
12979 /* variables:
12980 u holds the "number so far"
4f19785b
WSI
12981 shift the power of 2 of the base
12982 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
12983 overflowed was the number more than we can hold?
12984
12985 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
12986 we in octal/hex/binary?" indicator to disallow hex characters
12987 when in octal mode.
02aa26ce 12988 */
9e24b6e2
JH
12989 NV n = 0.0;
12990 UV u = 0;
79072805 12991 I32 shift;
9e24b6e2 12992 bool overflowed = FALSE;
61f33854 12993 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
12994 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12995 static const char* const bases[5] =
12996 { "", "binary", "", "octal", "hexadecimal" };
12997 static const char* const Bases[5] =
12998 { "", "Binary", "", "Octal", "Hexadecimal" };
12999 static const char* const maxima[5] =
13000 { "",
13001 "0b11111111111111111111111111111111",
13002 "",
13003 "037777777777",
13004 "0xffffffff" };
bfed75c6 13005 const char *base, *Base, *max;
378cc40b 13006
02aa26ce 13007 /* check for hex */
a674e8db 13008 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
13009 shift = 4;
13010 s += 2;
61f33854 13011 just_zero = FALSE;
a674e8db 13012 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
13013 shift = 1;
13014 s += 2;
61f33854 13015 just_zero = FALSE;
378cc40b 13016 }
02aa26ce 13017 /* check for a decimal in disguise */
b78218b7 13018 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 13019 goto decimal;
02aa26ce 13020 /* so it must be octal */
928753ea 13021 else {
378cc40b 13022 shift = 3;
928753ea
JH
13023 s++;
13024 }
13025
13026 if (*s == '_') {
a2a5de95 13027 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
13028 "Misplaced _ in number");
13029 lastub = s++;
13030 }
9e24b6e2
JH
13031
13032 base = bases[shift];
13033 Base = Bases[shift];
13034 max = maxima[shift];
02aa26ce 13035
4f19785b 13036 /* read the rest of the number */
378cc40b 13037 for (;;) {
9e24b6e2 13038 /* x is used in the overflow test,
893fe2c2 13039 b is the digit we're adding on. */
9e24b6e2 13040 UV x, b;
55497cff 13041
378cc40b 13042 switch (*s) {
02aa26ce
NT
13043
13044 /* if we don't mention it, we're done */
378cc40b
LW
13045 default:
13046 goto out;
02aa26ce 13047
928753ea 13048 /* _ are ignored -- but warned about if consecutive */
de3bb511 13049 case '_':
a2a5de95
NC
13050 if (lastub && s == lastub + 1)
13051 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13052 "Misplaced _ in number");
928753ea 13053 lastub = s++;
de3bb511 13054 break;
02aa26ce
NT
13055
13056 /* 8 and 9 are not octal */
378cc40b 13057 case '8': case '9':
4f19785b 13058 if (shift == 3)
cea2e8a9 13059 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 13060 /* FALL THROUGH */
02aa26ce
NT
13061
13062 /* octal digits */
4f19785b 13063 case '2': case '3': case '4':
378cc40b 13064 case '5': case '6': case '7':
4f19785b 13065 if (shift == 1)
cea2e8a9 13066 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
13067 /* FALL THROUGH */
13068
13069 case '0': case '1':
02aa26ce 13070 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 13071 goto digit;
02aa26ce
NT
13072
13073 /* hex digits */
378cc40b
LW
13074 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13075 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 13076 /* make sure they said 0x */
378cc40b
LW
13077 if (shift != 4)
13078 goto out;
55497cff 13079 b = (*s++ & 7) + 9;
02aa26ce
NT
13080
13081 /* Prepare to put the digit we have onto the end
13082 of the number so far. We check for overflows.
13083 */
13084
55497cff 13085 digit:
61f33854 13086 just_zero = FALSE;
9e24b6e2
JH
13087 if (!overflowed) {
13088 x = u << shift; /* make room for the digit */
13089
13090 if ((x >> shift) != u
13091 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
13092 overflowed = TRUE;
13093 n = (NV) u;
9b387841
NC
13094 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13095 "Integer overflow in %s number",
13096 base);
9e24b6e2
JH
13097 } else
13098 u = x | b; /* add the digit to the end */
13099 }
13100 if (overflowed) {
13101 n *= nvshift[shift];
13102 /* If an NV has not enough bits in its
13103 * mantissa to represent an UV this summing of
13104 * small low-order numbers is a waste of time
13105 * (because the NV cannot preserve the
13106 * low-order bits anyway): we could just
13107 * remember when did we overflow and in the
13108 * end just multiply n by the right
13109 * amount. */
13110 n += (NV) b;
55497cff 13111 }
378cc40b
LW
13112 break;
13113 }
13114 }
02aa26ce
NT
13115
13116 /* if we get here, we had success: make a scalar value from
13117 the number.
13118 */
378cc40b 13119 out:
928753ea
JH
13120
13121 /* final misplaced underbar check */
13122 if (s[-1] == '_') {
a2a5de95 13123 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
13124 }
13125
9e24b6e2 13126 if (overflowed) {
a2a5de95
NC
13127 if (n > 4294967295.0)
13128 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13129 "%s number > %s non-portable",
13130 Base, max);
b081dd7e 13131 sv = newSVnv(n);
9e24b6e2
JH
13132 }
13133 else {
15041a67 13134#if UVSIZE > 4
a2a5de95
NC
13135 if (u > 0xffffffff)
13136 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13137 "%s number > %s non-portable",
13138 Base, max);
2cc4c2dc 13139#endif
b081dd7e 13140 sv = newSVuv(u);
9e24b6e2 13141 }
61f33854 13142 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 13143 sv = new_constant(start, s - start, "integer",
eb0d8d16 13144 sv, NULL, NULL, 0);
61f33854 13145 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 13146 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
13147 }
13148 break;
02aa26ce
NT
13149
13150 /*
13151 handle decimal numbers.
13152 we're also sent here when we read a 0 as the first digit
13153 */
378cc40b
LW
13154 case '1': case '2': case '3': case '4': case '5':
13155 case '6': case '7': case '8': case '9': case '.':
13156 decimal:
3280af22
NIS
13157 d = PL_tokenbuf;
13158 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 13159 floatit = FALSE;
02aa26ce
NT
13160
13161 /* read next group of digits and _ and copy into d */
de3bb511 13162 while (isDIGIT(*s) || *s == '_') {
4e553d73 13163 /* skip underscores, checking for misplaced ones
02aa26ce
NT
13164 if -w is on
13165 */
93a17b20 13166 if (*s == '_') {
a2a5de95
NC
13167 if (lastub && s == lastub + 1)
13168 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13169 "Misplaced _ in number");
928753ea 13170 lastub = s++;
93a17b20 13171 }
fc36a67e 13172 else {
02aa26ce 13173 /* check for end of fixed-length buffer */
fc36a67e 13174 if (d >= e)
cea2e8a9 13175 Perl_croak(aTHX_ number_too_long);
02aa26ce 13176 /* if we're ok, copy the character */
378cc40b 13177 *d++ = *s++;
fc36a67e 13178 }
378cc40b 13179 }
02aa26ce
NT
13180
13181 /* final misplaced underbar check */
928753ea 13182 if (lastub && s == lastub + 1) {
a2a5de95 13183 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 13184 }
02aa26ce
NT
13185
13186 /* read a decimal portion if there is one. avoid
13187 3..5 being interpreted as the number 3. followed
13188 by .5
13189 */
2f3197b3 13190 if (*s == '.' && s[1] != '.') {
79072805 13191 floatit = TRUE;
378cc40b 13192 *d++ = *s++;
02aa26ce 13193
928753ea 13194 if (*s == '_') {
a2a5de95
NC
13195 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13196 "Misplaced _ in number");
928753ea
JH
13197 lastub = s;
13198 }
13199
13200 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 13201 */
fc36a67e 13202 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 13203 /* fixed length buffer check */
fc36a67e 13204 if (d >= e)
cea2e8a9 13205 Perl_croak(aTHX_ number_too_long);
928753ea 13206 if (*s == '_') {
a2a5de95
NC
13207 if (lastub && s == lastub + 1)
13208 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13209 "Misplaced _ in number");
928753ea
JH
13210 lastub = s;
13211 }
13212 else
fc36a67e 13213 *d++ = *s;
378cc40b 13214 }
928753ea
JH
13215 /* fractional part ending in underbar? */
13216 if (s[-1] == '_') {
a2a5de95
NC
13217 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13218 "Misplaced _ in number");
928753ea 13219 }
dd629d5b
GS
13220 if (*s == '.' && isDIGIT(s[1])) {
13221 /* oops, it's really a v-string, but without the "v" */
f4758303 13222 s = start;
dd629d5b
GS
13223 goto vstring;
13224 }
378cc40b 13225 }
02aa26ce
NT
13226
13227 /* read exponent part, if present */
3792a11b 13228 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
13229 floatit = TRUE;
13230 s++;
02aa26ce
NT
13231
13232 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 13233 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 13234
7fd134d9
JH
13235 /* stray preinitial _ */
13236 if (*s == '_') {
a2a5de95
NC
13237 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13238 "Misplaced _ in number");
7fd134d9
JH
13239 lastub = s++;
13240 }
13241
02aa26ce 13242 /* allow positive or negative exponent */
378cc40b
LW
13243 if (*s == '+' || *s == '-')
13244 *d++ = *s++;
02aa26ce 13245
7fd134d9
JH
13246 /* stray initial _ */
13247 if (*s == '_') {
a2a5de95
NC
13248 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13249 "Misplaced _ in number");
7fd134d9
JH
13250 lastub = s++;
13251 }
13252
7fd134d9
JH
13253 /* read digits of exponent */
13254 while (isDIGIT(*s) || *s == '_') {
13255 if (isDIGIT(*s)) {
13256 if (d >= e)
13257 Perl_croak(aTHX_ number_too_long);
b3b48e3e 13258 *d++ = *s++;
7fd134d9
JH
13259 }
13260 else {
041457d9 13261 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
13262 (!isDIGIT(s[1]) && s[1] != '_')))
13263 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13264 "Misplaced _ in number");
b3b48e3e 13265 lastub = s++;
7fd134d9 13266 }
7fd134d9 13267 }
378cc40b 13268 }
02aa26ce 13269
02aa26ce 13270
0b7fceb9 13271 /*
58bb9ec3
NC
13272 We try to do an integer conversion first if no characters
13273 indicating "float" have been found.
0b7fceb9
MU
13274 */
13275
13276 if (!floatit) {
58bb9ec3 13277 UV uv;
6136c704 13278 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
13279
13280 if (flags == IS_NUMBER_IN_UV) {
13281 if (uv <= IV_MAX)
b081dd7e 13282 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 13283 else
b081dd7e 13284 sv = newSVuv(uv);
58bb9ec3
NC
13285 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13286 if (uv <= (UV) IV_MIN)
b081dd7e 13287 sv = newSViv(-(IV)uv);
58bb9ec3
NC
13288 else
13289 floatit = TRUE;
13290 } else
13291 floatit = TRUE;
13292 }
0b7fceb9 13293 if (floatit) {
58bb9ec3
NC
13294 /* terminate the string */
13295 *d = '\0';
86554af2 13296 nv = Atof(PL_tokenbuf);
b081dd7e 13297 sv = newSVnv(nv);
86554af2 13298 }
86554af2 13299
eb0d8d16
NC
13300 if ( floatit
13301 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13302 const char *const key = floatit ? "float" : "integer";
13303 const STRLEN keylen = floatit ? 5 : 7;
13304 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13305 key, keylen, sv, NULL, NULL, 0);
13306 }
378cc40b 13307 break;
0b7fceb9 13308
e312add1 13309 /* if it starts with a v, it could be a v-string */
a7cb1f99 13310 case 'v':
dd629d5b 13311vstring:
561b68a9 13312 sv = newSV(5); /* preallocate storage space */
65b06e02 13313 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 13314 break;
79072805 13315 }
a687059c 13316
02aa26ce
NT
13317 /* make the op for the constant and return */
13318
a86a20aa 13319 if (sv)
b73d6f50 13320 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13321 else
5f66b61c 13322 lvalp->opval = NULL;
a687059c 13323
73d840c0 13324 return (char *)s;
378cc40b
LW
13325}
13326
76e3520e 13327STATIC char *
cea2e8a9 13328S_scan_formline(pTHX_ register char *s)
378cc40b 13329{
97aff369 13330 dVAR;
79072805 13331 register char *eol;
378cc40b 13332 register char *t;
6136c704 13333 SV * const stuff = newSVpvs("");
79072805 13334 bool needargs = FALSE;
c5ee2135 13335 bool eofmt = FALSE;
5db06880
NC
13336#ifdef PERL_MAD
13337 char *tokenstart = s;
4f61fd4b
JC
13338 SV* savewhite = NULL;
13339
5db06880 13340 if (PL_madskills) {
cd81e915
NC
13341 savewhite = PL_thiswhite;
13342 PL_thiswhite = 0;
5db06880
NC
13343 }
13344#endif
378cc40b 13345
7918f24d
NC
13346 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13347
79072805 13348 while (!needargs) {
a1b95068 13349 if (*s == '.') {
c35e046a 13350 t = s+1;
51882d45 13351#ifdef PERL_STRICT_CR
c35e046a
AL
13352 while (SPACE_OR_TAB(*t))
13353 t++;
51882d45 13354#else
c35e046a
AL
13355 while (SPACE_OR_TAB(*t) || *t == '\r')
13356 t++;
51882d45 13357#endif
c5ee2135
WL
13358 if (*t == '\n' || t == PL_bufend) {
13359 eofmt = TRUE;
79072805 13360 break;
c5ee2135 13361 }
79072805 13362 }
3280af22 13363 if (PL_in_eval && !PL_rsfp) {
07409e01 13364 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13365 if (!eol++)
3280af22 13366 eol = PL_bufend;
0f85fab0
LW
13367 }
13368 else
3280af22 13369 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13370 if (*s != '#') {
a0d0e21e
LW
13371 for (t = s; t < eol; t++) {
13372 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13373 needargs = FALSE;
13374 goto enough; /* ~~ must be first line in formline */
378cc40b 13375 }
a0d0e21e
LW
13376 if (*t == '@' || *t == '^')
13377 needargs = TRUE;
378cc40b 13378 }
7121b347
MG
13379 if (eol > s) {
13380 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13381#ifndef PERL_STRICT_CR
7121b347
MG
13382 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13383 char *end = SvPVX(stuff) + SvCUR(stuff);
13384 end[-2] = '\n';
13385 end[-1] = '\0';
b162af07 13386 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13387 }
2dc4c65b 13388#endif
7121b347
MG
13389 }
13390 else
13391 break;
79072805 13392 }
95a20fc0 13393 s = (char*)eol;
3280af22 13394 if (PL_rsfp) {
f0e67a1d 13395 bool got_some;
5db06880
NC
13396#ifdef PERL_MAD
13397 if (PL_madskills) {
cd81e915
NC
13398 if (PL_thistoken)
13399 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13400 else
cd81e915 13401 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13402 }
13403#endif
f0e67a1d
Z
13404 PL_bufptr = PL_bufend;
13405 CopLINE_inc(PL_curcop);
13406 got_some = lex_next_chunk(0);
13407 CopLINE_dec(PL_curcop);
13408 s = PL_bufptr;
5db06880 13409#ifdef PERL_MAD
f0e67a1d 13410 tokenstart = PL_bufptr;
5db06880 13411#endif
f0e67a1d 13412 if (!got_some)
378cc40b 13413 break;
378cc40b 13414 }
463ee0b2 13415 incline(s);
79072805 13416 }
a0d0e21e
LW
13417 enough:
13418 if (SvCUR(stuff)) {
3280af22 13419 PL_expect = XTERM;
79072805 13420 if (needargs) {
3280af22 13421 PL_lex_state = LEX_NORMAL;
cd81e915 13422 start_force(PL_curforce);
9ded7720 13423 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13424 force_next(',');
13425 }
a0d0e21e 13426 else
3280af22 13427 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13428 if (!IN_BYTES) {
95a20fc0 13429 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13430 SvUTF8_on(stuff);
13431 else if (PL_encoding)
13432 sv_recode_to_utf8(stuff, PL_encoding);
13433 }
cd81e915 13434 start_force(PL_curforce);
9ded7720 13435 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13436 force_next(THING);
cd81e915 13437 start_force(PL_curforce);
9ded7720 13438 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13439 force_next(LSTOP);
378cc40b 13440 }
79072805 13441 else {
8990e307 13442 SvREFCNT_dec(stuff);
c5ee2135
WL
13443 if (eofmt)
13444 PL_lex_formbrack = 0;
3280af22 13445 PL_bufptr = s;
79072805 13446 }
5db06880
NC
13447#ifdef PERL_MAD
13448 if (PL_madskills) {
cd81e915
NC
13449 if (PL_thistoken)
13450 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13451 else
cd81e915
NC
13452 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13453 PL_thiswhite = savewhite;
5db06880
NC
13454 }
13455#endif
79072805 13456 return s;
378cc40b 13457}
a687059c 13458
ba6d6ac9 13459I32
864dbfa3 13460Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13461{
97aff369 13462 dVAR;
a3b680e6 13463 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13464 CV* const outsidecv = PL_compcv;
8990e307 13465
3280af22
NIS
13466 if (PL_compcv) {
13467 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13468 }
7766f137 13469 SAVEI32(PL_subline);
3280af22 13470 save_item(PL_subname);
3280af22 13471 SAVESPTR(PL_compcv);
3280af22 13472
ea726b52 13473 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13474 CvFLAGS(PL_compcv) |= flags;
13475
57843af0 13476 PL_subline = CopLINE(PL_curcop);
dd2155a4 13477 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13478 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13479 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13480
8990e307
LW
13481 return oldsavestack_ix;
13482}
13483
084592ab
CN
13484#ifdef __SC__
13485#pragma segment Perl_yylex
13486#endif
af41e527
NC
13487static int
13488S_yywarn(pTHX_ const char *const s)
8990e307 13489{
97aff369 13490 dVAR;
7918f24d
NC
13491
13492 PERL_ARGS_ASSERT_YYWARN;
13493
faef0170 13494 PL_in_eval |= EVAL_WARNONLY;
748a9306 13495 yyerror(s);
faef0170 13496 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13497 return 0;
8990e307
LW
13498}
13499
13500int
15f169a1 13501Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13502{
97aff369 13503 dVAR;
bfed75c6
AL
13504 const char *where = NULL;
13505 const char *context = NULL;
68dc0745 13506 int contlen = -1;
46fc3d4c 13507 SV *msg;
5912531f 13508 int yychar = PL_parser->yychar;
463ee0b2 13509
7918f24d
NC
13510 PERL_ARGS_ASSERT_YYERROR;
13511
3280af22 13512 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13513 where = "at EOF";
8bcfe651
TM
13514 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13515 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13516 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13517 /*
13518 Only for NetWare:
13519 The code below is removed for NetWare because it abends/crashes on NetWare
13520 when the script has error such as not having the closing quotes like:
13521 if ($var eq "value)
13522 Checking of white spaces is anyway done in NetWare code.
13523 */
13524#ifndef NETWARE
3280af22
NIS
13525 while (isSPACE(*PL_oldoldbufptr))
13526 PL_oldoldbufptr++;
f355267c 13527#endif
3280af22
NIS
13528 context = PL_oldoldbufptr;
13529 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13530 }
8bcfe651
TM
13531 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13532 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13533 /*
13534 Only for NetWare:
13535 The code below is removed for NetWare because it abends/crashes on NetWare
13536 when the script has error such as not having the closing quotes like:
13537 if ($var eq "value)
13538 Checking of white spaces is anyway done in NetWare code.
13539 */
13540#ifndef NETWARE
3280af22
NIS
13541 while (isSPACE(*PL_oldbufptr))
13542 PL_oldbufptr++;
f355267c 13543#endif
3280af22
NIS
13544 context = PL_oldbufptr;
13545 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13546 }
13547 else if (yychar > 255)
68dc0745 13548 where = "next token ???";
12fbd33b 13549 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13550 if (PL_lex_state == LEX_NORMAL ||
13551 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13552 where = "at end of line";
3280af22 13553 else if (PL_lex_inpat)
68dc0745 13554 where = "within pattern";
463ee0b2 13555 else
68dc0745 13556 where = "within string";
463ee0b2 13557 }
46fc3d4c 13558 else {
84bafc02 13559 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13560 if (yychar < 32)
cea2e8a9 13561 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13562 else if (isPRINT_LC(yychar)) {
88c9ea1e 13563 const char string = yychar;
5e7aa789
NC
13564 sv_catpvn(where_sv, &string, 1);
13565 }
463ee0b2 13566 else
cea2e8a9 13567 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13568 where = SvPVX_const(where_sv);
463ee0b2 13569 }
46fc3d4c 13570 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13571 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13572 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13573 if (context)
cea2e8a9 13574 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13575 else
cea2e8a9 13576 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13577 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13578 Perl_sv_catpvf(aTHX_ msg,
57def98f 13579 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13580 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13581 PL_multi_end = 0;
a0d0e21e 13582 }
500960a6 13583 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13584 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13585 }
463ee0b2 13586 else
5a844595 13587 qerror(msg);
c7d6bfb2
GS
13588 if (PL_error_count >= 10) {
13589 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13590 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13591 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13592 else
13593 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13594 OutCopFILE(PL_curcop));
c7d6bfb2 13595 }
3280af22 13596 PL_in_my = 0;
5c284bb0 13597 PL_in_my_stash = NULL;
463ee0b2
LW
13598 return 0;
13599}
084592ab
CN
13600#ifdef __SC__
13601#pragma segment Main
13602#endif
4e35701f 13603
b250498f 13604STATIC char*
3ae08724 13605S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13606{
97aff369 13607 dVAR;
f54cb97a 13608 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13609
13610 PERL_ARGS_ASSERT_SWALLOW_BOM;
13611
7aa207d6 13612 switch (s[0]) {
4e553d73
NIS
13613 case 0xFF:
13614 if (s[1] == 0xFE) {
ee6ba15d 13615 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13616 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13617 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13618#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13619 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13620 s += 2;
dea0fc0b 13621 if (PL_bufend > (char*)s) {
81a923f4 13622 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13623 }
b250498f 13624#else
ee6ba15d 13625 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13626#endif
01ec43d0
GS
13627 }
13628 break;
78ae23f5 13629 case 0xFE:
7aa207d6 13630 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13631#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13632 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13633 s += 2;
13634 if (PL_bufend > (char *)s) {
81a923f4 13635 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13636 }
b250498f 13637#else
ee6ba15d 13638 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13639#endif
01ec43d0
GS
13640 }
13641 break;
3ae08724
GS
13642 case 0xEF:
13643 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13644 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13645 s += 3; /* UTF-8 */
13646 }
13647 break;
13648 case 0:
7aa207d6
JH
13649 if (slen > 3) {
13650 if (s[1] == 0) {
13651 if (s[2] == 0xFE && s[3] == 0xFF) {
13652 /* UTF-32 big-endian */
ee6ba15d 13653 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13654 }
13655 }
13656 else if (s[2] == 0 && s[3] != 0) {
13657 /* Leading bytes
13658 * 00 xx 00 xx
13659 * are a good indicator of UTF-16BE. */
ee6ba15d 13660#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13661 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13662 s = add_utf16_textfilter(s, FALSE);
13663#else
13664 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13665#endif
7aa207d6 13666 }
01ec43d0 13667 }
e294cc5d
JH
13668#ifdef EBCDIC
13669 case 0xDD:
13670 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13671 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13672 s += 4; /* UTF-8 */
13673 }
13674 break;
13675#endif
13676
7aa207d6
JH
13677 default:
13678 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13679 /* Leading bytes
13680 * xx 00 xx 00
13681 * are a good indicator of UTF-16LE. */
ee6ba15d 13682#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13683 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13684 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13685#else
13686 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13687#endif
7aa207d6 13688 }
01ec43d0 13689 }
b8f84bb2 13690 return (char*)s;
b250498f 13691}
4755096e 13692
6e3aabd6
GS
13693
13694#ifndef PERL_NO_UTF16_FILTER
13695static I32
a28af015 13696S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13697{
97aff369 13698 dVAR;
f3040f2c 13699 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13700 /* We re-use this each time round, throwing the contents away before we
13701 return. */
2a773401 13702 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13703 SV *const utf8_buffer = filter;
c28d6105 13704 IV status = IoPAGE(filter);
f2338a2e 13705 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 13706 I32 retval;
c8b0cbae 13707
c85ae797
NC
13708 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13709
c8b0cbae
NC
13710 /* As we're automatically added, at the lowest level, and hence only called
13711 from this file, we can be sure that we're not called in block mode. Hence
13712 don't bother writing code to deal with block mode. */
13713 if (maxlen) {
13714 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13715 }
c28d6105
NC
13716 if (status < 0) {
13717 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13718 }
1de9afcd 13719 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13720 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13721 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13722 reverse ? 'l' : 'b', idx, maxlen, status,
13723 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13724
13725 while (1) {
13726 STRLEN chars;
13727 STRLEN have;
dea0fc0b 13728 I32 newlen;
2a773401 13729 U8 *end;
c28d6105
NC
13730 /* First, look in our buffer of existing UTF-8 data: */
13731 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13732
13733 if (nl) {
13734 ++nl;
13735 } else if (status == 0) {
13736 /* EOF */
13737 IoPAGE(filter) = 0;
13738 nl = SvEND(utf8_buffer);
13739 }
13740 if (nl) {
d2d1d4de
NC
13741 STRLEN got = nl - SvPVX(utf8_buffer);
13742 /* Did we have anything to append? */
13743 retval = got != 0;
13744 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13745 /* Everything else in this code works just fine if SVp_POK isn't
13746 set. This, however, needs it, and we need it to work, else
13747 we loop infinitely because the buffer is never consumed. */
13748 sv_chop(utf8_buffer, nl);
13749 break;
13750 }
ba77e4cc 13751
c28d6105
NC
13752 /* OK, not a complete line there, so need to read some more UTF-16.
13753 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13754 while (1) {
13755 if (status <= 0)
13756 break;
13757 if (SvCUR(utf16_buffer) >= 2) {
13758 /* Location of the high octet of the last complete code point.
13759 Gosh, UTF-16 is a pain. All the benefits of variable length,
13760 *coupled* with all the benefits of partial reads and
13761 endianness. */
13762 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13763 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13764
13765 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13766 break;
13767 }
13768
13769 /* We have the first half of a surrogate. Read more. */
13770 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13771 }
c28d6105 13772
c28d6105
NC
13773 status = FILTER_READ(idx + 1, utf16_buffer,
13774 160 + (SvCUR(utf16_buffer) & 1));
13775 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13776 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13777 if (status < 0) {
13778 /* Error */
13779 IoPAGE(filter) = status;
13780 return status;
13781 }
13782 }
13783
13784 chars = SvCUR(utf16_buffer) >> 1;
13785 have = SvCUR(utf8_buffer);
13786 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13787
aa6dbd60 13788 if (reverse) {
c28d6105
NC
13789 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13790 (U8*)SvPVX_const(utf8_buffer) + have,
13791 chars * 2, &newlen);
aa6dbd60 13792 } else {
2a773401 13793 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13794 (U8*)SvPVX_const(utf8_buffer) + have,
13795 chars * 2, &newlen);
2a773401 13796 }
c28d6105 13797 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13798 *end = '\0';
c28d6105 13799
e07286ed
NC
13800 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13801 it's private to us, and utf16_to_utf8{,reversed} take a
13802 (pointer,length) pair, rather than a NUL-terminated string. */
13803 if(SvCUR(utf16_buffer) & 1) {
13804 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13805 SvCUR_set(utf16_buffer, 1);
13806 } else {
13807 SvCUR_set(utf16_buffer, 0);
13808 }
2a773401 13809 }
c28d6105
NC
13810 DEBUG_P(PerlIO_printf(Perl_debug_log,
13811 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13812 status,
13813 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13814 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13815 return retval;
6e3aabd6 13816}
81a923f4
NC
13817
13818static U8 *
13819S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13820{
2a773401 13821 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13822
c85ae797
NC
13823 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13824
c28d6105 13825 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13826 sv_setpvs(filter, "");
2a773401 13827 IoLINES(filter) = reversed;
c28d6105
NC
13828 IoPAGE(filter) = 1; /* Not EOF */
13829
13830 /* Sadly, we have to return a valid pointer, come what may, so we have to
13831 ignore any error return from this. */
13832 SvCUR_set(PL_linestr, 0);
13833 if (FILTER_READ(0, PL_linestr, 0)) {
13834 SvUTF8_on(PL_linestr);
81a923f4 13835 } else {
c28d6105 13836 SvUTF8_on(PL_linestr);
81a923f4 13837 }
c28d6105 13838 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13839 return (U8*)SvPVX(PL_linestr);
13840}
6e3aabd6 13841#endif
9f4817db 13842
f333445c
JP
13843/*
13844Returns a pointer to the next character after the parsed
13845vstring, as well as updating the passed in sv.
13846
13847Function must be called like
13848
561b68a9 13849 sv = newSV(5);
65b06e02 13850 s = scan_vstring(s,e,sv);
f333445c 13851
65b06e02 13852where s and e are the start and end of the string.
f333445c
JP
13853The sv should already be large enough to store the vstring
13854passed in, for performance reasons.
13855
13856*/
13857
13858char *
15f169a1 13859Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13860{
97aff369 13861 dVAR;
bfed75c6
AL
13862 const char *pos = s;
13863 const char *start = s;
7918f24d
NC
13864
13865 PERL_ARGS_ASSERT_SCAN_VSTRING;
13866
f333445c 13867 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13868 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13869 pos++;
f333445c
JP
13870 if ( *pos != '.') {
13871 /* this may not be a v-string if followed by => */
bfed75c6 13872 const char *next = pos;
65b06e02 13873 while (next < e && isSPACE(*next))
8fc7bb1c 13874 ++next;
65b06e02 13875 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13876 /* return string not v-string */
13877 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13878 return (char *)pos;
f333445c
JP
13879 }
13880 }
13881
13882 if (!isALPHA(*pos)) {
89ebb4a3 13883 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13884
d4c19fe8
AL
13885 if (*s == 'v')
13886 s++; /* get past 'v' */
f333445c 13887
76f68e9b 13888 sv_setpvs(sv, "");
f333445c
JP
13889
13890 for (;;) {
d4c19fe8 13891 /* this is atoi() that tolerates underscores */
0bd48802
AL
13892 U8 *tmpend;
13893 UV rev = 0;
d4c19fe8
AL
13894 const char *end = pos;
13895 UV mult = 1;
13896 while (--end >= s) {
13897 if (*end != '_') {
13898 const UV orev = rev;
f333445c
JP
13899 rev += (*end - '0') * mult;
13900 mult *= 10;
9b387841
NC
13901 if (orev > rev)
13902 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13903 "Integer overflow in decimal number");
f333445c
JP
13904 }
13905 }
13906#ifdef EBCDIC
13907 if (rev > 0x7FFFFFFF)
13908 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13909#endif
13910 /* Append native character for the rev point */
13911 tmpend = uvchr_to_utf8(tmpbuf, rev);
13912 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13913 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13914 SvUTF8_on(sv);
65b06e02 13915 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13916 s = ++pos;
13917 else {
13918 s = pos;
13919 break;
13920 }
65b06e02 13921 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13922 pos++;
13923 }
13924 SvPOK_on(sv);
13925 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13926 SvRMAGICAL_on(sv);
13927 }
73d840c0 13928 return (char *)s;
f333445c
JP
13929}
13930
88e1f1a2
JV
13931int
13932Perl_keyword_plugin_standard(pTHX_
13933 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13934{
13935 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13936 PERL_UNUSED_CONTEXT;
13937 PERL_UNUSED_ARG(keyword_ptr);
13938 PERL_UNUSED_ARG(keyword_len);
13939 PERL_UNUSED_ARG(op_ptr);
13940 return KEYWORD_PLUGIN_DECLINE;
13941}
13942
1da4ca5f 13943/*
28ac2b49
Z
13944=for apidoc Amx|OP *|parse_fullstmt|U32 flags
13945
13946Parse a single complete Perl statement. This may be a normal imperative
13947statement, including optional label, or a declaration that has
13948compile-time effect. It is up to the caller to ensure that the dynamic
13949parser state (L</PL_parser> et al) is correctly set to reflect the source
13950of the code to be parsed and the lexical context for the statement.
13951
13952The op tree representing the statement is returned. This may be a
13953null pointer if the statement is null, for example if it was actually
13954a subroutine definition (which has compile-time side effects). If not
13955null, it will be the result of a L</newSTATEOP> call, normally including
13956a C<nextstate> or equivalent op.
13957
13958If an error occurs in parsing or compilation, in most cases a valid op
13959tree (most likely null) is returned anyway. The error is reflected in
13960the parser state, normally resulting in a single exception at the top
13961level of parsing which covers all the compilation errors that occurred.
13962Some compilation errors, however, will throw an exception immediately.
13963
13964The I<flags> parameter is reserved for future use, and must always
13965be zero.
13966
13967=cut
13968*/
13969
13970OP *
13971Perl_parse_fullstmt(pTHX_ U32 flags)
13972{
13973 OP *fullstmtop;
13974 if (flags)
13975 Perl_croak(aTHX_ "Parsing code internal error (%s)", "parse_fullstmt");
13976 ENTER;
13977 SAVEVPTR(PL_eval_root);
13978 PL_eval_root = NULL;
13979 if(yyparse(GRAMFULLSTMT) && !PL_parser->error_count)
13980 qerror(Perl_mess(aTHX_ "Parse error"));
13981 fullstmtop = PL_eval_root;
13982 LEAVE;
13983 return fullstmtop;
13984}
13985
ea25a9b2 13986void
f7e3d326 13987Perl_munge_qwlist_to_paren_list(pTHX_ OP *qwlist)
ea25a9b2 13988{
f7e3d326 13989 PERL_ARGS_ASSERT_MUNGE_QWLIST_TO_PAREN_LIST;
ea25a9b2
Z
13990 deprecate("qw(...) as parentheses");
13991 force_next(')');
13992 if (qwlist->op_type == OP_STUB) {
13993 op_free(qwlist);
13994 }
13995 else {
3d8e05a0 13996 start_force(PL_curforce);
ea25a9b2
Z
13997 NEXTVAL_NEXTTOKE.opval = qwlist;
13998 force_next(THING);
13999 }
14000 force_next('(');
14001}
14002
28ac2b49 14003/*
1da4ca5f
NC
14004 * Local variables:
14005 * c-indentation-style: bsd
14006 * c-basic-offset: 4
14007 * indent-tabs-mode: t
14008 * End:
14009 *
37442d52
RGS
14010 * ex: set ts=8 sts=4 sw=4 noet:
14011 */