This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Fix to display ok in 80 columns
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
acdf0a21
DM
48/* YYINITDEPTH -- initial size of the parser's stacks. */
49#define YYINITDEPTH 200
50
199e78b7
DM
51/* XXX temporary backwards compatibility */
52#define PL_lex_brackets (PL_parser->lex_brackets)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
9059aa12
LW
129#define XFAKEBRACK 128
130#define XENUMMASK 127
131
39e02b42
JH
132#ifdef USE_UTF8_SCRIPTS
133# define UTF (!IN_BYTES)
2b9d42f0 134#else
746b446a 135# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 136#endif
a0ed51b3 137
b1fc3636
CJ
138/* The maximum number of characters preceding the unrecognized one to display */
139#define UNRECOGNIZED_PRECEDE_COUNT 10
140
61f0cdd9 141/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
142 * 1999-02-27 mjd-perl-patch@plover.com */
143#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
144
bf4acbe4 145#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 146
ffb4593c
NT
147/* LEX_* are values for PL_lex_state, the state of the lexer.
148 * They are arranged oddly so that the guard on the switch statement
79072805
LW
149 * can get by with a single comparison (if the compiler is smart enough).
150 */
151
fb73857a 152/* #define LEX_NOTPARSING 11 is done in perl.h. */
153
b6007c36
DM
154#define LEX_NORMAL 10 /* normal code (ie not within "...") */
155#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
156#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
157#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
158#define LEX_INTERPSTART 6 /* expecting the start of a $var */
159
160 /* at end of code, eg "$x" followed by: */
161#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
162#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
163
164#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
165 string or after \E, $foo, etc */
166#define LEX_INTERPCONST 2 /* NOT USED */
167#define LEX_FORMLINE 1 /* expecting a format line */
168#define LEX_KNOWNEXT 0 /* next token known; just return it */
169
79072805 170
bbf60fe6 171#ifdef DEBUGGING
27da23d5 172static const char* const lex_state_names[] = {
bbf60fe6
DM
173 "KNOWNEXT",
174 "FORMLINE",
175 "INTERPCONST",
176 "INTERPCONCAT",
177 "INTERPENDMAYBE",
178 "INTERPEND",
179 "INTERPSTART",
180 "INTERPPUSH",
181 "INTERPCASEMOD",
182 "INTERPNORMAL",
183 "NORMAL"
184};
185#endif
186
79072805
LW
187#ifdef ff_next
188#undef ff_next
d48672a2
LW
189#endif
190
79072805 191#include "keywords.h"
fe14fcc3 192
ffb4593c
NT
193/* CLINE is a macro that ensures PL_copline has a sane value */
194
ae986130
LW
195#ifdef CLINE
196#undef CLINE
197#endif
57843af0 198#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 199
5db06880 200#ifdef PERL_MAD
29595ff2
NC
201# define SKIPSPACE0(s) skipspace0(s)
202# define SKIPSPACE1(s) skipspace1(s)
203# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
204# define PEEKSPACE(s) skipspace2(s,0)
205#else
206# define SKIPSPACE0(s) skipspace(s)
207# define SKIPSPACE1(s) skipspace(s)
208# define SKIPSPACE2(s,tsv) skipspace(s)
209# define PEEKSPACE(s) skipspace(s)
210#endif
211
ffb4593c
NT
212/*
213 * Convenience functions to return different tokens and prime the
9cbb5ea2 214 * lexer for the next token. They all take an argument.
ffb4593c
NT
215 *
216 * TOKEN : generic token (used for '(', DOLSHARP, etc)
217 * OPERATOR : generic operator
218 * AOPERATOR : assignment operator
219 * PREBLOCK : beginning the block after an if, while, foreach, ...
220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
221 * PREREF : *EXPR where EXPR is not a simple identifier
222 * TERM : expression term
223 * LOOPX : loop exiting command (goto, last, dump, etc)
224 * FTST : file test operator
225 * FUN0 : zero-argument function
2d2e263d 226 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
227 * BOop : bitwise or or xor
228 * BAop : bitwise and
229 * SHop : shift operator
230 * PWop : power operator
9cbb5ea2 231 * PMop : pattern-matching operator
ffb4593c
NT
232 * Aop : addition-level operator
233 * Mop : multiplication-level operator
234 * Eop : equality-testing operator
e5edeb50 235 * Rop : relational operator <= != gt
ffb4593c
NT
236 *
237 * Also see LOP and lop() below.
238 */
239
998054bd 240#ifdef DEBUGGING /* Serve -DT. */
704d4215 241# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 242#else
bbf60fe6 243# define REPORT(retval) (retval)
998054bd
SC
244#endif
245
bbf60fe6
DM
246#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
247#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
248#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
249#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
250#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
251#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
252#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
253#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
254#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
255#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
376fcdbf 272#define UNI2(f,x) { \
6154021b 273 pl_yylval.ival = f; \
376fcdbf
AL
274 PL_expect = x; \
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
6f33ba73
RGS
283#define UNI(f) UNI2(f,XTERM)
284#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 285
376fcdbf 286#define UNIBRACK(f) { \
6154021b 287 pl_yylval.ival = f; \
376fcdbf
AL
288 PL_bufptr = s; \
289 PL_last_uni = PL_oldbufptr; \
290 if (*s == '(') \
291 return REPORT( (int)FUNC1 ); \
29595ff2 292 s = PEEKSPACE(s); \
376fcdbf
AL
293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 }
79072805 295
9f68db38 296/* grandfather return to old style */
6154021b 297#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 298
8fa7f367
JH
299#ifdef DEBUGGING
300
6154021b 301/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
302enum token_type {
303 TOKENTYPE_NONE,
304 TOKENTYPE_IVAL,
6154021b 305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
306 TOKENTYPE_PVAL,
307 TOKENTYPE_OPVAL,
308 TOKENTYPE_GVVAL
309};
310
6d4a66ac
NC
311static struct debug_tokens {
312 const int token;
313 enum token_type type;
314 const char *name;
315} const debug_tokens[] =
9041c2e3 316{
bbf60fe6
DM
317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
318 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
319 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
321 { ARROW, TOKENTYPE_NONE, "ARROW" },
322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
328 { DO, TOKENTYPE_NONE, "DO" },
329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
330 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
331 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
333 { ELSE, TOKENTYPE_NONE, "ELSE" },
334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
335 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
336 { FOR, TOKENTYPE_IVAL, "FOR" },
337 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
338 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
345 { IF, TOKENTYPE_IVAL, "IF" },
346 { LABEL, TOKENTYPE_PVAL, "LABEL" },
347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
352 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
353 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
354 { MY, TOKENTYPE_IVAL, "MY" },
355 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
356 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
357 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
358 { OROP, TOKENTYPE_IVAL, "OROP" },
359 { OROR, TOKENTYPE_NONE, "OROR" },
360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
365 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
366 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
367 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
368 { PREINC, TOKENTYPE_NONE, "PREINC" },
369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
370 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
371 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 385 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
386};
387
6154021b 388/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 389
bbf60fe6 390STATIC int
704d4215 391S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 392{
97aff369 393 dVAR;
7918f24d
NC
394
395 PERL_ARGS_ASSERT_TOKEREPORT;
396
bbf60fe6 397 if (DEBUG_T_TEST) {
bd61b366 398 const char *name = NULL;
bbf60fe6 399 enum token_type type = TOKENTYPE_NONE;
f54cb97a 400 const struct debug_tokens *p;
396482e1 401 SV* const report = newSVpvs("<== ");
bbf60fe6 402
f54cb97a 403 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
404 if (p->token == (int)rv) {
405 name = p->name;
406 type = p->type;
407 break;
408 }
409 }
410 if (name)
54667de8 411 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
412 else if ((char)rv > ' ' && (char)rv < '~')
413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
414 else if (!rv)
396482e1 415 sv_catpvs(report, "EOF");
bbf60fe6
DM
416 else
417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 switch (type) {
419 case TOKENTYPE_NONE:
420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 break;
422 case TOKENTYPE_IVAL:
704d4215 423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
424 break;
425 case TOKENTYPE_OPNUM:
426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 427 PL_op_name[lvalp->ival]);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_PVAL:
704d4215 430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
431 break;
432 case TOKENTYPE_OPVAL:
704d4215 433 if (lvalp->opval) {
401441c0 434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
435 PL_op_name[lvalp->opval->op_type]);
436 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 437 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 438 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
439 }
440
441 }
401441c0 442 else
396482e1 443 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
444 break;
445 }
b6007c36 446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
447 };
448 return (int)rv;
998054bd
SC
449}
450
b6007c36
DM
451
452/* print the buffer with suitable escapes */
453
454STATIC void
15f169a1 455S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 456{
396482e1 457 SV* const tmp = newSVpvs("");
7918f24d
NC
458
459 PERL_ARGS_ASSERT_PRINTBUF;
460
b6007c36
DM
461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
462 SvREFCNT_dec(tmp);
463}
464
8fa7f367
JH
465#endif
466
8290c323
NC
467static int
468S_deprecate_commaless_var_list(pTHX) {
469 PL_expect = XTERM;
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
472}
473
ffb4593c
NT
474/*
475 * S_ao
476 *
c963b151
BD
477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
479 */
480
76e3520e 481STATIC int
cea2e8a9 482S_ao(pTHX_ int toketype)
a0d0e21e 483{
97aff369 484 dVAR;
3280af22
NIS
485 if (*PL_bufptr == '=') {
486 PL_bufptr++;
a0d0e21e 487 if (toketype == ANDAND)
6154021b 488 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 489 else if (toketype == OROR)
6154021b 490 pl_yylval.ival = OP_ORASSIGN;
c963b151 491 else if (toketype == DORDOR)
6154021b 492 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
493 toketype = ASSIGNOP;
494 }
495 return toketype;
496}
497
ffb4593c
NT
498/*
499 * S_no_op
500 * When Perl expects an operator and finds something else, no_op
501 * prints the warning. It always prints "<something> found where
502 * operator expected. It prints "Missing semicolon on previous line?"
503 * if the surprise occurs at the start of the line. "do you need to
504 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
505 * where the compiler doesn't know if foo is a method call or a function.
506 * It prints "Missing operator before end of line" if there's nothing
507 * after the missing operator, or "... before <...>" if there is something
508 * after the missing operator.
509 */
510
76e3520e 511STATIC void
15f169a1 512S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 513{
97aff369 514 dVAR;
9d4ba2ae
AL
515 char * const oldbp = PL_bufptr;
516 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 517
7918f24d
NC
518 PERL_ARGS_ASSERT_NO_OP;
519
1189a94a
GS
520 if (!s)
521 s = oldbp;
07c798fb 522 else
1189a94a 523 PL_bufptr = s;
cea2e8a9 524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
525 if (ckWARN_d(WARN_SYNTAX)) {
526 if (is_first)
527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
528 "\t(Missing semicolon on previous line?)\n");
529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 530 const char *t;
c35e046a
AL
531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
532 NOOP;
56da5a46
RGS
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %.*s?)\n",
551405c4 536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
537 }
538 else {
539 assert(s >= oldbp);
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 542 }
07c798fb 543 }
3280af22 544 PL_bufptr = oldbp;
8990e307
LW
545}
546
ffb4593c
NT
547/*
548 * S_missingterm
549 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 550 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
551 * If we're in a delimited string and the delimiter is a control
552 * character, it's reformatted into a two-char sequence like ^C.
553 * This is fatal.
554 */
555
76e3520e 556STATIC void
cea2e8a9 557S_missingterm(pTHX_ char *s)
8990e307 558{
97aff369 559 dVAR;
8990e307
LW
560 char tmpbuf[3];
561 char q;
562 if (s) {
9d4ba2ae 563 char * const nl = strrchr(s,'\n');
d2719217 564 if (nl)
8990e307
LW
565 *nl = '\0';
566 }
463559e7 567 else if (isCNTRL(PL_multi_close)) {
8990e307 568 *tmpbuf = '^';
585ec06d 569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
570 tmpbuf[2] = '\0';
571 s = tmpbuf;
572 }
573 else {
eb160463 574 *tmpbuf = (char)PL_multi_close;
8990e307
LW
575 tmpbuf[1] = '\0';
576 s = tmpbuf;
577 }
578 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 580}
79072805 581
ef89dcc3 582#define FEATURE_IS_ENABLED(name) \
0d863452 583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 585/* The longest string we pass in. */
1863b879 586#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 587
0d863452
RH
588/*
589 * S_feature_is_enabled
590 * Check whether the named feature is enabled.
591 */
592STATIC bool
15f169a1 593S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 594{
97aff369 595 dVAR;
0d863452 596 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 597 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
598
599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
600
4a731d7b
NC
601 assert(namelen <= MAX_FEATURE_LEN);
602 memcpy(&he_name[8], name, namelen);
d4c19fe8 603
7b9ef140 604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
605}
606
ffb4593c 607/*
9cbb5ea2
GS
608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
609 * utf16-to-utf8-reversed.
ffb4593c
NT
610 */
611
c39cd008
GS
612#ifdef PERL_CR_FILTER
613static void
614strip_return(SV *sv)
615{
95a20fc0 616 register const char *s = SvPVX_const(sv);
9d4ba2ae 617 register const char * const e = s + SvCUR(sv);
7918f24d
NC
618
619 PERL_ARGS_ASSERT_STRIP_RETURN;
620
c39cd008
GS
621 /* outer loop optimized to do nothing if there are no CR-LFs */
622 while (s < e) {
623 if (*s++ == '\r' && *s == '\n') {
624 /* hit a CR-LF, need to copy the rest */
625 register char *d = s - 1;
626 *d++ = *s++;
627 while (s < e) {
628 if (*s == '\r' && s[1] == '\n')
629 s++;
630 *d++ = *s++;
631 }
632 SvCUR(sv) -= s - d;
633 return;
634 }
635 }
636}
a868473f 637
76e3520e 638STATIC I32
c39cd008 639S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 640{
f54cb97a 641 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
642 if (count > 0 && !maxlen)
643 strip_return(sv);
644 return count;
a868473f
NIS
645}
646#endif
647
199e78b7
DM
648
649
ffb4593c
NT
650/*
651 * Perl_lex_start
5486870f 652 *
e3abe207 653 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
654 *
655 * rsfp is the opened file handle to read from (if any),
656 *
657 * line holds any initial content already read from the file (or in
658 * the case of no file, such as an eval, the whole contents);
659 *
660 * new_filter indicates that this is a new file and it shouldn't inherit
661 * the filters from the current parser (ie require).
ffb4593c
NT
662 */
663
a0d0e21e 664void
5486870f 665Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 666{
97aff369 667 dVAR;
6ef55633 668 const char *s = NULL;
8990e307 669 STRLEN len;
5486870f 670 yy_parser *parser, *oparser;
acdf0a21
DM
671
672 /* create and initialise a parser */
673
199e78b7 674 Newxz(parser, 1, yy_parser);
5486870f 675 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
676 PL_parser = parser;
677
678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
679 parser->ps = parser->stack;
680 parser->stack_size = YYINITDEPTH;
681
682 parser->stack->state = 0;
683 parser->yyerrstatus = 0;
684 parser->yychar = YYEMPTY; /* Cause a token to be read. */
685
e3abe207
DM
686 /* on scope exit, free this parser and restore any outer one */
687 SAVEPARSER(parser);
7c4baf47 688 parser->saved_curcop = PL_curcop;
e3abe207 689
acdf0a21 690 /* initialise lexer state */
8990e307 691
fb205e7a
DM
692#ifdef PERL_MAD
693 parser->curforce = -1;
694#else
695 parser->nexttoke = 0;
696#endif
ca4cfd28 697 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 698 parser->copline = NOLINE;
5afb0a62 699 parser->lex_state = LEX_NORMAL;
c2598295 700 parser->expect = XSTATE;
2f9285f8 701 parser->rsfp = rsfp;
56b27c9a 702 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 704
199e78b7
DM
705 Newx(parser->lex_brackstack, 120, char);
706 Newx(parser->lex_casestack, 12, char);
707 *parser->lex_casestack = '\0';
02b34bbe 708
10efb74f
NC
709 if (line) {
710 s = SvPV_const(line, len);
711 } else {
712 len = 0;
713 }
bdc0bf6f 714
10efb74f 715 if (!len) {
bdc0bf6f 716 parser->linestr = newSVpvs("\n;");
10efb74f 717 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 718 parser->linestr = newSVsv(line);
10efb74f 719 if (s[len-1] != ';')
bdc0bf6f 720 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
721 } else {
722 SvTEMP_off(line);
723 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 724 parser->linestr = line;
8990e307 725 }
f06b5848
DM
726 parser->oldoldbufptr =
727 parser->oldbufptr =
728 parser->bufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
79072805 732}
a687059c 733
e3abe207
DM
734
735/* delete a parser object */
736
737void
738Perl_parser_free(pTHX_ const yy_parser *parser)
739{
7918f24d
NC
740 PERL_ARGS_ASSERT_PARSER_FREE;
741
7c4baf47 742 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
743 SvREFCNT_dec(parser->linestr);
744
2f9285f8
DM
745 if (parser->rsfp == PerlIO_stdin())
746 PerlIO_clearerr(parser->rsfp);
799361c3
SH
747 else if (parser->rsfp && (!parser->old_parser ||
748 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 749 PerlIO_close(parser->rsfp);
5486870f 750 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 751
e3abe207
DM
752 Safefree(parser->stack);
753 Safefree(parser->lex_brackstack);
754 Safefree(parser->lex_casestack);
755 PL_parser = parser->old_parser;
756 Safefree(parser);
757}
758
759
ffb4593c
NT
760/*
761 * Perl_lex_end
9cbb5ea2
GS
762 * Finalizer for lexing operations. Must be called when the parser is
763 * done with the lexer.
ffb4593c
NT
764 */
765
463ee0b2 766void
864dbfa3 767Perl_lex_end(pTHX)
463ee0b2 768{
97aff369 769 dVAR;
3280af22 770 PL_doextract = FALSE;
463ee0b2
LW
771}
772
ffb4593c 773/*
f0e67a1d
Z
774=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
775
776Buffer scalar containing the chunk currently under consideration of the
777text currently being lexed. This is always a plain string scalar (for
778which C<SvPOK> is true). It is not intended to be used as a scalar by
779normal scalar means; instead refer to the buffer directly by the pointer
780variables described below.
781
782The lexer maintains various C<char*> pointers to things in the
783C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
784reallocated, all of these pointers must be updated. Don't attempt to
785do this manually, but rather use L</lex_grow_linestr> if you need to
786reallocate the buffer.
787
788The content of the text chunk in the buffer is commonly exactly one
789complete line of input, up to and including a newline terminator,
790but there are situations where it is otherwise. The octets of the
791buffer may be intended to be interpreted as either UTF-8 or Latin-1.
792The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
793flag on this scalar, which may disagree with it.
794
795For direct examination of the buffer, the variable
796L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
797lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
798of these pointers is usually preferable to examination of the scalar
799through normal scalar means.
800
801=for apidoc AmxU|char *|PL_parser-E<gt>bufend
802
803Direct pointer to the end of the chunk of text currently being lexed, the
804end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
805+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
806always located at the end of the buffer, and does not count as part of
807the buffer's contents.
808
809=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
810
811Points to the current position of lexing inside the lexer buffer.
812Characters around this point may be freely examined, within
813the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
814L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
815interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
816
817Lexing code (whether in the Perl core or not) moves this pointer past
818the characters that it consumes. It is also expected to perform some
819bookkeeping whenever a newline character is consumed. This movement
820can be more conveniently performed by the function L</lex_read_to>,
821which handles newlines appropriately.
822
823Interpretation of the buffer's octets can be abstracted out by
824using the slightly higher-level functions L</lex_peek_unichar> and
825L</lex_read_unichar>.
826
827=for apidoc AmxU|char *|PL_parser-E<gt>linestart
828
829Points to the start of the current line inside the lexer buffer.
830This is useful for indicating at which column an error occurred, and
831not much else. This must be updated by any lexing code that consumes
832a newline; the function L</lex_read_to> handles this detail.
833
834=cut
835*/
836
837/*
838=for apidoc Amx|bool|lex_bufutf8
839
840Indicates whether the octets in the lexer buffer
841(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
842of Unicode characters. If not, they should be interpreted as Latin-1
843characters. This is analogous to the C<SvUTF8> flag for scalars.
844
845In UTF-8 mode, it is not guaranteed that the lexer buffer actually
846contains valid UTF-8. Lexing code must be robust in the face of invalid
847encoding.
848
849The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
850is significant, but not the whole story regarding the input character
851encoding. Normally, when a file is being read, the scalar contains octets
852and its C<SvUTF8> flag is off, but the octets should be interpreted as
853UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
854however, the scalar may have the C<SvUTF8> flag on, and in this case its
855octets should be interpreted as UTF-8 unless the C<use bytes> pragma
856is in effect. This logic may change in the future; use this function
857instead of implementing the logic yourself.
858
859=cut
860*/
861
862bool
863Perl_lex_bufutf8(pTHX)
864{
865 return UTF;
866}
867
868/*
869=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
870
871Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
872at least I<len> octets (including terminating NUL). Returns a
873pointer to the reallocated buffer. This is necessary before making
874any direct modification of the buffer that would increase its length.
875L</lex_stuff_pvn> provides a more convenient way to insert text into
876the buffer.
877
878Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
879this function updates all of the lexer's variables that point directly
880into the buffer.
881
882=cut
883*/
884
885char *
886Perl_lex_grow_linestr(pTHX_ STRLEN len)
887{
888 SV *linestr;
889 char *buf;
890 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
891 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
892 linestr = PL_parser->linestr;
893 buf = SvPVX(linestr);
894 if (len <= SvLEN(linestr))
895 return buf;
896 bufend_pos = PL_parser->bufend - buf;
897 bufptr_pos = PL_parser->bufptr - buf;
898 oldbufptr_pos = PL_parser->oldbufptr - buf;
899 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
900 linestart_pos = PL_parser->linestart - buf;
901 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
902 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
903 buf = sv_grow(linestr, len);
904 PL_parser->bufend = buf + bufend_pos;
905 PL_parser->bufptr = buf + bufptr_pos;
906 PL_parser->oldbufptr = buf + oldbufptr_pos;
907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
908 PL_parser->linestart = buf + linestart_pos;
909 if (PL_parser->last_uni)
910 PL_parser->last_uni = buf + last_uni_pos;
911 if (PL_parser->last_lop)
912 PL_parser->last_lop = buf + last_lop_pos;
913 return buf;
914}
915
916/*
83aa740e 917=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
918
919Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
920immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
921reallocating the buffer if necessary. This means that lexing code that
922runs later will see the characters as if they had appeared in the input.
923It is not recommended to do this as part of normal parsing, and most
924uses of this facility run the risk of the inserted characters being
925interpreted in an unintended manner.
926
927The string to be inserted is represented by I<len> octets starting
928at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
929according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
930The characters are recoded for the lexer buffer, according to how the
931buffer is currently being interpreted (L</lex_bufutf8>). If a string
932to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
933function is more convenient.
934
935=cut
936*/
937
938void
83aa740e 939Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 940{
749123ff 941 dVAR;
f0e67a1d
Z
942 char *bufptr;
943 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
944 if (flags & ~(LEX_STUFF_UTF8))
945 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
946 if (UTF) {
947 if (flags & LEX_STUFF_UTF8) {
948 goto plain_copy;
949 } else {
950 STRLEN highhalf = 0;
83aa740e 951 const char *p, *e = pv+len;
f0e67a1d
Z
952 for (p = pv; p != e; p++)
953 highhalf += !!(((U8)*p) & 0x80);
954 if (!highhalf)
955 goto plain_copy;
956 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
957 bufptr = PL_parser->bufptr;
958 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
959 SvCUR_set(PL_parser->linestr,
960 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
961 PL_parser->bufend += len+highhalf;
962 for (p = pv; p != e; p++) {
963 U8 c = (U8)*p;
964 if (c & 0x80) {
965 *bufptr++ = (char)(0xc0 | (c >> 6));
966 *bufptr++ = (char)(0x80 | (c & 0x3f));
967 } else {
968 *bufptr++ = (char)c;
969 }
970 }
971 }
972 } else {
973 if (flags & LEX_STUFF_UTF8) {
974 STRLEN highhalf = 0;
83aa740e 975 const char *p, *e = pv+len;
f0e67a1d
Z
976 for (p = pv; p != e; p++) {
977 U8 c = (U8)*p;
978 if (c >= 0xc4) {
979 Perl_croak(aTHX_ "Lexing code attempted to stuff "
980 "non-Latin-1 character into Latin-1 input");
981 } else if (c >= 0xc2 && p+1 != e &&
982 (((U8)p[1]) & 0xc0) == 0x80) {
983 p++;
984 highhalf++;
985 } else if (c >= 0x80) {
986 /* malformed UTF-8 */
987 ENTER;
988 SAVESPTR(PL_warnhook);
989 PL_warnhook = PERL_WARNHOOK_FATAL;
990 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
991 LEAVE;
992 }
993 }
994 if (!highhalf)
995 goto plain_copy;
996 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
997 bufptr = PL_parser->bufptr;
998 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
999 SvCUR_set(PL_parser->linestr,
1000 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1001 PL_parser->bufend += len-highhalf;
1002 for (p = pv; p != e; p++) {
1003 U8 c = (U8)*p;
1004 if (c & 0x80) {
1005 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1006 p++;
1007 } else {
1008 *bufptr++ = (char)c;
1009 }
1010 }
1011 } else {
1012 plain_copy:
1013 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1014 bufptr = PL_parser->bufptr;
1015 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1016 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1017 PL_parser->bufend += len;
1018 Copy(pv, bufptr, len, char);
1019 }
1020 }
1021}
1022
1023/*
1024=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1025
1026Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1027immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1028reallocating the buffer if necessary. This means that lexing code that
1029runs later will see the characters as if they had appeared in the input.
1030It is not recommended to do this as part of normal parsing, and most
1031uses of this facility run the risk of the inserted characters being
1032interpreted in an unintended manner.
1033
1034The string to be inserted is the string value of I<sv>. The characters
1035are recoded for the lexer buffer, according to how the buffer is currently
1036being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1037not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1038need to construct a scalar.
1039
1040=cut
1041*/
1042
1043void
1044Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1045{
1046 char *pv;
1047 STRLEN len;
1048 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1049 if (flags)
1050 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1051 pv = SvPV(sv, len);
1052 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1053}
1054
1055/*
1056=for apidoc Amx|void|lex_unstuff|char *ptr
1057
1058Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1059I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1060This hides the discarded text from any lexing code that runs later,
1061as if the text had never appeared.
1062
1063This is not the normal way to consume lexed text. For that, use
1064L</lex_read_to>.
1065
1066=cut
1067*/
1068
1069void
1070Perl_lex_unstuff(pTHX_ char *ptr)
1071{
1072 char *buf, *bufend;
1073 STRLEN unstuff_len;
1074 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1075 buf = PL_parser->bufptr;
1076 if (ptr < buf)
1077 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1078 if (ptr == buf)
1079 return;
1080 bufend = PL_parser->bufend;
1081 if (ptr > bufend)
1082 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1083 unstuff_len = ptr - buf;
1084 Move(ptr, buf, bufend+1-ptr, char);
1085 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1086 PL_parser->bufend = bufend - unstuff_len;
1087}
1088
1089/*
1090=for apidoc Amx|void|lex_read_to|char *ptr
1091
1092Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1093to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1094performing the correct bookkeeping whenever a newline character is passed.
1095This is the normal way to consume lexed text.
1096
1097Interpretation of the buffer's octets can be abstracted out by
1098using the slightly higher-level functions L</lex_peek_unichar> and
1099L</lex_read_unichar>.
1100
1101=cut
1102*/
1103
1104void
1105Perl_lex_read_to(pTHX_ char *ptr)
1106{
1107 char *s;
1108 PERL_ARGS_ASSERT_LEX_READ_TO;
1109 s = PL_parser->bufptr;
1110 if (ptr < s || ptr > PL_parser->bufend)
1111 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1112 for (; s != ptr; s++)
1113 if (*s == '\n') {
1114 CopLINE_inc(PL_curcop);
1115 PL_parser->linestart = s+1;
1116 }
1117 PL_parser->bufptr = ptr;
1118}
1119
1120/*
1121=for apidoc Amx|void|lex_discard_to|char *ptr
1122
1123Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1124up to I<ptr>. The remaining content of the buffer will be moved, and
1125all pointers into the buffer updated appropriately. I<ptr> must not
1126be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1127it is not permitted to discard text that has yet to be lexed.
1128
1129Normally it is not necessarily to do this directly, because it suffices to
1130use the implicit discarding behaviour of L</lex_next_chunk> and things
1131based on it. However, if a token stretches across multiple lines,
1f317c95 1132and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1133that purpose, then after completion of the token it would be wise to
1134explicitly discard the now-unneeded earlier lines, to avoid future
1135multi-line tokens growing the buffer without bound.
1136
1137=cut
1138*/
1139
1140void
1141Perl_lex_discard_to(pTHX_ char *ptr)
1142{
1143 char *buf;
1144 STRLEN discard_len;
1145 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1146 buf = SvPVX(PL_parser->linestr);
1147 if (ptr < buf)
1148 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1149 if (ptr == buf)
1150 return;
1151 if (ptr > PL_parser->bufptr)
1152 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1153 discard_len = ptr - buf;
1154 if (PL_parser->oldbufptr < ptr)
1155 PL_parser->oldbufptr = ptr;
1156 if (PL_parser->oldoldbufptr < ptr)
1157 PL_parser->oldoldbufptr = ptr;
1158 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1159 PL_parser->last_uni = NULL;
1160 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1161 PL_parser->last_lop = NULL;
1162 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1163 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1164 PL_parser->bufend -= discard_len;
1165 PL_parser->bufptr -= discard_len;
1166 PL_parser->oldbufptr -= discard_len;
1167 PL_parser->oldoldbufptr -= discard_len;
1168 if (PL_parser->last_uni)
1169 PL_parser->last_uni -= discard_len;
1170 if (PL_parser->last_lop)
1171 PL_parser->last_lop -= discard_len;
1172}
1173
1174/*
1175=for apidoc Amx|bool|lex_next_chunk|U32 flags
1176
1177Reads in the next chunk of text to be lexed, appending it to
1178L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1179looked to the end of the current chunk and wants to know more. It is
1180usual, but not necessary, for lexing to have consumed the entirety of
1181the current chunk at this time.
1182
1183If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1184chunk (i.e., the current chunk has been entirely consumed), normally the
1185current chunk will be discarded at the same time that the new chunk is
1186read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1187will not be discarded. If the current chunk has not been entirely
1188consumed, then it will not be discarded regardless of the flag.
1189
1190Returns true if some new text was added to the buffer, or false if the
1191buffer has reached the end of the input text.
1192
1193=cut
1194*/
1195
1196#define LEX_FAKE_EOF 0x80000000
1197
1198bool
1199Perl_lex_next_chunk(pTHX_ U32 flags)
1200{
1201 SV *linestr;
1202 char *buf;
1203 STRLEN old_bufend_pos, new_bufend_pos;
1204 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1205 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1206 bool got_some_for_debugger = 0;
f0e67a1d
Z
1207 bool got_some;
1208 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1209 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1210 linestr = PL_parser->linestr;
1211 buf = SvPVX(linestr);
1212 if (!(flags & LEX_KEEP_PREVIOUS) &&
1213 PL_parser->bufptr == PL_parser->bufend) {
1214 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1215 linestart_pos = 0;
1216 if (PL_parser->last_uni != PL_parser->bufend)
1217 PL_parser->last_uni = NULL;
1218 if (PL_parser->last_lop != PL_parser->bufend)
1219 PL_parser->last_lop = NULL;
1220 last_uni_pos = last_lop_pos = 0;
1221 *buf = 0;
1222 SvCUR(linestr) = 0;
1223 } else {
1224 old_bufend_pos = PL_parser->bufend - buf;
1225 bufptr_pos = PL_parser->bufptr - buf;
1226 oldbufptr_pos = PL_parser->oldbufptr - buf;
1227 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1228 linestart_pos = PL_parser->linestart - buf;
1229 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1230 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1231 }
1232 if (flags & LEX_FAKE_EOF) {
1233 goto eof;
1234 } else if (!PL_parser->rsfp) {
1235 got_some = 0;
1236 } else if (filter_gets(linestr, old_bufend_pos)) {
1237 got_some = 1;
17cc9359 1238 got_some_for_debugger = 1;
f0e67a1d 1239 } else {
580561a3
Z
1240 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1241 sv_setpvs(linestr, "");
f0e67a1d
Z
1242 eof:
1243 /* End of real input. Close filehandle (unless it was STDIN),
1244 * then add implicit termination.
1245 */
1246 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1247 PerlIO_clearerr(PL_parser->rsfp);
1248 else if (PL_parser->rsfp)
1249 (void)PerlIO_close(PL_parser->rsfp);
1250 PL_parser->rsfp = NULL;
1251 PL_doextract = FALSE;
1252#ifdef PERL_MAD
1253 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1254 PL_faketokens = 1;
1255#endif
1256 if (!PL_in_eval && PL_minus_p) {
1257 sv_catpvs(linestr,
1258 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1259 PL_minus_n = PL_minus_p = 0;
1260 } else if (!PL_in_eval && PL_minus_n) {
1261 sv_catpvs(linestr, /*{*/";}");
1262 PL_minus_n = 0;
1263 } else
1264 sv_catpvs(linestr, ";");
1265 got_some = 1;
1266 }
1267 buf = SvPVX(linestr);
1268 new_bufend_pos = SvCUR(linestr);
1269 PL_parser->bufend = buf + new_bufend_pos;
1270 PL_parser->bufptr = buf + bufptr_pos;
1271 PL_parser->oldbufptr = buf + oldbufptr_pos;
1272 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1273 PL_parser->linestart = buf + linestart_pos;
1274 if (PL_parser->last_uni)
1275 PL_parser->last_uni = buf + last_uni_pos;
1276 if (PL_parser->last_lop)
1277 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1278 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1279 PL_curstash != PL_debstash) {
1280 /* debugger active and we're not compiling the debugger code,
1281 * so store the line into the debugger's array of lines
1282 */
1283 update_debugger_info(NULL, buf+old_bufend_pos,
1284 new_bufend_pos-old_bufend_pos);
1285 }
1286 return got_some;
1287}
1288
1289/*
1290=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1291
1292Looks ahead one (Unicode) character in the text currently being lexed.
1293Returns the codepoint (unsigned integer value) of the next character,
1294or -1 if lexing has reached the end of the input text. To consume the
1295peeked character, use L</lex_read_unichar>.
1296
1297If the next character is in (or extends into) the next chunk of input
1298text, the next chunk will be read in. Normally the current chunk will be
1299discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1300then the current chunk will not be discarded.
1301
1302If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1303is encountered, an exception is generated.
1304
1305=cut
1306*/
1307
1308I32
1309Perl_lex_peek_unichar(pTHX_ U32 flags)
1310{
749123ff 1311 dVAR;
f0e67a1d
Z
1312 char *s, *bufend;
1313 if (flags & ~(LEX_KEEP_PREVIOUS))
1314 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1315 s = PL_parser->bufptr;
1316 bufend = PL_parser->bufend;
1317 if (UTF) {
1318 U8 head;
1319 I32 unichar;
1320 STRLEN len, retlen;
1321 if (s == bufend) {
1322 if (!lex_next_chunk(flags))
1323 return -1;
1324 s = PL_parser->bufptr;
1325 bufend = PL_parser->bufend;
1326 }
1327 head = (U8)*s;
1328 if (!(head & 0x80))
1329 return head;
1330 if (head & 0x40) {
1331 len = PL_utf8skip[head];
1332 while ((STRLEN)(bufend-s) < len) {
1333 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1334 break;
1335 s = PL_parser->bufptr;
1336 bufend = PL_parser->bufend;
1337 }
1338 }
1339 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1340 if (retlen == (STRLEN)-1) {
1341 /* malformed UTF-8 */
1342 ENTER;
1343 SAVESPTR(PL_warnhook);
1344 PL_warnhook = PERL_WARNHOOK_FATAL;
1345 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1346 LEAVE;
1347 }
1348 return unichar;
1349 } else {
1350 if (s == bufend) {
1351 if (!lex_next_chunk(flags))
1352 return -1;
1353 s = PL_parser->bufptr;
1354 }
1355 return (U8)*s;
1356 }
1357}
1358
1359/*
1360=for apidoc Amx|I32|lex_read_unichar|U32 flags
1361
1362Reads the next (Unicode) character in the text currently being lexed.
1363Returns the codepoint (unsigned integer value) of the character read,
1364and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1365if lexing has reached the end of the input text. To non-destructively
1366examine the next character, use L</lex_peek_unichar> instead.
1367
1368If the next character is in (or extends into) the next chunk of input
1369text, the next chunk will be read in. Normally the current chunk will be
1370discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1371then the current chunk will not be discarded.
1372
1373If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1374is encountered, an exception is generated.
1375
1376=cut
1377*/
1378
1379I32
1380Perl_lex_read_unichar(pTHX_ U32 flags)
1381{
1382 I32 c;
1383 if (flags & ~(LEX_KEEP_PREVIOUS))
1384 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1385 c = lex_peek_unichar(flags);
1386 if (c != -1) {
1387 if (c == '\n')
1388 CopLINE_inc(PL_curcop);
1389 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1390 }
1391 return c;
1392}
1393
1394/*
1395=for apidoc Amx|void|lex_read_space|U32 flags
1396
1397Reads optional spaces, in Perl style, in the text currently being
1398lexed. The spaces may include ordinary whitespace characters and
1399Perl-style comments. C<#line> directives are processed if encountered.
1400L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1401at a non-space character (or the end of the input text).
1402
1403If spaces extend into the next chunk of input text, the next chunk will
1404be read in. Normally the current chunk will be discarded at the same
1405time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1406chunk will not be discarded.
1407
1408=cut
1409*/
1410
f0998909
Z
1411#define LEX_NO_NEXT_CHUNK 0x80000000
1412
f0e67a1d
Z
1413void
1414Perl_lex_read_space(pTHX_ U32 flags)
1415{
1416 char *s, *bufend;
1417 bool need_incline = 0;
f0998909 1418 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1419 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1420#ifdef PERL_MAD
1421 if (PL_skipwhite) {
1422 sv_free(PL_skipwhite);
1423 PL_skipwhite = NULL;
1424 }
1425 if (PL_madskills)
1426 PL_skipwhite = newSVpvs("");
1427#endif /* PERL_MAD */
1428 s = PL_parser->bufptr;
1429 bufend = PL_parser->bufend;
1430 while (1) {
1431 char c = *s;
1432 if (c == '#') {
1433 do {
1434 c = *++s;
1435 } while (!(c == '\n' || (c == 0 && s == bufend)));
1436 } else if (c == '\n') {
1437 s++;
1438 PL_parser->linestart = s;
1439 if (s == bufend)
1440 need_incline = 1;
1441 else
1442 incline(s);
1443 } else if (isSPACE(c)) {
1444 s++;
1445 } else if (c == 0 && s == bufend) {
1446 bool got_more;
1447#ifdef PERL_MAD
1448 if (PL_madskills)
1449 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1450#endif /* PERL_MAD */
f0998909
Z
1451 if (flags & LEX_NO_NEXT_CHUNK)
1452 break;
f0e67a1d
Z
1453 PL_parser->bufptr = s;
1454 CopLINE_inc(PL_curcop);
1455 got_more = lex_next_chunk(flags);
1456 CopLINE_dec(PL_curcop);
1457 s = PL_parser->bufptr;
1458 bufend = PL_parser->bufend;
1459 if (!got_more)
1460 break;
1461 if (need_incline && PL_parser->rsfp) {
1462 incline(s);
1463 need_incline = 0;
1464 }
1465 } else {
1466 break;
1467 }
1468 }
1469#ifdef PERL_MAD
1470 if (PL_madskills)
1471 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1472#endif /* PERL_MAD */
1473 PL_parser->bufptr = s;
1474}
1475
1476/*
ffb4593c
NT
1477 * S_incline
1478 * This subroutine has nothing to do with tilting, whether at windmills
1479 * or pinball tables. Its name is short for "increment line". It
57843af0 1480 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1481 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1482 * # line 500 "foo.pm"
1483 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1484 */
1485
76e3520e 1486STATIC void
d9095cec 1487S_incline(pTHX_ const char *s)
463ee0b2 1488{
97aff369 1489 dVAR;
d9095cec
NC
1490 const char *t;
1491 const char *n;
1492 const char *e;
463ee0b2 1493
7918f24d
NC
1494 PERL_ARGS_ASSERT_INCLINE;
1495
57843af0 1496 CopLINE_inc(PL_curcop);
463ee0b2
LW
1497 if (*s++ != '#')
1498 return;
d4c19fe8
AL
1499 while (SPACE_OR_TAB(*s))
1500 s++;
73659bf1
GS
1501 if (strnEQ(s, "line", 4))
1502 s += 4;
1503 else
1504 return;
084592ab 1505 if (SPACE_OR_TAB(*s))
73659bf1 1506 s++;
4e553d73 1507 else
73659bf1 1508 return;
d4c19fe8
AL
1509 while (SPACE_OR_TAB(*s))
1510 s++;
463ee0b2
LW
1511 if (!isDIGIT(*s))
1512 return;
d4c19fe8 1513
463ee0b2
LW
1514 n = s;
1515 while (isDIGIT(*s))
1516 s++;
07714eb4 1517 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1518 return;
bf4acbe4 1519 while (SPACE_OR_TAB(*s))
463ee0b2 1520 s++;
73659bf1 1521 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1522 s++;
73659bf1
GS
1523 e = t + 1;
1524 }
463ee0b2 1525 else {
c35e046a
AL
1526 t = s;
1527 while (!isSPACE(*t))
1528 t++;
73659bf1 1529 e = t;
463ee0b2 1530 }
bf4acbe4 1531 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1532 e++;
1533 if (*e != '\n' && *e != '\0')
1534 return; /* false alarm */
1535
f4dd75d9 1536 if (t - s > 0) {
d9095cec 1537 const STRLEN len = t - s;
8a5ee598 1538#ifndef USE_ITHREADS
19bad673
NC
1539 SV *const temp_sv = CopFILESV(PL_curcop);
1540 const char *cf;
1541 STRLEN tmplen;
1542
1543 if (temp_sv) {
1544 cf = SvPVX(temp_sv);
1545 tmplen = SvCUR(temp_sv);
1546 } else {
1547 cf = NULL;
1548 tmplen = 0;
1549 }
1550
42d9b98d 1551 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1552 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1553 * to *{"::_<newfilename"} */
44867030
NC
1554 /* However, the long form of evals is only turned on by the
1555 debugger - usually they're "(eval %lu)" */
1556 char smallbuf[128];
1557 char *tmpbuf;
1558 GV **gvp;
d9095cec 1559 STRLEN tmplen2 = len;
798b63bc 1560 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1561 tmpbuf = smallbuf;
1562 else
2ae0db35 1563 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1564 tmpbuf[0] = '_';
1565 tmpbuf[1] = '<';
2ae0db35 1566 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1567 tmplen += 2;
8a5ee598
RGS
1568 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1569 if (gvp) {
44867030
NC
1570 char *tmpbuf2;
1571 GV *gv2;
1572
1573 if (tmplen2 + 2 <= sizeof smallbuf)
1574 tmpbuf2 = smallbuf;
1575 else
1576 Newx(tmpbuf2, tmplen2 + 2, char);
1577
1578 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1579 /* Either they malloc'd it, or we malloc'd it,
1580 so no prefix is present in ours. */
1581 tmpbuf2[0] = '_';
1582 tmpbuf2[1] = '<';
1583 }
1584
1585 memcpy(tmpbuf2 + 2, s, tmplen2);
1586 tmplen2 += 2;
1587
8a5ee598 1588 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1589 if (!isGV(gv2)) {
8a5ee598 1590 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1591 /* adjust ${"::_<newfilename"} to store the new file name */
1592 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1593 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1594 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1595 }
44867030
NC
1596
1597 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1598 }
e66cf94c 1599 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1600 }
8a5ee598 1601#endif
05ec9bb3 1602 CopFILE_free(PL_curcop);
d9095cec 1603 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1604 }
57843af0 1605 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1606}
1607
29595ff2 1608#ifdef PERL_MAD
cd81e915 1609/* skip space before PL_thistoken */
29595ff2
NC
1610
1611STATIC char *
1612S_skipspace0(pTHX_ register char *s)
1613{
7918f24d
NC
1614 PERL_ARGS_ASSERT_SKIPSPACE0;
1615
29595ff2
NC
1616 s = skipspace(s);
1617 if (!PL_madskills)
1618 return s;
cd81e915
NC
1619 if (PL_skipwhite) {
1620 if (!PL_thiswhite)
6b29d1f5 1621 PL_thiswhite = newSVpvs("");
cd81e915
NC
1622 sv_catsv(PL_thiswhite, PL_skipwhite);
1623 sv_free(PL_skipwhite);
1624 PL_skipwhite = 0;
1625 }
1626 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1627 return s;
1628}
1629
cd81e915 1630/* skip space after PL_thistoken */
29595ff2
NC
1631
1632STATIC char *
1633S_skipspace1(pTHX_ register char *s)
1634{
d4c19fe8 1635 const char *start = s;
29595ff2
NC
1636 I32 startoff = start - SvPVX(PL_linestr);
1637
7918f24d
NC
1638 PERL_ARGS_ASSERT_SKIPSPACE1;
1639
29595ff2
NC
1640 s = skipspace(s);
1641 if (!PL_madskills)
1642 return s;
1643 start = SvPVX(PL_linestr) + startoff;
cd81e915 1644 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1645 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1646 PL_thistoken = newSVpvn(tstart, start - tstart);
1647 }
1648 PL_realtokenstart = -1;
1649 if (PL_skipwhite) {
1650 if (!PL_nextwhite)
6b29d1f5 1651 PL_nextwhite = newSVpvs("");
cd81e915
NC
1652 sv_catsv(PL_nextwhite, PL_skipwhite);
1653 sv_free(PL_skipwhite);
1654 PL_skipwhite = 0;
29595ff2
NC
1655 }
1656 return s;
1657}
1658
1659STATIC char *
1660S_skipspace2(pTHX_ register char *s, SV **svp)
1661{
c35e046a
AL
1662 char *start;
1663 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1664 const I32 startoff = s - SvPVX(PL_linestr);
1665
7918f24d
NC
1666 PERL_ARGS_ASSERT_SKIPSPACE2;
1667
29595ff2
NC
1668 s = skipspace(s);
1669 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1670 if (!PL_madskills || !svp)
1671 return s;
1672 start = SvPVX(PL_linestr) + startoff;
cd81e915 1673 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1674 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1675 PL_thistoken = newSVpvn(tstart, start - tstart);
1676 PL_realtokenstart = -1;
29595ff2 1677 }
cd81e915 1678 if (PL_skipwhite) {
29595ff2 1679 if (!*svp)
6b29d1f5 1680 *svp = newSVpvs("");
cd81e915
NC
1681 sv_setsv(*svp, PL_skipwhite);
1682 sv_free(PL_skipwhite);
1683 PL_skipwhite = 0;
29595ff2
NC
1684 }
1685
1686 return s;
1687}
1688#endif
1689
80a702cd 1690STATIC void
15f169a1 1691S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1692{
1693 AV *av = CopFILEAVx(PL_curcop);
1694 if (av) {
b9f83d2f 1695 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1696 if (orig_sv)
1697 sv_setsv(sv, orig_sv);
1698 else
1699 sv_setpvn(sv, buf, len);
80a702cd
RGS
1700 (void)SvIOK_on(sv);
1701 SvIV_set(sv, 0);
1702 av_store(av, (I32)CopLINE(PL_curcop), sv);
1703 }
1704}
1705
ffb4593c
NT
1706/*
1707 * S_skipspace
1708 * Called to gobble the appropriate amount and type of whitespace.
1709 * Skips comments as well.
1710 */
1711
76e3520e 1712STATIC char *
cea2e8a9 1713S_skipspace(pTHX_ register char *s)
a687059c 1714{
5db06880 1715#ifdef PERL_MAD
f0e67a1d
Z
1716 char *start = s;
1717#endif /* PERL_MAD */
7918f24d 1718 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1719#ifdef PERL_MAD
cd81e915
NC
1720 if (PL_skipwhite) {
1721 sv_free(PL_skipwhite);
f0e67a1d 1722 PL_skipwhite = NULL;
5db06880 1723 }
f0e67a1d 1724#endif /* PERL_MAD */
3280af22 1725 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1726 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1727 s++;
f0e67a1d
Z
1728 } else {
1729 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1730 PL_bufptr = s;
f0998909
Z
1731 lex_read_space(LEX_KEEP_PREVIOUS |
1732 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1733 LEX_NO_NEXT_CHUNK : 0));
3280af22 1734 s = PL_bufptr;
f0e67a1d
Z
1735 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1736 if (PL_linestart > PL_bufptr)
1737 PL_bufptr = PL_linestart;
1738 return s;
463ee0b2 1739 }
5db06880 1740#ifdef PERL_MAD
f0e67a1d
Z
1741 if (PL_madskills)
1742 PL_skipwhite = newSVpvn(start, s-start);
1743#endif /* PERL_MAD */
5db06880 1744 return s;
a687059c 1745}
378cc40b 1746
ffb4593c
NT
1747/*
1748 * S_check_uni
1749 * Check the unary operators to ensure there's no ambiguity in how they're
1750 * used. An ambiguous piece of code would be:
1751 * rand + 5
1752 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1753 * the +5 is its argument.
1754 */
1755
76e3520e 1756STATIC void
cea2e8a9 1757S_check_uni(pTHX)
ba106d47 1758{
97aff369 1759 dVAR;
d4c19fe8
AL
1760 const char *s;
1761 const char *t;
2f3197b3 1762
3280af22 1763 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1764 return;
3280af22
NIS
1765 while (isSPACE(*PL_last_uni))
1766 PL_last_uni++;
c35e046a
AL
1767 s = PL_last_uni;
1768 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1769 s++;
3280af22 1770 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1771 return;
6136c704 1772
9b387841
NC
1773 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1774 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1775 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1776}
1777
ffb4593c
NT
1778/*
1779 * LOP : macro to build a list operator. Its behaviour has been replaced
1780 * with a subroutine, S_lop() for which LOP is just another name.
1781 */
1782
a0d0e21e
LW
1783#define LOP(f,x) return lop(f,x,s)
1784
ffb4593c
NT
1785/*
1786 * S_lop
1787 * Build a list operator (or something that might be one). The rules:
1788 * - if we have a next token, then it's a list operator [why?]
1789 * - if the next thing is an opening paren, then it's a function
1790 * - else it's a list operator
1791 */
1792
76e3520e 1793STATIC I32
a0be28da 1794S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1795{
97aff369 1796 dVAR;
7918f24d
NC
1797
1798 PERL_ARGS_ASSERT_LOP;
1799
6154021b 1800 pl_yylval.ival = f;
35c8bce7 1801 CLINE;
3280af22
NIS
1802 PL_expect = x;
1803 PL_bufptr = s;
1804 PL_last_lop = PL_oldbufptr;
eb160463 1805 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1806#ifdef PERL_MAD
1807 if (PL_lasttoke)
1808 return REPORT(LSTOP);
1809#else
3280af22 1810 if (PL_nexttoke)
bbf60fe6 1811 return REPORT(LSTOP);
5db06880 1812#endif
79072805 1813 if (*s == '(')
bbf60fe6 1814 return REPORT(FUNC);
29595ff2 1815 s = PEEKSPACE(s);
79072805 1816 if (*s == '(')
bbf60fe6 1817 return REPORT(FUNC);
79072805 1818 else
bbf60fe6 1819 return REPORT(LSTOP);
79072805
LW
1820}
1821
5db06880
NC
1822#ifdef PERL_MAD
1823 /*
1824 * S_start_force
1825 * Sets up for an eventual force_next(). start_force(0) basically does
1826 * an unshift, while start_force(-1) does a push. yylex removes items
1827 * on the "pop" end.
1828 */
1829
1830STATIC void
1831S_start_force(pTHX_ int where)
1832{
1833 int i;
1834
cd81e915 1835 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1836 where = PL_lasttoke;
cd81e915
NC
1837 assert(PL_curforce < 0 || PL_curforce == where);
1838 if (PL_curforce != where) {
5db06880
NC
1839 for (i = PL_lasttoke; i > where; --i) {
1840 PL_nexttoke[i] = PL_nexttoke[i-1];
1841 }
1842 PL_lasttoke++;
1843 }
cd81e915 1844 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1845 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1846 PL_curforce = where;
1847 if (PL_nextwhite) {
5db06880 1848 if (PL_madskills)
6b29d1f5 1849 curmad('^', newSVpvs(""));
cd81e915 1850 CURMAD('_', PL_nextwhite);
5db06880
NC
1851 }
1852}
1853
1854STATIC void
1855S_curmad(pTHX_ char slot, SV *sv)
1856{
1857 MADPROP **where;
1858
1859 if (!sv)
1860 return;
cd81e915
NC
1861 if (PL_curforce < 0)
1862 where = &PL_thismad;
5db06880 1863 else
cd81e915 1864 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1865
cd81e915 1866 if (PL_faketokens)
76f68e9b 1867 sv_setpvs(sv, "");
5db06880
NC
1868 else {
1869 if (!IN_BYTES) {
1870 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1871 SvUTF8_on(sv);
1872 else if (PL_encoding) {
1873 sv_recode_to_utf8(sv, PL_encoding);
1874 }
1875 }
1876 }
1877
1878 /* keep a slot open for the head of the list? */
1879 if (slot != '_' && *where && (*where)->mad_key == '^') {
1880 (*where)->mad_key = slot;
daba3364 1881 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1882 (*where)->mad_val = (void*)sv;
1883 }
1884 else
1885 addmad(newMADsv(slot, sv), where, 0);
1886}
1887#else
b3f24c00
MHM
1888# define start_force(where) NOOP
1889# define curmad(slot, sv) NOOP
5db06880
NC
1890#endif
1891
ffb4593c
NT
1892/*
1893 * S_force_next
9cbb5ea2 1894 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1895 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1896 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1897 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1898 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1899 */
1900
4e553d73 1901STATIC void
cea2e8a9 1902S_force_next(pTHX_ I32 type)
79072805 1903{
97aff369 1904 dVAR;
704d4215
GG
1905#ifdef DEBUGGING
1906 if (DEBUG_T_TEST) {
1907 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1908 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1909 }
1910#endif
5db06880 1911#ifdef PERL_MAD
cd81e915 1912 if (PL_curforce < 0)
5db06880 1913 start_force(PL_lasttoke);
cd81e915 1914 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1915 if (PL_lex_state != LEX_KNOWNEXT)
1916 PL_lex_defer = PL_lex_state;
1917 PL_lex_state = LEX_KNOWNEXT;
1918 PL_lex_expect = PL_expect;
cd81e915 1919 PL_curforce = -1;
5db06880 1920#else
3280af22
NIS
1921 PL_nexttype[PL_nexttoke] = type;
1922 PL_nexttoke++;
1923 if (PL_lex_state != LEX_KNOWNEXT) {
1924 PL_lex_defer = PL_lex_state;
1925 PL_lex_expect = PL_expect;
1926 PL_lex_state = LEX_KNOWNEXT;
79072805 1927 }
5db06880 1928#endif
79072805
LW
1929}
1930
d0a148a6 1931STATIC SV *
15f169a1 1932S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1933{
97aff369 1934 dVAR;
740cce10 1935 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1936 !IN_BYTES
1937 && UTF
1938 && !is_ascii_string((const U8*)start, len)
740cce10 1939 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1940 return sv;
1941}
1942
ffb4593c
NT
1943/*
1944 * S_force_word
1945 * When the lexer knows the next thing is a word (for instance, it has
1946 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1947 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1948 * lookahead.
ffb4593c
NT
1949 *
1950 * Arguments:
b1b65b59 1951 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1952 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1953 * int check_keyword : if true, Perl checks to make sure the word isn't
1954 * a keyword (do this if the word is a label, e.g. goto FOO)
1955 * int allow_pack : if true, : characters will also be allowed (require,
1956 * use, etc. do this)
9cbb5ea2 1957 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1958 */
1959
76e3520e 1960STATIC char *
cea2e8a9 1961S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1962{
97aff369 1963 dVAR;
463ee0b2
LW
1964 register char *s;
1965 STRLEN len;
4e553d73 1966
7918f24d
NC
1967 PERL_ARGS_ASSERT_FORCE_WORD;
1968
29595ff2 1969 start = SKIPSPACE1(start);
463ee0b2 1970 s = start;
7e2040f0 1971 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1972 (allow_pack && *s == ':') ||
15f0808c 1973 (allow_initial_tick && *s == '\'') )
a0d0e21e 1974 {
3280af22 1975 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1976 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1977 return start;
cd81e915 1978 start_force(PL_curforce);
5db06880
NC
1979 if (PL_madskills)
1980 curmad('X', newSVpvn(start,s-start));
463ee0b2 1981 if (token == METHOD) {
29595ff2 1982 s = SKIPSPACE1(s);
463ee0b2 1983 if (*s == '(')
3280af22 1984 PL_expect = XTERM;
463ee0b2 1985 else {
3280af22 1986 PL_expect = XOPERATOR;
463ee0b2 1987 }
79072805 1988 }
e74e6b3d 1989 if (PL_madskills)
63575281 1990 curmad('g', newSVpvs( "forced" ));
9ded7720 1991 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1992 = (OP*)newSVOP(OP_CONST,0,
1993 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1994 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1995 force_next(token);
1996 }
1997 return s;
1998}
1999
ffb4593c
NT
2000/*
2001 * S_force_ident
9cbb5ea2 2002 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2003 * text only contains the "foo" portion. The first argument is a pointer
2004 * to the "foo", and the second argument is the type symbol to prefix.
2005 * Forces the next token to be a "WORD".
9cbb5ea2 2006 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2007 */
2008
76e3520e 2009STATIC void
bfed75c6 2010S_force_ident(pTHX_ register const char *s, int kind)
79072805 2011{
97aff369 2012 dVAR;
7918f24d
NC
2013
2014 PERL_ARGS_ASSERT_FORCE_IDENT;
2015
c35e046a 2016 if (*s) {
90e5519e
NC
2017 const STRLEN len = strlen(s);
2018 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2019 start_force(PL_curforce);
9ded7720 2020 NEXTVAL_NEXTTOKE.opval = o;
79072805 2021 force_next(WORD);
748a9306 2022 if (kind) {
11343788 2023 o->op_private = OPpCONST_ENTERED;
55497cff 2024 /* XXX see note in pp_entereval() for why we forgo typo
2025 warnings if the symbol must be introduced in an eval.
2026 GSAR 96-10-12 */
90e5519e
NC
2027 gv_fetchpvn_flags(s, len,
2028 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2029 : GV_ADD,
2030 kind == '$' ? SVt_PV :
2031 kind == '@' ? SVt_PVAV :
2032 kind == '%' ? SVt_PVHV :
a0d0e21e 2033 SVt_PVGV
90e5519e 2034 );
748a9306 2035 }
79072805
LW
2036 }
2037}
2038
1571675a
GS
2039NV
2040Perl_str_to_version(pTHX_ SV *sv)
2041{
2042 NV retval = 0.0;
2043 NV nshift = 1.0;
2044 STRLEN len;
cfd0369c 2045 const char *start = SvPV_const(sv,len);
9d4ba2ae 2046 const char * const end = start + len;
504618e9 2047 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2048
2049 PERL_ARGS_ASSERT_STR_TO_VERSION;
2050
1571675a 2051 while (start < end) {
ba210ebe 2052 STRLEN skip;
1571675a
GS
2053 UV n;
2054 if (utf)
9041c2e3 2055 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2056 else {
2057 n = *(U8*)start;
2058 skip = 1;
2059 }
2060 retval += ((NV)n)/nshift;
2061 start += skip;
2062 nshift *= 1000;
2063 }
2064 return retval;
2065}
2066
4e553d73 2067/*
ffb4593c
NT
2068 * S_force_version
2069 * Forces the next token to be a version number.
e759cc13
RGS
2070 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2071 * and if "guessing" is TRUE, then no new token is created (and the caller
2072 * must use an alternative parsing method).
ffb4593c
NT
2073 */
2074
76e3520e 2075STATIC char *
e759cc13 2076S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2077{
97aff369 2078 dVAR;
5f66b61c 2079 OP *version = NULL;
44dcb63b 2080 char *d;
5db06880
NC
2081#ifdef PERL_MAD
2082 I32 startoff = s - SvPVX(PL_linestr);
2083#endif
89bfa8cd 2084
7918f24d
NC
2085 PERL_ARGS_ASSERT_FORCE_VERSION;
2086
29595ff2 2087 s = SKIPSPACE1(s);
89bfa8cd 2088
44dcb63b 2089 d = s;
dd629d5b 2090 if (*d == 'v')
44dcb63b 2091 d++;
44dcb63b 2092 if (isDIGIT(*d)) {
e759cc13
RGS
2093 while (isDIGIT(*d) || *d == '_' || *d == '.')
2094 d++;
5db06880
NC
2095#ifdef PERL_MAD
2096 if (PL_madskills) {
cd81e915 2097 start_force(PL_curforce);
5db06880
NC
2098 curmad('X', newSVpvn(s,d-s));
2099 }
2100#endif
9f3d182e 2101 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 2102 SV *ver;
8d08d9ba
DG
2103#ifdef USE_LOCALE_NUMERIC
2104 char *loc = setlocale(LC_NUMERIC, "C");
2105#endif
6154021b 2106 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2107#ifdef USE_LOCALE_NUMERIC
2108 setlocale(LC_NUMERIC, loc);
2109#endif
6154021b 2110 version = pl_yylval.opval;
dd629d5b
GS
2111 ver = cSVOPx(version)->op_sv;
2112 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2113 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2114 SvNV_set(ver, str_to_version(ver));
1571675a 2115 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2116 }
89bfa8cd 2117 }
5db06880
NC
2118 else if (guessing) {
2119#ifdef PERL_MAD
2120 if (PL_madskills) {
cd81e915
NC
2121 sv_free(PL_nextwhite); /* let next token collect whitespace */
2122 PL_nextwhite = 0;
5db06880
NC
2123 s = SvPVX(PL_linestr) + startoff;
2124 }
2125#endif
e759cc13 2126 return s;
5db06880 2127 }
89bfa8cd 2128 }
2129
5db06880
NC
2130#ifdef PERL_MAD
2131 if (PL_madskills && !version) {
cd81e915
NC
2132 sv_free(PL_nextwhite); /* let next token collect whitespace */
2133 PL_nextwhite = 0;
5db06880
NC
2134 s = SvPVX(PL_linestr) + startoff;
2135 }
2136#endif
89bfa8cd 2137 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2138 start_force(PL_curforce);
9ded7720 2139 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2140 force_next(WORD);
89bfa8cd 2141
e759cc13 2142 return s;
89bfa8cd 2143}
2144
ffb4593c 2145/*
91152fc1
DG
2146 * S_force_strict_version
2147 * Forces the next token to be a version number using strict syntax rules.
2148 */
2149
2150STATIC char *
2151S_force_strict_version(pTHX_ char *s)
2152{
2153 dVAR;
2154 OP *version = NULL;
2155#ifdef PERL_MAD
2156 I32 startoff = s - SvPVX(PL_linestr);
2157#endif
2158 const char *errstr = NULL;
2159
2160 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2161
2162 while (isSPACE(*s)) /* leading whitespace */
2163 s++;
2164
2165 if (is_STRICT_VERSION(s,&errstr)) {
2166 SV *ver = newSV(0);
2167 s = (char *)scan_version(s, ver, 0);
2168 version = newSVOP(OP_CONST, 0, ver);
2169 }
2170 else if ( (*s != ';' && *s != '}' ) && (s = SKIPSPACE1(s), (*s != ';' && *s !='}' ))) {
2171 PL_bufptr = s;
2172 if (errstr)
2173 yyerror(errstr); /* version required */
2174 return s;
2175 }
2176
2177#ifdef PERL_MAD
2178 if (PL_madskills && !version) {
2179 sv_free(PL_nextwhite); /* let next token collect whitespace */
2180 PL_nextwhite = 0;
2181 s = SvPVX(PL_linestr) + startoff;
2182 }
2183#endif
2184 /* NOTE: The parser sees the package name and the VERSION swapped */
2185 start_force(PL_curforce);
2186 NEXTVAL_NEXTTOKE.opval = version;
2187 force_next(WORD);
2188
2189 return s;
2190}
2191
2192/*
ffb4593c
NT
2193 * S_tokeq
2194 * Tokenize a quoted string passed in as an SV. It finds the next
2195 * chunk, up to end of string or a backslash. It may make a new
2196 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2197 * turns \\ into \.
2198 */
2199
76e3520e 2200STATIC SV *
cea2e8a9 2201S_tokeq(pTHX_ SV *sv)
79072805 2202{
97aff369 2203 dVAR;
79072805
LW
2204 register char *s;
2205 register char *send;
2206 register char *d;
b3ac6de7
IZ
2207 STRLEN len = 0;
2208 SV *pv = sv;
79072805 2209
7918f24d
NC
2210 PERL_ARGS_ASSERT_TOKEQ;
2211
79072805 2212 if (!SvLEN(sv))
b3ac6de7 2213 goto finish;
79072805 2214
a0d0e21e 2215 s = SvPV_force(sv, len);
21a311ee 2216 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2217 goto finish;
463ee0b2 2218 send = s + len;
79072805
LW
2219 while (s < send && *s != '\\')
2220 s++;
2221 if (s == send)
b3ac6de7 2222 goto finish;
79072805 2223 d = s;
be4731d2 2224 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2225 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2226 }
79072805
LW
2227 while (s < send) {
2228 if (*s == '\\') {
a0d0e21e 2229 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2230 s++; /* all that, just for this */
2231 }
2232 *d++ = *s++;
2233 }
2234 *d = '\0';
95a20fc0 2235 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2236 finish:
3280af22 2237 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2238 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2239 return sv;
2240}
2241
ffb4593c
NT
2242/*
2243 * Now come three functions related to double-quote context,
2244 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2245 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2246 * interact with PL_lex_state, and create fake ( ... ) argument lists
2247 * to handle functions and concatenation.
2248 * They assume that whoever calls them will be setting up a fake
2249 * join call, because each subthing puts a ',' after it. This lets
2250 * "lower \luPpEr"
2251 * become
2252 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2253 *
2254 * (I'm not sure whether the spurious commas at the end of lcfirst's
2255 * arguments and join's arguments are created or not).
2256 */
2257
2258/*
2259 * S_sublex_start
6154021b 2260 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2261 *
2262 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2263 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2264 *
2265 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2266 *
2267 * Everything else becomes a FUNC.
2268 *
2269 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2270 * had an OP_CONST or OP_READLINE). This just sets us up for a
2271 * call to S_sublex_push().
2272 */
2273
76e3520e 2274STATIC I32
cea2e8a9 2275S_sublex_start(pTHX)
79072805 2276{
97aff369 2277 dVAR;
6154021b 2278 register const I32 op_type = pl_yylval.ival;
79072805
LW
2279
2280 if (op_type == OP_NULL) {
6154021b 2281 pl_yylval.opval = PL_lex_op;
5f66b61c 2282 PL_lex_op = NULL;
79072805
LW
2283 return THING;
2284 }
2285 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2286 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2287
2288 if (SvTYPE(sv) == SVt_PVIV) {
2289 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2290 STRLEN len;
96a5add6 2291 const char * const p = SvPV_const(sv, len);
740cce10 2292 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2293 SvREFCNT_dec(sv);
2294 sv = nsv;
4e553d73 2295 }
6154021b 2296 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2297 PL_lex_stuff = NULL;
6f33ba73
RGS
2298 /* Allow <FH> // "foo" */
2299 if (op_type == OP_READLINE)
2300 PL_expect = XTERMORDORDOR;
79072805
LW
2301 return THING;
2302 }
e3f73d4e
RGS
2303 else if (op_type == OP_BACKTICK && PL_lex_op) {
2304 /* readpipe() vas overriden */
2305 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2306 pl_yylval.opval = PL_lex_op;
9b201d7d 2307 PL_lex_op = NULL;
e3f73d4e
RGS
2308 PL_lex_stuff = NULL;
2309 return THING;
2310 }
79072805 2311
3280af22 2312 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2313 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2314 PL_sublex_info.sub_op = PL_lex_op;
2315 PL_lex_state = LEX_INTERPPUSH;
55497cff 2316
3280af22
NIS
2317 PL_expect = XTERM;
2318 if (PL_lex_op) {
6154021b 2319 pl_yylval.opval = PL_lex_op;
5f66b61c 2320 PL_lex_op = NULL;
55497cff 2321 return PMFUNC;
2322 }
2323 else
2324 return FUNC;
2325}
2326
ffb4593c
NT
2327/*
2328 * S_sublex_push
2329 * Create a new scope to save the lexing state. The scope will be
2330 * ended in S_sublex_done. Returns a '(', starting the function arguments
2331 * to the uc, lc, etc. found before.
2332 * Sets PL_lex_state to LEX_INTERPCONCAT.
2333 */
2334
76e3520e 2335STATIC I32
cea2e8a9 2336S_sublex_push(pTHX)
55497cff 2337{
27da23d5 2338 dVAR;
f46d017c 2339 ENTER;
55497cff 2340
3280af22 2341 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2342 SAVEBOOL(PL_lex_dojoin);
3280af22 2343 SAVEI32(PL_lex_brackets);
3280af22
NIS
2344 SAVEI32(PL_lex_casemods);
2345 SAVEI32(PL_lex_starts);
651b5b28 2346 SAVEI8(PL_lex_state);
7766f137 2347 SAVEVPTR(PL_lex_inpat);
98246f1e 2348 SAVEI16(PL_lex_inwhat);
57843af0 2349 SAVECOPLINE(PL_curcop);
3280af22 2350 SAVEPPTR(PL_bufptr);
8452ff4b 2351 SAVEPPTR(PL_bufend);
3280af22
NIS
2352 SAVEPPTR(PL_oldbufptr);
2353 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2354 SAVEPPTR(PL_last_lop);
2355 SAVEPPTR(PL_last_uni);
3280af22
NIS
2356 SAVEPPTR(PL_linestart);
2357 SAVESPTR(PL_linestr);
8edd5f42
RGS
2358 SAVEGENERICPV(PL_lex_brackstack);
2359 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2360
2361 PL_linestr = PL_lex_stuff;
a0714e2c 2362 PL_lex_stuff = NULL;
3280af22 2363
9cbb5ea2
GS
2364 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2365 = SvPVX(PL_linestr);
3280af22 2366 PL_bufend += SvCUR(PL_linestr);
bd61b366 2367 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2368 SAVEFREESV(PL_linestr);
2369
2370 PL_lex_dojoin = FALSE;
2371 PL_lex_brackets = 0;
a02a5408
JC
2372 Newx(PL_lex_brackstack, 120, char);
2373 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2374 PL_lex_casemods = 0;
2375 *PL_lex_casestack = '\0';
2376 PL_lex_starts = 0;
2377 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2378 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2379
2380 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2381 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2382 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2383 else
5f66b61c 2384 PL_lex_inpat = NULL;
79072805 2385
55497cff 2386 return '(';
79072805
LW
2387}
2388
ffb4593c
NT
2389/*
2390 * S_sublex_done
2391 * Restores lexer state after a S_sublex_push.
2392 */
2393
76e3520e 2394STATIC I32
cea2e8a9 2395S_sublex_done(pTHX)
79072805 2396{
27da23d5 2397 dVAR;
3280af22 2398 if (!PL_lex_starts++) {
396482e1 2399 SV * const sv = newSVpvs("");
9aa983d2
JH
2400 if (SvUTF8(PL_linestr))
2401 SvUTF8_on(sv);
3280af22 2402 PL_expect = XOPERATOR;
6154021b 2403 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2404 return THING;
2405 }
2406
3280af22
NIS
2407 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2408 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2409 return yylex();
79072805
LW
2410 }
2411
ffb4593c 2412 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2413 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2414 PL_linestr = PL_lex_repl;
2415 PL_lex_inpat = 0;
2416 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2417 PL_bufend += SvCUR(PL_linestr);
bd61b366 2418 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2419 SAVEFREESV(PL_linestr);
2420 PL_lex_dojoin = FALSE;
2421 PL_lex_brackets = 0;
3280af22
NIS
2422 PL_lex_casemods = 0;
2423 *PL_lex_casestack = '\0';
2424 PL_lex_starts = 0;
25da4f38 2425 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2426 PL_lex_state = LEX_INTERPNORMAL;
2427 PL_lex_starts++;
e9fa98b2
HS
2428 /* we don't clear PL_lex_repl here, so that we can check later
2429 whether this is an evalled subst; that means we rely on the
2430 logic to ensure sublex_done() is called again only via the
2431 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2432 }
e9fa98b2 2433 else {
3280af22 2434 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2435 PL_lex_repl = NULL;
e9fa98b2 2436 }
79072805 2437 return ',';
ffed7fef
LW
2438 }
2439 else {
5db06880
NC
2440#ifdef PERL_MAD
2441 if (PL_madskills) {
cd81e915
NC
2442 if (PL_thiswhite) {
2443 if (!PL_endwhite)
6b29d1f5 2444 PL_endwhite = newSVpvs("");
cd81e915
NC
2445 sv_catsv(PL_endwhite, PL_thiswhite);
2446 PL_thiswhite = 0;
2447 }
2448 if (PL_thistoken)
76f68e9b 2449 sv_setpvs(PL_thistoken,"");
5db06880 2450 else
cd81e915 2451 PL_realtokenstart = -1;
5db06880
NC
2452 }
2453#endif
f46d017c 2454 LEAVE;
3280af22
NIS
2455 PL_bufend = SvPVX(PL_linestr);
2456 PL_bufend += SvCUR(PL_linestr);
2457 PL_expect = XOPERATOR;
09bef843 2458 PL_sublex_info.sub_inwhat = 0;
79072805 2459 return ')';
ffed7fef
LW
2460 }
2461}
2462
02aa26ce
NT
2463/*
2464 scan_const
2465
2466 Extracts a pattern, double-quoted string, or transliteration. This
2467 is terrifying code.
2468
94def140 2469 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2470 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2471 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2472
94def140
TS
2473 Returns a pointer to the character scanned up to. If this is
2474 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2475 successfully parsed), will leave an OP for the substring scanned
6154021b 2476 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2477 by looking at the next characters herself.
2478
02aa26ce
NT
2479 In patterns:
2480 backslashes:
ff3f963a 2481 constants: \N{NAME} only
02aa26ce
NT
2482 case and quoting: \U \Q \E
2483 stops on @ and $, but not for $ as tail anchor
2484
2485 In transliterations:
2486 characters are VERY literal, except for - not at the start or end
94def140
TS
2487 of the string, which indicates a range. If the range is in bytes,
2488 scan_const expands the range to the full set of intermediate
2489 characters. If the range is in utf8, the hyphen is replaced with
2490 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2491
2492 In double-quoted strings:
2493 backslashes:
2494 double-quoted style: \r and \n
ff3f963a 2495 constants: \x31, etc.
94def140 2496 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2497 case and quoting: \U \Q \E
2498 stops on @ and $
2499
2500 scan_const does *not* construct ops to handle interpolated strings.
2501 It stops processing as soon as it finds an embedded $ or @ variable
2502 and leaves it to the caller to work out what's going on.
2503
94def140
TS
2504 embedded arrays (whether in pattern or not) could be:
2505 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2506
2507 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2508
2509 $ in pattern could be $foo or could be tail anchor. Assumption:
2510 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2511 followed by one of "()| \r\n\t"
02aa26ce
NT
2512
2513 \1 (backreferences) are turned into $1
2514
2515 The structure of the code is
2516 while (there's a character to process) {
94def140
TS
2517 handle transliteration ranges
2518 skip regexp comments /(?#comment)/ and codes /(?{code})/
2519 skip #-initiated comments in //x patterns
2520 check for embedded arrays
02aa26ce
NT
2521 check for embedded scalars
2522 if (backslash) {
94def140 2523 deprecate \1 in substitution replacements
02aa26ce
NT
2524 handle string-changing backslashes \l \U \Q \E, etc.
2525 switch (what was escaped) {
94def140 2526 handle \- in a transliteration (becomes a literal -)
ff3f963a 2527 if a pattern and not \N{, go treat as regular character
94def140
TS
2528 handle \132 (octal characters)
2529 handle \x15 and \x{1234} (hex characters)
ff3f963a 2530 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2531 handle \cV (control characters)
2532 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2533 } (end switch)
77a135fe 2534 continue
02aa26ce 2535 } (end if backslash)
77a135fe 2536 handle regular character
02aa26ce 2537 } (end while character to read)
4e553d73 2538
02aa26ce
NT
2539*/
2540
76e3520e 2541STATIC char *
cea2e8a9 2542S_scan_const(pTHX_ char *start)
79072805 2543{
97aff369 2544 dVAR;
3280af22 2545 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2546 SV *sv = newSV(send - start); /* sv for the constant. See
2547 note below on sizing. */
02aa26ce
NT
2548 register char *s = start; /* start of the constant */
2549 register char *d = SvPVX(sv); /* destination for copies */
2550 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2551 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2552 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2553 I32 this_utf8 = UTF; /* Is the source string assumed
2554 to be UTF8? But, this can
2555 show as true when the source
2556 isn't utf8, as for example
2557 when it is entirely composed
2558 of hex constants */
2559
2560 /* Note on sizing: The scanned constant is placed into sv, which is
2561 * initialized by newSV() assuming one byte of output for every byte of
2562 * input. This routine expects newSV() to allocate an extra byte for a
2563 * trailing NUL, which this routine will append if it gets to the end of
2564 * the input. There may be more bytes of input than output (eg., \N{LATIN
2565 * CAPITAL LETTER A}), or more output than input if the constant ends up
2566 * recoded to utf8, but each time a construct is found that might increase
2567 * the needed size, SvGROW() is called. Its size parameter each time is
2568 * based on the best guess estimate at the time, namely the length used so
2569 * far, plus the length the current construct will occupy, plus room for
2570 * the trailing NUL, plus one byte for every input byte still unscanned */
2571
012bcf8d 2572 UV uv;
4c3a8340
TS
2573#ifdef EBCDIC
2574 UV literal_endpoint = 0;
e294cc5d 2575 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2576#endif
012bcf8d 2577
7918f24d
NC
2578 PERL_ARGS_ASSERT_SCAN_CONST;
2579
2b9d42f0
NIS
2580 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2581 /* If we are doing a trans and we know we want UTF8 set expectation */
2582 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2583 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2584 }
2585
2586
79072805 2587 while (s < send || dorange) {
ff3f963a 2588
02aa26ce 2589 /* get transliterations out of the way (they're most literal) */
3280af22 2590 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2591 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2592 if (dorange) {
1ba5c669
JH
2593 I32 i; /* current expanded character */
2594 I32 min; /* first character in range */
2595 I32 max; /* last character in range */
02aa26ce 2596
e294cc5d
JH
2597#ifdef EBCDIC
2598 UV uvmax = 0;
2599#endif
2600
2601 if (has_utf8
2602#ifdef EBCDIC
2603 && !native_range
2604#endif
2605 ) {
9d4ba2ae 2606 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2607 char *e = d++;
2608 while (e-- > c)
2609 *(e + 1) = *e;
25716404 2610 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2611 /* mark the range as done, and continue */
2612 dorange = FALSE;
2613 didrange = TRUE;
2614 continue;
2615 }
2b9d42f0 2616
95a20fc0 2617 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2618#ifdef EBCDIC
2619 SvGROW(sv,
2620 SvLEN(sv) + (has_utf8 ?
2621 (512 - UTF_CONTINUATION_MARK +
2622 UNISKIP(0x100))
2623 : 256));
2624 /* How many two-byte within 0..255: 128 in UTF-8,
2625 * 96 in UTF-8-mod. */
2626#else
9cbb5ea2 2627 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2628#endif
9cbb5ea2 2629 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2630#ifdef EBCDIC
2631 if (has_utf8) {
2632 int j;
2633 for (j = 0; j <= 1; j++) {
2634 char * const c = (char*)utf8_hop((U8*)d, -1);
2635 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2636 if (j)
2637 min = (U8)uv;
2638 else if (uv < 256)
2639 max = (U8)uv;
2640 else {
2641 max = (U8)0xff; /* only to \xff */
2642 uvmax = uv; /* \x{100} to uvmax */
2643 }
2644 d = c; /* eat endpoint chars */
2645 }
2646 }
2647 else {
2648#endif
2649 d -= 2; /* eat the first char and the - */
2650 min = (U8)*d; /* first char in range */
2651 max = (U8)d[1]; /* last char in range */
2652#ifdef EBCDIC
2653 }
2654#endif
8ada0baa 2655
c2e66d9e 2656 if (min > max) {
01ec43d0 2657 Perl_croak(aTHX_
d1573ac7 2658 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2659 (char)min, (char)max);
c2e66d9e
GS
2660 }
2661
c7f1f016 2662#ifdef EBCDIC
4c3a8340
TS
2663 if (literal_endpoint == 2 &&
2664 ((isLOWER(min) && isLOWER(max)) ||
2665 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2666 if (isLOWER(min)) {
2667 for (i = min; i <= max; i++)
2668 if (isLOWER(i))
db42d148 2669 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2670 } else {
2671 for (i = min; i <= max; i++)
2672 if (isUPPER(i))
db42d148 2673 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2674 }
2675 }
2676 else
2677#endif
2678 for (i = min; i <= max; i++)
e294cc5d
JH
2679#ifdef EBCDIC
2680 if (has_utf8) {
2681 const U8 ch = (U8)NATIVE_TO_UTF(i);
2682 if (UNI_IS_INVARIANT(ch))
2683 *d++ = (U8)i;
2684 else {
2685 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2686 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2687 }
2688 }
2689 else
2690#endif
2691 *d++ = (char)i;
2692
2693#ifdef EBCDIC
2694 if (uvmax) {
2695 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2696 if (uvmax > 0x101)
2697 *d++ = (char)UTF_TO_NATIVE(0xff);
2698 if (uvmax > 0x100)
2699 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2700 }
2701#endif
02aa26ce
NT
2702
2703 /* mark the range as done, and continue */
79072805 2704 dorange = FALSE;
01ec43d0 2705 didrange = TRUE;
4c3a8340
TS
2706#ifdef EBCDIC
2707 literal_endpoint = 0;
2708#endif
79072805 2709 continue;
4e553d73 2710 }
02aa26ce
NT
2711
2712 /* range begins (ignore - as first or last char) */
79072805 2713 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2714 if (didrange) {
1fafa243 2715 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2716 }
e294cc5d
JH
2717 if (has_utf8
2718#ifdef EBCDIC
2719 && !native_range
2720#endif
2721 ) {
25716404 2722 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2723 s++;
2724 continue;
2725 }
79072805
LW
2726 dorange = TRUE;
2727 s++;
01ec43d0
GS
2728 }
2729 else {
2730 didrange = FALSE;
4c3a8340
TS
2731#ifdef EBCDIC
2732 literal_endpoint = 0;
e294cc5d 2733 native_range = TRUE;
4c3a8340 2734#endif
01ec43d0 2735 }
79072805 2736 }
02aa26ce
NT
2737
2738 /* if we get here, we're not doing a transliteration */
2739
0f5d15d6
IZ
2740 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2741 except for the last char, which will be done separately. */
3280af22 2742 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2743 if (s[2] == '#') {
e994fd66 2744 while (s+1 < send && *s != ')')
db42d148 2745 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2746 }
2747 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2748 || (s[2] == '?' && s[3] == '{'))
155aba94 2749 {
cc6b7395 2750 I32 count = 1;
0f5d15d6 2751 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2752 char c;
2753
d9f97599
GS
2754 while (count && (c = *regparse)) {
2755 if (c == '\\' && regparse[1])
2756 regparse++;
4e553d73 2757 else if (c == '{')
cc6b7395 2758 count++;
4e553d73 2759 else if (c == '}')
cc6b7395 2760 count--;
d9f97599 2761 regparse++;
cc6b7395 2762 }
e994fd66 2763 if (*regparse != ')')
5bdf89e7 2764 regparse--; /* Leave one char for continuation. */
0f5d15d6 2765 while (s < regparse)
db42d148 2766 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2767 }
748a9306 2768 }
02aa26ce
NT
2769
2770 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2771 else if (*s == '#' && PL_lex_inpat &&
2772 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2773 while (s+1 < send && *s != '\n')
db42d148 2774 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2775 }
02aa26ce 2776
5d1d4326 2777 /* check for embedded arrays
da6eedaa 2778 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2779 */
1749ea0d
TS
2780 else if (*s == '@' && s[1]) {
2781 if (isALNUM_lazy_if(s+1,UTF))
2782 break;
2783 if (strchr(":'{$", s[1]))
2784 break;
2785 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2786 break; /* in regexp, neither @+ nor @- are interpolated */
2787 }
02aa26ce
NT
2788
2789 /* check for embedded scalars. only stop if we're sure it's a
2790 variable.
2791 */
79072805 2792 else if (*s == '$') {
3280af22 2793 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2794 break;
77772344 2795 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2796 if (s[1] == '\\') {
2797 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2798 "Possible unintended interpolation of $\\ in regex");
77772344 2799 }
79072805 2800 break; /* in regexp, $ might be tail anchor */
77772344 2801 }
79072805 2802 }
02aa26ce 2803
2b9d42f0
NIS
2804 /* End of else if chain - OP_TRANS rejoin rest */
2805
02aa26ce 2806 /* backslashes */
79072805 2807 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2808 char* e; /* Can be used for ending '}', etc. */
2809
79072805 2810 s++;
02aa26ce 2811
02aa26ce 2812 /* deprecate \1 in strings and substitution replacements */
3280af22 2813 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2814 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2815 {
a2a5de95 2816 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2817 *--s = '$';
2818 break;
2819 }
02aa26ce
NT
2820
2821 /* string-change backslash escapes */
3280af22 2822 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2823 --s;
2824 break;
2825 }
ff3f963a
KW
2826 /* In a pattern, process \N, but skip any other backslash escapes.
2827 * This is because we don't want to translate an escape sequence
2828 * into a meta symbol and have the regex compiler use the meta
2829 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2830 * in spite of this, we do have to process \N here while the proper
2831 * charnames handler is in scope. See bugs #56444 and #62056.
2832 * There is a complication because \N in a pattern may also stand
2833 * for 'match a non-nl', and not mean a charname, in which case its
2834 * processing should be deferred to the regex compiler. To be a
2835 * charname it must be followed immediately by a '{', and not look
2836 * like \N followed by a curly quantifier, i.e., not something like
2837 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2838 * quantifier */
2839 else if (PL_lex_inpat
2840 && (*s != 'N'
2841 || s[1] != '{'
2842 || regcurly(s + 1)))
2843 {
cc74c5bd
TS
2844 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2845 goto default_action;
2846 }
02aa26ce 2847
79072805 2848 switch (*s) {
02aa26ce
NT
2849
2850 /* quoted - in transliterations */
79072805 2851 case '-':
3280af22 2852 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2853 *d++ = *s++;
2854 continue;
2855 }
2856 /* FALL THROUGH */
2857 default:
11b8faa4 2858 {
a2a5de95
NC
2859 if ((isALPHA(*s) || isDIGIT(*s)))
2860 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2861 "Unrecognized escape \\%c passed through",
2862 *s);
11b8faa4 2863 /* default action is to copy the quoted character */
f9a63242 2864 goto default_action;
11b8faa4 2865 }
02aa26ce 2866
77a135fe 2867 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2868 case '0': case '1': case '2': case '3':
2869 case '4': case '5': case '6': case '7':
ba210ebe 2870 {
53305cf1
NC
2871 I32 flags = 0;
2872 STRLEN len = 3;
77a135fe 2873 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2874 s += len;
2875 }
012bcf8d 2876 goto NUM_ESCAPE_INSERT;
02aa26ce 2877
77a135fe 2878 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2879 case 'x':
a0ed51b3
LW
2880 ++s;
2881 if (*s == '{') {
9d4ba2ae 2882 char* const e = strchr(s, '}');
a4c04bdc
NC
2883 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2884 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2885 STRLEN len;
355860ce 2886
53305cf1 2887 ++s;
adaeee49 2888 if (!e) {
a0ed51b3 2889 yyerror("Missing right brace on \\x{}");
355860ce 2890 continue;
ba210ebe 2891 }
53305cf1 2892 len = e - s;
77a135fe 2893 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2894 s = e + 1;
a0ed51b3
LW
2895 }
2896 else {
ba210ebe 2897 {
53305cf1 2898 STRLEN len = 2;
a4c04bdc 2899 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2900 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2901 s += len;
2902 }
012bcf8d
GS
2903 }
2904
2905 NUM_ESCAPE_INSERT:
ff3f963a
KW
2906 /* Insert oct or hex escaped character. There will always be
2907 * enough room in sv since such escapes will be longer than any
2908 * UTF-8 sequence they can end up as, except if they force us
2909 * to recode the rest of the string into utf8 */
ba7cea30 2910
77a135fe 2911 /* Here uv is the ordinal of the next character being added in
ff3f963a 2912 * unicode (converted from native). */
77a135fe 2913 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2914 if (!has_utf8 && uv > 255) {
77a135fe
KW
2915 /* Might need to recode whatever we have accumulated so
2916 * far if it contains any chars variant in utf8 or
2917 * utf-ebcdic. */
2918
2919 SvCUR_set(sv, d - SvPVX_const(sv));
2920 SvPOK_on(sv);
2921 *d = '\0';
77a135fe 2922 /* See Note on sizing above. */
7bf79863
KW
2923 sv_utf8_upgrade_flags_grow(sv,
2924 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2925 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2926 d = SvPVX(sv) + SvCUR(sv);
2927 has_utf8 = TRUE;
012bcf8d
GS
2928 }
2929
77a135fe
KW
2930 if (has_utf8) {
2931 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2932 if (PL_lex_inwhat == OP_TRANS &&
2933 PL_sublex_info.sub_op) {
2934 PL_sublex_info.sub_op->op_private |=
2935 (PL_lex_repl ? OPpTRANS_FROM_UTF
2936 : OPpTRANS_TO_UTF);
f9a63242 2937 }
e294cc5d
JH
2938#ifdef EBCDIC
2939 if (uv > 255 && !dorange)
2940 native_range = FALSE;
2941#endif
012bcf8d 2942 }
a0ed51b3 2943 else {
012bcf8d 2944 *d++ = (char)uv;
a0ed51b3 2945 }
012bcf8d
GS
2946 }
2947 else {
c4d5f83a 2948 *d++ = (char) uv;
a0ed51b3 2949 }
79072805 2950 continue;
02aa26ce 2951
4a2d328f 2952 case 'N':
ff3f963a
KW
2953 /* In a non-pattern \N must be a named character, like \N{LATIN
2954 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
2955 * mean to match a non-newline. For non-patterns, named
2956 * characters are converted to their string equivalents. In
2957 * patterns, named characters are not converted to their
2958 * ultimate forms for the same reasons that other escapes
2959 * aren't. Instead, they are converted to the \N{U+...} form
2960 * to get the value from the charnames that is in effect right
2961 * now, while preserving the fact that it was a named character
2962 * so that the regex compiler knows this */
2963
2964 /* This section of code doesn't generally use the
2965 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
2966 * a close examination of this macro and determined it is a
2967 * no-op except on utfebcdic variant characters. Every
2968 * character generated by this that would normally need to be
2969 * enclosed by this macro is invariant, so the macro is not
2970 * needed, and would complicate use of copy(). There are other
2971 * parts of this file where the macro is used inconsistently,
2972 * but are saved by it being a no-op */
2973
2974 /* The structure of this section of code (besides checking for
2975 * errors and upgrading to utf8) is:
2976 * Further disambiguate between the two meanings of \N, and if
2977 * not a charname, go process it elsewhere
0a96133f
KW
2978 * If of form \N{U+...}, pass it through if a pattern;
2979 * otherwise convert to utf8
2980 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
2981 * pattern; otherwise convert to utf8 */
ff3f963a
KW
2982
2983 /* Here, s points to the 'N'; the test below is guaranteed to
2984 * succeed if we are being called on a pattern as we already
2985 * know from a test above that the next character is a '{'.
2986 * On a non-pattern \N must mean 'named sequence, which
2987 * requires braces */
2988 s++;
2989 if (*s != '{') {
2990 yyerror("Missing braces on \\N{}");
2991 continue;
2992 }
2993 s++;
2994
0a96133f 2995 /* If there is no matching '}', it is an error. */
ff3f963a
KW
2996 if (! (e = strchr(s, '}'))) {
2997 if (! PL_lex_inpat) {
5777a3f7 2998 yyerror("Missing right brace on \\N{}");
0a96133f
KW
2999 } else {
3000 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3001 }
0a96133f 3002 continue;
ff3f963a 3003 }
cddc7ef4 3004
ff3f963a 3005 /* Here it looks like a named character */
cddc7ef4 3006
ff3f963a
KW
3007 if (PL_lex_inpat) {
3008
3009 /* XXX This block is temporary code. \N{} implies that the
3010 * pattern is to have Unicode semantics, and therefore
3011 * currently has to be encoded in utf8. By putting it in
3012 * utf8 now, we save a whole pass in the regular expression
3013 * compiler. Once that code is changed so Unicode
3014 * semantics doesn't necessarily have to be in utf8, this
3015 * block should be removed */
3016 if (!has_utf8) {
77a135fe 3017 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3018 SvPOK_on(sv);
e4f3eed8 3019 *d = '\0';
77a135fe 3020 /* See Note on sizing above. */
7bf79863 3021 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3022 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3023 /* 5 = '\N{' + cur char + NUL */
3024 (STRLEN)(send - s) + 5);
f08d6ad9 3025 d = SvPVX(sv) + SvCUR(sv);
89491803 3026 has_utf8 = TRUE;
ff3f963a
KW
3027 }
3028 }
423cee85 3029
ff3f963a
KW
3030 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3031 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3032 | PERL_SCAN_DISALLOW_PREFIX;
3033 STRLEN len;
3034
3035 /* For \N{U+...}, the '...' is a unicode value even on
3036 * EBCDIC machines */
3037 s += 2; /* Skip to next char after the 'U+' */
3038 len = e - s;
3039 uv = grok_hex(s, &len, &flags, NULL);
3040 if (len == 0 || len != (STRLEN)(e - s)) {
3041 yyerror("Invalid hexadecimal number in \\N{U+...}");
3042 s = e + 1;
3043 continue;
3044 }
3045
3046 if (PL_lex_inpat) {
3047
3048 /* Pass through to the regex compiler unchanged. The
3049 * reason we evaluated the number above is to make sure
0a96133f 3050 * there wasn't a syntax error. */
ff3f963a
KW
3051 s -= 5; /* Include the '\N{U+' */
3052 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3053 d += e - s + 1;
3054 }
3055 else { /* Not a pattern: convert the hex to string */
3056
3057 /* If destination is not in utf8, unconditionally
3058 * recode it to be so. This is because \N{} implies
3059 * Unicode semantics, and scalars have to be in utf8
3060 * to guarantee those semantics */
3061 if (! has_utf8) {
3062 SvCUR_set(sv, d - SvPVX_const(sv));
3063 SvPOK_on(sv);
3064 *d = '\0';
3065 /* See Note on sizing above. */
3066 sv_utf8_upgrade_flags_grow(
3067 sv,
3068 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3069 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3070 d = SvPVX(sv) + SvCUR(sv);
3071 has_utf8 = TRUE;
3072 }
3073
3074 /* Add the string to the output */
3075 if (UNI_IS_INVARIANT(uv)) {
3076 *d++ = (char) uv;
3077 }
3078 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3079 }
3080 }
3081 else { /* Here is \N{NAME} but not \N{U+...}. */
3082
3083 SV *res; /* result from charnames */
3084 const char *str; /* the string in 'res' */
3085 STRLEN len; /* its length */
3086
3087 /* Get the value for NAME */
3088 res = newSVpvn(s, e - s);
3089 res = new_constant( NULL, 0, "charnames",
3090 /* includes all of: \N{...} */
3091 res, NULL, s - 3, e - s + 4 );
3092
3093 /* Most likely res will be in utf8 already since the
3094 * standard charnames uses pack U, but a custom translator
3095 * can leave it otherwise, so make sure. XXX This can be
3096 * revisited to not have charnames use utf8 for characters
3097 * that don't need it when regexes don't have to be in utf8
3098 * for Unicode semantics. If doing so, remember EBCDIC */
3099 sv_utf8_upgrade(res);
3100 str = SvPV_const(res, len);
3101
3102 /* Don't accept malformed input */
3103 if (! is_utf8_string((U8 *) str, len)) {
3104 yyerror("Malformed UTF-8 returned by \\N");
3105 }
3106 else if (PL_lex_inpat) {
3107
3108 if (! len) { /* The name resolved to an empty string */
3109 Copy("\\N{}", d, 4, char);
3110 d += 4;
3111 }
3112 else {
3113 /* In order to not lose information for the regex
3114 * compiler, pass the result in the specially made
3115 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3116 * the code points in hex of each character
3117 * returned by charnames */
3118
3119 const char *str_end = str + len;
3120 STRLEN char_length; /* cur char's byte length */
3121 STRLEN output_length; /* and the number of bytes
3122 after this is translated
3123 into hex digits */
3124 const STRLEN off = d - SvPVX_const(sv);
3125
3126 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3127 * max('U+', '.'); and 1 for NUL */
3128 char hex_string[2 * UTF8_MAXBYTES + 5];
3129
3130 /* Get the first character of the result. */
3131 U32 uv = utf8n_to_uvuni((U8 *) str,
3132 len,
3133 &char_length,
3134 UTF8_ALLOW_ANYUV);
3135
3136 /* The call to is_utf8_string() above hopefully
3137 * guarantees that there won't be an error. But
3138 * it's easy here to make sure. The function just
3139 * above warns and returns 0 if invalid utf8, but
3140 * it can also return 0 if the input is validly a
3141 * NUL. Disambiguate */
3142 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3143 uv = UNICODE_REPLACEMENT;
3144 }
3145
3146 /* Convert first code point to hex, including the
3147 * boiler plate before it */
3148 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3149 output_length = strlen(hex_string);
3150
3151 /* Make sure there is enough space to hold it */
3152 d = off + SvGROW(sv, off
3153 + output_length
3154 + (STRLEN)(send - e)
3155 + 2); /* '}' + NUL */
3156 /* And output it */
3157 Copy(hex_string, d, output_length, char);
3158 d += output_length;
3159
3160 /* For each subsequent character, append dot and
3161 * its ordinal in hex */
3162 while ((str += char_length) < str_end) {
3163 const STRLEN off = d - SvPVX_const(sv);
3164 U32 uv = utf8n_to_uvuni((U8 *) str,
3165 str_end - str,
3166 &char_length,
3167 UTF8_ALLOW_ANYUV);
3168 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3169 uv = UNICODE_REPLACEMENT;
3170 }
3171
3172 sprintf(hex_string, ".%X", (unsigned int) uv);
3173 output_length = strlen(hex_string);
3174
3175 d = off + SvGROW(sv, off
3176 + output_length
3177 + (STRLEN)(send - e)
3178 + 2); /* '}' + NUL */
3179 Copy(hex_string, d, output_length, char);
3180 d += output_length;
3181 }
3182
3183 *d++ = '}'; /* Done. Add the trailing brace */
3184 }
3185 }
3186 else { /* Here, not in a pattern. Convert the name to a
3187 * string. */
3188
3189 /* If destination is not in utf8, unconditionally
3190 * recode it to be so. This is because \N{} implies
3191 * Unicode semantics, and scalars have to be in utf8
3192 * to guarantee those semantics */
3193 if (! has_utf8) {
3194 SvCUR_set(sv, d - SvPVX_const(sv));
3195 SvPOK_on(sv);
3196 *d = '\0';
3197 /* See Note on sizing above. */
3198 sv_utf8_upgrade_flags_grow(sv,
3199 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3200 len + (STRLEN)(send - s) + 1);
3201 d = SvPVX(sv) + SvCUR(sv);
3202 has_utf8 = TRUE;
3203 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3204
3205 /* See Note on sizing above. (NOTE: SvCUR() is not
3206 * set correctly here). */
3207 const STRLEN off = d - SvPVX_const(sv);
3208 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3209 }
3210 Copy(str, d, len, char);
3211 d += len;
423cee85 3212 }
423cee85 3213 SvREFCNT_dec(res);
cb233ae3
KW
3214
3215 /* Deprecate non-approved name syntax */
3216 if (ckWARN_d(WARN_DEPRECATED)) {
3217 bool problematic = FALSE;
3218 char* i = s;
3219
3220 /* For non-ut8 input, look to see that the first
3221 * character is an alpha, then loop through the rest
3222 * checking that each is a continuation */
3223 if (! this_utf8) {
3224 if (! isALPHAU(*i)) problematic = TRUE;
3225 else for (i = s + 1; i < e; i++) {
3226 if (isCHARNAME_CONT(*i)) continue;
3227 problematic = TRUE;
3228 break;
3229 }
3230 }
3231 else {
3232 /* Similarly for utf8. For invariants can check
3233 * directly. We accept anything above the latin1
3234 * range because it is immaterial to Perl if it is
3235 * correct or not, and is expensive to check. But
3236 * it is fairly easy in the latin1 range to convert
3237 * the variants into a single character and check
3238 * those */
3239 if (UTF8_IS_INVARIANT(*i)) {
3240 if (! isALPHAU(*i)) problematic = TRUE;
3241 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3242 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3243 *(i+1)))))
3244 {
3245 problematic = TRUE;
3246 }
3247 }
3248 if (! problematic) for (i = s + UTF8SKIP(s);
3249 i < e;
3250 i+= UTF8SKIP(i))
3251 {
3252 if (UTF8_IS_INVARIANT(*i)) {
3253 if (isCHARNAME_CONT(*i)) continue;
3254 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3255 continue;
3256 } else if (isCHARNAME_CONT(
3257 UNI_TO_NATIVE(
3258 UTF8_ACCUMULATE(*i, *(i+1)))))
3259 {
3260 continue;
3261 }
3262 problematic = TRUE;
3263 break;
3264 }
3265 }
3266 if (problematic) {
6e1bad6c
KW
3267 /* The e-i passed to the final %.*s makes sure that
3268 * should the trailing NUL be missing that this
3269 * print won't run off the end of the string */
cb233ae3 3270 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
6e1bad6c 3271 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s", i - s + 1, s, e - i, i + 1);
cb233ae3
KW
3272 }
3273 }
3274 } /* End \N{NAME} */
ff3f963a
KW
3275#ifdef EBCDIC
3276 if (!dorange)
3277 native_range = FALSE; /* \N{} is defined to be Unicode */
3278#endif
3279 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3280 continue;
3281
02aa26ce 3282 /* \c is a control character */
79072805
LW
3283 case 'c':
3284 s++;
961ce445 3285 if (s < send) {
f9d13529 3286 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3287 }
961ce445
RGS
3288 else {
3289 yyerror("Missing control char name in \\c");
3290 }
79072805 3291 continue;
02aa26ce
NT
3292
3293 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3294 case 'b':
db42d148 3295 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3296 break;
3297 case 'n':
db42d148 3298 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3299 break;
3300 case 'r':
db42d148 3301 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3302 break;
3303 case 'f':
db42d148 3304 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3305 break;
3306 case 't':
db42d148 3307 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3308 break;
34a3fe2a 3309 case 'e':
db42d148 3310 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3311 break;
3312 case 'a':
db42d148 3313 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3314 break;
02aa26ce
NT
3315 } /* end switch */
3316
79072805
LW
3317 s++;
3318 continue;
02aa26ce 3319 } /* end if (backslash) */
4c3a8340
TS
3320#ifdef EBCDIC
3321 else
3322 literal_endpoint++;
3323#endif
02aa26ce 3324
f9a63242 3325 default_action:
77a135fe
KW
3326 /* If we started with encoded form, or already know we want it,
3327 then encode the next character */
3328 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3329 STRLEN len = 1;
77a135fe
KW
3330
3331
3332 /* One might think that it is wasted effort in the case of the
3333 * source being utf8 (this_utf8 == TRUE) to take the next character
3334 * in the source, convert it to an unsigned value, and then convert
3335 * it back again. But the source has not been validated here. The
3336 * routine that does the conversion checks for errors like
3337 * malformed utf8 */
3338
5f66b61c
AL
3339 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3340 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3341 if (!has_utf8) {
3342 SvCUR_set(sv, d - SvPVX_const(sv));
3343 SvPOK_on(sv);
3344 *d = '\0';
77a135fe 3345 /* See Note on sizing above. */
7bf79863
KW
3346 sv_utf8_upgrade_flags_grow(sv,
3347 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3348 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3349 d = SvPVX(sv) + SvCUR(sv);
3350 has_utf8 = TRUE;
3351 } else if (need > len) {
3352 /* encoded value larger than old, may need extra space (NOTE:
3353 * SvCUR() is not set correctly here). See Note on sizing
3354 * above. */
9d4ba2ae 3355 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3356 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3357 }
77a135fe
KW
3358 s += len;
3359
5f66b61c 3360 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3361#ifdef EBCDIC
3362 if (uv > 255 && !dorange)
3363 native_range = FALSE;
3364#endif
2b9d42f0
NIS
3365 }
3366 else {
3367 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3368 }
02aa26ce
NT
3369 } /* while loop to process each character */
3370
3371 /* terminate the string and set up the sv */
79072805 3372 *d = '\0';
95a20fc0 3373 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3374 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3375 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3376
79072805 3377 SvPOK_on(sv);
9f4817db 3378 if (PL_encoding && !has_utf8) {
d0063567
DK
3379 sv_recode_to_utf8(sv, PL_encoding);
3380 if (SvUTF8(sv))
3381 has_utf8 = TRUE;
9f4817db 3382 }
2b9d42f0 3383 if (has_utf8) {
7e2040f0 3384 SvUTF8_on(sv);
2b9d42f0 3385 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3386 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3387 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3388 }
3389 }
79072805 3390
02aa26ce 3391 /* shrink the sv if we allocated more than we used */
79072805 3392 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3393 SvPV_shrink_to_cur(sv);
79072805 3394 }
02aa26ce 3395
6154021b 3396 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3397 if (s > PL_bufptr) {
eb0d8d16
NC
3398 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3399 const char *const key = PL_lex_inpat ? "qr" : "q";
3400 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3401 const char *type;
3402 STRLEN typelen;
3403
3404 if (PL_lex_inwhat == OP_TRANS) {
3405 type = "tr";
3406 typelen = 2;
3407 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3408 type = "s";
3409 typelen = 1;
3410 } else {
3411 type = "qq";
3412 typelen = 2;
3413 }
3414
3415 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3416 type, typelen);
3417 }
6154021b 3418 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3419 } else
8990e307 3420 SvREFCNT_dec(sv);
79072805
LW
3421 return s;
3422}
3423
ffb4593c
NT
3424/* S_intuit_more
3425 * Returns TRUE if there's more to the expression (e.g., a subscript),
3426 * FALSE otherwise.
ffb4593c
NT
3427 *
3428 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3429 *
3430 * ->[ and ->{ return TRUE
3431 * { and [ outside a pattern are always subscripts, so return TRUE
3432 * if we're outside a pattern and it's not { or [, then return FALSE
3433 * if we're in a pattern and the first char is a {
3434 * {4,5} (any digits around the comma) returns FALSE
3435 * if we're in a pattern and the first char is a [
3436 * [] returns FALSE
3437 * [SOMETHING] has a funky algorithm to decide whether it's a
3438 * character class or not. It has to deal with things like
3439 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3440 * anything else returns TRUE
3441 */
3442
9cbb5ea2
GS
3443/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3444
76e3520e 3445STATIC int
cea2e8a9 3446S_intuit_more(pTHX_ register char *s)
79072805 3447{
97aff369 3448 dVAR;
7918f24d
NC
3449
3450 PERL_ARGS_ASSERT_INTUIT_MORE;
3451
3280af22 3452 if (PL_lex_brackets)
79072805
LW
3453 return TRUE;
3454 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3455 return TRUE;
3456 if (*s != '{' && *s != '[')
3457 return FALSE;
3280af22 3458 if (!PL_lex_inpat)
79072805
LW
3459 return TRUE;
3460
3461 /* In a pattern, so maybe we have {n,m}. */
3462 if (*s == '{') {
3463 s++;
3464 if (!isDIGIT(*s))
3465 return TRUE;
3466 while (isDIGIT(*s))
3467 s++;
3468 if (*s == ',')
3469 s++;
3470 while (isDIGIT(*s))
3471 s++;
3472 if (*s == '}')
3473 return FALSE;
3474 return TRUE;
3475
3476 }
3477
3478 /* On the other hand, maybe we have a character class */
3479
3480 s++;
3481 if (*s == ']' || *s == '^')
3482 return FALSE;
3483 else {
ffb4593c 3484 /* this is terrifying, and it works */
79072805
LW
3485 int weight = 2; /* let's weigh the evidence */
3486 char seen[256];
f27ffc4a 3487 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3488 const char * const send = strchr(s,']');
3280af22 3489 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3490
3491 if (!send) /* has to be an expression */
3492 return TRUE;
3493
3494 Zero(seen,256,char);
3495 if (*s == '$')
3496 weight -= 3;
3497 else if (isDIGIT(*s)) {
3498 if (s[1] != ']') {
3499 if (isDIGIT(s[1]) && s[2] == ']')
3500 weight -= 10;
3501 }
3502 else
3503 weight -= 100;
3504 }
3505 for (; s < send; s++) {
3506 last_un_char = un_char;
3507 un_char = (unsigned char)*s;
3508 switch (*s) {
3509 case '@':
3510 case '&':
3511 case '$':
3512 weight -= seen[un_char] * 10;
7e2040f0 3513 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3514 int len;
8903cb82 3515 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3516 len = (int)strlen(tmpbuf);
3517 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3518 weight -= 100;
3519 else
3520 weight -= 10;
3521 }
3522 else if (*s == '$' && s[1] &&
93a17b20
LW
3523 strchr("[#!%*<>()-=",s[1])) {
3524 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3525 weight -= 10;
3526 else
3527 weight -= 1;
3528 }
3529 break;
3530 case '\\':
3531 un_char = 254;
3532 if (s[1]) {
93a17b20 3533 if (strchr("wds]",s[1]))
79072805 3534 weight += 100;
10edeb5d 3535 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3536 weight += 1;
93a17b20 3537 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3538 weight += 40;
3539 else if (isDIGIT(s[1])) {
3540 weight += 40;
3541 while (s[1] && isDIGIT(s[1]))
3542 s++;
3543 }
3544 }
3545 else
3546 weight += 100;
3547 break;
3548 case '-':
3549 if (s[1] == '\\')
3550 weight += 50;
93a17b20 3551 if (strchr("aA01! ",last_un_char))
79072805 3552 weight += 30;
93a17b20 3553 if (strchr("zZ79~",s[1]))
79072805 3554 weight += 30;
f27ffc4a
GS
3555 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3556 weight -= 5; /* cope with negative subscript */
79072805
LW
3557 break;
3558 default:
3792a11b
NC
3559 if (!isALNUM(last_un_char)
3560 && !(last_un_char == '$' || last_un_char == '@'
3561 || last_un_char == '&')
3562 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3563 char *d = tmpbuf;
3564 while (isALPHA(*s))
3565 *d++ = *s++;
3566 *d = '\0';
5458a98a 3567 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3568 weight -= 150;
3569 }
3570 if (un_char == last_un_char + 1)
3571 weight += 5;
3572 weight -= seen[un_char];
3573 break;
3574 }
3575 seen[un_char]++;
3576 }
3577 if (weight >= 0) /* probably a character class */
3578 return FALSE;
3579 }
3580
3581 return TRUE;
3582}
ffed7fef 3583
ffb4593c
NT
3584/*
3585 * S_intuit_method
3586 *
3587 * Does all the checking to disambiguate
3588 * foo bar
3589 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3590 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3591 *
3592 * First argument is the stuff after the first token, e.g. "bar".
3593 *
3594 * Not a method if bar is a filehandle.
3595 * Not a method if foo is a subroutine prototyped to take a filehandle.
3596 * Not a method if it's really "Foo $bar"
3597 * Method if it's "foo $bar"
3598 * Not a method if it's really "print foo $bar"
3599 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3600 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3601 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3602 * =>
3603 */
3604
76e3520e 3605STATIC int
62d55b22 3606S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3607{
97aff369 3608 dVAR;
a0d0e21e 3609 char *s = start + (*start == '$');
3280af22 3610 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3611 STRLEN len;
3612 GV* indirgv;
5db06880
NC
3613#ifdef PERL_MAD
3614 int soff;
3615#endif
a0d0e21e 3616
7918f24d
NC
3617 PERL_ARGS_ASSERT_INTUIT_METHOD;
3618
a0d0e21e 3619 if (gv) {
62d55b22 3620 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3621 return 0;
62d55b22
NC
3622 if (cv) {
3623 if (SvPOK(cv)) {
3624 const char *proto = SvPVX_const(cv);
3625 if (proto) {
3626 if (*proto == ';')
3627 proto++;
3628 if (*proto == '*')
3629 return 0;
3630 }
b6c543e3
IZ
3631 }
3632 } else
c35e046a 3633 gv = NULL;
a0d0e21e 3634 }
8903cb82 3635 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3636 /* start is the beginning of the possible filehandle/object,
3637 * and s is the end of it
3638 * tmpbuf is a copy of it
3639 */
3640
a0d0e21e 3641 if (*start == '$') {
3ef1310e
RGS
3642 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3643 isUPPER(*PL_tokenbuf))
a0d0e21e 3644 return 0;
5db06880
NC
3645#ifdef PERL_MAD
3646 len = start - SvPVX(PL_linestr);
3647#endif
29595ff2 3648 s = PEEKSPACE(s);
f0092767 3649#ifdef PERL_MAD
5db06880
NC
3650 start = SvPVX(PL_linestr) + len;
3651#endif
3280af22
NIS
3652 PL_bufptr = start;
3653 PL_expect = XREF;
a0d0e21e
LW
3654 return *s == '(' ? FUNCMETH : METHOD;
3655 }
5458a98a 3656 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3657 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3658 len -= 2;
3659 tmpbuf[len] = '\0';
5db06880
NC
3660#ifdef PERL_MAD
3661 soff = s - SvPVX(PL_linestr);
3662#endif
c3e0f903
GS
3663 goto bare_package;
3664 }
90e5519e 3665 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3666 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3667 return 0;
3668 /* filehandle or package name makes it a method */
da51bb9b 3669 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3670#ifdef PERL_MAD
3671 soff = s - SvPVX(PL_linestr);
3672#endif
29595ff2 3673 s = PEEKSPACE(s);
3280af22 3674 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3675 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3676 bare_package:
cd81e915 3677 start_force(PL_curforce);
9ded7720 3678 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3679 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3680 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3681 if (PL_madskills)
3682 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3683 PL_expect = XTERM;
a0d0e21e 3684 force_next(WORD);
3280af22 3685 PL_bufptr = s;
5db06880
NC
3686#ifdef PERL_MAD
3687 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3688#endif
a0d0e21e
LW
3689 return *s == '(' ? FUNCMETH : METHOD;
3690 }
3691 }
3692 return 0;
3693}
3694
16d20bd9 3695/* Encoded script support. filter_add() effectively inserts a
4e553d73 3696 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3697 * Note that the filter function only applies to the current source file
3698 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3699 *
3700 * The datasv parameter (which may be NULL) can be used to pass
3701 * private data to this instance of the filter. The filter function
3702 * can recover the SV using the FILTER_DATA macro and use it to
3703 * store private buffers and state information.
3704 *
3705 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3706 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3707 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3708 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3709 * private use must be set using malloc'd pointers.
3710 */
16d20bd9
AD
3711
3712SV *
864dbfa3 3713Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3714{
97aff369 3715 dVAR;
f4c556ac 3716 if (!funcp)
a0714e2c 3717 return NULL;
f4c556ac 3718
5486870f
DM
3719 if (!PL_parser)
3720 return NULL;
3721
3280af22
NIS
3722 if (!PL_rsfp_filters)
3723 PL_rsfp_filters = newAV();
16d20bd9 3724 if (!datasv)
561b68a9 3725 datasv = newSV(0);
862a34c6 3726 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3727 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3728 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3729 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3730 FPTR2DPTR(void *, IoANY(datasv)),
3731 SvPV_nolen(datasv)));
3280af22
NIS
3732 av_unshift(PL_rsfp_filters, 1);
3733 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3734 return(datasv);
3735}
4e553d73 3736
16d20bd9
AD
3737
3738/* Delete most recently added instance of this filter function. */
a0d0e21e 3739void
864dbfa3 3740Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3741{
97aff369 3742 dVAR;
e0c19803 3743 SV *datasv;
24801a4b 3744
7918f24d
NC
3745 PERL_ARGS_ASSERT_FILTER_DEL;
3746
33073adb 3747#ifdef DEBUGGING
55662e27
JH
3748 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3749 FPTR2DPTR(void*, funcp)));
33073adb 3750#endif
5486870f 3751 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3752 return;
3753 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3754 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3755 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 3756 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 3757 IoANY(datasv) = (void *)NULL;
3280af22 3758 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3759
16d20bd9
AD
3760 return;
3761 }
3762 /* we need to search for the correct entry and clear it */
cea2e8a9 3763 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3764}
3765
3766
1de9afcd
RGS
3767/* Invoke the idxth filter function for the current rsfp. */
3768/* maxlen 0 = read one text line */
16d20bd9 3769I32
864dbfa3 3770Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3771{
97aff369 3772 dVAR;
16d20bd9
AD
3773 filter_t funcp;
3774 SV *datasv = NULL;
f482118e
NC
3775 /* This API is bad. It should have been using unsigned int for maxlen.
3776 Not sure if we want to change the API, but if not we should sanity
3777 check the value here. */
39cd7a59
NC
3778 const unsigned int correct_length
3779 = maxlen < 0 ?
3780#ifdef PERL_MICRO
3781 0x7FFFFFFF
3782#else
3783 INT_MAX
3784#endif
3785 : maxlen;
e50aee73 3786
7918f24d
NC
3787 PERL_ARGS_ASSERT_FILTER_READ;
3788
5486870f 3789 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3790 return -1;
1de9afcd 3791 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3792 /* Provide a default input filter to make life easy. */
3793 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3794 DEBUG_P(PerlIO_printf(Perl_debug_log,
3795 "filter_read %d: from rsfp\n", idx));
f482118e 3796 if (correct_length) {
16d20bd9
AD
3797 /* Want a block */
3798 int len ;
f54cb97a 3799 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3800
3801 /* ensure buf_sv is large enough */
881d8f0a 3802 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3803 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3804 correct_length)) <= 0) {
3280af22 3805 if (PerlIO_error(PL_rsfp))
37120919
AD
3806 return -1; /* error */
3807 else
3808 return 0 ; /* end of file */
3809 }
16d20bd9 3810 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3811 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3812 } else {
3813 /* Want a line */
3280af22
NIS
3814 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3815 if (PerlIO_error(PL_rsfp))
37120919
AD
3816 return -1; /* error */
3817 else
3818 return 0 ; /* end of file */
3819 }
16d20bd9
AD
3820 }
3821 return SvCUR(buf_sv);
3822 }
3823 /* Skip this filter slot if filter has been deleted */
1de9afcd 3824 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3825 DEBUG_P(PerlIO_printf(Perl_debug_log,
3826 "filter_read %d: skipped (filter deleted)\n",
3827 idx));
f482118e 3828 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3829 }
3830 /* Get function pointer hidden within datasv */
8141890a 3831 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3832 DEBUG_P(PerlIO_printf(Perl_debug_log,
3833 "filter_read %d: via function %p (%s)\n",
ca0270c4 3834 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3835 /* Call function. The function is expected to */
3836 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3837 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3838 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3839}
3840
76e3520e 3841STATIC char *
5cc814fd 3842S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3843{
97aff369 3844 dVAR;
7918f24d
NC
3845
3846 PERL_ARGS_ASSERT_FILTER_GETS;
3847
c39cd008 3848#ifdef PERL_CR_FILTER
3280af22 3849 if (!PL_rsfp_filters) {
c39cd008 3850 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3851 }
3852#endif
3280af22 3853 if (PL_rsfp_filters) {
55497cff 3854 if (!append)
3855 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3856 if (FILTER_READ(0, sv, 0) > 0)
3857 return ( SvPVX(sv) ) ;
3858 else
bd61b366 3859 return NULL ;
16d20bd9 3860 }
9d116dd7 3861 else
5cc814fd 3862 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3863}
3864
01ec43d0 3865STATIC HV *
9bde8eb0 3866S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3867{
97aff369 3868 dVAR;
def3634b
GS
3869 GV *gv;
3870
7918f24d
NC
3871 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3872
01ec43d0 3873 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3874 return PL_curstash;
3875
3876 if (len > 2 &&
3877 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3878 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3879 {
3880 return GvHV(gv); /* Foo:: */
def3634b
GS
3881 }
3882
3883 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3884 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3885 if (gv && GvCV(gv)) {
3886 SV * const sv = cv_const_sv(GvCV(gv));
3887 if (sv)
9bde8eb0 3888 pkgname = SvPV_const(sv, len);
def3634b
GS
3889 }
3890
9bde8eb0 3891 return gv_stashpvn(pkgname, len, 0);
def3634b 3892}
a0d0e21e 3893
e3f73d4e
RGS
3894/*
3895 * S_readpipe_override
3896 * Check whether readpipe() is overriden, and generates the appropriate
3897 * optree, provided sublex_start() is called afterwards.
3898 */
3899STATIC void
1d51329b 3900S_readpipe_override(pTHX)
e3f73d4e
RGS
3901{
3902 GV **gvp;
3903 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3904 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3905 if ((gv_readpipe
3906 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3907 ||
3908 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3909 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3910 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3911 {
3912 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3913 append_elem(OP_LIST,
3914 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3915 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3916 }
e3f73d4e
RGS
3917}
3918
5db06880
NC
3919#ifdef PERL_MAD
3920 /*
3921 * Perl_madlex
3922 * The intent of this yylex wrapper is to minimize the changes to the
3923 * tokener when we aren't interested in collecting madprops. It remains
3924 * to be seen how successful this strategy will be...
3925 */
3926
3927int
3928Perl_madlex(pTHX)
3929{
3930 int optype;
3931 char *s = PL_bufptr;
3932
cd81e915
NC
3933 /* make sure PL_thiswhite is initialized */
3934 PL_thiswhite = 0;
3935 PL_thismad = 0;
5db06880 3936
cd81e915 3937 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3938 if (PL_pending_ident)
3939 return S_pending_ident(aTHX);
3940
3941 /* previous token ate up our whitespace? */
cd81e915
NC
3942 if (!PL_lasttoke && PL_nextwhite) {
3943 PL_thiswhite = PL_nextwhite;
3944 PL_nextwhite = 0;
5db06880
NC
3945 }
3946
3947 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3948 PL_realtokenstart = -1;
3949 PL_thistoken = 0;
5db06880
NC
3950 optype = yylex();
3951 s = PL_bufptr;
cd81e915 3952 assert(PL_curforce < 0);
5db06880 3953
cd81e915
NC
3954 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3955 if (!PL_thistoken) {
3956 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3957 PL_thistoken = newSVpvs("");
5db06880 3958 else {
c35e046a 3959 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3960 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3961 }
3962 }
cd81e915
NC
3963 if (PL_thismad) /* install head */
3964 CURMAD('X', PL_thistoken);
5db06880
NC
3965 }
3966
3967 /* last whitespace of a sublex? */
cd81e915
NC
3968 if (optype == ')' && PL_endwhite) {
3969 CURMAD('X', PL_endwhite);
5db06880
NC
3970 }
3971
cd81e915 3972 if (!PL_thismad) {
5db06880
NC
3973
3974 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3975 if (!PL_thiswhite && !PL_endwhite && !optype) {
3976 sv_free(PL_thistoken);
3977 PL_thistoken = 0;
5db06880
NC
3978 return 0;
3979 }
3980
3981 /* put off final whitespace till peg */
3982 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3983 PL_nextwhite = PL_thiswhite;
3984 PL_thiswhite = 0;
5db06880 3985 }
cd81e915
NC
3986 else if (PL_thisopen) {
3987 CURMAD('q', PL_thisopen);
3988 if (PL_thistoken)
3989 sv_free(PL_thistoken);
3990 PL_thistoken = 0;
5db06880
NC
3991 }
3992 else {
3993 /* Store actual token text as madprop X */
cd81e915 3994 CURMAD('X', PL_thistoken);
5db06880
NC
3995 }
3996
cd81e915 3997 if (PL_thiswhite) {
5db06880 3998 /* add preceding whitespace as madprop _ */
cd81e915 3999 CURMAD('_', PL_thiswhite);
5db06880
NC
4000 }
4001
cd81e915 4002 if (PL_thisstuff) {
5db06880 4003 /* add quoted material as madprop = */
cd81e915 4004 CURMAD('=', PL_thisstuff);
5db06880
NC
4005 }
4006
cd81e915 4007 if (PL_thisclose) {
5db06880 4008 /* add terminating quote as madprop Q */
cd81e915 4009 CURMAD('Q', PL_thisclose);
5db06880
NC
4010 }
4011 }
4012
4013 /* special processing based on optype */
4014
4015 switch (optype) {
4016
4017 /* opval doesn't need a TOKEN since it can already store mp */
4018 case WORD:
4019 case METHOD:
4020 case FUNCMETH:
4021 case THING:
4022 case PMFUNC:
4023 case PRIVATEREF:
4024 case FUNC0SUB:
4025 case UNIOPSUB:
4026 case LSTOPSUB:
6154021b
RGS
4027 if (pl_yylval.opval)
4028 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4029 PL_thismad = 0;
5db06880
NC
4030 return optype;
4031
4032 /* fake EOF */
4033 case 0:
4034 optype = PEG;
cd81e915
NC
4035 if (PL_endwhite) {
4036 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4037 PL_endwhite = 0;
5db06880
NC
4038 }
4039 break;
4040
4041 case ']':
4042 case '}':
cd81e915 4043 if (PL_faketokens)
5db06880
NC
4044 break;
4045 /* remember any fake bracket that lexer is about to discard */
4046 if (PL_lex_brackets == 1 &&
4047 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4048 {
4049 s = PL_bufptr;
4050 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4051 s++;
4052 if (*s == '}') {
cd81e915
NC
4053 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4054 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4055 PL_thiswhite = 0;
5db06880
NC
4056 PL_bufptr = s - 1;
4057 break; /* don't bother looking for trailing comment */
4058 }
4059 else
4060 s = PL_bufptr;
4061 }
4062 if (optype == ']')
4063 break;
4064 /* FALLTHROUGH */
4065
4066 /* attach a trailing comment to its statement instead of next token */
4067 case ';':
cd81e915 4068 if (PL_faketokens)
5db06880
NC
4069 break;
4070 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4071 s = PL_bufptr;
4072 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4073 s++;
4074 if (*s == '\n' || *s == '#') {
4075 while (s < PL_bufend && *s != '\n')
4076 s++;
4077 if (s < PL_bufend)
4078 s++;
cd81e915
NC
4079 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4080 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4081 PL_thiswhite = 0;
5db06880
NC
4082 PL_bufptr = s;
4083 }
4084 }
4085 break;
4086
4087 /* pval */
4088 case LABEL:
4089 break;
4090
4091 /* ival */
4092 default:
4093 break;
4094
4095 }
4096
4097 /* Create new token struct. Note: opvals return early above. */
6154021b 4098 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4099 PL_thismad = 0;
5db06880
NC
4100 return optype;
4101}
4102#endif
4103
468aa647 4104STATIC char *
cc6ed77d 4105S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4106 dVAR;
7918f24d
NC
4107
4108 PERL_ARGS_ASSERT_TOKENIZE_USE;
4109
468aa647
RGS
4110 if (PL_expect != XSTATE)
4111 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4112 is_use ? "use" : "no"));
29595ff2 4113 s = SKIPSPACE1(s);
468aa647
RGS
4114 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4115 s = force_version(s, TRUE);
17c59fdf
VP
4116 if (*s == ';' || *s == '}'
4117 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4118 start_force(PL_curforce);
9ded7720 4119 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4120 force_next(WORD);
4121 }
4122 else if (*s == 'v') {
4123 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4124 s = force_version(s, FALSE);
4125 }
4126 }
4127 else {
4128 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4129 s = force_version(s, FALSE);
4130 }
6154021b 4131 pl_yylval.ival = is_use;
468aa647
RGS
4132 return s;
4133}
748a9306 4134#ifdef DEBUGGING
27da23d5 4135 static const char* const exp_name[] =
09bef843 4136 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4137 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4138 };
748a9306 4139#endif
463ee0b2 4140
02aa26ce
NT
4141/*
4142 yylex
4143
4144 Works out what to call the token just pulled out of the input
4145 stream. The yacc parser takes care of taking the ops we return and
4146 stitching them into a tree.
4147
4148 Returns:
4149 PRIVATEREF
4150
4151 Structure:
4152 if read an identifier
4153 if we're in a my declaration
4154 croak if they tried to say my($foo::bar)
4155 build the ops for a my() declaration
4156 if it's an access to a my() variable
4157 are we in a sort block?
4158 croak if my($a); $a <=> $b
4159 build ops for access to a my() variable
4160 if in a dq string, and they've said @foo and we can't find @foo
4161 croak
4162 build ops for a bareword
4163 if we already built the token before, use it.
4164*/
4165
20141f0e 4166
dba4d153
JH
4167#ifdef __SC__
4168#pragma segment Perl_yylex
4169#endif
dba4d153 4170int
dba4d153 4171Perl_yylex(pTHX)
20141f0e 4172{
97aff369 4173 dVAR;
3afc138a 4174 register char *s = PL_bufptr;
378cc40b 4175 register char *d;
463ee0b2 4176 STRLEN len;
aa7440fb 4177 bool bof = FALSE;
580561a3 4178 U32 fake_eof = 0;
a687059c 4179
10edeb5d
JH
4180 /* orig_keyword, gvp, and gv are initialized here because
4181 * jump to the label just_a_word_zero can bypass their
4182 * initialization later. */
4183 I32 orig_keyword = 0;
4184 GV *gv = NULL;
4185 GV **gvp = NULL;
4186
bbf60fe6 4187 DEBUG_T( {
396482e1 4188 SV* tmp = newSVpvs("");
b6007c36
DM
4189 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4190 (IV)CopLINE(PL_curcop),
4191 lex_state_names[PL_lex_state],
4192 exp_name[PL_expect],
4193 pv_display(tmp, s, strlen(s), 0, 60));
4194 SvREFCNT_dec(tmp);
bbf60fe6 4195 } );
02aa26ce 4196 /* check if there's an identifier for us to look at */
ba979b31 4197 if (PL_pending_ident)
bbf60fe6 4198 return REPORT(S_pending_ident(aTHX));
bbce6d69 4199
02aa26ce
NT
4200 /* no identifier pending identification */
4201
3280af22 4202 switch (PL_lex_state) {
79072805
LW
4203#ifdef COMMENTARY
4204 case LEX_NORMAL: /* Some compilers will produce faster */
4205 case LEX_INTERPNORMAL: /* code if we comment these out. */
4206 break;
4207#endif
4208
09bef843 4209 /* when we've already built the next token, just pull it out of the queue */
79072805 4210 case LEX_KNOWNEXT:
5db06880
NC
4211#ifdef PERL_MAD
4212 PL_lasttoke--;
6154021b 4213 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4214 if (PL_madskills) {
cd81e915 4215 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4216 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4217 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4218 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4219 PL_thismad->mad_val = 0;
4220 mad_free(PL_thismad);
4221 PL_thismad = 0;
5db06880
NC
4222 }
4223 }
4224 if (!PL_lasttoke) {
4225 PL_lex_state = PL_lex_defer;
4226 PL_expect = PL_lex_expect;
4227 PL_lex_defer = LEX_NORMAL;
4228 if (!PL_nexttoke[PL_lasttoke].next_type)
4229 return yylex();
4230 }
4231#else
3280af22 4232 PL_nexttoke--;
6154021b 4233 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4234 if (!PL_nexttoke) {
4235 PL_lex_state = PL_lex_defer;
4236 PL_expect = PL_lex_expect;
4237 PL_lex_defer = LEX_NORMAL;
463ee0b2 4238 }
5db06880
NC
4239#endif
4240#ifdef PERL_MAD
4241 /* FIXME - can these be merged? */
4242 return(PL_nexttoke[PL_lasttoke].next_type);
4243#else
bbf60fe6 4244 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 4245#endif
79072805 4246
02aa26ce 4247 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4248 when we get here, PL_bufptr is at the \
02aa26ce 4249 */
79072805
LW
4250 case LEX_INTERPCASEMOD:
4251#ifdef DEBUGGING
3280af22 4252 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4253 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4254#endif
02aa26ce 4255 /* handle \E or end of string */
3280af22 4256 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4257 /* if at a \E */
3280af22 4258 if (PL_lex_casemods) {
f54cb97a 4259 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4260 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4261
3792a11b
NC
4262 if (PL_bufptr != PL_bufend
4263 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4264 PL_bufptr += 2;
4265 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4266#ifdef PERL_MAD
4267 if (PL_madskills)
6b29d1f5 4268 PL_thistoken = newSVpvs("\\E");
5db06880 4269#endif
a0d0e21e 4270 }
bbf60fe6 4271 return REPORT(')');
79072805 4272 }
5db06880
NC
4273#ifdef PERL_MAD
4274 while (PL_bufptr != PL_bufend &&
4275 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4276 if (!PL_thiswhite)
6b29d1f5 4277 PL_thiswhite = newSVpvs("");
cd81e915 4278 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4279 PL_bufptr += 2;
4280 }
4281#else
3280af22
NIS
4282 if (PL_bufptr != PL_bufend)
4283 PL_bufptr += 2;
5db06880 4284#endif
3280af22 4285 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4286 return yylex();
79072805
LW
4287 }
4288 else {
607df283 4289 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4290 "### Saw case modifier\n"); });
3280af22 4291 s = PL_bufptr + 1;
6e909404 4292 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4293#ifdef PERL_MAD
cd81e915 4294 if (!PL_thiswhite)
6b29d1f5 4295 PL_thiswhite = newSVpvs("");
cd81e915 4296 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4297#endif
89122651 4298 PL_bufptr = s + 3;
6e909404
JH
4299 PL_lex_state = LEX_INTERPCONCAT;
4300 return yylex();
a0d0e21e 4301 }
6e909404 4302 else {
90771dc0 4303 I32 tmp;
5db06880
NC
4304 if (!PL_madskills) /* when just compiling don't need correct */
4305 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4306 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4307 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4308 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4309 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4310 return REPORT(')');
6e909404
JH
4311 }
4312 if (PL_lex_casemods > 10)
4313 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4314 PL_lex_casestack[PL_lex_casemods++] = *s;
4315 PL_lex_casestack[PL_lex_casemods] = '\0';
4316 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4317 start_force(PL_curforce);
9ded7720 4318 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4319 force_next('(');
cd81e915 4320 start_force(PL_curforce);
6e909404 4321 if (*s == 'l')
9ded7720 4322 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4323 else if (*s == 'u')
9ded7720 4324 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4325 else if (*s == 'L')
9ded7720 4326 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4327 else if (*s == 'U')
9ded7720 4328 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4329 else if (*s == 'Q')
9ded7720 4330 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4331 else
4332 Perl_croak(aTHX_ "panic: yylex");
5db06880 4333 if (PL_madskills) {
a5849ce5
NC
4334 SV* const tmpsv = newSVpvs("\\ ");
4335 /* replace the space with the character we want to escape
4336 */
4337 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4338 curmad('_', tmpsv);
4339 }
6e909404 4340 PL_bufptr = s + 1;
a0d0e21e 4341 }
79072805 4342 force_next(FUNC);
3280af22
NIS
4343 if (PL_lex_starts) {
4344 s = PL_bufptr;
4345 PL_lex_starts = 0;
5db06880
NC
4346#ifdef PERL_MAD
4347 if (PL_madskills) {
cd81e915
NC
4348 if (PL_thistoken)
4349 sv_free(PL_thistoken);
6b29d1f5 4350 PL_thistoken = newSVpvs("");
5db06880
NC
4351 }
4352#endif
131b3ad0
DM
4353 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4354 if (PL_lex_casemods == 1 && PL_lex_inpat)
4355 OPERATOR(',');
4356 else
4357 Aop(OP_CONCAT);
79072805
LW
4358 }
4359 else
cea2e8a9 4360 return yylex();
79072805
LW
4361 }
4362
55497cff 4363 case LEX_INTERPPUSH:
bbf60fe6 4364 return REPORT(sublex_push());
55497cff 4365
79072805 4366 case LEX_INTERPSTART:
3280af22 4367 if (PL_bufptr == PL_bufend)
bbf60fe6 4368 return REPORT(sublex_done());
607df283 4369 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4370 "### Interpolated variable\n"); });
3280af22
NIS
4371 PL_expect = XTERM;
4372 PL_lex_dojoin = (*PL_bufptr == '@');
4373 PL_lex_state = LEX_INTERPNORMAL;
4374 if (PL_lex_dojoin) {
cd81e915 4375 start_force(PL_curforce);
9ded7720 4376 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4377 force_next(',');
cd81e915 4378 start_force(PL_curforce);
a0d0e21e 4379 force_ident("\"", '$');
cd81e915 4380 start_force(PL_curforce);
9ded7720 4381 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4382 force_next('$');
cd81e915 4383 start_force(PL_curforce);
9ded7720 4384 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4385 force_next('(');
cd81e915 4386 start_force(PL_curforce);
9ded7720 4387 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4388 force_next(FUNC);
4389 }
3280af22
NIS
4390 if (PL_lex_starts++) {
4391 s = PL_bufptr;
5db06880
NC
4392#ifdef PERL_MAD
4393 if (PL_madskills) {
cd81e915
NC
4394 if (PL_thistoken)
4395 sv_free(PL_thistoken);
6b29d1f5 4396 PL_thistoken = newSVpvs("");
5db06880
NC
4397 }
4398#endif
131b3ad0
DM
4399 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4400 if (!PL_lex_casemods && PL_lex_inpat)
4401 OPERATOR(',');
4402 else
4403 Aop(OP_CONCAT);
79072805 4404 }
cea2e8a9 4405 return yylex();
79072805
LW
4406
4407 case LEX_INTERPENDMAYBE:
3280af22
NIS
4408 if (intuit_more(PL_bufptr)) {
4409 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4410 break;
4411 }
4412 /* FALL THROUGH */
4413
4414 case LEX_INTERPEND:
3280af22
NIS
4415 if (PL_lex_dojoin) {
4416 PL_lex_dojoin = FALSE;
4417 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4418#ifdef PERL_MAD
4419 if (PL_madskills) {
cd81e915
NC
4420 if (PL_thistoken)
4421 sv_free(PL_thistoken);
6b29d1f5 4422 PL_thistoken = newSVpvs("");
5db06880
NC
4423 }
4424#endif
bbf60fe6 4425 return REPORT(')');
79072805 4426 }
43a16006 4427 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4428 && SvEVALED(PL_lex_repl))
43a16006 4429 {
e9fa98b2 4430 if (PL_bufptr != PL_bufend)
cea2e8a9 4431 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4432 PL_lex_repl = NULL;
e9fa98b2 4433 }
79072805
LW
4434 /* FALLTHROUGH */
4435 case LEX_INTERPCONCAT:
4436#ifdef DEBUGGING
3280af22 4437 if (PL_lex_brackets)
cea2e8a9 4438 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4439#endif
3280af22 4440 if (PL_bufptr == PL_bufend)
bbf60fe6 4441 return REPORT(sublex_done());
79072805 4442
3280af22
NIS
4443 if (SvIVX(PL_linestr) == '\'') {
4444 SV *sv = newSVsv(PL_linestr);
4445 if (!PL_lex_inpat)
76e3520e 4446 sv = tokeq(sv);
3280af22 4447 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4448 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4449 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4450 s = PL_bufend;
79072805
LW
4451 }
4452 else {
3280af22 4453 s = scan_const(PL_bufptr);
79072805 4454 if (*s == '\\')
3280af22 4455 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4456 else
3280af22 4457 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4458 }
4459
3280af22 4460 if (s != PL_bufptr) {
cd81e915 4461 start_force(PL_curforce);
5db06880
NC
4462 if (PL_madskills) {
4463 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4464 }
6154021b 4465 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4466 PL_expect = XTERM;
79072805 4467 force_next(THING);
131b3ad0 4468 if (PL_lex_starts++) {
5db06880
NC
4469#ifdef PERL_MAD
4470 if (PL_madskills) {
cd81e915
NC
4471 if (PL_thistoken)
4472 sv_free(PL_thistoken);
6b29d1f5 4473 PL_thistoken = newSVpvs("");
5db06880
NC
4474 }
4475#endif
131b3ad0
DM
4476 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4477 if (!PL_lex_casemods && PL_lex_inpat)
4478 OPERATOR(',');
4479 else
4480 Aop(OP_CONCAT);
4481 }
79072805 4482 else {
3280af22 4483 PL_bufptr = s;
cea2e8a9 4484 return yylex();
79072805
LW
4485 }
4486 }
4487
cea2e8a9 4488 return yylex();
a0d0e21e 4489 case LEX_FORMLINE:
3280af22
NIS
4490 PL_lex_state = LEX_NORMAL;
4491 s = scan_formline(PL_bufptr);
4492 if (!PL_lex_formbrack)
a0d0e21e
LW
4493 goto rightbracket;
4494 OPERATOR(';');
79072805
LW
4495 }
4496
3280af22
NIS
4497 s = PL_bufptr;
4498 PL_oldoldbufptr = PL_oldbufptr;
4499 PL_oldbufptr = s;
463ee0b2
LW
4500
4501 retry:
5db06880 4502#ifdef PERL_MAD
cd81e915
NC
4503 if (PL_thistoken) {
4504 sv_free(PL_thistoken);
4505 PL_thistoken = 0;
5db06880 4506 }
cd81e915 4507 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4508#endif
378cc40b
LW
4509 switch (*s) {
4510 default:
7e2040f0 4511 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4512 goto keylookup;
b1fc3636
CJ
4513 {
4514 unsigned char c = *s;
4515 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4516 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4517 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4518 } else {
4519 d = PL_linestart;
4520 }
4521 *s = '\0';
4522 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4523 }
e929a76b
LW
4524 case 4:
4525 case 26:
4526 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4527 case 0:
5db06880
NC
4528#ifdef PERL_MAD
4529 if (PL_madskills)
cd81e915 4530 PL_faketokens = 0;
5db06880 4531#endif
3280af22
NIS
4532 if (!PL_rsfp) {
4533 PL_last_uni = 0;
4534 PL_last_lop = 0;
c5ee2135 4535 if (PL_lex_brackets) {
10edeb5d
JH
4536 yyerror((const char *)
4537 (PL_lex_formbrack
4538 ? "Format not terminated"
4539 : "Missing right curly or square bracket"));
c5ee2135 4540 }
4e553d73 4541 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4542 "### Tokener got EOF\n");
5f80b19c 4543 } );
79072805 4544 TOKEN(0);
463ee0b2 4545 }
3280af22 4546 if (s++ < PL_bufend)
a687059c 4547 goto retry; /* ignore stray nulls */
3280af22
NIS
4548 PL_last_uni = 0;
4549 PL_last_lop = 0;
4550 if (!PL_in_eval && !PL_preambled) {
4551 PL_preambled = TRUE;
5db06880
NC
4552#ifdef PERL_MAD
4553 if (PL_madskills)
cd81e915 4554 PL_faketokens = 1;
5db06880 4555#endif
5ab7ff98
NC
4556 if (PL_perldb) {
4557 /* Generate a string of Perl code to load the debugger.
4558 * If PERL5DB is set, it will return the contents of that,
4559 * otherwise a compile-time require of perl5db.pl. */
4560
4561 const char * const pdb = PerlEnv_getenv("PERL5DB");
4562
4563 if (pdb) {
4564 sv_setpv(PL_linestr, pdb);
4565 sv_catpvs(PL_linestr,";");
4566 } else {
4567 SETERRNO(0,SS_NORMAL);
4568 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4569 }
4570 } else
4571 sv_setpvs(PL_linestr,"");
c62eb204
NC
4572 if (PL_preambleav) {
4573 SV **svp = AvARRAY(PL_preambleav);
4574 SV **const end = svp + AvFILLp(PL_preambleav);
4575 while(svp <= end) {
4576 sv_catsv(PL_linestr, *svp);
4577 ++svp;
396482e1 4578 sv_catpvs(PL_linestr, ";");
91b7def8 4579 }
daba3364 4580 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4581 PL_preambleav = NULL;
91b7def8 4582 }
9f639728
FR
4583 if (PL_minus_E)
4584 sv_catpvs(PL_linestr,
4585 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4586 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4587 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4588 if (PL_minus_l)
396482e1 4589 sv_catpvs(PL_linestr,"chomp;");
3280af22 4590 if (PL_minus_a) {
3280af22 4591 if (PL_minus_F) {
3792a11b
NC
4592 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4593 || *PL_splitstr == '"')
3280af22 4594 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4595 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4596 else {
c8ef6a4b
NC
4597 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4598 bytes can be used as quoting characters. :-) */
dd374669 4599 const char *splits = PL_splitstr;
91d456ae 4600 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4601 do {
4602 /* Need to \ \s */
dd374669
AL
4603 if (*splits == '\\')
4604 sv_catpvn(PL_linestr, splits, 1);
4605 sv_catpvn(PL_linestr, splits, 1);
4606 } while (*splits++);
48c4c863
NC
4607 /* This loop will embed the trailing NUL of
4608 PL_linestr as the last thing it does before
4609 terminating. */
396482e1 4610 sv_catpvs(PL_linestr, ");");
54310121 4611 }
2304df62
AD
4612 }
4613 else
396482e1 4614 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4615 }
79072805 4616 }
396482e1 4617 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4618 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4619 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4620 PL_last_lop = PL_last_uni = NULL;
65269a95 4621 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4622 update_debugger_info(PL_linestr, NULL, 0);
79072805 4623 goto retry;
a687059c 4624 }
e929a76b 4625 do {
580561a3
Z
4626 fake_eof = 0;
4627 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4628 if (0) {
7e28d3af 4629 fake_eof:
f0e67a1d
Z
4630 fake_eof = LEX_FAKE_EOF;
4631 }
4632 PL_bufptr = PL_bufend;
17cc9359 4633 CopLINE_inc(PL_curcop);
f0e67a1d 4634 if (!lex_next_chunk(fake_eof)) {
17cc9359 4635 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4636 s = PL_bufptr;
4637 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4638 }
17cc9359 4639 CopLINE_dec(PL_curcop);
5db06880 4640#ifdef PERL_MAD
f0e67a1d 4641 if (!PL_rsfp)
cd81e915 4642 PL_realtokenstart = -1;
5db06880 4643#endif
f0e67a1d 4644 s = PL_bufptr;
7aa207d6
JH
4645 /* If it looks like the start of a BOM or raw UTF-16,
4646 * check if it in fact is. */
580561a3 4647 if (bof && PL_rsfp &&
7aa207d6
JH
4648 (*s == 0 ||
4649 *(U8*)s == 0xEF ||
4650 *(U8*)s >= 0xFE ||
4651 s[1] == 0)) {
eb160463 4652 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4653 if (bof) {
3280af22 4654 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4655 s = swallow_bom((U8*)s);
e929a76b 4656 }
378cc40b 4657 }
3280af22 4658 if (PL_doextract) {
a0d0e21e 4659 /* Incest with pod. */
5db06880
NC
4660#ifdef PERL_MAD
4661 if (PL_madskills)
cd81e915 4662 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4663#endif
01a57ef7 4664 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4665 sv_setpvs(PL_linestr, "");
3280af22
NIS
4666 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4667 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4668 PL_last_lop = PL_last_uni = NULL;
3280af22 4669 PL_doextract = FALSE;
a0d0e21e 4670 }
4e553d73 4671 }
85613cab
Z
4672 if (PL_rsfp)
4673 incline(s);
3280af22
NIS
4674 } while (PL_doextract);
4675 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4676 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4677 PL_last_lop = PL_last_uni = NULL;
57843af0 4678 if (CopLINE(PL_curcop) == 1) {
3280af22 4679 while (s < PL_bufend && isSPACE(*s))
79072805 4680 s++;
a0d0e21e 4681 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4682 s++;
5db06880
NC
4683#ifdef PERL_MAD
4684 if (PL_madskills)
cd81e915 4685 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4686#endif
bd61b366 4687 d = NULL;
3280af22 4688 if (!PL_in_eval) {
44a8e56a 4689 if (*s == '#' && *(s+1) == '!')
4690 d = s + 2;
4691#ifdef ALTERNATE_SHEBANG
4692 else {
bfed75c6 4693 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4694 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4695 d = s + (sizeof(as) - 1);
4696 }
4697#endif /* ALTERNATE_SHEBANG */
4698 }
4699 if (d) {
b8378b72 4700 char *ipath;
774d564b 4701 char *ipathend;
b8378b72 4702
774d564b 4703 while (isSPACE(*d))
b8378b72
CS
4704 d++;
4705 ipath = d;
774d564b 4706 while (*d && !isSPACE(*d))
4707 d++;
4708 ipathend = d;
4709
4710#ifdef ARG_ZERO_IS_SCRIPT
4711 if (ipathend > ipath) {
4712 /*
4713 * HP-UX (at least) sets argv[0] to the script name,
4714 * which makes $^X incorrect. And Digital UNIX and Linux,
4715 * at least, set argv[0] to the basename of the Perl
4716 * interpreter. So, having found "#!", we'll set it right.
4717 */
fafc274c
NC
4718 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4719 SVt_PV)); /* $^X */
774d564b 4720 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4721 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4722 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4723 SvSETMAGIC(x);
4724 }
556c1dec
JH
4725 else {
4726 STRLEN blen;
4727 STRLEN llen;
cfd0369c 4728 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4729 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4730 if (llen < blen) {
4731 bstart += blen - llen;
4732 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4733 sv_setpvn(x, ipath, ipathend - ipath);
4734 SvSETMAGIC(x);
4735 }
4736 }
4737 }
774d564b 4738 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4739 }
774d564b 4740#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4741
4742 /*
4743 * Look for options.
4744 */
748a9306 4745 d = instr(s,"perl -");
84e30d1a 4746 if (!d) {
748a9306 4747 d = instr(s,"perl");
84e30d1a
GS
4748#if defined(DOSISH)
4749 /* avoid getting into infinite loops when shebang
4750 * line contains "Perl" rather than "perl" */
4751 if (!d) {
4752 for (d = ipathend-4; d >= ipath; --d) {
4753 if ((*d == 'p' || *d == 'P')
4754 && !ibcmp(d, "perl", 4))
4755 {
4756 break;
4757 }
4758 }
4759 if (d < ipath)
bd61b366 4760 d = NULL;
84e30d1a
GS
4761 }
4762#endif
4763 }
44a8e56a 4764#ifdef ALTERNATE_SHEBANG
4765 /*
4766 * If the ALTERNATE_SHEBANG on this system starts with a
4767 * character that can be part of a Perl expression, then if
4768 * we see it but not "perl", we're probably looking at the
4769 * start of Perl code, not a request to hand off to some
4770 * other interpreter. Similarly, if "perl" is there, but
4771 * not in the first 'word' of the line, we assume the line
4772 * contains the start of the Perl program.
44a8e56a 4773 */
4774 if (d && *s != '#') {
f54cb97a 4775 const char *c = ipath;
44a8e56a 4776 while (*c && !strchr("; \t\r\n\f\v#", *c))
4777 c++;
4778 if (c < d)
bd61b366 4779 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4780 else
4781 *s = '#'; /* Don't try to parse shebang line */
4782 }
774d564b 4783#endif /* ALTERNATE_SHEBANG */
748a9306 4784 if (!d &&
44a8e56a 4785 *s == '#' &&
774d564b 4786 ipathend > ipath &&
3280af22 4787 !PL_minus_c &&
748a9306 4788 !instr(s,"indir") &&
3280af22 4789 instr(PL_origargv[0],"perl"))
748a9306 4790 {
27da23d5 4791 dVAR;
9f68db38 4792 char **newargv;
9f68db38 4793
774d564b 4794 *ipathend = '\0';
4795 s = ipathend + 1;
3280af22 4796 while (s < PL_bufend && isSPACE(*s))
9f68db38 4797 s++;
3280af22 4798 if (s < PL_bufend) {
d85f917e 4799 Newx(newargv,PL_origargc+3,char*);
9f68db38 4800 newargv[1] = s;
3280af22 4801 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4802 s++;
4803 *s = '\0';
3280af22 4804 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4805 }
4806 else
3280af22 4807 newargv = PL_origargv;
774d564b 4808 newargv[0] = ipath;
b35112e7 4809 PERL_FPU_PRE_EXEC
b4748376 4810 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4811 PERL_FPU_POST_EXEC
cea2e8a9 4812 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4813 }
748a9306 4814 if (d) {
c35e046a
AL
4815 while (*d && !isSPACE(*d))
4816 d++;
4817 while (SPACE_OR_TAB(*d))
4818 d++;
748a9306
LW
4819
4820 if (*d++ == '-') {
f54cb97a 4821 const bool switches_done = PL_doswitches;
fb993905
GA
4822 const U32 oldpdb = PL_perldb;
4823 const bool oldn = PL_minus_n;
4824 const bool oldp = PL_minus_p;
c7030b81 4825 const char *d1 = d;
fb993905 4826
8cc95fdb 4827 do {
4ba71d51
FC
4828 bool baduni = FALSE;
4829 if (*d1 == 'C') {
bd0ab00d
NC
4830 const char *d2 = d1 + 1;
4831 if (parse_unicode_opts((const char **)&d2)
4832 != PL_unicode)
4833 baduni = TRUE;
4ba71d51
FC
4834 }
4835 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4836 const char * const m = d1;
4837 while (*d1 && !isSPACE(*d1))
4838 d1++;
cea2e8a9 4839 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4840 (int)(d1 - m), m);
8cc95fdb 4841 }
c7030b81
NC
4842 d1 = moreswitches(d1);
4843 } while (d1);
f0b2cf55
YST
4844 if (PL_doswitches && !switches_done) {
4845 int argc = PL_origargc;
4846 char **argv = PL_origargv;
4847 do {
4848 argc--,argv++;
4849 } while (argc && argv[0][0] == '-' && argv[0][1]);
4850 init_argv_symbols(argc,argv);
4851 }
65269a95 4852 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4853 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4854 /* if we have already added "LINE: while (<>) {",
4855 we must not do it again */
748a9306 4856 {
76f68e9b 4857 sv_setpvs(PL_linestr, "");
3280af22
NIS
4858 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4859 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4860 PL_last_lop = PL_last_uni = NULL;
3280af22 4861 PL_preambled = FALSE;
65269a95 4862 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4863 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4864 goto retry;
4865 }
a0d0e21e 4866 }
79072805 4867 }
9f68db38 4868 }
79072805 4869 }
3280af22
NIS
4870 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4871 PL_bufptr = s;
4872 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4873 return yylex();
ae986130 4874 }
378cc40b 4875 goto retry;
4fdae800 4876 case '\r':
6a27c188 4877#ifdef PERL_STRICT_CR
cea2e8a9 4878 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4879 Perl_croak(aTHX_
cc507455 4880 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4881#endif
4fdae800 4882 case ' ': case '\t': case '\f': case 013:
5db06880 4883#ifdef PERL_MAD
cd81e915 4884 PL_realtokenstart = -1;
ac372eb8
RD
4885 if (!PL_thiswhite)
4886 PL_thiswhite = newSVpvs("");
4887 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4888#endif
ac372eb8 4889 s++;
378cc40b 4890 goto retry;
378cc40b 4891 case '#':
e929a76b 4892 case '\n':
5db06880 4893#ifdef PERL_MAD
cd81e915 4894 PL_realtokenstart = -1;
5db06880 4895 if (PL_madskills)
cd81e915 4896 PL_faketokens = 0;
5db06880 4897#endif
3280af22 4898 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4899 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4900 /* handle eval qq[#line 1 "foo"\n ...] */
4901 CopLINE_dec(PL_curcop);
4902 incline(s);
4903 }
5db06880
NC
4904 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4905 s = SKIPSPACE0(s);
4906 if (!PL_in_eval || PL_rsfp)
4907 incline(s);
4908 }
4909 else {
4910 d = s;
4911 while (d < PL_bufend && *d != '\n')
4912 d++;
4913 if (d < PL_bufend)
4914 d++;
4915 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4916 Perl_croak(aTHX_ "panic: input overflow");
4917#ifdef PERL_MAD
4918 if (PL_madskills)
cd81e915 4919 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4920#endif
4921 s = d;
4922 incline(s);
4923 }
3280af22
NIS
4924 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4925 PL_bufptr = s;
4926 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4927 return yylex();
a687059c 4928 }
378cc40b 4929 }
a687059c 4930 else {
5db06880
NC
4931#ifdef PERL_MAD
4932 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4933 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 4934 PL_faketokens = 0;
5db06880
NC
4935 s = SKIPSPACE0(s);
4936 TOKEN(PEG); /* make sure any #! line is accessible */
4937 }
4938 s = SKIPSPACE0(s);
4939 }
4940 else {
4941/* if (PL_madskills && PL_lex_formbrack) { */
4942 d = s;
4943 while (d < PL_bufend && *d != '\n')
4944 d++;
4945 if (d < PL_bufend)
4946 d++;
4947 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4948 Perl_croak(aTHX_ "panic: input overflow");
4949 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 4950 if (!PL_thiswhite)
6b29d1f5 4951 PL_thiswhite = newSVpvs("");
5db06880 4952 if (CopLINE(PL_curcop) == 1) {
76f68e9b 4953 sv_setpvs(PL_thiswhite, "");
cd81e915 4954 PL_faketokens = 0;
5db06880 4955 }
cd81e915 4956 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
4957 }
4958 s = d;
4959/* }
4960 *s = '\0';
4961 PL_bufend = s; */
4962 }
4963#else
378cc40b 4964 *s = '\0';
3280af22 4965 PL_bufend = s;
5db06880 4966#endif
a687059c 4967 }
378cc40b
LW
4968 goto retry;
4969 case '-':
79072805 4970 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 4971 I32 ftst = 0;
90771dc0 4972 char tmp;
e5edeb50 4973
378cc40b 4974 s++;
3280af22 4975 PL_bufptr = s;
748a9306
LW
4976 tmp = *s++;
4977
bf4acbe4 4978 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
4979 s++;
4980
4981 if (strnEQ(s,"=>",2)) {
3280af22 4982 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 4983 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
4984 OPERATOR('-'); /* unary minus */
4985 }
3280af22 4986 PL_last_uni = PL_oldbufptr;
748a9306 4987 switch (tmp) {
e5edeb50
JH
4988 case 'r': ftst = OP_FTEREAD; break;
4989 case 'w': ftst = OP_FTEWRITE; break;
4990 case 'x': ftst = OP_FTEEXEC; break;
4991 case 'o': ftst = OP_FTEOWNED; break;
4992 case 'R': ftst = OP_FTRREAD; break;
4993 case 'W': ftst = OP_FTRWRITE; break;
4994 case 'X': ftst = OP_FTREXEC; break;
4995 case 'O': ftst = OP_FTROWNED; break;
4996 case 'e': ftst = OP_FTIS; break;
4997 case 'z': ftst = OP_FTZERO; break;
4998 case 's': ftst = OP_FTSIZE; break;
4999 case 'f': ftst = OP_FTFILE; break;
5000 case 'd': ftst = OP_FTDIR; break;
5001 case 'l': ftst = OP_FTLINK; break;
5002 case 'p': ftst = OP_FTPIPE; break;
5003 case 'S': ftst = OP_FTSOCK; break;
5004 case 'u': ftst = OP_FTSUID; break;
5005 case 'g': ftst = OP_FTSGID; break;
5006 case 'k': ftst = OP_FTSVTX; break;
5007 case 'b': ftst = OP_FTBLK; break;
5008 case 'c': ftst = OP_FTCHR; break;
5009 case 't': ftst = OP_FTTTY; break;
5010 case 'T': ftst = OP_FTTEXT; break;
5011 case 'B': ftst = OP_FTBINARY; break;
5012 case 'M': case 'A': case 'C':
fafc274c 5013 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5014 switch (tmp) {
5015 case 'M': ftst = OP_FTMTIME; break;
5016 case 'A': ftst = OP_FTATIME; break;
5017 case 'C': ftst = OP_FTCTIME; break;
5018 default: break;
5019 }
5020 break;
378cc40b 5021 default:
378cc40b
LW
5022 break;
5023 }
e5edeb50 5024 if (ftst) {
eb160463 5025 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5026 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5027 "### Saw file test %c\n", (int)tmp);
5f80b19c 5028 } );
e5edeb50
JH
5029 FTST(ftst);
5030 }
5031 else {
5032 /* Assume it was a minus followed by a one-letter named
5033 * subroutine call (or a -bareword), then. */
95c31fe3 5034 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5035 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5036 (int) tmp);
5f80b19c 5037 } );
3cf7b4c4 5038 s = --PL_bufptr;
e5edeb50 5039 }
378cc40b 5040 }
90771dc0
NC
5041 {
5042 const char tmp = *s++;
5043 if (*s == tmp) {
5044 s++;
5045 if (PL_expect == XOPERATOR)
5046 TERM(POSTDEC);
5047 else
5048 OPERATOR(PREDEC);
5049 }
5050 else if (*s == '>') {
5051 s++;
29595ff2 5052 s = SKIPSPACE1(s);
90771dc0
NC
5053 if (isIDFIRST_lazy_if(s,UTF)) {
5054 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5055 TOKEN(ARROW);
5056 }
5057 else if (*s == '$')
5058 OPERATOR(ARROW);
5059 else
5060 TERM(ARROW);
5061 }
3280af22 5062 if (PL_expect == XOPERATOR)
90771dc0
NC
5063 Aop(OP_SUBTRACT);
5064 else {
5065 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5066 check_uni();
5067 OPERATOR('-'); /* unary minus */
79072805 5068 }
2f3197b3 5069 }
79072805 5070
378cc40b 5071 case '+':
90771dc0
NC
5072 {
5073 const char tmp = *s++;
5074 if (*s == tmp) {
5075 s++;
5076 if (PL_expect == XOPERATOR)
5077 TERM(POSTINC);
5078 else
5079 OPERATOR(PREINC);
5080 }
3280af22 5081 if (PL_expect == XOPERATOR)
90771dc0
NC
5082 Aop(OP_ADD);
5083 else {
5084 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5085 check_uni();
5086 OPERATOR('+');
5087 }
2f3197b3 5088 }
a687059c 5089
378cc40b 5090 case '*':
3280af22
NIS
5091 if (PL_expect != XOPERATOR) {
5092 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5093 PL_expect = XOPERATOR;
5094 force_ident(PL_tokenbuf, '*');
5095 if (!*PL_tokenbuf)
a0d0e21e 5096 PREREF('*');
79072805 5097 TERM('*');
a687059c 5098 }
79072805
LW
5099 s++;
5100 if (*s == '*') {
a687059c 5101 s++;
79072805 5102 PWop(OP_POW);
a687059c 5103 }
79072805
LW
5104 Mop(OP_MULTIPLY);
5105
378cc40b 5106 case '%':
3280af22 5107 if (PL_expect == XOPERATOR) {
bbce6d69 5108 ++s;
5109 Mop(OP_MODULO);
a687059c 5110 }
3280af22 5111 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5112 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5113 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5114 if (!PL_tokenbuf[1]) {
bbce6d69 5115 PREREF('%');
a687059c 5116 }
3280af22 5117 PL_pending_ident = '%';
bbce6d69 5118 TERM('%');
a687059c 5119
378cc40b 5120 case '^':
79072805 5121 s++;
a0d0e21e 5122 BOop(OP_BIT_XOR);
79072805 5123 case '[':
3280af22 5124 PL_lex_brackets++;
df3467db
IG
5125 {
5126 const char tmp = *s++;
5127 OPERATOR(tmp);
5128 }
378cc40b 5129 case '~':
0d863452 5130 if (s[1] == '~'
3e7dd34d 5131 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
5132 {
5133 s += 2;
5134 Eop(OP_SMARTMATCH);
5135 }
378cc40b 5136 case ',':
90771dc0
NC
5137 {
5138 const char tmp = *s++;
5139 OPERATOR(tmp);
5140 }
a0d0e21e
LW
5141 case ':':
5142 if (s[1] == ':') {
5143 len = 0;
0bfa2a8a 5144 goto just_a_word_zero_gv;
a0d0e21e
LW
5145 }
5146 s++;
09bef843
SB
5147 switch (PL_expect) {
5148 OP *attrs;
5db06880
NC
5149#ifdef PERL_MAD
5150 I32 stuffstart;
5151#endif
09bef843
SB
5152 case XOPERATOR:
5153 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5154 break;
5155 PL_bufptr = s; /* update in case we back off */
d83f38d8
NC
5156 if (*s == '=') {
5157 deprecate(":= for an empty attribute list");
5158 }
09bef843
SB
5159 goto grabattrs;
5160 case XATTRBLOCK:
5161 PL_expect = XBLOCK;
5162 goto grabattrs;
5163 case XATTRTERM:
5164 PL_expect = XTERMBLOCK;
5165 grabattrs:
5db06880
NC
5166#ifdef PERL_MAD
5167 stuffstart = s - SvPVX(PL_linestr) - 1;
5168#endif
29595ff2 5169 s = PEEKSPACE(s);
5f66b61c 5170 attrs = NULL;
7e2040f0 5171 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5172 I32 tmp;
5cc237b8 5173 SV *sv;
09bef843 5174 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5175 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5176 if (tmp < 0) tmp = -tmp;
5177 switch (tmp) {
5178 case KEY_or:
5179 case KEY_and:
5180 case KEY_for:
11baf631 5181 case KEY_foreach:
f9829d6b
GS
5182 case KEY_unless:
5183 case KEY_if:
5184 case KEY_while:
5185 case KEY_until:
5186 goto got_attrs;
5187 default:
5188 break;
5189 }
5190 }
5cc237b8 5191 sv = newSVpvn(s, len);
09bef843
SB
5192 if (*d == '(') {
5193 d = scan_str(d,TRUE,TRUE);
5194 if (!d) {
09bef843
SB
5195 /* MUST advance bufptr here to avoid bogus
5196 "at end of line" context messages from yyerror().
5197 */
5198 PL_bufptr = s + len;
5199 yyerror("Unterminated attribute parameter in attribute list");
5200 if (attrs)
5201 op_free(attrs);
5cc237b8 5202 sv_free(sv);
bbf60fe6 5203 return REPORT(0); /* EOF indicator */
09bef843
SB
5204 }
5205 }
5206 if (PL_lex_stuff) {
09bef843
SB
5207 sv_catsv(sv, PL_lex_stuff);
5208 attrs = append_elem(OP_LIST, attrs,
5209 newSVOP(OP_CONST, 0, sv));
5210 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5211 PL_lex_stuff = NULL;
09bef843
SB
5212 }
5213 else {
5cc237b8
BS
5214 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5215 sv_free(sv);
1108974d 5216 if (PL_in_my == KEY_our) {
df9a6019 5217 deprecate(":unique");
1108974d 5218 }
bfed75c6 5219 else
371fce9b
DM
5220 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5221 }
5222
d3cea301
SB
5223 /* NOTE: any CV attrs applied here need to be part of
5224 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5225 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5226 sv_free(sv);
78f9721b 5227 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5228 }
5229 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5230 sv_free(sv);
8e5dadda 5231 deprecate(":locked");
5cc237b8
BS
5232 }
5233 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5234 sv_free(sv);
78f9721b 5235 CvMETHOD_on(PL_compcv);
5cc237b8 5236 }
78f9721b
SM
5237 /* After we've set the flags, it could be argued that
5238 we don't need to do the attributes.pm-based setting
5239 process, and shouldn't bother appending recognized
d3cea301
SB
5240 flags. To experiment with that, uncomment the
5241 following "else". (Note that's already been
5242 uncommented. That keeps the above-applied built-in
5243 attributes from being intercepted (and possibly
5244 rejected) by a package's attribute routines, but is
5245 justified by the performance win for the common case
5246 of applying only built-in attributes.) */
0256094b 5247 else
78f9721b
SM
5248 attrs = append_elem(OP_LIST, attrs,
5249 newSVOP(OP_CONST, 0,
5cc237b8 5250 sv));
09bef843 5251 }
29595ff2 5252 s = PEEKSPACE(d);
0120eecf 5253 if (*s == ':' && s[1] != ':')
29595ff2 5254 s = PEEKSPACE(s+1);
0120eecf
GS
5255 else if (s == d)
5256 break; /* require real whitespace or :'s */
29595ff2 5257 /* XXX losing whitespace on sequential attributes here */
09bef843 5258 }
90771dc0
NC
5259 {
5260 const char tmp
5261 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5262 if (*s != ';' && *s != '}' && *s != tmp
5263 && (tmp != '=' || *s != ')')) {
5264 const char q = ((*s == '\'') ? '"' : '\'');
5265 /* If here for an expression, and parsed no attrs, back
5266 off. */
5267 if (tmp == '=' && !attrs) {
5268 s = PL_bufptr;
5269 break;
5270 }
5271 /* MUST advance bufptr here to avoid bogus "at end of line"
5272 context messages from yyerror().
5273 */
5274 PL_bufptr = s;
10edeb5d
JH
5275 yyerror( (const char *)
5276 (*s
5277 ? Perl_form(aTHX_ "Invalid separator character "
5278 "%c%c%c in attribute list", q, *s, q)
5279 : "Unterminated attribute list" ) );
90771dc0
NC
5280 if (attrs)
5281 op_free(attrs);
5282 OPERATOR(':');
09bef843 5283 }
09bef843 5284 }
f9829d6b 5285 got_attrs:
09bef843 5286 if (attrs) {
cd81e915 5287 start_force(PL_curforce);
9ded7720 5288 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5289 CURMAD('_', PL_nextwhite);
89122651 5290 force_next(THING);
5db06880
NC
5291 }
5292#ifdef PERL_MAD
5293 if (PL_madskills) {
cd81e915 5294 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5295 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5296 }
5db06880 5297#endif
09bef843
SB
5298 TOKEN(COLONATTR);
5299 }
a0d0e21e 5300 OPERATOR(':');
8990e307
LW
5301 case '(':
5302 s++;
3280af22
NIS
5303 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5304 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5305 else
3280af22 5306 PL_expect = XTERM;
29595ff2 5307 s = SKIPSPACE1(s);
a0d0e21e 5308 TOKEN('(');
378cc40b 5309 case ';':
f4dd75d9 5310 CLINE;
90771dc0
NC
5311 {
5312 const char tmp = *s++;
5313 OPERATOR(tmp);
5314 }
378cc40b 5315 case ')':
90771dc0
NC
5316 {
5317 const char tmp = *s++;
29595ff2 5318 s = SKIPSPACE1(s);
90771dc0
NC
5319 if (*s == '{')
5320 PREBLOCK(tmp);
5321 TERM(tmp);
5322 }
79072805
LW
5323 case ']':
5324 s++;
3280af22 5325 if (PL_lex_brackets <= 0)
d98d5fff 5326 yyerror("Unmatched right square bracket");
463ee0b2 5327 else
3280af22
NIS
5328 --PL_lex_brackets;
5329 if (PL_lex_state == LEX_INTERPNORMAL) {
5330 if (PL_lex_brackets == 0) {
02255c60
FC
5331 if (*s == '-' && s[1] == '>')
5332 PL_lex_state = LEX_INTERPENDMAYBE;
5333 else if (*s != '[' && *s != '{')
3280af22 5334 PL_lex_state = LEX_INTERPEND;
79072805
LW
5335 }
5336 }
4633a7c4 5337 TERM(']');
79072805
LW
5338 case '{':
5339 leftbracket:
79072805 5340 s++;
3280af22 5341 if (PL_lex_brackets > 100) {
8edd5f42 5342 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5343 }
3280af22 5344 switch (PL_expect) {
a0d0e21e 5345 case XTERM:
3280af22 5346 if (PL_lex_formbrack) {
a0d0e21e
LW
5347 s--;
5348 PRETERMBLOCK(DO);
5349 }
3280af22
NIS
5350 if (PL_oldoldbufptr == PL_last_lop)
5351 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5352 else
3280af22 5353 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5354 OPERATOR(HASHBRACK);
a0d0e21e 5355 case XOPERATOR:
bf4acbe4 5356 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5357 s++;
44a8e56a 5358 d = s;
3280af22
NIS
5359 PL_tokenbuf[0] = '\0';
5360 if (d < PL_bufend && *d == '-') {
5361 PL_tokenbuf[0] = '-';
44a8e56a 5362 d++;
bf4acbe4 5363 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5364 d++;
5365 }
7e2040f0 5366 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5367 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5368 FALSE, &len);
bf4acbe4 5369 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5370 d++;
5371 if (*d == '}') {
f54cb97a 5372 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5373 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5374 if (minus)
5375 force_next('-');
748a9306
LW
5376 }
5377 }
5378 /* FALL THROUGH */
09bef843 5379 case XATTRBLOCK:
748a9306 5380 case XBLOCK:
3280af22
NIS
5381 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5382 PL_expect = XSTATE;
a0d0e21e 5383 break;
09bef843 5384 case XATTRTERM:
a0d0e21e 5385 case XTERMBLOCK:
3280af22
NIS
5386 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5387 PL_expect = XSTATE;
a0d0e21e
LW
5388 break;
5389 default: {
f54cb97a 5390 const char *t;
3280af22
NIS
5391 if (PL_oldoldbufptr == PL_last_lop)
5392 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5393 else
3280af22 5394 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5395 s = SKIPSPACE1(s);
8452ff4b
SB
5396 if (*s == '}') {
5397 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5398 PL_expect = XTERM;
5399 /* This hack is to get the ${} in the message. */
5400 PL_bufptr = s+1;
5401 yyerror("syntax error");
5402 break;
5403 }
a0d0e21e 5404 OPERATOR(HASHBRACK);
8452ff4b 5405 }
b8a4b1be
GS
5406 /* This hack serves to disambiguate a pair of curlies
5407 * as being a block or an anon hash. Normally, expectation
5408 * determines that, but in cases where we're not in a
5409 * position to expect anything in particular (like inside
5410 * eval"") we have to resolve the ambiguity. This code
5411 * covers the case where the first term in the curlies is a
5412 * quoted string. Most other cases need to be explicitly
a0288114 5413 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5414 * curly in order to force resolution as an anon hash.
5415 *
5416 * XXX should probably propagate the outer expectation
5417 * into eval"" to rely less on this hack, but that could
5418 * potentially break current behavior of eval"".
5419 * GSAR 97-07-21
5420 */
5421 t = s;
5422 if (*s == '\'' || *s == '"' || *s == '`') {
5423 /* common case: get past first string, handling escapes */
3280af22 5424 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5425 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5426 t++;
5427 t++;
a0d0e21e 5428 }
b8a4b1be 5429 else if (*s == 'q') {
3280af22 5430 if (++t < PL_bufend
b8a4b1be 5431 && (!isALNUM(*t)
3280af22 5432 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5433 && !isALNUM(*t))))
5434 {
abc667d1 5435 /* skip q//-like construct */
f54cb97a 5436 const char *tmps;
b8a4b1be
GS
5437 char open, close, term;
5438 I32 brackets = 1;
5439
3280af22 5440 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5441 t++;
abc667d1
DM
5442 /* check for q => */
5443 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5444 OPERATOR(HASHBRACK);
5445 }
b8a4b1be
GS
5446 term = *t;
5447 open = term;
5448 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5449 term = tmps[5];
5450 close = term;
5451 if (open == close)
3280af22
NIS
5452 for (t++; t < PL_bufend; t++) {
5453 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5454 t++;
6d07e5e9 5455 else if (*t == open)
b8a4b1be
GS
5456 break;
5457 }
abc667d1 5458 else {
3280af22
NIS
5459 for (t++; t < PL_bufend; t++) {
5460 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5461 t++;
6d07e5e9 5462 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5463 break;
5464 else if (*t == open)
5465 brackets++;
5466 }
abc667d1
DM
5467 }
5468 t++;
b8a4b1be 5469 }
abc667d1
DM
5470 else
5471 /* skip plain q word */
5472 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5473 t += UTF8SKIP(t);
a0d0e21e 5474 }
7e2040f0 5475 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5476 t += UTF8SKIP(t);
7e2040f0 5477 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5478 t += UTF8SKIP(t);
a0d0e21e 5479 }
3280af22 5480 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5481 t++;
b8a4b1be
GS
5482 /* if comma follows first term, call it an anon hash */
5483 /* XXX it could be a comma expression with loop modifiers */
3280af22 5484 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5485 || (*t == '=' && t[1] == '>')))
a0d0e21e 5486 OPERATOR(HASHBRACK);
3280af22 5487 if (PL_expect == XREF)
4e4e412b 5488 PL_expect = XTERM;
a0d0e21e 5489 else {
3280af22
NIS
5490 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5491 PL_expect = XSTATE;
a0d0e21e 5492 }
8990e307 5493 }
a0d0e21e 5494 break;
463ee0b2 5495 }
6154021b 5496 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5497 if (isSPACE(*s) || *s == '#')
3280af22 5498 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5499 TOKEN('{');
378cc40b 5500 case '}':
79072805
LW
5501 rightbracket:
5502 s++;
3280af22 5503 if (PL_lex_brackets <= 0)
d98d5fff 5504 yyerror("Unmatched right curly bracket");
463ee0b2 5505 else
3280af22 5506 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5507 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5508 PL_lex_formbrack = 0;
5509 if (PL_lex_state == LEX_INTERPNORMAL) {
5510 if (PL_lex_brackets == 0) {
9059aa12
LW
5511 if (PL_expect & XFAKEBRACK) {
5512 PL_expect &= XENUMMASK;
3280af22
NIS
5513 PL_lex_state = LEX_INTERPEND;
5514 PL_bufptr = s;
5db06880
NC
5515#if 0
5516 if (PL_madskills) {
cd81e915 5517 if (!PL_thiswhite)
6b29d1f5 5518 PL_thiswhite = newSVpvs("");
76f68e9b 5519 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5520 }
5521#endif
cea2e8a9 5522 return yylex(); /* ignore fake brackets */
79072805 5523 }
fa83b5b6 5524 if (*s == '-' && s[1] == '>')
3280af22 5525 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5526 else if (*s != '[' && *s != '{')
3280af22 5527 PL_lex_state = LEX_INTERPEND;
79072805
LW
5528 }
5529 }
9059aa12
LW
5530 if (PL_expect & XFAKEBRACK) {
5531 PL_expect &= XENUMMASK;
3280af22 5532 PL_bufptr = s;
cea2e8a9 5533 return yylex(); /* ignore fake brackets */
748a9306 5534 }
cd81e915 5535 start_force(PL_curforce);
5db06880
NC
5536 if (PL_madskills) {
5537 curmad('X', newSVpvn(s-1,1));
cd81e915 5538 CURMAD('_', PL_thiswhite);
5db06880 5539 }
79072805 5540 force_next('}');
5db06880 5541#ifdef PERL_MAD
cd81e915 5542 if (!PL_thistoken)
6b29d1f5 5543 PL_thistoken = newSVpvs("");
5db06880 5544#endif
79072805 5545 TOKEN(';');
378cc40b
LW
5546 case '&':
5547 s++;
90771dc0 5548 if (*s++ == '&')
a0d0e21e 5549 AOPERATOR(ANDAND);
378cc40b 5550 s--;
3280af22 5551 if (PL_expect == XOPERATOR) {
041457d9
DM
5552 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5553 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5554 {
57843af0 5555 CopLINE_dec(PL_curcop);
f1f66076 5556 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5557 CopLINE_inc(PL_curcop);
463ee0b2 5558 }
79072805 5559 BAop(OP_BIT_AND);
463ee0b2 5560 }
79072805 5561
3280af22
NIS
5562 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5563 if (*PL_tokenbuf) {
5564 PL_expect = XOPERATOR;
5565 force_ident(PL_tokenbuf, '&');
463ee0b2 5566 }
79072805
LW
5567 else
5568 PREREF('&');
6154021b 5569 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5570 TERM('&');
5571
378cc40b
LW
5572 case '|':
5573 s++;
90771dc0 5574 if (*s++ == '|')
a0d0e21e 5575 AOPERATOR(OROR);
378cc40b 5576 s--;
79072805 5577 BOop(OP_BIT_OR);
378cc40b
LW
5578 case '=':
5579 s++;
748a9306 5580 {
90771dc0
NC
5581 const char tmp = *s++;
5582 if (tmp == '=')
5583 Eop(OP_EQ);
5584 if (tmp == '>')
5585 OPERATOR(',');
5586 if (tmp == '~')
5587 PMop(OP_MATCH);
5588 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5589 && strchr("+-*/%.^&|<",tmp))
5590 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5591 "Reversed %c= operator",(int)tmp);
5592 s--;
5593 if (PL_expect == XSTATE && isALPHA(tmp) &&
5594 (s == PL_linestart+1 || s[-2] == '\n') )
5595 {
5596 if (PL_in_eval && !PL_rsfp) {
5597 d = PL_bufend;
5598 while (s < d) {
5599 if (*s++ == '\n') {
5600 incline(s);
5601 if (strnEQ(s,"=cut",4)) {
5602 s = strchr(s,'\n');
5603 if (s)
5604 s++;
5605 else
5606 s = d;
5607 incline(s);
5608 goto retry;
5609 }
5610 }
a5f75d66 5611 }
90771dc0 5612 goto retry;
a5f75d66 5613 }
5db06880
NC
5614#ifdef PERL_MAD
5615 if (PL_madskills) {
cd81e915 5616 if (!PL_thiswhite)
6b29d1f5 5617 PL_thiswhite = newSVpvs("");
cd81e915 5618 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5619 PL_bufend - PL_linestart);
5620 }
5621#endif
90771dc0
NC
5622 s = PL_bufend;
5623 PL_doextract = TRUE;
5624 goto retry;
a5f75d66 5625 }
a0d0e21e 5626 }
3280af22 5627 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5628 const char *t = s;
51882d45 5629#ifdef PERL_STRICT_CR
c35e046a 5630 while (SPACE_OR_TAB(*t))
51882d45 5631#else
c35e046a 5632 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5633#endif
c35e046a 5634 t++;
a0d0e21e
LW
5635 if (*t == '\n' || *t == '#') {
5636 s--;
3280af22 5637 PL_expect = XBLOCK;
a0d0e21e
LW
5638 goto leftbracket;
5639 }
79072805 5640 }
6154021b 5641 pl_yylval.ival = 0;
a0d0e21e 5642 OPERATOR(ASSIGNOP);
378cc40b
LW
5643 case '!':
5644 s++;
90771dc0
NC
5645 {
5646 const char tmp = *s++;
5647 if (tmp == '=') {
5648 /* was this !=~ where !~ was meant?
5649 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5650
5651 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5652 const char *t = s+1;
5653
5654 while (t < PL_bufend && isSPACE(*t))
5655 ++t;
5656
5657 if (*t == '/' || *t == '?' ||
5658 ((*t == 'm' || *t == 's' || *t == 'y')
5659 && !isALNUM(t[1])) ||
5660 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5661 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5662 "!=~ should be !~");
5663 }
5664 Eop(OP_NE);
5665 }
5666 if (tmp == '~')
5667 PMop(OP_NOT);
5668 }
378cc40b
LW
5669 s--;
5670 OPERATOR('!');
5671 case '<':
3280af22 5672 if (PL_expect != XOPERATOR) {
93a17b20 5673 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5674 check_uni();
79072805
LW
5675 if (s[1] == '<')
5676 s = scan_heredoc(s);
5677 else
5678 s = scan_inputsymbol(s);
5679 TERM(sublex_start());
378cc40b
LW
5680 }
5681 s++;
90771dc0
NC
5682 {
5683 char tmp = *s++;
5684 if (tmp == '<')
5685 SHop(OP_LEFT_SHIFT);
5686 if (tmp == '=') {
5687 tmp = *s++;
5688 if (tmp == '>')
5689 Eop(OP_NCMP);
5690 s--;
5691 Rop(OP_LE);
5692 }
395c3793 5693 }
378cc40b 5694 s--;
79072805 5695 Rop(OP_LT);
378cc40b
LW
5696 case '>':
5697 s++;
90771dc0
NC
5698 {
5699 const char tmp = *s++;
5700 if (tmp == '>')
5701 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5702 else if (tmp == '=')
90771dc0
NC
5703 Rop(OP_GE);
5704 }
378cc40b 5705 s--;
79072805 5706 Rop(OP_GT);
378cc40b
LW
5707
5708 case '$':
bbce6d69 5709 CLINE;
5710
3280af22
NIS
5711 if (PL_expect == XOPERATOR) {
5712 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5713 return deprecate_commaless_var_list();
a0d0e21e 5714 }
8990e307 5715 }
a0d0e21e 5716
c0b977fd 5717 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5718 PL_tokenbuf[0] = '@';
376b8730
SM
5719 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5720 sizeof PL_tokenbuf - 1, FALSE);
5721 if (PL_expect == XOPERATOR)
5722 no_op("Array length", s);
3280af22 5723 if (!PL_tokenbuf[1])
a0d0e21e 5724 PREREF(DOLSHARP);
3280af22
NIS
5725 PL_expect = XOPERATOR;
5726 PL_pending_ident = '#';
463ee0b2 5727 TOKEN(DOLSHARP);
79072805 5728 }
bbce6d69 5729
3280af22 5730 PL_tokenbuf[0] = '$';
376b8730
SM
5731 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5732 sizeof PL_tokenbuf - 1, FALSE);
5733 if (PL_expect == XOPERATOR)
5734 no_op("Scalar", s);
3280af22
NIS
5735 if (!PL_tokenbuf[1]) {
5736 if (s == PL_bufend)
bbce6d69 5737 yyerror("Final $ should be \\$ or $name");
5738 PREREF('$');
8990e307 5739 }
a0d0e21e 5740
bbce6d69 5741 /* This kludge not intended to be bulletproof. */
3280af22 5742 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5743 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5744 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5745 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5746 TERM(THING);
5747 }
5748
ff68c719 5749 d = s;
90771dc0
NC
5750 {
5751 const char tmp = *s;
ae28bb2a 5752 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5753 s = SKIPSPACE1(s);
ff68c719 5754
90771dc0
NC
5755 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5756 && intuit_more(s)) {
5757 if (*s == '[') {
5758 PL_tokenbuf[0] = '@';
5759 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5760 char *t = s+1;
5761
5762 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5763 t++;
90771dc0 5764 if (*t++ == ',') {
29595ff2 5765 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5766 while (t < PL_bufend && *t != ']')
5767 t++;
9014280d 5768 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5769 "Multidimensional syntax %.*s not supported",
36c7798d 5770 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5771 }
748a9306 5772 }
93a17b20 5773 }
90771dc0
NC
5774 else if (*s == '{') {
5775 char *t;
5776 PL_tokenbuf[0] = '%';
5777 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5778 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5779 {
5780 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5781 do {
5782 t++;
5783 } while (isSPACE(*t));
90771dc0 5784 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5785 STRLEN len;
90771dc0 5786 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5787 &len);
c35e046a
AL
5788 while (isSPACE(*t))
5789 t++;
780a5241 5790 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5791 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5792 "You need to quote \"%s\"",
5793 tmpbuf);
5794 }
5795 }
5796 }
93a17b20 5797 }
bbce6d69 5798
90771dc0
NC
5799 PL_expect = XOPERATOR;
5800 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5801 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5802 if (!islop || PL_last_lop_op == OP_GREPSTART)
5803 PL_expect = XOPERATOR;
5804 else if (strchr("$@\"'`q", *s))
5805 PL_expect = XTERM; /* e.g. print $fh "foo" */
5806 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5807 PL_expect = XTERM; /* e.g. print $fh &sub */
5808 else if (isIDFIRST_lazy_if(s,UTF)) {
5809 char tmpbuf[sizeof PL_tokenbuf];
5810 int t2;
5811 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5812 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5813 /* binary operators exclude handle interpretations */
5814 switch (t2) {
5815 case -KEY_x:
5816 case -KEY_eq:
5817 case -KEY_ne:
5818 case -KEY_gt:
5819 case -KEY_lt:
5820 case -KEY_ge:
5821 case -KEY_le:
5822 case -KEY_cmp:
5823 break;
5824 default:
5825 PL_expect = XTERM; /* e.g. print $fh length() */
5826 break;
5827 }
5828 }
5829 else {
5830 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5831 }
5832 }
90771dc0
NC
5833 else if (isDIGIT(*s))
5834 PL_expect = XTERM; /* e.g. print $fh 3 */
5835 else if (*s == '.' && isDIGIT(s[1]))
5836 PL_expect = XTERM; /* e.g. print $fh .3 */
5837 else if ((*s == '?' || *s == '-' || *s == '+')
5838 && !isSPACE(s[1]) && s[1] != '=')
5839 PL_expect = XTERM; /* e.g. print $fh -1 */
5840 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5841 && s[1] != '/')
5842 PL_expect = XTERM; /* e.g. print $fh /.../
5843 XXX except DORDOR operator
5844 */
5845 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5846 && s[2] != '=')
5847 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5848 }
bbce6d69 5849 }
3280af22 5850 PL_pending_ident = '$';
79072805 5851 TOKEN('$');
378cc40b
LW
5852
5853 case '@':
3280af22 5854 if (PL_expect == XOPERATOR)
bbce6d69 5855 no_op("Array", s);
3280af22
NIS
5856 PL_tokenbuf[0] = '@';
5857 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5858 if (!PL_tokenbuf[1]) {
bbce6d69 5859 PREREF('@');
5860 }
3280af22 5861 if (PL_lex_state == LEX_NORMAL)
29595ff2 5862 s = SKIPSPACE1(s);
3280af22 5863 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5864 if (*s == '{')
3280af22 5865 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5866
5867 /* Warn about @ where they meant $. */
041457d9
DM
5868 if (*s == '[' || *s == '{') {
5869 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5870 const char *t = s + 1;
7e2040f0 5871 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5872 t++;
5873 if (*t == '}' || *t == ']') {
5874 t++;
29595ff2 5875 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5876 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5877 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5878 (int)(t-PL_bufptr), PL_bufptr,
5879 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5880 }
93a17b20
LW
5881 }
5882 }
463ee0b2 5883 }
3280af22 5884 PL_pending_ident = '@';
79072805 5885 TERM('@');
378cc40b 5886
c963b151 5887 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5888 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5889 s += 2;
5890 AOPERATOR(DORDOR);
5891 }
c963b151 5892 case '?': /* may either be conditional or pattern */
be25f609 5893 if (PL_expect == XOPERATOR) {
90771dc0 5894 char tmp = *s++;
c963b151 5895 if(tmp == '?') {
be25f609 5896 OPERATOR('?');
c963b151
BD
5897 }
5898 else {
5899 tmp = *s++;
5900 if(tmp == '/') {
5901 /* A // operator. */
5902 AOPERATOR(DORDOR);
5903 }
5904 else {
5905 s--;
5906 Mop(OP_DIVIDE);
5907 }
5908 }
5909 }
5910 else {
5911 /* Disable warning on "study /blah/" */
5912 if (PL_oldoldbufptr == PL_last_uni
5913 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5914 || memNE(PL_last_uni, "study", 5)
5915 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5916 ))
5917 check_uni();
5918 s = scan_pat(s,OP_MATCH);
5919 TERM(sublex_start());
5920 }
378cc40b
LW
5921
5922 case '.':
51882d45
GS
5923 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5924#ifdef PERL_STRICT_CR
5925 && s[1] == '\n'
5926#else
5927 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5928#endif
5929 && (s == PL_linestart || s[-1] == '\n') )
5930 {
3280af22
NIS
5931 PL_lex_formbrack = 0;
5932 PL_expect = XSTATE;
79072805
LW
5933 goto rightbracket;
5934 }
be25f609 5935 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5936 s += 3;
5937 OPERATOR(YADAYADA);
5938 }
3280af22 5939 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 5940 char tmp = *s++;
a687059c
LW
5941 if (*s == tmp) {
5942 s++;
2f3197b3
LW
5943 if (*s == tmp) {
5944 s++;
6154021b 5945 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
5946 }
5947 else
6154021b 5948 pl_yylval.ival = 0;
378cc40b 5949 OPERATOR(DOTDOT);
a687059c 5950 }
79072805 5951 Aop(OP_CONCAT);
378cc40b
LW
5952 }
5953 /* FALL THROUGH */
5954 case '0': case '1': case '2': case '3': case '4':
5955 case '5': case '6': case '7': case '8': case '9':
6154021b 5956 s = scan_num(s, &pl_yylval);
931e0695 5957 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 5958 if (PL_expect == XOPERATOR)
8990e307 5959 no_op("Number",s);
79072805
LW
5960 TERM(THING);
5961
5962 case '\'':
5db06880 5963 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5964 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5965 if (PL_expect == XOPERATOR) {
5966 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5967 return deprecate_commaless_var_list();
a0d0e21e 5968 }
463ee0b2 5969 else
8990e307 5970 no_op("String",s);
463ee0b2 5971 }
79072805 5972 if (!s)
d4c19fe8 5973 missingterm(NULL);
6154021b 5974 pl_yylval.ival = OP_CONST;
79072805
LW
5975 TERM(sublex_start());
5976
5977 case '"':
5db06880 5978 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5979 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5980 if (PL_expect == XOPERATOR) {
5981 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5982 return deprecate_commaless_var_list();
a0d0e21e 5983 }
463ee0b2 5984 else
8990e307 5985 no_op("String",s);
463ee0b2 5986 }
79072805 5987 if (!s)
d4c19fe8 5988 missingterm(NULL);
6154021b 5989 pl_yylval.ival = OP_CONST;
cfd0369c
NC
5990 /* FIXME. I think that this can be const if char *d is replaced by
5991 more localised variables. */
3280af22 5992 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 5993 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 5994 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
5995 break;
5996 }
5997 }
79072805
LW
5998 TERM(sublex_start());
5999
6000 case '`':
5db06880 6001 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6002 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6003 if (PL_expect == XOPERATOR)
8990e307 6004 no_op("Backticks",s);
79072805 6005 if (!s)
d4c19fe8 6006 missingterm(NULL);
9b201d7d 6007 readpipe_override();
79072805
LW
6008 TERM(sublex_start());
6009
6010 case '\\':
6011 s++;
a2a5de95
NC
6012 if (PL_lex_inwhat && isDIGIT(*s))
6013 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6014 *s, *s);
3280af22 6015 if (PL_expect == XOPERATOR)
8990e307 6016 no_op("Backslash",s);
79072805
LW
6017 OPERATOR(REFGEN);
6018
a7cb1f99 6019 case 'v':
e526c9e6 6020 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6021 char *start = s + 2;
dd629d5b 6022 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6023 start++;
6024 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6025 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6026 TERM(THING);
6027 }
e526c9e6 6028 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6029 else if (!isALPHA(*start) && (PL_expect == XTERM
6030 || PL_expect == XREF || PL_expect == XSTATE
6031 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6032 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6033 if (!gv) {
6154021b 6034 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6035 TERM(THING);
6036 }
6037 }
a7cb1f99
GS
6038 }
6039 goto keylookup;
79072805 6040 case 'x':
3280af22 6041 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6042 s++;
6043 Mop(OP_REPEAT);
2f3197b3 6044 }
79072805
LW
6045 goto keylookup;
6046
378cc40b 6047 case '_':
79072805
LW
6048 case 'a': case 'A':
6049 case 'b': case 'B':
6050 case 'c': case 'C':
6051 case 'd': case 'D':
6052 case 'e': case 'E':
6053 case 'f': case 'F':
6054 case 'g': case 'G':
6055 case 'h': case 'H':
6056 case 'i': case 'I':
6057 case 'j': case 'J':
6058 case 'k': case 'K':
6059 case 'l': case 'L':
6060 case 'm': case 'M':
6061 case 'n': case 'N':
6062 case 'o': case 'O':
6063 case 'p': case 'P':
6064 case 'q': case 'Q':
6065 case 'r': case 'R':
6066 case 's': case 'S':
6067 case 't': case 'T':
6068 case 'u': case 'U':
a7cb1f99 6069 case 'V':
79072805
LW
6070 case 'w': case 'W':
6071 case 'X':
6072 case 'y': case 'Y':
6073 case 'z': case 'Z':
6074
49dc05e3 6075 keylookup: {
88e1f1a2 6076 bool anydelim;
90771dc0 6077 I32 tmp;
10edeb5d
JH
6078
6079 orig_keyword = 0;
6080 gv = NULL;
6081 gvp = NULL;
49dc05e3 6082
3280af22
NIS
6083 PL_bufptr = s;
6084 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6085
6086 /* Some keywords can be followed by any delimiter, including ':' */
88e1f1a2 6087 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
155aba94
GS
6088 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6089 (PL_tokenbuf[0] == 'q' &&
6090 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 6091
6092 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6093 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6094 goto just_a_word;
6095
3643fb5f 6096 d = s;
3280af22 6097 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6098 d++; /* no comments skipped here, or s### is misparsed */
6099
748a9306 6100 /* Is this a word before a => operator? */
1c3923b3 6101 if (*d == '=' && d[1] == '>') {
748a9306 6102 CLINE;
6154021b 6103 pl_yylval.opval
d0a148a6
NC
6104 = (OP*)newSVOP(OP_CONST, 0,
6105 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6106 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6107 TERM(WORD);
6108 }
6109
88e1f1a2
JV
6110 /* Check for plugged-in keyword */
6111 {
6112 OP *o;
6113 int result;
6114 char *saved_bufptr = PL_bufptr;
6115 PL_bufptr = s;
6116 result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
6117 s = PL_bufptr;
6118 if (result == KEYWORD_PLUGIN_DECLINE) {
6119 /* not a plugged-in keyword */
6120 PL_bufptr = saved_bufptr;
6121 } else if (result == KEYWORD_PLUGIN_STMT) {
6122 pl_yylval.opval = o;
6123 CLINE;
6124 PL_expect = XSTATE;
6125 return REPORT(PLUGSTMT);
6126 } else if (result == KEYWORD_PLUGIN_EXPR) {
6127 pl_yylval.opval = o;
6128 CLINE;
6129 PL_expect = XOPERATOR;
6130 return REPORT(PLUGEXPR);
6131 } else {
6132 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6133 PL_tokenbuf);
6134 }
6135 }
6136
6137 /* Check for built-in keyword */
6138 tmp = keyword(PL_tokenbuf, len, 0);
6139
6140 /* Is this a label? */
6141 if (!anydelim && PL_expect == XSTATE
6142 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6143 s = d + 1;
6144 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6145 CLINE;
6146 TOKEN(LABEL);
6147 }
6148
a0d0e21e 6149 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6150 GV *ogv = NULL; /* override (winner) */
6151 GV *hgv = NULL; /* hidden (loser) */
3280af22 6152 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6153 CV *cv;
90e5519e 6154 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6155 (cv = GvCVu(gv)))
6156 {
6157 if (GvIMPORTED_CV(gv))
6158 ogv = gv;
6159 else if (! CvMETHOD(cv))
6160 hgv = gv;
6161 }
6162 if (!ogv &&
3280af22 6163 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6164 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6165 GvCVu(gv) && GvIMPORTED_CV(gv))
6166 {
6167 ogv = gv;
6168 }
6169 }
6170 if (ogv) {
30fe34ed 6171 orig_keyword = tmp;
56f7f34b 6172 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6173 }
6174 else if (gv && !gvp
6175 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6176 && GvCVu(gv))
6e7b2336
GS
6177 {
6178 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6179 }
56f7f34b
CS
6180 else { /* no override */
6181 tmp = -tmp;
a2a5de95
NC
6182 if (tmp == KEY_dump) {
6183 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6184 "dump() better written as CORE::dump()");
ac206dc8 6185 }
a0714e2c 6186 gv = NULL;
56f7f34b 6187 gvp = 0;
a2a5de95
NC
6188 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6189 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6190 "Ambiguous call resolved as CORE::%s(), %s",
6191 GvENAME(hgv), "qualify as such or use &");
49dc05e3 6192 }
a0d0e21e
LW
6193 }
6194
6195 reserved_word:
6196 switch (tmp) {
79072805
LW
6197
6198 default: /* not a keyword */
0bfa2a8a
NC
6199 /* Trade off - by using this evil construction we can pull the
6200 variable gv into the block labelled keylookup. If not, then
6201 we have to give it function scope so that the goto from the
6202 earlier ':' case doesn't bypass the initialisation. */
6203 if (0) {
6204 just_a_word_zero_gv:
6205 gv = NULL;
6206 gvp = NULL;
8bee0991 6207 orig_keyword = 0;
0bfa2a8a 6208 }
93a17b20 6209 just_a_word: {
96e4d5b1 6210 SV *sv;
ce29ac45 6211 int pkgname = 0;
f54cb97a 6212 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6213 OP *rv2cv_op;
5069cc75 6214 CV *cv;
5db06880 6215#ifdef PERL_MAD
cd81e915 6216 SV *nextPL_nextwhite = 0;
5db06880
NC
6217#endif
6218
8990e307
LW
6219
6220 /* Get the rest if it looks like a package qualifier */
6221
155aba94 6222 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6223 STRLEN morelen;
3280af22 6224 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6225 TRUE, &morelen);
6226 if (!morelen)
cea2e8a9 6227 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6228 *s == '\'' ? "'" : "::");
c3e0f903 6229 len += morelen;
ce29ac45 6230 pkgname = 1;
a0d0e21e 6231 }
8990e307 6232
3280af22
NIS
6233 if (PL_expect == XOPERATOR) {
6234 if (PL_bufptr == PL_linestart) {
57843af0 6235 CopLINE_dec(PL_curcop);
f1f66076 6236 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6237 CopLINE_inc(PL_curcop);
463ee0b2
LW
6238 }
6239 else
54310121 6240 no_op("Bareword",s);
463ee0b2 6241 }
8990e307 6242
c3e0f903
GS
6243 /* Look for a subroutine with this name in current package,
6244 unless name is "Foo::", in which case Foo is a bearword
6245 (and a package name). */
6246
5db06880 6247 if (len > 2 && !PL_madskills &&
3280af22 6248 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6249 {
f776e3cd 6250 if (ckWARN(WARN_BAREWORD)
90e5519e 6251 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6252 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6253 "Bareword \"%s\" refers to nonexistent package",
3280af22 6254 PL_tokenbuf);
c3e0f903 6255 len -= 2;
3280af22 6256 PL_tokenbuf[len] = '\0';
a0714e2c 6257 gv = NULL;
c3e0f903
GS
6258 gvp = 0;
6259 }
6260 else {
62d55b22
NC
6261 if (!gv) {
6262 /* Mustn't actually add anything to a symbol table.
6263 But also don't want to "initialise" any placeholder
6264 constants that might already be there into full
6265 blown PVGVs with attached PVCV. */
90e5519e
NC
6266 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6267 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6268 }
b3d904f3 6269 len = 0;
c3e0f903
GS
6270 }
6271
6272 /* if we saw a global override before, get the right name */
8990e307 6273
49dc05e3 6274 if (gvp) {
396482e1 6275 sv = newSVpvs("CORE::GLOBAL::");
3280af22 6276 sv_catpv(sv,PL_tokenbuf);
49dc05e3 6277 }
8a7a129d
NC
6278 else {
6279 /* If len is 0, newSVpv does strlen(), which is correct.
6280 If len is non-zero, then it will be the true length,
6281 and so the scalar will be created correctly. */
6282 sv = newSVpv(PL_tokenbuf,len);
6283 }
5db06880 6284#ifdef PERL_MAD
cd81e915
NC
6285 if (PL_madskills && !PL_thistoken) {
6286 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6287 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6288 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6289 }
6290#endif
8990e307 6291
a0d0e21e
LW
6292 /* Presume this is going to be a bareword of some sort. */
6293
6294 CLINE;
6154021b
RGS
6295 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6296 pl_yylval.opval->op_private = OPpCONST_BARE;
8f8cf39c
JH
6297 /* UTF-8 package name? */
6298 if (UTF && !IN_BYTES &&
95a20fc0 6299 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
8f8cf39c 6300 SvUTF8_on(sv);
a0d0e21e 6301
c3e0f903
GS
6302 /* And if "Foo::", then that's what it certainly is. */
6303
6304 if (len)
6305 goto safe_bareword;
6306
f7461760
Z
6307 cv = NULL;
6308 {
6309 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6310 const_op->op_private = OPpCONST_BARE;
6311 rv2cv_op = newCVREF(0, const_op);
6312 }
6313 if (rv2cv_op->op_type == OP_RV2CV &&
6314 (rv2cv_op->op_flags & OPf_KIDS)) {
6315 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6316 switch (rv_op->op_type) {
6317 case OP_CONST: {
6318 SV *sv = cSVOPx_sv(rv_op);
6319 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6320 cv = (CV*)SvRV(sv);
6321 } break;
6322 case OP_GV: {
6323 GV *gv = cGVOPx_gv(rv_op);
6324 CV *maybe_cv = GvCVu(gv);
6325 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6326 cv = maybe_cv;
6327 } break;
6328 }
6329 }
5069cc75 6330
8990e307
LW
6331 /* See if it's the indirect object for a list operator. */
6332
3280af22
NIS
6333 if (PL_oldoldbufptr &&
6334 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6335 (PL_oldoldbufptr == PL_last_lop
6336 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6337 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6338 (PL_expect == XREF ||
6339 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6340 {
748a9306
LW
6341 bool immediate_paren = *s == '(';
6342
a0d0e21e 6343 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6344 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6345#ifdef PERL_MAD
cd81e915 6346 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6347#endif
a0d0e21e
LW
6348
6349 /* Two barewords in a row may indicate method call. */
6350
62d55b22 6351 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6352 (tmp = intuit_method(s, gv, cv))) {
6353 op_free(rv2cv_op);
bbf60fe6 6354 return REPORT(tmp);
f7461760 6355 }
a0d0e21e
LW
6356
6357 /* If not a declared subroutine, it's an indirect object. */
6358 /* (But it's an indir obj regardless for sort.) */
7294df96 6359 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6360
7294df96
RGS
6361 if (
6362 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6363 (!cv &&
a9ef352a 6364 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6365 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6366 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6367 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6368 )
a9ef352a 6369 {
3280af22 6370 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6371 goto bareword;
93a17b20
LW
6372 }
6373 }
8990e307 6374
3280af22 6375 PL_expect = XOPERATOR;
5db06880
NC
6376#ifdef PERL_MAD
6377 if (isSPACE(*s))
cd81e915
NC
6378 s = SKIPSPACE2(s,nextPL_nextwhite);
6379 PL_nextwhite = nextPL_nextwhite;
5db06880 6380#else
8990e307 6381 s = skipspace(s);
5db06880 6382#endif
1c3923b3
GS
6383
6384 /* Is this a word before a => operator? */
ce29ac45 6385 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6386 op_free(rv2cv_op);
1c3923b3 6387 CLINE;
6154021b 6388 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6389 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6390 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6391 TERM(WORD);
6392 }
6393
6394 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6395 if (*s == '(') {
79072805 6396 CLINE;
5069cc75 6397 if (cv) {
c35e046a
AL
6398 d = s + 1;
6399 while (SPACE_OR_TAB(*d))
6400 d++;
f7461760 6401 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6402 s = d + 1;
c631f32b 6403 goto its_constant;
96e4d5b1 6404 }
6405 }
5db06880
NC
6406#ifdef PERL_MAD
6407 if (PL_madskills) {
cd81e915
NC
6408 PL_nextwhite = PL_thiswhite;
6409 PL_thiswhite = 0;
5db06880 6410 }
cd81e915 6411 start_force(PL_curforce);
5db06880 6412#endif
6154021b 6413 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6414 PL_expect = XOPERATOR;
5db06880
NC
6415#ifdef PERL_MAD
6416 if (PL_madskills) {
cd81e915
NC
6417 PL_nextwhite = nextPL_nextwhite;
6418 curmad('X', PL_thistoken);
6b29d1f5 6419 PL_thistoken = newSVpvs("");
5db06880
NC
6420 }
6421#endif
f7461760 6422 op_free(rv2cv_op);
93a17b20 6423 force_next(WORD);
6154021b 6424 pl_yylval.ival = 0;
463ee0b2 6425 TOKEN('&');
79072805 6426 }
93a17b20 6427
a0d0e21e 6428 /* If followed by var or block, call it a method (unless sub) */
8990e307 6429
f7461760
Z
6430 if ((*s == '$' || *s == '{') && !cv) {
6431 op_free(rv2cv_op);
3280af22
NIS
6432 PL_last_lop = PL_oldbufptr;
6433 PL_last_lop_op = OP_METHOD;
93a17b20 6434 PREBLOCK(METHOD);
463ee0b2
LW
6435 }
6436
8990e307
LW
6437 /* If followed by a bareword, see if it looks like indir obj. */
6438
30fe34ed
RGS
6439 if (!orig_keyword
6440 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6441 && (tmp = intuit_method(s, gv, cv))) {
6442 op_free(rv2cv_op);
bbf60fe6 6443 return REPORT(tmp);
f7461760 6444 }
93a17b20 6445
8990e307
LW
6446 /* Not a method, so call it a subroutine (if defined) */
6447
5069cc75 6448 if (cv) {
9b387841
NC
6449 if (lastchar == '-')
6450 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6451 "Ambiguous use of -%s resolved as -&%s()",
6452 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6453 /* Check for a constant sub */
f7461760 6454 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6455 its_constant:
f7461760 6456 op_free(rv2cv_op);
6154021b
RGS
6457 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6458 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6459 pl_yylval.opval->op_private = 0;
96e4d5b1 6460 TOKEN(WORD);
89bfa8cd 6461 }
6462
6154021b 6463 op_free(pl_yylval.opval);
f7461760 6464 pl_yylval.opval = rv2cv_op;
6154021b 6465 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6466 PL_last_lop = PL_oldbufptr;
bf848113 6467 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6468 /* Is there a prototype? */
5db06880
NC
6469 if (
6470#ifdef PERL_MAD
6471 cv &&
6472#endif
d9f2850e
RGS
6473 SvPOK(cv))
6474 {
5f66b61c 6475 STRLEN protolen;
daba3364 6476 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6477 if (!protolen)
4633a7c4 6478 TERM(FUNC0SUB);
8c28b960 6479 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
4633a7c4 6480 OPERATOR(UNIOPSUB);
0f5d0394
AE
6481 while (*proto == ';')
6482 proto++;
7a52d87a 6483 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6484 if (PL_curstash)
6485 sv_setpvs(PL_subname, "__ANON__");
6486 else
6487 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6488 PREBLOCK(LSTOPSUB);
6489 }
a9ef352a 6490 }
5db06880
NC
6491#ifdef PERL_MAD
6492 {
6493 if (PL_madskills) {
cd81e915
NC
6494 PL_nextwhite = PL_thiswhite;
6495 PL_thiswhite = 0;
5db06880 6496 }
cd81e915 6497 start_force(PL_curforce);
6154021b 6498 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6499 PL_expect = XTERM;
6500 if (PL_madskills) {
cd81e915
NC
6501 PL_nextwhite = nextPL_nextwhite;
6502 curmad('X', PL_thistoken);
6b29d1f5 6503 PL_thistoken = newSVpvs("");
5db06880
NC
6504 }
6505 force_next(WORD);
6506 TOKEN(NOAMP);
6507 }
6508 }
6509
6510 /* Guess harder when madskills require "best effort". */
6511 if (PL_madskills && (!gv || !GvCVu(gv))) {
6512 int probable_sub = 0;
6513 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6514 probable_sub = 1;
6515 else if (isALPHA(*s)) {
6516 char tmpbuf[1024];
6517 STRLEN tmplen;
6518 d = s;
6519 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6520 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6521 probable_sub = 1;
6522 else {
6523 while (d < PL_bufend && isSPACE(*d))
6524 d++;
6525 if (*d == '=' && d[1] == '>')
6526 probable_sub = 1;
6527 }
6528 }
6529 if (probable_sub) {
7a6d04f4 6530 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6531 op_free(pl_yylval.opval);
f7461760 6532 pl_yylval.opval = rv2cv_op;
6154021b 6533 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6534 PL_last_lop = PL_oldbufptr;
6535 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6536 PL_nextwhite = PL_thiswhite;
6537 PL_thiswhite = 0;
6538 start_force(PL_curforce);
6154021b 6539 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6540 PL_expect = XTERM;
cd81e915
NC
6541 PL_nextwhite = nextPL_nextwhite;
6542 curmad('X', PL_thistoken);
6b29d1f5 6543 PL_thistoken = newSVpvs("");
5db06880
NC
6544 force_next(WORD);
6545 TOKEN(NOAMP);
6546 }
6547#else
6154021b 6548 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6549 PL_expect = XTERM;
8990e307
LW
6550 force_next(WORD);
6551 TOKEN(NOAMP);
5db06880 6552#endif
8990e307 6553 }
748a9306 6554
8990e307
LW
6555 /* Call it a bare word */
6556
5603f27d 6557 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6558 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6559 else {
9a073a1d
RGS
6560 bareword:
6561 /* after "print" and similar functions (corresponding to
6562 * "F? L" in opcode.pl), whatever wasn't already parsed as
6563 * a filehandle should be subject to "strict subs".
6564 * Likewise for the optional indirect-object argument to system
6565 * or exec, which can't be a bareword */
6566 if ((PL_last_lop_op == OP_PRINT
6567 || PL_last_lop_op == OP_PRTF
6568 || PL_last_lop_op == OP_SAY
6569 || PL_last_lop_op == OP_SYSTEM
6570 || PL_last_lop_op == OP_EXEC)
6571 && (PL_hints & HINT_STRICT_SUBS))
6572 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6573 if (lastchar != '-') {
6574 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6575 d = PL_tokenbuf;
6576 while (isLOWER(*d))
6577 d++;
da51bb9b 6578 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6579 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6580 PL_tokenbuf);
6581 }
748a9306
LW
6582 }
6583 }
f7461760 6584 op_free(rv2cv_op);
c3e0f903
GS
6585
6586 safe_bareword:
9b387841
NC
6587 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6588 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6589 "Operator or semicolon missing before %c%s",
6590 lastchar, PL_tokenbuf);
6591 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6592 "Ambiguous use of %c resolved as operator %c",
6593 lastchar, lastchar);
748a9306 6594 }
93a17b20 6595 TOKEN(WORD);
79072805 6596 }
79072805 6597
68dc0745 6598 case KEY___FILE__:
6154021b 6599 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6600 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6601 TERM(THING);
6602
79072805 6603 case KEY___LINE__:
6154021b 6604 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6605 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6606 TERM(THING);
68dc0745 6607
6608 case KEY___PACKAGE__:
6154021b 6609 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6610 (PL_curstash
5aaec2b4 6611 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6612 : &PL_sv_undef));
79072805 6613 TERM(THING);
79072805 6614
e50aee73 6615 case KEY___DATA__:
79072805
LW
6616 case KEY___END__: {
6617 GV *gv;
3280af22 6618 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6619 const char *pname = "main";
3280af22 6620 if (PL_tokenbuf[2] == 'D')
bfcb3514 6621 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6622 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6623 SVt_PVIO);
a5f75d66 6624 GvMULTI_on(gv);
79072805 6625 if (!GvIO(gv))
a0d0e21e 6626 GvIOp(gv) = newIO();
3280af22 6627 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6628#if defined(HAS_FCNTL) && defined(F_SETFD)
6629 {
f54cb97a 6630 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6631 fcntl(fd,F_SETFD,fd >= 3);
6632 }
79072805 6633#endif
fd049845 6634 /* Mark this internal pseudo-handle as clean */
6635 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6636 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6637 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6638 else
50952442 6639 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6640#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6641 /* if the script was opened in binmode, we need to revert
53129d29 6642 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6643 * XXX this is a questionable hack at best. */
53129d29
GS
6644 if (PL_bufend-PL_bufptr > 2
6645 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6646 {
6647 Off_t loc = 0;
50952442 6648 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6649 loc = PerlIO_tell(PL_rsfp);
6650 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6651 }
2986a63f
JH
6652#ifdef NETWARE
6653 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6654#else
c39cd008 6655 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6656#endif /* NETWARE */
1143fce0
JH
6657#ifdef PERLIO_IS_STDIO /* really? */
6658# if defined(__BORLANDC__)
cb359b41
JH
6659 /* XXX see note in do_binmode() */
6660 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6661# endif
6662#endif
c39cd008
GS
6663 if (loc > 0)
6664 PerlIO_seek(PL_rsfp, loc, 0);
6665 }
6666 }
6667#endif
7948272d 6668#ifdef PERLIO_LAYERS
52d2e0f4
JH
6669 if (!IN_BYTES) {
6670 if (UTF)
6671 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6672 else if (PL_encoding) {
6673 SV *name;
6674 dSP;
6675 ENTER;
6676 SAVETMPS;
6677 PUSHMARK(sp);
6678 EXTEND(SP, 1);
6679 XPUSHs(PL_encoding);
6680 PUTBACK;
6681 call_method("name", G_SCALAR);
6682 SPAGAIN;
6683 name = POPs;
6684 PUTBACK;
bfed75c6 6685 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6686 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6687 SVfARG(name)));
52d2e0f4
JH
6688 FREETMPS;
6689 LEAVE;
6690 }
6691 }
7948272d 6692#endif
5db06880
NC
6693#ifdef PERL_MAD
6694 if (PL_madskills) {
cd81e915
NC
6695 if (PL_realtokenstart >= 0) {
6696 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6697 if (!PL_endwhite)
6b29d1f5 6698 PL_endwhite = newSVpvs("");
cd81e915
NC
6699 sv_catsv(PL_endwhite, PL_thiswhite);
6700 PL_thiswhite = 0;
6701 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6702 PL_realtokenstart = -1;
5db06880 6703 }
5cc814fd
NC
6704 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6705 != NULL) ;
5db06880
NC
6706 }
6707#endif
4608196e 6708 PL_rsfp = NULL;
79072805
LW
6709 }
6710 goto fake_eof;
e929a76b 6711 }
de3bb511 6712
8990e307 6713 case KEY_AUTOLOAD:
ed6116ce 6714 case KEY_DESTROY:
79072805 6715 case KEY_BEGIN:
3c10abe3 6716 case KEY_UNITCHECK:
7d30b5c4 6717 case KEY_CHECK:
7d07dbc2 6718 case KEY_INIT:
7d30b5c4 6719 case KEY_END:
3280af22
NIS
6720 if (PL_expect == XSTATE) {
6721 s = PL_bufptr;
93a17b20 6722 goto really_sub;
79072805
LW
6723 }
6724 goto just_a_word;
6725
a0d0e21e
LW
6726 case KEY_CORE:
6727 if (*s == ':' && s[1] == ':') {
6728 s += 2;
748a9306 6729 d = s;
3280af22 6730 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6731 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6732 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6733 if (tmp < 0)
6734 tmp = -tmp;
850e8516 6735 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6736 /* that's a way to remember we saw "CORE::" */
850e8516 6737 orig_keyword = tmp;
a0d0e21e
LW
6738 goto reserved_word;
6739 }
6740 goto just_a_word;
6741
463ee0b2
LW
6742 case KEY_abs:
6743 UNI(OP_ABS);
6744
79072805
LW
6745 case KEY_alarm:
6746 UNI(OP_ALARM);
6747
6748 case KEY_accept:
a0d0e21e 6749 LOP(OP_ACCEPT,XTERM);
79072805 6750
463ee0b2
LW
6751 case KEY_and:
6752 OPERATOR(ANDOP);
6753
79072805 6754 case KEY_atan2:
a0d0e21e 6755 LOP(OP_ATAN2,XTERM);
85e6fe83 6756
79072805 6757 case KEY_bind:
a0d0e21e 6758 LOP(OP_BIND,XTERM);
79072805
LW
6759
6760 case KEY_binmode:
1c1fc3ea 6761 LOP(OP_BINMODE,XTERM);
79072805
LW
6762
6763 case KEY_bless:
a0d0e21e 6764 LOP(OP_BLESS,XTERM);
79072805 6765
0d863452
RH
6766 case KEY_break:
6767 FUN0(OP_BREAK);
6768
79072805
LW
6769 case KEY_chop:
6770 UNI(OP_CHOP);
6771
6772 case KEY_continue:
0d863452
RH
6773 /* When 'use switch' is in effect, continue has a dual
6774 life as a control operator. */
6775 {
ef89dcc3 6776 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6777 PREBLOCK(CONTINUE);
6778 else {
6779 /* We have to disambiguate the two senses of
6780 "continue". If the next token is a '{' then
6781 treat it as the start of a continue block;
6782 otherwise treat it as a control operator.
6783 */
6784 s = skipspace(s);
6785 if (*s == '{')
79072805 6786 PREBLOCK(CONTINUE);
0d863452
RH
6787 else
6788 FUN0(OP_CONTINUE);
6789 }
6790 }
79072805
LW
6791
6792 case KEY_chdir:
fafc274c
NC
6793 /* may use HOME */
6794 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6795 UNI(OP_CHDIR);
6796
6797 case KEY_close:
6798 UNI(OP_CLOSE);
6799
6800 case KEY_closedir:
6801 UNI(OP_CLOSEDIR);
6802
6803 case KEY_cmp:
6804 Eop(OP_SCMP);
6805
6806 case KEY_caller:
6807 UNI(OP_CALLER);
6808
6809 case KEY_crypt:
6810#ifdef FCRYPT
f4c556ac
GS
6811 if (!PL_cryptseen) {
6812 PL_cryptseen = TRUE;
de3bb511 6813 init_des();
f4c556ac 6814 }
a687059c 6815#endif
a0d0e21e 6816 LOP(OP_CRYPT,XTERM);
79072805
LW
6817
6818 case KEY_chmod:
a0d0e21e 6819 LOP(OP_CHMOD,XTERM);
79072805
LW
6820
6821 case KEY_chown:
a0d0e21e 6822 LOP(OP_CHOWN,XTERM);
79072805
LW
6823
6824 case KEY_connect:
a0d0e21e 6825 LOP(OP_CONNECT,XTERM);
79072805 6826
463ee0b2
LW
6827 case KEY_chr:
6828 UNI(OP_CHR);
6829
79072805
LW
6830 case KEY_cos:
6831 UNI(OP_COS);
6832
6833 case KEY_chroot:
6834 UNI(OP_CHROOT);
6835
0d863452
RH
6836 case KEY_default:
6837 PREBLOCK(DEFAULT);
6838
79072805 6839 case KEY_do:
29595ff2 6840 s = SKIPSPACE1(s);
79072805 6841 if (*s == '{')
a0d0e21e 6842 PRETERMBLOCK(DO);
79072805 6843 if (*s != '\'')
89c5585f 6844 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6845 if (orig_keyword == KEY_do) {
6846 orig_keyword = 0;
6154021b 6847 pl_yylval.ival = 1;
850e8516
RGS
6848 }
6849 else
6154021b 6850 pl_yylval.ival = 0;
378cc40b 6851 OPERATOR(DO);
79072805
LW
6852
6853 case KEY_die:
3280af22 6854 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6855 LOP(OP_DIE,XTERM);
79072805
LW
6856
6857 case KEY_defined:
6858 UNI(OP_DEFINED);
6859
6860 case KEY_delete:
a0d0e21e 6861 UNI(OP_DELETE);
79072805
LW
6862
6863 case KEY_dbmopen:
5c1737d1 6864 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
a0d0e21e 6865 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6866
6867 case KEY_dbmclose:
6868 UNI(OP_DBMCLOSE);
6869
6870 case KEY_dump:
a0d0e21e 6871 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6872 LOOPX(OP_DUMP);
6873
6874 case KEY_else:
6875 PREBLOCK(ELSE);
6876
6877 case KEY_elsif:
6154021b 6878 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6879 OPERATOR(ELSIF);
6880
6881 case KEY_eq:
6882 Eop(OP_SEQ);
6883
a0d0e21e
LW
6884 case KEY_exists:
6885 UNI(OP_EXISTS);
4e553d73 6886
79072805 6887 case KEY_exit:
5db06880
NC
6888 if (PL_madskills)
6889 UNI(OP_INT);
79072805
LW
6890 UNI(OP_EXIT);
6891
6892 case KEY_eval:
29595ff2 6893 s = SKIPSPACE1(s);
32e2a35d
RGS
6894 if (*s == '{') { /* block eval */
6895 PL_expect = XTERMBLOCK;
6896 UNIBRACK(OP_ENTERTRY);
6897 }
6898 else { /* string eval */
6899 PL_expect = XTERM;
6900 UNIBRACK(OP_ENTEREVAL);
6901 }
79072805
LW
6902
6903 case KEY_eof:
6904 UNI(OP_EOF);
6905
6906 case KEY_exp:
6907 UNI(OP_EXP);
6908
6909 case KEY_each:
6910 UNI(OP_EACH);
6911
6912 case KEY_exec:
a0d0e21e 6913 LOP(OP_EXEC,XREF);
79072805
LW
6914
6915 case KEY_endhostent:
6916 FUN0(OP_EHOSTENT);
6917
6918 case KEY_endnetent:
6919 FUN0(OP_ENETENT);
6920
6921 case KEY_endservent:
6922 FUN0(OP_ESERVENT);
6923
6924 case KEY_endprotoent:
6925 FUN0(OP_EPROTOENT);
6926
6927 case KEY_endpwent:
6928 FUN0(OP_EPWENT);
6929
6930 case KEY_endgrent:
6931 FUN0(OP_EGRENT);
6932
6933 case KEY_for:
6934 case KEY_foreach:
6154021b 6935 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 6936 s = SKIPSPACE1(s);
7e2040f0 6937 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 6938 char *p = s;
5db06880
NC
6939#ifdef PERL_MAD
6940 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6941#endif
6942
3280af22 6943 if ((PL_bufend - p) >= 3 &&
55497cff 6944 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6945 p += 2;
77ca0c92
LW
6946 else if ((PL_bufend - p) >= 4 &&
6947 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6948 p += 3;
29595ff2 6949 p = PEEKSPACE(p);
7e2040f0 6950 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
6951 p = scan_ident(p, PL_bufend,
6952 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 6953 p = PEEKSPACE(p);
77ca0c92
LW
6954 }
6955 if (*p != '$')
cea2e8a9 6956 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
6957#ifdef PERL_MAD
6958 s = SvPVX(PL_linestr) + soff;
6959#endif
55497cff 6960 }
79072805
LW
6961 OPERATOR(FOR);
6962
6963 case KEY_formline:
a0d0e21e 6964 LOP(OP_FORMLINE,XTERM);
79072805
LW
6965
6966 case KEY_fork:
6967 FUN0(OP_FORK);
6968
6969 case KEY_fcntl:
a0d0e21e 6970 LOP(OP_FCNTL,XTERM);
79072805
LW
6971
6972 case KEY_fileno:
6973 UNI(OP_FILENO);
6974
6975 case KEY_flock:
a0d0e21e 6976 LOP(OP_FLOCK,XTERM);
79072805
LW
6977
6978 case KEY_gt:
6979 Rop(OP_SGT);
6980
6981 case KEY_ge:
6982 Rop(OP_SGE);
6983
6984 case KEY_grep:
2c38e13d 6985 LOP(OP_GREPSTART, XREF);
79072805
LW
6986
6987 case KEY_goto:
a0d0e21e 6988 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6989 LOOPX(OP_GOTO);
6990
6991 case KEY_gmtime:
6992 UNI(OP_GMTIME);
6993
6994 case KEY_getc:
6f33ba73 6995 UNIDOR(OP_GETC);
79072805
LW
6996
6997 case KEY_getppid:
6998 FUN0(OP_GETPPID);
6999
7000 case KEY_getpgrp:
7001 UNI(OP_GETPGRP);
7002
7003 case KEY_getpriority:
a0d0e21e 7004 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7005
7006 case KEY_getprotobyname:
7007 UNI(OP_GPBYNAME);
7008
7009 case KEY_getprotobynumber:
a0d0e21e 7010 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7011
7012 case KEY_getprotoent:
7013 FUN0(OP_GPROTOENT);
7014
7015 case KEY_getpwent:
7016 FUN0(OP_GPWENT);
7017
7018 case KEY_getpwnam:
ff68c719 7019 UNI(OP_GPWNAM);
79072805
LW
7020
7021 case KEY_getpwuid:
ff68c719 7022 UNI(OP_GPWUID);
79072805
LW
7023
7024 case KEY_getpeername:
7025 UNI(OP_GETPEERNAME);
7026
7027 case KEY_gethostbyname:
7028 UNI(OP_GHBYNAME);
7029
7030 case KEY_gethostbyaddr:
a0d0e21e 7031 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7032
7033 case KEY_gethostent:
7034 FUN0(OP_GHOSTENT);
7035
7036 case KEY_getnetbyname:
7037 UNI(OP_GNBYNAME);
7038
7039 case KEY_getnetbyaddr:
a0d0e21e 7040 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7041
7042 case KEY_getnetent:
7043 FUN0(OP_GNETENT);
7044
7045 case KEY_getservbyname:
a0d0e21e 7046 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7047
7048 case KEY_getservbyport:
a0d0e21e 7049 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7050
7051 case KEY_getservent:
7052 FUN0(OP_GSERVENT);
7053
7054 case KEY_getsockname:
7055 UNI(OP_GETSOCKNAME);
7056
7057 case KEY_getsockopt:
a0d0e21e 7058 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7059
7060 case KEY_getgrent:
7061 FUN0(OP_GGRENT);
7062
7063 case KEY_getgrnam:
ff68c719 7064 UNI(OP_GGRNAM);
79072805
LW
7065
7066 case KEY_getgrgid:
ff68c719 7067 UNI(OP_GGRGID);
79072805
LW
7068
7069 case KEY_getlogin:
7070 FUN0(OP_GETLOGIN);
7071
0d863452 7072 case KEY_given:
6154021b 7073 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7074 OPERATOR(GIVEN);
7075
93a17b20 7076 case KEY_glob:
a0d0e21e 7077 LOP(OP_GLOB,XTERM);
93a17b20 7078
79072805
LW
7079 case KEY_hex:
7080 UNI(OP_HEX);
7081
7082 case KEY_if:
6154021b 7083 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7084 OPERATOR(IF);
7085
7086 case KEY_index:
a0d0e21e 7087 LOP(OP_INDEX,XTERM);
79072805
LW
7088
7089 case KEY_int:
7090 UNI(OP_INT);
7091
7092 case KEY_ioctl:
a0d0e21e 7093 LOP(OP_IOCTL,XTERM);
79072805
LW
7094
7095 case KEY_join:
a0d0e21e 7096 LOP(OP_JOIN,XTERM);
79072805
LW
7097
7098 case KEY_keys:
7099 UNI(OP_KEYS);
7100
7101 case KEY_kill:
a0d0e21e 7102 LOP(OP_KILL,XTERM);
79072805
LW
7103
7104 case KEY_last:
a0d0e21e 7105 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7106 LOOPX(OP_LAST);
4e553d73 7107
79072805
LW
7108 case KEY_lc:
7109 UNI(OP_LC);
7110
7111 case KEY_lcfirst:
7112 UNI(OP_LCFIRST);
7113
7114 case KEY_local:
6154021b 7115 pl_yylval.ival = 0;
79072805
LW
7116 OPERATOR(LOCAL);
7117
7118 case KEY_length:
7119 UNI(OP_LENGTH);
7120
7121 case KEY_lt:
7122 Rop(OP_SLT);
7123
7124 case KEY_le:
7125 Rop(OP_SLE);
7126
7127 case KEY_localtime:
7128 UNI(OP_LOCALTIME);
7129
7130 case KEY_log:
7131 UNI(OP_LOG);
7132
7133 case KEY_link:
a0d0e21e 7134 LOP(OP_LINK,XTERM);
79072805
LW
7135
7136 case KEY_listen:
a0d0e21e 7137 LOP(OP_LISTEN,XTERM);
79072805 7138
c0329465
MB
7139 case KEY_lock:
7140 UNI(OP_LOCK);
7141
79072805
LW
7142 case KEY_lstat:
7143 UNI(OP_LSTAT);
7144
7145 case KEY_m:
8782bef2 7146 s = scan_pat(s,OP_MATCH);
79072805
LW
7147 TERM(sublex_start());
7148
a0d0e21e 7149 case KEY_map:
2c38e13d 7150 LOP(OP_MAPSTART, XREF);
4e4e412b 7151
79072805 7152 case KEY_mkdir:
a0d0e21e 7153 LOP(OP_MKDIR,XTERM);
79072805
LW
7154
7155 case KEY_msgctl:
a0d0e21e 7156 LOP(OP_MSGCTL,XTERM);
79072805
LW
7157
7158 case KEY_msgget:
a0d0e21e 7159 LOP(OP_MSGGET,XTERM);
79072805
LW
7160
7161 case KEY_msgrcv:
a0d0e21e 7162 LOP(OP_MSGRCV,XTERM);
79072805
LW
7163
7164 case KEY_msgsnd:
a0d0e21e 7165 LOP(OP_MSGSND,XTERM);
79072805 7166
77ca0c92 7167 case KEY_our:
93a17b20 7168 case KEY_my:
952306ac 7169 case KEY_state:
eac04b2e 7170 PL_in_my = (U16)tmp;
29595ff2 7171 s = SKIPSPACE1(s);
7e2040f0 7172 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7173#ifdef PERL_MAD
7174 char* start = s;
7175#endif
3280af22 7176 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7177 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7178 goto really_sub;
def3634b 7179 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7180 if (!PL_in_my_stash) {
c750a3ec 7181 char tmpbuf[1024];
3280af22 7182 PL_bufptr = s;
d9fad198 7183 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7184 yyerror(tmpbuf);
7185 }
5db06880
NC
7186#ifdef PERL_MAD
7187 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7188 sv_catsv(PL_thistoken, PL_nextwhite);
7189 PL_nextwhite = 0;
7190 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7191 }
7192#endif
c750a3ec 7193 }
6154021b 7194 pl_yylval.ival = 1;
55497cff 7195 OPERATOR(MY);
93a17b20 7196
79072805 7197 case KEY_next:
a0d0e21e 7198 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7199 LOOPX(OP_NEXT);
7200
7201 case KEY_ne:
7202 Eop(OP_SNE);
7203
a0d0e21e 7204 case KEY_no:
468aa647 7205 s = tokenize_use(0, s);
a0d0e21e
LW
7206 OPERATOR(USE);
7207
7208 case KEY_not:
29595ff2 7209 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
7210 FUN1(OP_NOT);
7211 else
7212 OPERATOR(NOTOP);
a0d0e21e 7213
79072805 7214 case KEY_open:
29595ff2 7215 s = SKIPSPACE1(s);
7e2040f0 7216 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7217 const char *t;
c35e046a
AL
7218 for (d = s; isALNUM_lazy_if(d,UTF);)
7219 d++;
7220 for (t=d; isSPACE(*t);)
7221 t++;
e2ab214b 7222 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7223 /* [perl #16184] */
7224 && !(t[0] == '=' && t[1] == '>')
7225 ) {
5f66b61c 7226 int parms_len = (int)(d-s);
9014280d 7227 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7228 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7229 parms_len, s, parms_len, s);
66fbe8fb 7230 }
93a17b20 7231 }
a0d0e21e 7232 LOP(OP_OPEN,XTERM);
79072805 7233
463ee0b2 7234 case KEY_or:
6154021b 7235 pl_yylval.ival = OP_OR;
463ee0b2
LW
7236 OPERATOR(OROP);
7237
79072805
LW
7238 case KEY_ord:
7239 UNI(OP_ORD);
7240
7241 case KEY_oct:
7242 UNI(OP_OCT);
7243
7244 case KEY_opendir:
a0d0e21e 7245 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7246
7247 case KEY_print:
3280af22 7248 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7249 LOP(OP_PRINT,XREF);
79072805
LW
7250
7251 case KEY_printf:
3280af22 7252 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7253 LOP(OP_PRTF,XREF);
79072805 7254
c07a80fd 7255 case KEY_prototype:
7256 UNI(OP_PROTOTYPE);
7257
79072805 7258 case KEY_push:
a0d0e21e 7259 LOP(OP_PUSH,XTERM);
79072805
LW
7260
7261 case KEY_pop:
6f33ba73 7262 UNIDOR(OP_POP);
79072805 7263
a0d0e21e 7264 case KEY_pos:
6f33ba73 7265 UNIDOR(OP_POS);
4e553d73 7266
79072805 7267 case KEY_pack:
a0d0e21e 7268 LOP(OP_PACK,XTERM);
79072805
LW
7269
7270 case KEY_package:
a0d0e21e 7271 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7272 s = SKIPSPACE1(s);
91152fc1 7273 s = force_strict_version(s);
79072805
LW
7274 OPERATOR(PACKAGE);
7275
7276 case KEY_pipe:
a0d0e21e 7277 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7278
7279 case KEY_q:
5db06880 7280 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7281 if (!s)
d4c19fe8 7282 missingterm(NULL);
6154021b 7283 pl_yylval.ival = OP_CONST;
79072805
LW
7284 TERM(sublex_start());
7285
a0d0e21e
LW
7286 case KEY_quotemeta:
7287 UNI(OP_QUOTEMETA);
7288
8990e307 7289 case KEY_qw:
5db06880 7290 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7291 if (!s)
d4c19fe8 7292 missingterm(NULL);
3480a8d2 7293 PL_expect = XOPERATOR;
8127e0e3
GS
7294 force_next(')');
7295 if (SvCUR(PL_lex_stuff)) {
5f66b61c 7296 OP *words = NULL;
8127e0e3 7297 int warned = 0;
3280af22 7298 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7299 while (len) {
d4c19fe8
AL
7300 for (; isSPACE(*d) && len; --len, ++d)
7301 /**/;
8127e0e3 7302 if (len) {
d4c19fe8 7303 SV *sv;
f54cb97a 7304 const char *b = d;
e476b1b5 7305 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7306 for (; !isSPACE(*d) && len; --len, ++d) {
7307 if (*d == ',') {
9014280d 7308 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7309 "Possible attempt to separate words with commas");
7310 ++warned;
7311 }
7312 else if (*d == '#') {
9014280d 7313 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7314 "Possible attempt to put comments in qw() list");
7315 ++warned;
7316 }
7317 }
7318 }
7319 else {
d4c19fe8
AL
7320 for (; !isSPACE(*d) && len; --len, ++d)
7321 /**/;
8127e0e3 7322 }
740cce10 7323 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8127e0e3 7324 words = append_elem(OP_LIST, words,
7948272d 7325 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7326 }
7327 }
8127e0e3 7328 if (words) {
cd81e915 7329 start_force(PL_curforce);
9ded7720 7330 NEXTVAL_NEXTTOKE.opval = words;
8127e0e3
GS
7331 force_next(THING);
7332 }
55497cff 7333 }
37fd879b 7334 if (PL_lex_stuff) {
8127e0e3 7335 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7336 PL_lex_stuff = NULL;
37fd879b 7337 }
3280af22 7338 PL_expect = XTERM;
8127e0e3 7339 TOKEN('(');
8990e307 7340
79072805 7341 case KEY_qq:
5db06880 7342 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7343 if (!s)
d4c19fe8 7344 missingterm(NULL);
6154021b 7345 pl_yylval.ival = OP_STRINGIFY;
3280af22 7346 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7347 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7348 TERM(sublex_start());
7349
8782bef2
GB
7350 case KEY_qr:
7351 s = scan_pat(s,OP_QR);
7352 TERM(sublex_start());
7353
79072805 7354 case KEY_qx:
5db06880 7355 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7356 if (!s)
d4c19fe8 7357 missingterm(NULL);
9b201d7d 7358 readpipe_override();
79072805
LW
7359 TERM(sublex_start());
7360
7361 case KEY_return:
7362 OLDLOP(OP_RETURN);
7363
7364 case KEY_require:
29595ff2 7365 s = SKIPSPACE1(s);
e759cc13
RGS
7366 if (isDIGIT(*s)) {
7367 s = force_version(s, FALSE);
a7cb1f99 7368 }
e759cc13
RGS
7369 else if (*s != 'v' || !isDIGIT(s[1])
7370 || (s = force_version(s, TRUE), *s == 'v'))
7371 {
a7cb1f99
GS
7372 *PL_tokenbuf = '\0';
7373 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7374 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7375 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7376 else if (*s == '<')
7377 yyerror("<> should be quotes");
7378 }
a72a1c8b
RGS
7379 if (orig_keyword == KEY_require) {
7380 orig_keyword = 0;
6154021b 7381 pl_yylval.ival = 1;
a72a1c8b
RGS
7382 }
7383 else
6154021b 7384 pl_yylval.ival = 0;
a72a1c8b
RGS
7385 PL_expect = XTERM;
7386 PL_bufptr = s;
7387 PL_last_uni = PL_oldbufptr;
7388 PL_last_lop_op = OP_REQUIRE;
7389 s = skipspace(s);
7390 return REPORT( (int)REQUIRE );
79072805
LW
7391
7392 case KEY_reset:
7393 UNI(OP_RESET);
7394
7395 case KEY_redo:
a0d0e21e 7396 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7397 LOOPX(OP_REDO);
7398
7399 case KEY_rename:
a0d0e21e 7400 LOP(OP_RENAME,XTERM);
79072805
LW
7401
7402 case KEY_rand:
7403 UNI(OP_RAND);
7404
7405 case KEY_rmdir:
7406 UNI(OP_RMDIR);
7407
7408 case KEY_rindex:
a0d0e21e 7409 LOP(OP_RINDEX,XTERM);
79072805
LW
7410
7411 case KEY_read:
a0d0e21e 7412 LOP(OP_READ,XTERM);
79072805
LW
7413
7414 case KEY_readdir:
7415 UNI(OP_READDIR);
7416
93a17b20 7417 case KEY_readline:
6f33ba73 7418 UNIDOR(OP_READLINE);
93a17b20
LW
7419
7420 case KEY_readpipe:
0858480c 7421 UNIDOR(OP_BACKTICK);
93a17b20 7422
79072805
LW
7423 case KEY_rewinddir:
7424 UNI(OP_REWINDDIR);
7425
7426 case KEY_recv:
a0d0e21e 7427 LOP(OP_RECV,XTERM);
79072805
LW
7428
7429 case KEY_reverse:
a0d0e21e 7430 LOP(OP_REVERSE,XTERM);
79072805
LW
7431
7432 case KEY_readlink:
6f33ba73 7433 UNIDOR(OP_READLINK);
79072805
LW
7434
7435 case KEY_ref:
7436 UNI(OP_REF);
7437
7438 case KEY_s:
7439 s = scan_subst(s);
6154021b 7440 if (pl_yylval.opval)
79072805
LW
7441 TERM(sublex_start());
7442 else
7443 TOKEN(1); /* force error */
7444
0d863452
RH
7445 case KEY_say:
7446 checkcomma(s,PL_tokenbuf,"filehandle");
7447 LOP(OP_SAY,XREF);
7448
a0d0e21e
LW
7449 case KEY_chomp:
7450 UNI(OP_CHOMP);
4e553d73 7451
79072805
LW
7452 case KEY_scalar:
7453 UNI(OP_SCALAR);
7454
7455 case KEY_select:
a0d0e21e 7456 LOP(OP_SELECT,XTERM);
79072805
LW
7457
7458 case KEY_seek:
a0d0e21e 7459 LOP(OP_SEEK,XTERM);
79072805
LW
7460
7461 case KEY_semctl:
a0d0e21e 7462 LOP(OP_SEMCTL,XTERM);
79072805
LW
7463
7464 case KEY_semget:
a0d0e21e 7465 LOP(OP_SEMGET,XTERM);
79072805
LW
7466
7467 case KEY_semop:
a0d0e21e 7468 LOP(OP_SEMOP,XTERM);
79072805
LW
7469
7470 case KEY_send:
a0d0e21e 7471 LOP(OP_SEND,XTERM);
79072805
LW
7472
7473 case KEY_setpgrp:
a0d0e21e 7474 LOP(OP_SETPGRP,XTERM);
79072805
LW
7475
7476 case KEY_setpriority:
a0d0e21e 7477 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7478
7479 case KEY_sethostent:
ff68c719 7480 UNI(OP_SHOSTENT);
79072805
LW
7481
7482 case KEY_setnetent:
ff68c719 7483 UNI(OP_SNETENT);
79072805
LW
7484
7485 case KEY_setservent:
ff68c719 7486 UNI(OP_SSERVENT);
79072805
LW
7487
7488 case KEY_setprotoent:
ff68c719 7489 UNI(OP_SPROTOENT);
79072805
LW
7490
7491 case KEY_setpwent:
7492 FUN0(OP_SPWENT);
7493
7494 case KEY_setgrent:
7495 FUN0(OP_SGRENT);
7496
7497 case KEY_seekdir:
a0d0e21e 7498 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7499
7500 case KEY_setsockopt:
a0d0e21e 7501 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7502
7503 case KEY_shift:
6f33ba73 7504 UNIDOR(OP_SHIFT);
79072805
LW
7505
7506 case KEY_shmctl:
a0d0e21e 7507 LOP(OP_SHMCTL,XTERM);
79072805
LW
7508
7509 case KEY_shmget:
a0d0e21e 7510 LOP(OP_SHMGET,XTERM);
79072805
LW
7511
7512 case KEY_shmread:
a0d0e21e 7513 LOP(OP_SHMREAD,XTERM);
79072805
LW
7514
7515 case KEY_shmwrite:
a0d0e21e 7516 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7517
7518 case KEY_shutdown:
a0d0e21e 7519 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7520
7521 case KEY_sin:
7522 UNI(OP_SIN);
7523
7524 case KEY_sleep:
7525 UNI(OP_SLEEP);
7526
7527 case KEY_socket:
a0d0e21e 7528 LOP(OP_SOCKET,XTERM);
79072805
LW
7529
7530 case KEY_socketpair:
a0d0e21e 7531 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7532
7533 case KEY_sort:
3280af22 7534 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7535 s = SKIPSPACE1(s);
79072805 7536 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7537 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7538 PL_expect = XTERM;
15f0808c 7539 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7540 LOP(OP_SORT,XREF);
79072805
LW
7541
7542 case KEY_split:
a0d0e21e 7543 LOP(OP_SPLIT,XTERM);
79072805
LW
7544
7545 case KEY_sprintf:
a0d0e21e 7546 LOP(OP_SPRINTF,XTERM);
79072805
LW
7547
7548 case KEY_splice:
a0d0e21e 7549 LOP(OP_SPLICE,XTERM);
79072805
LW
7550
7551 case KEY_sqrt:
7552 UNI(OP_SQRT);
7553
7554 case KEY_srand:
7555 UNI(OP_SRAND);
7556
7557 case KEY_stat:
7558 UNI(OP_STAT);
7559
7560 case KEY_study:
79072805
LW
7561 UNI(OP_STUDY);
7562
7563 case KEY_substr:
a0d0e21e 7564 LOP(OP_SUBSTR,XTERM);
79072805
LW
7565
7566 case KEY_format:
7567 case KEY_sub:
93a17b20 7568 really_sub:
09bef843 7569 {
3280af22 7570 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7571 SSize_t tboffset = 0;
09bef843 7572 expectation attrful;
28cc6278 7573 bool have_name, have_proto;
f54cb97a 7574 const int key = tmp;
09bef843 7575
5db06880
NC
7576#ifdef PERL_MAD
7577 SV *tmpwhite = 0;
7578
cd81e915 7579 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7580 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7581 PL_thistoken = 0;
5db06880
NC
7582
7583 d = s;
7584 s = SKIPSPACE2(s,tmpwhite);
7585#else
09bef843 7586 s = skipspace(s);
5db06880 7587#endif
09bef843 7588
7e2040f0 7589 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7590 (*s == ':' && s[1] == ':'))
7591 {
5db06880 7592#ifdef PERL_MAD
4f61fd4b 7593 SV *nametoke = NULL;
5db06880
NC
7594#endif
7595
09bef843
SB
7596 PL_expect = XBLOCK;
7597 attrful = XATTRBLOCK;
b1b65b59
JH
7598 /* remember buffer pos'n for later force_word */
7599 tboffset = s - PL_oldbufptr;
09bef843 7600 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7601#ifdef PERL_MAD
7602 if (PL_madskills)
7603 nametoke = newSVpvn(s, d - s);
7604#endif
6502358f
NC
7605 if (memchr(tmpbuf, ':', len))
7606 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7607 else {
7608 sv_setsv(PL_subname,PL_curstname);
396482e1 7609 sv_catpvs(PL_subname,"::");
09bef843
SB
7610 sv_catpvn(PL_subname,tmpbuf,len);
7611 }
09bef843 7612 have_name = TRUE;
5db06880
NC
7613
7614#ifdef PERL_MAD
7615
7616 start_force(0);
7617 CURMAD('X', nametoke);
7618 CURMAD('_', tmpwhite);
7619 (void) force_word(PL_oldbufptr + tboffset, WORD,
7620 FALSE, TRUE, TRUE);
7621
7622 s = SKIPSPACE2(d,tmpwhite);
7623#else
7624 s = skipspace(d);
7625#endif
09bef843 7626 }
463ee0b2 7627 else {
09bef843
SB
7628 if (key == KEY_my)
7629 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7630 PL_expect = XTERMBLOCK;
7631 attrful = XATTRTERM;
76f68e9b 7632 sv_setpvs(PL_subname,"?");
09bef843 7633 have_name = FALSE;
463ee0b2 7634 }
4633a7c4 7635
09bef843
SB
7636 if (key == KEY_format) {
7637 if (*s == '=')
7638 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7639#ifdef PERL_MAD
cd81e915 7640 PL_thistoken = subtoken;
5db06880
NC
7641 s = d;
7642#else
09bef843 7643 if (have_name)
b1b65b59
JH
7644 (void) force_word(PL_oldbufptr + tboffset, WORD,
7645 FALSE, TRUE, TRUE);
5db06880 7646#endif
09bef843
SB
7647 OPERATOR(FORMAT);
7648 }
79072805 7649
09bef843
SB
7650 /* Look for a prototype */
7651 if (*s == '(') {
d9f2850e
RGS
7652 char *p;
7653 bool bad_proto = FALSE;
9e8d7757
RB
7654 bool in_brackets = FALSE;
7655 char greedy_proto = ' ';
7656 bool proto_after_greedy_proto = FALSE;
7657 bool must_be_last = FALSE;
7658 bool underscore = FALSE;
aef2a98a 7659 bool seen_underscore = FALSE;
197afce1 7660 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7661
5db06880 7662 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7663 if (!s)
09bef843 7664 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7665 /* strip spaces and check for bad characters */
09bef843
SB
7666 d = SvPVX(PL_lex_stuff);
7667 tmp = 0;
d9f2850e
RGS
7668 for (p = d; *p; ++p) {
7669 if (!isSPACE(*p)) {
7670 d[tmp++] = *p;
9e8d7757 7671
197afce1 7672 if (warnillegalproto) {
9e8d7757
RB
7673 if (must_be_last)
7674 proto_after_greedy_proto = TRUE;
7675 if (!strchr("$@%*;[]&\\_", *p)) {
7676 bad_proto = TRUE;
7677 }
7678 else {
7679 if ( underscore ) {
7680 if ( *p != ';' )
7681 bad_proto = TRUE;
7682 underscore = FALSE;
7683 }
7684 if ( *p == '[' ) {
7685 in_brackets = TRUE;
7686 }
7687 else if ( *p == ']' ) {
7688 in_brackets = FALSE;
7689 }
7690 else if ( (*p == '@' || *p == '%') &&
7691 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7692 !in_brackets ) {
7693 must_be_last = TRUE;
7694 greedy_proto = *p;
7695 }
7696 else if ( *p == '_' ) {
aef2a98a 7697 underscore = seen_underscore = TRUE;
9e8d7757
RB
7698 }
7699 }
7700 }
d37a9538 7701 }
09bef843 7702 }
d9f2850e 7703 d[tmp] = '\0';
9e8d7757 7704 if (proto_after_greedy_proto)
197afce1 7705 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
7706 "Prototype after '%c' for %"SVf" : %s",
7707 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 7708 if (bad_proto)
197afce1 7709 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
7710 "Illegal character %sin prototype for %"SVf" : %s",
7711 seen_underscore ? "after '_' " : "",
be2597df 7712 SVfARG(PL_subname), d);
b162af07 7713 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7714 have_proto = TRUE;
68dc0745 7715
5db06880
NC
7716#ifdef PERL_MAD
7717 start_force(0);
cd81e915 7718 CURMAD('q', PL_thisopen);
5db06880 7719 CURMAD('_', tmpwhite);
cd81e915
NC
7720 CURMAD('=', PL_thisstuff);
7721 CURMAD('Q', PL_thisclose);
5db06880
NC
7722 NEXTVAL_NEXTTOKE.opval =
7723 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7724 PL_lex_stuff = NULL;
5db06880
NC
7725 force_next(THING);
7726
7727 s = SKIPSPACE2(s,tmpwhite);
7728#else
09bef843 7729 s = skipspace(s);
5db06880 7730#endif
4633a7c4 7731 }
09bef843
SB
7732 else
7733 have_proto = FALSE;
7734
7735 if (*s == ':' && s[1] != ':')
7736 PL_expect = attrful;
8e742a20
MHM
7737 else if (*s != '{' && key == KEY_sub) {
7738 if (!have_name)
7739 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7740 else if (*s != ';' && *s != '}')
be2597df 7741 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7742 }
09bef843 7743
5db06880
NC
7744#ifdef PERL_MAD
7745 start_force(0);
7746 if (tmpwhite) {
7747 if (PL_madskills)
6b29d1f5 7748 curmad('^', newSVpvs(""));
5db06880
NC
7749 CURMAD('_', tmpwhite);
7750 }
7751 force_next(0);
7752
cd81e915 7753 PL_thistoken = subtoken;
5db06880 7754#else
09bef843 7755 if (have_proto) {
9ded7720 7756 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7757 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7758 PL_lex_stuff = NULL;
09bef843 7759 force_next(THING);
68dc0745 7760 }
5db06880 7761#endif
09bef843 7762 if (!have_name) {
49a54bbe
NC
7763 if (PL_curstash)
7764 sv_setpvs(PL_subname, "__ANON__");
7765 else
7766 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7767 TOKEN(ANONSUB);
4633a7c4 7768 }
5db06880 7769#ifndef PERL_MAD
b1b65b59
JH
7770 (void) force_word(PL_oldbufptr + tboffset, WORD,
7771 FALSE, TRUE, TRUE);
5db06880 7772#endif
09bef843
SB
7773 if (key == KEY_my)
7774 TOKEN(MYSUB);
7775 TOKEN(SUB);
4633a7c4 7776 }
79072805
LW
7777
7778 case KEY_system:
a0d0e21e 7779 LOP(OP_SYSTEM,XREF);
79072805
LW
7780
7781 case KEY_symlink:
a0d0e21e 7782 LOP(OP_SYMLINK,XTERM);
79072805
LW
7783
7784 case KEY_syscall:
a0d0e21e 7785 LOP(OP_SYSCALL,XTERM);
79072805 7786
c07a80fd 7787 case KEY_sysopen:
7788 LOP(OP_SYSOPEN,XTERM);
7789
137443ea 7790 case KEY_sysseek:
7791 LOP(OP_SYSSEEK,XTERM);
7792
79072805 7793 case KEY_sysread:
a0d0e21e 7794 LOP(OP_SYSREAD,XTERM);
79072805
LW
7795
7796 case KEY_syswrite:
a0d0e21e 7797 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7798
7799 case KEY_tr:
7800 s = scan_trans(s);
7801 TERM(sublex_start());
7802
7803 case KEY_tell:
7804 UNI(OP_TELL);
7805
7806 case KEY_telldir:
7807 UNI(OP_TELLDIR);
7808
463ee0b2 7809 case KEY_tie:
a0d0e21e 7810 LOP(OP_TIE,XTERM);
463ee0b2 7811
c07a80fd 7812 case KEY_tied:
7813 UNI(OP_TIED);
7814
79072805
LW
7815 case KEY_time:
7816 FUN0(OP_TIME);
7817
7818 case KEY_times:
7819 FUN0(OP_TMS);
7820
7821 case KEY_truncate:
a0d0e21e 7822 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7823
7824 case KEY_uc:
7825 UNI(OP_UC);
7826
7827 case KEY_ucfirst:
7828 UNI(OP_UCFIRST);
7829
463ee0b2
LW
7830 case KEY_untie:
7831 UNI(OP_UNTIE);
7832
79072805 7833 case KEY_until:
6154021b 7834 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7835 OPERATOR(UNTIL);
7836
7837 case KEY_unless:
6154021b 7838 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7839 OPERATOR(UNLESS);
7840
7841 case KEY_unlink:
a0d0e21e 7842 LOP(OP_UNLINK,XTERM);
79072805
LW
7843
7844 case KEY_undef:
6f33ba73 7845 UNIDOR(OP_UNDEF);
79072805
LW
7846
7847 case KEY_unpack:
a0d0e21e 7848 LOP(OP_UNPACK,XTERM);
79072805
LW
7849
7850 case KEY_utime:
a0d0e21e 7851 LOP(OP_UTIME,XTERM);
79072805
LW
7852
7853 case KEY_umask:
6f33ba73 7854 UNIDOR(OP_UMASK);
79072805
LW
7855
7856 case KEY_unshift:
a0d0e21e
LW
7857 LOP(OP_UNSHIFT,XTERM);
7858
7859 case KEY_use:
468aa647 7860 s = tokenize_use(1, s);
a0d0e21e 7861 OPERATOR(USE);
79072805
LW
7862
7863 case KEY_values:
7864 UNI(OP_VALUES);
7865
7866 case KEY_vec:
a0d0e21e 7867 LOP(OP_VEC,XTERM);
79072805 7868
0d863452 7869 case KEY_when:
6154021b 7870 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7871 OPERATOR(WHEN);
7872
79072805 7873 case KEY_while:
6154021b 7874 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7875 OPERATOR(WHILE);
7876
7877 case KEY_warn:
3280af22 7878 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7879 LOP(OP_WARN,XTERM);
79072805
LW
7880
7881 case KEY_wait:
7882 FUN0(OP_WAIT);
7883
7884 case KEY_waitpid:
a0d0e21e 7885 LOP(OP_WAITPID,XTERM);
79072805
LW
7886
7887 case KEY_wantarray:
7888 FUN0(OP_WANTARRAY);
7889
7890 case KEY_write:
9d116dd7
JH
7891#ifdef EBCDIC
7892 {
df3728a2
JH
7893 char ctl_l[2];
7894 ctl_l[0] = toCTRL('L');
7895 ctl_l[1] = '\0';
fafc274c 7896 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7897 }
7898#else
fafc274c
NC
7899 /* Make sure $^L is defined */
7900 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7901#endif
79072805
LW
7902 UNI(OP_ENTERWRITE);
7903
7904 case KEY_x:
3280af22 7905 if (PL_expect == XOPERATOR)
79072805
LW
7906 Mop(OP_REPEAT);
7907 check_uni();
7908 goto just_a_word;
7909
a0d0e21e 7910 case KEY_xor:
6154021b 7911 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7912 OPERATOR(OROP);
7913
79072805
LW
7914 case KEY_y:
7915 s = scan_trans(s);
7916 TERM(sublex_start());
7917 }
49dc05e3 7918 }}
79072805 7919}
bf4acbe4
GS
7920#ifdef __SC__
7921#pragma segment Main
7922#endif
79072805 7923
e930465f
JH
7924static int
7925S_pending_ident(pTHX)
8eceec63 7926{
97aff369 7927 dVAR;
8eceec63 7928 register char *d;
bbd11bfc 7929 PADOFFSET tmp = 0;
8eceec63
SC
7930 /* pit holds the identifier we read and pending_ident is reset */
7931 char pit = PL_pending_ident;
9bde8eb0
NC
7932 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7933 /* All routes through this function want to know if there is a colon. */
c099d646 7934 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
7935 PL_pending_ident = 0;
7936
cd81e915 7937 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 7938 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 7939 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
7940
7941 /* if we're in a my(), we can't allow dynamics here.
7942 $foo'bar has already been turned into $foo::bar, so
7943 just check for colons.
7944
7945 if it's a legal name, the OP is a PADANY.
7946 */
7947 if (PL_in_my) {
7948 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 7949 if (has_colon)
8eceec63
SC
7950 yyerror(Perl_form(aTHX_ "No package name allowed for "
7951 "variable %s in \"our\"",
7952 PL_tokenbuf));
d6447115 7953 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7954 }
7955 else {
9bde8eb0 7956 if (has_colon)
952306ac
RGS
7957 yyerror(Perl_form(aTHX_ PL_no_myglob,
7958 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 7959
6154021b 7960 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 7961 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7962 return PRIVATEREF;
7963 }
7964 }
7965
7966 /*
7967 build the ops for accesses to a my() variable.
7968
7969 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7970 then used in a comparison. This catches most, but not
7971 all cases. For instance, it catches
7972 sort { my($a); $a <=> $b }
7973 but not
7974 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7975 (although why you'd do that is anyone's guess).
7976 */
7977
9bde8eb0 7978 if (!has_colon) {
8716503d 7979 if (!PL_in_my)
f8f98e0a 7980 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 7981 if (tmp != NOT_IN_PAD) {
8eceec63 7982 /* might be an "our" variable" */
00b1698f 7983 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 7984 /* build ops for a bareword */
b64e5050
AL
7985 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
7986 HEK * const stashname = HvNAME_HEK(stash);
7987 SV * const sym = newSVhek(stashname);
396482e1 7988 sv_catpvs(sym, "::");
9bde8eb0 7989 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
7990 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7991 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 7992 gv_fetchsv(sym,
8eceec63
SC
7993 (PL_in_eval
7994 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 7995 : GV_ADDMULTI
8eceec63
SC
7996 ),
7997 ((PL_tokenbuf[0] == '$') ? SVt_PV
7998 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
7999 : SVt_PVHV));
8000 return WORD;
8001 }
8002
8003 /* if it's a sort block and they're naming $a or $b */
8004 if (PL_last_lop_op == OP_SORT &&
8005 PL_tokenbuf[0] == '$' &&
8006 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8007 && !PL_tokenbuf[2])
8008 {
8009 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8010 d < PL_bufend && *d != '\n';
8011 d++)
8012 {
8013 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8014 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8015 PL_tokenbuf);
8016 }
8017 }
8018 }
8019
6154021b
RGS
8020 pl_yylval.opval = newOP(OP_PADANY, 0);
8021 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8022 return PRIVATEREF;
8023 }
8024 }
8025
8026 /*
8027 Whine if they've said @foo in a doublequoted string,
8028 and @foo isn't a variable we can find in the symbol
8029 table.
8030 */
d824713b
NC
8031 if (ckWARN(WARN_AMBIGUOUS) &&
8032 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8033 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8034 SVt_PVAV);
8eceec63 8035 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8036 /* DO NOT warn for @- and @+ */
8037 && !( PL_tokenbuf[2] == '\0' &&
8038 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8039 )
8eceec63
SC
8040 {
8041 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8042 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8043 "Possible unintended interpolation of %s in string",
8044 PL_tokenbuf);
8eceec63
SC
8045 }
8046 }
8047
8048 /* build ops for a bareword */
6154021b 8049 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8050 tokenbuf_len - 1));
6154021b 8051 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9bde8eb0
NC
8052 gv_fetchpvn_flags(
8053 PL_tokenbuf + 1, tokenbuf_len - 1,
d6069db2
RGS
8054 /* If the identifier refers to a stash, don't autovivify it.
8055 * Change 24660 had the side effect of causing symbol table
8056 * hashes to always be defined, even if they were freshly
8057 * created and the only reference in the entire program was
8058 * the single statement with the defined %foo::bar:: test.
8059 * It appears that all code in the wild doing this actually
8060 * wants to know whether sub-packages have been loaded, so
8061 * by avoiding auto-vivifying symbol tables, we ensure that
8062 * defined %foo::bar:: continues to be false, and the existing
8063 * tests still give the expected answers, even though what
8064 * they're actually testing has now changed subtly.
8065 */
9bde8eb0
NC
8066 (*PL_tokenbuf == '%'
8067 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
8068 && d[-1] == ':'
d6069db2
RGS
8069 ? 0
8070 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
adc51b97
RGS
8071 ((PL_tokenbuf[0] == '$') ? SVt_PV
8072 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8073 : SVt_PVHV));
8eceec63
SC
8074 return WORD;
8075}
8076
4c3bbe0f
MHM
8077/*
8078 * The following code was generated by perl_keyword.pl.
8079 */
e2e1dd5a 8080
79072805 8081I32
5458a98a 8082Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 8083{
952306ac 8084 dVAR;
7918f24d
NC
8085
8086 PERL_ARGS_ASSERT_KEYWORD;
8087
4c3bbe0f
MHM
8088 switch (len)
8089 {
8090 case 1: /* 5 tokens of length 1 */
8091 switch (name[0])
e2e1dd5a 8092 {
4c3bbe0f
MHM
8093 case 'm':
8094 { /* m */
8095 return KEY_m;
8096 }
8097
4c3bbe0f
MHM
8098 case 'q':
8099 { /* q */
8100 return KEY_q;
8101 }
8102
4c3bbe0f
MHM
8103 case 's':
8104 { /* s */
8105 return KEY_s;
8106 }
8107
4c3bbe0f
MHM
8108 case 'x':
8109 { /* x */
8110 return -KEY_x;
8111 }
8112
4c3bbe0f
MHM
8113 case 'y':
8114 { /* y */
8115 return KEY_y;
8116 }
8117
4c3bbe0f
MHM
8118 default:
8119 goto unknown;
e2e1dd5a 8120 }
4c3bbe0f
MHM
8121
8122 case 2: /* 18 tokens of length 2 */
8123 switch (name[0])
e2e1dd5a 8124 {
4c3bbe0f
MHM
8125 case 'd':
8126 if (name[1] == 'o')
8127 { /* do */
8128 return KEY_do;
8129 }
8130
8131 goto unknown;
8132
8133 case 'e':
8134 if (name[1] == 'q')
8135 { /* eq */
8136 return -KEY_eq;
8137 }
8138
8139 goto unknown;
8140
8141 case 'g':
8142 switch (name[1])
8143 {
8144 case 'e':
8145 { /* ge */
8146 return -KEY_ge;
8147 }
8148
4c3bbe0f
MHM
8149 case 't':
8150 { /* gt */
8151 return -KEY_gt;
8152 }
8153
4c3bbe0f
MHM
8154 default:
8155 goto unknown;
8156 }
8157
8158 case 'i':
8159 if (name[1] == 'f')
8160 { /* if */
8161 return KEY_if;
8162 }
8163
8164 goto unknown;
8165
8166 case 'l':
8167 switch (name[1])
8168 {
8169 case 'c':
8170 { /* lc */
8171 return -KEY_lc;
8172 }
8173
4c3bbe0f
MHM
8174 case 'e':
8175 { /* le */
8176 return -KEY_le;
8177 }
8178
4c3bbe0f
MHM
8179 case 't':
8180 { /* lt */
8181 return -KEY_lt;
8182 }
8183
4c3bbe0f
MHM
8184 default:
8185 goto unknown;
8186 }
8187
8188 case 'm':
8189 if (name[1] == 'y')
8190 { /* my */
8191 return KEY_my;
8192 }
8193
8194 goto unknown;
8195
8196 case 'n':
8197 switch (name[1])
8198 {
8199 case 'e':
8200 { /* ne */
8201 return -KEY_ne;
8202 }
8203
4c3bbe0f
MHM
8204 case 'o':
8205 { /* no */
8206 return KEY_no;
8207 }
8208
4c3bbe0f
MHM
8209 default:
8210 goto unknown;
8211 }
8212
8213 case 'o':
8214 if (name[1] == 'r')
8215 { /* or */
8216 return -KEY_or;
8217 }
8218
8219 goto unknown;
8220
8221 case 'q':
8222 switch (name[1])
8223 {
8224 case 'q':
8225 { /* qq */
8226 return KEY_qq;
8227 }
8228
4c3bbe0f
MHM
8229 case 'r':
8230 { /* qr */
8231 return KEY_qr;
8232 }
8233
4c3bbe0f
MHM
8234 case 'w':
8235 { /* qw */
8236 return KEY_qw;
8237 }
8238
4c3bbe0f
MHM
8239 case 'x':
8240 { /* qx */
8241 return KEY_qx;
8242 }
8243
4c3bbe0f
MHM
8244 default:
8245 goto unknown;
8246 }
8247
8248 case 't':
8249 if (name[1] == 'r')
8250 { /* tr */
8251 return KEY_tr;
8252 }
8253
8254 goto unknown;
8255
8256 case 'u':
8257 if (name[1] == 'c')
8258 { /* uc */
8259 return -KEY_uc;
8260 }
8261
8262 goto unknown;
8263
8264 default:
8265 goto unknown;
e2e1dd5a 8266 }
4c3bbe0f 8267
0d863452 8268 case 3: /* 29 tokens of length 3 */
4c3bbe0f 8269 switch (name[0])
e2e1dd5a 8270 {
4c3bbe0f
MHM
8271 case 'E':
8272 if (name[1] == 'N' &&
8273 name[2] == 'D')
8274 { /* END */
8275 return KEY_END;
8276 }
8277
8278 goto unknown;
8279
8280 case 'a':
8281 switch (name[1])
8282 {
8283 case 'b':
8284 if (name[2] == 's')
8285 { /* abs */
8286 return -KEY_abs;
8287 }
8288
8289 goto unknown;
8290
8291 case 'n':
8292 if (name[2] == 'd')
8293 { /* and */
8294 return -KEY_and;
8295 }
8296
8297 goto unknown;
8298
8299 default:
8300 goto unknown;
8301 }
8302
8303 case 'c':
8304 switch (name[1])
8305 {
8306 case 'h':
8307 if (name[2] == 'r')
8308 { /* chr */
8309 return -KEY_chr;
8310 }
8311
8312 goto unknown;
8313
8314 case 'm':
8315 if (name[2] == 'p')
8316 { /* cmp */
8317 return -KEY_cmp;
8318 }
8319
8320 goto unknown;
8321
8322 case 'o':
8323 if (name[2] == 's')
8324 { /* cos */
8325 return -KEY_cos;
8326 }
8327
8328 goto unknown;
8329
8330 default:
8331 goto unknown;
8332 }
8333
8334 case 'd':
8335 if (name[1] == 'i' &&
8336 name[2] == 'e')
8337 { /* die */
8338 return -KEY_die;
8339 }
8340
8341 goto unknown;
8342
8343 case 'e':
8344 switch (name[1])
8345 {
8346 case 'o':
8347 if (name[2] == 'f')
8348 { /* eof */
8349 return -KEY_eof;
8350 }
8351
8352 goto unknown;
8353
4c3bbe0f
MHM
8354 case 'x':
8355 if (name[2] == 'p')
8356 { /* exp */
8357 return -KEY_exp;
8358 }
8359
8360 goto unknown;
8361
8362 default:
8363 goto unknown;
8364 }
8365
8366 case 'f':
8367 if (name[1] == 'o' &&
8368 name[2] == 'r')
8369 { /* for */
8370 return KEY_for;
8371 }
8372
8373 goto unknown;
8374
8375 case 'h':
8376 if (name[1] == 'e' &&
8377 name[2] == 'x')
8378 { /* hex */
8379 return -KEY_hex;
8380 }
8381
8382 goto unknown;
8383
8384 case 'i':
8385 if (name[1] == 'n' &&
8386 name[2] == 't')
8387 { /* int */
8388 return -KEY_int;
8389 }
8390
8391 goto unknown;
8392
8393 case 'l':
8394 if (name[1] == 'o' &&
8395 name[2] == 'g')
8396 { /* log */
8397 return -KEY_log;
8398 }
8399
8400 goto unknown;
8401
8402 case 'm':
8403 if (name[1] == 'a' &&
8404 name[2] == 'p')
8405 { /* map */
8406 return KEY_map;
8407 }
8408
8409 goto unknown;
8410
8411 case 'n':
8412 if (name[1] == 'o' &&
8413 name[2] == 't')
8414 { /* not */
8415 return -KEY_not;
8416 }
8417
8418 goto unknown;
8419
8420 case 'o':
8421 switch (name[1])
8422 {
8423 case 'c':
8424 if (name[2] == 't')
8425 { /* oct */
8426 return -KEY_oct;
8427 }
8428
8429 goto unknown;
8430
8431 case 'r':
8432 if (name[2] == 'd')
8433 { /* ord */
8434 return -KEY_ord;
8435 }
8436
8437 goto unknown;
8438
8439 case 'u':
8440 if (name[2] == 'r')
8441 { /* our */
8442 return KEY_our;
8443 }
8444
8445 goto unknown;
8446
8447 default:
8448 goto unknown;
8449 }
8450
8451 case 'p':
8452 if (name[1] == 'o')
8453 {
8454 switch (name[2])
8455 {
8456 case 'p':
8457 { /* pop */
8458 return -KEY_pop;
8459 }
8460
4c3bbe0f
MHM
8461 case 's':
8462 { /* pos */
8463 return KEY_pos;
8464 }
8465
4c3bbe0f
MHM
8466 default:
8467 goto unknown;
8468 }
8469 }
8470
8471 goto unknown;
8472
8473 case 'r':
8474 if (name[1] == 'e' &&
8475 name[2] == 'f')
8476 { /* ref */
8477 return -KEY_ref;
8478 }
8479
8480 goto unknown;
8481
8482 case 's':
8483 switch (name[1])
8484 {
0d863452
RH
8485 case 'a':
8486 if (name[2] == 'y')
8487 { /* say */
e3e804c9 8488 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8489 }
8490
8491 goto unknown;
8492
4c3bbe0f
MHM
8493 case 'i':
8494 if (name[2] == 'n')
8495 { /* sin */
8496 return -KEY_sin;
8497 }
8498
8499 goto unknown;
8500
8501 case 'u':
8502 if (name[2] == 'b')
8503 { /* sub */
8504 return KEY_sub;
8505 }
8506
8507 goto unknown;
8508
8509 default:
8510 goto unknown;
8511 }
8512
8513 case 't':
8514 if (name[1] == 'i' &&
8515 name[2] == 'e')
8516 { /* tie */
8517 return KEY_tie;
8518 }
8519
8520 goto unknown;
8521
8522 case 'u':
8523 if (name[1] == 's' &&
8524 name[2] == 'e')
8525 { /* use */
8526 return KEY_use;
8527 }
8528
8529 goto unknown;
8530
8531 case 'v':
8532 if (name[1] == 'e' &&
8533 name[2] == 'c')
8534 { /* vec */
8535 return -KEY_vec;
8536 }
8537
8538 goto unknown;
8539
8540 case 'x':
8541 if (name[1] == 'o' &&
8542 name[2] == 'r')
8543 { /* xor */
8544 return -KEY_xor;
8545 }
8546
8547 goto unknown;
8548
8549 default:
8550 goto unknown;
e2e1dd5a 8551 }
4c3bbe0f 8552
0d863452 8553 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8554 switch (name[0])
e2e1dd5a 8555 {
4c3bbe0f
MHM
8556 case 'C':
8557 if (name[1] == 'O' &&
8558 name[2] == 'R' &&
8559 name[3] == 'E')
8560 { /* CORE */
8561 return -KEY_CORE;
8562 }
8563
8564 goto unknown;
8565
8566 case 'I':
8567 if (name[1] == 'N' &&
8568 name[2] == 'I' &&
8569 name[3] == 'T')
8570 { /* INIT */
8571 return KEY_INIT;
8572 }
8573
8574 goto unknown;
8575
8576 case 'b':
8577 if (name[1] == 'i' &&
8578 name[2] == 'n' &&
8579 name[3] == 'd')
8580 { /* bind */
8581 return -KEY_bind;
8582 }
8583
8584 goto unknown;
8585
8586 case 'c':
8587 if (name[1] == 'h' &&
8588 name[2] == 'o' &&
8589 name[3] == 'p')
8590 { /* chop */
8591 return -KEY_chop;
8592 }
8593
8594 goto unknown;
8595
8596 case 'd':
8597 if (name[1] == 'u' &&
8598 name[2] == 'm' &&
8599 name[3] == 'p')
8600 { /* dump */
8601 return -KEY_dump;
8602 }
8603
8604 goto unknown;
8605
8606 case 'e':
8607 switch (name[1])
8608 {
8609 case 'a':
8610 if (name[2] == 'c' &&
8611 name[3] == 'h')
8612 { /* each */
8613 return -KEY_each;
8614 }
8615
8616 goto unknown;
8617
8618 case 'l':
8619 if (name[2] == 's' &&
8620 name[3] == 'e')
8621 { /* else */
8622 return KEY_else;
8623 }
8624
8625 goto unknown;
8626
8627 case 'v':
8628 if (name[2] == 'a' &&
8629 name[3] == 'l')
8630 { /* eval */
8631 return KEY_eval;
8632 }
8633
8634 goto unknown;
8635
8636 case 'x':
8637 switch (name[2])
8638 {
8639 case 'e':
8640 if (name[3] == 'c')
8641 { /* exec */
8642 return -KEY_exec;
8643 }
8644
8645 goto unknown;
8646
8647 case 'i':
8648 if (name[3] == 't')
8649 { /* exit */
8650 return -KEY_exit;
8651 }
8652
8653 goto unknown;
8654
8655 default:
8656 goto unknown;
8657 }
8658
8659 default:
8660 goto unknown;
8661 }
8662
8663 case 'f':
8664 if (name[1] == 'o' &&
8665 name[2] == 'r' &&
8666 name[3] == 'k')
8667 { /* fork */
8668 return -KEY_fork;
8669 }
8670
8671 goto unknown;
8672
8673 case 'g':
8674 switch (name[1])
8675 {
8676 case 'e':
8677 if (name[2] == 't' &&
8678 name[3] == 'c')
8679 { /* getc */
8680 return -KEY_getc;
8681 }
8682
8683 goto unknown;
8684
8685 case 'l':
8686 if (name[2] == 'o' &&
8687 name[3] == 'b')
8688 { /* glob */
8689 return KEY_glob;
8690 }
8691
8692 goto unknown;
8693
8694 case 'o':
8695 if (name[2] == 't' &&
8696 name[3] == 'o')
8697 { /* goto */
8698 return KEY_goto;
8699 }
8700
8701 goto unknown;
8702
8703 case 'r':
8704 if (name[2] == 'e' &&
8705 name[3] == 'p')
8706 { /* grep */
8707 return KEY_grep;
8708 }
8709
8710 goto unknown;
8711
8712 default:
8713 goto unknown;
8714 }
8715
8716 case 'j':
8717 if (name[1] == 'o' &&
8718 name[2] == 'i' &&
8719 name[3] == 'n')
8720 { /* join */
8721 return -KEY_join;
8722 }
8723
8724 goto unknown;
8725
8726 case 'k':
8727 switch (name[1])
8728 {
8729 case 'e':
8730 if (name[2] == 'y' &&
8731 name[3] == 's')
8732 { /* keys */
8733 return -KEY_keys;
8734 }
8735
8736 goto unknown;
8737
8738 case 'i':
8739 if (name[2] == 'l' &&
8740 name[3] == 'l')
8741 { /* kill */
8742 return -KEY_kill;
8743 }
8744
8745 goto unknown;
8746
8747 default:
8748 goto unknown;
8749 }
8750
8751 case 'l':
8752 switch (name[1])
8753 {
8754 case 'a':
8755 if (name[2] == 's' &&
8756 name[3] == 't')
8757 { /* last */
8758 return KEY_last;
8759 }
8760
8761 goto unknown;
8762
8763 case 'i':
8764 if (name[2] == 'n' &&
8765 name[3] == 'k')
8766 { /* link */
8767 return -KEY_link;
8768 }
8769
8770 goto unknown;
8771
8772 case 'o':
8773 if (name[2] == 'c' &&
8774 name[3] == 'k')
8775 { /* lock */
8776 return -KEY_lock;
8777 }
8778
8779 goto unknown;
8780
8781 default:
8782 goto unknown;
8783 }
8784
8785 case 'n':
8786 if (name[1] == 'e' &&
8787 name[2] == 'x' &&
8788 name[3] == 't')
8789 { /* next */
8790 return KEY_next;
8791 }
8792
8793 goto unknown;
8794
8795 case 'o':
8796 if (name[1] == 'p' &&
8797 name[2] == 'e' &&
8798 name[3] == 'n')
8799 { /* open */
8800 return -KEY_open;
8801 }
8802
8803 goto unknown;
8804
8805 case 'p':
8806 switch (name[1])
8807 {
8808 case 'a':
8809 if (name[2] == 'c' &&
8810 name[3] == 'k')
8811 { /* pack */
8812 return -KEY_pack;
8813 }
8814
8815 goto unknown;
8816
8817 case 'i':
8818 if (name[2] == 'p' &&
8819 name[3] == 'e')
8820 { /* pipe */
8821 return -KEY_pipe;
8822 }
8823
8824 goto unknown;
8825
8826 case 'u':
8827 if (name[2] == 's' &&
8828 name[3] == 'h')
8829 { /* push */
8830 return -KEY_push;
8831 }
8832
8833 goto unknown;
8834
8835 default:
8836 goto unknown;
8837 }
8838
8839 case 'r':
8840 switch (name[1])
8841 {
8842 case 'a':
8843 if (name[2] == 'n' &&
8844 name[3] == 'd')
8845 { /* rand */
8846 return -KEY_rand;
8847 }
8848
8849 goto unknown;
8850
8851 case 'e':
8852 switch (name[2])
8853 {
8854 case 'a':
8855 if (name[3] == 'd')
8856 { /* read */
8857 return -KEY_read;
8858 }
8859
8860 goto unknown;
8861
8862 case 'c':
8863 if (name[3] == 'v')
8864 { /* recv */
8865 return -KEY_recv;
8866 }
8867
8868 goto unknown;
8869
8870 case 'd':
8871 if (name[3] == 'o')
8872 { /* redo */
8873 return KEY_redo;
8874 }
8875
8876 goto unknown;
8877
8878 default:
8879 goto unknown;
8880 }
8881
8882 default:
8883 goto unknown;
8884 }
8885
8886 case 's':
8887 switch (name[1])
8888 {
8889 case 'e':
8890 switch (name[2])
8891 {
8892 case 'e':
8893 if (name[3] == 'k')
8894 { /* seek */
8895 return -KEY_seek;
8896 }
8897
8898 goto unknown;
8899
8900 case 'n':
8901 if (name[3] == 'd')
8902 { /* send */
8903 return -KEY_send;
8904 }
8905
8906 goto unknown;
8907
8908 default:
8909 goto unknown;
8910 }
8911
8912 case 'o':
8913 if (name[2] == 'r' &&
8914 name[3] == 't')
8915 { /* sort */
8916 return KEY_sort;
8917 }
8918
8919 goto unknown;
8920
8921 case 'q':
8922 if (name[2] == 'r' &&
8923 name[3] == 't')
8924 { /* sqrt */
8925 return -KEY_sqrt;
8926 }
8927
8928 goto unknown;
8929
8930 case 't':
8931 if (name[2] == 'a' &&
8932 name[3] == 't')
8933 { /* stat */
8934 return -KEY_stat;
8935 }
8936
8937 goto unknown;
8938
8939 default:
8940 goto unknown;
8941 }
8942
8943 case 't':
8944 switch (name[1])
8945 {
8946 case 'e':
8947 if (name[2] == 'l' &&
8948 name[3] == 'l')
8949 { /* tell */
8950 return -KEY_tell;
8951 }
8952
8953 goto unknown;
8954
8955 case 'i':
8956 switch (name[2])
8957 {
8958 case 'e':
8959 if (name[3] == 'd')
8960 { /* tied */
8961 return KEY_tied;
8962 }
8963
8964 goto unknown;
8965
8966 case 'm':
8967 if (name[3] == 'e')
8968 { /* time */
8969 return -KEY_time;
8970 }
8971
8972 goto unknown;
8973
8974 default:
8975 goto unknown;
8976 }
8977
8978 default:
8979 goto unknown;
8980 }
8981
8982 case 'w':
0d863452 8983 switch (name[1])
4c3bbe0f 8984 {
0d863452 8985 case 'a':
952306ac
RGS
8986 switch (name[2])
8987 {
8988 case 'i':
8989 if (name[3] == 't')
8990 { /* wait */
8991 return -KEY_wait;
8992 }
4c3bbe0f 8993
952306ac 8994 goto unknown;
4c3bbe0f 8995
952306ac
RGS
8996 case 'r':
8997 if (name[3] == 'n')
8998 { /* warn */
8999 return -KEY_warn;
9000 }
4c3bbe0f 9001
952306ac 9002 goto unknown;
4c3bbe0f 9003
952306ac
RGS
9004 default:
9005 goto unknown;
9006 }
0d863452
RH
9007
9008 case 'h':
9009 if (name[2] == 'e' &&
9010 name[3] == 'n')
9011 { /* when */
5458a98a 9012 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 9013 }
4c3bbe0f 9014
952306ac 9015 goto unknown;
4c3bbe0f 9016
952306ac
RGS
9017 default:
9018 goto unknown;
9019 }
4c3bbe0f 9020
0d863452
RH
9021 default:
9022 goto unknown;
9023 }
9024
952306ac 9025 case 5: /* 39 tokens of length 5 */
4c3bbe0f 9026 switch (name[0])
e2e1dd5a 9027 {
4c3bbe0f
MHM
9028 case 'B':
9029 if (name[1] == 'E' &&
9030 name[2] == 'G' &&
9031 name[3] == 'I' &&
9032 name[4] == 'N')
9033 { /* BEGIN */
9034 return KEY_BEGIN;
9035 }
9036
9037 goto unknown;
9038
9039 case 'C':
9040 if (name[1] == 'H' &&
9041 name[2] == 'E' &&
9042 name[3] == 'C' &&
9043 name[4] == 'K')
9044 { /* CHECK */
9045 return KEY_CHECK;
9046 }
9047
9048 goto unknown;
9049
9050 case 'a':
9051 switch (name[1])
9052 {
9053 case 'l':
9054 if (name[2] == 'a' &&
9055 name[3] == 'r' &&
9056 name[4] == 'm')
9057 { /* alarm */
9058 return -KEY_alarm;
9059 }
9060
9061 goto unknown;
9062
9063 case 't':
9064 if (name[2] == 'a' &&
9065 name[3] == 'n' &&
9066 name[4] == '2')
9067 { /* atan2 */
9068 return -KEY_atan2;
9069 }
9070
9071 goto unknown;
9072
9073 default:
9074 goto unknown;
9075 }
9076
9077 case 'b':
0d863452
RH
9078 switch (name[1])
9079 {
9080 case 'l':
9081 if (name[2] == 'e' &&
952306ac
RGS
9082 name[3] == 's' &&
9083 name[4] == 's')
9084 { /* bless */
9085 return -KEY_bless;
9086 }
4c3bbe0f 9087
952306ac 9088 goto unknown;
4c3bbe0f 9089
0d863452
RH
9090 case 'r':
9091 if (name[2] == 'e' &&
9092 name[3] == 'a' &&
9093 name[4] == 'k')
9094 { /* break */
5458a98a 9095 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
9096 }
9097
9098 goto unknown;
9099
9100 default:
9101 goto unknown;
9102 }
9103
4c3bbe0f
MHM
9104 case 'c':
9105 switch (name[1])
9106 {
9107 case 'h':
9108 switch (name[2])
9109 {
9110 case 'd':
9111 if (name[3] == 'i' &&
9112 name[4] == 'r')
9113 { /* chdir */
9114 return -KEY_chdir;
9115 }
9116
9117 goto unknown;
9118
9119 case 'm':
9120 if (name[3] == 'o' &&
9121 name[4] == 'd')
9122 { /* chmod */
9123 return -KEY_chmod;
9124 }
9125
9126 goto unknown;
9127
9128 case 'o':
9129 switch (name[3])
9130 {
9131 case 'm':
9132 if (name[4] == 'p')
9133 { /* chomp */
9134 return -KEY_chomp;
9135 }
9136
9137 goto unknown;
9138
9139 case 'w':
9140 if (name[4] == 'n')
9141 { /* chown */
9142 return -KEY_chown;
9143 }
9144
9145 goto unknown;
9146
9147 default:
9148 goto unknown;
9149 }
9150
9151 default:
9152 goto unknown;
9153 }
9154
9155 case 'l':
9156 if (name[2] == 'o' &&
9157 name[3] == 's' &&
9158 name[4] == 'e')
9159 { /* close */
9160 return -KEY_close;
9161 }
9162
9163 goto unknown;
9164
9165 case 'r':
9166 if (name[2] == 'y' &&
9167 name[3] == 'p' &&
9168 name[4] == 't')
9169 { /* crypt */
9170 return -KEY_crypt;
9171 }
9172
9173 goto unknown;
9174
9175 default:
9176 goto unknown;
9177 }
9178
9179 case 'e':
9180 if (name[1] == 'l' &&
9181 name[2] == 's' &&
9182 name[3] == 'i' &&
9183 name[4] == 'f')
9184 { /* elsif */
9185 return KEY_elsif;
9186 }
9187
9188 goto unknown;
9189
9190 case 'f':
9191 switch (name[1])
9192 {
9193 case 'c':
9194 if (name[2] == 'n' &&
9195 name[3] == 't' &&
9196 name[4] == 'l')
9197 { /* fcntl */
9198 return -KEY_fcntl;
9199 }
9200
9201 goto unknown;
9202
9203 case 'l':
9204 if (name[2] == 'o' &&
9205 name[3] == 'c' &&
9206 name[4] == 'k')
9207 { /* flock */
9208 return -KEY_flock;
9209 }
9210
9211 goto unknown;
9212
9213 default:
9214 goto unknown;
9215 }
9216
0d863452
RH
9217 case 'g':
9218 if (name[1] == 'i' &&
9219 name[2] == 'v' &&
9220 name[3] == 'e' &&
9221 name[4] == 'n')
9222 { /* given */
5458a98a 9223 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
9224 }
9225
9226 goto unknown;
9227
4c3bbe0f
MHM
9228 case 'i':
9229 switch (name[1])
9230 {
9231 case 'n':
9232 if (name[2] == 'd' &&
9233 name[3] == 'e' &&
9234 name[4] == 'x')
9235 { /* index */
9236 return -KEY_index;
9237 }
9238
9239 goto unknown;
9240
9241 case 'o':
9242 if (name[2] == 'c' &&
9243 name[3] == 't' &&
9244 name[4] == 'l')
9245 { /* ioctl */
9246 return -KEY_ioctl;
9247 }
9248
9249 goto unknown;
9250
9251 default:
9252 goto unknown;
9253 }
9254
9255 case 'l':
9256 switch (name[1])
9257 {
9258 case 'o':
9259 if (name[2] == 'c' &&
9260 name[3] == 'a' &&
9261 name[4] == 'l')
9262 { /* local */
9263 return KEY_local;
9264 }
9265
9266 goto unknown;
9267
9268 case 's':
9269 if (name[2] == 't' &&
9270 name[3] == 'a' &&
9271 name[4] == 't')
9272 { /* lstat */
9273 return -KEY_lstat;
9274 }
9275
9276 goto unknown;
9277
9278 default:
9279 goto unknown;
9280 }
9281
9282 case 'm':
9283 if (name[1] == 'k' &&
9284 name[2] == 'd' &&
9285 name[3] == 'i' &&
9286 name[4] == 'r')
9287 { /* mkdir */
9288 return -KEY_mkdir;
9289 }
9290
9291 goto unknown;
9292
9293 case 'p':
9294 if (name[1] == 'r' &&
9295 name[2] == 'i' &&
9296 name[3] == 'n' &&
9297 name[4] == 't')
9298 { /* print */
9299 return KEY_print;
9300 }
9301
9302 goto unknown;
9303
9304 case 'r':
9305 switch (name[1])
9306 {
9307 case 'e':
9308 if (name[2] == 's' &&
9309 name[3] == 'e' &&
9310 name[4] == 't')
9311 { /* reset */
9312 return -KEY_reset;
9313 }
9314
9315 goto unknown;
9316
9317 case 'm':
9318 if (name[2] == 'd' &&
9319 name[3] == 'i' &&
9320 name[4] == 'r')
9321 { /* rmdir */
9322 return -KEY_rmdir;
9323 }
9324
9325 goto unknown;
9326
9327 default:
9328 goto unknown;
9329 }
9330
9331 case 's':
9332 switch (name[1])
9333 {
9334 case 'e':
9335 if (name[2] == 'm' &&
9336 name[3] == 'o' &&
9337 name[4] == 'p')
9338 { /* semop */
9339 return -KEY_semop;
9340 }
9341
9342 goto unknown;
9343
9344 case 'h':
9345 if (name[2] == 'i' &&
9346 name[3] == 'f' &&
9347 name[4] == 't')
9348 { /* shift */
9349 return -KEY_shift;
9350 }
9351
9352 goto unknown;
9353
9354 case 'l':
9355 if (name[2] == 'e' &&
9356 name[3] == 'e' &&
9357 name[4] == 'p')
9358 { /* sleep */
9359 return -KEY_sleep;
9360 }
9361
9362 goto unknown;
9363
9364 case 'p':
9365 if (name[2] == 'l' &&
9366 name[3] == 'i' &&
9367 name[4] == 't')
9368 { /* split */
9369 return KEY_split;
9370 }
9371
9372 goto unknown;
9373
9374 case 'r':
9375 if (name[2] == 'a' &&
9376 name[3] == 'n' &&
9377 name[4] == 'd')
9378 { /* srand */
9379 return -KEY_srand;
9380 }
9381
9382 goto unknown;
9383
9384 case 't':
952306ac
RGS
9385 switch (name[2])
9386 {
9387 case 'a':
9388 if (name[3] == 't' &&
9389 name[4] == 'e')
9390 { /* state */
5458a98a 9391 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9392 }
4c3bbe0f 9393
952306ac
RGS
9394 goto unknown;
9395
9396 case 'u':
9397 if (name[3] == 'd' &&
9398 name[4] == 'y')
9399 { /* study */
9400 return KEY_study;
9401 }
9402
9403 goto unknown;
9404
9405 default:
9406 goto unknown;
9407 }
4c3bbe0f
MHM
9408
9409 default:
9410 goto unknown;
9411 }
9412
9413 case 't':
9414 if (name[1] == 'i' &&
9415 name[2] == 'm' &&
9416 name[3] == 'e' &&
9417 name[4] == 's')
9418 { /* times */
9419 return -KEY_times;
9420 }
9421
9422 goto unknown;
9423
9424 case 'u':
9425 switch (name[1])
9426 {
9427 case 'm':
9428 if (name[2] == 'a' &&
9429 name[3] == 's' &&
9430 name[4] == 'k')
9431 { /* umask */
9432 return -KEY_umask;
9433 }
9434
9435 goto unknown;
9436
9437 case 'n':
9438 switch (name[2])
9439 {
9440 case 'd':
9441 if (name[3] == 'e' &&
9442 name[4] == 'f')
9443 { /* undef */
9444 return KEY_undef;
9445 }
9446
9447 goto unknown;
9448
9449 case 't':
9450 if (name[3] == 'i')
9451 {
9452 switch (name[4])
9453 {
9454 case 'e':
9455 { /* untie */
9456 return KEY_untie;
9457 }
9458
4c3bbe0f
MHM
9459 case 'l':
9460 { /* until */
9461 return KEY_until;
9462 }
9463
4c3bbe0f
MHM
9464 default:
9465 goto unknown;
9466 }
9467 }
9468
9469 goto unknown;
9470
9471 default:
9472 goto unknown;
9473 }
9474
9475 case 't':
9476 if (name[2] == 'i' &&
9477 name[3] == 'm' &&
9478 name[4] == 'e')
9479 { /* utime */
9480 return -KEY_utime;
9481 }
9482
9483 goto unknown;
9484
9485 default:
9486 goto unknown;
9487 }
9488
9489 case 'w':
9490 switch (name[1])
9491 {
9492 case 'h':
9493 if (name[2] == 'i' &&
9494 name[3] == 'l' &&
9495 name[4] == 'e')
9496 { /* while */
9497 return KEY_while;
9498 }
9499
9500 goto unknown;
9501
9502 case 'r':
9503 if (name[2] == 'i' &&
9504 name[3] == 't' &&
9505 name[4] == 'e')
9506 { /* write */
9507 return -KEY_write;
9508 }
9509
9510 goto unknown;
9511
9512 default:
9513 goto unknown;
9514 }
9515
9516 default:
9517 goto unknown;
e2e1dd5a 9518 }
4c3bbe0f
MHM
9519
9520 case 6: /* 33 tokens of length 6 */
9521 switch (name[0])
9522 {
9523 case 'a':
9524 if (name[1] == 'c' &&
9525 name[2] == 'c' &&
9526 name[3] == 'e' &&
9527 name[4] == 'p' &&
9528 name[5] == 't')
9529 { /* accept */
9530 return -KEY_accept;
9531 }
9532
9533 goto unknown;
9534
9535 case 'c':
9536 switch (name[1])
9537 {
9538 case 'a':
9539 if (name[2] == 'l' &&
9540 name[3] == 'l' &&
9541 name[4] == 'e' &&
9542 name[5] == 'r')
9543 { /* caller */
9544 return -KEY_caller;
9545 }
9546
9547 goto unknown;
9548
9549 case 'h':
9550 if (name[2] == 'r' &&
9551 name[3] == 'o' &&
9552 name[4] == 'o' &&
9553 name[5] == 't')
9554 { /* chroot */
9555 return -KEY_chroot;
9556 }
9557
9558 goto unknown;
9559
9560 default:
9561 goto unknown;
9562 }
9563
9564 case 'd':
9565 if (name[1] == 'e' &&
9566 name[2] == 'l' &&
9567 name[3] == 'e' &&
9568 name[4] == 't' &&
9569 name[5] == 'e')
9570 { /* delete */
9571 return KEY_delete;
9572 }
9573
9574 goto unknown;
9575
9576 case 'e':
9577 switch (name[1])
9578 {
9579 case 'l':
9580 if (name[2] == 's' &&
9581 name[3] == 'e' &&
9582 name[4] == 'i' &&
9583 name[5] == 'f')
9584 { /* elseif */
9b387841 9585 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9586 }
9587
9588 goto unknown;
9589
9590 case 'x':
9591 if (name[2] == 'i' &&
9592 name[3] == 's' &&
9593 name[4] == 't' &&
9594 name[5] == 's')
9595 { /* exists */
9596 return KEY_exists;
9597 }
9598
9599 goto unknown;
9600
9601 default:
9602 goto unknown;
9603 }
9604
9605 case 'f':
9606 switch (name[1])
9607 {
9608 case 'i':
9609 if (name[2] == 'l' &&
9610 name[3] == 'e' &&
9611 name[4] == 'n' &&
9612 name[5] == 'o')
9613 { /* fileno */
9614 return -KEY_fileno;
9615 }
9616
9617 goto unknown;
9618
9619 case 'o':
9620 if (name[2] == 'r' &&
9621 name[3] == 'm' &&
9622 name[4] == 'a' &&
9623 name[5] == 't')
9624 { /* format */
9625 return KEY_format;
9626 }
9627
9628 goto unknown;
9629
9630 default:
9631 goto unknown;
9632 }
9633
9634 case 'g':
9635 if (name[1] == 'm' &&
9636 name[2] == 't' &&
9637 name[3] == 'i' &&
9638 name[4] == 'm' &&
9639 name[5] == 'e')
9640 { /* gmtime */
9641 return -KEY_gmtime;
9642 }
9643
9644 goto unknown;
9645
9646 case 'l':
9647 switch (name[1])
9648 {
9649 case 'e':
9650 if (name[2] == 'n' &&
9651 name[3] == 'g' &&
9652 name[4] == 't' &&
9653 name[5] == 'h')
9654 { /* length */
9655 return -KEY_length;
9656 }
9657
9658 goto unknown;
9659
9660 case 'i':
9661 if (name[2] == 's' &&
9662 name[3] == 't' &&
9663 name[4] == 'e' &&
9664 name[5] == 'n')
9665 { /* listen */
9666 return -KEY_listen;
9667 }
9668
9669 goto unknown;
9670
9671 default:
9672 goto unknown;
9673 }
9674
9675 case 'm':
9676 if (name[1] == 's' &&
9677 name[2] == 'g')
9678 {
9679 switch (name[3])
9680 {
9681 case 'c':
9682 if (name[4] == 't' &&
9683 name[5] == 'l')
9684 { /* msgctl */
9685 return -KEY_msgctl;
9686 }
9687
9688 goto unknown;
9689
9690 case 'g':
9691 if (name[4] == 'e' &&
9692 name[5] == 't')
9693 { /* msgget */
9694 return -KEY_msgget;
9695 }
9696
9697 goto unknown;
9698
9699 case 'r':
9700 if (name[4] == 'c' &&
9701 name[5] == 'v')
9702 { /* msgrcv */
9703 return -KEY_msgrcv;
9704 }
9705
9706 goto unknown;
9707
9708 case 's':
9709 if (name[4] == 'n' &&
9710 name[5] == 'd')
9711 { /* msgsnd */
9712 return -KEY_msgsnd;
9713 }
9714
9715 goto unknown;
9716
9717 default:
9718 goto unknown;
9719 }
9720 }
9721
9722 goto unknown;
9723
9724 case 'p':
9725 if (name[1] == 'r' &&
9726 name[2] == 'i' &&
9727 name[3] == 'n' &&
9728 name[4] == 't' &&
9729 name[5] == 'f')
9730 { /* printf */
9731 return KEY_printf;
9732 }
9733
9734 goto unknown;
9735
9736 case 'r':
9737 switch (name[1])
9738 {
9739 case 'e':
9740 switch (name[2])
9741 {
9742 case 'n':
9743 if (name[3] == 'a' &&
9744 name[4] == 'm' &&
9745 name[5] == 'e')
9746 { /* rename */
9747 return -KEY_rename;
9748 }
9749
9750 goto unknown;
9751
9752 case 't':
9753 if (name[3] == 'u' &&
9754 name[4] == 'r' &&
9755 name[5] == 'n')
9756 { /* return */
9757 return KEY_return;
9758 }
9759
9760 goto unknown;
9761
9762 default:
9763 goto unknown;
9764 }
9765
9766 case 'i':
9767 if (name[2] == 'n' &&
9768 name[3] == 'd' &&
9769 name[4] == 'e' &&
9770 name[5] == 'x')
9771 { /* rindex */
9772 return -KEY_rindex;
9773 }
9774
9775 goto unknown;
9776
9777 default:
9778 goto unknown;
9779 }
9780
9781 case 's':
9782 switch (name[1])
9783 {
9784 case 'c':
9785 if (name[2] == 'a' &&
9786 name[3] == 'l' &&
9787 name[4] == 'a' &&
9788 name[5] == 'r')
9789 { /* scalar */
9790 return KEY_scalar;
9791 }
9792
9793 goto unknown;
9794
9795 case 'e':
9796 switch (name[2])
9797 {
9798 case 'l':
9799 if (name[3] == 'e' &&
9800 name[4] == 'c' &&
9801 name[5] == 't')
9802 { /* select */
9803 return -KEY_select;
9804 }
9805
9806 goto unknown;
9807
9808 case 'm':
9809 switch (name[3])
9810 {
9811 case 'c':
9812 if (name[4] == 't' &&
9813 name[5] == 'l')
9814 { /* semctl */
9815 return -KEY_semctl;
9816 }
9817
9818 goto unknown;
9819
9820 case 'g':
9821 if (name[4] == 'e' &&
9822 name[5] == 't')
9823 { /* semget */
9824 return -KEY_semget;
9825 }
9826
9827 goto unknown;
9828
9829 default:
9830 goto unknown;
9831 }
9832
9833 default:
9834 goto unknown;
9835 }
9836
9837 case 'h':
9838 if (name[2] == 'm')
9839 {
9840 switch (name[3])
9841 {
9842 case 'c':
9843 if (name[4] == 't' &&
9844 name[5] == 'l')
9845 { /* shmctl */
9846 return -KEY_shmctl;
9847 }
9848
9849 goto unknown;
9850
9851 case 'g':
9852 if (name[4] == 'e' &&
9853 name[5] == 't')
9854 { /* shmget */
9855 return -KEY_shmget;
9856 }
9857
9858 goto unknown;
9859
9860 default:
9861 goto unknown;
9862 }
9863 }
9864
9865 goto unknown;
9866
9867 case 'o':
9868 if (name[2] == 'c' &&
9869 name[3] == 'k' &&
9870 name[4] == 'e' &&
9871 name[5] == 't')
9872 { /* socket */
9873 return -KEY_socket;
9874 }
9875
9876 goto unknown;
9877
9878 case 'p':
9879 if (name[2] == 'l' &&
9880 name[3] == 'i' &&
9881 name[4] == 'c' &&
9882 name[5] == 'e')
9883 { /* splice */
9884 return -KEY_splice;
9885 }
9886
9887 goto unknown;
9888
9889 case 'u':
9890 if (name[2] == 'b' &&
9891 name[3] == 's' &&
9892 name[4] == 't' &&
9893 name[5] == 'r')
9894 { /* substr */
9895 return -KEY_substr;
9896 }
9897
9898 goto unknown;
9899
9900 case 'y':
9901 if (name[2] == 's' &&
9902 name[3] == 't' &&
9903 name[4] == 'e' &&
9904 name[5] == 'm')
9905 { /* system */
9906 return -KEY_system;
9907 }
9908
9909 goto unknown;
9910
9911 default:
9912 goto unknown;
9913 }
9914
9915 case 'u':
9916 if (name[1] == 'n')
9917 {
9918 switch (name[2])
9919 {
9920 case 'l':
9921 switch (name[3])
9922 {
9923 case 'e':
9924 if (name[4] == 's' &&
9925 name[5] == 's')
9926 { /* unless */
9927 return KEY_unless;
9928 }
9929
9930 goto unknown;
9931
9932 case 'i':
9933 if (name[4] == 'n' &&
9934 name[5] == 'k')
9935 { /* unlink */
9936 return -KEY_unlink;
9937 }
9938
9939 goto unknown;
9940
9941 default:
9942 goto unknown;
9943 }
9944
9945 case 'p':
9946 if (name[3] == 'a' &&
9947 name[4] == 'c' &&
9948 name[5] == 'k')
9949 { /* unpack */
9950 return -KEY_unpack;
9951 }
9952
9953 goto unknown;
9954
9955 default:
9956 goto unknown;
9957 }
9958 }
9959
9960 goto unknown;
9961
9962 case 'v':
9963 if (name[1] == 'a' &&
9964 name[2] == 'l' &&
9965 name[3] == 'u' &&
9966 name[4] == 'e' &&
9967 name[5] == 's')
9968 { /* values */
9969 return -KEY_values;
9970 }
9971
9972 goto unknown;
9973
9974 default:
9975 goto unknown;
e2e1dd5a 9976 }
4c3bbe0f 9977
0d863452 9978 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
9979 switch (name[0])
9980 {
9981 case 'D':
9982 if (name[1] == 'E' &&
9983 name[2] == 'S' &&
9984 name[3] == 'T' &&
9985 name[4] == 'R' &&
9986 name[5] == 'O' &&
9987 name[6] == 'Y')
9988 { /* DESTROY */
9989 return KEY_DESTROY;
9990 }
9991
9992 goto unknown;
9993
9994 case '_':
9995 if (name[1] == '_' &&
9996 name[2] == 'E' &&
9997 name[3] == 'N' &&
9998 name[4] == 'D' &&
9999 name[5] == '_' &&
10000 name[6] == '_')
10001 { /* __END__ */
10002 return KEY___END__;
10003 }
10004
10005 goto unknown;
10006
10007 case 'b':
10008 if (name[1] == 'i' &&
10009 name[2] == 'n' &&
10010 name[3] == 'm' &&
10011 name[4] == 'o' &&
10012 name[5] == 'd' &&
10013 name[6] == 'e')
10014 { /* binmode */
10015 return -KEY_binmode;
10016 }
10017
10018 goto unknown;
10019
10020 case 'c':
10021 if (name[1] == 'o' &&
10022 name[2] == 'n' &&
10023 name[3] == 'n' &&
10024 name[4] == 'e' &&
10025 name[5] == 'c' &&
10026 name[6] == 't')
10027 { /* connect */
10028 return -KEY_connect;
10029 }
10030
10031 goto unknown;
10032
10033 case 'd':
10034 switch (name[1])
10035 {
10036 case 'b':
10037 if (name[2] == 'm' &&
10038 name[3] == 'o' &&
10039 name[4] == 'p' &&
10040 name[5] == 'e' &&
10041 name[6] == 'n')
10042 { /* dbmopen */
10043 return -KEY_dbmopen;
10044 }
10045
10046 goto unknown;
10047
10048 case 'e':
0d863452
RH
10049 if (name[2] == 'f')
10050 {
10051 switch (name[3])
10052 {
10053 case 'a':
10054 if (name[4] == 'u' &&
10055 name[5] == 'l' &&
10056 name[6] == 't')
10057 { /* default */
5458a98a 10058 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
10059 }
10060
10061 goto unknown;
10062
10063 case 'i':
10064 if (name[4] == 'n' &&
952306ac
RGS
10065 name[5] == 'e' &&
10066 name[6] == 'd')
10067 { /* defined */
10068 return KEY_defined;
10069 }
4c3bbe0f 10070
952306ac 10071 goto unknown;
4c3bbe0f 10072
952306ac
RGS
10073 default:
10074 goto unknown;
10075 }
0d863452
RH
10076 }
10077
10078 goto unknown;
10079
10080 default:
10081 goto unknown;
10082 }
4c3bbe0f
MHM
10083
10084 case 'f':
10085 if (name[1] == 'o' &&
10086 name[2] == 'r' &&
10087 name[3] == 'e' &&
10088 name[4] == 'a' &&
10089 name[5] == 'c' &&
10090 name[6] == 'h')
10091 { /* foreach */
10092 return KEY_foreach;
10093 }
10094
10095 goto unknown;
10096
10097 case 'g':
10098 if (name[1] == 'e' &&
10099 name[2] == 't' &&
10100 name[3] == 'p')
10101 {
10102 switch (name[4])
10103 {
10104 case 'g':
10105 if (name[5] == 'r' &&
10106 name[6] == 'p')
10107 { /* getpgrp */
10108 return -KEY_getpgrp;
10109 }
10110
10111 goto unknown;
10112
10113 case 'p':
10114 if (name[5] == 'i' &&
10115 name[6] == 'd')
10116 { /* getppid */
10117 return -KEY_getppid;
10118 }
10119
10120 goto unknown;
10121
10122 default:
10123 goto unknown;
10124 }
10125 }
10126
10127 goto unknown;
10128
10129 case 'l':
10130 if (name[1] == 'c' &&
10131 name[2] == 'f' &&
10132 name[3] == 'i' &&
10133 name[4] == 'r' &&
10134 name[5] == 's' &&
10135 name[6] == 't')
10136 { /* lcfirst */
10137 return -KEY_lcfirst;
10138 }
10139
10140 goto unknown;
10141
10142 case 'o':
10143 if (name[1] == 'p' &&
10144 name[2] == 'e' &&
10145 name[3] == 'n' &&
10146 name[4] == 'd' &&
10147 name[5] == 'i' &&
10148 name[6] == 'r')
10149 { /* opendir */
10150 return -KEY_opendir;
10151 }
10152
10153 goto unknown;
10154
10155 case 'p':
10156 if (name[1] == 'a' &&
10157 name[2] == 'c' &&
10158 name[3] == 'k' &&
10159 name[4] == 'a' &&
10160 name[5] == 'g' &&
10161 name[6] == 'e')
10162 { /* package */
10163 return KEY_package;
10164 }
10165
10166 goto unknown;
10167
10168 case 'r':
10169 if (name[1] == 'e')
10170 {
10171 switch (name[2])
10172 {
10173 case 'a':
10174 if (name[3] == 'd' &&
10175 name[4] == 'd' &&
10176 name[5] == 'i' &&
10177 name[6] == 'r')
10178 { /* readdir */
10179 return -KEY_readdir;
10180 }
10181
10182 goto unknown;
10183
10184 case 'q':
10185 if (name[3] == 'u' &&
10186 name[4] == 'i' &&
10187 name[5] == 'r' &&
10188 name[6] == 'e')
10189 { /* require */
10190 return KEY_require;
10191 }
10192
10193 goto unknown;
10194
10195 case 'v':
10196 if (name[3] == 'e' &&
10197 name[4] == 'r' &&
10198 name[5] == 's' &&
10199 name[6] == 'e')
10200 { /* reverse */
10201 return -KEY_reverse;
10202 }
10203
10204 goto unknown;
10205
10206 default:
10207 goto unknown;
10208 }
10209 }
10210
10211 goto unknown;
10212
10213 case 's':
10214 switch (name[1])
10215 {
10216 case 'e':
10217 switch (name[2])
10218 {
10219 case 'e':
10220 if (name[3] == 'k' &&
10221 name[4] == 'd' &&
10222 name[5] == 'i' &&
10223 name[6] == 'r')
10224 { /* seekdir */
10225 return -KEY_seekdir;
10226 }
10227
10228 goto unknown;
10229
10230 case 't':
10231 if (name[3] == 'p' &&
10232 name[4] == 'g' &&
10233 name[5] == 'r' &&
10234 name[6] == 'p')
10235 { /* setpgrp */
10236 return -KEY_setpgrp;
10237 }
10238
10239 goto unknown;
10240
10241 default:
10242 goto unknown;
10243 }
10244
10245 case 'h':
10246 if (name[2] == 'm' &&
10247 name[3] == 'r' &&
10248 name[4] == 'e' &&
10249 name[5] == 'a' &&
10250 name[6] == 'd')
10251 { /* shmread */
10252 return -KEY_shmread;
10253 }
10254
10255 goto unknown;
10256
10257 case 'p':
10258 if (name[2] == 'r' &&
10259 name[3] == 'i' &&
10260 name[4] == 'n' &&
10261 name[5] == 't' &&
10262 name[6] == 'f')
10263 { /* sprintf */
10264 return -KEY_sprintf;
10265 }
10266
10267 goto unknown;
10268
10269 case 'y':
10270 switch (name[2])
10271 {
10272 case 'm':
10273 if (name[3] == 'l' &&
10274 name[4] == 'i' &&
10275 name[5] == 'n' &&
10276 name[6] == 'k')
10277 { /* symlink */
10278 return -KEY_symlink;
10279 }
10280
10281 goto unknown;
10282
10283 case 's':
10284 switch (name[3])
10285 {
10286 case 'c':
10287 if (name[4] == 'a' &&
10288 name[5] == 'l' &&
10289 name[6] == 'l')
10290 { /* syscall */
10291 return -KEY_syscall;
10292 }
10293
10294 goto unknown;
10295
10296 case 'o':
10297 if (name[4] == 'p' &&
10298 name[5] == 'e' &&
10299 name[6] == 'n')
10300 { /* sysopen */
10301 return -KEY_sysopen;
10302 }
10303
10304 goto unknown;
10305
10306 case 'r':
10307 if (name[4] == 'e' &&
10308 name[5] == 'a' &&
10309 name[6] == 'd')
10310 { /* sysread */
10311 return -KEY_sysread;
10312 }
10313
10314 goto unknown;
10315
10316 case 's':
10317 if (name[4] == 'e' &&
10318 name[5] == 'e' &&
10319 name[6] == 'k')
10320 { /* sysseek */
10321 return -KEY_sysseek;
10322 }
10323
10324 goto unknown;
10325
10326 default:
10327 goto unknown;
10328 }
10329
10330 default:
10331 goto unknown;
10332 }
10333
10334 default:
10335 goto unknown;
10336 }
10337
10338 case 't':
10339 if (name[1] == 'e' &&
10340 name[2] == 'l' &&
10341 name[3] == 'l' &&
10342 name[4] == 'd' &&
10343 name[5] == 'i' &&
10344 name[6] == 'r')
10345 { /* telldir */
10346 return -KEY_telldir;
10347 }
10348
10349 goto unknown;
10350
10351 case 'u':
10352 switch (name[1])
10353 {
10354 case 'c':
10355 if (name[2] == 'f' &&
10356 name[3] == 'i' &&
10357 name[4] == 'r' &&
10358 name[5] == 's' &&
10359 name[6] == 't')
10360 { /* ucfirst */
10361 return -KEY_ucfirst;
10362 }
10363
10364 goto unknown;
10365
10366 case 'n':
10367 if (name[2] == 's' &&
10368 name[3] == 'h' &&
10369 name[4] == 'i' &&
10370 name[5] == 'f' &&
10371 name[6] == 't')
10372 { /* unshift */
10373 return -KEY_unshift;
10374 }
10375
10376 goto unknown;
10377
10378 default:
10379 goto unknown;
10380 }
10381
10382 case 'w':
10383 if (name[1] == 'a' &&
10384 name[2] == 'i' &&
10385 name[3] == 't' &&
10386 name[4] == 'p' &&
10387 name[5] == 'i' &&
10388 name[6] == 'd')
10389 { /* waitpid */
10390 return -KEY_waitpid;
10391 }
10392
10393 goto unknown;
10394
10395 default:
10396 goto unknown;
10397 }
10398
10399 case 8: /* 26 tokens of length 8 */
10400 switch (name[0])
10401 {
10402 case 'A':
10403 if (name[1] == 'U' &&
10404 name[2] == 'T' &&
10405 name[3] == 'O' &&
10406 name[4] == 'L' &&
10407 name[5] == 'O' &&
10408 name[6] == 'A' &&
10409 name[7] == 'D')
10410 { /* AUTOLOAD */
10411 return KEY_AUTOLOAD;
10412 }
10413
10414 goto unknown;
10415
10416 case '_':
10417 if (name[1] == '_')
10418 {
10419 switch (name[2])
10420 {
10421 case 'D':
10422 if (name[3] == 'A' &&
10423 name[4] == 'T' &&
10424 name[5] == 'A' &&
10425 name[6] == '_' &&
10426 name[7] == '_')
10427 { /* __DATA__ */
10428 return KEY___DATA__;
10429 }
10430
10431 goto unknown;
10432
10433 case 'F':
10434 if (name[3] == 'I' &&
10435 name[4] == 'L' &&
10436 name[5] == 'E' &&
10437 name[6] == '_' &&
10438 name[7] == '_')
10439 { /* __FILE__ */
10440 return -KEY___FILE__;
10441 }
10442
10443 goto unknown;
10444
10445 case 'L':
10446 if (name[3] == 'I' &&
10447 name[4] == 'N' &&
10448 name[5] == 'E' &&
10449 name[6] == '_' &&
10450 name[7] == '_')
10451 { /* __LINE__ */
10452 return -KEY___LINE__;
10453 }
10454
10455 goto unknown;
10456
10457 default:
10458 goto unknown;
10459 }
10460 }
10461
10462 goto unknown;
10463
10464 case 'c':
10465 switch (name[1])
10466 {
10467 case 'l':
10468 if (name[2] == 'o' &&
10469 name[3] == 's' &&
10470 name[4] == 'e' &&
10471 name[5] == 'd' &&
10472 name[6] == 'i' &&
10473 name[7] == 'r')
10474 { /* closedir */
10475 return -KEY_closedir;
10476 }
10477
10478 goto unknown;
10479
10480 case 'o':
10481 if (name[2] == 'n' &&
10482 name[3] == 't' &&
10483 name[4] == 'i' &&
10484 name[5] == 'n' &&
10485 name[6] == 'u' &&
10486 name[7] == 'e')
10487 { /* continue */
10488 return -KEY_continue;
10489 }
10490
10491 goto unknown;
10492
10493 default:
10494 goto unknown;
10495 }
10496
10497 case 'd':
10498 if (name[1] == 'b' &&
10499 name[2] == 'm' &&
10500 name[3] == 'c' &&
10501 name[4] == 'l' &&
10502 name[5] == 'o' &&
10503 name[6] == 's' &&
10504 name[7] == 'e')
10505 { /* dbmclose */
10506 return -KEY_dbmclose;
10507 }
10508
10509 goto unknown;
10510
10511 case 'e':
10512 if (name[1] == 'n' &&
10513 name[2] == 'd')
10514 {
10515 switch (name[3])
10516 {
10517 case 'g':
10518 if (name[4] == 'r' &&
10519 name[5] == 'e' &&
10520 name[6] == 'n' &&
10521 name[7] == 't')
10522 { /* endgrent */
10523 return -KEY_endgrent;
10524 }
10525
10526 goto unknown;
10527
10528 case 'p':
10529 if (name[4] == 'w' &&
10530 name[5] == 'e' &&
10531 name[6] == 'n' &&
10532 name[7] == 't')
10533 { /* endpwent */
10534 return -KEY_endpwent;
10535 }
10536
10537 goto unknown;
10538
10539 default:
10540 goto unknown;
10541 }
10542 }
10543
10544 goto unknown;
10545
10546 case 'f':
10547 if (name[1] == 'o' &&
10548 name[2] == 'r' &&
10549 name[3] == 'm' &&
10550 name[4] == 'l' &&
10551 name[5] == 'i' &&
10552 name[6] == 'n' &&
10553 name[7] == 'e')
10554 { /* formline */
10555 return -KEY_formline;
10556 }
10557
10558 goto unknown;
10559
10560 case 'g':
10561 if (name[1] == 'e' &&
10562 name[2] == 't')
10563 {
10564 switch (name[3])
10565 {
10566 case 'g':
10567 if (name[4] == 'r')
10568 {
10569 switch (name[5])
10570 {
10571 case 'e':
10572 if (name[6] == 'n' &&
10573 name[7] == 't')
10574 { /* getgrent */
10575 return -KEY_getgrent;
10576 }
10577
10578 goto unknown;
10579
10580 case 'g':
10581 if (name[6] == 'i' &&
10582 name[7] == 'd')
10583 { /* getgrgid */
10584 return -KEY_getgrgid;
10585 }
10586
10587 goto unknown;
10588
10589 case 'n':
10590 if (name[6] == 'a' &&
10591 name[7] == 'm')
10592 { /* getgrnam */
10593 return -KEY_getgrnam;
10594 }
10595
10596 goto unknown;
10597
10598 default:
10599 goto unknown;
10600 }
10601 }
10602
10603 goto unknown;
10604
10605 case 'l':
10606 if (name[4] == 'o' &&
10607 name[5] == 'g' &&
10608 name[6] == 'i' &&
10609 name[7] == 'n')
10610 { /* getlogin */
10611 return -KEY_getlogin;
10612 }
10613
10614 goto unknown;
10615
10616 case 'p':
10617 if (name[4] == 'w')
10618 {
10619 switch (name[5])
10620 {
10621 case 'e':
10622 if (name[6] == 'n' &&
10623 name[7] == 't')
10624 { /* getpwent */
10625 return -KEY_getpwent;
10626 }
10627
10628 goto unknown;
10629
10630 case 'n':
10631 if (name[6] == 'a' &&
10632 name[7] == 'm')
10633 { /* getpwnam */
10634 return -KEY_getpwnam;
10635 }
10636
10637 goto unknown;
10638
10639 case 'u':
10640 if (name[6] == 'i' &&
10641 name[7] == 'd')
10642 { /* getpwuid */
10643 return -KEY_getpwuid;
10644 }
10645
10646 goto unknown;
10647
10648 default:
10649 goto unknown;
10650 }
10651 }
10652
10653 goto unknown;
10654
10655 default:
10656 goto unknown;
10657 }
10658 }
10659
10660 goto unknown;
10661
10662 case 'r':
10663 if (name[1] == 'e' &&
10664 name[2] == 'a' &&
10665 name[3] == 'd')
10666 {
10667 switch (name[4])
10668 {
10669 case 'l':
10670 if (name[5] == 'i' &&
10671 name[6] == 'n')
10672 {
10673 switch (name[7])
10674 {
10675 case 'e':
10676 { /* readline */
10677 return -KEY_readline;
10678 }
10679
4c3bbe0f
MHM
10680 case 'k':
10681 { /* readlink */
10682 return -KEY_readlink;
10683 }
10684
4c3bbe0f
MHM
10685 default:
10686 goto unknown;
10687 }
10688 }
10689
10690 goto unknown;
10691
10692 case 'p':
10693 if (name[5] == 'i' &&
10694 name[6] == 'p' &&
10695 name[7] == 'e')
10696 { /* readpipe */
10697 return -KEY_readpipe;
10698 }
10699
10700 goto unknown;
10701
10702 default:
10703 goto unknown;
10704 }
10705 }
10706
10707 goto unknown;
10708
10709 case 's':
10710 switch (name[1])
10711 {
10712 case 'e':
10713 if (name[2] == 't')
10714 {
10715 switch (name[3])
10716 {
10717 case 'g':
10718 if (name[4] == 'r' &&
10719 name[5] == 'e' &&
10720 name[6] == 'n' &&
10721 name[7] == 't')
10722 { /* setgrent */
10723 return -KEY_setgrent;
10724 }
10725
10726 goto unknown;
10727
10728 case 'p':
10729 if (name[4] == 'w' &&
10730 name[5] == 'e' &&
10731 name[6] == 'n' &&
10732 name[7] == 't')
10733 { /* setpwent */
10734 return -KEY_setpwent;
10735 }
10736
10737 goto unknown;
10738
10739 default:
10740 goto unknown;
10741 }
10742 }
10743
10744 goto unknown;
10745
10746 case 'h':
10747 switch (name[2])
10748 {
10749 case 'm':
10750 if (name[3] == 'w' &&
10751 name[4] == 'r' &&
10752 name[5] == 'i' &&
10753 name[6] == 't' &&
10754 name[7] == 'e')
10755 { /* shmwrite */
10756 return -KEY_shmwrite;
10757 }
10758
10759 goto unknown;
10760
10761 case 'u':
10762 if (name[3] == 't' &&
10763 name[4] == 'd' &&
10764 name[5] == 'o' &&
10765 name[6] == 'w' &&
10766 name[7] == 'n')
10767 { /* shutdown */
10768 return -KEY_shutdown;
10769 }
10770
10771 goto unknown;
10772
10773 default:
10774 goto unknown;
10775 }
10776
10777 case 'y':
10778 if (name[2] == 's' &&
10779 name[3] == 'w' &&
10780 name[4] == 'r' &&
10781 name[5] == 'i' &&
10782 name[6] == 't' &&
10783 name[7] == 'e')
10784 { /* syswrite */
10785 return -KEY_syswrite;
10786 }
10787
10788 goto unknown;
10789
10790 default:
10791 goto unknown;
10792 }
10793
10794 case 't':
10795 if (name[1] == 'r' &&
10796 name[2] == 'u' &&
10797 name[3] == 'n' &&
10798 name[4] == 'c' &&
10799 name[5] == 'a' &&
10800 name[6] == 't' &&
10801 name[7] == 'e')
10802 { /* truncate */
10803 return -KEY_truncate;
10804 }
10805
10806 goto unknown;
10807
10808 default:
10809 goto unknown;
10810 }
10811
3c10abe3 10812 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10813 switch (name[0])
10814 {
3c10abe3
AG
10815 case 'U':
10816 if (name[1] == 'N' &&
10817 name[2] == 'I' &&
10818 name[3] == 'T' &&
10819 name[4] == 'C' &&
10820 name[5] == 'H' &&
10821 name[6] == 'E' &&
10822 name[7] == 'C' &&
10823 name[8] == 'K')
10824 { /* UNITCHECK */
10825 return KEY_UNITCHECK;
10826 }
10827
10828 goto unknown;
10829
4c3bbe0f
MHM
10830 case 'e':
10831 if (name[1] == 'n' &&
10832 name[2] == 'd' &&
10833 name[3] == 'n' &&
10834 name[4] == 'e' &&
10835 name[5] == 't' &&
10836 name[6] == 'e' &&
10837 name[7] == 'n' &&
10838 name[8] == 't')
10839 { /* endnetent */
10840 return -KEY_endnetent;
10841 }
10842
10843 goto unknown;
10844
10845 case 'g':
10846 if (name[1] == 'e' &&
10847 name[2] == 't' &&
10848 name[3] == 'n' &&
10849 name[4] == 'e' &&
10850 name[5] == 't' &&
10851 name[6] == 'e' &&
10852 name[7] == 'n' &&
10853 name[8] == 't')
10854 { /* getnetent */
10855 return -KEY_getnetent;
10856 }
10857
10858 goto unknown;
10859
10860 case 'l':
10861 if (name[1] == 'o' &&
10862 name[2] == 'c' &&
10863 name[3] == 'a' &&
10864 name[4] == 'l' &&
10865 name[5] == 't' &&
10866 name[6] == 'i' &&
10867 name[7] == 'm' &&
10868 name[8] == 'e')
10869 { /* localtime */
10870 return -KEY_localtime;
10871 }
10872
10873 goto unknown;
10874
10875 case 'p':
10876 if (name[1] == 'r' &&
10877 name[2] == 'o' &&
10878 name[3] == 't' &&
10879 name[4] == 'o' &&
10880 name[5] == 't' &&
10881 name[6] == 'y' &&
10882 name[7] == 'p' &&
10883 name[8] == 'e')
10884 { /* prototype */
10885 return KEY_prototype;
10886 }
10887
10888 goto unknown;
10889
10890 case 'q':
10891 if (name[1] == 'u' &&
10892 name[2] == 'o' &&
10893 name[3] == 't' &&
10894 name[4] == 'e' &&
10895 name[5] == 'm' &&
10896 name[6] == 'e' &&
10897 name[7] == 't' &&
10898 name[8] == 'a')
10899 { /* quotemeta */
10900 return -KEY_quotemeta;
10901 }
10902
10903 goto unknown;
10904
10905 case 'r':
10906 if (name[1] == 'e' &&
10907 name[2] == 'w' &&
10908 name[3] == 'i' &&
10909 name[4] == 'n' &&
10910 name[5] == 'd' &&
10911 name[6] == 'd' &&
10912 name[7] == 'i' &&
10913 name[8] == 'r')
10914 { /* rewinddir */
10915 return -KEY_rewinddir;
10916 }
10917
10918 goto unknown;
10919
10920 case 's':
10921 if (name[1] == 'e' &&
10922 name[2] == 't' &&
10923 name[3] == 'n' &&
10924 name[4] == 'e' &&
10925 name[5] == 't' &&
10926 name[6] == 'e' &&
10927 name[7] == 'n' &&
10928 name[8] == 't')
10929 { /* setnetent */
10930 return -KEY_setnetent;
10931 }
10932
10933 goto unknown;
10934
10935 case 'w':
10936 if (name[1] == 'a' &&
10937 name[2] == 'n' &&
10938 name[3] == 't' &&
10939 name[4] == 'a' &&
10940 name[5] == 'r' &&
10941 name[6] == 'r' &&
10942 name[7] == 'a' &&
10943 name[8] == 'y')
10944 { /* wantarray */
10945 return -KEY_wantarray;
10946 }
10947
10948 goto unknown;
10949
10950 default:
10951 goto unknown;
10952 }
10953
10954 case 10: /* 9 tokens of length 10 */
10955 switch (name[0])
10956 {
10957 case 'e':
10958 if (name[1] == 'n' &&
10959 name[2] == 'd')
10960 {
10961 switch (name[3])
10962 {
10963 case 'h':
10964 if (name[4] == 'o' &&
10965 name[5] == 's' &&
10966 name[6] == 't' &&
10967 name[7] == 'e' &&
10968 name[8] == 'n' &&
10969 name[9] == 't')
10970 { /* endhostent */
10971 return -KEY_endhostent;
10972 }
10973
10974 goto unknown;
10975
10976 case 's':
10977 if (name[4] == 'e' &&
10978 name[5] == 'r' &&
10979 name[6] == 'v' &&
10980 name[7] == 'e' &&
10981 name[8] == 'n' &&
10982 name[9] == 't')
10983 { /* endservent */
10984 return -KEY_endservent;
10985 }
10986
10987 goto unknown;
10988
10989 default:
10990 goto unknown;
10991 }
10992 }
10993
10994 goto unknown;
10995
10996 case 'g':
10997 if (name[1] == 'e' &&
10998 name[2] == 't')
10999 {
11000 switch (name[3])
11001 {
11002 case 'h':
11003 if (name[4] == 'o' &&
11004 name[5] == 's' &&
11005 name[6] == 't' &&
11006 name[7] == 'e' &&
11007 name[8] == 'n' &&
11008 name[9] == 't')
11009 { /* gethostent */
11010 return -KEY_gethostent;
11011 }
11012
11013 goto unknown;
11014
11015 case 's':
11016 switch (name[4])
11017 {
11018 case 'e':
11019 if (name[5] == 'r' &&
11020 name[6] == 'v' &&
11021 name[7] == 'e' &&
11022 name[8] == 'n' &&
11023 name[9] == 't')
11024 { /* getservent */
11025 return -KEY_getservent;
11026 }
11027
11028 goto unknown;
11029
11030 case 'o':
11031 if (name[5] == 'c' &&
11032 name[6] == 'k' &&
11033 name[7] == 'o' &&
11034 name[8] == 'p' &&
11035 name[9] == 't')
11036 { /* getsockopt */
11037 return -KEY_getsockopt;
11038 }
11039
11040 goto unknown;
11041
11042 default:
11043 goto unknown;
11044 }
11045
11046 default:
11047 goto unknown;
11048 }
11049 }
11050
11051 goto unknown;
11052
11053 case 's':
11054 switch (name[1])
11055 {
11056 case 'e':
11057 if (name[2] == 't')
11058 {
11059 switch (name[3])
11060 {
11061 case 'h':
11062 if (name[4] == 'o' &&
11063 name[5] == 's' &&
11064 name[6] == 't' &&
11065 name[7] == 'e' &&
11066 name[8] == 'n' &&
11067 name[9] == 't')
11068 { /* sethostent */
11069 return -KEY_sethostent;
11070 }
11071
11072 goto unknown;
11073
11074 case 's':
11075 switch (name[4])
11076 {
11077 case 'e':
11078 if (name[5] == 'r' &&
11079 name[6] == 'v' &&
11080 name[7] == 'e' &&
11081 name[8] == 'n' &&
11082 name[9] == 't')
11083 { /* setservent */
11084 return -KEY_setservent;
11085 }
11086
11087 goto unknown;
11088
11089 case 'o':
11090 if (name[5] == 'c' &&
11091 name[6] == 'k' &&
11092 name[7] == 'o' &&
11093 name[8] == 'p' &&
11094 name[9] == 't')
11095 { /* setsockopt */
11096 return -KEY_setsockopt;
11097 }
11098
11099 goto unknown;
11100
11101 default:
11102 goto unknown;
11103 }
11104
11105 default:
11106 goto unknown;
11107 }
11108 }
11109
11110 goto unknown;
11111
11112 case 'o':
11113 if (name[2] == 'c' &&
11114 name[3] == 'k' &&
11115 name[4] == 'e' &&
11116 name[5] == 't' &&
11117 name[6] == 'p' &&
11118 name[7] == 'a' &&
11119 name[8] == 'i' &&
11120 name[9] == 'r')
11121 { /* socketpair */
11122 return -KEY_socketpair;
11123 }
11124
11125 goto unknown;
11126
11127 default:
11128 goto unknown;
11129 }
11130
11131 default:
11132 goto unknown;
e2e1dd5a 11133 }
4c3bbe0f
MHM
11134
11135 case 11: /* 8 tokens of length 11 */
11136 switch (name[0])
11137 {
11138 case '_':
11139 if (name[1] == '_' &&
11140 name[2] == 'P' &&
11141 name[3] == 'A' &&
11142 name[4] == 'C' &&
11143 name[5] == 'K' &&
11144 name[6] == 'A' &&
11145 name[7] == 'G' &&
11146 name[8] == 'E' &&
11147 name[9] == '_' &&
11148 name[10] == '_')
11149 { /* __PACKAGE__ */
11150 return -KEY___PACKAGE__;
11151 }
11152
11153 goto unknown;
11154
11155 case 'e':
11156 if (name[1] == 'n' &&
11157 name[2] == 'd' &&
11158 name[3] == 'p' &&
11159 name[4] == 'r' &&
11160 name[5] == 'o' &&
11161 name[6] == 't' &&
11162 name[7] == 'o' &&
11163 name[8] == 'e' &&
11164 name[9] == 'n' &&
11165 name[10] == 't')
11166 { /* endprotoent */
11167 return -KEY_endprotoent;
11168 }
11169
11170 goto unknown;
11171
11172 case 'g':
11173 if (name[1] == 'e' &&
11174 name[2] == 't')
11175 {
11176 switch (name[3])
11177 {
11178 case 'p':
11179 switch (name[4])
11180 {
11181 case 'e':
11182 if (name[5] == 'e' &&
11183 name[6] == 'r' &&
11184 name[7] == 'n' &&
11185 name[8] == 'a' &&
11186 name[9] == 'm' &&
11187 name[10] == 'e')
11188 { /* getpeername */
11189 return -KEY_getpeername;
11190 }
11191
11192 goto unknown;
11193
11194 case 'r':
11195 switch (name[5])
11196 {
11197 case 'i':
11198 if (name[6] == 'o' &&
11199 name[7] == 'r' &&
11200 name[8] == 'i' &&
11201 name[9] == 't' &&
11202 name[10] == 'y')
11203 { /* getpriority */
11204 return -KEY_getpriority;
11205 }
11206
11207 goto unknown;
11208
11209 case 'o':
11210 if (name[6] == 't' &&
11211 name[7] == 'o' &&
11212 name[8] == 'e' &&
11213 name[9] == 'n' &&
11214 name[10] == 't')
11215 { /* getprotoent */
11216 return -KEY_getprotoent;
11217 }
11218
11219 goto unknown;
11220
11221 default:
11222 goto unknown;
11223 }
11224
11225 default:
11226 goto unknown;
11227 }
11228
11229 case 's':
11230 if (name[4] == 'o' &&
11231 name[5] == 'c' &&
11232 name[6] == 'k' &&
11233 name[7] == 'n' &&
11234 name[8] == 'a' &&
11235 name[9] == 'm' &&
11236 name[10] == 'e')
11237 { /* getsockname */
11238 return -KEY_getsockname;
11239 }
11240
11241 goto unknown;
11242
11243 default:
11244 goto unknown;
11245 }
11246 }
11247
11248 goto unknown;
11249
11250 case 's':
11251 if (name[1] == 'e' &&
11252 name[2] == 't' &&
11253 name[3] == 'p' &&
11254 name[4] == 'r')
11255 {
11256 switch (name[5])
11257 {
11258 case 'i':
11259 if (name[6] == 'o' &&
11260 name[7] == 'r' &&
11261 name[8] == 'i' &&
11262 name[9] == 't' &&
11263 name[10] == 'y')
11264 { /* setpriority */
11265 return -KEY_setpriority;
11266 }
11267
11268 goto unknown;
11269
11270 case 'o':
11271 if (name[6] == 't' &&
11272 name[7] == 'o' &&
11273 name[8] == 'e' &&
11274 name[9] == 'n' &&
11275 name[10] == 't')
11276 { /* setprotoent */
11277 return -KEY_setprotoent;
11278 }
11279
11280 goto unknown;
11281
11282 default:
11283 goto unknown;
11284 }
11285 }
11286
11287 goto unknown;
11288
11289 default:
11290 goto unknown;
e2e1dd5a 11291 }
4c3bbe0f
MHM
11292
11293 case 12: /* 2 tokens of length 12 */
11294 if (name[0] == 'g' &&
11295 name[1] == 'e' &&
11296 name[2] == 't' &&
11297 name[3] == 'n' &&
11298 name[4] == 'e' &&
11299 name[5] == 't' &&
11300 name[6] == 'b' &&
11301 name[7] == 'y')
11302 {
11303 switch (name[8])
11304 {
11305 case 'a':
11306 if (name[9] == 'd' &&
11307 name[10] == 'd' &&
11308 name[11] == 'r')
11309 { /* getnetbyaddr */
11310 return -KEY_getnetbyaddr;
11311 }
11312
11313 goto unknown;
11314
11315 case 'n':
11316 if (name[9] == 'a' &&
11317 name[10] == 'm' &&
11318 name[11] == 'e')
11319 { /* getnetbyname */
11320 return -KEY_getnetbyname;
11321 }
11322
11323 goto unknown;
11324
11325 default:
11326 goto unknown;
11327 }
e2e1dd5a 11328 }
4c3bbe0f
MHM
11329
11330 goto unknown;
11331
11332 case 13: /* 4 tokens of length 13 */
11333 if (name[0] == 'g' &&
11334 name[1] == 'e' &&
11335 name[2] == 't')
11336 {
11337 switch (name[3])
11338 {
11339 case 'h':
11340 if (name[4] == 'o' &&
11341 name[5] == 's' &&
11342 name[6] == 't' &&
11343 name[7] == 'b' &&
11344 name[8] == 'y')
11345 {
11346 switch (name[9])
11347 {
11348 case 'a':
11349 if (name[10] == 'd' &&
11350 name[11] == 'd' &&
11351 name[12] == 'r')
11352 { /* gethostbyaddr */
11353 return -KEY_gethostbyaddr;
11354 }
11355
11356 goto unknown;
11357
11358 case 'n':
11359 if (name[10] == 'a' &&
11360 name[11] == 'm' &&
11361 name[12] == 'e')
11362 { /* gethostbyname */
11363 return -KEY_gethostbyname;
11364 }
11365
11366 goto unknown;
11367
11368 default:
11369 goto unknown;
11370 }
11371 }
11372
11373 goto unknown;
11374
11375 case 's':
11376 if (name[4] == 'e' &&
11377 name[5] == 'r' &&
11378 name[6] == 'v' &&
11379 name[7] == 'b' &&
11380 name[8] == 'y')
11381 {
11382 switch (name[9])
11383 {
11384 case 'n':
11385 if (name[10] == 'a' &&
11386 name[11] == 'm' &&
11387 name[12] == 'e')
11388 { /* getservbyname */
11389 return -KEY_getservbyname;
11390 }
11391
11392 goto unknown;
11393
11394 case 'p':
11395 if (name[10] == 'o' &&
11396 name[11] == 'r' &&
11397 name[12] == 't')
11398 { /* getservbyport */
11399 return -KEY_getservbyport;
11400 }
11401
11402 goto unknown;
11403
11404 default:
11405 goto unknown;
11406 }
11407 }
11408
11409 goto unknown;
11410
11411 default:
11412 goto unknown;
11413 }
e2e1dd5a 11414 }
4c3bbe0f
MHM
11415
11416 goto unknown;
11417
11418 case 14: /* 1 tokens of length 14 */
11419 if (name[0] == 'g' &&
11420 name[1] == 'e' &&
11421 name[2] == 't' &&
11422 name[3] == 'p' &&
11423 name[4] == 'r' &&
11424 name[5] == 'o' &&
11425 name[6] == 't' &&
11426 name[7] == 'o' &&
11427 name[8] == 'b' &&
11428 name[9] == 'y' &&
11429 name[10] == 'n' &&
11430 name[11] == 'a' &&
11431 name[12] == 'm' &&
11432 name[13] == 'e')
11433 { /* getprotobyname */
11434 return -KEY_getprotobyname;
11435 }
11436
11437 goto unknown;
11438
11439 case 16: /* 1 tokens of length 16 */
11440 if (name[0] == 'g' &&
11441 name[1] == 'e' &&
11442 name[2] == 't' &&
11443 name[3] == 'p' &&
11444 name[4] == 'r' &&
11445 name[5] == 'o' &&
11446 name[6] == 't' &&
11447 name[7] == 'o' &&
11448 name[8] == 'b' &&
11449 name[9] == 'y' &&
11450 name[10] == 'n' &&
11451 name[11] == 'u' &&
11452 name[12] == 'm' &&
11453 name[13] == 'b' &&
11454 name[14] == 'e' &&
11455 name[15] == 'r')
11456 { /* getprotobynumber */
11457 return -KEY_getprotobynumber;
11458 }
11459
11460 goto unknown;
11461
11462 default:
11463 goto unknown;
e2e1dd5a 11464 }
4c3bbe0f
MHM
11465
11466unknown:
e2e1dd5a 11467 return 0;
a687059c
LW
11468}
11469
76e3520e 11470STATIC void
c94115d8 11471S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11472{
97aff369 11473 dVAR;
2f3197b3 11474
7918f24d
NC
11475 PERL_ARGS_ASSERT_CHECKCOMMA;
11476
d008e5eb 11477 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11478 if (ckWARN(WARN_SYNTAX)) {
11479 int level = 1;
26ff0806 11480 const char *w;
d008e5eb
GS
11481 for (w = s+2; *w && level; w++) {
11482 if (*w == '(')
11483 ++level;
11484 else if (*w == ')')
11485 --level;
11486 }
888fea98
NC
11487 while (isSPACE(*w))
11488 ++w;
b1439985
RGS
11489 /* the list of chars below is for end of statements or
11490 * block / parens, boolean operators (&&, ||, //) and branch
11491 * constructs (or, and, if, until, unless, while, err, for).
11492 * Not a very solid hack... */
11493 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11494 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11495 "%s (...) interpreted as function",name);
d008e5eb 11496 }
2f3197b3 11497 }
3280af22 11498 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11499 s++;
a687059c
LW
11500 if (*s == '(')
11501 s++;
3280af22 11502 while (s < PL_bufend && isSPACE(*s))
a687059c 11503 s++;
7e2040f0 11504 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11505 const char * const w = s++;
7e2040f0 11506 while (isALNUM_lazy_if(s,UTF))
a687059c 11507 s++;
3280af22 11508 while (s < PL_bufend && isSPACE(*s))
a687059c 11509 s++;
e929a76b 11510 if (*s == ',') {
c94115d8 11511 GV* gv;
5458a98a 11512 if (keyword(w, s - w, 0))
e929a76b 11513 return;
c94115d8
NC
11514
11515 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11516 if (gv && GvCVu(gv))
abbb3198 11517 return;
cea2e8a9 11518 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11519 }
11520 }
11521}
11522
423cee85
JH
11523/* Either returns sv, or mortalizes sv and returns a new SV*.
11524 Best used as sv=new_constant(..., sv, ...).
11525 If s, pv are NULL, calls subroutine with one argument,
11526 and type is used with error messages only. */
11527
b3ac6de7 11528STATIC SV *
eb0d8d16
NC
11529S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11530 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11531{
27da23d5 11532 dVAR; dSP;
890ce7af 11533 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11534 SV *res;
b3ac6de7
IZ
11535 SV **cvp;
11536 SV *cv, *typesv;
89e33a05 11537 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11538
7918f24d
NC
11539 PERL_ARGS_ASSERT_NEW_CONSTANT;
11540
f0af216f 11541 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11542 SV *msg;
11543
10edeb5d
JH
11544 why2 = (const char *)
11545 (strEQ(key,"charnames")
11546 ? "(possibly a missing \"use charnames ...\")"
11547 : "");
4e553d73 11548 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11549 (type ? type: "undef"), why2);
11550
11551 /* This is convoluted and evil ("goto considered harmful")
11552 * but I do not understand the intricacies of all the different
11553 * failure modes of %^H in here. The goal here is to make
11554 * the most probable error message user-friendly. --jhi */
11555
11556 goto msgdone;
11557
423cee85 11558 report:
4e553d73 11559 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11560 (type ? type: "undef"), why1, why2, why3);
41ab332f 11561 msgdone:
95a20fc0 11562 yyerror(SvPVX_const(msg));
423cee85
JH
11563 SvREFCNT_dec(msg);
11564 return sv;
11565 }
ff3f963a
KW
11566
11567 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
11568 if (PL_error_count > 0 && strEQ(key,"charnames"))
11569 return &PL_sv_undef;
ff3f963a 11570
eb0d8d16 11571 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11572 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11573 why1 = "$^H{";
11574 why2 = key;
f0af216f 11575 why3 = "} is not defined";
423cee85 11576 goto report;
b3ac6de7
IZ
11577 }
11578 sv_2mortal(sv); /* Parent created it permanently */
11579 cv = *cvp;
423cee85 11580 if (!pv && s)
59cd0e26 11581 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11582 if (type && pv)
59cd0e26 11583 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11584 else
423cee85 11585 typesv = &PL_sv_undef;
4e553d73 11586
e788e7d3 11587 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11588 ENTER ;
11589 SAVETMPS;
4e553d73 11590
423cee85 11591 PUSHMARK(SP) ;
a5845cb7 11592 EXTEND(sp, 3);
423cee85
JH
11593 if (pv)
11594 PUSHs(pv);
b3ac6de7 11595 PUSHs(sv);
423cee85
JH
11596 if (pv)
11597 PUSHs(typesv);
b3ac6de7 11598 PUTBACK;
423cee85 11599 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11600
423cee85 11601 SPAGAIN ;
4e553d73 11602
423cee85 11603 /* Check the eval first */
9b0e499b 11604 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11605 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11606 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11607 (void)POPs;
b37c2d43 11608 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11609 }
11610 else {
11611 res = POPs;
b37c2d43 11612 SvREFCNT_inc_simple_void(res);
423cee85 11613 }
4e553d73 11614
423cee85
JH
11615 PUTBACK ;
11616 FREETMPS ;
11617 LEAVE ;
b3ac6de7 11618 POPSTACK;
4e553d73 11619
b3ac6de7 11620 if (!SvOK(res)) {
423cee85
JH
11621 why1 = "Call to &{$^H{";
11622 why2 = key;
f0af216f 11623 why3 = "}} did not return a defined value";
423cee85
JH
11624 sv = res;
11625 goto report;
9b0e499b 11626 }
423cee85 11627
9b0e499b 11628 return res;
b3ac6de7 11629}
4e553d73 11630
d0a148a6
NC
11631/* Returns a NUL terminated string, with the length of the string written to
11632 *slp
11633 */
76e3520e 11634STATIC char *
cea2e8a9 11635S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11636{
97aff369 11637 dVAR;
463ee0b2 11638 register char *d = dest;
890ce7af 11639 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11640
11641 PERL_ARGS_ASSERT_SCAN_WORD;
11642
463ee0b2 11643 for (;;) {
8903cb82 11644 if (d >= e)
cea2e8a9 11645 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11646 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11647 *d++ = *s++;
c35e046a 11648 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11649 *d++ = ':';
11650 *d++ = ':';
11651 s++;
11652 }
c35e046a 11653 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11654 *d++ = *s++;
11655 *d++ = *s++;
11656 }
fd400ab9 11657 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11658 char *t = s + UTF8SKIP(s);
c35e046a 11659 size_t len;
fd400ab9 11660 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11661 t += UTF8SKIP(t);
c35e046a
AL
11662 len = t - s;
11663 if (d + len > e)
cea2e8a9 11664 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11665 Copy(s, d, len, char);
11666 d += len;
a0ed51b3
LW
11667 s = t;
11668 }
463ee0b2
LW
11669 else {
11670 *d = '\0';
11671 *slp = d - dest;
11672 return s;
e929a76b 11673 }
378cc40b
LW
11674 }
11675}
11676
76e3520e 11677STATIC char *
f54cb97a 11678S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11679{
97aff369 11680 dVAR;
6136c704 11681 char *bracket = NULL;
748a9306 11682 char funny = *s++;
6136c704 11683 register char *d = dest;
0b3da58d 11684 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11685
7918f24d
NC
11686 PERL_ARGS_ASSERT_SCAN_IDENT;
11687
a0d0e21e 11688 if (isSPACE(*s))
29595ff2 11689 s = PEEKSPACE(s);
de3bb511 11690 if (isDIGIT(*s)) {
8903cb82 11691 while (isDIGIT(*s)) {
11692 if (d >= e)
cea2e8a9 11693 Perl_croak(aTHX_ ident_too_long);
378cc40b 11694 *d++ = *s++;
8903cb82 11695 }
378cc40b
LW
11696 }
11697 else {
463ee0b2 11698 for (;;) {
8903cb82 11699 if (d >= e)
cea2e8a9 11700 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11701 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11702 *d++ = *s++;
7e2040f0 11703 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11704 *d++ = ':';
11705 *d++ = ':';
11706 s++;
11707 }
a0d0e21e 11708 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11709 *d++ = *s++;
11710 *d++ = *s++;
11711 }
fd400ab9 11712 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11713 char *t = s + UTF8SKIP(s);
fd400ab9 11714 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11715 t += UTF8SKIP(t);
11716 if (d + (t - s) > e)
cea2e8a9 11717 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11718 Copy(s, d, t - s, char);
11719 d += t - s;
11720 s = t;
11721 }
463ee0b2
LW
11722 else
11723 break;
11724 }
378cc40b
LW
11725 }
11726 *d = '\0';
11727 d = dest;
79072805 11728 if (*d) {
3280af22
NIS
11729 if (PL_lex_state != LEX_NORMAL)
11730 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11731 return s;
378cc40b 11732 }
748a9306 11733 if (*s == '$' && s[1] &&
3792a11b 11734 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11735 {
4810e5ec 11736 return s;
5cd24f17 11737 }
79072805
LW
11738 if (*s == '{') {
11739 bracket = s;
11740 s++;
11741 }
11742 else if (ck_uni)
11743 check_uni();
93a17b20 11744 if (s < send)
79072805
LW
11745 *d = *s++;
11746 d[1] = '\0';
2b92dfce 11747 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11748 *d = toCTRL(*s);
11749 s++;
de3bb511 11750 }
79072805 11751 if (bracket) {
748a9306 11752 if (isSPACE(s[-1])) {
fa83b5b6 11753 while (s < send) {
f54cb97a 11754 const char ch = *s++;
bf4acbe4 11755 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11756 *d = ch;
11757 break;
11758 }
11759 }
748a9306 11760 }
7e2040f0 11761 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11762 d++;
a0ed51b3 11763 if (UTF) {
6136c704
AL
11764 char *end = s;
11765 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11766 end += UTF8SKIP(end);
11767 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11768 end += UTF8SKIP(end);
a0ed51b3 11769 }
6136c704
AL
11770 Copy(s, d, end - s, char);
11771 d += end - s;
11772 s = end;
a0ed51b3
LW
11773 }
11774 else {
2b92dfce 11775 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11776 *d++ = *s++;
2b92dfce 11777 if (d >= e)
cea2e8a9 11778 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11779 }
79072805 11780 *d = '\0';
c35e046a
AL
11781 while (s < send && SPACE_OR_TAB(*s))
11782 s++;
ff68c719 11783 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11784 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11785 const char * const brack =
11786 (const char *)
11787 ((*s == '[') ? "[...]" : "{...}");
9014280d 11788 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11789 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11790 funny, dest, brack, funny, dest, brack);
11791 }
79072805 11792 bracket++;
a0be28da 11793 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11794 return s;
11795 }
4e553d73
NIS
11796 }
11797 /* Handle extended ${^Foo} variables
2b92dfce
GS
11798 * 1999-02-27 mjd-perl-patch@plover.com */
11799 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11800 && isALNUM(*s))
11801 {
11802 d++;
11803 while (isALNUM(*s) && d < e) {
11804 *d++ = *s++;
11805 }
11806 if (d >= e)
cea2e8a9 11807 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11808 *d = '\0';
79072805
LW
11809 }
11810 if (*s == '}') {
11811 s++;
7df0d042 11812 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11813 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11814 PL_expect = XREF;
11815 }
d008e5eb 11816 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11817 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11818 (keyword(dest, d - dest, 0)
11819 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11820 {
c35e046a
AL
11821 if (funny == '#')
11822 funny = '@';
9014280d 11823 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11824 "Ambiguous use of %c{%s} resolved to %c%s",
11825 funny, dest, funny, dest);
11826 }
11827 }
79072805
LW
11828 }
11829 else {
11830 s = bracket; /* let the parser handle it */
93a17b20 11831 *dest = '\0';
79072805
LW
11832 }
11833 }
3280af22
NIS
11834 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11835 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11836 return s;
11837}
11838
879d0c72
NC
11839static U32
11840S_pmflag(U32 pmfl, const char ch) {
11841 switch (ch) {
11842 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11843 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11844 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11845 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11846 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11847 }
11848 return pmfl;
11849}
11850
76e3520e 11851STATIC char *
cea2e8a9 11852S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11853{
97aff369 11854 dVAR;
79072805 11855 PMOP *pm;
5db06880 11856 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11857 const char * const valid_flags =
a20207d7 11858 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11859#ifdef PERL_MAD
11860 char *modstart;
11861#endif
11862
7918f24d 11863 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11864
25c09cbf 11865 if (!s) {
6136c704 11866 const char * const delimiter = skipspace(start);
10edeb5d
JH
11867 Perl_croak(aTHX_
11868 (const char *)
11869 (*delimiter == '?'
11870 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11871 : "Search pattern not terminated" ));
25c09cbf 11872 }
bbce6d69 11873
8782bef2 11874 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11875 if (PL_multi_open == '?') {
11876 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11877 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11878
11879 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11880 allows us to restrict the list needed by reset to just the ??
11881 matches. */
11882 assert(type != OP_TRANS);
11883 if (PL_curstash) {
daba3364 11884 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11885 U32 elements;
11886 if (!mg) {
daba3364 11887 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11888 0);
11889 }
11890 elements = mg->mg_len / sizeof(PMOP**);
11891 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11892 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11893 mg->mg_len = elements * sizeof(PMOP**);
11894 PmopSTASH_set(pm,PL_curstash);
11895 }
11896 }
5db06880
NC
11897#ifdef PERL_MAD
11898 modstart = s;
11899#endif
6136c704 11900 while (*s && strchr(valid_flags, *s))
879d0c72 11901 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
5db06880
NC
11902#ifdef PERL_MAD
11903 if (PL_madskills && modstart != s) {
11904 SV* tmptoken = newSVpvn(modstart, s - modstart);
11905 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11906 }
11907#endif
4ac733c9 11908 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11909 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11910 {
a2a5de95
NC
11911 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11912 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11913 }
11914
3280af22 11915 PL_lex_op = (OP*)pm;
6154021b 11916 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11917 return s;
11918}
11919
76e3520e 11920STATIC char *
cea2e8a9 11921S_scan_subst(pTHX_ char *start)
79072805 11922{
27da23d5 11923 dVAR;
a0d0e21e 11924 register char *s;
79072805 11925 register PMOP *pm;
4fdae800 11926 I32 first_start;
79072805 11927 I32 es = 0;
5db06880
NC
11928#ifdef PERL_MAD
11929 char *modstart;
11930#endif
79072805 11931
7918f24d
NC
11932 PERL_ARGS_ASSERT_SCAN_SUBST;
11933
6154021b 11934 pl_yylval.ival = OP_NULL;
79072805 11935
5db06880 11936 s = scan_str(start,!!PL_madskills,FALSE);
79072805 11937
37fd879b 11938 if (!s)
cea2e8a9 11939 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 11940
3280af22 11941 if (s[-1] == PL_multi_open)
79072805 11942 s--;
5db06880
NC
11943#ifdef PERL_MAD
11944 if (PL_madskills) {
cd81e915
NC
11945 CURMAD('q', PL_thisopen);
11946 CURMAD('_', PL_thiswhite);
11947 CURMAD('E', PL_thisstuff);
11948 CURMAD('Q', PL_thisclose);
11949 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
11950 }
11951#endif
79072805 11952
3280af22 11953 first_start = PL_multi_start;
5db06880 11954 s = scan_str(s,!!PL_madskills,FALSE);
79072805 11955 if (!s) {
37fd879b 11956 if (PL_lex_stuff) {
3280af22 11957 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 11958 PL_lex_stuff = NULL;
37fd879b 11959 }
cea2e8a9 11960 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 11961 }
3280af22 11962 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 11963
79072805 11964 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
11965
11966#ifdef PERL_MAD
11967 if (PL_madskills) {
cd81e915
NC
11968 CURMAD('z', PL_thisopen);
11969 CURMAD('R', PL_thisstuff);
11970 CURMAD('Z', PL_thisclose);
5db06880
NC
11971 }
11972 modstart = s;
11973#endif
11974
48c036b1 11975 while (*s) {
a20207d7 11976 if (*s == EXEC_PAT_MOD) {
a687059c 11977 s++;
2f3197b3 11978 es++;
a687059c 11979 }
a20207d7 11980 else if (strchr(S_PAT_MODS, *s))
879d0c72 11981 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
48c036b1
GS
11982 else
11983 break;
378cc40b 11984 }
79072805 11985
5db06880
NC
11986#ifdef PERL_MAD
11987 if (PL_madskills) {
11988 if (modstart != s)
11989 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
11990 append_madprops(PL_thismad, (OP*)pm, 0);
11991 PL_thismad = 0;
5db06880
NC
11992 }
11993#endif
a2a5de95
NC
11994 if ((pm->op_pmflags & PMf_CONTINUE)) {
11995 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
11996 }
11997
79072805 11998 if (es) {
6136c704
AL
11999 SV * const repl = newSVpvs("");
12000
0244c3a4
GS
12001 PL_sublex_info.super_bufptr = s;
12002 PL_sublex_info.super_bufend = PL_bufend;
12003 PL_multi_end = 0;
79072805 12004 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
12005 while (es-- > 0) {
12006 if (es)
12007 sv_catpvs(repl, "eval ");
12008 else
12009 sv_catpvs(repl, "do ");
12010 }
6f43d98f 12011 sv_catpvs(repl, "{");
3280af22 12012 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
12013 if (strchr(SvPVX(PL_lex_repl), '#'))
12014 sv_catpvs(repl, "\n");
12015 sv_catpvs(repl, "}");
25da4f38 12016 SvEVALED_on(repl);
3280af22
NIS
12017 SvREFCNT_dec(PL_lex_repl);
12018 PL_lex_repl = repl;
378cc40b 12019 }
79072805 12020
3280af22 12021 PL_lex_op = (OP*)pm;
6154021b 12022 pl_yylval.ival = OP_SUBST;
378cc40b
LW
12023 return s;
12024}
12025
76e3520e 12026STATIC char *
cea2e8a9 12027S_scan_trans(pTHX_ char *start)
378cc40b 12028{
97aff369 12029 dVAR;
a0d0e21e 12030 register char* s;
11343788 12031 OP *o;
79072805 12032 short *tbl;
b84c11c8
NC
12033 U8 squash;
12034 U8 del;
12035 U8 complement;
5db06880
NC
12036#ifdef PERL_MAD
12037 char *modstart;
12038#endif
79072805 12039
7918f24d
NC
12040 PERL_ARGS_ASSERT_SCAN_TRANS;
12041
6154021b 12042 pl_yylval.ival = OP_NULL;
79072805 12043
5db06880 12044 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 12045 if (!s)
cea2e8a9 12046 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 12047
3280af22 12048 if (s[-1] == PL_multi_open)
2f3197b3 12049 s--;
5db06880
NC
12050#ifdef PERL_MAD
12051 if (PL_madskills) {
cd81e915
NC
12052 CURMAD('q', PL_thisopen);
12053 CURMAD('_', PL_thiswhite);
12054 CURMAD('E', PL_thisstuff);
12055 CURMAD('Q', PL_thisclose);
12056 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12057 }
12058#endif
2f3197b3 12059
5db06880 12060 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12061 if (!s) {
37fd879b 12062 if (PL_lex_stuff) {
3280af22 12063 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12064 PL_lex_stuff = NULL;
37fd879b 12065 }
cea2e8a9 12066 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 12067 }
5db06880 12068 if (PL_madskills) {
cd81e915
NC
12069 CURMAD('z', PL_thisopen);
12070 CURMAD('R', PL_thisstuff);
12071 CURMAD('Z', PL_thisclose);
5db06880 12072 }
79072805 12073
a0ed51b3 12074 complement = del = squash = 0;
5db06880
NC
12075#ifdef PERL_MAD
12076 modstart = s;
12077#endif
7a1e2023
NC
12078 while (1) {
12079 switch (*s) {
12080 case 'c':
79072805 12081 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
12082 break;
12083 case 'd':
a0ed51b3 12084 del = OPpTRANS_DELETE;
7a1e2023
NC
12085 break;
12086 case 's':
79072805 12087 squash = OPpTRANS_SQUASH;
7a1e2023
NC
12088 break;
12089 default:
12090 goto no_more;
12091 }
395c3793
LW
12092 s++;
12093 }
7a1e2023 12094 no_more:
8973db79 12095
aa1f7c5b 12096 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
8973db79 12097 o = newPVOP(OP_TRANS, 0, (char*)tbl);
59f00321
RGS
12098 o->op_private &= ~OPpTRANS_ALL;
12099 o->op_private |= del|squash|complement|
7948272d
NIS
12100 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12101 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 12102
3280af22 12103 PL_lex_op = o;
6154021b 12104 pl_yylval.ival = OP_TRANS;
5db06880
NC
12105
12106#ifdef PERL_MAD
12107 if (PL_madskills) {
12108 if (modstart != s)
12109 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12110 append_madprops(PL_thismad, o, 0);
12111 PL_thismad = 0;
5db06880
NC
12112 }
12113#endif
12114
79072805
LW
12115 return s;
12116}
12117
76e3520e 12118STATIC char *
cea2e8a9 12119S_scan_heredoc(pTHX_ register char *s)
79072805 12120{
97aff369 12121 dVAR;
79072805
LW
12122 SV *herewas;
12123 I32 op_type = OP_SCALAR;
12124 I32 len;
12125 SV *tmpstr;
12126 char term;
73d840c0 12127 const char *found_newline;
79072805 12128 register char *d;
fc36a67e 12129 register char *e;
4633a7c4 12130 char *peek;
f54cb97a 12131 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
12132#ifdef PERL_MAD
12133 I32 stuffstart = s - SvPVX(PL_linestr);
12134 char *tstart;
12135
cd81e915 12136 PL_realtokenstart = -1;
5db06880 12137#endif
79072805 12138
7918f24d
NC
12139 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12140
79072805 12141 s += 2;
3280af22
NIS
12142 d = PL_tokenbuf;
12143 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 12144 if (!outer)
79072805 12145 *d++ = '\n';
c35e046a
AL
12146 peek = s;
12147 while (SPACE_OR_TAB(*peek))
12148 peek++;
3792a11b 12149 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 12150 s = peek;
79072805 12151 term = *s++;
3280af22 12152 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 12153 d += len;
3280af22 12154 if (s < PL_bufend)
79072805 12155 s++;
79072805
LW
12156 }
12157 else {
12158 if (*s == '\\')
12159 s++, term = '\'';
12160 else
12161 term = '"';
7e2040f0 12162 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 12163 deprecate("bare << to mean <<\"\"");
7e2040f0 12164 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 12165 if (d < e)
12166 *d++ = *s;
12167 }
12168 }
3280af22 12169 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 12170 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
12171 *d++ = '\n';
12172 *d = '\0';
3280af22 12173 len = d - PL_tokenbuf;
5db06880
NC
12174
12175#ifdef PERL_MAD
12176 if (PL_madskills) {
12177 tstart = PL_tokenbuf + !outer;
cd81e915 12178 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 12179 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 12180 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12181 stuffstart = s - SvPVX(PL_linestr);
12182 }
12183#endif
6a27c188 12184#ifndef PERL_STRICT_CR
f63a84b2
LW
12185 d = strchr(s, '\r');
12186 if (d) {
b464bac0 12187 char * const olds = s;
f63a84b2 12188 s = d;
3280af22 12189 while (s < PL_bufend) {
f63a84b2
LW
12190 if (*s == '\r') {
12191 *d++ = '\n';
12192 if (*++s == '\n')
12193 s++;
12194 }
12195 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12196 *d++ = *s++;
12197 s++;
12198 }
12199 else
12200 *d++ = *s++;
12201 }
12202 *d = '\0';
3280af22 12203 PL_bufend = d;
95a20fc0 12204 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
12205 s = olds;
12206 }
12207#endif
5db06880
NC
12208#ifdef PERL_MAD
12209 found_newline = 0;
12210#endif
10edeb5d 12211 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
12212 herewas = newSVpvn(s,PL_bufend-s);
12213 }
12214 else {
5db06880
NC
12215#ifdef PERL_MAD
12216 herewas = newSVpvn(s-1,found_newline-s+1);
12217#else
73d840c0
AL
12218 s--;
12219 herewas = newSVpvn(s,found_newline-s);
5db06880 12220#endif
73d840c0 12221 }
5db06880
NC
12222#ifdef PERL_MAD
12223 if (PL_madskills) {
12224 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12225 if (PL_thisstuff)
12226 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 12227 else
cd81e915 12228 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
12229 }
12230#endif
79072805 12231 s += SvCUR(herewas);
748a9306 12232
5db06880
NC
12233#ifdef PERL_MAD
12234 stuffstart = s - SvPVX(PL_linestr);
12235
12236 if (found_newline)
12237 s--;
12238#endif
12239
7d0a29fe
NC
12240 tmpstr = newSV_type(SVt_PVIV);
12241 SvGROW(tmpstr, 80);
748a9306 12242 if (term == '\'') {
79072805 12243 op_type = OP_CONST;
45977657 12244 SvIV_set(tmpstr, -1);
748a9306
LW
12245 }
12246 else if (term == '`') {
79072805 12247 op_type = OP_BACKTICK;
45977657 12248 SvIV_set(tmpstr, '\\');
748a9306 12249 }
79072805
LW
12250
12251 CLINE;
57843af0 12252 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
12253 PL_multi_open = PL_multi_close = '<';
12254 term = *PL_tokenbuf;
0244c3a4 12255 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
12256 char * const bufptr = PL_sublex_info.super_bufptr;
12257 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 12258 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
12259 s = strchr(bufptr, '\n');
12260 if (!s)
12261 s = bufend;
12262 d = s;
12263 while (s < bufend &&
12264 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12265 if (*s++ == '\n')
57843af0 12266 CopLINE_inc(PL_curcop);
0244c3a4
GS
12267 }
12268 if (s >= bufend) {
eb160463 12269 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
12270 missingterm(PL_tokenbuf);
12271 }
12272 sv_setpvn(herewas,bufptr,d-bufptr+1);
12273 sv_setpvn(tmpstr,d+1,s-d);
12274 s += len - 1;
12275 sv_catpvn(herewas,s,bufend-s);
95a20fc0 12276 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
12277
12278 s = olds;
12279 goto retval;
12280 }
12281 else if (!outer) {
79072805 12282 d = s;
3280af22
NIS
12283 while (s < PL_bufend &&
12284 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 12285 if (*s++ == '\n')
57843af0 12286 CopLINE_inc(PL_curcop);
79072805 12287 }
3280af22 12288 if (s >= PL_bufend) {
eb160463 12289 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12290 missingterm(PL_tokenbuf);
79072805
LW
12291 }
12292 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
12293#ifdef PERL_MAD
12294 if (PL_madskills) {
cd81e915
NC
12295 if (PL_thisstuff)
12296 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12297 else
cd81e915 12298 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12299 stuffstart = s - SvPVX(PL_linestr);
12300 }
12301#endif
79072805 12302 s += len - 1;
57843af0 12303 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12304
3280af22
NIS
12305 sv_catpvn(herewas,s,PL_bufend-s);
12306 sv_setsv(PL_linestr,herewas);
12307 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12308 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12309 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12310 }
12311 else
76f68e9b 12312 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12313 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12314#ifdef PERL_MAD
12315 if (PL_madskills) {
12316 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12317 if (PL_thisstuff)
12318 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12319 else
cd81e915 12320 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12321 }
12322#endif
f0e67a1d 12323 PL_bufptr = s;
17cc9359 12324 CopLINE_inc(PL_curcop);
f0e67a1d 12325 if (!outer || !lex_next_chunk(0)) {
eb160463 12326 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12327 missingterm(PL_tokenbuf);
79072805 12328 }
17cc9359 12329 CopLINE_dec(PL_curcop);
f0e67a1d 12330 s = PL_bufptr;
5db06880
NC
12331#ifdef PERL_MAD
12332 stuffstart = s - SvPVX(PL_linestr);
12333#endif
57843af0 12334 CopLINE_inc(PL_curcop);
3280af22 12335 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12336 PL_last_lop = PL_last_uni = NULL;
6a27c188 12337#ifndef PERL_STRICT_CR
3280af22 12338 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12339 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12340 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12341 {
3280af22
NIS
12342 PL_bufend[-2] = '\n';
12343 PL_bufend--;
95a20fc0 12344 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12345 }
3280af22
NIS
12346 else if (PL_bufend[-1] == '\r')
12347 PL_bufend[-1] = '\n';
f63a84b2 12348 }
3280af22
NIS
12349 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12350 PL_bufend[-1] = '\n';
f63a84b2 12351#endif
3280af22 12352 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12353 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12354 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12355 sv_catsv(PL_linestr,herewas);
12356 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12357 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12358 }
12359 else {
3280af22
NIS
12360 s = PL_bufend;
12361 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12362 }
12363 }
79072805 12364 s++;
0244c3a4 12365retval:
57843af0 12366 PL_multi_end = CopLINE(PL_curcop);
79072805 12367 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12368 SvPV_shrink_to_cur(tmpstr);
79072805 12369 }
8990e307 12370 SvREFCNT_dec(herewas);
2f31ce75 12371 if (!IN_BYTES) {
95a20fc0 12372 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12373 SvUTF8_on(tmpstr);
12374 else if (PL_encoding)
12375 sv_recode_to_utf8(tmpstr, PL_encoding);
12376 }
3280af22 12377 PL_lex_stuff = tmpstr;
6154021b 12378 pl_yylval.ival = op_type;
79072805
LW
12379 return s;
12380}
12381
02aa26ce
NT
12382/* scan_inputsymbol
12383 takes: current position in input buffer
12384 returns: new position in input buffer
6154021b 12385 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12386
12387 This code handles:
12388
12389 <> read from ARGV
12390 <FH> read from filehandle
12391 <pkg::FH> read from package qualified filehandle
12392 <pkg'FH> read from package qualified filehandle
12393 <$fh> read from filehandle in $fh
12394 <*.h> filename glob
12395
12396*/
12397
76e3520e 12398STATIC char *
cea2e8a9 12399S_scan_inputsymbol(pTHX_ char *start)
79072805 12400{
97aff369 12401 dVAR;
02aa26ce 12402 register char *s = start; /* current position in buffer */
1b420867 12403 char *end;
79072805 12404 I32 len;
6136c704
AL
12405 char *d = PL_tokenbuf; /* start of temp holding space */
12406 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12407
7918f24d
NC
12408 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12409
1b420867
GS
12410 end = strchr(s, '\n');
12411 if (!end)
12412 end = PL_bufend;
12413 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12414
12415 /* die if we didn't have space for the contents of the <>,
1b420867 12416 or if it didn't end, or if we see a newline
02aa26ce
NT
12417 */
12418
bb7a0f54 12419 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12420 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12421 if (s >= end)
cea2e8a9 12422 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12423
fc36a67e 12424 s++;
02aa26ce
NT
12425
12426 /* check for <$fh>
12427 Remember, only scalar variables are interpreted as filehandles by
12428 this code. Anything more complex (e.g., <$fh{$num}>) will be
12429 treated as a glob() call.
12430 This code makes use of the fact that except for the $ at the front,
12431 a scalar variable and a filehandle look the same.
12432 */
4633a7c4 12433 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12434
12435 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12436 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12437 d++;
02aa26ce
NT
12438
12439 /* If we've tried to read what we allow filehandles to look like, and
12440 there's still text left, then it must be a glob() and not a getline.
12441 Use scan_str to pull out the stuff between the <> and treat it
12442 as nothing more than a string.
12443 */
12444
3280af22 12445 if (d - PL_tokenbuf != len) {
6154021b 12446 pl_yylval.ival = OP_GLOB;
5db06880 12447 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12448 if (!s)
cea2e8a9 12449 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12450 return s;
12451 }
395c3793 12452 else {
9b3023bc 12453 bool readline_overriden = FALSE;
6136c704 12454 GV *gv_readline;
9b3023bc 12455 GV **gvp;
02aa26ce 12456 /* we're in a filehandle read situation */
3280af22 12457 d = PL_tokenbuf;
02aa26ce
NT
12458
12459 /* turn <> into <ARGV> */
79072805 12460 if (!len)
689badd5 12461 Copy("ARGV",d,5,char);
02aa26ce 12462
9b3023bc 12463 /* Check whether readline() is overriden */
fafc274c 12464 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12465 if ((gv_readline
ba979b31 12466 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12467 ||
017a3ce5 12468 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12469 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12470 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12471 readline_overriden = TRUE;
12472
02aa26ce
NT
12473 /* if <$fh>, create the ops to turn the variable into a
12474 filehandle
12475 */
79072805 12476 if (*d == '$') {
02aa26ce
NT
12477 /* try to find it in the pad for this block, otherwise find
12478 add symbol table ops
12479 */
f8f98e0a 12480 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12481 if (tmp != NOT_IN_PAD) {
00b1698f 12482 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12483 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12484 HEK * const stashname = HvNAME_HEK(stash);
12485 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12486 sv_catpvs(sym, "::");
f558d5af
JH
12487 sv_catpv(sym, d+1);
12488 d = SvPVX(sym);
12489 goto intro_sym;
12490 }
12491 else {
6136c704 12492 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12493 o->op_targ = tmp;
9b3023bc
RGS
12494 PL_lex_op = readline_overriden
12495 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12496 append_elem(OP_LIST, o,
12497 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12498 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12499 }
a0d0e21e
LW
12500 }
12501 else {
f558d5af
JH
12502 GV *gv;
12503 ++d;
12504intro_sym:
12505 gv = gv_fetchpv(d,
12506 (PL_in_eval
12507 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12508 : GV_ADDMULTI),
f558d5af 12509 SVt_PV);
9b3023bc
RGS
12510 PL_lex_op = readline_overriden
12511 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12512 append_elem(OP_LIST,
12513 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12514 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12515 : (OP*)newUNOP(OP_READLINE, 0,
12516 newUNOP(OP_RV2SV, 0,
12517 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12518 }
7c6fadd6
RGS
12519 if (!readline_overriden)
12520 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12521 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12522 pl_yylval.ival = OP_NULL;
79072805 12523 }
02aa26ce
NT
12524
12525 /* If it's none of the above, it must be a literal filehandle
12526 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12527 else {
6136c704 12528 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12529 PL_lex_op = readline_overriden
12530 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12531 append_elem(OP_LIST,
12532 newGVOP(OP_GV, 0, gv),
12533 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12534 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12535 pl_yylval.ival = OP_NULL;
79072805
LW
12536 }
12537 }
02aa26ce 12538
79072805
LW
12539 return s;
12540}
12541
02aa26ce
NT
12542
12543/* scan_str
12544 takes: start position in buffer
09bef843
SB
12545 keep_quoted preserve \ on the embedded delimiter(s)
12546 keep_delims preserve the delimiters around the string
02aa26ce
NT
12547 returns: position to continue reading from buffer
12548 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12549 updates the read buffer.
12550
12551 This subroutine pulls a string out of the input. It is called for:
12552 q single quotes q(literal text)
12553 ' single quotes 'literal text'
12554 qq double quotes qq(interpolate $here please)
12555 " double quotes "interpolate $here please"
12556 qx backticks qx(/bin/ls -l)
12557 ` backticks `/bin/ls -l`
12558 qw quote words @EXPORT_OK = qw( func() $spam )
12559 m// regexp match m/this/
12560 s/// regexp substitute s/this/that/
12561 tr/// string transliterate tr/this/that/
12562 y/// string transliterate y/this/that/
12563 ($*@) sub prototypes sub foo ($)
09bef843 12564 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12565 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12566
12567 In most of these cases (all but <>, patterns and transliterate)
12568 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12569 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12570 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12571 calls scan_str().
4e553d73 12572
02aa26ce
NT
12573 It skips whitespace before the string starts, and treats the first
12574 character as the delimiter. If the delimiter is one of ([{< then
12575 the corresponding "close" character )]}> is used as the closing
12576 delimiter. It allows quoting of delimiters, and if the string has
12577 balanced delimiters ([{<>}]) it allows nesting.
12578
37fd879b
HS
12579 On success, the SV with the resulting string is put into lex_stuff or,
12580 if that is already non-NULL, into lex_repl. The second case occurs only
12581 when parsing the RHS of the special constructs s/// and tr/// (y///).
12582 For convenience, the terminating delimiter character is stuffed into
12583 SvIVX of the SV.
02aa26ce
NT
12584*/
12585
76e3520e 12586STATIC char *
09bef843 12587S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12588{
97aff369 12589 dVAR;
02aa26ce 12590 SV *sv; /* scalar value: string */
d3fcec1f 12591 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12592 register char *s = start; /* current position in the buffer */
12593 register char term; /* terminating character */
12594 register char *to; /* current position in the sv's data */
12595 I32 brackets = 1; /* bracket nesting level */
89491803 12596 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12597 I32 termcode; /* terminating char. code */
89ebb4a3 12598 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12599 STRLEN termlen; /* length of terminating string */
0331ef07 12600 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12601#ifdef PERL_MAD
12602 int stuffstart;
12603 char *tstart;
12604#endif
02aa26ce 12605
7918f24d
NC
12606 PERL_ARGS_ASSERT_SCAN_STR;
12607
02aa26ce 12608 /* skip space before the delimiter */
29595ff2
NC
12609 if (isSPACE(*s)) {
12610 s = PEEKSPACE(s);
12611 }
02aa26ce 12612
5db06880 12613#ifdef PERL_MAD
cd81e915
NC
12614 if (PL_realtokenstart >= 0) {
12615 stuffstart = PL_realtokenstart;
12616 PL_realtokenstart = -1;
5db06880
NC
12617 }
12618 else
12619 stuffstart = start - SvPVX(PL_linestr);
12620#endif
02aa26ce 12621 /* mark where we are, in case we need to report errors */
79072805 12622 CLINE;
02aa26ce
NT
12623
12624 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12625 term = *s;
220e2d4e
IH
12626 if (!UTF) {
12627 termcode = termstr[0] = term;
12628 termlen = 1;
12629 }
12630 else {
f3b9ce0f 12631 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12632 Copy(s, termstr, termlen, U8);
12633 if (!UTF8_IS_INVARIANT(term))
12634 has_utf8 = TRUE;
12635 }
b1c7b182 12636
02aa26ce 12637 /* mark where we are */
57843af0 12638 PL_multi_start = CopLINE(PL_curcop);
3280af22 12639 PL_multi_open = term;
02aa26ce
NT
12640
12641 /* find corresponding closing delimiter */
93a17b20 12642 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12643 termcode = termstr[0] = term = tmps[5];
12644
3280af22 12645 PL_multi_close = term;
79072805 12646
561b68a9
SH
12647 /* create a new SV to hold the contents. 79 is the SV's initial length.
12648 What a random number. */
7d0a29fe
NC
12649 sv = newSV_type(SVt_PVIV);
12650 SvGROW(sv, 80);
45977657 12651 SvIV_set(sv, termcode);
a0d0e21e 12652 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12653
12654 /* move past delimiter and try to read a complete string */
09bef843 12655 if (keep_delims)
220e2d4e
IH
12656 sv_catpvn(sv, s, termlen);
12657 s += termlen;
5db06880
NC
12658#ifdef PERL_MAD
12659 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12660 if (!PL_thisopen && !keep_delims) {
12661 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12662 stuffstart = s - SvPVX(PL_linestr);
12663 }
12664#endif
93a17b20 12665 for (;;) {
220e2d4e
IH
12666 if (PL_encoding && !UTF) {
12667 bool cont = TRUE;
12668
12669 while (cont) {
95a20fc0 12670 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12671 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12672 &offset, (char*)termstr, termlen);
6136c704
AL
12673 const char * const ns = SvPVX_const(PL_linestr) + offset;
12674 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12675
12676 for (; s < ns; s++) {
12677 if (*s == '\n' && !PL_rsfp)
12678 CopLINE_inc(PL_curcop);
12679 }
12680 if (!found)
12681 goto read_more_line;
12682 else {
12683 /* handle quoted delimiters */
52327caf 12684 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12685 const char *t;
95a20fc0 12686 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12687 t--;
12688 if ((svlast-1 - t) % 2) {
12689 if (!keep_quoted) {
12690 *(svlast-1) = term;
12691 *svlast = '\0';
12692 SvCUR_set(sv, SvCUR(sv) - 1);
12693 }
12694 continue;
12695 }
12696 }
12697 if (PL_multi_open == PL_multi_close) {
12698 cont = FALSE;
12699 }
12700 else {
f54cb97a
AL
12701 const char *t;
12702 char *w;
0331ef07 12703 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12704 /* At here, all closes are "was quoted" one,
12705 so we don't check PL_multi_close. */
12706 if (*t == '\\') {
12707 if (!keep_quoted && *(t+1) == PL_multi_open)
12708 t++;
12709 else
12710 *w++ = *t++;
12711 }
12712 else if (*t == PL_multi_open)
12713 brackets++;
12714
12715 *w = *t;
12716 }
12717 if (w < t) {
12718 *w++ = term;
12719 *w = '\0';
95a20fc0 12720 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12721 }
0331ef07 12722 last_off = w - SvPVX(sv);
220e2d4e
IH
12723 if (--brackets <= 0)
12724 cont = FALSE;
12725 }
12726 }
12727 }
12728 if (!keep_delims) {
12729 SvCUR_set(sv, SvCUR(sv) - 1);
12730 *SvEND(sv) = '\0';
12731 }
12732 break;
12733 }
12734
02aa26ce 12735 /* extend sv if need be */
3280af22 12736 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12737 /* set 'to' to the next character in the sv's string */
463ee0b2 12738 to = SvPVX(sv)+SvCUR(sv);
09bef843 12739
02aa26ce 12740 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12741 if (PL_multi_open == PL_multi_close) {
12742 for (; s < PL_bufend; s++,to++) {
02aa26ce 12743 /* embedded newlines increment the current line number */
3280af22 12744 if (*s == '\n' && !PL_rsfp)
57843af0 12745 CopLINE_inc(PL_curcop);
02aa26ce 12746 /* handle quoted delimiters */
3280af22 12747 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12748 if (!keep_quoted && s[1] == term)
a0d0e21e 12749 s++;
02aa26ce 12750 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12751 else
12752 *to++ = *s++;
12753 }
02aa26ce
NT
12754 /* terminate when run out of buffer (the for() condition), or
12755 have found the terminator */
220e2d4e
IH
12756 else if (*s == term) {
12757 if (termlen == 1)
12758 break;
f3b9ce0f 12759 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12760 break;
12761 }
63cd0674 12762 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12763 has_utf8 = TRUE;
93a17b20
LW
12764 *to = *s;
12765 }
12766 }
02aa26ce
NT
12767
12768 /* if the terminator isn't the same as the start character (e.g.,
12769 matched brackets), we have to allow more in the quoting, and
12770 be prepared for nested brackets.
12771 */
93a17b20 12772 else {
02aa26ce 12773 /* read until we run out of string, or we find the terminator */
3280af22 12774 for (; s < PL_bufend; s++,to++) {
02aa26ce 12775 /* embedded newlines increment the line count */
3280af22 12776 if (*s == '\n' && !PL_rsfp)
57843af0 12777 CopLINE_inc(PL_curcop);
02aa26ce 12778 /* backslashes can escape the open or closing characters */
3280af22 12779 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12780 if (!keep_quoted &&
12781 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12782 s++;
12783 else
12784 *to++ = *s++;
12785 }
02aa26ce 12786 /* allow nested opens and closes */
3280af22 12787 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12788 break;
3280af22 12789 else if (*s == PL_multi_open)
93a17b20 12790 brackets++;
63cd0674 12791 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12792 has_utf8 = TRUE;
93a17b20
LW
12793 *to = *s;
12794 }
12795 }
02aa26ce 12796 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12797 *to = '\0';
95a20fc0 12798 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12799
02aa26ce
NT
12800 /*
12801 * this next chunk reads more into the buffer if we're not done yet
12802 */
12803
b1c7b182
GS
12804 if (s < PL_bufend)
12805 break; /* handle case where we are done yet :-) */
79072805 12806
6a27c188 12807#ifndef PERL_STRICT_CR
95a20fc0 12808 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12809 if ((to[-2] == '\r' && to[-1] == '\n') ||
12810 (to[-2] == '\n' && to[-1] == '\r'))
12811 {
f63a84b2
LW
12812 to[-2] = '\n';
12813 to--;
95a20fc0 12814 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12815 }
12816 else if (to[-1] == '\r')
12817 to[-1] = '\n';
12818 }
95a20fc0 12819 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12820 to[-1] = '\n';
12821#endif
12822
220e2d4e 12823 read_more_line:
02aa26ce
NT
12824 /* if we're out of file, or a read fails, bail and reset the current
12825 line marker so we can report where the unterminated string began
12826 */
5db06880
NC
12827#ifdef PERL_MAD
12828 if (PL_madskills) {
c35e046a 12829 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12830 if (PL_thisstuff)
12831 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12832 else
cd81e915 12833 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12834 }
12835#endif
f0e67a1d
Z
12836 CopLINE_inc(PL_curcop);
12837 PL_bufptr = PL_bufend;
12838 if (!lex_next_chunk(0)) {
c07a80fd 12839 sv_free(sv);
eb160463 12840 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12841 return NULL;
79072805 12842 }
f0e67a1d 12843 s = PL_bufptr;
5db06880
NC
12844#ifdef PERL_MAD
12845 stuffstart = 0;
12846#endif
378cc40b 12847 }
4e553d73 12848
02aa26ce
NT
12849 /* at this point, we have successfully read the delimited string */
12850
220e2d4e 12851 if (!PL_encoding || UTF) {
5db06880
NC
12852#ifdef PERL_MAD
12853 if (PL_madskills) {
c35e046a 12854 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12855 const int len = s - tstart;
cd81e915 12856 if (PL_thisstuff)
c35e046a 12857 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12858 else
c35e046a 12859 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12860 if (!PL_thisclose && !keep_delims)
12861 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12862 }
12863#endif
12864
220e2d4e
IH
12865 if (keep_delims)
12866 sv_catpvn(sv, s, termlen);
12867 s += termlen;
12868 }
5db06880
NC
12869#ifdef PERL_MAD
12870 else {
12871 if (PL_madskills) {
c35e046a
AL
12872 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12873 const int len = s - tstart - termlen;
cd81e915 12874 if (PL_thisstuff)
c35e046a 12875 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12876 else
c35e046a 12877 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12878 if (!PL_thisclose && !keep_delims)
12879 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12880 }
12881 }
12882#endif
220e2d4e 12883 if (has_utf8 || PL_encoding)
b1c7b182 12884 SvUTF8_on(sv);
d0063567 12885
57843af0 12886 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12887
12888 /* if we allocated too much space, give some back */
93a17b20
LW
12889 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12890 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12891 SvPV_renew(sv, SvLEN(sv));
79072805 12892 }
02aa26ce
NT
12893
12894 /* decide whether this is the first or second quoted string we've read
12895 for this op
12896 */
4e553d73 12897
3280af22
NIS
12898 if (PL_lex_stuff)
12899 PL_lex_repl = sv;
79072805 12900 else
3280af22 12901 PL_lex_stuff = sv;
378cc40b
LW
12902 return s;
12903}
12904
02aa26ce
NT
12905/*
12906 scan_num
12907 takes: pointer to position in buffer
12908 returns: pointer to new position in buffer
6154021b 12909 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12910
12911 Read a number in any of the formats that Perl accepts:
12912
7fd134d9
JH
12913 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12914 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12915 0b[01](_?[01])*
12916 0[0-7](_?[0-7])*
12917 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12918
3280af22 12919 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
12920 thing it reads.
12921
12922 If it reads a number without a decimal point or an exponent, it will
12923 try converting the number to an integer and see if it can do so
12924 without loss of precision.
12925*/
4e553d73 12926
378cc40b 12927char *
bfed75c6 12928Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 12929{
97aff369 12930 dVAR;
bfed75c6 12931 register const char *s = start; /* current position in buffer */
02aa26ce
NT
12932 register char *d; /* destination in temp buffer */
12933 register char *e; /* end of temp buffer */
86554af2 12934 NV nv; /* number read, as a double */
a0714e2c 12935 SV *sv = NULL; /* place to put the converted number */
a86a20aa 12936 bool floatit; /* boolean: int or float? */
cbbf8932 12937 const char *lastub = NULL; /* position of last underbar */
bfed75c6 12938 static char const number_too_long[] = "Number too long";
378cc40b 12939
7918f24d
NC
12940 PERL_ARGS_ASSERT_SCAN_NUM;
12941
02aa26ce
NT
12942 /* We use the first character to decide what type of number this is */
12943
378cc40b 12944 switch (*s) {
79072805 12945 default:
cea2e8a9 12946 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 12947
02aa26ce 12948 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 12949 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
12950 case '0':
12951 {
02aa26ce
NT
12952 /* variables:
12953 u holds the "number so far"
4f19785b
WSI
12954 shift the power of 2 of the base
12955 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
12956 overflowed was the number more than we can hold?
12957
12958 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
12959 we in octal/hex/binary?" indicator to disallow hex characters
12960 when in octal mode.
02aa26ce 12961 */
9e24b6e2
JH
12962 NV n = 0.0;
12963 UV u = 0;
79072805 12964 I32 shift;
9e24b6e2 12965 bool overflowed = FALSE;
61f33854 12966 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
12967 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12968 static const char* const bases[5] =
12969 { "", "binary", "", "octal", "hexadecimal" };
12970 static const char* const Bases[5] =
12971 { "", "Binary", "", "Octal", "Hexadecimal" };
12972 static const char* const maxima[5] =
12973 { "",
12974 "0b11111111111111111111111111111111",
12975 "",
12976 "037777777777",
12977 "0xffffffff" };
bfed75c6 12978 const char *base, *Base, *max;
378cc40b 12979
02aa26ce 12980 /* check for hex */
378cc40b
LW
12981 if (s[1] == 'x') {
12982 shift = 4;
12983 s += 2;
61f33854 12984 just_zero = FALSE;
4f19785b
WSI
12985 } else if (s[1] == 'b') {
12986 shift = 1;
12987 s += 2;
61f33854 12988 just_zero = FALSE;
378cc40b 12989 }
02aa26ce 12990 /* check for a decimal in disguise */
b78218b7 12991 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 12992 goto decimal;
02aa26ce 12993 /* so it must be octal */
928753ea 12994 else {
378cc40b 12995 shift = 3;
928753ea
JH
12996 s++;
12997 }
12998
12999 if (*s == '_') {
a2a5de95 13000 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
13001 "Misplaced _ in number");
13002 lastub = s++;
13003 }
9e24b6e2
JH
13004
13005 base = bases[shift];
13006 Base = Bases[shift];
13007 max = maxima[shift];
02aa26ce 13008
4f19785b 13009 /* read the rest of the number */
378cc40b 13010 for (;;) {
9e24b6e2 13011 /* x is used in the overflow test,
893fe2c2 13012 b is the digit we're adding on. */
9e24b6e2 13013 UV x, b;
55497cff 13014
378cc40b 13015 switch (*s) {
02aa26ce
NT
13016
13017 /* if we don't mention it, we're done */
378cc40b
LW
13018 default:
13019 goto out;
02aa26ce 13020
928753ea 13021 /* _ are ignored -- but warned about if consecutive */
de3bb511 13022 case '_':
a2a5de95
NC
13023 if (lastub && s == lastub + 1)
13024 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13025 "Misplaced _ in number");
928753ea 13026 lastub = s++;
de3bb511 13027 break;
02aa26ce
NT
13028
13029 /* 8 and 9 are not octal */
378cc40b 13030 case '8': case '9':
4f19785b 13031 if (shift == 3)
cea2e8a9 13032 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 13033 /* FALL THROUGH */
02aa26ce
NT
13034
13035 /* octal digits */
4f19785b 13036 case '2': case '3': case '4':
378cc40b 13037 case '5': case '6': case '7':
4f19785b 13038 if (shift == 1)
cea2e8a9 13039 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
13040 /* FALL THROUGH */
13041
13042 case '0': case '1':
02aa26ce 13043 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 13044 goto digit;
02aa26ce
NT
13045
13046 /* hex digits */
378cc40b
LW
13047 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13048 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 13049 /* make sure they said 0x */
378cc40b
LW
13050 if (shift != 4)
13051 goto out;
55497cff 13052 b = (*s++ & 7) + 9;
02aa26ce
NT
13053
13054 /* Prepare to put the digit we have onto the end
13055 of the number so far. We check for overflows.
13056 */
13057
55497cff 13058 digit:
61f33854 13059 just_zero = FALSE;
9e24b6e2
JH
13060 if (!overflowed) {
13061 x = u << shift; /* make room for the digit */
13062
13063 if ((x >> shift) != u
13064 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
13065 overflowed = TRUE;
13066 n = (NV) u;
9b387841
NC
13067 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13068 "Integer overflow in %s number",
13069 base);
9e24b6e2
JH
13070 } else
13071 u = x | b; /* add the digit to the end */
13072 }
13073 if (overflowed) {
13074 n *= nvshift[shift];
13075 /* If an NV has not enough bits in its
13076 * mantissa to represent an UV this summing of
13077 * small low-order numbers is a waste of time
13078 * (because the NV cannot preserve the
13079 * low-order bits anyway): we could just
13080 * remember when did we overflow and in the
13081 * end just multiply n by the right
13082 * amount. */
13083 n += (NV) b;
55497cff 13084 }
378cc40b
LW
13085 break;
13086 }
13087 }
02aa26ce
NT
13088
13089 /* if we get here, we had success: make a scalar value from
13090 the number.
13091 */
378cc40b 13092 out:
928753ea
JH
13093
13094 /* final misplaced underbar check */
13095 if (s[-1] == '_') {
a2a5de95 13096 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
13097 }
13098
561b68a9 13099 sv = newSV(0);
9e24b6e2 13100 if (overflowed) {
a2a5de95
NC
13101 if (n > 4294967295.0)
13102 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13103 "%s number > %s non-portable",
13104 Base, max);
9e24b6e2
JH
13105 sv_setnv(sv, n);
13106 }
13107 else {
15041a67 13108#if UVSIZE > 4
a2a5de95
NC
13109 if (u > 0xffffffff)
13110 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13111 "%s number > %s non-portable",
13112 Base, max);
2cc4c2dc 13113#endif
9e24b6e2
JH
13114 sv_setuv(sv, u);
13115 }
61f33854 13116 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 13117 sv = new_constant(start, s - start, "integer",
eb0d8d16 13118 sv, NULL, NULL, 0);
61f33854 13119 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 13120 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
13121 }
13122 break;
02aa26ce
NT
13123
13124 /*
13125 handle decimal numbers.
13126 we're also sent here when we read a 0 as the first digit
13127 */
378cc40b
LW
13128 case '1': case '2': case '3': case '4': case '5':
13129 case '6': case '7': case '8': case '9': case '.':
13130 decimal:
3280af22
NIS
13131 d = PL_tokenbuf;
13132 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 13133 floatit = FALSE;
02aa26ce
NT
13134
13135 /* read next group of digits and _ and copy into d */
de3bb511 13136 while (isDIGIT(*s) || *s == '_') {
4e553d73 13137 /* skip underscores, checking for misplaced ones
02aa26ce
NT
13138 if -w is on
13139 */
93a17b20 13140 if (*s == '_') {
a2a5de95
NC
13141 if (lastub && s == lastub + 1)
13142 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13143 "Misplaced _ in number");
928753ea 13144 lastub = s++;
93a17b20 13145 }
fc36a67e 13146 else {
02aa26ce 13147 /* check for end of fixed-length buffer */
fc36a67e 13148 if (d >= e)
cea2e8a9 13149 Perl_croak(aTHX_ number_too_long);
02aa26ce 13150 /* if we're ok, copy the character */
378cc40b 13151 *d++ = *s++;
fc36a67e 13152 }
378cc40b 13153 }
02aa26ce
NT
13154
13155 /* final misplaced underbar check */
928753ea 13156 if (lastub && s == lastub + 1) {
a2a5de95 13157 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 13158 }
02aa26ce
NT
13159
13160 /* read a decimal portion if there is one. avoid
13161 3..5 being interpreted as the number 3. followed
13162 by .5
13163 */
2f3197b3 13164 if (*s == '.' && s[1] != '.') {
79072805 13165 floatit = TRUE;
378cc40b 13166 *d++ = *s++;
02aa26ce 13167
928753ea 13168 if (*s == '_') {
a2a5de95
NC
13169 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13170 "Misplaced _ in number");
928753ea
JH
13171 lastub = s;
13172 }
13173
13174 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 13175 */
fc36a67e 13176 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 13177 /* fixed length buffer check */
fc36a67e 13178 if (d >= e)
cea2e8a9 13179 Perl_croak(aTHX_ number_too_long);
928753ea 13180 if (*s == '_') {
a2a5de95
NC
13181 if (lastub && s == lastub + 1)
13182 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13183 "Misplaced _ in number");
928753ea
JH
13184 lastub = s;
13185 }
13186 else
fc36a67e 13187 *d++ = *s;
378cc40b 13188 }
928753ea
JH
13189 /* fractional part ending in underbar? */
13190 if (s[-1] == '_') {
a2a5de95
NC
13191 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13192 "Misplaced _ in number");
928753ea 13193 }
dd629d5b
GS
13194 if (*s == '.' && isDIGIT(s[1])) {
13195 /* oops, it's really a v-string, but without the "v" */
f4758303 13196 s = start;
dd629d5b
GS
13197 goto vstring;
13198 }
378cc40b 13199 }
02aa26ce
NT
13200
13201 /* read exponent part, if present */
3792a11b 13202 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
13203 floatit = TRUE;
13204 s++;
02aa26ce
NT
13205
13206 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 13207 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 13208
7fd134d9
JH
13209 /* stray preinitial _ */
13210 if (*s == '_') {
a2a5de95
NC
13211 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13212 "Misplaced _ in number");
7fd134d9
JH
13213 lastub = s++;
13214 }
13215
02aa26ce 13216 /* allow positive or negative exponent */
378cc40b
LW
13217 if (*s == '+' || *s == '-')
13218 *d++ = *s++;
02aa26ce 13219
7fd134d9
JH
13220 /* stray initial _ */
13221 if (*s == '_') {
a2a5de95
NC
13222 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13223 "Misplaced _ in number");
7fd134d9
JH
13224 lastub = s++;
13225 }
13226
7fd134d9
JH
13227 /* read digits of exponent */
13228 while (isDIGIT(*s) || *s == '_') {
13229 if (isDIGIT(*s)) {
13230 if (d >= e)
13231 Perl_croak(aTHX_ number_too_long);
b3b48e3e 13232 *d++ = *s++;
7fd134d9
JH
13233 }
13234 else {
041457d9 13235 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
13236 (!isDIGIT(s[1]) && s[1] != '_')))
13237 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13238 "Misplaced _ in number");
b3b48e3e 13239 lastub = s++;
7fd134d9 13240 }
7fd134d9 13241 }
378cc40b 13242 }
02aa26ce 13243
02aa26ce
NT
13244
13245 /* make an sv from the string */
561b68a9 13246 sv = newSV(0);
097ee67d 13247
0b7fceb9 13248 /*
58bb9ec3
NC
13249 We try to do an integer conversion first if no characters
13250 indicating "float" have been found.
0b7fceb9
MU
13251 */
13252
13253 if (!floatit) {
58bb9ec3 13254 UV uv;
6136c704 13255 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
13256
13257 if (flags == IS_NUMBER_IN_UV) {
13258 if (uv <= IV_MAX)
86554af2 13259 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
58bb9ec3 13260 else
c239479b 13261 sv_setuv(sv, uv);
58bb9ec3
NC
13262 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13263 if (uv <= (UV) IV_MIN)
13264 sv_setiv(sv, -(IV)uv);
13265 else
13266 floatit = TRUE;
13267 } else
13268 floatit = TRUE;
13269 }
0b7fceb9 13270 if (floatit) {
58bb9ec3
NC
13271 /* terminate the string */
13272 *d = '\0';
86554af2
JH
13273 nv = Atof(PL_tokenbuf);
13274 sv_setnv(sv, nv);
13275 }
86554af2 13276
eb0d8d16
NC
13277 if ( floatit
13278 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13279 const char *const key = floatit ? "float" : "integer";
13280 const STRLEN keylen = floatit ? 5 : 7;
13281 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13282 key, keylen, sv, NULL, NULL, 0);
13283 }
378cc40b 13284 break;
0b7fceb9 13285
e312add1 13286 /* if it starts with a v, it could be a v-string */
a7cb1f99 13287 case 'v':
dd629d5b 13288vstring:
561b68a9 13289 sv = newSV(5); /* preallocate storage space */
65b06e02 13290 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 13291 break;
79072805 13292 }
a687059c 13293
02aa26ce
NT
13294 /* make the op for the constant and return */
13295
a86a20aa 13296 if (sv)
b73d6f50 13297 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13298 else
5f66b61c 13299 lvalp->opval = NULL;
a687059c 13300
73d840c0 13301 return (char *)s;
378cc40b
LW
13302}
13303
76e3520e 13304STATIC char *
cea2e8a9 13305S_scan_formline(pTHX_ register char *s)
378cc40b 13306{
97aff369 13307 dVAR;
79072805 13308 register char *eol;
378cc40b 13309 register char *t;
6136c704 13310 SV * const stuff = newSVpvs("");
79072805 13311 bool needargs = FALSE;
c5ee2135 13312 bool eofmt = FALSE;
5db06880
NC
13313#ifdef PERL_MAD
13314 char *tokenstart = s;
4f61fd4b
JC
13315 SV* savewhite = NULL;
13316
5db06880 13317 if (PL_madskills) {
cd81e915
NC
13318 savewhite = PL_thiswhite;
13319 PL_thiswhite = 0;
5db06880
NC
13320 }
13321#endif
378cc40b 13322
7918f24d
NC
13323 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13324
79072805 13325 while (!needargs) {
a1b95068 13326 if (*s == '.') {
c35e046a 13327 t = s+1;
51882d45 13328#ifdef PERL_STRICT_CR
c35e046a
AL
13329 while (SPACE_OR_TAB(*t))
13330 t++;
51882d45 13331#else
c35e046a
AL
13332 while (SPACE_OR_TAB(*t) || *t == '\r')
13333 t++;
51882d45 13334#endif
c5ee2135
WL
13335 if (*t == '\n' || t == PL_bufend) {
13336 eofmt = TRUE;
79072805 13337 break;
c5ee2135 13338 }
79072805 13339 }
3280af22 13340 if (PL_in_eval && !PL_rsfp) {
07409e01 13341 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13342 if (!eol++)
3280af22 13343 eol = PL_bufend;
0f85fab0
LW
13344 }
13345 else
3280af22 13346 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13347 if (*s != '#') {
a0d0e21e
LW
13348 for (t = s; t < eol; t++) {
13349 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13350 needargs = FALSE;
13351 goto enough; /* ~~ must be first line in formline */
378cc40b 13352 }
a0d0e21e
LW
13353 if (*t == '@' || *t == '^')
13354 needargs = TRUE;
378cc40b 13355 }
7121b347
MG
13356 if (eol > s) {
13357 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13358#ifndef PERL_STRICT_CR
7121b347
MG
13359 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13360 char *end = SvPVX(stuff) + SvCUR(stuff);
13361 end[-2] = '\n';
13362 end[-1] = '\0';
b162af07 13363 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13364 }
2dc4c65b 13365#endif
7121b347
MG
13366 }
13367 else
13368 break;
79072805 13369 }
95a20fc0 13370 s = (char*)eol;
3280af22 13371 if (PL_rsfp) {
f0e67a1d 13372 bool got_some;
5db06880
NC
13373#ifdef PERL_MAD
13374 if (PL_madskills) {
cd81e915
NC
13375 if (PL_thistoken)
13376 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13377 else
cd81e915 13378 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13379 }
13380#endif
f0e67a1d
Z
13381 PL_bufptr = PL_bufend;
13382 CopLINE_inc(PL_curcop);
13383 got_some = lex_next_chunk(0);
13384 CopLINE_dec(PL_curcop);
13385 s = PL_bufptr;
5db06880 13386#ifdef PERL_MAD
f0e67a1d 13387 tokenstart = PL_bufptr;
5db06880 13388#endif
f0e67a1d 13389 if (!got_some)
378cc40b 13390 break;
378cc40b 13391 }
463ee0b2 13392 incline(s);
79072805 13393 }
a0d0e21e
LW
13394 enough:
13395 if (SvCUR(stuff)) {
3280af22 13396 PL_expect = XTERM;
79072805 13397 if (needargs) {
3280af22 13398 PL_lex_state = LEX_NORMAL;
cd81e915 13399 start_force(PL_curforce);
9ded7720 13400 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13401 force_next(',');
13402 }
a0d0e21e 13403 else
3280af22 13404 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13405 if (!IN_BYTES) {
95a20fc0 13406 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13407 SvUTF8_on(stuff);
13408 else if (PL_encoding)
13409 sv_recode_to_utf8(stuff, PL_encoding);
13410 }
cd81e915 13411 start_force(PL_curforce);
9ded7720 13412 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13413 force_next(THING);
cd81e915 13414 start_force(PL_curforce);
9ded7720 13415 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13416 force_next(LSTOP);
378cc40b 13417 }
79072805 13418 else {
8990e307 13419 SvREFCNT_dec(stuff);
c5ee2135
WL
13420 if (eofmt)
13421 PL_lex_formbrack = 0;
3280af22 13422 PL_bufptr = s;
79072805 13423 }
5db06880
NC
13424#ifdef PERL_MAD
13425 if (PL_madskills) {
cd81e915
NC
13426 if (PL_thistoken)
13427 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13428 else
cd81e915
NC
13429 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13430 PL_thiswhite = savewhite;
5db06880
NC
13431 }
13432#endif
79072805 13433 return s;
378cc40b 13434}
a687059c 13435
ba6d6ac9 13436I32
864dbfa3 13437Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13438{
97aff369 13439 dVAR;
a3b680e6 13440 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13441 CV* const outsidecv = PL_compcv;
8990e307 13442
3280af22
NIS
13443 if (PL_compcv) {
13444 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13445 }
7766f137 13446 SAVEI32(PL_subline);
3280af22 13447 save_item(PL_subname);
3280af22 13448 SAVESPTR(PL_compcv);
3280af22 13449
ea726b52 13450 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13451 CvFLAGS(PL_compcv) |= flags;
13452
57843af0 13453 PL_subline = CopLINE(PL_curcop);
dd2155a4 13454 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13455 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13456 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13457
8990e307
LW
13458 return oldsavestack_ix;
13459}
13460
084592ab
CN
13461#ifdef __SC__
13462#pragma segment Perl_yylex
13463#endif
af41e527
NC
13464static int
13465S_yywarn(pTHX_ const char *const s)
8990e307 13466{
97aff369 13467 dVAR;
7918f24d
NC
13468
13469 PERL_ARGS_ASSERT_YYWARN;
13470
faef0170 13471 PL_in_eval |= EVAL_WARNONLY;
748a9306 13472 yyerror(s);
faef0170 13473 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13474 return 0;
8990e307
LW
13475}
13476
13477int
15f169a1 13478Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13479{
97aff369 13480 dVAR;
bfed75c6
AL
13481 const char *where = NULL;
13482 const char *context = NULL;
68dc0745 13483 int contlen = -1;
46fc3d4c 13484 SV *msg;
5912531f 13485 int yychar = PL_parser->yychar;
463ee0b2 13486
7918f24d
NC
13487 PERL_ARGS_ASSERT_YYERROR;
13488
3280af22 13489 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13490 where = "at EOF";
8bcfe651
TM
13491 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13492 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13493 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13494 /*
13495 Only for NetWare:
13496 The code below is removed for NetWare because it abends/crashes on NetWare
13497 when the script has error such as not having the closing quotes like:
13498 if ($var eq "value)
13499 Checking of white spaces is anyway done in NetWare code.
13500 */
13501#ifndef NETWARE
3280af22
NIS
13502 while (isSPACE(*PL_oldoldbufptr))
13503 PL_oldoldbufptr++;
f355267c 13504#endif
3280af22
NIS
13505 context = PL_oldoldbufptr;
13506 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13507 }
8bcfe651
TM
13508 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13509 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13510 /*
13511 Only for NetWare:
13512 The code below is removed for NetWare because it abends/crashes on NetWare
13513 when the script has error such as not having the closing quotes like:
13514 if ($var eq "value)
13515 Checking of white spaces is anyway done in NetWare code.
13516 */
13517#ifndef NETWARE
3280af22
NIS
13518 while (isSPACE(*PL_oldbufptr))
13519 PL_oldbufptr++;
f355267c 13520#endif
3280af22
NIS
13521 context = PL_oldbufptr;
13522 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13523 }
13524 else if (yychar > 255)
68dc0745 13525 where = "next token ???";
12fbd33b 13526 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13527 if (PL_lex_state == LEX_NORMAL ||
13528 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13529 where = "at end of line";
3280af22 13530 else if (PL_lex_inpat)
68dc0745 13531 where = "within pattern";
463ee0b2 13532 else
68dc0745 13533 where = "within string";
463ee0b2 13534 }
46fc3d4c 13535 else {
84bafc02 13536 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13537 if (yychar < 32)
cea2e8a9 13538 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13539 else if (isPRINT_LC(yychar)) {
88c9ea1e 13540 const char string = yychar;
5e7aa789
NC
13541 sv_catpvn(where_sv, &string, 1);
13542 }
463ee0b2 13543 else
cea2e8a9 13544 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13545 where = SvPVX_const(where_sv);
463ee0b2 13546 }
46fc3d4c 13547 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13548 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13549 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13550 if (context)
cea2e8a9 13551 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13552 else
cea2e8a9 13553 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13554 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13555 Perl_sv_catpvf(aTHX_ msg,
57def98f 13556 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13557 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13558 PL_multi_end = 0;
a0d0e21e 13559 }
500960a6 13560 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13561 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13562 }
463ee0b2 13563 else
5a844595 13564 qerror(msg);
c7d6bfb2
GS
13565 if (PL_error_count >= 10) {
13566 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13567 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13568 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13569 else
13570 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13571 OutCopFILE(PL_curcop));
c7d6bfb2 13572 }
3280af22 13573 PL_in_my = 0;
5c284bb0 13574 PL_in_my_stash = NULL;
463ee0b2
LW
13575 return 0;
13576}
084592ab
CN
13577#ifdef __SC__
13578#pragma segment Main
13579#endif
4e35701f 13580
b250498f 13581STATIC char*
3ae08724 13582S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13583{
97aff369 13584 dVAR;
f54cb97a 13585 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13586
13587 PERL_ARGS_ASSERT_SWALLOW_BOM;
13588
7aa207d6 13589 switch (s[0]) {
4e553d73
NIS
13590 case 0xFF:
13591 if (s[1] == 0xFE) {
ee6ba15d 13592 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13593 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13594 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13595#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13596 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13597 s += 2;
dea0fc0b 13598 if (PL_bufend > (char*)s) {
81a923f4 13599 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13600 }
b250498f 13601#else
ee6ba15d 13602 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13603#endif
01ec43d0
GS
13604 }
13605 break;
78ae23f5 13606 case 0xFE:
7aa207d6 13607 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13608#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13609 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13610 s += 2;
13611 if (PL_bufend > (char *)s) {
81a923f4 13612 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13613 }
b250498f 13614#else
ee6ba15d 13615 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13616#endif
01ec43d0
GS
13617 }
13618 break;
3ae08724
GS
13619 case 0xEF:
13620 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13621 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13622 s += 3; /* UTF-8 */
13623 }
13624 break;
13625 case 0:
7aa207d6
JH
13626 if (slen > 3) {
13627 if (s[1] == 0) {
13628 if (s[2] == 0xFE && s[3] == 0xFF) {
13629 /* UTF-32 big-endian */
ee6ba15d 13630 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13631 }
13632 }
13633 else if (s[2] == 0 && s[3] != 0) {
13634 /* Leading bytes
13635 * 00 xx 00 xx
13636 * are a good indicator of UTF-16BE. */
ee6ba15d 13637#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13638 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13639 s = add_utf16_textfilter(s, FALSE);
13640#else
13641 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13642#endif
7aa207d6 13643 }
01ec43d0 13644 }
e294cc5d
JH
13645#ifdef EBCDIC
13646 case 0xDD:
13647 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13648 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13649 s += 4; /* UTF-8 */
13650 }
13651 break;
13652#endif
13653
7aa207d6
JH
13654 default:
13655 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13656 /* Leading bytes
13657 * xx 00 xx 00
13658 * are a good indicator of UTF-16LE. */
ee6ba15d 13659#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13660 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13661 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13662#else
13663 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13664#endif
7aa207d6 13665 }
01ec43d0 13666 }
b8f84bb2 13667 return (char*)s;
b250498f 13668}
4755096e 13669
6e3aabd6
GS
13670
13671#ifndef PERL_NO_UTF16_FILTER
13672static I32
a28af015 13673S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13674{
97aff369 13675 dVAR;
f3040f2c 13676 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13677 /* We re-use this each time round, throwing the contents away before we
13678 return. */
2a773401 13679 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13680 SV *const utf8_buffer = filter;
c28d6105 13681 IV status = IoPAGE(filter);
f2338a2e 13682 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 13683 I32 retval;
c8b0cbae
NC
13684
13685 /* As we're automatically added, at the lowest level, and hence only called
13686 from this file, we can be sure that we're not called in block mode. Hence
13687 don't bother writing code to deal with block mode. */
13688 if (maxlen) {
13689 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13690 }
c28d6105
NC
13691 if (status < 0) {
13692 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13693 }
1de9afcd 13694 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13695 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13696 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13697 reverse ? 'l' : 'b', idx, maxlen, status,
13698 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13699
13700 while (1) {
13701 STRLEN chars;
13702 STRLEN have;
dea0fc0b 13703 I32 newlen;
2a773401 13704 U8 *end;
c28d6105
NC
13705 /* First, look in our buffer of existing UTF-8 data: */
13706 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13707
13708 if (nl) {
13709 ++nl;
13710 } else if (status == 0) {
13711 /* EOF */
13712 IoPAGE(filter) = 0;
13713 nl = SvEND(utf8_buffer);
13714 }
13715 if (nl) {
d2d1d4de
NC
13716 STRLEN got = nl - SvPVX(utf8_buffer);
13717 /* Did we have anything to append? */
13718 retval = got != 0;
13719 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13720 /* Everything else in this code works just fine if SVp_POK isn't
13721 set. This, however, needs it, and we need it to work, else
13722 we loop infinitely because the buffer is never consumed. */
13723 sv_chop(utf8_buffer, nl);
13724 break;
13725 }
ba77e4cc 13726
c28d6105
NC
13727 /* OK, not a complete line there, so need to read some more UTF-16.
13728 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13729 while (1) {
13730 if (status <= 0)
13731 break;
13732 if (SvCUR(utf16_buffer) >= 2) {
13733 /* Location of the high octet of the last complete code point.
13734 Gosh, UTF-16 is a pain. All the benefits of variable length,
13735 *coupled* with all the benefits of partial reads and
13736 endianness. */
13737 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13738 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13739
13740 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13741 break;
13742 }
13743
13744 /* We have the first half of a surrogate. Read more. */
13745 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13746 }
c28d6105 13747
c28d6105
NC
13748 status = FILTER_READ(idx + 1, utf16_buffer,
13749 160 + (SvCUR(utf16_buffer) & 1));
13750 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13751 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13752 if (status < 0) {
13753 /* Error */
13754 IoPAGE(filter) = status;
13755 return status;
13756 }
13757 }
13758
13759 chars = SvCUR(utf16_buffer) >> 1;
13760 have = SvCUR(utf8_buffer);
13761 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13762
aa6dbd60 13763 if (reverse) {
c28d6105
NC
13764 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13765 (U8*)SvPVX_const(utf8_buffer) + have,
13766 chars * 2, &newlen);
aa6dbd60 13767 } else {
2a773401 13768 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13769 (U8*)SvPVX_const(utf8_buffer) + have,
13770 chars * 2, &newlen);
2a773401 13771 }
c28d6105 13772 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13773 *end = '\0';
c28d6105 13774
e07286ed
NC
13775 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13776 it's private to us, and utf16_to_utf8{,reversed} take a
13777 (pointer,length) pair, rather than a NUL-terminated string. */
13778 if(SvCUR(utf16_buffer) & 1) {
13779 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13780 SvCUR_set(utf16_buffer, 1);
13781 } else {
13782 SvCUR_set(utf16_buffer, 0);
13783 }
2a773401 13784 }
c28d6105
NC
13785 DEBUG_P(PerlIO_printf(Perl_debug_log,
13786 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13787 status,
13788 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13789 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13790 return retval;
6e3aabd6 13791}
81a923f4
NC
13792
13793static U8 *
13794S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13795{
2a773401 13796 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13797
c28d6105 13798 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13799 sv_setpvs(filter, "");
2a773401 13800 IoLINES(filter) = reversed;
c28d6105
NC
13801 IoPAGE(filter) = 1; /* Not EOF */
13802
13803 /* Sadly, we have to return a valid pointer, come what may, so we have to
13804 ignore any error return from this. */
13805 SvCUR_set(PL_linestr, 0);
13806 if (FILTER_READ(0, PL_linestr, 0)) {
13807 SvUTF8_on(PL_linestr);
81a923f4 13808 } else {
c28d6105 13809 SvUTF8_on(PL_linestr);
81a923f4 13810 }
c28d6105 13811 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13812 return (U8*)SvPVX(PL_linestr);
13813}
6e3aabd6 13814#endif
9f4817db 13815
f333445c
JP
13816/*
13817Returns a pointer to the next character after the parsed
13818vstring, as well as updating the passed in sv.
13819
13820Function must be called like
13821
561b68a9 13822 sv = newSV(5);
65b06e02 13823 s = scan_vstring(s,e,sv);
f333445c 13824
65b06e02 13825where s and e are the start and end of the string.
f333445c
JP
13826The sv should already be large enough to store the vstring
13827passed in, for performance reasons.
13828
13829*/
13830
13831char *
15f169a1 13832Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13833{
97aff369 13834 dVAR;
bfed75c6
AL
13835 const char *pos = s;
13836 const char *start = s;
7918f24d
NC
13837
13838 PERL_ARGS_ASSERT_SCAN_VSTRING;
13839
f333445c 13840 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13841 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13842 pos++;
f333445c
JP
13843 if ( *pos != '.') {
13844 /* this may not be a v-string if followed by => */
bfed75c6 13845 const char *next = pos;
65b06e02 13846 while (next < e && isSPACE(*next))
8fc7bb1c 13847 ++next;
65b06e02 13848 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13849 /* return string not v-string */
13850 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13851 return (char *)pos;
f333445c
JP
13852 }
13853 }
13854
13855 if (!isALPHA(*pos)) {
89ebb4a3 13856 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13857
d4c19fe8
AL
13858 if (*s == 'v')
13859 s++; /* get past 'v' */
f333445c 13860
76f68e9b 13861 sv_setpvs(sv, "");
f333445c
JP
13862
13863 for (;;) {
d4c19fe8 13864 /* this is atoi() that tolerates underscores */
0bd48802
AL
13865 U8 *tmpend;
13866 UV rev = 0;
d4c19fe8
AL
13867 const char *end = pos;
13868 UV mult = 1;
13869 while (--end >= s) {
13870 if (*end != '_') {
13871 const UV orev = rev;
f333445c
JP
13872 rev += (*end - '0') * mult;
13873 mult *= 10;
9b387841
NC
13874 if (orev > rev)
13875 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13876 "Integer overflow in decimal number");
f333445c
JP
13877 }
13878 }
13879#ifdef EBCDIC
13880 if (rev > 0x7FFFFFFF)
13881 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13882#endif
13883 /* Append native character for the rev point */
13884 tmpend = uvchr_to_utf8(tmpbuf, rev);
13885 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13886 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13887 SvUTF8_on(sv);
65b06e02 13888 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13889 s = ++pos;
13890 else {
13891 s = pos;
13892 break;
13893 }
65b06e02 13894 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13895 pos++;
13896 }
13897 SvPOK_on(sv);
13898 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13899 SvRMAGICAL_on(sv);
13900 }
73d840c0 13901 return (char *)s;
f333445c
JP
13902}
13903
88e1f1a2
JV
13904int
13905Perl_keyword_plugin_standard(pTHX_
13906 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13907{
13908 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13909 PERL_UNUSED_CONTEXT;
13910 PERL_UNUSED_ARG(keyword_ptr);
13911 PERL_UNUSED_ARG(keyword_len);
13912 PERL_UNUSED_ARG(op_ptr);
13913 return KEYWORD_PLUGIN_DECLINE;
13914}
13915
1da4ca5f
NC
13916/*
13917 * Local variables:
13918 * c-indentation-style: bsd
13919 * c-basic-offset: 4
13920 * indent-tabs-mode: t
13921 * End:
13922 *
37442d52
RGS
13923 * ex: set ts=8 sts=4 sw=4 noet:
13924 */