This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Standardize on use of 'capture group' over 'buffer'
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
acdf0a21
DM
48/* YYINITDEPTH -- initial size of the parser's stacks. */
49#define YYINITDEPTH 200
50
199e78b7
DM
51/* XXX temporary backwards compatibility */
52#define PL_lex_brackets (PL_parser->lex_brackets)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
9059aa12
LW
129#define XFAKEBRACK 128
130#define XENUMMASK 127
131
39e02b42
JH
132#ifdef USE_UTF8_SCRIPTS
133# define UTF (!IN_BYTES)
2b9d42f0 134#else
746b446a 135# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 136#endif
a0ed51b3 137
b1fc3636
CJ
138/* The maximum number of characters preceding the unrecognized one to display */
139#define UNRECOGNIZED_PRECEDE_COUNT 10
140
61f0cdd9 141/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
142 * 1999-02-27 mjd-perl-patch@plover.com */
143#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
144
bf4acbe4 145#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 146
ffb4593c
NT
147/* LEX_* are values for PL_lex_state, the state of the lexer.
148 * They are arranged oddly so that the guard on the switch statement
79072805
LW
149 * can get by with a single comparison (if the compiler is smart enough).
150 */
151
fb73857a 152/* #define LEX_NOTPARSING 11 is done in perl.h. */
153
b6007c36
DM
154#define LEX_NORMAL 10 /* normal code (ie not within "...") */
155#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
156#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
157#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
158#define LEX_INTERPSTART 6 /* expecting the start of a $var */
159
160 /* at end of code, eg "$x" followed by: */
161#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
162#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
163
164#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
165 string or after \E, $foo, etc */
166#define LEX_INTERPCONST 2 /* NOT USED */
167#define LEX_FORMLINE 1 /* expecting a format line */
168#define LEX_KNOWNEXT 0 /* next token known; just return it */
169
79072805 170
bbf60fe6 171#ifdef DEBUGGING
27da23d5 172static const char* const lex_state_names[] = {
bbf60fe6
DM
173 "KNOWNEXT",
174 "FORMLINE",
175 "INTERPCONST",
176 "INTERPCONCAT",
177 "INTERPENDMAYBE",
178 "INTERPEND",
179 "INTERPSTART",
180 "INTERPPUSH",
181 "INTERPCASEMOD",
182 "INTERPNORMAL",
183 "NORMAL"
184};
185#endif
186
79072805
LW
187#ifdef ff_next
188#undef ff_next
d48672a2
LW
189#endif
190
79072805 191#include "keywords.h"
fe14fcc3 192
ffb4593c
NT
193/* CLINE is a macro that ensures PL_copline has a sane value */
194
ae986130
LW
195#ifdef CLINE
196#undef CLINE
197#endif
57843af0 198#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 199
5db06880 200#ifdef PERL_MAD
29595ff2
NC
201# define SKIPSPACE0(s) skipspace0(s)
202# define SKIPSPACE1(s) skipspace1(s)
203# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
204# define PEEKSPACE(s) skipspace2(s,0)
205#else
206# define SKIPSPACE0(s) skipspace(s)
207# define SKIPSPACE1(s) skipspace(s)
208# define SKIPSPACE2(s,tsv) skipspace(s)
209# define PEEKSPACE(s) skipspace(s)
210#endif
211
ffb4593c
NT
212/*
213 * Convenience functions to return different tokens and prime the
9cbb5ea2 214 * lexer for the next token. They all take an argument.
ffb4593c
NT
215 *
216 * TOKEN : generic token (used for '(', DOLSHARP, etc)
217 * OPERATOR : generic operator
218 * AOPERATOR : assignment operator
219 * PREBLOCK : beginning the block after an if, while, foreach, ...
220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
221 * PREREF : *EXPR where EXPR is not a simple identifier
222 * TERM : expression term
223 * LOOPX : loop exiting command (goto, last, dump, etc)
224 * FTST : file test operator
225 * FUN0 : zero-argument function
2d2e263d 226 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
227 * BOop : bitwise or or xor
228 * BAop : bitwise and
229 * SHop : shift operator
230 * PWop : power operator
9cbb5ea2 231 * PMop : pattern-matching operator
ffb4593c
NT
232 * Aop : addition-level operator
233 * Mop : multiplication-level operator
234 * Eop : equality-testing operator
e5edeb50 235 * Rop : relational operator <= != gt
ffb4593c
NT
236 *
237 * Also see LOP and lop() below.
238 */
239
998054bd 240#ifdef DEBUGGING /* Serve -DT. */
704d4215 241# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 242#else
bbf60fe6 243# define REPORT(retval) (retval)
998054bd
SC
244#endif
245
bbf60fe6
DM
246#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
247#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
248#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
249#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
250#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
251#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
252#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
253#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
254#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
255#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
376fcdbf 272#define UNI2(f,x) { \
6154021b 273 pl_yylval.ival = f; \
376fcdbf
AL
274 PL_expect = x; \
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
6f33ba73
RGS
283#define UNI(f) UNI2(f,XTERM)
284#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 285
376fcdbf 286#define UNIBRACK(f) { \
6154021b 287 pl_yylval.ival = f; \
376fcdbf
AL
288 PL_bufptr = s; \
289 PL_last_uni = PL_oldbufptr; \
290 if (*s == '(') \
291 return REPORT( (int)FUNC1 ); \
29595ff2 292 s = PEEKSPACE(s); \
376fcdbf
AL
293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 }
79072805 295
9f68db38 296/* grandfather return to old style */
6154021b 297#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 298
8fa7f367
JH
299#ifdef DEBUGGING
300
6154021b 301/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
302enum token_type {
303 TOKENTYPE_NONE,
304 TOKENTYPE_IVAL,
6154021b 305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
306 TOKENTYPE_PVAL,
307 TOKENTYPE_OPVAL,
308 TOKENTYPE_GVVAL
309};
310
6d4a66ac
NC
311static struct debug_tokens {
312 const int token;
313 enum token_type type;
314 const char *name;
315} const debug_tokens[] =
9041c2e3 316{
bbf60fe6
DM
317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
318 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
319 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
321 { ARROW, TOKENTYPE_NONE, "ARROW" },
322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
328 { DO, TOKENTYPE_NONE, "DO" },
329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
330 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
331 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
333 { ELSE, TOKENTYPE_NONE, "ELSE" },
334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
335 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
336 { FOR, TOKENTYPE_IVAL, "FOR" },
337 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
338 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
345 { IF, TOKENTYPE_IVAL, "IF" },
346 { LABEL, TOKENTYPE_PVAL, "LABEL" },
347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
352 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
353 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
354 { MY, TOKENTYPE_IVAL, "MY" },
355 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
356 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
357 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
358 { OROP, TOKENTYPE_IVAL, "OROP" },
359 { OROR, TOKENTYPE_NONE, "OROR" },
360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
365 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
366 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
367 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
368 { PREINC, TOKENTYPE_NONE, "PREINC" },
369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
370 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
371 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 385 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
386};
387
6154021b 388/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 389
bbf60fe6 390STATIC int
704d4215 391S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 392{
97aff369 393 dVAR;
7918f24d
NC
394
395 PERL_ARGS_ASSERT_TOKEREPORT;
396
bbf60fe6 397 if (DEBUG_T_TEST) {
bd61b366 398 const char *name = NULL;
bbf60fe6 399 enum token_type type = TOKENTYPE_NONE;
f54cb97a 400 const struct debug_tokens *p;
396482e1 401 SV* const report = newSVpvs("<== ");
bbf60fe6 402
f54cb97a 403 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
404 if (p->token == (int)rv) {
405 name = p->name;
406 type = p->type;
407 break;
408 }
409 }
410 if (name)
54667de8 411 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
412 else if ((char)rv > ' ' && (char)rv < '~')
413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
414 else if (!rv)
396482e1 415 sv_catpvs(report, "EOF");
bbf60fe6
DM
416 else
417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 switch (type) {
419 case TOKENTYPE_NONE:
420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 break;
422 case TOKENTYPE_IVAL:
704d4215 423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
424 break;
425 case TOKENTYPE_OPNUM:
426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 427 PL_op_name[lvalp->ival]);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_PVAL:
704d4215 430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
431 break;
432 case TOKENTYPE_OPVAL:
704d4215 433 if (lvalp->opval) {
401441c0 434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
435 PL_op_name[lvalp->opval->op_type]);
436 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 437 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 438 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
439 }
440
441 }
401441c0 442 else
396482e1 443 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
444 break;
445 }
b6007c36 446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
447 };
448 return (int)rv;
998054bd
SC
449}
450
b6007c36
DM
451
452/* print the buffer with suitable escapes */
453
454STATIC void
15f169a1 455S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 456{
396482e1 457 SV* const tmp = newSVpvs("");
7918f24d
NC
458
459 PERL_ARGS_ASSERT_PRINTBUF;
460
b6007c36
DM
461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
462 SvREFCNT_dec(tmp);
463}
464
8fa7f367
JH
465#endif
466
8290c323
NC
467static int
468S_deprecate_commaless_var_list(pTHX) {
469 PL_expect = XTERM;
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
472}
473
ffb4593c
NT
474/*
475 * S_ao
476 *
c963b151
BD
477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
479 */
480
76e3520e 481STATIC int
cea2e8a9 482S_ao(pTHX_ int toketype)
a0d0e21e 483{
97aff369 484 dVAR;
3280af22
NIS
485 if (*PL_bufptr == '=') {
486 PL_bufptr++;
a0d0e21e 487 if (toketype == ANDAND)
6154021b 488 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 489 else if (toketype == OROR)
6154021b 490 pl_yylval.ival = OP_ORASSIGN;
c963b151 491 else if (toketype == DORDOR)
6154021b 492 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
493 toketype = ASSIGNOP;
494 }
495 return toketype;
496}
497
ffb4593c
NT
498/*
499 * S_no_op
500 * When Perl expects an operator and finds something else, no_op
501 * prints the warning. It always prints "<something> found where
502 * operator expected. It prints "Missing semicolon on previous line?"
503 * if the surprise occurs at the start of the line. "do you need to
504 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
505 * where the compiler doesn't know if foo is a method call or a function.
506 * It prints "Missing operator before end of line" if there's nothing
507 * after the missing operator, or "... before <...>" if there is something
508 * after the missing operator.
509 */
510
76e3520e 511STATIC void
15f169a1 512S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 513{
97aff369 514 dVAR;
9d4ba2ae
AL
515 char * const oldbp = PL_bufptr;
516 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 517
7918f24d
NC
518 PERL_ARGS_ASSERT_NO_OP;
519
1189a94a
GS
520 if (!s)
521 s = oldbp;
07c798fb 522 else
1189a94a 523 PL_bufptr = s;
cea2e8a9 524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
525 if (ckWARN_d(WARN_SYNTAX)) {
526 if (is_first)
527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
528 "\t(Missing semicolon on previous line?)\n");
529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 530 const char *t;
c35e046a
AL
531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
532 NOOP;
56da5a46
RGS
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %.*s?)\n",
551405c4 536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
537 }
538 else {
539 assert(s >= oldbp);
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 542 }
07c798fb 543 }
3280af22 544 PL_bufptr = oldbp;
8990e307
LW
545}
546
ffb4593c
NT
547/*
548 * S_missingterm
549 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 550 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
551 * If we're in a delimited string and the delimiter is a control
552 * character, it's reformatted into a two-char sequence like ^C.
553 * This is fatal.
554 */
555
76e3520e 556STATIC void
cea2e8a9 557S_missingterm(pTHX_ char *s)
8990e307 558{
97aff369 559 dVAR;
8990e307
LW
560 char tmpbuf[3];
561 char q;
562 if (s) {
9d4ba2ae 563 char * const nl = strrchr(s,'\n');
d2719217 564 if (nl)
8990e307
LW
565 *nl = '\0';
566 }
463559e7 567 else if (isCNTRL(PL_multi_close)) {
8990e307 568 *tmpbuf = '^';
585ec06d 569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
570 tmpbuf[2] = '\0';
571 s = tmpbuf;
572 }
573 else {
eb160463 574 *tmpbuf = (char)PL_multi_close;
8990e307
LW
575 tmpbuf[1] = '\0';
576 s = tmpbuf;
577 }
578 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 580}
79072805 581
ef89dcc3 582#define FEATURE_IS_ENABLED(name) \
0d863452 583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 585/* The longest string we pass in. */
1863b879 586#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 587
0d863452
RH
588/*
589 * S_feature_is_enabled
590 * Check whether the named feature is enabled.
591 */
592STATIC bool
15f169a1 593S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 594{
97aff369 595 dVAR;
0d863452 596 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 597 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
598
599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
600
4a731d7b
NC
601 assert(namelen <= MAX_FEATURE_LEN);
602 memcpy(&he_name[8], name, namelen);
d4c19fe8 603
7b9ef140 604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
605}
606
ffb4593c 607/*
9cbb5ea2
GS
608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
609 * utf16-to-utf8-reversed.
ffb4593c
NT
610 */
611
c39cd008
GS
612#ifdef PERL_CR_FILTER
613static void
614strip_return(SV *sv)
615{
95a20fc0 616 register const char *s = SvPVX_const(sv);
9d4ba2ae 617 register const char * const e = s + SvCUR(sv);
7918f24d
NC
618
619 PERL_ARGS_ASSERT_STRIP_RETURN;
620
c39cd008
GS
621 /* outer loop optimized to do nothing if there are no CR-LFs */
622 while (s < e) {
623 if (*s++ == '\r' && *s == '\n') {
624 /* hit a CR-LF, need to copy the rest */
625 register char *d = s - 1;
626 *d++ = *s++;
627 while (s < e) {
628 if (*s == '\r' && s[1] == '\n')
629 s++;
630 *d++ = *s++;
631 }
632 SvCUR(sv) -= s - d;
633 return;
634 }
635 }
636}
a868473f 637
76e3520e 638STATIC I32
c39cd008 639S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 640{
f54cb97a 641 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
642 if (count > 0 && !maxlen)
643 strip_return(sv);
644 return count;
a868473f
NIS
645}
646#endif
647
199e78b7
DM
648
649
ffb4593c
NT
650/*
651 * Perl_lex_start
5486870f 652 *
e3abe207 653 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
654 *
655 * rsfp is the opened file handle to read from (if any),
656 *
657 * line holds any initial content already read from the file (or in
658 * the case of no file, such as an eval, the whole contents);
659 *
660 * new_filter indicates that this is a new file and it shouldn't inherit
661 * the filters from the current parser (ie require).
ffb4593c
NT
662 */
663
a0d0e21e 664void
5486870f 665Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 666{
97aff369 667 dVAR;
6ef55633 668 const char *s = NULL;
8990e307 669 STRLEN len;
5486870f 670 yy_parser *parser, *oparser;
acdf0a21
DM
671
672 /* create and initialise a parser */
673
199e78b7 674 Newxz(parser, 1, yy_parser);
5486870f 675 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
676 PL_parser = parser;
677
678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
679 parser->ps = parser->stack;
680 parser->stack_size = YYINITDEPTH;
681
682 parser->stack->state = 0;
683 parser->yyerrstatus = 0;
684 parser->yychar = YYEMPTY; /* Cause a token to be read. */
685
e3abe207
DM
686 /* on scope exit, free this parser and restore any outer one */
687 SAVEPARSER(parser);
7c4baf47 688 parser->saved_curcop = PL_curcop;
e3abe207 689
acdf0a21 690 /* initialise lexer state */
8990e307 691
fb205e7a
DM
692#ifdef PERL_MAD
693 parser->curforce = -1;
694#else
695 parser->nexttoke = 0;
696#endif
ca4cfd28 697 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 698 parser->copline = NOLINE;
5afb0a62 699 parser->lex_state = LEX_NORMAL;
c2598295 700 parser->expect = XSTATE;
2f9285f8 701 parser->rsfp = rsfp;
56b27c9a 702 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 704
199e78b7
DM
705 Newx(parser->lex_brackstack, 120, char);
706 Newx(parser->lex_casestack, 12, char);
707 *parser->lex_casestack = '\0';
02b34bbe 708
10efb74f
NC
709 if (line) {
710 s = SvPV_const(line, len);
711 } else {
712 len = 0;
713 }
bdc0bf6f 714
10efb74f 715 if (!len) {
bdc0bf6f 716 parser->linestr = newSVpvs("\n;");
10efb74f 717 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 718 parser->linestr = newSVsv(line);
10efb74f 719 if (s[len-1] != ';')
bdc0bf6f 720 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
721 } else {
722 SvTEMP_off(line);
723 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 724 parser->linestr = line;
8990e307 725 }
f06b5848
DM
726 parser->oldoldbufptr =
727 parser->oldbufptr =
728 parser->bufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
79072805 732}
a687059c 733
e3abe207
DM
734
735/* delete a parser object */
736
737void
738Perl_parser_free(pTHX_ const yy_parser *parser)
739{
7918f24d
NC
740 PERL_ARGS_ASSERT_PARSER_FREE;
741
7c4baf47 742 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
743 SvREFCNT_dec(parser->linestr);
744
2f9285f8
DM
745 if (parser->rsfp == PerlIO_stdin())
746 PerlIO_clearerr(parser->rsfp);
799361c3
SH
747 else if (parser->rsfp && (!parser->old_parser ||
748 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 749 PerlIO_close(parser->rsfp);
5486870f 750 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 751
e3abe207
DM
752 Safefree(parser->stack);
753 Safefree(parser->lex_brackstack);
754 Safefree(parser->lex_casestack);
755 PL_parser = parser->old_parser;
756 Safefree(parser);
757}
758
759
ffb4593c
NT
760/*
761 * Perl_lex_end
9cbb5ea2
GS
762 * Finalizer for lexing operations. Must be called when the parser is
763 * done with the lexer.
ffb4593c
NT
764 */
765
463ee0b2 766void
864dbfa3 767Perl_lex_end(pTHX)
463ee0b2 768{
97aff369 769 dVAR;
3280af22 770 PL_doextract = FALSE;
463ee0b2
LW
771}
772
ffb4593c 773/*
f0e67a1d
Z
774=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
775
776Buffer scalar containing the chunk currently under consideration of the
777text currently being lexed. This is always a plain string scalar (for
778which C<SvPOK> is true). It is not intended to be used as a scalar by
779normal scalar means; instead refer to the buffer directly by the pointer
780variables described below.
781
782The lexer maintains various C<char*> pointers to things in the
783C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
784reallocated, all of these pointers must be updated. Don't attempt to
785do this manually, but rather use L</lex_grow_linestr> if you need to
786reallocate the buffer.
787
788The content of the text chunk in the buffer is commonly exactly one
789complete line of input, up to and including a newline terminator,
790but there are situations where it is otherwise. The octets of the
791buffer may be intended to be interpreted as either UTF-8 or Latin-1.
792The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
793flag on this scalar, which may disagree with it.
794
795For direct examination of the buffer, the variable
796L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
797lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
798of these pointers is usually preferable to examination of the scalar
799through normal scalar means.
800
801=for apidoc AmxU|char *|PL_parser-E<gt>bufend
802
803Direct pointer to the end of the chunk of text currently being lexed, the
804end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
805+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
806always located at the end of the buffer, and does not count as part of
807the buffer's contents.
808
809=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
810
811Points to the current position of lexing inside the lexer buffer.
812Characters around this point may be freely examined, within
813the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
814L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
815interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
816
817Lexing code (whether in the Perl core or not) moves this pointer past
818the characters that it consumes. It is also expected to perform some
819bookkeeping whenever a newline character is consumed. This movement
820can be more conveniently performed by the function L</lex_read_to>,
821which handles newlines appropriately.
822
823Interpretation of the buffer's octets can be abstracted out by
824using the slightly higher-level functions L</lex_peek_unichar> and
825L</lex_read_unichar>.
826
827=for apidoc AmxU|char *|PL_parser-E<gt>linestart
828
829Points to the start of the current line inside the lexer buffer.
830This is useful for indicating at which column an error occurred, and
831not much else. This must be updated by any lexing code that consumes
832a newline; the function L</lex_read_to> handles this detail.
833
834=cut
835*/
836
837/*
838=for apidoc Amx|bool|lex_bufutf8
839
840Indicates whether the octets in the lexer buffer
841(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
842of Unicode characters. If not, they should be interpreted as Latin-1
843characters. This is analogous to the C<SvUTF8> flag for scalars.
844
845In UTF-8 mode, it is not guaranteed that the lexer buffer actually
846contains valid UTF-8. Lexing code must be robust in the face of invalid
847encoding.
848
849The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
850is significant, but not the whole story regarding the input character
851encoding. Normally, when a file is being read, the scalar contains octets
852and its C<SvUTF8> flag is off, but the octets should be interpreted as
853UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
854however, the scalar may have the C<SvUTF8> flag on, and in this case its
855octets should be interpreted as UTF-8 unless the C<use bytes> pragma
856is in effect. This logic may change in the future; use this function
857instead of implementing the logic yourself.
858
859=cut
860*/
861
862bool
863Perl_lex_bufutf8(pTHX)
864{
865 return UTF;
866}
867
868/*
869=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
870
871Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
872at least I<len> octets (including terminating NUL). Returns a
873pointer to the reallocated buffer. This is necessary before making
874any direct modification of the buffer that would increase its length.
875L</lex_stuff_pvn> provides a more convenient way to insert text into
876the buffer.
877
878Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
879this function updates all of the lexer's variables that point directly
880into the buffer.
881
882=cut
883*/
884
885char *
886Perl_lex_grow_linestr(pTHX_ STRLEN len)
887{
888 SV *linestr;
889 char *buf;
890 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
891 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
892 linestr = PL_parser->linestr;
893 buf = SvPVX(linestr);
894 if (len <= SvLEN(linestr))
895 return buf;
896 bufend_pos = PL_parser->bufend - buf;
897 bufptr_pos = PL_parser->bufptr - buf;
898 oldbufptr_pos = PL_parser->oldbufptr - buf;
899 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
900 linestart_pos = PL_parser->linestart - buf;
901 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
902 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
903 buf = sv_grow(linestr, len);
904 PL_parser->bufend = buf + bufend_pos;
905 PL_parser->bufptr = buf + bufptr_pos;
906 PL_parser->oldbufptr = buf + oldbufptr_pos;
907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
908 PL_parser->linestart = buf + linestart_pos;
909 if (PL_parser->last_uni)
910 PL_parser->last_uni = buf + last_uni_pos;
911 if (PL_parser->last_lop)
912 PL_parser->last_lop = buf + last_lop_pos;
913 return buf;
914}
915
916/*
83aa740e 917=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
918
919Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
920immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
921reallocating the buffer if necessary. This means that lexing code that
922runs later will see the characters as if they had appeared in the input.
923It is not recommended to do this as part of normal parsing, and most
924uses of this facility run the risk of the inserted characters being
925interpreted in an unintended manner.
926
927The string to be inserted is represented by I<len> octets starting
928at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
929according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
930The characters are recoded for the lexer buffer, according to how the
931buffer is currently being interpreted (L</lex_bufutf8>). If a string
932to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
933function is more convenient.
934
935=cut
936*/
937
938void
83aa740e 939Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 940{
749123ff 941 dVAR;
f0e67a1d
Z
942 char *bufptr;
943 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
944 if (flags & ~(LEX_STUFF_UTF8))
945 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
946 if (UTF) {
947 if (flags & LEX_STUFF_UTF8) {
948 goto plain_copy;
949 } else {
950 STRLEN highhalf = 0;
83aa740e 951 const char *p, *e = pv+len;
f0e67a1d
Z
952 for (p = pv; p != e; p++)
953 highhalf += !!(((U8)*p) & 0x80);
954 if (!highhalf)
955 goto plain_copy;
956 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
957 bufptr = PL_parser->bufptr;
958 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
959 SvCUR_set(PL_parser->linestr,
960 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
961 PL_parser->bufend += len+highhalf;
962 for (p = pv; p != e; p++) {
963 U8 c = (U8)*p;
964 if (c & 0x80) {
965 *bufptr++ = (char)(0xc0 | (c >> 6));
966 *bufptr++ = (char)(0x80 | (c & 0x3f));
967 } else {
968 *bufptr++ = (char)c;
969 }
970 }
971 }
972 } else {
973 if (flags & LEX_STUFF_UTF8) {
974 STRLEN highhalf = 0;
83aa740e 975 const char *p, *e = pv+len;
f0e67a1d
Z
976 for (p = pv; p != e; p++) {
977 U8 c = (U8)*p;
978 if (c >= 0xc4) {
979 Perl_croak(aTHX_ "Lexing code attempted to stuff "
980 "non-Latin-1 character into Latin-1 input");
981 } else if (c >= 0xc2 && p+1 != e &&
982 (((U8)p[1]) & 0xc0) == 0x80) {
983 p++;
984 highhalf++;
985 } else if (c >= 0x80) {
986 /* malformed UTF-8 */
987 ENTER;
988 SAVESPTR(PL_warnhook);
989 PL_warnhook = PERL_WARNHOOK_FATAL;
990 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
991 LEAVE;
992 }
993 }
994 if (!highhalf)
995 goto plain_copy;
996 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
997 bufptr = PL_parser->bufptr;
998 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
999 SvCUR_set(PL_parser->linestr,
1000 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1001 PL_parser->bufend += len-highhalf;
1002 for (p = pv; p != e; p++) {
1003 U8 c = (U8)*p;
1004 if (c & 0x80) {
1005 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1006 p++;
1007 } else {
1008 *bufptr++ = (char)c;
1009 }
1010 }
1011 } else {
1012 plain_copy:
1013 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1014 bufptr = PL_parser->bufptr;
1015 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1016 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1017 PL_parser->bufend += len;
1018 Copy(pv, bufptr, len, char);
1019 }
1020 }
1021}
1022
1023/*
1024=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1025
1026Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1027immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1028reallocating the buffer if necessary. This means that lexing code that
1029runs later will see the characters as if they had appeared in the input.
1030It is not recommended to do this as part of normal parsing, and most
1031uses of this facility run the risk of the inserted characters being
1032interpreted in an unintended manner.
1033
1034The string to be inserted is the string value of I<sv>. The characters
1035are recoded for the lexer buffer, according to how the buffer is currently
1036being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1037not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1038need to construct a scalar.
1039
1040=cut
1041*/
1042
1043void
1044Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1045{
1046 char *pv;
1047 STRLEN len;
1048 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1049 if (flags)
1050 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1051 pv = SvPV(sv, len);
1052 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1053}
1054
1055/*
1056=for apidoc Amx|void|lex_unstuff|char *ptr
1057
1058Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1059I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1060This hides the discarded text from any lexing code that runs later,
1061as if the text had never appeared.
1062
1063This is not the normal way to consume lexed text. For that, use
1064L</lex_read_to>.
1065
1066=cut
1067*/
1068
1069void
1070Perl_lex_unstuff(pTHX_ char *ptr)
1071{
1072 char *buf, *bufend;
1073 STRLEN unstuff_len;
1074 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1075 buf = PL_parser->bufptr;
1076 if (ptr < buf)
1077 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1078 if (ptr == buf)
1079 return;
1080 bufend = PL_parser->bufend;
1081 if (ptr > bufend)
1082 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1083 unstuff_len = ptr - buf;
1084 Move(ptr, buf, bufend+1-ptr, char);
1085 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1086 PL_parser->bufend = bufend - unstuff_len;
1087}
1088
1089/*
1090=for apidoc Amx|void|lex_read_to|char *ptr
1091
1092Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1093to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1094performing the correct bookkeeping whenever a newline character is passed.
1095This is the normal way to consume lexed text.
1096
1097Interpretation of the buffer's octets can be abstracted out by
1098using the slightly higher-level functions L</lex_peek_unichar> and
1099L</lex_read_unichar>.
1100
1101=cut
1102*/
1103
1104void
1105Perl_lex_read_to(pTHX_ char *ptr)
1106{
1107 char *s;
1108 PERL_ARGS_ASSERT_LEX_READ_TO;
1109 s = PL_parser->bufptr;
1110 if (ptr < s || ptr > PL_parser->bufend)
1111 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1112 for (; s != ptr; s++)
1113 if (*s == '\n') {
1114 CopLINE_inc(PL_curcop);
1115 PL_parser->linestart = s+1;
1116 }
1117 PL_parser->bufptr = ptr;
1118}
1119
1120/*
1121=for apidoc Amx|void|lex_discard_to|char *ptr
1122
1123Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1124up to I<ptr>. The remaining content of the buffer will be moved, and
1125all pointers into the buffer updated appropriately. I<ptr> must not
1126be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1127it is not permitted to discard text that has yet to be lexed.
1128
1129Normally it is not necessarily to do this directly, because it suffices to
1130use the implicit discarding behaviour of L</lex_next_chunk> and things
1131based on it. However, if a token stretches across multiple lines,
1f317c95 1132and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1133that purpose, then after completion of the token it would be wise to
1134explicitly discard the now-unneeded earlier lines, to avoid future
1135multi-line tokens growing the buffer without bound.
1136
1137=cut
1138*/
1139
1140void
1141Perl_lex_discard_to(pTHX_ char *ptr)
1142{
1143 char *buf;
1144 STRLEN discard_len;
1145 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1146 buf = SvPVX(PL_parser->linestr);
1147 if (ptr < buf)
1148 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1149 if (ptr == buf)
1150 return;
1151 if (ptr > PL_parser->bufptr)
1152 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1153 discard_len = ptr - buf;
1154 if (PL_parser->oldbufptr < ptr)
1155 PL_parser->oldbufptr = ptr;
1156 if (PL_parser->oldoldbufptr < ptr)
1157 PL_parser->oldoldbufptr = ptr;
1158 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1159 PL_parser->last_uni = NULL;
1160 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1161 PL_parser->last_lop = NULL;
1162 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1163 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1164 PL_parser->bufend -= discard_len;
1165 PL_parser->bufptr -= discard_len;
1166 PL_parser->oldbufptr -= discard_len;
1167 PL_parser->oldoldbufptr -= discard_len;
1168 if (PL_parser->last_uni)
1169 PL_parser->last_uni -= discard_len;
1170 if (PL_parser->last_lop)
1171 PL_parser->last_lop -= discard_len;
1172}
1173
1174/*
1175=for apidoc Amx|bool|lex_next_chunk|U32 flags
1176
1177Reads in the next chunk of text to be lexed, appending it to
1178L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1179looked to the end of the current chunk and wants to know more. It is
1180usual, but not necessary, for lexing to have consumed the entirety of
1181the current chunk at this time.
1182
1183If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1184chunk (i.e., the current chunk has been entirely consumed), normally the
1185current chunk will be discarded at the same time that the new chunk is
1186read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1187will not be discarded. If the current chunk has not been entirely
1188consumed, then it will not be discarded regardless of the flag.
1189
1190Returns true if some new text was added to the buffer, or false if the
1191buffer has reached the end of the input text.
1192
1193=cut
1194*/
1195
1196#define LEX_FAKE_EOF 0x80000000
1197
1198bool
1199Perl_lex_next_chunk(pTHX_ U32 flags)
1200{
1201 SV *linestr;
1202 char *buf;
1203 STRLEN old_bufend_pos, new_bufend_pos;
1204 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1205 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1206 bool got_some_for_debugger = 0;
f0e67a1d
Z
1207 bool got_some;
1208 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1209 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1210 linestr = PL_parser->linestr;
1211 buf = SvPVX(linestr);
1212 if (!(flags & LEX_KEEP_PREVIOUS) &&
1213 PL_parser->bufptr == PL_parser->bufend) {
1214 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1215 linestart_pos = 0;
1216 if (PL_parser->last_uni != PL_parser->bufend)
1217 PL_parser->last_uni = NULL;
1218 if (PL_parser->last_lop != PL_parser->bufend)
1219 PL_parser->last_lop = NULL;
1220 last_uni_pos = last_lop_pos = 0;
1221 *buf = 0;
1222 SvCUR(linestr) = 0;
1223 } else {
1224 old_bufend_pos = PL_parser->bufend - buf;
1225 bufptr_pos = PL_parser->bufptr - buf;
1226 oldbufptr_pos = PL_parser->oldbufptr - buf;
1227 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1228 linestart_pos = PL_parser->linestart - buf;
1229 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1230 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1231 }
1232 if (flags & LEX_FAKE_EOF) {
1233 goto eof;
1234 } else if (!PL_parser->rsfp) {
1235 got_some = 0;
1236 } else if (filter_gets(linestr, old_bufend_pos)) {
1237 got_some = 1;
17cc9359 1238 got_some_for_debugger = 1;
f0e67a1d 1239 } else {
580561a3
Z
1240 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1241 sv_setpvs(linestr, "");
f0e67a1d
Z
1242 eof:
1243 /* End of real input. Close filehandle (unless it was STDIN),
1244 * then add implicit termination.
1245 */
1246 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1247 PerlIO_clearerr(PL_parser->rsfp);
1248 else if (PL_parser->rsfp)
1249 (void)PerlIO_close(PL_parser->rsfp);
1250 PL_parser->rsfp = NULL;
1251 PL_doextract = FALSE;
1252#ifdef PERL_MAD
1253 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1254 PL_faketokens = 1;
1255#endif
1256 if (!PL_in_eval && PL_minus_p) {
1257 sv_catpvs(linestr,
1258 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1259 PL_minus_n = PL_minus_p = 0;
1260 } else if (!PL_in_eval && PL_minus_n) {
1261 sv_catpvs(linestr, /*{*/";}");
1262 PL_minus_n = 0;
1263 } else
1264 sv_catpvs(linestr, ";");
1265 got_some = 1;
1266 }
1267 buf = SvPVX(linestr);
1268 new_bufend_pos = SvCUR(linestr);
1269 PL_parser->bufend = buf + new_bufend_pos;
1270 PL_parser->bufptr = buf + bufptr_pos;
1271 PL_parser->oldbufptr = buf + oldbufptr_pos;
1272 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1273 PL_parser->linestart = buf + linestart_pos;
1274 if (PL_parser->last_uni)
1275 PL_parser->last_uni = buf + last_uni_pos;
1276 if (PL_parser->last_lop)
1277 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1278 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1279 PL_curstash != PL_debstash) {
1280 /* debugger active and we're not compiling the debugger code,
1281 * so store the line into the debugger's array of lines
1282 */
1283 update_debugger_info(NULL, buf+old_bufend_pos,
1284 new_bufend_pos-old_bufend_pos);
1285 }
1286 return got_some;
1287}
1288
1289/*
1290=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1291
1292Looks ahead one (Unicode) character in the text currently being lexed.
1293Returns the codepoint (unsigned integer value) of the next character,
1294or -1 if lexing has reached the end of the input text. To consume the
1295peeked character, use L</lex_read_unichar>.
1296
1297If the next character is in (or extends into) the next chunk of input
1298text, the next chunk will be read in. Normally the current chunk will be
1299discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1300then the current chunk will not be discarded.
1301
1302If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1303is encountered, an exception is generated.
1304
1305=cut
1306*/
1307
1308I32
1309Perl_lex_peek_unichar(pTHX_ U32 flags)
1310{
749123ff 1311 dVAR;
f0e67a1d
Z
1312 char *s, *bufend;
1313 if (flags & ~(LEX_KEEP_PREVIOUS))
1314 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1315 s = PL_parser->bufptr;
1316 bufend = PL_parser->bufend;
1317 if (UTF) {
1318 U8 head;
1319 I32 unichar;
1320 STRLEN len, retlen;
1321 if (s == bufend) {
1322 if (!lex_next_chunk(flags))
1323 return -1;
1324 s = PL_parser->bufptr;
1325 bufend = PL_parser->bufend;
1326 }
1327 head = (U8)*s;
1328 if (!(head & 0x80))
1329 return head;
1330 if (head & 0x40) {
1331 len = PL_utf8skip[head];
1332 while ((STRLEN)(bufend-s) < len) {
1333 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1334 break;
1335 s = PL_parser->bufptr;
1336 bufend = PL_parser->bufend;
1337 }
1338 }
1339 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1340 if (retlen == (STRLEN)-1) {
1341 /* malformed UTF-8 */
1342 ENTER;
1343 SAVESPTR(PL_warnhook);
1344 PL_warnhook = PERL_WARNHOOK_FATAL;
1345 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1346 LEAVE;
1347 }
1348 return unichar;
1349 } else {
1350 if (s == bufend) {
1351 if (!lex_next_chunk(flags))
1352 return -1;
1353 s = PL_parser->bufptr;
1354 }
1355 return (U8)*s;
1356 }
1357}
1358
1359/*
1360=for apidoc Amx|I32|lex_read_unichar|U32 flags
1361
1362Reads the next (Unicode) character in the text currently being lexed.
1363Returns the codepoint (unsigned integer value) of the character read,
1364and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1365if lexing has reached the end of the input text. To non-destructively
1366examine the next character, use L</lex_peek_unichar> instead.
1367
1368If the next character is in (or extends into) the next chunk of input
1369text, the next chunk will be read in. Normally the current chunk will be
1370discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1371then the current chunk will not be discarded.
1372
1373If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1374is encountered, an exception is generated.
1375
1376=cut
1377*/
1378
1379I32
1380Perl_lex_read_unichar(pTHX_ U32 flags)
1381{
1382 I32 c;
1383 if (flags & ~(LEX_KEEP_PREVIOUS))
1384 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1385 c = lex_peek_unichar(flags);
1386 if (c != -1) {
1387 if (c == '\n')
1388 CopLINE_inc(PL_curcop);
1389 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1390 }
1391 return c;
1392}
1393
1394/*
1395=for apidoc Amx|void|lex_read_space|U32 flags
1396
1397Reads optional spaces, in Perl style, in the text currently being
1398lexed. The spaces may include ordinary whitespace characters and
1399Perl-style comments. C<#line> directives are processed if encountered.
1400L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1401at a non-space character (or the end of the input text).
1402
1403If spaces extend into the next chunk of input text, the next chunk will
1404be read in. Normally the current chunk will be discarded at the same
1405time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1406chunk will not be discarded.
1407
1408=cut
1409*/
1410
f0998909
Z
1411#define LEX_NO_NEXT_CHUNK 0x80000000
1412
f0e67a1d
Z
1413void
1414Perl_lex_read_space(pTHX_ U32 flags)
1415{
1416 char *s, *bufend;
1417 bool need_incline = 0;
f0998909 1418 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1419 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1420#ifdef PERL_MAD
1421 if (PL_skipwhite) {
1422 sv_free(PL_skipwhite);
1423 PL_skipwhite = NULL;
1424 }
1425 if (PL_madskills)
1426 PL_skipwhite = newSVpvs("");
1427#endif /* PERL_MAD */
1428 s = PL_parser->bufptr;
1429 bufend = PL_parser->bufend;
1430 while (1) {
1431 char c = *s;
1432 if (c == '#') {
1433 do {
1434 c = *++s;
1435 } while (!(c == '\n' || (c == 0 && s == bufend)));
1436 } else if (c == '\n') {
1437 s++;
1438 PL_parser->linestart = s;
1439 if (s == bufend)
1440 need_incline = 1;
1441 else
1442 incline(s);
1443 } else if (isSPACE(c)) {
1444 s++;
1445 } else if (c == 0 && s == bufend) {
1446 bool got_more;
1447#ifdef PERL_MAD
1448 if (PL_madskills)
1449 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1450#endif /* PERL_MAD */
f0998909
Z
1451 if (flags & LEX_NO_NEXT_CHUNK)
1452 break;
f0e67a1d
Z
1453 PL_parser->bufptr = s;
1454 CopLINE_inc(PL_curcop);
1455 got_more = lex_next_chunk(flags);
1456 CopLINE_dec(PL_curcop);
1457 s = PL_parser->bufptr;
1458 bufend = PL_parser->bufend;
1459 if (!got_more)
1460 break;
1461 if (need_incline && PL_parser->rsfp) {
1462 incline(s);
1463 need_incline = 0;
1464 }
1465 } else {
1466 break;
1467 }
1468 }
1469#ifdef PERL_MAD
1470 if (PL_madskills)
1471 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1472#endif /* PERL_MAD */
1473 PL_parser->bufptr = s;
1474}
1475
1476/*
ffb4593c
NT
1477 * S_incline
1478 * This subroutine has nothing to do with tilting, whether at windmills
1479 * or pinball tables. Its name is short for "increment line". It
57843af0 1480 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1481 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1482 * # line 500 "foo.pm"
1483 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1484 */
1485
76e3520e 1486STATIC void
d9095cec 1487S_incline(pTHX_ const char *s)
463ee0b2 1488{
97aff369 1489 dVAR;
d9095cec
NC
1490 const char *t;
1491 const char *n;
1492 const char *e;
463ee0b2 1493
7918f24d
NC
1494 PERL_ARGS_ASSERT_INCLINE;
1495
57843af0 1496 CopLINE_inc(PL_curcop);
463ee0b2
LW
1497 if (*s++ != '#')
1498 return;
d4c19fe8
AL
1499 while (SPACE_OR_TAB(*s))
1500 s++;
73659bf1
GS
1501 if (strnEQ(s, "line", 4))
1502 s += 4;
1503 else
1504 return;
084592ab 1505 if (SPACE_OR_TAB(*s))
73659bf1 1506 s++;
4e553d73 1507 else
73659bf1 1508 return;
d4c19fe8
AL
1509 while (SPACE_OR_TAB(*s))
1510 s++;
463ee0b2
LW
1511 if (!isDIGIT(*s))
1512 return;
d4c19fe8 1513
463ee0b2
LW
1514 n = s;
1515 while (isDIGIT(*s))
1516 s++;
07714eb4 1517 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1518 return;
bf4acbe4 1519 while (SPACE_OR_TAB(*s))
463ee0b2 1520 s++;
73659bf1 1521 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1522 s++;
73659bf1
GS
1523 e = t + 1;
1524 }
463ee0b2 1525 else {
c35e046a
AL
1526 t = s;
1527 while (!isSPACE(*t))
1528 t++;
73659bf1 1529 e = t;
463ee0b2 1530 }
bf4acbe4 1531 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1532 e++;
1533 if (*e != '\n' && *e != '\0')
1534 return; /* false alarm */
1535
f4dd75d9 1536 if (t - s > 0) {
d9095cec 1537 const STRLEN len = t - s;
8a5ee598 1538#ifndef USE_ITHREADS
19bad673
NC
1539 SV *const temp_sv = CopFILESV(PL_curcop);
1540 const char *cf;
1541 STRLEN tmplen;
1542
1543 if (temp_sv) {
1544 cf = SvPVX(temp_sv);
1545 tmplen = SvCUR(temp_sv);
1546 } else {
1547 cf = NULL;
1548 tmplen = 0;
1549 }
1550
42d9b98d 1551 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1552 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1553 * to *{"::_<newfilename"} */
44867030
NC
1554 /* However, the long form of evals is only turned on by the
1555 debugger - usually they're "(eval %lu)" */
1556 char smallbuf[128];
1557 char *tmpbuf;
1558 GV **gvp;
d9095cec 1559 STRLEN tmplen2 = len;
798b63bc 1560 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1561 tmpbuf = smallbuf;
1562 else
2ae0db35 1563 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1564 tmpbuf[0] = '_';
1565 tmpbuf[1] = '<';
2ae0db35 1566 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1567 tmplen += 2;
8a5ee598
RGS
1568 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1569 if (gvp) {
44867030
NC
1570 char *tmpbuf2;
1571 GV *gv2;
1572
1573 if (tmplen2 + 2 <= sizeof smallbuf)
1574 tmpbuf2 = smallbuf;
1575 else
1576 Newx(tmpbuf2, tmplen2 + 2, char);
1577
1578 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1579 /* Either they malloc'd it, or we malloc'd it,
1580 so no prefix is present in ours. */
1581 tmpbuf2[0] = '_';
1582 tmpbuf2[1] = '<';
1583 }
1584
1585 memcpy(tmpbuf2 + 2, s, tmplen2);
1586 tmplen2 += 2;
1587
8a5ee598 1588 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1589 if (!isGV(gv2)) {
8a5ee598 1590 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1591 /* adjust ${"::_<newfilename"} to store the new file name */
1592 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1593 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1594 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1595 }
44867030
NC
1596
1597 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1598 }
e66cf94c 1599 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1600 }
8a5ee598 1601#endif
05ec9bb3 1602 CopFILE_free(PL_curcop);
d9095cec 1603 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1604 }
57843af0 1605 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1606}
1607
29595ff2 1608#ifdef PERL_MAD
cd81e915 1609/* skip space before PL_thistoken */
29595ff2
NC
1610
1611STATIC char *
1612S_skipspace0(pTHX_ register char *s)
1613{
7918f24d
NC
1614 PERL_ARGS_ASSERT_SKIPSPACE0;
1615
29595ff2
NC
1616 s = skipspace(s);
1617 if (!PL_madskills)
1618 return s;
cd81e915
NC
1619 if (PL_skipwhite) {
1620 if (!PL_thiswhite)
6b29d1f5 1621 PL_thiswhite = newSVpvs("");
cd81e915
NC
1622 sv_catsv(PL_thiswhite, PL_skipwhite);
1623 sv_free(PL_skipwhite);
1624 PL_skipwhite = 0;
1625 }
1626 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1627 return s;
1628}
1629
cd81e915 1630/* skip space after PL_thistoken */
29595ff2
NC
1631
1632STATIC char *
1633S_skipspace1(pTHX_ register char *s)
1634{
d4c19fe8 1635 const char *start = s;
29595ff2
NC
1636 I32 startoff = start - SvPVX(PL_linestr);
1637
7918f24d
NC
1638 PERL_ARGS_ASSERT_SKIPSPACE1;
1639
29595ff2
NC
1640 s = skipspace(s);
1641 if (!PL_madskills)
1642 return s;
1643 start = SvPVX(PL_linestr) + startoff;
cd81e915 1644 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1645 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1646 PL_thistoken = newSVpvn(tstart, start - tstart);
1647 }
1648 PL_realtokenstart = -1;
1649 if (PL_skipwhite) {
1650 if (!PL_nextwhite)
6b29d1f5 1651 PL_nextwhite = newSVpvs("");
cd81e915
NC
1652 sv_catsv(PL_nextwhite, PL_skipwhite);
1653 sv_free(PL_skipwhite);
1654 PL_skipwhite = 0;
29595ff2
NC
1655 }
1656 return s;
1657}
1658
1659STATIC char *
1660S_skipspace2(pTHX_ register char *s, SV **svp)
1661{
c35e046a
AL
1662 char *start;
1663 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1664 const I32 startoff = s - SvPVX(PL_linestr);
1665
7918f24d
NC
1666 PERL_ARGS_ASSERT_SKIPSPACE2;
1667
29595ff2
NC
1668 s = skipspace(s);
1669 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1670 if (!PL_madskills || !svp)
1671 return s;
1672 start = SvPVX(PL_linestr) + startoff;
cd81e915 1673 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1674 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1675 PL_thistoken = newSVpvn(tstart, start - tstart);
1676 PL_realtokenstart = -1;
29595ff2 1677 }
cd81e915 1678 if (PL_skipwhite) {
29595ff2 1679 if (!*svp)
6b29d1f5 1680 *svp = newSVpvs("");
cd81e915
NC
1681 sv_setsv(*svp, PL_skipwhite);
1682 sv_free(PL_skipwhite);
1683 PL_skipwhite = 0;
29595ff2
NC
1684 }
1685
1686 return s;
1687}
1688#endif
1689
80a702cd 1690STATIC void
15f169a1 1691S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1692{
1693 AV *av = CopFILEAVx(PL_curcop);
1694 if (av) {
b9f83d2f 1695 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1696 if (orig_sv)
1697 sv_setsv(sv, orig_sv);
1698 else
1699 sv_setpvn(sv, buf, len);
80a702cd
RGS
1700 (void)SvIOK_on(sv);
1701 SvIV_set(sv, 0);
1702 av_store(av, (I32)CopLINE(PL_curcop), sv);
1703 }
1704}
1705
ffb4593c
NT
1706/*
1707 * S_skipspace
1708 * Called to gobble the appropriate amount and type of whitespace.
1709 * Skips comments as well.
1710 */
1711
76e3520e 1712STATIC char *
cea2e8a9 1713S_skipspace(pTHX_ register char *s)
a687059c 1714{
5db06880 1715#ifdef PERL_MAD
f0e67a1d
Z
1716 char *start = s;
1717#endif /* PERL_MAD */
7918f24d 1718 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1719#ifdef PERL_MAD
cd81e915
NC
1720 if (PL_skipwhite) {
1721 sv_free(PL_skipwhite);
f0e67a1d 1722 PL_skipwhite = NULL;
5db06880 1723 }
f0e67a1d 1724#endif /* PERL_MAD */
3280af22 1725 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1726 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1727 s++;
f0e67a1d
Z
1728 } else {
1729 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1730 PL_bufptr = s;
f0998909
Z
1731 lex_read_space(LEX_KEEP_PREVIOUS |
1732 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1733 LEX_NO_NEXT_CHUNK : 0));
3280af22 1734 s = PL_bufptr;
f0e67a1d
Z
1735 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1736 if (PL_linestart > PL_bufptr)
1737 PL_bufptr = PL_linestart;
1738 return s;
463ee0b2 1739 }
5db06880 1740#ifdef PERL_MAD
f0e67a1d
Z
1741 if (PL_madskills)
1742 PL_skipwhite = newSVpvn(start, s-start);
1743#endif /* PERL_MAD */
5db06880 1744 return s;
a687059c 1745}
378cc40b 1746
ffb4593c
NT
1747/*
1748 * S_check_uni
1749 * Check the unary operators to ensure there's no ambiguity in how they're
1750 * used. An ambiguous piece of code would be:
1751 * rand + 5
1752 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1753 * the +5 is its argument.
1754 */
1755
76e3520e 1756STATIC void
cea2e8a9 1757S_check_uni(pTHX)
ba106d47 1758{
97aff369 1759 dVAR;
d4c19fe8
AL
1760 const char *s;
1761 const char *t;
2f3197b3 1762
3280af22 1763 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1764 return;
3280af22
NIS
1765 while (isSPACE(*PL_last_uni))
1766 PL_last_uni++;
c35e046a
AL
1767 s = PL_last_uni;
1768 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1769 s++;
3280af22 1770 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1771 return;
6136c704 1772
9b387841
NC
1773 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1774 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1775 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1776}
1777
ffb4593c
NT
1778/*
1779 * LOP : macro to build a list operator. Its behaviour has been replaced
1780 * with a subroutine, S_lop() for which LOP is just another name.
1781 */
1782
a0d0e21e
LW
1783#define LOP(f,x) return lop(f,x,s)
1784
ffb4593c
NT
1785/*
1786 * S_lop
1787 * Build a list operator (or something that might be one). The rules:
1788 * - if we have a next token, then it's a list operator [why?]
1789 * - if the next thing is an opening paren, then it's a function
1790 * - else it's a list operator
1791 */
1792
76e3520e 1793STATIC I32
a0be28da 1794S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1795{
97aff369 1796 dVAR;
7918f24d
NC
1797
1798 PERL_ARGS_ASSERT_LOP;
1799
6154021b 1800 pl_yylval.ival = f;
35c8bce7 1801 CLINE;
3280af22
NIS
1802 PL_expect = x;
1803 PL_bufptr = s;
1804 PL_last_lop = PL_oldbufptr;
eb160463 1805 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1806#ifdef PERL_MAD
1807 if (PL_lasttoke)
1808 return REPORT(LSTOP);
1809#else
3280af22 1810 if (PL_nexttoke)
bbf60fe6 1811 return REPORT(LSTOP);
5db06880 1812#endif
79072805 1813 if (*s == '(')
bbf60fe6 1814 return REPORT(FUNC);
29595ff2 1815 s = PEEKSPACE(s);
79072805 1816 if (*s == '(')
bbf60fe6 1817 return REPORT(FUNC);
79072805 1818 else
bbf60fe6 1819 return REPORT(LSTOP);
79072805
LW
1820}
1821
5db06880
NC
1822#ifdef PERL_MAD
1823 /*
1824 * S_start_force
1825 * Sets up for an eventual force_next(). start_force(0) basically does
1826 * an unshift, while start_force(-1) does a push. yylex removes items
1827 * on the "pop" end.
1828 */
1829
1830STATIC void
1831S_start_force(pTHX_ int where)
1832{
1833 int i;
1834
cd81e915 1835 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1836 where = PL_lasttoke;
cd81e915
NC
1837 assert(PL_curforce < 0 || PL_curforce == where);
1838 if (PL_curforce != where) {
5db06880
NC
1839 for (i = PL_lasttoke; i > where; --i) {
1840 PL_nexttoke[i] = PL_nexttoke[i-1];
1841 }
1842 PL_lasttoke++;
1843 }
cd81e915 1844 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1845 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1846 PL_curforce = where;
1847 if (PL_nextwhite) {
5db06880 1848 if (PL_madskills)
6b29d1f5 1849 curmad('^', newSVpvs(""));
cd81e915 1850 CURMAD('_', PL_nextwhite);
5db06880
NC
1851 }
1852}
1853
1854STATIC void
1855S_curmad(pTHX_ char slot, SV *sv)
1856{
1857 MADPROP **where;
1858
1859 if (!sv)
1860 return;
cd81e915
NC
1861 if (PL_curforce < 0)
1862 where = &PL_thismad;
5db06880 1863 else
cd81e915 1864 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1865
cd81e915 1866 if (PL_faketokens)
76f68e9b 1867 sv_setpvs(sv, "");
5db06880
NC
1868 else {
1869 if (!IN_BYTES) {
1870 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1871 SvUTF8_on(sv);
1872 else if (PL_encoding) {
1873 sv_recode_to_utf8(sv, PL_encoding);
1874 }
1875 }
1876 }
1877
1878 /* keep a slot open for the head of the list? */
1879 if (slot != '_' && *where && (*where)->mad_key == '^') {
1880 (*where)->mad_key = slot;
daba3364 1881 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1882 (*where)->mad_val = (void*)sv;
1883 }
1884 else
1885 addmad(newMADsv(slot, sv), where, 0);
1886}
1887#else
b3f24c00
MHM
1888# define start_force(where) NOOP
1889# define curmad(slot, sv) NOOP
5db06880
NC
1890#endif
1891
ffb4593c
NT
1892/*
1893 * S_force_next
9cbb5ea2 1894 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1895 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1896 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1897 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1898 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1899 */
1900
4e553d73 1901STATIC void
cea2e8a9 1902S_force_next(pTHX_ I32 type)
79072805 1903{
97aff369 1904 dVAR;
704d4215
GG
1905#ifdef DEBUGGING
1906 if (DEBUG_T_TEST) {
1907 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1908 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1909 }
1910#endif
5db06880 1911#ifdef PERL_MAD
cd81e915 1912 if (PL_curforce < 0)
5db06880 1913 start_force(PL_lasttoke);
cd81e915 1914 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1915 if (PL_lex_state != LEX_KNOWNEXT)
1916 PL_lex_defer = PL_lex_state;
1917 PL_lex_state = LEX_KNOWNEXT;
1918 PL_lex_expect = PL_expect;
cd81e915 1919 PL_curforce = -1;
5db06880 1920#else
3280af22
NIS
1921 PL_nexttype[PL_nexttoke] = type;
1922 PL_nexttoke++;
1923 if (PL_lex_state != LEX_KNOWNEXT) {
1924 PL_lex_defer = PL_lex_state;
1925 PL_lex_expect = PL_expect;
1926 PL_lex_state = LEX_KNOWNEXT;
79072805 1927 }
5db06880 1928#endif
79072805
LW
1929}
1930
d0a148a6 1931STATIC SV *
15f169a1 1932S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1933{
97aff369 1934 dVAR;
740cce10 1935 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1936 !IN_BYTES
1937 && UTF
1938 && !is_ascii_string((const U8*)start, len)
740cce10 1939 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1940 return sv;
1941}
1942
ffb4593c
NT
1943/*
1944 * S_force_word
1945 * When the lexer knows the next thing is a word (for instance, it has
1946 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1947 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1948 * lookahead.
ffb4593c
NT
1949 *
1950 * Arguments:
b1b65b59 1951 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1952 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1953 * int check_keyword : if true, Perl checks to make sure the word isn't
1954 * a keyword (do this if the word is a label, e.g. goto FOO)
1955 * int allow_pack : if true, : characters will also be allowed (require,
1956 * use, etc. do this)
9cbb5ea2 1957 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1958 */
1959
76e3520e 1960STATIC char *
cea2e8a9 1961S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1962{
97aff369 1963 dVAR;
463ee0b2
LW
1964 register char *s;
1965 STRLEN len;
4e553d73 1966
7918f24d
NC
1967 PERL_ARGS_ASSERT_FORCE_WORD;
1968
29595ff2 1969 start = SKIPSPACE1(start);
463ee0b2 1970 s = start;
7e2040f0 1971 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1972 (allow_pack && *s == ':') ||
15f0808c 1973 (allow_initial_tick && *s == '\'') )
a0d0e21e 1974 {
3280af22 1975 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1976 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1977 return start;
cd81e915 1978 start_force(PL_curforce);
5db06880
NC
1979 if (PL_madskills)
1980 curmad('X', newSVpvn(start,s-start));
463ee0b2 1981 if (token == METHOD) {
29595ff2 1982 s = SKIPSPACE1(s);
463ee0b2 1983 if (*s == '(')
3280af22 1984 PL_expect = XTERM;
463ee0b2 1985 else {
3280af22 1986 PL_expect = XOPERATOR;
463ee0b2 1987 }
79072805 1988 }
e74e6b3d 1989 if (PL_madskills)
63575281 1990 curmad('g', newSVpvs( "forced" ));
9ded7720 1991 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1992 = (OP*)newSVOP(OP_CONST,0,
1993 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1994 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1995 force_next(token);
1996 }
1997 return s;
1998}
1999
ffb4593c
NT
2000/*
2001 * S_force_ident
9cbb5ea2 2002 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2003 * text only contains the "foo" portion. The first argument is a pointer
2004 * to the "foo", and the second argument is the type symbol to prefix.
2005 * Forces the next token to be a "WORD".
9cbb5ea2 2006 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2007 */
2008
76e3520e 2009STATIC void
bfed75c6 2010S_force_ident(pTHX_ register const char *s, int kind)
79072805 2011{
97aff369 2012 dVAR;
7918f24d
NC
2013
2014 PERL_ARGS_ASSERT_FORCE_IDENT;
2015
c35e046a 2016 if (*s) {
90e5519e
NC
2017 const STRLEN len = strlen(s);
2018 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2019 start_force(PL_curforce);
9ded7720 2020 NEXTVAL_NEXTTOKE.opval = o;
79072805 2021 force_next(WORD);
748a9306 2022 if (kind) {
11343788 2023 o->op_private = OPpCONST_ENTERED;
55497cff 2024 /* XXX see note in pp_entereval() for why we forgo typo
2025 warnings if the symbol must be introduced in an eval.
2026 GSAR 96-10-12 */
90e5519e
NC
2027 gv_fetchpvn_flags(s, len,
2028 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2029 : GV_ADD,
2030 kind == '$' ? SVt_PV :
2031 kind == '@' ? SVt_PVAV :
2032 kind == '%' ? SVt_PVHV :
a0d0e21e 2033 SVt_PVGV
90e5519e 2034 );
748a9306 2035 }
79072805
LW
2036 }
2037}
2038
1571675a
GS
2039NV
2040Perl_str_to_version(pTHX_ SV *sv)
2041{
2042 NV retval = 0.0;
2043 NV nshift = 1.0;
2044 STRLEN len;
cfd0369c 2045 const char *start = SvPV_const(sv,len);
9d4ba2ae 2046 const char * const end = start + len;
504618e9 2047 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2048
2049 PERL_ARGS_ASSERT_STR_TO_VERSION;
2050
1571675a 2051 while (start < end) {
ba210ebe 2052 STRLEN skip;
1571675a
GS
2053 UV n;
2054 if (utf)
9041c2e3 2055 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2056 else {
2057 n = *(U8*)start;
2058 skip = 1;
2059 }
2060 retval += ((NV)n)/nshift;
2061 start += skip;
2062 nshift *= 1000;
2063 }
2064 return retval;
2065}
2066
4e553d73 2067/*
ffb4593c
NT
2068 * S_force_version
2069 * Forces the next token to be a version number.
e759cc13
RGS
2070 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2071 * and if "guessing" is TRUE, then no new token is created (and the caller
2072 * must use an alternative parsing method).
ffb4593c
NT
2073 */
2074
76e3520e 2075STATIC char *
e759cc13 2076S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2077{
97aff369 2078 dVAR;
5f66b61c 2079 OP *version = NULL;
44dcb63b 2080 char *d;
5db06880
NC
2081#ifdef PERL_MAD
2082 I32 startoff = s - SvPVX(PL_linestr);
2083#endif
89bfa8cd 2084
7918f24d
NC
2085 PERL_ARGS_ASSERT_FORCE_VERSION;
2086
29595ff2 2087 s = SKIPSPACE1(s);
89bfa8cd 2088
44dcb63b 2089 d = s;
dd629d5b 2090 if (*d == 'v')
44dcb63b 2091 d++;
44dcb63b 2092 if (isDIGIT(*d)) {
e759cc13
RGS
2093 while (isDIGIT(*d) || *d == '_' || *d == '.')
2094 d++;
5db06880
NC
2095#ifdef PERL_MAD
2096 if (PL_madskills) {
cd81e915 2097 start_force(PL_curforce);
5db06880
NC
2098 curmad('X', newSVpvn(s,d-s));
2099 }
2100#endif
4e4da3ac 2101 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2102 SV *ver;
8d08d9ba
DG
2103#ifdef USE_LOCALE_NUMERIC
2104 char *loc = setlocale(LC_NUMERIC, "C");
2105#endif
6154021b 2106 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2107#ifdef USE_LOCALE_NUMERIC
2108 setlocale(LC_NUMERIC, loc);
2109#endif
6154021b 2110 version = pl_yylval.opval;
dd629d5b
GS
2111 ver = cSVOPx(version)->op_sv;
2112 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2113 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2114 SvNV_set(ver, str_to_version(ver));
1571675a 2115 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2116 }
89bfa8cd 2117 }
5db06880
NC
2118 else if (guessing) {
2119#ifdef PERL_MAD
2120 if (PL_madskills) {
cd81e915
NC
2121 sv_free(PL_nextwhite); /* let next token collect whitespace */
2122 PL_nextwhite = 0;
5db06880
NC
2123 s = SvPVX(PL_linestr) + startoff;
2124 }
2125#endif
e759cc13 2126 return s;
5db06880 2127 }
89bfa8cd 2128 }
2129
5db06880
NC
2130#ifdef PERL_MAD
2131 if (PL_madskills && !version) {
cd81e915
NC
2132 sv_free(PL_nextwhite); /* let next token collect whitespace */
2133 PL_nextwhite = 0;
5db06880
NC
2134 s = SvPVX(PL_linestr) + startoff;
2135 }
2136#endif
89bfa8cd 2137 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2138 start_force(PL_curforce);
9ded7720 2139 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2140 force_next(WORD);
89bfa8cd 2141
e759cc13 2142 return s;
89bfa8cd 2143}
2144
ffb4593c 2145/*
91152fc1
DG
2146 * S_force_strict_version
2147 * Forces the next token to be a version number using strict syntax rules.
2148 */
2149
2150STATIC char *
2151S_force_strict_version(pTHX_ char *s)
2152{
2153 dVAR;
2154 OP *version = NULL;
2155#ifdef PERL_MAD
2156 I32 startoff = s - SvPVX(PL_linestr);
2157#endif
2158 const char *errstr = NULL;
2159
2160 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2161
2162 while (isSPACE(*s)) /* leading whitespace */
2163 s++;
2164
2165 if (is_STRICT_VERSION(s,&errstr)) {
2166 SV *ver = newSV(0);
2167 s = (char *)scan_version(s, ver, 0);
2168 version = newSVOP(OP_CONST, 0, ver);
2169 }
4e4da3ac
Z
2170 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2171 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2172 {
91152fc1
DG
2173 PL_bufptr = s;
2174 if (errstr)
2175 yyerror(errstr); /* version required */
2176 return s;
2177 }
2178
2179#ifdef PERL_MAD
2180 if (PL_madskills && !version) {
2181 sv_free(PL_nextwhite); /* let next token collect whitespace */
2182 PL_nextwhite = 0;
2183 s = SvPVX(PL_linestr) + startoff;
2184 }
2185#endif
2186 /* NOTE: The parser sees the package name and the VERSION swapped */
2187 start_force(PL_curforce);
2188 NEXTVAL_NEXTTOKE.opval = version;
2189 force_next(WORD);
2190
2191 return s;
2192}
2193
2194/*
ffb4593c
NT
2195 * S_tokeq
2196 * Tokenize a quoted string passed in as an SV. It finds the next
2197 * chunk, up to end of string or a backslash. It may make a new
2198 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2199 * turns \\ into \.
2200 */
2201
76e3520e 2202STATIC SV *
cea2e8a9 2203S_tokeq(pTHX_ SV *sv)
79072805 2204{
97aff369 2205 dVAR;
79072805
LW
2206 register char *s;
2207 register char *send;
2208 register char *d;
b3ac6de7
IZ
2209 STRLEN len = 0;
2210 SV *pv = sv;
79072805 2211
7918f24d
NC
2212 PERL_ARGS_ASSERT_TOKEQ;
2213
79072805 2214 if (!SvLEN(sv))
b3ac6de7 2215 goto finish;
79072805 2216
a0d0e21e 2217 s = SvPV_force(sv, len);
21a311ee 2218 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2219 goto finish;
463ee0b2 2220 send = s + len;
79072805
LW
2221 while (s < send && *s != '\\')
2222 s++;
2223 if (s == send)
b3ac6de7 2224 goto finish;
79072805 2225 d = s;
be4731d2 2226 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2227 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2228 }
79072805
LW
2229 while (s < send) {
2230 if (*s == '\\') {
a0d0e21e 2231 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2232 s++; /* all that, just for this */
2233 }
2234 *d++ = *s++;
2235 }
2236 *d = '\0';
95a20fc0 2237 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2238 finish:
3280af22 2239 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2240 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2241 return sv;
2242}
2243
ffb4593c
NT
2244/*
2245 * Now come three functions related to double-quote context,
2246 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2247 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2248 * interact with PL_lex_state, and create fake ( ... ) argument lists
2249 * to handle functions and concatenation.
2250 * They assume that whoever calls them will be setting up a fake
2251 * join call, because each subthing puts a ',' after it. This lets
2252 * "lower \luPpEr"
2253 * become
2254 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2255 *
2256 * (I'm not sure whether the spurious commas at the end of lcfirst's
2257 * arguments and join's arguments are created or not).
2258 */
2259
2260/*
2261 * S_sublex_start
6154021b 2262 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2263 *
2264 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2265 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2266 *
2267 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2268 *
2269 * Everything else becomes a FUNC.
2270 *
2271 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2272 * had an OP_CONST or OP_READLINE). This just sets us up for a
2273 * call to S_sublex_push().
2274 */
2275
76e3520e 2276STATIC I32
cea2e8a9 2277S_sublex_start(pTHX)
79072805 2278{
97aff369 2279 dVAR;
6154021b 2280 register const I32 op_type = pl_yylval.ival;
79072805
LW
2281
2282 if (op_type == OP_NULL) {
6154021b 2283 pl_yylval.opval = PL_lex_op;
5f66b61c 2284 PL_lex_op = NULL;
79072805
LW
2285 return THING;
2286 }
2287 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2288 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2289
2290 if (SvTYPE(sv) == SVt_PVIV) {
2291 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2292 STRLEN len;
96a5add6 2293 const char * const p = SvPV_const(sv, len);
740cce10 2294 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2295 SvREFCNT_dec(sv);
2296 sv = nsv;
4e553d73 2297 }
6154021b 2298 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2299 PL_lex_stuff = NULL;
6f33ba73
RGS
2300 /* Allow <FH> // "foo" */
2301 if (op_type == OP_READLINE)
2302 PL_expect = XTERMORDORDOR;
79072805
LW
2303 return THING;
2304 }
e3f73d4e
RGS
2305 else if (op_type == OP_BACKTICK && PL_lex_op) {
2306 /* readpipe() vas overriden */
2307 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2308 pl_yylval.opval = PL_lex_op;
9b201d7d 2309 PL_lex_op = NULL;
e3f73d4e
RGS
2310 PL_lex_stuff = NULL;
2311 return THING;
2312 }
79072805 2313
3280af22 2314 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2315 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2316 PL_sublex_info.sub_op = PL_lex_op;
2317 PL_lex_state = LEX_INTERPPUSH;
55497cff 2318
3280af22
NIS
2319 PL_expect = XTERM;
2320 if (PL_lex_op) {
6154021b 2321 pl_yylval.opval = PL_lex_op;
5f66b61c 2322 PL_lex_op = NULL;
55497cff 2323 return PMFUNC;
2324 }
2325 else
2326 return FUNC;
2327}
2328
ffb4593c
NT
2329/*
2330 * S_sublex_push
2331 * Create a new scope to save the lexing state. The scope will be
2332 * ended in S_sublex_done. Returns a '(', starting the function arguments
2333 * to the uc, lc, etc. found before.
2334 * Sets PL_lex_state to LEX_INTERPCONCAT.
2335 */
2336
76e3520e 2337STATIC I32
cea2e8a9 2338S_sublex_push(pTHX)
55497cff 2339{
27da23d5 2340 dVAR;
f46d017c 2341 ENTER;
55497cff 2342
3280af22 2343 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2344 SAVEBOOL(PL_lex_dojoin);
3280af22 2345 SAVEI32(PL_lex_brackets);
3280af22
NIS
2346 SAVEI32(PL_lex_casemods);
2347 SAVEI32(PL_lex_starts);
651b5b28 2348 SAVEI8(PL_lex_state);
7766f137 2349 SAVEVPTR(PL_lex_inpat);
98246f1e 2350 SAVEI16(PL_lex_inwhat);
57843af0 2351 SAVECOPLINE(PL_curcop);
3280af22 2352 SAVEPPTR(PL_bufptr);
8452ff4b 2353 SAVEPPTR(PL_bufend);
3280af22
NIS
2354 SAVEPPTR(PL_oldbufptr);
2355 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2356 SAVEPPTR(PL_last_lop);
2357 SAVEPPTR(PL_last_uni);
3280af22
NIS
2358 SAVEPPTR(PL_linestart);
2359 SAVESPTR(PL_linestr);
8edd5f42
RGS
2360 SAVEGENERICPV(PL_lex_brackstack);
2361 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2362
2363 PL_linestr = PL_lex_stuff;
a0714e2c 2364 PL_lex_stuff = NULL;
3280af22 2365
9cbb5ea2
GS
2366 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2367 = SvPVX(PL_linestr);
3280af22 2368 PL_bufend += SvCUR(PL_linestr);
bd61b366 2369 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2370 SAVEFREESV(PL_linestr);
2371
2372 PL_lex_dojoin = FALSE;
2373 PL_lex_brackets = 0;
a02a5408
JC
2374 Newx(PL_lex_brackstack, 120, char);
2375 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2376 PL_lex_casemods = 0;
2377 *PL_lex_casestack = '\0';
2378 PL_lex_starts = 0;
2379 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2380 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2381
2382 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2383 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2384 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2385 else
5f66b61c 2386 PL_lex_inpat = NULL;
79072805 2387
55497cff 2388 return '(';
79072805
LW
2389}
2390
ffb4593c
NT
2391/*
2392 * S_sublex_done
2393 * Restores lexer state after a S_sublex_push.
2394 */
2395
76e3520e 2396STATIC I32
cea2e8a9 2397S_sublex_done(pTHX)
79072805 2398{
27da23d5 2399 dVAR;
3280af22 2400 if (!PL_lex_starts++) {
396482e1 2401 SV * const sv = newSVpvs("");
9aa983d2
JH
2402 if (SvUTF8(PL_linestr))
2403 SvUTF8_on(sv);
3280af22 2404 PL_expect = XOPERATOR;
6154021b 2405 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2406 return THING;
2407 }
2408
3280af22
NIS
2409 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2410 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2411 return yylex();
79072805
LW
2412 }
2413
ffb4593c 2414 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2415 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2416 PL_linestr = PL_lex_repl;
2417 PL_lex_inpat = 0;
2418 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2419 PL_bufend += SvCUR(PL_linestr);
bd61b366 2420 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2421 SAVEFREESV(PL_linestr);
2422 PL_lex_dojoin = FALSE;
2423 PL_lex_brackets = 0;
3280af22
NIS
2424 PL_lex_casemods = 0;
2425 *PL_lex_casestack = '\0';
2426 PL_lex_starts = 0;
25da4f38 2427 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2428 PL_lex_state = LEX_INTERPNORMAL;
2429 PL_lex_starts++;
e9fa98b2
HS
2430 /* we don't clear PL_lex_repl here, so that we can check later
2431 whether this is an evalled subst; that means we rely on the
2432 logic to ensure sublex_done() is called again only via the
2433 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2434 }
e9fa98b2 2435 else {
3280af22 2436 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2437 PL_lex_repl = NULL;
e9fa98b2 2438 }
79072805 2439 return ',';
ffed7fef
LW
2440 }
2441 else {
5db06880
NC
2442#ifdef PERL_MAD
2443 if (PL_madskills) {
cd81e915
NC
2444 if (PL_thiswhite) {
2445 if (!PL_endwhite)
6b29d1f5 2446 PL_endwhite = newSVpvs("");
cd81e915
NC
2447 sv_catsv(PL_endwhite, PL_thiswhite);
2448 PL_thiswhite = 0;
2449 }
2450 if (PL_thistoken)
76f68e9b 2451 sv_setpvs(PL_thistoken,"");
5db06880 2452 else
cd81e915 2453 PL_realtokenstart = -1;
5db06880
NC
2454 }
2455#endif
f46d017c 2456 LEAVE;
3280af22
NIS
2457 PL_bufend = SvPVX(PL_linestr);
2458 PL_bufend += SvCUR(PL_linestr);
2459 PL_expect = XOPERATOR;
09bef843 2460 PL_sublex_info.sub_inwhat = 0;
79072805 2461 return ')';
ffed7fef
LW
2462 }
2463}
2464
02aa26ce
NT
2465/*
2466 scan_const
2467
2468 Extracts a pattern, double-quoted string, or transliteration. This
2469 is terrifying code.
2470
94def140 2471 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2472 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2473 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2474
94def140
TS
2475 Returns a pointer to the character scanned up to. If this is
2476 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2477 successfully parsed), will leave an OP for the substring scanned
6154021b 2478 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2479 by looking at the next characters herself.
2480
02aa26ce
NT
2481 In patterns:
2482 backslashes:
ff3f963a 2483 constants: \N{NAME} only
02aa26ce
NT
2484 case and quoting: \U \Q \E
2485 stops on @ and $, but not for $ as tail anchor
2486
2487 In transliterations:
2488 characters are VERY literal, except for - not at the start or end
94def140
TS
2489 of the string, which indicates a range. If the range is in bytes,
2490 scan_const expands the range to the full set of intermediate
2491 characters. If the range is in utf8, the hyphen is replaced with
2492 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2493
2494 In double-quoted strings:
2495 backslashes:
2496 double-quoted style: \r and \n
ff3f963a 2497 constants: \x31, etc.
94def140 2498 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2499 case and quoting: \U \Q \E
2500 stops on @ and $
2501
2502 scan_const does *not* construct ops to handle interpolated strings.
2503 It stops processing as soon as it finds an embedded $ or @ variable
2504 and leaves it to the caller to work out what's going on.
2505
94def140
TS
2506 embedded arrays (whether in pattern or not) could be:
2507 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2508
2509 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2510
2511 $ in pattern could be $foo or could be tail anchor. Assumption:
2512 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2513 followed by one of "()| \r\n\t"
02aa26ce
NT
2514
2515 \1 (backreferences) are turned into $1
2516
2517 The structure of the code is
2518 while (there's a character to process) {
94def140
TS
2519 handle transliteration ranges
2520 skip regexp comments /(?#comment)/ and codes /(?{code})/
2521 skip #-initiated comments in //x patterns
2522 check for embedded arrays
02aa26ce
NT
2523 check for embedded scalars
2524 if (backslash) {
94def140 2525 deprecate \1 in substitution replacements
02aa26ce
NT
2526 handle string-changing backslashes \l \U \Q \E, etc.
2527 switch (what was escaped) {
94def140 2528 handle \- in a transliteration (becomes a literal -)
ff3f963a 2529 if a pattern and not \N{, go treat as regular character
94def140
TS
2530 handle \132 (octal characters)
2531 handle \x15 and \x{1234} (hex characters)
ff3f963a 2532 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2533 handle \cV (control characters)
2534 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2535 } (end switch)
77a135fe 2536 continue
02aa26ce 2537 } (end if backslash)
77a135fe 2538 handle regular character
02aa26ce 2539 } (end while character to read)
4e553d73 2540
02aa26ce
NT
2541*/
2542
76e3520e 2543STATIC char *
cea2e8a9 2544S_scan_const(pTHX_ char *start)
79072805 2545{
97aff369 2546 dVAR;
3280af22 2547 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2548 SV *sv = newSV(send - start); /* sv for the constant. See
2549 note below on sizing. */
02aa26ce
NT
2550 register char *s = start; /* start of the constant */
2551 register char *d = SvPVX(sv); /* destination for copies */
2552 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2553 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2554 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2555 I32 this_utf8 = UTF; /* Is the source string assumed
2556 to be UTF8? But, this can
2557 show as true when the source
2558 isn't utf8, as for example
2559 when it is entirely composed
2560 of hex constants */
2561
2562 /* Note on sizing: The scanned constant is placed into sv, which is
2563 * initialized by newSV() assuming one byte of output for every byte of
2564 * input. This routine expects newSV() to allocate an extra byte for a
2565 * trailing NUL, which this routine will append if it gets to the end of
2566 * the input. There may be more bytes of input than output (eg., \N{LATIN
2567 * CAPITAL LETTER A}), or more output than input if the constant ends up
2568 * recoded to utf8, but each time a construct is found that might increase
2569 * the needed size, SvGROW() is called. Its size parameter each time is
2570 * based on the best guess estimate at the time, namely the length used so
2571 * far, plus the length the current construct will occupy, plus room for
2572 * the trailing NUL, plus one byte for every input byte still unscanned */
2573
012bcf8d 2574 UV uv;
4c3a8340
TS
2575#ifdef EBCDIC
2576 UV literal_endpoint = 0;
e294cc5d 2577 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2578#endif
012bcf8d 2579
7918f24d
NC
2580 PERL_ARGS_ASSERT_SCAN_CONST;
2581
2b9d42f0
NIS
2582 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2583 /* If we are doing a trans and we know we want UTF8 set expectation */
2584 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2585 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2586 }
2587
2588
79072805 2589 while (s < send || dorange) {
ff3f963a 2590
02aa26ce 2591 /* get transliterations out of the way (they're most literal) */
3280af22 2592 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2593 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2594 if (dorange) {
1ba5c669
JH
2595 I32 i; /* current expanded character */
2596 I32 min; /* first character in range */
2597 I32 max; /* last character in range */
02aa26ce 2598
e294cc5d
JH
2599#ifdef EBCDIC
2600 UV uvmax = 0;
2601#endif
2602
2603 if (has_utf8
2604#ifdef EBCDIC
2605 && !native_range
2606#endif
2607 ) {
9d4ba2ae 2608 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2609 char *e = d++;
2610 while (e-- > c)
2611 *(e + 1) = *e;
25716404 2612 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2613 /* mark the range as done, and continue */
2614 dorange = FALSE;
2615 didrange = TRUE;
2616 continue;
2617 }
2b9d42f0 2618
95a20fc0 2619 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2620#ifdef EBCDIC
2621 SvGROW(sv,
2622 SvLEN(sv) + (has_utf8 ?
2623 (512 - UTF_CONTINUATION_MARK +
2624 UNISKIP(0x100))
2625 : 256));
2626 /* How many two-byte within 0..255: 128 in UTF-8,
2627 * 96 in UTF-8-mod. */
2628#else
9cbb5ea2 2629 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2630#endif
9cbb5ea2 2631 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2632#ifdef EBCDIC
2633 if (has_utf8) {
2634 int j;
2635 for (j = 0; j <= 1; j++) {
2636 char * const c = (char*)utf8_hop((U8*)d, -1);
2637 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2638 if (j)
2639 min = (U8)uv;
2640 else if (uv < 256)
2641 max = (U8)uv;
2642 else {
2643 max = (U8)0xff; /* only to \xff */
2644 uvmax = uv; /* \x{100} to uvmax */
2645 }
2646 d = c; /* eat endpoint chars */
2647 }
2648 }
2649 else {
2650#endif
2651 d -= 2; /* eat the first char and the - */
2652 min = (U8)*d; /* first char in range */
2653 max = (U8)d[1]; /* last char in range */
2654#ifdef EBCDIC
2655 }
2656#endif
8ada0baa 2657
c2e66d9e 2658 if (min > max) {
01ec43d0 2659 Perl_croak(aTHX_
d1573ac7 2660 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2661 (char)min, (char)max);
c2e66d9e
GS
2662 }
2663
c7f1f016 2664#ifdef EBCDIC
4c3a8340
TS
2665 if (literal_endpoint == 2 &&
2666 ((isLOWER(min) && isLOWER(max)) ||
2667 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2668 if (isLOWER(min)) {
2669 for (i = min; i <= max; i++)
2670 if (isLOWER(i))
db42d148 2671 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2672 } else {
2673 for (i = min; i <= max; i++)
2674 if (isUPPER(i))
db42d148 2675 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2676 }
2677 }
2678 else
2679#endif
2680 for (i = min; i <= max; i++)
e294cc5d
JH
2681#ifdef EBCDIC
2682 if (has_utf8) {
2683 const U8 ch = (U8)NATIVE_TO_UTF(i);
2684 if (UNI_IS_INVARIANT(ch))
2685 *d++ = (U8)i;
2686 else {
2687 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2688 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2689 }
2690 }
2691 else
2692#endif
2693 *d++ = (char)i;
2694
2695#ifdef EBCDIC
2696 if (uvmax) {
2697 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2698 if (uvmax > 0x101)
2699 *d++ = (char)UTF_TO_NATIVE(0xff);
2700 if (uvmax > 0x100)
2701 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2702 }
2703#endif
02aa26ce
NT
2704
2705 /* mark the range as done, and continue */
79072805 2706 dorange = FALSE;
01ec43d0 2707 didrange = TRUE;
4c3a8340
TS
2708#ifdef EBCDIC
2709 literal_endpoint = 0;
2710#endif
79072805 2711 continue;
4e553d73 2712 }
02aa26ce
NT
2713
2714 /* range begins (ignore - as first or last char) */
79072805 2715 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2716 if (didrange) {
1fafa243 2717 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2718 }
e294cc5d
JH
2719 if (has_utf8
2720#ifdef EBCDIC
2721 && !native_range
2722#endif
2723 ) {
25716404 2724 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2725 s++;
2726 continue;
2727 }
79072805
LW
2728 dorange = TRUE;
2729 s++;
01ec43d0
GS
2730 }
2731 else {
2732 didrange = FALSE;
4c3a8340
TS
2733#ifdef EBCDIC
2734 literal_endpoint = 0;
e294cc5d 2735 native_range = TRUE;
4c3a8340 2736#endif
01ec43d0 2737 }
79072805 2738 }
02aa26ce
NT
2739
2740 /* if we get here, we're not doing a transliteration */
2741
0f5d15d6
IZ
2742 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2743 except for the last char, which will be done separately. */
3280af22 2744 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2745 if (s[2] == '#') {
e994fd66 2746 while (s+1 < send && *s != ')')
db42d148 2747 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2748 }
2749 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2750 || (s[2] == '?' && s[3] == '{'))
155aba94 2751 {
cc6b7395 2752 I32 count = 1;
0f5d15d6 2753 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2754 char c;
2755
d9f97599
GS
2756 while (count && (c = *regparse)) {
2757 if (c == '\\' && regparse[1])
2758 regparse++;
4e553d73 2759 else if (c == '{')
cc6b7395 2760 count++;
4e553d73 2761 else if (c == '}')
cc6b7395 2762 count--;
d9f97599 2763 regparse++;
cc6b7395 2764 }
e994fd66 2765 if (*regparse != ')')
5bdf89e7 2766 regparse--; /* Leave one char for continuation. */
0f5d15d6 2767 while (s < regparse)
db42d148 2768 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2769 }
748a9306 2770 }
02aa26ce
NT
2771
2772 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2773 else if (*s == '#' && PL_lex_inpat &&
2774 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2775 while (s+1 < send && *s != '\n')
db42d148 2776 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2777 }
02aa26ce 2778
5d1d4326 2779 /* check for embedded arrays
da6eedaa 2780 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2781 */
1749ea0d
TS
2782 else if (*s == '@' && s[1]) {
2783 if (isALNUM_lazy_if(s+1,UTF))
2784 break;
2785 if (strchr(":'{$", s[1]))
2786 break;
2787 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2788 break; /* in regexp, neither @+ nor @- are interpolated */
2789 }
02aa26ce
NT
2790
2791 /* check for embedded scalars. only stop if we're sure it's a
2792 variable.
2793 */
79072805 2794 else if (*s == '$') {
3280af22 2795 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2796 break;
77772344 2797 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2798 if (s[1] == '\\') {
2799 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2800 "Possible unintended interpolation of $\\ in regex");
77772344 2801 }
79072805 2802 break; /* in regexp, $ might be tail anchor */
77772344 2803 }
79072805 2804 }
02aa26ce 2805
2b9d42f0
NIS
2806 /* End of else if chain - OP_TRANS rejoin rest */
2807
02aa26ce 2808 /* backslashes */
79072805 2809 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2810 char* e; /* Can be used for ending '}', etc. */
2811
79072805 2812 s++;
02aa26ce 2813
7d0fc23c
KW
2814 /* warn on \1 - \9 in substitution replacements, but note that \11
2815 * is an octal; and \19 is \1 followed by '9' */
3280af22 2816 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2817 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2818 {
a2a5de95 2819 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2820 *--s = '$';
2821 break;
2822 }
02aa26ce
NT
2823
2824 /* string-change backslash escapes */
3280af22 2825 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2826 --s;
2827 break;
2828 }
ff3f963a
KW
2829 /* In a pattern, process \N, but skip any other backslash escapes.
2830 * This is because we don't want to translate an escape sequence
2831 * into a meta symbol and have the regex compiler use the meta
2832 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2833 * in spite of this, we do have to process \N here while the proper
2834 * charnames handler is in scope. See bugs #56444 and #62056.
2835 * There is a complication because \N in a pattern may also stand
2836 * for 'match a non-nl', and not mean a charname, in which case its
2837 * processing should be deferred to the regex compiler. To be a
2838 * charname it must be followed immediately by a '{', and not look
2839 * like \N followed by a curly quantifier, i.e., not something like
2840 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2841 * quantifier */
2842 else if (PL_lex_inpat
2843 && (*s != 'N'
2844 || s[1] != '{'
2845 || regcurly(s + 1)))
2846 {
cc74c5bd
TS
2847 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2848 goto default_action;
2849 }
02aa26ce 2850
79072805 2851 switch (*s) {
02aa26ce
NT
2852
2853 /* quoted - in transliterations */
79072805 2854 case '-':
3280af22 2855 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2856 *d++ = *s++;
2857 continue;
2858 }
2859 /* FALL THROUGH */
2860 default:
11b8faa4 2861 {
a2a5de95
NC
2862 if ((isALPHA(*s) || isDIGIT(*s)))
2863 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2864 "Unrecognized escape \\%c passed through",
2865 *s);
11b8faa4 2866 /* default action is to copy the quoted character */
f9a63242 2867 goto default_action;
11b8faa4 2868 }
02aa26ce 2869
77a135fe 2870 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2871 case '0': case '1': case '2': case '3':
2872 case '4': case '5': case '6': case '7':
ba210ebe 2873 {
53305cf1
NC
2874 I32 flags = 0;
2875 STRLEN len = 3;
77a135fe 2876 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2877 s += len;
2878 }
012bcf8d 2879 goto NUM_ESCAPE_INSERT;
02aa26ce 2880
77a135fe 2881 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2882 case 'x':
a0ed51b3
LW
2883 ++s;
2884 if (*s == '{') {
9d4ba2ae 2885 char* const e = strchr(s, '}');
a4c04bdc
NC
2886 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2887 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2888 STRLEN len;
355860ce 2889
53305cf1 2890 ++s;
adaeee49 2891 if (!e) {
a0ed51b3 2892 yyerror("Missing right brace on \\x{}");
355860ce 2893 continue;
ba210ebe 2894 }
53305cf1 2895 len = e - s;
77a135fe 2896 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2897 s = e + 1;
a0ed51b3
LW
2898 }
2899 else {
ba210ebe 2900 {
53305cf1 2901 STRLEN len = 2;
a4c04bdc 2902 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2903 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2904 s += len;
2905 }
012bcf8d
GS
2906 }
2907
2908 NUM_ESCAPE_INSERT:
ff3f963a
KW
2909 /* Insert oct or hex escaped character. There will always be
2910 * enough room in sv since such escapes will be longer than any
2911 * UTF-8 sequence they can end up as, except if they force us
2912 * to recode the rest of the string into utf8 */
ba7cea30 2913
77a135fe 2914 /* Here uv is the ordinal of the next character being added in
ff3f963a 2915 * unicode (converted from native). */
77a135fe 2916 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2917 if (!has_utf8 && uv > 255) {
77a135fe
KW
2918 /* Might need to recode whatever we have accumulated so
2919 * far if it contains any chars variant in utf8 or
2920 * utf-ebcdic. */
2921
2922 SvCUR_set(sv, d - SvPVX_const(sv));
2923 SvPOK_on(sv);
2924 *d = '\0';
77a135fe 2925 /* See Note on sizing above. */
7bf79863
KW
2926 sv_utf8_upgrade_flags_grow(sv,
2927 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2928 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2929 d = SvPVX(sv) + SvCUR(sv);
2930 has_utf8 = TRUE;
012bcf8d
GS
2931 }
2932
77a135fe
KW
2933 if (has_utf8) {
2934 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2935 if (PL_lex_inwhat == OP_TRANS &&
2936 PL_sublex_info.sub_op) {
2937 PL_sublex_info.sub_op->op_private |=
2938 (PL_lex_repl ? OPpTRANS_FROM_UTF
2939 : OPpTRANS_TO_UTF);
f9a63242 2940 }
e294cc5d
JH
2941#ifdef EBCDIC
2942 if (uv > 255 && !dorange)
2943 native_range = FALSE;
2944#endif
012bcf8d 2945 }
a0ed51b3 2946 else {
012bcf8d 2947 *d++ = (char)uv;
a0ed51b3 2948 }
012bcf8d
GS
2949 }
2950 else {
c4d5f83a 2951 *d++ = (char) uv;
a0ed51b3 2952 }
79072805 2953 continue;
02aa26ce 2954
4a2d328f 2955 case 'N':
ff3f963a
KW
2956 /* In a non-pattern \N must be a named character, like \N{LATIN
2957 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
2958 * mean to match a non-newline. For non-patterns, named
2959 * characters are converted to their string equivalents. In
2960 * patterns, named characters are not converted to their
2961 * ultimate forms for the same reasons that other escapes
2962 * aren't. Instead, they are converted to the \N{U+...} form
2963 * to get the value from the charnames that is in effect right
2964 * now, while preserving the fact that it was a named character
2965 * so that the regex compiler knows this */
2966
2967 /* This section of code doesn't generally use the
2968 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
2969 * a close examination of this macro and determined it is a
2970 * no-op except on utfebcdic variant characters. Every
2971 * character generated by this that would normally need to be
2972 * enclosed by this macro is invariant, so the macro is not
2973 * needed, and would complicate use of copy(). There are other
2974 * parts of this file where the macro is used inconsistently,
2975 * but are saved by it being a no-op */
2976
2977 /* The structure of this section of code (besides checking for
2978 * errors and upgrading to utf8) is:
2979 * Further disambiguate between the two meanings of \N, and if
2980 * not a charname, go process it elsewhere
0a96133f
KW
2981 * If of form \N{U+...}, pass it through if a pattern;
2982 * otherwise convert to utf8
2983 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
2984 * pattern; otherwise convert to utf8 */
ff3f963a
KW
2985
2986 /* Here, s points to the 'N'; the test below is guaranteed to
2987 * succeed if we are being called on a pattern as we already
2988 * know from a test above that the next character is a '{'.
2989 * On a non-pattern \N must mean 'named sequence, which
2990 * requires braces */
2991 s++;
2992 if (*s != '{') {
2993 yyerror("Missing braces on \\N{}");
2994 continue;
2995 }
2996 s++;
2997
0a96133f 2998 /* If there is no matching '}', it is an error. */
ff3f963a
KW
2999 if (! (e = strchr(s, '}'))) {
3000 if (! PL_lex_inpat) {
5777a3f7 3001 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3002 } else {
3003 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3004 }
0a96133f 3005 continue;
ff3f963a 3006 }
cddc7ef4 3007
ff3f963a 3008 /* Here it looks like a named character */
cddc7ef4 3009
ff3f963a
KW
3010 if (PL_lex_inpat) {
3011
3012 /* XXX This block is temporary code. \N{} implies that the
3013 * pattern is to have Unicode semantics, and therefore
3014 * currently has to be encoded in utf8. By putting it in
3015 * utf8 now, we save a whole pass in the regular expression
3016 * compiler. Once that code is changed so Unicode
3017 * semantics doesn't necessarily have to be in utf8, this
3018 * block should be removed */
3019 if (!has_utf8) {
77a135fe 3020 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3021 SvPOK_on(sv);
e4f3eed8 3022 *d = '\0';
77a135fe 3023 /* See Note on sizing above. */
7bf79863 3024 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3025 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3026 /* 5 = '\N{' + cur char + NUL */
3027 (STRLEN)(send - s) + 5);
f08d6ad9 3028 d = SvPVX(sv) + SvCUR(sv);
89491803 3029 has_utf8 = TRUE;
ff3f963a
KW
3030 }
3031 }
423cee85 3032
ff3f963a
KW
3033 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3034 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3035 | PERL_SCAN_DISALLOW_PREFIX;
3036 STRLEN len;
3037
3038 /* For \N{U+...}, the '...' is a unicode value even on
3039 * EBCDIC machines */
3040 s += 2; /* Skip to next char after the 'U+' */
3041 len = e - s;
3042 uv = grok_hex(s, &len, &flags, NULL);
3043 if (len == 0 || len != (STRLEN)(e - s)) {
3044 yyerror("Invalid hexadecimal number in \\N{U+...}");
3045 s = e + 1;
3046 continue;
3047 }
3048
3049 if (PL_lex_inpat) {
3050
3051 /* Pass through to the regex compiler unchanged. The
3052 * reason we evaluated the number above is to make sure
0a96133f 3053 * there wasn't a syntax error. */
ff3f963a
KW
3054 s -= 5; /* Include the '\N{U+' */
3055 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3056 d += e - s + 1;
3057 }
3058 else { /* Not a pattern: convert the hex to string */
3059
3060 /* If destination is not in utf8, unconditionally
3061 * recode it to be so. This is because \N{} implies
3062 * Unicode semantics, and scalars have to be in utf8
3063 * to guarantee those semantics */
3064 if (! has_utf8) {
3065 SvCUR_set(sv, d - SvPVX_const(sv));
3066 SvPOK_on(sv);
3067 *d = '\0';
3068 /* See Note on sizing above. */
3069 sv_utf8_upgrade_flags_grow(
3070 sv,
3071 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3072 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3073 d = SvPVX(sv) + SvCUR(sv);
3074 has_utf8 = TRUE;
3075 }
3076
3077 /* Add the string to the output */
3078 if (UNI_IS_INVARIANT(uv)) {
3079 *d++ = (char) uv;
3080 }
3081 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3082 }
3083 }
3084 else { /* Here is \N{NAME} but not \N{U+...}. */
3085
3086 SV *res; /* result from charnames */
3087 const char *str; /* the string in 'res' */
3088 STRLEN len; /* its length */
3089
3090 /* Get the value for NAME */
3091 res = newSVpvn(s, e - s);
3092 res = new_constant( NULL, 0, "charnames",
3093 /* includes all of: \N{...} */
3094 res, NULL, s - 3, e - s + 4 );
3095
3096 /* Most likely res will be in utf8 already since the
3097 * standard charnames uses pack U, but a custom translator
3098 * can leave it otherwise, so make sure. XXX This can be
3099 * revisited to not have charnames use utf8 for characters
3100 * that don't need it when regexes don't have to be in utf8
3101 * for Unicode semantics. If doing so, remember EBCDIC */
3102 sv_utf8_upgrade(res);
3103 str = SvPV_const(res, len);
3104
3105 /* Don't accept malformed input */
3106 if (! is_utf8_string((U8 *) str, len)) {
3107 yyerror("Malformed UTF-8 returned by \\N");
3108 }
3109 else if (PL_lex_inpat) {
3110
3111 if (! len) { /* The name resolved to an empty string */
3112 Copy("\\N{}", d, 4, char);
3113 d += 4;
3114 }
3115 else {
3116 /* In order to not lose information for the regex
3117 * compiler, pass the result in the specially made
3118 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3119 * the code points in hex of each character
3120 * returned by charnames */
3121
3122 const char *str_end = str + len;
3123 STRLEN char_length; /* cur char's byte length */
3124 STRLEN output_length; /* and the number of bytes
3125 after this is translated
3126 into hex digits */
3127 const STRLEN off = d - SvPVX_const(sv);
3128
3129 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3130 * max('U+', '.'); and 1 for NUL */
3131 char hex_string[2 * UTF8_MAXBYTES + 5];
3132
3133 /* Get the first character of the result. */
3134 U32 uv = utf8n_to_uvuni((U8 *) str,
3135 len,
3136 &char_length,
3137 UTF8_ALLOW_ANYUV);
3138
3139 /* The call to is_utf8_string() above hopefully
3140 * guarantees that there won't be an error. But
3141 * it's easy here to make sure. The function just
3142 * above warns and returns 0 if invalid utf8, but
3143 * it can also return 0 if the input is validly a
3144 * NUL. Disambiguate */
3145 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3146 uv = UNICODE_REPLACEMENT;
3147 }
3148
3149 /* Convert first code point to hex, including the
3150 * boiler plate before it */
3151 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3152 output_length = strlen(hex_string);
3153
3154 /* Make sure there is enough space to hold it */
3155 d = off + SvGROW(sv, off
3156 + output_length
3157 + (STRLEN)(send - e)
3158 + 2); /* '}' + NUL */
3159 /* And output it */
3160 Copy(hex_string, d, output_length, char);
3161 d += output_length;
3162
3163 /* For each subsequent character, append dot and
3164 * its ordinal in hex */
3165 while ((str += char_length) < str_end) {
3166 const STRLEN off = d - SvPVX_const(sv);
3167 U32 uv = utf8n_to_uvuni((U8 *) str,
3168 str_end - str,
3169 &char_length,
3170 UTF8_ALLOW_ANYUV);
3171 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3172 uv = UNICODE_REPLACEMENT;
3173 }
3174
3175 sprintf(hex_string, ".%X", (unsigned int) uv);
3176 output_length = strlen(hex_string);
3177
3178 d = off + SvGROW(sv, off
3179 + output_length
3180 + (STRLEN)(send - e)
3181 + 2); /* '}' + NUL */
3182 Copy(hex_string, d, output_length, char);
3183 d += output_length;
3184 }
3185
3186 *d++ = '}'; /* Done. Add the trailing brace */
3187 }
3188 }
3189 else { /* Here, not in a pattern. Convert the name to a
3190 * string. */
3191
3192 /* If destination is not in utf8, unconditionally
3193 * recode it to be so. This is because \N{} implies
3194 * Unicode semantics, and scalars have to be in utf8
3195 * to guarantee those semantics */
3196 if (! has_utf8) {
3197 SvCUR_set(sv, d - SvPVX_const(sv));
3198 SvPOK_on(sv);
3199 *d = '\0';
3200 /* See Note on sizing above. */
3201 sv_utf8_upgrade_flags_grow(sv,
3202 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3203 len + (STRLEN)(send - s) + 1);
3204 d = SvPVX(sv) + SvCUR(sv);
3205 has_utf8 = TRUE;
3206 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3207
3208 /* See Note on sizing above. (NOTE: SvCUR() is not
3209 * set correctly here). */
3210 const STRLEN off = d - SvPVX_const(sv);
3211 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3212 }
3213 Copy(str, d, len, char);
3214 d += len;
423cee85 3215 }
423cee85 3216 SvREFCNT_dec(res);
cb233ae3
KW
3217
3218 /* Deprecate non-approved name syntax */
3219 if (ckWARN_d(WARN_DEPRECATED)) {
3220 bool problematic = FALSE;
3221 char* i = s;
3222
3223 /* For non-ut8 input, look to see that the first
3224 * character is an alpha, then loop through the rest
3225 * checking that each is a continuation */
3226 if (! this_utf8) {
3227 if (! isALPHAU(*i)) problematic = TRUE;
3228 else for (i = s + 1; i < e; i++) {
3229 if (isCHARNAME_CONT(*i)) continue;
3230 problematic = TRUE;
3231 break;
3232 }
3233 }
3234 else {
3235 /* Similarly for utf8. For invariants can check
3236 * directly. We accept anything above the latin1
3237 * range because it is immaterial to Perl if it is
3238 * correct or not, and is expensive to check. But
3239 * it is fairly easy in the latin1 range to convert
3240 * the variants into a single character and check
3241 * those */
3242 if (UTF8_IS_INVARIANT(*i)) {
3243 if (! isALPHAU(*i)) problematic = TRUE;
3244 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3245 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3246 *(i+1)))))
3247 {
3248 problematic = TRUE;
3249 }
3250 }
3251 if (! problematic) for (i = s + UTF8SKIP(s);
3252 i < e;
3253 i+= UTF8SKIP(i))
3254 {
3255 if (UTF8_IS_INVARIANT(*i)) {
3256 if (isCHARNAME_CONT(*i)) continue;
3257 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3258 continue;
3259 } else if (isCHARNAME_CONT(
3260 UNI_TO_NATIVE(
3261 UTF8_ACCUMULATE(*i, *(i+1)))))
3262 {
3263 continue;
3264 }
3265 problematic = TRUE;
3266 break;
3267 }
3268 }
3269 if (problematic) {
6e1bad6c
KW
3270 /* The e-i passed to the final %.*s makes sure that
3271 * should the trailing NUL be missing that this
3272 * print won't run off the end of the string */
cb233ae3 3273 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3274 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3275 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3276 }
3277 }
3278 } /* End \N{NAME} */
ff3f963a
KW
3279#ifdef EBCDIC
3280 if (!dorange)
3281 native_range = FALSE; /* \N{} is defined to be Unicode */
3282#endif
3283 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3284 continue;
3285
02aa26ce 3286 /* \c is a control character */
79072805
LW
3287 case 'c':
3288 s++;
961ce445 3289 if (s < send) {
f9d13529 3290 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3291 }
961ce445
RGS
3292 else {
3293 yyerror("Missing control char name in \\c");
3294 }
79072805 3295 continue;
02aa26ce
NT
3296
3297 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3298 case 'b':
db42d148 3299 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3300 break;
3301 case 'n':
db42d148 3302 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3303 break;
3304 case 'r':
db42d148 3305 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3306 break;
3307 case 'f':
db42d148 3308 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3309 break;
3310 case 't':
db42d148 3311 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3312 break;
34a3fe2a 3313 case 'e':
db42d148 3314 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3315 break;
3316 case 'a':
db42d148 3317 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3318 break;
02aa26ce
NT
3319 } /* end switch */
3320
79072805
LW
3321 s++;
3322 continue;
02aa26ce 3323 } /* end if (backslash) */
4c3a8340
TS
3324#ifdef EBCDIC
3325 else
3326 literal_endpoint++;
3327#endif
02aa26ce 3328
f9a63242 3329 default_action:
77a135fe
KW
3330 /* If we started with encoded form, or already know we want it,
3331 then encode the next character */
3332 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3333 STRLEN len = 1;
77a135fe
KW
3334
3335
3336 /* One might think that it is wasted effort in the case of the
3337 * source being utf8 (this_utf8 == TRUE) to take the next character
3338 * in the source, convert it to an unsigned value, and then convert
3339 * it back again. But the source has not been validated here. The
3340 * routine that does the conversion checks for errors like
3341 * malformed utf8 */
3342
5f66b61c
AL
3343 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3344 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3345 if (!has_utf8) {
3346 SvCUR_set(sv, d - SvPVX_const(sv));
3347 SvPOK_on(sv);
3348 *d = '\0';
77a135fe 3349 /* See Note on sizing above. */
7bf79863
KW
3350 sv_utf8_upgrade_flags_grow(sv,
3351 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3352 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3353 d = SvPVX(sv) + SvCUR(sv);
3354 has_utf8 = TRUE;
3355 } else if (need > len) {
3356 /* encoded value larger than old, may need extra space (NOTE:
3357 * SvCUR() is not set correctly here). See Note on sizing
3358 * above. */
9d4ba2ae 3359 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3360 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3361 }
77a135fe
KW
3362 s += len;
3363
5f66b61c 3364 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3365#ifdef EBCDIC
3366 if (uv > 255 && !dorange)
3367 native_range = FALSE;
3368#endif
2b9d42f0
NIS
3369 }
3370 else {
3371 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3372 }
02aa26ce
NT
3373 } /* while loop to process each character */
3374
3375 /* terminate the string and set up the sv */
79072805 3376 *d = '\0';
95a20fc0 3377 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3378 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3379 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3380
79072805 3381 SvPOK_on(sv);
9f4817db 3382 if (PL_encoding && !has_utf8) {
d0063567
DK
3383 sv_recode_to_utf8(sv, PL_encoding);
3384 if (SvUTF8(sv))
3385 has_utf8 = TRUE;
9f4817db 3386 }
2b9d42f0 3387 if (has_utf8) {
7e2040f0 3388 SvUTF8_on(sv);
2b9d42f0 3389 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3390 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3391 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3392 }
3393 }
79072805 3394
02aa26ce 3395 /* shrink the sv if we allocated more than we used */
79072805 3396 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3397 SvPV_shrink_to_cur(sv);
79072805 3398 }
02aa26ce 3399
6154021b 3400 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3401 if (s > PL_bufptr) {
eb0d8d16
NC
3402 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3403 const char *const key = PL_lex_inpat ? "qr" : "q";
3404 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3405 const char *type;
3406 STRLEN typelen;
3407
3408 if (PL_lex_inwhat == OP_TRANS) {
3409 type = "tr";
3410 typelen = 2;
3411 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3412 type = "s";
3413 typelen = 1;
3414 } else {
3415 type = "qq";
3416 typelen = 2;
3417 }
3418
3419 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3420 type, typelen);
3421 }
6154021b 3422 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3423 } else
8990e307 3424 SvREFCNT_dec(sv);
79072805
LW
3425 return s;
3426}
3427
ffb4593c
NT
3428/* S_intuit_more
3429 * Returns TRUE if there's more to the expression (e.g., a subscript),
3430 * FALSE otherwise.
ffb4593c
NT
3431 *
3432 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3433 *
3434 * ->[ and ->{ return TRUE
3435 * { and [ outside a pattern are always subscripts, so return TRUE
3436 * if we're outside a pattern and it's not { or [, then return FALSE
3437 * if we're in a pattern and the first char is a {
3438 * {4,5} (any digits around the comma) returns FALSE
3439 * if we're in a pattern and the first char is a [
3440 * [] returns FALSE
3441 * [SOMETHING] has a funky algorithm to decide whether it's a
3442 * character class or not. It has to deal with things like
3443 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3444 * anything else returns TRUE
3445 */
3446
9cbb5ea2
GS
3447/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3448
76e3520e 3449STATIC int
cea2e8a9 3450S_intuit_more(pTHX_ register char *s)
79072805 3451{
97aff369 3452 dVAR;
7918f24d
NC
3453
3454 PERL_ARGS_ASSERT_INTUIT_MORE;
3455
3280af22 3456 if (PL_lex_brackets)
79072805
LW
3457 return TRUE;
3458 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3459 return TRUE;
3460 if (*s != '{' && *s != '[')
3461 return FALSE;
3280af22 3462 if (!PL_lex_inpat)
79072805
LW
3463 return TRUE;
3464
3465 /* In a pattern, so maybe we have {n,m}. */
3466 if (*s == '{') {
3467 s++;
3468 if (!isDIGIT(*s))
3469 return TRUE;
3470 while (isDIGIT(*s))
3471 s++;
3472 if (*s == ',')
3473 s++;
3474 while (isDIGIT(*s))
3475 s++;
3476 if (*s == '}')
3477 return FALSE;
3478 return TRUE;
3479
3480 }
3481
3482 /* On the other hand, maybe we have a character class */
3483
3484 s++;
3485 if (*s == ']' || *s == '^')
3486 return FALSE;
3487 else {
ffb4593c 3488 /* this is terrifying, and it works */
79072805
LW
3489 int weight = 2; /* let's weigh the evidence */
3490 char seen[256];
f27ffc4a 3491 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3492 const char * const send = strchr(s,']');
3280af22 3493 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3494
3495 if (!send) /* has to be an expression */
3496 return TRUE;
3497
3498 Zero(seen,256,char);
3499 if (*s == '$')
3500 weight -= 3;
3501 else if (isDIGIT(*s)) {
3502 if (s[1] != ']') {
3503 if (isDIGIT(s[1]) && s[2] == ']')
3504 weight -= 10;
3505 }
3506 else
3507 weight -= 100;
3508 }
3509 for (; s < send; s++) {
3510 last_un_char = un_char;
3511 un_char = (unsigned char)*s;
3512 switch (*s) {
3513 case '@':
3514 case '&':
3515 case '$':
3516 weight -= seen[un_char] * 10;
7e2040f0 3517 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3518 int len;
8903cb82 3519 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3520 len = (int)strlen(tmpbuf);
3521 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3522 weight -= 100;
3523 else
3524 weight -= 10;
3525 }
3526 else if (*s == '$' && s[1] &&
93a17b20
LW
3527 strchr("[#!%*<>()-=",s[1])) {
3528 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3529 weight -= 10;
3530 else
3531 weight -= 1;
3532 }
3533 break;
3534 case '\\':
3535 un_char = 254;
3536 if (s[1]) {
93a17b20 3537 if (strchr("wds]",s[1]))
79072805 3538 weight += 100;
10edeb5d 3539 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3540 weight += 1;
93a17b20 3541 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3542 weight += 40;
3543 else if (isDIGIT(s[1])) {
3544 weight += 40;
3545 while (s[1] && isDIGIT(s[1]))
3546 s++;
3547 }
3548 }
3549 else
3550 weight += 100;
3551 break;
3552 case '-':
3553 if (s[1] == '\\')
3554 weight += 50;
93a17b20 3555 if (strchr("aA01! ",last_un_char))
79072805 3556 weight += 30;
93a17b20 3557 if (strchr("zZ79~",s[1]))
79072805 3558 weight += 30;
f27ffc4a
GS
3559 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3560 weight -= 5; /* cope with negative subscript */
79072805
LW
3561 break;
3562 default:
3792a11b
NC
3563 if (!isALNUM(last_un_char)
3564 && !(last_un_char == '$' || last_un_char == '@'
3565 || last_un_char == '&')
3566 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3567 char *d = tmpbuf;
3568 while (isALPHA(*s))
3569 *d++ = *s++;
3570 *d = '\0';
5458a98a 3571 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3572 weight -= 150;
3573 }
3574 if (un_char == last_un_char + 1)
3575 weight += 5;
3576 weight -= seen[un_char];
3577 break;
3578 }
3579 seen[un_char]++;
3580 }
3581 if (weight >= 0) /* probably a character class */
3582 return FALSE;
3583 }
3584
3585 return TRUE;
3586}
ffed7fef 3587
ffb4593c
NT
3588/*
3589 * S_intuit_method
3590 *
3591 * Does all the checking to disambiguate
3592 * foo bar
3593 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3594 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3595 *
3596 * First argument is the stuff after the first token, e.g. "bar".
3597 *
3598 * Not a method if bar is a filehandle.
3599 * Not a method if foo is a subroutine prototyped to take a filehandle.
3600 * Not a method if it's really "Foo $bar"
3601 * Method if it's "foo $bar"
3602 * Not a method if it's really "print foo $bar"
3603 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3604 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3605 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3606 * =>
3607 */
3608
76e3520e 3609STATIC int
62d55b22 3610S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3611{
97aff369 3612 dVAR;
a0d0e21e 3613 char *s = start + (*start == '$');
3280af22 3614 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3615 STRLEN len;
3616 GV* indirgv;
5db06880
NC
3617#ifdef PERL_MAD
3618 int soff;
3619#endif
a0d0e21e 3620
7918f24d
NC
3621 PERL_ARGS_ASSERT_INTUIT_METHOD;
3622
a0d0e21e 3623 if (gv) {
62d55b22 3624 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3625 return 0;
62d55b22
NC
3626 if (cv) {
3627 if (SvPOK(cv)) {
3628 const char *proto = SvPVX_const(cv);
3629 if (proto) {
3630 if (*proto == ';')
3631 proto++;
3632 if (*proto == '*')
3633 return 0;
3634 }
b6c543e3
IZ
3635 }
3636 } else
c35e046a 3637 gv = NULL;
a0d0e21e 3638 }
8903cb82 3639 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3640 /* start is the beginning of the possible filehandle/object,
3641 * and s is the end of it
3642 * tmpbuf is a copy of it
3643 */
3644
a0d0e21e 3645 if (*start == '$') {
3ef1310e
RGS
3646 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3647 isUPPER(*PL_tokenbuf))
a0d0e21e 3648 return 0;
5db06880
NC
3649#ifdef PERL_MAD
3650 len = start - SvPVX(PL_linestr);
3651#endif
29595ff2 3652 s = PEEKSPACE(s);
f0092767 3653#ifdef PERL_MAD
5db06880
NC
3654 start = SvPVX(PL_linestr) + len;
3655#endif
3280af22
NIS
3656 PL_bufptr = start;
3657 PL_expect = XREF;
a0d0e21e
LW
3658 return *s == '(' ? FUNCMETH : METHOD;
3659 }
5458a98a 3660 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3661 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3662 len -= 2;
3663 tmpbuf[len] = '\0';
5db06880
NC
3664#ifdef PERL_MAD
3665 soff = s - SvPVX(PL_linestr);
3666#endif
c3e0f903
GS
3667 goto bare_package;
3668 }
90e5519e 3669 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3670 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3671 return 0;
3672 /* filehandle or package name makes it a method */
da51bb9b 3673 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3674#ifdef PERL_MAD
3675 soff = s - SvPVX(PL_linestr);
3676#endif
29595ff2 3677 s = PEEKSPACE(s);
3280af22 3678 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3679 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3680 bare_package:
cd81e915 3681 start_force(PL_curforce);
9ded7720 3682 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3683 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3684 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3685 if (PL_madskills)
3686 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3687 PL_expect = XTERM;
a0d0e21e 3688 force_next(WORD);
3280af22 3689 PL_bufptr = s;
5db06880
NC
3690#ifdef PERL_MAD
3691 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3692#endif
a0d0e21e
LW
3693 return *s == '(' ? FUNCMETH : METHOD;
3694 }
3695 }
3696 return 0;
3697}
3698
16d20bd9 3699/* Encoded script support. filter_add() effectively inserts a
4e553d73 3700 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3701 * Note that the filter function only applies to the current source file
3702 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3703 *
3704 * The datasv parameter (which may be NULL) can be used to pass
3705 * private data to this instance of the filter. The filter function
3706 * can recover the SV using the FILTER_DATA macro and use it to
3707 * store private buffers and state information.
3708 *
3709 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3710 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3711 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3712 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3713 * private use must be set using malloc'd pointers.
3714 */
16d20bd9
AD
3715
3716SV *
864dbfa3 3717Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3718{
97aff369 3719 dVAR;
f4c556ac 3720 if (!funcp)
a0714e2c 3721 return NULL;
f4c556ac 3722
5486870f
DM
3723 if (!PL_parser)
3724 return NULL;
3725
3280af22
NIS
3726 if (!PL_rsfp_filters)
3727 PL_rsfp_filters = newAV();
16d20bd9 3728 if (!datasv)
561b68a9 3729 datasv = newSV(0);
862a34c6 3730 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3731 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3732 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3733 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3734 FPTR2DPTR(void *, IoANY(datasv)),
3735 SvPV_nolen(datasv)));
3280af22
NIS
3736 av_unshift(PL_rsfp_filters, 1);
3737 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3738 return(datasv);
3739}
4e553d73 3740
16d20bd9
AD
3741
3742/* Delete most recently added instance of this filter function. */
a0d0e21e 3743void
864dbfa3 3744Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3745{
97aff369 3746 dVAR;
e0c19803 3747 SV *datasv;
24801a4b 3748
7918f24d
NC
3749 PERL_ARGS_ASSERT_FILTER_DEL;
3750
33073adb 3751#ifdef DEBUGGING
55662e27
JH
3752 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3753 FPTR2DPTR(void*, funcp)));
33073adb 3754#endif
5486870f 3755 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3756 return;
3757 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3758 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3759 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 3760 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 3761 IoANY(datasv) = (void *)NULL;
3280af22 3762 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3763
16d20bd9
AD
3764 return;
3765 }
3766 /* we need to search for the correct entry and clear it */
cea2e8a9 3767 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3768}
3769
3770
1de9afcd
RGS
3771/* Invoke the idxth filter function for the current rsfp. */
3772/* maxlen 0 = read one text line */
16d20bd9 3773I32
864dbfa3 3774Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3775{
97aff369 3776 dVAR;
16d20bd9
AD
3777 filter_t funcp;
3778 SV *datasv = NULL;
f482118e
NC
3779 /* This API is bad. It should have been using unsigned int for maxlen.
3780 Not sure if we want to change the API, but if not we should sanity
3781 check the value here. */
39cd7a59
NC
3782 const unsigned int correct_length
3783 = maxlen < 0 ?
3784#ifdef PERL_MICRO
3785 0x7FFFFFFF
3786#else
3787 INT_MAX
3788#endif
3789 : maxlen;
e50aee73 3790
7918f24d
NC
3791 PERL_ARGS_ASSERT_FILTER_READ;
3792
5486870f 3793 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3794 return -1;
1de9afcd 3795 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3796 /* Provide a default input filter to make life easy. */
3797 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3798 DEBUG_P(PerlIO_printf(Perl_debug_log,
3799 "filter_read %d: from rsfp\n", idx));
f482118e 3800 if (correct_length) {
16d20bd9
AD
3801 /* Want a block */
3802 int len ;
f54cb97a 3803 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3804
3805 /* ensure buf_sv is large enough */
881d8f0a 3806 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3807 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3808 correct_length)) <= 0) {
3280af22 3809 if (PerlIO_error(PL_rsfp))
37120919
AD
3810 return -1; /* error */
3811 else
3812 return 0 ; /* end of file */
3813 }
16d20bd9 3814 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3815 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3816 } else {
3817 /* Want a line */
3280af22
NIS
3818 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3819 if (PerlIO_error(PL_rsfp))
37120919
AD
3820 return -1; /* error */
3821 else
3822 return 0 ; /* end of file */
3823 }
16d20bd9
AD
3824 }
3825 return SvCUR(buf_sv);
3826 }
3827 /* Skip this filter slot if filter has been deleted */
1de9afcd 3828 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3829 DEBUG_P(PerlIO_printf(Perl_debug_log,
3830 "filter_read %d: skipped (filter deleted)\n",
3831 idx));
f482118e 3832 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3833 }
3834 /* Get function pointer hidden within datasv */
8141890a 3835 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3836 DEBUG_P(PerlIO_printf(Perl_debug_log,
3837 "filter_read %d: via function %p (%s)\n",
ca0270c4 3838 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3839 /* Call function. The function is expected to */
3840 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3841 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3842 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3843}
3844
76e3520e 3845STATIC char *
5cc814fd 3846S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3847{
97aff369 3848 dVAR;
7918f24d
NC
3849
3850 PERL_ARGS_ASSERT_FILTER_GETS;
3851
c39cd008 3852#ifdef PERL_CR_FILTER
3280af22 3853 if (!PL_rsfp_filters) {
c39cd008 3854 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3855 }
3856#endif
3280af22 3857 if (PL_rsfp_filters) {
55497cff 3858 if (!append)
3859 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3860 if (FILTER_READ(0, sv, 0) > 0)
3861 return ( SvPVX(sv) ) ;
3862 else
bd61b366 3863 return NULL ;
16d20bd9 3864 }
9d116dd7 3865 else
5cc814fd 3866 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3867}
3868
01ec43d0 3869STATIC HV *
9bde8eb0 3870S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3871{
97aff369 3872 dVAR;
def3634b
GS
3873 GV *gv;
3874
7918f24d
NC
3875 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3876
01ec43d0 3877 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3878 return PL_curstash;
3879
3880 if (len > 2 &&
3881 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3882 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3883 {
3884 return GvHV(gv); /* Foo:: */
def3634b
GS
3885 }
3886
3887 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3888 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3889 if (gv && GvCV(gv)) {
3890 SV * const sv = cv_const_sv(GvCV(gv));
3891 if (sv)
9bde8eb0 3892 pkgname = SvPV_const(sv, len);
def3634b
GS
3893 }
3894
9bde8eb0 3895 return gv_stashpvn(pkgname, len, 0);
def3634b 3896}
a0d0e21e 3897
e3f73d4e
RGS
3898/*
3899 * S_readpipe_override
3900 * Check whether readpipe() is overriden, and generates the appropriate
3901 * optree, provided sublex_start() is called afterwards.
3902 */
3903STATIC void
1d51329b 3904S_readpipe_override(pTHX)
e3f73d4e
RGS
3905{
3906 GV **gvp;
3907 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3908 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3909 if ((gv_readpipe
3910 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3911 ||
3912 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3913 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3914 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3915 {
3916 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3917 append_elem(OP_LIST,
3918 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3919 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3920 }
e3f73d4e
RGS
3921}
3922
5db06880
NC
3923#ifdef PERL_MAD
3924 /*
3925 * Perl_madlex
3926 * The intent of this yylex wrapper is to minimize the changes to the
3927 * tokener when we aren't interested in collecting madprops. It remains
3928 * to be seen how successful this strategy will be...
3929 */
3930
3931int
3932Perl_madlex(pTHX)
3933{
3934 int optype;
3935 char *s = PL_bufptr;
3936
cd81e915
NC
3937 /* make sure PL_thiswhite is initialized */
3938 PL_thiswhite = 0;
3939 PL_thismad = 0;
5db06880 3940
cd81e915 3941 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3942 if (PL_pending_ident)
3943 return S_pending_ident(aTHX);
3944
3945 /* previous token ate up our whitespace? */
cd81e915
NC
3946 if (!PL_lasttoke && PL_nextwhite) {
3947 PL_thiswhite = PL_nextwhite;
3948 PL_nextwhite = 0;
5db06880
NC
3949 }
3950
3951 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3952 PL_realtokenstart = -1;
3953 PL_thistoken = 0;
5db06880
NC
3954 optype = yylex();
3955 s = PL_bufptr;
cd81e915 3956 assert(PL_curforce < 0);
5db06880 3957
cd81e915
NC
3958 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3959 if (!PL_thistoken) {
3960 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3961 PL_thistoken = newSVpvs("");
5db06880 3962 else {
c35e046a 3963 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3964 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3965 }
3966 }
cd81e915
NC
3967 if (PL_thismad) /* install head */
3968 CURMAD('X', PL_thistoken);
5db06880
NC
3969 }
3970
3971 /* last whitespace of a sublex? */
cd81e915
NC
3972 if (optype == ')' && PL_endwhite) {
3973 CURMAD('X', PL_endwhite);
5db06880
NC
3974 }
3975
cd81e915 3976 if (!PL_thismad) {
5db06880
NC
3977
3978 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3979 if (!PL_thiswhite && !PL_endwhite && !optype) {
3980 sv_free(PL_thistoken);
3981 PL_thistoken = 0;
5db06880
NC
3982 return 0;
3983 }
3984
3985 /* put off final whitespace till peg */
3986 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3987 PL_nextwhite = PL_thiswhite;
3988 PL_thiswhite = 0;
5db06880 3989 }
cd81e915
NC
3990 else if (PL_thisopen) {
3991 CURMAD('q', PL_thisopen);
3992 if (PL_thistoken)
3993 sv_free(PL_thistoken);
3994 PL_thistoken = 0;
5db06880
NC
3995 }
3996 else {
3997 /* Store actual token text as madprop X */
cd81e915 3998 CURMAD('X', PL_thistoken);
5db06880
NC
3999 }
4000
cd81e915 4001 if (PL_thiswhite) {
5db06880 4002 /* add preceding whitespace as madprop _ */
cd81e915 4003 CURMAD('_', PL_thiswhite);
5db06880
NC
4004 }
4005
cd81e915 4006 if (PL_thisstuff) {
5db06880 4007 /* add quoted material as madprop = */
cd81e915 4008 CURMAD('=', PL_thisstuff);
5db06880
NC
4009 }
4010
cd81e915 4011 if (PL_thisclose) {
5db06880 4012 /* add terminating quote as madprop Q */
cd81e915 4013 CURMAD('Q', PL_thisclose);
5db06880
NC
4014 }
4015 }
4016
4017 /* special processing based on optype */
4018
4019 switch (optype) {
4020
4021 /* opval doesn't need a TOKEN since it can already store mp */
4022 case WORD:
4023 case METHOD:
4024 case FUNCMETH:
4025 case THING:
4026 case PMFUNC:
4027 case PRIVATEREF:
4028 case FUNC0SUB:
4029 case UNIOPSUB:
4030 case LSTOPSUB:
6154021b
RGS
4031 if (pl_yylval.opval)
4032 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4033 PL_thismad = 0;
5db06880
NC
4034 return optype;
4035
4036 /* fake EOF */
4037 case 0:
4038 optype = PEG;
cd81e915
NC
4039 if (PL_endwhite) {
4040 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4041 PL_endwhite = 0;
5db06880
NC
4042 }
4043 break;
4044
4045 case ']':
4046 case '}':
cd81e915 4047 if (PL_faketokens)
5db06880
NC
4048 break;
4049 /* remember any fake bracket that lexer is about to discard */
4050 if (PL_lex_brackets == 1 &&
4051 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4052 {
4053 s = PL_bufptr;
4054 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4055 s++;
4056 if (*s == '}') {
cd81e915
NC
4057 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4058 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4059 PL_thiswhite = 0;
5db06880
NC
4060 PL_bufptr = s - 1;
4061 break; /* don't bother looking for trailing comment */
4062 }
4063 else
4064 s = PL_bufptr;
4065 }
4066 if (optype == ']')
4067 break;
4068 /* FALLTHROUGH */
4069
4070 /* attach a trailing comment to its statement instead of next token */
4071 case ';':
cd81e915 4072 if (PL_faketokens)
5db06880
NC
4073 break;
4074 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4075 s = PL_bufptr;
4076 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4077 s++;
4078 if (*s == '\n' || *s == '#') {
4079 while (s < PL_bufend && *s != '\n')
4080 s++;
4081 if (s < PL_bufend)
4082 s++;
cd81e915
NC
4083 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4084 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4085 PL_thiswhite = 0;
5db06880
NC
4086 PL_bufptr = s;
4087 }
4088 }
4089 break;
4090
4091 /* pval */
4092 case LABEL:
4093 break;
4094
4095 /* ival */
4096 default:
4097 break;
4098
4099 }
4100
4101 /* Create new token struct. Note: opvals return early above. */
6154021b 4102 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4103 PL_thismad = 0;
5db06880
NC
4104 return optype;
4105}
4106#endif
4107
468aa647 4108STATIC char *
cc6ed77d 4109S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4110 dVAR;
7918f24d
NC
4111
4112 PERL_ARGS_ASSERT_TOKENIZE_USE;
4113
468aa647
RGS
4114 if (PL_expect != XSTATE)
4115 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4116 is_use ? "use" : "no"));
29595ff2 4117 s = SKIPSPACE1(s);
468aa647
RGS
4118 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4119 s = force_version(s, TRUE);
17c59fdf
VP
4120 if (*s == ';' || *s == '}'
4121 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4122 start_force(PL_curforce);
9ded7720 4123 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4124 force_next(WORD);
4125 }
4126 else if (*s == 'v') {
4127 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4128 s = force_version(s, FALSE);
4129 }
4130 }
4131 else {
4132 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4133 s = force_version(s, FALSE);
4134 }
6154021b 4135 pl_yylval.ival = is_use;
468aa647
RGS
4136 return s;
4137}
748a9306 4138#ifdef DEBUGGING
27da23d5 4139 static const char* const exp_name[] =
09bef843 4140 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4141 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4142 };
748a9306 4143#endif
463ee0b2 4144
02aa26ce
NT
4145/*
4146 yylex
4147
4148 Works out what to call the token just pulled out of the input
4149 stream. The yacc parser takes care of taking the ops we return and
4150 stitching them into a tree.
4151
4152 Returns:
4153 PRIVATEREF
4154
4155 Structure:
4156 if read an identifier
4157 if we're in a my declaration
4158 croak if they tried to say my($foo::bar)
4159 build the ops for a my() declaration
4160 if it's an access to a my() variable
4161 are we in a sort block?
4162 croak if my($a); $a <=> $b
4163 build ops for access to a my() variable
4164 if in a dq string, and they've said @foo and we can't find @foo
4165 croak
4166 build ops for a bareword
4167 if we already built the token before, use it.
4168*/
4169
20141f0e 4170
dba4d153
JH
4171#ifdef __SC__
4172#pragma segment Perl_yylex
4173#endif
dba4d153 4174int
dba4d153 4175Perl_yylex(pTHX)
20141f0e 4176{
97aff369 4177 dVAR;
3afc138a 4178 register char *s = PL_bufptr;
378cc40b 4179 register char *d;
463ee0b2 4180 STRLEN len;
aa7440fb 4181 bool bof = FALSE;
580561a3 4182 U32 fake_eof = 0;
a687059c 4183
10edeb5d
JH
4184 /* orig_keyword, gvp, and gv are initialized here because
4185 * jump to the label just_a_word_zero can bypass their
4186 * initialization later. */
4187 I32 orig_keyword = 0;
4188 GV *gv = NULL;
4189 GV **gvp = NULL;
4190
bbf60fe6 4191 DEBUG_T( {
396482e1 4192 SV* tmp = newSVpvs("");
b6007c36
DM
4193 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4194 (IV)CopLINE(PL_curcop),
4195 lex_state_names[PL_lex_state],
4196 exp_name[PL_expect],
4197 pv_display(tmp, s, strlen(s), 0, 60));
4198 SvREFCNT_dec(tmp);
bbf60fe6 4199 } );
02aa26ce 4200 /* check if there's an identifier for us to look at */
ba979b31 4201 if (PL_pending_ident)
bbf60fe6 4202 return REPORT(S_pending_ident(aTHX));
bbce6d69 4203
02aa26ce
NT
4204 /* no identifier pending identification */
4205
3280af22 4206 switch (PL_lex_state) {
79072805
LW
4207#ifdef COMMENTARY
4208 case LEX_NORMAL: /* Some compilers will produce faster */
4209 case LEX_INTERPNORMAL: /* code if we comment these out. */
4210 break;
4211#endif
4212
09bef843 4213 /* when we've already built the next token, just pull it out of the queue */
79072805 4214 case LEX_KNOWNEXT:
5db06880
NC
4215#ifdef PERL_MAD
4216 PL_lasttoke--;
6154021b 4217 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4218 if (PL_madskills) {
cd81e915 4219 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4220 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4221 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4222 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4223 PL_thismad->mad_val = 0;
4224 mad_free(PL_thismad);
4225 PL_thismad = 0;
5db06880
NC
4226 }
4227 }
4228 if (!PL_lasttoke) {
4229 PL_lex_state = PL_lex_defer;
4230 PL_expect = PL_lex_expect;
4231 PL_lex_defer = LEX_NORMAL;
4232 if (!PL_nexttoke[PL_lasttoke].next_type)
4233 return yylex();
4234 }
4235#else
3280af22 4236 PL_nexttoke--;
6154021b 4237 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4238 if (!PL_nexttoke) {
4239 PL_lex_state = PL_lex_defer;
4240 PL_expect = PL_lex_expect;
4241 PL_lex_defer = LEX_NORMAL;
463ee0b2 4242 }
5db06880
NC
4243#endif
4244#ifdef PERL_MAD
4245 /* FIXME - can these be merged? */
4246 return(PL_nexttoke[PL_lasttoke].next_type);
4247#else
bbf60fe6 4248 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 4249#endif
79072805 4250
02aa26ce 4251 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4252 when we get here, PL_bufptr is at the \
02aa26ce 4253 */
79072805
LW
4254 case LEX_INTERPCASEMOD:
4255#ifdef DEBUGGING
3280af22 4256 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4257 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4258#endif
02aa26ce 4259 /* handle \E or end of string */
3280af22 4260 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4261 /* if at a \E */
3280af22 4262 if (PL_lex_casemods) {
f54cb97a 4263 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4264 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4265
3792a11b
NC
4266 if (PL_bufptr != PL_bufend
4267 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4268 PL_bufptr += 2;
4269 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4270#ifdef PERL_MAD
4271 if (PL_madskills)
6b29d1f5 4272 PL_thistoken = newSVpvs("\\E");
5db06880 4273#endif
a0d0e21e 4274 }
bbf60fe6 4275 return REPORT(')');
79072805 4276 }
5db06880
NC
4277#ifdef PERL_MAD
4278 while (PL_bufptr != PL_bufend &&
4279 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4280 if (!PL_thiswhite)
6b29d1f5 4281 PL_thiswhite = newSVpvs("");
cd81e915 4282 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4283 PL_bufptr += 2;
4284 }
4285#else
3280af22
NIS
4286 if (PL_bufptr != PL_bufend)
4287 PL_bufptr += 2;
5db06880 4288#endif
3280af22 4289 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4290 return yylex();
79072805
LW
4291 }
4292 else {
607df283 4293 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4294 "### Saw case modifier\n"); });
3280af22 4295 s = PL_bufptr + 1;
6e909404 4296 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4297#ifdef PERL_MAD
cd81e915 4298 if (!PL_thiswhite)
6b29d1f5 4299 PL_thiswhite = newSVpvs("");
cd81e915 4300 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4301#endif
89122651 4302 PL_bufptr = s + 3;
6e909404
JH
4303 PL_lex_state = LEX_INTERPCONCAT;
4304 return yylex();
a0d0e21e 4305 }
6e909404 4306 else {
90771dc0 4307 I32 tmp;
5db06880
NC
4308 if (!PL_madskills) /* when just compiling don't need correct */
4309 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4310 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4311 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4312 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4313 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4314 return REPORT(')');
6e909404
JH
4315 }
4316 if (PL_lex_casemods > 10)
4317 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4318 PL_lex_casestack[PL_lex_casemods++] = *s;
4319 PL_lex_casestack[PL_lex_casemods] = '\0';
4320 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4321 start_force(PL_curforce);
9ded7720 4322 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4323 force_next('(');
cd81e915 4324 start_force(PL_curforce);
6e909404 4325 if (*s == 'l')
9ded7720 4326 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4327 else if (*s == 'u')
9ded7720 4328 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4329 else if (*s == 'L')
9ded7720 4330 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4331 else if (*s == 'U')
9ded7720 4332 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4333 else if (*s == 'Q')
9ded7720 4334 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4335 else
4336 Perl_croak(aTHX_ "panic: yylex");
5db06880 4337 if (PL_madskills) {
a5849ce5
NC
4338 SV* const tmpsv = newSVpvs("\\ ");
4339 /* replace the space with the character we want to escape
4340 */
4341 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4342 curmad('_', tmpsv);
4343 }
6e909404 4344 PL_bufptr = s + 1;
a0d0e21e 4345 }
79072805 4346 force_next(FUNC);
3280af22
NIS
4347 if (PL_lex_starts) {
4348 s = PL_bufptr;
4349 PL_lex_starts = 0;
5db06880
NC
4350#ifdef PERL_MAD
4351 if (PL_madskills) {
cd81e915
NC
4352 if (PL_thistoken)
4353 sv_free(PL_thistoken);
6b29d1f5 4354 PL_thistoken = newSVpvs("");
5db06880
NC
4355 }
4356#endif
131b3ad0
DM
4357 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4358 if (PL_lex_casemods == 1 && PL_lex_inpat)
4359 OPERATOR(',');
4360 else
4361 Aop(OP_CONCAT);
79072805
LW
4362 }
4363 else
cea2e8a9 4364 return yylex();
79072805
LW
4365 }
4366
55497cff 4367 case LEX_INTERPPUSH:
bbf60fe6 4368 return REPORT(sublex_push());
55497cff 4369
79072805 4370 case LEX_INTERPSTART:
3280af22 4371 if (PL_bufptr == PL_bufend)
bbf60fe6 4372 return REPORT(sublex_done());
607df283 4373 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4374 "### Interpolated variable\n"); });
3280af22
NIS
4375 PL_expect = XTERM;
4376 PL_lex_dojoin = (*PL_bufptr == '@');
4377 PL_lex_state = LEX_INTERPNORMAL;
4378 if (PL_lex_dojoin) {
cd81e915 4379 start_force(PL_curforce);
9ded7720 4380 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4381 force_next(',');
cd81e915 4382 start_force(PL_curforce);
a0d0e21e 4383 force_ident("\"", '$');
cd81e915 4384 start_force(PL_curforce);
9ded7720 4385 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4386 force_next('$');
cd81e915 4387 start_force(PL_curforce);
9ded7720 4388 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4389 force_next('(');
cd81e915 4390 start_force(PL_curforce);
9ded7720 4391 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4392 force_next(FUNC);
4393 }
3280af22
NIS
4394 if (PL_lex_starts++) {
4395 s = PL_bufptr;
5db06880
NC
4396#ifdef PERL_MAD
4397 if (PL_madskills) {
cd81e915
NC
4398 if (PL_thistoken)
4399 sv_free(PL_thistoken);
6b29d1f5 4400 PL_thistoken = newSVpvs("");
5db06880
NC
4401 }
4402#endif
131b3ad0
DM
4403 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4404 if (!PL_lex_casemods && PL_lex_inpat)
4405 OPERATOR(',');
4406 else
4407 Aop(OP_CONCAT);
79072805 4408 }
cea2e8a9 4409 return yylex();
79072805
LW
4410
4411 case LEX_INTERPENDMAYBE:
3280af22
NIS
4412 if (intuit_more(PL_bufptr)) {
4413 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4414 break;
4415 }
4416 /* FALL THROUGH */
4417
4418 case LEX_INTERPEND:
3280af22
NIS
4419 if (PL_lex_dojoin) {
4420 PL_lex_dojoin = FALSE;
4421 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4422#ifdef PERL_MAD
4423 if (PL_madskills) {
cd81e915
NC
4424 if (PL_thistoken)
4425 sv_free(PL_thistoken);
6b29d1f5 4426 PL_thistoken = newSVpvs("");
5db06880
NC
4427 }
4428#endif
bbf60fe6 4429 return REPORT(')');
79072805 4430 }
43a16006 4431 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4432 && SvEVALED(PL_lex_repl))
43a16006 4433 {
e9fa98b2 4434 if (PL_bufptr != PL_bufend)
cea2e8a9 4435 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4436 PL_lex_repl = NULL;
e9fa98b2 4437 }
79072805
LW
4438 /* FALLTHROUGH */
4439 case LEX_INTERPCONCAT:
4440#ifdef DEBUGGING
3280af22 4441 if (PL_lex_brackets)
cea2e8a9 4442 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4443#endif
3280af22 4444 if (PL_bufptr == PL_bufend)
bbf60fe6 4445 return REPORT(sublex_done());
79072805 4446
3280af22
NIS
4447 if (SvIVX(PL_linestr) == '\'') {
4448 SV *sv = newSVsv(PL_linestr);
4449 if (!PL_lex_inpat)
76e3520e 4450 sv = tokeq(sv);
3280af22 4451 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4452 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4453 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4454 s = PL_bufend;
79072805
LW
4455 }
4456 else {
3280af22 4457 s = scan_const(PL_bufptr);
79072805 4458 if (*s == '\\')
3280af22 4459 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4460 else
3280af22 4461 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4462 }
4463
3280af22 4464 if (s != PL_bufptr) {
cd81e915 4465 start_force(PL_curforce);
5db06880
NC
4466 if (PL_madskills) {
4467 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4468 }
6154021b 4469 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4470 PL_expect = XTERM;
79072805 4471 force_next(THING);
131b3ad0 4472 if (PL_lex_starts++) {
5db06880
NC
4473#ifdef PERL_MAD
4474 if (PL_madskills) {
cd81e915
NC
4475 if (PL_thistoken)
4476 sv_free(PL_thistoken);
6b29d1f5 4477 PL_thistoken = newSVpvs("");
5db06880
NC
4478 }
4479#endif
131b3ad0
DM
4480 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4481 if (!PL_lex_casemods && PL_lex_inpat)
4482 OPERATOR(',');
4483 else
4484 Aop(OP_CONCAT);
4485 }
79072805 4486 else {
3280af22 4487 PL_bufptr = s;
cea2e8a9 4488 return yylex();
79072805
LW
4489 }
4490 }
4491
cea2e8a9 4492 return yylex();
a0d0e21e 4493 case LEX_FORMLINE:
3280af22
NIS
4494 PL_lex_state = LEX_NORMAL;
4495 s = scan_formline(PL_bufptr);
4496 if (!PL_lex_formbrack)
a0d0e21e
LW
4497 goto rightbracket;
4498 OPERATOR(';');
79072805
LW
4499 }
4500
3280af22
NIS
4501 s = PL_bufptr;
4502 PL_oldoldbufptr = PL_oldbufptr;
4503 PL_oldbufptr = s;
463ee0b2
LW
4504
4505 retry:
5db06880 4506#ifdef PERL_MAD
cd81e915
NC
4507 if (PL_thistoken) {
4508 sv_free(PL_thistoken);
4509 PL_thistoken = 0;
5db06880 4510 }
cd81e915 4511 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4512#endif
378cc40b
LW
4513 switch (*s) {
4514 default:
7e2040f0 4515 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4516 goto keylookup;
b1fc3636
CJ
4517 {
4518 unsigned char c = *s;
4519 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4520 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4521 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4522 } else {
4523 d = PL_linestart;
4524 }
4525 *s = '\0';
4526 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4527 }
e929a76b
LW
4528 case 4:
4529 case 26:
4530 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4531 case 0:
5db06880
NC
4532#ifdef PERL_MAD
4533 if (PL_madskills)
cd81e915 4534 PL_faketokens = 0;
5db06880 4535#endif
3280af22
NIS
4536 if (!PL_rsfp) {
4537 PL_last_uni = 0;
4538 PL_last_lop = 0;
c5ee2135 4539 if (PL_lex_brackets) {
10edeb5d
JH
4540 yyerror((const char *)
4541 (PL_lex_formbrack
4542 ? "Format not terminated"
4543 : "Missing right curly or square bracket"));
c5ee2135 4544 }
4e553d73 4545 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4546 "### Tokener got EOF\n");
5f80b19c 4547 } );
79072805 4548 TOKEN(0);
463ee0b2 4549 }
3280af22 4550 if (s++ < PL_bufend)
a687059c 4551 goto retry; /* ignore stray nulls */
3280af22
NIS
4552 PL_last_uni = 0;
4553 PL_last_lop = 0;
4554 if (!PL_in_eval && !PL_preambled) {
4555 PL_preambled = TRUE;
5db06880
NC
4556#ifdef PERL_MAD
4557 if (PL_madskills)
cd81e915 4558 PL_faketokens = 1;
5db06880 4559#endif
5ab7ff98
NC
4560 if (PL_perldb) {
4561 /* Generate a string of Perl code to load the debugger.
4562 * If PERL5DB is set, it will return the contents of that,
4563 * otherwise a compile-time require of perl5db.pl. */
4564
4565 const char * const pdb = PerlEnv_getenv("PERL5DB");
4566
4567 if (pdb) {
4568 sv_setpv(PL_linestr, pdb);
4569 sv_catpvs(PL_linestr,";");
4570 } else {
4571 SETERRNO(0,SS_NORMAL);
4572 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4573 }
4574 } else
4575 sv_setpvs(PL_linestr,"");
c62eb204
NC
4576 if (PL_preambleav) {
4577 SV **svp = AvARRAY(PL_preambleav);
4578 SV **const end = svp + AvFILLp(PL_preambleav);
4579 while(svp <= end) {
4580 sv_catsv(PL_linestr, *svp);
4581 ++svp;
396482e1 4582 sv_catpvs(PL_linestr, ";");
91b7def8 4583 }
daba3364 4584 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4585 PL_preambleav = NULL;
91b7def8 4586 }
9f639728
FR
4587 if (PL_minus_E)
4588 sv_catpvs(PL_linestr,
4589 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4590 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4591 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4592 if (PL_minus_l)
396482e1 4593 sv_catpvs(PL_linestr,"chomp;");
3280af22 4594 if (PL_minus_a) {
3280af22 4595 if (PL_minus_F) {
3792a11b
NC
4596 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4597 || *PL_splitstr == '"')
3280af22 4598 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4599 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4600 else {
c8ef6a4b
NC
4601 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4602 bytes can be used as quoting characters. :-) */
dd374669 4603 const char *splits = PL_splitstr;
91d456ae 4604 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4605 do {
4606 /* Need to \ \s */
dd374669
AL
4607 if (*splits == '\\')
4608 sv_catpvn(PL_linestr, splits, 1);
4609 sv_catpvn(PL_linestr, splits, 1);
4610 } while (*splits++);
48c4c863
NC
4611 /* This loop will embed the trailing NUL of
4612 PL_linestr as the last thing it does before
4613 terminating. */
396482e1 4614 sv_catpvs(PL_linestr, ");");
54310121 4615 }
2304df62
AD
4616 }
4617 else
396482e1 4618 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4619 }
79072805 4620 }
396482e1 4621 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4622 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4623 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4624 PL_last_lop = PL_last_uni = NULL;
65269a95 4625 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4626 update_debugger_info(PL_linestr, NULL, 0);
79072805 4627 goto retry;
a687059c 4628 }
e929a76b 4629 do {
580561a3
Z
4630 fake_eof = 0;
4631 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4632 if (0) {
7e28d3af 4633 fake_eof:
f0e67a1d
Z
4634 fake_eof = LEX_FAKE_EOF;
4635 }
4636 PL_bufptr = PL_bufend;
17cc9359 4637 CopLINE_inc(PL_curcop);
f0e67a1d 4638 if (!lex_next_chunk(fake_eof)) {
17cc9359 4639 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4640 s = PL_bufptr;
4641 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4642 }
17cc9359 4643 CopLINE_dec(PL_curcop);
5db06880 4644#ifdef PERL_MAD
f0e67a1d 4645 if (!PL_rsfp)
cd81e915 4646 PL_realtokenstart = -1;
5db06880 4647#endif
f0e67a1d 4648 s = PL_bufptr;
7aa207d6
JH
4649 /* If it looks like the start of a BOM or raw UTF-16,
4650 * check if it in fact is. */
580561a3 4651 if (bof && PL_rsfp &&
7aa207d6
JH
4652 (*s == 0 ||
4653 *(U8*)s == 0xEF ||
4654 *(U8*)s >= 0xFE ||
4655 s[1] == 0)) {
eb160463 4656 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4657 if (bof) {
3280af22 4658 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4659 s = swallow_bom((U8*)s);
e929a76b 4660 }
378cc40b 4661 }
3280af22 4662 if (PL_doextract) {
a0d0e21e 4663 /* Incest with pod. */
5db06880
NC
4664#ifdef PERL_MAD
4665 if (PL_madskills)
cd81e915 4666 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4667#endif
01a57ef7 4668 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4669 sv_setpvs(PL_linestr, "");
3280af22
NIS
4670 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4671 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4672 PL_last_lop = PL_last_uni = NULL;
3280af22 4673 PL_doextract = FALSE;
a0d0e21e 4674 }
4e553d73 4675 }
85613cab
Z
4676 if (PL_rsfp)
4677 incline(s);
3280af22
NIS
4678 } while (PL_doextract);
4679 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4680 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4681 PL_last_lop = PL_last_uni = NULL;
57843af0 4682 if (CopLINE(PL_curcop) == 1) {
3280af22 4683 while (s < PL_bufend && isSPACE(*s))
79072805 4684 s++;
a0d0e21e 4685 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4686 s++;
5db06880
NC
4687#ifdef PERL_MAD
4688 if (PL_madskills)
cd81e915 4689 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4690#endif
bd61b366 4691 d = NULL;
3280af22 4692 if (!PL_in_eval) {
44a8e56a 4693 if (*s == '#' && *(s+1) == '!')
4694 d = s + 2;
4695#ifdef ALTERNATE_SHEBANG
4696 else {
bfed75c6 4697 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4698 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4699 d = s + (sizeof(as) - 1);
4700 }
4701#endif /* ALTERNATE_SHEBANG */
4702 }
4703 if (d) {
b8378b72 4704 char *ipath;
774d564b 4705 char *ipathend;
b8378b72 4706
774d564b 4707 while (isSPACE(*d))
b8378b72
CS
4708 d++;
4709 ipath = d;
774d564b 4710 while (*d && !isSPACE(*d))
4711 d++;
4712 ipathend = d;
4713
4714#ifdef ARG_ZERO_IS_SCRIPT
4715 if (ipathend > ipath) {
4716 /*
4717 * HP-UX (at least) sets argv[0] to the script name,
4718 * which makes $^X incorrect. And Digital UNIX and Linux,
4719 * at least, set argv[0] to the basename of the Perl
4720 * interpreter. So, having found "#!", we'll set it right.
4721 */
fafc274c
NC
4722 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4723 SVt_PV)); /* $^X */
774d564b 4724 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4725 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4726 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4727 SvSETMAGIC(x);
4728 }
556c1dec
JH
4729 else {
4730 STRLEN blen;
4731 STRLEN llen;
cfd0369c 4732 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4733 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4734 if (llen < blen) {
4735 bstart += blen - llen;
4736 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4737 sv_setpvn(x, ipath, ipathend - ipath);
4738 SvSETMAGIC(x);
4739 }
4740 }
4741 }
774d564b 4742 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4743 }
774d564b 4744#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4745
4746 /*
4747 * Look for options.
4748 */
748a9306 4749 d = instr(s,"perl -");
84e30d1a 4750 if (!d) {
748a9306 4751 d = instr(s,"perl");
84e30d1a
GS
4752#if defined(DOSISH)
4753 /* avoid getting into infinite loops when shebang
4754 * line contains "Perl" rather than "perl" */
4755 if (!d) {
4756 for (d = ipathend-4; d >= ipath; --d) {
4757 if ((*d == 'p' || *d == 'P')
4758 && !ibcmp(d, "perl", 4))
4759 {
4760 break;
4761 }
4762 }
4763 if (d < ipath)
bd61b366 4764 d = NULL;
84e30d1a
GS
4765 }
4766#endif
4767 }
44a8e56a 4768#ifdef ALTERNATE_SHEBANG
4769 /*
4770 * If the ALTERNATE_SHEBANG on this system starts with a
4771 * character that can be part of a Perl expression, then if
4772 * we see it but not "perl", we're probably looking at the
4773 * start of Perl code, not a request to hand off to some
4774 * other interpreter. Similarly, if "perl" is there, but
4775 * not in the first 'word' of the line, we assume the line
4776 * contains the start of the Perl program.
44a8e56a 4777 */
4778 if (d && *s != '#') {
f54cb97a 4779 const char *c = ipath;
44a8e56a 4780 while (*c && !strchr("; \t\r\n\f\v#", *c))
4781 c++;
4782 if (c < d)
bd61b366 4783 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4784 else
4785 *s = '#'; /* Don't try to parse shebang line */
4786 }
774d564b 4787#endif /* ALTERNATE_SHEBANG */
748a9306 4788 if (!d &&
44a8e56a 4789 *s == '#' &&
774d564b 4790 ipathend > ipath &&
3280af22 4791 !PL_minus_c &&
748a9306 4792 !instr(s,"indir") &&
3280af22 4793 instr(PL_origargv[0],"perl"))
748a9306 4794 {
27da23d5 4795 dVAR;
9f68db38 4796 char **newargv;
9f68db38 4797
774d564b 4798 *ipathend = '\0';
4799 s = ipathend + 1;
3280af22 4800 while (s < PL_bufend && isSPACE(*s))
9f68db38 4801 s++;
3280af22 4802 if (s < PL_bufend) {
d85f917e 4803 Newx(newargv,PL_origargc+3,char*);
9f68db38 4804 newargv[1] = s;
3280af22 4805 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4806 s++;
4807 *s = '\0';
3280af22 4808 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4809 }
4810 else
3280af22 4811 newargv = PL_origargv;
774d564b 4812 newargv[0] = ipath;
b35112e7 4813 PERL_FPU_PRE_EXEC
b4748376 4814 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4815 PERL_FPU_POST_EXEC
cea2e8a9 4816 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4817 }
748a9306 4818 if (d) {
c35e046a
AL
4819 while (*d && !isSPACE(*d))
4820 d++;
4821 while (SPACE_OR_TAB(*d))
4822 d++;
748a9306
LW
4823
4824 if (*d++ == '-') {
f54cb97a 4825 const bool switches_done = PL_doswitches;
fb993905
GA
4826 const U32 oldpdb = PL_perldb;
4827 const bool oldn = PL_minus_n;
4828 const bool oldp = PL_minus_p;
c7030b81 4829 const char *d1 = d;
fb993905 4830
8cc95fdb 4831 do {
4ba71d51
FC
4832 bool baduni = FALSE;
4833 if (*d1 == 'C') {
bd0ab00d
NC
4834 const char *d2 = d1 + 1;
4835 if (parse_unicode_opts((const char **)&d2)
4836 != PL_unicode)
4837 baduni = TRUE;
4ba71d51
FC
4838 }
4839 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4840 const char * const m = d1;
4841 while (*d1 && !isSPACE(*d1))
4842 d1++;
cea2e8a9 4843 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4844 (int)(d1 - m), m);
8cc95fdb 4845 }
c7030b81
NC
4846 d1 = moreswitches(d1);
4847 } while (d1);
f0b2cf55
YST
4848 if (PL_doswitches && !switches_done) {
4849 int argc = PL_origargc;
4850 char **argv = PL_origargv;
4851 do {
4852 argc--,argv++;
4853 } while (argc && argv[0][0] == '-' && argv[0][1]);
4854 init_argv_symbols(argc,argv);
4855 }
65269a95 4856 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4857 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4858 /* if we have already added "LINE: while (<>) {",
4859 we must not do it again */
748a9306 4860 {
76f68e9b 4861 sv_setpvs(PL_linestr, "");
3280af22
NIS
4862 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4863 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4864 PL_last_lop = PL_last_uni = NULL;
3280af22 4865 PL_preambled = FALSE;
65269a95 4866 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4867 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4868 goto retry;
4869 }
a0d0e21e 4870 }
79072805 4871 }
9f68db38 4872 }
79072805 4873 }
3280af22
NIS
4874 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4875 PL_bufptr = s;
4876 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4877 return yylex();
ae986130 4878 }
378cc40b 4879 goto retry;
4fdae800 4880 case '\r':
6a27c188 4881#ifdef PERL_STRICT_CR
cea2e8a9 4882 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4883 Perl_croak(aTHX_
cc507455 4884 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4885#endif
4fdae800 4886 case ' ': case '\t': case '\f': case 013:
5db06880 4887#ifdef PERL_MAD
cd81e915 4888 PL_realtokenstart = -1;
ac372eb8
RD
4889 if (!PL_thiswhite)
4890 PL_thiswhite = newSVpvs("");
4891 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4892#endif
ac372eb8 4893 s++;
378cc40b 4894 goto retry;
378cc40b 4895 case '#':
e929a76b 4896 case '\n':
5db06880 4897#ifdef PERL_MAD
cd81e915 4898 PL_realtokenstart = -1;
5db06880 4899 if (PL_madskills)
cd81e915 4900 PL_faketokens = 0;
5db06880 4901#endif
3280af22 4902 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4903 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4904 /* handle eval qq[#line 1 "foo"\n ...] */
4905 CopLINE_dec(PL_curcop);
4906 incline(s);
4907 }
5db06880
NC
4908 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4909 s = SKIPSPACE0(s);
4910 if (!PL_in_eval || PL_rsfp)
4911 incline(s);
4912 }
4913 else {
4914 d = s;
4915 while (d < PL_bufend && *d != '\n')
4916 d++;
4917 if (d < PL_bufend)
4918 d++;
4919 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4920 Perl_croak(aTHX_ "panic: input overflow");
4921#ifdef PERL_MAD
4922 if (PL_madskills)
cd81e915 4923 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4924#endif
4925 s = d;
4926 incline(s);
4927 }
3280af22
NIS
4928 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4929 PL_bufptr = s;
4930 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4931 return yylex();
a687059c 4932 }
378cc40b 4933 }
a687059c 4934 else {
5db06880
NC
4935#ifdef PERL_MAD
4936 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4937 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 4938 PL_faketokens = 0;
5db06880
NC
4939 s = SKIPSPACE0(s);
4940 TOKEN(PEG); /* make sure any #! line is accessible */
4941 }
4942 s = SKIPSPACE0(s);
4943 }
4944 else {
4945/* if (PL_madskills && PL_lex_formbrack) { */
4946 d = s;
4947 while (d < PL_bufend && *d != '\n')
4948 d++;
4949 if (d < PL_bufend)
4950 d++;
4951 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4952 Perl_croak(aTHX_ "panic: input overflow");
4953 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 4954 if (!PL_thiswhite)
6b29d1f5 4955 PL_thiswhite = newSVpvs("");
5db06880 4956 if (CopLINE(PL_curcop) == 1) {
76f68e9b 4957 sv_setpvs(PL_thiswhite, "");
cd81e915 4958 PL_faketokens = 0;
5db06880 4959 }
cd81e915 4960 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
4961 }
4962 s = d;
4963/* }
4964 *s = '\0';
4965 PL_bufend = s; */
4966 }
4967#else
378cc40b 4968 *s = '\0';
3280af22 4969 PL_bufend = s;
5db06880 4970#endif
a687059c 4971 }
378cc40b
LW
4972 goto retry;
4973 case '-':
79072805 4974 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 4975 I32 ftst = 0;
90771dc0 4976 char tmp;
e5edeb50 4977
378cc40b 4978 s++;
3280af22 4979 PL_bufptr = s;
748a9306
LW
4980 tmp = *s++;
4981
bf4acbe4 4982 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
4983 s++;
4984
4985 if (strnEQ(s,"=>",2)) {
3280af22 4986 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 4987 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
4988 OPERATOR('-'); /* unary minus */
4989 }
3280af22 4990 PL_last_uni = PL_oldbufptr;
748a9306 4991 switch (tmp) {
e5edeb50
JH
4992 case 'r': ftst = OP_FTEREAD; break;
4993 case 'w': ftst = OP_FTEWRITE; break;
4994 case 'x': ftst = OP_FTEEXEC; break;
4995 case 'o': ftst = OP_FTEOWNED; break;
4996 case 'R': ftst = OP_FTRREAD; break;
4997 case 'W': ftst = OP_FTRWRITE; break;
4998 case 'X': ftst = OP_FTREXEC; break;
4999 case 'O': ftst = OP_FTROWNED; break;
5000 case 'e': ftst = OP_FTIS; break;
5001 case 'z': ftst = OP_FTZERO; break;
5002 case 's': ftst = OP_FTSIZE; break;
5003 case 'f': ftst = OP_FTFILE; break;
5004 case 'd': ftst = OP_FTDIR; break;
5005 case 'l': ftst = OP_FTLINK; break;
5006 case 'p': ftst = OP_FTPIPE; break;
5007 case 'S': ftst = OP_FTSOCK; break;
5008 case 'u': ftst = OP_FTSUID; break;
5009 case 'g': ftst = OP_FTSGID; break;
5010 case 'k': ftst = OP_FTSVTX; break;
5011 case 'b': ftst = OP_FTBLK; break;
5012 case 'c': ftst = OP_FTCHR; break;
5013 case 't': ftst = OP_FTTTY; break;
5014 case 'T': ftst = OP_FTTEXT; break;
5015 case 'B': ftst = OP_FTBINARY; break;
5016 case 'M': case 'A': case 'C':
fafc274c 5017 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5018 switch (tmp) {
5019 case 'M': ftst = OP_FTMTIME; break;
5020 case 'A': ftst = OP_FTATIME; break;
5021 case 'C': ftst = OP_FTCTIME; break;
5022 default: break;
5023 }
5024 break;
378cc40b 5025 default:
378cc40b
LW
5026 break;
5027 }
e5edeb50 5028 if (ftst) {
eb160463 5029 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5030 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5031 "### Saw file test %c\n", (int)tmp);
5f80b19c 5032 } );
e5edeb50
JH
5033 FTST(ftst);
5034 }
5035 else {
5036 /* Assume it was a minus followed by a one-letter named
5037 * subroutine call (or a -bareword), then. */
95c31fe3 5038 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5039 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5040 (int) tmp);
5f80b19c 5041 } );
3cf7b4c4 5042 s = --PL_bufptr;
e5edeb50 5043 }
378cc40b 5044 }
90771dc0
NC
5045 {
5046 const char tmp = *s++;
5047 if (*s == tmp) {
5048 s++;
5049 if (PL_expect == XOPERATOR)
5050 TERM(POSTDEC);
5051 else
5052 OPERATOR(PREDEC);
5053 }
5054 else if (*s == '>') {
5055 s++;
29595ff2 5056 s = SKIPSPACE1(s);
90771dc0
NC
5057 if (isIDFIRST_lazy_if(s,UTF)) {
5058 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5059 TOKEN(ARROW);
5060 }
5061 else if (*s == '$')
5062 OPERATOR(ARROW);
5063 else
5064 TERM(ARROW);
5065 }
3280af22 5066 if (PL_expect == XOPERATOR)
90771dc0
NC
5067 Aop(OP_SUBTRACT);
5068 else {
5069 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5070 check_uni();
5071 OPERATOR('-'); /* unary minus */
79072805 5072 }
2f3197b3 5073 }
79072805 5074
378cc40b 5075 case '+':
90771dc0
NC
5076 {
5077 const char tmp = *s++;
5078 if (*s == tmp) {
5079 s++;
5080 if (PL_expect == XOPERATOR)
5081 TERM(POSTINC);
5082 else
5083 OPERATOR(PREINC);
5084 }
3280af22 5085 if (PL_expect == XOPERATOR)
90771dc0
NC
5086 Aop(OP_ADD);
5087 else {
5088 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5089 check_uni();
5090 OPERATOR('+');
5091 }
2f3197b3 5092 }
a687059c 5093
378cc40b 5094 case '*':
3280af22
NIS
5095 if (PL_expect != XOPERATOR) {
5096 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5097 PL_expect = XOPERATOR;
5098 force_ident(PL_tokenbuf, '*');
5099 if (!*PL_tokenbuf)
a0d0e21e 5100 PREREF('*');
79072805 5101 TERM('*');
a687059c 5102 }
79072805
LW
5103 s++;
5104 if (*s == '*') {
a687059c 5105 s++;
79072805 5106 PWop(OP_POW);
a687059c 5107 }
79072805
LW
5108 Mop(OP_MULTIPLY);
5109
378cc40b 5110 case '%':
3280af22 5111 if (PL_expect == XOPERATOR) {
bbce6d69 5112 ++s;
5113 Mop(OP_MODULO);
a687059c 5114 }
3280af22 5115 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5116 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5117 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5118 if (!PL_tokenbuf[1]) {
bbce6d69 5119 PREREF('%');
a687059c 5120 }
3280af22 5121 PL_pending_ident = '%';
bbce6d69 5122 TERM('%');
a687059c 5123
378cc40b 5124 case '^':
79072805 5125 s++;
a0d0e21e 5126 BOop(OP_BIT_XOR);
79072805 5127 case '[':
3280af22 5128 PL_lex_brackets++;
df3467db
IG
5129 {
5130 const char tmp = *s++;
5131 OPERATOR(tmp);
5132 }
378cc40b 5133 case '~':
0d863452 5134 if (s[1] == '~'
3e7dd34d 5135 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
5136 {
5137 s += 2;
5138 Eop(OP_SMARTMATCH);
5139 }
378cc40b 5140 case ',':
90771dc0
NC
5141 {
5142 const char tmp = *s++;
5143 OPERATOR(tmp);
5144 }
a0d0e21e
LW
5145 case ':':
5146 if (s[1] == ':') {
5147 len = 0;
0bfa2a8a 5148 goto just_a_word_zero_gv;
a0d0e21e
LW
5149 }
5150 s++;
09bef843
SB
5151 switch (PL_expect) {
5152 OP *attrs;
5db06880
NC
5153#ifdef PERL_MAD
5154 I32 stuffstart;
5155#endif
09bef843
SB
5156 case XOPERATOR:
5157 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5158 break;
5159 PL_bufptr = s; /* update in case we back off */
d83f38d8
NC
5160 if (*s == '=') {
5161 deprecate(":= for an empty attribute list");
5162 }
09bef843
SB
5163 goto grabattrs;
5164 case XATTRBLOCK:
5165 PL_expect = XBLOCK;
5166 goto grabattrs;
5167 case XATTRTERM:
5168 PL_expect = XTERMBLOCK;
5169 grabattrs:
5db06880
NC
5170#ifdef PERL_MAD
5171 stuffstart = s - SvPVX(PL_linestr) - 1;
5172#endif
29595ff2 5173 s = PEEKSPACE(s);
5f66b61c 5174 attrs = NULL;
7e2040f0 5175 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5176 I32 tmp;
5cc237b8 5177 SV *sv;
09bef843 5178 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5179 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5180 if (tmp < 0) tmp = -tmp;
5181 switch (tmp) {
5182 case KEY_or:
5183 case KEY_and:
5184 case KEY_for:
11baf631 5185 case KEY_foreach:
f9829d6b
GS
5186 case KEY_unless:
5187 case KEY_if:
5188 case KEY_while:
5189 case KEY_until:
5190 goto got_attrs;
5191 default:
5192 break;
5193 }
5194 }
5cc237b8 5195 sv = newSVpvn(s, len);
09bef843
SB
5196 if (*d == '(') {
5197 d = scan_str(d,TRUE,TRUE);
5198 if (!d) {
09bef843
SB
5199 /* MUST advance bufptr here to avoid bogus
5200 "at end of line" context messages from yyerror().
5201 */
5202 PL_bufptr = s + len;
5203 yyerror("Unterminated attribute parameter in attribute list");
5204 if (attrs)
5205 op_free(attrs);
5cc237b8 5206 sv_free(sv);
bbf60fe6 5207 return REPORT(0); /* EOF indicator */
09bef843
SB
5208 }
5209 }
5210 if (PL_lex_stuff) {
09bef843
SB
5211 sv_catsv(sv, PL_lex_stuff);
5212 attrs = append_elem(OP_LIST, attrs,
5213 newSVOP(OP_CONST, 0, sv));
5214 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5215 PL_lex_stuff = NULL;
09bef843
SB
5216 }
5217 else {
5cc237b8
BS
5218 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5219 sv_free(sv);
1108974d 5220 if (PL_in_my == KEY_our) {
df9a6019 5221 deprecate(":unique");
1108974d 5222 }
bfed75c6 5223 else
371fce9b
DM
5224 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5225 }
5226
d3cea301
SB
5227 /* NOTE: any CV attrs applied here need to be part of
5228 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5229 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5230 sv_free(sv);
78f9721b 5231 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5232 }
5233 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5234 sv_free(sv);
8e5dadda 5235 deprecate(":locked");
5cc237b8
BS
5236 }
5237 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5238 sv_free(sv);
78f9721b 5239 CvMETHOD_on(PL_compcv);
5cc237b8 5240 }
78f9721b
SM
5241 /* After we've set the flags, it could be argued that
5242 we don't need to do the attributes.pm-based setting
5243 process, and shouldn't bother appending recognized
d3cea301
SB
5244 flags. To experiment with that, uncomment the
5245 following "else". (Note that's already been
5246 uncommented. That keeps the above-applied built-in
5247 attributes from being intercepted (and possibly
5248 rejected) by a package's attribute routines, but is
5249 justified by the performance win for the common case
5250 of applying only built-in attributes.) */
0256094b 5251 else
78f9721b
SM
5252 attrs = append_elem(OP_LIST, attrs,
5253 newSVOP(OP_CONST, 0,
5cc237b8 5254 sv));
09bef843 5255 }
29595ff2 5256 s = PEEKSPACE(d);
0120eecf 5257 if (*s == ':' && s[1] != ':')
29595ff2 5258 s = PEEKSPACE(s+1);
0120eecf
GS
5259 else if (s == d)
5260 break; /* require real whitespace or :'s */
29595ff2 5261 /* XXX losing whitespace on sequential attributes here */
09bef843 5262 }
90771dc0
NC
5263 {
5264 const char tmp
5265 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5266 if (*s != ';' && *s != '}' && *s != tmp
5267 && (tmp != '=' || *s != ')')) {
5268 const char q = ((*s == '\'') ? '"' : '\'');
5269 /* If here for an expression, and parsed no attrs, back
5270 off. */
5271 if (tmp == '=' && !attrs) {
5272 s = PL_bufptr;
5273 break;
5274 }
5275 /* MUST advance bufptr here to avoid bogus "at end of line"
5276 context messages from yyerror().
5277 */
5278 PL_bufptr = s;
10edeb5d
JH
5279 yyerror( (const char *)
5280 (*s
5281 ? Perl_form(aTHX_ "Invalid separator character "
5282 "%c%c%c in attribute list", q, *s, q)
5283 : "Unterminated attribute list" ) );
90771dc0
NC
5284 if (attrs)
5285 op_free(attrs);
5286 OPERATOR(':');
09bef843 5287 }
09bef843 5288 }
f9829d6b 5289 got_attrs:
09bef843 5290 if (attrs) {
cd81e915 5291 start_force(PL_curforce);
9ded7720 5292 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5293 CURMAD('_', PL_nextwhite);
89122651 5294 force_next(THING);
5db06880
NC
5295 }
5296#ifdef PERL_MAD
5297 if (PL_madskills) {
cd81e915 5298 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5299 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5300 }
5db06880 5301#endif
09bef843
SB
5302 TOKEN(COLONATTR);
5303 }
a0d0e21e 5304 OPERATOR(':');
8990e307
LW
5305 case '(':
5306 s++;
3280af22
NIS
5307 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5308 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5309 else
3280af22 5310 PL_expect = XTERM;
29595ff2 5311 s = SKIPSPACE1(s);
a0d0e21e 5312 TOKEN('(');
378cc40b 5313 case ';':
f4dd75d9 5314 CLINE;
90771dc0
NC
5315 {
5316 const char tmp = *s++;
5317 OPERATOR(tmp);
5318 }
378cc40b 5319 case ')':
90771dc0
NC
5320 {
5321 const char tmp = *s++;
29595ff2 5322 s = SKIPSPACE1(s);
90771dc0
NC
5323 if (*s == '{')
5324 PREBLOCK(tmp);
5325 TERM(tmp);
5326 }
79072805
LW
5327 case ']':
5328 s++;
3280af22 5329 if (PL_lex_brackets <= 0)
d98d5fff 5330 yyerror("Unmatched right square bracket");
463ee0b2 5331 else
3280af22
NIS
5332 --PL_lex_brackets;
5333 if (PL_lex_state == LEX_INTERPNORMAL) {
5334 if (PL_lex_brackets == 0) {
02255c60
FC
5335 if (*s == '-' && s[1] == '>')
5336 PL_lex_state = LEX_INTERPENDMAYBE;
5337 else if (*s != '[' && *s != '{')
3280af22 5338 PL_lex_state = LEX_INTERPEND;
79072805
LW
5339 }
5340 }
4633a7c4 5341 TERM(']');
79072805
LW
5342 case '{':
5343 leftbracket:
79072805 5344 s++;
3280af22 5345 if (PL_lex_brackets > 100) {
8edd5f42 5346 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5347 }
3280af22 5348 switch (PL_expect) {
a0d0e21e 5349 case XTERM:
3280af22 5350 if (PL_lex_formbrack) {
a0d0e21e
LW
5351 s--;
5352 PRETERMBLOCK(DO);
5353 }
3280af22
NIS
5354 if (PL_oldoldbufptr == PL_last_lop)
5355 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5356 else
3280af22 5357 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5358 OPERATOR(HASHBRACK);
a0d0e21e 5359 case XOPERATOR:
bf4acbe4 5360 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5361 s++;
44a8e56a 5362 d = s;
3280af22
NIS
5363 PL_tokenbuf[0] = '\0';
5364 if (d < PL_bufend && *d == '-') {
5365 PL_tokenbuf[0] = '-';
44a8e56a 5366 d++;
bf4acbe4 5367 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5368 d++;
5369 }
7e2040f0 5370 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5371 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5372 FALSE, &len);
bf4acbe4 5373 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5374 d++;
5375 if (*d == '}') {
f54cb97a 5376 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5377 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5378 if (minus)
5379 force_next('-');
748a9306
LW
5380 }
5381 }
5382 /* FALL THROUGH */
09bef843 5383 case XATTRBLOCK:
748a9306 5384 case XBLOCK:
3280af22
NIS
5385 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5386 PL_expect = XSTATE;
a0d0e21e 5387 break;
09bef843 5388 case XATTRTERM:
a0d0e21e 5389 case XTERMBLOCK:
3280af22
NIS
5390 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5391 PL_expect = XSTATE;
a0d0e21e
LW
5392 break;
5393 default: {
f54cb97a 5394 const char *t;
3280af22
NIS
5395 if (PL_oldoldbufptr == PL_last_lop)
5396 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5397 else
3280af22 5398 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5399 s = SKIPSPACE1(s);
8452ff4b
SB
5400 if (*s == '}') {
5401 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5402 PL_expect = XTERM;
5403 /* This hack is to get the ${} in the message. */
5404 PL_bufptr = s+1;
5405 yyerror("syntax error");
5406 break;
5407 }
a0d0e21e 5408 OPERATOR(HASHBRACK);
8452ff4b 5409 }
b8a4b1be
GS
5410 /* This hack serves to disambiguate a pair of curlies
5411 * as being a block or an anon hash. Normally, expectation
5412 * determines that, but in cases where we're not in a
5413 * position to expect anything in particular (like inside
5414 * eval"") we have to resolve the ambiguity. This code
5415 * covers the case where the first term in the curlies is a
5416 * quoted string. Most other cases need to be explicitly
a0288114 5417 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5418 * curly in order to force resolution as an anon hash.
5419 *
5420 * XXX should probably propagate the outer expectation
5421 * into eval"" to rely less on this hack, but that could
5422 * potentially break current behavior of eval"".
5423 * GSAR 97-07-21
5424 */
5425 t = s;
5426 if (*s == '\'' || *s == '"' || *s == '`') {
5427 /* common case: get past first string, handling escapes */
3280af22 5428 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5429 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5430 t++;
5431 t++;
a0d0e21e 5432 }
b8a4b1be 5433 else if (*s == 'q') {
3280af22 5434 if (++t < PL_bufend
b8a4b1be 5435 && (!isALNUM(*t)
3280af22 5436 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5437 && !isALNUM(*t))))
5438 {
abc667d1 5439 /* skip q//-like construct */
f54cb97a 5440 const char *tmps;
b8a4b1be
GS
5441 char open, close, term;
5442 I32 brackets = 1;
5443
3280af22 5444 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5445 t++;
abc667d1
DM
5446 /* check for q => */
5447 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5448 OPERATOR(HASHBRACK);
5449 }
b8a4b1be
GS
5450 term = *t;
5451 open = term;
5452 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5453 term = tmps[5];
5454 close = term;
5455 if (open == close)
3280af22
NIS
5456 for (t++; t < PL_bufend; t++) {
5457 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5458 t++;
6d07e5e9 5459 else if (*t == open)
b8a4b1be
GS
5460 break;
5461 }
abc667d1 5462 else {
3280af22
NIS
5463 for (t++; t < PL_bufend; t++) {
5464 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5465 t++;
6d07e5e9 5466 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5467 break;
5468 else if (*t == open)
5469 brackets++;
5470 }
abc667d1
DM
5471 }
5472 t++;
b8a4b1be 5473 }
abc667d1
DM
5474 else
5475 /* skip plain q word */
5476 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5477 t += UTF8SKIP(t);
a0d0e21e 5478 }
7e2040f0 5479 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5480 t += UTF8SKIP(t);
7e2040f0 5481 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5482 t += UTF8SKIP(t);
a0d0e21e 5483 }
3280af22 5484 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5485 t++;
b8a4b1be
GS
5486 /* if comma follows first term, call it an anon hash */
5487 /* XXX it could be a comma expression with loop modifiers */
3280af22 5488 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5489 || (*t == '=' && t[1] == '>')))
a0d0e21e 5490 OPERATOR(HASHBRACK);
3280af22 5491 if (PL_expect == XREF)
4e4e412b 5492 PL_expect = XTERM;
a0d0e21e 5493 else {
3280af22
NIS
5494 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5495 PL_expect = XSTATE;
a0d0e21e 5496 }
8990e307 5497 }
a0d0e21e 5498 break;
463ee0b2 5499 }
6154021b 5500 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5501 if (isSPACE(*s) || *s == '#')
3280af22 5502 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5503 TOKEN('{');
378cc40b 5504 case '}':
79072805
LW
5505 rightbracket:
5506 s++;
3280af22 5507 if (PL_lex_brackets <= 0)
d98d5fff 5508 yyerror("Unmatched right curly bracket");
463ee0b2 5509 else
3280af22 5510 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5511 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5512 PL_lex_formbrack = 0;
5513 if (PL_lex_state == LEX_INTERPNORMAL) {
5514 if (PL_lex_brackets == 0) {
9059aa12
LW
5515 if (PL_expect & XFAKEBRACK) {
5516 PL_expect &= XENUMMASK;
3280af22
NIS
5517 PL_lex_state = LEX_INTERPEND;
5518 PL_bufptr = s;
5db06880
NC
5519#if 0
5520 if (PL_madskills) {
cd81e915 5521 if (!PL_thiswhite)
6b29d1f5 5522 PL_thiswhite = newSVpvs("");
76f68e9b 5523 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5524 }
5525#endif
cea2e8a9 5526 return yylex(); /* ignore fake brackets */
79072805 5527 }
fa83b5b6 5528 if (*s == '-' && s[1] == '>')
3280af22 5529 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5530 else if (*s != '[' && *s != '{')
3280af22 5531 PL_lex_state = LEX_INTERPEND;
79072805
LW
5532 }
5533 }
9059aa12
LW
5534 if (PL_expect & XFAKEBRACK) {
5535 PL_expect &= XENUMMASK;
3280af22 5536 PL_bufptr = s;
cea2e8a9 5537 return yylex(); /* ignore fake brackets */
748a9306 5538 }
cd81e915 5539 start_force(PL_curforce);
5db06880
NC
5540 if (PL_madskills) {
5541 curmad('X', newSVpvn(s-1,1));
cd81e915 5542 CURMAD('_', PL_thiswhite);
5db06880 5543 }
79072805 5544 force_next('}');
5db06880 5545#ifdef PERL_MAD
cd81e915 5546 if (!PL_thistoken)
6b29d1f5 5547 PL_thistoken = newSVpvs("");
5db06880 5548#endif
79072805 5549 TOKEN(';');
378cc40b
LW
5550 case '&':
5551 s++;
90771dc0 5552 if (*s++ == '&')
a0d0e21e 5553 AOPERATOR(ANDAND);
378cc40b 5554 s--;
3280af22 5555 if (PL_expect == XOPERATOR) {
041457d9
DM
5556 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5557 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5558 {
57843af0 5559 CopLINE_dec(PL_curcop);
f1f66076 5560 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5561 CopLINE_inc(PL_curcop);
463ee0b2 5562 }
79072805 5563 BAop(OP_BIT_AND);
463ee0b2 5564 }
79072805 5565
3280af22
NIS
5566 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5567 if (*PL_tokenbuf) {
5568 PL_expect = XOPERATOR;
5569 force_ident(PL_tokenbuf, '&');
463ee0b2 5570 }
79072805
LW
5571 else
5572 PREREF('&');
6154021b 5573 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5574 TERM('&');
5575
378cc40b
LW
5576 case '|':
5577 s++;
90771dc0 5578 if (*s++ == '|')
a0d0e21e 5579 AOPERATOR(OROR);
378cc40b 5580 s--;
79072805 5581 BOop(OP_BIT_OR);
378cc40b
LW
5582 case '=':
5583 s++;
748a9306 5584 {
90771dc0
NC
5585 const char tmp = *s++;
5586 if (tmp == '=')
5587 Eop(OP_EQ);
5588 if (tmp == '>')
5589 OPERATOR(',');
5590 if (tmp == '~')
5591 PMop(OP_MATCH);
5592 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5593 && strchr("+-*/%.^&|<",tmp))
5594 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5595 "Reversed %c= operator",(int)tmp);
5596 s--;
5597 if (PL_expect == XSTATE && isALPHA(tmp) &&
5598 (s == PL_linestart+1 || s[-2] == '\n') )
5599 {
5600 if (PL_in_eval && !PL_rsfp) {
5601 d = PL_bufend;
5602 while (s < d) {
5603 if (*s++ == '\n') {
5604 incline(s);
5605 if (strnEQ(s,"=cut",4)) {
5606 s = strchr(s,'\n');
5607 if (s)
5608 s++;
5609 else
5610 s = d;
5611 incline(s);
5612 goto retry;
5613 }
5614 }
a5f75d66 5615 }
90771dc0 5616 goto retry;
a5f75d66 5617 }
5db06880
NC
5618#ifdef PERL_MAD
5619 if (PL_madskills) {
cd81e915 5620 if (!PL_thiswhite)
6b29d1f5 5621 PL_thiswhite = newSVpvs("");
cd81e915 5622 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5623 PL_bufend - PL_linestart);
5624 }
5625#endif
90771dc0
NC
5626 s = PL_bufend;
5627 PL_doextract = TRUE;
5628 goto retry;
a5f75d66 5629 }
a0d0e21e 5630 }
3280af22 5631 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5632 const char *t = s;
51882d45 5633#ifdef PERL_STRICT_CR
c35e046a 5634 while (SPACE_OR_TAB(*t))
51882d45 5635#else
c35e046a 5636 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5637#endif
c35e046a 5638 t++;
a0d0e21e
LW
5639 if (*t == '\n' || *t == '#') {
5640 s--;
3280af22 5641 PL_expect = XBLOCK;
a0d0e21e
LW
5642 goto leftbracket;
5643 }
79072805 5644 }
6154021b 5645 pl_yylval.ival = 0;
a0d0e21e 5646 OPERATOR(ASSIGNOP);
378cc40b
LW
5647 case '!':
5648 s++;
90771dc0
NC
5649 {
5650 const char tmp = *s++;
5651 if (tmp == '=') {
5652 /* was this !=~ where !~ was meant?
5653 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5654
5655 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5656 const char *t = s+1;
5657
5658 while (t < PL_bufend && isSPACE(*t))
5659 ++t;
5660
5661 if (*t == '/' || *t == '?' ||
5662 ((*t == 'm' || *t == 's' || *t == 'y')
5663 && !isALNUM(t[1])) ||
5664 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5665 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5666 "!=~ should be !~");
5667 }
5668 Eop(OP_NE);
5669 }
5670 if (tmp == '~')
5671 PMop(OP_NOT);
5672 }
378cc40b
LW
5673 s--;
5674 OPERATOR('!');
5675 case '<':
3280af22 5676 if (PL_expect != XOPERATOR) {
93a17b20 5677 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5678 check_uni();
79072805
LW
5679 if (s[1] == '<')
5680 s = scan_heredoc(s);
5681 else
5682 s = scan_inputsymbol(s);
5683 TERM(sublex_start());
378cc40b
LW
5684 }
5685 s++;
90771dc0
NC
5686 {
5687 char tmp = *s++;
5688 if (tmp == '<')
5689 SHop(OP_LEFT_SHIFT);
5690 if (tmp == '=') {
5691 tmp = *s++;
5692 if (tmp == '>')
5693 Eop(OP_NCMP);
5694 s--;
5695 Rop(OP_LE);
5696 }
395c3793 5697 }
378cc40b 5698 s--;
79072805 5699 Rop(OP_LT);
378cc40b
LW
5700 case '>':
5701 s++;
90771dc0
NC
5702 {
5703 const char tmp = *s++;
5704 if (tmp == '>')
5705 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5706 else if (tmp == '=')
90771dc0
NC
5707 Rop(OP_GE);
5708 }
378cc40b 5709 s--;
79072805 5710 Rop(OP_GT);
378cc40b
LW
5711
5712 case '$':
bbce6d69 5713 CLINE;
5714
3280af22
NIS
5715 if (PL_expect == XOPERATOR) {
5716 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5717 return deprecate_commaless_var_list();
a0d0e21e 5718 }
8990e307 5719 }
a0d0e21e 5720
c0b977fd 5721 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5722 PL_tokenbuf[0] = '@';
376b8730
SM
5723 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5724 sizeof PL_tokenbuf - 1, FALSE);
5725 if (PL_expect == XOPERATOR)
5726 no_op("Array length", s);
3280af22 5727 if (!PL_tokenbuf[1])
a0d0e21e 5728 PREREF(DOLSHARP);
3280af22
NIS
5729 PL_expect = XOPERATOR;
5730 PL_pending_ident = '#';
463ee0b2 5731 TOKEN(DOLSHARP);
79072805 5732 }
bbce6d69 5733
3280af22 5734 PL_tokenbuf[0] = '$';
376b8730
SM
5735 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5736 sizeof PL_tokenbuf - 1, FALSE);
5737 if (PL_expect == XOPERATOR)
5738 no_op("Scalar", s);
3280af22
NIS
5739 if (!PL_tokenbuf[1]) {
5740 if (s == PL_bufend)
bbce6d69 5741 yyerror("Final $ should be \\$ or $name");
5742 PREREF('$');
8990e307 5743 }
a0d0e21e 5744
bbce6d69 5745 /* This kludge not intended to be bulletproof. */
3280af22 5746 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5747 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5748 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5749 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5750 TERM(THING);
5751 }
5752
ff68c719 5753 d = s;
90771dc0
NC
5754 {
5755 const char tmp = *s;
ae28bb2a 5756 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5757 s = SKIPSPACE1(s);
ff68c719 5758
90771dc0
NC
5759 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5760 && intuit_more(s)) {
5761 if (*s == '[') {
5762 PL_tokenbuf[0] = '@';
5763 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5764 char *t = s+1;
5765
5766 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5767 t++;
90771dc0 5768 if (*t++ == ',') {
29595ff2 5769 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5770 while (t < PL_bufend && *t != ']')
5771 t++;
9014280d 5772 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5773 "Multidimensional syntax %.*s not supported",
36c7798d 5774 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5775 }
748a9306 5776 }
93a17b20 5777 }
90771dc0
NC
5778 else if (*s == '{') {
5779 char *t;
5780 PL_tokenbuf[0] = '%';
5781 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5782 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5783 {
5784 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5785 do {
5786 t++;
5787 } while (isSPACE(*t));
90771dc0 5788 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5789 STRLEN len;
90771dc0 5790 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5791 &len);
c35e046a
AL
5792 while (isSPACE(*t))
5793 t++;
780a5241 5794 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5795 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5796 "You need to quote \"%s\"",
5797 tmpbuf);
5798 }
5799 }
5800 }
93a17b20 5801 }
bbce6d69 5802
90771dc0
NC
5803 PL_expect = XOPERATOR;
5804 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5805 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5806 if (!islop || PL_last_lop_op == OP_GREPSTART)
5807 PL_expect = XOPERATOR;
5808 else if (strchr("$@\"'`q", *s))
5809 PL_expect = XTERM; /* e.g. print $fh "foo" */
5810 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5811 PL_expect = XTERM; /* e.g. print $fh &sub */
5812 else if (isIDFIRST_lazy_if(s,UTF)) {
5813 char tmpbuf[sizeof PL_tokenbuf];
5814 int t2;
5815 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5816 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5817 /* binary operators exclude handle interpretations */
5818 switch (t2) {
5819 case -KEY_x:
5820 case -KEY_eq:
5821 case -KEY_ne:
5822 case -KEY_gt:
5823 case -KEY_lt:
5824 case -KEY_ge:
5825 case -KEY_le:
5826 case -KEY_cmp:
5827 break;
5828 default:
5829 PL_expect = XTERM; /* e.g. print $fh length() */
5830 break;
5831 }
5832 }
5833 else {
5834 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5835 }
5836 }
90771dc0
NC
5837 else if (isDIGIT(*s))
5838 PL_expect = XTERM; /* e.g. print $fh 3 */
5839 else if (*s == '.' && isDIGIT(s[1]))
5840 PL_expect = XTERM; /* e.g. print $fh .3 */
5841 else if ((*s == '?' || *s == '-' || *s == '+')
5842 && !isSPACE(s[1]) && s[1] != '=')
5843 PL_expect = XTERM; /* e.g. print $fh -1 */
5844 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5845 && s[1] != '/')
5846 PL_expect = XTERM; /* e.g. print $fh /.../
5847 XXX except DORDOR operator
5848 */
5849 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5850 && s[2] != '=')
5851 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5852 }
bbce6d69 5853 }
3280af22 5854 PL_pending_ident = '$';
79072805 5855 TOKEN('$');
378cc40b
LW
5856
5857 case '@':
3280af22 5858 if (PL_expect == XOPERATOR)
bbce6d69 5859 no_op("Array", s);
3280af22
NIS
5860 PL_tokenbuf[0] = '@';
5861 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5862 if (!PL_tokenbuf[1]) {
bbce6d69 5863 PREREF('@');
5864 }
3280af22 5865 if (PL_lex_state == LEX_NORMAL)
29595ff2 5866 s = SKIPSPACE1(s);
3280af22 5867 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5868 if (*s == '{')
3280af22 5869 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5870
5871 /* Warn about @ where they meant $. */
041457d9
DM
5872 if (*s == '[' || *s == '{') {
5873 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5874 const char *t = s + 1;
7e2040f0 5875 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5876 t++;
5877 if (*t == '}' || *t == ']') {
5878 t++;
29595ff2 5879 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5880 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5881 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5882 (int)(t-PL_bufptr), PL_bufptr,
5883 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5884 }
93a17b20
LW
5885 }
5886 }
463ee0b2 5887 }
3280af22 5888 PL_pending_ident = '@';
79072805 5889 TERM('@');
378cc40b 5890
c963b151 5891 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5892 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5893 s += 2;
5894 AOPERATOR(DORDOR);
5895 }
c963b151 5896 case '?': /* may either be conditional or pattern */
be25f609 5897 if (PL_expect == XOPERATOR) {
90771dc0 5898 char tmp = *s++;
c963b151 5899 if(tmp == '?') {
be25f609 5900 OPERATOR('?');
c963b151
BD
5901 }
5902 else {
5903 tmp = *s++;
5904 if(tmp == '/') {
5905 /* A // operator. */
5906 AOPERATOR(DORDOR);
5907 }
5908 else {
5909 s--;
5910 Mop(OP_DIVIDE);
5911 }
5912 }
5913 }
5914 else {
5915 /* Disable warning on "study /blah/" */
5916 if (PL_oldoldbufptr == PL_last_uni
5917 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5918 || memNE(PL_last_uni, "study", 5)
5919 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5920 ))
5921 check_uni();
5922 s = scan_pat(s,OP_MATCH);
5923 TERM(sublex_start());
5924 }
378cc40b
LW
5925
5926 case '.':
51882d45
GS
5927 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5928#ifdef PERL_STRICT_CR
5929 && s[1] == '\n'
5930#else
5931 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5932#endif
5933 && (s == PL_linestart || s[-1] == '\n') )
5934 {
3280af22
NIS
5935 PL_lex_formbrack = 0;
5936 PL_expect = XSTATE;
79072805
LW
5937 goto rightbracket;
5938 }
be25f609 5939 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5940 s += 3;
5941 OPERATOR(YADAYADA);
5942 }
3280af22 5943 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 5944 char tmp = *s++;
a687059c
LW
5945 if (*s == tmp) {
5946 s++;
2f3197b3
LW
5947 if (*s == tmp) {
5948 s++;
6154021b 5949 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
5950 }
5951 else
6154021b 5952 pl_yylval.ival = 0;
378cc40b 5953 OPERATOR(DOTDOT);
a687059c 5954 }
79072805 5955 Aop(OP_CONCAT);
378cc40b
LW
5956 }
5957 /* FALL THROUGH */
5958 case '0': case '1': case '2': case '3': case '4':
5959 case '5': case '6': case '7': case '8': case '9':
6154021b 5960 s = scan_num(s, &pl_yylval);
931e0695 5961 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 5962 if (PL_expect == XOPERATOR)
8990e307 5963 no_op("Number",s);
79072805
LW
5964 TERM(THING);
5965
5966 case '\'':
5db06880 5967 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5968 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5969 if (PL_expect == XOPERATOR) {
5970 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5971 return deprecate_commaless_var_list();
a0d0e21e 5972 }
463ee0b2 5973 else
8990e307 5974 no_op("String",s);
463ee0b2 5975 }
79072805 5976 if (!s)
d4c19fe8 5977 missingterm(NULL);
6154021b 5978 pl_yylval.ival = OP_CONST;
79072805
LW
5979 TERM(sublex_start());
5980
5981 case '"':
5db06880 5982 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5983 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5984 if (PL_expect == XOPERATOR) {
5985 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5986 return deprecate_commaless_var_list();
a0d0e21e 5987 }
463ee0b2 5988 else
8990e307 5989 no_op("String",s);
463ee0b2 5990 }
79072805 5991 if (!s)
d4c19fe8 5992 missingterm(NULL);
6154021b 5993 pl_yylval.ival = OP_CONST;
cfd0369c
NC
5994 /* FIXME. I think that this can be const if char *d is replaced by
5995 more localised variables. */
3280af22 5996 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 5997 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 5998 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
5999 break;
6000 }
6001 }
79072805
LW
6002 TERM(sublex_start());
6003
6004 case '`':
5db06880 6005 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6006 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6007 if (PL_expect == XOPERATOR)
8990e307 6008 no_op("Backticks",s);
79072805 6009 if (!s)
d4c19fe8 6010 missingterm(NULL);
9b201d7d 6011 readpipe_override();
79072805
LW
6012 TERM(sublex_start());
6013
6014 case '\\':
6015 s++;
a2a5de95
NC
6016 if (PL_lex_inwhat && isDIGIT(*s))
6017 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6018 *s, *s);
3280af22 6019 if (PL_expect == XOPERATOR)
8990e307 6020 no_op("Backslash",s);
79072805
LW
6021 OPERATOR(REFGEN);
6022
a7cb1f99 6023 case 'v':
e526c9e6 6024 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6025 char *start = s + 2;
dd629d5b 6026 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6027 start++;
6028 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6029 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6030 TERM(THING);
6031 }
e526c9e6 6032 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6033 else if (!isALPHA(*start) && (PL_expect == XTERM
6034 || PL_expect == XREF || PL_expect == XSTATE
6035 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6036 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6037 if (!gv) {
6154021b 6038 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6039 TERM(THING);
6040 }
6041 }
a7cb1f99
GS
6042 }
6043 goto keylookup;
79072805 6044 case 'x':
3280af22 6045 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6046 s++;
6047 Mop(OP_REPEAT);
2f3197b3 6048 }
79072805
LW
6049 goto keylookup;
6050
378cc40b 6051 case '_':
79072805
LW
6052 case 'a': case 'A':
6053 case 'b': case 'B':
6054 case 'c': case 'C':
6055 case 'd': case 'D':
6056 case 'e': case 'E':
6057 case 'f': case 'F':
6058 case 'g': case 'G':
6059 case 'h': case 'H':
6060 case 'i': case 'I':
6061 case 'j': case 'J':
6062 case 'k': case 'K':
6063 case 'l': case 'L':
6064 case 'm': case 'M':
6065 case 'n': case 'N':
6066 case 'o': case 'O':
6067 case 'p': case 'P':
6068 case 'q': case 'Q':
6069 case 'r': case 'R':
6070 case 's': case 'S':
6071 case 't': case 'T':
6072 case 'u': case 'U':
a7cb1f99 6073 case 'V':
79072805
LW
6074 case 'w': case 'W':
6075 case 'X':
6076 case 'y': case 'Y':
6077 case 'z': case 'Z':
6078
49dc05e3 6079 keylookup: {
88e1f1a2 6080 bool anydelim;
90771dc0 6081 I32 tmp;
10edeb5d
JH
6082
6083 orig_keyword = 0;
6084 gv = NULL;
6085 gvp = NULL;
49dc05e3 6086
3280af22
NIS
6087 PL_bufptr = s;
6088 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6089
6090 /* Some keywords can be followed by any delimiter, including ':' */
88e1f1a2 6091 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
155aba94
GS
6092 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6093 (PL_tokenbuf[0] == 'q' &&
6094 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 6095
6096 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6097 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6098 goto just_a_word;
6099
3643fb5f 6100 d = s;
3280af22 6101 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6102 d++; /* no comments skipped here, or s### is misparsed */
6103
748a9306 6104 /* Is this a word before a => operator? */
1c3923b3 6105 if (*d == '=' && d[1] == '>') {
748a9306 6106 CLINE;
6154021b 6107 pl_yylval.opval
d0a148a6
NC
6108 = (OP*)newSVOP(OP_CONST, 0,
6109 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6110 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6111 TERM(WORD);
6112 }
6113
88e1f1a2
JV
6114 /* Check for plugged-in keyword */
6115 {
6116 OP *o;
6117 int result;
6118 char *saved_bufptr = PL_bufptr;
6119 PL_bufptr = s;
6120 result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
6121 s = PL_bufptr;
6122 if (result == KEYWORD_PLUGIN_DECLINE) {
6123 /* not a plugged-in keyword */
6124 PL_bufptr = saved_bufptr;
6125 } else if (result == KEYWORD_PLUGIN_STMT) {
6126 pl_yylval.opval = o;
6127 CLINE;
6128 PL_expect = XSTATE;
6129 return REPORT(PLUGSTMT);
6130 } else if (result == KEYWORD_PLUGIN_EXPR) {
6131 pl_yylval.opval = o;
6132 CLINE;
6133 PL_expect = XOPERATOR;
6134 return REPORT(PLUGEXPR);
6135 } else {
6136 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6137 PL_tokenbuf);
6138 }
6139 }
6140
6141 /* Check for built-in keyword */
6142 tmp = keyword(PL_tokenbuf, len, 0);
6143
6144 /* Is this a label? */
6145 if (!anydelim && PL_expect == XSTATE
6146 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6147 s = d + 1;
6148 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6149 CLINE;
6150 TOKEN(LABEL);
6151 }
6152
a0d0e21e 6153 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6154 GV *ogv = NULL; /* override (winner) */
6155 GV *hgv = NULL; /* hidden (loser) */
3280af22 6156 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6157 CV *cv;
90e5519e 6158 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6159 (cv = GvCVu(gv)))
6160 {
6161 if (GvIMPORTED_CV(gv))
6162 ogv = gv;
6163 else if (! CvMETHOD(cv))
6164 hgv = gv;
6165 }
6166 if (!ogv &&
3280af22 6167 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6168 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6169 GvCVu(gv) && GvIMPORTED_CV(gv))
6170 {
6171 ogv = gv;
6172 }
6173 }
6174 if (ogv) {
30fe34ed 6175 orig_keyword = tmp;
56f7f34b 6176 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6177 }
6178 else if (gv && !gvp
6179 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6180 && GvCVu(gv))
6e7b2336
GS
6181 {
6182 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6183 }
56f7f34b
CS
6184 else { /* no override */
6185 tmp = -tmp;
a2a5de95
NC
6186 if (tmp == KEY_dump) {
6187 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6188 "dump() better written as CORE::dump()");
ac206dc8 6189 }
a0714e2c 6190 gv = NULL;
56f7f34b 6191 gvp = 0;
a2a5de95
NC
6192 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6193 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6194 "Ambiguous call resolved as CORE::%s(), %s",
6195 GvENAME(hgv), "qualify as such or use &");
49dc05e3 6196 }
a0d0e21e
LW
6197 }
6198
6199 reserved_word:
6200 switch (tmp) {
79072805
LW
6201
6202 default: /* not a keyword */
0bfa2a8a
NC
6203 /* Trade off - by using this evil construction we can pull the
6204 variable gv into the block labelled keylookup. If not, then
6205 we have to give it function scope so that the goto from the
6206 earlier ':' case doesn't bypass the initialisation. */
6207 if (0) {
6208 just_a_word_zero_gv:
6209 gv = NULL;
6210 gvp = NULL;
8bee0991 6211 orig_keyword = 0;
0bfa2a8a 6212 }
93a17b20 6213 just_a_word: {
96e4d5b1 6214 SV *sv;
ce29ac45 6215 int pkgname = 0;
f54cb97a 6216 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6217 OP *rv2cv_op;
5069cc75 6218 CV *cv;
5db06880 6219#ifdef PERL_MAD
cd81e915 6220 SV *nextPL_nextwhite = 0;
5db06880
NC
6221#endif
6222
8990e307
LW
6223
6224 /* Get the rest if it looks like a package qualifier */
6225
155aba94 6226 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6227 STRLEN morelen;
3280af22 6228 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6229 TRUE, &morelen);
6230 if (!morelen)
cea2e8a9 6231 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6232 *s == '\'' ? "'" : "::");
c3e0f903 6233 len += morelen;
ce29ac45 6234 pkgname = 1;
a0d0e21e 6235 }
8990e307 6236
3280af22
NIS
6237 if (PL_expect == XOPERATOR) {
6238 if (PL_bufptr == PL_linestart) {
57843af0 6239 CopLINE_dec(PL_curcop);
f1f66076 6240 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6241 CopLINE_inc(PL_curcop);
463ee0b2
LW
6242 }
6243 else
54310121 6244 no_op("Bareword",s);
463ee0b2 6245 }
8990e307 6246
c3e0f903
GS
6247 /* Look for a subroutine with this name in current package,
6248 unless name is "Foo::", in which case Foo is a bearword
6249 (and a package name). */
6250
5db06880 6251 if (len > 2 && !PL_madskills &&
3280af22 6252 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6253 {
f776e3cd 6254 if (ckWARN(WARN_BAREWORD)
90e5519e 6255 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6256 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6257 "Bareword \"%s\" refers to nonexistent package",
3280af22 6258 PL_tokenbuf);
c3e0f903 6259 len -= 2;
3280af22 6260 PL_tokenbuf[len] = '\0';
a0714e2c 6261 gv = NULL;
c3e0f903
GS
6262 gvp = 0;
6263 }
6264 else {
62d55b22
NC
6265 if (!gv) {
6266 /* Mustn't actually add anything to a symbol table.
6267 But also don't want to "initialise" any placeholder
6268 constants that might already be there into full
6269 blown PVGVs with attached PVCV. */
90e5519e
NC
6270 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6271 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6272 }
b3d904f3 6273 len = 0;
c3e0f903
GS
6274 }
6275
6276 /* if we saw a global override before, get the right name */
8990e307 6277
49dc05e3 6278 if (gvp) {
396482e1 6279 sv = newSVpvs("CORE::GLOBAL::");
3280af22 6280 sv_catpv(sv,PL_tokenbuf);
49dc05e3 6281 }
8a7a129d
NC
6282 else {
6283 /* If len is 0, newSVpv does strlen(), which is correct.
6284 If len is non-zero, then it will be the true length,
6285 and so the scalar will be created correctly. */
6286 sv = newSVpv(PL_tokenbuf,len);
6287 }
5db06880 6288#ifdef PERL_MAD
cd81e915
NC
6289 if (PL_madskills && !PL_thistoken) {
6290 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6291 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6292 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6293 }
6294#endif
8990e307 6295
a0d0e21e
LW
6296 /* Presume this is going to be a bareword of some sort. */
6297
6298 CLINE;
6154021b
RGS
6299 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6300 pl_yylval.opval->op_private = OPpCONST_BARE;
8f8cf39c
JH
6301 /* UTF-8 package name? */
6302 if (UTF && !IN_BYTES &&
95a20fc0 6303 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
8f8cf39c 6304 SvUTF8_on(sv);
a0d0e21e 6305
c3e0f903
GS
6306 /* And if "Foo::", then that's what it certainly is. */
6307
6308 if (len)
6309 goto safe_bareword;
6310
f7461760
Z
6311 cv = NULL;
6312 {
6313 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6314 const_op->op_private = OPpCONST_BARE;
6315 rv2cv_op = newCVREF(0, const_op);
6316 }
6317 if (rv2cv_op->op_type == OP_RV2CV &&
6318 (rv2cv_op->op_flags & OPf_KIDS)) {
6319 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6320 switch (rv_op->op_type) {
6321 case OP_CONST: {
6322 SV *sv = cSVOPx_sv(rv_op);
6323 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6324 cv = (CV*)SvRV(sv);
6325 } break;
6326 case OP_GV: {
6327 GV *gv = cGVOPx_gv(rv_op);
6328 CV *maybe_cv = GvCVu(gv);
6329 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6330 cv = maybe_cv;
6331 } break;
6332 }
6333 }
5069cc75 6334
8990e307
LW
6335 /* See if it's the indirect object for a list operator. */
6336
3280af22
NIS
6337 if (PL_oldoldbufptr &&
6338 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6339 (PL_oldoldbufptr == PL_last_lop
6340 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6341 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6342 (PL_expect == XREF ||
6343 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6344 {
748a9306
LW
6345 bool immediate_paren = *s == '(';
6346
a0d0e21e 6347 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6348 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6349#ifdef PERL_MAD
cd81e915 6350 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6351#endif
a0d0e21e
LW
6352
6353 /* Two barewords in a row may indicate method call. */
6354
62d55b22 6355 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6356 (tmp = intuit_method(s, gv, cv))) {
6357 op_free(rv2cv_op);
bbf60fe6 6358 return REPORT(tmp);
f7461760 6359 }
a0d0e21e
LW
6360
6361 /* If not a declared subroutine, it's an indirect object. */
6362 /* (But it's an indir obj regardless for sort.) */
7294df96 6363 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6364
7294df96
RGS
6365 if (
6366 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6367 (!cv &&
a9ef352a 6368 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6369 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6370 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6371 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6372 )
a9ef352a 6373 {
3280af22 6374 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6375 goto bareword;
93a17b20
LW
6376 }
6377 }
8990e307 6378
3280af22 6379 PL_expect = XOPERATOR;
5db06880
NC
6380#ifdef PERL_MAD
6381 if (isSPACE(*s))
cd81e915
NC
6382 s = SKIPSPACE2(s,nextPL_nextwhite);
6383 PL_nextwhite = nextPL_nextwhite;
5db06880 6384#else
8990e307 6385 s = skipspace(s);
5db06880 6386#endif
1c3923b3
GS
6387
6388 /* Is this a word before a => operator? */
ce29ac45 6389 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6390 op_free(rv2cv_op);
1c3923b3 6391 CLINE;
6154021b 6392 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6393 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6394 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6395 TERM(WORD);
6396 }
6397
6398 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6399 if (*s == '(') {
79072805 6400 CLINE;
5069cc75 6401 if (cv) {
c35e046a
AL
6402 d = s + 1;
6403 while (SPACE_OR_TAB(*d))
6404 d++;
f7461760 6405 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6406 s = d + 1;
c631f32b 6407 goto its_constant;
96e4d5b1 6408 }
6409 }
5db06880
NC
6410#ifdef PERL_MAD
6411 if (PL_madskills) {
cd81e915
NC
6412 PL_nextwhite = PL_thiswhite;
6413 PL_thiswhite = 0;
5db06880 6414 }
cd81e915 6415 start_force(PL_curforce);
5db06880 6416#endif
6154021b 6417 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6418 PL_expect = XOPERATOR;
5db06880
NC
6419#ifdef PERL_MAD
6420 if (PL_madskills) {
cd81e915
NC
6421 PL_nextwhite = nextPL_nextwhite;
6422 curmad('X', PL_thistoken);
6b29d1f5 6423 PL_thistoken = newSVpvs("");
5db06880
NC
6424 }
6425#endif
f7461760 6426 op_free(rv2cv_op);
93a17b20 6427 force_next(WORD);
6154021b 6428 pl_yylval.ival = 0;
463ee0b2 6429 TOKEN('&');
79072805 6430 }
93a17b20 6431
a0d0e21e 6432 /* If followed by var or block, call it a method (unless sub) */
8990e307 6433
f7461760
Z
6434 if ((*s == '$' || *s == '{') && !cv) {
6435 op_free(rv2cv_op);
3280af22
NIS
6436 PL_last_lop = PL_oldbufptr;
6437 PL_last_lop_op = OP_METHOD;
93a17b20 6438 PREBLOCK(METHOD);
463ee0b2
LW
6439 }
6440
8990e307
LW
6441 /* If followed by a bareword, see if it looks like indir obj. */
6442
30fe34ed
RGS
6443 if (!orig_keyword
6444 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6445 && (tmp = intuit_method(s, gv, cv))) {
6446 op_free(rv2cv_op);
bbf60fe6 6447 return REPORT(tmp);
f7461760 6448 }
93a17b20 6449
8990e307
LW
6450 /* Not a method, so call it a subroutine (if defined) */
6451
5069cc75 6452 if (cv) {
9b387841
NC
6453 if (lastchar == '-')
6454 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6455 "Ambiguous use of -%s resolved as -&%s()",
6456 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6457 /* Check for a constant sub */
f7461760 6458 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6459 its_constant:
f7461760 6460 op_free(rv2cv_op);
6154021b
RGS
6461 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6462 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6463 pl_yylval.opval->op_private = 0;
96e4d5b1 6464 TOKEN(WORD);
89bfa8cd 6465 }
6466
6154021b 6467 op_free(pl_yylval.opval);
f7461760 6468 pl_yylval.opval = rv2cv_op;
6154021b 6469 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6470 PL_last_lop = PL_oldbufptr;
bf848113 6471 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6472 /* Is there a prototype? */
5db06880
NC
6473 if (
6474#ifdef PERL_MAD
6475 cv &&
6476#endif
d9f2850e
RGS
6477 SvPOK(cv))
6478 {
5f66b61c 6479 STRLEN protolen;
daba3364 6480 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6481 if (!protolen)
4633a7c4 6482 TERM(FUNC0SUB);
8c28b960 6483 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
4633a7c4 6484 OPERATOR(UNIOPSUB);
0f5d0394
AE
6485 while (*proto == ';')
6486 proto++;
7a52d87a 6487 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6488 if (PL_curstash)
6489 sv_setpvs(PL_subname, "__ANON__");
6490 else
6491 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6492 PREBLOCK(LSTOPSUB);
6493 }
a9ef352a 6494 }
5db06880
NC
6495#ifdef PERL_MAD
6496 {
6497 if (PL_madskills) {
cd81e915
NC
6498 PL_nextwhite = PL_thiswhite;
6499 PL_thiswhite = 0;
5db06880 6500 }
cd81e915 6501 start_force(PL_curforce);
6154021b 6502 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6503 PL_expect = XTERM;
6504 if (PL_madskills) {
cd81e915
NC
6505 PL_nextwhite = nextPL_nextwhite;
6506 curmad('X', PL_thistoken);
6b29d1f5 6507 PL_thistoken = newSVpvs("");
5db06880
NC
6508 }
6509 force_next(WORD);
6510 TOKEN(NOAMP);
6511 }
6512 }
6513
6514 /* Guess harder when madskills require "best effort". */
6515 if (PL_madskills && (!gv || !GvCVu(gv))) {
6516 int probable_sub = 0;
6517 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6518 probable_sub = 1;
6519 else if (isALPHA(*s)) {
6520 char tmpbuf[1024];
6521 STRLEN tmplen;
6522 d = s;
6523 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6524 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6525 probable_sub = 1;
6526 else {
6527 while (d < PL_bufend && isSPACE(*d))
6528 d++;
6529 if (*d == '=' && d[1] == '>')
6530 probable_sub = 1;
6531 }
6532 }
6533 if (probable_sub) {
7a6d04f4 6534 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6535 op_free(pl_yylval.opval);
f7461760 6536 pl_yylval.opval = rv2cv_op;
6154021b 6537 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6538 PL_last_lop = PL_oldbufptr;
6539 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6540 PL_nextwhite = PL_thiswhite;
6541 PL_thiswhite = 0;
6542 start_force(PL_curforce);
6154021b 6543 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6544 PL_expect = XTERM;
cd81e915
NC
6545 PL_nextwhite = nextPL_nextwhite;
6546 curmad('X', PL_thistoken);
6b29d1f5 6547 PL_thistoken = newSVpvs("");
5db06880
NC
6548 force_next(WORD);
6549 TOKEN(NOAMP);
6550 }
6551#else
6154021b 6552 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6553 PL_expect = XTERM;
8990e307
LW
6554 force_next(WORD);
6555 TOKEN(NOAMP);
5db06880 6556#endif
8990e307 6557 }
748a9306 6558
8990e307
LW
6559 /* Call it a bare word */
6560
5603f27d 6561 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6562 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6563 else {
9a073a1d
RGS
6564 bareword:
6565 /* after "print" and similar functions (corresponding to
6566 * "F? L" in opcode.pl), whatever wasn't already parsed as
6567 * a filehandle should be subject to "strict subs".
6568 * Likewise for the optional indirect-object argument to system
6569 * or exec, which can't be a bareword */
6570 if ((PL_last_lop_op == OP_PRINT
6571 || PL_last_lop_op == OP_PRTF
6572 || PL_last_lop_op == OP_SAY
6573 || PL_last_lop_op == OP_SYSTEM
6574 || PL_last_lop_op == OP_EXEC)
6575 && (PL_hints & HINT_STRICT_SUBS))
6576 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6577 if (lastchar != '-') {
6578 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6579 d = PL_tokenbuf;
6580 while (isLOWER(*d))
6581 d++;
da51bb9b 6582 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6583 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6584 PL_tokenbuf);
6585 }
748a9306
LW
6586 }
6587 }
f7461760 6588 op_free(rv2cv_op);
c3e0f903
GS
6589
6590 safe_bareword:
9b387841
NC
6591 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6592 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6593 "Operator or semicolon missing before %c%s",
6594 lastchar, PL_tokenbuf);
6595 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6596 "Ambiguous use of %c resolved as operator %c",
6597 lastchar, lastchar);
748a9306 6598 }
93a17b20 6599 TOKEN(WORD);
79072805 6600 }
79072805 6601
68dc0745 6602 case KEY___FILE__:
6154021b 6603 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6604 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6605 TERM(THING);
6606
79072805 6607 case KEY___LINE__:
6154021b 6608 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6609 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6610 TERM(THING);
68dc0745 6611
6612 case KEY___PACKAGE__:
6154021b 6613 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6614 (PL_curstash
5aaec2b4 6615 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6616 : &PL_sv_undef));
79072805 6617 TERM(THING);
79072805 6618
e50aee73 6619 case KEY___DATA__:
79072805
LW
6620 case KEY___END__: {
6621 GV *gv;
3280af22 6622 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6623 const char *pname = "main";
3280af22 6624 if (PL_tokenbuf[2] == 'D')
bfcb3514 6625 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6626 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6627 SVt_PVIO);
a5f75d66 6628 GvMULTI_on(gv);
79072805 6629 if (!GvIO(gv))
a0d0e21e 6630 GvIOp(gv) = newIO();
3280af22 6631 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6632#if defined(HAS_FCNTL) && defined(F_SETFD)
6633 {
f54cb97a 6634 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6635 fcntl(fd,F_SETFD,fd >= 3);
6636 }
79072805 6637#endif
fd049845 6638 /* Mark this internal pseudo-handle as clean */
6639 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6640 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6641 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6642 else
50952442 6643 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6644#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6645 /* if the script was opened in binmode, we need to revert
53129d29 6646 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6647 * XXX this is a questionable hack at best. */
53129d29
GS
6648 if (PL_bufend-PL_bufptr > 2
6649 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6650 {
6651 Off_t loc = 0;
50952442 6652 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6653 loc = PerlIO_tell(PL_rsfp);
6654 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6655 }
2986a63f
JH
6656#ifdef NETWARE
6657 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6658#else
c39cd008 6659 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6660#endif /* NETWARE */
1143fce0
JH
6661#ifdef PERLIO_IS_STDIO /* really? */
6662# if defined(__BORLANDC__)
cb359b41
JH
6663 /* XXX see note in do_binmode() */
6664 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6665# endif
6666#endif
c39cd008
GS
6667 if (loc > 0)
6668 PerlIO_seek(PL_rsfp, loc, 0);
6669 }
6670 }
6671#endif
7948272d 6672#ifdef PERLIO_LAYERS
52d2e0f4
JH
6673 if (!IN_BYTES) {
6674 if (UTF)
6675 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6676 else if (PL_encoding) {
6677 SV *name;
6678 dSP;
6679 ENTER;
6680 SAVETMPS;
6681 PUSHMARK(sp);
6682 EXTEND(SP, 1);
6683 XPUSHs(PL_encoding);
6684 PUTBACK;
6685 call_method("name", G_SCALAR);
6686 SPAGAIN;
6687 name = POPs;
6688 PUTBACK;
bfed75c6 6689 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6690 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6691 SVfARG(name)));
52d2e0f4
JH
6692 FREETMPS;
6693 LEAVE;
6694 }
6695 }
7948272d 6696#endif
5db06880
NC
6697#ifdef PERL_MAD
6698 if (PL_madskills) {
cd81e915
NC
6699 if (PL_realtokenstart >= 0) {
6700 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6701 if (!PL_endwhite)
6b29d1f5 6702 PL_endwhite = newSVpvs("");
cd81e915
NC
6703 sv_catsv(PL_endwhite, PL_thiswhite);
6704 PL_thiswhite = 0;
6705 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6706 PL_realtokenstart = -1;
5db06880 6707 }
5cc814fd
NC
6708 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6709 != NULL) ;
5db06880
NC
6710 }
6711#endif
4608196e 6712 PL_rsfp = NULL;
79072805
LW
6713 }
6714 goto fake_eof;
e929a76b 6715 }
de3bb511 6716
8990e307 6717 case KEY_AUTOLOAD:
ed6116ce 6718 case KEY_DESTROY:
79072805 6719 case KEY_BEGIN:
3c10abe3 6720 case KEY_UNITCHECK:
7d30b5c4 6721 case KEY_CHECK:
7d07dbc2 6722 case KEY_INIT:
7d30b5c4 6723 case KEY_END:
3280af22
NIS
6724 if (PL_expect == XSTATE) {
6725 s = PL_bufptr;
93a17b20 6726 goto really_sub;
79072805
LW
6727 }
6728 goto just_a_word;
6729
a0d0e21e
LW
6730 case KEY_CORE:
6731 if (*s == ':' && s[1] == ':') {
6732 s += 2;
748a9306 6733 d = s;
3280af22 6734 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6735 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6736 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6737 if (tmp < 0)
6738 tmp = -tmp;
850e8516 6739 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6740 /* that's a way to remember we saw "CORE::" */
850e8516 6741 orig_keyword = tmp;
a0d0e21e
LW
6742 goto reserved_word;
6743 }
6744 goto just_a_word;
6745
463ee0b2
LW
6746 case KEY_abs:
6747 UNI(OP_ABS);
6748
79072805
LW
6749 case KEY_alarm:
6750 UNI(OP_ALARM);
6751
6752 case KEY_accept:
a0d0e21e 6753 LOP(OP_ACCEPT,XTERM);
79072805 6754
463ee0b2
LW
6755 case KEY_and:
6756 OPERATOR(ANDOP);
6757
79072805 6758 case KEY_atan2:
a0d0e21e 6759 LOP(OP_ATAN2,XTERM);
85e6fe83 6760
79072805 6761 case KEY_bind:
a0d0e21e 6762 LOP(OP_BIND,XTERM);
79072805
LW
6763
6764 case KEY_binmode:
1c1fc3ea 6765 LOP(OP_BINMODE,XTERM);
79072805
LW
6766
6767 case KEY_bless:
a0d0e21e 6768 LOP(OP_BLESS,XTERM);
79072805 6769
0d863452
RH
6770 case KEY_break:
6771 FUN0(OP_BREAK);
6772
79072805
LW
6773 case KEY_chop:
6774 UNI(OP_CHOP);
6775
6776 case KEY_continue:
0d863452
RH
6777 /* When 'use switch' is in effect, continue has a dual
6778 life as a control operator. */
6779 {
ef89dcc3 6780 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6781 PREBLOCK(CONTINUE);
6782 else {
6783 /* We have to disambiguate the two senses of
6784 "continue". If the next token is a '{' then
6785 treat it as the start of a continue block;
6786 otherwise treat it as a control operator.
6787 */
6788 s = skipspace(s);
6789 if (*s == '{')
79072805 6790 PREBLOCK(CONTINUE);
0d863452
RH
6791 else
6792 FUN0(OP_CONTINUE);
6793 }
6794 }
79072805
LW
6795
6796 case KEY_chdir:
fafc274c
NC
6797 /* may use HOME */
6798 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6799 UNI(OP_CHDIR);
6800
6801 case KEY_close:
6802 UNI(OP_CLOSE);
6803
6804 case KEY_closedir:
6805 UNI(OP_CLOSEDIR);
6806
6807 case KEY_cmp:
6808 Eop(OP_SCMP);
6809
6810 case KEY_caller:
6811 UNI(OP_CALLER);
6812
6813 case KEY_crypt:
6814#ifdef FCRYPT
f4c556ac
GS
6815 if (!PL_cryptseen) {
6816 PL_cryptseen = TRUE;
de3bb511 6817 init_des();
f4c556ac 6818 }
a687059c 6819#endif
a0d0e21e 6820 LOP(OP_CRYPT,XTERM);
79072805
LW
6821
6822 case KEY_chmod:
a0d0e21e 6823 LOP(OP_CHMOD,XTERM);
79072805
LW
6824
6825 case KEY_chown:
a0d0e21e 6826 LOP(OP_CHOWN,XTERM);
79072805
LW
6827
6828 case KEY_connect:
a0d0e21e 6829 LOP(OP_CONNECT,XTERM);
79072805 6830
463ee0b2
LW
6831 case KEY_chr:
6832 UNI(OP_CHR);
6833
79072805
LW
6834 case KEY_cos:
6835 UNI(OP_COS);
6836
6837 case KEY_chroot:
6838 UNI(OP_CHROOT);
6839
0d863452
RH
6840 case KEY_default:
6841 PREBLOCK(DEFAULT);
6842
79072805 6843 case KEY_do:
29595ff2 6844 s = SKIPSPACE1(s);
79072805 6845 if (*s == '{')
a0d0e21e 6846 PRETERMBLOCK(DO);
79072805 6847 if (*s != '\'')
89c5585f 6848 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6849 if (orig_keyword == KEY_do) {
6850 orig_keyword = 0;
6154021b 6851 pl_yylval.ival = 1;
850e8516
RGS
6852 }
6853 else
6154021b 6854 pl_yylval.ival = 0;
378cc40b 6855 OPERATOR(DO);
79072805
LW
6856
6857 case KEY_die:
3280af22 6858 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6859 LOP(OP_DIE,XTERM);
79072805
LW
6860
6861 case KEY_defined:
6862 UNI(OP_DEFINED);
6863
6864 case KEY_delete:
a0d0e21e 6865 UNI(OP_DELETE);
79072805
LW
6866
6867 case KEY_dbmopen:
5c1737d1 6868 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
a0d0e21e 6869 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6870
6871 case KEY_dbmclose:
6872 UNI(OP_DBMCLOSE);
6873
6874 case KEY_dump:
a0d0e21e 6875 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6876 LOOPX(OP_DUMP);
6877
6878 case KEY_else:
6879 PREBLOCK(ELSE);
6880
6881 case KEY_elsif:
6154021b 6882 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6883 OPERATOR(ELSIF);
6884
6885 case KEY_eq:
6886 Eop(OP_SEQ);
6887
a0d0e21e
LW
6888 case KEY_exists:
6889 UNI(OP_EXISTS);
4e553d73 6890
79072805 6891 case KEY_exit:
5db06880
NC
6892 if (PL_madskills)
6893 UNI(OP_INT);
79072805
LW
6894 UNI(OP_EXIT);
6895
6896 case KEY_eval:
29595ff2 6897 s = SKIPSPACE1(s);
32e2a35d
RGS
6898 if (*s == '{') { /* block eval */
6899 PL_expect = XTERMBLOCK;
6900 UNIBRACK(OP_ENTERTRY);
6901 }
6902 else { /* string eval */
6903 PL_expect = XTERM;
6904 UNIBRACK(OP_ENTEREVAL);
6905 }
79072805
LW
6906
6907 case KEY_eof:
6908 UNI(OP_EOF);
6909
6910 case KEY_exp:
6911 UNI(OP_EXP);
6912
6913 case KEY_each:
6914 UNI(OP_EACH);
6915
6916 case KEY_exec:
a0d0e21e 6917 LOP(OP_EXEC,XREF);
79072805
LW
6918
6919 case KEY_endhostent:
6920 FUN0(OP_EHOSTENT);
6921
6922 case KEY_endnetent:
6923 FUN0(OP_ENETENT);
6924
6925 case KEY_endservent:
6926 FUN0(OP_ESERVENT);
6927
6928 case KEY_endprotoent:
6929 FUN0(OP_EPROTOENT);
6930
6931 case KEY_endpwent:
6932 FUN0(OP_EPWENT);
6933
6934 case KEY_endgrent:
6935 FUN0(OP_EGRENT);
6936
6937 case KEY_for:
6938 case KEY_foreach:
6154021b 6939 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 6940 s = SKIPSPACE1(s);
7e2040f0 6941 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 6942 char *p = s;
5db06880
NC
6943#ifdef PERL_MAD
6944 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6945#endif
6946
3280af22 6947 if ((PL_bufend - p) >= 3 &&
55497cff 6948 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6949 p += 2;
77ca0c92
LW
6950 else if ((PL_bufend - p) >= 4 &&
6951 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6952 p += 3;
29595ff2 6953 p = PEEKSPACE(p);
7e2040f0 6954 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
6955 p = scan_ident(p, PL_bufend,
6956 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 6957 p = PEEKSPACE(p);
77ca0c92
LW
6958 }
6959 if (*p != '$')
cea2e8a9 6960 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
6961#ifdef PERL_MAD
6962 s = SvPVX(PL_linestr) + soff;
6963#endif
55497cff 6964 }
79072805
LW
6965 OPERATOR(FOR);
6966
6967 case KEY_formline:
a0d0e21e 6968 LOP(OP_FORMLINE,XTERM);
79072805
LW
6969
6970 case KEY_fork:
6971 FUN0(OP_FORK);
6972
6973 case KEY_fcntl:
a0d0e21e 6974 LOP(OP_FCNTL,XTERM);
79072805
LW
6975
6976 case KEY_fileno:
6977 UNI(OP_FILENO);
6978
6979 case KEY_flock:
a0d0e21e 6980 LOP(OP_FLOCK,XTERM);
79072805
LW
6981
6982 case KEY_gt:
6983 Rop(OP_SGT);
6984
6985 case KEY_ge:
6986 Rop(OP_SGE);
6987
6988 case KEY_grep:
2c38e13d 6989 LOP(OP_GREPSTART, XREF);
79072805
LW
6990
6991 case KEY_goto:
a0d0e21e 6992 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6993 LOOPX(OP_GOTO);
6994
6995 case KEY_gmtime:
6996 UNI(OP_GMTIME);
6997
6998 case KEY_getc:
6f33ba73 6999 UNIDOR(OP_GETC);
79072805
LW
7000
7001 case KEY_getppid:
7002 FUN0(OP_GETPPID);
7003
7004 case KEY_getpgrp:
7005 UNI(OP_GETPGRP);
7006
7007 case KEY_getpriority:
a0d0e21e 7008 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7009
7010 case KEY_getprotobyname:
7011 UNI(OP_GPBYNAME);
7012
7013 case KEY_getprotobynumber:
a0d0e21e 7014 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7015
7016 case KEY_getprotoent:
7017 FUN0(OP_GPROTOENT);
7018
7019 case KEY_getpwent:
7020 FUN0(OP_GPWENT);
7021
7022 case KEY_getpwnam:
ff68c719 7023 UNI(OP_GPWNAM);
79072805
LW
7024
7025 case KEY_getpwuid:
ff68c719 7026 UNI(OP_GPWUID);
79072805
LW
7027
7028 case KEY_getpeername:
7029 UNI(OP_GETPEERNAME);
7030
7031 case KEY_gethostbyname:
7032 UNI(OP_GHBYNAME);
7033
7034 case KEY_gethostbyaddr:
a0d0e21e 7035 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7036
7037 case KEY_gethostent:
7038 FUN0(OP_GHOSTENT);
7039
7040 case KEY_getnetbyname:
7041 UNI(OP_GNBYNAME);
7042
7043 case KEY_getnetbyaddr:
a0d0e21e 7044 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7045
7046 case KEY_getnetent:
7047 FUN0(OP_GNETENT);
7048
7049 case KEY_getservbyname:
a0d0e21e 7050 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7051
7052 case KEY_getservbyport:
a0d0e21e 7053 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7054
7055 case KEY_getservent:
7056 FUN0(OP_GSERVENT);
7057
7058 case KEY_getsockname:
7059 UNI(OP_GETSOCKNAME);
7060
7061 case KEY_getsockopt:
a0d0e21e 7062 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7063
7064 case KEY_getgrent:
7065 FUN0(OP_GGRENT);
7066
7067 case KEY_getgrnam:
ff68c719 7068 UNI(OP_GGRNAM);
79072805
LW
7069
7070 case KEY_getgrgid:
ff68c719 7071 UNI(OP_GGRGID);
79072805
LW
7072
7073 case KEY_getlogin:
7074 FUN0(OP_GETLOGIN);
7075
0d863452 7076 case KEY_given:
6154021b 7077 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7078 OPERATOR(GIVEN);
7079
93a17b20 7080 case KEY_glob:
a0d0e21e 7081 LOP(OP_GLOB,XTERM);
93a17b20 7082
79072805
LW
7083 case KEY_hex:
7084 UNI(OP_HEX);
7085
7086 case KEY_if:
6154021b 7087 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7088 OPERATOR(IF);
7089
7090 case KEY_index:
a0d0e21e 7091 LOP(OP_INDEX,XTERM);
79072805
LW
7092
7093 case KEY_int:
7094 UNI(OP_INT);
7095
7096 case KEY_ioctl:
a0d0e21e 7097 LOP(OP_IOCTL,XTERM);
79072805
LW
7098
7099 case KEY_join:
a0d0e21e 7100 LOP(OP_JOIN,XTERM);
79072805
LW
7101
7102 case KEY_keys:
7103 UNI(OP_KEYS);
7104
7105 case KEY_kill:
a0d0e21e 7106 LOP(OP_KILL,XTERM);
79072805
LW
7107
7108 case KEY_last:
a0d0e21e 7109 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7110 LOOPX(OP_LAST);
4e553d73 7111
79072805
LW
7112 case KEY_lc:
7113 UNI(OP_LC);
7114
7115 case KEY_lcfirst:
7116 UNI(OP_LCFIRST);
7117
7118 case KEY_local:
6154021b 7119 pl_yylval.ival = 0;
79072805
LW
7120 OPERATOR(LOCAL);
7121
7122 case KEY_length:
7123 UNI(OP_LENGTH);
7124
7125 case KEY_lt:
7126 Rop(OP_SLT);
7127
7128 case KEY_le:
7129 Rop(OP_SLE);
7130
7131 case KEY_localtime:
7132 UNI(OP_LOCALTIME);
7133
7134 case KEY_log:
7135 UNI(OP_LOG);
7136
7137 case KEY_link:
a0d0e21e 7138 LOP(OP_LINK,XTERM);
79072805
LW
7139
7140 case KEY_listen:
a0d0e21e 7141 LOP(OP_LISTEN,XTERM);
79072805 7142
c0329465
MB
7143 case KEY_lock:
7144 UNI(OP_LOCK);
7145
79072805
LW
7146 case KEY_lstat:
7147 UNI(OP_LSTAT);
7148
7149 case KEY_m:
8782bef2 7150 s = scan_pat(s,OP_MATCH);
79072805
LW
7151 TERM(sublex_start());
7152
a0d0e21e 7153 case KEY_map:
2c38e13d 7154 LOP(OP_MAPSTART, XREF);
4e4e412b 7155
79072805 7156 case KEY_mkdir:
a0d0e21e 7157 LOP(OP_MKDIR,XTERM);
79072805
LW
7158
7159 case KEY_msgctl:
a0d0e21e 7160 LOP(OP_MSGCTL,XTERM);
79072805
LW
7161
7162 case KEY_msgget:
a0d0e21e 7163 LOP(OP_MSGGET,XTERM);
79072805
LW
7164
7165 case KEY_msgrcv:
a0d0e21e 7166 LOP(OP_MSGRCV,XTERM);
79072805
LW
7167
7168 case KEY_msgsnd:
a0d0e21e 7169 LOP(OP_MSGSND,XTERM);
79072805 7170
77ca0c92 7171 case KEY_our:
93a17b20 7172 case KEY_my:
952306ac 7173 case KEY_state:
eac04b2e 7174 PL_in_my = (U16)tmp;
29595ff2 7175 s = SKIPSPACE1(s);
7e2040f0 7176 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7177#ifdef PERL_MAD
7178 char* start = s;
7179#endif
3280af22 7180 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7181 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7182 goto really_sub;
def3634b 7183 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7184 if (!PL_in_my_stash) {
c750a3ec 7185 char tmpbuf[1024];
3280af22 7186 PL_bufptr = s;
d9fad198 7187 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7188 yyerror(tmpbuf);
7189 }
5db06880
NC
7190#ifdef PERL_MAD
7191 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7192 sv_catsv(PL_thistoken, PL_nextwhite);
7193 PL_nextwhite = 0;
7194 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7195 }
7196#endif
c750a3ec 7197 }
6154021b 7198 pl_yylval.ival = 1;
55497cff 7199 OPERATOR(MY);
93a17b20 7200
79072805 7201 case KEY_next:
a0d0e21e 7202 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7203 LOOPX(OP_NEXT);
7204
7205 case KEY_ne:
7206 Eop(OP_SNE);
7207
a0d0e21e 7208 case KEY_no:
468aa647 7209 s = tokenize_use(0, s);
a0d0e21e
LW
7210 OPERATOR(USE);
7211
7212 case KEY_not:
29595ff2 7213 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
7214 FUN1(OP_NOT);
7215 else
7216 OPERATOR(NOTOP);
a0d0e21e 7217
79072805 7218 case KEY_open:
29595ff2 7219 s = SKIPSPACE1(s);
7e2040f0 7220 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7221 const char *t;
c35e046a
AL
7222 for (d = s; isALNUM_lazy_if(d,UTF);)
7223 d++;
7224 for (t=d; isSPACE(*t);)
7225 t++;
e2ab214b 7226 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7227 /* [perl #16184] */
7228 && !(t[0] == '=' && t[1] == '>')
7229 ) {
5f66b61c 7230 int parms_len = (int)(d-s);
9014280d 7231 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7232 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7233 parms_len, s, parms_len, s);
66fbe8fb 7234 }
93a17b20 7235 }
a0d0e21e 7236 LOP(OP_OPEN,XTERM);
79072805 7237
463ee0b2 7238 case KEY_or:
6154021b 7239 pl_yylval.ival = OP_OR;
463ee0b2
LW
7240 OPERATOR(OROP);
7241
79072805
LW
7242 case KEY_ord:
7243 UNI(OP_ORD);
7244
7245 case KEY_oct:
7246 UNI(OP_OCT);
7247
7248 case KEY_opendir:
a0d0e21e 7249 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7250
7251 case KEY_print:
3280af22 7252 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7253 LOP(OP_PRINT,XREF);
79072805
LW
7254
7255 case KEY_printf:
3280af22 7256 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7257 LOP(OP_PRTF,XREF);
79072805 7258
c07a80fd 7259 case KEY_prototype:
7260 UNI(OP_PROTOTYPE);
7261
79072805 7262 case KEY_push:
a0d0e21e 7263 LOP(OP_PUSH,XTERM);
79072805
LW
7264
7265 case KEY_pop:
6f33ba73 7266 UNIDOR(OP_POP);
79072805 7267
a0d0e21e 7268 case KEY_pos:
6f33ba73 7269 UNIDOR(OP_POS);
4e553d73 7270
79072805 7271 case KEY_pack:
a0d0e21e 7272 LOP(OP_PACK,XTERM);
79072805
LW
7273
7274 case KEY_package:
a0d0e21e 7275 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7276 s = SKIPSPACE1(s);
91152fc1 7277 s = force_strict_version(s);
4e4da3ac 7278 PL_lex_expect = XBLOCK;
79072805
LW
7279 OPERATOR(PACKAGE);
7280
7281 case KEY_pipe:
a0d0e21e 7282 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7283
7284 case KEY_q:
5db06880 7285 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7286 if (!s)
d4c19fe8 7287 missingterm(NULL);
6154021b 7288 pl_yylval.ival = OP_CONST;
79072805
LW
7289 TERM(sublex_start());
7290
a0d0e21e
LW
7291 case KEY_quotemeta:
7292 UNI(OP_QUOTEMETA);
7293
8990e307 7294 case KEY_qw:
5db06880 7295 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7296 if (!s)
d4c19fe8 7297 missingterm(NULL);
3480a8d2 7298 PL_expect = XOPERATOR;
8127e0e3
GS
7299 force_next(')');
7300 if (SvCUR(PL_lex_stuff)) {
5f66b61c 7301 OP *words = NULL;
8127e0e3 7302 int warned = 0;
3280af22 7303 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7304 while (len) {
d4c19fe8
AL
7305 for (; isSPACE(*d) && len; --len, ++d)
7306 /**/;
8127e0e3 7307 if (len) {
d4c19fe8 7308 SV *sv;
f54cb97a 7309 const char *b = d;
e476b1b5 7310 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7311 for (; !isSPACE(*d) && len; --len, ++d) {
7312 if (*d == ',') {
9014280d 7313 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7314 "Possible attempt to separate words with commas");
7315 ++warned;
7316 }
7317 else if (*d == '#') {
9014280d 7318 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7319 "Possible attempt to put comments in qw() list");
7320 ++warned;
7321 }
7322 }
7323 }
7324 else {
d4c19fe8
AL
7325 for (; !isSPACE(*d) && len; --len, ++d)
7326 /**/;
8127e0e3 7327 }
740cce10 7328 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8127e0e3 7329 words = append_elem(OP_LIST, words,
7948272d 7330 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7331 }
7332 }
8127e0e3 7333 if (words) {
cd81e915 7334 start_force(PL_curforce);
9ded7720 7335 NEXTVAL_NEXTTOKE.opval = words;
8127e0e3
GS
7336 force_next(THING);
7337 }
55497cff 7338 }
37fd879b 7339 if (PL_lex_stuff) {
8127e0e3 7340 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7341 PL_lex_stuff = NULL;
37fd879b 7342 }
3280af22 7343 PL_expect = XTERM;
8127e0e3 7344 TOKEN('(');
8990e307 7345
79072805 7346 case KEY_qq:
5db06880 7347 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7348 if (!s)
d4c19fe8 7349 missingterm(NULL);
6154021b 7350 pl_yylval.ival = OP_STRINGIFY;
3280af22 7351 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7352 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7353 TERM(sublex_start());
7354
8782bef2
GB
7355 case KEY_qr:
7356 s = scan_pat(s,OP_QR);
7357 TERM(sublex_start());
7358
79072805 7359 case KEY_qx:
5db06880 7360 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7361 if (!s)
d4c19fe8 7362 missingterm(NULL);
9b201d7d 7363 readpipe_override();
79072805
LW
7364 TERM(sublex_start());
7365
7366 case KEY_return:
7367 OLDLOP(OP_RETURN);
7368
7369 case KEY_require:
29595ff2 7370 s = SKIPSPACE1(s);
e759cc13
RGS
7371 if (isDIGIT(*s)) {
7372 s = force_version(s, FALSE);
a7cb1f99 7373 }
e759cc13
RGS
7374 else if (*s != 'v' || !isDIGIT(s[1])
7375 || (s = force_version(s, TRUE), *s == 'v'))
7376 {
a7cb1f99
GS
7377 *PL_tokenbuf = '\0';
7378 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7379 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7380 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7381 else if (*s == '<')
7382 yyerror("<> should be quotes");
7383 }
a72a1c8b
RGS
7384 if (orig_keyword == KEY_require) {
7385 orig_keyword = 0;
6154021b 7386 pl_yylval.ival = 1;
a72a1c8b
RGS
7387 }
7388 else
6154021b 7389 pl_yylval.ival = 0;
a72a1c8b
RGS
7390 PL_expect = XTERM;
7391 PL_bufptr = s;
7392 PL_last_uni = PL_oldbufptr;
7393 PL_last_lop_op = OP_REQUIRE;
7394 s = skipspace(s);
7395 return REPORT( (int)REQUIRE );
79072805
LW
7396
7397 case KEY_reset:
7398 UNI(OP_RESET);
7399
7400 case KEY_redo:
a0d0e21e 7401 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7402 LOOPX(OP_REDO);
7403
7404 case KEY_rename:
a0d0e21e 7405 LOP(OP_RENAME,XTERM);
79072805
LW
7406
7407 case KEY_rand:
7408 UNI(OP_RAND);
7409
7410 case KEY_rmdir:
7411 UNI(OP_RMDIR);
7412
7413 case KEY_rindex:
a0d0e21e 7414 LOP(OP_RINDEX,XTERM);
79072805
LW
7415
7416 case KEY_read:
a0d0e21e 7417 LOP(OP_READ,XTERM);
79072805
LW
7418
7419 case KEY_readdir:
7420 UNI(OP_READDIR);
7421
93a17b20 7422 case KEY_readline:
6f33ba73 7423 UNIDOR(OP_READLINE);
93a17b20
LW
7424
7425 case KEY_readpipe:
0858480c 7426 UNIDOR(OP_BACKTICK);
93a17b20 7427
79072805
LW
7428 case KEY_rewinddir:
7429 UNI(OP_REWINDDIR);
7430
7431 case KEY_recv:
a0d0e21e 7432 LOP(OP_RECV,XTERM);
79072805
LW
7433
7434 case KEY_reverse:
a0d0e21e 7435 LOP(OP_REVERSE,XTERM);
79072805
LW
7436
7437 case KEY_readlink:
6f33ba73 7438 UNIDOR(OP_READLINK);
79072805
LW
7439
7440 case KEY_ref:
7441 UNI(OP_REF);
7442
7443 case KEY_s:
7444 s = scan_subst(s);
6154021b 7445 if (pl_yylval.opval)
79072805
LW
7446 TERM(sublex_start());
7447 else
7448 TOKEN(1); /* force error */
7449
0d863452
RH
7450 case KEY_say:
7451 checkcomma(s,PL_tokenbuf,"filehandle");
7452 LOP(OP_SAY,XREF);
7453
a0d0e21e
LW
7454 case KEY_chomp:
7455 UNI(OP_CHOMP);
4e553d73 7456
79072805
LW
7457 case KEY_scalar:
7458 UNI(OP_SCALAR);
7459
7460 case KEY_select:
a0d0e21e 7461 LOP(OP_SELECT,XTERM);
79072805
LW
7462
7463 case KEY_seek:
a0d0e21e 7464 LOP(OP_SEEK,XTERM);
79072805
LW
7465
7466 case KEY_semctl:
a0d0e21e 7467 LOP(OP_SEMCTL,XTERM);
79072805
LW
7468
7469 case KEY_semget:
a0d0e21e 7470 LOP(OP_SEMGET,XTERM);
79072805
LW
7471
7472 case KEY_semop:
a0d0e21e 7473 LOP(OP_SEMOP,XTERM);
79072805
LW
7474
7475 case KEY_send:
a0d0e21e 7476 LOP(OP_SEND,XTERM);
79072805
LW
7477
7478 case KEY_setpgrp:
a0d0e21e 7479 LOP(OP_SETPGRP,XTERM);
79072805
LW
7480
7481 case KEY_setpriority:
a0d0e21e 7482 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7483
7484 case KEY_sethostent:
ff68c719 7485 UNI(OP_SHOSTENT);
79072805
LW
7486
7487 case KEY_setnetent:
ff68c719 7488 UNI(OP_SNETENT);
79072805
LW
7489
7490 case KEY_setservent:
ff68c719 7491 UNI(OP_SSERVENT);
79072805
LW
7492
7493 case KEY_setprotoent:
ff68c719 7494 UNI(OP_SPROTOENT);
79072805
LW
7495
7496 case KEY_setpwent:
7497 FUN0(OP_SPWENT);
7498
7499 case KEY_setgrent:
7500 FUN0(OP_SGRENT);
7501
7502 case KEY_seekdir:
a0d0e21e 7503 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7504
7505 case KEY_setsockopt:
a0d0e21e 7506 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7507
7508 case KEY_shift:
6f33ba73 7509 UNIDOR(OP_SHIFT);
79072805
LW
7510
7511 case KEY_shmctl:
a0d0e21e 7512 LOP(OP_SHMCTL,XTERM);
79072805
LW
7513
7514 case KEY_shmget:
a0d0e21e 7515 LOP(OP_SHMGET,XTERM);
79072805
LW
7516
7517 case KEY_shmread:
a0d0e21e 7518 LOP(OP_SHMREAD,XTERM);
79072805
LW
7519
7520 case KEY_shmwrite:
a0d0e21e 7521 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7522
7523 case KEY_shutdown:
a0d0e21e 7524 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7525
7526 case KEY_sin:
7527 UNI(OP_SIN);
7528
7529 case KEY_sleep:
7530 UNI(OP_SLEEP);
7531
7532 case KEY_socket:
a0d0e21e 7533 LOP(OP_SOCKET,XTERM);
79072805
LW
7534
7535 case KEY_socketpair:
a0d0e21e 7536 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7537
7538 case KEY_sort:
3280af22 7539 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7540 s = SKIPSPACE1(s);
79072805 7541 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7542 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7543 PL_expect = XTERM;
15f0808c 7544 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7545 LOP(OP_SORT,XREF);
79072805
LW
7546
7547 case KEY_split:
a0d0e21e 7548 LOP(OP_SPLIT,XTERM);
79072805
LW
7549
7550 case KEY_sprintf:
a0d0e21e 7551 LOP(OP_SPRINTF,XTERM);
79072805
LW
7552
7553 case KEY_splice:
a0d0e21e 7554 LOP(OP_SPLICE,XTERM);
79072805
LW
7555
7556 case KEY_sqrt:
7557 UNI(OP_SQRT);
7558
7559 case KEY_srand:
7560 UNI(OP_SRAND);
7561
7562 case KEY_stat:
7563 UNI(OP_STAT);
7564
7565 case KEY_study:
79072805
LW
7566 UNI(OP_STUDY);
7567
7568 case KEY_substr:
a0d0e21e 7569 LOP(OP_SUBSTR,XTERM);
79072805
LW
7570
7571 case KEY_format:
7572 case KEY_sub:
93a17b20 7573 really_sub:
09bef843 7574 {
3280af22 7575 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7576 SSize_t tboffset = 0;
09bef843 7577 expectation attrful;
28cc6278 7578 bool have_name, have_proto;
f54cb97a 7579 const int key = tmp;
09bef843 7580
5db06880
NC
7581#ifdef PERL_MAD
7582 SV *tmpwhite = 0;
7583
cd81e915 7584 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7585 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7586 PL_thistoken = 0;
5db06880
NC
7587
7588 d = s;
7589 s = SKIPSPACE2(s,tmpwhite);
7590#else
09bef843 7591 s = skipspace(s);
5db06880 7592#endif
09bef843 7593
7e2040f0 7594 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7595 (*s == ':' && s[1] == ':'))
7596 {
5db06880 7597#ifdef PERL_MAD
4f61fd4b 7598 SV *nametoke = NULL;
5db06880
NC
7599#endif
7600
09bef843
SB
7601 PL_expect = XBLOCK;
7602 attrful = XATTRBLOCK;
b1b65b59
JH
7603 /* remember buffer pos'n for later force_word */
7604 tboffset = s - PL_oldbufptr;
09bef843 7605 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7606#ifdef PERL_MAD
7607 if (PL_madskills)
7608 nametoke = newSVpvn(s, d - s);
7609#endif
6502358f
NC
7610 if (memchr(tmpbuf, ':', len))
7611 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7612 else {
7613 sv_setsv(PL_subname,PL_curstname);
396482e1 7614 sv_catpvs(PL_subname,"::");
09bef843
SB
7615 sv_catpvn(PL_subname,tmpbuf,len);
7616 }
09bef843 7617 have_name = TRUE;
5db06880
NC
7618
7619#ifdef PERL_MAD
7620
7621 start_force(0);
7622 CURMAD('X', nametoke);
7623 CURMAD('_', tmpwhite);
7624 (void) force_word(PL_oldbufptr + tboffset, WORD,
7625 FALSE, TRUE, TRUE);
7626
7627 s = SKIPSPACE2(d,tmpwhite);
7628#else
7629 s = skipspace(d);
7630#endif
09bef843 7631 }
463ee0b2 7632 else {
09bef843
SB
7633 if (key == KEY_my)
7634 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7635 PL_expect = XTERMBLOCK;
7636 attrful = XATTRTERM;
76f68e9b 7637 sv_setpvs(PL_subname,"?");
09bef843 7638 have_name = FALSE;
463ee0b2 7639 }
4633a7c4 7640
09bef843
SB
7641 if (key == KEY_format) {
7642 if (*s == '=')
7643 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7644#ifdef PERL_MAD
cd81e915 7645 PL_thistoken = subtoken;
5db06880
NC
7646 s = d;
7647#else
09bef843 7648 if (have_name)
b1b65b59
JH
7649 (void) force_word(PL_oldbufptr + tboffset, WORD,
7650 FALSE, TRUE, TRUE);
5db06880 7651#endif
09bef843
SB
7652 OPERATOR(FORMAT);
7653 }
79072805 7654
09bef843
SB
7655 /* Look for a prototype */
7656 if (*s == '(') {
d9f2850e
RGS
7657 char *p;
7658 bool bad_proto = FALSE;
9e8d7757
RB
7659 bool in_brackets = FALSE;
7660 char greedy_proto = ' ';
7661 bool proto_after_greedy_proto = FALSE;
7662 bool must_be_last = FALSE;
7663 bool underscore = FALSE;
aef2a98a 7664 bool seen_underscore = FALSE;
197afce1 7665 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7666
5db06880 7667 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7668 if (!s)
09bef843 7669 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7670 /* strip spaces and check for bad characters */
09bef843
SB
7671 d = SvPVX(PL_lex_stuff);
7672 tmp = 0;
d9f2850e
RGS
7673 for (p = d; *p; ++p) {
7674 if (!isSPACE(*p)) {
7675 d[tmp++] = *p;
9e8d7757 7676
197afce1 7677 if (warnillegalproto) {
9e8d7757
RB
7678 if (must_be_last)
7679 proto_after_greedy_proto = TRUE;
7680 if (!strchr("$@%*;[]&\\_", *p)) {
7681 bad_proto = TRUE;
7682 }
7683 else {
7684 if ( underscore ) {
7685 if ( *p != ';' )
7686 bad_proto = TRUE;
7687 underscore = FALSE;
7688 }
7689 if ( *p == '[' ) {
7690 in_brackets = TRUE;
7691 }
7692 else if ( *p == ']' ) {
7693 in_brackets = FALSE;
7694 }
7695 else if ( (*p == '@' || *p == '%') &&
7696 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7697 !in_brackets ) {
7698 must_be_last = TRUE;
7699 greedy_proto = *p;
7700 }
7701 else if ( *p == '_' ) {
aef2a98a 7702 underscore = seen_underscore = TRUE;
9e8d7757
RB
7703 }
7704 }
7705 }
d37a9538 7706 }
09bef843 7707 }
d9f2850e 7708 d[tmp] = '\0';
9e8d7757 7709 if (proto_after_greedy_proto)
197afce1 7710 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
7711 "Prototype after '%c' for %"SVf" : %s",
7712 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 7713 if (bad_proto)
197afce1 7714 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
7715 "Illegal character %sin prototype for %"SVf" : %s",
7716 seen_underscore ? "after '_' " : "",
be2597df 7717 SVfARG(PL_subname), d);
b162af07 7718 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7719 have_proto = TRUE;
68dc0745 7720
5db06880
NC
7721#ifdef PERL_MAD
7722 start_force(0);
cd81e915 7723 CURMAD('q', PL_thisopen);
5db06880 7724 CURMAD('_', tmpwhite);
cd81e915
NC
7725 CURMAD('=', PL_thisstuff);
7726 CURMAD('Q', PL_thisclose);
5db06880
NC
7727 NEXTVAL_NEXTTOKE.opval =
7728 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7729 PL_lex_stuff = NULL;
5db06880
NC
7730 force_next(THING);
7731
7732 s = SKIPSPACE2(s,tmpwhite);
7733#else
09bef843 7734 s = skipspace(s);
5db06880 7735#endif
4633a7c4 7736 }
09bef843
SB
7737 else
7738 have_proto = FALSE;
7739
7740 if (*s == ':' && s[1] != ':')
7741 PL_expect = attrful;
8e742a20
MHM
7742 else if (*s != '{' && key == KEY_sub) {
7743 if (!have_name)
7744 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7745 else if (*s != ';' && *s != '}')
be2597df 7746 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7747 }
09bef843 7748
5db06880
NC
7749#ifdef PERL_MAD
7750 start_force(0);
7751 if (tmpwhite) {
7752 if (PL_madskills)
6b29d1f5 7753 curmad('^', newSVpvs(""));
5db06880
NC
7754 CURMAD('_', tmpwhite);
7755 }
7756 force_next(0);
7757
cd81e915 7758 PL_thistoken = subtoken;
5db06880 7759#else
09bef843 7760 if (have_proto) {
9ded7720 7761 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7762 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7763 PL_lex_stuff = NULL;
09bef843 7764 force_next(THING);
68dc0745 7765 }
5db06880 7766#endif
09bef843 7767 if (!have_name) {
49a54bbe
NC
7768 if (PL_curstash)
7769 sv_setpvs(PL_subname, "__ANON__");
7770 else
7771 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7772 TOKEN(ANONSUB);
4633a7c4 7773 }
5db06880 7774#ifndef PERL_MAD
b1b65b59
JH
7775 (void) force_word(PL_oldbufptr + tboffset, WORD,
7776 FALSE, TRUE, TRUE);
5db06880 7777#endif
09bef843
SB
7778 if (key == KEY_my)
7779 TOKEN(MYSUB);
7780 TOKEN(SUB);
4633a7c4 7781 }
79072805
LW
7782
7783 case KEY_system:
a0d0e21e 7784 LOP(OP_SYSTEM,XREF);
79072805
LW
7785
7786 case KEY_symlink:
a0d0e21e 7787 LOP(OP_SYMLINK,XTERM);
79072805
LW
7788
7789 case KEY_syscall:
a0d0e21e 7790 LOP(OP_SYSCALL,XTERM);
79072805 7791
c07a80fd 7792 case KEY_sysopen:
7793 LOP(OP_SYSOPEN,XTERM);
7794
137443ea 7795 case KEY_sysseek:
7796 LOP(OP_SYSSEEK,XTERM);
7797
79072805 7798 case KEY_sysread:
a0d0e21e 7799 LOP(OP_SYSREAD,XTERM);
79072805
LW
7800
7801 case KEY_syswrite:
a0d0e21e 7802 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7803
7804 case KEY_tr:
7805 s = scan_trans(s);
7806 TERM(sublex_start());
7807
7808 case KEY_tell:
7809 UNI(OP_TELL);
7810
7811 case KEY_telldir:
7812 UNI(OP_TELLDIR);
7813
463ee0b2 7814 case KEY_tie:
a0d0e21e 7815 LOP(OP_TIE,XTERM);
463ee0b2 7816
c07a80fd 7817 case KEY_tied:
7818 UNI(OP_TIED);
7819
79072805
LW
7820 case KEY_time:
7821 FUN0(OP_TIME);
7822
7823 case KEY_times:
7824 FUN0(OP_TMS);
7825
7826 case KEY_truncate:
a0d0e21e 7827 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7828
7829 case KEY_uc:
7830 UNI(OP_UC);
7831
7832 case KEY_ucfirst:
7833 UNI(OP_UCFIRST);
7834
463ee0b2
LW
7835 case KEY_untie:
7836 UNI(OP_UNTIE);
7837
79072805 7838 case KEY_until:
6154021b 7839 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7840 OPERATOR(UNTIL);
7841
7842 case KEY_unless:
6154021b 7843 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7844 OPERATOR(UNLESS);
7845
7846 case KEY_unlink:
a0d0e21e 7847 LOP(OP_UNLINK,XTERM);
79072805
LW
7848
7849 case KEY_undef:
6f33ba73 7850 UNIDOR(OP_UNDEF);
79072805
LW
7851
7852 case KEY_unpack:
a0d0e21e 7853 LOP(OP_UNPACK,XTERM);
79072805
LW
7854
7855 case KEY_utime:
a0d0e21e 7856 LOP(OP_UTIME,XTERM);
79072805
LW
7857
7858 case KEY_umask:
6f33ba73 7859 UNIDOR(OP_UMASK);
79072805
LW
7860
7861 case KEY_unshift:
a0d0e21e
LW
7862 LOP(OP_UNSHIFT,XTERM);
7863
7864 case KEY_use:
468aa647 7865 s = tokenize_use(1, s);
a0d0e21e 7866 OPERATOR(USE);
79072805
LW
7867
7868 case KEY_values:
7869 UNI(OP_VALUES);
7870
7871 case KEY_vec:
a0d0e21e 7872 LOP(OP_VEC,XTERM);
79072805 7873
0d863452 7874 case KEY_when:
6154021b 7875 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7876 OPERATOR(WHEN);
7877
79072805 7878 case KEY_while:
6154021b 7879 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7880 OPERATOR(WHILE);
7881
7882 case KEY_warn:
3280af22 7883 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7884 LOP(OP_WARN,XTERM);
79072805
LW
7885
7886 case KEY_wait:
7887 FUN0(OP_WAIT);
7888
7889 case KEY_waitpid:
a0d0e21e 7890 LOP(OP_WAITPID,XTERM);
79072805
LW
7891
7892 case KEY_wantarray:
7893 FUN0(OP_WANTARRAY);
7894
7895 case KEY_write:
9d116dd7
JH
7896#ifdef EBCDIC
7897 {
df3728a2
JH
7898 char ctl_l[2];
7899 ctl_l[0] = toCTRL('L');
7900 ctl_l[1] = '\0';
fafc274c 7901 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7902 }
7903#else
fafc274c
NC
7904 /* Make sure $^L is defined */
7905 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7906#endif
79072805
LW
7907 UNI(OP_ENTERWRITE);
7908
7909 case KEY_x:
3280af22 7910 if (PL_expect == XOPERATOR)
79072805
LW
7911 Mop(OP_REPEAT);
7912 check_uni();
7913 goto just_a_word;
7914
a0d0e21e 7915 case KEY_xor:
6154021b 7916 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7917 OPERATOR(OROP);
7918
79072805
LW
7919 case KEY_y:
7920 s = scan_trans(s);
7921 TERM(sublex_start());
7922 }
49dc05e3 7923 }}
79072805 7924}
bf4acbe4
GS
7925#ifdef __SC__
7926#pragma segment Main
7927#endif
79072805 7928
e930465f
JH
7929static int
7930S_pending_ident(pTHX)
8eceec63 7931{
97aff369 7932 dVAR;
8eceec63 7933 register char *d;
bbd11bfc 7934 PADOFFSET tmp = 0;
8eceec63
SC
7935 /* pit holds the identifier we read and pending_ident is reset */
7936 char pit = PL_pending_ident;
9bde8eb0
NC
7937 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7938 /* All routes through this function want to know if there is a colon. */
c099d646 7939 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
7940 PL_pending_ident = 0;
7941
cd81e915 7942 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 7943 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 7944 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
7945
7946 /* if we're in a my(), we can't allow dynamics here.
7947 $foo'bar has already been turned into $foo::bar, so
7948 just check for colons.
7949
7950 if it's a legal name, the OP is a PADANY.
7951 */
7952 if (PL_in_my) {
7953 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 7954 if (has_colon)
8eceec63
SC
7955 yyerror(Perl_form(aTHX_ "No package name allowed for "
7956 "variable %s in \"our\"",
7957 PL_tokenbuf));
d6447115 7958 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7959 }
7960 else {
9bde8eb0 7961 if (has_colon)
952306ac
RGS
7962 yyerror(Perl_form(aTHX_ PL_no_myglob,
7963 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 7964
6154021b 7965 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 7966 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7967 return PRIVATEREF;
7968 }
7969 }
7970
7971 /*
7972 build the ops for accesses to a my() variable.
7973
7974 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7975 then used in a comparison. This catches most, but not
7976 all cases. For instance, it catches
7977 sort { my($a); $a <=> $b }
7978 but not
7979 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7980 (although why you'd do that is anyone's guess).
7981 */
7982
9bde8eb0 7983 if (!has_colon) {
8716503d 7984 if (!PL_in_my)
f8f98e0a 7985 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 7986 if (tmp != NOT_IN_PAD) {
8eceec63 7987 /* might be an "our" variable" */
00b1698f 7988 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 7989 /* build ops for a bareword */
b64e5050
AL
7990 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
7991 HEK * const stashname = HvNAME_HEK(stash);
7992 SV * const sym = newSVhek(stashname);
396482e1 7993 sv_catpvs(sym, "::");
9bde8eb0 7994 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
7995 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7996 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 7997 gv_fetchsv(sym,
8eceec63
SC
7998 (PL_in_eval
7999 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8000 : GV_ADDMULTI
8eceec63
SC
8001 ),
8002 ((PL_tokenbuf[0] == '$') ? SVt_PV
8003 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8004 : SVt_PVHV));
8005 return WORD;
8006 }
8007
8008 /* if it's a sort block and they're naming $a or $b */
8009 if (PL_last_lop_op == OP_SORT &&
8010 PL_tokenbuf[0] == '$' &&
8011 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8012 && !PL_tokenbuf[2])
8013 {
8014 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8015 d < PL_bufend && *d != '\n';
8016 d++)
8017 {
8018 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8019 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8020 PL_tokenbuf);
8021 }
8022 }
8023 }
8024
6154021b
RGS
8025 pl_yylval.opval = newOP(OP_PADANY, 0);
8026 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8027 return PRIVATEREF;
8028 }
8029 }
8030
8031 /*
8032 Whine if they've said @foo in a doublequoted string,
8033 and @foo isn't a variable we can find in the symbol
8034 table.
8035 */
d824713b
NC
8036 if (ckWARN(WARN_AMBIGUOUS) &&
8037 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8038 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8039 SVt_PVAV);
8eceec63 8040 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8041 /* DO NOT warn for @- and @+ */
8042 && !( PL_tokenbuf[2] == '\0' &&
8043 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8044 )
8eceec63
SC
8045 {
8046 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8047 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8048 "Possible unintended interpolation of %s in string",
8049 PL_tokenbuf);
8eceec63
SC
8050 }
8051 }
8052
8053 /* build ops for a bareword */
6154021b 8054 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8055 tokenbuf_len - 1));
6154021b 8056 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8057 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8058 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8059 ((PL_tokenbuf[0] == '$') ? SVt_PV
8060 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8061 : SVt_PVHV));
8eceec63
SC
8062 return WORD;
8063}
8064
4c3bbe0f
MHM
8065/*
8066 * The following code was generated by perl_keyword.pl.
8067 */
e2e1dd5a 8068
79072805 8069I32
5458a98a 8070Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 8071{
952306ac 8072 dVAR;
7918f24d
NC
8073
8074 PERL_ARGS_ASSERT_KEYWORD;
8075
4c3bbe0f
MHM
8076 switch (len)
8077 {
8078 case 1: /* 5 tokens of length 1 */
8079 switch (name[0])
e2e1dd5a 8080 {
4c3bbe0f
MHM
8081 case 'm':
8082 { /* m */
8083 return KEY_m;
8084 }
8085
4c3bbe0f
MHM
8086 case 'q':
8087 { /* q */
8088 return KEY_q;
8089 }
8090
4c3bbe0f
MHM
8091 case 's':
8092 { /* s */
8093 return KEY_s;
8094 }
8095
4c3bbe0f
MHM
8096 case 'x':
8097 { /* x */
8098 return -KEY_x;
8099 }
8100
4c3bbe0f
MHM
8101 case 'y':
8102 { /* y */
8103 return KEY_y;
8104 }
8105
4c3bbe0f
MHM
8106 default:
8107 goto unknown;
e2e1dd5a 8108 }
4c3bbe0f
MHM
8109
8110 case 2: /* 18 tokens of length 2 */
8111 switch (name[0])
e2e1dd5a 8112 {
4c3bbe0f
MHM
8113 case 'd':
8114 if (name[1] == 'o')
8115 { /* do */
8116 return KEY_do;
8117 }
8118
8119 goto unknown;
8120
8121 case 'e':
8122 if (name[1] == 'q')
8123 { /* eq */
8124 return -KEY_eq;
8125 }
8126
8127 goto unknown;
8128
8129 case 'g':
8130 switch (name[1])
8131 {
8132 case 'e':
8133 { /* ge */
8134 return -KEY_ge;
8135 }
8136
4c3bbe0f
MHM
8137 case 't':
8138 { /* gt */
8139 return -KEY_gt;
8140 }
8141
4c3bbe0f
MHM
8142 default:
8143 goto unknown;
8144 }
8145
8146 case 'i':
8147 if (name[1] == 'f')
8148 { /* if */
8149 return KEY_if;
8150 }
8151
8152 goto unknown;
8153
8154 case 'l':
8155 switch (name[1])
8156 {
8157 case 'c':
8158 { /* lc */
8159 return -KEY_lc;
8160 }
8161
4c3bbe0f
MHM
8162 case 'e':
8163 { /* le */
8164 return -KEY_le;
8165 }
8166
4c3bbe0f
MHM
8167 case 't':
8168 { /* lt */
8169 return -KEY_lt;
8170 }
8171
4c3bbe0f
MHM
8172 default:
8173 goto unknown;
8174 }
8175
8176 case 'm':
8177 if (name[1] == 'y')
8178 { /* my */
8179 return KEY_my;
8180 }
8181
8182 goto unknown;
8183
8184 case 'n':
8185 switch (name[1])
8186 {
8187 case 'e':
8188 { /* ne */
8189 return -KEY_ne;
8190 }
8191
4c3bbe0f
MHM
8192 case 'o':
8193 { /* no */
8194 return KEY_no;
8195 }
8196
4c3bbe0f
MHM
8197 default:
8198 goto unknown;
8199 }
8200
8201 case 'o':
8202 if (name[1] == 'r')
8203 { /* or */
8204 return -KEY_or;
8205 }
8206
8207 goto unknown;
8208
8209 case 'q':
8210 switch (name[1])
8211 {
8212 case 'q':
8213 { /* qq */
8214 return KEY_qq;
8215 }
8216
4c3bbe0f
MHM
8217 case 'r':
8218 { /* qr */
8219 return KEY_qr;
8220 }
8221
4c3bbe0f
MHM
8222 case 'w':
8223 { /* qw */
8224 return KEY_qw;
8225 }
8226
4c3bbe0f
MHM
8227 case 'x':
8228 { /* qx */
8229 return KEY_qx;
8230 }
8231
4c3bbe0f
MHM
8232 default:
8233 goto unknown;
8234 }
8235
8236 case 't':
8237 if (name[1] == 'r')
8238 { /* tr */
8239 return KEY_tr;
8240 }
8241
8242 goto unknown;
8243
8244 case 'u':
8245 if (name[1] == 'c')
8246 { /* uc */
8247 return -KEY_uc;
8248 }
8249
8250 goto unknown;
8251
8252 default:
8253 goto unknown;
e2e1dd5a 8254 }
4c3bbe0f 8255
0d863452 8256 case 3: /* 29 tokens of length 3 */
4c3bbe0f 8257 switch (name[0])
e2e1dd5a 8258 {
4c3bbe0f
MHM
8259 case 'E':
8260 if (name[1] == 'N' &&
8261 name[2] == 'D')
8262 { /* END */
8263 return KEY_END;
8264 }
8265
8266 goto unknown;
8267
8268 case 'a':
8269 switch (name[1])
8270 {
8271 case 'b':
8272 if (name[2] == 's')
8273 { /* abs */
8274 return -KEY_abs;
8275 }
8276
8277 goto unknown;
8278
8279 case 'n':
8280 if (name[2] == 'd')
8281 { /* and */
8282 return -KEY_and;
8283 }
8284
8285 goto unknown;
8286
8287 default:
8288 goto unknown;
8289 }
8290
8291 case 'c':
8292 switch (name[1])
8293 {
8294 case 'h':
8295 if (name[2] == 'r')
8296 { /* chr */
8297 return -KEY_chr;
8298 }
8299
8300 goto unknown;
8301
8302 case 'm':
8303 if (name[2] == 'p')
8304 { /* cmp */
8305 return -KEY_cmp;
8306 }
8307
8308 goto unknown;
8309
8310 case 'o':
8311 if (name[2] == 's')
8312 { /* cos */
8313 return -KEY_cos;
8314 }
8315
8316 goto unknown;
8317
8318 default:
8319 goto unknown;
8320 }
8321
8322 case 'd':
8323 if (name[1] == 'i' &&
8324 name[2] == 'e')
8325 { /* die */
8326 return -KEY_die;
8327 }
8328
8329 goto unknown;
8330
8331 case 'e':
8332 switch (name[1])
8333 {
8334 case 'o':
8335 if (name[2] == 'f')
8336 { /* eof */
8337 return -KEY_eof;
8338 }
8339
8340 goto unknown;
8341
4c3bbe0f
MHM
8342 case 'x':
8343 if (name[2] == 'p')
8344 { /* exp */
8345 return -KEY_exp;
8346 }
8347
8348 goto unknown;
8349
8350 default:
8351 goto unknown;
8352 }
8353
8354 case 'f':
8355 if (name[1] == 'o' &&
8356 name[2] == 'r')
8357 { /* for */
8358 return KEY_for;
8359 }
8360
8361 goto unknown;
8362
8363 case 'h':
8364 if (name[1] == 'e' &&
8365 name[2] == 'x')
8366 { /* hex */
8367 return -KEY_hex;
8368 }
8369
8370 goto unknown;
8371
8372 case 'i':
8373 if (name[1] == 'n' &&
8374 name[2] == 't')
8375 { /* int */
8376 return -KEY_int;
8377 }
8378
8379 goto unknown;
8380
8381 case 'l':
8382 if (name[1] == 'o' &&
8383 name[2] == 'g')
8384 { /* log */
8385 return -KEY_log;
8386 }
8387
8388 goto unknown;
8389
8390 case 'm':
8391 if (name[1] == 'a' &&
8392 name[2] == 'p')
8393 { /* map */
8394 return KEY_map;
8395 }
8396
8397 goto unknown;
8398
8399 case 'n':
8400 if (name[1] == 'o' &&
8401 name[2] == 't')
8402 { /* not */
8403 return -KEY_not;
8404 }
8405
8406 goto unknown;
8407
8408 case 'o':
8409 switch (name[1])
8410 {
8411 case 'c':
8412 if (name[2] == 't')
8413 { /* oct */
8414 return -KEY_oct;
8415 }
8416
8417 goto unknown;
8418
8419 case 'r':
8420 if (name[2] == 'd')
8421 { /* ord */
8422 return -KEY_ord;
8423 }
8424
8425 goto unknown;
8426
8427 case 'u':
8428 if (name[2] == 'r')
8429 { /* our */
8430 return KEY_our;
8431 }
8432
8433 goto unknown;
8434
8435 default:
8436 goto unknown;
8437 }
8438
8439 case 'p':
8440 if (name[1] == 'o')
8441 {
8442 switch (name[2])
8443 {
8444 case 'p':
8445 { /* pop */
8446 return -KEY_pop;
8447 }
8448
4c3bbe0f
MHM
8449 case 's':
8450 { /* pos */
8451 return KEY_pos;
8452 }
8453
4c3bbe0f
MHM
8454 default:
8455 goto unknown;
8456 }
8457 }
8458
8459 goto unknown;
8460
8461 case 'r':
8462 if (name[1] == 'e' &&
8463 name[2] == 'f')
8464 { /* ref */
8465 return -KEY_ref;
8466 }
8467
8468 goto unknown;
8469
8470 case 's':
8471 switch (name[1])
8472 {
0d863452
RH
8473 case 'a':
8474 if (name[2] == 'y')
8475 { /* say */
e3e804c9 8476 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8477 }
8478
8479 goto unknown;
8480
4c3bbe0f
MHM
8481 case 'i':
8482 if (name[2] == 'n')
8483 { /* sin */
8484 return -KEY_sin;
8485 }
8486
8487 goto unknown;
8488
8489 case 'u':
8490 if (name[2] == 'b')
8491 { /* sub */
8492 return KEY_sub;
8493 }
8494
8495 goto unknown;
8496
8497 default:
8498 goto unknown;
8499 }
8500
8501 case 't':
8502 if (name[1] == 'i' &&
8503 name[2] == 'e')
8504 { /* tie */
1db4d195 8505 return -KEY_tie;
4c3bbe0f
MHM
8506 }
8507
8508 goto unknown;
8509
8510 case 'u':
8511 if (name[1] == 's' &&
8512 name[2] == 'e')
8513 { /* use */
8514 return KEY_use;
8515 }
8516
8517 goto unknown;
8518
8519 case 'v':
8520 if (name[1] == 'e' &&
8521 name[2] == 'c')
8522 { /* vec */
8523 return -KEY_vec;
8524 }
8525
8526 goto unknown;
8527
8528 case 'x':
8529 if (name[1] == 'o' &&
8530 name[2] == 'r')
8531 { /* xor */
8532 return -KEY_xor;
8533 }
8534
8535 goto unknown;
8536
8537 default:
8538 goto unknown;
e2e1dd5a 8539 }
4c3bbe0f 8540
0d863452 8541 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8542 switch (name[0])
e2e1dd5a 8543 {
4c3bbe0f
MHM
8544 case 'C':
8545 if (name[1] == 'O' &&
8546 name[2] == 'R' &&
8547 name[3] == 'E')
8548 { /* CORE */
8549 return -KEY_CORE;
8550 }
8551
8552 goto unknown;
8553
8554 case 'I':
8555 if (name[1] == 'N' &&
8556 name[2] == 'I' &&
8557 name[3] == 'T')
8558 { /* INIT */
8559 return KEY_INIT;
8560 }
8561
8562 goto unknown;
8563
8564 case 'b':
8565 if (name[1] == 'i' &&
8566 name[2] == 'n' &&
8567 name[3] == 'd')
8568 { /* bind */
8569 return -KEY_bind;
8570 }
8571
8572 goto unknown;
8573
8574 case 'c':
8575 if (name[1] == 'h' &&
8576 name[2] == 'o' &&
8577 name[3] == 'p')
8578 { /* chop */
8579 return -KEY_chop;
8580 }
8581
8582 goto unknown;
8583
8584 case 'd':
8585 if (name[1] == 'u' &&
8586 name[2] == 'm' &&
8587 name[3] == 'p')
8588 { /* dump */
8589 return -KEY_dump;
8590 }
8591
8592 goto unknown;
8593
8594 case 'e':
8595 switch (name[1])
8596 {
8597 case 'a':
8598 if (name[2] == 'c' &&
8599 name[3] == 'h')
8600 { /* each */
8601 return -KEY_each;
8602 }
8603
8604 goto unknown;
8605
8606 case 'l':
8607 if (name[2] == 's' &&
8608 name[3] == 'e')
8609 { /* else */
8610 return KEY_else;
8611 }
8612
8613 goto unknown;
8614
8615 case 'v':
8616 if (name[2] == 'a' &&
8617 name[3] == 'l')
8618 { /* eval */
8619 return KEY_eval;
8620 }
8621
8622 goto unknown;
8623
8624 case 'x':
8625 switch (name[2])
8626 {
8627 case 'e':
8628 if (name[3] == 'c')
8629 { /* exec */
8630 return -KEY_exec;
8631 }
8632
8633 goto unknown;
8634
8635 case 'i':
8636 if (name[3] == 't')
8637 { /* exit */
8638 return -KEY_exit;
8639 }
8640
8641 goto unknown;
8642
8643 default:
8644 goto unknown;
8645 }
8646
8647 default:
8648 goto unknown;
8649 }
8650
8651 case 'f':
8652 if (name[1] == 'o' &&
8653 name[2] == 'r' &&
8654 name[3] == 'k')
8655 { /* fork */
8656 return -KEY_fork;
8657 }
8658
8659 goto unknown;
8660
8661 case 'g':
8662 switch (name[1])
8663 {
8664 case 'e':
8665 if (name[2] == 't' &&
8666 name[3] == 'c')
8667 { /* getc */
8668 return -KEY_getc;
8669 }
8670
8671 goto unknown;
8672
8673 case 'l':
8674 if (name[2] == 'o' &&
8675 name[3] == 'b')
8676 { /* glob */
8677 return KEY_glob;
8678 }
8679
8680 goto unknown;
8681
8682 case 'o':
8683 if (name[2] == 't' &&
8684 name[3] == 'o')
8685 { /* goto */
8686 return KEY_goto;
8687 }
8688
8689 goto unknown;
8690
8691 case 'r':
8692 if (name[2] == 'e' &&
8693 name[3] == 'p')
8694 { /* grep */
8695 return KEY_grep;
8696 }
8697
8698 goto unknown;
8699
8700 default:
8701 goto unknown;
8702 }
8703
8704 case 'j':
8705 if (name[1] == 'o' &&
8706 name[2] == 'i' &&
8707 name[3] == 'n')
8708 { /* join */
8709 return -KEY_join;
8710 }
8711
8712 goto unknown;
8713
8714 case 'k':
8715 switch (name[1])
8716 {
8717 case 'e':
8718 if (name[2] == 'y' &&
8719 name[3] == 's')
8720 { /* keys */
8721 return -KEY_keys;
8722 }
8723
8724 goto unknown;
8725
8726 case 'i':
8727 if (name[2] == 'l' &&
8728 name[3] == 'l')
8729 { /* kill */
8730 return -KEY_kill;
8731 }
8732
8733 goto unknown;
8734
8735 default:
8736 goto unknown;
8737 }
8738
8739 case 'l':
8740 switch (name[1])
8741 {
8742 case 'a':
8743 if (name[2] == 's' &&
8744 name[3] == 't')
8745 { /* last */
8746 return KEY_last;
8747 }
8748
8749 goto unknown;
8750
8751 case 'i':
8752 if (name[2] == 'n' &&
8753 name[3] == 'k')
8754 { /* link */
8755 return -KEY_link;
8756 }
8757
8758 goto unknown;
8759
8760 case 'o':
8761 if (name[2] == 'c' &&
8762 name[3] == 'k')
8763 { /* lock */
8764 return -KEY_lock;
8765 }
8766
8767 goto unknown;
8768
8769 default:
8770 goto unknown;
8771 }
8772
8773 case 'n':
8774 if (name[1] == 'e' &&
8775 name[2] == 'x' &&
8776 name[3] == 't')
8777 { /* next */
8778 return KEY_next;
8779 }
8780
8781 goto unknown;
8782
8783 case 'o':
8784 if (name[1] == 'p' &&
8785 name[2] == 'e' &&
8786 name[3] == 'n')
8787 { /* open */
8788 return -KEY_open;
8789 }
8790
8791 goto unknown;
8792
8793 case 'p':
8794 switch (name[1])
8795 {
8796 case 'a':
8797 if (name[2] == 'c' &&
8798 name[3] == 'k')
8799 { /* pack */
8800 return -KEY_pack;
8801 }
8802
8803 goto unknown;
8804
8805 case 'i':
8806 if (name[2] == 'p' &&
8807 name[3] == 'e')
8808 { /* pipe */
8809 return -KEY_pipe;
8810 }
8811
8812 goto unknown;
8813
8814 case 'u':
8815 if (name[2] == 's' &&
8816 name[3] == 'h')
8817 { /* push */
8818 return -KEY_push;
8819 }
8820
8821 goto unknown;
8822
8823 default:
8824 goto unknown;
8825 }
8826
8827 case 'r':
8828 switch (name[1])
8829 {
8830 case 'a':
8831 if (name[2] == 'n' &&
8832 name[3] == 'd')
8833 { /* rand */
8834 return -KEY_rand;
8835 }
8836
8837 goto unknown;
8838
8839 case 'e':
8840 switch (name[2])
8841 {
8842 case 'a':
8843 if (name[3] == 'd')
8844 { /* read */
8845 return -KEY_read;
8846 }
8847
8848 goto unknown;
8849
8850 case 'c':
8851 if (name[3] == 'v')
8852 { /* recv */
8853 return -KEY_recv;
8854 }
8855
8856 goto unknown;
8857
8858 case 'd':
8859 if (name[3] == 'o')
8860 { /* redo */
8861 return KEY_redo;
8862 }
8863
8864 goto unknown;
8865
8866 default:
8867 goto unknown;
8868 }
8869
8870 default:
8871 goto unknown;
8872 }
8873
8874 case 's':
8875 switch (name[1])
8876 {
8877 case 'e':
8878 switch (name[2])
8879 {
8880 case 'e':
8881 if (name[3] == 'k')
8882 { /* seek */
8883 return -KEY_seek;
8884 }
8885
8886 goto unknown;
8887
8888 case 'n':
8889 if (name[3] == 'd')
8890 { /* send */
8891 return -KEY_send;
8892 }
8893
8894 goto unknown;
8895
8896 default:
8897 goto unknown;
8898 }
8899
8900 case 'o':
8901 if (name[2] == 'r' &&
8902 name[3] == 't')
8903 { /* sort */
8904 return KEY_sort;
8905 }
8906
8907 goto unknown;
8908
8909 case 'q':
8910 if (name[2] == 'r' &&
8911 name[3] == 't')
8912 { /* sqrt */
8913 return -KEY_sqrt;
8914 }
8915
8916 goto unknown;
8917
8918 case 't':
8919 if (name[2] == 'a' &&
8920 name[3] == 't')
8921 { /* stat */
8922 return -KEY_stat;
8923 }
8924
8925 goto unknown;
8926
8927 default:
8928 goto unknown;
8929 }
8930
8931 case 't':
8932 switch (name[1])
8933 {
8934 case 'e':
8935 if (name[2] == 'l' &&
8936 name[3] == 'l')
8937 { /* tell */
8938 return -KEY_tell;
8939 }
8940
8941 goto unknown;
8942
8943 case 'i':
8944 switch (name[2])
8945 {
8946 case 'e':
8947 if (name[3] == 'd')
8948 { /* tied */
1db4d195 8949 return -KEY_tied;
4c3bbe0f
MHM
8950 }
8951
8952 goto unknown;
8953
8954 case 'm':
8955 if (name[3] == 'e')
8956 { /* time */
8957 return -KEY_time;
8958 }
8959
8960 goto unknown;
8961
8962 default:
8963 goto unknown;
8964 }
8965
8966 default:
8967 goto unknown;
8968 }
8969
8970 case 'w':
0d863452 8971 switch (name[1])
4c3bbe0f 8972 {
0d863452 8973 case 'a':
952306ac
RGS
8974 switch (name[2])
8975 {
8976 case 'i':
8977 if (name[3] == 't')
8978 { /* wait */
8979 return -KEY_wait;
8980 }
4c3bbe0f 8981
952306ac 8982 goto unknown;
4c3bbe0f 8983
952306ac
RGS
8984 case 'r':
8985 if (name[3] == 'n')
8986 { /* warn */
8987 return -KEY_warn;
8988 }
4c3bbe0f 8989
952306ac 8990 goto unknown;
4c3bbe0f 8991
952306ac
RGS
8992 default:
8993 goto unknown;
8994 }
0d863452
RH
8995
8996 case 'h':
8997 if (name[2] == 'e' &&
8998 name[3] == 'n')
8999 { /* when */
5458a98a 9000 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 9001 }
4c3bbe0f 9002
952306ac 9003 goto unknown;
4c3bbe0f 9004
952306ac
RGS
9005 default:
9006 goto unknown;
9007 }
4c3bbe0f 9008
0d863452
RH
9009 default:
9010 goto unknown;
9011 }
9012
952306ac 9013 case 5: /* 39 tokens of length 5 */
4c3bbe0f 9014 switch (name[0])
e2e1dd5a 9015 {
4c3bbe0f
MHM
9016 case 'B':
9017 if (name[1] == 'E' &&
9018 name[2] == 'G' &&
9019 name[3] == 'I' &&
9020 name[4] == 'N')
9021 { /* BEGIN */
9022 return KEY_BEGIN;
9023 }
9024
9025 goto unknown;
9026
9027 case 'C':
9028 if (name[1] == 'H' &&
9029 name[2] == 'E' &&
9030 name[3] == 'C' &&
9031 name[4] == 'K')
9032 { /* CHECK */
9033 return KEY_CHECK;
9034 }
9035
9036 goto unknown;
9037
9038 case 'a':
9039 switch (name[1])
9040 {
9041 case 'l':
9042 if (name[2] == 'a' &&
9043 name[3] == 'r' &&
9044 name[4] == 'm')
9045 { /* alarm */
9046 return -KEY_alarm;
9047 }
9048
9049 goto unknown;
9050
9051 case 't':
9052 if (name[2] == 'a' &&
9053 name[3] == 'n' &&
9054 name[4] == '2')
9055 { /* atan2 */
9056 return -KEY_atan2;
9057 }
9058
9059 goto unknown;
9060
9061 default:
9062 goto unknown;
9063 }
9064
9065 case 'b':
0d863452
RH
9066 switch (name[1])
9067 {
9068 case 'l':
9069 if (name[2] == 'e' &&
952306ac
RGS
9070 name[3] == 's' &&
9071 name[4] == 's')
9072 { /* bless */
9073 return -KEY_bless;
9074 }
4c3bbe0f 9075
952306ac 9076 goto unknown;
4c3bbe0f 9077
0d863452
RH
9078 case 'r':
9079 if (name[2] == 'e' &&
9080 name[3] == 'a' &&
9081 name[4] == 'k')
9082 { /* break */
5458a98a 9083 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
9084 }
9085
9086 goto unknown;
9087
9088 default:
9089 goto unknown;
9090 }
9091
4c3bbe0f
MHM
9092 case 'c':
9093 switch (name[1])
9094 {
9095 case 'h':
9096 switch (name[2])
9097 {
9098 case 'd':
9099 if (name[3] == 'i' &&
9100 name[4] == 'r')
9101 { /* chdir */
9102 return -KEY_chdir;
9103 }
9104
9105 goto unknown;
9106
9107 case 'm':
9108 if (name[3] == 'o' &&
9109 name[4] == 'd')
9110 { /* chmod */
9111 return -KEY_chmod;
9112 }
9113
9114 goto unknown;
9115
9116 case 'o':
9117 switch (name[3])
9118 {
9119 case 'm':
9120 if (name[4] == 'p')
9121 { /* chomp */
9122 return -KEY_chomp;
9123 }
9124
9125 goto unknown;
9126
9127 case 'w':
9128 if (name[4] == 'n')
9129 { /* chown */
9130 return -KEY_chown;
9131 }
9132
9133 goto unknown;
9134
9135 default:
9136 goto unknown;
9137 }
9138
9139 default:
9140 goto unknown;
9141 }
9142
9143 case 'l':
9144 if (name[2] == 'o' &&
9145 name[3] == 's' &&
9146 name[4] == 'e')
9147 { /* close */
9148 return -KEY_close;
9149 }
9150
9151 goto unknown;
9152
9153 case 'r':
9154 if (name[2] == 'y' &&
9155 name[3] == 'p' &&
9156 name[4] == 't')
9157 { /* crypt */
9158 return -KEY_crypt;
9159 }
9160
9161 goto unknown;
9162
9163 default:
9164 goto unknown;
9165 }
9166
9167 case 'e':
9168 if (name[1] == 'l' &&
9169 name[2] == 's' &&
9170 name[3] == 'i' &&
9171 name[4] == 'f')
9172 { /* elsif */
9173 return KEY_elsif;
9174 }
9175
9176 goto unknown;
9177
9178 case 'f':
9179 switch (name[1])
9180 {
9181 case 'c':
9182 if (name[2] == 'n' &&
9183 name[3] == 't' &&
9184 name[4] == 'l')
9185 { /* fcntl */
9186 return -KEY_fcntl;
9187 }
9188
9189 goto unknown;
9190
9191 case 'l':
9192 if (name[2] == 'o' &&
9193 name[3] == 'c' &&
9194 name[4] == 'k')
9195 { /* flock */
9196 return -KEY_flock;
9197 }
9198
9199 goto unknown;
9200
9201 default:
9202 goto unknown;
9203 }
9204
0d863452
RH
9205 case 'g':
9206 if (name[1] == 'i' &&
9207 name[2] == 'v' &&
9208 name[3] == 'e' &&
9209 name[4] == 'n')
9210 { /* given */
5458a98a 9211 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
9212 }
9213
9214 goto unknown;
9215
4c3bbe0f
MHM
9216 case 'i':
9217 switch (name[1])
9218 {
9219 case 'n':
9220 if (name[2] == 'd' &&
9221 name[3] == 'e' &&
9222 name[4] == 'x')
9223 { /* index */
9224 return -KEY_index;
9225 }
9226
9227 goto unknown;
9228
9229 case 'o':
9230 if (name[2] == 'c' &&
9231 name[3] == 't' &&
9232 name[4] == 'l')
9233 { /* ioctl */
9234 return -KEY_ioctl;
9235 }
9236
9237 goto unknown;
9238
9239 default:
9240 goto unknown;
9241 }
9242
9243 case 'l':
9244 switch (name[1])
9245 {
9246 case 'o':
9247 if (name[2] == 'c' &&
9248 name[3] == 'a' &&
9249 name[4] == 'l')
9250 { /* local */
9251 return KEY_local;
9252 }
9253
9254 goto unknown;
9255
9256 case 's':
9257 if (name[2] == 't' &&
9258 name[3] == 'a' &&
9259 name[4] == 't')
9260 { /* lstat */
9261 return -KEY_lstat;
9262 }
9263
9264 goto unknown;
9265
9266 default:
9267 goto unknown;
9268 }
9269
9270 case 'm':
9271 if (name[1] == 'k' &&
9272 name[2] == 'd' &&
9273 name[3] == 'i' &&
9274 name[4] == 'r')
9275 { /* mkdir */
9276 return -KEY_mkdir;
9277 }
9278
9279 goto unknown;
9280
9281 case 'p':
9282 if (name[1] == 'r' &&
9283 name[2] == 'i' &&
9284 name[3] == 'n' &&
9285 name[4] == 't')
9286 { /* print */
9287 return KEY_print;
9288 }
9289
9290 goto unknown;
9291
9292 case 'r':
9293 switch (name[1])
9294 {
9295 case 'e':
9296 if (name[2] == 's' &&
9297 name[3] == 'e' &&
9298 name[4] == 't')
9299 { /* reset */
9300 return -KEY_reset;
9301 }
9302
9303 goto unknown;
9304
9305 case 'm':
9306 if (name[2] == 'd' &&
9307 name[3] == 'i' &&
9308 name[4] == 'r')
9309 { /* rmdir */
9310 return -KEY_rmdir;
9311 }
9312
9313 goto unknown;
9314
9315 default:
9316 goto unknown;
9317 }
9318
9319 case 's':
9320 switch (name[1])
9321 {
9322 case 'e':
9323 if (name[2] == 'm' &&
9324 name[3] == 'o' &&
9325 name[4] == 'p')
9326 { /* semop */
9327 return -KEY_semop;
9328 }
9329
9330 goto unknown;
9331
9332 case 'h':
9333 if (name[2] == 'i' &&
9334 name[3] == 'f' &&
9335 name[4] == 't')
9336 { /* shift */
9337 return -KEY_shift;
9338 }
9339
9340 goto unknown;
9341
9342 case 'l':
9343 if (name[2] == 'e' &&
9344 name[3] == 'e' &&
9345 name[4] == 'p')
9346 { /* sleep */
9347 return -KEY_sleep;
9348 }
9349
9350 goto unknown;
9351
9352 case 'p':
9353 if (name[2] == 'l' &&
9354 name[3] == 'i' &&
9355 name[4] == 't')
9356 { /* split */
9357 return KEY_split;
9358 }
9359
9360 goto unknown;
9361
9362 case 'r':
9363 if (name[2] == 'a' &&
9364 name[3] == 'n' &&
9365 name[4] == 'd')
9366 { /* srand */
9367 return -KEY_srand;
9368 }
9369
9370 goto unknown;
9371
9372 case 't':
952306ac
RGS
9373 switch (name[2])
9374 {
9375 case 'a':
9376 if (name[3] == 't' &&
9377 name[4] == 'e')
9378 { /* state */
5458a98a 9379 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9380 }
4c3bbe0f 9381
952306ac
RGS
9382 goto unknown;
9383
9384 case 'u':
9385 if (name[3] == 'd' &&
9386 name[4] == 'y')
9387 { /* study */
9388 return KEY_study;
9389 }
9390
9391 goto unknown;
9392
9393 default:
9394 goto unknown;
9395 }
4c3bbe0f
MHM
9396
9397 default:
9398 goto unknown;
9399 }
9400
9401 case 't':
9402 if (name[1] == 'i' &&
9403 name[2] == 'm' &&
9404 name[3] == 'e' &&
9405 name[4] == 's')
9406 { /* times */
9407 return -KEY_times;
9408 }
9409
9410 goto unknown;
9411
9412 case 'u':
9413 switch (name[1])
9414 {
9415 case 'm':
9416 if (name[2] == 'a' &&
9417 name[3] == 's' &&
9418 name[4] == 'k')
9419 { /* umask */
9420 return -KEY_umask;
9421 }
9422
9423 goto unknown;
9424
9425 case 'n':
9426 switch (name[2])
9427 {
9428 case 'd':
9429 if (name[3] == 'e' &&
9430 name[4] == 'f')
9431 { /* undef */
9432 return KEY_undef;
9433 }
9434
9435 goto unknown;
9436
9437 case 't':
9438 if (name[3] == 'i')
9439 {
9440 switch (name[4])
9441 {
9442 case 'e':
9443 { /* untie */
1db4d195 9444 return -KEY_untie;
4c3bbe0f
MHM
9445 }
9446
4c3bbe0f
MHM
9447 case 'l':
9448 { /* until */
9449 return KEY_until;
9450 }
9451
4c3bbe0f
MHM
9452 default:
9453 goto unknown;
9454 }
9455 }
9456
9457 goto unknown;
9458
9459 default:
9460 goto unknown;
9461 }
9462
9463 case 't':
9464 if (name[2] == 'i' &&
9465 name[3] == 'm' &&
9466 name[4] == 'e')
9467 { /* utime */
9468 return -KEY_utime;
9469 }
9470
9471 goto unknown;
9472
9473 default:
9474 goto unknown;
9475 }
9476
9477 case 'w':
9478 switch (name[1])
9479 {
9480 case 'h':
9481 if (name[2] == 'i' &&
9482 name[3] == 'l' &&
9483 name[4] == 'e')
9484 { /* while */
9485 return KEY_while;
9486 }
9487
9488 goto unknown;
9489
9490 case 'r':
9491 if (name[2] == 'i' &&
9492 name[3] == 't' &&
9493 name[4] == 'e')
9494 { /* write */
9495 return -KEY_write;
9496 }
9497
9498 goto unknown;
9499
9500 default:
9501 goto unknown;
9502 }
9503
9504 default:
9505 goto unknown;
e2e1dd5a 9506 }
4c3bbe0f
MHM
9507
9508 case 6: /* 33 tokens of length 6 */
9509 switch (name[0])
9510 {
9511 case 'a':
9512 if (name[1] == 'c' &&
9513 name[2] == 'c' &&
9514 name[3] == 'e' &&
9515 name[4] == 'p' &&
9516 name[5] == 't')
9517 { /* accept */
9518 return -KEY_accept;
9519 }
9520
9521 goto unknown;
9522
9523 case 'c':
9524 switch (name[1])
9525 {
9526 case 'a':
9527 if (name[2] == 'l' &&
9528 name[3] == 'l' &&
9529 name[4] == 'e' &&
9530 name[5] == 'r')
9531 { /* caller */
9532 return -KEY_caller;
9533 }
9534
9535 goto unknown;
9536
9537 case 'h':
9538 if (name[2] == 'r' &&
9539 name[3] == 'o' &&
9540 name[4] == 'o' &&
9541 name[5] == 't')
9542 { /* chroot */
9543 return -KEY_chroot;
9544 }
9545
9546 goto unknown;
9547
9548 default:
9549 goto unknown;
9550 }
9551
9552 case 'd':
9553 if (name[1] == 'e' &&
9554 name[2] == 'l' &&
9555 name[3] == 'e' &&
9556 name[4] == 't' &&
9557 name[5] == 'e')
9558 { /* delete */
9559 return KEY_delete;
9560 }
9561
9562 goto unknown;
9563
9564 case 'e':
9565 switch (name[1])
9566 {
9567 case 'l':
9568 if (name[2] == 's' &&
9569 name[3] == 'e' &&
9570 name[4] == 'i' &&
9571 name[5] == 'f')
9572 { /* elseif */
9b387841 9573 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9574 }
9575
9576 goto unknown;
9577
9578 case 'x':
9579 if (name[2] == 'i' &&
9580 name[3] == 's' &&
9581 name[4] == 't' &&
9582 name[5] == 's')
9583 { /* exists */
9584 return KEY_exists;
9585 }
9586
9587 goto unknown;
9588
9589 default:
9590 goto unknown;
9591 }
9592
9593 case 'f':
9594 switch (name[1])
9595 {
9596 case 'i':
9597 if (name[2] == 'l' &&
9598 name[3] == 'e' &&
9599 name[4] == 'n' &&
9600 name[5] == 'o')
9601 { /* fileno */
9602 return -KEY_fileno;
9603 }
9604
9605 goto unknown;
9606
9607 case 'o':
9608 if (name[2] == 'r' &&
9609 name[3] == 'm' &&
9610 name[4] == 'a' &&
9611 name[5] == 't')
9612 { /* format */
9613 return KEY_format;
9614 }
9615
9616 goto unknown;
9617
9618 default:
9619 goto unknown;
9620 }
9621
9622 case 'g':
9623 if (name[1] == 'm' &&
9624 name[2] == 't' &&
9625 name[3] == 'i' &&
9626 name[4] == 'm' &&
9627 name[5] == 'e')
9628 { /* gmtime */
9629 return -KEY_gmtime;
9630 }
9631
9632 goto unknown;
9633
9634 case 'l':
9635 switch (name[1])
9636 {
9637 case 'e':
9638 if (name[2] == 'n' &&
9639 name[3] == 'g' &&
9640 name[4] == 't' &&
9641 name[5] == 'h')
9642 { /* length */
9643 return -KEY_length;
9644 }
9645
9646 goto unknown;
9647
9648 case 'i':
9649 if (name[2] == 's' &&
9650 name[3] == 't' &&
9651 name[4] == 'e' &&
9652 name[5] == 'n')
9653 { /* listen */
9654 return -KEY_listen;
9655 }
9656
9657 goto unknown;
9658
9659 default:
9660 goto unknown;
9661 }
9662
9663 case 'm':
9664 if (name[1] == 's' &&
9665 name[2] == 'g')
9666 {
9667 switch (name[3])
9668 {
9669 case 'c':
9670 if (name[4] == 't' &&
9671 name[5] == 'l')
9672 { /* msgctl */
9673 return -KEY_msgctl;
9674 }
9675
9676 goto unknown;
9677
9678 case 'g':
9679 if (name[4] == 'e' &&
9680 name[5] == 't')
9681 { /* msgget */
9682 return -KEY_msgget;
9683 }
9684
9685 goto unknown;
9686
9687 case 'r':
9688 if (name[4] == 'c' &&
9689 name[5] == 'v')
9690 { /* msgrcv */
9691 return -KEY_msgrcv;
9692 }
9693
9694 goto unknown;
9695
9696 case 's':
9697 if (name[4] == 'n' &&
9698 name[5] == 'd')
9699 { /* msgsnd */
9700 return -KEY_msgsnd;
9701 }
9702
9703 goto unknown;
9704
9705 default:
9706 goto unknown;
9707 }
9708 }
9709
9710 goto unknown;
9711
9712 case 'p':
9713 if (name[1] == 'r' &&
9714 name[2] == 'i' &&
9715 name[3] == 'n' &&
9716 name[4] == 't' &&
9717 name[5] == 'f')
9718 { /* printf */
9719 return KEY_printf;
9720 }
9721
9722 goto unknown;
9723
9724 case 'r':
9725 switch (name[1])
9726 {
9727 case 'e':
9728 switch (name[2])
9729 {
9730 case 'n':
9731 if (name[3] == 'a' &&
9732 name[4] == 'm' &&
9733 name[5] == 'e')
9734 { /* rename */
9735 return -KEY_rename;
9736 }
9737
9738 goto unknown;
9739
9740 case 't':
9741 if (name[3] == 'u' &&
9742 name[4] == 'r' &&
9743 name[5] == 'n')
9744 { /* return */
9745 return KEY_return;
9746 }
9747
9748 goto unknown;
9749
9750 default:
9751 goto unknown;
9752 }
9753
9754 case 'i':
9755 if (name[2] == 'n' &&
9756 name[3] == 'd' &&
9757 name[4] == 'e' &&
9758 name[5] == 'x')
9759 { /* rindex */
9760 return -KEY_rindex;
9761 }
9762
9763 goto unknown;
9764
9765 default:
9766 goto unknown;
9767 }
9768
9769 case 's':
9770 switch (name[1])
9771 {
9772 case 'c':
9773 if (name[2] == 'a' &&
9774 name[3] == 'l' &&
9775 name[4] == 'a' &&
9776 name[5] == 'r')
9777 { /* scalar */
9778 return KEY_scalar;
9779 }
9780
9781 goto unknown;
9782
9783 case 'e':
9784 switch (name[2])
9785 {
9786 case 'l':
9787 if (name[3] == 'e' &&
9788 name[4] == 'c' &&
9789 name[5] == 't')
9790 { /* select */
9791 return -KEY_select;
9792 }
9793
9794 goto unknown;
9795
9796 case 'm':
9797 switch (name[3])
9798 {
9799 case 'c':
9800 if (name[4] == 't' &&
9801 name[5] == 'l')
9802 { /* semctl */
9803 return -KEY_semctl;
9804 }
9805
9806 goto unknown;
9807
9808 case 'g':
9809 if (name[4] == 'e' &&
9810 name[5] == 't')
9811 { /* semget */
9812 return -KEY_semget;
9813 }
9814
9815 goto unknown;
9816
9817 default:
9818 goto unknown;
9819 }
9820
9821 default:
9822 goto unknown;
9823 }
9824
9825 case 'h':
9826 if (name[2] == 'm')
9827 {
9828 switch (name[3])
9829 {
9830 case 'c':
9831 if (name[4] == 't' &&
9832 name[5] == 'l')
9833 { /* shmctl */
9834 return -KEY_shmctl;
9835 }
9836
9837 goto unknown;
9838
9839 case 'g':
9840 if (name[4] == 'e' &&
9841 name[5] == 't')
9842 { /* shmget */
9843 return -KEY_shmget;
9844 }
9845
9846 goto unknown;
9847
9848 default:
9849 goto unknown;
9850 }
9851 }
9852
9853 goto unknown;
9854
9855 case 'o':
9856 if (name[2] == 'c' &&
9857 name[3] == 'k' &&
9858 name[4] == 'e' &&
9859 name[5] == 't')
9860 { /* socket */
9861 return -KEY_socket;
9862 }
9863
9864 goto unknown;
9865
9866 case 'p':
9867 if (name[2] == 'l' &&
9868 name[3] == 'i' &&
9869 name[4] == 'c' &&
9870 name[5] == 'e')
9871 { /* splice */
9872 return -KEY_splice;
9873 }
9874
9875 goto unknown;
9876
9877 case 'u':
9878 if (name[2] == 'b' &&
9879 name[3] == 's' &&
9880 name[4] == 't' &&
9881 name[5] == 'r')
9882 { /* substr */
9883 return -KEY_substr;
9884 }
9885
9886 goto unknown;
9887
9888 case 'y':
9889 if (name[2] == 's' &&
9890 name[3] == 't' &&
9891 name[4] == 'e' &&
9892 name[5] == 'm')
9893 { /* system */
9894 return -KEY_system;
9895 }
9896
9897 goto unknown;
9898
9899 default:
9900 goto unknown;
9901 }
9902
9903 case 'u':
9904 if (name[1] == 'n')
9905 {
9906 switch (name[2])
9907 {
9908 case 'l':
9909 switch (name[3])
9910 {
9911 case 'e':
9912 if (name[4] == 's' &&
9913 name[5] == 's')
9914 { /* unless */
9915 return KEY_unless;
9916 }
9917
9918 goto unknown;
9919
9920 case 'i':
9921 if (name[4] == 'n' &&
9922 name[5] == 'k')
9923 { /* unlink */
9924 return -KEY_unlink;
9925 }
9926
9927 goto unknown;
9928
9929 default:
9930 goto unknown;
9931 }
9932
9933 case 'p':
9934 if (name[3] == 'a' &&
9935 name[4] == 'c' &&
9936 name[5] == 'k')
9937 { /* unpack */
9938 return -KEY_unpack;
9939 }
9940
9941 goto unknown;
9942
9943 default:
9944 goto unknown;
9945 }
9946 }
9947
9948 goto unknown;
9949
9950 case 'v':
9951 if (name[1] == 'a' &&
9952 name[2] == 'l' &&
9953 name[3] == 'u' &&
9954 name[4] == 'e' &&
9955 name[5] == 's')
9956 { /* values */
9957 return -KEY_values;
9958 }
9959
9960 goto unknown;
9961
9962 default:
9963 goto unknown;
e2e1dd5a 9964 }
4c3bbe0f 9965
0d863452 9966 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
9967 switch (name[0])
9968 {
9969 case 'D':
9970 if (name[1] == 'E' &&
9971 name[2] == 'S' &&
9972 name[3] == 'T' &&
9973 name[4] == 'R' &&
9974 name[5] == 'O' &&
9975 name[6] == 'Y')
9976 { /* DESTROY */
9977 return KEY_DESTROY;
9978 }
9979
9980 goto unknown;
9981
9982 case '_':
9983 if (name[1] == '_' &&
9984 name[2] == 'E' &&
9985 name[3] == 'N' &&
9986 name[4] == 'D' &&
9987 name[5] == '_' &&
9988 name[6] == '_')
9989 { /* __END__ */
9990 return KEY___END__;
9991 }
9992
9993 goto unknown;
9994
9995 case 'b':
9996 if (name[1] == 'i' &&
9997 name[2] == 'n' &&
9998 name[3] == 'm' &&
9999 name[4] == 'o' &&
10000 name[5] == 'd' &&
10001 name[6] == 'e')
10002 { /* binmode */
10003 return -KEY_binmode;
10004 }
10005
10006 goto unknown;
10007
10008 case 'c':
10009 if (name[1] == 'o' &&
10010 name[2] == 'n' &&
10011 name[3] == 'n' &&
10012 name[4] == 'e' &&
10013 name[5] == 'c' &&
10014 name[6] == 't')
10015 { /* connect */
10016 return -KEY_connect;
10017 }
10018
10019 goto unknown;
10020
10021 case 'd':
10022 switch (name[1])
10023 {
10024 case 'b':
10025 if (name[2] == 'm' &&
10026 name[3] == 'o' &&
10027 name[4] == 'p' &&
10028 name[5] == 'e' &&
10029 name[6] == 'n')
10030 { /* dbmopen */
10031 return -KEY_dbmopen;
10032 }
10033
10034 goto unknown;
10035
10036 case 'e':
0d863452
RH
10037 if (name[2] == 'f')
10038 {
10039 switch (name[3])
10040 {
10041 case 'a':
10042 if (name[4] == 'u' &&
10043 name[5] == 'l' &&
10044 name[6] == 't')
10045 { /* default */
5458a98a 10046 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
10047 }
10048
10049 goto unknown;
10050
10051 case 'i':
10052 if (name[4] == 'n' &&
952306ac
RGS
10053 name[5] == 'e' &&
10054 name[6] == 'd')
10055 { /* defined */
10056 return KEY_defined;
10057 }
4c3bbe0f 10058
952306ac 10059 goto unknown;
4c3bbe0f 10060
952306ac
RGS
10061 default:
10062 goto unknown;
10063 }
0d863452
RH
10064 }
10065
10066 goto unknown;
10067
10068 default:
10069 goto unknown;
10070 }
4c3bbe0f
MHM
10071
10072 case 'f':
10073 if (name[1] == 'o' &&
10074 name[2] == 'r' &&
10075 name[3] == 'e' &&
10076 name[4] == 'a' &&
10077 name[5] == 'c' &&
10078 name[6] == 'h')
10079 { /* foreach */
10080 return KEY_foreach;
10081 }
10082
10083 goto unknown;
10084
10085 case 'g':
10086 if (name[1] == 'e' &&
10087 name[2] == 't' &&
10088 name[3] == 'p')
10089 {
10090 switch (name[4])
10091 {
10092 case 'g':
10093 if (name[5] == 'r' &&
10094 name[6] == 'p')
10095 { /* getpgrp */
10096 return -KEY_getpgrp;
10097 }
10098
10099 goto unknown;
10100
10101 case 'p':
10102 if (name[5] == 'i' &&
10103 name[6] == 'd')
10104 { /* getppid */
10105 return -KEY_getppid;
10106 }
10107
10108 goto unknown;
10109
10110 default:
10111 goto unknown;
10112 }
10113 }
10114
10115 goto unknown;
10116
10117 case 'l':
10118 if (name[1] == 'c' &&
10119 name[2] == 'f' &&
10120 name[3] == 'i' &&
10121 name[4] == 'r' &&
10122 name[5] == 's' &&
10123 name[6] == 't')
10124 { /* lcfirst */
10125 return -KEY_lcfirst;
10126 }
10127
10128 goto unknown;
10129
10130 case 'o':
10131 if (name[1] == 'p' &&
10132 name[2] == 'e' &&
10133 name[3] == 'n' &&
10134 name[4] == 'd' &&
10135 name[5] == 'i' &&
10136 name[6] == 'r')
10137 { /* opendir */
10138 return -KEY_opendir;
10139 }
10140
10141 goto unknown;
10142
10143 case 'p':
10144 if (name[1] == 'a' &&
10145 name[2] == 'c' &&
10146 name[3] == 'k' &&
10147 name[4] == 'a' &&
10148 name[5] == 'g' &&
10149 name[6] == 'e')
10150 { /* package */
10151 return KEY_package;
10152 }
10153
10154 goto unknown;
10155
10156 case 'r':
10157 if (name[1] == 'e')
10158 {
10159 switch (name[2])
10160 {
10161 case 'a':
10162 if (name[3] == 'd' &&
10163 name[4] == 'd' &&
10164 name[5] == 'i' &&
10165 name[6] == 'r')
10166 { /* readdir */
10167 return -KEY_readdir;
10168 }
10169
10170 goto unknown;
10171
10172 case 'q':
10173 if (name[3] == 'u' &&
10174 name[4] == 'i' &&
10175 name[5] == 'r' &&
10176 name[6] == 'e')
10177 { /* require */
10178 return KEY_require;
10179 }
10180
10181 goto unknown;
10182
10183 case 'v':
10184 if (name[3] == 'e' &&
10185 name[4] == 'r' &&
10186 name[5] == 's' &&
10187 name[6] == 'e')
10188 { /* reverse */
10189 return -KEY_reverse;
10190 }
10191
10192 goto unknown;
10193
10194 default:
10195 goto unknown;
10196 }
10197 }
10198
10199 goto unknown;
10200
10201 case 's':
10202 switch (name[1])
10203 {
10204 case 'e':
10205 switch (name[2])
10206 {
10207 case 'e':
10208 if (name[3] == 'k' &&
10209 name[4] == 'd' &&
10210 name[5] == 'i' &&
10211 name[6] == 'r')
10212 { /* seekdir */
10213 return -KEY_seekdir;
10214 }
10215
10216 goto unknown;
10217
10218 case 't':
10219 if (name[3] == 'p' &&
10220 name[4] == 'g' &&
10221 name[5] == 'r' &&
10222 name[6] == 'p')
10223 { /* setpgrp */
10224 return -KEY_setpgrp;
10225 }
10226
10227 goto unknown;
10228
10229 default:
10230 goto unknown;
10231 }
10232
10233 case 'h':
10234 if (name[2] == 'm' &&
10235 name[3] == 'r' &&
10236 name[4] == 'e' &&
10237 name[5] == 'a' &&
10238 name[6] == 'd')
10239 { /* shmread */
10240 return -KEY_shmread;
10241 }
10242
10243 goto unknown;
10244
10245 case 'p':
10246 if (name[2] == 'r' &&
10247 name[3] == 'i' &&
10248 name[4] == 'n' &&
10249 name[5] == 't' &&
10250 name[6] == 'f')
10251 { /* sprintf */
10252 return -KEY_sprintf;
10253 }
10254
10255 goto unknown;
10256
10257 case 'y':
10258 switch (name[2])
10259 {
10260 case 'm':
10261 if (name[3] == 'l' &&
10262 name[4] == 'i' &&
10263 name[5] == 'n' &&
10264 name[6] == 'k')
10265 { /* symlink */
10266 return -KEY_symlink;
10267 }
10268
10269 goto unknown;
10270
10271 case 's':
10272 switch (name[3])
10273 {
10274 case 'c':
10275 if (name[4] == 'a' &&
10276 name[5] == 'l' &&
10277 name[6] == 'l')
10278 { /* syscall */
10279 return -KEY_syscall;
10280 }
10281
10282 goto unknown;
10283
10284 case 'o':
10285 if (name[4] == 'p' &&
10286 name[5] == 'e' &&
10287 name[6] == 'n')
10288 { /* sysopen */
10289 return -KEY_sysopen;
10290 }
10291
10292 goto unknown;
10293
10294 case 'r':
10295 if (name[4] == 'e' &&
10296 name[5] == 'a' &&
10297 name[6] == 'd')
10298 { /* sysread */
10299 return -KEY_sysread;
10300 }
10301
10302 goto unknown;
10303
10304 case 's':
10305 if (name[4] == 'e' &&
10306 name[5] == 'e' &&
10307 name[6] == 'k')
10308 { /* sysseek */
10309 return -KEY_sysseek;
10310 }
10311
10312 goto unknown;
10313
10314 default:
10315 goto unknown;
10316 }
10317
10318 default:
10319 goto unknown;
10320 }
10321
10322 default:
10323 goto unknown;
10324 }
10325
10326 case 't':
10327 if (name[1] == 'e' &&
10328 name[2] == 'l' &&
10329 name[3] == 'l' &&
10330 name[4] == 'd' &&
10331 name[5] == 'i' &&
10332 name[6] == 'r')
10333 { /* telldir */
10334 return -KEY_telldir;
10335 }
10336
10337 goto unknown;
10338
10339 case 'u':
10340 switch (name[1])
10341 {
10342 case 'c':
10343 if (name[2] == 'f' &&
10344 name[3] == 'i' &&
10345 name[4] == 'r' &&
10346 name[5] == 's' &&
10347 name[6] == 't')
10348 { /* ucfirst */
10349 return -KEY_ucfirst;
10350 }
10351
10352 goto unknown;
10353
10354 case 'n':
10355 if (name[2] == 's' &&
10356 name[3] == 'h' &&
10357 name[4] == 'i' &&
10358 name[5] == 'f' &&
10359 name[6] == 't')
10360 { /* unshift */
10361 return -KEY_unshift;
10362 }
10363
10364 goto unknown;
10365
10366 default:
10367 goto unknown;
10368 }
10369
10370 case 'w':
10371 if (name[1] == 'a' &&
10372 name[2] == 'i' &&
10373 name[3] == 't' &&
10374 name[4] == 'p' &&
10375 name[5] == 'i' &&
10376 name[6] == 'd')
10377 { /* waitpid */
10378 return -KEY_waitpid;
10379 }
10380
10381 goto unknown;
10382
10383 default:
10384 goto unknown;
10385 }
10386
10387 case 8: /* 26 tokens of length 8 */
10388 switch (name[0])
10389 {
10390 case 'A':
10391 if (name[1] == 'U' &&
10392 name[2] == 'T' &&
10393 name[3] == 'O' &&
10394 name[4] == 'L' &&
10395 name[5] == 'O' &&
10396 name[6] == 'A' &&
10397 name[7] == 'D')
10398 { /* AUTOLOAD */
10399 return KEY_AUTOLOAD;
10400 }
10401
10402 goto unknown;
10403
10404 case '_':
10405 if (name[1] == '_')
10406 {
10407 switch (name[2])
10408 {
10409 case 'D':
10410 if (name[3] == 'A' &&
10411 name[4] == 'T' &&
10412 name[5] == 'A' &&
10413 name[6] == '_' &&
10414 name[7] == '_')
10415 { /* __DATA__ */
10416 return KEY___DATA__;
10417 }
10418
10419 goto unknown;
10420
10421 case 'F':
10422 if (name[3] == 'I' &&
10423 name[4] == 'L' &&
10424 name[5] == 'E' &&
10425 name[6] == '_' &&
10426 name[7] == '_')
10427 { /* __FILE__ */
10428 return -KEY___FILE__;
10429 }
10430
10431 goto unknown;
10432
10433 case 'L':
10434 if (name[3] == 'I' &&
10435 name[4] == 'N' &&
10436 name[5] == 'E' &&
10437 name[6] == '_' &&
10438 name[7] == '_')
10439 { /* __LINE__ */
10440 return -KEY___LINE__;
10441 }
10442
10443 goto unknown;
10444
10445 default:
10446 goto unknown;
10447 }
10448 }
10449
10450 goto unknown;
10451
10452 case 'c':
10453 switch (name[1])
10454 {
10455 case 'l':
10456 if (name[2] == 'o' &&
10457 name[3] == 's' &&
10458 name[4] == 'e' &&
10459 name[5] == 'd' &&
10460 name[6] == 'i' &&
10461 name[7] == 'r')
10462 { /* closedir */
10463 return -KEY_closedir;
10464 }
10465
10466 goto unknown;
10467
10468 case 'o':
10469 if (name[2] == 'n' &&
10470 name[3] == 't' &&
10471 name[4] == 'i' &&
10472 name[5] == 'n' &&
10473 name[6] == 'u' &&
10474 name[7] == 'e')
10475 { /* continue */
10476 return -KEY_continue;
10477 }
10478
10479 goto unknown;
10480
10481 default:
10482 goto unknown;
10483 }
10484
10485 case 'd':
10486 if (name[1] == 'b' &&
10487 name[2] == 'm' &&
10488 name[3] == 'c' &&
10489 name[4] == 'l' &&
10490 name[5] == 'o' &&
10491 name[6] == 's' &&
10492 name[7] == 'e')
10493 { /* dbmclose */
10494 return -KEY_dbmclose;
10495 }
10496
10497 goto unknown;
10498
10499 case 'e':
10500 if (name[1] == 'n' &&
10501 name[2] == 'd')
10502 {
10503 switch (name[3])
10504 {
10505 case 'g':
10506 if (name[4] == 'r' &&
10507 name[5] == 'e' &&
10508 name[6] == 'n' &&
10509 name[7] == 't')
10510 { /* endgrent */
10511 return -KEY_endgrent;
10512 }
10513
10514 goto unknown;
10515
10516 case 'p':
10517 if (name[4] == 'w' &&
10518 name[5] == 'e' &&
10519 name[6] == 'n' &&
10520 name[7] == 't')
10521 { /* endpwent */
10522 return -KEY_endpwent;
10523 }
10524
10525 goto unknown;
10526
10527 default:
10528 goto unknown;
10529 }
10530 }
10531
10532 goto unknown;
10533
10534 case 'f':
10535 if (name[1] == 'o' &&
10536 name[2] == 'r' &&
10537 name[3] == 'm' &&
10538 name[4] == 'l' &&
10539 name[5] == 'i' &&
10540 name[6] == 'n' &&
10541 name[7] == 'e')
10542 { /* formline */
10543 return -KEY_formline;
10544 }
10545
10546 goto unknown;
10547
10548 case 'g':
10549 if (name[1] == 'e' &&
10550 name[2] == 't')
10551 {
10552 switch (name[3])
10553 {
10554 case 'g':
10555 if (name[4] == 'r')
10556 {
10557 switch (name[5])
10558 {
10559 case 'e':
10560 if (name[6] == 'n' &&
10561 name[7] == 't')
10562 { /* getgrent */
10563 return -KEY_getgrent;
10564 }
10565
10566 goto unknown;
10567
10568 case 'g':
10569 if (name[6] == 'i' &&
10570 name[7] == 'd')
10571 { /* getgrgid */
10572 return -KEY_getgrgid;
10573 }
10574
10575 goto unknown;
10576
10577 case 'n':
10578 if (name[6] == 'a' &&
10579 name[7] == 'm')
10580 { /* getgrnam */
10581 return -KEY_getgrnam;
10582 }
10583
10584 goto unknown;
10585
10586 default:
10587 goto unknown;
10588 }
10589 }
10590
10591 goto unknown;
10592
10593 case 'l':
10594 if (name[4] == 'o' &&
10595 name[5] == 'g' &&
10596 name[6] == 'i' &&
10597 name[7] == 'n')
10598 { /* getlogin */
10599 return -KEY_getlogin;
10600 }
10601
10602 goto unknown;
10603
10604 case 'p':
10605 if (name[4] == 'w')
10606 {
10607 switch (name[5])
10608 {
10609 case 'e':
10610 if (name[6] == 'n' &&
10611 name[7] == 't')
10612 { /* getpwent */
10613 return -KEY_getpwent;
10614 }
10615
10616 goto unknown;
10617
10618 case 'n':
10619 if (name[6] == 'a' &&
10620 name[7] == 'm')
10621 { /* getpwnam */
10622 return -KEY_getpwnam;
10623 }
10624
10625 goto unknown;
10626
10627 case 'u':
10628 if (name[6] == 'i' &&
10629 name[7] == 'd')
10630 { /* getpwuid */
10631 return -KEY_getpwuid;
10632 }
10633
10634 goto unknown;
10635
10636 default:
10637 goto unknown;
10638 }
10639 }
10640
10641 goto unknown;
10642
10643 default:
10644 goto unknown;
10645 }
10646 }
10647
10648 goto unknown;
10649
10650 case 'r':
10651 if (name[1] == 'e' &&
10652 name[2] == 'a' &&
10653 name[3] == 'd')
10654 {
10655 switch (name[4])
10656 {
10657 case 'l':
10658 if (name[5] == 'i' &&
10659 name[6] == 'n')
10660 {
10661 switch (name[7])
10662 {
10663 case 'e':
10664 { /* readline */
10665 return -KEY_readline;
10666 }
10667
4c3bbe0f
MHM
10668 case 'k':
10669 { /* readlink */
10670 return -KEY_readlink;
10671 }
10672
4c3bbe0f
MHM
10673 default:
10674 goto unknown;
10675 }
10676 }
10677
10678 goto unknown;
10679
10680 case 'p':
10681 if (name[5] == 'i' &&
10682 name[6] == 'p' &&
10683 name[7] == 'e')
10684 { /* readpipe */
10685 return -KEY_readpipe;
10686 }
10687
10688 goto unknown;
10689
10690 default:
10691 goto unknown;
10692 }
10693 }
10694
10695 goto unknown;
10696
10697 case 's':
10698 switch (name[1])
10699 {
10700 case 'e':
10701 if (name[2] == 't')
10702 {
10703 switch (name[3])
10704 {
10705 case 'g':
10706 if (name[4] == 'r' &&
10707 name[5] == 'e' &&
10708 name[6] == 'n' &&
10709 name[7] == 't')
10710 { /* setgrent */
10711 return -KEY_setgrent;
10712 }
10713
10714 goto unknown;
10715
10716 case 'p':
10717 if (name[4] == 'w' &&
10718 name[5] == 'e' &&
10719 name[6] == 'n' &&
10720 name[7] == 't')
10721 { /* setpwent */
10722 return -KEY_setpwent;
10723 }
10724
10725 goto unknown;
10726
10727 default:
10728 goto unknown;
10729 }
10730 }
10731
10732 goto unknown;
10733
10734 case 'h':
10735 switch (name[2])
10736 {
10737 case 'm':
10738 if (name[3] == 'w' &&
10739 name[4] == 'r' &&
10740 name[5] == 'i' &&
10741 name[6] == 't' &&
10742 name[7] == 'e')
10743 { /* shmwrite */
10744 return -KEY_shmwrite;
10745 }
10746
10747 goto unknown;
10748
10749 case 'u':
10750 if (name[3] == 't' &&
10751 name[4] == 'd' &&
10752 name[5] == 'o' &&
10753 name[6] == 'w' &&
10754 name[7] == 'n')
10755 { /* shutdown */
10756 return -KEY_shutdown;
10757 }
10758
10759 goto unknown;
10760
10761 default:
10762 goto unknown;
10763 }
10764
10765 case 'y':
10766 if (name[2] == 's' &&
10767 name[3] == 'w' &&
10768 name[4] == 'r' &&
10769 name[5] == 'i' &&
10770 name[6] == 't' &&
10771 name[7] == 'e')
10772 { /* syswrite */
10773 return -KEY_syswrite;
10774 }
10775
10776 goto unknown;
10777
10778 default:
10779 goto unknown;
10780 }
10781
10782 case 't':
10783 if (name[1] == 'r' &&
10784 name[2] == 'u' &&
10785 name[3] == 'n' &&
10786 name[4] == 'c' &&
10787 name[5] == 'a' &&
10788 name[6] == 't' &&
10789 name[7] == 'e')
10790 { /* truncate */
10791 return -KEY_truncate;
10792 }
10793
10794 goto unknown;
10795
10796 default:
10797 goto unknown;
10798 }
10799
3c10abe3 10800 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10801 switch (name[0])
10802 {
3c10abe3
AG
10803 case 'U':
10804 if (name[1] == 'N' &&
10805 name[2] == 'I' &&
10806 name[3] == 'T' &&
10807 name[4] == 'C' &&
10808 name[5] == 'H' &&
10809 name[6] == 'E' &&
10810 name[7] == 'C' &&
10811 name[8] == 'K')
10812 { /* UNITCHECK */
10813 return KEY_UNITCHECK;
10814 }
10815
10816 goto unknown;
10817
4c3bbe0f
MHM
10818 case 'e':
10819 if (name[1] == 'n' &&
10820 name[2] == 'd' &&
10821 name[3] == 'n' &&
10822 name[4] == 'e' &&
10823 name[5] == 't' &&
10824 name[6] == 'e' &&
10825 name[7] == 'n' &&
10826 name[8] == 't')
10827 { /* endnetent */
10828 return -KEY_endnetent;
10829 }
10830
10831 goto unknown;
10832
10833 case 'g':
10834 if (name[1] == 'e' &&
10835 name[2] == 't' &&
10836 name[3] == 'n' &&
10837 name[4] == 'e' &&
10838 name[5] == 't' &&
10839 name[6] == 'e' &&
10840 name[7] == 'n' &&
10841 name[8] == 't')
10842 { /* getnetent */
10843 return -KEY_getnetent;
10844 }
10845
10846 goto unknown;
10847
10848 case 'l':
10849 if (name[1] == 'o' &&
10850 name[2] == 'c' &&
10851 name[3] == 'a' &&
10852 name[4] == 'l' &&
10853 name[5] == 't' &&
10854 name[6] == 'i' &&
10855 name[7] == 'm' &&
10856 name[8] == 'e')
10857 { /* localtime */
10858 return -KEY_localtime;
10859 }
10860
10861 goto unknown;
10862
10863 case 'p':
10864 if (name[1] == 'r' &&
10865 name[2] == 'o' &&
10866 name[3] == 't' &&
10867 name[4] == 'o' &&
10868 name[5] == 't' &&
10869 name[6] == 'y' &&
10870 name[7] == 'p' &&
10871 name[8] == 'e')
10872 { /* prototype */
10873 return KEY_prototype;
10874 }
10875
10876 goto unknown;
10877
10878 case 'q':
10879 if (name[1] == 'u' &&
10880 name[2] == 'o' &&
10881 name[3] == 't' &&
10882 name[4] == 'e' &&
10883 name[5] == 'm' &&
10884 name[6] == 'e' &&
10885 name[7] == 't' &&
10886 name[8] == 'a')
10887 { /* quotemeta */
10888 return -KEY_quotemeta;
10889 }
10890
10891 goto unknown;
10892
10893 case 'r':
10894 if (name[1] == 'e' &&
10895 name[2] == 'w' &&
10896 name[3] == 'i' &&
10897 name[4] == 'n' &&
10898 name[5] == 'd' &&
10899 name[6] == 'd' &&
10900 name[7] == 'i' &&
10901 name[8] == 'r')
10902 { /* rewinddir */
10903 return -KEY_rewinddir;
10904 }
10905
10906 goto unknown;
10907
10908 case 's':
10909 if (name[1] == 'e' &&
10910 name[2] == 't' &&
10911 name[3] == 'n' &&
10912 name[4] == 'e' &&
10913 name[5] == 't' &&
10914 name[6] == 'e' &&
10915 name[7] == 'n' &&
10916 name[8] == 't')
10917 { /* setnetent */
10918 return -KEY_setnetent;
10919 }
10920
10921 goto unknown;
10922
10923 case 'w':
10924 if (name[1] == 'a' &&
10925 name[2] == 'n' &&
10926 name[3] == 't' &&
10927 name[4] == 'a' &&
10928 name[5] == 'r' &&
10929 name[6] == 'r' &&
10930 name[7] == 'a' &&
10931 name[8] == 'y')
10932 { /* wantarray */
10933 return -KEY_wantarray;
10934 }
10935
10936 goto unknown;
10937
10938 default:
10939 goto unknown;
10940 }
10941
10942 case 10: /* 9 tokens of length 10 */
10943 switch (name[0])
10944 {
10945 case 'e':
10946 if (name[1] == 'n' &&
10947 name[2] == 'd')
10948 {
10949 switch (name[3])
10950 {
10951 case 'h':
10952 if (name[4] == 'o' &&
10953 name[5] == 's' &&
10954 name[6] == 't' &&
10955 name[7] == 'e' &&
10956 name[8] == 'n' &&
10957 name[9] == 't')
10958 { /* endhostent */
10959 return -KEY_endhostent;
10960 }
10961
10962 goto unknown;
10963
10964 case 's':
10965 if (name[4] == 'e' &&
10966 name[5] == 'r' &&
10967 name[6] == 'v' &&
10968 name[7] == 'e' &&
10969 name[8] == 'n' &&
10970 name[9] == 't')
10971 { /* endservent */
10972 return -KEY_endservent;
10973 }
10974
10975 goto unknown;
10976
10977 default:
10978 goto unknown;
10979 }
10980 }
10981
10982 goto unknown;
10983
10984 case 'g':
10985 if (name[1] == 'e' &&
10986 name[2] == 't')
10987 {
10988 switch (name[3])
10989 {
10990 case 'h':
10991 if (name[4] == 'o' &&
10992 name[5] == 's' &&
10993 name[6] == 't' &&
10994 name[7] == 'e' &&
10995 name[8] == 'n' &&
10996 name[9] == 't')
10997 { /* gethostent */
10998 return -KEY_gethostent;
10999 }
11000
11001 goto unknown;
11002
11003 case 's':
11004 switch (name[4])
11005 {
11006 case 'e':
11007 if (name[5] == 'r' &&
11008 name[6] == 'v' &&
11009 name[7] == 'e' &&
11010 name[8] == 'n' &&
11011 name[9] == 't')
11012 { /* getservent */
11013 return -KEY_getservent;
11014 }
11015
11016 goto unknown;
11017
11018 case 'o':
11019 if (name[5] == 'c' &&
11020 name[6] == 'k' &&
11021 name[7] == 'o' &&
11022 name[8] == 'p' &&
11023 name[9] == 't')
11024 { /* getsockopt */
11025 return -KEY_getsockopt;
11026 }
11027
11028 goto unknown;
11029
11030 default:
11031 goto unknown;
11032 }
11033
11034 default:
11035 goto unknown;
11036 }
11037 }
11038
11039 goto unknown;
11040
11041 case 's':
11042 switch (name[1])
11043 {
11044 case 'e':
11045 if (name[2] == 't')
11046 {
11047 switch (name[3])
11048 {
11049 case 'h':
11050 if (name[4] == 'o' &&
11051 name[5] == 's' &&
11052 name[6] == 't' &&
11053 name[7] == 'e' &&
11054 name[8] == 'n' &&
11055 name[9] == 't')
11056 { /* sethostent */
11057 return -KEY_sethostent;
11058 }
11059
11060 goto unknown;
11061
11062 case 's':
11063 switch (name[4])
11064 {
11065 case 'e':
11066 if (name[5] == 'r' &&
11067 name[6] == 'v' &&
11068 name[7] == 'e' &&
11069 name[8] == 'n' &&
11070 name[9] == 't')
11071 { /* setservent */
11072 return -KEY_setservent;
11073 }
11074
11075 goto unknown;
11076
11077 case 'o':
11078 if (name[5] == 'c' &&
11079 name[6] == 'k' &&
11080 name[7] == 'o' &&
11081 name[8] == 'p' &&
11082 name[9] == 't')
11083 { /* setsockopt */
11084 return -KEY_setsockopt;
11085 }
11086
11087 goto unknown;
11088
11089 default:
11090 goto unknown;
11091 }
11092
11093 default:
11094 goto unknown;
11095 }
11096 }
11097
11098 goto unknown;
11099
11100 case 'o':
11101 if (name[2] == 'c' &&
11102 name[3] == 'k' &&
11103 name[4] == 'e' &&
11104 name[5] == 't' &&
11105 name[6] == 'p' &&
11106 name[7] == 'a' &&
11107 name[8] == 'i' &&
11108 name[9] == 'r')
11109 { /* socketpair */
11110 return -KEY_socketpair;
11111 }
11112
11113 goto unknown;
11114
11115 default:
11116 goto unknown;
11117 }
11118
11119 default:
11120 goto unknown;
e2e1dd5a 11121 }
4c3bbe0f
MHM
11122
11123 case 11: /* 8 tokens of length 11 */
11124 switch (name[0])
11125 {
11126 case '_':
11127 if (name[1] == '_' &&
11128 name[2] == 'P' &&
11129 name[3] == 'A' &&
11130 name[4] == 'C' &&
11131 name[5] == 'K' &&
11132 name[6] == 'A' &&
11133 name[7] == 'G' &&
11134 name[8] == 'E' &&
11135 name[9] == '_' &&
11136 name[10] == '_')
11137 { /* __PACKAGE__ */
11138 return -KEY___PACKAGE__;
11139 }
11140
11141 goto unknown;
11142
11143 case 'e':
11144 if (name[1] == 'n' &&
11145 name[2] == 'd' &&
11146 name[3] == 'p' &&
11147 name[4] == 'r' &&
11148 name[5] == 'o' &&
11149 name[6] == 't' &&
11150 name[7] == 'o' &&
11151 name[8] == 'e' &&
11152 name[9] == 'n' &&
11153 name[10] == 't')
11154 { /* endprotoent */
11155 return -KEY_endprotoent;
11156 }
11157
11158 goto unknown;
11159
11160 case 'g':
11161 if (name[1] == 'e' &&
11162 name[2] == 't')
11163 {
11164 switch (name[3])
11165 {
11166 case 'p':
11167 switch (name[4])
11168 {
11169 case 'e':
11170 if (name[5] == 'e' &&
11171 name[6] == 'r' &&
11172 name[7] == 'n' &&
11173 name[8] == 'a' &&
11174 name[9] == 'm' &&
11175 name[10] == 'e')
11176 { /* getpeername */
11177 return -KEY_getpeername;
11178 }
11179
11180 goto unknown;
11181
11182 case 'r':
11183 switch (name[5])
11184 {
11185 case 'i':
11186 if (name[6] == 'o' &&
11187 name[7] == 'r' &&
11188 name[8] == 'i' &&
11189 name[9] == 't' &&
11190 name[10] == 'y')
11191 { /* getpriority */
11192 return -KEY_getpriority;
11193 }
11194
11195 goto unknown;
11196
11197 case 'o':
11198 if (name[6] == 't' &&
11199 name[7] == 'o' &&
11200 name[8] == 'e' &&
11201 name[9] == 'n' &&
11202 name[10] == 't')
11203 { /* getprotoent */
11204 return -KEY_getprotoent;
11205 }
11206
11207 goto unknown;
11208
11209 default:
11210 goto unknown;
11211 }
11212
11213 default:
11214 goto unknown;
11215 }
11216
11217 case 's':
11218 if (name[4] == 'o' &&
11219 name[5] == 'c' &&
11220 name[6] == 'k' &&
11221 name[7] == 'n' &&
11222 name[8] == 'a' &&
11223 name[9] == 'm' &&
11224 name[10] == 'e')
11225 { /* getsockname */
11226 return -KEY_getsockname;
11227 }
11228
11229 goto unknown;
11230
11231 default:
11232 goto unknown;
11233 }
11234 }
11235
11236 goto unknown;
11237
11238 case 's':
11239 if (name[1] == 'e' &&
11240 name[2] == 't' &&
11241 name[3] == 'p' &&
11242 name[4] == 'r')
11243 {
11244 switch (name[5])
11245 {
11246 case 'i':
11247 if (name[6] == 'o' &&
11248 name[7] == 'r' &&
11249 name[8] == 'i' &&
11250 name[9] == 't' &&
11251 name[10] == 'y')
11252 { /* setpriority */
11253 return -KEY_setpriority;
11254 }
11255
11256 goto unknown;
11257
11258 case 'o':
11259 if (name[6] == 't' &&
11260 name[7] == 'o' &&
11261 name[8] == 'e' &&
11262 name[9] == 'n' &&
11263 name[10] == 't')
11264 { /* setprotoent */
11265 return -KEY_setprotoent;
11266 }
11267
11268 goto unknown;
11269
11270 default:
11271 goto unknown;
11272 }
11273 }
11274
11275 goto unknown;
11276
11277 default:
11278 goto unknown;
e2e1dd5a 11279 }
4c3bbe0f
MHM
11280
11281 case 12: /* 2 tokens of length 12 */
11282 if (name[0] == 'g' &&
11283 name[1] == 'e' &&
11284 name[2] == 't' &&
11285 name[3] == 'n' &&
11286 name[4] == 'e' &&
11287 name[5] == 't' &&
11288 name[6] == 'b' &&
11289 name[7] == 'y')
11290 {
11291 switch (name[8])
11292 {
11293 case 'a':
11294 if (name[9] == 'd' &&
11295 name[10] == 'd' &&
11296 name[11] == 'r')
11297 { /* getnetbyaddr */
11298 return -KEY_getnetbyaddr;
11299 }
11300
11301 goto unknown;
11302
11303 case 'n':
11304 if (name[9] == 'a' &&
11305 name[10] == 'm' &&
11306 name[11] == 'e')
11307 { /* getnetbyname */
11308 return -KEY_getnetbyname;
11309 }
11310
11311 goto unknown;
11312
11313 default:
11314 goto unknown;
11315 }
e2e1dd5a 11316 }
4c3bbe0f
MHM
11317
11318 goto unknown;
11319
11320 case 13: /* 4 tokens of length 13 */
11321 if (name[0] == 'g' &&
11322 name[1] == 'e' &&
11323 name[2] == 't')
11324 {
11325 switch (name[3])
11326 {
11327 case 'h':
11328 if (name[4] == 'o' &&
11329 name[5] == 's' &&
11330 name[6] == 't' &&
11331 name[7] == 'b' &&
11332 name[8] == 'y')
11333 {
11334 switch (name[9])
11335 {
11336 case 'a':
11337 if (name[10] == 'd' &&
11338 name[11] == 'd' &&
11339 name[12] == 'r')
11340 { /* gethostbyaddr */
11341 return -KEY_gethostbyaddr;
11342 }
11343
11344 goto unknown;
11345
11346 case 'n':
11347 if (name[10] == 'a' &&
11348 name[11] == 'm' &&
11349 name[12] == 'e')
11350 { /* gethostbyname */
11351 return -KEY_gethostbyname;
11352 }
11353
11354 goto unknown;
11355
11356 default:
11357 goto unknown;
11358 }
11359 }
11360
11361 goto unknown;
11362
11363 case 's':
11364 if (name[4] == 'e' &&
11365 name[5] == 'r' &&
11366 name[6] == 'v' &&
11367 name[7] == 'b' &&
11368 name[8] == 'y')
11369 {
11370 switch (name[9])
11371 {
11372 case 'n':
11373 if (name[10] == 'a' &&
11374 name[11] == 'm' &&
11375 name[12] == 'e')
11376 { /* getservbyname */
11377 return -KEY_getservbyname;
11378 }
11379
11380 goto unknown;
11381
11382 case 'p':
11383 if (name[10] == 'o' &&
11384 name[11] == 'r' &&
11385 name[12] == 't')
11386 { /* getservbyport */
11387 return -KEY_getservbyport;
11388 }
11389
11390 goto unknown;
11391
11392 default:
11393 goto unknown;
11394 }
11395 }
11396
11397 goto unknown;
11398
11399 default:
11400 goto unknown;
11401 }
e2e1dd5a 11402 }
4c3bbe0f
MHM
11403
11404 goto unknown;
11405
11406 case 14: /* 1 tokens of length 14 */
11407 if (name[0] == 'g' &&
11408 name[1] == 'e' &&
11409 name[2] == 't' &&
11410 name[3] == 'p' &&
11411 name[4] == 'r' &&
11412 name[5] == 'o' &&
11413 name[6] == 't' &&
11414 name[7] == 'o' &&
11415 name[8] == 'b' &&
11416 name[9] == 'y' &&
11417 name[10] == 'n' &&
11418 name[11] == 'a' &&
11419 name[12] == 'm' &&
11420 name[13] == 'e')
11421 { /* getprotobyname */
11422 return -KEY_getprotobyname;
11423 }
11424
11425 goto unknown;
11426
11427 case 16: /* 1 tokens of length 16 */
11428 if (name[0] == 'g' &&
11429 name[1] == 'e' &&
11430 name[2] == 't' &&
11431 name[3] == 'p' &&
11432 name[4] == 'r' &&
11433 name[5] == 'o' &&
11434 name[6] == 't' &&
11435 name[7] == 'o' &&
11436 name[8] == 'b' &&
11437 name[9] == 'y' &&
11438 name[10] == 'n' &&
11439 name[11] == 'u' &&
11440 name[12] == 'm' &&
11441 name[13] == 'b' &&
11442 name[14] == 'e' &&
11443 name[15] == 'r')
11444 { /* getprotobynumber */
11445 return -KEY_getprotobynumber;
11446 }
11447
11448 goto unknown;
11449
11450 default:
11451 goto unknown;
e2e1dd5a 11452 }
4c3bbe0f
MHM
11453
11454unknown:
e2e1dd5a 11455 return 0;
a687059c
LW
11456}
11457
76e3520e 11458STATIC void
c94115d8 11459S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11460{
97aff369 11461 dVAR;
2f3197b3 11462
7918f24d
NC
11463 PERL_ARGS_ASSERT_CHECKCOMMA;
11464
d008e5eb 11465 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11466 if (ckWARN(WARN_SYNTAX)) {
11467 int level = 1;
26ff0806 11468 const char *w;
d008e5eb
GS
11469 for (w = s+2; *w && level; w++) {
11470 if (*w == '(')
11471 ++level;
11472 else if (*w == ')')
11473 --level;
11474 }
888fea98
NC
11475 while (isSPACE(*w))
11476 ++w;
b1439985
RGS
11477 /* the list of chars below is for end of statements or
11478 * block / parens, boolean operators (&&, ||, //) and branch
11479 * constructs (or, and, if, until, unless, while, err, for).
11480 * Not a very solid hack... */
11481 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11482 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11483 "%s (...) interpreted as function",name);
d008e5eb 11484 }
2f3197b3 11485 }
3280af22 11486 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11487 s++;
a687059c
LW
11488 if (*s == '(')
11489 s++;
3280af22 11490 while (s < PL_bufend && isSPACE(*s))
a687059c 11491 s++;
7e2040f0 11492 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11493 const char * const w = s++;
7e2040f0 11494 while (isALNUM_lazy_if(s,UTF))
a687059c 11495 s++;
3280af22 11496 while (s < PL_bufend && isSPACE(*s))
a687059c 11497 s++;
e929a76b 11498 if (*s == ',') {
c94115d8 11499 GV* gv;
5458a98a 11500 if (keyword(w, s - w, 0))
e929a76b 11501 return;
c94115d8
NC
11502
11503 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11504 if (gv && GvCVu(gv))
abbb3198 11505 return;
cea2e8a9 11506 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11507 }
11508 }
11509}
11510
423cee85
JH
11511/* Either returns sv, or mortalizes sv and returns a new SV*.
11512 Best used as sv=new_constant(..., sv, ...).
11513 If s, pv are NULL, calls subroutine with one argument,
11514 and type is used with error messages only. */
11515
b3ac6de7 11516STATIC SV *
eb0d8d16
NC
11517S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11518 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11519{
27da23d5 11520 dVAR; dSP;
890ce7af 11521 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11522 SV *res;
b3ac6de7
IZ
11523 SV **cvp;
11524 SV *cv, *typesv;
89e33a05 11525 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11526
7918f24d
NC
11527 PERL_ARGS_ASSERT_NEW_CONSTANT;
11528
f0af216f 11529 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11530 SV *msg;
11531
10edeb5d
JH
11532 why2 = (const char *)
11533 (strEQ(key,"charnames")
11534 ? "(possibly a missing \"use charnames ...\")"
11535 : "");
4e553d73 11536 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11537 (type ? type: "undef"), why2);
11538
11539 /* This is convoluted and evil ("goto considered harmful")
11540 * but I do not understand the intricacies of all the different
11541 * failure modes of %^H in here. The goal here is to make
11542 * the most probable error message user-friendly. --jhi */
11543
11544 goto msgdone;
11545
423cee85 11546 report:
4e553d73 11547 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11548 (type ? type: "undef"), why1, why2, why3);
41ab332f 11549 msgdone:
95a20fc0 11550 yyerror(SvPVX_const(msg));
423cee85
JH
11551 SvREFCNT_dec(msg);
11552 return sv;
11553 }
ff3f963a
KW
11554
11555 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
11556 if (PL_error_count > 0 && strEQ(key,"charnames"))
11557 return &PL_sv_undef;
ff3f963a 11558
eb0d8d16 11559 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11560 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11561 why1 = "$^H{";
11562 why2 = key;
f0af216f 11563 why3 = "} is not defined";
423cee85 11564 goto report;
b3ac6de7
IZ
11565 }
11566 sv_2mortal(sv); /* Parent created it permanently */
11567 cv = *cvp;
423cee85 11568 if (!pv && s)
59cd0e26 11569 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11570 if (type && pv)
59cd0e26 11571 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11572 else
423cee85 11573 typesv = &PL_sv_undef;
4e553d73 11574
e788e7d3 11575 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11576 ENTER ;
11577 SAVETMPS;
4e553d73 11578
423cee85 11579 PUSHMARK(SP) ;
a5845cb7 11580 EXTEND(sp, 3);
423cee85
JH
11581 if (pv)
11582 PUSHs(pv);
b3ac6de7 11583 PUSHs(sv);
423cee85
JH
11584 if (pv)
11585 PUSHs(typesv);
b3ac6de7 11586 PUTBACK;
423cee85 11587 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11588
423cee85 11589 SPAGAIN ;
4e553d73 11590
423cee85 11591 /* Check the eval first */
9b0e499b 11592 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11593 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11594 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11595 (void)POPs;
b37c2d43 11596 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11597 }
11598 else {
11599 res = POPs;
b37c2d43 11600 SvREFCNT_inc_simple_void(res);
423cee85 11601 }
4e553d73 11602
423cee85
JH
11603 PUTBACK ;
11604 FREETMPS ;
11605 LEAVE ;
b3ac6de7 11606 POPSTACK;
4e553d73 11607
b3ac6de7 11608 if (!SvOK(res)) {
423cee85
JH
11609 why1 = "Call to &{$^H{";
11610 why2 = key;
f0af216f 11611 why3 = "}} did not return a defined value";
423cee85
JH
11612 sv = res;
11613 goto report;
9b0e499b 11614 }
423cee85 11615
9b0e499b 11616 return res;
b3ac6de7 11617}
4e553d73 11618
d0a148a6
NC
11619/* Returns a NUL terminated string, with the length of the string written to
11620 *slp
11621 */
76e3520e 11622STATIC char *
cea2e8a9 11623S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11624{
97aff369 11625 dVAR;
463ee0b2 11626 register char *d = dest;
890ce7af 11627 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11628
11629 PERL_ARGS_ASSERT_SCAN_WORD;
11630
463ee0b2 11631 for (;;) {
8903cb82 11632 if (d >= e)
cea2e8a9 11633 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11634 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11635 *d++ = *s++;
c35e046a 11636 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11637 *d++ = ':';
11638 *d++ = ':';
11639 s++;
11640 }
c35e046a 11641 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11642 *d++ = *s++;
11643 *d++ = *s++;
11644 }
fd400ab9 11645 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11646 char *t = s + UTF8SKIP(s);
c35e046a 11647 size_t len;
fd400ab9 11648 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11649 t += UTF8SKIP(t);
c35e046a
AL
11650 len = t - s;
11651 if (d + len > e)
cea2e8a9 11652 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11653 Copy(s, d, len, char);
11654 d += len;
a0ed51b3
LW
11655 s = t;
11656 }
463ee0b2
LW
11657 else {
11658 *d = '\0';
11659 *slp = d - dest;
11660 return s;
e929a76b 11661 }
378cc40b
LW
11662 }
11663}
11664
76e3520e 11665STATIC char *
f54cb97a 11666S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11667{
97aff369 11668 dVAR;
6136c704 11669 char *bracket = NULL;
748a9306 11670 char funny = *s++;
6136c704 11671 register char *d = dest;
0b3da58d 11672 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11673
7918f24d
NC
11674 PERL_ARGS_ASSERT_SCAN_IDENT;
11675
a0d0e21e 11676 if (isSPACE(*s))
29595ff2 11677 s = PEEKSPACE(s);
de3bb511 11678 if (isDIGIT(*s)) {
8903cb82 11679 while (isDIGIT(*s)) {
11680 if (d >= e)
cea2e8a9 11681 Perl_croak(aTHX_ ident_too_long);
378cc40b 11682 *d++ = *s++;
8903cb82 11683 }
378cc40b
LW
11684 }
11685 else {
463ee0b2 11686 for (;;) {
8903cb82 11687 if (d >= e)
cea2e8a9 11688 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11689 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11690 *d++ = *s++;
7e2040f0 11691 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11692 *d++ = ':';
11693 *d++ = ':';
11694 s++;
11695 }
a0d0e21e 11696 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11697 *d++ = *s++;
11698 *d++ = *s++;
11699 }
fd400ab9 11700 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11701 char *t = s + UTF8SKIP(s);
fd400ab9 11702 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11703 t += UTF8SKIP(t);
11704 if (d + (t - s) > e)
cea2e8a9 11705 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11706 Copy(s, d, t - s, char);
11707 d += t - s;
11708 s = t;
11709 }
463ee0b2
LW
11710 else
11711 break;
11712 }
378cc40b
LW
11713 }
11714 *d = '\0';
11715 d = dest;
79072805 11716 if (*d) {
3280af22
NIS
11717 if (PL_lex_state != LEX_NORMAL)
11718 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11719 return s;
378cc40b 11720 }
748a9306 11721 if (*s == '$' && s[1] &&
3792a11b 11722 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11723 {
4810e5ec 11724 return s;
5cd24f17 11725 }
79072805
LW
11726 if (*s == '{') {
11727 bracket = s;
11728 s++;
11729 }
11730 else if (ck_uni)
11731 check_uni();
93a17b20 11732 if (s < send)
79072805
LW
11733 *d = *s++;
11734 d[1] = '\0';
2b92dfce 11735 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11736 *d = toCTRL(*s);
11737 s++;
de3bb511 11738 }
79072805 11739 if (bracket) {
748a9306 11740 if (isSPACE(s[-1])) {
fa83b5b6 11741 while (s < send) {
f54cb97a 11742 const char ch = *s++;
bf4acbe4 11743 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11744 *d = ch;
11745 break;
11746 }
11747 }
748a9306 11748 }
7e2040f0 11749 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11750 d++;
a0ed51b3 11751 if (UTF) {
6136c704
AL
11752 char *end = s;
11753 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11754 end += UTF8SKIP(end);
11755 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11756 end += UTF8SKIP(end);
a0ed51b3 11757 }
6136c704
AL
11758 Copy(s, d, end - s, char);
11759 d += end - s;
11760 s = end;
a0ed51b3
LW
11761 }
11762 else {
2b92dfce 11763 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11764 *d++ = *s++;
2b92dfce 11765 if (d >= e)
cea2e8a9 11766 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11767 }
79072805 11768 *d = '\0';
c35e046a
AL
11769 while (s < send && SPACE_OR_TAB(*s))
11770 s++;
ff68c719 11771 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11772 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11773 const char * const brack =
11774 (const char *)
11775 ((*s == '[') ? "[...]" : "{...}");
9014280d 11776 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11777 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11778 funny, dest, brack, funny, dest, brack);
11779 }
79072805 11780 bracket++;
a0be28da 11781 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11782 return s;
11783 }
4e553d73
NIS
11784 }
11785 /* Handle extended ${^Foo} variables
2b92dfce
GS
11786 * 1999-02-27 mjd-perl-patch@plover.com */
11787 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11788 && isALNUM(*s))
11789 {
11790 d++;
11791 while (isALNUM(*s) && d < e) {
11792 *d++ = *s++;
11793 }
11794 if (d >= e)
cea2e8a9 11795 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11796 *d = '\0';
79072805
LW
11797 }
11798 if (*s == '}') {
11799 s++;
7df0d042 11800 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11801 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11802 PL_expect = XREF;
11803 }
d008e5eb 11804 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11805 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11806 (keyword(dest, d - dest, 0)
11807 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11808 {
c35e046a
AL
11809 if (funny == '#')
11810 funny = '@';
9014280d 11811 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11812 "Ambiguous use of %c{%s} resolved to %c%s",
11813 funny, dest, funny, dest);
11814 }
11815 }
79072805
LW
11816 }
11817 else {
11818 s = bracket; /* let the parser handle it */
93a17b20 11819 *dest = '\0';
79072805
LW
11820 }
11821 }
3280af22
NIS
11822 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11823 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11824 return s;
11825}
11826
879d0c72
NC
11827static U32
11828S_pmflag(U32 pmfl, const char ch) {
11829 switch (ch) {
11830 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
4f4d7508
DC
11831 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11832 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11833 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11834 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11835 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
879d0c72
NC
11836 }
11837 return pmfl;
11838}
11839
76e3520e 11840STATIC char *
cea2e8a9 11841S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11842{
97aff369 11843 dVAR;
79072805 11844 PMOP *pm;
5db06880 11845 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11846 const char * const valid_flags =
a20207d7 11847 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11848#ifdef PERL_MAD
11849 char *modstart;
11850#endif
11851
7918f24d 11852 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11853
25c09cbf 11854 if (!s) {
6136c704 11855 const char * const delimiter = skipspace(start);
10edeb5d
JH
11856 Perl_croak(aTHX_
11857 (const char *)
11858 (*delimiter == '?'
11859 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11860 : "Search pattern not terminated" ));
25c09cbf 11861 }
bbce6d69 11862
8782bef2 11863 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11864 if (PL_multi_open == '?') {
11865 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11866 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11867
11868 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11869 allows us to restrict the list needed by reset to just the ??
11870 matches. */
11871 assert(type != OP_TRANS);
11872 if (PL_curstash) {
daba3364 11873 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11874 U32 elements;
11875 if (!mg) {
daba3364 11876 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11877 0);
11878 }
11879 elements = mg->mg_len / sizeof(PMOP**);
11880 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11881 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11882 mg->mg_len = elements * sizeof(PMOP**);
11883 PmopSTASH_set(pm,PL_curstash);
11884 }
11885 }
5db06880
NC
11886#ifdef PERL_MAD
11887 modstart = s;
11888#endif
6136c704 11889 while (*s && strchr(valid_flags, *s))
879d0c72 11890 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
e6897b1a
KW
11891
11892 if (isALNUM(*s)) {
11893 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11894 "Having no space between pattern and following word is deprecated");
11895
11896 }
5db06880
NC
11897#ifdef PERL_MAD
11898 if (PL_madskills && modstart != s) {
11899 SV* tmptoken = newSVpvn(modstart, s - modstart);
11900 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11901 }
11902#endif
4ac733c9 11903 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11904 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11905 {
a2a5de95
NC
11906 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11907 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11908 }
11909
3280af22 11910 PL_lex_op = (OP*)pm;
6154021b 11911 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11912 return s;
11913}
11914
76e3520e 11915STATIC char *
cea2e8a9 11916S_scan_subst(pTHX_ char *start)
79072805 11917{
27da23d5 11918 dVAR;
a0d0e21e 11919 register char *s;
79072805 11920 register PMOP *pm;
4fdae800 11921 I32 first_start;
79072805 11922 I32 es = 0;
5db06880
NC
11923#ifdef PERL_MAD
11924 char *modstart;
11925#endif
79072805 11926
7918f24d
NC
11927 PERL_ARGS_ASSERT_SCAN_SUBST;
11928
6154021b 11929 pl_yylval.ival = OP_NULL;
79072805 11930
5db06880 11931 s = scan_str(start,!!PL_madskills,FALSE);
79072805 11932
37fd879b 11933 if (!s)
cea2e8a9 11934 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 11935
3280af22 11936 if (s[-1] == PL_multi_open)
79072805 11937 s--;
5db06880
NC
11938#ifdef PERL_MAD
11939 if (PL_madskills) {
cd81e915
NC
11940 CURMAD('q', PL_thisopen);
11941 CURMAD('_', PL_thiswhite);
11942 CURMAD('E', PL_thisstuff);
11943 CURMAD('Q', PL_thisclose);
11944 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
11945 }
11946#endif
79072805 11947
3280af22 11948 first_start = PL_multi_start;
5db06880 11949 s = scan_str(s,!!PL_madskills,FALSE);
79072805 11950 if (!s) {
37fd879b 11951 if (PL_lex_stuff) {
3280af22 11952 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 11953 PL_lex_stuff = NULL;
37fd879b 11954 }
cea2e8a9 11955 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 11956 }
3280af22 11957 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 11958
79072805 11959 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
11960
11961#ifdef PERL_MAD
11962 if (PL_madskills) {
cd81e915
NC
11963 CURMAD('z', PL_thisopen);
11964 CURMAD('R', PL_thisstuff);
11965 CURMAD('Z', PL_thisclose);
5db06880
NC
11966 }
11967 modstart = s;
11968#endif
11969
48c036b1 11970 while (*s) {
a20207d7 11971 if (*s == EXEC_PAT_MOD) {
a687059c 11972 s++;
2f3197b3 11973 es++;
a687059c 11974 }
a20207d7 11975 else if (strchr(S_PAT_MODS, *s))
879d0c72 11976 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
48c036b1
GS
11977 else
11978 break;
378cc40b 11979 }
79072805 11980
5db06880
NC
11981#ifdef PERL_MAD
11982 if (PL_madskills) {
11983 if (modstart != s)
11984 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
11985 append_madprops(PL_thismad, (OP*)pm, 0);
11986 PL_thismad = 0;
5db06880
NC
11987 }
11988#endif
a2a5de95
NC
11989 if ((pm->op_pmflags & PMf_CONTINUE)) {
11990 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
11991 }
11992
79072805 11993 if (es) {
6136c704
AL
11994 SV * const repl = newSVpvs("");
11995
0244c3a4
GS
11996 PL_sublex_info.super_bufptr = s;
11997 PL_sublex_info.super_bufend = PL_bufend;
11998 PL_multi_end = 0;
79072805 11999 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
12000 while (es-- > 0) {
12001 if (es)
12002 sv_catpvs(repl, "eval ");
12003 else
12004 sv_catpvs(repl, "do ");
12005 }
6f43d98f 12006 sv_catpvs(repl, "{");
3280af22 12007 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
12008 if (strchr(SvPVX(PL_lex_repl), '#'))
12009 sv_catpvs(repl, "\n");
12010 sv_catpvs(repl, "}");
25da4f38 12011 SvEVALED_on(repl);
3280af22
NIS
12012 SvREFCNT_dec(PL_lex_repl);
12013 PL_lex_repl = repl;
378cc40b 12014 }
79072805 12015
3280af22 12016 PL_lex_op = (OP*)pm;
6154021b 12017 pl_yylval.ival = OP_SUBST;
378cc40b
LW
12018 return s;
12019}
12020
76e3520e 12021STATIC char *
cea2e8a9 12022S_scan_trans(pTHX_ char *start)
378cc40b 12023{
97aff369 12024 dVAR;
a0d0e21e 12025 register char* s;
11343788 12026 OP *o;
79072805 12027 short *tbl;
b84c11c8
NC
12028 U8 squash;
12029 U8 del;
12030 U8 complement;
5db06880
NC
12031#ifdef PERL_MAD
12032 char *modstart;
12033#endif
79072805 12034
7918f24d
NC
12035 PERL_ARGS_ASSERT_SCAN_TRANS;
12036
6154021b 12037 pl_yylval.ival = OP_NULL;
79072805 12038
5db06880 12039 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 12040 if (!s)
cea2e8a9 12041 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 12042
3280af22 12043 if (s[-1] == PL_multi_open)
2f3197b3 12044 s--;
5db06880
NC
12045#ifdef PERL_MAD
12046 if (PL_madskills) {
cd81e915
NC
12047 CURMAD('q', PL_thisopen);
12048 CURMAD('_', PL_thiswhite);
12049 CURMAD('E', PL_thisstuff);
12050 CURMAD('Q', PL_thisclose);
12051 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12052 }
12053#endif
2f3197b3 12054
5db06880 12055 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12056 if (!s) {
37fd879b 12057 if (PL_lex_stuff) {
3280af22 12058 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12059 PL_lex_stuff = NULL;
37fd879b 12060 }
cea2e8a9 12061 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 12062 }
5db06880 12063 if (PL_madskills) {
cd81e915
NC
12064 CURMAD('z', PL_thisopen);
12065 CURMAD('R', PL_thisstuff);
12066 CURMAD('Z', PL_thisclose);
5db06880 12067 }
79072805 12068
a0ed51b3 12069 complement = del = squash = 0;
5db06880
NC
12070#ifdef PERL_MAD
12071 modstart = s;
12072#endif
7a1e2023
NC
12073 while (1) {
12074 switch (*s) {
12075 case 'c':
79072805 12076 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
12077 break;
12078 case 'd':
a0ed51b3 12079 del = OPpTRANS_DELETE;
7a1e2023
NC
12080 break;
12081 case 's':
79072805 12082 squash = OPpTRANS_SQUASH;
7a1e2023
NC
12083 break;
12084 default:
12085 goto no_more;
12086 }
395c3793
LW
12087 s++;
12088 }
7a1e2023 12089 no_more:
8973db79 12090
aa1f7c5b 12091 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
8973db79 12092 o = newPVOP(OP_TRANS, 0, (char*)tbl);
59f00321
RGS
12093 o->op_private &= ~OPpTRANS_ALL;
12094 o->op_private |= del|squash|complement|
7948272d
NIS
12095 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12096 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 12097
3280af22 12098 PL_lex_op = o;
6154021b 12099 pl_yylval.ival = OP_TRANS;
5db06880
NC
12100
12101#ifdef PERL_MAD
12102 if (PL_madskills) {
12103 if (modstart != s)
12104 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12105 append_madprops(PL_thismad, o, 0);
12106 PL_thismad = 0;
5db06880
NC
12107 }
12108#endif
12109
79072805
LW
12110 return s;
12111}
12112
76e3520e 12113STATIC char *
cea2e8a9 12114S_scan_heredoc(pTHX_ register char *s)
79072805 12115{
97aff369 12116 dVAR;
79072805
LW
12117 SV *herewas;
12118 I32 op_type = OP_SCALAR;
12119 I32 len;
12120 SV *tmpstr;
12121 char term;
73d840c0 12122 const char *found_newline;
79072805 12123 register char *d;
fc36a67e 12124 register char *e;
4633a7c4 12125 char *peek;
f54cb97a 12126 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
12127#ifdef PERL_MAD
12128 I32 stuffstart = s - SvPVX(PL_linestr);
12129 char *tstart;
12130
cd81e915 12131 PL_realtokenstart = -1;
5db06880 12132#endif
79072805 12133
7918f24d
NC
12134 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12135
79072805 12136 s += 2;
3280af22
NIS
12137 d = PL_tokenbuf;
12138 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 12139 if (!outer)
79072805 12140 *d++ = '\n';
c35e046a
AL
12141 peek = s;
12142 while (SPACE_OR_TAB(*peek))
12143 peek++;
3792a11b 12144 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 12145 s = peek;
79072805 12146 term = *s++;
3280af22 12147 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 12148 d += len;
3280af22 12149 if (s < PL_bufend)
79072805 12150 s++;
79072805
LW
12151 }
12152 else {
12153 if (*s == '\\')
12154 s++, term = '\'';
12155 else
12156 term = '"';
7e2040f0 12157 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 12158 deprecate("bare << to mean <<\"\"");
7e2040f0 12159 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 12160 if (d < e)
12161 *d++ = *s;
12162 }
12163 }
3280af22 12164 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 12165 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
12166 *d++ = '\n';
12167 *d = '\0';
3280af22 12168 len = d - PL_tokenbuf;
5db06880
NC
12169
12170#ifdef PERL_MAD
12171 if (PL_madskills) {
12172 tstart = PL_tokenbuf + !outer;
cd81e915 12173 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 12174 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 12175 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12176 stuffstart = s - SvPVX(PL_linestr);
12177 }
12178#endif
6a27c188 12179#ifndef PERL_STRICT_CR
f63a84b2
LW
12180 d = strchr(s, '\r');
12181 if (d) {
b464bac0 12182 char * const olds = s;
f63a84b2 12183 s = d;
3280af22 12184 while (s < PL_bufend) {
f63a84b2
LW
12185 if (*s == '\r') {
12186 *d++ = '\n';
12187 if (*++s == '\n')
12188 s++;
12189 }
12190 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12191 *d++ = *s++;
12192 s++;
12193 }
12194 else
12195 *d++ = *s++;
12196 }
12197 *d = '\0';
3280af22 12198 PL_bufend = d;
95a20fc0 12199 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
12200 s = olds;
12201 }
12202#endif
5db06880
NC
12203#ifdef PERL_MAD
12204 found_newline = 0;
12205#endif
10edeb5d 12206 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
12207 herewas = newSVpvn(s,PL_bufend-s);
12208 }
12209 else {
5db06880
NC
12210#ifdef PERL_MAD
12211 herewas = newSVpvn(s-1,found_newline-s+1);
12212#else
73d840c0
AL
12213 s--;
12214 herewas = newSVpvn(s,found_newline-s);
5db06880 12215#endif
73d840c0 12216 }
5db06880
NC
12217#ifdef PERL_MAD
12218 if (PL_madskills) {
12219 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12220 if (PL_thisstuff)
12221 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 12222 else
cd81e915 12223 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
12224 }
12225#endif
79072805 12226 s += SvCUR(herewas);
748a9306 12227
5db06880
NC
12228#ifdef PERL_MAD
12229 stuffstart = s - SvPVX(PL_linestr);
12230
12231 if (found_newline)
12232 s--;
12233#endif
12234
7d0a29fe
NC
12235 tmpstr = newSV_type(SVt_PVIV);
12236 SvGROW(tmpstr, 80);
748a9306 12237 if (term == '\'') {
79072805 12238 op_type = OP_CONST;
45977657 12239 SvIV_set(tmpstr, -1);
748a9306
LW
12240 }
12241 else if (term == '`') {
79072805 12242 op_type = OP_BACKTICK;
45977657 12243 SvIV_set(tmpstr, '\\');
748a9306 12244 }
79072805
LW
12245
12246 CLINE;
57843af0 12247 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
12248 PL_multi_open = PL_multi_close = '<';
12249 term = *PL_tokenbuf;
0244c3a4 12250 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
12251 char * const bufptr = PL_sublex_info.super_bufptr;
12252 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 12253 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
12254 s = strchr(bufptr, '\n');
12255 if (!s)
12256 s = bufend;
12257 d = s;
12258 while (s < bufend &&
12259 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12260 if (*s++ == '\n')
57843af0 12261 CopLINE_inc(PL_curcop);
0244c3a4
GS
12262 }
12263 if (s >= bufend) {
eb160463 12264 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
12265 missingterm(PL_tokenbuf);
12266 }
12267 sv_setpvn(herewas,bufptr,d-bufptr+1);
12268 sv_setpvn(tmpstr,d+1,s-d);
12269 s += len - 1;
12270 sv_catpvn(herewas,s,bufend-s);
95a20fc0 12271 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
12272
12273 s = olds;
12274 goto retval;
12275 }
12276 else if (!outer) {
79072805 12277 d = s;
3280af22
NIS
12278 while (s < PL_bufend &&
12279 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 12280 if (*s++ == '\n')
57843af0 12281 CopLINE_inc(PL_curcop);
79072805 12282 }
3280af22 12283 if (s >= PL_bufend) {
eb160463 12284 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12285 missingterm(PL_tokenbuf);
79072805
LW
12286 }
12287 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
12288#ifdef PERL_MAD
12289 if (PL_madskills) {
cd81e915
NC
12290 if (PL_thisstuff)
12291 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12292 else
cd81e915 12293 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12294 stuffstart = s - SvPVX(PL_linestr);
12295 }
12296#endif
79072805 12297 s += len - 1;
57843af0 12298 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12299
3280af22
NIS
12300 sv_catpvn(herewas,s,PL_bufend-s);
12301 sv_setsv(PL_linestr,herewas);
12302 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12303 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12304 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12305 }
12306 else
76f68e9b 12307 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12308 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12309#ifdef PERL_MAD
12310 if (PL_madskills) {
12311 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12312 if (PL_thisstuff)
12313 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12314 else
cd81e915 12315 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12316 }
12317#endif
f0e67a1d 12318 PL_bufptr = s;
17cc9359 12319 CopLINE_inc(PL_curcop);
f0e67a1d 12320 if (!outer || !lex_next_chunk(0)) {
eb160463 12321 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12322 missingterm(PL_tokenbuf);
79072805 12323 }
17cc9359 12324 CopLINE_dec(PL_curcop);
f0e67a1d 12325 s = PL_bufptr;
5db06880
NC
12326#ifdef PERL_MAD
12327 stuffstart = s - SvPVX(PL_linestr);
12328#endif
57843af0 12329 CopLINE_inc(PL_curcop);
3280af22 12330 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12331 PL_last_lop = PL_last_uni = NULL;
6a27c188 12332#ifndef PERL_STRICT_CR
3280af22 12333 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12334 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12335 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12336 {
3280af22
NIS
12337 PL_bufend[-2] = '\n';
12338 PL_bufend--;
95a20fc0 12339 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12340 }
3280af22
NIS
12341 else if (PL_bufend[-1] == '\r')
12342 PL_bufend[-1] = '\n';
f63a84b2 12343 }
3280af22
NIS
12344 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12345 PL_bufend[-1] = '\n';
f63a84b2 12346#endif
3280af22 12347 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12348 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12349 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12350 sv_catsv(PL_linestr,herewas);
12351 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12352 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12353 }
12354 else {
3280af22
NIS
12355 s = PL_bufend;
12356 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12357 }
12358 }
79072805 12359 s++;
0244c3a4 12360retval:
57843af0 12361 PL_multi_end = CopLINE(PL_curcop);
79072805 12362 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12363 SvPV_shrink_to_cur(tmpstr);
79072805 12364 }
8990e307 12365 SvREFCNT_dec(herewas);
2f31ce75 12366 if (!IN_BYTES) {
95a20fc0 12367 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12368 SvUTF8_on(tmpstr);
12369 else if (PL_encoding)
12370 sv_recode_to_utf8(tmpstr, PL_encoding);
12371 }
3280af22 12372 PL_lex_stuff = tmpstr;
6154021b 12373 pl_yylval.ival = op_type;
79072805
LW
12374 return s;
12375}
12376
02aa26ce
NT
12377/* scan_inputsymbol
12378 takes: current position in input buffer
12379 returns: new position in input buffer
6154021b 12380 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12381
12382 This code handles:
12383
12384 <> read from ARGV
12385 <FH> read from filehandle
12386 <pkg::FH> read from package qualified filehandle
12387 <pkg'FH> read from package qualified filehandle
12388 <$fh> read from filehandle in $fh
12389 <*.h> filename glob
12390
12391*/
12392
76e3520e 12393STATIC char *
cea2e8a9 12394S_scan_inputsymbol(pTHX_ char *start)
79072805 12395{
97aff369 12396 dVAR;
02aa26ce 12397 register char *s = start; /* current position in buffer */
1b420867 12398 char *end;
79072805 12399 I32 len;
6136c704
AL
12400 char *d = PL_tokenbuf; /* start of temp holding space */
12401 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12402
7918f24d
NC
12403 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12404
1b420867
GS
12405 end = strchr(s, '\n');
12406 if (!end)
12407 end = PL_bufend;
12408 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12409
12410 /* die if we didn't have space for the contents of the <>,
1b420867 12411 or if it didn't end, or if we see a newline
02aa26ce
NT
12412 */
12413
bb7a0f54 12414 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12415 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12416 if (s >= end)
cea2e8a9 12417 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12418
fc36a67e 12419 s++;
02aa26ce
NT
12420
12421 /* check for <$fh>
12422 Remember, only scalar variables are interpreted as filehandles by
12423 this code. Anything more complex (e.g., <$fh{$num}>) will be
12424 treated as a glob() call.
12425 This code makes use of the fact that except for the $ at the front,
12426 a scalar variable and a filehandle look the same.
12427 */
4633a7c4 12428 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12429
12430 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12431 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12432 d++;
02aa26ce
NT
12433
12434 /* If we've tried to read what we allow filehandles to look like, and
12435 there's still text left, then it must be a glob() and not a getline.
12436 Use scan_str to pull out the stuff between the <> and treat it
12437 as nothing more than a string.
12438 */
12439
3280af22 12440 if (d - PL_tokenbuf != len) {
6154021b 12441 pl_yylval.ival = OP_GLOB;
5db06880 12442 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12443 if (!s)
cea2e8a9 12444 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12445 return s;
12446 }
395c3793 12447 else {
9b3023bc 12448 bool readline_overriden = FALSE;
6136c704 12449 GV *gv_readline;
9b3023bc 12450 GV **gvp;
02aa26ce 12451 /* we're in a filehandle read situation */
3280af22 12452 d = PL_tokenbuf;
02aa26ce
NT
12453
12454 /* turn <> into <ARGV> */
79072805 12455 if (!len)
689badd5 12456 Copy("ARGV",d,5,char);
02aa26ce 12457
9b3023bc 12458 /* Check whether readline() is overriden */
fafc274c 12459 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12460 if ((gv_readline
ba979b31 12461 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12462 ||
017a3ce5 12463 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12464 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12465 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12466 readline_overriden = TRUE;
12467
02aa26ce
NT
12468 /* if <$fh>, create the ops to turn the variable into a
12469 filehandle
12470 */
79072805 12471 if (*d == '$') {
02aa26ce
NT
12472 /* try to find it in the pad for this block, otherwise find
12473 add symbol table ops
12474 */
f8f98e0a 12475 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12476 if (tmp != NOT_IN_PAD) {
00b1698f 12477 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12478 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12479 HEK * const stashname = HvNAME_HEK(stash);
12480 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12481 sv_catpvs(sym, "::");
f558d5af
JH
12482 sv_catpv(sym, d+1);
12483 d = SvPVX(sym);
12484 goto intro_sym;
12485 }
12486 else {
6136c704 12487 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12488 o->op_targ = tmp;
9b3023bc
RGS
12489 PL_lex_op = readline_overriden
12490 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12491 append_elem(OP_LIST, o,
12492 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12493 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12494 }
a0d0e21e
LW
12495 }
12496 else {
f558d5af
JH
12497 GV *gv;
12498 ++d;
12499intro_sym:
12500 gv = gv_fetchpv(d,
12501 (PL_in_eval
12502 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12503 : GV_ADDMULTI),
f558d5af 12504 SVt_PV);
9b3023bc
RGS
12505 PL_lex_op = readline_overriden
12506 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12507 append_elem(OP_LIST,
12508 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12509 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12510 : (OP*)newUNOP(OP_READLINE, 0,
12511 newUNOP(OP_RV2SV, 0,
12512 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12513 }
7c6fadd6
RGS
12514 if (!readline_overriden)
12515 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12516 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12517 pl_yylval.ival = OP_NULL;
79072805 12518 }
02aa26ce
NT
12519
12520 /* If it's none of the above, it must be a literal filehandle
12521 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12522 else {
6136c704 12523 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12524 PL_lex_op = readline_overriden
12525 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12526 append_elem(OP_LIST,
12527 newGVOP(OP_GV, 0, gv),
12528 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12529 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12530 pl_yylval.ival = OP_NULL;
79072805
LW
12531 }
12532 }
02aa26ce 12533
79072805
LW
12534 return s;
12535}
12536
02aa26ce
NT
12537
12538/* scan_str
12539 takes: start position in buffer
09bef843
SB
12540 keep_quoted preserve \ on the embedded delimiter(s)
12541 keep_delims preserve the delimiters around the string
02aa26ce
NT
12542 returns: position to continue reading from buffer
12543 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12544 updates the read buffer.
12545
12546 This subroutine pulls a string out of the input. It is called for:
12547 q single quotes q(literal text)
12548 ' single quotes 'literal text'
12549 qq double quotes qq(interpolate $here please)
12550 " double quotes "interpolate $here please"
12551 qx backticks qx(/bin/ls -l)
12552 ` backticks `/bin/ls -l`
12553 qw quote words @EXPORT_OK = qw( func() $spam )
12554 m// regexp match m/this/
12555 s/// regexp substitute s/this/that/
12556 tr/// string transliterate tr/this/that/
12557 y/// string transliterate y/this/that/
12558 ($*@) sub prototypes sub foo ($)
09bef843 12559 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12560 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12561
12562 In most of these cases (all but <>, patterns and transliterate)
12563 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12564 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12565 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12566 calls scan_str().
4e553d73 12567
02aa26ce
NT
12568 It skips whitespace before the string starts, and treats the first
12569 character as the delimiter. If the delimiter is one of ([{< then
12570 the corresponding "close" character )]}> is used as the closing
12571 delimiter. It allows quoting of delimiters, and if the string has
12572 balanced delimiters ([{<>}]) it allows nesting.
12573
37fd879b
HS
12574 On success, the SV with the resulting string is put into lex_stuff or,
12575 if that is already non-NULL, into lex_repl. The second case occurs only
12576 when parsing the RHS of the special constructs s/// and tr/// (y///).
12577 For convenience, the terminating delimiter character is stuffed into
12578 SvIVX of the SV.
02aa26ce
NT
12579*/
12580
76e3520e 12581STATIC char *
09bef843 12582S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12583{
97aff369 12584 dVAR;
02aa26ce 12585 SV *sv; /* scalar value: string */
d3fcec1f 12586 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12587 register char *s = start; /* current position in the buffer */
12588 register char term; /* terminating character */
12589 register char *to; /* current position in the sv's data */
12590 I32 brackets = 1; /* bracket nesting level */
89491803 12591 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12592 I32 termcode; /* terminating char. code */
89ebb4a3 12593 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12594 STRLEN termlen; /* length of terminating string */
0331ef07 12595 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12596#ifdef PERL_MAD
12597 int stuffstart;
12598 char *tstart;
12599#endif
02aa26ce 12600
7918f24d
NC
12601 PERL_ARGS_ASSERT_SCAN_STR;
12602
02aa26ce 12603 /* skip space before the delimiter */
29595ff2
NC
12604 if (isSPACE(*s)) {
12605 s = PEEKSPACE(s);
12606 }
02aa26ce 12607
5db06880 12608#ifdef PERL_MAD
cd81e915
NC
12609 if (PL_realtokenstart >= 0) {
12610 stuffstart = PL_realtokenstart;
12611 PL_realtokenstart = -1;
5db06880
NC
12612 }
12613 else
12614 stuffstart = start - SvPVX(PL_linestr);
12615#endif
02aa26ce 12616 /* mark where we are, in case we need to report errors */
79072805 12617 CLINE;
02aa26ce
NT
12618
12619 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12620 term = *s;
220e2d4e
IH
12621 if (!UTF) {
12622 termcode = termstr[0] = term;
12623 termlen = 1;
12624 }
12625 else {
f3b9ce0f 12626 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12627 Copy(s, termstr, termlen, U8);
12628 if (!UTF8_IS_INVARIANT(term))
12629 has_utf8 = TRUE;
12630 }
b1c7b182 12631
02aa26ce 12632 /* mark where we are */
57843af0 12633 PL_multi_start = CopLINE(PL_curcop);
3280af22 12634 PL_multi_open = term;
02aa26ce
NT
12635
12636 /* find corresponding closing delimiter */
93a17b20 12637 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12638 termcode = termstr[0] = term = tmps[5];
12639
3280af22 12640 PL_multi_close = term;
79072805 12641
561b68a9
SH
12642 /* create a new SV to hold the contents. 79 is the SV's initial length.
12643 What a random number. */
7d0a29fe
NC
12644 sv = newSV_type(SVt_PVIV);
12645 SvGROW(sv, 80);
45977657 12646 SvIV_set(sv, termcode);
a0d0e21e 12647 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12648
12649 /* move past delimiter and try to read a complete string */
09bef843 12650 if (keep_delims)
220e2d4e
IH
12651 sv_catpvn(sv, s, termlen);
12652 s += termlen;
5db06880
NC
12653#ifdef PERL_MAD
12654 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12655 if (!PL_thisopen && !keep_delims) {
12656 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12657 stuffstart = s - SvPVX(PL_linestr);
12658 }
12659#endif
93a17b20 12660 for (;;) {
220e2d4e
IH
12661 if (PL_encoding && !UTF) {
12662 bool cont = TRUE;
12663
12664 while (cont) {
95a20fc0 12665 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12666 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12667 &offset, (char*)termstr, termlen);
6136c704
AL
12668 const char * const ns = SvPVX_const(PL_linestr) + offset;
12669 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12670
12671 for (; s < ns; s++) {
12672 if (*s == '\n' && !PL_rsfp)
12673 CopLINE_inc(PL_curcop);
12674 }
12675 if (!found)
12676 goto read_more_line;
12677 else {
12678 /* handle quoted delimiters */
52327caf 12679 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12680 const char *t;
95a20fc0 12681 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12682 t--;
12683 if ((svlast-1 - t) % 2) {
12684 if (!keep_quoted) {
12685 *(svlast-1) = term;
12686 *svlast = '\0';
12687 SvCUR_set(sv, SvCUR(sv) - 1);
12688 }
12689 continue;
12690 }
12691 }
12692 if (PL_multi_open == PL_multi_close) {
12693 cont = FALSE;
12694 }
12695 else {
f54cb97a
AL
12696 const char *t;
12697 char *w;
0331ef07 12698 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12699 /* At here, all closes are "was quoted" one,
12700 so we don't check PL_multi_close. */
12701 if (*t == '\\') {
12702 if (!keep_quoted && *(t+1) == PL_multi_open)
12703 t++;
12704 else
12705 *w++ = *t++;
12706 }
12707 else if (*t == PL_multi_open)
12708 brackets++;
12709
12710 *w = *t;
12711 }
12712 if (w < t) {
12713 *w++ = term;
12714 *w = '\0';
95a20fc0 12715 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12716 }
0331ef07 12717 last_off = w - SvPVX(sv);
220e2d4e
IH
12718 if (--brackets <= 0)
12719 cont = FALSE;
12720 }
12721 }
12722 }
12723 if (!keep_delims) {
12724 SvCUR_set(sv, SvCUR(sv) - 1);
12725 *SvEND(sv) = '\0';
12726 }
12727 break;
12728 }
12729
02aa26ce 12730 /* extend sv if need be */
3280af22 12731 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12732 /* set 'to' to the next character in the sv's string */
463ee0b2 12733 to = SvPVX(sv)+SvCUR(sv);
09bef843 12734
02aa26ce 12735 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12736 if (PL_multi_open == PL_multi_close) {
12737 for (; s < PL_bufend; s++,to++) {
02aa26ce 12738 /* embedded newlines increment the current line number */
3280af22 12739 if (*s == '\n' && !PL_rsfp)
57843af0 12740 CopLINE_inc(PL_curcop);
02aa26ce 12741 /* handle quoted delimiters */
3280af22 12742 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12743 if (!keep_quoted && s[1] == term)
a0d0e21e 12744 s++;
02aa26ce 12745 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12746 else
12747 *to++ = *s++;
12748 }
02aa26ce
NT
12749 /* terminate when run out of buffer (the for() condition), or
12750 have found the terminator */
220e2d4e
IH
12751 else if (*s == term) {
12752 if (termlen == 1)
12753 break;
f3b9ce0f 12754 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12755 break;
12756 }
63cd0674 12757 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12758 has_utf8 = TRUE;
93a17b20
LW
12759 *to = *s;
12760 }
12761 }
02aa26ce
NT
12762
12763 /* if the terminator isn't the same as the start character (e.g.,
12764 matched brackets), we have to allow more in the quoting, and
12765 be prepared for nested brackets.
12766 */
93a17b20 12767 else {
02aa26ce 12768 /* read until we run out of string, or we find the terminator */
3280af22 12769 for (; s < PL_bufend; s++,to++) {
02aa26ce 12770 /* embedded newlines increment the line count */
3280af22 12771 if (*s == '\n' && !PL_rsfp)
57843af0 12772 CopLINE_inc(PL_curcop);
02aa26ce 12773 /* backslashes can escape the open or closing characters */
3280af22 12774 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12775 if (!keep_quoted &&
12776 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12777 s++;
12778 else
12779 *to++ = *s++;
12780 }
02aa26ce 12781 /* allow nested opens and closes */
3280af22 12782 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12783 break;
3280af22 12784 else if (*s == PL_multi_open)
93a17b20 12785 brackets++;
63cd0674 12786 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12787 has_utf8 = TRUE;
93a17b20
LW
12788 *to = *s;
12789 }
12790 }
02aa26ce 12791 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12792 *to = '\0';
95a20fc0 12793 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12794
02aa26ce
NT
12795 /*
12796 * this next chunk reads more into the buffer if we're not done yet
12797 */
12798
b1c7b182
GS
12799 if (s < PL_bufend)
12800 break; /* handle case where we are done yet :-) */
79072805 12801
6a27c188 12802#ifndef PERL_STRICT_CR
95a20fc0 12803 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12804 if ((to[-2] == '\r' && to[-1] == '\n') ||
12805 (to[-2] == '\n' && to[-1] == '\r'))
12806 {
f63a84b2
LW
12807 to[-2] = '\n';
12808 to--;
95a20fc0 12809 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12810 }
12811 else if (to[-1] == '\r')
12812 to[-1] = '\n';
12813 }
95a20fc0 12814 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12815 to[-1] = '\n';
12816#endif
12817
220e2d4e 12818 read_more_line:
02aa26ce
NT
12819 /* if we're out of file, or a read fails, bail and reset the current
12820 line marker so we can report where the unterminated string began
12821 */
5db06880
NC
12822#ifdef PERL_MAD
12823 if (PL_madskills) {
c35e046a 12824 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12825 if (PL_thisstuff)
12826 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12827 else
cd81e915 12828 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12829 }
12830#endif
f0e67a1d
Z
12831 CopLINE_inc(PL_curcop);
12832 PL_bufptr = PL_bufend;
12833 if (!lex_next_chunk(0)) {
c07a80fd 12834 sv_free(sv);
eb160463 12835 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12836 return NULL;
79072805 12837 }
f0e67a1d 12838 s = PL_bufptr;
5db06880
NC
12839#ifdef PERL_MAD
12840 stuffstart = 0;
12841#endif
378cc40b 12842 }
4e553d73 12843
02aa26ce
NT
12844 /* at this point, we have successfully read the delimited string */
12845
220e2d4e 12846 if (!PL_encoding || UTF) {
5db06880
NC
12847#ifdef PERL_MAD
12848 if (PL_madskills) {
c35e046a 12849 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12850 const int len = s - tstart;
cd81e915 12851 if (PL_thisstuff)
c35e046a 12852 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12853 else
c35e046a 12854 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12855 if (!PL_thisclose && !keep_delims)
12856 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12857 }
12858#endif
12859
220e2d4e
IH
12860 if (keep_delims)
12861 sv_catpvn(sv, s, termlen);
12862 s += termlen;
12863 }
5db06880
NC
12864#ifdef PERL_MAD
12865 else {
12866 if (PL_madskills) {
c35e046a
AL
12867 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12868 const int len = s - tstart - termlen;
cd81e915 12869 if (PL_thisstuff)
c35e046a 12870 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12871 else
c35e046a 12872 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12873 if (!PL_thisclose && !keep_delims)
12874 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12875 }
12876 }
12877#endif
220e2d4e 12878 if (has_utf8 || PL_encoding)
b1c7b182 12879 SvUTF8_on(sv);
d0063567 12880
57843af0 12881 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12882
12883 /* if we allocated too much space, give some back */
93a17b20
LW
12884 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12885 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12886 SvPV_renew(sv, SvLEN(sv));
79072805 12887 }
02aa26ce
NT
12888
12889 /* decide whether this is the first or second quoted string we've read
12890 for this op
12891 */
4e553d73 12892
3280af22
NIS
12893 if (PL_lex_stuff)
12894 PL_lex_repl = sv;
79072805 12895 else
3280af22 12896 PL_lex_stuff = sv;
378cc40b
LW
12897 return s;
12898}
12899
02aa26ce
NT
12900/*
12901 scan_num
12902 takes: pointer to position in buffer
12903 returns: pointer to new position in buffer
6154021b 12904 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12905
12906 Read a number in any of the formats that Perl accepts:
12907
7fd134d9
JH
12908 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12909 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12910 0b[01](_?[01])*
12911 0[0-7](_?[0-7])*
12912 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12913
3280af22 12914 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
12915 thing it reads.
12916
12917 If it reads a number without a decimal point or an exponent, it will
12918 try converting the number to an integer and see if it can do so
12919 without loss of precision.
12920*/
4e553d73 12921
378cc40b 12922char *
bfed75c6 12923Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 12924{
97aff369 12925 dVAR;
bfed75c6 12926 register const char *s = start; /* current position in buffer */
02aa26ce
NT
12927 register char *d; /* destination in temp buffer */
12928 register char *e; /* end of temp buffer */
86554af2 12929 NV nv; /* number read, as a double */
a0714e2c 12930 SV *sv = NULL; /* place to put the converted number */
a86a20aa 12931 bool floatit; /* boolean: int or float? */
cbbf8932 12932 const char *lastub = NULL; /* position of last underbar */
bfed75c6 12933 static char const number_too_long[] = "Number too long";
378cc40b 12934
7918f24d
NC
12935 PERL_ARGS_ASSERT_SCAN_NUM;
12936
02aa26ce
NT
12937 /* We use the first character to decide what type of number this is */
12938
378cc40b 12939 switch (*s) {
79072805 12940 default:
cea2e8a9 12941 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 12942
02aa26ce 12943 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 12944 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
12945 case '0':
12946 {
02aa26ce
NT
12947 /* variables:
12948 u holds the "number so far"
4f19785b
WSI
12949 shift the power of 2 of the base
12950 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
12951 overflowed was the number more than we can hold?
12952
12953 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
12954 we in octal/hex/binary?" indicator to disallow hex characters
12955 when in octal mode.
02aa26ce 12956 */
9e24b6e2
JH
12957 NV n = 0.0;
12958 UV u = 0;
79072805 12959 I32 shift;
9e24b6e2 12960 bool overflowed = FALSE;
61f33854 12961 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
12962 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12963 static const char* const bases[5] =
12964 { "", "binary", "", "octal", "hexadecimal" };
12965 static const char* const Bases[5] =
12966 { "", "Binary", "", "Octal", "Hexadecimal" };
12967 static const char* const maxima[5] =
12968 { "",
12969 "0b11111111111111111111111111111111",
12970 "",
12971 "037777777777",
12972 "0xffffffff" };
bfed75c6 12973 const char *base, *Base, *max;
378cc40b 12974
02aa26ce 12975 /* check for hex */
378cc40b
LW
12976 if (s[1] == 'x') {
12977 shift = 4;
12978 s += 2;
61f33854 12979 just_zero = FALSE;
4f19785b
WSI
12980 } else if (s[1] == 'b') {
12981 shift = 1;
12982 s += 2;
61f33854 12983 just_zero = FALSE;
378cc40b 12984 }
02aa26ce 12985 /* check for a decimal in disguise */
b78218b7 12986 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 12987 goto decimal;
02aa26ce 12988 /* so it must be octal */
928753ea 12989 else {
378cc40b 12990 shift = 3;
928753ea
JH
12991 s++;
12992 }
12993
12994 if (*s == '_') {
a2a5de95 12995 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
12996 "Misplaced _ in number");
12997 lastub = s++;
12998 }
9e24b6e2
JH
12999
13000 base = bases[shift];
13001 Base = Bases[shift];
13002 max = maxima[shift];
02aa26ce 13003
4f19785b 13004 /* read the rest of the number */
378cc40b 13005 for (;;) {
9e24b6e2 13006 /* x is used in the overflow test,
893fe2c2 13007 b is the digit we're adding on. */
9e24b6e2 13008 UV x, b;
55497cff 13009
378cc40b 13010 switch (*s) {
02aa26ce
NT
13011
13012 /* if we don't mention it, we're done */
378cc40b
LW
13013 default:
13014 goto out;
02aa26ce 13015
928753ea 13016 /* _ are ignored -- but warned about if consecutive */
de3bb511 13017 case '_':
a2a5de95
NC
13018 if (lastub && s == lastub + 1)
13019 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13020 "Misplaced _ in number");
928753ea 13021 lastub = s++;
de3bb511 13022 break;
02aa26ce
NT
13023
13024 /* 8 and 9 are not octal */
378cc40b 13025 case '8': case '9':
4f19785b 13026 if (shift == 3)
cea2e8a9 13027 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 13028 /* FALL THROUGH */
02aa26ce
NT
13029
13030 /* octal digits */
4f19785b 13031 case '2': case '3': case '4':
378cc40b 13032 case '5': case '6': case '7':
4f19785b 13033 if (shift == 1)
cea2e8a9 13034 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
13035 /* FALL THROUGH */
13036
13037 case '0': case '1':
02aa26ce 13038 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 13039 goto digit;
02aa26ce
NT
13040
13041 /* hex digits */
378cc40b
LW
13042 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13043 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 13044 /* make sure they said 0x */
378cc40b
LW
13045 if (shift != 4)
13046 goto out;
55497cff 13047 b = (*s++ & 7) + 9;
02aa26ce
NT
13048
13049 /* Prepare to put the digit we have onto the end
13050 of the number so far. We check for overflows.
13051 */
13052
55497cff 13053 digit:
61f33854 13054 just_zero = FALSE;
9e24b6e2
JH
13055 if (!overflowed) {
13056 x = u << shift; /* make room for the digit */
13057
13058 if ((x >> shift) != u
13059 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
13060 overflowed = TRUE;
13061 n = (NV) u;
9b387841
NC
13062 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13063 "Integer overflow in %s number",
13064 base);
9e24b6e2
JH
13065 } else
13066 u = x | b; /* add the digit to the end */
13067 }
13068 if (overflowed) {
13069 n *= nvshift[shift];
13070 /* If an NV has not enough bits in its
13071 * mantissa to represent an UV this summing of
13072 * small low-order numbers is a waste of time
13073 * (because the NV cannot preserve the
13074 * low-order bits anyway): we could just
13075 * remember when did we overflow and in the
13076 * end just multiply n by the right
13077 * amount. */
13078 n += (NV) b;
55497cff 13079 }
378cc40b
LW
13080 break;
13081 }
13082 }
02aa26ce
NT
13083
13084 /* if we get here, we had success: make a scalar value from
13085 the number.
13086 */
378cc40b 13087 out:
928753ea
JH
13088
13089 /* final misplaced underbar check */
13090 if (s[-1] == '_') {
a2a5de95 13091 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
13092 }
13093
9e24b6e2 13094 if (overflowed) {
a2a5de95
NC
13095 if (n > 4294967295.0)
13096 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13097 "%s number > %s non-portable",
13098 Base, max);
b081dd7e 13099 sv = newSVnv(n);
9e24b6e2
JH
13100 }
13101 else {
15041a67 13102#if UVSIZE > 4
a2a5de95
NC
13103 if (u > 0xffffffff)
13104 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13105 "%s number > %s non-portable",
13106 Base, max);
2cc4c2dc 13107#endif
b081dd7e 13108 sv = newSVuv(u);
9e24b6e2 13109 }
61f33854 13110 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 13111 sv = new_constant(start, s - start, "integer",
eb0d8d16 13112 sv, NULL, NULL, 0);
61f33854 13113 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 13114 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
13115 }
13116 break;
02aa26ce
NT
13117
13118 /*
13119 handle decimal numbers.
13120 we're also sent here when we read a 0 as the first digit
13121 */
378cc40b
LW
13122 case '1': case '2': case '3': case '4': case '5':
13123 case '6': case '7': case '8': case '9': case '.':
13124 decimal:
3280af22
NIS
13125 d = PL_tokenbuf;
13126 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 13127 floatit = FALSE;
02aa26ce
NT
13128
13129 /* read next group of digits and _ and copy into d */
de3bb511 13130 while (isDIGIT(*s) || *s == '_') {
4e553d73 13131 /* skip underscores, checking for misplaced ones
02aa26ce
NT
13132 if -w is on
13133 */
93a17b20 13134 if (*s == '_') {
a2a5de95
NC
13135 if (lastub && s == lastub + 1)
13136 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13137 "Misplaced _ in number");
928753ea 13138 lastub = s++;
93a17b20 13139 }
fc36a67e 13140 else {
02aa26ce 13141 /* check for end of fixed-length buffer */
fc36a67e 13142 if (d >= e)
cea2e8a9 13143 Perl_croak(aTHX_ number_too_long);
02aa26ce 13144 /* if we're ok, copy the character */
378cc40b 13145 *d++ = *s++;
fc36a67e 13146 }
378cc40b 13147 }
02aa26ce
NT
13148
13149 /* final misplaced underbar check */
928753ea 13150 if (lastub && s == lastub + 1) {
a2a5de95 13151 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 13152 }
02aa26ce
NT
13153
13154 /* read a decimal portion if there is one. avoid
13155 3..5 being interpreted as the number 3. followed
13156 by .5
13157 */
2f3197b3 13158 if (*s == '.' && s[1] != '.') {
79072805 13159 floatit = TRUE;
378cc40b 13160 *d++ = *s++;
02aa26ce 13161
928753ea 13162 if (*s == '_') {
a2a5de95
NC
13163 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13164 "Misplaced _ in number");
928753ea
JH
13165 lastub = s;
13166 }
13167
13168 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 13169 */
fc36a67e 13170 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 13171 /* fixed length buffer check */
fc36a67e 13172 if (d >= e)
cea2e8a9 13173 Perl_croak(aTHX_ number_too_long);
928753ea 13174 if (*s == '_') {
a2a5de95
NC
13175 if (lastub && s == lastub + 1)
13176 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13177 "Misplaced _ in number");
928753ea
JH
13178 lastub = s;
13179 }
13180 else
fc36a67e 13181 *d++ = *s;
378cc40b 13182 }
928753ea
JH
13183 /* fractional part ending in underbar? */
13184 if (s[-1] == '_') {
a2a5de95
NC
13185 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13186 "Misplaced _ in number");
928753ea 13187 }
dd629d5b
GS
13188 if (*s == '.' && isDIGIT(s[1])) {
13189 /* oops, it's really a v-string, but without the "v" */
f4758303 13190 s = start;
dd629d5b
GS
13191 goto vstring;
13192 }
378cc40b 13193 }
02aa26ce
NT
13194
13195 /* read exponent part, if present */
3792a11b 13196 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
13197 floatit = TRUE;
13198 s++;
02aa26ce
NT
13199
13200 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 13201 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 13202
7fd134d9
JH
13203 /* stray preinitial _ */
13204 if (*s == '_') {
a2a5de95
NC
13205 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13206 "Misplaced _ in number");
7fd134d9
JH
13207 lastub = s++;
13208 }
13209
02aa26ce 13210 /* allow positive or negative exponent */
378cc40b
LW
13211 if (*s == '+' || *s == '-')
13212 *d++ = *s++;
02aa26ce 13213
7fd134d9
JH
13214 /* stray initial _ */
13215 if (*s == '_') {
a2a5de95
NC
13216 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13217 "Misplaced _ in number");
7fd134d9
JH
13218 lastub = s++;
13219 }
13220
7fd134d9
JH
13221 /* read digits of exponent */
13222 while (isDIGIT(*s) || *s == '_') {
13223 if (isDIGIT(*s)) {
13224 if (d >= e)
13225 Perl_croak(aTHX_ number_too_long);
b3b48e3e 13226 *d++ = *s++;
7fd134d9
JH
13227 }
13228 else {
041457d9 13229 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
13230 (!isDIGIT(s[1]) && s[1] != '_')))
13231 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13232 "Misplaced _ in number");
b3b48e3e 13233 lastub = s++;
7fd134d9 13234 }
7fd134d9 13235 }
378cc40b 13236 }
02aa26ce 13237
02aa26ce 13238
0b7fceb9 13239 /*
58bb9ec3
NC
13240 We try to do an integer conversion first if no characters
13241 indicating "float" have been found.
0b7fceb9
MU
13242 */
13243
13244 if (!floatit) {
58bb9ec3 13245 UV uv;
6136c704 13246 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
13247
13248 if (flags == IS_NUMBER_IN_UV) {
13249 if (uv <= IV_MAX)
b081dd7e 13250 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 13251 else
b081dd7e 13252 sv = newSVuv(uv);
58bb9ec3
NC
13253 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13254 if (uv <= (UV) IV_MIN)
b081dd7e 13255 sv = newSViv(-(IV)uv);
58bb9ec3
NC
13256 else
13257 floatit = TRUE;
13258 } else
13259 floatit = TRUE;
13260 }
0b7fceb9 13261 if (floatit) {
58bb9ec3
NC
13262 /* terminate the string */
13263 *d = '\0';
86554af2 13264 nv = Atof(PL_tokenbuf);
b081dd7e 13265 sv = newSVnv(nv);
86554af2 13266 }
86554af2 13267
eb0d8d16
NC
13268 if ( floatit
13269 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13270 const char *const key = floatit ? "float" : "integer";
13271 const STRLEN keylen = floatit ? 5 : 7;
13272 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13273 key, keylen, sv, NULL, NULL, 0);
13274 }
378cc40b 13275 break;
0b7fceb9 13276
e312add1 13277 /* if it starts with a v, it could be a v-string */
a7cb1f99 13278 case 'v':
dd629d5b 13279vstring:
561b68a9 13280 sv = newSV(5); /* preallocate storage space */
65b06e02 13281 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 13282 break;
79072805 13283 }
a687059c 13284
02aa26ce
NT
13285 /* make the op for the constant and return */
13286
a86a20aa 13287 if (sv)
b73d6f50 13288 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13289 else
5f66b61c 13290 lvalp->opval = NULL;
a687059c 13291
73d840c0 13292 return (char *)s;
378cc40b
LW
13293}
13294
76e3520e 13295STATIC char *
cea2e8a9 13296S_scan_formline(pTHX_ register char *s)
378cc40b 13297{
97aff369 13298 dVAR;
79072805 13299 register char *eol;
378cc40b 13300 register char *t;
6136c704 13301 SV * const stuff = newSVpvs("");
79072805 13302 bool needargs = FALSE;
c5ee2135 13303 bool eofmt = FALSE;
5db06880
NC
13304#ifdef PERL_MAD
13305 char *tokenstart = s;
4f61fd4b
JC
13306 SV* savewhite = NULL;
13307
5db06880 13308 if (PL_madskills) {
cd81e915
NC
13309 savewhite = PL_thiswhite;
13310 PL_thiswhite = 0;
5db06880
NC
13311 }
13312#endif
378cc40b 13313
7918f24d
NC
13314 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13315
79072805 13316 while (!needargs) {
a1b95068 13317 if (*s == '.') {
c35e046a 13318 t = s+1;
51882d45 13319#ifdef PERL_STRICT_CR
c35e046a
AL
13320 while (SPACE_OR_TAB(*t))
13321 t++;
51882d45 13322#else
c35e046a
AL
13323 while (SPACE_OR_TAB(*t) || *t == '\r')
13324 t++;
51882d45 13325#endif
c5ee2135
WL
13326 if (*t == '\n' || t == PL_bufend) {
13327 eofmt = TRUE;
79072805 13328 break;
c5ee2135 13329 }
79072805 13330 }
3280af22 13331 if (PL_in_eval && !PL_rsfp) {
07409e01 13332 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13333 if (!eol++)
3280af22 13334 eol = PL_bufend;
0f85fab0
LW
13335 }
13336 else
3280af22 13337 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13338 if (*s != '#') {
a0d0e21e
LW
13339 for (t = s; t < eol; t++) {
13340 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13341 needargs = FALSE;
13342 goto enough; /* ~~ must be first line in formline */
378cc40b 13343 }
a0d0e21e
LW
13344 if (*t == '@' || *t == '^')
13345 needargs = TRUE;
378cc40b 13346 }
7121b347
MG
13347 if (eol > s) {
13348 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13349#ifndef PERL_STRICT_CR
7121b347
MG
13350 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13351 char *end = SvPVX(stuff) + SvCUR(stuff);
13352 end[-2] = '\n';
13353 end[-1] = '\0';
b162af07 13354 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13355 }
2dc4c65b 13356#endif
7121b347
MG
13357 }
13358 else
13359 break;
79072805 13360 }
95a20fc0 13361 s = (char*)eol;
3280af22 13362 if (PL_rsfp) {
f0e67a1d 13363 bool got_some;
5db06880
NC
13364#ifdef PERL_MAD
13365 if (PL_madskills) {
cd81e915
NC
13366 if (PL_thistoken)
13367 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13368 else
cd81e915 13369 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13370 }
13371#endif
f0e67a1d
Z
13372 PL_bufptr = PL_bufend;
13373 CopLINE_inc(PL_curcop);
13374 got_some = lex_next_chunk(0);
13375 CopLINE_dec(PL_curcop);
13376 s = PL_bufptr;
5db06880 13377#ifdef PERL_MAD
f0e67a1d 13378 tokenstart = PL_bufptr;
5db06880 13379#endif
f0e67a1d 13380 if (!got_some)
378cc40b 13381 break;
378cc40b 13382 }
463ee0b2 13383 incline(s);
79072805 13384 }
a0d0e21e
LW
13385 enough:
13386 if (SvCUR(stuff)) {
3280af22 13387 PL_expect = XTERM;
79072805 13388 if (needargs) {
3280af22 13389 PL_lex_state = LEX_NORMAL;
cd81e915 13390 start_force(PL_curforce);
9ded7720 13391 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13392 force_next(',');
13393 }
a0d0e21e 13394 else
3280af22 13395 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13396 if (!IN_BYTES) {
95a20fc0 13397 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13398 SvUTF8_on(stuff);
13399 else if (PL_encoding)
13400 sv_recode_to_utf8(stuff, PL_encoding);
13401 }
cd81e915 13402 start_force(PL_curforce);
9ded7720 13403 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13404 force_next(THING);
cd81e915 13405 start_force(PL_curforce);
9ded7720 13406 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13407 force_next(LSTOP);
378cc40b 13408 }
79072805 13409 else {
8990e307 13410 SvREFCNT_dec(stuff);
c5ee2135
WL
13411 if (eofmt)
13412 PL_lex_formbrack = 0;
3280af22 13413 PL_bufptr = s;
79072805 13414 }
5db06880
NC
13415#ifdef PERL_MAD
13416 if (PL_madskills) {
cd81e915
NC
13417 if (PL_thistoken)
13418 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13419 else
cd81e915
NC
13420 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13421 PL_thiswhite = savewhite;
5db06880
NC
13422 }
13423#endif
79072805 13424 return s;
378cc40b 13425}
a687059c 13426
ba6d6ac9 13427I32
864dbfa3 13428Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13429{
97aff369 13430 dVAR;
a3b680e6 13431 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13432 CV* const outsidecv = PL_compcv;
8990e307 13433
3280af22
NIS
13434 if (PL_compcv) {
13435 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13436 }
7766f137 13437 SAVEI32(PL_subline);
3280af22 13438 save_item(PL_subname);
3280af22 13439 SAVESPTR(PL_compcv);
3280af22 13440
ea726b52 13441 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13442 CvFLAGS(PL_compcv) |= flags;
13443
57843af0 13444 PL_subline = CopLINE(PL_curcop);
dd2155a4 13445 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13446 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13447 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13448
8990e307
LW
13449 return oldsavestack_ix;
13450}
13451
084592ab
CN
13452#ifdef __SC__
13453#pragma segment Perl_yylex
13454#endif
af41e527
NC
13455static int
13456S_yywarn(pTHX_ const char *const s)
8990e307 13457{
97aff369 13458 dVAR;
7918f24d
NC
13459
13460 PERL_ARGS_ASSERT_YYWARN;
13461
faef0170 13462 PL_in_eval |= EVAL_WARNONLY;
748a9306 13463 yyerror(s);
faef0170 13464 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13465 return 0;
8990e307
LW
13466}
13467
13468int
15f169a1 13469Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13470{
97aff369 13471 dVAR;
bfed75c6
AL
13472 const char *where = NULL;
13473 const char *context = NULL;
68dc0745 13474 int contlen = -1;
46fc3d4c 13475 SV *msg;
5912531f 13476 int yychar = PL_parser->yychar;
463ee0b2 13477
7918f24d
NC
13478 PERL_ARGS_ASSERT_YYERROR;
13479
3280af22 13480 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13481 where = "at EOF";
8bcfe651
TM
13482 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13483 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13484 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13485 /*
13486 Only for NetWare:
13487 The code below is removed for NetWare because it abends/crashes on NetWare
13488 when the script has error such as not having the closing quotes like:
13489 if ($var eq "value)
13490 Checking of white spaces is anyway done in NetWare code.
13491 */
13492#ifndef NETWARE
3280af22
NIS
13493 while (isSPACE(*PL_oldoldbufptr))
13494 PL_oldoldbufptr++;
f355267c 13495#endif
3280af22
NIS
13496 context = PL_oldoldbufptr;
13497 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13498 }
8bcfe651
TM
13499 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13500 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13501 /*
13502 Only for NetWare:
13503 The code below is removed for NetWare because it abends/crashes on NetWare
13504 when the script has error such as not having the closing quotes like:
13505 if ($var eq "value)
13506 Checking of white spaces is anyway done in NetWare code.
13507 */
13508#ifndef NETWARE
3280af22
NIS
13509 while (isSPACE(*PL_oldbufptr))
13510 PL_oldbufptr++;
f355267c 13511#endif
3280af22
NIS
13512 context = PL_oldbufptr;
13513 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13514 }
13515 else if (yychar > 255)
68dc0745 13516 where = "next token ???";
12fbd33b 13517 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13518 if (PL_lex_state == LEX_NORMAL ||
13519 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13520 where = "at end of line";
3280af22 13521 else if (PL_lex_inpat)
68dc0745 13522 where = "within pattern";
463ee0b2 13523 else
68dc0745 13524 where = "within string";
463ee0b2 13525 }
46fc3d4c 13526 else {
84bafc02 13527 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13528 if (yychar < 32)
cea2e8a9 13529 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13530 else if (isPRINT_LC(yychar)) {
88c9ea1e 13531 const char string = yychar;
5e7aa789
NC
13532 sv_catpvn(where_sv, &string, 1);
13533 }
463ee0b2 13534 else
cea2e8a9 13535 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13536 where = SvPVX_const(where_sv);
463ee0b2 13537 }
46fc3d4c 13538 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13539 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13540 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13541 if (context)
cea2e8a9 13542 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13543 else
cea2e8a9 13544 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13545 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13546 Perl_sv_catpvf(aTHX_ msg,
57def98f 13547 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13548 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13549 PL_multi_end = 0;
a0d0e21e 13550 }
500960a6 13551 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13552 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13553 }
463ee0b2 13554 else
5a844595 13555 qerror(msg);
c7d6bfb2
GS
13556 if (PL_error_count >= 10) {
13557 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13558 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13559 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13560 else
13561 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13562 OutCopFILE(PL_curcop));
c7d6bfb2 13563 }
3280af22 13564 PL_in_my = 0;
5c284bb0 13565 PL_in_my_stash = NULL;
463ee0b2
LW
13566 return 0;
13567}
084592ab
CN
13568#ifdef __SC__
13569#pragma segment Main
13570#endif
4e35701f 13571
b250498f 13572STATIC char*
3ae08724 13573S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13574{
97aff369 13575 dVAR;
f54cb97a 13576 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13577
13578 PERL_ARGS_ASSERT_SWALLOW_BOM;
13579
7aa207d6 13580 switch (s[0]) {
4e553d73
NIS
13581 case 0xFF:
13582 if (s[1] == 0xFE) {
ee6ba15d 13583 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13584 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13585 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13586#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13587 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13588 s += 2;
dea0fc0b 13589 if (PL_bufend > (char*)s) {
81a923f4 13590 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13591 }
b250498f 13592#else
ee6ba15d 13593 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13594#endif
01ec43d0
GS
13595 }
13596 break;
78ae23f5 13597 case 0xFE:
7aa207d6 13598 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13599#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13600 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13601 s += 2;
13602 if (PL_bufend > (char *)s) {
81a923f4 13603 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13604 }
b250498f 13605#else
ee6ba15d 13606 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13607#endif
01ec43d0
GS
13608 }
13609 break;
3ae08724
GS
13610 case 0xEF:
13611 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13612 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13613 s += 3; /* UTF-8 */
13614 }
13615 break;
13616 case 0:
7aa207d6
JH
13617 if (slen > 3) {
13618 if (s[1] == 0) {
13619 if (s[2] == 0xFE && s[3] == 0xFF) {
13620 /* UTF-32 big-endian */
ee6ba15d 13621 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13622 }
13623 }
13624 else if (s[2] == 0 && s[3] != 0) {
13625 /* Leading bytes
13626 * 00 xx 00 xx
13627 * are a good indicator of UTF-16BE. */
ee6ba15d 13628#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13629 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13630 s = add_utf16_textfilter(s, FALSE);
13631#else
13632 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13633#endif
7aa207d6 13634 }
01ec43d0 13635 }
e294cc5d
JH
13636#ifdef EBCDIC
13637 case 0xDD:
13638 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13639 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13640 s += 4; /* UTF-8 */
13641 }
13642 break;
13643#endif
13644
7aa207d6
JH
13645 default:
13646 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13647 /* Leading bytes
13648 * xx 00 xx 00
13649 * are a good indicator of UTF-16LE. */
ee6ba15d 13650#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13651 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13652 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13653#else
13654 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13655#endif
7aa207d6 13656 }
01ec43d0 13657 }
b8f84bb2 13658 return (char*)s;
b250498f 13659}
4755096e 13660
6e3aabd6
GS
13661
13662#ifndef PERL_NO_UTF16_FILTER
13663static I32
a28af015 13664S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13665{
97aff369 13666 dVAR;
f3040f2c 13667 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13668 /* We re-use this each time round, throwing the contents away before we
13669 return. */
2a773401 13670 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13671 SV *const utf8_buffer = filter;
c28d6105 13672 IV status = IoPAGE(filter);
f2338a2e 13673 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 13674 I32 retval;
c8b0cbae 13675
c85ae797
NC
13676 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13677
c8b0cbae
NC
13678 /* As we're automatically added, at the lowest level, and hence only called
13679 from this file, we can be sure that we're not called in block mode. Hence
13680 don't bother writing code to deal with block mode. */
13681 if (maxlen) {
13682 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13683 }
c28d6105
NC
13684 if (status < 0) {
13685 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13686 }
1de9afcd 13687 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13688 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13689 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13690 reverse ? 'l' : 'b', idx, maxlen, status,
13691 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13692
13693 while (1) {
13694 STRLEN chars;
13695 STRLEN have;
dea0fc0b 13696 I32 newlen;
2a773401 13697 U8 *end;
c28d6105
NC
13698 /* First, look in our buffer of existing UTF-8 data: */
13699 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13700
13701 if (nl) {
13702 ++nl;
13703 } else if (status == 0) {
13704 /* EOF */
13705 IoPAGE(filter) = 0;
13706 nl = SvEND(utf8_buffer);
13707 }
13708 if (nl) {
d2d1d4de
NC
13709 STRLEN got = nl - SvPVX(utf8_buffer);
13710 /* Did we have anything to append? */
13711 retval = got != 0;
13712 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13713 /* Everything else in this code works just fine if SVp_POK isn't
13714 set. This, however, needs it, and we need it to work, else
13715 we loop infinitely because the buffer is never consumed. */
13716 sv_chop(utf8_buffer, nl);
13717 break;
13718 }
ba77e4cc 13719
c28d6105
NC
13720 /* OK, not a complete line there, so need to read some more UTF-16.
13721 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13722 while (1) {
13723 if (status <= 0)
13724 break;
13725 if (SvCUR(utf16_buffer) >= 2) {
13726 /* Location of the high octet of the last complete code point.
13727 Gosh, UTF-16 is a pain. All the benefits of variable length,
13728 *coupled* with all the benefits of partial reads and
13729 endianness. */
13730 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13731 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13732
13733 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13734 break;
13735 }
13736
13737 /* We have the first half of a surrogate. Read more. */
13738 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13739 }
c28d6105 13740
c28d6105
NC
13741 status = FILTER_READ(idx + 1, utf16_buffer,
13742 160 + (SvCUR(utf16_buffer) & 1));
13743 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13744 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13745 if (status < 0) {
13746 /* Error */
13747 IoPAGE(filter) = status;
13748 return status;
13749 }
13750 }
13751
13752 chars = SvCUR(utf16_buffer) >> 1;
13753 have = SvCUR(utf8_buffer);
13754 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13755
aa6dbd60 13756 if (reverse) {
c28d6105
NC
13757 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13758 (U8*)SvPVX_const(utf8_buffer) + have,
13759 chars * 2, &newlen);
aa6dbd60 13760 } else {
2a773401 13761 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13762 (U8*)SvPVX_const(utf8_buffer) + have,
13763 chars * 2, &newlen);
2a773401 13764 }
c28d6105 13765 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13766 *end = '\0';
c28d6105 13767
e07286ed
NC
13768 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13769 it's private to us, and utf16_to_utf8{,reversed} take a
13770 (pointer,length) pair, rather than a NUL-terminated string. */
13771 if(SvCUR(utf16_buffer) & 1) {
13772 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13773 SvCUR_set(utf16_buffer, 1);
13774 } else {
13775 SvCUR_set(utf16_buffer, 0);
13776 }
2a773401 13777 }
c28d6105
NC
13778 DEBUG_P(PerlIO_printf(Perl_debug_log,
13779 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13780 status,
13781 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13782 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13783 return retval;
6e3aabd6 13784}
81a923f4
NC
13785
13786static U8 *
13787S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13788{
2a773401 13789 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13790
c85ae797
NC
13791 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13792
c28d6105 13793 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13794 sv_setpvs(filter, "");
2a773401 13795 IoLINES(filter) = reversed;
c28d6105
NC
13796 IoPAGE(filter) = 1; /* Not EOF */
13797
13798 /* Sadly, we have to return a valid pointer, come what may, so we have to
13799 ignore any error return from this. */
13800 SvCUR_set(PL_linestr, 0);
13801 if (FILTER_READ(0, PL_linestr, 0)) {
13802 SvUTF8_on(PL_linestr);
81a923f4 13803 } else {
c28d6105 13804 SvUTF8_on(PL_linestr);
81a923f4 13805 }
c28d6105 13806 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13807 return (U8*)SvPVX(PL_linestr);
13808}
6e3aabd6 13809#endif
9f4817db 13810
f333445c
JP
13811/*
13812Returns a pointer to the next character after the parsed
13813vstring, as well as updating the passed in sv.
13814
13815Function must be called like
13816
561b68a9 13817 sv = newSV(5);
65b06e02 13818 s = scan_vstring(s,e,sv);
f333445c 13819
65b06e02 13820where s and e are the start and end of the string.
f333445c
JP
13821The sv should already be large enough to store the vstring
13822passed in, for performance reasons.
13823
13824*/
13825
13826char *
15f169a1 13827Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13828{
97aff369 13829 dVAR;
bfed75c6
AL
13830 const char *pos = s;
13831 const char *start = s;
7918f24d
NC
13832
13833 PERL_ARGS_ASSERT_SCAN_VSTRING;
13834
f333445c 13835 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13836 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13837 pos++;
f333445c
JP
13838 if ( *pos != '.') {
13839 /* this may not be a v-string if followed by => */
bfed75c6 13840 const char *next = pos;
65b06e02 13841 while (next < e && isSPACE(*next))
8fc7bb1c 13842 ++next;
65b06e02 13843 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13844 /* return string not v-string */
13845 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13846 return (char *)pos;
f333445c
JP
13847 }
13848 }
13849
13850 if (!isALPHA(*pos)) {
89ebb4a3 13851 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13852
d4c19fe8
AL
13853 if (*s == 'v')
13854 s++; /* get past 'v' */
f333445c 13855
76f68e9b 13856 sv_setpvs(sv, "");
f333445c
JP
13857
13858 for (;;) {
d4c19fe8 13859 /* this is atoi() that tolerates underscores */
0bd48802
AL
13860 U8 *tmpend;
13861 UV rev = 0;
d4c19fe8
AL
13862 const char *end = pos;
13863 UV mult = 1;
13864 while (--end >= s) {
13865 if (*end != '_') {
13866 const UV orev = rev;
f333445c
JP
13867 rev += (*end - '0') * mult;
13868 mult *= 10;
9b387841
NC
13869 if (orev > rev)
13870 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13871 "Integer overflow in decimal number");
f333445c
JP
13872 }
13873 }
13874#ifdef EBCDIC
13875 if (rev > 0x7FFFFFFF)
13876 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13877#endif
13878 /* Append native character for the rev point */
13879 tmpend = uvchr_to_utf8(tmpbuf, rev);
13880 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13881 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13882 SvUTF8_on(sv);
65b06e02 13883 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13884 s = ++pos;
13885 else {
13886 s = pos;
13887 break;
13888 }
65b06e02 13889 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13890 pos++;
13891 }
13892 SvPOK_on(sv);
13893 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13894 SvRMAGICAL_on(sv);
13895 }
73d840c0 13896 return (char *)s;
f333445c
JP
13897}
13898
88e1f1a2
JV
13899int
13900Perl_keyword_plugin_standard(pTHX_
13901 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13902{
13903 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13904 PERL_UNUSED_CONTEXT;
13905 PERL_UNUSED_ARG(keyword_ptr);
13906 PERL_UNUSED_ARG(keyword_len);
13907 PERL_UNUSED_ARG(op_ptr);
13908 return KEYWORD_PLUGIN_DECLINE;
13909}
13910
1da4ca5f
NC
13911/*
13912 * Local variables:
13913 * c-indentation-style: bsd
13914 * c-basic-offset: 4
13915 * indent-tabs-mode: t
13916 * End:
13917 *
37442d52
RGS
13918 * ex: set ts=8 sts=4 sw=4 noet:
13919 */