This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
Moduule::Build test tweaks for VMS.
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
acdf0a21
DM
48/* YYINITDEPTH -- initial size of the parser's stacks. */
49#define YYINITDEPTH 200
50
199e78b7
DM
51/* XXX temporary backwards compatibility */
52#define PL_lex_brackets (PL_parser->lex_brackets)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
9059aa12
LW
129#define XFAKEBRACK 128
130#define XENUMMASK 127
131
39e02b42
JH
132#ifdef USE_UTF8_SCRIPTS
133# define UTF (!IN_BYTES)
2b9d42f0 134#else
746b446a 135# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 136#endif
a0ed51b3 137
b1fc3636
CJ
138/* The maximum number of characters preceding the unrecognized one to display */
139#define UNRECOGNIZED_PRECEDE_COUNT 10
140
61f0cdd9 141/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
142 * 1999-02-27 mjd-perl-patch@plover.com */
143#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
144
bf4acbe4 145#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 146
ffb4593c
NT
147/* LEX_* are values for PL_lex_state, the state of the lexer.
148 * They are arranged oddly so that the guard on the switch statement
79072805
LW
149 * can get by with a single comparison (if the compiler is smart enough).
150 */
151
fb73857a 152/* #define LEX_NOTPARSING 11 is done in perl.h. */
153
b6007c36
DM
154#define LEX_NORMAL 10 /* normal code (ie not within "...") */
155#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
156#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
157#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
158#define LEX_INTERPSTART 6 /* expecting the start of a $var */
159
160 /* at end of code, eg "$x" followed by: */
161#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
162#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
163
164#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
165 string or after \E, $foo, etc */
166#define LEX_INTERPCONST 2 /* NOT USED */
167#define LEX_FORMLINE 1 /* expecting a format line */
168#define LEX_KNOWNEXT 0 /* next token known; just return it */
169
79072805 170
bbf60fe6 171#ifdef DEBUGGING
27da23d5 172static const char* const lex_state_names[] = {
bbf60fe6
DM
173 "KNOWNEXT",
174 "FORMLINE",
175 "INTERPCONST",
176 "INTERPCONCAT",
177 "INTERPENDMAYBE",
178 "INTERPEND",
179 "INTERPSTART",
180 "INTERPPUSH",
181 "INTERPCASEMOD",
182 "INTERPNORMAL",
183 "NORMAL"
184};
185#endif
186
79072805
LW
187#ifdef ff_next
188#undef ff_next
d48672a2
LW
189#endif
190
79072805 191#include "keywords.h"
fe14fcc3 192
ffb4593c
NT
193/* CLINE is a macro that ensures PL_copline has a sane value */
194
ae986130
LW
195#ifdef CLINE
196#undef CLINE
197#endif
57843af0 198#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 199
5db06880 200#ifdef PERL_MAD
29595ff2
NC
201# define SKIPSPACE0(s) skipspace0(s)
202# define SKIPSPACE1(s) skipspace1(s)
203# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
204# define PEEKSPACE(s) skipspace2(s,0)
205#else
206# define SKIPSPACE0(s) skipspace(s)
207# define SKIPSPACE1(s) skipspace(s)
208# define SKIPSPACE2(s,tsv) skipspace(s)
209# define PEEKSPACE(s) skipspace(s)
210#endif
211
ffb4593c
NT
212/*
213 * Convenience functions to return different tokens and prime the
9cbb5ea2 214 * lexer for the next token. They all take an argument.
ffb4593c
NT
215 *
216 * TOKEN : generic token (used for '(', DOLSHARP, etc)
217 * OPERATOR : generic operator
218 * AOPERATOR : assignment operator
219 * PREBLOCK : beginning the block after an if, while, foreach, ...
220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
221 * PREREF : *EXPR where EXPR is not a simple identifier
222 * TERM : expression term
223 * LOOPX : loop exiting command (goto, last, dump, etc)
224 * FTST : file test operator
225 * FUN0 : zero-argument function
2d2e263d 226 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
227 * BOop : bitwise or or xor
228 * BAop : bitwise and
229 * SHop : shift operator
230 * PWop : power operator
9cbb5ea2 231 * PMop : pattern-matching operator
ffb4593c
NT
232 * Aop : addition-level operator
233 * Mop : multiplication-level operator
234 * Eop : equality-testing operator
e5edeb50 235 * Rop : relational operator <= != gt
ffb4593c
NT
236 *
237 * Also see LOP and lop() below.
238 */
239
998054bd 240#ifdef DEBUGGING /* Serve -DT. */
704d4215 241# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 242#else
bbf60fe6 243# define REPORT(retval) (retval)
998054bd
SC
244#endif
245
bbf60fe6
DM
246#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
247#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
248#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
249#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
250#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
251#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
252#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
253#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
254#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
255#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
376fcdbf 272#define UNI2(f,x) { \
6154021b 273 pl_yylval.ival = f; \
376fcdbf
AL
274 PL_expect = x; \
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
6f33ba73
RGS
283#define UNI(f) UNI2(f,XTERM)
284#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 285
376fcdbf 286#define UNIBRACK(f) { \
6154021b 287 pl_yylval.ival = f; \
376fcdbf
AL
288 PL_bufptr = s; \
289 PL_last_uni = PL_oldbufptr; \
290 if (*s == '(') \
291 return REPORT( (int)FUNC1 ); \
29595ff2 292 s = PEEKSPACE(s); \
376fcdbf
AL
293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 }
79072805 295
9f68db38 296/* grandfather return to old style */
6154021b 297#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 298
8fa7f367
JH
299#ifdef DEBUGGING
300
6154021b 301/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
302enum token_type {
303 TOKENTYPE_NONE,
304 TOKENTYPE_IVAL,
6154021b 305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
306 TOKENTYPE_PVAL,
307 TOKENTYPE_OPVAL,
308 TOKENTYPE_GVVAL
309};
310
6d4a66ac
NC
311static struct debug_tokens {
312 const int token;
313 enum token_type type;
314 const char *name;
315} const debug_tokens[] =
9041c2e3 316{
bbf60fe6
DM
317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
318 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
319 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
321 { ARROW, TOKENTYPE_NONE, "ARROW" },
322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
328 { DO, TOKENTYPE_NONE, "DO" },
329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
330 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
331 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
333 { ELSE, TOKENTYPE_NONE, "ELSE" },
334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
335 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
336 { FOR, TOKENTYPE_IVAL, "FOR" },
337 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
338 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
345 { IF, TOKENTYPE_IVAL, "IF" },
346 { LABEL, TOKENTYPE_PVAL, "LABEL" },
347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
352 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
353 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
354 { MY, TOKENTYPE_IVAL, "MY" },
355 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
356 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
357 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
358 { OROP, TOKENTYPE_IVAL, "OROP" },
359 { OROR, TOKENTYPE_NONE, "OROR" },
360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
365 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
366 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
367 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
368 { PREINC, TOKENTYPE_NONE, "PREINC" },
369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
370 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
371 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 385 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
386};
387
6154021b 388/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 389
bbf60fe6 390STATIC int
704d4215 391S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 392{
97aff369 393 dVAR;
7918f24d
NC
394
395 PERL_ARGS_ASSERT_TOKEREPORT;
396
bbf60fe6 397 if (DEBUG_T_TEST) {
bd61b366 398 const char *name = NULL;
bbf60fe6 399 enum token_type type = TOKENTYPE_NONE;
f54cb97a 400 const struct debug_tokens *p;
396482e1 401 SV* const report = newSVpvs("<== ");
bbf60fe6 402
f54cb97a 403 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
404 if (p->token == (int)rv) {
405 name = p->name;
406 type = p->type;
407 break;
408 }
409 }
410 if (name)
54667de8 411 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
412 else if ((char)rv > ' ' && (char)rv < '~')
413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
414 else if (!rv)
396482e1 415 sv_catpvs(report, "EOF");
bbf60fe6
DM
416 else
417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 switch (type) {
419 case TOKENTYPE_NONE:
420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 break;
422 case TOKENTYPE_IVAL:
704d4215 423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
424 break;
425 case TOKENTYPE_OPNUM:
426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 427 PL_op_name[lvalp->ival]);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_PVAL:
704d4215 430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
431 break;
432 case TOKENTYPE_OPVAL:
704d4215 433 if (lvalp->opval) {
401441c0 434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
435 PL_op_name[lvalp->opval->op_type]);
436 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 437 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 438 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
439 }
440
441 }
401441c0 442 else
396482e1 443 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
444 break;
445 }
b6007c36 446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
447 };
448 return (int)rv;
998054bd
SC
449}
450
b6007c36
DM
451
452/* print the buffer with suitable escapes */
453
454STATIC void
15f169a1 455S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 456{
396482e1 457 SV* const tmp = newSVpvs("");
7918f24d
NC
458
459 PERL_ARGS_ASSERT_PRINTBUF;
460
b6007c36
DM
461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
462 SvREFCNT_dec(tmp);
463}
464
8fa7f367
JH
465#endif
466
8290c323
NC
467static int
468S_deprecate_commaless_var_list(pTHX) {
469 PL_expect = XTERM;
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
472}
473
ffb4593c
NT
474/*
475 * S_ao
476 *
c963b151
BD
477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
479 */
480
76e3520e 481STATIC int
cea2e8a9 482S_ao(pTHX_ int toketype)
a0d0e21e 483{
97aff369 484 dVAR;
3280af22
NIS
485 if (*PL_bufptr == '=') {
486 PL_bufptr++;
a0d0e21e 487 if (toketype == ANDAND)
6154021b 488 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 489 else if (toketype == OROR)
6154021b 490 pl_yylval.ival = OP_ORASSIGN;
c963b151 491 else if (toketype == DORDOR)
6154021b 492 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
493 toketype = ASSIGNOP;
494 }
495 return toketype;
496}
497
ffb4593c
NT
498/*
499 * S_no_op
500 * When Perl expects an operator and finds something else, no_op
501 * prints the warning. It always prints "<something> found where
502 * operator expected. It prints "Missing semicolon on previous line?"
503 * if the surprise occurs at the start of the line. "do you need to
504 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
505 * where the compiler doesn't know if foo is a method call or a function.
506 * It prints "Missing operator before end of line" if there's nothing
507 * after the missing operator, or "... before <...>" if there is something
508 * after the missing operator.
509 */
510
76e3520e 511STATIC void
15f169a1 512S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 513{
97aff369 514 dVAR;
9d4ba2ae
AL
515 char * const oldbp = PL_bufptr;
516 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 517
7918f24d
NC
518 PERL_ARGS_ASSERT_NO_OP;
519
1189a94a
GS
520 if (!s)
521 s = oldbp;
07c798fb 522 else
1189a94a 523 PL_bufptr = s;
cea2e8a9 524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
525 if (ckWARN_d(WARN_SYNTAX)) {
526 if (is_first)
527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
528 "\t(Missing semicolon on previous line?)\n");
529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 530 const char *t;
c35e046a
AL
531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
532 NOOP;
56da5a46
RGS
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %.*s?)\n",
551405c4 536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
537 }
538 else {
539 assert(s >= oldbp);
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 542 }
07c798fb 543 }
3280af22 544 PL_bufptr = oldbp;
8990e307
LW
545}
546
ffb4593c
NT
547/*
548 * S_missingterm
549 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 550 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
551 * If we're in a delimited string and the delimiter is a control
552 * character, it's reformatted into a two-char sequence like ^C.
553 * This is fatal.
554 */
555
76e3520e 556STATIC void
cea2e8a9 557S_missingterm(pTHX_ char *s)
8990e307 558{
97aff369 559 dVAR;
8990e307
LW
560 char tmpbuf[3];
561 char q;
562 if (s) {
9d4ba2ae 563 char * const nl = strrchr(s,'\n');
d2719217 564 if (nl)
8990e307
LW
565 *nl = '\0';
566 }
463559e7 567 else if (isCNTRL(PL_multi_close)) {
8990e307 568 *tmpbuf = '^';
585ec06d 569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
570 tmpbuf[2] = '\0';
571 s = tmpbuf;
572 }
573 else {
eb160463 574 *tmpbuf = (char)PL_multi_close;
8990e307
LW
575 tmpbuf[1] = '\0';
576 s = tmpbuf;
577 }
578 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 580}
79072805 581
ef89dcc3 582#define FEATURE_IS_ENABLED(name) \
0d863452 583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 585/* The longest string we pass in. */
1863b879 586#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 587
0d863452
RH
588/*
589 * S_feature_is_enabled
590 * Check whether the named feature is enabled.
591 */
592STATIC bool
15f169a1 593S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 594{
97aff369 595 dVAR;
0d863452 596 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 597 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
598
599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
600
4a731d7b
NC
601 assert(namelen <= MAX_FEATURE_LEN);
602 memcpy(&he_name[8], name, namelen);
d4c19fe8 603
7b9ef140 604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
605}
606
ffb4593c 607/*
9cbb5ea2
GS
608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
609 * utf16-to-utf8-reversed.
ffb4593c
NT
610 */
611
c39cd008
GS
612#ifdef PERL_CR_FILTER
613static void
614strip_return(SV *sv)
615{
95a20fc0 616 register const char *s = SvPVX_const(sv);
9d4ba2ae 617 register const char * const e = s + SvCUR(sv);
7918f24d
NC
618
619 PERL_ARGS_ASSERT_STRIP_RETURN;
620
c39cd008
GS
621 /* outer loop optimized to do nothing if there are no CR-LFs */
622 while (s < e) {
623 if (*s++ == '\r' && *s == '\n') {
624 /* hit a CR-LF, need to copy the rest */
625 register char *d = s - 1;
626 *d++ = *s++;
627 while (s < e) {
628 if (*s == '\r' && s[1] == '\n')
629 s++;
630 *d++ = *s++;
631 }
632 SvCUR(sv) -= s - d;
633 return;
634 }
635 }
636}
a868473f 637
76e3520e 638STATIC I32
c39cd008 639S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 640{
f54cb97a 641 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
642 if (count > 0 && !maxlen)
643 strip_return(sv);
644 return count;
a868473f
NIS
645}
646#endif
647
199e78b7
DM
648
649
ffb4593c
NT
650/*
651 * Perl_lex_start
5486870f 652 *
e3abe207 653 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
654 *
655 * rsfp is the opened file handle to read from (if any),
656 *
657 * line holds any initial content already read from the file (or in
658 * the case of no file, such as an eval, the whole contents);
659 *
660 * new_filter indicates that this is a new file and it shouldn't inherit
661 * the filters from the current parser (ie require).
ffb4593c
NT
662 */
663
a0d0e21e 664void
5486870f 665Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 666{
97aff369 667 dVAR;
6ef55633 668 const char *s = NULL;
8990e307 669 STRLEN len;
5486870f 670 yy_parser *parser, *oparser;
acdf0a21
DM
671
672 /* create and initialise a parser */
673
199e78b7 674 Newxz(parser, 1, yy_parser);
5486870f 675 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
676 PL_parser = parser;
677
678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
679 parser->ps = parser->stack;
680 parser->stack_size = YYINITDEPTH;
681
682 parser->stack->state = 0;
683 parser->yyerrstatus = 0;
684 parser->yychar = YYEMPTY; /* Cause a token to be read. */
685
e3abe207
DM
686 /* on scope exit, free this parser and restore any outer one */
687 SAVEPARSER(parser);
7c4baf47 688 parser->saved_curcop = PL_curcop;
e3abe207 689
acdf0a21 690 /* initialise lexer state */
8990e307 691
fb205e7a
DM
692#ifdef PERL_MAD
693 parser->curforce = -1;
694#else
695 parser->nexttoke = 0;
696#endif
ca4cfd28 697 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 698 parser->copline = NOLINE;
5afb0a62 699 parser->lex_state = LEX_NORMAL;
c2598295 700 parser->expect = XSTATE;
2f9285f8 701 parser->rsfp = rsfp;
56b27c9a 702 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 704
199e78b7
DM
705 Newx(parser->lex_brackstack, 120, char);
706 Newx(parser->lex_casestack, 12, char);
707 *parser->lex_casestack = '\0';
02b34bbe 708
10efb74f
NC
709 if (line) {
710 s = SvPV_const(line, len);
711 } else {
712 len = 0;
713 }
bdc0bf6f 714
10efb74f 715 if (!len) {
bdc0bf6f 716 parser->linestr = newSVpvs("\n;");
10efb74f 717 } else if (SvREADONLY(line) || s[len-1] != ';') {
bdc0bf6f 718 parser->linestr = newSVsv(line);
10efb74f 719 if (s[len-1] != ';')
bdc0bf6f 720 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
721 } else {
722 SvTEMP_off(line);
723 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 724 parser->linestr = line;
8990e307 725 }
f06b5848
DM
726 parser->oldoldbufptr =
727 parser->oldbufptr =
728 parser->bufptr =
729 parser->linestart = SvPVX(parser->linestr);
730 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
731 parser->last_lop = parser->last_uni = NULL;
79072805 732}
a687059c 733
e3abe207
DM
734
735/* delete a parser object */
736
737void
738Perl_parser_free(pTHX_ const yy_parser *parser)
739{
7918f24d
NC
740 PERL_ARGS_ASSERT_PARSER_FREE;
741
7c4baf47 742 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
743 SvREFCNT_dec(parser->linestr);
744
2f9285f8
DM
745 if (parser->rsfp == PerlIO_stdin())
746 PerlIO_clearerr(parser->rsfp);
799361c3
SH
747 else if (parser->rsfp && (!parser->old_parser ||
748 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 749 PerlIO_close(parser->rsfp);
5486870f 750 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 751
e3abe207
DM
752 Safefree(parser->stack);
753 Safefree(parser->lex_brackstack);
754 Safefree(parser->lex_casestack);
755 PL_parser = parser->old_parser;
756 Safefree(parser);
757}
758
759
ffb4593c
NT
760/*
761 * Perl_lex_end
9cbb5ea2
GS
762 * Finalizer for lexing operations. Must be called when the parser is
763 * done with the lexer.
ffb4593c
NT
764 */
765
463ee0b2 766void
864dbfa3 767Perl_lex_end(pTHX)
463ee0b2 768{
97aff369 769 dVAR;
3280af22 770 PL_doextract = FALSE;
463ee0b2
LW
771}
772
ffb4593c 773/*
f0e67a1d
Z
774=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
775
776Buffer scalar containing the chunk currently under consideration of the
777text currently being lexed. This is always a plain string scalar (for
778which C<SvPOK> is true). It is not intended to be used as a scalar by
779normal scalar means; instead refer to the buffer directly by the pointer
780variables described below.
781
782The lexer maintains various C<char*> pointers to things in the
783C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
784reallocated, all of these pointers must be updated. Don't attempt to
785do this manually, but rather use L</lex_grow_linestr> if you need to
786reallocate the buffer.
787
788The content of the text chunk in the buffer is commonly exactly one
789complete line of input, up to and including a newline terminator,
790but there are situations where it is otherwise. The octets of the
791buffer may be intended to be interpreted as either UTF-8 or Latin-1.
792The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
793flag on this scalar, which may disagree with it.
794
795For direct examination of the buffer, the variable
796L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
797lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
798of these pointers is usually preferable to examination of the scalar
799through normal scalar means.
800
801=for apidoc AmxU|char *|PL_parser-E<gt>bufend
802
803Direct pointer to the end of the chunk of text currently being lexed, the
804end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
805+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
806always located at the end of the buffer, and does not count as part of
807the buffer's contents.
808
809=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
810
811Points to the current position of lexing inside the lexer buffer.
812Characters around this point may be freely examined, within
813the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
814L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
815interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
816
817Lexing code (whether in the Perl core or not) moves this pointer past
818the characters that it consumes. It is also expected to perform some
819bookkeeping whenever a newline character is consumed. This movement
820can be more conveniently performed by the function L</lex_read_to>,
821which handles newlines appropriately.
822
823Interpretation of the buffer's octets can be abstracted out by
824using the slightly higher-level functions L</lex_peek_unichar> and
825L</lex_read_unichar>.
826
827=for apidoc AmxU|char *|PL_parser-E<gt>linestart
828
829Points to the start of the current line inside the lexer buffer.
830This is useful for indicating at which column an error occurred, and
831not much else. This must be updated by any lexing code that consumes
832a newline; the function L</lex_read_to> handles this detail.
833
834=cut
835*/
836
837/*
838=for apidoc Amx|bool|lex_bufutf8
839
840Indicates whether the octets in the lexer buffer
841(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
842of Unicode characters. If not, they should be interpreted as Latin-1
843characters. This is analogous to the C<SvUTF8> flag for scalars.
844
845In UTF-8 mode, it is not guaranteed that the lexer buffer actually
846contains valid UTF-8. Lexing code must be robust in the face of invalid
847encoding.
848
849The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
850is significant, but not the whole story regarding the input character
851encoding. Normally, when a file is being read, the scalar contains octets
852and its C<SvUTF8> flag is off, but the octets should be interpreted as
853UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
854however, the scalar may have the C<SvUTF8> flag on, and in this case its
855octets should be interpreted as UTF-8 unless the C<use bytes> pragma
856is in effect. This logic may change in the future; use this function
857instead of implementing the logic yourself.
858
859=cut
860*/
861
862bool
863Perl_lex_bufutf8(pTHX)
864{
865 return UTF;
866}
867
868/*
869=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
870
871Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
872at least I<len> octets (including terminating NUL). Returns a
873pointer to the reallocated buffer. This is necessary before making
874any direct modification of the buffer that would increase its length.
875L</lex_stuff_pvn> provides a more convenient way to insert text into
876the buffer.
877
878Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
879this function updates all of the lexer's variables that point directly
880into the buffer.
881
882=cut
883*/
884
885char *
886Perl_lex_grow_linestr(pTHX_ STRLEN len)
887{
888 SV *linestr;
889 char *buf;
890 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
891 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
892 linestr = PL_parser->linestr;
893 buf = SvPVX(linestr);
894 if (len <= SvLEN(linestr))
895 return buf;
896 bufend_pos = PL_parser->bufend - buf;
897 bufptr_pos = PL_parser->bufptr - buf;
898 oldbufptr_pos = PL_parser->oldbufptr - buf;
899 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
900 linestart_pos = PL_parser->linestart - buf;
901 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
902 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
903 buf = sv_grow(linestr, len);
904 PL_parser->bufend = buf + bufend_pos;
905 PL_parser->bufptr = buf + bufptr_pos;
906 PL_parser->oldbufptr = buf + oldbufptr_pos;
907 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
908 PL_parser->linestart = buf + linestart_pos;
909 if (PL_parser->last_uni)
910 PL_parser->last_uni = buf + last_uni_pos;
911 if (PL_parser->last_lop)
912 PL_parser->last_lop = buf + last_lop_pos;
913 return buf;
914}
915
916/*
917=for apidoc Amx|void|lex_stuff_pvn|char *pv|STRLEN len|U32 flags
918
919Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
920immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
921reallocating the buffer if necessary. This means that lexing code that
922runs later will see the characters as if they had appeared in the input.
923It is not recommended to do this as part of normal parsing, and most
924uses of this facility run the risk of the inserted characters being
925interpreted in an unintended manner.
926
927The string to be inserted is represented by I<len> octets starting
928at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
929according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
930The characters are recoded for the lexer buffer, according to how the
931buffer is currently being interpreted (L</lex_bufutf8>). If a string
932to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
933function is more convenient.
934
935=cut
936*/
937
938void
939Perl_lex_stuff_pvn(pTHX_ char *pv, STRLEN len, U32 flags)
940{
749123ff 941 dVAR;
f0e67a1d
Z
942 char *bufptr;
943 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
944 if (flags & ~(LEX_STUFF_UTF8))
945 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
946 if (UTF) {
947 if (flags & LEX_STUFF_UTF8) {
948 goto plain_copy;
949 } else {
950 STRLEN highhalf = 0;
951 char *p, *e = pv+len;
952 for (p = pv; p != e; p++)
953 highhalf += !!(((U8)*p) & 0x80);
954 if (!highhalf)
955 goto plain_copy;
956 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
957 bufptr = PL_parser->bufptr;
958 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
959 PL_parser->bufend += len+highhalf;
960 for (p = pv; p != e; p++) {
961 U8 c = (U8)*p;
962 if (c & 0x80) {
963 *bufptr++ = (char)(0xc0 | (c >> 6));
964 *bufptr++ = (char)(0x80 | (c & 0x3f));
965 } else {
966 *bufptr++ = (char)c;
967 }
968 }
969 }
970 } else {
971 if (flags & LEX_STUFF_UTF8) {
972 STRLEN highhalf = 0;
973 char *p, *e = pv+len;
974 for (p = pv; p != e; p++) {
975 U8 c = (U8)*p;
976 if (c >= 0xc4) {
977 Perl_croak(aTHX_ "Lexing code attempted to stuff "
978 "non-Latin-1 character into Latin-1 input");
979 } else if (c >= 0xc2 && p+1 != e &&
980 (((U8)p[1]) & 0xc0) == 0x80) {
981 p++;
982 highhalf++;
983 } else if (c >= 0x80) {
984 /* malformed UTF-8 */
985 ENTER;
986 SAVESPTR(PL_warnhook);
987 PL_warnhook = PERL_WARNHOOK_FATAL;
988 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
989 LEAVE;
990 }
991 }
992 if (!highhalf)
993 goto plain_copy;
994 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
995 bufptr = PL_parser->bufptr;
996 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
997 PL_parser->bufend += len-highhalf;
998 for (p = pv; p != e; p++) {
999 U8 c = (U8)*p;
1000 if (c & 0x80) {
1001 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1002 p++;
1003 } else {
1004 *bufptr++ = (char)c;
1005 }
1006 }
1007 } else {
1008 plain_copy:
1009 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1010 bufptr = PL_parser->bufptr;
1011 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
1012 PL_parser->bufend += len;
1013 Copy(pv, bufptr, len, char);
1014 }
1015 }
1016}
1017
1018/*
1019=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1020
1021Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1022immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1023reallocating the buffer if necessary. This means that lexing code that
1024runs later will see the characters as if they had appeared in the input.
1025It is not recommended to do this as part of normal parsing, and most
1026uses of this facility run the risk of the inserted characters being
1027interpreted in an unintended manner.
1028
1029The string to be inserted is the string value of I<sv>. The characters
1030are recoded for the lexer buffer, according to how the buffer is currently
1031being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1032not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1033need to construct a scalar.
1034
1035=cut
1036*/
1037
1038void
1039Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1040{
1041 char *pv;
1042 STRLEN len;
1043 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1044 if (flags)
1045 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1046 pv = SvPV(sv, len);
1047 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1048}
1049
1050/*
1051=for apidoc Amx|void|lex_unstuff|char *ptr
1052
1053Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1054I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1055This hides the discarded text from any lexing code that runs later,
1056as if the text had never appeared.
1057
1058This is not the normal way to consume lexed text. For that, use
1059L</lex_read_to>.
1060
1061=cut
1062*/
1063
1064void
1065Perl_lex_unstuff(pTHX_ char *ptr)
1066{
1067 char *buf, *bufend;
1068 STRLEN unstuff_len;
1069 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1070 buf = PL_parser->bufptr;
1071 if (ptr < buf)
1072 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1073 if (ptr == buf)
1074 return;
1075 bufend = PL_parser->bufend;
1076 if (ptr > bufend)
1077 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1078 unstuff_len = ptr - buf;
1079 Move(ptr, buf, bufend+1-ptr, char);
1080 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1081 PL_parser->bufend = bufend - unstuff_len;
1082}
1083
1084/*
1085=for apidoc Amx|void|lex_read_to|char *ptr
1086
1087Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1088to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1089performing the correct bookkeeping whenever a newline character is passed.
1090This is the normal way to consume lexed text.
1091
1092Interpretation of the buffer's octets can be abstracted out by
1093using the slightly higher-level functions L</lex_peek_unichar> and
1094L</lex_read_unichar>.
1095
1096=cut
1097*/
1098
1099void
1100Perl_lex_read_to(pTHX_ char *ptr)
1101{
1102 char *s;
1103 PERL_ARGS_ASSERT_LEX_READ_TO;
1104 s = PL_parser->bufptr;
1105 if (ptr < s || ptr > PL_parser->bufend)
1106 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1107 for (; s != ptr; s++)
1108 if (*s == '\n') {
1109 CopLINE_inc(PL_curcop);
1110 PL_parser->linestart = s+1;
1111 }
1112 PL_parser->bufptr = ptr;
1113}
1114
1115/*
1116=for apidoc Amx|void|lex_discard_to|char *ptr
1117
1118Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1119up to I<ptr>. The remaining content of the buffer will be moved, and
1120all pointers into the buffer updated appropriately. I<ptr> must not
1121be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1122it is not permitted to discard text that has yet to be lexed.
1123
1124Normally it is not necessarily to do this directly, because it suffices to
1125use the implicit discarding behaviour of L</lex_next_chunk> and things
1126based on it. However, if a token stretches across multiple lines,
1127and the lexing code has kept multiple lines of text in the buffer fof
1128that purpose, then after completion of the token it would be wise to
1129explicitly discard the now-unneeded earlier lines, to avoid future
1130multi-line tokens growing the buffer without bound.
1131
1132=cut
1133*/
1134
1135void
1136Perl_lex_discard_to(pTHX_ char *ptr)
1137{
1138 char *buf;
1139 STRLEN discard_len;
1140 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1141 buf = SvPVX(PL_parser->linestr);
1142 if (ptr < buf)
1143 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1144 if (ptr == buf)
1145 return;
1146 if (ptr > PL_parser->bufptr)
1147 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1148 discard_len = ptr - buf;
1149 if (PL_parser->oldbufptr < ptr)
1150 PL_parser->oldbufptr = ptr;
1151 if (PL_parser->oldoldbufptr < ptr)
1152 PL_parser->oldoldbufptr = ptr;
1153 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1154 PL_parser->last_uni = NULL;
1155 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1156 PL_parser->last_lop = NULL;
1157 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1158 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1159 PL_parser->bufend -= discard_len;
1160 PL_parser->bufptr -= discard_len;
1161 PL_parser->oldbufptr -= discard_len;
1162 PL_parser->oldoldbufptr -= discard_len;
1163 if (PL_parser->last_uni)
1164 PL_parser->last_uni -= discard_len;
1165 if (PL_parser->last_lop)
1166 PL_parser->last_lop -= discard_len;
1167}
1168
1169/*
1170=for apidoc Amx|bool|lex_next_chunk|U32 flags
1171
1172Reads in the next chunk of text to be lexed, appending it to
1173L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1174looked to the end of the current chunk and wants to know more. It is
1175usual, but not necessary, for lexing to have consumed the entirety of
1176the current chunk at this time.
1177
1178If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1179chunk (i.e., the current chunk has been entirely consumed), normally the
1180current chunk will be discarded at the same time that the new chunk is
1181read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1182will not be discarded. If the current chunk has not been entirely
1183consumed, then it will not be discarded regardless of the flag.
1184
1185Returns true if some new text was added to the buffer, or false if the
1186buffer has reached the end of the input text.
1187
1188=cut
1189*/
1190
1191#define LEX_FAKE_EOF 0x80000000
1192
1193bool
1194Perl_lex_next_chunk(pTHX_ U32 flags)
1195{
1196 SV *linestr;
1197 char *buf;
1198 STRLEN old_bufend_pos, new_bufend_pos;
1199 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1200 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1201 bool got_some_for_debugger = 0;
f0e67a1d
Z
1202 bool got_some;
1203 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1204 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1205 linestr = PL_parser->linestr;
1206 buf = SvPVX(linestr);
1207 if (!(flags & LEX_KEEP_PREVIOUS) &&
1208 PL_parser->bufptr == PL_parser->bufend) {
1209 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1210 linestart_pos = 0;
1211 if (PL_parser->last_uni != PL_parser->bufend)
1212 PL_parser->last_uni = NULL;
1213 if (PL_parser->last_lop != PL_parser->bufend)
1214 PL_parser->last_lop = NULL;
1215 last_uni_pos = last_lop_pos = 0;
1216 *buf = 0;
1217 SvCUR(linestr) = 0;
1218 } else {
1219 old_bufend_pos = PL_parser->bufend - buf;
1220 bufptr_pos = PL_parser->bufptr - buf;
1221 oldbufptr_pos = PL_parser->oldbufptr - buf;
1222 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1223 linestart_pos = PL_parser->linestart - buf;
1224 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1225 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1226 }
1227 if (flags & LEX_FAKE_EOF) {
1228 goto eof;
1229 } else if (!PL_parser->rsfp) {
1230 got_some = 0;
1231 } else if (filter_gets(linestr, old_bufend_pos)) {
1232 got_some = 1;
17cc9359 1233 got_some_for_debugger = 1;
f0e67a1d 1234 } else {
580561a3
Z
1235 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1236 sv_setpvs(linestr, "");
f0e67a1d
Z
1237 eof:
1238 /* End of real input. Close filehandle (unless it was STDIN),
1239 * then add implicit termination.
1240 */
1241 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1242 PerlIO_clearerr(PL_parser->rsfp);
1243 else if (PL_parser->rsfp)
1244 (void)PerlIO_close(PL_parser->rsfp);
1245 PL_parser->rsfp = NULL;
1246 PL_doextract = FALSE;
1247#ifdef PERL_MAD
1248 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1249 PL_faketokens = 1;
1250#endif
1251 if (!PL_in_eval && PL_minus_p) {
1252 sv_catpvs(linestr,
1253 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1254 PL_minus_n = PL_minus_p = 0;
1255 } else if (!PL_in_eval && PL_minus_n) {
1256 sv_catpvs(linestr, /*{*/";}");
1257 PL_minus_n = 0;
1258 } else
1259 sv_catpvs(linestr, ";");
1260 got_some = 1;
1261 }
1262 buf = SvPVX(linestr);
1263 new_bufend_pos = SvCUR(linestr);
1264 PL_parser->bufend = buf + new_bufend_pos;
1265 PL_parser->bufptr = buf + bufptr_pos;
1266 PL_parser->oldbufptr = buf + oldbufptr_pos;
1267 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1268 PL_parser->linestart = buf + linestart_pos;
1269 if (PL_parser->last_uni)
1270 PL_parser->last_uni = buf + last_uni_pos;
1271 if (PL_parser->last_lop)
1272 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1273 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1274 PL_curstash != PL_debstash) {
1275 /* debugger active and we're not compiling the debugger code,
1276 * so store the line into the debugger's array of lines
1277 */
1278 update_debugger_info(NULL, buf+old_bufend_pos,
1279 new_bufend_pos-old_bufend_pos);
1280 }
1281 return got_some;
1282}
1283
1284/*
1285=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1286
1287Looks ahead one (Unicode) character in the text currently being lexed.
1288Returns the codepoint (unsigned integer value) of the next character,
1289or -1 if lexing has reached the end of the input text. To consume the
1290peeked character, use L</lex_read_unichar>.
1291
1292If the next character is in (or extends into) the next chunk of input
1293text, the next chunk will be read in. Normally the current chunk will be
1294discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1295then the current chunk will not be discarded.
1296
1297If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1298is encountered, an exception is generated.
1299
1300=cut
1301*/
1302
1303I32
1304Perl_lex_peek_unichar(pTHX_ U32 flags)
1305{
749123ff 1306 dVAR;
f0e67a1d
Z
1307 char *s, *bufend;
1308 if (flags & ~(LEX_KEEP_PREVIOUS))
1309 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1310 s = PL_parser->bufptr;
1311 bufend = PL_parser->bufend;
1312 if (UTF) {
1313 U8 head;
1314 I32 unichar;
1315 STRLEN len, retlen;
1316 if (s == bufend) {
1317 if (!lex_next_chunk(flags))
1318 return -1;
1319 s = PL_parser->bufptr;
1320 bufend = PL_parser->bufend;
1321 }
1322 head = (U8)*s;
1323 if (!(head & 0x80))
1324 return head;
1325 if (head & 0x40) {
1326 len = PL_utf8skip[head];
1327 while ((STRLEN)(bufend-s) < len) {
1328 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1329 break;
1330 s = PL_parser->bufptr;
1331 bufend = PL_parser->bufend;
1332 }
1333 }
1334 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1335 if (retlen == (STRLEN)-1) {
1336 /* malformed UTF-8 */
1337 ENTER;
1338 SAVESPTR(PL_warnhook);
1339 PL_warnhook = PERL_WARNHOOK_FATAL;
1340 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1341 LEAVE;
1342 }
1343 return unichar;
1344 } else {
1345 if (s == bufend) {
1346 if (!lex_next_chunk(flags))
1347 return -1;
1348 s = PL_parser->bufptr;
1349 }
1350 return (U8)*s;
1351 }
1352}
1353
1354/*
1355=for apidoc Amx|I32|lex_read_unichar|U32 flags
1356
1357Reads the next (Unicode) character in the text currently being lexed.
1358Returns the codepoint (unsigned integer value) of the character read,
1359and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1360if lexing has reached the end of the input text. To non-destructively
1361examine the next character, use L</lex_peek_unichar> instead.
1362
1363If the next character is in (or extends into) the next chunk of input
1364text, the next chunk will be read in. Normally the current chunk will be
1365discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1366then the current chunk will not be discarded.
1367
1368If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1369is encountered, an exception is generated.
1370
1371=cut
1372*/
1373
1374I32
1375Perl_lex_read_unichar(pTHX_ U32 flags)
1376{
1377 I32 c;
1378 if (flags & ~(LEX_KEEP_PREVIOUS))
1379 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1380 c = lex_peek_unichar(flags);
1381 if (c != -1) {
1382 if (c == '\n')
1383 CopLINE_inc(PL_curcop);
1384 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1385 }
1386 return c;
1387}
1388
1389/*
1390=for apidoc Amx|void|lex_read_space|U32 flags
1391
1392Reads optional spaces, in Perl style, in the text currently being
1393lexed. The spaces may include ordinary whitespace characters and
1394Perl-style comments. C<#line> directives are processed if encountered.
1395L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1396at a non-space character (or the end of the input text).
1397
1398If spaces extend into the next chunk of input text, the next chunk will
1399be read in. Normally the current chunk will be discarded at the same
1400time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1401chunk will not be discarded.
1402
1403=cut
1404*/
1405
f0998909
Z
1406#define LEX_NO_NEXT_CHUNK 0x80000000
1407
f0e67a1d
Z
1408void
1409Perl_lex_read_space(pTHX_ U32 flags)
1410{
1411 char *s, *bufend;
1412 bool need_incline = 0;
f0998909 1413 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1414 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1415#ifdef PERL_MAD
1416 if (PL_skipwhite) {
1417 sv_free(PL_skipwhite);
1418 PL_skipwhite = NULL;
1419 }
1420 if (PL_madskills)
1421 PL_skipwhite = newSVpvs("");
1422#endif /* PERL_MAD */
1423 s = PL_parser->bufptr;
1424 bufend = PL_parser->bufend;
1425 while (1) {
1426 char c = *s;
1427 if (c == '#') {
1428 do {
1429 c = *++s;
1430 } while (!(c == '\n' || (c == 0 && s == bufend)));
1431 } else if (c == '\n') {
1432 s++;
1433 PL_parser->linestart = s;
1434 if (s == bufend)
1435 need_incline = 1;
1436 else
1437 incline(s);
1438 } else if (isSPACE(c)) {
1439 s++;
1440 } else if (c == 0 && s == bufend) {
1441 bool got_more;
1442#ifdef PERL_MAD
1443 if (PL_madskills)
1444 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1445#endif /* PERL_MAD */
f0998909
Z
1446 if (flags & LEX_NO_NEXT_CHUNK)
1447 break;
f0e67a1d
Z
1448 PL_parser->bufptr = s;
1449 CopLINE_inc(PL_curcop);
1450 got_more = lex_next_chunk(flags);
1451 CopLINE_dec(PL_curcop);
1452 s = PL_parser->bufptr;
1453 bufend = PL_parser->bufend;
1454 if (!got_more)
1455 break;
1456 if (need_incline && PL_parser->rsfp) {
1457 incline(s);
1458 need_incline = 0;
1459 }
1460 } else {
1461 break;
1462 }
1463 }
1464#ifdef PERL_MAD
1465 if (PL_madskills)
1466 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1467#endif /* PERL_MAD */
1468 PL_parser->bufptr = s;
1469}
1470
1471/*
ffb4593c
NT
1472 * S_incline
1473 * This subroutine has nothing to do with tilting, whether at windmills
1474 * or pinball tables. Its name is short for "increment line". It
57843af0 1475 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1476 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1477 * # line 500 "foo.pm"
1478 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1479 */
1480
76e3520e 1481STATIC void
d9095cec 1482S_incline(pTHX_ const char *s)
463ee0b2 1483{
97aff369 1484 dVAR;
d9095cec
NC
1485 const char *t;
1486 const char *n;
1487 const char *e;
463ee0b2 1488
7918f24d
NC
1489 PERL_ARGS_ASSERT_INCLINE;
1490
57843af0 1491 CopLINE_inc(PL_curcop);
463ee0b2
LW
1492 if (*s++ != '#')
1493 return;
d4c19fe8
AL
1494 while (SPACE_OR_TAB(*s))
1495 s++;
73659bf1
GS
1496 if (strnEQ(s, "line", 4))
1497 s += 4;
1498 else
1499 return;
084592ab 1500 if (SPACE_OR_TAB(*s))
73659bf1 1501 s++;
4e553d73 1502 else
73659bf1 1503 return;
d4c19fe8
AL
1504 while (SPACE_OR_TAB(*s))
1505 s++;
463ee0b2
LW
1506 if (!isDIGIT(*s))
1507 return;
d4c19fe8 1508
463ee0b2
LW
1509 n = s;
1510 while (isDIGIT(*s))
1511 s++;
07714eb4 1512 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1513 return;
bf4acbe4 1514 while (SPACE_OR_TAB(*s))
463ee0b2 1515 s++;
73659bf1 1516 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1517 s++;
73659bf1
GS
1518 e = t + 1;
1519 }
463ee0b2 1520 else {
c35e046a
AL
1521 t = s;
1522 while (!isSPACE(*t))
1523 t++;
73659bf1 1524 e = t;
463ee0b2 1525 }
bf4acbe4 1526 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1527 e++;
1528 if (*e != '\n' && *e != '\0')
1529 return; /* false alarm */
1530
f4dd75d9 1531 if (t - s > 0) {
d9095cec 1532 const STRLEN len = t - s;
8a5ee598 1533#ifndef USE_ITHREADS
19bad673
NC
1534 SV *const temp_sv = CopFILESV(PL_curcop);
1535 const char *cf;
1536 STRLEN tmplen;
1537
1538 if (temp_sv) {
1539 cf = SvPVX(temp_sv);
1540 tmplen = SvCUR(temp_sv);
1541 } else {
1542 cf = NULL;
1543 tmplen = 0;
1544 }
1545
42d9b98d 1546 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1547 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1548 * to *{"::_<newfilename"} */
44867030
NC
1549 /* However, the long form of evals is only turned on by the
1550 debugger - usually they're "(eval %lu)" */
1551 char smallbuf[128];
1552 char *tmpbuf;
1553 GV **gvp;
d9095cec 1554 STRLEN tmplen2 = len;
798b63bc 1555 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1556 tmpbuf = smallbuf;
1557 else
2ae0db35 1558 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1559 tmpbuf[0] = '_';
1560 tmpbuf[1] = '<';
2ae0db35 1561 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1562 tmplen += 2;
8a5ee598
RGS
1563 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1564 if (gvp) {
44867030
NC
1565 char *tmpbuf2;
1566 GV *gv2;
1567
1568 if (tmplen2 + 2 <= sizeof smallbuf)
1569 tmpbuf2 = smallbuf;
1570 else
1571 Newx(tmpbuf2, tmplen2 + 2, char);
1572
1573 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1574 /* Either they malloc'd it, or we malloc'd it,
1575 so no prefix is present in ours. */
1576 tmpbuf2[0] = '_';
1577 tmpbuf2[1] = '<';
1578 }
1579
1580 memcpy(tmpbuf2 + 2, s, tmplen2);
1581 tmplen2 += 2;
1582
8a5ee598 1583 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1584 if (!isGV(gv2)) {
8a5ee598 1585 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1586 /* adjust ${"::_<newfilename"} to store the new file name */
1587 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1588 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1589 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1590 }
44867030
NC
1591
1592 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1593 }
e66cf94c 1594 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1595 }
8a5ee598 1596#endif
05ec9bb3 1597 CopFILE_free(PL_curcop);
d9095cec 1598 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1599 }
57843af0 1600 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1601}
1602
29595ff2 1603#ifdef PERL_MAD
cd81e915 1604/* skip space before PL_thistoken */
29595ff2
NC
1605
1606STATIC char *
1607S_skipspace0(pTHX_ register char *s)
1608{
7918f24d
NC
1609 PERL_ARGS_ASSERT_SKIPSPACE0;
1610
29595ff2
NC
1611 s = skipspace(s);
1612 if (!PL_madskills)
1613 return s;
cd81e915
NC
1614 if (PL_skipwhite) {
1615 if (!PL_thiswhite)
6b29d1f5 1616 PL_thiswhite = newSVpvs("");
cd81e915
NC
1617 sv_catsv(PL_thiswhite, PL_skipwhite);
1618 sv_free(PL_skipwhite);
1619 PL_skipwhite = 0;
1620 }
1621 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1622 return s;
1623}
1624
cd81e915 1625/* skip space after PL_thistoken */
29595ff2
NC
1626
1627STATIC char *
1628S_skipspace1(pTHX_ register char *s)
1629{
d4c19fe8 1630 const char *start = s;
29595ff2
NC
1631 I32 startoff = start - SvPVX(PL_linestr);
1632
7918f24d
NC
1633 PERL_ARGS_ASSERT_SKIPSPACE1;
1634
29595ff2
NC
1635 s = skipspace(s);
1636 if (!PL_madskills)
1637 return s;
1638 start = SvPVX(PL_linestr) + startoff;
cd81e915 1639 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1640 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1641 PL_thistoken = newSVpvn(tstart, start - tstart);
1642 }
1643 PL_realtokenstart = -1;
1644 if (PL_skipwhite) {
1645 if (!PL_nextwhite)
6b29d1f5 1646 PL_nextwhite = newSVpvs("");
cd81e915
NC
1647 sv_catsv(PL_nextwhite, PL_skipwhite);
1648 sv_free(PL_skipwhite);
1649 PL_skipwhite = 0;
29595ff2
NC
1650 }
1651 return s;
1652}
1653
1654STATIC char *
1655S_skipspace2(pTHX_ register char *s, SV **svp)
1656{
c35e046a
AL
1657 char *start;
1658 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1659 const I32 startoff = s - SvPVX(PL_linestr);
1660
7918f24d
NC
1661 PERL_ARGS_ASSERT_SKIPSPACE2;
1662
29595ff2
NC
1663 s = skipspace(s);
1664 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1665 if (!PL_madskills || !svp)
1666 return s;
1667 start = SvPVX(PL_linestr) + startoff;
cd81e915 1668 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1669 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1670 PL_thistoken = newSVpvn(tstart, start - tstart);
1671 PL_realtokenstart = -1;
29595ff2 1672 }
cd81e915 1673 if (PL_skipwhite) {
29595ff2 1674 if (!*svp)
6b29d1f5 1675 *svp = newSVpvs("");
cd81e915
NC
1676 sv_setsv(*svp, PL_skipwhite);
1677 sv_free(PL_skipwhite);
1678 PL_skipwhite = 0;
29595ff2
NC
1679 }
1680
1681 return s;
1682}
1683#endif
1684
80a702cd 1685STATIC void
15f169a1 1686S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1687{
1688 AV *av = CopFILEAVx(PL_curcop);
1689 if (av) {
b9f83d2f 1690 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1691 if (orig_sv)
1692 sv_setsv(sv, orig_sv);
1693 else
1694 sv_setpvn(sv, buf, len);
80a702cd
RGS
1695 (void)SvIOK_on(sv);
1696 SvIV_set(sv, 0);
1697 av_store(av, (I32)CopLINE(PL_curcop), sv);
1698 }
1699}
1700
ffb4593c
NT
1701/*
1702 * S_skipspace
1703 * Called to gobble the appropriate amount and type of whitespace.
1704 * Skips comments as well.
1705 */
1706
76e3520e 1707STATIC char *
cea2e8a9 1708S_skipspace(pTHX_ register char *s)
a687059c 1709{
5db06880 1710#ifdef PERL_MAD
f0e67a1d
Z
1711 char *start = s;
1712#endif /* PERL_MAD */
7918f24d 1713 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1714#ifdef PERL_MAD
cd81e915
NC
1715 if (PL_skipwhite) {
1716 sv_free(PL_skipwhite);
f0e67a1d 1717 PL_skipwhite = NULL;
5db06880 1718 }
f0e67a1d 1719#endif /* PERL_MAD */
3280af22 1720 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1721 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1722 s++;
f0e67a1d
Z
1723 } else {
1724 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1725 PL_bufptr = s;
f0998909
Z
1726 lex_read_space(LEX_KEEP_PREVIOUS |
1727 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1728 LEX_NO_NEXT_CHUNK : 0));
3280af22 1729 s = PL_bufptr;
f0e67a1d
Z
1730 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1731 if (PL_linestart > PL_bufptr)
1732 PL_bufptr = PL_linestart;
1733 return s;
463ee0b2 1734 }
5db06880 1735#ifdef PERL_MAD
f0e67a1d
Z
1736 if (PL_madskills)
1737 PL_skipwhite = newSVpvn(start, s-start);
1738#endif /* PERL_MAD */
5db06880 1739 return s;
a687059c 1740}
378cc40b 1741
ffb4593c
NT
1742/*
1743 * S_check_uni
1744 * Check the unary operators to ensure there's no ambiguity in how they're
1745 * used. An ambiguous piece of code would be:
1746 * rand + 5
1747 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1748 * the +5 is its argument.
1749 */
1750
76e3520e 1751STATIC void
cea2e8a9 1752S_check_uni(pTHX)
ba106d47 1753{
97aff369 1754 dVAR;
d4c19fe8
AL
1755 const char *s;
1756 const char *t;
2f3197b3 1757
3280af22 1758 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1759 return;
3280af22
NIS
1760 while (isSPACE(*PL_last_uni))
1761 PL_last_uni++;
c35e046a
AL
1762 s = PL_last_uni;
1763 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1764 s++;
3280af22 1765 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1766 return;
6136c704 1767
9b387841
NC
1768 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1769 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1770 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1771}
1772
ffb4593c
NT
1773/*
1774 * LOP : macro to build a list operator. Its behaviour has been replaced
1775 * with a subroutine, S_lop() for which LOP is just another name.
1776 */
1777
a0d0e21e
LW
1778#define LOP(f,x) return lop(f,x,s)
1779
ffb4593c
NT
1780/*
1781 * S_lop
1782 * Build a list operator (or something that might be one). The rules:
1783 * - if we have a next token, then it's a list operator [why?]
1784 * - if the next thing is an opening paren, then it's a function
1785 * - else it's a list operator
1786 */
1787
76e3520e 1788STATIC I32
a0be28da 1789S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1790{
97aff369 1791 dVAR;
7918f24d
NC
1792
1793 PERL_ARGS_ASSERT_LOP;
1794
6154021b 1795 pl_yylval.ival = f;
35c8bce7 1796 CLINE;
3280af22
NIS
1797 PL_expect = x;
1798 PL_bufptr = s;
1799 PL_last_lop = PL_oldbufptr;
eb160463 1800 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1801#ifdef PERL_MAD
1802 if (PL_lasttoke)
1803 return REPORT(LSTOP);
1804#else
3280af22 1805 if (PL_nexttoke)
bbf60fe6 1806 return REPORT(LSTOP);
5db06880 1807#endif
79072805 1808 if (*s == '(')
bbf60fe6 1809 return REPORT(FUNC);
29595ff2 1810 s = PEEKSPACE(s);
79072805 1811 if (*s == '(')
bbf60fe6 1812 return REPORT(FUNC);
79072805 1813 else
bbf60fe6 1814 return REPORT(LSTOP);
79072805
LW
1815}
1816
5db06880
NC
1817#ifdef PERL_MAD
1818 /*
1819 * S_start_force
1820 * Sets up for an eventual force_next(). start_force(0) basically does
1821 * an unshift, while start_force(-1) does a push. yylex removes items
1822 * on the "pop" end.
1823 */
1824
1825STATIC void
1826S_start_force(pTHX_ int where)
1827{
1828 int i;
1829
cd81e915 1830 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1831 where = PL_lasttoke;
cd81e915
NC
1832 assert(PL_curforce < 0 || PL_curforce == where);
1833 if (PL_curforce != where) {
5db06880
NC
1834 for (i = PL_lasttoke; i > where; --i) {
1835 PL_nexttoke[i] = PL_nexttoke[i-1];
1836 }
1837 PL_lasttoke++;
1838 }
cd81e915 1839 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1840 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1841 PL_curforce = where;
1842 if (PL_nextwhite) {
5db06880 1843 if (PL_madskills)
6b29d1f5 1844 curmad('^', newSVpvs(""));
cd81e915 1845 CURMAD('_', PL_nextwhite);
5db06880
NC
1846 }
1847}
1848
1849STATIC void
1850S_curmad(pTHX_ char slot, SV *sv)
1851{
1852 MADPROP **where;
1853
1854 if (!sv)
1855 return;
cd81e915
NC
1856 if (PL_curforce < 0)
1857 where = &PL_thismad;
5db06880 1858 else
cd81e915 1859 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1860
cd81e915 1861 if (PL_faketokens)
76f68e9b 1862 sv_setpvs(sv, "");
5db06880
NC
1863 else {
1864 if (!IN_BYTES) {
1865 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1866 SvUTF8_on(sv);
1867 else if (PL_encoding) {
1868 sv_recode_to_utf8(sv, PL_encoding);
1869 }
1870 }
1871 }
1872
1873 /* keep a slot open for the head of the list? */
1874 if (slot != '_' && *where && (*where)->mad_key == '^') {
1875 (*where)->mad_key = slot;
daba3364 1876 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1877 (*where)->mad_val = (void*)sv;
1878 }
1879 else
1880 addmad(newMADsv(slot, sv), where, 0);
1881}
1882#else
b3f24c00
MHM
1883# define start_force(where) NOOP
1884# define curmad(slot, sv) NOOP
5db06880
NC
1885#endif
1886
ffb4593c
NT
1887/*
1888 * S_force_next
9cbb5ea2 1889 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1890 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1891 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1892 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1893 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1894 */
1895
4e553d73 1896STATIC void
cea2e8a9 1897S_force_next(pTHX_ I32 type)
79072805 1898{
97aff369 1899 dVAR;
704d4215
GG
1900#ifdef DEBUGGING
1901 if (DEBUG_T_TEST) {
1902 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1903 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1904 }
1905#endif
5db06880 1906#ifdef PERL_MAD
cd81e915 1907 if (PL_curforce < 0)
5db06880 1908 start_force(PL_lasttoke);
cd81e915 1909 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1910 if (PL_lex_state != LEX_KNOWNEXT)
1911 PL_lex_defer = PL_lex_state;
1912 PL_lex_state = LEX_KNOWNEXT;
1913 PL_lex_expect = PL_expect;
cd81e915 1914 PL_curforce = -1;
5db06880 1915#else
3280af22
NIS
1916 PL_nexttype[PL_nexttoke] = type;
1917 PL_nexttoke++;
1918 if (PL_lex_state != LEX_KNOWNEXT) {
1919 PL_lex_defer = PL_lex_state;
1920 PL_lex_expect = PL_expect;
1921 PL_lex_state = LEX_KNOWNEXT;
79072805 1922 }
5db06880 1923#endif
79072805
LW
1924}
1925
d0a148a6 1926STATIC SV *
15f169a1 1927S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1928{
97aff369 1929 dVAR;
740cce10 1930 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1931 !IN_BYTES
1932 && UTF
1933 && !is_ascii_string((const U8*)start, len)
740cce10 1934 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1935 return sv;
1936}
1937
ffb4593c
NT
1938/*
1939 * S_force_word
1940 * When the lexer knows the next thing is a word (for instance, it has
1941 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1942 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1943 * lookahead.
ffb4593c
NT
1944 *
1945 * Arguments:
b1b65b59 1946 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1947 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1948 * int check_keyword : if true, Perl checks to make sure the word isn't
1949 * a keyword (do this if the word is a label, e.g. goto FOO)
1950 * int allow_pack : if true, : characters will also be allowed (require,
1951 * use, etc. do this)
9cbb5ea2 1952 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1953 */
1954
76e3520e 1955STATIC char *
cea2e8a9 1956S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1957{
97aff369 1958 dVAR;
463ee0b2
LW
1959 register char *s;
1960 STRLEN len;
4e553d73 1961
7918f24d
NC
1962 PERL_ARGS_ASSERT_FORCE_WORD;
1963
29595ff2 1964 start = SKIPSPACE1(start);
463ee0b2 1965 s = start;
7e2040f0 1966 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1967 (allow_pack && *s == ':') ||
15f0808c 1968 (allow_initial_tick && *s == '\'') )
a0d0e21e 1969 {
3280af22 1970 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1971 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1972 return start;
cd81e915 1973 start_force(PL_curforce);
5db06880
NC
1974 if (PL_madskills)
1975 curmad('X', newSVpvn(start,s-start));
463ee0b2 1976 if (token == METHOD) {
29595ff2 1977 s = SKIPSPACE1(s);
463ee0b2 1978 if (*s == '(')
3280af22 1979 PL_expect = XTERM;
463ee0b2 1980 else {
3280af22 1981 PL_expect = XOPERATOR;
463ee0b2 1982 }
79072805 1983 }
e74e6b3d 1984 if (PL_madskills)
63575281 1985 curmad('g', newSVpvs( "forced" ));
9ded7720 1986 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1987 = (OP*)newSVOP(OP_CONST,0,
1988 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1989 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1990 force_next(token);
1991 }
1992 return s;
1993}
1994
ffb4593c
NT
1995/*
1996 * S_force_ident
9cbb5ea2 1997 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
1998 * text only contains the "foo" portion. The first argument is a pointer
1999 * to the "foo", and the second argument is the type symbol to prefix.
2000 * Forces the next token to be a "WORD".
9cbb5ea2 2001 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2002 */
2003
76e3520e 2004STATIC void
bfed75c6 2005S_force_ident(pTHX_ register const char *s, int kind)
79072805 2006{
97aff369 2007 dVAR;
7918f24d
NC
2008
2009 PERL_ARGS_ASSERT_FORCE_IDENT;
2010
c35e046a 2011 if (*s) {
90e5519e
NC
2012 const STRLEN len = strlen(s);
2013 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2014 start_force(PL_curforce);
9ded7720 2015 NEXTVAL_NEXTTOKE.opval = o;
79072805 2016 force_next(WORD);
748a9306 2017 if (kind) {
11343788 2018 o->op_private = OPpCONST_ENTERED;
55497cff 2019 /* XXX see note in pp_entereval() for why we forgo typo
2020 warnings if the symbol must be introduced in an eval.
2021 GSAR 96-10-12 */
90e5519e
NC
2022 gv_fetchpvn_flags(s, len,
2023 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2024 : GV_ADD,
2025 kind == '$' ? SVt_PV :
2026 kind == '@' ? SVt_PVAV :
2027 kind == '%' ? SVt_PVHV :
a0d0e21e 2028 SVt_PVGV
90e5519e 2029 );
748a9306 2030 }
79072805
LW
2031 }
2032}
2033
1571675a
GS
2034NV
2035Perl_str_to_version(pTHX_ SV *sv)
2036{
2037 NV retval = 0.0;
2038 NV nshift = 1.0;
2039 STRLEN len;
cfd0369c 2040 const char *start = SvPV_const(sv,len);
9d4ba2ae 2041 const char * const end = start + len;
504618e9 2042 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2043
2044 PERL_ARGS_ASSERT_STR_TO_VERSION;
2045
1571675a 2046 while (start < end) {
ba210ebe 2047 STRLEN skip;
1571675a
GS
2048 UV n;
2049 if (utf)
9041c2e3 2050 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2051 else {
2052 n = *(U8*)start;
2053 skip = 1;
2054 }
2055 retval += ((NV)n)/nshift;
2056 start += skip;
2057 nshift *= 1000;
2058 }
2059 return retval;
2060}
2061
4e553d73 2062/*
ffb4593c
NT
2063 * S_force_version
2064 * Forces the next token to be a version number.
e759cc13
RGS
2065 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2066 * and if "guessing" is TRUE, then no new token is created (and the caller
2067 * must use an alternative parsing method).
ffb4593c
NT
2068 */
2069
76e3520e 2070STATIC char *
e759cc13 2071S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2072{
97aff369 2073 dVAR;
5f66b61c 2074 OP *version = NULL;
44dcb63b 2075 char *d;
5db06880
NC
2076#ifdef PERL_MAD
2077 I32 startoff = s - SvPVX(PL_linestr);
2078#endif
89bfa8cd 2079
7918f24d
NC
2080 PERL_ARGS_ASSERT_FORCE_VERSION;
2081
29595ff2 2082 s = SKIPSPACE1(s);
89bfa8cd 2083
44dcb63b 2084 d = s;
dd629d5b 2085 if (*d == 'v')
44dcb63b 2086 d++;
44dcb63b 2087 if (isDIGIT(*d)) {
e759cc13
RGS
2088 while (isDIGIT(*d) || *d == '_' || *d == '.')
2089 d++;
5db06880
NC
2090#ifdef PERL_MAD
2091 if (PL_madskills) {
cd81e915 2092 start_force(PL_curforce);
5db06880
NC
2093 curmad('X', newSVpvn(s,d-s));
2094 }
2095#endif
9f3d182e 2096 if (*d == ';' || isSPACE(*d) || *d == '}' || !*d) {
dd629d5b 2097 SV *ver;
8d08d9ba
DG
2098#ifdef USE_LOCALE_NUMERIC
2099 char *loc = setlocale(LC_NUMERIC, "C");
2100#endif
6154021b 2101 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2102#ifdef USE_LOCALE_NUMERIC
2103 setlocale(LC_NUMERIC, loc);
2104#endif
6154021b 2105 version = pl_yylval.opval;
dd629d5b
GS
2106 ver = cSVOPx(version)->op_sv;
2107 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2108 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2109 SvNV_set(ver, str_to_version(ver));
1571675a 2110 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2111 }
89bfa8cd 2112 }
5db06880
NC
2113 else if (guessing) {
2114#ifdef PERL_MAD
2115 if (PL_madskills) {
cd81e915
NC
2116 sv_free(PL_nextwhite); /* let next token collect whitespace */
2117 PL_nextwhite = 0;
5db06880
NC
2118 s = SvPVX(PL_linestr) + startoff;
2119 }
2120#endif
e759cc13 2121 return s;
5db06880 2122 }
89bfa8cd 2123 }
2124
5db06880
NC
2125#ifdef PERL_MAD
2126 if (PL_madskills && !version) {
cd81e915
NC
2127 sv_free(PL_nextwhite); /* let next token collect whitespace */
2128 PL_nextwhite = 0;
5db06880
NC
2129 s = SvPVX(PL_linestr) + startoff;
2130 }
2131#endif
89bfa8cd 2132 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2133 start_force(PL_curforce);
9ded7720 2134 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2135 force_next(WORD);
89bfa8cd 2136
e759cc13 2137 return s;
89bfa8cd 2138}
2139
ffb4593c 2140/*
91152fc1
DG
2141 * S_force_strict_version
2142 * Forces the next token to be a version number using strict syntax rules.
2143 */
2144
2145STATIC char *
2146S_force_strict_version(pTHX_ char *s)
2147{
2148 dVAR;
2149 OP *version = NULL;
2150#ifdef PERL_MAD
2151 I32 startoff = s - SvPVX(PL_linestr);
2152#endif
2153 const char *errstr = NULL;
2154
2155 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2156
2157 while (isSPACE(*s)) /* leading whitespace */
2158 s++;
2159
2160 if (is_STRICT_VERSION(s,&errstr)) {
2161 SV *ver = newSV(0);
2162 s = (char *)scan_version(s, ver, 0);
2163 version = newSVOP(OP_CONST, 0, ver);
2164 }
2165 else if ( (*s != ';' && *s != '}' ) && (s = SKIPSPACE1(s), (*s != ';' && *s !='}' ))) {
2166 PL_bufptr = s;
2167 if (errstr)
2168 yyerror(errstr); /* version required */
2169 return s;
2170 }
2171
2172#ifdef PERL_MAD
2173 if (PL_madskills && !version) {
2174 sv_free(PL_nextwhite); /* let next token collect whitespace */
2175 PL_nextwhite = 0;
2176 s = SvPVX(PL_linestr) + startoff;
2177 }
2178#endif
2179 /* NOTE: The parser sees the package name and the VERSION swapped */
2180 start_force(PL_curforce);
2181 NEXTVAL_NEXTTOKE.opval = version;
2182 force_next(WORD);
2183
2184 return s;
2185}
2186
2187/*
ffb4593c
NT
2188 * S_tokeq
2189 * Tokenize a quoted string passed in as an SV. It finds the next
2190 * chunk, up to end of string or a backslash. It may make a new
2191 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2192 * turns \\ into \.
2193 */
2194
76e3520e 2195STATIC SV *
cea2e8a9 2196S_tokeq(pTHX_ SV *sv)
79072805 2197{
97aff369 2198 dVAR;
79072805
LW
2199 register char *s;
2200 register char *send;
2201 register char *d;
b3ac6de7
IZ
2202 STRLEN len = 0;
2203 SV *pv = sv;
79072805 2204
7918f24d
NC
2205 PERL_ARGS_ASSERT_TOKEQ;
2206
79072805 2207 if (!SvLEN(sv))
b3ac6de7 2208 goto finish;
79072805 2209
a0d0e21e 2210 s = SvPV_force(sv, len);
21a311ee 2211 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2212 goto finish;
463ee0b2 2213 send = s + len;
79072805
LW
2214 while (s < send && *s != '\\')
2215 s++;
2216 if (s == send)
b3ac6de7 2217 goto finish;
79072805 2218 d = s;
be4731d2 2219 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2220 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2221 }
79072805
LW
2222 while (s < send) {
2223 if (*s == '\\') {
a0d0e21e 2224 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2225 s++; /* all that, just for this */
2226 }
2227 *d++ = *s++;
2228 }
2229 *d = '\0';
95a20fc0 2230 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2231 finish:
3280af22 2232 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2233 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2234 return sv;
2235}
2236
ffb4593c
NT
2237/*
2238 * Now come three functions related to double-quote context,
2239 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2240 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2241 * interact with PL_lex_state, and create fake ( ... ) argument lists
2242 * to handle functions and concatenation.
2243 * They assume that whoever calls them will be setting up a fake
2244 * join call, because each subthing puts a ',' after it. This lets
2245 * "lower \luPpEr"
2246 * become
2247 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2248 *
2249 * (I'm not sure whether the spurious commas at the end of lcfirst's
2250 * arguments and join's arguments are created or not).
2251 */
2252
2253/*
2254 * S_sublex_start
6154021b 2255 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2256 *
2257 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2258 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2259 *
2260 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2261 *
2262 * Everything else becomes a FUNC.
2263 *
2264 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2265 * had an OP_CONST or OP_READLINE). This just sets us up for a
2266 * call to S_sublex_push().
2267 */
2268
76e3520e 2269STATIC I32
cea2e8a9 2270S_sublex_start(pTHX)
79072805 2271{
97aff369 2272 dVAR;
6154021b 2273 register const I32 op_type = pl_yylval.ival;
79072805
LW
2274
2275 if (op_type == OP_NULL) {
6154021b 2276 pl_yylval.opval = PL_lex_op;
5f66b61c 2277 PL_lex_op = NULL;
79072805
LW
2278 return THING;
2279 }
2280 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2281 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2282
2283 if (SvTYPE(sv) == SVt_PVIV) {
2284 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2285 STRLEN len;
96a5add6 2286 const char * const p = SvPV_const(sv, len);
740cce10 2287 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2288 SvREFCNT_dec(sv);
2289 sv = nsv;
4e553d73 2290 }
6154021b 2291 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2292 PL_lex_stuff = NULL;
6f33ba73
RGS
2293 /* Allow <FH> // "foo" */
2294 if (op_type == OP_READLINE)
2295 PL_expect = XTERMORDORDOR;
79072805
LW
2296 return THING;
2297 }
e3f73d4e
RGS
2298 else if (op_type == OP_BACKTICK && PL_lex_op) {
2299 /* readpipe() vas overriden */
2300 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2301 pl_yylval.opval = PL_lex_op;
9b201d7d 2302 PL_lex_op = NULL;
e3f73d4e
RGS
2303 PL_lex_stuff = NULL;
2304 return THING;
2305 }
79072805 2306
3280af22 2307 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2308 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2309 PL_sublex_info.sub_op = PL_lex_op;
2310 PL_lex_state = LEX_INTERPPUSH;
55497cff 2311
3280af22
NIS
2312 PL_expect = XTERM;
2313 if (PL_lex_op) {
6154021b 2314 pl_yylval.opval = PL_lex_op;
5f66b61c 2315 PL_lex_op = NULL;
55497cff 2316 return PMFUNC;
2317 }
2318 else
2319 return FUNC;
2320}
2321
ffb4593c
NT
2322/*
2323 * S_sublex_push
2324 * Create a new scope to save the lexing state. The scope will be
2325 * ended in S_sublex_done. Returns a '(', starting the function arguments
2326 * to the uc, lc, etc. found before.
2327 * Sets PL_lex_state to LEX_INTERPCONCAT.
2328 */
2329
76e3520e 2330STATIC I32
cea2e8a9 2331S_sublex_push(pTHX)
55497cff 2332{
27da23d5 2333 dVAR;
f46d017c 2334 ENTER;
55497cff 2335
3280af22 2336 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2337 SAVEBOOL(PL_lex_dojoin);
3280af22 2338 SAVEI32(PL_lex_brackets);
3280af22
NIS
2339 SAVEI32(PL_lex_casemods);
2340 SAVEI32(PL_lex_starts);
651b5b28 2341 SAVEI8(PL_lex_state);
7766f137 2342 SAVEVPTR(PL_lex_inpat);
98246f1e 2343 SAVEI16(PL_lex_inwhat);
57843af0 2344 SAVECOPLINE(PL_curcop);
3280af22 2345 SAVEPPTR(PL_bufptr);
8452ff4b 2346 SAVEPPTR(PL_bufend);
3280af22
NIS
2347 SAVEPPTR(PL_oldbufptr);
2348 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2349 SAVEPPTR(PL_last_lop);
2350 SAVEPPTR(PL_last_uni);
3280af22
NIS
2351 SAVEPPTR(PL_linestart);
2352 SAVESPTR(PL_linestr);
8edd5f42
RGS
2353 SAVEGENERICPV(PL_lex_brackstack);
2354 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2355
2356 PL_linestr = PL_lex_stuff;
a0714e2c 2357 PL_lex_stuff = NULL;
3280af22 2358
9cbb5ea2
GS
2359 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2360 = SvPVX(PL_linestr);
3280af22 2361 PL_bufend += SvCUR(PL_linestr);
bd61b366 2362 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2363 SAVEFREESV(PL_linestr);
2364
2365 PL_lex_dojoin = FALSE;
2366 PL_lex_brackets = 0;
a02a5408
JC
2367 Newx(PL_lex_brackstack, 120, char);
2368 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2369 PL_lex_casemods = 0;
2370 *PL_lex_casestack = '\0';
2371 PL_lex_starts = 0;
2372 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2373 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2374
2375 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2376 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2377 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2378 else
5f66b61c 2379 PL_lex_inpat = NULL;
79072805 2380
55497cff 2381 return '(';
79072805
LW
2382}
2383
ffb4593c
NT
2384/*
2385 * S_sublex_done
2386 * Restores lexer state after a S_sublex_push.
2387 */
2388
76e3520e 2389STATIC I32
cea2e8a9 2390S_sublex_done(pTHX)
79072805 2391{
27da23d5 2392 dVAR;
3280af22 2393 if (!PL_lex_starts++) {
396482e1 2394 SV * const sv = newSVpvs("");
9aa983d2
JH
2395 if (SvUTF8(PL_linestr))
2396 SvUTF8_on(sv);
3280af22 2397 PL_expect = XOPERATOR;
6154021b 2398 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2399 return THING;
2400 }
2401
3280af22
NIS
2402 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2403 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2404 return yylex();
79072805
LW
2405 }
2406
ffb4593c 2407 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2408 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2409 PL_linestr = PL_lex_repl;
2410 PL_lex_inpat = 0;
2411 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2412 PL_bufend += SvCUR(PL_linestr);
bd61b366 2413 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2414 SAVEFREESV(PL_linestr);
2415 PL_lex_dojoin = FALSE;
2416 PL_lex_brackets = 0;
3280af22
NIS
2417 PL_lex_casemods = 0;
2418 *PL_lex_casestack = '\0';
2419 PL_lex_starts = 0;
25da4f38 2420 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2421 PL_lex_state = LEX_INTERPNORMAL;
2422 PL_lex_starts++;
e9fa98b2
HS
2423 /* we don't clear PL_lex_repl here, so that we can check later
2424 whether this is an evalled subst; that means we rely on the
2425 logic to ensure sublex_done() is called again only via the
2426 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2427 }
e9fa98b2 2428 else {
3280af22 2429 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2430 PL_lex_repl = NULL;
e9fa98b2 2431 }
79072805 2432 return ',';
ffed7fef
LW
2433 }
2434 else {
5db06880
NC
2435#ifdef PERL_MAD
2436 if (PL_madskills) {
cd81e915
NC
2437 if (PL_thiswhite) {
2438 if (!PL_endwhite)
6b29d1f5 2439 PL_endwhite = newSVpvs("");
cd81e915
NC
2440 sv_catsv(PL_endwhite, PL_thiswhite);
2441 PL_thiswhite = 0;
2442 }
2443 if (PL_thistoken)
76f68e9b 2444 sv_setpvs(PL_thistoken,"");
5db06880 2445 else
cd81e915 2446 PL_realtokenstart = -1;
5db06880
NC
2447 }
2448#endif
f46d017c 2449 LEAVE;
3280af22
NIS
2450 PL_bufend = SvPVX(PL_linestr);
2451 PL_bufend += SvCUR(PL_linestr);
2452 PL_expect = XOPERATOR;
09bef843 2453 PL_sublex_info.sub_inwhat = 0;
79072805 2454 return ')';
ffed7fef
LW
2455 }
2456}
2457
02aa26ce
NT
2458/*
2459 scan_const
2460
2461 Extracts a pattern, double-quoted string, or transliteration. This
2462 is terrifying code.
2463
94def140 2464 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2465 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2466 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2467
94def140
TS
2468 Returns a pointer to the character scanned up to. If this is
2469 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2470 successfully parsed), will leave an OP for the substring scanned
6154021b 2471 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2472 by looking at the next characters herself.
2473
02aa26ce
NT
2474 In patterns:
2475 backslashes:
ff3f963a 2476 constants: \N{NAME} only
02aa26ce
NT
2477 case and quoting: \U \Q \E
2478 stops on @ and $, but not for $ as tail anchor
2479
2480 In transliterations:
2481 characters are VERY literal, except for - not at the start or end
94def140
TS
2482 of the string, which indicates a range. If the range is in bytes,
2483 scan_const expands the range to the full set of intermediate
2484 characters. If the range is in utf8, the hyphen is replaced with
2485 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2486
2487 In double-quoted strings:
2488 backslashes:
2489 double-quoted style: \r and \n
ff3f963a 2490 constants: \x31, etc.
94def140 2491 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2492 case and quoting: \U \Q \E
2493 stops on @ and $
2494
2495 scan_const does *not* construct ops to handle interpolated strings.
2496 It stops processing as soon as it finds an embedded $ or @ variable
2497 and leaves it to the caller to work out what's going on.
2498
94def140
TS
2499 embedded arrays (whether in pattern or not) could be:
2500 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2501
2502 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2503
2504 $ in pattern could be $foo or could be tail anchor. Assumption:
2505 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2506 followed by one of "()| \r\n\t"
02aa26ce
NT
2507
2508 \1 (backreferences) are turned into $1
2509
2510 The structure of the code is
2511 while (there's a character to process) {
94def140
TS
2512 handle transliteration ranges
2513 skip regexp comments /(?#comment)/ and codes /(?{code})/
2514 skip #-initiated comments in //x patterns
2515 check for embedded arrays
02aa26ce
NT
2516 check for embedded scalars
2517 if (backslash) {
94def140 2518 deprecate \1 in substitution replacements
02aa26ce
NT
2519 handle string-changing backslashes \l \U \Q \E, etc.
2520 switch (what was escaped) {
94def140 2521 handle \- in a transliteration (becomes a literal -)
ff3f963a 2522 if a pattern and not \N{, go treat as regular character
94def140
TS
2523 handle \132 (octal characters)
2524 handle \x15 and \x{1234} (hex characters)
ff3f963a 2525 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2526 handle \cV (control characters)
2527 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2528 } (end switch)
77a135fe 2529 continue
02aa26ce 2530 } (end if backslash)
77a135fe 2531 handle regular character
02aa26ce 2532 } (end while character to read)
4e553d73 2533
02aa26ce
NT
2534*/
2535
76e3520e 2536STATIC char *
cea2e8a9 2537S_scan_const(pTHX_ char *start)
79072805 2538{
97aff369 2539 dVAR;
3280af22 2540 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2541 SV *sv = newSV(send - start); /* sv for the constant. See
2542 note below on sizing. */
02aa26ce
NT
2543 register char *s = start; /* start of the constant */
2544 register char *d = SvPVX(sv); /* destination for copies */
2545 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2546 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2547 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2548 I32 this_utf8 = UTF; /* Is the source string assumed
2549 to be UTF8? But, this can
2550 show as true when the source
2551 isn't utf8, as for example
2552 when it is entirely composed
2553 of hex constants */
2554
2555 /* Note on sizing: The scanned constant is placed into sv, which is
2556 * initialized by newSV() assuming one byte of output for every byte of
2557 * input. This routine expects newSV() to allocate an extra byte for a
2558 * trailing NUL, which this routine will append if it gets to the end of
2559 * the input. There may be more bytes of input than output (eg., \N{LATIN
2560 * CAPITAL LETTER A}), or more output than input if the constant ends up
2561 * recoded to utf8, but each time a construct is found that might increase
2562 * the needed size, SvGROW() is called. Its size parameter each time is
2563 * based on the best guess estimate at the time, namely the length used so
2564 * far, plus the length the current construct will occupy, plus room for
2565 * the trailing NUL, plus one byte for every input byte still unscanned */
2566
012bcf8d 2567 UV uv;
4c3a8340
TS
2568#ifdef EBCDIC
2569 UV literal_endpoint = 0;
e294cc5d 2570 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2571#endif
012bcf8d 2572
7918f24d
NC
2573 PERL_ARGS_ASSERT_SCAN_CONST;
2574
2b9d42f0
NIS
2575 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2576 /* If we are doing a trans and we know we want UTF8 set expectation */
2577 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2578 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2579 }
2580
2581
79072805 2582 while (s < send || dorange) {
ff3f963a 2583
02aa26ce 2584 /* get transliterations out of the way (they're most literal) */
3280af22 2585 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2586 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2587 if (dorange) {
1ba5c669
JH
2588 I32 i; /* current expanded character */
2589 I32 min; /* first character in range */
2590 I32 max; /* last character in range */
02aa26ce 2591
e294cc5d
JH
2592#ifdef EBCDIC
2593 UV uvmax = 0;
2594#endif
2595
2596 if (has_utf8
2597#ifdef EBCDIC
2598 && !native_range
2599#endif
2600 ) {
9d4ba2ae 2601 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2602 char *e = d++;
2603 while (e-- > c)
2604 *(e + 1) = *e;
25716404 2605 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2606 /* mark the range as done, and continue */
2607 dorange = FALSE;
2608 didrange = TRUE;
2609 continue;
2610 }
2b9d42f0 2611
95a20fc0 2612 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2613#ifdef EBCDIC
2614 SvGROW(sv,
2615 SvLEN(sv) + (has_utf8 ?
2616 (512 - UTF_CONTINUATION_MARK +
2617 UNISKIP(0x100))
2618 : 256));
2619 /* How many two-byte within 0..255: 128 in UTF-8,
2620 * 96 in UTF-8-mod. */
2621#else
9cbb5ea2 2622 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2623#endif
9cbb5ea2 2624 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2625#ifdef EBCDIC
2626 if (has_utf8) {
2627 int j;
2628 for (j = 0; j <= 1; j++) {
2629 char * const c = (char*)utf8_hop((U8*)d, -1);
2630 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2631 if (j)
2632 min = (U8)uv;
2633 else if (uv < 256)
2634 max = (U8)uv;
2635 else {
2636 max = (U8)0xff; /* only to \xff */
2637 uvmax = uv; /* \x{100} to uvmax */
2638 }
2639 d = c; /* eat endpoint chars */
2640 }
2641 }
2642 else {
2643#endif
2644 d -= 2; /* eat the first char and the - */
2645 min = (U8)*d; /* first char in range */
2646 max = (U8)d[1]; /* last char in range */
2647#ifdef EBCDIC
2648 }
2649#endif
8ada0baa 2650
c2e66d9e 2651 if (min > max) {
01ec43d0 2652 Perl_croak(aTHX_
d1573ac7 2653 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2654 (char)min, (char)max);
c2e66d9e
GS
2655 }
2656
c7f1f016 2657#ifdef EBCDIC
4c3a8340
TS
2658 if (literal_endpoint == 2 &&
2659 ((isLOWER(min) && isLOWER(max)) ||
2660 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2661 if (isLOWER(min)) {
2662 for (i = min; i <= max; i++)
2663 if (isLOWER(i))
db42d148 2664 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2665 } else {
2666 for (i = min; i <= max; i++)
2667 if (isUPPER(i))
db42d148 2668 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2669 }
2670 }
2671 else
2672#endif
2673 for (i = min; i <= max; i++)
e294cc5d
JH
2674#ifdef EBCDIC
2675 if (has_utf8) {
2676 const U8 ch = (U8)NATIVE_TO_UTF(i);
2677 if (UNI_IS_INVARIANT(ch))
2678 *d++ = (U8)i;
2679 else {
2680 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2681 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2682 }
2683 }
2684 else
2685#endif
2686 *d++ = (char)i;
2687
2688#ifdef EBCDIC
2689 if (uvmax) {
2690 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2691 if (uvmax > 0x101)
2692 *d++ = (char)UTF_TO_NATIVE(0xff);
2693 if (uvmax > 0x100)
2694 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2695 }
2696#endif
02aa26ce
NT
2697
2698 /* mark the range as done, and continue */
79072805 2699 dorange = FALSE;
01ec43d0 2700 didrange = TRUE;
4c3a8340
TS
2701#ifdef EBCDIC
2702 literal_endpoint = 0;
2703#endif
79072805 2704 continue;
4e553d73 2705 }
02aa26ce
NT
2706
2707 /* range begins (ignore - as first or last char) */
79072805 2708 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2709 if (didrange) {
1fafa243 2710 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2711 }
e294cc5d
JH
2712 if (has_utf8
2713#ifdef EBCDIC
2714 && !native_range
2715#endif
2716 ) {
25716404 2717 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2718 s++;
2719 continue;
2720 }
79072805
LW
2721 dorange = TRUE;
2722 s++;
01ec43d0
GS
2723 }
2724 else {
2725 didrange = FALSE;
4c3a8340
TS
2726#ifdef EBCDIC
2727 literal_endpoint = 0;
e294cc5d 2728 native_range = TRUE;
4c3a8340 2729#endif
01ec43d0 2730 }
79072805 2731 }
02aa26ce
NT
2732
2733 /* if we get here, we're not doing a transliteration */
2734
0f5d15d6
IZ
2735 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2736 except for the last char, which will be done separately. */
3280af22 2737 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2738 if (s[2] == '#') {
e994fd66 2739 while (s+1 < send && *s != ')')
db42d148 2740 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2741 }
2742 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2743 || (s[2] == '?' && s[3] == '{'))
155aba94 2744 {
cc6b7395 2745 I32 count = 1;
0f5d15d6 2746 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2747 char c;
2748
d9f97599
GS
2749 while (count && (c = *regparse)) {
2750 if (c == '\\' && regparse[1])
2751 regparse++;
4e553d73 2752 else if (c == '{')
cc6b7395 2753 count++;
4e553d73 2754 else if (c == '}')
cc6b7395 2755 count--;
d9f97599 2756 regparse++;
cc6b7395 2757 }
e994fd66 2758 if (*regparse != ')')
5bdf89e7 2759 regparse--; /* Leave one char for continuation. */
0f5d15d6 2760 while (s < regparse)
db42d148 2761 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2762 }
748a9306 2763 }
02aa26ce
NT
2764
2765 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2766 else if (*s == '#' && PL_lex_inpat &&
2767 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2768 while (s+1 < send && *s != '\n')
db42d148 2769 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2770 }
02aa26ce 2771
5d1d4326 2772 /* check for embedded arrays
da6eedaa 2773 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2774 */
1749ea0d
TS
2775 else if (*s == '@' && s[1]) {
2776 if (isALNUM_lazy_if(s+1,UTF))
2777 break;
2778 if (strchr(":'{$", s[1]))
2779 break;
2780 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2781 break; /* in regexp, neither @+ nor @- are interpolated */
2782 }
02aa26ce
NT
2783
2784 /* check for embedded scalars. only stop if we're sure it's a
2785 variable.
2786 */
79072805 2787 else if (*s == '$') {
3280af22 2788 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2789 break;
77772344 2790 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2791 if (s[1] == '\\') {
2792 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2793 "Possible unintended interpolation of $\\ in regex");
77772344 2794 }
79072805 2795 break; /* in regexp, $ might be tail anchor */
77772344 2796 }
79072805 2797 }
02aa26ce 2798
2b9d42f0
NIS
2799 /* End of else if chain - OP_TRANS rejoin rest */
2800
02aa26ce 2801 /* backslashes */
79072805 2802 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2803 char* e; /* Can be used for ending '}', etc. */
2804
79072805 2805 s++;
02aa26ce 2806
02aa26ce 2807 /* deprecate \1 in strings and substitution replacements */
3280af22 2808 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2809 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2810 {
a2a5de95 2811 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2812 *--s = '$';
2813 break;
2814 }
02aa26ce
NT
2815
2816 /* string-change backslash escapes */
3280af22 2817 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2818 --s;
2819 break;
2820 }
ff3f963a
KW
2821 /* In a pattern, process \N, but skip any other backslash escapes.
2822 * This is because we don't want to translate an escape sequence
2823 * into a meta symbol and have the regex compiler use the meta
2824 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2825 * in spite of this, we do have to process \N here while the proper
2826 * charnames handler is in scope. See bugs #56444 and #62056.
2827 * There is a complication because \N in a pattern may also stand
2828 * for 'match a non-nl', and not mean a charname, in which case its
2829 * processing should be deferred to the regex compiler. To be a
2830 * charname it must be followed immediately by a '{', and not look
2831 * like \N followed by a curly quantifier, i.e., not something like
2832 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2833 * quantifier */
2834 else if (PL_lex_inpat
2835 && (*s != 'N'
2836 || s[1] != '{'
2837 || regcurly(s + 1)))
2838 {
cc74c5bd
TS
2839 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2840 goto default_action;
2841 }
02aa26ce 2842
79072805 2843 switch (*s) {
02aa26ce
NT
2844
2845 /* quoted - in transliterations */
79072805 2846 case '-':
3280af22 2847 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2848 *d++ = *s++;
2849 continue;
2850 }
2851 /* FALL THROUGH */
2852 default:
11b8faa4 2853 {
a2a5de95
NC
2854 if ((isALPHA(*s) || isDIGIT(*s)))
2855 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2856 "Unrecognized escape \\%c passed through",
2857 *s);
11b8faa4 2858 /* default action is to copy the quoted character */
f9a63242 2859 goto default_action;
11b8faa4 2860 }
02aa26ce 2861
77a135fe 2862 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2863 case '0': case '1': case '2': case '3':
2864 case '4': case '5': case '6': case '7':
ba210ebe 2865 {
53305cf1
NC
2866 I32 flags = 0;
2867 STRLEN len = 3;
77a135fe 2868 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2869 s += len;
2870 }
012bcf8d 2871 goto NUM_ESCAPE_INSERT;
02aa26ce 2872
77a135fe 2873 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2874 case 'x':
a0ed51b3
LW
2875 ++s;
2876 if (*s == '{') {
9d4ba2ae 2877 char* const e = strchr(s, '}');
a4c04bdc
NC
2878 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2879 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2880 STRLEN len;
355860ce 2881
53305cf1 2882 ++s;
adaeee49 2883 if (!e) {
a0ed51b3 2884 yyerror("Missing right brace on \\x{}");
355860ce 2885 continue;
ba210ebe 2886 }
53305cf1 2887 len = e - s;
77a135fe 2888 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2889 s = e + 1;
a0ed51b3
LW
2890 }
2891 else {
ba210ebe 2892 {
53305cf1 2893 STRLEN len = 2;
a4c04bdc 2894 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2895 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2896 s += len;
2897 }
012bcf8d
GS
2898 }
2899
2900 NUM_ESCAPE_INSERT:
ff3f963a
KW
2901 /* Insert oct or hex escaped character. There will always be
2902 * enough room in sv since such escapes will be longer than any
2903 * UTF-8 sequence they can end up as, except if they force us
2904 * to recode the rest of the string into utf8 */
ba7cea30 2905
77a135fe 2906 /* Here uv is the ordinal of the next character being added in
ff3f963a 2907 * unicode (converted from native). */
77a135fe 2908 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2909 if (!has_utf8 && uv > 255) {
77a135fe
KW
2910 /* Might need to recode whatever we have accumulated so
2911 * far if it contains any chars variant in utf8 or
2912 * utf-ebcdic. */
2913
2914 SvCUR_set(sv, d - SvPVX_const(sv));
2915 SvPOK_on(sv);
2916 *d = '\0';
77a135fe 2917 /* See Note on sizing above. */
7bf79863
KW
2918 sv_utf8_upgrade_flags_grow(sv,
2919 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2920 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2921 d = SvPVX(sv) + SvCUR(sv);
2922 has_utf8 = TRUE;
012bcf8d
GS
2923 }
2924
77a135fe
KW
2925 if (has_utf8) {
2926 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2927 if (PL_lex_inwhat == OP_TRANS &&
2928 PL_sublex_info.sub_op) {
2929 PL_sublex_info.sub_op->op_private |=
2930 (PL_lex_repl ? OPpTRANS_FROM_UTF
2931 : OPpTRANS_TO_UTF);
f9a63242 2932 }
e294cc5d
JH
2933#ifdef EBCDIC
2934 if (uv > 255 && !dorange)
2935 native_range = FALSE;
2936#endif
012bcf8d 2937 }
a0ed51b3 2938 else {
012bcf8d 2939 *d++ = (char)uv;
a0ed51b3 2940 }
012bcf8d
GS
2941 }
2942 else {
c4d5f83a 2943 *d++ = (char) uv;
a0ed51b3 2944 }
79072805 2945 continue;
02aa26ce 2946
4a2d328f 2947 case 'N':
ff3f963a
KW
2948 /* In a non-pattern \N must be a named character, like \N{LATIN
2949 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
2950 * mean to match a non-newline. For non-patterns, named
2951 * characters are converted to their string equivalents. In
2952 * patterns, named characters are not converted to their
2953 * ultimate forms for the same reasons that other escapes
2954 * aren't. Instead, they are converted to the \N{U+...} form
2955 * to get the value from the charnames that is in effect right
2956 * now, while preserving the fact that it was a named character
2957 * so that the regex compiler knows this */
2958
2959 /* This section of code doesn't generally use the
2960 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
2961 * a close examination of this macro and determined it is a
2962 * no-op except on utfebcdic variant characters. Every
2963 * character generated by this that would normally need to be
2964 * enclosed by this macro is invariant, so the macro is not
2965 * needed, and would complicate use of copy(). There are other
2966 * parts of this file where the macro is used inconsistently,
2967 * but are saved by it being a no-op */
2968
2969 /* The structure of this section of code (besides checking for
2970 * errors and upgrading to utf8) is:
2971 * Further disambiguate between the two meanings of \N, and if
2972 * not a charname, go process it elsewhere
0a96133f
KW
2973 * If of form \N{U+...}, pass it through if a pattern;
2974 * otherwise convert to utf8
2975 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
2976 * pattern; otherwise convert to utf8 */
ff3f963a
KW
2977
2978 /* Here, s points to the 'N'; the test below is guaranteed to
2979 * succeed if we are being called on a pattern as we already
2980 * know from a test above that the next character is a '{'.
2981 * On a non-pattern \N must mean 'named sequence, which
2982 * requires braces */
2983 s++;
2984 if (*s != '{') {
2985 yyerror("Missing braces on \\N{}");
2986 continue;
2987 }
2988 s++;
2989
0a96133f 2990 /* If there is no matching '}', it is an error. */
ff3f963a
KW
2991 if (! (e = strchr(s, '}'))) {
2992 if (! PL_lex_inpat) {
5777a3f7 2993 yyerror("Missing right brace on \\N{}");
0a96133f
KW
2994 } else {
2995 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 2996 }
0a96133f 2997 continue;
ff3f963a 2998 }
cddc7ef4 2999
ff3f963a 3000 /* Here it looks like a named character */
cddc7ef4 3001
ff3f963a
KW
3002 if (PL_lex_inpat) {
3003
3004 /* XXX This block is temporary code. \N{} implies that the
3005 * pattern is to have Unicode semantics, and therefore
3006 * currently has to be encoded in utf8. By putting it in
3007 * utf8 now, we save a whole pass in the regular expression
3008 * compiler. Once that code is changed so Unicode
3009 * semantics doesn't necessarily have to be in utf8, this
3010 * block should be removed */
3011 if (!has_utf8) {
77a135fe 3012 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3013 SvPOK_on(sv);
e4f3eed8 3014 *d = '\0';
77a135fe 3015 /* See Note on sizing above. */
7bf79863 3016 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3017 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3018 /* 5 = '\N{' + cur char + NUL */
3019 (STRLEN)(send - s) + 5);
f08d6ad9 3020 d = SvPVX(sv) + SvCUR(sv);
89491803 3021 has_utf8 = TRUE;
ff3f963a
KW
3022 }
3023 }
423cee85 3024
ff3f963a
KW
3025 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3026 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3027 | PERL_SCAN_DISALLOW_PREFIX;
3028 STRLEN len;
3029
3030 /* For \N{U+...}, the '...' is a unicode value even on
3031 * EBCDIC machines */
3032 s += 2; /* Skip to next char after the 'U+' */
3033 len = e - s;
3034 uv = grok_hex(s, &len, &flags, NULL);
3035 if (len == 0 || len != (STRLEN)(e - s)) {
3036 yyerror("Invalid hexadecimal number in \\N{U+...}");
3037 s = e + 1;
3038 continue;
3039 }
3040
3041 if (PL_lex_inpat) {
3042
3043 /* Pass through to the regex compiler unchanged. The
3044 * reason we evaluated the number above is to make sure
0a96133f 3045 * there wasn't a syntax error. */
ff3f963a
KW
3046 s -= 5; /* Include the '\N{U+' */
3047 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3048 d += e - s + 1;
3049 }
3050 else { /* Not a pattern: convert the hex to string */
3051
3052 /* If destination is not in utf8, unconditionally
3053 * recode it to be so. This is because \N{} implies
3054 * Unicode semantics, and scalars have to be in utf8
3055 * to guarantee those semantics */
3056 if (! has_utf8) {
3057 SvCUR_set(sv, d - SvPVX_const(sv));
3058 SvPOK_on(sv);
3059 *d = '\0';
3060 /* See Note on sizing above. */
3061 sv_utf8_upgrade_flags_grow(
3062 sv,
3063 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3064 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3065 d = SvPVX(sv) + SvCUR(sv);
3066 has_utf8 = TRUE;
3067 }
3068
3069 /* Add the string to the output */
3070 if (UNI_IS_INVARIANT(uv)) {
3071 *d++ = (char) uv;
3072 }
3073 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3074 }
3075 }
3076 else { /* Here is \N{NAME} but not \N{U+...}. */
3077
3078 SV *res; /* result from charnames */
3079 const char *str; /* the string in 'res' */
3080 STRLEN len; /* its length */
3081
3082 /* Get the value for NAME */
3083 res = newSVpvn(s, e - s);
3084 res = new_constant( NULL, 0, "charnames",
3085 /* includes all of: \N{...} */
3086 res, NULL, s - 3, e - s + 4 );
3087
3088 /* Most likely res will be in utf8 already since the
3089 * standard charnames uses pack U, but a custom translator
3090 * can leave it otherwise, so make sure. XXX This can be
3091 * revisited to not have charnames use utf8 for characters
3092 * that don't need it when regexes don't have to be in utf8
3093 * for Unicode semantics. If doing so, remember EBCDIC */
3094 sv_utf8_upgrade(res);
3095 str = SvPV_const(res, len);
3096
3097 /* Don't accept malformed input */
3098 if (! is_utf8_string((U8 *) str, len)) {
3099 yyerror("Malformed UTF-8 returned by \\N");
3100 }
3101 else if (PL_lex_inpat) {
3102
3103 if (! len) { /* The name resolved to an empty string */
3104 Copy("\\N{}", d, 4, char);
3105 d += 4;
3106 }
3107 else {
3108 /* In order to not lose information for the regex
3109 * compiler, pass the result in the specially made
3110 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3111 * the code points in hex of each character
3112 * returned by charnames */
3113
3114 const char *str_end = str + len;
3115 STRLEN char_length; /* cur char's byte length */
3116 STRLEN output_length; /* and the number of bytes
3117 after this is translated
3118 into hex digits */
3119 const STRLEN off = d - SvPVX_const(sv);
3120
3121 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3122 * max('U+', '.'); and 1 for NUL */
3123 char hex_string[2 * UTF8_MAXBYTES + 5];
3124
3125 /* Get the first character of the result. */
3126 U32 uv = utf8n_to_uvuni((U8 *) str,
3127 len,
3128 &char_length,
3129 UTF8_ALLOW_ANYUV);
3130
3131 /* The call to is_utf8_string() above hopefully
3132 * guarantees that there won't be an error. But
3133 * it's easy here to make sure. The function just
3134 * above warns and returns 0 if invalid utf8, but
3135 * it can also return 0 if the input is validly a
3136 * NUL. Disambiguate */
3137 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3138 uv = UNICODE_REPLACEMENT;
3139 }
3140
3141 /* Convert first code point to hex, including the
3142 * boiler plate before it */
3143 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3144 output_length = strlen(hex_string);
3145
3146 /* Make sure there is enough space to hold it */
3147 d = off + SvGROW(sv, off
3148 + output_length
3149 + (STRLEN)(send - e)
3150 + 2); /* '}' + NUL */
3151 /* And output it */
3152 Copy(hex_string, d, output_length, char);
3153 d += output_length;
3154
3155 /* For each subsequent character, append dot and
3156 * its ordinal in hex */
3157 while ((str += char_length) < str_end) {
3158 const STRLEN off = d - SvPVX_const(sv);
3159 U32 uv = utf8n_to_uvuni((U8 *) str,
3160 str_end - str,
3161 &char_length,
3162 UTF8_ALLOW_ANYUV);
3163 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3164 uv = UNICODE_REPLACEMENT;
3165 }
3166
3167 sprintf(hex_string, ".%X", (unsigned int) uv);
3168 output_length = strlen(hex_string);
3169
3170 d = off + SvGROW(sv, off
3171 + output_length
3172 + (STRLEN)(send - e)
3173 + 2); /* '}' + NUL */
3174 Copy(hex_string, d, output_length, char);
3175 d += output_length;
3176 }
3177
3178 *d++ = '}'; /* Done. Add the trailing brace */
3179 }
3180 }
3181 else { /* Here, not in a pattern. Convert the name to a
3182 * string. */
3183
3184 /* If destination is not in utf8, unconditionally
3185 * recode it to be so. This is because \N{} implies
3186 * Unicode semantics, and scalars have to be in utf8
3187 * to guarantee those semantics */
3188 if (! has_utf8) {
3189 SvCUR_set(sv, d - SvPVX_const(sv));
3190 SvPOK_on(sv);
3191 *d = '\0';
3192 /* See Note on sizing above. */
3193 sv_utf8_upgrade_flags_grow(sv,
3194 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3195 len + (STRLEN)(send - s) + 1);
3196 d = SvPVX(sv) + SvCUR(sv);
3197 has_utf8 = TRUE;
3198 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3199
3200 /* See Note on sizing above. (NOTE: SvCUR() is not
3201 * set correctly here). */
3202 const STRLEN off = d - SvPVX_const(sv);
3203 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3204 }
3205 Copy(str, d, len, char);
3206 d += len;
423cee85 3207 }
423cee85 3208 SvREFCNT_dec(res);
cb233ae3
KW
3209
3210 /* Deprecate non-approved name syntax */
3211 if (ckWARN_d(WARN_DEPRECATED)) {
3212 bool problematic = FALSE;
3213 char* i = s;
3214
3215 /* For non-ut8 input, look to see that the first
3216 * character is an alpha, then loop through the rest
3217 * checking that each is a continuation */
3218 if (! this_utf8) {
3219 if (! isALPHAU(*i)) problematic = TRUE;
3220 else for (i = s + 1; i < e; i++) {
3221 if (isCHARNAME_CONT(*i)) continue;
3222 problematic = TRUE;
3223 break;
3224 }
3225 }
3226 else {
3227 /* Similarly for utf8. For invariants can check
3228 * directly. We accept anything above the latin1
3229 * range because it is immaterial to Perl if it is
3230 * correct or not, and is expensive to check. But
3231 * it is fairly easy in the latin1 range to convert
3232 * the variants into a single character and check
3233 * those */
3234 if (UTF8_IS_INVARIANT(*i)) {
3235 if (! isALPHAU(*i)) problematic = TRUE;
3236 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3237 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3238 *(i+1)))))
3239 {
3240 problematic = TRUE;
3241 }
3242 }
3243 if (! problematic) for (i = s + UTF8SKIP(s);
3244 i < e;
3245 i+= UTF8SKIP(i))
3246 {
3247 if (UTF8_IS_INVARIANT(*i)) {
3248 if (isCHARNAME_CONT(*i)) continue;
3249 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3250 continue;
3251 } else if (isCHARNAME_CONT(
3252 UNI_TO_NATIVE(
3253 UTF8_ACCUMULATE(*i, *(i+1)))))
3254 {
3255 continue;
3256 }
3257 problematic = TRUE;
3258 break;
3259 }
3260 }
3261 if (problematic) {
3262 char *string;
3263 Newx(string, e - i + 1, char);
3264 Copy(i, string, e - i, char);
3265 string[e - i] = '\0';
3266 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
3267 "Deprecated character(s) in \\N{...} starting at '%s'",
3268 string);
3269 Safefree(string);
3270 }
3271 }
3272 } /* End \N{NAME} */
ff3f963a
KW
3273#ifdef EBCDIC
3274 if (!dorange)
3275 native_range = FALSE; /* \N{} is defined to be Unicode */
3276#endif
3277 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3278 continue;
3279
02aa26ce 3280 /* \c is a control character */
79072805
LW
3281 case 'c':
3282 s++;
961ce445 3283 if (s < send) {
ba210ebe 3284 U8 c = *s++;
c7f1f016
NIS
3285#ifdef EBCDIC
3286 if (isLOWER(c))
3287 c = toUPPER(c);
3288#endif
db42d148 3289 *d++ = NATIVE_TO_NEED(has_utf8,toCTRL(c));
ba210ebe 3290 }
961ce445
RGS
3291 else {
3292 yyerror("Missing control char name in \\c");
3293 }
79072805 3294 continue;
02aa26ce
NT
3295
3296 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3297 case 'b':
db42d148 3298 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3299 break;
3300 case 'n':
db42d148 3301 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3302 break;
3303 case 'r':
db42d148 3304 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3305 break;
3306 case 'f':
db42d148 3307 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3308 break;
3309 case 't':
db42d148 3310 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3311 break;
34a3fe2a 3312 case 'e':
db42d148 3313 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3314 break;
3315 case 'a':
db42d148 3316 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3317 break;
02aa26ce
NT
3318 } /* end switch */
3319
79072805
LW
3320 s++;
3321 continue;
02aa26ce 3322 } /* end if (backslash) */
4c3a8340
TS
3323#ifdef EBCDIC
3324 else
3325 literal_endpoint++;
3326#endif
02aa26ce 3327
f9a63242 3328 default_action:
77a135fe
KW
3329 /* If we started with encoded form, or already know we want it,
3330 then encode the next character */
3331 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3332 STRLEN len = 1;
77a135fe
KW
3333
3334
3335 /* One might think that it is wasted effort in the case of the
3336 * source being utf8 (this_utf8 == TRUE) to take the next character
3337 * in the source, convert it to an unsigned value, and then convert
3338 * it back again. But the source has not been validated here. The
3339 * routine that does the conversion checks for errors like
3340 * malformed utf8 */
3341
5f66b61c
AL
3342 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3343 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3344 if (!has_utf8) {
3345 SvCUR_set(sv, d - SvPVX_const(sv));
3346 SvPOK_on(sv);
3347 *d = '\0';
77a135fe 3348 /* See Note on sizing above. */
7bf79863
KW
3349 sv_utf8_upgrade_flags_grow(sv,
3350 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3351 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3352 d = SvPVX(sv) + SvCUR(sv);
3353 has_utf8 = TRUE;
3354 } else if (need > len) {
3355 /* encoded value larger than old, may need extra space (NOTE:
3356 * SvCUR() is not set correctly here). See Note on sizing
3357 * above. */
9d4ba2ae 3358 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3359 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3360 }
77a135fe
KW
3361 s += len;
3362
5f66b61c 3363 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3364#ifdef EBCDIC
3365 if (uv > 255 && !dorange)
3366 native_range = FALSE;
3367#endif
2b9d42f0
NIS
3368 }
3369 else {
3370 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3371 }
02aa26ce
NT
3372 } /* while loop to process each character */
3373
3374 /* terminate the string and set up the sv */
79072805 3375 *d = '\0';
95a20fc0 3376 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3377 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3378 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3379
79072805 3380 SvPOK_on(sv);
9f4817db 3381 if (PL_encoding && !has_utf8) {
d0063567
DK
3382 sv_recode_to_utf8(sv, PL_encoding);
3383 if (SvUTF8(sv))
3384 has_utf8 = TRUE;
9f4817db 3385 }
2b9d42f0 3386 if (has_utf8) {
7e2040f0 3387 SvUTF8_on(sv);
2b9d42f0 3388 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3389 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3390 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3391 }
3392 }
79072805 3393
02aa26ce 3394 /* shrink the sv if we allocated more than we used */
79072805 3395 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3396 SvPV_shrink_to_cur(sv);
79072805 3397 }
02aa26ce 3398
6154021b 3399 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3400 if (s > PL_bufptr) {
eb0d8d16
NC
3401 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3402 const char *const key = PL_lex_inpat ? "qr" : "q";
3403 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3404 const char *type;
3405 STRLEN typelen;
3406
3407 if (PL_lex_inwhat == OP_TRANS) {
3408 type = "tr";
3409 typelen = 2;
3410 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3411 type = "s";
3412 typelen = 1;
3413 } else {
3414 type = "qq";
3415 typelen = 2;
3416 }
3417
3418 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3419 type, typelen);
3420 }
6154021b 3421 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3422 } else
8990e307 3423 SvREFCNT_dec(sv);
79072805
LW
3424 return s;
3425}
3426
ffb4593c
NT
3427/* S_intuit_more
3428 * Returns TRUE if there's more to the expression (e.g., a subscript),
3429 * FALSE otherwise.
ffb4593c
NT
3430 *
3431 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3432 *
3433 * ->[ and ->{ return TRUE
3434 * { and [ outside a pattern are always subscripts, so return TRUE
3435 * if we're outside a pattern and it's not { or [, then return FALSE
3436 * if we're in a pattern and the first char is a {
3437 * {4,5} (any digits around the comma) returns FALSE
3438 * if we're in a pattern and the first char is a [
3439 * [] returns FALSE
3440 * [SOMETHING] has a funky algorithm to decide whether it's a
3441 * character class or not. It has to deal with things like
3442 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3443 * anything else returns TRUE
3444 */
3445
9cbb5ea2
GS
3446/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3447
76e3520e 3448STATIC int
cea2e8a9 3449S_intuit_more(pTHX_ register char *s)
79072805 3450{
97aff369 3451 dVAR;
7918f24d
NC
3452
3453 PERL_ARGS_ASSERT_INTUIT_MORE;
3454
3280af22 3455 if (PL_lex_brackets)
79072805
LW
3456 return TRUE;
3457 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3458 return TRUE;
3459 if (*s != '{' && *s != '[')
3460 return FALSE;
3280af22 3461 if (!PL_lex_inpat)
79072805
LW
3462 return TRUE;
3463
3464 /* In a pattern, so maybe we have {n,m}. */
3465 if (*s == '{') {
3466 s++;
3467 if (!isDIGIT(*s))
3468 return TRUE;
3469 while (isDIGIT(*s))
3470 s++;
3471 if (*s == ',')
3472 s++;
3473 while (isDIGIT(*s))
3474 s++;
3475 if (*s == '}')
3476 return FALSE;
3477 return TRUE;
3478
3479 }
3480
3481 /* On the other hand, maybe we have a character class */
3482
3483 s++;
3484 if (*s == ']' || *s == '^')
3485 return FALSE;
3486 else {
ffb4593c 3487 /* this is terrifying, and it works */
79072805
LW
3488 int weight = 2; /* let's weigh the evidence */
3489 char seen[256];
f27ffc4a 3490 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3491 const char * const send = strchr(s,']');
3280af22 3492 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3493
3494 if (!send) /* has to be an expression */
3495 return TRUE;
3496
3497 Zero(seen,256,char);
3498 if (*s == '$')
3499 weight -= 3;
3500 else if (isDIGIT(*s)) {
3501 if (s[1] != ']') {
3502 if (isDIGIT(s[1]) && s[2] == ']')
3503 weight -= 10;
3504 }
3505 else
3506 weight -= 100;
3507 }
3508 for (; s < send; s++) {
3509 last_un_char = un_char;
3510 un_char = (unsigned char)*s;
3511 switch (*s) {
3512 case '@':
3513 case '&':
3514 case '$':
3515 weight -= seen[un_char] * 10;
7e2040f0 3516 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3517 int len;
8903cb82 3518 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3519 len = (int)strlen(tmpbuf);
3520 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3521 weight -= 100;
3522 else
3523 weight -= 10;
3524 }
3525 else if (*s == '$' && s[1] &&
93a17b20
LW
3526 strchr("[#!%*<>()-=",s[1])) {
3527 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3528 weight -= 10;
3529 else
3530 weight -= 1;
3531 }
3532 break;
3533 case '\\':
3534 un_char = 254;
3535 if (s[1]) {
93a17b20 3536 if (strchr("wds]",s[1]))
79072805 3537 weight += 100;
10edeb5d 3538 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3539 weight += 1;
93a17b20 3540 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3541 weight += 40;
3542 else if (isDIGIT(s[1])) {
3543 weight += 40;
3544 while (s[1] && isDIGIT(s[1]))
3545 s++;
3546 }
3547 }
3548 else
3549 weight += 100;
3550 break;
3551 case '-':
3552 if (s[1] == '\\')
3553 weight += 50;
93a17b20 3554 if (strchr("aA01! ",last_un_char))
79072805 3555 weight += 30;
93a17b20 3556 if (strchr("zZ79~",s[1]))
79072805 3557 weight += 30;
f27ffc4a
GS
3558 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3559 weight -= 5; /* cope with negative subscript */
79072805
LW
3560 break;
3561 default:
3792a11b
NC
3562 if (!isALNUM(last_un_char)
3563 && !(last_un_char == '$' || last_un_char == '@'
3564 || last_un_char == '&')
3565 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3566 char *d = tmpbuf;
3567 while (isALPHA(*s))
3568 *d++ = *s++;
3569 *d = '\0';
5458a98a 3570 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3571 weight -= 150;
3572 }
3573 if (un_char == last_un_char + 1)
3574 weight += 5;
3575 weight -= seen[un_char];
3576 break;
3577 }
3578 seen[un_char]++;
3579 }
3580 if (weight >= 0) /* probably a character class */
3581 return FALSE;
3582 }
3583
3584 return TRUE;
3585}
ffed7fef 3586
ffb4593c
NT
3587/*
3588 * S_intuit_method
3589 *
3590 * Does all the checking to disambiguate
3591 * foo bar
3592 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3593 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3594 *
3595 * First argument is the stuff after the first token, e.g. "bar".
3596 *
3597 * Not a method if bar is a filehandle.
3598 * Not a method if foo is a subroutine prototyped to take a filehandle.
3599 * Not a method if it's really "Foo $bar"
3600 * Method if it's "foo $bar"
3601 * Not a method if it's really "print foo $bar"
3602 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3603 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3604 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3605 * =>
3606 */
3607
76e3520e 3608STATIC int
62d55b22 3609S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3610{
97aff369 3611 dVAR;
a0d0e21e 3612 char *s = start + (*start == '$');
3280af22 3613 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3614 STRLEN len;
3615 GV* indirgv;
5db06880
NC
3616#ifdef PERL_MAD
3617 int soff;
3618#endif
a0d0e21e 3619
7918f24d
NC
3620 PERL_ARGS_ASSERT_INTUIT_METHOD;
3621
a0d0e21e 3622 if (gv) {
62d55b22 3623 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3624 return 0;
62d55b22
NC
3625 if (cv) {
3626 if (SvPOK(cv)) {
3627 const char *proto = SvPVX_const(cv);
3628 if (proto) {
3629 if (*proto == ';')
3630 proto++;
3631 if (*proto == '*')
3632 return 0;
3633 }
b6c543e3
IZ
3634 }
3635 } else
c35e046a 3636 gv = NULL;
a0d0e21e 3637 }
8903cb82 3638 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3639 /* start is the beginning of the possible filehandle/object,
3640 * and s is the end of it
3641 * tmpbuf is a copy of it
3642 */
3643
a0d0e21e 3644 if (*start == '$') {
3ef1310e
RGS
3645 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3646 isUPPER(*PL_tokenbuf))
a0d0e21e 3647 return 0;
5db06880
NC
3648#ifdef PERL_MAD
3649 len = start - SvPVX(PL_linestr);
3650#endif
29595ff2 3651 s = PEEKSPACE(s);
f0092767 3652#ifdef PERL_MAD
5db06880
NC
3653 start = SvPVX(PL_linestr) + len;
3654#endif
3280af22
NIS
3655 PL_bufptr = start;
3656 PL_expect = XREF;
a0d0e21e
LW
3657 return *s == '(' ? FUNCMETH : METHOD;
3658 }
5458a98a 3659 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3660 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3661 len -= 2;
3662 tmpbuf[len] = '\0';
5db06880
NC
3663#ifdef PERL_MAD
3664 soff = s - SvPVX(PL_linestr);
3665#endif
c3e0f903
GS
3666 goto bare_package;
3667 }
90e5519e 3668 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3669 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3670 return 0;
3671 /* filehandle or package name makes it a method */
da51bb9b 3672 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3673#ifdef PERL_MAD
3674 soff = s - SvPVX(PL_linestr);
3675#endif
29595ff2 3676 s = PEEKSPACE(s);
3280af22 3677 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3678 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3679 bare_package:
cd81e915 3680 start_force(PL_curforce);
9ded7720 3681 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3682 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3683 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3684 if (PL_madskills)
3685 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3686 PL_expect = XTERM;
a0d0e21e 3687 force_next(WORD);
3280af22 3688 PL_bufptr = s;
5db06880
NC
3689#ifdef PERL_MAD
3690 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3691#endif
a0d0e21e
LW
3692 return *s == '(' ? FUNCMETH : METHOD;
3693 }
3694 }
3695 return 0;
3696}
3697
16d20bd9 3698/* Encoded script support. filter_add() effectively inserts a
4e553d73 3699 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3700 * Note that the filter function only applies to the current source file
3701 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3702 *
3703 * The datasv parameter (which may be NULL) can be used to pass
3704 * private data to this instance of the filter. The filter function
3705 * can recover the SV using the FILTER_DATA macro and use it to
3706 * store private buffers and state information.
3707 *
3708 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3709 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3710 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3711 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3712 * private use must be set using malloc'd pointers.
3713 */
16d20bd9
AD
3714
3715SV *
864dbfa3 3716Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3717{
97aff369 3718 dVAR;
f4c556ac 3719 if (!funcp)
a0714e2c 3720 return NULL;
f4c556ac 3721
5486870f
DM
3722 if (!PL_parser)
3723 return NULL;
3724
3280af22
NIS
3725 if (!PL_rsfp_filters)
3726 PL_rsfp_filters = newAV();
16d20bd9 3727 if (!datasv)
561b68a9 3728 datasv = newSV(0);
862a34c6 3729 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3730 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3731 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3732 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3733 FPTR2DPTR(void *, IoANY(datasv)),
3734 SvPV_nolen(datasv)));
3280af22
NIS
3735 av_unshift(PL_rsfp_filters, 1);
3736 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3737 return(datasv);
3738}
4e553d73 3739
16d20bd9
AD
3740
3741/* Delete most recently added instance of this filter function. */
a0d0e21e 3742void
864dbfa3 3743Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3744{
97aff369 3745 dVAR;
e0c19803 3746 SV *datasv;
24801a4b 3747
7918f24d
NC
3748 PERL_ARGS_ASSERT_FILTER_DEL;
3749
33073adb 3750#ifdef DEBUGGING
55662e27
JH
3751 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3752 FPTR2DPTR(void*, funcp)));
33073adb 3753#endif
5486870f 3754 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3755 return;
3756 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3757 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3758 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
e0c19803 3759 IoFLAGS(datasv) &= ~IOf_FAKE_DIRP;
4755096e 3760 IoANY(datasv) = (void *)NULL;
3280af22 3761 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3762
16d20bd9
AD
3763 return;
3764 }
3765 /* we need to search for the correct entry and clear it */
cea2e8a9 3766 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3767}
3768
3769
1de9afcd
RGS
3770/* Invoke the idxth filter function for the current rsfp. */
3771/* maxlen 0 = read one text line */
16d20bd9 3772I32
864dbfa3 3773Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3774{
97aff369 3775 dVAR;
16d20bd9
AD
3776 filter_t funcp;
3777 SV *datasv = NULL;
f482118e
NC
3778 /* This API is bad. It should have been using unsigned int for maxlen.
3779 Not sure if we want to change the API, but if not we should sanity
3780 check the value here. */
39cd7a59
NC
3781 const unsigned int correct_length
3782 = maxlen < 0 ?
3783#ifdef PERL_MICRO
3784 0x7FFFFFFF
3785#else
3786 INT_MAX
3787#endif
3788 : maxlen;
e50aee73 3789
7918f24d
NC
3790 PERL_ARGS_ASSERT_FILTER_READ;
3791
5486870f 3792 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3793 return -1;
1de9afcd 3794 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3795 /* Provide a default input filter to make life easy. */
3796 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3797 DEBUG_P(PerlIO_printf(Perl_debug_log,
3798 "filter_read %d: from rsfp\n", idx));
f482118e 3799 if (correct_length) {
16d20bd9
AD
3800 /* Want a block */
3801 int len ;
f54cb97a 3802 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3803
3804 /* ensure buf_sv is large enough */
881d8f0a 3805 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3806 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3807 correct_length)) <= 0) {
3280af22 3808 if (PerlIO_error(PL_rsfp))
37120919
AD
3809 return -1; /* error */
3810 else
3811 return 0 ; /* end of file */
3812 }
16d20bd9 3813 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3814 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3815 } else {
3816 /* Want a line */
3280af22
NIS
3817 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3818 if (PerlIO_error(PL_rsfp))
37120919
AD
3819 return -1; /* error */
3820 else
3821 return 0 ; /* end of file */
3822 }
16d20bd9
AD
3823 }
3824 return SvCUR(buf_sv);
3825 }
3826 /* Skip this filter slot if filter has been deleted */
1de9afcd 3827 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3828 DEBUG_P(PerlIO_printf(Perl_debug_log,
3829 "filter_read %d: skipped (filter deleted)\n",
3830 idx));
f482118e 3831 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3832 }
3833 /* Get function pointer hidden within datasv */
8141890a 3834 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3835 DEBUG_P(PerlIO_printf(Perl_debug_log,
3836 "filter_read %d: via function %p (%s)\n",
ca0270c4 3837 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3838 /* Call function. The function is expected to */
3839 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3840 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3841 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3842}
3843
76e3520e 3844STATIC char *
5cc814fd 3845S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3846{
97aff369 3847 dVAR;
7918f24d
NC
3848
3849 PERL_ARGS_ASSERT_FILTER_GETS;
3850
c39cd008 3851#ifdef PERL_CR_FILTER
3280af22 3852 if (!PL_rsfp_filters) {
c39cd008 3853 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3854 }
3855#endif
3280af22 3856 if (PL_rsfp_filters) {
55497cff 3857 if (!append)
3858 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3859 if (FILTER_READ(0, sv, 0) > 0)
3860 return ( SvPVX(sv) ) ;
3861 else
bd61b366 3862 return NULL ;
16d20bd9 3863 }
9d116dd7 3864 else
5cc814fd 3865 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3866}
3867
01ec43d0 3868STATIC HV *
9bde8eb0 3869S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3870{
97aff369 3871 dVAR;
def3634b
GS
3872 GV *gv;
3873
7918f24d
NC
3874 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3875
01ec43d0 3876 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3877 return PL_curstash;
3878
3879 if (len > 2 &&
3880 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3881 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3882 {
3883 return GvHV(gv); /* Foo:: */
def3634b
GS
3884 }
3885
3886 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3887 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3888 if (gv && GvCV(gv)) {
3889 SV * const sv = cv_const_sv(GvCV(gv));
3890 if (sv)
9bde8eb0 3891 pkgname = SvPV_const(sv, len);
def3634b
GS
3892 }
3893
9bde8eb0 3894 return gv_stashpvn(pkgname, len, 0);
def3634b 3895}
a0d0e21e 3896
e3f73d4e
RGS
3897/*
3898 * S_readpipe_override
3899 * Check whether readpipe() is overriden, and generates the appropriate
3900 * optree, provided sublex_start() is called afterwards.
3901 */
3902STATIC void
1d51329b 3903S_readpipe_override(pTHX)
e3f73d4e
RGS
3904{
3905 GV **gvp;
3906 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3907 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3908 if ((gv_readpipe
3909 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3910 ||
3911 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3912 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3913 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3914 {
3915 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3916 append_elem(OP_LIST,
3917 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3918 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3919 }
e3f73d4e
RGS
3920}
3921
5db06880
NC
3922#ifdef PERL_MAD
3923 /*
3924 * Perl_madlex
3925 * The intent of this yylex wrapper is to minimize the changes to the
3926 * tokener when we aren't interested in collecting madprops. It remains
3927 * to be seen how successful this strategy will be...
3928 */
3929
3930int
3931Perl_madlex(pTHX)
3932{
3933 int optype;
3934 char *s = PL_bufptr;
3935
cd81e915
NC
3936 /* make sure PL_thiswhite is initialized */
3937 PL_thiswhite = 0;
3938 PL_thismad = 0;
5db06880 3939
cd81e915 3940 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3941 if (PL_pending_ident)
3942 return S_pending_ident(aTHX);
3943
3944 /* previous token ate up our whitespace? */
cd81e915
NC
3945 if (!PL_lasttoke && PL_nextwhite) {
3946 PL_thiswhite = PL_nextwhite;
3947 PL_nextwhite = 0;
5db06880
NC
3948 }
3949
3950 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3951 PL_realtokenstart = -1;
3952 PL_thistoken = 0;
5db06880
NC
3953 optype = yylex();
3954 s = PL_bufptr;
cd81e915 3955 assert(PL_curforce < 0);
5db06880 3956
cd81e915
NC
3957 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3958 if (!PL_thistoken) {
3959 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3960 PL_thistoken = newSVpvs("");
5db06880 3961 else {
c35e046a 3962 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3963 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3964 }
3965 }
cd81e915
NC
3966 if (PL_thismad) /* install head */
3967 CURMAD('X', PL_thistoken);
5db06880
NC
3968 }
3969
3970 /* last whitespace of a sublex? */
cd81e915
NC
3971 if (optype == ')' && PL_endwhite) {
3972 CURMAD('X', PL_endwhite);
5db06880
NC
3973 }
3974
cd81e915 3975 if (!PL_thismad) {
5db06880
NC
3976
3977 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3978 if (!PL_thiswhite && !PL_endwhite && !optype) {
3979 sv_free(PL_thistoken);
3980 PL_thistoken = 0;
5db06880
NC
3981 return 0;
3982 }
3983
3984 /* put off final whitespace till peg */
3985 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
3986 PL_nextwhite = PL_thiswhite;
3987 PL_thiswhite = 0;
5db06880 3988 }
cd81e915
NC
3989 else if (PL_thisopen) {
3990 CURMAD('q', PL_thisopen);
3991 if (PL_thistoken)
3992 sv_free(PL_thistoken);
3993 PL_thistoken = 0;
5db06880
NC
3994 }
3995 else {
3996 /* Store actual token text as madprop X */
cd81e915 3997 CURMAD('X', PL_thistoken);
5db06880
NC
3998 }
3999
cd81e915 4000 if (PL_thiswhite) {
5db06880 4001 /* add preceding whitespace as madprop _ */
cd81e915 4002 CURMAD('_', PL_thiswhite);
5db06880
NC
4003 }
4004
cd81e915 4005 if (PL_thisstuff) {
5db06880 4006 /* add quoted material as madprop = */
cd81e915 4007 CURMAD('=', PL_thisstuff);
5db06880
NC
4008 }
4009
cd81e915 4010 if (PL_thisclose) {
5db06880 4011 /* add terminating quote as madprop Q */
cd81e915 4012 CURMAD('Q', PL_thisclose);
5db06880
NC
4013 }
4014 }
4015
4016 /* special processing based on optype */
4017
4018 switch (optype) {
4019
4020 /* opval doesn't need a TOKEN since it can already store mp */
4021 case WORD:
4022 case METHOD:
4023 case FUNCMETH:
4024 case THING:
4025 case PMFUNC:
4026 case PRIVATEREF:
4027 case FUNC0SUB:
4028 case UNIOPSUB:
4029 case LSTOPSUB:
6154021b
RGS
4030 if (pl_yylval.opval)
4031 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4032 PL_thismad = 0;
5db06880
NC
4033 return optype;
4034
4035 /* fake EOF */
4036 case 0:
4037 optype = PEG;
cd81e915
NC
4038 if (PL_endwhite) {
4039 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4040 PL_endwhite = 0;
5db06880
NC
4041 }
4042 break;
4043
4044 case ']':
4045 case '}':
cd81e915 4046 if (PL_faketokens)
5db06880
NC
4047 break;
4048 /* remember any fake bracket that lexer is about to discard */
4049 if (PL_lex_brackets == 1 &&
4050 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4051 {
4052 s = PL_bufptr;
4053 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4054 s++;
4055 if (*s == '}') {
cd81e915
NC
4056 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4057 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4058 PL_thiswhite = 0;
5db06880
NC
4059 PL_bufptr = s - 1;
4060 break; /* don't bother looking for trailing comment */
4061 }
4062 else
4063 s = PL_bufptr;
4064 }
4065 if (optype == ']')
4066 break;
4067 /* FALLTHROUGH */
4068
4069 /* attach a trailing comment to its statement instead of next token */
4070 case ';':
cd81e915 4071 if (PL_faketokens)
5db06880
NC
4072 break;
4073 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4074 s = PL_bufptr;
4075 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4076 s++;
4077 if (*s == '\n' || *s == '#') {
4078 while (s < PL_bufend && *s != '\n')
4079 s++;
4080 if (s < PL_bufend)
4081 s++;
cd81e915
NC
4082 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4083 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4084 PL_thiswhite = 0;
5db06880
NC
4085 PL_bufptr = s;
4086 }
4087 }
4088 break;
4089
4090 /* pval */
4091 case LABEL:
4092 break;
4093
4094 /* ival */
4095 default:
4096 break;
4097
4098 }
4099
4100 /* Create new token struct. Note: opvals return early above. */
6154021b 4101 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4102 PL_thismad = 0;
5db06880
NC
4103 return optype;
4104}
4105#endif
4106
468aa647 4107STATIC char *
cc6ed77d 4108S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4109 dVAR;
7918f24d
NC
4110
4111 PERL_ARGS_ASSERT_TOKENIZE_USE;
4112
468aa647
RGS
4113 if (PL_expect != XSTATE)
4114 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4115 is_use ? "use" : "no"));
29595ff2 4116 s = SKIPSPACE1(s);
468aa647
RGS
4117 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4118 s = force_version(s, TRUE);
17c59fdf
VP
4119 if (*s == ';' || *s == '}'
4120 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4121 start_force(PL_curforce);
9ded7720 4122 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4123 force_next(WORD);
4124 }
4125 else if (*s == 'v') {
4126 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4127 s = force_version(s, FALSE);
4128 }
4129 }
4130 else {
4131 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4132 s = force_version(s, FALSE);
4133 }
6154021b 4134 pl_yylval.ival = is_use;
468aa647
RGS
4135 return s;
4136}
748a9306 4137#ifdef DEBUGGING
27da23d5 4138 static const char* const exp_name[] =
09bef843 4139 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4140 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4141 };
748a9306 4142#endif
463ee0b2 4143
02aa26ce
NT
4144/*
4145 yylex
4146
4147 Works out what to call the token just pulled out of the input
4148 stream. The yacc parser takes care of taking the ops we return and
4149 stitching them into a tree.
4150
4151 Returns:
4152 PRIVATEREF
4153
4154 Structure:
4155 if read an identifier
4156 if we're in a my declaration
4157 croak if they tried to say my($foo::bar)
4158 build the ops for a my() declaration
4159 if it's an access to a my() variable
4160 are we in a sort block?
4161 croak if my($a); $a <=> $b
4162 build ops for access to a my() variable
4163 if in a dq string, and they've said @foo and we can't find @foo
4164 croak
4165 build ops for a bareword
4166 if we already built the token before, use it.
4167*/
4168
20141f0e 4169
dba4d153
JH
4170#ifdef __SC__
4171#pragma segment Perl_yylex
4172#endif
dba4d153 4173int
dba4d153 4174Perl_yylex(pTHX)
20141f0e 4175{
97aff369 4176 dVAR;
3afc138a 4177 register char *s = PL_bufptr;
378cc40b 4178 register char *d;
463ee0b2 4179 STRLEN len;
aa7440fb 4180 bool bof = FALSE;
580561a3 4181 U32 fake_eof = 0;
a687059c 4182
10edeb5d
JH
4183 /* orig_keyword, gvp, and gv are initialized here because
4184 * jump to the label just_a_word_zero can bypass their
4185 * initialization later. */
4186 I32 orig_keyword = 0;
4187 GV *gv = NULL;
4188 GV **gvp = NULL;
4189
bbf60fe6 4190 DEBUG_T( {
396482e1 4191 SV* tmp = newSVpvs("");
b6007c36
DM
4192 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4193 (IV)CopLINE(PL_curcop),
4194 lex_state_names[PL_lex_state],
4195 exp_name[PL_expect],
4196 pv_display(tmp, s, strlen(s), 0, 60));
4197 SvREFCNT_dec(tmp);
bbf60fe6 4198 } );
02aa26ce 4199 /* check if there's an identifier for us to look at */
ba979b31 4200 if (PL_pending_ident)
bbf60fe6 4201 return REPORT(S_pending_ident(aTHX));
bbce6d69 4202
02aa26ce
NT
4203 /* no identifier pending identification */
4204
3280af22 4205 switch (PL_lex_state) {
79072805
LW
4206#ifdef COMMENTARY
4207 case LEX_NORMAL: /* Some compilers will produce faster */
4208 case LEX_INTERPNORMAL: /* code if we comment these out. */
4209 break;
4210#endif
4211
09bef843 4212 /* when we've already built the next token, just pull it out of the queue */
79072805 4213 case LEX_KNOWNEXT:
5db06880
NC
4214#ifdef PERL_MAD
4215 PL_lasttoke--;
6154021b 4216 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4217 if (PL_madskills) {
cd81e915 4218 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4219 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4220 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4221 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4222 PL_thismad->mad_val = 0;
4223 mad_free(PL_thismad);
4224 PL_thismad = 0;
5db06880
NC
4225 }
4226 }
4227 if (!PL_lasttoke) {
4228 PL_lex_state = PL_lex_defer;
4229 PL_expect = PL_lex_expect;
4230 PL_lex_defer = LEX_NORMAL;
4231 if (!PL_nexttoke[PL_lasttoke].next_type)
4232 return yylex();
4233 }
4234#else
3280af22 4235 PL_nexttoke--;
6154021b 4236 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4237 if (!PL_nexttoke) {
4238 PL_lex_state = PL_lex_defer;
4239 PL_expect = PL_lex_expect;
4240 PL_lex_defer = LEX_NORMAL;
463ee0b2 4241 }
5db06880
NC
4242#endif
4243#ifdef PERL_MAD
4244 /* FIXME - can these be merged? */
4245 return(PL_nexttoke[PL_lasttoke].next_type);
4246#else
bbf60fe6 4247 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 4248#endif
79072805 4249
02aa26ce 4250 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4251 when we get here, PL_bufptr is at the \
02aa26ce 4252 */
79072805
LW
4253 case LEX_INTERPCASEMOD:
4254#ifdef DEBUGGING
3280af22 4255 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4256 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4257#endif
02aa26ce 4258 /* handle \E or end of string */
3280af22 4259 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4260 /* if at a \E */
3280af22 4261 if (PL_lex_casemods) {
f54cb97a 4262 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4263 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4264
3792a11b
NC
4265 if (PL_bufptr != PL_bufend
4266 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4267 PL_bufptr += 2;
4268 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4269#ifdef PERL_MAD
4270 if (PL_madskills)
6b29d1f5 4271 PL_thistoken = newSVpvs("\\E");
5db06880 4272#endif
a0d0e21e 4273 }
bbf60fe6 4274 return REPORT(')');
79072805 4275 }
5db06880
NC
4276#ifdef PERL_MAD
4277 while (PL_bufptr != PL_bufend &&
4278 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4279 if (!PL_thiswhite)
6b29d1f5 4280 PL_thiswhite = newSVpvs("");
cd81e915 4281 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4282 PL_bufptr += 2;
4283 }
4284#else
3280af22
NIS
4285 if (PL_bufptr != PL_bufend)
4286 PL_bufptr += 2;
5db06880 4287#endif
3280af22 4288 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4289 return yylex();
79072805
LW
4290 }
4291 else {
607df283 4292 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4293 "### Saw case modifier\n"); });
3280af22 4294 s = PL_bufptr + 1;
6e909404 4295 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4296#ifdef PERL_MAD
cd81e915 4297 if (!PL_thiswhite)
6b29d1f5 4298 PL_thiswhite = newSVpvs("");
cd81e915 4299 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4300#endif
89122651 4301 PL_bufptr = s + 3;
6e909404
JH
4302 PL_lex_state = LEX_INTERPCONCAT;
4303 return yylex();
a0d0e21e 4304 }
6e909404 4305 else {
90771dc0 4306 I32 tmp;
5db06880
NC
4307 if (!PL_madskills) /* when just compiling don't need correct */
4308 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4309 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4310 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4311 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4312 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4313 return REPORT(')');
6e909404
JH
4314 }
4315 if (PL_lex_casemods > 10)
4316 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4317 PL_lex_casestack[PL_lex_casemods++] = *s;
4318 PL_lex_casestack[PL_lex_casemods] = '\0';
4319 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4320 start_force(PL_curforce);
9ded7720 4321 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4322 force_next('(');
cd81e915 4323 start_force(PL_curforce);
6e909404 4324 if (*s == 'l')
9ded7720 4325 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4326 else if (*s == 'u')
9ded7720 4327 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4328 else if (*s == 'L')
9ded7720 4329 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4330 else if (*s == 'U')
9ded7720 4331 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4332 else if (*s == 'Q')
9ded7720 4333 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4334 else
4335 Perl_croak(aTHX_ "panic: yylex");
5db06880 4336 if (PL_madskills) {
a5849ce5
NC
4337 SV* const tmpsv = newSVpvs("\\ ");
4338 /* replace the space with the character we want to escape
4339 */
4340 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4341 curmad('_', tmpsv);
4342 }
6e909404 4343 PL_bufptr = s + 1;
a0d0e21e 4344 }
79072805 4345 force_next(FUNC);
3280af22
NIS
4346 if (PL_lex_starts) {
4347 s = PL_bufptr;
4348 PL_lex_starts = 0;
5db06880
NC
4349#ifdef PERL_MAD
4350 if (PL_madskills) {
cd81e915
NC
4351 if (PL_thistoken)
4352 sv_free(PL_thistoken);
6b29d1f5 4353 PL_thistoken = newSVpvs("");
5db06880
NC
4354 }
4355#endif
131b3ad0
DM
4356 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4357 if (PL_lex_casemods == 1 && PL_lex_inpat)
4358 OPERATOR(',');
4359 else
4360 Aop(OP_CONCAT);
79072805
LW
4361 }
4362 else
cea2e8a9 4363 return yylex();
79072805
LW
4364 }
4365
55497cff 4366 case LEX_INTERPPUSH:
bbf60fe6 4367 return REPORT(sublex_push());
55497cff 4368
79072805 4369 case LEX_INTERPSTART:
3280af22 4370 if (PL_bufptr == PL_bufend)
bbf60fe6 4371 return REPORT(sublex_done());
607df283 4372 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4373 "### Interpolated variable\n"); });
3280af22
NIS
4374 PL_expect = XTERM;
4375 PL_lex_dojoin = (*PL_bufptr == '@');
4376 PL_lex_state = LEX_INTERPNORMAL;
4377 if (PL_lex_dojoin) {
cd81e915 4378 start_force(PL_curforce);
9ded7720 4379 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4380 force_next(',');
cd81e915 4381 start_force(PL_curforce);
a0d0e21e 4382 force_ident("\"", '$');
cd81e915 4383 start_force(PL_curforce);
9ded7720 4384 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4385 force_next('$');
cd81e915 4386 start_force(PL_curforce);
9ded7720 4387 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4388 force_next('(');
cd81e915 4389 start_force(PL_curforce);
9ded7720 4390 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4391 force_next(FUNC);
4392 }
3280af22
NIS
4393 if (PL_lex_starts++) {
4394 s = PL_bufptr;
5db06880
NC
4395#ifdef PERL_MAD
4396 if (PL_madskills) {
cd81e915
NC
4397 if (PL_thistoken)
4398 sv_free(PL_thistoken);
6b29d1f5 4399 PL_thistoken = newSVpvs("");
5db06880
NC
4400 }
4401#endif
131b3ad0
DM
4402 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4403 if (!PL_lex_casemods && PL_lex_inpat)
4404 OPERATOR(',');
4405 else
4406 Aop(OP_CONCAT);
79072805 4407 }
cea2e8a9 4408 return yylex();
79072805
LW
4409
4410 case LEX_INTERPENDMAYBE:
3280af22
NIS
4411 if (intuit_more(PL_bufptr)) {
4412 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4413 break;
4414 }
4415 /* FALL THROUGH */
4416
4417 case LEX_INTERPEND:
3280af22
NIS
4418 if (PL_lex_dojoin) {
4419 PL_lex_dojoin = FALSE;
4420 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4421#ifdef PERL_MAD
4422 if (PL_madskills) {
cd81e915
NC
4423 if (PL_thistoken)
4424 sv_free(PL_thistoken);
6b29d1f5 4425 PL_thistoken = newSVpvs("");
5db06880
NC
4426 }
4427#endif
bbf60fe6 4428 return REPORT(')');
79072805 4429 }
43a16006 4430 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4431 && SvEVALED(PL_lex_repl))
43a16006 4432 {
e9fa98b2 4433 if (PL_bufptr != PL_bufend)
cea2e8a9 4434 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4435 PL_lex_repl = NULL;
e9fa98b2 4436 }
79072805
LW
4437 /* FALLTHROUGH */
4438 case LEX_INTERPCONCAT:
4439#ifdef DEBUGGING
3280af22 4440 if (PL_lex_brackets)
cea2e8a9 4441 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4442#endif
3280af22 4443 if (PL_bufptr == PL_bufend)
bbf60fe6 4444 return REPORT(sublex_done());
79072805 4445
3280af22
NIS
4446 if (SvIVX(PL_linestr) == '\'') {
4447 SV *sv = newSVsv(PL_linestr);
4448 if (!PL_lex_inpat)
76e3520e 4449 sv = tokeq(sv);
3280af22 4450 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4451 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4452 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4453 s = PL_bufend;
79072805
LW
4454 }
4455 else {
3280af22 4456 s = scan_const(PL_bufptr);
79072805 4457 if (*s == '\\')
3280af22 4458 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4459 else
3280af22 4460 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4461 }
4462
3280af22 4463 if (s != PL_bufptr) {
cd81e915 4464 start_force(PL_curforce);
5db06880
NC
4465 if (PL_madskills) {
4466 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4467 }
6154021b 4468 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4469 PL_expect = XTERM;
79072805 4470 force_next(THING);
131b3ad0 4471 if (PL_lex_starts++) {
5db06880
NC
4472#ifdef PERL_MAD
4473 if (PL_madskills) {
cd81e915
NC
4474 if (PL_thistoken)
4475 sv_free(PL_thistoken);
6b29d1f5 4476 PL_thistoken = newSVpvs("");
5db06880
NC
4477 }
4478#endif
131b3ad0
DM
4479 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4480 if (!PL_lex_casemods && PL_lex_inpat)
4481 OPERATOR(',');
4482 else
4483 Aop(OP_CONCAT);
4484 }
79072805 4485 else {
3280af22 4486 PL_bufptr = s;
cea2e8a9 4487 return yylex();
79072805
LW
4488 }
4489 }
4490
cea2e8a9 4491 return yylex();
a0d0e21e 4492 case LEX_FORMLINE:
3280af22
NIS
4493 PL_lex_state = LEX_NORMAL;
4494 s = scan_formline(PL_bufptr);
4495 if (!PL_lex_formbrack)
a0d0e21e
LW
4496 goto rightbracket;
4497 OPERATOR(';');
79072805
LW
4498 }
4499
3280af22
NIS
4500 s = PL_bufptr;
4501 PL_oldoldbufptr = PL_oldbufptr;
4502 PL_oldbufptr = s;
463ee0b2
LW
4503
4504 retry:
5db06880 4505#ifdef PERL_MAD
cd81e915
NC
4506 if (PL_thistoken) {
4507 sv_free(PL_thistoken);
4508 PL_thistoken = 0;
5db06880 4509 }
cd81e915 4510 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4511#endif
378cc40b
LW
4512 switch (*s) {
4513 default:
7e2040f0 4514 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4515 goto keylookup;
b1fc3636
CJ
4516 {
4517 unsigned char c = *s;
4518 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4519 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4520 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4521 } else {
4522 d = PL_linestart;
4523 }
4524 *s = '\0';
4525 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4526 }
e929a76b
LW
4527 case 4:
4528 case 26:
4529 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4530 case 0:
5db06880
NC
4531#ifdef PERL_MAD
4532 if (PL_madskills)
cd81e915 4533 PL_faketokens = 0;
5db06880 4534#endif
3280af22
NIS
4535 if (!PL_rsfp) {
4536 PL_last_uni = 0;
4537 PL_last_lop = 0;
c5ee2135 4538 if (PL_lex_brackets) {
10edeb5d
JH
4539 yyerror((const char *)
4540 (PL_lex_formbrack
4541 ? "Format not terminated"
4542 : "Missing right curly or square bracket"));
c5ee2135 4543 }
4e553d73 4544 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4545 "### Tokener got EOF\n");
5f80b19c 4546 } );
79072805 4547 TOKEN(0);
463ee0b2 4548 }
3280af22 4549 if (s++ < PL_bufend)
a687059c 4550 goto retry; /* ignore stray nulls */
3280af22
NIS
4551 PL_last_uni = 0;
4552 PL_last_lop = 0;
4553 if (!PL_in_eval && !PL_preambled) {
4554 PL_preambled = TRUE;
5db06880
NC
4555#ifdef PERL_MAD
4556 if (PL_madskills)
cd81e915 4557 PL_faketokens = 1;
5db06880 4558#endif
5ab7ff98
NC
4559 if (PL_perldb) {
4560 /* Generate a string of Perl code to load the debugger.
4561 * If PERL5DB is set, it will return the contents of that,
4562 * otherwise a compile-time require of perl5db.pl. */
4563
4564 const char * const pdb = PerlEnv_getenv("PERL5DB");
4565
4566 if (pdb) {
4567 sv_setpv(PL_linestr, pdb);
4568 sv_catpvs(PL_linestr,";");
4569 } else {
4570 SETERRNO(0,SS_NORMAL);
4571 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4572 }
4573 } else
4574 sv_setpvs(PL_linestr,"");
c62eb204
NC
4575 if (PL_preambleav) {
4576 SV **svp = AvARRAY(PL_preambleav);
4577 SV **const end = svp + AvFILLp(PL_preambleav);
4578 while(svp <= end) {
4579 sv_catsv(PL_linestr, *svp);
4580 ++svp;
396482e1 4581 sv_catpvs(PL_linestr, ";");
91b7def8 4582 }
daba3364 4583 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4584 PL_preambleav = NULL;
91b7def8 4585 }
9f639728
FR
4586 if (PL_minus_E)
4587 sv_catpvs(PL_linestr,
4588 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4589 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4590 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4591 if (PL_minus_l)
396482e1 4592 sv_catpvs(PL_linestr,"chomp;");
3280af22 4593 if (PL_minus_a) {
3280af22 4594 if (PL_minus_F) {
3792a11b
NC
4595 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4596 || *PL_splitstr == '"')
3280af22 4597 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4598 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4599 else {
c8ef6a4b
NC
4600 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4601 bytes can be used as quoting characters. :-) */
dd374669 4602 const char *splits = PL_splitstr;
91d456ae 4603 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4604 do {
4605 /* Need to \ \s */
dd374669
AL
4606 if (*splits == '\\')
4607 sv_catpvn(PL_linestr, splits, 1);
4608 sv_catpvn(PL_linestr, splits, 1);
4609 } while (*splits++);
48c4c863
NC
4610 /* This loop will embed the trailing NUL of
4611 PL_linestr as the last thing it does before
4612 terminating. */
396482e1 4613 sv_catpvs(PL_linestr, ");");
54310121 4614 }
2304df62
AD
4615 }
4616 else
396482e1 4617 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4618 }
79072805 4619 }
396482e1 4620 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4621 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4622 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4623 PL_last_lop = PL_last_uni = NULL;
65269a95 4624 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4625 update_debugger_info(PL_linestr, NULL, 0);
79072805 4626 goto retry;
a687059c 4627 }
e929a76b 4628 do {
580561a3
Z
4629 fake_eof = 0;
4630 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4631 if (0) {
7e28d3af 4632 fake_eof:
f0e67a1d
Z
4633 fake_eof = LEX_FAKE_EOF;
4634 }
4635 PL_bufptr = PL_bufend;
17cc9359 4636 CopLINE_inc(PL_curcop);
f0e67a1d 4637 if (!lex_next_chunk(fake_eof)) {
17cc9359 4638 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4639 s = PL_bufptr;
4640 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4641 }
17cc9359 4642 CopLINE_dec(PL_curcop);
5db06880 4643#ifdef PERL_MAD
f0e67a1d 4644 if (!PL_rsfp)
cd81e915 4645 PL_realtokenstart = -1;
5db06880 4646#endif
f0e67a1d 4647 s = PL_bufptr;
7aa207d6
JH
4648 /* If it looks like the start of a BOM or raw UTF-16,
4649 * check if it in fact is. */
580561a3 4650 if (bof && PL_rsfp &&
7aa207d6
JH
4651 (*s == 0 ||
4652 *(U8*)s == 0xEF ||
4653 *(U8*)s >= 0xFE ||
4654 s[1] == 0)) {
eb160463 4655 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4656 if (bof) {
3280af22 4657 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4658 s = swallow_bom((U8*)s);
e929a76b 4659 }
378cc40b 4660 }
3280af22 4661 if (PL_doextract) {
a0d0e21e 4662 /* Incest with pod. */
5db06880
NC
4663#ifdef PERL_MAD
4664 if (PL_madskills)
cd81e915 4665 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4666#endif
01a57ef7 4667 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4668 sv_setpvs(PL_linestr, "");
3280af22
NIS
4669 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4670 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4671 PL_last_lop = PL_last_uni = NULL;
3280af22 4672 PL_doextract = FALSE;
a0d0e21e 4673 }
4e553d73 4674 }
85613cab
Z
4675 if (PL_rsfp)
4676 incline(s);
3280af22
NIS
4677 } while (PL_doextract);
4678 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4679 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4680 PL_last_lop = PL_last_uni = NULL;
57843af0 4681 if (CopLINE(PL_curcop) == 1) {
3280af22 4682 while (s < PL_bufend && isSPACE(*s))
79072805 4683 s++;
a0d0e21e 4684 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4685 s++;
5db06880
NC
4686#ifdef PERL_MAD
4687 if (PL_madskills)
cd81e915 4688 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4689#endif
bd61b366 4690 d = NULL;
3280af22 4691 if (!PL_in_eval) {
44a8e56a 4692 if (*s == '#' && *(s+1) == '!')
4693 d = s + 2;
4694#ifdef ALTERNATE_SHEBANG
4695 else {
bfed75c6 4696 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4697 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4698 d = s + (sizeof(as) - 1);
4699 }
4700#endif /* ALTERNATE_SHEBANG */
4701 }
4702 if (d) {
b8378b72 4703 char *ipath;
774d564b 4704 char *ipathend;
b8378b72 4705
774d564b 4706 while (isSPACE(*d))
b8378b72
CS
4707 d++;
4708 ipath = d;
774d564b 4709 while (*d && !isSPACE(*d))
4710 d++;
4711 ipathend = d;
4712
4713#ifdef ARG_ZERO_IS_SCRIPT
4714 if (ipathend > ipath) {
4715 /*
4716 * HP-UX (at least) sets argv[0] to the script name,
4717 * which makes $^X incorrect. And Digital UNIX and Linux,
4718 * at least, set argv[0] to the basename of the Perl
4719 * interpreter. So, having found "#!", we'll set it right.
4720 */
fafc274c
NC
4721 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4722 SVt_PV)); /* $^X */
774d564b 4723 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4724 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4725 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4726 SvSETMAGIC(x);
4727 }
556c1dec
JH
4728 else {
4729 STRLEN blen;
4730 STRLEN llen;
cfd0369c 4731 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4732 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4733 if (llen < blen) {
4734 bstart += blen - llen;
4735 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4736 sv_setpvn(x, ipath, ipathend - ipath);
4737 SvSETMAGIC(x);
4738 }
4739 }
4740 }
774d564b 4741 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4742 }
774d564b 4743#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4744
4745 /*
4746 * Look for options.
4747 */
748a9306 4748 d = instr(s,"perl -");
84e30d1a 4749 if (!d) {
748a9306 4750 d = instr(s,"perl");
84e30d1a
GS
4751#if defined(DOSISH)
4752 /* avoid getting into infinite loops when shebang
4753 * line contains "Perl" rather than "perl" */
4754 if (!d) {
4755 for (d = ipathend-4; d >= ipath; --d) {
4756 if ((*d == 'p' || *d == 'P')
4757 && !ibcmp(d, "perl", 4))
4758 {
4759 break;
4760 }
4761 }
4762 if (d < ipath)
bd61b366 4763 d = NULL;
84e30d1a
GS
4764 }
4765#endif
4766 }
44a8e56a 4767#ifdef ALTERNATE_SHEBANG
4768 /*
4769 * If the ALTERNATE_SHEBANG on this system starts with a
4770 * character that can be part of a Perl expression, then if
4771 * we see it but not "perl", we're probably looking at the
4772 * start of Perl code, not a request to hand off to some
4773 * other interpreter. Similarly, if "perl" is there, but
4774 * not in the first 'word' of the line, we assume the line
4775 * contains the start of the Perl program.
44a8e56a 4776 */
4777 if (d && *s != '#') {
f54cb97a 4778 const char *c = ipath;
44a8e56a 4779 while (*c && !strchr("; \t\r\n\f\v#", *c))
4780 c++;
4781 if (c < d)
bd61b366 4782 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4783 else
4784 *s = '#'; /* Don't try to parse shebang line */
4785 }
774d564b 4786#endif /* ALTERNATE_SHEBANG */
748a9306 4787 if (!d &&
44a8e56a 4788 *s == '#' &&
774d564b 4789 ipathend > ipath &&
3280af22 4790 !PL_minus_c &&
748a9306 4791 !instr(s,"indir") &&
3280af22 4792 instr(PL_origargv[0],"perl"))
748a9306 4793 {
27da23d5 4794 dVAR;
9f68db38 4795 char **newargv;
9f68db38 4796
774d564b 4797 *ipathend = '\0';
4798 s = ipathend + 1;
3280af22 4799 while (s < PL_bufend && isSPACE(*s))
9f68db38 4800 s++;
3280af22 4801 if (s < PL_bufend) {
d85f917e 4802 Newx(newargv,PL_origargc+3,char*);
9f68db38 4803 newargv[1] = s;
3280af22 4804 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4805 s++;
4806 *s = '\0';
3280af22 4807 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4808 }
4809 else
3280af22 4810 newargv = PL_origargv;
774d564b 4811 newargv[0] = ipath;
b35112e7 4812 PERL_FPU_PRE_EXEC
b4748376 4813 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4814 PERL_FPU_POST_EXEC
cea2e8a9 4815 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4816 }
748a9306 4817 if (d) {
c35e046a
AL
4818 while (*d && !isSPACE(*d))
4819 d++;
4820 while (SPACE_OR_TAB(*d))
4821 d++;
748a9306
LW
4822
4823 if (*d++ == '-') {
f54cb97a 4824 const bool switches_done = PL_doswitches;
fb993905
GA
4825 const U32 oldpdb = PL_perldb;
4826 const bool oldn = PL_minus_n;
4827 const bool oldp = PL_minus_p;
c7030b81 4828 const char *d1 = d;
fb993905 4829
8cc95fdb 4830 do {
4ba71d51
FC
4831 bool baduni = FALSE;
4832 if (*d1 == 'C') {
bd0ab00d
NC
4833 const char *d2 = d1 + 1;
4834 if (parse_unicode_opts((const char **)&d2)
4835 != PL_unicode)
4836 baduni = TRUE;
4ba71d51
FC
4837 }
4838 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4839 const char * const m = d1;
4840 while (*d1 && !isSPACE(*d1))
4841 d1++;
cea2e8a9 4842 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4843 (int)(d1 - m), m);
8cc95fdb 4844 }
c7030b81
NC
4845 d1 = moreswitches(d1);
4846 } while (d1);
f0b2cf55
YST
4847 if (PL_doswitches && !switches_done) {
4848 int argc = PL_origargc;
4849 char **argv = PL_origargv;
4850 do {
4851 argc--,argv++;
4852 } while (argc && argv[0][0] == '-' && argv[0][1]);
4853 init_argv_symbols(argc,argv);
4854 }
65269a95 4855 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4856 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4857 /* if we have already added "LINE: while (<>) {",
4858 we must not do it again */
748a9306 4859 {
76f68e9b 4860 sv_setpvs(PL_linestr, "");
3280af22
NIS
4861 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4862 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4863 PL_last_lop = PL_last_uni = NULL;
3280af22 4864 PL_preambled = FALSE;
65269a95 4865 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4866 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4867 goto retry;
4868 }
a0d0e21e 4869 }
79072805 4870 }
9f68db38 4871 }
79072805 4872 }
3280af22
NIS
4873 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4874 PL_bufptr = s;
4875 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4876 return yylex();
ae986130 4877 }
378cc40b 4878 goto retry;
4fdae800 4879 case '\r':
6a27c188 4880#ifdef PERL_STRICT_CR
cea2e8a9 4881 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4882 Perl_croak(aTHX_
cc507455 4883 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4884#endif
4fdae800 4885 case ' ': case '\t': case '\f': case 013:
5db06880 4886#ifdef PERL_MAD
cd81e915 4887 PL_realtokenstart = -1;
ac372eb8
RD
4888 if (!PL_thiswhite)
4889 PL_thiswhite = newSVpvs("");
4890 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4891#endif
ac372eb8 4892 s++;
378cc40b 4893 goto retry;
378cc40b 4894 case '#':
e929a76b 4895 case '\n':
5db06880 4896#ifdef PERL_MAD
cd81e915 4897 PL_realtokenstart = -1;
5db06880 4898 if (PL_madskills)
cd81e915 4899 PL_faketokens = 0;
5db06880 4900#endif
3280af22 4901 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4902 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4903 /* handle eval qq[#line 1 "foo"\n ...] */
4904 CopLINE_dec(PL_curcop);
4905 incline(s);
4906 }
5db06880
NC
4907 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4908 s = SKIPSPACE0(s);
4909 if (!PL_in_eval || PL_rsfp)
4910 incline(s);
4911 }
4912 else {
4913 d = s;
4914 while (d < PL_bufend && *d != '\n')
4915 d++;
4916 if (d < PL_bufend)
4917 d++;
4918 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4919 Perl_croak(aTHX_ "panic: input overflow");
4920#ifdef PERL_MAD
4921 if (PL_madskills)
cd81e915 4922 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4923#endif
4924 s = d;
4925 incline(s);
4926 }
3280af22
NIS
4927 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4928 PL_bufptr = s;
4929 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4930 return yylex();
a687059c 4931 }
378cc40b 4932 }
a687059c 4933 else {
5db06880
NC
4934#ifdef PERL_MAD
4935 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4936 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 4937 PL_faketokens = 0;
5db06880
NC
4938 s = SKIPSPACE0(s);
4939 TOKEN(PEG); /* make sure any #! line is accessible */
4940 }
4941 s = SKIPSPACE0(s);
4942 }
4943 else {
4944/* if (PL_madskills && PL_lex_formbrack) { */
4945 d = s;
4946 while (d < PL_bufend && *d != '\n')
4947 d++;
4948 if (d < PL_bufend)
4949 d++;
4950 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4951 Perl_croak(aTHX_ "panic: input overflow");
4952 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 4953 if (!PL_thiswhite)
6b29d1f5 4954 PL_thiswhite = newSVpvs("");
5db06880 4955 if (CopLINE(PL_curcop) == 1) {
76f68e9b 4956 sv_setpvs(PL_thiswhite, "");
cd81e915 4957 PL_faketokens = 0;
5db06880 4958 }
cd81e915 4959 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
4960 }
4961 s = d;
4962/* }
4963 *s = '\0';
4964 PL_bufend = s; */
4965 }
4966#else
378cc40b 4967 *s = '\0';
3280af22 4968 PL_bufend = s;
5db06880 4969#endif
a687059c 4970 }
378cc40b
LW
4971 goto retry;
4972 case '-':
79072805 4973 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 4974 I32 ftst = 0;
90771dc0 4975 char tmp;
e5edeb50 4976
378cc40b 4977 s++;
3280af22 4978 PL_bufptr = s;
748a9306
LW
4979 tmp = *s++;
4980
bf4acbe4 4981 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
4982 s++;
4983
4984 if (strnEQ(s,"=>",2)) {
3280af22 4985 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 4986 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
4987 OPERATOR('-'); /* unary minus */
4988 }
3280af22 4989 PL_last_uni = PL_oldbufptr;
748a9306 4990 switch (tmp) {
e5edeb50
JH
4991 case 'r': ftst = OP_FTEREAD; break;
4992 case 'w': ftst = OP_FTEWRITE; break;
4993 case 'x': ftst = OP_FTEEXEC; break;
4994 case 'o': ftst = OP_FTEOWNED; break;
4995 case 'R': ftst = OP_FTRREAD; break;
4996 case 'W': ftst = OP_FTRWRITE; break;
4997 case 'X': ftst = OP_FTREXEC; break;
4998 case 'O': ftst = OP_FTROWNED; break;
4999 case 'e': ftst = OP_FTIS; break;
5000 case 'z': ftst = OP_FTZERO; break;
5001 case 's': ftst = OP_FTSIZE; break;
5002 case 'f': ftst = OP_FTFILE; break;
5003 case 'd': ftst = OP_FTDIR; break;
5004 case 'l': ftst = OP_FTLINK; break;
5005 case 'p': ftst = OP_FTPIPE; break;
5006 case 'S': ftst = OP_FTSOCK; break;
5007 case 'u': ftst = OP_FTSUID; break;
5008 case 'g': ftst = OP_FTSGID; break;
5009 case 'k': ftst = OP_FTSVTX; break;
5010 case 'b': ftst = OP_FTBLK; break;
5011 case 'c': ftst = OP_FTCHR; break;
5012 case 't': ftst = OP_FTTTY; break;
5013 case 'T': ftst = OP_FTTEXT; break;
5014 case 'B': ftst = OP_FTBINARY; break;
5015 case 'M': case 'A': case 'C':
fafc274c 5016 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5017 switch (tmp) {
5018 case 'M': ftst = OP_FTMTIME; break;
5019 case 'A': ftst = OP_FTATIME; break;
5020 case 'C': ftst = OP_FTCTIME; break;
5021 default: break;
5022 }
5023 break;
378cc40b 5024 default:
378cc40b
LW
5025 break;
5026 }
e5edeb50 5027 if (ftst) {
eb160463 5028 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5029 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5030 "### Saw file test %c\n", (int)tmp);
5f80b19c 5031 } );
e5edeb50
JH
5032 FTST(ftst);
5033 }
5034 else {
5035 /* Assume it was a minus followed by a one-letter named
5036 * subroutine call (or a -bareword), then. */
95c31fe3 5037 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5038 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5039 (int) tmp);
5f80b19c 5040 } );
3cf7b4c4 5041 s = --PL_bufptr;
e5edeb50 5042 }
378cc40b 5043 }
90771dc0
NC
5044 {
5045 const char tmp = *s++;
5046 if (*s == tmp) {
5047 s++;
5048 if (PL_expect == XOPERATOR)
5049 TERM(POSTDEC);
5050 else
5051 OPERATOR(PREDEC);
5052 }
5053 else if (*s == '>') {
5054 s++;
29595ff2 5055 s = SKIPSPACE1(s);
90771dc0
NC
5056 if (isIDFIRST_lazy_if(s,UTF)) {
5057 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5058 TOKEN(ARROW);
5059 }
5060 else if (*s == '$')
5061 OPERATOR(ARROW);
5062 else
5063 TERM(ARROW);
5064 }
3280af22 5065 if (PL_expect == XOPERATOR)
90771dc0
NC
5066 Aop(OP_SUBTRACT);
5067 else {
5068 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5069 check_uni();
5070 OPERATOR('-'); /* unary minus */
79072805 5071 }
2f3197b3 5072 }
79072805 5073
378cc40b 5074 case '+':
90771dc0
NC
5075 {
5076 const char tmp = *s++;
5077 if (*s == tmp) {
5078 s++;
5079 if (PL_expect == XOPERATOR)
5080 TERM(POSTINC);
5081 else
5082 OPERATOR(PREINC);
5083 }
3280af22 5084 if (PL_expect == XOPERATOR)
90771dc0
NC
5085 Aop(OP_ADD);
5086 else {
5087 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5088 check_uni();
5089 OPERATOR('+');
5090 }
2f3197b3 5091 }
a687059c 5092
378cc40b 5093 case '*':
3280af22
NIS
5094 if (PL_expect != XOPERATOR) {
5095 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5096 PL_expect = XOPERATOR;
5097 force_ident(PL_tokenbuf, '*');
5098 if (!*PL_tokenbuf)
a0d0e21e 5099 PREREF('*');
79072805 5100 TERM('*');
a687059c 5101 }
79072805
LW
5102 s++;
5103 if (*s == '*') {
a687059c 5104 s++;
79072805 5105 PWop(OP_POW);
a687059c 5106 }
79072805
LW
5107 Mop(OP_MULTIPLY);
5108
378cc40b 5109 case '%':
3280af22 5110 if (PL_expect == XOPERATOR) {
bbce6d69 5111 ++s;
5112 Mop(OP_MODULO);
a687059c 5113 }
3280af22 5114 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5115 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5116 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5117 if (!PL_tokenbuf[1]) {
bbce6d69 5118 PREREF('%');
a687059c 5119 }
3280af22 5120 PL_pending_ident = '%';
bbce6d69 5121 TERM('%');
a687059c 5122
378cc40b 5123 case '^':
79072805 5124 s++;
a0d0e21e 5125 BOop(OP_BIT_XOR);
79072805 5126 case '[':
3280af22 5127 PL_lex_brackets++;
df3467db
IG
5128 {
5129 const char tmp = *s++;
5130 OPERATOR(tmp);
5131 }
378cc40b 5132 case '~':
0d863452 5133 if (s[1] == '~'
3e7dd34d 5134 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
5135 {
5136 s += 2;
5137 Eop(OP_SMARTMATCH);
5138 }
378cc40b 5139 case ',':
90771dc0
NC
5140 {
5141 const char tmp = *s++;
5142 OPERATOR(tmp);
5143 }
a0d0e21e
LW
5144 case ':':
5145 if (s[1] == ':') {
5146 len = 0;
0bfa2a8a 5147 goto just_a_word_zero_gv;
a0d0e21e
LW
5148 }
5149 s++;
09bef843
SB
5150 switch (PL_expect) {
5151 OP *attrs;
5db06880
NC
5152#ifdef PERL_MAD
5153 I32 stuffstart;
5154#endif
09bef843
SB
5155 case XOPERATOR:
5156 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5157 break;
5158 PL_bufptr = s; /* update in case we back off */
d83f38d8
NC
5159 if (*s == '=') {
5160 deprecate(":= for an empty attribute list");
5161 }
09bef843
SB
5162 goto grabattrs;
5163 case XATTRBLOCK:
5164 PL_expect = XBLOCK;
5165 goto grabattrs;
5166 case XATTRTERM:
5167 PL_expect = XTERMBLOCK;
5168 grabattrs:
5db06880
NC
5169#ifdef PERL_MAD
5170 stuffstart = s - SvPVX(PL_linestr) - 1;
5171#endif
29595ff2 5172 s = PEEKSPACE(s);
5f66b61c 5173 attrs = NULL;
7e2040f0 5174 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5175 I32 tmp;
5cc237b8 5176 SV *sv;
09bef843 5177 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5178 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5179 if (tmp < 0) tmp = -tmp;
5180 switch (tmp) {
5181 case KEY_or:
5182 case KEY_and:
5183 case KEY_for:
11baf631 5184 case KEY_foreach:
f9829d6b
GS
5185 case KEY_unless:
5186 case KEY_if:
5187 case KEY_while:
5188 case KEY_until:
5189 goto got_attrs;
5190 default:
5191 break;
5192 }
5193 }
5cc237b8 5194 sv = newSVpvn(s, len);
09bef843
SB
5195 if (*d == '(') {
5196 d = scan_str(d,TRUE,TRUE);
5197 if (!d) {
09bef843
SB
5198 /* MUST advance bufptr here to avoid bogus
5199 "at end of line" context messages from yyerror().
5200 */
5201 PL_bufptr = s + len;
5202 yyerror("Unterminated attribute parameter in attribute list");
5203 if (attrs)
5204 op_free(attrs);
5cc237b8 5205 sv_free(sv);
bbf60fe6 5206 return REPORT(0); /* EOF indicator */
09bef843
SB
5207 }
5208 }
5209 if (PL_lex_stuff) {
09bef843
SB
5210 sv_catsv(sv, PL_lex_stuff);
5211 attrs = append_elem(OP_LIST, attrs,
5212 newSVOP(OP_CONST, 0, sv));
5213 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5214 PL_lex_stuff = NULL;
09bef843
SB
5215 }
5216 else {
5cc237b8
BS
5217 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5218 sv_free(sv);
1108974d 5219 if (PL_in_my == KEY_our) {
df9a6019 5220 deprecate(":unique");
1108974d 5221 }
bfed75c6 5222 else
371fce9b
DM
5223 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5224 }
5225
d3cea301
SB
5226 /* NOTE: any CV attrs applied here need to be part of
5227 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5228 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5229 sv_free(sv);
78f9721b 5230 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5231 }
5232 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5233 sv_free(sv);
8e5dadda 5234 deprecate(":locked");
5cc237b8
BS
5235 }
5236 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5237 sv_free(sv);
78f9721b 5238 CvMETHOD_on(PL_compcv);
5cc237b8 5239 }
78f9721b
SM
5240 /* After we've set the flags, it could be argued that
5241 we don't need to do the attributes.pm-based setting
5242 process, and shouldn't bother appending recognized
d3cea301
SB
5243 flags. To experiment with that, uncomment the
5244 following "else". (Note that's already been
5245 uncommented. That keeps the above-applied built-in
5246 attributes from being intercepted (and possibly
5247 rejected) by a package's attribute routines, but is
5248 justified by the performance win for the common case
5249 of applying only built-in attributes.) */
0256094b 5250 else
78f9721b
SM
5251 attrs = append_elem(OP_LIST, attrs,
5252 newSVOP(OP_CONST, 0,
5cc237b8 5253 sv));
09bef843 5254 }
29595ff2 5255 s = PEEKSPACE(d);
0120eecf 5256 if (*s == ':' && s[1] != ':')
29595ff2 5257 s = PEEKSPACE(s+1);
0120eecf
GS
5258 else if (s == d)
5259 break; /* require real whitespace or :'s */
29595ff2 5260 /* XXX losing whitespace on sequential attributes here */
09bef843 5261 }
90771dc0
NC
5262 {
5263 const char tmp
5264 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5265 if (*s != ';' && *s != '}' && *s != tmp
5266 && (tmp != '=' || *s != ')')) {
5267 const char q = ((*s == '\'') ? '"' : '\'');
5268 /* If here for an expression, and parsed no attrs, back
5269 off. */
5270 if (tmp == '=' && !attrs) {
5271 s = PL_bufptr;
5272 break;
5273 }
5274 /* MUST advance bufptr here to avoid bogus "at end of line"
5275 context messages from yyerror().
5276 */
5277 PL_bufptr = s;
10edeb5d
JH
5278 yyerror( (const char *)
5279 (*s
5280 ? Perl_form(aTHX_ "Invalid separator character "
5281 "%c%c%c in attribute list", q, *s, q)
5282 : "Unterminated attribute list" ) );
90771dc0
NC
5283 if (attrs)
5284 op_free(attrs);
5285 OPERATOR(':');
09bef843 5286 }
09bef843 5287 }
f9829d6b 5288 got_attrs:
09bef843 5289 if (attrs) {
cd81e915 5290 start_force(PL_curforce);
9ded7720 5291 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5292 CURMAD('_', PL_nextwhite);
89122651 5293 force_next(THING);
5db06880
NC
5294 }
5295#ifdef PERL_MAD
5296 if (PL_madskills) {
cd81e915 5297 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5298 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5299 }
5db06880 5300#endif
09bef843
SB
5301 TOKEN(COLONATTR);
5302 }
a0d0e21e 5303 OPERATOR(':');
8990e307
LW
5304 case '(':
5305 s++;
3280af22
NIS
5306 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5307 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5308 else
3280af22 5309 PL_expect = XTERM;
29595ff2 5310 s = SKIPSPACE1(s);
a0d0e21e 5311 TOKEN('(');
378cc40b 5312 case ';':
f4dd75d9 5313 CLINE;
90771dc0
NC
5314 {
5315 const char tmp = *s++;
5316 OPERATOR(tmp);
5317 }
378cc40b 5318 case ')':
90771dc0
NC
5319 {
5320 const char tmp = *s++;
29595ff2 5321 s = SKIPSPACE1(s);
90771dc0
NC
5322 if (*s == '{')
5323 PREBLOCK(tmp);
5324 TERM(tmp);
5325 }
79072805
LW
5326 case ']':
5327 s++;
3280af22 5328 if (PL_lex_brackets <= 0)
d98d5fff 5329 yyerror("Unmatched right square bracket");
463ee0b2 5330 else
3280af22
NIS
5331 --PL_lex_brackets;
5332 if (PL_lex_state == LEX_INTERPNORMAL) {
5333 if (PL_lex_brackets == 0) {
02255c60
FC
5334 if (*s == '-' && s[1] == '>')
5335 PL_lex_state = LEX_INTERPENDMAYBE;
5336 else if (*s != '[' && *s != '{')
3280af22 5337 PL_lex_state = LEX_INTERPEND;
79072805
LW
5338 }
5339 }
4633a7c4 5340 TERM(']');
79072805
LW
5341 case '{':
5342 leftbracket:
79072805 5343 s++;
3280af22 5344 if (PL_lex_brackets > 100) {
8edd5f42 5345 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5346 }
3280af22 5347 switch (PL_expect) {
a0d0e21e 5348 case XTERM:
3280af22 5349 if (PL_lex_formbrack) {
a0d0e21e
LW
5350 s--;
5351 PRETERMBLOCK(DO);
5352 }
3280af22
NIS
5353 if (PL_oldoldbufptr == PL_last_lop)
5354 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5355 else
3280af22 5356 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5357 OPERATOR(HASHBRACK);
a0d0e21e 5358 case XOPERATOR:
bf4acbe4 5359 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5360 s++;
44a8e56a 5361 d = s;
3280af22
NIS
5362 PL_tokenbuf[0] = '\0';
5363 if (d < PL_bufend && *d == '-') {
5364 PL_tokenbuf[0] = '-';
44a8e56a 5365 d++;
bf4acbe4 5366 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5367 d++;
5368 }
7e2040f0 5369 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5370 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5371 FALSE, &len);
bf4acbe4 5372 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5373 d++;
5374 if (*d == '}') {
f54cb97a 5375 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5376 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5377 if (minus)
5378 force_next('-');
748a9306
LW
5379 }
5380 }
5381 /* FALL THROUGH */
09bef843 5382 case XATTRBLOCK:
748a9306 5383 case XBLOCK:
3280af22
NIS
5384 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5385 PL_expect = XSTATE;
a0d0e21e 5386 break;
09bef843 5387 case XATTRTERM:
a0d0e21e 5388 case XTERMBLOCK:
3280af22
NIS
5389 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5390 PL_expect = XSTATE;
a0d0e21e
LW
5391 break;
5392 default: {
f54cb97a 5393 const char *t;
3280af22
NIS
5394 if (PL_oldoldbufptr == PL_last_lop)
5395 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5396 else
3280af22 5397 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5398 s = SKIPSPACE1(s);
8452ff4b
SB
5399 if (*s == '}') {
5400 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5401 PL_expect = XTERM;
5402 /* This hack is to get the ${} in the message. */
5403 PL_bufptr = s+1;
5404 yyerror("syntax error");
5405 break;
5406 }
a0d0e21e 5407 OPERATOR(HASHBRACK);
8452ff4b 5408 }
b8a4b1be
GS
5409 /* This hack serves to disambiguate a pair of curlies
5410 * as being a block or an anon hash. Normally, expectation
5411 * determines that, but in cases where we're not in a
5412 * position to expect anything in particular (like inside
5413 * eval"") we have to resolve the ambiguity. This code
5414 * covers the case where the first term in the curlies is a
5415 * quoted string. Most other cases need to be explicitly
a0288114 5416 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5417 * curly in order to force resolution as an anon hash.
5418 *
5419 * XXX should probably propagate the outer expectation
5420 * into eval"" to rely less on this hack, but that could
5421 * potentially break current behavior of eval"".
5422 * GSAR 97-07-21
5423 */
5424 t = s;
5425 if (*s == '\'' || *s == '"' || *s == '`') {
5426 /* common case: get past first string, handling escapes */
3280af22 5427 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5428 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5429 t++;
5430 t++;
a0d0e21e 5431 }
b8a4b1be 5432 else if (*s == 'q') {
3280af22 5433 if (++t < PL_bufend
b8a4b1be 5434 && (!isALNUM(*t)
3280af22 5435 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5436 && !isALNUM(*t))))
5437 {
abc667d1 5438 /* skip q//-like construct */
f54cb97a 5439 const char *tmps;
b8a4b1be
GS
5440 char open, close, term;
5441 I32 brackets = 1;
5442
3280af22 5443 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5444 t++;
abc667d1
DM
5445 /* check for q => */
5446 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5447 OPERATOR(HASHBRACK);
5448 }
b8a4b1be
GS
5449 term = *t;
5450 open = term;
5451 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5452 term = tmps[5];
5453 close = term;
5454 if (open == close)
3280af22
NIS
5455 for (t++; t < PL_bufend; t++) {
5456 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5457 t++;
6d07e5e9 5458 else if (*t == open)
b8a4b1be
GS
5459 break;
5460 }
abc667d1 5461 else {
3280af22
NIS
5462 for (t++; t < PL_bufend; t++) {
5463 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5464 t++;
6d07e5e9 5465 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5466 break;
5467 else if (*t == open)
5468 brackets++;
5469 }
abc667d1
DM
5470 }
5471 t++;
b8a4b1be 5472 }
abc667d1
DM
5473 else
5474 /* skip plain q word */
5475 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5476 t += UTF8SKIP(t);
a0d0e21e 5477 }
7e2040f0 5478 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5479 t += UTF8SKIP(t);
7e2040f0 5480 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5481 t += UTF8SKIP(t);
a0d0e21e 5482 }
3280af22 5483 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5484 t++;
b8a4b1be
GS
5485 /* if comma follows first term, call it an anon hash */
5486 /* XXX it could be a comma expression with loop modifiers */
3280af22 5487 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5488 || (*t == '=' && t[1] == '>')))
a0d0e21e 5489 OPERATOR(HASHBRACK);
3280af22 5490 if (PL_expect == XREF)
4e4e412b 5491 PL_expect = XTERM;
a0d0e21e 5492 else {
3280af22
NIS
5493 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5494 PL_expect = XSTATE;
a0d0e21e 5495 }
8990e307 5496 }
a0d0e21e 5497 break;
463ee0b2 5498 }
6154021b 5499 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5500 if (isSPACE(*s) || *s == '#')
3280af22 5501 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5502 TOKEN('{');
378cc40b 5503 case '}':
79072805
LW
5504 rightbracket:
5505 s++;
3280af22 5506 if (PL_lex_brackets <= 0)
d98d5fff 5507 yyerror("Unmatched right curly bracket");
463ee0b2 5508 else
3280af22 5509 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5510 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5511 PL_lex_formbrack = 0;
5512 if (PL_lex_state == LEX_INTERPNORMAL) {
5513 if (PL_lex_brackets == 0) {
9059aa12
LW
5514 if (PL_expect & XFAKEBRACK) {
5515 PL_expect &= XENUMMASK;
3280af22
NIS
5516 PL_lex_state = LEX_INTERPEND;
5517 PL_bufptr = s;
5db06880
NC
5518#if 0
5519 if (PL_madskills) {
cd81e915 5520 if (!PL_thiswhite)
6b29d1f5 5521 PL_thiswhite = newSVpvs("");
76f68e9b 5522 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5523 }
5524#endif
cea2e8a9 5525 return yylex(); /* ignore fake brackets */
79072805 5526 }
fa83b5b6 5527 if (*s == '-' && s[1] == '>')
3280af22 5528 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5529 else if (*s != '[' && *s != '{')
3280af22 5530 PL_lex_state = LEX_INTERPEND;
79072805
LW
5531 }
5532 }
9059aa12
LW
5533 if (PL_expect & XFAKEBRACK) {
5534 PL_expect &= XENUMMASK;
3280af22 5535 PL_bufptr = s;
cea2e8a9 5536 return yylex(); /* ignore fake brackets */
748a9306 5537 }
cd81e915 5538 start_force(PL_curforce);
5db06880
NC
5539 if (PL_madskills) {
5540 curmad('X', newSVpvn(s-1,1));
cd81e915 5541 CURMAD('_', PL_thiswhite);
5db06880 5542 }
79072805 5543 force_next('}');
5db06880 5544#ifdef PERL_MAD
cd81e915 5545 if (!PL_thistoken)
6b29d1f5 5546 PL_thistoken = newSVpvs("");
5db06880 5547#endif
79072805 5548 TOKEN(';');
378cc40b
LW
5549 case '&':
5550 s++;
90771dc0 5551 if (*s++ == '&')
a0d0e21e 5552 AOPERATOR(ANDAND);
378cc40b 5553 s--;
3280af22 5554 if (PL_expect == XOPERATOR) {
041457d9
DM
5555 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5556 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5557 {
57843af0 5558 CopLINE_dec(PL_curcop);
f1f66076 5559 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5560 CopLINE_inc(PL_curcop);
463ee0b2 5561 }
79072805 5562 BAop(OP_BIT_AND);
463ee0b2 5563 }
79072805 5564
3280af22
NIS
5565 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5566 if (*PL_tokenbuf) {
5567 PL_expect = XOPERATOR;
5568 force_ident(PL_tokenbuf, '&');
463ee0b2 5569 }
79072805
LW
5570 else
5571 PREREF('&');
6154021b 5572 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5573 TERM('&');
5574
378cc40b
LW
5575 case '|':
5576 s++;
90771dc0 5577 if (*s++ == '|')
a0d0e21e 5578 AOPERATOR(OROR);
378cc40b 5579 s--;
79072805 5580 BOop(OP_BIT_OR);
378cc40b
LW
5581 case '=':
5582 s++;
748a9306 5583 {
90771dc0
NC
5584 const char tmp = *s++;
5585 if (tmp == '=')
5586 Eop(OP_EQ);
5587 if (tmp == '>')
5588 OPERATOR(',');
5589 if (tmp == '~')
5590 PMop(OP_MATCH);
5591 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5592 && strchr("+-*/%.^&|<",tmp))
5593 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5594 "Reversed %c= operator",(int)tmp);
5595 s--;
5596 if (PL_expect == XSTATE && isALPHA(tmp) &&
5597 (s == PL_linestart+1 || s[-2] == '\n') )
5598 {
5599 if (PL_in_eval && !PL_rsfp) {
5600 d = PL_bufend;
5601 while (s < d) {
5602 if (*s++ == '\n') {
5603 incline(s);
5604 if (strnEQ(s,"=cut",4)) {
5605 s = strchr(s,'\n');
5606 if (s)
5607 s++;
5608 else
5609 s = d;
5610 incline(s);
5611 goto retry;
5612 }
5613 }
a5f75d66 5614 }
90771dc0 5615 goto retry;
a5f75d66 5616 }
5db06880
NC
5617#ifdef PERL_MAD
5618 if (PL_madskills) {
cd81e915 5619 if (!PL_thiswhite)
6b29d1f5 5620 PL_thiswhite = newSVpvs("");
cd81e915 5621 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5622 PL_bufend - PL_linestart);
5623 }
5624#endif
90771dc0
NC
5625 s = PL_bufend;
5626 PL_doextract = TRUE;
5627 goto retry;
a5f75d66 5628 }
a0d0e21e 5629 }
3280af22 5630 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5631 const char *t = s;
51882d45 5632#ifdef PERL_STRICT_CR
c35e046a 5633 while (SPACE_OR_TAB(*t))
51882d45 5634#else
c35e046a 5635 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5636#endif
c35e046a 5637 t++;
a0d0e21e
LW
5638 if (*t == '\n' || *t == '#') {
5639 s--;
3280af22 5640 PL_expect = XBLOCK;
a0d0e21e
LW
5641 goto leftbracket;
5642 }
79072805 5643 }
6154021b 5644 pl_yylval.ival = 0;
a0d0e21e 5645 OPERATOR(ASSIGNOP);
378cc40b
LW
5646 case '!':
5647 s++;
90771dc0
NC
5648 {
5649 const char tmp = *s++;
5650 if (tmp == '=') {
5651 /* was this !=~ where !~ was meant?
5652 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5653
5654 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5655 const char *t = s+1;
5656
5657 while (t < PL_bufend && isSPACE(*t))
5658 ++t;
5659
5660 if (*t == '/' || *t == '?' ||
5661 ((*t == 'm' || *t == 's' || *t == 'y')
5662 && !isALNUM(t[1])) ||
5663 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5664 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5665 "!=~ should be !~");
5666 }
5667 Eop(OP_NE);
5668 }
5669 if (tmp == '~')
5670 PMop(OP_NOT);
5671 }
378cc40b
LW
5672 s--;
5673 OPERATOR('!');
5674 case '<':
3280af22 5675 if (PL_expect != XOPERATOR) {
93a17b20 5676 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5677 check_uni();
79072805
LW
5678 if (s[1] == '<')
5679 s = scan_heredoc(s);
5680 else
5681 s = scan_inputsymbol(s);
5682 TERM(sublex_start());
378cc40b
LW
5683 }
5684 s++;
90771dc0
NC
5685 {
5686 char tmp = *s++;
5687 if (tmp == '<')
5688 SHop(OP_LEFT_SHIFT);
5689 if (tmp == '=') {
5690 tmp = *s++;
5691 if (tmp == '>')
5692 Eop(OP_NCMP);
5693 s--;
5694 Rop(OP_LE);
5695 }
395c3793 5696 }
378cc40b 5697 s--;
79072805 5698 Rop(OP_LT);
378cc40b
LW
5699 case '>':
5700 s++;
90771dc0
NC
5701 {
5702 const char tmp = *s++;
5703 if (tmp == '>')
5704 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5705 else if (tmp == '=')
90771dc0
NC
5706 Rop(OP_GE);
5707 }
378cc40b 5708 s--;
79072805 5709 Rop(OP_GT);
378cc40b
LW
5710
5711 case '$':
bbce6d69 5712 CLINE;
5713
3280af22
NIS
5714 if (PL_expect == XOPERATOR) {
5715 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5716 return deprecate_commaless_var_list();
a0d0e21e 5717 }
8990e307 5718 }
a0d0e21e 5719
7e2040f0 5720 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-", s[2]))) {
3280af22 5721 PL_tokenbuf[0] = '@';
376b8730
SM
5722 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5723 sizeof PL_tokenbuf - 1, FALSE);
5724 if (PL_expect == XOPERATOR)
5725 no_op("Array length", s);
3280af22 5726 if (!PL_tokenbuf[1])
a0d0e21e 5727 PREREF(DOLSHARP);
3280af22
NIS
5728 PL_expect = XOPERATOR;
5729 PL_pending_ident = '#';
463ee0b2 5730 TOKEN(DOLSHARP);
79072805 5731 }
bbce6d69 5732
3280af22 5733 PL_tokenbuf[0] = '$';
376b8730
SM
5734 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5735 sizeof PL_tokenbuf - 1, FALSE);
5736 if (PL_expect == XOPERATOR)
5737 no_op("Scalar", s);
3280af22
NIS
5738 if (!PL_tokenbuf[1]) {
5739 if (s == PL_bufend)
bbce6d69 5740 yyerror("Final $ should be \\$ or $name");
5741 PREREF('$');
8990e307 5742 }
a0d0e21e 5743
bbce6d69 5744 /* This kludge not intended to be bulletproof. */
3280af22 5745 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5746 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5747 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5748 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5749 TERM(THING);
5750 }
5751
ff68c719 5752 d = s;
90771dc0
NC
5753 {
5754 const char tmp = *s;
ae28bb2a 5755 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5756 s = SKIPSPACE1(s);
ff68c719 5757
90771dc0
NC
5758 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5759 && intuit_more(s)) {
5760 if (*s == '[') {
5761 PL_tokenbuf[0] = '@';
5762 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5763 char *t = s+1;
5764
5765 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5766 t++;
90771dc0 5767 if (*t++ == ',') {
29595ff2 5768 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5769 while (t < PL_bufend && *t != ']')
5770 t++;
9014280d 5771 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5772 "Multidimensional syntax %.*s not supported",
36c7798d 5773 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5774 }
748a9306 5775 }
93a17b20 5776 }
90771dc0
NC
5777 else if (*s == '{') {
5778 char *t;
5779 PL_tokenbuf[0] = '%';
5780 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5781 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5782 {
5783 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5784 do {
5785 t++;
5786 } while (isSPACE(*t));
90771dc0 5787 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5788 STRLEN len;
90771dc0 5789 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5790 &len);
c35e046a
AL
5791 while (isSPACE(*t))
5792 t++;
780a5241 5793 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5794 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5795 "You need to quote \"%s\"",
5796 tmpbuf);
5797 }
5798 }
5799 }
93a17b20 5800 }
bbce6d69 5801
90771dc0
NC
5802 PL_expect = XOPERATOR;
5803 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5804 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5805 if (!islop || PL_last_lop_op == OP_GREPSTART)
5806 PL_expect = XOPERATOR;
5807 else if (strchr("$@\"'`q", *s))
5808 PL_expect = XTERM; /* e.g. print $fh "foo" */
5809 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5810 PL_expect = XTERM; /* e.g. print $fh &sub */
5811 else if (isIDFIRST_lazy_if(s,UTF)) {
5812 char tmpbuf[sizeof PL_tokenbuf];
5813 int t2;
5814 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5815 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5816 /* binary operators exclude handle interpretations */
5817 switch (t2) {
5818 case -KEY_x:
5819 case -KEY_eq:
5820 case -KEY_ne:
5821 case -KEY_gt:
5822 case -KEY_lt:
5823 case -KEY_ge:
5824 case -KEY_le:
5825 case -KEY_cmp:
5826 break;
5827 default:
5828 PL_expect = XTERM; /* e.g. print $fh length() */
5829 break;
5830 }
5831 }
5832 else {
5833 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5834 }
5835 }
90771dc0
NC
5836 else if (isDIGIT(*s))
5837 PL_expect = XTERM; /* e.g. print $fh 3 */
5838 else if (*s == '.' && isDIGIT(s[1]))
5839 PL_expect = XTERM; /* e.g. print $fh .3 */
5840 else if ((*s == '?' || *s == '-' || *s == '+')
5841 && !isSPACE(s[1]) && s[1] != '=')
5842 PL_expect = XTERM; /* e.g. print $fh -1 */
5843 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5844 && s[1] != '/')
5845 PL_expect = XTERM; /* e.g. print $fh /.../
5846 XXX except DORDOR operator
5847 */
5848 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5849 && s[2] != '=')
5850 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5851 }
bbce6d69 5852 }
3280af22 5853 PL_pending_ident = '$';
79072805 5854 TOKEN('$');
378cc40b
LW
5855
5856 case '@':
3280af22 5857 if (PL_expect == XOPERATOR)
bbce6d69 5858 no_op("Array", s);
3280af22
NIS
5859 PL_tokenbuf[0] = '@';
5860 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5861 if (!PL_tokenbuf[1]) {
bbce6d69 5862 PREREF('@');
5863 }
3280af22 5864 if (PL_lex_state == LEX_NORMAL)
29595ff2 5865 s = SKIPSPACE1(s);
3280af22 5866 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5867 if (*s == '{')
3280af22 5868 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5869
5870 /* Warn about @ where they meant $. */
041457d9
DM
5871 if (*s == '[' || *s == '{') {
5872 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5873 const char *t = s + 1;
7e2040f0 5874 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5875 t++;
5876 if (*t == '}' || *t == ']') {
5877 t++;
29595ff2 5878 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5879 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5880 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5881 (int)(t-PL_bufptr), PL_bufptr,
5882 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5883 }
93a17b20
LW
5884 }
5885 }
463ee0b2 5886 }
3280af22 5887 PL_pending_ident = '@';
79072805 5888 TERM('@');
378cc40b 5889
c963b151 5890 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5891 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5892 s += 2;
5893 AOPERATOR(DORDOR);
5894 }
c963b151 5895 case '?': /* may either be conditional or pattern */
be25f609 5896 if (PL_expect == XOPERATOR) {
90771dc0 5897 char tmp = *s++;
c963b151 5898 if(tmp == '?') {
be25f609 5899 OPERATOR('?');
c963b151
BD
5900 }
5901 else {
5902 tmp = *s++;
5903 if(tmp == '/') {
5904 /* A // operator. */
5905 AOPERATOR(DORDOR);
5906 }
5907 else {
5908 s--;
5909 Mop(OP_DIVIDE);
5910 }
5911 }
5912 }
5913 else {
5914 /* Disable warning on "study /blah/" */
5915 if (PL_oldoldbufptr == PL_last_uni
5916 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5917 || memNE(PL_last_uni, "study", 5)
5918 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5919 ))
5920 check_uni();
5921 s = scan_pat(s,OP_MATCH);
5922 TERM(sublex_start());
5923 }
378cc40b
LW
5924
5925 case '.':
51882d45
GS
5926 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5927#ifdef PERL_STRICT_CR
5928 && s[1] == '\n'
5929#else
5930 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5931#endif
5932 && (s == PL_linestart || s[-1] == '\n') )
5933 {
3280af22
NIS
5934 PL_lex_formbrack = 0;
5935 PL_expect = XSTATE;
79072805
LW
5936 goto rightbracket;
5937 }
be25f609 5938 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5939 s += 3;
5940 OPERATOR(YADAYADA);
5941 }
3280af22 5942 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 5943 char tmp = *s++;
a687059c
LW
5944 if (*s == tmp) {
5945 s++;
2f3197b3
LW
5946 if (*s == tmp) {
5947 s++;
6154021b 5948 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
5949 }
5950 else
6154021b 5951 pl_yylval.ival = 0;
378cc40b 5952 OPERATOR(DOTDOT);
a687059c 5953 }
79072805 5954 Aop(OP_CONCAT);
378cc40b
LW
5955 }
5956 /* FALL THROUGH */
5957 case '0': case '1': case '2': case '3': case '4':
5958 case '5': case '6': case '7': case '8': case '9':
6154021b 5959 s = scan_num(s, &pl_yylval);
931e0695 5960 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 5961 if (PL_expect == XOPERATOR)
8990e307 5962 no_op("Number",s);
79072805
LW
5963 TERM(THING);
5964
5965 case '\'':
5db06880 5966 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5967 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5968 if (PL_expect == XOPERATOR) {
5969 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5970 return deprecate_commaless_var_list();
a0d0e21e 5971 }
463ee0b2 5972 else
8990e307 5973 no_op("String",s);
463ee0b2 5974 }
79072805 5975 if (!s)
d4c19fe8 5976 missingterm(NULL);
6154021b 5977 pl_yylval.ival = OP_CONST;
79072805
LW
5978 TERM(sublex_start());
5979
5980 case '"':
5db06880 5981 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5982 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5983 if (PL_expect == XOPERATOR) {
5984 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5985 return deprecate_commaless_var_list();
a0d0e21e 5986 }
463ee0b2 5987 else
8990e307 5988 no_op("String",s);
463ee0b2 5989 }
79072805 5990 if (!s)
d4c19fe8 5991 missingterm(NULL);
6154021b 5992 pl_yylval.ival = OP_CONST;
cfd0369c
NC
5993 /* FIXME. I think that this can be const if char *d is replaced by
5994 more localised variables. */
3280af22 5995 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 5996 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 5997 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
5998 break;
5999 }
6000 }
79072805
LW
6001 TERM(sublex_start());
6002
6003 case '`':
5db06880 6004 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6005 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6006 if (PL_expect == XOPERATOR)
8990e307 6007 no_op("Backticks",s);
79072805 6008 if (!s)
d4c19fe8 6009 missingterm(NULL);
9b201d7d 6010 readpipe_override();
79072805
LW
6011 TERM(sublex_start());
6012
6013 case '\\':
6014 s++;
a2a5de95
NC
6015 if (PL_lex_inwhat && isDIGIT(*s))
6016 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6017 *s, *s);
3280af22 6018 if (PL_expect == XOPERATOR)
8990e307 6019 no_op("Backslash",s);
79072805
LW
6020 OPERATOR(REFGEN);
6021
a7cb1f99 6022 case 'v':
e526c9e6 6023 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6024 char *start = s + 2;
dd629d5b 6025 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6026 start++;
6027 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6028 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6029 TERM(THING);
6030 }
e526c9e6 6031 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6032 else if (!isALPHA(*start) && (PL_expect == XTERM
6033 || PL_expect == XREF || PL_expect == XSTATE
6034 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6035 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6036 if (!gv) {
6154021b 6037 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6038 TERM(THING);
6039 }
6040 }
a7cb1f99
GS
6041 }
6042 goto keylookup;
79072805 6043 case 'x':
3280af22 6044 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6045 s++;
6046 Mop(OP_REPEAT);
2f3197b3 6047 }
79072805
LW
6048 goto keylookup;
6049
378cc40b 6050 case '_':
79072805
LW
6051 case 'a': case 'A':
6052 case 'b': case 'B':
6053 case 'c': case 'C':
6054 case 'd': case 'D':
6055 case 'e': case 'E':
6056 case 'f': case 'F':
6057 case 'g': case 'G':
6058 case 'h': case 'H':
6059 case 'i': case 'I':
6060 case 'j': case 'J':
6061 case 'k': case 'K':
6062 case 'l': case 'L':
6063 case 'm': case 'M':
6064 case 'n': case 'N':
6065 case 'o': case 'O':
6066 case 'p': case 'P':
6067 case 'q': case 'Q':
6068 case 'r': case 'R':
6069 case 's': case 'S':
6070 case 't': case 'T':
6071 case 'u': case 'U':
a7cb1f99 6072 case 'V':
79072805
LW
6073 case 'w': case 'W':
6074 case 'X':
6075 case 'y': case 'Y':
6076 case 'z': case 'Z':
6077
49dc05e3 6078 keylookup: {
88e1f1a2 6079 bool anydelim;
90771dc0 6080 I32 tmp;
10edeb5d
JH
6081
6082 orig_keyword = 0;
6083 gv = NULL;
6084 gvp = NULL;
49dc05e3 6085
3280af22
NIS
6086 PL_bufptr = s;
6087 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6088
6089 /* Some keywords can be followed by any delimiter, including ':' */
88e1f1a2 6090 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
155aba94
GS
6091 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6092 (PL_tokenbuf[0] == 'q' &&
6093 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 6094
6095 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6096 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6097 goto just_a_word;
6098
3643fb5f 6099 d = s;
3280af22 6100 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6101 d++; /* no comments skipped here, or s### is misparsed */
6102
748a9306 6103 /* Is this a word before a => operator? */
1c3923b3 6104 if (*d == '=' && d[1] == '>') {
748a9306 6105 CLINE;
6154021b 6106 pl_yylval.opval
d0a148a6
NC
6107 = (OP*)newSVOP(OP_CONST, 0,
6108 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6109 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6110 TERM(WORD);
6111 }
6112
88e1f1a2
JV
6113 /* Check for plugged-in keyword */
6114 {
6115 OP *o;
6116 int result;
6117 char *saved_bufptr = PL_bufptr;
6118 PL_bufptr = s;
6119 result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
6120 s = PL_bufptr;
6121 if (result == KEYWORD_PLUGIN_DECLINE) {
6122 /* not a plugged-in keyword */
6123 PL_bufptr = saved_bufptr;
6124 } else if (result == KEYWORD_PLUGIN_STMT) {
6125 pl_yylval.opval = o;
6126 CLINE;
6127 PL_expect = XSTATE;
6128 return REPORT(PLUGSTMT);
6129 } else if (result == KEYWORD_PLUGIN_EXPR) {
6130 pl_yylval.opval = o;
6131 CLINE;
6132 PL_expect = XOPERATOR;
6133 return REPORT(PLUGEXPR);
6134 } else {
6135 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6136 PL_tokenbuf);
6137 }
6138 }
6139
6140 /* Check for built-in keyword */
6141 tmp = keyword(PL_tokenbuf, len, 0);
6142
6143 /* Is this a label? */
6144 if (!anydelim && PL_expect == XSTATE
6145 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6146 s = d + 1;
6147 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6148 CLINE;
6149 TOKEN(LABEL);
6150 }
6151
a0d0e21e 6152 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6153 GV *ogv = NULL; /* override (winner) */
6154 GV *hgv = NULL; /* hidden (loser) */
3280af22 6155 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6156 CV *cv;
90e5519e 6157 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6158 (cv = GvCVu(gv)))
6159 {
6160 if (GvIMPORTED_CV(gv))
6161 ogv = gv;
6162 else if (! CvMETHOD(cv))
6163 hgv = gv;
6164 }
6165 if (!ogv &&
3280af22 6166 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6167 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6168 GvCVu(gv) && GvIMPORTED_CV(gv))
6169 {
6170 ogv = gv;
6171 }
6172 }
6173 if (ogv) {
30fe34ed 6174 orig_keyword = tmp;
56f7f34b 6175 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6176 }
6177 else if (gv && !gvp
6178 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6179 && GvCVu(gv))
6e7b2336
GS
6180 {
6181 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6182 }
56f7f34b
CS
6183 else { /* no override */
6184 tmp = -tmp;
a2a5de95
NC
6185 if (tmp == KEY_dump) {
6186 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6187 "dump() better written as CORE::dump()");
ac206dc8 6188 }
a0714e2c 6189 gv = NULL;
56f7f34b 6190 gvp = 0;
a2a5de95
NC
6191 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6192 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6193 "Ambiguous call resolved as CORE::%s(), %s",
6194 GvENAME(hgv), "qualify as such or use &");
49dc05e3 6195 }
a0d0e21e
LW
6196 }
6197
6198 reserved_word:
6199 switch (tmp) {
79072805
LW
6200
6201 default: /* not a keyword */
0bfa2a8a
NC
6202 /* Trade off - by using this evil construction we can pull the
6203 variable gv into the block labelled keylookup. If not, then
6204 we have to give it function scope so that the goto from the
6205 earlier ':' case doesn't bypass the initialisation. */
6206 if (0) {
6207 just_a_word_zero_gv:
6208 gv = NULL;
6209 gvp = NULL;
8bee0991 6210 orig_keyword = 0;
0bfa2a8a 6211 }
93a17b20 6212 just_a_word: {
96e4d5b1 6213 SV *sv;
ce29ac45 6214 int pkgname = 0;
f54cb97a 6215 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6216 OP *rv2cv_op;
5069cc75 6217 CV *cv;
5db06880 6218#ifdef PERL_MAD
cd81e915 6219 SV *nextPL_nextwhite = 0;
5db06880
NC
6220#endif
6221
8990e307
LW
6222
6223 /* Get the rest if it looks like a package qualifier */
6224
155aba94 6225 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6226 STRLEN morelen;
3280af22 6227 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6228 TRUE, &morelen);
6229 if (!morelen)
cea2e8a9 6230 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6231 *s == '\'' ? "'" : "::");
c3e0f903 6232 len += morelen;
ce29ac45 6233 pkgname = 1;
a0d0e21e 6234 }
8990e307 6235
3280af22
NIS
6236 if (PL_expect == XOPERATOR) {
6237 if (PL_bufptr == PL_linestart) {
57843af0 6238 CopLINE_dec(PL_curcop);
f1f66076 6239 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6240 CopLINE_inc(PL_curcop);
463ee0b2
LW
6241 }
6242 else
54310121 6243 no_op("Bareword",s);
463ee0b2 6244 }
8990e307 6245
c3e0f903
GS
6246 /* Look for a subroutine with this name in current package,
6247 unless name is "Foo::", in which case Foo is a bearword
6248 (and a package name). */
6249
5db06880 6250 if (len > 2 && !PL_madskills &&
3280af22 6251 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6252 {
f776e3cd 6253 if (ckWARN(WARN_BAREWORD)
90e5519e 6254 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6255 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6256 "Bareword \"%s\" refers to nonexistent package",
3280af22 6257 PL_tokenbuf);
c3e0f903 6258 len -= 2;
3280af22 6259 PL_tokenbuf[len] = '\0';
a0714e2c 6260 gv = NULL;
c3e0f903
GS
6261 gvp = 0;
6262 }
6263 else {
62d55b22
NC
6264 if (!gv) {
6265 /* Mustn't actually add anything to a symbol table.
6266 But also don't want to "initialise" any placeholder
6267 constants that might already be there into full
6268 blown PVGVs with attached PVCV. */
90e5519e
NC
6269 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6270 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6271 }
b3d904f3 6272 len = 0;
c3e0f903
GS
6273 }
6274
6275 /* if we saw a global override before, get the right name */
8990e307 6276
49dc05e3 6277 if (gvp) {
396482e1 6278 sv = newSVpvs("CORE::GLOBAL::");
3280af22 6279 sv_catpv(sv,PL_tokenbuf);
49dc05e3 6280 }
8a7a129d
NC
6281 else {
6282 /* If len is 0, newSVpv does strlen(), which is correct.
6283 If len is non-zero, then it will be the true length,
6284 and so the scalar will be created correctly. */
6285 sv = newSVpv(PL_tokenbuf,len);
6286 }
5db06880 6287#ifdef PERL_MAD
cd81e915
NC
6288 if (PL_madskills && !PL_thistoken) {
6289 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6290 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6291 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6292 }
6293#endif
8990e307 6294
a0d0e21e
LW
6295 /* Presume this is going to be a bareword of some sort. */
6296
6297 CLINE;
6154021b
RGS
6298 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6299 pl_yylval.opval->op_private = OPpCONST_BARE;
8f8cf39c
JH
6300 /* UTF-8 package name? */
6301 if (UTF && !IN_BYTES &&
95a20fc0 6302 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
8f8cf39c 6303 SvUTF8_on(sv);
a0d0e21e 6304
c3e0f903
GS
6305 /* And if "Foo::", then that's what it certainly is. */
6306
6307 if (len)
6308 goto safe_bareword;
6309
f7461760
Z
6310 cv = NULL;
6311 {
6312 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6313 const_op->op_private = OPpCONST_BARE;
6314 rv2cv_op = newCVREF(0, const_op);
6315 }
6316 if (rv2cv_op->op_type == OP_RV2CV &&
6317 (rv2cv_op->op_flags & OPf_KIDS)) {
6318 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6319 switch (rv_op->op_type) {
6320 case OP_CONST: {
6321 SV *sv = cSVOPx_sv(rv_op);
6322 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6323 cv = (CV*)SvRV(sv);
6324 } break;
6325 case OP_GV: {
6326 GV *gv = cGVOPx_gv(rv_op);
6327 CV *maybe_cv = GvCVu(gv);
6328 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6329 cv = maybe_cv;
6330 } break;
6331 }
6332 }
5069cc75 6333
8990e307
LW
6334 /* See if it's the indirect object for a list operator. */
6335
3280af22
NIS
6336 if (PL_oldoldbufptr &&
6337 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6338 (PL_oldoldbufptr == PL_last_lop
6339 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6340 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6341 (PL_expect == XREF ||
6342 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6343 {
748a9306
LW
6344 bool immediate_paren = *s == '(';
6345
a0d0e21e 6346 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6347 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6348#ifdef PERL_MAD
cd81e915 6349 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6350#endif
a0d0e21e
LW
6351
6352 /* Two barewords in a row may indicate method call. */
6353
62d55b22 6354 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6355 (tmp = intuit_method(s, gv, cv))) {
6356 op_free(rv2cv_op);
bbf60fe6 6357 return REPORT(tmp);
f7461760 6358 }
a0d0e21e
LW
6359
6360 /* If not a declared subroutine, it's an indirect object. */
6361 /* (But it's an indir obj regardless for sort.) */
7294df96 6362 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6363
7294df96
RGS
6364 if (
6365 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6366 (!cv &&
a9ef352a 6367 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6368 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6369 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6370 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6371 )
a9ef352a 6372 {
3280af22 6373 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6374 goto bareword;
93a17b20
LW
6375 }
6376 }
8990e307 6377
3280af22 6378 PL_expect = XOPERATOR;
5db06880
NC
6379#ifdef PERL_MAD
6380 if (isSPACE(*s))
cd81e915
NC
6381 s = SKIPSPACE2(s,nextPL_nextwhite);
6382 PL_nextwhite = nextPL_nextwhite;
5db06880 6383#else
8990e307 6384 s = skipspace(s);
5db06880 6385#endif
1c3923b3
GS
6386
6387 /* Is this a word before a => operator? */
ce29ac45 6388 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6389 op_free(rv2cv_op);
1c3923b3 6390 CLINE;
6154021b 6391 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6392 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6393 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6394 TERM(WORD);
6395 }
6396
6397 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6398 if (*s == '(') {
79072805 6399 CLINE;
5069cc75 6400 if (cv) {
c35e046a
AL
6401 d = s + 1;
6402 while (SPACE_OR_TAB(*d))
6403 d++;
f7461760 6404 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6405 s = d + 1;
c631f32b 6406 goto its_constant;
96e4d5b1 6407 }
6408 }
5db06880
NC
6409#ifdef PERL_MAD
6410 if (PL_madskills) {
cd81e915
NC
6411 PL_nextwhite = PL_thiswhite;
6412 PL_thiswhite = 0;
5db06880 6413 }
cd81e915 6414 start_force(PL_curforce);
5db06880 6415#endif
6154021b 6416 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6417 PL_expect = XOPERATOR;
5db06880
NC
6418#ifdef PERL_MAD
6419 if (PL_madskills) {
cd81e915
NC
6420 PL_nextwhite = nextPL_nextwhite;
6421 curmad('X', PL_thistoken);
6b29d1f5 6422 PL_thistoken = newSVpvs("");
5db06880
NC
6423 }
6424#endif
f7461760 6425 op_free(rv2cv_op);
93a17b20 6426 force_next(WORD);
6154021b 6427 pl_yylval.ival = 0;
463ee0b2 6428 TOKEN('&');
79072805 6429 }
93a17b20 6430
a0d0e21e 6431 /* If followed by var or block, call it a method (unless sub) */
8990e307 6432
f7461760
Z
6433 if ((*s == '$' || *s == '{') && !cv) {
6434 op_free(rv2cv_op);
3280af22
NIS
6435 PL_last_lop = PL_oldbufptr;
6436 PL_last_lop_op = OP_METHOD;
93a17b20 6437 PREBLOCK(METHOD);
463ee0b2
LW
6438 }
6439
8990e307
LW
6440 /* If followed by a bareword, see if it looks like indir obj. */
6441
30fe34ed
RGS
6442 if (!orig_keyword
6443 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6444 && (tmp = intuit_method(s, gv, cv))) {
6445 op_free(rv2cv_op);
bbf60fe6 6446 return REPORT(tmp);
f7461760 6447 }
93a17b20 6448
8990e307
LW
6449 /* Not a method, so call it a subroutine (if defined) */
6450
5069cc75 6451 if (cv) {
9b387841
NC
6452 if (lastchar == '-')
6453 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6454 "Ambiguous use of -%s resolved as -&%s()",
6455 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6456 /* Check for a constant sub */
f7461760 6457 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6458 its_constant:
f7461760 6459 op_free(rv2cv_op);
6154021b
RGS
6460 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6461 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6462 pl_yylval.opval->op_private = 0;
96e4d5b1 6463 TOKEN(WORD);
89bfa8cd 6464 }
6465
6154021b 6466 op_free(pl_yylval.opval);
f7461760 6467 pl_yylval.opval = rv2cv_op;
6154021b 6468 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6469 PL_last_lop = PL_oldbufptr;
bf848113 6470 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6471 /* Is there a prototype? */
5db06880
NC
6472 if (
6473#ifdef PERL_MAD
6474 cv &&
6475#endif
d9f2850e
RGS
6476 SvPOK(cv))
6477 {
5f66b61c 6478 STRLEN protolen;
daba3364 6479 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6480 if (!protolen)
4633a7c4 6481 TERM(FUNC0SUB);
8c28b960 6482 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
4633a7c4 6483 OPERATOR(UNIOPSUB);
0f5d0394
AE
6484 while (*proto == ';')
6485 proto++;
7a52d87a 6486 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6487 if (PL_curstash)
6488 sv_setpvs(PL_subname, "__ANON__");
6489 else
6490 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6491 PREBLOCK(LSTOPSUB);
6492 }
a9ef352a 6493 }
5db06880
NC
6494#ifdef PERL_MAD
6495 {
6496 if (PL_madskills) {
cd81e915
NC
6497 PL_nextwhite = PL_thiswhite;
6498 PL_thiswhite = 0;
5db06880 6499 }
cd81e915 6500 start_force(PL_curforce);
6154021b 6501 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6502 PL_expect = XTERM;
6503 if (PL_madskills) {
cd81e915
NC
6504 PL_nextwhite = nextPL_nextwhite;
6505 curmad('X', PL_thistoken);
6b29d1f5 6506 PL_thistoken = newSVpvs("");
5db06880
NC
6507 }
6508 force_next(WORD);
6509 TOKEN(NOAMP);
6510 }
6511 }
6512
6513 /* Guess harder when madskills require "best effort". */
6514 if (PL_madskills && (!gv || !GvCVu(gv))) {
6515 int probable_sub = 0;
6516 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6517 probable_sub = 1;
6518 else if (isALPHA(*s)) {
6519 char tmpbuf[1024];
6520 STRLEN tmplen;
6521 d = s;
6522 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6523 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6524 probable_sub = 1;
6525 else {
6526 while (d < PL_bufend && isSPACE(*d))
6527 d++;
6528 if (*d == '=' && d[1] == '>')
6529 probable_sub = 1;
6530 }
6531 }
6532 if (probable_sub) {
7a6d04f4 6533 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6534 op_free(pl_yylval.opval);
f7461760 6535 pl_yylval.opval = rv2cv_op;
6154021b 6536 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6537 PL_last_lop = PL_oldbufptr;
6538 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6539 PL_nextwhite = PL_thiswhite;
6540 PL_thiswhite = 0;
6541 start_force(PL_curforce);
6154021b 6542 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6543 PL_expect = XTERM;
cd81e915
NC
6544 PL_nextwhite = nextPL_nextwhite;
6545 curmad('X', PL_thistoken);
6b29d1f5 6546 PL_thistoken = newSVpvs("");
5db06880
NC
6547 force_next(WORD);
6548 TOKEN(NOAMP);
6549 }
6550#else
6154021b 6551 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6552 PL_expect = XTERM;
8990e307
LW
6553 force_next(WORD);
6554 TOKEN(NOAMP);
5db06880 6555#endif
8990e307 6556 }
748a9306 6557
8990e307
LW
6558 /* Call it a bare word */
6559
5603f27d 6560 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6561 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6562 else {
9a073a1d
RGS
6563 bareword:
6564 /* after "print" and similar functions (corresponding to
6565 * "F? L" in opcode.pl), whatever wasn't already parsed as
6566 * a filehandle should be subject to "strict subs".
6567 * Likewise for the optional indirect-object argument to system
6568 * or exec, which can't be a bareword */
6569 if ((PL_last_lop_op == OP_PRINT
6570 || PL_last_lop_op == OP_PRTF
6571 || PL_last_lop_op == OP_SAY
6572 || PL_last_lop_op == OP_SYSTEM
6573 || PL_last_lop_op == OP_EXEC)
6574 && (PL_hints & HINT_STRICT_SUBS))
6575 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6576 if (lastchar != '-') {
6577 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6578 d = PL_tokenbuf;
6579 while (isLOWER(*d))
6580 d++;
da51bb9b 6581 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6582 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6583 PL_tokenbuf);
6584 }
748a9306
LW
6585 }
6586 }
f7461760 6587 op_free(rv2cv_op);
c3e0f903
GS
6588
6589 safe_bareword:
9b387841
NC
6590 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6591 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6592 "Operator or semicolon missing before %c%s",
6593 lastchar, PL_tokenbuf);
6594 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6595 "Ambiguous use of %c resolved as operator %c",
6596 lastchar, lastchar);
748a9306 6597 }
93a17b20 6598 TOKEN(WORD);
79072805 6599 }
79072805 6600
68dc0745 6601 case KEY___FILE__:
6154021b 6602 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6603 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6604 TERM(THING);
6605
79072805 6606 case KEY___LINE__:
6154021b 6607 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6608 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6609 TERM(THING);
68dc0745 6610
6611 case KEY___PACKAGE__:
6154021b 6612 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6613 (PL_curstash
5aaec2b4 6614 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6615 : &PL_sv_undef));
79072805 6616 TERM(THING);
79072805 6617
e50aee73 6618 case KEY___DATA__:
79072805
LW
6619 case KEY___END__: {
6620 GV *gv;
3280af22 6621 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6622 const char *pname = "main";
3280af22 6623 if (PL_tokenbuf[2] == 'D')
bfcb3514 6624 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6625 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6626 SVt_PVIO);
a5f75d66 6627 GvMULTI_on(gv);
79072805 6628 if (!GvIO(gv))
a0d0e21e 6629 GvIOp(gv) = newIO();
3280af22 6630 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6631#if defined(HAS_FCNTL) && defined(F_SETFD)
6632 {
f54cb97a 6633 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6634 fcntl(fd,F_SETFD,fd >= 3);
6635 }
79072805 6636#endif
fd049845 6637 /* Mark this internal pseudo-handle as clean */
6638 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6639 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6640 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6641 else
50952442 6642 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6643#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6644 /* if the script was opened in binmode, we need to revert
53129d29 6645 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6646 * XXX this is a questionable hack at best. */
53129d29
GS
6647 if (PL_bufend-PL_bufptr > 2
6648 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6649 {
6650 Off_t loc = 0;
50952442 6651 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6652 loc = PerlIO_tell(PL_rsfp);
6653 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6654 }
2986a63f
JH
6655#ifdef NETWARE
6656 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6657#else
c39cd008 6658 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6659#endif /* NETWARE */
1143fce0
JH
6660#ifdef PERLIO_IS_STDIO /* really? */
6661# if defined(__BORLANDC__)
cb359b41
JH
6662 /* XXX see note in do_binmode() */
6663 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6664# endif
6665#endif
c39cd008
GS
6666 if (loc > 0)
6667 PerlIO_seek(PL_rsfp, loc, 0);
6668 }
6669 }
6670#endif
7948272d 6671#ifdef PERLIO_LAYERS
52d2e0f4
JH
6672 if (!IN_BYTES) {
6673 if (UTF)
6674 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6675 else if (PL_encoding) {
6676 SV *name;
6677 dSP;
6678 ENTER;
6679 SAVETMPS;
6680 PUSHMARK(sp);
6681 EXTEND(SP, 1);
6682 XPUSHs(PL_encoding);
6683 PUTBACK;
6684 call_method("name", G_SCALAR);
6685 SPAGAIN;
6686 name = POPs;
6687 PUTBACK;
bfed75c6 6688 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6689 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6690 SVfARG(name)));
52d2e0f4
JH
6691 FREETMPS;
6692 LEAVE;
6693 }
6694 }
7948272d 6695#endif
5db06880
NC
6696#ifdef PERL_MAD
6697 if (PL_madskills) {
cd81e915
NC
6698 if (PL_realtokenstart >= 0) {
6699 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6700 if (!PL_endwhite)
6b29d1f5 6701 PL_endwhite = newSVpvs("");
cd81e915
NC
6702 sv_catsv(PL_endwhite, PL_thiswhite);
6703 PL_thiswhite = 0;
6704 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6705 PL_realtokenstart = -1;
5db06880 6706 }
5cc814fd
NC
6707 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6708 != NULL) ;
5db06880
NC
6709 }
6710#endif
4608196e 6711 PL_rsfp = NULL;
79072805
LW
6712 }
6713 goto fake_eof;
e929a76b 6714 }
de3bb511 6715
8990e307 6716 case KEY_AUTOLOAD:
ed6116ce 6717 case KEY_DESTROY:
79072805 6718 case KEY_BEGIN:
3c10abe3 6719 case KEY_UNITCHECK:
7d30b5c4 6720 case KEY_CHECK:
7d07dbc2 6721 case KEY_INIT:
7d30b5c4 6722 case KEY_END:
3280af22
NIS
6723 if (PL_expect == XSTATE) {
6724 s = PL_bufptr;
93a17b20 6725 goto really_sub;
79072805
LW
6726 }
6727 goto just_a_word;
6728
a0d0e21e
LW
6729 case KEY_CORE:
6730 if (*s == ':' && s[1] == ':') {
6731 s += 2;
748a9306 6732 d = s;
3280af22 6733 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6734 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6735 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6736 if (tmp < 0)
6737 tmp = -tmp;
850e8516 6738 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6739 /* that's a way to remember we saw "CORE::" */
850e8516 6740 orig_keyword = tmp;
a0d0e21e
LW
6741 goto reserved_word;
6742 }
6743 goto just_a_word;
6744
463ee0b2
LW
6745 case KEY_abs:
6746 UNI(OP_ABS);
6747
79072805
LW
6748 case KEY_alarm:
6749 UNI(OP_ALARM);
6750
6751 case KEY_accept:
a0d0e21e 6752 LOP(OP_ACCEPT,XTERM);
79072805 6753
463ee0b2
LW
6754 case KEY_and:
6755 OPERATOR(ANDOP);
6756
79072805 6757 case KEY_atan2:
a0d0e21e 6758 LOP(OP_ATAN2,XTERM);
85e6fe83 6759
79072805 6760 case KEY_bind:
a0d0e21e 6761 LOP(OP_BIND,XTERM);
79072805
LW
6762
6763 case KEY_binmode:
1c1fc3ea 6764 LOP(OP_BINMODE,XTERM);
79072805
LW
6765
6766 case KEY_bless:
a0d0e21e 6767 LOP(OP_BLESS,XTERM);
79072805 6768
0d863452
RH
6769 case KEY_break:
6770 FUN0(OP_BREAK);
6771
79072805
LW
6772 case KEY_chop:
6773 UNI(OP_CHOP);
6774
6775 case KEY_continue:
0d863452
RH
6776 /* When 'use switch' is in effect, continue has a dual
6777 life as a control operator. */
6778 {
ef89dcc3 6779 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6780 PREBLOCK(CONTINUE);
6781 else {
6782 /* We have to disambiguate the two senses of
6783 "continue". If the next token is a '{' then
6784 treat it as the start of a continue block;
6785 otherwise treat it as a control operator.
6786 */
6787 s = skipspace(s);
6788 if (*s == '{')
79072805 6789 PREBLOCK(CONTINUE);
0d863452
RH
6790 else
6791 FUN0(OP_CONTINUE);
6792 }
6793 }
79072805
LW
6794
6795 case KEY_chdir:
fafc274c
NC
6796 /* may use HOME */
6797 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6798 UNI(OP_CHDIR);
6799
6800 case KEY_close:
6801 UNI(OP_CLOSE);
6802
6803 case KEY_closedir:
6804 UNI(OP_CLOSEDIR);
6805
6806 case KEY_cmp:
6807 Eop(OP_SCMP);
6808
6809 case KEY_caller:
6810 UNI(OP_CALLER);
6811
6812 case KEY_crypt:
6813#ifdef FCRYPT
f4c556ac
GS
6814 if (!PL_cryptseen) {
6815 PL_cryptseen = TRUE;
de3bb511 6816 init_des();
f4c556ac 6817 }
a687059c 6818#endif
a0d0e21e 6819 LOP(OP_CRYPT,XTERM);
79072805
LW
6820
6821 case KEY_chmod:
a0d0e21e 6822 LOP(OP_CHMOD,XTERM);
79072805
LW
6823
6824 case KEY_chown:
a0d0e21e 6825 LOP(OP_CHOWN,XTERM);
79072805
LW
6826
6827 case KEY_connect:
a0d0e21e 6828 LOP(OP_CONNECT,XTERM);
79072805 6829
463ee0b2
LW
6830 case KEY_chr:
6831 UNI(OP_CHR);
6832
79072805
LW
6833 case KEY_cos:
6834 UNI(OP_COS);
6835
6836 case KEY_chroot:
6837 UNI(OP_CHROOT);
6838
0d863452
RH
6839 case KEY_default:
6840 PREBLOCK(DEFAULT);
6841
79072805 6842 case KEY_do:
29595ff2 6843 s = SKIPSPACE1(s);
79072805 6844 if (*s == '{')
a0d0e21e 6845 PRETERMBLOCK(DO);
79072805 6846 if (*s != '\'')
89c5585f 6847 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6848 if (orig_keyword == KEY_do) {
6849 orig_keyword = 0;
6154021b 6850 pl_yylval.ival = 1;
850e8516
RGS
6851 }
6852 else
6154021b 6853 pl_yylval.ival = 0;
378cc40b 6854 OPERATOR(DO);
79072805
LW
6855
6856 case KEY_die:
3280af22 6857 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6858 LOP(OP_DIE,XTERM);
79072805
LW
6859
6860 case KEY_defined:
6861 UNI(OP_DEFINED);
6862
6863 case KEY_delete:
a0d0e21e 6864 UNI(OP_DELETE);
79072805
LW
6865
6866 case KEY_dbmopen:
5c1737d1 6867 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
a0d0e21e 6868 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6869
6870 case KEY_dbmclose:
6871 UNI(OP_DBMCLOSE);
6872
6873 case KEY_dump:
a0d0e21e 6874 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6875 LOOPX(OP_DUMP);
6876
6877 case KEY_else:
6878 PREBLOCK(ELSE);
6879
6880 case KEY_elsif:
6154021b 6881 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6882 OPERATOR(ELSIF);
6883
6884 case KEY_eq:
6885 Eop(OP_SEQ);
6886
a0d0e21e
LW
6887 case KEY_exists:
6888 UNI(OP_EXISTS);
4e553d73 6889
79072805 6890 case KEY_exit:
5db06880
NC
6891 if (PL_madskills)
6892 UNI(OP_INT);
79072805
LW
6893 UNI(OP_EXIT);
6894
6895 case KEY_eval:
29595ff2 6896 s = SKIPSPACE1(s);
32e2a35d
RGS
6897 if (*s == '{') { /* block eval */
6898 PL_expect = XTERMBLOCK;
6899 UNIBRACK(OP_ENTERTRY);
6900 }
6901 else { /* string eval */
6902 PL_expect = XTERM;
6903 UNIBRACK(OP_ENTEREVAL);
6904 }
79072805
LW
6905
6906 case KEY_eof:
6907 UNI(OP_EOF);
6908
6909 case KEY_exp:
6910 UNI(OP_EXP);
6911
6912 case KEY_each:
6913 UNI(OP_EACH);
6914
6915 case KEY_exec:
a0d0e21e 6916 LOP(OP_EXEC,XREF);
79072805
LW
6917
6918 case KEY_endhostent:
6919 FUN0(OP_EHOSTENT);
6920
6921 case KEY_endnetent:
6922 FUN0(OP_ENETENT);
6923
6924 case KEY_endservent:
6925 FUN0(OP_ESERVENT);
6926
6927 case KEY_endprotoent:
6928 FUN0(OP_EPROTOENT);
6929
6930 case KEY_endpwent:
6931 FUN0(OP_EPWENT);
6932
6933 case KEY_endgrent:
6934 FUN0(OP_EGRENT);
6935
6936 case KEY_for:
6937 case KEY_foreach:
6154021b 6938 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 6939 s = SKIPSPACE1(s);
7e2040f0 6940 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 6941 char *p = s;
5db06880
NC
6942#ifdef PERL_MAD
6943 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6944#endif
6945
3280af22 6946 if ((PL_bufend - p) >= 3 &&
55497cff 6947 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6948 p += 2;
77ca0c92
LW
6949 else if ((PL_bufend - p) >= 4 &&
6950 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6951 p += 3;
29595ff2 6952 p = PEEKSPACE(p);
7e2040f0 6953 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
6954 p = scan_ident(p, PL_bufend,
6955 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 6956 p = PEEKSPACE(p);
77ca0c92
LW
6957 }
6958 if (*p != '$')
cea2e8a9 6959 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
6960#ifdef PERL_MAD
6961 s = SvPVX(PL_linestr) + soff;
6962#endif
55497cff 6963 }
79072805
LW
6964 OPERATOR(FOR);
6965
6966 case KEY_formline:
a0d0e21e 6967 LOP(OP_FORMLINE,XTERM);
79072805
LW
6968
6969 case KEY_fork:
6970 FUN0(OP_FORK);
6971
6972 case KEY_fcntl:
a0d0e21e 6973 LOP(OP_FCNTL,XTERM);
79072805
LW
6974
6975 case KEY_fileno:
6976 UNI(OP_FILENO);
6977
6978 case KEY_flock:
a0d0e21e 6979 LOP(OP_FLOCK,XTERM);
79072805
LW
6980
6981 case KEY_gt:
6982 Rop(OP_SGT);
6983
6984 case KEY_ge:
6985 Rop(OP_SGE);
6986
6987 case KEY_grep:
2c38e13d 6988 LOP(OP_GREPSTART, XREF);
79072805
LW
6989
6990 case KEY_goto:
a0d0e21e 6991 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6992 LOOPX(OP_GOTO);
6993
6994 case KEY_gmtime:
6995 UNI(OP_GMTIME);
6996
6997 case KEY_getc:
6f33ba73 6998 UNIDOR(OP_GETC);
79072805
LW
6999
7000 case KEY_getppid:
7001 FUN0(OP_GETPPID);
7002
7003 case KEY_getpgrp:
7004 UNI(OP_GETPGRP);
7005
7006 case KEY_getpriority:
a0d0e21e 7007 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7008
7009 case KEY_getprotobyname:
7010 UNI(OP_GPBYNAME);
7011
7012 case KEY_getprotobynumber:
a0d0e21e 7013 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7014
7015 case KEY_getprotoent:
7016 FUN0(OP_GPROTOENT);
7017
7018 case KEY_getpwent:
7019 FUN0(OP_GPWENT);
7020
7021 case KEY_getpwnam:
ff68c719 7022 UNI(OP_GPWNAM);
79072805
LW
7023
7024 case KEY_getpwuid:
ff68c719 7025 UNI(OP_GPWUID);
79072805
LW
7026
7027 case KEY_getpeername:
7028 UNI(OP_GETPEERNAME);
7029
7030 case KEY_gethostbyname:
7031 UNI(OP_GHBYNAME);
7032
7033 case KEY_gethostbyaddr:
a0d0e21e 7034 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7035
7036 case KEY_gethostent:
7037 FUN0(OP_GHOSTENT);
7038
7039 case KEY_getnetbyname:
7040 UNI(OP_GNBYNAME);
7041
7042 case KEY_getnetbyaddr:
a0d0e21e 7043 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7044
7045 case KEY_getnetent:
7046 FUN0(OP_GNETENT);
7047
7048 case KEY_getservbyname:
a0d0e21e 7049 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7050
7051 case KEY_getservbyport:
a0d0e21e 7052 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7053
7054 case KEY_getservent:
7055 FUN0(OP_GSERVENT);
7056
7057 case KEY_getsockname:
7058 UNI(OP_GETSOCKNAME);
7059
7060 case KEY_getsockopt:
a0d0e21e 7061 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7062
7063 case KEY_getgrent:
7064 FUN0(OP_GGRENT);
7065
7066 case KEY_getgrnam:
ff68c719 7067 UNI(OP_GGRNAM);
79072805
LW
7068
7069 case KEY_getgrgid:
ff68c719 7070 UNI(OP_GGRGID);
79072805
LW
7071
7072 case KEY_getlogin:
7073 FUN0(OP_GETLOGIN);
7074
0d863452 7075 case KEY_given:
6154021b 7076 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7077 OPERATOR(GIVEN);
7078
93a17b20 7079 case KEY_glob:
a0d0e21e 7080 LOP(OP_GLOB,XTERM);
93a17b20 7081
79072805
LW
7082 case KEY_hex:
7083 UNI(OP_HEX);
7084
7085 case KEY_if:
6154021b 7086 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7087 OPERATOR(IF);
7088
7089 case KEY_index:
a0d0e21e 7090 LOP(OP_INDEX,XTERM);
79072805
LW
7091
7092 case KEY_int:
7093 UNI(OP_INT);
7094
7095 case KEY_ioctl:
a0d0e21e 7096 LOP(OP_IOCTL,XTERM);
79072805
LW
7097
7098 case KEY_join:
a0d0e21e 7099 LOP(OP_JOIN,XTERM);
79072805
LW
7100
7101 case KEY_keys:
7102 UNI(OP_KEYS);
7103
7104 case KEY_kill:
a0d0e21e 7105 LOP(OP_KILL,XTERM);
79072805
LW
7106
7107 case KEY_last:
a0d0e21e 7108 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7109 LOOPX(OP_LAST);
4e553d73 7110
79072805
LW
7111 case KEY_lc:
7112 UNI(OP_LC);
7113
7114 case KEY_lcfirst:
7115 UNI(OP_LCFIRST);
7116
7117 case KEY_local:
6154021b 7118 pl_yylval.ival = 0;
79072805
LW
7119 OPERATOR(LOCAL);
7120
7121 case KEY_length:
7122 UNI(OP_LENGTH);
7123
7124 case KEY_lt:
7125 Rop(OP_SLT);
7126
7127 case KEY_le:
7128 Rop(OP_SLE);
7129
7130 case KEY_localtime:
7131 UNI(OP_LOCALTIME);
7132
7133 case KEY_log:
7134 UNI(OP_LOG);
7135
7136 case KEY_link:
a0d0e21e 7137 LOP(OP_LINK,XTERM);
79072805
LW
7138
7139 case KEY_listen:
a0d0e21e 7140 LOP(OP_LISTEN,XTERM);
79072805 7141
c0329465
MB
7142 case KEY_lock:
7143 UNI(OP_LOCK);
7144
79072805
LW
7145 case KEY_lstat:
7146 UNI(OP_LSTAT);
7147
7148 case KEY_m:
8782bef2 7149 s = scan_pat(s,OP_MATCH);
79072805
LW
7150 TERM(sublex_start());
7151
a0d0e21e 7152 case KEY_map:
2c38e13d 7153 LOP(OP_MAPSTART, XREF);
4e4e412b 7154
79072805 7155 case KEY_mkdir:
a0d0e21e 7156 LOP(OP_MKDIR,XTERM);
79072805
LW
7157
7158 case KEY_msgctl:
a0d0e21e 7159 LOP(OP_MSGCTL,XTERM);
79072805
LW
7160
7161 case KEY_msgget:
a0d0e21e 7162 LOP(OP_MSGGET,XTERM);
79072805
LW
7163
7164 case KEY_msgrcv:
a0d0e21e 7165 LOP(OP_MSGRCV,XTERM);
79072805
LW
7166
7167 case KEY_msgsnd:
a0d0e21e 7168 LOP(OP_MSGSND,XTERM);
79072805 7169
77ca0c92 7170 case KEY_our:
93a17b20 7171 case KEY_my:
952306ac 7172 case KEY_state:
eac04b2e 7173 PL_in_my = (U16)tmp;
29595ff2 7174 s = SKIPSPACE1(s);
7e2040f0 7175 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7176#ifdef PERL_MAD
7177 char* start = s;
7178#endif
3280af22 7179 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7180 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7181 goto really_sub;
def3634b 7182 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7183 if (!PL_in_my_stash) {
c750a3ec 7184 char tmpbuf[1024];
3280af22 7185 PL_bufptr = s;
d9fad198 7186 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7187 yyerror(tmpbuf);
7188 }
5db06880
NC
7189#ifdef PERL_MAD
7190 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7191 sv_catsv(PL_thistoken, PL_nextwhite);
7192 PL_nextwhite = 0;
7193 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7194 }
7195#endif
c750a3ec 7196 }
6154021b 7197 pl_yylval.ival = 1;
55497cff 7198 OPERATOR(MY);
93a17b20 7199
79072805 7200 case KEY_next:
a0d0e21e 7201 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7202 LOOPX(OP_NEXT);
7203
7204 case KEY_ne:
7205 Eop(OP_SNE);
7206
a0d0e21e 7207 case KEY_no:
468aa647 7208 s = tokenize_use(0, s);
a0d0e21e
LW
7209 OPERATOR(USE);
7210
7211 case KEY_not:
29595ff2 7212 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
7213 FUN1(OP_NOT);
7214 else
7215 OPERATOR(NOTOP);
a0d0e21e 7216
79072805 7217 case KEY_open:
29595ff2 7218 s = SKIPSPACE1(s);
7e2040f0 7219 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7220 const char *t;
c35e046a
AL
7221 for (d = s; isALNUM_lazy_if(d,UTF);)
7222 d++;
7223 for (t=d; isSPACE(*t);)
7224 t++;
e2ab214b 7225 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7226 /* [perl #16184] */
7227 && !(t[0] == '=' && t[1] == '>')
7228 ) {
5f66b61c 7229 int parms_len = (int)(d-s);
9014280d 7230 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7231 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7232 parms_len, s, parms_len, s);
66fbe8fb 7233 }
93a17b20 7234 }
a0d0e21e 7235 LOP(OP_OPEN,XTERM);
79072805 7236
463ee0b2 7237 case KEY_or:
6154021b 7238 pl_yylval.ival = OP_OR;
463ee0b2
LW
7239 OPERATOR(OROP);
7240
79072805
LW
7241 case KEY_ord:
7242 UNI(OP_ORD);
7243
7244 case KEY_oct:
7245 UNI(OP_OCT);
7246
7247 case KEY_opendir:
a0d0e21e 7248 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7249
7250 case KEY_print:
3280af22 7251 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7252 LOP(OP_PRINT,XREF);
79072805
LW
7253
7254 case KEY_printf:
3280af22 7255 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7256 LOP(OP_PRTF,XREF);
79072805 7257
c07a80fd 7258 case KEY_prototype:
7259 UNI(OP_PROTOTYPE);
7260
79072805 7261 case KEY_push:
a0d0e21e 7262 LOP(OP_PUSH,XTERM);
79072805
LW
7263
7264 case KEY_pop:
6f33ba73 7265 UNIDOR(OP_POP);
79072805 7266
a0d0e21e 7267 case KEY_pos:
6f33ba73 7268 UNIDOR(OP_POS);
4e553d73 7269
79072805 7270 case KEY_pack:
a0d0e21e 7271 LOP(OP_PACK,XTERM);
79072805
LW
7272
7273 case KEY_package:
a0d0e21e 7274 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7275 s = SKIPSPACE1(s);
91152fc1 7276 s = force_strict_version(s);
79072805
LW
7277 OPERATOR(PACKAGE);
7278
7279 case KEY_pipe:
a0d0e21e 7280 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7281
7282 case KEY_q:
5db06880 7283 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7284 if (!s)
d4c19fe8 7285 missingterm(NULL);
6154021b 7286 pl_yylval.ival = OP_CONST;
79072805
LW
7287 TERM(sublex_start());
7288
a0d0e21e
LW
7289 case KEY_quotemeta:
7290 UNI(OP_QUOTEMETA);
7291
8990e307 7292 case KEY_qw:
5db06880 7293 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7294 if (!s)
d4c19fe8 7295 missingterm(NULL);
3480a8d2 7296 PL_expect = XOPERATOR;
8127e0e3
GS
7297 force_next(')');
7298 if (SvCUR(PL_lex_stuff)) {
5f66b61c 7299 OP *words = NULL;
8127e0e3 7300 int warned = 0;
3280af22 7301 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7302 while (len) {
d4c19fe8
AL
7303 for (; isSPACE(*d) && len; --len, ++d)
7304 /**/;
8127e0e3 7305 if (len) {
d4c19fe8 7306 SV *sv;
f54cb97a 7307 const char *b = d;
e476b1b5 7308 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7309 for (; !isSPACE(*d) && len; --len, ++d) {
7310 if (*d == ',') {
9014280d 7311 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7312 "Possible attempt to separate words with commas");
7313 ++warned;
7314 }
7315 else if (*d == '#') {
9014280d 7316 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7317 "Possible attempt to put comments in qw() list");
7318 ++warned;
7319 }
7320 }
7321 }
7322 else {
d4c19fe8
AL
7323 for (; !isSPACE(*d) && len; --len, ++d)
7324 /**/;
8127e0e3 7325 }
740cce10 7326 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8127e0e3 7327 words = append_elem(OP_LIST, words,
7948272d 7328 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7329 }
7330 }
8127e0e3 7331 if (words) {
cd81e915 7332 start_force(PL_curforce);
9ded7720 7333 NEXTVAL_NEXTTOKE.opval = words;
8127e0e3
GS
7334 force_next(THING);
7335 }
55497cff 7336 }
37fd879b 7337 if (PL_lex_stuff) {
8127e0e3 7338 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7339 PL_lex_stuff = NULL;
37fd879b 7340 }
3280af22 7341 PL_expect = XTERM;
8127e0e3 7342 TOKEN('(');
8990e307 7343
79072805 7344 case KEY_qq:
5db06880 7345 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7346 if (!s)
d4c19fe8 7347 missingterm(NULL);
6154021b 7348 pl_yylval.ival = OP_STRINGIFY;
3280af22 7349 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7350 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7351 TERM(sublex_start());
7352
8782bef2
GB
7353 case KEY_qr:
7354 s = scan_pat(s,OP_QR);
7355 TERM(sublex_start());
7356
79072805 7357 case KEY_qx:
5db06880 7358 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7359 if (!s)
d4c19fe8 7360 missingterm(NULL);
9b201d7d 7361 readpipe_override();
79072805
LW
7362 TERM(sublex_start());
7363
7364 case KEY_return:
7365 OLDLOP(OP_RETURN);
7366
7367 case KEY_require:
29595ff2 7368 s = SKIPSPACE1(s);
e759cc13
RGS
7369 if (isDIGIT(*s)) {
7370 s = force_version(s, FALSE);
a7cb1f99 7371 }
e759cc13
RGS
7372 else if (*s != 'v' || !isDIGIT(s[1])
7373 || (s = force_version(s, TRUE), *s == 'v'))
7374 {
a7cb1f99
GS
7375 *PL_tokenbuf = '\0';
7376 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7377 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7378 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7379 else if (*s == '<')
7380 yyerror("<> should be quotes");
7381 }
a72a1c8b
RGS
7382 if (orig_keyword == KEY_require) {
7383 orig_keyword = 0;
6154021b 7384 pl_yylval.ival = 1;
a72a1c8b
RGS
7385 }
7386 else
6154021b 7387 pl_yylval.ival = 0;
a72a1c8b
RGS
7388 PL_expect = XTERM;
7389 PL_bufptr = s;
7390 PL_last_uni = PL_oldbufptr;
7391 PL_last_lop_op = OP_REQUIRE;
7392 s = skipspace(s);
7393 return REPORT( (int)REQUIRE );
79072805
LW
7394
7395 case KEY_reset:
7396 UNI(OP_RESET);
7397
7398 case KEY_redo:
a0d0e21e 7399 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7400 LOOPX(OP_REDO);
7401
7402 case KEY_rename:
a0d0e21e 7403 LOP(OP_RENAME,XTERM);
79072805
LW
7404
7405 case KEY_rand:
7406 UNI(OP_RAND);
7407
7408 case KEY_rmdir:
7409 UNI(OP_RMDIR);
7410
7411 case KEY_rindex:
a0d0e21e 7412 LOP(OP_RINDEX,XTERM);
79072805
LW
7413
7414 case KEY_read:
a0d0e21e 7415 LOP(OP_READ,XTERM);
79072805
LW
7416
7417 case KEY_readdir:
7418 UNI(OP_READDIR);
7419
93a17b20 7420 case KEY_readline:
6f33ba73 7421 UNIDOR(OP_READLINE);
93a17b20
LW
7422
7423 case KEY_readpipe:
0858480c 7424 UNIDOR(OP_BACKTICK);
93a17b20 7425
79072805
LW
7426 case KEY_rewinddir:
7427 UNI(OP_REWINDDIR);
7428
7429 case KEY_recv:
a0d0e21e 7430 LOP(OP_RECV,XTERM);
79072805
LW
7431
7432 case KEY_reverse:
a0d0e21e 7433 LOP(OP_REVERSE,XTERM);
79072805
LW
7434
7435 case KEY_readlink:
6f33ba73 7436 UNIDOR(OP_READLINK);
79072805
LW
7437
7438 case KEY_ref:
7439 UNI(OP_REF);
7440
7441 case KEY_s:
7442 s = scan_subst(s);
6154021b 7443 if (pl_yylval.opval)
79072805
LW
7444 TERM(sublex_start());
7445 else
7446 TOKEN(1); /* force error */
7447
0d863452
RH
7448 case KEY_say:
7449 checkcomma(s,PL_tokenbuf,"filehandle");
7450 LOP(OP_SAY,XREF);
7451
a0d0e21e
LW
7452 case KEY_chomp:
7453 UNI(OP_CHOMP);
4e553d73 7454
79072805
LW
7455 case KEY_scalar:
7456 UNI(OP_SCALAR);
7457
7458 case KEY_select:
a0d0e21e 7459 LOP(OP_SELECT,XTERM);
79072805
LW
7460
7461 case KEY_seek:
a0d0e21e 7462 LOP(OP_SEEK,XTERM);
79072805
LW
7463
7464 case KEY_semctl:
a0d0e21e 7465 LOP(OP_SEMCTL,XTERM);
79072805
LW
7466
7467 case KEY_semget:
a0d0e21e 7468 LOP(OP_SEMGET,XTERM);
79072805
LW
7469
7470 case KEY_semop:
a0d0e21e 7471 LOP(OP_SEMOP,XTERM);
79072805
LW
7472
7473 case KEY_send:
a0d0e21e 7474 LOP(OP_SEND,XTERM);
79072805
LW
7475
7476 case KEY_setpgrp:
a0d0e21e 7477 LOP(OP_SETPGRP,XTERM);
79072805
LW
7478
7479 case KEY_setpriority:
a0d0e21e 7480 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7481
7482 case KEY_sethostent:
ff68c719 7483 UNI(OP_SHOSTENT);
79072805
LW
7484
7485 case KEY_setnetent:
ff68c719 7486 UNI(OP_SNETENT);
79072805
LW
7487
7488 case KEY_setservent:
ff68c719 7489 UNI(OP_SSERVENT);
79072805
LW
7490
7491 case KEY_setprotoent:
ff68c719 7492 UNI(OP_SPROTOENT);
79072805
LW
7493
7494 case KEY_setpwent:
7495 FUN0(OP_SPWENT);
7496
7497 case KEY_setgrent:
7498 FUN0(OP_SGRENT);
7499
7500 case KEY_seekdir:
a0d0e21e 7501 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7502
7503 case KEY_setsockopt:
a0d0e21e 7504 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7505
7506 case KEY_shift:
6f33ba73 7507 UNIDOR(OP_SHIFT);
79072805
LW
7508
7509 case KEY_shmctl:
a0d0e21e 7510 LOP(OP_SHMCTL,XTERM);
79072805
LW
7511
7512 case KEY_shmget:
a0d0e21e 7513 LOP(OP_SHMGET,XTERM);
79072805
LW
7514
7515 case KEY_shmread:
a0d0e21e 7516 LOP(OP_SHMREAD,XTERM);
79072805
LW
7517
7518 case KEY_shmwrite:
a0d0e21e 7519 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7520
7521 case KEY_shutdown:
a0d0e21e 7522 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7523
7524 case KEY_sin:
7525 UNI(OP_SIN);
7526
7527 case KEY_sleep:
7528 UNI(OP_SLEEP);
7529
7530 case KEY_socket:
a0d0e21e 7531 LOP(OP_SOCKET,XTERM);
79072805
LW
7532
7533 case KEY_socketpair:
a0d0e21e 7534 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7535
7536 case KEY_sort:
3280af22 7537 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7538 s = SKIPSPACE1(s);
79072805 7539 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7540 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7541 PL_expect = XTERM;
15f0808c 7542 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7543 LOP(OP_SORT,XREF);
79072805
LW
7544
7545 case KEY_split:
a0d0e21e 7546 LOP(OP_SPLIT,XTERM);
79072805
LW
7547
7548 case KEY_sprintf:
a0d0e21e 7549 LOP(OP_SPRINTF,XTERM);
79072805
LW
7550
7551 case KEY_splice:
a0d0e21e 7552 LOP(OP_SPLICE,XTERM);
79072805
LW
7553
7554 case KEY_sqrt:
7555 UNI(OP_SQRT);
7556
7557 case KEY_srand:
7558 UNI(OP_SRAND);
7559
7560 case KEY_stat:
7561 UNI(OP_STAT);
7562
7563 case KEY_study:
79072805
LW
7564 UNI(OP_STUDY);
7565
7566 case KEY_substr:
a0d0e21e 7567 LOP(OP_SUBSTR,XTERM);
79072805
LW
7568
7569 case KEY_format:
7570 case KEY_sub:
93a17b20 7571 really_sub:
09bef843 7572 {
3280af22 7573 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7574 SSize_t tboffset = 0;
09bef843 7575 expectation attrful;
28cc6278 7576 bool have_name, have_proto;
f54cb97a 7577 const int key = tmp;
09bef843 7578
5db06880
NC
7579#ifdef PERL_MAD
7580 SV *tmpwhite = 0;
7581
cd81e915 7582 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7583 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7584 PL_thistoken = 0;
5db06880
NC
7585
7586 d = s;
7587 s = SKIPSPACE2(s,tmpwhite);
7588#else
09bef843 7589 s = skipspace(s);
5db06880 7590#endif
09bef843 7591
7e2040f0 7592 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7593 (*s == ':' && s[1] == ':'))
7594 {
5db06880 7595#ifdef PERL_MAD
4f61fd4b 7596 SV *nametoke = NULL;
5db06880
NC
7597#endif
7598
09bef843
SB
7599 PL_expect = XBLOCK;
7600 attrful = XATTRBLOCK;
b1b65b59
JH
7601 /* remember buffer pos'n for later force_word */
7602 tboffset = s - PL_oldbufptr;
09bef843 7603 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7604#ifdef PERL_MAD
7605 if (PL_madskills)
7606 nametoke = newSVpvn(s, d - s);
7607#endif
6502358f
NC
7608 if (memchr(tmpbuf, ':', len))
7609 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7610 else {
7611 sv_setsv(PL_subname,PL_curstname);
396482e1 7612 sv_catpvs(PL_subname,"::");
09bef843
SB
7613 sv_catpvn(PL_subname,tmpbuf,len);
7614 }
09bef843 7615 have_name = TRUE;
5db06880
NC
7616
7617#ifdef PERL_MAD
7618
7619 start_force(0);
7620 CURMAD('X', nametoke);
7621 CURMAD('_', tmpwhite);
7622 (void) force_word(PL_oldbufptr + tboffset, WORD,
7623 FALSE, TRUE, TRUE);
7624
7625 s = SKIPSPACE2(d,tmpwhite);
7626#else
7627 s = skipspace(d);
7628#endif
09bef843 7629 }
463ee0b2 7630 else {
09bef843
SB
7631 if (key == KEY_my)
7632 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7633 PL_expect = XTERMBLOCK;
7634 attrful = XATTRTERM;
76f68e9b 7635 sv_setpvs(PL_subname,"?");
09bef843 7636 have_name = FALSE;
463ee0b2 7637 }
4633a7c4 7638
09bef843
SB
7639 if (key == KEY_format) {
7640 if (*s == '=')
7641 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7642#ifdef PERL_MAD
cd81e915 7643 PL_thistoken = subtoken;
5db06880
NC
7644 s = d;
7645#else
09bef843 7646 if (have_name)
b1b65b59
JH
7647 (void) force_word(PL_oldbufptr + tboffset, WORD,
7648 FALSE, TRUE, TRUE);
5db06880 7649#endif
09bef843
SB
7650 OPERATOR(FORMAT);
7651 }
79072805 7652
09bef843
SB
7653 /* Look for a prototype */
7654 if (*s == '(') {
d9f2850e
RGS
7655 char *p;
7656 bool bad_proto = FALSE;
9e8d7757
RB
7657 bool in_brackets = FALSE;
7658 char greedy_proto = ' ';
7659 bool proto_after_greedy_proto = FALSE;
7660 bool must_be_last = FALSE;
7661 bool underscore = FALSE;
aef2a98a 7662 bool seen_underscore = FALSE;
197afce1 7663 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7664
5db06880 7665 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7666 if (!s)
09bef843 7667 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7668 /* strip spaces and check for bad characters */
09bef843
SB
7669 d = SvPVX(PL_lex_stuff);
7670 tmp = 0;
d9f2850e
RGS
7671 for (p = d; *p; ++p) {
7672 if (!isSPACE(*p)) {
7673 d[tmp++] = *p;
9e8d7757 7674
197afce1 7675 if (warnillegalproto) {
9e8d7757
RB
7676 if (must_be_last)
7677 proto_after_greedy_proto = TRUE;
7678 if (!strchr("$@%*;[]&\\_", *p)) {
7679 bad_proto = TRUE;
7680 }
7681 else {
7682 if ( underscore ) {
7683 if ( *p != ';' )
7684 bad_proto = TRUE;
7685 underscore = FALSE;
7686 }
7687 if ( *p == '[' ) {
7688 in_brackets = TRUE;
7689 }
7690 else if ( *p == ']' ) {
7691 in_brackets = FALSE;
7692 }
7693 else if ( (*p == '@' || *p == '%') &&
7694 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7695 !in_brackets ) {
7696 must_be_last = TRUE;
7697 greedy_proto = *p;
7698 }
7699 else if ( *p == '_' ) {
aef2a98a 7700 underscore = seen_underscore = TRUE;
9e8d7757
RB
7701 }
7702 }
7703 }
d37a9538 7704 }
09bef843 7705 }
d9f2850e 7706 d[tmp] = '\0';
9e8d7757 7707 if (proto_after_greedy_proto)
197afce1 7708 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
7709 "Prototype after '%c' for %"SVf" : %s",
7710 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 7711 if (bad_proto)
197afce1 7712 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
7713 "Illegal character %sin prototype for %"SVf" : %s",
7714 seen_underscore ? "after '_' " : "",
be2597df 7715 SVfARG(PL_subname), d);
b162af07 7716 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7717 have_proto = TRUE;
68dc0745 7718
5db06880
NC
7719#ifdef PERL_MAD
7720 start_force(0);
cd81e915 7721 CURMAD('q', PL_thisopen);
5db06880 7722 CURMAD('_', tmpwhite);
cd81e915
NC
7723 CURMAD('=', PL_thisstuff);
7724 CURMAD('Q', PL_thisclose);
5db06880
NC
7725 NEXTVAL_NEXTTOKE.opval =
7726 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7727 PL_lex_stuff = NULL;
5db06880
NC
7728 force_next(THING);
7729
7730 s = SKIPSPACE2(s,tmpwhite);
7731#else
09bef843 7732 s = skipspace(s);
5db06880 7733#endif
4633a7c4 7734 }
09bef843
SB
7735 else
7736 have_proto = FALSE;
7737
7738 if (*s == ':' && s[1] != ':')
7739 PL_expect = attrful;
8e742a20
MHM
7740 else if (*s != '{' && key == KEY_sub) {
7741 if (!have_name)
7742 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7743 else if (*s != ';' && *s != '}')
be2597df 7744 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7745 }
09bef843 7746
5db06880
NC
7747#ifdef PERL_MAD
7748 start_force(0);
7749 if (tmpwhite) {
7750 if (PL_madskills)
6b29d1f5 7751 curmad('^', newSVpvs(""));
5db06880
NC
7752 CURMAD('_', tmpwhite);
7753 }
7754 force_next(0);
7755
cd81e915 7756 PL_thistoken = subtoken;
5db06880 7757#else
09bef843 7758 if (have_proto) {
9ded7720 7759 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7760 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7761 PL_lex_stuff = NULL;
09bef843 7762 force_next(THING);
68dc0745 7763 }
5db06880 7764#endif
09bef843 7765 if (!have_name) {
49a54bbe
NC
7766 if (PL_curstash)
7767 sv_setpvs(PL_subname, "__ANON__");
7768 else
7769 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7770 TOKEN(ANONSUB);
4633a7c4 7771 }
5db06880 7772#ifndef PERL_MAD
b1b65b59
JH
7773 (void) force_word(PL_oldbufptr + tboffset, WORD,
7774 FALSE, TRUE, TRUE);
5db06880 7775#endif
09bef843
SB
7776 if (key == KEY_my)
7777 TOKEN(MYSUB);
7778 TOKEN(SUB);
4633a7c4 7779 }
79072805
LW
7780
7781 case KEY_system:
a0d0e21e 7782 LOP(OP_SYSTEM,XREF);
79072805
LW
7783
7784 case KEY_symlink:
a0d0e21e 7785 LOP(OP_SYMLINK,XTERM);
79072805
LW
7786
7787 case KEY_syscall:
a0d0e21e 7788 LOP(OP_SYSCALL,XTERM);
79072805 7789
c07a80fd 7790 case KEY_sysopen:
7791 LOP(OP_SYSOPEN,XTERM);
7792
137443ea 7793 case KEY_sysseek:
7794 LOP(OP_SYSSEEK,XTERM);
7795
79072805 7796 case KEY_sysread:
a0d0e21e 7797 LOP(OP_SYSREAD,XTERM);
79072805
LW
7798
7799 case KEY_syswrite:
a0d0e21e 7800 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7801
7802 case KEY_tr:
7803 s = scan_trans(s);
7804 TERM(sublex_start());
7805
7806 case KEY_tell:
7807 UNI(OP_TELL);
7808
7809 case KEY_telldir:
7810 UNI(OP_TELLDIR);
7811
463ee0b2 7812 case KEY_tie:
a0d0e21e 7813 LOP(OP_TIE,XTERM);
463ee0b2 7814
c07a80fd 7815 case KEY_tied:
7816 UNI(OP_TIED);
7817
79072805
LW
7818 case KEY_time:
7819 FUN0(OP_TIME);
7820
7821 case KEY_times:
7822 FUN0(OP_TMS);
7823
7824 case KEY_truncate:
a0d0e21e 7825 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7826
7827 case KEY_uc:
7828 UNI(OP_UC);
7829
7830 case KEY_ucfirst:
7831 UNI(OP_UCFIRST);
7832
463ee0b2
LW
7833 case KEY_untie:
7834 UNI(OP_UNTIE);
7835
79072805 7836 case KEY_until:
6154021b 7837 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7838 OPERATOR(UNTIL);
7839
7840 case KEY_unless:
6154021b 7841 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7842 OPERATOR(UNLESS);
7843
7844 case KEY_unlink:
a0d0e21e 7845 LOP(OP_UNLINK,XTERM);
79072805
LW
7846
7847 case KEY_undef:
6f33ba73 7848 UNIDOR(OP_UNDEF);
79072805
LW
7849
7850 case KEY_unpack:
a0d0e21e 7851 LOP(OP_UNPACK,XTERM);
79072805
LW
7852
7853 case KEY_utime:
a0d0e21e 7854 LOP(OP_UTIME,XTERM);
79072805
LW
7855
7856 case KEY_umask:
6f33ba73 7857 UNIDOR(OP_UMASK);
79072805
LW
7858
7859 case KEY_unshift:
a0d0e21e
LW
7860 LOP(OP_UNSHIFT,XTERM);
7861
7862 case KEY_use:
468aa647 7863 s = tokenize_use(1, s);
a0d0e21e 7864 OPERATOR(USE);
79072805
LW
7865
7866 case KEY_values:
7867 UNI(OP_VALUES);
7868
7869 case KEY_vec:
a0d0e21e 7870 LOP(OP_VEC,XTERM);
79072805 7871
0d863452 7872 case KEY_when:
6154021b 7873 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7874 OPERATOR(WHEN);
7875
79072805 7876 case KEY_while:
6154021b 7877 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7878 OPERATOR(WHILE);
7879
7880 case KEY_warn:
3280af22 7881 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7882 LOP(OP_WARN,XTERM);
79072805
LW
7883
7884 case KEY_wait:
7885 FUN0(OP_WAIT);
7886
7887 case KEY_waitpid:
a0d0e21e 7888 LOP(OP_WAITPID,XTERM);
79072805
LW
7889
7890 case KEY_wantarray:
7891 FUN0(OP_WANTARRAY);
7892
7893 case KEY_write:
9d116dd7
JH
7894#ifdef EBCDIC
7895 {
df3728a2
JH
7896 char ctl_l[2];
7897 ctl_l[0] = toCTRL('L');
7898 ctl_l[1] = '\0';
fafc274c 7899 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7900 }
7901#else
fafc274c
NC
7902 /* Make sure $^L is defined */
7903 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7904#endif
79072805
LW
7905 UNI(OP_ENTERWRITE);
7906
7907 case KEY_x:
3280af22 7908 if (PL_expect == XOPERATOR)
79072805
LW
7909 Mop(OP_REPEAT);
7910 check_uni();
7911 goto just_a_word;
7912
a0d0e21e 7913 case KEY_xor:
6154021b 7914 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7915 OPERATOR(OROP);
7916
79072805
LW
7917 case KEY_y:
7918 s = scan_trans(s);
7919 TERM(sublex_start());
7920 }
49dc05e3 7921 }}
79072805 7922}
bf4acbe4
GS
7923#ifdef __SC__
7924#pragma segment Main
7925#endif
79072805 7926
e930465f
JH
7927static int
7928S_pending_ident(pTHX)
8eceec63 7929{
97aff369 7930 dVAR;
8eceec63 7931 register char *d;
bbd11bfc 7932 PADOFFSET tmp = 0;
8eceec63
SC
7933 /* pit holds the identifier we read and pending_ident is reset */
7934 char pit = PL_pending_ident;
9bde8eb0
NC
7935 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7936 /* All routes through this function want to know if there is a colon. */
c099d646 7937 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
7938 PL_pending_ident = 0;
7939
cd81e915 7940 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 7941 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 7942 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
7943
7944 /* if we're in a my(), we can't allow dynamics here.
7945 $foo'bar has already been turned into $foo::bar, so
7946 just check for colons.
7947
7948 if it's a legal name, the OP is a PADANY.
7949 */
7950 if (PL_in_my) {
7951 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 7952 if (has_colon)
8eceec63
SC
7953 yyerror(Perl_form(aTHX_ "No package name allowed for "
7954 "variable %s in \"our\"",
7955 PL_tokenbuf));
d6447115 7956 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7957 }
7958 else {
9bde8eb0 7959 if (has_colon)
952306ac
RGS
7960 yyerror(Perl_form(aTHX_ PL_no_myglob,
7961 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 7962
6154021b 7963 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 7964 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7965 return PRIVATEREF;
7966 }
7967 }
7968
7969 /*
7970 build the ops for accesses to a my() variable.
7971
7972 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7973 then used in a comparison. This catches most, but not
7974 all cases. For instance, it catches
7975 sort { my($a); $a <=> $b }
7976 but not
7977 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7978 (although why you'd do that is anyone's guess).
7979 */
7980
9bde8eb0 7981 if (!has_colon) {
8716503d 7982 if (!PL_in_my)
f8f98e0a 7983 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 7984 if (tmp != NOT_IN_PAD) {
8eceec63 7985 /* might be an "our" variable" */
00b1698f 7986 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 7987 /* build ops for a bareword */
b64e5050
AL
7988 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
7989 HEK * const stashname = HvNAME_HEK(stash);
7990 SV * const sym = newSVhek(stashname);
396482e1 7991 sv_catpvs(sym, "::");
9bde8eb0 7992 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
7993 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
7994 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 7995 gv_fetchsv(sym,
8eceec63
SC
7996 (PL_in_eval
7997 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 7998 : GV_ADDMULTI
8eceec63
SC
7999 ),
8000 ((PL_tokenbuf[0] == '$') ? SVt_PV
8001 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8002 : SVt_PVHV));
8003 return WORD;
8004 }
8005
8006 /* if it's a sort block and they're naming $a or $b */
8007 if (PL_last_lop_op == OP_SORT &&
8008 PL_tokenbuf[0] == '$' &&
8009 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8010 && !PL_tokenbuf[2])
8011 {
8012 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8013 d < PL_bufend && *d != '\n';
8014 d++)
8015 {
8016 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8017 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8018 PL_tokenbuf);
8019 }
8020 }
8021 }
8022
6154021b
RGS
8023 pl_yylval.opval = newOP(OP_PADANY, 0);
8024 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8025 return PRIVATEREF;
8026 }
8027 }
8028
8029 /*
8030 Whine if they've said @foo in a doublequoted string,
8031 and @foo isn't a variable we can find in the symbol
8032 table.
8033 */
d824713b
NC
8034 if (ckWARN(WARN_AMBIGUOUS) &&
8035 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8036 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8037 SVt_PVAV);
8eceec63 8038 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8039 /* DO NOT warn for @- and @+ */
8040 && !( PL_tokenbuf[2] == '\0' &&
8041 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8042 )
8eceec63
SC
8043 {
8044 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8045 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8046 "Possible unintended interpolation of %s in string",
8047 PL_tokenbuf);
8eceec63
SC
8048 }
8049 }
8050
8051 /* build ops for a bareword */
6154021b 8052 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8053 tokenbuf_len - 1));
6154021b 8054 pl_yylval.opval->op_private = OPpCONST_ENTERED;
9bde8eb0
NC
8055 gv_fetchpvn_flags(
8056 PL_tokenbuf + 1, tokenbuf_len - 1,
d6069db2
RGS
8057 /* If the identifier refers to a stash, don't autovivify it.
8058 * Change 24660 had the side effect of causing symbol table
8059 * hashes to always be defined, even if they were freshly
8060 * created and the only reference in the entire program was
8061 * the single statement with the defined %foo::bar:: test.
8062 * It appears that all code in the wild doing this actually
8063 * wants to know whether sub-packages have been loaded, so
8064 * by avoiding auto-vivifying symbol tables, we ensure that
8065 * defined %foo::bar:: continues to be false, and the existing
8066 * tests still give the expected answers, even though what
8067 * they're actually testing has now changed subtly.
8068 */
9bde8eb0
NC
8069 (*PL_tokenbuf == '%'
8070 && *(d = PL_tokenbuf + tokenbuf_len - 1) == ':'
8071 && d[-1] == ':'
d6069db2
RGS
8072 ? 0
8073 : PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD),
adc51b97
RGS
8074 ((PL_tokenbuf[0] == '$') ? SVt_PV
8075 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8076 : SVt_PVHV));
8eceec63
SC
8077 return WORD;
8078}
8079
4c3bbe0f
MHM
8080/*
8081 * The following code was generated by perl_keyword.pl.
8082 */
e2e1dd5a 8083
79072805 8084I32
5458a98a 8085Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 8086{
952306ac 8087 dVAR;
7918f24d
NC
8088
8089 PERL_ARGS_ASSERT_KEYWORD;
8090
4c3bbe0f
MHM
8091 switch (len)
8092 {
8093 case 1: /* 5 tokens of length 1 */
8094 switch (name[0])
e2e1dd5a 8095 {
4c3bbe0f
MHM
8096 case 'm':
8097 { /* m */
8098 return KEY_m;
8099 }
8100
4c3bbe0f
MHM
8101 case 'q':
8102 { /* q */
8103 return KEY_q;
8104 }
8105
4c3bbe0f
MHM
8106 case 's':
8107 { /* s */
8108 return KEY_s;
8109 }
8110
4c3bbe0f
MHM
8111 case 'x':
8112 { /* x */
8113 return -KEY_x;
8114 }
8115
4c3bbe0f
MHM
8116 case 'y':
8117 { /* y */
8118 return KEY_y;
8119 }
8120
4c3bbe0f
MHM
8121 default:
8122 goto unknown;
e2e1dd5a 8123 }
4c3bbe0f
MHM
8124
8125 case 2: /* 18 tokens of length 2 */
8126 switch (name[0])
e2e1dd5a 8127 {
4c3bbe0f
MHM
8128 case 'd':
8129 if (name[1] == 'o')
8130 { /* do */
8131 return KEY_do;
8132 }
8133
8134 goto unknown;
8135
8136 case 'e':
8137 if (name[1] == 'q')
8138 { /* eq */
8139 return -KEY_eq;
8140 }
8141
8142 goto unknown;
8143
8144 case 'g':
8145 switch (name[1])
8146 {
8147 case 'e':
8148 { /* ge */
8149 return -KEY_ge;
8150 }
8151
4c3bbe0f
MHM
8152 case 't':
8153 { /* gt */
8154 return -KEY_gt;
8155 }
8156
4c3bbe0f
MHM
8157 default:
8158 goto unknown;
8159 }
8160
8161 case 'i':
8162 if (name[1] == 'f')
8163 { /* if */
8164 return KEY_if;
8165 }
8166
8167 goto unknown;
8168
8169 case 'l':
8170 switch (name[1])
8171 {
8172 case 'c':
8173 { /* lc */
8174 return -KEY_lc;
8175 }
8176
4c3bbe0f
MHM
8177 case 'e':
8178 { /* le */
8179 return -KEY_le;
8180 }
8181
4c3bbe0f
MHM
8182 case 't':
8183 { /* lt */
8184 return -KEY_lt;
8185 }
8186
4c3bbe0f
MHM
8187 default:
8188 goto unknown;
8189 }
8190
8191 case 'm':
8192 if (name[1] == 'y')
8193 { /* my */
8194 return KEY_my;
8195 }
8196
8197 goto unknown;
8198
8199 case 'n':
8200 switch (name[1])
8201 {
8202 case 'e':
8203 { /* ne */
8204 return -KEY_ne;
8205 }
8206
4c3bbe0f
MHM
8207 case 'o':
8208 { /* no */
8209 return KEY_no;
8210 }
8211
4c3bbe0f
MHM
8212 default:
8213 goto unknown;
8214 }
8215
8216 case 'o':
8217 if (name[1] == 'r')
8218 { /* or */
8219 return -KEY_or;
8220 }
8221
8222 goto unknown;
8223
8224 case 'q':
8225 switch (name[1])
8226 {
8227 case 'q':
8228 { /* qq */
8229 return KEY_qq;
8230 }
8231
4c3bbe0f
MHM
8232 case 'r':
8233 { /* qr */
8234 return KEY_qr;
8235 }
8236
4c3bbe0f
MHM
8237 case 'w':
8238 { /* qw */
8239 return KEY_qw;
8240 }
8241
4c3bbe0f
MHM
8242 case 'x':
8243 { /* qx */
8244 return KEY_qx;
8245 }
8246
4c3bbe0f
MHM
8247 default:
8248 goto unknown;
8249 }
8250
8251 case 't':
8252 if (name[1] == 'r')
8253 { /* tr */
8254 return KEY_tr;
8255 }
8256
8257 goto unknown;
8258
8259 case 'u':
8260 if (name[1] == 'c')
8261 { /* uc */
8262 return -KEY_uc;
8263 }
8264
8265 goto unknown;
8266
8267 default:
8268 goto unknown;
e2e1dd5a 8269 }
4c3bbe0f 8270
0d863452 8271 case 3: /* 29 tokens of length 3 */
4c3bbe0f 8272 switch (name[0])
e2e1dd5a 8273 {
4c3bbe0f
MHM
8274 case 'E':
8275 if (name[1] == 'N' &&
8276 name[2] == 'D')
8277 { /* END */
8278 return KEY_END;
8279 }
8280
8281 goto unknown;
8282
8283 case 'a':
8284 switch (name[1])
8285 {
8286 case 'b':
8287 if (name[2] == 's')
8288 { /* abs */
8289 return -KEY_abs;
8290 }
8291
8292 goto unknown;
8293
8294 case 'n':
8295 if (name[2] == 'd')
8296 { /* and */
8297 return -KEY_and;
8298 }
8299
8300 goto unknown;
8301
8302 default:
8303 goto unknown;
8304 }
8305
8306 case 'c':
8307 switch (name[1])
8308 {
8309 case 'h':
8310 if (name[2] == 'r')
8311 { /* chr */
8312 return -KEY_chr;
8313 }
8314
8315 goto unknown;
8316
8317 case 'm':
8318 if (name[2] == 'p')
8319 { /* cmp */
8320 return -KEY_cmp;
8321 }
8322
8323 goto unknown;
8324
8325 case 'o':
8326 if (name[2] == 's')
8327 { /* cos */
8328 return -KEY_cos;
8329 }
8330
8331 goto unknown;
8332
8333 default:
8334 goto unknown;
8335 }
8336
8337 case 'd':
8338 if (name[1] == 'i' &&
8339 name[2] == 'e')
8340 { /* die */
8341 return -KEY_die;
8342 }
8343
8344 goto unknown;
8345
8346 case 'e':
8347 switch (name[1])
8348 {
8349 case 'o':
8350 if (name[2] == 'f')
8351 { /* eof */
8352 return -KEY_eof;
8353 }
8354
8355 goto unknown;
8356
4c3bbe0f
MHM
8357 case 'x':
8358 if (name[2] == 'p')
8359 { /* exp */
8360 return -KEY_exp;
8361 }
8362
8363 goto unknown;
8364
8365 default:
8366 goto unknown;
8367 }
8368
8369 case 'f':
8370 if (name[1] == 'o' &&
8371 name[2] == 'r')
8372 { /* for */
8373 return KEY_for;
8374 }
8375
8376 goto unknown;
8377
8378 case 'h':
8379 if (name[1] == 'e' &&
8380 name[2] == 'x')
8381 { /* hex */
8382 return -KEY_hex;
8383 }
8384
8385 goto unknown;
8386
8387 case 'i':
8388 if (name[1] == 'n' &&
8389 name[2] == 't')
8390 { /* int */
8391 return -KEY_int;
8392 }
8393
8394 goto unknown;
8395
8396 case 'l':
8397 if (name[1] == 'o' &&
8398 name[2] == 'g')
8399 { /* log */
8400 return -KEY_log;
8401 }
8402
8403 goto unknown;
8404
8405 case 'm':
8406 if (name[1] == 'a' &&
8407 name[2] == 'p')
8408 { /* map */
8409 return KEY_map;
8410 }
8411
8412 goto unknown;
8413
8414 case 'n':
8415 if (name[1] == 'o' &&
8416 name[2] == 't')
8417 { /* not */
8418 return -KEY_not;
8419 }
8420
8421 goto unknown;
8422
8423 case 'o':
8424 switch (name[1])
8425 {
8426 case 'c':
8427 if (name[2] == 't')
8428 { /* oct */
8429 return -KEY_oct;
8430 }
8431
8432 goto unknown;
8433
8434 case 'r':
8435 if (name[2] == 'd')
8436 { /* ord */
8437 return -KEY_ord;
8438 }
8439
8440 goto unknown;
8441
8442 case 'u':
8443 if (name[2] == 'r')
8444 { /* our */
8445 return KEY_our;
8446 }
8447
8448 goto unknown;
8449
8450 default:
8451 goto unknown;
8452 }
8453
8454 case 'p':
8455 if (name[1] == 'o')
8456 {
8457 switch (name[2])
8458 {
8459 case 'p':
8460 { /* pop */
8461 return -KEY_pop;
8462 }
8463
4c3bbe0f
MHM
8464 case 's':
8465 { /* pos */
8466 return KEY_pos;
8467 }
8468
4c3bbe0f
MHM
8469 default:
8470 goto unknown;
8471 }
8472 }
8473
8474 goto unknown;
8475
8476 case 'r':
8477 if (name[1] == 'e' &&
8478 name[2] == 'f')
8479 { /* ref */
8480 return -KEY_ref;
8481 }
8482
8483 goto unknown;
8484
8485 case 's':
8486 switch (name[1])
8487 {
0d863452
RH
8488 case 'a':
8489 if (name[2] == 'y')
8490 { /* say */
e3e804c9 8491 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8492 }
8493
8494 goto unknown;
8495
4c3bbe0f
MHM
8496 case 'i':
8497 if (name[2] == 'n')
8498 { /* sin */
8499 return -KEY_sin;
8500 }
8501
8502 goto unknown;
8503
8504 case 'u':
8505 if (name[2] == 'b')
8506 { /* sub */
8507 return KEY_sub;
8508 }
8509
8510 goto unknown;
8511
8512 default:
8513 goto unknown;
8514 }
8515
8516 case 't':
8517 if (name[1] == 'i' &&
8518 name[2] == 'e')
8519 { /* tie */
8520 return KEY_tie;
8521 }
8522
8523 goto unknown;
8524
8525 case 'u':
8526 if (name[1] == 's' &&
8527 name[2] == 'e')
8528 { /* use */
8529 return KEY_use;
8530 }
8531
8532 goto unknown;
8533
8534 case 'v':
8535 if (name[1] == 'e' &&
8536 name[2] == 'c')
8537 { /* vec */
8538 return -KEY_vec;
8539 }
8540
8541 goto unknown;
8542
8543 case 'x':
8544 if (name[1] == 'o' &&
8545 name[2] == 'r')
8546 { /* xor */
8547 return -KEY_xor;
8548 }
8549
8550 goto unknown;
8551
8552 default:
8553 goto unknown;
e2e1dd5a 8554 }
4c3bbe0f 8555
0d863452 8556 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8557 switch (name[0])
e2e1dd5a 8558 {
4c3bbe0f
MHM
8559 case 'C':
8560 if (name[1] == 'O' &&
8561 name[2] == 'R' &&
8562 name[3] == 'E')
8563 { /* CORE */
8564 return -KEY_CORE;
8565 }
8566
8567 goto unknown;
8568
8569 case 'I':
8570 if (name[1] == 'N' &&
8571 name[2] == 'I' &&
8572 name[3] == 'T')
8573 { /* INIT */
8574 return KEY_INIT;
8575 }
8576
8577 goto unknown;
8578
8579 case 'b':
8580 if (name[1] == 'i' &&
8581 name[2] == 'n' &&
8582 name[3] == 'd')
8583 { /* bind */
8584 return -KEY_bind;
8585 }
8586
8587 goto unknown;
8588
8589 case 'c':
8590 if (name[1] == 'h' &&
8591 name[2] == 'o' &&
8592 name[3] == 'p')
8593 { /* chop */
8594 return -KEY_chop;
8595 }
8596
8597 goto unknown;
8598
8599 case 'd':
8600 if (name[1] == 'u' &&
8601 name[2] == 'm' &&
8602 name[3] == 'p')
8603 { /* dump */
8604 return -KEY_dump;
8605 }
8606
8607 goto unknown;
8608
8609 case 'e':
8610 switch (name[1])
8611 {
8612 case 'a':
8613 if (name[2] == 'c' &&
8614 name[3] == 'h')
8615 { /* each */
8616 return -KEY_each;
8617 }
8618
8619 goto unknown;
8620
8621 case 'l':
8622 if (name[2] == 's' &&
8623 name[3] == 'e')
8624 { /* else */
8625 return KEY_else;
8626 }
8627
8628 goto unknown;
8629
8630 case 'v':
8631 if (name[2] == 'a' &&
8632 name[3] == 'l')
8633 { /* eval */
8634 return KEY_eval;
8635 }
8636
8637 goto unknown;
8638
8639 case 'x':
8640 switch (name[2])
8641 {
8642 case 'e':
8643 if (name[3] == 'c')
8644 { /* exec */
8645 return -KEY_exec;
8646 }
8647
8648 goto unknown;
8649
8650 case 'i':
8651 if (name[3] == 't')
8652 { /* exit */
8653 return -KEY_exit;
8654 }
8655
8656 goto unknown;
8657
8658 default:
8659 goto unknown;
8660 }
8661
8662 default:
8663 goto unknown;
8664 }
8665
8666 case 'f':
8667 if (name[1] == 'o' &&
8668 name[2] == 'r' &&
8669 name[3] == 'k')
8670 { /* fork */
8671 return -KEY_fork;
8672 }
8673
8674 goto unknown;
8675
8676 case 'g':
8677 switch (name[1])
8678 {
8679 case 'e':
8680 if (name[2] == 't' &&
8681 name[3] == 'c')
8682 { /* getc */
8683 return -KEY_getc;
8684 }
8685
8686 goto unknown;
8687
8688 case 'l':
8689 if (name[2] == 'o' &&
8690 name[3] == 'b')
8691 { /* glob */
8692 return KEY_glob;
8693 }
8694
8695 goto unknown;
8696
8697 case 'o':
8698 if (name[2] == 't' &&
8699 name[3] == 'o')
8700 { /* goto */
8701 return KEY_goto;
8702 }
8703
8704 goto unknown;
8705
8706 case 'r':
8707 if (name[2] == 'e' &&
8708 name[3] == 'p')
8709 { /* grep */
8710 return KEY_grep;
8711 }
8712
8713 goto unknown;
8714
8715 default:
8716 goto unknown;
8717 }
8718
8719 case 'j':
8720 if (name[1] == 'o' &&
8721 name[2] == 'i' &&
8722 name[3] == 'n')
8723 { /* join */
8724 return -KEY_join;
8725 }
8726
8727 goto unknown;
8728
8729 case 'k':
8730 switch (name[1])
8731 {
8732 case 'e':
8733 if (name[2] == 'y' &&
8734 name[3] == 's')
8735 { /* keys */
8736 return -KEY_keys;
8737 }
8738
8739 goto unknown;
8740
8741 case 'i':
8742 if (name[2] == 'l' &&
8743 name[3] == 'l')
8744 { /* kill */
8745 return -KEY_kill;
8746 }
8747
8748 goto unknown;
8749
8750 default:
8751 goto unknown;
8752 }
8753
8754 case 'l':
8755 switch (name[1])
8756 {
8757 case 'a':
8758 if (name[2] == 's' &&
8759 name[3] == 't')
8760 { /* last */
8761 return KEY_last;
8762 }
8763
8764 goto unknown;
8765
8766 case 'i':
8767 if (name[2] == 'n' &&
8768 name[3] == 'k')
8769 { /* link */
8770 return -KEY_link;
8771 }
8772
8773 goto unknown;
8774
8775 case 'o':
8776 if (name[2] == 'c' &&
8777 name[3] == 'k')
8778 { /* lock */
8779 return -KEY_lock;
8780 }
8781
8782 goto unknown;
8783
8784 default:
8785 goto unknown;
8786 }
8787
8788 case 'n':
8789 if (name[1] == 'e' &&
8790 name[2] == 'x' &&
8791 name[3] == 't')
8792 { /* next */
8793 return KEY_next;
8794 }
8795
8796 goto unknown;
8797
8798 case 'o':
8799 if (name[1] == 'p' &&
8800 name[2] == 'e' &&
8801 name[3] == 'n')
8802 { /* open */
8803 return -KEY_open;
8804 }
8805
8806 goto unknown;
8807
8808 case 'p':
8809 switch (name[1])
8810 {
8811 case 'a':
8812 if (name[2] == 'c' &&
8813 name[3] == 'k')
8814 { /* pack */
8815 return -KEY_pack;
8816 }
8817
8818 goto unknown;
8819
8820 case 'i':
8821 if (name[2] == 'p' &&
8822 name[3] == 'e')
8823 { /* pipe */
8824 return -KEY_pipe;
8825 }
8826
8827 goto unknown;
8828
8829 case 'u':
8830 if (name[2] == 's' &&
8831 name[3] == 'h')
8832 { /* push */
8833 return -KEY_push;
8834 }
8835
8836 goto unknown;
8837
8838 default:
8839 goto unknown;
8840 }
8841
8842 case 'r':
8843 switch (name[1])
8844 {
8845 case 'a':
8846 if (name[2] == 'n' &&
8847 name[3] == 'd')
8848 { /* rand */
8849 return -KEY_rand;
8850 }
8851
8852 goto unknown;
8853
8854 case 'e':
8855 switch (name[2])
8856 {
8857 case 'a':
8858 if (name[3] == 'd')
8859 { /* read */
8860 return -KEY_read;
8861 }
8862
8863 goto unknown;
8864
8865 case 'c':
8866 if (name[3] == 'v')
8867 { /* recv */
8868 return -KEY_recv;
8869 }
8870
8871 goto unknown;
8872
8873 case 'd':
8874 if (name[3] == 'o')
8875 { /* redo */
8876 return KEY_redo;
8877 }
8878
8879 goto unknown;
8880
8881 default:
8882 goto unknown;
8883 }
8884
8885 default:
8886 goto unknown;
8887 }
8888
8889 case 's':
8890 switch (name[1])
8891 {
8892 case 'e':
8893 switch (name[2])
8894 {
8895 case 'e':
8896 if (name[3] == 'k')
8897 { /* seek */
8898 return -KEY_seek;
8899 }
8900
8901 goto unknown;
8902
8903 case 'n':
8904 if (name[3] == 'd')
8905 { /* send */
8906 return -KEY_send;
8907 }
8908
8909 goto unknown;
8910
8911 default:
8912 goto unknown;
8913 }
8914
8915 case 'o':
8916 if (name[2] == 'r' &&
8917 name[3] == 't')
8918 { /* sort */
8919 return KEY_sort;
8920 }
8921
8922 goto unknown;
8923
8924 case 'q':
8925 if (name[2] == 'r' &&
8926 name[3] == 't')
8927 { /* sqrt */
8928 return -KEY_sqrt;
8929 }
8930
8931 goto unknown;
8932
8933 case 't':
8934 if (name[2] == 'a' &&
8935 name[3] == 't')
8936 { /* stat */
8937 return -KEY_stat;
8938 }
8939
8940 goto unknown;
8941
8942 default:
8943 goto unknown;
8944 }
8945
8946 case 't':
8947 switch (name[1])
8948 {
8949 case 'e':
8950 if (name[2] == 'l' &&
8951 name[3] == 'l')
8952 { /* tell */
8953 return -KEY_tell;
8954 }
8955
8956 goto unknown;
8957
8958 case 'i':
8959 switch (name[2])
8960 {
8961 case 'e':
8962 if (name[3] == 'd')
8963 { /* tied */
8964 return KEY_tied;
8965 }
8966
8967 goto unknown;
8968
8969 case 'm':
8970 if (name[3] == 'e')
8971 { /* time */
8972 return -KEY_time;
8973 }
8974
8975 goto unknown;
8976
8977 default:
8978 goto unknown;
8979 }
8980
8981 default:
8982 goto unknown;
8983 }
8984
8985 case 'w':
0d863452 8986 switch (name[1])
4c3bbe0f 8987 {
0d863452 8988 case 'a':
952306ac
RGS
8989 switch (name[2])
8990 {
8991 case 'i':
8992 if (name[3] == 't')
8993 { /* wait */
8994 return -KEY_wait;
8995 }
4c3bbe0f 8996
952306ac 8997 goto unknown;
4c3bbe0f 8998
952306ac
RGS
8999 case 'r':
9000 if (name[3] == 'n')
9001 { /* warn */
9002 return -KEY_warn;
9003 }
4c3bbe0f 9004
952306ac 9005 goto unknown;
4c3bbe0f 9006
952306ac
RGS
9007 default:
9008 goto unknown;
9009 }
0d863452
RH
9010
9011 case 'h':
9012 if (name[2] == 'e' &&
9013 name[3] == 'n')
9014 { /* when */
5458a98a 9015 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 9016 }
4c3bbe0f 9017
952306ac 9018 goto unknown;
4c3bbe0f 9019
952306ac
RGS
9020 default:
9021 goto unknown;
9022 }
4c3bbe0f 9023
0d863452
RH
9024 default:
9025 goto unknown;
9026 }
9027
952306ac 9028 case 5: /* 39 tokens of length 5 */
4c3bbe0f 9029 switch (name[0])
e2e1dd5a 9030 {
4c3bbe0f
MHM
9031 case 'B':
9032 if (name[1] == 'E' &&
9033 name[2] == 'G' &&
9034 name[3] == 'I' &&
9035 name[4] == 'N')
9036 { /* BEGIN */
9037 return KEY_BEGIN;
9038 }
9039
9040 goto unknown;
9041
9042 case 'C':
9043 if (name[1] == 'H' &&
9044 name[2] == 'E' &&
9045 name[3] == 'C' &&
9046 name[4] == 'K')
9047 { /* CHECK */
9048 return KEY_CHECK;
9049 }
9050
9051 goto unknown;
9052
9053 case 'a':
9054 switch (name[1])
9055 {
9056 case 'l':
9057 if (name[2] == 'a' &&
9058 name[3] == 'r' &&
9059 name[4] == 'm')
9060 { /* alarm */
9061 return -KEY_alarm;
9062 }
9063
9064 goto unknown;
9065
9066 case 't':
9067 if (name[2] == 'a' &&
9068 name[3] == 'n' &&
9069 name[4] == '2')
9070 { /* atan2 */
9071 return -KEY_atan2;
9072 }
9073
9074 goto unknown;
9075
9076 default:
9077 goto unknown;
9078 }
9079
9080 case 'b':
0d863452
RH
9081 switch (name[1])
9082 {
9083 case 'l':
9084 if (name[2] == 'e' &&
952306ac
RGS
9085 name[3] == 's' &&
9086 name[4] == 's')
9087 { /* bless */
9088 return -KEY_bless;
9089 }
4c3bbe0f 9090
952306ac 9091 goto unknown;
4c3bbe0f 9092
0d863452
RH
9093 case 'r':
9094 if (name[2] == 'e' &&
9095 name[3] == 'a' &&
9096 name[4] == 'k')
9097 { /* break */
5458a98a 9098 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
9099 }
9100
9101 goto unknown;
9102
9103 default:
9104 goto unknown;
9105 }
9106
4c3bbe0f
MHM
9107 case 'c':
9108 switch (name[1])
9109 {
9110 case 'h':
9111 switch (name[2])
9112 {
9113 case 'd':
9114 if (name[3] == 'i' &&
9115 name[4] == 'r')
9116 { /* chdir */
9117 return -KEY_chdir;
9118 }
9119
9120 goto unknown;
9121
9122 case 'm':
9123 if (name[3] == 'o' &&
9124 name[4] == 'd')
9125 { /* chmod */
9126 return -KEY_chmod;
9127 }
9128
9129 goto unknown;
9130
9131 case 'o':
9132 switch (name[3])
9133 {
9134 case 'm':
9135 if (name[4] == 'p')
9136 { /* chomp */
9137 return -KEY_chomp;
9138 }
9139
9140 goto unknown;
9141
9142 case 'w':
9143 if (name[4] == 'n')
9144 { /* chown */
9145 return -KEY_chown;
9146 }
9147
9148 goto unknown;
9149
9150 default:
9151 goto unknown;
9152 }
9153
9154 default:
9155 goto unknown;
9156 }
9157
9158 case 'l':
9159 if (name[2] == 'o' &&
9160 name[3] == 's' &&
9161 name[4] == 'e')
9162 { /* close */
9163 return -KEY_close;
9164 }
9165
9166 goto unknown;
9167
9168 case 'r':
9169 if (name[2] == 'y' &&
9170 name[3] == 'p' &&
9171 name[4] == 't')
9172 { /* crypt */
9173 return -KEY_crypt;
9174 }
9175
9176 goto unknown;
9177
9178 default:
9179 goto unknown;
9180 }
9181
9182 case 'e':
9183 if (name[1] == 'l' &&
9184 name[2] == 's' &&
9185 name[3] == 'i' &&
9186 name[4] == 'f')
9187 { /* elsif */
9188 return KEY_elsif;
9189 }
9190
9191 goto unknown;
9192
9193 case 'f':
9194 switch (name[1])
9195 {
9196 case 'c':
9197 if (name[2] == 'n' &&
9198 name[3] == 't' &&
9199 name[4] == 'l')
9200 { /* fcntl */
9201 return -KEY_fcntl;
9202 }
9203
9204 goto unknown;
9205
9206 case 'l':
9207 if (name[2] == 'o' &&
9208 name[3] == 'c' &&
9209 name[4] == 'k')
9210 { /* flock */
9211 return -KEY_flock;
9212 }
9213
9214 goto unknown;
9215
9216 default:
9217 goto unknown;
9218 }
9219
0d863452
RH
9220 case 'g':
9221 if (name[1] == 'i' &&
9222 name[2] == 'v' &&
9223 name[3] == 'e' &&
9224 name[4] == 'n')
9225 { /* given */
5458a98a 9226 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
9227 }
9228
9229 goto unknown;
9230
4c3bbe0f
MHM
9231 case 'i':
9232 switch (name[1])
9233 {
9234 case 'n':
9235 if (name[2] == 'd' &&
9236 name[3] == 'e' &&
9237 name[4] == 'x')
9238 { /* index */
9239 return -KEY_index;
9240 }
9241
9242 goto unknown;
9243
9244 case 'o':
9245 if (name[2] == 'c' &&
9246 name[3] == 't' &&
9247 name[4] == 'l')
9248 { /* ioctl */
9249 return -KEY_ioctl;
9250 }
9251
9252 goto unknown;
9253
9254 default:
9255 goto unknown;
9256 }
9257
9258 case 'l':
9259 switch (name[1])
9260 {
9261 case 'o':
9262 if (name[2] == 'c' &&
9263 name[3] == 'a' &&
9264 name[4] == 'l')
9265 { /* local */
9266 return KEY_local;
9267 }
9268
9269 goto unknown;
9270
9271 case 's':
9272 if (name[2] == 't' &&
9273 name[3] == 'a' &&
9274 name[4] == 't')
9275 { /* lstat */
9276 return -KEY_lstat;
9277 }
9278
9279 goto unknown;
9280
9281 default:
9282 goto unknown;
9283 }
9284
9285 case 'm':
9286 if (name[1] == 'k' &&
9287 name[2] == 'd' &&
9288 name[3] == 'i' &&
9289 name[4] == 'r')
9290 { /* mkdir */
9291 return -KEY_mkdir;
9292 }
9293
9294 goto unknown;
9295
9296 case 'p':
9297 if (name[1] == 'r' &&
9298 name[2] == 'i' &&
9299 name[3] == 'n' &&
9300 name[4] == 't')
9301 { /* print */
9302 return KEY_print;
9303 }
9304
9305 goto unknown;
9306
9307 case 'r':
9308 switch (name[1])
9309 {
9310 case 'e':
9311 if (name[2] == 's' &&
9312 name[3] == 'e' &&
9313 name[4] == 't')
9314 { /* reset */
9315 return -KEY_reset;
9316 }
9317
9318 goto unknown;
9319
9320 case 'm':
9321 if (name[2] == 'd' &&
9322 name[3] == 'i' &&
9323 name[4] == 'r')
9324 { /* rmdir */
9325 return -KEY_rmdir;
9326 }
9327
9328 goto unknown;
9329
9330 default:
9331 goto unknown;
9332 }
9333
9334 case 's':
9335 switch (name[1])
9336 {
9337 case 'e':
9338 if (name[2] == 'm' &&
9339 name[3] == 'o' &&
9340 name[4] == 'p')
9341 { /* semop */
9342 return -KEY_semop;
9343 }
9344
9345 goto unknown;
9346
9347 case 'h':
9348 if (name[2] == 'i' &&
9349 name[3] == 'f' &&
9350 name[4] == 't')
9351 { /* shift */
9352 return -KEY_shift;
9353 }
9354
9355 goto unknown;
9356
9357 case 'l':
9358 if (name[2] == 'e' &&
9359 name[3] == 'e' &&
9360 name[4] == 'p')
9361 { /* sleep */
9362 return -KEY_sleep;
9363 }
9364
9365 goto unknown;
9366
9367 case 'p':
9368 if (name[2] == 'l' &&
9369 name[3] == 'i' &&
9370 name[4] == 't')
9371 { /* split */
9372 return KEY_split;
9373 }
9374
9375 goto unknown;
9376
9377 case 'r':
9378 if (name[2] == 'a' &&
9379 name[3] == 'n' &&
9380 name[4] == 'd')
9381 { /* srand */
9382 return -KEY_srand;
9383 }
9384
9385 goto unknown;
9386
9387 case 't':
952306ac
RGS
9388 switch (name[2])
9389 {
9390 case 'a':
9391 if (name[3] == 't' &&
9392 name[4] == 'e')
9393 { /* state */
5458a98a 9394 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9395 }
4c3bbe0f 9396
952306ac
RGS
9397 goto unknown;
9398
9399 case 'u':
9400 if (name[3] == 'd' &&
9401 name[4] == 'y')
9402 { /* study */
9403 return KEY_study;
9404 }
9405
9406 goto unknown;
9407
9408 default:
9409 goto unknown;
9410 }
4c3bbe0f
MHM
9411
9412 default:
9413 goto unknown;
9414 }
9415
9416 case 't':
9417 if (name[1] == 'i' &&
9418 name[2] == 'm' &&
9419 name[3] == 'e' &&
9420 name[4] == 's')
9421 { /* times */
9422 return -KEY_times;
9423 }
9424
9425 goto unknown;
9426
9427 case 'u':
9428 switch (name[1])
9429 {
9430 case 'm':
9431 if (name[2] == 'a' &&
9432 name[3] == 's' &&
9433 name[4] == 'k')
9434 { /* umask */
9435 return -KEY_umask;
9436 }
9437
9438 goto unknown;
9439
9440 case 'n':
9441 switch (name[2])
9442 {
9443 case 'd':
9444 if (name[3] == 'e' &&
9445 name[4] == 'f')
9446 { /* undef */
9447 return KEY_undef;
9448 }
9449
9450 goto unknown;
9451
9452 case 't':
9453 if (name[3] == 'i')
9454 {
9455 switch (name[4])
9456 {
9457 case 'e':
9458 { /* untie */
9459 return KEY_untie;
9460 }
9461
4c3bbe0f
MHM
9462 case 'l':
9463 { /* until */
9464 return KEY_until;
9465 }
9466
4c3bbe0f
MHM
9467 default:
9468 goto unknown;
9469 }
9470 }
9471
9472 goto unknown;
9473
9474 default:
9475 goto unknown;
9476 }
9477
9478 case 't':
9479 if (name[2] == 'i' &&
9480 name[3] == 'm' &&
9481 name[4] == 'e')
9482 { /* utime */
9483 return -KEY_utime;
9484 }
9485
9486 goto unknown;
9487
9488 default:
9489 goto unknown;
9490 }
9491
9492 case 'w':
9493 switch (name[1])
9494 {
9495 case 'h':
9496 if (name[2] == 'i' &&
9497 name[3] == 'l' &&
9498 name[4] == 'e')
9499 { /* while */
9500 return KEY_while;
9501 }
9502
9503 goto unknown;
9504
9505 case 'r':
9506 if (name[2] == 'i' &&
9507 name[3] == 't' &&
9508 name[4] == 'e')
9509 { /* write */
9510 return -KEY_write;
9511 }
9512
9513 goto unknown;
9514
9515 default:
9516 goto unknown;
9517 }
9518
9519 default:
9520 goto unknown;
e2e1dd5a 9521 }
4c3bbe0f
MHM
9522
9523 case 6: /* 33 tokens of length 6 */
9524 switch (name[0])
9525 {
9526 case 'a':
9527 if (name[1] == 'c' &&
9528 name[2] == 'c' &&
9529 name[3] == 'e' &&
9530 name[4] == 'p' &&
9531 name[5] == 't')
9532 { /* accept */
9533 return -KEY_accept;
9534 }
9535
9536 goto unknown;
9537
9538 case 'c':
9539 switch (name[1])
9540 {
9541 case 'a':
9542 if (name[2] == 'l' &&
9543 name[3] == 'l' &&
9544 name[4] == 'e' &&
9545 name[5] == 'r')
9546 { /* caller */
9547 return -KEY_caller;
9548 }
9549
9550 goto unknown;
9551
9552 case 'h':
9553 if (name[2] == 'r' &&
9554 name[3] == 'o' &&
9555 name[4] == 'o' &&
9556 name[5] == 't')
9557 { /* chroot */
9558 return -KEY_chroot;
9559 }
9560
9561 goto unknown;
9562
9563 default:
9564 goto unknown;
9565 }
9566
9567 case 'd':
9568 if (name[1] == 'e' &&
9569 name[2] == 'l' &&
9570 name[3] == 'e' &&
9571 name[4] == 't' &&
9572 name[5] == 'e')
9573 { /* delete */
9574 return KEY_delete;
9575 }
9576
9577 goto unknown;
9578
9579 case 'e':
9580 switch (name[1])
9581 {
9582 case 'l':
9583 if (name[2] == 's' &&
9584 name[3] == 'e' &&
9585 name[4] == 'i' &&
9586 name[5] == 'f')
9587 { /* elseif */
9b387841 9588 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9589 }
9590
9591 goto unknown;
9592
9593 case 'x':
9594 if (name[2] == 'i' &&
9595 name[3] == 's' &&
9596 name[4] == 't' &&
9597 name[5] == 's')
9598 { /* exists */
9599 return KEY_exists;
9600 }
9601
9602 goto unknown;
9603
9604 default:
9605 goto unknown;
9606 }
9607
9608 case 'f':
9609 switch (name[1])
9610 {
9611 case 'i':
9612 if (name[2] == 'l' &&
9613 name[3] == 'e' &&
9614 name[4] == 'n' &&
9615 name[5] == 'o')
9616 { /* fileno */
9617 return -KEY_fileno;
9618 }
9619
9620 goto unknown;
9621
9622 case 'o':
9623 if (name[2] == 'r' &&
9624 name[3] == 'm' &&
9625 name[4] == 'a' &&
9626 name[5] == 't')
9627 { /* format */
9628 return KEY_format;
9629 }
9630
9631 goto unknown;
9632
9633 default:
9634 goto unknown;
9635 }
9636
9637 case 'g':
9638 if (name[1] == 'm' &&
9639 name[2] == 't' &&
9640 name[3] == 'i' &&
9641 name[4] == 'm' &&
9642 name[5] == 'e')
9643 { /* gmtime */
9644 return -KEY_gmtime;
9645 }
9646
9647 goto unknown;
9648
9649 case 'l':
9650 switch (name[1])
9651 {
9652 case 'e':
9653 if (name[2] == 'n' &&
9654 name[3] == 'g' &&
9655 name[4] == 't' &&
9656 name[5] == 'h')
9657 { /* length */
9658 return -KEY_length;
9659 }
9660
9661 goto unknown;
9662
9663 case 'i':
9664 if (name[2] == 's' &&
9665 name[3] == 't' &&
9666 name[4] == 'e' &&
9667 name[5] == 'n')
9668 { /* listen */
9669 return -KEY_listen;
9670 }
9671
9672 goto unknown;
9673
9674 default:
9675 goto unknown;
9676 }
9677
9678 case 'm':
9679 if (name[1] == 's' &&
9680 name[2] == 'g')
9681 {
9682 switch (name[3])
9683 {
9684 case 'c':
9685 if (name[4] == 't' &&
9686 name[5] == 'l')
9687 { /* msgctl */
9688 return -KEY_msgctl;
9689 }
9690
9691 goto unknown;
9692
9693 case 'g':
9694 if (name[4] == 'e' &&
9695 name[5] == 't')
9696 { /* msgget */
9697 return -KEY_msgget;
9698 }
9699
9700 goto unknown;
9701
9702 case 'r':
9703 if (name[4] == 'c' &&
9704 name[5] == 'v')
9705 { /* msgrcv */
9706 return -KEY_msgrcv;
9707 }
9708
9709 goto unknown;
9710
9711 case 's':
9712 if (name[4] == 'n' &&
9713 name[5] == 'd')
9714 { /* msgsnd */
9715 return -KEY_msgsnd;
9716 }
9717
9718 goto unknown;
9719
9720 default:
9721 goto unknown;
9722 }
9723 }
9724
9725 goto unknown;
9726
9727 case 'p':
9728 if (name[1] == 'r' &&
9729 name[2] == 'i' &&
9730 name[3] == 'n' &&
9731 name[4] == 't' &&
9732 name[5] == 'f')
9733 { /* printf */
9734 return KEY_printf;
9735 }
9736
9737 goto unknown;
9738
9739 case 'r':
9740 switch (name[1])
9741 {
9742 case 'e':
9743 switch (name[2])
9744 {
9745 case 'n':
9746 if (name[3] == 'a' &&
9747 name[4] == 'm' &&
9748 name[5] == 'e')
9749 { /* rename */
9750 return -KEY_rename;
9751 }
9752
9753 goto unknown;
9754
9755 case 't':
9756 if (name[3] == 'u' &&
9757 name[4] == 'r' &&
9758 name[5] == 'n')
9759 { /* return */
9760 return KEY_return;
9761 }
9762
9763 goto unknown;
9764
9765 default:
9766 goto unknown;
9767 }
9768
9769 case 'i':
9770 if (name[2] == 'n' &&
9771 name[3] == 'd' &&
9772 name[4] == 'e' &&
9773 name[5] == 'x')
9774 { /* rindex */
9775 return -KEY_rindex;
9776 }
9777
9778 goto unknown;
9779
9780 default:
9781 goto unknown;
9782 }
9783
9784 case 's':
9785 switch (name[1])
9786 {
9787 case 'c':
9788 if (name[2] == 'a' &&
9789 name[3] == 'l' &&
9790 name[4] == 'a' &&
9791 name[5] == 'r')
9792 { /* scalar */
9793 return KEY_scalar;
9794 }
9795
9796 goto unknown;
9797
9798 case 'e':
9799 switch (name[2])
9800 {
9801 case 'l':
9802 if (name[3] == 'e' &&
9803 name[4] == 'c' &&
9804 name[5] == 't')
9805 { /* select */
9806 return -KEY_select;
9807 }
9808
9809 goto unknown;
9810
9811 case 'm':
9812 switch (name[3])
9813 {
9814 case 'c':
9815 if (name[4] == 't' &&
9816 name[5] == 'l')
9817 { /* semctl */
9818 return -KEY_semctl;
9819 }
9820
9821 goto unknown;
9822
9823 case 'g':
9824 if (name[4] == 'e' &&
9825 name[5] == 't')
9826 { /* semget */
9827 return -KEY_semget;
9828 }
9829
9830 goto unknown;
9831
9832 default:
9833 goto unknown;
9834 }
9835
9836 default:
9837 goto unknown;
9838 }
9839
9840 case 'h':
9841 if (name[2] == 'm')
9842 {
9843 switch (name[3])
9844 {
9845 case 'c':
9846 if (name[4] == 't' &&
9847 name[5] == 'l')
9848 { /* shmctl */
9849 return -KEY_shmctl;
9850 }
9851
9852 goto unknown;
9853
9854 case 'g':
9855 if (name[4] == 'e' &&
9856 name[5] == 't')
9857 { /* shmget */
9858 return -KEY_shmget;
9859 }
9860
9861 goto unknown;
9862
9863 default:
9864 goto unknown;
9865 }
9866 }
9867
9868 goto unknown;
9869
9870 case 'o':
9871 if (name[2] == 'c' &&
9872 name[3] == 'k' &&
9873 name[4] == 'e' &&
9874 name[5] == 't')
9875 { /* socket */
9876 return -KEY_socket;
9877 }
9878
9879 goto unknown;
9880
9881 case 'p':
9882 if (name[2] == 'l' &&
9883 name[3] == 'i' &&
9884 name[4] == 'c' &&
9885 name[5] == 'e')
9886 { /* splice */
9887 return -KEY_splice;
9888 }
9889
9890 goto unknown;
9891
9892 case 'u':
9893 if (name[2] == 'b' &&
9894 name[3] == 's' &&
9895 name[4] == 't' &&
9896 name[5] == 'r')
9897 { /* substr */
9898 return -KEY_substr;
9899 }
9900
9901 goto unknown;
9902
9903 case 'y':
9904 if (name[2] == 's' &&
9905 name[3] == 't' &&
9906 name[4] == 'e' &&
9907 name[5] == 'm')
9908 { /* system */
9909 return -KEY_system;
9910 }
9911
9912 goto unknown;
9913
9914 default:
9915 goto unknown;
9916 }
9917
9918 case 'u':
9919 if (name[1] == 'n')
9920 {
9921 switch (name[2])
9922 {
9923 case 'l':
9924 switch (name[3])
9925 {
9926 case 'e':
9927 if (name[4] == 's' &&
9928 name[5] == 's')
9929 { /* unless */
9930 return KEY_unless;
9931 }
9932
9933 goto unknown;
9934
9935 case 'i':
9936 if (name[4] == 'n' &&
9937 name[5] == 'k')
9938 { /* unlink */
9939 return -KEY_unlink;
9940 }
9941
9942 goto unknown;
9943
9944 default:
9945 goto unknown;
9946 }
9947
9948 case 'p':
9949 if (name[3] == 'a' &&
9950 name[4] == 'c' &&
9951 name[5] == 'k')
9952 { /* unpack */
9953 return -KEY_unpack;
9954 }
9955
9956 goto unknown;
9957
9958 default:
9959 goto unknown;
9960 }
9961 }
9962
9963 goto unknown;
9964
9965 case 'v':
9966 if (name[1] == 'a' &&
9967 name[2] == 'l' &&
9968 name[3] == 'u' &&
9969 name[4] == 'e' &&
9970 name[5] == 's')
9971 { /* values */
9972 return -KEY_values;
9973 }
9974
9975 goto unknown;
9976
9977 default:
9978 goto unknown;
e2e1dd5a 9979 }
4c3bbe0f 9980
0d863452 9981 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
9982 switch (name[0])
9983 {
9984 case 'D':
9985 if (name[1] == 'E' &&
9986 name[2] == 'S' &&
9987 name[3] == 'T' &&
9988 name[4] == 'R' &&
9989 name[5] == 'O' &&
9990 name[6] == 'Y')
9991 { /* DESTROY */
9992 return KEY_DESTROY;
9993 }
9994
9995 goto unknown;
9996
9997 case '_':
9998 if (name[1] == '_' &&
9999 name[2] == 'E' &&
10000 name[3] == 'N' &&
10001 name[4] == 'D' &&
10002 name[5] == '_' &&
10003 name[6] == '_')
10004 { /* __END__ */
10005 return KEY___END__;
10006 }
10007
10008 goto unknown;
10009
10010 case 'b':
10011 if (name[1] == 'i' &&
10012 name[2] == 'n' &&
10013 name[3] == 'm' &&
10014 name[4] == 'o' &&
10015 name[5] == 'd' &&
10016 name[6] == 'e')
10017 { /* binmode */
10018 return -KEY_binmode;
10019 }
10020
10021 goto unknown;
10022
10023 case 'c':
10024 if (name[1] == 'o' &&
10025 name[2] == 'n' &&
10026 name[3] == 'n' &&
10027 name[4] == 'e' &&
10028 name[5] == 'c' &&
10029 name[6] == 't')
10030 { /* connect */
10031 return -KEY_connect;
10032 }
10033
10034 goto unknown;
10035
10036 case 'd':
10037 switch (name[1])
10038 {
10039 case 'b':
10040 if (name[2] == 'm' &&
10041 name[3] == 'o' &&
10042 name[4] == 'p' &&
10043 name[5] == 'e' &&
10044 name[6] == 'n')
10045 { /* dbmopen */
10046 return -KEY_dbmopen;
10047 }
10048
10049 goto unknown;
10050
10051 case 'e':
0d863452
RH
10052 if (name[2] == 'f')
10053 {
10054 switch (name[3])
10055 {
10056 case 'a':
10057 if (name[4] == 'u' &&
10058 name[5] == 'l' &&
10059 name[6] == 't')
10060 { /* default */
5458a98a 10061 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
10062 }
10063
10064 goto unknown;
10065
10066 case 'i':
10067 if (name[4] == 'n' &&
952306ac
RGS
10068 name[5] == 'e' &&
10069 name[6] == 'd')
10070 { /* defined */
10071 return KEY_defined;
10072 }
4c3bbe0f 10073
952306ac 10074 goto unknown;
4c3bbe0f 10075
952306ac
RGS
10076 default:
10077 goto unknown;
10078 }
0d863452
RH
10079 }
10080
10081 goto unknown;
10082
10083 default:
10084 goto unknown;
10085 }
4c3bbe0f
MHM
10086
10087 case 'f':
10088 if (name[1] == 'o' &&
10089 name[2] == 'r' &&
10090 name[3] == 'e' &&
10091 name[4] == 'a' &&
10092 name[5] == 'c' &&
10093 name[6] == 'h')
10094 { /* foreach */
10095 return KEY_foreach;
10096 }
10097
10098 goto unknown;
10099
10100 case 'g':
10101 if (name[1] == 'e' &&
10102 name[2] == 't' &&
10103 name[3] == 'p')
10104 {
10105 switch (name[4])
10106 {
10107 case 'g':
10108 if (name[5] == 'r' &&
10109 name[6] == 'p')
10110 { /* getpgrp */
10111 return -KEY_getpgrp;
10112 }
10113
10114 goto unknown;
10115
10116 case 'p':
10117 if (name[5] == 'i' &&
10118 name[6] == 'd')
10119 { /* getppid */
10120 return -KEY_getppid;
10121 }
10122
10123 goto unknown;
10124
10125 default:
10126 goto unknown;
10127 }
10128 }
10129
10130 goto unknown;
10131
10132 case 'l':
10133 if (name[1] == 'c' &&
10134 name[2] == 'f' &&
10135 name[3] == 'i' &&
10136 name[4] == 'r' &&
10137 name[5] == 's' &&
10138 name[6] == 't')
10139 { /* lcfirst */
10140 return -KEY_lcfirst;
10141 }
10142
10143 goto unknown;
10144
10145 case 'o':
10146 if (name[1] == 'p' &&
10147 name[2] == 'e' &&
10148 name[3] == 'n' &&
10149 name[4] == 'd' &&
10150 name[5] == 'i' &&
10151 name[6] == 'r')
10152 { /* opendir */
10153 return -KEY_opendir;
10154 }
10155
10156 goto unknown;
10157
10158 case 'p':
10159 if (name[1] == 'a' &&
10160 name[2] == 'c' &&
10161 name[3] == 'k' &&
10162 name[4] == 'a' &&
10163 name[5] == 'g' &&
10164 name[6] == 'e')
10165 { /* package */
10166 return KEY_package;
10167 }
10168
10169 goto unknown;
10170
10171 case 'r':
10172 if (name[1] == 'e')
10173 {
10174 switch (name[2])
10175 {
10176 case 'a':
10177 if (name[3] == 'd' &&
10178 name[4] == 'd' &&
10179 name[5] == 'i' &&
10180 name[6] == 'r')
10181 { /* readdir */
10182 return -KEY_readdir;
10183 }
10184
10185 goto unknown;
10186
10187 case 'q':
10188 if (name[3] == 'u' &&
10189 name[4] == 'i' &&
10190 name[5] == 'r' &&
10191 name[6] == 'e')
10192 { /* require */
10193 return KEY_require;
10194 }
10195
10196 goto unknown;
10197
10198 case 'v':
10199 if (name[3] == 'e' &&
10200 name[4] == 'r' &&
10201 name[5] == 's' &&
10202 name[6] == 'e')
10203 { /* reverse */
10204 return -KEY_reverse;
10205 }
10206
10207 goto unknown;
10208
10209 default:
10210 goto unknown;
10211 }
10212 }
10213
10214 goto unknown;
10215
10216 case 's':
10217 switch (name[1])
10218 {
10219 case 'e':
10220 switch (name[2])
10221 {
10222 case 'e':
10223 if (name[3] == 'k' &&
10224 name[4] == 'd' &&
10225 name[5] == 'i' &&
10226 name[6] == 'r')
10227 { /* seekdir */
10228 return -KEY_seekdir;
10229 }
10230
10231 goto unknown;
10232
10233 case 't':
10234 if (name[3] == 'p' &&
10235 name[4] == 'g' &&
10236 name[5] == 'r' &&
10237 name[6] == 'p')
10238 { /* setpgrp */
10239 return -KEY_setpgrp;
10240 }
10241
10242 goto unknown;
10243
10244 default:
10245 goto unknown;
10246 }
10247
10248 case 'h':
10249 if (name[2] == 'm' &&
10250 name[3] == 'r' &&
10251 name[4] == 'e' &&
10252 name[5] == 'a' &&
10253 name[6] == 'd')
10254 { /* shmread */
10255 return -KEY_shmread;
10256 }
10257
10258 goto unknown;
10259
10260 case 'p':
10261 if (name[2] == 'r' &&
10262 name[3] == 'i' &&
10263 name[4] == 'n' &&
10264 name[5] == 't' &&
10265 name[6] == 'f')
10266 { /* sprintf */
10267 return -KEY_sprintf;
10268 }
10269
10270 goto unknown;
10271
10272 case 'y':
10273 switch (name[2])
10274 {
10275 case 'm':
10276 if (name[3] == 'l' &&
10277 name[4] == 'i' &&
10278 name[5] == 'n' &&
10279 name[6] == 'k')
10280 { /* symlink */
10281 return -KEY_symlink;
10282 }
10283
10284 goto unknown;
10285
10286 case 's':
10287 switch (name[3])
10288 {
10289 case 'c':
10290 if (name[4] == 'a' &&
10291 name[5] == 'l' &&
10292 name[6] == 'l')
10293 { /* syscall */
10294 return -KEY_syscall;
10295 }
10296
10297 goto unknown;
10298
10299 case 'o':
10300 if (name[4] == 'p' &&
10301 name[5] == 'e' &&
10302 name[6] == 'n')
10303 { /* sysopen */
10304 return -KEY_sysopen;
10305 }
10306
10307 goto unknown;
10308
10309 case 'r':
10310 if (name[4] == 'e' &&
10311 name[5] == 'a' &&
10312 name[6] == 'd')
10313 { /* sysread */
10314 return -KEY_sysread;
10315 }
10316
10317 goto unknown;
10318
10319 case 's':
10320 if (name[4] == 'e' &&
10321 name[5] == 'e' &&
10322 name[6] == 'k')
10323 { /* sysseek */
10324 return -KEY_sysseek;
10325 }
10326
10327 goto unknown;
10328
10329 default:
10330 goto unknown;
10331 }
10332
10333 default:
10334 goto unknown;
10335 }
10336
10337 default:
10338 goto unknown;
10339 }
10340
10341 case 't':
10342 if (name[1] == 'e' &&
10343 name[2] == 'l' &&
10344 name[3] == 'l' &&
10345 name[4] == 'd' &&
10346 name[5] == 'i' &&
10347 name[6] == 'r')
10348 { /* telldir */
10349 return -KEY_telldir;
10350 }
10351
10352 goto unknown;
10353
10354 case 'u':
10355 switch (name[1])
10356 {
10357 case 'c':
10358 if (name[2] == 'f' &&
10359 name[3] == 'i' &&
10360 name[4] == 'r' &&
10361 name[5] == 's' &&
10362 name[6] == 't')
10363 { /* ucfirst */
10364 return -KEY_ucfirst;
10365 }
10366
10367 goto unknown;
10368
10369 case 'n':
10370 if (name[2] == 's' &&
10371 name[3] == 'h' &&
10372 name[4] == 'i' &&
10373 name[5] == 'f' &&
10374 name[6] == 't')
10375 { /* unshift */
10376 return -KEY_unshift;
10377 }
10378
10379 goto unknown;
10380
10381 default:
10382 goto unknown;
10383 }
10384
10385 case 'w':
10386 if (name[1] == 'a' &&
10387 name[2] == 'i' &&
10388 name[3] == 't' &&
10389 name[4] == 'p' &&
10390 name[5] == 'i' &&
10391 name[6] == 'd')
10392 { /* waitpid */
10393 return -KEY_waitpid;
10394 }
10395
10396 goto unknown;
10397
10398 default:
10399 goto unknown;
10400 }
10401
10402 case 8: /* 26 tokens of length 8 */
10403 switch (name[0])
10404 {
10405 case 'A':
10406 if (name[1] == 'U' &&
10407 name[2] == 'T' &&
10408 name[3] == 'O' &&
10409 name[4] == 'L' &&
10410 name[5] == 'O' &&
10411 name[6] == 'A' &&
10412 name[7] == 'D')
10413 { /* AUTOLOAD */
10414 return KEY_AUTOLOAD;
10415 }
10416
10417 goto unknown;
10418
10419 case '_':
10420 if (name[1] == '_')
10421 {
10422 switch (name[2])
10423 {
10424 case 'D':
10425 if (name[3] == 'A' &&
10426 name[4] == 'T' &&
10427 name[5] == 'A' &&
10428 name[6] == '_' &&
10429 name[7] == '_')
10430 { /* __DATA__ */
10431 return KEY___DATA__;
10432 }
10433
10434 goto unknown;
10435
10436 case 'F':
10437 if (name[3] == 'I' &&
10438 name[4] == 'L' &&
10439 name[5] == 'E' &&
10440 name[6] == '_' &&
10441 name[7] == '_')
10442 { /* __FILE__ */
10443 return -KEY___FILE__;
10444 }
10445
10446 goto unknown;
10447
10448 case 'L':
10449 if (name[3] == 'I' &&
10450 name[4] == 'N' &&
10451 name[5] == 'E' &&
10452 name[6] == '_' &&
10453 name[7] == '_')
10454 { /* __LINE__ */
10455 return -KEY___LINE__;
10456 }
10457
10458 goto unknown;
10459
10460 default:
10461 goto unknown;
10462 }
10463 }
10464
10465 goto unknown;
10466
10467 case 'c':
10468 switch (name[1])
10469 {
10470 case 'l':
10471 if (name[2] == 'o' &&
10472 name[3] == 's' &&
10473 name[4] == 'e' &&
10474 name[5] == 'd' &&
10475 name[6] == 'i' &&
10476 name[7] == 'r')
10477 { /* closedir */
10478 return -KEY_closedir;
10479 }
10480
10481 goto unknown;
10482
10483 case 'o':
10484 if (name[2] == 'n' &&
10485 name[3] == 't' &&
10486 name[4] == 'i' &&
10487 name[5] == 'n' &&
10488 name[6] == 'u' &&
10489 name[7] == 'e')
10490 { /* continue */
10491 return -KEY_continue;
10492 }
10493
10494 goto unknown;
10495
10496 default:
10497 goto unknown;
10498 }
10499
10500 case 'd':
10501 if (name[1] == 'b' &&
10502 name[2] == 'm' &&
10503 name[3] == 'c' &&
10504 name[4] == 'l' &&
10505 name[5] == 'o' &&
10506 name[6] == 's' &&
10507 name[7] == 'e')
10508 { /* dbmclose */
10509 return -KEY_dbmclose;
10510 }
10511
10512 goto unknown;
10513
10514 case 'e':
10515 if (name[1] == 'n' &&
10516 name[2] == 'd')
10517 {
10518 switch (name[3])
10519 {
10520 case 'g':
10521 if (name[4] == 'r' &&
10522 name[5] == 'e' &&
10523 name[6] == 'n' &&
10524 name[7] == 't')
10525 { /* endgrent */
10526 return -KEY_endgrent;
10527 }
10528
10529 goto unknown;
10530
10531 case 'p':
10532 if (name[4] == 'w' &&
10533 name[5] == 'e' &&
10534 name[6] == 'n' &&
10535 name[7] == 't')
10536 { /* endpwent */
10537 return -KEY_endpwent;
10538 }
10539
10540 goto unknown;
10541
10542 default:
10543 goto unknown;
10544 }
10545 }
10546
10547 goto unknown;
10548
10549 case 'f':
10550 if (name[1] == 'o' &&
10551 name[2] == 'r' &&
10552 name[3] == 'm' &&
10553 name[4] == 'l' &&
10554 name[5] == 'i' &&
10555 name[6] == 'n' &&
10556 name[7] == 'e')
10557 { /* formline */
10558 return -KEY_formline;
10559 }
10560
10561 goto unknown;
10562
10563 case 'g':
10564 if (name[1] == 'e' &&
10565 name[2] == 't')
10566 {
10567 switch (name[3])
10568 {
10569 case 'g':
10570 if (name[4] == 'r')
10571 {
10572 switch (name[5])
10573 {
10574 case 'e':
10575 if (name[6] == 'n' &&
10576 name[7] == 't')
10577 { /* getgrent */
10578 return -KEY_getgrent;
10579 }
10580
10581 goto unknown;
10582
10583 case 'g':
10584 if (name[6] == 'i' &&
10585 name[7] == 'd')
10586 { /* getgrgid */
10587 return -KEY_getgrgid;
10588 }
10589
10590 goto unknown;
10591
10592 case 'n':
10593 if (name[6] == 'a' &&
10594 name[7] == 'm')
10595 { /* getgrnam */
10596 return -KEY_getgrnam;
10597 }
10598
10599 goto unknown;
10600
10601 default:
10602 goto unknown;
10603 }
10604 }
10605
10606 goto unknown;
10607
10608 case 'l':
10609 if (name[4] == 'o' &&
10610 name[5] == 'g' &&
10611 name[6] == 'i' &&
10612 name[7] == 'n')
10613 { /* getlogin */
10614 return -KEY_getlogin;
10615 }
10616
10617 goto unknown;
10618
10619 case 'p':
10620 if (name[4] == 'w')
10621 {
10622 switch (name[5])
10623 {
10624 case 'e':
10625 if (name[6] == 'n' &&
10626 name[7] == 't')
10627 { /* getpwent */
10628 return -KEY_getpwent;
10629 }
10630
10631 goto unknown;
10632
10633 case 'n':
10634 if (name[6] == 'a' &&
10635 name[7] == 'm')
10636 { /* getpwnam */
10637 return -KEY_getpwnam;
10638 }
10639
10640 goto unknown;
10641
10642 case 'u':
10643 if (name[6] == 'i' &&
10644 name[7] == 'd')
10645 { /* getpwuid */
10646 return -KEY_getpwuid;
10647 }
10648
10649 goto unknown;
10650
10651 default:
10652 goto unknown;
10653 }
10654 }
10655
10656 goto unknown;
10657
10658 default:
10659 goto unknown;
10660 }
10661 }
10662
10663 goto unknown;
10664
10665 case 'r':
10666 if (name[1] == 'e' &&
10667 name[2] == 'a' &&
10668 name[3] == 'd')
10669 {
10670 switch (name[4])
10671 {
10672 case 'l':
10673 if (name[5] == 'i' &&
10674 name[6] == 'n')
10675 {
10676 switch (name[7])
10677 {
10678 case 'e':
10679 { /* readline */
10680 return -KEY_readline;
10681 }
10682
4c3bbe0f
MHM
10683 case 'k':
10684 { /* readlink */
10685 return -KEY_readlink;
10686 }
10687
4c3bbe0f
MHM
10688 default:
10689 goto unknown;
10690 }
10691 }
10692
10693 goto unknown;
10694
10695 case 'p':
10696 if (name[5] == 'i' &&
10697 name[6] == 'p' &&
10698 name[7] == 'e')
10699 { /* readpipe */
10700 return -KEY_readpipe;
10701 }
10702
10703 goto unknown;
10704
10705 default:
10706 goto unknown;
10707 }
10708 }
10709
10710 goto unknown;
10711
10712 case 's':
10713 switch (name[1])
10714 {
10715 case 'e':
10716 if (name[2] == 't')
10717 {
10718 switch (name[3])
10719 {
10720 case 'g':
10721 if (name[4] == 'r' &&
10722 name[5] == 'e' &&
10723 name[6] == 'n' &&
10724 name[7] == 't')
10725 { /* setgrent */
10726 return -KEY_setgrent;
10727 }
10728
10729 goto unknown;
10730
10731 case 'p':
10732 if (name[4] == 'w' &&
10733 name[5] == 'e' &&
10734 name[6] == 'n' &&
10735 name[7] == 't')
10736 { /* setpwent */
10737 return -KEY_setpwent;
10738 }
10739
10740 goto unknown;
10741
10742 default:
10743 goto unknown;
10744 }
10745 }
10746
10747 goto unknown;
10748
10749 case 'h':
10750 switch (name[2])
10751 {
10752 case 'm':
10753 if (name[3] == 'w' &&
10754 name[4] == 'r' &&
10755 name[5] == 'i' &&
10756 name[6] == 't' &&
10757 name[7] == 'e')
10758 { /* shmwrite */
10759 return -KEY_shmwrite;
10760 }
10761
10762 goto unknown;
10763
10764 case 'u':
10765 if (name[3] == 't' &&
10766 name[4] == 'd' &&
10767 name[5] == 'o' &&
10768 name[6] == 'w' &&
10769 name[7] == 'n')
10770 { /* shutdown */
10771 return -KEY_shutdown;
10772 }
10773
10774 goto unknown;
10775
10776 default:
10777 goto unknown;
10778 }
10779
10780 case 'y':
10781 if (name[2] == 's' &&
10782 name[3] == 'w' &&
10783 name[4] == 'r' &&
10784 name[5] == 'i' &&
10785 name[6] == 't' &&
10786 name[7] == 'e')
10787 { /* syswrite */
10788 return -KEY_syswrite;
10789 }
10790
10791 goto unknown;
10792
10793 default:
10794 goto unknown;
10795 }
10796
10797 case 't':
10798 if (name[1] == 'r' &&
10799 name[2] == 'u' &&
10800 name[3] == 'n' &&
10801 name[4] == 'c' &&
10802 name[5] == 'a' &&
10803 name[6] == 't' &&
10804 name[7] == 'e')
10805 { /* truncate */
10806 return -KEY_truncate;
10807 }
10808
10809 goto unknown;
10810
10811 default:
10812 goto unknown;
10813 }
10814
3c10abe3 10815 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10816 switch (name[0])
10817 {
3c10abe3
AG
10818 case 'U':
10819 if (name[1] == 'N' &&
10820 name[2] == 'I' &&
10821 name[3] == 'T' &&
10822 name[4] == 'C' &&
10823 name[5] == 'H' &&
10824 name[6] == 'E' &&
10825 name[7] == 'C' &&
10826 name[8] == 'K')
10827 { /* UNITCHECK */
10828 return KEY_UNITCHECK;
10829 }
10830
10831 goto unknown;
10832
4c3bbe0f
MHM
10833 case 'e':
10834 if (name[1] == 'n' &&
10835 name[2] == 'd' &&
10836 name[3] == 'n' &&
10837 name[4] == 'e' &&
10838 name[5] == 't' &&
10839 name[6] == 'e' &&
10840 name[7] == 'n' &&
10841 name[8] == 't')
10842 { /* endnetent */
10843 return -KEY_endnetent;
10844 }
10845
10846 goto unknown;
10847
10848 case 'g':
10849 if (name[1] == 'e' &&
10850 name[2] == 't' &&
10851 name[3] == 'n' &&
10852 name[4] == 'e' &&
10853 name[5] == 't' &&
10854 name[6] == 'e' &&
10855 name[7] == 'n' &&
10856 name[8] == 't')
10857 { /* getnetent */
10858 return -KEY_getnetent;
10859 }
10860
10861 goto unknown;
10862
10863 case 'l':
10864 if (name[1] == 'o' &&
10865 name[2] == 'c' &&
10866 name[3] == 'a' &&
10867 name[4] == 'l' &&
10868 name[5] == 't' &&
10869 name[6] == 'i' &&
10870 name[7] == 'm' &&
10871 name[8] == 'e')
10872 { /* localtime */
10873 return -KEY_localtime;
10874 }
10875
10876 goto unknown;
10877
10878 case 'p':
10879 if (name[1] == 'r' &&
10880 name[2] == 'o' &&
10881 name[3] == 't' &&
10882 name[4] == 'o' &&
10883 name[5] == 't' &&
10884 name[6] == 'y' &&
10885 name[7] == 'p' &&
10886 name[8] == 'e')
10887 { /* prototype */
10888 return KEY_prototype;
10889 }
10890
10891 goto unknown;
10892
10893 case 'q':
10894 if (name[1] == 'u' &&
10895 name[2] == 'o' &&
10896 name[3] == 't' &&
10897 name[4] == 'e' &&
10898 name[5] == 'm' &&
10899 name[6] == 'e' &&
10900 name[7] == 't' &&
10901 name[8] == 'a')
10902 { /* quotemeta */
10903 return -KEY_quotemeta;
10904 }
10905
10906 goto unknown;
10907
10908 case 'r':
10909 if (name[1] == 'e' &&
10910 name[2] == 'w' &&
10911 name[3] == 'i' &&
10912 name[4] == 'n' &&
10913 name[5] == 'd' &&
10914 name[6] == 'd' &&
10915 name[7] == 'i' &&
10916 name[8] == 'r')
10917 { /* rewinddir */
10918 return -KEY_rewinddir;
10919 }
10920
10921 goto unknown;
10922
10923 case 's':
10924 if (name[1] == 'e' &&
10925 name[2] == 't' &&
10926 name[3] == 'n' &&
10927 name[4] == 'e' &&
10928 name[5] == 't' &&
10929 name[6] == 'e' &&
10930 name[7] == 'n' &&
10931 name[8] == 't')
10932 { /* setnetent */
10933 return -KEY_setnetent;
10934 }
10935
10936 goto unknown;
10937
10938 case 'w':
10939 if (name[1] == 'a' &&
10940 name[2] == 'n' &&
10941 name[3] == 't' &&
10942 name[4] == 'a' &&
10943 name[5] == 'r' &&
10944 name[6] == 'r' &&
10945 name[7] == 'a' &&
10946 name[8] == 'y')
10947 { /* wantarray */
10948 return -KEY_wantarray;
10949 }
10950
10951 goto unknown;
10952
10953 default:
10954 goto unknown;
10955 }
10956
10957 case 10: /* 9 tokens of length 10 */
10958 switch (name[0])
10959 {
10960 case 'e':
10961 if (name[1] == 'n' &&
10962 name[2] == 'd')
10963 {
10964 switch (name[3])
10965 {
10966 case 'h':
10967 if (name[4] == 'o' &&
10968 name[5] == 's' &&
10969 name[6] == 't' &&
10970 name[7] == 'e' &&
10971 name[8] == 'n' &&
10972 name[9] == 't')
10973 { /* endhostent */
10974 return -KEY_endhostent;
10975 }
10976
10977 goto unknown;
10978
10979 case 's':
10980 if (name[4] == 'e' &&
10981 name[5] == 'r' &&
10982 name[6] == 'v' &&
10983 name[7] == 'e' &&
10984 name[8] == 'n' &&
10985 name[9] == 't')
10986 { /* endservent */
10987 return -KEY_endservent;
10988 }
10989
10990 goto unknown;
10991
10992 default:
10993 goto unknown;
10994 }
10995 }
10996
10997 goto unknown;
10998
10999 case 'g':
11000 if (name[1] == 'e' &&
11001 name[2] == 't')
11002 {
11003 switch (name[3])
11004 {
11005 case 'h':
11006 if (name[4] == 'o' &&
11007 name[5] == 's' &&
11008 name[6] == 't' &&
11009 name[7] == 'e' &&
11010 name[8] == 'n' &&
11011 name[9] == 't')
11012 { /* gethostent */
11013 return -KEY_gethostent;
11014 }
11015
11016 goto unknown;
11017
11018 case 's':
11019 switch (name[4])
11020 {
11021 case 'e':
11022 if (name[5] == 'r' &&
11023 name[6] == 'v' &&
11024 name[7] == 'e' &&
11025 name[8] == 'n' &&
11026 name[9] == 't')
11027 { /* getservent */
11028 return -KEY_getservent;
11029 }
11030
11031 goto unknown;
11032
11033 case 'o':
11034 if (name[5] == 'c' &&
11035 name[6] == 'k' &&
11036 name[7] == 'o' &&
11037 name[8] == 'p' &&
11038 name[9] == 't')
11039 { /* getsockopt */
11040 return -KEY_getsockopt;
11041 }
11042
11043 goto unknown;
11044
11045 default:
11046 goto unknown;
11047 }
11048
11049 default:
11050 goto unknown;
11051 }
11052 }
11053
11054 goto unknown;
11055
11056 case 's':
11057 switch (name[1])
11058 {
11059 case 'e':
11060 if (name[2] == 't')
11061 {
11062 switch (name[3])
11063 {
11064 case 'h':
11065 if (name[4] == 'o' &&
11066 name[5] == 's' &&
11067 name[6] == 't' &&
11068 name[7] == 'e' &&
11069 name[8] == 'n' &&
11070 name[9] == 't')
11071 { /* sethostent */
11072 return -KEY_sethostent;
11073 }
11074
11075 goto unknown;
11076
11077 case 's':
11078 switch (name[4])
11079 {
11080 case 'e':
11081 if (name[5] == 'r' &&
11082 name[6] == 'v' &&
11083 name[7] == 'e' &&
11084 name[8] == 'n' &&
11085 name[9] == 't')
11086 { /* setservent */
11087 return -KEY_setservent;
11088 }
11089
11090 goto unknown;
11091
11092 case 'o':
11093 if (name[5] == 'c' &&
11094 name[6] == 'k' &&
11095 name[7] == 'o' &&
11096 name[8] == 'p' &&
11097 name[9] == 't')
11098 { /* setsockopt */
11099 return -KEY_setsockopt;
11100 }
11101
11102 goto unknown;
11103
11104 default:
11105 goto unknown;
11106 }
11107
11108 default:
11109 goto unknown;
11110 }
11111 }
11112
11113 goto unknown;
11114
11115 case 'o':
11116 if (name[2] == 'c' &&
11117 name[3] == 'k' &&
11118 name[4] == 'e' &&
11119 name[5] == 't' &&
11120 name[6] == 'p' &&
11121 name[7] == 'a' &&
11122 name[8] == 'i' &&
11123 name[9] == 'r')
11124 { /* socketpair */
11125 return -KEY_socketpair;
11126 }
11127
11128 goto unknown;
11129
11130 default:
11131 goto unknown;
11132 }
11133
11134 default:
11135 goto unknown;
e2e1dd5a 11136 }
4c3bbe0f
MHM
11137
11138 case 11: /* 8 tokens of length 11 */
11139 switch (name[0])
11140 {
11141 case '_':
11142 if (name[1] == '_' &&
11143 name[2] == 'P' &&
11144 name[3] == 'A' &&
11145 name[4] == 'C' &&
11146 name[5] == 'K' &&
11147 name[6] == 'A' &&
11148 name[7] == 'G' &&
11149 name[8] == 'E' &&
11150 name[9] == '_' &&
11151 name[10] == '_')
11152 { /* __PACKAGE__ */
11153 return -KEY___PACKAGE__;
11154 }
11155
11156 goto unknown;
11157
11158 case 'e':
11159 if (name[1] == 'n' &&
11160 name[2] == 'd' &&
11161 name[3] == 'p' &&
11162 name[4] == 'r' &&
11163 name[5] == 'o' &&
11164 name[6] == 't' &&
11165 name[7] == 'o' &&
11166 name[8] == 'e' &&
11167 name[9] == 'n' &&
11168 name[10] == 't')
11169 { /* endprotoent */
11170 return -KEY_endprotoent;
11171 }
11172
11173 goto unknown;
11174
11175 case 'g':
11176 if (name[1] == 'e' &&
11177 name[2] == 't')
11178 {
11179 switch (name[3])
11180 {
11181 case 'p':
11182 switch (name[4])
11183 {
11184 case 'e':
11185 if (name[5] == 'e' &&
11186 name[6] == 'r' &&
11187 name[7] == 'n' &&
11188 name[8] == 'a' &&
11189 name[9] == 'm' &&
11190 name[10] == 'e')
11191 { /* getpeername */
11192 return -KEY_getpeername;
11193 }
11194
11195 goto unknown;
11196
11197 case 'r':
11198 switch (name[5])
11199 {
11200 case 'i':
11201 if (name[6] == 'o' &&
11202 name[7] == 'r' &&
11203 name[8] == 'i' &&
11204 name[9] == 't' &&
11205 name[10] == 'y')
11206 { /* getpriority */
11207 return -KEY_getpriority;
11208 }
11209
11210 goto unknown;
11211
11212 case 'o':
11213 if (name[6] == 't' &&
11214 name[7] == 'o' &&
11215 name[8] == 'e' &&
11216 name[9] == 'n' &&
11217 name[10] == 't')
11218 { /* getprotoent */
11219 return -KEY_getprotoent;
11220 }
11221
11222 goto unknown;
11223
11224 default:
11225 goto unknown;
11226 }
11227
11228 default:
11229 goto unknown;
11230 }
11231
11232 case 's':
11233 if (name[4] == 'o' &&
11234 name[5] == 'c' &&
11235 name[6] == 'k' &&
11236 name[7] == 'n' &&
11237 name[8] == 'a' &&
11238 name[9] == 'm' &&
11239 name[10] == 'e')
11240 { /* getsockname */
11241 return -KEY_getsockname;
11242 }
11243
11244 goto unknown;
11245
11246 default:
11247 goto unknown;
11248 }
11249 }
11250
11251 goto unknown;
11252
11253 case 's':
11254 if (name[1] == 'e' &&
11255 name[2] == 't' &&
11256 name[3] == 'p' &&
11257 name[4] == 'r')
11258 {
11259 switch (name[5])
11260 {
11261 case 'i':
11262 if (name[6] == 'o' &&
11263 name[7] == 'r' &&
11264 name[8] == 'i' &&
11265 name[9] == 't' &&
11266 name[10] == 'y')
11267 { /* setpriority */
11268 return -KEY_setpriority;
11269 }
11270
11271 goto unknown;
11272
11273 case 'o':
11274 if (name[6] == 't' &&
11275 name[7] == 'o' &&
11276 name[8] == 'e' &&
11277 name[9] == 'n' &&
11278 name[10] == 't')
11279 { /* setprotoent */
11280 return -KEY_setprotoent;
11281 }
11282
11283 goto unknown;
11284
11285 default:
11286 goto unknown;
11287 }
11288 }
11289
11290 goto unknown;
11291
11292 default:
11293 goto unknown;
e2e1dd5a 11294 }
4c3bbe0f
MHM
11295
11296 case 12: /* 2 tokens of length 12 */
11297 if (name[0] == 'g' &&
11298 name[1] == 'e' &&
11299 name[2] == 't' &&
11300 name[3] == 'n' &&
11301 name[4] == 'e' &&
11302 name[5] == 't' &&
11303 name[6] == 'b' &&
11304 name[7] == 'y')
11305 {
11306 switch (name[8])
11307 {
11308 case 'a':
11309 if (name[9] == 'd' &&
11310 name[10] == 'd' &&
11311 name[11] == 'r')
11312 { /* getnetbyaddr */
11313 return -KEY_getnetbyaddr;
11314 }
11315
11316 goto unknown;
11317
11318 case 'n':
11319 if (name[9] == 'a' &&
11320 name[10] == 'm' &&
11321 name[11] == 'e')
11322 { /* getnetbyname */
11323 return -KEY_getnetbyname;
11324 }
11325
11326 goto unknown;
11327
11328 default:
11329 goto unknown;
11330 }
e2e1dd5a 11331 }
4c3bbe0f
MHM
11332
11333 goto unknown;
11334
11335 case 13: /* 4 tokens of length 13 */
11336 if (name[0] == 'g' &&
11337 name[1] == 'e' &&
11338 name[2] == 't')
11339 {
11340 switch (name[3])
11341 {
11342 case 'h':
11343 if (name[4] == 'o' &&
11344 name[5] == 's' &&
11345 name[6] == 't' &&
11346 name[7] == 'b' &&
11347 name[8] == 'y')
11348 {
11349 switch (name[9])
11350 {
11351 case 'a':
11352 if (name[10] == 'd' &&
11353 name[11] == 'd' &&
11354 name[12] == 'r')
11355 { /* gethostbyaddr */
11356 return -KEY_gethostbyaddr;
11357 }
11358
11359 goto unknown;
11360
11361 case 'n':
11362 if (name[10] == 'a' &&
11363 name[11] == 'm' &&
11364 name[12] == 'e')
11365 { /* gethostbyname */
11366 return -KEY_gethostbyname;
11367 }
11368
11369 goto unknown;
11370
11371 default:
11372 goto unknown;
11373 }
11374 }
11375
11376 goto unknown;
11377
11378 case 's':
11379 if (name[4] == 'e' &&
11380 name[5] == 'r' &&
11381 name[6] == 'v' &&
11382 name[7] == 'b' &&
11383 name[8] == 'y')
11384 {
11385 switch (name[9])
11386 {
11387 case 'n':
11388 if (name[10] == 'a' &&
11389 name[11] == 'm' &&
11390 name[12] == 'e')
11391 { /* getservbyname */
11392 return -KEY_getservbyname;
11393 }
11394
11395 goto unknown;
11396
11397 case 'p':
11398 if (name[10] == 'o' &&
11399 name[11] == 'r' &&
11400 name[12] == 't')
11401 { /* getservbyport */
11402 return -KEY_getservbyport;
11403 }
11404
11405 goto unknown;
11406
11407 default:
11408 goto unknown;
11409 }
11410 }
11411
11412 goto unknown;
11413
11414 default:
11415 goto unknown;
11416 }
e2e1dd5a 11417 }
4c3bbe0f
MHM
11418
11419 goto unknown;
11420
11421 case 14: /* 1 tokens of length 14 */
11422 if (name[0] == 'g' &&
11423 name[1] == 'e' &&
11424 name[2] == 't' &&
11425 name[3] == 'p' &&
11426 name[4] == 'r' &&
11427 name[5] == 'o' &&
11428 name[6] == 't' &&
11429 name[7] == 'o' &&
11430 name[8] == 'b' &&
11431 name[9] == 'y' &&
11432 name[10] == 'n' &&
11433 name[11] == 'a' &&
11434 name[12] == 'm' &&
11435 name[13] == 'e')
11436 { /* getprotobyname */
11437 return -KEY_getprotobyname;
11438 }
11439
11440 goto unknown;
11441
11442 case 16: /* 1 tokens of length 16 */
11443 if (name[0] == 'g' &&
11444 name[1] == 'e' &&
11445 name[2] == 't' &&
11446 name[3] == 'p' &&
11447 name[4] == 'r' &&
11448 name[5] == 'o' &&
11449 name[6] == 't' &&
11450 name[7] == 'o' &&
11451 name[8] == 'b' &&
11452 name[9] == 'y' &&
11453 name[10] == 'n' &&
11454 name[11] == 'u' &&
11455 name[12] == 'm' &&
11456 name[13] == 'b' &&
11457 name[14] == 'e' &&
11458 name[15] == 'r')
11459 { /* getprotobynumber */
11460 return -KEY_getprotobynumber;
11461 }
11462
11463 goto unknown;
11464
11465 default:
11466 goto unknown;
e2e1dd5a 11467 }
4c3bbe0f
MHM
11468
11469unknown:
e2e1dd5a 11470 return 0;
a687059c
LW
11471}
11472
76e3520e 11473STATIC void
c94115d8 11474S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11475{
97aff369 11476 dVAR;
2f3197b3 11477
7918f24d
NC
11478 PERL_ARGS_ASSERT_CHECKCOMMA;
11479
d008e5eb 11480 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11481 if (ckWARN(WARN_SYNTAX)) {
11482 int level = 1;
26ff0806 11483 const char *w;
d008e5eb
GS
11484 for (w = s+2; *w && level; w++) {
11485 if (*w == '(')
11486 ++level;
11487 else if (*w == ')')
11488 --level;
11489 }
888fea98
NC
11490 while (isSPACE(*w))
11491 ++w;
b1439985
RGS
11492 /* the list of chars below is for end of statements or
11493 * block / parens, boolean operators (&&, ||, //) and branch
11494 * constructs (or, and, if, until, unless, while, err, for).
11495 * Not a very solid hack... */
11496 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11497 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11498 "%s (...) interpreted as function",name);
d008e5eb 11499 }
2f3197b3 11500 }
3280af22 11501 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11502 s++;
a687059c
LW
11503 if (*s == '(')
11504 s++;
3280af22 11505 while (s < PL_bufend && isSPACE(*s))
a687059c 11506 s++;
7e2040f0 11507 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11508 const char * const w = s++;
7e2040f0 11509 while (isALNUM_lazy_if(s,UTF))
a687059c 11510 s++;
3280af22 11511 while (s < PL_bufend && isSPACE(*s))
a687059c 11512 s++;
e929a76b 11513 if (*s == ',') {
c94115d8 11514 GV* gv;
5458a98a 11515 if (keyword(w, s - w, 0))
e929a76b 11516 return;
c94115d8
NC
11517
11518 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11519 if (gv && GvCVu(gv))
abbb3198 11520 return;
cea2e8a9 11521 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11522 }
11523 }
11524}
11525
423cee85
JH
11526/* Either returns sv, or mortalizes sv and returns a new SV*.
11527 Best used as sv=new_constant(..., sv, ...).
11528 If s, pv are NULL, calls subroutine with one argument,
11529 and type is used with error messages only. */
11530
b3ac6de7 11531STATIC SV *
eb0d8d16
NC
11532S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11533 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11534{
27da23d5 11535 dVAR; dSP;
890ce7af 11536 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11537 SV *res;
b3ac6de7
IZ
11538 SV **cvp;
11539 SV *cv, *typesv;
89e33a05 11540 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11541
7918f24d
NC
11542 PERL_ARGS_ASSERT_NEW_CONSTANT;
11543
f0af216f 11544 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11545 SV *msg;
11546
10edeb5d
JH
11547 why2 = (const char *)
11548 (strEQ(key,"charnames")
11549 ? "(possibly a missing \"use charnames ...\")"
11550 : "");
4e553d73 11551 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11552 (type ? type: "undef"), why2);
11553
11554 /* This is convoluted and evil ("goto considered harmful")
11555 * but I do not understand the intricacies of all the different
11556 * failure modes of %^H in here. The goal here is to make
11557 * the most probable error message user-friendly. --jhi */
11558
11559 goto msgdone;
11560
423cee85 11561 report:
4e553d73 11562 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11563 (type ? type: "undef"), why1, why2, why3);
41ab332f 11564 msgdone:
95a20fc0 11565 yyerror(SvPVX_const(msg));
423cee85
JH
11566 SvREFCNT_dec(msg);
11567 return sv;
11568 }
ff3f963a
KW
11569
11570 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
11571 if (PL_error_count > 0 && strEQ(key,"charnames"))
11572 return &PL_sv_undef;
ff3f963a 11573
eb0d8d16 11574 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11575 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11576 why1 = "$^H{";
11577 why2 = key;
f0af216f 11578 why3 = "} is not defined";
423cee85 11579 goto report;
b3ac6de7
IZ
11580 }
11581 sv_2mortal(sv); /* Parent created it permanently */
11582 cv = *cvp;
423cee85 11583 if (!pv && s)
59cd0e26 11584 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11585 if (type && pv)
59cd0e26 11586 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11587 else
423cee85 11588 typesv = &PL_sv_undef;
4e553d73 11589
e788e7d3 11590 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11591 ENTER ;
11592 SAVETMPS;
4e553d73 11593
423cee85 11594 PUSHMARK(SP) ;
a5845cb7 11595 EXTEND(sp, 3);
423cee85
JH
11596 if (pv)
11597 PUSHs(pv);
b3ac6de7 11598 PUSHs(sv);
423cee85
JH
11599 if (pv)
11600 PUSHs(typesv);
b3ac6de7 11601 PUTBACK;
423cee85 11602 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11603
423cee85 11604 SPAGAIN ;
4e553d73 11605
423cee85 11606 /* Check the eval first */
9b0e499b 11607 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11608 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11609 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11610 (void)POPs;
b37c2d43 11611 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11612 }
11613 else {
11614 res = POPs;
b37c2d43 11615 SvREFCNT_inc_simple_void(res);
423cee85 11616 }
4e553d73 11617
423cee85
JH
11618 PUTBACK ;
11619 FREETMPS ;
11620 LEAVE ;
b3ac6de7 11621 POPSTACK;
4e553d73 11622
b3ac6de7 11623 if (!SvOK(res)) {
423cee85
JH
11624 why1 = "Call to &{$^H{";
11625 why2 = key;
f0af216f 11626 why3 = "}} did not return a defined value";
423cee85
JH
11627 sv = res;
11628 goto report;
9b0e499b 11629 }
423cee85 11630
9b0e499b 11631 return res;
b3ac6de7 11632}
4e553d73 11633
d0a148a6
NC
11634/* Returns a NUL terminated string, with the length of the string written to
11635 *slp
11636 */
76e3520e 11637STATIC char *
cea2e8a9 11638S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11639{
97aff369 11640 dVAR;
463ee0b2 11641 register char *d = dest;
890ce7af 11642 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11643
11644 PERL_ARGS_ASSERT_SCAN_WORD;
11645
463ee0b2 11646 for (;;) {
8903cb82 11647 if (d >= e)
cea2e8a9 11648 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11649 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11650 *d++ = *s++;
c35e046a 11651 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11652 *d++ = ':';
11653 *d++ = ':';
11654 s++;
11655 }
c35e046a 11656 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11657 *d++ = *s++;
11658 *d++ = *s++;
11659 }
fd400ab9 11660 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11661 char *t = s + UTF8SKIP(s);
c35e046a 11662 size_t len;
fd400ab9 11663 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11664 t += UTF8SKIP(t);
c35e046a
AL
11665 len = t - s;
11666 if (d + len > e)
cea2e8a9 11667 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11668 Copy(s, d, len, char);
11669 d += len;
a0ed51b3
LW
11670 s = t;
11671 }
463ee0b2
LW
11672 else {
11673 *d = '\0';
11674 *slp = d - dest;
11675 return s;
e929a76b 11676 }
378cc40b
LW
11677 }
11678}
11679
76e3520e 11680STATIC char *
f54cb97a 11681S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11682{
97aff369 11683 dVAR;
6136c704 11684 char *bracket = NULL;
748a9306 11685 char funny = *s++;
6136c704 11686 register char *d = dest;
0b3da58d 11687 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11688
7918f24d
NC
11689 PERL_ARGS_ASSERT_SCAN_IDENT;
11690
a0d0e21e 11691 if (isSPACE(*s))
29595ff2 11692 s = PEEKSPACE(s);
de3bb511 11693 if (isDIGIT(*s)) {
8903cb82 11694 while (isDIGIT(*s)) {
11695 if (d >= e)
cea2e8a9 11696 Perl_croak(aTHX_ ident_too_long);
378cc40b 11697 *d++ = *s++;
8903cb82 11698 }
378cc40b
LW
11699 }
11700 else {
463ee0b2 11701 for (;;) {
8903cb82 11702 if (d >= e)
cea2e8a9 11703 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11704 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11705 *d++ = *s++;
7e2040f0 11706 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11707 *d++ = ':';
11708 *d++ = ':';
11709 s++;
11710 }
a0d0e21e 11711 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11712 *d++ = *s++;
11713 *d++ = *s++;
11714 }
fd400ab9 11715 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11716 char *t = s + UTF8SKIP(s);
fd400ab9 11717 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11718 t += UTF8SKIP(t);
11719 if (d + (t - s) > e)
cea2e8a9 11720 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11721 Copy(s, d, t - s, char);
11722 d += t - s;
11723 s = t;
11724 }
463ee0b2
LW
11725 else
11726 break;
11727 }
378cc40b
LW
11728 }
11729 *d = '\0';
11730 d = dest;
79072805 11731 if (*d) {
3280af22
NIS
11732 if (PL_lex_state != LEX_NORMAL)
11733 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11734 return s;
378cc40b 11735 }
748a9306 11736 if (*s == '$' && s[1] &&
3792a11b 11737 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11738 {
4810e5ec 11739 return s;
5cd24f17 11740 }
79072805
LW
11741 if (*s == '{') {
11742 bracket = s;
11743 s++;
11744 }
11745 else if (ck_uni)
11746 check_uni();
93a17b20 11747 if (s < send)
79072805
LW
11748 *d = *s++;
11749 d[1] = '\0';
2b92dfce 11750 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11751 *d = toCTRL(*s);
11752 s++;
de3bb511 11753 }
79072805 11754 if (bracket) {
748a9306 11755 if (isSPACE(s[-1])) {
fa83b5b6 11756 while (s < send) {
f54cb97a 11757 const char ch = *s++;
bf4acbe4 11758 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11759 *d = ch;
11760 break;
11761 }
11762 }
748a9306 11763 }
7e2040f0 11764 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11765 d++;
a0ed51b3 11766 if (UTF) {
6136c704
AL
11767 char *end = s;
11768 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11769 end += UTF8SKIP(end);
11770 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11771 end += UTF8SKIP(end);
a0ed51b3 11772 }
6136c704
AL
11773 Copy(s, d, end - s, char);
11774 d += end - s;
11775 s = end;
a0ed51b3
LW
11776 }
11777 else {
2b92dfce 11778 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11779 *d++ = *s++;
2b92dfce 11780 if (d >= e)
cea2e8a9 11781 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11782 }
79072805 11783 *d = '\0';
c35e046a
AL
11784 while (s < send && SPACE_OR_TAB(*s))
11785 s++;
ff68c719 11786 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11787 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11788 const char * const brack =
11789 (const char *)
11790 ((*s == '[') ? "[...]" : "{...}");
9014280d 11791 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11792 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11793 funny, dest, brack, funny, dest, brack);
11794 }
79072805 11795 bracket++;
a0be28da 11796 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11797 return s;
11798 }
4e553d73
NIS
11799 }
11800 /* Handle extended ${^Foo} variables
2b92dfce
GS
11801 * 1999-02-27 mjd-perl-patch@plover.com */
11802 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11803 && isALNUM(*s))
11804 {
11805 d++;
11806 while (isALNUM(*s) && d < e) {
11807 *d++ = *s++;
11808 }
11809 if (d >= e)
cea2e8a9 11810 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11811 *d = '\0';
79072805
LW
11812 }
11813 if (*s == '}') {
11814 s++;
7df0d042 11815 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11816 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11817 PL_expect = XREF;
11818 }
d008e5eb 11819 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11820 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11821 (keyword(dest, d - dest, 0)
11822 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11823 {
c35e046a
AL
11824 if (funny == '#')
11825 funny = '@';
9014280d 11826 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11827 "Ambiguous use of %c{%s} resolved to %c%s",
11828 funny, dest, funny, dest);
11829 }
11830 }
79072805
LW
11831 }
11832 else {
11833 s = bracket; /* let the parser handle it */
93a17b20 11834 *dest = '\0';
79072805
LW
11835 }
11836 }
3280af22
NIS
11837 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11838 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11839 return s;
11840}
11841
879d0c72
NC
11842static U32
11843S_pmflag(U32 pmfl, const char ch) {
11844 switch (ch) {
11845 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
11846 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11847 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11848 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11849 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11850 }
11851 return pmfl;
11852}
11853
cea2e8a9 11854void
2b36a5a0 11855Perl_pmflag(pTHX_ U32* pmfl, int ch)
a0d0e21e 11856{
7918f24d
NC
11857 PERL_ARGS_ASSERT_PMFLAG;
11858
879d0c72
NC
11859 Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
11860 "Perl_pmflag() is deprecated, and will be removed from the XS API");
11861
cde0cee5 11862 if (ch<256) {
879d0c72 11863 *pmfl = S_pmflag(*pmfl, (char)ch);
cde0cee5 11864 }
a0d0e21e 11865}
378cc40b 11866
76e3520e 11867STATIC char *
cea2e8a9 11868S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11869{
97aff369 11870 dVAR;
79072805 11871 PMOP *pm;
5db06880 11872 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11873 const char * const valid_flags =
a20207d7 11874 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11875#ifdef PERL_MAD
11876 char *modstart;
11877#endif
11878
7918f24d 11879 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11880
25c09cbf 11881 if (!s) {
6136c704 11882 const char * const delimiter = skipspace(start);
10edeb5d
JH
11883 Perl_croak(aTHX_
11884 (const char *)
11885 (*delimiter == '?'
11886 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11887 : "Search pattern not terminated" ));
25c09cbf 11888 }
bbce6d69 11889
8782bef2 11890 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11891 if (PL_multi_open == '?') {
11892 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11893 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11894
11895 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11896 allows us to restrict the list needed by reset to just the ??
11897 matches. */
11898 assert(type != OP_TRANS);
11899 if (PL_curstash) {
daba3364 11900 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11901 U32 elements;
11902 if (!mg) {
daba3364 11903 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11904 0);
11905 }
11906 elements = mg->mg_len / sizeof(PMOP**);
11907 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11908 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11909 mg->mg_len = elements * sizeof(PMOP**);
11910 PmopSTASH_set(pm,PL_curstash);
11911 }
11912 }
5db06880
NC
11913#ifdef PERL_MAD
11914 modstart = s;
11915#endif
6136c704 11916 while (*s && strchr(valid_flags, *s))
879d0c72 11917 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
5db06880
NC
11918#ifdef PERL_MAD
11919 if (PL_madskills && modstart != s) {
11920 SV* tmptoken = newSVpvn(modstart, s - modstart);
11921 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11922 }
11923#endif
4ac733c9 11924 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11925 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11926 {
a2a5de95
NC
11927 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11928 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11929 }
11930
3280af22 11931 PL_lex_op = (OP*)pm;
6154021b 11932 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11933 return s;
11934}
11935
76e3520e 11936STATIC char *
cea2e8a9 11937S_scan_subst(pTHX_ char *start)
79072805 11938{
27da23d5 11939 dVAR;
a0d0e21e 11940 register char *s;
79072805 11941 register PMOP *pm;
4fdae800 11942 I32 first_start;
79072805 11943 I32 es = 0;
5db06880
NC
11944#ifdef PERL_MAD
11945 char *modstart;
11946#endif
79072805 11947
7918f24d
NC
11948 PERL_ARGS_ASSERT_SCAN_SUBST;
11949
6154021b 11950 pl_yylval.ival = OP_NULL;
79072805 11951
5db06880 11952 s = scan_str(start,!!PL_madskills,FALSE);
79072805 11953
37fd879b 11954 if (!s)
cea2e8a9 11955 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 11956
3280af22 11957 if (s[-1] == PL_multi_open)
79072805 11958 s--;
5db06880
NC
11959#ifdef PERL_MAD
11960 if (PL_madskills) {
cd81e915
NC
11961 CURMAD('q', PL_thisopen);
11962 CURMAD('_', PL_thiswhite);
11963 CURMAD('E', PL_thisstuff);
11964 CURMAD('Q', PL_thisclose);
11965 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
11966 }
11967#endif
79072805 11968
3280af22 11969 first_start = PL_multi_start;
5db06880 11970 s = scan_str(s,!!PL_madskills,FALSE);
79072805 11971 if (!s) {
37fd879b 11972 if (PL_lex_stuff) {
3280af22 11973 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 11974 PL_lex_stuff = NULL;
37fd879b 11975 }
cea2e8a9 11976 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 11977 }
3280af22 11978 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 11979
79072805 11980 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
11981
11982#ifdef PERL_MAD
11983 if (PL_madskills) {
cd81e915
NC
11984 CURMAD('z', PL_thisopen);
11985 CURMAD('R', PL_thisstuff);
11986 CURMAD('Z', PL_thisclose);
5db06880
NC
11987 }
11988 modstart = s;
11989#endif
11990
48c036b1 11991 while (*s) {
a20207d7 11992 if (*s == EXEC_PAT_MOD) {
a687059c 11993 s++;
2f3197b3 11994 es++;
a687059c 11995 }
a20207d7 11996 else if (strchr(S_PAT_MODS, *s))
879d0c72 11997 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
48c036b1
GS
11998 else
11999 break;
378cc40b 12000 }
79072805 12001
5db06880
NC
12002#ifdef PERL_MAD
12003 if (PL_madskills) {
12004 if (modstart != s)
12005 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12006 append_madprops(PL_thismad, (OP*)pm, 0);
12007 PL_thismad = 0;
5db06880
NC
12008 }
12009#endif
a2a5de95
NC
12010 if ((pm->op_pmflags & PMf_CONTINUE)) {
12011 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
12012 }
12013
79072805 12014 if (es) {
6136c704
AL
12015 SV * const repl = newSVpvs("");
12016
0244c3a4
GS
12017 PL_sublex_info.super_bufptr = s;
12018 PL_sublex_info.super_bufend = PL_bufend;
12019 PL_multi_end = 0;
79072805 12020 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
12021 while (es-- > 0) {
12022 if (es)
12023 sv_catpvs(repl, "eval ");
12024 else
12025 sv_catpvs(repl, "do ");
12026 }
6f43d98f 12027 sv_catpvs(repl, "{");
3280af22 12028 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
12029 if (strchr(SvPVX(PL_lex_repl), '#'))
12030 sv_catpvs(repl, "\n");
12031 sv_catpvs(repl, "}");
25da4f38 12032 SvEVALED_on(repl);
3280af22
NIS
12033 SvREFCNT_dec(PL_lex_repl);
12034 PL_lex_repl = repl;
378cc40b 12035 }
79072805 12036
3280af22 12037 PL_lex_op = (OP*)pm;
6154021b 12038 pl_yylval.ival = OP_SUBST;
378cc40b
LW
12039 return s;
12040}
12041
76e3520e 12042STATIC char *
cea2e8a9 12043S_scan_trans(pTHX_ char *start)
378cc40b 12044{
97aff369 12045 dVAR;
a0d0e21e 12046 register char* s;
11343788 12047 OP *o;
79072805 12048 short *tbl;
b84c11c8
NC
12049 U8 squash;
12050 U8 del;
12051 U8 complement;
5db06880
NC
12052#ifdef PERL_MAD
12053 char *modstart;
12054#endif
79072805 12055
7918f24d
NC
12056 PERL_ARGS_ASSERT_SCAN_TRANS;
12057
6154021b 12058 pl_yylval.ival = OP_NULL;
79072805 12059
5db06880 12060 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 12061 if (!s)
cea2e8a9 12062 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 12063
3280af22 12064 if (s[-1] == PL_multi_open)
2f3197b3 12065 s--;
5db06880
NC
12066#ifdef PERL_MAD
12067 if (PL_madskills) {
cd81e915
NC
12068 CURMAD('q', PL_thisopen);
12069 CURMAD('_', PL_thiswhite);
12070 CURMAD('E', PL_thisstuff);
12071 CURMAD('Q', PL_thisclose);
12072 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12073 }
12074#endif
2f3197b3 12075
5db06880 12076 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12077 if (!s) {
37fd879b 12078 if (PL_lex_stuff) {
3280af22 12079 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12080 PL_lex_stuff = NULL;
37fd879b 12081 }
cea2e8a9 12082 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 12083 }
5db06880 12084 if (PL_madskills) {
cd81e915
NC
12085 CURMAD('z', PL_thisopen);
12086 CURMAD('R', PL_thisstuff);
12087 CURMAD('Z', PL_thisclose);
5db06880 12088 }
79072805 12089
a0ed51b3 12090 complement = del = squash = 0;
5db06880
NC
12091#ifdef PERL_MAD
12092 modstart = s;
12093#endif
7a1e2023
NC
12094 while (1) {
12095 switch (*s) {
12096 case 'c':
79072805 12097 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
12098 break;
12099 case 'd':
a0ed51b3 12100 del = OPpTRANS_DELETE;
7a1e2023
NC
12101 break;
12102 case 's':
79072805 12103 squash = OPpTRANS_SQUASH;
7a1e2023
NC
12104 break;
12105 default:
12106 goto no_more;
12107 }
395c3793
LW
12108 s++;
12109 }
7a1e2023 12110 no_more:
8973db79 12111
aa1f7c5b 12112 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
8973db79 12113 o = newPVOP(OP_TRANS, 0, (char*)tbl);
59f00321
RGS
12114 o->op_private &= ~OPpTRANS_ALL;
12115 o->op_private |= del|squash|complement|
7948272d
NIS
12116 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12117 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 12118
3280af22 12119 PL_lex_op = o;
6154021b 12120 pl_yylval.ival = OP_TRANS;
5db06880
NC
12121
12122#ifdef PERL_MAD
12123 if (PL_madskills) {
12124 if (modstart != s)
12125 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12126 append_madprops(PL_thismad, o, 0);
12127 PL_thismad = 0;
5db06880
NC
12128 }
12129#endif
12130
79072805
LW
12131 return s;
12132}
12133
76e3520e 12134STATIC char *
cea2e8a9 12135S_scan_heredoc(pTHX_ register char *s)
79072805 12136{
97aff369 12137 dVAR;
79072805
LW
12138 SV *herewas;
12139 I32 op_type = OP_SCALAR;
12140 I32 len;
12141 SV *tmpstr;
12142 char term;
73d840c0 12143 const char *found_newline;
79072805 12144 register char *d;
fc36a67e 12145 register char *e;
4633a7c4 12146 char *peek;
f54cb97a 12147 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
12148#ifdef PERL_MAD
12149 I32 stuffstart = s - SvPVX(PL_linestr);
12150 char *tstart;
12151
cd81e915 12152 PL_realtokenstart = -1;
5db06880 12153#endif
79072805 12154
7918f24d
NC
12155 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12156
79072805 12157 s += 2;
3280af22
NIS
12158 d = PL_tokenbuf;
12159 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 12160 if (!outer)
79072805 12161 *d++ = '\n';
c35e046a
AL
12162 peek = s;
12163 while (SPACE_OR_TAB(*peek))
12164 peek++;
3792a11b 12165 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 12166 s = peek;
79072805 12167 term = *s++;
3280af22 12168 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 12169 d += len;
3280af22 12170 if (s < PL_bufend)
79072805 12171 s++;
79072805
LW
12172 }
12173 else {
12174 if (*s == '\\')
12175 s++, term = '\'';
12176 else
12177 term = '"';
7e2040f0 12178 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 12179 deprecate("bare << to mean <<\"\"");
7e2040f0 12180 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 12181 if (d < e)
12182 *d++ = *s;
12183 }
12184 }
3280af22 12185 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 12186 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
12187 *d++ = '\n';
12188 *d = '\0';
3280af22 12189 len = d - PL_tokenbuf;
5db06880
NC
12190
12191#ifdef PERL_MAD
12192 if (PL_madskills) {
12193 tstart = PL_tokenbuf + !outer;
cd81e915 12194 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 12195 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 12196 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12197 stuffstart = s - SvPVX(PL_linestr);
12198 }
12199#endif
6a27c188 12200#ifndef PERL_STRICT_CR
f63a84b2
LW
12201 d = strchr(s, '\r');
12202 if (d) {
b464bac0 12203 char * const olds = s;
f63a84b2 12204 s = d;
3280af22 12205 while (s < PL_bufend) {
f63a84b2
LW
12206 if (*s == '\r') {
12207 *d++ = '\n';
12208 if (*++s == '\n')
12209 s++;
12210 }
12211 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12212 *d++ = *s++;
12213 s++;
12214 }
12215 else
12216 *d++ = *s++;
12217 }
12218 *d = '\0';
3280af22 12219 PL_bufend = d;
95a20fc0 12220 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
12221 s = olds;
12222 }
12223#endif
5db06880
NC
12224#ifdef PERL_MAD
12225 found_newline = 0;
12226#endif
10edeb5d 12227 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
12228 herewas = newSVpvn(s,PL_bufend-s);
12229 }
12230 else {
5db06880
NC
12231#ifdef PERL_MAD
12232 herewas = newSVpvn(s-1,found_newline-s+1);
12233#else
73d840c0
AL
12234 s--;
12235 herewas = newSVpvn(s,found_newline-s);
5db06880 12236#endif
73d840c0 12237 }
5db06880
NC
12238#ifdef PERL_MAD
12239 if (PL_madskills) {
12240 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12241 if (PL_thisstuff)
12242 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 12243 else
cd81e915 12244 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
12245 }
12246#endif
79072805 12247 s += SvCUR(herewas);
748a9306 12248
5db06880
NC
12249#ifdef PERL_MAD
12250 stuffstart = s - SvPVX(PL_linestr);
12251
12252 if (found_newline)
12253 s--;
12254#endif
12255
7d0a29fe
NC
12256 tmpstr = newSV_type(SVt_PVIV);
12257 SvGROW(tmpstr, 80);
748a9306 12258 if (term == '\'') {
79072805 12259 op_type = OP_CONST;
45977657 12260 SvIV_set(tmpstr, -1);
748a9306
LW
12261 }
12262 else if (term == '`') {
79072805 12263 op_type = OP_BACKTICK;
45977657 12264 SvIV_set(tmpstr, '\\');
748a9306 12265 }
79072805
LW
12266
12267 CLINE;
57843af0 12268 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
12269 PL_multi_open = PL_multi_close = '<';
12270 term = *PL_tokenbuf;
0244c3a4 12271 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
12272 char * const bufptr = PL_sublex_info.super_bufptr;
12273 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 12274 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
12275 s = strchr(bufptr, '\n');
12276 if (!s)
12277 s = bufend;
12278 d = s;
12279 while (s < bufend &&
12280 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12281 if (*s++ == '\n')
57843af0 12282 CopLINE_inc(PL_curcop);
0244c3a4
GS
12283 }
12284 if (s >= bufend) {
eb160463 12285 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
12286 missingterm(PL_tokenbuf);
12287 }
12288 sv_setpvn(herewas,bufptr,d-bufptr+1);
12289 sv_setpvn(tmpstr,d+1,s-d);
12290 s += len - 1;
12291 sv_catpvn(herewas,s,bufend-s);
95a20fc0 12292 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
12293
12294 s = olds;
12295 goto retval;
12296 }
12297 else if (!outer) {
79072805 12298 d = s;
3280af22
NIS
12299 while (s < PL_bufend &&
12300 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 12301 if (*s++ == '\n')
57843af0 12302 CopLINE_inc(PL_curcop);
79072805 12303 }
3280af22 12304 if (s >= PL_bufend) {
eb160463 12305 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12306 missingterm(PL_tokenbuf);
79072805
LW
12307 }
12308 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
12309#ifdef PERL_MAD
12310 if (PL_madskills) {
cd81e915
NC
12311 if (PL_thisstuff)
12312 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12313 else
cd81e915 12314 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12315 stuffstart = s - SvPVX(PL_linestr);
12316 }
12317#endif
79072805 12318 s += len - 1;
57843af0 12319 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12320
3280af22
NIS
12321 sv_catpvn(herewas,s,PL_bufend-s);
12322 sv_setsv(PL_linestr,herewas);
12323 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12324 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12325 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12326 }
12327 else
76f68e9b 12328 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12329 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12330#ifdef PERL_MAD
12331 if (PL_madskills) {
12332 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12333 if (PL_thisstuff)
12334 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12335 else
cd81e915 12336 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12337 }
12338#endif
f0e67a1d 12339 PL_bufptr = s;
17cc9359 12340 CopLINE_inc(PL_curcop);
f0e67a1d 12341 if (!outer || !lex_next_chunk(0)) {
eb160463 12342 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12343 missingterm(PL_tokenbuf);
79072805 12344 }
17cc9359 12345 CopLINE_dec(PL_curcop);
f0e67a1d 12346 s = PL_bufptr;
5db06880
NC
12347#ifdef PERL_MAD
12348 stuffstart = s - SvPVX(PL_linestr);
12349#endif
57843af0 12350 CopLINE_inc(PL_curcop);
3280af22 12351 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12352 PL_last_lop = PL_last_uni = NULL;
6a27c188 12353#ifndef PERL_STRICT_CR
3280af22 12354 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12355 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12356 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12357 {
3280af22
NIS
12358 PL_bufend[-2] = '\n';
12359 PL_bufend--;
95a20fc0 12360 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12361 }
3280af22
NIS
12362 else if (PL_bufend[-1] == '\r')
12363 PL_bufend[-1] = '\n';
f63a84b2 12364 }
3280af22
NIS
12365 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12366 PL_bufend[-1] = '\n';
f63a84b2 12367#endif
3280af22 12368 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12369 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12370 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12371 sv_catsv(PL_linestr,herewas);
12372 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12373 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12374 }
12375 else {
3280af22
NIS
12376 s = PL_bufend;
12377 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12378 }
12379 }
79072805 12380 s++;
0244c3a4 12381retval:
57843af0 12382 PL_multi_end = CopLINE(PL_curcop);
79072805 12383 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12384 SvPV_shrink_to_cur(tmpstr);
79072805 12385 }
8990e307 12386 SvREFCNT_dec(herewas);
2f31ce75 12387 if (!IN_BYTES) {
95a20fc0 12388 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12389 SvUTF8_on(tmpstr);
12390 else if (PL_encoding)
12391 sv_recode_to_utf8(tmpstr, PL_encoding);
12392 }
3280af22 12393 PL_lex_stuff = tmpstr;
6154021b 12394 pl_yylval.ival = op_type;
79072805
LW
12395 return s;
12396}
12397
02aa26ce
NT
12398/* scan_inputsymbol
12399 takes: current position in input buffer
12400 returns: new position in input buffer
6154021b 12401 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12402
12403 This code handles:
12404
12405 <> read from ARGV
12406 <FH> read from filehandle
12407 <pkg::FH> read from package qualified filehandle
12408 <pkg'FH> read from package qualified filehandle
12409 <$fh> read from filehandle in $fh
12410 <*.h> filename glob
12411
12412*/
12413
76e3520e 12414STATIC char *
cea2e8a9 12415S_scan_inputsymbol(pTHX_ char *start)
79072805 12416{
97aff369 12417 dVAR;
02aa26ce 12418 register char *s = start; /* current position in buffer */
1b420867 12419 char *end;
79072805 12420 I32 len;
6136c704
AL
12421 char *d = PL_tokenbuf; /* start of temp holding space */
12422 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12423
7918f24d
NC
12424 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12425
1b420867
GS
12426 end = strchr(s, '\n');
12427 if (!end)
12428 end = PL_bufend;
12429 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12430
12431 /* die if we didn't have space for the contents of the <>,
1b420867 12432 or if it didn't end, or if we see a newline
02aa26ce
NT
12433 */
12434
bb7a0f54 12435 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12436 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12437 if (s >= end)
cea2e8a9 12438 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12439
fc36a67e 12440 s++;
02aa26ce
NT
12441
12442 /* check for <$fh>
12443 Remember, only scalar variables are interpreted as filehandles by
12444 this code. Anything more complex (e.g., <$fh{$num}>) will be
12445 treated as a glob() call.
12446 This code makes use of the fact that except for the $ at the front,
12447 a scalar variable and a filehandle look the same.
12448 */
4633a7c4 12449 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12450
12451 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12452 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12453 d++;
02aa26ce
NT
12454
12455 /* If we've tried to read what we allow filehandles to look like, and
12456 there's still text left, then it must be a glob() and not a getline.
12457 Use scan_str to pull out the stuff between the <> and treat it
12458 as nothing more than a string.
12459 */
12460
3280af22 12461 if (d - PL_tokenbuf != len) {
6154021b 12462 pl_yylval.ival = OP_GLOB;
5db06880 12463 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12464 if (!s)
cea2e8a9 12465 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12466 return s;
12467 }
395c3793 12468 else {
9b3023bc 12469 bool readline_overriden = FALSE;
6136c704 12470 GV *gv_readline;
9b3023bc 12471 GV **gvp;
02aa26ce 12472 /* we're in a filehandle read situation */
3280af22 12473 d = PL_tokenbuf;
02aa26ce
NT
12474
12475 /* turn <> into <ARGV> */
79072805 12476 if (!len)
689badd5 12477 Copy("ARGV",d,5,char);
02aa26ce 12478
9b3023bc 12479 /* Check whether readline() is overriden */
fafc274c 12480 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12481 if ((gv_readline
ba979b31 12482 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12483 ||
017a3ce5 12484 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12485 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12486 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12487 readline_overriden = TRUE;
12488
02aa26ce
NT
12489 /* if <$fh>, create the ops to turn the variable into a
12490 filehandle
12491 */
79072805 12492 if (*d == '$') {
02aa26ce
NT
12493 /* try to find it in the pad for this block, otherwise find
12494 add symbol table ops
12495 */
f8f98e0a 12496 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12497 if (tmp != NOT_IN_PAD) {
00b1698f 12498 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12499 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12500 HEK * const stashname = HvNAME_HEK(stash);
12501 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12502 sv_catpvs(sym, "::");
f558d5af
JH
12503 sv_catpv(sym, d+1);
12504 d = SvPVX(sym);
12505 goto intro_sym;
12506 }
12507 else {
6136c704 12508 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12509 o->op_targ = tmp;
9b3023bc
RGS
12510 PL_lex_op = readline_overriden
12511 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12512 append_elem(OP_LIST, o,
12513 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12514 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12515 }
a0d0e21e
LW
12516 }
12517 else {
f558d5af
JH
12518 GV *gv;
12519 ++d;
12520intro_sym:
12521 gv = gv_fetchpv(d,
12522 (PL_in_eval
12523 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12524 : GV_ADDMULTI),
f558d5af 12525 SVt_PV);
9b3023bc
RGS
12526 PL_lex_op = readline_overriden
12527 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12528 append_elem(OP_LIST,
12529 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12530 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12531 : (OP*)newUNOP(OP_READLINE, 0,
12532 newUNOP(OP_RV2SV, 0,
12533 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12534 }
7c6fadd6
RGS
12535 if (!readline_overriden)
12536 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12537 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12538 pl_yylval.ival = OP_NULL;
79072805 12539 }
02aa26ce
NT
12540
12541 /* If it's none of the above, it must be a literal filehandle
12542 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12543 else {
6136c704 12544 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12545 PL_lex_op = readline_overriden
12546 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12547 append_elem(OP_LIST,
12548 newGVOP(OP_GV, 0, gv),
12549 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12550 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12551 pl_yylval.ival = OP_NULL;
79072805
LW
12552 }
12553 }
02aa26ce 12554
79072805
LW
12555 return s;
12556}
12557
02aa26ce
NT
12558
12559/* scan_str
12560 takes: start position in buffer
09bef843
SB
12561 keep_quoted preserve \ on the embedded delimiter(s)
12562 keep_delims preserve the delimiters around the string
02aa26ce
NT
12563 returns: position to continue reading from buffer
12564 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12565 updates the read buffer.
12566
12567 This subroutine pulls a string out of the input. It is called for:
12568 q single quotes q(literal text)
12569 ' single quotes 'literal text'
12570 qq double quotes qq(interpolate $here please)
12571 " double quotes "interpolate $here please"
12572 qx backticks qx(/bin/ls -l)
12573 ` backticks `/bin/ls -l`
12574 qw quote words @EXPORT_OK = qw( func() $spam )
12575 m// regexp match m/this/
12576 s/// regexp substitute s/this/that/
12577 tr/// string transliterate tr/this/that/
12578 y/// string transliterate y/this/that/
12579 ($*@) sub prototypes sub foo ($)
09bef843 12580 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12581 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12582
12583 In most of these cases (all but <>, patterns and transliterate)
12584 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12585 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12586 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12587 calls scan_str().
4e553d73 12588
02aa26ce
NT
12589 It skips whitespace before the string starts, and treats the first
12590 character as the delimiter. If the delimiter is one of ([{< then
12591 the corresponding "close" character )]}> is used as the closing
12592 delimiter. It allows quoting of delimiters, and if the string has
12593 balanced delimiters ([{<>}]) it allows nesting.
12594
37fd879b
HS
12595 On success, the SV with the resulting string is put into lex_stuff or,
12596 if that is already non-NULL, into lex_repl. The second case occurs only
12597 when parsing the RHS of the special constructs s/// and tr/// (y///).
12598 For convenience, the terminating delimiter character is stuffed into
12599 SvIVX of the SV.
02aa26ce
NT
12600*/
12601
76e3520e 12602STATIC char *
09bef843 12603S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12604{
97aff369 12605 dVAR;
02aa26ce 12606 SV *sv; /* scalar value: string */
d3fcec1f 12607 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12608 register char *s = start; /* current position in the buffer */
12609 register char term; /* terminating character */
12610 register char *to; /* current position in the sv's data */
12611 I32 brackets = 1; /* bracket nesting level */
89491803 12612 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12613 I32 termcode; /* terminating char. code */
89ebb4a3 12614 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12615 STRLEN termlen; /* length of terminating string */
0331ef07 12616 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12617#ifdef PERL_MAD
12618 int stuffstart;
12619 char *tstart;
12620#endif
02aa26ce 12621
7918f24d
NC
12622 PERL_ARGS_ASSERT_SCAN_STR;
12623
02aa26ce 12624 /* skip space before the delimiter */
29595ff2
NC
12625 if (isSPACE(*s)) {
12626 s = PEEKSPACE(s);
12627 }
02aa26ce 12628
5db06880 12629#ifdef PERL_MAD
cd81e915
NC
12630 if (PL_realtokenstart >= 0) {
12631 stuffstart = PL_realtokenstart;
12632 PL_realtokenstart = -1;
5db06880
NC
12633 }
12634 else
12635 stuffstart = start - SvPVX(PL_linestr);
12636#endif
02aa26ce 12637 /* mark where we are, in case we need to report errors */
79072805 12638 CLINE;
02aa26ce
NT
12639
12640 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12641 term = *s;
220e2d4e
IH
12642 if (!UTF) {
12643 termcode = termstr[0] = term;
12644 termlen = 1;
12645 }
12646 else {
f3b9ce0f 12647 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12648 Copy(s, termstr, termlen, U8);
12649 if (!UTF8_IS_INVARIANT(term))
12650 has_utf8 = TRUE;
12651 }
b1c7b182 12652
02aa26ce 12653 /* mark where we are */
57843af0 12654 PL_multi_start = CopLINE(PL_curcop);
3280af22 12655 PL_multi_open = term;
02aa26ce
NT
12656
12657 /* find corresponding closing delimiter */
93a17b20 12658 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12659 termcode = termstr[0] = term = tmps[5];
12660
3280af22 12661 PL_multi_close = term;
79072805 12662
561b68a9
SH
12663 /* create a new SV to hold the contents. 79 is the SV's initial length.
12664 What a random number. */
7d0a29fe
NC
12665 sv = newSV_type(SVt_PVIV);
12666 SvGROW(sv, 80);
45977657 12667 SvIV_set(sv, termcode);
a0d0e21e 12668 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12669
12670 /* move past delimiter and try to read a complete string */
09bef843 12671 if (keep_delims)
220e2d4e
IH
12672 sv_catpvn(sv, s, termlen);
12673 s += termlen;
5db06880
NC
12674#ifdef PERL_MAD
12675 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12676 if (!PL_thisopen && !keep_delims) {
12677 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12678 stuffstart = s - SvPVX(PL_linestr);
12679 }
12680#endif
93a17b20 12681 for (;;) {
220e2d4e
IH
12682 if (PL_encoding && !UTF) {
12683 bool cont = TRUE;
12684
12685 while (cont) {
95a20fc0 12686 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12687 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12688 &offset, (char*)termstr, termlen);
6136c704
AL
12689 const char * const ns = SvPVX_const(PL_linestr) + offset;
12690 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12691
12692 for (; s < ns; s++) {
12693 if (*s == '\n' && !PL_rsfp)
12694 CopLINE_inc(PL_curcop);
12695 }
12696 if (!found)
12697 goto read_more_line;
12698 else {
12699 /* handle quoted delimiters */
52327caf 12700 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12701 const char *t;
95a20fc0 12702 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12703 t--;
12704 if ((svlast-1 - t) % 2) {
12705 if (!keep_quoted) {
12706 *(svlast-1) = term;
12707 *svlast = '\0';
12708 SvCUR_set(sv, SvCUR(sv) - 1);
12709 }
12710 continue;
12711 }
12712 }
12713 if (PL_multi_open == PL_multi_close) {
12714 cont = FALSE;
12715 }
12716 else {
f54cb97a
AL
12717 const char *t;
12718 char *w;
0331ef07 12719 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12720 /* At here, all closes are "was quoted" one,
12721 so we don't check PL_multi_close. */
12722 if (*t == '\\') {
12723 if (!keep_quoted && *(t+1) == PL_multi_open)
12724 t++;
12725 else
12726 *w++ = *t++;
12727 }
12728 else if (*t == PL_multi_open)
12729 brackets++;
12730
12731 *w = *t;
12732 }
12733 if (w < t) {
12734 *w++ = term;
12735 *w = '\0';
95a20fc0 12736 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12737 }
0331ef07 12738 last_off = w - SvPVX(sv);
220e2d4e
IH
12739 if (--brackets <= 0)
12740 cont = FALSE;
12741 }
12742 }
12743 }
12744 if (!keep_delims) {
12745 SvCUR_set(sv, SvCUR(sv) - 1);
12746 *SvEND(sv) = '\0';
12747 }
12748 break;
12749 }
12750
02aa26ce 12751 /* extend sv if need be */
3280af22 12752 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12753 /* set 'to' to the next character in the sv's string */
463ee0b2 12754 to = SvPVX(sv)+SvCUR(sv);
09bef843 12755
02aa26ce 12756 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12757 if (PL_multi_open == PL_multi_close) {
12758 for (; s < PL_bufend; s++,to++) {
02aa26ce 12759 /* embedded newlines increment the current line number */
3280af22 12760 if (*s == '\n' && !PL_rsfp)
57843af0 12761 CopLINE_inc(PL_curcop);
02aa26ce 12762 /* handle quoted delimiters */
3280af22 12763 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12764 if (!keep_quoted && s[1] == term)
a0d0e21e 12765 s++;
02aa26ce 12766 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12767 else
12768 *to++ = *s++;
12769 }
02aa26ce
NT
12770 /* terminate when run out of buffer (the for() condition), or
12771 have found the terminator */
220e2d4e
IH
12772 else if (*s == term) {
12773 if (termlen == 1)
12774 break;
f3b9ce0f 12775 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12776 break;
12777 }
63cd0674 12778 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12779 has_utf8 = TRUE;
93a17b20
LW
12780 *to = *s;
12781 }
12782 }
02aa26ce
NT
12783
12784 /* if the terminator isn't the same as the start character (e.g.,
12785 matched brackets), we have to allow more in the quoting, and
12786 be prepared for nested brackets.
12787 */
93a17b20 12788 else {
02aa26ce 12789 /* read until we run out of string, or we find the terminator */
3280af22 12790 for (; s < PL_bufend; s++,to++) {
02aa26ce 12791 /* embedded newlines increment the line count */
3280af22 12792 if (*s == '\n' && !PL_rsfp)
57843af0 12793 CopLINE_inc(PL_curcop);
02aa26ce 12794 /* backslashes can escape the open or closing characters */
3280af22 12795 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12796 if (!keep_quoted &&
12797 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12798 s++;
12799 else
12800 *to++ = *s++;
12801 }
02aa26ce 12802 /* allow nested opens and closes */
3280af22 12803 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12804 break;
3280af22 12805 else if (*s == PL_multi_open)
93a17b20 12806 brackets++;
63cd0674 12807 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12808 has_utf8 = TRUE;
93a17b20
LW
12809 *to = *s;
12810 }
12811 }
02aa26ce 12812 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12813 *to = '\0';
95a20fc0 12814 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12815
02aa26ce
NT
12816 /*
12817 * this next chunk reads more into the buffer if we're not done yet
12818 */
12819
b1c7b182
GS
12820 if (s < PL_bufend)
12821 break; /* handle case where we are done yet :-) */
79072805 12822
6a27c188 12823#ifndef PERL_STRICT_CR
95a20fc0 12824 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12825 if ((to[-2] == '\r' && to[-1] == '\n') ||
12826 (to[-2] == '\n' && to[-1] == '\r'))
12827 {
f63a84b2
LW
12828 to[-2] = '\n';
12829 to--;
95a20fc0 12830 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12831 }
12832 else if (to[-1] == '\r')
12833 to[-1] = '\n';
12834 }
95a20fc0 12835 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12836 to[-1] = '\n';
12837#endif
12838
220e2d4e 12839 read_more_line:
02aa26ce
NT
12840 /* if we're out of file, or a read fails, bail and reset the current
12841 line marker so we can report where the unterminated string began
12842 */
5db06880
NC
12843#ifdef PERL_MAD
12844 if (PL_madskills) {
c35e046a 12845 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12846 if (PL_thisstuff)
12847 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12848 else
cd81e915 12849 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12850 }
12851#endif
f0e67a1d
Z
12852 CopLINE_inc(PL_curcop);
12853 PL_bufptr = PL_bufend;
12854 if (!lex_next_chunk(0)) {
c07a80fd 12855 sv_free(sv);
eb160463 12856 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12857 return NULL;
79072805 12858 }
f0e67a1d 12859 s = PL_bufptr;
5db06880
NC
12860#ifdef PERL_MAD
12861 stuffstart = 0;
12862#endif
378cc40b 12863 }
4e553d73 12864
02aa26ce
NT
12865 /* at this point, we have successfully read the delimited string */
12866
220e2d4e 12867 if (!PL_encoding || UTF) {
5db06880
NC
12868#ifdef PERL_MAD
12869 if (PL_madskills) {
c35e046a 12870 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12871 const int len = s - tstart;
cd81e915 12872 if (PL_thisstuff)
c35e046a 12873 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12874 else
c35e046a 12875 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12876 if (!PL_thisclose && !keep_delims)
12877 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12878 }
12879#endif
12880
220e2d4e
IH
12881 if (keep_delims)
12882 sv_catpvn(sv, s, termlen);
12883 s += termlen;
12884 }
5db06880
NC
12885#ifdef PERL_MAD
12886 else {
12887 if (PL_madskills) {
c35e046a
AL
12888 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12889 const int len = s - tstart - termlen;
cd81e915 12890 if (PL_thisstuff)
c35e046a 12891 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12892 else
c35e046a 12893 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12894 if (!PL_thisclose && !keep_delims)
12895 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12896 }
12897 }
12898#endif
220e2d4e 12899 if (has_utf8 || PL_encoding)
b1c7b182 12900 SvUTF8_on(sv);
d0063567 12901
57843af0 12902 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12903
12904 /* if we allocated too much space, give some back */
93a17b20
LW
12905 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12906 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12907 SvPV_renew(sv, SvLEN(sv));
79072805 12908 }
02aa26ce
NT
12909
12910 /* decide whether this is the first or second quoted string we've read
12911 for this op
12912 */
4e553d73 12913
3280af22
NIS
12914 if (PL_lex_stuff)
12915 PL_lex_repl = sv;
79072805 12916 else
3280af22 12917 PL_lex_stuff = sv;
378cc40b
LW
12918 return s;
12919}
12920
02aa26ce
NT
12921/*
12922 scan_num
12923 takes: pointer to position in buffer
12924 returns: pointer to new position in buffer
6154021b 12925 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12926
12927 Read a number in any of the formats that Perl accepts:
12928
7fd134d9
JH
12929 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12930 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12931 0b[01](_?[01])*
12932 0[0-7](_?[0-7])*
12933 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12934
3280af22 12935 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
12936 thing it reads.
12937
12938 If it reads a number without a decimal point or an exponent, it will
12939 try converting the number to an integer and see if it can do so
12940 without loss of precision.
12941*/
4e553d73 12942
378cc40b 12943char *
bfed75c6 12944Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 12945{
97aff369 12946 dVAR;
bfed75c6 12947 register const char *s = start; /* current position in buffer */
02aa26ce
NT
12948 register char *d; /* destination in temp buffer */
12949 register char *e; /* end of temp buffer */
86554af2 12950 NV nv; /* number read, as a double */
a0714e2c 12951 SV *sv = NULL; /* place to put the converted number */
a86a20aa 12952 bool floatit; /* boolean: int or float? */
cbbf8932 12953 const char *lastub = NULL; /* position of last underbar */
bfed75c6 12954 static char const number_too_long[] = "Number too long";
378cc40b 12955
7918f24d
NC
12956 PERL_ARGS_ASSERT_SCAN_NUM;
12957
02aa26ce
NT
12958 /* We use the first character to decide what type of number this is */
12959
378cc40b 12960 switch (*s) {
79072805 12961 default:
cea2e8a9 12962 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 12963
02aa26ce 12964 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 12965 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
12966 case '0':
12967 {
02aa26ce
NT
12968 /* variables:
12969 u holds the "number so far"
4f19785b
WSI
12970 shift the power of 2 of the base
12971 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
12972 overflowed was the number more than we can hold?
12973
12974 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
12975 we in octal/hex/binary?" indicator to disallow hex characters
12976 when in octal mode.
02aa26ce 12977 */
9e24b6e2
JH
12978 NV n = 0.0;
12979 UV u = 0;
79072805 12980 I32 shift;
9e24b6e2 12981 bool overflowed = FALSE;
61f33854 12982 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
12983 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12984 static const char* const bases[5] =
12985 { "", "binary", "", "octal", "hexadecimal" };
12986 static const char* const Bases[5] =
12987 { "", "Binary", "", "Octal", "Hexadecimal" };
12988 static const char* const maxima[5] =
12989 { "",
12990 "0b11111111111111111111111111111111",
12991 "",
12992 "037777777777",
12993 "0xffffffff" };
bfed75c6 12994 const char *base, *Base, *max;
378cc40b 12995
02aa26ce 12996 /* check for hex */
378cc40b
LW
12997 if (s[1] == 'x') {
12998 shift = 4;
12999 s += 2;
61f33854 13000 just_zero = FALSE;
4f19785b
WSI
13001 } else if (s[1] == 'b') {
13002 shift = 1;
13003 s += 2;
61f33854 13004 just_zero = FALSE;
378cc40b 13005 }
02aa26ce 13006 /* check for a decimal in disguise */
b78218b7 13007 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 13008 goto decimal;
02aa26ce 13009 /* so it must be octal */
928753ea 13010 else {
378cc40b 13011 shift = 3;
928753ea
JH
13012 s++;
13013 }
13014
13015 if (*s == '_') {
a2a5de95 13016 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
13017 "Misplaced _ in number");
13018 lastub = s++;
13019 }
9e24b6e2
JH
13020
13021 base = bases[shift];
13022 Base = Bases[shift];
13023 max = maxima[shift];
02aa26ce 13024
4f19785b 13025 /* read the rest of the number */
378cc40b 13026 for (;;) {
9e24b6e2 13027 /* x is used in the overflow test,
893fe2c2 13028 b is the digit we're adding on. */
9e24b6e2 13029 UV x, b;
55497cff 13030
378cc40b 13031 switch (*s) {
02aa26ce
NT
13032
13033 /* if we don't mention it, we're done */
378cc40b
LW
13034 default:
13035 goto out;
02aa26ce 13036
928753ea 13037 /* _ are ignored -- but warned about if consecutive */
de3bb511 13038 case '_':
a2a5de95
NC
13039 if (lastub && s == lastub + 1)
13040 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13041 "Misplaced _ in number");
928753ea 13042 lastub = s++;
de3bb511 13043 break;
02aa26ce
NT
13044
13045 /* 8 and 9 are not octal */
378cc40b 13046 case '8': case '9':
4f19785b 13047 if (shift == 3)
cea2e8a9 13048 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 13049 /* FALL THROUGH */
02aa26ce
NT
13050
13051 /* octal digits */
4f19785b 13052 case '2': case '3': case '4':
378cc40b 13053 case '5': case '6': case '7':
4f19785b 13054 if (shift == 1)
cea2e8a9 13055 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
13056 /* FALL THROUGH */
13057
13058 case '0': case '1':
02aa26ce 13059 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 13060 goto digit;
02aa26ce
NT
13061
13062 /* hex digits */
378cc40b
LW
13063 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13064 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 13065 /* make sure they said 0x */
378cc40b
LW
13066 if (shift != 4)
13067 goto out;
55497cff 13068 b = (*s++ & 7) + 9;
02aa26ce
NT
13069
13070 /* Prepare to put the digit we have onto the end
13071 of the number so far. We check for overflows.
13072 */
13073
55497cff 13074 digit:
61f33854 13075 just_zero = FALSE;
9e24b6e2
JH
13076 if (!overflowed) {
13077 x = u << shift; /* make room for the digit */
13078
13079 if ((x >> shift) != u
13080 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
13081 overflowed = TRUE;
13082 n = (NV) u;
9b387841
NC
13083 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13084 "Integer overflow in %s number",
13085 base);
9e24b6e2
JH
13086 } else
13087 u = x | b; /* add the digit to the end */
13088 }
13089 if (overflowed) {
13090 n *= nvshift[shift];
13091 /* If an NV has not enough bits in its
13092 * mantissa to represent an UV this summing of
13093 * small low-order numbers is a waste of time
13094 * (because the NV cannot preserve the
13095 * low-order bits anyway): we could just
13096 * remember when did we overflow and in the
13097 * end just multiply n by the right
13098 * amount. */
13099 n += (NV) b;
55497cff 13100 }
378cc40b
LW
13101 break;
13102 }
13103 }
02aa26ce
NT
13104
13105 /* if we get here, we had success: make a scalar value from
13106 the number.
13107 */
378cc40b 13108 out:
928753ea
JH
13109
13110 /* final misplaced underbar check */
13111 if (s[-1] == '_') {
a2a5de95 13112 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
13113 }
13114
561b68a9 13115 sv = newSV(0);
9e24b6e2 13116 if (overflowed) {
a2a5de95
NC
13117 if (n > 4294967295.0)
13118 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13119 "%s number > %s non-portable",
13120 Base, max);
9e24b6e2
JH
13121 sv_setnv(sv, n);
13122 }
13123 else {
15041a67 13124#if UVSIZE > 4
a2a5de95
NC
13125 if (u > 0xffffffff)
13126 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13127 "%s number > %s non-portable",
13128 Base, max);
2cc4c2dc 13129#endif
9e24b6e2
JH
13130 sv_setuv(sv, u);
13131 }
61f33854 13132 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 13133 sv = new_constant(start, s - start, "integer",
eb0d8d16 13134 sv, NULL, NULL, 0);
61f33854 13135 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 13136 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
13137 }
13138 break;
02aa26ce
NT
13139
13140 /*
13141 handle decimal numbers.
13142 we're also sent here when we read a 0 as the first digit
13143 */
378cc40b
LW
13144 case '1': case '2': case '3': case '4': case '5':
13145 case '6': case '7': case '8': case '9': case '.':
13146 decimal:
3280af22
NIS
13147 d = PL_tokenbuf;
13148 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 13149 floatit = FALSE;
02aa26ce
NT
13150
13151 /* read next group of digits and _ and copy into d */
de3bb511 13152 while (isDIGIT(*s) || *s == '_') {
4e553d73 13153 /* skip underscores, checking for misplaced ones
02aa26ce
NT
13154 if -w is on
13155 */
93a17b20 13156 if (*s == '_') {
a2a5de95
NC
13157 if (lastub && s == lastub + 1)
13158 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13159 "Misplaced _ in number");
928753ea 13160 lastub = s++;
93a17b20 13161 }
fc36a67e 13162 else {
02aa26ce 13163 /* check for end of fixed-length buffer */
fc36a67e 13164 if (d >= e)
cea2e8a9 13165 Perl_croak(aTHX_ number_too_long);
02aa26ce 13166 /* if we're ok, copy the character */
378cc40b 13167 *d++ = *s++;
fc36a67e 13168 }
378cc40b 13169 }
02aa26ce
NT
13170
13171 /* final misplaced underbar check */
928753ea 13172 if (lastub && s == lastub + 1) {
a2a5de95 13173 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 13174 }
02aa26ce
NT
13175
13176 /* read a decimal portion if there is one. avoid
13177 3..5 being interpreted as the number 3. followed
13178 by .5
13179 */
2f3197b3 13180 if (*s == '.' && s[1] != '.') {
79072805 13181 floatit = TRUE;
378cc40b 13182 *d++ = *s++;
02aa26ce 13183
928753ea 13184 if (*s == '_') {
a2a5de95
NC
13185 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13186 "Misplaced _ in number");
928753ea
JH
13187 lastub = s;
13188 }
13189
13190 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 13191 */
fc36a67e 13192 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 13193 /* fixed length buffer check */
fc36a67e 13194 if (d >= e)
cea2e8a9 13195 Perl_croak(aTHX_ number_too_long);
928753ea 13196 if (*s == '_') {
a2a5de95
NC
13197 if (lastub && s == lastub + 1)
13198 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13199 "Misplaced _ in number");
928753ea
JH
13200 lastub = s;
13201 }
13202 else
fc36a67e 13203 *d++ = *s;
378cc40b 13204 }
928753ea
JH
13205 /* fractional part ending in underbar? */
13206 if (s[-1] == '_') {
a2a5de95
NC
13207 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13208 "Misplaced _ in number");
928753ea 13209 }
dd629d5b
GS
13210 if (*s == '.' && isDIGIT(s[1])) {
13211 /* oops, it's really a v-string, but without the "v" */
f4758303 13212 s = start;
dd629d5b
GS
13213 goto vstring;
13214 }
378cc40b 13215 }
02aa26ce
NT
13216
13217 /* read exponent part, if present */
3792a11b 13218 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
13219 floatit = TRUE;
13220 s++;
02aa26ce
NT
13221
13222 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 13223 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 13224
7fd134d9
JH
13225 /* stray preinitial _ */
13226 if (*s == '_') {
a2a5de95
NC
13227 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13228 "Misplaced _ in number");
7fd134d9
JH
13229 lastub = s++;
13230 }
13231
02aa26ce 13232 /* allow positive or negative exponent */
378cc40b
LW
13233 if (*s == '+' || *s == '-')
13234 *d++ = *s++;
02aa26ce 13235
7fd134d9
JH
13236 /* stray initial _ */
13237 if (*s == '_') {
a2a5de95
NC
13238 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13239 "Misplaced _ in number");
7fd134d9
JH
13240 lastub = s++;
13241 }
13242
7fd134d9
JH
13243 /* read digits of exponent */
13244 while (isDIGIT(*s) || *s == '_') {
13245 if (isDIGIT(*s)) {
13246 if (d >= e)
13247 Perl_croak(aTHX_ number_too_long);
b3b48e3e 13248 *d++ = *s++;
7fd134d9
JH
13249 }
13250 else {
041457d9 13251 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
13252 (!isDIGIT(s[1]) && s[1] != '_')))
13253 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13254 "Misplaced _ in number");
b3b48e3e 13255 lastub = s++;
7fd134d9 13256 }
7fd134d9 13257 }
378cc40b 13258 }
02aa26ce 13259
02aa26ce
NT
13260
13261 /* make an sv from the string */
561b68a9 13262 sv = newSV(0);
097ee67d 13263
0b7fceb9 13264 /*
58bb9ec3
NC
13265 We try to do an integer conversion first if no characters
13266 indicating "float" have been found.
0b7fceb9
MU
13267 */
13268
13269 if (!floatit) {
58bb9ec3 13270 UV uv;
6136c704 13271 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
13272
13273 if (flags == IS_NUMBER_IN_UV) {
13274 if (uv <= IV_MAX)
86554af2 13275 sv_setiv(sv, uv); /* Prefer IVs over UVs. */
58bb9ec3 13276 else
c239479b 13277 sv_setuv(sv, uv);
58bb9ec3
NC
13278 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13279 if (uv <= (UV) IV_MIN)
13280 sv_setiv(sv, -(IV)uv);
13281 else
13282 floatit = TRUE;
13283 } else
13284 floatit = TRUE;
13285 }
0b7fceb9 13286 if (floatit) {
58bb9ec3
NC
13287 /* terminate the string */
13288 *d = '\0';
86554af2
JH
13289 nv = Atof(PL_tokenbuf);
13290 sv_setnv(sv, nv);
13291 }
86554af2 13292
eb0d8d16
NC
13293 if ( floatit
13294 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13295 const char *const key = floatit ? "float" : "integer";
13296 const STRLEN keylen = floatit ? 5 : 7;
13297 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13298 key, keylen, sv, NULL, NULL, 0);
13299 }
378cc40b 13300 break;
0b7fceb9 13301
e312add1 13302 /* if it starts with a v, it could be a v-string */
a7cb1f99 13303 case 'v':
dd629d5b 13304vstring:
561b68a9 13305 sv = newSV(5); /* preallocate storage space */
65b06e02 13306 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 13307 break;
79072805 13308 }
a687059c 13309
02aa26ce
NT
13310 /* make the op for the constant and return */
13311
a86a20aa 13312 if (sv)
b73d6f50 13313 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13314 else
5f66b61c 13315 lvalp->opval = NULL;
a687059c 13316
73d840c0 13317 return (char *)s;
378cc40b
LW
13318}
13319
76e3520e 13320STATIC char *
cea2e8a9 13321S_scan_formline(pTHX_ register char *s)
378cc40b 13322{
97aff369 13323 dVAR;
79072805 13324 register char *eol;
378cc40b 13325 register char *t;
6136c704 13326 SV * const stuff = newSVpvs("");
79072805 13327 bool needargs = FALSE;
c5ee2135 13328 bool eofmt = FALSE;
5db06880
NC
13329#ifdef PERL_MAD
13330 char *tokenstart = s;
4f61fd4b
JC
13331 SV* savewhite = NULL;
13332
5db06880 13333 if (PL_madskills) {
cd81e915
NC
13334 savewhite = PL_thiswhite;
13335 PL_thiswhite = 0;
5db06880
NC
13336 }
13337#endif
378cc40b 13338
7918f24d
NC
13339 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13340
79072805 13341 while (!needargs) {
a1b95068 13342 if (*s == '.') {
c35e046a 13343 t = s+1;
51882d45 13344#ifdef PERL_STRICT_CR
c35e046a
AL
13345 while (SPACE_OR_TAB(*t))
13346 t++;
51882d45 13347#else
c35e046a
AL
13348 while (SPACE_OR_TAB(*t) || *t == '\r')
13349 t++;
51882d45 13350#endif
c5ee2135
WL
13351 if (*t == '\n' || t == PL_bufend) {
13352 eofmt = TRUE;
79072805 13353 break;
c5ee2135 13354 }
79072805 13355 }
3280af22 13356 if (PL_in_eval && !PL_rsfp) {
07409e01 13357 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13358 if (!eol++)
3280af22 13359 eol = PL_bufend;
0f85fab0
LW
13360 }
13361 else
3280af22 13362 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13363 if (*s != '#') {
a0d0e21e
LW
13364 for (t = s; t < eol; t++) {
13365 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13366 needargs = FALSE;
13367 goto enough; /* ~~ must be first line in formline */
378cc40b 13368 }
a0d0e21e
LW
13369 if (*t == '@' || *t == '^')
13370 needargs = TRUE;
378cc40b 13371 }
7121b347
MG
13372 if (eol > s) {
13373 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13374#ifndef PERL_STRICT_CR
7121b347
MG
13375 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13376 char *end = SvPVX(stuff) + SvCUR(stuff);
13377 end[-2] = '\n';
13378 end[-1] = '\0';
b162af07 13379 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13380 }
2dc4c65b 13381#endif
7121b347
MG
13382 }
13383 else
13384 break;
79072805 13385 }
95a20fc0 13386 s = (char*)eol;
3280af22 13387 if (PL_rsfp) {
f0e67a1d 13388 bool got_some;
5db06880
NC
13389#ifdef PERL_MAD
13390 if (PL_madskills) {
cd81e915
NC
13391 if (PL_thistoken)
13392 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13393 else
cd81e915 13394 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13395 }
13396#endif
f0e67a1d
Z
13397 PL_bufptr = PL_bufend;
13398 CopLINE_inc(PL_curcop);
13399 got_some = lex_next_chunk(0);
13400 CopLINE_dec(PL_curcop);
13401 s = PL_bufptr;
5db06880 13402#ifdef PERL_MAD
f0e67a1d 13403 tokenstart = PL_bufptr;
5db06880 13404#endif
f0e67a1d 13405 if (!got_some)
378cc40b 13406 break;
378cc40b 13407 }
463ee0b2 13408 incline(s);
79072805 13409 }
a0d0e21e
LW
13410 enough:
13411 if (SvCUR(stuff)) {
3280af22 13412 PL_expect = XTERM;
79072805 13413 if (needargs) {
3280af22 13414 PL_lex_state = LEX_NORMAL;
cd81e915 13415 start_force(PL_curforce);
9ded7720 13416 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13417 force_next(',');
13418 }
a0d0e21e 13419 else
3280af22 13420 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13421 if (!IN_BYTES) {
95a20fc0 13422 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13423 SvUTF8_on(stuff);
13424 else if (PL_encoding)
13425 sv_recode_to_utf8(stuff, PL_encoding);
13426 }
cd81e915 13427 start_force(PL_curforce);
9ded7720 13428 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13429 force_next(THING);
cd81e915 13430 start_force(PL_curforce);
9ded7720 13431 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13432 force_next(LSTOP);
378cc40b 13433 }
79072805 13434 else {
8990e307 13435 SvREFCNT_dec(stuff);
c5ee2135
WL
13436 if (eofmt)
13437 PL_lex_formbrack = 0;
3280af22 13438 PL_bufptr = s;
79072805 13439 }
5db06880
NC
13440#ifdef PERL_MAD
13441 if (PL_madskills) {
cd81e915
NC
13442 if (PL_thistoken)
13443 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13444 else
cd81e915
NC
13445 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13446 PL_thiswhite = savewhite;
5db06880
NC
13447 }
13448#endif
79072805 13449 return s;
378cc40b 13450}
a687059c 13451
ba6d6ac9 13452I32
864dbfa3 13453Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13454{
97aff369 13455 dVAR;
a3b680e6 13456 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13457 CV* const outsidecv = PL_compcv;
8990e307 13458
3280af22
NIS
13459 if (PL_compcv) {
13460 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13461 }
7766f137 13462 SAVEI32(PL_subline);
3280af22 13463 save_item(PL_subname);
3280af22 13464 SAVESPTR(PL_compcv);
3280af22 13465
ea726b52 13466 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13467 CvFLAGS(PL_compcv) |= flags;
13468
57843af0 13469 PL_subline = CopLINE(PL_curcop);
dd2155a4 13470 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13471 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13472 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13473
8990e307
LW
13474 return oldsavestack_ix;
13475}
13476
084592ab
CN
13477#ifdef __SC__
13478#pragma segment Perl_yylex
13479#endif
af41e527
NC
13480static int
13481S_yywarn(pTHX_ const char *const s)
8990e307 13482{
97aff369 13483 dVAR;
7918f24d
NC
13484
13485 PERL_ARGS_ASSERT_YYWARN;
13486
faef0170 13487 PL_in_eval |= EVAL_WARNONLY;
748a9306 13488 yyerror(s);
faef0170 13489 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13490 return 0;
8990e307
LW
13491}
13492
13493int
15f169a1 13494Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13495{
97aff369 13496 dVAR;
bfed75c6
AL
13497 const char *where = NULL;
13498 const char *context = NULL;
68dc0745 13499 int contlen = -1;
46fc3d4c 13500 SV *msg;
5912531f 13501 int yychar = PL_parser->yychar;
463ee0b2 13502
7918f24d
NC
13503 PERL_ARGS_ASSERT_YYERROR;
13504
3280af22 13505 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13506 where = "at EOF";
8bcfe651
TM
13507 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13508 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13509 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13510 /*
13511 Only for NetWare:
13512 The code below is removed for NetWare because it abends/crashes on NetWare
13513 when the script has error such as not having the closing quotes like:
13514 if ($var eq "value)
13515 Checking of white spaces is anyway done in NetWare code.
13516 */
13517#ifndef NETWARE
3280af22
NIS
13518 while (isSPACE(*PL_oldoldbufptr))
13519 PL_oldoldbufptr++;
f355267c 13520#endif
3280af22
NIS
13521 context = PL_oldoldbufptr;
13522 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13523 }
8bcfe651
TM
13524 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13525 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13526 /*
13527 Only for NetWare:
13528 The code below is removed for NetWare because it abends/crashes on NetWare
13529 when the script has error such as not having the closing quotes like:
13530 if ($var eq "value)
13531 Checking of white spaces is anyway done in NetWare code.
13532 */
13533#ifndef NETWARE
3280af22
NIS
13534 while (isSPACE(*PL_oldbufptr))
13535 PL_oldbufptr++;
f355267c 13536#endif
3280af22
NIS
13537 context = PL_oldbufptr;
13538 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13539 }
13540 else if (yychar > 255)
68dc0745 13541 where = "next token ???";
12fbd33b 13542 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13543 if (PL_lex_state == LEX_NORMAL ||
13544 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13545 where = "at end of line";
3280af22 13546 else if (PL_lex_inpat)
68dc0745 13547 where = "within pattern";
463ee0b2 13548 else
68dc0745 13549 where = "within string";
463ee0b2 13550 }
46fc3d4c 13551 else {
84bafc02 13552 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13553 if (yychar < 32)
cea2e8a9 13554 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13555 else if (isPRINT_LC(yychar)) {
88c9ea1e 13556 const char string = yychar;
5e7aa789
NC
13557 sv_catpvn(where_sv, &string, 1);
13558 }
463ee0b2 13559 else
cea2e8a9 13560 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13561 where = SvPVX_const(where_sv);
463ee0b2 13562 }
46fc3d4c 13563 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13564 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13565 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13566 if (context)
cea2e8a9 13567 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13568 else
cea2e8a9 13569 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13570 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13571 Perl_sv_catpvf(aTHX_ msg,
57def98f 13572 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13573 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13574 PL_multi_end = 0;
a0d0e21e 13575 }
500960a6 13576 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13577 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13578 }
463ee0b2 13579 else
5a844595 13580 qerror(msg);
c7d6bfb2
GS
13581 if (PL_error_count >= 10) {
13582 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13583 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13584 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13585 else
13586 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13587 OutCopFILE(PL_curcop));
c7d6bfb2 13588 }
3280af22 13589 PL_in_my = 0;
5c284bb0 13590 PL_in_my_stash = NULL;
463ee0b2
LW
13591 return 0;
13592}
084592ab
CN
13593#ifdef __SC__
13594#pragma segment Main
13595#endif
4e35701f 13596
b250498f 13597STATIC char*
3ae08724 13598S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13599{
97aff369 13600 dVAR;
f54cb97a 13601 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13602
13603 PERL_ARGS_ASSERT_SWALLOW_BOM;
13604
7aa207d6 13605 switch (s[0]) {
4e553d73
NIS
13606 case 0xFF:
13607 if (s[1] == 0xFE) {
ee6ba15d 13608 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13609 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13610 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13611#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13612 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13613 s += 2;
dea0fc0b 13614 if (PL_bufend > (char*)s) {
81a923f4 13615 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13616 }
b250498f 13617#else
ee6ba15d 13618 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13619#endif
01ec43d0
GS
13620 }
13621 break;
78ae23f5 13622 case 0xFE:
7aa207d6 13623 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13624#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13625 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13626 s += 2;
13627 if (PL_bufend > (char *)s) {
81a923f4 13628 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13629 }
b250498f 13630#else
ee6ba15d 13631 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13632#endif
01ec43d0
GS
13633 }
13634 break;
3ae08724
GS
13635 case 0xEF:
13636 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13637 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13638 s += 3; /* UTF-8 */
13639 }
13640 break;
13641 case 0:
7aa207d6
JH
13642 if (slen > 3) {
13643 if (s[1] == 0) {
13644 if (s[2] == 0xFE && s[3] == 0xFF) {
13645 /* UTF-32 big-endian */
ee6ba15d 13646 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13647 }
13648 }
13649 else if (s[2] == 0 && s[3] != 0) {
13650 /* Leading bytes
13651 * 00 xx 00 xx
13652 * are a good indicator of UTF-16BE. */
ee6ba15d 13653#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13654 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13655 s = add_utf16_textfilter(s, FALSE);
13656#else
13657 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13658#endif
7aa207d6 13659 }
01ec43d0 13660 }
e294cc5d
JH
13661#ifdef EBCDIC
13662 case 0xDD:
13663 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13664 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13665 s += 4; /* UTF-8 */
13666 }
13667 break;
13668#endif
13669
7aa207d6
JH
13670 default:
13671 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13672 /* Leading bytes
13673 * xx 00 xx 00
13674 * are a good indicator of UTF-16LE. */
ee6ba15d 13675#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13676 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13677 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13678#else
13679 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13680#endif
7aa207d6 13681 }
01ec43d0 13682 }
b8f84bb2 13683 return (char*)s;
b250498f 13684}
4755096e 13685
6e3aabd6
GS
13686
13687#ifndef PERL_NO_UTF16_FILTER
13688static I32
a28af015 13689S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13690{
97aff369 13691 dVAR;
f3040f2c 13692 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13693 /* We re-use this each time round, throwing the contents away before we
13694 return. */
2a773401 13695 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13696 SV *const utf8_buffer = filter;
c28d6105 13697 IV status = IoPAGE(filter);
eda4663d 13698 const bool reverse = (bool) IoLINES(filter);
d2d1d4de 13699 I32 retval;
c8b0cbae
NC
13700
13701 /* As we're automatically added, at the lowest level, and hence only called
13702 from this file, we can be sure that we're not called in block mode. Hence
13703 don't bother writing code to deal with block mode. */
13704 if (maxlen) {
13705 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13706 }
c28d6105
NC
13707 if (status < 0) {
13708 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13709 }
1de9afcd 13710 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13711 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13712 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13713 reverse ? 'l' : 'b', idx, maxlen, status,
13714 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13715
13716 while (1) {
13717 STRLEN chars;
13718 STRLEN have;
dea0fc0b 13719 I32 newlen;
2a773401 13720 U8 *end;
c28d6105
NC
13721 /* First, look in our buffer of existing UTF-8 data: */
13722 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13723
13724 if (nl) {
13725 ++nl;
13726 } else if (status == 0) {
13727 /* EOF */
13728 IoPAGE(filter) = 0;
13729 nl = SvEND(utf8_buffer);
13730 }
13731 if (nl) {
d2d1d4de
NC
13732 STRLEN got = nl - SvPVX(utf8_buffer);
13733 /* Did we have anything to append? */
13734 retval = got != 0;
13735 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13736 /* Everything else in this code works just fine if SVp_POK isn't
13737 set. This, however, needs it, and we need it to work, else
13738 we loop infinitely because the buffer is never consumed. */
13739 sv_chop(utf8_buffer, nl);
13740 break;
13741 }
ba77e4cc 13742
c28d6105
NC
13743 /* OK, not a complete line there, so need to read some more UTF-16.
13744 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13745 while (1) {
13746 if (status <= 0)
13747 break;
13748 if (SvCUR(utf16_buffer) >= 2) {
13749 /* Location of the high octet of the last complete code point.
13750 Gosh, UTF-16 is a pain. All the benefits of variable length,
13751 *coupled* with all the benefits of partial reads and
13752 endianness. */
13753 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13754 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13755
13756 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13757 break;
13758 }
13759
13760 /* We have the first half of a surrogate. Read more. */
13761 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13762 }
c28d6105 13763
c28d6105
NC
13764 status = FILTER_READ(idx + 1, utf16_buffer,
13765 160 + (SvCUR(utf16_buffer) & 1));
13766 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13767 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13768 if (status < 0) {
13769 /* Error */
13770 IoPAGE(filter) = status;
13771 return status;
13772 }
13773 }
13774
13775 chars = SvCUR(utf16_buffer) >> 1;
13776 have = SvCUR(utf8_buffer);
13777 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13778
aa6dbd60 13779 if (reverse) {
c28d6105
NC
13780 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13781 (U8*)SvPVX_const(utf8_buffer) + have,
13782 chars * 2, &newlen);
aa6dbd60 13783 } else {
2a773401 13784 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13785 (U8*)SvPVX_const(utf8_buffer) + have,
13786 chars * 2, &newlen);
2a773401 13787 }
c28d6105 13788 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13789 *end = '\0';
c28d6105 13790
e07286ed
NC
13791 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13792 it's private to us, and utf16_to_utf8{,reversed} take a
13793 (pointer,length) pair, rather than a NUL-terminated string. */
13794 if(SvCUR(utf16_buffer) & 1) {
13795 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13796 SvCUR_set(utf16_buffer, 1);
13797 } else {
13798 SvCUR_set(utf16_buffer, 0);
13799 }
2a773401 13800 }
c28d6105
NC
13801 DEBUG_P(PerlIO_printf(Perl_debug_log,
13802 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13803 status,
13804 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13805 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13806 return retval;
6e3aabd6 13807}
81a923f4
NC
13808
13809static U8 *
13810S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13811{
2a773401 13812 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13813
c28d6105 13814 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13815 sv_setpvs(filter, "");
2a773401 13816 IoLINES(filter) = reversed;
c28d6105
NC
13817 IoPAGE(filter) = 1; /* Not EOF */
13818
13819 /* Sadly, we have to return a valid pointer, come what may, so we have to
13820 ignore any error return from this. */
13821 SvCUR_set(PL_linestr, 0);
13822 if (FILTER_READ(0, PL_linestr, 0)) {
13823 SvUTF8_on(PL_linestr);
81a923f4 13824 } else {
c28d6105 13825 SvUTF8_on(PL_linestr);
81a923f4 13826 }
c28d6105 13827 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13828 return (U8*)SvPVX(PL_linestr);
13829}
6e3aabd6 13830#endif
9f4817db 13831
f333445c
JP
13832/*
13833Returns a pointer to the next character after the parsed
13834vstring, as well as updating the passed in sv.
13835
13836Function must be called like
13837
561b68a9 13838 sv = newSV(5);
65b06e02 13839 s = scan_vstring(s,e,sv);
f333445c 13840
65b06e02 13841where s and e are the start and end of the string.
f333445c
JP
13842The sv should already be large enough to store the vstring
13843passed in, for performance reasons.
13844
13845*/
13846
13847char *
15f169a1 13848Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13849{
97aff369 13850 dVAR;
bfed75c6
AL
13851 const char *pos = s;
13852 const char *start = s;
7918f24d
NC
13853
13854 PERL_ARGS_ASSERT_SCAN_VSTRING;
13855
f333445c 13856 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13857 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13858 pos++;
f333445c
JP
13859 if ( *pos != '.') {
13860 /* this may not be a v-string if followed by => */
bfed75c6 13861 const char *next = pos;
65b06e02 13862 while (next < e && isSPACE(*next))
8fc7bb1c 13863 ++next;
65b06e02 13864 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13865 /* return string not v-string */
13866 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13867 return (char *)pos;
f333445c
JP
13868 }
13869 }
13870
13871 if (!isALPHA(*pos)) {
89ebb4a3 13872 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13873
d4c19fe8
AL
13874 if (*s == 'v')
13875 s++; /* get past 'v' */
f333445c 13876
76f68e9b 13877 sv_setpvs(sv, "");
f333445c
JP
13878
13879 for (;;) {
d4c19fe8 13880 /* this is atoi() that tolerates underscores */
0bd48802
AL
13881 U8 *tmpend;
13882 UV rev = 0;
d4c19fe8
AL
13883 const char *end = pos;
13884 UV mult = 1;
13885 while (--end >= s) {
13886 if (*end != '_') {
13887 const UV orev = rev;
f333445c
JP
13888 rev += (*end - '0') * mult;
13889 mult *= 10;
9b387841
NC
13890 if (orev > rev)
13891 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13892 "Integer overflow in decimal number");
f333445c
JP
13893 }
13894 }
13895#ifdef EBCDIC
13896 if (rev > 0x7FFFFFFF)
13897 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13898#endif
13899 /* Append native character for the rev point */
13900 tmpend = uvchr_to_utf8(tmpbuf, rev);
13901 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13902 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13903 SvUTF8_on(sv);
65b06e02 13904 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13905 s = ++pos;
13906 else {
13907 s = pos;
13908 break;
13909 }
65b06e02 13910 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13911 pos++;
13912 }
13913 SvPOK_on(sv);
13914 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13915 SvRMAGICAL_on(sv);
13916 }
73d840c0 13917 return (char *)s;
f333445c
JP
13918}
13919
88e1f1a2
JV
13920int
13921Perl_keyword_plugin_standard(pTHX_
13922 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13923{
13924 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13925 PERL_UNUSED_CONTEXT;
13926 PERL_UNUSED_ARG(keyword_ptr);
13927 PERL_UNUSED_ARG(keyword_len);
13928 PERL_UNUSED_ARG(op_ptr);
13929 return KEYWORD_PLUGIN_DECLINE;
13930}
13931
1da4ca5f
NC
13932/*
13933 * Local variables:
13934 * c-indentation-style: bsd
13935 * c-basic-offset: 4
13936 * indent-tabs-mode: t
13937 * End:
13938 *
37442d52
RGS
13939 * ex: set ts=8 sts=4 sw=4 noet:
13940 */