This is a live mirror of the Perl 5 development currently hosted at https://github.com/perl/perl5
perl5133delta: Expand on "File::Copy skips suid tests on a nosuid partition"
[perl5.git] / toke.c
CommitLineData
a0d0e21e 1/* toke.c
a687059c 2 *
1129b882
NC
3 * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
4 * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
a687059c 5 *
d48672a2
LW
6 * You may distribute under the terms of either the GNU General Public
7 * License or the Artistic License, as specified in the README file.
378cc40b 8 *
a0d0e21e
LW
9 */
10
11/*
4ac71550
TC
12 * 'It all comes from here, the stench and the peril.' --Frodo
13 *
14 * [p.719 of _The Lord of the Rings_, IV/ix: "Shelob's Lair"]
378cc40b
LW
15 */
16
9cbb5ea2
GS
17/*
18 * This file is the lexer for Perl. It's closely linked to the
4e553d73 19 * parser, perly.y.
ffb4593c
NT
20 *
21 * The main routine is yylex(), which returns the next token.
22 */
23
f0e67a1d
Z
24/*
25=head1 Lexer interface
26
27This is the lower layer of the Perl parser, managing characters and tokens.
28
29=for apidoc AmU|yy_parser *|PL_parser
30
31Pointer to a structure encapsulating the state of the parsing operation
32currently in progress. The pointer can be locally changed to perform
33a nested parse without interfering with the state of an outer parse.
34Individual members of C<PL_parser> have their own documentation.
35
36=cut
37*/
38
378cc40b 39#include "EXTERN.h"
864dbfa3 40#define PERL_IN_TOKE_C
378cc40b 41#include "perl.h"
378cc40b 42
eb0d8d16
NC
43#define new_constant(a,b,c,d,e,f,g) \
44 S_new_constant(aTHX_ a,b,STR_WITH_LEN(c),d,e,f, g)
45
6154021b 46#define pl_yylval (PL_parser->yylval)
d3b6f988 47
acdf0a21
DM
48/* YYINITDEPTH -- initial size of the parser's stacks. */
49#define YYINITDEPTH 200
50
199e78b7
DM
51/* XXX temporary backwards compatibility */
52#define PL_lex_brackets (PL_parser->lex_brackets)
53#define PL_lex_brackstack (PL_parser->lex_brackstack)
54#define PL_lex_casemods (PL_parser->lex_casemods)
55#define PL_lex_casestack (PL_parser->lex_casestack)
56#define PL_lex_defer (PL_parser->lex_defer)
57#define PL_lex_dojoin (PL_parser->lex_dojoin)
58#define PL_lex_expect (PL_parser->lex_expect)
59#define PL_lex_formbrack (PL_parser->lex_formbrack)
60#define PL_lex_inpat (PL_parser->lex_inpat)
61#define PL_lex_inwhat (PL_parser->lex_inwhat)
62#define PL_lex_op (PL_parser->lex_op)
63#define PL_lex_repl (PL_parser->lex_repl)
64#define PL_lex_starts (PL_parser->lex_starts)
65#define PL_lex_stuff (PL_parser->lex_stuff)
66#define PL_multi_start (PL_parser->multi_start)
67#define PL_multi_open (PL_parser->multi_open)
68#define PL_multi_close (PL_parser->multi_close)
69#define PL_pending_ident (PL_parser->pending_ident)
70#define PL_preambled (PL_parser->preambled)
71#define PL_sublex_info (PL_parser->sublex_info)
bdc0bf6f 72#define PL_linestr (PL_parser->linestr)
c2598295
DM
73#define PL_expect (PL_parser->expect)
74#define PL_copline (PL_parser->copline)
f06b5848
DM
75#define PL_bufptr (PL_parser->bufptr)
76#define PL_oldbufptr (PL_parser->oldbufptr)
77#define PL_oldoldbufptr (PL_parser->oldoldbufptr)
78#define PL_linestart (PL_parser->linestart)
79#define PL_bufend (PL_parser->bufend)
80#define PL_last_uni (PL_parser->last_uni)
81#define PL_last_lop (PL_parser->last_lop)
82#define PL_last_lop_op (PL_parser->last_lop_op)
bc177e6b 83#define PL_lex_state (PL_parser->lex_state)
2f9285f8 84#define PL_rsfp (PL_parser->rsfp)
5486870f 85#define PL_rsfp_filters (PL_parser->rsfp_filters)
12bd6ede
DM
86#define PL_in_my (PL_parser->in_my)
87#define PL_in_my_stash (PL_parser->in_my_stash)
14047fc9 88#define PL_tokenbuf (PL_parser->tokenbuf)
670a9cb2 89#define PL_multi_end (PL_parser->multi_end)
13765c85 90#define PL_error_count (PL_parser->error_count)
199e78b7
DM
91
92#ifdef PERL_MAD
93# define PL_endwhite (PL_parser->endwhite)
94# define PL_faketokens (PL_parser->faketokens)
95# define PL_lasttoke (PL_parser->lasttoke)
96# define PL_nextwhite (PL_parser->nextwhite)
97# define PL_realtokenstart (PL_parser->realtokenstart)
98# define PL_skipwhite (PL_parser->skipwhite)
99# define PL_thisclose (PL_parser->thisclose)
100# define PL_thismad (PL_parser->thismad)
101# define PL_thisopen (PL_parser->thisopen)
102# define PL_thisstuff (PL_parser->thisstuff)
103# define PL_thistoken (PL_parser->thistoken)
104# define PL_thiswhite (PL_parser->thiswhite)
fb205e7a
DM
105# define PL_thiswhite (PL_parser->thiswhite)
106# define PL_nexttoke (PL_parser->nexttoke)
107# define PL_curforce (PL_parser->curforce)
108#else
109# define PL_nexttoke (PL_parser->nexttoke)
110# define PL_nexttype (PL_parser->nexttype)
111# define PL_nextval (PL_parser->nextval)
199e78b7
DM
112#endif
113
16173588
NC
114/* This can't be done with embed.fnc, because struct yy_parser contains a
115 member named pending_ident, which clashes with the generated #define */
3cbf51f5
DM
116static int
117S_pending_ident(pTHX);
199e78b7 118
0bd48802 119static const char ident_too_long[] = "Identifier too long";
8903cb82 120
29595ff2 121#ifdef PERL_MAD
29595ff2 122# define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
cd81e915 123# define NEXTVAL_NEXTTOKE PL_nexttoke[PL_curforce].next_val
9ded7720 124#else
5db06880 125# define CURMAD(slot,sv)
9ded7720 126# define NEXTVAL_NEXTTOKE PL_nextval[PL_nexttoke]
29595ff2
NC
127#endif
128
9059aa12
LW
129#define XFAKEBRACK 128
130#define XENUMMASK 127
131
39e02b42
JH
132#ifdef USE_UTF8_SCRIPTS
133# define UTF (!IN_BYTES)
2b9d42f0 134#else
746b446a 135# define UTF ((PL_linestr && DO_UTF8(PL_linestr)) || (PL_hints & HINT_UTF8))
2b9d42f0 136#endif
a0ed51b3 137
b1fc3636
CJ
138/* The maximum number of characters preceding the unrecognized one to display */
139#define UNRECOGNIZED_PRECEDE_COUNT 10
140
61f0cdd9 141/* In variables named $^X, these are the legal values for X.
2b92dfce
GS
142 * 1999-02-27 mjd-perl-patch@plover.com */
143#define isCONTROLVAR(x) (isUPPER(x) || strchr("[\\]^_?", (x)))
144
bf4acbe4 145#define SPACE_OR_TAB(c) ((c)==' '||(c)=='\t')
bf4acbe4 146
ffb4593c
NT
147/* LEX_* are values for PL_lex_state, the state of the lexer.
148 * They are arranged oddly so that the guard on the switch statement
79072805
LW
149 * can get by with a single comparison (if the compiler is smart enough).
150 */
151
fb73857a 152/* #define LEX_NOTPARSING 11 is done in perl.h. */
153
b6007c36
DM
154#define LEX_NORMAL 10 /* normal code (ie not within "...") */
155#define LEX_INTERPNORMAL 9 /* code within a string, eg "$foo[$x+1]" */
156#define LEX_INTERPCASEMOD 8 /* expecting a \U, \Q or \E etc */
157#define LEX_INTERPPUSH 7 /* starting a new sublex parse level */
158#define LEX_INTERPSTART 6 /* expecting the start of a $var */
159
160 /* at end of code, eg "$x" followed by: */
161#define LEX_INTERPEND 5 /* ... eg not one of [, { or -> */
162#define LEX_INTERPENDMAYBE 4 /* ... eg one of [, { or -> */
163
164#define LEX_INTERPCONCAT 3 /* expecting anything, eg at start of
165 string or after \E, $foo, etc */
166#define LEX_INTERPCONST 2 /* NOT USED */
167#define LEX_FORMLINE 1 /* expecting a format line */
168#define LEX_KNOWNEXT 0 /* next token known; just return it */
169
79072805 170
bbf60fe6 171#ifdef DEBUGGING
27da23d5 172static const char* const lex_state_names[] = {
bbf60fe6
DM
173 "KNOWNEXT",
174 "FORMLINE",
175 "INTERPCONST",
176 "INTERPCONCAT",
177 "INTERPENDMAYBE",
178 "INTERPEND",
179 "INTERPSTART",
180 "INTERPPUSH",
181 "INTERPCASEMOD",
182 "INTERPNORMAL",
183 "NORMAL"
184};
185#endif
186
79072805
LW
187#ifdef ff_next
188#undef ff_next
d48672a2
LW
189#endif
190
79072805 191#include "keywords.h"
fe14fcc3 192
ffb4593c
NT
193/* CLINE is a macro that ensures PL_copline has a sane value */
194
ae986130
LW
195#ifdef CLINE
196#undef CLINE
197#endif
57843af0 198#define CLINE (PL_copline = (CopLINE(PL_curcop) < PL_copline ? CopLINE(PL_curcop) : PL_copline))
3280af22 199
5db06880 200#ifdef PERL_MAD
29595ff2
NC
201# define SKIPSPACE0(s) skipspace0(s)
202# define SKIPSPACE1(s) skipspace1(s)
203# define SKIPSPACE2(s,tsv) skipspace2(s,&tsv)
204# define PEEKSPACE(s) skipspace2(s,0)
205#else
206# define SKIPSPACE0(s) skipspace(s)
207# define SKIPSPACE1(s) skipspace(s)
208# define SKIPSPACE2(s,tsv) skipspace(s)
209# define PEEKSPACE(s) skipspace(s)
210#endif
211
ffb4593c
NT
212/*
213 * Convenience functions to return different tokens and prime the
9cbb5ea2 214 * lexer for the next token. They all take an argument.
ffb4593c
NT
215 *
216 * TOKEN : generic token (used for '(', DOLSHARP, etc)
217 * OPERATOR : generic operator
218 * AOPERATOR : assignment operator
219 * PREBLOCK : beginning the block after an if, while, foreach, ...
220 * PRETERMBLOCK : beginning a non-code-defining {} block (eg, hash ref)
221 * PREREF : *EXPR where EXPR is not a simple identifier
222 * TERM : expression term
223 * LOOPX : loop exiting command (goto, last, dump, etc)
224 * FTST : file test operator
225 * FUN0 : zero-argument function
2d2e263d 226 * FUN1 : not used, except for not, which isn't a UNIOP
ffb4593c
NT
227 * BOop : bitwise or or xor
228 * BAop : bitwise and
229 * SHop : shift operator
230 * PWop : power operator
9cbb5ea2 231 * PMop : pattern-matching operator
ffb4593c
NT
232 * Aop : addition-level operator
233 * Mop : multiplication-level operator
234 * Eop : equality-testing operator
e5edeb50 235 * Rop : relational operator <= != gt
ffb4593c
NT
236 *
237 * Also see LOP and lop() below.
238 */
239
998054bd 240#ifdef DEBUGGING /* Serve -DT. */
704d4215 241# define REPORT(retval) tokereport((I32)retval, &pl_yylval)
998054bd 242#else
bbf60fe6 243# define REPORT(retval) (retval)
998054bd
SC
244#endif
245
bbf60fe6
DM
246#define TOKEN(retval) return ( PL_bufptr = s, REPORT(retval))
247#define OPERATOR(retval) return (PL_expect = XTERM, PL_bufptr = s, REPORT(retval))
248#define AOPERATOR(retval) return ao((PL_expect = XTERM, PL_bufptr = s, REPORT(retval)))
249#define PREBLOCK(retval) return (PL_expect = XBLOCK,PL_bufptr = s, REPORT(retval))
250#define PRETERMBLOCK(retval) return (PL_expect = XTERMBLOCK,PL_bufptr = s, REPORT(retval))
251#define PREREF(retval) return (PL_expect = XREF,PL_bufptr = s, REPORT(retval))
252#define TERM(retval) return (CLINE, PL_expect = XOPERATOR, PL_bufptr = s, REPORT(retval))
6154021b
RGS
253#define LOOPX(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)LOOPEX))
254#define FTST(f) return (pl_yylval.ival=f, PL_expect=XTERMORDORDOR, PL_bufptr=s, REPORT((int)UNIOP))
255#define FUN0(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC0))
256#define FUN1(f) return (pl_yylval.ival=f, PL_expect=XOPERATOR, PL_bufptr=s, REPORT((int)FUNC1))
257#define BOop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITOROP)))
258#define BAop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)BITANDOP)))
259#define SHop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)SHIFTOP)))
260#define PWop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)POWOP)))
261#define PMop(f) return(pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MATCHOP))
262#define Aop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)ADDOP)))
263#define Mop(f) return ao((pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)MULOP)))
264#define Eop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)EQOP))
265#define Rop(f) return (pl_yylval.ival=f, PL_expect=XTERM, PL_bufptr=s, REPORT((int)RELOP))
2f3197b3 266
a687059c
LW
267/* This bit of chicanery makes a unary function followed by
268 * a parenthesis into a function with one argument, highest precedence.
6f33ba73
RGS
269 * The UNIDOR macro is for unary functions that can be followed by the //
270 * operator (such as C<shift // 0>).
a687059c 271 */
376fcdbf 272#define UNI2(f,x) { \
6154021b 273 pl_yylval.ival = f; \
376fcdbf
AL
274 PL_expect = x; \
275 PL_bufptr = s; \
276 PL_last_uni = PL_oldbufptr; \
277 PL_last_lop_op = f; \
278 if (*s == '(') \
279 return REPORT( (int)FUNC1 ); \
29595ff2 280 s = PEEKSPACE(s); \
376fcdbf
AL
281 return REPORT( *s=='(' ? (int)FUNC1 : (int)UNIOP ); \
282 }
6f33ba73
RGS
283#define UNI(f) UNI2(f,XTERM)
284#define UNIDOR(f) UNI2(f,XTERMORDORDOR)
a687059c 285
376fcdbf 286#define UNIBRACK(f) { \
6154021b 287 pl_yylval.ival = f; \
376fcdbf
AL
288 PL_bufptr = s; \
289 PL_last_uni = PL_oldbufptr; \
290 if (*s == '(') \
291 return REPORT( (int)FUNC1 ); \
29595ff2 292 s = PEEKSPACE(s); \
376fcdbf
AL
293 return REPORT( (*s == '(') ? (int)FUNC1 : (int)UNIOP ); \
294 }
79072805 295
9f68db38 296/* grandfather return to old style */
6154021b 297#define OLDLOP(f) return(pl_yylval.ival=f,PL_expect = XTERM,PL_bufptr = s,(int)LSTOP)
79072805 298
8fa7f367
JH
299#ifdef DEBUGGING
300
6154021b 301/* how to interpret the pl_yylval associated with the token */
bbf60fe6
DM
302enum token_type {
303 TOKENTYPE_NONE,
304 TOKENTYPE_IVAL,
6154021b 305 TOKENTYPE_OPNUM, /* pl_yylval.ival contains an opcode number */
bbf60fe6
DM
306 TOKENTYPE_PVAL,
307 TOKENTYPE_OPVAL,
308 TOKENTYPE_GVVAL
309};
310
6d4a66ac
NC
311static struct debug_tokens {
312 const int token;
313 enum token_type type;
314 const char *name;
315} const debug_tokens[] =
9041c2e3 316{
bbf60fe6
DM
317 { ADDOP, TOKENTYPE_OPNUM, "ADDOP" },
318 { ANDAND, TOKENTYPE_NONE, "ANDAND" },
319 { ANDOP, TOKENTYPE_NONE, "ANDOP" },
320 { ANONSUB, TOKENTYPE_IVAL, "ANONSUB" },
321 { ARROW, TOKENTYPE_NONE, "ARROW" },
322 { ASSIGNOP, TOKENTYPE_OPNUM, "ASSIGNOP" },
323 { BITANDOP, TOKENTYPE_OPNUM, "BITANDOP" },
324 { BITOROP, TOKENTYPE_OPNUM, "BITOROP" },
325 { COLONATTR, TOKENTYPE_NONE, "COLONATTR" },
326 { CONTINUE, TOKENTYPE_NONE, "CONTINUE" },
0d863452 327 { DEFAULT, TOKENTYPE_NONE, "DEFAULT" },
bbf60fe6
DM
328 { DO, TOKENTYPE_NONE, "DO" },
329 { DOLSHARP, TOKENTYPE_NONE, "DOLSHARP" },
330 { DORDOR, TOKENTYPE_NONE, "DORDOR" },
331 { DOROP, TOKENTYPE_OPNUM, "DOROP" },
332 { DOTDOT, TOKENTYPE_IVAL, "DOTDOT" },
333 { ELSE, TOKENTYPE_NONE, "ELSE" },
334 { ELSIF, TOKENTYPE_IVAL, "ELSIF" },
335 { EQOP, TOKENTYPE_OPNUM, "EQOP" },
336 { FOR, TOKENTYPE_IVAL, "FOR" },
337 { FORMAT, TOKENTYPE_NONE, "FORMAT" },
338 { FUNC, TOKENTYPE_OPNUM, "FUNC" },
339 { FUNC0, TOKENTYPE_OPNUM, "FUNC0" },
340 { FUNC0SUB, TOKENTYPE_OPVAL, "FUNC0SUB" },
341 { FUNC1, TOKENTYPE_OPNUM, "FUNC1" },
342 { FUNCMETH, TOKENTYPE_OPVAL, "FUNCMETH" },
0d863452 343 { GIVEN, TOKENTYPE_IVAL, "GIVEN" },
bbf60fe6
DM
344 { HASHBRACK, TOKENTYPE_NONE, "HASHBRACK" },
345 { IF, TOKENTYPE_IVAL, "IF" },
346 { LABEL, TOKENTYPE_PVAL, "LABEL" },
347 { LOCAL, TOKENTYPE_IVAL, "LOCAL" },
348 { LOOPEX, TOKENTYPE_OPNUM, "LOOPEX" },
349 { LSTOP, TOKENTYPE_OPNUM, "LSTOP" },
350 { LSTOPSUB, TOKENTYPE_OPVAL, "LSTOPSUB" },
351 { MATCHOP, TOKENTYPE_OPNUM, "MATCHOP" },
352 { METHOD, TOKENTYPE_OPVAL, "METHOD" },
353 { MULOP, TOKENTYPE_OPNUM, "MULOP" },
354 { MY, TOKENTYPE_IVAL, "MY" },
355 { MYSUB, TOKENTYPE_NONE, "MYSUB" },
356 { NOAMP, TOKENTYPE_NONE, "NOAMP" },
357 { NOTOP, TOKENTYPE_NONE, "NOTOP" },
358 { OROP, TOKENTYPE_IVAL, "OROP" },
359 { OROR, TOKENTYPE_NONE, "OROR" },
360 { PACKAGE, TOKENTYPE_NONE, "PACKAGE" },
88e1f1a2
JV
361 { PLUGEXPR, TOKENTYPE_OPVAL, "PLUGEXPR" },
362 { PLUGSTMT, TOKENTYPE_OPVAL, "PLUGSTMT" },
bbf60fe6
DM
363 { PMFUNC, TOKENTYPE_OPVAL, "PMFUNC" },
364 { POSTDEC, TOKENTYPE_NONE, "POSTDEC" },
365 { POSTINC, TOKENTYPE_NONE, "POSTINC" },
366 { POWOP, TOKENTYPE_OPNUM, "POWOP" },
367 { PREDEC, TOKENTYPE_NONE, "PREDEC" },
368 { PREINC, TOKENTYPE_NONE, "PREINC" },
369 { PRIVATEREF, TOKENTYPE_OPVAL, "PRIVATEREF" },
370 { REFGEN, TOKENTYPE_NONE, "REFGEN" },
371 { RELOP, TOKENTYPE_OPNUM, "RELOP" },
372 { SHIFTOP, TOKENTYPE_OPNUM, "SHIFTOP" },
373 { SUB, TOKENTYPE_NONE, "SUB" },
374 { THING, TOKENTYPE_OPVAL, "THING" },
375 { UMINUS, TOKENTYPE_NONE, "UMINUS" },
376 { UNIOP, TOKENTYPE_OPNUM, "UNIOP" },
377 { UNIOPSUB, TOKENTYPE_OPVAL, "UNIOPSUB" },
378 { UNLESS, TOKENTYPE_IVAL, "UNLESS" },
379 { UNTIL, TOKENTYPE_IVAL, "UNTIL" },
380 { USE, TOKENTYPE_IVAL, "USE" },
0d863452 381 { WHEN, TOKENTYPE_IVAL, "WHEN" },
bbf60fe6
DM
382 { WHILE, TOKENTYPE_IVAL, "WHILE" },
383 { WORD, TOKENTYPE_OPVAL, "WORD" },
be25f609 384 { YADAYADA, TOKENTYPE_IVAL, "YADAYADA" },
c35e046a 385 { 0, TOKENTYPE_NONE, NULL }
bbf60fe6
DM
386};
387
6154021b 388/* dump the returned token in rv, plus any optional arg in pl_yylval */
998054bd 389
bbf60fe6 390STATIC int
704d4215 391S_tokereport(pTHX_ I32 rv, const YYSTYPE* lvalp)
bbf60fe6 392{
97aff369 393 dVAR;
7918f24d
NC
394
395 PERL_ARGS_ASSERT_TOKEREPORT;
396
bbf60fe6 397 if (DEBUG_T_TEST) {
bd61b366 398 const char *name = NULL;
bbf60fe6 399 enum token_type type = TOKENTYPE_NONE;
f54cb97a 400 const struct debug_tokens *p;
396482e1 401 SV* const report = newSVpvs("<== ");
bbf60fe6 402
f54cb97a 403 for (p = debug_tokens; p->token; p++) {
bbf60fe6
DM
404 if (p->token == (int)rv) {
405 name = p->name;
406 type = p->type;
407 break;
408 }
409 }
410 if (name)
54667de8 411 Perl_sv_catpv(aTHX_ report, name);
bbf60fe6
DM
412 else if ((char)rv > ' ' && (char)rv < '~')
413 Perl_sv_catpvf(aTHX_ report, "'%c'", (char)rv);
414 else if (!rv)
396482e1 415 sv_catpvs(report, "EOF");
bbf60fe6
DM
416 else
417 Perl_sv_catpvf(aTHX_ report, "?? %"IVdf, (IV)rv);
418 switch (type) {
419 case TOKENTYPE_NONE:
420 case TOKENTYPE_GVVAL: /* doesn't appear to be used */
421 break;
422 case TOKENTYPE_IVAL:
704d4215 423 Perl_sv_catpvf(aTHX_ report, "(ival=%"IVdf")", (IV)lvalp->ival);
bbf60fe6
DM
424 break;
425 case TOKENTYPE_OPNUM:
426 Perl_sv_catpvf(aTHX_ report, "(ival=op_%s)",
704d4215 427 PL_op_name[lvalp->ival]);
bbf60fe6
DM
428 break;
429 case TOKENTYPE_PVAL:
704d4215 430 Perl_sv_catpvf(aTHX_ report, "(pval=\"%s\")", lvalp->pval);
bbf60fe6
DM
431 break;
432 case TOKENTYPE_OPVAL:
704d4215 433 if (lvalp->opval) {
401441c0 434 Perl_sv_catpvf(aTHX_ report, "(opval=op_%s)",
704d4215
GG
435 PL_op_name[lvalp->opval->op_type]);
436 if (lvalp->opval->op_type == OP_CONST) {
b6007c36 437 Perl_sv_catpvf(aTHX_ report, " %s",
704d4215 438 SvPEEK(cSVOPx_sv(lvalp->opval)));
b6007c36
DM
439 }
440
441 }
401441c0 442 else
396482e1 443 sv_catpvs(report, "(opval=null)");
bbf60fe6
DM
444 break;
445 }
b6007c36 446 PerlIO_printf(Perl_debug_log, "### %s\n\n", SvPV_nolen_const(report));
bbf60fe6
DM
447 };
448 return (int)rv;
998054bd
SC
449}
450
b6007c36
DM
451
452/* print the buffer with suitable escapes */
453
454STATIC void
15f169a1 455S_printbuf(pTHX_ const char *const fmt, const char *const s)
b6007c36 456{
396482e1 457 SV* const tmp = newSVpvs("");
7918f24d
NC
458
459 PERL_ARGS_ASSERT_PRINTBUF;
460
b6007c36
DM
461 PerlIO_printf(Perl_debug_log, fmt, pv_display(tmp, s, strlen(s), 0, 60));
462 SvREFCNT_dec(tmp);
463}
464
8fa7f367
JH
465#endif
466
8290c323
NC
467static int
468S_deprecate_commaless_var_list(pTHX) {
469 PL_expect = XTERM;
470 deprecate("comma-less variable list");
471 return REPORT(','); /* grandfather non-comma-format format */
472}
473
ffb4593c
NT
474/*
475 * S_ao
476 *
c963b151
BD
477 * This subroutine detects &&=, ||=, and //= and turns an ANDAND, OROR or DORDOR
478 * into an OP_ANDASSIGN, OP_ORASSIGN, or OP_DORASSIGN
ffb4593c
NT
479 */
480
76e3520e 481STATIC int
cea2e8a9 482S_ao(pTHX_ int toketype)
a0d0e21e 483{
97aff369 484 dVAR;
3280af22
NIS
485 if (*PL_bufptr == '=') {
486 PL_bufptr++;
a0d0e21e 487 if (toketype == ANDAND)
6154021b 488 pl_yylval.ival = OP_ANDASSIGN;
a0d0e21e 489 else if (toketype == OROR)
6154021b 490 pl_yylval.ival = OP_ORASSIGN;
c963b151 491 else if (toketype == DORDOR)
6154021b 492 pl_yylval.ival = OP_DORASSIGN;
a0d0e21e
LW
493 toketype = ASSIGNOP;
494 }
495 return toketype;
496}
497
ffb4593c
NT
498/*
499 * S_no_op
500 * When Perl expects an operator and finds something else, no_op
501 * prints the warning. It always prints "<something> found where
502 * operator expected. It prints "Missing semicolon on previous line?"
503 * if the surprise occurs at the start of the line. "do you need to
504 * predeclare ..." is printed out for code like "sub bar; foo bar $x"
505 * where the compiler doesn't know if foo is a method call or a function.
506 * It prints "Missing operator before end of line" if there's nothing
507 * after the missing operator, or "... before <...>" if there is something
508 * after the missing operator.
509 */
510
76e3520e 511STATIC void
15f169a1 512S_no_op(pTHX_ const char *const what, char *s)
463ee0b2 513{
97aff369 514 dVAR;
9d4ba2ae
AL
515 char * const oldbp = PL_bufptr;
516 const bool is_first = (PL_oldbufptr == PL_linestart);
68dc0745 517
7918f24d
NC
518 PERL_ARGS_ASSERT_NO_OP;
519
1189a94a
GS
520 if (!s)
521 s = oldbp;
07c798fb 522 else
1189a94a 523 PL_bufptr = s;
cea2e8a9 524 yywarn(Perl_form(aTHX_ "%s found where operator expected", what));
56da5a46
RGS
525 if (ckWARN_d(WARN_SYNTAX)) {
526 if (is_first)
527 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
528 "\t(Missing semicolon on previous line?)\n");
529 else if (PL_oldoldbufptr && isIDFIRST_lazy_if(PL_oldoldbufptr,UTF)) {
f54cb97a 530 const char *t;
c35e046a
AL
531 for (t = PL_oldoldbufptr; (isALNUM_lazy_if(t,UTF) || *t == ':'); t++)
532 NOOP;
56da5a46
RGS
533 if (t < PL_bufptr && isSPACE(*t))
534 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
535 "\t(Do you need to predeclare %.*s?)\n",
551405c4 536 (int)(t - PL_oldoldbufptr), PL_oldoldbufptr);
56da5a46
RGS
537 }
538 else {
539 assert(s >= oldbp);
540 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
551405c4 541 "\t(Missing operator before %.*s?)\n", (int)(s - oldbp), oldbp);
56da5a46 542 }
07c798fb 543 }
3280af22 544 PL_bufptr = oldbp;
8990e307
LW
545}
546
ffb4593c
NT
547/*
548 * S_missingterm
549 * Complain about missing quote/regexp/heredoc terminator.
d4c19fe8 550 * If it's called with NULL then it cauterizes the line buffer.
ffb4593c
NT
551 * If we're in a delimited string and the delimiter is a control
552 * character, it's reformatted into a two-char sequence like ^C.
553 * This is fatal.
554 */
555
76e3520e 556STATIC void
cea2e8a9 557S_missingterm(pTHX_ char *s)
8990e307 558{
97aff369 559 dVAR;
8990e307
LW
560 char tmpbuf[3];
561 char q;
562 if (s) {
9d4ba2ae 563 char * const nl = strrchr(s,'\n');
d2719217 564 if (nl)
8990e307
LW
565 *nl = '\0';
566 }
463559e7 567 else if (isCNTRL(PL_multi_close)) {
8990e307 568 *tmpbuf = '^';
585ec06d 569 tmpbuf[1] = (char)toCTRL(PL_multi_close);
8990e307
LW
570 tmpbuf[2] = '\0';
571 s = tmpbuf;
572 }
573 else {
eb160463 574 *tmpbuf = (char)PL_multi_close;
8990e307
LW
575 tmpbuf[1] = '\0';
576 s = tmpbuf;
577 }
578 q = strchr(s,'"') ? '\'' : '"';
cea2e8a9 579 Perl_croak(aTHX_ "Can't find string terminator %c%s%c anywhere before EOF",q,s,q);
463ee0b2 580}
79072805 581
ef89dcc3 582#define FEATURE_IS_ENABLED(name) \
0d863452 583 ((0 != (PL_hints & HINT_LOCALIZE_HH)) \
89529cee 584 && S_feature_is_enabled(aTHX_ STR_WITH_LEN(name)))
4a731d7b 585/* The longest string we pass in. */
1863b879 586#define MAX_FEATURE_LEN (sizeof("unicode_strings")-1)
4a731d7b 587
0d863452
RH
588/*
589 * S_feature_is_enabled
590 * Check whether the named feature is enabled.
591 */
592STATIC bool
15f169a1 593S_feature_is_enabled(pTHX_ const char *const name, STRLEN namelen)
0d863452 594{
97aff369 595 dVAR;
0d863452 596 HV * const hinthv = GvHV(PL_hintgv);
4a731d7b 597 char he_name[8 + MAX_FEATURE_LEN] = "feature_";
7918f24d
NC
598
599 PERL_ARGS_ASSERT_FEATURE_IS_ENABLED;
600
4a731d7b
NC
601 assert(namelen <= MAX_FEATURE_LEN);
602 memcpy(&he_name[8], name, namelen);
d4c19fe8 603
7b9ef140 604 return (hinthv && hv_exists(hinthv, he_name, 8 + namelen));
0d863452
RH
605}
606
ffb4593c 607/*
9cbb5ea2
GS
608 * experimental text filters for win32 carriage-returns, utf16-to-utf8 and
609 * utf16-to-utf8-reversed.
ffb4593c
NT
610 */
611
c39cd008
GS
612#ifdef PERL_CR_FILTER
613static void
614strip_return(SV *sv)
615{
95a20fc0 616 register const char *s = SvPVX_const(sv);
9d4ba2ae 617 register const char * const e = s + SvCUR(sv);
7918f24d
NC
618
619 PERL_ARGS_ASSERT_STRIP_RETURN;
620
c39cd008
GS
621 /* outer loop optimized to do nothing if there are no CR-LFs */
622 while (s < e) {
623 if (*s++ == '\r' && *s == '\n') {
624 /* hit a CR-LF, need to copy the rest */
625 register char *d = s - 1;
626 *d++ = *s++;
627 while (s < e) {
628 if (*s == '\r' && s[1] == '\n')
629 s++;
630 *d++ = *s++;
631 }
632 SvCUR(sv) -= s - d;
633 return;
634 }
635 }
636}
a868473f 637
76e3520e 638STATIC I32
c39cd008 639S_cr_textfilter(pTHX_ int idx, SV *sv, int maxlen)
a868473f 640{
f54cb97a 641 const I32 count = FILTER_READ(idx+1, sv, maxlen);
c39cd008
GS
642 if (count > 0 && !maxlen)
643 strip_return(sv);
644 return count;
a868473f
NIS
645}
646#endif
647
199e78b7
DM
648
649
ffb4593c
NT
650/*
651 * Perl_lex_start
5486870f 652 *
e3abe207 653 * Create a parser object and initialise its parser and lexer fields
5486870f
DM
654 *
655 * rsfp is the opened file handle to read from (if any),
656 *
657 * line holds any initial content already read from the file (or in
658 * the case of no file, such as an eval, the whole contents);
659 *
660 * new_filter indicates that this is a new file and it shouldn't inherit
661 * the filters from the current parser (ie require).
ffb4593c
NT
662 */
663
a0d0e21e 664void
5486870f 665Perl_lex_start(pTHX_ SV *line, PerlIO *rsfp, bool new_filter)
79072805 666{
97aff369 667 dVAR;
6ef55633 668 const char *s = NULL;
8990e307 669 STRLEN len;
5486870f 670 yy_parser *parser, *oparser;
acdf0a21
DM
671
672 /* create and initialise a parser */
673
199e78b7 674 Newxz(parser, 1, yy_parser);
5486870f 675 parser->old_parser = oparser = PL_parser;
acdf0a21
DM
676 PL_parser = parser;
677
678 Newx(parser->stack, YYINITDEPTH, yy_stack_frame);
679 parser->ps = parser->stack;
680 parser->stack_size = YYINITDEPTH;
681
682 parser->stack->state = 0;
683 parser->yyerrstatus = 0;
684 parser->yychar = YYEMPTY; /* Cause a token to be read. */
685
e3abe207
DM
686 /* on scope exit, free this parser and restore any outer one */
687 SAVEPARSER(parser);
7c4baf47 688 parser->saved_curcop = PL_curcop;
e3abe207 689
acdf0a21 690 /* initialise lexer state */
8990e307 691
fb205e7a
DM
692#ifdef PERL_MAD
693 parser->curforce = -1;
694#else
695 parser->nexttoke = 0;
696#endif
ca4cfd28 697 parser->error_count = oparser ? oparser->error_count : 0;
c2598295 698 parser->copline = NOLINE;
5afb0a62 699 parser->lex_state = LEX_NORMAL;
c2598295 700 parser->expect = XSTATE;
2f9285f8 701 parser->rsfp = rsfp;
56b27c9a 702 parser->rsfp_filters = (new_filter || !oparser) ? newAV()
502c6561 703 : MUTABLE_AV(SvREFCNT_inc(oparser->rsfp_filters));
2f9285f8 704
199e78b7
DM
705 Newx(parser->lex_brackstack, 120, char);
706 Newx(parser->lex_casestack, 12, char);
707 *parser->lex_casestack = '\0';
02b34bbe 708
10efb74f
NC
709 if (line) {
710 s = SvPV_const(line, len);
711 } else {
712 len = 0;
713 }
bdc0bf6f 714
10efb74f 715 if (!len) {
bdc0bf6f 716 parser->linestr = newSVpvs("\n;");
3e5c0189 717 } else if (SvREADONLY(line) || s[len-1] != ';' || !SvPOK(line)) {
719a9bb0
NC
718 /* avoid tie/overload weirdness */
719 parser->linestr = newSVpvn_flags(s, len, SvUTF8(line));
10efb74f 720 if (s[len-1] != ';')
bdc0bf6f 721 sv_catpvs(parser->linestr, "\n;");
6c5ce11d
NC
722 } else {
723 SvTEMP_off(line);
724 SvREFCNT_inc_simple_void_NN(line);
bdc0bf6f 725 parser->linestr = line;
8990e307 726 }
f06b5848
DM
727 parser->oldoldbufptr =
728 parser->oldbufptr =
729 parser->bufptr =
730 parser->linestart = SvPVX(parser->linestr);
731 parser->bufend = parser->bufptr + SvCUR(parser->linestr);
732 parser->last_lop = parser->last_uni = NULL;
79072805 733}
a687059c 734
e3abe207
DM
735
736/* delete a parser object */
737
738void
739Perl_parser_free(pTHX_ const yy_parser *parser)
740{
7918f24d
NC
741 PERL_ARGS_ASSERT_PARSER_FREE;
742
7c4baf47 743 PL_curcop = parser->saved_curcop;
bdc0bf6f
DM
744 SvREFCNT_dec(parser->linestr);
745
2f9285f8
DM
746 if (parser->rsfp == PerlIO_stdin())
747 PerlIO_clearerr(parser->rsfp);
799361c3
SH
748 else if (parser->rsfp && (!parser->old_parser ||
749 (parser->old_parser && parser->rsfp != parser->old_parser->rsfp)))
2f9285f8 750 PerlIO_close(parser->rsfp);
5486870f 751 SvREFCNT_dec(parser->rsfp_filters);
2f9285f8 752
e3abe207
DM
753 Safefree(parser->stack);
754 Safefree(parser->lex_brackstack);
755 Safefree(parser->lex_casestack);
756 PL_parser = parser->old_parser;
757 Safefree(parser);
758}
759
760
ffb4593c
NT
761/*
762 * Perl_lex_end
9cbb5ea2
GS
763 * Finalizer for lexing operations. Must be called when the parser is
764 * done with the lexer.
ffb4593c
NT
765 */
766
463ee0b2 767void
864dbfa3 768Perl_lex_end(pTHX)
463ee0b2 769{
97aff369 770 dVAR;
3280af22 771 PL_doextract = FALSE;
463ee0b2
LW
772}
773
ffb4593c 774/*
f0e67a1d
Z
775=for apidoc AmxU|SV *|PL_parser-E<gt>linestr
776
777Buffer scalar containing the chunk currently under consideration of the
778text currently being lexed. This is always a plain string scalar (for
779which C<SvPOK> is true). It is not intended to be used as a scalar by
780normal scalar means; instead refer to the buffer directly by the pointer
781variables described below.
782
783The lexer maintains various C<char*> pointers to things in the
784C<PL_parser-E<gt>linestr> buffer. If C<PL_parser-E<gt>linestr> is ever
785reallocated, all of these pointers must be updated. Don't attempt to
786do this manually, but rather use L</lex_grow_linestr> if you need to
787reallocate the buffer.
788
789The content of the text chunk in the buffer is commonly exactly one
790complete line of input, up to and including a newline terminator,
791but there are situations where it is otherwise. The octets of the
792buffer may be intended to be interpreted as either UTF-8 or Latin-1.
793The function L</lex_bufutf8> tells you which. Do not use the C<SvUTF8>
794flag on this scalar, which may disagree with it.
795
796For direct examination of the buffer, the variable
797L</PL_parser-E<gt>bufend> points to the end of the buffer. The current
798lexing position is pointed to by L</PL_parser-E<gt>bufptr>. Direct use
799of these pointers is usually preferable to examination of the scalar
800through normal scalar means.
801
802=for apidoc AmxU|char *|PL_parser-E<gt>bufend
803
804Direct pointer to the end of the chunk of text currently being lexed, the
805end of the lexer buffer. This is equal to C<SvPVX(PL_parser-E<gt>linestr)
806+ SvCUR(PL_parser-E<gt>linestr)>. A NUL character (zero octet) is
807always located at the end of the buffer, and does not count as part of
808the buffer's contents.
809
810=for apidoc AmxU|char *|PL_parser-E<gt>bufptr
811
812Points to the current position of lexing inside the lexer buffer.
813Characters around this point may be freely examined, within
814the range delimited by C<SvPVX(L</PL_parser-E<gt>linestr>)> and
815L</PL_parser-E<gt>bufend>. The octets of the buffer may be intended to be
816interpreted as either UTF-8 or Latin-1, as indicated by L</lex_bufutf8>.
817
818Lexing code (whether in the Perl core or not) moves this pointer past
819the characters that it consumes. It is also expected to perform some
820bookkeeping whenever a newline character is consumed. This movement
821can be more conveniently performed by the function L</lex_read_to>,
822which handles newlines appropriately.
823
824Interpretation of the buffer's octets can be abstracted out by
825using the slightly higher-level functions L</lex_peek_unichar> and
826L</lex_read_unichar>.
827
828=for apidoc AmxU|char *|PL_parser-E<gt>linestart
829
830Points to the start of the current line inside the lexer buffer.
831This is useful for indicating at which column an error occurred, and
832not much else. This must be updated by any lexing code that consumes
833a newline; the function L</lex_read_to> handles this detail.
834
835=cut
836*/
837
838/*
839=for apidoc Amx|bool|lex_bufutf8
840
841Indicates whether the octets in the lexer buffer
842(L</PL_parser-E<gt>linestr>) should be interpreted as the UTF-8 encoding
843of Unicode characters. If not, they should be interpreted as Latin-1
844characters. This is analogous to the C<SvUTF8> flag for scalars.
845
846In UTF-8 mode, it is not guaranteed that the lexer buffer actually
847contains valid UTF-8. Lexing code must be robust in the face of invalid
848encoding.
849
850The actual C<SvUTF8> flag of the L</PL_parser-E<gt>linestr> scalar
851is significant, but not the whole story regarding the input character
852encoding. Normally, when a file is being read, the scalar contains octets
853and its C<SvUTF8> flag is off, but the octets should be interpreted as
854UTF-8 if the C<use utf8> pragma is in effect. During a string eval,
855however, the scalar may have the C<SvUTF8> flag on, and in this case its
856octets should be interpreted as UTF-8 unless the C<use bytes> pragma
857is in effect. This logic may change in the future; use this function
858instead of implementing the logic yourself.
859
860=cut
861*/
862
863bool
864Perl_lex_bufutf8(pTHX)
865{
866 return UTF;
867}
868
869/*
870=for apidoc Amx|char *|lex_grow_linestr|STRLEN len
871
872Reallocates the lexer buffer (L</PL_parser-E<gt>linestr>) to accommodate
873at least I<len> octets (including terminating NUL). Returns a
874pointer to the reallocated buffer. This is necessary before making
875any direct modification of the buffer that would increase its length.
876L</lex_stuff_pvn> provides a more convenient way to insert text into
877the buffer.
878
879Do not use C<SvGROW> or C<sv_grow> directly on C<PL_parser-E<gt>linestr>;
880this function updates all of the lexer's variables that point directly
881into the buffer.
882
883=cut
884*/
885
886char *
887Perl_lex_grow_linestr(pTHX_ STRLEN len)
888{
889 SV *linestr;
890 char *buf;
891 STRLEN bufend_pos, bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
892 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
893 linestr = PL_parser->linestr;
894 buf = SvPVX(linestr);
895 if (len <= SvLEN(linestr))
896 return buf;
897 bufend_pos = PL_parser->bufend - buf;
898 bufptr_pos = PL_parser->bufptr - buf;
899 oldbufptr_pos = PL_parser->oldbufptr - buf;
900 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
901 linestart_pos = PL_parser->linestart - buf;
902 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
903 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
904 buf = sv_grow(linestr, len);
905 PL_parser->bufend = buf + bufend_pos;
906 PL_parser->bufptr = buf + bufptr_pos;
907 PL_parser->oldbufptr = buf + oldbufptr_pos;
908 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
909 PL_parser->linestart = buf + linestart_pos;
910 if (PL_parser->last_uni)
911 PL_parser->last_uni = buf + last_uni_pos;
912 if (PL_parser->last_lop)
913 PL_parser->last_lop = buf + last_lop_pos;
914 return buf;
915}
916
917/*
83aa740e 918=for apidoc Amx|void|lex_stuff_pvn|const char *pv|STRLEN len|U32 flags
f0e67a1d
Z
919
920Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
921immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
922reallocating the buffer if necessary. This means that lexing code that
923runs later will see the characters as if they had appeared in the input.
924It is not recommended to do this as part of normal parsing, and most
925uses of this facility run the risk of the inserted characters being
926interpreted in an unintended manner.
927
928The string to be inserted is represented by I<len> octets starting
929at I<pv>. These octets are interpreted as either UTF-8 or Latin-1,
930according to whether the C<LEX_STUFF_UTF8> flag is set in I<flags>.
931The characters are recoded for the lexer buffer, according to how the
932buffer is currently being interpreted (L</lex_bufutf8>). If a string
933to be interpreted is available as a Perl scalar, the L</lex_stuff_sv>
934function is more convenient.
935
936=cut
937*/
938
939void
83aa740e 940Perl_lex_stuff_pvn(pTHX_ const char *pv, STRLEN len, U32 flags)
f0e67a1d 941{
749123ff 942 dVAR;
f0e67a1d
Z
943 char *bufptr;
944 PERL_ARGS_ASSERT_LEX_STUFF_PVN;
945 if (flags & ~(LEX_STUFF_UTF8))
946 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_pvn");
947 if (UTF) {
948 if (flags & LEX_STUFF_UTF8) {
949 goto plain_copy;
950 } else {
951 STRLEN highhalf = 0;
83aa740e 952 const char *p, *e = pv+len;
f0e67a1d
Z
953 for (p = pv; p != e; p++)
954 highhalf += !!(((U8)*p) & 0x80);
955 if (!highhalf)
956 goto plain_copy;
957 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len+highhalf);
958 bufptr = PL_parser->bufptr;
959 Move(bufptr, bufptr+len+highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
960 SvCUR_set(PL_parser->linestr,
961 SvCUR(PL_parser->linestr) + len+highhalf);
f0e67a1d
Z
962 PL_parser->bufend += len+highhalf;
963 for (p = pv; p != e; p++) {
964 U8 c = (U8)*p;
965 if (c & 0x80) {
966 *bufptr++ = (char)(0xc0 | (c >> 6));
967 *bufptr++ = (char)(0x80 | (c & 0x3f));
968 } else {
969 *bufptr++ = (char)c;
970 }
971 }
972 }
973 } else {
974 if (flags & LEX_STUFF_UTF8) {
975 STRLEN highhalf = 0;
83aa740e 976 const char *p, *e = pv+len;
f0e67a1d
Z
977 for (p = pv; p != e; p++) {
978 U8 c = (U8)*p;
979 if (c >= 0xc4) {
980 Perl_croak(aTHX_ "Lexing code attempted to stuff "
981 "non-Latin-1 character into Latin-1 input");
982 } else if (c >= 0xc2 && p+1 != e &&
983 (((U8)p[1]) & 0xc0) == 0x80) {
984 p++;
985 highhalf++;
986 } else if (c >= 0x80) {
987 /* malformed UTF-8 */
988 ENTER;
989 SAVESPTR(PL_warnhook);
990 PL_warnhook = PERL_WARNHOOK_FATAL;
991 utf8n_to_uvuni((U8*)p, e-p, NULL, 0);
992 LEAVE;
993 }
994 }
995 if (!highhalf)
996 goto plain_copy;
997 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len-highhalf);
998 bufptr = PL_parser->bufptr;
999 Move(bufptr, bufptr+len-highhalf, PL_parser->bufend+1-bufptr, char);
255fdf19
Z
1000 SvCUR_set(PL_parser->linestr,
1001 SvCUR(PL_parser->linestr) + len-highhalf);
f0e67a1d
Z
1002 PL_parser->bufend += len-highhalf;
1003 for (p = pv; p != e; p++) {
1004 U8 c = (U8)*p;
1005 if (c & 0x80) {
1006 *bufptr++ = (char)(((c & 0x3) << 6) | (p[1] & 0x3f));
1007 p++;
1008 } else {
1009 *bufptr++ = (char)c;
1010 }
1011 }
1012 } else {
1013 plain_copy:
1014 lex_grow_linestr(SvCUR(PL_parser->linestr)+1+len);
1015 bufptr = PL_parser->bufptr;
1016 Move(bufptr, bufptr+len, PL_parser->bufend+1-bufptr, char);
255fdf19 1017 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) + len);
f0e67a1d
Z
1018 PL_parser->bufend += len;
1019 Copy(pv, bufptr, len, char);
1020 }
1021 }
1022}
1023
1024/*
1025=for apidoc Amx|void|lex_stuff_sv|SV *sv|U32 flags
1026
1027Insert characters into the lexer buffer (L</PL_parser-E<gt>linestr>),
1028immediately after the current lexing point (L</PL_parser-E<gt>bufptr>),
1029reallocating the buffer if necessary. This means that lexing code that
1030runs later will see the characters as if they had appeared in the input.
1031It is not recommended to do this as part of normal parsing, and most
1032uses of this facility run the risk of the inserted characters being
1033interpreted in an unintended manner.
1034
1035The string to be inserted is the string value of I<sv>. The characters
1036are recoded for the lexer buffer, according to how the buffer is currently
1037being interpreted (L</lex_bufutf8>). If a string to be interpreted is
1038not already a Perl scalar, the L</lex_stuff_pvn> function avoids the
1039need to construct a scalar.
1040
1041=cut
1042*/
1043
1044void
1045Perl_lex_stuff_sv(pTHX_ SV *sv, U32 flags)
1046{
1047 char *pv;
1048 STRLEN len;
1049 PERL_ARGS_ASSERT_LEX_STUFF_SV;
1050 if (flags)
1051 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_stuff_sv");
1052 pv = SvPV(sv, len);
1053 lex_stuff_pvn(pv, len, flags | (SvUTF8(sv) ? LEX_STUFF_UTF8 : 0));
1054}
1055
1056/*
1057=for apidoc Amx|void|lex_unstuff|char *ptr
1058
1059Discards text about to be lexed, from L</PL_parser-E<gt>bufptr> up to
1060I<ptr>. Text following I<ptr> will be moved, and the buffer shortened.
1061This hides the discarded text from any lexing code that runs later,
1062as if the text had never appeared.
1063
1064This is not the normal way to consume lexed text. For that, use
1065L</lex_read_to>.
1066
1067=cut
1068*/
1069
1070void
1071Perl_lex_unstuff(pTHX_ char *ptr)
1072{
1073 char *buf, *bufend;
1074 STRLEN unstuff_len;
1075 PERL_ARGS_ASSERT_LEX_UNSTUFF;
1076 buf = PL_parser->bufptr;
1077 if (ptr < buf)
1078 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1079 if (ptr == buf)
1080 return;
1081 bufend = PL_parser->bufend;
1082 if (ptr > bufend)
1083 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_unstuff");
1084 unstuff_len = ptr - buf;
1085 Move(ptr, buf, bufend+1-ptr, char);
1086 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - unstuff_len);
1087 PL_parser->bufend = bufend - unstuff_len;
1088}
1089
1090/*
1091=for apidoc Amx|void|lex_read_to|char *ptr
1092
1093Consume text in the lexer buffer, from L</PL_parser-E<gt>bufptr> up
1094to I<ptr>. This advances L</PL_parser-E<gt>bufptr> to match I<ptr>,
1095performing the correct bookkeeping whenever a newline character is passed.
1096This is the normal way to consume lexed text.
1097
1098Interpretation of the buffer's octets can be abstracted out by
1099using the slightly higher-level functions L</lex_peek_unichar> and
1100L</lex_read_unichar>.
1101
1102=cut
1103*/
1104
1105void
1106Perl_lex_read_to(pTHX_ char *ptr)
1107{
1108 char *s;
1109 PERL_ARGS_ASSERT_LEX_READ_TO;
1110 s = PL_parser->bufptr;
1111 if (ptr < s || ptr > PL_parser->bufend)
1112 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_to");
1113 for (; s != ptr; s++)
1114 if (*s == '\n') {
1115 CopLINE_inc(PL_curcop);
1116 PL_parser->linestart = s+1;
1117 }
1118 PL_parser->bufptr = ptr;
1119}
1120
1121/*
1122=for apidoc Amx|void|lex_discard_to|char *ptr
1123
1124Discards the first part of the L</PL_parser-E<gt>linestr> buffer,
1125up to I<ptr>. The remaining content of the buffer will be moved, and
1126all pointers into the buffer updated appropriately. I<ptr> must not
1127be later in the buffer than the position of L</PL_parser-E<gt>bufptr>:
1128it is not permitted to discard text that has yet to be lexed.
1129
1130Normally it is not necessarily to do this directly, because it suffices to
1131use the implicit discarding behaviour of L</lex_next_chunk> and things
1132based on it. However, if a token stretches across multiple lines,
1f317c95 1133and the lexing code has kept multiple lines of text in the buffer for
f0e67a1d
Z
1134that purpose, then after completion of the token it would be wise to
1135explicitly discard the now-unneeded earlier lines, to avoid future
1136multi-line tokens growing the buffer without bound.
1137
1138=cut
1139*/
1140
1141void
1142Perl_lex_discard_to(pTHX_ char *ptr)
1143{
1144 char *buf;
1145 STRLEN discard_len;
1146 PERL_ARGS_ASSERT_LEX_DISCARD_TO;
1147 buf = SvPVX(PL_parser->linestr);
1148 if (ptr < buf)
1149 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1150 if (ptr == buf)
1151 return;
1152 if (ptr > PL_parser->bufptr)
1153 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_discard_to");
1154 discard_len = ptr - buf;
1155 if (PL_parser->oldbufptr < ptr)
1156 PL_parser->oldbufptr = ptr;
1157 if (PL_parser->oldoldbufptr < ptr)
1158 PL_parser->oldoldbufptr = ptr;
1159 if (PL_parser->last_uni && PL_parser->last_uni < ptr)
1160 PL_parser->last_uni = NULL;
1161 if (PL_parser->last_lop && PL_parser->last_lop < ptr)
1162 PL_parser->last_lop = NULL;
1163 Move(ptr, buf, PL_parser->bufend+1-ptr, char);
1164 SvCUR_set(PL_parser->linestr, SvCUR(PL_parser->linestr) - discard_len);
1165 PL_parser->bufend -= discard_len;
1166 PL_parser->bufptr -= discard_len;
1167 PL_parser->oldbufptr -= discard_len;
1168 PL_parser->oldoldbufptr -= discard_len;
1169 if (PL_parser->last_uni)
1170 PL_parser->last_uni -= discard_len;
1171 if (PL_parser->last_lop)
1172 PL_parser->last_lop -= discard_len;
1173}
1174
1175/*
1176=for apidoc Amx|bool|lex_next_chunk|U32 flags
1177
1178Reads in the next chunk of text to be lexed, appending it to
1179L</PL_parser-E<gt>linestr>. This should be called when lexing code has
1180looked to the end of the current chunk and wants to know more. It is
1181usual, but not necessary, for lexing to have consumed the entirety of
1182the current chunk at this time.
1183
1184If L</PL_parser-E<gt>bufptr> is pointing to the very end of the current
1185chunk (i.e., the current chunk has been entirely consumed), normally the
1186current chunk will be discarded at the same time that the new chunk is
1187read in. If I<flags> includes C<LEX_KEEP_PREVIOUS>, the current chunk
1188will not be discarded. If the current chunk has not been entirely
1189consumed, then it will not be discarded regardless of the flag.
1190
1191Returns true if some new text was added to the buffer, or false if the
1192buffer has reached the end of the input text.
1193
1194=cut
1195*/
1196
1197#define LEX_FAKE_EOF 0x80000000
1198
1199bool
1200Perl_lex_next_chunk(pTHX_ U32 flags)
1201{
1202 SV *linestr;
1203 char *buf;
1204 STRLEN old_bufend_pos, new_bufend_pos;
1205 STRLEN bufptr_pos, oldbufptr_pos, oldoldbufptr_pos;
1206 STRLEN linestart_pos, last_uni_pos, last_lop_pos;
17cc9359 1207 bool got_some_for_debugger = 0;
f0e67a1d
Z
1208 bool got_some;
1209 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_FAKE_EOF))
1210 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_next_chunk");
f0e67a1d
Z
1211 linestr = PL_parser->linestr;
1212 buf = SvPVX(linestr);
1213 if (!(flags & LEX_KEEP_PREVIOUS) &&
1214 PL_parser->bufptr == PL_parser->bufend) {
1215 old_bufend_pos = bufptr_pos = oldbufptr_pos = oldoldbufptr_pos = 0;
1216 linestart_pos = 0;
1217 if (PL_parser->last_uni != PL_parser->bufend)
1218 PL_parser->last_uni = NULL;
1219 if (PL_parser->last_lop != PL_parser->bufend)
1220 PL_parser->last_lop = NULL;
1221 last_uni_pos = last_lop_pos = 0;
1222 *buf = 0;
1223 SvCUR(linestr) = 0;
1224 } else {
1225 old_bufend_pos = PL_parser->bufend - buf;
1226 bufptr_pos = PL_parser->bufptr - buf;
1227 oldbufptr_pos = PL_parser->oldbufptr - buf;
1228 oldoldbufptr_pos = PL_parser->oldoldbufptr - buf;
1229 linestart_pos = PL_parser->linestart - buf;
1230 last_uni_pos = PL_parser->last_uni ? PL_parser->last_uni - buf : 0;
1231 last_lop_pos = PL_parser->last_lop ? PL_parser->last_lop - buf : 0;
1232 }
1233 if (flags & LEX_FAKE_EOF) {
1234 goto eof;
1235 } else if (!PL_parser->rsfp) {
1236 got_some = 0;
1237 } else if (filter_gets(linestr, old_bufend_pos)) {
1238 got_some = 1;
17cc9359 1239 got_some_for_debugger = 1;
f0e67a1d 1240 } else {
580561a3
Z
1241 if (!SvPOK(linestr)) /* can get undefined by filter_gets */
1242 sv_setpvs(linestr, "");
f0e67a1d
Z
1243 eof:
1244 /* End of real input. Close filehandle (unless it was STDIN),
1245 * then add implicit termination.
1246 */
1247 if ((PerlIO*)PL_parser->rsfp == PerlIO_stdin())
1248 PerlIO_clearerr(PL_parser->rsfp);
1249 else if (PL_parser->rsfp)
1250 (void)PerlIO_close(PL_parser->rsfp);
1251 PL_parser->rsfp = NULL;
1252 PL_doextract = FALSE;
1253#ifdef PERL_MAD
1254 if (PL_madskills && !PL_in_eval && (PL_minus_p || PL_minus_n))
1255 PL_faketokens = 1;
1256#endif
1257 if (!PL_in_eval && PL_minus_p) {
1258 sv_catpvs(linestr,
1259 /*{*/";}continue{print or die qq(-p destination: $!\\n);}");
1260 PL_minus_n = PL_minus_p = 0;
1261 } else if (!PL_in_eval && PL_minus_n) {
1262 sv_catpvs(linestr, /*{*/";}");
1263 PL_minus_n = 0;
1264 } else
1265 sv_catpvs(linestr, ";");
1266 got_some = 1;
1267 }
1268 buf = SvPVX(linestr);
1269 new_bufend_pos = SvCUR(linestr);
1270 PL_parser->bufend = buf + new_bufend_pos;
1271 PL_parser->bufptr = buf + bufptr_pos;
1272 PL_parser->oldbufptr = buf + oldbufptr_pos;
1273 PL_parser->oldoldbufptr = buf + oldoldbufptr_pos;
1274 PL_parser->linestart = buf + linestart_pos;
1275 if (PL_parser->last_uni)
1276 PL_parser->last_uni = buf + last_uni_pos;
1277 if (PL_parser->last_lop)
1278 PL_parser->last_lop = buf + last_lop_pos;
17cc9359 1279 if (got_some_for_debugger && (PERLDB_LINE || PERLDB_SAVESRC) &&
f0e67a1d
Z
1280 PL_curstash != PL_debstash) {
1281 /* debugger active and we're not compiling the debugger code,
1282 * so store the line into the debugger's array of lines
1283 */
1284 update_debugger_info(NULL, buf+old_bufend_pos,
1285 new_bufend_pos-old_bufend_pos);
1286 }
1287 return got_some;
1288}
1289
1290/*
1291=for apidoc Amx|I32|lex_peek_unichar|U32 flags
1292
1293Looks ahead one (Unicode) character in the text currently being lexed.
1294Returns the codepoint (unsigned integer value) of the next character,
1295or -1 if lexing has reached the end of the input text. To consume the
1296peeked character, use L</lex_read_unichar>.
1297
1298If the next character is in (or extends into) the next chunk of input
1299text, the next chunk will be read in. Normally the current chunk will be
1300discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1301then the current chunk will not be discarded.
1302
1303If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1304is encountered, an exception is generated.
1305
1306=cut
1307*/
1308
1309I32
1310Perl_lex_peek_unichar(pTHX_ U32 flags)
1311{
749123ff 1312 dVAR;
f0e67a1d
Z
1313 char *s, *bufend;
1314 if (flags & ~(LEX_KEEP_PREVIOUS))
1315 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_peek_unichar");
1316 s = PL_parser->bufptr;
1317 bufend = PL_parser->bufend;
1318 if (UTF) {
1319 U8 head;
1320 I32 unichar;
1321 STRLEN len, retlen;
1322 if (s == bufend) {
1323 if (!lex_next_chunk(flags))
1324 return -1;
1325 s = PL_parser->bufptr;
1326 bufend = PL_parser->bufend;
1327 }
1328 head = (U8)*s;
1329 if (!(head & 0x80))
1330 return head;
1331 if (head & 0x40) {
1332 len = PL_utf8skip[head];
1333 while ((STRLEN)(bufend-s) < len) {
1334 if (!lex_next_chunk(flags | LEX_KEEP_PREVIOUS))
1335 break;
1336 s = PL_parser->bufptr;
1337 bufend = PL_parser->bufend;
1338 }
1339 }
1340 unichar = utf8n_to_uvuni((U8*)s, bufend-s, &retlen, UTF8_CHECK_ONLY);
1341 if (retlen == (STRLEN)-1) {
1342 /* malformed UTF-8 */
1343 ENTER;
1344 SAVESPTR(PL_warnhook);
1345 PL_warnhook = PERL_WARNHOOK_FATAL;
1346 utf8n_to_uvuni((U8*)s, bufend-s, NULL, 0);
1347 LEAVE;
1348 }
1349 return unichar;
1350 } else {
1351 if (s == bufend) {
1352 if (!lex_next_chunk(flags))
1353 return -1;
1354 s = PL_parser->bufptr;
1355 }
1356 return (U8)*s;
1357 }
1358}
1359
1360/*
1361=for apidoc Amx|I32|lex_read_unichar|U32 flags
1362
1363Reads the next (Unicode) character in the text currently being lexed.
1364Returns the codepoint (unsigned integer value) of the character read,
1365and moves L</PL_parser-E<gt>bufptr> past the character, or returns -1
1366if lexing has reached the end of the input text. To non-destructively
1367examine the next character, use L</lex_peek_unichar> instead.
1368
1369If the next character is in (or extends into) the next chunk of input
1370text, the next chunk will be read in. Normally the current chunk will be
1371discarded at the same time, but if I<flags> includes C<LEX_KEEP_PREVIOUS>
1372then the current chunk will not be discarded.
1373
1374If the input is being interpreted as UTF-8 and a UTF-8 encoding error
1375is encountered, an exception is generated.
1376
1377=cut
1378*/
1379
1380I32
1381Perl_lex_read_unichar(pTHX_ U32 flags)
1382{
1383 I32 c;
1384 if (flags & ~(LEX_KEEP_PREVIOUS))
1385 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_unichar");
1386 c = lex_peek_unichar(flags);
1387 if (c != -1) {
1388 if (c == '\n')
1389 CopLINE_inc(PL_curcop);
1390 PL_parser->bufptr += UTF8SKIP(PL_parser->bufptr);
1391 }
1392 return c;
1393}
1394
1395/*
1396=for apidoc Amx|void|lex_read_space|U32 flags
1397
1398Reads optional spaces, in Perl style, in the text currently being
1399lexed. The spaces may include ordinary whitespace characters and
1400Perl-style comments. C<#line> directives are processed if encountered.
1401L</PL_parser-E<gt>bufptr> is moved past the spaces, so that it points
1402at a non-space character (or the end of the input text).
1403
1404If spaces extend into the next chunk of input text, the next chunk will
1405be read in. Normally the current chunk will be discarded at the same
1406time, but if I<flags> includes C<LEX_KEEP_PREVIOUS> then the current
1407chunk will not be discarded.
1408
1409=cut
1410*/
1411
f0998909
Z
1412#define LEX_NO_NEXT_CHUNK 0x80000000
1413
f0e67a1d
Z
1414void
1415Perl_lex_read_space(pTHX_ U32 flags)
1416{
1417 char *s, *bufend;
1418 bool need_incline = 0;
f0998909 1419 if (flags & ~(LEX_KEEP_PREVIOUS|LEX_NO_NEXT_CHUNK))
f0e67a1d
Z
1420 Perl_croak(aTHX_ "Lexing code internal error (%s)", "lex_read_space");
1421#ifdef PERL_MAD
1422 if (PL_skipwhite) {
1423 sv_free(PL_skipwhite);
1424 PL_skipwhite = NULL;
1425 }
1426 if (PL_madskills)
1427 PL_skipwhite = newSVpvs("");
1428#endif /* PERL_MAD */
1429 s = PL_parser->bufptr;
1430 bufend = PL_parser->bufend;
1431 while (1) {
1432 char c = *s;
1433 if (c == '#') {
1434 do {
1435 c = *++s;
1436 } while (!(c == '\n' || (c == 0 && s == bufend)));
1437 } else if (c == '\n') {
1438 s++;
1439 PL_parser->linestart = s;
1440 if (s == bufend)
1441 need_incline = 1;
1442 else
1443 incline(s);
1444 } else if (isSPACE(c)) {
1445 s++;
1446 } else if (c == 0 && s == bufend) {
1447 bool got_more;
1448#ifdef PERL_MAD
1449 if (PL_madskills)
1450 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1451#endif /* PERL_MAD */
f0998909
Z
1452 if (flags & LEX_NO_NEXT_CHUNK)
1453 break;
f0e67a1d
Z
1454 PL_parser->bufptr = s;
1455 CopLINE_inc(PL_curcop);
1456 got_more = lex_next_chunk(flags);
1457 CopLINE_dec(PL_curcop);
1458 s = PL_parser->bufptr;
1459 bufend = PL_parser->bufend;
1460 if (!got_more)
1461 break;
1462 if (need_incline && PL_parser->rsfp) {
1463 incline(s);
1464 need_incline = 0;
1465 }
1466 } else {
1467 break;
1468 }
1469 }
1470#ifdef PERL_MAD
1471 if (PL_madskills)
1472 sv_catpvn(PL_skipwhite, PL_parser->bufptr, s-PL_parser->bufptr);
1473#endif /* PERL_MAD */
1474 PL_parser->bufptr = s;
1475}
1476
1477/*
ffb4593c
NT
1478 * S_incline
1479 * This subroutine has nothing to do with tilting, whether at windmills
1480 * or pinball tables. Its name is short for "increment line". It
57843af0 1481 * increments the current line number in CopLINE(PL_curcop) and checks
ffb4593c 1482 * to see whether the line starts with a comment of the form
9cbb5ea2
GS
1483 * # line 500 "foo.pm"
1484 * If so, it sets the current line number and file to the values in the comment.
ffb4593c
NT
1485 */
1486
76e3520e 1487STATIC void
d9095cec 1488S_incline(pTHX_ const char *s)
463ee0b2 1489{
97aff369 1490 dVAR;
d9095cec
NC
1491 const char *t;
1492 const char *n;
1493 const char *e;
463ee0b2 1494
7918f24d
NC
1495 PERL_ARGS_ASSERT_INCLINE;
1496
57843af0 1497 CopLINE_inc(PL_curcop);
463ee0b2
LW
1498 if (*s++ != '#')
1499 return;
d4c19fe8
AL
1500 while (SPACE_OR_TAB(*s))
1501 s++;
73659bf1
GS
1502 if (strnEQ(s, "line", 4))
1503 s += 4;
1504 else
1505 return;
084592ab 1506 if (SPACE_OR_TAB(*s))
73659bf1 1507 s++;
4e553d73 1508 else
73659bf1 1509 return;
d4c19fe8
AL
1510 while (SPACE_OR_TAB(*s))
1511 s++;
463ee0b2
LW
1512 if (!isDIGIT(*s))
1513 return;
d4c19fe8 1514
463ee0b2
LW
1515 n = s;
1516 while (isDIGIT(*s))
1517 s++;
07714eb4 1518 if (!SPACE_OR_TAB(*s) && *s != '\r' && *s != '\n' && *s != '\0')
26b6dc3f 1519 return;
bf4acbe4 1520 while (SPACE_OR_TAB(*s))
463ee0b2 1521 s++;
73659bf1 1522 if (*s == '"' && (t = strchr(s+1, '"'))) {
463ee0b2 1523 s++;
73659bf1
GS
1524 e = t + 1;
1525 }
463ee0b2 1526 else {
c35e046a
AL
1527 t = s;
1528 while (!isSPACE(*t))
1529 t++;
73659bf1 1530 e = t;
463ee0b2 1531 }
bf4acbe4 1532 while (SPACE_OR_TAB(*e) || *e == '\r' || *e == '\f')
73659bf1
GS
1533 e++;
1534 if (*e != '\n' && *e != '\0')
1535 return; /* false alarm */
1536
f4dd75d9 1537 if (t - s > 0) {
d9095cec 1538 const STRLEN len = t - s;
8a5ee598 1539#ifndef USE_ITHREADS
19bad673
NC
1540 SV *const temp_sv = CopFILESV(PL_curcop);
1541 const char *cf;
1542 STRLEN tmplen;
1543
1544 if (temp_sv) {
1545 cf = SvPVX(temp_sv);
1546 tmplen = SvCUR(temp_sv);
1547 } else {
1548 cf = NULL;
1549 tmplen = 0;
1550 }
1551
42d9b98d 1552 if (tmplen > 7 && strnEQ(cf, "(eval ", 6)) {
e66cf94c
RGS
1553 /* must copy *{"::_<(eval N)[oldfilename:L]"}
1554 * to *{"::_<newfilename"} */
44867030
NC
1555 /* However, the long form of evals is only turned on by the
1556 debugger - usually they're "(eval %lu)" */
1557 char smallbuf[128];
1558 char *tmpbuf;
1559 GV **gvp;
d9095cec 1560 STRLEN tmplen2 = len;
798b63bc 1561 if (tmplen + 2 <= sizeof smallbuf)
e66cf94c
RGS
1562 tmpbuf = smallbuf;
1563 else
2ae0db35 1564 Newx(tmpbuf, tmplen + 2, char);
44867030
NC
1565 tmpbuf[0] = '_';
1566 tmpbuf[1] = '<';
2ae0db35 1567 memcpy(tmpbuf + 2, cf, tmplen);
44867030 1568 tmplen += 2;
8a5ee598
RGS
1569 gvp = (GV**)hv_fetch(PL_defstash, tmpbuf, tmplen, FALSE);
1570 if (gvp) {
44867030
NC
1571 char *tmpbuf2;
1572 GV *gv2;
1573
1574 if (tmplen2 + 2 <= sizeof smallbuf)
1575 tmpbuf2 = smallbuf;
1576 else
1577 Newx(tmpbuf2, tmplen2 + 2, char);
1578
1579 if (tmpbuf2 != smallbuf || tmpbuf != smallbuf) {
1580 /* Either they malloc'd it, or we malloc'd it,
1581 so no prefix is present in ours. */
1582 tmpbuf2[0] = '_';
1583 tmpbuf2[1] = '<';
1584 }
1585
1586 memcpy(tmpbuf2 + 2, s, tmplen2);
1587 tmplen2 += 2;
1588
8a5ee598 1589 gv2 = *(GV**)hv_fetch(PL_defstash, tmpbuf2, tmplen2, TRUE);
e5527e4b 1590 if (!isGV(gv2)) {
8a5ee598 1591 gv_init(gv2, PL_defstash, tmpbuf2, tmplen2, FALSE);
e5527e4b
RGS
1592 /* adjust ${"::_<newfilename"} to store the new file name */
1593 GvSV(gv2) = newSVpvn(tmpbuf2 + 2, tmplen2 - 2);
3cb1dbc6
NC
1594 GvHV(gv2) = MUTABLE_HV(SvREFCNT_inc(GvHV(*gvp)));
1595 GvAV(gv2) = MUTABLE_AV(SvREFCNT_inc(GvAV(*gvp)));
e5527e4b 1596 }
44867030
NC
1597
1598 if (tmpbuf2 != smallbuf) Safefree(tmpbuf2);
8a5ee598 1599 }
e66cf94c 1600 if (tmpbuf != smallbuf) Safefree(tmpbuf);
e66cf94c 1601 }
8a5ee598 1602#endif
05ec9bb3 1603 CopFILE_free(PL_curcop);
d9095cec 1604 CopFILE_setn(PL_curcop, s, len);
f4dd75d9 1605 }
57843af0 1606 CopLINE_set(PL_curcop, atoi(n)-1);
463ee0b2
LW
1607}
1608
29595ff2 1609#ifdef PERL_MAD
cd81e915 1610/* skip space before PL_thistoken */
29595ff2
NC
1611
1612STATIC char *
1613S_skipspace0(pTHX_ register char *s)
1614{
7918f24d
NC
1615 PERL_ARGS_ASSERT_SKIPSPACE0;
1616
29595ff2
NC
1617 s = skipspace(s);
1618 if (!PL_madskills)
1619 return s;
cd81e915
NC
1620 if (PL_skipwhite) {
1621 if (!PL_thiswhite)
6b29d1f5 1622 PL_thiswhite = newSVpvs("");
cd81e915
NC
1623 sv_catsv(PL_thiswhite, PL_skipwhite);
1624 sv_free(PL_skipwhite);
1625 PL_skipwhite = 0;
1626 }
1627 PL_realtokenstart = s - SvPVX(PL_linestr);
29595ff2
NC
1628 return s;
1629}
1630
cd81e915 1631/* skip space after PL_thistoken */
29595ff2
NC
1632
1633STATIC char *
1634S_skipspace1(pTHX_ register char *s)
1635{
d4c19fe8 1636 const char *start = s;
29595ff2
NC
1637 I32 startoff = start - SvPVX(PL_linestr);
1638
7918f24d
NC
1639 PERL_ARGS_ASSERT_SKIPSPACE1;
1640
29595ff2
NC
1641 s = skipspace(s);
1642 if (!PL_madskills)
1643 return s;
1644 start = SvPVX(PL_linestr) + startoff;
cd81e915 1645 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1646 const char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1647 PL_thistoken = newSVpvn(tstart, start - tstart);
1648 }
1649 PL_realtokenstart = -1;
1650 if (PL_skipwhite) {
1651 if (!PL_nextwhite)
6b29d1f5 1652 PL_nextwhite = newSVpvs("");
cd81e915
NC
1653 sv_catsv(PL_nextwhite, PL_skipwhite);
1654 sv_free(PL_skipwhite);
1655 PL_skipwhite = 0;
29595ff2
NC
1656 }
1657 return s;
1658}
1659
1660STATIC char *
1661S_skipspace2(pTHX_ register char *s, SV **svp)
1662{
c35e046a
AL
1663 char *start;
1664 const I32 bufptroff = PL_bufptr - SvPVX(PL_linestr);
1665 const I32 startoff = s - SvPVX(PL_linestr);
1666
7918f24d
NC
1667 PERL_ARGS_ASSERT_SKIPSPACE2;
1668
29595ff2
NC
1669 s = skipspace(s);
1670 PL_bufptr = SvPVX(PL_linestr) + bufptroff;
1671 if (!PL_madskills || !svp)
1672 return s;
1673 start = SvPVX(PL_linestr) + startoff;
cd81e915 1674 if (!PL_thistoken && PL_realtokenstart >= 0) {
d4c19fe8 1675 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915
NC
1676 PL_thistoken = newSVpvn(tstart, start - tstart);
1677 PL_realtokenstart = -1;
29595ff2 1678 }
cd81e915 1679 if (PL_skipwhite) {
29595ff2 1680 if (!*svp)
6b29d1f5 1681 *svp = newSVpvs("");
cd81e915
NC
1682 sv_setsv(*svp, PL_skipwhite);
1683 sv_free(PL_skipwhite);
1684 PL_skipwhite = 0;
29595ff2
NC
1685 }
1686
1687 return s;
1688}
1689#endif
1690
80a702cd 1691STATIC void
15f169a1 1692S_update_debugger_info(pTHX_ SV *orig_sv, const char *const buf, STRLEN len)
80a702cd
RGS
1693{
1694 AV *av = CopFILEAVx(PL_curcop);
1695 if (av) {
b9f83d2f 1696 SV * const sv = newSV_type(SVt_PVMG);
5fa550fb
NC
1697 if (orig_sv)
1698 sv_setsv(sv, orig_sv);
1699 else
1700 sv_setpvn(sv, buf, len);
80a702cd
RGS
1701 (void)SvIOK_on(sv);
1702 SvIV_set(sv, 0);
1703 av_store(av, (I32)CopLINE(PL_curcop), sv);
1704 }
1705}
1706
ffb4593c
NT
1707/*
1708 * S_skipspace
1709 * Called to gobble the appropriate amount and type of whitespace.
1710 * Skips comments as well.
1711 */
1712
76e3520e 1713STATIC char *
cea2e8a9 1714S_skipspace(pTHX_ register char *s)
a687059c 1715{
5db06880 1716#ifdef PERL_MAD
f0e67a1d
Z
1717 char *start = s;
1718#endif /* PERL_MAD */
7918f24d 1719 PERL_ARGS_ASSERT_SKIPSPACE;
f0e67a1d 1720#ifdef PERL_MAD
cd81e915
NC
1721 if (PL_skipwhite) {
1722 sv_free(PL_skipwhite);
f0e67a1d 1723 PL_skipwhite = NULL;
5db06880 1724 }
f0e67a1d 1725#endif /* PERL_MAD */
3280af22 1726 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
bf4acbe4 1727 while (s < PL_bufend && SPACE_OR_TAB(*s))
463ee0b2 1728 s++;
f0e67a1d
Z
1729 } else {
1730 STRLEN bufptr_pos = PL_bufptr - SvPVX(PL_linestr);
1731 PL_bufptr = s;
f0998909
Z
1732 lex_read_space(LEX_KEEP_PREVIOUS |
1733 (PL_sublex_info.sub_inwhat || PL_lex_state == LEX_FORMLINE ?
1734 LEX_NO_NEXT_CHUNK : 0));
3280af22 1735 s = PL_bufptr;
f0e67a1d
Z
1736 PL_bufptr = SvPVX(PL_linestr) + bufptr_pos;
1737 if (PL_linestart > PL_bufptr)
1738 PL_bufptr = PL_linestart;
1739 return s;
463ee0b2 1740 }
5db06880 1741#ifdef PERL_MAD
f0e67a1d
Z
1742 if (PL_madskills)
1743 PL_skipwhite = newSVpvn(start, s-start);
1744#endif /* PERL_MAD */
5db06880 1745 return s;
a687059c 1746}
378cc40b 1747
ffb4593c
NT
1748/*
1749 * S_check_uni
1750 * Check the unary operators to ensure there's no ambiguity in how they're
1751 * used. An ambiguous piece of code would be:
1752 * rand + 5
1753 * This doesn't mean rand() + 5. Because rand() is a unary operator,
1754 * the +5 is its argument.
1755 */
1756
76e3520e 1757STATIC void
cea2e8a9 1758S_check_uni(pTHX)
ba106d47 1759{
97aff369 1760 dVAR;
d4c19fe8
AL
1761 const char *s;
1762 const char *t;
2f3197b3 1763
3280af22 1764 if (PL_oldoldbufptr != PL_last_uni)
2f3197b3 1765 return;
3280af22
NIS
1766 while (isSPACE(*PL_last_uni))
1767 PL_last_uni++;
c35e046a
AL
1768 s = PL_last_uni;
1769 while (isALNUM_lazy_if(s,UTF) || *s == '-')
1770 s++;
3280af22 1771 if ((t = strchr(s, '(')) && t < PL_bufptr)
a0d0e21e 1772 return;
6136c704 1773
9b387841
NC
1774 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
1775 "Warning: Use of \"%.*s\" without parentheses is ambiguous",
1776 (int)(s - PL_last_uni), PL_last_uni);
2f3197b3
LW
1777}
1778
ffb4593c
NT
1779/*
1780 * LOP : macro to build a list operator. Its behaviour has been replaced
1781 * with a subroutine, S_lop() for which LOP is just another name.
1782 */
1783
a0d0e21e
LW
1784#define LOP(f,x) return lop(f,x,s)
1785
ffb4593c
NT
1786/*
1787 * S_lop
1788 * Build a list operator (or something that might be one). The rules:
1789 * - if we have a next token, then it's a list operator [why?]
1790 * - if the next thing is an opening paren, then it's a function
1791 * - else it's a list operator
1792 */
1793
76e3520e 1794STATIC I32
a0be28da 1795S_lop(pTHX_ I32 f, int x, char *s)
ffed7fef 1796{
97aff369 1797 dVAR;
7918f24d
NC
1798
1799 PERL_ARGS_ASSERT_LOP;
1800
6154021b 1801 pl_yylval.ival = f;
35c8bce7 1802 CLINE;
3280af22
NIS
1803 PL_expect = x;
1804 PL_bufptr = s;
1805 PL_last_lop = PL_oldbufptr;
eb160463 1806 PL_last_lop_op = (OPCODE)f;
5db06880
NC
1807#ifdef PERL_MAD
1808 if (PL_lasttoke)
1809 return REPORT(LSTOP);
1810#else
3280af22 1811 if (PL_nexttoke)
bbf60fe6 1812 return REPORT(LSTOP);
5db06880 1813#endif
79072805 1814 if (*s == '(')
bbf60fe6 1815 return REPORT(FUNC);
29595ff2 1816 s = PEEKSPACE(s);
79072805 1817 if (*s == '(')
bbf60fe6 1818 return REPORT(FUNC);
79072805 1819 else
bbf60fe6 1820 return REPORT(LSTOP);
79072805
LW
1821}
1822
5db06880
NC
1823#ifdef PERL_MAD
1824 /*
1825 * S_start_force
1826 * Sets up for an eventual force_next(). start_force(0) basically does
1827 * an unshift, while start_force(-1) does a push. yylex removes items
1828 * on the "pop" end.
1829 */
1830
1831STATIC void
1832S_start_force(pTHX_ int where)
1833{
1834 int i;
1835
cd81e915 1836 if (where < 0) /* so people can duplicate start_force(PL_curforce) */
5db06880 1837 where = PL_lasttoke;
cd81e915
NC
1838 assert(PL_curforce < 0 || PL_curforce == where);
1839 if (PL_curforce != where) {
5db06880
NC
1840 for (i = PL_lasttoke; i > where; --i) {
1841 PL_nexttoke[i] = PL_nexttoke[i-1];
1842 }
1843 PL_lasttoke++;
1844 }
cd81e915 1845 if (PL_curforce < 0) /* in case of duplicate start_force() */
5db06880 1846 Zero(&PL_nexttoke[where], 1, NEXTTOKE);
cd81e915
NC
1847 PL_curforce = where;
1848 if (PL_nextwhite) {
5db06880 1849 if (PL_madskills)
6b29d1f5 1850 curmad('^', newSVpvs(""));
cd81e915 1851 CURMAD('_', PL_nextwhite);
5db06880
NC
1852 }
1853}
1854
1855STATIC void
1856S_curmad(pTHX_ char slot, SV *sv)
1857{
1858 MADPROP **where;
1859
1860 if (!sv)
1861 return;
cd81e915
NC
1862 if (PL_curforce < 0)
1863 where = &PL_thismad;
5db06880 1864 else
cd81e915 1865 where = &PL_nexttoke[PL_curforce].next_mad;
5db06880 1866
cd81e915 1867 if (PL_faketokens)
76f68e9b 1868 sv_setpvs(sv, "");
5db06880
NC
1869 else {
1870 if (!IN_BYTES) {
1871 if (UTF && is_utf8_string((U8*)SvPVX(sv), SvCUR(sv)))
1872 SvUTF8_on(sv);
1873 else if (PL_encoding) {
1874 sv_recode_to_utf8(sv, PL_encoding);
1875 }
1876 }
1877 }
1878
1879 /* keep a slot open for the head of the list? */
1880 if (slot != '_' && *where && (*where)->mad_key == '^') {
1881 (*where)->mad_key = slot;
daba3364 1882 sv_free(MUTABLE_SV(((*where)->mad_val)));
5db06880
NC
1883 (*where)->mad_val = (void*)sv;
1884 }
1885 else
1886 addmad(newMADsv(slot, sv), where, 0);
1887}
1888#else
b3f24c00
MHM
1889# define start_force(where) NOOP
1890# define curmad(slot, sv) NOOP
5db06880
NC
1891#endif
1892
ffb4593c
NT
1893/*
1894 * S_force_next
9cbb5ea2 1895 * When the lexer realizes it knows the next token (for instance,
ffb4593c 1896 * it is reordering tokens for the parser) then it can call S_force_next
9cbb5ea2 1897 * to know what token to return the next time the lexer is called. Caller
5db06880
NC
1898 * will need to set PL_nextval[] (or PL_nexttoke[].next_val with PERL_MAD),
1899 * and possibly PL_expect to ensure the lexer handles the token correctly.
ffb4593c
NT
1900 */
1901
4e553d73 1902STATIC void
cea2e8a9 1903S_force_next(pTHX_ I32 type)
79072805 1904{
97aff369 1905 dVAR;
704d4215
GG
1906#ifdef DEBUGGING
1907 if (DEBUG_T_TEST) {
1908 PerlIO_printf(Perl_debug_log, "### forced token:\n");
f05d7009 1909 tokereport(type, &NEXTVAL_NEXTTOKE);
704d4215
GG
1910 }
1911#endif
5db06880 1912#ifdef PERL_MAD
cd81e915 1913 if (PL_curforce < 0)
5db06880 1914 start_force(PL_lasttoke);
cd81e915 1915 PL_nexttoke[PL_curforce].next_type = type;
5db06880
NC
1916 if (PL_lex_state != LEX_KNOWNEXT)
1917 PL_lex_defer = PL_lex_state;
1918 PL_lex_state = LEX_KNOWNEXT;
1919 PL_lex_expect = PL_expect;
cd81e915 1920 PL_curforce = -1;
5db06880 1921#else
3280af22
NIS
1922 PL_nexttype[PL_nexttoke] = type;
1923 PL_nexttoke++;
1924 if (PL_lex_state != LEX_KNOWNEXT) {
1925 PL_lex_defer = PL_lex_state;
1926 PL_lex_expect = PL_expect;
1927 PL_lex_state = LEX_KNOWNEXT;
79072805 1928 }
5db06880 1929#endif
79072805
LW
1930}
1931
d0a148a6 1932STATIC SV *
15f169a1 1933S_newSV_maybe_utf8(pTHX_ const char *const start, STRLEN len)
d0a148a6 1934{
97aff369 1935 dVAR;
740cce10 1936 SV * const sv = newSVpvn_utf8(start, len,
eaf7a4d2
CS
1937 !IN_BYTES
1938 && UTF
1939 && !is_ascii_string((const U8*)start, len)
740cce10 1940 && is_utf8_string((const U8*)start, len));
d0a148a6
NC
1941 return sv;
1942}
1943
ffb4593c
NT
1944/*
1945 * S_force_word
1946 * When the lexer knows the next thing is a word (for instance, it has
1947 * just seen -> and it knows that the next char is a word char, then
02b34bbe
DM
1948 * it calls S_force_word to stick the next word into the PL_nexttoke/val
1949 * lookahead.
ffb4593c
NT
1950 *
1951 * Arguments:
b1b65b59 1952 * char *start : buffer position (must be within PL_linestr)
02b34bbe 1953 * int token : PL_next* will be this type of bare word (e.g., METHOD,WORD)
ffb4593c
NT
1954 * int check_keyword : if true, Perl checks to make sure the word isn't
1955 * a keyword (do this if the word is a label, e.g. goto FOO)
1956 * int allow_pack : if true, : characters will also be allowed (require,
1957 * use, etc. do this)
9cbb5ea2 1958 * int allow_initial_tick : used by the "sub" lexer only.
ffb4593c
NT
1959 */
1960
76e3520e 1961STATIC char *
cea2e8a9 1962S_force_word(pTHX_ register char *start, int token, int check_keyword, int allow_pack, int allow_initial_tick)
79072805 1963{
97aff369 1964 dVAR;
463ee0b2
LW
1965 register char *s;
1966 STRLEN len;
4e553d73 1967
7918f24d
NC
1968 PERL_ARGS_ASSERT_FORCE_WORD;
1969
29595ff2 1970 start = SKIPSPACE1(start);
463ee0b2 1971 s = start;
7e2040f0 1972 if (isIDFIRST_lazy_if(s,UTF) ||
a0d0e21e 1973 (allow_pack && *s == ':') ||
15f0808c 1974 (allow_initial_tick && *s == '\'') )
a0d0e21e 1975 {
3280af22 1976 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, allow_pack, &len);
5458a98a 1977 if (check_keyword && keyword(PL_tokenbuf, len, 0))
463ee0b2 1978 return start;
cd81e915 1979 start_force(PL_curforce);
5db06880
NC
1980 if (PL_madskills)
1981 curmad('X', newSVpvn(start,s-start));
463ee0b2 1982 if (token == METHOD) {
29595ff2 1983 s = SKIPSPACE1(s);
463ee0b2 1984 if (*s == '(')
3280af22 1985 PL_expect = XTERM;
463ee0b2 1986 else {
3280af22 1987 PL_expect = XOPERATOR;
463ee0b2 1988 }
79072805 1989 }
e74e6b3d 1990 if (PL_madskills)
63575281 1991 curmad('g', newSVpvs( "forced" ));
9ded7720 1992 NEXTVAL_NEXTTOKE.opval
d0a148a6
NC
1993 = (OP*)newSVOP(OP_CONST,0,
1994 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
9ded7720 1995 NEXTVAL_NEXTTOKE.opval->op_private |= OPpCONST_BARE;
79072805
LW
1996 force_next(token);
1997 }
1998 return s;
1999}
2000
ffb4593c
NT
2001/*
2002 * S_force_ident
9cbb5ea2 2003 * Called when the lexer wants $foo *foo &foo etc, but the program
ffb4593c
NT
2004 * text only contains the "foo" portion. The first argument is a pointer
2005 * to the "foo", and the second argument is the type symbol to prefix.
2006 * Forces the next token to be a "WORD".
9cbb5ea2 2007 * Creates the symbol if it didn't already exist (via gv_fetchpv()).
ffb4593c
NT
2008 */
2009
76e3520e 2010STATIC void
bfed75c6 2011S_force_ident(pTHX_ register const char *s, int kind)
79072805 2012{
97aff369 2013 dVAR;
7918f24d
NC
2014
2015 PERL_ARGS_ASSERT_FORCE_IDENT;
2016
c35e046a 2017 if (*s) {
90e5519e
NC
2018 const STRLEN len = strlen(s);
2019 OP* const o = (OP*)newSVOP(OP_CONST, 0, newSVpvn(s, len));
cd81e915 2020 start_force(PL_curforce);
9ded7720 2021 NEXTVAL_NEXTTOKE.opval = o;
79072805 2022 force_next(WORD);
748a9306 2023 if (kind) {
11343788 2024 o->op_private = OPpCONST_ENTERED;
55497cff 2025 /* XXX see note in pp_entereval() for why we forgo typo
2026 warnings if the symbol must be introduced in an eval.
2027 GSAR 96-10-12 */
90e5519e
NC
2028 gv_fetchpvn_flags(s, len,
2029 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL)
2030 : GV_ADD,
2031 kind == '$' ? SVt_PV :
2032 kind == '@' ? SVt_PVAV :
2033 kind == '%' ? SVt_PVHV :
a0d0e21e 2034 SVt_PVGV
90e5519e 2035 );
748a9306 2036 }
79072805
LW
2037 }
2038}
2039
1571675a
GS
2040NV
2041Perl_str_to_version(pTHX_ SV *sv)
2042{
2043 NV retval = 0.0;
2044 NV nshift = 1.0;
2045 STRLEN len;
cfd0369c 2046 const char *start = SvPV_const(sv,len);
9d4ba2ae 2047 const char * const end = start + len;
504618e9 2048 const bool utf = SvUTF8(sv) ? TRUE : FALSE;
7918f24d
NC
2049
2050 PERL_ARGS_ASSERT_STR_TO_VERSION;
2051
1571675a 2052 while (start < end) {
ba210ebe 2053 STRLEN skip;
1571675a
GS
2054 UV n;
2055 if (utf)
9041c2e3 2056 n = utf8n_to_uvchr((U8*)start, len, &skip, 0);
1571675a
GS
2057 else {
2058 n = *(U8*)start;
2059 skip = 1;
2060 }
2061 retval += ((NV)n)/nshift;
2062 start += skip;
2063 nshift *= 1000;
2064 }
2065 return retval;
2066}
2067
4e553d73 2068/*
ffb4593c
NT
2069 * S_force_version
2070 * Forces the next token to be a version number.
e759cc13
RGS
2071 * If the next token appears to be an invalid version number, (e.g. "v2b"),
2072 * and if "guessing" is TRUE, then no new token is created (and the caller
2073 * must use an alternative parsing method).
ffb4593c
NT
2074 */
2075
76e3520e 2076STATIC char *
e759cc13 2077S_force_version(pTHX_ char *s, int guessing)
89bfa8cd 2078{
97aff369 2079 dVAR;
5f66b61c 2080 OP *version = NULL;
44dcb63b 2081 char *d;
5db06880
NC
2082#ifdef PERL_MAD
2083 I32 startoff = s - SvPVX(PL_linestr);
2084#endif
89bfa8cd 2085
7918f24d
NC
2086 PERL_ARGS_ASSERT_FORCE_VERSION;
2087
29595ff2 2088 s = SKIPSPACE1(s);
89bfa8cd 2089
44dcb63b 2090 d = s;
dd629d5b 2091 if (*d == 'v')
44dcb63b 2092 d++;
44dcb63b 2093 if (isDIGIT(*d)) {
e759cc13
RGS
2094 while (isDIGIT(*d) || *d == '_' || *d == '.')
2095 d++;
5db06880
NC
2096#ifdef PERL_MAD
2097 if (PL_madskills) {
cd81e915 2098 start_force(PL_curforce);
5db06880
NC
2099 curmad('X', newSVpvn(s,d-s));
2100 }
2101#endif
4e4da3ac 2102 if (*d == ';' || isSPACE(*d) || *d == '{' || *d == '}' || !*d) {
dd629d5b 2103 SV *ver;
8d08d9ba
DG
2104#ifdef USE_LOCALE_NUMERIC
2105 char *loc = setlocale(LC_NUMERIC, "C");
2106#endif
6154021b 2107 s = scan_num(s, &pl_yylval);
8d08d9ba
DG
2108#ifdef USE_LOCALE_NUMERIC
2109 setlocale(LC_NUMERIC, loc);
2110#endif
6154021b 2111 version = pl_yylval.opval;
dd629d5b
GS
2112 ver = cSVOPx(version)->op_sv;
2113 if (SvPOK(ver) && !SvNIOK(ver)) {
862a34c6 2114 SvUPGRADE(ver, SVt_PVNV);
9d6ce603 2115 SvNV_set(ver, str_to_version(ver));
1571675a 2116 SvNOK_on(ver); /* hint that it is a version */
44dcb63b 2117 }
89bfa8cd 2118 }
5db06880
NC
2119 else if (guessing) {
2120#ifdef PERL_MAD
2121 if (PL_madskills) {
cd81e915
NC
2122 sv_free(PL_nextwhite); /* let next token collect whitespace */
2123 PL_nextwhite = 0;
5db06880
NC
2124 s = SvPVX(PL_linestr) + startoff;
2125 }
2126#endif
e759cc13 2127 return s;
5db06880 2128 }
89bfa8cd 2129 }
2130
5db06880
NC
2131#ifdef PERL_MAD
2132 if (PL_madskills && !version) {
cd81e915
NC
2133 sv_free(PL_nextwhite); /* let next token collect whitespace */
2134 PL_nextwhite = 0;
5db06880
NC
2135 s = SvPVX(PL_linestr) + startoff;
2136 }
2137#endif
89bfa8cd 2138 /* NOTE: The parser sees the package name and the VERSION swapped */
cd81e915 2139 start_force(PL_curforce);
9ded7720 2140 NEXTVAL_NEXTTOKE.opval = version;
4e553d73 2141 force_next(WORD);
89bfa8cd 2142
e759cc13 2143 return s;
89bfa8cd 2144}
2145
ffb4593c 2146/*
91152fc1
DG
2147 * S_force_strict_version
2148 * Forces the next token to be a version number using strict syntax rules.
2149 */
2150
2151STATIC char *
2152S_force_strict_version(pTHX_ char *s)
2153{
2154 dVAR;
2155 OP *version = NULL;
2156#ifdef PERL_MAD
2157 I32 startoff = s - SvPVX(PL_linestr);
2158#endif
2159 const char *errstr = NULL;
2160
2161 PERL_ARGS_ASSERT_FORCE_STRICT_VERSION;
2162
2163 while (isSPACE(*s)) /* leading whitespace */
2164 s++;
2165
2166 if (is_STRICT_VERSION(s,&errstr)) {
2167 SV *ver = newSV(0);
2168 s = (char *)scan_version(s, ver, 0);
2169 version = newSVOP(OP_CONST, 0, ver);
2170 }
4e4da3ac
Z
2171 else if ( (*s != ';' && *s != '{' && *s != '}' ) &&
2172 (s = SKIPSPACE1(s), (*s != ';' && *s != '{' && *s != '}' )))
2173 {
91152fc1
DG
2174 PL_bufptr = s;
2175 if (errstr)
2176 yyerror(errstr); /* version required */
2177 return s;
2178 }
2179
2180#ifdef PERL_MAD
2181 if (PL_madskills && !version) {
2182 sv_free(PL_nextwhite); /* let next token collect whitespace */
2183 PL_nextwhite = 0;
2184 s = SvPVX(PL_linestr) + startoff;
2185 }
2186#endif
2187 /* NOTE: The parser sees the package name and the VERSION swapped */
2188 start_force(PL_curforce);
2189 NEXTVAL_NEXTTOKE.opval = version;
2190 force_next(WORD);
2191
2192 return s;
2193}
2194
2195/*
ffb4593c
NT
2196 * S_tokeq
2197 * Tokenize a quoted string passed in as an SV. It finds the next
2198 * chunk, up to end of string or a backslash. It may make a new
2199 * SV containing that chunk (if HINT_NEW_STRING is on). It also
2200 * turns \\ into \.
2201 */
2202
76e3520e 2203STATIC SV *
cea2e8a9 2204S_tokeq(pTHX_ SV *sv)
79072805 2205{
97aff369 2206 dVAR;
79072805
LW
2207 register char *s;
2208 register char *send;
2209 register char *d;
b3ac6de7
IZ
2210 STRLEN len = 0;
2211 SV *pv = sv;
79072805 2212
7918f24d
NC
2213 PERL_ARGS_ASSERT_TOKEQ;
2214
79072805 2215 if (!SvLEN(sv))
b3ac6de7 2216 goto finish;
79072805 2217
a0d0e21e 2218 s = SvPV_force(sv, len);
21a311ee 2219 if (SvTYPE(sv) >= SVt_PVIV && SvIVX(sv) == -1)
b3ac6de7 2220 goto finish;
463ee0b2 2221 send = s + len;
79072805
LW
2222 while (s < send && *s != '\\')
2223 s++;
2224 if (s == send)
b3ac6de7 2225 goto finish;
79072805 2226 d = s;
be4731d2 2227 if ( PL_hints & HINT_NEW_STRING ) {
59cd0e26 2228 pv = newSVpvn_flags(SvPVX_const(pv), len, SVs_TEMP | SvUTF8(sv));
be4731d2 2229 }
79072805
LW
2230 while (s < send) {
2231 if (*s == '\\') {
a0d0e21e 2232 if (s + 1 < send && (s[1] == '\\'))
79072805
LW
2233 s++; /* all that, just for this */
2234 }
2235 *d++ = *s++;
2236 }
2237 *d = '\0';
95a20fc0 2238 SvCUR_set(sv, d - SvPVX_const(sv));
b3ac6de7 2239 finish:
3280af22 2240 if ( PL_hints & HINT_NEW_STRING )
eb0d8d16 2241 return new_constant(NULL, 0, "q", sv, pv, "q", 1);
79072805
LW
2242 return sv;
2243}
2244
ffb4593c
NT
2245/*
2246 * Now come three functions related to double-quote context,
2247 * S_sublex_start, S_sublex_push, and S_sublex_done. They're used when
2248 * converting things like "\u\Lgnat" into ucfirst(lc("gnat")). They
2249 * interact with PL_lex_state, and create fake ( ... ) argument lists
2250 * to handle functions and concatenation.
2251 * They assume that whoever calls them will be setting up a fake
2252 * join call, because each subthing puts a ',' after it. This lets
2253 * "lower \luPpEr"
2254 * become
2255 * join($, , 'lower ', lcfirst( 'uPpEr', ) ,)
2256 *
2257 * (I'm not sure whether the spurious commas at the end of lcfirst's
2258 * arguments and join's arguments are created or not).
2259 */
2260
2261/*
2262 * S_sublex_start
6154021b 2263 * Assumes that pl_yylval.ival is the op we're creating (e.g. OP_LCFIRST).
ffb4593c
NT
2264 *
2265 * Pattern matching will set PL_lex_op to the pattern-matching op to
6154021b 2266 * make (we return THING if pl_yylval.ival is OP_NULL, PMFUNC otherwise).
ffb4593c
NT
2267 *
2268 * OP_CONST and OP_READLINE are easy--just make the new op and return.
2269 *
2270 * Everything else becomes a FUNC.
2271 *
2272 * Sets PL_lex_state to LEX_INTERPPUSH unless (ival was OP_NULL or we
2273 * had an OP_CONST or OP_READLINE). This just sets us up for a
2274 * call to S_sublex_push().
2275 */
2276
76e3520e 2277STATIC I32
cea2e8a9 2278S_sublex_start(pTHX)
79072805 2279{
97aff369 2280 dVAR;
6154021b 2281 register const I32 op_type = pl_yylval.ival;
79072805
LW
2282
2283 if (op_type == OP_NULL) {
6154021b 2284 pl_yylval.opval = PL_lex_op;
5f66b61c 2285 PL_lex_op = NULL;
79072805
LW
2286 return THING;
2287 }
2288 if (op_type == OP_CONST || op_type == OP_READLINE) {
3280af22 2289 SV *sv = tokeq(PL_lex_stuff);
b3ac6de7
IZ
2290
2291 if (SvTYPE(sv) == SVt_PVIV) {
2292 /* Overloaded constants, nothing fancy: Convert to SVt_PV: */
2293 STRLEN len;
96a5add6 2294 const char * const p = SvPV_const(sv, len);
740cce10 2295 SV * const nsv = newSVpvn_flags(p, len, SvUTF8(sv));
b3ac6de7
IZ
2296 SvREFCNT_dec(sv);
2297 sv = nsv;
4e553d73 2298 }
6154021b 2299 pl_yylval.opval = (OP*)newSVOP(op_type, 0, sv);
a0714e2c 2300 PL_lex_stuff = NULL;
6f33ba73
RGS
2301 /* Allow <FH> // "foo" */
2302 if (op_type == OP_READLINE)
2303 PL_expect = XTERMORDORDOR;
79072805
LW
2304 return THING;
2305 }
e3f73d4e
RGS
2306 else if (op_type == OP_BACKTICK && PL_lex_op) {
2307 /* readpipe() vas overriden */
2308 cSVOPx(cLISTOPx(cUNOPx(PL_lex_op)->op_first)->op_first->op_sibling)->op_sv = tokeq(PL_lex_stuff);
6154021b 2309 pl_yylval.opval = PL_lex_op;
9b201d7d 2310 PL_lex_op = NULL;
e3f73d4e
RGS
2311 PL_lex_stuff = NULL;
2312 return THING;
2313 }
79072805 2314
3280af22 2315 PL_sublex_info.super_state = PL_lex_state;
eac04b2e 2316 PL_sublex_info.sub_inwhat = (U16)op_type;
3280af22
NIS
2317 PL_sublex_info.sub_op = PL_lex_op;
2318 PL_lex_state = LEX_INTERPPUSH;
55497cff 2319
3280af22
NIS
2320 PL_expect = XTERM;
2321 if (PL_lex_op) {
6154021b 2322 pl_yylval.opval = PL_lex_op;
5f66b61c 2323 PL_lex_op = NULL;
55497cff 2324 return PMFUNC;
2325 }
2326 else
2327 return FUNC;
2328}
2329
ffb4593c
NT
2330/*
2331 * S_sublex_push
2332 * Create a new scope to save the lexing state. The scope will be
2333 * ended in S_sublex_done. Returns a '(', starting the function arguments
2334 * to the uc, lc, etc. found before.
2335 * Sets PL_lex_state to LEX_INTERPCONCAT.
2336 */
2337
76e3520e 2338STATIC I32
cea2e8a9 2339S_sublex_push(pTHX)
55497cff 2340{
27da23d5 2341 dVAR;
f46d017c 2342 ENTER;
55497cff 2343
3280af22 2344 PL_lex_state = PL_sublex_info.super_state;
651b5b28 2345 SAVEBOOL(PL_lex_dojoin);
3280af22 2346 SAVEI32(PL_lex_brackets);
3280af22
NIS
2347 SAVEI32(PL_lex_casemods);
2348 SAVEI32(PL_lex_starts);
651b5b28 2349 SAVEI8(PL_lex_state);
7766f137 2350 SAVEVPTR(PL_lex_inpat);
98246f1e 2351 SAVEI16(PL_lex_inwhat);
57843af0 2352 SAVECOPLINE(PL_curcop);
3280af22 2353 SAVEPPTR(PL_bufptr);
8452ff4b 2354 SAVEPPTR(PL_bufend);
3280af22
NIS
2355 SAVEPPTR(PL_oldbufptr);
2356 SAVEPPTR(PL_oldoldbufptr);
207e3d1a
JH
2357 SAVEPPTR(PL_last_lop);
2358 SAVEPPTR(PL_last_uni);
3280af22
NIS
2359 SAVEPPTR(PL_linestart);
2360 SAVESPTR(PL_linestr);
8edd5f42
RGS
2361 SAVEGENERICPV(PL_lex_brackstack);
2362 SAVEGENERICPV(PL_lex_casestack);
3280af22
NIS
2363
2364 PL_linestr = PL_lex_stuff;
a0714e2c 2365 PL_lex_stuff = NULL;
3280af22 2366
9cbb5ea2
GS
2367 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart
2368 = SvPVX(PL_linestr);
3280af22 2369 PL_bufend += SvCUR(PL_linestr);
bd61b366 2370 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2371 SAVEFREESV(PL_linestr);
2372
2373 PL_lex_dojoin = FALSE;
2374 PL_lex_brackets = 0;
a02a5408
JC
2375 Newx(PL_lex_brackstack, 120, char);
2376 Newx(PL_lex_casestack, 12, char);
3280af22
NIS
2377 PL_lex_casemods = 0;
2378 *PL_lex_casestack = '\0';
2379 PL_lex_starts = 0;
2380 PL_lex_state = LEX_INTERPCONCAT;
eb160463 2381 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22
NIS
2382
2383 PL_lex_inwhat = PL_sublex_info.sub_inwhat;
2384 if (PL_lex_inwhat == OP_MATCH || PL_lex_inwhat == OP_QR || PL_lex_inwhat == OP_SUBST)
2385 PL_lex_inpat = PL_sublex_info.sub_op;
79072805 2386 else
5f66b61c 2387 PL_lex_inpat = NULL;
79072805 2388
55497cff 2389 return '(';
79072805
LW
2390}
2391
ffb4593c
NT
2392/*
2393 * S_sublex_done
2394 * Restores lexer state after a S_sublex_push.
2395 */
2396
76e3520e 2397STATIC I32
cea2e8a9 2398S_sublex_done(pTHX)
79072805 2399{
27da23d5 2400 dVAR;
3280af22 2401 if (!PL_lex_starts++) {
396482e1 2402 SV * const sv = newSVpvs("");
9aa983d2
JH
2403 if (SvUTF8(PL_linestr))
2404 SvUTF8_on(sv);
3280af22 2405 PL_expect = XOPERATOR;
6154021b 2406 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
79072805
LW
2407 return THING;
2408 }
2409
3280af22
NIS
2410 if (PL_lex_casemods) { /* oops, we've got some unbalanced parens */
2411 PL_lex_state = LEX_INTERPCASEMOD;
cea2e8a9 2412 return yylex();
79072805
LW
2413 }
2414
ffb4593c 2415 /* Is there a right-hand side to take care of? (s//RHS/ or tr//RHS/) */
3280af22
NIS
2416 if (PL_lex_repl && (PL_lex_inwhat == OP_SUBST || PL_lex_inwhat == OP_TRANS)) {
2417 PL_linestr = PL_lex_repl;
2418 PL_lex_inpat = 0;
2419 PL_bufend = PL_bufptr = PL_oldbufptr = PL_oldoldbufptr = PL_linestart = SvPVX(PL_linestr);
2420 PL_bufend += SvCUR(PL_linestr);
bd61b366 2421 PL_last_lop = PL_last_uni = NULL;
3280af22
NIS
2422 SAVEFREESV(PL_linestr);
2423 PL_lex_dojoin = FALSE;
2424 PL_lex_brackets = 0;
3280af22
NIS
2425 PL_lex_casemods = 0;
2426 *PL_lex_casestack = '\0';
2427 PL_lex_starts = 0;
25da4f38 2428 if (SvEVALED(PL_lex_repl)) {
3280af22
NIS
2429 PL_lex_state = LEX_INTERPNORMAL;
2430 PL_lex_starts++;
e9fa98b2
HS
2431 /* we don't clear PL_lex_repl here, so that we can check later
2432 whether this is an evalled subst; that means we rely on the
2433 logic to ensure sublex_done() is called again only via the
2434 branch (in yylex()) that clears PL_lex_repl, else we'll loop */
79072805 2435 }
e9fa98b2 2436 else {
3280af22 2437 PL_lex_state = LEX_INTERPCONCAT;
a0714e2c 2438 PL_lex_repl = NULL;
e9fa98b2 2439 }
79072805 2440 return ',';
ffed7fef
LW
2441 }
2442 else {
5db06880
NC
2443#ifdef PERL_MAD
2444 if (PL_madskills) {
cd81e915
NC
2445 if (PL_thiswhite) {
2446 if (!PL_endwhite)
6b29d1f5 2447 PL_endwhite = newSVpvs("");
cd81e915
NC
2448 sv_catsv(PL_endwhite, PL_thiswhite);
2449 PL_thiswhite = 0;
2450 }
2451 if (PL_thistoken)
76f68e9b 2452 sv_setpvs(PL_thistoken,"");
5db06880 2453 else
cd81e915 2454 PL_realtokenstart = -1;
5db06880
NC
2455 }
2456#endif
f46d017c 2457 LEAVE;
3280af22
NIS
2458 PL_bufend = SvPVX(PL_linestr);
2459 PL_bufend += SvCUR(PL_linestr);
2460 PL_expect = XOPERATOR;
09bef843 2461 PL_sublex_info.sub_inwhat = 0;
79072805 2462 return ')';
ffed7fef
LW
2463 }
2464}
2465
02aa26ce
NT
2466/*
2467 scan_const
2468
2469 Extracts a pattern, double-quoted string, or transliteration. This
2470 is terrifying code.
2471
94def140 2472 It looks at PL_lex_inwhat and PL_lex_inpat to find out whether it's
3280af22 2473 processing a pattern (PL_lex_inpat is true), a transliteration
94def140 2474 (PL_lex_inwhat == OP_TRANS is true), or a double-quoted string.
02aa26ce 2475
94def140
TS
2476 Returns a pointer to the character scanned up to. If this is
2477 advanced from the start pointer supplied (i.e. if anything was
9b599b2a 2478 successfully parsed), will leave an OP for the substring scanned
6154021b 2479 in pl_yylval. Caller must intuit reason for not parsing further
9b599b2a
GS
2480 by looking at the next characters herself.
2481
02aa26ce
NT
2482 In patterns:
2483 backslashes:
ff3f963a 2484 constants: \N{NAME} only
02aa26ce
NT
2485 case and quoting: \U \Q \E
2486 stops on @ and $, but not for $ as tail anchor
2487
2488 In transliterations:
2489 characters are VERY literal, except for - not at the start or end
94def140
TS
2490 of the string, which indicates a range. If the range is in bytes,
2491 scan_const expands the range to the full set of intermediate
2492 characters. If the range is in utf8, the hyphen is replaced with
2493 a certain range mark which will be handled by pmtrans() in op.c.
02aa26ce
NT
2494
2495 In double-quoted strings:
2496 backslashes:
2497 double-quoted style: \r and \n
ff3f963a 2498 constants: \x31, etc.
94def140 2499 deprecated backrefs: \1 (in substitution replacements)
02aa26ce
NT
2500 case and quoting: \U \Q \E
2501 stops on @ and $
2502
2503 scan_const does *not* construct ops to handle interpolated strings.
2504 It stops processing as soon as it finds an embedded $ or @ variable
2505 and leaves it to the caller to work out what's going on.
2506
94def140
TS
2507 embedded arrays (whether in pattern or not) could be:
2508 @foo, @::foo, @'foo, @{foo}, @$foo, @+, @-.
2509
2510 $ in double-quoted strings must be the symbol of an embedded scalar.
02aa26ce
NT
2511
2512 $ in pattern could be $foo or could be tail anchor. Assumption:
2513 it's a tail anchor if $ is the last thing in the string, or if it's
94def140 2514 followed by one of "()| \r\n\t"
02aa26ce
NT
2515
2516 \1 (backreferences) are turned into $1
2517
2518 The structure of the code is
2519 while (there's a character to process) {
94def140
TS
2520 handle transliteration ranges
2521 skip regexp comments /(?#comment)/ and codes /(?{code})/
2522 skip #-initiated comments in //x patterns
2523 check for embedded arrays
02aa26ce
NT
2524 check for embedded scalars
2525 if (backslash) {
94def140 2526 deprecate \1 in substitution replacements
02aa26ce
NT
2527 handle string-changing backslashes \l \U \Q \E, etc.
2528 switch (what was escaped) {
94def140 2529 handle \- in a transliteration (becomes a literal -)
ff3f963a 2530 if a pattern and not \N{, go treat as regular character
94def140
TS
2531 handle \132 (octal characters)
2532 handle \x15 and \x{1234} (hex characters)
ff3f963a 2533 handle \N{name} (named characters, also \N{3,5} in a pattern)
94def140
TS
2534 handle \cV (control characters)
2535 handle printf-style backslashes (\f, \r, \n, etc)
02aa26ce 2536 } (end switch)
77a135fe 2537 continue
02aa26ce 2538 } (end if backslash)
77a135fe 2539 handle regular character
02aa26ce 2540 } (end while character to read)
4e553d73 2541
02aa26ce
NT
2542*/
2543
76e3520e 2544STATIC char *
cea2e8a9 2545S_scan_const(pTHX_ char *start)
79072805 2546{
97aff369 2547 dVAR;
3280af22 2548 register char *send = PL_bufend; /* end of the constant */
77a135fe
KW
2549 SV *sv = newSV(send - start); /* sv for the constant. See
2550 note below on sizing. */
02aa26ce
NT
2551 register char *s = start; /* start of the constant */
2552 register char *d = SvPVX(sv); /* destination for copies */
2553 bool dorange = FALSE; /* are we in a translit range? */
c2e66d9e 2554 bool didrange = FALSE; /* did we just finish a range? */
2b9d42f0 2555 I32 has_utf8 = FALSE; /* Output constant is UTF8 */
77a135fe
KW
2556 I32 this_utf8 = UTF; /* Is the source string assumed
2557 to be UTF8? But, this can
2558 show as true when the source
2559 isn't utf8, as for example
2560 when it is entirely composed
2561 of hex constants */
2562
2563 /* Note on sizing: The scanned constant is placed into sv, which is
2564 * initialized by newSV() assuming one byte of output for every byte of
2565 * input. This routine expects newSV() to allocate an extra byte for a
2566 * trailing NUL, which this routine will append if it gets to the end of
2567 * the input. There may be more bytes of input than output (eg., \N{LATIN
2568 * CAPITAL LETTER A}), or more output than input if the constant ends up
2569 * recoded to utf8, but each time a construct is found that might increase
2570 * the needed size, SvGROW() is called. Its size parameter each time is
2571 * based on the best guess estimate at the time, namely the length used so
2572 * far, plus the length the current construct will occupy, plus room for
2573 * the trailing NUL, plus one byte for every input byte still unscanned */
2574
012bcf8d 2575 UV uv;
4c3a8340
TS
2576#ifdef EBCDIC
2577 UV literal_endpoint = 0;
e294cc5d 2578 bool native_range = TRUE; /* turned to FALSE if the first endpoint is Unicode. */
4c3a8340 2579#endif
012bcf8d 2580
7918f24d
NC
2581 PERL_ARGS_ASSERT_SCAN_CONST;
2582
2b9d42f0
NIS
2583 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
2584 /* If we are doing a trans and we know we want UTF8 set expectation */
2585 has_utf8 = PL_sublex_info.sub_op->op_private & (OPpTRANS_FROM_UTF|OPpTRANS_TO_UTF);
2586 this_utf8 = PL_sublex_info.sub_op->op_private & (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
2587 }
2588
2589
79072805 2590 while (s < send || dorange) {
ff3f963a 2591
02aa26ce 2592 /* get transliterations out of the way (they're most literal) */
3280af22 2593 if (PL_lex_inwhat == OP_TRANS) {
02aa26ce 2594 /* expand a range A-Z to the full set of characters. AIE! */
79072805 2595 if (dorange) {
1ba5c669
JH
2596 I32 i; /* current expanded character */
2597 I32 min; /* first character in range */
2598 I32 max; /* last character in range */
02aa26ce 2599
e294cc5d
JH
2600#ifdef EBCDIC
2601 UV uvmax = 0;
2602#endif
2603
2604 if (has_utf8
2605#ifdef EBCDIC
2606 && !native_range
2607#endif
2608 ) {
9d4ba2ae 2609 char * const c = (char*)utf8_hop((U8*)d, -1);
8973db79
JH
2610 char *e = d++;
2611 while (e-- > c)
2612 *(e + 1) = *e;
25716404 2613 *c = (char)UTF_TO_NATIVE(0xff);
8973db79
JH
2614 /* mark the range as done, and continue */
2615 dorange = FALSE;
2616 didrange = TRUE;
2617 continue;
2618 }
2b9d42f0 2619
95a20fc0 2620 i = d - SvPVX_const(sv); /* remember current offset */
e294cc5d
JH
2621#ifdef EBCDIC
2622 SvGROW(sv,
2623 SvLEN(sv) + (has_utf8 ?
2624 (512 - UTF_CONTINUATION_MARK +
2625 UNISKIP(0x100))
2626 : 256));
2627 /* How many two-byte within 0..255: 128 in UTF-8,
2628 * 96 in UTF-8-mod. */
2629#else
9cbb5ea2 2630 SvGROW(sv, SvLEN(sv) + 256); /* never more than 256 chars in a range */
e294cc5d 2631#endif
9cbb5ea2 2632 d = SvPVX(sv) + i; /* refresh d after realloc */
e294cc5d
JH
2633#ifdef EBCDIC
2634 if (has_utf8) {
2635 int j;
2636 for (j = 0; j <= 1; j++) {
2637 char * const c = (char*)utf8_hop((U8*)d, -1);
2638 const UV uv = utf8n_to_uvchr((U8*)c, d - c, NULL, 0);
2639 if (j)
2640 min = (U8)uv;
2641 else if (uv < 256)
2642 max = (U8)uv;
2643 else {
2644 max = (U8)0xff; /* only to \xff */
2645 uvmax = uv; /* \x{100} to uvmax */
2646 }
2647 d = c; /* eat endpoint chars */
2648 }
2649 }
2650 else {
2651#endif
2652 d -= 2; /* eat the first char and the - */
2653 min = (U8)*d; /* first char in range */
2654 max = (U8)d[1]; /* last char in range */
2655#ifdef EBCDIC
2656 }
2657#endif
8ada0baa 2658
c2e66d9e 2659 if (min > max) {
01ec43d0 2660 Perl_croak(aTHX_
d1573ac7 2661 "Invalid range \"%c-%c\" in transliteration operator",
1ba5c669 2662 (char)min, (char)max);
c2e66d9e
GS
2663 }
2664
c7f1f016 2665#ifdef EBCDIC
4c3a8340
TS
2666 if (literal_endpoint == 2 &&
2667 ((isLOWER(min) && isLOWER(max)) ||
2668 (isUPPER(min) && isUPPER(max)))) {
8ada0baa
JH
2669 if (isLOWER(min)) {
2670 for (i = min; i <= max; i++)
2671 if (isLOWER(i))
db42d148 2672 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2673 } else {
2674 for (i = min; i <= max; i++)
2675 if (isUPPER(i))
db42d148 2676 *d++ = NATIVE_TO_NEED(has_utf8,i);
8ada0baa
JH
2677 }
2678 }
2679 else
2680#endif
2681 for (i = min; i <= max; i++)
e294cc5d
JH
2682#ifdef EBCDIC
2683 if (has_utf8) {
2684 const U8 ch = (U8)NATIVE_TO_UTF(i);
2685 if (UNI_IS_INVARIANT(ch))
2686 *d++ = (U8)i;
2687 else {
2688 *d++ = (U8)UTF8_EIGHT_BIT_HI(ch);
2689 *d++ = (U8)UTF8_EIGHT_BIT_LO(ch);
2690 }
2691 }
2692 else
2693#endif
2694 *d++ = (char)i;
2695
2696#ifdef EBCDIC
2697 if (uvmax) {
2698 d = (char*)uvchr_to_utf8((U8*)d, 0x100);
2699 if (uvmax > 0x101)
2700 *d++ = (char)UTF_TO_NATIVE(0xff);
2701 if (uvmax > 0x100)
2702 d = (char*)uvchr_to_utf8((U8*)d, uvmax);
2703 }
2704#endif
02aa26ce
NT
2705
2706 /* mark the range as done, and continue */
79072805 2707 dorange = FALSE;
01ec43d0 2708 didrange = TRUE;
4c3a8340
TS
2709#ifdef EBCDIC
2710 literal_endpoint = 0;
2711#endif
79072805 2712 continue;
4e553d73 2713 }
02aa26ce
NT
2714
2715 /* range begins (ignore - as first or last char) */
79072805 2716 else if (*s == '-' && s+1 < send && s != start) {
4e553d73 2717 if (didrange) {
1fafa243 2718 Perl_croak(aTHX_ "Ambiguous range in transliteration operator");
01ec43d0 2719 }
e294cc5d
JH
2720 if (has_utf8
2721#ifdef EBCDIC
2722 && !native_range
2723#endif
2724 ) {
25716404 2725 *d++ = (char)UTF_TO_NATIVE(0xff); /* use illegal utf8 byte--see pmtrans */
a0ed51b3
LW
2726 s++;
2727 continue;
2728 }
79072805
LW
2729 dorange = TRUE;
2730 s++;
01ec43d0
GS
2731 }
2732 else {
2733 didrange = FALSE;
4c3a8340
TS
2734#ifdef EBCDIC
2735 literal_endpoint = 0;
e294cc5d 2736 native_range = TRUE;
4c3a8340 2737#endif
01ec43d0 2738 }
79072805 2739 }
02aa26ce
NT
2740
2741 /* if we get here, we're not doing a transliteration */
2742
0f5d15d6
IZ
2743 /* skip for regexp comments /(?#comment)/ and code /(?{code})/,
2744 except for the last char, which will be done separately. */
3280af22 2745 else if (*s == '(' && PL_lex_inpat && s[1] == '?') {
cc6b7395 2746 if (s[2] == '#') {
e994fd66 2747 while (s+1 < send && *s != ')')
db42d148 2748 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
155aba94
GS
2749 }
2750 else if (s[2] == '{' /* This should match regcomp.c */
67edc0c9 2751 || (s[2] == '?' && s[3] == '{'))
155aba94 2752 {
cc6b7395 2753 I32 count = 1;
0f5d15d6 2754 char *regparse = s + (s[2] == '{' ? 3 : 4);
cc6b7395
IZ
2755 char c;
2756
d9f97599
GS
2757 while (count && (c = *regparse)) {
2758 if (c == '\\' && regparse[1])
2759 regparse++;
4e553d73 2760 else if (c == '{')
cc6b7395 2761 count++;
4e553d73 2762 else if (c == '}')
cc6b7395 2763 count--;
d9f97599 2764 regparse++;
cc6b7395 2765 }
e994fd66 2766 if (*regparse != ')')
5bdf89e7 2767 regparse--; /* Leave one char for continuation. */
0f5d15d6 2768 while (s < regparse)
db42d148 2769 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
cc6b7395 2770 }
748a9306 2771 }
02aa26ce
NT
2772
2773 /* likewise skip #-initiated comments in //x patterns */
3280af22
NIS
2774 else if (*s == '#' && PL_lex_inpat &&
2775 ((PMOP*)PL_lex_inpat)->op_pmflags & PMf_EXTENDED) {
748a9306 2776 while (s+1 < send && *s != '\n')
db42d148 2777 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
748a9306 2778 }
02aa26ce 2779
5d1d4326 2780 /* check for embedded arrays
da6eedaa 2781 (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-)
5d1d4326 2782 */
1749ea0d
TS
2783 else if (*s == '@' && s[1]) {
2784 if (isALNUM_lazy_if(s+1,UTF))
2785 break;
2786 if (strchr(":'{$", s[1]))
2787 break;
2788 if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-'))
2789 break; /* in regexp, neither @+ nor @- are interpolated */
2790 }
02aa26ce
NT
2791
2792 /* check for embedded scalars. only stop if we're sure it's a
2793 variable.
2794 */
79072805 2795 else if (*s == '$') {
3280af22 2796 if (!PL_lex_inpat) /* not a regexp, so $ must be var */
79072805 2797 break;
77772344 2798 if (s + 1 < send && !strchr("()| \r\n\t", s[1])) {
a2a5de95
NC
2799 if (s[1] == '\\') {
2800 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
2801 "Possible unintended interpolation of $\\ in regex");
77772344 2802 }
79072805 2803 break; /* in regexp, $ might be tail anchor */
77772344 2804 }
79072805 2805 }
02aa26ce 2806
2b9d42f0
NIS
2807 /* End of else if chain - OP_TRANS rejoin rest */
2808
02aa26ce 2809 /* backslashes */
79072805 2810 if (*s == '\\' && s+1 < send) {
ff3f963a
KW
2811 char* e; /* Can be used for ending '}', etc. */
2812
79072805 2813 s++;
02aa26ce 2814
7d0fc23c
KW
2815 /* warn on \1 - \9 in substitution replacements, but note that \11
2816 * is an octal; and \19 is \1 followed by '9' */
3280af22 2817 if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat &&
a0d0e21e 2818 isDIGIT(*s) && *s != '0' && !isDIGIT(s[1]))
79072805 2819 {
a2a5de95 2820 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "\\%c better written as $%c", *s, *s);
79072805
LW
2821 *--s = '$';
2822 break;
2823 }
02aa26ce
NT
2824
2825 /* string-change backslash escapes */
3280af22 2826 if (PL_lex_inwhat != OP_TRANS && *s && strchr("lLuUEQ", *s)) {
79072805
LW
2827 --s;
2828 break;
2829 }
ff3f963a
KW
2830 /* In a pattern, process \N, but skip any other backslash escapes.
2831 * This is because we don't want to translate an escape sequence
2832 * into a meta symbol and have the regex compiler use the meta
2833 * symbol meaning, e.g. \x{2E} would be confused with a dot. But
2834 * in spite of this, we do have to process \N here while the proper
2835 * charnames handler is in scope. See bugs #56444 and #62056.
2836 * There is a complication because \N in a pattern may also stand
2837 * for 'match a non-nl', and not mean a charname, in which case its
2838 * processing should be deferred to the regex compiler. To be a
2839 * charname it must be followed immediately by a '{', and not look
2840 * like \N followed by a curly quantifier, i.e., not something like
2841 * \N{3,}. regcurly returns a boolean indicating if it is a legal
2842 * quantifier */
2843 else if (PL_lex_inpat
2844 && (*s != 'N'
2845 || s[1] != '{'
2846 || regcurly(s + 1)))
2847 {
cc74c5bd
TS
2848 *d++ = NATIVE_TO_NEED(has_utf8,'\\');
2849 goto default_action;
2850 }
02aa26ce 2851
79072805 2852 switch (*s) {
02aa26ce
NT
2853
2854 /* quoted - in transliterations */
79072805 2855 case '-':
3280af22 2856 if (PL_lex_inwhat == OP_TRANS) {
79072805
LW
2857 *d++ = *s++;
2858 continue;
2859 }
2860 /* FALL THROUGH */
2861 default:
11b8faa4 2862 {
a2a5de95
NC
2863 if ((isALPHA(*s) || isDIGIT(*s)))
2864 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
2865 "Unrecognized escape \\%c passed through",
2866 *s);
11b8faa4 2867 /* default action is to copy the quoted character */
f9a63242 2868 goto default_action;
11b8faa4 2869 }
02aa26ce 2870
77a135fe 2871 /* eg. \132 indicates the octal constant 0x132 */
79072805
LW
2872 case '0': case '1': case '2': case '3':
2873 case '4': case '5': case '6': case '7':
ba210ebe 2874 {
53305cf1
NC
2875 I32 flags = 0;
2876 STRLEN len = 3;
77a135fe 2877 uv = NATIVE_TO_UNI(grok_oct(s, &len, &flags, NULL));
ba210ebe
JH
2878 s += len;
2879 }
012bcf8d 2880 goto NUM_ESCAPE_INSERT;
02aa26ce 2881
f0a2b745
KW
2882 /* eg. \o{24} indicates the octal constant \024 */
2883 case 'o':
2884 {
2885 STRLEN len;
2886
2887 char* error = grok_bslash_o(s, &uv, &len, 1);
2888 s += len;
2889 if (error) {
2890 yyerror(error);
2891 continue;
2892 }
2893 goto NUM_ESCAPE_INSERT;
2894 }
2895
77a135fe 2896 /* eg. \x24 indicates the hex constant 0x24 */
79072805 2897 case 'x':
a0ed51b3
LW
2898 ++s;
2899 if (*s == '{') {
9d4ba2ae 2900 char* const e = strchr(s, '}');
a4c04bdc
NC
2901 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES |
2902 PERL_SCAN_DISALLOW_PREFIX;
53305cf1 2903 STRLEN len;
355860ce 2904
53305cf1 2905 ++s;
adaeee49 2906 if (!e) {
a0ed51b3 2907 yyerror("Missing right brace on \\x{}");
355860ce 2908 continue;
ba210ebe 2909 }
53305cf1 2910 len = e - s;
77a135fe 2911 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe 2912 s = e + 1;
a0ed51b3
LW
2913 }
2914 else {
ba210ebe 2915 {
53305cf1 2916 STRLEN len = 2;
a4c04bdc 2917 I32 flags = PERL_SCAN_DISALLOW_PREFIX;
77a135fe 2918 uv = NATIVE_TO_UNI(grok_hex(s, &len, &flags, NULL));
ba210ebe
JH
2919 s += len;
2920 }
012bcf8d
GS
2921 }
2922
2923 NUM_ESCAPE_INSERT:
ff3f963a
KW
2924 /* Insert oct or hex escaped character. There will always be
2925 * enough room in sv since such escapes will be longer than any
2926 * UTF-8 sequence they can end up as, except if they force us
2927 * to recode the rest of the string into utf8 */
ba7cea30 2928
77a135fe 2929 /* Here uv is the ordinal of the next character being added in
ff3f963a 2930 * unicode (converted from native). */
77a135fe 2931 if (!UNI_IS_INVARIANT(uv)) {
9aa983d2 2932 if (!has_utf8 && uv > 255) {
77a135fe
KW
2933 /* Might need to recode whatever we have accumulated so
2934 * far if it contains any chars variant in utf8 or
2935 * utf-ebcdic. */
2936
2937 SvCUR_set(sv, d - SvPVX_const(sv));
2938 SvPOK_on(sv);
2939 *d = '\0';
77a135fe 2940 /* See Note on sizing above. */
7bf79863
KW
2941 sv_utf8_upgrade_flags_grow(sv,
2942 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
2943 UNISKIP(uv) + (STRLEN)(send - s) + 1);
77a135fe
KW
2944 d = SvPVX(sv) + SvCUR(sv);
2945 has_utf8 = TRUE;
012bcf8d
GS
2946 }
2947
77a135fe
KW
2948 if (has_utf8) {
2949 d = (char*)uvuni_to_utf8((U8*)d, uv);
f9a63242
JH
2950 if (PL_lex_inwhat == OP_TRANS &&
2951 PL_sublex_info.sub_op) {
2952 PL_sublex_info.sub_op->op_private |=
2953 (PL_lex_repl ? OPpTRANS_FROM_UTF
2954 : OPpTRANS_TO_UTF);
f9a63242 2955 }
e294cc5d
JH
2956#ifdef EBCDIC
2957 if (uv > 255 && !dorange)
2958 native_range = FALSE;
2959#endif
012bcf8d 2960 }
a0ed51b3 2961 else {
012bcf8d 2962 *d++ = (char)uv;
a0ed51b3 2963 }
012bcf8d
GS
2964 }
2965 else {
c4d5f83a 2966 *d++ = (char) uv;
a0ed51b3 2967 }
79072805 2968 continue;
02aa26ce 2969
4a2d328f 2970 case 'N':
ff3f963a
KW
2971 /* In a non-pattern \N must be a named character, like \N{LATIN
2972 * SMALL LETTER A} or \N{U+0041}. For patterns, it also can
2973 * mean to match a non-newline. For non-patterns, named
2974 * characters are converted to their string equivalents. In
2975 * patterns, named characters are not converted to their
2976 * ultimate forms for the same reasons that other escapes
2977 * aren't. Instead, they are converted to the \N{U+...} form
2978 * to get the value from the charnames that is in effect right
2979 * now, while preserving the fact that it was a named character
2980 * so that the regex compiler knows this */
2981
2982 /* This section of code doesn't generally use the
2983 * NATIVE_TO_NEED() macro to transform the input. I (khw) did
2984 * a close examination of this macro and determined it is a
2985 * no-op except on utfebcdic variant characters. Every
2986 * character generated by this that would normally need to be
2987 * enclosed by this macro is invariant, so the macro is not
2988 * needed, and would complicate use of copy(). There are other
2989 * parts of this file where the macro is used inconsistently,
2990 * but are saved by it being a no-op */
2991
2992 /* The structure of this section of code (besides checking for
2993 * errors and upgrading to utf8) is:
2994 * Further disambiguate between the two meanings of \N, and if
2995 * not a charname, go process it elsewhere
0a96133f
KW
2996 * If of form \N{U+...}, pass it through if a pattern;
2997 * otherwise convert to utf8
2998 * Otherwise must be \N{NAME}: convert to \N{U+c1.c2...} if a
2999 * pattern; otherwise convert to utf8 */
ff3f963a
KW
3000
3001 /* Here, s points to the 'N'; the test below is guaranteed to
3002 * succeed if we are being called on a pattern as we already
3003 * know from a test above that the next character is a '{'.
3004 * On a non-pattern \N must mean 'named sequence, which
3005 * requires braces */
3006 s++;
3007 if (*s != '{') {
3008 yyerror("Missing braces on \\N{}");
3009 continue;
3010 }
3011 s++;
3012
0a96133f 3013 /* If there is no matching '}', it is an error. */
ff3f963a
KW
3014 if (! (e = strchr(s, '}'))) {
3015 if (! PL_lex_inpat) {
5777a3f7 3016 yyerror("Missing right brace on \\N{}");
0a96133f
KW
3017 } else {
3018 yyerror("Missing right brace on \\N{} or unescaped left brace after \\N.");
dbc0d4f2 3019 }
0a96133f 3020 continue;
ff3f963a 3021 }
cddc7ef4 3022
ff3f963a 3023 /* Here it looks like a named character */
cddc7ef4 3024
ff3f963a
KW
3025 if (PL_lex_inpat) {
3026
3027 /* XXX This block is temporary code. \N{} implies that the
3028 * pattern is to have Unicode semantics, and therefore
3029 * currently has to be encoded in utf8. By putting it in
3030 * utf8 now, we save a whole pass in the regular expression
3031 * compiler. Once that code is changed so Unicode
3032 * semantics doesn't necessarily have to be in utf8, this
3033 * block should be removed */
3034 if (!has_utf8) {
77a135fe 3035 SvCUR_set(sv, d - SvPVX_const(sv));
f08d6ad9 3036 SvPOK_on(sv);
e4f3eed8 3037 *d = '\0';
77a135fe 3038 /* See Note on sizing above. */
7bf79863 3039 sv_utf8_upgrade_flags_grow(sv,
ff3f963a
KW
3040 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3041 /* 5 = '\N{' + cur char + NUL */
3042 (STRLEN)(send - s) + 5);
f08d6ad9 3043 d = SvPVX(sv) + SvCUR(sv);
89491803 3044 has_utf8 = TRUE;
ff3f963a
KW
3045 }
3046 }
423cee85 3047
ff3f963a
KW
3048 if (*s == 'U' && s[1] == '+') { /* \N{U+...} */
3049 I32 flags = PERL_SCAN_ALLOW_UNDERSCORES
3050 | PERL_SCAN_DISALLOW_PREFIX;
3051 STRLEN len;
3052
3053 /* For \N{U+...}, the '...' is a unicode value even on
3054 * EBCDIC machines */
3055 s += 2; /* Skip to next char after the 'U+' */
3056 len = e - s;
3057 uv = grok_hex(s, &len, &flags, NULL);
3058 if (len == 0 || len != (STRLEN)(e - s)) {
3059 yyerror("Invalid hexadecimal number in \\N{U+...}");
3060 s = e + 1;
3061 continue;
3062 }
3063
3064 if (PL_lex_inpat) {
3065
3066 /* Pass through to the regex compiler unchanged. The
3067 * reason we evaluated the number above is to make sure
0a96133f 3068 * there wasn't a syntax error. */
ff3f963a
KW
3069 s -= 5; /* Include the '\N{U+' */
3070 Copy(s, d, e - s + 1, char); /* 1 = include the } */
3071 d += e - s + 1;
3072 }
3073 else { /* Not a pattern: convert the hex to string */
3074
3075 /* If destination is not in utf8, unconditionally
3076 * recode it to be so. This is because \N{} implies
3077 * Unicode semantics, and scalars have to be in utf8
3078 * to guarantee those semantics */
3079 if (! has_utf8) {
3080 SvCUR_set(sv, d - SvPVX_const(sv));
3081 SvPOK_on(sv);
3082 *d = '\0';
3083 /* See Note on sizing above. */
3084 sv_utf8_upgrade_flags_grow(
3085 sv,
3086 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3087 UNISKIP(uv) + (STRLEN)(send - e) + 1);
3088 d = SvPVX(sv) + SvCUR(sv);
3089 has_utf8 = TRUE;
3090 }
3091
3092 /* Add the string to the output */
3093 if (UNI_IS_INVARIANT(uv)) {
3094 *d++ = (char) uv;
3095 }
3096 else d = (char*)uvuni_to_utf8((U8*)d, uv);
3097 }
3098 }
3099 else { /* Here is \N{NAME} but not \N{U+...}. */
3100
3101 SV *res; /* result from charnames */
3102 const char *str; /* the string in 'res' */
3103 STRLEN len; /* its length */
3104
3105 /* Get the value for NAME */
3106 res = newSVpvn(s, e - s);
3107 res = new_constant( NULL, 0, "charnames",
3108 /* includes all of: \N{...} */
3109 res, NULL, s - 3, e - s + 4 );
3110
3111 /* Most likely res will be in utf8 already since the
3112 * standard charnames uses pack U, but a custom translator
3113 * can leave it otherwise, so make sure. XXX This can be
3114 * revisited to not have charnames use utf8 for characters
3115 * that don't need it when regexes don't have to be in utf8
3116 * for Unicode semantics. If doing so, remember EBCDIC */
3117 sv_utf8_upgrade(res);
3118 str = SvPV_const(res, len);
3119
3120 /* Don't accept malformed input */
3121 if (! is_utf8_string((U8 *) str, len)) {
3122 yyerror("Malformed UTF-8 returned by \\N");
3123 }
3124 else if (PL_lex_inpat) {
3125
3126 if (! len) { /* The name resolved to an empty string */
3127 Copy("\\N{}", d, 4, char);
3128 d += 4;
3129 }
3130 else {
3131 /* In order to not lose information for the regex
3132 * compiler, pass the result in the specially made
3133 * syntax: \N{U+c1.c2.c3...}, where c1 etc. are
3134 * the code points in hex of each character
3135 * returned by charnames */
3136
3137 const char *str_end = str + len;
3138 STRLEN char_length; /* cur char's byte length */
3139 STRLEN output_length; /* and the number of bytes
3140 after this is translated
3141 into hex digits */
3142 const STRLEN off = d - SvPVX_const(sv);
3143
3144 /* 2 hex per byte; 2 chars for '\N'; 2 chars for
3145 * max('U+', '.'); and 1 for NUL */
3146 char hex_string[2 * UTF8_MAXBYTES + 5];
3147
3148 /* Get the first character of the result. */
3149 U32 uv = utf8n_to_uvuni((U8 *) str,
3150 len,
3151 &char_length,
3152 UTF8_ALLOW_ANYUV);
3153
3154 /* The call to is_utf8_string() above hopefully
3155 * guarantees that there won't be an error. But
3156 * it's easy here to make sure. The function just
3157 * above warns and returns 0 if invalid utf8, but
3158 * it can also return 0 if the input is validly a
3159 * NUL. Disambiguate */
3160 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3161 uv = UNICODE_REPLACEMENT;
3162 }
3163
3164 /* Convert first code point to hex, including the
3165 * boiler plate before it */
3166 sprintf(hex_string, "\\N{U+%X", (unsigned int) uv);
3167 output_length = strlen(hex_string);
3168
3169 /* Make sure there is enough space to hold it */
3170 d = off + SvGROW(sv, off
3171 + output_length
3172 + (STRLEN)(send - e)
3173 + 2); /* '}' + NUL */
3174 /* And output it */
3175 Copy(hex_string, d, output_length, char);
3176 d += output_length;
3177
3178 /* For each subsequent character, append dot and
3179 * its ordinal in hex */
3180 while ((str += char_length) < str_end) {
3181 const STRLEN off = d - SvPVX_const(sv);
3182 U32 uv = utf8n_to_uvuni((U8 *) str,
3183 str_end - str,
3184 &char_length,
3185 UTF8_ALLOW_ANYUV);
3186 if (uv == 0 && NATIVE_TO_ASCII(*str) != '\0') {
3187 uv = UNICODE_REPLACEMENT;
3188 }
3189
3190 sprintf(hex_string, ".%X", (unsigned int) uv);
3191 output_length = strlen(hex_string);
3192
3193 d = off + SvGROW(sv, off
3194 + output_length
3195 + (STRLEN)(send - e)
3196 + 2); /* '}' + NUL */
3197 Copy(hex_string, d, output_length, char);
3198 d += output_length;
3199 }
3200
3201 *d++ = '}'; /* Done. Add the trailing brace */
3202 }
3203 }
3204 else { /* Here, not in a pattern. Convert the name to a
3205 * string. */
3206
3207 /* If destination is not in utf8, unconditionally
3208 * recode it to be so. This is because \N{} implies
3209 * Unicode semantics, and scalars have to be in utf8
3210 * to guarantee those semantics */
3211 if (! has_utf8) {
3212 SvCUR_set(sv, d - SvPVX_const(sv));
3213 SvPOK_on(sv);
3214 *d = '\0';
3215 /* See Note on sizing above. */
3216 sv_utf8_upgrade_flags_grow(sv,
3217 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3218 len + (STRLEN)(send - s) + 1);
3219 d = SvPVX(sv) + SvCUR(sv);
3220 has_utf8 = TRUE;
3221 } else if (len > (STRLEN)(e - s + 4)) { /* I _guess_ 4 is \N{} --jhi */
3222
3223 /* See Note on sizing above. (NOTE: SvCUR() is not
3224 * set correctly here). */
3225 const STRLEN off = d - SvPVX_const(sv);
3226 d = off + SvGROW(sv, off + len + (STRLEN)(send - s) + 1);
3227 }
3228 Copy(str, d, len, char);
3229 d += len;
423cee85 3230 }
423cee85 3231 SvREFCNT_dec(res);
cb233ae3
KW
3232
3233 /* Deprecate non-approved name syntax */
3234 if (ckWARN_d(WARN_DEPRECATED)) {
3235 bool problematic = FALSE;
3236 char* i = s;
3237
3238 /* For non-ut8 input, look to see that the first
3239 * character is an alpha, then loop through the rest
3240 * checking that each is a continuation */
3241 if (! this_utf8) {
3242 if (! isALPHAU(*i)) problematic = TRUE;
3243 else for (i = s + 1; i < e; i++) {
3244 if (isCHARNAME_CONT(*i)) continue;
3245 problematic = TRUE;
3246 break;
3247 }
3248 }
3249 else {
3250 /* Similarly for utf8. For invariants can check
3251 * directly. We accept anything above the latin1
3252 * range because it is immaterial to Perl if it is
3253 * correct or not, and is expensive to check. But
3254 * it is fairly easy in the latin1 range to convert
3255 * the variants into a single character and check
3256 * those */
3257 if (UTF8_IS_INVARIANT(*i)) {
3258 if (! isALPHAU(*i)) problematic = TRUE;
3259 } else if (UTF8_IS_DOWNGRADEABLE_START(*i)) {
3260 if (! isALPHAU(UNI_TO_NATIVE(UTF8_ACCUMULATE(*i,
3261 *(i+1)))))
3262 {
3263 problematic = TRUE;
3264 }
3265 }
3266 if (! problematic) for (i = s + UTF8SKIP(s);
3267 i < e;
3268 i+= UTF8SKIP(i))
3269 {
3270 if (UTF8_IS_INVARIANT(*i)) {
3271 if (isCHARNAME_CONT(*i)) continue;
3272 } else if (! UTF8_IS_DOWNGRADEABLE_START(*i)) {
3273 continue;
3274 } else if (isCHARNAME_CONT(
3275 UNI_TO_NATIVE(
3276 UTF8_ACCUMULATE(*i, *(i+1)))))
3277 {
3278 continue;
3279 }
3280 problematic = TRUE;
3281 break;
3282 }
3283 }
3284 if (problematic) {
6e1bad6c
KW
3285 /* The e-i passed to the final %.*s makes sure that
3286 * should the trailing NUL be missing that this
3287 * print won't run off the end of the string */
cb233ae3 3288 Perl_warner(aTHX_ packWARN(WARN_DEPRECATED),
b00fc8d4
NC
3289 "Deprecated character in \\N{...}; marked by <-- HERE in \\N{%.*s<-- HERE %.*s",
3290 (int)(i - s + 1), s, (int)(e - i), i + 1);
cb233ae3
KW
3291 }
3292 }
3293 } /* End \N{NAME} */
ff3f963a
KW
3294#ifdef EBCDIC
3295 if (!dorange)
3296 native_range = FALSE; /* \N{} is defined to be Unicode */
3297#endif
3298 s = e + 1; /* Point to just after the '}' */
423cee85
JH
3299 continue;
3300
02aa26ce 3301 /* \c is a control character */
79072805
LW
3302 case 'c':
3303 s++;
961ce445 3304 if (s < send) {
f9d13529 3305 *d++ = grok_bslash_c(*s++, 1);
ba210ebe 3306 }
961ce445
RGS
3307 else {
3308 yyerror("Missing control char name in \\c");
3309 }
79072805 3310 continue;
02aa26ce
NT
3311
3312 /* printf-style backslashes, formfeeds, newlines, etc */
79072805 3313 case 'b':
db42d148 3314 *d++ = NATIVE_TO_NEED(has_utf8,'\b');
79072805
LW
3315 break;
3316 case 'n':
db42d148 3317 *d++ = NATIVE_TO_NEED(has_utf8,'\n');
79072805
LW
3318 break;
3319 case 'r':
db42d148 3320 *d++ = NATIVE_TO_NEED(has_utf8,'\r');
79072805
LW
3321 break;
3322 case 'f':
db42d148 3323 *d++ = NATIVE_TO_NEED(has_utf8,'\f');
79072805
LW
3324 break;
3325 case 't':
db42d148 3326 *d++ = NATIVE_TO_NEED(has_utf8,'\t');
79072805 3327 break;
34a3fe2a 3328 case 'e':
db42d148 3329 *d++ = ASCII_TO_NEED(has_utf8,'\033');
34a3fe2a
PP
3330 break;
3331 case 'a':
db42d148 3332 *d++ = ASCII_TO_NEED(has_utf8,'\007');
79072805 3333 break;
02aa26ce
NT
3334 } /* end switch */
3335
79072805
LW
3336 s++;
3337 continue;
02aa26ce 3338 } /* end if (backslash) */
4c3a8340
TS
3339#ifdef EBCDIC
3340 else
3341 literal_endpoint++;
3342#endif
02aa26ce 3343
f9a63242 3344 default_action:
77a135fe
KW
3345 /* If we started with encoded form, or already know we want it,
3346 then encode the next character */
3347 if (! NATIVE_IS_INVARIANT((U8)(*s)) && (this_utf8 || has_utf8)) {
2b9d42f0 3348 STRLEN len = 1;
77a135fe
KW
3349
3350
3351 /* One might think that it is wasted effort in the case of the
3352 * source being utf8 (this_utf8 == TRUE) to take the next character
3353 * in the source, convert it to an unsigned value, and then convert
3354 * it back again. But the source has not been validated here. The
3355 * routine that does the conversion checks for errors like
3356 * malformed utf8 */
3357
5f66b61c
AL
3358 const UV nextuv = (this_utf8) ? utf8n_to_uvchr((U8*)s, send - s, &len, 0) : (UV) ((U8) *s);
3359 const STRLEN need = UNISKIP(NATIVE_TO_UNI(nextuv));
77a135fe
KW
3360 if (!has_utf8) {
3361 SvCUR_set(sv, d - SvPVX_const(sv));
3362 SvPOK_on(sv);
3363 *d = '\0';
77a135fe 3364 /* See Note on sizing above. */
7bf79863
KW
3365 sv_utf8_upgrade_flags_grow(sv,
3366 SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
3367 need + (STRLEN)(send - s) + 1);
77a135fe
KW
3368 d = SvPVX(sv) + SvCUR(sv);
3369 has_utf8 = TRUE;
3370 } else if (need > len) {
3371 /* encoded value larger than old, may need extra space (NOTE:
3372 * SvCUR() is not set correctly here). See Note on sizing
3373 * above. */
9d4ba2ae 3374 const STRLEN off = d - SvPVX_const(sv);
77a135fe 3375 d = SvGROW(sv, off + need + (STRLEN)(send - s) + 1) + off;
2b9d42f0 3376 }
77a135fe
KW
3377 s += len;
3378
5f66b61c 3379 d = (char*)uvchr_to_utf8((U8*)d, nextuv);
e294cc5d
JH
3380#ifdef EBCDIC
3381 if (uv > 255 && !dorange)
3382 native_range = FALSE;
3383#endif
2b9d42f0
NIS
3384 }
3385 else {
3386 *d++ = NATIVE_TO_NEED(has_utf8,*s++);
3387 }
02aa26ce
NT
3388 } /* while loop to process each character */
3389
3390 /* terminate the string and set up the sv */
79072805 3391 *d = '\0';
95a20fc0 3392 SvCUR_set(sv, d - SvPVX_const(sv));
2b9d42f0 3393 if (SvCUR(sv) >= SvLEN(sv))
d0063567 3394 Perl_croak(aTHX_ "panic: constant overflowed allocated space");
2b9d42f0 3395
79072805 3396 SvPOK_on(sv);
9f4817db 3397 if (PL_encoding && !has_utf8) {
d0063567
DK
3398 sv_recode_to_utf8(sv, PL_encoding);
3399 if (SvUTF8(sv))
3400 has_utf8 = TRUE;
9f4817db 3401 }
2b9d42f0 3402 if (has_utf8) {
7e2040f0 3403 SvUTF8_on(sv);
2b9d42f0 3404 if (PL_lex_inwhat == OP_TRANS && PL_sublex_info.sub_op) {
d0063567 3405 PL_sublex_info.sub_op->op_private |=
2b9d42f0
NIS
3406 (PL_lex_repl ? OPpTRANS_FROM_UTF : OPpTRANS_TO_UTF);
3407 }
3408 }
79072805 3409
02aa26ce 3410 /* shrink the sv if we allocated more than we used */
79072805 3411 if (SvCUR(sv) + 5 < SvLEN(sv)) {
1da4ca5f 3412 SvPV_shrink_to_cur(sv);
79072805 3413 }
02aa26ce 3414
6154021b 3415 /* return the substring (via pl_yylval) only if we parsed anything */
3280af22 3416 if (s > PL_bufptr) {
eb0d8d16
NC
3417 if ( PL_hints & ( PL_lex_inpat ? HINT_NEW_RE : HINT_NEW_STRING ) ) {
3418 const char *const key = PL_lex_inpat ? "qr" : "q";
3419 const STRLEN keylen = PL_lex_inpat ? 2 : 1;
3420 const char *type;
3421 STRLEN typelen;
3422
3423 if (PL_lex_inwhat == OP_TRANS) {
3424 type = "tr";
3425 typelen = 2;
3426 } else if (PL_lex_inwhat == OP_SUBST && !PL_lex_inpat) {
3427 type = "s";
3428 typelen = 1;
3429 } else {
3430 type = "qq";
3431 typelen = 2;
3432 }
3433
3434 sv = S_new_constant(aTHX_ start, s - start, key, keylen, sv, NULL,
3435 type, typelen);
3436 }
6154021b 3437 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
b3ac6de7 3438 } else
8990e307 3439 SvREFCNT_dec(sv);
79072805
LW
3440 return s;
3441}
3442
ffb4593c
NT
3443/* S_intuit_more
3444 * Returns TRUE if there's more to the expression (e.g., a subscript),
3445 * FALSE otherwise.
ffb4593c
NT
3446 *
3447 * It deals with "$foo[3]" and /$foo[3]/ and /$foo[0123456789$]+/
3448 *
3449 * ->[ and ->{ return TRUE
3450 * { and [ outside a pattern are always subscripts, so return TRUE
3451 * if we're outside a pattern and it's not { or [, then return FALSE
3452 * if we're in a pattern and the first char is a {
3453 * {4,5} (any digits around the comma) returns FALSE
3454 * if we're in a pattern and the first char is a [
3455 * [] returns FALSE
3456 * [SOMETHING] has a funky algorithm to decide whether it's a
3457 * character class or not. It has to deal with things like
3458 * /$foo[-3]/ and /$foo[$bar]/ as well as /$foo[$\d]+/
3459 * anything else returns TRUE
3460 */
3461
9cbb5ea2
GS
3462/* This is the one truly awful dwimmer necessary to conflate C and sed. */
3463
76e3520e 3464STATIC int
cea2e8a9 3465S_intuit_more(pTHX_ register char *s)
79072805 3466{
97aff369 3467 dVAR;
7918f24d
NC
3468
3469 PERL_ARGS_ASSERT_INTUIT_MORE;
3470
3280af22 3471 if (PL_lex_brackets)
79072805
LW
3472 return TRUE;
3473 if (*s == '-' && s[1] == '>' && (s[2] == '[' || s[2] == '{'))
3474 return TRUE;
3475 if (*s != '{' && *s != '[')
3476 return FALSE;
3280af22 3477 if (!PL_lex_inpat)
79072805
LW
3478 return TRUE;
3479
3480 /* In a pattern, so maybe we have {n,m}. */
3481 if (*s == '{') {
3482 s++;
3483 if (!isDIGIT(*s))
3484 return TRUE;
3485 while (isDIGIT(*s))
3486 s++;
3487 if (*s == ',')
3488 s++;
3489 while (isDIGIT(*s))
3490 s++;
3491 if (*s == '}')
3492 return FALSE;
3493 return TRUE;
3494
3495 }
3496
3497 /* On the other hand, maybe we have a character class */
3498
3499 s++;
3500 if (*s == ']' || *s == '^')
3501 return FALSE;
3502 else {
ffb4593c 3503 /* this is terrifying, and it works */
79072805
LW
3504 int weight = 2; /* let's weigh the evidence */
3505 char seen[256];
f27ffc4a 3506 unsigned char un_char = 255, last_un_char;
9d4ba2ae 3507 const char * const send = strchr(s,']');
3280af22 3508 char tmpbuf[sizeof PL_tokenbuf * 4];
79072805
LW
3509
3510 if (!send) /* has to be an expression */
3511 return TRUE;
3512
3513 Zero(seen,256,char);
3514 if (*s == '$')
3515 weight -= 3;
3516 else if (isDIGIT(*s)) {
3517 if (s[1] != ']') {
3518 if (isDIGIT(s[1]) && s[2] == ']')
3519 weight -= 10;
3520 }
3521 else
3522 weight -= 100;
3523 }
3524 for (; s < send; s++) {
3525 last_un_char = un_char;
3526 un_char = (unsigned char)*s;
3527 switch (*s) {
3528 case '@':
3529 case '&':
3530 case '$':
3531 weight -= seen[un_char] * 10;
7e2040f0 3532 if (isALNUM_lazy_if(s+1,UTF)) {
90e5519e 3533 int len;
8903cb82 3534 scan_ident(s, send, tmpbuf, sizeof tmpbuf, FALSE);
90e5519e
NC
3535 len = (int)strlen(tmpbuf);
3536 if (len > 1 && gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PV))
79072805
LW
3537 weight -= 100;
3538 else
3539 weight -= 10;
3540 }
3541 else if (*s == '$' && s[1] &&
93a17b20
LW
3542 strchr("[#!%*<>()-=",s[1])) {
3543 if (/*{*/ strchr("])} =",s[2]))
79072805
LW
3544 weight -= 10;
3545 else
3546 weight -= 1;
3547 }
3548 break;
3549 case '\\':
3550 un_char = 254;
3551 if (s[1]) {
93a17b20 3552 if (strchr("wds]",s[1]))
79072805 3553 weight += 100;
10edeb5d 3554 else if (seen[(U8)'\''] || seen[(U8)'"'])
79072805 3555 weight += 1;
93a17b20 3556 else if (strchr("rnftbxcav",s[1]))
79072805
LW
3557 weight += 40;
3558 else if (isDIGIT(s[1])) {
3559 weight += 40;
3560 while (s[1] && isDIGIT(s[1]))
3561 s++;
3562 }
3563 }
3564 else
3565 weight += 100;
3566 break;
3567 case '-':
3568 if (s[1] == '\\')
3569 weight += 50;
93a17b20 3570 if (strchr("aA01! ",last_un_char))
79072805 3571 weight += 30;
93a17b20 3572 if (strchr("zZ79~",s[1]))
79072805 3573 weight += 30;
f27ffc4a
GS
3574 if (last_un_char == 255 && (isDIGIT(s[1]) || s[1] == '$'))
3575 weight -= 5; /* cope with negative subscript */
79072805
LW
3576 break;
3577 default:
3792a11b
NC
3578 if (!isALNUM(last_un_char)
3579 && !(last_un_char == '$' || last_un_char == '@'
3580 || last_un_char == '&')
3581 && isALPHA(*s) && s[1] && isALPHA(s[1])) {
79072805
LW
3582 char *d = tmpbuf;
3583 while (isALPHA(*s))
3584 *d++ = *s++;
3585 *d = '\0';
5458a98a 3586 if (keyword(tmpbuf, d - tmpbuf, 0))
79072805
LW
3587 weight -= 150;
3588 }
3589 if (un_char == last_un_char + 1)
3590 weight += 5;
3591 weight -= seen[un_char];
3592 break;
3593 }
3594 seen[un_char]++;
3595 }
3596 if (weight >= 0) /* probably a character class */
3597 return FALSE;
3598 }
3599
3600 return TRUE;
3601}
ffed7fef 3602
ffb4593c
NT
3603/*
3604 * S_intuit_method
3605 *
3606 * Does all the checking to disambiguate
3607 * foo bar
3608 * between foo(bar) and bar->foo. Returns 0 if not a method, otherwise
3609 * FUNCMETH (bar->foo(args)) or METHOD (bar->foo args).
3610 *
3611 * First argument is the stuff after the first token, e.g. "bar".
3612 *
3613 * Not a method if bar is a filehandle.
3614 * Not a method if foo is a subroutine prototyped to take a filehandle.
3615 * Not a method if it's really "Foo $bar"
3616 * Method if it's "foo $bar"
3617 * Not a method if it's really "print foo $bar"
3618 * Method if it's really "foo package::" (interpreted as package->foo)
8f8cf39c 3619 * Not a method if bar is known to be a subroutine ("sub bar; foo bar")
3cb0bbe5 3620 * Not a method if bar is a filehandle or package, but is quoted with
ffb4593c
NT
3621 * =>
3622 */
3623
76e3520e 3624STATIC int
62d55b22 3625S_intuit_method(pTHX_ char *start, GV *gv, CV *cv)
a0d0e21e 3626{
97aff369 3627 dVAR;
a0d0e21e 3628 char *s = start + (*start == '$');
3280af22 3629 char tmpbuf[sizeof PL_tokenbuf];
a0d0e21e
LW
3630 STRLEN len;
3631 GV* indirgv;
5db06880
NC
3632#ifdef PERL_MAD
3633 int soff;
3634#endif
a0d0e21e 3635
7918f24d
NC
3636 PERL_ARGS_ASSERT_INTUIT_METHOD;
3637
a0d0e21e 3638 if (gv) {
62d55b22 3639 if (SvTYPE(gv) == SVt_PVGV && GvIO(gv))
a0d0e21e 3640 return 0;
62d55b22
NC
3641 if (cv) {
3642 if (SvPOK(cv)) {
3643 const char *proto = SvPVX_const(cv);
3644 if (proto) {
3645 if (*proto == ';')
3646 proto++;
3647 if (*proto == '*')
3648 return 0;
3649 }
b6c543e3
IZ
3650 }
3651 } else
c35e046a 3652 gv = NULL;
a0d0e21e 3653 }
8903cb82 3654 s = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
ffb4593c
NT
3655 /* start is the beginning of the possible filehandle/object,
3656 * and s is the end of it
3657 * tmpbuf is a copy of it
3658 */
3659
a0d0e21e 3660 if (*start == '$') {
3ef1310e
RGS
3661 if (gv || PL_last_lop_op == OP_PRINT || PL_last_lop_op == OP_SAY ||
3662 isUPPER(*PL_tokenbuf))
a0d0e21e 3663 return 0;
5db06880
NC
3664#ifdef PERL_MAD
3665 len = start - SvPVX(PL_linestr);
3666#endif
29595ff2 3667 s = PEEKSPACE(s);
f0092767 3668#ifdef PERL_MAD
5db06880
NC
3669 start = SvPVX(PL_linestr) + len;
3670#endif
3280af22
NIS
3671 PL_bufptr = start;
3672 PL_expect = XREF;
a0d0e21e
LW
3673 return *s == '(' ? FUNCMETH : METHOD;
3674 }
5458a98a 3675 if (!keyword(tmpbuf, len, 0)) {
c3e0f903
GS
3676 if (len > 2 && tmpbuf[len - 2] == ':' && tmpbuf[len - 1] == ':') {
3677 len -= 2;
3678 tmpbuf[len] = '\0';
5db06880
NC
3679#ifdef PERL_MAD
3680 soff = s - SvPVX(PL_linestr);
3681#endif
c3e0f903
GS
3682 goto bare_package;
3683 }
90e5519e 3684 indirgv = gv_fetchpvn_flags(tmpbuf, len, 0, SVt_PVCV);
8ebc5c01 3685 if (indirgv && GvCVu(indirgv))
a0d0e21e
LW
3686 return 0;
3687 /* filehandle or package name makes it a method */
da51bb9b 3688 if (!gv || GvIO(indirgv) || gv_stashpvn(tmpbuf, len, 0)) {
5db06880
NC
3689#ifdef PERL_MAD
3690 soff = s - SvPVX(PL_linestr);
3691#endif
29595ff2 3692 s = PEEKSPACE(s);
3280af22 3693 if ((PL_bufend - s) >= 2 && *s == '=' && *(s+1) == '>')
55497cff 3694 return 0; /* no assumptions -- "=>" quotes bearword */
c3e0f903 3695 bare_package:
cd81e915 3696 start_force(PL_curforce);
9ded7720 3697 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0,
64142370 3698 S_newSV_maybe_utf8(aTHX_ tmpbuf, len));
9ded7720 3699 NEXTVAL_NEXTTOKE.opval->op_private = OPpCONST_BARE;
5db06880
NC
3700 if (PL_madskills)
3701 curmad('X', newSVpvn(start,SvPVX(PL_linestr) + soff - start));
3280af22 3702 PL_expect = XTERM;
a0d0e21e 3703 force_next(WORD);
3280af22 3704 PL_bufptr = s;
5db06880
NC
3705#ifdef PERL_MAD
3706 PL_bufptr = SvPVX(PL_linestr) + soff; /* restart before space */
3707#endif
a0d0e21e
LW
3708 return *s == '(' ? FUNCMETH : METHOD;
3709 }
3710 }
3711 return 0;
3712}
3713
16d20bd9 3714/* Encoded script support. filter_add() effectively inserts a
4e553d73 3715 * 'pre-processing' function into the current source input stream.
16d20bd9
AD
3716 * Note that the filter function only applies to the current source file
3717 * (e.g., it will not affect files 'require'd or 'use'd by this one).
3718 *
3719 * The datasv parameter (which may be NULL) can be used to pass
3720 * private data to this instance of the filter. The filter function
3721 * can recover the SV using the FILTER_DATA macro and use it to
3722 * store private buffers and state information.
3723 *
3724 * The supplied datasv parameter is upgraded to a PVIO type
4755096e 3725 * and the IoDIRP/IoANY field is used to store the function pointer,
e0c19803 3726 * and IOf_FAKE_DIRP is enabled on datasv to mark this as such.
16d20bd9
AD
3727 * Note that IoTOP_NAME, IoFMT_NAME, IoBOTTOM_NAME, if set for
3728 * private use must be set using malloc'd pointers.
3729 */
16d20bd9
AD
3730
3731SV *
864dbfa3 3732Perl_filter_add(pTHX_ filter_t funcp, SV *datasv)
16d20bd9 3733{
97aff369 3734 dVAR;
f4c556ac 3735 if (!funcp)
a0714e2c 3736 return NULL;
f4c556ac 3737
5486870f
DM
3738 if (!PL_parser)
3739 return NULL;
3740
3280af22
NIS
3741 if (!PL_rsfp_filters)
3742 PL_rsfp_filters = newAV();
16d20bd9 3743 if (!datasv)
561b68a9 3744 datasv = newSV(0);
862a34c6 3745 SvUPGRADE(datasv, SVt_PVIO);
8141890a 3746 IoANY(datasv) = FPTR2DPTR(void *, funcp); /* stash funcp into spare field */
e0c19803 3747 IoFLAGS(datasv) |= IOf_FAKE_DIRP;
f4c556ac 3748 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_add func %p (%s)\n",
55662e27
JH
3749 FPTR2DPTR(void *, IoANY(datasv)),
3750 SvPV_nolen(datasv)));
3280af22
NIS
3751 av_unshift(PL_rsfp_filters, 1);
3752 av_store(PL_rsfp_filters, 0, datasv) ;
16d20bd9
AD
3753 return(datasv);
3754}
4e553d73 3755
16d20bd9
AD
3756
3757/* Delete most recently added instance of this filter function. */
a0d0e21e 3758void
864dbfa3 3759Perl_filter_del(pTHX_ filter_t funcp)
16d20bd9 3760{
97aff369 3761 dVAR;
e0c19803 3762 SV *datasv;
24801a4b 3763
7918f24d
NC
3764 PERL_ARGS_ASSERT_FILTER_DEL;
3765
33073adb 3766#ifdef DEBUGGING
55662e27
JH
3767 DEBUG_P(PerlIO_printf(Perl_debug_log, "filter_del func %p",
3768 FPTR2DPTR(void*, funcp)));
33073adb 3769#endif
5486870f 3770 if (!PL_parser || !PL_rsfp_filters || AvFILLp(PL_rsfp_filters)<0)
16d20bd9
AD
3771 return;
3772 /* if filter is on top of stack (usual case) just pop it off */
e0c19803 3773 datasv = FILTER_DATA(AvFILLp(PL_rsfp_filters));
8141890a 3774 if (IoANY(datasv) == FPTR2DPTR(void *, funcp)) {
3280af22 3775 sv_free(av_pop(PL_rsfp_filters));
e50aee73 3776
16d20bd9
AD
3777 return;
3778 }
3779 /* we need to search for the correct entry and clear it */
cea2e8a9 3780 Perl_die(aTHX_ "filter_del can only delete in reverse order (currently)");
16d20bd9
AD
3781}
3782
3783
1de9afcd
RGS
3784/* Invoke the idxth filter function for the current rsfp. */
3785/* maxlen 0 = read one text line */
16d20bd9 3786I32
864dbfa3 3787Perl_filter_read(pTHX_ int idx, SV *buf_sv, int maxlen)
a0d0e21e 3788{
97aff369 3789 dVAR;
16d20bd9
AD
3790 filter_t funcp;
3791 SV *datasv = NULL;
f482118e
NC
3792 /* This API is bad. It should have been using unsigned int for maxlen.
3793 Not sure if we want to change the API, but if not we should sanity
3794 check the value here. */
39cd7a59
NC
3795 const unsigned int correct_length
3796 = maxlen < 0 ?
3797#ifdef PERL_MICRO
3798 0x7FFFFFFF
3799#else
3800 INT_MAX
3801#endif
3802 : maxlen;
e50aee73 3803
7918f24d
NC
3804 PERL_ARGS_ASSERT_FILTER_READ;
3805
5486870f 3806 if (!PL_parser || !PL_rsfp_filters)
16d20bd9 3807 return -1;
1de9afcd 3808 if (idx > AvFILLp(PL_rsfp_filters)) { /* Any more filters? */
16d20bd9
AD
3809 /* Provide a default input filter to make life easy. */
3810 /* Note that we append to the line. This is handy. */
f4c556ac
GS
3811 DEBUG_P(PerlIO_printf(Perl_debug_log,
3812 "filter_read %d: from rsfp\n", idx));
f482118e 3813 if (correct_length) {
16d20bd9
AD
3814 /* Want a block */
3815 int len ;
f54cb97a 3816 const int old_len = SvCUR(buf_sv);
16d20bd9
AD
3817
3818 /* ensure buf_sv is large enough */
881d8f0a 3819 SvGROW(buf_sv, (STRLEN)(old_len + correct_length + 1)) ;
f482118e
NC
3820 if ((len = PerlIO_read(PL_rsfp, SvPVX(buf_sv) + old_len,
3821 correct_length)) <= 0) {
3280af22 3822 if (PerlIO_error(PL_rsfp))
37120919
AD
3823 return -1; /* error */
3824 else
3825 return 0 ; /* end of file */
3826 }
16d20bd9 3827 SvCUR_set(buf_sv, old_len + len) ;
881d8f0a 3828 SvPVX(buf_sv)[old_len + len] = '\0';
16d20bd9
AD
3829 } else {
3830 /* Want a line */
3280af22
NIS
3831 if (sv_gets(buf_sv, PL_rsfp, SvCUR(buf_sv)) == NULL) {
3832 if (PerlIO_error(PL_rsfp))
37120919
AD
3833 return -1; /* error */
3834 else
3835 return 0 ; /* end of file */
3836 }
16d20bd9
AD
3837 }
3838 return SvCUR(buf_sv);
3839 }
3840 /* Skip this filter slot if filter has been deleted */
1de9afcd 3841 if ( (datasv = FILTER_DATA(idx)) == &PL_sv_undef) {
f4c556ac
GS
3842 DEBUG_P(PerlIO_printf(Perl_debug_log,
3843 "filter_read %d: skipped (filter deleted)\n",
3844 idx));
f482118e 3845 return FILTER_READ(idx+1, buf_sv, correct_length); /* recurse */
16d20bd9
AD
3846 }
3847 /* Get function pointer hidden within datasv */
8141890a 3848 funcp = DPTR2FPTR(filter_t, IoANY(datasv));
f4c556ac
GS
3849 DEBUG_P(PerlIO_printf(Perl_debug_log,
3850 "filter_read %d: via function %p (%s)\n",
ca0270c4 3851 idx, (void*)datasv, SvPV_nolen_const(datasv)));
16d20bd9
AD
3852 /* Call function. The function is expected to */
3853 /* call "FILTER_READ(idx+1, buf_sv)" first. */
37120919 3854 /* Return: <0:error, =0:eof, >0:not eof */
f482118e 3855 return (*funcp)(aTHX_ idx, buf_sv, correct_length);
16d20bd9
AD
3856}
3857
76e3520e 3858STATIC char *
5cc814fd 3859S_filter_gets(pTHX_ register SV *sv, STRLEN append)
16d20bd9 3860{
97aff369 3861 dVAR;
7918f24d
NC
3862
3863 PERL_ARGS_ASSERT_FILTER_GETS;
3864
c39cd008 3865#ifdef PERL_CR_FILTER
3280af22 3866 if (!PL_rsfp_filters) {
c39cd008 3867 filter_add(S_cr_textfilter,NULL);
a868473f
NIS
3868 }
3869#endif
3280af22 3870 if (PL_rsfp_filters) {
55497cff 3871 if (!append)
3872 SvCUR_set(sv, 0); /* start with empty line */
16d20bd9
AD
3873 if (FILTER_READ(0, sv, 0) > 0)
3874 return ( SvPVX(sv) ) ;
3875 else
bd61b366 3876 return NULL ;
16d20bd9 3877 }
9d116dd7 3878 else
5cc814fd 3879 return (sv_gets(sv, PL_rsfp, append));
a0d0e21e
LW
3880}
3881
01ec43d0 3882STATIC HV *
9bde8eb0 3883S_find_in_my_stash(pTHX_ const char *pkgname, STRLEN len)
def3634b 3884{
97aff369 3885 dVAR;
def3634b
GS
3886 GV *gv;
3887
7918f24d
NC
3888 PERL_ARGS_ASSERT_FIND_IN_MY_STASH;
3889
01ec43d0 3890 if (len == 11 && *pkgname == '_' && strEQ(pkgname, "__PACKAGE__"))
def3634b
GS
3891 return PL_curstash;
3892
3893 if (len > 2 &&
3894 (pkgname[len - 2] == ':' && pkgname[len - 1] == ':') &&
90e5519e 3895 (gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVHV)))
01ec43d0
GS
3896 {
3897 return GvHV(gv); /* Foo:: */
def3634b
GS
3898 }
3899
3900 /* use constant CLASS => 'MyClass' */
c35e046a
AL
3901 gv = gv_fetchpvn_flags(pkgname, len, 0, SVt_PVCV);
3902 if (gv && GvCV(gv)) {
3903 SV * const sv = cv_const_sv(GvCV(gv));
3904 if (sv)
9bde8eb0 3905 pkgname = SvPV_const(sv, len);
def3634b
GS
3906 }
3907
9bde8eb0 3908 return gv_stashpvn(pkgname, len, 0);
def3634b 3909}
a0d0e21e 3910
e3f73d4e
RGS
3911/*
3912 * S_readpipe_override
3913 * Check whether readpipe() is overriden, and generates the appropriate
3914 * optree, provided sublex_start() is called afterwards.
3915 */
3916STATIC void
1d51329b 3917S_readpipe_override(pTHX)
e3f73d4e
RGS
3918{
3919 GV **gvp;
3920 GV *gv_readpipe = gv_fetchpvs("readpipe", GV_NOTQUAL, SVt_PVCV);
6154021b 3921 pl_yylval.ival = OP_BACKTICK;
e3f73d4e
RGS
3922 if ((gv_readpipe
3923 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe))
3924 ||
3925 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readpipe", FALSE))
d5e716f5 3926 && (gv_readpipe = *gvp) && isGV_with_GP(gv_readpipe)
e3f73d4e
RGS
3927 && GvCVu(gv_readpipe) && GvIMPORTED_CV(gv_readpipe)))
3928 {
3929 PL_lex_op = (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
3930 append_elem(OP_LIST,
3931 newSVOP(OP_CONST, 0, &PL_sv_undef), /* value will be read later */
3932 newCVREF(0, newGVOP(OP_GV, 0, gv_readpipe))));
3933 }
e3f73d4e
RGS
3934}
3935
5db06880
NC
3936#ifdef PERL_MAD
3937 /*
3938 * Perl_madlex
3939 * The intent of this yylex wrapper is to minimize the changes to the
3940 * tokener when we aren't interested in collecting madprops. It remains
3941 * to be seen how successful this strategy will be...
3942 */
3943
3944int
3945Perl_madlex(pTHX)
3946{
3947 int optype;
3948 char *s = PL_bufptr;
3949
cd81e915
NC
3950 /* make sure PL_thiswhite is initialized */
3951 PL_thiswhite = 0;
3952 PL_thismad = 0;
5db06880 3953
cd81e915 3954 /* just do what yylex would do on pending identifier; leave PL_thiswhite alone */
5db06880
NC
3955 if (PL_pending_ident)
3956 return S_pending_ident(aTHX);
3957
3958 /* previous token ate up our whitespace? */
cd81e915
NC
3959 if (!PL_lasttoke && PL_nextwhite) {
3960 PL_thiswhite = PL_nextwhite;
3961 PL_nextwhite = 0;
5db06880
NC
3962 }
3963
3964 /* isolate the token, and figure out where it is without whitespace */
cd81e915
NC
3965 PL_realtokenstart = -1;
3966 PL_thistoken = 0;
5db06880
NC
3967 optype = yylex();
3968 s = PL_bufptr;
cd81e915 3969 assert(PL_curforce < 0);
5db06880 3970
cd81e915
NC
3971 if (!PL_thismad || PL_thismad->mad_key == '^') { /* not forced already? */
3972 if (!PL_thistoken) {
3973 if (PL_realtokenstart < 0 || !CopLINE(PL_curcop))
6b29d1f5 3974 PL_thistoken = newSVpvs("");
5db06880 3975 else {
c35e046a 3976 char * const tstart = SvPVX(PL_linestr) + PL_realtokenstart;
cd81e915 3977 PL_thistoken = newSVpvn(tstart, s - tstart);
5db06880
NC
3978 }
3979 }
cd81e915
NC
3980 if (PL_thismad) /* install head */
3981 CURMAD('X', PL_thistoken);
5db06880
NC
3982 }
3983
3984 /* last whitespace of a sublex? */
cd81e915
NC
3985 if (optype == ')' && PL_endwhite) {
3986 CURMAD('X', PL_endwhite);
5db06880
NC
3987 }
3988
cd81e915 3989 if (!PL_thismad) {
5db06880
NC
3990
3991 /* if no whitespace and we're at EOF, bail. Otherwise fake EOF below. */
cd81e915
NC
3992 if (!PL_thiswhite && !PL_endwhite && !optype) {
3993 sv_free(PL_thistoken);
3994 PL_thistoken = 0;
5db06880
NC
3995 return 0;
3996 }
3997
3998 /* put off final whitespace till peg */
3999 if (optype == ';' && !PL_rsfp) {
cd81e915
NC
4000 PL_nextwhite = PL_thiswhite;
4001 PL_thiswhite = 0;
5db06880 4002 }
cd81e915
NC
4003 else if (PL_thisopen) {
4004 CURMAD('q', PL_thisopen);
4005 if (PL_thistoken)
4006 sv_free(PL_thistoken);
4007 PL_thistoken = 0;
5db06880
NC
4008 }
4009 else {
4010 /* Store actual token text as madprop X */
cd81e915 4011 CURMAD('X', PL_thistoken);
5db06880
NC
4012 }
4013
cd81e915 4014 if (PL_thiswhite) {
5db06880 4015 /* add preceding whitespace as madprop _ */
cd81e915 4016 CURMAD('_', PL_thiswhite);
5db06880
NC
4017 }
4018
cd81e915 4019 if (PL_thisstuff) {
5db06880 4020 /* add quoted material as madprop = */
cd81e915 4021 CURMAD('=', PL_thisstuff);
5db06880
NC
4022 }
4023
cd81e915 4024 if (PL_thisclose) {
5db06880 4025 /* add terminating quote as madprop Q */
cd81e915 4026 CURMAD('Q', PL_thisclose);
5db06880
NC
4027 }
4028 }
4029
4030 /* special processing based on optype */
4031
4032 switch (optype) {
4033
4034 /* opval doesn't need a TOKEN since it can already store mp */
4035 case WORD:
4036 case METHOD:
4037 case FUNCMETH:
4038 case THING:
4039 case PMFUNC:
4040 case PRIVATEREF:
4041 case FUNC0SUB:
4042 case UNIOPSUB:
4043 case LSTOPSUB:
6154021b
RGS
4044 if (pl_yylval.opval)
4045 append_madprops(PL_thismad, pl_yylval.opval, 0);
cd81e915 4046 PL_thismad = 0;
5db06880
NC
4047 return optype;
4048
4049 /* fake EOF */
4050 case 0:
4051 optype = PEG;
cd81e915
NC
4052 if (PL_endwhite) {
4053 addmad(newMADsv('p', PL_endwhite), &PL_thismad, 0);
4054 PL_endwhite = 0;
5db06880
NC
4055 }
4056 break;
4057
4058 case ']':
4059 case '}':
cd81e915 4060 if (PL_faketokens)
5db06880
NC
4061 break;
4062 /* remember any fake bracket that lexer is about to discard */
4063 if (PL_lex_brackets == 1 &&
4064 ((expectation)PL_lex_brackstack[0] & XFAKEBRACK))
4065 {
4066 s = PL_bufptr;
4067 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4068 s++;
4069 if (*s == '}') {
cd81e915
NC
4070 PL_thiswhite = newSVpvn(PL_bufptr, ++s - PL_bufptr);
4071 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4072 PL_thiswhite = 0;
5db06880
NC
4073 PL_bufptr = s - 1;
4074 break; /* don't bother looking for trailing comment */
4075 }
4076 else
4077 s = PL_bufptr;
4078 }
4079 if (optype == ']')
4080 break;
4081 /* FALLTHROUGH */
4082
4083 /* attach a trailing comment to its statement instead of next token */
4084 case ';':
cd81e915 4085 if (PL_faketokens)
5db06880
NC
4086 break;
4087 if (PL_bufptr > PL_oldbufptr && PL_bufptr[-1] == optype) {
4088 s = PL_bufptr;
4089 while (s < PL_bufend && (*s == ' ' || *s == '\t'))
4090 s++;
4091 if (*s == '\n' || *s == '#') {
4092 while (s < PL_bufend && *s != '\n')
4093 s++;
4094 if (s < PL_bufend)
4095 s++;
cd81e915
NC
4096 PL_thiswhite = newSVpvn(PL_bufptr, s - PL_bufptr);
4097 addmad(newMADsv('#', PL_thiswhite), &PL_thismad, 0);
4098 PL_thiswhite = 0;
5db06880
NC
4099 PL_bufptr = s;
4100 }
4101 }
4102 break;
4103
4104 /* pval */
4105 case LABEL:
4106 break;
4107
4108 /* ival */
4109 default:
4110 break;
4111
4112 }
4113
4114 /* Create new token struct. Note: opvals return early above. */
6154021b 4115 pl_yylval.tkval = newTOKEN(optype, pl_yylval, PL_thismad);
cd81e915 4116 PL_thismad = 0;
5db06880
NC
4117 return optype;
4118}
4119#endif
4120
468aa647 4121STATIC char *
cc6ed77d 4122S_tokenize_use(pTHX_ int is_use, char *s) {
97aff369 4123 dVAR;
7918f24d
NC
4124
4125 PERL_ARGS_ASSERT_TOKENIZE_USE;
4126
468aa647
RGS
4127 if (PL_expect != XSTATE)
4128 yyerror(Perl_form(aTHX_ "\"%s\" not allowed in expression",
4129 is_use ? "use" : "no"));
29595ff2 4130 s = SKIPSPACE1(s);
468aa647
RGS
4131 if (isDIGIT(*s) || (*s == 'v' && isDIGIT(s[1]))) {
4132 s = force_version(s, TRUE);
17c59fdf
VP
4133 if (*s == ';' || *s == '}'
4134 || (s = SKIPSPACE1(s), (*s == ';' || *s == '}'))) {
cd81e915 4135 start_force(PL_curforce);
9ded7720 4136 NEXTVAL_NEXTTOKE.opval = NULL;
468aa647
RGS
4137 force_next(WORD);
4138 }
4139 else if (*s == 'v') {
4140 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4141 s = force_version(s, FALSE);
4142 }
4143 }
4144 else {
4145 s = force_word(s,WORD,FALSE,TRUE,FALSE);
4146 s = force_version(s, FALSE);
4147 }
6154021b 4148 pl_yylval.ival = is_use;
468aa647
RGS
4149 return s;
4150}
748a9306 4151#ifdef DEBUGGING
27da23d5 4152 static const char* const exp_name[] =
09bef843 4153 { "OPERATOR", "TERM", "REF", "STATE", "BLOCK", "ATTRBLOCK",
27308ded 4154 "ATTRTERM", "TERMBLOCK", "TERMORDORDOR"
09bef843 4155 };
748a9306 4156#endif
463ee0b2 4157
02aa26ce
NT
4158/*
4159 yylex
4160
4161 Works out what to call the token just pulled out of the input
4162 stream. The yacc parser takes care of taking the ops we return and
4163 stitching them into a tree.
4164
4165 Returns:
4166 PRIVATEREF
4167
4168 Structure:
4169 if read an identifier
4170 if we're in a my declaration
4171 croak if they tried to say my($foo::bar)
4172 build the ops for a my() declaration
4173 if it's an access to a my() variable
4174 are we in a sort block?
4175 croak if my($a); $a <=> $b
4176 build ops for access to a my() variable
4177 if in a dq string, and they've said @foo and we can't find @foo
4178 croak
4179 build ops for a bareword
4180 if we already built the token before, use it.
4181*/
4182
20141f0e 4183
dba4d153
JH
4184#ifdef __SC__
4185#pragma segment Perl_yylex
4186#endif
dba4d153 4187int
dba4d153 4188Perl_yylex(pTHX)
20141f0e 4189{
97aff369 4190 dVAR;
3afc138a 4191 register char *s = PL_bufptr;
378cc40b 4192 register char *d;
463ee0b2 4193 STRLEN len;
aa7440fb 4194 bool bof = FALSE;
580561a3 4195 U32 fake_eof = 0;
a687059c 4196
10edeb5d
JH
4197 /* orig_keyword, gvp, and gv are initialized here because
4198 * jump to the label just_a_word_zero can bypass their
4199 * initialization later. */
4200 I32 orig_keyword = 0;
4201 GV *gv = NULL;
4202 GV **gvp = NULL;
4203
bbf60fe6 4204 DEBUG_T( {
396482e1 4205 SV* tmp = newSVpvs("");
b6007c36
DM
4206 PerlIO_printf(Perl_debug_log, "### %"IVdf":LEX_%s/X%s %s\n",
4207 (IV)CopLINE(PL_curcop),
4208 lex_state_names[PL_lex_state],
4209 exp_name[PL_expect],
4210 pv_display(tmp, s, strlen(s), 0, 60));
4211 SvREFCNT_dec(tmp);
bbf60fe6 4212 } );
02aa26ce 4213 /* check if there's an identifier for us to look at */
ba979b31 4214 if (PL_pending_ident)
bbf60fe6 4215 return REPORT(S_pending_ident(aTHX));
bbce6d69 4216
02aa26ce
NT
4217 /* no identifier pending identification */
4218
3280af22 4219 switch (PL_lex_state) {
79072805
LW
4220#ifdef COMMENTARY
4221 case LEX_NORMAL: /* Some compilers will produce faster */
4222 case LEX_INTERPNORMAL: /* code if we comment these out. */
4223 break;
4224#endif
4225
09bef843 4226 /* when we've already built the next token, just pull it out of the queue */
79072805 4227 case LEX_KNOWNEXT:
5db06880
NC
4228#ifdef PERL_MAD
4229 PL_lasttoke--;
6154021b 4230 pl_yylval = PL_nexttoke[PL_lasttoke].next_val;
5db06880 4231 if (PL_madskills) {
cd81e915 4232 PL_thismad = PL_nexttoke[PL_lasttoke].next_mad;
5db06880 4233 PL_nexttoke[PL_lasttoke].next_mad = 0;
cd81e915 4234 if (PL_thismad && PL_thismad->mad_key == '_') {
daba3364 4235 PL_thiswhite = MUTABLE_SV(PL_thismad->mad_val);
cd81e915
NC
4236 PL_thismad->mad_val = 0;
4237 mad_free(PL_thismad);
4238 PL_thismad = 0;
5db06880
NC
4239 }
4240 }
4241 if (!PL_lasttoke) {
4242 PL_lex_state = PL_lex_defer;
4243 PL_expect = PL_lex_expect;
4244 PL_lex_defer = LEX_NORMAL;
4245 if (!PL_nexttoke[PL_lasttoke].next_type)
4246 return yylex();
4247 }
4248#else
3280af22 4249 PL_nexttoke--;
6154021b 4250 pl_yylval = PL_nextval[PL_nexttoke];
3280af22
NIS
4251 if (!PL_nexttoke) {
4252 PL_lex_state = PL_lex_defer;
4253 PL_expect = PL_lex_expect;
4254 PL_lex_defer = LEX_NORMAL;
463ee0b2 4255 }
5db06880
NC
4256#endif
4257#ifdef PERL_MAD
4258 /* FIXME - can these be merged? */
4259 return(PL_nexttoke[PL_lasttoke].next_type);
4260#else
bbf60fe6 4261 return REPORT(PL_nexttype[PL_nexttoke]);
5db06880 4262#endif
79072805 4263
02aa26ce 4264 /* interpolated case modifiers like \L \U, including \Q and \E.
3280af22 4265 when we get here, PL_bufptr is at the \
02aa26ce 4266 */
79072805
LW
4267 case LEX_INTERPCASEMOD:
4268#ifdef DEBUGGING
3280af22 4269 if (PL_bufptr != PL_bufend && *PL_bufptr != '\\')
cea2e8a9 4270 Perl_croak(aTHX_ "panic: INTERPCASEMOD");
79072805 4271#endif
02aa26ce 4272 /* handle \E or end of string */
3280af22 4273 if (PL_bufptr == PL_bufend || PL_bufptr[1] == 'E') {
02aa26ce 4274 /* if at a \E */
3280af22 4275 if (PL_lex_casemods) {
f54cb97a 4276 const char oldmod = PL_lex_casestack[--PL_lex_casemods];
3280af22 4277 PL_lex_casestack[PL_lex_casemods] = '\0';
02aa26ce 4278
3792a11b
NC
4279 if (PL_bufptr != PL_bufend
4280 && (oldmod == 'L' || oldmod == 'U' || oldmod == 'Q')) {
3280af22
NIS
4281 PL_bufptr += 2;
4282 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4283#ifdef PERL_MAD
4284 if (PL_madskills)
6b29d1f5 4285 PL_thistoken = newSVpvs("\\E");
5db06880 4286#endif
a0d0e21e 4287 }
bbf60fe6 4288 return REPORT(')');
79072805 4289 }
5db06880
NC
4290#ifdef PERL_MAD
4291 while (PL_bufptr != PL_bufend &&
4292 PL_bufptr[0] == '\\' && PL_bufptr[1] == 'E') {
cd81e915 4293 if (!PL_thiswhite)
6b29d1f5 4294 PL_thiswhite = newSVpvs("");
cd81e915 4295 sv_catpvn(PL_thiswhite, PL_bufptr, 2);
5db06880
NC
4296 PL_bufptr += 2;
4297 }
4298#else
3280af22
NIS
4299 if (PL_bufptr != PL_bufend)
4300 PL_bufptr += 2;
5db06880 4301#endif
3280af22 4302 PL_lex_state = LEX_INTERPCONCAT;
cea2e8a9 4303 return yylex();
79072805
LW
4304 }
4305 else {
607df283 4306 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4307 "### Saw case modifier\n"); });
3280af22 4308 s = PL_bufptr + 1;
6e909404 4309 if (s[1] == '\\' && s[2] == 'E') {
5db06880 4310#ifdef PERL_MAD
cd81e915 4311 if (!PL_thiswhite)
6b29d1f5 4312 PL_thiswhite = newSVpvs("");
cd81e915 4313 sv_catpvn(PL_thiswhite, PL_bufptr, 4);
5db06880 4314#endif
89122651 4315 PL_bufptr = s + 3;
6e909404
JH
4316 PL_lex_state = LEX_INTERPCONCAT;
4317 return yylex();
a0d0e21e 4318 }
6e909404 4319 else {
90771dc0 4320 I32 tmp;
5db06880
NC
4321 if (!PL_madskills) /* when just compiling don't need correct */
4322 if (strnEQ(s, "L\\u", 3) || strnEQ(s, "U\\l", 3))
4323 tmp = *s, *s = s[2], s[2] = (char)tmp; /* misordered... */
3792a11b 4324 if ((*s == 'L' || *s == 'U') &&
6e909404
JH
4325 (strchr(PL_lex_casestack, 'L') || strchr(PL_lex_casestack, 'U'))) {
4326 PL_lex_casestack[--PL_lex_casemods] = '\0';
bbf60fe6 4327 return REPORT(')');
6e909404
JH
4328 }
4329 if (PL_lex_casemods > 10)
4330 Renew(PL_lex_casestack, PL_lex_casemods + 2, char);
4331 PL_lex_casestack[PL_lex_casemods++] = *s;
4332 PL_lex_casestack[PL_lex_casemods] = '\0';
4333 PL_lex_state = LEX_INTERPCONCAT;
cd81e915 4334 start_force(PL_curforce);
9ded7720 4335 NEXTVAL_NEXTTOKE.ival = 0;
6e909404 4336 force_next('(');
cd81e915 4337 start_force(PL_curforce);
6e909404 4338 if (*s == 'l')
9ded7720 4339 NEXTVAL_NEXTTOKE.ival = OP_LCFIRST;
6e909404 4340 else if (*s == 'u')
9ded7720 4341 NEXTVAL_NEXTTOKE.ival = OP_UCFIRST;
6e909404 4342 else if (*s == 'L')
9ded7720 4343 NEXTVAL_NEXTTOKE.ival = OP_LC;
6e909404 4344 else if (*s == 'U')
9ded7720 4345 NEXTVAL_NEXTTOKE.ival = OP_UC;
6e909404 4346 else if (*s == 'Q')
9ded7720 4347 NEXTVAL_NEXTTOKE.ival = OP_QUOTEMETA;
6e909404
JH
4348 else
4349 Perl_croak(aTHX_ "panic: yylex");
5db06880 4350 if (PL_madskills) {
a5849ce5
NC
4351 SV* const tmpsv = newSVpvs("\\ ");
4352 /* replace the space with the character we want to escape
4353 */
4354 SvPVX(tmpsv)[1] = *s;
5db06880
NC
4355 curmad('_', tmpsv);
4356 }
6e909404 4357 PL_bufptr = s + 1;
a0d0e21e 4358 }
79072805 4359 force_next(FUNC);
3280af22
NIS
4360 if (PL_lex_starts) {
4361 s = PL_bufptr;
4362 PL_lex_starts = 0;
5db06880
NC
4363#ifdef PERL_MAD
4364 if (PL_madskills) {
cd81e915
NC
4365 if (PL_thistoken)
4366 sv_free(PL_thistoken);
6b29d1f5 4367 PL_thistoken = newSVpvs("");
5db06880
NC
4368 }
4369#endif
131b3ad0
DM
4370 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4371 if (PL_lex_casemods == 1 && PL_lex_inpat)
4372 OPERATOR(',');
4373 else
4374 Aop(OP_CONCAT);
79072805
LW
4375 }
4376 else
cea2e8a9 4377 return yylex();
79072805
LW
4378 }
4379
55497cff 4380 case LEX_INTERPPUSH:
bbf60fe6 4381 return REPORT(sublex_push());
55497cff 4382
79072805 4383 case LEX_INTERPSTART:
3280af22 4384 if (PL_bufptr == PL_bufend)
bbf60fe6 4385 return REPORT(sublex_done());
607df283 4386 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 4387 "### Interpolated variable\n"); });
3280af22
NIS
4388 PL_expect = XTERM;
4389 PL_lex_dojoin = (*PL_bufptr == '@');
4390 PL_lex_state = LEX_INTERPNORMAL;
4391 if (PL_lex_dojoin) {
cd81e915 4392 start_force(PL_curforce);
9ded7720 4393 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4394 force_next(',');
cd81e915 4395 start_force(PL_curforce);
a0d0e21e 4396 force_ident("\"", '$');
cd81e915 4397 start_force(PL_curforce);
9ded7720 4398 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4399 force_next('$');
cd81e915 4400 start_force(PL_curforce);
9ded7720 4401 NEXTVAL_NEXTTOKE.ival = 0;
79072805 4402 force_next('(');
cd81e915 4403 start_force(PL_curforce);
9ded7720 4404 NEXTVAL_NEXTTOKE.ival = OP_JOIN; /* emulate join($", ...) */
79072805
LW
4405 force_next(FUNC);
4406 }
3280af22
NIS
4407 if (PL_lex_starts++) {
4408 s = PL_bufptr;
5db06880
NC
4409#ifdef PERL_MAD
4410 if (PL_madskills) {
cd81e915
NC
4411 if (PL_thistoken)
4412 sv_free(PL_thistoken);
6b29d1f5 4413 PL_thistoken = newSVpvs("");
5db06880
NC
4414 }
4415#endif
131b3ad0
DM
4416 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4417 if (!PL_lex_casemods && PL_lex_inpat)
4418 OPERATOR(',');
4419 else
4420 Aop(OP_CONCAT);
79072805 4421 }
cea2e8a9 4422 return yylex();
79072805
LW
4423
4424 case LEX_INTERPENDMAYBE:
3280af22
NIS
4425 if (intuit_more(PL_bufptr)) {
4426 PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
79072805
LW
4427 break;
4428 }
4429 /* FALL THROUGH */
4430
4431 case LEX_INTERPEND:
3280af22
NIS
4432 if (PL_lex_dojoin) {
4433 PL_lex_dojoin = FALSE;
4434 PL_lex_state = LEX_INTERPCONCAT;
5db06880
NC
4435#ifdef PERL_MAD
4436 if (PL_madskills) {
cd81e915
NC
4437 if (PL_thistoken)
4438 sv_free(PL_thistoken);
6b29d1f5 4439 PL_thistoken = newSVpvs("");
5db06880
NC
4440 }
4441#endif
bbf60fe6 4442 return REPORT(')');
79072805 4443 }
43a16006 4444 if (PL_lex_inwhat == OP_SUBST && PL_linestr == PL_lex_repl
25da4f38 4445 && SvEVALED(PL_lex_repl))
43a16006 4446 {
e9fa98b2 4447 if (PL_bufptr != PL_bufend)
cea2e8a9 4448 Perl_croak(aTHX_ "Bad evalled substitution pattern");
a0714e2c 4449 PL_lex_repl = NULL;
e9fa98b2 4450 }
79072805
LW
4451 /* FALLTHROUGH */
4452 case LEX_INTERPCONCAT:
4453#ifdef DEBUGGING
3280af22 4454 if (PL_lex_brackets)
cea2e8a9 4455 Perl_croak(aTHX_ "panic: INTERPCONCAT");
79072805 4456#endif
3280af22 4457 if (PL_bufptr == PL_bufend)
bbf60fe6 4458 return REPORT(sublex_done());
79072805 4459
3280af22
NIS
4460 if (SvIVX(PL_linestr) == '\'') {
4461 SV *sv = newSVsv(PL_linestr);
4462 if (!PL_lex_inpat)
76e3520e 4463 sv = tokeq(sv);
3280af22 4464 else if ( PL_hints & HINT_NEW_RE )
eb0d8d16 4465 sv = new_constant(NULL, 0, "qr", sv, sv, "q", 1);
6154021b 4466 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
3280af22 4467 s = PL_bufend;
79072805
LW
4468 }
4469 else {
3280af22 4470 s = scan_const(PL_bufptr);
79072805 4471 if (*s == '\\')
3280af22 4472 PL_lex_state = LEX_INTERPCASEMOD;
79072805 4473 else
3280af22 4474 PL_lex_state = LEX_INTERPSTART;
79072805
LW
4475 }
4476
3280af22 4477 if (s != PL_bufptr) {
cd81e915 4478 start_force(PL_curforce);
5db06880
NC
4479 if (PL_madskills) {
4480 curmad('X', newSVpvn(PL_bufptr,s-PL_bufptr));
4481 }
6154021b 4482 NEXTVAL_NEXTTOKE = pl_yylval;
3280af22 4483 PL_expect = XTERM;
79072805 4484 force_next(THING);
131b3ad0 4485 if (PL_lex_starts++) {
5db06880
NC
4486#ifdef PERL_MAD
4487 if (PL_madskills) {
cd81e915
NC
4488 if (PL_thistoken)
4489 sv_free(PL_thistoken);
6b29d1f5 4490 PL_thistoken = newSVpvs("");
5db06880
NC
4491 }
4492#endif
131b3ad0
DM
4493 /* commas only at base level: /$a\Ub$c/ => ($a,uc(b.$c)) */
4494 if (!PL_lex_casemods && PL_lex_inpat)
4495 OPERATOR(',');
4496 else
4497 Aop(OP_CONCAT);
4498 }
79072805 4499 else {
3280af22 4500 PL_bufptr = s;
cea2e8a9 4501 return yylex();
79072805
LW
4502 }
4503 }
4504
cea2e8a9 4505 return yylex();
a0d0e21e 4506 case LEX_FORMLINE:
3280af22
NIS
4507 PL_lex_state = LEX_NORMAL;
4508 s = scan_formline(PL_bufptr);
4509 if (!PL_lex_formbrack)
a0d0e21e
LW
4510 goto rightbracket;
4511 OPERATOR(';');
79072805
LW
4512 }
4513
3280af22
NIS
4514 s = PL_bufptr;
4515 PL_oldoldbufptr = PL_oldbufptr;
4516 PL_oldbufptr = s;
463ee0b2
LW
4517
4518 retry:
5db06880 4519#ifdef PERL_MAD
cd81e915
NC
4520 if (PL_thistoken) {
4521 sv_free(PL_thistoken);
4522 PL_thistoken = 0;
5db06880 4523 }
cd81e915 4524 PL_realtokenstart = s - SvPVX(PL_linestr); /* assume but undo on ws */
5db06880 4525#endif
378cc40b
LW
4526 switch (*s) {
4527 default:
7e2040f0 4528 if (isIDFIRST_lazy_if(s,UTF))
834a4ddd 4529 goto keylookup;
b1fc3636
CJ
4530 {
4531 unsigned char c = *s;
4532 len = UTF ? Perl_utf8_length(aTHX_ (U8 *) PL_linestart, (U8 *) s) : (STRLEN) (s - PL_linestart);
4533 if (len > UNRECOGNIZED_PRECEDE_COUNT) {
4534 d = UTF ? (char *) Perl_utf8_hop(aTHX_ (U8 *) s, -UNRECOGNIZED_PRECEDE_COUNT) : s - UNRECOGNIZED_PRECEDE_COUNT;
4535 } else {
4536 d = PL_linestart;
4537 }
4538 *s = '\0';
4539 Perl_croak(aTHX_ "Unrecognized character \\x%02X; marked by <-- HERE after %s<-- HERE near column %d", c, d, (int) len + 1);
4540 }
e929a76b
LW
4541 case 4:
4542 case 26:
4543 goto fake_eof; /* emulate EOF on ^D or ^Z */
378cc40b 4544 case 0:
5db06880
NC
4545#ifdef PERL_MAD
4546 if (PL_madskills)
cd81e915 4547 PL_faketokens = 0;
5db06880 4548#endif
3280af22
NIS
4549 if (!PL_rsfp) {
4550 PL_last_uni = 0;
4551 PL_last_lop = 0;
c5ee2135 4552 if (PL_lex_brackets) {
10edeb5d
JH
4553 yyerror((const char *)
4554 (PL_lex_formbrack
4555 ? "Format not terminated"
4556 : "Missing right curly or square bracket"));
c5ee2135 4557 }
4e553d73 4558 DEBUG_T( { PerlIO_printf(Perl_debug_log,
607df283 4559 "### Tokener got EOF\n");
5f80b19c 4560 } );
79072805 4561 TOKEN(0);
463ee0b2 4562 }
3280af22 4563 if (s++ < PL_bufend)
a687059c 4564 goto retry; /* ignore stray nulls */
3280af22
NIS
4565 PL_last_uni = 0;
4566 PL_last_lop = 0;
4567 if (!PL_in_eval && !PL_preambled) {
4568 PL_preambled = TRUE;
5db06880
NC
4569#ifdef PERL_MAD
4570 if (PL_madskills)
cd81e915 4571 PL_faketokens = 1;
5db06880 4572#endif
5ab7ff98
NC
4573 if (PL_perldb) {
4574 /* Generate a string of Perl code to load the debugger.
4575 * If PERL5DB is set, it will return the contents of that,
4576 * otherwise a compile-time require of perl5db.pl. */
4577
4578 const char * const pdb = PerlEnv_getenv("PERL5DB");
4579
4580 if (pdb) {
4581 sv_setpv(PL_linestr, pdb);
4582 sv_catpvs(PL_linestr,";");
4583 } else {
4584 SETERRNO(0,SS_NORMAL);
4585 sv_setpvs(PL_linestr, "BEGIN { require 'perl5db.pl' };");
4586 }
4587 } else
4588 sv_setpvs(PL_linestr,"");
c62eb204
NC
4589 if (PL_preambleav) {
4590 SV **svp = AvARRAY(PL_preambleav);
4591 SV **const end = svp + AvFILLp(PL_preambleav);
4592 while(svp <= end) {
4593 sv_catsv(PL_linestr, *svp);
4594 ++svp;
396482e1 4595 sv_catpvs(PL_linestr, ";");
91b7def8 4596 }
daba3364 4597 sv_free(MUTABLE_SV(PL_preambleav));
3280af22 4598 PL_preambleav = NULL;
91b7def8 4599 }
9f639728
FR
4600 if (PL_minus_E)
4601 sv_catpvs(PL_linestr,
4602 "use feature ':5." STRINGIFY(PERL_VERSION) "';");
3280af22 4603 if (PL_minus_n || PL_minus_p) {
f0e67a1d 4604 sv_catpvs(PL_linestr, "LINE: while (<>) {"/*}*/);
3280af22 4605 if (PL_minus_l)
396482e1 4606 sv_catpvs(PL_linestr,"chomp;");
3280af22 4607 if (PL_minus_a) {
3280af22 4608 if (PL_minus_F) {
3792a11b
NC
4609 if ((*PL_splitstr == '/' || *PL_splitstr == '\''
4610 || *PL_splitstr == '"')
3280af22 4611 && strchr(PL_splitstr + 1, *PL_splitstr))
3db68c4c 4612 Perl_sv_catpvf(aTHX_ PL_linestr, "our @F=split(%s);", PL_splitstr);
54310121 4613 else {
c8ef6a4b
NC
4614 /* "q\0${splitstr}\0" is legal perl. Yes, even NUL
4615 bytes can be used as quoting characters. :-) */
dd374669 4616 const char *splits = PL_splitstr;
91d456ae 4617 sv_catpvs(PL_linestr, "our @F=split(q\0");
48c4c863
NC
4618 do {
4619 /* Need to \ \s */
dd374669
AL
4620 if (*splits == '\\')
4621 sv_catpvn(PL_linestr, splits, 1);
4622 sv_catpvn(PL_linestr, splits, 1);
4623 } while (*splits++);
48c4c863
NC
4624 /* This loop will embed the trailing NUL of
4625 PL_linestr as the last thing it does before
4626 terminating. */
396482e1 4627 sv_catpvs(PL_linestr, ");");
54310121 4628 }
2304df62
AD
4629 }
4630 else
396482e1 4631 sv_catpvs(PL_linestr,"our @F=split(' ');");
2304df62 4632 }
79072805 4633 }
396482e1 4634 sv_catpvs(PL_linestr, "\n");
3280af22
NIS
4635 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4636 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4637 PL_last_lop = PL_last_uni = NULL;
65269a95 4638 if ((PERLDB_LINE || PERLDB_SAVESRC) && PL_curstash != PL_debstash)
5fa550fb 4639 update_debugger_info(PL_linestr, NULL, 0);
79072805 4640 goto retry;
a687059c 4641 }
e929a76b 4642 do {
580561a3
Z
4643 fake_eof = 0;
4644 bof = PL_rsfp ? TRUE : FALSE;
f0e67a1d 4645 if (0) {
7e28d3af 4646 fake_eof:
f0e67a1d
Z
4647 fake_eof = LEX_FAKE_EOF;
4648 }
4649 PL_bufptr = PL_bufend;
17cc9359 4650 CopLINE_inc(PL_curcop);
f0e67a1d 4651 if (!lex_next_chunk(fake_eof)) {
17cc9359 4652 CopLINE_dec(PL_curcop);
f0e67a1d
Z
4653 s = PL_bufptr;
4654 TOKEN(';'); /* not infinite loop because rsfp is NULL now */
4655 }
17cc9359 4656 CopLINE_dec(PL_curcop);
5db06880 4657#ifdef PERL_MAD
f0e67a1d 4658 if (!PL_rsfp)
cd81e915 4659 PL_realtokenstart = -1;
5db06880 4660#endif
f0e67a1d 4661 s = PL_bufptr;
7aa207d6
JH
4662 /* If it looks like the start of a BOM or raw UTF-16,
4663 * check if it in fact is. */
580561a3 4664 if (bof && PL_rsfp &&
7aa207d6
JH
4665 (*s == 0 ||
4666 *(U8*)s == 0xEF ||
4667 *(U8*)s >= 0xFE ||
4668 s[1] == 0)) {
eb160463 4669 bof = PerlIO_tell(PL_rsfp) == (Off_t)SvCUR(PL_linestr);
7e28d3af 4670 if (bof) {
3280af22 4671 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
7e28d3af 4672 s = swallow_bom((U8*)s);
e929a76b 4673 }
378cc40b 4674 }
3280af22 4675 if (PL_doextract) {
a0d0e21e 4676 /* Incest with pod. */
5db06880
NC
4677#ifdef PERL_MAD
4678 if (PL_madskills)
cd81e915 4679 sv_catsv(PL_thiswhite, PL_linestr);
5db06880 4680#endif
01a57ef7 4681 if (*s == '=' && strnEQ(s, "=cut", 4) && !isALPHA(s[4])) {
76f68e9b 4682 sv_setpvs(PL_linestr, "");
3280af22
NIS
4683 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4684 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4685 PL_last_lop = PL_last_uni = NULL;
3280af22 4686 PL_doextract = FALSE;
a0d0e21e 4687 }
4e553d73 4688 }
85613cab
Z
4689 if (PL_rsfp)
4690 incline(s);
3280af22
NIS
4691 } while (PL_doextract);
4692 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = PL_linestart = s;
3280af22 4693 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4694 PL_last_lop = PL_last_uni = NULL;
57843af0 4695 if (CopLINE(PL_curcop) == 1) {
3280af22 4696 while (s < PL_bufend && isSPACE(*s))
79072805 4697 s++;
a0d0e21e 4698 if (*s == ':' && s[1] != ':') /* for csh execing sh scripts */
79072805 4699 s++;
5db06880
NC
4700#ifdef PERL_MAD
4701 if (PL_madskills)
cd81e915 4702 PL_thiswhite = newSVpvn(PL_linestart, s - PL_linestart);
5db06880 4703#endif
bd61b366 4704 d = NULL;
3280af22 4705 if (!PL_in_eval) {
44a8e56a 4706 if (*s == '#' && *(s+1) == '!')
4707 d = s + 2;
4708#ifdef ALTERNATE_SHEBANG
4709 else {
bfed75c6 4710 static char const as[] = ALTERNATE_SHEBANG;
44a8e56a 4711 if (*s == as[0] && strnEQ(s, as, sizeof(as) - 1))
4712 d = s + (sizeof(as) - 1);
4713 }
4714#endif /* ALTERNATE_SHEBANG */
4715 }
4716 if (d) {
b8378b72 4717 char *ipath;
774d564b 4718 char *ipathend;
b8378b72 4719
774d564b 4720 while (isSPACE(*d))
b8378b72
CS
4721 d++;
4722 ipath = d;
774d564b 4723 while (*d && !isSPACE(*d))
4724 d++;
4725 ipathend = d;
4726
4727#ifdef ARG_ZERO_IS_SCRIPT
4728 if (ipathend > ipath) {
4729 /*
4730 * HP-UX (at least) sets argv[0] to the script name,
4731 * which makes $^X incorrect. And Digital UNIX and Linux,
4732 * at least, set argv[0] to the basename of the Perl
4733 * interpreter. So, having found "#!", we'll set it right.
4734 */
fafc274c
NC
4735 SV * const x = GvSV(gv_fetchpvs("\030", GV_ADD|GV_NOTQUAL,
4736 SVt_PV)); /* $^X */
774d564b 4737 assert(SvPOK(x) || SvGMAGICAL(x));
cc49e20b 4738 if (sv_eq(x, CopFILESV(PL_curcop))) {
774d564b 4739 sv_setpvn(x, ipath, ipathend - ipath);
9607fc9c 4740 SvSETMAGIC(x);
4741 }
556c1dec
JH
4742 else {
4743 STRLEN blen;
4744 STRLEN llen;
cfd0369c 4745 const char *bstart = SvPV_const(CopFILESV(PL_curcop),blen);
9d4ba2ae 4746 const char * const lstart = SvPV_const(x,llen);
556c1dec
JH
4747 if (llen < blen) {
4748 bstart += blen - llen;
4749 if (strnEQ(bstart, lstart, llen) && bstart[-1] == '/') {
4750 sv_setpvn(x, ipath, ipathend - ipath);
4751 SvSETMAGIC(x);
4752 }
4753 }
4754 }
774d564b 4755 TAINT_NOT; /* $^X is always tainted, but that's OK */
8ebc5c01 4756 }
774d564b 4757#endif /* ARG_ZERO_IS_SCRIPT */
b8378b72
CS
4758
4759 /*
4760 * Look for options.
4761 */
748a9306 4762 d = instr(s,"perl -");
84e30d1a 4763 if (!d) {
748a9306 4764 d = instr(s,"perl");
84e30d1a
GS
4765#if defined(DOSISH)
4766 /* avoid getting into infinite loops when shebang
4767 * line contains "Perl" rather than "perl" */
4768 if (!d) {
4769 for (d = ipathend-4; d >= ipath; --d) {
4770 if ((*d == 'p' || *d == 'P')
4771 && !ibcmp(d, "perl", 4))
4772 {
4773 break;
4774 }
4775 }
4776 if (d < ipath)
bd61b366 4777 d = NULL;
84e30d1a
GS
4778 }
4779#endif
4780 }
44a8e56a 4781#ifdef ALTERNATE_SHEBANG
4782 /*
4783 * If the ALTERNATE_SHEBANG on this system starts with a
4784 * character that can be part of a Perl expression, then if
4785 * we see it but not "perl", we're probably looking at the
4786 * start of Perl code, not a request to hand off to some
4787 * other interpreter. Similarly, if "perl" is there, but
4788 * not in the first 'word' of the line, we assume the line
4789 * contains the start of the Perl program.
44a8e56a 4790 */
4791 if (d && *s != '#') {
f54cb97a 4792 const char *c = ipath;
44a8e56a 4793 while (*c && !strchr("; \t\r\n\f\v#", *c))
4794 c++;
4795 if (c < d)
bd61b366 4796 d = NULL; /* "perl" not in first word; ignore */
44a8e56a 4797 else
4798 *s = '#'; /* Don't try to parse shebang line */
4799 }
774d564b 4800#endif /* ALTERNATE_SHEBANG */
748a9306 4801 if (!d &&
44a8e56a 4802 *s == '#' &&
774d564b 4803 ipathend > ipath &&
3280af22 4804 !PL_minus_c &&
748a9306 4805 !instr(s,"indir") &&
3280af22 4806 instr(PL_origargv[0],"perl"))
748a9306 4807 {
27da23d5 4808 dVAR;
9f68db38 4809 char **newargv;
9f68db38 4810
774d564b 4811 *ipathend = '\0';
4812 s = ipathend + 1;
3280af22 4813 while (s < PL_bufend && isSPACE(*s))
9f68db38 4814 s++;
3280af22 4815 if (s < PL_bufend) {
d85f917e 4816 Newx(newargv,PL_origargc+3,char*);
9f68db38 4817 newargv[1] = s;
3280af22 4818 while (s < PL_bufend && !isSPACE(*s))
9f68db38
LW
4819 s++;
4820 *s = '\0';
3280af22 4821 Copy(PL_origargv+1, newargv+2, PL_origargc+1, char*);
9f68db38
LW
4822 }
4823 else
3280af22 4824 newargv = PL_origargv;
774d564b 4825 newargv[0] = ipath;
b35112e7 4826 PERL_FPU_PRE_EXEC
b4748376 4827 PerlProc_execv(ipath, EXEC_ARGV_CAST(newargv));
b35112e7 4828 PERL_FPU_POST_EXEC
cea2e8a9 4829 Perl_croak(aTHX_ "Can't exec %s", ipath);
9f68db38 4830 }
748a9306 4831 if (d) {
c35e046a
AL
4832 while (*d && !isSPACE(*d))
4833 d++;
4834 while (SPACE_OR_TAB(*d))
4835 d++;
748a9306
LW
4836
4837 if (*d++ == '-') {
f54cb97a 4838 const bool switches_done = PL_doswitches;
fb993905
GA
4839 const U32 oldpdb = PL_perldb;
4840 const bool oldn = PL_minus_n;
4841 const bool oldp = PL_minus_p;
c7030b81 4842 const char *d1 = d;
fb993905 4843
8cc95fdb 4844 do {
4ba71d51
FC
4845 bool baduni = FALSE;
4846 if (*d1 == 'C') {
bd0ab00d
NC
4847 const char *d2 = d1 + 1;
4848 if (parse_unicode_opts((const char **)&d2)
4849 != PL_unicode)
4850 baduni = TRUE;
4ba71d51
FC
4851 }
4852 if (baduni || *d1 == 'M' || *d1 == 'm') {
c7030b81
NC
4853 const char * const m = d1;
4854 while (*d1 && !isSPACE(*d1))
4855 d1++;
cea2e8a9 4856 Perl_croak(aTHX_ "Too late for \"-%.*s\" option",
c7030b81 4857 (int)(d1 - m), m);
8cc95fdb 4858 }
c7030b81
NC
4859 d1 = moreswitches(d1);
4860 } while (d1);
f0b2cf55
YST
4861 if (PL_doswitches && !switches_done) {
4862 int argc = PL_origargc;
4863 char **argv = PL_origargv;
4864 do {
4865 argc--,argv++;
4866 } while (argc && argv[0][0] == '-' && argv[0][1]);
4867 init_argv_symbols(argc,argv);
4868 }
65269a95 4869 if (((PERLDB_LINE || PERLDB_SAVESRC) && !oldpdb) ||
155aba94 4870 ((PL_minus_n || PL_minus_p) && !(oldn || oldp)))
b084f20b 4871 /* if we have already added "LINE: while (<>) {",
4872 we must not do it again */
748a9306 4873 {
76f68e9b 4874 sv_setpvs(PL_linestr, "");
3280af22
NIS
4875 PL_oldoldbufptr = PL_oldbufptr = s = PL_linestart = SvPVX(PL_linestr);
4876 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 4877 PL_last_lop = PL_last_uni = NULL;
3280af22 4878 PL_preambled = FALSE;
65269a95 4879 if (PERLDB_LINE || PERLDB_SAVESRC)
3280af22 4880 (void)gv_fetchfile(PL_origfilename);
748a9306
LW
4881 goto retry;
4882 }
a0d0e21e 4883 }
79072805 4884 }
9f68db38 4885 }
79072805 4886 }
3280af22
NIS
4887 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4888 PL_bufptr = s;
4889 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4890 return yylex();
ae986130 4891 }
378cc40b 4892 goto retry;
4fdae800 4893 case '\r':
6a27c188 4894#ifdef PERL_STRICT_CR
cea2e8a9 4895 Perl_warn(aTHX_ "Illegal character \\%03o (carriage return)", '\r');
4e553d73 4896 Perl_croak(aTHX_
cc507455 4897 "\t(Maybe you didn't strip carriage returns after a network transfer?)\n");
a868473f 4898#endif
4fdae800 4899 case ' ': case '\t': case '\f': case 013:
5db06880 4900#ifdef PERL_MAD
cd81e915 4901 PL_realtokenstart = -1;
ac372eb8
RD
4902 if (!PL_thiswhite)
4903 PL_thiswhite = newSVpvs("");
4904 sv_catpvn(PL_thiswhite, s, 1);
5db06880 4905#endif
ac372eb8 4906 s++;
378cc40b 4907 goto retry;
378cc40b 4908 case '#':
e929a76b 4909 case '\n':
5db06880 4910#ifdef PERL_MAD
cd81e915 4911 PL_realtokenstart = -1;
5db06880 4912 if (PL_madskills)
cd81e915 4913 PL_faketokens = 0;
5db06880 4914#endif
3280af22 4915 if (PL_lex_state != LEX_NORMAL || (PL_in_eval && !PL_rsfp)) {
df0deb90
GS
4916 if (*s == '#' && s == PL_linestart && PL_in_eval && !PL_rsfp) {
4917 /* handle eval qq[#line 1 "foo"\n ...] */
4918 CopLINE_dec(PL_curcop);
4919 incline(s);
4920 }
5db06880
NC
4921 if (PL_madskills && !PL_lex_formbrack && !PL_in_eval) {
4922 s = SKIPSPACE0(s);
4923 if (!PL_in_eval || PL_rsfp)
4924 incline(s);
4925 }
4926 else {
4927 d = s;
4928 while (d < PL_bufend && *d != '\n')
4929 d++;
4930 if (d < PL_bufend)
4931 d++;
4932 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4933 Perl_croak(aTHX_ "panic: input overflow");
4934#ifdef PERL_MAD
4935 if (PL_madskills)
cd81e915 4936 PL_thiswhite = newSVpvn(s, d - s);
5db06880
NC
4937#endif
4938 s = d;
4939 incline(s);
4940 }
3280af22
NIS
4941 if (PL_lex_formbrack && PL_lex_brackets <= PL_lex_formbrack) {
4942 PL_bufptr = s;
4943 PL_lex_state = LEX_FORMLINE;
cea2e8a9 4944 return yylex();
a687059c 4945 }
378cc40b 4946 }
a687059c 4947 else {
5db06880
NC
4948#ifdef PERL_MAD
4949 if (PL_madskills && CopLINE(PL_curcop) >= 1 && !PL_lex_formbrack) {
4950 if (CopLINE(PL_curcop) == 1 && s[0] == '#' && s[1] == '!') {
cd81e915 4951 PL_faketokens = 0;
5db06880
NC
4952 s = SKIPSPACE0(s);
4953 TOKEN(PEG); /* make sure any #! line is accessible */
4954 }
4955 s = SKIPSPACE0(s);
4956 }
4957 else {
4958/* if (PL_madskills && PL_lex_formbrack) { */
4959 d = s;
4960 while (d < PL_bufend && *d != '\n')
4961 d++;
4962 if (d < PL_bufend)
4963 d++;
4964 else if (d > PL_bufend) /* Found by Ilya: feed random input to Perl. */
4965 Perl_croak(aTHX_ "panic: input overflow");
4966 if (PL_madskills && CopLINE(PL_curcop) >= 1) {
cd81e915 4967 if (!PL_thiswhite)
6b29d1f5 4968 PL_thiswhite = newSVpvs("");
5db06880 4969 if (CopLINE(PL_curcop) == 1) {
76f68e9b 4970 sv_setpvs(PL_thiswhite, "");
cd81e915 4971 PL_faketokens = 0;
5db06880 4972 }
cd81e915 4973 sv_catpvn(PL_thiswhite, s, d - s);
5db06880
NC
4974 }
4975 s = d;
4976/* }
4977 *s = '\0';
4978 PL_bufend = s; */
4979 }
4980#else
378cc40b 4981 *s = '\0';
3280af22 4982 PL_bufend = s;
5db06880 4983#endif
a687059c 4984 }
378cc40b
LW
4985 goto retry;
4986 case '-':
79072805 4987 if (s[1] && isALPHA(s[1]) && !isALNUM(s[2])) {
e5edeb50 4988 I32 ftst = 0;
90771dc0 4989 char tmp;
e5edeb50 4990
378cc40b 4991 s++;
3280af22 4992 PL_bufptr = s;
748a9306
LW
4993 tmp = *s++;
4994
bf4acbe4 4995 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306
LW
4996 s++;
4997
4998 if (strnEQ(s,"=>",2)) {
3280af22 4999 s = force_word(PL_bufptr,WORD,FALSE,FALSE,FALSE);
931e0695 5000 DEBUG_T( { printbuf("### Saw unary minus before =>, forcing word %s\n", s); } );
748a9306
LW
5001 OPERATOR('-'); /* unary minus */
5002 }
3280af22 5003 PL_last_uni = PL_oldbufptr;
748a9306 5004 switch (tmp) {
e5edeb50
JH
5005 case 'r': ftst = OP_FTEREAD; break;
5006 case 'w': ftst = OP_FTEWRITE; break;
5007 case 'x': ftst = OP_FTEEXEC; break;
5008 case 'o': ftst = OP_FTEOWNED; break;
5009 case 'R': ftst = OP_FTRREAD; break;
5010 case 'W': ftst = OP_FTRWRITE; break;
5011 case 'X': ftst = OP_FTREXEC; break;
5012 case 'O': ftst = OP_FTROWNED; break;
5013 case 'e': ftst = OP_FTIS; break;
5014 case 'z': ftst = OP_FTZERO; break;
5015 case 's': ftst = OP_FTSIZE; break;
5016 case 'f': ftst = OP_FTFILE; break;
5017 case 'd': ftst = OP_FTDIR; break;
5018 case 'l': ftst = OP_FTLINK; break;
5019 case 'p': ftst = OP_FTPIPE; break;
5020 case 'S': ftst = OP_FTSOCK; break;
5021 case 'u': ftst = OP_FTSUID; break;
5022 case 'g': ftst = OP_FTSGID; break;
5023 case 'k': ftst = OP_FTSVTX; break;
5024 case 'b': ftst = OP_FTBLK; break;
5025 case 'c': ftst = OP_FTCHR; break;
5026 case 't': ftst = OP_FTTTY; break;
5027 case 'T': ftst = OP_FTTEXT; break;
5028 case 'B': ftst = OP_FTBINARY; break;
5029 case 'M': case 'A': case 'C':
fafc274c 5030 gv_fetchpvs("\024", GV_ADD|GV_NOTQUAL, SVt_PV);
e5edeb50
JH
5031 switch (tmp) {
5032 case 'M': ftst = OP_FTMTIME; break;
5033 case 'A': ftst = OP_FTATIME; break;
5034 case 'C': ftst = OP_FTCTIME; break;
5035 default: break;
5036 }
5037 break;
378cc40b 5038 default:
378cc40b
LW
5039 break;
5040 }
e5edeb50 5041 if (ftst) {
eb160463 5042 PL_last_lop_op = (OPCODE)ftst;
4e553d73 5043 DEBUG_T( { PerlIO_printf(Perl_debug_log,
a18d764d 5044 "### Saw file test %c\n", (int)tmp);
5f80b19c 5045 } );
e5edeb50
JH
5046 FTST(ftst);
5047 }
5048 else {
5049 /* Assume it was a minus followed by a one-letter named
5050 * subroutine call (or a -bareword), then. */
95c31fe3 5051 DEBUG_T( { PerlIO_printf(Perl_debug_log,
17ad61e0 5052 "### '-%c' looked like a file test but was not\n",
4fccd7c6 5053 (int) tmp);
5f80b19c 5054 } );
3cf7b4c4 5055 s = --PL_bufptr;
e5edeb50 5056 }
378cc40b 5057 }
90771dc0
NC
5058 {
5059 const char tmp = *s++;
5060 if (*s == tmp) {
5061 s++;
5062 if (PL_expect == XOPERATOR)
5063 TERM(POSTDEC);
5064 else
5065 OPERATOR(PREDEC);
5066 }
5067 else if (*s == '>') {
5068 s++;
29595ff2 5069 s = SKIPSPACE1(s);
90771dc0
NC
5070 if (isIDFIRST_lazy_if(s,UTF)) {
5071 s = force_word(s,METHOD,FALSE,TRUE,FALSE);
5072 TOKEN(ARROW);
5073 }
5074 else if (*s == '$')
5075 OPERATOR(ARROW);
5076 else
5077 TERM(ARROW);
5078 }
3280af22 5079 if (PL_expect == XOPERATOR)
90771dc0
NC
5080 Aop(OP_SUBTRACT);
5081 else {
5082 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5083 check_uni();
5084 OPERATOR('-'); /* unary minus */
79072805 5085 }
2f3197b3 5086 }
79072805 5087
378cc40b 5088 case '+':
90771dc0
NC
5089 {
5090 const char tmp = *s++;
5091 if (*s == tmp) {
5092 s++;
5093 if (PL_expect == XOPERATOR)
5094 TERM(POSTINC);
5095 else
5096 OPERATOR(PREINC);
5097 }
3280af22 5098 if (PL_expect == XOPERATOR)
90771dc0
NC
5099 Aop(OP_ADD);
5100 else {
5101 if (isSPACE(*s) || !isSPACE(*PL_bufptr))
5102 check_uni();
5103 OPERATOR('+');
5104 }
2f3197b3 5105 }
a687059c 5106
378cc40b 5107 case '*':
3280af22
NIS
5108 if (PL_expect != XOPERATOR) {
5109 s = scan_ident(s, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5110 PL_expect = XOPERATOR;
5111 force_ident(PL_tokenbuf, '*');
5112 if (!*PL_tokenbuf)
a0d0e21e 5113 PREREF('*');
79072805 5114 TERM('*');
a687059c 5115 }
79072805
LW
5116 s++;
5117 if (*s == '*') {
a687059c 5118 s++;
79072805 5119 PWop(OP_POW);
a687059c 5120 }
79072805
LW
5121 Mop(OP_MULTIPLY);
5122
378cc40b 5123 case '%':
3280af22 5124 if (PL_expect == XOPERATOR) {
bbce6d69 5125 ++s;
5126 Mop(OP_MODULO);
a687059c 5127 }
3280af22 5128 PL_tokenbuf[0] = '%';
e8ae98db
RGS
5129 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5130 sizeof PL_tokenbuf - 1, FALSE);
3280af22 5131 if (!PL_tokenbuf[1]) {
bbce6d69 5132 PREREF('%');
a687059c 5133 }
3280af22 5134 PL_pending_ident = '%';
bbce6d69 5135 TERM('%');
a687059c 5136
378cc40b 5137 case '^':
79072805 5138 s++;
a0d0e21e 5139 BOop(OP_BIT_XOR);
79072805 5140 case '[':
3280af22 5141 PL_lex_brackets++;
df3467db
IG
5142 {
5143 const char tmp = *s++;
5144 OPERATOR(tmp);
5145 }
378cc40b 5146 case '~':
0d863452 5147 if (s[1] == '~'
3e7dd34d 5148 && (PL_expect == XOPERATOR || PL_expect == XTERMORDORDOR))
0d863452
RH
5149 {
5150 s += 2;
5151 Eop(OP_SMARTMATCH);
5152 }
378cc40b 5153 case ',':
90771dc0
NC
5154 {
5155 const char tmp = *s++;
5156 OPERATOR(tmp);
5157 }
a0d0e21e
LW
5158 case ':':
5159 if (s[1] == ':') {
5160 len = 0;
0bfa2a8a 5161 goto just_a_word_zero_gv;
a0d0e21e
LW
5162 }
5163 s++;
09bef843
SB
5164 switch (PL_expect) {
5165 OP *attrs;
5db06880
NC
5166#ifdef PERL_MAD
5167 I32 stuffstart;
5168#endif
09bef843
SB
5169 case XOPERATOR:
5170 if (!PL_in_my || PL_lex_state != LEX_NORMAL)
5171 break;
5172 PL_bufptr = s; /* update in case we back off */
d83f38d8
NC
5173 if (*s == '=') {
5174 deprecate(":= for an empty attribute list");
5175 }
09bef843
SB
5176 goto grabattrs;
5177 case XATTRBLOCK:
5178 PL_expect = XBLOCK;
5179 goto grabattrs;
5180 case XATTRTERM:
5181 PL_expect = XTERMBLOCK;
5182 grabattrs:
5db06880
NC
5183#ifdef PERL_MAD
5184 stuffstart = s - SvPVX(PL_linestr) - 1;
5185#endif
29595ff2 5186 s = PEEKSPACE(s);
5f66b61c 5187 attrs = NULL;
7e2040f0 5188 while (isIDFIRST_lazy_if(s,UTF)) {
90771dc0 5189 I32 tmp;
5cc237b8 5190 SV *sv;
09bef843 5191 d = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 5192 if (isLOWER(*s) && (tmp = keyword(PL_tokenbuf, len, 0))) {
f9829d6b
GS
5193 if (tmp < 0) tmp = -tmp;
5194 switch (tmp) {
5195 case KEY_or:
5196 case KEY_and:
5197 case KEY_for:
11baf631 5198 case KEY_foreach:
f9829d6b
GS
5199 case KEY_unless:
5200 case KEY_if:
5201 case KEY_while:
5202 case KEY_until:
5203 goto got_attrs;
5204 default:
5205 break;
5206 }
5207 }
5cc237b8 5208 sv = newSVpvn(s, len);
09bef843
SB
5209 if (*d == '(') {
5210 d = scan_str(d,TRUE,TRUE);
5211 if (!d) {
09bef843
SB
5212 /* MUST advance bufptr here to avoid bogus
5213 "at end of line" context messages from yyerror().
5214 */
5215 PL_bufptr = s + len;
5216 yyerror("Unterminated attribute parameter in attribute list");
5217 if (attrs)
5218 op_free(attrs);
5cc237b8 5219 sv_free(sv);
bbf60fe6 5220 return REPORT(0); /* EOF indicator */
09bef843
SB
5221 }
5222 }
5223 if (PL_lex_stuff) {
09bef843
SB
5224 sv_catsv(sv, PL_lex_stuff);
5225 attrs = append_elem(OP_LIST, attrs,
5226 newSVOP(OP_CONST, 0, sv));
5227 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 5228 PL_lex_stuff = NULL;
09bef843
SB
5229 }
5230 else {
5cc237b8
BS
5231 if (len == 6 && strnEQ(SvPVX(sv), "unique", len)) {
5232 sv_free(sv);
1108974d 5233 if (PL_in_my == KEY_our) {
df9a6019 5234 deprecate(":unique");
1108974d 5235 }
bfed75c6 5236 else
371fce9b
DM
5237 Perl_croak(aTHX_ "The 'unique' attribute may only be applied to 'our' variables");
5238 }
5239
d3cea301
SB
5240 /* NOTE: any CV attrs applied here need to be part of
5241 the CVf_BUILTIN_ATTRS define in cv.h! */
5cc237b8
BS
5242 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "lvalue", len)) {
5243 sv_free(sv);
78f9721b 5244 CvLVALUE_on(PL_compcv);
5cc237b8
BS
5245 }
5246 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "locked", len)) {
5247 sv_free(sv);
8e5dadda 5248 deprecate(":locked");
5cc237b8
BS
5249 }
5250 else if (!PL_in_my && len == 6 && strnEQ(SvPVX(sv), "method", len)) {
5251 sv_free(sv);
78f9721b 5252 CvMETHOD_on(PL_compcv);
5cc237b8 5253 }
78f9721b
SM
5254 /* After we've set the flags, it could be argued that
5255 we don't need to do the attributes.pm-based setting
5256 process, and shouldn't bother appending recognized
d3cea301
SB
5257 flags. To experiment with that, uncomment the
5258 following "else". (Note that's already been
5259 uncommented. That keeps the above-applied built-in
5260 attributes from being intercepted (and possibly
5261 rejected) by a package's attribute routines, but is
5262 justified by the performance win for the common case
5263 of applying only built-in attributes.) */
0256094b 5264 else
78f9721b
SM
5265 attrs = append_elem(OP_LIST, attrs,
5266 newSVOP(OP_CONST, 0,
5cc237b8 5267 sv));
09bef843 5268 }
29595ff2 5269 s = PEEKSPACE(d);
0120eecf 5270 if (*s == ':' && s[1] != ':')
29595ff2 5271 s = PEEKSPACE(s+1);
0120eecf
GS
5272 else if (s == d)
5273 break; /* require real whitespace or :'s */
29595ff2 5274 /* XXX losing whitespace on sequential attributes here */
09bef843 5275 }
90771dc0
NC
5276 {
5277 const char tmp
5278 = (PL_expect == XOPERATOR ? '=' : '{'); /*'}(' for vi */
5279 if (*s != ';' && *s != '}' && *s != tmp
5280 && (tmp != '=' || *s != ')')) {
5281 const char q = ((*s == '\'') ? '"' : '\'');
5282 /* If here for an expression, and parsed no attrs, back
5283 off. */
5284 if (tmp == '=' && !attrs) {
5285 s = PL_bufptr;
5286 break;
5287 }
5288 /* MUST advance bufptr here to avoid bogus "at end of line"
5289 context messages from yyerror().
5290 */
5291 PL_bufptr = s;
10edeb5d
JH
5292 yyerror( (const char *)
5293 (*s
5294 ? Perl_form(aTHX_ "Invalid separator character "
5295 "%c%c%c in attribute list", q, *s, q)
5296 : "Unterminated attribute list" ) );
90771dc0
NC
5297 if (attrs)
5298 op_free(attrs);
5299 OPERATOR(':');
09bef843 5300 }
09bef843 5301 }
f9829d6b 5302 got_attrs:
09bef843 5303 if (attrs) {
cd81e915 5304 start_force(PL_curforce);
9ded7720 5305 NEXTVAL_NEXTTOKE.opval = attrs;
cd81e915 5306 CURMAD('_', PL_nextwhite);
89122651 5307 force_next(THING);
5db06880
NC
5308 }
5309#ifdef PERL_MAD
5310 if (PL_madskills) {
cd81e915 5311 PL_thistoken = newSVpvn(SvPVX(PL_linestr) + stuffstart,
5db06880 5312 (s - SvPVX(PL_linestr)) - stuffstart);
09bef843 5313 }
5db06880 5314#endif
09bef843
SB
5315 TOKEN(COLONATTR);
5316 }
a0d0e21e 5317 OPERATOR(':');
8990e307
LW
5318 case '(':
5319 s++;
3280af22
NIS
5320 if (PL_last_lop == PL_oldoldbufptr || PL_last_uni == PL_oldoldbufptr)
5321 PL_oldbufptr = PL_oldoldbufptr; /* allow print(STDOUT 123) */
a0d0e21e 5322 else
3280af22 5323 PL_expect = XTERM;
29595ff2 5324 s = SKIPSPACE1(s);
a0d0e21e 5325 TOKEN('(');
378cc40b 5326 case ';':
f4dd75d9 5327 CLINE;
90771dc0
NC
5328 {
5329 const char tmp = *s++;
5330 OPERATOR(tmp);
5331 }
378cc40b 5332 case ')':
90771dc0
NC
5333 {
5334 const char tmp = *s++;
29595ff2 5335 s = SKIPSPACE1(s);
90771dc0
NC
5336 if (*s == '{')
5337 PREBLOCK(tmp);
5338 TERM(tmp);
5339 }
79072805
LW
5340 case ']':
5341 s++;
3280af22 5342 if (PL_lex_brackets <= 0)
d98d5fff 5343 yyerror("Unmatched right square bracket");
463ee0b2 5344 else
3280af22
NIS
5345 --PL_lex_brackets;
5346 if (PL_lex_state == LEX_INTERPNORMAL) {
5347 if (PL_lex_brackets == 0) {
02255c60
FC
5348 if (*s == '-' && s[1] == '>')
5349 PL_lex_state = LEX_INTERPENDMAYBE;
5350 else if (*s != '[' && *s != '{')
3280af22 5351 PL_lex_state = LEX_INTERPEND;
79072805
LW
5352 }
5353 }
4633a7c4 5354 TERM(']');
79072805
LW
5355 case '{':
5356 leftbracket:
79072805 5357 s++;
3280af22 5358 if (PL_lex_brackets > 100) {
8edd5f42 5359 Renew(PL_lex_brackstack, PL_lex_brackets + 10, char);
8990e307 5360 }
3280af22 5361 switch (PL_expect) {
a0d0e21e 5362 case XTERM:
3280af22 5363 if (PL_lex_formbrack) {
a0d0e21e
LW
5364 s--;
5365 PRETERMBLOCK(DO);
5366 }
3280af22
NIS
5367 if (PL_oldoldbufptr == PL_last_lop)
5368 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5369 else
3280af22 5370 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
79072805 5371 OPERATOR(HASHBRACK);
a0d0e21e 5372 case XOPERATOR:
bf4acbe4 5373 while (s < PL_bufend && SPACE_OR_TAB(*s))
748a9306 5374 s++;
44a8e56a 5375 d = s;
3280af22
NIS
5376 PL_tokenbuf[0] = '\0';
5377 if (d < PL_bufend && *d == '-') {
5378 PL_tokenbuf[0] = '-';
44a8e56a 5379 d++;
bf4acbe4 5380 while (d < PL_bufend && SPACE_OR_TAB(*d))
44a8e56a 5381 d++;
5382 }
7e2040f0 5383 if (d < PL_bufend && isIDFIRST_lazy_if(d,UTF)) {
3280af22 5384 d = scan_word(d, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1,
8903cb82 5385 FALSE, &len);
bf4acbe4 5386 while (d < PL_bufend && SPACE_OR_TAB(*d))
748a9306
LW
5387 d++;
5388 if (*d == '}') {
f54cb97a 5389 const char minus = (PL_tokenbuf[0] == '-');
44a8e56a 5390 s = force_word(s + minus, WORD, FALSE, TRUE, FALSE);
5391 if (minus)
5392 force_next('-');
748a9306
LW
5393 }
5394 }
5395 /* FALL THROUGH */
09bef843 5396 case XATTRBLOCK:
748a9306 5397 case XBLOCK:
3280af22
NIS
5398 PL_lex_brackstack[PL_lex_brackets++] = XSTATE;
5399 PL_expect = XSTATE;
a0d0e21e 5400 break;
09bef843 5401 case XATTRTERM:
a0d0e21e 5402 case XTERMBLOCK:
3280af22
NIS
5403 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
5404 PL_expect = XSTATE;
a0d0e21e
LW
5405 break;
5406 default: {
f54cb97a 5407 const char *t;
3280af22
NIS
5408 if (PL_oldoldbufptr == PL_last_lop)
5409 PL_lex_brackstack[PL_lex_brackets++] = XTERM;
a0d0e21e 5410 else
3280af22 5411 PL_lex_brackstack[PL_lex_brackets++] = XOPERATOR;
29595ff2 5412 s = SKIPSPACE1(s);
8452ff4b
SB
5413 if (*s == '}') {
5414 if (PL_expect == XREF && PL_lex_state == LEX_INTERPNORMAL) {
5415 PL_expect = XTERM;
5416 /* This hack is to get the ${} in the message. */
5417 PL_bufptr = s+1;
5418 yyerror("syntax error");
5419 break;
5420 }
a0d0e21e 5421 OPERATOR(HASHBRACK);
8452ff4b 5422 }
b8a4b1be
GS
5423 /* This hack serves to disambiguate a pair of curlies
5424 * as being a block or an anon hash. Normally, expectation
5425 * determines that, but in cases where we're not in a
5426 * position to expect anything in particular (like inside
5427 * eval"") we have to resolve the ambiguity. This code
5428 * covers the case where the first term in the curlies is a
5429 * quoted string. Most other cases need to be explicitly
a0288114 5430 * disambiguated by prepending a "+" before the opening
b8a4b1be
GS
5431 * curly in order to force resolution as an anon hash.
5432 *
5433 * XXX should probably propagate the outer expectation
5434 * into eval"" to rely less on this hack, but that could
5435 * potentially break current behavior of eval"".
5436 * GSAR 97-07-21
5437 */
5438 t = s;
5439 if (*s == '\'' || *s == '"' || *s == '`') {
5440 /* common case: get past first string, handling escapes */
3280af22 5441 for (t++; t < PL_bufend && *t != *s;)
b8a4b1be
GS
5442 if (*t++ == '\\' && (*t == '\\' || *t == *s))
5443 t++;
5444 t++;
a0d0e21e 5445 }
b8a4b1be 5446 else if (*s == 'q') {
3280af22 5447 if (++t < PL_bufend
b8a4b1be 5448 && (!isALNUM(*t)
3280af22 5449 || ((*t == 'q' || *t == 'x') && ++t < PL_bufend
0505442f
GS
5450 && !isALNUM(*t))))
5451 {
abc667d1 5452 /* skip q//-like construct */
f54cb97a 5453 const char *tmps;
b8a4b1be
GS
5454 char open, close, term;
5455 I32 brackets = 1;
5456
3280af22 5457 while (t < PL_bufend && isSPACE(*t))
b8a4b1be 5458 t++;
abc667d1
DM
5459 /* check for q => */
5460 if (t+1 < PL_bufend && t[0] == '=' && t[1] == '>') {
5461 OPERATOR(HASHBRACK);
5462 }
b8a4b1be
GS
5463 term = *t;
5464 open = term;
5465 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
5466 term = tmps[5];
5467 close = term;
5468 if (open == close)
3280af22
NIS
5469 for (t++; t < PL_bufend; t++) {
5470 if (*t == '\\' && t+1 < PL_bufend && open != '\\')
b8a4b1be 5471 t++;
6d07e5e9 5472 else if (*t == open)
b8a4b1be
GS
5473 break;
5474 }
abc667d1 5475 else {
3280af22
NIS
5476 for (t++; t < PL_bufend; t++) {
5477 if (*t == '\\' && t+1 < PL_bufend)
b8a4b1be 5478 t++;
6d07e5e9 5479 else if (*t == close && --brackets <= 0)
b8a4b1be
GS
5480 break;
5481 else if (*t == open)
5482 brackets++;
5483 }
abc667d1
DM
5484 }
5485 t++;
b8a4b1be 5486 }
abc667d1
DM
5487 else
5488 /* skip plain q word */
5489 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
5490 t += UTF8SKIP(t);
a0d0e21e 5491 }
7e2040f0 5492 else if (isALNUM_lazy_if(t,UTF)) {
0505442f 5493 t += UTF8SKIP(t);
7e2040f0 5494 while (t < PL_bufend && isALNUM_lazy_if(t,UTF))
0505442f 5495 t += UTF8SKIP(t);
a0d0e21e 5496 }
3280af22 5497 while (t < PL_bufend && isSPACE(*t))
a0d0e21e 5498 t++;
b8a4b1be
GS
5499 /* if comma follows first term, call it an anon hash */
5500 /* XXX it could be a comma expression with loop modifiers */
3280af22 5501 if (t < PL_bufend && ((*t == ',' && (*s == 'q' || !isLOWER(*s)))
b8a4b1be 5502 || (*t == '=' && t[1] == '>')))
a0d0e21e 5503 OPERATOR(HASHBRACK);
3280af22 5504 if (PL_expect == XREF)
4e4e412b 5505 PL_expect = XTERM;
a0d0e21e 5506 else {
3280af22
NIS
5507 PL_lex_brackstack[PL_lex_brackets-1] = XSTATE;
5508 PL_expect = XSTATE;
a0d0e21e 5509 }
8990e307 5510 }
a0d0e21e 5511 break;
463ee0b2 5512 }
6154021b 5513 pl_yylval.ival = CopLINE(PL_curcop);
79072805 5514 if (isSPACE(*s) || *s == '#')
3280af22 5515 PL_copline = NOLINE; /* invalidate current command line number */
79072805 5516 TOKEN('{');
378cc40b 5517 case '}':
79072805
LW
5518 rightbracket:
5519 s++;
3280af22 5520 if (PL_lex_brackets <= 0)
d98d5fff 5521 yyerror("Unmatched right curly bracket");
463ee0b2 5522 else
3280af22 5523 PL_expect = (expectation)PL_lex_brackstack[--PL_lex_brackets];
c2e66d9e 5524 if (PL_lex_brackets < PL_lex_formbrack && PL_lex_state != LEX_INTERPNORMAL)
3280af22
NIS
5525 PL_lex_formbrack = 0;
5526 if (PL_lex_state == LEX_INTERPNORMAL) {
5527 if (PL_lex_brackets == 0) {
9059aa12
LW
5528 if (PL_expect & XFAKEBRACK) {
5529 PL_expect &= XENUMMASK;
3280af22
NIS
5530 PL_lex_state = LEX_INTERPEND;
5531 PL_bufptr = s;
5db06880
NC
5532#if 0
5533 if (PL_madskills) {
cd81e915 5534 if (!PL_thiswhite)
6b29d1f5 5535 PL_thiswhite = newSVpvs("");
76f68e9b 5536 sv_catpvs(PL_thiswhite,"}");
5db06880
NC
5537 }
5538#endif
cea2e8a9 5539 return yylex(); /* ignore fake brackets */
79072805 5540 }
fa83b5b6 5541 if (*s == '-' && s[1] == '>')
3280af22 5542 PL_lex_state = LEX_INTERPENDMAYBE;
fa83b5b6 5543 else if (*s != '[' && *s != '{')
3280af22 5544 PL_lex_state = LEX_INTERPEND;
79072805
LW
5545 }
5546 }
9059aa12
LW
5547 if (PL_expect & XFAKEBRACK) {
5548 PL_expect &= XENUMMASK;
3280af22 5549 PL_bufptr = s;
cea2e8a9 5550 return yylex(); /* ignore fake brackets */
748a9306 5551 }
cd81e915 5552 start_force(PL_curforce);
5db06880
NC
5553 if (PL_madskills) {
5554 curmad('X', newSVpvn(s-1,1));
cd81e915 5555 CURMAD('_', PL_thiswhite);
5db06880 5556 }
79072805 5557 force_next('}');
5db06880 5558#ifdef PERL_MAD
cd81e915 5559 if (!PL_thistoken)
6b29d1f5 5560 PL_thistoken = newSVpvs("");
5db06880 5561#endif
79072805 5562 TOKEN(';');
378cc40b
LW
5563 case '&':
5564 s++;
90771dc0 5565 if (*s++ == '&')
a0d0e21e 5566 AOPERATOR(ANDAND);
378cc40b 5567 s--;
3280af22 5568 if (PL_expect == XOPERATOR) {
041457d9
DM
5569 if (PL_bufptr == PL_linestart && ckWARN(WARN_SEMICOLON)
5570 && isIDFIRST_lazy_if(s,UTF))
7e2040f0 5571 {
57843af0 5572 CopLINE_dec(PL_curcop);
f1f66076 5573 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 5574 CopLINE_inc(PL_curcop);
463ee0b2 5575 }
79072805 5576 BAop(OP_BIT_AND);
463ee0b2 5577 }
79072805 5578
3280af22
NIS
5579 s = scan_ident(s - 1, PL_bufend, PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
5580 if (*PL_tokenbuf) {
5581 PL_expect = XOPERATOR;
5582 force_ident(PL_tokenbuf, '&');
463ee0b2 5583 }
79072805
LW
5584 else
5585 PREREF('&');
6154021b 5586 pl_yylval.ival = (OPpENTERSUB_AMPER<<8);
79072805
LW
5587 TERM('&');
5588
378cc40b
LW
5589 case '|':
5590 s++;
90771dc0 5591 if (*s++ == '|')
a0d0e21e 5592 AOPERATOR(OROR);
378cc40b 5593 s--;
79072805 5594 BOop(OP_BIT_OR);
378cc40b
LW
5595 case '=':
5596 s++;
748a9306 5597 {
90771dc0
NC
5598 const char tmp = *s++;
5599 if (tmp == '=')
5600 Eop(OP_EQ);
5601 if (tmp == '>')
5602 OPERATOR(',');
5603 if (tmp == '~')
5604 PMop(OP_MATCH);
5605 if (tmp && isSPACE(*s) && ckWARN(WARN_SYNTAX)
5606 && strchr("+-*/%.^&|<",tmp))
5607 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5608 "Reversed %c= operator",(int)tmp);
5609 s--;
5610 if (PL_expect == XSTATE && isALPHA(tmp) &&
5611 (s == PL_linestart+1 || s[-2] == '\n') )
5612 {
5613 if (PL_in_eval && !PL_rsfp) {
5614 d = PL_bufend;
5615 while (s < d) {
5616 if (*s++ == '\n') {
5617 incline(s);
5618 if (strnEQ(s,"=cut",4)) {
5619 s = strchr(s,'\n');
5620 if (s)
5621 s++;
5622 else
5623 s = d;
5624 incline(s);
5625 goto retry;
5626 }
5627 }
a5f75d66 5628 }
90771dc0 5629 goto retry;
a5f75d66 5630 }
5db06880
NC
5631#ifdef PERL_MAD
5632 if (PL_madskills) {
cd81e915 5633 if (!PL_thiswhite)
6b29d1f5 5634 PL_thiswhite = newSVpvs("");
cd81e915 5635 sv_catpvn(PL_thiswhite, PL_linestart,
5db06880
NC
5636 PL_bufend - PL_linestart);
5637 }
5638#endif
90771dc0
NC
5639 s = PL_bufend;
5640 PL_doextract = TRUE;
5641 goto retry;
a5f75d66 5642 }
a0d0e21e 5643 }
3280af22 5644 if (PL_lex_brackets < PL_lex_formbrack) {
c35e046a 5645 const char *t = s;
51882d45 5646#ifdef PERL_STRICT_CR
c35e046a 5647 while (SPACE_OR_TAB(*t))
51882d45 5648#else
c35e046a 5649 while (SPACE_OR_TAB(*t) || *t == '\r')
51882d45 5650#endif
c35e046a 5651 t++;
a0d0e21e
LW
5652 if (*t == '\n' || *t == '#') {
5653 s--;
3280af22 5654 PL_expect = XBLOCK;
a0d0e21e
LW
5655 goto leftbracket;
5656 }
79072805 5657 }
6154021b 5658 pl_yylval.ival = 0;
a0d0e21e 5659 OPERATOR(ASSIGNOP);
378cc40b
LW
5660 case '!':
5661 s++;
90771dc0
NC
5662 {
5663 const char tmp = *s++;
5664 if (tmp == '=') {
5665 /* was this !=~ where !~ was meant?
5666 * warn on m:!=~\s+([/?]|[msy]\W|tr\W): */
5667
5668 if (*s == '~' && ckWARN(WARN_SYNTAX)) {
5669 const char *t = s+1;
5670
5671 while (t < PL_bufend && isSPACE(*t))
5672 ++t;
5673
5674 if (*t == '/' || *t == '?' ||
5675 ((*t == 'm' || *t == 's' || *t == 'y')
5676 && !isALNUM(t[1])) ||
5677 (*t == 't' && t[1] == 'r' && !isALNUM(t[2])))
5678 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5679 "!=~ should be !~");
5680 }
5681 Eop(OP_NE);
5682 }
5683 if (tmp == '~')
5684 PMop(OP_NOT);
5685 }
378cc40b
LW
5686 s--;
5687 OPERATOR('!');
5688 case '<':
3280af22 5689 if (PL_expect != XOPERATOR) {
93a17b20 5690 if (s[1] != '<' && !strchr(s,'>'))
2f3197b3 5691 check_uni();
79072805
LW
5692 if (s[1] == '<')
5693 s = scan_heredoc(s);
5694 else
5695 s = scan_inputsymbol(s);
5696 TERM(sublex_start());
378cc40b
LW
5697 }
5698 s++;
90771dc0
NC
5699 {
5700 char tmp = *s++;
5701 if (tmp == '<')
5702 SHop(OP_LEFT_SHIFT);
5703 if (tmp == '=') {
5704 tmp = *s++;
5705 if (tmp == '>')
5706 Eop(OP_NCMP);
5707 s--;
5708 Rop(OP_LE);
5709 }
395c3793 5710 }
378cc40b 5711 s--;
79072805 5712 Rop(OP_LT);
378cc40b
LW
5713 case '>':
5714 s++;
90771dc0
NC
5715 {
5716 const char tmp = *s++;
5717 if (tmp == '>')
5718 SHop(OP_RIGHT_SHIFT);
d4c19fe8 5719 else if (tmp == '=')
90771dc0
NC
5720 Rop(OP_GE);
5721 }
378cc40b 5722 s--;
79072805 5723 Rop(OP_GT);
378cc40b
LW
5724
5725 case '$':
bbce6d69 5726 CLINE;
5727
3280af22
NIS
5728 if (PL_expect == XOPERATOR) {
5729 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5730 return deprecate_commaless_var_list();
a0d0e21e 5731 }
8990e307 5732 }
a0d0e21e 5733
c0b977fd 5734 if (s[1] == '#' && (isIDFIRST_lazy_if(s+2,UTF) || strchr("{$:+-@", s[2]))) {
3280af22 5735 PL_tokenbuf[0] = '@';
376b8730
SM
5736 s = scan_ident(s + 1, PL_bufend, PL_tokenbuf + 1,
5737 sizeof PL_tokenbuf - 1, FALSE);
5738 if (PL_expect == XOPERATOR)
5739 no_op("Array length", s);
3280af22 5740 if (!PL_tokenbuf[1])
a0d0e21e 5741 PREREF(DOLSHARP);
3280af22
NIS
5742 PL_expect = XOPERATOR;
5743 PL_pending_ident = '#';
463ee0b2 5744 TOKEN(DOLSHARP);
79072805 5745 }
bbce6d69 5746
3280af22 5747 PL_tokenbuf[0] = '$';
376b8730
SM
5748 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1,
5749 sizeof PL_tokenbuf - 1, FALSE);
5750 if (PL_expect == XOPERATOR)
5751 no_op("Scalar", s);
3280af22
NIS
5752 if (!PL_tokenbuf[1]) {
5753 if (s == PL_bufend)
bbce6d69 5754 yyerror("Final $ should be \\$ or $name");
5755 PREREF('$');
8990e307 5756 }
a0d0e21e 5757
bbce6d69 5758 /* This kludge not intended to be bulletproof. */
3280af22 5759 if (PL_tokenbuf[1] == '[' && !PL_tokenbuf[2]) {
6154021b 5760 pl_yylval.opval = newSVOP(OP_CONST, 0,
fc15ae8f 5761 newSViv(CopARYBASE_get(&PL_compiling)));
6154021b 5762 pl_yylval.opval->op_private = OPpCONST_ARYBASE;
bbce6d69 5763 TERM(THING);
5764 }
5765
ff68c719 5766 d = s;
90771dc0
NC
5767 {
5768 const char tmp = *s;
ae28bb2a 5769 if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
29595ff2 5770 s = SKIPSPACE1(s);
ff68c719 5771
90771dc0
NC
5772 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
5773 && intuit_more(s)) {
5774 if (*s == '[') {
5775 PL_tokenbuf[0] = '@';
5776 if (ckWARN(WARN_SYNTAX)) {
c35e046a
AL
5777 char *t = s+1;
5778
5779 while (isSPACE(*t) || isALNUM_lazy_if(t,UTF) || *t == '$')
5780 t++;
90771dc0 5781 if (*t++ == ',') {
29595ff2 5782 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
90771dc0
NC
5783 while (t < PL_bufend && *t != ']')
5784 t++;
9014280d 5785 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
90771dc0 5786 "Multidimensional syntax %.*s not supported",
36c7798d 5787 (int)((t - PL_bufptr) + 1), PL_bufptr);
90771dc0 5788 }
748a9306 5789 }
93a17b20 5790 }
90771dc0
NC
5791 else if (*s == '{') {
5792 char *t;
5793 PL_tokenbuf[0] = '%';
5794 if (strEQ(PL_tokenbuf+1, "SIG") && ckWARN(WARN_SYNTAX)
5795 && (t = strchr(s, '}')) && (t = strchr(t, '=')))
5796 {
5797 char tmpbuf[sizeof PL_tokenbuf];
c35e046a
AL
5798 do {
5799 t++;
5800 } while (isSPACE(*t));
90771dc0 5801 if (isIDFIRST_lazy_if(t,UTF)) {
780a5241 5802 STRLEN len;
90771dc0 5803 t = scan_word(t, tmpbuf, sizeof tmpbuf, TRUE,
780a5241 5804 &len);
c35e046a
AL
5805 while (isSPACE(*t))
5806 t++;
780a5241 5807 if (*t == ';' && get_cvn_flags(tmpbuf, len, 0))
90771dc0
NC
5808 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
5809 "You need to quote \"%s\"",
5810 tmpbuf);
5811 }
5812 }
5813 }
93a17b20 5814 }
bbce6d69 5815
90771dc0
NC
5816 PL_expect = XOPERATOR;
5817 if (PL_lex_state == LEX_NORMAL && isSPACE((char)tmp)) {
5818 const bool islop = (PL_last_lop == PL_oldoldbufptr);
5819 if (!islop || PL_last_lop_op == OP_GREPSTART)
5820 PL_expect = XOPERATOR;
5821 else if (strchr("$@\"'`q", *s))
5822 PL_expect = XTERM; /* e.g. print $fh "foo" */
5823 else if (strchr("&*<%", *s) && isIDFIRST_lazy_if(s+1,UTF))
5824 PL_expect = XTERM; /* e.g. print $fh &sub */
5825 else if (isIDFIRST_lazy_if(s,UTF)) {
5826 char tmpbuf[sizeof PL_tokenbuf];
5827 int t2;
5828 scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5458a98a 5829 if ((t2 = keyword(tmpbuf, len, 0))) {
90771dc0
NC
5830 /* binary operators exclude handle interpretations */
5831 switch (t2) {
5832 case -KEY_x:
5833 case -KEY_eq:
5834 case -KEY_ne:
5835 case -KEY_gt:
5836 case -KEY_lt:
5837 case -KEY_ge:
5838 case -KEY_le:
5839 case -KEY_cmp:
5840 break;
5841 default:
5842 PL_expect = XTERM; /* e.g. print $fh length() */
5843 break;
5844 }
5845 }
5846 else {
5847 PL_expect = XTERM; /* e.g. print $fh subr() */
84902520
TB
5848 }
5849 }
90771dc0
NC
5850 else if (isDIGIT(*s))
5851 PL_expect = XTERM; /* e.g. print $fh 3 */
5852 else if (*s == '.' && isDIGIT(s[1]))
5853 PL_expect = XTERM; /* e.g. print $fh .3 */
5854 else if ((*s == '?' || *s == '-' || *s == '+')
5855 && !isSPACE(s[1]) && s[1] != '=')
5856 PL_expect = XTERM; /* e.g. print $fh -1 */
5857 else if (*s == '/' && !isSPACE(s[1]) && s[1] != '='
5858 && s[1] != '/')
5859 PL_expect = XTERM; /* e.g. print $fh /.../
5860 XXX except DORDOR operator
5861 */
5862 else if (*s == '<' && s[1] == '<' && !isSPACE(s[2])
5863 && s[2] != '=')
5864 PL_expect = XTERM; /* print $fh <<"EOF" */
93a17b20 5865 }
bbce6d69 5866 }
3280af22 5867 PL_pending_ident = '$';
79072805 5868 TOKEN('$');
378cc40b
LW
5869
5870 case '@':
3280af22 5871 if (PL_expect == XOPERATOR)
bbce6d69 5872 no_op("Array", s);
3280af22
NIS
5873 PL_tokenbuf[0] = '@';
5874 s = scan_ident(s, PL_bufend, PL_tokenbuf + 1, sizeof PL_tokenbuf - 1, FALSE);
5875 if (!PL_tokenbuf[1]) {
bbce6d69 5876 PREREF('@');
5877 }
3280af22 5878 if (PL_lex_state == LEX_NORMAL)
29595ff2 5879 s = SKIPSPACE1(s);
3280af22 5880 if ((PL_expect != XREF || PL_oldoldbufptr == PL_last_lop) && intuit_more(s)) {
bbce6d69 5881 if (*s == '{')
3280af22 5882 PL_tokenbuf[0] = '%';
a0d0e21e
LW
5883
5884 /* Warn about @ where they meant $. */
041457d9
DM
5885 if (*s == '[' || *s == '{') {
5886 if (ckWARN(WARN_SYNTAX)) {
f54cb97a 5887 const char *t = s + 1;
7e2040f0 5888 while (*t && (isALNUM_lazy_if(t,UTF) || strchr(" \t$#+-'\"", *t)))
a0d0e21e
LW
5889 t++;
5890 if (*t == '}' || *t == ']') {
5891 t++;
29595ff2 5892 PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
9014280d 5893 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
599cee73 5894 "Scalar value %.*s better written as $%.*s",
36c7798d
DM
5895 (int)(t-PL_bufptr), PL_bufptr,
5896 (int)(t-PL_bufptr-1), PL_bufptr+1);
a0d0e21e 5897 }
93a17b20
LW
5898 }
5899 }
463ee0b2 5900 }
3280af22 5901 PL_pending_ident = '@';
79072805 5902 TERM('@');
378cc40b 5903
c963b151 5904 case '/': /* may be division, defined-or, or pattern */
6f33ba73
RGS
5905 if (PL_expect == XTERMORDORDOR && s[1] == '/') {
5906 s += 2;
5907 AOPERATOR(DORDOR);
5908 }
c963b151 5909 case '?': /* may either be conditional or pattern */
be25f609 5910 if (PL_expect == XOPERATOR) {
90771dc0 5911 char tmp = *s++;
c963b151 5912 if(tmp == '?') {
be25f609 5913 OPERATOR('?');
c963b151
BD
5914 }
5915 else {
5916 tmp = *s++;
5917 if(tmp == '/') {
5918 /* A // operator. */
5919 AOPERATOR(DORDOR);
5920 }
5921 else {
5922 s--;
5923 Mop(OP_DIVIDE);
5924 }
5925 }
5926 }
5927 else {
5928 /* Disable warning on "study /blah/" */
5929 if (PL_oldoldbufptr == PL_last_uni
5930 && (*PL_last_uni != 's' || s - PL_last_uni < 5
5931 || memNE(PL_last_uni, "study", 5)
5932 || isALNUM_lazy_if(PL_last_uni+5,UTF)
5933 ))
5934 check_uni();
5935 s = scan_pat(s,OP_MATCH);
5936 TERM(sublex_start());
5937 }
378cc40b
LW
5938
5939 case '.':
51882d45
GS
5940 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack
5941#ifdef PERL_STRICT_CR
5942 && s[1] == '\n'
5943#else
5944 && (s[1] == '\n' || (s[1] == '\r' && s[2] == '\n'))
5945#endif
5946 && (s == PL_linestart || s[-1] == '\n') )
5947 {
3280af22
NIS
5948 PL_lex_formbrack = 0;
5949 PL_expect = XSTATE;
79072805
LW
5950 goto rightbracket;
5951 }
be25f609 5952 if (PL_expect == XSTATE && s[1] == '.' && s[2] == '.') {
5953 s += 3;
5954 OPERATOR(YADAYADA);
5955 }
3280af22 5956 if (PL_expect == XOPERATOR || !isDIGIT(s[1])) {
90771dc0 5957 char tmp = *s++;
a687059c
LW
5958 if (*s == tmp) {
5959 s++;
2f3197b3
LW
5960 if (*s == tmp) {
5961 s++;
6154021b 5962 pl_yylval.ival = OPf_SPECIAL;
2f3197b3
LW
5963 }
5964 else
6154021b 5965 pl_yylval.ival = 0;
378cc40b 5966 OPERATOR(DOTDOT);
a687059c 5967 }
79072805 5968 Aop(OP_CONCAT);
378cc40b
LW
5969 }
5970 /* FALL THROUGH */
5971 case '0': case '1': case '2': case '3': case '4':
5972 case '5': case '6': case '7': case '8': case '9':
6154021b 5973 s = scan_num(s, &pl_yylval);
931e0695 5974 DEBUG_T( { printbuf("### Saw number in %s\n", s); } );
3280af22 5975 if (PL_expect == XOPERATOR)
8990e307 5976 no_op("Number",s);
79072805
LW
5977 TERM(THING);
5978
5979 case '\'':
5db06880 5980 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5981 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5982 if (PL_expect == XOPERATOR) {
5983 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5984 return deprecate_commaless_var_list();
a0d0e21e 5985 }
463ee0b2 5986 else
8990e307 5987 no_op("String",s);
463ee0b2 5988 }
79072805 5989 if (!s)
d4c19fe8 5990 missingterm(NULL);
6154021b 5991 pl_yylval.ival = OP_CONST;
79072805
LW
5992 TERM(sublex_start());
5993
5994 case '"':
5db06880 5995 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 5996 DEBUG_T( { printbuf("### Saw string before %s\n", s); } );
3280af22
NIS
5997 if (PL_expect == XOPERATOR) {
5998 if (PL_lex_formbrack && PL_lex_brackets == PL_lex_formbrack) {
8290c323 5999 return deprecate_commaless_var_list();
a0d0e21e 6000 }
463ee0b2 6001 else
8990e307 6002 no_op("String",s);
463ee0b2 6003 }
79072805 6004 if (!s)
d4c19fe8 6005 missingterm(NULL);
6154021b 6006 pl_yylval.ival = OP_CONST;
cfd0369c
NC
6007 /* FIXME. I think that this can be const if char *d is replaced by
6008 more localised variables. */
3280af22 6009 for (d = SvPV(PL_lex_stuff, len); len; len--, d++) {
63cd0674 6010 if (*d == '$' || *d == '@' || *d == '\\' || !UTF8_IS_INVARIANT((U8)*d)) {
6154021b 6011 pl_yylval.ival = OP_STRINGIFY;
4633a7c4
LW
6012 break;
6013 }
6014 }
79072805
LW
6015 TERM(sublex_start());
6016
6017 case '`':
5db06880 6018 s = scan_str(s,!!PL_madskills,FALSE);
931e0695 6019 DEBUG_T( { printbuf("### Saw backtick string before %s\n", s); } );
3280af22 6020 if (PL_expect == XOPERATOR)
8990e307 6021 no_op("Backticks",s);
79072805 6022 if (!s)
d4c19fe8 6023 missingterm(NULL);
9b201d7d 6024 readpipe_override();
79072805
LW
6025 TERM(sublex_start());
6026
6027 case '\\':
6028 s++;
a2a5de95
NC
6029 if (PL_lex_inwhat && isDIGIT(*s))
6030 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),"Can't use \\%c to mean $%c in expression",
6031 *s, *s);
3280af22 6032 if (PL_expect == XOPERATOR)
8990e307 6033 no_op("Backslash",s);
79072805
LW
6034 OPERATOR(REFGEN);
6035
a7cb1f99 6036 case 'v':
e526c9e6 6037 if (isDIGIT(s[1]) && PL_expect != XOPERATOR) {
f54cb97a 6038 char *start = s + 2;
dd629d5b 6039 while (isDIGIT(*start) || *start == '_')
a7cb1f99
GS
6040 start++;
6041 if (*start == '.' && isDIGIT(start[1])) {
6154021b 6042 s = scan_num(s, &pl_yylval);
a7cb1f99
GS
6043 TERM(THING);
6044 }
e526c9e6 6045 /* avoid v123abc() or $h{v1}, allow C<print v10;> */
6f33ba73
RGS
6046 else if (!isALPHA(*start) && (PL_expect == XTERM
6047 || PL_expect == XREF || PL_expect == XSTATE
6048 || PL_expect == XTERMORDORDOR)) {
9bde8eb0 6049 GV *const gv = gv_fetchpvn_flags(s, start - s, 0, SVt_PVCV);
e526c9e6 6050 if (!gv) {
6154021b 6051 s = scan_num(s, &pl_yylval);
e526c9e6
GS
6052 TERM(THING);
6053 }
6054 }
a7cb1f99
GS
6055 }
6056 goto keylookup;
79072805 6057 case 'x':
3280af22 6058 if (isDIGIT(s[1]) && PL_expect == XOPERATOR) {
79072805
LW
6059 s++;
6060 Mop(OP_REPEAT);
2f3197b3 6061 }
79072805
LW
6062 goto keylookup;
6063
378cc40b 6064 case '_':
79072805
LW
6065 case 'a': case 'A':
6066 case 'b': case 'B':
6067 case 'c': case 'C':
6068 case 'd': case 'D':
6069 case 'e': case 'E':
6070 case 'f': case 'F':
6071 case 'g': case 'G':
6072 case 'h': case 'H':
6073 case 'i': case 'I':
6074 case 'j': case 'J':
6075 case 'k': case 'K':
6076 case 'l': case 'L':
6077 case 'm': case 'M':
6078 case 'n': case 'N':
6079 case 'o': case 'O':
6080 case 'p': case 'P':
6081 case 'q': case 'Q':
6082 case 'r': case 'R':
6083 case 's': case 'S':
6084 case 't': case 'T':
6085 case 'u': case 'U':
a7cb1f99 6086 case 'V':
79072805
LW
6087 case 'w': case 'W':
6088 case 'X':
6089 case 'y': case 'Y':
6090 case 'z': case 'Z':
6091
49dc05e3 6092 keylookup: {
88e1f1a2 6093 bool anydelim;
90771dc0 6094 I32 tmp;
10edeb5d
JH
6095
6096 orig_keyword = 0;
6097 gv = NULL;
6098 gvp = NULL;
49dc05e3 6099
3280af22
NIS
6100 PL_bufptr = s;
6101 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
8ebc5c01 6102
6103 /* Some keywords can be followed by any delimiter, including ':' */
88e1f1a2 6104 anydelim = ((len == 1 && strchr("msyq", PL_tokenbuf[0])) ||
155aba94
GS
6105 (len == 2 && ((PL_tokenbuf[0] == 't' && PL_tokenbuf[1] == 'r') ||
6106 (PL_tokenbuf[0] == 'q' &&
6107 strchr("qwxr", PL_tokenbuf[1])))));
8ebc5c01 6108
6109 /* x::* is just a word, unless x is "CORE" */
88e1f1a2 6110 if (!anydelim && *s == ':' && s[1] == ':' && strNE(PL_tokenbuf, "CORE"))
4633a7c4
LW
6111 goto just_a_word;
6112
3643fb5f 6113 d = s;
3280af22 6114 while (d < PL_bufend && isSPACE(*d))
3643fb5f
CS
6115 d++; /* no comments skipped here, or s### is misparsed */
6116
748a9306 6117 /* Is this a word before a => operator? */
1c3923b3 6118 if (*d == '=' && d[1] == '>') {
748a9306 6119 CLINE;
6154021b 6120 pl_yylval.opval
d0a148a6
NC
6121 = (OP*)newSVOP(OP_CONST, 0,
6122 S_newSV_maybe_utf8(aTHX_ PL_tokenbuf, len));
6154021b 6123 pl_yylval.opval->op_private = OPpCONST_BARE;
748a9306
LW
6124 TERM(WORD);
6125 }
6126
88e1f1a2
JV
6127 /* Check for plugged-in keyword */
6128 {
6129 OP *o;
6130 int result;
6131 char *saved_bufptr = PL_bufptr;
6132 PL_bufptr = s;
6133 result = CALL_FPTR(PL_keyword_plugin)(aTHX_ PL_tokenbuf, len, &o);
6134 s = PL_bufptr;
6135 if (result == KEYWORD_PLUGIN_DECLINE) {
6136 /* not a plugged-in keyword */
6137 PL_bufptr = saved_bufptr;
6138 } else if (result == KEYWORD_PLUGIN_STMT) {
6139 pl_yylval.opval = o;
6140 CLINE;
6141 PL_expect = XSTATE;
6142 return REPORT(PLUGSTMT);
6143 } else if (result == KEYWORD_PLUGIN_EXPR) {
6144 pl_yylval.opval = o;
6145 CLINE;
6146 PL_expect = XOPERATOR;
6147 return REPORT(PLUGEXPR);
6148 } else {
6149 Perl_croak(aTHX_ "Bad plugin affecting keyword '%s'",
6150 PL_tokenbuf);
6151 }
6152 }
6153
6154 /* Check for built-in keyword */
6155 tmp = keyword(PL_tokenbuf, len, 0);
6156
6157 /* Is this a label? */
6158 if (!anydelim && PL_expect == XSTATE
6159 && d < PL_bufend && *d == ':' && *(d + 1) != ':') {
88e1f1a2
JV
6160 s = d + 1;
6161 pl_yylval.pval = CopLABEL_alloc(PL_tokenbuf);
6162 CLINE;
6163 TOKEN(LABEL);
6164 }
6165
a0d0e21e 6166 if (tmp < 0) { /* second-class keyword? */
cbbf8932
AL
6167 GV *ogv = NULL; /* override (winner) */
6168 GV *hgv = NULL; /* hidden (loser) */
3280af22 6169 if (PL_expect != XOPERATOR && (*s != ':' || s[1] != ':')) {
56f7f34b 6170 CV *cv;
90e5519e 6171 if ((gv = gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVCV)) &&
56f7f34b
CS
6172 (cv = GvCVu(gv)))
6173 {
6174 if (GvIMPORTED_CV(gv))
6175 ogv = gv;
6176 else if (! CvMETHOD(cv))
6177 hgv = gv;
6178 }
6179 if (!ogv &&
3280af22 6180 (gvp = (GV**)hv_fetch(PL_globalstash,PL_tokenbuf,len,FALSE)) &&
9e0d86f8 6181 (gv = *gvp) && isGV_with_GP(gv) &&
56f7f34b
CS
6182 GvCVu(gv) && GvIMPORTED_CV(gv))
6183 {
6184 ogv = gv;
6185 }
6186 }
6187 if (ogv) {
30fe34ed 6188 orig_keyword = tmp;
56f7f34b 6189 tmp = 0; /* overridden by import or by GLOBAL */
6e7b2336
GS
6190 }
6191 else if (gv && !gvp
6192 && -tmp==KEY_lock /* XXX generalizable kludge */
47f9f84c 6193 && GvCVu(gv))
6e7b2336
GS
6194 {
6195 tmp = 0; /* any sub overrides "weak" keyword */
a0d0e21e 6196 }
56f7f34b
CS
6197 else { /* no override */
6198 tmp = -tmp;
a2a5de95
NC
6199 if (tmp == KEY_dump) {
6200 Perl_ck_warner(aTHX_ packWARN(WARN_MISC),
6201 "dump() better written as CORE::dump()");
ac206dc8 6202 }
a0714e2c 6203 gv = NULL;
56f7f34b 6204 gvp = 0;
a2a5de95
NC
6205 if (hgv && tmp != KEY_x && tmp != KEY_CORE) /* never ambiguous */
6206 Perl_ck_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
6207 "Ambiguous call resolved as CORE::%s(), %s",
6208 GvENAME(hgv), "qualify as such or use &");
49dc05e3 6209 }
a0d0e21e
LW
6210 }
6211
6212 reserved_word:
6213 switch (tmp) {
79072805
LW
6214
6215 default: /* not a keyword */
0bfa2a8a
NC
6216 /* Trade off - by using this evil construction we can pull the
6217 variable gv into the block labelled keylookup. If not, then
6218 we have to give it function scope so that the goto from the
6219 earlier ':' case doesn't bypass the initialisation. */
6220 if (0) {
6221 just_a_word_zero_gv:
6222 gv = NULL;
6223 gvp = NULL;
8bee0991 6224 orig_keyword = 0;
0bfa2a8a 6225 }
93a17b20 6226 just_a_word: {
96e4d5b1 6227 SV *sv;
ce29ac45 6228 int pkgname = 0;
f54cb97a 6229 const char lastchar = (PL_bufptr == PL_oldoldbufptr ? 0 : PL_bufptr[-1]);
f7461760 6230 OP *rv2cv_op;
5069cc75 6231 CV *cv;
5db06880 6232#ifdef PERL_MAD
cd81e915 6233 SV *nextPL_nextwhite = 0;
5db06880
NC
6234#endif
6235
8990e307
LW
6236
6237 /* Get the rest if it looks like a package qualifier */
6238
155aba94 6239 if (*s == '\'' || (*s == ':' && s[1] == ':')) {
c3e0f903 6240 STRLEN morelen;
3280af22 6241 s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
c3e0f903
GS
6242 TRUE, &morelen);
6243 if (!morelen)
cea2e8a9 6244 Perl_croak(aTHX_ "Bad name after %s%s", PL_tokenbuf,
ec2ab091 6245 *s == '\'' ? "'" : "::");
c3e0f903 6246 len += morelen;
ce29ac45 6247 pkgname = 1;
a0d0e21e 6248 }
8990e307 6249
3280af22
NIS
6250 if (PL_expect == XOPERATOR) {
6251 if (PL_bufptr == PL_linestart) {
57843af0 6252 CopLINE_dec(PL_curcop);
f1f66076 6253 Perl_warner(aTHX_ packWARN(WARN_SEMICOLON), "%s", PL_warn_nosemi);
57843af0 6254 CopLINE_inc(PL_curcop);
463ee0b2
LW
6255 }
6256 else
54310121 6257 no_op("Bareword",s);
463ee0b2 6258 }
8990e307 6259
c3e0f903
GS
6260 /* Look for a subroutine with this name in current package,
6261 unless name is "Foo::", in which case Foo is a bearword
6262 (and a package name). */
6263
5db06880 6264 if (len > 2 && !PL_madskills &&
3280af22 6265 PL_tokenbuf[len - 2] == ':' && PL_tokenbuf[len - 1] == ':')
c3e0f903 6266 {
f776e3cd 6267 if (ckWARN(WARN_BAREWORD)
90e5519e 6268 && ! gv_fetchpvn_flags(PL_tokenbuf, len, 0, SVt_PVHV))
9014280d 6269 Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
599cee73 6270 "Bareword \"%s\" refers to nonexistent package",
3280af22 6271 PL_tokenbuf);
c3e0f903 6272 len -= 2;
3280af22 6273 PL_tokenbuf[len] = '\0';
a0714e2c 6274 gv = NULL;
c3e0f903
GS
6275 gvp = 0;
6276 }
6277 else {
62d55b22
NC
6278 if (!gv) {
6279 /* Mustn't actually add anything to a symbol table.
6280 But also don't want to "initialise" any placeholder
6281 constants that might already be there into full
6282 blown PVGVs with attached PVCV. */
90e5519e
NC
6283 gv = gv_fetchpvn_flags(PL_tokenbuf, len,
6284 GV_NOADD_NOINIT, SVt_PVCV);
62d55b22 6285 }
b3d904f3 6286 len = 0;
c3e0f903
GS
6287 }
6288
6289 /* if we saw a global override before, get the right name */
8990e307 6290
49dc05e3 6291 if (gvp) {
396482e1 6292 sv = newSVpvs("CORE::GLOBAL::");
3280af22 6293 sv_catpv(sv,PL_tokenbuf);
49dc05e3 6294 }
8a7a129d
NC
6295 else {
6296 /* If len is 0, newSVpv does strlen(), which is correct.
6297 If len is non-zero, then it will be the true length,
6298 and so the scalar will be created correctly. */
6299 sv = newSVpv(PL_tokenbuf,len);
6300 }
5db06880 6301#ifdef PERL_MAD
cd81e915
NC
6302 if (PL_madskills && !PL_thistoken) {
6303 char *start = SvPVX(PL_linestr) + PL_realtokenstart;
9ff8e806 6304 PL_thistoken = newSVpvn(start,s - start);
cd81e915 6305 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
6306 }
6307#endif
8990e307 6308
a0d0e21e
LW
6309 /* Presume this is going to be a bareword of some sort. */
6310
6311 CLINE;
6154021b
RGS
6312 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sv);
6313 pl_yylval.opval->op_private = OPpCONST_BARE;
8f8cf39c
JH
6314 /* UTF-8 package name? */
6315 if (UTF && !IN_BYTES &&
95a20fc0 6316 is_utf8_string((U8*)SvPVX_const(sv), SvCUR(sv)))
8f8cf39c 6317 SvUTF8_on(sv);
a0d0e21e 6318
c3e0f903
GS
6319 /* And if "Foo::", then that's what it certainly is. */
6320
6321 if (len)
6322 goto safe_bareword;
6323
f7461760
Z
6324 cv = NULL;
6325 {
6326 OP *const_op = newSVOP(OP_CONST, 0, SvREFCNT_inc(sv));
6327 const_op->op_private = OPpCONST_BARE;
6328 rv2cv_op = newCVREF(0, const_op);
6329 }
6330 if (rv2cv_op->op_type == OP_RV2CV &&
6331 (rv2cv_op->op_flags & OPf_KIDS)) {
6332 OP *rv_op = cUNOPx(rv2cv_op)->op_first;
6333 switch (rv_op->op_type) {
6334 case OP_CONST: {
6335 SV *sv = cSVOPx_sv(rv_op);
6336 if (SvROK(sv) && SvTYPE(SvRV(sv)) == SVt_PVCV)
6337 cv = (CV*)SvRV(sv);
6338 } break;
6339 case OP_GV: {
6340 GV *gv = cGVOPx_gv(rv_op);
6341 CV *maybe_cv = GvCVu(gv);
6342 if (maybe_cv && SvTYPE((SV*)maybe_cv) == SVt_PVCV)
6343 cv = maybe_cv;
6344 } break;
6345 }
6346 }
5069cc75 6347
8990e307
LW
6348 /* See if it's the indirect object for a list operator. */
6349
3280af22
NIS
6350 if (PL_oldoldbufptr &&
6351 PL_oldoldbufptr < PL_bufptr &&
65cec589
GS
6352 (PL_oldoldbufptr == PL_last_lop
6353 || PL_oldoldbufptr == PL_last_uni) &&
a0d0e21e 6354 /* NO SKIPSPACE BEFORE HERE! */
a9ef352a
GS
6355 (PL_expect == XREF ||
6356 ((PL_opargs[PL_last_lop_op] >> OASHIFT)& 7) == OA_FILEREF))
a0d0e21e 6357 {
748a9306
LW
6358 bool immediate_paren = *s == '(';
6359
a0d0e21e 6360 /* (Now we can afford to cross potential line boundary.) */
cd81e915 6361 s = SKIPSPACE2(s,nextPL_nextwhite);
5db06880 6362#ifdef PERL_MAD
cd81e915 6363 PL_nextwhite = nextPL_nextwhite; /* assume no & deception */
5db06880 6364#endif
a0d0e21e
LW
6365
6366 /* Two barewords in a row may indicate method call. */
6367
62d55b22 6368 if ((isIDFIRST_lazy_if(s,UTF) || *s == '$') &&
f7461760
Z
6369 (tmp = intuit_method(s, gv, cv))) {
6370 op_free(rv2cv_op);
bbf60fe6 6371 return REPORT(tmp);
f7461760 6372 }
a0d0e21e
LW
6373
6374 /* If not a declared subroutine, it's an indirect object. */
6375 /* (But it's an indir obj regardless for sort.) */
7294df96 6376 /* Also, if "_" follows a filetest operator, it's a bareword */
a0d0e21e 6377
7294df96
RGS
6378 if (
6379 ( !immediate_paren && (PL_last_lop_op == OP_SORT ||
f7461760 6380 (!cv &&
a9ef352a 6381 (PL_last_lop_op != OP_MAPSTART &&
f0670693 6382 PL_last_lop_op != OP_GREPSTART))))
7294df96
RGS
6383 || (PL_tokenbuf[0] == '_' && PL_tokenbuf[1] == '\0'
6384 && ((PL_opargs[PL_last_lop_op] & OA_CLASS_MASK) == OA_FILESTATOP))
6385 )
a9ef352a 6386 {
3280af22 6387 PL_expect = (PL_last_lop == PL_oldoldbufptr) ? XTERM : XOPERATOR;
748a9306 6388 goto bareword;
93a17b20
LW
6389 }
6390 }
8990e307 6391
3280af22 6392 PL_expect = XOPERATOR;
5db06880
NC
6393#ifdef PERL_MAD
6394 if (isSPACE(*s))
cd81e915
NC
6395 s = SKIPSPACE2(s,nextPL_nextwhite);
6396 PL_nextwhite = nextPL_nextwhite;
5db06880 6397#else
8990e307 6398 s = skipspace(s);
5db06880 6399#endif
1c3923b3
GS
6400
6401 /* Is this a word before a => operator? */
ce29ac45 6402 if (*s == '=' && s[1] == '>' && !pkgname) {
f7461760 6403 op_free(rv2cv_op);
1c3923b3 6404 CLINE;
6154021b 6405 sv_setpv(((SVOP*)pl_yylval.opval)->op_sv, PL_tokenbuf);
0064a8a9 6406 if (UTF && !IN_BYTES && is_utf8_string((U8*)PL_tokenbuf, len))
6154021b 6407 SvUTF8_on(((SVOP*)pl_yylval.opval)->op_sv);
1c3923b3
GS
6408 TERM(WORD);
6409 }
6410
6411 /* If followed by a paren, it's certainly a subroutine. */
93a17b20 6412 if (*s == '(') {
79072805 6413 CLINE;
5069cc75 6414 if (cv) {
c35e046a
AL
6415 d = s + 1;
6416 while (SPACE_OR_TAB(*d))
6417 d++;
f7461760 6418 if (*d == ')' && (sv = cv_const_sv(cv))) {
96e4d5b1 6419 s = d + 1;
c631f32b 6420 goto its_constant;
96e4d5b1 6421 }
6422 }
5db06880
NC
6423#ifdef PERL_MAD
6424 if (PL_madskills) {
cd81e915
NC
6425 PL_nextwhite = PL_thiswhite;
6426 PL_thiswhite = 0;
5db06880 6427 }
cd81e915 6428 start_force(PL_curforce);
5db06880 6429#endif
6154021b 6430 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6431 PL_expect = XOPERATOR;
5db06880
NC
6432#ifdef PERL_MAD
6433 if (PL_madskills) {
cd81e915
NC
6434 PL_nextwhite = nextPL_nextwhite;
6435 curmad('X', PL_thistoken);
6b29d1f5 6436 PL_thistoken = newSVpvs("");
5db06880
NC
6437 }
6438#endif
f7461760 6439 op_free(rv2cv_op);
93a17b20 6440 force_next(WORD);
6154021b 6441 pl_yylval.ival = 0;
463ee0b2 6442 TOKEN('&');
79072805 6443 }
93a17b20 6444
a0d0e21e 6445 /* If followed by var or block, call it a method (unless sub) */
8990e307 6446
f7461760
Z
6447 if ((*s == '$' || *s == '{') && !cv) {
6448 op_free(rv2cv_op);
3280af22
NIS
6449 PL_last_lop = PL_oldbufptr;
6450 PL_last_lop_op = OP_METHOD;
93a17b20 6451 PREBLOCK(METHOD);
463ee0b2
LW
6452 }
6453
8990e307
LW
6454 /* If followed by a bareword, see if it looks like indir obj. */
6455
30fe34ed
RGS
6456 if (!orig_keyword
6457 && (isIDFIRST_lazy_if(s,UTF) || *s == '$')
f7461760
Z
6458 && (tmp = intuit_method(s, gv, cv))) {
6459 op_free(rv2cv_op);
bbf60fe6 6460 return REPORT(tmp);
f7461760 6461 }
93a17b20 6462
8990e307
LW
6463 /* Not a method, so call it a subroutine (if defined) */
6464
5069cc75 6465 if (cv) {
9b387841
NC
6466 if (lastchar == '-')
6467 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6468 "Ambiguous use of -%s resolved as -&%s()",
6469 PL_tokenbuf, PL_tokenbuf);
89bfa8cd 6470 /* Check for a constant sub */
f7461760 6471 if ((sv = cv_const_sv(cv))) {
96e4d5b1 6472 its_constant:
f7461760 6473 op_free(rv2cv_op);
6154021b
RGS
6474 SvREFCNT_dec(((SVOP*)pl_yylval.opval)->op_sv);
6475 ((SVOP*)pl_yylval.opval)->op_sv = SvREFCNT_inc_simple(sv);
6476 pl_yylval.opval->op_private = 0;
96e4d5b1 6477 TOKEN(WORD);
89bfa8cd 6478 }
6479
6154021b 6480 op_free(pl_yylval.opval);
f7461760 6481 pl_yylval.opval = rv2cv_op;
6154021b 6482 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
7a52d87a 6483 PL_last_lop = PL_oldbufptr;
bf848113 6484 PL_last_lop_op = OP_ENTERSUB;
4633a7c4 6485 /* Is there a prototype? */
5db06880
NC
6486 if (
6487#ifdef PERL_MAD
6488 cv &&
6489#endif
d9f2850e
RGS
6490 SvPOK(cv))
6491 {
5f66b61c 6492 STRLEN protolen;
daba3364 6493 const char *proto = SvPV_const(MUTABLE_SV(cv), protolen);
5f66b61c 6494 if (!protolen)
4633a7c4 6495 TERM(FUNC0SUB);
8c28b960 6496 if ((*proto == '$' || *proto == '_') && proto[1] == '\0')
4633a7c4 6497 OPERATOR(UNIOPSUB);
0f5d0394
AE
6498 while (*proto == ';')
6499 proto++;
7a52d87a 6500 if (*proto == '&' && *s == '{') {
49a54bbe
NC
6501 if (PL_curstash)
6502 sv_setpvs(PL_subname, "__ANON__");
6503 else
6504 sv_setpvs(PL_subname, "__ANON__::__ANON__");
4633a7c4
LW
6505 PREBLOCK(LSTOPSUB);
6506 }
a9ef352a 6507 }
5db06880
NC
6508#ifdef PERL_MAD
6509 {
6510 if (PL_madskills) {
cd81e915
NC
6511 PL_nextwhite = PL_thiswhite;
6512 PL_thiswhite = 0;
5db06880 6513 }
cd81e915 6514 start_force(PL_curforce);
6154021b 6515 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880
NC
6516 PL_expect = XTERM;
6517 if (PL_madskills) {
cd81e915
NC
6518 PL_nextwhite = nextPL_nextwhite;
6519 curmad('X', PL_thistoken);
6b29d1f5 6520 PL_thistoken = newSVpvs("");
5db06880
NC
6521 }
6522 force_next(WORD);
6523 TOKEN(NOAMP);
6524 }
6525 }
6526
6527 /* Guess harder when madskills require "best effort". */
6528 if (PL_madskills && (!gv || !GvCVu(gv))) {
6529 int probable_sub = 0;
6530 if (strchr("\"'`$@%0123456789!*+{[<", *s))
6531 probable_sub = 1;
6532 else if (isALPHA(*s)) {
6533 char tmpbuf[1024];
6534 STRLEN tmplen;
6535 d = s;
6536 d = scan_word(d, tmpbuf, sizeof tmpbuf, TRUE, &tmplen);
5458a98a 6537 if (!keyword(tmpbuf, tmplen, 0))
5db06880
NC
6538 probable_sub = 1;
6539 else {
6540 while (d < PL_bufend && isSPACE(*d))
6541 d++;
6542 if (*d == '=' && d[1] == '>')
6543 probable_sub = 1;
6544 }
6545 }
6546 if (probable_sub) {
7a6d04f4 6547 gv = gv_fetchpv(PL_tokenbuf, GV_ADD, SVt_PVCV);
6154021b 6548 op_free(pl_yylval.opval);
f7461760 6549 pl_yylval.opval = rv2cv_op;
6154021b 6550 pl_yylval.opval->op_private |= OPpENTERSUB_NOPAREN;
5db06880
NC
6551 PL_last_lop = PL_oldbufptr;
6552 PL_last_lop_op = OP_ENTERSUB;
cd81e915
NC
6553 PL_nextwhite = PL_thiswhite;
6554 PL_thiswhite = 0;
6555 start_force(PL_curforce);
6154021b 6556 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
5db06880 6557 PL_expect = XTERM;
cd81e915
NC
6558 PL_nextwhite = nextPL_nextwhite;
6559 curmad('X', PL_thistoken);
6b29d1f5 6560 PL_thistoken = newSVpvs("");
5db06880
NC
6561 force_next(WORD);
6562 TOKEN(NOAMP);
6563 }
6564#else
6154021b 6565 NEXTVAL_NEXTTOKE.opval = pl_yylval.opval;
3280af22 6566 PL_expect = XTERM;
8990e307
LW
6567 force_next(WORD);
6568 TOKEN(NOAMP);
5db06880 6569#endif
8990e307 6570 }
748a9306 6571
8990e307
LW
6572 /* Call it a bare word */
6573
5603f27d 6574 if (PL_hints & HINT_STRICT_SUBS)
6154021b 6575 pl_yylval.opval->op_private |= OPpCONST_STRICT;
5603f27d 6576 else {
9a073a1d
RGS
6577 bareword:
6578 /* after "print" and similar functions (corresponding to
6579 * "F? L" in opcode.pl), whatever wasn't already parsed as
6580 * a filehandle should be subject to "strict subs".
6581 * Likewise for the optional indirect-object argument to system
6582 * or exec, which can't be a bareword */
6583 if ((PL_last_lop_op == OP_PRINT
6584 || PL_last_lop_op == OP_PRTF
6585 || PL_last_lop_op == OP_SAY
6586 || PL_last_lop_op == OP_SYSTEM
6587 || PL_last_lop_op == OP_EXEC)
6588 && (PL_hints & HINT_STRICT_SUBS))
6589 pl_yylval.opval->op_private |= OPpCONST_STRICT;
041457d9
DM
6590 if (lastchar != '-') {
6591 if (ckWARN(WARN_RESERVED)) {
c35e046a
AL
6592 d = PL_tokenbuf;
6593 while (isLOWER(*d))
6594 d++;
da51bb9b 6595 if (!*d && !gv_stashpv(PL_tokenbuf, 0))
9014280d 6596 Perl_warner(aTHX_ packWARN(WARN_RESERVED), PL_warn_reserved,
5603f27d
GS
6597 PL_tokenbuf);
6598 }
748a9306
LW
6599 }
6600 }
f7461760 6601 op_free(rv2cv_op);
c3e0f903
GS
6602
6603 safe_bareword:
9b387841
NC
6604 if ((lastchar == '*' || lastchar == '%' || lastchar == '&')) {
6605 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6606 "Operator or semicolon missing before %c%s",
6607 lastchar, PL_tokenbuf);
6608 Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
6609 "Ambiguous use of %c resolved as operator %c",
6610 lastchar, lastchar);
748a9306 6611 }
93a17b20 6612 TOKEN(WORD);
79072805 6613 }
79072805 6614
68dc0745 6615 case KEY___FILE__:
6154021b 6616 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
ed094faf 6617 newSVpv(CopFILE(PL_curcop),0));
46fc3d4c 6618 TERM(THING);
6619
79072805 6620 case KEY___LINE__:
6154021b 6621 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
57843af0 6622 Perl_newSVpvf(aTHX_ "%"IVdf, (IV)CopLINE(PL_curcop)));
79072805 6623 TERM(THING);
68dc0745 6624
6625 case KEY___PACKAGE__:
6154021b 6626 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0,
3280af22 6627 (PL_curstash
5aaec2b4 6628 ? newSVhek(HvNAME_HEK(PL_curstash))
3280af22 6629 : &PL_sv_undef));
79072805 6630 TERM(THING);
79072805 6631
e50aee73 6632 case KEY___DATA__:
79072805
LW
6633 case KEY___END__: {
6634 GV *gv;
3280af22 6635 if (PL_rsfp && (!PL_in_eval || PL_tokenbuf[2] == 'D')) {
bfed75c6 6636 const char *pname = "main";
3280af22 6637 if (PL_tokenbuf[2] == 'D')
bfcb3514 6638 pname = HvNAME_get(PL_curstash ? PL_curstash : PL_defstash);
f776e3cd
NC
6639 gv = gv_fetchpv(Perl_form(aTHX_ "%s::DATA", pname), GV_ADD,
6640 SVt_PVIO);
a5f75d66 6641 GvMULTI_on(gv);
79072805 6642 if (!GvIO(gv))
a0d0e21e 6643 GvIOp(gv) = newIO();
3280af22 6644 IoIFP(GvIOp(gv)) = PL_rsfp;
a0d0e21e
LW
6645#if defined(HAS_FCNTL) && defined(F_SETFD)
6646 {
f54cb97a 6647 const int fd = PerlIO_fileno(PL_rsfp);
a0d0e21e
LW
6648 fcntl(fd,F_SETFD,fd >= 3);
6649 }
79072805 6650#endif
fd049845 6651 /* Mark this internal pseudo-handle as clean */
6652 IoFLAGS(GvIOp(gv)) |= IOf_UNTAINT;
4c84d7f2 6653 if ((PerlIO*)PL_rsfp == PerlIO_stdin())
50952442 6654 IoTYPE(GvIOp(gv)) = IoTYPE_STD;
79072805 6655 else
50952442 6656 IoTYPE(GvIOp(gv)) = IoTYPE_RDONLY;
c39cd008
GS
6657#if defined(WIN32) && !defined(PERL_TEXTMODE_SCRIPTS)
6658 /* if the script was opened in binmode, we need to revert
53129d29 6659 * it to text mode for compatibility; but only iff it has CRs
c39cd008 6660 * XXX this is a questionable hack at best. */
53129d29
GS
6661 if (PL_bufend-PL_bufptr > 2
6662 && PL_bufend[-1] == '\n' && PL_bufend[-2] == '\r')
c39cd008
GS
6663 {
6664 Off_t loc = 0;
50952442 6665 if (IoTYPE(GvIOp(gv)) == IoTYPE_RDONLY) {
c39cd008
GS
6666 loc = PerlIO_tell(PL_rsfp);
6667 (void)PerlIO_seek(PL_rsfp, 0L, 0);
6668 }
2986a63f
JH
6669#ifdef NETWARE
6670 if (PerlLIO_setmode(PL_rsfp, O_TEXT) != -1) {
6671#else
c39cd008 6672 if (PerlLIO_setmode(PerlIO_fileno(PL_rsfp), O_TEXT) != -1) {
2986a63f 6673#endif /* NETWARE */
1143fce0
JH
6674#ifdef PERLIO_IS_STDIO /* really? */
6675# if defined(__BORLANDC__)
cb359b41
JH
6676 /* XXX see note in do_binmode() */
6677 ((FILE*)PL_rsfp)->flags &= ~_F_BIN;
1143fce0
JH
6678# endif
6679#endif
c39cd008
GS
6680 if (loc > 0)
6681 PerlIO_seek(PL_rsfp, loc, 0);
6682 }
6683 }
6684#endif
7948272d 6685#ifdef PERLIO_LAYERS
52d2e0f4
JH
6686 if (!IN_BYTES) {
6687 if (UTF)
6688 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL, ":utf8");
6689 else if (PL_encoding) {
6690 SV *name;
6691 dSP;
6692 ENTER;
6693 SAVETMPS;
6694 PUSHMARK(sp);
6695 EXTEND(SP, 1);
6696 XPUSHs(PL_encoding);
6697 PUTBACK;
6698 call_method("name", G_SCALAR);
6699 SPAGAIN;
6700 name = POPs;
6701 PUTBACK;
bfed75c6 6702 PerlIO_apply_layers(aTHX_ PL_rsfp, NULL,
52d2e0f4 6703 Perl_form(aTHX_ ":encoding(%"SVf")",
be2597df 6704 SVfARG(name)));
52d2e0f4
JH
6705 FREETMPS;
6706 LEAVE;
6707 }
6708 }
7948272d 6709#endif
5db06880
NC
6710#ifdef PERL_MAD
6711 if (PL_madskills) {
cd81e915
NC
6712 if (PL_realtokenstart >= 0) {
6713 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
6714 if (!PL_endwhite)
6b29d1f5 6715 PL_endwhite = newSVpvs("");
cd81e915
NC
6716 sv_catsv(PL_endwhite, PL_thiswhite);
6717 PL_thiswhite = 0;
6718 sv_catpvn(PL_endwhite, tstart, PL_bufend - tstart);
6719 PL_realtokenstart = -1;
5db06880 6720 }
5cc814fd
NC
6721 while ((s = filter_gets(PL_endwhite, SvCUR(PL_endwhite)))
6722 != NULL) ;
5db06880
NC
6723 }
6724#endif
4608196e 6725 PL_rsfp = NULL;
79072805
LW
6726 }
6727 goto fake_eof;
e929a76b 6728 }
de3bb511 6729
8990e307 6730 case KEY_AUTOLOAD:
ed6116ce 6731 case KEY_DESTROY:
79072805 6732 case KEY_BEGIN:
3c10abe3 6733 case KEY_UNITCHECK:
7d30b5c4 6734 case KEY_CHECK:
7d07dbc2 6735 case KEY_INIT:
7d30b5c4 6736 case KEY_END:
3280af22
NIS
6737 if (PL_expect == XSTATE) {
6738 s = PL_bufptr;
93a17b20 6739 goto really_sub;
79072805
LW
6740 }
6741 goto just_a_word;
6742
a0d0e21e
LW
6743 case KEY_CORE:
6744 if (*s == ':' && s[1] == ':') {
6745 s += 2;
748a9306 6746 d = s;
3280af22 6747 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, FALSE, &len);
5458a98a 6748 if (!(tmp = keyword(PL_tokenbuf, len, 0)))
6798c92b 6749 Perl_croak(aTHX_ "CORE::%s is not a keyword", PL_tokenbuf);
a0d0e21e
LW
6750 if (tmp < 0)
6751 tmp = -tmp;
850e8516 6752 else if (tmp == KEY_require || tmp == KEY_do)
a72a1c8b 6753 /* that's a way to remember we saw "CORE::" */
850e8516 6754 orig_keyword = tmp;
a0d0e21e
LW
6755 goto reserved_word;
6756 }
6757 goto just_a_word;
6758
463ee0b2
LW
6759 case KEY_abs:
6760 UNI(OP_ABS);
6761
79072805
LW
6762 case KEY_alarm:
6763 UNI(OP_ALARM);
6764
6765 case KEY_accept:
a0d0e21e 6766 LOP(OP_ACCEPT,XTERM);
79072805 6767
463ee0b2
LW
6768 case KEY_and:
6769 OPERATOR(ANDOP);
6770
79072805 6771 case KEY_atan2:
a0d0e21e 6772 LOP(OP_ATAN2,XTERM);
85e6fe83 6773
79072805 6774 case KEY_bind:
a0d0e21e 6775 LOP(OP_BIND,XTERM);
79072805
LW
6776
6777 case KEY_binmode:
1c1fc3ea 6778 LOP(OP_BINMODE,XTERM);
79072805
LW
6779
6780 case KEY_bless:
a0d0e21e 6781 LOP(OP_BLESS,XTERM);
79072805 6782
0d863452
RH
6783 case KEY_break:
6784 FUN0(OP_BREAK);
6785
79072805
LW
6786 case KEY_chop:
6787 UNI(OP_CHOP);
6788
6789 case KEY_continue:
0d863452
RH
6790 /* When 'use switch' is in effect, continue has a dual
6791 life as a control operator. */
6792 {
ef89dcc3 6793 if (!FEATURE_IS_ENABLED("switch"))
0d863452
RH
6794 PREBLOCK(CONTINUE);
6795 else {
6796 /* We have to disambiguate the two senses of
6797 "continue". If the next token is a '{' then
6798 treat it as the start of a continue block;
6799 otherwise treat it as a control operator.
6800 */
6801 s = skipspace(s);
6802 if (*s == '{')
79072805 6803 PREBLOCK(CONTINUE);
0d863452
RH
6804 else
6805 FUN0(OP_CONTINUE);
6806 }
6807 }
79072805
LW
6808
6809 case KEY_chdir:
fafc274c
NC
6810 /* may use HOME */
6811 (void)gv_fetchpvs("ENV", GV_ADD|GV_NOTQUAL, SVt_PVHV);
79072805
LW
6812 UNI(OP_CHDIR);
6813
6814 case KEY_close:
6815 UNI(OP_CLOSE);
6816
6817 case KEY_closedir:
6818 UNI(OP_CLOSEDIR);
6819
6820 case KEY_cmp:
6821 Eop(OP_SCMP);
6822
6823 case KEY_caller:
6824 UNI(OP_CALLER);
6825
6826 case KEY_crypt:
6827#ifdef FCRYPT
f4c556ac
GS
6828 if (!PL_cryptseen) {
6829 PL_cryptseen = TRUE;
de3bb511 6830 init_des();
f4c556ac 6831 }
a687059c 6832#endif
a0d0e21e 6833 LOP(OP_CRYPT,XTERM);
79072805
LW
6834
6835 case KEY_chmod:
a0d0e21e 6836 LOP(OP_CHMOD,XTERM);
79072805
LW
6837
6838 case KEY_chown:
a0d0e21e 6839 LOP(OP_CHOWN,XTERM);
79072805
LW
6840
6841 case KEY_connect:
a0d0e21e 6842 LOP(OP_CONNECT,XTERM);
79072805 6843
463ee0b2
LW
6844 case KEY_chr:
6845 UNI(OP_CHR);
6846
79072805
LW
6847 case KEY_cos:
6848 UNI(OP_COS);
6849
6850 case KEY_chroot:
6851 UNI(OP_CHROOT);
6852
0d863452
RH
6853 case KEY_default:
6854 PREBLOCK(DEFAULT);
6855
79072805 6856 case KEY_do:
29595ff2 6857 s = SKIPSPACE1(s);
79072805 6858 if (*s == '{')
a0d0e21e 6859 PRETERMBLOCK(DO);
79072805 6860 if (*s != '\'')
89c5585f 6861 s = force_word(s,WORD,TRUE,TRUE,FALSE);
850e8516
RGS
6862 if (orig_keyword == KEY_do) {
6863 orig_keyword = 0;
6154021b 6864 pl_yylval.ival = 1;
850e8516
RGS
6865 }
6866 else
6154021b 6867 pl_yylval.ival = 0;
378cc40b 6868 OPERATOR(DO);
79072805
LW
6869
6870 case KEY_die:
3280af22 6871 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 6872 LOP(OP_DIE,XTERM);
79072805
LW
6873
6874 case KEY_defined:
6875 UNI(OP_DEFINED);
6876
6877 case KEY_delete:
a0d0e21e 6878 UNI(OP_DELETE);
79072805
LW
6879
6880 case KEY_dbmopen:
5c1737d1 6881 gv_fetchpvs("AnyDBM_File::ISA", GV_ADDMULTI, SVt_PVAV);
a0d0e21e 6882 LOP(OP_DBMOPEN,XTERM);
79072805
LW
6883
6884 case KEY_dbmclose:
6885 UNI(OP_DBMCLOSE);
6886
6887 case KEY_dump:
a0d0e21e 6888 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
6889 LOOPX(OP_DUMP);
6890
6891 case KEY_else:
6892 PREBLOCK(ELSE);
6893
6894 case KEY_elsif:
6154021b 6895 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
6896 OPERATOR(ELSIF);
6897
6898 case KEY_eq:
6899 Eop(OP_SEQ);
6900
a0d0e21e
LW
6901 case KEY_exists:
6902 UNI(OP_EXISTS);
4e553d73 6903
79072805 6904 case KEY_exit:
5db06880
NC
6905 if (PL_madskills)
6906 UNI(OP_INT);
79072805
LW
6907 UNI(OP_EXIT);
6908
6909 case KEY_eval:
29595ff2 6910 s = SKIPSPACE1(s);
32e2a35d
RGS
6911 if (*s == '{') { /* block eval */
6912 PL_expect = XTERMBLOCK;
6913 UNIBRACK(OP_ENTERTRY);
6914 }
6915 else { /* string eval */
6916 PL_expect = XTERM;
6917 UNIBRACK(OP_ENTEREVAL);
6918 }
79072805
LW
6919
6920 case KEY_eof:
6921 UNI(OP_EOF);
6922
6923 case KEY_exp:
6924 UNI(OP_EXP);
6925
6926 case KEY_each:
6927 UNI(OP_EACH);
6928
6929 case KEY_exec:
a0d0e21e 6930 LOP(OP_EXEC,XREF);
79072805
LW
6931
6932 case KEY_endhostent:
6933 FUN0(OP_EHOSTENT);
6934
6935 case KEY_endnetent:
6936 FUN0(OP_ENETENT);
6937
6938 case KEY_endservent:
6939 FUN0(OP_ESERVENT);
6940
6941 case KEY_endprotoent:
6942 FUN0(OP_EPROTOENT);
6943
6944 case KEY_endpwent:
6945 FUN0(OP_EPWENT);
6946
6947 case KEY_endgrent:
6948 FUN0(OP_EGRENT);
6949
6950 case KEY_for:
6951 case KEY_foreach:
6154021b 6952 pl_yylval.ival = CopLINE(PL_curcop);
29595ff2 6953 s = SKIPSPACE1(s);
7e2040f0 6954 if (PL_expect == XSTATE && isIDFIRST_lazy_if(s,UTF)) {
55497cff 6955 char *p = s;
5db06880
NC
6956#ifdef PERL_MAD
6957 int soff = s - SvPVX(PL_linestr); /* for skipspace realloc */
6958#endif
6959
3280af22 6960 if ((PL_bufend - p) >= 3 &&
55497cff 6961 strnEQ(p, "my", 2) && isSPACE(*(p + 2)))
6962 p += 2;
77ca0c92
LW
6963 else if ((PL_bufend - p) >= 4 &&
6964 strnEQ(p, "our", 3) && isSPACE(*(p + 3)))
6965 p += 3;
29595ff2 6966 p = PEEKSPACE(p);
7e2040f0 6967 if (isIDFIRST_lazy_if(p,UTF)) {
77ca0c92
LW
6968 p = scan_ident(p, PL_bufend,
6969 PL_tokenbuf, sizeof PL_tokenbuf, TRUE);
29595ff2 6970 p = PEEKSPACE(p);
77ca0c92
LW
6971 }
6972 if (*p != '$')
cea2e8a9 6973 Perl_croak(aTHX_ "Missing $ on loop variable");
5db06880
NC
6974#ifdef PERL_MAD
6975 s = SvPVX(PL_linestr) + soff;
6976#endif
55497cff 6977 }
79072805
LW
6978 OPERATOR(FOR);
6979
6980 case KEY_formline:
a0d0e21e 6981 LOP(OP_FORMLINE,XTERM);
79072805
LW
6982
6983 case KEY_fork:
6984 FUN0(OP_FORK);
6985
6986 case KEY_fcntl:
a0d0e21e 6987 LOP(OP_FCNTL,XTERM);
79072805
LW
6988
6989 case KEY_fileno:
6990 UNI(OP_FILENO);
6991
6992 case KEY_flock:
a0d0e21e 6993 LOP(OP_FLOCK,XTERM);
79072805
LW
6994
6995 case KEY_gt:
6996 Rop(OP_SGT);
6997
6998 case KEY_ge:
6999 Rop(OP_SGE);
7000
7001 case KEY_grep:
2c38e13d 7002 LOP(OP_GREPSTART, XREF);
79072805
LW
7003
7004 case KEY_goto:
a0d0e21e 7005 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7006 LOOPX(OP_GOTO);
7007
7008 case KEY_gmtime:
7009 UNI(OP_GMTIME);
7010
7011 case KEY_getc:
6f33ba73 7012 UNIDOR(OP_GETC);
79072805
LW
7013
7014 case KEY_getppid:
7015 FUN0(OP_GETPPID);
7016
7017 case KEY_getpgrp:
7018 UNI(OP_GETPGRP);
7019
7020 case KEY_getpriority:
a0d0e21e 7021 LOP(OP_GETPRIORITY,XTERM);
79072805
LW
7022
7023 case KEY_getprotobyname:
7024 UNI(OP_GPBYNAME);
7025
7026 case KEY_getprotobynumber:
a0d0e21e 7027 LOP(OP_GPBYNUMBER,XTERM);
79072805
LW
7028
7029 case KEY_getprotoent:
7030 FUN0(OP_GPROTOENT);
7031
7032 case KEY_getpwent:
7033 FUN0(OP_GPWENT);
7034
7035 case KEY_getpwnam:
ff68c719 7036 UNI(OP_GPWNAM);
79072805
LW
7037
7038 case KEY_getpwuid:
ff68c719 7039 UNI(OP_GPWUID);
79072805
LW
7040
7041 case KEY_getpeername:
7042 UNI(OP_GETPEERNAME);
7043
7044 case KEY_gethostbyname:
7045 UNI(OP_GHBYNAME);
7046
7047 case KEY_gethostbyaddr:
a0d0e21e 7048 LOP(OP_GHBYADDR,XTERM);
79072805
LW
7049
7050 case KEY_gethostent:
7051 FUN0(OP_GHOSTENT);
7052
7053 case KEY_getnetbyname:
7054 UNI(OP_GNBYNAME);
7055
7056 case KEY_getnetbyaddr:
a0d0e21e 7057 LOP(OP_GNBYADDR,XTERM);
79072805
LW
7058
7059 case KEY_getnetent:
7060 FUN0(OP_GNETENT);
7061
7062 case KEY_getservbyname:
a0d0e21e 7063 LOP(OP_GSBYNAME,XTERM);
79072805
LW
7064
7065 case KEY_getservbyport:
a0d0e21e 7066 LOP(OP_GSBYPORT,XTERM);
79072805
LW
7067
7068 case KEY_getservent:
7069 FUN0(OP_GSERVENT);
7070
7071 case KEY_getsockname:
7072 UNI(OP_GETSOCKNAME);
7073
7074 case KEY_getsockopt:
a0d0e21e 7075 LOP(OP_GSOCKOPT,XTERM);
79072805
LW
7076
7077 case KEY_getgrent:
7078 FUN0(OP_GGRENT);
7079
7080 case KEY_getgrnam:
ff68c719 7081 UNI(OP_GGRNAM);
79072805
LW
7082
7083 case KEY_getgrgid:
ff68c719 7084 UNI(OP_GGRGID);
79072805
LW
7085
7086 case KEY_getlogin:
7087 FUN0(OP_GETLOGIN);
7088
0d863452 7089 case KEY_given:
6154021b 7090 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7091 OPERATOR(GIVEN);
7092
93a17b20 7093 case KEY_glob:
a0d0e21e 7094 LOP(OP_GLOB,XTERM);
93a17b20 7095
79072805
LW
7096 case KEY_hex:
7097 UNI(OP_HEX);
7098
7099 case KEY_if:
6154021b 7100 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7101 OPERATOR(IF);
7102
7103 case KEY_index:
a0d0e21e 7104 LOP(OP_INDEX,XTERM);
79072805
LW
7105
7106 case KEY_int:
7107 UNI(OP_INT);
7108
7109 case KEY_ioctl:
a0d0e21e 7110 LOP(OP_IOCTL,XTERM);
79072805
LW
7111
7112 case KEY_join:
a0d0e21e 7113 LOP(OP_JOIN,XTERM);
79072805
LW
7114
7115 case KEY_keys:
7116 UNI(OP_KEYS);
7117
7118 case KEY_kill:
a0d0e21e 7119 LOP(OP_KILL,XTERM);
79072805
LW
7120
7121 case KEY_last:
a0d0e21e 7122 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805 7123 LOOPX(OP_LAST);
4e553d73 7124
79072805
LW
7125 case KEY_lc:
7126 UNI(OP_LC);
7127
7128 case KEY_lcfirst:
7129 UNI(OP_LCFIRST);
7130
7131 case KEY_local:
6154021b 7132 pl_yylval.ival = 0;
79072805
LW
7133 OPERATOR(LOCAL);
7134
7135 case KEY_length:
7136 UNI(OP_LENGTH);
7137
7138 case KEY_lt:
7139 Rop(OP_SLT);
7140
7141 case KEY_le:
7142 Rop(OP_SLE);
7143
7144 case KEY_localtime:
7145 UNI(OP_LOCALTIME);
7146
7147 case KEY_log:
7148 UNI(OP_LOG);
7149
7150 case KEY_link:
a0d0e21e 7151 LOP(OP_LINK,XTERM);
79072805
LW
7152
7153 case KEY_listen:
a0d0e21e 7154 LOP(OP_LISTEN,XTERM);
79072805 7155
c0329465
MB
7156 case KEY_lock:
7157 UNI(OP_LOCK);
7158
79072805
LW
7159 case KEY_lstat:
7160 UNI(OP_LSTAT);
7161
7162 case KEY_m:
8782bef2 7163 s = scan_pat(s,OP_MATCH);
79072805
LW
7164 TERM(sublex_start());
7165
a0d0e21e 7166 case KEY_map:
2c38e13d 7167 LOP(OP_MAPSTART, XREF);
4e4e412b 7168
79072805 7169 case KEY_mkdir:
a0d0e21e 7170 LOP(OP_MKDIR,XTERM);
79072805
LW
7171
7172 case KEY_msgctl:
a0d0e21e 7173 LOP(OP_MSGCTL,XTERM);
79072805
LW
7174
7175 case KEY_msgget:
a0d0e21e 7176 LOP(OP_MSGGET,XTERM);
79072805
LW
7177
7178 case KEY_msgrcv:
a0d0e21e 7179 LOP(OP_MSGRCV,XTERM);
79072805
LW
7180
7181 case KEY_msgsnd:
a0d0e21e 7182 LOP(OP_MSGSND,XTERM);
79072805 7183
77ca0c92 7184 case KEY_our:
93a17b20 7185 case KEY_my:
952306ac 7186 case KEY_state:
eac04b2e 7187 PL_in_my = (U16)tmp;
29595ff2 7188 s = SKIPSPACE1(s);
7e2040f0 7189 if (isIDFIRST_lazy_if(s,UTF)) {
5db06880
NC
7190#ifdef PERL_MAD
7191 char* start = s;
7192#endif
3280af22 7193 s = scan_word(s, PL_tokenbuf, sizeof PL_tokenbuf, TRUE, &len);
09bef843
SB
7194 if (len == 3 && strnEQ(PL_tokenbuf, "sub", 3))
7195 goto really_sub;
def3634b 7196 PL_in_my_stash = find_in_my_stash(PL_tokenbuf, len);
3280af22 7197 if (!PL_in_my_stash) {
c750a3ec 7198 char tmpbuf[1024];
3280af22 7199 PL_bufptr = s;
d9fad198 7200 my_snprintf(tmpbuf, sizeof(tmpbuf), "No such class %.1000s", PL_tokenbuf);
c750a3ec
MB
7201 yyerror(tmpbuf);
7202 }
5db06880
NC
7203#ifdef PERL_MAD
7204 if (PL_madskills) { /* just add type to declarator token */
cd81e915
NC
7205 sv_catsv(PL_thistoken, PL_nextwhite);
7206 PL_nextwhite = 0;
7207 sv_catpvn(PL_thistoken, start, s - start);
5db06880
NC
7208 }
7209#endif
c750a3ec 7210 }
6154021b 7211 pl_yylval.ival = 1;
55497cff 7212 OPERATOR(MY);
93a17b20 7213
79072805 7214 case KEY_next:
a0d0e21e 7215 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7216 LOOPX(OP_NEXT);
7217
7218 case KEY_ne:
7219 Eop(OP_SNE);
7220
a0d0e21e 7221 case KEY_no:
468aa647 7222 s = tokenize_use(0, s);
a0d0e21e
LW
7223 OPERATOR(USE);
7224
7225 case KEY_not:
29595ff2 7226 if (*s == '(' || (s = SKIPSPACE1(s), *s == '('))
2d2e263d
LW
7227 FUN1(OP_NOT);
7228 else
7229 OPERATOR(NOTOP);
a0d0e21e 7230
79072805 7231 case KEY_open:
29595ff2 7232 s = SKIPSPACE1(s);
7e2040f0 7233 if (isIDFIRST_lazy_if(s,UTF)) {
f54cb97a 7234 const char *t;
c35e046a
AL
7235 for (d = s; isALNUM_lazy_if(d,UTF);)
7236 d++;
7237 for (t=d; isSPACE(*t);)
7238 t++;
e2ab214b 7239 if ( *t && strchr("|&*+-=!?:.", *t) && ckWARN_d(WARN_PRECEDENCE)
66fbe8fb
HS
7240 /* [perl #16184] */
7241 && !(t[0] == '=' && t[1] == '>')
7242 ) {
5f66b61c 7243 int parms_len = (int)(d-s);
9014280d 7244 Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
0453d815 7245 "Precedence problem: open %.*s should be open(%.*s)",
5f66b61c 7246 parms_len, s, parms_len, s);
66fbe8fb 7247 }
93a17b20 7248 }
a0d0e21e 7249 LOP(OP_OPEN,XTERM);
79072805 7250
463ee0b2 7251 case KEY_or:
6154021b 7252 pl_yylval.ival = OP_OR;
463ee0b2
LW
7253 OPERATOR(OROP);
7254
79072805
LW
7255 case KEY_ord:
7256 UNI(OP_ORD);
7257
7258 case KEY_oct:
7259 UNI(OP_OCT);
7260
7261 case KEY_opendir:
a0d0e21e 7262 LOP(OP_OPEN_DIR,XTERM);
79072805
LW
7263
7264 case KEY_print:
3280af22 7265 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7266 LOP(OP_PRINT,XREF);
79072805
LW
7267
7268 case KEY_printf:
3280af22 7269 checkcomma(s,PL_tokenbuf,"filehandle");
a0d0e21e 7270 LOP(OP_PRTF,XREF);
79072805 7271
c07a80fd 7272 case KEY_prototype:
7273 UNI(OP_PROTOTYPE);
7274
79072805 7275 case KEY_push:
a0d0e21e 7276 LOP(OP_PUSH,XTERM);
79072805
LW
7277
7278 case KEY_pop:
6f33ba73 7279 UNIDOR(OP_POP);
79072805 7280
a0d0e21e 7281 case KEY_pos:
6f33ba73 7282 UNIDOR(OP_POS);
4e553d73 7283
79072805 7284 case KEY_pack:
a0d0e21e 7285 LOP(OP_PACK,XTERM);
79072805
LW
7286
7287 case KEY_package:
a0d0e21e 7288 s = force_word(s,WORD,FALSE,TRUE,FALSE);
14a86d0c 7289 s = SKIPSPACE1(s);
91152fc1 7290 s = force_strict_version(s);
4e4da3ac 7291 PL_lex_expect = XBLOCK;
79072805
LW
7292 OPERATOR(PACKAGE);
7293
7294 case KEY_pipe:
a0d0e21e 7295 LOP(OP_PIPE_OP,XTERM);
79072805
LW
7296
7297 case KEY_q:
5db06880 7298 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7299 if (!s)
d4c19fe8 7300 missingterm(NULL);
6154021b 7301 pl_yylval.ival = OP_CONST;
79072805
LW
7302 TERM(sublex_start());
7303
a0d0e21e
LW
7304 case KEY_quotemeta:
7305 UNI(OP_QUOTEMETA);
7306
8990e307 7307 case KEY_qw:
5db06880 7308 s = scan_str(s,!!PL_madskills,FALSE);
8990e307 7309 if (!s)
d4c19fe8 7310 missingterm(NULL);
3480a8d2 7311 PL_expect = XOPERATOR;
8127e0e3
GS
7312 force_next(')');
7313 if (SvCUR(PL_lex_stuff)) {
5f66b61c 7314 OP *words = NULL;
8127e0e3 7315 int warned = 0;
3280af22 7316 d = SvPV_force(PL_lex_stuff, len);
8127e0e3 7317 while (len) {
d4c19fe8
AL
7318 for (; isSPACE(*d) && len; --len, ++d)
7319 /**/;
8127e0e3 7320 if (len) {
d4c19fe8 7321 SV *sv;
f54cb97a 7322 const char *b = d;
e476b1b5 7323 if (!warned && ckWARN(WARN_QW)) {
8127e0e3
GS
7324 for (; !isSPACE(*d) && len; --len, ++d) {
7325 if (*d == ',') {
9014280d 7326 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7327 "Possible attempt to separate words with commas");
7328 ++warned;
7329 }
7330 else if (*d == '#') {
9014280d 7331 Perl_warner(aTHX_ packWARN(WARN_QW),
8127e0e3
GS
7332 "Possible attempt to put comments in qw() list");
7333 ++warned;
7334 }
7335 }
7336 }
7337 else {
d4c19fe8
AL
7338 for (; !isSPACE(*d) && len; --len, ++d)
7339 /**/;
8127e0e3 7340 }
740cce10 7341 sv = newSVpvn_utf8(b, d-b, DO_UTF8(PL_lex_stuff));
8127e0e3 7342 words = append_elem(OP_LIST, words,
7948272d 7343 newSVOP(OP_CONST, 0, tokeq(sv)));
55497cff 7344 }
7345 }
8127e0e3 7346 if (words) {
cd81e915 7347 start_force(PL_curforce);
9ded7720 7348 NEXTVAL_NEXTTOKE.opval = words;
8127e0e3
GS
7349 force_next(THING);
7350 }
55497cff 7351 }
37fd879b 7352 if (PL_lex_stuff) {
8127e0e3 7353 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 7354 PL_lex_stuff = NULL;
37fd879b 7355 }
3280af22 7356 PL_expect = XTERM;
8127e0e3 7357 TOKEN('(');
8990e307 7358
79072805 7359 case KEY_qq:
5db06880 7360 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7361 if (!s)
d4c19fe8 7362 missingterm(NULL);
6154021b 7363 pl_yylval.ival = OP_STRINGIFY;
3280af22 7364 if (SvIVX(PL_lex_stuff) == '\'')
45977657 7365 SvIV_set(PL_lex_stuff, 0); /* qq'$foo' should intepolate */
79072805
LW
7366 TERM(sublex_start());
7367
8782bef2
GB
7368 case KEY_qr:
7369 s = scan_pat(s,OP_QR);
7370 TERM(sublex_start());
7371
79072805 7372 case KEY_qx:
5db06880 7373 s = scan_str(s,!!PL_madskills,FALSE);
79072805 7374 if (!s)
d4c19fe8 7375 missingterm(NULL);
9b201d7d 7376 readpipe_override();
79072805
LW
7377 TERM(sublex_start());
7378
7379 case KEY_return:
7380 OLDLOP(OP_RETURN);
7381
7382 case KEY_require:
29595ff2 7383 s = SKIPSPACE1(s);
e759cc13
RGS
7384 if (isDIGIT(*s)) {
7385 s = force_version(s, FALSE);
a7cb1f99 7386 }
e759cc13
RGS
7387 else if (*s != 'v' || !isDIGIT(s[1])
7388 || (s = force_version(s, TRUE), *s == 'v'))
7389 {
a7cb1f99
GS
7390 *PL_tokenbuf = '\0';
7391 s = force_word(s,WORD,TRUE,TRUE,FALSE);
7e2040f0 7392 if (isIDFIRST_lazy_if(PL_tokenbuf,UTF))
da51bb9b 7393 gv_stashpvn(PL_tokenbuf, strlen(PL_tokenbuf), GV_ADD);
a7cb1f99
GS
7394 else if (*s == '<')
7395 yyerror("<> should be quotes");
7396 }
a72a1c8b
RGS
7397 if (orig_keyword == KEY_require) {
7398 orig_keyword = 0;
6154021b 7399 pl_yylval.ival = 1;
a72a1c8b
RGS
7400 }
7401 else
6154021b 7402 pl_yylval.ival = 0;
a72a1c8b
RGS
7403 PL_expect = XTERM;
7404 PL_bufptr = s;
7405 PL_last_uni = PL_oldbufptr;
7406 PL_last_lop_op = OP_REQUIRE;
7407 s = skipspace(s);
7408 return REPORT( (int)REQUIRE );
79072805
LW
7409
7410 case KEY_reset:
7411 UNI(OP_RESET);
7412
7413 case KEY_redo:
a0d0e21e 7414 s = force_word(s,WORD,TRUE,FALSE,FALSE);
79072805
LW
7415 LOOPX(OP_REDO);
7416
7417 case KEY_rename:
a0d0e21e 7418 LOP(OP_RENAME,XTERM);
79072805
LW
7419
7420 case KEY_rand:
7421 UNI(OP_RAND);
7422
7423 case KEY_rmdir:
7424 UNI(OP_RMDIR);
7425
7426 case KEY_rindex:
a0d0e21e 7427 LOP(OP_RINDEX,XTERM);
79072805
LW
7428
7429 case KEY_read:
a0d0e21e 7430 LOP(OP_READ,XTERM);
79072805
LW
7431
7432 case KEY_readdir:
7433 UNI(OP_READDIR);
7434
93a17b20 7435 case KEY_readline:
6f33ba73 7436 UNIDOR(OP_READLINE);
93a17b20
LW
7437
7438 case KEY_readpipe:
0858480c 7439 UNIDOR(OP_BACKTICK);
93a17b20 7440
79072805
LW
7441 case KEY_rewinddir:
7442 UNI(OP_REWINDDIR);
7443
7444 case KEY_recv:
a0d0e21e 7445 LOP(OP_RECV,XTERM);
79072805
LW
7446
7447 case KEY_reverse:
a0d0e21e 7448 LOP(OP_REVERSE,XTERM);
79072805
LW
7449
7450 case KEY_readlink:
6f33ba73 7451 UNIDOR(OP_READLINK);
79072805
LW
7452
7453 case KEY_ref:
7454 UNI(OP_REF);
7455
7456 case KEY_s:
7457 s = scan_subst(s);
6154021b 7458 if (pl_yylval.opval)
79072805
LW
7459 TERM(sublex_start());
7460 else
7461 TOKEN(1); /* force error */
7462
0d863452
RH
7463 case KEY_say:
7464 checkcomma(s,PL_tokenbuf,"filehandle");
7465 LOP(OP_SAY,XREF);
7466
a0d0e21e
LW
7467 case KEY_chomp:
7468 UNI(OP_CHOMP);
4e553d73 7469
79072805
LW
7470 case KEY_scalar:
7471 UNI(OP_SCALAR);
7472
7473 case KEY_select:
a0d0e21e 7474 LOP(OP_SELECT,XTERM);
79072805
LW
7475
7476 case KEY_seek:
a0d0e21e 7477 LOP(OP_SEEK,XTERM);
79072805
LW
7478
7479 case KEY_semctl:
a0d0e21e 7480 LOP(OP_SEMCTL,XTERM);
79072805
LW
7481
7482 case KEY_semget:
a0d0e21e 7483 LOP(OP_SEMGET,XTERM);
79072805
LW
7484
7485 case KEY_semop:
a0d0e21e 7486 LOP(OP_SEMOP,XTERM);
79072805
LW
7487
7488 case KEY_send:
a0d0e21e 7489 LOP(OP_SEND,XTERM);
79072805
LW
7490
7491 case KEY_setpgrp:
a0d0e21e 7492 LOP(OP_SETPGRP,XTERM);
79072805
LW
7493
7494 case KEY_setpriority:
a0d0e21e 7495 LOP(OP_SETPRIORITY,XTERM);
79072805
LW
7496
7497 case KEY_sethostent:
ff68c719 7498 UNI(OP_SHOSTENT);
79072805
LW
7499
7500 case KEY_setnetent:
ff68c719 7501 UNI(OP_SNETENT);
79072805
LW
7502
7503 case KEY_setservent:
ff68c719 7504 UNI(OP_SSERVENT);
79072805
LW
7505
7506 case KEY_setprotoent:
ff68c719 7507 UNI(OP_SPROTOENT);
79072805
LW
7508
7509 case KEY_setpwent:
7510 FUN0(OP_SPWENT);
7511
7512 case KEY_setgrent:
7513 FUN0(OP_SGRENT);
7514
7515 case KEY_seekdir:
a0d0e21e 7516 LOP(OP_SEEKDIR,XTERM);
79072805
LW
7517
7518 case KEY_setsockopt:
a0d0e21e 7519 LOP(OP_SSOCKOPT,XTERM);
79072805
LW
7520
7521 case KEY_shift:
6f33ba73 7522 UNIDOR(OP_SHIFT);
79072805
LW
7523
7524 case KEY_shmctl:
a0d0e21e 7525 LOP(OP_SHMCTL,XTERM);
79072805
LW
7526
7527 case KEY_shmget:
a0d0e21e 7528 LOP(OP_SHMGET,XTERM);
79072805
LW
7529
7530 case KEY_shmread:
a0d0e21e 7531 LOP(OP_SHMREAD,XTERM);
79072805
LW
7532
7533 case KEY_shmwrite:
a0d0e21e 7534 LOP(OP_SHMWRITE,XTERM);
79072805
LW
7535
7536 case KEY_shutdown:
a0d0e21e 7537 LOP(OP_SHUTDOWN,XTERM);
79072805
LW
7538
7539 case KEY_sin:
7540 UNI(OP_SIN);
7541
7542 case KEY_sleep:
7543 UNI(OP_SLEEP);
7544
7545 case KEY_socket:
a0d0e21e 7546 LOP(OP_SOCKET,XTERM);
79072805
LW
7547
7548 case KEY_socketpair:
a0d0e21e 7549 LOP(OP_SOCKPAIR,XTERM);
79072805
LW
7550
7551 case KEY_sort:
3280af22 7552 checkcomma(s,PL_tokenbuf,"subroutine name");
29595ff2 7553 s = SKIPSPACE1(s);
79072805 7554 if (*s == ';' || *s == ')') /* probably a close */
cea2e8a9 7555 Perl_croak(aTHX_ "sort is now a reserved word");
3280af22 7556 PL_expect = XTERM;
15f0808c 7557 s = force_word(s,WORD,TRUE,TRUE,FALSE);
a0d0e21e 7558 LOP(OP_SORT,XREF);
79072805
LW
7559
7560 case KEY_split:
a0d0e21e 7561 LOP(OP_SPLIT,XTERM);
79072805
LW
7562
7563 case KEY_sprintf:
a0d0e21e 7564 LOP(OP_SPRINTF,XTERM);
79072805
LW
7565
7566 case KEY_splice:
a0d0e21e 7567 LOP(OP_SPLICE,XTERM);
79072805
LW
7568
7569 case KEY_sqrt:
7570 UNI(OP_SQRT);
7571
7572 case KEY_srand:
7573 UNI(OP_SRAND);
7574
7575 case KEY_stat:
7576 UNI(OP_STAT);
7577
7578 case KEY_study:
79072805
LW
7579 UNI(OP_STUDY);
7580
7581 case KEY_substr:
a0d0e21e 7582 LOP(OP_SUBSTR,XTERM);
79072805
LW
7583
7584 case KEY_format:
7585 case KEY_sub:
93a17b20 7586 really_sub:
09bef843 7587 {
3280af22 7588 char tmpbuf[sizeof PL_tokenbuf];
9c5ffd7c 7589 SSize_t tboffset = 0;
09bef843 7590 expectation attrful;
28cc6278 7591 bool have_name, have_proto;
f54cb97a 7592 const int key = tmp;
09bef843 7593
5db06880
NC
7594#ifdef PERL_MAD
7595 SV *tmpwhite = 0;
7596
cd81e915 7597 char *tstart = SvPVX(PL_linestr) + PL_realtokenstart;
5db06880 7598 SV *subtoken = newSVpvn(tstart, s - tstart);
cd81e915 7599 PL_thistoken = 0;
5db06880
NC
7600
7601 d = s;
7602 s = SKIPSPACE2(s,tmpwhite);
7603#else
09bef843 7604 s = skipspace(s);
5db06880 7605#endif
09bef843 7606
7e2040f0 7607 if (isIDFIRST_lazy_if(s,UTF) || *s == '\'' ||
09bef843
SB
7608 (*s == ':' && s[1] == ':'))
7609 {
5db06880 7610#ifdef PERL_MAD
4f61fd4b 7611 SV *nametoke = NULL;
5db06880
NC
7612#endif
7613
09bef843
SB
7614 PL_expect = XBLOCK;
7615 attrful = XATTRBLOCK;
b1b65b59
JH
7616 /* remember buffer pos'n for later force_word */
7617 tboffset = s - PL_oldbufptr;
09bef843 7618 d = scan_word(s, tmpbuf, sizeof tmpbuf, TRUE, &len);
5db06880
NC
7619#ifdef PERL_MAD
7620 if (PL_madskills)
7621 nametoke = newSVpvn(s, d - s);
7622#endif
6502358f
NC
7623 if (memchr(tmpbuf, ':', len))
7624 sv_setpvn(PL_subname, tmpbuf, len);
09bef843
SB
7625 else {
7626 sv_setsv(PL_subname,PL_curstname);
396482e1 7627 sv_catpvs(PL_subname,"::");
09bef843
SB
7628 sv_catpvn(PL_subname,tmpbuf,len);
7629 }
09bef843 7630 have_name = TRUE;
5db06880
NC
7631
7632#ifdef PERL_MAD
7633
7634 start_force(0);
7635 CURMAD('X', nametoke);
7636 CURMAD('_', tmpwhite);
7637 (void) force_word(PL_oldbufptr + tboffset, WORD,
7638 FALSE, TRUE, TRUE);
7639
7640 s = SKIPSPACE2(d,tmpwhite);
7641#else
7642 s = skipspace(d);
7643#endif
09bef843 7644 }
463ee0b2 7645 else {
09bef843
SB
7646 if (key == KEY_my)
7647 Perl_croak(aTHX_ "Missing name in \"my sub\"");
7648 PL_expect = XTERMBLOCK;
7649 attrful = XATTRTERM;
76f68e9b 7650 sv_setpvs(PL_subname,"?");
09bef843 7651 have_name = FALSE;
463ee0b2 7652 }
4633a7c4 7653
09bef843
SB
7654 if (key == KEY_format) {
7655 if (*s == '=')
7656 PL_lex_formbrack = PL_lex_brackets + 1;
5db06880 7657#ifdef PERL_MAD
cd81e915 7658 PL_thistoken = subtoken;
5db06880
NC
7659 s = d;
7660#else
09bef843 7661 if (have_name)
b1b65b59
JH
7662 (void) force_word(PL_oldbufptr + tboffset, WORD,
7663 FALSE, TRUE, TRUE);
5db06880 7664#endif
09bef843
SB
7665 OPERATOR(FORMAT);
7666 }
79072805 7667
09bef843
SB
7668 /* Look for a prototype */
7669 if (*s == '(') {
d9f2850e
RGS
7670 char *p;
7671 bool bad_proto = FALSE;
9e8d7757
RB
7672 bool in_brackets = FALSE;
7673 char greedy_proto = ' ';
7674 bool proto_after_greedy_proto = FALSE;
7675 bool must_be_last = FALSE;
7676 bool underscore = FALSE;
aef2a98a 7677 bool seen_underscore = FALSE;
197afce1 7678 const bool warnillegalproto = ckWARN(WARN_ILLEGALPROTO);
09bef843 7679
5db06880 7680 s = scan_str(s,!!PL_madskills,FALSE);
37fd879b 7681 if (!s)
09bef843 7682 Perl_croak(aTHX_ "Prototype not terminated");
2f758a16 7683 /* strip spaces and check for bad characters */
09bef843
SB
7684 d = SvPVX(PL_lex_stuff);
7685 tmp = 0;
d9f2850e
RGS
7686 for (p = d; *p; ++p) {
7687 if (!isSPACE(*p)) {
7688 d[tmp++] = *p;
9e8d7757 7689
197afce1 7690 if (warnillegalproto) {
9e8d7757
RB
7691 if (must_be_last)
7692 proto_after_greedy_proto = TRUE;
7693 if (!strchr("$@%*;[]&\\_", *p)) {
7694 bad_proto = TRUE;
7695 }
7696 else {
7697 if ( underscore ) {
7698 if ( *p != ';' )
7699 bad_proto = TRUE;
7700 underscore = FALSE;
7701 }
7702 if ( *p == '[' ) {
7703 in_brackets = TRUE;
7704 }
7705 else if ( *p == ']' ) {
7706 in_brackets = FALSE;
7707 }
7708 else if ( (*p == '@' || *p == '%') &&
7709 ( tmp < 2 || d[tmp-2] != '\\' ) &&
7710 !in_brackets ) {
7711 must_be_last = TRUE;
7712 greedy_proto = *p;
7713 }
7714 else if ( *p == '_' ) {
aef2a98a 7715 underscore = seen_underscore = TRUE;
9e8d7757
RB
7716 }
7717 }
7718 }
d37a9538 7719 }
09bef843 7720 }
d9f2850e 7721 d[tmp] = '\0';
9e8d7757 7722 if (proto_after_greedy_proto)
197afce1 7723 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
9e8d7757
RB
7724 "Prototype after '%c' for %"SVf" : %s",
7725 greedy_proto, SVfARG(PL_subname), d);
d9f2850e 7726 if (bad_proto)
197afce1 7727 Perl_warner(aTHX_ packWARN(WARN_ILLEGALPROTO),
aef2a98a
RGS
7728 "Illegal character %sin prototype for %"SVf" : %s",
7729 seen_underscore ? "after '_' " : "",
be2597df 7730 SVfARG(PL_subname), d);
b162af07 7731 SvCUR_set(PL_lex_stuff, tmp);
09bef843 7732 have_proto = TRUE;
68dc0745 7733
5db06880
NC
7734#ifdef PERL_MAD
7735 start_force(0);
cd81e915 7736 CURMAD('q', PL_thisopen);
5db06880 7737 CURMAD('_', tmpwhite);
cd81e915
NC
7738 CURMAD('=', PL_thisstuff);
7739 CURMAD('Q', PL_thisclose);
5db06880
NC
7740 NEXTVAL_NEXTTOKE.opval =
7741 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
1a9a51d4 7742 PL_lex_stuff = NULL;
5db06880
NC
7743 force_next(THING);
7744
7745 s = SKIPSPACE2(s,tmpwhite);
7746#else
09bef843 7747 s = skipspace(s);
5db06880 7748#endif
4633a7c4 7749 }
09bef843
SB
7750 else
7751 have_proto = FALSE;
7752
7753 if (*s == ':' && s[1] != ':')
7754 PL_expect = attrful;
8e742a20
MHM
7755 else if (*s != '{' && key == KEY_sub) {
7756 if (!have_name)
7757 Perl_croak(aTHX_ "Illegal declaration of anonymous subroutine");
fd909433 7758 else if (*s != ';' && *s != '}')
be2597df 7759 Perl_croak(aTHX_ "Illegal declaration of subroutine %"SVf, SVfARG(PL_subname));
8e742a20 7760 }
09bef843 7761
5db06880
NC
7762#ifdef PERL_MAD
7763 start_force(0);
7764 if (tmpwhite) {
7765 if (PL_madskills)
6b29d1f5 7766 curmad('^', newSVpvs(""));
5db06880
NC
7767 CURMAD('_', tmpwhite);
7768 }
7769 force_next(0);
7770
cd81e915 7771 PL_thistoken = subtoken;
5db06880 7772#else
09bef843 7773 if (have_proto) {
9ded7720 7774 NEXTVAL_NEXTTOKE.opval =
b1b65b59 7775 (OP*)newSVOP(OP_CONST, 0, PL_lex_stuff);
a0714e2c 7776 PL_lex_stuff = NULL;
09bef843 7777 force_next(THING);
68dc0745 7778 }
5db06880 7779#endif
09bef843 7780 if (!have_name) {
49a54bbe
NC
7781 if (PL_curstash)
7782 sv_setpvs(PL_subname, "__ANON__");
7783 else
7784 sv_setpvs(PL_subname, "__ANON__::__ANON__");
09bef843 7785 TOKEN(ANONSUB);
4633a7c4 7786 }
5db06880 7787#ifndef PERL_MAD
b1b65b59
JH
7788 (void) force_word(PL_oldbufptr + tboffset, WORD,
7789 FALSE, TRUE, TRUE);
5db06880 7790#endif
09bef843
SB
7791 if (key == KEY_my)
7792 TOKEN(MYSUB);
7793 TOKEN(SUB);
4633a7c4 7794 }
79072805
LW
7795
7796 case KEY_system:
a0d0e21e 7797 LOP(OP_SYSTEM,XREF);
79072805
LW
7798
7799 case KEY_symlink:
a0d0e21e 7800 LOP(OP_SYMLINK,XTERM);
79072805
LW
7801
7802 case KEY_syscall:
a0d0e21e 7803 LOP(OP_SYSCALL,XTERM);
79072805 7804
c07a80fd 7805 case KEY_sysopen:
7806 LOP(OP_SYSOPEN,XTERM);
7807
137443ea 7808 case KEY_sysseek:
7809 LOP(OP_SYSSEEK,XTERM);
7810
79072805 7811 case KEY_sysread:
a0d0e21e 7812 LOP(OP_SYSREAD,XTERM);
79072805
LW
7813
7814 case KEY_syswrite:
a0d0e21e 7815 LOP(OP_SYSWRITE,XTERM);
79072805
LW
7816
7817 case KEY_tr:
7818 s = scan_trans(s);
7819 TERM(sublex_start());
7820
7821 case KEY_tell:
7822 UNI(OP_TELL);
7823
7824 case KEY_telldir:
7825 UNI(OP_TELLDIR);
7826
463ee0b2 7827 case KEY_tie:
a0d0e21e 7828 LOP(OP_TIE,XTERM);
463ee0b2 7829
c07a80fd 7830 case KEY_tied:
7831 UNI(OP_TIED);
7832
79072805
LW
7833 case KEY_time:
7834 FUN0(OP_TIME);
7835
7836 case KEY_times:
7837 FUN0(OP_TMS);
7838
7839 case KEY_truncate:
a0d0e21e 7840 LOP(OP_TRUNCATE,XTERM);
79072805
LW
7841
7842 case KEY_uc:
7843 UNI(OP_UC);
7844
7845 case KEY_ucfirst:
7846 UNI(OP_UCFIRST);
7847
463ee0b2
LW
7848 case KEY_untie:
7849 UNI(OP_UNTIE);
7850
79072805 7851 case KEY_until:
6154021b 7852 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7853 OPERATOR(UNTIL);
7854
7855 case KEY_unless:
6154021b 7856 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7857 OPERATOR(UNLESS);
7858
7859 case KEY_unlink:
a0d0e21e 7860 LOP(OP_UNLINK,XTERM);
79072805
LW
7861
7862 case KEY_undef:
6f33ba73 7863 UNIDOR(OP_UNDEF);
79072805
LW
7864
7865 case KEY_unpack:
a0d0e21e 7866 LOP(OP_UNPACK,XTERM);
79072805
LW
7867
7868 case KEY_utime:
a0d0e21e 7869 LOP(OP_UTIME,XTERM);
79072805
LW
7870
7871 case KEY_umask:
6f33ba73 7872 UNIDOR(OP_UMASK);
79072805
LW
7873
7874 case KEY_unshift:
a0d0e21e
LW
7875 LOP(OP_UNSHIFT,XTERM);
7876
7877 case KEY_use:
468aa647 7878 s = tokenize_use(1, s);
a0d0e21e 7879 OPERATOR(USE);
79072805
LW
7880
7881 case KEY_values:
7882 UNI(OP_VALUES);
7883
7884 case KEY_vec:
a0d0e21e 7885 LOP(OP_VEC,XTERM);
79072805 7886
0d863452 7887 case KEY_when:
6154021b 7888 pl_yylval.ival = CopLINE(PL_curcop);
0d863452
RH
7889 OPERATOR(WHEN);
7890
79072805 7891 case KEY_while:
6154021b 7892 pl_yylval.ival = CopLINE(PL_curcop);
79072805
LW
7893 OPERATOR(WHILE);
7894
7895 case KEY_warn:
3280af22 7896 PL_hints |= HINT_BLOCK_SCOPE;
a0d0e21e 7897 LOP(OP_WARN,XTERM);
79072805
LW
7898
7899 case KEY_wait:
7900 FUN0(OP_WAIT);
7901
7902 case KEY_waitpid:
a0d0e21e 7903 LOP(OP_WAITPID,XTERM);
79072805
LW
7904
7905 case KEY_wantarray:
7906 FUN0(OP_WANTARRAY);
7907
7908 case KEY_write:
9d116dd7
JH
7909#ifdef EBCDIC
7910 {
df3728a2
JH
7911 char ctl_l[2];
7912 ctl_l[0] = toCTRL('L');
7913 ctl_l[1] = '\0';
fafc274c 7914 gv_fetchpvn_flags(ctl_l, 1, GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7
JH
7915 }
7916#else
fafc274c
NC
7917 /* Make sure $^L is defined */
7918 gv_fetchpvs("\f", GV_ADD|GV_NOTQUAL, SVt_PV);
9d116dd7 7919#endif
79072805
LW
7920 UNI(OP_ENTERWRITE);
7921
7922 case KEY_x:
3280af22 7923 if (PL_expect == XOPERATOR)
79072805
LW
7924 Mop(OP_REPEAT);
7925 check_uni();
7926 goto just_a_word;
7927
a0d0e21e 7928 case KEY_xor:
6154021b 7929 pl_yylval.ival = OP_XOR;
a0d0e21e
LW
7930 OPERATOR(OROP);
7931
79072805
LW
7932 case KEY_y:
7933 s = scan_trans(s);
7934 TERM(sublex_start());
7935 }
49dc05e3 7936 }}
79072805 7937}
bf4acbe4
GS
7938#ifdef __SC__
7939#pragma segment Main
7940#endif
79072805 7941
e930465f
JH
7942static int
7943S_pending_ident(pTHX)
8eceec63 7944{
97aff369 7945 dVAR;
8eceec63 7946 register char *d;
bbd11bfc 7947 PADOFFSET tmp = 0;
8eceec63
SC
7948 /* pit holds the identifier we read and pending_ident is reset */
7949 char pit = PL_pending_ident;
9bde8eb0
NC
7950 const STRLEN tokenbuf_len = strlen(PL_tokenbuf);
7951 /* All routes through this function want to know if there is a colon. */
c099d646 7952 const char *const has_colon = (const char*) memchr (PL_tokenbuf, ':', tokenbuf_len);
8eceec63
SC
7953 PL_pending_ident = 0;
7954
cd81e915 7955 /* PL_realtokenstart = realtokenend = PL_bufptr - SvPVX(PL_linestr); */
8eceec63 7956 DEBUG_T({ PerlIO_printf(Perl_debug_log,
b6007c36 7957 "### Pending identifier '%s'\n", PL_tokenbuf); });
8eceec63
SC
7958
7959 /* if we're in a my(), we can't allow dynamics here.
7960 $foo'bar has already been turned into $foo::bar, so
7961 just check for colons.
7962
7963 if it's a legal name, the OP is a PADANY.
7964 */
7965 if (PL_in_my) {
7966 if (PL_in_my == KEY_our) { /* "our" is merely analogous to "my" */
9bde8eb0 7967 if (has_colon)
8eceec63
SC
7968 yyerror(Perl_form(aTHX_ "No package name allowed for "
7969 "variable %s in \"our\"",
7970 PL_tokenbuf));
d6447115 7971 tmp = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7972 }
7973 else {
9bde8eb0 7974 if (has_colon)
952306ac
RGS
7975 yyerror(Perl_form(aTHX_ PL_no_myglob,
7976 PL_in_my == KEY_my ? "my" : "state", PL_tokenbuf));
8eceec63 7977
6154021b 7978 pl_yylval.opval = newOP(OP_PADANY, 0);
d6447115 7979 pl_yylval.opval->op_targ = allocmy(PL_tokenbuf, tokenbuf_len, 0);
8eceec63
SC
7980 return PRIVATEREF;
7981 }
7982 }
7983
7984 /*
7985 build the ops for accesses to a my() variable.
7986
7987 Deny my($a) or my($b) in a sort block, *if* $a or $b is
7988 then used in a comparison. This catches most, but not
7989 all cases. For instance, it catches
7990 sort { my($a); $a <=> $b }
7991 but not
7992 sort { my($a); $a < $b ? -1 : $a == $b ? 0 : 1; }
7993 (although why you'd do that is anyone's guess).
7994 */
7995
9bde8eb0 7996 if (!has_colon) {
8716503d 7997 if (!PL_in_my)
f8f98e0a 7998 tmp = pad_findmy(PL_tokenbuf, tokenbuf_len, 0);
8716503d 7999 if (tmp != NOT_IN_PAD) {
8eceec63 8000 /* might be an "our" variable" */
00b1698f 8001 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
8eceec63 8002 /* build ops for a bareword */
b64e5050
AL
8003 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
8004 HEK * const stashname = HvNAME_HEK(stash);
8005 SV * const sym = newSVhek(stashname);
396482e1 8006 sv_catpvs(sym, "::");
9bde8eb0 8007 sv_catpvn(sym, PL_tokenbuf+1, tokenbuf_len - 1);
6154021b
RGS
8008 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, sym);
8009 pl_yylval.opval->op_private = OPpCONST_ENTERED;
7a5fd60d 8010 gv_fetchsv(sym,
8eceec63
SC
8011 (PL_in_eval
8012 ? (GV_ADDMULTI | GV_ADDINEVAL)
700078d2 8013 : GV_ADDMULTI
8eceec63
SC
8014 ),
8015 ((PL_tokenbuf[0] == '$') ? SVt_PV
8016 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8017 : SVt_PVHV));
8018 return WORD;
8019 }
8020
8021 /* if it's a sort block and they're naming $a or $b */
8022 if (PL_last_lop_op == OP_SORT &&
8023 PL_tokenbuf[0] == '$' &&
8024 (PL_tokenbuf[1] == 'a' || PL_tokenbuf[1] == 'b')
8025 && !PL_tokenbuf[2])
8026 {
8027 for (d = PL_in_eval ? PL_oldoldbufptr : PL_linestart;
8028 d < PL_bufend && *d != '\n';
8029 d++)
8030 {
8031 if (strnEQ(d,"<=>",3) || strnEQ(d,"cmp",3)) {
8032 Perl_croak(aTHX_ "Can't use \"my %s\" in sort comparison",
8033 PL_tokenbuf);
8034 }
8035 }
8036 }
8037
6154021b
RGS
8038 pl_yylval.opval = newOP(OP_PADANY, 0);
8039 pl_yylval.opval->op_targ = tmp;
8eceec63
SC
8040 return PRIVATEREF;
8041 }
8042 }
8043
8044 /*
8045 Whine if they've said @foo in a doublequoted string,
8046 and @foo isn't a variable we can find in the symbol
8047 table.
8048 */
d824713b
NC
8049 if (ckWARN(WARN_AMBIGUOUS) &&
8050 pit == '@' && PL_lex_state != LEX_NORMAL && !PL_lex_brackets) {
9bde8eb0
NC
8051 GV *const gv = gv_fetchpvn_flags(PL_tokenbuf + 1, tokenbuf_len - 1, 0,
8052 SVt_PVAV);
8eceec63 8053 if ((!gv || ((PL_tokenbuf[0] == '@') ? !GvAV(gv) : !GvHV(gv)))
e879d94f
RGS
8054 /* DO NOT warn for @- and @+ */
8055 && !( PL_tokenbuf[2] == '\0' &&
8056 ( PL_tokenbuf[1] == '-' || PL_tokenbuf[1] == '+' ))
8057 )
8eceec63
SC
8058 {
8059 /* Downgraded from fatal to warning 20000522 mjd */
d824713b
NC
8060 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
8061 "Possible unintended interpolation of %s in string",
8062 PL_tokenbuf);
8eceec63
SC
8063 }
8064 }
8065
8066 /* build ops for a bareword */
6154021b 8067 pl_yylval.opval = (OP*)newSVOP(OP_CONST, 0, newSVpvn(PL_tokenbuf + 1,
9bde8eb0 8068 tokenbuf_len - 1));
6154021b 8069 pl_yylval.opval->op_private = OPpCONST_ENTERED;
223f0fb7
NC
8070 gv_fetchpvn_flags(PL_tokenbuf+1, tokenbuf_len - 1,
8071 PL_in_eval ? (GV_ADDMULTI | GV_ADDINEVAL) : GV_ADD,
8072 ((PL_tokenbuf[0] == '$') ? SVt_PV
8073 : (PL_tokenbuf[0] == '@') ? SVt_PVAV
8074 : SVt_PVHV));
8eceec63
SC
8075 return WORD;
8076}
8077
4c3bbe0f
MHM
8078/*
8079 * The following code was generated by perl_keyword.pl.
8080 */
e2e1dd5a 8081
79072805 8082I32
5458a98a 8083Perl_keyword (pTHX_ const char *name, I32 len, bool all_keywords)
4c3bbe0f 8084{
952306ac 8085 dVAR;
7918f24d
NC
8086
8087 PERL_ARGS_ASSERT_KEYWORD;
8088
4c3bbe0f
MHM
8089 switch (len)
8090 {
8091 case 1: /* 5 tokens of length 1 */
8092 switch (name[0])
e2e1dd5a 8093 {
4c3bbe0f
MHM
8094 case 'm':
8095 { /* m */
8096 return KEY_m;
8097 }
8098
4c3bbe0f
MHM
8099 case 'q':
8100 { /* q */
8101 return KEY_q;
8102 }
8103
4c3bbe0f
MHM
8104 case 's':
8105 { /* s */
8106 return KEY_s;
8107 }
8108
4c3bbe0f
MHM
8109 case 'x':
8110 { /* x */
8111 return -KEY_x;
8112 }
8113
4c3bbe0f
MHM
8114 case 'y':
8115 { /* y */
8116 return KEY_y;
8117 }
8118
4c3bbe0f
MHM
8119 default:
8120 goto unknown;
e2e1dd5a 8121 }
4c3bbe0f
MHM
8122
8123 case 2: /* 18 tokens of length 2 */
8124 switch (name[0])
e2e1dd5a 8125 {
4c3bbe0f
MHM
8126 case 'd':
8127 if (name[1] == 'o')
8128 { /* do */
8129 return KEY_do;
8130 }
8131
8132 goto unknown;
8133
8134 case 'e':
8135 if (name[1] == 'q')
8136 { /* eq */
8137 return -KEY_eq;
8138 }
8139
8140 goto unknown;
8141
8142 case 'g':
8143 switch (name[1])
8144 {
8145 case 'e':
8146 { /* ge */
8147 return -KEY_ge;
8148 }
8149
4c3bbe0f
MHM
8150 case 't':
8151 { /* gt */
8152 return -KEY_gt;
8153 }
8154
4c3bbe0f
MHM
8155 default:
8156 goto unknown;
8157 }
8158
8159 case 'i':
8160 if (name[1] == 'f')
8161 { /* if */
8162 return KEY_if;
8163 }
8164
8165 goto unknown;
8166
8167 case 'l':
8168 switch (name[1])
8169 {
8170 case 'c':
8171 { /* lc */
8172 return -KEY_lc;
8173 }
8174
4c3bbe0f
MHM
8175 case 'e':
8176 { /* le */
8177 return -KEY_le;
8178 }
8179
4c3bbe0f
MHM
8180 case 't':
8181 { /* lt */
8182 return -KEY_lt;
8183 }
8184
4c3bbe0f
MHM
8185 default:
8186 goto unknown;
8187 }
8188
8189 case 'm':
8190 if (name[1] == 'y')
8191 { /* my */
8192 return KEY_my;
8193 }
8194
8195 goto unknown;
8196
8197 case 'n':
8198 switch (name[1])
8199 {
8200 case 'e':
8201 { /* ne */
8202 return -KEY_ne;
8203 }
8204
4c3bbe0f
MHM
8205 case 'o':
8206 { /* no */
8207 return KEY_no;
8208 }
8209
4c3bbe0f
MHM
8210 default:
8211 goto unknown;
8212 }
8213
8214 case 'o':
8215 if (name[1] == 'r')
8216 { /* or */
8217 return -KEY_or;
8218 }
8219
8220 goto unknown;
8221
8222 case 'q':
8223 switch (name[1])
8224 {
8225 case 'q':
8226 { /* qq */
8227 return KEY_qq;
8228 }
8229
4c3bbe0f
MHM
8230 case 'r':
8231 { /* qr */
8232 return KEY_qr;
8233 }
8234
4c3bbe0f
MHM
8235 case 'w':
8236 { /* qw */
8237 return KEY_qw;
8238 }
8239
4c3bbe0f
MHM
8240 case 'x':
8241 { /* qx */
8242 return KEY_qx;
8243 }
8244
4c3bbe0f
MHM
8245 default:
8246 goto unknown;
8247 }
8248
8249 case 't':
8250 if (name[1] == 'r')
8251 { /* tr */
8252 return KEY_tr;
8253 }
8254
8255 goto unknown;
8256
8257 case 'u':
8258 if (name[1] == 'c')
8259 { /* uc */
8260 return -KEY_uc;
8261 }
8262
8263 goto unknown;
8264
8265 default:
8266 goto unknown;
e2e1dd5a 8267 }
4c3bbe0f 8268
0d863452 8269 case 3: /* 29 tokens of length 3 */
4c3bbe0f 8270 switch (name[0])
e2e1dd5a 8271 {
4c3bbe0f
MHM
8272 case 'E':
8273 if (name[1] == 'N' &&
8274 name[2] == 'D')
8275 { /* END */
8276 return KEY_END;
8277 }
8278
8279 goto unknown;
8280
8281 case 'a':
8282 switch (name[1])
8283 {
8284 case 'b':
8285 if (name[2] == 's')
8286 { /* abs */
8287 return -KEY_abs;
8288 }
8289
8290 goto unknown;
8291
8292 case 'n':
8293 if (name[2] == 'd')
8294 { /* and */
8295 return -KEY_and;
8296 }
8297
8298 goto unknown;
8299
8300 default:
8301 goto unknown;
8302 }
8303
8304 case 'c':
8305 switch (name[1])
8306 {
8307 case 'h':
8308 if (name[2] == 'r')
8309 { /* chr */
8310 return -KEY_chr;
8311 }
8312
8313 goto unknown;
8314
8315 case 'm':
8316 if (name[2] == 'p')
8317 { /* cmp */
8318 return -KEY_cmp;
8319 }
8320
8321 goto unknown;
8322
8323 case 'o':
8324 if (name[2] == 's')
8325 { /* cos */
8326 return -KEY_cos;
8327 }
8328
8329 goto unknown;
8330
8331 default:
8332 goto unknown;
8333 }
8334
8335 case 'd':
8336 if (name[1] == 'i' &&
8337 name[2] == 'e')
8338 { /* die */
8339 return -KEY_die;
8340 }
8341
8342 goto unknown;
8343
8344 case 'e':
8345 switch (name[1])
8346 {
8347 case 'o':
8348 if (name[2] == 'f')
8349 { /* eof */
8350 return -KEY_eof;
8351 }
8352
8353 goto unknown;
8354
4c3bbe0f
MHM
8355 case 'x':
8356 if (name[2] == 'p')
8357 { /* exp */
8358 return -KEY_exp;
8359 }
8360
8361 goto unknown;
8362
8363 default:
8364 goto unknown;
8365 }
8366
8367 case 'f':
8368 if (name[1] == 'o' &&
8369 name[2] == 'r')
8370 { /* for */
8371 return KEY_for;
8372 }
8373
8374 goto unknown;
8375
8376 case 'h':
8377 if (name[1] == 'e' &&
8378 name[2] == 'x')
8379 { /* hex */
8380 return -KEY_hex;
8381 }
8382
8383 goto unknown;
8384
8385 case 'i':
8386 if (name[1] == 'n' &&
8387 name[2] == 't')
8388 { /* int */
8389 return -KEY_int;
8390 }
8391
8392 goto unknown;
8393
8394 case 'l':
8395 if (name[1] == 'o' &&
8396 name[2] == 'g')
8397 { /* log */
8398 return -KEY_log;
8399 }
8400
8401 goto unknown;
8402
8403 case 'm':
8404 if (name[1] == 'a' &&
8405 name[2] == 'p')
8406 { /* map */
8407 return KEY_map;
8408 }
8409
8410 goto unknown;
8411
8412 case 'n':
8413 if (name[1] == 'o' &&
8414 name[2] == 't')
8415 { /* not */
8416 return -KEY_not;
8417 }
8418
8419 goto unknown;
8420
8421 case 'o':
8422 switch (name[1])
8423 {
8424 case 'c':
8425 if (name[2] == 't')
8426 { /* oct */
8427 return -KEY_oct;
8428 }
8429
8430 goto unknown;
8431
8432 case 'r':
8433 if (name[2] == 'd')
8434 { /* ord */
8435 return -KEY_ord;
8436 }
8437
8438 goto unknown;
8439
8440 case 'u':
8441 if (name[2] == 'r')
8442 { /* our */
8443 return KEY_our;
8444 }
8445
8446 goto unknown;
8447
8448 default:
8449 goto unknown;
8450 }
8451
8452 case 'p':
8453 if (name[1] == 'o')
8454 {
8455 switch (name[2])
8456 {
8457 case 'p':
8458 { /* pop */
8459 return -KEY_pop;
8460 }
8461
4c3bbe0f
MHM
8462 case 's':
8463 { /* pos */
8464 return KEY_pos;
8465 }
8466
4c3bbe0f
MHM
8467 default:
8468 goto unknown;
8469 }
8470 }
8471
8472 goto unknown;
8473
8474 case 'r':
8475 if (name[1] == 'e' &&
8476 name[2] == 'f')
8477 { /* ref */
8478 return -KEY_ref;
8479 }
8480
8481 goto unknown;
8482
8483 case 's':
8484 switch (name[1])
8485 {
0d863452
RH
8486 case 'a':
8487 if (name[2] == 'y')
8488 { /* say */
e3e804c9 8489 return (all_keywords || FEATURE_IS_ENABLED("say") ? KEY_say : 0);
0d863452
RH
8490 }
8491
8492 goto unknown;
8493
4c3bbe0f
MHM
8494 case 'i':
8495 if (name[2] == 'n')
8496 { /* sin */
8497 return -KEY_sin;
8498 }
8499
8500 goto unknown;
8501
8502 case 'u':
8503 if (name[2] == 'b')
8504 { /* sub */
8505 return KEY_sub;
8506 }
8507
8508 goto unknown;
8509
8510 default:
8511 goto unknown;
8512 }
8513
8514 case 't':
8515 if (name[1] == 'i' &&
8516 name[2] == 'e')
8517 { /* tie */
1db4d195 8518 return -KEY_tie;
4c3bbe0f
MHM
8519 }
8520
8521 goto unknown;
8522
8523 case 'u':
8524 if (name[1] == 's' &&
8525 name[2] == 'e')
8526 { /* use */
8527 return KEY_use;
8528 }
8529
8530 goto unknown;
8531
8532 case 'v':
8533 if (name[1] == 'e' &&
8534 name[2] == 'c')
8535 { /* vec */
8536 return -KEY_vec;
8537 }
8538
8539 goto unknown;
8540
8541 case 'x':
8542 if (name[1] == 'o' &&
8543 name[2] == 'r')
8544 { /* xor */
8545 return -KEY_xor;
8546 }
8547
8548 goto unknown;
8549
8550 default:
8551 goto unknown;
e2e1dd5a 8552 }
4c3bbe0f 8553
0d863452 8554 case 4: /* 41 tokens of length 4 */
4c3bbe0f 8555 switch (name[0])
e2e1dd5a 8556 {
4c3bbe0f
MHM
8557 case 'C':
8558 if (name[1] == 'O' &&
8559 name[2] == 'R' &&
8560 name[3] == 'E')
8561 { /* CORE */
8562 return -KEY_CORE;
8563 }
8564
8565 goto unknown;
8566
8567 case 'I':
8568 if (name[1] == 'N' &&
8569 name[2] == 'I' &&
8570 name[3] == 'T')
8571 { /* INIT */
8572 return KEY_INIT;
8573 }
8574
8575 goto unknown;
8576
8577 case 'b':
8578 if (name[1] == 'i' &&
8579 name[2] == 'n' &&
8580 name[3] == 'd')
8581 { /* bind */
8582 return -KEY_bind;
8583 }
8584
8585 goto unknown;
8586
8587 case 'c':
8588 if (name[1] == 'h' &&
8589 name[2] == 'o' &&
8590 name[3] == 'p')
8591 { /* chop */
8592 return -KEY_chop;
8593 }
8594
8595 goto unknown;
8596
8597 case 'd':
8598 if (name[1] == 'u' &&
8599 name[2] == 'm' &&
8600 name[3] == 'p')
8601 { /* dump */
8602 return -KEY_dump;
8603 }
8604
8605 goto unknown;
8606
8607 case 'e':
8608 switch (name[1])
8609 {
8610 case 'a':
8611 if (name[2] == 'c' &&
8612 name[3] == 'h')
8613 { /* each */
8614 return -KEY_each;
8615 }
8616
8617 goto unknown;
8618
8619 case 'l':
8620 if (name[2] == 's' &&
8621 name[3] == 'e')
8622 { /* else */
8623 return KEY_else;
8624 }
8625
8626 goto unknown;
8627
8628 case 'v':
8629 if (name[2] == 'a' &&
8630 name[3] == 'l')
8631 { /* eval */
8632 return KEY_eval;
8633 }
8634
8635 goto unknown;
8636
8637 case 'x':
8638 switch (name[2])
8639 {
8640 case 'e':
8641 if (name[3] == 'c')
8642 { /* exec */
8643 return -KEY_exec;
8644 }
8645
8646 goto unknown;
8647
8648 case 'i':
8649 if (name[3] == 't')
8650 { /* exit */
8651 return -KEY_exit;
8652 }
8653
8654 goto unknown;
8655
8656 default:
8657 goto unknown;
8658 }
8659
8660 default:
8661 goto unknown;
8662 }
8663
8664 case 'f':
8665 if (name[1] == 'o' &&
8666 name[2] == 'r' &&
8667 name[3] == 'k')
8668 { /* fork */
8669 return -KEY_fork;
8670 }
8671
8672 goto unknown;
8673
8674 case 'g':
8675 switch (name[1])
8676 {
8677 case 'e':
8678 if (name[2] == 't' &&
8679 name[3] == 'c')
8680 { /* getc */
8681 return -KEY_getc;
8682 }
8683
8684 goto unknown;
8685
8686 case 'l':
8687 if (name[2] == 'o' &&
8688 name[3] == 'b')
8689 { /* glob */
8690 return KEY_glob;
8691 }
8692
8693 goto unknown;
8694
8695 case 'o':
8696 if (name[2] == 't' &&
8697 name[3] == 'o')
8698 { /* goto */
8699 return KEY_goto;
8700 }
8701
8702 goto unknown;
8703
8704 case 'r':
8705 if (name[2] == 'e' &&
8706 name[3] == 'p')
8707 { /* grep */
8708 return KEY_grep;
8709 }
8710
8711 goto unknown;
8712
8713 default:
8714 goto unknown;
8715 }
8716
8717 case 'j':
8718 if (name[1] == 'o' &&
8719 name[2] == 'i' &&
8720 name[3] == 'n')
8721 { /* join */
8722 return -KEY_join;
8723 }
8724
8725 goto unknown;
8726
8727 case 'k':
8728 switch (name[1])
8729 {
8730 case 'e':
8731 if (name[2] == 'y' &&
8732 name[3] == 's')
8733 { /* keys */
8734 return -KEY_keys;
8735 }
8736
8737 goto unknown;
8738
8739 case 'i':
8740 if (name[2] == 'l' &&
8741 name[3] == 'l')
8742 { /* kill */
8743 return -KEY_kill;
8744 }
8745
8746 goto unknown;
8747
8748 default:
8749 goto unknown;
8750 }
8751
8752 case 'l':
8753 switch (name[1])
8754 {
8755 case 'a':
8756 if (name[2] == 's' &&
8757 name[3] == 't')
8758 { /* last */
8759 return KEY_last;
8760 }
8761
8762 goto unknown;
8763
8764 case 'i':
8765 if (name[2] == 'n' &&
8766 name[3] == 'k')
8767 { /* link */
8768 return -KEY_link;
8769 }
8770
8771 goto unknown;
8772
8773 case 'o':
8774 if (name[2] == 'c' &&
8775 name[3] == 'k')
8776 { /* lock */
8777 return -KEY_lock;
8778 }
8779
8780 goto unknown;
8781
8782 default:
8783 goto unknown;
8784 }
8785
8786 case 'n':
8787 if (name[1] == 'e' &&
8788 name[2] == 'x' &&
8789 name[3] == 't')
8790 { /* next */
8791 return KEY_next;
8792 }
8793
8794 goto unknown;
8795
8796 case 'o':
8797 if (name[1] == 'p' &&
8798 name[2] == 'e' &&
8799 name[3] == 'n')
8800 { /* open */
8801 return -KEY_open;
8802 }
8803
8804 goto unknown;
8805
8806 case 'p':
8807 switch (name[1])
8808 {
8809 case 'a':
8810 if (name[2] == 'c' &&
8811 name[3] == 'k')
8812 { /* pack */
8813 return -KEY_pack;
8814 }
8815
8816 goto unknown;
8817
8818 case 'i':
8819 if (name[2] == 'p' &&
8820 name[3] == 'e')
8821 { /* pipe */
8822 return -KEY_pipe;
8823 }
8824
8825 goto unknown;
8826
8827 case 'u':
8828 if (name[2] == 's' &&
8829 name[3] == 'h')
8830 { /* push */
8831 return -KEY_push;
8832 }
8833
8834 goto unknown;
8835
8836 default:
8837 goto unknown;
8838 }
8839
8840 case 'r':
8841 switch (name[1])
8842 {
8843 case 'a':
8844 if (name[2] == 'n' &&
8845 name[3] == 'd')
8846 { /* rand */
8847 return -KEY_rand;
8848 }
8849
8850 goto unknown;
8851
8852 case 'e':
8853 switch (name[2])
8854 {
8855 case 'a':
8856 if (name[3] == 'd')
8857 { /* read */
8858 return -KEY_read;
8859 }
8860
8861 goto unknown;
8862
8863 case 'c':
8864 if (name[3] == 'v')
8865 { /* recv */
8866 return -KEY_recv;
8867 }
8868
8869 goto unknown;
8870
8871 case 'd':
8872 if (name[3] == 'o')
8873 { /* redo */
8874 return KEY_redo;
8875 }
8876
8877 goto unknown;
8878
8879 default:
8880 goto unknown;
8881 }
8882
8883 default:
8884 goto unknown;
8885 }
8886
8887 case 's':
8888 switch (name[1])
8889 {
8890 case 'e':
8891 switch (name[2])
8892 {
8893 case 'e':
8894 if (name[3] == 'k')
8895 { /* seek */
8896 return -KEY_seek;
8897 }
8898
8899 goto unknown;
8900
8901 case 'n':
8902 if (name[3] == 'd')
8903 { /* send */
8904 return -KEY_send;
8905 }
8906
8907 goto unknown;
8908
8909 default:
8910 goto unknown;
8911 }
8912
8913 case 'o':
8914 if (name[2] == 'r' &&
8915 name[3] == 't')
8916 { /* sort */
8917 return KEY_sort;
8918 }
8919
8920 goto unknown;
8921
8922 case 'q':
8923 if (name[2] == 'r' &&
8924 name[3] == 't')
8925 { /* sqrt */
8926 return -KEY_sqrt;
8927 }
8928
8929 goto unknown;
8930
8931 case 't':
8932 if (name[2] == 'a' &&
8933 name[3] == 't')
8934 { /* stat */
8935 return -KEY_stat;
8936 }
8937
8938 goto unknown;
8939
8940 default:
8941 goto unknown;
8942 }
8943
8944 case 't':
8945 switch (name[1])
8946 {
8947 case 'e':
8948 if (name[2] == 'l' &&
8949 name[3] == 'l')
8950 { /* tell */
8951 return -KEY_tell;
8952 }
8953
8954 goto unknown;
8955
8956 case 'i':
8957 switch (name[2])
8958 {
8959 case 'e':
8960 if (name[3] == 'd')
8961 { /* tied */
1db4d195 8962 return -KEY_tied;
4c3bbe0f
MHM
8963 }
8964
8965 goto unknown;
8966
8967 case 'm':
8968 if (name[3] == 'e')
8969 { /* time */
8970 return -KEY_time;
8971 }
8972
8973 goto unknown;
8974
8975 default:
8976 goto unknown;
8977 }
8978
8979 default:
8980 goto unknown;
8981 }
8982
8983 case 'w':
0d863452 8984 switch (name[1])
4c3bbe0f 8985 {
0d863452 8986 case 'a':
952306ac
RGS
8987 switch (name[2])
8988 {
8989 case 'i':
8990 if (name[3] == 't')
8991 { /* wait */
8992 return -KEY_wait;
8993 }
4c3bbe0f 8994
952306ac 8995 goto unknown;
4c3bbe0f 8996
952306ac
RGS
8997 case 'r':
8998 if (name[3] == 'n')
8999 { /* warn */
9000 return -KEY_warn;
9001 }
4c3bbe0f 9002
952306ac 9003 goto unknown;
4c3bbe0f 9004
952306ac
RGS
9005 default:
9006 goto unknown;
9007 }
0d863452
RH
9008
9009 case 'h':
9010 if (name[2] == 'e' &&
9011 name[3] == 'n')
9012 { /* when */
5458a98a 9013 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_when : 0);
952306ac 9014 }
4c3bbe0f 9015
952306ac 9016 goto unknown;
4c3bbe0f 9017
952306ac
RGS
9018 default:
9019 goto unknown;
9020 }
4c3bbe0f 9021
0d863452
RH
9022 default:
9023 goto unknown;
9024 }
9025
952306ac 9026 case 5: /* 39 tokens of length 5 */
4c3bbe0f 9027 switch (name[0])
e2e1dd5a 9028 {
4c3bbe0f
MHM
9029 case 'B':
9030 if (name[1] == 'E' &&
9031 name[2] == 'G' &&
9032 name[3] == 'I' &&
9033 name[4] == 'N')
9034 { /* BEGIN */
9035 return KEY_BEGIN;
9036 }
9037
9038 goto unknown;
9039
9040 case 'C':
9041 if (name[1] == 'H' &&
9042 name[2] == 'E' &&
9043 name[3] == 'C' &&
9044 name[4] == 'K')
9045 { /* CHECK */
9046 return KEY_CHECK;
9047 }
9048
9049 goto unknown;
9050
9051 case 'a':
9052 switch (name[1])
9053 {
9054 case 'l':
9055 if (name[2] == 'a' &&
9056 name[3] == 'r' &&
9057 name[4] == 'm')
9058 { /* alarm */
9059 return -KEY_alarm;
9060 }
9061
9062 goto unknown;
9063
9064 case 't':
9065 if (name[2] == 'a' &&
9066 name[3] == 'n' &&
9067 name[4] == '2')
9068 { /* atan2 */
9069 return -KEY_atan2;
9070 }
9071
9072 goto unknown;
9073
9074 default:
9075 goto unknown;
9076 }
9077
9078 case 'b':
0d863452
RH
9079 switch (name[1])
9080 {
9081 case 'l':
9082 if (name[2] == 'e' &&
952306ac
RGS
9083 name[3] == 's' &&
9084 name[4] == 's')
9085 { /* bless */
9086 return -KEY_bless;
9087 }
4c3bbe0f 9088
952306ac 9089 goto unknown;
4c3bbe0f 9090
0d863452
RH
9091 case 'r':
9092 if (name[2] == 'e' &&
9093 name[3] == 'a' &&
9094 name[4] == 'k')
9095 { /* break */
5458a98a 9096 return (all_keywords || FEATURE_IS_ENABLED("switch") ? -KEY_break : 0);
0d863452
RH
9097 }
9098
9099 goto unknown;
9100
9101 default:
9102 goto unknown;
9103 }
9104
4c3bbe0f
MHM
9105 case 'c':
9106 switch (name[1])
9107 {
9108 case 'h':
9109 switch (name[2])
9110 {
9111 case 'd':
9112 if (name[3] == 'i' &&
9113 name[4] == 'r')
9114 { /* chdir */
9115 return -KEY_chdir;
9116 }
9117
9118 goto unknown;
9119
9120 case 'm':
9121 if (name[3] == 'o' &&
9122 name[4] == 'd')
9123 { /* chmod */
9124 return -KEY_chmod;
9125 }
9126
9127 goto unknown;
9128
9129 case 'o':
9130 switch (name[3])
9131 {
9132 case 'm':
9133 if (name[4] == 'p')
9134 { /* chomp */
9135 return -KEY_chomp;
9136 }
9137
9138 goto unknown;
9139
9140 case 'w':
9141 if (name[4] == 'n')
9142 { /* chown */
9143 return -KEY_chown;
9144 }
9145
9146 goto unknown;
9147
9148 default:
9149 goto unknown;
9150 }
9151
9152 default:
9153 goto unknown;
9154 }
9155
9156 case 'l':
9157 if (name[2] == 'o' &&
9158 name[3] == 's' &&
9159 name[4] == 'e')
9160 { /* close */
9161 return -KEY_close;
9162 }
9163
9164 goto unknown;
9165
9166 case 'r':
9167 if (name[2] == 'y' &&
9168 name[3] == 'p' &&
9169 name[4] == 't')
9170 { /* crypt */
9171 return -KEY_crypt;
9172 }
9173
9174 goto unknown;
9175
9176 default:
9177 goto unknown;
9178 }
9179
9180 case 'e':
9181 if (name[1] == 'l' &&
9182 name[2] == 's' &&
9183 name[3] == 'i' &&
9184 name[4] == 'f')
9185 { /* elsif */
9186 return KEY_elsif;
9187 }
9188
9189 goto unknown;
9190
9191 case 'f':
9192 switch (name[1])
9193 {
9194 case 'c':
9195 if (name[2] == 'n' &&
9196 name[3] == 't' &&
9197 name[4] == 'l')
9198 { /* fcntl */
9199 return -KEY_fcntl;
9200 }
9201
9202 goto unknown;
9203
9204 case 'l':
9205 if (name[2] == 'o' &&
9206 name[3] == 'c' &&
9207 name[4] == 'k')
9208 { /* flock */
9209 return -KEY_flock;
9210 }
9211
9212 goto unknown;
9213
9214 default:
9215 goto unknown;
9216 }
9217
0d863452
RH
9218 case 'g':
9219 if (name[1] == 'i' &&
9220 name[2] == 'v' &&
9221 name[3] == 'e' &&
9222 name[4] == 'n')
9223 { /* given */
5458a98a 9224 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_given : 0);
0d863452
RH
9225 }
9226
9227 goto unknown;
9228
4c3bbe0f
MHM
9229 case 'i':
9230 switch (name[1])
9231 {
9232 case 'n':
9233 if (name[2] == 'd' &&
9234 name[3] == 'e' &&
9235 name[4] == 'x')
9236 { /* index */
9237 return -KEY_index;
9238 }
9239
9240 goto unknown;
9241
9242 case 'o':
9243 if (name[2] == 'c' &&
9244 name[3] == 't' &&
9245 name[4] == 'l')
9246 { /* ioctl */
9247 return -KEY_ioctl;
9248 }
9249
9250 goto unknown;
9251
9252 default:
9253 goto unknown;
9254 }
9255
9256 case 'l':
9257 switch (name[1])
9258 {
9259 case 'o':
9260 if (name[2] == 'c' &&
9261 name[3] == 'a' &&
9262 name[4] == 'l')
9263 { /* local */
9264 return KEY_local;
9265 }
9266
9267 goto unknown;
9268
9269 case 's':
9270 if (name[2] == 't' &&
9271 name[3] == 'a' &&
9272 name[4] == 't')
9273 { /* lstat */
9274 return -KEY_lstat;
9275 }
9276
9277 goto unknown;
9278
9279 default:
9280 goto unknown;
9281 }
9282
9283 case 'm':
9284 if (name[1] == 'k' &&
9285 name[2] == 'd' &&
9286 name[3] == 'i' &&
9287 name[4] == 'r')
9288 { /* mkdir */
9289 return -KEY_mkdir;
9290 }
9291
9292 goto unknown;
9293
9294 case 'p':
9295 if (name[1] == 'r' &&
9296 name[2] == 'i' &&
9297 name[3] == 'n' &&
9298 name[4] == 't')
9299 { /* print */
9300 return KEY_print;
9301 }
9302
9303 goto unknown;
9304
9305 case 'r':
9306 switch (name[1])
9307 {
9308 case 'e':
9309 if (name[2] == 's' &&
9310 name[3] == 'e' &&
9311 name[4] == 't')
9312 { /* reset */
9313 return -KEY_reset;
9314 }
9315
9316 goto unknown;
9317
9318 case 'm':
9319 if (name[2] == 'd' &&
9320 name[3] == 'i' &&
9321 name[4] == 'r')
9322 { /* rmdir */
9323 return -KEY_rmdir;
9324 }
9325
9326 goto unknown;
9327
9328 default:
9329 goto unknown;
9330 }
9331
9332 case 's':
9333 switch (name[1])
9334 {
9335 case 'e':
9336 if (name[2] == 'm' &&
9337 name[3] == 'o' &&
9338 name[4] == 'p')
9339 { /* semop */
9340 return -KEY_semop;
9341 }
9342
9343 goto unknown;
9344
9345 case 'h':
9346 if (name[2] == 'i' &&
9347 name[3] == 'f' &&
9348 name[4] == 't')
9349 { /* shift */
9350 return -KEY_shift;
9351 }
9352
9353 goto unknown;
9354
9355 case 'l':
9356 if (name[2] == 'e' &&
9357 name[3] == 'e' &&
9358 name[4] == 'p')
9359 { /* sleep */
9360 return -KEY_sleep;
9361 }
9362
9363 goto unknown;
9364
9365 case 'p':
9366 if (name[2] == 'l' &&
9367 name[3] == 'i' &&
9368 name[4] == 't')
9369 { /* split */
9370 return KEY_split;
9371 }
9372
9373 goto unknown;
9374
9375 case 'r':
9376 if (name[2] == 'a' &&
9377 name[3] == 'n' &&
9378 name[4] == 'd')
9379 { /* srand */
9380 return -KEY_srand;
9381 }
9382
9383 goto unknown;
9384
9385 case 't':
952306ac
RGS
9386 switch (name[2])
9387 {
9388 case 'a':
9389 if (name[3] == 't' &&
9390 name[4] == 'e')
9391 { /* state */
5458a98a 9392 return (all_keywords || FEATURE_IS_ENABLED("state") ? KEY_state : 0);
952306ac 9393 }
4c3bbe0f 9394
952306ac
RGS
9395 goto unknown;
9396
9397 case 'u':
9398 if (name[3] == 'd' &&
9399 name[4] == 'y')
9400 { /* study */
9401 return KEY_study;
9402 }
9403
9404 goto unknown;
9405
9406 default:
9407 goto unknown;
9408 }
4c3bbe0f
MHM
9409
9410 default:
9411 goto unknown;
9412 }
9413
9414 case 't':
9415 if (name[1] == 'i' &&
9416 name[2] == 'm' &&
9417 name[3] == 'e' &&
9418 name[4] == 's')
9419 { /* times */
9420 return -KEY_times;
9421 }
9422
9423 goto unknown;
9424
9425 case 'u':
9426 switch (name[1])
9427 {
9428 case 'm':
9429 if (name[2] == 'a' &&
9430 name[3] == 's' &&
9431 name[4] == 'k')
9432 { /* umask */
9433 return -KEY_umask;
9434 }
9435
9436 goto unknown;
9437
9438 case 'n':
9439 switch (name[2])
9440 {
9441 case 'd':
9442 if (name[3] == 'e' &&
9443 name[4] == 'f')
9444 { /* undef */
9445 return KEY_undef;
9446 }
9447
9448 goto unknown;
9449
9450 case 't':
9451 if (name[3] == 'i')
9452 {
9453 switch (name[4])
9454 {
9455 case 'e':
9456 { /* untie */
1db4d195 9457 return -KEY_untie;
4c3bbe0f
MHM
9458 }
9459
4c3bbe0f
MHM
9460 case 'l':
9461 { /* until */
9462 return KEY_until;
9463 }
9464
4c3bbe0f
MHM
9465 default:
9466 goto unknown;
9467 }
9468 }
9469
9470 goto unknown;
9471
9472 default:
9473 goto unknown;
9474 }
9475
9476 case 't':
9477 if (name[2] == 'i' &&
9478 name[3] == 'm' &&
9479 name[4] == 'e')
9480 { /* utime */
9481 return -KEY_utime;
9482 }
9483
9484 goto unknown;
9485
9486 default:
9487 goto unknown;
9488 }
9489
9490 case 'w':
9491 switch (name[1])
9492 {
9493 case 'h':
9494 if (name[2] == 'i' &&
9495 name[3] == 'l' &&
9496 name[4] == 'e')
9497 { /* while */
9498 return KEY_while;
9499 }
9500
9501 goto unknown;
9502
9503 case 'r':
9504 if (name[2] == 'i' &&
9505 name[3] == 't' &&
9506 name[4] == 'e')
9507 { /* write */
9508 return -KEY_write;
9509 }
9510
9511 goto unknown;
9512
9513 default:
9514 goto unknown;
9515 }
9516
9517 default:
9518 goto unknown;
e2e1dd5a 9519 }
4c3bbe0f
MHM
9520
9521 case 6: /* 33 tokens of length 6 */
9522 switch (name[0])
9523 {
9524 case 'a':
9525 if (name[1] == 'c' &&
9526 name[2] == 'c' &&
9527 name[3] == 'e' &&
9528 name[4] == 'p' &&
9529 name[5] == 't')
9530 { /* accept */
9531 return -KEY_accept;
9532 }
9533
9534 goto unknown;
9535
9536 case 'c':
9537 switch (name[1])
9538 {
9539 case 'a':
9540 if (name[2] == 'l' &&
9541 name[3] == 'l' &&
9542 name[4] == 'e' &&
9543 name[5] == 'r')
9544 { /* caller */
9545 return -KEY_caller;
9546 }
9547
9548 goto unknown;
9549
9550 case 'h':
9551 if (name[2] == 'r' &&
9552 name[3] == 'o' &&
9553 name[4] == 'o' &&
9554 name[5] == 't')
9555 { /* chroot */
9556 return -KEY_chroot;
9557 }
9558
9559 goto unknown;
9560
9561 default:
9562 goto unknown;
9563 }
9564
9565 case 'd':
9566 if (name[1] == 'e' &&
9567 name[2] == 'l' &&
9568 name[3] == 'e' &&
9569 name[4] == 't' &&
9570 name[5] == 'e')
9571 { /* delete */
9572 return KEY_delete;
9573 }
9574
9575 goto unknown;
9576
9577 case 'e':
9578 switch (name[1])
9579 {
9580 case 'l':
9581 if (name[2] == 's' &&
9582 name[3] == 'e' &&
9583 name[4] == 'i' &&
9584 name[5] == 'f')
9585 { /* elseif */
9b387841 9586 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "elseif should be elsif");
4c3bbe0f
MHM
9587 }
9588
9589 goto unknown;
9590
9591 case 'x':
9592 if (name[2] == 'i' &&
9593 name[3] == 's' &&
9594 name[4] == 't' &&
9595 name[5] == 's')
9596 { /* exists */
9597 return KEY_exists;
9598 }
9599
9600 goto unknown;
9601
9602 default:
9603 goto unknown;
9604 }
9605
9606 case 'f':
9607 switch (name[1])
9608 {
9609 case 'i':
9610 if (name[2] == 'l' &&
9611 name[3] == 'e' &&
9612 name[4] == 'n' &&
9613 name[5] == 'o')
9614 { /* fileno */
9615 return -KEY_fileno;
9616 }
9617
9618 goto unknown;
9619
9620 case 'o':
9621 if (name[2] == 'r' &&
9622 name[3] == 'm' &&
9623 name[4] == 'a' &&
9624 name[5] == 't')
9625 { /* format */
9626 return KEY_format;
9627 }
9628
9629 goto unknown;
9630
9631 default:
9632 goto unknown;
9633 }
9634
9635 case 'g':
9636 if (name[1] == 'm' &&
9637 name[2] == 't' &&
9638 name[3] == 'i' &&
9639 name[4] == 'm' &&
9640 name[5] == 'e')
9641 { /* gmtime */
9642 return -KEY_gmtime;
9643 }
9644
9645 goto unknown;
9646
9647 case 'l':
9648 switch (name[1])
9649 {
9650 case 'e':
9651 if (name[2] == 'n' &&
9652 name[3] == 'g' &&
9653 name[4] == 't' &&
9654 name[5] == 'h')
9655 { /* length */
9656 return -KEY_length;
9657 }
9658
9659 goto unknown;
9660
9661 case 'i':
9662 if (name[2] == 's' &&
9663 name[3] == 't' &&
9664 name[4] == 'e' &&
9665 name[5] == 'n')
9666 { /* listen */
9667 return -KEY_listen;
9668 }
9669
9670 goto unknown;
9671
9672 default:
9673 goto unknown;
9674 }
9675
9676 case 'm':
9677 if (name[1] == 's' &&
9678 name[2] == 'g')
9679 {
9680 switch (name[3])
9681 {
9682 case 'c':
9683 if (name[4] == 't' &&
9684 name[5] == 'l')
9685 { /* msgctl */
9686 return -KEY_msgctl;
9687 }
9688
9689 goto unknown;
9690
9691 case 'g':
9692 if (name[4] == 'e' &&
9693 name[5] == 't')
9694 { /* msgget */
9695 return -KEY_msgget;
9696 }
9697
9698 goto unknown;
9699
9700 case 'r':
9701 if (name[4] == 'c' &&
9702 name[5] == 'v')
9703 { /* msgrcv */
9704 return -KEY_msgrcv;
9705 }
9706
9707 goto unknown;
9708
9709 case 's':
9710 if (name[4] == 'n' &&
9711 name[5] == 'd')
9712 { /* msgsnd */
9713 return -KEY_msgsnd;
9714 }
9715
9716 goto unknown;
9717
9718 default:
9719 goto unknown;
9720 }
9721 }
9722
9723 goto unknown;
9724
9725 case 'p':
9726 if (name[1] == 'r' &&
9727 name[2] == 'i' &&
9728 name[3] == 'n' &&
9729 name[4] == 't' &&
9730 name[5] == 'f')
9731 { /* printf */
9732 return KEY_printf;
9733 }
9734
9735 goto unknown;
9736
9737 case 'r':
9738 switch (name[1])
9739 {
9740 case 'e':
9741 switch (name[2])
9742 {
9743 case 'n':
9744 if (name[3] == 'a' &&
9745 name[4] == 'm' &&
9746 name[5] == 'e')
9747 { /* rename */
9748 return -KEY_rename;
9749 }
9750
9751 goto unknown;
9752
9753 case 't':
9754 if (name[3] == 'u' &&
9755 name[4] == 'r' &&
9756 name[5] == 'n')
9757 { /* return */
9758 return KEY_return;
9759 }
9760
9761 goto unknown;
9762
9763 default:
9764 goto unknown;
9765 }
9766
9767 case 'i':
9768 if (name[2] == 'n' &&
9769 name[3] == 'd' &&
9770 name[4] == 'e' &&
9771 name[5] == 'x')
9772 { /* rindex */
9773 return -KEY_rindex;
9774 }
9775
9776 goto unknown;
9777
9778 default:
9779 goto unknown;
9780 }
9781
9782 case 's':
9783 switch (name[1])
9784 {
9785 case 'c':
9786 if (name[2] == 'a' &&
9787 name[3] == 'l' &&
9788 name[4] == 'a' &&
9789 name[5] == 'r')
9790 { /* scalar */
9791 return KEY_scalar;
9792 }
9793
9794 goto unknown;
9795
9796 case 'e':
9797 switch (name[2])
9798 {
9799 case 'l':
9800 if (name[3] == 'e' &&
9801 name[4] == 'c' &&
9802 name[5] == 't')
9803 { /* select */
9804 return -KEY_select;
9805 }
9806
9807 goto unknown;
9808
9809 case 'm':
9810 switch (name[3])
9811 {
9812 case 'c':
9813 if (name[4] == 't' &&
9814 name[5] == 'l')
9815 { /* semctl */
9816 return -KEY_semctl;
9817 }
9818
9819 goto unknown;
9820
9821 case 'g':
9822 if (name[4] == 'e' &&
9823 name[5] == 't')
9824 { /* semget */
9825 return -KEY_semget;
9826 }
9827
9828 goto unknown;
9829
9830 default:
9831 goto unknown;
9832 }
9833
9834 default:
9835 goto unknown;
9836 }
9837
9838 case 'h':
9839 if (name[2] == 'm')
9840 {
9841 switch (name[3])
9842 {
9843 case 'c':
9844 if (name[4] == 't' &&
9845 name[5] == 'l')
9846 { /* shmctl */
9847 return -KEY_shmctl;
9848 }
9849
9850 goto unknown;
9851
9852 case 'g':
9853 if (name[4] == 'e' &&
9854 name[5] == 't')
9855 { /* shmget */
9856 return -KEY_shmget;
9857 }
9858
9859 goto unknown;
9860
9861 default:
9862 goto unknown;
9863 }
9864 }
9865
9866 goto unknown;
9867
9868 case 'o':
9869 if (name[2] == 'c' &&
9870 name[3] == 'k' &&
9871 name[4] == 'e' &&
9872 name[5] == 't')
9873 { /* socket */
9874 return -KEY_socket;
9875 }
9876
9877 goto unknown;
9878
9879 case 'p':
9880 if (name[2] == 'l' &&
9881 name[3] == 'i' &&
9882 name[4] == 'c' &&
9883 name[5] == 'e')
9884 { /* splice */
9885 return -KEY_splice;
9886 }
9887
9888 goto unknown;
9889
9890 case 'u':
9891 if (name[2] == 'b' &&
9892 name[3] == 's' &&
9893 name[4] == 't' &&
9894 name[5] == 'r')
9895 { /* substr */
9896 return -KEY_substr;
9897 }
9898
9899 goto unknown;
9900
9901 case 'y':
9902 if (name[2] == 's' &&
9903 name[3] == 't' &&
9904 name[4] == 'e' &&
9905 name[5] == 'm')
9906 { /* system */
9907 return -KEY_system;
9908 }
9909
9910 goto unknown;
9911
9912 default:
9913 goto unknown;
9914 }
9915
9916 case 'u':
9917 if (name[1] == 'n')
9918 {
9919 switch (name[2])
9920 {
9921 case 'l':
9922 switch (name[3])
9923 {
9924 case 'e':
9925 if (name[4] == 's' &&
9926 name[5] == 's')
9927 { /* unless */
9928 return KEY_unless;
9929 }
9930
9931 goto unknown;
9932
9933 case 'i':
9934 if (name[4] == 'n' &&
9935 name[5] == 'k')
9936 { /* unlink */
9937 return -KEY_unlink;
9938 }
9939
9940 goto unknown;
9941
9942 default:
9943 goto unknown;
9944 }
9945
9946 case 'p':
9947 if (name[3] == 'a' &&
9948 name[4] == 'c' &&
9949 name[5] == 'k')
9950 { /* unpack */
9951 return -KEY_unpack;
9952 }
9953
9954 goto unknown;
9955
9956 default:
9957 goto unknown;
9958 }
9959 }
9960
9961 goto unknown;
9962
9963 case 'v':
9964 if (name[1] == 'a' &&
9965 name[2] == 'l' &&
9966 name[3] == 'u' &&
9967 name[4] == 'e' &&
9968 name[5] == 's')
9969 { /* values */
9970 return -KEY_values;
9971 }
9972
9973 goto unknown;
9974
9975 default:
9976 goto unknown;
e2e1dd5a 9977 }
4c3bbe0f 9978
0d863452 9979 case 7: /* 29 tokens of length 7 */
4c3bbe0f
MHM
9980 switch (name[0])
9981 {
9982 case 'D':
9983 if (name[1] == 'E' &&
9984 name[2] == 'S' &&
9985 name[3] == 'T' &&
9986 name[4] == 'R' &&
9987 name[5] == 'O' &&
9988 name[6] == 'Y')
9989 { /* DESTROY */
9990 return KEY_DESTROY;
9991 }
9992
9993 goto unknown;
9994
9995 case '_':
9996 if (name[1] == '_' &&
9997 name[2] == 'E' &&
9998 name[3] == 'N' &&
9999 name[4] == 'D' &&
10000 name[5] == '_' &&
10001 name[6] == '_')
10002 { /* __END__ */
10003 return KEY___END__;
10004 }
10005
10006 goto unknown;
10007
10008 case 'b':
10009 if (name[1] == 'i' &&
10010 name[2] == 'n' &&
10011 name[3] == 'm' &&
10012 name[4] == 'o' &&
10013 name[5] == 'd' &&
10014 name[6] == 'e')
10015 { /* binmode */
10016 return -KEY_binmode;
10017 }
10018
10019 goto unknown;
10020
10021 case 'c':
10022 if (name[1] == 'o' &&
10023 name[2] == 'n' &&
10024 name[3] == 'n' &&
10025 name[4] == 'e' &&
10026 name[5] == 'c' &&
10027 name[6] == 't')
10028 { /* connect */
10029 return -KEY_connect;
10030 }
10031
10032 goto unknown;
10033
10034 case 'd':
10035 switch (name[1])
10036 {
10037 case 'b':
10038 if (name[2] == 'm' &&
10039 name[3] == 'o' &&
10040 name[4] == 'p' &&
10041 name[5] == 'e' &&
10042 name[6] == 'n')
10043 { /* dbmopen */
10044 return -KEY_dbmopen;
10045 }
10046
10047 goto unknown;
10048
10049 case 'e':
0d863452
RH
10050 if (name[2] == 'f')
10051 {
10052 switch (name[3])
10053 {
10054 case 'a':
10055 if (name[4] == 'u' &&
10056 name[5] == 'l' &&
10057 name[6] == 't')
10058 { /* default */
5458a98a 10059 return (all_keywords || FEATURE_IS_ENABLED("switch") ? KEY_default : 0);
0d863452
RH
10060 }
10061
10062 goto unknown;
10063
10064 case 'i':
10065 if (name[4] == 'n' &&
952306ac
RGS
10066 name[5] == 'e' &&
10067 name[6] == 'd')
10068 { /* defined */
10069 return KEY_defined;
10070 }
4c3bbe0f 10071
952306ac 10072 goto unknown;
4c3bbe0f 10073
952306ac
RGS
10074 default:
10075 goto unknown;
10076 }
0d863452
RH
10077 }
10078
10079 goto unknown;
10080
10081 default:
10082 goto unknown;
10083 }
4c3bbe0f
MHM
10084
10085 case 'f':
10086 if (name[1] == 'o' &&
10087 name[2] == 'r' &&
10088 name[3] == 'e' &&
10089 name[4] == 'a' &&
10090 name[5] == 'c' &&
10091 name[6] == 'h')
10092 { /* foreach */
10093 return KEY_foreach;
10094 }
10095
10096 goto unknown;
10097
10098 case 'g':
10099 if (name[1] == 'e' &&
10100 name[2] == 't' &&
10101 name[3] == 'p')
10102 {
10103 switch (name[4])
10104 {
10105 case 'g':
10106 if (name[5] == 'r' &&
10107 name[6] == 'p')
10108 { /* getpgrp */
10109 return -KEY_getpgrp;
10110 }
10111
10112 goto unknown;
10113
10114 case 'p':
10115 if (name[5] == 'i' &&
10116 name[6] == 'd')
10117 { /* getppid */
10118 return -KEY_getppid;
10119 }
10120
10121 goto unknown;
10122
10123 default:
10124 goto unknown;
10125 }
10126 }
10127
10128 goto unknown;
10129
10130 case 'l':
10131 if (name[1] == 'c' &&
10132 name[2] == 'f' &&
10133 name[3] == 'i' &&
10134 name[4] == 'r' &&
10135 name[5] == 's' &&
10136 name[6] == 't')
10137 { /* lcfirst */
10138 return -KEY_lcfirst;
10139 }
10140
10141 goto unknown;
10142
10143 case 'o':
10144 if (name[1] == 'p' &&
10145 name[2] == 'e' &&
10146 name[3] == 'n' &&
10147 name[4] == 'd' &&
10148 name[5] == 'i' &&
10149 name[6] == 'r')
10150 { /* opendir */
10151 return -KEY_opendir;
10152 }
10153
10154 goto unknown;
10155
10156 case 'p':
10157 if (name[1] == 'a' &&
10158 name[2] == 'c' &&
10159 name[3] == 'k' &&
10160 name[4] == 'a' &&
10161 name[5] == 'g' &&
10162 name[6] == 'e')
10163 { /* package */
10164 return KEY_package;
10165 }
10166
10167 goto unknown;
10168
10169 case 'r':
10170 if (name[1] == 'e')
10171 {
10172 switch (name[2])
10173 {
10174 case 'a':
10175 if (name[3] == 'd' &&
10176 name[4] == 'd' &&
10177 name[5] == 'i' &&
10178 name[6] == 'r')
10179 { /* readdir */
10180 return -KEY_readdir;
10181 }
10182
10183 goto unknown;
10184
10185 case 'q':
10186 if (name[3] == 'u' &&
10187 name[4] == 'i' &&
10188 name[5] == 'r' &&
10189 name[6] == 'e')
10190 { /* require */
10191 return KEY_require;
10192 }
10193
10194 goto unknown;
10195
10196 case 'v':
10197 if (name[3] == 'e' &&
10198 name[4] == 'r' &&
10199 name[5] == 's' &&
10200 name[6] == 'e')
10201 { /* reverse */
10202 return -KEY_reverse;
10203 }
10204
10205 goto unknown;
10206
10207 default:
10208 goto unknown;
10209 }
10210 }
10211
10212 goto unknown;
10213
10214 case 's':
10215 switch (name[1])
10216 {
10217 case 'e':
10218 switch (name[2])
10219 {
10220 case 'e':
10221 if (name[3] == 'k' &&
10222 name[4] == 'd' &&
10223 name[5] == 'i' &&
10224 name[6] == 'r')
10225 { /* seekdir */
10226 return -KEY_seekdir;
10227 }
10228
10229 goto unknown;
10230
10231 case 't':
10232 if (name[3] == 'p' &&
10233 name[4] == 'g' &&
10234 name[5] == 'r' &&
10235 name[6] == 'p')
10236 { /* setpgrp */
10237 return -KEY_setpgrp;
10238 }
10239
10240 goto unknown;
10241
10242 default:
10243 goto unknown;
10244 }
10245
10246 case 'h':
10247 if (name[2] == 'm' &&
10248 name[3] == 'r' &&
10249 name[4] == 'e' &&
10250 name[5] == 'a' &&
10251 name[6] == 'd')
10252 { /* shmread */
10253 return -KEY_shmread;
10254 }
10255
10256 goto unknown;
10257
10258 case 'p':
10259 if (name[2] == 'r' &&
10260 name[3] == 'i' &&
10261 name[4] == 'n' &&
10262 name[5] == 't' &&
10263 name[6] == 'f')
10264 { /* sprintf */
10265 return -KEY_sprintf;
10266 }
10267
10268 goto unknown;
10269
10270 case 'y':
10271 switch (name[2])
10272 {
10273 case 'm':
10274 if (name[3] == 'l' &&
10275 name[4] == 'i' &&
10276 name[5] == 'n' &&
10277 name[6] == 'k')
10278 { /* symlink */
10279 return -KEY_symlink;
10280 }
10281
10282 goto unknown;
10283
10284 case 's':
10285 switch (name[3])
10286 {
10287 case 'c':
10288 if (name[4] == 'a' &&
10289 name[5] == 'l' &&
10290 name[6] == 'l')
10291 { /* syscall */
10292 return -KEY_syscall;
10293 }
10294
10295 goto unknown;
10296
10297 case 'o':
10298 if (name[4] == 'p' &&
10299 name[5] == 'e' &&
10300 name[6] == 'n')
10301 { /* sysopen */
10302 return -KEY_sysopen;
10303 }
10304
10305 goto unknown;
10306
10307 case 'r':
10308 if (name[4] == 'e' &&
10309 name[5] == 'a' &&
10310 name[6] == 'd')
10311 { /* sysread */
10312 return -KEY_sysread;
10313 }
10314
10315 goto unknown;
10316
10317 case 's':
10318 if (name[4] == 'e' &&
10319 name[5] == 'e' &&
10320 name[6] == 'k')
10321 { /* sysseek */
10322 return -KEY_sysseek;
10323 }
10324
10325 goto unknown;
10326
10327 default:
10328 goto unknown;
10329 }
10330
10331 default:
10332 goto unknown;
10333 }
10334
10335 default:
10336 goto unknown;
10337 }
10338
10339 case 't':
10340 if (name[1] == 'e' &&
10341 name[2] == 'l' &&
10342 name[3] == 'l' &&
10343 name[4] == 'd' &&
10344 name[5] == 'i' &&
10345 name[6] == 'r')
10346 { /* telldir */
10347 return -KEY_telldir;
10348 }
10349
10350 goto unknown;
10351
10352 case 'u':
10353 switch (name[1])
10354 {
10355 case 'c':
10356 if (name[2] == 'f' &&
10357 name[3] == 'i' &&
10358 name[4] == 'r' &&
10359 name[5] == 's' &&
10360 name[6] == 't')
10361 { /* ucfirst */
10362 return -KEY_ucfirst;
10363 }
10364
10365 goto unknown;
10366
10367 case 'n':
10368 if (name[2] == 's' &&
10369 name[3] == 'h' &&
10370 name[4] == 'i' &&
10371 name[5] == 'f' &&
10372 name[6] == 't')
10373 { /* unshift */
10374 return -KEY_unshift;
10375 }
10376
10377 goto unknown;
10378
10379 default:
10380 goto unknown;
10381 }
10382
10383 case 'w':
10384 if (name[1] == 'a' &&
10385 name[2] == 'i' &&
10386 name[3] == 't' &&
10387 name[4] == 'p' &&
10388 name[5] == 'i' &&
10389 name[6] == 'd')
10390 { /* waitpid */
10391 return -KEY_waitpid;
10392 }
10393
10394 goto unknown;
10395
10396 default:
10397 goto unknown;
10398 }
10399
10400 case 8: /* 26 tokens of length 8 */
10401 switch (name[0])
10402 {
10403 case 'A':
10404 if (name[1] == 'U' &&
10405 name[2] == 'T' &&
10406 name[3] == 'O' &&
10407 name[4] == 'L' &&
10408 name[5] == 'O' &&
10409 name[6] == 'A' &&
10410 name[7] == 'D')
10411 { /* AUTOLOAD */
10412 return KEY_AUTOLOAD;
10413 }
10414
10415 goto unknown;
10416
10417 case '_':
10418 if (name[1] == '_')
10419 {
10420 switch (name[2])
10421 {
10422 case 'D':
10423 if (name[3] == 'A' &&
10424 name[4] == 'T' &&
10425 name[5] == 'A' &&
10426 name[6] == '_' &&
10427 name[7] == '_')
10428 { /* __DATA__ */
10429 return KEY___DATA__;
10430 }
10431
10432 goto unknown;
10433
10434 case 'F':
10435 if (name[3] == 'I' &&
10436 name[4] == 'L' &&
10437 name[5] == 'E' &&
10438 name[6] == '_' &&
10439 name[7] == '_')
10440 { /* __FILE__ */
10441 return -KEY___FILE__;
10442 }
10443
10444 goto unknown;
10445
10446 case 'L':
10447 if (name[3] == 'I' &&
10448 name[4] == 'N' &&
10449 name[5] == 'E' &&
10450 name[6] == '_' &&
10451 name[7] == '_')
10452 { /* __LINE__ */
10453 return -KEY___LINE__;
10454 }
10455
10456 goto unknown;
10457
10458 default:
10459 goto unknown;
10460 }
10461 }
10462
10463 goto unknown;
10464
10465 case 'c':
10466 switch (name[1])
10467 {
10468 case 'l':
10469 if (name[2] == 'o' &&
10470 name[3] == 's' &&
10471 name[4] == 'e' &&
10472 name[5] == 'd' &&
10473 name[6] == 'i' &&
10474 name[7] == 'r')
10475 { /* closedir */
10476 return -KEY_closedir;
10477 }
10478
10479 goto unknown;
10480
10481 case 'o':
10482 if (name[2] == 'n' &&
10483 name[3] == 't' &&
10484 name[4] == 'i' &&
10485 name[5] == 'n' &&
10486 name[6] == 'u' &&
10487 name[7] == 'e')
10488 { /* continue */
10489 return -KEY_continue;
10490 }
10491
10492 goto unknown;
10493
10494 default:
10495 goto unknown;
10496 }
10497
10498 case 'd':
10499 if (name[1] == 'b' &&
10500 name[2] == 'm' &&
10501 name[3] == 'c' &&
10502 name[4] == 'l' &&
10503 name[5] == 'o' &&
10504 name[6] == 's' &&
10505 name[7] == 'e')
10506 { /* dbmclose */
10507 return -KEY_dbmclose;
10508 }
10509
10510 goto unknown;
10511
10512 case 'e':
10513 if (name[1] == 'n' &&
10514 name[2] == 'd')
10515 {
10516 switch (name[3])
10517 {
10518 case 'g':
10519 if (name[4] == 'r' &&
10520 name[5] == 'e' &&
10521 name[6] == 'n' &&
10522 name[7] == 't')
10523 { /* endgrent */
10524 return -KEY_endgrent;
10525 }
10526
10527 goto unknown;
10528
10529 case 'p':
10530 if (name[4] == 'w' &&
10531 name[5] == 'e' &&
10532 name[6] == 'n' &&
10533 name[7] == 't')
10534 { /* endpwent */
10535 return -KEY_endpwent;
10536 }
10537
10538 goto unknown;
10539
10540 default:
10541 goto unknown;
10542 }
10543 }
10544
10545 goto unknown;
10546
10547 case 'f':
10548 if (name[1] == 'o' &&
10549 name[2] == 'r' &&
10550 name[3] == 'm' &&
10551 name[4] == 'l' &&
10552 name[5] == 'i' &&
10553 name[6] == 'n' &&
10554 name[7] == 'e')
10555 { /* formline */
10556 return -KEY_formline;
10557 }
10558
10559 goto unknown;
10560
10561 case 'g':
10562 if (name[1] == 'e' &&
10563 name[2] == 't')
10564 {
10565 switch (name[3])
10566 {
10567 case 'g':
10568 if (name[4] == 'r')
10569 {
10570 switch (name[5])
10571 {
10572 case 'e':
10573 if (name[6] == 'n' &&
10574 name[7] == 't')
10575 { /* getgrent */
10576 return -KEY_getgrent;
10577 }
10578
10579 goto unknown;
10580
10581 case 'g':
10582 if (name[6] == 'i' &&
10583 name[7] == 'd')
10584 { /* getgrgid */
10585 return -KEY_getgrgid;
10586 }
10587
10588 goto unknown;
10589
10590 case 'n':
10591 if (name[6] == 'a' &&
10592 name[7] == 'm')
10593 { /* getgrnam */
10594 return -KEY_getgrnam;
10595 }
10596
10597 goto unknown;
10598
10599 default:
10600 goto unknown;
10601 }
10602 }
10603
10604 goto unknown;
10605
10606 case 'l':
10607 if (name[4] == 'o' &&
10608 name[5] == 'g' &&
10609 name[6] == 'i' &&
10610 name[7] == 'n')
10611 { /* getlogin */
10612 return -KEY_getlogin;
10613 }
10614
10615 goto unknown;
10616
10617 case 'p':
10618 if (name[4] == 'w')
10619 {
10620 switch (name[5])
10621 {
10622 case 'e':
10623 if (name[6] == 'n' &&
10624 name[7] == 't')
10625 { /* getpwent */
10626 return -KEY_getpwent;
10627 }
10628
10629 goto unknown;
10630
10631 case 'n':
10632 if (name[6] == 'a' &&
10633 name[7] == 'm')
10634 { /* getpwnam */
10635 return -KEY_getpwnam;
10636 }
10637
10638 goto unknown;
10639
10640 case 'u':
10641 if (name[6] == 'i' &&
10642 name[7] == 'd')
10643 { /* getpwuid */
10644 return -KEY_getpwuid;
10645 }
10646
10647 goto unknown;
10648
10649 default:
10650 goto unknown;
10651 }
10652 }
10653
10654 goto unknown;
10655
10656 default:
10657 goto unknown;
10658 }
10659 }
10660
10661 goto unknown;
10662
10663 case 'r':
10664 if (name[1] == 'e' &&
10665 name[2] == 'a' &&
10666 name[3] == 'd')
10667 {
10668 switch (name[4])
10669 {
10670 case 'l':
10671 if (name[5] == 'i' &&
10672 name[6] == 'n')
10673 {
10674 switch (name[7])
10675 {
10676 case 'e':
10677 { /* readline */
10678 return -KEY_readline;
10679 }
10680
4c3bbe0f
MHM
10681 case 'k':
10682 { /* readlink */
10683 return -KEY_readlink;
10684 }
10685
4c3bbe0f
MHM
10686 default:
10687 goto unknown;
10688 }
10689 }
10690
10691 goto unknown;
10692
10693 case 'p':
10694 if (name[5] == 'i' &&
10695 name[6] == 'p' &&
10696 name[7] == 'e')
10697 { /* readpipe */
10698 return -KEY_readpipe;
10699 }
10700
10701 goto unknown;
10702
10703 default:
10704 goto unknown;
10705 }
10706 }
10707
10708 goto unknown;
10709
10710 case 's':
10711 switch (name[1])
10712 {
10713 case 'e':
10714 if (name[2] == 't')
10715 {
10716 switch (name[3])
10717 {
10718 case 'g':
10719 if (name[4] == 'r' &&
10720 name[5] == 'e' &&
10721 name[6] == 'n' &&
10722 name[7] == 't')
10723 { /* setgrent */
10724 return -KEY_setgrent;
10725 }
10726
10727 goto unknown;
10728
10729 case 'p':
10730 if (name[4] == 'w' &&
10731 name[5] == 'e' &&
10732 name[6] == 'n' &&
10733 name[7] == 't')
10734 { /* setpwent */
10735 return -KEY_setpwent;
10736 }
10737
10738 goto unknown;
10739
10740 default:
10741 goto unknown;
10742 }
10743 }
10744
10745 goto unknown;
10746
10747 case 'h':
10748 switch (name[2])
10749 {
10750 case 'm':
10751 if (name[3] == 'w' &&
10752 name[4] == 'r' &&
10753 name[5] == 'i' &&
10754 name[6] == 't' &&
10755 name[7] == 'e')
10756 { /* shmwrite */
10757 return -KEY_shmwrite;
10758 }
10759
10760 goto unknown;
10761
10762 case 'u':
10763 if (name[3] == 't' &&
10764 name[4] == 'd' &&
10765 name[5] == 'o' &&
10766 name[6] == 'w' &&
10767 name[7] == 'n')
10768 { /* shutdown */
10769 return -KEY_shutdown;
10770 }
10771
10772 goto unknown;
10773
10774 default:
10775 goto unknown;
10776 }
10777
10778 case 'y':
10779 if (name[2] == 's' &&
10780 name[3] == 'w' &&
10781 name[4] == 'r' &&
10782 name[5] == 'i' &&
10783 name[6] == 't' &&
10784 name[7] == 'e')
10785 { /* syswrite */
10786 return -KEY_syswrite;
10787 }
10788
10789 goto unknown;
10790
10791 default:
10792 goto unknown;
10793 }
10794
10795 case 't':
10796 if (name[1] == 'r' &&
10797 name[2] == 'u' &&
10798 name[3] == 'n' &&
10799 name[4] == 'c' &&
10800 name[5] == 'a' &&
10801 name[6] == 't' &&
10802 name[7] == 'e')
10803 { /* truncate */
10804 return -KEY_truncate;
10805 }
10806
10807 goto unknown;
10808
10809 default:
10810 goto unknown;
10811 }
10812
3c10abe3 10813 case 9: /* 9 tokens of length 9 */
4c3bbe0f
MHM
10814 switch (name[0])
10815 {
3c10abe3
AG
10816 case 'U':
10817 if (name[1] == 'N' &&
10818 name[2] == 'I' &&
10819 name[3] == 'T' &&
10820 name[4] == 'C' &&
10821 name[5] == 'H' &&
10822 name[6] == 'E' &&
10823 name[7] == 'C' &&
10824 name[8] == 'K')
10825 { /* UNITCHECK */
10826 return KEY_UNITCHECK;
10827 }
10828
10829 goto unknown;
10830
4c3bbe0f
MHM
10831 case 'e':
10832 if (name[1] == 'n' &&
10833 name[2] == 'd' &&
10834 name[3] == 'n' &&
10835 name[4] == 'e' &&
10836 name[5] == 't' &&
10837 name[6] == 'e' &&
10838 name[7] == 'n' &&
10839 name[8] == 't')
10840 { /* endnetent */
10841 return -KEY_endnetent;
10842 }
10843
10844 goto unknown;
10845
10846 case 'g':
10847 if (name[1] == 'e' &&
10848 name[2] == 't' &&
10849 name[3] == 'n' &&
10850 name[4] == 'e' &&
10851 name[5] == 't' &&
10852 name[6] == 'e' &&
10853 name[7] == 'n' &&
10854 name[8] == 't')
10855 { /* getnetent */
10856 return -KEY_getnetent;
10857 }
10858
10859 goto unknown;
10860
10861 case 'l':
10862 if (name[1] == 'o' &&
10863 name[2] == 'c' &&
10864 name[3] == 'a' &&
10865 name[4] == 'l' &&
10866 name[5] == 't' &&
10867 name[6] == 'i' &&
10868 name[7] == 'm' &&
10869 name[8] == 'e')
10870 { /* localtime */
10871 return -KEY_localtime;
10872 }
10873
10874 goto unknown;
10875
10876 case 'p':
10877 if (name[1] == 'r' &&
10878 name[2] == 'o' &&
10879 name[3] == 't' &&
10880 name[4] == 'o' &&
10881 name[5] == 't' &&
10882 name[6] == 'y' &&
10883 name[7] == 'p' &&
10884 name[8] == 'e')
10885 { /* prototype */
10886 return KEY_prototype;
10887 }
10888
10889 goto unknown;
10890
10891 case 'q':
10892 if (name[1] == 'u' &&
10893 name[2] == 'o' &&
10894 name[3] == 't' &&
10895 name[4] == 'e' &&
10896 name[5] == 'm' &&
10897 name[6] == 'e' &&
10898 name[7] == 't' &&
10899 name[8] == 'a')
10900 { /* quotemeta */
10901 return -KEY_quotemeta;
10902 }
10903
10904 goto unknown;
10905
10906 case 'r':
10907 if (name[1] == 'e' &&
10908 name[2] == 'w' &&
10909 name[3] == 'i' &&
10910 name[4] == 'n' &&
10911 name[5] == 'd' &&
10912 name[6] == 'd' &&
10913 name[7] == 'i' &&
10914 name[8] == 'r')
10915 { /* rewinddir */
10916 return -KEY_rewinddir;
10917 }
10918
10919 goto unknown;
10920
10921 case 's':
10922 if (name[1] == 'e' &&
10923 name[2] == 't' &&
10924 name[3] == 'n' &&
10925 name[4] == 'e' &&
10926 name[5] == 't' &&
10927 name[6] == 'e' &&
10928 name[7] == 'n' &&
10929 name[8] == 't')
10930 { /* setnetent */
10931 return -KEY_setnetent;
10932 }
10933
10934 goto unknown;
10935
10936 case 'w':
10937 if (name[1] == 'a' &&
10938 name[2] == 'n' &&
10939 name[3] == 't' &&
10940 name[4] == 'a' &&
10941 name[5] == 'r' &&
10942 name[6] == 'r' &&
10943 name[7] == 'a' &&
10944 name[8] == 'y')
10945 { /* wantarray */
10946 return -KEY_wantarray;
10947 }
10948
10949 goto unknown;
10950
10951 default:
10952 goto unknown;
10953 }
10954
10955 case 10: /* 9 tokens of length 10 */
10956 switch (name[0])
10957 {
10958 case 'e':
10959 if (name[1] == 'n' &&
10960 name[2] == 'd')
10961 {
10962 switch (name[3])
10963 {
10964 case 'h':
10965 if (name[4] == 'o' &&
10966 name[5] == 's' &&
10967 name[6] == 't' &&
10968 name[7] == 'e' &&
10969 name[8] == 'n' &&
10970 name[9] == 't')
10971 { /* endhostent */
10972 return -KEY_endhostent;
10973 }
10974
10975 goto unknown;
10976
10977 case 's':
10978 if (name[4] == 'e' &&
10979 name[5] == 'r' &&
10980 name[6] == 'v' &&
10981 name[7] == 'e' &&
10982 name[8] == 'n' &&
10983 name[9] == 't')
10984 { /* endservent */
10985 return -KEY_endservent;
10986 }
10987
10988 goto unknown;
10989
10990 default:
10991 goto unknown;
10992 }
10993 }
10994
10995 goto unknown;
10996
10997 case 'g':
10998 if (name[1] == 'e' &&
10999 name[2] == 't')
11000 {
11001 switch (name[3])
11002 {
11003 case 'h':
11004 if (name[4] == 'o' &&
11005 name[5] == 's' &&
11006 name[6] == 't' &&
11007 name[7] == 'e' &&
11008 name[8] == 'n' &&
11009 name[9] == 't')
11010 { /* gethostent */
11011 return -KEY_gethostent;
11012 }
11013
11014 goto unknown;
11015
11016 case 's':
11017 switch (name[4])
11018 {
11019 case 'e':
11020 if (name[5] == 'r' &&
11021 name[6] == 'v' &&
11022 name[7] == 'e' &&
11023 name[8] == 'n' &&
11024 name[9] == 't')
11025 { /* getservent */
11026 return -KEY_getservent;
11027 }
11028
11029 goto unknown;
11030
11031 case 'o':
11032 if (name[5] == 'c' &&
11033 name[6] == 'k' &&
11034 name[7] == 'o' &&
11035 name[8] == 'p' &&
11036 name[9] == 't')
11037 { /* getsockopt */
11038 return -KEY_getsockopt;
11039 }
11040
11041 goto unknown;
11042
11043 default:
11044 goto unknown;
11045 }
11046
11047 default:
11048 goto unknown;
11049 }
11050 }
11051
11052 goto unknown;
11053
11054 case 's':
11055 switch (name[1])
11056 {
11057 case 'e':
11058 if (name[2] == 't')
11059 {
11060 switch (name[3])
11061 {
11062 case 'h':
11063 if (name[4] == 'o' &&
11064 name[5] == 's' &&
11065 name[6] == 't' &&
11066 name[7] == 'e' &&
11067 name[8] == 'n' &&
11068 name[9] == 't')
11069 { /* sethostent */
11070 return -KEY_sethostent;
11071 }
11072
11073 goto unknown;
11074
11075 case 's':
11076 switch (name[4])
11077 {
11078 case 'e':
11079 if (name[5] == 'r' &&
11080 name[6] == 'v' &&
11081 name[7] == 'e' &&
11082 name[8] == 'n' &&
11083 name[9] == 't')
11084 { /* setservent */
11085 return -KEY_setservent;
11086 }
11087
11088 goto unknown;
11089
11090 case 'o':
11091 if (name[5] == 'c' &&
11092 name[6] == 'k' &&
11093 name[7] == 'o' &&
11094 name[8] == 'p' &&
11095 name[9] == 't')
11096 { /* setsockopt */
11097 return -KEY_setsockopt;
11098 }
11099
11100 goto unknown;
11101
11102 default:
11103 goto unknown;
11104 }
11105
11106 default:
11107 goto unknown;
11108 }
11109 }
11110
11111 goto unknown;
11112
11113 case 'o':
11114 if (name[2] == 'c' &&
11115 name[3] == 'k' &&
11116 name[4] == 'e' &&
11117 name[5] == 't' &&
11118 name[6] == 'p' &&
11119 name[7] == 'a' &&
11120 name[8] == 'i' &&
11121 name[9] == 'r')
11122 { /* socketpair */
11123 return -KEY_socketpair;
11124 }
11125
11126 goto unknown;
11127
11128 default:
11129 goto unknown;
11130 }
11131
11132 default:
11133 goto unknown;
e2e1dd5a 11134 }
4c3bbe0f
MHM
11135
11136 case 11: /* 8 tokens of length 11 */
11137 switch (name[0])
11138 {
11139 case '_':
11140 if (name[1] == '_' &&
11141 name[2] == 'P' &&
11142 name[3] == 'A' &&
11143 name[4] == 'C' &&
11144 name[5] == 'K' &&
11145 name[6] == 'A' &&
11146 name[7] == 'G' &&
11147 name[8] == 'E' &&
11148 name[9] == '_' &&
11149 name[10] == '_')
11150 { /* __PACKAGE__ */
11151 return -KEY___PACKAGE__;
11152 }
11153
11154 goto unknown;
11155
11156 case 'e':
11157 if (name[1] == 'n' &&
11158 name[2] == 'd' &&
11159 name[3] == 'p' &&
11160 name[4] == 'r' &&
11161 name[5] == 'o' &&
11162 name[6] == 't' &&
11163 name[7] == 'o' &&
11164 name[8] == 'e' &&
11165 name[9] == 'n' &&
11166 name[10] == 't')
11167 { /* endprotoent */
11168 return -KEY_endprotoent;
11169 }
11170
11171 goto unknown;
11172
11173 case 'g':
11174 if (name[1] == 'e' &&
11175 name[2] == 't')
11176 {
11177 switch (name[3])
11178 {
11179 case 'p':
11180 switch (name[4])
11181 {
11182 case 'e':
11183 if (name[5] == 'e' &&
11184 name[6] == 'r' &&
11185 name[7] == 'n' &&
11186 name[8] == 'a' &&
11187 name[9] == 'm' &&
11188 name[10] == 'e')
11189 { /* getpeername */
11190 return -KEY_getpeername;
11191 }
11192
11193 goto unknown;
11194
11195 case 'r':
11196 switch (name[5])
11197 {
11198 case 'i':
11199 if (name[6] == 'o' &&
11200 name[7] == 'r' &&
11201 name[8] == 'i' &&
11202 name[9] == 't' &&
11203 name[10] == 'y')
11204 { /* getpriority */
11205 return -KEY_getpriority;
11206 }
11207
11208 goto unknown;
11209
11210 case 'o':
11211 if (name[6] == 't' &&
11212 name[7] == 'o' &&
11213 name[8] == 'e' &&
11214 name[9] == 'n' &&
11215 name[10] == 't')
11216 { /* getprotoent */
11217 return -KEY_getprotoent;
11218 }
11219
11220 goto unknown;
11221
11222 default:
11223 goto unknown;
11224 }
11225
11226 default:
11227 goto unknown;
11228 }
11229
11230 case 's':
11231 if (name[4] == 'o' &&
11232 name[5] == 'c' &&
11233 name[6] == 'k' &&
11234 name[7] == 'n' &&
11235 name[8] == 'a' &&
11236 name[9] == 'm' &&
11237 name[10] == 'e')
11238 { /* getsockname */
11239 return -KEY_getsockname;
11240 }
11241
11242 goto unknown;
11243
11244 default:
11245 goto unknown;
11246 }
11247 }
11248
11249 goto unknown;
11250
11251 case 's':
11252 if (name[1] == 'e' &&
11253 name[2] == 't' &&
11254 name[3] == 'p' &&
11255 name[4] == 'r')
11256 {
11257 switch (name[5])
11258 {
11259 case 'i':
11260 if (name[6] == 'o' &&
11261 name[7] == 'r' &&
11262 name[8] == 'i' &&
11263 name[9] == 't' &&
11264 name[10] == 'y')
11265 { /* setpriority */
11266 return -KEY_setpriority;
11267 }
11268
11269 goto unknown;
11270
11271 case 'o':
11272 if (name[6] == 't' &&
11273 name[7] == 'o' &&
11274 name[8] == 'e' &&
11275 name[9] == 'n' &&
11276 name[10] == 't')
11277 { /* setprotoent */
11278 return -KEY_setprotoent;
11279 }
11280
11281 goto unknown;
11282
11283 default:
11284 goto unknown;
11285 }
11286 }
11287
11288 goto unknown;
11289
11290 default:
11291 goto unknown;
e2e1dd5a 11292 }
4c3bbe0f
MHM
11293
11294 case 12: /* 2 tokens of length 12 */
11295 if (name[0] == 'g' &&
11296 name[1] == 'e' &&
11297 name[2] == 't' &&
11298 name[3] == 'n' &&
11299 name[4] == 'e' &&
11300 name[5] == 't' &&
11301 name[6] == 'b' &&
11302 name[7] == 'y')
11303 {
11304 switch (name[8])
11305 {
11306 case 'a':
11307 if (name[9] == 'd' &&
11308 name[10] == 'd' &&
11309 name[11] == 'r')
11310 { /* getnetbyaddr */
11311 return -KEY_getnetbyaddr;
11312 }
11313
11314 goto unknown;
11315
11316 case 'n':
11317 if (name[9] == 'a' &&
11318 name[10] == 'm' &&
11319 name[11] == 'e')
11320 { /* getnetbyname */
11321 return -KEY_getnetbyname;
11322 }
11323
11324 goto unknown;
11325
11326 default:
11327 goto unknown;
11328 }
e2e1dd5a 11329 }
4c3bbe0f
MHM
11330
11331 goto unknown;
11332
11333 case 13: /* 4 tokens of length 13 */
11334 if (name[0] == 'g' &&
11335 name[1] == 'e' &&
11336 name[2] == 't')
11337 {
11338 switch (name[3])
11339 {
11340 case 'h':
11341 if (name[4] == 'o' &&
11342 name[5] == 's' &&
11343 name[6] == 't' &&
11344 name[7] == 'b' &&
11345 name[8] == 'y')
11346 {
11347 switch (name[9])
11348 {
11349 case 'a':
11350 if (name[10] == 'd' &&
11351 name[11] == 'd' &&
11352 name[12] == 'r')
11353 { /* gethostbyaddr */
11354 return -KEY_gethostbyaddr;
11355 }
11356
11357 goto unknown;
11358
11359 case 'n':
11360 if (name[10] == 'a' &&
11361 name[11] == 'm' &&
11362 name[12] == 'e')
11363 { /* gethostbyname */
11364 return -KEY_gethostbyname;
11365 }
11366
11367 goto unknown;
11368
11369 default:
11370 goto unknown;
11371 }
11372 }
11373
11374 goto unknown;
11375
11376 case 's':
11377 if (name[4] == 'e' &&
11378 name[5] == 'r' &&
11379 name[6] == 'v' &&
11380 name[7] == 'b' &&
11381 name[8] == 'y')
11382 {
11383 switch (name[9])
11384 {
11385 case 'n':
11386 if (name[10] == 'a' &&
11387 name[11] == 'm' &&
11388 name[12] == 'e')
11389 { /* getservbyname */
11390 return -KEY_getservbyname;
11391 }
11392
11393 goto unknown;
11394
11395 case 'p':
11396 if (name[10] == 'o' &&
11397 name[11] == 'r' &&
11398 name[12] == 't')
11399 { /* getservbyport */
11400 return -KEY_getservbyport;
11401 }
11402
11403 goto unknown;
11404
11405 default:
11406 goto unknown;
11407 }
11408 }
11409
11410 goto unknown;
11411
11412 default:
11413 goto unknown;
11414 }
e2e1dd5a 11415 }
4c3bbe0f
MHM
11416
11417 goto unknown;
11418
11419 case 14: /* 1 tokens of length 14 */
11420 if (name[0] == 'g' &&
11421 name[1] == 'e' &&
11422 name[2] == 't' &&
11423 name[3] == 'p' &&
11424 name[4] == 'r' &&
11425 name[5] == 'o' &&
11426 name[6] == 't' &&
11427 name[7] == 'o' &&
11428 name[8] == 'b' &&
11429 name[9] == 'y' &&
11430 name[10] == 'n' &&
11431 name[11] == 'a' &&
11432 name[12] == 'm' &&
11433 name[13] == 'e')
11434 { /* getprotobyname */
11435 return -KEY_getprotobyname;
11436 }
11437
11438 goto unknown;
11439
11440 case 16: /* 1 tokens of length 16 */
11441 if (name[0] == 'g' &&
11442 name[1] == 'e' &&
11443 name[2] == 't' &&
11444 name[3] == 'p' &&
11445 name[4] == 'r' &&
11446 name[5] == 'o' &&
11447 name[6] == 't' &&
11448 name[7] == 'o' &&
11449 name[8] == 'b' &&
11450 name[9] == 'y' &&
11451 name[10] == 'n' &&
11452 name[11] == 'u' &&
11453 name[12] == 'm' &&
11454 name[13] == 'b' &&
11455 name[14] == 'e' &&
11456 name[15] == 'r')
11457 { /* getprotobynumber */
11458 return -KEY_getprotobynumber;
11459 }
11460
11461 goto unknown;
11462
11463 default:
11464 goto unknown;
e2e1dd5a 11465 }
4c3bbe0f
MHM
11466
11467unknown:
e2e1dd5a 11468 return 0;
a687059c
LW
11469}
11470
76e3520e 11471STATIC void
c94115d8 11472S_checkcomma(pTHX_ const char *s, const char *name, const char *what)
a687059c 11473{
97aff369 11474 dVAR;
2f3197b3 11475
7918f24d
NC
11476 PERL_ARGS_ASSERT_CHECKCOMMA;
11477
d008e5eb 11478 if (*s == ' ' && s[1] == '(') { /* XXX gotta be a better way */
d008e5eb
GS
11479 if (ckWARN(WARN_SYNTAX)) {
11480 int level = 1;
26ff0806 11481 const char *w;
d008e5eb
GS
11482 for (w = s+2; *w && level; w++) {
11483 if (*w == '(')
11484 ++level;
11485 else if (*w == ')')
11486 --level;
11487 }
888fea98
NC
11488 while (isSPACE(*w))
11489 ++w;
b1439985
RGS
11490 /* the list of chars below is for end of statements or
11491 * block / parens, boolean operators (&&, ||, //) and branch
11492 * constructs (or, and, if, until, unless, while, err, for).
11493 * Not a very solid hack... */
11494 if (!*w || !strchr(";&/|})]oaiuwef!=", *w))
9014280d 11495 Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
65cec589 11496 "%s (...) interpreted as function",name);
d008e5eb 11497 }
2f3197b3 11498 }
3280af22 11499 while (s < PL_bufend && isSPACE(*s))
2f3197b3 11500 s++;
a687059c
LW
11501 if (*s == '(')
11502 s++;
3280af22 11503 while (s < PL_bufend && isSPACE(*s))
a687059c 11504 s++;
7e2040f0 11505 if (isIDFIRST_lazy_if(s,UTF)) {
26ff0806 11506 const char * const w = s++;
7e2040f0 11507 while (isALNUM_lazy_if(s,UTF))
a687059c 11508 s++;
3280af22 11509 while (s < PL_bufend && isSPACE(*s))
a687059c 11510 s++;
e929a76b 11511 if (*s == ',') {
c94115d8 11512 GV* gv;
5458a98a 11513 if (keyword(w, s - w, 0))
e929a76b 11514 return;
c94115d8
NC
11515
11516 gv = gv_fetchpvn_flags(w, s - w, 0, SVt_PVCV);
11517 if (gv && GvCVu(gv))
abbb3198 11518 return;
cea2e8a9 11519 Perl_croak(aTHX_ "No comma allowed after %s", what);
463ee0b2
LW
11520 }
11521 }
11522}
11523
423cee85
JH
11524/* Either returns sv, or mortalizes sv and returns a new SV*.
11525 Best used as sv=new_constant(..., sv, ...).
11526 If s, pv are NULL, calls subroutine with one argument,
11527 and type is used with error messages only. */
11528
b3ac6de7 11529STATIC SV *
eb0d8d16
NC
11530S_new_constant(pTHX_ const char *s, STRLEN len, const char *key, STRLEN keylen,
11531 SV *sv, SV *pv, const char *type, STRLEN typelen)
b3ac6de7 11532{
27da23d5 11533 dVAR; dSP;
890ce7af 11534 HV * const table = GvHV(PL_hintgv); /* ^H */
b3ac6de7 11535 SV *res;
b3ac6de7
IZ
11536 SV **cvp;
11537 SV *cv, *typesv;
89e33a05 11538 const char *why1 = "", *why2 = "", *why3 = "";
4e553d73 11539
7918f24d
NC
11540 PERL_ARGS_ASSERT_NEW_CONSTANT;
11541
f0af216f 11542 if (!table || !(PL_hints & HINT_LOCALIZE_HH)) {
423cee85
JH
11543 SV *msg;
11544
10edeb5d
JH
11545 why2 = (const char *)
11546 (strEQ(key,"charnames")
11547 ? "(possibly a missing \"use charnames ...\")"
11548 : "");
4e553d73 11549 msg = Perl_newSVpvf(aTHX_ "Constant(%s) unknown: %s",
41ab332f
JH
11550 (type ? type: "undef"), why2);
11551
11552 /* This is convoluted and evil ("goto considered harmful")
11553 * but I do not understand the intricacies of all the different
11554 * failure modes of %^H in here. The goal here is to make
11555 * the most probable error message user-friendly. --jhi */
11556
11557 goto msgdone;
11558
423cee85 11559 report:
4e553d73 11560 msg = Perl_newSVpvf(aTHX_ "Constant(%s): %s%s%s",
f0af216f 11561 (type ? type: "undef"), why1, why2, why3);
41ab332f 11562 msgdone:
95a20fc0 11563 yyerror(SvPVX_const(msg));
423cee85
JH
11564 SvREFCNT_dec(msg);
11565 return sv;
11566 }
ff3f963a
KW
11567
11568 /* charnames doesn't work well if there have been errors found */
f5a57329
RGS
11569 if (PL_error_count > 0 && strEQ(key,"charnames"))
11570 return &PL_sv_undef;
ff3f963a 11571
eb0d8d16 11572 cvp = hv_fetch(table, key, keylen, FALSE);
b3ac6de7 11573 if (!cvp || !SvOK(*cvp)) {
423cee85
JH
11574 why1 = "$^H{";
11575 why2 = key;
f0af216f 11576 why3 = "} is not defined";
423cee85 11577 goto report;
b3ac6de7
IZ
11578 }
11579 sv_2mortal(sv); /* Parent created it permanently */
11580 cv = *cvp;
423cee85 11581 if (!pv && s)
59cd0e26 11582 pv = newSVpvn_flags(s, len, SVs_TEMP);
423cee85 11583 if (type && pv)
59cd0e26 11584 typesv = newSVpvn_flags(type, typelen, SVs_TEMP);
b3ac6de7 11585 else
423cee85 11586 typesv = &PL_sv_undef;
4e553d73 11587
e788e7d3 11588 PUSHSTACKi(PERLSI_OVERLOAD);
423cee85
JH
11589 ENTER ;
11590 SAVETMPS;
4e553d73 11591
423cee85 11592 PUSHMARK(SP) ;
a5845cb7 11593 EXTEND(sp, 3);
423cee85
JH
11594 if (pv)
11595 PUSHs(pv);
b3ac6de7 11596 PUSHs(sv);
423cee85
JH
11597 if (pv)
11598 PUSHs(typesv);
b3ac6de7 11599 PUTBACK;
423cee85 11600 call_sv(cv, G_SCALAR | ( PL_in_eval ? 0 : G_EVAL));
4e553d73 11601
423cee85 11602 SPAGAIN ;
4e553d73 11603
423cee85 11604 /* Check the eval first */
9b0e499b 11605 if (!PL_in_eval && SvTRUE(ERRSV)) {
396482e1 11606 sv_catpvs(ERRSV, "Propagated");
8b6b16e7 11607 yyerror(SvPV_nolen_const(ERRSV)); /* Duplicates the message inside eval */
e1f15930 11608 (void)POPs;
b37c2d43 11609 res = SvREFCNT_inc_simple(sv);
423cee85
JH
11610 }
11611 else {
11612 res = POPs;
b37c2d43 11613 SvREFCNT_inc_simple_void(res);
423cee85 11614 }
4e553d73 11615
423cee85
JH
11616 PUTBACK ;
11617 FREETMPS ;
11618 LEAVE ;
b3ac6de7 11619 POPSTACK;
4e553d73 11620
b3ac6de7 11621 if (!SvOK(res)) {
423cee85
JH
11622 why1 = "Call to &{$^H{";
11623 why2 = key;
f0af216f 11624 why3 = "}} did not return a defined value";
423cee85
JH
11625 sv = res;
11626 goto report;
9b0e499b 11627 }
423cee85 11628
9b0e499b 11629 return res;
b3ac6de7 11630}
4e553d73 11631
d0a148a6
NC
11632/* Returns a NUL terminated string, with the length of the string written to
11633 *slp
11634 */
76e3520e 11635STATIC char *
cea2e8a9 11636S_scan_word(pTHX_ register char *s, char *dest, STRLEN destlen, int allow_package, STRLEN *slp)
463ee0b2 11637{
97aff369 11638 dVAR;
463ee0b2 11639 register char *d = dest;
890ce7af 11640 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
7918f24d
NC
11641
11642 PERL_ARGS_ASSERT_SCAN_WORD;
11643
463ee0b2 11644 for (;;) {
8903cb82 11645 if (d >= e)
cea2e8a9 11646 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11647 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11648 *d++ = *s++;
c35e046a 11649 else if (allow_package && (*s == '\'') && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11650 *d++ = ':';
11651 *d++ = ':';
11652 s++;
11653 }
c35e046a 11654 else if (allow_package && (s[0] == ':') && (s[1] == ':') && (s[2] != '$')) {
463ee0b2
LW
11655 *d++ = *s++;
11656 *d++ = *s++;
11657 }
fd400ab9 11658 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11659 char *t = s + UTF8SKIP(s);
c35e046a 11660 size_t len;
fd400ab9 11661 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3 11662 t += UTF8SKIP(t);
c35e046a
AL
11663 len = t - s;
11664 if (d + len > e)
cea2e8a9 11665 Perl_croak(aTHX_ ident_too_long);
c35e046a
AL
11666 Copy(s, d, len, char);
11667 d += len;
a0ed51b3
LW
11668 s = t;
11669 }
463ee0b2
LW
11670 else {
11671 *d = '\0';
11672 *slp = d - dest;
11673 return s;
e929a76b 11674 }
378cc40b
LW
11675 }
11676}
11677
76e3520e 11678STATIC char *
f54cb97a 11679S_scan_ident(pTHX_ register char *s, register const char *send, char *dest, STRLEN destlen, I32 ck_uni)
378cc40b 11680{
97aff369 11681 dVAR;
6136c704 11682 char *bracket = NULL;
748a9306 11683 char funny = *s++;
6136c704 11684 register char *d = dest;
0b3da58d 11685 register char * const e = d + destlen - 3; /* two-character token, ending NUL */
378cc40b 11686
7918f24d
NC
11687 PERL_ARGS_ASSERT_SCAN_IDENT;
11688
a0d0e21e 11689 if (isSPACE(*s))
29595ff2 11690 s = PEEKSPACE(s);
de3bb511 11691 if (isDIGIT(*s)) {
8903cb82 11692 while (isDIGIT(*s)) {
11693 if (d >= e)
cea2e8a9 11694 Perl_croak(aTHX_ ident_too_long);
378cc40b 11695 *d++ = *s++;
8903cb82 11696 }
378cc40b
LW
11697 }
11698 else {
463ee0b2 11699 for (;;) {
8903cb82 11700 if (d >= e)
cea2e8a9 11701 Perl_croak(aTHX_ ident_too_long);
834a4ddd 11702 if (isALNUM(*s)) /* UTF handled below */
463ee0b2 11703 *d++ = *s++;
7e2040f0 11704 else if (*s == '\'' && isIDFIRST_lazy_if(s+1,UTF)) {
463ee0b2
LW
11705 *d++ = ':';
11706 *d++ = ':';
11707 s++;
11708 }
a0d0e21e 11709 else if (*s == ':' && s[1] == ':') {
463ee0b2
LW
11710 *d++ = *s++;
11711 *d++ = *s++;
11712 }
fd400ab9 11713 else if (UTF && UTF8_IS_START(*s) && isALNUM_utf8((U8*)s)) {
a0ed51b3 11714 char *t = s + UTF8SKIP(s);
fd400ab9 11715 while (UTF8_IS_CONTINUED(*t) && is_utf8_mark((U8*)t))
a0ed51b3
LW
11716 t += UTF8SKIP(t);
11717 if (d + (t - s) > e)
cea2e8a9 11718 Perl_croak(aTHX_ ident_too_long);
a0ed51b3
LW
11719 Copy(s, d, t - s, char);
11720 d += t - s;
11721 s = t;
11722 }
463ee0b2
LW
11723 else
11724 break;
11725 }
378cc40b
LW
11726 }
11727 *d = '\0';
11728 d = dest;
79072805 11729 if (*d) {
3280af22
NIS
11730 if (PL_lex_state != LEX_NORMAL)
11731 PL_lex_state = LEX_INTERPENDMAYBE;
79072805 11732 return s;
378cc40b 11733 }
748a9306 11734 if (*s == '$' && s[1] &&
3792a11b 11735 (isALNUM_lazy_if(s+1,UTF) || s[1] == '$' || s[1] == '{' || strnEQ(s+1,"::",2)) )
5cd24f17 11736 {
4810e5ec 11737 return s;
5cd24f17 11738 }
79072805
LW
11739 if (*s == '{') {
11740 bracket = s;
11741 s++;
11742 }
11743 else if (ck_uni)
11744 check_uni();
93a17b20 11745 if (s < send)
79072805
LW
11746 *d = *s++;
11747 d[1] = '\0';
2b92dfce 11748 if (*d == '^' && *s && isCONTROLVAR(*s)) {
bbce6d69 11749 *d = toCTRL(*s);
11750 s++;
de3bb511 11751 }
79072805 11752 if (bracket) {
748a9306 11753 if (isSPACE(s[-1])) {
fa83b5b6 11754 while (s < send) {
f54cb97a 11755 const char ch = *s++;
bf4acbe4 11756 if (!SPACE_OR_TAB(ch)) {
fa83b5b6 11757 *d = ch;
11758 break;
11759 }
11760 }
748a9306 11761 }
7e2040f0 11762 if (isIDFIRST_lazy_if(d,UTF)) {
79072805 11763 d++;
a0ed51b3 11764 if (UTF) {
6136c704
AL
11765 char *end = s;
11766 while ((end < send && isALNUM_lazy_if(end,UTF)) || *end == ':') {
11767 end += UTF8SKIP(end);
11768 while (end < send && UTF8_IS_CONTINUED(*end) && is_utf8_mark((U8*)end))
11769 end += UTF8SKIP(end);
a0ed51b3 11770 }
6136c704
AL
11771 Copy(s, d, end - s, char);
11772 d += end - s;
11773 s = end;
a0ed51b3
LW
11774 }
11775 else {
2b92dfce 11776 while ((isALNUM(*s) || *s == ':') && d < e)
a0ed51b3 11777 *d++ = *s++;
2b92dfce 11778 if (d >= e)
cea2e8a9 11779 Perl_croak(aTHX_ ident_too_long);
a0ed51b3 11780 }
79072805 11781 *d = '\0';
c35e046a
AL
11782 while (s < send && SPACE_OR_TAB(*s))
11783 s++;
ff68c719 11784 if ((*s == '[' || (*s == '{' && strNE(dest, "sub")))) {
5458a98a 11785 if (ckWARN(WARN_AMBIGUOUS) && keyword(dest, d - dest, 0)) {
10edeb5d
JH
11786 const char * const brack =
11787 (const char *)
11788 ((*s == '[') ? "[...]" : "{...}");
9014280d 11789 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
599cee73 11790 "Ambiguous use of %c{%s%s} resolved to %c%s%s",
748a9306
LW
11791 funny, dest, brack, funny, dest, brack);
11792 }
79072805 11793 bracket++;
a0be28da 11794 PL_lex_brackstack[PL_lex_brackets++] = (char)(XOPERATOR | XFAKEBRACK);
79072805
LW
11795 return s;
11796 }
4e553d73
NIS
11797 }
11798 /* Handle extended ${^Foo} variables
2b92dfce
GS
11799 * 1999-02-27 mjd-perl-patch@plover.com */
11800 else if (!isALNUM(*d) && !isPRINT(*d) /* isCTRL(d) */
11801 && isALNUM(*s))
11802 {
11803 d++;
11804 while (isALNUM(*s) && d < e) {
11805 *d++ = *s++;
11806 }
11807 if (d >= e)
cea2e8a9 11808 Perl_croak(aTHX_ ident_too_long);
2b92dfce 11809 *d = '\0';
79072805
LW
11810 }
11811 if (*s == '}') {
11812 s++;
7df0d042 11813 if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets) {
3280af22 11814 PL_lex_state = LEX_INTERPEND;
7df0d042
AE
11815 PL_expect = XREF;
11816 }
d008e5eb 11817 if (PL_lex_state == LEX_NORMAL) {
d008e5eb 11818 if (ckWARN(WARN_AMBIGUOUS) &&
780a5241
NC
11819 (keyword(dest, d - dest, 0)
11820 || get_cvn_flags(dest, d - dest, 0)))
d008e5eb 11821 {
c35e046a
AL
11822 if (funny == '#')
11823 funny = '@';
9014280d 11824 Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
d008e5eb
GS
11825 "Ambiguous use of %c{%s} resolved to %c%s",
11826 funny, dest, funny, dest);
11827 }
11828 }
79072805
LW
11829 }
11830 else {
11831 s = bracket; /* let the parser handle it */
93a17b20 11832 *dest = '\0';
79072805
LW
11833 }
11834 }
3280af22
NIS
11835 else if (PL_lex_state == LEX_INTERPNORMAL && !PL_lex_brackets && !intuit_more(s))
11836 PL_lex_state = LEX_INTERPEND;
378cc40b
LW
11837 return s;
11838}
11839
879d0c72
NC
11840static U32
11841S_pmflag(U32 pmfl, const char ch) {
11842 switch (ch) {
11843 CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
4f4d7508
DC
11844 case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
11845 case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
11846 case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
11847 case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
11848 case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
879d0c72
NC
11849 }
11850 return pmfl;
11851}
11852
76e3520e 11853STATIC char *
cea2e8a9 11854S_scan_pat(pTHX_ char *start, I32 type)
378cc40b 11855{
97aff369 11856 dVAR;
79072805 11857 PMOP *pm;
5db06880 11858 char *s = scan_str(start,!!PL_madskills,FALSE);
10edeb5d 11859 const char * const valid_flags =
a20207d7 11860 (const char *)((type == OP_QR) ? QR_PAT_MODS : M_PAT_MODS);
5db06880
NC
11861#ifdef PERL_MAD
11862 char *modstart;
11863#endif
11864
7918f24d 11865 PERL_ARGS_ASSERT_SCAN_PAT;
378cc40b 11866
25c09cbf 11867 if (!s) {
6136c704 11868 const char * const delimiter = skipspace(start);
10edeb5d
JH
11869 Perl_croak(aTHX_
11870 (const char *)
11871 (*delimiter == '?'
11872 ? "Search pattern not terminated or ternary operator parsed as search pattern"
11873 : "Search pattern not terminated" ));
25c09cbf 11874 }
bbce6d69 11875
8782bef2 11876 pm = (PMOP*)newPMOP(type, 0);
ad639bfb
NC
11877 if (PL_multi_open == '?') {
11878 /* This is the only point in the code that sets PMf_ONCE: */
79072805 11879 pm->op_pmflags |= PMf_ONCE;
ad639bfb
NC
11880
11881 /* Hence it's safe to do this bit of PMOP book-keeping here, which
11882 allows us to restrict the list needed by reset to just the ??
11883 matches. */
11884 assert(type != OP_TRANS);
11885 if (PL_curstash) {
daba3364 11886 MAGIC *mg = mg_find((const SV *)PL_curstash, PERL_MAGIC_symtab);
ad639bfb
NC
11887 U32 elements;
11888 if (!mg) {
daba3364 11889 mg = sv_magicext(MUTABLE_SV(PL_curstash), 0, PERL_MAGIC_symtab, 0, 0,
ad639bfb
NC
11890 0);
11891 }
11892 elements = mg->mg_len / sizeof(PMOP**);
11893 Renewc(mg->mg_ptr, elements + 1, PMOP*, char);
11894 ((PMOP**)mg->mg_ptr) [elements++] = pm;
11895 mg->mg_len = elements * sizeof(PMOP**);
11896 PmopSTASH_set(pm,PL_curstash);
11897 }
11898 }
5db06880
NC
11899#ifdef PERL_MAD
11900 modstart = s;
11901#endif
6136c704 11902 while (*s && strchr(valid_flags, *s))
879d0c72 11903 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
e6897b1a
KW
11904
11905 if (isALNUM(*s)) {
11906 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11907 "Having no space between pattern and following word is deprecated");
11908
11909 }
5db06880
NC
11910#ifdef PERL_MAD
11911 if (PL_madskills && modstart != s) {
11912 SV* tmptoken = newSVpvn(modstart, s - modstart);
11913 append_madprops(newMADPROP('m', MAD_SV, tmptoken, 0), (OP*)pm, 0);
11914 }
11915#endif
4ac733c9 11916 /* issue a warning if /c is specified,but /g is not */
a2a5de95 11917 if ((pm->op_pmflags & PMf_CONTINUE) && !(pm->op_pmflags & PMf_GLOBAL))
4ac733c9 11918 {
a2a5de95
NC
11919 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP),
11920 "Use of /c modifier is meaningless without /g" );
4ac733c9
MJD
11921 }
11922
3280af22 11923 PL_lex_op = (OP*)pm;
6154021b 11924 pl_yylval.ival = OP_MATCH;
378cc40b
LW
11925 return s;
11926}
11927
76e3520e 11928STATIC char *
cea2e8a9 11929S_scan_subst(pTHX_ char *start)
79072805 11930{
27da23d5 11931 dVAR;
a0d0e21e 11932 register char *s;
79072805 11933 register PMOP *pm;
4fdae800 11934 I32 first_start;
79072805 11935 I32 es = 0;
5db06880
NC
11936#ifdef PERL_MAD
11937 char *modstart;
11938#endif
79072805 11939
7918f24d
NC
11940 PERL_ARGS_ASSERT_SCAN_SUBST;
11941
6154021b 11942 pl_yylval.ival = OP_NULL;
79072805 11943
5db06880 11944 s = scan_str(start,!!PL_madskills,FALSE);
79072805 11945
37fd879b 11946 if (!s)
cea2e8a9 11947 Perl_croak(aTHX_ "Substitution pattern not terminated");
79072805 11948
3280af22 11949 if (s[-1] == PL_multi_open)
79072805 11950 s--;
5db06880
NC
11951#ifdef PERL_MAD
11952 if (PL_madskills) {
cd81e915
NC
11953 CURMAD('q', PL_thisopen);
11954 CURMAD('_', PL_thiswhite);
11955 CURMAD('E', PL_thisstuff);
11956 CURMAD('Q', PL_thisclose);
11957 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
11958 }
11959#endif
79072805 11960
3280af22 11961 first_start = PL_multi_start;
5db06880 11962 s = scan_str(s,!!PL_madskills,FALSE);
79072805 11963 if (!s) {
37fd879b 11964 if (PL_lex_stuff) {
3280af22 11965 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 11966 PL_lex_stuff = NULL;
37fd879b 11967 }
cea2e8a9 11968 Perl_croak(aTHX_ "Substitution replacement not terminated");
a687059c 11969 }
3280af22 11970 PL_multi_start = first_start; /* so whole substitution is taken together */
2f3197b3 11971
79072805 11972 pm = (PMOP*)newPMOP(OP_SUBST, 0);
5db06880
NC
11973
11974#ifdef PERL_MAD
11975 if (PL_madskills) {
cd81e915
NC
11976 CURMAD('z', PL_thisopen);
11977 CURMAD('R', PL_thisstuff);
11978 CURMAD('Z', PL_thisclose);
5db06880
NC
11979 }
11980 modstart = s;
11981#endif
11982
48c036b1 11983 while (*s) {
a20207d7 11984 if (*s == EXEC_PAT_MOD) {
a687059c 11985 s++;
2f3197b3 11986 es++;
a687059c 11987 }
a20207d7 11988 else if (strchr(S_PAT_MODS, *s))
879d0c72 11989 pm->op_pmflags = S_pmflag(pm->op_pmflags, *s++);
aa78b661
KW
11990 else {
11991 if (isALNUM(*s)) {
11992 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX),
11993 "Having no space between pattern and following word is deprecated");
11994
11995 }
48c036b1 11996 break;
aa78b661 11997 }
378cc40b 11998 }
79072805 11999
5db06880
NC
12000#ifdef PERL_MAD
12001 if (PL_madskills) {
12002 if (modstart != s)
12003 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12004 append_madprops(PL_thismad, (OP*)pm, 0);
12005 PL_thismad = 0;
5db06880
NC
12006 }
12007#endif
a2a5de95
NC
12008 if ((pm->op_pmflags & PMf_CONTINUE)) {
12009 Perl_ck_warner(aTHX_ packWARN(WARN_REGEXP), "Use of /c modifier is meaningless in s///" );
4ac733c9
MJD
12010 }
12011
79072805 12012 if (es) {
6136c704
AL
12013 SV * const repl = newSVpvs("");
12014
0244c3a4
GS
12015 PL_sublex_info.super_bufptr = s;
12016 PL_sublex_info.super_bufend = PL_bufend;
12017 PL_multi_end = 0;
79072805 12018 pm->op_pmflags |= PMf_EVAL;
a5849ce5
NC
12019 while (es-- > 0) {
12020 if (es)
12021 sv_catpvs(repl, "eval ");
12022 else
12023 sv_catpvs(repl, "do ");
12024 }
6f43d98f 12025 sv_catpvs(repl, "{");
3280af22 12026 sv_catsv(repl, PL_lex_repl);
9badc361
RGS
12027 if (strchr(SvPVX(PL_lex_repl), '#'))
12028 sv_catpvs(repl, "\n");
12029 sv_catpvs(repl, "}");
25da4f38 12030 SvEVALED_on(repl);
3280af22
NIS
12031 SvREFCNT_dec(PL_lex_repl);
12032 PL_lex_repl = repl;
378cc40b 12033 }
79072805 12034
3280af22 12035 PL_lex_op = (OP*)pm;
6154021b 12036 pl_yylval.ival = OP_SUBST;
378cc40b
LW
12037 return s;
12038}
12039
76e3520e 12040STATIC char *
cea2e8a9 12041S_scan_trans(pTHX_ char *start)
378cc40b 12042{
97aff369 12043 dVAR;
a0d0e21e 12044 register char* s;
11343788 12045 OP *o;
79072805 12046 short *tbl;
b84c11c8
NC
12047 U8 squash;
12048 U8 del;
12049 U8 complement;
5db06880
NC
12050#ifdef PERL_MAD
12051 char *modstart;
12052#endif
79072805 12053
7918f24d
NC
12054 PERL_ARGS_ASSERT_SCAN_TRANS;
12055
6154021b 12056 pl_yylval.ival = OP_NULL;
79072805 12057
5db06880 12058 s = scan_str(start,!!PL_madskills,FALSE);
37fd879b 12059 if (!s)
cea2e8a9 12060 Perl_croak(aTHX_ "Transliteration pattern not terminated");
5db06880 12061
3280af22 12062 if (s[-1] == PL_multi_open)
2f3197b3 12063 s--;
5db06880
NC
12064#ifdef PERL_MAD
12065 if (PL_madskills) {
cd81e915
NC
12066 CURMAD('q', PL_thisopen);
12067 CURMAD('_', PL_thiswhite);
12068 CURMAD('E', PL_thisstuff);
12069 CURMAD('Q', PL_thisclose);
12070 PL_realtokenstart = s - SvPVX(PL_linestr);
5db06880
NC
12071 }
12072#endif
2f3197b3 12073
5db06880 12074 s = scan_str(s,!!PL_madskills,FALSE);
79072805 12075 if (!s) {
37fd879b 12076 if (PL_lex_stuff) {
3280af22 12077 SvREFCNT_dec(PL_lex_stuff);
a0714e2c 12078 PL_lex_stuff = NULL;
37fd879b 12079 }
cea2e8a9 12080 Perl_croak(aTHX_ "Transliteration replacement not terminated");
a687059c 12081 }
5db06880 12082 if (PL_madskills) {
cd81e915
NC
12083 CURMAD('z', PL_thisopen);
12084 CURMAD('R', PL_thisstuff);
12085 CURMAD('Z', PL_thisclose);
5db06880 12086 }
79072805 12087
a0ed51b3 12088 complement = del = squash = 0;
5db06880
NC
12089#ifdef PERL_MAD
12090 modstart = s;
12091#endif
7a1e2023
NC
12092 while (1) {
12093 switch (*s) {
12094 case 'c':
79072805 12095 complement = OPpTRANS_COMPLEMENT;
7a1e2023
NC
12096 break;
12097 case 'd':
a0ed51b3 12098 del = OPpTRANS_DELETE;
7a1e2023
NC
12099 break;
12100 case 's':
79072805 12101 squash = OPpTRANS_SQUASH;
7a1e2023
NC
12102 break;
12103 default:
12104 goto no_more;
12105 }
395c3793
LW
12106 s++;
12107 }
7a1e2023 12108 no_more:
8973db79 12109
aa1f7c5b 12110 tbl = (short *)PerlMemShared_calloc(complement&&!del?258:256, sizeof(short));
8973db79 12111 o = newPVOP(OP_TRANS, 0, (char*)tbl);
59f00321
RGS
12112 o->op_private &= ~OPpTRANS_ALL;
12113 o->op_private |= del|squash|complement|
7948272d
NIS
12114 (DO_UTF8(PL_lex_stuff)? OPpTRANS_FROM_UTF : 0)|
12115 (DO_UTF8(PL_lex_repl) ? OPpTRANS_TO_UTF : 0);
79072805 12116
3280af22 12117 PL_lex_op = o;
6154021b 12118 pl_yylval.ival = OP_TRANS;
5db06880
NC
12119
12120#ifdef PERL_MAD
12121 if (PL_madskills) {
12122 if (modstart != s)
12123 curmad('m', newSVpvn(modstart, s - modstart));
cd81e915
NC
12124 append_madprops(PL_thismad, o, 0);
12125 PL_thismad = 0;
5db06880
NC
12126 }
12127#endif
12128
79072805
LW
12129 return s;
12130}
12131
76e3520e 12132STATIC char *
cea2e8a9 12133S_scan_heredoc(pTHX_ register char *s)
79072805 12134{
97aff369 12135 dVAR;
79072805
LW
12136 SV *herewas;
12137 I32 op_type = OP_SCALAR;
12138 I32 len;
12139 SV *tmpstr;
12140 char term;
73d840c0 12141 const char *found_newline;
79072805 12142 register char *d;
fc36a67e 12143 register char *e;
4633a7c4 12144 char *peek;
f54cb97a 12145 const int outer = (PL_rsfp && !(PL_lex_inwhat == OP_SCALAR));
5db06880
NC
12146#ifdef PERL_MAD
12147 I32 stuffstart = s - SvPVX(PL_linestr);
12148 char *tstart;
12149
cd81e915 12150 PL_realtokenstart = -1;
5db06880 12151#endif
79072805 12152
7918f24d
NC
12153 PERL_ARGS_ASSERT_SCAN_HEREDOC;
12154
79072805 12155 s += 2;
3280af22
NIS
12156 d = PL_tokenbuf;
12157 e = PL_tokenbuf + sizeof PL_tokenbuf - 1;
fd2d0953 12158 if (!outer)
79072805 12159 *d++ = '\n';
c35e046a
AL
12160 peek = s;
12161 while (SPACE_OR_TAB(*peek))
12162 peek++;
3792a11b 12163 if (*peek == '`' || *peek == '\'' || *peek =='"') {
4633a7c4 12164 s = peek;
79072805 12165 term = *s++;
3280af22 12166 s = delimcpy(d, e, s, PL_bufend, term, &len);
fc36a67e 12167 d += len;
3280af22 12168 if (s < PL_bufend)
79072805 12169 s++;
79072805
LW
12170 }
12171 else {
12172 if (*s == '\\')
12173 s++, term = '\'';
12174 else
12175 term = '"';
7e2040f0 12176 if (!isALNUM_lazy_if(s,UTF))
8ab8f082 12177 deprecate("bare << to mean <<\"\"");
7e2040f0 12178 for (; isALNUM_lazy_if(s,UTF); s++) {
fc36a67e 12179 if (d < e)
12180 *d++ = *s;
12181 }
12182 }
3280af22 12183 if (d >= PL_tokenbuf + sizeof PL_tokenbuf - 1)
cea2e8a9 12184 Perl_croak(aTHX_ "Delimiter for here document is too long");
79072805
LW
12185 *d++ = '\n';
12186 *d = '\0';
3280af22 12187 len = d - PL_tokenbuf;
5db06880
NC
12188
12189#ifdef PERL_MAD
12190 if (PL_madskills) {
12191 tstart = PL_tokenbuf + !outer;
cd81e915 12192 PL_thisclose = newSVpvn(tstart, len - !outer);
5db06880 12193 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915 12194 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12195 stuffstart = s - SvPVX(PL_linestr);
12196 }
12197#endif
6a27c188 12198#ifndef PERL_STRICT_CR
f63a84b2
LW
12199 d = strchr(s, '\r');
12200 if (d) {
b464bac0 12201 char * const olds = s;
f63a84b2 12202 s = d;
3280af22 12203 while (s < PL_bufend) {
f63a84b2
LW
12204 if (*s == '\r') {
12205 *d++ = '\n';
12206 if (*++s == '\n')
12207 s++;
12208 }
12209 else if (*s == '\n' && s[1] == '\r') { /* \015\013 on a mac? */
12210 *d++ = *s++;
12211 s++;
12212 }
12213 else
12214 *d++ = *s++;
12215 }
12216 *d = '\0';
3280af22 12217 PL_bufend = d;
95a20fc0 12218 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2
LW
12219 s = olds;
12220 }
12221#endif
5db06880
NC
12222#ifdef PERL_MAD
12223 found_newline = 0;
12224#endif
10edeb5d 12225 if ( outer || !(found_newline = (char*)memchr((void*)s, '\n', PL_bufend - s)) ) {
73d840c0
AL
12226 herewas = newSVpvn(s,PL_bufend-s);
12227 }
12228 else {
5db06880
NC
12229#ifdef PERL_MAD
12230 herewas = newSVpvn(s-1,found_newline-s+1);
12231#else
73d840c0
AL
12232 s--;
12233 herewas = newSVpvn(s,found_newline-s);
5db06880 12234#endif
73d840c0 12235 }
5db06880
NC
12236#ifdef PERL_MAD
12237 if (PL_madskills) {
12238 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12239 if (PL_thisstuff)
12240 sv_catpvn(PL_thisstuff, tstart, s - tstart);
5db06880 12241 else
cd81e915 12242 PL_thisstuff = newSVpvn(tstart, s - tstart);
5db06880
NC
12243 }
12244#endif
79072805 12245 s += SvCUR(herewas);
748a9306 12246
5db06880
NC
12247#ifdef PERL_MAD
12248 stuffstart = s - SvPVX(PL_linestr);
12249
12250 if (found_newline)
12251 s--;
12252#endif
12253
7d0a29fe
NC
12254 tmpstr = newSV_type(SVt_PVIV);
12255 SvGROW(tmpstr, 80);
748a9306 12256 if (term == '\'') {
79072805 12257 op_type = OP_CONST;
45977657 12258 SvIV_set(tmpstr, -1);
748a9306
LW
12259 }
12260 else if (term == '`') {
79072805 12261 op_type = OP_BACKTICK;
45977657 12262 SvIV_set(tmpstr, '\\');
748a9306 12263 }
79072805
LW
12264
12265 CLINE;
57843af0 12266 PL_multi_start = CopLINE(PL_curcop);
3280af22
NIS
12267 PL_multi_open = PL_multi_close = '<';
12268 term = *PL_tokenbuf;
0244c3a4 12269 if (PL_lex_inwhat == OP_SUBST && PL_in_eval && !PL_rsfp) {
6136c704
AL
12270 char * const bufptr = PL_sublex_info.super_bufptr;
12271 char * const bufend = PL_sublex_info.super_bufend;
b464bac0 12272 char * const olds = s - SvCUR(herewas);
0244c3a4
GS
12273 s = strchr(bufptr, '\n');
12274 if (!s)
12275 s = bufend;
12276 d = s;
12277 while (s < bufend &&
12278 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
12279 if (*s++ == '\n')
57843af0 12280 CopLINE_inc(PL_curcop);
0244c3a4
GS
12281 }
12282 if (s >= bufend) {
eb160463 12283 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
0244c3a4
GS
12284 missingterm(PL_tokenbuf);
12285 }
12286 sv_setpvn(herewas,bufptr,d-bufptr+1);
12287 sv_setpvn(tmpstr,d+1,s-d);
12288 s += len - 1;
12289 sv_catpvn(herewas,s,bufend-s);
95a20fc0 12290 Copy(SvPVX_const(herewas),bufptr,SvCUR(herewas) + 1,char);
0244c3a4
GS
12291
12292 s = olds;
12293 goto retval;
12294 }
12295 else if (!outer) {
79072805 12296 d = s;
3280af22
NIS
12297 while (s < PL_bufend &&
12298 (*s != term || memNE(s,PL_tokenbuf,len)) ) {
79072805 12299 if (*s++ == '\n')
57843af0 12300 CopLINE_inc(PL_curcop);
79072805 12301 }
3280af22 12302 if (s >= PL_bufend) {
eb160463 12303 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12304 missingterm(PL_tokenbuf);
79072805
LW
12305 }
12306 sv_setpvn(tmpstr,d+1,s-d);
5db06880
NC
12307#ifdef PERL_MAD
12308 if (PL_madskills) {
cd81e915
NC
12309 if (PL_thisstuff)
12310 sv_catpvn(PL_thisstuff, d + 1, s - d);
5db06880 12311 else
cd81e915 12312 PL_thisstuff = newSVpvn(d + 1, s - d);
5db06880
NC
12313 stuffstart = s - SvPVX(PL_linestr);
12314 }
12315#endif
79072805 12316 s += len - 1;
57843af0 12317 CopLINE_inc(PL_curcop); /* the preceding stmt passes a newline */
49d8d3a1 12318
3280af22
NIS
12319 sv_catpvn(herewas,s,PL_bufend-s);
12320 sv_setsv(PL_linestr,herewas);
12321 PL_oldoldbufptr = PL_oldbufptr = PL_bufptr = s = PL_linestart = SvPVX(PL_linestr);
12322 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12323 PL_last_lop = PL_last_uni = NULL;
79072805
LW
12324 }
12325 else
76f68e9b 12326 sv_setpvs(tmpstr,""); /* avoid "uninitialized" warning */
3280af22 12327 while (s >= PL_bufend) { /* multiple line string? */
5db06880
NC
12328#ifdef PERL_MAD
12329 if (PL_madskills) {
12330 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12331 if (PL_thisstuff)
12332 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12333 else
cd81e915 12334 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12335 }
12336#endif
f0e67a1d 12337 PL_bufptr = s;
17cc9359 12338 CopLINE_inc(PL_curcop);
f0e67a1d 12339 if (!outer || !lex_next_chunk(0)) {
eb160463 12340 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
3280af22 12341 missingterm(PL_tokenbuf);
79072805 12342 }
17cc9359 12343 CopLINE_dec(PL_curcop);
f0e67a1d 12344 s = PL_bufptr;
5db06880
NC
12345#ifdef PERL_MAD
12346 stuffstart = s - SvPVX(PL_linestr);
12347#endif
57843af0 12348 CopLINE_inc(PL_curcop);
3280af22 12349 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
bd61b366 12350 PL_last_lop = PL_last_uni = NULL;
6a27c188 12351#ifndef PERL_STRICT_CR
3280af22 12352 if (PL_bufend - PL_linestart >= 2) {
a1529941
NIS
12353 if ((PL_bufend[-2] == '\r' && PL_bufend[-1] == '\n') ||
12354 (PL_bufend[-2] == '\n' && PL_bufend[-1] == '\r'))
c6f14548 12355 {
3280af22
NIS
12356 PL_bufend[-2] = '\n';
12357 PL_bufend--;
95a20fc0 12358 SvCUR_set(PL_linestr, PL_bufend - SvPVX_const(PL_linestr));
f63a84b2 12359 }
3280af22
NIS
12360 else if (PL_bufend[-1] == '\r')
12361 PL_bufend[-1] = '\n';
f63a84b2 12362 }
3280af22
NIS
12363 else if (PL_bufend - PL_linestart == 1 && PL_bufend[-1] == '\r')
12364 PL_bufend[-1] = '\n';
f63a84b2 12365#endif
3280af22 12366 if (*s == term && memEQ(s,PL_tokenbuf,len)) {
95a20fc0 12367 STRLEN off = PL_bufend - 1 - SvPVX_const(PL_linestr);
1de9afcd 12368 *(SvPVX(PL_linestr) + off ) = ' ';
3280af22
NIS
12369 sv_catsv(PL_linestr,herewas);
12370 PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
1de9afcd 12371 s = SvPVX(PL_linestr) + off; /* In case PV of PL_linestr moved. */
79072805
LW
12372 }
12373 else {
3280af22
NIS
12374 s = PL_bufend;
12375 sv_catsv(tmpstr,PL_linestr);
395c3793
LW
12376 }
12377 }
79072805 12378 s++;
0244c3a4 12379retval:
57843af0 12380 PL_multi_end = CopLINE(PL_curcop);
79072805 12381 if (SvCUR(tmpstr) + 5 < SvLEN(tmpstr)) {
1da4ca5f 12382 SvPV_shrink_to_cur(tmpstr);
79072805 12383 }
8990e307 12384 SvREFCNT_dec(herewas);
2f31ce75 12385 if (!IN_BYTES) {
95a20fc0 12386 if (UTF && is_utf8_string((U8*)SvPVX_const(tmpstr), SvCUR(tmpstr)))
2f31ce75
JH
12387 SvUTF8_on(tmpstr);
12388 else if (PL_encoding)
12389 sv_recode_to_utf8(tmpstr, PL_encoding);
12390 }
3280af22 12391 PL_lex_stuff = tmpstr;
6154021b 12392 pl_yylval.ival = op_type;
79072805
LW
12393 return s;
12394}
12395
02aa26ce
NT
12396/* scan_inputsymbol
12397 takes: current position in input buffer
12398 returns: new position in input buffer
6154021b 12399 side-effects: pl_yylval and lex_op are set.
02aa26ce
NT
12400
12401 This code handles:
12402
12403 <> read from ARGV
12404 <FH> read from filehandle
12405 <pkg::FH> read from package qualified filehandle
12406 <pkg'FH> read from package qualified filehandle
12407 <$fh> read from filehandle in $fh
12408 <*.h> filename glob
12409
12410*/
12411
76e3520e 12412STATIC char *
cea2e8a9 12413S_scan_inputsymbol(pTHX_ char *start)
79072805 12414{
97aff369 12415 dVAR;
02aa26ce 12416 register char *s = start; /* current position in buffer */
1b420867 12417 char *end;
79072805 12418 I32 len;
6136c704
AL
12419 char *d = PL_tokenbuf; /* start of temp holding space */
12420 const char * const e = PL_tokenbuf + sizeof PL_tokenbuf; /* end of temp holding space */
12421
7918f24d
NC
12422 PERL_ARGS_ASSERT_SCAN_INPUTSYMBOL;
12423
1b420867
GS
12424 end = strchr(s, '\n');
12425 if (!end)
12426 end = PL_bufend;
12427 s = delimcpy(d, e, s + 1, end, '>', &len); /* extract until > */
02aa26ce
NT
12428
12429 /* die if we didn't have space for the contents of the <>,
1b420867 12430 or if it didn't end, or if we see a newline
02aa26ce
NT
12431 */
12432
bb7a0f54 12433 if (len >= (I32)sizeof PL_tokenbuf)
cea2e8a9 12434 Perl_croak(aTHX_ "Excessively long <> operator");
1b420867 12435 if (s >= end)
cea2e8a9 12436 Perl_croak(aTHX_ "Unterminated <> operator");
02aa26ce 12437
fc36a67e 12438 s++;
02aa26ce
NT
12439
12440 /* check for <$fh>
12441 Remember, only scalar variables are interpreted as filehandles by
12442 this code. Anything more complex (e.g., <$fh{$num}>) will be
12443 treated as a glob() call.
12444 This code makes use of the fact that except for the $ at the front,
12445 a scalar variable and a filehandle look the same.
12446 */
4633a7c4 12447 if (*d == '$' && d[1]) d++;
02aa26ce
NT
12448
12449 /* allow <Pkg'VALUE> or <Pkg::VALUE> */
7e2040f0 12450 while (*d && (isALNUM_lazy_if(d,UTF) || *d == '\'' || *d == ':'))
79072805 12451 d++;
02aa26ce
NT
12452
12453 /* If we've tried to read what we allow filehandles to look like, and
12454 there's still text left, then it must be a glob() and not a getline.
12455 Use scan_str to pull out the stuff between the <> and treat it
12456 as nothing more than a string.
12457 */
12458
3280af22 12459 if (d - PL_tokenbuf != len) {
6154021b 12460 pl_yylval.ival = OP_GLOB;
5db06880 12461 s = scan_str(start,!!PL_madskills,FALSE);
79072805 12462 if (!s)
cea2e8a9 12463 Perl_croak(aTHX_ "Glob not terminated");
79072805
LW
12464 return s;
12465 }
395c3793 12466 else {
9b3023bc 12467 bool readline_overriden = FALSE;
6136c704 12468 GV *gv_readline;
9b3023bc 12469 GV **gvp;
02aa26ce 12470 /* we're in a filehandle read situation */
3280af22 12471 d = PL_tokenbuf;
02aa26ce
NT
12472
12473 /* turn <> into <ARGV> */
79072805 12474 if (!len)
689badd5 12475 Copy("ARGV",d,5,char);
02aa26ce 12476
9b3023bc 12477 /* Check whether readline() is overriden */
fafc274c 12478 gv_readline = gv_fetchpvs("readline", GV_NOTQUAL, SVt_PVCV);
6136c704 12479 if ((gv_readline
ba979b31 12480 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline))
9b3023bc 12481 ||
017a3ce5 12482 ((gvp = (GV**)hv_fetchs(PL_globalstash, "readline", FALSE))
9e0d86f8 12483 && (gv_readline = *gvp) && isGV_with_GP(gv_readline)
ba979b31 12484 && GvCVu(gv_readline) && GvIMPORTED_CV(gv_readline)))
9b3023bc
RGS
12485 readline_overriden = TRUE;
12486
02aa26ce
NT
12487 /* if <$fh>, create the ops to turn the variable into a
12488 filehandle
12489 */
79072805 12490 if (*d == '$') {
02aa26ce
NT
12491 /* try to find it in the pad for this block, otherwise find
12492 add symbol table ops
12493 */
f8f98e0a 12494 const PADOFFSET tmp = pad_findmy(d, len, 0);
bbd11bfc 12495 if (tmp != NOT_IN_PAD) {
00b1698f 12496 if (PAD_COMPNAME_FLAGS_isOUR(tmp)) {
6136c704
AL
12497 HV * const stash = PAD_COMPNAME_OURSTASH(tmp);
12498 HEK * const stashname = HvNAME_HEK(stash);
12499 SV * const sym = sv_2mortal(newSVhek(stashname));
396482e1 12500 sv_catpvs(sym, "::");
f558d5af
JH
12501 sv_catpv(sym, d+1);
12502 d = SvPVX(sym);
12503 goto intro_sym;
12504 }
12505 else {
6136c704 12506 OP * const o = newOP(OP_PADSV, 0);
f558d5af 12507 o->op_targ = tmp;
9b3023bc
RGS
12508 PL_lex_op = readline_overriden
12509 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12510 append_elem(OP_LIST, o,
12511 newCVREF(0, newGVOP(OP_GV,0,gv_readline))))
12512 : (OP*)newUNOP(OP_READLINE, 0, o);
f558d5af 12513 }
a0d0e21e
LW
12514 }
12515 else {
f558d5af
JH
12516 GV *gv;
12517 ++d;
12518intro_sym:
12519 gv = gv_fetchpv(d,
12520 (PL_in_eval
12521 ? (GV_ADDMULTI | GV_ADDINEVAL)
bea70d1e 12522 : GV_ADDMULTI),
f558d5af 12523 SVt_PV);
9b3023bc
RGS
12524 PL_lex_op = readline_overriden
12525 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12526 append_elem(OP_LIST,
12527 newUNOP(OP_RV2SV, 0, newGVOP(OP_GV, 0, gv)),
12528 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12529 : (OP*)newUNOP(OP_READLINE, 0,
12530 newUNOP(OP_RV2SV, 0,
12531 newGVOP(OP_GV, 0, gv)));
a0d0e21e 12532 }
7c6fadd6
RGS
12533 if (!readline_overriden)
12534 PL_lex_op->op_flags |= OPf_SPECIAL;
6154021b
RGS
12535 /* we created the ops in PL_lex_op, so make pl_yylval.ival a null op */
12536 pl_yylval.ival = OP_NULL;
79072805 12537 }
02aa26ce
NT
12538
12539 /* If it's none of the above, it must be a literal filehandle
12540 (<Foo::BAR> or <FOO>) so build a simple readline OP */
79072805 12541 else {
6136c704 12542 GV * const gv = gv_fetchpv(d, GV_ADD, SVt_PVIO);
9b3023bc
RGS
12543 PL_lex_op = readline_overriden
12544 ? (OP*)newUNOP(OP_ENTERSUB, OPf_STACKED,
12545 append_elem(OP_LIST,
12546 newGVOP(OP_GV, 0, gv),
12547 newCVREF(0, newGVOP(OP_GV, 0, gv_readline))))
12548 : (OP*)newUNOP(OP_READLINE, 0, newGVOP(OP_GV, 0, gv));
6154021b 12549 pl_yylval.ival = OP_NULL;
79072805
LW
12550 }
12551 }
02aa26ce 12552
79072805
LW
12553 return s;
12554}
12555
02aa26ce
NT
12556
12557/* scan_str
12558 takes: start position in buffer
09bef843
SB
12559 keep_quoted preserve \ on the embedded delimiter(s)
12560 keep_delims preserve the delimiters around the string
02aa26ce
NT
12561 returns: position to continue reading from buffer
12562 side-effects: multi_start, multi_close, lex_repl or lex_stuff, and
12563 updates the read buffer.
12564
12565 This subroutine pulls a string out of the input. It is called for:
12566 q single quotes q(literal text)
12567 ' single quotes 'literal text'
12568 qq double quotes qq(interpolate $here please)
12569 " double quotes "interpolate $here please"
12570 qx backticks qx(/bin/ls -l)
12571 ` backticks `/bin/ls -l`
12572 qw quote words @EXPORT_OK = qw( func() $spam )
12573 m// regexp match m/this/
12574 s/// regexp substitute s/this/that/
12575 tr/// string transliterate tr/this/that/
12576 y/// string transliterate y/this/that/
12577 ($*@) sub prototypes sub foo ($)
09bef843 12578 (stuff) sub attr parameters sub foo : attr(stuff)
02aa26ce
NT
12579 <> readline or globs <FOO>, <>, <$fh>, or <*.c>
12580
12581 In most of these cases (all but <>, patterns and transliterate)
12582 yylex() calls scan_str(). m// makes yylex() call scan_pat() which
12583 calls scan_str(). s/// makes yylex() call scan_subst() which calls
12584 scan_str(). tr/// and y/// make yylex() call scan_trans() which
12585 calls scan_str().
4e553d73 12586
02aa26ce
NT
12587 It skips whitespace before the string starts, and treats the first
12588 character as the delimiter. If the delimiter is one of ([{< then
12589 the corresponding "close" character )]}> is used as the closing
12590 delimiter. It allows quoting of delimiters, and if the string has
12591 balanced delimiters ([{<>}]) it allows nesting.
12592
37fd879b
HS
12593 On success, the SV with the resulting string is put into lex_stuff or,
12594 if that is already non-NULL, into lex_repl. The second case occurs only
12595 when parsing the RHS of the special constructs s/// and tr/// (y///).
12596 For convenience, the terminating delimiter character is stuffed into
12597 SvIVX of the SV.
02aa26ce
NT
12598*/
12599
76e3520e 12600STATIC char *
09bef843 12601S_scan_str(pTHX_ char *start, int keep_quoted, int keep_delims)
79072805 12602{
97aff369 12603 dVAR;
02aa26ce 12604 SV *sv; /* scalar value: string */
d3fcec1f 12605 const char *tmps; /* temp string, used for delimiter matching */
02aa26ce
NT
12606 register char *s = start; /* current position in the buffer */
12607 register char term; /* terminating character */
12608 register char *to; /* current position in the sv's data */
12609 I32 brackets = 1; /* bracket nesting level */
89491803 12610 bool has_utf8 = FALSE; /* is there any utf8 content? */
220e2d4e 12611 I32 termcode; /* terminating char. code */
89ebb4a3 12612 U8 termstr[UTF8_MAXBYTES]; /* terminating string */
220e2d4e 12613 STRLEN termlen; /* length of terminating string */
0331ef07 12614 int last_off = 0; /* last position for nesting bracket */
5db06880
NC
12615#ifdef PERL_MAD
12616 int stuffstart;
12617 char *tstart;
12618#endif
02aa26ce 12619
7918f24d
NC
12620 PERL_ARGS_ASSERT_SCAN_STR;
12621
02aa26ce 12622 /* skip space before the delimiter */
29595ff2
NC
12623 if (isSPACE(*s)) {
12624 s = PEEKSPACE(s);
12625 }
02aa26ce 12626
5db06880 12627#ifdef PERL_MAD
cd81e915
NC
12628 if (PL_realtokenstart >= 0) {
12629 stuffstart = PL_realtokenstart;
12630 PL_realtokenstart = -1;
5db06880
NC
12631 }
12632 else
12633 stuffstart = start - SvPVX(PL_linestr);
12634#endif
02aa26ce 12635 /* mark where we are, in case we need to report errors */
79072805 12636 CLINE;
02aa26ce
NT
12637
12638 /* after skipping whitespace, the next character is the terminator */
a0d0e21e 12639 term = *s;
220e2d4e
IH
12640 if (!UTF) {
12641 termcode = termstr[0] = term;
12642 termlen = 1;
12643 }
12644 else {
f3b9ce0f 12645 termcode = utf8_to_uvchr((U8*)s, &termlen);
220e2d4e
IH
12646 Copy(s, termstr, termlen, U8);
12647 if (!UTF8_IS_INVARIANT(term))
12648 has_utf8 = TRUE;
12649 }
b1c7b182 12650
02aa26ce 12651 /* mark where we are */
57843af0 12652 PL_multi_start = CopLINE(PL_curcop);
3280af22 12653 PL_multi_open = term;
02aa26ce
NT
12654
12655 /* find corresponding closing delimiter */
93a17b20 12656 if (term && (tmps = strchr("([{< )]}> )]}>",term)))
220e2d4e
IH
12657 termcode = termstr[0] = term = tmps[5];
12658
3280af22 12659 PL_multi_close = term;
79072805 12660
561b68a9
SH
12661 /* create a new SV to hold the contents. 79 is the SV's initial length.
12662 What a random number. */
7d0a29fe
NC
12663 sv = newSV_type(SVt_PVIV);
12664 SvGROW(sv, 80);
45977657 12665 SvIV_set(sv, termcode);
a0d0e21e 12666 (void)SvPOK_only(sv); /* validate pointer */
02aa26ce
NT
12667
12668 /* move past delimiter and try to read a complete string */
09bef843 12669 if (keep_delims)
220e2d4e
IH
12670 sv_catpvn(sv, s, termlen);
12671 s += termlen;
5db06880
NC
12672#ifdef PERL_MAD
12673 tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12674 if (!PL_thisopen && !keep_delims) {
12675 PL_thisopen = newSVpvn(tstart, s - tstart);
5db06880
NC
12676 stuffstart = s - SvPVX(PL_linestr);
12677 }
12678#endif
93a17b20 12679 for (;;) {
220e2d4e
IH
12680 if (PL_encoding && !UTF) {
12681 bool cont = TRUE;
12682
12683 while (cont) {
95a20fc0 12684 int offset = s - SvPVX_const(PL_linestr);
66a1b24b 12685 const bool found = sv_cat_decode(sv, PL_encoding, PL_linestr,
f3b9ce0f 12686 &offset, (char*)termstr, termlen);
6136c704
AL
12687 const char * const ns = SvPVX_const(PL_linestr) + offset;
12688 char * const svlast = SvEND(sv) - 1;
220e2d4e
IH
12689
12690 for (; s < ns; s++) {
12691 if (*s == '\n' && !PL_rsfp)
12692 CopLINE_inc(PL_curcop);
12693 }
12694 if (!found)
12695 goto read_more_line;
12696 else {
12697 /* handle quoted delimiters */
52327caf 12698 if (SvCUR(sv) > 1 && *(svlast-1) == '\\') {
f54cb97a 12699 const char *t;
95a20fc0 12700 for (t = svlast-2; t >= SvPVX_const(sv) && *t == '\\';)
220e2d4e
IH
12701 t--;
12702 if ((svlast-1 - t) % 2) {
12703 if (!keep_quoted) {
12704 *(svlast-1) = term;
12705 *svlast = '\0';
12706 SvCUR_set(sv, SvCUR(sv) - 1);
12707 }
12708 continue;
12709 }
12710 }
12711 if (PL_multi_open == PL_multi_close) {
12712 cont = FALSE;
12713 }
12714 else {
f54cb97a
AL
12715 const char *t;
12716 char *w;
0331ef07 12717 for (t = w = SvPVX(sv)+last_off; t < svlast; w++, t++) {
220e2d4e
IH
12718 /* At here, all closes are "was quoted" one,
12719 so we don't check PL_multi_close. */
12720 if (*t == '\\') {
12721 if (!keep_quoted && *(t+1) == PL_multi_open)
12722 t++;
12723 else
12724 *w++ = *t++;
12725 }
12726 else if (*t == PL_multi_open)
12727 brackets++;
12728
12729 *w = *t;
12730 }
12731 if (w < t) {
12732 *w++ = term;
12733 *w = '\0';
95a20fc0 12734 SvCUR_set(sv, w - SvPVX_const(sv));
220e2d4e 12735 }
0331ef07 12736 last_off = w - SvPVX(sv);
220e2d4e
IH
12737 if (--brackets <= 0)
12738 cont = FALSE;
12739 }
12740 }
12741 }
12742 if (!keep_delims) {
12743 SvCUR_set(sv, SvCUR(sv) - 1);
12744 *SvEND(sv) = '\0';
12745 }
12746 break;
12747 }
12748
02aa26ce 12749 /* extend sv if need be */
3280af22 12750 SvGROW(sv, SvCUR(sv) + (PL_bufend - s) + 1);
02aa26ce 12751 /* set 'to' to the next character in the sv's string */
463ee0b2 12752 to = SvPVX(sv)+SvCUR(sv);
09bef843 12753
02aa26ce 12754 /* if open delimiter is the close delimiter read unbridle */
3280af22
NIS
12755 if (PL_multi_open == PL_multi_close) {
12756 for (; s < PL_bufend; s++,to++) {
02aa26ce 12757 /* embedded newlines increment the current line number */
3280af22 12758 if (*s == '\n' && !PL_rsfp)
57843af0 12759 CopLINE_inc(PL_curcop);
02aa26ce 12760 /* handle quoted delimiters */
3280af22 12761 if (*s == '\\' && s+1 < PL_bufend && term != '\\') {
09bef843 12762 if (!keep_quoted && s[1] == term)
a0d0e21e 12763 s++;
02aa26ce 12764 /* any other quotes are simply copied straight through */
a0d0e21e
LW
12765 else
12766 *to++ = *s++;
12767 }
02aa26ce
NT
12768 /* terminate when run out of buffer (the for() condition), or
12769 have found the terminator */
220e2d4e
IH
12770 else if (*s == term) {
12771 if (termlen == 1)
12772 break;
f3b9ce0f 12773 if (s+termlen <= PL_bufend && memEQ(s, (char*)termstr, termlen))
220e2d4e
IH
12774 break;
12775 }
63cd0674 12776 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12777 has_utf8 = TRUE;
93a17b20
LW
12778 *to = *s;
12779 }
12780 }
02aa26ce
NT
12781
12782 /* if the terminator isn't the same as the start character (e.g.,
12783 matched brackets), we have to allow more in the quoting, and
12784 be prepared for nested brackets.
12785 */
93a17b20 12786 else {
02aa26ce 12787 /* read until we run out of string, or we find the terminator */
3280af22 12788 for (; s < PL_bufend; s++,to++) {
02aa26ce 12789 /* embedded newlines increment the line count */
3280af22 12790 if (*s == '\n' && !PL_rsfp)
57843af0 12791 CopLINE_inc(PL_curcop);
02aa26ce 12792 /* backslashes can escape the open or closing characters */
3280af22 12793 if (*s == '\\' && s+1 < PL_bufend) {
09bef843
SB
12794 if (!keep_quoted &&
12795 ((s[1] == PL_multi_open) || (s[1] == PL_multi_close)))
a0d0e21e
LW
12796 s++;
12797 else
12798 *to++ = *s++;
12799 }
02aa26ce 12800 /* allow nested opens and closes */
3280af22 12801 else if (*s == PL_multi_close && --brackets <= 0)
93a17b20 12802 break;
3280af22 12803 else if (*s == PL_multi_open)
93a17b20 12804 brackets++;
63cd0674 12805 else if (!has_utf8 && !UTF8_IS_INVARIANT((U8)*s) && UTF)
89491803 12806 has_utf8 = TRUE;
93a17b20
LW
12807 *to = *s;
12808 }
12809 }
02aa26ce 12810 /* terminate the copied string and update the sv's end-of-string */
93a17b20 12811 *to = '\0';
95a20fc0 12812 SvCUR_set(sv, to - SvPVX_const(sv));
93a17b20 12813
02aa26ce
NT
12814 /*
12815 * this next chunk reads more into the buffer if we're not done yet
12816 */
12817
b1c7b182
GS
12818 if (s < PL_bufend)
12819 break; /* handle case where we are done yet :-) */
79072805 12820
6a27c188 12821#ifndef PERL_STRICT_CR
95a20fc0 12822 if (to - SvPVX_const(sv) >= 2) {
c6f14548
GS
12823 if ((to[-2] == '\r' && to[-1] == '\n') ||
12824 (to[-2] == '\n' && to[-1] == '\r'))
12825 {
f63a84b2
LW
12826 to[-2] = '\n';
12827 to--;
95a20fc0 12828 SvCUR_set(sv, to - SvPVX_const(sv));
f63a84b2
LW
12829 }
12830 else if (to[-1] == '\r')
12831 to[-1] = '\n';
12832 }
95a20fc0 12833 else if (to - SvPVX_const(sv) == 1 && to[-1] == '\r')
f63a84b2
LW
12834 to[-1] = '\n';
12835#endif
12836
220e2d4e 12837 read_more_line:
02aa26ce
NT
12838 /* if we're out of file, or a read fails, bail and reset the current
12839 line marker so we can report where the unterminated string began
12840 */
5db06880
NC
12841#ifdef PERL_MAD
12842 if (PL_madskills) {
c35e046a 12843 char * const tstart = SvPVX(PL_linestr) + stuffstart;
cd81e915
NC
12844 if (PL_thisstuff)
12845 sv_catpvn(PL_thisstuff, tstart, PL_bufend - tstart);
5db06880 12846 else
cd81e915 12847 PL_thisstuff = newSVpvn(tstart, PL_bufend - tstart);
5db06880
NC
12848 }
12849#endif
f0e67a1d
Z
12850 CopLINE_inc(PL_curcop);
12851 PL_bufptr = PL_bufend;
12852 if (!lex_next_chunk(0)) {
c07a80fd 12853 sv_free(sv);
eb160463 12854 CopLINE_set(PL_curcop, (line_t)PL_multi_start);
bd61b366 12855 return NULL;
79072805 12856 }
f0e67a1d 12857 s = PL_bufptr;
5db06880
NC
12858#ifdef PERL_MAD
12859 stuffstart = 0;
12860#endif
378cc40b 12861 }
4e553d73 12862
02aa26ce
NT
12863 /* at this point, we have successfully read the delimited string */
12864
220e2d4e 12865 if (!PL_encoding || UTF) {
5db06880
NC
12866#ifdef PERL_MAD
12867 if (PL_madskills) {
c35e046a 12868 char * const tstart = SvPVX(PL_linestr) + stuffstart;
29522234 12869 const int len = s - tstart;
cd81e915 12870 if (PL_thisstuff)
c35e046a 12871 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12872 else
c35e046a 12873 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12874 if (!PL_thisclose && !keep_delims)
12875 PL_thisclose = newSVpvn(s,termlen);
5db06880
NC
12876 }
12877#endif
12878
220e2d4e
IH
12879 if (keep_delims)
12880 sv_catpvn(sv, s, termlen);
12881 s += termlen;
12882 }
5db06880
NC
12883#ifdef PERL_MAD
12884 else {
12885 if (PL_madskills) {
c35e046a
AL
12886 char * const tstart = SvPVX(PL_linestr) + stuffstart;
12887 const int len = s - tstart - termlen;
cd81e915 12888 if (PL_thisstuff)
c35e046a 12889 sv_catpvn(PL_thisstuff, tstart, len);
5db06880 12890 else
c35e046a 12891 PL_thisstuff = newSVpvn(tstart, len);
cd81e915
NC
12892 if (!PL_thisclose && !keep_delims)
12893 PL_thisclose = newSVpvn(s - termlen,termlen);
5db06880
NC
12894 }
12895 }
12896#endif
220e2d4e 12897 if (has_utf8 || PL_encoding)
b1c7b182 12898 SvUTF8_on(sv);
d0063567 12899
57843af0 12900 PL_multi_end = CopLINE(PL_curcop);
02aa26ce
NT
12901
12902 /* if we allocated too much space, give some back */
93a17b20
LW
12903 if (SvCUR(sv) + 5 < SvLEN(sv)) {
12904 SvLEN_set(sv, SvCUR(sv) + 1);
b7e9a5c2 12905 SvPV_renew(sv, SvLEN(sv));
79072805 12906 }
02aa26ce
NT
12907
12908 /* decide whether this is the first or second quoted string we've read
12909 for this op
12910 */
4e553d73 12911
3280af22
NIS
12912 if (PL_lex_stuff)
12913 PL_lex_repl = sv;
79072805 12914 else
3280af22 12915 PL_lex_stuff = sv;
378cc40b
LW
12916 return s;
12917}
12918
02aa26ce
NT
12919/*
12920 scan_num
12921 takes: pointer to position in buffer
12922 returns: pointer to new position in buffer
6154021b 12923 side-effects: builds ops for the constant in pl_yylval.op
02aa26ce
NT
12924
12925 Read a number in any of the formats that Perl accepts:
12926
7fd134d9
JH
12927 \d(_?\d)*(\.(\d(_?\d)*)?)?[Ee][\+\-]?(\d(_?\d)*) 12 12.34 12.
12928 \.\d(_?\d)*[Ee][\+\-]?(\d(_?\d)*) .34
24138b49
JH
12929 0b[01](_?[01])*
12930 0[0-7](_?[0-7])*
12931 0x[0-9A-Fa-f](_?[0-9A-Fa-f])*
02aa26ce 12932
3280af22 12933 Like most scan_ routines, it uses the PL_tokenbuf buffer to hold the
02aa26ce
NT
12934 thing it reads.
12935
12936 If it reads a number without a decimal point or an exponent, it will
12937 try converting the number to an integer and see if it can do so
12938 without loss of precision.
12939*/
4e553d73 12940
378cc40b 12941char *
bfed75c6 12942Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
378cc40b 12943{
97aff369 12944 dVAR;
bfed75c6 12945 register const char *s = start; /* current position in buffer */
02aa26ce
NT
12946 register char *d; /* destination in temp buffer */
12947 register char *e; /* end of temp buffer */
86554af2 12948 NV nv; /* number read, as a double */
a0714e2c 12949 SV *sv = NULL; /* place to put the converted number */
a86a20aa 12950 bool floatit; /* boolean: int or float? */
cbbf8932 12951 const char *lastub = NULL; /* position of last underbar */
bfed75c6 12952 static char const number_too_long[] = "Number too long";
378cc40b 12953
7918f24d
NC
12954 PERL_ARGS_ASSERT_SCAN_NUM;
12955
02aa26ce
NT
12956 /* We use the first character to decide what type of number this is */
12957
378cc40b 12958 switch (*s) {
79072805 12959 default:
cea2e8a9 12960 Perl_croak(aTHX_ "panic: scan_num");
4e553d73 12961
02aa26ce 12962 /* if it starts with a 0, it could be an octal number, a decimal in
a7cb1f99 12963 0.13 disguise, or a hexadecimal number, or a binary number. */
378cc40b
LW
12964 case '0':
12965 {
02aa26ce
NT
12966 /* variables:
12967 u holds the "number so far"
4f19785b
WSI
12968 shift the power of 2 of the base
12969 (hex == 4, octal == 3, binary == 1)
02aa26ce
NT
12970 overflowed was the number more than we can hold?
12971
12972 Shift is used when we add a digit. It also serves as an "are
4f19785b
WSI
12973 we in octal/hex/binary?" indicator to disallow hex characters
12974 when in octal mode.
02aa26ce 12975 */
9e24b6e2
JH
12976 NV n = 0.0;
12977 UV u = 0;
79072805 12978 I32 shift;
9e24b6e2 12979 bool overflowed = FALSE;
61f33854 12980 bool just_zero = TRUE; /* just plain 0 or binary number? */
27da23d5
JH
12981 static const NV nvshift[5] = { 1.0, 2.0, 4.0, 8.0, 16.0 };
12982 static const char* const bases[5] =
12983 { "", "binary", "", "octal", "hexadecimal" };
12984 static const char* const Bases[5] =
12985 { "", "Binary", "", "Octal", "Hexadecimal" };
12986 static const char* const maxima[5] =
12987 { "",
12988 "0b11111111111111111111111111111111",
12989 "",
12990 "037777777777",
12991 "0xffffffff" };
bfed75c6 12992 const char *base, *Base, *max;
378cc40b 12993
02aa26ce 12994 /* check for hex */
a674e8db 12995 if (s[1] == 'x' || s[1] == 'X') {
378cc40b
LW
12996 shift = 4;
12997 s += 2;
61f33854 12998 just_zero = FALSE;
a674e8db 12999 } else if (s[1] == 'b' || s[1] == 'B') {
4f19785b
WSI
13000 shift = 1;
13001 s += 2;
61f33854 13002 just_zero = FALSE;
378cc40b 13003 }
02aa26ce 13004 /* check for a decimal in disguise */
b78218b7 13005 else if (s[1] == '.' || s[1] == 'e' || s[1] == 'E')
378cc40b 13006 goto decimal;
02aa26ce 13007 /* so it must be octal */
928753ea 13008 else {
378cc40b 13009 shift = 3;
928753ea
JH
13010 s++;
13011 }
13012
13013 if (*s == '_') {
a2a5de95 13014 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
928753ea
JH
13015 "Misplaced _ in number");
13016 lastub = s++;
13017 }
9e24b6e2
JH
13018
13019 base = bases[shift];
13020 Base = Bases[shift];
13021 max = maxima[shift];
02aa26ce 13022
4f19785b 13023 /* read the rest of the number */
378cc40b 13024 for (;;) {
9e24b6e2 13025 /* x is used in the overflow test,
893fe2c2 13026 b is the digit we're adding on. */
9e24b6e2 13027 UV x, b;
55497cff 13028
378cc40b 13029 switch (*s) {
02aa26ce
NT
13030
13031 /* if we don't mention it, we're done */
378cc40b
LW
13032 default:
13033 goto out;
02aa26ce 13034
928753ea 13035 /* _ are ignored -- but warned about if consecutive */
de3bb511 13036 case '_':
a2a5de95
NC
13037 if (lastub && s == lastub + 1)
13038 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13039 "Misplaced _ in number");
928753ea 13040 lastub = s++;
de3bb511 13041 break;
02aa26ce
NT
13042
13043 /* 8 and 9 are not octal */
378cc40b 13044 case '8': case '9':
4f19785b 13045 if (shift == 3)
cea2e8a9 13046 yyerror(Perl_form(aTHX_ "Illegal octal digit '%c'", *s));
378cc40b 13047 /* FALL THROUGH */
02aa26ce
NT
13048
13049 /* octal digits */
4f19785b 13050 case '2': case '3': case '4':
378cc40b 13051 case '5': case '6': case '7':
4f19785b 13052 if (shift == 1)
cea2e8a9 13053 yyerror(Perl_form(aTHX_ "Illegal binary digit '%c'", *s));
4f19785b
WSI
13054 /* FALL THROUGH */
13055
13056 case '0': case '1':
02aa26ce 13057 b = *s++ & 15; /* ASCII digit -> value of digit */
55497cff 13058 goto digit;
02aa26ce
NT
13059
13060 /* hex digits */
378cc40b
LW
13061 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
13062 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
02aa26ce 13063 /* make sure they said 0x */
378cc40b
LW
13064 if (shift != 4)
13065 goto out;
55497cff 13066 b = (*s++ & 7) + 9;
02aa26ce
NT
13067
13068 /* Prepare to put the digit we have onto the end
13069 of the number so far. We check for overflows.
13070 */
13071
55497cff 13072 digit:
61f33854 13073 just_zero = FALSE;
9e24b6e2
JH
13074 if (!overflowed) {
13075 x = u << shift; /* make room for the digit */
13076
13077 if ((x >> shift) != u
13078 && !(PL_hints & HINT_NEW_BINARY)) {
9e24b6e2
JH
13079 overflowed = TRUE;
13080 n = (NV) u;
9b387841
NC
13081 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13082 "Integer overflow in %s number",
13083 base);
9e24b6e2
JH
13084 } else
13085 u = x | b; /* add the digit to the end */
13086 }
13087 if (overflowed) {
13088 n *= nvshift[shift];
13089 /* If an NV has not enough bits in its
13090 * mantissa to represent an UV this summing of
13091 * small low-order numbers is a waste of time
13092 * (because the NV cannot preserve the
13093 * low-order bits anyway): we could just
13094 * remember when did we overflow and in the
13095 * end just multiply n by the right
13096 * amount. */
13097 n += (NV) b;
55497cff 13098 }
378cc40b
LW
13099 break;
13100 }
13101 }
02aa26ce
NT
13102
13103 /* if we get here, we had success: make a scalar value from
13104 the number.
13105 */
378cc40b 13106 out:
928753ea
JH
13107
13108 /* final misplaced underbar check */
13109 if (s[-1] == '_') {
a2a5de95 13110 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
928753ea
JH
13111 }
13112
9e24b6e2 13113 if (overflowed) {
a2a5de95
NC
13114 if (n > 4294967295.0)
13115 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13116 "%s number > %s non-portable",
13117 Base, max);
b081dd7e 13118 sv = newSVnv(n);
9e24b6e2
JH
13119 }
13120 else {
15041a67 13121#if UVSIZE > 4
a2a5de95
NC
13122 if (u > 0xffffffff)
13123 Perl_ck_warner(aTHX_ packWARN(WARN_PORTABLE),
13124 "%s number > %s non-portable",
13125 Base, max);
2cc4c2dc 13126#endif
b081dd7e 13127 sv = newSVuv(u);
9e24b6e2 13128 }
61f33854 13129 if (just_zero && (PL_hints & HINT_NEW_INTEGER))
bfed75c6 13130 sv = new_constant(start, s - start, "integer",
eb0d8d16 13131 sv, NULL, NULL, 0);
61f33854 13132 else if (PL_hints & HINT_NEW_BINARY)
eb0d8d16 13133 sv = new_constant(start, s - start, "binary", sv, NULL, NULL, 0);
378cc40b
LW
13134 }
13135 break;
02aa26ce
NT
13136
13137 /*
13138 handle decimal numbers.
13139 we're also sent here when we read a 0 as the first digit
13140 */
378cc40b
LW
13141 case '1': case '2': case '3': case '4': case '5':
13142 case '6': case '7': case '8': case '9': case '.':
13143 decimal:
3280af22
NIS
13144 d = PL_tokenbuf;
13145 e = PL_tokenbuf + sizeof PL_tokenbuf - 6; /* room for various punctuation */
79072805 13146 floatit = FALSE;
02aa26ce
NT
13147
13148 /* read next group of digits and _ and copy into d */
de3bb511 13149 while (isDIGIT(*s) || *s == '_') {
4e553d73 13150 /* skip underscores, checking for misplaced ones
02aa26ce
NT
13151 if -w is on
13152 */
93a17b20 13153 if (*s == '_') {
a2a5de95
NC
13154 if (lastub && s == lastub + 1)
13155 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13156 "Misplaced _ in number");
928753ea 13157 lastub = s++;
93a17b20 13158 }
fc36a67e 13159 else {
02aa26ce 13160 /* check for end of fixed-length buffer */
fc36a67e 13161 if (d >= e)
cea2e8a9 13162 Perl_croak(aTHX_ number_too_long);
02aa26ce 13163 /* if we're ok, copy the character */
378cc40b 13164 *d++ = *s++;
fc36a67e 13165 }
378cc40b 13166 }
02aa26ce
NT
13167
13168 /* final misplaced underbar check */
928753ea 13169 if (lastub && s == lastub + 1) {
a2a5de95 13170 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX), "Misplaced _ in number");
d008e5eb 13171 }
02aa26ce
NT
13172
13173 /* read a decimal portion if there is one. avoid
13174 3..5 being interpreted as the number 3. followed
13175 by .5
13176 */
2f3197b3 13177 if (*s == '.' && s[1] != '.') {
79072805 13178 floatit = TRUE;
378cc40b 13179 *d++ = *s++;
02aa26ce 13180
928753ea 13181 if (*s == '_') {
a2a5de95
NC
13182 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13183 "Misplaced _ in number");
928753ea
JH
13184 lastub = s;
13185 }
13186
13187 /* copy, ignoring underbars, until we run out of digits.
02aa26ce 13188 */
fc36a67e 13189 for (; isDIGIT(*s) || *s == '_'; s++) {
02aa26ce 13190 /* fixed length buffer check */
fc36a67e 13191 if (d >= e)
cea2e8a9 13192 Perl_croak(aTHX_ number_too_long);
928753ea 13193 if (*s == '_') {
a2a5de95
NC
13194 if (lastub && s == lastub + 1)
13195 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13196 "Misplaced _ in number");
928753ea
JH
13197 lastub = s;
13198 }
13199 else
fc36a67e 13200 *d++ = *s;
378cc40b 13201 }
928753ea
JH
13202 /* fractional part ending in underbar? */
13203 if (s[-1] == '_') {
a2a5de95
NC
13204 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13205 "Misplaced _ in number");
928753ea 13206 }
dd629d5b
GS
13207 if (*s == '.' && isDIGIT(s[1])) {
13208 /* oops, it's really a v-string, but without the "v" */
f4758303 13209 s = start;
dd629d5b
GS
13210 goto vstring;
13211 }
378cc40b 13212 }
02aa26ce
NT
13213
13214 /* read exponent part, if present */
3792a11b 13215 if ((*s == 'e' || *s == 'E') && strchr("+-0123456789_", s[1])) {
79072805
LW
13216 floatit = TRUE;
13217 s++;
02aa26ce
NT
13218
13219 /* regardless of whether user said 3E5 or 3e5, use lower 'e' */
79072805 13220 *d++ = 'e'; /* At least some Mach atof()s don't grok 'E' */
02aa26ce 13221
7fd134d9
JH
13222 /* stray preinitial _ */
13223 if (*s == '_') {
a2a5de95
NC
13224 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13225 "Misplaced _ in number");
7fd134d9
JH
13226 lastub = s++;
13227 }
13228
02aa26ce 13229 /* allow positive or negative exponent */
378cc40b
LW
13230 if (*s == '+' || *s == '-')
13231 *d++ = *s++;
02aa26ce 13232
7fd134d9
JH
13233 /* stray initial _ */
13234 if (*s == '_') {
a2a5de95
NC
13235 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13236 "Misplaced _ in number");
7fd134d9
JH
13237 lastub = s++;
13238 }
13239
7fd134d9
JH
13240 /* read digits of exponent */
13241 while (isDIGIT(*s) || *s == '_') {
13242 if (isDIGIT(*s)) {
13243 if (d >= e)
13244 Perl_croak(aTHX_ number_too_long);
b3b48e3e 13245 *d++ = *s++;
7fd134d9
JH
13246 }
13247 else {
041457d9 13248 if (((lastub && s == lastub + 1) ||
a2a5de95
NC
13249 (!isDIGIT(s[1]) && s[1] != '_')))
13250 Perl_ck_warner(aTHX_ packWARN(WARN_SYNTAX),
13251 "Misplaced _ in number");
b3b48e3e 13252 lastub = s++;
7fd134d9 13253 }
7fd134d9 13254 }
378cc40b 13255 }
02aa26ce 13256
02aa26ce 13257
0b7fceb9 13258 /*
58bb9ec3
NC
13259 We try to do an integer conversion first if no characters
13260 indicating "float" have been found.
0b7fceb9
MU
13261 */
13262
13263 if (!floatit) {
58bb9ec3 13264 UV uv;
6136c704 13265 const int flags = grok_number (PL_tokenbuf, d - PL_tokenbuf, &uv);
58bb9ec3
NC
13266
13267 if (flags == IS_NUMBER_IN_UV) {
13268 if (uv <= IV_MAX)
b081dd7e 13269 sv = newSViv(uv); /* Prefer IVs over UVs. */
58bb9ec3 13270 else
b081dd7e 13271 sv = newSVuv(uv);
58bb9ec3
NC
13272 } else if (flags == (IS_NUMBER_IN_UV | IS_NUMBER_NEG)) {
13273 if (uv <= (UV) IV_MIN)
b081dd7e 13274 sv = newSViv(-(IV)uv);
58bb9ec3
NC
13275 else
13276 floatit = TRUE;
13277 } else
13278 floatit = TRUE;
13279 }
0b7fceb9 13280 if (floatit) {
58bb9ec3
NC
13281 /* terminate the string */
13282 *d = '\0';
86554af2 13283 nv = Atof(PL_tokenbuf);
b081dd7e 13284 sv = newSVnv(nv);
86554af2 13285 }
86554af2 13286
eb0d8d16
NC
13287 if ( floatit
13288 ? (PL_hints & HINT_NEW_FLOAT) : (PL_hints & HINT_NEW_INTEGER) ) {
13289 const char *const key = floatit ? "float" : "integer";
13290 const STRLEN keylen = floatit ? 5 : 7;
13291 sv = S_new_constant(aTHX_ PL_tokenbuf, d - PL_tokenbuf,
13292 key, keylen, sv, NULL, NULL, 0);
13293 }
378cc40b 13294 break;
0b7fceb9 13295
e312add1 13296 /* if it starts with a v, it could be a v-string */
a7cb1f99 13297 case 'v':
dd629d5b 13298vstring:
561b68a9 13299 sv = newSV(5); /* preallocate storage space */
65b06e02 13300 s = scan_vstring(s, PL_bufend, sv);
a7cb1f99 13301 break;
79072805 13302 }
a687059c 13303
02aa26ce
NT
13304 /* make the op for the constant and return */
13305
a86a20aa 13306 if (sv)
b73d6f50 13307 lvalp->opval = newSVOP(OP_CONST, 0, sv);
a7cb1f99 13308 else
5f66b61c 13309 lvalp->opval = NULL;
a687059c 13310
73d840c0 13311 return (char *)s;
378cc40b
LW
13312}
13313
76e3520e 13314STATIC char *
cea2e8a9 13315S_scan_formline(pTHX_ register char *s)
378cc40b 13316{
97aff369 13317 dVAR;
79072805 13318 register char *eol;
378cc40b 13319 register char *t;
6136c704 13320 SV * const stuff = newSVpvs("");
79072805 13321 bool needargs = FALSE;
c5ee2135 13322 bool eofmt = FALSE;
5db06880
NC
13323#ifdef PERL_MAD
13324 char *tokenstart = s;
4f61fd4b
JC
13325 SV* savewhite = NULL;
13326
5db06880 13327 if (PL_madskills) {
cd81e915
NC
13328 savewhite = PL_thiswhite;
13329 PL_thiswhite = 0;
5db06880
NC
13330 }
13331#endif
378cc40b 13332
7918f24d
NC
13333 PERL_ARGS_ASSERT_SCAN_FORMLINE;
13334
79072805 13335 while (!needargs) {
a1b95068 13336 if (*s == '.') {
c35e046a 13337 t = s+1;
51882d45 13338#ifdef PERL_STRICT_CR
c35e046a
AL
13339 while (SPACE_OR_TAB(*t))
13340 t++;
51882d45 13341#else
c35e046a
AL
13342 while (SPACE_OR_TAB(*t) || *t == '\r')
13343 t++;
51882d45 13344#endif
c5ee2135
WL
13345 if (*t == '\n' || t == PL_bufend) {
13346 eofmt = TRUE;
79072805 13347 break;
c5ee2135 13348 }
79072805 13349 }
3280af22 13350 if (PL_in_eval && !PL_rsfp) {
07409e01 13351 eol = (char *) memchr(s,'\n',PL_bufend-s);
0f85fab0 13352 if (!eol++)
3280af22 13353 eol = PL_bufend;
0f85fab0
LW
13354 }
13355 else
3280af22 13356 eol = PL_bufend = SvPVX(PL_linestr) + SvCUR(PL_linestr);
79072805 13357 if (*s != '#') {
a0d0e21e
LW
13358 for (t = s; t < eol; t++) {
13359 if (*t == '~' && t[1] == '~' && SvCUR(stuff)) {
13360 needargs = FALSE;
13361 goto enough; /* ~~ must be first line in formline */
378cc40b 13362 }
a0d0e21e
LW
13363 if (*t == '@' || *t == '^')
13364 needargs = TRUE;
378cc40b 13365 }
7121b347
MG
13366 if (eol > s) {
13367 sv_catpvn(stuff, s, eol-s);
2dc4c65b 13368#ifndef PERL_STRICT_CR
7121b347
MG
13369 if (eol-s > 1 && eol[-2] == '\r' && eol[-1] == '\n') {
13370 char *end = SvPVX(stuff) + SvCUR(stuff);
13371 end[-2] = '\n';
13372 end[-1] = '\0';
b162af07 13373 SvCUR_set(stuff, SvCUR(stuff) - 1);
7121b347 13374 }
2dc4c65b 13375#endif
7121b347
MG
13376 }
13377 else
13378 break;
79072805 13379 }
95a20fc0 13380 s = (char*)eol;
3280af22 13381 if (PL_rsfp) {
f0e67a1d 13382 bool got_some;
5db06880
NC
13383#ifdef PERL_MAD
13384 if (PL_madskills) {
cd81e915
NC
13385 if (PL_thistoken)
13386 sv_catpvn(PL_thistoken, tokenstart, PL_bufend - tokenstart);
5db06880 13387 else
cd81e915 13388 PL_thistoken = newSVpvn(tokenstart, PL_bufend - tokenstart);
5db06880
NC
13389 }
13390#endif
f0e67a1d
Z
13391 PL_bufptr = PL_bufend;
13392 CopLINE_inc(PL_curcop);
13393 got_some = lex_next_chunk(0);
13394 CopLINE_dec(PL_curcop);
13395 s = PL_bufptr;
5db06880 13396#ifdef PERL_MAD
f0e67a1d 13397 tokenstart = PL_bufptr;
5db06880 13398#endif
f0e67a1d 13399 if (!got_some)
378cc40b 13400 break;
378cc40b 13401 }
463ee0b2 13402 incline(s);
79072805 13403 }
a0d0e21e
LW
13404 enough:
13405 if (SvCUR(stuff)) {
3280af22 13406 PL_expect = XTERM;
79072805 13407 if (needargs) {
3280af22 13408 PL_lex_state = LEX_NORMAL;
cd81e915 13409 start_force(PL_curforce);
9ded7720 13410 NEXTVAL_NEXTTOKE.ival = 0;
79072805
LW
13411 force_next(',');
13412 }
a0d0e21e 13413 else
3280af22 13414 PL_lex_state = LEX_FORMLINE;
1bd51a4c 13415 if (!IN_BYTES) {
95a20fc0 13416 if (UTF && is_utf8_string((U8*)SvPVX_const(stuff), SvCUR(stuff)))
1bd51a4c
IH
13417 SvUTF8_on(stuff);
13418 else if (PL_encoding)
13419 sv_recode_to_utf8(stuff, PL_encoding);
13420 }
cd81e915 13421 start_force(PL_curforce);
9ded7720 13422 NEXTVAL_NEXTTOKE.opval = (OP*)newSVOP(OP_CONST, 0, stuff);
79072805 13423 force_next(THING);
cd81e915 13424 start_force(PL_curforce);
9ded7720 13425 NEXTVAL_NEXTTOKE.ival = OP_FORMLINE;
79072805 13426 force_next(LSTOP);
378cc40b 13427 }
79072805 13428 else {
8990e307 13429 SvREFCNT_dec(stuff);
c5ee2135
WL
13430 if (eofmt)
13431 PL_lex_formbrack = 0;
3280af22 13432 PL_bufptr = s;
79072805 13433 }
5db06880
NC
13434#ifdef PERL_MAD
13435 if (PL_madskills) {
cd81e915
NC
13436 if (PL_thistoken)
13437 sv_catpvn(PL_thistoken, tokenstart, s - tokenstart);
5db06880 13438 else
cd81e915
NC
13439 PL_thistoken = newSVpvn(tokenstart, s - tokenstart);
13440 PL_thiswhite = savewhite;
5db06880
NC
13441 }
13442#endif
79072805 13443 return s;
378cc40b 13444}
a687059c 13445
ba6d6ac9 13446I32
864dbfa3 13447Perl_start_subparse(pTHX_ I32 is_format, U32 flags)
8990e307 13448{
97aff369 13449 dVAR;
a3b680e6 13450 const I32 oldsavestack_ix = PL_savestack_ix;
6136c704 13451 CV* const outsidecv = PL_compcv;
8990e307 13452
3280af22
NIS
13453 if (PL_compcv) {
13454 assert(SvTYPE(PL_compcv) == SVt_PVCV);
e9a444f0 13455 }
7766f137 13456 SAVEI32(PL_subline);
3280af22 13457 save_item(PL_subname);
3280af22 13458 SAVESPTR(PL_compcv);
3280af22 13459
ea726b52 13460 PL_compcv = MUTABLE_CV(newSV_type(is_format ? SVt_PVFM : SVt_PVCV));
3280af22
NIS
13461 CvFLAGS(PL_compcv) |= flags;
13462
57843af0 13463 PL_subline = CopLINE(PL_curcop);
dd2155a4 13464 CvPADLIST(PL_compcv) = pad_new(padnew_SAVE|padnew_SAVESUB);
ea726b52 13465 CvOUTSIDE(PL_compcv) = MUTABLE_CV(SvREFCNT_inc_simple(outsidecv));
a3985cdc 13466 CvOUTSIDE_SEQ(PL_compcv) = PL_cop_seqmax;
748a9306 13467
8990e307
LW
13468 return oldsavestack_ix;
13469}
13470
084592ab
CN
13471#ifdef __SC__
13472#pragma segment Perl_yylex
13473#endif
af41e527
NC
13474static int
13475S_yywarn(pTHX_ const char *const s)
8990e307 13476{
97aff369 13477 dVAR;
7918f24d
NC
13478
13479 PERL_ARGS_ASSERT_YYWARN;
13480
faef0170 13481 PL_in_eval |= EVAL_WARNONLY;
748a9306 13482 yyerror(s);
faef0170 13483 PL_in_eval &= ~EVAL_WARNONLY;
748a9306 13484 return 0;
8990e307
LW
13485}
13486
13487int
15f169a1 13488Perl_yyerror(pTHX_ const char *const s)
463ee0b2 13489{
97aff369 13490 dVAR;
bfed75c6
AL
13491 const char *where = NULL;
13492 const char *context = NULL;
68dc0745 13493 int contlen = -1;
46fc3d4c 13494 SV *msg;
5912531f 13495 int yychar = PL_parser->yychar;
463ee0b2 13496
7918f24d
NC
13497 PERL_ARGS_ASSERT_YYERROR;
13498
3280af22 13499 if (!yychar || (yychar == ';' && !PL_rsfp))
54310121 13500 where = "at EOF";
8bcfe651
TM
13501 else if (PL_oldoldbufptr && PL_bufptr > PL_oldoldbufptr &&
13502 PL_bufptr - PL_oldoldbufptr < 200 && PL_oldoldbufptr != PL_oldbufptr &&
13503 PL_oldbufptr != PL_bufptr) {
f355267c
JH
13504 /*
13505 Only for NetWare:
13506 The code below is removed for NetWare because it abends/crashes on NetWare
13507 when the script has error such as not having the closing quotes like:
13508 if ($var eq "value)
13509 Checking of white spaces is anyway done in NetWare code.
13510 */
13511#ifndef NETWARE
3280af22
NIS
13512 while (isSPACE(*PL_oldoldbufptr))
13513 PL_oldoldbufptr++;
f355267c 13514#endif
3280af22
NIS
13515 context = PL_oldoldbufptr;
13516 contlen = PL_bufptr - PL_oldoldbufptr;
463ee0b2 13517 }
8bcfe651
TM
13518 else if (PL_oldbufptr && PL_bufptr > PL_oldbufptr &&
13519 PL_bufptr - PL_oldbufptr < 200 && PL_oldbufptr != PL_bufptr) {
f355267c
JH
13520 /*
13521 Only for NetWare:
13522 The code below is removed for NetWare because it abends/crashes on NetWare
13523 when the script has error such as not having the closing quotes like:
13524 if ($var eq "value)
13525 Checking of white spaces is anyway done in NetWare code.
13526 */
13527#ifndef NETWARE
3280af22
NIS
13528 while (isSPACE(*PL_oldbufptr))
13529 PL_oldbufptr++;
f355267c 13530#endif
3280af22
NIS
13531 context = PL_oldbufptr;
13532 contlen = PL_bufptr - PL_oldbufptr;
463ee0b2
LW
13533 }
13534 else if (yychar > 255)
68dc0745 13535 where = "next token ???";
12fbd33b 13536 else if (yychar == -2) { /* YYEMPTY */
3280af22
NIS
13537 if (PL_lex_state == LEX_NORMAL ||
13538 (PL_lex_state == LEX_KNOWNEXT && PL_lex_defer == LEX_NORMAL))
68dc0745 13539 where = "at end of line";
3280af22 13540 else if (PL_lex_inpat)
68dc0745 13541 where = "within pattern";
463ee0b2 13542 else
68dc0745 13543 where = "within string";
463ee0b2 13544 }
46fc3d4c 13545 else {
84bafc02 13546 SV * const where_sv = newSVpvs_flags("next char ", SVs_TEMP);
46fc3d4c 13547 if (yychar < 32)
cea2e8a9 13548 Perl_sv_catpvf(aTHX_ where_sv, "^%c", toCTRL(yychar));
5e7aa789 13549 else if (isPRINT_LC(yychar)) {
88c9ea1e 13550 const char string = yychar;
5e7aa789
NC
13551 sv_catpvn(where_sv, &string, 1);
13552 }
463ee0b2 13553 else
cea2e8a9 13554 Perl_sv_catpvf(aTHX_ where_sv, "\\%03o", yychar & 255);
95a20fc0 13555 where = SvPVX_const(where_sv);
463ee0b2 13556 }
46fc3d4c 13557 msg = sv_2mortal(newSVpv(s, 0));
ed094faf 13558 Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
248c2a4d 13559 OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
68dc0745 13560 if (context)
cea2e8a9 13561 Perl_sv_catpvf(aTHX_ msg, "near \"%.*s\"\n", contlen, context);
463ee0b2 13562 else
cea2e8a9 13563 Perl_sv_catpvf(aTHX_ msg, "%s\n", where);
57843af0 13564 if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {
cf2093f6 13565 Perl_sv_catpvf(aTHX_ msg,
57def98f 13566 " (Might be a runaway multi-line %c%c string starting on line %"IVdf")\n",
cf2093f6 13567 (int)PL_multi_open,(int)PL_multi_close,(IV)PL_multi_start);
3280af22 13568 PL_multi_end = 0;
a0d0e21e 13569 }
500960a6 13570 if (PL_in_eval & EVAL_WARNONLY) {
9b387841 13571 Perl_ck_warner_d(aTHX_ packWARN(WARN_SYNTAX), "%"SVf, SVfARG(msg));
500960a6 13572 }
463ee0b2 13573 else
5a844595 13574 qerror(msg);
c7d6bfb2
GS
13575 if (PL_error_count >= 10) {
13576 if (PL_in_eval && SvCUR(ERRSV))
d2560b70 13577 Perl_croak(aTHX_ "%"SVf"%s has too many errors.\n",
be2597df 13578 SVfARG(ERRSV), OutCopFILE(PL_curcop));
c7d6bfb2
GS
13579 else
13580 Perl_croak(aTHX_ "%s has too many errors.\n",
248c2a4d 13581 OutCopFILE(PL_curcop));
c7d6bfb2 13582 }
3280af22 13583 PL_in_my = 0;
5c284bb0 13584 PL_in_my_stash = NULL;
463ee0b2
LW
13585 return 0;
13586}
084592ab
CN
13587#ifdef __SC__
13588#pragma segment Main
13589#endif
4e35701f 13590
b250498f 13591STATIC char*
3ae08724 13592S_swallow_bom(pTHX_ U8 *s)
01ec43d0 13593{
97aff369 13594 dVAR;
f54cb97a 13595 const STRLEN slen = SvCUR(PL_linestr);
7918f24d
NC
13596
13597 PERL_ARGS_ASSERT_SWALLOW_BOM;
13598
7aa207d6 13599 switch (s[0]) {
4e553d73
NIS
13600 case 0xFF:
13601 if (s[1] == 0xFE) {
ee6ba15d 13602 /* UTF-16 little-endian? (or UTF-32LE?) */
3ae08724 13603 if (s[2] == 0 && s[3] == 0) /* UTF-32 little-endian */
ee6ba15d 13604 Perl_croak(aTHX_ "Unsupported script encoding UTF-32LE");
01ec43d0 13605#ifndef PERL_NO_UTF16_FILTER
ee6ba15d 13606 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (BOM)\n");
3ae08724 13607 s += 2;
dea0fc0b 13608 if (PL_bufend > (char*)s) {
81a923f4 13609 s = add_utf16_textfilter(s, TRUE);
dea0fc0b 13610 }
b250498f 13611#else
ee6ba15d 13612 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
b250498f 13613#endif
01ec43d0
GS
13614 }
13615 break;
78ae23f5 13616 case 0xFE:
7aa207d6 13617 if (s[1] == 0xFF) { /* UTF-16 big-endian? */
01ec43d0 13618#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13619 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (BOM)\n");
dea0fc0b
JH
13620 s += 2;
13621 if (PL_bufend > (char *)s) {
81a923f4 13622 s = add_utf16_textfilter(s, FALSE);
dea0fc0b 13623 }
b250498f 13624#else
ee6ba15d 13625 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
b250498f 13626#endif
01ec43d0
GS
13627 }
13628 break;
3ae08724
GS
13629 case 0xEF:
13630 if (slen > 2 && s[1] == 0xBB && s[2] == 0xBF) {
7aa207d6 13631 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
01ec43d0
GS
13632 s += 3; /* UTF-8 */
13633 }
13634 break;
13635 case 0:
7aa207d6
JH
13636 if (slen > 3) {
13637 if (s[1] == 0) {
13638 if (s[2] == 0xFE && s[3] == 0xFF) {
13639 /* UTF-32 big-endian */
ee6ba15d 13640 Perl_croak(aTHX_ "Unsupported script encoding UTF-32BE");
7aa207d6
JH
13641 }
13642 }
13643 else if (s[2] == 0 && s[3] != 0) {
13644 /* Leading bytes
13645 * 00 xx 00 xx
13646 * are a good indicator of UTF-16BE. */
ee6ba15d 13647#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13648 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16BE script encoding (no BOM)\n");
ee6ba15d
EB
13649 s = add_utf16_textfilter(s, FALSE);
13650#else
13651 Perl_croak(aTHX_ "Unsupported script encoding UTF-16BE");
13652#endif
7aa207d6 13653 }
01ec43d0 13654 }
e294cc5d
JH
13655#ifdef EBCDIC
13656 case 0xDD:
13657 if (slen > 3 && s[1] == 0x73 && s[2] == 0x66 && s[3] == 0x73) {
13658 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-8 script encoding (BOM)\n");
13659 s += 4; /* UTF-8 */
13660 }
13661 break;
13662#endif
13663
7aa207d6
JH
13664 default:
13665 if (slen > 3 && s[1] == 0 && s[2] != 0 && s[3] == 0) {
13666 /* Leading bytes
13667 * xx 00 xx 00
13668 * are a good indicator of UTF-16LE. */
ee6ba15d 13669#ifndef PERL_NO_UTF16_FILTER
7aa207d6 13670 if (DEBUG_p_TEST || DEBUG_T_TEST) PerlIO_printf(Perl_debug_log, "UTF-16LE script encoding (no BOM)\n");
81a923f4 13671 s = add_utf16_textfilter(s, TRUE);
ee6ba15d
EB
13672#else
13673 Perl_croak(aTHX_ "Unsupported script encoding UTF-16LE");
13674#endif
7aa207d6 13675 }
01ec43d0 13676 }
b8f84bb2 13677 return (char*)s;
b250498f 13678}
4755096e 13679
6e3aabd6
GS
13680
13681#ifndef PERL_NO_UTF16_FILTER
13682static I32
a28af015 13683S_utf16_textfilter(pTHX_ int idx, SV *sv, int maxlen)
6e3aabd6 13684{
97aff369 13685 dVAR;
f3040f2c 13686 SV *const filter = FILTER_DATA(idx);
2a773401
NC
13687 /* We re-use this each time round, throwing the contents away before we
13688 return. */
2a773401 13689 SV *const utf16_buffer = MUTABLE_SV(IoTOP_GV(filter));
f3040f2c 13690 SV *const utf8_buffer = filter;
c28d6105 13691 IV status = IoPAGE(filter);
f2338a2e 13692 const bool reverse = cBOOL(IoLINES(filter));
d2d1d4de 13693 I32 retval;
c8b0cbae 13694
c85ae797
NC
13695 PERL_ARGS_ASSERT_UTF16_TEXTFILTER;
13696
c8b0cbae
NC
13697 /* As we're automatically added, at the lowest level, and hence only called
13698 from this file, we can be sure that we're not called in block mode. Hence
13699 don't bother writing code to deal with block mode. */
13700 if (maxlen) {
13701 Perl_croak(aTHX_ "panic: utf16_textfilter called in block mode (for %d characters)", maxlen);
13702 }
c28d6105
NC
13703 if (status < 0) {
13704 Perl_croak(aTHX_ "panic: utf16_textfilter called after error (status=%"IVdf")", status);
13705 }
1de9afcd 13706 DEBUG_P(PerlIO_printf(Perl_debug_log,
c28d6105 13707 "utf16_textfilter(%p,%ce): idx=%d maxlen=%d status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
a28af015 13708 FPTR2DPTR(void *, S_utf16_textfilter),
c28d6105
NC
13709 reverse ? 'l' : 'b', idx, maxlen, status,
13710 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13711
13712 while (1) {
13713 STRLEN chars;
13714 STRLEN have;
dea0fc0b 13715 I32 newlen;
2a773401 13716 U8 *end;
c28d6105
NC
13717 /* First, look in our buffer of existing UTF-8 data: */
13718 char *nl = (char *)memchr(SvPVX(utf8_buffer), '\n', SvCUR(utf8_buffer));
13719
13720 if (nl) {
13721 ++nl;
13722 } else if (status == 0) {
13723 /* EOF */
13724 IoPAGE(filter) = 0;
13725 nl = SvEND(utf8_buffer);
13726 }
13727 if (nl) {
d2d1d4de
NC
13728 STRLEN got = nl - SvPVX(utf8_buffer);
13729 /* Did we have anything to append? */
13730 retval = got != 0;
13731 sv_catpvn(sv, SvPVX(utf8_buffer), got);
c28d6105
NC
13732 /* Everything else in this code works just fine if SVp_POK isn't
13733 set. This, however, needs it, and we need it to work, else
13734 we loop infinitely because the buffer is never consumed. */
13735 sv_chop(utf8_buffer, nl);
13736 break;
13737 }
ba77e4cc 13738
c28d6105
NC
13739 /* OK, not a complete line there, so need to read some more UTF-16.
13740 Read an extra octect if the buffer currently has an odd number. */
ba77e4cc
NC
13741 while (1) {
13742 if (status <= 0)
13743 break;
13744 if (SvCUR(utf16_buffer) >= 2) {
13745 /* Location of the high octet of the last complete code point.
13746 Gosh, UTF-16 is a pain. All the benefits of variable length,
13747 *coupled* with all the benefits of partial reads and
13748 endianness. */
13749 const U8 *const last_hi = (U8*)SvPVX(utf16_buffer)
13750 + ((SvCUR(utf16_buffer) & ~1) - (reverse ? 1 : 2));
13751
13752 if (*last_hi < 0xd8 || *last_hi > 0xdb) {
13753 break;
13754 }
13755
13756 /* We have the first half of a surrogate. Read more. */
13757 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter partial surrogate detected at %p\n", last_hi));
13758 }
c28d6105 13759
c28d6105
NC
13760 status = FILTER_READ(idx + 1, utf16_buffer,
13761 160 + (SvCUR(utf16_buffer) & 1));
13762 DEBUG_P(PerlIO_printf(Perl_debug_log, "utf16_textfilter status=%"IVdf" SvCUR(sv)=%"UVuf"\n", status, (UV)SvCUR(utf16_buffer)));
ba77e4cc 13763 DEBUG_P({ sv_dump(utf16_buffer); sv_dump(utf8_buffer);});
c28d6105
NC
13764 if (status < 0) {
13765 /* Error */
13766 IoPAGE(filter) = status;
13767 return status;
13768 }
13769 }
13770
13771 chars = SvCUR(utf16_buffer) >> 1;
13772 have = SvCUR(utf8_buffer);
13773 SvGROW(utf8_buffer, have + chars * 3 + 1);
2a773401 13774
aa6dbd60 13775 if (reverse) {
c28d6105
NC
13776 end = utf16_to_utf8_reversed((U8*)SvPVX(utf16_buffer),
13777 (U8*)SvPVX_const(utf8_buffer) + have,
13778 chars * 2, &newlen);
aa6dbd60 13779 } else {
2a773401 13780 end = utf16_to_utf8((U8*)SvPVX(utf16_buffer),
c28d6105
NC
13781 (U8*)SvPVX_const(utf8_buffer) + have,
13782 chars * 2, &newlen);
2a773401 13783 }
c28d6105 13784 SvCUR_set(utf8_buffer, have + newlen);
2a773401 13785 *end = '\0';
c28d6105 13786
e07286ed
NC
13787 /* No need to keep this SV "well-formed" with a '\0' after the end, as
13788 it's private to us, and utf16_to_utf8{,reversed} take a
13789 (pointer,length) pair, rather than a NUL-terminated string. */
13790 if(SvCUR(utf16_buffer) & 1) {
13791 *SvPVX(utf16_buffer) = SvEND(utf16_buffer)[-1];
13792 SvCUR_set(utf16_buffer, 1);
13793 } else {
13794 SvCUR_set(utf16_buffer, 0);
13795 }
2a773401 13796 }
c28d6105
NC
13797 DEBUG_P(PerlIO_printf(Perl_debug_log,
13798 "utf16_textfilter: returns, status=%"IVdf" utf16=%"UVuf" utf8=%"UVuf"\n",
13799 status,
13800 (UV)SvCUR(utf16_buffer), (UV)SvCUR(utf8_buffer)));
13801 DEBUG_P({ sv_dump(utf8_buffer); sv_dump(sv);});
d2d1d4de 13802 return retval;
6e3aabd6 13803}
81a923f4
NC
13804
13805static U8 *
13806S_add_utf16_textfilter(pTHX_ U8 *const s, bool reversed)
13807{
2a773401 13808 SV *filter = filter_add(S_utf16_textfilter, NULL);
81a923f4 13809
c85ae797
NC
13810 PERL_ARGS_ASSERT_ADD_UTF16_TEXTFILTER;
13811
c28d6105 13812 IoTOP_GV(filter) = MUTABLE_GV(newSVpvn((char *)s, PL_bufend - (char*)s));
f3040f2c 13813 sv_setpvs(filter, "");
2a773401 13814 IoLINES(filter) = reversed;
c28d6105
NC
13815 IoPAGE(filter) = 1; /* Not EOF */
13816
13817 /* Sadly, we have to return a valid pointer, come what may, so we have to
13818 ignore any error return from this. */
13819 SvCUR_set(PL_linestr, 0);
13820 if (FILTER_READ(0, PL_linestr, 0)) {
13821 SvUTF8_on(PL_linestr);
81a923f4 13822 } else {
c28d6105 13823 SvUTF8_on(PL_linestr);
81a923f4 13824 }
c28d6105 13825 PL_bufend = SvEND(PL_linestr);
81a923f4
NC
13826 return (U8*)SvPVX(PL_linestr);
13827}
6e3aabd6 13828#endif
9f4817db 13829
f333445c
JP
13830/*
13831Returns a pointer to the next character after the parsed
13832vstring, as well as updating the passed in sv.
13833
13834Function must be called like
13835
561b68a9 13836 sv = newSV(5);
65b06e02 13837 s = scan_vstring(s,e,sv);
f333445c 13838
65b06e02 13839where s and e are the start and end of the string.
f333445c
JP
13840The sv should already be large enough to store the vstring
13841passed in, for performance reasons.
13842
13843*/
13844
13845char *
15f169a1 13846Perl_scan_vstring(pTHX_ const char *s, const char *const e, SV *sv)
f333445c 13847{
97aff369 13848 dVAR;
bfed75c6
AL
13849 const char *pos = s;
13850 const char *start = s;
7918f24d
NC
13851
13852 PERL_ARGS_ASSERT_SCAN_VSTRING;
13853
f333445c 13854 if (*pos == 'v') pos++; /* get past 'v' */
65b06e02 13855 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
3e884cbf 13856 pos++;
f333445c
JP
13857 if ( *pos != '.') {
13858 /* this may not be a v-string if followed by => */
bfed75c6 13859 const char *next = pos;
65b06e02 13860 while (next < e && isSPACE(*next))
8fc7bb1c 13861 ++next;
65b06e02 13862 if ((e - next) >= 2 && *next == '=' && next[1] == '>' ) {
f333445c
JP
13863 /* return string not v-string */
13864 sv_setpvn(sv,(char *)s,pos-s);
73d840c0 13865 return (char *)pos;
f333445c
JP
13866 }
13867 }
13868
13869 if (!isALPHA(*pos)) {
89ebb4a3 13870 U8 tmpbuf[UTF8_MAXBYTES+1];
f333445c 13871
d4c19fe8
AL
13872 if (*s == 'v')
13873 s++; /* get past 'v' */
f333445c 13874
76f68e9b 13875 sv_setpvs(sv, "");
f333445c
JP
13876
13877 for (;;) {
d4c19fe8 13878 /* this is atoi() that tolerates underscores */
0bd48802
AL
13879 U8 *tmpend;
13880 UV rev = 0;
d4c19fe8
AL
13881 const char *end = pos;
13882 UV mult = 1;
13883 while (--end >= s) {
13884 if (*end != '_') {
13885 const UV orev = rev;
f333445c
JP
13886 rev += (*end - '0') * mult;
13887 mult *= 10;
9b387841
NC
13888 if (orev > rev)
13889 Perl_ck_warner_d(aTHX_ packWARN(WARN_OVERFLOW),
13890 "Integer overflow in decimal number");
f333445c
JP
13891 }
13892 }
13893#ifdef EBCDIC
13894 if (rev > 0x7FFFFFFF)
13895 Perl_croak(aTHX_ "In EBCDIC the v-string components cannot exceed 2147483647");
13896#endif
13897 /* Append native character for the rev point */
13898 tmpend = uvchr_to_utf8(tmpbuf, rev);
13899 sv_catpvn(sv, (const char*)tmpbuf, tmpend - tmpbuf);
13900 if (!UNI_IS_INVARIANT(NATIVE_TO_UNI(rev)))
13901 SvUTF8_on(sv);
65b06e02 13902 if (pos + 1 < e && *pos == '.' && isDIGIT(pos[1]))
f333445c
JP
13903 s = ++pos;
13904 else {
13905 s = pos;
13906 break;
13907 }
65b06e02 13908 while (pos < e && (isDIGIT(*pos) || *pos == '_'))
f333445c
JP
13909 pos++;
13910 }
13911 SvPOK_on(sv);
13912 sv_magic(sv,NULL,PERL_MAGIC_vstring,(const char*)start, pos-start);
13913 SvRMAGICAL_on(sv);
13914 }
73d840c0 13915 return (char *)s;
f333445c
JP
13916}
13917
88e1f1a2
JV
13918int
13919Perl_keyword_plugin_standard(pTHX_
13920 char *keyword_ptr, STRLEN keyword_len, OP **op_ptr)
13921{
13922 PERL_ARGS_ASSERT_KEYWORD_PLUGIN_STANDARD;
13923 PERL_UNUSED_CONTEXT;
13924 PERL_UNUSED_ARG(keyword_ptr);
13925 PERL_UNUSED_ARG(keyword_len);
13926 PERL_UNUSED_ARG(op_ptr);
13927 return KEYWORD_PLUGIN_DECLINE;
13928}
13929
1da4ca5f
NC
13930/*
13931 * Local variables:
13932 * c-indentation-style: bsd
13933 * c-basic-offset: 4
13934 * indent-tabs-mode: t
13935 * End:
13936 *
37442d52
RGS
13937 * ex: set ts=8 sts=4 sw=4 noet:
13938 */